Browse Source

Move notification logic to hc.api.transports. Don't use "paused" state for checks.

pull/40/head
Pēteris Caune 9 years ago
parent
commit
21a042aa16
8 changed files with 230 additions and 122 deletions
  1. +1
    -1
      hc/api/admin.py
  2. +6
    -16
      hc/api/management/commands/sendalerts.py
  3. +24
    -0
      hc/api/migrations/0022_auto_20160130_2042.py
  4. +28
    -96
      hc/api/models.py
  5. +16
    -9
      hc/api/tests/test_notify.py
  6. +145
    -0
      hc/api/transports.py
  7. +5
    -0
      templates/integrations/pd_description.html
  8. +5
    -0
      templates/integrations/pushover_title.html

+ 1
- 1
hc/api/admin.py View File

@ -182,7 +182,7 @@ class NotificationsAdmin(admin.ModelAdmin):
search_fields = ["owner__name", "owner__code", "channel__value"] search_fields = ["owner__name", "owner__code", "channel__value"]
list_select_related = ("owner", "channel") list_select_related = ("owner", "channel")
list_display = ("id", "created", "check_status", "check_name", list_display = ("id", "created", "check_status", "check_name",
"channel_kind", "channel_value", "status")
"channel_kind", "channel_value")
list_filter = ("created", "check_status", "channel__kind") list_filter = ("created", "check_status", "channel__kind")
def check_name(self, obj): def check_name(self, obj):


+ 6
- 16
hc/api/management/commands/sendalerts.py View File

@ -39,27 +39,17 @@ class Command(BaseCommand):
Return False if no checks need to be processed. Return False if no checks need to be processed.
""" """
# Save the new status. If sendalerts crashes,
# it won't process this check again.
check.status = check.get_status() check.status = check.get_status()
check.save()
tmpl = "\nSending alert, status=%s, code=%s\n" tmpl = "\nSending alert, status=%s, code=%s\n"
self.stdout.write(tmpl % (check.status, check.code)) self.stdout.write(tmpl % (check.status, check.code))
check.send_alert()
try:
check.send_alert()
except:
# Catch EVERYTHING. If we crash here, what can happen is:
# - the sendalerts command will crash
# - supervisor will respawn sendalerts command
# - sendalerts will try same thing again, resulting in
# infinite loop
# So instead we catch and log all exceptions, and mark
# the checks as paused so they are not retried.
logger.error("Could not alert %s" % check.code, exc_info=True)
check.status = "paused"
finally:
check.save()
connection.close()
connection.close()
return True return True
def handle(self, *args, **options): def handle(self, *args, **options):


+ 24
- 0
hc/api/migrations/0022_auto_20160130_2042.py View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9 on 2016-01-30 20:42
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Replace the ``status`` field of ``notification`` with an ``error`` field."""

    dependencies = [("api", "0021_ping_n")]

    operations = [
        # Drop the old ``status`` field.
        migrations.RemoveField(model_name="notification", name="status"),
        # Add a short, optional text field named ``error``.
        migrations.AddField(
            model_name="notification",
            name="error",
            field=models.CharField(max_length=200, blank=True),
        ),
    ]

+ 28
- 96
hc/api/models.py View File

@ -1,17 +1,15 @@
# coding: utf-8 # coding: utf-8
import hashlib import hashlib
import json
import uuid import uuid
from datetime import timedelta as td from datetime import timedelta as td
import requests
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
from django.db import models from django.db import models
from django.template.loader import render_to_string
from django.utils import timezone from django.utils import timezone
from hc.api import transports
from hc.lib import emails from hc.lib import emails
STATUSES = ( STATUSES = (
@ -125,103 +123,37 @@ class Channel(models.Model):
verify_link = settings.SITE_ROOT + verify_link verify_link = settings.SITE_ROOT + verify_link
emails.verify_email(self.value, {"verify_link": verify_link}) emails.verify_email(self.value, {"verify_link": verify_link})
def notify(self, check):
n = Notification(owner=check, channel=self)
n.check_status = check.status
if self.kind == "email" and self.email_verified:
ctx = {
"check": check,
"checks": self.user.check_set.order_by("created"),
"now": timezone.now()
}
emails.alert(self.value, ctx)
n.save()
elif self.kind == "webhook" and check.status == "down":
try:
headers = {"User-Agent": "healthchecks.io"}
r = requests.get(self.value, timeout=5, headers=headers)
n.status = r.status_code
except requests.exceptions.Timeout:
# Well, we tried
pass
n.save()
@property
def transport(self):
if self.kind == "email":
return transports.Email(self)
elif self.kind == "webhook":
return transports.Webhook(self)
elif self.kind == "slack": elif self.kind == "slack":
tmpl = "integrations/slack_message.json"
text = render_to_string(tmpl, {"check": check})
payload = json.loads(text)
r = requests.post(self.value, json=payload, timeout=5)
n.status = r.status_code
n.save()
return transports.Slack(self)
elif self.kind == "hipchat": elif self.kind == "hipchat":
tmpl = "integrations/hipchat_message.html"
text = render_to_string(tmpl, {"check": check})
payload = {
"message": text,
"color": "green" if check.status == "up" else "red",
}
r = requests.post(self.value, json=payload, timeout=5)
return transports.HipChat(self)
elif self.kind == "pd":
return transports.PagerDuty(self)
elif self.kind == "po":
return transports.Pushover()
else:
raise NotImplemented("Unknown channel kind: %s" % self.kind)
n.status = r.status_code
n.save()
def notify(self, check):
# Make 3 attempts--
for x in range(0, 3):
error = self.transport.notify(check) or ""
if error == "":
break # Success!
elif self.kind == "pd":
if check.status == "down":
event_type = "trigger"
description = "%s is DOWN" % check.name_then_code()
else:
event_type = "resolve"
description = "%s received a ping and is now UP" % \
check.name_then_code()
payload = {
"service_key": self.value,
"incident_key": str(check.code),
"event_type": event_type,
"description": description,
"client": "healthchecks.io",
"client_url": settings.SITE_ROOT
}
url = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"
r = requests.post(url, data=json.dumps(payload), timeout=5)
n.status = r.status_code
n.save()
n = Notification(owner=check, channel=self)
n.check_status = check.status
n.error = error
n.save()
elif self.kind == "po":
tmpl = "integrations/pushover_message.html"
ctx = {
"check": check,
"down_checks": self.user.check_set.filter(status="down").exclude(code=check.code).order_by("created"),
}
text = render_to_string(tmpl, ctx).strip()
if check.status == "down":
title = "%s is DOWN" % check.name_then_code()
else:
title = "%s is now UP" % check.name_then_code()
user_key, priority, _ = self.po_value
payload = {
"token": settings.PUSHOVER_API_TOKEN,
"user": user_key,
"message": text,
"title": title,
"html": 1,
"priority": priority,
}
if priority == 2: # Emergency notification
payload["retry"] = settings.PUSHOVER_EMERGENCY_RETRY_DELAY
payload["expire"] = settings.PUSHOVER_EMERGENCY_EXPIRATION
url = "https://api.pushover.net/1/messages.json"
r = requests.post(url, data=payload, timeout=5)
n.status = r.status_code
n.save()
def test(self):
    """ Send a test message through this channel's transport.

    Returns whatever the transport's ``test`` method returns.
    """
    # ``transport`` is declared as a property, so accessing it already
    # yields the Transport instance; calling it would raise TypeError.
    return self.transport.test()
@property @property
def po_value(self): def po_value(self):
@ -236,4 +168,4 @@ class Notification(models.Model):
check_status = models.CharField(max_length=6) check_status = models.CharField(max_length=6)
channel = models.ForeignKey(Channel) channel = models.ForeignKey(Channel)
created = models.DateTimeField(auto_now_add=True) created = models.DateTimeField(auto_now_add=True)
status = models.IntegerField(default=0)
error = models.CharField(max_length=200, blank=True)

+ 16
- 9
hc/api/tests/test_notify.py View File

@ -20,7 +20,7 @@ class NotifyTestCase(BaseTestCase):
self.channel.save() self.channel.save()
self.channel.checks.add(self.check) self.channel.checks.add(self.check)
@patch("hc.api.models.requests.get")
@patch("hc.api.transports.requests.get")
def test_webhook(self, mock_get): def test_webhook(self, mock_get):
self._setup_data("webhook", "http://example") self._setup_data("webhook", "http://example")
mock_get.return_value.status_code = 200 mock_get.return_value.status_code = 200
@ -30,16 +30,20 @@ class NotifyTestCase(BaseTestCase):
u"http://example", headers={"User-Agent": "healthchecks.io"}, u"http://example", headers={"User-Agent": "healthchecks.io"},
timeout=5) timeout=5)
@patch("hc.api.models.requests.get", side_effect=ReadTimeout)
@patch("hc.api.transports.requests.get", side_effect=ReadTimeout)
def test_webhooks_handle_timeouts(self, mock_get): def test_webhooks_handle_timeouts(self, mock_get):
self._setup_data("webhook", "http://example") self._setup_data("webhook", "http://example")
self.channel.notify(self.check) self.channel.notify(self.check)
assert Notification.objects.count() == 1
n = Notification.objects.get()
self.assertEqual(n.error, "Connection timed out")
def test_email(self): def test_email(self):
self._setup_data("email", "[email protected]") self._setup_data("email", "[email protected]")
self.channel.notify(self.check) self.channel.notify(self.check)
assert Notification.objects.count() == 1
n = Notification.objects.get()
self.assertEqual(n.error, "")
# And email should have been sent # And email should have been sent
self.assertEqual(len(mail.outbox), 1) self.assertEqual(len(mail.outbox), 1)
@ -48,21 +52,24 @@ class NotifyTestCase(BaseTestCase):
self._setup_data("email", "[email protected]", email_verified=False) self._setup_data("email", "[email protected]", email_verified=False)
self.channel.notify(self.check) self.channel.notify(self.check)
assert Notification.objects.count() == 0
assert Notification.objects.count() == 1
n = Notification.objects.first()
self.assertEqual(n.error, "Email not verified")
self.assertEqual(len(mail.outbox), 0) self.assertEqual(len(mail.outbox), 0)
@patch("hc.api.models.requests.post")
@patch("hc.api.transports.JsonTransport.post")
def test_pd(self, mock_post): def test_pd(self, mock_post):
self._setup_data("pd", "123") self._setup_data("pd", "123")
mock_post.return_value.status_code = 200
mock_post.return_value = None
self.channel.notify(self.check) self.channel.notify(self.check)
assert Notification.objects.count() == 1 assert Notification.objects.count() == 1
args, kwargs = mock_post.call_args args, kwargs = mock_post.call_args
assert "trigger" in kwargs["data"]
payload = args[1]
self.assertEqual(payload["event_type"], "trigger")
@patch("hc.api.models.requests.post")
@patch("hc.api.transports.requests.post")
def test_slack(self, mock_post): def test_slack(self, mock_post):
self._setup_data("slack", "123") self._setup_data("slack", "123")
mock_post.return_value.status_code = 200 mock_post.return_value.status_code = 200


+ 145
- 0
hc/api/transports.py View File

@ -0,0 +1,145 @@
from django.conf import settings
from django.template.loader import render_to_string
from django.utils import timezone
import json
import requests
from hc.lib import emails
def tmpl(template_name, **ctx):
    """ Render a template from the ``integrations/`` directory.

    Keyword arguments are passed to the template as its context.
    The rendered text is returned with leading and trailing
    whitespace stripped.
    """
    rendered = render_to_string("integrations/%s" % template_name, ctx)
    return rendered.strip()
class Transport(object):
    """ Base class for notification transports bound to one channel.

    Subclasses are expected to override ``notify`` and, where a test
    message makes sense, ``test``.
    """

    def __init__(self, channel):
        self.channel = channel

    def notify(self, check):
        """ Send notification about current status of the check.

        This method returns None on success, and error message
        on error.

        Raises NotImplementedError unless overridden by a subclass.
        """
        # NotImplemented is a comparison sentinel, not an exception class:
        # calling it fails with TypeError. NotImplementedError is the
        # exception that signals a missing override.
        raise NotImplementedError()

    def test(self):
        """ Send test message.

        This method returns None on success, and error message
        on error.

        Raises NotImplementedError unless overridden by a subclass.
        """
        raise NotImplementedError()

    def checks(self):
        """ Return the check set of the channel's user, ordered by ``created``. """
        return self.channel.user.check_set.order_by("created")
class Email(Transport):
    """ Transport that sends alerts to the channel's email address. """

    def notify(self, check):
        """ Email an alert about ``check`` to ``channel.value``.

        Returns "Email not verified" without sending anything when the
        channel's ``email_verified`` flag is false; otherwise sends the
        alert and returns None.
        """
        channel = self.channel
        if channel.email_verified:
            context = dict(check=check, checks=self.checks(), now=timezone.now())
            emails.alert(channel.value, context)
            return None

        return "Email not verified"
class Webhook(Transport):
    """ Transport that issues a GET request to the channel's URL. """

    def notify(self, check):
        """ Call the webhook if the check is down.

        Returns None on success (or when nothing was sent), and an
        error message on error.
        """
        # Webhook integration only fires when check goes down.
        if check.status != "down":
            return None

        # Webhook transport sends no arguments, so the
        # notify and test actions are the same
        return self.test()

    def test(self):
        """ Send a GET request to ``channel.value`` with a 5 second timeout.

        Returns None when the response status is 200 or 201, and an
        error message for any other status, a timeout, or a failed
        connection.
        """
        headers = {"User-Agent": "healthchecks.io"}
        try:
            r = requests.get(self.channel.value, timeout=5, headers=headers)
        except requests.exceptions.Timeout:
            # Well, we tried
            return "Connection timed out"
        except requests.exceptions.ConnectionError:
            # Refused connections, DNS failures and the like are delivery
            # errors too; report them instead of letting them propagate.
            return "Connection failed"

        if r.status_code not in (200, 201):
            return "Received status code %d" % r.status_code

        return None
class JsonTransport(Transport):
    """ Base class for transports that POST a JSON payload. """

    def post(self, url, payload):
        """ POST ``payload`` as JSON to ``url`` with a 5 second timeout.

        Returns None when the response status is 200 or 201, and an
        error message for any other status, a timeout, or a failed
        connection.
        """
        headers = {"User-Agent": "healthchecks.io"}
        try:
            r = requests.post(url, json=payload, timeout=5, headers=headers)
        except requests.exceptions.Timeout:
            return "Connection timed out"
        except requests.exceptions.ConnectionError:
            # Report network failures as an error message, in line with
            # the "return error message on error" transport contract.
            return "Connection failed"

        if r.status_code not in (200, 201):
            return "Received status code %d" % r.status_code

        return None
class Slack(JsonTransport):
    """ Transport that posts a rendered JSON message to the channel's URL. """

    def notify(self, check):
        """ Render ``slack_message.json`` for ``check`` and POST it.

        The rendered template is parsed as JSON and sent to
        ``channel.value``. Returns the result of ``post``.
        """
        payload = json.loads(tmpl("slack_message.json", check=check))
        return self.post(self.channel.value, payload)
class HipChat(JsonTransport):
    """ Transport that posts a rendered HTML message to the channel's URL. """

    def notify(self, check):
        """ Render ``hipchat_message.html`` for ``check`` and POST it.

        The message color is "green" when the check status is "up" and
        "red" otherwise. Returns the result of ``post``.
        """
        message = tmpl("hipchat_message.html", check=check)

        if check.status == "up":
            color = "green"
        else:
            color = "red"

        return self.post(self.channel.value, {"message": message, "color": color})
class PagerDuty(JsonTransport):
    """ Transport that posts trigger/resolve events to the PagerDuty URL. """

    URL = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"

    def notify(self, check):
        """ POST an event describing the status of ``check``.

        The event type is "trigger" when the check is down and
        "resolve" otherwise; the check code is used as the incident
        key. Returns the result of ``post``.
        """
        description = tmpl("pd_description.html", check=check)

        if check.status == "down":
            event_type = "trigger"
        else:
            event_type = "resolve"

        payload = dict(
            service_key=self.channel.value,
            incident_key=str(check.code),
            event_type=event_type,
            description=description,
            client="healthchecks.io",
            client_url=settings.SITE_ROOT,
        )
        return self.post(self.URL, payload)
class Pushover(JsonTransport):
    """ Transport that posts messages to the Pushover messages URL. """

    URL = "https://api.pushover.net/1/messages.json"

    def notify(self, check):
        """ POST a Pushover message describing the status of ``check``.

        ``channel.value`` is split on "|" into a user key and a
        priority. Priority "2" additionally sets the ``retry`` and
        ``expire`` fields from settings. Returns the result of ``post``.
        """
        # Other checks of the same user that are currently down.
        down_checks = self.checks().filter(status="down").exclude(code=check.code)
        context = {"check": check, "down_checks": down_checks}

        message = tmpl("pushover_message.html", **context)
        title = tmpl("pushover_title.html", **context)

        user_key, prio = self.channel.value.split("|")
        payload = dict(
            token=settings.PUSHOVER_API_TOKEN,
            user=user_key,
            message=message,
            title=title,
            html=1,
            priority=int(prio),
        )

        # Emergency notification
        if prio == "2":
            payload.update(
                retry=settings.PUSHOVER_EMERGENCY_RETRY_DELAY,
                expire=settings.PUSHOVER_EMERGENCY_EXPIRATION,
            )

        return self.post(self.URL, payload)

+ 5
- 0
templates/integrations/pd_description.html View File

@ -0,0 +1,5 @@
{# PagerDuty event description: DOWN text when the check status is down, recovery text for any other status. #}{% if check.status == "down" %}
{{ check.name_then_code }} is DOWN
{% else %}
{{ check.name_then_code }} received a ping and is now UP
{% endif %}

+ 5
- 0
templates/integrations/pushover_title.html View File

@ -0,0 +1,5 @@
{# Pushover message title: DOWN text when the check status is down, UP text for any other status. #}{% if check.status == "down" %}
{{ check.name_then_code }} is DOWN
{% else %}
{{ check.name_then_code }} is now UP
{% endif %}

Loading…
Cancel
Save