- from datetime import timedelta as td
- import time
- from threading import Thread
-
- from django.core.management.base import BaseCommand
- from django.utils import timezone
- from hc.api.models import Check, Flip
- from statsd.defaults.env import statsd
-
# Log line written right before alerts are sent: (new status, check code).
SENDING_TMPL = "Sending alert, status=%s, code=%s\n"
# Log line written when sending was slow: (elapsed seconds, check code).
SEND_TIME_TMPL = "Sending took %.1fs, code=%s\n"
-
-
def notify(flip_id, stdout):
    """Send the alerts for a single flip and report timing metrics.

    Loads the Flip with the given id, sends its alerts, writes progress and
    per-channel errors to ``stdout`` and records two statsd timings: how long
    the flip waited before sending started, and how long sending took.
    """
    flip = Flip.objects.get(id=flip_id)
    check = flip.owner

    # Put the status recorded in the flip on the in-memory check only.
    # It must never be persisted, so the instance's save is disabled to
    # guard against a future coding accident.
    # (Passing the status explicitly as a parameter would be nicer.)
    check.status = flip.new_status
    check.save = None

    stdout.write(SENDING_TMPL % (flip.new_status, check.code))

    # A check going down schedules the follow-up nag for its project.
    if flip.new_status == "down":
        check.project.set_next_nag_date()

    # Send the notifications, reporting every (channel, error) pair returned.
    started_at = timezone.now()
    for channel, message in flip.send_alerts():
        stdout.write("ERROR: %s %s %s\n" % (channel.kind, channel.value, message))

    # Log sends that took longer than 5 seconds.
    elapsed = timezone.now() - started_at
    elapsed_seconds = elapsed.total_seconds()
    if elapsed_seconds > 5:
        stdout.write(SEND_TIME_TMPL % (elapsed_seconds, check.code))

    statsd.timing("hc.sendalerts.dwellTime", started_at - flip.created)
    statsd.timing("hc.sendalerts.sendTime", elapsed)
-
-
def notify_on_thread(flip_id, stdout):
    """Run notify(flip_id, stdout) on a newly started thread.

    Returns immediately; the thread is not joined.
    """
    Thread(target=notify, args=(flip_id, stdout)).start()
-
-
class Command(BaseCommand):
    """Management command that creates and delivers status-change alerts.

    Each pass of the main loop first records a "down" Flip for every check
    whose ``alert_after`` deadline has passed, then sends notifications for
    every Flip that has not been processed yet.
    """

    help = "Sends UP/DOWN email alerts"

    def add_arguments(self, parser):
        """Register the --no-loop and --no-threads command line switches."""
        parser.add_argument(
            "--no-loop",
            action="store_false",
            dest="loop",
            default=True,
            help="Do not keep running indefinitely in a 2 second wait loop",
        )

        # The default has to be True: combined with action="store_false",
        # a False default makes the switch a no-op and leaves threaded
        # sending unreachable from the command line.
        parser.add_argument(
            "--no-threads",
            action="store_false",
            dest="use_threads",
            default=True,
            help="Send alerts synchronously, without using threads",
        )

    def process_one_flip(self, use_threads=True):
        """Find an unprocessed flip and send its notifications.

        Returns False when there is no unprocessed flip left, True otherwise
        (including when another worker claimed the flip first).
        """

        # Order by processed, otherwise Django will automatically order by id
        # and make the query less efficient
        q = Flip.objects.filter(processed=None).order_by("processed")
        flip = q.first()
        if flip is None:
            return False

        # Claim the flip: the filter on processed=None makes the update
        # succeed for one worker only.
        q = Flip.objects.filter(id=flip.id, processed=None)
        num_updated = q.update(processed=timezone.now())
        if num_updated != 1:
            # Nothing got updated: another worker process got there first.
            return True

        if use_threads:
            notify_on_thread(flip.id, self.stdout)
        else:
            notify(flip.id, self.stdout)

        return True

    def handle_going_down(self):
        """Process a single check going down.

        Returns False when no check is past its ``alert_after`` time, True
        otherwise. Exceptions raised while computing the check's status are
        re-raised after the check's ``alert_after`` is pushed back an hour.
        """

        now = timezone.now()

        q = Check.objects.filter(alert_after__lt=now).exclude(status="down")
        # Sort by alert_after, to avoid unnecessary sorting by id:
        check = q.order_by("alert_after").first()
        if check is None:
            return False

        old_status = check.status
        # All updates below are conditional on the status being unchanged,
        # so a concurrent change by another process makes them affect 0 rows.
        q = Check.objects.filter(id=check.id, status=old_status)

        try:
            status = check.get_status(with_started=False)
        except Exception:
            # Make sure we don't trip on this check again for an hour:
            # Otherwise sendalerts may end up in a crash loop.
            q.update(alert_after=now + td(hours=1))
            # Then re-raise the original exception:
            raise

        if status != "down":
            # It is not down yet. Update alert_after
            q.update(alert_after=check.going_down_after())
            return True

        # Atomically update status
        flip_time = check.going_down_after()
        num_updated = q.update(alert_after=None, status="down")
        if num_updated != 1:
            # Nothing got updated: another worker process got there first.
            return True

        flip = Flip(owner=check)
        flip.created = flip_time
        flip.old_status = old_status
        flip.new_status = "down"
        flip.save()

        return True

    def handle(self, use_threads=True, loop=True, *args, **options):
        """Run the alert loop.

        With ``loop`` true this never returns: it sleeps 2 seconds between
        passes and writes a MARK line every 60 passes. With ``loop`` false it
        makes a single pass and returns a summary string. The count in that
        summary is the number of times process_one_flip() returned True.
        """
        self.stdout.write("sendalerts is now running\n")

        i, sent = 0, 0
        while True:
            # Create flips for any checks going down
            while self.handle_going_down():
                pass

            # Process the unprocessed flips
            while self.process_one_flip(use_threads):
                sent += 1

            if not loop:
                break

            time.sleep(2)
            i += 1
            if i % 60 == 0:
                timestamp = timezone.now().isoformat()
                self.stdout.write("-- MARK %s --\n" % timestamp)

        return "Sent %d alert(s)" % sent
|