You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

744 lines
24 KiB

10 years ago
10 years ago
10 years ago
8 years ago
9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
6 years ago
7 years ago
6 years ago
  1. # coding: utf-8
  2. import hashlib
  3. import json
  4. import uuid
  5. from datetime import datetime, timedelta as td
  6. from croniter import croniter
  7. from django.conf import settings
  8. from django.db import models
  9. from django.urls import reverse
  10. from django.utils import timezone
  11. from hc.accounts.models import Project
  12. from hc.api import transports
  13. from hc.lib import emails
  14. from hc.lib.date import month_boundaries
  15. import pytz
  16. STATUSES = (("up", "Up"), ("down", "Down"), ("new", "New"), ("paused", "Paused"))
  17. DEFAULT_TIMEOUT = td(days=1)
  18. DEFAULT_GRACE = td(hours=1)
  19. NEVER = datetime(3000, 1, 1, tzinfo=pytz.UTC)
  20. CHECK_KINDS = (("simple", "Simple"), ("cron", "Cron"))
  21. CHANNEL_KINDS = (
  22. ("email", "Email"),
  23. ("webhook", "Webhook"),
  24. ("hipchat", "HipChat"),
  25. ("slack", "Slack"),
  26. ("pd", "PagerDuty"),
  27. ("pagertree", "PagerTree"),
  28. ("pagerteam", "Pager Team"),
  29. ("po", "Pushover"),
  30. ("pushbullet", "Pushbullet"),
  31. ("opsgenie", "OpsGenie"),
  32. ("victorops", "VictorOps"),
  33. ("discord", "Discord"),
  34. ("telegram", "Telegram"),
  35. ("sms", "SMS"),
  36. ("zendesk", "Zendesk"),
  37. ("trello", "Trello"),
  38. ("matrix", "Matrix"),
  39. ("whatsapp", "WhatsApp"),
  40. ("apprise", "Apprise"),
  41. )
  42. PO_PRIORITIES = {-2: "lowest", -1: "low", 0: "normal", 1: "high", 2: "emergency"}
  43. def isostring(dt):
  44. """Convert the datetime to ISO 8601 format with no microseconds. """
  45. if dt:
  46. return dt.replace(microsecond=0).isoformat()
  47. class Check(models.Model):
  48. name = models.CharField(max_length=100, blank=True)
  49. tags = models.CharField(max_length=500, blank=True)
  50. code = models.UUIDField(default=uuid.uuid4, editable=False, unique=True)
  51. desc = models.TextField(blank=True)
  52. project = models.ForeignKey(Project, models.CASCADE)
  53. created = models.DateTimeField(auto_now_add=True)
  54. kind = models.CharField(max_length=10, default="simple", choices=CHECK_KINDS)
  55. timeout = models.DurationField(default=DEFAULT_TIMEOUT)
  56. grace = models.DurationField(default=DEFAULT_GRACE)
  57. schedule = models.CharField(max_length=100, default="* * * * *")
  58. tz = models.CharField(max_length=36, default="UTC")
  59. subject = models.CharField(max_length=100, blank=True)
  60. n_pings = models.IntegerField(default=0)
  61. last_ping = models.DateTimeField(null=True, blank=True)
  62. last_start = models.DateTimeField(null=True, blank=True)
  63. last_ping_was_fail = models.NullBooleanField(default=False)
  64. has_confirmation_link = models.BooleanField(default=False)
  65. alert_after = models.DateTimeField(null=True, blank=True, editable=False)
  66. status = models.CharField(max_length=6, choices=STATUSES, default="new")
  67. class Meta:
  68. indexes = [
  69. # Index for the alert_after field. Excludes rows with status=down.
  70. # Used in the sendalerts management command.
  71. models.Index(
  72. fields=["alert_after"],
  73. name="api_check_aa_not_down",
  74. condition=~models.Q(status="down"),
  75. )
  76. ]
  77. def __str__(self):
  78. return "%s (%d)" % (self.name or self.code, self.id)
  79. def name_then_code(self):
  80. if self.name:
  81. return self.name
  82. return str(self.code)
  83. def url(self):
  84. return settings.PING_ENDPOINT + str(self.code)
  85. def details_url(self):
  86. return settings.SITE_ROOT + reverse("hc-details", args=[self.code])
  87. def email(self):
  88. return "%s@%s" % (self.code, settings.PING_EMAIL_DOMAIN)
  89. def get_grace_start(self):
  90. """ Return the datetime when the grace period starts.
  91. If the check is currently new, paused or down, return None.
  92. """
  93. # NEVER is a constant sentinel value (year 3000).
  94. # Using None instead would make the logic clunky.
  95. result = NEVER
  96. if self.kind == "simple" and self.status == "up":
  97. result = self.last_ping + self.timeout
  98. elif self.kind == "cron" and self.status == "up":
  99. # The complex case, next ping is expected based on cron schedule.
  100. # Don't convert to naive datetimes (and so avoid ambiguities around
  101. # DST transitions). Croniter will handle the timezone-aware datetimes.
  102. zone = pytz.timezone(self.tz)
  103. last_local = timezone.localtime(self.last_ping, zone)
  104. it = croniter(self.schedule, last_local)
  105. result = it.next(datetime)
  106. if self.last_start and self.status != "down":
  107. result = min(result, self.last_start)
  108. if result != NEVER:
  109. return result
  110. def going_down_after(self):
  111. """ Return the datetime when the check goes down.
  112. If the check is new or paused, and not currently running, return None.
  113. If the check is already down, also return None.
  114. """
  115. grace_start = self.get_grace_start()
  116. if grace_start is not None:
  117. return grace_start + self.grace
  118. def get_status(self, now=None, with_started=True):
  119. """ Return current status for display. """
  120. if now is None:
  121. now = timezone.now()
  122. if self.last_start:
  123. if now >= self.last_start + self.grace:
  124. return "down"
  125. elif with_started:
  126. return "started"
  127. if self.status in ("new", "paused", "down"):
  128. return self.status
  129. grace_start = self.get_grace_start()
  130. grace_end = grace_start + self.grace
  131. if now >= grace_end:
  132. return "down"
  133. if now >= grace_start:
  134. return "grace"
  135. return "up"
  136. def assign_all_channels(self):
  137. channels = Channel.objects.filter(project=self.project)
  138. self.channel_set.set(channels)
  139. def tags_list(self):
  140. return [t.strip() for t in self.tags.split(" ") if t.strip()]
  141. def matches_tag_set(self, tag_set):
  142. return tag_set.issubset(self.tags_list())
  143. def channels_str(self):
  144. """ Return a comma-separated string of assigned channel codes. """
  145. codes = self.channel_set.order_by("code").values_list("code", flat=True)
  146. return ",".join(map(str, codes))
  147. def to_dict(self, readonly=False):
  148. result = {
  149. "name": self.name,
  150. "tags": self.tags,
  151. "desc": self.desc,
  152. "grace": int(self.grace.total_seconds()),
  153. "n_pings": self.n_pings,
  154. "status": self.get_status(),
  155. "last_ping": isostring(self.last_ping),
  156. "next_ping": isostring(self.get_grace_start()),
  157. }
  158. if readonly:
  159. code_half = self.code.hex[:16]
  160. result["unique_key"] = hashlib.sha1(code_half.encode()).hexdigest()
  161. else:
  162. update_rel_url = reverse("hc-api-update", args=[self.code])
  163. pause_rel_url = reverse("hc-api-pause", args=[self.code])
  164. result["ping_url"] = self.url()
  165. result["update_url"] = settings.SITE_ROOT + update_rel_url
  166. result["pause_url"] = settings.SITE_ROOT + pause_rel_url
  167. result["channels"] = self.channels_str()
  168. if self.kind == "simple":
  169. result["timeout"] = int(self.timeout.total_seconds())
  170. elif self.kind == "cron":
  171. result["schedule"] = self.schedule
  172. result["tz"] = self.tz
  173. return result
  174. def ping(self, remote_addr, scheme, method, ua, body, action):
  175. if action == "start":
  176. self.last_start = timezone.now()
  177. # Don't update "last_ping" field.
  178. elif action == "ign":
  179. pass
  180. else:
  181. self.last_start = None
  182. self.last_ping = timezone.now()
  183. new_status = "down" if action == "fail" else "up"
  184. if self.status != new_status:
  185. flip = Flip(owner=self)
  186. flip.created = self.last_ping
  187. flip.old_status = self.status
  188. flip.new_status = new_status
  189. flip.save()
  190. self.status = new_status
  191. self.alert_after = self.going_down_after()
  192. self.n_pings = models.F("n_pings") + 1
  193. self.has_confirmation_link = "confirm" in str(body).lower()
  194. self.save()
  195. self.refresh_from_db()
  196. ping = Ping(owner=self)
  197. ping.n = self.n_pings
  198. if action in ("start", "fail", "ign"):
  199. ping.kind = action
  200. ping.remote_addr = remote_addr
  201. ping.scheme = scheme
  202. ping.method = method
  203. # If User-Agent is longer than 200 characters, truncate it:
  204. ping.ua = ua[:200]
  205. ping.body = body[:10000]
  206. ping.save()
  207. def downtimes(self, months=2):
  208. """ Calculate the number of downtimes and downtime minutes per month.
  209. Returns a list of (datetime, downtime_in_secs, number_of_outages) tuples.
  210. """
  211. def monthkey(dt):
  212. return dt.year, dt.month
  213. # Datetimes of the first days of months we're interested in. Ascending order.
  214. boundaries = month_boundaries(months=months)
  215. # Will accumulate totals here.
  216. # (year, month) -> [datetime, total_downtime, number_of_outages]
  217. totals = {monthkey(b): [b, td(), 0] for b in boundaries}
  218. # A list of flips and month boundaries
  219. events = [(b, "---") for b in boundaries]
  220. q = self.flip_set.filter(created__gt=min(boundaries))
  221. for pair in q.values_list("created", "old_status"):
  222. events.append(pair)
  223. # Iterate through flips and month boundaries in reverse order,
  224. # and for each "down" event increase the counters in `totals`.
  225. dt, status = timezone.now(), self.status
  226. for prev_dt, prev_status in sorted(events, reverse=True):
  227. if status == "down":
  228. delta = dt - prev_dt
  229. totals[monthkey(prev_dt)][1] += delta
  230. totals[monthkey(prev_dt)][2] += 1
  231. dt = prev_dt
  232. if prev_status != "---":
  233. status = prev_status
  234. return sorted(totals.values())
  235. class Ping(models.Model):
  236. id = models.BigAutoField(primary_key=True)
  237. n = models.IntegerField(null=True)
  238. owner = models.ForeignKey(Check, models.CASCADE)
  239. created = models.DateTimeField(auto_now_add=True)
  240. kind = models.CharField(max_length=6, blank=True, null=True)
  241. scheme = models.CharField(max_length=10, default="http")
  242. remote_addr = models.GenericIPAddressField(blank=True, null=True)
  243. method = models.CharField(max_length=10, blank=True)
  244. ua = models.CharField(max_length=200, blank=True)
  245. body = models.CharField(max_length=10000, blank=True, null=True)
  246. class Channel(models.Model):
  247. name = models.CharField(max_length=100, blank=True)
  248. code = models.UUIDField(default=uuid.uuid4, editable=False, unique=True)
  249. project = models.ForeignKey(Project, models.CASCADE)
  250. created = models.DateTimeField(auto_now_add=True)
  251. kind = models.CharField(max_length=20, choices=CHANNEL_KINDS)
  252. value = models.TextField(blank=True)
  253. email_verified = models.BooleanField(default=False)
  254. checks = models.ManyToManyField(Check)
  255. def __str__(self):
  256. if self.name:
  257. return self.name
  258. if self.kind == "email":
  259. return "Email to %s" % self.email_value
  260. elif self.kind == "sms":
  261. return "SMS to %s" % self.sms_number
  262. elif self.kind == "slack":
  263. return "Slack %s" % self.slack_channel
  264. elif self.kind == "telegram":
  265. return "Telegram %s" % self.telegram_name
  266. return self.get_kind_display()
  267. def to_dict(self):
  268. return {"id": str(self.code), "name": self.name, "kind": self.kind}
  269. def assign_all_checks(self):
  270. checks = Check.objects.filter(project=self.project)
  271. self.checks.add(*checks)
  272. def make_token(self):
  273. seed = "%s%s" % (self.code, settings.SECRET_KEY)
  274. seed = seed.encode()
  275. return hashlib.sha1(seed).hexdigest()
  276. def send_verify_link(self):
  277. args = [self.code, self.make_token()]
  278. verify_link = reverse("hc-verify-email", args=args)
  279. verify_link = settings.SITE_ROOT + verify_link
  280. emails.verify_email(self.email_value, {"verify_link": verify_link})
  281. def get_unsub_link(self):
  282. args = [self.code, self.make_token()]
  283. verify_link = reverse("hc-unsubscribe-alerts", args=args)
  284. return settings.SITE_ROOT + verify_link
  285. @property
  286. def transport(self):
  287. if self.kind == "email":
  288. return transports.Email(self)
  289. elif self.kind == "webhook":
  290. return transports.Webhook(self)
  291. elif self.kind == "slack":
  292. return transports.Slack(self)
  293. elif self.kind == "hipchat":
  294. return transports.HipChat(self)
  295. elif self.kind == "pd":
  296. return transports.PagerDuty(self)
  297. elif self.kind == "pagertree":
  298. return transports.PagerTree(self)
  299. elif self.kind == "pagerteam":
  300. return transports.PagerTeam(self)
  301. elif self.kind == "victorops":
  302. return transports.VictorOps(self)
  303. elif self.kind == "pushbullet":
  304. return transports.Pushbullet(self)
  305. elif self.kind == "po":
  306. return transports.Pushover(self)
  307. elif self.kind == "opsgenie":
  308. return transports.OpsGenie(self)
  309. elif self.kind == "discord":
  310. return transports.Discord(self)
  311. elif self.kind == "telegram":
  312. return transports.Telegram(self)
  313. elif self.kind == "sms":
  314. return transports.Sms(self)
  315. elif self.kind == "trello":
  316. return transports.Trello(self)
  317. elif self.kind == "matrix":
  318. return transports.Matrix(self)
  319. elif self.kind == "whatsapp":
  320. return transports.WhatsApp(self)
  321. elif self.kind == "apprise":
  322. return transports.Apprise(self)
  323. else:
  324. raise NotImplementedError("Unknown channel kind: %s" % self.kind)
  325. def notify(self, check):
  326. if self.transport.is_noop(check):
  327. return "no-op"
  328. n = Notification(owner=check, channel=self)
  329. n.check_status = check.status
  330. n.error = "Sending"
  331. n.save()
  332. if self.kind == "email":
  333. error = self.transport.notify(check, n.bounce_url()) or ""
  334. else:
  335. error = self.transport.notify(check) or ""
  336. n.error = error
  337. n.save()
  338. return error
  339. def icon_path(self):
  340. return "img/integrations/%s.png" % self.kind
  341. @property
  342. def po_priority(self):
  343. assert self.kind == "po"
  344. parts = self.value.split("|")
  345. prio = int(parts[1])
  346. return PO_PRIORITIES[prio]
  347. def webhook_spec(self, status):
  348. assert self.kind == "webhook"
  349. if not self.value.startswith("{"):
  350. parts = self.value.split("\n")
  351. url_down = parts[0]
  352. url_up = parts[1] if len(parts) > 1 else ""
  353. post_data = parts[2] if len(parts) > 2 else ""
  354. return {
  355. "method": "POST" if post_data else "GET",
  356. "url": url_down if status == "down" else url_up,
  357. "body": post_data,
  358. "headers": {},
  359. }
  360. doc = json.loads(self.value)
  361. if "post_data" in doc:
  362. # Legacy "post_data" in doc -- use the legacy fields
  363. return {
  364. "method": "POST" if doc["post_data"] else "GET",
  365. "url": doc["url_down"] if status == "down" else doc["url_up"],
  366. "body": doc["post_data"],
  367. "headers": doc["headers"],
  368. }
  369. if status == "down" and "method_down" in doc:
  370. return {
  371. "method": doc["method_down"],
  372. "url": doc["url_down"],
  373. "body": doc["body_down"],
  374. "headers": doc["headers_down"],
  375. }
  376. elif status == "up" and "method_up" in doc:
  377. return {
  378. "method": doc["method_up"],
  379. "url": doc["url_up"],
  380. "body": doc["body_up"],
  381. "headers": doc["headers_up"],
  382. }
  383. @property
  384. def down_webhook_spec(self):
  385. return self.webhook_spec("down")
  386. @property
  387. def up_webhook_spec(self):
  388. return self.webhook_spec("up")
  389. @property
  390. def url_down(self):
  391. return self.down_webhook_spec["url"]
  392. @property
  393. def url_up(self):
  394. return self.up_webhook_spec["url"]
  395. @property
  396. def slack_team(self):
  397. assert self.kind == "slack"
  398. if not self.value.startswith("{"):
  399. return None
  400. doc = json.loads(self.value)
  401. return doc["team_name"]
  402. @property
  403. def slack_channel(self):
  404. assert self.kind == "slack"
  405. if not self.value.startswith("{"):
  406. return None
  407. doc = json.loads(self.value)
  408. return doc["incoming_webhook"]["channel"]
  409. @property
  410. def slack_webhook_url(self):
  411. assert self.kind == "slack"
  412. if not self.value.startswith("{"):
  413. return self.value
  414. doc = json.loads(self.value)
  415. return doc["incoming_webhook"]["url"]
  416. @property
  417. def discord_webhook_url(self):
  418. assert self.kind == "discord"
  419. doc = json.loads(self.value)
  420. return doc["webhook"]["url"]
  421. @property
  422. def discord_webhook_id(self):
  423. assert self.kind == "discord"
  424. doc = json.loads(self.value)
  425. return doc["webhook"]["id"]
  426. @property
  427. def telegram_id(self):
  428. assert self.kind == "telegram"
  429. doc = json.loads(self.value)
  430. return doc.get("id")
  431. @property
  432. def telegram_type(self):
  433. assert self.kind == "telegram"
  434. doc = json.loads(self.value)
  435. return doc.get("type")
  436. @property
  437. def telegram_name(self):
  438. assert self.kind == "telegram"
  439. doc = json.loads(self.value)
  440. return doc.get("name")
  441. @property
  442. def pd_service_key(self):
  443. assert self.kind == "pd"
  444. if not self.value.startswith("{"):
  445. return self.value
  446. doc = json.loads(self.value)
  447. return doc["service_key"]
  448. @property
  449. def pd_account(self):
  450. assert self.kind == "pd"
  451. if self.value.startswith("{"):
  452. doc = json.loads(self.value)
  453. return doc["account"]
  454. def latest_notification(self):
  455. return Notification.objects.filter(channel=self).latest()
  456. @property
  457. def sms_number(self):
  458. assert self.kind in ("sms", "whatsapp")
  459. if self.value.startswith("{"):
  460. doc = json.loads(self.value)
  461. return doc["value"]
  462. return self.value
  463. @property
  464. def sms_label(self):
  465. assert self.kind == "sms"
  466. if self.value.startswith("{"):
  467. doc = json.loads(self.value)
  468. return doc["label"]
  469. @property
  470. def trello_token(self):
  471. assert self.kind == "trello"
  472. if self.value.startswith("{"):
  473. doc = json.loads(self.value)
  474. return doc["token"]
  475. @property
  476. def trello_board_list(self):
  477. assert self.kind == "trello"
  478. if self.value.startswith("{"):
  479. doc = json.loads(self.value)
  480. return doc["board_name"], doc["list_name"]
  481. @property
  482. def trello_list_id(self):
  483. assert self.kind == "trello"
  484. if self.value.startswith("{"):
  485. doc = json.loads(self.value)
  486. return doc["list_id"]
  487. @property
  488. def email_value(self):
  489. assert self.kind == "email"
  490. if not self.value.startswith("{"):
  491. return self.value
  492. doc = json.loads(self.value)
  493. return doc.get("value")
  494. @property
  495. def email_notify_up(self):
  496. assert self.kind == "email"
  497. if not self.value.startswith("{"):
  498. return True
  499. doc = json.loads(self.value)
  500. return doc.get("up")
  501. @property
  502. def email_notify_down(self):
  503. assert self.kind == "email"
  504. if not self.value.startswith("{"):
  505. return True
  506. doc = json.loads(self.value)
  507. return doc.get("down")
  508. @property
  509. def whatsapp_notify_up(self):
  510. assert self.kind == "whatsapp"
  511. doc = json.loads(self.value)
  512. return doc["up"]
  513. @property
  514. def whatsapp_notify_down(self):
  515. assert self.kind == "whatsapp"
  516. doc = json.loads(self.value)
  517. return doc["down"]
  518. class Notification(models.Model):
  519. class Meta:
  520. get_latest_by = "created"
  521. code = models.UUIDField(default=uuid.uuid4, null=True, editable=False)
  522. owner = models.ForeignKey(Check, models.CASCADE)
  523. check_status = models.CharField(max_length=6)
  524. channel = models.ForeignKey(Channel, models.CASCADE)
  525. created = models.DateTimeField(auto_now_add=True)
  526. error = models.CharField(max_length=200, blank=True)
  527. def bounce_url(self):
  528. return settings.SITE_ROOT + reverse("hc-api-bounce", args=[self.code])
  529. class Flip(models.Model):
  530. owner = models.ForeignKey(Check, models.CASCADE)
  531. created = models.DateTimeField()
  532. processed = models.DateTimeField(null=True, blank=True)
  533. old_status = models.CharField(max_length=8, choices=STATUSES)
  534. new_status = models.CharField(max_length=8, choices=STATUSES)
  535. class Meta:
  536. indexes = [
  537. # For quickly looking up unprocessed flips.
  538. # Used in the sendalerts management command.
  539. models.Index(
  540. fields=["processed"],
  541. name="api_flip_not_processed",
  542. condition=models.Q(processed=None),
  543. )
  544. ]
  545. def send_alerts(self):
  546. if self.new_status == "up" and self.old_status in ("new", "paused"):
  547. # Don't send alerts on new->up and paused->up transitions
  548. return []
  549. if self.new_status not in ("up", "down"):
  550. raise NotImplementedError("Unexpected status: %s" % self.status)
  551. errors = []
  552. for channel in self.owner.channel_set.all():
  553. error = channel.notify(self.owner)
  554. if error not in ("", "no-op"):
  555. errors.append((channel, error))
  556. return errors
  557. class TokenBucket(models.Model):
  558. value = models.CharField(max_length=80, unique=True)
  559. tokens = models.FloatField(default=1.0)
  560. updated = models.DateTimeField(default=timezone.now)
  561. @staticmethod
  562. def authorize(value, capacity, refill_time_secs):
  563. now = timezone.now()
  564. obj, created = TokenBucket.objects.get_or_create(value=value)
  565. if not created:
  566. # Top up the bucket:
  567. delta_secs = (now - obj.updated).total_seconds()
  568. obj.tokens = min(1.0, obj.tokens + delta_secs / refill_time_secs)
  569. obj.tokens -= 1.0 / capacity
  570. if obj.tokens < 0:
  571. # Not enough tokens
  572. return False
  573. # Race condition: two concurrent authorize calls can overwrite each
  574. # other's changes. It's OK to be a little inexact here for the sake
  575. # of simplicity.
  576. obj.updated = now
  577. obj.save()
  578. return True
  579. @staticmethod
  580. def authorize_login_email(email):
  581. # remove dots and alias:
  582. mailbox, domain = email.split("@")
  583. mailbox = mailbox.replace(".", "")
  584. mailbox = mailbox.split("+")[0]
  585. email = mailbox + "@" + domain
  586. salted_encoded = (email + settings.SECRET_KEY).encode()
  587. value = "em-%s" % hashlib.sha1(salted_encoded).hexdigest()
  588. # 20 login attempts for a single email per hour:
  589. return TokenBucket.authorize(value, 20, 3600)
  590. @staticmethod
  591. def authorize_invite(user):
  592. value = "invite-%d" % user.id
  593. # 20 invites per day
  594. return TokenBucket.authorize(value, 20, 3600 * 24)
  595. @staticmethod
  596. def authorize_login_password(email):
  597. salted_encoded = (email + settings.SECRET_KEY).encode()
  598. value = "pw-%s" % hashlib.sha1(salted_encoded).hexdigest()
  599. # 20 password attempts per day
  600. return TokenBucket.authorize(value, 20, 3600 * 24)