diff --git a/CHANGELOG.md b/CHANGELOG.md index 10bc9bd7..0766c062 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Changelog All notable changes to this project will be documented in this file. +## v1.24.0 - Unreleased + +### Improvements +- Switch from croniter to cronsim (vendored in hc.lib.cronsim) + ## v1.23.1 - 2021-10-13 ### Bug Fixes diff --git a/hc/api/models.py b/hc/api/models.py index ccc9ccb5..35089146 100644 --- a/hc/api/models.py +++ b/hc/api/models.py @@ -6,7 +6,6 @@ import time import uuid from datetime import datetime, timedelta as td -from croniter import croniter from django.conf import settings from django.core.signing import TimestampSigner from django.db import models @@ -16,6 +15,7 @@ from django.utils.text import slugify from hc.accounts.models import Project from hc.api import transports from hc.lib import emails +from hc.lib.cronsim import CronSim from hc.lib.date import month_boundaries import pytz @@ -166,12 +166,11 @@ class Check(models.Model): elif self.kind == "cron" and self.status == "up": # The complex case, next ping is expected based on cron schedule. # Don't convert to naive datetimes (and so avoid ambiguities around - # DST transitions). Croniter will handle the timezone-aware datetimes. + # DST transitions). cronsim will handle the timezone-aware datetimes. 
zone = pytz.timezone(self.tz) last_local = timezone.localtime(self.last_ping, zone) - it = croniter(self.schedule, last_local) - result = it.next(datetime) + result = next(CronSim(self.schedule, last_local)) if with_started and self.last_start and self.status != "down": result = min(result, self.last_start) diff --git a/hc/front/validators.py b/hc/front/validators.py index 6acb6b44..3c3be110 100644 --- a/hc/front/validators.py +++ b/hc/front/validators.py @@ -1,5 +1,7 @@ -from croniter import croniter +from datetime import datetime + from django.core.exceptions import ValidationError +from hc.lib.cronsim import CronSim from urllib.parse import urlparse from pytz import all_timezones @@ -25,10 +27,10 @@ class CronExpressionValidator(object): raise ValidationError(message=self.message) try: - # Does croniter accept the schedule? - it = croniter(value) + # Does cronsim accept the schedule? + it = CronSim(value, datetime(2000, 1, 1)) # Can it calculate the next datetime? - it.next() + next(it) except: raise ValidationError(message=self.message) diff --git a/hc/front/views.py b/hc/front/views.py index b9957f25..322d985a 100644 --- a/hc/front/views.py +++ b/hc/front/views.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta as td +from datetime import timedelta as td import email import json import os @@ -7,7 +7,6 @@ from secrets import token_urlsafe from urllib.parse import urlencode from cron_descriptor import ExpressionDescriptor -from croniter import croniter from django.conf import settings from django.contrib import messages from django.contrib.auth.decorators import login_required @@ -50,6 +49,7 @@ from hc.front.templatetags.hc_extras import ( ) from hc.lib import jsonschema from hc.lib.badges import get_badge_url +from hc.lib.cronsim import CronSim import pytz from pytz.exceptions import UnknownTimeZoneError import requests @@ -499,9 +499,9 @@ def cron_preview(request): if len(schedule.split()) != 5: raise ValueError() - it = croniter(schedule, now_local) 
+ it = CronSim(schedule, now_local) for i in range(0, 6): - ctx["dates"].append(it.get_next(datetime)) + ctx["dates"].append(next(it)) except UnknownTimeZoneError: ctx["bad_tz"] = True @@ -513,7 +513,7 @@ def cron_preview(request): descriptor = ExpressionDescriptor(schedule, use_24hour_time_format=True) ctx["desc"] = descriptor.get_description() except: - # We assume the schedule is valid if croniter accepts it. + # We assume the schedule is valid if cronsim accepts it. # If cron-descriptor throws an exception, don't show the description # to the user. pass diff --git a/hc/lib/cronsim.py b/hc/lib/cronsim.py new file mode 100644 index 00000000..91e32a41 --- /dev/null +++ b/hc/lib/cronsim.py @@ -0,0 +1,291 @@ +import calendar +from datetime import datetime, timedelta as td, time +from enum import IntEnum + +import pytz + +RANGES = [ + frozenset(range(0, 60)), + frozenset(range(0, 24)), + frozenset(range(1, 32)), + frozenset(range(1, 13)), + frozenset(range(0, 8)), + frozenset(range(0, 60)), +] + +SYMBOLIC_DAYS = "SUN MON TUE WED THU FRI SAT".split() +SYMBOLIC_MONTHS = "JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC".split() +DAYS_IN_MONTH = [None, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + + +class CronSimError(Exception): + pass + + +def _int(value): + if not value.isdigit(): + raise CronSimError("Bad value: %s" % value) + + return int(value) + + +class Field(IntEnum): + MINUTE = 0 + HOUR = 1 + DAY = 2 + MONTH = 3 + DOW = 4 + + def int(self, s): + if self == Field.MONTH and s.upper() in SYMBOLIC_MONTHS: + return SYMBOLIC_MONTHS.index(s.upper()) + 1 + + if self == Field.DOW and s.upper() in SYMBOLIC_DAYS: + return SYMBOLIC_DAYS.index(s.upper()) + + v = _int(s) + if v not in RANGES[self]: + raise CronSimError("Bad value: %s" % s) + + return v + + def parse(self, s): + if s == "*": + return RANGES[self] + + if "," in s: + result = set() + for term in s.split(","): + result.update(self.parse(term)) + return result + + if "#" in s and self == Field.DOW: + term, 
nth = s.split("#", maxsplit=1) + nth = _int(nth) + if nth < 1 or nth > 5: + raise CronSimError("Bad value: %s" % s) + + spec = (self.int(term), nth) + return {spec} + + if "/" in s: + term, step = s.split("/", maxsplit=1) + step = _int(step) + if step == 0: + raise CronSimError("Step cannot be zero") + + items = sorted(self.parse(term)) + if items == [CronSim.LAST]: + return items + + if len(items) == 1: + start = items[0] + end = max(RANGES[self]) + items = range(start, end + 1) + return set(items[::step]) + + if "-" in s: + start, end = s.split("-", maxsplit=1) + start = self.int(start) + end = self.int(end) + + if end < start: + raise CronSimError("Range end cannot be smaller than start") + + return set(range(start, end + 1)) + + if self == Field.DAY and s in ("L", "l"): + return {CronSim.LAST} + + return {self.int(s)} + + +class NoTz(object): + def localize(self, dt, is_dst=None): + return dt + + def normalize(self, dt): + return dt + + +class CronSim(object): + LAST = -1000 + + def __init__(self, expr, dt): + self.tz = dt.tzinfo or NoTz() + self.fixup_tz = None + self.dt = dt.replace(second=0, microsecond=0) + + parts = expr.split() + if len(parts) != 5: + raise CronSimError("Wrong number of fields") + + self.minutes = Field.MINUTE.parse(parts[0]) + self.hours = Field.HOUR.parse(parts[1]) + self.days = Field.DAY.parse(parts[2]) + self.months = Field.MONTH.parse(parts[3]) + self.weekdays = Field.DOW.parse(parts[4]) + + # If day is unrestricted but dow is restricted then match only with dow: + if self.days == RANGES[Field.DAY] and self.weekdays != RANGES[Field.DOW]: + self.days = set() + + # If dow is unrestricted but day is restricted then match only with day: + if self.weekdays == RANGES[Field.DOW] and self.days != RANGES[Field.DAY]: + self.weekdays = set() + + if len(self.days) and min(self.days) > 29: + # Check if we have any month with enough days + if min(self.days) > max(DAYS_IN_MONTH[month] for month in self.months): + raise CronSimError("Bad 
day-of-month") + + if self.dt.tzinfo in (None, pytz.utc): + # No special DST handling for naive datetimes or UTC + pass + else: + # Special handling for jobs that run at specific time, or with + # a granularity greater than one hour (to mimic Debian cron). + # Convert to naive datetime, will convert back to the tz-aware + # in __next__, right before returning the value. + if not parts[0].startswith("*") and not parts[1].startswith("*"): + self.fixup_tz, self.tz = self.tz, NoTz() + self.dt = self.dt.replace(tzinfo=None) + + def tick(self, minutes=1): + """ Roll self.dt forward by 1 or more minutes and fix timezone. """ + + self.dt = self.tz.normalize(self.dt + td(minutes=minutes)) + + def advance_minute(self): + """Roll forward the minute component until it satisfies the constraints. + + Return False if the minute meets constraints without modification. + Return True if self.dt was rolled forward. + + """ + + if self.dt.minute in self.minutes: + return False + + if len(self.minutes) == 1: + # An optimization for the special case where self.minutes has exactly + # one element. Instead of advancing one minute per iteration, + # make a jump from the current minute to the target minute. + delta = (next(iter(self.minutes)) - self.dt.minute) % 60 + self.tick(minutes=delta) + + while self.dt.minute not in self.minutes: + self.tick() + if self.dt.minute == 0: + # Break out to re-check month, day and hour + break + + return True + + def advance_hour(self): + """Roll forward the hour component until it satisfies the constraints. + + Return False if the hour meets constraints without modification. + Return True if self.dt was rolled forward. + + """ + + if self.dt.hour in self.hours: + return False + + self.dt = self.dt.replace(minute=0) + while self.dt.hour not in self.hours: + self.tick(minutes=60) + if self.dt.hour == 0: + # break out to re-check month and day + break + + return True + + def match_day(self, d): + # Does the day of the month match? 
+ if d.day in self.days: + return True + + if CronSim.LAST in self.days: + _, last = calendar.monthrange(d.year, d.month) + if d.day == last: + return True + + # Does the day of the week match? + dow = d.weekday() + 1 + if dow in self.weekdays or dow % 7 in self.weekdays: + return True + + idx = (d.day + 6) // 7 + if (dow, idx) in self.weekdays or (dow % 7, idx) in self.weekdays: + return True + + def advance_day(self): + """Roll forward the day component until it satisfies the constraints. + + This method advances the date until it matches either the + day-of-month, or the day-of-week constraint. + + Return False if the day meets constraints without modification. + Return True if self.dt was rolled forward. + + """ + + needle = self.dt.date() + if self.match_day(needle): + return False + + while not self.match_day(needle): + needle += td(days=1) + if needle.day == 1: + # We're in a different month now, break out to re-check month + # This significantly speeds up the "0 0 * 2 MON#5" case + break + + self.dt = self.tz.localize(datetime.combine(needle, time())) + return True + + def advance_month(self): + """Roll forward the month component until it satisfies the constraints. """ + + if self.dt.month in self.months: + return + + needle = self.dt.date() + while needle.month not in self.months: + needle = (needle.replace(day=1) + td(days=32)).replace(day=1) + + self.dt = self.tz.localize(datetime.combine(needle, time())) + + def __iter__(self): + return self + + def __next__(self): + self.tick() + + while True: + self.advance_month() + + if self.advance_day(): + continue + + if self.advance_hour(): + continue + + if self.advance_minute(): + continue + + # If all constraints are satisfied then we have the result. + # The last step is to see if self.fixup_tz is set. If it is, + # localize self.dt and handle conflicts. 
+ if self.fixup_tz: + while True: + try: + return self.fixup_tz.localize(self.dt, is_dst=None) + except pytz.AmbiguousTimeError: + return self.fixup_tz.localize(self.dt, is_dst=True) + except pytz.NonExistentTimeError: + self.dt += td(minutes=1) + + return self.dt diff --git a/hc/lib/jsonschema.py b/hc/lib/jsonschema.py index 4e562b47..6577631c 100644 --- a/hc/lib/jsonschema.py +++ b/hc/lib/jsonschema.py @@ -4,7 +4,9 @@ Supports only a tiny subset of jsonschema. """ -from croniter import croniter +from datetime import datetime + +from hc.lib.cronsim import CronSim from pytz import all_timezones @@ -26,10 +28,10 @@ def validate(obj, schema, obj_name="value"): if len(obj.split()) != 5: raise ValueError() - # Does croniter accept the schedule? - it = croniter(obj) + # Does cronsim accept the schedule? + it = CronSim(obj, datetime(2000, 1, 1)) # Can it calculate the next datetime? - it.next() + next(it) except: raise ValidationError("%s is not a valid cron expression" % obj_name) if schema.get("format") == "timezone" and obj not in all_timezones: diff --git a/requirements.txt b/requirements.txt index d5e4c149..47bb0300 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ cron-descriptor==1.2.24 -croniter==1.0.8 Django==3.2.8 django-compressor==2.4 fido2==0.9.1 diff --git a/templates/front/docs_cron.html b/templates/front/docs_cron.html index 90676976..8bb04e3c 100644 --- a/templates/front/docs_cron.html +++ b/templates/front/docs_cron.html @@ -18,7 +18,7 @@
{{ site_name }} understands most of the traditional cron syntax features. Under the hood, it uses the - croniter package + cronsim package to parse and interpret cron expressions. Below is a showcase of supported syntax features.