Check.n_pings now stores the total number of pings the check has ever received. Running the "prunepings" command doesn't affect this field. Also adds a new "prunepingsslow" command, which works in smaller chunks and so is appropriate for the initial pruning of a huge api_ping table. (pull/27/head)
@ -1,13 +1,70 @@ | |||
""" | |||
Populate api_check.n_pings and api_ping.n fields. | |||
- api_ping.n stores the ping's serial number, counted separately for
each check. For example, if a particular check has received 100 pings,
its first ping will have n=1, and the 100th ping will have n=100.
- api_check.n_pings stores the last serial number assigned to a ping.
It is also the total number of pings the check has ever received.
This command works by "replaying" stored pings in their primary | |||
key order, and counting up their serial numbers. At the very end, | |||
api_check.n_pings fields are updated as well. | |||
Depending on the size of api_ping table, this command can potentially | |||
take a long time to complete. | |||
Note on ping pruning: when the prunepings command is run, some of the | |||
pings with the lowest serial numbers get removed. This doesn't affect | |||
the "n" field for remaining pings, or the "n_pings" value of checks. | |||
The serial numbers keep going up. | |||
""" | |||
import gc | |||
from collections import Counter | |||
from django.core.management.base import BaseCommand | |||
from django.db import connection, transaction | |||
from hc.api.models import Check, Ping | |||
class Command(BaseCommand):
    """Management command that back-fills ping.n and check.n_pings.

    Pings are replayed in primary-key order; each ping gets the next
    serial number for its check, and once all pings are numbered the
    per-check totals are written to check.n_pings.
    """

    help = 'Fill check.n_pings field and ping.n field'

    def handle(self, *args, **options):
        """Number every stored ping and update the per-check totals.

        Work is done in chunks of ``chunksize`` pings, with one commit
        per chunk. Returns the string "Done!".
        """
        connection.use_debug_cursor = False
        chunksize = 2000

        # Reset all n_pings fields to zero
        Check.objects.update(n_pings=0)

        counts = Counter()

        newest = Ping.objects.order_by('-pk').first()
        if newest is None:
            # Empty ping table: there is nothing to number and every
            # n_pings value has already been reset to zero above.
            return "Done!"

        pk = 0
        last_pk = newest.pk
        queryset = Ping.objects.order_by('pk')

        transaction.set_autocommit(False)
        try:
            while pk < last_pk:
                chunk_start = pk
                for ping in queryset.filter(pk__gt=pk)[:chunksize]:
                    pk = ping.pk
                    counts[ping.owner_id] += 1

                    ping.n = counts[ping.owner_id]
                    ping.save(update_fields=("n", ))

                transaction.commit()

                if pk == chunk_start:
                    # The chunk was empty (the remaining rows up to
                    # last_pk no longer exist); without this guard the
                    # loop would spin forever.
                    break

                gc.collect()
                # 100.0 forces float division on Python 2 as well.
                progress = 100.0 * pk / last_pk
                self.stdout.write(
                    "Processed ping id %d (%.2f%%)" % (pk, progress))
        except Exception:
            # Discard the partially processed chunk so no transaction
            # is left open when autocommit is switched back on.
            transaction.rollback()
            raise
        finally:
            transaction.set_autocommit(True)

        self.stdout.write("Updating check.n_pings")
        for check_id, n_pings in counts.items():
            Check.objects.filter(pk=check_id).update(n_pings=n_pings)

        return "Done!"
@ -0,0 +1,35 @@ | |||
from django.db.models import F | |||
from django.contrib.auth.models import User | |||
from django.core.management.base import BaseCommand | |||
from hc.accounts.models import Profile | |||
from hc.api.models import Check, Ping | |||
class Command(BaseCommand):
    help = """Prune pings based on limits in user profiles.
    This command prunes each check individually. So it does the work
    in small chunks instead of a few big SQL queries like the `prunepings`
    command. It is appropriate for initial pruning of the potentially
    huge api_ping table.
    """

    def handle(self, *args, **options):
        """Delete old pings check by check, one DELETE per check.

        For every check, pings whose serial number ``n`` is greater
        than zero and lower than ``n_pings - limit`` are removed, where
        ``limit`` is the ping_log_limit of the owning user's profile.
        Returns the string "Done!".
        """
        # Create any missing user profiles
        for user in User.objects.filter(profile=None):
            Profile.objects.for_user(user)

        checks = Check.objects.annotate(
            limit=F("user__profile__ping_log_limit"))

        for check in checks:
            if check.limit is None:
                # The annotation is NULL when the check has no related
                # user/profile row. "n_pings - None" would raise
                # TypeError and abort the run for all remaining checks.
                self.stdout.write(
                    "Skipped check %s (%s): no ping log limit" %
                    (check.id, check.name))
                continue

            q = Ping.objects.filter(owner_id=check.id)
            q = q.filter(n__lt=check.n_pings - check.limit)
            # Only rows with a positive serial number are pruned.
            q = q.filter(n__gt=0)
            n_pruned, _ = q.delete()

            self.stdout.write("Pruned %d pings for check %s (%s)" %
                              (n_pruned, check.id, check.name))

        return "Done!"
@ -0,0 +1,20 @@ | |||
# -*- coding: utf-8 -*- | |||
# Generated by Django 1.9 on 2016-01-03 09:26 | |||
from __future__ import unicode_literals | |||
from django.db import migrations, models | |||
class Migration(migrations.Migration):
    """Add the nullable integer field ``n`` to the ``ping`` model."""

    # Must run after the migration named 0020_check_n_pings in the api app.
    dependencies = [('api', '0020_check_n_pings')]

    operations = [
        migrations.AddField(
            model_name='ping',
            name='n',
            # null=True: the field may be left empty.
            field=models.IntegerField(null=True),
        ),
    ]
@ -1,21 +0,0 @@ | |||
from django.test import TestCase | |||
from hc.api.models import Check, Ping | |||
class CheckModelTestCase(TestCase):
    """Tests for ping pruning on the Check model."""

    def test_prune_pings(self):
        """Pruning six pings with keep_limit=3 leaves the last three."""
        check = Check()
        check.save()

        # Six pings with user agents UA0 .. UA5, saved in that order.
        for idx in range(6):
            Ping(owner=check, ua="UA%d" % idx).save()

        check.prune_pings(keep_limit=3)
        self.assertEqual(check.n_pings, 3)

        remaining = set(Ping.objects.values_list("ua", flat=True))
        # UA0, UA1, UA2 should have been pruned
        self.assertEqual(remaining, {"UA3", "UA4", "UA5"})