stats now faster, less code, easier to understand

parent c9d2e434
......@@ -260,13 +260,15 @@ def rate_timeline(timeline, url: Url):
previous_ratings = {}
previous_endpoints = []
url_was_once_rated = False
# work on a sorted timeline as otherwise this code is non-deterministic!
for moment in sorted(timeline):
scores = []
given_ratings = {}
if 'url_not_resolvable' in timeline[moment].keys() or 'url_is_dead' in timeline[moment].keys():
if ('url_not_resolvable' in timeline[moment].keys() or 'url_is_dead' in timeline[moment].keys()) \
and url_was_once_rated:
logger.debug('Url became non-resolvable or dead. Adding an empty rating to lower the score of'
'this domain if it had a score. It has been cleaned up. (hooray)')
# this is the end for the domain.
......@@ -311,6 +313,7 @@ def rate_timeline(timeline, url: Url):
"tls_qualys", "plain_https"]
for endpoint in relevant_endpoints:
url_was_once_rated = True
calculations = []
these_scans = {}
......@@ -387,7 +390,8 @@ def rate_timeline(timeline, url: Url):
previous_endpoints += relevant_endpoints
# prevent empty ratings cluttering the database and skewing the stats.
if not endpoint_calculations:
# todo: only do this if there never was a urlrating before this.
if not endpoint_calculations and not url_was_once_rated:
sorted_endpoints = sorted(endpoint_calculations, key=lambda k: k['points'], reverse=True)
......@@ -492,7 +496,7 @@ def rate_organization_on_moment(organization: Organization, when: datetime=None)
total_rating = 0
# todo: closing off urls, after no relevant endpoints, but still resolvable.
# todo: closing off urls, after no relevant endpoints, but still resolvable. Done.
urls = relevant_urls_at_timepoint(organizations=[organization], when=when)
all_url_ratings = []
......@@ -87,6 +87,9 @@ def metrics_per_url(url):
So if an endpoint was in a previous rating, but not in this one, the endpoint died (or there were no relevant
metrics for it to store in the url rating).
todo: dead endpoints are not removed from url_ratings. An endpoint is dead at some point, but is still stored
as a rating. :( Or a new rating is somehow not written.
The best choice is to have the smallest granularity for ratings: these are ratings on an endpoint per day.
Url ratings are stored using deduplication. This saves several gigabytes of data.
......@@ -130,8 +133,8 @@ def metrics_per_url(url):
yesterdays_relevant_rating = relevant_rating
if 'endpoints' not in relevant_rating.calculation.keys():"No endpoints in this calculation. Url died or became not resolvable. "
"No metrics needed anymore :).")
logger.debug("No endpoints in this calculation. Url died, became non-resolvable or it's endpoints died."
"No metrics needed anymore for this one.")
return []
for endpoint in relevant_rating.calculation['endpoints']:
......@@ -139,6 +142,7 @@ def metrics_per_url(url):
for organization in relevant_rating.url.organization.all():
if 'low' not in rating.keys():
# When an url still is resolvable, but all endpoints themselves don't exist anymore.
#"No (low) rating in this endpoint. Is it a repeated finding? Those should "
# "have been all gone by now. What went wrong? %s" % endpoint)
......@@ -2,8 +2,8 @@ import logging
from import BaseCommand
from import (add_organization_rating, create_timeline, rebuild_ratings,
rerate_urls, show_timeline_console)
from import (add_organization_rating, create_timeline, rerate_urls,
from failmap_admin.organizations.models import Organization, Url
from failmap_admin.scanners.models import Endpoint
from failmap_admin.scanners.scanner_security_headers import scan as scan_headers
......@@ -15,9 +15,10 @@ class Command(BaseCommand):
help = 'Development command'
def handle(self, *args, **options):
# rebuild_ratings()
# develop_timeline()
# develop_determineratings()
# Command.test_sslscan_real()
# Command.test_determine_grade()
# Command.develop_sslscan()
......@@ -114,10 +115,11 @@ def develop_determineratings():
# pyflakes when = datetime(2016, 12, 31, 0, 0, tzinfo=pytz.utc)
# when =
organization = Organization.objects.filter(name="Ameland").get()
organization = Organization.objects.filter(name="Arnhem").get()
# ratings are always different since we now also save last scan date.
# only creates things for near midnight. Should check if today, and then save for now.
add_organization_rating(organization, create_history=True)
# add_organization_rating(organization, create_history=True)
# create one for NOW, not this night. This is a bug :)
# add_organization_rating(organization)
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment