Added merge and undo, brighter colors in ratings, added rss feed link, testing...

Added merge and undo, brighter colors in ratings, added rss feed link, testing can_connect, connection test, improved admin navigation
parent 0cb6147b
......@@ -2,3 +2,5 @@
layout python3
# enable DEBUG mode by default
export DEBUG=1
# during development, just have ipv6 on.
\ No newline at end of file

482 KB | W: | H:


244 KB | W: | H:

  • 2-up
  • Swipe
  • Onion skin
......@@ -78,6 +78,11 @@ def rebuild_ratings():
def rebuild_ratings_async():
"""Remove all organization and url ratings, then rebuild them from scratch."""
def add_organization_rating(organizations: List[Organization], build_history: bool=False, when: datetime=None):
......@@ -113,6 +118,7 @@ def add_url_rating(urls: List[Url], build_history: bool=False, when: datetime=No
def rerate_urls(urls: List[Url]=None):
if not urls:
urls = list(Url.objects.all().filter(is_dead=False).order_by('url'))
......@@ -122,6 +128,18 @@ def rerate_urls(urls: List[Url]=None):
rate_timeline(create_timeline(url), url)
def rerate_urls_async(urls: List[Url]=None):
    """Dispatch a timeline rebuild followed by a re-rating for each url, asynchronously.

    :param urls: urls to re-rate. When omitted or empty, every url that is not flagged
        as dead is used, ordered by its ``url`` field.
    """
    targets = urls or list(Url.objects.all().filter(is_dead=False).order_by('url'))

    # One chain per url, so that not all ratings are empty at the same time.
    for target in targets:
        pipeline = create_timeline.s(target) | rate_timeline.s(target)
        pipeline.apply_async()
def rerate_organizations(organizations: List[Organization]=None):
if not organizations:
organizations = list(Organization.objects.all().order_by('name'))
......@@ -219,6 +237,7 @@ def significant_moments(organizations: List[Organization]=None, urls: List[Url]=
return moments, happenings
def create_timeline(url: Url):
Maps happenings to moments.
......@@ -314,6 +333,7 @@ def latest_moment_of_datetime(datetime_: datetime):
return datetime_.replace(hour=23, minute=59, second=59, microsecond=999999, tzinfo=pytz.utc)
def rate_timeline(timeline, url: Url):"Rebuilding ratings for for %s" % url)
......@@ -419,11 +419,11 @@ div#report {
.redrow {
color: darkred;
color: #d30000;
.greenrow {
color: darkgreen;
color: #00c700;
.grayrow {
......@@ -375,31 +375,36 @@
<div class="row">
<div class="col-md-6">
<h3>Versleuteling Updates</h3>
Volg de laatste bevinden met een <a href="/data/feed/tls_qualys" target="_blank">rss feed</a>.
<p>Volg de laatste bevinden via de <a href="/data/feed/tls_qualys" target="_blank">rss feed</a>.</p>
<span id="latest_tls_qualys"></span>
<div class="col-md-6">
<h3>Gebrek aan versleuteling Updates</h3>
<p>Volg de laatste bevinden via de <a href="/data/feed/plain_https" target="_blank">rss feed</a>.</p>
<span id="latest_plain_https"></span>
<div class="row">
<div class="col-md-6">
<h3>Forceren van versleuteling Updates</h3>
<p>Volg de laatste bevinden via de <a href="/data/feed/Strict-Transport-Security" target="_blank">rss feed</a>.</p>
<span id="latest_security_headers_strict_transport_security"></span>
<div class="col-md-6">
<h3>X-Frame-Options Updates</h3>
<p>Volg de laatste bevinden via de <a href="/data/feed/X-Frame-Options" target="_blank">rss feed</a>.</p>
<span id="latest_security_headers_x_frame_options"></span>
<div class="row">
<div class="col-md-6">
<h3>X-Content-Type-Option Updates</h3>
<p>Volg de laatste bevinden via de <a href="/data/feed/X-Content-Type-Options" target="_blank">rss feed</a>.</p>
<span id="latest_security_headers_x_content_type_options"></span>
<div class="col-md-6">
<h3>X-XSS-Protection Updates</h3>
<p>Volg de laatste bevinden via de <a href="/data/feed/X-XSS-Protection" target="_blank">rss feed</a>.</p>
<span id="latest_security_headers_x_xss_protection"></span>
......@@ -4,6 +4,7 @@ from datetime import datetime
import pytz
from django.contrib import admin
from django.urls import reverse
from django.utils.html import format_html
from jet.admin import CompactInline
import failmap_admin.scanners.scanner_http as scanner_http
......@@ -118,10 +119,11 @@ class UrlAdmin(admin.ModelAdmin):
js = ('js/action_buttons.js', )
list_display = ('url', 'endpoints', 'current_rating', 'onboarded', 'uses_dns_wildcard',
'is_dead', 'not_resolvable', 'created_on')
'dead_for', 'unresolvable_for', 'created_on')
search_fields = ('url', )
list_filter = ('url', 'is_dead', 'is_dead_since', 'is_dead_reason',
'not_resolvable', 'uses_dns_wildcard', 'organization')
'not_resolvable', 'not_resolvable_since', 'not_resolvable_reason',
'uses_dns_wildcard', 'organization')
fieldsets = (
(None, {
......@@ -140,7 +142,24 @@ class UrlAdmin(admin.ModelAdmin):
readonly_fields = ['created_on', 'onboarded']
def endpoints(self, obj: Url):
return obj.endpoint_set.count()
return format_html("%s <a href='/admin/scanners/endpoint/?q=%s' target='_blank'>🔍</a>" %
(obj.endpoint_set.count(), obj.url))
def unresolvable_for(self):
if self.not_resolvable_since:
return "%s days" % ( - self.not_resolvable_since).days
return "-"
# todo: further humanize this.
def dead_for(self):
if self.is_dead_since:
return "%s days" % ( - self.is_dead_since).days
return "-"
def current_rating(obj):
This diff is collapsed.
import logging
from import BaseCommand
logger = logging.getLogger(__package__)
class Command(BaseCommand):
    """Placeholder management command for adding documentation artifacts; not implemented yet."""

    help = 'Adds some documentation artifacts to this project.'

    def handle(self, *args, **options):
        """Entry point of the command.

        :raises NotImplementedError: always, as the command has no implementation yet.
        """
        # NotImplemented is a comparison sentinel, not an exception class: raising it
        # fails with a TypeError instead of signalling "not implemented".
        raise NotImplementedError
# failmap-admin graph_models organizations scanners map -o myapp_models.png
# it's possible to also include auth and other installed things by not specifying an app.
import datetime
import logging
from import BaseCommand
from failmap_admin.organizations.models import Url
from failmap_admin.scanners.models import Endpoint, EndpointGenericScan, Screenshot, TlsQualysScan
logger = logging.getLogger(__package__)
class Command(BaseCommand):
Everything that can die / is not resolvable etc for a short while is merged in a certain timespan.
First know that everything in failmap stacks. This is needed to show gaps over time. Consider the following
January 2017: domain exists.
February 2017: domain died.
March 2017: domain exists again.
In order to show this historical data (the outage for a few months), we have an "is_dead" flag on
each url. When the url is discovered later, a new url is added, with new endpoints and such.
Due to bad network connections and other unreliable things, it might be that something is declared dead incorrectly.
For example: something is down a single day and then up again. This might be our fault via coding bugs etc.
This library helps fixing those issues, mainly to speed up rating rebuilding and debugging.
This library will merge everything that is dead for a certain timespan (a week) together. So in the above case
nothing will happen. But the following will be merged:
13 january 2017: exists
14 january 2017: dies
15 january 2017: exists
Now there are two "" urls. This can be the case, but in such a short timespan it just clutters up
the database with extra records.
help = 'Merges similar things that have been dead for a very short while.'
def handle(self, *args, **options):
def merge_endpoints_that_recently_died():
# with a timespan of a week: if within a week a new similar endpoint was created, merge them into the old one.
# find difference between "is_dead_since" and "discovered_on" for the same url.
for url in Url.objects.all():
# merging can only happen with dead endpoints that have similar endpoints within the timespan
# ordered by oldest first (the "-" syntax is so confusing)
dead_endpoints = Endpoint.objects.all().filter(url=url, is_dead=True).order_by("is_dead_since")
for dead_endpoint in dead_endpoints:
# bugs and manually entering this happen, and then there is still no date. (todo should not be possible)
if not dead_endpoint.is_dead_since:
# similar created within timespan have to be merged. Let's call it an "identical".
# no scanner takes more than a week
# dead on january 14. Means that identical endpoints < january 21 are actually the same.
the_timespan = dead_endpoint.is_dead_since + datetime.timedelta(days=7)
identical_endpoints = Endpoint.objects.all().filter(
discovered_on__gte=dead_endpoint.is_dead_since, # it's newer
discovered_on__lte=the_timespan, # but not too new
if not identical_endpoints:
continue"Found identical endpoints for %s: " % dead_endpoint)[ep for ep in identical_endpoints])
for identical_endpoint in identical_endpoints:
# merge everything that relates to the identical endpoints to the dead_endpoint:
# Copy the state of the endpoint. It goes from oldest to newest. So the latest state is used.
# Only alive endpoints are added, so a big chance that this will be alive.
dead_endpoint.is_dead = identical_endpoint.is_dead
dead_endpoint.is_dead_since = identical_endpoint.is_dead_since
dead_endpoint.is_dead_reason = identical_endpoint.is_dead_reason
# then remove the identical endpoint, and declare the dead_endpoint to be alive again.
def remove_short_deaths():
Remove scans that
raise NotImplemented
import datetime
import logging
import pytz
from import BaseCommand
from failmap_admin.scanners.models import Endpoint
logger = logging.getLogger(__package__)
class Command(BaseCommand):
    """
    Undoes certain things that happened recently. This is a specialist's tool that is usually a one-shot.

    It can fix certain issues that were caused by mass-scanning when, for example, the network died and as a
    result a lot of urls or endpoints died.

    As urls, endpoints and organizations stack over time (being dead etc), some scanners will have already
    created a new endpoint to replace the one that died accidentally. For this you can use the "merge" command,
    which is also a specialist's tool that requires reading the manual.

    Usually run this script after merge:
    failmap_admin merge
    failmap_admin undo
    """

    # The previous help text was copied from the merge command and described the wrong command.
    help = 'Undoes endpoint deaths on IPv4 and IPv6 that happened in the last 4 days.'

    def handle(self, *args, **options):
        """Ask for confirmation and, only on the literal answer "YES", undo recent endpoint deaths."""
        # a short warning to help not running this command by accident.
        # in a next commit this command should be empty.
        answer = input("Do you want to undo all endpoint deaths on IPv6/4 in the last 4 days?")

        if answer == "YES":
            http_scanner_undo_endpoint_deaths(in_the_last_n_days=4, ip_version=4)
            http_scanner_undo_endpoint_deaths(in_the_last_n_days=4, ip_version=6)
def http_scanner_undo_endpoint_deaths(in_the_last_n_days: int=1, ip_version: int=6):
Sets all ipv6 or 4 endpoints to alive that where killed in the past N days.
Run this if you did a scan for ipv6 networks when no ipv6 network was available.
:param in_the_last_n_days: number of days between now and the moment a mistake was made
:param ip_version: 4 or 6
# the endpoint cannot have a "new" endpoint within this timeframe. If so, you should merge.
dead_endpoints = Endpoint.objects.all().filter(
is_dead=True, - datetime.timedelta(days=in_the_last_n_days),
is_dead_reason="Not found in HTTP Scanner anymore."
# can't revive if there is a new endpoint already, those should be merged (as it contains all kinds of related data)
for dead_endpoint in dead_endpoints:
has_similar_alive_endpoints = Endpoint.objects.all().filter(
is_dead=False, # given only one can be alive at any point.
if not has_similar_alive_endpoints:"Undoing death on %s" % dead_endpoint)
dead_endpoint.is_dead = False
dead_endpoint.is_dead_reason = ""
dead_endpoint.is_dead_since = None
else:"Can't undo death on %s as there is a similar alive. Try and merge." % dead_endpoint)
from django.contrib import admin
from django.utils.html import format_html
from jet.admin import CompactInline
from import rate_url
......@@ -43,11 +44,11 @@ class UrlIpAdmin(admin.ModelAdmin):
class EndpointAdmin(admin.ModelAdmin):
list_display = ('id', 'url', 'discovered_on', 'ip_version', 'port', 'protocol', 'is_dead', 'is_dead_since',
list_display = ('id', 'url', 'visit', 'discovered_on', 'ip_version', 'port', 'protocol', 'is_dead', 'is_dead_since',
'tls_scans', 'generic_scans')
search_fields = ('url__url', 'ip_version', 'port', 'protocol', 'is_dead',
'is_dead_since', 'is_dead_reason')
list_filter = ('ip_version', 'port', 'protocol', 'is_dead')
list_filter = ('ip_version', 'port', 'protocol', 'is_dead', 'is_dead_reason')
fieldsets = (
(None, {
'fields': ('url', 'ip_version', 'protocol', 'port', 'discovered_on')
......@@ -67,6 +68,11 @@ class EndpointAdmin(admin.ModelAdmin):
def generic_scans(inst):
return EndpointGenericScan.objects.filter(
def visit(inst):
    """Render a "Visit" link that opens the endpoint's address in a new browser tab.

    :param inst: object providing ``protocol``, ``url.url`` and ``port``
        (presumably an Endpoint - confirm against the admin class using this).
    :return: HTML-safe anchor element pointing at ``protocol://url:port/``.
    """
    url = "%s://%s:%s/" % (inst.protocol, inst.url.url, inst.port)

    # Hand the url to format_html as an argument so it is escaped. Interpolating it with %
    # beforehand marks the whole string as safe and lets stored values inject markup.
    return format_html("<a href='{}' target='_blank'>Visit</a>", url)
inlines = [TlsQualysScanAdminInline, EndpointGenericScanInline]
save_as = True # Save as new is nice for duplicating endpoints.
......@@ -98,12 +104,19 @@ class TlsQualysScanAdmin(admin.ModelAdmin):
'last_scan_moment', 'rating_determined_on')
search_fields = ('endpoint__url__url', 'qualys_rating', 'qualys_rating_no_trust',
'scan_date', 'rating_determined_on')
list_filter = ('endpoint', 'qualys_rating', 'qualys_rating_no_trust',
# listing all endpoints takes ages
list_filter = ('qualys_rating', 'qualys_rating_no_trust',
'scan_date', 'rating_determined_on', 'qualys_message')
# loading related fields in django jet is not done in a smart way: everything is prefetched.
# and when there are > 10000 objects of some sort, the system becomes insanely slow.
# Should make it an autocomplete field... or something else.
# therefore endpoint is set as a readonly_field.
fields = ('endpoint', 'qualys_rating', 'qualys_rating_no_trust',
'rating_determined_on', 'last_scan_moment')
readonly_fields = ('scan_date', 'scan_time', 'last_scan_moment')
readonly_fields = ('scan_date', 'scan_time', 'last_scan_moment', 'endpoint')
actions = ['rate_url', 'scan_url']
......@@ -157,12 +170,13 @@ class EndpointGenericScanAdmin(admin.ModelAdmin):
'explanation', 'last_scan_moment', 'rating_determined_on')
search_fields = ('endpoint__url__url', 'type', 'domain', 'rating',
'explanation', 'last_scan_moment', 'rating_determined_on')
list_filter = ('endpoint', 'type', 'domain', 'rating',
list_filter = ('type', 'domain', 'rating',
'explanation', 'last_scan_moment', 'rating_determined_on')
fields = ('endpoint', 'type', 'domain', 'rating',
'explanation', 'last_scan_moment', 'rating_determined_on')
readonly_fields = ['last_scan_moment']
# see tlsqualysscan why endpoint is here.
readonly_fields = ['last_scan_moment', 'endpoint']
class EndpointGenericScanScratchpadAdmin(admin.ModelAdmin):
......@@ -2,8 +2,8 @@ import logging
from import BaseCommand
from import (add_organization_rating, create_timeline, rerate_urls,
from import (add_organization_rating, create_timeline,
rerate_organizations, rerate_urls, show_timeline_console)
from failmap_admin.organizations.models import Organization, Url
from failmap_admin.scanners.models import Endpoint
from failmap_admin.scanners.scanner_security_headers import scan as scan_headers
......@@ -15,8 +15,8 @@ class Command(BaseCommand):
help = 'Development command'
def handle(self, *args, **options):
from failmap_admin.scanners.scanner_http import get_ips
# as a task
# reset_onboard()
# rebuild_ratings()
......@@ -127,9 +127,9 @@ def develop_determineratings():
# pyflakes when = datetime(2016, 12, 31, 0, 0, tzinfo=pytz.utc)
# when =
organization = Organization.objects.filter(name="Ameland").get()
organization = Organization.objects.filter(name="Zederik").get()
# rerate_urls(Url.objects.all().filter(organization=organization))
# ratings are always different since we now also save last scan date.
# only creates things for near midnight. Should check if today, and then save for now.
# add_organization_rating(organization, create_history=True)
import logging
from import BaseCommand
from failmap_admin.scanners.scanner_http import test_network
logger = logging.getLogger(__package__)
class Command(BaseCommand):
help = 'Try to establish ipv4 and ipv6 connections to test the network, on both a worker and locally.'
def handle(self, *args, **options):
# on a worker
task = test_network.s(code_location="worker")
# locally
......@@ -25,12 +25,15 @@ Likely: 80, 8080, 8008, 8888, 8088
import logging
import random
import socket
from datetime import datetime
from typing import List
import pytz
import requests
# suppress InsecureRequestWarning, we do those request on purpose.
import urllib3
from django.conf import settings
from requests import ConnectTimeout, HTTPError, ReadTimeout, Timeout
from requests.exceptions import ConnectionError
......@@ -41,6 +44,8 @@ from failmap_admin.scanners.models import Endpoint, UrlIp
from .timeout import timeout
logger = logging.getLogger(__package__)
......@@ -88,6 +93,10 @@ def verify_endpoints(urls: List[Url]=None, port: int=None, protocol: str=None, o
if organizations:
endpoints = endpoints.filter(url__organization__in=organizations)
# randomize the endpoints to better spread load.
endpoints = list(endpoints)
for endpoint in endpoints:
scan_url(endpoint.protocol, endpoint.url, endpoint.port)
......@@ -117,6 +126,10 @@ def discover_endpoints(urls: List[Url]=None, port: int=None, protocol: str=None,
# Don't underestimate the flexibility of the internet.
ports = [80, 443, 8008, 8080, 8088, 8443, 8888]
# randomize the endpoints to better spread load.
urls = list(urls)
scan_urls(protocols, urls, ports)
......@@ -157,11 +170,13 @@ def resolve_and_scan(protocol: str, url: Url, port: int):
store_task = store_url_ips.s(url, ips) # administrative, does reverse dns query
# todo: this should be re-checked a few times before it's really killed. Retry?
if not any(ips):
kill_url_task = kill_url.s(url) # administrative
# this is not a stacking solution. Weird. Why not?
url_revive_task = revive_url.s(url)
......@@ -179,8 +194,6 @@ def resolve_and_scan(protocol: str, url: Url, port: int):
task = (connect_task | result_task)
# v6 is not yet supported, as we don't have v6 workers yet.
def get_ips(url: str):
ipv4 = ""
......@@ -207,8 +220,26 @@ def get_ips(url: str):
return ipv4, ipv6
def can_connect(protocol: str, url: Url, port: int, ip: str):
# When doing a lot of connections, try to do them in semi-random order also not to overload networks/firewalls
# Don't try and overload the network with too many connections.
# The (virtual) network (card) might have a problem keeping up.
# Firewalls might see it as hostile.
# Our database might be overloaded with work,
# To consider the rate limit:
# There are about 11000 endpoints at this moment.
# 3/s = 180/m = 1800/10m = 10800/h
# 4/s = 240/m = 2400/10m = 14400/h
# 5/s = 300/m = 3000/10m = 18000/h
# 10/s = 600/m = 6000/10m = 36000/h
# on the development machine it scans all within 10 minutes. About 20/s.
def can_connect(protocol: str, url: Url, port: int, ip: str) -> bool:
Searches for both IPv4 and IPv6 IP addresses / types.
......@@ -244,7 +275,7 @@ def can_connect(protocol: str, url: Url, port: int, ip: str):
uri = "%s://%s:%s" % (protocol, ip, port)
logger.debug("Scanning http(s) server on: %s" % uri)
logger.debug("Attempting connect on: %s: host: %s" % (uri, url.url))
......@@ -481,3 +512,42 @@ def kill_endpoint(protocol: str, url: Url, port: int, ip_version: int):
ep.is_dead_since =
ep.is_dead_reason = "Not found in HTTP Scanner anymore."
def test_network(code_location=""):
Used to see if a worker can do IPv6. Will trigger an exception when no ipv4 or ipv6 is available,
which is logged in sentry and other logs.
""""Testing network connection via %s." % code_location)
url = Url()
url.url = ""
ips = get_ips(url.url)
can_ipv4, can_ipv6 = False, False
(ipv4, ipv6) = ips
if ipv4:
can_ipv4 = can_connect("https", url, 443, ipv4)
if ipv6:
can_ipv6 = can_connect("https", url, 443, ipv6)
if not can_ipv4 and not can_ipv6:
raise ConnectionError("Both ipv6 and ipv4 networks could not be reached via %s." % code_location)
if not can_ipv4:
raise ConnectionError("Could not reach IPv4 Network via %s." % code_location)
else:"IPv4 could be reached via %s" % code_location)
if not can_ipv6:
raise ConnectionError("Could not reach IPv6 Network via %s." % code_location)
else:"IPv6 could be reached via %s" % code_location)
......@@ -51,3 +51,4 @@ raven
# allow recursive comparison of dictionaries
\ No newline at end of file
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment