Added merge and undo, brighter colors in ratings, added rss feed link, testing can_connect, connection test, improved admin navigation
parent 0cb6147b
......@@ -2,3 +2,5 @@
layout python3
# enable DEBUG mode by default
export DEBUG=1
# during development, just have ipv6 on.
export NETWORK_SUPPORTS_IPV6=1
\ No newline at end of file
docs/failmap_models.png (image diff): 482 KB → 244 KB
......@@ -78,6 +78,11 @@ def rebuild_ratings():
rerate_organizations()
def rebuild_ratings_async():
"""Remove all organization and url ratings, then rebuild them from scratch."""
rerate_organizations()
@app.task
def add_organization_rating(organizations: List[Organization], build_history: bool=False, when: datetime=None):
"""
......@@ -113,6 +118,7 @@ def add_url_rating(urls: List[Url], build_history: bool=False, when: datetime=No
@app.task
def rerate_urls(urls: List[Url]=None):
if not urls:
urls = list(Url.objects.all().filter(is_dead=False).order_by('url'))
......@@ -122,6 +128,18 @@ def rerate_urls(urls: List[Url]=None):
rate_timeline(create_timeline(url), url)
@app.task
def rerate_urls_async(urls: List[Url]=None):
if not urls:
urls = list(Url.objects.all().filter(is_dead=False).order_by('url'))
# to not have all ratings empty, do it per url
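# note on the chain below: Celery's | operator links the two signatures, and the timeline returned by
# create_timeline is prepended as the first argument of rate_timeline.s(url), so the worker ends up
# calling rate_timeline(timeline, url).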
for url in urls:
UrlRating.objects.all().filter(url=url).delete()
(create_timeline.s(url) | rate_timeline.s(url)).apply_async()
def rerate_organizations(organizations: List[Organization]=None):
if not organizations:
organizations = list(Organization.objects.all().order_by('name'))
......@@ -219,6 +237,7 @@ def significant_moments(organizations: List[Organization]=None, urls: List[Url]=
return moments, happenings
@app.task
def create_timeline(url: Url):
"""
Maps happenings to moments.
......@@ -314,6 +333,7 @@ def latest_moment_of_datetime(datetime_: datetime):
return datetime_.replace(hour=23, minute=59, second=59, microsecond=999999, tzinfo=pytz.utc)
@app.task()
def rate_timeline(timeline, url: Url):
logger.info("Rebuilding ratings for for %s" % url)
......
......@@ -419,11 +419,11 @@ div#report {
}
.redrow {
color: darkred;
color: #d30000;
}
.greenrow {
color: darkgreen;
color: #00c700;
}
.grayrow {
......
......@@ -375,31 +375,36 @@
<div class="row">
<div class="col-md-6">
<h3>Versleuteling Updates</h3>
Volg de laatste bevindingen met een <a href="/data/feed/tls_qualys" target="_blank">rss feed</a>.
<p>Volg de laatste bevindingen via de <a href="/data/feed/tls_qualys" target="_blank">rss feed</a>.</p>
<span id="latest_tls_qualys"></span>
</div>
<div class="col-md-6">
<h3>Gebrek aan versleuteling Updates</h3>
<p>Volg de laatste bevinden via de <a href="/data/feed/plain_https" target="_blank">rss feed</a>.</p>
<span id="latest_plain_https"></span>
</div>
</div>
<div class="row">
<div class="col-md-6">
<h3>Forceren van versleuteling Updates</h3>
<p>Volg de laatste bevinden via de <a href="/data/feed/Strict-Transport-Security" target="_blank">rss feed</a>.</p>
<span id="latest_security_headers_strict_transport_security"></span>
</div>
<div class="col-md-6">
<h3>X-Frame-Options Updates</h3>
<p>Volg de laatste bevinden via de <a href="/data/feed/X-Frame-Options" target="_blank">rss feed</a>.</p>
<span id="latest_security_headers_x_frame_options"></span>
</div>
</div>
<div class="row">
<div class="col-md-6">
<h3>X-Content-Type-Option Updates</h3>
<p>Volg de laatste bevinden via de <a href="/data/feed/X-Content-Type-Options" target="_blank">rss feed</a>.</p>
<span id="latest_security_headers_x_content_type_options"></span>
</div>
<div class="col-md-6">
<h3>X-XSS-Protection Updates</h3>
<p>Volg de laatste bevinden via de <a href="/data/feed/X-XSS-Protection" target="_blank">rss feed</a>.</p>
<span id="latest_security_headers_x_xss_protection"></span>
</div>
</div>
......
......@@ -4,6 +4,7 @@ from datetime import datetime
import pytz
from django.contrib import admin
from django.urls import reverse
from django.utils.html import format_html
from jet.admin import CompactInline
import failmap_admin.scanners.scanner_http as scanner_http
......@@ -118,10 +119,11 @@ class UrlAdmin(admin.ModelAdmin):
js = ('js/action_buttons.js', )
list_display = ('url', 'endpoints', 'current_rating', 'onboarded', 'uses_dns_wildcard',
'is_dead', 'not_resolvable', 'created_on')
'dead_for', 'unresolvable_for', 'created_on')
search_fields = ('url', )
list_filter = ('url', 'is_dead', 'is_dead_since', 'is_dead_reason',
'not_resolvable', 'uses_dns_wildcard', 'organization')
'not_resolvable', 'not_resolvable_since', 'not_resolvable_reason',
'uses_dns_wildcard', 'organization')
fieldsets = (
(None, {
......@@ -140,7 +142,24 @@ class UrlAdmin(admin.ModelAdmin):
readonly_fields = ['created_on', 'onboarded']
def endpoints(self, obj: Url):
return obj.endpoint_set.count()
return format_html("%s <a href='/admin/scanners/endpoint/?q=%s' target='_blank'>🔍</a>" %
(obj.endpoint_set.count(), obj.url))
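# the ?q= parameter feeds the endpoint admin's search (search_fields includes url__url),
# so the link opens the endpoint list filtered to this url.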
@staticmethod
def unresolvable_for(obj):
if obj.not_resolvable_since:
return "%s days" % (datetime.now(pytz.utc) - obj.not_resolvable_since).days
else:
return "-"
# todo: further humanize this.
@staticmethod
def dead_for(obj):
if obj.is_dead_since:
return "%s days" % (datetime.now(pytz.utc) - obj.is_dead_since).days
else:
return "-"
@staticmethod
def current_rating(obj):
......
import logging
from django.core.management.base import BaseCommand
logger = logging.getLogger(__package__)
class Command(BaseCommand):
help = 'Adds some documentation artifacts to this project.'
def handle(self, *args, **options):
raise NotImplementedError
# failmap-admin graph_models organizations scanners map -o myapp_models.png
# it's possible to also include auth and other installed things by not specifying an app.
import datetime
import logging
from django.core.management.base import BaseCommand
from failmap_admin.organizations.models import Url
from failmap_admin.scanners.models import Endpoint, EndpointGenericScan, Screenshot, TlsQualysScan
logger = logging.getLogger(__package__)
class Command(BaseCommand):
"""
Anything that dies or becomes unresolvable for only a short while is merged within a certain timespan.
First know that everything in failmap stacks. This is needed to show gaps over time. Consider the following
timespan:
January 2017: amsterdam.nl domain exists.
February 2017: amsterdam.nl domain died.
March 2017: amsterdam.nl domain exists again.
In order to show this historical data (the outage of amsterdam.nl for a few months), we have an "is_dead" flag on
each url. When the url is discovered later, a new url is added, with new endpoints and such.
Due to bad network connections and other unreliable things, it might be that something is declared dead incorrectly.
For example: something is down a single day and then up again. This might be our fault via coding bugs etc.
This library helps fixing those issues, mainly to speed up rating rebuilding and debugging.
This library will merge everything that has been dead for only a certain timespan (a week). So in the above case
nothing will happen. But the following will be merged:
13 january 2017: my.amsterdam.nl exists
14 january 2017: my.amsterdam.nl dies
15 january 2017: my.amsterdam.nl exists
Now there are two "my.amsterdam.nl" urls. This can be the case, but in such a short timespan it just clutters up
the database with extra records.
"""
help = 'Merges similar things that have been dead for a very short while.'
def handle(self, *args, **options):
merge_endpoints_that_recently_died()
def merge_endpoints_that_recently_died():
# with a timespan of a week: if within a week a new similar endpoint was created, merge them into the old one.
# find difference between "is_dead_since" and "discovered_on" for the same url.
for url in Url.objects.all():
# merging can only happen with dead endpoints that have similar endpoints within the timespan
# ordered by oldest first (the "-" syntax is so confusing)
dead_endpoints = Endpoint.objects.all().filter(url=url, is_dead=True).order_by("is_dead_since")
for dead_endpoint in dead_endpoints:
# due to bugs and manual data entry, a dead endpoint can still lack a date. (todo: should not be possible)
if not dead_endpoint.is_dead_since:
continue
# similar created within timespan have to be merged. Let's call it an "identical".
# no scanner takes more than a week
# dead on january 14. Means that identical endpoints < january 21 are actually the same.
the_timespan = dead_endpoint.is_dead_since + datetime.timedelta(days=7)
identical_endpoints = Endpoint.objects.all().filter(
url=url,
ip_version=dead_endpoint.ip_version,
port=dead_endpoint.port,
protocol=dead_endpoint.protocol,
discovered_on__gte=dead_endpoint.is_dead_since, # it's newer
discovered_on__lte=the_timespan, # but not too new
).order_by("discovered_on")
if not identical_endpoints:
continue
logger.info("Found identical endpoints for %s: " % dead_endpoint)
logger.info([ep for ep in identical_endpoints])
for identical_endpoint in identical_endpoints:
# merge everything that relates to the identical endpoints to the dead_endpoint:
EndpointGenericScan.objects.all().filter(endpoint=identical_endpoint).update(endpoint=dead_endpoint)
TlsQualysScan.objects.all().filter(endpoint=identical_endpoint).update(endpoint=dead_endpoint)
Screenshot.objects.all().filter(endpoint=identical_endpoint).update(endpoint=dead_endpoint)
# Copy the state of the endpoint. It goes from oldest to newest, so the latest state is used.
# Only alive endpoints are added, so a big chance that this will be alive.
dead_endpoint.is_dead = identical_endpoint.is_dead
dead_endpoint.is_dead_since = identical_endpoint.is_dead_since
dead_endpoint.is_dead_reason = identical_endpoint.is_dead_reason
dead_endpoint.save()
# then remove the identical endpoint, and declare the dead_endpoint to be alive again.
identical_endpoint.delete()
def remove_short_deaths():
"""
Remove scans that
:return:
"""
raise NotImplementedError
import datetime
import logging
import pytz
from django.core.management.base import BaseCommand
from failmap_admin.scanners.models import Endpoint
logger = logging.getLogger(__package__)
class Command(BaseCommand):
"""
Undoes certain things that happened recently. This is a specialists tool that is usually a one-shot.
It can fix certain issues that were caused by mass-scanning when for example the network died and as a result
a lot of urls or endpoints died.
As urls, endpoints and organizations stack over time (being dead etc), some scanners will have already created a
new endpoint to replace the one that died accidentally. For this you can use the "merge" command, which is also
a specialists tool that requires reading the manual.
Usually run this script after merge:
failmap_admin merge
failmap_admin undo
"""
help = 'Undoes endpoint deaths that were recently registered by mistake.'
def handle(self, *args, **options):
# a short warning to avoid running this command by accident.
# in a next commit this command should be empty.
answer = input("Do you want to undo all endpoint deaths on IPv6/4 in the last 4 days? Type YES to confirm: ")
if answer == "YES":
http_scanner_undo_endpoint_deaths(in_the_last_n_days=4, ip_version=4)
http_scanner_undo_endpoint_deaths(in_the_last_n_days=4, ip_version=6)
def http_scanner_undo_endpoint_deaths(in_the_last_n_days: int=1, ip_version: int=6):
"""
Sets all ipv4 or ipv6 endpoints back to alive that were killed in the past N days.
Run this if you did a scan for ipv6 networks when no ipv6 network was available.
:param in_the_last_n_days: number of days between now and the moment a mistake was made
:param ip_version: 4 or 6
:return:
"""
# the endpoint cannot have a "new" endpoint within this timeframe. If so, you should merge.
dead_endpoints = Endpoint.objects.all().filter(
is_dead=True,
is_dead_since__gte=datetime.datetime.now(pytz.utc) - datetime.timedelta(days=in_the_last_n_days),
ip_version=ip_version,
is_dead_reason="Not found in HTTP Scanner anymore."
)
# can't revive if there is a new endpoint already, those should be merged (as it contains all kinds of related data)
for dead_endpoint in dead_endpoints:
has_similar_alive_endpoints = Endpoint.objects.all().filter(
is_dead=False, # given only one can be alive at any point.
ip_version=dead_endpoint.ip_version,
protocol=dead_endpoint.protocol,
port=dead_endpoint.port,
url=dead_endpoint.url
)
if not has_similar_alive_endpoints:
logger.info("Undoing death on %s" % dead_endpoint)
dead_endpoint.is_dead = False
dead_endpoint.is_dead_reason = ""
dead_endpoint.is_dead_since = None
dead_endpoint.save()
else:
logger.info("Can't undo death on %s as there is a similar alive. Try and merge." % dead_endpoint)
from django.contrib import admin
from django.utils.html import format_html
from jet.admin import CompactInline
from failmap_admin.map.rating import rate_url
......@@ -43,11 +44,11 @@ class UrlIpAdmin(admin.ModelAdmin):
class EndpointAdmin(admin.ModelAdmin):
list_display = ('id', 'url', 'discovered_on', 'ip_version', 'port', 'protocol', 'is_dead', 'is_dead_since',
list_display = ('id', 'url', 'visit', 'discovered_on', 'ip_version', 'port', 'protocol', 'is_dead', 'is_dead_since',
'tls_scans', 'generic_scans')
search_fields = ('url__url', 'ip_version', 'port', 'protocol', 'is_dead',
'is_dead_since', 'is_dead_reason')
list_filter = ('ip_version', 'port', 'protocol', 'is_dead')
list_filter = ('ip_version', 'port', 'protocol', 'is_dead', 'is_dead_reason')
fieldsets = (
(None, {
'fields': ('url', 'ip_version', 'protocol', 'port', 'discovered_on')
......@@ -67,6 +68,11 @@ class EndpointAdmin(admin.ModelAdmin):
def generic_scans(inst):
return EndpointGenericScan.objects.filter(endpoint_id=inst.id).count()
@staticmethod
def visit(inst):
url = "%s://%s:%s/" % (inst.protocol, inst.url.url, inst.port)
return format_html("<a href='%s' target='_blank'>Visit</a>" % url)
inlines = [TlsQualysScanAdminInline, EndpointGenericScanInline]
save_as = True # Save as new is nice for duplicating endpoints.
......@@ -98,12 +104,19 @@ class TlsQualysScanAdmin(admin.ModelAdmin):
'last_scan_moment', 'rating_determined_on')
search_fields = ('endpoint__url__url', 'qualys_rating', 'qualys_rating_no_trust',
'scan_date', 'rating_determined_on')
list_filter = ('endpoint', 'qualys_rating', 'qualys_rating_no_trust',
# listing all endpoints takes ages
list_filter = ('qualys_rating', 'qualys_rating_no_trust',
'scan_date', 'rating_determined_on', 'qualys_message')
# loading related fields in django jet is not done in a smart way: everything is prefetched.
# and when there are > 10000 objects of some sort, the system becomes insanely slow.
# Should make it an autocomplete field... or something else.
# therefore endpoint is set as a readonly_field.
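# an alternative (a sketch, not part of this change) would be raw_id_fields = ('endpoint',),
# which swaps the dropdown for a plain id box with a lookup popup and also avoids prefetching every endpoint.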
fields = ('endpoint', 'qualys_rating', 'qualys_rating_no_trust',
'rating_determined_on', 'last_scan_moment')
readonly_fields = ('scan_date', 'scan_time', 'last_scan_moment')
readonly_fields = ('scan_date', 'scan_time', 'last_scan_moment', 'endpoint')
actions = ['rate_url', 'scan_url']
......@@ -157,12 +170,13 @@ class EndpointGenericScanAdmin(admin.ModelAdmin):
'explanation', 'last_scan_moment', 'rating_determined_on')
search_fields = ('endpoint__url__url', 'type', 'domain', 'rating',
'explanation', 'last_scan_moment', 'rating_determined_on')
list_filter = ('endpoint', 'type', 'domain', 'rating',
list_filter = ('type', 'domain', 'rating',
'explanation', 'last_scan_moment', 'rating_determined_on')
fields = ('endpoint', 'type', 'domain', 'rating',
'explanation', 'last_scan_moment', 'rating_determined_on')
readonly_fields = ['last_scan_moment']
# see TlsQualysScanAdmin for why endpoint is readonly here.
readonly_fields = ['last_scan_moment', 'endpoint']
class EndpointGenericScanScratchpadAdmin(admin.ModelAdmin):
......
......@@ -2,8 +2,8 @@ import logging
from django.core.management.base import BaseCommand
from failmap_admin.map.rating import (add_organization_rating, create_timeline, rerate_urls,
show_timeline_console)
from failmap_admin.map.rating import (add_organization_rating, create_timeline,
rerate_organizations, rerate_urls, show_timeline_console)
from failmap_admin.organizations.models import Organization, Url
from failmap_admin.scanners.models import Endpoint
from failmap_admin.scanners.scanner_security_headers import scan as scan_headers
......@@ -15,8 +15,8 @@ class Command(BaseCommand):
help = 'Development command'
def handle(self, *args, **options):
from failmap_admin.scanners.scanner_http import get_ips
print(get_ips("arnhem.nl"))
# as a task
develop_determineratings()
# reset_onboard()
# rebuild_ratings()
......@@ -127,9 +127,9 @@ def develop_determineratings():
# pyflakes when = datetime(2016, 12, 31, 0, 0, tzinfo=pytz.utc)
# when = datetime.now(pytz.utc)
organization = Organization.objects.filter(name="Ameland").get()
rerate_urls(Url.objects.all().filter(organization=organization))
organization = Organization.objects.filter(name="Zederik").get()
# rerate_urls(Url.objects.all().filter(organization=organization))
rerate_organizations(organizations=[organization])
# ratings are always different since we now also save last scan date.
# only creates things for near midnight. Should check if today, and then save for now.
# add_organization_rating(organization, create_history=True)
......
import logging
from django.core.management.base import BaseCommand
from failmap_admin.scanners.scanner_http import test_network
logger = logging.getLogger(__package__)
class Command(BaseCommand):
help = 'Try to establish ipv4 and ipv6 connections to test the network, on both a worker and locally.'
def handle(self, *args, **options):
# on a worker
task = test_network.s(code_location="worker")
task.apply_async()
# locally
test_network(code_location="local")
......@@ -25,12 +25,15 @@ Likely: 80, 8080, 8008, 8888, 8088
"""
import logging
import random
import socket
from datetime import datetime
from typing import List
import pytz
import requests
# suppress InsecureRequestWarning, we do those requests on purpose.
import urllib3
from django.conf import settings
from requests import ConnectTimeout, HTTPError, ReadTimeout, Timeout
from requests.exceptions import ConnectionError
......@@ -41,6 +44,8 @@ from failmap_admin.scanners.models import Endpoint, UrlIp
from .timeout import timeout
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
logger = logging.getLogger(__package__)
......@@ -88,6 +93,10 @@ def verify_endpoints(urls: List[Url]=None, port: int=None, protocol: str=None, o
if organizations:
endpoints = endpoints.filter(url__organization__in=organizations)
# randomize the endpoints to better spread load.
endpoints = list(endpoints)
random.shuffle(endpoints)
for endpoint in endpoints:
scan_url(endpoint.protocol, endpoint.url, endpoint.port)
......@@ -117,6 +126,10 @@ def discover_endpoints(urls: List[Url]=None, port: int=None, protocol: str=None,
# Don't underestimate the flexibility of the internet.
ports = [80, 443, 8008, 8080, 8088, 8443, 8888]
# randomize the urls to better spread load.
urls = list(urls)
random.shuffle(urls)
scan_urls(protocols, urls, ports)
......@@ -157,11 +170,13 @@ def resolve_and_scan(protocol: str, url: Url, port: int):
store_task = store_url_ips.s(url, ips) # administrative, does reverse dns query
store_task.apply_async()
# todo: this should be re-checked a few times before it's really killed. Retry?
if not any(ips):
kill_url_task = kill_url.s(url) # administrative
kill_url_task.apply_async()
return
# this is not a stacking solution. Weird. Why not?
url_revive_task = revive_url.s(url)
url_revive_task.apply_async()
......@@ -179,8 +194,6 @@ def resolve_and_scan(protocol: str, url: Url, port: int):
task = (connect_task | result_task)
task.apply_async()
# v6 is not yet supported, as we don't have v6 workers yet.
def get_ips(url: str):
ipv4 = ""
......@@ -207,8 +220,26 @@ def get_ips(url: str):
return ipv4, ipv6
@app.task
def can_connect(protocol: str, url: Url, port: int, ip: str):
@app.task(
# When doing a lot of connections, try to do them in semi-random order, so as not to overload networks/firewalls.
# Don't try and overload the network with too many connections.
# The (virtual) network (card) might have a problem keeping up.
# Firewalls might see it as hostile.
# Our database might be overloaded with work,
# To consider the rate limit:
# There are about 11000 endpoints at this moment.
# 3/s = 180/m = 1800/10m = 10800/h
# 4/s = 240/m = 2400/10m = 14400/h
# 5/s = 300/m = 3000/10m = 18000/h
# 10/s = 600/m = 6000/10m = 36000/h
# on the development machine it scans all within 10 minutes. About 20/s.
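# for a sense of scale: at the 3/s limit below, ~11000 endpoints take about 11000 / 3 ≈ 3670 seconds,
# so a full pass lasts roughly an hour.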
rate_limit='3/s',
)
def can_connect(protocol: str, url: Url, port: int, ip: str) -> bool:
"""
Searches for both IPv4 and IPv6 IP addresses / types.
......@@ -244,7 +275,7 @@ def can_connect(protocol: str, url: Url, port: int, ip: str):
else:
uri = "%s://%s:%s" % (protocol, ip, port)
logger.debug("Scanning http(s) server on: %s" % uri)
logger.debug("Attempting connect on: %s: host: %s" % (uri, url.url))
try:
"""
......@@ -481,3 +512,42 @@ def kill_endpoint(protocol: str, url: Url, port: int, ip_version: int):
ep.is_dead_since = datetime.now(pytz.utc)
ep.is_dead_reason = "Not found in HTTP Scanner anymore."
ep.save()
@app.task()
def test_network(code_location=""):
"""
Used to see if a worker can do IPv6. Will trigger an exception when no ipv4 or ipv6 is available,
which is logged in sentry and other logs.
:return:
"""
logger.info("Testing network connection via %s." % code_location)
url = Url()
url.url = "faalkaart.nl"
ips = get_ips(url.url)
can_ipv4, can_ipv6 = False, False
(ipv4, ipv6) = ips
if ipv4:
can_ipv4 = can_connect("https", url, 443, ipv4)
if ipv6:
can_ipv6 = can_connect("https", url, 443, ipv6)
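# note: can_connect is a Celery task, but calling it directly like this executes it synchronously
# in the current process, which is what this connectivity check needs.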
if not can_ipv4 and not can_ipv6:
raise ConnectionError("Both ipv6 and ipv4 networks could not be reached via %s." % code_location)
if not can_ipv4:
raise ConnectionError("Could not reach IPv4 Network via %s." % code_location)
else:
logger.info("IPv4 could be reached via %s" % code_location)
if not can_ipv6:
raise ConnectionError("Could not reach IPv6 Network via %s." % code_location)
else:
logger.info("IPv6 could be reached via %s" % code_location)
......@@ -51,3 +51,4 @@ raven
# allow recursive comparison of dictionaries
deepdiff
influxdb
urllib3
\ No newline at end of file