Fix TLS scanner where only the first url was scanned; simplify command line interaction; various bugfixes.
parent 792ac839
@@ -21,6 +21,7 @@ app.autodiscover_tasks([app for app in settings.INSTALLED_APPS if app.startswith
# https://github.com/celery/celery/blob/f83b072fba7831f60106c81472e3477608baf289/docs/whatsnew-4.0.rst#redis-priorities-reversed
PRIO_HIGH = 9
PRIO_NORMAL = 5
PRIO_LOW = 3
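# A minimal sketch of how these constants are meant to be used. Per the linked
# whatsnew-4.0 note, the Redis transport now matches AMQP, so 9 is the highest
# priority. Redis has no native priorities: Celery emulates them with several
# queues, and 'priority_steps' must cover the values above. The broker URL and
# transport options below are assumptions for illustration, not project settings.
#
#   from celery import Celery
#
#   sketch = Celery('sketch', broker='redis://localhost:6379/0')
#   sketch.conf.broker_transport_options = {'priority_steps': [PRIO_LOW, PRIO_NORMAL, PRIO_HIGH]}
#
#   @sketch.task
#   def ping():
#       return 'pong'
#
#   ping.apply_async(priority=PRIO_HIGH)  # dequeued before waiting PRIO_NORMAL tasks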
class DefaultTask(Task):
@@ -118,6 +118,11 @@ def http_plain_rating_based_on_scan(scan):
# changed the ratings in the database. They are not really correct.
# When there is no https at all, it's worse than having broken https. So rate them the same.
if scan.explanation == "Site does not redirect to secure url, and has no secure alternative on a standard port.":
scan.rating = 1000
high += 1
# wrong spelling: matches historical records that still contain the misspelled explanation below
if scan.explanation == "Site does not redirect to secure url, and has nosecure alternative on a standard port.":
scan.rating = 1000
high += 1
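# An alternative to special-casing at rating time would be a one-off cleanup that
# normalizes the misspelled rows, making the second branch unnecessary. A sketch
# (untested, assuming these rows live in EndpointGenericScan):
#
#   EndpointGenericScan.objects.filter(
#       explanation="Site does not redirect to secure url, and has nosecure "
#                   "alternative on a standard port."
#   ).update(explanation="Site does not redirect to secure url, and has no "
#                        "secure alternative on a standard port.")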
@@ -111,6 +111,7 @@ def add_url_rating(urls: List[Url], build_history: bool=False, when: datetime=No
rate_url(url, when)
@app.task
def rerate_urls(urls: List[Url]=None):
if not urls:
urls = list(Url.objects.all().filter(is_dead=False).order_by('url'))
@@ -130,7 +131,7 @@ def rerate_organizations(organizations: List[Organization]=None):
for organization in organizations:
OrganizationRating.objects.all().filter(organization=organization).delete()
default_ratings()
add_organization_rating([organization], build_history=True)
add_organization_rating(organizations=[organization], build_history=True)
def rerate_urls_of_organizations(organizations: List[Organization]):
@@ -162,6 +162,8 @@ class EndpointGenericScanAdmin(admin.ModelAdmin):
fields = ('endpoint', 'type', 'domain', 'rating',
'explanation', 'last_scan_moment', 'rating_determined_on')
readonly_fields = ['last_scan_moment']
class EndpointGenericScanScratchpadAdmin(admin.ModelAdmin):
list_display = ('type', 'domain', 'when', 'data')
@@ -4,6 +4,8 @@ from datetime import datetime
import pytz
from django.core.exceptions import ObjectDoesNotExist
from .models import Endpoint, EndpointGenericScan
logger = logging.getLogger(__package__)
@@ -14,13 +16,12 @@ class EndpointScanManager:
:return:
"""
@staticmethod
def add_scan(scantype, endpoint, rating, message):
from .models import EndpointGenericScan
def add_scan(scan_type: str, endpoint: Endpoint, rating: str, message: str):
# Check if the latest scan has the same rating or not:
try:
gs = EndpointGenericScan.objects.all().filter(
type=scantype,
type=scan_type,
endpoint=endpoint,
).latest('last_scan_moment')
except ObjectDoesNotExist:
@@ -33,23 +34,23 @@ class EndpointScanManager:
gs.last_scan_moment = datetime.now(pytz.utc)
gs.save()
else:
# make a new one, please don't update the existing one :)
# message and rating changed for this scan_type, so it's worth while to save the scan.
logger.debug("Message or rating changed: making a new generic scan.")
gs = EndpointGenericScan()
gs.explanation = message
gs.rating = rating
gs.endpoint = endpoint
gs.type = scantype
gs.type = scan_type
gs.last_scan_moment = datetime.now(pytz.utc)
gs.rating_determined_on = datetime.now(pytz.utc)
gs.save()
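# Usage sketch, assuming an existing Endpoint instance: repeating the same result
# only touches last_scan_moment on the latest row; a changed rating or message
# creates a new EndpointGenericScan row.
#
#   EndpointScanManager.add_scan("plain_https", endpoint, "1000", "no https")
#   EndpointScanManager.add_scan("plain_https", endpoint, "1000", "no https")  # updates moment only
#   EndpointScanManager.add_scan("plain_https", endpoint, "0", "fixed")  # new row, new rating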
@staticmethod
def had_scan_with_points(scantype, endpoint):
def had_scan_with_points(scan_type: str, endpoint: Endpoint):
"""
Used for data deduplication. Don't save a scan that had zero points, unless a
previous scan had points: then you can upgrade to zero (or another rating).
:param scantype:
:param scan_type:
:param endpoint:
:return:
"""
@@ -57,7 +58,7 @@ class EndpointScanManager:
try:
gs = EndpointGenericScan.objects.all().filter(
type=scantype,
type=scan_type,
endpoint=endpoint,
).latest('last_scan_moment')
if gs.rating:
@@ -15,8 +15,9 @@ class Command(BaseCommand):
help = 'Development command'
def handle(self, *args, **options):
reset_onboard()
# rebuild_ratings()
develop_determineratings()
# develop_determineratings()
# develop_timeline()
#
# Command.test_sslscan_real()
@@ -27,6 +28,14 @@ class Command(BaseCommand):
# Command.develop_celery_test_async_tasks()
def reset_onboard():
organization = Organization.objects.filter(name="Arnhem").get()
urls = Url.objects.all().filter(organization=organization)
for url in urls:
url.onboarded = False
url.save()
def develop_timeline():
if True:
@@ -36,7 +45,7 @@ def develop_timeline():
data = create_timeline(url=url)
show_timeline_console(data, url)
rerate_urls([url])
add_organization_rating(organization=organization, create_history=True)
add_organization_rating(organizations=[organization], create_history=True)
if False:
organizations = Organization.objects.all().order_by('name')
@@ -67,7 +76,7 @@ def develop_timeline():
def develop_sslscan():
from failmap_admin.scanners.scanner_tls import scan_url
from failmap_admin.scanners.scanner_tls_standalone import scan_url
url = Url.objects.all().filter(url='www.ibdgemeenten.nl').get()
scan_url(url)
url = Url.objects.all().filter(url='www.amersfoort.nl').get()
@@ -75,12 +84,12 @@ def develop_sslscan():
def test_determine_grade():
from failmap_admin.scanners.scanner_tls import test_determine_grade
from failmap_admin.scanners.scanner_tls_standalone import test_determine_grade
test_determine_grade()
def test_sslscan_real():
from failmap_admin.scanners.scanner_tls import test_real
from failmap_admin.scanners.scanner_tls_standalone import test_real
test_real('johnkr.com', 443)
import logging
from time import sleep
from django.core.management.base import BaseCommand
@@ -12,20 +11,4 @@ class Command(BaseCommand):
help = 'Automatically performs initial scans and tests on new urls.'
def handle(self, *args, **options):
runservice()
def runservice():
try:
logger.info("Started onboarding.")
while True:
onboard_new_urls()
logger.info("Waiting for urls to be onboarded. Sleeping for 60 seconds.")
sleep(60)
except KeyboardInterrupt:
logger.info("Onboarding interrupted.")
do_continue = input("Do you wish to quit? Y/n")
if "n" in do_continue or "N" in do_continue:
runservice()
else:
logger.info("Stopped onboarding.")
onboard_new_urls()
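# With the loop gone, periodic runs can come from the scheduler instead. A sketch
# of a celery beat entry (the task path and interval are assumptions):
#
#   app.conf.beat_schedule = {
#       'onboard-new-urls': {
#           'task': 'failmap_admin.scanners.tasks.onboard_new_urls',
#           'schedule': 60.0,  # replaces the old sleep(60) loop
#       },
#   }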
@@ -42,7 +42,12 @@ class Command(BaseCommand):
def handle(self, *args, **options):
scan_type = options['scan_type']
desired_organization = options['organization'][0]
if options['organization']:
desired_organization = options['organization'][0]
else:
desired_organization = "*"
logger.debug("Scan type: %s" % scan_type)
logger.debug("Targetted organization: %s" % desired_organization)
@@ -2,9 +2,9 @@ import logging
from django.core.management.base import BaseCommand
from failmap_admin.map.rating import add_organization_rating, rerate_urls
from failmap_admin.celery import PRIO_HIGH
from failmap_admin.scanners.models import Url
from failmap_admin.scanners.scanner_tls_qualys import scan, scan_task
from failmap_admin.scanners.scanner_tls_qualys import scan, scan_new_urls, scan_urls
logger = logging.getLogger(__package__)
@@ -23,19 +23,21 @@ class Command(BaseCommand):
type=bool
)
parser.add_argument(
'--new',
help="Only scan new urls.",
type=bool
)
def handle(self, *args, **options):
if options['manual']:
value = input("Type the url, without protocol:")
url = Url.objects.all().filter(url=value).first()
scan_task(url)
rerate_urls([url])
# url can be owned by many organizations:
organizations = url.organization.all()
for organization in organizations:
add_organization_rating(organization=organization)
scan_urls(urls=[url], priority=PRIO_HIGH)
else:
while True:
if options['new']:
scan_new_urls.apply()
else:
# removed the infinite loop to allow scheduling.
scan.apply()
@@ -2,7 +2,7 @@ import logging
from django.core.management.base import BaseCommand
from failmap_admin.scanners.scanner_tls import test_real
from failmap_admin.scanners.scanner_tls_standalone import test_real
logger = logging.getLogger(__package__)
import logging
from time import sleep
from django.core.management.base import BaseCommand
@@ -12,11 +11,5 @@ class Command(BaseCommand):
help = 'Create screenshots of urls that don\'t have a screenshot yet'
def handle(self, *args, **options):
try:
while True:
screenshots_of_new_urls()
logger.info("No more endpoints to screenshot. Waiting 60 seconds for more.")
sleep(60)
except KeyboardInterrupt:
logger.debug("Stopped. If this was killed when making screenshots: "
"please check if there are still some browsers running.")
logger.info("Creating screenshots of new urls.")
screenshots_of_new_urls()
@@ -8,7 +8,7 @@ import failmap_admin.scanners.scanner_http as scanner_http
import failmap_admin.scanners.scanner_plain_http as scanner_plain_http
from failmap_admin.organizations.models import Url
from failmap_admin.scanners.scanner_dns import (brute_known_subdomains, certificate_transparency,
nsec_scan)
nsec)
from failmap_admin.scanners.scanner_screenshot import screenshot_urls
from ..celery import app
@@ -16,6 +16,7 @@ from ..celery import app
logger = logging.getLogger(__package__)
@app.task
def onboard_new_urls():
never_onboarded = Url.objects.all().filter(onboarded=False)
@@ -37,6 +38,8 @@ def onboard_new_urls():
................................................................................
"""
logger.info("There are %s new urls to onboard! %s" % (never_onboarded.count(), cyber))
else:
logger.info("No new urls to onboard.")
onboard_urls(never_onboarded)
@@ -44,15 +47,31 @@ def onboard_new_urls():
@app.task
def onboard_urls(urls: List[Url]):
for url in urls:
logger.info("Onboarding %s" % url)
if url.is_top_level():
logger.debug("Brute known subdomains: %s" % url)
brute_known_subdomains(urls=[url])
logger.debug("Certificate transparency: %s" % url)
certificate_transparency(urls=[url])
nsec_scan(urls=[url])
logger.debug("nsec: %s" % url)
nsec(urls=[url])
# tasks
logger.debug("Discover endpoints: %s" % url)
scanner_http.discover_endpoints(urls=[url])
# requires endpoints to be discovered, how to run groups of tasks sequentially?
logger.debug("Plain_http: %s" % url)
scanner_plain_http.scan_urls(urls=[url])
# requires endpoints to be discovered
logger.debug("Screenshots: %s" % url)
screenshot_urls(urls=[url])
# todo: add qualys tasks.
# security headers and new urls are handled elsewhere.
url.onboarded = True
url.onboarded_on = datetime.now(pytz.utc)
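# One answer to the "how to run groups of tasks sequentially" question above: a
# celery chain runs signatures in order, so endpoint discovery finishes before the
# scans that need endpoints. Treating these helpers as task signatures is an
# assumption; .si() makes each step ignore the previous step's result.
#
#   from celery import chain
#   chain(
#       scanner_http.discover_endpoints.si(urls=[url]),
#       scanner_plain_http.scan_urls.si(urls=[url]),
#       screenshot_urls.si(urls=[url]),
#   ).apply_async()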
@@ -194,11 +194,11 @@ def has_wildcards(urls: List[Url]):
logger.info("Domain %s uses wildcards, DNS brute force not possible" % url.url)
url.uses_dns_wildcard = True
url.save()
urls_with_wildcards += url
urls_with_wildcards.append(url)
else:
url.uses_dns_wildcard = False
url.save()
urls_without_wildcards += url
urls_without_wildcards.append(url)
return urls_without_wildcards, urls_with_wildcards
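# Why the .append fix above matters: `some_list += x` extends the list with
# iter(x), so a non-iterable model instance raises TypeError, and an iterable
# object would be spliced in element-wise rather than added as one item.
#
#   xs = []
#   xs += "ab"         # extends element-wise -> ['a', 'b']
#   xs.append("cd")    # appends a single item -> ['a', 'b', 'cd']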
@@ -249,7 +249,19 @@ def can_connect(protocol: str, url: Url, port: int, ip: str):
If we get a redirect, it means there is a server. Don't follow.
Any status code is enough to verify that there is an endpoint.
Some servers don't return a status code, that will trigger an exception (AttributeError?)
Some servers don't return a status code, that will trigger an exception (AttributeError)
Some servers redirect to itself (or any host you throw at it):
ipv4 = socket.gethostbyname("demo3.data.amsterdam.nl")
r = requests.get("http://185.3.211.120:80", timeout=(30, 30), allow_redirects=False,
headers={'Host': "demo3.data.amsterdam.nl"})
r.headers
{'Content-length': '0', 'Location': 'https://demo3.data.amsterdam.nl/', 'Connection': 'close'}
We don't follow redirects, because we only want to know if there is something we can connect to.
This can lead to interesting behavior: the browser times out.
https://stackoverflow.com/questions/43156023/what-is-http-host-header#43156094
"""
@@ -294,7 +306,7 @@ def can_connect(protocol: str, url: Url, port: int, ip: str):
@app.task
def connect_result(result, protocol: str, url: Url, port: int, ip_version: int):
# logger.info("%s %s" % (url, result))
logger.info("%s %s" % (url, result))
# logger.info("%s %s" % (url, url))
# logger.info("%s %s" % (url, port))
# logger.info("%s %s" % (url, protocol))
@@ -4,16 +4,22 @@ Check if a domain is only reachable on plain http, instead of both http and https
Browsers first connect to http, not https, when entering a domain. That will change in the future.
Testing:
redis-cli flushdb
Further reading:
https://stackoverflow.com/questions/20475552/python-requests-library-redirect-new-url#20475712
"""
import logging
from typing import List
from celery import group
from failmap_admin.organizations.models import Url
from failmap_admin.scanners.endpoint_scan_manager import EndpointScanManager
from failmap_admin.scanners.scanner_http import scan_urls as scanner_http_scan_urls
from failmap_admin.scanners.scanner_http import get_ips
from ..celery import app
from .models import Endpoint
@@ -28,6 +34,9 @@ def scan_all_urls():
If it's still not there, then well... it's points for having http without https.
todo: how to remove entries from this list?
:return:
"""
# to save ratings
@@ -36,12 +45,18 @@
urls = Url.objects.all().filter(is_dead=False,
not_resolvable=False)
# todo: haven't got the queryset logic down to filter like below. Could be just one query.
for url in urls:
scan_url(url)
scan_urls(urls=list(urls), execute=True)
def scan_urls(urls, execute=True):
def scan_urls(urls: List[Url], execute: bool=True):
"""
Scans the given urls, including the entire endpoint-generic scan list (existing problems), for
missing https on the default port.
:param urls:
:param execute: if True, queue the task group immediately; if False, return it for composition
:return:
"""
task = group([scan_url.s(url) for url in urls])
if execute:
task.apply_async()
@@ -49,25 +64,46 @@ def scan_urls(urls, execute=True):
return task
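# Usage sketch: execute=True queues the group immediately; execute=False returns
# it unsent so it can be composed into a larger workflow.
#
#   scan_urls(urls=list(Url.objects.filter(is_dead=False)), execute=True)
#   pending = scan_urls(urls=some_urls, execute=False)  # a celery group, not yet queued
#   pending.apply_async()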
# http://185.3.211.120:80: Host: demo3.data.amsterdam.nl Status: 301
@app.task
def scan_url(url):
def scan_url(url: Url):
"""
:param url:
:return:
"""
scan_manager = EndpointScanManager
logger.debug("Checking for http only sites on: %s" % url)
endpoints = Endpoint.objects.all().filter(url=url, is_dead=False)
has_http = False
has_https = False
http_endpoints = []
has_http_v4 = False
has_https_v4 = False
has_http_v6 = False
has_https_v6 = False
http_v4_endpoint = None
http_v6_endpoint = None
saved_by_the_bell = "Redirects to a secure site, while a secure counterpart on the standard port is missing."
no_https_at_all = "Site does not redirect to secure url, and has no secure alternative on a standard port."
cleaned_up = "Has a secure equivalent, which wasn't so in the past."
# The default ports matter to normal users; services on other ports are special cases.
# We only give points when there is a normal http site without a normal https counterpart.
# todo: ip_version is relevant here.
for endpoint in endpoints:
if endpoint.protocol == "http" and endpoint.port == 80:
has_http = True
http_endpoints.append(endpoint)
if endpoint.protocol == "https" and endpoint.port == 443:
has_https = True
if endpoint.protocol == "http" and endpoint.port == 80 and endpoint.ip_version == 4:
has_http_v4 = True
http_v4_endpoint = endpoint
if endpoint.protocol == "https" and endpoint.port == 443 and endpoint.ip_version == 4:
has_https_v4 = True
if endpoint.protocol == "http" and endpoint.port == 80 and endpoint.ip_version == 6:
has_http_v6 = True
http_v6_endpoint = endpoint
if endpoint.protocol == "https" and endpoint.port == 443 and endpoint.ip_version == 6:
has_https_v6 = True
# calculate the score
# Organizations with wildcards can have this problem a lot:
@@ -90,61 +126,77 @@ def scan_url(url):
# Some organizations redirect the http site to a non-standard https port.
# occurs more than once... you still have to follow redirects?
if has_http and not has_https:
if has_http_v4 and not has_https_v4:
logger.debug("This url seems to have no https at all: %s" % url)
logger.debug("Checking if they exist, to be sure there is nothing.")
# It's not secure initially, do a last check. This might result in new
# endpoints, and therefore no scan record.
# todo: hm, you can't really check ipv6 redirects on an ipv4 box, now can you...
if not verify_is_secure(url):
if not verify_is_secure(http_v4_endpoint):
logger.info("Checking if the URL redirects to a secure url: %s" % url)
if redirects_to_safety(url):
if redirects_to_safety(http_v4_endpoint):
logger.info("%s redirects to safety, saved by the bell." % url)
for http_endpoint in http_endpoints:
scan_manager.add_scan("plain_https", http_endpoint, 25,
"Redirects to a secure site, while a secure "
"counterpart on the standard port is missing.")
scan_manager.add_scan("plain_https", http_v4_endpoint, "25", saved_by_the_bell)
else:
logger.info("%s does not have a https site. Saving/updating scan." % url)
for http_endpoint in http_endpoints:
scan_manager.add_scan("plain_https", http_endpoint, 1000,
"Site does not redirect to secure url, and has no"
"secure alternative on a standard port.")
else:
# it is secure, and if there was a rating, then reduce it to 0
# (with a new rating).
for http_endpoint in http_endpoints:
if scan_manager.had_scan_with_points("plain_https", http_endpoint):
scan_manager.add_scan("plain_https", http_endpoint, 0,
"Has a secure equivalent, which wasn't so in the"
"past.")
scan_manager.add_scan("plain_https", http_v4_endpoint, "1000", no_https_at_all)
else:
# it is secure, and if there was a rating, then reduce it to 0 (with a new rating).
if scan_manager.had_scan_with_points("plain_https", http_v4_endpoint):
scan_manager.add_scan("plain_https", http_v4_endpoint, "0", cleaned_up)
if has_http_v6 and not has_https_v6:
if not verify_is_secure(http_v6_endpoint):
if redirects_to_safety(http_v6_endpoint):
scan_manager.add_scan("plain_https", http_v6_endpoint, "25", saved_by_the_bell)
else:
scan_manager.add_scan("plain_https", http_v6_endpoint, "1000", no_https_at_all)
else:
# it is secure, and if there was a rating, then reduce it to 0 (with a new rating).
if scan_manager.had_scan_with_points("plain_https", http_v6_endpoint):
scan_manager.add_scan("plain_https", http_v6_endpoint, "0", cleaned_up)
return 'done'
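# The v4 and v6 branches above are identical apart from the endpoint; a sketch of
# an equivalent refactor (not code from this commit) would keep them in sync:
#
#   checks = ((has_http_v4 and not has_https_v4, http_v4_endpoint),
#             (has_http_v6 and not has_https_v6, http_v6_endpoint))
#   for needs_check, http_endpoint in checks:
#       if not needs_check:
#           continue
#       if not verify_is_secure(http_endpoint):
#           if redirects_to_safety(http_endpoint):
#               scan_manager.add_scan("plain_https", http_endpoint, "25", saved_by_the_bell)
#           else:
#               scan_manager.add_scan("plain_https", http_endpoint, "1000", no_https_at_all)
#       elif scan_manager.had_scan_with_points("plain_https", http_endpoint):
#           scan_manager.add_scan("plain_https", http_endpoint, "0", cleaned_up)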
def verify_is_secure(url):
# i've seen qualys saying there is no TLS, while there is!
# This _might_ revive an endpoint.
def verify_is_secure(endpoint: Endpoint):
# I've seen qualys say there is no TLS while there is! Qualys then kills the endpoint; this adds a new one.
scanner_http_scan_urls(['https'], [url], [443])
scanner_http_scan_urls(['https'], [endpoint.url], [443])
endpoints = Endpoint.objects.all().filter(url=url, is_dead=False,
protocol="https", port=443)
# may result in a new endpoint being discovered
endpoints = Endpoint.objects.all().filter(url=endpoint.url, is_dead=False, protocol="https", port=443,
ip_version=endpoint.ip_version)
if endpoints:
logger.debug("Url does seem to be secure after all: %s" % url)
logger.debug("Url does seem to be secure after all: %s" % endpoint.url)
return True
logger.debug("Url is still not secure: %s" % url)
logger.debug("Url is still not secure: %s" % endpoint.url)
return False
def redirects_to_safety(url):
def redirects_to_safety(endpoint: Endpoint):
"""
Also includes the ip-version of the endpoint.
:param endpoint:
:return:
"""
import requests
from requests import ReadTimeout, ConnectTimeout, HTTPError, Timeout, ConnectionError
domain = "%s://%s:%s" % ("http", url.url, "80")
(ipv4, ipv6) = get_ips(endpoint.url.url)
if endpoint.ip_version == 4:
uri = "%s://%s:%s" % ("http", ipv4, "80")
else:
uri = "%s://[%s]:%s" % ("http", ipv6, "80")
try:
response = requests.get(domain, timeout=(10, 10), allow_redirects=True)
response = requests.get(uri,
timeout=(30, 30), # allow for insane network lag
allow_redirects=True, # point is: redirects to safety
verify=False, # certificate validity is checked elsewhere, having some https > none
headers={'Host': endpoint.url.url})
if response.history:
logger.debug("Request was redirected, there is hope. Redirect path:")
for resp in response.history:
@@ -159,6 +211,6 @@ def redirects_to_safety(url):
else:
logger.debug("Request was not redirected, so not going to a safe url.")
return False
except (ConnectTimeout, HTTPError, ReadTimeout, Timeout, ConnectionError):
except (ConnectTimeout, HTTPError, ReadTimeout, Timeout, ConnectionError, requests.exceptions.TooManyRedirects):
logger.debug("Request resulted into an error, it's not redirecting properly.")
return False
@@ -35,14 +35,13 @@ import requests
from celery import group
from django.core.exceptions import ObjectDoesNotExist
from failmap_admin.map.rating import add_organization_rating, rerate_urls
from failmap_admin.organizations.models import Organization, Url
from failmap_admin.scanners.models import (Endpoint, EndpointGenericScan, TlsQualysScan,
TlsQualysScratchpad)
from failmap_admin.scanners.scanner_http import store_url_ips
from failmap_admin.scanners.state_manager import StateManager
from ..celery import app
from ..celery import PRIO_HIGH, PRIO_NORMAL, app
log = logging.getLogger(__name__)
@@ -59,11 +58,13 @@ def scan_url_list(urls: List[Url]):
@app.task
def scan_urls(urls: List[Url], execute=True):
def scan_urls(urls: List[Url], execute: bool=True, priority: int=PRIO_NORMAL):
"""Compose and execute taskset to scan specified urls."""
urls = external_service_task_rate_limit(urls)
try:
task = compose(urls=urls)