Verified Commit d5b2987a authored by Elger Jonker's avatar Elger Jonker

refactor scanners, explicit v4 and v6 tasks

parent c2b72f8e
# Some help to translate the django part.
# This tries to help you avoid remembering the "messages" mess from Django.
import logging
from subprocess import call
from django.core.management.base import BaseCommand
try:
from pip._internal.utils.misc import get_installed_distributions
except ImportError: # pip<10
from pip import get_installed_distributions
# from pip.utils import get_installed_distributions
logger = logging.getLogger(__package__)
# todo: move to pipenv
class Command(BaseCommand):
    """Upgrade every currently installed pip package to its latest version."""

    def handle(self, *args, **options):
        # Local import: only this one-off maintenance command needs it.
        import sys

        # Names of all installed distributions, as reported by pip itself.
        packages = [dist.project_name for dist in get_installed_distributions()]

        # Invoke pip through the current interpreter with an argument list
        # instead of a shell=True command string: this avoids shell injection
        # via package names and is the documented way to run pip
        # programmatically ("python -m pip ...").
        call([sys.executable, "-m", "pip", "install", "--upgrade", *packages])
......@@ -54,7 +54,7 @@ def test_all_scanners():
# check the network. Without network access this will fail. Todo: this should verify that a domain is reachable
# over the internet. Build machines often cannot do this due to network restrictions.
from failmap.scanners.scanner_http import check_network
from failmap.scanners.scanner.http import check_network
log.warning("check_network")
check_network("real_scanner_test")
......@@ -69,7 +69,7 @@ def test_all_scanners():
# get errors.
call_command('scan_tls_qualys', '-v3', '-o', 'Internet Cleanup Foundation')
from failmap.scanners.scanner_dns import brute_known_subdomains, certificate_transparency, nsec
from failmap.scanners.scanner.dns import brute_known_subdomains, certificate_transparency, nsec
log.warning("brute_known_subdomains")
brute_known_subdomains(urls=[first_toplevel_url])
log.warning("certificate_transparency")
......
......@@ -17,7 +17,7 @@ from django_countries.fields import CountryField
from failmap.game.models import Contest, OrganizationSubmission, Team, UrlSubmission
from failmap.organizations.models import Organization, OrganizationType, Url
from failmap.scanners.scanner_http import resolves
from failmap.scanners.scanner.http import resolves
# todo: callback on edit address, put result in leaflet:
......
......@@ -11,7 +11,7 @@ from django.db.models import Q
from failmap.organizations.models import Organization, Url
from failmap.scanners.models import Endpoint, EndpointGenericScan, TlsQualysScan, UrlGenericScan
from failmap.scanners.scanner import q_configurations_to_display
from failmap.scanners.scanner.scanner import q_configurations_to_display
from ..celery import Task, app
from .calculate import get_calculation
......
......@@ -15,13 +15,12 @@ from import_export.admin import ImportExportModelAdmin
from jet.admin import CompactInline
from leaflet.admin import LeafletGeoAdminMixin
import failmap.scanners.scanner_http as scanner_http
import failmap.scanners.scanner.http as scanner_http
from failmap import types
from failmap.map.rating import OrganizationRating, UrlRating
from failmap.scanners import (onboard, scanner_dns, scanner_dnssec, scanner_plain_http,
scanner_security_headers, scanner_tls_qualys)
from failmap.scanners.admin import UrlIp
from failmap.scanners.models import Endpoint, EndpointGenericScan, TlsQualysScan, UrlGenericScan
from failmap.scanners.scanner import dns, dnssec, onboard, plain_http, security_headers, tls_qualys
from ..app.models import Job
from ..celery import PRIO_HIGH
......@@ -184,38 +183,38 @@ class ActionMixin:
actions = []
def scan_plain_http(self, *args, **kwargs):
return self.generic_action(scanner_plain_http.compose_task, 'Scan Plain Http', *args, **kwargs)
return self.generic_action(plain_http.compose_task, 'Scan Plain Http', *args, **kwargs)
scan_plain_http.short_description = '🔬 Missing TLS'
actions.append(scan_plain_http)
def scan_security_headers(self, *args, **kwargs):
return self.generic_action(scanner_security_headers.compose_task, 'Scan Security Headers', *args, **kwargs)
return self.generic_action(security_headers.compose_task, 'Scan Security Headers', *args, **kwargs)
scan_security_headers.short_description = '🔬 Security Headers'
actions.append(scan_security_headers)
def scan_tls_qualys(self, *args, **kwargs):
return self.generic_action(scanner_tls_qualys.compose_task, 'Scan TLS Qualys', *args, **kwargs)
return self.generic_action(tls_qualys.compose_task, 'Scan TLS Qualys', *args, **kwargs)
scan_tls_qualys.short_description = '🔬 TLS (Qualys)'
actions.append(scan_tls_qualys)
def dnssec(self, *args, **kwargs):
return self.generic_action(scanner_dnssec.compose_task, 'DNSSEC', *args, **kwargs)
return self.generic_action(dnssec.compose_task, 'DNSSEC', *args, **kwargs)
dnssec.short_description = "🔬 DNSSEC"
actions.append(dnssec)
def dns_certificate_transparency(self, *args, **kwargs):
return self.generic_action(scanner_dns.certificate_transparency_compose_task,
return self.generic_action(dns.certificate_transparency_compose_task,
'DNS Certificate transparency', *args, **kwargs)
dns_certificate_transparency.short_description = "🗺 DNS (certificate transparency)"
actions.append(dns_certificate_transparency)
def dns_nsec(self, *args, **kwargs):
return self.generic_action(scanner_dns.nsec_compose_task, 'DNS nsec1', *args, **kwargs)
return self.generic_action(dns.nsec_compose_task, 'DNS nsec1', *args, **kwargs)
dns_nsec.short_description = "🗺 DNS (nsec1)"
actions.append(dns_nsec)
def dns_known_subdomains(self, *args, **kwargs):
return self.generic_action(scanner_dns.brute_known_subdomains_compose_task, 'DNS Nsec', *args, **kwargs)
return self.generic_action(dns.brute_known_subdomains_compose_task, 'DNS Nsec', *args, **kwargs)
dns_known_subdomains.short_description = "🗺 + DNS (known subdomains)"
actions.append(dns_known_subdomains)
......
# Generated by Django 2.0.7 on 2018-07-19 12:16
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the timestamp column the onboarding pipeline uses to detect and
    # reset onboarding runs that stalled (see reset_expired_onboards in the
    # onboarding scanner).

    # Must run after the previous organizations migration.
    dependencies = [
        ('organizations', '0035_auto_20180718_1814'),
    ]

    operations = [
        # New nullable/blank field: records when Url.onboarding_stage was last
        # changed, so expired (timed-out) onboarding stages can be rolled back.
        migrations.AddField(
            model_name='url',
            name='onboarding_stage_set_on',
            field=models.DateTimeField(
                blank=True, help_text='When the onboarding stage was hit. Helps with time-outs.', null=True),
        ),
    ]
......@@ -96,11 +96,12 @@ class Organization(models.Model):
verbose_name = _('organization')
verbose_name_plural = _('organizations')
# todo: find a smarter way to get the organization type name, instead of a related query... cached enums?
def __str__(self):
if self.is_dead:
return "✝ %s, %s (%s)" % (self.name, self.country, self.created_on.strftime("%b %Y"))
return "✝ %s, %s/%s (%s)" % (self.name, self.country, self.type_id, self.created_on.strftime("%b %Y"))
else:
return "%s, %s (%s)" % (self.name, self.country, self.created_on.strftime("%b %Y"))
return "%s, %s/%s (%s)" % (self.name, self.country, self.type_id, self.created_on.strftime("%b %Y"))
GEOJSON_TYPES = (
......@@ -228,6 +229,9 @@ class Url(models.Model):
" have been found. completed: onboarding is done, also onboarded flag is set."
)
onboarding_stage_set_on = models.DateTimeField(blank=True, null=True,
help_text="When the onboarding stage was hit. Helps with time-outs.")
onboarded = models.BooleanField(
default=False,
help_text="After adding a url, there is an onboarding process that runs a set of tests."
......@@ -287,7 +291,7 @@ class Url(models.Model):
def add_subdomain(self, subdomain):
# import here to prevent circular/cyclic imports, this module imports Url.
from failmap.scanners.scanner_http import resolves
from failmap.scanners.scanner.http import resolves
new_url = (subdomain + "." + self.url).lower()
......
......@@ -7,7 +7,7 @@ from django.db.models.signals import post_save
from django.dispatch import receiver
from failmap.organizations.models import Url
from failmap.scanners import onboard
from failmap.scanners.scanner import onboard
log = logging.getLogger(__name__)
......
......@@ -2,7 +2,7 @@ import logging
from django.core.management.base import BaseCommand
from failmap.scanners.scanner_http import check_network
from failmap.scanners.scanner.http import check_network
logger = logging.getLogger(__package__)
......
......@@ -37,8 +37,8 @@ class Command(BaseCommand):
def test_osaft():
from failmap.scanners.scanner_tls_osaft import scan_address, determine_grade, grade_report, scan_url
from failmap.scanners.scanner import q_configurations_to_scan
from failmap.scanners.scanner.tls_osaft import scan_address, determine_grade, grade_report, scan_url
from failmap.scanners.scanner.scanner import q_configurations_to_scan
urls = Url.objects.filter(
q_configurations_to_scan(),
......@@ -132,7 +132,7 @@ def develop_timeline():
def develop_sslscan():
from failmap.scanners.scanner_tls_standalone import scan_url
from failmap.scanners.scanner.tls_standalone import scan_url
url = Url.objects.all().filter(url='www.ibdgemeenten.nl').get()
scan_url(url)
url = Url.objects.all().filter(url='www.amersfoort.nl').get()
......@@ -140,12 +140,12 @@ def develop_sslscan():
def test_determine_grade():
from failmap.scanners.scanner_tls_standalone import test_determine_grade
from failmap.scanners.scanner.tls_standalone import test_determine_grade
test_determine_grade()
def test_sslscan_real():
from failmap.scanners.scanner_tls_standalone import test_real
from failmap.scanners.scanner.tls_standalone import test_real
test_real('johnkr.com', 443)
......
import logging
from failmap.app.management.commands._private import DiscoverTaskCommand
from failmap.scanners import scanner_ftp
from failmap.scanners.scanner import ftp
log = logging.getLogger(__name__)
......@@ -18,7 +18,7 @@ class Command(DiscoverTaskCommand):
def handle(self, *args, **options):
scanners = {
'ftp': scanner_ftp
'ftp': ftp
}
if options['scanner'][0] not in scanners:
......
import logging
from failmap.app.management.commands._private import ScannerTaskCommand
from failmap.scanners import scanner_http
from failmap.scanners.scanner import http
log = logging.getLogger(__name__)
......@@ -11,4 +11,4 @@ class Command(ScannerTaskCommand):
help = __doc__
scanner_module = scanner_http
scanner_module = http
from django.core.management.base import BaseCommand
from failmap.map.rating import DetermineRatings
from failmap.scanners.scanner_tls_qualys import ScannerTlsQualys
from failmap.scanners.scanner.tls_qualys import ScannerTlsQualys
class Command(BaseCommand):
......
......@@ -2,7 +2,7 @@ import logging
from django.core.management.base import BaseCommand
from failmap.scanners.scanner_tls_osaft import (ammend_unsuported_issues, cert_chain_is_complete,
from failmap.scanners.scanner.tls_osaft import (ammend_unsuported_issues, cert_chain_is_complete,
determine_grade, grade_report, run_osaft_scan)
logger = logging.getLogger(__package__)
......
import logging
from failmap.app.management.commands._private import ScannerTaskCommand
from failmap.scanners import (onboard, scanner_dnssec, scanner_dummy, scanner_ftp, scanner_http,
scanner_plain_http, scanner_screenshot, scanner_security_headers,
scanner_tls_osaft, scanner_tls_qualys)
from failmap.scanners.scanner import (dnssec, dummy, ftp, http, onboard, plain_http, screenshot,
security_headers, tls_osaft, tls_qualys)
log = logging.getLogger(__name__)
......@@ -20,16 +19,16 @@ class Command(ScannerTaskCommand):
def handle(self, *args, **options):
scanners = {
'dnssec': scanner_dnssec,
'headers': scanner_security_headers,
'plain': scanner_plain_http,
'endpoints': scanner_http,
'tls': scanner_tls_osaft,
'tlsq': scanner_tls_qualys,
'ftp': scanner_ftp,
'screenshot': scanner_screenshot,
'dnssec': dnssec,
'headers': security_headers,
'plain': plain_http,
'endpoints': http,
'tls': tls_osaft,
'tlsq': tls_qualys,
'ftp': ftp,
'screenshot': screenshot,
'onboard': onboard,
'dummpy': scanner_dummy
'dummy': dummy
}
if options['scanner'][0] not in scanners:
......
......@@ -3,7 +3,7 @@ import logging
from django.core.management.base import BaseCommand
from failmap.organizations.models import Organization
from failmap.scanners.scanner_dns import (brute_dutch, brute_known_subdomains, brute_three_letters,
from failmap.scanners.scanner.dns import (brute_dutch, brute_known_subdomains, brute_three_letters,
certificate_transparency, nsec, search_engines, standard)
from failmap.scanners.state_manager import StateManager
......
......@@ -2,7 +2,7 @@ import logging
from django.core.management.base import BaseCommand
import failmap.scanners.scanner_http as scanner_http
import failmap.scanners.scanner.http as scanner_http
from failmap.organizations.models import Organization
from .support.arguments import add_organization_argument
......
......@@ -4,7 +4,7 @@ from datetime import datetime
import pytz
from django.core.exceptions import ObjectDoesNotExist
from .models import Endpoint, EndpointGenericScan
from failmap.scanners.models import Endpoint, EndpointGenericScan
logger = logging.getLogger(__package__)
......@@ -55,7 +55,6 @@ class EndpointScanManager:
:param endpoint:
:return:
"""
from .models import EndpointGenericScan
try:
gs = EndpointGenericScan.objects.all().filter(
......
......@@ -4,7 +4,7 @@ from datetime import datetime
import pytz
from django.core.exceptions import ObjectDoesNotExist
from .models import Endpoint, TlsScan
from failmap.scanners.models import Endpoint, TlsScan
logger = logging.getLogger(__package__)
......@@ -54,10 +54,9 @@ class TlsScanManager:
:param endpoint:
:return:
"""
from .models import EndpointGenericScan
try:
gs = EndpointGenericScan.objects.all().filter(
gs = TlsScan.objects.all().filter(
endpoint=endpoint,
).latest('last_scan_moment')
if gs.rating:
......
......@@ -4,7 +4,7 @@ from datetime import datetime
import pytz
from django.core.exceptions import ObjectDoesNotExist
from .models import Endpoint, TlsQualysScan
from failmap.scanners.models import Endpoint, TlsQualysScan
logger = logging.getLogger(__package__)
......@@ -55,10 +55,9 @@ class TlsQualysScanManager:
:param endpoint:
:return:
"""
from .models import EndpointGenericScan
try:
gs = EndpointGenericScan.objects.all().filter(
gs = TlsQualysScan.objects.all().filter(
endpoint=endpoint,
).latest('last_scan_moment')
if gs.rating:
......
......@@ -4,7 +4,7 @@ from datetime import datetime
import pytz
from django.core.exceptions import ObjectDoesNotExist
from .models import Url, UrlGenericScan
from failmap.scanners.models import Url, UrlGenericScan
logger = logging.getLogger(__package__)
......@@ -54,7 +54,6 @@ class UrlScanManager:
:param url:
:return:
"""
from .models import UrlGenericScan
try:
gs = UrlGenericScan.objects.all().filter(
......
......@@ -27,8 +27,7 @@ from django.conf import settings
from failmap.celery import app
from failmap.organizations.models import Organization, Url
from .scanner import allowed_to_discover
from failmap.scanners.scanner.scanner import allowed_to_discover
logger = logging.getLogger(__package__)
......@@ -118,7 +117,7 @@ def nsec_compose_task(organizations_filter: dict = dict(),
urls_filter=urls_filter,
endpoints_filter=endpoints_filter)
task = group(nsec_scan.s([url]) for url in urls)
task = group(nsec_scan.si([url]) for url in urls)
return task
......@@ -139,7 +138,7 @@ def certificate_transparency_compose_task(organizations_filter: dict = dict(),
urls_filter=urls_filter,
endpoints_filter=endpoints_filter)
task = group(certificate_transparency_scan.s([url]) for url in urls)
task = group(certificate_transparency_scan.si([url]) for url in urls)
return task
......@@ -182,7 +181,7 @@ def brute_known_subdomains_compose_task(organizations_filter: dict = dict(),
# todo: this should be placed elsewhere, but we might not have write permissions in scanners...???
update_subdomain_wordlist()
task = group(bruteforce_scan.s([url], str(wordlists["known_subdomains"]["path"])) for url in urls)
task = group(bruteforce_scan.si([url], str(wordlists["known_subdomains"]["path"])) for url in urls)
return task
......
......@@ -29,10 +29,9 @@ from django.conf import settings
from failmap.celery import ParentFailed, app
from failmap.organizations.models import Organization, Url
from failmap.scanners.url_scan_manager import UrlScanManager
from .models import Endpoint
from .scanner import allowed_to_scan, q_configurations_to_scan
from failmap.scanners.models import Endpoint
from failmap.scanners.scanmanager.url_scan_manager import UrlScanManager
from failmap.scanners.scanner.scanner import allowed_to_scan, q_configurations_to_scan
log = logging.getLogger(__name__)
......@@ -93,7 +92,7 @@ def compose_task(
# Sending entire objects is possible. How signatures (.s and .si) work is documented:
# http://docs.celeryproject.org/en/latest/reference/celery.html#celery.signature
task = group(
scan_dnssec.s(url.url) | store_dnssec.s(url) for url in urls
scan_dnssec.si(url.url) | store_dnssec.s(url) for url in urls
)
return task
......
......@@ -13,10 +13,9 @@ from django.conf import settings
from failmap.celery import ParentFailed, app
from failmap.organizations.models import Organization, Url
from failmap.scanners.endpoint_scan_manager import EndpointScanManager
from .models import Endpoint
from .scanner import allowed_to_scan, q_configurations_to_scan
from failmap.scanners.models import Endpoint
from failmap.scanners.scanmanager.endpoint_scan_manager import EndpointScanManager
from failmap.scanners.scanner.scanner import allowed_to_scan, q_configurations_to_scan
log = logging.getLogger(__name__)
......
......@@ -15,11 +15,10 @@ from django.utils import timezone
from failmap.celery import ParentFailed, app
from failmap.organizations.models import Url
from failmap.scanners.endpoint_scan_manager import EndpointScanManager
from failmap.scanners.scanner import endpoint_filters, url_filters
from .models import Endpoint
from .scanner import allowed_to_scan, q_configurations_to_scan
from failmap.scanners.models import Endpoint
from failmap.scanners.scanmanager.endpoint_scan_manager import EndpointScanManager
from failmap.scanners.scanner.scanner import (allowed_to_scan, endpoint_filters,
q_configurations_to_scan, url_filters)
log = logging.getLogger(__name__)
......@@ -63,7 +62,7 @@ def compose_task(
endpoints = endpoint_filters(endpoints, organizations_filter, urls_filter, endpoints_filter)
if not endpoints:
log.warning('Applied filters resulted in no endpoints, thus no tasks!')
log.warning('Applied filters resulted in no endpoints, thus no ftp tasks!')
return group()
# only unique endpoints
......
......@@ -42,9 +42,8 @@ from requests.exceptions import ConnectionError
from failmap.celery import app
from failmap.organizations.models import Organization, Url
from failmap.scanners.models import Endpoint, UrlIp
from .scanner import allowed_to_discover, q_configurations_to_scan
from .timeout import timeout
from failmap.scanners.scanner.scanner import allowed_to_discover, q_configurations_to_scan
from failmap.scanners.timeout import timeout
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
......@@ -56,6 +55,8 @@ STANDARD_HTTPS_PORTS = [443, 8443]
# Discover Endpoints generic task
# todo: in wildcard scenarios you can add urls that have a deviating IP from the (loadbalanced) wildcard address.
@app.task(queue="storage")
def compose_discover_task(
......@@ -525,7 +526,11 @@ def store_url_ips(url: Url, ips):
epip.url = url
epip.is_unused = False
epip.discovered_on = datetime.now(pytz.utc)
epip.rdns_name = get_rdns_name(ip)
try:
epip.rdns_name = get_rdns_name(ip)
except TimeoutError:
# we'll have to do without.
epip.rdns_name = ""
epip.save()
# and then clean up all that are not in the current set of ip's.
......@@ -543,7 +548,7 @@ def get_rdns_name(ip):
reverse_name = socket.gethostbyaddr(ip)
except (TimeoutError, socket.herror):
# takes too long
# host doesn't exist
# host doesn't exist / unknown host
pass
except BaseException as e:
logger.error('Unknown rdns failure %s on ip %s' % (str(e), ip))
......
......@@ -3,12 +3,11 @@ import logging
from celery import group
from django.utils import timezone
from failmap.celery import Task, app
from failmap.organizations.models import Url
from failmap.scanners.scanner import url_filters
from failmap.scanners.scanner.scanner import url_filters
from failmap.scanners.tasks import crawl_tasks, explore_tasks, scan_tasks
from ..celery import Task, app
log = logging.getLogger(__package__)
......@@ -41,6 +40,11 @@ def compose_task(
Todo: run this every minute.
"""
# Resetting outdated onboarding carries a risk: if the queue takes longer to finish the onboarding tasks, the
# tasks will be performed multiple times. This can grow fast and large. Therefore a very long timeout has been
# chosen for resetting onboarding. Normally onboarding should be done within 5 minutes; we'll reset after 7 days.
reset_expired_onboards()
urls = Url.objects.all().filter(onboarded=False)
urls = url_filters(urls, organizations_filter, urls_filter, endpoints_filter)
......@@ -60,13 +64,13 @@ def compose_task(
| explore_tasks(url)
| update_stage.si(url, "endpoint_finished"))
elif url.onboarding_stage in ["endpoint_finished", "scans_running"]:
elif url.onboarding_stage in ["endpoint_finished"]: # dev: , "scans_running"
log.info("Scanning on: %s", url)
tasks.append(update_stage.si(url, "scans_running")
| scan_tasks(url)
| update_stage.si(url, "scans_finished"))
elif url.onboarding_stage == "crawl_started":
elif url.onboarding_stage == "scans_finished":
log.info("Crawling on: %s", url)
tasks.append(update_stage.si(url, "endpoint_finished")
| crawl_tasks(url)
......@@ -77,9 +81,36 @@ def compose_task(
log.info("Created %s tasks to be performed." % len(tasks))
task = group(tasks)
# log.info("Task:")
# log.info(task)
return task
def reset_expired_onboards():
    """Roll back the onboarding stage of urls whose onboarding has stalled.

    If ``onboarding_stage_set_on`` is older than seven days, that stage
    apparently never completed: move the url one stage back so the onboarding
    composer picks it up again on its next run.
    """
    from datetime import datetime, timedelta

    import pytz

    # Each stuck stage maps to the stage it must be reset to:
    # - endpoint discovery is retried from scratch,
    # - scanning is retried after endpoint discovery finished,
    # - crawling is retried after scanning finished.
    rollback = {
        "endpoint_discovery": "",
        "scans_running": "endpoint_finished",
        "crawl_started": "scans_finished",
    }

    expired = Url.objects.all().filter(
        onboarding_stage_set_on__lte=datetime.now(pytz.utc) - timedelta(days=7))

    for url in expired:
        # Empty string is a valid target stage, so test against None, not truthiness.
        previous_stage = rollback.get(url.onboarding_stage)
        if previous_stage is not None:
            url.onboarding_stage = previous_stage
            # Only write urls that actually changed; the original saved every
            # expired url, issuing pointless UPDATEs for non-matching stages.
            url.save()
@app.task(queue='storage')
def finish_onboarding(url):
log.info("Finishing onboarding of %s", url)
......@@ -92,7 +123,7 @@ def finish_onboarding(url):
@app.task(queue='storage')
def update_stage(url, stage=""):
log.info("Updating onboarding_stage of %s to %s", url, stage)
log.info("Updating onboarding_stage of %s from %s to %s", url, url.onboarding_stage, stage)
url.onboarding_stage = stage
url.save(update_fields=['onboarding_stage'])
return True
......@@ -4,11 +4,10 @@ import logging
from constance import config
from django.db.models import Q
from failmap.map.models import Configuration
from failmap.organizations.models import Organization, Url
from failmap.scanners.models import Endpoint
from ..map.models import Configuration
log = logging.getLogger(__name__)
......@@ -35,6 +34,9 @@ def allowed_to_scan(scanner_name: str=""):
if scanner_name == 'scanner_tls_qualys':
return config.SCAN_HTTP_TLS_QUALYS
if scanner_name == 'scanner_tls_osaft':
return config.SCAN_HTTP_TLS_OSAFT
if scanner_name == 'scanner_dnssec':
return config.SCAN_DNS_DNSSEC
......
......@@ -49,10 +49,10 @@ from PIL import Image
from failmap.celery import app
from failmap.scanners.models import Endpoint, Screenshot
from failmap.scanners.scanner.scanner import (allowed_to_scan, endpoint_filters,
q_configurations_to_scan)
from failmap.scanners.timeout import timeout
from .scanner import allowed_to_scan, endpoint_filters,