Verified Commit c2b72f8e authored by Elger Jonker

[WIP] multi-stage onboarding, todo security headers signature

parent 1c8626de
Pipeline #26037139 failed in 15 minutes and 24 seconds
Task processing system
======================
.. autofunction:: failmap.types.compose_task
.. autofunction:: failmap.types.compose_discover_task
......@@ -80,7 +80,7 @@ class Job(models.Model):
def create_job(task_module: str):
"""Helper to allow Jobs to be created using Celery Beat.
task_module: module from which to call `compose_task` which results in the task to be executed
task_module: module from which to call `compose_discover_task` which results in the task to be executed
"""
module = importlib.import_module(task_module)
......
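The updated docstring reflects that a Job resolves a scanner module by its dotted path and asks it for a composed discovery task. A minimal sketch of that lookup pattern, assuming the simplified helper name composed_task_for (persisting the Job record and dispatching the result are left out):

import importlib

def composed_task_for(task_module: str):
    """Resolve a scanner module by dotted path and return its composed discovery task."""
    module = importlib.import_module(task_module)
    return module.compose_discover_task()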
......@@ -32,11 +32,8 @@ def compose_task(
urls_filter: dict = dict(),
endpoints_filter: dict = dict(),
) -> Task:
"""Compose taskset to rebuild specified organizations/urls.
*This is an implementation of `compose_task`. For more documentation about this concept, arguments and concrete
examples of usage refer to `compose_task` in `types.py`.*
"""
Compose taskset to rebuild specified organizations/urls.
"""
if endpoints_filter:
......
......@@ -220,7 +220,7 @@ class ActionMixin:
actions.append(dns_known_subdomains)
def disover_endpoints(self, *args, **kwargs):
return self.generic_action(scanner_http.compose_task, 'Discover endpoints', *args, **kwargs)
return self.generic_action(scanner_http.compose_discover_task, 'Discover endpoints', *args, **kwargs)
disover_endpoints.short_description = "🗺 Discover endpoints"
actions.append(disover_endpoints)
......@@ -316,10 +316,14 @@ class MyUrlAdminForm(forms.ModelForm):
if not organizations:
return
logger.error(self.cleaned_data)
# make sure the URL is not added if it is already alive and matched to the selected organization.
# except yourself of course...
# todo: exempt yourself, .exclude(pk=self.cleaned_data.get("pk"))
for organization in organizations:
if Url.objects.all().filter(
url=self.cleaned_data.get("url"), is_dead=False, organization=organization).count():
url=self.cleaned_data.get("url"), is_dead=False,
organization=organization).count() > 1:
# format_html = XSS :)
raise ValidationError(format_html(_(
......@@ -336,7 +340,7 @@ class MyUrlAdminForm(forms.ModelForm):
# This url already exists and the selected organization(s) have been added to it.
if Url.objects.all().filter(
url=self.data.get("url"), is_dead=False).count():
url=self.data.get("url"), is_dead=False).count() > 1:
# format_html = XSS :)
raise ValidationError(format_html(_(
......@@ -396,7 +400,7 @@ class UrlAdmin(ActionMixin, ImportExportModelAdmin, nested_admin.NestedModelAdmi
fieldsets = (
(None, {
'fields': ('url', 'organization', 'created_on', 'onboarded')
'fields': ('url', 'organization', 'created_on', 'onboarded', 'onboarding_stage')
}),
('DNS', {
'fields': ('uses_dns_wildcard', ),
......
# Generated by Django 2.0.7 on 2018-07-18 18:14
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('organizations', '0034_url_onboarding_stage'),
]
operations = [
migrations.AlterField(
model_name='url',
name='onboarding_stage',
field=models.CharField(blank=True, help_text='Because of complexity of onboarding, not working with Celery properly, onboarding is done in multiple steps. The last completed step is saved in this value. Empty: nothing. endpoints: endpoints have been found. completed: onboarding is done, also onboarded flag is set.', max_length=150, null=True),
),
]
......@@ -220,7 +220,7 @@ class Url(models.Model):
"something on this host.")
onboarding_stage = models.CharField(
max_length=10,
max_length=150,
blank=True,
null=True,
help_text="Because of complexity of onboarding, not working with Celery properly, onboarding is done in "
......@@ -259,7 +259,7 @@ class Url(models.Model):
if self.is_dead and (not self.is_dead_since or not self.is_dead_reason):
raise ValidationError(_('When telling this is dead, also enter the date and reason for it.'))
if Url.objects.all().filter(url=self.url, is_dead=False, not_resolvable=False).exists():
if Url.objects.all().filter(url=self.url, is_dead=False, not_resolvable=False).exclude(pk=self.pk).exists():
raise ValidationError(_('Url already exists, existing url is alive and resolvable.'))
# urls must be lowercase
......
......@@ -187,15 +187,3 @@ def develop_determineratings():
# add_organization_rating(organization, create_history=True)
# create one for NOW, not this night. This is a bug :)
# add_organization_rating(organization)
def test_can_connect_to_organization():
from failmap.scanners.scanner_http import can_connect, get_ips
organization = Organization.objects.filter(name="Zederik").get()
urls = Url.objects.all().filter(organization=organization)
for url in urls:
ipv4, ipv6 = get_ips(url.url)
if ipv4:
logger.info(can_connect("http", url, 80, ipv4))
if ipv6:
logger.info(can_connect("http", url, 80, ipv6))
import logging
from django.core.management.base import BaseCommand
import failmap.scanners.scanner_http as scanner_http
from failmap.organizations.models import Organization
from .support.arguments import add_organization_argument
logger = logging.getLogger(__package__)
# todo: add command line arguments: protocol and url.
class Command(BaseCommand):
help = 'Discover http(s) endpoints on well known ports.'
def add_arguments(self, parser):
add_organization_argument(parser)
def handle(self, *args, **options):
if not options['organization'] or options['organization'][0] == "*":
scanner_http.discover_endpoints()
else:
organization = Organization.objects.all().filter(name=options['organization'][0])
scanner_http.discover_endpoints(organizations=[organization])
......@@ -55,11 +55,12 @@ def compose_task(
# you will see this happen per worker-size (so for example per 20 things)
if not url.onboarding_stage: # While developing: or url.onboarding_stage == "endpoint_discovery":
log.info("Exploring on: %s", url)
# Of course this will still fail as the aforementioned bug was not fixed. Have to rewrite that.
tasks.append(update_stage.si(url, "endpoint_discovery")
| explore_tasks(url)
| update_stage.si(url, "endpoint_finished"))
elif url.onboarding_stage == "endpoint_finished":
elif url.onboarding_stage in ["endpoint_finished", "scans_running"]:
log.info("Scanning on: %s", url)
tasks.append(update_stage.si(url, "scans_running")
| scan_tasks(url)
......
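The chain above brackets each unit of onboarding work with update_stage so the last completed step is persisted on the Url. A hedged reconstruction of what such a task could look like, based only on how it is called here (the real update_stage in onboard.py may differ):

from failmap.celery import app  # assumed location of the Celery app

@app.task
def update_stage(url, stage: str):
    """Record the last completed onboarding step on the Url record."""
    url.onboarding_stage = stage
    if stage == "completed":
        # per the onboarding_stage help_text, the final step also sets the onboarded flag
        url.onboarded = True
    url.save()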
......@@ -34,8 +34,9 @@ def compose_task(
) -> Task:
"""Compose taskset to scan specified endpoints.
*This is an implementation of `compose_task`. For more documentation about this concept, arguments and concrete
examples of usage refer to `compose_task` in `types.py`.*
*This is an implementation of `compose_discover_task`.
For more documentation about this concept, arguments and concrete
examples of usage refer to `compose_discover_task` in `types.py`.*
"""
......@@ -77,8 +78,9 @@ def compose_task(
# create tasks for scanning all selected endpoints as a single manageable group
# Sending entire objects is possible. How signatures (.s and .si) work is documented:
# http://docs.celeryproject.org/en/latest/reference/celery.html#celery.signature
# Make the first task immutable, so it doesn't get any arguments from other scanners by accident
task = group(
scan_dummy.s(endpoint.uri_url()) | store_dummy.s(endpoint) for endpoint in endpoints
scan_dummy.si(endpoint.uri_url()) | store_dummy.s(endpoint) for endpoint in endpoints
)
return task
......
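The switch from scan_dummy.s to scan_dummy.si is the immutability change the comment above describes. A standalone illustration of the difference, using toy tasks invented for this example only:

from celery import Celery, chain

app = Celery("example", broker="memory://")

@app.task
def add(x, y):
    return x + y

@app.task
def log_result(result, label):
    print(label, result)
    return result

# Mutable signature: log_result.s("sum") receives add's return value (3) as its
# first positional argument, so it runs as log_result(3, "sum").
chain(add.s(1, 2), log_result.s("sum"))

# Immutable signature: log_result.si(7, "fixed") ignores the upstream result and
# runs exactly as written, i.e. log_result(7, "fixed").
chain(add.s(1, 2), log_result.si(7, "fixed"))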
......@@ -12,7 +12,7 @@ from celery import Task, group
from failmap.organizations.models import Organization, Url
from failmap.scanners.endpoint_scan_manager import EndpointScanManager
from failmap.scanners.scanner_http import (redirects_to_safety, resolve_and_scan_tasks,
from failmap.scanners.scanner_http import (can_connect, connect_result, redirects_to_safety,
resolves_on_v4, resolves_on_v6)
from ..celery import app
......@@ -27,12 +27,6 @@ def compose_task(
urls_filter: dict = dict(),
endpoints_filter: dict = dict(),
) -> Task:
"""Compose taskset to scan specified endpoints.
*This is an implementation of `compose_task`. For more documentation about this concept, arguments and concrete
examples of usage refer to `compose_task` in `types.py`.*
"""
# We might not be allowed to scan for this at all.
if not allowed_to_scan("scanner_plain_http"):
......@@ -55,12 +49,12 @@ def compose_task(
return group()
# create tasks for scanning all selected endpoints as a single manageable group
task = group(scan_url.s(url) for url in urls)
task = group(scan_url.si(url) for url in urls)
return task
# This needs to be refactored to move the Endpoint iteration to `compose_task`
# This needs to be refactored to move the Endpoint iteration to `compose_discover_task`
# and split this task up in a scan and store task so scans can be performed more
# distributed. For examples see scan_dummy.py
......@@ -142,7 +136,8 @@ def scan_url(url: Url):
log.debug("This url seems to have no https at all: %s" % url)
log.debug("Checking if they exist, to be sure there is nothing.")
tasks.append(resolve_and_scan_tasks('https', url, 443)
tasks.append(can_connect.si(protocol="https", url=url, port=443, ip_version=4)
| connect_result.s(protocol="https", url=url, port=443, ip_version=4)
| handle_verify_is_secure.si(http_v4_endpoint, url))
else:
......@@ -157,8 +152,9 @@ def scan_url(url: Url):
log.debug("Does not resolve at all, so has no insecure endpoints. %s" % url)
scan_manager.add_scan("plain_https", http_v6_endpoint, "0", not_resolvable_at_all)
else:
tasks.append(
(resolve_and_scan_tasks('https', url, 443) | handle_verify_is_secure.si(http_v6_endpoint, url)))
tasks.append(can_connect.si(protocol="https", url=url, port=443, ip_version=6)
| connect_result.s(protocol="https", url=url, port=443, ip_version=6)
| handle_verify_is_secure.si(http_v6_endpoint, url))
else:
log.debug("We don't have to do anything for v6 on %s" % url)
......
......@@ -85,7 +85,7 @@ def compose_task(
endpoints = list(set(no_screenshots))
log.info("Trying to make %s screenshots." % len(endpoints))
task = group(screenshot_endpoint.s(endpoint) for endpoint in endpoints)
task = group(screenshot_endpoint.si(endpoint) for endpoint in endpoints)
return task
......
......@@ -30,10 +30,6 @@ def compose_task(
endpoints_filter: dict = dict(),
) -> Task:
"""Compose taskset to scan specified endpoints.
*This is an implementation of `compose_task`. For more documentation about this concept, arguments and concrete
examples of usage refer to `compose_task` in `types.py`.*
"""
if not allowed_to_scan("scanner_security_headers"):
......@@ -67,10 +63,8 @@ def compose_task(
# create tasks for scanning all selected endpoints as a single manageable group
task = group(
get_headers.signature(
(endpoint.uri_url(),),
options={'queue': IP_VERSION_QUEUE[endpoint.ip_version]}
) | analyze_headers.s(endpoint) for endpoint in endpoints
get_headers.signature((endpoint.uri_url()), queue=IP_VERSION_QUEUE[endpoint.ip_version], immutable=True)
| analyze_headers.s(endpoint) for endpoint in endpoints
)
return task
......
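The commit message flags the security headers signature as a todo: in the new one-liner the args are passed as (endpoint.uri_url()), which is a plain string rather than a one-element tuple. For reference, two equivalent, hedged ways to build an immutable signature routed to an ip-version queue with Celery's documented API (get_headers and IP_VERSION_QUEUE are taken from the hunk above):

# args must be a tuple, so a single argument needs the trailing comma
sig = get_headers.signature(
    (endpoint.uri_url(),),
    options={'queue': IP_VERSION_QUEUE[endpoint.ip_version]},
    immutable=True,
)

# shorthand: .si builds an immutable signature, .set attaches execution options
sig = get_headers.si(endpoint.uri_url()).set(queue=IP_VERSION_QUEUE[endpoint.ip_version])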
......@@ -146,13 +146,8 @@ def compose_task(
urls_filter: dict = dict(),
endpoints_filter: dict = dict(),
) -> Task:
"""Compose taskset to scan specified endpoints.
*This is an implementation of `compose_task`. For more documentation about this concept, arguments and concrete
examples of usage refer to `compose_task` in `types.py`.*
"""
# todo: permission check
if not allowed_to_scan("scanner_tls_qualys"):
return group()
......
......@@ -14,7 +14,7 @@ __all__ = [scanner_tls_qualys, scanner_security_headers, scanner_dummy, scanner_
# Lists to be used elsewhere when tasks need to be composed; these lists contain compose functions.
# Other code can iterate over these functions and call them, example: see onboard.py.
TLD_DEFAULT_EXPLORERS = []
DEFAULT_EXPLORERS = [scanner_http.compose_task, scanner_ftp.compose_discover_task]
DEFAULT_EXPLORERS = [scanner_http.compose_discover_task, scanner_ftp.compose_discover_task]
TLD_DEFAULT_CRAWLERS = [
scanner_dns.brute_known_subdomains_compose_task,
......
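The comment above notes that other code iterates over these lists and calls the compose functions. A hedged sketch of that pattern (the helper name compose_all_explorers is invented for illustration; the return annotation follows the codebase convention of annotating composed groups as Task):

from celery import Task, group

def compose_all_explorers(**filters) -> Task:
    """Bundle the composed task of every default explorer into one schedulable group."""
    return group(compose(**filters) for compose in DEFAULT_EXPLORERS)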
......@@ -14,8 +14,8 @@ def compose_task(
:param urls_filter: dict: limit urls to these filters, see below
:param endpoints_filter: dict: limit endpoints to these filters, see below
*This is an abstract of the `compose_task` function which is used throughout this codebase, search for
`compose_task` to find implementations which can be used as example.*
*This is an abstract of the `compose_discover_task` function which is used throughout this codebase, search for
`compose_discover_task` to find implementations which can be used as example.*
Composition of a task is building a task from primitives (task, group, chain) and other composed tasks in order
to create a 'collection' of work that as a whole can be scheduled for execution in the task processing system.
......@@ -90,7 +90,7 @@ def compose_task(
For example, to scan all urls/endpoints for one organization named 'example' run:
>>> task = compose_task(organizations={'name__iexact': 'example'})
>>> task = compose_discover_task(organizations={'name__iexact': 'example'})
>>> result = task.apply_async()
>>> print(result.get())
......@@ -98,7 +98,7 @@ def compose_task(
Multiple filters can be applied, to scan only port 80 for organizations added today run:
>>> task = compose_task(
>>> task = compose_discover_task(
... organizations={'date_added__day': datetime.datetime.today().day},
... endpoints={'port': 80}
... )
......
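For a concrete picture of the contract documented above, a minimal, hypothetical skeleton of an implementation; discover_endpoints_on is an invented task name, and real scanners add permission checks, endpoint filters and error handling:

from celery import Task, group

from failmap.organizations.models import Organization, Url

def compose_discover_task(
    organizations_filter: dict = dict(),
    urls_filter: dict = dict(),
    endpoints_filter: dict = dict(),
) -> Task:
    organizations = Organization.objects.filter(**organizations_filter)
    urls = Url.objects.filter(organization__in=organizations, **urls_filter)
    # one discovery task per url, bundled so the whole set can be scheduled at once
    return group(discover_endpoints_on.si(url) for url in urls)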