django cleanup, 1.11.8, lowering command complexity

parent cc17f722
......@@ -30,3 +30,4 @@ dev_db/
failmap_debug_dataset*
temp/*
*.out
*.rdb
......@@ -151,15 +151,15 @@ The setup script performs the following steps:
# Scanning services (beta)
Todo: add celery beat information
Some scanners require RabbitMQ to be installed. We're currently in transition from running scanners
manually to supporting both manual scans and celery beat.
Read more about installing redis, [here](https://redis.io/topics/quickstart)
Each of the below commands requires their own command line window:
# start rabbitmq
rabbitmq-server
# start redis
redis-server
# start a worker
failmap-admin celery worker -ldebug
......
import logging
from django.core.management.base import BaseCommand
import failmap_admin.scanners.scanner_http as scanner_http
from failmap_admin.organizations.models import Organization
from .support.arguments import add_organization_argument
logger = logging.getLogger(__package__)
# todo: add command line arguments: protocol amd url.
class Command(BaseCommand):
    """Management command that discovers http(s) endpoints on well known ports."""
    help = 'Discover http(s) endpoints on well known ports.'

    def add_arguments(self, parser):
        # Adds the shared --organization argument used by all scanner commands.
        add_organization_argument(parser)

    def handle(self, *args, **options):
        # No organization (or the "*" wildcard) means: discover for everything.
        requested = options['organization']
        if not requested or requested[0] == "*":
            scanner_http.discover_endpoints()
            return
        organization = Organization.objects.all().filter(name=requested[0])
        scanner_http.discover_endpoints(organizations=[organization])
import logging
from django.core.management.base import BaseCommand
from failmap_admin.organizations.models import Organization, Url
from failmap_admin.scanners.models import Endpoint
from failmap_admin.scanners.scanner_http import scan_url, scan_urls
from .support.arguments import add_organization_argument
logger = logging.getLogger(__package__)
# todo: add command line arguments: port and protocol.
# Verify that all endpoints we currently have still exist:
# failmap-admin discover-endpoints-http-https --method=verify
# try to find open ports
# failmap-admin discover-endpoints-http-https --method=discover
class Command(BaseCommand):
    """Verify existing http(s) endpoints or discover new ones.

    Examples:
        failmap-admin discover-endpoints-http-https --method=verify
        failmap-admin discover-endpoints-http-https --method=discover
    """
    help = 'Discover http(s) endpoints on well known ports.'

    def add_arguments(self, parser):
        add_organization_argument(parser)
        return parser.add_argument(
            '--method', '-m',
            help="verify|discover. Verify checks all existing ones, discover tries to find new ones.",
            nargs='?',
            required=False,
            default="verify",
        )

    def handle(self, *args, **options):
        method = options['method']
        # No organization (or the "*" wildcard) means: run over everything.
        everything = not options['organization'] or options['organization'][0] == "*"

        if method == "verify":
            if everything:
                verify_existing_endpoints()
            else:
                organization = Organization.objects.all().filter(name=options['organization'][0])
                verify_existing_endpoints(organization=organization)

        if method == "discover":
            if everything:
                discover_endpoints()
            else:
                organization = Organization.objects.all().filter(name=options['organization'][0])
                discover_endpoints(organization=organization)
def verify_existing_endpoints(port=None, protocol=None, organization=None):
    """
    Checks all http(s) endpoints if they still exist. This is to monitor changes in the existing
    dataset, without contacting an organization too often. It can be checked every few days,
    as trying to find new endpoints is more involved and should not be run more than once every
    two to four weeks.

    The only result this scanner has is the same or less endpoints than we currently have.

    :param port: optionally restrict the check to a single port.
    :param protocol: optionally restrict the check to a single protocol; defaults to http + https.
    :param organization: optionally restrict the check to one organization's urls.
    :return: None
    """
    # Start from endpoints that are alive and whose url is alive and resolvable.
    candidates = Endpoint.objects.all().filter(is_dead=False,
                                               url__not_resolvable=False,
                                               url__is_dead=False)

    if port:
        candidates = candidates.filter(port=port)

    # Without an explicit protocol, restrict to the web protocols this scanner manages.
    if protocol:
        candidates = candidates.filter(protocol=protocol)
    else:
        candidates = candidates.filter(protocol__in=['http', 'https'])

    if organization:
        candidates = candidates.filter(url__organization=organization)

    for endpoint in candidates:
        scan_url(endpoint.protocol, endpoint.url, endpoint.port)
def discover_endpoints(port=None, protocol=None, organization=None):
    """
    Try to find new http(s) endpoints on a set of well known ports for all live urls.

    :param port: optionally scan only this port; defaults to a list of common web ports.
    :param protocol: optionally scan only this protocol ('http' or 'https'); defaults to both.
    :param organization: optionally restrict the scan to one organization's urls.
    :return: None
    """
    # Only scan urls that can still be reached. (The original had a redundant trailing
    # empty .filter() here, which was a no-op.)
    urls = Url.objects.all().filter(is_dead=False, not_resolvable=False)

    if organization:
        urls = urls.filter(organization=organization)

    protocols = [protocol] if protocol else ['http', 'https']

    if port:
        ports = [port]
    else:
        # Yes, HTTP sites on port 443 exist, we've seen many of them. Not just warnings(!).
        # Don't underestimate the flexibility of the internet.
        ports = [80, 81, 82, 88, 443, 8008, 8080, 8088, 8443, 8888, 9443]

    # Lazy %-style args: the message is only formatted if debug logging is enabled.
    logger.debug("Going to scan %s urls.", urls.count())
    scan_urls(protocols, urls, ports)
......@@ -5,10 +5,11 @@ from time import sleep
import pytz
from django.core.management.base import BaseCommand
import failmap_admin.scanners.scanner_http as scanner_http
import failmap_admin.scanners.scanner_plain_http as scanner_plain_http
from failmap_admin.organizations.models import Url
from failmap_admin.scanners.scanner_dns import brute_known_subdomains, certificate_transparency_scan
from failmap_admin.scanners.scanner_http import scan_urls_on_standard_ports
from failmap_admin.scanners.scanner_plain_http import scan_url
from failmap_admin.scanners.scanner_dns import (brute_known_subdomains,
certificate_transparency_scan, nsec_scan)
from failmap_admin.scanners.scanner_screenshot import screenshot_urls
logger = logging.getLogger(__package__)
......@@ -18,33 +19,37 @@ class Command(BaseCommand):
class Command(BaseCommand):
    """Onboarding service: performs the initial scans on urls that are new to the system.

    The handle() method only delegates to the module-level runservice() loop, so the
    service can also be started without going through the management command machinery.
    """
    help = 'Automatically performs initial scans and tests on new urls.'

    def handle(self, *args, **options):
        # todo: make scan log, so you can see what has been scanned, and what completed.
        runservice()
def runservice():
    """Poll for urls to onboard every 60 seconds; Ctrl+C asks whether to quit."""
    try:
        logger.info("Started onboarding.")
        while True:
            onboard()
            logger.info("Waiting for urls to be onboarded. Sleeping for 60 seconds.")
            sleep(60)
    except KeyboardInterrupt:
        logger.info("Onboarding interrupted.")
        do_continue = input("Do you wish to quit? Y/n")
        # Any answer containing an 'n' (case-insensitive) means: keep running.
        if "n" in do_continue.lower():
            runservice()
        else:
            logger.info("Stopped onboarding.")
def onboard():
urls = onboard_gather()
urls = gather()
for url in urls:
# scan for http/https endpoints
if url.is_top_level():
# some DNS scans, to find more urls to onboard.
brute_known_subdomains([url])
certificate_transparency_scan([url])
scan_urls_on_standard_ports([url])
scan_url(url)
nsec_scan([url])
scanner_http.discover_endpoints(urls=[url])
scanner_plain_http.scan_urls([url])
screenshot_urls([url])
# tls scans are picked up by scanner_tls_qualys and may take a while.
# other scans the same. They will do the ratings.
......@@ -54,16 +59,7 @@ def onboard():
url.save()
def onboard_existing_urls():
    """A quick fix for an existing database."""
    # Mark every known url as onboarded, stamped with the current UTC time.
    for url in Url.objects.all():
        url.onboarded = True
        url.onboarded_on = datetime.now(pytz.utc)
        url.save()
def onboard_gather():
def gather():
never_onboarded = Url.objects.all().filter(onboarded=False)
......
import logging
from django.core.management.base import BaseCommand
import failmap_admin.scanners.scanner_http as scanner_http
from failmap_admin.organizations.models import Organization
from .support.arguments import add_organization_argument
logger = logging.getLogger(__package__)
class Command(BaseCommand):
    """Management command that re-checks the endpoints we already know about."""
    help = 'Verify known endpoints.'

    def add_arguments(self, parser):
        # Adds the shared --organization argument used by all scanner commands.
        add_organization_argument(parser)

    def handle(self, *args, **options):
        # No organization (or the "*" wildcard) means: verify everything.
        requested = options['organization']
        if not requested or requested[0] == "*":
            scanner_http.verify_endpoints()
            return
        organization = Organization.objects.all().filter(name=requested[0])
        scanner_http.verify_endpoints(organizations=[organization])
......@@ -8,6 +8,7 @@ Performs a range of DNS scans:
It separates the scans as it might be desirable to use different scanners.
Todo: the list of known subdomains might help (a lot) with breaking nsec3 hashes?
https://github.com/anonion0/nsec3map
"""
# todo: if ScannerHttp.has_internet_connection():
......
......@@ -35,7 +35,7 @@ from requests import ConnectTimeout, HTTPError, ReadTimeout, Timeout
from requests.exceptions import ConnectionError
from failmap_admin.celery import app
from failmap_admin.organizations.models import Url
from failmap_admin.organizations.models import Organization, Url
from failmap_admin.scanners.models import Endpoint, UrlIp
from .timeout import timeout
......@@ -55,8 +55,72 @@ def validate_protocol(protocol: str):
raise ValueError("Invalid protocol %s, options are: http, https" % protocol)
def verify_endpoints(urls: List[Url]=None, port: int=None, protocol: str=None, organizations: List[Organization]=None):
    """
    Checks all http(s) endpoints if they still exist. This is to monitor changes in the existing
    dataset, without contacting an organization too often. It can be checked every few days,
    as trying to find new endpoints is more involved and should not be run more than once every
    two to four weeks.

    The only result this scanner has is the same or less endpoints than we currently have.

    :param urls: optionally restrict the check to these urls.
    :param port: optionally restrict the check to a single port.
    :param protocol: optionally restrict the check to a single protocol; defaults to http + https.
    :param organizations: optionally restrict the check to these organizations' urls.
    :return: None
    """
    # Base set: live endpoints whose url is alive and resolvable. The original duplicated
    # this queryset in both branches of `if not urls`; building it once and narrowing with
    # url__in is equivalent and removes the duplication.
    endpoints = Endpoint.objects.all().filter(is_dead=False,
                                              url__not_resolvable=False,
                                              url__is_dead=False)
    if urls:
        endpoints = endpoints.filter(url__in=urls)

    if port:
        endpoints = endpoints.filter(port=port)

    if protocol:
        endpoints = endpoints.filter(protocol=protocol)
    else:
        endpoints = endpoints.filter(protocol__in=['http', 'https'])

    if organizations:
        endpoints = endpoints.filter(url__organization__in=organizations)

    for endpoint in endpoints:
        scan_url(endpoint.protocol, endpoint.url, endpoint.port)
def discover_endpoints(urls: List[Url]=None, port: int=None, protocol: str=None,
                       organizations: List[Organization]=None):
    """
    Try to find new http(s) endpoints on a set of well known ports.

    :param urls: optionally scan only these urls; defaults to all live, resolvable urls.
    :param port: optionally scan only this port; defaults to a list of common web ports.
    :param protocol: optionally scan only this protocol; defaults to http + https.
    :param organizations: optionally restrict the scan to these organizations' urls.
    :return: None
    """
    if not urls:
        urls = Url.objects.all().filter(is_dead=False, not_resolvable=False)

    # NOTE(review): this assumes urls is a queryset when organizations is given —
    # a plain list would not have .filter(); confirm against callers.
    if organizations:
        urls = urls.filter(organization__in=organizations)

    protocols = [protocol] if protocol else ['http', 'https']

    if port:
        ports = [port]
    else:
        # Yes, HTTP sites on port 443 exist, we've seen many of them. Not just warnings(!).
        # Don't underestimate the flexibility of the internet.
        ports = [80, 443, 8008, 8080, 8088, 8443, 8888]

    scan_urls(protocols, urls, ports)
def scan_urls_on_standard_ports(urls: List[Url]):
    """Scan the given urls for http and https endpoints on the commonly used web ports.

    The port list matches the default list used by discover_endpoints, so both entry
    points probe the same set of ports. (The diff residue showed two conflicting calls;
    this keeps the post-change, shorter list.)
    """
    scan_urls(['http', 'https'], urls, [80, 443, 8008, 8080, 8088, 8443, 8888])
def scan_urls(protocols: List[str], urls: List[Url], ports: List[int]):
......@@ -181,7 +245,7 @@ def can_connect(protocol: str, url: Url, port: int, ip: str):
try:
"""
5 seconds network timeout, 8 seconds timeout for server response.
30 seconds network timeout, 30 seconds timeout for server response.
If we get a redirect, it means there is a server. Don't follow.
Any status code is enough to verify that there is an endpoint.
......@@ -189,7 +253,7 @@ def can_connect(protocol: str, url: Url, port: int, ip: str):
https://stackoverflow.com/questions/43156023/what-is-http-host-header#43156094
"""
r = requests.get(uri, timeout=(5, 8), allow_redirects=False, headers={'Host': url.url})
r = requests.get(uri, timeout=(30, 30), allow_redirects=False, headers={'Host': url.url})
if r.status_code:
logger.debug("%s: Host: %s Status: %s" % (uri, url.url, r.status_code))
return True
......
# Django jet is blocking migration to 2.0
# error: from django.views.i18n import javascript_catalog
# ImportError: cannot import name 'javascript_catalog'
django==1.11.8
django-jet
django-countries
django-jsonfield
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment