Verified Commit 4bc80e96 authored by Elger Jonker's avatar Elger Jonker

could be multiple returning from urls.filter

parent 1c1ca2b2
......@@ -330,8 +330,9 @@ class UrlSubmissionAdmin(ImportExportModelAdmin, admin.ModelAdmin):
# it's possible that the url already is in the system. If so, tie that to the submitted organization.
# could be dead etc... (stacking?)
url = Url.objects.all().get(url=urlsubmission.url)
url = Url.objects.all().filter(url=urlsubmission.url, is_dead=False).first()
except Url.DoesNotExist:
# if it already exists, then add the url to the organization.
url = Url(url=urlsubmission.url)
......@@ -392,11 +393,11 @@ class OrganizationSubmissionAdmin(ImportExportModelAdmin, admin.ModelAdmin):
# this might revive some old organizations, so domain knowledge is required.
# In this case an organization with the same name and type already exists and is alive.
# this means we don't need to add a new one, or with new coordinates.
except Organization.DoesNotExist:
# Create a new one
# address and evidence are saved elsewhere. Since we have a reference we can auto-update after
......@@ -217,7 +217,6 @@ class UrlSubmissionForm(forms.Form):
team = None
def __init__(self, *args, **kwargs):
self.contest = kwargs.pop('contest', None) = kwargs.pop('team', None)
......@@ -232,16 +231,18 @@ class UrlSubmissionForm(forms.Form):
forward=['organization_type_name', 'country']
"<li>If you can't find the organization, try the abbreviated name.</li>"
"<li>You can also search for organization type, and it's name at the same time.</li>"
"<li>A list of all approved organizations is shown <a href='/game/submitted_organizations/'>"
"<li>If your newly added organization is missing, please ask the competition host to verify your "
"<li>Urls entered below will be added to all organizations selected here.</li>"
<li>If you can't find the organization, try the abbreviated name.</li>
<li>You can also search for organization type, and it's name at the same time.</li>
<li>A list of all approved organizations is shown <a href='/game/submitted_organizations/'>
<li>If your newly added organization is missing, please ask the competition host to verify your
<li>Urls entered below will be added to all organizations selected here.</li>
# try and inject values into the tagswidget
......@@ -253,7 +254,6 @@ class UrlSubmissionForm(forms.Form):
initial = valid
choices = []
for site in valid:
# log.debug("Valid; %s" % site)
choices.append((site, site))
# can't add initial here, results in infinite loop
# log.debug("things where submitted: %s" % valid)
......@@ -268,7 +268,6 @@ class UrlSubmissionForm(forms.Form):
# this overrides some of the implied validation that happens in MultipleChoiceField, which doesn't
# match the sites that are submitted, as they are filtered (seen above)
# This is a terrible hack, which is what you get when the complexity for the control is so insanely high.
if valid: = True
# have to add multiple... one each. A MultiValueDict...
......@@ -292,8 +291,11 @@ class UrlSubmissionForm(forms.Form):
<li>Subdomains are removed. The system will search for subdomains by itself.</li>
<li>Protocols such as https:// and http:// are removed.</li>
<li>The following is all the same url (,,,,
<li>Subdomains and protocols are removed: the system will discover these.</li>
<li>Each address will be resolved to see if it exists. This can take a while.</li>
<li>You can enter multiple sites at once using comma or space as a delimiter.
For example: The value
......@@ -301,10 +303,6 @@ class UrlSubmissionForm(forms.Form):
<li>The url will be added to all organizations selected above, be careful.</li>
<li>It's not possible to enter IP addresses: the IP's behind services/organizations often change.</li>
<li>Urls that don't resolve or are in incorrect format will be automatically removed.</li>
<li>The following is all the same url (,,,,
......@@ -334,7 +332,6 @@ class UrlSubmissionForm(forms.Form):
def filter_websites(sites):
incomplete = []
not_resolvable = []
valid = []
......@@ -361,7 +358,6 @@ class UrlSubmissionForm(forms.Form):
return incomplete, not_resolvable, valid
def clean_websites(self):
sites ='websites', [])
incomplete, not_resolvable, valid = self.filter_websites(sites)
......@@ -390,7 +386,6 @@ class UrlSubmissionForm(forms.Form):
return valid
def clean_for_organization(self):
if not self.contest:
raise ValidationError('You\'re not in a contest', 'no_contest')
......@@ -403,7 +398,6 @@ class UrlSubmissionForm(forms.Form):
existing = []
log.debug('organizations: %s', organizations)
for organization in organizations:
if not Organization.objects.filter(pk=organization,
......@@ -419,7 +413,6 @@ class UrlSubmissionForm(forms.Form):
return existing
def clean(self):
organizations ='for_organization', [])
except AttributeError:
......@@ -462,7 +455,6 @@ class UrlSubmissionForm(forms.Form):
def save(self):
# validate again to prevent duplicates within the transaction
# we can also check if the data is not in the db yet, which is nicer as it potentially saves a lot of time
......@@ -471,10 +463,6 @@ class UrlSubmissionForm(forms.Form):
organizations = self.cleaned_data.get('for_organization', None)
websites = self.cleaned_data.get('websites', None)
log.debug('adding new')
log.debug('organizations: %s', organizations)
log.debug('websites: %s', websites)
for organization in organizations:
for website in websites:
......@@ -287,22 +287,31 @@ def store_new(feature: Dict, country: str = "NL", organization_type: str = "muni
# uppercase urls entered the system this way. All urls are lowercase.
website = website.lower()
extract = tldextract.extract(website)
if extract.subdomain:
url = Url(url="%s.%s.%s" % (extract.subdomain, extract.domain, extract.suffix))
url.organization.add(new_organization)"Also found a subdomain website for this organization: %s" % website)
add_url(url="%s.%s.%s" % (extract.subdomain, extract.domain, extract.suffix), organization=new_organization)
# Even if it doesn't resolve directly, it is helpful for some scans:
url = Url(url="%s.%s" % (extract.domain, extract.suffix))
url.organization.add(new_organization)"Also found a top level website for this organization: %s" % website)
add_url(url="%s.%s" % (extract.domain, extract.suffix), organization=new_organization)
def add_url(organization, url):
# only create the url if it does not exist yet; otherwise, reuse the existing url.
# .first() is used because .get() crashes hard if there are data inconsistencies (e.g. multiple matching urls).
existing_url = Url.objects.all().filter(url=url, is_dead=False).first()
if not existing_url:"Added new url to this organization: %s" % url)
existing_url = Url(url=url)
else:"Added existing url to this organization: %s" % url)
def store_updates(feature: Dict, country: str = "NL", organization_type: str = "municipality", when=None):
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment