Commit 3f0f7cb4 authored by Elger Jonker

could be multiple returning from urls.filter


Former-commit-id: 4bc80e96
parent 782733e6
......@@ -330,8 +330,9 @@ class UrlSubmissionAdmin(ImportExportModelAdmin, admin.ModelAdmin):
continue
# it's possible that the url already is in the system. If so, tie that to the submitted organization.
# could be dead etc... (stacking?)
try:
url = Url.objects.all().get(url=urlsubmission.url)
url = Url.objects.all().filter(url=urlsubmission.url, is_dead=False).first()
except Url.DoesNotExist:
# if it already exists, then add the url to the organization.
url = Url(url=urlsubmission.url)
......@@ -392,11 +393,11 @@ class OrganizationSubmissionAdmin(ImportExportModelAdmin, admin.ModelAdmin):
# this might revive some old organizations, so domain knowledge is required.
# In this case the organization already exists with the same name, type and alive.
# this means we don't need to add a new one, or with new coordinates.
Organization.objects.all().get(
Organization.objects.all().filter(
name=osm.organization_name,
country=osm.organization_country,
is_dead=False,
type=OrganizationType.objects.get(name=osm.organization_type_name))
type=OrganizationType.objects.get(name=osm.organization_type_name)).first()
except Organization.DoesNotExist:
# Create a new one
# address and evidence are saved elsewhere. Since we have a reference we can auto-update after
......
......@@ -217,7 +217,6 @@ class UrlSubmissionForm(forms.Form):
team = None
def __init__(self, *args, **kwargs):
log.debug("__init__")
self.contest = kwargs.pop('contest', None)
self.team = kwargs.pop('team', None)
......@@ -232,16 +231,18 @@ class UrlSubmissionForm(forms.Form):
url='/game/autocomplete/organization-autocomplete/',
forward=['organization_type_name', 'country']
),
help_text="Hints:"
"<ul>"
"<li>If you can't find the organization, try the abbreviated name.</li>"
"<li>You can also search for organization type, and it's name at the same time.</li>"
"<li>A list of all approved organizations is shown <a href='/game/submitted_organizations/'>"
"here</a></li>"
"<li>If your newly added organization is missing, please ask the competition host to verify your "
"organization.</li>"
"<li>Urls entered below will be added to all organizations selected here.</li>"
"</ul>"
help_text="""
Hints:"
<ul>
<li>If you can't find the organization, try the abbreviated name.</li>
<li>You can also search for organization type, and it's name at the same time.</li>
<li>A list of all approved organizations is shown <a href='/game/submitted_organizations/'>
here</a></li>
<li>If your newly added organization is missing, please ask the competition host to verify your
organization.</li>
<li>Urls entered below will be added to all organizations selected here.</li>
</ul>
"""
)
# try and inject values into the tagswidget
......@@ -253,7 +254,6 @@ class UrlSubmissionForm(forms.Form):
initial = valid
choices = []
for site in valid:
# log.debug("Valid; %s" % site)
choices.append((site, site))
# can't add initial here, results in infinite loop
# log.debug("things where submitted: %s" % valid)
......@@ -268,7 +268,6 @@ class UrlSubmissionForm(forms.Form):
# this overrides some of the implied validation that happens in MultipleChoiceField, which doesn't
# match the sites that are submitted, as they are filtered (seen above)
# This is a terrible hack, which is what you get when the complexity for the control is so insanely high.
log.debug(self.data)
if valid:
self.data._mutable = True
# have to add multiple... one each. A MultiValueDict...
......@@ -292,8 +291,11 @@ class UrlSubmissionForm(forms.Form):
help_text="""
Hints:
<ul>
<li>Subdomains are removed. The system will search for subdomains by itself.</li>
<li>Protocols such as https:// and http:// are removed.</li>
<li>The following is all the same url (google.com):
https://google.com, https://www.google.com, http://nonsense.google.com, bla.nonsense.google.com,
google.com
</li>
<li>Subdomains and protocols are removed: the system will discover these.</li>
<li>Each address will be resolved to see if it exists. This can take a while.</li>
<li>You can enter multiple sites at once using comma or space as a delimiter.
For example: The value
......@@ -301,10 +303,6 @@ class UrlSubmissionForm(forms.Form):
<li>The url will be added to all organizations selected above, be careful.</li>
<li>It's not possible to enter IP addresses: the IP's behind services/organizations often change.</li>
<li>Urls that don't resolve or are in incorrect format will be automatically removed.</li>
<li>The following is all the same url (google.com):
https://google.com, https://www.google.com, http://nonsense.google.com, bla.nonsense.google.com,
google.com
</li>
</ul>
"""
)
......@@ -334,7 +332,6 @@ class UrlSubmissionForm(forms.Form):
@staticmethod
def filter_websites(sites):
log.debug("filter_websites")
incomplete = []
not_resolvable = []
valid = []
......@@ -361,7 +358,6 @@ class UrlSubmissionForm(forms.Form):
return incomplete, not_resolvable, valid
def clean_websites(self):
log.debug("clean_websites")
try:
sites = self.data.getlist('websites', [])
incomplete, not_resolvable, valid = self.filter_websites(sites)
......@@ -390,7 +386,6 @@ class UrlSubmissionForm(forms.Form):
return valid
def clean_for_organization(self):
log.debug("clean_for_organization")
if not self.contest:
raise ValidationError('You\'re not in a contest', 'no_contest')
......@@ -403,7 +398,6 @@ class UrlSubmissionForm(forms.Form):
existing = []
log.debug('organizations: %s', organizations)
for organization in organizations:
if not Organization.objects.filter(pk=organization,
......@@ -419,7 +413,6 @@ class UrlSubmissionForm(forms.Form):
return existing
def clean(self):
log.debug("clean")
try:
organizations = self.data.getlist('for_organization', [])
except AttributeError:
......@@ -462,7 +455,6 @@ class UrlSubmissionForm(forms.Form):
@transaction.atomic
def save(self):
log.debug("save")
# validate again to prevent duplicates within the transaction
# we can also check if the data is not in the db yet, which is nicer as it potentially saves a lot of time
......@@ -471,10 +463,6 @@ class UrlSubmissionForm(forms.Form):
organizations = self.cleaned_data.get('for_organization', None)
websites = self.cleaned_data.get('websites', None)
log.debug('adding new')
log.debug('organizations: %s', organizations)
log.debug('websites: %s', websites)
for organization in organizations:
for website in websites:
......
......@@ -287,22 +287,31 @@ def store_new(feature: Dict, country: str = "NL", organization_type: str = "muni
# This is how uppercase urls entered the system. All urls are lowercase.
website = website.lower()
extract = tldextract.extract(website)
if extract.subdomain:
url = Url(url="%s.%s.%s" % (extract.subdomain, extract.domain, extract.suffix))
url.save()
url.organization.add(new_organization)
url.save()
log.info("Also found a subdomain website for this organization: %s" % website)
add_url(url="%s.%s.%s" % (extract.subdomain, extract.domain, extract.suffix), organization=new_organization)
# Even if it doesn't resolve directly, it is helpful for some scans:
url = Url(url="%s.%s" % (extract.domain, extract.suffix))
url.save()
url.organization.add(new_organization)
url.save()
log.info("Also found a top level website for this organization: %s" % website)
add_url(url="%s.%s" % (extract.domain, extract.suffix), organization=new_organization)
def add_url(organization, url: str) -> None:
    """Attach ``url`` to ``organization``, reusing a live Url row when one exists.

    Looks up the first Url with the same address that is not marked dead. If
    none is found, a new Url is created and saved first. Either way the
    organization is added to the url's ``organization`` relation and the url
    is saved afterwards.

    :param organization: organization to link the url to.
    :param url: address to look up or create.
    """
    # filter().first() instead of get(): get crashes hard if there are data
    # inconsistencies (several rows for the same address); first() yields one
    # match, or None when nothing matches.
    existing_url = Url.objects.filter(url=url, is_dead=False).first()

    if existing_url:
        log.info("Added existing url to this organization: %s", url)
    else:
        log.info("Added new url to this organization: %s", url)
        existing_url = Url(url=url)
        # Saved before touching the relation, as the original code did.
        existing_url.save()

    # Shared tail of both paths: link the organization, then save the url.
    existing_url.organization.add(organization)
    existing_url.save()
def store_updates(feature: Dict, country: str = "NL", organization_type: str = "municipality", when=None):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment