...
 
Commits (2)
# Najbitniji linkovi
* [Registar stambenih zajednica](https://data.gov.rs/sr/datasets/registar-stambenikh-zajednitsa-2/)
* [Analiza rezultata u Jupyter-u](https://notebooks.azure.com/brankokokanovic/projects/stambene-zajednice/html/StambeneZajednice.ipynb)
* [Analiza rezultata u Jupyter-u](https://notebooks.azure.com/brankokokanovic/projects/stambene-zajednice/html/StambeneZajednice.ipynb)
* [Thread na OSM forumu](https://forum.openstreetmap.org/viewtopic.php?id=66945)
* [Tabela (avgust 2019.) sa spojenim podacima (PAŽNJA: veličina je 25MB!)](https://stambenezajednice.z6.web.core.windows.net/avgust2019.html)
* [CSV (avgust 2019.) sa spojenim podacima](https://stambenezajednice.z6.web.core.windows.net/avgust2019.csv)
......
geopy==1.20.0
overpy==0.4
Jinja2==2.8.1
osmium==2.15.2
\ No newline at end of file
osmium==2.15.2
plotly==4.1.1
\ No newline at end of file
from typing import List
import os
import csv
from operator import itemgetter
def read_input_csv(csvfile: str)->List:
    """
    Reads input CSV, as given by RGZ (Republicki Geodetski Zavod).
    Can be found somewhere over on https://data.gov.rs/sr/datasets/registar-stambenikh-zajednitsa-2/

    :param csvfile: Path of the input CSV file. It must contain the columns
        ``OkrugNaziv1``, ``OpstinaNaziv1``, ``Ulica`` and ``KucniBroj``.
    :return: List of dicts, one per CSV row, with keys ``district``, ``municipality``,
        ``street`` and ``number`` (copied from the columns above) and ``processed``
        (always ``False``).
    :raises FileNotFoundError: If ``csvfile`` does not exist. This is a subclass of
        ``Exception``, so callers catching ``Exception`` keep working.
    """
    # Check the path that was actually passed in, not a hard-coded file name.
    if not os.path.isfile(csvfile):
        raise FileNotFoundError(
            'Missing {0}, download it from https://data.gov.rs/sr/'.format(csvfile))
    # newline='' lets the csv module do its own newline handling (per csv docs).
    with open(csvfile, newline='') as handle:
        return [
            {'district': row['OkrugNaziv1'], 'municipality': row['OpstinaNaziv1'],
             'street': row['Ulica'], 'number': row['KucniBroj'],
             'processed': False}
            for row in csv.DictReader(handle)
        ]
def _parse_id_list(text: str) -> List:
    """Parses a stringified list of integer IDs such as ``'[1, 2]'``; ``'[]'`` or shorter gives ``[]``."""
    if len(text) <= 2:
        return []
    return [int(item) for item in text[1:-1].split(',')]


def read_output_csv(input_file: str, filter_district: str = None, filter_municipality: str = None,
                    max_row_to_read: int = -1) -> List:
    """
    Reads output CSV, and normalizes data. Output CSV is output of main.py file, which contains results of analysis.
    It is usually called output.csv or result.csv

    :param input_file: CSV file to read
    :param filter_district: If given, only rows whose ``district`` equals this value are kept
    :param filter_municipality: If given, only rows whose ``municipality`` equals this value are kept
    :param max_row_to_read: Maximum number of (filtered) rows to return; zero or negative means no limit
    :return: List of parsed and normalized rows, sorted by district, municipality, street and number.
        ``node``, ``way`` and ``relation`` are lists of ints, ``found`` tells whether any of them is
        non-empty, and double quotes in ``street`` are backslash-escaped.
    """
    stambene_zajednice = []
    with open(input_file, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            # Stop as soon as the limit is reached (">=", so exactly max_row_to_read rows are kept).
            if 0 < max_row_to_read <= len(stambene_zajednice):
                break
            if filter_district is not None and row['district'] != filter_district:
                continue
            if filter_municipality is not None and row['municipality'] != filter_municipality:
                continue
            stambene_zajednice.append(row)

    for sz in stambene_zajednice:
        sz['street'] = sz['street'].replace('"', '\\"')
        for entity in ('node', 'way', 'relation'):
            sz[entity] = _parse_id_list(sz[entity])
        sz['found'] = bool(sz['node'] or sz['way'] or sz['relation'])
        # Empty cells become False; any non-empty string read from the CSV is kept unchanged.
        # NOTE(review): this means the text 'False' stays a (truthy) string - confirm whether
        # callers expect real booleans here.
        for flag in ('multiple_entities_same_housenumber', 'building_tag_present', 'building_is_apartments'):
            sz[flag] = sz[flag] or False

    stambene_zajednice.sort(key=itemgetter('district', 'municipality', 'street', 'number'))
    return stambene_zajednice
......@@ -6,6 +6,8 @@ from operator import itemgetter
from jinja2 import Environment, PackageLoader
from common import read_output_csv
def main():
parser = argparse.ArgumentParser(
......@@ -22,35 +24,15 @@ def main():
env = Environment(loader=PackageLoader('__main__', '../templates'))
template = env.get_template('index_template.html')
stambene_zajednice = []
districts = {'': 0}
max_district_id = 0
with open(input_file) as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
#if len(stambene_zajednice) > 100:
# break
if filter_district is not None and row['district'] != filter_district:
continue
if filter_municipality is not None and row['municipality'] != filter_municipality:
continue
stambene_zajednice.append(row)
stambene_zajednice = read_output_csv(input_file, filter_district, filter_municipality)
for sz in stambene_zajednice:
if sz['district'] not in districts:
max_district_id += 1
districts[sz['district']] = max_district_id
sz['district_id'] = districts[sz['district']]
sz['street'] = sz['street'].replace('"', '\\"')
sz['node'] = [] if len(sz['node']) <= 2 else [int(i) for i in sz['node'][1:-1].split(',')]
sz['way'] = [] if len(sz['way']) <= 2 else [int(i) for i in sz['way'][1:-1].split(',')]
sz['relation'] = [] if len(sz['relation']) <= 2 else [int(i) for i in sz['relation'][1:-1].split(',')]
sz['found'] = len(sz['node']) > 0 or len(sz['way']) > 0 or len(sz['relation']) > 0
sz['multiple_entities_same_housenumber'] = sz['multiple_entities_same_housenumber'] or False
sz['building_tag_present'] = sz['building_tag_present'] or False
sz['building_is_apartments'] = sz['building_is_apartments'] or False
stambene_zajednice.sort(key=itemgetter('district', 'municipality', 'street', 'number'))
output = template.render(stambene_zajednice=stambene_zajednice, districts=districts)
with open('index.html', 'w', encoding='utf-8') as fh:
fh.write(output)
......
......@@ -18,6 +18,8 @@ from geopy.exc import GeocoderServiceError
from geopy.geocoders import Nominatim
from transliteration import cyr2lat
from common import read_input_csv, read_output_csv
api = overpy.Overpass(url='http://overpass.openstreetmap.fr/api/interpreter')
geolocator = Nominatim(user_agent="https://gitlab.com/stalker314314/stambene-zajednice-analysis")
......@@ -275,21 +277,6 @@ out;
return result
def read_stambene_zajednice_csv()->List:
    """
    Loads 'registar-stambenih-zajednica.csv' from the current working directory.

    :return: List of dicts, one per CSV row, with keys district, municipality,
        street, number (taken from the OkrugNaziv1, OpstinaNaziv1, Ulica and
        KucniBroj columns) and processed (always False)
    :raises Exception: If the CSV file is not present
    """
    source = 'registar-stambenih-zajednica.csv'
    if os.path.isfile(source):
        with open(source) as handle:
            return [
                {'district': record['OkrugNaziv1'], 'municipality': record['OpstinaNaziv1'],
                 'street': record['Ulica'], 'number': record['KucniBroj'],
                 'processed': False}
                for record in csv.DictReader(handle)
            ]
    raise Exception('Missing registar-stambenih-zajednica.csv, download it from https://data.gov.rs/sr/')
def process_stambena_zajednica_with_overpass(sz: dict):
"""
Deprecated. Sporo i netacno
......@@ -510,28 +497,27 @@ def main():
if recover_progress_from_csv and not os.path.isfile(progress_file) and os.path.isfile(output_file):
# This is case where once I accidentally deleted progress.pickle and had to regenerate state
# from output_file (result.csv), so I kept the code, but this path should not be used.
stambene_zajednice = read_stambene_zajednice_csv()
with open(output_file) as csvfile:
reader = csv.DictReader(csvfile)
i = 0
for row in reader:
logger.info('Processing row {0}'.format(i))
i += 1
if row['processed'] != 'True':
continue
for sz in stambene_zajednice:
if sz['district'] == row['district'] and\
sz['municipality'] == row['municipality'] and sz['street'] == row['street'] and\
sz['number'] == row['number']:
sz['processed'] = True
sz['node'] = [] if len(row['node']) <= 2 else [int(i) for i in row['node'][1:-1].split(',')]
sz['way'] = [] if len(row['way']) <= 2 else [int(i) for i in row['way'][1:-1].split(',')]
sz['relation'] = [] if len(row['relation']) <= 2 else\
[int(i) for i in row['relation'][1:-1].split(',')]
sz['multiple_entities_same_housenumber'] = row['multiple_entities_same_housenumber'] == 'True'
sz['building_tag_present'] = row['building_tag_present'] == 'True'
sz['building_is_apartments'] = row['building_is_apartments'] == 'True'
break
stambene_zajednice = read_input_csv('registar-stambenih-zajednica.csv')
stambene_zajednice_output = read_output_csv(output_file)
i = 0
for sz_output in stambene_zajednice_output:
logger.info('Processing row {0}'.format(i))
i += 1
if not sz_output['processed']:
continue
for sz in stambene_zajednice:
if sz['district'] == sz_output['district'] and \
sz['municipality'] == sz_output['municipality'] and \
sz['street'] == sz_output['street'] and \
sz['number'] == sz_output['number']:
sz['processed'] = sz_output['processed']
sz['node'] = sz_output['node']
sz['way'] = sz_output['way']
sz['relation'] = sz_output['relation']
sz['multiple_entities_same_housenumber'] = sz_output['multiple_entities_same_housenumber']
sz['building_tag_present'] = sz_output['building_tag_present']
sz['building_is_apartments'] = sz_output['building_is_apartments']
break
with open(progress_file, 'wb') as h:
pickle.dump(stambene_zajednice, h, protocol=pickle.DEFAULT_PROTOCOL)
sleep(5)
......@@ -540,7 +526,7 @@ def main():
with open(progress_file, 'rb') as h:
stambene_zajednice = pickle.load(h)
else:
stambene_zajednice = read_stambene_zajednice_csv()
stambene_zajednice = read_input_csv('registar-stambenih-zajednica.csv')
processed = 0
random.shuffle(stambene_zajednice) # shuffle to easily detect error from whole CSV
......
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from plotly import graph_objects as go
def has_elements(text):
    """
    Tells whether a stringified list such as '[1, 2]' holds at least one integer.

    The first and last character (the brackets) are dropped and the rest is
    parsed as comma-separated integers.
    """
    inner = text[1:-1]
    parsed = np.fromstring(inner, sep=',', dtype=int)
    return parsed.size > 0
def get_values(csvfile, belgrade_only=False, format_v2=False):
    """
    Computes the five funnel counts from a result CSV.

    :param csvfile: Result CSV to load with pandas.read_csv
    :param belgrade_only: When true, only rows whose district is 'ГРАД БЕОГРАД' are counted
    :param format_v2: When true, the node/way/relation columns are stringified lists that are
        first turned into booleans with has_elements; otherwise node/way count as present
        when the cell is not null
    :return: List of five ints: processed rows, rows with any entity found, rows with a way,
        rows whose way has a building tag, rows whose building is tagged as apartments
    """
    frame = pd.read_csv(csvfile)
    if format_v2:
        to_flag = np.vectorize(has_elements)
        for column in ('node', 'way', 'relation'):
            frame[column] = frame[column].apply(to_flag)

    rows = frame[frame['processed'] == True]
    if belgrade_only:
        rows = rows[rows['district'] == 'ГРАД БЕОГРАД']

    if format_v2:
        found_mask = rows['node'] | rows['way'] | rows['relation']
    else:
        found_mask = rows['node'].notnull() | rows['way'].notnull()
    rows = rows.assign(entity_found=found_mask)
    found = rows[rows['entity_found'] == True]

    if format_v2:
        with_way = found[found['way'] == True]
    else:
        with_way = found[found['way'].notnull()]

    buildings = with_way[with_way['building_tag_present'] == True]
    apartments = buildings[buildings['building_is_apartments'] == True]

    stages = (rows, found, with_way, buildings, apartments)
    return [len(stage) for stage in stages]
def _show_funnel(values):
    """Builds one funnel chart from the five counts returned by get_values and shows it."""
    fig = go.Figure(go.Funnel(
        x=values,
        y=["In RGZ", "Found in OSM", "Tagged as ways", "Tagged as building", "Tagged as apartments"],
        textinfo="label+value+percent initial",
        marker={"color": [
            "rgb(118,42,131)", "rgb(175,141,195)", "rgb(231,212,232)", "rgb(127,191,123)", "rgb(27,120,55)"]},
        textfont={"size": 18}
    ))
    fig.show()


def main():
    """
    Shows four funnel charts, in order: August results (all rows, then Belgrade only)
    and the newer v2-format results (all rows, then Belgrade only).
    """
    # avgust-result.csv can be found at https://stambenezajednice.z6.web.core.windows.net/avgust2019.csv
    # oktobar-result.csv can be found at https://stambenezajednice.z6.web.core.windows.net/oktobar2019.csv
    # NOTE(review): the last chart reads 'output.csv' while its sibling reads
    # 'oktobar-result.csv' - confirm whether this is intentional.
    charts = [
        # (csv file, belgrade_only, format_v2)
        ('avgust-result.csv', False, False),
        ('avgust-result.csv', True, False),
        ('oktobar-result.csv', False, True),
        ('output.csv', True, True),
    ]
    for csvfile, belgrade_only, format_v2 in charts:
        _show_funnel(get_values(csvfile, belgrade_only=belgrade_only, format_v2=format_v2))


if __name__ == '__main__':
    main()