The SDG observations were retrieved from the United Nations [SDG API](https://unstats.un.org/sdgapi).
First of all, we import the libraries through which the time series data are retrieved from the UN API (`requests` and `aiohttp`), together with `asyncio`/`nest_asyncio` for asynchronous execution and `time` for measuring how long the import takes.
```python
import requests
import aiohttp
import asyncio
import time
import nest_asyncio

# Allow the asyncio event loop to be re-entered (needed when running inside a Jupyter notebook)
nest_asyncio.apply()
```
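The snippets below also reuse a py2neo `Graph` connection named `graph`, which is assumed to have been created earlier in the guide. If you run this section on its own, a minimal sketch of such a connection (with placeholder URI and credentials, not the guide's actual settings) is:

```python
from py2neo import Graph

# Placeholder connection details; replace with your own Neo4j URI and credentials
graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))
```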
Based on the functions below, we create Python dictionaries whose keys are the attribute and dimension codes and whose values are their descriptions.
```python
def convert_codes_to_description(list1, dictionary):
    ...
    return string_description

dict_dimensions, dimensions_mapping = get_metadata('/Dimensions')
dict_attributes, attributes_mapping = get_metadata('/Attributes')
```
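The bodies of `get_metadata` and `convert_codes_to_description` are not reproduced in this excerpt. Purely for orientation, a hypothetical sketch of `convert_codes_to_description`, assuming `dictionary` maps each code to its human-readable description (as the surrounding text describes), could look like:

```python
def convert_codes_to_description(list1, dictionary):
    # Hypothetical sketch: look up each code's description and join them with '|',
    # mirroring how the codes themselves are joined in the import loop below
    descriptions = [dictionary.get(code, code) for code in list1]
    string_description = '|'.join(descriptions)
    return string_description
```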
Now, we are ready to import the observation data. First, we retrieve all series codes and the geo-area codes that are available in the graph.
```python
# Get all series codes
tx = graph.begin()
result = tx.run("""\
MATCH (g:Goal)-[:HAS_TARGET]->(t:Target)-[:HAS_INDICATOR]->(i:Indicator)-[:HAS_SERIES]->(s)
WITH collect(distinct s.code) as s_codes
RETURN s_codes
""").data()
graph.commit(tx)
un_codes = result[0]['s_codes']

# Get available geocodes
tx = graph.begin()
result = tx.run("""\
MATCH (ga:GeoArea)
UNWIND ga.code as codes
RETURN COLLECT(codes) as geocodes
""").data()
graph.commit(tx)
available_neo4j_eucodes = list(map(str, result[0]['geocodes']))
geocodes = set(available_neo4j_eucodes)
```
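As a quick, illustrative check (not part of the original walkthrough), you can confirm what was collected before calling the UN API:

```python
# Inspect the codes retrieved from Neo4j; the counts depend on your database contents
print(len(un_codes), 'series codes')
print(len(geocodes), 'distinct geo area codes')
print(un_codes[:5])
```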
```python
# get the start time
st = time.time()
dict_code_available_geocodes = {}

# Top-level `async with` works in a Jupyter/IPython cell; in a plain script,
# wrap this block in an async function and run it with asyncio.run()
async with aiohttp.ClientSession() as session:
    for code in un_codes:
        # Ask the UN API which geo areas have data for this series
        url = "https://unstats.un.org/sdgapi/v1/sdg/Series/GeoAreaCode"
        async with session.post(url, data={
                'Accept': 'application/json',
                'Content-Type': 'application/x-www-form-urlencoded',
                "seriesCodes": str(code)}) as response:
            data = await response.text()
        # Keep only the geo areas that also exist in the Neo4j graph
        res = data.strip('][').split(',')
        common_geocodes = [str(x) for x in res if str(x) in geocodes]
        dict_code_available_geocodes[str(code)] = common_geocodes

# get the end time
et = time.time()
# get the execution time
elapsed_time = et - st
print('Execution time:', elapsed_time/60, 'minutes')
```
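The loop above issues the POST requests one at a time. To make fuller use of `aiohttp`, a possible variant (a sketch only, assuming the API tolerates unthrottled concurrent requests; not part of the original guide) gathers all requests concurrently:

```python
async def fetch_geocodes(session, code):
    # POST one series code and keep only the geo areas that also exist in Neo4j
    url = "https://unstats.un.org/sdgapi/v1/sdg/Series/GeoAreaCode"
    async with session.post(url, data={"seriesCodes": str(code)}) as response:
        text = await response.text()
    res = text.strip('][').split(',')
    return str(code), [str(x) for x in res if str(x) in geocodes]

async def fetch_all(codes):
    async with aiohttp.ClientSession() as session:
        pairs = await asyncio.gather(*(fetch_geocodes(session, c) for c in codes))
    return dict(pairs)

# nest_asyncio.apply() above lets asyncio.run() be called from a notebook's running loop
dict_code_available_geocodes = asyncio.run(fetch_all(un_codes))
```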
```python
# get the start time
st = time.time()

code_number = 1
for code, geoarea in dict_code_available_geocodes.items():
    print(code)
    for geo in geoarea:
        print("Series(%s): %s, GeoArea: %s" % (code_number, code, geo))
        params = []
        # Initialize page number
        page_number = 1
        # Get information about the total pages
        response = requests.get("https://unstats.un.org/sdgapi/v1/sdg/Series/Data?seriesCode=" + str(code) +
                                "&areaCode=" + str(geo))
        a = response.json()
        total_pages = a['totalPages']
        print('Total elements:', a['totalElements'])
        while page_number <= total_pages:
            # request
            response = requests.get("https://unstats.un.org/sdgapi/v1/sdg/Series/Data?seriesCode=" + str(code) +
                                    "&areaCode=" + str(geo) + '&page=' + str(page_number))
            result = response.json()
            data = result['data']
            # Cypher statement that merges the metadata node, the observation and
            # their relationships for every collected row
            statement = """
            UNWIND $parameters as row
            MATCH (ga:GeoArea),(s:Series{code:row.s_code})
            WHERE row.geo in ga.code
            WITH ga,s,row
            MERGE (sm:SeriesMetadata{attributesCode:row.att_codes,attributesDescription:row.att_desc,
                                     dimensionsCode:row.dim_codes,dimensionsDescription:row.dim_desc,
                                     seriesCode:row.s_code})
            WITH ga,s,sm,row
            MERGE (s)-[:HAS_METADATA]->(sm)
            MERGE (sm)-[:HAS_OBSERVATION]->(o:Observation{year: row.year,value:row.value})-[:REFERS_TO_AREA]->(ga)
            """
            for j in range(len(data)):
                if data[j]['value'] != 'NaN':
                    # Get attributes & dimensions of each record
                    attributes = data[j]['attributes']
                    dimensions = data[j]['dimensions']
                    attributes_code_string = '|'.join(list(attributes.values()))
                    attributes_description_string = convert_codes_to_description(list(attributes.values()),
                                                                                 attributes_mapping)
                    dimension_code_string = '|'.join(list(dimensions.values()))
                    dimension_description_string = convert_codes_to_description(list(dimensions.values()),
                                                                                dimensions_mapping)
                    params_dict = {
                        'geo': str(data[j]['geoAreaCode']),
                        'year': int(data[j]['timePeriodStart']),
                        'value': float(data[j]['value']),
                        'att_desc': attributes_description_string,
                        'att_codes': attributes_code_string,
                        'dim_desc': dimension_description_string,
                        'dim_codes': dimension_code_string,
                        's_code': str(code)
                    }
                    params.append(params_dict)
            print('Page %s: Total size of data (until this page): %s' % (page_number, len(params)))
            page_number += 1
        # Import all rows collected for this (series, geo area) pair in one transaction
        tx = graph.auto()
        tx.evaluate(statement, parameters={"parameters": params})
        print("Data imported: %s" % (len(params)))
        print('---------------------------------------------------')
        print()
    code_number += 1
# get the end time
et = time.time()
# get the execution time
elapsed_time = et - st
print('Execution time:', elapsed_time/60, 'minutes')
```
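As an optional, illustrative check (not part of the original walkthrough), a small query against the pattern created by the statement above can confirm that observations were linked to their series and geo areas:

```python
# Sample a few imported observations; labels and relationships follow the import statement above
rows = graph.run("""
MATCH (s:Series)-[:HAS_METADATA]->(sm:SeriesMetadata)
      -[:HAS_OBSERVATION]->(o:Observation)-[:REFERS_TO_AREA]->(ga:GeoArea)
RETURN s.code AS series, ga.code AS geo_codes, o.year AS year, o.value AS value
LIMIT 5
""").data()
for row in rows:
    print(row)
```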
An overview of the UN SDG ontology is shown below: