Commit d58ac4bc authored by ViktoriaO1's avatar ViktoriaO1

added vis code again

parent ee92b468
This source diff could not be displayed because it is too large. You can view the blob instead.
name: GeoEnv
channels:
- defaults
dependencies:
- appnope=0.1.0=py36hf537a9a_0
- asn1crypto=0.24.0=py36_0
- attrs=19.1.0=py36_1
- backcall=0.1.0=py36_0
- blas=1.0=mkl
- bleach=3.1.0=py36_0
- bokeh=1.1.0=py36_0
- bzip2=1.0.6=h1de35cc_5
- ca-certificates=2019.1.23=0
- cairo=1.14.12=hc4e6be7_4
- cartopy=0.16.0=py36h9263bd1_0
- certifi=2019.3.9=py36_0
- cffi=1.12.3=py36hb5b8e2f_0
- cftime=1.0.3.4=py36h1d22016_0
- chardet=3.0.4=py36_1
- click=7.0=py36_0
- click-plugins=1.1.1=py_0
- cligj=0.5.0=py36_0
- cloudpickle=1.0.0=py_0
- colorcet=2.0.1=py_0
- cryptography=2.3.1=py36hdbc3d79_0
- curl=7.61.1=ha441bb4_0
- cycler=0.10.0=py36hfc81398_0
- cytoolz=0.9.0.1=py36h1de35cc_1
- dask=1.2.2=py_0
- dask-core=1.2.2=py_0
- datashader=0.7.0=py_0
- datashape=0.5.4=py36_1
- dbus=1.13.6=h90a0687_0
- decorator=4.4.0=py36_1
- defusedxml=0.6.0=py_0
- descartes=1.1.0=py36_0
- distributed=1.28.0=py36_0
- entrypoints=0.3=py36_0
- et_xmlfile=1.0.1=py36h1315bdc_0
- expat=2.2.6=h0a44026_0
- fiona=1.7.12=py36h0dff353_0
- fontconfig=2.13.0=h5d5b041_1
- freetype=2.9.1=hb4e5f40_0
- freexl=1.0.5=h1de35cc_0
- gdal=2.2.4=py36h6440ff4_2
- geopandas=0.4.0=py36_1
- geos=3.6.2=h5470d99_2
- geoviews=1.6.2=py_0
- geoviews-core=1.6.2=py_0
- gettext=0.19.8.1=h15daf44_3
- giflib=5.1.4=h1de35cc_1
- glib=2.56.2=hd9629dc_0
- hdf4=4.2.13=h39711bb_2
- hdf5=1.8.18=h017327d_1
- heapdict=1.0.0=py36_2
- holoviews=1.12.1=py_2
- icu=58.2=h4b95b61_1
- idna=2.8=py36_0
- imageio=2.5.0=py36_0
- intel-openmp=2019.3=199
- ipykernel=4.8.2=py36_0
- ipython=7.5.0=py36h39e3cac_0
- ipython_genutils=0.2.0=py36h241746c_0
- ipywidgets=7.4.2=py36_0
- jdcal=1.4.1=py_0
- jedi=0.13.3=py36_0
- jinja2=2.10.1=py36_0
- jpeg=9b=he5867d9_2
- json-c=0.13.1=h3efe00b_0
- jsonschema=3.0.1=py36_0
- jupyter=1.0.0=py36_7
- jupyter_client=5.2.4=py36_0
- jupyter_console=6.0.0=py36_0
- jupyter_core=4.4.0=py36_0
- kealib=1.4.7=haa18932_5
- kiwisolver=1.1.0=py36h0a44026_0
- krb5=1.16.1=h24a3359_6
- libboost=1.67.0=hebc422b_4
- libcurl=7.61.1=hf30b1f0_0
- libcxx=4.0.1=hcfea43d_1
- libcxxabi=4.0.1=hcfea43d_1
- libdap4=3.19.1=h3d3e54a_0
- libedit=3.1.20181209=hb402a30_0
- libffi=3.2.1=h475c297_4
- libgdal=2.2.4=ha208382_2
- libgfortran=3.0.1=h93005f0_2
- libiconv=1.15=hdd342a3_7
- libkml=1.3.0=hbe12b63_4
- libnetcdf=4.6.1=hca06a9a_0
- libpng=1.6.37=ha441bb4_0
- libpq=10.5=hf30b1f0_0
- libsodium=1.0.16=h3efe00b_0
- libspatialindex=1.8.5=h2c08c6b_2
- libspatialite=4.3.0a=ha12ebda_19
- libssh2=1.8.0=h322a93b_4
- libtiff=4.0.10=hcb84e12_2
- libuuid=1.0.3=h6bb4b03_2
- libxml2=2.9.9=hab757c2_0
- libxslt=1.1.33=h33a18ac_0
- llvmlite=0.28.0=py36h8c7ce04_0
- locket=0.2.0=py36hca03003_1
- lxml=4.3.3=py36hef8c89e_0
- markupsafe=1.1.1=py36h1de35cc_0
- matplotlib=3.0.3=py36h54f8f79_0
- mistune=0.8.4=py36h1de35cc_0
- mkl=2019.3=199
- mkl_fft=1.0.12=py36h5e564d8_0
- mkl_random=1.0.2=py36h27c97d8_0
- msgpack-python=0.6.1=py36h04f5b5a_1
- multipledispatch=0.6.0=py36_0
- munch=2.3.2=py36_0
- nbconvert=5.5.0=py_0
- nbformat=4.4.0=py36h827af21_0
- ncurses=6.1=h0a44026_1
- netcdf4=1.4.1=py36h1767c64_0
- networkx=2.3=py_0
- notebook=5.7.8=py36_0
- numba=0.43.1=py36h6440ff4_0
- numpy=1.16.3=py36hacdab7b_0
- numpy-base=1.16.3=py36h6575580_0
- olefile=0.46=py36_0
- openjpeg=2.3.0=hb95cd4c_1
- openpyxl=2.6.2=py_0
- openssl=1.0.2r=h1de35cc_0
- owslib=0.17.1=py_0
- packaging=19.0=py36_0
- pandas=0.24.2=py36h0a44026_0
- pandoc=2.2.3.2=0
- pandocfilters=1.4.2=py36_1
- param=1.9.0=py_0
- parso=0.4.0=py_0
- partd=0.3.10=py36_1
- pcre=8.43=h0a44026_0
- pexpect=4.7.0=py36_0
- pickleshare=0.7.5=py36_0
- pillow=6.0.0=py36hb68e598_0
- pip=19.1=py36_0
- pixman=0.38.0=h1de35cc_0
- poppler=0.65.0=ha097c24_1
- poppler-data=0.4.9=0
- proj4=5.0.1=h1de35cc_0
- prometheus_client=0.6.0=py36_0
- prompt_toolkit=2.0.9=py36_0
- psutil=5.6.2=py36h1de35cc_0
- psycopg2=2.7.5=py36hdbc3d79_0
- ptyprocess=0.6.0=py36_0
- pycparser=2.19=py36_0
- pyct=0.4.6=py36_0
- pyepsg=0.4.0=py36_0
- pygments=2.3.1=py36_0
- pyopenssl=19.0.0=py36_0
- pyparsing=2.4.0=py_0
- pyproj=1.9.5.1=py36h833a5d7_1
- pyqt=5.9.2=py36h655552a_2
- pyrsistent=0.14.11=py36h1de35cc_0
- pysal=1.14.4.post1=py36_1
- pyshp=2.1.0=py_0
- pysocks=1.6.8=py36_0
- python=3.6.6=hc167b69_0
- python-dateutil=2.8.0=py36_0
- pytz=2019.1=py_0
- pyviz_comms=0.7.2=py_0
- pywavelets=1.0.3=py36h1d22016_1
- pyyaml=5.1=py36h1de35cc_0
- pyzmq=18.0.0=py36h0a44026_0
- qt=5.9.7=h468cd18_1
- qtconsole=4.4.3=py36_0
- readline=7.0=h1de35cc_5
- requests=2.21.0=py36_0
- rtree=0.8.3=py36_0
- scikit-image=0.15.0=py36h0a44026_0
- scipy=1.2.1=py36h1410ff5_0
- send2trash=1.5.0=py36_0
- setuptools=41.0.1=py36_0
- shapely=1.6.4=py36h20de77a_0
- sip=4.19.8=py36h0a44026_0
- six=1.12.0=py36_0
- sortedcontainers=2.1.0=py36_0
- sqlalchemy=1.3.3=py36h1de35cc_0
- sqlite=3.28.0=ha441bb4_0
- tblib=1.4.0=py_0
- terminado=0.8.2=py36_0
- testpath=0.4.2=py36_0
- tk=8.6.8=ha441bb4_0
- toolz=0.9.0=py36_0
- tornado=6.0.2=py36h1de35cc_0
- tqdm=4.31.1=py36_1
- traitlets=4.3.2=py36h65bd3ce_0
- urllib3=1.24.2=py36_0
- wcwidth=0.1.7=py36h8c6ec74_0
- webencodings=0.5.1=py36_1
- wheel=0.33.1=py36_0
- widgetsnbextension=3.4.2=py36_0
- xarray=0.12.1=py_0
- xerces-c=3.2.2=h44e365a_0
- xlrd=1.2.0=py36_0
- xz=5.2.4=h1de35cc_4
- yaml=0.1.7=hc338f04_2
- zeromq=4.3.1=h0a44026_3
- zict=0.1.4=py36_0
- zlib=1.2.11=h1de35cc_3
- zstd=1.3.7=h5bba6e5_0
# Packages installed through pip; they must be nested under the `pip` key,
# otherwise they are read as ordinary conda dependencies and `pip` stays empty.
- pip:
  - knoema==1.0.17b1
  - mock==3.0.5
  - msgpack==0.6.1
  - numexpr==2.6.9
  - pandas-bokeh==0.2
  - tables==3.5.1
prefix: /anaconda3/envs/GeoEnv
# File paths for reading and writing data for visualisation purposes.
# File for the Hack4Good program presentation on 20.05.2019
# Author: V. de La Rochefoucauld

# ----------------------- RAW DATA ------------------------- #
initial_sample: '../../Data/raw/reach_nga_msna_initial_sample.xlsx'
raw_dataset: '../../Data/raw/reach_nga_msna_clean_dataset_final.xlsx'

# ------------------- GEOGRAPHICAL DATA ---------------------- #
# Shapefiles with administrative boundaries (adm1 / adm2 / adm3 file names).
nigeria_map_path_states: '../../Data/GeoFiles/nga_admbnda_adm1_osgof_20190417.shp'
nigeria_map_path_LGA: '../../Data/GeoFiles/nga_admbnda_adm2_osgof_20190417.shp'
nigeria_map_path_wards: '../../Data/GeoFiles/nga_admbnda_adm3_osgof_eha_20190417.shp'
nigeria_map: '../../Data/NGA_adm/NGA_adm1.shp'

# --------------------- PROCESSED DATA ------------------------ #
human: '../../Data/processed/human.pickle'
wards_lonlat: '../../Data/processed/wards_lonlat.pickle'
wards_geometry: '../../Data/processed/wards_geometry.pickle'
predictions: '../../Data/processed/PiN_test_pred.csv'

# -------------------- VISUALISATION MODE ---------------------- #
# Possible modes: 'pin_visualisation' and 'prediction'.
vis_mode: 'prediction'
"""
Simple interactive map created using Bokeh and Pandas-Bokeh, a higher-level library that
facilitates the usage of Bokeh, interfacing with dataframes.
Script for the Hack4Good program presentation on 20.05.2019
Author: V. de La Rochefoucauld
"""
import geopandas as gpd
import pandas as pd
from bokeh.plotting import show
from bokeh.palettes import brewer
import pandas_bokeh
from shapely.geometry import Polygon
import pickle
import yaml
# ------------------------- STEP 0: FIRST REMARKS... ---------------------------- #
"""
#If you need to recreate the geometrical files (wards_geometry and wards_latlon), you can use this script.
import os
import sys
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
import wards_geometry as geo
geo.create_wards_geometry(save=True, returnme=False)
"""
# --------------------- SETUP I/O LOCATIONS AND READ DATA ----------------------- #
# Set output location for map html file. Goes to outputs folder per default.
pandas_bokeh.output_file('outputs/nigeria_map.html')
# Read the yaml file that contains all the necessary paths of raw and processed data.
# safe_load only constructs plain Python objects, which is all this mapping of
# strings needs; yaml.load without an explicit Loader can build arbitrary objects.
# The context manager closes the file handle as soon as the file has been parsed.
with open('./file_locations.yml', 'rb') as config_file:
    files = yaml.safe_load(config_file)
# Read the geometry file containing the geographical shapes of the wards.
# NOTE(review): unpickling runs whatever the file encodes -- only load pickles that
# were written by this project.
with open(files['wards_geometry'], 'rb') as geometry_file:
    wards_geometry = pickle.load(geometry_file)
# Read in the predictions that were saved into a csv file.
predictions = pd.read_csv(files['predictions'])
# ------------------- MERGE GEOGRAPHICAL DATA AND ANALYSES-------------------------#
# Merge the predictions with the geographical data; the left join keeps every
# prediction row, including camps that have no entry in wards_geometry.
pins_and_wards = pd.merge(predictions, wards_geometry, how='left', on='Camp')
# Drop leftover index columns (unimportant remnants from the csv import).
# errors='ignore' removes whichever of the two columns exist. Wrapping the drop in
# try/except KeyError removed *nothing* as soon as one of them was missing, which
# shifted the positional column selection that follows.
pins_and_wards.drop(columns=['Unnamed: 0', '_parent_index'], errors='ignore', inplace=True)
# ------------------------- SPECIFY COLUMNS TO PLOT ------------------------------- #
# We now distinguish between the PiN values and 'in need' index in the dataframe.
# The PiN columns are picked by position (columns 2 to 9), so this relies on the
# column order of the predictions file.
pin_cols = list(pins_and_wards.columns[2:10])
need_cols = [col + '_in_need' for col in pin_cols]
pred_cols = [col + '_pred' for col in pin_cols]
# ----------------------- AGGREGATE DATA FOR PLOTTING ------------------------------ #
# Aggregate the data by means of the camp for plotting.
data_agg = pins_and_wards.copy()
# numeric_only=True makes explicit that non-numeric columns (such as the geometry
# objects) are left out of the mean; recent pandas versions raise on them instead
# of silently dropping them.
aggregated = data_agg.groupby('Camp').mean(numeric_only=True)
# Turn the group key back into a regular 'Camp' column (appended as last column).
aggregated['Camp'] = aggregated.index
aggregated.reset_index(drop=True, inplace=True)
# Merge it back to add the spatial information that got lost while grouping.
agg_geometry = pd.merge(aggregated, wards_geometry, how='left', on='Camp')
# ------------- SELECT WHAT TO PLOT & CONVERT TO GEODATAFRAME ----------------------#
# Read in what should be plotted (either predictions which are based on the 'in need'
# criterion or the actual PiN values which are not aggregated by the 'in need'
# criterion - PiN value > 4)
mode = files['vis_mode']
if mode == 'prediction':
    # Take every column that is not a raw PiN value: 'Camp', 'geometry' and the
    # remaining (in-need / prediction) columns.
    pred_cols = [col for col in agg_geometry.columns if col not in pin_cols]
    pred_scores = agg_geometry[pred_cols].copy()
    # Convert the per-camp means into percentages of people in need.
    score_cols = [col for col in pred_cols if col not in ('Camp', 'geometry')]
    pred_scores[score_cols] = pred_scores[score_cols] * 100
    # Convert it into a GeoDataFrame in order to be able to plot spatially.
    pnw_gpd = gpd.GeoDataFrame(pred_scores)
    # Drop the overall columns, as we want to examine only the individual sectors.
    pnw_gpd.drop(columns=['Overall_in_need', 'Overall_pred'], inplace=True)
elif mode == 'pin_visualisation':
    # Take only the PiN value columns.
    pin_scores = agg_geometry[['Camp', 'geometry'] + pin_cols].copy()
    # Convert it into a GeoDataFrame in order to be able to plot spatially.
    pnw_gpd = gpd.GeoDataFrame(pin_scores)
    # Drop the overall column, as we want to examine only the individual sectors.
    pnw_gpd.drop(columns=['Overall'], inplace=True)
else:
    # Fail here with a clear message instead of a NameError on pnw_gpd further down.
    raise ValueError(
        "Unknown vis_mode {!r}; expected 'prediction' or 'pin_visualisation'.".format(mode)
    )
# ------------------------MAKE SURE LOCATION ENCODING MATCHES ----------------------------#
# Declare the coordinates as EPSG:4326.
pnw_gpd.crs = {'init': 'epsg:4326'}
# Rows whose geometry is still the integer 0 get an empty polygon instead.
is_placeholder = pnw_gpd['geometry'] == 0
pnw_gpd.loc[is_placeholder, 'geometry'] = Polygon()
# Rows without a camp name cannot be labelled, so they are removed.
pnw_gpd.dropna(subset=['Camp'], inplace=True)
# --------------------------------- SETUP PLOT -------------------------------------------#
# Five-colour yellow-orange-red palette, reversed so that dark red marks the
# highest values.
palette = brewer['YlOrRd'][5][::-1]
# Every column except the identifying ones becomes an entry of the dropdown.
non_data_cols = ['geometry', 'Camp']
dropdown_cols = [col for col in pnw_gpd.columns if col not in non_data_cols]
# Make map plot via pandas_bokeh library; the figure is shown later, after the
# callback adjustments.
p = pnw_gpd.plot_bokeh(
    figsize=(1300, 700),
    simplify_shapes=100,
    dropdown=dropdown_cols,
    colormap=palette,
    color=None,
    hovertool_columns=['Camp', dropdown_cols[0]],
    fill_alpha=0.3,
    show_figure=False,
)
# ----------------------------- MAKE CUSTOM ALTERATIONS -------------------------------------#
# In order to get the hover tool to update with only the relevant information according to the dropdown,
# you need to alter the CustomJS code for the javascript callback and also its arguments upon clicking
# the dropdown menu and selecting a new category.
# NOTE(review): this reaches into the layout returned by plot_bokeh by position -- presumably
# children[0] is the dropdown widget and children[1] the figure; confirm when upgrading pandas_bokeh.
# First, add the already existing HoverTool to the argument list of the Javascript callback
# so that the callback code below can refer to it as `hover`.
p.children[0].js_property_callbacks['change:value'][0].args['hover'] = p.children[1].hover[0]
# Then, replace the CustomJS codeblock so that it recolours the map, relabels the legend and
# rewrites the second tooltip entry (index 1) to show only the selected category's value.
p.children[0].js_property_callbacks['change:value'][0].code = """
//Change selection of field for Colormapper for choropleth plot:
geo_source.data["Colormap"] = geo_source.data[dropdown_widget.value];
geo_source.change.emit();
//Change label of Legend:
legend.label["value"] = " " + dropdown_widget.value;
var col = String(dropdown_widget.value);
// Change value of hovertool:
hover.tooltips[1][0] = "People in need %" ;
hover.tooltips[1][1] = "@"+ col;
"""
# Finally, adapt the text showing on the dropdown button to your taste.
p.children[0].label = 'Select Heatmap Criterion'
# ----------------------------------- SHOW PLOT --------------------------------------------#
# The figure was created with show_figure=False above so that the changes could be applied
# first; it is displayed explicitly here.
show(p)
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
"""
Processing scripts that create the geometry spatial location conversions for the visualisations.
Script for the Hack4Good program presentation on 20.05.2019
Author: V. de La Rochefoucauld
"""
import pandas as pd
from tqdm import tqdm
import numpy as np
from shapely.geometry import Point
import yaml
import pickle
import geopandas as gpd
def ward_matching(table, wards, files, save=True):
    """
    Match all of the camps listed in the dataset with the wards that contain them.

    Each camp is located by the mean longitude/latitude of its rows in ``table`` and
    tested against every ward geometry. This laborious point-in-polygon approach is
    used instead of a join on names, so that corrupted camp names cannot cause
    mismatches.

    :param table: cleaned, human readable dataset (dataframe) with the columns
        'Camp', 'longitude' and 'latitude'
    :param wards: geopandas file with spatial boundaries of nigerian wards; it is
        modified in place (a 'Camp' column is added or overwritten)
    :param files: file location dictionary; the entry 'wards_lonlat' is the pickle
        destination used when ``save`` is true
    :param save: boolean whether to save or not to save
    :return wards: the same dataframe, whose 'Camp' column holds for every ward the
        list of camp names located inside it, or None if no camp lies in the ward
    """
    print('Matching wards and longitude latitudes.')
    print('This might take a while...')
    camps = table[['Camp', 'longitude', 'latitude']].copy()
    camps = camps.groupby('Camp').mean()
    # One slot per ward, addressed by row position. An object array starts out
    # filled with None and can hold a list of different length in every slot.
    camps_per_ward = np.empty(len(wards), dtype=object)
    for camp_name, row in tqdm(camps.iterrows(), total=camps.shape[0]):
        # The camp location does not depend on the ward, so build it only once.
        camp_location = Point(row['longitude'], row['latitude'])
        # No early exit: a camp is recorded for every polygon that contains it.
        for position, polygon in enumerate(wards.geometry):
            if camp_location.within(polygon):
                if camps_per_ward[position] is None:
                    camps_per_ward[position] = []
                camps_per_ward[position].append(camp_name)
    # Assigning the array is positional, so this also works when the index of
    # `wards` is not the default 0..n-1 range.
    wards['Camp'] = camps_per_ward
    if save:
        with open(files['wards_lonlat'], "wb") as handle:
            pickle.dump(wards, handle)
    return wards
def merge_df_lists(df1, df2):
    """
    Copy ward geometries from ``df2`` into ``df1``, matching on camp names.

    Workaround for the normal pandas merge function, which cannot be used here
    because every row of ``df2`` carries a *list* of camp names (and geospatial
    objects) rather than a single key.

    :param df1: dataframe 1, with a 'Camp' column holding one camp name per row and a
        'geometry' column that is overwritten for matched camps; modified in place
    :param df2: dataframe 2, with a 'geometry' column and a 'Camp' column holding a
        list of camp names per row, or None for rows without camps
    :return: merged dataframe (the same object as ``df1``)
    """
    # Build the camp -> geometry lookup once instead of rescanning df2 for every row
    # of df1. Rows are visited in order, so if a camp appears in several rows the
    # last one wins.
    geometry_by_camp = {}
    for _, wardrow in df2.iterrows():
        ward_camps = wardrow['Camp']
        if ward_camps is None:
            continue
        for camp in ward_camps:
            geometry_by_camp[camp] = wardrow['geometry']
    for idx, row in tqdm(df1.iterrows(), total=df1.shape[0]):
        camp = row['Camp']
        # Camps without a matching ward keep whatever 'geometry' they had before.
        if camp in geometry_by_camp:
            df1.loc[idx, 'geometry'] = geometry_by_camp[camp]
    return df1
def create_wards_geometry(save=True, returnme=False):
    """
    Build the camp -> ward geometry table from the cleaned dataset and the shapefile
    outlining Nigerian wards, and save and/or return it.

    All file locations are read from './file_locations.yml' (keys 'human',
    'nigeria_map_path_wards', 'wards_lonlat' and 'wards_geometry').

    :param save: boolean, if the results are to be pickled. This also controls the
        intermediate ward matching file written by ward_matching.
    :param returnme: boolean, if the resulting dataframe should be returned.
    :return: dataframe with one row per unique camp and its ward geometry (the
        placeholder 0 where no ward was matched) if ``returnme`` is true, else None
    """
    # Load file locations, cleaned human readable dataset and shapefile containing nigeria ward boundaries.
    # safe_load is sufficient for a mapping of plain strings and avoids yaml.load's
    # ability to construct arbitrary objects.
    with open('./file_locations.yml', 'rb') as config_file:
        files = yaml.safe_load(config_file)
    # NOTE(review): unpickling runs whatever the file encodes -- only load pickles
    # that were written by this project.
    with open(files['human'], 'rb') as table_file:
        table = pickle.load(table_file)
    wards = gpd.read_file(files['nigeria_map_path_wards'])
    # Match the wards with the Camps. The save flag is forwarded so that save=False
    # really writes nothing to disk.
    wards = ward_matching(table, wards, files, save=save)
    # One row per unique camp; 0 is the placeholder for "no geometry found yet".
    wards_single = pd.DataFrame(data={'Camp': table['Camp'].unique(), 'geometry': 0})
    wards_geometry = merge_df_lists(wards_single, wards)
    # Save and/or return. The two flags are independent, so asking for both no longer
    # drops the return value.
    if save:
        with open(files['wards_geometry'], "wb") as out_file:
            pickle.dump(wards_geometry, out_file)
    if returnme:
        return wards_geometry
    return None
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment