On this page I would like to show you how the map of the dog owners' distribution was drawn.
First I created geodata_groningen.csv
- a dataset with the geometry (Lat/Lon) coordinates of the Groningen neighborhoods. To repeat these steps, you need to download the Wijk- en buurtkaart 2018 dataset and execute this code:
import geopandas as gpd
# Read the Wijk- en buurtkaart dataset from the local download.
geo_data = gpd.read_file("path to the local database")

# The source coordinates use the Dutch Rijksdriehoeksmeting (RD) system,
# EPSG:28992. set_crs() returns a new object unless inplace=True, so the
# result has to be assigned; it is only needed when the file itself does not
# declare a CRS (to_crs() cannot transform geometries without one).
if geo_data.crs is None:
    geo_data = geo_data.set_crs(epsg=28992)

# Convert RD coordinates to Lat/Long (EPSG:4326).
geo_data = geo_data.to_crs(epsg=4326)

# Keep only the Groningen rows and the columns used later on.
geo_data = geo_data[geo_data['gemeentenaam'] == "Groningen"]
geo_data = geo_data[[
    'geometry',
    'buurtcode',
    'buurtnaam',
    'percentage_personen_0_tot_15_jaar',
    'percentage_personen_15_tot_25_jaar',
    'percentage_personen_25_tot_45_jaar',
    'percentage_personen_45_tot_65_jaar',
    'percentage_personen_65_jaar_en_ouder',
    'percentage_ongehuwd',
    'percentage_gehuwd',
    'percentage_gescheid',
    'percentage_verweduwd',
    'percentage_westerse_migratieachtergrond',
    'percentage_niet_westerse_migratieachtergrond',
    'percentage_overige_nietwestersemigratieachtergrond',
]]

# Write the reduced dataset to a .csv file (without the index column).
geo_data.to_csv(r'geodata_groningen.csv', index=False)
I did this transformation because the original database is too big to add to the GitHub repo where I store my code.
Now let’s read the geo dataset and also prepare the dog owners dataset to make it more convenient to use:
import pandas as pd
import geopandas as gpd
import folium
import matplotlib.pyplot as plt
import shapely
# Neighborhood geometries and demographics written by the export step.
geo_data = pd.read_csv('geodata_groningen.csv')

# Dog registrations; the file is semicolon-separated.
dogs_data = pd.read_csv('honden_data.csv', sep=";")

# Build the join key used by the geo dataset ("BU00" + neighborhood number)
# for the whole column at once instead of calling a lambda per row.
dogs_data['buurtcode'] = "BU00" + dogs_data['buurtnr'].astype(str)

# A bare '.' in the 2016 column is replaced by 0 so the column can be cast to
# int (presumably '.' marks a missing count - confirm against the data source).
# NOTE(review): only the 2016 column is cleaned here; other year columns are
# left exactly as read from the file.
dogs_data['2016'] = dogs_data['2016'].replace('.', 0)
dogs_data = dogs_data.astype({'2016': int})

# Preview the first rows of the geo dataset (notebook display).
geo_data.head()
geometry | buurtcode | buurtnaam | percentage_personen_0_tot_15_jaar | percentage_personen_15_tot_25_jaar | percentage_personen_25_tot_45_jaar | percentage_personen_45_tot_65_jaar | percentage_personen_65_jaar_en_ouder | percentage_ongehuwd | percentage_gehuwd | percentage_gescheid | percentage_verweduwd | percentage_westerse_migratieachtergrond | percentage_niet_westerse_migratieachtergrond | percentage_overige_nietwestersemigratieachtergrond | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | MULTIPOLYGON (((6.567572790847121 53.222218021... | BU00140000 | Binnenstad-Noord | 2 | 49 | 33 | 10 | 6 | 88 | 8 | 4 | 1 | 20 | 11 | 8 |
1 | MULTIPOLYGON (((6.666673158437639 53.224597213... | BU00141406 | De Wierden | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 |
2 | MULTIPOLYGON (((6.571320358150779 53.210739809... | BU00140103 | Rivierenbuurt | 5 | 30 | 37 | 14 | 13 | 80 | 12 | 6 | 3 | 16 | 10 | 7 |
3 | MULTIPOLYGON (((6.531444021993535 53.250242251... | BU00141003 | Zernike Campus | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 | -99999999 |
4 | MULTIPOLYGON (((6.599527912942206 53.176890000... | BU00141701 | Haren-Zuidwest | 16 | 8 | 16 | 24 | 37 | 41 | 40 | 9 | 11 | 9 | 5 | 4 |
And now let’s take a look at the map and see where most of the dogs live and who their owners are.
from branca.colormap import linear

# Column of dogs_data (one per year) that is visualised on the map.
year = '2018'

# Base map with a light tile layer, opened at a fixed location and zoom level.
m = folium.Map(
    location=[53.22222, 6.56757],
    zoom_start=12,
    tiles='CartoDB positron',
)

# Yellow-to-red colour scale spanning the smallest and largest value found in
# the selected year column; it is added to the map as a legend.
year_counts = dogs_data[year]
colormap = linear.YlOrRd_09.scale(min(year_counts), max(year_counts))
colormap.caption = f'Amount of dogs owners in Groningen in {year}'
colormap.add_to(m)
def style_fn(feature):
    """Return the style dict folium applies to one GeoJSON feature.

    The fill colour is looked up in the module-level ``colormap`` using the
    feature's ``dogs_registered`` property; the outline settings are fixed.
    """
    registered = feature["properties"]["dogs_registered"]
    return dict(
        fillColor=colormap(registered),
        fillOpacity=0.9,
        weight=0.9,
        opacity=1,
        color="black",
    )
def get_dogs_amount(row):
    """Return the value of the ``year`` column for the neighborhood in *row*.

    Looks up the rows of the module-level ``dogs_data`` whose ``buurtcode``
    equals ``row['buurtcode']``. Returns 0 when there is no such row,
    otherwise the value from the first matching row.
    """
    matches = dogs_data[dogs_data['buurtcode'] == row['buurtcode']]
    if matches.empty:
        return 0
    return matches[year].iloc[0]
def get_buurt_naam(row):
    """Return the neighborhood name stored under ``buurtnaam`` in *row*."""
    buurt_naam = row["buurtnaam"]
    return buurt_naam
# Import the submodule explicitly: a plain `import shapely` only exposes
# `shapely.wkt` if something else happens to have loaded it already.
import shapely.wkt


def _dominant_category(frame, columns, prefix):
    """Return, per row, a readable label for the column with the largest value.

    The label is the winning column name with every occurrence of *prefix*
    removed and underscores replaced by spaces.

    NOTE: idxmax picks the first listed column when all values in a row are
    equal, which is the case for rows that hold the -99999999 placeholder in
    every column.
    """
    winners = frame[columns].idxmax(axis=1).astype(str)
    winners = winners.str.replace(prefix, '', regex=False)
    return winners.str.replace('_', ' ', regex=False)


# Number of registered dogs per neighborhood for the selected year.
geo_data['dogs_registered'] = geo_data.apply(get_dogs_amount, axis=1)

# Largest age group per neighborhood.
geo_data['owners_age'] = _dominant_category(
    geo_data,
    [
        'percentage_personen_0_tot_15_jaar',
        'percentage_personen_15_tot_25_jaar',
        'percentage_personen_25_tot_45_jaar',
        'percentage_personen_45_tot_65_jaar',
        'percentage_personen_65_jaar_en_ouder',
    ],
    'percentage_personen_',
)

# Most common marital status per neighborhood.
geo_data['status'] = _dominant_category(
    geo_data,
    [
        'percentage_ongehuwd',
        'percentage_gehuwd',
        'percentage_gescheid',
        'percentage_verweduwd',
    ],
    'percentage_',
)

# Neighborhood name shown in the tooltip.
geo_data['buurt'] = geo_data.apply(get_buurt_naam, axis=1)

# Largest migration-background group per neighborhood.
geo_data['most_migrants_met'] = _dominant_category(
    geo_data,
    [
        'percentage_westerse_migratieachtergrond',
        'percentage_niet_westerse_migratieachtergrond',
        'percentage_overige_nietwestersemigratieachtergrond',
    ],
    'percentage_',
)

# The CSV stores geometries as WKT text; parse them back into shapely objects.
geo_data['geometry'] = geo_data['geometry'].apply(shapely.wkt.loads)

# The coordinates were exported as Lat/Long, i.e. EPSG:4326 (not 4329).
gdf = gpd.GeoDataFrame(data=geo_data, geometry='geometry', crs=4326)

# Draw the neighborhoods, coloured by dog count, with a hover tooltip.
folium.GeoJson(
    gdf.__geo_interface__,
    style_function=style_fn,
    tooltip=folium.features.GeoJsonTooltip(
        ['buurt', "dogs_registered", "owners_age", 'status', 'most_migrants_met']
    ),
).add_to(m)

# Display the map (notebook output).
m
You can find an additional analysis of the dog owners in Groningen in general, as well as an analysis of the neighborhoods where most of the dogs live, on separate pages.