#!/usr/bin/env python
# -*- coding: utf-8 -*-
from functools import partial
from pathlib import Path
import warnings
import country_converter as coco
import geopandas as gpd
from mpl_toolkits.axes_grid1 import make_axes_locatable
import pandas as pd
import numpy as np
import requests
from unidecode import unidecode
from covsirphy.visualization.vbase import VisualizeBase
[docs]class ColoredMap(VisualizeBase):
"""
Create global map with pandas.DataFrame.
Args:
filename (str or None): filename to save the figure or None (display)
kwargs: the other arguments of matplotlib.pyplot.savefig()
"""
def __init__(self, filename=None, **kwargs):
super().__init__(filename=filename, **kwargs)
self._to_iso3 = partial(coco.convert, to="ISO3", not_found=None)
self._geo_dirpath = Path("input")
def __enter__(self):
return super().__enter__()
def __exit__(self, *exc_info):
return super().__exit__(*exc_info)
@property
def directory(self):
"""
str: directory to save the downloaded files of geometry information
"""
return str(self._geo_dirpath)
@directory.setter
def directory(self, name):
self._geo_dirpath = Path(name)
[docs] def plot(self, data, level="Country", included=None, excluded=None, logscale=True, **kwargs):
"""
Set dataframe and the variable to show in a colored map.
Args:
data (pandas.DataFrame): data to show
Index
reset index
Columns
- Country (str or pandas.Category): country name(s)
- Province (str or pandas.Category): province names, necessary when @level is 'Province'
- Value (int or float or None): values to coloring the map
- ISO3 (str): ISO3 codes, optional
level (str): 'Country' (global map) or 'Province' (country-specific map)
logscale (bool): whether convert the value to log10 scale values or not
included (list[str] or None): included countries/provinces or None (all)
excluded (list[str] or None): excluded countries/provinces or None (all)
kwargs: arguments of geopandas.GeoDataFrame.plot() except for 'column'
Raises:
ValueError: labels for data are not unique
UnExpectedValueError: some countries' records are included when @level is 'Province'
SubsetNotFoundError: no geometry information available for the labels
"""
expected_cols = [self.COUNTRY, "Value"] + [] if level == self.COUNTRY else [self.PROVINCE]
self._ensure_dataframe(data, name="data", columns=expected_cols)
if level == self.COUNTRY:
# Global map with country level data
if not data[self.COUNTRY].is_unique:
raise ValueError(f"{self.COUNTRY} column of data should be unique.")
gdf = self._global_data(data=data, included=included, excluded=excluded)
else:
# Country-specific map with province level data
unique_n = data[self.COUNTRY].nunique()
if unique_n > 1:
raise ValueError(
f"{self.COUNTRY} column of data should have only one country name when @level is {self.PROVINCE}, but {unique_n} found.")
country = data[self.COUNTRY].value_counts().index[0]
if not data[self.PROVINCE].is_unique:
raise ValueError(f"{self.PROVINCE} column of data should be unique.")
gdf = self._country_specific_data(data, included=included, excluded=excluded, country=country)
gdf.loc[gdf["Value"] < 0, "Value"] = 0
# Colorbar
divider = make_axes_locatable(self._ax)
cax = divider.append_axes("bottom", size="5%", pad=0.1)
# Arguments of plotting with GeoPandas
plot_kwargs = {
"legend": True,
"cmap": "coolwarm",
"ax": self._ax,
"cax": cax,
"missing_kwds": {
"color": "lightgrey",
"edgecolor": "white",
"hatch": "///",
}
}
plot_kwargs.update(kwargs)
plot_kwargs["legend_kwds"] = {"orientation": "horizontal"}
# Convert to log10 scale
if logscale:
gdf["Value"] = np.log10(gdf["Value"] + 1)
plot_kwargs["legend_kwds"]["label"] = "in log10 scale"
# Plotting
warnings.filterwarnings("ignore", category=UserWarning)
if not gdf["Value"].isna().sum():
# Missing values are not included
plot_kwargs.pop("missing_kwds")
gdf.plot(column="Value", **plot_kwargs)
# Remove all ticks
self._ax.tick_params(labelbottom=False, labelleft=False, left=False, bottom=False)
def _global_data(self, data, included, excluded):
"""
Create global map data with geometry information.
Args:
data (pandas.DataFrame): data to show
Index
reset index
Columns
- Country (str): country names
- Value (int or float or None): values to plot
- ISO3 (str): ISO3 codes, optional
included (list[str] or None): included countries or None (all)
excluded (list[str] or None): excluded countries or None (all)
Returns:
geopandas.GeoDataFrame:
Index
reset index
Columns
- Value (int or float or None)
- geometry (geopandas.GeoDataFrame.geometry): geometry information
"""
# data to plot
df = data.copy()
df[self.ISO3] = df[self.COUNTRY].apply(self._to_iso3) if self.ISO3 not in df else df[self.ISO3]
# Geometry
gdf = self._load_geo_global()
# Merge them
gdf = gdf.merge(df, how="left", on=self.ISO3)
# Select countries
included_codes = gdf[self.ISO3].tolist() if included is None else [self._to_iso3(c) for c in included]
excluded_codes = [] if excluded is None else [self._to_iso3(c) for c in excluded]
sel = set(included_codes) - set(excluded_codes)
return gdf.loc[gdf[self.ISO3].isin(sel), ["Value", "geometry"]]
def _country_specific_data(self, data, included, excluded, country):
"""
Create country-specific map data with geometry information.
Args:
data (pandas.DataFrame): data to show
Index
reset index
Columns
- Province (str): province names
- Value (int or float or None): values to plot
included (list[str] or None): included countries or None (all)
excluded (list[str] or None): excluded countries or None (all)
country (str): country name
scale (str): scale of geographic shapes, '10m', '50m' or '110m'
Returns:
geopandas.GeoDataFrame:
Index
reset index
Columns
- Value (int or float or None): values to plot
- Province (str): province names
- geometry (geopandas.GeoDataFrame.geometry): geometry information
"""
# Get geometry information of the country
iso3 = self._to_iso3(country)
scale = "50m" if iso3 == "USA" else "10m"
gdf = self._load_geo_country_specific(scale=scale)
gdf[self.ISO3] = gdf[self.ISO3].replace({"MAC": "CHN"})
warnings.filterwarnings("ignore", category=DeprecationWarning)
hkg_gdf = gdf.loc[gdf[self.ISO3] == "HKG"].dissolve()
hkg_gdf.loc[:, [self.ISO3, self.PROVINCE]] = ["CHN", "Hong Kong"]
gdf = pd.concat([gdf.loc[gdf[self.ISO3] != "HKG"], hkg_gdf], sort=True, ignore_index=True)
gdf = gdf.loc[gdf[self.ISO3] == iso3]
# Update province names
gdf[self.PROVINCE] = gdf[self.PROVINCE].replace(
{"Xizang": "Tibet", "Inner Mongol": "Inner Mongolia"})
# Merge the data with geometry information
gdf = gdf.merge(data, how="left", on=self.PROVINCE)
# Select countries
sel = set(included or gdf[self.PROVINCE].unique()) - set(excluded or [])
return gdf.loc[gdf[self.PROVINCE].isin(sel), ["Value", self.PROVINCE, "geometry"]]
def _load_geo_global(self):
"""
Retrieve geometry information for global map.
Returns:
geopandas.GeoDataFrame:
Index
reset index
Columns
- ISO3 (str): ISO3 codes
- geometry (geopandas.GeoDataFrame.geometry): geometry information
"""
# Geometry information from Natural Earth
# pop_est, continent, name, iso_a3, gdp_md_est, geometry
warnings.filterwarnings("ignore", category=DeprecationWarning)
geopath = gpd.datasets.get_path("naturalearth_lowres")
gdf = gpd.read_file(geopath)
# Data cleaning
gdf["name"] = gdf["name"].apply(unidecode)
gdf["name"].replace(
{
"Fr. S. Antarctic Lands": "French Southern and Antarctic Lands",
"S. Sudan": "South Sudan"
}, inplace=True)
# Get ISO3 codes
gdf.rename(columns={"iso_a3": self.ISO3}, inplace=True)
sel = gdf[self.ISO3] == "-99"
gdf.loc[sel, self.ISO3] = gdf.loc[sel, "name"].apply(self._to_iso3)
# Remove Antarctica and
gdf = gdf.loc[gdf[self.ISO3] != "ATA"]
return gdf.loc[:, [self.ISO3, "geometry"]]
def _load_geo_country_specific(self, scale):
"""
Load shape file from 'Natural Earth Vector'.
https://github.com/nvkelso/natural-earth-vector
Args:
scale (str): scale of geographic shapes, '10m', '50m' or '110m'
Returns:
geopandas.GeoDataFrame:
Index
reset index
Columns
- ISO3 (str): ISO3 codes
- Province (str): province name
- geometry (geopandas.GeoDataFrame.geometry): geometry information
"""
title = f"ne_{scale}_admin_1_states_provinces"
extensions = [".README.html", ".VERSION.txt", ".cpg", ".dbf", ".sbn", ".sbx", ".shp", ".shx"]
geo_dirpath = self._geo_dirpath.joinpath(title)
geo_dirpath.mkdir(parents=True, exist_ok=True)
# Download files, if necessary
for ext in extensions:
basename = f"{title}{ext}"
if geo_dirpath.joinpath(basename).exists():
continue
url = f"https://github.com/nvkelso/natural-earth-vector/blob/master/{scale}_cultural/{basename}?raw=true"
response = requests.get(url=url)
with geo_dirpath.joinpath(basename).open("wb") as fh:
fh.write(response.content)
# Data cleaning
warnings.filterwarnings("ignore", category=DeprecationWarning)
gdf = gpd.read_file(geo_dirpath.joinpath(f"{title}.shp"))
gdf["name"] = gdf["name"].fillna("").apply(unidecode)
gdf.rename(columns={"name": self.PROVINCE, "adm0_a3": self.ISO3}, inplace=True)
return gdf.loc[:, [self.ISO3, self.PROVINCE, "geometry"]]