Source code for src.sdmx
"""Module providing sdmx utilities"""
import re
import aiohttp
import pandas as pd
import requests
import sdmxthon
[docs]
class SDMXData:
    """A class to get the data from SDMX API in SDMX format

    :param data: the API URL from which to pull the data; stored on the
        instance as ``url_data``
    """

    def __init__(self, data):
        self.url_data = data

    @staticmethod
    def _frame_from_message(message, yAxisConcept: str = None) -> pd.DataFrame:
        """Extracts the data of the first payload entry of a parsed message.

        :param message: a parsed SDMX message exposing a ``payload`` mapping
            whose values carry a ``data`` attribute
        :param yAxisConcept: str, optional: the column to convert to numeric;
            values that cannot be converted become NaN.
        :returns: pd.DataFrame: the data of the first payload entry.
        :raises ValueError: if the payload is empty or its data is not a
            Pandas DataFrame.
        """
        payload_keys = list(message.payload)
        if not payload_keys:
            raise ValueError("SDMX message has an empty payload")
        data = message.payload[payload_keys[0]].data
        # Validate the type *before* indexing into it, so that a wrong payload
        # is reported as such rather than as an unrelated indexing error.
        if not isinstance(data, pd.DataFrame):
            raise ValueError("Data is not a Pandas dataframe")
        if yAxisConcept:
            data[yAxisConcept] = pd.to_numeric(data[yAxisConcept], errors="coerce")
        return data

    def get_data(self, yAxisConcept: str = None) -> pd.DataFrame:
        """Returns a Pandas DataFrame with the data requested and optionally
        set the yAxisConcept as float.

        Failures are best-effort: the error is printed and ``None`` is
        returned instead of raising.

        :param yAxisConcept: str, optional: the y-axis to convert to numeric.
        :returns: pd.DataFrame: the requested data, or None when the request
            does not answer with HTTP 200 or the response cannot be parsed.
        """
        try:
            response = requests.get(self.url_data, timeout=30)
            if response.status_code != 200:
                print(
                    f"Request to {self.url_data} returned HTTP status "
                    f"{response.status_code}"
                )
                return None
            message = sdmxthon.read_sdmx(response.text)
            return self._frame_from_message(message, yAxisConcept)
        except Exception as e:
            print(e)
            return None

    async def get_data_async(self, yAxisConcept: str = None) -> pd.DataFrame:
        """Asynchronously returns a Pandas DataFrame with the data requested and
        optionally set the yAxisConcept as float.

        Failures are best-effort: the error is printed and ``None`` is
        returned instead of raising.

        :param yAxisConcept: str, optional: the y-axis to convert to numeric.
        :returns: pd.DataFrame: the requested data, or None when the request
            does not answer with HTTP 200 or the response cannot be parsed.
        """
        try:
            # NOTE(review): unlike get_data, no explicit timeout is passed
            # here, so the aiohttp session default applies.
            async with aiohttp.ClientSession() as session:
                async with session.get(self.url_data) as response:
                    if response.status != 200:
                        print(
                            f"Request to {self.url_data} returned HTTP status "
                            f"{response.status}"
                        )
                        return None
                    body = await response.text()
            message = sdmxthon.read_sdmx(body)
            return self._frame_from_message(message, yAxisConcept)
        except Exception as e:
            print(e)
            return None
def _check_string(string: str, url: str):
"""Checks that the url contains the string and if not formats it appropriately
:param string: str: the string that needs to be contained in the url
:param url: str: the url
:returns: str: the formatted url
"""
try:
if string in url:
pass
elif "?" in url:
url = url + "&" + string
else:
url = url + "?" + string
except Exception as e:
print(e)
return url
[docs]
def get_components(url: str, descendants: bool = True):
    """Retrieve a dictionary with the SDMX data

    Failures are best-effort: the error is printed and ``None`` is returned
    instead of raising.

    :param url: str: the API URL from which to pull the data
    :param descendants: bool: whether to include all descendants in the call to the API
        (Default = True)
    :returns: dict: the entries of the parsed message payload keyed as in the
        payload, or None when the payload is empty or the request/parsing fails.
    """
    try:
        if descendants:
            url = _check_string("references=descendants", url)
        with requests.get(url, stream=True, timeout=30) as response:
            # Fail on 4xx/5xx before the body is read, so that an error page
            # is reported as an HTTP error and not as an SDMX parsing error.
            response.raise_for_status()
            message = sdmxthon.read_sdmx(response.text, validate=False)
        payload = message.payload
        components = {key: payload[key] for key in list(payload)}
        if components:
            return components
    except Exception as e:
        print(e)
    return None
[docs]
async def get_components_async(url: str, descendants: bool = True):
    """Asynchronously retrieve a dictionary with the SDMX data

    Failures are best-effort: the error is printed and ``None`` is returned
    instead of raising.

    :param url: str: the API URL from which to pull the data
    :param descendants: bool: whether to include all descendants in the call to the API
        (Default = True)
    :returns: dict: the entries of the parsed message payload keyed as in the
        payload, or None when the payload is empty or the request/parsing fails.
    """
    try:
        if descendants:
            url = _check_string("references=descendants", url)
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                # Fail on 4xx/5xx so that an error page is reported as an
                # HTTP error and not as an SDMX parsing error.
                response.raise_for_status()
                body = await response.text()
        message = sdmxthon.read_sdmx(body, validate=False)
        payload = message.payload
        components = {key: payload[key] for key in list(payload)}
        if components:
            return components
    except Exception as e:
        print(e)
    return None
[docs]
class SDMXMetadata:
    """A class to get the metadata from SDMX API in SDMX format

    :param components: a mapping indexed by component type (``Dataflows``,
        ``Codelists``, ``DataStructures``, ``Concepts``) whose values are
        compatible with the `Component object \
        <https://docs.sdmxthon.meaningfuldata.eu/packages/model/component.html>`_
    :param concept: a string that denotes the concept to translate
    """

    def __init__(self, components, concept: str = None):
        self.components = components
        self.concept = concept
        # Initialise to None attributes obtained with other methods
        self.cl_id_name = None
        self.cl_id_des = None
        self.cl_items = None

    @staticmethod
    def _attr_or(obj, attribute, fallback=""):
        """Reads ``obj.attribute``; on any failure prints the error and
        returns ``fallback`` instead of raising."""
        try:
            return getattr(obj, attribute)
        except Exception as e:
            print(e)
            return fallback

    def parse_message(self, component_type):
        """Parses the appropriate component and returns children artefacts.

        :param component_type: a string that can only take the following values:
            Dataflows, Codelists, DataStructures, Concepts
        :returns: for ``Codelists`` the whole mapping of codelists; for the
            other types the first artefact of that type. None when the
            content is empty or cannot be read (the error is printed).
        :raises ValueError: if ``component_type`` is not one of the valid values.
        """
        valid = {"Dataflows", "Codelists", "DataStructures", "Concepts"}
        if component_type not in valid:
            raise ValueError(f"Error: component_type must be one of {valid}.")
        try:
            content = self.components[component_type]
            if component_type != "Codelists":
                # Non-codelist components are keyed by artefact id; only the
                # first artefact is used.
                content = content[list(content)[0]]
            if content:
                return content
        except Exception as e:
            print(e)
        return None

    def dataflow_metadata(self):
        """Returns a dictionary with the name and description of the dataflow.

        :returns: ``{"name": ..., "description": ...}`` where a missing
            description is replaced by an empty string, or None when the
            dataflow cannot be read.
        """
        dataflow = self.parse_message(component_type="Dataflows")
        if not dataflow:
            return None
        try:
            return {
                "name": dataflow.name,
                "description": dataflow.description or "",
            }
        except Exception as e:
            print(e)
            return None

    def datastructure_metadata(self):
        """Returns a dictionary with the id, dimensions, attributes and measures of the
        queried DSD.

        :returns: a dictionary with keys ``id``, ``dim``, ``attr`` and
            ``measure``; any value that cannot be read is replaced by an
            empty string. None when the DSD itself cannot be read.
        """
        try:
            dsd = self.parse_message(component_type="DataStructures")
            if not dsd:
                return None
            return {
                "id": self._attr_or(dsd, "id"),
                "dim": self._attr_or(dsd, "dimension_codes"),
                "attr": self._attr_or(dsd, "attribute_codes"),
                "measure": self._attr_or(dsd, "measure_code"),
            }
        except Exception as e:
            print(e)
            return None

    def get_codelists(self):
        """Returns a `model.itemScheme.Codelist\
        <https://docs.sdmxthon.meaningfuldata.eu/packages/model/itemScheme.html#codelist>`_
        mapping, or None when no codelists are available.
        """
        try:
            return self.parse_message(component_type="Codelists")
        except Exception as e:
            print(e)
            return None

    def get_codelist_name(self, *args, **kwargs):
        """Returns a string with the Agency, ID and version of the codelist or a
        `model.itemScheme.Codelist\
        <https://docs.sdmxthon.meaningfuldata.eu/packages/model/itemScheme.html#codelist>`_
        if all descendants are included in the SDMX URL call.

        The component groups of the DSD are searched in order and the
        codelist of the first group that contains ``self.concept`` is
        returned.

        :param mode: keyword only; ``"auto"`` (default) is the only mode that
            performs a lookup.
        :returns: the codelist of the concept, or an empty string when the
            mode is not ``"auto"``, the DSD cannot be read or the concept is
            not found.
        """
        mode = kwargs.get("mode", "auto")
        if mode != "auto":
            return ""
        dsd = self.parse_message(component_type="DataStructures")
        if not dsd:
            return ""
        for group in dsd.content.values():
            try:
                # Return on the first hit: carrying on would let a later
                # group that lacks the concept overwrite the result.
                return group[self.concept].local_representation.codelist
            except (LookupError, TypeError, AttributeError):
                continue
        print(f"No codelist found for concept {self.concept}")
        return ""

    def get_cl(self):
        """Returns a Tuple with the name (str), description (str) and
        `model.itemScheme.Code\
        <https://docs.sdmxthon.meaningfuldata.eu/packages/model/item.html#code>`_
        items of the codelist of ``self.concept``.

        The three values are also stored on the instance as ``cl_id_name``,
        ``cl_id_des`` and ``cl_items``. When the name cannot be read the
        concept is used instead; when the description cannot be read it is
        an empty string.

        :returns: the tuple described above, or None when the codelist items
            cannot be read (the error is printed).
        """
        try:
            cl_id = self.get_codelist_name()
            self.cl_id_name = self._attr_or(cl_id, "name", self.concept)
            self.cl_id_des = self._attr_or(cl_id, "description", "")
            self.cl_items = cl_id.items
            return self.cl_id_name, self.cl_id_des, self.cl_items
        except Exception as e:
            print("Error: " + str(e))
            return None
[docs]
def get_url_cl(url_dsd, cl_name):
    """Builds the URL of a codelist from the URL of its DSD.

    Everything before the first ``datastructure`` in ``url_dsd`` is kept as
    the endpoint, and the agency, id and version extracted from ``cl_name``
    are appended to it.

    :param url_dsd: the API URL of the DSD
    :param cl_name: the Agency, ID and version of the codelist (eg ESTAT:CL_AREA(1.0))
    :returns: a string with the URL to query against the API
    """

    def _field(pattern):
        # First match of the pattern in the codelist name, colons removed.
        return re.search(pattern, cl_name).group().replace(":", "")

    base = url_dsd.split("datastructure")[0]
    agency = _field(r"(.+?):")
    resource = _field(r"(?<=\:)(.*?)(?=\()")
    release = _field(r"(?<=\()(.*?)(?=\))")
    return f"{base}codelist/{agency}/{resource}/{release}?detail=full"
[docs]
def get_cl_item_name(items, item):
    """get_cl_item_name returns the name of a code of a codelist

    :param items: a dictionary with the codes of the codelist
    :param item: a string with the code to look up
    :returns: the ``name`` of the code; when ``items`` is empty or the code
        cannot be found, a fallback dictionary that uses the code itself as
        its English content.
    """
    if items:
        try:
            return items[item].name
        except (LookupError, TypeError, AttributeError) as e:
            print(f"The code {item} could not be found in the codelist. Error:{e}")
    # Same fallback whether the codelist is empty or the code is missing, so
    # callers always receive a usable name.
    return {"en": {"locale": "English", "content": item}}
[docs]
def get_translation(content, locale: str = "en"):
    """get_translation returns a translated string, if the requested language
    is available. If it is not, it defaults back to English, and if English
    is not available either the content is returned unchanged.

    :param content: a dictionary of dictionaries to translate, keyed by
        language code, each holding the text under its ``"content"`` key
    :param locale: str: the language code (en, es, de, fr); defaults to "en".
    :returns: the string translated if the language is available, otherwise
        the English string, otherwise ``content`` itself.
    """
    for language in (locale, "en"):
        try:
            return content["".join(language)]["content"]
        except Exception:
            # Deliberately broad: any lookup failure only means that this
            # language is not available, which is an expected outcome.
            continue
    return content
[docs]
def translate_df(df, concept, items_translated):
    """translate_df returns a translated Pandas DataFrame

    The DataFrame is modified in place: the original codes are copied to a
    ``<concept>_id`` column and the ``concept`` column is replaced by the
    translated values. Codes that have no translation keep their original
    value. Nothing is changed when ``concept`` or ``items_translated`` is
    empty; if the translation fails the error is printed.

    :param df: a Pandas DataFrame
    :param concept: a string with the column to translate
    :param items_translated: a dictionary with the codes translated
    :returns: pd.DataFrame: the translated DataFrame (the same object as ``df``)
    """
    if not concept:
        return df
    try:
        if items_translated:
            id_column = concept + "_id"
            df[id_column] = df[concept]
            translated = df[concept].map(items_translated)
            # map() yields NaN for codes missing from the dictionary; fall
            # back to the code itself rather than losing the value.
            df[concept] = translated.fillna(df[id_column])
    except Exception as e:
        print(e)
    return df
[docs]
def retreive_codes_from_data(df, concept, cl_id):
    """Retrieves the codelist metadata for the codes present in the data.

    :param df: a Pandas DataFrame holding the data
    :param concept: a string with the column whose distinct codes are looked up
    :param cl_id: the codelist object; its ``name``, ``description`` and
        ``items`` attributes are read
    :returns: a dictionary with keys ``name``, ``description`` and ``items``.
        ``items`` maps each distinct code of the column to its name, or is an
        empty string when the codes cannot be retrieved (the error is
        printed). A missing description is an empty string and a missing
        name falls back to ``concept``.
    """
    try:
        codes = set(df[concept])
        codelist_items = cl_id.items
        cl_items = {code: get_cl_item_name(codelist_items, code) for code in codes}
    except Exception as e:
        print(f"Could not retreive codes in data. Error{e}")
        cl_items = ""
    # Read name/description defensively: the failure above may be caused by
    # cl_id itself, and the fallback must not raise a second time.
    description = getattr(cl_id, "description", None)
    return {
        "name": getattr(cl_id, "name", concept),
        "description": "" if description is None else description,
        "items": cl_items,
    }