Code source de pyspc.webservice.meteofrance.open_data

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Webservice - Météo-France - OPEN DATA
"""
from datetime import datetime as dt
try:
    from datetime import UTC
except ImportError:
    from datetime import timezone
    UTC = timezone.utc
import itertools
import os.path
import pandas as pnd
import requests

from pyspc.convention.meteofrance import MDG_HOSTANME, MDG_TIMESTEP, MDG_DSC
import pyspc.core.exception as _exception
from pyspc.webservice._proxies import setproxies_byconfig

# Default per-request timeout, in seconds. OpenData falls back to this value
# whenever its ``timeout`` argument is not an integer.
TIMEOUT = 300
"""Durée acceptable de chaque requête"""


class OpenData:
    """
    Client giving access to the files published on METEO.DATA.GOUV.FR.

    Attributes
    ----------
    hostname : str
        Webservice host
    proxies : None, dict
        Proxy mapping {'protocol': 'proxy'}
    timeout : None, int
        Maximum duration of a request
    session : requests.Session
        Session used to send the requests
    url : None, str
        Address of the most recent request (set by ``get``)
    filename : None, str
        Local file name. Initialised to None; none of the methods of this
        class updates it.
    verify : None, str, bool
        SSL certificate verification. See requests.get

    """

    def __init__(self, hostname=None, proxies=None, timeout=None,
                 verify=None):
        """
        Instantiate the webservice client and open a session.

        Parameters
        ----------
        hostname : str
            Webservice host. Default: MDG_HOSTANME
        proxies : None, dict
            Proxy mapping {'protocol': 'proxy'}. Any value that is not a
            dict is handed over to ``setproxies_byconfig``.
        timeout : None, int
            Maximum duration of a request. Any value that is not an int
            is replaced by TIMEOUT (300 seconds).
        verify : None, str, bool
            SSL certificate verification. See requests.get

        """
        self.hostname = MDG_HOSTANME if hostname is None else hostname
        if isinstance(proxies, dict):
            self.proxies = proxies
        else:
            self.proxies = setproxies_byconfig(proxies)
        self.timeout = timeout if isinstance(timeout, int) else TIMEOUT
        self.url = None
        self.filename = None
        self.verify = verify
        self.session = None
        self.login()

    def login(self):
        """Open a requests.Session configured with the instance proxies."""
        self.session = requests.Session()
        self.session.proxies.update(self.proxies)

    def logout(self):
        """Close the requests.Session, if any (no-op without a session)."""
        if self.session is not None:
            self.session.close()

    def __str__(self):
        """
        Return a printable summary of the instance meta-data.
        """
        text = """
        *******************************************
        ****** WEBSERVICE - METEO.DATA.GOUV.FR ****
        *******************************************
        *  HYDROPORTAIL HOTE    = {hostname}
        *  PROXIES              = {proxies}
        *  URL REQUETE          = {url}
        *  NOM FICHIER LOCAL    = {filename}
        *************************************
        """
        return text.format(**vars(self))

    def get(self, remotedir=None, remotefile=None):
        """
        Request one file from METEO.DATA.GOUV.FR.

        The requested address is ``{hostname}/{remotedir}/{remotefile}``;
        it is stored in ``self.url`` before the request is sent.

        Parameters
        ----------
        remotedir : str
            Sub-directory of the remote file
        remotefile : str
            Base name of the remote file

        Returns
        -------
        res : requests.models.Response, None
            Response of requests.Session.get when its status code is
            ``requests.codes.ok``. None when an SSL or connection error
            occurred (a warning is emitted through the project helper)
            or when the status code is not OK without being an error.

        Raises
        ------
        requests.HTTPError
            Raised by ``Response.raise_for_status`` on HTTP error codes.

        Notes
        -----
        The arguments and the session are first checked by helpers of
        ``pyspc.core.exception``; presumably these raise (ValueError for
        the missing-session case) -- confirm against that module.

        See Also
        --------
        pyspc.convention.meteofrance.MDG_HOSTANME
        pyspc.convention.meteofrance.MDG_DSC
        pyspc.webservice.meteofrance.OpenData.retrieve

        """
        # ------------------------------------------------------------
        # Checks
        # ------------------------------------------------------------
        _exception.check_str(remotedir)
        _exception.check_str(remotefile)
        _exception.raise_valueerror(
            self.session is None,
            "Veuillez ouvrir une nouvelle session par l'application de la "
            "méthode login")
        # ------------------------------------------------------------
        # URL definition
        # ------------------------------------------------------------
        self.url = f"{self.hostname}/{remotedir}/{remotefile}"
        # ------------------------------------------------------------
        # Request handling
        # SSLError derives from ConnectionError in requests, so it has
        # to be caught first.
        # ------------------------------------------------------------
        try:
            res = self.session.get(url=self.url, timeout=self.timeout,
                                   verify=self.verify)
        except requests.exceptions.SSLError as err:
            _exception.Warning(
                __name__,
                f"Impossible de lire l'url: {self.url}\n"
                "La bibliothèque <requests> renvoie le code d'erreur SSL "
                f"{err}. ou définir verify à False.")
            # self.url should be recoverable from
            # - err.request.url
            # - err.request.body
            return None
        except requests.ConnectionError as err:
            _exception.Warning(
                __name__,
                f"Impossible de lire l'url: {self.url}\n"
                "La bibliothèque <requests> renvoie le code d'erreur "
                f"CONNECTION {err}. Cela peut provenir d'une erreur de proxy")
            # self.url should be recoverable from
            # - err.request.url
            # - err.request.body
            return None
        # ------------------------------------------------------------
        # Result
        # ------------------------------------------------------------
        if res.status_code != requests.codes.ok:
            res.raise_for_status()  # raises when the request failed
            return None
        if not isinstance(res, requests.models.Response):
            return None
        return res

    def retrieve(self, codes=None, start=None, end=None, timestep=None,
                 dirname='.', desc=False, rr_t_uv=None):
        """
        Download METEO.DATA.GOUV.FR files into a local directory.

        The session is re-opened if needed and is always closed once the
        arguments have been validated, whether the downloads succeed or
        an exception is raised.

        Parameters
        ----------
        codes : list
            Identifiers of the weather sites. The first two characters
            of each identifier, left-padded with zeros to 8 characters,
            give the department.
        start : datetime.datetime
            First date
        end : datetime.datetime
            Last date
        timestep : datetime.timedelta
            Time step of the data; must be a key of MDG_TIMESTEP
        dirname : str
            Local directory where the files are written. Default: '.'
        desc : bool
            Also download the description file. Default: False
        rr_t_uv : bool
            Precipitation (RR), air temperature (T) or wind (UV).
            Default: True

        Returns
        -------
        filenames : list
            Local paths of the files that were written

        See Also
        --------
        pyspc.convention.meteofrance.MDG_TIMESTEP
        pyspc.convention.meteofrance.MDG_DSC
        pyspc.webservice.meteofrance.OpenData.get

        Examples
        --------
        The file names below are those obtained when run in 2024.

        >>> from datetime import datetime, timedelta

        HOURLY DATA

        >>> odata = OpenData()
        >>> odata.login()
        >>> filenames = odata.retrieve(
        ...     codes=['43111002'],
        ...     start=datetime(2019, 9, 1),
        ...     end=datetime(2024, 5, 13, 6, 58, 19, 347953),
        ...     timestep=timedelta(hours=1),
        ...     dirname='data',
        ...     desc=True)
        >>> filenames
        ['data\\H_43_2010-2019.csv.gz',
         'data\\H_43_latest-2023-2024.csv.gz',
         'data\\H_43_previous-2020-2022.csv.gz',
         'data\\H_descriptif_champs.csv']
        >>> odata.logout()

        6-MINUTE DATA

        >>> odata = OpenData()
        >>> odata.login()
        >>> filenames = odata.retrieve(
        ...     codes=['43111002'],
        ...     start=datetime(2019, 9, 1),
        ...     end=datetime(2024, 5, 13, 6, 58, 37, 669703),
        ...     timestep=timedelta(minutes=6),
        ...     dirname='data',
        ...     desc=True)
        >>> filenames
        ['data\\MN_43_2010-2019.csv.gz',
         'data\\MN_43_latest-2023-2024.csv.gz',
         'data\\MN_43_previous-2020-2022.csv.gz',
         'data\\MN_descriptif_champs.csv']
        >>> odata.logout()

        DAILY DATA (RR, TA, UV)

        >>> odata = OpenData()
        >>> odata.login()
        >>> filenames = odata.retrieve(
        ...     codes=['7075001', '07105001'],
        ...     start=datetime(2019, 9, 1),
        ...     end=datetime(2024, 5, 13, 6, 58, 56, 903236),
        ...     timestep=timedelta(days=1),
        ...     dirname='data',
        ...     desc=True, rr_t_uv=True)
        >>> filenames
        ['data\\Q_07_latest-2023-2024_RR-T-Vent.csv.gz',
         'data\\Q_07_previous-1950-2022_RR-T-Vent.csv.gz',
         'data\\Q_descriptif_champs_RR-T-Vent.csv']
        >>> odata.logout()

        DAILY DATA (OTHER VARIABLES)

        >>> odata = OpenData()
        >>> odata.login()
        >>> filenames = odata.retrieve(
        ...     codes=['7075001', '07105001', '07154005', '43111002'],
        ...     start=datetime(2019, 9, 1),
        ...     end=datetime(2024, 5, 13, 6, 59, 10, 606568),
        ...     timestep=timedelta(days=1),
        ...     dirname='data',
        ...     desc=True,
        ...     rr_t_uv=False)
        >>> filenames
        ['data\\Q_07_latest-2023-2024_autres-parametres.csv.gz',
         'data\\Q_07_previous-1950-2022_autres-parametres.csv.gz',
         'data\\Q_43_latest-2023-2024_autres-parametres.csv.gz',
         'data\\Q_43_previous-1950-2022_autres-parametres.csv.gz']
        >>> odata.logout()

        """
        # ------------------------------------------------------------
        # Checks
        # ------------------------------------------------------------
        if self.session is None:
            self.login()
        _exception.check_listlike(codes)
        _exception.check_dt(start)
        _exception.check_dt(end)
        self.check_timestep(timestep)
        _exception.check_bool(desc)
        rr_t_uv = _exception.set_default(rr_t_uv, True)
        _exception.check_bool(rr_t_uv)
        filenames = []
        try:
            # --------------------------------------------------------
            # List the files to download
            # --------------------------------------------------------
            codes = [f'{c:0>8s}' for c in codes]
            depts = sorted({int(float(c[:2])) for c in codes})
            dates = pnd.date_range(start, end, freq=timestep)
            # The remote file name depends on the date only through its
            # year: one representative date per year yields the same set
            # of names without one call per time step.
            year_dates = {d.year: d for d in dates}
            remotedir = MDG_TIMESTEP[timestep]
            remotefiles = sorted(
                {self.set_basename(dept=dept, date=date, timestep=timestep,
                                   rr_t_uv=rr_t_uv)
                 for dept, date in itertools.product(depts,
                                                     year_dates.values())})
            if desc:
                # Not every (directory, rr_t_uv) pair has a description
                # file: a missing entry is simply skipped.
                try:
                    rf = MDG_DSC[(remotedir, rr_t_uv)]
                except KeyError:
                    pass
                else:
                    remotefiles.append(rf)
            # --------------------------------------------------------
            # Download each file; keep the local path when successful
            # --------------------------------------------------------
            for remotefile in remotefiles:
                localfile = os.path.join(dirname, remotefile)
                try:
                    res = self.get(remotedir=remotedir,
                                   remotefile=remotefile)
                except ValueError:
                    continue
                if res is None:
                    continue
                _write_res(res, localfile)
                filenames.append(localfile)
        finally:
            # Release the connections even when a download raised.
            self.logout()
        return filenames

    def set_basename(self, dept=None, date=None, timestep=None,
                     rr_t_uv=None):
        """
        Build the name of the remote file to download.

        The name depends on the department, on the year of ``date``
        compared with the current UTC year, on the time step and, for
        daily data, on ``rr_t_uv``.

        Parameters
        ----------
        dept : int
            Department
        date : datetime.datetime
            Target date
        timestep : datetime.timedelta
            Time step; must be a key of MDG_TIMESTEP
        rr_t_uv : bool
            Precipitation (RR), air temperature (T) or wind (UV).
            Default: True

        Returns
        -------
        str
            Base name of the remote file

        Raises
        ------
        ValueError
            If the time step is unknown or if no file name matches the
            combination (date, timestep, rr_t_uv).

        Examples
        --------
        The results below are those obtained when run in 2024.

        >>> from datetime import datetime, timedelta
        >>> day = timedelta(days=1)
        >>> hour = timedelta(hours=1)
        >>> six = timedelta(minutes=6)
        >>> odata = OpenData()

        >>> odata.set_basename(dept=43, date=datetime(1910, 1, 1),
        ...                    timestep=day, rr_t_uv=True)
        'Q_43_1865-1949_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=datetime(1910, 1, 1),
        ...                    timestep=day, rr_t_uv=False)
        'Q_43_1865-1949_autres-parametres.csv.gz'
        >>> odata.set_basename(dept=43, date=datetime(1949, 12, 31),
        ...                    timestep=day, rr_t_uv=True)
        'Q_43_1865-1949_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=datetime(1950, 1, 1),
        ...                    timestep=day, rr_t_uv=True)
        'Q_43_previous-1950-2022_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=datetime(2022, 12, 31),
        ...                    timestep=day, rr_t_uv=True)
        'Q_43_previous-1950-2022_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=datetime(2023, 1, 1),
        ...                    timestep=day, rr_t_uv=True)
        'Q_43_latest-2023-2024_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=datetime(2023, 12, 31),
        ...                    timestep=day, rr_t_uv=True)
        'Q_43_latest-2023-2024_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=datetime(2024, 4, 30),
        ...                    timestep=day, rr_t_uv=True)
        'Q_43_latest-2023-2024_RR-T-Vent.csv.gz'

        >>> odata.set_basename(dept=15, date=datetime(1920, 1, 1),
        ...                    timestep=hour, rr_t_uv=True)
        'H_15_1920-1929.csv.gz'
        >>> odata.set_basename(dept=15, date=datetime(1958, 12, 31),
        ...                    timestep=hour, rr_t_uv=True)
        'H_15_1950-1959.csv.gz'
        >>> odata.set_basename(dept=15, date=datetime(2000, 1, 1),
        ...                    timestep=hour, rr_t_uv=True)
        'H_15_2000-2009.csv.gz'
        >>> odata.set_basename(dept=15, date=datetime(2009, 12, 31),
        ...                    timestep=hour, rr_t_uv=True)
        'H_15_2000-2009.csv.gz'
        >>> odata.set_basename(dept=15, date=datetime(2010, 1, 1),
        ...                    timestep=hour, rr_t_uv=True)
        'H_15_2010-2019.csv.gz'
        >>> odata.set_basename(dept=15, date=datetime(2019, 12, 31),
        ...                    timestep=hour, rr_t_uv=True)
        'H_15_2010-2019.csv.gz'
        >>> odata.set_basename(dept=15, date=datetime(2020, 1, 1),
        ...                    timestep=hour, rr_t_uv=True)
        'H_15_previous-2020-2022.csv.gz'
        >>> odata.set_basename(dept=15, date=datetime(2022, 12, 31),
        ...                    timestep=hour, rr_t_uv=True)
        'H_15_previous-2020-2022.csv.gz'
        >>> odata.set_basename(dept=15, date=datetime(2023, 1, 1),
        ...                    timestep=hour, rr_t_uv=True)
        'H_15_latest-2023-2024.csv.gz'
        >>> odata.set_basename(dept=15, date=datetime(2024, 5, 1),
        ...                    timestep=hour, rr_t_uv=True)
        'H_15_latest-2023-2024.csv.gz'

        >>> odata.set_basename(dept=7, date=datetime(2000, 1, 1),
        ...                    timestep=six, rr_t_uv=True)
        'MN_07_2000-2009.csv.gz'
        >>> odata.set_basename(dept=7, date=datetime(2009, 12, 31),
        ...                    timestep=six, rr_t_uv=True)
        'MN_07_2000-2009.csv.gz'
        >>> odata.set_basename(dept=7, date=datetime(2010, 1, 1),
        ...                    timestep=six, rr_t_uv=True)
        'MN_07_2010-2019.csv.gz'
        >>> odata.set_basename(dept=7, date=datetime(2019, 12, 31),
        ...                    timestep=six, rr_t_uv=True)
        'MN_07_2010-2019.csv.gz'
        >>> odata.set_basename(dept=7, date=datetime(2020, 1, 1),
        ...                    timestep=six, rr_t_uv=True)
        'MN_07_previous-2020-2022.csv.gz'
        >>> odata.set_basename(dept=7, date=datetime(2022, 12, 31),
        ...                    timestep=six, rr_t_uv=True)
        'MN_07_previous-2020-2022.csv.gz'
        >>> odata.set_basename(dept=7, date=datetime(2023, 1, 1),
        ...                    timestep=six, rr_t_uv=True)
        'MN_07_latest-2023-2024.csv.gz'
        >>> odata.set_basename(dept=7, date=datetime(2024, 5, 1),
        ...                    timestep=six, rr_t_uv=True)
        'MN_07_latest-2023-2024.csv.gz'

        """
        # ------------------------------------------------------------
        # Checks
        # ------------------------------------------------------------
        _exception.check_int(dept)
        _exception.check_dt(date)
        self.check_timestep(timestep)
        rr_t_uv = _exception.set_default(rr_t_uv, True)
        _exception.check_bool(rr_t_uv)
        now = dt.now(UTC)
        # Last year covered by the 'previous' archives; later years are
        # served by the 'latest' files.
        y2 = now.year - 2
        subdir = MDG_TIMESTEP[timestep]
        dm = divmod(date.year, 10)   # (decade index, year in decade)
        dmn = divmod(now.year, 10)
        # ------------------------------------------------------------
        # Dispatch on the remote sub-directory
        # ------------------------------------------------------------
        if subdir == 'QUOT':      # daily
            res = _set_basename_quot(date, y2, rr_t_uv, dept, now)
        elif subdir == 'HOR':     # hourly
            res = _set_basename_hor(date, y2, rr_t_uv, dept, now, dm, dmn)
        elif subdir == 'MIN':     # 6-minute
            res = _set_basename_min(date, y2, rr_t_uv, dept, now, dm, dmn)
        else:
            res = None
        if res is None:
            raise ValueError(f'Configuration inconnue: {date}, '
                             f'{timestep}, {rr_t_uv}')
        return res

    @staticmethod
    def check_timestep(timestep):
        """
        Check that the time step is a key of MDG_TIMESTEP.

        Raises
        ------
        ValueError
            If the time step is not a key of MDG_TIMESTEP

        """
        try:
            MDG_TIMESTEP[timestep]
        except KeyError as ke:
            raise ValueError("Pas de temps des données incorrect") from ke

    @classmethod
    def get_timesteps(cls):
        """
        Return the sorted list of the available time steps.

        Returns
        -------
        list
            Sorted keys of MDG_TIMESTEP

        See Also
        --------
        pyspc.convention.meteofrance.MDG_TIMESTEP

        """
        return sorted(MDG_TIMESTEP)
def _set_basename_quot(date, y2, rr_t_uv, dept, now):
    """
    Build the remote file name for daily data.

    Parameters
    ----------
    date : datetime.datetime
        Target date; only its year is used
    y2 : int
        Last year of the 'previous' archive
    rr_t_uv : bool
        True for the 'RR-T-Vent' file, False for 'autres-parametres'
    dept : int
        Department, written with two digits
    now : datetime.datetime
        Current date; its year and the previous one label the 'latest' file

    Returns
    -------
    str, None
        File name, or None when the year is before 1865

    """
    suffix = 'RR-T-Vent' if rr_t_uv else 'autres-parametres'
    year = date.year
    if 1865 <= year <= 1949:
        period = '1865-1949'
    elif 1950 <= year <= y2:
        period = f'previous-1950-{y2}'
    elif year > y2:
        period = f'latest-{now.year-1}-{now.year}'
    else:
        return None
    return f'Q_{dept:02d}_{period}_{suffix}.csv.gz'


def _set_basename_decade(prefix, first_year, date, y2, rr_t_uv, dept, now,
                         dm, dmn):
    """
    Build a remote file name for data archived by decade.

    Shared by the hourly and 6-minute helpers, which only differ by the
    file prefix and by the first archived year.

    Returns
    -------
    str, None
        File name, or None when rr_t_uv is false or when the year is
        before ``first_year``

    """
    if not rr_t_uv:
        return None
    if date.year > y2:
        return f'{prefix}_{dept:02d}_latest-{now.year-1}-{now.year}.csv.gz'
    decade = dm[0] * 10
    if dm[0] == dmn[0]:
        # Current decade: the archive stops at y2 instead of the decade end.
        return f'{prefix}_{dept:02d}_previous-{decade}-{y2}.csv.gz'
    if date.year >= first_year:
        return f'{prefix}_{dept:02d}_{decade}-{decade + 9}.csv.gz'
    return None


def _set_basename_hor(date, y2, rr_t_uv, dept, now, dm, dmn):
    """
    Build the remote file name for hourly data (archives start in 1920).

    ``dm`` and ``dmn`` are ``divmod(year, 10)`` of the target date and of
    the current date. Returns None when no file matches.
    """
    return _set_basename_decade('H', 1920, date, y2, rr_t_uv, dept, now,
                                dm, dmn)


def _set_basename_min(date, y2, rr_t_uv, dept, now, dm, dmn):
    """
    Build the remote file name for 6-minute data (archives start in 2000).

    ``dm`` and ``dmn`` are ``divmod(year, 10)`` of the target date and of
    the current date. Returns None when no file matches.
    """
    return _set_basename_decade('MN', 2000, date, y2, rr_t_uv, dept, now,
                                dm, dmn)


def _write_res(res, localfile):
    """
    Write the body of a response to a local file, byte for byte.

    The raw ``res.content`` is always written in binary mode. Going
    through ``res.text`` and a text-mode file would apply the platform
    newline translation and a lossy decode, which corrupts compressed
    payloads whenever the server advertises a text content type.

    Parameters
    ----------
    res : requests.models.Response
        Response whose ``content`` attribute holds the downloaded bytes
    localfile : str
        Path of the file to write

    """
    with open(localfile, 'wb') as f:
        f.write(res.content)