#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021 R. Marty
# (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Webservice - Météo-France - OPEN DATA
"""
from datetime import datetime as dt
try:
from datetime import UTC
except ImportError:
from datetime import timezone
UTC = timezone.utc
import itertools
import os.path
import pandas as pnd
import requests
from pyspc.convention.meteofrance import MDG_HOSTANME, MDG_TIMESTEP, MDG_DSC
import pyspc.core.exception as _exception
from pyspc.webservice._proxies import setproxies_byconfig
#: Acceptable duration of each request, in seconds (default timeout).
TIMEOUT = 300
class OpenData():
    """
    Client giving access to the open data of METEO.DATA.GOUV.FR.

    Attributes
    ----------
    hostname : str
        Host of the webservice
    proxies : None, dict
        Proxies as a dictionary {'protocol': 'proxy'}
    timeout : int, float
        Maximum duration of a request, in seconds
    session : requests.Session
        Session used to send the requests
    url : None, str
        URL of the latest request
    filename : None, str
        Locally saved file
    verify : None, str, bool
        SSL certificate verification. See requests.get

    """

    def __init__(self, hostname=None, proxies=None, timeout=None, verify=None):
        """
        Create the webservice client and open a session.

        Parameters
        ----------
        hostname : str
            Host of the webservice.
            Default: pyspc.convention.meteofrance.MDG_HOSTANME
        proxies : None, dict
            Proxies as a dictionary {'protocol': 'proxy'}. Any value that
            is not a dictionary is handed over to setproxies_byconfig.
        timeout : None, int, float
            Maximum duration of a request, in seconds.
            Any other type falls back to TIMEOUT (300 seconds).
        verify : None, str, bool
            SSL certificate verification. See requests.get

        """
        self.hostname = MDG_HOSTANME if hostname is None else hostname
        if isinstance(proxies, dict):
            self.proxies = proxies
        else:
            self.proxies = setproxies_byconfig(proxies)
        # requests accepts a float number of seconds as well as an int:
        # do not silently replace a float timeout by the default value.
        if isinstance(timeout, (int, float)):
            self.timeout = timeout
        else:
            self.timeout = TIMEOUT
        self.url = None
        self.filename = None
        self.verify = verify
        self.session = None
        self.login()

    def login(self):
        """Open a new requests.Session configured with the proxies."""
        self.session = requests.Session()
        # proxies may be None or empty: dict.update(None) would raise.
        if self.proxies:
            self.session.proxies.update(self.proxies)

    def logout(self):
        """Close the current requests.Session, if there is one."""
        if self.session is not None:
            self.session.close()

    def __str__(self):
        """Return a printable summary of the metadata of the instance."""
        # NOTE(review): the 'HYDROPORTAIL HOTE' label looks copied from
        # another client; the runtime text is deliberately left unchanged.
        text = (
            "\n"
            "*******************************************\n"
            "****** WEBSERVICE - METEO.DATA.GOUV.FR ****\n"
            "*******************************************\n"
            "* HYDROPORTAIL HOTE = {hostname}\n"
            "* PROXIES = {proxies}\n"
            "* URL REQUETE = {url}\n"
            "* NOM FICHIER LOCAL = {filename}\n"
            "*************************************\n"
        )
        return text.format(**vars(self))

    def get(self, remotedir=None, remotefile=None):
        """
        Request a remote file of METEO.DATA.GOUV.FR.

        The requested address is stored in the attribute url.

        Parameters
        ----------
        remotedir : str
            Sub-directory of the remote file
        remotefile : str
            Base name of the remote file

        Returns
        -------
        res : None, requests.models.Response
            Response of requests.Session.get, or None if an SSL or a
            connection error occurred (a warning is then emitted through
            pyspc.core.exception) or if the status code is neither OK
            nor an HTTP error.

        Raises
        ------
        requests.exceptions.HTTPError
            If the server answers with an HTTP error status
            (raised by Response.raise_for_status).

        See Also
        --------
        pyspc.convention.meteofrance.MDG_HOSTANME
        pyspc.convention.meteofrance.MDG_DSC
        OpenData.retrieve

        """
        # ------------------------------------------------------------
        # Checks
        # ------------------------------------------------------------
        _exception.check_str(remotedir)
        _exception.check_str(remotefile)
        _exception.raise_valueerror(
            self.session is None,
            "Veuillez ouvrir une nouvelle session par l'application de la "
            "méthode login")
        # ------------------------------------------------------------
        # Build the url
        # ------------------------------------------------------------
        self.url = f"{self.hostname}/{remotedir}/{remotefile}"
        # ------------------------------------------------------------
        # Send the request
        # SSLError derives from ConnectionError: it must be caught first
        # ------------------------------------------------------------
        try:
            res = self.session.get(url=self.url, timeout=self.timeout,
                                   verify=self.verify)
        except requests.exceptions.SSLError as err:
            _exception.Warning(
                __name__,
                f"Impossible de lire l'url: {self.url}\n"
                "La bibliothèque <requests> renvoie le code d'erreur SSL "
                f"{err}. ou définir verify à False.")
            return None
        except requests.ConnectionError as err:
            _exception.Warning(
                __name__,
                f"Impossible de lire l'url: {self.url}\n"
                "La bibliothèque <requests> renvoie le code d'erreur "
                f"CONNECTION {err}. Cela peut provenir d'une erreur de proxy")
            return None
        # ------------------------------------------------------------
        # Return the result
        # ------------------------------------------------------------
        if res.status_code != requests.codes.ok:
            # Raises for 4xx/5xx; any other non-OK status yields None.
            res.raise_for_status()
            return None
        if not isinstance(res, requests.models.Response):
            return None
        return res

    def retrieve(self, codes=None, start=None, end=None, timestep=None,
                 dirname='.', desc=False, rr_t_uv=None):
        """
        Download the files of METEO.DATA.GOUV.FR into a local directory.

        The session is opened if needed and is always closed once the
        downloads are over, even if one of them raises.

        Parameters
        ----------
        codes : list
            Identifiers of the weather stations (str). They are
            left-padded with zeros to 8 characters; the first two
            characters give the department.
        start : datetime.datetime
            First date
        end : datetime.datetime
            Last date
        timestep : datetime.timedelta
            Time step of the data: one of OpenData.get_timesteps()
        dirname : str
            Local directory where the files are written. Default: '.'
        desc : bool
            Also download the description file, if any. Default: False
        rr_t_uv : None, bool
            True: precipitation (RR), air temperature (T) and wind (UV).
            False: other variables. Default (None): True

        Returns
        -------
        filenames : list
            Paths of the files written locally

        Raises
        ------
        ValueError
            If the time step is unknown or if no remote file matches one
            of the requested years (see OpenData.set_basename).

        See Also
        --------
        pyspc.convention.meteofrance.MDG_TIMESTEP
        pyspc.convention.meteofrance.MDG_DSC
        OpenData.get
        OpenData.set_basename

        Examples
        --------
        The results below were obtained in May 2024.

        >>> from datetime import datetime as dt, timedelta as td
        >>> start = dt(2019, 9, 1)
        >>> end = dt(2024, 5, 13, 6, 58)

        Hourly data

        >>> odata = OpenData()
        >>> odata.retrieve(codes=['43111002'], start=start, end=end,
        ...                timestep=td(hours=1), dirname='data', desc=True)
        ['data\\H_43_2010-2019.csv.gz',
         'data\\H_43_latest-2023-2024.csv.gz',
         'data\\H_43_previous-2020-2022.csv.gz',
         'data\\H_descriptif_champs.csv']

        6-minute data

        >>> odata = OpenData()
        >>> odata.retrieve(codes=['43111002'], start=start, end=end,
        ...                timestep=td(minutes=6), dirname='data', desc=True)
        ['data\\MN_43_2010-2019.csv.gz',
         'data\\MN_43_latest-2023-2024.csv.gz',
         'data\\MN_43_previous-2020-2022.csv.gz',
         'data\\MN_descriptif_champs.csv']

        Daily data (RR, T, UV)

        >>> odata = OpenData()
        >>> odata.retrieve(codes=['7075001', '07105001'],
        ...                start=start, end=end, timestep=td(days=1),
        ...                dirname='data', desc=True, rr_t_uv=True)
        ['data\\Q_07_latest-2023-2024_RR-T-Vent.csv.gz',
         'data\\Q_07_previous-1950-2022_RR-T-Vent.csv.gz',
         'data\\Q_descriptif_champs_RR-T-Vent.csv']

        Daily data (other variables)

        >>> odata = OpenData()
        >>> odata.retrieve(codes=['7075001', '07105001',
        ...                       '07154005', '43111002'],
        ...                start=start, end=end, timestep=td(days=1),
        ...                dirname='data', desc=True, rr_t_uv=False)
        ['data\\Q_07_latest-2023-2024_autres-parametres.csv.gz',
         'data\\Q_07_previous-1950-2022_autres-parametres.csv.gz',
         'data\\Q_43_latest-2023-2024_autres-parametres.csv.gz',
         'data\\Q_43_previous-1950-2022_autres-parametres.csv.gz']

        """
        # ------------------------------------------------------------
        # Checks
        # ------------------------------------------------------------
        if self.session is None:
            self.login()
        _exception.check_listlike(codes)
        _exception.check_dt(start)
        _exception.check_dt(end)
        self.check_timestep(timestep)
        _exception.check_bool(desc)
        rr_t_uv = _exception.set_default(rr_t_uv, True)
        _exception.check_bool(rr_t_uv)
        filenames = []
        # ------------------------------------------------------------
        # List the remote files to download
        # ------------------------------------------------------------
        codes = [f'{c:0>8s}' for c in codes]
        depts = sorted({int(float(c[:2])) for c in codes})
        dates = pnd.date_range(start, end, freq=timestep)
        # The base name only depends on the year of the date: keep the
        # first date of each year instead of calling set_basename for
        # every single time step (hundreds of thousands of calls for
        # 6-minute data).
        dates = dates[~dates.year.duplicated()]
        remotedir = MDG_TIMESTEP[timestep]
        remotefiles = sorted(
            {self.set_basename(dept=x, date=y, timestep=timestep,
                               rr_t_uv=rr_t_uv)
             for x, y in itertools.product(depts, dates)})
        if desc:
            # Not every (directory, rr_t_uv) pair has a description file.
            try:
                rf = MDG_DSC[(remotedir, rr_t_uv)]
            except KeyError:
                pass
            else:
                remotefiles.append(rf)
        # ------------------------------------------------------------
        # Download each file and write it locally
        # ------------------------------------------------------------
        try:
            for remotefile in remotefiles:
                localfile = os.path.join(dirname, remotefile)
                try:
                    res = self.get(remotedir=remotedir,
                                   remotefile=remotefile)
                except ValueError:
                    continue
                if res is None:
                    continue
                _write_res(res, localfile)
                filenames.append(localfile)
        finally:
            # Close the session even if a download raised.
            self.logout()
        return filenames

    def set_basename(self, dept=None, date=None, timestep=None, rr_t_uv=None):
        """
        Define the base name of the remote file to download.

        The name depends on the current year (UTC): the last two years
        are in a 'latest' file, earlier years in 'previous' or period
        files.

        Parameters
        ----------
        dept : int
            Department
        date : datetime.datetime
            Target date. Only its year is used.
        timestep : datetime.timedelta
            Time step: one of OpenData.get_timesteps()
        rr_t_uv : None, bool
            True: precipitation (RR), air temperature (T) and wind (UV).
            False: other variables. Default (None): True

        Returns
        -------
        str
            Base name of the remote file

        Raises
        ------
        ValueError
            If the time step is unknown or if no file matches the
            combination (date, timestep, rr_t_uv).

        Examples
        --------
        The results below were obtained in 2024.

        >>> from datetime import datetime as dt, timedelta as td
        >>> odata = OpenData()

        Daily data

        >>> day = td(days=1)
        >>> odata.set_basename(dept=43, date=dt(1910, 1, 1), timestep=day)
        'Q_43_1865-1949_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=dt(1910, 1, 1), timestep=day,
        ...                    rr_t_uv=False)
        'Q_43_1865-1949_autres-parametres.csv.gz'
        >>> odata.set_basename(dept=43, date=dt(1949, 12, 31), timestep=day)
        'Q_43_1865-1949_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=dt(1950, 1, 1), timestep=day)
        'Q_43_previous-1950-2022_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=dt(2022, 12, 31), timestep=day)
        'Q_43_previous-1950-2022_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=dt(2023, 1, 1), timestep=day)
        'Q_43_latest-2023-2024_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=dt(2023, 12, 31), timestep=day)
        'Q_43_latest-2023-2024_RR-T-Vent.csv.gz'
        >>> odata.set_basename(dept=43, date=dt(2024, 4, 30), timestep=day)
        'Q_43_latest-2023-2024_RR-T-Vent.csv.gz'

        Hourly data

        >>> hour = td(hours=1)
        >>> odata.set_basename(dept=15, date=dt(1920, 1, 1), timestep=hour)
        'H_15_1920-1929.csv.gz'
        >>> odata.set_basename(dept=15, date=dt(1958, 12, 31), timestep=hour)
        'H_15_1950-1959.csv.gz'
        >>> odata.set_basename(dept=15, date=dt(2000, 1, 1), timestep=hour)
        'H_15_2000-2009.csv.gz'
        >>> odata.set_basename(dept=15, date=dt(2009, 12, 31), timestep=hour)
        'H_15_2000-2009.csv.gz'
        >>> odata.set_basename(dept=15, date=dt(2010, 1, 1), timestep=hour)
        'H_15_2010-2019.csv.gz'
        >>> odata.set_basename(dept=15, date=dt(2019, 12, 31), timestep=hour)
        'H_15_2010-2019.csv.gz'
        >>> odata.set_basename(dept=15, date=dt(2020, 1, 1), timestep=hour)
        'H_15_previous-2020-2022.csv.gz'
        >>> odata.set_basename(dept=15, date=dt(2022, 12, 31), timestep=hour)
        'H_15_previous-2020-2022.csv.gz'
        >>> odata.set_basename(dept=15, date=dt(2023, 1, 1), timestep=hour)
        'H_15_latest-2023-2024.csv.gz'
        >>> odata.set_basename(dept=15, date=dt(2024, 5, 1), timestep=hour)
        'H_15_latest-2023-2024.csv.gz'

        6-minute data

        >>> mn6 = td(minutes=6)
        >>> odata.set_basename(dept=7, date=dt(2000, 1, 1), timestep=mn6)
        'MN_07_2000-2009.csv.gz'
        >>> odata.set_basename(dept=7, date=dt(2009, 12, 31), timestep=mn6)
        'MN_07_2000-2009.csv.gz'
        >>> odata.set_basename(dept=7, date=dt(2010, 1, 1), timestep=mn6)
        'MN_07_2010-2019.csv.gz'
        >>> odata.set_basename(dept=7, date=dt(2019, 12, 31), timestep=mn6)
        'MN_07_2010-2019.csv.gz'
        >>> odata.set_basename(dept=7, date=dt(2020, 1, 1), timestep=mn6)
        'MN_07_previous-2020-2022.csv.gz'
        >>> odata.set_basename(dept=7, date=dt(2022, 12, 31), timestep=mn6)
        'MN_07_previous-2020-2022.csv.gz'
        >>> odata.set_basename(dept=7, date=dt(2023, 1, 1), timestep=mn6)
        'MN_07_latest-2023-2024.csv.gz'
        >>> odata.set_basename(dept=7, date=dt(2024, 5, 1), timestep=mn6)
        'MN_07_latest-2023-2024.csv.gz'

        """
        # ------------------------------------------------------------
        # Checks
        # ------------------------------------------------------------
        _exception.check_int(dept)
        _exception.check_dt(date)
        self.check_timestep(timestep)
        rr_t_uv = _exception.set_default(rr_t_uv, True)
        _exception.check_bool(rr_t_uv)
        now = dt.now(UTC)
        # Last year stored in the 'previous' files
        y2 = now.year - 2
        subdir = MDG_TIMESTEP[timestep]
        # (decade, year in decade) of the target date and of today
        dm = divmod(date.year, 10)
        dmn = divmod(now.year, 10)
        res = None
        if subdir == 'QUOT':
            # Daily data
            res = _set_basename_quot(date, y2, rr_t_uv, dept, now)
        elif subdir == 'HOR':
            # Hourly data
            res = _set_basename_hor(date, y2, rr_t_uv, dept, now, dm, dmn)
        elif subdir == 'MIN':
            # 6-minute data
            res = _set_basename_min(date, y2, rr_t_uv, dept, now, dm, dmn)
        if res is None:
            raise ValueError(f'Configuration inconnue: {date}, '
                             f'{timestep}, {rr_t_uv}')
        return res

    @staticmethod
    def check_timestep(timestep):
        """
        Check that the time step is one of the known exports.

        Raises
        ------
        ValueError
            If timestep is not a key of
            pyspc.convention.meteofrance.MDG_TIMESTEP

        """
        try:
            MDG_TIMESTEP[timestep]
        except KeyError as ke:
            raise ValueError("Pas de temps des données incorrect") from ke

    @classmethod
    def get_timesteps(cls):
        """
        Get the list of the available exports.

        Returns
        -------
        list
            Sorted time steps of the data of METEO.DATA.GOUV.FR

        See Also
        --------
        pyspc.convention.meteofrance.MDG_TIMESTEP

        """
        return sorted(MDG_TIMESTEP)
def _set_basename_quot(date, y2, rr_t_uv, dept, now):
    """
    Build the name of the remote file holding daily data.

    The period part is chosen from the year of date: the historical
    file 1865-1949, the 'previous' file from 1950 to y2, or the
    'latest' file covering last year and the current year of now.
    The suffix depends on rr_t_uv. Return None if no period matches.
    """
    year = date.year
    if 1865 <= year <= 1949:
        period = '1865-1949'
    elif 1950 <= year <= y2:
        period = f'previous-1950-{y2}'
    elif year > y2:
        period = f'latest-{now.year-1}-{now.year}'
    else:
        return None
    suffix = 'RR-T-Vent' if rr_t_uv else 'autres-parametres'
    return f'Q_{dept:02d}_{period}_{suffix}.csv.gz'
def _set_basename_hor(date, y2, rr_t_uv, dept, now, dm, dmn):
    """
    Build the name of the remote file holding hourly data.

    dm and dmn are the divmod-by-10 of the year of date and of the
    year of now. Years after y2 map to the 'latest' file, other years
    of the current decade to the 'previous' file, and older years
    (from 1920) to a ten-year file. Return None otherwise, in
    particular whenever rr_t_uv is false.
    """
    year = date.year
    if year > y2 and rr_t_uv:
        span = f'latest-{now.year-1}-{now.year}'
    elif dm[0] == dmn[0] and rr_t_uv:
        span = f'previous-{dm[0]*10}-{y2}'
    elif year >= 1920 and rr_t_uv:
        decade_start = dm[0] * 10
        span = f'{decade_start}-{decade_start + 9}'
    else:
        return None
    return f'H_{dept:02d}_{span}.csv.gz'
def _set_basename_min(date, y2, rr_t_uv, dept, now, dm, dmn):
    """
    Build the name of the remote file holding 6-minute data.

    dm and dmn are the divmod-by-10 of the year of date and of the
    year of now. Years after y2 map to the 'latest' file, other years
    of the current decade to the 'previous' file, and older years
    (from 2000) to a ten-year file. Return None otherwise, in
    particular whenever rr_t_uv is false.
    """
    target_year = date.year
    if target_year > y2 and rr_t_uv:
        period = f'latest-{now.year-1}-{now.year}'
    elif dm[0] == dmn[0] and rr_t_uv:
        period = f'previous-{dm[0]*10}-{y2}'
    elif target_year >= 2000 and rr_t_uv:
        first_year = dm[0] * 10
        period = f'{first_year}-{first_year + 9}'
    else:
        return None
    return f'MN_{dept:02d}_{period}.csv.gz'
def _write_res(res, localfile):
    """
    Write the body of a response to a local file, byte for byte.

    Parameters
    ----------
    res : requests.models.Response
        Response whose attribute content holds the raw body
    localfile : str
        Path of the file to write

    Notes
    -----
    The raw bytes are always written in binary mode. Decoding the body
    to text whenever the server declares a charset, then writing it in
    text mode, would corrupt binary payloads such as the .csv.gz
    archives and would translate the line endings on Windows. For a
    genuine text body, the raw bytes are already encoded as declared by
    the server, so nothing is lost.
    """
    with open(localfile, 'wb') as f:
        f.write(res.content)