Code source de pyspc.io.hydroportail.reader

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""Bibliothèque pyspc du projet pyspc - IO - Hydroportail - read."""
import os.path
import pandas as pnd

import pyspc.core.exception as _exception
from pyspc.core.samples import Sample, Samples
from pyspc.core.statistics import Stat, Stats
from pyspc.convention.hydroportail import DATATYPES, VARNAMES
from pyspc.metadata.hydroportail import Sample as HP_Sample


def read_Hydroportail(filenames=None, datatype=None):
    """
    Build a collection of samples or statistics from Hydroportail files.

    Parameters
    ----------
    filenames : list
        Paths of the Hydroportail files to read.
    datatype : str
        Kind of Hydroportail file. It is checked against
        pyspc.convention.hydroportail.DATATYPES.

    Returns
    -------
    - samples : pyspc.core.samples.Samples
        Collection of samples if `datatype` = 'hp_sample'.
    - stats : pyspc.core.statistics.Stats
        Collection of statistical results if `datatype` = 'hp_stat'.

    Raises
    ------
    NotImplementedError
        If `datatype` passed the checks but is neither 'hp_sample' nor
        'hp_stat'.

    Notes
    -----
    Both arguments are validated through the pyspc.core.exception helpers
    (`check_listlike` and `raise_valueerror`); judging by their names they
    presumably raise when `filenames` is not list-like or when `datatype`
    is unknown - confirm against pyspc.core.exception.

    Examples
    --------
    >>> import os.path
    >>> from pyspc.io.hydroportail import read_Hydroportail
    >>> dirname = os.path.join('data', 'metadata', 'hydroportail', 'sample')

    Case with several sample files.

    >>> fbas = os.path.join(dirname, 'Q-X_LOI_K0550010_Echantillon.csv')
    >>> fvb = os.path.join(dirname, 'Q-X_LOI_K2330810_Echantillon.csv')
    >>> fg = os.path.join(dirname, 'QJ-X_LOI_K7433030_Echantillon.csv')
    >>> samples = read_Hydroportail(
    ...     filenames=[fbas, fvb, fg], datatype='hp_sample')
    >>> samples
    *************************************
    ********** SAMPLES ******************
    *************************************
    * NOM DE LA COLLECTION = Hydroportail
    * TYPE DE COLLECTION = hp_sample
    * NOMBRE DE SERIES = 3
    * ----------------------------------
    * SAMPLE #1 : Q-X_LOI_K0550010_Echantillon.csv
    * ----------------------------------
    * SAMPLE #2 : Q-X_LOI_K2330810_Echantillon.csv
    * ----------------------------------
    * SAMPLE #3 : QJ-X_LOI_K7433030_Echantillon.csv
    *************************************

    >>> for f in [fbas, fvb, fg]:
    ...     print(samples[os.path.basename(f)])
    *************************************
    *********** SAMPLE ******************
    *************************************
    * NOM VARIABLE SPC = QI
    * INTITULE VARIABLE = Débit instantané
    * IDENTIFIANT = K0550010
    * FOURNISSEUR = Provider(name='Hydroportail')
    * NOM VARIABLE = QI
    * TAILLE ECHANTILLON = 30
    * PREMIERE DATE = 1994-11-05 17:29:00
    * DERNIERE DATE = 2024-03-10 08:47:30
    *************************************
    *************************************
    *********** SAMPLE ******************
    *************************************
    * NOM VARIABLE SPC = QI
    * INTITULE VARIABLE = Débit instantané
    * IDENTIFIANT = K2330810
    * FOURNISSEUR = Provider(name='Hydroportail')
    * NOM VARIABLE = QI
    * TAILLE ECHANTILLON = 30
    * PREMIERE DATE = 1994-11-05 17:51:00
    * DERNIERE DATE = 2024-03-10 08:30:00
    *************************************
    *************************************
    *********** SAMPLE ******************
    *************************************
    * NOM VARIABLE SPC = QJ
    * INTITULE VARIABLE = Débit moyen journalier
    * IDENTIFIANT = K7433030
    * FOURNISSEUR = Provider(name='Hydroportail')
    * NOM VARIABLE = QJ
    * TAILLE ECHANTILLON = 47
    * PREMIERE DATE = 1978-05-01 00:00:00
    * DERNIERE DATE = 2024-03-30 00:00:00
    *************************************

    Case with one statistical result file produced by
    pyspc.webservice.hydroportail (`dirname` then designates the directory
    holding that result file).

    >>> fbas = os.path.join(dirname, 'K0550010_Q-X_result-quantile.csv')
    >>> stats = read_Hydroportail(filenames=[fbas], datatype='hp_stat')
    >>> stats
    *************************************
    ************ STATS ******************
    *************************************
    * NOM DE LA COLLECTION = Hydroportail
    * TYPE DE COLLECTION = hp_stat
    * NOMBRE DE SERIES = 1
    * ----------------------------------
    * STAT #1 : K0550010_Q-X_result-quantile.csv
    *************************************

    >>> stats['K0550010_Q-X_result-quantile.csv']
    *************************************
    ************* STAT ******************
    *************************************
    * NOM ECHANTILLON = K0550010_Q-X_result-quantile.csv
    * NOM VARIABLE SPC = QI
    * INTITULE VARIABLE = Débit instantané
    * IDENTIFIANT = K0550010
    * FOURNISSEUR = Provider(name='Hydroportail')
    * NOM VARIABLE = QI
    * TAILLE ECHANTILLON = None
    * PERIODE ECHANTILLON = None
    * GRADEX PLUVIOMETRIQUE= None
    * COUVERTURE INCERT. = None
    * METHODE AJUSTEMENT = None
    * AJUSTEMENT
        return_period        value    value_low   value_high
    0          1.0101     0.000000     0.000000     0.000000
    1          1.0204     0.000000     0.000000    19.934164
    2          1.0526     0.000000     0.000000   123.463370
    3          1.1000     3.982856     0.000000   208.819089
    4          1.1111    22.674447     0.000000   227.177851
    5          1.2000   138.271488     0.000000   331.550388
    6          1.2500   187.726734    17.795779   379.203160
    7          1.3000   230.639940    58.123398   420.722736
    8          1.4000   303.185126   135.543082   487.451056
    9          1.5000   363.696430   198.283620   548.904692
    10         1.6000   415.959004   247.031408   603.629598
    11         1.7000   462.141551   287.274916   655.861528
    12         1.8000   503.620245   324.440653   698.245196
    13         1.8182   510.734571   331.111811   704.880204
    14         1.9000   541.331536   359.994107   742.109184
    15         2.0000   575.945896   392.579075   785.109444
    16         2.2222   644.116785   455.016238   859.651592
    17         3.0000   823.057191   597.899979  1083.983588
    18         4.0000   981.211205   720.242374  1288.644738
    19         5.0000  1098.285870   802.562232  1441.756226
    20         6.0000  1191.397576   870.734276  1566.030160
    21         7.0000  1268.747282   929.515496  1672.440712
    22         8.0000  1334.922575   977.489910  1759.503397
    23         9.0000  1392.755736  1019.533721  1831.313188
    24        10.0000  1444.120383  1056.849569  1897.011231
    25        20.0000  1775.853151  1287.805249  2333.124768
    26        30.0000  1966.690828  1421.754619  2593.589002
    27        40.0000  2101.237288  1522.510344  2775.271107
    28        50.0000  2205.247192  1596.553099  2916.902665
    29        60.0000  2290.050115  1656.890002  3036.935878
    30        70.0000  2361.646277  1705.714550  3133.907485
    *************************************

    """
    # -------------------------------------------------------------------------
    # 0- Input checks
    # -------------------------------------------------------------------------
    _exception.check_listlike(filenames)
    _exception.raise_valueerror(
        datatype not in DATATYPES,
        f"Type de données Hydroportail '{datatype}' incorrect")
    provider = 'Hydroportail'
    # -------------------------------------------------------------------------
    # 1- Conversion: each supported data type is paired with its reader.
    #    The pairs are tried in order: samples first, then statistics.
    # -------------------------------------------------------------------------
    converters = (
        ('hp_sample', _hp_sample),
        ('hp_stat', _hp_stat),
    )
    for known_type, convert in converters:
        if datatype == known_type:
            return convert(filenames, provider)
    # -------------------------------------------------------------------------
    # X- Data type accepted by the checks but without any reader
    # -------------------------------------------------------------------------
    raise NotImplementedError
def _hp_stat(filenames, provider):
    """Convert Hydroportail statistical result files into a Stats collection.

    Each file is read as a ';'-separated CSV whose first column is the
    index. The rows are sorted by the 'T' column (cast to float) and the
    'q', 'IC.low' and 'IC.high' columns are divided by 1000 before being
    handed to Stat.from_records.

    The base name of each file is split on '_': the first token is used as
    the station code and the second one is looked up in VARNAMES, falling
    back to 'QI' when it is not a known key.
    """
    collection = Stats(datatype='hp_stat', name=provider)
    for path in filenames:
        basename = os.path.basename(path)
        tokens = basename.split('_')
        # Unknown variable token: default to 'QI'
        try:
            varname = VARNAMES[tokens[1]]
        except KeyError:
            varname = 'QI'
        station = tokens[0]
        table = pnd.read_csv(path, sep=";", index_col=0)
        table['T'] = table['T'].astype(float)
        ordered = table.sort_values(by=['T'])
        collection.add(
            stat=Stat.from_records(
                values=list(ordered['q'] / 1000.),
                return_periods=list(ordered['T']),
                values_low=list(ordered['IC.low'] / 1000.),
                values_high=list(ordered['IC.high'] / 1000.),
                name=basename,
                code=station,
                varname=varname,
                provider=provider
            )
        )
    return collection


def _hp_sample(filenames, provider):
    """Convert Hydroportail sample files into a Samples collection.

    A single HP_Sample reader is reused for all files: its `filename`
    attribute is updated before each call to `read`.

    The base name of each file is split on '_': the first token is looked
    up in VARNAMES (falling back to 'QI' when it is not a known key) and
    the third token is used as the station code.

    Dates are taken from the 'Date de la mesure du min/max' column when it
    exists, otherwise from the 'Date' column. Values that do not match the
    '%Y-%m-%dT%H:%M:%SZ' format are coerced rather than raising.
    """
    collection = Samples(datatype='hp_sample', name=provider)
    parser = HP_Sample()
    date_format = '%Y-%m-%dT%H:%M:%SZ'
    for path in filenames:
        parser.filename = path
        basename = os.path.basename(path)
        tokens = basename.split('_')
        # Unknown variable token: default to 'QI'
        try:
            varname = VARNAMES[tokens[0]]
        except KeyError:
            varname = 'QI'
        station = tokens[2]
        frame = parser.read()
        # The date column name depends on the kind of sample file
        try:
            raw_dates = frame['Date de la mesure du min/max']
        except KeyError:
            raw_dates = frame['Date']
        frame['DATE'] = pnd.to_datetime(
            raw_dates, format=date_format, errors='coerce')
        collection.add(
            sample=Sample.from_dve(
                dates=list(frame['DATE']),
                values=list(frame['Valeur (en m³/s)']),
                exclusions=list(frame['Exclue']),
                code=station,
                varname=varname,
                name=basename,
                provider=provider
            )
        )
    return collection