Code source de pyspc.io.hydroportail.reader

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""Bibliothèque pyspc du projet pyspc - IO - Hydroportail - read."""
import os.path
import pandas as pnd

import pyspc.core.exception as _exception
from pyspc.core.samples import Sample, Samples
from pyspc.core.statistics import Stat, Stats
from pyspc.convention.hydroportail import DATATYPES, VARNAMES
from pyspc.metadata.hydroportail import Sample as HP_Sample



[docs]
def read_Hydroportail(filenames=None, datatype=None):
    """
    Build a Samples or Stats collection from Hydroportail files.

    Parameters
    ----------
    filenames : list
        Paths of the Hydroportail files to read.
    datatype : str
        Kind of Hydroportail file; must belong to
        pyspc.convention.hydroportail.DATATYPES.

    Returns
    -------
    - samples : pyspc.core.samples.Samples
        Samples, when `datatype` = 'hp_sample'.
    - stats : pyspc.core.statistics.Stats
        Statistical results, when `datatype` = 'hp_stat'.

    Raises
    ------
    ValueError
        Presumably raised by `_exception.raise_valueerror` when `datatype`
        is not in DATATYPES (helper not visible here - to be confirmed).
    NotImplementedError
        When `datatype` is accepted by DATATYPES but is neither
        'hp_sample' nor 'hp_stat'.

    Examples
    --------
    >>> import os.path
    >>> from pyspc.io.hydroportail import read_Hydroportail
    >>> dirname = os.path.join('data', 'metadata', 'hydroportail', 'sample')

    Case with several sample files.

    >>> fbas = os.path.join(dirname, 'Q-X_LOI_K0550010_Echantillon.csv')
    >>> fvb = os.path.join(dirname, 'Q-X_LOI_K2330810_Echantillon.csv')
    >>> fg = os.path.join(dirname, 'QJ-X_LOI_K7433030_Echantillon.csv')
    >>> samples = read_Hydroportail(
    ...     filenames=[fbas, fvb, fg], datatype='hp_sample')
    >>> samples
    *************************************
    ********** SAMPLES ******************
    *************************************
    *  NOM DE LA COLLECTION = Hydroportail
    *  TYPE DE COLLECTION   = hp_sample
    *  NOMBRE DE SERIES     = 3
    *  ----------------------------------
    *  SAMPLE #1 : Q-X_LOI_K0550010_Echantillon.csv
    *  ----------------------------------
    *  SAMPLE #2 : Q-X_LOI_K2330810_Echantillon.csv
    *  ----------------------------------
    *  SAMPLE #3 : QJ-X_LOI_K7433030_Echantillon.csv
    *************************************

    >>> for f in [fbas, fvb, fg]:
    ...     print(samples[os.path.basename(f)])
    *************************************
    *********** SAMPLE ******************
    *************************************
    *  NOM VARIABLE SPC     = QI
    *  INTITULE VARIABLE    = Débit instantané
    *  IDENTIFIANT          = K0550010
    *  FOURNISSEUR          = Provider(name='Hydroportail')
    *  NOM VARIABLE         = QI
    *  TAILLE ECHANTILLON   = 30
    *  PREMIERE DATE        = 1994-11-05 17:29:00
    *  DERNIERE DATE        = 2024-03-10 08:47:30
    *************************************
    *************************************
    *********** SAMPLE ******************
    *************************************
    *  NOM VARIABLE SPC     = QI
    *  INTITULE VARIABLE    = Débit instantané
    *  IDENTIFIANT          = K2330810
    *  FOURNISSEUR          = Provider(name='Hydroportail')
    *  NOM VARIABLE         = QI
    *  TAILLE ECHANTILLON   = 30
    *  PREMIERE DATE        = 1994-11-05 17:51:00
    *  DERNIERE DATE        = 2024-03-10 08:30:00
    *************************************
    *************************************
    *********** SAMPLE ******************
    *************************************
    *  NOM VARIABLE SPC     = QJ
    *  INTITULE VARIABLE    = Débit moyen journalier
    *  IDENTIFIANT          = K7433030
    *  FOURNISSEUR          = Provider(name='Hydroportail')
    *  NOM VARIABLE         = QJ
    *  TAILLE ECHANTILLON   = 47
    *  PREMIERE DATE        = 1978-05-01 00:00:00
    *  DERNIERE DATE        = 2024-03-30 00:00:00
    *************************************

    Case with one statistical-result file produced by
    pyspc.webservice.hydroportail.

    NOTE(review): `self` is not defined in this example; replace
    `self.dirname` with the directory that contains the file.

    >>> fbas = os.path.join(self.dirname, 'K0550010_Q-X_result-quantile.csv')
    >>> stats = read_Hydroportail(filenames=[fbas], datatype='hp_stat')
    >>> stats
    *************************************
    ************ STATS ******************
    *************************************
    *  NOM DE LA COLLECTION = Hydroportail
    *  TYPE DE COLLECTION   = hp_stat
    *  NOMBRE DE SERIES     = 1
    *  ----------------------------------
    *  STAT #1 : K0550010_Q-X_result-quantile.csv
    *************************************

    >>> stats['K0550010_Q-X_result-quantile.csv']
    *************************************
    ************* STAT ******************
    *************************************
    *  NOM ECHANTILLON      = K0550010_Q-X_result-quantile.csv
    *  NOM VARIABLE SPC     = QI
    *  INTITULE VARIABLE    = Débit instantané
    *  IDENTIFIANT          = K0550010
    *  FOURNISSEUR          = Provider(name='Hydroportail')
    *  NOM VARIABLE         = QI
    *  TAILLE ECHANTILLON   = None
    *  PERIODE ECHANTILLON  = None
    *  GRADEX PLUVIOMETRIQUE= None
    *  COUVERTURE INCERT.   = None
    *  METHODE AJUSTEMENT   = None
    *  AJUSTEMENT
        return_period        value    value_low   value_high
    0          1.0101     0.000000     0.000000     0.000000
    1          1.0204     0.000000     0.000000    19.934164
    2          1.0526     0.000000     0.000000   123.463370
    3          1.1000     3.982856     0.000000   208.819089
    4          1.1111    22.674447     0.000000   227.177851
    5          1.2000   138.271488     0.000000   331.550388
    6          1.2500   187.726734    17.795779   379.203160
    7          1.3000   230.639940    58.123398   420.722736
    8          1.4000   303.185126   135.543082   487.451056
    9          1.5000   363.696430   198.283620   548.904692
    10         1.6000   415.959004   247.031408   603.629598
    11         1.7000   462.141551   287.274916   655.861528
    12         1.8000   503.620245   324.440653   698.245196
    13         1.8182   510.734571   331.111811   704.880204
    14         1.9000   541.331536   359.994107   742.109184
    15         2.0000   575.945896   392.579075   785.109444
    16         2.2222   644.116785   455.016238   859.651592
    17         3.0000   823.057191   597.899979  1083.983588
    18         4.0000   981.211205   720.242374  1288.644738
    19         5.0000  1098.285870   802.562232  1441.756226
    20         6.0000  1191.397576   870.734276  1566.030160
    21         7.0000  1268.747282   929.515496  1672.440712
    22         8.0000  1334.922575   977.489910  1759.503397
    23         9.0000  1392.755736  1019.533721  1831.313188
    24        10.0000  1444.120383  1056.849569  1897.011231
    25        20.0000  1775.853151  1287.805249  2333.124768
    26        30.0000  1966.690828  1421.754619  2593.589002
    27        40.0000  2101.237288  1522.510344  2775.271107
    28        50.0000  2205.247192  1596.553099  2916.902665
    29        60.0000  2290.050115  1656.890002  3036.935878
    30        70.0000  2361.646277  1705.714550  3133.907485
    *************************************

    """
    # -------------------------------------------------------------------------
    # 0- Input checks (delegated to the project's exception helpers)
    # -------------------------------------------------------------------------
    _exception.check_listlike(filenames)
    _exception.raise_valueerror(
        datatype not in DATATYPES,
        f"Type de données Hydroportail '{datatype}' incorrect")
    provider = 'Hydroportail'
    # -------------------------------------------------------------------------
    # 1- Conversion: each handled datatype is paired with its converter
    #    (samples first, then statistical results)
    # -------------------------------------------------------------------------
    converters = (
        ('hp_sample', _hp_sample),
        ('hp_stat', _hp_stat),
    )
    for handled, convert in converters:
        if datatype == handled:
            return convert(filenames, provider)
    # -------------------------------------------------------------------------
    # X- Datatype listed in DATATYPES but without a converter here
    # -------------------------------------------------------------------------
    raise NotImplementedError



def _hp_stat(filenames, provider):
    """
    Convert Hydroportail statistical-result files into a Stats collection.

    Parameters
    ----------
    filenames : list
        Paths of ';'-separated CSV files providing the columns 'T', 'q',
        'IC.low' and 'IC.high'. The basename is split on '_': the first
        token is used as the code, the second one is looked up in VARNAMES.
    provider : str
        Provider name, used both as the collection name and as the
        provider of every Stat.

    Returns
    -------
    Stats
        Collection of datatype 'hp_stat' holding one Stat per file, each
        named after the file basename.

    """
    collection = Stats(datatype='hp_stat', name=provider)
    for path in filenames:
        basename = os.path.basename(path)
        tokens = basename.split('_')
        # Unknown variable tokens fall back to 'QI'. A basename without
        # any '_' still raises IndexError here.
        try:
            varname = VARNAMES[tokens[1]]
        except KeyError:
            varname = 'QI'
        code = tokens[0]
        # The first CSV column is used as the index.
        table = pnd.read_csv(path, sep=";", index_col=0)
        # Order the rows by increasing 'T' (passed below as return periods).
        table['T'] = table['T'].astype(float)
        table = table.sort_values(by=['T'])
        # Values are divided by 1000 (presumably l/s -> m3/s; TODO confirm).
        quantiles = table['q'] / 1000.
        lower_bounds = table['IC.low'] / 1000.
        upper_bounds = table['IC.high'] / 1000.
        collection.add(stat=Stat.from_records(
            values=list(quantiles),
            return_periods=list(table['T']),
            values_low=list(lower_bounds),
            values_high=list(upper_bounds),
            name=basename,
            code=code, varname=varname, provider=provider
        ))
    return collection


def _hp_sample(filenames, provider):
    """
    Convert Hydroportail sample files into a Samples collection.

    Parameters
    ----------
    filenames : list
        Paths of the sample files, read through
        pyspc.metadata.hydroportail.Sample. The basename is split on '_':
        the first token is looked up in VARNAMES, the third one is used
        as the code.
    provider : str
        Provider name, used both as the collection name and as the
        provider of every Sample.

    Returns
    -------
    Samples
        Collection of datatype 'hp_sample' holding one Sample per file,
        each named after the file basename.

    """
    collection = Samples(datatype='hp_sample', name=provider)
    # A single reader instance is reused; only its filename changes.
    reader = HP_Sample()
    date_format = '%Y-%m-%dT%H:%M:%SZ'
    for path in filenames:
        reader.filename = path
        basename = os.path.basename(path)
        tokens = basename.split('_')
        # Unknown variable tokens fall back to 'QI'.
        try:
            varname = VARNAMES[tokens[0]]
        except KeyError:
            varname = 'QI'
        # Fewer than three '_'-separated tokens raises IndexError here.
        code = tokens[2]
        table = reader.read()
        # Two date-column names are supported: the min/max measurement
        # date is preferred, with the plain 'Date' column as fallback.
        try:
            raw_dates = table['Date de la mesure du min/max']
        except KeyError:
            raw_dates = table['Date']
        # Entries that do not match the format are coerced instead of
        # raising (errors='coerce').
        table['DATE'] = pnd.to_datetime(
            raw_dates, format=date_format, errors='coerce')
        collection.add(sample=Sample.from_dve(
            dates=list(table['DATE']),
            values=list(table['Valeur (en m³/s)']),
            exclusions=list(table['Exclue']),
            code=code, varname=varname, name=basename, provider=provider))
    return collection