# Code source de pyspc.metadata.shyreg.streamflow

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Méta-données (lieux, tronçons, statistiques) - SHYREG - Débit
"""
import os.path
import pandas as pnd
from io import StringIO



# (Sphinx viewcode "[docs]" link marker removed: HTML export artifact)
class Streamflow:
    """
    Structure tied to the SHYREG streamflow report sheets (PDF files).

    Attributes
    ----------
    filename : str or None
        Local path of the SHYREG streamflow pdf report.

    """

    def __init__(self, filename=None):
        """
        Initialise the Streamflow instance.

        Parameters
        ----------
        filename : str
            Local file of the SHYREG streamflow pdf

        """
        self.filename = filename

    def __str__(self):
        """Display the meta-data of the <Streamflow> instance."""
        text = """
        *************************************
        *********** SHYREG - Streamflow *****
        *************************************
        *  FICHIER          = {filename}
        *************************************
        """
        return text.format(**vars(self))

    @staticmethod
    def _csv_path(filename, suffix=''):
        """
        Build the csv path written next to the report *filename*.

        The extension is swapped with os.path.splitext rather than with
        a textual replacement of '.pdf': a name without a lower-case
        '.pdf' (e.g. 'X.PDF') would otherwise be left unchanged and the
        export would overwrite the report itself.

        Parameters
        ----------
        filename : str
            Path of the report
        suffix : str
            Text inserted between the base name and '.csv'

        Returns
        -------
        str
            <directory of filename>/<base name><suffix>.csv

        """
        folder = os.path.dirname(filename)
        stem = os.path.splitext(os.path.basename(filename))[0]
        return os.path.join(folder, f'{stem}{suffix}.csv')

    def read(self):
        """
        Read the 'shyreg' report.

        Only the first page of the pdf is parsed. Its extracted text is
        scanned line by line:

        - a line starting with 'Pointe' gives the column names;
        - lines starting with 'T=' are value rows until a line starting
          with 'Bornes' is met, and confidence-interval rows afterwards;
        - a line starting with 'IC ' gives, as its last token, the
          coverage percentage of the intervals.

        Returns
        -------
        None
            If no filename is set or if the file does not exist.
        df : pnd.DataFrame
            Table of the statistical values, indexed by 'Tr'
        dfi : pnd.DataFrame
            Table of the confidence intervals ('low/high' strings),
            indexed by 'Tr'
        pct : int or None
            Coverage percentage of the intervals, None if no 'IC ' line
            was found

        Examples
        --------
        >>> from pyspc.metadata.shyreg.streamflow import Streamflow
        >>> f = 'data/webservice/report/BNBV_LO2228.pdf'
        >>> shyreg = Streamflow(filename=f)
        >>> df, dfi, pct = shyreg.read()

        Statistical values

        >>> df
        VAR   Pointe   Jour     1h  ...    72h
        Tr                          ...
        2       49.2   42.4   47.9  ...   31.0
        5       66.8   55.6   64.6  ...   40.3
        10      82.1   66.1   78.8  ...   47.4
        20      99.8   77.5   95.2  ...   54.5
        50     129.0   94.5  122.0  ...   64.0
        100    158.0  110.0  147.0  ...   71.4
        500    254.0  158.0  230.0  ...   92.2
        1000   313.0  185.0  277.0  ...  104.0

        Confidence intervals

        >>> dfi
        VAR        Pointe         Jour  ...          72h
        Tr                              ...
        2         38/62  35.8/49.7  ...  30.5/30.8
        5     51.6/83.7  46.8/64.9  ...  39.5/40.1
        10     63.3/102  55.2/77.2  ...  45.7/48.1
        20     77.8/123  64.8/90.6  ...  51.9/56.3
        50      103/159   79.6/111  ...    59.8/68
        100     128/194   93.4/130  ...  65.8/78.2
        500     210/310    135/188  ...   83.8/104
        1000    262/376    158/222  ...   92.1/124

        Coverage percentage of the intervals

        >>> pct
        80

        """
        # No file set (default constructor) or missing file: nothing to
        # read. The None check avoids the TypeError os.path.exists
        # raises on None.
        if self.filename is None or not os.path.exists(self.filename):
            return None
        # Imported only once there is a file to parse, so the missing
        # file case does not require pypdf.
        from pypdf import PdfReader
        # =========================================================
        # === PDF READING
        # =========================================================
        # NOTE: on the sample reports, reader.metadata only identifies
        # an R graphics output (creator 'R', producer 'R 3.4.4', title
        # 'R Graphics Output'); it is not used here.
        reader = PdfReader(self.filename)
        page = reader.pages[0]
        values = []
        uncert = []
        in_uncert = False
        names = []
        pct = None
        for line in page.extract_text().split('\n'):
            if line.startswith('T=') and in_uncert:
                # Interval rows: glue 'low / high' into 'low/high' so
                # each interval is a single whitespace-free token.
                u = line.replace(' / ', '/').replace('T= ', '')
                uncert.append(_process_u(u))
            elif line.startswith('T='):
                values.append(line.replace('T= ', ''))
            elif line.startswith('Bornes'):
                # Every following 'T=' row belongs to the intervals.
                in_uncert = True
            elif line.startswith('Pointe'):
                # Whitespace split: repeated or trailing blanks must not
                # create empty column names.
                names = line.split()
            elif line.startswith('IC '):
                pct = int(float(line.split()[-1].replace('%', '')))
        # =========================================================
        # === DATAFRAME FORMATTING - VALUES
        # =========================================================
        df = pnd.read_csv(StringIO('\n'.join(values)), sep=r'\s+',
                          header=None, names=names).sort_index()
        df.index.name = 'Tr'
        # =========================================================
        # === DATAFRAME FORMATTING - UNCERTAINTIES
        # =========================================================
        dfi = pnd.read_csv(StringIO('\n'.join(uncert)), sep=r'\s+',
                           header=None, names=names).sort_index()
        dfi.index.name = 'Tr'
        return df, dfi, pct

    def to_csv(self):
        """
        Export to csv format.

        The files are written in the directory of the report:
        '<name>.csv' for the statistical values and
        '<name>_interv<pct>.csv' for the confidence intervals, with ';'
        as separator and the 'Tr' index as first column.

        Returns
        -------
        filenames : list or None
            Created csv files: statistics and confidence intervals.
            None if the report could not be read.

        """
        # =========================================================
        # === READING
        # =========================================================
        content = self.read()
        if content is None:
            return None
        df, dfi, pct = content
        filenames = []
        # =========================================================
        # === VALUES / UNCERTAINTIES CONVERSION
        # =========================================================
        # NOTE: pct is None when the report has no 'IC ' line; the
        # interval file is then named '<name>_intervNone.csv'.
        exports = (
            (df, self._csv_path(self.filename)),
            (dfi, self._csv_path(self.filename, f'_interv{pct}')),
        )
        for frame, target in exports:
            if isinstance(frame, pnd.DataFrame):
                frame.to_csv(
                    target,
                    sep=';',
                    header=True,
                    index=True,
                    lineterminator='\n',
                    na_rep=''
                )
                filenames.append(target)
        return filenames




def _process_u(u):
    """Traiter les lignes des incertitudes."""
    sep = '/'
    items = []
    for a in u.split(sep):
        if a.count(' '):
            items.append(a)
        else:
            x = a.find('.')
            r = a.rfind('.')
            d = len(a) - r - 1
            items.append(a[:x+1+d] + ' ' + a[x+1+d:])
    return sep.join(items)