Code source de pyspc.metadata.shyreg.streamflow

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Méta-données (lieux, tronçons, statistiques) - SHYREG - Débit
"""
import os.path
import pandas as pnd
from io import StringIO


class Streamflow():
    """Structure tied to the SHYREG streamflow report sheets (PDF)."""

    def __init__(self, filename=None):
        """
        Initialise the Streamflow instance.

        Parameters
        ----------
        filename : str
            Local path of the SHYREG streamflow PDF file

        """
        self.filename = filename

    def __str__(self):
        """Return the meta-data of the <Streamflow> instance as a banner."""
        text = """
        *************************************
        *********** SHYREG - Streamflow *****
        *************************************
        * FICHIER = {filename}
        *************************************
        """
        return text.format(**vars(self))

    @staticmethod
    def _csv_path(pdf_path, suffix=''):
        """
        Build the csv file name matching a report file name.

        Only a trailing '.pdf' extension (any letter case) is swapped for
        '.csv'; any other name gets '.csv' appended. The result therefore
        never equals the input path, so an export cannot overwrite the
        report it was read from.

        Parameters
        ----------
        pdf_path : str
            Path of the report file
        suffix : str
            Text inserted between the base name and the '.csv' extension

        Returns
        -------
        str
            Path of the csv file, in the same directory as the report

        """
        dirname, basename = os.path.split(pdf_path)
        stem, ext = os.path.splitext(basename)
        if ext.lower() != '.pdf':
            # Not a pdf extension: keep the whole name and append to it
            stem = basename
        return os.path.join(dirname, f'{stem}{suffix}.csv')

    def read(self):
        """
        Read the 'shyreg' report.

        Only the first page of the PDF is parsed. Text lines starting with
        'T=' feed the statistics table until a line starting with 'Bornes'
        is met; the following 'T=' lines feed the confidence-interval
        table. The line starting with 'Pointe' gives the column names and
        the last token of the line starting with 'IC ' gives the
        percentage.

        Returns
        -------
        None
            If no file name is set or if the file does not exist
        df : pnd.DataFrame
            Table of the statistical values, indexed by 'Tr'
        dfi : pnd.DataFrame
            Table of the confidence intervals ('low/high'), indexed by 'Tr'
        pct : int or None
            Coverage percentage of the intervals, None if not found

        Examples
        --------
        >>> from pyspc.metadata.shyreg.streamflow import Streamflow
        >>> f = 'data/webservice/report/BNBV_LO2228.pdf'
        >>> shyreg = Streamflow(filename=f)
        >>> df, dfi, pct = shyreg.read()

        Statistical values

        >>> df
        VAR   Pointe   Jour     1h  ...    72h
        Tr                          ...
        2       49.2   42.4   47.9  ...   31.0
        5       66.8   55.6   64.6  ...   40.3
        10      82.1   66.1   78.8  ...   47.4
        20      99.8   77.5   95.2  ...   54.5
        50     129.0   94.5  122.0  ...   64.0
        100    158.0  110.0  147.0  ...   71.4
        500    254.0  158.0  230.0  ...   92.2
        1000   313.0  185.0  277.0  ...  104.0

        Confidence intervals

        >>> dfi
        VAR      Pointe       Jour  ...        72h
        Tr                          ...
        2         38/62  35.8/49.7  ...  30.5/30.8
        5     51.6/83.7  46.8/64.9  ...  39.5/40.1
        10     63.3/102  55.2/77.2  ...  45.7/48.1
        20     77.8/123  64.8/90.6  ...  51.9/56.3
        50      103/159   79.6/111  ...    59.8/68
        100     128/194   93.4/130  ...  65.8/78.2
        500     210/310    135/188  ...   83.8/104
        1000    262/376    158/222  ...   92.1/124

        Coverage percentage of the intervals

        >>> pct
        80

        """
        # The path is checked first: an unset (None) file name would make
        # os.path.exists raise a TypeError, and a missing file must not
        # require pypdf to be importable.
        if self.filename is None or not os.path.exists(self.filename):
            return None
        from pypdf import PdfReader
        # =========================================================
        # === PDF READING
        # =========================================================
        reader = PdfReader(self.filename)
        page = reader.pages[0]
        values = []
        uncert = []
        in_uncert = False
        names = []
        pct = None
        for line in page.extract_text().split('\n'):
            if line.startswith('T=') and in_uncert:
                # Interval rows: tighten 'a / b' into 'a/b', then let
                # _process_u separate the numbers
                u = line.replace(' / ', '/').replace('T= ', '')
                uncert.append(_process_u(u))
            elif line.startswith('T='):
                values.append(line.replace('T= ', ''))
            elif line.startswith('Bornes'):
                # Every following 'T=' row belongs to the interval table
                in_uncert = True
            elif line.startswith('Pointe'):
                names = line.split(' ')
            elif line.startswith('IC '):
                pct = int(float(line.split(' ')[-1].replace('%', '')))
        # =========================================================
        # === DATAFRAME FORMATTING - VALUES
        # =========================================================
        df = pnd.read_csv(
            StringIO('\n'.join(values)),
            sep=r'\s+',
            header=None,
            names=names
        ).sort_index()
        df.index.name = 'Tr'
        # =========================================================
        # === DATAFRAME FORMATTING - UNCERTAINTIES
        # =========================================================
        dfi = pnd.read_csv(
            StringIO('\n'.join(uncert)),
            sep=r'\s+',
            header=None,
            names=names
        ).sort_index()
        dfi.index.name = 'Tr'
        return df, dfi, pct

    def to_csv(self):
        """
        Export the report tables to csv files.

        The files are written next to the report: '<name>.csv' for the
        statistics and '<name>_interv<pct>.csv' for the confidence
        intervals.

        Returns
        -------
        filenames : list
            Created csv files: statistics and confidence intervals.
            None if the report could not be read.

        """
        # =========================================================
        # === READING
        # =========================================================
        content = self.read()
        if content is None:
            return None
        df, dfi, pct = content
        filenames = []
        # =========================================================
        # === VALUES CONVERSION
        # =========================================================
        f = self._csv_path(self.filename)
        if isinstance(df, pnd.DataFrame):
            df.to_csv(
                f,
                sep=';',
                header=True,
                index=True,
                lineterminator='\n',
                na_rep=''
            )
            filenames.append(f)
        # =========================================================
        # === UNCERTAINTIES CONVERSION
        # =========================================================
        f = self._csv_path(self.filename, f'_interv{pct}')
        if isinstance(dfi, pnd.DataFrame):
            dfi.to_csv(
                f,
                sep=';',
                header=True,
                index=True,
                lineterminator='\n',
                na_rep=''
            )
            filenames.append(f)
        return filenames
def _process_u(u):
    """
    Separate the numbers of a confidence-interval line.

    The line is cut on '/'. A chunk that already contains a space is kept
    as is. A chunk without any space is cut after its first number: the
    number of decimals is taken from the digits following the last '.'
    and applied after the first '.', then a space is inserted at that
    position (a chunk holding a single number simply gets a trailing
    space).

    Parameters
    ----------
    u : str
        Interval line, bounds separated by '/'

    Returns
    -------
    str
        Line with the processed chunks joined back with '/'

    """
    def _split_chunk(chunk):
        """Insert the missing space inside a chunk without any space."""
        if ' ' in chunk:
            return chunk
        first_dot = chunk.find('.')
        last_dot = chunk.rfind('.')
        n_decimals = len(chunk) - last_dot - 1
        cut = first_dot + 1 + n_decimals
        return chunk[:cut] + ' ' + chunk[cut:]

    return '/'.join(_split_chunk(chunk) for chunk in u.split('/'))