#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021 R. Marty
# (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Méta-données (lieux, tronçons, statistiques) - SHYREG - Débit
"""
import os.path
import pandas as pnd
from io import StringIO
class Streamflow:
    """Reader/exporter for a SHYREG streamflow report stored as a PDF file."""

    def __init__(self, filename=None):
        """
        Initialise the Streamflow instance.

        Parameters
        ----------
        filename : str
            Local path of the SHYREG streamflow PDF report.

        """
        self.filename = filename

    def __str__(self):
        """Return a text banner showing the instance meta-data."""
        text = (
            "\n"
            "*************************************\n"
            "*********** SHYREG - Streamflow *****\n"
            "*************************************\n"
            "* FICHIER = {filename}\n"
            "*************************************\n"
        )
        return text.format(**vars(self))

    @staticmethod
    def _to_frame(rows, names):
        """
        Convert whitespace-separated text rows into a DataFrame.

        Returns None when *rows* is empty, because pandas cannot parse an
        empty buffer. Otherwise the rows are parsed with *names* as column
        labels, sorted by index, and the index is named 'Tr'.
        """
        if not rows:
            return None
        frame = pnd.read_csv(
            StringIO('\n'.join(rows)),
            sep=r'\s+',
            header=None,
            names=names,
        ).sort_index()
        frame.index.name = 'Tr'
        return frame

    def read(self):
        """
        Read the 'shyreg' report.

        Only the first page of the PDF is parsed.

        Returns
        -------
        None
            If ``filename`` is unset or does not exist.
        df : pnd.DataFrame or None
            Table of statistical values (None if no value row was found).
        dfi : pnd.DataFrame or None
            Table of confidence intervals, each cell being 'low/high'
            (None if no interval row was found).
        pct : int or None
            Coverage percentage of the intervals (None if the 'IC' line
            was not found).

        Examples
        --------
        >>> from pyspc.metadata.shyreg.streamflow import Streamflow
        >>> f = 'data/webservice/report/BNBV_LO2228.pdf'
        >>> shyreg = Streamflow(filename=f)
        >>> df, dfi, pct = shyreg.read()

        Statistical values

        >>> df
        VAR Pointe Jour 1h ... 72h
        Tr ...
        2 49.2 42.4 47.9 ... 31.0
        5 66.8 55.6 64.6 ... 40.3
        10 82.1 66.1 78.8 ... 47.4
        20 99.8 77.5 95.2 ... 54.5
        50 129.0 94.5 122.0 ... 64.0
        100 158.0 110.0 147.0 ... 71.4
        500 254.0 158.0 230.0 ... 92.2
        1000 313.0 185.0 277.0 ... 104.0

        Confidence intervals

        >>> dfi
        VAR Pointe Jour ... 72h
        Tr ...
        2 38/62 35.8/49.7 ... 30.5/30.8
        5 51.6/83.7 46.8/64.9 ... 39.5/40.1
        10 63.3/102 55.2/77.2 ... 45.7/48.1
        20 77.8/123 64.8/90.6 ... 51.9/56.3
        50 103/159 79.6/111 ... 59.8/68
        100 128/194 93.4/130 ... 65.8/78.2
        500 210/310 135/188 ... 83.8/104
        1000 262/376 158/222 ... 92.1/124

        Coverage percentage of the intervals

        >>> pct
        80

        """
        # os.path.exists(None) raises TypeError, so the unset default must be
        # tested explicitly before touching the file system.
        if self.filename is None or not os.path.exists(self.filename):
            return None
        # Imported lazily: pypdf is only needed once there is a file to parse.
        from pypdf import PdfReader

        # =========================================================
        # === PDF READING
        # =========================================================
        reader = PdfReader(self.filename)
        page = reader.pages[0]
        values = []
        uncert = []
        in_uncert = False
        names = []
        pct = None
        for line in page.extract_text().split('\n'):
            if line.startswith('T=') and in_uncert:
                # Rows after the 'Bornes' marker hold 'low / high' pairs.
                u = line.replace(' / ', '/').replace('T= ', '')
                uncert.append(_process_u(u))
            elif line.startswith('T='):
                values.append(line.replace('T= ', ''))
            elif line.startswith('Bornes'):
                in_uncert = True
            elif line.startswith('Pointe'):
                # split() rather than split(' '): repeated or trailing spaces
                # must not create empty column names.
                names = line.split()
            elif line.startswith('IC '):
                pct = int(float(line.split()[-1].replace('%', '')))
        # =========================================================
        # === DATAFRAME FORMATTING - VALUES / INTERVALS
        # =========================================================
        df = self._to_frame(values, names)
        dfi = self._to_frame(uncert, names)
        return df, dfi, pct

    def to_csv(self):
        """
        Export the report tables to csv files next to the PDF.

        The statistics go to '<stem>.csv' and the confidence intervals to
        '<stem>_interv<pct>.csv', where <stem> is the PDF file name without
        its extension.

        Returns
        -------
        filenames : list or None
            Paths of the csv files written (statistics first, then
            confidence intervals); None if the report could not be read.

        """
        # =========================================================
        # === READING
        # =========================================================
        content = self.read()
        if content is None:
            return None
        df, dfi, pct = content
        # =========================================================
        # === TARGET FILE NAMES
        # =========================================================
        # splitext on the basename drops whatever the real extension is, so
        # a '.PDF' or extension-less source is never overwritten by the csv.
        directory, basename = os.path.split(self.filename)
        stem = os.path.splitext(basename)[0]
        targets = (
            (df, os.path.join(directory, f'{stem}.csv')),
            (dfi, os.path.join(directory, f'{stem}_interv{pct}.csv')),
        )
        # =========================================================
        # === CONVERSION
        # =========================================================
        filenames = []
        for frame, path in targets:
            if isinstance(frame, pnd.DataFrame):
                frame.to_csv(
                    path,
                    sep=';',
                    header=True,
                    index=True,
                    lineterminator='\n',
                    na_rep=''
                )
                filenames.append(path)
        return filenames
def _process_u(u):
"""Traiter les lignes des incertitudes."""
sep = '/'
items = []
for a in u.split(sep):
if a.count(' '):
items.append(a)
else:
x = a.find('.')
r = a.rfind('.')
d = len(a) - r - 1
items.append(a[:x+1+d] + ' ' + a[x+1+d:])
return sep.join(items)