Code source de pyspc.metadata.hydroportail.sample
#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021 R. Marty
# (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Méta-données (lieux, tronçons, statistiques) - Hydroportail - Sample
"""
import csv
from datetime import datetime as dt
import pandas as pnd
import pyspc.core.exception as _exception
from pyspc.convention.hydroportail import OUAHS_COLS, OUAHS_NO_TRAILING_ZEROS
[docs]
class Sample():
    """
    Handle Hydroportail csv files - statistical sample.

    Attributes
    ----------
    filename : str
        Name of the csv file

    """
[docs]
def __init__(self, filename=None):
"""
Initialiser l'instance de la classe Sample.
Parameters
----------
filename : str
Nom du fichier csv
"""
self.filename = filename
def __str__(self):
"""
Afficher des méta-données.
"""
text = """
*************************************
***** HYDROPORTAIL - Sample (csv) ***
*************************************
* NOM FICHIER = {filename}
*************************************
"""
return text.format(**vars(self))
[docs]
def read(self):
    """
    Read a Hydroportail csv file - statistical sample.

    The file named by ``self.filename`` is parsed by pandas as a
    comma-separated csv; the resulting frame is returned as is.

    Returns
    -------
    data : pnd.DataFrame
        Dataframe of the data

    Examples
    --------
    >>> import os.path
    >>> from pyspc.metadata.hydroportail import Sample
    >>> d = 'data/metadata/hydroportail/sample/'
    >>> f = os.path.join(d, 'Q-X-K0550010_Echantillon.csv')
    >>> reader = Sample(filename=f)
    >>> df = reader.read()

    One row per season (here 30 seasons, from 1994-09-01 to 2024-08-31)

    >>> df.shape
    (30, 15)
    >>> for name in df.columns:
    ...     print(name)
    Exclue
    Début de saison
    Fin de saison
    Valeur (en m³/s)
    Date
    Date de la mesure du min/max
    Qualification
    Nb de points
    Données continues & bonnes
    Données continues & douteuses
    Données continues & non qualifiées
    Données discontinues
    Données de discontinuités faibles
    Données de discontinuités neutres
    Données de discontinuités fortes

    A cell may hold several ';'-separated entries when the seasonal
    value was observed more than once

    >>> df.loc[13, 'Date']
    '2008-05-29T00:00:00Z;2008-05-31T00:00:00Z'
    >>> df.loc[13, 'Date de la mesure du min/max']
    '2008-05-29T19:30:00Z;2008-05-31T07:10:00Z'

    """
    return pnd.read_csv(self.filename, sep=',')
[docs]
def write(self, df=None, cols=None):
    """
    Write a Hydroportail sample file.

    The input frame is converted to the Hydroportail layout and written
    to ``self.filename`` as a comma-separated csv with every field
    quoted. The caller's frame is left untouched: all derived columns are
    built on a copy.

    Derived columns:

    - 'Valeur (en m³/s)': copy of the value column
    - 'Exclue': exclusion column with True -> "Oui" and False -> "Non"
    - 'Début de saison' / 'Fin de saison': bounds of the season holding
      the date. A season runs from 1 September to 31 August; dates from
      September onwards belong to the season starting that year, earlier
      dates to the season started the year before.
    - 'Date': date formatted as '%Y-%m-%dT00:00:00Z'
    - 'Date de la mesure du min/max': copy of 'Date'

    Any column of OUAHS_COLS still missing is added as empty strings, and
    the output is restricted to OUAHS_COLS, in that order. Columns listed
    in OUAHS_NO_TRAILING_ZEROS are cast to str and stripped of a trailing
    '.0' (or '.00', ...).

    Parameters
    ----------
    df : pandas.DataFrame
        Table of the data
    cols : dict
        Mapping to the columns holding the dates, values and exclusions.
        Default: {'date': 'date', 'value': 'value', 'excluded': 'excluded'}

    Returns
    -------
    filename : str or None
        Name of the written file, or None when the frame could not be
        converted (e.g. missing key in cols, missing column in df, values
        of the date column without year/month/strftime).

    Raises
    ------
    Exception
        Whatever ``_exception.check_dataframe`` and
        ``_exception.check_str`` raise on invalid input (defined in
        pyspc.core.exception, not visible here), and any error raised by
        pandas while writing the file.

    """
    if cols is None:
        cols = {k: k for k in ['date', 'value', 'excluded']}
    _exception.check_dataframe(df)
    _exception.check_str(self.filename)
    try:
        cd = cols['date']
        cv = cols['value']
        ce = cols['excluded']
        # Work on a copy: the caller's frame must not gain columns, nor
        # be left half-modified if a later step fails.
        out = df.copy()
        out['Valeur (en m³/s)'] = out[cv]
        out['Exclue'] = out[ce].replace(True, "Oui").replace(False, "Non")
        out['Début de saison'] = out[cd].apply(
            lambda x: dt(x.year, 9, 1)
            if x.month >= 9 else dt(x.year - 1, 9, 1)
        )
        out['Fin de saison'] = out[cd].apply(
            lambda x: dt(x.year + 1, 8, 31)
            if x.month >= 9 else dt(x.year, 8, 31)
        )
        out['Date'] = out[cd].apply(
            lambda x: x.strftime('%Y-%m-%dT00:00:00Z')
        )
        out['Date de la mesure du min/max'] = out['Date']
        for c in OUAHS_COLS:
            if c not in out.columns:
                out[c] = ''
        # Explicit copy: the column selection may be a view, and columns
        # are assigned on it just below.
        out = out[OUAHS_COLS].copy()
        # Remove superfluous trailing .0
        for c in OUAHS_NO_TRAILING_ZEROS:
            out[c] = out[c].astype(str).replace(
                to_replace=r"\.0+$", value="", regex=True)
    except Exception:
        # Deliberate best-effort: a frame that cannot be converted is
        # reported to the caller by returning None, nothing is written.
        return None
    try:
        # Keyword spelling used by recent pandas releases
        out.to_csv(self.filename, sep=',', index=False,
                   quoting=csv.QUOTE_ALL, lineterminator='\n')
    except TypeError:
        # Older pandas only knows 'line_terminator'. Catching TypeError
        # alone keeps real I/O errors visible to the caller.
        out.to_csv(self.filename, sep=',', index=False,
                   quoting=csv.QUOTE_ALL, line_terminator='\n')
    return self.filename