#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021 R. Marty
# (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Bibliothèque pyspc du projet pyspc - IO - Csv natif PyspcFile - read
"""
from datetime import datetime as dt
import itertools
import os.path
import pyspc.core.exception as _exception
from pyspc.core.keyseries import str2tuple
from pyspc.core.pyspcfile import PyspcFile
from pyspc.core.serie import Serie
from pyspc.core.series import Series
# NOTE: a stray "[docs]" Sphinx source-link marker was removed here; it is not module code.
def read_PyspcFile(dirname='.', filename=None,
                   stations=None, varnames=None, simulations=None,
                   runtimes=None, models=None, scens=None, uncerts=None,
                   warning=False):
    """
    Build a Series collection from PyspcFile data.

    Two selection modes are supported. When ``stations`` or ``varnames``
    is None, the single file ``filename`` is read. Otherwise one file name
    is built under ``dirname`` (through ``PyspcFile.join_basename``) for
    every combination of the selection arguments. Files that do not exist
    are skipped, optionally with a warning.

    Parameters
    ----------
    filename : str
        PyspcFile file, read when ``stations`` or ``varnames`` is None
    warning : bool
        Display warnings? default: False

    Other Parameters
    ----------------
    dirname : str
        Directory of the PyspcFile files, if filename is not used
    stations : list
        Station identifiers, if filename is not used
    varnames : list
        Variable names, if filename is not used
    simulations : list
        Simulation names
    runtimes : list
        Forecast production times, if filename is not used
    models : list
        Forecast models, if filename is not used
    scens : list
        Forecast scenarios, if filename is not used
    uncerts : list
        Forecast uncertainties, if filename is not used

    Each selection argument may also be None, a single str or a single
    datetime; it is then wrapped into a one-element list.

    Returns
    -------
    series : pyspc.core.series.Series
        Collection of data series. None is returned when none of the
        candidate files exists.

    Notes
    -----
    Per the original documentation, columns that only contain missing
    values are not exported (this is not decided in this function;
    presumably handled by the reader or the Series container).

    Examples
    --------
    >>> from datetime import datetime as dt
    >>> from pyspc.io.pyspcfile import read_PyspcFile

    Case of a file holding a single observation series

    >>> f = 'data/core/csv/K1321810_QH.txt'
    >>> series = read_PyspcFile(filename=f)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 1
    * ----------------------------------
    * SERIE #1
    * - CODE = K1321810
    * - VARNAME = QH
    * - META = None
    *************************************

    Case of a file holding a single simulation series

    >>> f = 'data/core/csv/K1321810_mohys_QH.txt'
    >>> series = read_PyspcFile(filename=f)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 1
    * ----------------------------------
    * SERIE #1
    * - CODE = K1321810
    * - VARNAME = QH
    * - META = mohys
    *************************************

    Case of a file holding a single forecast series

    >>> f = 'data/core/csv/K1251810_2018010412_2001_brut_QH.txt'
    >>> series = read_PyspcFile(filename=f)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = fcst
    * NOMBRE DE SERIES = 1
    * ----------------------------------
    * SERIE #1
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, brut, None
    *************************************

    Case of several observation/simulation files

    >>> d = 'data/core/csv'
    >>> s = 'K1321810'
    >>> v = 'QH'
    >>> sim = [None, 'mohys']
    >>> series = read_PyspcFile(dirname=d, stations=s, varnames=v,
    ...                         simulations=sim)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 2
    * ----------------------------------
    * SERIE #1
    * - CODE = K1321810
    * - VARNAME = QH
    * - META = None
    * ----------------------------------
    * SERIE #2
    * - CODE = K1321810
    * - VARNAME = QH
    * - META = mohys
    *************************************

    Case of several forecast files

    >>> d = 'data/core/csv'
    >>> s = 'K1251810'
    >>> v = 'QH'
    >>> r = dt(2018, 1, 4, 12)
    >>> m = ['2001', '2011']
    >>> sc = ['brut', 'pilote']
    >>> u = [None, '10', '50', '90']
    >>> series = read_PyspcFile(dirname=d, stations=s, varnames=v,
    ...                         runtimes=r, models=m, scens=sc, uncerts=u)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = fcst
    * NOMBRE DE SERIES = 8
    * ----------------------------------
    * SERIE #1
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, brut, None
    * ----------------------------------
    * SERIE #2
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, brut, 10
    * ----------------------------------
    * SERIE #3
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, brut, 50
    * ----------------------------------
    * SERIE #4
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, brut, 90
    * ----------------------------------
    * SERIE #5
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, pilote, 10
    * ----------------------------------
    * SERIE #6
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, pilote, 50
    * ----------------------------------
    * SERIE #7
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, pilote, 90
    * ----------------------------------
    * SERIE #8
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2011, brut, None
    *************************************

    Case of a file holding several observation/simulation series

    >>> f = 'data/core/csv/LaLoireChadrac_plathynes_QH.txt'
    >>> series = read_PyspcFile(filename=f)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 4
    * ----------------------------------
    * SERIE #1
    * - CODE = LaLoireChadrac
    * - VARNAME = PH
    * - META = None
    * ----------------------------------
    * SERIE #2
    * - CODE = LaLoireChadrac
    * - VARNAME = QH
    * - META = None
    * ----------------------------------
    * SERIE #3
    * - CODE = LaLoireChadrac
    * - VARNAME = QH
    * - META = plathynes
    * ----------------------------------
    * SERIE #4
    * - CODE = LaLoireChadrac
    * - VARNAME = QH
    * - META = pers
    *************************************

    Case of a file holding several forecast series

    >>> f = 'data/core/csv/K0253030_2020061200_GR6J_QJ.txt'
    >>> series = read_PyspcFile(filename=f)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = fcst
    * NOMBRE DE SERIES = 11
    * ----------------------------------
    * SERIE #1
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPcf, None
    * ----------------------------------
    * SERIE #2
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf1, None
    * ----------------------------------
    * SERIE #3
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf2, None
    * ----------------------------------
    * SERIE #4
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf3, None
    * ----------------------------------
    * SERIE #5
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf4, None
    * ----------------------------------
    * SERIE #6
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf5, None
    * ----------------------------------
    * SERIE #7
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf6, None
    * ----------------------------------
    * SERIE #8
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf7, None
    * ----------------------------------
    * SERIE #9
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf8, None
    * ----------------------------------
    * SERIE #10
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf9, None
    * ----------------------------------
    * SERIE #11
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf10, None
    *************************************

    """
    # ---------------------------------------------------------------------
    # 0- Initialisation: work out the list of candidate files
    # ---------------------------------------------------------------------
    if stations is None or varnames is None:
        # Single-file mode: the caller names the file explicitly.
        _exception.check_str(filename)
        paths = [filename]
    else:
        # Multi-file mode: one candidate file per combination of selectors.
        _exception.check_str(dirname)
        selectors = [
            _check_arguments(values)
            for values in (stations, varnames, simulations,
                           runtimes, models, scens, uncerts)
        ]
        paths = [
            os.path.join(dirname, PyspcFile.join_basename(*combo))
            for combo in itertools.product(*selectors)
        ]
    # ---------------------------------------------------------------------
    # 1- Reading
    # ---------------------------------------------------------------------
    collection = None
    for path in paths:
        if not os.path.exists(path):
            # Missing files are skipped; only reported when asked to.
            if warning:
                _exception.Warning(__file__, f"Fichier inconnu '{path}'")
            continue
        reader = PyspcFile(filename=path)
        # The collection is created from the first file that exists.
        collection = _init_series(reader, collection)
        frame = reader.read()
        file_meta = [
            reader.station, reader.varname, reader.simulation,
            reader.runtime, reader.model, reader.scen, reader.uncert]
        for column in frame.columns:
            keyobs, key = _set_keys(frame, file_meta, reader, column)
            serie = Serie(datval=frame[column].to_frame(), code=keyobs[0],
                          varname=keyobs[1], warning=warning)
            collection.add(serie=serie, code=key[0], meta=key[2])
    # ---------------------------------------------------------------------
    # 2- Collection of series
    # ---------------------------------------------------------------------
    return collection
def _check_arguments(arg):
    """
    Normalise one selection argument of read_PyspcFile.

    None, a single str and a single datetime are each wrapped into a
    one-element list. Any other value is validated with
    ``_exception.check_listlike`` and returned unchanged.
    """
    is_single_value = arg is None or isinstance(arg, (str, dt))
    if is_single_value:
        return [arg]
    _exception.check_listlike(arg)
    return arg
def _init_series(reader, series):
    """
    Return the collection to fill, creating it on first use.

    An existing collection is returned untouched and the reader is not
    consulted. Otherwise a new Series named 'PyspcFile' is created, with
    datatype 'obs' when ``reader.runtime`` is None and 'fcst' otherwise.
    """
    if series is not None:
        return series
    datatype = 'obs' if reader.runtime is None else 'fcst'
    return Series(datatype=datatype, name='PyspcFile')
def _set_keys(df, reader_meta, reader, c):
    """
    Build the two keys describing column *c* of a PyspcFile table.

    The metadata used to build the keys is chosen as follows:

    1. the table has a single column: the file metadata ``reader_meta``;
    2. the column name ends with one of ``PyspcFile.get_varnames()``:
       the column name itself, parsed by ``PyspcFile.split_basename``;
    3. the file is a forecast (``reader.runtime`` is not None): the file
       metadata completed by the '_'-separated parts of the column name;
    4. otherwise (observation/simulation): the file metadata completed by
       the column name.

    Parameters
    ----------
    df : table exposing ``columns`` (presumably a pandas DataFrame)
    reader_meta : list
        File metadata, as built by the caller in this file:
        [station, varname, simulation, runtime, model, scen, uncert]
    reader : PyspcFile
        Reader of the file; only its ``runtime`` attribute is used
    c : str
        Column name

    Returns
    -------
    tuple
        ``(keyobs, key)``: the key built with ``forceobs=True`` and the key
        built with ``forceobs=False``.
    """
    # Single column: keep the metadata of the file
    if len(df.columns) == 1:
        colmeta = reader_meta
    # Several columns + name ending with a pyspc variable name
    elif any(c.endswith(v) for v in PyspcFile.get_varnames()):
        # Parsed once and reused for both keys.
        colmeta = PyspcFile.split_basename(c)
    # Several columns + forecast
    elif reader.runtime is not None:
        colmeta = _set_fcstmeta(reader_meta, c.split('_'))
    # Several columns + observation/simulation
    else:
        colmeta = _set_obssimmeta(reader_meta, c)
    return _set_keyobs(*colmeta), _set_key(*colmeta)
def _set_keyobs(*args):
    """
    Build the series key with the observation form forced.

    The arguments are joined by ``PyspcFile.join_basename``, every '.txt'
    occurrence is removed, and the result is passed to ``str2tuple`` with
    ``forceobs=True``.
    """
    basename = PyspcFile.join_basename(*args)
    stem = basename.replace('.txt', '')
    return str2tuple(stem, forceobs=True)
def _set_key(*args):
    """
    Build the series key without forcing the observation form.

    The arguments are joined by ``PyspcFile.join_basename``, every '.txt'
    occurrence is removed, and the result is passed to ``str2tuple`` with
    ``forceobs=False``.
    """
    basename = PyspcFile.join_basename(*args)
    stem = basename.replace('.txt', '')
    return str2tuple(stem, forceobs=False)
def _set_obssimmeta(meta, col):
    """
    Complete observation/simulation metadata from a column name.

    Parameters
    ----------
    meta : sequence
        File metadata; only the first three items are read
        (station, varname, simulation in the caller of this file)
    col : str or None
        Column name

    Returns
    -------
    list
        Seven items: the first two items of ``meta``, the simulation name,
        then four None. The simulation name is:

        - None when the column name starts with 'obs' (case-insensitive);
        - ``meta[2]`` when the column is None or starts with 'unnamed'
          (case-insensitive), i.e. the file already names the simulation;
        - the column name itself otherwise.
    """
    # The None case must be tested before any str method is called on col,
    # otherwise it can never be reached.
    if col is None:
        simulation = meta[2]
    else:
        label = col.lower()
        if label.startswith('obs'):
            # Observation
            simulation = None
        elif label.startswith('unnamed'):
            # Simulation whose name is already given by the file
            simulation = meta[2]
        else:
            # Simulation named by the column
            simulation = col
    return list(meta[:2]) + [simulation, None, None, None, None]
def _set_fcstmeta(meta, col):
    """
    Complete forecast metadata from the parts of a column name.

    The first three items of ``meta`` are kept as they are. Each following
    item is kept when it is not None; otherwise it is replaced by the next
    unused part of ``col``, or by None once the parts are exhausted.

    Parameters
    ----------
    meta : sequence
        File metadata (in the caller of this file:
        [station, varname, simulation, runtime, model, scen, uncert])
    col : iterable of str
        Parts of the column name, in order. It is not modified.

    Returns
    -------
    list
        New list with the same length as ``meta``.
    """
    fmeta = list(meta[:3])
    # Consume the parts through an iterator so the caller's list is left
    # intact (no repeated pop(0) on it).
    parts = iter(col or ())
    for value in meta[3:]:
        fmeta.append(value if value is not None else next(parts, None))
    return fmeta