# Source code of pyspc.io.pyspcfile.reader

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Bibliothèque pyspc du projet pyspc - IO - Csv natif PyspcFile - read
"""
from datetime import datetime as dt
import itertools
import os.path

import pyspc.core.exception as _exception
from pyspc.core.keyseries import str2tuple
from pyspc.core.pyspcfile import PyspcFile
from pyspc.core.serie import Serie
from pyspc.core.series import Series


def read_PyspcFile(dirname='.', filename=None, stations=None, varnames=None,
                   simulations=None, runtimes=None, models=None, scens=None,
                   uncerts=None, warning=False):
    """
    Build a Series instance from PyspcFile data.

    When ``stations`` or ``varnames`` is None, the single file ``filename``
    is read. Otherwise the file names are built inside ``dirname`` from
    every combination of stations, varnames, simulations, runtimes, models,
    scens and uncerts; files that do not exist are skipped.

    Parameters
    ----------
    filename : str
        PyspcFile file
    warning : bool
        Display warnings? default: False

    Other Parameters
    ----------------
    dirname : str
        Directory of the PyspcFile files, if filename is not defined
    stations : list
        Station identifiers, if filename is not defined
    varnames : list
        Variable names, if filename is not defined
    simulations : list
        Names of the simulations
    runtimes : list
        Forecast production times, if filename is not defined
    models : list
        Forecast models, if filename is not defined
    scens : list
        Forecast scenarios, if filename is not defined
    uncerts : list
        Forecast uncertainties, if filename is not defined

    Returns
    -------
    series : pyspc.core.series.Series
        Collection of data series (None when no existing file was read)

    Notes
    -----
    Columns containing only missing values are not exported

    Examples
    --------
    >>> from datetime import datetime as dt
    >>> from pyspc.io.pyspcfile import read_PyspcFile

    Case of a file with a single observation series

    >>> f = 'data/core/csv/K1321810_QH.txt'
    >>> series = read_PyspcFile(filename=f)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 1
    * ----------------------------------
    * SERIE #1
    * - CODE = K1321810
    * - VARNAME = QH
    * - META = None
    *************************************

    Case of a file with a single simulation series

    >>> f = 'data/core/csv/K1321810_mohys_QH.txt'
    >>> series = read_PyspcFile(filename=f)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 1
    * ----------------------------------
    * SERIE #1
    * - CODE = K1321810
    * - VARNAME = QH
    * - META = mohys
    *************************************

    Case of a file with a single forecast series

    >>> f = 'data/core/csv/K1251810_2018010412_2001_brut_QH.txt'
    >>> series = read_PyspcFile(filename=f)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = fcst
    * NOMBRE DE SERIES = 1
    * ----------------------------------
    * SERIE #1
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, brut, None
    *************************************

    Case of several observation/simulation files

    >>> d = 'data/core/csv'
    >>> s = 'K1321810'
    >>> v = 'QH'
    >>> sim = [None, 'mohys']
    >>> series = read_PyspcFile(dirname=d, stations=s, varnames=v,
    ...                         simulations=sim)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 2
    * ----------------------------------
    * SERIE #1
    * - CODE = K1321810
    * - VARNAME = QH
    * - META = None
    * ----------------------------------
    * SERIE #2
    * - CODE = K1321810
    * - VARNAME = QH
    * - META = mohys
    *************************************

    Case of several forecast files

    >>> d = 'data/core/csv'
    >>> s = 'K1251810'
    >>> v = 'QH'
    >>> r = dt(2018, 1, 4, 12)
    >>> m = ['2001', '2011']
    >>> sc = ['brut', 'pilote']
    >>> u = [None, '10', '50', '90']
    >>> series = read_PyspcFile(dirname=d, stations=s, varnames=v,
    ...                         runtimes=r, models=m, scens=sc, uncerts=u)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = fcst
    * NOMBRE DE SERIES = 8
    * ----------------------------------
    * SERIE #1
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, brut, None
    * ----------------------------------
    * SERIE #2
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, brut, 10
    * ----------------------------------
    * SERIE #3
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, brut, 50
    * ----------------------------------
    * SERIE #4
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, brut, 90
    * ----------------------------------
    * SERIE #5
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, pilote, 10
    * ----------------------------------
    * SERIE #6
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, pilote, 50
    * ----------------------------------
    * SERIE #7
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2001, pilote, 90
    * ----------------------------------
    * SERIE #8
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = 2018-01-04 12:00:00, 2011, brut, None
    *************************************

    Case of a file with several observation/simulation series

    >>> f = 'data/core/csv/LaLoireChadrac_plathynes_QH.txt'
    >>> series = read_PyspcFile(filename=f)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 4
    * ----------------------------------
    * SERIE #1
    * - CODE = LaLoireChadrac
    * - VARNAME = PH
    * - META = None
    * ----------------------------------
    * SERIE #2
    * - CODE = LaLoireChadrac
    * - VARNAME = QH
    * - META = None
    * ----------------------------------
    * SERIE #3
    * - CODE = LaLoireChadrac
    * - VARNAME = QH
    * - META = plathynes
    * ----------------------------------
    * SERIE #4
    * - CODE = LaLoireChadrac
    * - VARNAME = QH
    * - META = pers
    *************************************

    Case of a file with several forecast series

    >>> f = 'data/core/csv/K0253030_2020061200_GR6J_QJ.txt'
    >>> series = read_PyspcFile(filename=f)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = PyspcFile
    * TYPE DE COLLECTION = fcst
    * NOMBRE DE SERIES = 11
    * ----------------------------------
    * SERIE #1
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPcf, None
    * ----------------------------------
    * SERIE #2
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf1, None
    * ----------------------------------
    * SERIE #3
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf2, None
    * ----------------------------------
    * SERIE #4
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf3, None
    * ----------------------------------
    * SERIE #5
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf4, None
    * ----------------------------------
    * SERIE #6
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf5, None
    * ----------------------------------
    * SERIE #7
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf6, None
    * ----------------------------------
    * SERIE #8
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf7, None
    * ----------------------------------
    * SERIE #9
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf8, None
    * ----------------------------------
    * SERIE #10
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf9, None
    * ----------------------------------
    * SERIE #11
    * - CODE = K0253030
    * - VARNAME = QJ
    * - META = 2020-06-12 00:00:00, GR6J, CEPpf10, None
    *************************************

    """
    # -------------------------------------------------------------------------
    # 0- Initialisation: list the candidate files
    # -------------------------------------------------------------------------
    if stations is None or varnames is None:
        # Single-file mode
        _exception.check_str(filename)
        filenames = [filename]
    else:
        # Directory mode: one candidate file per combination of criteria
        _exception.check_str(dirname)
        criteria = [
            _check_arguments(arg)
            for arg in (stations, varnames, simulations, runtimes,
                        models, scens, uncerts)
        ]
        filenames = [
            os.path.join(dirname, PyspcFile.join_basename(*combo))
            for combo in itertools.product(*criteria)
        ]
    # -------------------------------------------------------------------------
    # 1- Reading
    # -------------------------------------------------------------------------
    series = None
    for path in filenames:
        if not os.path.exists(path):
            # Missing files are skipped, optionally with a warning
            if warning:
                _exception.Warning(__file__, f"Fichier inconnu '{path}'")
            continue
        reader = PyspcFile(filename=path)
        # The collection is created from the first file that exists
        series = _init_series(reader, series)
        df = reader.read()
        reader_meta = [
            reader.station,
            reader.varname,
            reader.simulation,
            reader.runtime,
            reader.model,
            reader.scen,
            reader.uncert,
        ]
        # One Serie per column of the file
        for column in df.columns:
            keyobs, key = _set_keys(df, reader_meta, reader, column)
            series.add(
                serie=Serie(
                    datval=df[column].to_frame(),
                    code=keyobs[0],
                    varname=keyobs[1],
                    warning=warning,
                ),
                code=key[0],
                meta=key[2],
            )
    # -------------------------------------------------------------------------
    # 2- Collection of series
    # -------------------------------------------------------------------------
    return series


def _check_arguments(arg):
    """
    Normalise an argument of read_PyspcFile into a collection of values.

    Parameters
    ----------
    arg : None, str, datetime or list-like
        Raw argument value

    Returns
    -------
    list-like
        ``[None]`` if arg is None, ``[arg]`` if arg is a str or a datetime;
        otherwise arg itself, after it has been passed to
        ``_exception.check_listlike``

    """
    if arg is None:
        return [None]
    if isinstance(arg, (str, dt)):
        return [arg]
    _exception.check_listlike(arg)
    return arg


def _init_series(reader, series):
    """
    Initialise the collection from the reader.

    Parameters
    ----------
    reader : PyspcFile
        Reader of the current file
    series : Series or None
        Collection already created, if any

    Returns
    -------
    Series
        ``series`` unchanged when it already exists; otherwise a new
        collection named 'PyspcFile', of type 'obs' when the reader has no
        runtime and 'fcst' otherwise

    """
    if series is not None:
        return series
    datatype = 'obs' if reader.runtime is None else 'fcst'
    return Series(datatype=datatype, name='PyspcFile')


def _set_keys(df, reader_meta, reader, c):
    """
    Define the keys of the series stored in column ``c``.

    Parameters
    ----------
    df : pandas.DataFrame
        Content of the file
    reader_meta : list
        Meta-data of the file: station, varname, simulation, runtime,
        model, scen, uncert
    reader : PyspcFile
        Reader of the file
    c : str
        Name of the column

    Returns
    -------
    keyobs : tuple
        Key built with ``forceobs=True``
    key : tuple
        Key built with ``forceobs=False``

    """
    # Single column: keep the meta-data of the file
    if len(df.columns) == 1:
        colmeta = reader_meta
    # Several columns + name ending with a pyspc variable name
    elif any(c.endswith(v) for v in PyspcFile.get_varnames()):
        colmeta = PyspcFile.split_basename(c)
    # Several columns + forecast
    elif reader.runtime is not None:
        colmeta = _set_fcstmeta(reader_meta, c.split('_'))
    # Several columns + observation/simulation
    else:
        colmeta = _set_obssimmeta(reader_meta, c)
    return _set_keyobs(*colmeta), _set_key(*colmeta)


def _set_keyobs(*args):
    """Define the key, forcing the observation form (forceobs=True)."""
    return str2tuple(
        PyspcFile.join_basename(*args).replace('.txt', ''), forceobs=True)


def _set_key(*args):
    """Define the key (forceobs=False)."""
    return str2tuple(
        PyspcFile.join_basename(*args).replace('.txt', ''), forceobs=False)


def _set_obssimmeta(meta, col):
    """
    Complete the obs/sim meta-data with the name of the column.

    Parameters
    ----------
    meta : list
        Meta-data of the file; the first two items are kept as they are and
        the third one is the simulation defined by the file
    col : str or None
        Name of the column

    Returns
    -------
    list
        The first two items of meta, then the simulation, then four None
        (the forecast items). ``meta`` itself is not modified.

    """
    fmeta = list(meta[:2])
    # None must be tested before any str method is called on col.
    # Simulation whose name is already defined by the file
    if col is None or col.lower().startswith('unnamed'):
        fmeta.append(meta[2])
    # Observation
    elif col.lower().startswith('obs'):
        fmeta.append(None)
    # Simulation named by the column
    else:
        fmeta.append(col)
    fmeta.extend([None, None, None, None])
    return fmeta


def _set_fcstmeta(meta, col):
    """
    Complete the forecast meta-data with the name of the column.

    Parameters
    ----------
    meta : list
        Meta-data of the file; the first three items are kept as they are
    col : list
        Parts of the column name, consumed from left to right

    Returns
    -------
    list
        Copy of meta in which each None found after the third item is
        replaced by the next part of col, or left as None once col is
        exhausted. Neither ``meta`` nor ``col`` is modified.

    """
    fmeta = list(meta[:3])
    # An iterator is used so that the list of the caller is left untouched
    parts = iter(col)
    for x in meta[3:]:
        fmeta.append(x if x is not None else next(parts, None))
    return fmeta