Code source de pyspc.io.grp16.reader

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Bibliothèque pyspc du projet pyspc - IO - GRP version 2016 - read
"""
from datetime import timedelta as td
import os.path
import pandas as pnd

from pyspc.convention.grp16 import (
    DATATYPES, CAL_EVENT_HEADERS, RT_FCST_LINEPREFIX, RT_FCST_VARNAMES)
from pyspc.core.convention import EXTERNAL_VARNAMES
import pyspc.core.exception as _exception
from pyspc.core.keyseries import str2tuple, tuple2str
from pyspc.core.serie import Serie
from pyspc.core.series import Series
from pyspc.model.grp16 import (
    GRP_Data, GRP_Event, GRP_Fcst,
    GRPRT_Archive, GRPRT_Basin, GRPRT_Data, GRPRT_Fcst, GRPRT_Intern)


def read_GRP16(datatype=None, dirname='.', filename=None, stations=None,
               varnames=None, warning=False):
    """
    Build a Series collection from GRP16 (GRP version 2016) data.

    This function only validates ``datatype`` and dispatches to the
    private reader matching it; the actual parsing is done by the
    ``_grp16_*`` helpers of this module.

    Parameters
    ----------
    filename : str
        GRP file.
    datatype : str
        GRP data type. Must belong to ``DATATYPES``.
    warning : bool
        Show warnings? Default: False.

    Other Parameters
    ----------------
    dirname : str
        Directory of the GRP files, if filename is not defined.
        Only forwarded for ``datatype == 'grp16_cal_data'``.
    stations : list
        Station identifiers, if filename is not defined.
        Only forwarded for ``datatype == 'grp16_cal_data'``.
    varnames : list
        Variable names, if filename is not defined.
        Only forwarded for ``datatype == 'grp16_cal_data'``.

    Returns
    -------
    series : pyspc.core.series.Series
        Collection of data series.

    Raises
    ------
    NotImplementedError
        If ``datatype`` passes the initial check but matches none of the
        dispatch branches below.

    Notes
    -----
    Columns containing only missing values are not exported.

    The initial check is delegated to ``_exception.raise_valueerror``
    (defined outside this file); it is presumably the one raising when
    ``datatype`` is not in ``DATATYPES`` -- confirm against that helper.

    The expected outputs below are reproduced from the published
    documentation; their exact column alignment may differ.

    Examples
    --------
    >>> from pyspc.io.grp16 import read_GRP16

    CALIBRATION - observation data

    >>> d = 'grp16_cal_data'
    >>> f = 'data/model/grp16/cal/43091005_P.txt'
    >>> series = read_GRP16(filename=f, datatype=d)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = GRP16
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 1
    * ----------------------------------
    * SERIE #1
    * - CODE = 43091005
    * - VARNAME = PH
    * - META = None
    *************************************

    CALIBRATION - event data

    >>> d = 'grp16_cal_event'
    >>> f = 'data/model/grp16/cal/K0114030-EV0001.DAT'
    >>> series = read_GRP16(filename=f, datatype=d)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = GRP16
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 2
    * ----------------------------------
    * SERIE #1
    * - CODE = K0114030
    * - VARNAME = PH
    * - META = EV0001
    * ----------------------------------
    * SERIE #2
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = EV0001
    *************************************

    CALIBRATION - forecast data

    >>> d = 'grp16_cal_fcst'
    >>> f = 'data/model/grp16/cal/H_K0114030_GRP_SMN_RNA_006_PP_P1P2.TXT'
    >>> series = read_GRP16(filename=f, datatype=d)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = GRP16
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 11
    * ----------------------------------
    * SERIE #1
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = OBS000
    * ----------------------------------
    * SERIE #2
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = OBS001
    * ----------------------------------
    * SERIE #3
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = PRV001
    * ----------------------------------
    * SERIE #4
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = OBS003
    * ----------------------------------
    * SERIE #5
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = PRV003
    * ----------------------------------
    * SERIE #6
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = OBS006
    * ----------------------------------
    * SERIE #7
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = PRV006
    * ----------------------------------
    * SERIE #8
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = OBS009
    * ----------------------------------
    * SERIE #9
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = PRV009
    * ----------------------------------
    * SERIE #10
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = OBS012
    * ----------------------------------
    * SERIE #11
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = PRV012
    *************************************

    REAL-TIME - observation data

    >>> d = 'grp16_rt_data'
    >>> f = 'data/model/grp16/rt/Debit.txt'
    >>> series = read_GRP16(filename=f, datatype=d)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = GRP16
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 2
    * ----------------------------------
    * SERIE #1
    * - CODE = K0114020
    * - VARNAME = QH
    * - META = grp16
    * ----------------------------------
    * SERIE #2
    * - CODE = K0114030
    * - VARNAME = QH
    * - META = grp16
    *************************************

    REAL-TIME - meteorological scenarios

    >>> d = 'grp16_rt_data'
    >>> f = 'data/model/grp16/rt/Scen_006_PluMA.txt'
    >>> series = read_GRP16(filename=f, datatype=d)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = GRP16
    * TYPE DE COLLECTION = fcst
    * NOMBRE DE SERIES = 2
    * ----------------------------------
    * SERIE #1
    * - CODE = K0114020
    * - VARNAME = PH
    * - META = 2017-06-13 12:00:00, grp16, 006, None
    * ----------------------------------
    * SERIE #2
    * - CODE = K0114030
    * - VARNAME = PH
    * - META = 2017-06-13 12:00:00, grp16, 006, None
    *************************************

    REAL-TIME - observation export

    >>> d = 'grp16_rt_obs_diff'
    >>> f = 'data/model/grp16/rt/GRP_D_Obs.txt'
    >>> series = read_GRP16(filename=f, datatype=d)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = GRP16
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 2
    * ----------------------------------
    * SERIE #1
    * - CODE = K6173130
    * - VARNAME = QH
    * - META = grp16
    * ----------------------------------
    * SERIE #2
    * - CODE = K6173130
    * - VARNAME = PH
    * - META = grp16
    *************************************

    REAL-TIME - forecast without assimilation

    >>> d = 'grp16_rt_sim_diff'
    >>> f = 'data/model/grp16/rt/GRP_D_Simu_2001.txt'
    >>> series = read_GRP16(filename=f, datatype=d)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = GRP16
    * TYPE DE COLLECTION = fcst
    * NOMBRE DE SERIES = 2
    * ----------------------------------
    * SERIE #1
    * - CODE = K6173130
    * - VARNAME = QH
    * - META = 2016-05-31 16:00:00, grp16-sim, 2001, None
    * ----------------------------------
    * SERIE #2
    * - CODE = K6173130
    * - VARNAME = PH
    * - META = 2016-05-31 16:00:00, grp16-sim, 2001, None
    *************************************

    REAL-TIME - forecast with assimilation

    >>> d = 'grp16_rt_fcst_diff'
    >>> f = 'data/model/grp16/rt/GRP_D_Prev_2001.txt'
    >>> series = read_GRP16(filename=f, datatype=d)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = GRP16
    * TYPE DE COLLECTION = fcst
    * NOMBRE DE SERIES = 2
    * ----------------------------------
    * SERIE #1
    * - CODE = K6173130
    * - VARNAME = QH
    * - META = 2016-05-31 16:00:00, grp16, 2001, None
    * ----------------------------------
    * SERIE #2
    * - CODE = K6173130
    * - VARNAME = PH
    * - META = 2016-05-31 16:00:00, grp16, 2001, None
    *************************************

    REAL-TIME - archive data

    >>> d = 'grp16_rt_archive'
    >>> f = 'data/model/grp16/rt/PV_10A.DAT'
    >>> series = read_GRP16(filename=f, datatype=d)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = GRP16
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 1
    * ----------------------------------
    * SERIE #1
    * - CODE = 43091005
    * - VARNAME = PH
    * - META = None
    *************************************

    REAL-TIME - model internal data

    >>> d = 'grp16_rt_intern_diff'
    >>> f = 'data/model/grp16/rt/intern/PQE_1A_D.DAT'
    >>> series = read_GRP16(filename=f, datatype=d)
    >>> series
    *************************************
    ********** SERIES *******************
    *************************************
    * NOM DE LA COLLECTION = GRP16
    * TYPE DE COLLECTION = obs
    * NOMBRE DE SERIES = 4
    * ----------------------------------
    * SERIE #1
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = grp16
    * ----------------------------------
    * SERIE #2
    * - CODE = K1251810
    * - VARNAME = QH
    * - META = None
    * ----------------------------------
    * SERIE #3
    * - CODE = K1251810
    * - VARNAME = PH
    * - META = None
    * ----------------------------------
    * SERIE #4
    * - CODE = K1251810
    * - VARNAME = EH
    * - META = None
    *************************************

    """
    # -------------------------------------------------------------------------
    # 0- Checks
    # -------------------------------------------------------------------------
    _exception.raise_valueerror(
        datatype not in DATATYPES,
        f"Type de données GRP16 '{datatype}' incorrect")
    # -------------------------------------------------------------------------
    # 1.1- CALIBRATION - observation data
    # -------------------------------------------------------------------------
    if datatype == "grp16_cal_data":
        return _grp16_cal_data(
            dirname=dirname, filename=filename, stations=stations,
            varnames=varnames, warning=warning)
    # -------------------------------------------------------------------------
    # 1.2- CALIBRATION - event data
    # -------------------------------------------------------------------------
    if datatype == "grp16_cal_event":
        return _grp16_cal_event(filename=filename, warning=warning)
    # -------------------------------------------------------------------------
    # 1.3- CALIBRATION - forecast data
    # -------------------------------------------------------------------------
    if datatype == "grp16_cal_fcst":
        return _grp16_cal_fcst(filename=filename, warning=warning)
    # -------------------------------------------------------------------------
    # 2.1- REAL-TIME - archive data
    # -------------------------------------------------------------------------
    if datatype.startswith('grp16_rt_archive'):
        return _grp16_rt_archive(filename=filename, warning=warning)
    # -------------------------------------------------------------------------
    # 2.2- REAL-TIME - observation data
    #      REAL-TIME - meteorological scenarios
    # -------------------------------------------------------------------------
    if datatype.startswith('grp16_rt_data'):
        return _grp16_rt_data(filename=filename, warning=warning)
    # -------------------------------------------------------------------------
    # 2.3- REAL-TIME - observation export
    #      REAL-TIME - forecast without assimilation
    #      REAL-TIME - forecast with assimilation
    # -------------------------------------------------------------------------
    if datatype.startswith(
            ('grp16_rt_obs', 'grp16_rt_sim', 'grp16_rt_fcst')):
        # The helper expects the short type, e.g. 'obs_diff' rather than
        # 'grp16_rt_obs_diff'.
        return _grp16_rt_fcst(
            filename=filename,
            datatype=datatype.replace('grp16_rt_', ''),
            warning=warning)
    # -------------------------------------------------------------------------
    # 2.4- REAL-TIME - model internal data
    # -------------------------------------------------------------------------
    if datatype.startswith('grp16_rt_intern'):
        return _grp16_rt_intern(
            filename=filename,
            datatype=datatype.replace('grp16_rt_', ''),
            warning=warning)
    # -------------------------------------------------------------------------
    # 3- Unknown case
    # -------------------------------------------------------------------------
    raise NotImplementedError
def _grp16_cal_data(dirname='.', filename=None, stations=None,
                    varnames=None, warning=False):
    """
    CALIBRATION - read observation data files.

    Parameters
    ----------
    dirname : str
        Directory holding the files; only used when both ``stations`` and
        ``varnames`` are given.
    filename : str
        Single file to read; used when ``stations`` or ``varnames`` is None.
    stations : str or list
        Station identifier(s).
    varnames : str or list
        Variable name(s). Names with no 'GRP16' entry in
        ``EXTERNAL_VARNAMES`` are silently skipped.
    warning : bool
        Report missing files and forward the flag to ``Serie``.

    Returns
    -------
    series : pyspc.core.series.Series
        'obs' collection with one series per existing file.
    """
    # -------------------------------------------------------------------------
    # 0- Initialisation
    # -------------------------------------------------------------------------
    if stations is None or varnames is None:
        _exception.check_str(filename)
        filenames = [filename]
    else:
        _exception.check_str(dirname)
        if isinstance(stations, str):
            stations = [stations]
        if isinstance(varnames, str):
            varnames = [varnames]
        # Reverse lookup: EXTERNAL_VARNAMES value -> GRP16 variable name
        external = {v: k[1] for k, v in EXTERNAL_VARNAMES.items()
                    if k[0] == 'GRP16'}
        varnames = [external[v] for v in varnames if v in external]
        filenames = [
            os.path.join(dirname,
                         GRP_Data.join_basename(station=s, varname=v))
            for s in stations
            for v in varnames]
    series = Series(datatype='obs', name='GRP16')
    # -------------------------------------------------------------------------
    # 1- Reading
    # -------------------------------------------------------------------------
    for f in filenames:
        if not os.path.exists(f):
            if warning:
                _exception.Warning(
                    __file__,
                    f"Fichier inconnu '{f}'")
            continue
        reader = GRP_Data(filename=f)
        df = reader.read()
        # ---------------------------------------------------------------------
        # 2- Series collection
        # ---------------------------------------------------------------------
        serie = Serie(
            datval=df,
            code=reader.station,
            varname=reader.varname,
            provider='GRP16',
            warning=warning)
        series.add(serie)
    return series


def _grp16_cal_event(filename=None, warning=False):
    """
    CALIBRATION - read an event data file.

    One series is created per column whose label is a value of
    ``CAL_EVENT_HEADERS``; other columns are ignored. Every series is
    added with the reader's event identifier as ``meta``.

    Parameters
    ----------
    filename : str
        GRP event file.
    warning : bool
        Forwarded to ``Serie``.

    Returns
    -------
    series : pyspc.core.series.Series
        'obs' collection.
    """
    # -------------------------------------------------------------------------
    # 0- Initialisation
    # -------------------------------------------------------------------------
    series = Series(datatype='obs', name='GRP16')
    # -------------------------------------------------------------------------
    # 1- Reading
    # -------------------------------------------------------------------------
    reader = GRP_Event(filename=filename)
    df = reader.read()
    df = df.set_index('Date', drop=True)
    # -------------------------------------------------------------------------
    # 2- Series collection
    # -------------------------------------------------------------------------
    # Reverse lookup: column header -> variable name
    assoc = {v: k for k, v in CAL_EVENT_HEADERS.items()}
    for c in df.columns:
        try:
            varname = assoc[c]
        except KeyError:
            # Column is not a known event header: skip it
            continue
        serie = Serie(
            datval=df[c].to_frame(),
            code=reader.station,
            varname=varname,
            provider='GRP16',
            warning=warning)
        series.add(serie, meta=reader.event)
    return series


def _grp16_cal_fcst(filename=None, warning=False):
    """
    CALIBRATION - read a forecast data file.

    Each column of the frame returned by the reader becomes one series
    with variable 'Q'; the stripped column label is used as ``meta``.

    Parameters
    ----------
    filename : str
        GRP calibration forecast file.
    warning : bool
        Forwarded to ``Serie``.

    Returns
    -------
    series : pyspc.core.series.Series
        'obs' collection.
    """
    # -------------------------------------------------------------------------
    # 0- Initialisation
    # -------------------------------------------------------------------------
    series = Series(datatype='obs', name='GRP16')
    # -------------------------------------------------------------------------
    # 1- Reading
    # -------------------------------------------------------------------------
    reader = GRP_Fcst(filename=filename)
    df = reader.read()
    # -------------------------------------------------------------------------
    # 2- Series collection
    # -------------------------------------------------------------------------
    for c in df.columns:
        serie = Serie(
            datval=df[c].to_frame(),
            code=reader.station,
            varname='Q',
            provider='GRP16',
            warning=warning)
        series.add(serie, meta=c.strip())
    return series


def _grp16_rt_archive(filename=None, warning=False):
    """
    REAL-TIME - read an archive data file.

    Each column becomes one series: the stripped column label is the
    series code and the first character of ``reader.varname`` is the
    variable name.

    Parameters
    ----------
    filename : str
        GRP real-time archive file.
    warning : bool
        Forwarded to ``Serie``.

    Returns
    -------
    series : pyspc.core.series.Series
        'obs' collection.
    """
    # -------------------------------------------------------------------------
    # 0- Initialisation
    # -------------------------------------------------------------------------
    series = Series(datatype='obs', name='GRP16')
    # -------------------------------------------------------------------------
    # 1- Reading
    # -------------------------------------------------------------------------
    reader = GRPRT_Archive(filename=filename)
    df = reader.read()
    # -------------------------------------------------------------------------
    # 2- Series collection
    # -------------------------------------------------------------------------
    for c in df.columns:
        serie = Serie(
            datval=df[c].to_frame(),
            code=c.strip(),
            varname=reader.varname[0],
            provider='GRP16',
            warning=warning)
        series.add(serie)  # , meta='archive'
    return series


def _grp16_rt_data(filename=None, warning=None):
    """
    REAL-TIME - read observation data or meteorological scenarios.

    The kind of collection depends on ``reader.scen``: when it is None an
    'obs' collection is returned, otherwise a 'fcst' collection whose
    runtime is the first valid index of each column.

    Parameters
    ----------
    filename : str
        GRP real-time data file.
    warning : bool
        Forwarded to ``Serie``.

    Returns
    -------
    series : pyspc.core.series.Series
        'obs' or 'fcst' collection, one series per station code.
    """
    # -------------------------------------------------------------------------
    # 0- Initialisation
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------
    # 1- Reading
    # -------------------------------------------------------------------------
    reader = GRPRT_Data(filename=filename)
    df = reader.read()
    # Remove the superfluous (all-NaN) column
    df = df.dropna(how="all", axis=1)
    # Remove the rows whose prefix differs from the reader's one
    df.drop(df[df.PREFIX != reader.lineprefix].index, inplace=True)
    # Remove the PREFIX column
    df.drop(columns=['PREFIX'], inplace=True)
    # Build the datetime index from the DATE and HOUR columns
    df.index = pnd.to_datetime(df['DATE'].astype(str) + ' ' + df['HOUR'],
                               format='%Y%m%d %H:00')
    # Remove the 'DATE' and 'HOUR' columns
    df.drop(columns=['DATE', 'HOUR'], inplace=True)
    # 'CODE' column -> MultiIndex (None, CODE) with None: ['VALUE', 'CODE']
    df = df.pivot(columns='CODE')
    # Drop the extra VALUE level from the columns
    df.columns = df.columns.droplevel(0)
    # -------------------------------------------------------------------------
    # 2.1- Series collection OBS
    # -------------------------------------------------------------------------
    if reader.scen is None:
        series = Series(datatype='obs', name='GRP16')
        for c in df.columns:
            serie = Serie(
                datval=df[c].to_frame(),
                code=c,
                varname=reader.varname,
                provider='GRP16',
                warning=warning)
            series.add(serie)
        return series
    # -------------------------------------------------------------------------
    # 2.2- Series collection SIM/PRV
    # -------------------------------------------------------------------------
    series = Series(datatype='fcst', name='GRP16')
    for c in df.columns:
        runtime = df[c].first_valid_index()
        key = (c, reader.varname, (runtime, 'grp16', reader.scen, None))
        # Round-trip through the string form of the key to get the code
        # used for the Serie itself (semantics of forceobs defined in
        # pyspc.core.keyseries -- not visible here).
        keystr = str2tuple(tuple2str(key), forceobs=True)[0]
        serie = Serie(
            datval=df[c].to_frame(),
            code=keystr,
            varname=reader.varname,
            provider='GRP16',
            warning=warning)
        series.add(serie, code=c, meta=key[2])
    return series


def _grp16_rt_fcst(filename=None, datatype=None, warning=None):
    """
    REAL-TIME - read an observation export or a forecast export
    (without or with assimilation).

    Parameters
    ----------
    filename : str
        GRP real-time export file.
    datatype : str
        Short data type (without the 'grp16_rt_' prefix), e.g. 'obs_diff',
        'sim_diff', 'fcst_diff'. Its part before the first '_' selects the
        line prefix in ``RT_FCST_LINEPREFIX``.
    warning : bool
        Forwarded to ``Serie``.

    Returns
    -------
    series : pyspc.core.series.Series
        'obs' collection when ``datatype`` starts with 'obs', otherwise a
        'fcst' collection.
    """
    # -------------------------------------------------------------------------
    # 0- Initialisation
    # -------------------------------------------------------------------------
    # Line prefix
    lineprefix = RT_FCST_LINEPREFIX[datatype.split('_')[0]]
    # -------------------------------------------------------------------------
    # 1- Reading
    # -------------------------------------------------------------------------
    # Read the content
    reader = GRPRT_Fcst(filename=filename, datatype=datatype)
    df = reader.read()
    # Clean the column labels
    df.columns = [c.strip() for c in df.columns]
    # Remove the rows whose type differs from lineprefix
    df = df.drop(df[df.TYP != lineprefix].index)
    df = df.dropna(axis=1, how='all')
    # Remove the TYP column
    df.drop(columns=['TYP'], inplace=True)
    # Clean the CODE values
    df['CODE'] = df['CODE'].map(lambda x: x.strip())
    # 'DATE' column -> index
    df = df.set_index(keys='DATE(TU)', drop=True)
    # 'CODE' column -> MultiIndex (None, CODE) with None: ['Debit (l/s)']
    df = df.pivot(columns='CODE')
    # MultiIndex: (CODE, VAR)
    df.columns = df.columns.swaplevel()
    # Flatten to (code, variable) tuples; the text from '(' onwards is
    # dropped from the variable label before the lookup.
    df.columns = [(c[0], RT_FCST_VARNAMES[c[1].split('(')[0]])
                  for c in df.columns]
    # -------------------------------------------------------------------------
    # 2.1- Series collection OBS
    # -------------------------------------------------------------------------
    if datatype.startswith('obs'):
        series = Series(datatype='obs', name='GRP16')
        for c in df.columns:
            serie = Serie(
                datval=df[c].to_frame(),
                code=c[0],
                varname=c[1],
                provider='GRP16',
                warning=warning)
            series.add(serie, meta='grp16')
        return series
    # -------------------------------------------------------------------------
    # 2.2- Series collection SIM/PRV
    # -------------------------------------------------------------------------
    series = Series(datatype='fcst', name='GRP16')
    # Runtime is taken one hour before the first row
    # NOTE(review): hard-coded hourly step -- confirm for other time steps
    runtime = df.index[0] - td(hours=1)
    runtime = runtime.to_pydatetime()
    # 'grp16-<prefix>' in lower case, with a '-prv' suffix collapsed to
    # plain 'grp16'
    model = f'grp16-{lineprefix.lower()}'.replace('-prv', '')
    for c in df.columns:
        code = c[0]
        varname = EXTERNAL_VARNAMES[('GRP16', c[1])]
        key = (code, varname, (runtime, model, reader.scen, None))
        keystr = str2tuple(tuple2str(key), forceobs=True)[0]
        serie = Serie(
            datval=df[c].to_frame(),
            code=keystr,
            varname=varname,
            provider='GRP16',
            warning=warning)
        series.add(serie, code=code, meta=key[2])
    return series


def _grp16_rt_intern(filename=None, datatype=None, warning=False):
    """
    REAL-TIME - read model internal data.

    The station code and the basin area are read from the 'BASSIN.DAT'
    file located in the same directory as ``filename``.

    Parameters
    ----------
    filename : str
        GRP internal data file.
    datatype : str
        Short data type, checked by ``reader.check_datatype``.
    warning : bool
        Forwarded to ``Serie``.

    Returns
    -------
    series : pyspc.core.series.Series
        'obs' collection; columns whose label contains 'sim' are added
        with ``meta='grp16'``, the others with ``meta=None``.
    """
    # -------------------------------------------------------------------------
    # 0- Initialisation
    # -------------------------------------------------------------------------
    series = Series(datatype='obs', name='GRP16')
    # Column label -> variable name; also fixes the column order
    varnames = {'Qsim(mm)': 'Q', 'Qobs(mm)': 'Q', 'P(mm)': 'P',
                'T(C)': 'T', 'ETP(mm)': 'EH'}
    # -------------------------------------------------------------------------
    # 1- Reading
    # -------------------------------------------------------------------------
    reader = GRPRT_Intern(filename=filename)
    reader.check_datatype(datatype)
    df = reader.read()
    # Keep only the known columns, then drop those that are entirely NaN
    df = df.reindex(varnames.keys(), axis=1).dropna(how='all', axis=1)
    basin = GRPRT_Basin(
        filename=os.path.join(os.path.dirname(filename), 'BASSIN.DAT'))
    basin.read()
    code = list(basin['Q'].keys())[0]
    area = float(basin['S'])
    # -------------------------------------------------------------------------
    # 2- Series collection
    # -------------------------------------------------------------------------
    for c in df.columns:
        v = varnames[c]
        if v.startswith('Q'):
            # Presumably converts a water depth (mm per hour) over the
            # basin area (km2) into a discharge (m3/s) -- confirm units.
            df[c] = df[c] * area / 3.6
        meta = 'grp16' if 'sim' in c else None
        serie = Serie(
            datval=df[c].to_frame(),
            code=code,
            varname=v,
            provider='GRP16',
            warning=warning)
        series.add(serie, meta=meta)
    return series