# Code source de pyspc.model.otamin16.rt_data

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Incertitudes de modélisation - Projet OTAMIN v2016 - Fichier prv
"""
import collections
import copy as _copy
from datetime import datetime as dt
import pandas as pnd
from pyspc.convention.otamin16 import (
    EXPORT_COLS, EXPORT_TREND_COLS, EXPORT_DTYPES, EXPORT_UNITS)

import pyspc.core.exception as _exception


# strftime/strptime pattern (day-month-year hour:minute) shared by
# date_parser, date_formatter and the pandas read/write calls of this module.
DATE_FORMAT = '%d-%m-%Y %H:%M'
"""Format des dates dans les fichiers Prv pour Otamin v2016"""


def date_parser(txt):
    """Parse a text date written with the module-level DATE_FORMAT pattern."""
    parsed = dt.strptime(txt, DATE_FORMAT)
    return parsed


def date_formatter(date):
    """Render a date as text using the module-level DATE_FORMAT pattern."""
    text = date.strftime(DATE_FORMAT)
    return text


class RT_Data():
    """
    Read and write OTAMIN v2016 prv files.

    Attributes
    ----------
    filename : str
        Name of the OTAMIN v2016 prv file
    datatype : str
        Type of the data file

    """

    def __init__(self, filename=None, datatype=None):
        """
        Initialise a RT_Data (prv) instance for Otamin v2016.

        Parameters
        ----------
        filename : str
            Name of the OTAMIN v2016 prv file
        datatype : str
            Type of the data file, one of the values of ``get_types()``

        Raises
        ------
        ValueError
            If ``datatype`` is not one of the values of ``get_types()``

        """
        if datatype not in self.get_types():
            raise ValueError('Type de fichier PRV inconnu')
        self.datatype = datatype
        self.filename = filename

    def __str__(self):
        """
        Return a text banner with the meta-data of the instance.

        The banner is filled from the instance attributes
        (``filename`` and ``datatype``).
        """
        text = """
        *************************************
        ***** OTAMIN 2016 - RT_Data (prv) ***
        *************************************
        * NOM FICHIER = {filename}
        * TYPE DE FICHIER = {datatype}
        *************************************
        """
        return text.format(**vars(self))

    def _extend_columns(self, df, prefix, cols):
        """
        Append commented meta-data rows of the file as column levels.

        The lines of ``self.filename`` starting with '#' are split on ';'.
        When the first field is one of ``cols``, the remaining fields are
        used, column by column, as an additional level of the column
        multi-index of ``df``.

        Parameters
        ----------
        df : pnd.DataFrame
            Dataframe of the data; its columns are replaced in place
        prefix : list
            Names of the multi-index levels; extended in place with ``cols``
        cols : list
            Names of the levels to add, starting with '#'

        Returns
        -------
        df : pnd.DataFrame
            Dataframe of the data, with the extended column multi-index
        prefix : list
            Names of the multi-index levels, extended with ``cols``

        Raises
        ------
        ValueError
            If a meta-data row holds fewer values than ``df`` has columns

        """
        # Collect the requested meta-data rows; the first occurrence wins.
        metadata = collections.OrderedDict()
        with open(self.filename, 'r', encoding='utf-8') as f:
            for line in f:
                if not line.startswith('#'):
                    continue
                info = line.strip('\n').strip('\r').split(';')
                if info[0] in cols:
                    metadata.setdefault(info[0], info[1:])
        # Only the levels actually found in the file are added to the index;
        # the caller is expected to check for the missing ones.
        names = list(df.columns.names) + list(metadata)
        tuples = []
        for k, column in enumerate(df.columns):
            extended = list(column)
            for name, values in metadata.items():
                try:
                    extended.append(values[k])
                except IndexError as ie:
                    raise ValueError(
                        f"Méta-donné '{name}' incorrecte") from ie
            tuples.append(tuple(extended))
        df.columns = pnd.MultiIndex.from_tuples(tuples, names=names)
        prefix.extend(cols)
        return df, prefix

    def read(self):
        """
        Read an Otamin prv file.

        Returns
        -------
        data : pnd.DataFrame
            Dataframe of the data

        Raises
        ------
        ValueError
            If pandas fails with an IndexError while parsing the file, or
            if an expected meta-data level is missing or incomplete

        Examples
        --------
        >>> from pyspc.model.otamin16.rt_data import RT_Data
        >>> f = 'data/model/otamin16/GRP_B_20200911_1515_2.prv'
        >>> d = RT_Data(filename=f, datatype='fcst')
        >>> df = d.read()
        >>> df
        Stations                   A6701210
        Grandeurs                         Q
        IdSeries            57gGRPd000_2001  57gGRPd000_2002  57gGRPd000_2003
        # Modeles                57gGRPd000       57gGRPd000       57gGRPd000
        # Scenarios                    2001             2002             2003
        # DtDerObs         03-02-2020 12:00 03-02-2020 12:00 03-02-2020 12:00
        2020-02-03 13:00:00          41.689           41.614           41.784
        2020-02-03 14:00:00          43.426           43.161           43.760
        2020-02-03 15:00:00          44.285           43.699           45.025
        2020-02-03 16:00:00          44.395           43.438           45.649
        2020-02-03 17:00:00          44.109           42.775           45.949
        2020-02-03 18:00:00          43.340           41.659           45.793
        2020-02-03 19:00:00          42.448           40.459           45.491
        2020-02-03 20:00:00          41.470           39.230           45.023
        2020-02-03 21:00:00          40.312           37.941           44.198
        2020-02-03 22:00:00          39.044           36.614           43.123
        2020-02-03 23:00:00          37.754           35.292           41.936
        2020-02-04 00:00:00          36.612           34.060           40.888
        2020-02-04 01:00:00          36.184           33.290           40.909
        2020-02-04 02:00:00          36.708           33.133           42.427
        2020-02-04 03:00:00          38.304           33.660           45.723
        2020-02-04 04:00:00          40.523           34.575           50.083
        2020-02-04 05:00:00          43.099           35.706           55.065
        2020-02-04 06:00:00          45.781           36.908           60.224
        2020-02-04 07:00:00          48.223           37.971           64.957
        2020-02-04 08:00:00          50.064           38.681           68.613
        2020-02-04 09:00:00          50.648           38.654           70.013
        2020-02-04 10:00:00          50.161           38.023           69.503
        2020-02-04 11:00:00          48.963           37.008           67.750
        2020-02-04 12:00:00          47.745           36.014           66.030

        >>> f = 'data/model/otamin16/GRP_B_20200911_1515_DA_2.prv'
        >>> d = RT_Data(filename=f, datatype='fcst')
        >>> df = d.read()
        >>> df
        Stations                   A6701210  ...
        Grandeurs                        RR  ...               TA
        IdSeries            57gGRPd000_2001  ...  57gGRPd000_2003
        # Modeles                57gGRPd000  ...       57gGRPd000
        # Scenarios                    2001  ...             2003
        # DtDerObs         03-02-2020 12:00  ... 03-02-2020 12:00
        2020-02-03 13:00:00             1.1  ...              NaN
        2020-02-03 14:00:00             1.1  ...              NaN
        2020-02-03 15:00:00             1.1  ...              NaN
        2020-02-03 16:00:00             0.2  ...              NaN
        2020-02-03 17:00:00             0.2  ...              NaN
        2020-02-03 18:00:00             0.2  ...              NaN
        2020-02-03 19:00:00             0.3  ...              NaN
        2020-02-03 20:00:00             0.3  ...              NaN
        2020-02-03 21:00:00             0.3  ...              NaN
        2020-02-03 22:00:00             0.4  ...              NaN
        2020-02-03 23:00:00             0.4  ...              NaN
        2020-02-04 00:00:00             0.4  ...              NaN
        2020-02-04 01:00:00             3.6  ...              NaN
        2020-02-04 02:00:00             3.6  ...              NaN
        2020-02-04 03:00:00             3.6  ...              NaN
        2020-02-04 04:00:00             0.8  ...              NaN
        2020-02-04 05:00:00             0.8  ...              NaN
        2020-02-04 06:00:00             0.8  ...              NaN
        2020-02-04 07:00:00             0.4  ...              NaN
        2020-02-04 08:00:00             0.4  ...              NaN
        2020-02-04 09:00:00             0.4  ...              NaN
        2020-02-04 10:00:00             0.7  ...              NaN
        2020-02-04 11:00:00             0.7  ...              NaN
        2020-02-04 12:00:00             0.7  ...              NaN

        >>> f = 'data/model/otamin16/GRP_B_20200203_1200_2.prv'
        >>> d = RT_Data(filename=f, datatype='trend')
        >>> df = d.read()
        >>> df
        Stations                      A6701210
        Grandeurs                            Q
        IdSeries            57gGRPd000_2001_-1  ...  57gGRPd000_2001_90
        # Modeles                   57gGRPd000  ...          57gGRPp000
        # Scenarios                       2001  ...                2001
        # DtDerObs            03-02-2020 12:00  ...       03-02-2020 12
        # Probas                            -1  ...                  90
        2020-02-03 13:00:00             41.689  ...              42.977
        2020-02-03 14:00:00             43.426  ...              46.273
        2020-02-03 15:00:00             44.285  ...              48.725
        2020-02-03 16:00:00             44.395  ...              50.069
        2020-02-03 17:00:00             44.109  ...              50.962
        2020-02-03 18:00:00             43.340  ...              51.269
        2020-02-03 19:00:00             42.448  ...              51.050
        2020-02-03 20:00:00             41.470  ...              50.691
        2020-02-03 21:00:00             40.312  ...              50.070
        2020-02-03 22:00:00             39.044  ...              49.208
        2020-02-03 23:00:00             37.754  ...              48.272
        2020-02-04 00:00:00             36.612  ...              47.480
        2020-02-04 01:00:00             36.184  ...              47.263
        2020-02-04 02:00:00             36.708  ...              48.290
        2020-02-04 03:00:00             38.304  ...              50.748
        2020-02-04 04:00:00             40.523  ...              54.066
        2020-02-04 05:00:00             43.099  ...              57.906
        2020-02-04 06:00:00             45.781  ...              61.938
        2020-02-04 07:00:00             48.223  ...              65.412
        2020-02-04 08:00:00             50.064  ...              68.085
        2020-02-04 09:00:00             50.648  ...              69.058
        2020-02-04 10:00:00             50.161  ...              68.571
        2020-02-04 11:00:00             48.963  ...              67.106
        2020-02-04 12:00:00             47.745  ...              65.605

        """
        # Levels read by pandas itself: the names not starting with '#'.
        prefix = [c for c in EXPORT_COLS if not c.startswith('#')]
        # Parsing by pandas: lines starting with '#' are skipped as comments.
        try:
            df = pnd.read_csv(
                self.filename,
                sep=';',
                comment='#',
                header=[0, 1, 2],
                index_col=0,
                parse_dates=True,
                date_format=DATE_FORMAT,
                na_values=[-99.900, '-99.900', -999.999, '-999.999'],
                keep_default_na=True,
            )
        except IndexError as ie:
            raise ValueError(
                f"Formattage incorrect du fichier {self.filename}") from ie
        # The '#' levels were skipped above: recover them from the raw file.
        df, prefix = self._extend_columns(
            df, prefix, [c for c in EXPORT_COLS if c not in prefix])
        if self.datatype == 'trend':
            df, prefix = self._extend_columns(df, prefix, EXPORT_TREND_COLS)
        # Every expected level must have been found in the file.
        for p in prefix:
            if p not in df.columns.names:
                raise ValueError(f"Méta-donné '{p}' manquante")
        # Order of the multi-index levels
        df = df.reorder_levels(prefix, axis=1)
        return df

    def write(self, data=None):
        """
        Write an OTAMIN prv file.

        A commented header (file type, time zone, one line per variable
        found in the 'Grandeurs' level) is written first, then the table
        of data is appended to the same file.

        Parameters
        ----------
        data : pnd.DataFrame
            Dataframe of the data

        Raises
        ------
        ValueError
            If an expected level is missing from the columns of ``data``

        """
        # Expected levels; copied so the convention constants stay untouched.
        prefix = list(EXPORT_COLS)
        if self.datatype == 'trend':
            prefix.extend(EXPORT_TREND_COLS)
        # Checks
        _exception.check_dataframe(data)
        for p in prefix:
            if p not in data.columns.names:
                raise ValueError(f"Méta-donné '{p}' manquante")
        # Order of the multi-index levels
        data = data.reorder_levels(prefix, axis=1)
        # Header
        with open(self.filename, 'w', encoding='utf-8', newline='\r\n') as f:
            f.write(f'# {EXPORT_DTYPES[self.datatype]}\n')
            f.write('# TZ ; UTC\n')
            varnames = data.columns.get_level_values('Grandeurs')
            for v in sorted(set(varnames)):
                f.write(f"# {v} ; {EXPORT_UNITS.get(v, '')}\n")
        # Table of data: appended once the header file is closed, so that
        # the header is guaranteed to be on disk before the table.
        data.to_csv(
            self.filename,
            mode='a',
            sep=';',
            float_format='%.3f',
            na_rep='-99.900',
            header=True,
            date_format=DATE_FORMAT,
            lineterminator='\r\n'
        )

    @classmethod
    def get_types(cls):
        """
        List the types of Otamin prv files.

        Returns
        -------
        list
            Types of Otamin prv files (keys of EXPORT_DTYPES)

        .. seealso:: pyspc.convention.otamin16.EXPORT_DTYPES

        """
        return list(EXPORT_DTYPES)