# Source code of pyspc.verification.scores.data

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Evaluation de simulations et prévisions - Projet SCORES - Fichier prv
"""
import collections
import copy as _copy
import pandas as pnd
from pyspc.convention.scores import (
    EXPORT_COLS, EXPORT_FCST_COLS, EXPORT_DTYPES, EXPORT_UNITS)


# Date pattern (day-month-year hour:minute) passed as ``date_format`` to
# pandas when the Scores prv files are read and written by this module.
DATE_FORMAT = '%d-%m-%Y %H:%M'
"""Format des dates dans les fichiers Prv pour Scores"""


class Data:
    """
    Handle a Scores prv data file.

    Attributes
    ----------
    filename : str
        Name of the Scores prv file
    datatype : str
        Kind of Scores prv data file, one of the values of ``get_types()``

    """

    def __init__(self, filename=None, datatype=None):
        """
        Initialise a Scores Data (prv) instance.

        Parameters
        ----------
        filename : str
            Name of the Scores prv file
        datatype : str
            Kind of Scores prv data file

        Raises
        ------
        ValueError
            If ``datatype`` is not one of the values of ``get_types()``

        """
        if datatype not in self.get_types():
            raise ValueError('Type de fichier PRV inconnu')
        self.datatype = datatype
        self.filename = filename

    def __str__(self):
        """
        Return a banner showing the meta-data of the instance.
        """
        text = """
        *************************************
        ********** SCORES - Data (prv) ******
        *************************************
        * NOM FICHIER = {filename}
        * TYPE DE FICHIER = {datatype}
        *************************************
        """
        return text.format(**vars(self))

    @staticmethod
    def _parse_fcst_metadata(lines, keys):
        """
        Collect the forecast meta-data found in the leading '#' lines.

        Parameters
        ----------
        lines : iterable of str
            Lines of the file, in file order
        keys : list
            First fields (text before the first ';') to be kept

        Returns
        -------
        collections.OrderedDict
            For each kept key, the remaining ';'-separated fields of the
            first line carrying that key

        """
        metadata = collections.OrderedDict()
        for line in lines:
            # The header is the leading run of '#' lines: stop at the first
            # other line so that the data rows are never scanned.
            if not line.startswith('#'):
                break
            info = line.strip('\n').strip('\r').split(';')
            if info[0] in keys:
                # setdefault: the first occurrence of a key wins
                metadata.setdefault(info[0], info[1:])
        return metadata

    @staticmethod
    def _unit_header_lines(varnames, units):
        """
        Build the '# <variable> ; <unit>' header lines.

        Parameters
        ----------
        varnames : iterable
            Variable names, possibly repeated
        units : dict
            Unit of each variable; a missing variable gets an empty unit

        Returns
        -------
        list of str
            One line per distinct variable, in order of first appearance

        """
        # dict.fromkeys removes duplicates while keeping a deterministic
        # order, unlike set() whose order changes from one run to another.
        return [f"# {v} ; {units.get(v, '')}\n"
                for v in dict.fromkeys(varnames)]

    @staticmethod
    def _check_levels(df, prefix):
        """
        Check that every name of prefix is a level of the columns of df.

        Raises
        ------
        ValueError
            If a name of ``prefix`` is not a column level name of ``df``

        """
        names = df.columns.names
        for p in prefix:
            if p not in names:
                raise ValueError(f"Méta-donné '{p}' manquante")

    def _extend_fcst_columns(self, df, prefix):
        """
        Extend the column multi-index with the forecast meta-data.

        The meta-data are read from the leading '#' lines of the file
        ``self.filename`` and prepended, column by column, to the existing
        column levels of the dataframe.

        Parameters
        ----------
        df : pnd.DataFrame
            Dataframe of the data. Its columns are replaced in place
        prefix : list
            Names of the multi-index levels

        Returns
        -------
        df : pnd.DataFrame
            Dataframe of the data, with the extended column multi-index
        prefix : list
            Forecast level names followed by the names given in ``prefix``

        Raises
        ------
        ValueError
            If a meta-data line has fewer values than there are columns

        """
        fcst_prefix = _copy.deepcopy(EXPORT_FCST_COLS)
        # Iterating over the file object reads it lazily: only the header
        # is consumed, whatever the size of the file.
        with open(self.filename, 'r', encoding='utf-8') as f:
            metadata = self._parse_fcst_metadata(f, fcst_prefix)
        # New columns: meta-data values first, then the original levels
        columns = []
        for k, col in enumerate(df.columns):
            extra = []
            for key, values in metadata.items():
                try:
                    extra.append(values[k])
                except IndexError as ie:
                    raise ValueError(
                        f"Méta-donné '{key}' incorrecte") from ie
            columns.append(tuple(extra) + tuple(col))
        names = list(metadata) + list(df.columns.names)
        df.columns = pnd.MultiIndex.from_tuples(columns, names=names)
        fcst_prefix.extend(prefix)
        return df, fcst_prefix

    def read(self):
        """
        Read a SCORES/OTAMIN prv file.

        Returns
        -------
        data : pnd.DataFrame
            Dataframe of the data

        Raises
        ------
        ValueError
            If pandas raises an IndexError while parsing the file, or if an
            expected level is missing from the column multi-index

        Examples
        --------
        >>> from pyspc.verification.scores import Data
        >>> f = 'data/verification/scores/K6373020_Q.txt'
        >>> d = Data(filename=f, datatype='obs')
        >>> df = d.read()
        >>> df
        Stations             K6373020
        Grandeurs                   Q
        IdSeries                  obs
        2016-05-31 12:00:00      79.3
        2016-05-31 13:00:00      85.7
        2016-05-31 14:00:00      89.6
        2016-05-31 15:00:00      91.8
        2016-05-31 16:00:00      93.2
        2016-05-31 17:00:00      94.8
        2016-05-31 18:00:00      96.0
        2016-05-31 19:00:00      96.4
        2016-05-31 20:00:00      96.4
        2016-05-31 21:00:00      95.6
        2016-05-31 22:00:00      94.8
        2016-05-31 23:00:00      94.0
        2016-06-01 00:00:00      93.2
        2016-06-01 01:00:00      92.4
        2016-06-01 02:00:00      91.6
        2016-06-01 03:00:00      90.8
        2016-06-01 04:00:00      90.0
        2016-06-01 05:00:00      88.6
        2016-06-01 06:00:00      87.3

        >>> f = 'data/verification/scores/K6373020_Q_sim.txt'
        >>> d = Data(filename=f, datatype='sim')
        >>> df = d.read()
        >>> df
        Stations             K6373020
        Grandeurs                   Q
        IdSeries                  sim
        2016-05-31 12:00:00     71.02
        2016-05-31 13:00:00     73.67
        2016-05-31 14:00:00     76.08
        2016-05-31 15:00:00     78.20
        2016-05-31 16:00:00     80.04
        2016-05-31 17:00:00     81.72
        2016-05-31 18:00:00     83.16
        2016-05-31 19:00:00     84.08
        2016-05-31 20:00:00     84.69
        2016-05-31 21:00:00     84.94
        2016-05-31 22:00:00     84.75
        2016-05-31 23:00:00     84.10
        2016-06-01 00:00:00     83.06
        2016-06-01 01:00:00     81.82
        2016-06-01 02:00:00     80.44
        2016-06-01 03:00:00     78.95
        2016-06-01 04:00:00     77.34
        2016-06-01 05:00:00     75.61
        2016-06-01 06:00:00     73.79

        >>> f = 'data/verification/scores/K6373020_Q_9.txt'
        >>> d = Data(filename=f, datatype='fcst')
        >>> df = d.read()
        >>> df
        # Modeles                    45hEAOtt00
        # Scenarios                        2007
        # DtDerObs             31-05-2016 12:00
        # Probas                             50
        Stations                       K6373020
        Grandeurs                             Q
        IdSeries             45hEAOtt00_2007_50
        2016-05-31 12:00:00               71.02
        2016-05-31 13:00:00               73.67
        2016-05-31 14:00:00               76.08
        2016-05-31 15:00:00               78.20
        2016-05-31 16:00:00               80.04
        2016-05-31 17:00:00               81.72
        2016-05-31 18:00:00               83.16
        2016-05-31 19:00:00               84.08
        2016-05-31 20:00:00               84.69
        2016-05-31 21:00:00               84.94
        2016-05-31 22:00:00               84.75
        2016-05-31 23:00:00               84.10
        2016-06-01 00:00:00               83.06
        2016-06-01 01:00:00               81.82
        2016-06-01 02:00:00               80.44
        2016-06-01 03:00:00               78.95
        2016-06-01 04:00:00               77.34
        2016-06-01 05:00:00               75.61
        2016-06-01 06:00:00               73.79

        """
        prefix = EXPORT_COLS
        # Parsing by pandas: '#' lines are skipped as comments and the
        # first three remaining rows form the column multi-index.
        try:
            df = pnd.read_csv(
                self.filename,
                sep=';',
                comment='#',
                header=[0, 1, 2],
                index_col=0,
                parse_dates=True,
                date_format=DATE_FORMAT,
            )
        except IndexError as ie:
            raise ValueError(
                f"Formattage incorrect du fichier {self.filename}") from ie
        # Forecast files carry extra levels stored in the commented lines
        if self.datatype == 'fcst':
            df, prefix = self._extend_fcst_columns(df, prefix)
        self._check_levels(df, prefix)
        # Put the column levels in the conventional order
        return df.reorder_levels(prefix, axis=1)

    def write(self, data=None):
        """
        Write a SCORES/OTAMIN prv file.

        The file ``self.filename`` receives a commented header (data type,
        time zone, one line per variable with its unit) followed by the
        table of data, with ';' as separator and CRLF line endings.

        Parameters
        ----------
        data : pnd.DataFrame
            Dataframe of the data

        Raises
        ------
        ValueError
            If ``data`` is not a dataframe, or if an expected level is
            missing from its column multi-index

        """
        prefix = EXPORT_COLS
        if self.datatype == 'fcst':
            prefix = _copy.deepcopy(EXPORT_FCST_COLS)
            prefix.extend(EXPORT_COLS)
        if not isinstance(data, pnd.DataFrame):
            raise ValueError('Les données sont mal formatées')
        self._check_levels(data, prefix)
        # Put the column levels in the conventional order
        data = data.reorder_levels(prefix, axis=1)
        # Header, built before the file is opened so that a failure here
        # does not leave a truncated file behind
        header = [
            f'# {EXPORT_DTYPES[self.datatype]} SCORES\n',
            '# TZ ; UTC\n',
        ]
        header.extend(self._unit_header_lines(
            data.columns.get_level_values('Grandeurs'), EXPORT_UNITS))
        # newline='\r\n' turns each '\n' written below into CRLF
        with open(self.filename, 'w', encoding='utf-8', newline='\r\n') as f:
            f.writelines(header)
        # Table of data, appended after the header
        data.to_csv(
            self.filename,
            mode='a',
            sep=';',
            float_format='%.2f',
            header=True,
            date_format=DATE_FORMAT,
            lineterminator='\r\n'
        )

    @classmethod
    def get_types(cls):
        """
        List the kinds of Scores prv files.

        Returns
        -------
        list
            Kinds of Scores prv files, i.e. the keys of EXPORT_DTYPES

        .. seealso:: pyspc.convention.scores.EXPORT_DTYPES

        """
        return list(EXPORT_DTYPES)