#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021 R. Marty
# (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Evaluation de simulations et prévisions - Projet SCORES - Fichier prv
"""
import collections
import copy as _copy
import pandas as pnd
from pyspc.convention.scores import (
EXPORT_COLS, EXPORT_FCST_COLS, EXPORT_DTYPES, EXPORT_UNITS)
# Date/time pattern (strftime syntax: day-month-year hour:minute) passed to
# pandas as ``date_format`` when reading and writing Scores prv files.
DATE_FORMAT = '%d-%m-%Y %H:%M'
"""Format des dates dans les fichiers Prv pour Scores"""
class Data:
    """
    Read and write Scores "prv" data files.

    Attributes
    ----------
    filename : str
        Name of the Scores prv file
    datatype : str
        Type of the Scores prv data file; one of the values returned by
        :meth:`get_types`

    """

    def __init__(self, filename=None, datatype=None):
        """
        Initialise a Scores Data (prv) instance.

        Parameters
        ----------
        filename : str
            Name of the Scores prv file
        datatype : str
            Type of the Scores prv data file

        Raises
        ------
        ValueError
            If ``datatype`` is not one of the values returned by
            :meth:`get_types`

        """
        if datatype not in self.get_types():
            raise ValueError('Type de fichier PRV inconnu')
        self.datatype = datatype
        self.filename = filename

    def __str__(self):
        """
        Return a text banner showing the metadata of the instance.
        """
        # The placeholders are filled from the instance attributes, so
        # both 'filename' and 'datatype' must be set.
        text = (
            "\n"
            "*************************************\n"
            "********** SCORES - Data (prv) ******\n"
            "*************************************\n"
            "* NOM FICHIER = {filename}\n"
            "* TYPE DE FICHIER = {datatype}\n"
            "*************************************\n"
        )
        return text.format(**vars(self))

    def _extend_fcst_columns(self, df, prefix):
        """
        Extend the column multi-index of a forecast dataframe.

        The leading ``#`` lines of the file are scanned; every line whose
        first ``;``-separated field is one of EXPORT_FCST_COLS supplies
        one value per data column. These values are prepended, as extra
        levels, to the existing column multi-index.

        Parameters
        ----------
        df : pnd.DataFrame
            Dataframe of the data; its columns are replaced in place
        prefix : list
            Names of the multi-index levels

        Returns
        -------
        df : pnd.DataFrame
            Dataframe of the data with the extended column multi-index
        fcst_prefix : list
            Names of the multi-index levels: a copy of EXPORT_FCST_COLS
            followed by ``prefix``

        Raises
        ------
        ValueError
            If a metadata line holds fewer values than there are columns

        """
        fcst_prefix = _copy.deepcopy(EXPORT_FCST_COLS)
        metadata = collections.OrderedDict()
        with open(self.filename, 'r', encoding='utf-8') as f:
            # Iterate lazily: only the leading comment lines are needed,
            # so stop at the first line that is not a comment.
            for line in f:
                if not line.startswith('#'):
                    break
                info = line.strip('\n').strip('\r').split(';')
                if info[0] in fcst_prefix:
                    # Keep the first occurrence of each metadata line
                    metadata.setdefault(info[0], info[1:])
        # One tuple per column: metadata values, then the existing levels
        new_columns = []
        for k, c in enumerate(df.columns):
            values = []
            for p, meta_values in metadata.items():
                try:
                    values.append(meta_values[k])
                except IndexError as ie:
                    raise ValueError(f"Méta-donné '{p}' incorrecte") from ie
            values.extend(c)
            new_columns.append(tuple(values))
        new_names = list(metadata) + list(df.columns.names)
        df.columns = pnd.MultiIndex.from_tuples(new_columns, names=new_names)
        fcst_prefix.extend(prefix)
        return df, fcst_prefix

    def read(self):
        """
        Read a SCORES/OTAMIN prv file.

        The file is parsed as a ``;``-separated table whose first three
        non-comment rows form the column multi-index and whose first
        column is the date index. For the 'fcst' data type, the column
        multi-index is extended with the metadata found in the leading
        comment lines. The column levels are finally reordered according
        to the expected level names.

        Returns
        -------
        data : pnd.DataFrame
            Dataframe of the data

        Raises
        ------
        ValueError
            If the file is badly formatted or if an expected column level
            is missing

        Examples
        --------
        >>> from pyspc.verification.scores import Data
        >>> f = 'data/verification/scores/K6373020_Q.txt'
        >>> d = Data(filename=f, datatype='obs')
        >>> df = d.read()
        >>> df
        Stations             K6373020
        Grandeurs                   Q
        IdSeries                  obs
        2016-05-31 12:00:00      79.3
        2016-05-31 13:00:00      85.7
        2016-05-31 14:00:00      89.6
        2016-05-31 15:00:00      91.8
        2016-05-31 16:00:00      93.2
        2016-05-31 17:00:00      94.8
        2016-05-31 18:00:00      96.0
        2016-05-31 19:00:00      96.4
        2016-05-31 20:00:00      96.4
        2016-05-31 21:00:00      95.6
        2016-05-31 22:00:00      94.8
        2016-05-31 23:00:00      94.0
        2016-06-01 00:00:00      93.2
        2016-06-01 01:00:00      92.4
        2016-06-01 02:00:00      91.6
        2016-06-01 03:00:00      90.8
        2016-06-01 04:00:00      90.0
        2016-06-01 05:00:00      88.6
        2016-06-01 06:00:00      87.3

        >>> f = 'data/verification/scores/K6373020_Q_sim.txt'
        >>> d = Data(filename=f, datatype='sim')
        >>> df = d.read()
        >>> df
        Stations             K6373020
        Grandeurs                   Q
        IdSeries                  sim
        2016-05-31 12:00:00     71.02
        2016-05-31 13:00:00     73.67
        2016-05-31 14:00:00     76.08
        2016-05-31 15:00:00     78.20
        2016-05-31 16:00:00     80.04
        2016-05-31 17:00:00     81.72
        2016-05-31 18:00:00     83.16
        2016-05-31 19:00:00     84.08
        2016-05-31 20:00:00     84.69
        2016-05-31 21:00:00     84.94
        2016-05-31 22:00:00     84.75
        2016-05-31 23:00:00     84.10
        2016-06-01 00:00:00     83.06
        2016-06-01 01:00:00     81.82
        2016-06-01 02:00:00     80.44
        2016-06-01 03:00:00     78.95
        2016-06-01 04:00:00     77.34
        2016-06-01 05:00:00     75.61
        2016-06-01 06:00:00     73.79

        >>> f = 'data/verification/scores/K6373020_Q_9.txt'
        >>> d = Data(filename=f, datatype='fcst')
        >>> df = d.read()
        >>> df
        # Modeles                    45hEAOtt00
        # Scenarios                        2007
        # DtDerObs             31-05-2016 12:00
        # Probas                             50
        Stations                       K6373020
        Grandeurs                             Q
        IdSeries             45hEAOtt00_2007_50
        2016-05-31 12:00:00               71.02
        2016-05-31 13:00:00               73.67
        2016-05-31 14:00:00               76.08
        2016-05-31 15:00:00               78.20
        2016-05-31 16:00:00               80.04
        2016-05-31 17:00:00               81.72
        2016-05-31 18:00:00               83.16
        2016-05-31 19:00:00               84.08
        2016-05-31 20:00:00               84.69
        2016-05-31 21:00:00               84.94
        2016-05-31 22:00:00               84.75
        2016-05-31 23:00:00               84.10
        2016-06-01 00:00:00               83.06
        2016-06-01 01:00:00               81.82
        2016-06-01 02:00:00               80.44
        2016-06-01 03:00:00               78.95
        2016-06-01 04:00:00               77.34
        2016-06-01 05:00:00               75.61
        2016-06-01 06:00:00               73.79

        """
        # Work on a copy so the module-level convention is never aliased
        prefix = list(EXPORT_COLS)
        # Parsing by pandas; '#' lines are treated as comments
        try:
            df = pnd.read_csv(
                self.filename,
                sep=';',
                comment='#',
                header=[0, 1, 2],
                index_col=0,
                parse_dates=True,
                date_format=DATE_FORMAT,
            )
        except IndexError as ie:
            raise ValueError(
                f"Formattage incorrect du fichier {self.filename}") from ie
        # Add the commented metadata as extra column levels if 'fcst'
        if self.datatype == 'fcst':
            df, prefix = self._extend_fcst_columns(df, prefix)
        # Checks: every expected level must be present
        for p in prefix:
            if p not in df.columns.names:
                raise ValueError(f"Méta-donné '{p}' manquante")
        # Order of the multi-index levels
        return df.reorder_levels(prefix, axis=1)

    def write(self, data=None):
        """
        Write a SCORES/OTAMIN prv file.

        A commented header is written first (data type, time zone, then
        one line per distinct value of the 'Grandeurs' level with its
        unit, in order of first appearance); the dataframe is then
        appended as a ``;``-separated table with CRLF line endings.

        Parameters
        ----------
        data : pnd.DataFrame
            Dataframe of the data; its columns must be a multi-index
            holding every expected level name

        Raises
        ------
        ValueError
            If ``data`` is not a dataframe or if an expected column level
            is missing

        """
        # Checks on the input type
        if not isinstance(data, pnd.DataFrame):
            raise ValueError('Les données sont mal formatées')
        # Expected level names (copies: conventions are never mutated)
        prefix = list(EXPORT_COLS)
        if self.datatype == 'fcst':
            prefix = _copy.deepcopy(EXPORT_FCST_COLS)
            prefix.extend(EXPORT_COLS)
        for p in prefix:
            if p not in data.columns.names:
                raise ValueError(f"Méta-donné '{p}' manquante")
        # Order of the multi-index levels
        data = data.reorder_levels(prefix, axis=1)
        # Distinct variables in order of first appearance: unlike a set,
        # this keeps the header identical from one run to the next
        varnames = dict.fromkeys(data.columns.get_level_values('Grandeurs'))
        # Header; newline='\r\n' turns every '\n' written into CRLF
        with open(self.filename, 'w', encoding='utf-8', newline='\r\n') as f:
            f.write(f'# {EXPORT_DTYPES[self.datatype]} SCORES\n')
            f.write('# TZ ; UTC\n')
            for v in varnames:
                f.write(f"# {v} ; {EXPORT_UNITS.get(v, '')}\n")
        # Data table, appended after the header
        data.to_csv(
            self.filename,
            mode='a',
            sep=';',
            float_format='%.2f',
            header=True,
            date_format=DATE_FORMAT,
            lineterminator='\r\n'
        )

    @classmethod
    def get_types(cls):
        """
        List the types of Scores prv format.

        Returns
        -------
        list
            Types of Scores prv format (keys of EXPORT_DTYPES)

        .. seealso::
            pyspc.convention.scores.EXPORT_DTYPES

        """
        return list(EXPORT_DTYPES)