#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021 R. Marty
# (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Incertitudes de modélisation - Projet OTAMIN v2016 - Fichier Calage
"""
import collections
import copy as _copy
from datetime import datetime as dt, timedelta as td
import os.path
import pandas as pnd
from pyspc.convention.otamin16 import CAL_COLS, CAL_TDELTA
DATE_FORMAT = '%d-%m-%Y %H:%M'
"""Date format used in OTAMIN v2016 (calibration) csv files"""


def date_parser(txt):
    """
    Convert a date string into a datetime.

    Parameters
    ----------
    txt : str
        Date written with the DATE_FORMAT layout (day-month-year hour:minute)

    Returns
    -------
    datetime
        Parsed date

    """
    parsed = dt.strptime(txt, DATE_FORMAT)
    return parsed
# [docs]  (Sphinx "view source" link artefact; not part of the program)
class Data:
    """
    Handle an OTAMIN v2016 calibration ("Calage") csv file.

    Attributes
    ----------
    filename : str
        Name of the OTAMIN v2016 (calibration) csv file
    station : str
        Location code
    model : str
        Model code following the POM convention
    leadtime : timedelta
        Forecast lead time

    """

    def __init__(self, filename=None):
        """
        Initialise the Data (csv) instance of Otamin v2016.

        Parameters
        ----------
        filename : str
            Name of the OTAMIN v2016 csv file. When given, the station,
            model and lead time are extracted from its base name.

        Raises
        ------
        ValueError
            If the base name does not follow the OTAMIN naming scheme

        """
        self.filename = filename
        # split_basename returns three None values when filename is None,
        # so both cases are handled by the same unpacking.
        self.station, self.model, self.leadtime = \
            self.split_basename(filename)

    def __str__(self):
        """
        Return the metadata of the Data (csv) instance as a text banner.
        """
        # Built line by line so that the emitted text does not depend on
        # the indentation of this method.
        banner = "\n".join((
            "",
            "*************************************",
            "***** OTAMIN 2016 - Data (csv) ******",
            "*************************************",
            "* NOM FICHIER = {filename}",
            "* STATION = {station}",
            "* MODELE = {model}",
            "* ECHEANCE [h] = {leadtime}",
            "*************************************",
            "",
        ))
        return banner.format(**vars(self))

    def read(self):
        """
        Read an Otamin csv file.

        The file starts with ``key;value`` metadata lines, followed by a
        table whose header line starts with ``#``. The metadata values
        become the outer levels of the column MultiIndex.

        Returns
        -------
        data : pnd.DataFrame
            Data frame of the values, indexed by date

        Raises
        ------
        ValueError
            If a metadata entry listed in CAL_COLS is missing from the
            file header

        Examples
        --------
        >>> from pyspc.verification.otamin16 import Data
        >>> f = 'data/model/otamin16/K0403010_45gGRPd000_012.csv'
        >>> d = Data(filename=f)
        >>> df = d.read()
        >>> df
                               OBS      PREV
        # JJ-MM-AAAA HH:MM
        2008-11-01 18:00:00   22.7   25.2565
        2008-11-01 19:00:00   30.2   30.3346
        2008-11-01 20:00:00   41.7   42.1571
        2008-11-01 21:00:00   67.8   64.2836
        2008-11-01 22:00:00  136.0   94.1275
        2008-11-01 23:00:00  177.0  125.9666
        2008-11-02 00:00:00  188.0  149.4239
        2008-11-02 01:00:00  172.0  159.6839
        2008-11-02 02:00:00  160.0  163.2515
        2008-11-02 03:00:00  186.0  164.4469
        2008-11-02 04:00:00  203.0  163.0889
        2008-11-02 05:00:00  186.0  160.1685
        2008-11-02 06:00:00  174.0  158.0497
        2008-11-02 07:00:00  164.0  153.9042
        2008-11-02 08:00:00  145.0  133.8308
        2008-11-02 09:00:00  121.0  123.6565
        2008-11-02 10:00:00   97.0  112.3536
        2008-11-02 11:00:00   75.2   85.3825
        2008-11-02 12:00:00   64.5   73.2027

        NOTE(review): the output above shows only the innermost column
        level; the returned frame also carries the metadata levels.
        Confirm the exact display against an actual run.

        """
        # Initialisation
        expected = list(CAL_COLS)
        metadata = collections.OrderedDict()
        n_header = 0
        # Header: every line located before the first '#' line
        with open(self.filename, 'r', encoding='utf-8', newline='\n') as f:
            for line in f:
                if line.startswith('#'):
                    break
                fields = line.strip('\n').strip('\r').split(';')
                # A known key without any value is ignored here, so that it
                # is reported below as a missing metadata entry.
                if fields[0] in expected and len(fields) > 1:
                    metadata.setdefault(fields[0], fields[1])
                n_header += 1
        # Data table
        df = pnd.read_csv(
            self.filename,
            sep=';',
            skiprows=n_header,
            index_col=0,
            parse_dates=True,
            date_format=DATE_FORMAT,
            na_values=['-99.900', -99.900,
                       '-999.999', -999.999,
                       '-999.9990', -999.9990],
            keep_default_na=True,
        )
        # Add the metadata as outer column levels
        inner_names = list(df.columns.names)
        meta_values = tuple(metadata.values())
        tuples = [meta_values + (col,) for col in df.columns]
        names = list(metadata) + inner_names
        levels = expected + inner_names
        df.columns = pnd.MultiIndex.from_tuples(tuples, names=names)
        # Checks
        for name in levels:
            if name not in df.columns.names:
                raise ValueError(f"Méta-donné '{name}' manquante")
        # Order of the multi-index levels: CAL_COLS first, data last
        return df.reorder_levels(levels, axis=1)

    def write(self, data=None):
        """
        Write an Otamin csv file.

        Parameters
        ----------
        data : pnd.DataFrame
            Data frame of the values. Its columns must hold one level per
            entry of CAL_COLS.

        Raises
        ------
        ValueError
            If data is None

        """
        # Validate before opening: mode 'w' truncates an existing file.
        if data is None:
            raise ValueError('Définition incorrecte des arguments')
        # Header: one "key;value" line per metadata level
        with open(self.filename, 'w', encoding='utf-8', newline='\n') as f:
            for level in CAL_COLS:
                # The first value of the level is written, so the output
                # is deterministic even if the level is not constant.
                value = data.columns.get_level_values(level)[0]
                f.write(f'{level};{value}\n')
        # Data table, appended after the header
        table = data.droplevel(CAL_COLS, axis=1)
        table.to_csv(
            self.filename,
            mode='a',
            sep=';',
            float_format='%.4f',
            na_rep='-99.900',
            header=True,
            date_format=DATE_FORMAT,
            lineterminator='\n'
        )

    @staticmethod
    def split_basename(filename=None):
        """
        Extract the information held by the file name.

        The base name, without '.csv', must be made of three fields
        separated by '_': station, model and lead time.

        Parameters
        ----------
        filename : str
            OTAMIN v2016 (calibration) csv file

        Returns
        -------
        station : str
            Location code
        model : str
            Model code following the POM convention
        leadtime : timedelta
            Forecast lead time, i.e. the integer read from the file name
            multiplied by CAL_TDELTA

        All three values are None when filename is None.

        Raises
        ------
        ValueError
            If the file name does not follow the OTAMIN naming scheme

        """
        if filename is None:
            return None, None, None
        basename = os.path.basename(filename).replace('.csv', '')
        try:
            station, model, leadtime = basename.split('_')
            # Parsed inside the try so that a non-numeric lead time is
            # reported with the same message as a wrong number of fields.
            steps = int(float(leadtime))
        except ValueError as ve:
            raise ValueError("Le nom de fichier ne respecte pas le "
                             "nommage de OTAMIN") from ve
        return station, model, steps * CAL_TDELTA

    @staticmethod
    def join_basename(station=None, model=None, leadtime=None):
        """
        Build the file name from its information.

        Parameters
        ----------
        station : str
            Location code
        model : str
            Model code following the POM convention
        leadtime : timedelta
            Forecast lead time. It is written as a number of CAL_TDELTA
            steps, truncated to an integer and zero-padded to 3 digits.

        Returns
        -------
        filename : str
            OTAMIN v2016 (calibration) csv file

        Raises
        ------
        ValueError
            If station or model is None, or if leadtime is not a timedelta

        """
        if station is None or model is None or not isinstance(leadtime, td):
            raise ValueError('Définition incorrecte des arguments')
        steps = int(float(leadtime / CAL_TDELTA))
        return f'{station}_{model}_{steps:03d}.csv'