Code source de pyspc.model.otamin16.data

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Incertitudes de modélisation - Projet OTAMIN v2016 - Fichier Calage
"""
import collections
import copy as _copy
from datetime import datetime as dt, timedelta as td
import os.path
import pandas as pnd
from pyspc.convention.otamin16 import CAL_COLS, CAL_TDELTA


# strptime/strftime pattern (day-month-year hour:minute) shared by this
# module: date_parser parses with it, and the csv reader/writer pass it to
# pandas as ``date_format``.
DATE_FORMAT = '%d-%m-%Y %H:%M'
"""Format des dates dans les fichiers csv OTAMIN v2016 (Calage)"""


def date_parser(txt):
    """
    Convert a date string into a datetime.

    Parameters
    ----------
    txt : str
        Date text laid out according to DATE_FORMAT

    Returns
    -------
    datetime
        Result of parsing ``txt`` with DATE_FORMAT

    """
    parsed = dt.strptime(txt, DATE_FORMAT)
    return parsed


class Data():
    """
    Handle an OTAMIN v2016 calibration ("Calage") csv file.

    Attributes
    ----------
    filename : str
        Name of the OTAMIN v2016 (calibration) csv file
    station : str
        Location code
    model : str
        Model code following the POM convention
    leadtime : timedelta
        Forecast lead time

    """

    def __init__(self, filename=None):
        """
        Initialise the Data (csv) instance of Otamin v2016.

        The station, model and lead time are derived from the file name
        through split_basename; all three are None when no file name is
        given.

        Parameters
        ----------
        filename : str
            Name of the OTAMIN v2016 csv file

        Raises
        ------
        ValueError
            If the file name does not follow the OTAMIN naming scheme
            (raised by split_basename)

        """
        self.filename = filename
        if self.filename is None:
            self.station = None
            self.model = None
            self.leadtime = None
        else:
            meta = self.split_basename(self.filename)
            self.station = meta[0]
            self.model = meta[1]
            self.leadtime = meta[2]

    def __str__(self):
        """
        Return a text banner with the metadata of the instance.
        """
        text = """
        *************************************
        ***** OTAMIN 2016 - Data (csv) ******
        *************************************
        * NOM FICHIER = {filename}
        * STATION = {station}
        * MODELE = {model}
        * ECHEANCE [h] = {leadtime}
        *************************************
        """
        # vars(self) supplies filename/station/model/leadtime by name.
        return text.format(**vars(self))

    def read(self):
        """
        Read an Otamin csv file.

        The file starts with ``key;value`` header lines, followed by a
        ';'-separated table whose header line starts with '#'. Header
        entries whose key is listed in CAL_COLS are turned into extra
        column levels of the returned dataframe.

        Returns
        -------
        data : pnd.DataFrame
            Dataframe of the data, with MultiIndex columns ordered as
            CAL_COLS followed by the original column level(s)

        Raises
        ------
        ValueError
            If a metadata entry listed in CAL_COLS is missing from the
            file header

        Examples
        --------
        >>> from pyspc.verification.otamin16 import Data
        >>> f = 'data/model/otamin16/K0403010_45gGRPd000_012.csv'
        >>> d = Data(filename=f)
        >>> df = d.read()
        >>> df
                               OBS      PREV
        # JJ-MM-AAAA HH:MM
        2008-11-01 18:00:00   22.7   25.2565
        2008-11-01 19:00:00   30.2   30.3346
        2008-11-01 20:00:00   41.7   42.1571
        2008-11-01 21:00:00   67.8   64.2836
        2008-11-01 22:00:00  136.0   94.1275
        ...
        2008-11-02 11:00:00   75.2   85.3825
        2008-11-02 12:00:00   64.5   73.2027

        NOTE(review): the sample output above shows flat columns, whereas
        the code returns MultiIndex columns carrying the metadata levels;
        the import path should also be checked against the module
        location - TODO confirm and refresh the example.

        """
        # Copy: the list is extended below and CAL_COLS must stay intact.
        cols = _copy.deepcopy(CAL_COLS)
        metadata = collections.OrderedDict()
        nskip = 0
        # Header: key;value lines located before the '#' table header
        with open(self.filename, 'r', encoding='utf-8', newline='\n') as f:
            for line in f:
                if line.startswith('#'):
                    break
                info = line.strip('\n').strip('\r').split(';')
                if info[0] in cols:
                    # setdefault: the first occurrence of a key wins
                    metadata.setdefault(info[0], info[1])
                # Every line located before the table is skipped by read_csv
                nskip += 1
        # Data table
        df = pnd.read_csv(
            self.filename,
            sep=';',
            skiprows=nskip,
            index_col=0,
            parse_dates=True,
            date_format=DATE_FORMAT,
            na_values=['-99.900', -99.900, '-999.999', -999.999,
                       '-999.9990', -999.9990],
            keep_default_na=True,
        )
        # Add the metadata as leading column levels. The names of the
        # existing column level(s) are read before the columns are replaced.
        meta_values = tuple(metadata.values())
        tuples = [meta_values + (col,) for col in df.columns]
        level_names = list(metadata) + list(df.columns.names)
        cols.extend(list(df.columns.names))
        df.columns = pnd.MultiIndex.from_tuples(tuples, names=level_names)
        # Checks: every expected level must be available
        for name in cols:
            if name not in df.columns.names:
                raise ValueError(f"Méta-donné '{name}' manquante")
        # Order of the multi-index levels
        df = df.reorder_levels(cols, axis=1)
        return df

    def write(self, data=None):
        """
        Write an Otamin csv file.

        The header (one ``key;value`` line per entry of CAL_COLS) and the
        table are prepared before the file is opened, so that invalid
        input does not truncate an existing file.

        Parameters
        ----------
        data : pnd.DataFrame
            Dataframe of the data; its columns must hold one level per
            entry of CAL_COLS

        """
        # Header: first value met on each metadata level. dict.fromkeys
        # removes duplicates while keeping the column order, which makes the
        # choice deterministic when a level holds several values.
        header = []
        for name in CAL_COLS:
            values = list(dict.fromkeys(data.columns.get_level_values(name)))
            header.append(f'{name};{values[0]}\n')
        # Data table, without the metadata levels
        table = data.droplevel(CAL_COLS, axis=1)
        with open(self.filename, 'w', encoding='utf-8', newline='\n') as f:
            f.writelines(header)
        # The table is appended after the header written above.
        table.to_csv(
            self.filename,
            mode='a',
            sep=';',
            float_format='%.4f',
            na_rep='-99.900',
            header=True,
            date_format=DATE_FORMAT,
            lineterminator='\n'
        )

    @staticmethod
    def split_basename(filename=None):
        """
        Extract the information held by the file name.

        The base name is expected to be ``<station>_<model>_<leadtime>.csv``
        where ``<leadtime>`` is a number of CAL_TDELTA steps.

        Parameters
        ----------
        filename : str
            OTAMIN v2016 (calibration) csv file

        Returns
        -------
        station : str
            Location code
        model : str
            Model code following the POM convention
        leadtime : timedelta
            Forecast lead time

        Raises
        ------
        ValueError
            If the file name does not follow the OTAMIN naming scheme

        """
        if filename is None:
            # Same arity as the regular return value
            return None, None, None
        basename = os.path.basename(filename)
        # Only the extension is removed, not every '.csv' occurrence
        if basename.endswith('.csv'):
            basename = basename[:-len('.csv')]
        try:
            station, model, leadtime = basename.split('_')
            nsteps = int(float(leadtime))
        except ValueError as ve:
            raise ValueError("Le nom de fichier ne respecte pas le "
                             "nommage de OTAMIN") from ve
        return station, model, nsteps * CAL_TDELTA

    @staticmethod
    def join_basename(station=None, model=None, leadtime=None):
        """
        Build the file name from the station, model and lead time.

        Parameters
        ----------
        station : str
            Location code
        model : str
            Model code following the POM convention
        leadtime : timedelta
            Forecast lead time

        Returns
        -------
        filename : str
            OTAMIN v2016 (calibration) csv file name, i.e.
            ``<station>_<model>_<leadtime>.csv`` with the lead time written
            as a number of CAL_TDELTA steps on at least 3 digits

        Raises
        ------
        ValueError
            If station or model is None, or if leadtime is not a timedelta

        """
        if station is None or model is None or not isinstance(leadtime, td):
            raise ValueError('Définition incorrecte des arguments')
        # Fractional numbers of steps are truncated by int()
        leadtime = int(float(leadtime / CAL_TDELTA))
        return f'{station}_{model}_{leadtime:03d}.csv'