# Source code of pyspc.model.otamin16.rt_data

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Incertitudes de modélisation - Projet OTAMIN v2016 - Fichier prv
"""
import collections
import copy as _copy
from datetime import datetime as dt
import pandas as pnd
from pyspc.convention.otamin16 import (
    EXPORT_COLS, EXPORT_TREND_COLS, EXPORT_DTYPES, EXPORT_UNITS)

import pyspc.core.exception as _exception


# strptime/strftime pattern (day-month-year hour:minute) used for the
# timestamps of Otamin v2016 prv files.
DATE_FORMAT = '%d-%m-%Y %H:%M'
"""Format des dates dans les fichiers Prv pour Otamin v2016"""


def date_parser(txt):
    """Parse a timestamp string laid out as DATE_FORMAT into a datetime."""
    parsed = dt.strptime(txt, DATE_FORMAT)
    return parsed


def date_formatter(date):
    """Render a date/datetime object as a string laid out as DATE_FORMAT."""
    formatted = date.strftime(DATE_FORMAT)
    return formatted



[docs]
class RT_Data:
    """
    Reader/writer for OTAMIN v2016 prv files.

    Attributes
    ----------
    filename : str
        Name of the OTAMIN v2016 prv file.
    datatype : str
        Type of the data file; one of the values returned by
        :meth:`get_types`.

    """

    def __init__(self, filename=None, datatype=None):
        """
        Initialise an RT_Data (prv) instance for Otamin v2016.

        Parameters
        ----------
        filename : str
            Name of the OTAMIN v2016 prv file.
        datatype : str
            Type of the data file; must be one of :meth:`get_types`.

        Raises
        ------
        ValueError
            If ``datatype`` is not one of the known prv types.

        """
        if datatype not in self.get_types():
            raise ValueError('Type de fichier PRV inconnu')
        self.datatype = datatype
        self.filename = filename

    def __str__(self):
        """
        Return a text banner showing the instance attributes
        (file name and file type).
        """
        text = """
        *************************************
        ***** OTAMIN 2016 - RT_Data (prv) ***
        *************************************
        *  NOM FICHIER      = {filename}
        *  TYPE DE FICHIER  = {datatype}
        *************************************
        """
        return text.format(**vars(self))

    def _extend_columns(self, df, prefix, cols):
        """
        Add commented metadata lines of the file as extra column levels.

        The file ``self.filename`` is scanned for lines starting with '#'.
        When the first ';'-separated field of such a line is one of
        ``cols``, its remaining fields are used, position by position, as
        the values of a new level appended to the column multi-index of
        ``df``.

        Parameters
        ----------
        df : pnd.DataFrame
            Data frame whose columns are tuples (multi-index). Its columns
            are replaced in place.
        prefix : list
            Names of the multi-index levels. Extended in place with
            ``cols``.
        cols : list
            Names of the metadata levels to add, starting with '#'.

        Returns
        -------
        df : pnd.DataFrame
            The data frame with its extended column multi-index.
        prefix : list
            The level names, followed by ``cols``.

        Raises
        ------
        ValueError
            If a metadata line holds fewer values than ``df`` has columns.

        """
        # Collect the requested metadata lines, in their order of appearance
        metadata = collections.OrderedDict()
        with open(self.filename, 'r', encoding='utf-8') as f:
            for line in f:
                if not line.startswith('#'):
                    continue
                info = line.rstrip('\r\n').split(';')
                if info[0] in cols:
                    # setdefault: the first occurrence of a name wins
                    metadata.setdefault(info[0], info[1:])
        # Append, for each column, the k-th value of every metadata line
        new_columns = []
        for k, column in enumerate(df.columns):
            extended = list(column)
            for name, values in metadata.items():
                try:
                    extended.append(values[k])
                except IndexError as ie:
                    raise ValueError(
                        f"Méta-donné '{name}' incorrecte") from ie
            new_columns.append(tuple(extended))
        # Only the metadata actually found in the file become level names;
        # callers detect the missing ones by comparing with ``prefix``
        new_names = list(df.columns.names) + list(metadata)
        df.columns = pnd.MultiIndex.from_tuples(new_columns, names=new_names)
        prefix.extend(cols)
        return df, prefix

    def read(self):
        """
        Read an Otamin prv file.

        The three header rows of the file give the column levels whose
        names do not start with '#'; the commented ('#') metadata lines
        named in EXPORT_COLS (and EXPORT_TREND_COLS for the 'trend' type)
        are added as further column levels. The values -99.900 and
        -999.999 are read as missing data.

        Returns
        -------
        data : pnd.DataFrame
            Data frame of the values, indexed by date, with a column
            multi-index ordered as EXPORT_COLS (followed by
            EXPORT_TREND_COLS for the 'trend' type).

        Raises
        ------
        ValueError
            If the file is badly formatted, or if a metadata line is
            missing or too short.

        Examples
        --------
        >>> from pyspc.model.otamin16.rt_data import RT_Data
        >>> f = 'data/model/otamin16/GRP_B_20200911_1515_2.prv'
        >>> d = RT_Data(filename=f, datatype='fcst')
        >>> df = d.read()
        >>> df
        Stations                    A6701210
        Grandeurs                          Q
        IdSeries             57gGRPd000_2001  57gGRPd000_2002  57gGRPd000_2003
        # Modeles                 57gGRPd000       57gGRPd000       57gGRPd000
        # Scenarios                     2001             2002             2003
        # DtDerObs          03-02-2020 12:00 03-02-2020 12:00 03-02-2020 12:00
        2020-02-03 13:00:00           41.689           41.614           41.784
        2020-02-03 14:00:00           43.426           43.161           43.760
        2020-02-03 15:00:00           44.285           43.699           45.025
        2020-02-03 16:00:00           44.395           43.438           45.649
        2020-02-03 17:00:00           44.109           42.775           45.949
        2020-02-03 18:00:00           43.340           41.659           45.793
        2020-02-03 19:00:00           42.448           40.459           45.491
        2020-02-03 20:00:00           41.470           39.230           45.023
        2020-02-03 21:00:00           40.312           37.941           44.198
        2020-02-03 22:00:00           39.044           36.614           43.123
        2020-02-03 23:00:00           37.754           35.292           41.936
        2020-02-04 00:00:00           36.612           34.060           40.888
        2020-02-04 01:00:00           36.184           33.290           40.909
        2020-02-04 02:00:00           36.708           33.133           42.427
        2020-02-04 03:00:00           38.304           33.660           45.723
        2020-02-04 04:00:00           40.523           34.575           50.083
        2020-02-04 05:00:00           43.099           35.706           55.065
        2020-02-04 06:00:00           45.781           36.908           60.224
        2020-02-04 07:00:00           48.223           37.971           64.957
        2020-02-04 08:00:00           50.064           38.681           68.613
        2020-02-04 09:00:00           50.648           38.654           70.013
        2020-02-04 10:00:00           50.161           38.023           69.503
        2020-02-04 11:00:00           48.963           37.008           67.750
        2020-02-04 12:00:00           47.745           36.014           66.030

        >>> f = 'data/model/otamin16/GRP_B_20200911_1515_DA_2.prv'
        >>> d = RT_Data(filename=f, datatype='fcst')
        >>> df = d.read()
        >>> df
        Stations                    A6701210  ...
        Grandeurs                         RR  ...               TA
        IdSeries             57gGRPd000_2001  ...  57gGRPd000_2003
        # Modeles                 57gGRPd000  ...       57gGRPd000
        # Scenarios                     2001  ...             2003
        # DtDerObs          03-02-2020 12:00  ... 03-02-2020 12:00
        2020-02-03 13:00:00              1.1  ...              NaN
        2020-02-03 14:00:00              1.1  ...              NaN
        2020-02-03 15:00:00              1.1  ...              NaN
        2020-02-03 16:00:00              0.2  ...              NaN
        2020-02-03 17:00:00              0.2  ...              NaN
        2020-02-03 18:00:00              0.2  ...              NaN
        2020-02-03 19:00:00              0.3  ...              NaN
        2020-02-03 20:00:00              0.3  ...              NaN
        2020-02-03 21:00:00              0.3  ...              NaN
        2020-02-03 22:00:00              0.4  ...              NaN
        2020-02-03 23:00:00              0.4  ...              NaN
        2020-02-04 00:00:00              0.4  ...              NaN
        2020-02-04 01:00:00              3.6  ...              NaN
        2020-02-04 02:00:00              3.6  ...              NaN
        2020-02-04 03:00:00              3.6  ...              NaN
        2020-02-04 04:00:00              0.8  ...              NaN
        2020-02-04 05:00:00              0.8  ...              NaN
        2020-02-04 06:00:00              0.8  ...              NaN
        2020-02-04 07:00:00              0.4  ...              NaN
        2020-02-04 08:00:00              0.4  ...              NaN
        2020-02-04 09:00:00              0.4  ...              NaN
        2020-02-04 10:00:00              0.7  ...              NaN
        2020-02-04 11:00:00              0.7  ...              NaN
        2020-02-04 12:00:00              0.7  ...              NaN

        >>> f = 'data/model/otamin16/GRP_B_20200203_1200_2.prv'
        >>> d = RT_Data(filename=f, datatype='trend')
        >>> df = d.read()
        >>> df
        Stations                      A6701210
        Grandeurs                            Q
        IdSeries            57gGRPd000_2001_-1 ... 57gGRPd000_2001_90
        # Modeles                   57gGRPd000 ...         57gGRPp000
        # Scenarios                       2001 ...               2001
        # DtDerObs            03-02-2020 12:00 ...      03-02-2020 12
        # Probas                            -1 ...                 90
        2020-02-03 13:00:00             41.689 ...             42.977
        2020-02-03 14:00:00             43.426 ...             46.273
        2020-02-03 15:00:00             44.285 ...             48.725
        2020-02-03 16:00:00             44.395 ...             50.069
        2020-02-03 17:00:00             44.109 ...             50.962
        2020-02-03 18:00:00             43.340 ...             51.269
        2020-02-03 19:00:00             42.448 ...             51.050
        2020-02-03 20:00:00             41.470 ...             50.691
        2020-02-03 21:00:00             40.312 ...             50.070
        2020-02-03 22:00:00             39.044 ...             49.208
        2020-02-03 23:00:00             37.754 ...             48.272
        2020-02-04 00:00:00             36.612 ...             47.480
        2020-02-04 01:00:00             36.184 ...             47.263
        2020-02-04 02:00:00             36.708 ...             48.290
        2020-02-04 03:00:00             38.304 ...             50.748
        2020-02-04 04:00:00             40.523 ...             54.066
        2020-02-04 05:00:00             43.099 ...             57.906
        2020-02-04 06:00:00             45.781 ...             61.938
        2020-02-04 07:00:00             48.223 ...             65.412
        2020-02-04 08:00:00             50.064 ...             68.085
        2020-02-04 09:00:00             50.648 ...             69.058
        2020-02-04 10:00:00             50.161 ...             68.571
        2020-02-04 11:00:00             48.963 ...             67.106
        2020-02-04 12:00:00             47.745 ...             65.605

        """
        # Levels carried by the three header rows of the file
        prefix = [c for c in EXPORT_COLS if not c.startswith('#')]
        # Levels carried by the commented ('#') metadata lines
        extra = [c for c in EXPORT_COLS if c not in prefix]
        if self.datatype == 'trend':
            extra.extend(EXPORT_TREND_COLS)
        # Reading with pandas; '#' lines are skipped here as comments
        try:
            df = pnd.read_csv(
                self.filename,
                sep=';',
                comment='#',
                header=[0, 1, 2],
                index_col=0,
                parse_dates=True,
                date_format=DATE_FORMAT,
                na_values=[-99.900, '-99.900', -999.999, '-999.999'],
                keep_default_na=True,
            )
        except IndexError as ie:
            raise ValueError(
                f"Formattage incorrect du fichier {self.filename}") from ie
        # Add the commented metadata in a single pass over the file; the
        # final level order is set by reorder_levels below
        df, prefix = self._extend_columns(df, prefix, extra)
        # Checks: every expected level must have been found in the file
        for p in prefix:
            if p not in df.columns.names:
                raise ValueError(f"Méta-donné '{p}' manquante")
        # Order of the multi-index levels
        return df.reorder_levels(prefix, axis=1)

    def write(self, data=None):
        """
        Write an OTAMIN prv file.

        The file ``self.filename`` is overwritten. It starts with '#'
        header lines (file type label taken from EXPORT_DTYPES, time zone
        line, one line per variable with its unit taken from EXPORT_UNITS),
        followed by the ';'-separated table. Floats are written with three
        decimals and missing values as -99.900.

        Parameters
        ----------
        data : pnd.DataFrame
            Data frame of the values. It is first checked by
            ``pyspc.core.exception.check_dataframe``; its column
            multi-index must hold every level named in EXPORT_COLS
            (and EXPORT_TREND_COLS for the 'trend' type).

        Raises
        ------
        ValueError
            If a required level is missing from the columns of ``data``.

        """
        # Expected levels; work on a copy so the shared constant is never
        # modified
        prefix = list(EXPORT_COLS)
        if self.datatype == 'trend':
            prefix.extend(EXPORT_TREND_COLS)
        # Checks
        _exception.check_dataframe(data)
        for p in prefix:
            if p not in data.columns.names:
                raise ValueError(f"Méta-donné '{p}' manquante")
        # Order of the multi-index levels
        data = data.reorder_levels(prefix, axis=1)
        # Header; newline='\r\n' turns each '\n' written below into CRLF
        with open(self.filename, 'w', encoding='utf-8', newline='\r\n') as f:
            f.write(f'# {EXPORT_DTYPES[self.datatype]}\n')
            f.write('# TZ ; UTC\n')
            varnames = data.columns.get_level_values('Grandeurs')
            for v in sorted(set(varnames)):
                f.write(f"# {v} ; {EXPORT_UNITS.get(v, '')}\n")
        # Data table, appended after the header
        data.to_csv(
            self.filename,
            mode='a',
            sep=';',
            float_format='%.3f',
            na_rep='-99.900',
            header=True,
            date_format=DATE_FORMAT,
            lineterminator='\r\n'
        )

    @classmethod
    def get_types(cls):
        """
        List the types of Otamin prv files.

        Returns
        -------
        list
            Types of Otamin prv files (keys of EXPORT_DTYPES).

        .. seealso::
            pyspc.convention.otamin16.EXPORT_DTYPES

        """
        return list(EXPORT_DTYPES)