Code source de pyspc.model.grp22.cal_verif

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Modélisations hydrologiques - GRP version 2022 - Vérification
"""
import collections
import glob
import os
import numpy as np
import pandas as pnd

from pyspc.convention.grp22 import CAL_VERIF_DTYPES, CAL_VERIF_SCORES
import pyspc.core.exception as _exception



[docs]
class GRP_Verif():
    """
    Results of a GRP *Calage* performance sheet.

    The configuration of the sheet is decoded from its file name when the
    instance is created; the scores themselves are only loaded by
    :meth:`read`.

    Attributes
    ----------
    filename : str
        Name (path) of the performance sheet
    datatype : str
        Type of performance sheet (one of the values of CAL_VERIF_DTYPES)
    model : str
        Model
    loc : str
        Catchment
    timestep : str
        Computation time step
    leadtime : str
        Calibration lead time
    threshold_cal : str
        Calibration threshold, as written in the file name (e.g. '5d00')
    threshold : str
        Vigilance threshold, as written in the file name (e.g. '5d00')

    """

    def __init__(self, filename=None):
        """
        Initialise a <GRP_Verif> instance.

        Parameters
        ----------
        filename : str
            Name of the performance sheet

        Raises
        ------
        ValueError
            If the file name does not follow the expected naming scheme
            (see :meth:`split_basename`)

        """
        self.filename = filename
        # All metadata come from the file name; the file is not opened here.
        config = self.split_basename(filename=filename)
        self.datatype = config['datatype']
        self.model = config['model']
        self.loc = config['loc']
        self.timestep = config['timestep']
        self.leadtime = config['leadtime']
        self.threshold = config['threshold']
        self.threshold_cal = config['threshold_cal']

    def __str__(self):
        """
        Return the metadata of the <GRP_Verif> instance as a text banner.
        """
        text = """
        *************************************
        ********** Classe GRP_Verif *********
        *************************************
        *  NOM FICHIER      = {filename}
        *  TYPE FICHIER     = {datatype}
        *  MODELE           = {model}
        *  BASSIN VERSANT   = {loc}
        *  PAS DE TEMPS     = {timestep}
        *  HORIZON CALAGE   = {leadtime}
        *  SEUIL CALAGE     = {threshold_cal}
        *  SEUIL VIGILANCE  = {threshold}
        *************************************
        """
        return text.format(**vars(self))

    def read(self):
        """
        Read the GRP *Calage* performance sheet.

        The file is parsed as ISO-8859-1 text. Only lines containing ':'
        are considered: a line starting with "#! CONFIG" opens a new
        configuration, and every following line whose header is listed in
        CAL_VERIF_SCORES is stored as a score of that configuration.

        Returns
        -------
        data : collections.OrderedDict
            Scores by configuration: {config: {score: value}}. A score whose
            value cannot be converted to float is stored as NaN.

        Raises
        ------
        OSError
            If the file does not exist

        Examples
        --------
        >>> from pyspc.model.grp22.cal_verif import GRP_Verif
        >>> f = 'data/model/grp22/cal/Perf_CALAG_GRP_RH10585x_PDT_00J01H00M_HOR_00J03H00M_Scal_5d00_Svig_5d00.DAT'
        >>> reader = GRP_Verif(filename=f)
        >>> data = reader.read()
        >>> data
        {'SMN_TAN':
            {'Eff_Cal': np.nan, 'Eff_Val': 0.807,
            'POD': 78.2, 'FAR': 40.3, 'CSI': 51.2}}

        """
        if not os.path.exists(self.filename):
            # The exception must be raised: returning it would hand the
            # caller an exception instance in place of the scores.
            raise OSError(f'Fichier inconnu: {self.filename}')
        data = collections.OrderedDict()
        cfg = None
        with open(self.filename, 'r', encoding='iso-8859-1') as f:
            for line in f:
                if ':' not in line:
                    continue
                # Unit markers are dropped so that the header matches the
                # score names.
                content = line.replace('(%)', '').replace('(-)', '').split(':')
                header = content.pop(0).strip()
                if header in CAL_VERIF_SCORES:
                    try:
                        value = float(content[-1].strip())
                    except ValueError:
                        value = np.nan
                    # NOTE(review): a score line found before any
                    # "#! CONFIG" line raises KeyError (cfg is still None).
                    # The first value read for a score is kept.
                    data[cfg].setdefault(header, value)
                elif line.startswith("#! CONFIG"):
                    # The configuration name is the last word of the line.
                    cfg = content[-1].split(' ')[-1].strip()
                    data.setdefault(cfg, {})
        return data

    @staticmethod
    def split_basename(filename=None, datatype=None):
        """
        Extract the calibration configuration from the name of the
        performance sheet.

        The base name (without extension) is expected to look like
        ``Perf_<type>_<model>_<loc>_PDT_<timestep>_HOR_<leadtime>_Scal_<threshold_cal>_Svig_<threshold>``.
        The fields are read by position; the literal tags (Perf, PDT, HOR,
        Scal, Svig) are not checked and extra trailing fields are ignored.

        Parameters
        ----------
        filename : str
            Name of the performance sheet
        datatype : str
            Ignored: the type is always decoded from the file name.
            Kept for backward compatibility of the signature.

        Returns
        -------
        meta : dict
            Dictionary of information
            - datatype : Type of performance sheet
            - model : Model
            - loc : Location, catchment
            - timestep : Model time step
            - leadtime : Calibration lead time
            - threshold_cal : Calibration threshold
            - threshold : Vigilance threshold

        Raises
        ------
        ValueError
            If the file name has too few fields or if the type of
            performance sheet is unknown

        Examples
        --------
        >>> from pyspc.model.grp22.cal_verif import GRP_Verif
        >>> f = 'data/model/grp22/cal/Perf_CALAG_GRP_RH10585x_PDT_00J01H00M_HOR_00J03H00M_Scal_5d00_Svig_5d00.DAT'
        >>> meta = GRP_Verif.split_basename(filename=f)
        >>> meta
        {'datatype': 'rtime',
         'model': 'GRP',
         'loc': 'RH10585x',
         'timestep': '00J01H00M',
         'leadtime': '00J03H00M',
         'threshold_cal': '5d00',
         'threshold': '5d00'}

        """
        _exception.check_str(filename)
        basename = os.path.splitext(os.path.basename(filename))[0]
        fields = basename.split('_')
        if len(fields) < 12:
            raise ValueError('Nommage de la fiche de performance '
                             'incorrect')
        (_, code, model, loc, _, timestep, _, leadtime,
         _, threshold_cal, _, threshold) = fields[:12]
        try:
            dtype = CAL_VERIF_DTYPES[code]
        except KeyError as ke:
            # An unknown type code is reported as ValueError, like the
            # other naming errors, instead of leaking a bare KeyError.
            raise ValueError(
                f"Type de fiche de performance est incorrect : '{code}'"
            ) from ke
        return {'datatype': dtype, 'model': model, 'loc': loc,
                'timestep': timestep, 'leadtime': leadtime,
                'threshold_cal': threshold_cal, 'threshold': threshold}

    @classmethod
    def check_datatype(cls, datatype=None):
        """
        Check the type of performance sheet.

        Parameters
        ----------
        datatype : str
            Name of the type of performance sheet

        Raises
        ------
        ValueError
            If the type of performance sheet is not one of
            :meth:`get_datatypes`

        """
        if datatype not in cls.get_datatypes():
            raise ValueError(
                f"Type de fiche de performance est incorrect : '{datatype}'")

    @classmethod
    def get_datatypes(cls):
        """
        List the types of performance sheet.

        Returns
        -------
        list
            Sorted values of CAL_VERIF_DTYPES. According to the original
            documentation:
            - cal   : calibration
            - rtime : calibration over the whole period

        """
        return sorted(CAL_VERIF_DTYPES.values())

    @classmethod
    def concat(cls, loc=None, timestep=None, dirname=None, datatype=None):
        """
        Concatenate the performance sheets of a given location and a given
        type of sheet.

        Parameters
        ----------
        loc : str
            Identifier of the location, of the catchment
        timestep : str
            Modelling time step
        dirname : str
            Directory to search
        datatype : str
            Name of the type of performance sheet

        Returns
        -------
        df : pandas.DataFrame
            One row per (sheet, configuration), with the columns
            HOR, SC, SA_RT, SV followed by the scores of CAL_VERIF_SCORES.
            Sheets are processed in sorted file-name order.

        Raises
        ------
        ValueError
            If the type of sheet is incorrect or if no sheet matches

        Examples
        --------
        >>> from pyspc.model.grp22.cal_verif import GRP_Verif
        >>> loc = 'K0403010'
        >>> timestep = '00J01H00M'
        >>> dirname = 'data/model/grp22/cal'
        >>> datatype = 'rtime'
        >>> df = GRP_Verif.concat(loc=loc, timestep=timestep, dirname=dirname,
        ...                       datatype=datatype)
        >>> df
                 HOR    SC    SA_RT    SV  Eff_Cal  Eff_Val   POD   FAR   CSI
        0  00J03H00M  1.83  SMN_RNA  41.0      NaN     0.89  75.0  30.2  56.6
        1  00J03H00M  1.83  SMN_RNA  82.0      NaN     0.91  58.6   0.0  58.6

        """
        # ---------------------------------------------------------------------
        # 0- Checks
        # ---------------------------------------------------------------------
        _exception.check_str(loc)
        _exception.check_str(timestep)
        _exception.check_str(dirname)
        cls.check_datatype(datatype=datatype)
        # ---------------------------------------------------------------------
        # 1- Reading the sheets
        # ---------------------------------------------------------------------
        # File-name code of the requested type (reverse lookup); it exists
        # because check_datatype passed.
        pattern = next(k for k, v in CAL_VERIF_DTYPES.items() if v == datatype)
        dfs = []
        # glob returns matches in arbitrary order: sort them so that the
        # row order of the result does not depend on the file system.
        vfs = sorted(glob.glob(os.path.join(
            dirname, f'Perf_{pattern}_GRP_{loc}_PDT_{timestep}_*.DAT')))
        _exception.raise_valueerror(not vfs, "Aucune fiche correspondante")
        for f in vfs:
            reader = cls(filename=f)
            data = reader.read()
            # Transposed so that rows are configurations and columns scores.
            df = pnd.DataFrame(data)
            df = df.T
            df['SA_RT'] = df.index
            df['HOR'] = reader.leadtime
            # Thresholds use 'd' as decimal separator in file names.
            df['SC'] = float(reader.threshold_cal.replace('d', '.'))
            df['SV'] = float(reader.threshold.replace('d', '.'))
            df = df.reset_index(drop=True)
            dfs.append(df)
        # ---------------------------------------------------------------------
        # 2- Summary
        # ---------------------------------------------------------------------
        dfs = pnd.concat(dfs, sort=True).reset_index(drop=True)
        cols = ['HOR', 'SC', 'SA_RT', 'SV']
        cols.extend(CAL_VERIF_SCORES)
        return dfs[cols]