Code source de pyspc.io.prv.reader

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Bibliothèque pyspc du projet pyspc - IO - prv (Otamin, Scores) - read
"""
from datetime import datetime as dt

from pyspc.convention.prv import DATATYPES
from pyspc.core.convention import EXTERNAL_VARNAMES
import pyspc.core.exception as _exception
from pyspc.core.keyseries import str2tuple, tuple2str
from pyspc.core.parameter import Parameter
from pyspc.core.serie import Serie
from pyspc.core.series import Series
from pyspc.model.otamin16 import RT_Data as PRV_otamin16
from pyspc.model.otamin18 import RT_Data as PRV_otamin18
from pyspc.verification.scores import Data as PRV_scores



[docs]
def read_prv(filename=None, datatype=None, warning=True):
    """
    Build a Series collection from a prv file (Otamin, Scores).

    Parameters
    ----------
    filename : str
        Name of the prv file.
    datatype : str
        Type of the data file. It must be one of ``DATATYPES``. Its
        leading part selects the reader (``otamin16``, ``otamin18`` or
        ``scores``) and the part after the last underscore is the subtype
        (``obs``, ``sim``, ``fcst`` or ``trend``).
    warning : bool
        Display warnings if True.

    Returns
    -------
    series : pyspc.core.series.Series
        Collection of data series.

    Raises
    ------
    NotImplementedError
        If no reader matches ``datatype`` or if the subtype is not one of
        ``obs``, ``sim``, ``fcst``, ``trend``.

    Notes
    -----
    Argument validation is delegated to the ``_exception`` helpers
    (``check_str``, ``raise_valueerror``, ``check_bool``).

    """
    # -------------------------------------------------------------------------
    # 0- Checks
    # -------------------------------------------------------------------------
    _exception.check_str(filename)
    _exception.raise_valueerror(
        datatype not in DATATYPES,
        f"Type de données prv '{datatype}' incorrect")
    _exception.check_bool(warning)
    provider = 'prv'
    # -------------------------------------------------------------------------
    # 1- Reading
    # -------------------------------------------------------------------------
    subtype = datatype.split('_')[-1]
    readers = (
        ('otamin16', PRV_otamin16),
        ('otamin18', PRV_otamin18),
        ('scores', PRV_scores),
    )
    reader_cls = next(
        (cls for prefix, cls in readers if datatype.startswith(prefix)),
        None)
    if reader_cls is None:
        # A datatype accepted by DATATYPES but without a matching reader
        # must fail explicitly rather than with an AttributeError on None.
        raise NotImplementedError(
            f"Aucun lecteur prv pour le type de données '{datatype}'")
    df = reader_cls(filename=filename, datatype=subtype).read()
    # -------------------------------------------------------------------------
    # 2- Conversion - OBSERVATION / SIMULATION case
    # -------------------------------------------------------------------------
    colnames = df.columns.names
    if subtype in ['obs', 'sim']:
        return _prv_obssim(
            df=df, datatype=datatype, subtype=subtype, colnames=colnames,
            provider=provider, warning=warning)
    # -------------------------------------------------------------------------
    # 3- Conversion - FORECAST / TREND case
    # -------------------------------------------------------------------------
    if subtype in ['fcst', 'trend']:
        return _prv_fcsttrend(
            df=df, datatype=datatype, colnames=colnames, provider=provider,
            warning=warning)
    # -------------------------------------------------------------------------
    # 4- Conversion - unknown case
    # -------------------------------------------------------------------------
    raise NotImplementedError



def _prv_obssim(df=None, datatype=None, subtype=None, colnames=None,
                provider=None, warning=True):
    """
    Convert the columns of a prv DataFrame (observation / simulation case)
    into a Series collection.

    Each column label is paired with ``colnames`` to recover the
    ``Stations``, ``Grandeurs`` and ``IdSeries`` levels. Columns missing one
    of these levels, or whose variable cannot be resolved through
    ``EXTERNAL_VARNAMES`` / ``Parameter``, are skipped (with a warning when
    ``warning`` is True). For the ``obs`` subtype the series meta is None;
    otherwise it is the ``IdSeries`` value.

    Returns
    -------
    series : pyspc.core.series.Series
        Collection created with ``datatype='obs'`` and named ``datatype``.

    """
    collection = Series(datatype='obs', name=datatype)
    for col in df.columns:
        # Map each column level name to its value for this column.
        levels = dict(zip(colnames, col))
        try:
            station, varname, model = (
                levels['Stations'], levels['Grandeurs'], levels['IdSeries'])
        except KeyError:
            if warning:
                _exception.Warning(
                    None,
                    f"La série '{col}' est mal formatée. Elle est ignorée")
            continue
        try:
            # Prefixes registered for this provider and external variable
            # name; an empty set makes the [0] lookup raise IndexError.
            candidates = {
                value[0] for key, value in EXTERNAL_VARNAMES.items()
                if key[0] == provider and key[1][0] == varname}
            prefix = list(candidates)[0]
            timestep = Parameter.infer_timestep(
                index=df.index, prefix=prefix)
            param = Parameter.find(prefix=prefix, timedelta=timestep)
        except (IndexError, KeyError):
            if warning:
                _exception.Warning(
                    None,
                    f"La grandeur '{col}' est inconnue. La série est ignorée")
            continue
        varname = param.spc_varname
        # Observations carry no model identifier.
        meta = None if subtype == 'obs' else model
        code = str2tuple(
            tuple2str((station, varname, meta)), forceobs=True)[0]
        collection.add(
            Serie(df[col].to_frame(), code=code, varname=varname,
                  provider=provider, warning=warning),
            code=station, meta=meta)
    return collection


def _prv_fcsttrend(df=None, datatype=None, colnames=None, provider=None,
                   warning=True):
    """
    Convert the columns of a prv DataFrame (forecast / trend case) into a
    Series collection.

    Each column label is paired with ``colnames`` to recover its levels:

    - ``Stations`` and ``Grandeurs`` are mandatory;
    - the runtime is parsed from ``# DtDerObs`` (format
      ``%d-%m-%Y %H:%M``), defaulting to the first index value of ``df``;
    - model, scenario and probability come from ``# Modeles``,
      ``# Scenarios`` and ``# Probas``, defaulting to the ``_``-separated
      parts of ``IdSeries`` (a two-part ``IdSeries`` gets a None
      probability).

    A column is skipped (with a warning when ``warning`` is True) when a
    mandatory level is missing, when ``IdSeries`` has fewer than two
    ``_``-separated parts, or when its variable cannot be resolved through
    ``EXTERNAL_VARNAMES`` / ``Parameter``.

    Returns
    -------
    series : pyspc.core.series.Series
        Collection created with ``datatype='fcst'`` and named ``datatype``.

    """
    date_format = '%d-%m-%Y %H:%M'
    series = Series(datatype='fcst', name=datatype)
    for col in df.columns:
        # Map each column level name to its value for this column.
        levels = dict(zip(colnames, col))
        try:
            station = levels['Stations']
            varname = levels['Grandeurs']
            runtime = dt.strptime(
                levels.get('# DtDerObs',
                           dt.strftime(df.index[0], date_format)),
                date_format
            )
            ids = levels["IdSeries"].split('_')
            if len(ids) == 2:
                ids.append(None)
            model = levels.get('# Modeles', ids[0])
            scen = levels.get('# Scenarios', ids[1])
            prob = levels.get('# Probas', ids[2])
        except (IndexError, KeyError):
            # IndexError: IdSeries too short (or empty index) - treated
            # like a missing level instead of aborting the whole file.
            if warning:
                _exception.Warning(
                    None,
                    f"La série '{col}' est mal formatée. Elle est ignorée")
            continue
        try:
            # Prefixes registered for this provider and external variable
            # name; an empty set makes the [0] lookup raise IndexError.
            prefix = list({v[0] for k, v in EXTERNAL_VARNAMES.items()
                           if k[0] == provider and k[1][0] == varname})[0]
            target = Parameter.infer_timestep(
                index=df.index, prefix=prefix)
            param = Parameter.find(prefix=prefix, timedelta=target)
        except (IndexError, KeyError):
            # Same handling as the observation / simulation case: an
            # unknown variable skips the series rather than crashing.
            if warning:
                _exception.Warning(
                    None,
                    f"La grandeur '{col}' est inconnue. La série est ignorée")
            continue
        varname = param.spc_varname
        key = (station, varname, (runtime, model, scen, prob))
        keystr = str2tuple(tuple2str(key), forceobs=True)
        serie = Serie(df[col].to_frame(), code=keystr[0], varname=varname,
                      provider=provider, warning=warning)
        series.add(serie, code=station, meta=key[2])
    return series