# Source code of pyspc.data.cristal.data

#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021  R. Marty
#   (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Méta-données (statistiques, synthèse) - Cristal - Données
"""
from datetime import datetime as dt
import numpy as np
import pandas as pnd

import pyspc.core.exception as _exception
from pyspc.convention.cristal import LONGNAMES

# strptime pattern used for the dates of Cristal archives: day/month/year
# immediately followed by hours, minutes and seconds, with no separator
# between the date part and the time part.
DATE_FORMAT = '%d/%m/%Y%H%M%S'
"""Format des dates dans les archives Cristal"""


def date_parser(txt):
    """Parse a date string into a ``datetime``.

    Leading and trailing whitespace is removed from *txt* before it is
    parsed against the module-level ``DATE_FORMAT`` pattern.
    """
    cleaned = txt.strip()
    return dt.strptime(cleaned, DATE_FORMAT)


def on8char(x):
    """Return *x* formatted as a code of (at least) 8 characters.

    Strings are left-aligned and padded on the right with ``'0'``; longer
    strings are returned unchanged. Values rejected by the string format
    (``ValueError``) are handled as numbers: NaN yields the placeholder
    ``'K9876543'`` and any other value is zero-padded on the left with the
    integer format ``08d``.
    """
    try:
        code = format(x, '0<8s')
    except ValueError:
        # Not a string: either a missing value (NaN) or an integer code.
        code = 'K9876543' if np.isnan(x) else format(x, '08d')
    return code


def decimal(x):
    """Normalise the decimal mark of *x*.

    For objects offering ``strip``/``replace`` (typically strings), the
    surrounding whitespace is removed and every comma is replaced by a dot.
    Any other object is returned untouched.
    """
    try:
        trimmed = x.strip()
        converted = trimmed.replace(',', '.')
    except AttributeError:
        # Not a string-like value (e.g. NaN): leave it as it is.
        return x
    return converted


class Cristal():
    """
    Structure of Cristal archive files.

    Attributes
    ----------
    filename : str
        Name of the Cristal file

    """

    def __init__(self, filename=None):
        """
        Initialise the Cristal instance.

        Parameters
        ----------
        filename : str
            Name of the Cristal file

        """
        self.filename = filename

    def __str__(self):
        """Return the banner describing the Cristal instance."""
        text = """
        *************************************
        ******** CRISTAL - Data *************
        *************************************
        * NOM FICHIER = {filename}
        *************************************
        """
        return text.format(filename=self.filename)

    def _read_raw(self, **kwargs):
        """Load the useful columns of the archive as strings.

        Extra keyword arguments (e.g. ``encoding``) are forwarded to
        ``pandas.read_csv``.
        """
        return pnd.read_csv(
            self.filename,
            sep=';',
            header=None,
            index_col=None,
            usecols=[0, 1, 4, 7, 8],
            names=['LONGNAME', 'STATION', 'DATE', 'VALUE_MES', 'VALUE_CONV'],
            dtype=str,
            na_values=' -9999,99',
            on_bad_lines='warn',
            low_memory=False,
            **kwargs
        )

    def read(self, stations=None, longnames=None):
        """
        Read the Cristal archive file.

        Parameters
        ----------
        stations : list-like, optional
            Station codes to keep. Codes found in the file are first put on
            8 characters (see ``on8char``), so the codes given here are
            compared with these 8-character codes. If None, all stations
            are kept.
        longnames : list-like, optional
            Names of the quantities to keep. If None, ``LONGNAMES`` from
            ``pyspc.convention.cristal`` is used.

        Returns
        -------
        dict_of_df : dict
            Dictionary of dataframes
            - key : (STATION, QUANTITY, VALUE TYPE)
            - value : pnd.DataFrame indexed by DATE with one column 'VALUE'
            Series without any valid value are left out; the dictionary is
            empty when no row matches the requested stations/quantities.

        Notes
        -----
        - VALUE TYPE = 'VALUE_MES' for the elaborated measurement
        - VALUE TYPE = 'VALUE_CONV' for the converted measurement
        - When a (date, station, quantity) triplet appears several times,
          a message is printed and the mean of the values is retained.

        Examples
        --------
        >>> from pyspc.data.cristal import Cristal

        Case with data (output abridged)

        >>> f = 'data/data/cristal/2008/ARCHIVE_2008_11.csv'
        >>> reader = Cristal(filename=f)
        >>> content = reader.read()
        >>> content
        {
        ('K0550010', 'KMHEAU', 'VALUE_MES'):
                                 VALUE
            DATE
            2008-11-01 00:00:00  -0.71
            2008-11-01 00:10:00  -0.71
            2008-11-01 00:20:00  -0.71
            2008-11-01 00:30:00  -0.71
            2008-11-01 00:40:00  -0.71
            ...                    ...
            2008-11-01 22:20:00   0.02
            2008-11-01 22:30:00   0.05
            2008-11-01 22:40:00   0.07
            2008-11-01 22:50:00   0.10
            2008-11-01 23:00:00   0.13

            [139 rows x 1 columns],
        ('K0550010', 'KMHEAU', 'VALUE_CONV'):
                                  VALUE
            DATE
            2008-11-01 00:00:00   57.15
            2008-11-01 00:10:00   57.15
            2008-11-01 00:20:00   57.15
            2008-11-01 00:30:00   57.15
            2008-11-01 00:40:00   57.15
            ...                     ...
            2008-11-01 22:20:00  192.80
            2008-11-01 22:30:00  200.90
            2008-11-01 22:40:00  206.30
            2008-11-01 22:50:00  214.40
            2008-11-01 23:00:00  222.50

            [139 rows x 1 columns]
        }

        """
        # ---------------------------------------------------------------------
        # 0- Checks
        # ---------------------------------------------------------------------
        if stations is not None:
            _exception.check_listlike(stations)
        if longnames is None:
            longnames = LONGNAMES
        _exception.check_listlike(longnames)
        # ---------------------------------------------------------------------
        # 1- Reading with pnd.read_csv
        # ---------------------------------------------------------------------
        try:
            df = self._read_raw()
        except UnicodeDecodeError:
            # The default encoding failed: retry the same read in latin_1.
            df = self._read_raw(encoding='latin_1')
        # ---------------------------------------------------------------------
        # 2- Cleaning
        # ---------------------------------------------------------------------
        # Station codes on 8 characters
        df['STATION'] = df['STATION'].map(on8char)
        # Keep only the targeted quantities and, if requested, the targeted
        # stations. Any validated list-like is honoured for stations, not
        # only genuine lists.
        keep = df['LONGNAME'].isin(longnames)
        if stations is not None:
            keep &= df['STATION'].isin(list(stations))
        df = df.loc[keep].copy()
        if df.empty:
            # Nothing matches the filters: nothing to convert nor to pivot.
            return {}
        # Convert the dates
        df['DATE'] = df['DATE'].map(date_parser)
        # Convert the values to float (unparsable values become NaN)
        for col in ('VALUE_MES', 'VALUE_CONV'):
            df[col] = pnd.to_numeric(df[col].map(decimal), errors='coerce')
        # Index : DATE
        df.set_index(keys='DATE', drop=True, inplace=True)
        df.sort_index(inplace=True)
        # Merge STATION and LONGNAME into a single CODE column
        df['CODE'] = df['STATION'] + '_' + df['LONGNAME']
        df.drop(columns=['STATION', 'LONGNAME'], inplace=True)
        # Reorganise the table: one column per (value type, CODE)
        try:
            df = df.pivot(columns='CODE')
        except ValueError:
            # pivot refuses duplicated (DATE, CODE) entries: fall back on the
            # mean of the duplicated values.
            print(f"!!! Index dupliqués dans {self.filename}")
            df = df.pivot_table(index=df.index, columns='CODE', aggfunc='mean')
        # Columns become (CODE, value type)
        df.columns = df.columns.swaplevel()
        # ---------------------------------------------------------------------
        # 3- Dictionary of dataframes
        # ---------------------------------------------------------------------
        dict_of_df = {}
        for col in df.columns:
            code, vtype = col
            # Split on the first '_' only, so that a quantity name holding
            # an underscore is not truncated.
            station, _, longname = code.partition('_')
            frame = df.xs(col, axis=1).to_frame()
            # Keep only the last value if an index is duplicated
            frame = frame[~frame.index.duplicated(keep='last')]
            # Remove the NO-DATA rows
            frame = frame.dropna(axis=0, how='any')
            if frame.empty:
                continue
            # Rename the column of values
            frame.columns = ['VALUE']
            dict_of_df[(station, longname, vtype)] = frame
        return dict_of_df

    def write(self):
        """
        Write the Cristal archive file.

        Raises
        ------
        NotImplementedError
            Always: writing is not supported.

        """
        raise NotImplementedError