#!/usr/bin/python3
# -*- coding: utf-8 -*-
########################################################################
#
# This file is part of python module <pyspc>.
# Copyright (C) 2013-2021 R. Marty
# (renaud.marty@developpement-durable.gouv.fr)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING.txt).
# If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Méta-données (statistiques, synthèse) - Cristal - Données
"""
from datetime import datetime as dt
import numpy as np
import pandas as pnd
import pyspc.core.exception as _exception
from pyspc.convention.cristal import LONGNAMES
# strptime pattern used for the timestamps of Cristal archives:
# day/month/year directly followed by hours, minutes and seconds.
DATE_FORMAT = '%d/%m/%Y%H%M%S'
"""Format des dates dans les archives Cristal"""


def date_parser(txt):
    """
    Convert a Cristal timestamp string into a ``datetime``.

    Parameters
    ----------
    txt : str
        Timestamp text; surrounding whitespace is ignored.

    Returns
    -------
    datetime.datetime
        The timestamp parsed according to ``DATE_FORMAT``.

    Raises
    ------
    ValueError
        If the trimmed text does not match ``DATE_FORMAT``.

    """
    trimmed = txt.strip()
    parsed = dt.strptime(trimmed, DATE_FORMAT)
    return parsed
def on8char(x):
    """
    Format a station code on 8 characters.

    Parameters
    ----------
    x : str, int or float
        Raw station code.

    Returns
    -------
    str
        - string input: right-padded with ``'0'`` up to 8 characters
          (longer strings are returned unchanged);
        - NaN: the placeholder code ``'K9876543'``;
        - integer input, or float holding an integral value:
          left-padded with ``'0'`` up to 8 digits.

    Raises
    ------
    ValueError
        If ``x`` is a float that is neither NaN nor an integral value.

    """
    try:
        # The 's' presentation type is only accepted by strings;
        # numeric inputs raise ValueError and are handled below.
        return f'{x:0<8s}'
    except ValueError:
        pass
    if np.isnan(x):
        return 'K9876543'
    if isinstance(x, (float, np.floating)):
        # The 'd' presentation type rejects floats, so integral floats
        # (e.g. 1234.0) are converted to int before formatting.
        if not float(x).is_integer():
            raise ValueError(f'station code is not an integer: {x!r}')
        x = int(x)
    return f'{x:08d}'
def decimal(x):
    """
    Normalise the decimal separator of a textual number.

    Parameters
    ----------
    x : object
        Value to normalise, usually a string such as ``' 1,5'``.

    Returns
    -------
    object
        For inputs exposing ``strip``/``replace`` (strings): the trimmed
        text with every ``','`` replaced by ``'.'``. Any other input
        (e.g. NaN, None, numbers) is returned unchanged.

    """
    try:
        normalised = x.strip().replace(',', '.')
    except AttributeError:
        # Not a string-like value: nothing to normalise.
        normalised = x
    return normalised
[docs]
class Cristal:
    """
    Reader for Cristal archive files.

    Attributes
    ----------
    filename : str
        Name of the Cristal file

    """

    def __init__(self, filename=None):
        """
        Initialise the Cristal instance.

        Parameters
        ----------
        filename : str
            Name of the Cristal file

        """
        self.filename = filename

    def __str__(self):
        """Return a banner showing the metadata of the Cristal instance."""
        # The template lines are kept flush-left so that the emitted text
        # carries no extra leading whitespace.
        text = """
*************************************
******** CRISTAL - Data *************
*************************************
* NOM FICHIER = {filename}
*************************************
"""
        return text.format(filename=self.filename)

    def read(self, stations=None, longnames=None):
        """
        Read the Cristal archive file.

        Parameters
        ----------
        stations : list-like of str, optional
            Station codes to keep, compared after the codes read from the
            file have been formatted on 8 characters.
            None (default) keeps every station.
        longnames : list-like of str, optional
            Variable names to keep.
            None (default) uses LONGNAMES from pyspc.convention.cristal.

        Returns
        -------
        dict_of_df : dict
            Dictionary of dataframes
            - key : (STATION, VARIABLE, VALUE TYPE)
            - value : pnd.DataFrame indexed by DATE, single column 'VALUE'
            Series left without any valid value are omitted.

        Notes
        -----
        - VALUE TYPE = 'VALUE_MES' for the processed measurement
        - VALUE TYPE = 'VALUE_CONV' for the converted measurement
        - When several rows share the same date for a given series,
          a message is printed and the mean of these rows is retained.

        Examples
        --------
        >>> from pyspc.data.cristal import Cristal

        Case with data (output abridged)

        >>> f = 'data/data/cristal/2008/ARCHIVE_2008_11.csv'
        >>> reader = Cristal(filename=f)
        >>> content = reader.read()
        >>> content
        {
        ('K0550010', 'KMHEAU', 'VALUE_MES'):
                             VALUE
        DATE
        2008-11-01 00:00:00  -0.71
        2008-11-01 00:10:00  -0.71
        2008-11-01 00:20:00  -0.71
        ...                    ...
        2008-11-01 22:40:00   0.07
        2008-11-01 22:50:00   0.10
        2008-11-01 23:00:00   0.13
        [139 rows x 1 columns],
        ('K0550010', 'KMHEAU', 'VALUE_CONV'):
                              VALUE
        DATE
        2008-11-01 00:00:00   57.15
        2008-11-01 00:10:00   57.15
        2008-11-01 00:20:00   57.15
        ...                     ...
        2008-11-01 22:40:00  206.30
        2008-11-01 22:50:00  214.40
        2008-11-01 23:00:00  222.50
        [139 rows x 1 columns]
        }

        """
        # ---------------------------------------------------------------------
        # 0- Checks
        # ---------------------------------------------------------------------
        if stations is not None:
            _exception.check_listlike(stations)
        if longnames is None:
            longnames = LONGNAMES
        _exception.check_listlike(longnames)
        # ---------------------------------------------------------------------
        # 1- Reading with pnd.read_csv
        # ---------------------------------------------------------------------
        names = ['LONGNAME', 'STATION', 'DATE', 'VALUE_MES', 'VALUE_CONV']
        read_kwargs = {
            'sep': ';',
            'header': None,
            'index_col': None,
            'usecols': [0, 1, 4, 7, 8],
            'names': names,
            'dtype': str,
            'na_values': ' -9999,99',
            'on_bad_lines': 'warn',
            'low_memory': False,
        }
        try:
            df = pnd.read_csv(self.filename, **read_kwargs)
        except UnicodeDecodeError:
            # The default encoding failed: retry as latin_1
            df = pnd.read_csv(
                self.filename, encoding='latin_1', **read_kwargs)
        # ---------------------------------------------------------------------
        # 2- Cleaning
        # ---------------------------------------------------------------------
        # Station codes on 8 characters
        df['STATION'] = df['STATION'].map(on8char)
        # Keep the targeted variables and, if requested, the targeted
        # stations. Any validated list-like is honoured, not only a list.
        keep = df['LONGNAME'].isin(longnames)
        if stations is not None:
            keep &= df['STATION'].isin(stations)
        df = df.loc[keep].copy()
        # Nothing left to convert or reshape
        if df.empty:
            return {}
        # Convert dates
        df['DATE'] = df['DATE'].map(date_parser)
        # Convert values to float (decimal comma -> decimal point first)
        for col in ('VALUE_MES', 'VALUE_CONV'):
            df[col] = pnd.to_numeric(df[col].map(decimal), errors='coerce')
        # Index : DATE
        df = df.set_index(keys='DATE', drop=True).sort_index()
        # STATION_LONGNAME -> CODE
        df['CODE'] = df['STATION'] + '_' + df['LONGNAME']
        df = df.drop(columns=['STATION', 'LONGNAME'])
        # Reshape: one column per (value type, CODE)
        try:
            df = df.pivot(columns='CODE')
        except ValueError:
            # pivot refuses duplicated (DATE, CODE) pairs:
            # fall back on the mean of the duplicated entries
            print(f"!!! Index dupliqués dans {self.filename}")
            df = df.pivot_table(
                index=df.index, columns='CODE', aggfunc='mean')
        # Column levels become (CODE, value type)
        df.columns = df.columns.swaplevel()
        # ---------------------------------------------------------------------
        # 3- Dictionary of dataframes
        # ---------------------------------------------------------------------
        dict_of_df = {}
        for code, value_type in df.columns:
            # Split on the first '_' only, so that a variable name
            # containing '_' is preserved in the key
            station, longname = code.split('_', 1)
            frame = df.xs((code, value_type), axis=1).to_frame()
            # Safeguard: keep only the last value of a duplicated index
            frame = frame[~frame.index.duplicated(keep='last')]
            # Remove NO-DATA
            frame = frame.dropna(axis=0, how='any')
            if frame.empty:
                continue
            # Rename the column of values
            frame.columns = ['VALUE']
            dict_of_df[(station, longname, value_type)] = frame
        return dict_of_df

    def write(self):
        """
        Write the Cristal archive file.

        Raises
        ------
        NotImplementedError
            Always: writing Cristal archives is not implemented.

        """
        raise NotImplementedError