Source code for disdrodb.l0.readers.reader_template
#!/usr/bin/env python3
# -----------------------------------------------------------------------------.
# Copyright (c) 2021-2023 DISDRODB developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
from disdrodb.l0 import run_l0a
from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@is_documented_by(reader_generic_docstring)
def reader(
    raw_dir,
    processed_dir,
    station_name,
    # Processing options
    force=False,
    verbose=False,
    parallel=False,
    debugging_mode=False,
):
    """Template reader: gather the station-specific settings and run the L0A processing.

    Every argument is forwarded unchanged to ``run_l0a``. The sections flagged
    as customizable below are the only parts meant to be edited when this
    template is adapted to a new station.

    NOTE(review): the ``is_documented_by`` decorator presumably supplies the
    shared parameter documentation from ``reader_generic_docstring`` - confirm.
    """
    ##------------------------------------------------------------------------.
    ###########################
    #### CUSTOMIZABLE CODE ####
    ###########################
    #### - Glob pattern used to search data files within raw_dir/data/<station_name>
    glob_patterns = "*"  # [TO BE ADAPTED TO THE STATION RAW FILE NAME PATTERN]

    ##------------------------------------------------------------------------.
    #### - Names of the raw data columns
    column_names = []  # [ADD THE COLUMN NAMES LIST HERE]

    ##------------------------------------------------------------------------.
    #### - Options of the file reader
    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {
        # Parsing engine
        # - "c" is faster
        # - "python" is more feature-complete
        "engine": "python",
        # On-the-fly decompression of on-disk data
        # - Available: gzip, bz2, zip
        "compression": "infer",
        # Additional strings to recognize as NA/NaN
        # - Already recognized by default: '#N/A', '#N/A N/A', '#NA', '-1.#IND',
        #   '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
        #   'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
        "na_values": ["na", "", "error", " NA"],
        # Malformed lines are dropped instead of raising an error
        "on_bad_lines": "skip",
        # The raw files carry no header line: the first row is read as data
        "header": None,
        # Prevent the first column from becoming the dataframe index
        "index_col": False,
        # Field delimiter [THIS MIGHT BE CUSTOMIZED]
        "delimiter": ",",
        # Number of leading rows to skip [THIS MIGHT BE CUSTOMIZED]
        "skiprows": None,
    }

    ##------------------------------------------------------------------------.
    #### - Optional dataframe sanitizer function for L0 processing
    # - Hook to deal with bad raw data files
    # - Hook to standardize raw data files to L0 standards
    def df_sanitizer_fun(df):
        # Import dask or pandas here if the custom code needs them
        # [ADD YOUR CUSTOM CODE HERE]
        # ...
        # The template performs no sanitization: the dataframe is returned as received
        return df

    ####----------------------------------------------------------------------.
    #### - Create L0A products
    run_l0a(
        raw_dir=raw_dir,
        processed_dir=processed_dir,
        station_name=station_name,
        # Reader-specific arguments for L0A processing
        glob_patterns=glob_patterns,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        df_sanitizer_fun=df_sanitizer_fun,
        # Processing options
        force=force,
        verbose=verbose,
        parallel=parallel,
        debugging_mode=debugging_mode,
    )