# -----------------------------------------------------------------------------.
# Copyright (c) 2021-2023 DISDRODB developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
"""DISDRODB CLI routine wrappers."""
import datetime
import time
from typing import Optional
from disdrodb.api.search import available_stations, get_required_product
from disdrodb.utils.cli import execute_cmd
####--------------------------------------------------------------------------.
#### Run DISDRODB Station Processing
[docs]
def run_l0_station(
    data_source,
    campaign_name,
    station_name,
    # L0 archive options
    l0a_processing: bool = True,
    l0b_processing: bool = True,
    l0c_processing: bool = True,
    remove_l0a: bool = False,
    remove_l0b: bool = False,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Run the L0 processing chain of a specific DISDRODB station from the terminal.

    The enabled stages are executed in the order L0A, L0B, L0C by delegating to
    ``run_l0a_station``, ``run_l0b_station`` and ``run_l0c_station``.
    The elapsed time is printed once all the requested stages have returned.

    Parameters
    ----------
    data_source : str
        Institution name (when campaign data spans more than 1 country),
        or country (when all campaigns (or sensor networks) are inside a given country).
        Must be UPPER CASE.
    campaign_name : str
        Campaign name. Must be UPPER CASE.
    station_name : str
        Station name.
    l0a_processing : bool
        Whether to launch processing to generate L0A Apache Parquet file(s) from raw data.
        The default value is ``True``.
    l0b_processing : bool
        Whether to launch processing to generate L0B netCDF4 file(s) from L0A data.
        The default value is ``True``.
    l0c_processing : bool
        Whether to launch processing to generate L0C netCDF4 file(s) from L0B data.
        The default value is ``True``.
    remove_l0a : bool
        Whether to remove the L0A files after having generated the L0B netCDF products.
        It is forwarded only to the L0B stage.
        The default value is ``False``.
    remove_l0b : bool
        Whether to remove the L0B files after having produced L0C netCDF files.
        It is forwarded only to the L0C stage.
        The default value is ``False``.
    force : bool
        If ``True``, overwrite existing data into destination directories.
        If ``False``, raise an error if there are already data into destination directories.
        The default value is ``False``.
    verbose : bool
        Whether to print detailed processing information into terminal.
        The default value is ``False``.
    debugging_mode : bool
        If ``True``, it reduces the amount of data to process.
        For L0A, it processes just the first 3 raw data files for each station.
        For L0B, it processes 100 rows sampled from 3 L0A files for each station.
        The default value is ``False``.
    parallel : bool
        If ``True``, the files are processed simultaneously in multiple processes.
        Each process will use a single thread to avoid issues with the HDF/netCDF library.
        By default, the number of process is defined with ``os.cpu_count()``.
        If ``False``, the files are processed sequentially in a single process
        and multi-threading is automatically exploited to speed up I/O tasks.
        The default value is ``True``.
    data_archive_dir : str (optional)
        The directory path where the DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str (optional)
        The directory path where the DISDRODB Metadata Archive is located.
        It is forwarded unchanged to every processing stage.
    """
    start_time = time.time()
    print(f"L0 processing of station {station_name} has started.")

    # Keyword arguments common to the L0A, L0B and L0C stages
    shared_kwargs = {
        # DISDRODB root directories
        "data_archive_dir": data_archive_dir,
        "metadata_archive_dir": metadata_archive_dir,
        # Station arguments
        "data_source": data_source,
        "campaign_name": campaign_name,
        "station_name": station_name,
        # Processing options
        "force": force,
        "verbose": verbose,
        "debugging_mode": debugging_mode,
        "parallel": parallel,
    }

    # Run the requested stages in dependency order (L0A -> L0B -> L0C)
    if l0a_processing:
        run_l0a_station(**shared_kwargs)
    if l0b_processing:
        run_l0b_station(remove_l0a=remove_l0a, **shared_kwargs)
    if l0c_processing:
        run_l0c_station(remove_l0b=remove_l0b, **shared_kwargs)

    # Report the elapsed time, rounded to the nearest second
    elapsed_seconds = round(time.time() - start_time)
    timedelta_str = str(datetime.timedelta(seconds=elapsed_seconds))
    print(f"L0 processing of stations {station_name} completed in {timedelta_str}")
[docs]
def run_l0a_station(
    # Station arguments
    data_source,
    campaign_name,
    station_name,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Run the L0A processing of a station by invoking the disdrodb_run_l0a_station command in the terminal.

    The command is assembled as a single space-separated string and passed to ``execute_cmd``.
    Every option value is converted with ``str``, so ``None`` is forwarded as the text ``None``.

    Parameters
    ----------
    data_source : str
        The name of the data source.
    campaign_name : str
        The name of the campaign.
    station_name : str
        The name of the station.
    force : bool, optional
        If ``True``, overwrite existing data in destination directories.
        The default value is ``False``.
    verbose : bool, optional
        If ``True``, print detailed processing information to the terminal.
        The default value is ``False``.
    debugging_mode : bool, optional
        If ``True``, reduce the amount of data to process for debugging.
        The default value is ``False``.
    parallel : bool, optional
        If ``True``, process files in multiple processes simultaneously.
        The default value is ``True``.
    data_archive_dir : str, optional
        The directory path where the local DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str, optional
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    # Command-line options, listed in the order they are appended to the command
    cli_options = {
        # DISDRODB root directories
        "--data_archive_dir": data_archive_dir,
        "--metadata_archive_dir": metadata_archive_dir,
        # Processing options
        "--force": force,
        "--verbose": verbose,
        "--debugging_mode": debugging_mode,
        "--parallel": parallel,
    }
    # Program name followed by the positional station arguments
    tokens = ["disdrodb_run_l0a_station", data_source, campaign_name, station_name]
    for flag, value in cli_options.items():
        tokens += [flag, str(value)]
    # Execute command
    execute_cmd(" ".join(tokens))
[docs]
def run_l0b_station(
    # Station arguments
    data_source,
    campaign_name,
    station_name,
    # L0B processing options
    remove_l0a: bool = False,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Run the L0B processing of a station by invoking the disdrodb_run_l0b_station command in the terminal.

    The command is assembled as a single space-separated string and passed to ``execute_cmd``.
    Every option value is converted with ``str``, so ``None`` is forwarded as the text ``None``.

    Parameters
    ----------
    data_source : str
        The name of the data source.
    campaign_name : str
        The name of the campaign.
    station_name : str
        The name of the station.
    remove_l0a : bool, optional
        Whether to remove the L0A files after generating L0B netCDF files.
        The default value is ``False``.
    force : bool, optional
        If ``True``, overwrite existing data in destination directories.
        The default value is ``False``.
    verbose : bool, optional
        If ``True``, print detailed processing information to the terminal.
        The default value is ``False``.
    debugging_mode : bool, optional
        If ``True``, reduce the amount of data processed for debugging.
        The default value is ``False``.
    parallel : bool, optional
        If ``True``, process files in multiple processes simultaneously.
        The default value is ``True``.
    data_archive_dir : str, optional
        The directory path where the local DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str, optional
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    # Command-line options, listed in the order they are appended to the command
    cli_options = {
        # DISDRODB root directories
        "--data_archive_dir": data_archive_dir,
        "--metadata_archive_dir": metadata_archive_dir,
        # L0B processing options
        "--remove_l0a": remove_l0a,
        # Processing options
        "--force": force,
        "--verbose": verbose,
        "--debugging_mode": debugging_mode,
        "--parallel": parallel,
    }
    # Program name followed by the positional station arguments
    tokens = ["disdrodb_run_l0b_station", data_source, campaign_name, station_name]
    for flag, value in cli_options.items():
        tokens += [flag, str(value)]
    # Execute command
    execute_cmd(" ".join(tokens))
[docs]
def run_l0c_station(
    # Station arguments
    data_source,
    campaign_name,
    station_name,
    # L0C options
    remove_l0b: bool = False,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Run the L0C processing of a station by invoking the disdrodb_run_l0c_station command in the terminal.

    The command is assembled as a single space-separated string and passed to ``execute_cmd``.
    Every option value is converted with ``str``, so ``None`` is forwarded as the text ``None``.

    Parameters
    ----------
    data_source : str
        The name of the data source.
    campaign_name : str
        The name of the campaign.
    station_name : str
        The name of the station.
    remove_l0b : bool, optional
        Whether to remove the L0B files after generating L0C netCDF files.
        The default value is ``False``.
    force : bool, optional
        If ``True``, overwrite existing data in destination directories.
        The default value is ``False``.
    verbose : bool, optional
        If ``True``, print detailed processing information to the terminal.
        The default value is ``False``.
    debugging_mode : bool, optional
        If ``True``, reduce the amount of data processed for debugging.
        The default value is ``False``.
    parallel : bool, optional
        If ``True``, process files in multiple processes simultaneously.
        The default value is ``True``.
    data_archive_dir : str, optional
        The directory path where the local DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str, optional
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    # Command-line options, listed in the order they are appended to the command
    cli_options = {
        # DISDRODB root directories
        "--data_archive_dir": data_archive_dir,
        "--metadata_archive_dir": metadata_archive_dir,
        # L0C processing options
        "--remove_l0b": remove_l0b,
        # Processing options
        "--force": force,
        "--verbose": verbose,
        "--debugging_mode": debugging_mode,
        "--parallel": parallel,
    }
    # Program name followed by the positional station arguments
    tokens = ["disdrodb_run_l0c_station", data_source, campaign_name, station_name]
    for flag, value in cli_options.items():
        tokens += [flag, str(value)]
    # Execute command
    execute_cmd(" ".join(tokens))
[docs]
def run_l1_station(
    # Station arguments
    data_source,
    campaign_name,
    station_name,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Run the L1 processing of a station by invoking the disdrodb_run_l1_station command in the terminal.

    The command is assembled as a single space-separated string and passed to ``execute_cmd``.
    Every option value is converted with ``str``, so ``None`` is forwarded as the text ``None``.

    Parameters
    ----------
    data_source : str
        The name of the data source.
    campaign_name : str
        The name of the campaign.
    station_name : str
        The name of the station.
    force : bool, optional
        If ``True``, overwrite existing data in destination directories.
        The default value is ``False``.
    verbose : bool, optional
        If ``True``, print detailed processing information to the terminal.
        The default value is ``False``.
    debugging_mode : bool, optional
        If ``True``, reduce the amount of data processed for debugging.
        The default value is ``False``.
    parallel : bool, optional
        If ``True``, process files in multiple processes simultaneously.
        The default value is ``True``.
    data_archive_dir : str, optional
        The directory path where the local DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str, optional
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    # Command-line options, listed in the order they are appended to the command
    cli_options = {
        # DISDRODB root directories
        "--data_archive_dir": data_archive_dir,
        "--metadata_archive_dir": metadata_archive_dir,
        # Processing options
        "--force": force,
        "--verbose": verbose,
        "--debugging_mode": debugging_mode,
        "--parallel": parallel,
    }
    # Program name followed by the positional station arguments
    tokens = ["disdrodb_run_l1_station", data_source, campaign_name, station_name]
    for flag, value in cli_options.items():
        tokens += [flag, str(value)]
    # Execute command
    execute_cmd(" ".join(tokens))
[docs]
def run_l2e_station(
    # Station arguments
    data_source,
    campaign_name,
    station_name,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Run the L2E processing of a station by invoking the disdrodb_run_l2e_station command in the terminal.

    The command is assembled as a single space-separated string and passed to ``execute_cmd``.
    Every option value is converted with ``str``, so ``None`` is forwarded as the text ``None``.

    Parameters
    ----------
    data_source : str
        The name of the data source.
    campaign_name : str
        The name of the campaign.
    station_name : str
        The name of the station.
    force : bool, optional
        If ``True``, overwrite existing data in destination directories.
        The default value is ``False``.
    verbose : bool, optional
        If ``True``, print detailed processing information to the terminal.
        The default value is ``False``.
    debugging_mode : bool, optional
        If ``True``, reduce the amount of data processed for debugging.
        The default value is ``False``.
    parallel : bool, optional
        If ``True``, process files in multiple processes simultaneously.
        The default value is ``True``.
    data_archive_dir : str, optional
        The directory path where the local DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str, optional
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    # Command-line options, listed in the order they are appended to the command
    cli_options = {
        # DISDRODB root directories
        "--data_archive_dir": data_archive_dir,
        "--metadata_archive_dir": metadata_archive_dir,
        # Processing options
        "--force": force,
        "--verbose": verbose,
        "--debugging_mode": debugging_mode,
        "--parallel": parallel,
    }
    # Program name followed by the positional station arguments
    tokens = ["disdrodb_run_l2e_station", data_source, campaign_name, station_name]
    for flag, value in cli_options.items():
        tokens += [flag, str(value)]
    # Execute command
    execute_cmd(" ".join(tokens))
[docs]
def run_l2m_station(
    # Station arguments
    data_source,
    campaign_name,
    station_name,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Run the L2M processing of a station by invoking the disdrodb_run_l2m_station command in the terminal.

    The command is assembled as a single space-separated string and passed to ``execute_cmd``.
    Every option value is converted with ``str``, so ``None`` is forwarded as the text ``None``.

    Parameters
    ----------
    data_source : str
        The name of the data source.
    campaign_name : str
        The name of the campaign.
    station_name : str
        The name of the station.
    force : bool, optional
        If ``True``, overwrite existing data in destination directories.
        The default value is ``False``.
    verbose : bool, optional
        If ``True``, print detailed processing information to the terminal.
        The default value is ``False``.
    debugging_mode : bool, optional
        If ``True``, reduce the amount of data processed for debugging.
        The default value is ``False``.
    parallel : bool, optional
        If ``True``, process files in multiple processes simultaneously.
        The default value is ``True``.
    data_archive_dir : str, optional
        The directory path where the local DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str, optional
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    # Command-line options, listed in the order they are appended to the command
    cli_options = {
        # DISDRODB root directories
        "--data_archive_dir": data_archive_dir,
        "--metadata_archive_dir": metadata_archive_dir,
        # Processing options
        "--force": force,
        "--verbose": verbose,
        "--debugging_mode": debugging_mode,
        "--parallel": parallel,
    }
    # Program name followed by the positional station arguments
    tokens = ["disdrodb_run_l2m_station", data_source, campaign_name, station_name]
    for flag, value in cli_options.items():
        tokens += [flag, str(value)]
    # Execute command
    execute_cmd(" ".join(tokens))
[docs]
def create_summary_station(
    data_source,
    campaign_name,
    station_name,
    parallel=False,
    data_archive_dir=None,
):
    """Create summary figures and tables for a DISDRODB station.

    The work is delegated to the ``disdrodb_create_summary_station`` command, which is
    assembled as a single space-separated string and passed to ``execute_cmd``.
    Option values are converted with ``str``, so ``None`` is forwarded as the text ``None``.

    Parameters
    ----------
    data_source : str
        The name of the data source.
    campaign_name : str
        The name of the campaign.
    station_name : str
        The name of the station.
    parallel : bool, optional
        Value forwarded to the ``--parallel`` option of the command.
        The default value is ``False``.
    data_archive_dir : str, optional
        Value forwarded to the ``--data_archive_dir`` option of the command.
        The default value is ``None``.
    """
    # Command-line options, listed in the order they are appended to the command
    cli_options = {
        "--data_archive_dir": data_archive_dir,
        "--parallel": parallel,
    }
    # Program name followed by the positional station arguments
    tokens = ["disdrodb_create_summary_station", data_source, campaign_name, station_name]
    for flag, value in cli_options.items():
        tokens += [flag, str(value)]
    # Execute command
    execute_cmd(" ".join(tokens))
####--------------------------------------------------------------------------.
#### Run DISDRODB Archive Processing
[docs]
def run_l0a(
    data_sources=None,
    campaign_names=None,
    station_names=None,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Run the L0A processing of DISDRODB stations.

    This function allows to launch the processing of many DISDRODB stations with a single command.
    From the list of all available DISDRODB stations, it runs the processing of the
    stations matching the provided data_sources, campaign_names and station_names.
    Stations are processed one after the other by calling ``run_l0a_station``.

    Parameters
    ----------
    data_sources : list
        Name of data source(s) to process.
        The name(s) must be UPPER CASE.
        If campaign_names and station are not specified, process all stations.
        The default value is ``None``.
    campaign_names : list
        Name of the campaign(s) to process.
        The name(s) must be UPPER CASE.
        The default value is ``None``.
    station_names : list
        Station names to process.
        The default value is ``None``.
    force : bool
        If ``True``, overwrite existing data into destination directories.
        If ``False``, raise an error if there are already data into destination directories.
        The default value is ``False``.
    verbose : bool
        Whether to print detailed processing information into terminal.
        The default value is ``False``.
    debugging_mode : bool
        If ``True``, it processes just the first 3 raw data files.
        The default value is ``False``.
    parallel : bool
        If ``True``, the files are processed simultaneously in multiple processes.
        By default, the number of process is defined with ``os.cpu_count()``.
        If ``False``, the files are processed sequentially in a single process.
        The default value is ``True``.
    data_archive_dir : str (optional)
        The directory path where the DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str (optional)
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    product = "L0A"

    # Search the stations for which the product required to generate L0A is available
    stations = available_stations(
        # DISDRODB root directories
        data_archive_dir=data_archive_dir,
        metadata_archive_dir=metadata_archive_dir,
        # Stations arguments
        data_sources=data_sources,
        campaign_names=campaign_names,
        station_names=station_names,
        # Search options
        product=get_required_product(product),
        raise_error_if_empty=True,
    )
    print(f"{product} processing of {len(stations)} stations started.")

    # Options that are identical for every station
    station_options = {
        "data_archive_dir": data_archive_dir,
        "metadata_archive_dir": metadata_archive_dir,
        "force": force,
        "verbose": verbose,
        "debugging_mode": debugging_mode,
        "parallel": parallel,
    }
    for data_source, campaign_name, station_name in stations:
        station_id = f"{data_source} {campaign_name} {station_name}"
        print(f"{product} processing of {station_id} station started.")
        run_l0a_station(
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
            **station_options,
        )
        print(f"{product} processing of {station_id} station ended.")
[docs]
def run_l0b(
    data_sources=None,
    campaign_names=None,
    station_names=None,
    # L0B processing options
    remove_l0a: bool = False,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Run the L0B processing of DISDRODB stations.

    This function allows to launch the processing of many DISDRODB stations with a single command.
    From the list of all available DISDRODB L0A stations, it runs the processing of the
    stations matching the provided data_sources, campaign_names and station_names.
    Stations are processed one after the other by calling ``run_l0b_station``.

    Parameters
    ----------
    data_sources : list
        Name of data source(s) to process.
        The name(s) must be UPPER CASE.
        If campaign_names and station are not specified, process all stations.
        The default value is ``None``.
    campaign_names : list
        Name of the campaign(s) to process.
        The name(s) must be UPPER CASE.
        The default value is ``None``.
    station_names : list
        Station names to process.
        The default value is ``None``.
    remove_l0a : bool
        Whether to remove the L0A files after having generated the L0B netCDF products.
        The default value is ``False``.
    force : bool
        If ``True``, overwrite existing data into destination directories.
        If ``False``, raise an error if there are already data into destination directories.
        The default value is ``False``.
    verbose : bool
        Whether to print detailed processing information into terminal.
        The default value is ``False``.
    debugging_mode : bool
        If ``True``, it reduces the amount of data to process.
        For L0B, it processes 100 rows sampled from 3 L0A files.
        The default value is ``False``.
    parallel : bool
        If ``True``, the files are processed simultaneously in multiple processes.
        By default, the number of process is defined with ``os.cpu_count()``.
        If ``False``, the files are processed sequentially in a single process.
        The default value is ``True``.
    data_archive_dir : str (optional)
        The directory path where the DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str (optional)
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    product = "L0B"

    # Search the stations for which the product required to generate L0B is available
    stations = available_stations(
        # DISDRODB root directories
        data_archive_dir=data_archive_dir,
        metadata_archive_dir=metadata_archive_dir,
        # Stations arguments
        data_sources=data_sources,
        campaign_names=campaign_names,
        station_names=station_names,
        # Search options
        product=get_required_product(product),
        raise_error_if_empty=True,
    )
    print(f"{product} processing of {len(stations)} stations started.")

    # Options that are identical for every station
    station_options = {
        "data_archive_dir": data_archive_dir,
        "metadata_archive_dir": metadata_archive_dir,
        "remove_l0a": remove_l0a,
        "force": force,
        "verbose": verbose,
        "debugging_mode": debugging_mode,
        "parallel": parallel,
    }
    for data_source, campaign_name, station_name in stations:
        station_id = f"{data_source} {campaign_name} {station_name}"
        print(f"{product} processing of {station_id} station started.")
        run_l0b_station(
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
            **station_options,
        )
        print(f"{product} processing of {station_id} station ended.")
[docs]
def run_l0c(
    data_sources=None,
    campaign_names=None,
    station_names=None,
    # L0C options
    remove_l0b: bool = False,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Run the L0C processing of DISDRODB stations.

    This function allows to launch the processing of many DISDRODB stations with a single command.
    From the list of all available DISDRODB stations, it runs the processing of the
    stations matching the provided data_sources, campaign_names and station_names.
    Stations are processed one after the other by calling ``run_l0c_station``.

    Parameters
    ----------
    data_sources : list
        Name of data source(s) to process.
        The name(s) must be UPPER CASE.
        If campaign_names and station are not specified, process all stations.
        The default value is ``None``.
    campaign_names : list
        Name of the campaign(s) to process.
        The name(s) must be UPPER CASE.
        The default value is ``None``.
    station_names : list
        Station names to process.
        The default value is ``None``.
    remove_l0b : bool
        Whether to remove the L0B files after having produced L0C netCDF files.
        The default value is ``False``.
    force : bool
        If ``True``, overwrite existing data into destination directories.
        If ``False``, raise an error if there are already data into destination directories.
        The default value is ``False``.
    verbose : bool
        Whether to print detailed processing information into terminal.
        The default value is ``False``.
    debugging_mode : bool
        If ``True``, it reduces the amount of data to process
        (it processes just 3 L0B files).
        The default value is ``False``.
    parallel : bool
        If ``True``, the files are processed simultaneously in multiple processes.
        Each process will use a single thread to avoid issues with the HDF/netCDF library.
        By default, the number of process is defined with ``os.cpu_count()``.
        If ``False``, the files are processed sequentially in a single process
        and multi-threading is automatically exploited to speed up I/O tasks.
        The default value is ``True``.
    data_archive_dir : str (optional)
        The directory path where the DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str (optional)
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    product = "L0C"

    # Search the stations for which the product required to generate L0C is available
    stations = available_stations(
        # DISDRODB root directories
        data_archive_dir=data_archive_dir,
        metadata_archive_dir=metadata_archive_dir,
        # Stations arguments
        data_sources=data_sources,
        campaign_names=campaign_names,
        station_names=station_names,
        # Search options
        product=get_required_product(product),
        raise_error_if_empty=True,
    )
    print(f"{product} processing of {len(stations)} stations started.")

    # Options that are identical for every station
    station_options = {
        "data_archive_dir": data_archive_dir,
        "metadata_archive_dir": metadata_archive_dir,
        "remove_l0b": remove_l0b,
        "force": force,
        "verbose": verbose,
        "debugging_mode": debugging_mode,
        "parallel": parallel,
    }
    for data_source, campaign_name, station_name in stations:
        station_id = f"{data_source} {campaign_name} {station_name}"
        print(f"{product} processing of {station_id} station started.")
        run_l0c_station(
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
            **station_options,
        )
        print(f"{product} processing of {station_id} station ended.")
[docs]
def run_l0(
    data_sources=None,
    campaign_names=None,
    station_names=None,
    # L0 archive options
    l0a_processing: bool = True,
    l0b_processing: bool = True,
    l0c_processing: bool = True,
    remove_l0a: bool = False,
    remove_l0b: bool = False,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Run the L0 processing of DISDRODB stations.

    This function allows to launch the processing of many DISDRODB stations with a single command.
    From the list of all available DISDRODB stations, it runs the processing of the
    stations matching the provided data_sources, campaign_names and station_names.

    The stations are searched for the product required by the earliest enabled
    processing step (L0A, then L0B, then L0C), and ``run_l0_station`` is called
    once for each station found.

    Parameters
    ----------
    data_sources : list
        Name of data source(s) to process.
        The name(s) must be UPPER CASE.
        If campaign_names and station_names are not specified, process all stations.
        The default value is ``None``.
    campaign_names : list
        Name of the campaign(s) to process.
        The name(s) must be UPPER CASE.
        The default value is ``None``.
    station_names : list
        Station names to process.
        The default value is ``None``.
    l0a_processing : bool
        Whether to launch processing to generate L0A Apache Parquet file(s) from raw data.
        The default value is ``True``.
    l0b_processing : bool
        Whether to launch processing to generate L0B netCDF4 file(s) from L0A data.
        The default value is ``True``.
    l0c_processing : bool
        Whether to launch processing to generate L0C netCDF4 file(s) from L0B data.
        The default value is ``True``.
    remove_l0a : bool
        Whether to remove the L0A files after having generated the L0B netCDF products.
        The default value is ``False``.
    remove_l0b : bool
        Whether to remove the L0B files after having produced all L0C netCDF files.
        The default value is ``False``.
    force : bool
        If ``True``, overwrite existing data into destination directories.
        If ``False``, raise an error if there are already data into destination directories.
        The default value is ``False``.
    verbose : bool
        Whether to print detailed processing information into terminal.
        The default value is ``False``.
    parallel : bool
        If ``True``, the files are processed simultaneously in multiple processes.
        Each process will use a single thread to avoid issues with the HDF/netCDF library.
        By default, the number of process is defined with ``os.cpu_count()``.
        If ``False``, the files are processed sequentially in a single process
        and multi-threading is automatically exploited to speed up I/O tasks.
    debugging_mode : bool
        If ``True``, it reduces the amount of data to process.
        For L0A, it processes just the first 3 raw data files.
        For L0B, it processes 100 rows sampled from 3 L0A files.
        The default value is ``False``.
    data_archive_dir : str (optional)
        The directory path where the DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str (optional)
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.

    Raises
    ------
    ValueError
        If ``l0a_processing``, ``l0b_processing`` and ``l0c_processing`` are all ``False``.
    """
    # Define starting product
    # - The earliest enabled step of the L0A -> L0B -> L0C chain determines
    #   which product must already be available for a station to be selected.
    if l0a_processing:
        starting_product = "L0A"
    elif l0b_processing:
        starting_product = "L0B"
    elif l0c_processing:
        starting_product = "L0C"
    else:
        # Without this check 'required_product' would be undefined and the
        # station search below would fail with an opaque UnboundLocalError.
        raise ValueError(
            "At least one of l0a_processing, l0b_processing or l0c_processing must be True.",
        )
    required_product = get_required_product(starting_product)

    # Get list of available stations
    list_info = available_stations(
        # DISDRODB root directories
        data_archive_dir=data_archive_dir,
        metadata_archive_dir=metadata_archive_dir,
        # Stations arguments
        data_sources=data_sources,
        campaign_names=campaign_names,
        station_names=station_names,
        # Search options
        product=required_product,
        raise_error_if_empty=True,
    )

    # Print message
    n_stations = len(list_info)
    print(f"L0 processing of {n_stations} stations started.")

    # Loop over stations
    for data_source, campaign_name, station_name in list_info:
        print(f"L0 processing of {data_source} {campaign_name} {station_name} station started.")
        # Run processing
        run_l0_station(
            # DISDRODB root directories
            data_archive_dir=data_archive_dir,
            metadata_archive_dir=metadata_archive_dir,
            # Station arguments
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
            # L0 archive options
            l0a_processing=l0a_processing,
            l0b_processing=l0b_processing,
            l0c_processing=l0c_processing,
            remove_l0a=remove_l0a,
            remove_l0b=remove_l0b,
            # Process options
            force=force,
            verbose=verbose,
            debugging_mode=debugging_mode,
            parallel=parallel,
        )
        print(f"L0 processing of {data_source} {campaign_name} {station_name} station ended.")
[docs]
def run_l1(
    data_sources=None,
    campaign_names=None,
    station_names=None,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Launch the L1 processing for a set of DISDRODB stations.

    The stations to process are retrieved with ``available_stations`` using the
    provided data_sources, campaign_names and station_names filters and the
    product returned by ``get_required_product("L1")``.
    ``run_l1_station`` is then called once for each station found, and
    start/end messages are printed for every station.

    Parameters
    ----------
    data_sources : list
        Name of data source(s) to process.
        The name(s) must be UPPER CASE.
        If campaign_names and station_names are not specified, process all stations.
        The default value is ``None``.
    campaign_names : list
        Name of the campaign(s) to process.
        The name(s) must be UPPER CASE.
        The default value is ``None``.
    station_names : list
        Station names to process.
        The default value is ``None``.
    force : bool
        If ``True``, overwrite existing data into destination directories.
        If ``False``, raise an error if there are already data into destination directories.
        The default value is ``False``.
    verbose : bool
        Whether to print detailed processing information into terminal.
        The default value is ``False``.
    parallel : bool
        If ``True``, the files are processed simultaneously in multiple processes.
        Each process will use a single thread to avoid issues with the HDF/netCDF library.
        By default, the number of process is defined with ``os.cpu_count()``.
        If ``False``, the files are processed sequentially in a single process
        and multi-threading is automatically exploited to speed up I/O tasks.
    debugging_mode : bool
        If ``True``, it reduces the amount of data to process.
        The flag is forwarded unchanged to ``run_l1_station``.
        The default value is ``False``.
    data_archive_dir : str (optional)
        The directory path where the DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str (optional)
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    product = "L1"

    # Keyword arguments shared by the station search and the station processing
    archive_dirs = {
        "data_archive_dir": data_archive_dir,
        "metadata_archive_dir": metadata_archive_dir,
    }
    # Keyword arguments forwarded as-is to each station run
    processing_options = {
        "force": force,
        "verbose": verbose,
        "debugging_mode": debugging_mode,
        "parallel": parallel,
    }

    # Retrieve the stations to process
    stations = available_stations(
        product=get_required_product(product),
        data_sources=data_sources,
        campaign_names=campaign_names,
        station_names=station_names,
        available_data=False,  # Only check that the station product directory is present
        raise_error_if_empty=True,
        **archive_dirs,
    )
    print(f"{product} processing of {len(stations)} stations started.")

    # Process the stations one after the other
    for data_source, campaign_name, station_name in stations:
        station_label = f"{data_source} {campaign_name} {station_name}"
        print(f"{product} processing of {station_label} station started.")
        run_l1_station(
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
            **archive_dirs,
            **processing_options,
        )
        print(f"{product} processing of {station_label} station ended.")
[docs]
def run_l2e(
    data_sources=None,
    campaign_names=None,
    station_names=None,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Launch the L2E processing for a set of DISDRODB stations.

    The stations to process are retrieved with ``available_stations`` using the
    provided data_sources, campaign_names and station_names filters and the
    product returned by ``get_required_product("L2E")``.
    ``run_l2e_station`` is then called once for each station found, and
    start/end messages are printed for every station.

    Parameters
    ----------
    data_sources : list
        Name of data source(s) to process.
        The name(s) must be UPPER CASE.
        If campaign_names and station_names are not specified, process all stations.
        The default value is ``None``.
    campaign_names : list
        Name of the campaign(s) to process.
        The name(s) must be UPPER CASE.
        The default value is ``None``.
    station_names : list
        Station names to process.
        The default value is ``None``.
    force : bool
        If ``True``, overwrite existing data into destination directories.
        If ``False``, raise an error if there are already data into destination directories.
        The default value is ``False``.
    verbose : bool
        Whether to print detailed processing information into terminal.
        The default value is ``False``.
    parallel : bool
        If ``True``, the files are processed simultaneously in multiple processes.
        Each process will use a single thread to avoid issues with the HDF/netCDF library.
        By default, the number of process is defined with ``os.cpu_count()``.
        If ``False``, the files are processed sequentially in a single process
        and multi-threading is automatically exploited to speed up I/O tasks.
    debugging_mode : bool
        If ``True``, it reduces the amount of data to process.
        For L2E, it processes just 3 L1 files.
        The default value is ``False``.
    data_archive_dir : str (optional)
        The directory path where the DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str (optional)
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    product = "L2E"

    # Root directories are needed both to search and to process the stations
    root_dirs = {
        "data_archive_dir": data_archive_dir,
        "metadata_archive_dir": metadata_archive_dir,
    }

    # Retrieve the stations to process
    selected_stations = available_stations(
        product=get_required_product(product),
        data_sources=data_sources,
        campaign_names=campaign_names,
        station_names=station_names,
        raise_error_if_empty=True,
        **root_dirs,
    )
    print(f"{product} processing of {len(selected_stations)} stations started.")

    # Process the stations one after the other
    for data_source, campaign_name, station_name in selected_stations:
        station_label = f"{data_source} {campaign_name} {station_name}"
        print(f"{product} processing of {station_label} station started.")
        run_l2e_station(
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
            force=force,
            verbose=verbose,
            debugging_mode=debugging_mode,
            parallel=parallel,
            **root_dirs,
        )
        print(f"{product} processing of {station_label} station ended.")
[docs]
def run_l2m(
    data_sources=None,
    campaign_names=None,
    station_names=None,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    # DISDRODB root directories
    data_archive_dir: Optional[str] = None,
    metadata_archive_dir: Optional[str] = None,
):
    """Launch the L2M processing for a set of DISDRODB stations.

    The stations to process are retrieved with ``available_stations`` using the
    provided data_sources, campaign_names and station_names filters and the
    product returned by ``get_required_product("L2M")``.
    ``run_l2m_station`` is then called once for each station found, and
    start/end messages are printed for every station.

    Parameters
    ----------
    data_sources : list
        Name of data source(s) to process.
        The name(s) must be UPPER CASE.
        If campaign_names and station_names are not specified, process all stations.
        The default value is ``None``.
    campaign_names : list
        Name of the campaign(s) to process.
        The name(s) must be UPPER CASE.
        The default value is ``None``.
    station_names : list
        Station names to process.
        The default value is ``None``.
    force : bool
        If ``True``, overwrite existing data into destination directories.
        If ``False``, raise an error if there are already data into destination directories.
        The default value is ``False``.
    verbose : bool
        Whether to print detailed processing information into terminal.
        The default value is ``False``.
    parallel : bool
        If ``True``, the files are processed simultaneously in multiple processes.
        Each process will use a single thread to avoid issues with the HDF/netCDF library.
        By default, the number of process is defined with ``os.cpu_count()``.
        If ``False``, the files are processed sequentially in a single process
        and multi-threading is automatically exploited to speed up I/O tasks.
    debugging_mode : bool
        If ``True``, it reduces the amount of data to process.
        The flag is forwarded unchanged to ``run_l2m_station``.
        The default value is ``False``.
    data_archive_dir : str (optional)
        The directory path where the DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str (optional)
        The directory path where the DISDRODB Metadata Archive is located.
        The directory path must end with ``<...>/DISDRODB-METADATA/DISDRODB``.
        If ``None``, it uses the ``metadata_archive_dir`` path specified
        in the DISDRODB active configuration.
    """
    product = "L2M"

    # Options forwarded as-is to each station run
    station_run_options = {
        "data_archive_dir": data_archive_dir,
        "metadata_archive_dir": metadata_archive_dir,
        "force": force,
        "verbose": verbose,
        "debugging_mode": debugging_mode,
        "parallel": parallel,
    }

    # Retrieve the stations to process
    upstream_product = get_required_product(product)
    matching_stations = available_stations(
        data_archive_dir=data_archive_dir,
        metadata_archive_dir=metadata_archive_dir,
        data_sources=data_sources,
        campaign_names=campaign_names,
        station_names=station_names,
        product=upstream_product,
        raise_error_if_empty=True,
    )
    print(f"{product} processing of {len(matching_stations)} stations started.")

    # Process the stations one after the other
    for data_source, campaign_name, station_name in matching_stations:
        station_label = f"{data_source} {campaign_name} {station_name}"
        print(f"{product} processing of {station_label} station started.")
        run_l2m_station(
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
            **station_run_options,
        )
        print(f"{product} processing of {station_label} station ended.")
[docs]
def create_summary(
    data_sources=None,
    campaign_names=None,
    station_names=None,
    parallel=False,
    data_archive_dir=None,
    metadata_archive_dir=None,
):
    """Create summary figures and tables for a set of DISDRODB stations.

    The stations are retrieved with ``available_stations`` by searching for the
    ``"L2E"`` product with ``rolling=False`` and ``sample_interval=60``.
    ``create_summary_station`` is then called once for each station found.

    Parameters
    ----------
    data_sources : list
        Name of data source(s) to process.
        The name(s) must be UPPER CASE.
        If campaign_names and station_names are not specified, process all stations.
        The default value is ``None``.
    campaign_names : list
        Name of the campaign(s) to process.
        The name(s) must be UPPER CASE.
        The default value is ``None``.
    station_names : list
        Station names to process.
        The default value is ``None``.
    parallel : bool
        Processing option forwarded unchanged to ``create_summary_station``.
        The default value is ``False``.
    data_archive_dir : str (optional)
        The directory path where the DISDRODB Data Archive is located.
        The directory path must end with ``<...>/DISDRODB``.
        If ``None``, it uses the ``data_archive_dir`` path specified
        in the DISDRODB active configuration.
    metadata_archive_dir : str (optional)
        The directory path where the DISDRODB Metadata Archive is located.
        It is only used here for the station search; it is not passed to
        ``create_summary_station``.
        The default value is ``None``.
    """
    # Criteria identifying the product that a station must have to be summarized
    product_search = {
        "product": "L2E",
        "product_kwargs": {"rolling": False, "sample_interval": 60},
    }

    # Retrieve the stations to summarize
    stations = available_stations(
        data_archive_dir=data_archive_dir,
        metadata_archive_dir=metadata_archive_dir,
        data_sources=data_sources,
        campaign_names=campaign_names,
        station_names=station_names,
        raise_error_if_empty=True,
        **product_search,
    )
    print(f"Creation of summaries for {len(stations)} stations has started.")

    # Create the summary of each station
    for data_source, campaign_name, station_name in stations:
        create_summary_station(
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
            parallel=parallel,
            data_archive_dir=data_archive_dir,
        )
    print("Creation of station summaries has terminated.")
####--------------------------------------------------------------------------.