Data Reduction: Individual Tags
In the cells below, remember to check the definitions of the constant values (and update them if needed!).
0. Initialization
!pip install xdggs
!pip install xarray --upgrade
!pip install --upgrade "cf_xarray>=0.10.4"
# import libraries
import os
import json
import tqdm
import sys
from pathlib import Path
# Constant values for this notebook: check them (and update them if needed)
# before running the cells below.
SOURCE_BUCKET = "gfts-ifremer"
TARGET_BUCKET = "destine-gfts-visualisation-data"
TAG_ROOT = "https://data-taos.ifremer.fr/data_tmp/cleaned/tag/"
TAG_ROOT_STORAGE_OPTIONS = {}
# The inner quotes differ from the outer ones on purpose: reusing the same
# quote character inside an f-string is a SyntaxError before Python 3.12.
# NOTE: `os.getenv` returns None when JUPYTERHUB_USER is unset, in which case
# the prefix becomes "kbatch_papermill/None/".
SOURCE_PREFIX = f"kbatch_papermill/{os.getenv('JUPYTERHUB_USER')}/"
SOURCE_SUFFIX = ""
TARGET_PREFIX = "taos_pollock/"
# export the constant values as environment variables in a single call;
# the storage options are a dict, so they are serialized to JSON first
os.environ.update(
    {
        "SOURCE_BUCKET": SOURCE_BUCKET,
        "TARGET_BUCKET": TARGET_BUCKET,
        "TAG_ROOT": TAG_ROOT,
        "TAG_ROOT_STORAGE_OPTIONS": json.dumps(TAG_ROOT_STORAGE_OPTIONS),
        "SOURCE_PREFIX": SOURCE_PREFIX,
        "SOURCE_SUFFIX": SOURCE_SUFFIX,
        "TARGET_PREFIX": TARGET_PREFIX,
    }
)
# add the path to the `scripts` folder of the local `gfts` checkout, so that
# the `simplify` module imported below can be found
path_to_local_gfts = "gfts"
# `sys.path` entries must be strings: any other type (such as a `Path`
# object) is ignored by the import system, hence the explicit `str(...)`
sys.path.append(str(Path.home() / path_to_local_gfts / "scripts"))
from simplify import list_tags, process_tag  # noqa: E402
1. Execution
# retrieve the tags with `list_tags` (imported from `simplify`)
tag_list = list_tags()
# a bare name at the end of a notebook cell displays its value
tag_list
# possibly, filter the tags to only select some of them
# ...
# NOTE(review): this slice drops the last entry of `tag_list`; it looks like
# an example filter -- adjust or remove it as needed
tag_list = tag_list[:-1]
tag_list
# run `process_tag` on every selected tag, one at a time, while tqdm
# reports the progress
progress_bar = tqdm.tqdm(tag_list)
for tag_name in progress_bar:
    process_tag(tag_name)
Optionally, you can inspect the results:
import s3fs  # noqa: E402

# authenticated access (no anonymous mode) through the "gfts" profile ...
storage_options = {"anon": False, "profile": "gfts"}
# ... against the endpoint and region configured below
storage_options["client_kwargs"] = {
    "endpoint_url": "https://s3.gra.perf.cloud.ovh.net/",
    "region_name": "gra",
}

s3 = s3fs.S3FileSystem(**storage_options)
# list what is stored under the target prefix of the target bucket
s3.ls("/".join((TARGET_BUCKET, TARGET_PREFIX)))