Launching the scientific plotting notebooks as jobs
Let’s launch the previous notebook, which computes an example of a scientific plot for result inspection and analysis, on the three tags that were processed in the previous kbatch-papermill tutorial.
import os
import re
import s3fs
from pathlib import Path
from tqdm.notebook import tqdm
from kbatch_papermill import kbatch_papermill
First, clone the GFTS repository to get the notebook we want to run:
# in a new terminal
git clone https://github.com/destination-earth/DestinE_ESA_GFTS gfts
# input variables
code_dir = Path.home() / "gfts/docs"  # local clone of the GFTS repository
notebook = "workflow/compute.ipynb"  # notebook to run, relative to code_dir
s3_dest = "s3://gfts-ifremer/kbatch_papermill/"  # the results are expected to be stored there
user_name = os.getenv("JUPYTERHUB_USER")
storage_options = {
    "anon": False,
    "client_kwargs": {
        "endpoint_url": "https://s3.gra.perf.cloud.ovh.net",
        "region_name": "gra",
    },
}
s3_dest += user_name
# the notebooks will be stored there (feel free to change it)
s3_nb_dest = f"{s3_dest}/nbs"
print("Remote storage root:", s3_dest)
print("The notebooks will be saved in:", s3_nb_dest)
# input parameters for the notebook
parameters = {
    # remote storage access configuration
    "storage_options": storage_options,
    # path to the formatted biologging (tag) data
    "tag_root": "https://data-taos.ifremer.fr/data_tmp/cleaned/tag/",
    # path under which the results will be stored
    "result_root": s3_dest,
}
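As a reminder of how these values reach the notebook: kbatch_papermill hands parameters over to papermill, which injects a new cell after the notebook's cell tagged "parameters", overriding its defaults. A purely illustrative sketch of what that injected cell would contain for one tag (the <your-username> placeholder stands for the s3_dest value built above):

# injected-parameters (added by papermill at runtime; illustrative only)
storage_options = {
    "anon": False,
    "client_kwargs": {
        "endpoint_url": "https://s3.gra.perf.cloud.ovh.net",
        "region_name": "gra",
    },
}
tag_root = "https://data-taos.ifremer.fr/data_tmp/cleaned/tag/"
result_root = "s3://gfts-ifremer/kbatch_papermill/<your-username>"
tag_name = "A19124"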
tag_list = ["A19124", "A18831", "A18832"]
job_dict = {}
for tag_name in tqdm(tag_list, desc="Processing tags"):
    try:
        # Kubernetes job names must only contain lowercase alphanumerics and "-"
        safe_tag_name = re.sub(r"[^a-z0-9-]", "", tag_name.lower())
        # notebook parameters, completed with the tag to process
        params = parameters | {"tag_name": tag_name}
        s3_nb_path = f"{s3_nb_dest}/{tag_name}2.ipynb"
        print(code_dir, notebook, s3_nb_path)
        job_id = kbatch_papermill(
            # input info
            code_dir=code_dir,
            notebook=notebook,
            # output info
            s3_dest=s3_nb_path,
            parameters=params,
            # additional parameters (not explained here)
            job_name=f"html-{safe_tag_name}",  # name of the job (here, derived from the tag name)
            s3_code_dir=f"gfts-ifremer/kbatch/{user_name}",  # where to zip and upload the code for the container
            profile_name="default",  # specification of the container's hardware
        )
        print(
            f'Notebook for the tag "{tag_name}" has been launched as the job "{job_id}"!'
        )
        # keep the remote paths of the launched notebooks, keyed by job ID
        job_dict[job_id] = s3_nb_path
    except Exception as e:
        print(f"Error for {tag_name}: {e.__class__.__name__}: {e}")
        raise
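The jobs run asynchronously, so the cell above returns as soon as each job is submitted. You can monitor them with the kbatch CLI (e.g. kbatch job list), or poll S3 for the executed notebooks from Python. A minimal polling sketch using only s3fs and the job_dict built above; the 30-second interval is an arbitrary choice, and depending on when papermill uploads its output, a file may appear slightly before the run fully completes:

import time

fs = s3fs.S3FileSystem(**storage_options)
pending = dict(job_dict)  # job_id -> remote path of the executed notebook
while pending:
    fs.invalidate_cache()  # drop s3fs's listing cache so new files are seen
    for job_id, nb_path in list(pending.items()):
        if fs.exists(nb_path):
            print(f'Job "{job_id}" has uploaded its notebook to {nb_path}')
            del pending[job_id]
    if pending:
        time.sleep(30)  # wait before polling again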
Once the jobs are finished (and assuming they succeeded), a plot for the scientific validation has been saved as an HTML file `ts_track_plot.html` in each tag folder under `result_root`:
s3fs.S3FileSystem(**storage_options).ls(f"{s3_dest}/{tag_list[0]}/")
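To inspect one of the plots without leaving JupyterLab, you can read the HTML file straight from S3 and render it inline. A small sketch assuming the `ts_track_plot.html` file name mentioned above:

from IPython.display import HTML

fs = s3fs.S3FileSystem(**storage_options)
# read the plot of the first tag from S3...
with fs.open(f"{s3_dest}/{tag_list[0]}/ts_track_plot.html", "r") as f:
    content = f.read()
# ...and render it inline in the notebook
HTML(content)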