Launching the scientific plotting notebooks as jobs
Let’s launch the previous notebook, which computes an example scientific plot for result inspection and analysis, on the three tags that were processed in the previous kbatch-papermill tutorial.
import os
import re
import s3fs
from pathlib import Path
from tqdm.notebook import tqdm
from kbatch_papermill import kbatch_papermill
First, clone the GFTS repository to get the notebook we want to run:
# in a new terminal
git clone https://github.com/destination-earth/DestinE_ESA_GFTS gfts
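Back in the notebook, here is an optional sanity check that the clone landed where the next cell assumes (i.e. in your home directory); this line is an addition for illustration, not part of the original workflow:
# optional check: fail early if the repository was cloned somewhere else
assert (Path.home() / "gfts/docs/workflow/compute.ipynb").exists()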
# input variables
code_dir = Path.home() / "gfts/docs"
notebook = "workflow/compute.ipynb"
s3_dest = "s3://gfts-ifremer/kbatch_papermill/" # we expect the results to be there
user_name = os.getenv("JUPYTERHUB_USER")
storage_options = {
    "anon": False,
    "client_kwargs": {
        "endpoint_url": "https://s3.gra.perf.cloud.ovh.net",
        "region_name": "gra",
    },
}
s3_dest += user_name
# the notebooks will be stored there (feel free to change it)
s3_nb_dest = f"{s3_dest}/nbs"
print("Remote storage root:", s3_dest)
print("The notebooks will be saved in:", s3_nb_dest)
# input parameters for the notebook
parameters = {
    # remote accessor configuration
    "storage_options": storage_options,
    # path to where the biologging data has been formatted
    "tag_root": "https://data-taos.ifremer.fr/data_tmp/cleaned/tag/",
    # path to the results
    "result_root": s3_dest,
}
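Under the hood, papermill injects these values into the cell of compute.ipynb that is tagged parameters. For reference, that cell presumably looks something like the sketch below; the defaults shown are illustrative assumptions, not the notebook's actual values:
# cell tagged "parameters" in workflow/compute.ipynb (illustrative defaults)
tag_name = "A19124"  # overridden per job in the loop below
tag_root = "https://data-taos.ifremer.fr/data_tmp/cleaned/tag/"
result_root = "."
storage_options = {}  # replaced with the S3 configuration at launch time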
tag_list = ["A19124", "A18831", "A18832"]
job_dict = {}
for tag_name in tqdm(tag_list, desc="Processing tags"):
    try:
        # sanitize the tag name so it is a valid Kubernetes job name
        safe_tag_name = re.sub(r"[^a-z0-9-]", "", tag_name.lower())
        # merge the shared parameters with this tag's name
        params = parameters | {"tag_name": tag_name}
        # the "2" suffix keeps these notebooks distinct from the previous tutorial's outputs
        s3_nb_path = f"{s3_nb_dest}/{tag_name}2.ipynb"
        print(code_dir, notebook, s3_nb_path)
        job_id = kbatch_papermill(
            # input info
            code_dir=code_dir,
            notebook=notebook,
            # output info
            s3_dest=s3_nb_path,
            parameters=params,
            # additional parameters (not explained here)
            job_name=f"html-{safe_tag_name}",  # name of the job (here, derived from the tag name)
            s3_code_dir=f"gfts-ifremer/kbatch/{user_name}",  # where to zip and dump the code for the container
            profile_name="default",  # specification of the container's hardware
        )
        print(
            f'Notebook for the tag "{tag_name}" has been launched as the job "{job_id}"!'
        )
        # keep the remote paths of the launched notebooks
        job_dict[job_id] = s3_nb_path
    except Exception as e:
        print(f"Error for {tag_name}: {e.__class__.__name__}: {e}")
        raise
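While the jobs run, one simple way to follow progress from the notebook is to poll S3 for the output notebooks. This is a minimal sketch added for illustration: it only checks that each output notebook has appeared, not that its job actually succeeded, and the timeout and interval are arbitrary choices.
import time

fs = s3fs.S3FileSystem(**storage_options)
pending = dict(job_dict)
for _ in range(60):  # give up after roughly 10 minutes
    # a job's output notebook appears on S3 once it has been uploaded
    for job_id in [j for j, p in pending.items() if fs.exists(p)]:
        print(f'Job "{job_id}" wrote {pending.pop(job_id)}')
    if not pending:
        break
    time.sleep(10)
    fs.invalidate_cache()  # s3fs caches listings; clear them between polls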
Once the jobs are finished (and assuming they succeeded), a plot for the scientific validation has been saved as an HTML file ts_track_plot.html in each tag folder under result_root:
s3fs.S3FileSystem(**storage_options).ls(f"{s3_dest}/{tag_list[0]}/")
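To inspect the plots locally instead of through S3, you can copy them down with s3fs. This sketch assumes every job succeeded and produced ts_track_plot.html as described above; the local destination directory is an arbitrary choice:
fs = s3fs.S3FileSystem(**storage_options)
local_dir = Path.home() / "ts_track_plots"  # arbitrary local destination
local_dir.mkdir(exist_ok=True)
for tag_name in tag_list:
    # prefix each copy with its tag name so the files do not collide
    fs.get(
        f"{s3_dest}/{tag_name}/ts_track_plot.html",
        str(local_dir / f"{tag_name}_ts_track_plot.html"),
    )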