Launching the scientific plotting notebooks as jobs

Let’s launch the previous notebook, which computes an example scientific plot for result inspection and analysis, on the three tags that were processed in the previous kbatch-papermill tutorial.

import os
import re
import s3fs

from pathlib import Path
from tqdm.notebook import tqdm

from kbatch_papermill import kbatch_papermill

First, clone the GFTS repository to get the notebook we want to run:

# in a new terminal
git clone https://github.com/destination-earth/DestinE_ESA_GFTS gfts

# input variables
code_dir = Path.home() / "gfts/docs"
notebook = "workflow/compute.ipynb"
s3_dest = "s3://gfts-ifremer/kbatch_papermill/"  # we expect the results to be there
user_name = os.getenv("JUPYTERHUB_USER")
storage_options = {
    "anon": False,
    "client_kwargs": {
        "endpoint_url": "https://s3.gra.perf.cloud.ovh.net",
        "region_name": "gra",
    },
}
s3_dest += user_name
# the notebooks will be stored there (feel free to change it)
s3_nb_dest = f"{s3_dest}/nbs"
print("Remote storage root:", s3_dest)
print("The notebooks will be saved in:", s3_nb_dest)
# input parameters for the notebook
parameters = {
    # remote accessor configuration
    "storage_options": storage_options,
    # path to where the biologging data has been formatted
    "tag_root": "https://data-taos.ifremer.fr/data_tmp/cleaned/tag/",
    # path to the results
    "result_root": s3_dest,
}
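
Optionally, a single tag can first be executed locally with papermill to validate these parameters before submitting the whole batch. A minimal sketch, assuming papermill is installed; the local output name is a hypothetical choice:

import papermill as pm

# hypothetical local dry run for one tag; writes the executed notebook
# to the current directory instead of S3
pm.execute_notebook(
    str(code_dir / notebook),
    "compute_A19124_local.ipynb",
    parameters=parameters | {"tag_name": "A19124"},
)
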
tag_list = ["A19124", "A18831", "A18832"]
job_dict = {}
for tag_name in tqdm(tag_list, desc="Processing tags"):
    try:
        # sanitize the tag name into a valid Kubernetes job name (lowercase alphanumerics and dashes)
        safe_tag_name = re.sub(r"[^a-z0-9-]", "", tag_name.lower())
        # parameters (with `tag_name`)
        params = parameters | {"tag_name": tag_name}
        # the "2" suffix keeps these output notebooks distinct from earlier runs
        s3_nb_path = f"{s3_nb_dest}/{tag_name}2.ipynb"

        print(code_dir, notebook, s3_nb_path)

        job_id = kbatch_papermill(
            # input info
            code_dir=code_dir,
            notebook=notebook,
            # output info
            s3_dest=s3_nb_path,
            parameters=params,
            # additional parameters (not explained here)
            job_name=f"html-{safe_tag_name}",  # name of the job (here, w.r.t the name of the tag)
            s3_code_dir=f"gfts-ifremer/kbatch/{user_name}",  # where to zip and dump the code for the container
            profile_name="default",  # specification of the container's hardware
        )
        print(
            f'Notebook for the tag "{tag_name}" has been launched as the job "{job_id}"!'
        )

        # we keep the remote paths of the launched jobs
        job_dict[job_id] = s3_nb_path
    except Exception as e:
        print(f"Error for {tag_name}: {e.__class__.__name__}: {e}")
        raise
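
kbatch_papermill only submits the jobs; it does not wait for them to finish. One rough way to watch for completion is to poll S3 until the executed notebooks appear at their destinations (note that papermill may write the output notebook incrementally, so for authoritative job status prefer the kbatch CLI). A minimal polling sketch:

import time

fs = s3fs.S3FileSystem(**storage_options)

# rough polling sketch: report each job once its executed notebook
# shows up at its S3 destination
pending = dict(job_dict)
while pending:
    fs.invalidate_cache()  # s3fs caches listings; force a fresh look
    for job_id, nb_path in list(pending.items()):
        if fs.exists(nb_path):
            print(f"{job_id}: notebook available at {nb_path}")
            del pending[job_id]
    if pending:
        time.sleep(30)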

Once the jobs have finished (and assuming they succeeded), a plot for scientific validation has been saved as an HTML file, ts_track_plot.html, in each tag folder under result_root:

s3fs.S3FileSystem(**storage_options).ls(f"{s3_dest}/{tag_list[0]}/")
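
To inspect one of these plots without leaving JupyterLab, the HTML file can be copied locally and rendered inline. A minimal sketch for the first tag (the local file name is arbitrary):

from IPython.display import IFrame

fs = s3fs.S3FileSystem(**storage_options)

# copy the plot of the first tag locally and render it inline
local_plot = f"ts_track_plot_{tag_list[0]}.html"
fs.get(f"{s3_dest}/{tag_list[0]}/ts_track_plot.html", local_plot)
IFrame(local_plot, width="100%", height=500)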