Computing 3D PDF from State matrix and tag pressure information#

1. Configure the Notebook:#

Prepare the notebook to compute the 3D PDF.

In this step, we set up the notebook environment for analysis. It includes installing necessary packages, importing required libraries, setting up parameters, and configuring the cluster for distributed computing. It also retrieves the tag data needed for analysis.

# Import necessary libraries and modules.
import xarray as xr
from pangeo_fish.io import open_tag
# Set up execution parameters for the analysis.
# Note: This cell is tagged as parameters, allowing automatic updates when configuring with papermill.

# `tag_name` corresponds to the name of the biologging tag (DST identification number),
# which is also a path for storing all the information for the specific fish tagged with `tag_name`.
tag_name = "A19124"

# `tag_root` specifies the root URL for tag data used for this computation.
tag_root = "https://data-taos.ifremer.fr/data_tmp/cleaned/tag/"

# `scratch_root` specifies the root directory for storing output files.
scratch_root = "s3://destine-gfts-data-lake/demo"

# `storage_options` specifies options for the filesystem storing output files.
storage_options = {
    "anon": False,
    "profile": "gfts",
    "client_kwargs": {
        "endpoint_url": "https://s3.gra.perf.cloud.ovh.net",
        "region_name": "gra",
    },
}

# If you are using a local file system, activate the following two lines instead:
# scratch_root = "."
# storage_options = None


# Default chunk value for the time dimension. This value depends on the configuration of your Dask cluster.
chunk_time = 24


# Parameters for step 2, "Compare Reference Model with DST Information":

# `bbox`, the bounding box, defines the latitude and longitude range for the analysis area.
bbox = {"latitude": [46, 51], "longitude": [-8, -1]}

# Define the target root directory for storing analysis results.
target_root = f"{scratch_root}/{tag_name}"

# Define the default chunk sizes for optimization.
default_chunk = {"time": chunk_time, "lat": -1, "lon": -1}
default_chunk_xy = {"time": chunk_time, "x": -1, "y": -1}

# Set up a local cluster for distributed computing.
from distributed import LocalCluster

cluster = LocalCluster()
client = cluster.get_client()
client

Client: distributed.LocalCluster (connection method: Cluster object); dashboard: http://127.0.0.1:8787/status
# Open and retrieve the tag data required for the analysis.
tag = open_tag(tag_root, tag_name)
tag
<xarray.DatasetView> Size: 0B
Dimensions:  ()
Data variables:
    *empty*
Attributes:
    pit_tag_id:       A19124
    acoustic_tag_id:  OPI-372
    scientific_name:  Pollachius pollachius
    common_name:      Pollock
    project:          FISHINTEL

2. Compute pdf_depth#

In this step, we compute the depth-only PDF for each time step of the model. First, we load the reference model to choose the time bins used for creating the depth PDF (the state matrix could be used for this as well).

Note: here, the maximum depth is fixed at 42 and the bin interval at 2. This could be made ‘automatic’, or fixed according to the depth levels of our future climate data, as sketched below.
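For instance, one possible way to derive the bins from a climate dataset’s vertical grid (a minimal sketch, assuming the dataset exposes a 1D `depth` coordinate; the function name is hypothetical) is to place bin edges halfway between consecutive depth levels:

import numpy as np
import xarray as xr


def bins_from_depth_levels(ds: xr.Dataset, depth_name: str = "depth") -> np.ndarray:
    # Place bin edges halfway between consecutive depth levels,
    # extrapolating by half a spacing at both ends.
    levels = np.asarray(ds[depth_name])
    inner = (levels[:-1] + levels[1:]) / 2
    first = 2 * levels[0] - inner[0]
    last = 2 * levels[-1] - inner[-1]
    return np.concatenate([[first], inner, [last]])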

from pangeo_fish.cf import bounds_to_bins
from pangeo_fish.tags import reshape_by_bins, to_time_slice

# Drop tag data outside the tagged events interval.
time_slice = to_time_slice(tag["tagging_events/time"])
tag_log = tag["dst"].ds.sel(time=time_slice)
%%time
# Load the previously computed probability distribution (the state matrix).
states = xr.open_zarr(f"{target_root}/states.zarr")
states
CPU times: user 1.82 s, sys: 181 ms, total: 2 s
Wall time: 2.69 s
<xarray.Dataset> Size: 405MB
Dimensions:     (y: 294, x: 659, time: 258)
Coordinates:
    cell_ids    (y, x) int64 2MB dask.array<chunksize=(294, 659), meta=np.ndarray>
    latitude    (y, x) float64 2MB dask.array<chunksize=(294, 659), meta=np.ndarray>
    longitude   (y, x) float64 2MB dask.array<chunksize=(294, 659), meta=np.ndarray>
    resolution  float64 8B ...
  * time        (time) datetime64[ns] 2kB 2022-06-13T12:00:00 ... 2022-06-24T...
Dimensions without coordinates: y, x
Data variables:
    states      (time, y, x) float64 400MB dask.array<chunksize=(24, 294, 659), meta=np.ndarray>
%%time
# Reshape the tag log so that it is binned to the time steps of the reference model.
reshaped_tag = reshape_by_bins(
    tag_log,
    dim="time",
    bins=(
        states.cf.add_bounds(["time"], output_dim="bounds")
        .pipe(bounds_to_bins, bounds_dim="bounds")
        .get("time_bins")
    ),
    bin_dim="bincount",
    other_dim="obs",
).chunk({"time": chunk_time})
reshaped_tag
CPU times: user 449 ms, sys: 12.9 ms, total: 462 ms
Wall time: 455 ms
<xarray.Dataset> Size: 168kB
Dimensions:      (obs: 40, time: 258)
Coordinates:
  * obs          (obs) int64 320B 0 1 2 3 4 5 6 7 8 ... 32 33 34 35 36 37 38 39
  * time         (time) datetime64[ns] 2kB 2022-06-13T12:00:00 ... 2022-06-24...
    resolution   float64 8B ...
Data variables:
    temperature  (time, obs) float64 83kB dask.array<chunksize=(24, 40), meta=np.ndarray>
    pressure     (time, obs) float64 83kB dask.array<chunksize=(24, 40), meta=np.ndarray>
import numpy as np

maxdepth = 42
interval = 2
bins = np.arange(0, maxdepth, interval)


def compute_pdf(data, bins=bins):
    # Remove NaN values.
    data = data[~np.isnan(data)]

    # Calculate the histogram.
    hist, bin_edges = np.histogram(data, bins=bins, density=True)

    # Normalize the histogram so that the PDF sums to 1.
    bin_width = bin_edges[1] - bin_edges[0]
    pdf = hist * bin_width
    return pdf  # , bin_edges


def compute_pdf_bins(bins=bins):
    # Build a dummy array of the right size just to compute
    # the bin edges and bin centers.
    data = np.full(reshaped_tag.obs.size, 1.0)

    hist, bin_edges = np.histogram(data, bins=bins, density=True)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
    return bin_edges, bin_centers


bin_edges, bin_centers = compute_pdf_bins()

depth_pdf = (
    xr.apply_ufunc(
        compute_pdf,  # the function
        reshaped_tag["pressure"],
        input_core_dims=[["obs"]],
        output_core_dims=[["depth"]],
        exclude_dims=set(("obs",)),
        vectorize=True,
        dask="parallelized",
        output_dtypes=[reshaped_tag.pressure.dtype],
        dask_gufunc_kwargs={
            "output_sizes": {
                "depth": bin_centers.size,
            }
        },
    )
    .assign_attrs({"long_name": "depth_pdf"})
    .to_dataset(name="depth_pdf")
    .assign_coords(depth=bin_centers)
)

depth_pdf
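As a quick sanity check (an illustrative snippet, not part of the original notebook), `compute_pdf` should return a vector that sums to 1 for any sample falling inside the bin range:

# Illustrative sanity check: the PDF values over the bins should sum to 1.
rng = np.random.default_rng(0)
sample = rng.uniform(bins[0], bins[-1], size=100)
assert np.isclose(compute_pdf(sample).sum(), 1.0)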
depth_pdf.depth_pdf.plot(x="time", y="depth")
<matplotlib.collections.QuadMesh at 0x7f5450e67550>
[Figure: depth_pdf plotted as a quadmesh over time (x-axis) and depth (y-axis)]
depth_pdf
<xarray.Dataset> Size: 44kB
Dimensions:     (time: 258, depth: 20)
Coordinates:
  * time        (time) datetime64[ns] 2kB 2022-06-13T12:00:00 ... 2022-06-24T...
    resolution  float64 8B ...
  * depth       (depth) float64 160B 1.0 3.0 5.0 7.0 9.0 ... 33.0 35.0 37.0 39.0
Data variables:
    depth_pdf   (time, depth) float64 41kB dask.array<chunksize=(24, 20), meta=np.ndarray>
%%time
# Save the depth PDF to Zarr.
depth_pdf.compute().to_zarr(
    f"{target_root}/depth_pdf.zarr",
    mode="w",
    consolidated=True,
    storage_options=storage_options,
)

3. Compute 3D PDF#

Multiply the 2D probability map by the depth PDF to obtain the 3D PDF. Plot and verify the result.
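Stating the implicit assumption behind this product (a reading of the computation, not spelled out in the original): at each time step, the depth distribution is treated as independent of horizontal position, so the joint density factorizes as

$$P(x, y, z \mid t) = P(x, y \mid t)\,P(z \mid t)$$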

ds = states.states.chunk(chunks={"time": 24 * 2}) * depth_pdf.depth_pdf.chunk(
    chunks={"time": 24 * 2}
)
ds = ds.persist().assign_attrs({"long_name": "3D_pdf"}).to_dataset(name="3d_pdf")
ds
<xarray.Dataset> Size: 8GB
Dimensions:     (y: 294, x: 659, time: 258, depth: 20)
Coordinates:
    cell_ids    (y, x) int64 2MB dask.array<chunksize=(294, 659), meta=np.ndarray>
    latitude    (y, x) float64 2MB dask.array<chunksize=(294, 659), meta=np.ndarray>
    longitude   (y, x) float64 2MB dask.array<chunksize=(294, 659), meta=np.ndarray>
    resolution  float64 8B 0.0002498
  * time        (time) datetime64[ns] 2kB 2022-06-13T12:00:00 ... 2022-06-24T...
  * depth       (depth) float64 160B 1.0 3.0 5.0 7.0 9.0 ... 33.0 35.0 37.0 39.0
Dimensions without coordinates: y, x
Data variables:
    3d_pdf      (time, y, x, depth) float64 8GB dask.array<chunksize=(48, 294, 659, 20), meta=np.ndarray>
%%time
# Save the 3D PDF to Zarr.
(
    ds.to_zarr(
        f"{target_root}/three_d_pdf.zarr",
        mode="w",
        consolidated=True,
        storage_options=storage_options,
    )
)
import hvplot.xarray  # noqa: F401, registers the .hvplot accessor on xarray objects

ds.hvplot.quadmesh(x="longitude", y="latitude", groupby=["time", "depth"])
ds["3d_pdf"].sum(dim=["x", "y"]).plot(x="time", y="depth")
<matplotlib.collections.QuadMesh at 0x7f54421d4890>
[Figure: 3D PDF summed over x and y, plotted over time (x-axis) and depth (y-axis)]
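As an extra verification (illustrative, not in the original notebook): since both factors are normalized per time step, summing the 3D PDF over x, y, and depth should give values close to 1 at every time step:

# Illustrative check: total probability per time step should be close to 1,
# provided both the state matrix and the depth PDF are normalized per time step.
total = ds["3d_pdf"].sum(dim=["x", "y", "depth"]).compute()
print(float(total.min()), float(total.max()))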