Read parquet kerchunk catalog
import fsspec
import xarray as xr
%%time
# Storage options for the two S3 locations involved. anon=False requests an
# authenticated (non-anonymous) S3 connection.
# NOTE(review): earlier comments placed the references on "an OSN pod" and the
# NetCDF files on "the AWS public dataset program", both with "no credentials
# needed". That conflicts with anon=False below - confirm which is intended.
remote_opts = {"anon": False}  # for the NetCDF files the references point at
target_opts = {"anon": False}  # for the parquet reference catalog itself
url = "s3://gfts-reference-data/CMEMS_v6r1_NWS_PHY_NRT_NL_01hav_AN_2D_combined.parq/"

# Build a "reference" filesystem on top of the parquet kerchunk catalog:
# `fo` is the catalog location, opened with `target_options`; the referenced
# data is fetched over `remote_protocol` using `remote_options`.
fs = fsspec.filesystem(
    "reference",
    fo=url,
    target_options=target_opts,
    remote_protocol="s3",
    remote_options=remote_opts,
)

# Key/value mapping rooted at the top of the reference filesystem; this is the
# store object passed to xr.open_dataset in the next cell.
m = fs.get_mapper("")
CPU times: user 286 ms, sys: 30.5 ms, total: 316 ms
Wall time: 421 ms
%%time
# Open the 2D (surface) dataset lazily through the zarr engine.
# chunks={} yields dask-backed variables (the repr shows one chunk per time
# step); consolidated=False is forwarded to the zarr backend.
ds = xr.open_dataset(
    m,
    engine="zarr",
    chunks={},
    backend_kwargs={"consolidated": False},
)
CPU times: user 2.1 s, sys: 191 ms, total: 2.29 s
Wall time: 2.63 s
ds
<xarray.Dataset> Size: 521GB
Dimensions: (latitude: 551, longitude: 936, time: 18048)
Coordinates:
* latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28
* longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977
* time (time) datetime64[ns] 144kB 2022-04-02T00:30:00 ... 2024-04-22...
Data variables:
mlotst (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>
thetao (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>
ubar (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>
uo (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>
vbar (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>
vo (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>
zos (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>
Attributes: (12/13)
Conventions: CF-1.8
comment:
contact: https://marine.copernicus.eu/contact
domain_name: NWS36
field_date: 20220402
field_type: mean
... ...
forecast_type: analysis
institution: Nologin (Spain)
licence: https://marine.copernicus.eu/user-corner/service-commitm...
references: http://marine.copernicus.eu/
source: NEMO3.6
title: Ocean surface hourly mean fields for the North West Shel...
ds["thetao"]
<xarray.DataArray 'thetao' (time: 18048, latitude: 551, longitude: 936)> Size: 74GB
dask.array<open_dataset-thetao, shape=(18048, 551, 936), dtype=float64, chunksize=(1, 551, 936), chunktype=numpy.ndarray>
Coordinates:
* latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28
* longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977
* time (time) datetime64[ns] 144kB 2022-04-02T00:30:00 ... 2024-04-22...
Attributes: (12/14)
easting: longitude
latitude_max: 61.2819f
latitude_min: 46.0036f
long_name: Temperature
longitude_max: 9.977f
longitude_min: -15.996f
... ...
unit_long: degrees_C
units: degrees_C
valid_max: 22000
valid_min: -12000
z_max: 0.494025f
z_min: 0.494025f
da = ds["thetao"].sel(time="2022-04-02 00:00", method="nearest").load()
da
<xarray.DataArray 'thetao' (latitude: 551, longitude: 936)> Size: 4MB
array([[13.31200016, 13.32400016, 13.33900016, ..., nan,
nan, nan],
[13.32600016, 13.34100016, 13.34700016, ..., nan,
nan, nan],
[13.34300016, 13.35300016, 13.34600016, ..., nan,
nan, nan],
...,
[ 8.45599993, 8.49499993, 8.53999993, ..., nan,
nan, nan],
[ 8.44899993, 8.47899993, 8.53999993, ..., nan,
nan, nan],
[ 8.45999993, 8.47099993, 8.53399993, ..., nan,
nan, nan]])
Coordinates:
* latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28
* longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977
time datetime64[ns] 8B 2022-04-02T00:30:00
Attributes: (12/14)
easting: longitude
latitude_max: 61.2819f
latitude_min: 46.0036f
long_name: Temperature
longitude_max: 9.977f
longitude_min: -15.996f
... ...
unit_long: degrees_C
units: degrees_C
valid_max: 22000
valid_min: -12000
z_max: 0.494025f
z_min: 0.494025f
da.hvplot.quadmesh(x="longitude", y="latitude", rasterize=True, data_aspect=1)
Read 3D data
%%time
# Storage options for the two S3 locations involved. anon=False requests an
# authenticated (non-anonymous) S3 connection.
# NOTE(review): earlier comments placed the references on "an OSN pod" and the
# NetCDF files on "the AWS public dataset program", both with "no credentials
# needed". That conflicts with anon=False below - confirm which is intended.
remote_opts = {"anon": False}  # for the NetCDF files the references point at
target_opts = {"anon": False}  # for the parquet reference catalog itself
url = "s3://gfts-reference-data/CMEMS_v6r1_NWS_PHY_NRT_NL_3D_combined.parq/"

# Build a "reference" filesystem on top of the 3D parquet kerchunk catalog:
# `fo` is the catalog location, opened with `target_options`; the referenced
# data is fetched over `remote_protocol` using `remote_options`.
fs3D = fsspec.filesystem(
    "reference",
    fo=url,
    target_options=target_opts,
    remote_protocol="s3",
    remote_options=remote_opts,
)

# Key/value mapping rooted at the top of the reference filesystem; this is the
# store object passed to xr.open_dataset in the next cell.
m3D = fs3D.get_mapper("")
CPU times: user 20.6 ms, sys: 1.34 ms, total: 21.9 ms
Wall time: 67.7 ms
%%time
# Open the 3D dataset lazily through the zarr engine.
# chunks={} yields dask-backed variables (the repr shows one chunk per
# time step and depth level); consolidated=False is forwarded to the zarr
# backend.
ds3D = xr.open_dataset(
    m3D,
    engine="zarr",
    chunks={},
    backend_kwargs={"consolidated": False},
)
CPU times: user 102 ms, sys: 620 µs, total: 102 ms
Wall time: 413 ms
ds3D
<xarray.Dataset> Size: 6TB
Dimensions: (depth: 50, latitude: 551, longitude: 936, time: 7104)
Coordinates:
* depth (depth) float32 200B 0.494 1.541 2.646 ... 5.275e+03 5.728e+03
* latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28
* longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977
* time (time) datetime64[ns] 57kB 2023-07-02T00:30:00 ... 2024-04-22T...
Data variables:
so (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray>
thetao (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray>
uo (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray>
vo (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray>
Attributes: (12/13)
Conventions: CF-1.8
comment:
contact: https://marine.copernicus.eu/contact
domain_name: NWS36
field_date: 20230702
field_type: mean
... ...
forecast_type: hindcast
institution: Nologin (Spain)
licence: https://marine.copernicus.eu/user-corner/service-commitm...
references: http://marine.copernicus.eu/
source: NEMO3.6
title: Ocean 3D hourly mean fields for the North West Shelf (NW...da3D = (
ds3D["thetao"].isel(depth=0).sel(time="2022-04-02 00:00", method="nearest").load()
)
da3D.hvplot.quadmesh(x="longitude", y="latitude", rasterize=True, data_aspect=1)