Read parquet kerchunk catalog
import fsspec
import xarray as xr
%%time
# Build a kerchunk "reference" filesystem for the 2D surface fields and expose
# it as a key/value mapper that the Zarr engine can read.

# Location of the Parquet-format reference catalog.
url = "s3://gfts-reference-data/CMEMS_v6r1_NWS_PHY_NRT_NL_01hav_AN_2D_combined.parq/"

# Options for the filesystem that reads the reference catalog itself.
# anon=False means requests are NOT anonymous: S3 credentials must be
# available in the environment.
# NOTE(review): if this bucket allows public reads, anon=True would remove
# the credential requirement -- confirm against the bucket policy.
target_opts = {"anon": False}

# Options for the filesystem that reads the NetCDF files the references point
# to (protocol given by remote_protocol below). Credentials are used here too.
remote_opts = {"anon": False}

fs = fsspec.filesystem(
    "reference",
    fo=url,
    remote_protocol="s3",
    remote_options=remote_opts,
    target_options=target_opts,
)

# Mapper rooted at the top of the reference filesystem.
m = fs.get_mapper("")
CPU times: user 286 ms, sys: 30.5 ms, total: 316 ms
Wall time: 421 ms
%%time
# Open the reference mapper as a Zarr store.
#   chunks={}           -> variables come back as lazy dask arrays using the
#                          store's own chunking.
#   consolidated=False  -> forwarded to the zarr backend so it does not look
#                          for consolidated metadata.
open_kwargs = dict(
    engine="zarr",
    chunks={},
    backend_kwargs={"consolidated": False},
)
ds = xr.open_dataset(m, **open_kwargs)
CPU times: user 2.1 s, sys: 191 ms, total: 2.29 s
Wall time: 2.63 s
# Display the dataset summary (dimensions, coordinates, data variables,
# attributes); this only shows metadata, the arrays stay lazy.
ds
<xarray.Dataset> Size: 521GB Dimensions: (latitude: 551, longitude: 936, time: 18048) Coordinates: * latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28 * longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977 * time (time) datetime64[ns] 144kB 2022-04-02T00:30:00 ... 2024-04-22... Data variables: mlotst (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray> thetao (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray> ubar (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray> uo (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray> vbar (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray> vo (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray> zos (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray> Attributes: (12/13) Conventions: CF-1.8 comment: contact: https://marine.copernicus.eu/contact domain_name: NWS36 field_date: 20220402 field_type: mean ... ... forecast_type: analysis institution: Nologin (Spain) licence: https://marine.copernicus.eu/user-corner/service-commitm... references: http://marine.copernicus.eu/ source: NEMO3.6 title: Ocean surface hourly mean fields for the North West Shel...
# Display the summary of the temperature variable on its own.
ds["thetao"]
<xarray.DataArray 'thetao' (time: 18048, latitude: 551, longitude: 936)> Size: 74GB dask.array<open_dataset-thetao, shape=(18048, 551, 936), dtype=float64, chunksize=(1, 551, 936), chunktype=numpy.ndarray> Coordinates: * latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28 * longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977 * time (time) datetime64[ns] 144kB 2022-04-02T00:30:00 ... 2024-04-22... Attributes: (12/14) easting: longitude latitude_max: 61.2819f latitude_min: 46.0036f long_name: Temperature longitude_max: 9.977f longitude_min: -15.996f ... ... unit_long: degrees_C units: degrees_C valid_max: 22000 valid_min: -12000 z_max: 0.494025f z_min: 0.494025f
# Select the temperature field for the time step closest to the requested
# instant (method="nearest" snaps to the closest label on the time axis),
# then pull that single 2D slice into memory.
thetao_2d = ds["thetao"]
da = thetao_2d.sel(time="2022-04-02 00:00", method="nearest").load()

# Display the loaded slice.
da
<xarray.DataArray 'thetao' (latitude: 551, longitude: 936)> Size: 4MB array([[13.31200016, 13.32400016, 13.33900016, ..., nan, nan, nan], [13.32600016, 13.34100016, 13.34700016, ..., nan, nan, nan], [13.34300016, 13.35300016, 13.34600016, ..., nan, nan, nan], ..., [ 8.45599993, 8.49499993, 8.53999993, ..., nan, nan, nan], [ 8.44899993, 8.47899993, 8.53999993, ..., nan, nan, nan], [ 8.45999993, 8.47099993, 8.53399993, ..., nan, nan, nan]]) Coordinates: * latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28 * longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977 time datetime64[ns] 8B 2022-04-02T00:30:00 Attributes: (12/14) easting: longitude latitude_max: 61.2819f latitude_min: 46.0036f long_name: Temperature longitude_max: 9.977f longitude_min: -15.996f ... ... unit_long: degrees_C units: degrees_C valid_max: 22000 valid_min: -12000 z_max: 0.494025f z_min: 0.494025f
# Plot the loaded slice as a rasterized quadmesh on longitude/latitude axes
# with a 1:1 data aspect ratio.
# NOTE(review): the `.hvplot` accessor is only registered after
# `import hvplot.xarray` has run; that import is not among the imports
# visible in this file -- confirm it is executed before this cell.
da.hvplot.quadmesh(x="longitude", y="latitude", rasterize=True, data_aspect=1)
Read 3D data
%%time
# Build a kerchunk "reference" filesystem for the 3D fields and expose it as
# a key/value mapper that the Zarr engine can read.

# Location of the Parquet-format reference catalog.
url = "s3://gfts-reference-data/CMEMS_v6r1_NWS_PHY_NRT_NL_3D_combined.parq/"

# Options for the filesystem that reads the reference catalog itself.
# anon=False means requests are NOT anonymous: S3 credentials must be
# available in the environment.
# NOTE(review): if this bucket allows public reads, anon=True would remove
# the credential requirement -- confirm against the bucket policy.
target_opts = {"anon": False}

# Options for the filesystem that reads the NetCDF files the references point
# to (protocol given by remote_protocol below). Credentials are used here too.
remote_opts = {"anon": False}

fs3D = fsspec.filesystem(
    "reference",
    fo=url,
    remote_protocol="s3",
    remote_options=remote_opts,
    target_options=target_opts,
)

# Mapper rooted at the top of the reference filesystem.
m3D = fs3D.get_mapper("")
CPU times: user 20.6 ms, sys: 1.34 ms, total: 21.9 ms
Wall time: 67.7 ms
%%time
# Open the 3D reference mapper as a Zarr store.
#   chunks={}           -> variables come back as lazy dask arrays using the
#                          store's own chunking.
#   consolidated=False  -> forwarded to the zarr backend so it does not look
#                          for consolidated metadata.
open_kwargs = dict(
    engine="zarr",
    chunks={},
    backend_kwargs={"consolidated": False},
)
ds3D = xr.open_dataset(m3D, **open_kwargs)
CPU times: user 102 ms, sys: 620 µs, total: 102 ms
Wall time: 413 ms
# Display the 3D dataset summary (dimensions, coordinates, data variables,
# attributes); this only shows metadata, the arrays stay lazy.
ds3D
<xarray.Dataset> Size: 6TB Dimensions: (depth: 50, latitude: 551, longitude: 936, time: 7104) Coordinates: * depth (depth) float32 200B 0.494 1.541 2.646 ... 5.275e+03 5.728e+03 * latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28 * longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977 * time (time) datetime64[ns] 57kB 2023-07-02T00:30:00 ... 2024-04-22T... Data variables: so (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray> thetao (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray> uo (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray> vo (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray> Attributes: (12/13) Conventions: CF-1.8 comment: contact: https://marine.copernicus.eu/contact domain_name: NWS36 field_date: 20230702 field_type: mean ... ... forecast_type: hindcast institution: Nologin (Spain) licence: https://marine.copernicus.eu/user-corner/service-commitm... references: http://marine.copernicus.eu/ source: NEMO3.6 title: Ocean 3D hourly mean fields for the North West Shelf (NW...
# Load the temperature field at the first depth level (index 0) for the time
# step closest to the requested instant.
# The requested timestamp is chosen inside the 3D catalog's time coverage
# (the axis starts at 2023-07-02T00:30): with method="nearest" an
# out-of-range request does not raise, it silently snaps to the nearest end
# of the axis, which hides the mismatch between request and result.
da3D = (
    ds3D["thetao"]
    .isel(depth=0)
    .sel(time="2023-07-02 00:00", method="nearest")
    .load()
)
# Plot the loaded slice as a rasterized quadmesh on longitude/latitude axes
# with a 1:1 data aspect ratio.
# NOTE(review): the `.hvplot` accessor is only registered after
# `import hvplot.xarray` has run; that import is not among the imports
# visible in this file -- confirm it is executed before this cell.
da3D.hvplot.quadmesh(x="longitude", y="latitude", rasterize=True, data_aspect=1)