{ "cells": [ { "cell_type": "markdown", "id": "705600ae-ee96-4326-8712-bad50960f870", "metadata": {}, "source": [ "# Read parquet kerchunk catalog\n", "\n", "Open a kerchunk reference catalog stored as parquet on S3 with the fsspec reference filesystem and load it lazily with xarray." ] }, { "cell_type": "code", "execution_count": 1, "id": "e364d80a-2e58-4d20-908e-ee05fd51cf11", "metadata": {}, "outputs": [], "source": [ "import fsspec\n", "import xarray as xr" ] }, { "cell_type": "code", "execution_count": 2, "id": "c5e000f8-d3d8-4fa3-8513-5774732f6c23", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 286 ms, sys: 30.5 ms, total: 316 ms\n", "Wall time: 421 ms\n" ] } ], "source": [ "%%time\n", "# kerchunk reference catalog stored as parquet; this is what the reference filesystem opens\n", "url = \"s3://gfts-reference-data/CMEMS_v6r1_NWS_PHY_NRT_NL_01hav_AN_2D_combined.parq/\"\n", "\n", "# options used to read the reference catalog itself (the fo argument)\n", "# anon=False: s3fs authenticates with the configured credentials, so they must be available\n", "target_opts = {\"anon\": False}\n", "\n", "# options used to read the data files that the references point to (remote_protocol)\n", "# anon=False here too; NOTE(review): switch to True only if the bucket is confirmed public\n", "remote_opts = {\"anon\": False}\n", "\n", "fs = fsspec.filesystem(\n", "    \"reference\",\n", "    fo=url,\n", "    remote_protocol=\"s3\",\n", "    remote_options=remote_opts,\n", "    target_options=target_opts,\n", ")\n", "m = fs.get_mapper(\"\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "4744b47f-20fd-475e-b5a5-5c11298ee32d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 2.1 s, sys: 191 ms, total: 2.29 s\n", "Wall time: 2.63 s\n" ] } ], "source": [ "%%time\n", "# chunks={} returns dask-backed (lazy) variables; consolidated=False skips the consolidated-metadata lookup\n", "ds = xr.open_dataset(\n", "    m, engine=\"zarr\", chunks={}, backend_kwargs={\"consolidated\": False}\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "id": "1ee76201-9860-4e05-93b8-f8140558cd60", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
<xarray.Dataset> Size: 521GB\n", "Dimensions: (latitude: 551, longitude: 936, time: 18048)\n", "Coordinates:\n", " * latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28\n", " * longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977\n", " * time (time) datetime64[ns] 144kB 2022-04-02T00:30:00 ... 2024-04-22...\n", "Data variables:\n", " mlotst (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>\n", " thetao (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>\n", " ubar (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>\n", " uo (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>\n", " vbar (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>\n", " vo (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>\n", " zos (time, latitude, longitude) float64 74GB dask.array<chunksize=(1, 551, 936), meta=np.ndarray>\n", "Attributes: (12/13)\n", " Conventions: CF-1.8\n", " comment: \n", " contact: https://marine.copernicus.eu/contact\n", " domain_name: NWS36\n", " field_date: 20220402\n", " field_type: mean\n", " ... ...\n", " forecast_type: analysis\n", " institution: Nologin (Spain)\n", " licence: https://marine.copernicus.eu/user-corner/service-commitm...\n", " references: http://marine.copernicus.eu/\n", " source: NEMO3.6\n", " title: Ocean surface hourly mean fields for the North West Shel...
<xarray.DataArray 'thetao' (time: 18048, latitude: 551, longitude: 936)> Size: 74GB\n", "dask.array<open_dataset-thetao, shape=(18048, 551, 936), dtype=float64, chunksize=(1, 551, 936), chunktype=numpy.ndarray>\n", "Coordinates:\n", " * latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28\n", " * longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977\n", " * time (time) datetime64[ns] 144kB 2022-04-02T00:30:00 ... 2024-04-22...\n", "Attributes: (12/14)\n", " easting: longitude\n", " latitude_max: 61.2819f\n", " latitude_min: 46.0036f\n", " long_name: Temperature\n", " longitude_max: 9.977f\n", " longitude_min: -15.996f\n", " ... ...\n", " unit_long: degrees_C\n", " units: degrees_C\n", " valid_max: 22000\n", " valid_min: -12000\n", " z_max: 0.494025f\n", " z_min: 0.494025f
<xarray.DataArray 'thetao' (latitude: 551, longitude: 936)> Size: 4MB\n", "array([[13.31200016, 13.32400016, 13.33900016, ..., nan,\n", " nan, nan],\n", " [13.32600016, 13.34100016, 13.34700016, ..., nan,\n", " nan, nan],\n", " [13.34300016, 13.35300016, 13.34600016, ..., nan,\n", " nan, nan],\n", " ...,\n", " [ 8.45599993, 8.49499993, 8.53999993, ..., nan,\n", " nan, nan],\n", " [ 8.44899993, 8.47899993, 8.53999993, ..., nan,\n", " nan, nan],\n", " [ 8.45999993, 8.47099993, 8.53399993, ..., nan,\n", " nan, nan]])\n", "Coordinates:\n", " * latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28\n", " * longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977\n", " time datetime64[ns] 8B 2022-04-02T00:30:00\n", "Attributes: (12/14)\n", " easting: longitude\n", " latitude_max: 61.2819f\n", " latitude_min: 46.0036f\n", " long_name: Temperature\n", " longitude_max: 9.977f\n", " longitude_min: -15.996f\n", " ... ...\n", " unit_long: degrees_C\n", " units: degrees_C\n", " valid_max: 22000\n", " valid_min: -12000\n", " z_max: 0.494025f\n", " z_min: 0.494025f
<xarray.Dataset> Size: 6TB\n", "Dimensions: (depth: 50, latitude: 551, longitude: 936, time: 7104)\n", "Coordinates:\n", " * depth (depth) float32 200B 0.494 1.541 2.646 ... 5.275e+03 5.728e+03\n", " * latitude (latitude) float32 2kB 46.0 46.03 46.06 ... 61.23 61.25 61.28\n", " * longitude (longitude) float32 4kB -16.0 -15.97 -15.94 ... 9.921 9.949 9.977\n", " * time (time) datetime64[ns] 57kB 2023-07-02T00:30:00 ... 2024-04-22T...\n", "Data variables:\n", " so (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray>\n", " thetao (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray>\n", " uo (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray>\n", " vo (time, depth, latitude, longitude) float64 1TB dask.array<chunksize=(1, 1, 551, 936), meta=np.ndarray>\n", "Attributes: (12/13)\n", " Conventions: CF-1.8\n", " comment: \n", " contact: https://marine.copernicus.eu/contact\n", " domain_name: NWS36\n", " field_date: 20230702\n", " field_type: mean\n", " ... ...\n", " forecast_type: hindcast\n", " institution: Nologin (Spain)\n", " licence: https://marine.copernicus.eu/user-corner/service-commitm...\n", " references: http://marine.copernicus.eu/\n", " source: NEMO3.6\n", " title: Ocean 3D hourly mean fields for the North West Shelf (NW...