From 829f7303c080406c7c07a4fe5192053c37ad56cb Mon Sep 17 00:00:00 2001 From: Russell Stoneback Date: Tue, 29 Mar 2022 16:52:13 -0500 Subject: [PATCH 1/3] ENH: Added tutorial on writing files. --- .../Tutorial-Creating_netCDF4_Files.ipynb | 181 ++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb diff --git a/pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb b/pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb new file mode 100644 index 0000000..02e46fd --- /dev/null +++ b/pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "60958439", + "metadata": {}, + "outputs": [], + "source": [ + "import pysat\n", + "import netCDF4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfb7858c", + "metadata": {}, + "outputs": [], + "source": [ + "filename = 'demo_test_file.nc'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1312043f", + "metadata": {}, + "outputs": [], + "source": [ + "# Instrument with variety of 1D variables\n", + "inst = pysat.Instrument('pysat', 'testing')\n", + "\n", + "# # Instrument with a variety of 1D and 2D variables\n", + "# inst = pysat.Instrument('pysat', 'testing2D')\n", + "\n", + "# Instrument with xarray data, mixed data dimensipnality.\n", + "inst = pysat.Instrument('pysat', 'testmodel')\n", + "\n", + "# Instrument with xarray data, mixed data dimensipnality.\n", + "inst = pysat.Instrument('pysat', 'testing2D_xarray')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad9e2ccf", + "metadata": {}, + "outputs": [], + "source": [ + "# Load data\n", + "inst.load(2009, 1, use_header=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d630ea4", + "metadata": {}, + "outputs": [], + "source": [ + "# Options when writing files.\n", + "\n", + "# Translate metadata labels to something 
new.\n", + "\n", + "# Metadata labels in the file may be different than used by Instrument object.\n", + "# Default behavior\n", + "meta_translation = None\n", + "inv_translation = None\n", + "\n", + "# # Map existing labels to multiple labels in the file\n", + "# meta_translation = {inst.meta.labels.units: ['funny_units', 'serious_units'],\n", + "# inst.meta.labels.fill_val: ['funny_fill', 'fill_serious']}\n", + "# inv_translation = {'funny_units': inst.meta.labels.units,\n", + "# 'serious_units': inst.meta.labels.units,\n", + "# 'funny_fill': inst.meta.labels.fill_val,\n", + "# 'fill_serious': inst.meta.labels.fill_val}\n", + "\n", + "# Arbitrary processing of metadata when writing files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54735345", + "metadata": {}, + "outputs": [], + "source": [ + "pysat.utils.io.inst_to_netcdf(inst, filename, meta_translation=meta_translation)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc822200", + "metadata": {}, + "outputs": [], + "source": [ + "# List global file attributes\n", + "with netCDF4.Dataset(filename) as data:\n", + " print('Global File Attributes\\n')\n", + " for attr in data.ncattrs():\n", + " print('\\n', attr, ': ', data.getncattr(attr))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "222dcfc7", + "metadata": {}, + "outputs": [], + "source": [ + "# List variable metadata\n", + "with netCDF4.Dataset(filename) as data:\n", + " print('File Variable Attributes\\n')\n", + " for var in data.variables.keys():\n", + " print('File Variable: ', var)\n", + " print(''.join(['----------------','-'*len(var)]))\n", + " \n", + " for nc_key in data.variables[var].ncattrs():\n", + " print(nc_key, ': ', data.variables[var].getncattr(nc_key))\n", + " \n", + " print('\\n')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "296ec38c", + "metadata": {}, + "outputs": [], + "source": [ + "data, meta = 
pysat.utils.io.load_netcdf(filename, pandas_format=inst.pandas_format, \n", + " meta_translation=inv_translation)\n", + "\n", + "# Loaded metadata information\n", + "meta.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3d3f2fe", + "metadata": {}, + "outputs": [], + "source": [ + "# Loaded data\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e90e841", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From ab47078c49cb4037bc6f75dca19fefb08f402b24 Mon Sep 17 00:00:00 2001 From: Russell Stoneback Date: Mon, 2 May 2022 14:00:11 -0500 Subject: [PATCH 2/3] ENH: Updated writing files tutorial --- .../Tutorial-Creating_netCDF4_Files.ipynb | 134 ++++++++++++++---- 1 file changed, 109 insertions(+), 25 deletions(-) diff --git a/pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb b/pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb index 02e46fd..fce5508 100644 --- a/pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb +++ b/pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb @@ -3,10 +3,13 @@ { "cell_type": "code", "execution_count": null, - "id": "60958439", + "id": "575e87c5", "metadata": {}, "outputs": [], "source": [ + "import datetime as dt\n", + "import os\n", + "\n", "import pysat\n", "import netCDF4" ] @@ -14,48 +17,62 @@ { "cell_type": "code", "execution_count": null, - "id": "bfb7858c", + "id": "009bad8b", "metadata": {}, "outputs": [], "source": [ - "filename = 'demo_test_file.nc'" + "# Check for pysat data directory\n", + "if len(pysat.params['data_dirs']) == 0:\n", 
+ " print('Assigning demo directory.')\n", + " pysat.params['data_dirs'] = './pysatDemo'" ] }, { "cell_type": "code", "execution_count": null, - "id": "1312043f", + "id": "22d0c4ee", + "metadata": {}, + "outputs": [], + "source": [ + "filename = 'demo_test_file_{year:04d}{day:03d}.nc'\n", + "date = dt.datetime(2009, 1, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ab3a520", "metadata": {}, "outputs": [], "source": [ "# Instrument with variety of 1D variables\n", "inst = pysat.Instrument('pysat', 'testing')\n", "\n", - "# # Instrument with a variety of 1D and 2D variables\n", + "# Instrument with a variety of 1D and 2D variables\n", "# inst = pysat.Instrument('pysat', 'testing2D')\n", "\n", "# Instrument with xarray data, mixed data dimensipnality.\n", - "inst = pysat.Instrument('pysat', 'testmodel')\n", + "# inst = pysat.Instrument('pysat', 'testmodel')\n", "\n", "# Instrument with xarray data, mixed data dimensipnality.\n", - "inst = pysat.Instrument('pysat', 'testing2D_xarray')" + "# inst = pysat.Instrument('pysat', 'testing2D_xarray')" ] }, { "cell_type": "code", "execution_count": null, - "id": "ad9e2ccf", + "id": "528d27f8", "metadata": {}, "outputs": [], "source": [ "# Load data\n", - "inst.load(2009, 1, use_header=True)" + "inst.load(date=date, use_header=True)" ] }, { "cell_type": "code", "execution_count": null, - "id": "3d630ea4", + "id": "73e28290", "metadata": {}, "outputs": [], "source": [ @@ -67,8 +84,11 @@ "# Default behavior\n", "meta_translation = None\n", "inv_translation = None\n", + "export_nan = None\n", "\n", "# # Map existing labels to multiple labels in the file\n", + "\n", + "# Made up translation #1\n", "# meta_translation = {inst.meta.labels.units: ['funny_units', 'serious_units'],\n", "# inst.meta.labels.fill_val: ['funny_fill', 'fill_serious']}\n", "# inv_translation = {'funny_units': inst.meta.labels.units,\n", @@ -76,28 +96,67 @@ "# 'funny_fill': inst.meta.labels.fill_val,\n", "# 'fill_serious': 
inst.meta.labels.fill_val}\n", "\n", - "# Arbitrary processing of metadata when writing files." + "\n", + "# Arbitrary processing of metadata when writing files.\n", + "\n", + "\n", + "# Add additional metadata\n", + "\n", + "# `new_label` will only appear in the file for 'mlt' since values for other variables are NaN. To include metadata\n", + "# with NaN values, use the `export_nan` keyword. It will, by default, include fill, and the min and max values.\n", + "# Note that adding a new metadata type to meta will also add it to `meta.labels`.\n", + "\n", + "# drop_label = 'new_label'\n", + "# inst.meta['mlt'] = {drop_label: 1.}\n", + "# inst.meta.data\n", + "\n", + "# export_nan = [inst.meta.labels.fill_val, inst.meta.labels.max_val,\n", + "# inst.meta.labels.min_val, drop_label]\n", + "\n", + "\n", + "# By setting the `drop_meta_labels` keyword, users can prevent metadata information from being loaded.\n", + "\n", + "# Keep all metadata\n", + "drop_labels = []\n", + "\n", + "# Drop newly added label\n", + "# drop_labels = ['new_label']" ] }, { "cell_type": "code", "execution_count": null, - "id": "54735345", + "id": "b14524d2", "metadata": {}, "outputs": [], "source": [ - "pysat.utils.io.inst_to_netcdf(inst, filename, meta_translation=meta_translation)" + "# Add custom information to `inst.meta.header` which is written to file.\n", + "inst.meta.header.demo_thang_pysat_style = 'Yes'" ] }, { "cell_type": "code", "execution_count": null, - "id": "fc822200", + "id": "13bbd531", + "metadata": {}, + "outputs": [], + "source": [ + "# Write file, but first, format filename.\n", + "form_filename = os.path.join(inst.files.data_path, filename.format(year=inst.yr, day=inst.doy))\n", + "\n", + "# Write file using `pysat.utils.io`.\n", + "pysat.utils.io.inst_to_netcdf(inst, form_filename, meta_translation=meta_translation, export_nan=export_nan)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "405c4d02", "metadata": {}, "outputs": [], 
"source": [ "# List global file attributes\n", - "with netCDF4.Dataset(filename) as data:\n", + "with netCDF4.Dataset(form_filename) as data:\n", " print('Global File Attributes\\n')\n", " for attr in data.ncattrs():\n", " print('\\n', attr, ': ', data.getncattr(attr))" @@ -106,12 +165,12 @@ { "cell_type": "code", "execution_count": null, - "id": "222dcfc7", + "id": "6575b4cd", "metadata": {}, "outputs": [], "source": [ "# List variable metadata\n", - "with netCDF4.Dataset(filename) as data:\n", + "with netCDF4.Dataset(form_filename) as data:\n", " print('File Variable Attributes\\n')\n", " for var in data.variables.keys():\n", " print('File Variable: ', var)\n", @@ -126,35 +185,60 @@ { "cell_type": "code", "execution_count": null, - "id": "296ec38c", + "id": "a4ee3cba", "metadata": {}, "outputs": [], "source": [ - "data, meta = pysat.utils.io.load_netcdf(filename, pandas_format=inst.pandas_format, \n", - " meta_translation=inv_translation)\n", + "# Load data and meta using `pysat.utils`\n", + "data, meta = pysat.utils.io.load_netcdf(form_filename, pandas_format=inst.pandas_format, \n", + " meta_translation=inv_translation,\n", + " drop_meta_labels=drop_labels)\n", "\n", - "# Loaded metadata information\n", + "# Print loaded metadata information to Jupyter.\n", "meta.data" ] }, { "cell_type": "code", "execution_count": null, - "id": "d3d3f2fe", + "id": "e4d8b292", "metadata": {}, "outputs": [], "source": [ - "# Loaded data\n", + "# Print loaded data to Jupyter.\n", "data" ] }, { "cell_type": "code", "execution_count": null, - "id": "7e90e841", + "id": "3d93a458", + "metadata": {}, + "outputs": [], + "source": [ + "# Load data using pysat.Instrument. 
Presumes that current `inst` created NetCDF file.\n", + "# First, define general pysat.Instrument.\n", + "load_inst = pysat.Instrument('pysat', 'netcdf', pandas_format=inst.pandas_format, \n", + " update_files=True, file_format=filename, data_dir=inst.files.data_path,\n", + " meta_translation=inv_translation, drop_meta_labels=drop_labels)\n", + "\n", + "# Load data.\n", + "load_inst.load(2009, 1, use_header=True)\n", + "\n", + "# Print to Jupyter.\n", + "load_inst.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18edf55b", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Print metadata to Jupyter.\n", + "load_inst.meta.data" + ] } ], "metadata": { From b29af21c63f6f733b97d2f7716ed78b081138b97 Mon Sep 17 00:00:00 2001 From: Russell Stoneback Date: Mon, 9 May 2022 14:36:54 -0500 Subject: [PATCH 3/3] ENH: Updated writing files tutorial comment. --- pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb b/pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb index fce5508..2d7c780 100644 --- a/pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb +++ b/pysatTutorials/Tutorial-Creating_netCDF4_Files.ipynb @@ -97,7 +97,8 @@ "# 'fill_serious': inst.meta.labels.fill_val}\n", "\n", "\n", - "# Arbitrary processing of metadata when writing files.\n", + "# Arbitrary processing of metadata is also supported when writing/loading files.\n", + "# See pysat documentation for more on the `meta_processor` keyword.\n", "\n", "\n", "# Add additional metadata\n",