diff --git a/0_download_data.ipynb b/0_download_data.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..ae88c52d350f01883e8054fc3e5f82fdbf745481 --- /dev/null +++ b/0_download_data.ipynb @@ -0,0 +1,205 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "rAzEuBRkz7Av" + }, + "outputs": [], + "source": [ + "# CDS API\n", + "import cdsapi" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Directories for Data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "A-mWHEgM0Jpp" + }, + "outputs": [], + "source": [ + "DATADIR = 'era5/'\n", + "LABELDIR = 'fire_danger/'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download ERA5 data for Argentina from 2002 to 2022" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P_4OCcXt0SR2" + }, + "outputs": [], + "source": [ + "for year in range(2002,2023):\n", + " target_file = DATADIR + f'{year}.nc'\n", + " c = cdsapi.Client()\n", + " c.retrieve(\n", + " 'reanalysis-era5-single-levels',\n", + " {\n", + " 'product_type': 'reanalysis',\n", + " 'format': 'netcdf',\n", + " 'month': [\n", + " '01','02','12'\n", + " ],\n", + " 'day': [\n", + " '01', '02', '03',\n", + " '04', '05', '06',\n", + " '07', '08', '09',\n", + " '10', '11', '12',\n", + " '13', '14', '15',\n", + " '16', '17', '18',\n", + " '19', '20', '21',\n", + " '22', '23', '24',\n", + " '25', '26', '27',\n", + " '28', '29', '30',\n", + " '31',\n", + " ],\n", + " 'time': [\n", + " '15:00',\n", + " ],\n", + " 'variable': [\n", + " '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_temperature',\n", + " 'leaf_area_index_high_vegetation', 'leaf_area_index_low_vegetation', 'total_precipitation',\n", + " ],\n", + " 'year': [str(year)],\n", + " 'area': [\n", + " -20, -79, -57, # North West South East\n", + " -43,\n", + " ],\n", + " },\n", + " target_file)\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download Fire Danger Index" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-28 12:15:49,638 INFO Welcome to the CDS\n", + "2023-07-28 12:15:49,639 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1\n", + "2023-07-28 12:15:49,854 INFO Request is completed\n", + "2023-07-28 12:15:49,855 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2019.nc (7.4M)\n", + "2023-07-28 12:15:52,491 INFO Download rate 2.8M/s \n", + "2023-07-28 12:15:53,197 INFO Welcome to the CDS\n", + "2023-07-28 12:15:53,199 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1\n", + "2023-07-28 12:15:53,439 INFO Request is completed\n", + "2023-07-28 12:15:53,440 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2020.nc (7.4M)\n", + "2023-07-28 12:15:54,590 INFO Download rate 6.5M/s \n", + "2023-07-28 12:15:54,976 INFO Welcome to the 
CDS\n", + "2023-07-28 12:15:54,977 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1\n", + "2023-07-28 12:15:55,220 INFO Request is completed\n", + "2023-07-28 12:15:55,221 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2021.nc (7.4M)\n", + "2023-07-28 12:15:56,690 INFO Download rate 5.1M/s \n", + "2023-07-28 12:15:57,082 INFO Welcome to the CDS\n", + "2023-07-28 12:15:57,083 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1\n", + "2023-07-28 12:15:57,301 INFO Request is completed\n", + "2023-07-28 12:15:57,302 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2022.nc (7.4M)\n", + "2023-07-28 12:15:58,812 INFO Download rate 4.9M/s \n" + ] + } + ], + "source": [ + "for year in range(2002,2023):\n", + "    target_file = LABELDIR + f'{year}.nc'\n", + "    c = cdsapi.Client()\n", + "    c.retrieve(\n", + "        'cems-fire-historical-v1',\n", + "        {\n", + "            'product_type': 'reanalysis',\n", + "            'variable': 'fire_danger_index',\n", + "            'dataset_type': 'consolidated_dataset',\n", + "            'system_version': '4_1',\n", + "            'year': str(year),\n", + "            'month': [\n", + "                '01', '02', '12',\n", + "            ],\n", + "            'day': [\n", + "                '01', '02', '03',\n", + "                '04', '05', '06',\n", + "                '07', '08', '09',\n", + "                '10', '11', '12',\n", + "                '13', '14', '15',\n", + "                '16', '17', '18',\n", + "                '19', '20', '21',\n", + "                '22', '23', '24',\n", + "                '25', '26', '27',\n", + "                '28', '29', '30',\n", + "                '31',\n", + "            ],\n", + "            'area': [\n", + "                -20, -79, -57,\n", + "                -43,\n", + "            ],\n", + "            'grid': '0.25/0.25',\n", + "            'format': 'netcdf',\n", + "        },\n", + "        target_file)\n", + "\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/1_prepare_data.ipynb b/1_prepare_data.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..caaf84e7ef66aba60e53fba36b464efcb6e77c33 --- /dev/null +++ b/1_prepare_data.ipynb @@ -0,0 +1,234 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "DATADIR = 'era5/' # directory containing downloaded era5 data\n", + "FIREDATADIR = 'fire_danger/' # directory containing fire data\n", + "DESTDIR = 'processed_era5/' # directory to save .npy files for each time step and variable\n", + "FIREDESTDIR = 'processed_fire_data/' # directory to save .npy files for each time step and variable for fire data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import netCDF4 as nc\n", + "import os\n", + "from tqdm.notebook import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "vars = ['u10','v10','t2m','lai_hv','lai_lv','tp'] #considered variables (see 0_download_data.ipynb for long names)\n", + "months = [(1,31),(2,28),(12,31)] # months and number of days per month in the downloaded era5 .nc files\n", + "years = np.arange(2002,2023) # downloaded years\n", + "fire_vars = ['fdimrk'] # fire data variables" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "#root = nc.Dataset(FIREDATADIR + f\"2002.nc\", 'r')\n", + "#root['fdimrk']\n", + "#root.close()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "77f93c51256047b981775b4b6469c9d7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/21 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9a27a43175944c7aad21cf81d8c3b31f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/21 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5ef1a176e6204c19a63852d952442609", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/21 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fbf30b91845a4c9bb5ca2ee543ea20ec", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/21 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6dc83680a62a42f78ba7cb7e056ac54e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/21 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b4f23446188144a389cf64ca317e40cc", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/21 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Processing ERA5 data\n", + "for var in vars:\n", + "    if not os.path.exists(DESTDIR + f\"{var}\"):\n", + "        os.makedirs(DESTDIR + f\"{var}\")\n", + "    \n", + "    for year in tqdm(years):\n", + "        root = nc.Dataset(DATADIR + f\"{year:d}.nc\", 'r')\n", + "        v = root.variables[var][:,:-9,:-5] #crop to get to a size suitable for the considered Unet-like model, here 140x140\n", + "        v = v.data\n", + "        root.close()\n", + "        if var in ['tp']: # convert precipitation from m to mm\n", + "            v = 1000 * v\n", + "        t = 0 # index of the current time step within v\n", + "        for month, days in months:\n", + "            for day in range(days):\n", + "                np.save(DESTDIR + f\"{var}/{year}_{month:02d}_{day+1:02d}.npy\",v[t])\n", + "                t += 1\n", + "            if month == 2 and year % 4 == 0: # leap-year files contain 29 February; skip it so December stays aligned\n", + "                t += 1" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b3a75dfe23fa44d7a96e2ff92f9ed211", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/21 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Processing Fire data\n", + "for var in fire_vars:\n", + "    if not os.path.exists(FIREDESTDIR + f\"{var}\"):\n", + "        os.makedirs(FIREDESTDIR + f\"{var}\")\n", + "    \n", + "    for year in tqdm(years):\n", + "        root = nc.Dataset(FIREDATADIR + f\"{year:d}.nc\", 
'r')\n", + "        v = root.variables[var][:,:-9,:-5] #crop to get to a size suitable for the considered Unet-like model, here 140x140\n", + "        v = v.data\n", + "        root.close()\n", + "        t = 0 # index of the current time step within v\n", + "        for month, days in months:\n", + "            for day in range(days):\n", + "                np.save(FIREDESTDIR + f\"{var}/{year}_{month:02d}_{day+1:02d}.npy\",v[t])\n", + "                t += 1\n", + "            if month == 2 and year % 4 == 0: # leap-year files contain 29 February; skip it so December stays aligned\n", + "                t += 1" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "root = nc.Dataset(DATADIR + f\"2002.nc\", 'r') #constant in time -> take from any year\n", + "lat = root.variables['latitude'][:-9].data #crop to get to a size suitable for the considered Unet-like model\n", + "lon = root.variables['longitude'][:-5].data \n", + "np.save(DESTDIR + 'lat.npy', lat)\n", + "np.save(DESTDIR + 'lon.npy', lon)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/2_norm_consts.ipynb b/2_norm_consts.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..2fb05496a6ee2500e3979335869dcf50ddd5b7d6 --- /dev/null +++ b/2_norm_consts.ipynb @@ -0,0 +1,102 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DATADIR = 'processed_era5/'\n", + "FIREDATADIR = 'processed_fire_data/'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from tqdm.notebook import tqdm\n", + "import os, gc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vars = ['u10','v10','t2m','lai_hv','lai_lv','tp']\n", + "target_vars = ['fdimrk']\n", + "months = [1,2,12] \n", + "val, test = [2015,2018], [2019,2022] # validation and test years; excluded when computing normalization constants\n", + "train = [x for x in np.arange(2002,2015) if x not in val+test] " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs('norm_consts', exist_ok=True) # directory for the normalization constants\n", + "for var in vars:\n", + "    tmp = [] # collect values from all training time steps and locations, then compute mean and std\n", + "    files = sorted(os.listdir(DATADIR+var)) # should contain only the files created in 1_prepare_data.ipynb\n", + "    for f in tqdm(files):\n", + "        y,m,d=f.split('_')\n", + "        if int(y) in train and int(m) in months:\n", + "            t_array = np.load(DATADIR+var+'/'+f)\n", + "            tmp += list(t_array.flatten())\n", + "    mean, std = np.mean(tmp), np.std(tmp)\n", + "    print(f'Mean {mean}, std {std}')\n", + "    np.save(f'norm_consts/input_{var}.npy',np.array([mean, std]))\n", + "del tmp\n", + "gc.collect()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for var in target_vars:\n", + "    tmp = [] # collect values from all training time steps and locations, then compute mean and std\n", + "    files = sorted(os.listdir(FIREDATADIR+var)) # should contain only the files created in 1_prepare_data.ipynb\n", + "    for f in tqdm(files):\n", + "        y,m,d=f.split('_')\n", + "        if int(y) in 
train and int(m) in months:\n", + " t_array = np.load(FIREDATADIR+var+'/'+f) \n", + " tmp += list(t_array.flatten())\n", + " mean, std = np.mean(tmp), np.std(tmp)\n", + " print(f'Mean {mean}, std {std}')\n", + " np.save(f'norm_consts/target_{var}.npy',np.array([mean, std]))\n", + "del tmp\n", + "gc.collect()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}
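
For downstream use, here is a minimal sketch of how the artefacts produced above (the per-day `.npy` tiles in `processed_era5/` and `processed_fire_data/`, plus the `[mean, std]` pairs saved to `norm_consts/`) could be assembled into normalized model inputs and targets. The function name `load_sample`, the channel ordering, and the `float32` cast are assumptions for illustration; the notebooks themselves stop after saving the normalization constants.

```python
# Sketch (assumptions noted above): build one normalized sample from the files
# written by 1_prepare_data.ipynb and 2_norm_consts.ipynb.
import numpy as np

DATADIR = 'processed_era5/'
FIREDATADIR = 'processed_fire_data/'
NORMDIR = 'norm_consts/'
vars = ['u10', 'v10', 't2m', 'lai_hv', 'lai_lv', 'tp']
target_var = 'fdimrk'

def load_sample(year, month, day):
    """Return (inputs, target): inputs (len(vars), 140, 140), target (140, 140), both z-scored."""
    stamp = f'{year}_{month:02d}_{day:02d}.npy'
    channels = []
    for var in vars:
        mean, std = np.load(NORMDIR + f'input_{var}.npy')  # training-set statistics
        channels.append((np.load(DATADIR + f'{var}/' + stamp) - mean) / std)
    inputs = np.stack(channels, axis=0).astype(np.float32)
    mean, std = np.load(NORMDIR + f'target_{target_var}.npy')
    target = ((np.load(FIREDATADIR + f'{target_var}/' + stamp) - mean) / std).astype(np.float32)
    return inputs, target

# Example: 3 January 2005 as a (6, 140, 140) input tensor and its (140, 140) fire-danger target.
x, y = load_sample(2005, 1, 3)
```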