diff --git a/0_download_data.ipynb b/0_download_data.ipynb index baebee348b0deccb80810cd8d430985e5a0dd4ca..9546aba45fc1d93ea49305c8bb79181a33be85d3 100644 --- a/0_download_data.ipynb +++ b/0_download_data.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "id": "rAzEuBRkz7Av" }, @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "id": "A-mWHEgM0Jpp" }, @@ -107,36 +107,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-07-28 12:15:49,638 INFO Welcome to the CDS\n", - "2023-07-28 12:15:49,639 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1\n", - "2023-07-28 12:15:49,854 INFO Request is completed\n", - "2023-07-28 12:15:49,855 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2019.nc (7.4M)\n", - "2023-07-28 12:15:52,491 INFO Download rate 2.8M/s \n", - "2023-07-28 12:15:53,197 INFO Welcome to the CDS\n", - "2023-07-28 12:15:53,199 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1\n", - "2023-07-28 12:15:53,439 INFO Request is completed\n", - "2023-07-28 12:15:53,440 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2020.nc (7.4M)\n", - "2023-07-28 12:15:54,590 INFO Download rate 6.5M/s \n", - "2023-07-28 12:15:54,976 INFO Welcome to the CDS\n", - "2023-07-28 12:15:54,977 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1\n", - "2023-07-28 12:15:55,220 INFO Request is 
completed\n", - "2023-07-28 12:15:55,221 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2021.nc (7.4M)\n", - "2023-07-28 12:15:56,690 INFO Download rate 5.1M/s \n", - "2023-07-28 12:15:57,082 INFO Welcome to the CDS\n", - "2023-07-28 12:15:57,083 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1\n", - "2023-07-28 12:15:57,301 INFO Request is completed\n", - "2023-07-28 12:15:57,302 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2022.nc (7.4M)\n", - "2023-07-28 12:15:58,812 INFO Download rate 4.9M/s \n" - ] - } - ], + "outputs": [], "source": [ "for year in range(2002,2023):\n", " target_file = LABELDIR + f'{year}.nc'\n", diff --git a/3_train_model.ipynb b/3_train_model.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b5dc74bacb255803573bd7bc562b8d0b4b8a455d --- /dev/null +++ b/3_train_model.ipynb @@ -0,0 +1,345 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Configurar directorios" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DATADIR = 'processed_data/'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cargar bibliotecas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np #DL specific imports below\n", + "import sys, time, copy\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "from torch.autograd import Variable\n", + "from torch.utils.data import Dataset, DataLoader\n", + "# from torchvision 
import models, transforms\n",
+    "from skorch.net import NeuralNet #pytorch wrapper skorch\n",
+    "from skorch.helper import predefined_split\n",
+    "from skorch.callbacks import Checkpoint, EarlyStopping"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Variables de configuración"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "months = np.array([(1,31),(2,28),(12,31)]) #rows of (month number, days in month); must be 2-D (n_months, 2) because the dataset indexes column 0 and column 1\n",
+    "valid, test = [2015,2018], [2019,2022] #years to use for validation and testing, do not use these years to compute normalization constants. NOTE(review): these are two-element year lists, not ranges, so 2016-2017 and 2020-2021 end up in no split -- confirm this is intended\n",
+    "train = [x for x in np.arange(2002,2015) if x not in valid+test]\n",
+    "\n",
+    "targetVar = 'fdimrk'\n",
+    "var = ['u10','v10','t2m','lai_hv','lai_lv','tp']\n",
+    "means = np.array([np.load(f'norm_consts/input_{v}.npy')[0] for v in var]) #entry 0 of each file is used as the mean\n",
+    "stds = np.array([np.load(f'norm_consts/input_{v}.npy')[1] for v in var]) #entry 1 of each file is used as the standard deviation\n",
+    "targetMean, targetStd = np.load(f'norm_consts/target_{targetVar}.npy')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Parámetros de aprendizaje profundo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##### DL parameters\n",
+    "batch_size = 64\n",
+    "learning_rate = 1e-3\n",
+    "num_epochs = 200\n",
+    "num_workers = 8\n",
+    "weight_decay=0. #handed to the optimizer when the network is created\n",
+    "patience=30 # early stopping if valid loss did not improve for 30 epochs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Parámetros de PyTorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "member = 0 #ensemble member = seed for weight initialization\n",
+    "torch.manual_seed(member) #for reproducibility and creation of a seed ensemble\n",
+    "np.random.seed(member)\n",
+    "torch.backends.cudnn.benchmark = False\n",
+    "#use_deterministic_algorithms supersedes the older torch.set_deterministic call\n",
+    "torch.use_deterministic_algorithms(True)\n",
+    "\n",
+    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
+    "print(device)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Cuenta el número de parámetros entrenables en un modelo pytorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def pytorch_count_params(model):\n",
+    "    \"\"\"Return the number of trainable (requires_grad) scalar parameters of a pytorch model.\"\"\"\n",
+    "    return sum(p.numel() for p in model.parameters() if p.requires_grad)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Conjunto de datos del Índice de peligro de incendios y ERA5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class copernicusDataset(Dataset):\n",
+    "    \"\"\"Pairs of normalized input fields (one channel per entry of `var`) and the normalized `targetVar` field.\n",
+    "\n",
+    "    There is one sample per day of every month listed in the global `months`, for each year in `years`.\"\"\"\n",
+    "    def __init__(self,years,aug=False): #aug = use rotation and flipping as data augmentation\n",
+    "        self.years = years\n",
+    "        self.aug = aug\n",
+    "        #rows of (month number, days in month); the reshape also tolerates an extra level of nesting in `months`\n",
+    "        self.monthTable = np.asarray(months).reshape(-1,2)\n",
+    "        self.monthEnds = np.cumsum(self.monthTable[:,1]) #number of time steps elapsed at the end of each month\n",
+    "        self.stepsPerYear = int(self.monthEnds[-1])\n",
+    "        self.length = len(self.years)*self.stepsPerYear #nb of years * nb of time steps each year\n",
+    "\n",
+    "    def idxToFile(self,idx): #conversion between time step index and (year, month, day)-input and target file\n",
+    "        year = self.years[idx//self.stepsPerYear]\n",
+    "        tInYear = idx%self.stepsPerYear #time step within this year\n",
+    "        monthIdx = np.argmax(tInYear < self.monthEnds) #first month whose cumulative end lies beyond tInYear\n",
+    "        month, nDays = self.monthTable[monthIdx]\n",
+    "        day = tInYear - (self.monthEnds[monthIdx]-nDays) + 1 #time step within this month; day numbering starts with 1\n",
+    "        fileName = f\"/{year:d}_{month:02d}_{day:02d}.npy\" #input and target files share the same name\n",
+    "        return fileName, fileName\n",
+    "\n",
+    "    def normalize(self, x): #normalize the input fields\n",
+    "        return ((x.transpose()-means)/stds).transpose() #transposing moves the variable axis last so that means/stds broadcast over it\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.length\n",
+    "\n",
+    "    def __getitem__(self, idx):\n",
+    "        if torch.is_tensor(idx):\n",
+    "            idx = idx.tolist()\n",
+    "        inpFile, targetFile = self.idxToFile(idx)\n",
+    "        inp = self.normalize(np.stack([np.load(DATADIR+v+inpFile) for v in var]))\n",
+    "        target = ((np.load(DATADIR+targetVar+targetFile)-targetMean)/targetStd).reshape((1,100,100))\n",
+    "        if self.aug: #50 % probability to rotate by 180 deg, 50 % probability to flip left and right\n",
+    "            #draw from torch's RNG: the DataLoader seeds it differently in every worker, while numpy's global state is copied unchanged into all workers\n",
+    "            if torch.randint(2,(1,)).item(): #0 -> no rotate, 1 -> rotate\n",
+    "                inp = np.rot90(inp,k=2,axes=(1,2))\n",
+    "                target = np.rot90(target,k=2,axes=(1,2))\n",
+    "            if torch.randint(2,(1,)).item(): #0 -> no flip, 1 -> flip\n",
+    "                inp = np.flip(inp,axis=2)\n",
+    "                target = np.flip(target,axis=2)\n",
+    "        return torch.tensor(inp.astype(np.float32)), torch.tensor(target.astype(np.float32))\n",
+    "\n",
+    "trainset = copernicusDataset(train, aug=True)\n",
+    "validset = copernicusDataset(valid)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Modelo Unet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class UNet(nn.Module):\n",
+    "    \"\"\"U-Net with two down-sampling and two up-sampling levels built from unpadded 3x3 convolutions.\n",
+    "\n",
+    "    The shape comments assume a len(var) x 140 x 140 input (TODO confirm against the stored input files);\n",
+    "    that input size produces a 1 x 100 x 100 output, the shape the dataset reshapes the target to.\"\"\"\n",
+    "    def __init__(self):\n",
+    "        super(UNet, self).__init__() # in: len(var) x 140 x 140\n",
+    "        self.conv1 = nn.Conv2d(in_channels=len(var),out_channels=64,kernel_size=3) # out: 64 x 138 x 138\n",
+    "        self.bn1 = nn.BatchNorm2d(64)\n",
+    "        self.conv2 = nn.Conv2d(64,64,3) # out: 64 x 136 x 136\n",
+    "        self.bn2 = nn.BatchNorm2d(64)\n",
+    "        self.pool1 = nn.MaxPool2d(2) # out: 64 x 68 x 68\n",
+    "        self.conv3 = nn.Conv2d(64,128,3) # out: 128 x 66 x 66\n",
+    "        self.bn3 = nn.BatchNorm2d(128)\n",
+    "        self.conv4 = nn.Conv2d(128,128,3) # out: 128 x 64 x 64\n",
+    "        self.bn4 = nn.BatchNorm2d(128)\n",
+    "        self.pool2 = nn.MaxPool2d(2) # out: 128 x 32 x 32\n",
+    "        self.conv5 = nn.Conv2d(128,256,3) # out: 256 x 30 x 30\n",
+    "        self.bn5 = nn.BatchNorm2d(256)\n",
+    "        self.conv6 = nn.Conv2d(256,256,3) # out: 256 x 28 x 28\n",
+    "        self.bn6 = nn.BatchNorm2d(256)\n",
+    "        self.upconv1 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1) # out: 128 x 56 x 56\n",
+    "        ### concat with crop(conv4) -> out: 256 x 56 x 56\n",
+    "        self.conv7 = nn.Conv2d(256,128,3) # out: 128 x 54 x 54\n",
+    "        self.bn7 = nn.BatchNorm2d(128)\n",
+    "        self.conv8 = nn.Conv2d(128,128,3) # out: 128 x 52 x 52\n",
+    "        self.bn8 = nn.BatchNorm2d(128)\n",
+    "        self.upconv2 = nn.ConvTranspose2d(128,64,4,2,1) # out: 64 x 104 x 104\n",
+    "        ### concat with crop(conv2) -> out: 128 x 104 x 104\n",
+    "        self.conv9 = nn.Conv2d(128,64,3) # out: 64 x 102 x 102\n",
+    "        self.bn9 = nn.BatchNorm2d(64)\n",
+    "        self.conv10 = nn.Conv2d(64,64,3) # out: 64 x 100 x 100\n",
+    "        self.bn10 = nn.BatchNorm2d(64)\n",
+    "        self.conv11 = nn.Conv2d(64,1,1) # out: 1 x 100 x 100\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        level1 = F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(x))))))\n",
+    "        level2 = F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.pool1(level1)))))))\n",
+    "        level3 = F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.pool2(level2)))))))\n",
+    "        ### going up again - to center crop the skip connection before concatenating, use the pad function with negative padding\n",
+    "        level2 = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(torch.cat((F.pad(level2,[-4,-4,-4,-4]), self.upconv1(level3)), dim=1)))))))\n",
+    "        level1 = F.relu(self.bn10(self.conv10(F.relu(self.bn9(self.conv9(torch.cat((F.pad(level1,[-16,-16,-16,-16]), self.upconv2(level2)), dim=1)))))))\n",
+    "        return self.conv11(level1)\n",
+    "\n",
+    "model = UNet()\n",
+    "\n",
+    "print('Number of parameters in the model', pytorch_count_params(model))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if torch.cuda.device_count() > 1:\n",
+    "    print(\"Let's use\", torch.cuda.device_count(), \"GPUs!\")\n",
+    "    model = nn.DataParallel(model)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Error cuadrático medio"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class myMSE(nn.Module): # just normal MSE, but pytorch implementation somehow did not work properly\n",
+    "    \"\"\"Loss module returning the mean of the element-wise squared difference between input and target.\"\"\"\n",
+    "    def forward(self, input, target):\n",
+    "        return ((input-target)**2).mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Crear red neuronal"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "net = NeuralNet( #skorch wrapper facility\n",
+    "    model,\n",
+    "    criterion=myMSE,\n",
+    "    batch_size=batch_size,\n",
+    "    lr=learning_rate,\n",
+    "    max_epochs=num_epochs,\n",
+    "    optimizer=optim.Adam,\n",
+    "    optimizer__weight_decay=weight_decay, #was defined in the DL parameters but never handed to the optimizer\n",
+    "    iterator_train__shuffle=True,\n",
+    "    iterator_train__num_workers=num_workers,\n",
+    "    iterator_valid__shuffle=False,\n",
+    "    iterator_valid__num_workers=num_workers,\n",
+    "    train_split=predefined_split(validset), #strange naming, but validset will be used for validation not training, see skorch.helper.predefined_split documentation\n",
+    "    callbacks=[Checkpoint(dirname='training',f_params='best_params.pt'), #Saves the best parameters to best_params.pt.\n",
+    "               EarlyStopping(patience=patience, threshold=1e-3, threshold_mode='abs')], #stops training if valid loss did not improve for patience epochs\n",
+    "    device=device\n",
+    ")\n",
+    "\n",
+    "tstart = time.time()\n",
+    "net.fit(trainset)\n",
+    "print('Time for training', (time.time()-tstart)/60, 'min')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/assets/fdimrk.jpg b/assets/fdimrk.jpg deleted file mode 100644 index 9916783a25785d04f233209bf78076a4dd4ce179..0000000000000000000000000000000000000000 Binary files a/assets/fdimrk.jpg and /dev/null differ diff --git a/assets/lai_hv.jpg b/assets/lai_hv.jpg deleted file mode 100644 index 0f2750a92569414d88cba0f0768c644f382df3b9..0000000000000000000000000000000000000000 Binary files a/assets/lai_hv.jpg and /dev/null differ diff --git a/assets/t2m.jpg b/assets/t2m.jpg deleted file mode 100644 index 1c5f7ea51edd1d81f21d3faba913286c65c4ac59..0000000000000000000000000000000000000000 Binary files a/assets/t2m.jpg and /dev/null differ