Skip to content
Snippets Groups Projects
Commit 9b8075f7 authored by David Akim's avatar David Akim
Browse files

Update 3 files

- /1_prepare_data.ipynb
- /2_norm_consts.ipynb
- /0_download_data.ipynb
parent 0298a327
No related branches found
No related tags found
No related merge requests found
Pipeline #2931 failed with stages
in 1 minute and 39 seconds
%% Cell type:markdown id: tags:
Load libraries
%% Cell type:code id: tags:
```
# CDS API
import cdsapi
```
%% Cell type:markdown id: tags:
Directories for Data
%% Cell type:code id: tags:
```
DATADIR = 'era5/'  # output directory for the downloaded ERA5 NetCDF files (one file per year)
LABELDIR = 'fire_danger/'  # output directory for the downloaded fire-danger-index NetCDF files
```
%% Cell type:markdown id: tags:
Download ERA5 data for Argentina from 2002 to 2022
%% Cell type:code id: tags:
```
# Download ERA5 single-level reanalysis for Argentina, one NetCDF file per year.
# A single CDS API client is created once and reused for every yearly request;
# creating a new Client per iteration (as before) re-reads credentials and
# opens a fresh session for no benefit.
c = cdsapi.Client()

for year in range(2002, 2023):
    target_file = DATADIR + f'{year}.nc'  # e.g. era5/2002.nc
    c.retrieve(
        'reanalysis-era5-single-levels',
        {
            'product_type': 'reanalysis',
            'format': 'netcdf',
            # Southern-hemisphere fire season: January, February, December.
            'month': [
                '01', '02', '12',
            ],
            # Request all possible days; the CDS drops dates that do not
            # exist in a given month (but keeps Feb 29 in leap years).
            'day': [
                '01', '02', '03',
                '04', '05', '06',
                '07', '08', '09',
                '10', '11', '12',
                '13', '14', '15',
                '16', '17', '18',
                '19', '20', '21',
                '22', '23', '24',
                '25', '26', '27',
                '28', '29', '30',
                '31',
            ],
            # One snapshot per day at 15:00 UTC.
            'time': [
                '15:00',
            ],
            'variable': [
                '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_temperature',
                'leaf_area_index_high_vegetation', 'leaf_area_index_low_vegetation', 'total_precipitation',
            ],
            'year': [str(year)],
            # Bounding box over Argentina: [North, West, South, East].
            'area': [
                -20, -79, -57,
                -43,
            ],
        },
        target_file)
```
%% Cell type:markdown id: tags:
Download Fire Danger Index
%% Cell type:code id: tags:
```
# Download the CEMS fire danger index for Argentina, one NetCDF file per year.
# Reuse a single CDS API client for all requests.
c = cdsapi.Client()

for year in range(2002, 2023):
    target_file = LABELDIR + f'{year}.nc'  # e.g. fire_danger/2002.nc
    c.retrieve(
        'cems-fire-historical-v1',
        {
            'product_type': 'reanalysis',
            'variable': 'fire_danger_index',
            'dataset_type': 'consolidated_dataset',
            'system_version': '4_1',
            # BUG FIX: this was hard-coded to '2002', so every yearly file
            # contained identical 2002 data (the captured cell output shows
            # the same cache URL being downloaded for 2019-2022). Use the
            # loop variable instead.
            'year': str(year),
            # Same fire-season months as the ERA5 download above.
            'month': [
                '01', '02', '12',
            ],
            # Request all possible days; the CDS drops nonexistent dates.
            'day': [
                '01', '02', '03',
                '04', '05', '06',
                '07', '08', '09',
                '10', '11', '12',
                '13', '14', '15',
                '16', '17', '18',
                '19', '20', '21',
                '22', '23', '24',
                '25', '26', '27',
                '28', '29', '30',
                '31',
            ],
            # Bounding box over Argentina: [North, West, South, East].
            'area': [
                -20, -79, -57,
                -43,
            ],
            # Regrid to the ERA5 0.25-degree grid so inputs and labels align.
            'grid': '0.25/0.25',
            'format': 'netcdf',
        },
        target_file)
```
%% Output
2023-07-28 12:15:49,638 INFO Welcome to the CDS
2023-07-28 12:15:49,639 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1
2023-07-28 12:15:49,854 INFO Request is completed
2023-07-28 12:15:49,855 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2019.nc (7.4M)
2023-07-28 12:15:52,491 INFO Download rate 2.8M/s
2023-07-28 12:15:53,197 INFO Welcome to the CDS
2023-07-28 12:15:53,199 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1
2023-07-28 12:15:53,439 INFO Request is completed
2023-07-28 12:15:53,440 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2020.nc (7.4M)
2023-07-28 12:15:54,590 INFO Download rate 6.5M/s
2023-07-28 12:15:54,976 INFO Welcome to the CDS
2023-07-28 12:15:54,977 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1
2023-07-28 12:15:55,220 INFO Request is completed
2023-07-28 12:15:55,221 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2021.nc (7.4M)
2023-07-28 12:15:56,690 INFO Download rate 5.1M/s
2023-07-28 12:15:57,082 INFO Welcome to the CDS
2023-07-28 12:15:57,083 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/cems-fire-historical-v1
2023-07-28 12:15:57,301 INFO Request is completed
2023-07-28 12:15:57,302 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data0/adaptor.mars.external-1690558130.304354-9896-9-762d05cd-253d-467d-99ae-08ef92f88bbd.nc to fire_danger/2022.nc (7.4M)
2023-07-28 12:15:58,812 INFO Download rate 4.9M/s
%% Cell type:code id: tags:
``` python
# Directory layout for the preprocessing step (1_prepare_data.ipynb).
DATADIR = 'era5/' # directory containing downloaded era5 data
FIREDATADIR = 'fire_danger/' # directory containing fire data
DESTDIR = 'processed_era5/' # directory to save .npy files for each time step and variable
FIREDESTDIR = 'processed_fire_data/' # directory to save .npy files for each time step and variable for fire data
```
%% Cell type:code id: tags:
``` python
import calendar
import os

import netCDF4 as nc
import numpy as np
from tqdm.notebook import tqdm
```
%% Cell type:code id: tags:
``` python
# NOTE(review): `vars` shadows the builtin of the same name; later cells read
# it, so it is kept for compatibility.
vars = ['u10','v10','t2m','lai_hv','lai_lv','tp'] #considered variables (see 0_download_data.ipynb for long names)
# (month, days-in-month) pairs matching the downloaded era5 .nc files.
# NOTE(review): (2,28) ignores leap years — the CDS downloads include Feb 29
# when it exists, which would shift every later time step; verify against the
# time dimension of a leap-year file (e.g. 2004.nc).
months = [(1,31),(2,28),(12,31)] # months + days in month in dowloaded era5 .nc files
years = np.arange(2002,2023) # downloaded years
fire_vars = ['fdimrk'] # fire data variables
```
%% Cell type:code id: tags:
``` python
# Debug helper (intentionally left commented out): open one yearly fire file
# and inspect the structure/attributes of the 'fdimrk' variable.
#root = nc.Dataset(FIREDATADIR + f"2002.nc", 'r')
#root['fdimrk']
#root.close()
```
%% Cell type:code id: tags:
``` python
# Processing ERA 5 data: split each yearly NetCDF file into one .npy array per
# variable and day, named {year}_{month}_{day}.npy.
for var in vars:
    # exist_ok avoids the racy check-then-create of the original code.
    os.makedirs(DESTDIR + f"{var}", exist_ok=True)
    for year in tqdm(years):
        # Context manager guarantees the file is closed even on error.
        with nc.Dataset(DATADIR + f"{year:d}.nc", 'r') as root:
            # Crop to a size suitable for the considered Unet-like model,
            # here 140x140 — same crop is applied to lat/lon below.
            v = root.variables[var][:, :-9, :-5]
            v = v.data
        if var in ['tp']:  # change unit from m to mm for precipitation
            v = 1000 * v
        t = 0  # time step within the v array currently being written
        for month, days in months:
            # BUG FIX: the downloaded files contain Feb 29 in leap years;
            # the fixed (2,28) count from `months` misaligned every later
            # time step. Query the true month length for this year instead.
            days = calendar.monthrange(year, month)[1] if month == 2 else days
            for day in range(days):
                np.save(DESTDIR + f"{var}/{year}_{month:02d}_{day+1:02d}.npy", v[t])
                t += 1
```
%% Output
%% Cell type:code id: tags:
``` python
# Processing Fire data: same per-day split as the ERA5 loop above, but for the
# fire-danger-index labels (no unit conversion needed).
for var in fire_vars:
    os.makedirs(FIREDESTDIR + f"{var}", exist_ok=True)
    for year in tqdm(years):
        # Context manager guarantees the file is closed even on error.
        with nc.Dataset(FIREDATADIR + f"{year:d}.nc", 'r') as root:
            # Crop to a size suitable for the considered Unet-like model,
            # here 140x140 — matches the ERA5 crop so inputs/labels align.
            v = root.variables[var][:, :-9, :-5]
            v = v.data
        t = 0  # time step within the v array currently being written
        for month, days in months:
            # BUG FIX: leap-year files contain Feb 29; using the fixed
            # (2,28) count misaligned every later time step.
            days = calendar.monthrange(year, month)[1] if month == 2 else days
            for day in range(days):
                np.save(FIREDESTDIR + f"{var}/{year}_{month:02d}_{day+1:02d}.npy", v[t])
                t += 1
```
%% Output
%% Cell type:code id: tags:
``` python
# The lat/lon grids are constant in time -> take them from any year.
# BUG FIX: the original never closed the dataset (file-handle leak); the
# context manager closes it even if an exception is raised.
with nc.Dataset(DATADIR + f"2002.nc", 'r') as root:
    # Same crop as the per-variable processing above (140x140 target size).
    lat = root.variables['latitude'][:-9].data
    lon = root.variables['longitude'][:-5].data
np.save(DESTDIR + 'lat.npy', lat)
np.save(DESTDIR + 'lon.npy', lon)
```
%% Cell type:code id: tags:
``` python
DATADIR = 'processed_era5/'  # per-day .npy inputs written by 1_prepare_data.ipynb
FIREDATADIR = 'processed_fire_data/'  # per-day .npy fire labels written by 1_prepare_data.ipynb
```
%% Cell type:code id: tags:
``` python
import numpy as np
from tqdm.notebook import tqdm
import os, gc
```
%% Cell type:code id: tags:
``` python
vars = ['u10','v10','t2m','lai_hv','lai_lv','tp']  # input variables to normalize
target_vars = ['fdimrk']  # target (fire danger index) variable
months = [1,2,12]  # months present in the processed data
val, test = [2015,2018], [2019,2022] #years to use for validation and testing, do not use these years to compute normalization constants
# NOTE(review): arange(2002,2015) stops at 2014, so the `not in val+test`
# filter never triggers and 2016/2017/2020/2021 are also excluded from the
# training statistics — confirm whether the upper bound should be 2023.
train = [x for x in np.arange(2002,2015) if x not in val+test]
```
%% Cell type:code id: tags:
``` python
# Compute per-variable normalization constants (mean, std) over the training
# period and save them as norm_consts/input_{var}.npy.
for var in vars:
    # Collect one flattened array per training file and concatenate once at
    # the end: growing a Python list of millions of float scalars (as the
    # original did) is far slower and uses several times the memory.
    chunks = []
    files = sorted(os.listdir(DATADIR+var)) #SHOULD NOT CONTAIN ANY OTHER FILES THAN THOSE CREATED IN 1_prepare_data.ipynb
    for f in tqdm(files):
        # Filenames are {year}_{month}_{day}.npy (day part keeps the suffix).
        y, m, d = f.split('_')
        # Only training years/months contribute to the statistics.
        if int(y) in train and int(m) in months:
            chunks.append(np.load(DATADIR+var+'/'+f).ravel())
    data = np.concatenate(chunks)
    mean, std = np.mean(data), np.std(data)
    print(f'Mean {mean}, std {std}')
    np.save(f'norm_consts/input_{var}.npy', np.array([mean, std]))
    # Release the large buffers before processing the next variable.
    del chunks, data
    gc.collect()
```
%% Cell type:code id: tags:
``` python
# Compute normalization constants (mean, std) for the target variable(s) over
# the training period and save them as norm_consts/target_{var}.npy.
for var in target_vars:
    # Same array-concatenation approach as the input loop: far cheaper than
    # accumulating individual float scalars in a Python list.
    chunks = []
    files = sorted(os.listdir(FIREDATADIR+var)) #SHOULD NOT CONTAIN ANY OTHER FILES THAN THOSE CREATED IN 1_prepare_data.ipynb
    for f in tqdm(files):
        # Filenames are {year}_{month}_{day}.npy (day part keeps the suffix).
        y, m, d = f.split('_')
        # Only training years/months contribute to the statistics.
        if int(y) in train and int(m) in months:
            chunks.append(np.load(FIREDATADIR+var+'/'+f).ravel())
    data = np.concatenate(chunks)
    mean, std = np.mean(data), np.std(data)
    print(f'Mean {mean}, std {std}')
    np.save(f'norm_consts/target_{var}.npy', np.array([mean, std]))
    # Release the large buffers before processing the next variable.
    del chunks, data
    gc.collect()
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment