Skip to content
Snippets Groups Projects
Commit 84b93577 authored by Alexander Martínez Méndez's avatar Alexander Martínez Méndez
Browse files

Update Upload_dataverse.ipynb, Upload_dataverse_automatic.ipynb

parent e390396b
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id:ff1fd295 tags:
### Script for uploading data from LiMoNet
%% Cell type:code id:5402e405 tags:
``` python
from dataverse import Connection
import numpy as np
import sys
import os
import dataverse
from lxml import etree
import json
import glob
%matplotlib inline
sys.getdefaultencoding()
```
%% Output
'utf-8'
%% Cell type:code id:fbc872a5 tags:
``` python
# Token from repository
# SECURITY: the API token is a credential and must never be stored in the
# notebook. It is taken from the API_TOKEN environment variable when that is
# already set; otherwise it is requested interactively without being echoed.
# Any token that was previously committed in this cell should be revoked.
import getpass

if not os.environ.get('API_TOKEN'):
    os.environ['API_TOKEN'] = getpass.getpass('Dataverse API token: ')
```
%% Output
env: API_TOKEN=0a1616ce-fbe8-44f7-955f-d095f1061617
%% Cell type:code id:bf5ecb80 tags:
``` python
# Read the API token from the environment
API_TOKEN = os.environ['API_TOKEN']
host = 'dataverse.redclara.net' # All clients >4.0 are supported
# Connect to the repository
connection = Connection(host, API_TOKEN)
# Select the dataverse to use
dataverse_id = connection.get_dataverse('limonet') # Used later as the dataverse object in which datasets are created
```
%% Cell type:code id:9e50bd8b tags:
``` python
# Metadata
# Background reading (XML handling and the Dataverse Python client):
# https://docs.python.org/3/library/xml.etree.elementtree.html
# https://www.tutorialspoint.com/python3/python_xml_processing.htm
# https://lxml.de/2.0/parsing.html
# https://github.com/IQSS/dataverse-client-python
# Description and creator passed to create_dataset when the dataset is created
description = 'This repository contains lightning data files recorded by LiMoNet at Bucaramanga, Colombia.'
creator = 'Peña, Jesús'
```
%% Cell type:code id:1bc2b3ec tags:
``` python
# Create the dataset in the selected dataverse.
# create_dataset is an instance method, so it is called on the dataverse
# object directly instead of through the class with an explicit self.
dataset_id = dataverse_id.create_dataset('LM_2021_04_05', description, creator)
```
%% Output
---------------------------------------------------------------------------
OperationFailedError Traceback (most recent call last)
<ipython-input-292-6691b1c12b86> in <module>
1 # Create dataset
2
----> 3 dataset_id = dataverse.Dataverse.create_dataset(dataverse_id, 'LM_2021_04_05', description, creator)
~/src/dataverse/dataverse/dataverse.py in create_dataset(self, title, description, creator, **kwargs)
98 )
99
--> 100 self._add_dataset(dataset)
101 return dataset
102
~/src/dataverse/dataverse/dataverse.py in _add_dataset(self, dataset)
111
112 if resp.status_code != 201:
--> 113 raise OperationFailedError('This dataset could not be added.')
114
115 dataset.dataverse = self
OperationFailedError: This dataset could not be added.
%% Cell type:markdown id:527fd60d tags:
Los campos del archivo .json tienen palabras claves que se pueden encontrar aquí:
https://guides.dataverse.org/en/4.18.1/_downloads/dataset-create-new-all-default-fields.json
%% Cell type:code id:41773832 tags:
``` python
# Rewrite metadata_limonet.json in place with a new title and new dates
date = '2021-10-28'
title = 'LM_2021_04_01'

with open("metadata_limonet.json", 'r') as f:
    json_data = json.load(f)

# The fields are addressed by their position in the citation block
citation_fields = json_data['metadataBlocks']['citation']['fields']
citation_fields[3]['value'][0]['dsDescriptionDate']['value'] = date
citation_fields[8]['value'] = date
citation_fields[0]['value'] = title

with open('metadata_limonet.json', 'w') as f:
    json.dump(json_data, f, indent=2)
```
%% Cell type:code id:398d4deb tags:
``` python
# Read the metadata file; the context manager closes the file handle, which
# a bare open() call would leave open.
with open("metadata_limonet.json") as metadata_file:
    # returns JSON object as a dictionary
    metadata = json.load(metadata_file)
```
%% Cell type:code id:84b4fd7e tags:
``` python
# Send the metadata dictionary loaded from metadata_limonet.json to the dataset
dataset_id.update_metadata(metadata)
```
%% Cell type:code id:33845dfe tags:
``` python
# Upload data
# e.g.: dataset_id.upload_filepath('Lightning/Lighting_2021_04_01_18_52.dat')
# glob returns matches in arbitrary order; sort them so files are uploaded in
# chronological (file name) order.
files = sorted(glob.glob("Lightning/Lighting_2021_04_01*.dat"))
for path in files:
    print(path)
    dataset_id.upload_filepath(path)
print('Data uploaded')
```
%% Output
Lightning/Lighting_2021_04_01_20_27.dat
Lightning/Lighting_2021_04_01_19_41.dat
Lightning/Lighting_2021_04_01_19_37.dat
Lightning/Lighting_2021_04_01_19_23.dat
Lightning/Lighting_2021_04_01_18_52.dat
Lightning/Lighting_2021_04_01_19_31.dat
Lightning/Lighting_2021_04_01_19_44.dat
Data uploaded
%% Cell type:code id:891dfa50 tags:
``` python
# tree = etree.parse("metadata_limonet.xml")
# xslt_root = etree.parse("xml2json.xslt")
# transform = etree.XSLT(xslt_root)
# result = transform(tree)
# json_load = json.loads(str(result))
# json_dump = json.dumps(json_load, indent=2)
# print(json_dump)
```
%% Cell type:code id:68e76018 tags:
``` python
# Request the dataset's metadata; as the last expression in the cell, its
# result is what the notebook displays
dataset_id.get_metadata()
```
%% Cell type:markdown id:ff1fd295 tags:
# Script for automatically uploading LiMoNet data to a Dataverse repository
%% Cell type:markdown id:750b30d8 tags:
This script uploads data collected by LiMoNet (the Lightning Monitoring Network) to a Dataverse repository. The code first loads the Python packages needed to connect to Dataverse, loads the metadata from a **.json** file, and searches a folder for data files. It defines the following functions: **create_dataset**, **modify_metadata**, **load_metadata** and **upload_data**. References for further information are listed throughout the script.
Author: J. Peña-Rodríguez
2021
%% Cell type:code id:5402e405 tags:
``` python
from dataverse import Connection
import numpy as np
import sys
import os
import dataverse
from lxml import etree
import json
import glob
import datetime
%matplotlib inline
sys.getdefaultencoding()
```
%% Output
'utf-8'
%% Cell type:code id:de9b2c88 tags:
``` python
def progressbar(it, prefix="", size=60, file=sys.stdout):
    """Yield the items of *it* while drawing a text progress bar.

    Args:
        it: A sized iterable (``len(it)`` must work), e.g. a list or range.
        prefix: Text written in front of the bar.
        size: Width of the bar in characters.
        file: Writable text stream that receives the bar.

    Yields:
        Each item of *it*, in order. The bar is redrawn after the consumer
        has finished processing each item.
    """
    count = len(it)

    def show(j):
        # With an empty sequence there is no progress to compute; draw an
        # empty bar instead of dividing by zero.
        x = int(size * j / count) if count else 0
        # "\r" returns to the start of the line so the next call overwrites
        # the previous bar.
        file.write("%s[%s%s] %i/%i\r" % (prefix, "#" * x, "." * (size - x), j, count))
        file.flush()

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i + 1)
    file.write("\n")
    file.flush()
```
%% Cell type:code id:9e50bd8b tags:
``` python
def create_dataset(dataset_name):
    """Create a dataset titled *dataset_name* and return it.

    The dataset is created in the module-level ``dataverse_id`` object, using
    a fixed description and creator.

    Client library: https://github.com/IQSS/dataverse-client-python

    Args:
        dataset_name: Title of the new dataset.

    Returns:
        The object returned by ``dataverse_id.create_dataset``.
    """
    description = 'This repository contains lightning data files recorded by LiMoNet at Bucaramanga, Colombia.'
    creator = 'Peña, Jesús'
    # create_dataset is an instance method: call it on the dataverse object
    # rather than through the class with an explicit self.
    return dataverse_id.create_dataset(dataset_name, description, creator)
```
%% Cell type:markdown id:5c527720 tags:
Los campos del archivo .json tienen palabras claves que se pueden encontrar aquí:
https://guides.dataverse.org/en/4.18.1/_downloads/dataset-create-new-all-default-fields.json
%% Cell type:code id:83d72718 tags:
``` python
def modify_metadata(dataset_name, date, metadata_path="metadata_limonet.json"):
    """Rewrite the title and date fields of the metadata JSON file in place.

    Only the title and the two date values are modified; any other field can
    be changed the same way if needed.

    Args:
        dataset_name: Value written to the title field (citation field 0).
        date: Value written to the description date (citation field 3) and
            to citation field 8.
        metadata_path: Path of the JSON file to read and overwrite.
    """
    with open(metadata_path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Fields are addressed by position, so the JSON file must keep the field
    # order this function was written against.
    fields = json_data['metadataBlocks']['citation']['fields']
    fields[3]['value'][0]['dsDescriptionDate']['value'] = date
    fields[8]['value'] = date
    fields[0]['value'] = dataset_name

    with open(metadata_path, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, indent=2)
```
%% Cell type:code id:98f30841 tags:
``` python
def load_metadata(dataset_id, metadata_path="metadata_limonet.json"):
    """Read the metadata JSON file and pass it to the dataset.

    Args:
        dataset_id: Object whose ``update_metadata`` method receives the
            parsed metadata (the value returned by ``create_dataset``).
        metadata_path: Path of the JSON metadata file to read.
    """
    # The context manager closes the file handle, which a bare open() call
    # would leave open.
    with open(metadata_path, encoding='utf-8') as metadata_file:
        # json.load returns the JSON object as a dictionary
        metadata = json.load(metadata_file)
    # Update the dataset with the loaded metadata
    dataset_id.update_metadata(metadata)
```
%% Cell type:code id:3e2ffe24 tags:
``` python
def upload_data(dataset_id, day):
    """Upload every data file recorded on *day* to the dataset.

    Args:
        dataset_id: Object whose ``upload_filepath`` method is called once
            per file.
        day: Date part of the file names, e.g. ``'2021_04_01'``; files are
            matched with ``Lightning/Lighting_<day>*.dat``.
    """
    # e.g.: dataset_id.upload_filepath('Lightning/Lighting_2021_04_01_18_52.dat')
    files = sorted(glob.glob("Lightning/Lighting_" + day + "*.dat"))  # Sort datafiles
    if not files:
        # Nothing matched: report it instead of starting a progress bar over
        # an empty sequence.
        print('\nNo data files found for %s\n' % day)
        return
    for path in progressbar(files, "Uploading: ", 50):
        dataset_id.upload_filepath(path)
    print('\nData uploaded\n')
```
%% Cell type:markdown id:16a19fd8 tags:
## Upload data
%% Cell type:code id:283b362e tags:
``` python
# Token from repository
# SECURITY: the API token is a credential and must never be stored in the
# notebook. It is taken from the API_TOKEN environment variable when that is
# already set; otherwise it is requested interactively without being echoed.
# Any token that was previously committed in this cell should be revoked.
import getpass

if not os.environ.get('API_TOKEN'):
    os.environ['API_TOKEN'] = getpass.getpass('Dataverse API token: ')
```
%% Output
env: API_TOKEN=0a1616ce-fbe8-44f7-955f-d095f1061617
%% Cell type:code id:8d8bc25f tags:
``` python
# Read the API token from the environment
API_TOKEN = os.environ['API_TOKEN']
host = 'dataverse.redclara.net' # All clients >4.0 are supported
# Connect to the repository
connection = Connection(host, API_TOKEN)
# Select the dataverse to use
dataverse_id = connection.get_dataverse('limonet') # Used by create_dataset as the dataverse object in which datasets are created
```
%% Cell type:code id:a437ef59 tags:
``` python
# Create one dataset per day that has data files and upload that day's files.
year = '2021'
month = '11'
now = datetime.datetime.now()
# strftime zero-pads month and day (YYYY-MM-DD); formatting now.month and
# now.day with %s would give e.g. "2021-1-5".
upload_date = now.strftime("%Y-%m-%d")
for i in range(11, 17):  # days 11 to 16 of the month
    day = str(i).zfill(2)
    file_date = "%s_%s_%s" % (year, month, day)
    file_name = "Lightning/Lighting_%s*.dat" % file_date
    files = glob.glob(file_name)
    if not files:  # No data files for this day: nothing to create or upload
        continue
    dataset_name = "LM_%s" % file_date
    print("Files: %s Dataset: %s Date: %s" % (file_name, dataset_name, upload_date))
    dataset_id = create_dataset(dataset_name)
    modify_metadata(dataset_name, upload_date)
    load_metadata(dataset_id)
    upload_data(dataset_id, file_date)
```
%% Output
Files: Lightning/Lighting_2021_11_11*.dat Dataset: LM_2021_11_11 Date: 2021-11-17
Uploading: [##################################################] 1/1
Data uploaded
Files: Lightning/Lighting_2021_11_13*.dat Dataset: LM_2021_11_13 Date: 2021-11-17
Uploading: [##################################################] 19/19
Data uploaded
Files: Lightning/Lighting_2021_11_14*.dat Dataset: LM_2021_11_14 Date: 2021-11-17
Uploading: [##################################################] 28/28
Data uploaded
Files: Lightning/Lighting_2021_11_15*.dat Dataset: LM_2021_11_15 Date: 2021-11-17
Uploading: [##################################################] 45/45
Data uploaded
Files: Lightning/Lighting_2021_11_16*.dat Dataset: LM_2021_11_16 Date: 2021-11-17
Uploading: [##################################################] 1/1
Data uploaded
%% Cell type:code id:53fa4cfc tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment