diff --git a/Upload_dataverse.ipynb b/Upload_dataverse.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..ad4d37fda7f87cc3137ab5bb456137ee3b3b7a91 --- /dev/null +++ b/Upload_dataverse.ipynb @@ -0,0 +1,263 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ff1fd295", + "metadata": {}, + "source": [ + "### Script for upload data from LiMoNet" + ] + }, + { + "cell_type": "code", + "execution_count": 288, + "id": "5402e405", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'utf-8'" + ] + }, + "execution_count": 288, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dataverse import Connection\n", + "import numpy as np\n", + "import sys\n", + "import os\n", + "import dataverse\n", + "from lxml import etree\n", + "import json\n", + "import glob\n", + "\n", + "%matplotlib inline\n", + "sys.getdefaultencoding()" + ] + }, + { + "cell_type": "code", + "execution_count": 289, + "id": "fbc872a5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: API_TOKEN=0a1616ce-fbe8-44f7-955f-d095f1061617\n" + ] + } + ], + "source": [ + "# Token from repository\n", + "\n", + "%env API_TOKEN=0a1616ce-fbe8-44f7-955f-d095f1061617 " + ] + }, + { + "cell_type": "code", + "execution_count": 290, + "id": "bf5ecb80", + "metadata": {}, + "outputs": [], + "source": [ + "API_TOKEN = os.environ['API_TOKEN']\n", + "host = 'dataverse.redclara.net' # All clients >4.0 are supported\n", + "# Conexión a repositorio\n", + "connection = Connection(host, API_TOKEN)\n", + "# Selección de dataverse a user\n", + "dataverse_id = connection.get_dataverse('limonet') # Dataverse id" + ] + }, + { + "cell_type": "code", + "execution_count": 291, + "id": "9e50bd8b", + "metadata": {}, + "outputs": [], + "source": [ + "# Metadata\n", + "# https://docs.python.org/3/library/xml.etree.elementtree.html\n", + "# https://www.tutorialspoint.com/python3/python_xml_processing.htm\n", + "# https://lxml.de/2.0/parsing.html\n", + "# https://github.com/IQSS/dataverse-client-python\n", + "\n", + "description = 'This repository contains lightning data files recorded by LiMoNet at Bucaramanga, Colombia.'\n", + "creator = 'Peña, Jesús'" + ] + }, + { + "cell_type": "code", + "execution_count": 292, + "id": "1bc2b3ec", + "metadata": {}, + "outputs": [ + { + "ename": "OperationFailedError", + "evalue": "This dataset could not be added.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mOperationFailedError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-292-6691b1c12b86>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Create dataset\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdataset_id\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataverse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataverse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataverse_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'LM_2021_04_05'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdescription\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/src/dataverse/dataverse/dataverse.py\u001b[0m in \u001b[0;36mcreate_dataset\u001b[0;34m(self, title, description, creator, **kwargs)\u001b[0m\n\u001b[1;32m 98\u001b[0m )\n\u001b[1;32m 99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_add_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 101\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/dataverse/dataverse/dataverse.py\u001b[0m in \u001b[0;36m_add_dataset\u001b[0;34m(self, dataset)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus_code\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m201\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 113\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mOperationFailedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'This dataset could not be added.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 114\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataverse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mOperationFailedError\u001b[0m: This dataset could not be added." + ] + } + ], + "source": [ + "# Create dataset\n", + "\n", + "dataset_id = dataverse.Dataverse.create_dataset(dataverse_id, 'LM_2021_04_05', description, creator)" + ] + }, + { + "cell_type": "markdown", + "id": "527fd60d", + "metadata": {}, + "source": [ + "Los campos del archivo .json tienen palabras claves que se pueden encontrar aquÃ:\n", + "\n", + "https://guides.dataverse.org/en/4.18.1/_downloads/dataset-create-new-all-default-fields.json\n" + ] + }, + { + "cell_type": "code", + "execution_count": 273, + "id": "41773832", + "metadata": {}, + "outputs": [], + "source": [ + "# Modify metadata fields: title and dates\n", + "\n", + "date = '2021-10-28'\n", + "title = 'LM_2021_04_01'\n", + "\n", + "with open(\"metadata_limonet.json\", 'r') as f:\n", + " json_data = json.load(f)\n", + " json_data['metadataBlocks']['citation']['fields'][3]['value'][0]['dsDescriptionDate']['value']= date\n", + " json_data['metadataBlocks']['citation']['fields'][8]['value']= date\n", + " json_data['metadataBlocks']['citation']['fields'][0]['value']= title\n", + " \n", + "with open('metadata_limonet.json', 'w') as f:\n", + " json.dump(json_data, f, indent = 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 274, + "id": "398d4deb", + "metadata": {}, + "outputs": [], + "source": [ + "metadata_file = open(\"metadata_limonet.json\",)\n", + " \n", + "# returns JSON object as a dictionary\n", + "metadata = json.load(metadata_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 275, + "id": "84b4fd7e", + "metadata": {}, + "outputs": [], + "source": [ + "# Get metadata\n", + "dataset_id.update_metadata(metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": 280, + "id": "33845dfe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Lightning/Lighting_2021_04_01_20_27.dat\n", + "Lightning/Lighting_2021_04_01_19_41.dat\n", + "Lightning/Lighting_2021_04_01_19_37.dat\n", + "Lightning/Lighting_2021_04_01_19_23.dat\n", + "Lightning/Lighting_2021_04_01_18_52.dat\n", + "Lightning/Lighting_2021_04_01_19_31.dat\n", + "Lightning/Lighting_2021_04_01_19_44.dat\n", + "Data uploaded\n" + ] + } + ], + "source": [ + "# Upload data\n", + "# ej: dataset_id.upload_filepath('Lightning/Lighting_2021_04_01_18_52.dat')\n", + "\n", + "files = glob.glob(\"Lightning/Lighting_2021_04_01*.dat\")\n", + "M = len(files)\n", + "\n", + "for i in range(M):\n", + " \n", + " print (files[i])\n", + " dataset_id.upload_filepath(files[i])\n", + " \n", + "print ('Data uploaded')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "891dfa50", + "metadata": {}, + "outputs": [], + "source": [ + "# tree = etree.parse(\"metadata_limonet.xml\")\n", + "# xslt_root = etree.parse(\"xml2json.xslt\")\n", + "# transform = etree.XSLT(xslt_root)\n", + "\n", + "# result = transform(tree)\n", + "# json_load = json.loads(str(result))\n", + "# json_dump = json.dumps(json_load, indent=2)\n", + "\n", + "# print(json_dump)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68e76018", + "metadata": {}, + "outputs": [], + "source": [ + "# print file metadata\n", + "dataset_id.get_metadata()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Upload_dataverse_automatic.ipynb b/Upload_dataverse_automatic.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..c6cdc6946a8e814d0b52ab2bc57afeb21b6eea51 --- /dev/null +++ b/Upload_dataverse_automatic.ipynb @@ -0,0 +1,316 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ff1fd295", + "metadata": {}, + "source": [ + "# Script for automatically uploading LiMoNet data to a Dataverse repository" + ] + }, + { + "cell_type": "markdown", + "id": "750b30d8", + "metadata": {}, + "source": [ + "This script uploads data collected by the LiMoNet (Lightning Monitoring Network) to a Dataverse repository. The code firstly load the python packages needed for the connection to dataverse, load metadata from a **.json** file and search data files in a folder. We define some functions: **create_dataset**, **modify_metadata**, **load_metadata** and **upload_data**. For more information, some references are listed along the script.\n", + "\n", + "Author: J. Peña-RodrÃguez\n", + "2021" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "5402e405", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'utf-8'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dataverse import Connection\n", + "import numpy as np\n", + "import sys\n", + "import os\n", + "import dataverse\n", + "from lxml import etree\n", + "import json\n", + "import glob\n", + "import datetime\n", + "\n", + "%matplotlib inline\n", + "sys.getdefaultencoding()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "de9b2c88", + "metadata": {}, + "outputs": [], + "source": [ + "def progressbar(it, prefix=\"\", size=60, file=sys.stdout):\n", + " \n", + " # Progress bar animation\n", + " count = len(it)\n", + " def show(j):\n", + " x = int(size*j/count)\n", + " file.write(\"%s[%s%s] %i/%i\\r\" % (prefix, \"#\"*x, \".\"*(size-x), j, count))\n", + " file.flush() \n", + " show(0)\n", + " for i, item in enumerate(it):\n", + " yield item\n", + " show(i+1)\n", + " file.write(\"\\n\")\n", + " file.flush()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9e50bd8b", + "metadata": {}, + "outputs": [], + "source": [ + "def create_dataset(dataset_name):\n", + " \n", + " # Metadata\n", + " # https://docs.python.org/3/library/xml.etree.elementtree.html\n", + " # https://www.tutorialspoint.com/python3/python_xml_processing.htm\n", + " # https://lxml.de/2.0/parsing.html\n", + " # https://github.com/IQSS/dataverse-client-python\n", + "\n", + " description = 'This repository contains lightning data files recorded by LiMoNet at Bucaramanga, Colombia.'\n", + " creator = 'Peña, Jesús'\n", + " \n", + " # Create dataset\n", + "\n", + " dataset_id = dataverse.Dataverse.create_dataset(dataverse_id, dataset_name, description, creator)\n", + " return dataset_id" + ] + }, + { + "cell_type": "markdown", + "id": "5c527720", + "metadata": {}, + "source": [ + "Los campos del archivo .json tienen palabras claves que se pueden encontrar aquÃ:\n", + "\n", + "https://guides.dataverse.org/en/4.18.1/_downloads/dataset-create-new-all-default-fields.json\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "83d72718", + "metadata": {}, + "outputs": [], + "source": [ + "def modify_metadata(dataset_name, date):\n", + " \n", + " # Modify the metadata file metadata_limonet.json\n", + " # Modified metadata fields: title and dates\n", + " # All the fields can be midified depending on your necessity\n", + "\n", + " date = date\n", + " title = dataset_name\n", + "\n", + " with open(\"metadata_limonet.json\", 'r') as f:\n", + " json_data = json.load(f)\n", + " json_data['metadataBlocks']['citation']['fields'][3]['value'][0]['dsDescriptionDate']['value']= date\n", + " json_data['metadataBlocks']['citation']['fields'][8]['value']= date\n", + " json_data['metadataBlocks']['citation']['fields'][0]['value']= title\n", + "\n", + " with open('metadata_limonet.json', 'w') as f:\n", + " json.dump(json_data, f, indent = 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "98f30841", + "metadata": {}, + "outputs": [], + "source": [ + "def load_metadata(dataset_id):\n", + "\n", + " # Update the repository metadata\n", + " \n", + " metadata_file = open(\"metadata_limonet.json\",)\n", + "\n", + " # Returns JSON object as a dictionary\n", + " metadata = json.load(metadata_file)\n", + " # Get metadata\n", + " dataset_id.update_metadata(metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3e2ffe24", + "metadata": {}, + "outputs": [], + "source": [ + "def upload_data(dataset_id, day):\n", + " \n", + " # Upload data\n", + " # ej: dataset_id.upload_filepath('Lightning/Lighting_2021_04_01_18_52.dat')\n", + "\n", + " files = sorted(glob.glob(\"Lightning/Lighting_\" + day + \"*.dat\")) # Sort datafiles\n", + " M = len(files)\n", + "\n", + " for i in progressbar(range(M), \"Uploading: \", 50):\n", + "\n", + " dataset_id.upload_filepath(files[i])\n", + "\n", + " print ('\\nData uploaded\\n')" + ] + }, + { + "cell_type": "markdown", + "id": "16a19fd8", + "metadata": {}, + "source": [ + "## Upload data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "283b362e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: API_TOKEN=0a1616ce-fbe8-44f7-955f-d095f1061617\n" + ] + } + ], + "source": [ + "# Token from repository\n", + "\n", + "%env API_TOKEN=0a1616ce-fbe8-44f7-955f-d095f1061617 " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8d8bc25f", + "metadata": {}, + "outputs": [], + "source": [ + "API_TOKEN = os.environ['API_TOKEN']\n", + "host = 'dataverse.redclara.net' # All clients >4.0 are supported\n", + "# Conexión a repositorio\n", + "connection = Connection(host, API_TOKEN)\n", + "# Selección de dataverse a user\n", + "dataverse_id = connection.get_dataverse('limonet') # Dataverse id" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a437ef59", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Files: Lightning/Lighting_2021_11_11*.dat Dataset: LM_2021_11_11 Date: 2021-11-17\n", + "Uploading: [##################################################] 1/1\n", + "\n", + "Data uploaded\n", + "\n", + "Files: Lightning/Lighting_2021_11_13*.dat Dataset: LM_2021_11_13 Date: 2021-11-17\n", + "Uploading: [##################################################] 19/19\n", + "\n", + "Data uploaded\n", + "\n", + "Files: Lightning/Lighting_2021_11_14*.dat Dataset: LM_2021_11_14 Date: 2021-11-17\n", + "Uploading: [##################################################] 28/28\n", + "\n", + "Data uploaded\n", + "\n", + "Files: Lightning/Lighting_2021_11_15*.dat Dataset: LM_2021_11_15 Date: 2021-11-17\n", + "Uploading: [##################################################] 45/45\n", + "\n", + "Data uploaded\n", + "\n", + "Files: Lightning/Lighting_2021_11_16*.dat Dataset: LM_2021_11_16 Date: 2021-11-17\n", + "Uploading: [##################################################] 1/1\n", + "\n", + "Data uploaded\n", + "\n" + ] + } + ], + "source": [ + "year = '2021'\n", + "month = '11'\n", + "\n", + "now = datetime.datetime.now()\n", + "upload_date = (\"%s-%s-%s\" % (now.year, now.month, now.day))\n", + "\n", + "for i in range(11,17):\n", + " day = str(i).zfill(2)\n", + " file_date = (\"%s_%s_%s\" % (year, month, day))\n", + " file_name = (\"Lightning/Lighting_%s*.dat\" % file_date)\n", + " \n", + "\n", + " files = glob.glob(file_name)\n", + " M = len(files)\n", + "\n", + " if files != []: # Check files existence\n", + " \n", + " dataset_name = (\"LM_%s\" % file_date)\n", + " print (\"Files: %s Dataset: %s Date: %s\" % (file_name, dataset_name, upload_date))\n", + " \n", + " dataset_id = create_dataset(dataset_name)\n", + " modify_metadata(dataset_name, upload_date)\n", + " load_metadata(dataset_id)\n", + " upload_data(dataset_id, file_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53fa4cfc", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}