diff --git a/.ipynb_checkpoints/ejercicio3-checkpoint.ipynb b/.ipynb_checkpoints/ejercicio3-checkpoint.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..61b3bf0036bdd49079aa93d539a5f87eaacba104 --- /dev/null +++ b/.ipynb_checkpoints/ejercicio3-checkpoint.ipynb @@ -0,0 +1,386 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "qC9XbgbBzYgV" + }, + "source": [ + "# Ejercicios-clase-02-datos" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CEiGCo_FzQp7" + }, + "source": [ + "***Siria Sadeddin*** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nhSYUDX3fNYu" + }, + "source": [ + "# Ejercicio No. 3 - Amigos congueros\n", + "\n", + "* Entre en contacto con 10 estudiantes del curso de datos y 2 profesores o personal de soporte\n", + "de LaConga, uno del curso de datos y otro de afuera, y consulte su nombre completo, su\n", + "nombre de usuario en mattermost, edad, país de origen, ciudad donde residen, su especialidad\n", + "científica, nombre del instituto en que estudian/laboran, y un hobbie o afición.\n", + "* Cree un diccionario llamado “compas”, donde la llave sea el nombre de usuario en mattermost,\n", + "y si despliego el valor almacenado, por ejemplo en compas[“juan-pineda”], lo que obtengo es\n", + "a la vez otro diccionario, con las llaves “nombre”, “apellido”, “país”, “residencia”, “edad”,\n", + "“institución”, “hobbie”."
# Optional dependency: run `!pip install unidecode` in a notebook cell to get
# full transliteration.  When it is not installed, a standard-library accent
# stripper is used instead (see _normalize_text).
import os

# ANSI escape sequences used to print failure messages in red.
_FAIL = '\033[91m'
_ENDC = '\033[0m'

# Exact set of fields every record must contain -- no more, no fewer.
_VALID_ITEMS = (
    'nombres',
    'apellidos',
    'edad',
    'pais',
    'residencia',
    'especialidad cientifica',
    'institucion',
    'hobbie',
)

# CSV column that holds the Mattermost user name (the key of each record).
_USER_COLUMN = 'Usuario de Mattermost'


def _normalize_text(text):
    """Return *text* as a trimmed, lower-case, accent-free string.

    Non-string input is converted with ``str`` first, so numeric ages are
    accepted.  ``unidecode`` is used when it is installed; otherwise accents
    are removed by NFKD decomposition, which covers accented Latin letters
    but does not transliterate other scripts.
    """
    text = str(text).strip().lower()
    try:
        import unidecode
    except ImportError:
        import unicodedata
        decomposed = unicodedata.normalize('NFKD', text)
        return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    return unidecode.unidecode(text)


def add_values_in_dict(compas, key, values):
    """Validate one person's record and store it in ``compas`` under ``key``.

    Field names and values are normalized (trimmed, lower-cased, accents
    removed) so that later comparisons between records are consistent.

    Args:
        compas: dictionary of existing records; it is modified in place.
        key: user name under which the new record is stored.
        values: mapping with exactly the required fields: nombres, apellidos,
            edad, pais, residencia, especialidad cientifica, institucion and
            hobbie (case and accents in the field names are ignored).

    Returns:
        The same ``compas`` dictionary on success.  ``None`` when a required
        field is missing, an unknown field is present, or ``key`` is already
        registered; a failure message is printed in those cases and
        ``compas`` is left untouched.
    """
    record = {_normalize_text(k): _normalize_text(v) for k, v in values.items()}

    # Compute each problem list once; they drive both the check and the message.
    missing = [k for k in _VALID_ITEMS if k not in record]
    invalid = [k for k in record if k not in _VALID_ITEMS]

    if missing:
        if len(missing) == 1:
            print(f"{_FAIL}FALLO: el valor {missing} es requerido{_ENDC}")
        else:
            print(f"{_FAIL}FALLO: los valores {missing} son requeridos{_ENDC}")

    if invalid:
        if len(invalid) == 1:
            print(f"{_FAIL}FALLO: el valor {invalid} no es valido{_ENDC}")
        else:
            print(f"{_FAIL}FALLO: los valores {invalid} no son validos{_ENDC}")

    if missing or invalid:
        return None

    if key in compas:
        print(f"{_FAIL}FALLO: el usuario {key} ya fue registrado{_ENDC}")
        return None

    compas[key] = record
    return compas


# Seed record.  It goes through add_values_in_dict so that it is normalized
# exactly like every record added later.
# NOTE(review): the surname is spelled 'Saddedin' here -- confirm the spelling.
compas = add_values_in_dict({}, '@sadeddins', {
    'nombres': 'siria',
    'apellidos': 'Saddedin',
    'edad': '30',
    'pais': 'Venezuela',
    'residencia': 'Colombia',
    'especialidad cientifica': 'data science',
    'institucion': 'Universidad Simon Bolivar',
    'hobbie': 'Ciencia de datos',
})


def compas_add(*arg):
    """Add one record, or every row of a CSV file, to a compas dictionary.

    Accepts either a single user or a data file:

    * ``compas_add(compas, key, values)`` adds one record; see
      ``add_values_in_dict`` for the meaning of the arguments.
    * ``compas_add(compas, csv_path)`` reads ``csv_path`` (an existing file
      whose name ends in ``.csv``) and adds one record per row.  The column
      'Usuario de Mattermost' supplies the key and every other column is a
      field of the record.  Cells are read as text and blank cells are
      stored as empty strings.

    Returns:
        The updated dictionary (the same object that was passed in), or
        ``None`` when the arguments match neither form or a record is
        rejected.  CSV loading stops at the first rejected row; rows added
        before it remain in the dictionary.

    Raises:
        KeyError: if the CSV file has no 'Usuario de Mattermost' column.
    """
    if (len(arg) == 3 and isinstance(arg[0], dict)
            and isinstance(arg[1], str) and isinstance(arg[2], dict)):
        return add_values_in_dict(arg[0], arg[1], arg[2])

    # The string check comes first so the path helpers never see a non-string.
    if (len(arg) == 2 and isinstance(arg[0], dict) and isinstance(arg[1], str)
            and arg[1].endswith('.csv') and os.path.isfile(arg[1])):
        import pandas as pd

        # dtype=str keeps ages such as "39" from turning into "39.0";
        # keep_default_na=False keeps blank cells from becoming the text "nan".
        users = pd.read_csv(arg[1], dtype=str, keep_default_na=False)
        compas = arg[0]
        for _, row in users.iterrows():
            fields = row.drop(labels=_USER_COLUMN).to_dict()
            if add_values_in_dict(compas, row[_USER_COLUMN], fields) is None:
                return None
        return compas

    print(f"{_FAIL}FALLO: los valores de entrada no son validos, revise documentacion{_ENDC}")
    return None
nacional de colombia',\n", + " 'hobbie': 'senderismo y bicicleta'},\n", + " '@andreatugores': {'nombres': 'andrea carolina',\n", + " 'apellidos': 'tugores hernandez',\n", + " 'edad': '24',\n", + " 'pais': 'venezuela',\n", + " 'residencia': 'caracas',\n", + " 'especialidad cientifica': 'nan',\n", + " 'institucion': 'universidad central de venezuela',\n", + " 'hobbie': 'tennis'},\n", + " '@fernadezn': {'nombres': 'nicolas',\n", + " 'apellidos': ' fernandez cinquepalmi',\n", + " 'edad': '24',\n", + " 'pais': 'argentina',\n", + " 'residencia': 'peru',\n", + " 'especialidad cientifica': 'ciencia de materiales',\n", + " 'institucion': 'universidad nacional mayor de san marcos',\n", + " 'hobbie': 'tocar el piano'},\n", + " '@grisalej': {'nombres': 'jennifer',\n", + " 'apellidos': 'grisales casadiegos',\n", + " 'edad': '27',\n", + " 'pais': 'colombia',\n", + " 'residencia': 'bucaramanga',\n", + " 'especialidad cientifica': 'astroparticulas',\n", + " 'institucion': 'universidad insdustrial de santander',\n", + " 'hobbie': 'rugby y yoga'},\n", + " '@mamaniy': {'nombres': 'yhony',\n", + " 'apellidos': 'mamani arce',\n", + " 'edad': '26',\n", + " 'pais': 'peru',\n", + " 'residencia': 'lima',\n", + " 'especialidad cientifica': 'fisica del estado solido',\n", + " 'institucion': 'universidad nacional mayor de san marcos',\n", + " 'hobbie': 'correr'},\n", + " '@acerot': {'nombres': 'tatiana',\n", + " 'apellidos': 'acero cuellar',\n", + " 'edad': '23',\n", + " 'pais': 'colombia',\n", + " 'residencia': 'bogota dc',\n", + " 'especialidad cientifica': 'mecanica celeste',\n", + " 'institucion': 'universidad nacional de colombia',\n", + " 'hobbie': 'ver anime, cocinar y senderismo'},\n", + " '@cristian.velandia': {'nombres': 'cristian',\n", + " 'apellidos': 'velandia',\n", + " 'edad': '27',\n", + " 'pais': 'colombia',\n", + " 'residencia': 'nan',\n", + " 'especialidad cientifica': 'optica',\n", + " 'institucion': 'universidad nacional de colombia',\n", + " 'hobbie': 'ver anime y los 
videojuegos'},\n", + " '@carrilloj': {'nombres': 'juan guillermo',\n", + " 'apellidos': 'carrillo reyes',\n", + " 'edad': '25',\n", + " 'pais': 'colombia',\n", + " 'residencia': 'bogota dc',\n", + " 'especialidad cientifica': 'astronomia',\n", + " 'institucion': 'universidad nacional de colombia',\n", + " 'hobbie': 'basket y videojuegos'},\n", + " '@teofilo': {'nombres': 'teofilo',\n", + " 'apellidos': 'vargas auccalla',\n", + " 'edad': '54',\n", + " 'pais': 'peru',\n", + " 'residencia': 'lima',\n", + " 'especialidad cientifica': 'cosmologia',\n", + " 'institucion': 'universidad nacional mayor de san marcos',\n", + " 'hobbie': 'wing chun kuen'},\n", + " '@juan-pineda': {'nombres': 'juan carlos',\n", + " 'apellidos': 'basto pineda',\n", + " 'edad': '37',\n", + " 'pais': 'colombia',\n", + " 'residencia': 'bucaramanga',\n", + " 'especialidad cientifica': 'astronomia',\n", + " 'institucion': 'universidad insdustrial de santander',\n", + " 'hobbie': 'dibujar y caminatas'}}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "info='./info.csv'\n", + "compas_add(compas,info)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DNJSQjosfgP1" + }, + "source": [ + "* Cree una función que reciba como entrada el diccionario y un país de origen, y retorne las\n", + "informaciones completas de todas las personas de ese país, tabuladas en una forma fácil de\n", + "entender.
# Exercise statement (continued): find a way to compute, from the dictionary,
# the mean age of the people in it, and a way to list every institution
# without repetition.


def _fold_text(text):
    """Return *text* as a trimmed, lower-case, accent-free string.

    ``unidecode`` is used when it is installed; otherwise accents are removed
    by NFKD decomposition from the standard library.
    """
    text = str(text).strip().lower()
    try:
        import unidecode
    except ImportError:
        import unicodedata
        decomposed = unicodedata.normalize('NFKD', text)
        return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    return unidecode.unidecode(text)


def ages(dic, pais):
    """Return the mean age of the people in ``dic`` whose country is ``pais``.

    Countries are compared ignoring case, accents and surrounding spaces, so
    "Colombia" matches records stored as "colombia".

    Args:
        dic (dict): compas dictionary; each value is a record with at least
            the keys 'pais' and 'edad'.
        pais (str): country of interest.

    Returns:
        float: mean age of the matching people, or NaN when nobody matches.
        ``None`` (after printing a failure message) when the arguments do
        not have the expected types.

    Raises:
        ValueError: if a matching record's 'edad' is not an integer value.
        KeyError: if a matching record has no 'edad' key.
    """
    fail = '\033[91m'
    endc = '\033[0m'

    if not (isinstance(dic, dict) and isinstance(pais, str)):
        print(f"{fail}FALLO:valores de entrada invalidos{endc}")
        return None

    target = _fold_text(pais)
    # Read from the dictionary that was passed in, not from a notebook global,
    # and normalize the stored country as well as the requested one.
    matching_ages = [
        int(person['edad'])
        for person in dic.values()
        if _fold_text(person.get('pais', '')) == target
    ]

    if not matching_ages:
        return float('nan')
    return sum(matching_ages) / len(matching_ages)


# Example (notebook cell): ages(compas, "Colombia")
870bceae679ac8edf168364b8397647d738f2400..61b3bf0036bdd49079aa93d539a5f87eaacba104 100644 --- a/ejercicio3.ipynb +++ b/ejercicio3.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 2, "metadata": { "id": "wxxnPu9Hei_w" }, @@ -114,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "metadata": { "id": "aYCzQ1cRUwfR" }, @@ -172,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -282,13 +282,13 @@ " 'hobbie': 'dibujar y caminatas'}}" ] }, - "execution_count": 19, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "info='C:/Users/ECF0124A/Downloads/info.csv'\n", + "info='./info.csv'\n", "compas_add(compas,info)" ] }, @@ -306,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -321,7 +321,7 @@ "29.0" ] }, - "execution_count": 20, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -379,18 +379,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.3" } }, "nbformat": 4,