algoritmos prontos

1af8deec · Pércio Reinert · 52257ee0 · 1af8deec · 1af8deec · 1af8deec
Commit 1af8deec authored 4 years ago by Pércio Reinert
Hide whitespace changes
Inline Side-by-side

Showing

with 139 additions and 0 deletions
+139 -0
--- a/.gitignore
+++ b/.gitignore
+venv/
+__pycache__/
+.idea
\ No newline at end of file
--- a/dailyCsv.py
+++ b/dailyCsv.py
+import pandas as pd
+import json
+from datetime import date
+
+
+"""
+Request the daily CSV based on the current date.
+"""
+# today's date decides which file is requested
+today = date.today()
+
+# GET - the file name carries the current year and month as YYYYMM
+url = f'http://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/inf_diario_fi_{today:%Y%m}.csv'
+df2 = pd.read_csv(url, sep=';', encoding='latin1', low_memory=False)
+
+"""
+Build a new JSON document that uses CNPJ_FUNDO as its index.
+Only the last occurrence of each CNPJ_FUNDO found in the CSV is kept.
+"""
+# last row of every CNPJ_FUNDO group, then every NaN replaced by ""
+df_2 = (
+    df2.groupby(['CNPJ_FUNDO'], as_index=False, sort=False)
+    .tail(1)
+    .fillna("")
+)
+
+# dict of dicts keyed by CNPJ_FUNDO
+df_2_json = df_2.set_index('CNPJ_FUNDO').to_dict(orient='index')
+
+# dump the result to disk
+with open('result2.json', 'w') as fp:
+    json.dump(df_2_json, fp)
--- a/firstCsv.py
+++ b/firstCsv.py
+import pandas as pd
+import json
+import functions
+
+df = pd.read_csv(
+    'http://dados.cvm.gov.br/dados/FI/CAD/DADOS/cad_fi.csv',
+    sep=';',
+    encoding='latin1',
+    low_memory=False,
+)
+
+"""
+Scraping script
+
+@madeby Hercílio, Matheus and Pércio
+"""
+# drop every row whose SIT is CANCELADA
+df_1 = df[df['SIT'] != 'CANCELADA']
+
+# keep only the columns listed by functions.getSelectedColumns()
+df_1 = df_1.filter(items=functions.getSelectedColumns(), axis=1)
+
+# collapse rows sharing a CNPJ_FUNDO into one, column by column
+df_1 = df_1.groupby(['CNPJ_FUNDO'], as_index=False, sort=False).agg(functions.aggregateDuplicatedRows)
+
+# convert specific columns to string
+df_1 = functions.convertSpecifColumnsToString(df_1)
+
+# replace every remaining NaN with an empty string
+df_1 = df_1.fillna('')
+
+# dict of dicts keyed by CNPJ_FUNDO
+df_json = df_1.set_index('CNPJ_FUNDO').to_dict(orient='index')
+
+# dump the result to disk
+with open('result.json', 'w') as fp:
+    json.dump(df_json, fp)
--- a/functions.py
+++ b/functions.py
+"""
+Function to write a file
+"""
+
+
+def writeFile(filename, data, ext='csv', op='w'):
+    """Write *data* to the file ``<filename>.<ext>`` and print "DONE".
+
+    Args:
+        filename: Path of the target file without its extension.
+        data: Content passed to the file object's ``write`` method.
+        ext: Extension appended after a dot (default ``'csv'``).
+        op: Mode string handed to ``open`` (default ``'w'``).
+    """
+    # The context manager closes the handle even when write() raises,
+    # which the previous open()/close() pair did not guarantee.
+    with open(filename + "." + ext, op) as f:
+        f.write(data)
+    print("DONE")
+
+
+"""
+Return Selected Columns
+"""
+
+
+_SELECTED_COLUMNS = (
+    'CNPJ_FUNDO', 'SIT', 'ADMIN', 'CD_CVM', 'CLASSE',
+    'CNPJ_ADMIN', 'CONDOM', 'CPF_CNPJ_GESTOR', 'DENOM_SOCIAL', 'DT_CANCEL',
+    'DT_CONST', 'DT_FIM_EXERC', 'DT_INI_ATIV', 'DT_INI_CLASSE', 'DT_INI_EXERC',
+    'DT_INI_SIT', 'DT_PATRIM_LIQ', 'FUNDO_COTAS', 'FUNDO_EXCLUSIVO', 'GESTOR',
+    'INVEST_QUALIF', 'PF_PJ_GESTOR', 'RENTAB_FUNDO', 'TAXA_ADM', 'TAXA_PERFM',
+    'TP_FUNDO', 'TRIB_LPRAZO', 'VL_PATRIM_LIQ', 'AUDITOR', 'CNPJ_AUDITOR',
+)
+
+
+def getSelectedColumns():
+    """Return the names of the selected columns, in their fixed order.
+
+    A new list is built on every call, so the caller may modify the result.
+    """
+    return list(_SELECTED_COLUMNS)
+
+"""
+Callback function that merges non-unique duplicated values
+"""
+
+
+def aggregateDuplicatedRows(x):
+    """Collapse the values of one column within a group into a single value.
+
+    Intended as a callback for ``groupby(...).aggregate``; ``x`` is the
+    Series holding one column of one group.
+
+    Args:
+        x: pandas Series with the values to merge.
+
+    Returns:
+        - ``x`` unchanged when it is empty;
+        - the first (missing) entry when every value is missing;
+        - the single distinct non-missing value when there is only one;
+        - otherwise the distinct non-missing values, in order of first
+          appearance, converted to ``str`` and joined with ``', '``.
+    """
+    if x.size == 0:
+        return x
+
+    # Missing values are left out so that a gap next to a real value does not
+    # end up as the literal text "nan" inside the joined string.
+    distinct = x.dropna().unique()
+
+    if distinct.size == 0:
+        # Nothing but missing values: hand back one of them as a scalar.
+        return x.iloc[0]
+    if distinct.size == 1:
+        # Return the scalar itself rather than a one-element Series/array, so
+        # the result does not depend on pandas unwrapping it.
+        return distinct[0]
+    return ', '.join(distinct.astype(str))
+
+
+"""
+Convert specific columns to String
+"""
+
+
+def convertSpecifColumnsToString(df):
+    """Cast the 'CD_CVM' column of *df* to ``str`` and return *df*.
+
+    The column is replaced on the DataFrame that was passed in, so the
+    caller's object is modified; the same object is returned.
+    """
+    string_columns = ('CD_CVM',)
+    for name in string_columns:
+        as_text = df[name].astype(str)
+        df[name] = as_text
+    return df