Commit 1af8deec authored by Pércio Reinert
Browse files

algoritmos prontos

1 merge request: !1 Algoritmos prontos
Showing with 139 additions and 0 deletions
+139 -0
venv/
__pycache__/
.idea
\ No newline at end of file
import pandas as pd
import json
from datetime import date
"""
Download the CVM daily-report CSV for the current month and write the last
row found for every fund to result2.json, keyed by CNPJ_FUNDO.
"""
# the remote file name embeds today's year and month as YYYYMM
today = date.today()
year_month = today.strftime("%Y%m")
url = f'http://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/inf_diario_fi_{year_month}.csv'
df2 = pd.read_csv(url, sep=';', low_memory=False, encoding='latin1')

# one group per CNPJ_FUNDO; only the last row of each group is kept
rows_by_fund = df2.groupby(['CNPJ_FUNDO'], as_index=False, sort=False)
# NaNs are replaced by empty strings before serialising
df_2 = rows_by_fund.tail(1).fillna("")

# nested dict: {CNPJ_FUNDO: {column: value, ...}, ...}
df_2_json = df_2.set_index('CNPJ_FUNDO').to_dict(orient='index')

# persist the result
with open('result2.json', 'w') as out_file:
    json.dump(df_2_json, out_file)
import pandas as pd
import json
import functions
df = pd.read_csv(
    'http://dados.cvm.gov.br/dados/FI/CAD/DADOS/cad_fi.csv',
    sep=';',
    low_memory=False,
    encoding='latin1',
)
"""
Scraping script: builds result.json from the CVM fund register (cad_fi.csv).
Authors: Hercílio, Matheus and Pércio
"""
# drop every row whose status is CANCELADA
not_cancelled = df['SIT'] != 'CANCELADA'
df_1 = df[not_cancelled]

# keep only the columns listed by the helper module
df_1 = df_1.filter(items=functions.getSelectedColumns(), axis=1)

# collapse rows sharing the same CNPJ_FUNDO into a single row
rows_by_fund = df_1.groupby(['CNPJ_FUNDO'], as_index=False, sort=False)
df_1 = rows_by_fund.agg(functions.aggregateDuplicatedRows)

# convert specific columns to string
df_1 = functions.convertSpecifColumnsToString(df_1)

# NaNs are replaced by empty strings before serialising
df_1 = df_1.fillna('')

# nested dict: {CNPJ_FUNDO: {column: value, ...}, ...}
df_json = df_1.set_index('CNPJ_FUNDO').to_dict(orient='index')

# persist the result
with open('result.json', 'w') as out_file:
    json.dump(df_json, out_file)
def writeFile(filename, data, ext='csv', op='w'):
    """Write ``data`` to the file ``<filename>.<ext>`` and print "DONE".

    Args:
        filename: Path of the target file without its extension.
        data: Content handed to the file object's ``write`` method.
        ext: Extension appended after a dot (default ``'csv'``).
        op: Mode string passed to ``open`` (default ``'w'``).
    """
    # The context manager closes the handle even when write() raises,
    # which the previous manual open()/close() pair did not guarantee.
    with open(filename + "." + ext, op) as f:
        f.write(data)
    print("DONE")
def getSelectedColumns():
    """Return the list of column names to keep, built fresh on every call."""
    selected = (
        'CNPJ_FUNDO', 'SIT',
        'ADMIN', 'CD_CVM', 'CLASSE', 'CNPJ_ADMIN', 'CONDOM',
        'CPF_CNPJ_GESTOR', 'DENOM_SOCIAL',
        'DT_CANCEL', 'DT_CONST', 'DT_FIM_EXERC', 'DT_INI_ATIV',
        'DT_INI_CLASSE', 'DT_INI_EXERC', 'DT_INI_SIT', 'DT_PATRIM_LIQ',
        'FUNDO_COTAS', 'FUNDO_EXCLUSIVO', 'GESTOR', 'INVEST_QUALIF',
        'PF_PJ_GESTOR', 'RENTAB_FUNDO', 'TAXA_ADM', 'TAXA_PERFM',
        'TP_FUNDO', 'TRIB_LPRAZO', 'VL_PATRIM_LIQ',
        'AUDITOR', 'CNPJ_AUDITOR',
    )
    # callers receive their own list, so mutating it cannot affect later calls
    return list(selected)
def aggregateDuplicatedRows(x):
    """Collapse one column of a group of duplicated rows into a single value.

    Intended as the callback of ``groupby(...).aggregate``; ``x`` is the
    pandas Series holding one column of one group.

    Returns:
        * NaN when the group holds no non-missing value,
        * the value itself when exactly one distinct value is present,
        * otherwise the distinct values, in order of first appearance,
          converted to strings and joined with ``', '``.

    A scalar is always returned: an aggregation callback is expected to
    reduce its input, and handing back a Series/array relies on pandas
    unwrapping single-element results.
    """
    # Missing values are dropped first so they never leak into the joined
    # text as the literal string "nan"; unique() is computed only once.
    present = x.dropna().unique()
    if len(present) == 0:
        return float('nan')
    if len(present) == 1:
        return present[0]
    return ', '.join(present.astype(str))
def convertSpecifColumnsToString(df):
    """Convert specific columns of ``df`` to strings, keeping missing values.

    Only the ``CD_CVM`` column is converted. The frame is modified in place
    and also returned, so both calling styles keep working.

    Args:
        df: DataFrame that contains every column listed below.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        KeyError: if a listed column is absent from ``df``.
    """
    columns = ['CD_CVM']
    for col in columns:
        values = df[col]
        # A plain astype(str) would turn NaN into the literal text "nan",
        # which a later fillna('') can no longer blank out; the mask keeps
        # missing entries missing.
        df[col] = values.astype(str).where(values.notna())
    return df
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment