Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Radar do Investidor
radardoinvestidor-scraper
Commits
1af8deec
Commit
1af8deec
authored
4 years ago
by
Pércio Reinert
Browse files
Options
Download
Email Patches
Plain Diff
algoritmos prontos
parent
52257ee0
feat/algoritmos
Feat/history
dev
1 merge request
!1
Algoritmos prontos
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
.gitignore
+3
-0
.gitignore
dailyCsv.py
+31
-0
dailyCsv.py
firstCsv.py
+32
-0
firstCsv.py
functions.py
+73
-0
functions.py
with
139 additions
and
0 deletions
+139
-0
.gitignore
0 → 100644
View file @
1af8deec
venv/
__pycache__/
.idea
\ No newline at end of file
This diff is collapsed.
Click to expand it.
dailyCsv.py
0 → 100644
View file @
1af8deec
"""Download this month's CVM daily-report CSV and dump it as JSON.

The output file ``result2.json`` maps each CNPJ_FUNDO to the values of
its last row in the CSV.
"""
import json
from datetime import date

import pandas as pd

# The remote file name carries the current year and month (YYYYMM)
today = date.today()
url = f'http://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/inf_diario_fi_{today.strftime("%Y%m")}.csv'

# GET - request the CSV (semicolon separated, latin1 encoded)
df2 = pd.read_csv(url, sep=';', low_memory=False, encoding='latin1')

# Keep only the last occurrence of every CNPJ_FUNDO found in the CSV,
# then replace every NaN with an empty string
df_2 = (
    df2.groupby(['CNPJ_FUNDO'], as_index=False, sort=False)
    .tail(1)
    .fillna("")
)

# Build a dict keyed by CNPJ_FUNDO: {cnpj: {column: value, ...}}
indexed_by_cnpj = df_2.set_index('CNPJ_FUNDO')
df_2_json = indexed_by_cnpj.to_dict('index')

# Write the result to disk
with open('result2.json', 'w') as fp:
    json.dump(df_2_json, fp)
This diff is collapsed.
Click to expand it.
firstCsv.py
0 → 100644
View file @
1af8deec
"""Scraping script for the CVM fund registry (cad_fi.csv).

Writes ``result.json``, a dict keyed by CNPJ_FUNDO.

@madeby Hercílio, Matheus and Pércio
"""
import json

import pandas as pd

import functions

df = pd.read_csv(
    'http://dados.cvm.gov.br/dados/FI/CAD/DADOS/cad_fi.csv',
    sep=';',
    low_memory=False,
    encoding='latin1',
)

# Drop every row whose situation (SIT) is 'CANCELADA'
not_cancelled = df['SIT'] != 'CANCELADA'
df_1 = df[not_cancelled]

# Keep only the columns we want
df_1 = df_1.filter(functions.getSelectedColumns(), axis=1)

# Merge rows that share the same CNPJ_FUNDO into a single row
grouped = df_1.groupby(['CNPJ_FUNDO'], as_index=False, sort=False)
df_1 = grouped.aggregate(functions.aggregateDuplicatedRows)

# Convert specific columns to string
df_1 = functions.convertSpecifColumnsToString(df_1)

# Replace every NaN with an empty string
df_1 = df_1.fillna('')

# Build a dict keyed by CNPJ_FUNDO: {cnpj: {column: value, ...}}
df_json = df_1.set_index('CNPJ_FUNDO').to_dict('index')

# Write the result to disk
with open('result.json', 'w') as fp:
    json.dump(df_json, fp)
This diff is collapsed.
Click to expand it.
functions.py
0 → 100644
View file @
1af8deec
def writeFile(filename, data, ext='csv', op='w'):
    """Write ``data`` to the file ``<filename>.<ext>`` and print "DONE".

    Args:
        filename: Path of the target file, without the extension.
        data: Content passed to the file object's ``write`` method.
        ext: Extension appended after a dot. Defaults to ``'csv'``.
        op: Mode string handed to ``open``. Defaults to ``'w'``.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # The context manager closes the handle even when write() raises,
    # which a manual open()/close() pair does not guarantee.
    with open(filename + "." + ext, op) as f:
        f.write(data)
    print("DONE")
def getSelectedColumns():
    """Return the names of the columns to keep, as a new list on each call."""
    wanted = (
        # identification and status
        'CNPJ_FUNDO', 'SIT', 'ADMIN', 'CD_CVM', 'CLASSE', 'CNPJ_ADMIN',
        'CONDOM', 'CPF_CNPJ_GESTOR', 'DENOM_SOCIAL',
        # DT_* columns
        'DT_CANCEL', 'DT_CONST', 'DT_FIM_EXERC', 'DT_INI_ATIV',
        'DT_INI_CLASSE', 'DT_INI_EXERC', 'DT_INI_SIT', 'DT_PATRIM_LIQ',
        # remaining attributes
        'FUNDO_COTAS', 'FUNDO_EXCLUSIVO', 'GESTOR', 'INVEST_QUALIF',
        'PF_PJ_GESTOR', 'RENTAB_FUNDO', 'TAXA_ADM', 'TAXA_PERFM',
        'TP_FUNDO', 'TRIB_LPRAZO', 'VL_PATRIM_LIQ', 'AUDITOR',
        'CNPJ_AUDITOR',
    )
    return list(wanted)
def aggregateDuplicatedRows(x):
    """Reduce the values of one group to a single scalar.

    Written as a callback for pandas ``groupby(...).aggregate``; ``x`` is
    expected to be a pandas Series (it must offer ``dropna``, ``unique``,
    ``size`` and ``iloc``).

    Args:
        x: The values to merge.

    Returns:
        - the single distinct non-missing value when there is only one;
        - the distinct non-missing values, in order of first appearance,
          converted to ``str`` and joined with ``', '`` when there are
          several;
        - the first (missing) element when every value is missing, or
          ``None`` when ``x`` is empty.
    """
    # Missing values are ignored so that the literal text "nan" never ends
    # up inside a merged string.
    distinct = x.dropna().unique()

    if distinct.size == 0:
        return x.iloc[0] if x.size else None

    # Always hand back a scalar: an aggregation callback must reduce, and
    # returning a Series/ndarray relies on pandas unwrapping length-1 results.
    if distinct.size == 1:
        return distinct[0]

    return ', '.join(distinct.astype(str))
def convertSpecifColumnsToString(df):
    """Cast the 'CD_CVM' column of ``df`` to ``str`` and return ``df``.

    The frame is modified in place; the returned object is the same frame
    that was passed in.
    """
    # NOTE(review): how astype(str) renders missing values depends on the
    # pandas version - confirm the output is what callers expect.
    for name in ('CD_CVM',):
        as_text = df[name].astype(str)
        df[name] = as_text
    return df
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment
Menu
Projects
Groups
Snippets
Help