Skip to content

Commit 23d6a9e

Browse files
committed
Resultado por grupos em xlsx
1 parent 4bea027 commit 23d6a9e

File tree

66 files changed

+12
-45839
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

66 files changed

+12
-45839
lines changed

grupos/COMUNICAÇÃO_EM_CONTEXTOS_ORGANIZACIONAIS.csv

-1,036
This file was deleted.

grupos/COMUNICAÇÃO_E_CAMPO_DO_INCONSCIENTE.csv

-371
This file was deleted.

grupos/COMUNICAÇÃO_E_CIBERCULTURA.csv

-2,934
This file was deleted.

grupos/COMUNICAÇÃO_E_CIDADANIA.csv

-1,653
This file was deleted.

grupos/COMUNICAÇÃO_E_CULTURA.csv

-2,795
This file was deleted.

grupos/COMUNICAÇÃO_E_EXPERIÊNCIA_ESTÉTICA.csv

-1,492
This file was deleted.

grupos/COMUNICAÇÃO_E_POLÍTICA.csv

-4,128
This file was deleted.

grupos/COMUNICAÇÃO_E_POÉTICAS_DIGITAIS.csv

-475
This file was deleted.

grupos/COMUNICAÇÃO_E_SOCIABILIDADE.csv

-3,212
This file was deleted.

grupos/COMUNICAÇÃO_E_SOCIEDADE_TECNOLÓGICA(GT_INATIVO).csv

-169
This file was deleted.

grupos/COMUNICAÇÃO_E_SOCIEDADE_TECNOLÓGICA.csv

-214
This file was deleted.

grupos/CONSUMOS_E_PROCESSOS_DE_COMUNICAÇÃO.csv

-763
This file was deleted.

grupos/CRIAÇÃO_E_POÉTICAS_DIGITAIS.csv

-484
This file was deleted.

grupos/CULTURA_DAS_MÍDIAS.csv

-2,553
This file was deleted.

grupos/ECONOMIA_POLÍTICA_E_POLÍTICAS_DE_COMUNICAÇÃO.csv

-696
This file was deleted.

grupos/EPISTEMOLOGIA_DA_COMUNICAÇÃO.csv

-2,580
This file was deleted.

grupos/ESTRATÉGIAS_E_POLÍTICAS_DE_COMUNICAÇÃO_(GT_INATIVO).csv

-366
This file was deleted.

grupos/ESTUDOS_DE_CINEMA,_FOTOGRAFIA_E_AUDIOVISUAL.csv

-1,820
This file was deleted.

grupos/ESTUDOS_DE_JORNALISMO.csv

-3,910
This file was deleted.

grupos/ESTUDOS_DE_SOM_E_MÚSICA.csv

-846
This file was deleted.

grupos/ESTUDOS_DE_TELEVISÃO.csv

-1,624
This file was deleted.

grupos/ESTÉTICAS_DA_COMUNICAÇÃO.csv

-666
This file was deleted.

grupos/FOTOGRAFIA,_CINEMA_E_VÍDEO.csv

-1,036
This file was deleted.

grupos/IMAGEM_E_IMAGINÁRIOS_MIDIÁTICOS.csv

-1,542
This file was deleted.

grupos/MEMÓRIA_NAS_MÍDIAS.csv

-685
This file was deleted.

grupos/MÍDIA_E_ENTRETENIMENTO.csv

-697
This file was deleted.

grupos/MÍDIA_E_RECEPÇÃO_(GT_INATIVO).csv

-1,094
This file was deleted.

grupos/POLÍTICAS_E_ESTRATÉGIAS_DE_COMUNICAÇÃO.csv

-462
This file was deleted.

grupos/PRODUÇÃO_DE_SENTIDO_NAS_MÍDIAS_(GT_INATIVO).csv

-868
This file was deleted.

grupos/PRÁTICAS_INTERACIONAIS_E_LINGUAGENS_NA_COMUNICAÇÃO.csv

-1,401
This file was deleted.

grupos/RECEPÇÃO:_PROCESSOS_DE_INTERPRETAÇÃO,_USO_E_CONSUMO_MIDIÁTICOS.csv

-2,620
This file was deleted.

grupos/TECNOLOGIAS_INFORMACIONAIS_DE_COMUNICAÇÃO_E_SOCIEDADE.csv

-644
This file was deleted.

principal.py

+9-2
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
print('1 Navegação no site')
1717
print('- Navega encontros')
1818
encontros = obter_encontros()
19-
print('- Navega grupos de trabalhos')
19+
print('- Navega csv de trabalhos')
2020
gts = flat(map(obter_gts, encontros))
2121
print('- Navega trabalhos')
2222
trabalhos = flat(map(obter_trabalhos, gts))
@@ -42,10 +42,17 @@
4242
print('2 Raspagem prévia encontrada (para refazer, delete o arquivo referencias.csv)')
4343
referencias = le_csv('referencias.csv')
4444

45+
46+
import unidecode
47+
from pyexcel.cookbook import merge_all_to_a_book
48+
import glob
4549
grupos = list(set([i['GTR'] for i in referencias]))
4650
for grupo in grupos:
4751
ref_grupo = [i for i in referencias if i['GTR'] == grupo]
48-
cria_csv(ref_grupo, './grupos/%s.csv' % grupo.replace(' ', '_'))
52+
novo_nome = unidecode.unidecode(grupo)
53+
novo_nome = grupo.replace(' ', '_').replace(',', '').replace(':', '').replace('(', '').replace(')', '')
54+
cria_csv(ref_grupo, './csv/%s.csv' % novo_nome)
55+
merge_all_to_a_book(glob.glob("./csv/%s.csv" % novo_nome), "./xlsx/%s.xlsx" % novo_nome)
4956

5057
print('4 Fim')
5158

requirements.txt

+3-1
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,6 @@ cssselect
33
pdfminer3k
44
requests-cache
55
pymongo
6-
unidecode
6+
unidecode
7+
pyexcel
8+
pyexcel-xlsx
Binary file not shown.
Binary file not shown.
570 KB
Binary file not shown.

xlsx/COMUNICAÇÃO_E_CIDADANIA.xlsx

407 KB
Binary file not shown.

xlsx/COMUNICAÇÃO_E_CULTURA.xlsx

689 KB
Binary file not shown.
Binary file not shown.

xlsx/COMUNICAÇÃO_E_POLÍTICA.xlsx

1.08 MB
Binary file not shown.
114 KB
Binary file not shown.
761 KB
Binary file not shown.
58.4 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
169 KB
Binary file not shown.

xlsx/CULTURA_DAS_MÍDIAS.xlsx

621 KB
Binary file not shown.
Binary file not shown.
624 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.

xlsx/ESTUDOS_DE_JORNALISMO.xlsx

925 KB
Binary file not shown.

xlsx/ESTUDOS_DE_SOM_E_MÚSICA.xlsx

190 KB
Binary file not shown.

xlsx/ESTUDOS_DE_TELEVISÃO.xlsx

361 KB
Binary file not shown.

xlsx/ESTÉTICAS_DA_COMUNICAÇÃO.xlsx

121 KB
Binary file not shown.

xlsx/FOTOGRAFIA_CINEMA_E_VÍDEO.xlsx

268 KB
Binary file not shown.
345 KB
Binary file not shown.

xlsx/MEMÓRIA_NAS_MÍDIAS.xlsx

176 KB
Binary file not shown.

xlsx/MÍDIA_E_ENTRETENIMENTO.xlsx

176 KB
Binary file not shown.
329 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)