From 44421549ff86e6f894ffa936b19121702d393994 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 01:08:29 -0300 Subject: [PATCH 01/31] =?UTF-8?q?Adicionadas=20depend=C3=AAncias=20ao=20pr?= =?UTF-8?q?ojeto?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .eslintrc.json | 17 +++++++++++++++++ .gitignore | 3 +++ .prettierrc | 5 +++++ package.json | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+) create mode 100644 .eslintrc.json create mode 100644 .prettierrc create mode 100644 package.json diff --git a/.eslintrc.json b/.eslintrc.json new file mode 100644 index 0000000..dd8653a --- /dev/null +++ b/.eslintrc.json @@ -0,0 +1,17 @@ +{ + "env": { + "es2021": true, + "node": true + }, + "extends": "eslint:recommended", + "parserOptions": { + "ecmaVersion": "latest", + "sourceType": "module" + }, + "plugins": [ + "prettier" + ], + "rules": { + "prettier/prettier": "error" + } +} diff --git a/.gitignore b/.gitignore index 9f11b75..4853d14 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ .idea/ +.env +node_modules +package-lock.json \ No newline at end of file diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..36301bc --- /dev/null +++ b/.prettierrc @@ -0,0 +1,5 @@ +{ + "semi": false, + "singleQuote": true, + "trailingComma": "es5" +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..b0e7f34 --- /dev/null +++ b/package.json @@ -0,0 +1,33 @@ +{ + "name": "backend-challenge", + "version": "1.0.0", + "description": "O web scraping (raspagem de rede, em tradução livre), também conhecido como extração de dados da web,\r é o nome dado ao processo de coleta de dados estruturados da web de maneira automatizada. 
Em geral,\r esse método é usado por pessoas, empresas que desejam usar a vasta quantidade de dados da web disponíveis\r publicamente para tomar decisões mais inteligentes.", + "main": "index.js", + "scripts": { + "start": "node index.js", + "test": "echo \"Error: no test specified\" && exit 1" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/agilize/backend-challenge.git" + }, + "keywords": [], + "author": "", + "license": "ISC", + "bugs": { + "url": "https://github.com/agilize/backend-challenge/issues" + }, + "homepage": "https://github.com/agilize/backend-challenge#readme", + "dependencies": { + "dotenv": "^16.0.0", + "express": "^4.17.3", + "mysql2": "^2.3.3", + "puppeteer": "^13.5.0", + "sequelize": "^6.17.0", + "sequelize-cli": "^6.4.1" + }, + "devDependencies": { + "eslint": "^8.10.0", + "prettier": "2.5.1" + } +} From 153477bfe2666f7e9cc873082276e9af6b247e2d Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 17:22:28 -0300 Subject: [PATCH 02/31] =?UTF-8?q?Criada=20l=C3=B3gica=20de=20extra=C3=A7?= =?UTF-8?q?=C3=A3o=20das=20infos=20necess=C3=A1rias=20do=20site=20utilizad?= =?UTF-8?q?o?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- index.js | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 index.js diff --git a/index.js b/index.js new file mode 100644 index 0000000..906548b --- /dev/null +++ b/index.js @@ -0,0 +1,32 @@ +const puppeteer = require('puppeteer'); + +async function teste() { + const browser = await puppeteer.launch({ headless: false }) + const page = await browser.newPage() + await page.goto( + 'https://www.transparencia.gov.br/despesas/orgao?ordenarPor=orgaoSuperior&direcao=asc' + ) + + const infosFromTable = await page.evaluate(() => { + const tds = Array.from(document.querySelectorAll('#lista tr td')); + return tds.map(td => td.innerText); + }) + + await browser.close(); + return infosFromTable; +}; + +teste().then((result) 
=> { + const arraysOfInfos = result.filter(value => value !== 'Detalhar').reduce((resultArray, item, index) => { + const breakOfArray = Math.floor(index/7); + + if (!resultArray[breakOfArray]) { + resultArray[breakOfArray] = []; + } + + resultArray[breakOfArray].push(item); + + return resultArray; + }, []); + console.log(arraysOfInfos); +}); \ No newline at end of file From 53860733e38fd8e1e08d952e0766eeb0b797155c Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 17:25:37 -0300 Subject: [PATCH 03/31] =?UTF-8?q?Apropriando=20c=C3=B3digo=20ao=20prettier?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- index.js | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/index.js b/index.js index 906548b..d5b2262 100644 --- a/index.js +++ b/index.js @@ -1,32 +1,34 @@ const puppeteer = require('puppeteer'); async function teste() { - const browser = await puppeteer.launch({ headless: false }) - const page = await browser.newPage() + const browser = await puppeteer.launch({ headless: false }); + const page = await browser.newPage(); await page.goto( 'https://www.transparencia.gov.br/despesas/orgao?ordenarPor=orgaoSuperior&direcao=asc' - ) + ); const infosFromTable = await page.evaluate(() => { const tds = Array.from(document.querySelectorAll('#lista tr td')); - return tds.map(td => td.innerText); - }) + return tds.map((td) => td.innerText); + }); await browser.close(); return infosFromTable; }; teste().then((result) => { - const arraysOfInfos = result.filter(value => value !== 'Detalhar').reduce((resultArray, item, index) => { - const breakOfArray = Math.floor(index/7); + const arraysOfInfos = result + .filter((value) => value !== 'Detalhar') + .reduce((resultArray, item, index) => { + const breakOfArray = Math.floor(index / 7); - if (!resultArray[breakOfArray]) { - resultArray[breakOfArray] = []; - } + if (!resultArray[breakOfArray]) { + resultArray[breakOfArray] = [] + } 
- resultArray[breakOfArray].push(item); + resultArray[breakOfArray].push(item); - return resultArray; - }, []); + return resultArray; + }, []) console.log(arraysOfInfos); -}); \ No newline at end of file +}); From 32ce74e0cd238a3bdfd9ff57e783fa665d75c49b Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 18:07:30 -0300 Subject: [PATCH 04/31] =?UTF-8?q?Criado=20documento=20de=20configura=C3=A7?= =?UTF-8?q?=C3=A3o=20para=20conectar=20ao=20sql?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- database/config/config.js | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 database/config/config.js diff --git a/database/config/config.js b/database/config/config.js new file mode 100644 index 0000000..f6c1dd9 --- /dev/null +++ b/database/config/config.js @@ -0,0 +1,25 @@ +require('dotenv').config(); + +module.exports = { + development: { + username: process.env.MYSQL_USER, + password: process.env.MYSQL_PASSWORD, + database: process.env.MYSQL_DB_NAME, + host: process.env.HOSTNAME, + dialect: 'mysql', + }, + test: { + username: process.env.MYSQL_USER, + password: process.env.MYSQL_PASSWORD, + database: process.env.MYSQL_DB_NAME, + host: process.env.HOSTNAME, + dialect: 'mysql', + }, + production: { + username: process.env.MYSQL_USER, + password: process.env.MYSQL_PASSWORD, + database: process.env.MYSQL_DB_NAME, + host: process.env.HOSTNAME, + dialect: 'mysql', + }, +}; \ No newline at end of file From f065e9fadc3c915c31b21c0faa5434e1fdf211ff Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 18:07:57 -0300 Subject: [PATCH 05/31] =?UTF-8?q?Criado=20modelo=20do=20banco=20de=20dados?= =?UTF-8?q?=20com=20tabela=20para=20armazenamento=20das=20informa=C3=A7?= =?UTF-8?q?=C3=B5es?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- database/models/index.js | 37 ++++++++++++++++++++++++++++++++ database/models/infosgov.js | 42 
+++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 database/models/index.js create mode 100644 database/models/infosgov.js diff --git a/database/models/index.js b/database/models/index.js new file mode 100644 index 0000000..d2c6918 --- /dev/null +++ b/database/models/index.js @@ -0,0 +1,37 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const Sequelize = require('sequelize'); +const basename = path.basename(__filename); +const env = process.env.NODE_ENV || 'development'; +const config = require(__dirname + '/../config/config.js')[env]; +const db = {}; + +let sequelize; +if (config.use_env_variable) { + sequelize = new Sequelize(process.env[config.use_env_variable], config); +} else { + sequelize = new Sequelize(config.database, config.username, config.password, config); +} + +fs + .readdirSync(__dirname) + .filter(file => { + return (file.indexOf('.') !== 0) && (file !== basename) && (file.slice(-3) === '.js'); + }) + .forEach(file => { + const model = require(path.join(__dirname, file))(sequelize, Sequelize.DataTypes); + db[model.name] = model; + }); + +Object.keys(db).forEach(modelName => { + if (db[modelName].associate) { + db[modelName].associate(db); + } +}); + +db.sequelize = sequelize; +db.Sequelize = Sequelize; + +module.exports = db; diff --git a/database/models/infosgov.js b/database/models/infosgov.js new file mode 100644 index 0000000..baa8a33 --- /dev/null +++ b/database/models/infosgov.js @@ -0,0 +1,42 @@ +module.exports = (sequelize, DataTypes) => { + const InfosGov = sequelize.define( + 'Infos_Gov', + { + mesAno: { + type: DataTypes.STRING, + mesAno: 'mes_ano', + }, + programaOrcamentario: { + type: DataTypes.STRING, + programaOrcamentario: 'programa_orcamentario', + }, + acaoOrcamentaria: { + type: DataTypes.STRING, + acaoOrcamentaria: 'acao_orcamentaria', + }, + valorEmpenhado: { + type: DataTypes.FLOAT, + valorEmpenhado: 'valor_empenhado' + }, + valorLiquidado: { + type: 
DataTypes.FLOAT, + valorLiquidado: 'valor_liquidado' + }, + valorPago: { + type: DataTypes.FLOAT, + valorPago: 'valor_pago' + }, + valorRestosAPagarPagos: { + type: DataTypes.FLOAT, + valorRestosAPagarPagos: 'valor_restos_a_pagar_pagos' + }, + }, + { + timestamps: false, + tablename: 'infos_gov', + underscored: true, + } + ); + + return InfosGov; +} From 077af2b6005069387a928ba0ac41b27fbbc48e41 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 18:08:19 -0300 Subject: [PATCH 06/31] =?UTF-8?q?Criado=20arquivo=20de=20migra=C3=A7=C3=A3?= =?UTF-8?q?o=20para=20cria=C3=A7=C3=A3o=20dos=20campos=20que=20armazenam?= =?UTF-8?q?=20as=20infos=20do=20projeto?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../20220310204522-create-infosgov.js | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 database/migrations/20220310204522-create-infosgov.js diff --git a/database/migrations/20220310204522-create-infosgov.js b/database/migrations/20220310204522-create-infosgov.js new file mode 100644 index 0000000..c4ab980 --- /dev/null +++ b/database/migrations/20220310204522-create-infosgov.js @@ -0,0 +1,52 @@ +'use strict'; + +module.exports = { + up: async (queryInterface, Sequelize) => { + await queryInterface.createTable('infos_gov', { + id: { + allowNull: false, + autoIncrement: true, + primaryKey: true, + type: Sequelize.INTEGER + }, + mesAno: { + allowNull: false, + type: Sequelize.STRING, + field: 'mes_ano' + }, + programaOrcamentario: { + allowNull: false, + type: Sequelize.STRING, + field: 'programa_orcamentario' + }, + acaoOrcamentaria: { + allowNull: false, + type: Sequelize.STRING, + field: 'acao_orcamentaria' + }, + valorEmpenhado: { + allowNull: false, + type: Sequelize.FLOAT, + field: 'valor_empenhado' + }, + valorLiquidado: { + allowNull: false, + type: Sequelize.FLOAT, + field: 'valor_liquidado' + }, + valorPago: { + allowNull: false, + type: Sequelize.FLOAT, + field: 'valor_pago' + }, + 
valorRestosAPagarPagos: { + allowNull: false, + type: Sequelize.FLOAT, + field: 'valor_restos_a_pagar_pagos' + }, + }); + }, + down: async (queryInterface, _Sequelize) => { + await queryInterface.dropTable('infos_gov'); + }, +}; \ No newline at end of file From 4cc329e6e76b9b6810db77ddfbfa4592a619e0cc Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 18:18:56 -0300 Subject: [PATCH 07/31] Criando ambiente do node --- index.js | 36 ++++++------------------------------ 1 file changed, 6 insertions(+), 30 deletions(-) diff --git a/index.js b/index.js index d5b2262..955dd86 100644 --- a/index.js +++ b/index.js @@ -1,34 +1,10 @@ -const puppeteer = require('puppeteer'); +const express = require('express'); -async function teste() { - const browser = await puppeteer.launch({ headless: false }); - const page = await browser.newPage(); - await page.goto( - 'https://www.transparencia.gov.br/despesas/orgao?ordenarPor=orgaoSuperior&direcao=asc' - ); +const app = express(); +const PORT = process.env.PORT || 3000; - const infosFromTable = await page.evaluate(() => { - const tds = Array.from(document.querySelectorAll('#lista tr td')); - return tds.map((td) => td.innerText); - }); +app.use(express.json()); - await browser.close(); - return infosFromTable; -}; +app.use(errorHandler); -teste().then((result) => { - const arraysOfInfos = result - .filter((value) => value !== 'Detalhar') - .reduce((resultArray, item, index) => { - const breakOfArray = Math.floor(index / 7); - - if (!resultArray[breakOfArray]) { - resultArray[breakOfArray] = [] - } - - resultArray[breakOfArray].push(item); - - return resultArray; - }, []) - console.log(arraysOfInfos); -}); +app.listen(PORT, () => console.log(`App running on port ${PORT}!`)); From 650aba0a96ad2fa15381789a7053b6d6a698549d Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 18:19:21 -0300 Subject: [PATCH 08/31] Retirando interpretador de json --- index.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/index.js 
b/index.js index 955dd86..5976275 100644 --- a/index.js +++ b/index.js @@ -3,8 +3,6 @@ const express = require('express'); const app = express(); const PORT = process.env.PORT || 3000; -app.use(express.json()); - app.use(errorHandler); app.listen(PORT, () => console.log(`App running on port ${PORT}!`)); From 46c5d60fa258cbf154608404701e914b10fb5ac1 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 18:26:51 -0300 Subject: [PATCH 09/31] =?UTF-8?q?Tentando=20adicionar=20ponto=20e=20v?= =?UTF-8?q?=C3=ADrgula=20automaticamente=20ao=20final=20das=20expres=C3=B5?= =?UTF-8?q?es?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .prettierrc | 1 + 1 file changed, 1 insertion(+) diff --git a/.prettierrc b/.prettierrc index 36301bc..e3c1ec3 100644 --- a/.prettierrc +++ b/.prettierrc @@ -1,5 +1,6 @@ { "semi": false, "singleQuote": true, + "semiColon": true, "trailingComma": "es5" } From ba4e39e1bfef4df0f7843d89168621dd13119603 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 18:27:04 -0300 Subject: [PATCH 10/31] Ajustando ambiente Node --- index.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/index.js b/index.js index 5976275..08cbaee 100644 --- a/index.js +++ b/index.js @@ -1,6 +1,9 @@ const express = require('express'); +const errorHandler = require('./middlewares/errorHandler'); + const app = express(); + const PORT = process.env.PORT || 3000; app.use(errorHandler); From c43e311e461a4ff4e7a584383d93df790a531042 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 18:27:18 -0300 Subject: [PATCH 11/31] Criado middleware para tratamento de erros --- middlewares/errorHandler.js | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 middlewares/errorHandler.js diff --git a/middlewares/errorHandler.js b/middlewares/errorHandler.js new file mode 100644 index 0000000..33c7d88 --- /dev/null +++ b/middlewares/errorHandler.js @@ -0,0 +1,8 @@ +module.exports = (err, _req, res, _next) => { 
+ if (err.status) { + return res.status(err.status).json({ message: err.message }); + } + + console.log(err) + return res.status(500).json({ message: 'Internal Server Error' }); +}; From 69190b5d67c5e1a0c85fee62e0f2fb52246e7db9 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 18:27:31 -0300 Subject: [PATCH 12/31] =?UTF-8?q?Utilit=C3=A1rio=20de=20dicion=C3=A1rio=20?= =?UTF-8?q?com=20express=C3=B5es=20de=20rota?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- utils/dictionary.js | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 utils/dictionary.js diff --git a/utils/dictionary.js b/utils/dictionary.js new file mode 100644 index 0000000..8128142 --- /dev/null +++ b/utils/dictionary.js @@ -0,0 +1,11 @@ +module.exports = { + success: 200, + created: 201, + noContent: 204, + badRequest: 400, + unauthorized: 401, + notFound: 404, + conflict: 409, + unprocessableEntity: 422, + serverError: 500, +}; From 690eff54b52a0f86cf35ddb82f16c474f7221f10 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 18:27:43 -0300 Subject: [PATCH 13/31] =?UTF-8?q?Fun=C3=A7=C3=A3o=20para=20middleware=20de?= =?UTF-8?q?=20tratamento=20de=20erros?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- utils/errorHandling.js | 1 + 1 file changed, 1 insertion(+) create mode 100644 utils/errorHandling.js diff --git a/utils/errorHandling.js b/utils/errorHandling.js new file mode 100644 index 0000000..3f34222 --- /dev/null +++ b/utils/errorHandling.js @@ -0,0 +1 @@ +module.exports = (status, message) => ({ status, message }); From 040656bd92d099967a1d8a2b87243f2003a7d5b6 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 19:40:28 -0300 Subject: [PATCH 14/31] =?UTF-8?q?Criado=20arquivo=20da=20camada=20de=20ser?= =?UTF-8?q?vi=C3=A7os=20que=20possui=20fun=C3=A7=C3=B5es=20de=20scrapping?= =?UTF-8?q?=20das=20informa=C3=A7=C3=B5es=20do=20site=20solicitado,=20inse?= 
=?UTF-8?q?r=C3=A7=C3=A3o=20das=20infos=20no=20banco=20de=20dados=20e=20bu?= =?UTF-8?q?sca=20das=20mesmas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- services/infosgovService.js | 59 +++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 services/infosgovService.js diff --git a/services/infosgovService.js b/services/infosgovService.js new file mode 100644 index 0000000..87bb9ef --- /dev/null +++ b/services/infosgovService.js @@ -0,0 +1,59 @@ +const { InfosGov } = require('../database/models'); +const errorHandling = require('../utils/errorHandling'); + +const puppeteer = require('puppeteer'); +const { badRequest, notFound } = require('../utils/dictionary'); + +const scrapping = async () => { + const browser = await puppeteer.launch({ headless: false }); + const page = await browser.newPage(); + await page.goto( + 'https://www.transparencia.gov.br/despesas/orgao?ordenarPor=orgaoSuperior&direcao=asc' + ); + + const infosFromTable = await page.evaluate(() => { + const tds = Array.from(document.querySelectorAll('#lista tr td')); + return tds.map((td) => td.innerText); + }); + + await browser.close(); + return infosFromTable; +}; + +const createInfosGov = async () => { + const resultOfScrapping = await scrapping(); + + if (!resultOfScrapping || resultOfScrapping.length === 0) { + throw errorHandling(badRequest, 'Something went wrong'); + } + + const arraysOfInfos = resultOfScrapping + .filter((value) => value !== 'Detalhar') + .reduce((resultArray, item, index) => { + const breakOfArray = Math.floor(index / 7); + + if (!resultArray[breakOfArray]) { + resultArray[breakOfArray] = [] + } + + resultArray[breakOfArray].push(item); + + return resultArray; + }, []); + + arraysOfInfos.map(array => { + const [mesAno, programaOrcamentario, acaoOrcamentaria, valorEmpenhado, valorLiquidado, valorRestosAPagarPagos] = array; + await InfosGov.create({ + mesAno, programaOrcamentario, acaoOrcamentaria, 
valorEmpenhado, valorLiquidado, valorRestosAPagarPagos + }); + }); + +}; + +const getInfosGov = async () => { + const infosFromGov = await InfosGov.findAll(); + + if (!infosFromGov) throw errorHandling(notFound, 'Infos not found'); + + return infosFromGov; +}; From 895100a44f390c0fa6d6208d2fa95e200e4c73f8 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 19:47:47 -0300 Subject: [PATCH 15/31] =?UTF-8?q?Criado=20arquivo=20na=20camada=20de=20con?= =?UTF-8?q?trole=20que=20possui=20fun=C3=A7=C3=A3o=20de=20rota=20para=20bu?= =?UTF-8?q?sca=20das=20infos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- controllers/infosgovController.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 controllers/infosgovController.js diff --git a/controllers/infosgovController.js b/controllers/infosgovController.js new file mode 100644 index 0000000..bc68d48 --- /dev/null +++ b/controllers/infosgovController.js @@ -0,0 +1,17 @@ +const { getInfosGov } = require('../services/infosgovService'); +const { success } = require('../utils/dictionary'); + +const getInfosGovController = async (req, res, next) => { + try { + const infos = await getInfosGov(); + + return res.status(success).json(infos); + } catch (error) { + console.log(`GET INFOS -> ${error.message}`); + next(error); + } +}; + +module.exports = { + getInfosGovController, +}; \ No newline at end of file From f6dbee4e467ea2e36be2df74ff0783b6156ec464 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 19:47:57 -0300 Subject: [PATCH 16/31] Adicionada rota ao index --- index.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/index.js b/index.js index 08cbaee..96d0534 100644 --- a/index.js +++ b/index.js @@ -1,9 +1,12 @@ const express = require('express'); +const { getInfosGovController } = require('./controllers/infosgovController'); const errorHandler = require('./middlewares/errorHandler'); const app = express(); +app.get('/api/dados', 
getInfosGovController); + const PORT = process.env.PORT || 3000; app.use(errorHandler); From 0b4dbe96d6b23e49de4702d2518bf01fc88e30c3 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Thu, 10 Mar 2022 19:48:08 -0300 Subject: [PATCH 17/31] Erros corrigidos --- services/infosgovService.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/services/infosgovService.js b/services/infosgovService.js index 87bb9ef..09d2481 100644 --- a/services/infosgovService.js +++ b/services/infosgovService.js @@ -51,9 +51,14 @@ const createInfosGov = async () => { }; const getInfosGov = async () => { + await createInfosGov(); const infosFromGov = await InfosGov.findAll(); if (!infosFromGov) throw errorHandling(notFound, 'Infos not found'); return infosFromGov; }; + +module.exports = { + getInfosGov, +}; From 340bb5c31b875159f191aee59b577c3580effd1d Mon Sep 17 00:00:00 2001 From: Leonardo Date: Fri, 11 Mar 2022 03:59:03 -0300 Subject: [PATCH 18/31] =?UTF-8?q?Vari=C3=A1vel=20PORT=20de=20ambiente=20ad?= =?UTF-8?q?icionada=20ao=20index?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- index.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/index.js b/index.js index 96d0534..c7ea101 100644 --- a/index.js +++ b/index.js @@ -1,14 +1,14 @@ const express = require('express'); +require('dotenv').config(); const { getInfosGovController } = require('./controllers/infosgovController'); const errorHandler = require('./middlewares/errorHandler'); const app = express(); +const PORT = process.env.PORT || 8090; app.get('/api/dados', getInfosGovController); -const PORT = process.env.PORT || 3000; - app.use(errorHandler); -app.listen(PORT, () => console.log(`App running on port ${PORT}!`)); +app.listen(PORT, () => console.log(`App running on port ${PORT}!`)); \ No newline at end of file From 7c75a8d8fda1864fb68c28ad0cdabefdaa31d274 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Fri, 11 Mar 2022 03:59:55 -0300 Subject: [PATCH 19/31] 
Apagaos arquivos sequelize que estavam na pasta database pois o sequelize estava acusando erro em caminho de arquivo. --- database/config/config.js | 25 --------- .../20220310204522-create-infosgov.js | 52 ------------------- database/models/index.js | 37 ------------- database/models/infosgov.js | 42 --------------- 4 files changed, 156 deletions(-) delete mode 100644 database/config/config.js delete mode 100644 database/migrations/20220310204522-create-infosgov.js delete mode 100644 database/models/index.js delete mode 100644 database/models/infosgov.js diff --git a/database/config/config.js b/database/config/config.js deleted file mode 100644 index f6c1dd9..0000000 --- a/database/config/config.js +++ /dev/null @@ -1,25 +0,0 @@ -require('dotenv').config(); - -module.exports = { - development: { - username: process.env.MYSQL_USER, - password: process.env.MYSQL_PASSWORD, - database: process.env.MYSQL_DB_NAME, - host: process.env.HOSTNAME, - dialect: 'mysql', - }, - test: { - username: process.env.MYSQL_USER, - password: process.env.MYSQL_PASSWORD, - database: process.env.MYSQL_DB_NAME, - host: process.env.HOSTNAME, - dialect: 'mysql', - }, - production: { - username: process.env.MYSQL_USER, - password: process.env.MYSQL_PASSWORD, - database: process.env.MYSQL_DB_NAME, - host: process.env.HOSTNAME, - dialect: 'mysql', - }, -}; \ No newline at end of file diff --git a/database/migrations/20220310204522-create-infosgov.js b/database/migrations/20220310204522-create-infosgov.js deleted file mode 100644 index c4ab980..0000000 --- a/database/migrations/20220310204522-create-infosgov.js +++ /dev/null @@ -1,52 +0,0 @@ -'use strict'; - -module.exports = { - up: async (queryInterface, Sequelize) => { - await queryInterface.createTable('infos_gov', { - id: { - allowNull: false, - autoIncrement: true, - primaryKey: true, - type: Sequelize.INTEGER - }, - mesAno: { - allowNull: false, - type: Sequelize.STRING, - field: 'mes_ano' - }, - programaOrcamentario: { - allowNull: false, 
- type: Sequelize.STRING, - field: 'programa_orcamentario' - }, - acaoOrcamentaria: { - allowNull: false, - type: Sequelize.STRING, - field: 'acao_orcamentaria' - }, - valorEmpenhado: { - allowNull: false, - type: Sequelize.FLOAT, - field: 'valor_empenhado' - }, - valorLiquidado: { - allowNull: false, - type: Sequelize.FLOAT, - field: 'valor_liquidado' - }, - valorPago: { - allowNull: false, - type: Sequelize.FLOAT, - field: 'valor_pago' - }, - valorRestosAPagarPagos: { - allowNull: false, - type: Sequelize.FLOAT, - field: 'valor_restos_a_pagar_pagos' - }, - }); - }, - down: async (queryInterface, _Sequelize) => { - await queryInterface.dropTable('infos_gov'); - }, -}; \ No newline at end of file diff --git a/database/models/index.js b/database/models/index.js deleted file mode 100644 index d2c6918..0000000 --- a/database/models/index.js +++ /dev/null @@ -1,37 +0,0 @@ -'use strict'; - -const fs = require('fs'); -const path = require('path'); -const Sequelize = require('sequelize'); -const basename = path.basename(__filename); -const env = process.env.NODE_ENV || 'development'; -const config = require(__dirname + '/../config/config.js')[env]; -const db = {}; - -let sequelize; -if (config.use_env_variable) { - sequelize = new Sequelize(process.env[config.use_env_variable], config); -} else { - sequelize = new Sequelize(config.database, config.username, config.password, config); -} - -fs - .readdirSync(__dirname) - .filter(file => { - return (file.indexOf('.') !== 0) && (file !== basename) && (file.slice(-3) === '.js'); - }) - .forEach(file => { - const model = require(path.join(__dirname, file))(sequelize, Sequelize.DataTypes); - db[model.name] = model; - }); - -Object.keys(db).forEach(modelName => { - if (db[modelName].associate) { - db[modelName].associate(db); - } -}); - -db.sequelize = sequelize; -db.Sequelize = Sequelize; - -module.exports = db; diff --git a/database/models/infosgov.js b/database/models/infosgov.js deleted file mode 100644 index baa8a33..0000000 
--- a/database/models/infosgov.js +++ /dev/null @@ -1,42 +0,0 @@ -module.exports = (sequelize, DataTypes) => { - const InfosGov = sequelize.define( - 'Infos_Gov', - { - mesAno: { - type: DataTypes.STRING, - mesAno: 'mes_ano', - }, - programaOrcamentario: { - type: DataTypes.STRING, - programaOrcamentario: 'programa_orcamentario', - }, - acaoOrcamentaria: { - type: DataTypes.STRING, - acaoOrcamentaria: 'acao_orcamentaria', - }, - valorEmpenhado: { - type: DataTypes.FLOAT, - valorEmpenhado: 'valor_empenhado' - }, - valorLiquidado: { - type: DataTypes.FLOAT, - valorLiquidado: 'valor_liquidado' - }, - valorPago: { - type: DataTypes.FLOAT, - valorPago: 'valor_pago' - }, - valorRestosAPagarPagos: { - type: DataTypes.FLOAT, - valorRestosAPagarPagos: 'valor_restos_a_pagar_pagos' - }, - }, - { - timestamps: false, - tablename: 'infos_gov', - underscored: true, - } - ); - - return InfosGov; -} From 61abf23dbe549588622b8ca3cec50970df62eed2 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Fri, 11 Mar 2022 04:00:11 -0300 Subject: [PATCH 20/31] =?UTF-8?q?Arquivo=20de=20migra=C3=A7=C3=A3o=20de=20?= =?UTF-8?q?bancoo=20de=20dados=20criado?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- migrations/20220310204522-create-infosgov.js | 52 ++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 migrations/20220310204522-create-infosgov.js diff --git a/migrations/20220310204522-create-infosgov.js b/migrations/20220310204522-create-infosgov.js new file mode 100644 index 0000000..868f96c --- /dev/null +++ b/migrations/20220310204522-create-infosgov.js @@ -0,0 +1,52 @@ +'use strict'; + +module.exports = { + up: async (queryInterface, Sequelize) => { + await queryInterface.createTable('infos_govs', { + id: { + allowNull: false, + autoIncrement: true, + primaryKey: true, + type: Sequelize.INTEGER + }, + mesAno: { + allowNull: false, + type: Sequelize.STRING, + field: 'mes_ano' + }, + programaOrcamentario: { + allowNull: false, + 
type: Sequelize.STRING, + field: 'programa_orcamentario' + }, + acaoOrcamentaria: { + allowNull: false, + type: Sequelize.STRING, + field: 'acao_orcamentaria' + }, + valorEmpenhado: { + allowNull: false, + type: Sequelize.STRING, + field: 'valor_empenhado' + }, + valorLiquidado: { + allowNull: false, + type: Sequelize.STRING, + field: 'valor_liquidado' + }, + valorPago: { + allowNull: false, + type: Sequelize.STRING, + field: 'valor_pago' + }, + valorRestosAPagarPagos: { + allowNull: false, + type: Sequelize.STRING, + field: 'valor_restos_a_pagar_pagos' + }, + }); + }, + down: async (queryInterface, _Sequelize) => { + await queryInterface.dropTable('infos_gov'); + }, +}; \ No newline at end of file From 8c794aed160fd7900904462a355f91be2683bb2f Mon Sep 17 00:00:00 2001 From: Leonardo Date: Fri, 11 Mar 2022 04:00:30 -0300 Subject: [PATCH 21/31] Index de arquivo da camada de modelo criado --- models/index.js | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 models/index.js diff --git a/models/index.js b/models/index.js new file mode 100644 index 0000000..33f09e7 --- /dev/null +++ b/models/index.js @@ -0,0 +1,37 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const Sequelize = require('sequelize'); +const basename = path.basename(__filename); +const env = process.env.NODE_ENV || 'development'; +const config = require(__dirname + '/../config/config.json')[env]; +const db = {}; + +let sequelize; +if (config.use_env_variable) { + sequelize = new Sequelize(process.env[config.use_env_variable], config); +} else { + sequelize = new Sequelize(config.database, config.username, config.password, config); +} + +fs + .readdirSync(__dirname) + .filter(file => { + return (file.indexOf('.') !== 0) && (file !== basename) && (file.slice(-3) === '.js'); + }) + .forEach(file => { + const model = require(path.join(__dirname, file))(sequelize, Sequelize.DataTypes); + db[model.name] = model; + }); + 
+Object.keys(db).forEach(modelName => { + if (db[modelName].associate) { + db[modelName].associate(db); + } +}); + +db.sequelize = sequelize; +db.Sequelize = Sequelize; + +module.exports = db; From 4aa74a8527eb764b524f13a245eaa700c68c3dcc Mon Sep 17 00:00:00 2001 From: Leonardo Date: Fri, 11 Mar 2022 04:00:50 -0300 Subject: [PATCH 22/31] Arquivo da camada de modeloo da tabela Infos Gov criado --- models/infosgov.js | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 models/infosgov.js diff --git a/models/infosgov.js b/models/infosgov.js new file mode 100644 index 0000000..9adc8bd --- /dev/null +++ b/models/infosgov.js @@ -0,0 +1,42 @@ +module.exports = (sequelize, DataTypes) => { + const InfosGov = sequelize.define( + 'InfosGov', + { + mesAno: { + type: DataTypes.STRING, + mesAno: 'mes_ano', + }, + programaOrcamentario: { + type: DataTypes.STRING, + programaOrcamentario: 'programa_orcamentario', + }, + acaoOrcamentaria: { + type: DataTypes.STRING, + acaoOrcamentaria: 'acao_orcamentaria', + }, + valorEmpenhado: { + type: DataTypes.STRING, + valorEmpenhado: 'valor_empenhado' + }, + valorLiquidado: { + type: DataTypes.STRING, + valorLiquidado: 'valor_liquidado' + }, + valorPago: { + type: DataTypes.STRING, + valorPago: 'valor_pago' + }, + valorRestosAPagarPagos: { + type: DataTypes.STRING, + valorRestosAPagarPagos: 'valor_restos_a_pagar_pagos' + }, + }, + { + timestamps: false, + tablename: 'infos_govs', + underscored: true, + } + ); + + return InfosGov; +} From 836b2db28d0c04145eb09a63d361c579f8ac08ca Mon Sep 17 00:00:00 2001 From: Leonardo Date: Fri, 11 Mar 2022 04:01:50 -0300 Subject: [PATCH 23/31] =?UTF-8?q?Altera=C3=A7=C3=B5es=20na=20fun=C3=A7?= =?UTF-8?q?=C3=A3o=20de=20busca=20de=20informa=C3=A7=C3=B5es=20para=20func?= =?UTF-8?q?ionar=20da=20maneira=20mais=20apropriada=20encontrada?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- services/infosgovService.js | 20 
+++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/services/infosgovService.js b/services/infosgovService.js index 09d2481..62fe143 100644 --- a/services/infosgovService.js +++ b/services/infosgovService.js @@ -1,11 +1,11 @@ -const { InfosGov } = require('../database/models'); +const { InfosGov } = require('../models'); const errorHandling = require('../utils/errorHandling'); const puppeteer = require('puppeteer'); const { badRequest, notFound } = require('../utils/dictionary'); const scrapping = async () => { - const browser = await puppeteer.launch({ headless: false }); + const browser = await puppeteer.launch({ headless: true }); const page = await browser.newPage(); await page.goto( 'https://www.transparencia.gov.br/despesas/orgao?ordenarPor=orgaoSuperior&direcao=asc' @@ -23,8 +23,8 @@ const scrapping = async () => { const createInfosGov = async () => { const resultOfScrapping = await scrapping(); - if (!resultOfScrapping || resultOfScrapping.length === 0) { - throw errorHandling(badRequest, 'Something went wrong'); + if (resultOfScrapping === undefined || resultOfScrapping.length === 0) { + throw errorHandling(badRequest, 'Scrapping failed. 
Please try again.'); } const arraysOfInfos = resultOfScrapping @@ -41,20 +41,22 @@ const createInfosGov = async () => { return resultArray; }, []); - arraysOfInfos.map(array => { - const [mesAno, programaOrcamentario, acaoOrcamentaria, valorEmpenhado, valorLiquidado, valorRestosAPagarPagos] = array; + arraysOfInfos.map(async (array) => { + const [mesAno, programaOrcamentario, acaoOrcamentaria, valorEmpenhado, valorLiquidado, valorPago, valorRestosAPagarPagos] = array; await InfosGov.create({ - mesAno, programaOrcamentario, acaoOrcamentaria, valorEmpenhado, valorLiquidado, valorRestosAPagarPagos + mesAno, programaOrcamentario, acaoOrcamentaria, valorEmpenhado, valorLiquidado, valorPago, valorRestosAPagarPagos }); }); }; const getInfosGov = async () => { - await createInfosGov(); const infosFromGov = await InfosGov.findAll(); - if (!infosFromGov) throw errorHandling(notFound, 'Infos not found'); + if (infosFromGov.length === 0) { + await createInfosGov(); + return 'Database filled. Request again to see the infos.'; + } return infosFromGov; }; From 5ce71b770ecb76e39ef5ef5fdf237d31e96753e9 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Fri, 11 Mar 2022 04:02:20 -0300 Subject: [PATCH 24/31] =?UTF-8?q?NotFound=20retirado=20pois=20n=C3=A3o=20s?= =?UTF-8?q?er=C3=A1=20usado?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- services/infosgovService.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/infosgovService.js b/services/infosgovService.js index 62fe143..181efdd 100644 --- a/services/infosgovService.js +++ b/services/infosgovService.js @@ -2,7 +2,7 @@ const { InfosGov } = require('../models'); const errorHandling = require('../utils/errorHandling'); const puppeteer = require('puppeteer'); -const { badRequest, notFound } = require('../utils/dictionary'); +const { badRequest } = require('../utils/dictionary'); const scrapping = async () => { const browser = await puppeteer.launch({ headless: true 
}); From 55bc7b13e405f8858c61763b540df86e500ea467 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Fri, 11 Mar 2022 04:02:49 -0300 Subject: [PATCH 25/31] =?UTF-8?q?arquivo=20de=20configura=C3=A7=C3=A3o=20p?= =?UTF-8?q?ara=20conex=C3=A3o=20com=20banco=20de=20dados=20criado?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/config.json | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 config/config.json diff --git a/config/config.json b/config/config.json new file mode 100644 index 0000000..0fd35ea --- /dev/null +++ b/config/config.json @@ -0,0 +1,23 @@ +{ + "development": { + "username": "root", + "password": null, + "database": "portal_gov", + "host": "127.0.0.1", + "dialect": "mysql" + }, + "test": { + "username": "root", + "password": null, + "database": "database_test", + "host": "127.0.0.1", + "dialect": "mysql" + }, + "production": { + "username": "root", + "password": null, + "database": "database_production", + "host": "127.0.0.1", + "dialect": "mysql" + } +} From a4d9a60a3d59815004b1954b64c67a2b53fae391 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Sat, 12 Mar 2022 02:22:47 -0300 Subject: [PATCH 26/31] =?UTF-8?q?Comentados=20links=20que=20me=20ajudaram?= =?UTF-8?q?=20no=20c=C3=B3digo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- services/infosgovService.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/infosgovService.js b/services/infosgovService.js index 181efdd..8f3d940 100644 --- a/services/infosgovService.js +++ b/services/infosgovService.js @@ -11,6 +11,8 @@ const scrapping = async () => { 'https://www.transparencia.gov.br/despesas/orgao?ordenarPor=orgaoSuperior&direcao=asc' ); + // link que me ajudou na lógica de pegar infos da tabela e juntar em um array + // https://stackoverflow.com/questions/49236981/want-to-scrape-table-using-puppeteer-how-can-i-get-all-rows-iterate-through-ro const infosFromTable = 
await page.evaluate(() => { const tds = Array.from(document.querySelectorAll('#lista tr td')); return tds.map((td) => td.innerText); @@ -27,6 +29,8 @@ const createInfosGov = async () => { throw errorHandling(badRequest, 'Scrapping failed. Please try again.'); } + // link que me ajudou na lógica de cortar o array em arrays menores + // https://stackoverflow.com/questions/8495687/split-array-into-chunks const arraysOfInfos = resultOfScrapping .filter((value) => value !== 'Detalhar') .reduce((resultArray, item, index) => { From e58dac44d6c58436ff4025409bbea28eb83cc503 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Sun, 20 Mar 2022 16:17:55 -0300 Subject: [PATCH 27/31] =?UTF-8?q?Fun=C3=A7=C3=A3o=20de=20scrapping=20dos?= =?UTF-8?q?=20dados=20chamada=20quando=20o=20servidor=20inicia?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- index.js | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/index.js b/index.js index c7ea101..abd7502 100644 --- a/index.js +++ b/index.js @@ -1,6 +1,9 @@ const express = require('express'); require('dotenv').config(); const { getInfosGovController } = require('./controllers/infosgovController'); +const { createInfosGov } = require('./services/infosgovService'); +const errorHandling = require('./utils/errorHandling'); +const { badRequest } = require('./utils/dictionary'); const errorHandler = require('./middlewares/errorHandler'); @@ -9,6 +12,16 @@ const PORT = process.env.PORT || 8090; app.get('/api/dados', getInfosGovController); +app.listen( + PORT, + () => console.log(`App running on port ${PORT}!`), + async () => { + await createInfosGov().catch(() => { + throw errorHandling(badRequest, 'Scrapping failed. 
Try restarting the server.'); + }); + } +); + app.use(errorHandler); -app.listen(PORT, () => console.log(`App running on port ${PORT}!`)); \ No newline at end of file +module.exports = app; From f4206d27c579b1deb91a44ffcb2915397e159791 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Sun, 20 Mar 2022 16:18:40 -0300 Subject: [PATCH 28/31] =?UTF-8?q?Arquivo=20de=20configura=C3=A7=C3=A3o=20j?= =?UTF-8?q?son=20setado=20para=20usar=20banco=20de=20dados=20'portal=5Fgov?= =?UTF-8?q?'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.json b/config/config.json index 0fd35ea..8160015 100644 --- a/config/config.json +++ b/config/config.json @@ -9,7 +9,7 @@ "test": { "username": "root", "password": null, - "database": "database_test", + "database": "portal_gov", "host": "127.0.0.1", "dialect": "mysql" }, From dc9c91e0f5e6a301d59b19cce7f92acb1fbd26d0 Mon Sep 17 00:00:00 2001 From: Leonardo Date: Sun, 20 Mar 2022 16:19:22 -0300 Subject: [PATCH 29/31] =?UTF-8?q?Criada=20nova=20fun=C3=A7=C3=A3o=20para?= =?UTF-8?q?=20deixar=20cada=20qual=20com=20uma=20=C3=BAnica=20responsabili?= =?UTF-8?q?dade=20seguindo=20os=20princ=C3=ADpios=20SOLID?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- services/infosgovService.js | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/services/infosgovService.js b/services/infosgovService.js index 8f3d940..fb7cbab 100644 --- a/services/infosgovService.js +++ b/services/infosgovService.js @@ -22,7 +22,7 @@ const scrapping = async () => { return infosFromTable; }; -const createInfosGov = async () => { +const breakArray = async () => { const resultOfScrapping = await scrapping(); if (resultOfScrapping === undefined || resultOfScrapping.length === 0) { @@ -44,7 +44,13 @@ const createInfosGov = async () => { return resultArray; }, []); - + + return 
arraysOfInfos; +} + +const createInfosGov = async () => { + const arraysOfInfos = await breakArray().catch(() => { throw errorHandling(badRequest, 'Scrapping failed. Please try again.') } ); + arraysOfInfos.map(async (array) => { const [mesAno, programaOrcamentario, acaoOrcamentaria, valorEmpenhado, valorLiquidado, valorPago, valorRestosAPagarPagos] = array; await InfosGov.create({ @@ -66,5 +72,6 @@ const getInfosGov = async () => { }; module.exports = { + createInfosGov, getInfosGov, }; From 1b1285a81bd1889b45d55293052d3c373b50ecfa Mon Sep 17 00:00:00 2001 From: Leonardo Date: Sun, 20 Mar 2022 18:20:50 -0300 Subject: [PATCH 30/31] =?UTF-8?q?Retirando=20depend=C3=AAncias=20n=C3=A3o?= =?UTF-8?q?=20utilizadas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index b0e7f34..3754175 100644 --- a/package.json +++ b/package.json @@ -4,8 +4,8 @@ "description": "O web scraping (raspagem de rede, em tradução livre), também conhecido como extração de dados da web,\r é o nome dado ao processo de coleta de dados estruturados da web de maneira automatizada. 
Em geral,\r esse método é usado por pessoas, empresas que desejam usar a vasta quantidade de dados da web disponíveis\r publicamente para tomar decisões mais inteligentes.", "main": "index.js", "scripts": { - "start": "node index.js", - "test": "echo \"Error: no test specified\" && exit 1" + "test": "", + "start": "node index.js" }, "repository": { "type": "git", From fd26fa09264d3a765639839411dfb95768caf2bc Mon Sep 17 00:00:00 2001 From: Leonardo Date: Sun, 20 Mar 2022 18:21:15 -0300 Subject: [PATCH 31/31] =?UTF-8?q?Arquivo=20para=20documenta=C3=A7=C3=A3o?= =?UTF-8?q?=20do=20projeto=20e=20explica=C3=A7=C3=A3o=20de=20como=20rod?= =?UTF-8?q?=C3=A1-lo=20e=20o=20que=20foi=20feito?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PROJETO.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 PROJETO.md diff --git a/PROJETO.md b/PROJETO.md new file mode 100644 index 0000000..e747267 --- /dev/null +++ b/PROJETO.md @@ -0,0 +1,57 @@ +## Aplicação back end de raspagem de dados + +Uma aplicação back end para raspagem de dados, coletando informações da tabela do Portal da Transparência do Governo Federal e inserindo em um banco +de dados relacional. + +Foram usadas as tecnologias: +- Node JS +- MySQL +- Express +- Sequelize +- Puppeteer +- Thunder Client + +## Funcionamento da aplicação + +A aplicação funciona com o bot do Puppeteer para raspagem de dados, ou seja, para coleta de informações de algum site. Nesse caso, foi escolhida a página https://www.transparencia.gov.br/despesas/orgao?ordenarPor=orgaoSuperior&direcao=asc para alvo da raspagem. O servidor é iniciado e o bot é ativado para fazer a raspagem dos dados da tabela alvo. Se as informações foram coletadas com sucesso, elas são adicionadas ao banco de dados MySQL por meio do Sequelize. Caso algo tenha dado errado, o cliente receberá tal aviso e será guiado para que o erro seja solucionado. 
Ao fazer a requisição para a rota /api/dados, caso o banco de dados esteja vazio por conta de algum erro na raspagem ao início da aplicação, essa requisição fará a inserção dos dados no banco de dados. Caso esteja tudo certo, a requisição entregará os dados ao usuário. + +## Requisitos para rodar o projeto + +- Node JS instalado +- MySQL instalado + +## Como clonar o repositório e testar o projeto + +Clone o projeto do GitHub: + +```sh +$ git clone git@github.com:leonardomunsa/backend-challenge.git ou git clone https://github.com/leonardomunsa/backend-challenge.git +$ cd backend-challenge +``` + +#### Para que o banco de dados funcione: + +``` +- abra o arquivo config.json da pasta config +- edite a variável de senha no objeto development para sua senha usada no usuário do MySQL* +``` +A partir disso o projeto está configurado para conectar com o banco de dados a partir do Sequelize +*nunca commite esse documento com sua senha, apenas salve para uso do banco de dados + +#### Instale as dependências e inicie a aplicação para fazer as requisições: + +```sh +$ npm install +$ npx sequelize-cli db:create +$ npx sequelize-cli db:migrate +$ npm start +``` + +Se estiver tudo ok, a aplicação deve rodar no link: + +```bash +http://localhost:8090 +``` + +Teste para ver se está correto: deixe a aplicação preencher o banco de dados e entre no link http://localhost:8090/api/dados, +o resultado deve ser, em formato JSON, as informações da tabela do site do Portal da Transparência do Governo.