Skip to content

Commit

Permalink
Integration updated; needs to be checked
Browse files Browse the repository at this point in the history
  • Loading branch information
Carlos González Gamella committed Oct 23, 2024
1 parent fbea958 commit 0b5db13
Show file tree
Hide file tree
Showing 4 changed files with 161,394 additions and 4,038 deletions.
20 changes: 20 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
_docs/
_proc/
PlataformasAgregadasSinMenores_2018.zip
Accuracy_RAG_tool.ipynb
dspy_solution.ipynb
dspy_solution-Copy1.ipynb


*.bak
.gitattributes
Expand All @@ -17,6 +21,10 @@ tags

# Byte-compiled / optimized / DLL files
__pycache__/
*.pyc
__pycache__/
sproc/__pycache__/
sproc/__pycache__/__init__.cpython-310.pyc
*.py[cod]
*$py.class

Expand Down Expand Up @@ -158,7 +166,19 @@ data
notebooks
*.parquet

# files
descarga_minors/DUPLICADOS.xlsx
descarga_minors/datos_outsiders_gencat_mezcla_codi_dir3_y_codi_organ_place.csv
descarga_minors/estadisticas_df_gencat_json_outsiders_con_info.xlsx
descarga_minors/estadisticas_df_gencat_json_todos_con_info.xlsx
descarga_minors/failed_urls.csv
descarga_minors/outsiders_no_match.csv
descarga_minors/outsiders_no_match.csv.zip
descarga_minors/stopwords.ipynb
failed_urls.csv

# folders
minors_madrid/
minors_zaragoza/
DESCARGAS/

20 changes: 9 additions & 11 deletions descarga_minors/descarga_contratos_mad_zgz_gencat.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,19 @@ def download_contracts_gencat(domain, dataset_identifier, destination_directory,
while True:
# Obtener un bloque de resultados usando el offset
results = client.get(dataset_identifier, limit=limit, offset=offset)

# Romper el bucle si no se obtienen más resultados

if not results:
break

# Convertir resultados a DataFrame y agregar a la lista combinada
# Convert results to a DataFrame
df = pd.DataFrame.from_records(results)
results_combined.append(df)

# Actualizar el número total de filas descargadas
# Update total rows count
total_rows += len(results)

# Mostrar progreso
print(f"Descargadas {total_rows} filas hasta ahora...")

# Incrementar el offset para la siguiente solicitud
# Increment offset for the next iteration
offset += limit

# Combinar todos los DataFrames en uno solo
Expand Down Expand Up @@ -104,9 +101,9 @@ def download_contracts_zaragoza(contract_ids, detail_url_template, file_path):
print(f"Detalles de los contratos descargados y guardados en {file_path}")

def download_zaragoza_wrapper(base_url, params, detail_base_url, file_path):
# Obtener los IDs de los contratos primero
# Obtain the contract IDs first
contract_ids = get_contract_ids(base_url, params)
# Luego descargar los detalles de los contratos
# Download the details of each contract
download_contracts_zaragoza(contract_ids, detail_base_url, file_path)

def download_contracts_madrid(url, destination_directory, start_year,file_path):
Expand Down Expand Up @@ -134,7 +131,7 @@ def download_contracts_madrid(url, destination_directory, start_year,file_path):
file_name = href.split('/')[-1]
# Download the file linked from the URL.
file_response = requests.get(full_url)
file_response.raise_for_status() # Ensure the download was successful.
file_response.raise_for_status()
file_path = os.path.join(destination_directory, file_name)
with open(file_path, 'wb') as file:
file.write(file_response.content)
Expand Down Expand Up @@ -207,11 +204,12 @@ def main():
start_url = 'https://transparencia.madrid.es/portales/transparencia/es/Economia-y-presupuestos/Contratacion/Contratacion-administrativa/Contratos-menores'
if args.file_path:
download_contracts_madrid(start_url, args.file_path, args.start_year)

elif args.city == 'gencat':
domain = "analisi.transparenciacatalunya.cat"
dataset_identifier = "ybgg-dgi6"
if args.file_path:
download_contracts_gencat(domain, dataset_identifier, args.file_path, "contratacion_publica_catalunya_completo.csv")
download_contracts_gencat(domain, dataset_identifier, args.file_path, "contratacion_publica_catalunya_completo1.csv")
elif args.city == 'all':
if args.file_path:
download_all_contracts(args.file_path)
Expand Down
Loading

0 comments on commit 0b5db13

Please sign in to comment.