From d59f4eaa06e3f321c4387902ab03f61bdc12b6d7 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Wed, 28 Apr 2021 22:14:22 +0000 Subject: [PATCH 001/136] Upgrade to GitHub-native Dependabot --- .github/dependabot.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..b5158981 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,18 @@ +version: 2 +registries: + python-index-pypi-org: + type: python-index + url: https://pypi.org/ + replaces-base: true + username: "${{secrets.PYTHON_INDEX_PYPI_ORG_USERNAME}}" + password: "${{secrets.PYTHON_INDEX_PYPI_ORG_PASSWORD}}" + +updates: +- package-ecosystem: pip + directory: "/" + schedule: + interval: daily + time: "19:00" + open-pull-requests-limit: 10 + registries: + - python-index-pypi-org From 963ada115dc7038b0d2dcd29dba5765627b0477c Mon Sep 17 00:00:00 2001 From: william dutton Date: Wed, 9 Nov 2022 08:52:01 +1000 Subject: [PATCH 002/136] fix workflow --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e963e1f6..8a601204 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 with: - python-version: '3.6' + python-version: '3.x' - name: Install requirements run: pip install flake8 pycodestyle - name: Check syntax From 9f96e1676c0be03774273917040546a1b97b2f3f Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Mon, 17 Apr 2023 11:22:32 +1000 Subject: [PATCH 003/136] [QOLDEV-347] apply 'str' fallback type correctly, #182 - If all types have been rejected, ensure that the fallback flag is correctly set --- ckanext/xloader/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py index cbffaa2f..79facbea 100644 --- a/ckanext/xloader/utils.py +++ b/ckanext/xloader/utils.py @@ -175,10 +175,10 @@ def type_guess(rows, types=TYPES, strict=False): for ci, cell in enumerate(row): if not cell: continue - at_least_one_value[ci] = True for type in list(guesses[ci].keys()): if not isinstance(cell, type): guesses[ci].pop(type) + at_least_one_value[ci] = True if guesses[ci] else False # no need to set guessing weights before this # because we only accept a type if it never fails for i, guess in enumerate(guesses): From cf04a5c5c38443f3d98e0e7b8a4ed0ceede90aa0 Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Mon, 17 Apr 2023 12:50:09 +1000 Subject: [PATCH 004/136] [QOLDEV-347] fix validation errors on empty strings, #182 - replace empty strings with None if they have types that will choke on empty string --- ckanext/xloader/loader.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py index afc3c980..75bddf51 100644 --- a/ckanext/xloader/loader.py +++ b/ckanext/xloader/loader.py @@ -318,9 +318,16 @@ def row_iterator(): logger.info('Copying to database...') count = 0 + # Some types cannot be stored as empty strings and must be converted to None, + # https://github.com/ckan/ckanext-xloader/issues/182 + non_empty_types = ['timestamp', 'numeric'] for i, records in enumerate(chunky(result, 250)): count += len(records) logger.info('Saving chunk {number}'.format(number=i)) + for row in records: + for column_index, column_name in enumerate(row): + if headers_dicts[column_index]['type'] in non_empty_types and row[column_name] == '': + row[column_name] = None send_resource_to_datastore(resource_id, headers_dicts, records) logger.info('...copying done') From b8b99143be32bd0da3e7e125e5f02cff7c8212eb Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Mon, 17 Apr 2023 14:54:23 +1000 Subject: [PATCH 005/136] [QOLDEV-347] add tests for edge cases we're fixing - Column that has some rows with free text and others with numeric data - Column that has some rows with timestamp and others with empty string --- .../samples/mixed_numeric_string_sample.csv | 3 +++ .../tests/samples/sample_with_blanks.csv | 4 ++++ ckanext/xloader/tests/test_loader.py | 24 +++++++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv create mode 100644 ckanext/xloader/tests/samples/sample_with_blanks.csv diff --git a/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv b/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv new file mode 100644 index 00000000..9d076602 --- /dev/null +++ b/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv @@ -0,0 +1,3 @@ +Funding agency,Program title,Maximum (indicative) grant amount +DTIS,Accessible Tourism Infrastructure Grants,Five hundred thousand dollars +DTIS,Boosting Accessible Tourism Experiences Grants,5000 diff --git a/ckanext/xloader/tests/samples/sample_with_blanks.csv b/ckanext/xloader/tests/samples/sample_with_blanks.csv new file mode 100644 index 00000000..b53b25db --- /dev/null +++ b/ckanext/xloader/tests/samples/sample_with_blanks.csv @@ -0,0 +1,4 @@ +Funding agency,Program title,Opening date,Service ID +DTIS,Visitor First Experiences Fund,23/03/2023,63039 +DTIS,First Nations Sport and Recreation Program Round 2,22/03/2023,63040 +,,,63041 diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py index f31b663b..68452d11 100644 --- a/ckanext/xloader/tests/test_loader.py +++ b/ckanext/xloader/tests/test_loader.py @@ -612,6 +612,30 @@ def test_german(self, Session): u"tsvector", ] + [u"text"] * (len(records[0]) - 1) + def test_with_blanks(self, Session): + csv_filepath = get_sample_filepath("sample_with_blanks.csv") + resource_id = "test1" + factories.Resource(id=resource_id) + loader.load_csv( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, "test1")) == 3 + + def test_with_mixed_types(self, Session): + csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv") + resource_id = "test1" + factories.Resource(id=resource_id) + loader.load_csv( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, "test1")) == 2 + def test_reload(self, Session): csv_filepath = get_sample_filepath("simple.csv") resource_id = "test1" From b62aa6ccfc2f54008e4bbd240fb031bb130cd1ed Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Mon, 17 Apr 2023 16:05:54 +1000 Subject: [PATCH 006/136] [QOLDEV-347] tighten Flake8 rules - Remove unused imports, or tag those that serve a purpose (testing what can be imported) - Remove obsolete exclusions from Flake8 config --- .flake8 | 4 ---- ckanext/xloader/jobs.py | 2 +- ckanext/xloader/loader.py | 1 - ckanext/xloader/parser.py | 2 -- ckanext/xloader/plugin.py | 1 - ckanext/xloader/tests/ckan_setup.py | 2 +- ckanext/xloader/tests/fixtures.py | 5 ++--- 7 files changed, 4 insertions(+), 13 deletions(-) diff --git a/.flake8 b/.flake8 index a4eea9e3..32068ca7 100644 --- a/.flake8 +++ b/.flake8 @@ -17,8 +17,4 @@ max-line-length=127 # List ignore rules one per line. ignore = - E501 - C901 W503 - F401 - F403 diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py index 4c4068f9..0d242db1 100644 --- a/ckanext/xloader/jobs.py +++ b/ckanext/xloader/jobs.py @@ -16,7 +16,7 @@ import sqlalchemy as sa from ckan import model -from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config, check_ckan_version +from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config from . import loader from . import db diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py index 75bddf51..55c9cab5 100644 --- a/ckanext/xloader/loader.py +++ b/ckanext/xloader/loader.py @@ -14,7 +14,6 @@ from unidecode import unidecode import ckan.plugins as p -import ckan.plugins.toolkit as tk from .job_exceptions import FileCouldNotBeLoadedError, LoaderError from .parser import XloaderCSVParser diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py index b2a6f889..b52c59a3 100644 --- a/ckanext/xloader/parser.py +++ b/ckanext/xloader/parser.py @@ -1,10 +1,8 @@ # -*- coding: utf-8 -*- import csv -from codecs import iterencode from decimal import Decimal, InvalidOperation from itertools import chain -import six from ckan.plugins.toolkit import asbool from dateutil.parser import isoparser, parser from dateutil.parser import ParserError diff --git a/ckanext/xloader/plugin.py b/ckanext/xloader/plugin.py index dbde8ed5..159b99de 100644 --- a/ckanext/xloader/plugin.py +++ b/ckanext/xloader/plugin.py @@ -6,7 +6,6 @@ from ckan.plugins import toolkit from . import action, auth, helpers as xloader_helpers, utils -from .loader import fulltext_function_exists, get_write_engine try: config_declarations = toolkit.blanket.config_declarations diff --git a/ckanext/xloader/tests/ckan_setup.py b/ckanext/xloader/tests/ckan_setup.py index ae8bfb3e..ff43d74c 100644 --- a/ckanext/xloader/tests/ckan_setup.py +++ b/ckanext/xloader/tests/ckan_setup.py @@ -1,5 +1,5 @@ try: - from ckan.tests.pytest_ckan.ckan_setup import * + from ckan.tests.pytest_ckan.ckan_setup import * # noqa except ImportError: import pkg_resources from paste.deploy import loadapp diff --git a/ckanext/xloader/tests/fixtures.py b/ckanext/xloader/tests/fixtures.py index f43916ab..9a7ad37f 100644 --- a/ckanext/xloader/tests/fixtures.py +++ b/ckanext/xloader/tests/fixtures.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -import sqlalchemy -import sqlalchemy.orm as orm +from sqlalchemy import orm import os from ckanext.datastore.tests import helpers as datastore_helpers @@ -11,7 +10,7 @@ ) try: - from ckan.tests.pytest_ckan.fixtures import * + from ckan.tests.pytest_ckan.fixtures import * # noqa except ImportError: import pytest From 939ff6bfc9125d3c281b3401cff3d92456c05d44 Mon Sep 17 00:00:00 2001 From: antuarc Date: Fri, 19 May 2023 11:39:10 +1000 Subject: [PATCH 007/136] [QOLSVC-1863] truncate on-page XLoader logs if there are too many - Show the first 100 and last 100 logs, with a message between to say how many were skipped --- ckanext/xloader/templates/xloader/resource_data.html | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html index a94ad631..e9786776 100644 --- a/ckanext/xloader/templates/xloader/resource_data.html +++ b/ckanext/xloader/templates/xloader/resource_data.html @@ -62,6 +62,8 @@

{{ _('Upload Log') }}