From b8eb530f710196c7e4083814ca1d642257f0a7fc Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Tue, 9 Apr 2024 11:22:08 -0400 Subject: [PATCH 001/138] Add WIP edits to subsequences README --- kobo/apps/subsequences/README.md | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md index 57e23aa4da..682502103d 100644 --- a/kobo/apps/subsequences/README.md +++ b/kobo/apps/subsequences/README.md @@ -58,13 +58,20 @@ class DecimalRounder(BaseAction): ## STEP 4: define the handler def run_change(self, submission): + # `_destination_field` is defined by `BaseAction` to be `_supplementalDetails` _data = submission.get(self._destination_field, {}) fuel_cost = submission.get('fuel_cost') - _data['rounded'] = round(fuel_cost * 100) / 100 + _data[self.ID] = {'fuel_cost': round(fuel_cost * 100) / 100} return {**submission, self._destination_field: _data} ``` +#### Step 4a: modify `ADVANCED_FEATURES_PARAMS_SCHEMA` + +…otherwise, you will be unable to add `decimal_rounder` to `asset.advanced_features` in step 5. 
+ +TODO: figure out if we should really be maintaining the schema as one big constant, or if we should have a method in each action class that returns its own schema + #### Step 5: specify which surveys (`Asset`) should be passed to this handler Somewhere, either through the API or elsewhere, add relevant details to the asset's `advanced_features` field: @@ -96,9 +103,10 @@ class DecimalRounder(BaseAction): def run_change(self, submission): _data = submission.get(self._destination_field, {}) - for field in self.fields_to_round: + _data[self.ID] = {} + for field_name in self.fields_to_round: fuel_cost = submission.get(field_name) - _data[field_name] = round(fuel_cost * 100) / 100 + _data[self.ID][field_name] = round(fuel_cost * 100) / 100 return {**submission, self._destination_field: _data} ``` @@ -121,13 +129,15 @@ GET "/advanced_submission_post/aSsEtUiD?submission=" { "submission": "submissionUuid", "_supplementalDetails": { - "rounded": { + "decimal_rounder": { "fuel_cost": 1.23 } } } ``` +TODO: does GET to `advanced_submission_post` actually work? + #### Step 9 (optional): Define a validator Because `advanced_submission_post` data can be sourced from anywhere, it should be validated. The prominent way to do this is with a jsonschema defined in the action class. @@ -138,14 +148,16 @@ class DecimalRounder(BaseAction): # ... 
# `modify_jsonschema` appended to the class above def modify_jsonschema(self, schema): - defs = schema.get('definitions', {}) - props = schema.get('properties', {}) - defs['roundedschemadef'] = { + defs = schema.setdefault('definitions', {}) + props = schema.setdefault('properties', {}) + # TODO: make sure this actually works… + defs[self.ID] = {} + defs[self.ID]['roundednumber'] = { 'type': 'number', } for field_name in self.fields_to_round: - props[field_name] = {'$ref': '#/defs/roundedschemadef'} + props[field_name] = {'$ref': f'#/defs/{self.ID}/roundednumber'} ``` #### Step 10: Test the module @@ -175,8 +187,8 @@ this should print out the resulting submission: { "fuel_cost": 5.678901, "_supplementalDetails": { - "fuel_cost": { - "rounded": 5.68 + "decimal_rounder": { + "fuel_cost": 5.68 } } } From 3a6d5822dcdb5e067d497fbfdec25346276f365b Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Tue, 9 Apr 2024 20:37:36 -0400 Subject: [PATCH 002/138] Make NumberDoubler action class work --- .../actions/automatic_transcription.py | 5 ++- kobo/apps/subsequences/actions/base.py | 8 ++-- .../actions/manual_transcription.py | 11 +++-- .../subsequences/actions/number_doubler.py | 45 ++++++++++++++----- kobo/apps/subsequences/actions/qual.py | 8 ++-- kobo/apps/subsequences/actions/translation.py | 8 ++-- .../advanced_features_params_schema.py | 16 +++---- .../tests/test_automatic_transcription.py | 6 +-- .../subsequences/tests/test_number_doubler.py | 6 +-- kobo/apps/subsequences/utils/__init__.py | 45 ++++++++++++------- .../determine_export_cols_with_values.py | 2 +- kobo/settings/dev.py | 2 + 12 files changed, 100 insertions(+), 62 deletions(-) diff --git a/kobo/apps/subsequences/actions/automatic_transcription.py b/kobo/apps/subsequences/actions/automatic_transcription.py index 202f598dea..cf9f25e531 100644 --- a/kobo/apps/subsequences/actions/automatic_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_transcription.py @@ -9,16 +9,17 @@ DT_MOD = 
BaseAction.DATE_MODIFIED_FIELD DT_CREATED = BaseAction.DATE_CREATED_FIELD + class AutomaticTranscriptionAction(BaseAction): ID = 'transcript' MANUAL = 'user_transcribed' @classmethod - def build_params(kls, params, content): + def build_params(cls, content, **kwargs): possible_transcribed_fields = [] for row in content.get('survey', []): if row['type'] in ['audio', 'video']: - possible_transcribed_fields.append(kls.get_qpath(kls, row)) + possible_transcribed_fields.append(cls.get_qpath(cls, row)) params = {'values': possible_transcribed_fields, 'services': []} return params diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index fdae53cc79..80cac30e3f 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -7,6 +7,7 @@ ACTION_NEEDED = 'ACTION_NEEDED' PASSES = 'PASSES' + class BaseAction: ID = None _destination_field = '_supplementalDetails' @@ -103,8 +104,8 @@ def has_change(self, original, edit): return self.record_repr(original) != self.record_repr(edit) @classmethod - def build_params(kls, *args, **kwargs): - raise NotImplementedError(f'{kls.__name__} has not implemented a build_params method') + def build_params(cls, *args, **kwargs): + raise NotImplementedError(f'{cls.__name__} has not implemented a build_params method') def get_qpath(self, row): # return the full path... 
@@ -113,7 +114,8 @@ def get_qpath(self, row): return row[name_field] return None - def get_name(self, row): + @classmethod + def get_name(cls, row): for name_field in ['name', '$autoname']: if name_field in row: return row[name_field] diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index a9aab1a364..d6d210ab3c 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -7,17 +7,17 @@ class ManualTranscriptionAction(BaseAction): ID = 'manual_transcription' @classmethod - def build_params(kls, survey_content): + def build_params(cls, content, **kwargs): possible_transcribed_fields = [] - for row in survey_content.get('survey', []): + for row in content.get('survey', []): if row['type'] in ['audio', 'video']: - possible_transcribed_fields.append(row['name']) + possible_transcribed_fields.append(cls.get_name(row)) params = {'values': possible_transcribed_fields} return params - + def load_params(self, params): self.possible_transcribed_fields = params['values'] - + def check_submission_status(self, submission): if self._destination_field not in submission: return ACTION_NEEDED @@ -25,4 +25,3 @@ def check_submission_status(self, submission): # needs to be built out return PASSES - \ No newline at end of file diff --git a/kobo/apps/subsequences/actions/number_doubler.py b/kobo/apps/subsequences/actions/number_doubler.py index 9a4dc6f2be..5e4d2704bc 100644 --- a/kobo/apps/subsequences/actions/number_doubler.py +++ b/kobo/apps/subsequences/actions/number_doubler.py @@ -20,14 +20,13 @@ class NumberDoubler(BaseAction): ID = 'number_doubler' def load_params(self, params): - self.values = params['values'] + self.values = params.get('values', []) - def run_change(self, submission): - additions = submission.get(self._destination_field, {}) - for key, dest_key in self.values.items(): - original = submission.get(key) - 
additions[dest_key] = double_number(original) - return {**submission, self._destination_field: additions} + def has_change(self, original, edit): + return True + + def revise_field(self, previous, edit): + return {'value': double_number(edit['value'])} def check_submission_status(self, submission): if self._destination_field not in submission: @@ -39,11 +38,35 @@ def check_submission_status(self, submission): return ACTION_NEEDED @classmethod - def build_params(kls, params, asset_content): + def build_params(cls, content, **kwargs): numeric_questions = {} - for row in asset_content['survey']: - if row.get('type') in ['number', 'decimal']: - name = row['name'] + for row in content['survey']: + if row.get('type') in ['integer', 'decimal']: + name = cls.get_name(row) numeric_questions[name] = f'{name}_doubled' params = {'values': numeric_questions} return params + + @classmethod + def get_values_for_content(cls, content): + """ + If no "values" are defined for a given asset, then this method will + generate a set of defaults. 
+ """ + values = [] + for row in content.get('survey', []): + if row['type'] in ['integer', 'decimal']: + values.append(cls.get_qpath(cls, row)) + return values + + def modify_jsonschema(self, schema): + defs = schema.setdefault('definitions', {}) + props = schema.setdefault('properties', {}) + defs[f'{self.ID}schemadef'] = { + 'type': 'object', + } + + for field_name in self.values: + props[field_name] = {'$ref': f'#/definitions/{self.ID}schemadef'} + + return schema diff --git a/kobo/apps/subsequences/actions/qual.py b/kobo/apps/subsequences/actions/qual.py index 1092e94901..f0a85303ca 100644 --- a/kobo/apps/subsequences/actions/qual.py +++ b/kobo/apps/subsequences/actions/qual.py @@ -6,17 +6,17 @@ class QualAction(BaseAction): ID = 'qual' @classmethod - def build_params(kls, survey_content): + def build_params(cls, content, **kwargs): _fields = [] - for row in survey_content.get('survey', []): + for row in content.get('survey', []): if row['type'] in ['audio', 'video']: - _fields.append(row['name']) + _fields.append(cls.get_name(row)) return {'values': _fields} def load_params(self, params): ''' Action.load_params is called when the instance is initialized - for each Asset. It will + for each Asset. 
It will ''' self.fields = params.get('values', []) self.qual_survey = params.get('qual_survey', []) diff --git a/kobo/apps/subsequences/actions/translation.py b/kobo/apps/subsequences/actions/translation.py index dd92459e26..1677a8b844 100644 --- a/kobo/apps/subsequences/actions/translation.py +++ b/kobo/apps/subsequences/actions/translation.py @@ -11,12 +11,12 @@ class TranslationAction(BaseAction): MANUAL = 'user_translated' @classmethod - def build_params(kls, survey_content): + def build_params(cls, content, **kwargs): audio_questions = [] translatable_fields = [] - for row in survey_content.get('survey', []): + for row in content.get('survey', []): if row['type'] in ['audio', 'video', 'text']: - translatable_fields.append(kls.get_qpath(kls, row)) + translatable_fields.append(cls.get_qpath(cls, row)) params = {'values': translatable_fields} return params @@ -32,7 +32,7 @@ def get_values_for_content(kls, content): def load_params(self, params): self.translatable_fields = params.get('values', []) - self.languages = params['languages'] + self.languages = params.get('languages', []) self.available_services = params.get('services', []) def has_change(self, orecord, erecord): diff --git a/kobo/apps/subsequences/advanced_features_params_schema.py b/kobo/apps/subsequences/advanced_features_params_schema.py index 90bb507110..e8e050e344 100644 --- a/kobo/apps/subsequences/advanced_features_params_schema.py +++ b/kobo/apps/subsequences/advanced_features_params_schema.py @@ -25,23 +25,23 @@ 'type': 'array', 'items': {'type': 'string'}, }, - } + }, }, 'translation': { 'type': 'object', 'properties': { - 'languages': { - 'type': 'array', - 'items': {'type': 'string'} - }, + 'languages': {'type': 'array', 'items': {'type': 'string'}}, 'values': { 'type': 'array', 'items': {'type': 'string'}, }, }, - 'required': ['languages'] - } - } + 'required': ['languages'], + }, + 'number_doubler': { + 'type': 'object', + }, + }, } # User-defined qualitative analysis forms diff --git 
a/kobo/apps/subsequences/tests/test_automatic_transcription.py b/kobo/apps/subsequences/tests/test_automatic_transcription.py index 3ad064490b..c73d3d32a6 100644 --- a/kobo/apps/subsequences/tests/test_automatic_transcription.py +++ b/kobo/apps/subsequences/tests/test_automatic_transcription.py @@ -25,14 +25,14 @@ def _survey_and_submission(): def test_param_builder(): AutomaticTranscriptionAction.TRANSCRIPTION_SERVICES = TEST_TRANSCRIPTION_SERVICES survey = _survey_and_submission()[0] - built_params = AutomaticTranscriptionAction.build_params({}, survey) + built_params = AutomaticTranscriptionAction.build_params(content=survey) assert built_params['values'] == ['ask_a_question'] assert 'services' in built_params def test_instantiate_action_with_params(): survey = _survey_and_submission()[0] - action_params = AutomaticTranscriptionAction.build_params({}, survey) + action_params = AutomaticTranscriptionAction.build_params(content=survey) action_instance = AutomaticTranscriptionAction(action_params) assert action_instance is not None @@ -40,7 +40,7 @@ def test_instantiate_action_with_params(): @pytest.mark.skip(reason='transcription currently does not depend on this working') def test_submission_status_before_change(): survey, submission = _survey_and_submission() - action_params = AutomaticTranscriptionAction.build_params({}, survey) + action_params = AutomaticTranscriptionAction.build_params(content=survey) action_instance = AutomaticTranscriptionAction(action_params) # check that the changes ARE needed diff --git a/kobo/apps/subsequences/tests/test_number_doubler.py b/kobo/apps/subsequences/tests/test_number_doubler.py index ca0c572788..95c50242c4 100644 --- a/kobo/apps/subsequences/tests/test_number_doubler.py +++ b/kobo/apps/subsequences/tests/test_number_doubler.py @@ -14,7 +14,7 @@ def _survey_and_submission_with_numerics(): def test_param_builder(): survey = _survey_and_submission_with_numerics()[0] - built_params = NumberDoubler.build_params({}, survey) + 
built_params = NumberDoubler.build_params(content=survey) assert 'values' in built_params # assert built_params['values']['num1'] == 'num1_doubled' assert [*built_params['values'].keys()] == ['num1', 'num2', 'num3', 'num4'] @@ -24,13 +24,13 @@ def test_param_builder(): def test_instantiate_action_with_params(): survey = _survey_and_submission_with_numerics()[0] - action_params = NumberDoubler.build_params({}, survey) + action_params = NumberDoubler.build_params(content=survey) action_instance = NumberDoubler(action_params) assert action_instance is not None def test_submission_status_before_and_after_change(): survey, submission = _survey_and_submission_with_numerics() - action_params = NumberDoubler.build_params({}, survey) + action_params = NumberDoubler.build_params(content=survey) action_instance = NumberDoubler(action_params) # check that the changes ARE needed diff --git a/kobo/apps/subsequences/utils/__init__.py b/kobo/apps/subsequences/utils/__init__.py index 50db59c7fa..9188273bc6 100644 --- a/kobo/apps/subsequences/utils/__init__.py +++ b/kobo/apps/subsequences/utils/__init__.py @@ -2,6 +2,7 @@ from ..actions.automatic_transcription import AutomaticTranscriptionAction from ..actions.translation import TranslationAction from ..actions.qual import QualAction +from ..actions.number_doubler import NumberDoubler from ..actions.unknown_action import UnknownAction @@ -10,6 +11,7 @@ AutomaticTranscriptionAction, TranslationAction, QualAction, + NumberDoubler, ) ACTIONS_BY_ID = dict([ @@ -41,14 +43,16 @@ # if not action.test_submission_passes_action(submission): # return action + def advanced_feature_instances(content, actions): action_instances = [] for action_id, action_params in actions.items(): action_kls = ACTIONS_BY_ID[action_id] - if action_params == True: - action_params = action_kls.build_params({}, content) + if action_params: + action_params = action_kls.build_params(content=content) yield action_kls(action_params) + def populate_paths(_content): 
content = deepcopy(_content) group_stack = [] @@ -75,22 +79,23 @@ def populate_paths(_content): row['qpath'] = '-'.join([*group_stack, rowname]) return content + def advanced_submission_jsonschema(content, actions, url=None): actions = deepcopy(actions) action_instances = [] content = populate_paths(content) # devhack: this keeps serializer from breaking when old params # are still in the database - if 'translated' in actions: # migration + if 'translated' in actions: # migration actions['translation'] = actions['translated'] # migration assert 'languages' in actions['translation'] - del actions['translated'] # migration + del actions['translated'] # migration # /devhack - + # breakpoint() for action_id, action_params in actions.items(): action_kls = ACTIONS_BY_ID[action_id] - if action_params == True: - action_params = action_kls.build_params({}, content) + if action_params: + action_params = action_kls.build_params(content=content) if 'values' not in action_params: action_params['values'] = action_kls.get_values_for_content(content) action_instances.append(action_kls(action_params)) @@ -99,26 +104,32 @@ def advanced_submission_jsonschema(content, actions, url=None): # def _empty_obj(): # return {'type': 'object', 'properties': {}, 'additionalProperties': False} + def get_jsonschema(action_instances=(), url=None): sub_props = {} if url is None: url = '/advanced_submission_post/' - schema = {'type': 'object', - '$description': FEATURE_JSONSCHEMA_DESCRIPTION, - 'url': url, - 'properties': { - 'submission': {'type': 'string', - 'description': 'the uuid of the submission'}, - }, - 'additionalProperties': False, - 'required': ['submission'], - } + schema = { + 'type': 'object', + '$description': FEATURE_JSONSCHEMA_DESCRIPTION, + 'url': url, + 'properties': { + 'submission': { + 'type': 'string', + 'description': 'the uuid of the submission', + }, + }, + 'additionalProperties': False, + 'required': ['submission'], + } for instance in action_instances: schema = 
instance.modify_jsonschema(schema) return schema + SUPPLEMENTAL_DETAILS_KEY = '_supplementalDetails' + def stream_with_extras(submission_stream, asset): extras = dict( asset.submission_extras.values_list('submission_uuid', 'content') diff --git a/kobo/apps/subsequences/utils/determine_export_cols_with_values.py b/kobo/apps/subsequences/utils/determine_export_cols_with_values.py index e945e007b3..51c762ba72 100644 --- a/kobo/apps/subsequences/utils/determine_export_cols_with_values.py +++ b/kobo/apps/subsequences/utils/determine_export_cols_with_values.py @@ -28,7 +28,7 @@ def get_lang_code(key, tvals): elif key == 'translation': for key in tvals.keys(): yield key - elif key == 'translated': # migration + elif key == 'translated': # migration raise ValueError('key "translated" should not be in the asset. Run management command:' ' python manage.py runscript repop_known_cols" to fix') diff --git a/kobo/settings/dev.py b/kobo/settings/dev.py index 958ad50d43..85c1882f14 100644 --- a/kobo/settings/dev.py +++ b/kobo/settings/dev.py @@ -35,3 +35,5 @@ def show_toolbar(request): # with option `--print-sql` SHELL_PLUS_PRINT_SQL_TRUNCATE = None RUNSERVER_PLUS_PRINT_SQL_TRUNCATE = None + +CELERY_TASK_ALWAYS_EAGER = True From 23d3b22e15e21b848803c0df56f6da9941295aea Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Wed, 10 Apr 2024 14:26:44 -0400 Subject: [PATCH 003/138] send number_doubler to formpack in super hacky way --- kpi/models/asset.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/kpi/models/asset.py b/kpi/models/asset.py index 1e5e83af12..0d7e13c42e 100644 --- a/kpi/models/asset.py +++ b/kpi/models/asset.py @@ -475,6 +475,25 @@ def analysis_form_json(self): additional_fields = list(self._get_additional_fields()) engines = dict(self._get_engines()) output = {'engines': engines, 'additional_fields': additional_fields} + try: + number_doubler_field = self.advanced_features[ + 'number_doubler' + ]['number_doubler_fields'] # just a singular string lol + except KeyError: + pass + else: + additional_fields.append(dict( + # What do all these do? + label=f'{number_doubler_field} DOUBLED!', # understood + name=number_doubler_field + '__avoid_collision_with_source_question_name', # arbitrary? + dtpath=number_doubler_field, # unknown + type='doubled_number', # understood; xref with formpack `data_type_classes` + language='??', # only useful for transx? what does it do? + source=number_doubler_field, # probably understood; formpack field can reference e.g. for building labels + qpath=number_doubler_field, # probably understood; but compare to `source`? + settings='??', # only used by transx so far? + path=[number_doubler_field], # does this get `_supplementalDetails/` prepended to it? haven't looked yet + )) try: qual_survey = self.advanced_features['qual']['qual_survey'] except KeyError: From fba697e90052a49fe6c0510f31c63010ab8cbf24 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Wed, 10 Apr 2024 16:57:06 -0400 Subject: [PATCH 004/138] yay --- kobo/apps/subsequences/README.md | 2 ++ kobo/apps/subsequences/models.py | 1 + 2 files changed, 3 insertions(+) diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md index 682502103d..bf4b003f82 100644 --- a/kobo/apps/subsequences/README.md +++ b/kobo/apps/subsequences/README.md @@ -112,6 +112,8 @@ class DecimalRounder(BaseAction): #### Step 8: After a submission has come in, POST metadata to the `/advanced_submission_post/` API endpoint +TODO: for number_doubler, we ended up using something like `"this_number": {"number_doubler": {"value": 667}}}` + ``` POST to "/advanced_submission_post/aSsEtUiD" diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index b967dcf110..df2bb66d27 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -1,4 +1,5 @@ # coding: utf-8 + from django.db import models from kobo.apps.languages.models.transcription import TranscriptionService From 678e8ec1db563e27a523c4678b4b0a54e83f3f8b Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Wed, 10 Apr 2024 11:17:16 -0400 Subject: [PATCH 005/138] wip --- kobo/apps/subsequences/README.md | 9 ++++++++- .../apps/subsequences/actions/automatic_transcription.py | 1 + kobo/apps/subsequences/actions/base.py | 3 +++ kobo/apps/subsequences/actions/manual_transcription.py | 1 + kobo/apps/subsequences/actions/qual.py | 1 + kobo/apps/subsequences/actions/translation.py | 1 + 6 files changed, 15 insertions(+), 1 deletion(-) diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md index bf4b003f82..2d6f53f0e5 100644 --- a/kobo/apps/subsequences/README.md +++ b/kobo/apps/subsequences/README.md @@ -66,6 +66,8 @@ class DecimalRounder(BaseAction): ``` +TODO: `run_change()` is never called. Ouch! Is the `revise_field()` method its successor? 
+ #### Step 4a: modify `ADVANCED_FEATURES_PARAMS_SCHEMA` …otherwise, you will be unable to add `decimal_rounder` to `asset.advanced_features` in step 5. @@ -110,6 +112,9 @@ class DecimalRounder(BaseAction): return {**submission, self._destination_field: _data} ``` +TODO: `build_params()` also appears in real-life actions. What is it? It is never called(!) because +`action_params == True` in `utils/__init__.py` never evaluates to true. + #### Step 8: After a submission has come in, POST metadata to the `/advanced_submission_post/` API endpoint TODO: for number_doubler, we ended up using something like `"this_number": {"number_doubler": {"value": 667}}}` @@ -159,7 +164,9 @@ class DecimalRounder(BaseAction): } for field_name in self.fields_to_round: - props[field_name] = {'$ref': f'#/defs/{self.ID}/roundednumber'} + props[field_name] = {'$ref': f'#/definitions/{self.ID}/roundednumber'} + + return schema ``` #### Step 10: Test the module diff --git a/kobo/apps/subsequences/actions/automatic_transcription.py b/kobo/apps/subsequences/actions/automatic_transcription.py index cf9f25e531..cb47acd671 100644 --- a/kobo/apps/subsequences/actions/automatic_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_transcription.py @@ -16,6 +16,7 @@ class AutomaticTranscriptionAction(BaseAction): @classmethod def build_params(cls, content, **kwargs): + raise Exception('Fuck You') possible_transcribed_fields = [] for row in content.get('survey', []): if row['type'] in ['audio', 'video']: diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 80cac30e3f..b4ab2c1d79 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -39,6 +39,9 @@ def compile_revised_record(self, content, edits): a method that applies changes to a json structure and appends previous changes to a revision history ''' + # TODO: should this handle managing `DATE_CREATED_FIELD`, + # `DATE_MODIFIED_FIELD`, etc. 
instead of delegating that to + # `revise_record()` as it currently does? if self.ID is None: return content for field_name, vals in edits.items(): diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index d6d210ab3c..1aec153dfd 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -8,6 +8,7 @@ class ManualTranscriptionAction(BaseAction): @classmethod def build_params(cls, content, **kwargs): + raise Exception('Fuck You') possible_transcribed_fields = [] for row in content.get('survey', []): if row['type'] in ['audio', 'video']: diff --git a/kobo/apps/subsequences/actions/qual.py b/kobo/apps/subsequences/actions/qual.py index f0a85303ca..ef62313266 100644 --- a/kobo/apps/subsequences/actions/qual.py +++ b/kobo/apps/subsequences/actions/qual.py @@ -7,6 +7,7 @@ class QualAction(BaseAction): @classmethod def build_params(cls, content, **kwargs): + raise Exception('Fuck You') _fields = [] for row in content.get('survey', []): if row['type'] in ['audio', 'video']: diff --git a/kobo/apps/subsequences/actions/translation.py b/kobo/apps/subsequences/actions/translation.py index 1677a8b844..d6eeb080bc 100644 --- a/kobo/apps/subsequences/actions/translation.py +++ b/kobo/apps/subsequences/actions/translation.py @@ -12,6 +12,7 @@ class TranslationAction(BaseAction): @classmethod def build_params(cls, content, **kwargs): + raise Exception('Fuck You') audio_questions = [] translatable_fields = [] for row in content.get('survey', []): From 6f1a98269d855eca7e42a5534993d612f4598016 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Tue, 19 Aug 2025 14:55:52 -0400 Subject: [PATCH 006/138] Make unit tests pass again after merging `main` --- kobo/apps/subsequences/README.md | 1 + .../subsequences/actions/automatic_transcription.py | 1 - .../apps/subsequences/actions/manual_transcription.py | 1 - kobo/apps/subsequences/actions/number_doubler.py | 11 ++++++++++- kobo/apps/subsequences/actions/qual.py | 1 - kobo/apps/subsequences/actions/translation.py | 1 - kobo/apps/subsequences/tests/test_nlp_integration.py | 6 +++--- kobo/apps/subsequences/tests/test_number_doubler.py | 6 +++--- kobo/apps/subsequences/utils/__init__.py | 10 ++++++++-- kobo/settings/dev.py | 2 -- 10 files changed, 25 insertions(+), 15 deletions(-) diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md index 2d6f53f0e5..be21c56354 100644 --- a/kobo/apps/subsequences/README.md +++ b/kobo/apps/subsequences/README.md @@ -67,6 +67,7 @@ class DecimalRounder(BaseAction): ``` TODO: `run_change()` is never called. Ouch! Is the `revise_field()` method its successor? 
+Maybe `run_change()` is correct for actions that do not take edits #### Step 4a: modify `ADVANCED_FEATURES_PARAMS_SCHEMA` diff --git a/kobo/apps/subsequences/actions/automatic_transcription.py b/kobo/apps/subsequences/actions/automatic_transcription.py index 20577b3b66..49464c6c06 100644 --- a/kobo/apps/subsequences/actions/automatic_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_transcription.py @@ -17,7 +17,6 @@ class AutomaticTranscriptionAction(BaseAction): @classmethod def build_params(cls, content, **kwargs): - raise Exception('Fuck You') possible_transcribed_fields = [] for row in content.get('survey', []): if row['type'] in TRANSCRIBABLE_SOURCE_TYPES: diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 7e56afcd42..48c96f42d5 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -9,7 +9,6 @@ class ManualTranscriptionAction(BaseAction): @classmethod def build_params(cls, content, **kwargs): - raise Exception('Fuck You') possible_transcribed_fields = [] for row in content.get('survey', []): if row['type'] in TRANSCRIBABLE_SOURCE_TYPES: diff --git a/kobo/apps/subsequences/actions/number_doubler.py b/kobo/apps/subsequences/actions/number_doubler.py index 5e4d2704bc..f8997c3808 100644 --- a/kobo/apps/subsequences/actions/number_doubler.py +++ b/kobo/apps/subsequences/actions/number_doubler.py @@ -25,7 +25,16 @@ def load_params(self, params): def has_change(self, original, edit): return True + def run_change(self, submission): + additions = submission.get(self._destination_field, {}) + for key, dest_key in self.values.items(): + original = submission.get(key) + additions[dest_key] = double_number(original) + return {**submission, self._destination_field: additions} + def revise_field(self, previous, edit): + # FIXME: idk if this makes sense here. 
Maybe `run_change()` is correct + # for actions that do not take edits return {'value': double_number(edit['value'])} def check_submission_status(self, submission): @@ -56,7 +65,7 @@ def get_values_for_content(cls, content): values = [] for row in content.get('survey', []): if row['type'] in ['integer', 'decimal']: - values.append(cls.get_qpath(cls, row)) + values.append(cls.get_xpath(cls, row)) return values def modify_jsonschema(self, schema): diff --git a/kobo/apps/subsequences/actions/qual.py b/kobo/apps/subsequences/actions/qual.py index 3ac2eeb2fd..e5aae96782 100644 --- a/kobo/apps/subsequences/actions/qual.py +++ b/kobo/apps/subsequences/actions/qual.py @@ -8,7 +8,6 @@ class QualAction(BaseAction): @classmethod def build_params(cls, content, **kwargs): - raise Exception('Fuck You') _fields = [] for row in content.get('survey', []): if row['type'] in QUAL_SOURCE_TYPES: diff --git a/kobo/apps/subsequences/actions/translation.py b/kobo/apps/subsequences/actions/translation.py index c15b21db14..2a838aae2a 100644 --- a/kobo/apps/subsequences/actions/translation.py +++ b/kobo/apps/subsequences/actions/translation.py @@ -11,7 +11,6 @@ class TranslationAction(BaseAction): @classmethod def build_params(cls, content, **kwargs): - raise Exception('Fuck You') translatable_fields = [] for row in content.get('survey', []): if row['type'] in TRANSLATABLE_SOURCE_TYPES: diff --git a/kobo/apps/subsequences/tests/test_nlp_integration.py b/kobo/apps/subsequences/tests/test_nlp_integration.py index ee24b8a7a7..ca45c99fe9 100644 --- a/kobo/apps/subsequences/tests/test_nlp_integration.py +++ b/kobo/apps/subsequences/tests/test_nlp_integration.py @@ -49,14 +49,14 @@ def setUp(self): def test_param_builder(self): AutomaticTranscriptionAction.TRANSCRIPTION_SERVICES = TEST_TRANSCRIPTION_SERVICES survey = self.asset.content - built_params = AutomaticTranscriptionAction.build_params({}, survey) + built_params = AutomaticTranscriptionAction.build_params(content=survey) assert 
built_params['values'] == ['ask_a_question'] assert 'services' in built_params def test_instantiate_action_with_params(self): survey = self.asset.content - action_params = AutomaticTranscriptionAction.build_params({}, survey) + action_params = AutomaticTranscriptionAction.build_params(content=survey) action_instance = AutomaticTranscriptionAction(action_params) assert action_instance is not None @@ -66,7 +66,7 @@ def test_submission_status_before_change(): submission = {'ask_a_question': 'blah.mp3', '_attachments': [ {'filename': 'blah.mp3', } ]} - action_params = AutomaticTranscriptionAction.build_params({}, survey) + action_params = AutomaticTranscriptionAction.build_params(content=survey) action_instance = AutomaticTranscriptionAction(action_params) # check that the changes ARE needed diff --git a/kobo/apps/subsequences/tests/test_number_doubler.py b/kobo/apps/subsequences/tests/test_number_doubler.py index 95c50242c4..4de07e06bd 100644 --- a/kobo/apps/subsequences/tests/test_number_doubler.py +++ b/kobo/apps/subsequences/tests/test_number_doubler.py @@ -4,8 +4,8 @@ def _survey_and_submission_with_numerics(): survey = {'survey': [ - {'type': 'number', 'name': 'num1'}, - {'type': 'number', 'name': 'num2'}, + {'type': 'integer', 'name': 'num1'}, + {'type': 'integer', 'name': 'num2'}, {'type': 'decimal', 'name': 'num3'}, {'type': 'decimal', 'name': 'num4'}, ]} @@ -13,7 +13,7 @@ def _survey_and_submission_with_numerics(): return (survey, submission) def test_param_builder(): - survey = _survey_and_submission_with_numerics()[0] + survey, _ = _survey_and_submission_with_numerics() built_params = NumberDoubler.build_params(content=survey) assert 'values' in built_params # assert built_params['values']['num1'] == 'num1_doubled' diff --git a/kobo/apps/subsequences/utils/__init__.py b/kobo/apps/subsequences/utils/__init__.py index 0a73574195..9756bda641 100644 --- a/kobo/apps/subsequences/utils/__init__.py +++ b/kobo/apps/subsequences/utils/__init__.py @@ -49,7 +49,10 @@ 
def advanced_feature_instances(content, actions): for action_id, action_params in actions.items(): action_kls = ACTIONS_BY_ID[action_id] - if action_params: # FIXME: is this really a boolean? We were testing with `==` and later `is` + # FIXME: calling `build_params()` when `action_params` is already a + # valid dict breaks everything, but `action_params` being a simple + # boolean `True` is used by unit tests + if action_params is True: action_params = action_kls.build_params(content=content) yield action_kls(action_params) @@ -95,7 +98,10 @@ def advanced_submission_jsonschema(content, actions, url=None): # breakpoint() for action_id, action_params in actions.items(): action_kls = ACTIONS_BY_ID[action_id] - if action_params: # FIXME: boolean? See other FIXME + # FIXME: calling `build_params()` when `action_params` is already a + # valid dict breaks everything, but `action_params` being a simple + # boolean `True` is used by unit tests + if action_params is True: action_params = action_kls.build_params(content=content) if 'values' not in action_params: action_params['values'] = action_kls.get_values_for_content(content) diff --git a/kobo/settings/dev.py b/kobo/settings/dev.py index 5cf1217967..2bdf474604 100644 --- a/kobo/settings/dev.py +++ b/kobo/settings/dev.py @@ -28,5 +28,3 @@ def show_toolbar(request): # with option `--print-sql` SHELL_PLUS_PRINT_SQL_TRUNCATE = None RUNSERVER_PLUS_PRINT_SQL_TRUNCATE = None - -CELERY_TASK_ALWAYS_EAGER = True From 91ad964098856729fb742195e5587e3b22b25ea1 Mon Sep 17 00:00:00 2001 From: jnm Date: Tue, 19 Aug 2025 16:57:44 -0400 Subject: [PATCH 007/138] Add grievances to README.md --- kobo/apps/subsequences/README.md | 57 +++++++++++++++++++------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md index be21c56354..c6eb60a222 100644 --- a/kobo/apps/subsequences/README.md +++ b/kobo/apps/subsequences/README.md @@ -17,7 +17,9 @@ ### All actions 
must have the following components * an identifier -* a method that decides if a given submission needs to be handled +* a jsonschema to validate the parameters used to configure the action +* ~~a method that decides if a given submission needs to be handled~~ + * let's drop this requirement. everything is manually triggered, and it doesn't seem necessary even for automatic processing. `check_submission_status()` is not used anywhere except unit tests (?) * a handler that receives a submission (and other metadata) and processes it * a jsonschema to validate that a response is valid @@ -33,7 +35,7 @@ We will define a class `DecimalRounder` that inherits from `subsequences.actions.BaseAction` -#### step 2. pick an identifier for the metadata +#### step 2. pick an identifier for the new action ```python from kobo.apps.subsequences import BaseAction @@ -41,39 +43,38 @@ class DecimalRounder(BaseAction): ID = 'decimal_rounder' ``` -#### step 3: when should a record should be modified or handled +#### step 3: how should a record should be modified or handled? In this example, we would probably want to handle any submission that has a non-null value in the `fuel_cost` field of the submission. 
We would define that like this: ```python -from subsequences.status ACTION_NEEDED, PASSES - class DecimalRounder(BaseAction): ID = 'decimal_rounder' - def check_submission_status(self, submission): - if submission.get('fuel_cost') != None: - return ACTION_NEEDED - return PASSES - -## STEP 4: define the handler def run_change(self, submission): # `_destination_field` is defined by `BaseAction` to be `_supplementalDetails` _data = submission.get(self._destination_field, {}) - fuel_cost = submission.get('fuel_cost') + if (fuel_cost := submission.get('fuel_cost')) is None: + return # FIXME: this probably needs some different value to avoid blowing up _data[self.ID] = {'fuel_cost': round(fuel_cost * 100) / 100} return {**submission, self._destination_field: _data} ``` +:warning: `run_change()` makes sense for actions that do not take "edits", such as: +* this decimal rounder +* the number doubler example +* automatic transcripts and translations +* the unfinished keyword counter -TODO: `run_change()` is never called. Ouch! Is the `revise_field()` method its successor? -Maybe `run_change()` is correct for actions that do not take edits +However, when actions *do* take edits, `revise_field()` is used. An example of that should be given here. Actions that take edits include: +* manual transcripts and translations +* qualitative analysis forms -#### Step 4a: modify `ADVANCED_FEATURES_PARAMS_SCHEMA` +#### Step 4: modify `ADVANCED_FEATURES_PARAMS_SCHEMA` …otherwise, you will be unable to add `decimal_rounder` to `asset.advanced_features` in step 5. -TODO: figure out if we should really be maintaining the schema as one big constant, or if we should have a method in each action class that returns its own schema +FIXME: stop maintaining the schema as one big constant and have a method in each action class that returns its own schema. A new method can gather and return all these together as one schema; it will be fast enough because it's a Python-only operation. 
#### Step 5: specify which surveys (`Asset`) should be passed to this handler @@ -90,11 +91,14 @@ for asset in Asset.objects.filter(name__contains='fuel'): asset.save() ``` +:information_source: +* `advanced_features` probably has to stay as an optimization against invoking the whole `SubmissionExtras` injection mechanism for assets that don't use it +* `decimal_rounder_fields` is nice as a general concept. `QualAction` uses something similar with `scope` and `xpath` + * Translation and transcription actions should be migrated to store the source questions in `advanced_features` instead of `known_cols` + #### Step 6: modify the `DecimalRounder` class to receive these params from `asset.advanced_features` ```python -from subsequences.status ACTION_NEEDED, PASSES - class DecimalRounder(BaseAction): ID = 'decimal_rounder' @@ -102,8 +106,11 @@ class DecimalRounder(BaseAction): # `params` is loaded from the asset self.fields_to_round = params['decimal_rounder_fields'] -## STEP 7: modify `run_change` to use the params +#### STEP 7: modify `run_change` to use the params +```python +class DecimalRounder(BaseAction): + … def run_change(self, submission): _data = submission.get(self._destination_field, {}) _data[self.ID] = {} @@ -113,13 +120,14 @@ class DecimalRounder(BaseAction): return {**submission, self._destination_field: _data} ``` -TODO: `build_params()` also appears in real-life actions. What is it? It is never called(!) because -`action_params == True` in `utils/__init__.py` never evaluates to true. +TODO: `build_params()` also appears in real-life actions but seems to be only used in unit tests, which are the only place where `action_params == True` in `utils/__init__.py` evaluates to true. 
#### Step 8: After a submission has come in, POST metadata to the `/advanced_submission_post/` API endpoint TODO: for number_doubler, we ended up using something like `"this_number": {"number_doubler": {"value": 667}}}` +FIXME: no real-world actions require POSTing the survey response like how `fuel_cost` is shown below. They read from the stored submission data. Actions that accept manual input (manual transcripts and translations) do take that input as POST data from `/advanced_submission_post/` with the submission UUID in the `submission` parameter. + ``` POST to "/advanced_submission_post/aSsEtUiD" @@ -135,7 +143,7 @@ This will create a record in the `submission_extras` table with the following va GET "/advanced_submission_post/aSsEtUiD?submission=" { - "submission": "submissionUuid", + "submission": "submissionUuid", # FIXME: this is not returned; the `_supplementalDetails` are returned unwrapped and directly "_supplementalDetails": { "decimal_rounder": { "fuel_cost": 1.23 @@ -144,10 +152,13 @@ GET "/advanced_submission_post/aSsEtUiD?submission=" } ``` -TODO: does GET to `advanced_submission_post` actually work? +TODO: does GET to `advanced_submission_post` actually work? Yes, and the front end uses it (!) But is there a reason to have it in addition to the regular data API, e.g. +https://kf.kobotoolbox.org/api/v2/assets/aCHy38fwjmXaBfZkjCyWZa/data/510941a3-cea4-4a9a-81d8-ec0329c964db/? #### Step 9 (optional): Define a validator +FIXME: This is a mess. `roundednumber` isn't used anywhere. It's probably talking about setting up a schema to validate the output of the action, i.e. `{"decimal_rounder": {"fuel_cost": 1.23}}`. However, it could be referring to validating the POST to `advanced_submission_post`, which for some actions (as described above) would take manual content (like manual transcripts and translations). For the number rounder, though, we would not trivially re-POST the `"fuel_cost": 1.23456` value from the original submission. 
+ Because `advanced_submission_post` data can be sourced from anywhere, it should be validated. The prominent way to do this is with a jsonschema defined in the action class. ```python From 24a07b4afa7f519010d6484ee57204e746019f91 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Wed, 20 Aug 2025 11:32:53 -0400 Subject: [PATCH 008/138] Start drafting new README based on what we want MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …and with less tiptoeing around what's already there --- kobo/apps/long_running_migrations/app.py | 2 +- kobo/apps/subsequences/README-draft.md | 221 ++++++++++++++++++ .../advanced_features_params_schema.py | 6 + 3 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 kobo/apps/subsequences/README-draft.md diff --git a/kobo/apps/long_running_migrations/app.py b/kobo/apps/long_running_migrations/app.py index 859374a92d..ebdf6e48aa 100644 --- a/kobo/apps/long_running_migrations/app.py +++ b/kobo/apps/long_running_migrations/app.py @@ -99,4 +99,4 @@ def check_must_complete_long_running_migrations(app_configs, **kwargs): ] -register(check_must_complete_long_running_migrations, Tags.database) +#register(check_must_complete_long_running_migrations, Tags.database) diff --git a/kobo/apps/subsequences/README-draft.md b/kobo/apps/subsequences/README-draft.md new file mode 100644 index 0000000000..55423813a1 --- /dev/null +++ b/kobo/apps/subsequences/README-draft.md @@ -0,0 +1,221 @@ +# `subsequences` app + +### Purpose: + +* Allow pluggable python code to handle and process submission data +* Pass this data through to front end views and exports +* Allow targeting of specific users/groups/forms (not all code is sitewide) + + +### The name + +* The name `subsequences` reflects the way the data is handled sequentially after submission or after another "action" has handled the data + + +## The code + +### All actions must have the following components + +* a unique identifier for the action +* 
three jsonschemas: + 1. one to validate the parameters used to configure the action + * `ADVANCED_FEATURES_PARAMS_SCHEMA` + 2. one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) + * the result of `modify_jsonschema()` + 3. one to validate the result of the action - the result of `modify_jsonschema()` + * OH NO, this doesn't happen at all yet +* a handler that receives a submission (and other metadata) and processes it + +## An example + +* A one question survey and we want to round down the decimal on the survey + * `{ + "type": "decimal", + "name": "fuel_cost" + }` + +#### step 1. subclass the "BaseAction" + +We will define a class `DecimalRounder` that inherits from `subsequences.actions.BaseAction` + +#### step 2. pick an identifier for the new action + +```python +from kobo.apps.subsequences import BaseAction +class DecimalRounder(BaseAction): + ID = 'decimal_rounder' +``` + +#### step 3: how should a record should be modified or handled? + +In this example, we would probably want to handle any submission that has a non-null value in the `fuel_cost` field of the submission. We would define that like this: + +```python +class DecimalRounder(BaseAction): + ID = 'decimal_rounder' + + def run_change(self, submission): + # `_destination_field` is defined by `BaseAction` to be `_supplementalDetails` + _data = submission.get(self._destination_field, {}) + if (fuel_cost := submission.get('fuel_cost')) is not None: + _data[self.ID] = {'fuel_cost': round(fuel_cost * 100) / 100} + return {**submission, self._destination_field: _data} + +``` + +#### Step 4: write a method to update `ADVANCED_FEATURES_PARAMS_SCHEMA` + +…and change `ADVANCED_FEATURES_PARAMS_SCHEMA` to be dynamically generated by a a new method that gathers and returns schemas for all enabled actions together as one schema; it will be fast enough because it's a Python-only operation. 
+ +TODO: add example + +#### Step 5: specify which surveys (`Asset`) should be passed to this handler + +Somewhere, either through the API or elsewhere, add relevant details to the asset's `advanced_features` field: + +```python +for asset in Asset.objects.filter(name__contains='fuel'): + asset.advanced_features = { + # 'decimal_rounder' is the ID of the action, defined above + 'decimal_rounder': { + 'decimal_rounder_fields': ['fuel_cost'] + } + } + asset.save() +``` +EHHH, fields might be okay? better than known_cols, but is the rest of the config going to be the same for all fields? prob not + + +#### Step 6: modify the `DecimalRounder` class to receive these params from `asset.advanced_features` + +```python +class DecimalRounder(BaseAction): + ID = 'decimal_rounder' + + def load_params(self, params): + # `params` is loaded from the asset + self.fields_to_round = params['decimal_rounder_fields'] +``` + +#### STEP 7: modify `run_change` to use the params + +```python +class DecimalRounder(BaseAction): + … + def run_change(self, submission): + _data = submission.get(self._destination_field, {}) + _data[self.ID] = {} + for field_name in self.fields_to_round: + if (fuel_cost := submission.get(field_name)) is None: + continue + _data[self.ID][field_name] = round(fuel_cost * 100) / 100 + return {**submission, self._destination_field: _data} +``` + +TODO: `build_params()` also appears in real-life actions but seems to be only used in unit tests, which are the only place where `action_params == True` in `utils/__init__.py` evaluates to true. + +#### Step 8: After a submission has come in, POST metadata to the `/advanced_submission_post/` API endpoint + +TODO: Actions that accept manual input (manual transcripts and translations) do take that input as POST data from `/advanced_submission_post/` alongside the submission UUID in the `submission` parameter. Provide an example of this. 
+ +``` +POST to "/advanced_submission_post/aSsEtUiD" + +{ + "submission": "submission-uuid", # this submission contains 1.23456 as the response to "fuel_cost" +} +``` + +This will create a record in the `submission_extras` table with the following values: + +``` +GET "/advanced_submission_post/aSsEtUiD?submission=" + +{ + "submission": "submissionUuid", # FIXME: this is not returned; the `_supplementalDetails` are returned unwrapped and directly + "_supplementalDetails": { + "decimal_rounder": { + "fuel_cost": 1.23 + } + } +} +``` + +TODO: does GET to `advanced_submission_post` actually work? Yes, and the front end uses it (!) But is there a reason to have it in addition to the regular data API, e.g. +https://kf.kobotoolbox.org/api/v2/assets/aCHy38fwjmXaBfZkjCyWZa/data/510941a3-cea4-4a9a-81d8-ec0329c964db/? + +#### Step 9 (optional): Define a validator + +FIXME: This is a mess. `roundednumber` isn't used anywhere. It's probably talking about setting up a schema to validate the output of the action, i.e. `{"decimal_rounder": {"fuel_cost": 1.23}}`. However, it could be referring to validating the POST to `advanced_submission_post`, which for some actions (as described above) would take manual content (like manual transcripts and translations). For the number rounder, though, we would not trivially re-POST the `"fuel_cost": 1.23456` value from the original submission. + +Because `advanced_submission_post` data can be sourced from anywhere, it should be validated. The prominent way to do this is with a jsonschema defined in the action class. + +```python +class DecimalRounder(BaseAction): + ID = 'decimal_rounder' + # ... 
+ # `modify_jsonschema` appended to the class above + def modify_jsonschema(self, schema): + defs = schema.setdefault('definitions', {}) + props = schema.setdefault('properties', {}) + # TODO: make sure this actually works… + defs[self.ID] = {} + defs[self.ID]['roundednumber'] = { + 'type': 'number', + } + + for field_name in self.fields_to_round: + props[field_name] = {'$ref': f'#/definitions/{self.ID}/roundednumber'} + + return schema +``` + +#### Step 10: Test the module + +~~There is a utility to help "kick the tires" of your action subclass.~~ + +> NO THERE ISN'T! `subsequences_action_test` doesn't exist! + +`python manage.py runscript subsequences_action_test < params_plus_submission.json` + +where `params_plus_submission.json` looks like this: + +```json +{ + "advanced_features": { + "decimal_rounder": { + "decimal_rounder_fields": ["fuel_cost"] + } + }, + "submission": { + "fuel_cost": 5.678901 + } +} +``` + +this should print out the resulting submission: + +``` +{ + "fuel_cost": 5.678901, + "_supplementalDetails": { + "decimal_rounder": { + "fuel_cost": 5.68 + } + } +} +``` + +## Further development + +These modules can be used in sequence to allow connection to external services or pulling data from other forms. + +It can be used to store large amounts of unstructured data so be sure to test the jsonschema to make sure that POSTed values pass narrowly. + +jsonschema resources: +* [json-schema.org](https://json-schema.org/) +* [json-schema-validator](https://www.jsonschemavalidator.net/) + +## Further development + +The changes are triggered with a POST to `/advanced_submission_post/`, but in the future could be triggered automatically by a hook when a submission is first received into the system. 
diff --git a/kobo/apps/subsequences/advanced_features_params_schema.py b/kobo/apps/subsequences/advanced_features_params_schema.py index 4d669353f2..811dad8e5a 100644 --- a/kobo/apps/subsequences/advanced_features_params_schema.py +++ b/kobo/apps/subsequences/advanced_features_params_schema.py @@ -10,6 +10,10 @@ 'type': 'object', 'additionalProperties': False, 'properties': { + # FIXME: YES!!! Just make sure all of these keys are the IDs of the + # actions, import the actions, and call some staticmethod on each to + # shove their schemas in here similarly to what's being done at the end + # for `qual` 'transcript': { 'type': 'object', 'properties': { @@ -46,6 +50,8 @@ # User-defined qualitative analysis forms ADVANCED_FEATURES_PARAMS_SCHEMA['$defs'] = { + # FIXME: can all this stuff be nested within `qual`, i.e. the ID of the action? it'd be nice if `$defs` allows that + # The answer is Yes 'qualQuestionType': { 'type': 'string', 'enum': [ From 3421691f1d185dfce2546eae0408d286c46cae9a Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Wed, 20 Aug 2025 16:46:25 -0400 Subject: [PATCH 009/138] Begin rewriting manual transcription action --- .../actions/manual_transcription.py | 140 +++++++++++++++--- 1 file changed, 123 insertions(+), 17 deletions(-) diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 48c96f42d5..f1a51d4119 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -1,28 +1,134 @@ +import jsonschema from ..constants import TRANSCRIBABLE_SOURCE_TYPES -from ..actions.base import BaseAction, ACTION_NEEDED, PASSES +#from ..actions.base import BaseAction -PENDING = 'PENDING' +""" +### All actions must have the following components +* (check!) a unique identifier for the action +* three jsonschemas: + 1. one to validate the parameters used to configure the action + * `ADVANCED_FEATURES_PARAMS_SCHEMA` + 2. 
one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) + * the result of `modify_jsonschema()` + 3. one to validate the result of the action - the result of `modify_jsonschema()` + * OH NO, this doesn't happen at all yet +* a handler that receives a submission (and other metadata) and processes it +""" + +""" +idea of example content in asset.advanced_features (what kind of actions are activated per question) +{ + 'version': '20250820', + 'schema': { + 'my_audio_question': { + 'manual_transcription': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + 'manual_translation': [{'language': 'fr'}], + }, + 'my_video_question': { + 'manual_transcription': [{'language': 'en'}], + }, + 'my_number_question': { + 'number_multiplier': [{'multiplier': 3}], + }, + }, +} +""" + +class BaseAction: + @classmethod + def validate_params(cls, params): + jsonschema.validate(params, cls.params_schema) + + def validate_data(self, data): + jsonschema.validate(data, self.data_schema) class ManualTranscriptionAction(BaseAction): ID = 'manual_transcription' + def __init__(self, source_question_xpath, params): + self.source_question_xpath = source_question_xpath + self.params = params + + """ + For an audio question called `my_audio_question` that's transcribed + into 3 languages, the schema for `Asset.advanced_features` might look + like: + 'my_audio_question': { + 'manual_transcription': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + } + + The `params_schema` attribute defines the shape of the array where each + element is an object with a single string property for the transcript + language. 
+ """ + params_schema = { + 'type': 'array', + 'items': { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + } + }, + 'required': ['language'], + 'type': 'object', + }, + } + + @property + def data_schema(self): # for lack of a better name + """ + (currently) POST to "/advanced_submission_post/aSsEtUiD" + POST to "/api/v2/assets//data//supplemental" # idk, rename? + { + 'manual_transcription': { + 'language': 'es', + 'transcript': 'Almorzamos muy bien hoy', + } + } + """ + languages = [] + for individual_params in self.params: + languages.append(individual_params['language']) + + return { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + 'enum': languages, + }, + 'transcript': { + 'type': 'string', + }, + }, + 'required': ['language', 'transcript'], + 'type': 'object', + } + + @property @classmethod - def build_params(cls, content, **kwargs): - possible_transcribed_fields = [] - for row in content.get('survey', []): - if row['type'] in TRANSCRIBABLE_SOURCE_TYPES: - possible_transcribed_fields.append(cls.get_name(row)) - params = {'values': possible_transcribed_fields} - return params + def result_schema(cls): + """ + we also need a schema to define the final result that will be written + into SubmissionExtras - def load_params(self, params): - self.possible_transcribed_fields = params['values'] + we need to solve the problem of storing multiple results for a single action + """ + raise NotImplementedError - def check_submission_status(self, submission): - if self._destination_field not in submission: - return ACTION_NEEDED - supp_data = submission[self._destination_field] - # needs to be built out - return PASSES + def load_params(self, params): + """ + idk maybe we use this to read the language out of `Asset.advanced_features` + """ + self.possible_transcribed_fields = params['values'] From 5cd5896ace322c475edf73bee817c0afc6444e83 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Wed, 20 Aug 2025 17:09:49 -0400 Subject: [PATCH 010/138] Continue rewriting manual transcription action --- kobo/apps/subsequences/actions/manual_transcription.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index f1a51d4119..1a64212763 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -7,9 +7,9 @@ * (check!) a unique identifier for the action * three jsonschemas: - 1. one to validate the parameters used to configure the action + 1. (check!) one to validate the parameters used to configure the action * `ADVANCED_FEATURES_PARAMS_SCHEMA` - 2. one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) + 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) * the result of `modify_jsonschema()` 3. one to validate the result of the action - the result of `modify_jsonschema()` * OH NO, this doesn't happen at all yet From 4b85d2fd86048b9fcf95e0db3220a85288a3dad4 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Wed, 20 Aug 2025 17:16:47 -0400 Subject: [PATCH 011/138] =?UTF-8?q?Create=20fresh=20`subsequences`=20direc?= =?UTF-8?q?tory,=20and=20move=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit previous work to `subsequences__old` --- .../actions/manual_transcription.py | 2 +- .../README-draft.md | 0 .../README.md | 0 .../__init__.py | 0 .../actions/__init__.py | 0 .../actions/automatic_transcription.py | 0 .../actions/base.py | 0 .../actions/keyword_search.py | 0 .../actions/manual_transcription.py | 134 ++++++++++++++++++ .../actions/number_doubler.py | 0 .../actions/qual.py | 0 .../actions/states.py | 0 .../actions/translation.py | 0 .../actions/unknown_action.py | 0 .../advanced_features_params_schema.py | 0 .../api_view.py | 0 .../apps.py | 0 .../constants.py | 0 .../exceptions.py | 0 .../integrations/__init__.py | 0 .../integrations/google/__init__.py | 0 .../integrations/google/base.py | 0 .../integrations/google/google_transcribe.py | 0 .../integrations/google/google_translate.py | 0 .../integrations/google/utils.py | 0 .../integrations/misc.py | 0 .../integrations/translate.py | 0 .../jsonschemas/qual_schema.py | 0 .../migrations/0001_initial.py | 0 ...ique_together_asset_and_submission_uuid.py | 0 ..._submissionextras_date_created_and_more.py | 0 ...4_increase_subsequences_submission_uuid.py | 0 .../migrations/__init__.py | 0 .../models.py | 0 .../prev.py | 0 .../scripts/__init__.py | 0 ...vate_advanced_features_for_newest_asset.py | 0 ...add_qual_to_last_question_of_last_asset.py | 0 .../scripts/export_analysis_form.py | 0 .../scripts/recalc_latest_subex.py | 0 .../scripts/repop_known_cols.py | 0 .../scripts/subsequences_export.py | 0 .../tasks/__init__.py | 0 .../tests/__init__.py | 0 .../tests/test_known_cols_utils.py | 0 .../tests/test_nlp_integration.py | 0 .../tests/test_number_doubler.py | 0 .../tests/test_proj_advanced_features.py | 0 .../tests/test_submission_extras_api_post.py | 0 
.../tests/test_submission_extras_content.py | 0 .../tests/test_submission_stream.py | 0 .../urls.py | 0 .../utils/__init__.py | 0 .../utils/deprecation.py | 0 .../determine_export_cols_with_values.py | 0 .../utils/parse_known_cols.py | 0 56 files changed, 135 insertions(+), 1 deletion(-) rename kobo/apps/{subsequences => subsequences__old}/README-draft.md (100%) rename kobo/apps/{subsequences => subsequences__old}/README.md (100%) rename kobo/apps/{subsequences => subsequences__old}/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/automatic_transcription.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/base.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/keyword_search.py (100%) create mode 100644 kobo/apps/subsequences__old/actions/manual_transcription.py rename kobo/apps/{subsequences => subsequences__old}/actions/number_doubler.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/qual.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/states.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/translation.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/unknown_action.py (100%) rename kobo/apps/{subsequences => subsequences__old}/advanced_features_params_schema.py (100%) rename kobo/apps/{subsequences => subsequences__old}/api_view.py (100%) rename kobo/apps/{subsequences => subsequences__old}/apps.py (100%) rename kobo/apps/{subsequences => subsequences__old}/constants.py (100%) rename kobo/apps/{subsequences => subsequences__old}/exceptions.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/base.py (100%) rename 
kobo/apps/{subsequences => subsequences__old}/integrations/google/google_transcribe.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/google_translate.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/utils.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/misc.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/translate.py (100%) rename kobo/apps/{subsequences => subsequences__old}/jsonschemas/qual_schema.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0001_initial.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0003_alter_submissionextras_date_created_and_more.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0004_increase_subsequences_submission_uuid.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/models.py (100%) rename kobo/apps/{subsequences => subsequences__old}/prev.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/activate_advanced_features_for_newest_asset.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/add_qual_to_last_question_of_last_asset.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/export_analysis_form.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/recalc_latest_subex.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/repop_known_cols.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/subsequences_export.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tasks/__init__.py (100%) rename kobo/apps/{subsequences => 
subsequences__old}/tests/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_known_cols_utils.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_nlp_integration.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_number_doubler.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_proj_advanced_features.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_submission_extras_api_post.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_submission_extras_content.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_submission_stream.py (100%) rename kobo/apps/{subsequences => subsequences__old}/urls.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/deprecation.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/determine_export_cols_with_values.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/parse_known_cols.py (100%) diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 1a64212763..a95201af97 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -1,5 +1,5 @@ import jsonschema -from ..constants import TRANSCRIBABLE_SOURCE_TYPES +#from ..constants import TRANSCRIBABLE_SOURCE_TYPES #from ..actions.base import BaseAction """ diff --git a/kobo/apps/subsequences/README-draft.md b/kobo/apps/subsequences__old/README-draft.md similarity index 100% rename from kobo/apps/subsequences/README-draft.md rename to kobo/apps/subsequences__old/README-draft.md diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences__old/README.md similarity index 100% rename from kobo/apps/subsequences/README.md rename to 
kobo/apps/subsequences__old/README.md diff --git a/kobo/apps/subsequences/__init__.py b/kobo/apps/subsequences__old/__init__.py similarity index 100% rename from kobo/apps/subsequences/__init__.py rename to kobo/apps/subsequences__old/__init__.py diff --git a/kobo/apps/subsequences/actions/__init__.py b/kobo/apps/subsequences__old/actions/__init__.py similarity index 100% rename from kobo/apps/subsequences/actions/__init__.py rename to kobo/apps/subsequences__old/actions/__init__.py diff --git a/kobo/apps/subsequences/actions/automatic_transcription.py b/kobo/apps/subsequences__old/actions/automatic_transcription.py similarity index 100% rename from kobo/apps/subsequences/actions/automatic_transcription.py rename to kobo/apps/subsequences__old/actions/automatic_transcription.py diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences__old/actions/base.py similarity index 100% rename from kobo/apps/subsequences/actions/base.py rename to kobo/apps/subsequences__old/actions/base.py diff --git a/kobo/apps/subsequences/actions/keyword_search.py b/kobo/apps/subsequences__old/actions/keyword_search.py similarity index 100% rename from kobo/apps/subsequences/actions/keyword_search.py rename to kobo/apps/subsequences__old/actions/keyword_search.py diff --git a/kobo/apps/subsequences__old/actions/manual_transcription.py b/kobo/apps/subsequences__old/actions/manual_transcription.py new file mode 100644 index 0000000000..1a64212763 --- /dev/null +++ b/kobo/apps/subsequences__old/actions/manual_transcription.py @@ -0,0 +1,134 @@ +import jsonschema +from ..constants import TRANSCRIBABLE_SOURCE_TYPES +#from ..actions.base import BaseAction + +""" +### All actions must have the following components + +* (check!) a unique identifier for the action +* three jsonschemas: + 1. (check!) one to validate the parameters used to configure the action + * `ADVANCED_FEATURES_PARAMS_SCHEMA` + 2. (check!) 
one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) + * the result of `modify_jsonschema()` + 3. one to validate the result of the action - the result of `modify_jsonschema()` + * OH NO, this doesn't happen at all yet +* a handler that receives a submission (and other metadata) and processes it +""" + +""" +idea of example content in asset.advanced_features (what kind of actions are activated per question) +{ + 'version': '20250820', + 'schema': { + 'my_audio_question': { + 'manual_transcription': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + 'manual_translation': [{'language': 'fr'}], + }, + 'my_video_question': { + 'manual_transcription': [{'language': 'en'}], + }, + 'my_number_question': { + 'number_multiplier': [{'multiplier': 3}], + }, + }, +} +""" + +class BaseAction: + @classmethod + def validate_params(cls, params): + jsonschema.validate(params, cls.params_schema) + + def validate_data(self, data): + jsonschema.validate(data, self.data_schema) + +class ManualTranscriptionAction(BaseAction): + ID = 'manual_transcription' + + def __init__(self, source_question_xpath, params): + self.source_question_xpath = source_question_xpath + self.params = params + + """ + For an audio question called `my_audio_question` that's transcribed + into 3 languages, the schema for `Asset.advanced_features` might look + like: + 'my_audio_question': { + 'manual_transcription': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + } + + The `params_schema` attribute defines the shape of the array where each + element is an object with a single string property for the transcript + language. 
+ """ + params_schema = { + 'type': 'array', + 'items': { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + } + }, + 'required': ['language'], + 'type': 'object', + }, + } + + @property + def data_schema(self): # for lack of a better name + """ + (currently) POST to "/advanced_submission_post/aSsEtUiD" + POST to "/api/v2/assets//data//supplemental" # idk, rename? + { + 'manual_transcription': { + 'language': 'es', + 'transcript': 'Almorzamos muy bien hoy', + } + } + """ + languages = [] + for individual_params in self.params: + languages.append(individual_params['language']) + + return { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + 'enum': languages, + }, + 'transcript': { + 'type': 'string', + }, + }, + 'required': ['language', 'transcript'], + 'type': 'object', + } + + @property + @classmethod + def result_schema(cls): + """ + we also need a schema to define the final result that will be written + into SubmissionExtras + + we need to solve the problem of storing multiple results for a single action + """ + raise NotImplementedError + + + def load_params(self, params): + """ + idk maybe we use this to read the language out of `Asset.advanced_features` + """ + self.possible_transcribed_fields = params['values'] diff --git a/kobo/apps/subsequences/actions/number_doubler.py b/kobo/apps/subsequences__old/actions/number_doubler.py similarity index 100% rename from kobo/apps/subsequences/actions/number_doubler.py rename to kobo/apps/subsequences__old/actions/number_doubler.py diff --git a/kobo/apps/subsequences/actions/qual.py b/kobo/apps/subsequences__old/actions/qual.py similarity index 100% rename from kobo/apps/subsequences/actions/qual.py rename to kobo/apps/subsequences__old/actions/qual.py diff --git a/kobo/apps/subsequences/actions/states.py b/kobo/apps/subsequences__old/actions/states.py similarity index 100% rename from kobo/apps/subsequences/actions/states.py rename to 
kobo/apps/subsequences__old/actions/states.py diff --git a/kobo/apps/subsequences/actions/translation.py b/kobo/apps/subsequences__old/actions/translation.py similarity index 100% rename from kobo/apps/subsequences/actions/translation.py rename to kobo/apps/subsequences__old/actions/translation.py diff --git a/kobo/apps/subsequences/actions/unknown_action.py b/kobo/apps/subsequences__old/actions/unknown_action.py similarity index 100% rename from kobo/apps/subsequences/actions/unknown_action.py rename to kobo/apps/subsequences__old/actions/unknown_action.py diff --git a/kobo/apps/subsequences/advanced_features_params_schema.py b/kobo/apps/subsequences__old/advanced_features_params_schema.py similarity index 100% rename from kobo/apps/subsequences/advanced_features_params_schema.py rename to kobo/apps/subsequences__old/advanced_features_params_schema.py diff --git a/kobo/apps/subsequences/api_view.py b/kobo/apps/subsequences__old/api_view.py similarity index 100% rename from kobo/apps/subsequences/api_view.py rename to kobo/apps/subsequences__old/api_view.py diff --git a/kobo/apps/subsequences/apps.py b/kobo/apps/subsequences__old/apps.py similarity index 100% rename from kobo/apps/subsequences/apps.py rename to kobo/apps/subsequences__old/apps.py diff --git a/kobo/apps/subsequences/constants.py b/kobo/apps/subsequences__old/constants.py similarity index 100% rename from kobo/apps/subsequences/constants.py rename to kobo/apps/subsequences__old/constants.py diff --git a/kobo/apps/subsequences/exceptions.py b/kobo/apps/subsequences__old/exceptions.py similarity index 100% rename from kobo/apps/subsequences/exceptions.py rename to kobo/apps/subsequences__old/exceptions.py diff --git a/kobo/apps/subsequences/integrations/__init__.py b/kobo/apps/subsequences__old/integrations/__init__.py similarity index 100% rename from kobo/apps/subsequences/integrations/__init__.py rename to kobo/apps/subsequences__old/integrations/__init__.py diff --git 
a/kobo/apps/subsequences/integrations/google/__init__.py b/kobo/apps/subsequences__old/integrations/google/__init__.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/__init__.py rename to kobo/apps/subsequences__old/integrations/google/__init__.py diff --git a/kobo/apps/subsequences/integrations/google/base.py b/kobo/apps/subsequences__old/integrations/google/base.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/base.py rename to kobo/apps/subsequences__old/integrations/google/base.py diff --git a/kobo/apps/subsequences/integrations/google/google_transcribe.py b/kobo/apps/subsequences__old/integrations/google/google_transcribe.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/google_transcribe.py rename to kobo/apps/subsequences__old/integrations/google/google_transcribe.py diff --git a/kobo/apps/subsequences/integrations/google/google_translate.py b/kobo/apps/subsequences__old/integrations/google/google_translate.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/google_translate.py rename to kobo/apps/subsequences__old/integrations/google/google_translate.py diff --git a/kobo/apps/subsequences/integrations/google/utils.py b/kobo/apps/subsequences__old/integrations/google/utils.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/utils.py rename to kobo/apps/subsequences__old/integrations/google/utils.py diff --git a/kobo/apps/subsequences/integrations/misc.py b/kobo/apps/subsequences__old/integrations/misc.py similarity index 100% rename from kobo/apps/subsequences/integrations/misc.py rename to kobo/apps/subsequences__old/integrations/misc.py diff --git a/kobo/apps/subsequences/integrations/translate.py b/kobo/apps/subsequences__old/integrations/translate.py similarity index 100% rename from kobo/apps/subsequences/integrations/translate.py rename to kobo/apps/subsequences__old/integrations/translate.py diff --git 
a/kobo/apps/subsequences/jsonschemas/qual_schema.py b/kobo/apps/subsequences__old/jsonschemas/qual_schema.py similarity index 100% rename from kobo/apps/subsequences/jsonschemas/qual_schema.py rename to kobo/apps/subsequences__old/jsonschemas/qual_schema.py diff --git a/kobo/apps/subsequences/migrations/0001_initial.py b/kobo/apps/subsequences__old/migrations/0001_initial.py similarity index 100% rename from kobo/apps/subsequences/migrations/0001_initial.py rename to kobo/apps/subsequences__old/migrations/0001_initial.py diff --git a/kobo/apps/subsequences/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py b/kobo/apps/subsequences__old/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py similarity index 100% rename from kobo/apps/subsequences/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py rename to kobo/apps/subsequences__old/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py diff --git a/kobo/apps/subsequences/migrations/0003_alter_submissionextras_date_created_and_more.py b/kobo/apps/subsequences__old/migrations/0003_alter_submissionextras_date_created_and_more.py similarity index 100% rename from kobo/apps/subsequences/migrations/0003_alter_submissionextras_date_created_and_more.py rename to kobo/apps/subsequences__old/migrations/0003_alter_submissionextras_date_created_and_more.py diff --git a/kobo/apps/subsequences/migrations/0004_increase_subsequences_submission_uuid.py b/kobo/apps/subsequences__old/migrations/0004_increase_subsequences_submission_uuid.py similarity index 100% rename from kobo/apps/subsequences/migrations/0004_increase_subsequences_submission_uuid.py rename to kobo/apps/subsequences__old/migrations/0004_increase_subsequences_submission_uuid.py diff --git a/kobo/apps/subsequences/migrations/__init__.py b/kobo/apps/subsequences__old/migrations/__init__.py similarity index 100% rename from kobo/apps/subsequences/migrations/__init__.py rename to 
kobo/apps/subsequences__old/migrations/__init__.py diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences__old/models.py similarity index 100% rename from kobo/apps/subsequences/models.py rename to kobo/apps/subsequences__old/models.py diff --git a/kobo/apps/subsequences/prev.py b/kobo/apps/subsequences__old/prev.py similarity index 100% rename from kobo/apps/subsequences/prev.py rename to kobo/apps/subsequences__old/prev.py diff --git a/kobo/apps/subsequences/scripts/__init__.py b/kobo/apps/subsequences__old/scripts/__init__.py similarity index 100% rename from kobo/apps/subsequences/scripts/__init__.py rename to kobo/apps/subsequences__old/scripts/__init__.py diff --git a/kobo/apps/subsequences/scripts/activate_advanced_features_for_newest_asset.py b/kobo/apps/subsequences__old/scripts/activate_advanced_features_for_newest_asset.py similarity index 100% rename from kobo/apps/subsequences/scripts/activate_advanced_features_for_newest_asset.py rename to kobo/apps/subsequences__old/scripts/activate_advanced_features_for_newest_asset.py diff --git a/kobo/apps/subsequences/scripts/add_qual_to_last_question_of_last_asset.py b/kobo/apps/subsequences__old/scripts/add_qual_to_last_question_of_last_asset.py similarity index 100% rename from kobo/apps/subsequences/scripts/add_qual_to_last_question_of_last_asset.py rename to kobo/apps/subsequences__old/scripts/add_qual_to_last_question_of_last_asset.py diff --git a/kobo/apps/subsequences/scripts/export_analysis_form.py b/kobo/apps/subsequences__old/scripts/export_analysis_form.py similarity index 100% rename from kobo/apps/subsequences/scripts/export_analysis_form.py rename to kobo/apps/subsequences__old/scripts/export_analysis_form.py diff --git a/kobo/apps/subsequences/scripts/recalc_latest_subex.py b/kobo/apps/subsequences__old/scripts/recalc_latest_subex.py similarity index 100% rename from kobo/apps/subsequences/scripts/recalc_latest_subex.py rename to 
kobo/apps/subsequences__old/scripts/recalc_latest_subex.py diff --git a/kobo/apps/subsequences/scripts/repop_known_cols.py b/kobo/apps/subsequences__old/scripts/repop_known_cols.py similarity index 100% rename from kobo/apps/subsequences/scripts/repop_known_cols.py rename to kobo/apps/subsequences__old/scripts/repop_known_cols.py diff --git a/kobo/apps/subsequences/scripts/subsequences_export.py b/kobo/apps/subsequences__old/scripts/subsequences_export.py similarity index 100% rename from kobo/apps/subsequences/scripts/subsequences_export.py rename to kobo/apps/subsequences__old/scripts/subsequences_export.py diff --git a/kobo/apps/subsequences/tasks/__init__.py b/kobo/apps/subsequences__old/tasks/__init__.py similarity index 100% rename from kobo/apps/subsequences/tasks/__init__.py rename to kobo/apps/subsequences__old/tasks/__init__.py diff --git a/kobo/apps/subsequences/tests/__init__.py b/kobo/apps/subsequences__old/tests/__init__.py similarity index 100% rename from kobo/apps/subsequences/tests/__init__.py rename to kobo/apps/subsequences__old/tests/__init__.py diff --git a/kobo/apps/subsequences/tests/test_known_cols_utils.py b/kobo/apps/subsequences__old/tests/test_known_cols_utils.py similarity index 100% rename from kobo/apps/subsequences/tests/test_known_cols_utils.py rename to kobo/apps/subsequences__old/tests/test_known_cols_utils.py diff --git a/kobo/apps/subsequences/tests/test_nlp_integration.py b/kobo/apps/subsequences__old/tests/test_nlp_integration.py similarity index 100% rename from kobo/apps/subsequences/tests/test_nlp_integration.py rename to kobo/apps/subsequences__old/tests/test_nlp_integration.py diff --git a/kobo/apps/subsequences/tests/test_number_doubler.py b/kobo/apps/subsequences__old/tests/test_number_doubler.py similarity index 100% rename from kobo/apps/subsequences/tests/test_number_doubler.py rename to kobo/apps/subsequences__old/tests/test_number_doubler.py diff --git a/kobo/apps/subsequences/tests/test_proj_advanced_features.py 
b/kobo/apps/subsequences__old/tests/test_proj_advanced_features.py similarity index 100% rename from kobo/apps/subsequences/tests/test_proj_advanced_features.py rename to kobo/apps/subsequences__old/tests/test_proj_advanced_features.py diff --git a/kobo/apps/subsequences/tests/test_submission_extras_api_post.py b/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py similarity index 100% rename from kobo/apps/subsequences/tests/test_submission_extras_api_post.py rename to kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py diff --git a/kobo/apps/subsequences/tests/test_submission_extras_content.py b/kobo/apps/subsequences__old/tests/test_submission_extras_content.py similarity index 100% rename from kobo/apps/subsequences/tests/test_submission_extras_content.py rename to kobo/apps/subsequences__old/tests/test_submission_extras_content.py diff --git a/kobo/apps/subsequences/tests/test_submission_stream.py b/kobo/apps/subsequences__old/tests/test_submission_stream.py similarity index 100% rename from kobo/apps/subsequences/tests/test_submission_stream.py rename to kobo/apps/subsequences__old/tests/test_submission_stream.py diff --git a/kobo/apps/subsequences/urls.py b/kobo/apps/subsequences__old/urls.py similarity index 100% rename from kobo/apps/subsequences/urls.py rename to kobo/apps/subsequences__old/urls.py diff --git a/kobo/apps/subsequences/utils/__init__.py b/kobo/apps/subsequences__old/utils/__init__.py similarity index 100% rename from kobo/apps/subsequences/utils/__init__.py rename to kobo/apps/subsequences__old/utils/__init__.py diff --git a/kobo/apps/subsequences/utils/deprecation.py b/kobo/apps/subsequences__old/utils/deprecation.py similarity index 100% rename from kobo/apps/subsequences/utils/deprecation.py rename to kobo/apps/subsequences__old/utils/deprecation.py diff --git a/kobo/apps/subsequences/utils/determine_export_cols_with_values.py b/kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py 
similarity index 100% rename from kobo/apps/subsequences/utils/determine_export_cols_with_values.py rename to kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py diff --git a/kobo/apps/subsequences/utils/parse_known_cols.py b/kobo/apps/subsequences__old/utils/parse_known_cols.py similarity index 100% rename from kobo/apps/subsequences/utils/parse_known_cols.py rename to kobo/apps/subsequences__old/utils/parse_known_cols.py From 5091c6421d9d2bf362b30c9b9d80ddc227efd7af Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Wed, 20 Aug 2025 17:48:38 -0400 Subject: [PATCH 012/138] Remove unused `load_params()` --- kobo/apps/subsequences/actions/manual_transcription.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index a95201af97..5194e3dd8d 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -125,10 +125,3 @@ def result_schema(cls): we need to solve the problem of storing multiple results for a single action """ raise NotImplementedError - - - def load_params(self, params): - """ - idk maybe we use this to read the language out of `Asset.advanced_features` - """ - self.possible_transcribed_fields = params['values'] From 81d4d6cedbed1b532deda052c0cb668fb4858431 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Wed, 20 Aug 2025 19:16:21 -0400 Subject: [PATCH 013/138] Add preliminary manual transcription tests --- kobo/apps/subsequences/__init__.py | 0 kobo/apps/subsequences/tests/__init__.py | 0 .../tests/test_manual_transcription.py | 120 ++++++++++++++++++ 3 files changed, 120 insertions(+) create mode 100644 kobo/apps/subsequences/__init__.py create mode 100644 kobo/apps/subsequences/tests/__init__.py create mode 100644 kobo/apps/subsequences/tests/test_manual_transcription.py diff --git a/kobo/apps/subsequences/__init__.py b/kobo/apps/subsequences/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/subsequences/tests/__init__.py b/kobo/apps/subsequences/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py new file mode 100644 index 0000000000..c055eee9b8 --- /dev/null +++ b/kobo/apps/subsequences/tests/test_manual_transcription.py @@ -0,0 +1,120 @@ +import copy +import dateutil +import jsonschema +import pytest + +from ..actions.manual_transcription import ManualTranscriptionAction + + +def cur_time(): + import datetime + from zoneinfo import ZoneInfo + + return ( + datetime.datetime.now(tz=ZoneInfo('UTC')) + .isoformat() + .replace("+00:00", "Z") + ) + + +def test_valid_params_pass_validation(): + params = [{'language': 'fr'}, {'language': 'es'}] + ManualTranscriptionAction.validate_params(params) + + +def test_invalid_params_fail_validation(): + params = [{'language': 123}, {'language': 'es'}] + with pytest.raises(jsonschema.exceptions.ValidationError): + ManualTranscriptionAction.validate_params(params) + + +def test_valid_transcript_data_passes_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = ManualTranscriptionAction(xpath, params) + data = {'language': 'fr', 'transcript': 'Ne pas 
idée'} + action.validate_data(data) + + +def test_invalid_transcript_data_fails_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = ManualTranscriptionAction(xpath, params) + data = {'language': 'en', 'transcript': 'No idea'} + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_data(data) + + +def test_transcript_is_stored_in_supplemental_details(): + pass + + +def test_transcript_revisions_are_retained_in_supplemental_details(): + fake_sup_det = {} + + def get_supplemental_details(): + return fake_sup_det + + def revise_supplemental_details(new): + existing = fake_sup_det # modify directly + revisions = existing.pop('_revisions', []) + existing['_dateCreated'] = existing['_dateModified'] + del existing['_dateModified'] + revisions.append(copy.deepcopy(existing)) + + # Ensure special keys starting with underscores cannot be overwritten + for k in list(new.keys()): # unsure if coercion needed + if k.startswith('_'): + del k # log a warning? + + existing.update(new) + existing['_dateModified'] = cur_time() + existing['_revisions'] = revisions + + return fake_sup_det + + first = {'language': 'en', 'transcript': 'No idea'} + second = {'language': 'fr', 'transcript': 'Ne pas idée'} + + # now call imaginary method to store first transcript + fake_sup_det.update(first) + # is a leading underscore a good convention for marking things that must not be set by the action result? 
+ # alternatively, we could nest all the action results inside some object + # or, we could nest all the non-action-result metadata-type things inside + # an object, and protect that from being overwritten by the action + fake_sup_det['_dateCreated'] = fake_sup_det['_dateModified'] = cur_time() + fake_sup_det['_revisions'] = [] + + sup_det = get_supplemental_details() + assert sup_det['language'] == 'en' + assert sup_det['transcript'] == 'No idea' + assert sup_det['_dateCreated'] == sup_det['_dateModified'] + assert sup_det['_revisions'] == [] + first_time = sup_det['_dateCreated'] + + # now call imaginary method to store second transcript + sup_det = revise_supplemental_details(second) + + assert len(sup_det['_revisions']) == 1 + + # the revision should encompass the first transcript + assert sup_det['_revisions'][0].items() >= first.items() + + # the revision should have a creation timestamp equal to that of the first + # transcript + assert sup_det['_revisions'][0]['_dateCreated'] == first_time + + # revisions should not list a modification timestamp + assert '_dateModified' not in sup_det['_revisions'] + + # the record itself (not revision) should have an unchanged creation + # timestamp + assert sup_det['_dateCreated'] == first_time + + # the record itself should have an updated modification timestamp + assert dateutil.parser.parse( + sup_det['_dateModified'] + ) > dateutil.parser.parse(sup_det['_dateCreated']) + + # the record itself should encompass the second transcript + assert sup_det.items() >= second.items() From 7d53d2a9c78cb074244faa64f3277dd25e7200f3 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Thu, 21 Aug 2025 09:30:17 -0400 Subject: [PATCH 014/138] Move new work to subsequence__new instead, restore previous Django app --- .../README-draft.md | 0 .../README.md | 0 kobo/apps/subsequences/__init__.py | 17 +++++++++++++++++ .../actions/__init__.py | 0 .../actions/automatic_transcription.py | 0 .../actions/base.py | 0 .../actions/keyword_search.py 
| 0 .../actions/manual_transcription.py | 9 ++++++++- .../actions/number_doubler.py | 0 .../actions/qual.py | 0 .../actions/states.py | 0 .../actions/translation.py | 0 .../actions/unknown_action.py | 0 .../advanced_features_params_schema.py | 0 .../api_view.py | 0 .../{subsequences__old => subsequences}/apps.py | 0 .../constants.py | 0 .../exceptions.py | 0 .../integrations/__init__.py | 0 .../integrations/google/__init__.py | 0 .../integrations/google/base.py | 0 .../integrations/google/google_transcribe.py | 0 .../integrations/google/google_translate.py | 0 .../integrations/google/utils.py | 0 .../integrations/misc.py | 0 .../integrations/translate.py | 0 .../jsonschemas/qual_schema.py | 0 .../migrations/0001_initial.py | 0 ...unique_together_asset_and_submission_uuid.py | 0 ...er_submissionextras_date_created_and_more.py | 0 ...004_increase_subsequences_submission_uuid.py | 0 .../migrations/__init__.py | 0 .../models.py | 0 .../{subsequences__old => subsequences}/prev.py | 0 .../scripts/__init__.py | 0 ...tivate_advanced_features_for_newest_asset.py | 0 .../add_qual_to_last_question_of_last_asset.py | 0 .../scripts/export_analysis_form.py | 0 .../scripts/recalc_latest_subex.py | 0 .../scripts/repop_known_cols.py | 0 .../scripts/subsequences_export.py | 0 .../tasks/__init__.py | 0 .../tests/test_known_cols_utils.py | 0 .../tests/test_nlp_integration.py | 0 .../tests/test_number_doubler.py | 0 .../tests/test_proj_advanced_features.py | 0 .../tests/test_submission_extras_api_post.py | 0 .../tests/test_submission_extras_content.py | 0 .../tests/test_submission_stream.py | 0 .../{subsequences__old => subsequences}/urls.py | 0 .../utils/__init__.py | 0 .../utils/deprecation.py | 0 .../utils/determine_export_cols_with_values.py | 0 .../utils/parse_known_cols.py | 0 .../tests => subsequences__new}/__init__.py | 0 .../actions/manual_transcription.py | 9 +-------- kobo/apps/subsequences__new/tests/__init__.py | 0 .../tests/test_manual_transcription.py | 0 
kobo/apps/subsequences__old/__init__.py | 17 ----------------- 59 files changed, 26 insertions(+), 26 deletions(-) rename kobo/apps/{subsequences__old => subsequences}/README-draft.md (100%) rename kobo/apps/{subsequences__old => subsequences}/README.md (100%) rename kobo/apps/{subsequences__old => subsequences}/actions/__init__.py (100%) rename kobo/apps/{subsequences__old => subsequences}/actions/automatic_transcription.py (100%) rename kobo/apps/{subsequences__old => subsequences}/actions/base.py (100%) rename kobo/apps/{subsequences__old => subsequences}/actions/keyword_search.py (100%) rename kobo/apps/{subsequences__old => subsequences}/actions/number_doubler.py (100%) rename kobo/apps/{subsequences__old => subsequences}/actions/qual.py (100%) rename kobo/apps/{subsequences__old => subsequences}/actions/states.py (100%) rename kobo/apps/{subsequences__old => subsequences}/actions/translation.py (100%) rename kobo/apps/{subsequences__old => subsequences}/actions/unknown_action.py (100%) rename kobo/apps/{subsequences__old => subsequences}/advanced_features_params_schema.py (100%) rename kobo/apps/{subsequences__old => subsequences}/api_view.py (100%) rename kobo/apps/{subsequences__old => subsequences}/apps.py (100%) rename kobo/apps/{subsequences__old => subsequences}/constants.py (100%) rename kobo/apps/{subsequences__old => subsequences}/exceptions.py (100%) rename kobo/apps/{subsequences__old => subsequences}/integrations/__init__.py (100%) rename kobo/apps/{subsequences__old => subsequences}/integrations/google/__init__.py (100%) rename kobo/apps/{subsequences__old => subsequences}/integrations/google/base.py (100%) rename kobo/apps/{subsequences__old => subsequences}/integrations/google/google_transcribe.py (100%) rename kobo/apps/{subsequences__old => subsequences}/integrations/google/google_translate.py (100%) rename kobo/apps/{subsequences__old => subsequences}/integrations/google/utils.py (100%) rename kobo/apps/{subsequences__old => 
subsequences}/integrations/misc.py (100%) rename kobo/apps/{subsequences__old => subsequences}/integrations/translate.py (100%) rename kobo/apps/{subsequences__old => subsequences}/jsonschemas/qual_schema.py (100%) rename kobo/apps/{subsequences__old => subsequences}/migrations/0001_initial.py (100%) rename kobo/apps/{subsequences__old => subsequences}/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py (100%) rename kobo/apps/{subsequences__old => subsequences}/migrations/0003_alter_submissionextras_date_created_and_more.py (100%) rename kobo/apps/{subsequences__old => subsequences}/migrations/0004_increase_subsequences_submission_uuid.py (100%) rename kobo/apps/{subsequences__old => subsequences}/migrations/__init__.py (100%) rename kobo/apps/{subsequences__old => subsequences}/models.py (100%) rename kobo/apps/{subsequences__old => subsequences}/prev.py (100%) rename kobo/apps/{subsequences__old => subsequences}/scripts/__init__.py (100%) rename kobo/apps/{subsequences__old => subsequences}/scripts/activate_advanced_features_for_newest_asset.py (100%) rename kobo/apps/{subsequences__old => subsequences}/scripts/add_qual_to_last_question_of_last_asset.py (100%) rename kobo/apps/{subsequences__old => subsequences}/scripts/export_analysis_form.py (100%) rename kobo/apps/{subsequences__old => subsequences}/scripts/recalc_latest_subex.py (100%) rename kobo/apps/{subsequences__old => subsequences}/scripts/repop_known_cols.py (100%) rename kobo/apps/{subsequences__old => subsequences}/scripts/subsequences_export.py (100%) rename kobo/apps/{subsequences__old => subsequences}/tasks/__init__.py (100%) rename kobo/apps/{subsequences__old => subsequences}/tests/test_known_cols_utils.py (100%) rename kobo/apps/{subsequences__old => subsequences}/tests/test_nlp_integration.py (100%) rename kobo/apps/{subsequences__old => subsequences}/tests/test_number_doubler.py (100%) rename kobo/apps/{subsequences__old => 
subsequences}/tests/test_proj_advanced_features.py (100%) rename kobo/apps/{subsequences__old => subsequences}/tests/test_submission_extras_api_post.py (100%) rename kobo/apps/{subsequences__old => subsequences}/tests/test_submission_extras_content.py (100%) rename kobo/apps/{subsequences__old => subsequences}/tests/test_submission_stream.py (100%) rename kobo/apps/{subsequences__old => subsequences}/urls.py (100%) rename kobo/apps/{subsequences__old => subsequences}/utils/__init__.py (100%) rename kobo/apps/{subsequences__old => subsequences}/utils/deprecation.py (100%) rename kobo/apps/{subsequences__old => subsequences}/utils/determine_export_cols_with_values.py (100%) rename kobo/apps/{subsequences__old => subsequences}/utils/parse_known_cols.py (100%) rename kobo/apps/{subsequences__old/tests => subsequences__new}/__init__.py (100%) rename kobo/apps/{subsequences__old => subsequences__new}/actions/manual_transcription.py (93%) create mode 100644 kobo/apps/subsequences__new/tests/__init__.py rename kobo/apps/{subsequences => subsequences__new}/tests/test_manual_transcription.py (100%) delete mode 100644 kobo/apps/subsequences__old/__init__.py diff --git a/kobo/apps/subsequences__old/README-draft.md b/kobo/apps/subsequences/README-draft.md similarity index 100% rename from kobo/apps/subsequences__old/README-draft.md rename to kobo/apps/subsequences/README-draft.md diff --git a/kobo/apps/subsequences__old/README.md b/kobo/apps/subsequences/README.md similarity index 100% rename from kobo/apps/subsequences__old/README.md rename to kobo/apps/subsequences/README.md diff --git a/kobo/apps/subsequences/__init__.py b/kobo/apps/subsequences/__init__.py index e69de29bb2..5f46bdbac3 100644 --- a/kobo/apps/subsequences/__init__.py +++ b/kobo/apps/subsequences/__init__.py @@ -0,0 +1,17 @@ +''' +`kobo.apps.subsequences` --as in Sub(mission)Sequences is an app for defining +and following a sequence of actions or changes to a submission that has come +into kobo. 
+ +models: +- SubmissionData: + Holds a JSONField with the "supplementalData" necessary to complete the + +tasks: +(things that are queued in celery for later action) + +needs writeup: + - how to develop / debug within this app + - description of tests + +''' diff --git a/kobo/apps/subsequences__old/actions/__init__.py b/kobo/apps/subsequences/actions/__init__.py similarity index 100% rename from kobo/apps/subsequences__old/actions/__init__.py rename to kobo/apps/subsequences/actions/__init__.py diff --git a/kobo/apps/subsequences__old/actions/automatic_transcription.py b/kobo/apps/subsequences/actions/automatic_transcription.py similarity index 100% rename from kobo/apps/subsequences__old/actions/automatic_transcription.py rename to kobo/apps/subsequences/actions/automatic_transcription.py diff --git a/kobo/apps/subsequences__old/actions/base.py b/kobo/apps/subsequences/actions/base.py similarity index 100% rename from kobo/apps/subsequences__old/actions/base.py rename to kobo/apps/subsequences/actions/base.py diff --git a/kobo/apps/subsequences__old/actions/keyword_search.py b/kobo/apps/subsequences/actions/keyword_search.py similarity index 100% rename from kobo/apps/subsequences__old/actions/keyword_search.py rename to kobo/apps/subsequences/actions/keyword_search.py diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 5194e3dd8d..1a64212763 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -1,5 +1,5 @@ import jsonschema -#from ..constants import TRANSCRIBABLE_SOURCE_TYPES +from ..constants import TRANSCRIBABLE_SOURCE_TYPES #from ..actions.base import BaseAction """ @@ -125,3 +125,10 @@ def result_schema(cls): we need to solve the problem of storing multiple results for a single action """ raise NotImplementedError + + + def load_params(self, params): + """ + idk maybe we use this to read the language 
out of `Asset.advanced_features` + """ + self.possible_transcribed_fields = params['values'] diff --git a/kobo/apps/subsequences__old/actions/number_doubler.py b/kobo/apps/subsequences/actions/number_doubler.py similarity index 100% rename from kobo/apps/subsequences__old/actions/number_doubler.py rename to kobo/apps/subsequences/actions/number_doubler.py diff --git a/kobo/apps/subsequences__old/actions/qual.py b/kobo/apps/subsequences/actions/qual.py similarity index 100% rename from kobo/apps/subsequences__old/actions/qual.py rename to kobo/apps/subsequences/actions/qual.py diff --git a/kobo/apps/subsequences__old/actions/states.py b/kobo/apps/subsequences/actions/states.py similarity index 100% rename from kobo/apps/subsequences__old/actions/states.py rename to kobo/apps/subsequences/actions/states.py diff --git a/kobo/apps/subsequences__old/actions/translation.py b/kobo/apps/subsequences/actions/translation.py similarity index 100% rename from kobo/apps/subsequences__old/actions/translation.py rename to kobo/apps/subsequences/actions/translation.py diff --git a/kobo/apps/subsequences__old/actions/unknown_action.py b/kobo/apps/subsequences/actions/unknown_action.py similarity index 100% rename from kobo/apps/subsequences__old/actions/unknown_action.py rename to kobo/apps/subsequences/actions/unknown_action.py diff --git a/kobo/apps/subsequences__old/advanced_features_params_schema.py b/kobo/apps/subsequences/advanced_features_params_schema.py similarity index 100% rename from kobo/apps/subsequences__old/advanced_features_params_schema.py rename to kobo/apps/subsequences/advanced_features_params_schema.py diff --git a/kobo/apps/subsequences__old/api_view.py b/kobo/apps/subsequences/api_view.py similarity index 100% rename from kobo/apps/subsequences__old/api_view.py rename to kobo/apps/subsequences/api_view.py diff --git a/kobo/apps/subsequences__old/apps.py b/kobo/apps/subsequences/apps.py similarity index 100% rename from kobo/apps/subsequences__old/apps.py 
rename to kobo/apps/subsequences/apps.py diff --git a/kobo/apps/subsequences__old/constants.py b/kobo/apps/subsequences/constants.py similarity index 100% rename from kobo/apps/subsequences__old/constants.py rename to kobo/apps/subsequences/constants.py diff --git a/kobo/apps/subsequences__old/exceptions.py b/kobo/apps/subsequences/exceptions.py similarity index 100% rename from kobo/apps/subsequences__old/exceptions.py rename to kobo/apps/subsequences/exceptions.py diff --git a/kobo/apps/subsequences__old/integrations/__init__.py b/kobo/apps/subsequences/integrations/__init__.py similarity index 100% rename from kobo/apps/subsequences__old/integrations/__init__.py rename to kobo/apps/subsequences/integrations/__init__.py diff --git a/kobo/apps/subsequences__old/integrations/google/__init__.py b/kobo/apps/subsequences/integrations/google/__init__.py similarity index 100% rename from kobo/apps/subsequences__old/integrations/google/__init__.py rename to kobo/apps/subsequences/integrations/google/__init__.py diff --git a/kobo/apps/subsequences__old/integrations/google/base.py b/kobo/apps/subsequences/integrations/google/base.py similarity index 100% rename from kobo/apps/subsequences__old/integrations/google/base.py rename to kobo/apps/subsequences/integrations/google/base.py diff --git a/kobo/apps/subsequences__old/integrations/google/google_transcribe.py b/kobo/apps/subsequences/integrations/google/google_transcribe.py similarity index 100% rename from kobo/apps/subsequences__old/integrations/google/google_transcribe.py rename to kobo/apps/subsequences/integrations/google/google_transcribe.py diff --git a/kobo/apps/subsequences__old/integrations/google/google_translate.py b/kobo/apps/subsequences/integrations/google/google_translate.py similarity index 100% rename from kobo/apps/subsequences__old/integrations/google/google_translate.py rename to kobo/apps/subsequences/integrations/google/google_translate.py diff --git 
a/kobo/apps/subsequences__old/integrations/google/utils.py b/kobo/apps/subsequences/integrations/google/utils.py similarity index 100% rename from kobo/apps/subsequences__old/integrations/google/utils.py rename to kobo/apps/subsequences/integrations/google/utils.py diff --git a/kobo/apps/subsequences__old/integrations/misc.py b/kobo/apps/subsequences/integrations/misc.py similarity index 100% rename from kobo/apps/subsequences__old/integrations/misc.py rename to kobo/apps/subsequences/integrations/misc.py diff --git a/kobo/apps/subsequences__old/integrations/translate.py b/kobo/apps/subsequences/integrations/translate.py similarity index 100% rename from kobo/apps/subsequences__old/integrations/translate.py rename to kobo/apps/subsequences/integrations/translate.py diff --git a/kobo/apps/subsequences__old/jsonschemas/qual_schema.py b/kobo/apps/subsequences/jsonschemas/qual_schema.py similarity index 100% rename from kobo/apps/subsequences__old/jsonschemas/qual_schema.py rename to kobo/apps/subsequences/jsonschemas/qual_schema.py diff --git a/kobo/apps/subsequences__old/migrations/0001_initial.py b/kobo/apps/subsequences/migrations/0001_initial.py similarity index 100% rename from kobo/apps/subsequences__old/migrations/0001_initial.py rename to kobo/apps/subsequences/migrations/0001_initial.py diff --git a/kobo/apps/subsequences__old/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py b/kobo/apps/subsequences/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py similarity index 100% rename from kobo/apps/subsequences__old/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py rename to kobo/apps/subsequences/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py diff --git a/kobo/apps/subsequences__old/migrations/0003_alter_submissionextras_date_created_and_more.py b/kobo/apps/subsequences/migrations/0003_alter_submissionextras_date_created_and_more.py similarity index 100% rename from 
kobo/apps/subsequences__old/migrations/0003_alter_submissionextras_date_created_and_more.py rename to kobo/apps/subsequences/migrations/0003_alter_submissionextras_date_created_and_more.py diff --git a/kobo/apps/subsequences__old/migrations/0004_increase_subsequences_submission_uuid.py b/kobo/apps/subsequences/migrations/0004_increase_subsequences_submission_uuid.py similarity index 100% rename from kobo/apps/subsequences__old/migrations/0004_increase_subsequences_submission_uuid.py rename to kobo/apps/subsequences/migrations/0004_increase_subsequences_submission_uuid.py diff --git a/kobo/apps/subsequences__old/migrations/__init__.py b/kobo/apps/subsequences/migrations/__init__.py similarity index 100% rename from kobo/apps/subsequences__old/migrations/__init__.py rename to kobo/apps/subsequences/migrations/__init__.py diff --git a/kobo/apps/subsequences__old/models.py b/kobo/apps/subsequences/models.py similarity index 100% rename from kobo/apps/subsequences__old/models.py rename to kobo/apps/subsequences/models.py diff --git a/kobo/apps/subsequences__old/prev.py b/kobo/apps/subsequences/prev.py similarity index 100% rename from kobo/apps/subsequences__old/prev.py rename to kobo/apps/subsequences/prev.py diff --git a/kobo/apps/subsequences__old/scripts/__init__.py b/kobo/apps/subsequences/scripts/__init__.py similarity index 100% rename from kobo/apps/subsequences__old/scripts/__init__.py rename to kobo/apps/subsequences/scripts/__init__.py diff --git a/kobo/apps/subsequences__old/scripts/activate_advanced_features_for_newest_asset.py b/kobo/apps/subsequences/scripts/activate_advanced_features_for_newest_asset.py similarity index 100% rename from kobo/apps/subsequences__old/scripts/activate_advanced_features_for_newest_asset.py rename to kobo/apps/subsequences/scripts/activate_advanced_features_for_newest_asset.py diff --git a/kobo/apps/subsequences__old/scripts/add_qual_to_last_question_of_last_asset.py 
b/kobo/apps/subsequences/scripts/add_qual_to_last_question_of_last_asset.py similarity index 100% rename from kobo/apps/subsequences__old/scripts/add_qual_to_last_question_of_last_asset.py rename to kobo/apps/subsequences/scripts/add_qual_to_last_question_of_last_asset.py diff --git a/kobo/apps/subsequences__old/scripts/export_analysis_form.py b/kobo/apps/subsequences/scripts/export_analysis_form.py similarity index 100% rename from kobo/apps/subsequences__old/scripts/export_analysis_form.py rename to kobo/apps/subsequences/scripts/export_analysis_form.py diff --git a/kobo/apps/subsequences__old/scripts/recalc_latest_subex.py b/kobo/apps/subsequences/scripts/recalc_latest_subex.py similarity index 100% rename from kobo/apps/subsequences__old/scripts/recalc_latest_subex.py rename to kobo/apps/subsequences/scripts/recalc_latest_subex.py diff --git a/kobo/apps/subsequences__old/scripts/repop_known_cols.py b/kobo/apps/subsequences/scripts/repop_known_cols.py similarity index 100% rename from kobo/apps/subsequences__old/scripts/repop_known_cols.py rename to kobo/apps/subsequences/scripts/repop_known_cols.py diff --git a/kobo/apps/subsequences__old/scripts/subsequences_export.py b/kobo/apps/subsequences/scripts/subsequences_export.py similarity index 100% rename from kobo/apps/subsequences__old/scripts/subsequences_export.py rename to kobo/apps/subsequences/scripts/subsequences_export.py diff --git a/kobo/apps/subsequences__old/tasks/__init__.py b/kobo/apps/subsequences/tasks/__init__.py similarity index 100% rename from kobo/apps/subsequences__old/tasks/__init__.py rename to kobo/apps/subsequences/tasks/__init__.py diff --git a/kobo/apps/subsequences__old/tests/test_known_cols_utils.py b/kobo/apps/subsequences/tests/test_known_cols_utils.py similarity index 100% rename from kobo/apps/subsequences__old/tests/test_known_cols_utils.py rename to kobo/apps/subsequences/tests/test_known_cols_utils.py diff --git a/kobo/apps/subsequences__old/tests/test_nlp_integration.py 
b/kobo/apps/subsequences/tests/test_nlp_integration.py similarity index 100% rename from kobo/apps/subsequences__old/tests/test_nlp_integration.py rename to kobo/apps/subsequences/tests/test_nlp_integration.py diff --git a/kobo/apps/subsequences__old/tests/test_number_doubler.py b/kobo/apps/subsequences/tests/test_number_doubler.py similarity index 100% rename from kobo/apps/subsequences__old/tests/test_number_doubler.py rename to kobo/apps/subsequences/tests/test_number_doubler.py diff --git a/kobo/apps/subsequences__old/tests/test_proj_advanced_features.py b/kobo/apps/subsequences/tests/test_proj_advanced_features.py similarity index 100% rename from kobo/apps/subsequences__old/tests/test_proj_advanced_features.py rename to kobo/apps/subsequences/tests/test_proj_advanced_features.py diff --git a/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py b/kobo/apps/subsequences/tests/test_submission_extras_api_post.py similarity index 100% rename from kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py rename to kobo/apps/subsequences/tests/test_submission_extras_api_post.py diff --git a/kobo/apps/subsequences__old/tests/test_submission_extras_content.py b/kobo/apps/subsequences/tests/test_submission_extras_content.py similarity index 100% rename from kobo/apps/subsequences__old/tests/test_submission_extras_content.py rename to kobo/apps/subsequences/tests/test_submission_extras_content.py diff --git a/kobo/apps/subsequences__old/tests/test_submission_stream.py b/kobo/apps/subsequences/tests/test_submission_stream.py similarity index 100% rename from kobo/apps/subsequences__old/tests/test_submission_stream.py rename to kobo/apps/subsequences/tests/test_submission_stream.py diff --git a/kobo/apps/subsequences__old/urls.py b/kobo/apps/subsequences/urls.py similarity index 100% rename from kobo/apps/subsequences__old/urls.py rename to kobo/apps/subsequences/urls.py diff --git a/kobo/apps/subsequences__old/utils/__init__.py 
b/kobo/apps/subsequences/utils/__init__.py similarity index 100% rename from kobo/apps/subsequences__old/utils/__init__.py rename to kobo/apps/subsequences/utils/__init__.py diff --git a/kobo/apps/subsequences__old/utils/deprecation.py b/kobo/apps/subsequences/utils/deprecation.py similarity index 100% rename from kobo/apps/subsequences__old/utils/deprecation.py rename to kobo/apps/subsequences/utils/deprecation.py diff --git a/kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py b/kobo/apps/subsequences/utils/determine_export_cols_with_values.py similarity index 100% rename from kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py rename to kobo/apps/subsequences/utils/determine_export_cols_with_values.py diff --git a/kobo/apps/subsequences__old/utils/parse_known_cols.py b/kobo/apps/subsequences/utils/parse_known_cols.py similarity index 100% rename from kobo/apps/subsequences__old/utils/parse_known_cols.py rename to kobo/apps/subsequences/utils/parse_known_cols.py diff --git a/kobo/apps/subsequences__old/tests/__init__.py b/kobo/apps/subsequences__new/__init__.py similarity index 100% rename from kobo/apps/subsequences__old/tests/__init__.py rename to kobo/apps/subsequences__new/__init__.py diff --git a/kobo/apps/subsequences__old/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py similarity index 93% rename from kobo/apps/subsequences__old/actions/manual_transcription.py rename to kobo/apps/subsequences__new/actions/manual_transcription.py index 1a64212763..5194e3dd8d 100644 --- a/kobo/apps/subsequences__old/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -1,5 +1,5 @@ import jsonschema -from ..constants import TRANSCRIBABLE_SOURCE_TYPES +#from ..constants import TRANSCRIBABLE_SOURCE_TYPES #from ..actions.base import BaseAction """ @@ -125,10 +125,3 @@ def result_schema(cls): we need to solve the problem of storing multiple results 
for a single action """ raise NotImplementedError - - - def load_params(self, params): - """ - idk maybe we use this to read the language out of `Asset.advanced_features` - """ - self.possible_transcribed_fields = params['values'] diff --git a/kobo/apps/subsequences__new/tests/__init__.py b/kobo/apps/subsequences__new/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences__new/tests/test_manual_transcription.py similarity index 100% rename from kobo/apps/subsequences/tests/test_manual_transcription.py rename to kobo/apps/subsequences__new/tests/test_manual_transcription.py diff --git a/kobo/apps/subsequences__old/__init__.py b/kobo/apps/subsequences__old/__init__.py deleted file mode 100644 index 5f46bdbac3..0000000000 --- a/kobo/apps/subsequences__old/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -''' -`kobo.apps.subsequences` --as in Sub(mission)Sequences is an app for defining -and following a sequence of actions or changes to a submission that has come -into kobo. 
- -models: -- SubmissionData: - Holds a JSONField with the "supplementalData" necessary to complete the - -tasks: -(things that are queued in celery for later action) - -needs writeup: - - how to develop / debug within this app - - description of tests - -''' From 249abadad474d9b7319cc71a6e501ec8e7c02f5e Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Thu, 21 Aug 2025 10:07:41 -0400 Subject: [PATCH 015/138] Update revise_field to support new structure --- .../actions/manual_transcription.py | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 5194e3dd8d..6ac5b714f3 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -1,4 +1,9 @@ import jsonschema +from copy import deepcopy + +# from django.utils import timezone +from datetime import datetime as timezone + #from ..constants import TRANSCRIBABLE_SOURCE_TYPES #from ..actions.base import BaseAction @@ -40,6 +45,11 @@ """ class BaseAction: + + DATE_CREATED_FIELD = 'dateCreated' + DATE_MODIFIED_FIELD = 'dateModified' + DELETE = '⌫' + @classmethod def validate_params(cls, params): jsonschema.validate(params, cls.params_schema) @@ -47,6 +57,13 @@ def validate_params(cls, params): def validate_data(self, data): jsonschema.validate(data, self.data_schema) + def record_repr(self, record : dict) -> dict: + raise NotImplementedError() + + def revise_field(self, submission_extra: dict, edit: dict) -> dict: + raise NotImplementedError + + class ManualTranscriptionAction(BaseAction): ID = 'manual_transcription' @@ -115,8 +132,11 @@ def data_schema(self): # for lack of a better name 'type': 'object', } - @property + def record_repr(self, record : dict) -> dict: + return record.get('transcript', '') + @classmethod + @property def result_schema(cls): """ we also need a schema to 
define the final result that will be written @@ -125,3 +145,28 @@ def result_schema(cls): we need to solve the problem of storing multiple results for a single action """ raise NotImplementedError + + def revise_field(self, submission_extra: dict, edit: dict) -> dict: + """ + """ + + if self.record_repr(edit) == self.DELETE: + return {} + + now_str = timezone.now().strftime('%Y-%m-%dT%H:%M:%SZ') + revision = deepcopy(submission_extra) + new_record = deepcopy(edit) + revisions = revision.pop('revisions', []) + + revision_creation_date = revision.pop(self.DATE_MODIFIED_FIELD, now_str) + record_creation_date = revision.pop(self.DATE_CREATED_FIELD, now_str) + revision[self.DATE_CREATED_FIELD] = revision_creation_date + new_record[self.DATE_MODIFIED_FIELD] = now_str + + if submission_extra: + revisions.insert(0, revision) + new_record['revisions'] = revisions + + new_record[self.DATE_CREATED_FIELD] = record_creation_date + + return new_record From 4bfdc04aedb7e0209b0d515fabc7c5a3c0fc6eba Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Thu, 21 Aug 2025 10:34:39 -0400 Subject: [PATCH 016/138] typo --- kobo/apps/subsequences__new/tests/test_manual_transcription.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kobo/apps/subsequences__new/tests/test_manual_transcription.py b/kobo/apps/subsequences__new/tests/test_manual_transcription.py index c055eee9b8..b732c244e4 100644 --- a/kobo/apps/subsequences__new/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences__new/tests/test_manual_transcription.py @@ -74,7 +74,7 @@ def revise_supplemental_details(new): return fake_sup_det first = {'language': 'en', 'transcript': 'No idea'} - second = {'language': 'fr', 'transcript': 'Ne pas idée'} + second = {'language': 'fr', 'transcript': "Pas d'idée"} # now call imaginary method to store first transcript fake_sup_det.update(first) From dc067cc753d483d06cbeabe84943ad35e0f368cf Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Thu, 21 Aug 2025 11:03:30 -0400 Subject: [PATCH 017/138] More manual transcription tests, tweaks to `revise_field` --- .../actions/manual_transcription.py | 26 +++++++--- .../tests/test_manual_transcription.py | 47 ++++++++++++++++++- 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 6ac5b714f3..4efc7469d8 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -44,10 +44,22 @@ } """ +def utc_datetime_to_simplified_iso8601(dt): + # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format + if dt.utcoffset(): + raise NotImplementedError('Only UTC datetimes are supported') + return dt.isoformat().replace("+00:00", "Z") + class BaseAction: - DATE_CREATED_FIELD = 'dateCreated' - DATE_MODIFIED_FIELD = 'dateModified' + # is a leading underscore a good convention for marking things that must + # not be set by the action result? alternatively, we could nest all the + # action results inside some object or, we could nest all the + # non-action-result metadata-type things inside an object, and protect that + # from being overwritten by the action + DATE_CREATED_FIELD = '_dateCreated' + DATE_MODIFIED_FIELD = '_dateModified' + REVISIONS_FIELD = '_revisions' DELETE = '⌫' @classmethod @@ -151,12 +163,14 @@ def revise_field(self, submission_extra: dict, edit: dict) -> dict: """ if self.record_repr(edit) == self.DELETE: + # we might want to retain the revisions. 
also, we might want to + # trash the whole approach with the weird delete character return {} - now_str = timezone.now().strftime('%Y-%m-%dT%H:%M:%SZ') + now_str = utc_datetime_to_simplified_iso8601(timezone.now()) revision = deepcopy(submission_extra) new_record = deepcopy(edit) - revisions = revision.pop('revisions', []) + revisions = revision.pop(self.REVISIONS_FIELD, []) revision_creation_date = revision.pop(self.DATE_MODIFIED_FIELD, now_str) record_creation_date = revision.pop(self.DATE_CREATED_FIELD, now_str) @@ -164,8 +178,8 @@ def revise_field(self, submission_extra: dict, edit: dict) -> dict: new_record[self.DATE_MODIFIED_FIELD] = now_str if submission_extra: - revisions.insert(0, revision) - new_record['revisions'] = revisions + revisions.insert(0, revision) # valid approach, but opposite what i was imaginging. TODO: add unit test for the behavior we agree upon :) + new_record[self.REVISIONS_FIELD] = revisions new_record[self.DATE_CREATED_FIELD] = record_creation_date diff --git a/kobo/apps/subsequences__new/tests/test_manual_transcription.py b/kobo/apps/subsequences__new/tests/test_manual_transcription.py index c055eee9b8..7967f07fd9 100644 --- a/kobo/apps/subsequences__new/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences__new/tests/test_manual_transcription.py @@ -49,7 +49,7 @@ def test_transcript_is_stored_in_supplemental_details(): pass -def test_transcript_revisions_are_retained_in_supplemental_details(): +def test_transcript_revisions_are_retained_in_supplemental_details__fake(): fake_sup_det = {} def get_supplemental_details(): @@ -74,7 +74,7 @@ def revise_supplemental_details(new): return fake_sup_det first = {'language': 'en', 'transcript': 'No idea'} - second = {'language': 'fr', 'transcript': 'Ne pas idée'} + second = {'language': 'fr', 'transcript': "Pas d'idée"} # now call imaginary method to store first transcript fake_sup_det.update(first) @@ -118,3 +118,46 @@ def revise_supplemental_details(new): # the record itself should 
encompass the second transcript assert sup_det.items() >= second.items() + + +def test_transcript_revisions_are_retained_in_supplemental_details__realish(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranscriptionAction(xpath, params) + + first = {'language': 'en', 'transcript': 'No idea'} + second = {'language': 'fr', 'transcript': "Pas d'idée"} + + mock_sup_det = action.revise_field({}, first) + + assert mock_sup_det['language'] == 'en' + assert mock_sup_det['transcript'] == 'No idea' + assert mock_sup_det['_dateCreated'] == mock_sup_det['_dateModified'] + # ehh... should we initialize with an empty list on the first edit + # assert mock_sup_det['_revisions'] == [] + first_time = mock_sup_det['_dateCreated'] + + mock_sup_det = action.revise_field(mock_sup_det, second) + assert len(mock_sup_det['_revisions']) == 1 + + # the revision should encompass the first transcript + assert mock_sup_det['_revisions'][0].items() >= first.items() + + # the revision should have a creation timestamp equal to that of the first + # transcript + assert mock_sup_det['_revisions'][0]['_dateCreated'] == first_time + + # revisions should not list a modification timestamp + assert '_dateModified' not in mock_sup_det['_revisions'] + + # the record itself (not revision) should have an unchanged creation + # timestamp + assert mock_sup_det['_dateCreated'] == first_time + + # the record itself should have an updated modification timestamp + assert dateutil.parser.parse( + mock_sup_det['_dateModified'] + ) > dateutil.parser.parse(mock_sup_det['_dateCreated']) + + # the record itself should encompass the second transcript + assert mock_sup_det.items() >= second.items() From aaa17f1ce1feab5d380da9560f0b454db6772d10 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Thu, 21 Aug 2025 16:02:44 -0400 Subject: [PATCH 018/138] wip --- .../actions/manual_transcription.py | 109 +++++++++++++----- .../tests/test_manual_transcription.py | 55 ++++++++- 2 files changed, 136 insertions(+), 28 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 4efc7469d8..46aa9dad47 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -4,8 +4,8 @@ # from django.utils import timezone from datetime import datetime as timezone -#from ..constants import TRANSCRIBABLE_SOURCE_TYPES -#from ..actions.base import BaseAction +# from ..constants import TRANSCRIBABLE_SOURCE_TYPES +# from ..actions.base import BaseAction """ ### All actions must have the following components @@ -44,12 +44,14 @@ } """ + def utc_datetime_to_simplified_iso8601(dt): # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format if dt.utcoffset(): raise NotImplementedError('Only UTC datetimes are supported') return dt.isoformat().replace("+00:00", "Z") + class BaseAction: # is a leading underscore a good convention for marking things that must @@ -60,7 +62,6 @@ class BaseAction: DATE_CREATED_FIELD = '_dateCreated' DATE_MODIFIED_FIELD = '_dateModified' REVISIONS_FIELD = '_revisions' - DELETE = '⌫' @classmethod def validate_params(cls, params): @@ -69,7 +70,7 @@ def validate_params(cls, params): def validate_data(self, data): jsonschema.validate(data, self.data_schema) - def record_repr(self, record : dict) -> dict: + def record_repr(self, record: dict) -> dict: raise NotImplementedError() def revise_field(self, submission_extra: dict, edit: dict) -> dict: @@ -130,42 +131,98 @@ def data_schema(self): # for lack of a better name languages.append(individual_params['language']) return { - 'additionalProperties': False, - 'properties': { - 'language': { - 'type': 
'string', - 'enum': languages, + 'oneOf': [ + { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + 'enum': languages, + }, + 'transcript': { + 'type': 'string', + }, + }, + 'required': ['language', 'transcript'], + 'type': 'object', }, - 'transcript': { - 'type': 'string', + { + # also allow an empty object (used to delete the transcript) + 'additionalProperties': False, + 'type': 'object', }, - }, - 'required': ['language', 'transcript'], - 'type': 'object', + ] } - def record_repr(self, record : dict) -> dict: + def record_repr(self, record: dict) -> dict: return record.get('transcript', '') - @classmethod @property - def result_schema(cls): + def result_schema(self): """ we also need a schema to define the final result that will be written into SubmissionExtras we need to solve the problem of storing multiple results for a single action """ - raise NotImplementedError + return { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'title': 'Transcript with revisions', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'language': {'$ref': '#/$defs/lang'}, + 'transcript': {'$ref': '#/$defs/transcript'}, + 'revisions': { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, + }, + '_dateCreated': {'$ref': '#/$defs/dateTime'}, + '_dateModified': {'$ref': '#/$defs/dateTime'}, + }, + 'required': ['_dateCreated', '_dateModified'], + 'allOf': [ + { + 'if': {'required': ['language']}, + 'then': {'required': ['transcript']}, + }, + { + 'if': {'required': ['transcript']}, + 'then': {'required': ['language']}, + }, + ], + '$defs': { + 'lang': {'type': 'string', 'enum': ['fr', 'en', 'es']}, + 'transcript': {'type': 'string'}, + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'language': {'$ref': '#/$defs/lang'}, + 'transcript': {'$ref': '#/$defs/transcript'}, + '_dateCreated': {'$ref': 
'#/$defs/dateTime'}, + }, + 'required': ['_dateCreated'], + 'allOf': [ + { + 'if': {'required': ['language']}, + 'then': {'required': ['transcript']}, + }, + { + 'if': {'required': ['transcript']}, + 'then': {'required': ['language']}, + }, + ], + }, + }, + } - def revise_field(self, submission_extra: dict, edit: dict) -> dict: - """ - """ - if self.record_repr(edit) == self.DELETE: - # we might want to retain the revisions. also, we might want to - # trash the whole approach with the weird delete character - return {} + def revise_field(self, submission_extra: dict, edit: dict) -> dict: + """ """ + self.validate_data(edit) now_str = utc_datetime_to_simplified_iso8601(timezone.now()) revision = deepcopy(submission_extra) @@ -178,7 +235,7 @@ def revise_field(self, submission_extra: dict, edit: dict) -> dict: new_record[self.DATE_MODIFIED_FIELD] = now_str if submission_extra: - revisions.insert(0, revision) # valid approach, but opposite what i was imaginging. TODO: add unit test for the behavior we agree upon :) + revisions.insert(0, revision) new_record[self.REVISIONS_FIELD] = revisions new_record[self.DATE_CREATED_FIELD] = record_creation_date diff --git a/kobo/apps/subsequences__new/tests/test_manual_transcription.py b/kobo/apps/subsequences__new/tests/test_manual_transcription.py index 7967f07fd9..e3c6c7e49a 100644 --- a/kobo/apps/subsequences__new/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences__new/tests/test_manual_transcription.py @@ -133,8 +133,7 @@ def test_transcript_revisions_are_retained_in_supplemental_details__realish(): assert mock_sup_det['language'] == 'en' assert mock_sup_det['transcript'] == 'No idea' assert mock_sup_det['_dateCreated'] == mock_sup_det['_dateModified'] - # ehh... 
should we initialize with an empty list on the first edit - # assert mock_sup_det['_revisions'] == [] + assert '_revisions' not in mock_sup_det first_time = mock_sup_det['_dateCreated'] mock_sup_det = action.revise_field(mock_sup_det, second) @@ -161,3 +160,55 @@ def test_transcript_revisions_are_retained_in_supplemental_details__realish(): # the record itself should encompass the second transcript assert mock_sup_det.items() >= second.items() + +def test_setting_transcript_to_empty_string(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranscriptionAction(xpath, params) + + first = {'language': 'fr', 'transcript': "Pas d'idée"} + second = {'language': 'fr', 'transcript': ''} + + mock_sup_det = action.revise_field({}, first) + assert mock_sup_det['transcript'] == "Pas d'idée" + + mock_sup_det = action.revise_field(mock_sup_det, second) + assert mock_sup_det['transcript'] == '' + assert mock_sup_det['_revisions'][0]['transcript'] == "Pas d'idée" + +def test_setting_transcript_to_empty_object(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranscriptionAction(xpath, params) + + first = {'language': 'fr', 'transcript': "Pas d'idée"} + second = {} + + mock_sup_det = action.revise_field({}, first) + assert mock_sup_det['transcript'] == "Pas d'idée" + + mock_sup_det = action.revise_field(mock_sup_det, second) + assert 'transcript' not in mock_sup_det + assert mock_sup_det['_revisions'][0]['transcript'] == "Pas d'idée" + + print(mock_sup_det) + +def test_latest_revision_is_first(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranscriptionAction(xpath, params) + + first = {'language': 'fr', 'transcript': 'un'} + second = {'language': 'fr', 'transcript': 'deux'} + third = {'language': 'fr', 'transcript': 'trois'} 
+ + mock_sup_det = {} + for data in first, second, third: + mock_sup_det = action.revise_field(mock_sup_det, data) + + assert mock_sup_det['transcript'] == 'trois' + assert mock_sup_det['_revisions'][0]['transcript'] == 'deux' + assert mock_sup_det['_revisions'][1]['transcript'] == 'un' + + import json + print(json.dumps(mock_sup_det, indent=2)) From c03ee570a6917358ff3a62a0c06e8daabe1ddf9c Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Thu, 21 Aug 2025 16:47:27 -0400 Subject: [PATCH 019/138] Use data schema to build result schema --- .../actions/manual_transcription.py | 179 ++++++++++++++---- 1 file changed, 141 insertions(+), 38 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 46aa9dad47..5bd10f7c57 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -126,34 +126,72 @@ def data_schema(self): # for lack of a better name } } """ - languages = [] - for individual_params in self.params: - languages.append(individual_params['language']) + # languages = [] + # for individual_params in self.params: + # languages.append(individual_params['language']) + # return { + # 'oneOf': [ + # { + # 'additionalProperties': False, + # 'properties': { + # 'language': { + # 'type': 'string', + # 'enum': languages, + # }, + # 'transcript': { + # 'type': 'string', + # }, + # }, + # 'required': ['language', 'transcript'], + # 'type': 'object', + # }, + # { + # # also allow an empty object (used to delete the transcript) + # 'additionalProperties': False, + # 'type': 'object', + # }, + # ] + # } return { - 'oneOf': [ + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'title': 'Data with optional transcript', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'language': {'$ref': '#/$defs/lang'}, + 'transcript': {'$ref': '#/$defs/transcript'} + }, + 'allOf': [ 
{ - 'additionalProperties': False, - 'properties': { - 'language': { - 'type': 'string', - 'enum': languages, - }, - 'transcript': { - 'type': 'string', + '$ref': '#/$defs/lang_transcript_dependency' + } + ], + '$defs': { + 'lang': {'type': 'string', 'enum': self.languages}, + 'transcript': {'type': 'string'}, + 'lang_transcript_dependency': { + 'allOf': [ + { + 'if': {'required': ['language']}, + 'then': {'required': ['transcript']} }, - }, - 'required': ['language', 'transcript'], - 'type': 'object', - }, - { - # also allow an empty object (used to delete the transcript) - 'additionalProperties': False, - 'type': 'object', - }, - ] + { + 'if': {'required': ['transcript']}, + 'then': {'required': ['language']} + } + ] + } + } } + @property + def languages(self) -> list[str]: + languages = [] + for individual_params in self.params: + languages.append(individual_params['language']) + return languages + def record_repr(self, record: dict) -> dict: return record.get('transcript', '') @@ -165,7 +203,7 @@ def result_schema(self): we need to solve the problem of storing multiple results for a single action """ - return { + schema = { '$schema': 'https://json-schema.org/draft/2020-12/schema', 'title': 'Transcript with revisions', 'type': 'object', @@ -184,18 +222,25 @@ def result_schema(self): 'required': ['_dateCreated', '_dateModified'], 'allOf': [ { - 'if': {'required': ['language']}, - 'then': {'required': ['transcript']}, - }, - { - 'if': {'required': ['transcript']}, - 'then': {'required': ['language']}, - }, + '$ref': '#/$defs/lang_transcript_dependency' + } ], '$defs': { - 'lang': {'type': 'string', 'enum': ['fr', 'en', 'es']}, + 'lang': {'type': 'string', 'enum': self.languages}, 'transcript': {'type': 'string'}, 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'lang_transcript_dependency': { + 'allOf': [ + { + 'if': {'required': ['language']}, + 'then': {'required': ['transcript']} + }, + { + 'if': {'required': ['transcript']}, + 'then': {'required': 
['language']} + } + ] + }, 'revision': { 'type': 'object', 'additionalProperties': False, @@ -207,18 +252,76 @@ def result_schema(self): 'required': ['_dateCreated'], 'allOf': [ { - 'if': {'required': ['language']}, - 'then': {'required': ['transcript']}, - }, - { - 'if': {'required': ['transcript']}, - 'then': {'required': ['language']}, - }, + "$ref": "#/$defs/lang_transcript_dependency" + } ], }, }, } + schema_1 = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'title': 'Transcript with revisions', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'revisions': { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, + }, + '_dateCreated': {'$ref': '#/$defs/dateTime'}, + '_dateModified': {'$ref': '#/$defs/dateTime'}, + }, + 'required': ['_dateCreated', '_dateModified'], + '$defs': { + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + '_dateCreated': {'$ref': '#/$defs/dateTime'}, + }, + 'required': ['_dateCreated'], + } + }, + } + + data_schema = deepcopy(self.data_schema) + skipped_attributes = ['$schema', 'title', 'type'] + for key, value in data_schema.items(): + if key in skipped_attributes: + continue + if key in schema_1: + if isinstance(schema_1[key], dict): + schema_1[key].update(data_schema[key]) + elif isinstance(schema_1[key], list): + schema_1[key].extend(data_schema[key]) + else: + schema_1[key] = data_schema[key] + else: + schema_1[key] = data_schema[key] + + skipped_attributes = ['$schema', 'title', '$defs'] + destination_dict = schema_1['$defs']['revision'] + for key, value in data_schema.items(): + if key in skipped_attributes: + continue + + if key in destination_dict: + if isinstance(destination_dict[key], dict): + destination_dict[key].update(data_schema[key]) + elif isinstance(destination_dict[key], list): + destination_dict[key].extend(data_schema[key]) + else: + destination_dict[key] = 
data_schema[key] + else: + destination_dict[key] = data_schema[key] + + assert schema_1 == schema + + return schema + def revise_field(self, submission_extra: dict, edit: dict) -> dict: """ """ From 36e864b4bb4f1106f37917ab4c7a9c5d5c3d99d9 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Thu, 21 Aug 2025 16:48:05 -0400 Subject: [PATCH 020/138] more --- .../actions/manual_transcription.py | 47 +++++++++++++++---- .../tests/test_manual_transcription.py | 40 ++++++++++++++-- 2 files changed, 73 insertions(+), 14 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 46aa9dad47..f3b4d948e6 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -70,12 +70,33 @@ def validate_params(cls, params): def validate_data(self, data): jsonschema.validate(data, self.data_schema) + def validate_result(self, result): + jsonschema.validate(result, self.result_schema) + def record_repr(self, record: dict) -> dict: raise NotImplementedError() def revise_field(self, submission_extra: dict, edit: dict) -> dict: raise NotImplementedError + @staticmethod + def raise_for_any_leading_underscore_key(d: dict): + """ + Keys with leading underscores are reserved for metadata like + `_dateCreated`, `_dateModified`, and `_revisions`. No key with a + leading underscore should be present in data POSTed by a client or + generated by an action. + + Schema validation should block invalid keys, but this method exists as + a redundant check to guard against schema mistakes. 
+ """ + for k in list(d.keys()): + try: + match = k.startswith('_') + except AttributeError: + continue + if match: + raise Exception('An unexpected key with a leading underscore was found') class ManualTranscriptionAction(BaseAction): ID = 'manual_transcription' @@ -165,23 +186,25 @@ def result_schema(self): we need to solve the problem of storing multiple results for a single action """ + languages = [] + for individual_params in self.params: + languages.append(individual_params['language']) return { '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'title': 'Transcript with revisions', 'type': 'object', 'additionalProperties': False, 'properties': { 'language': {'$ref': '#/$defs/lang'}, 'transcript': {'$ref': '#/$defs/transcript'}, - 'revisions': { + self.REVISIONS_FIELD: { 'type': 'array', 'minItems': 1, 'items': {'$ref': '#/$defs/revision'}, }, - '_dateCreated': {'$ref': '#/$defs/dateTime'}, - '_dateModified': {'$ref': '#/$defs/dateTime'}, + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, }, - 'required': ['_dateCreated', '_dateModified'], + 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], 'allOf': [ { 'if': {'required': ['language']}, @@ -193,7 +216,7 @@ def result_schema(self): }, ], '$defs': { - 'lang': {'type': 'string', 'enum': ['fr', 'en', 'es']}, + 'lang': {'type': 'string', 'enum': languages}, 'transcript': {'type': 'string'}, 'dateTime': {'type': 'string', 'format': 'date-time'}, 'revision': { @@ -202,9 +225,9 @@ def result_schema(self): 'properties': { 'language': {'$ref': '#/$defs/lang'}, 'transcript': {'$ref': '#/$defs/transcript'}, - '_dateCreated': {'$ref': '#/$defs/dateTime'}, + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, }, - 'required': ['_dateCreated'], + 'required': [self.DATE_CREATED_FIELD], 'allOf': [ { 'if': {'required': ['language']}, @@ -221,8 +244,14 @@ def result_schema(self): def revise_field(self, submission_extra: dict, edit: 
dict) -> dict: - """ """ + """ + really, we want to generalize this to all actions. + for actions that may have lengthy data, are we content to store the + entirety of the data for each revision, or do we need some kind of + differencing system? + """ self.validate_data(edit) + self.raise_for_any_leading_underscore_key(edit) now_str = utc_datetime_to_simplified_iso8601(timezone.now()) revision = deepcopy(submission_extra) diff --git a/kobo/apps/subsequences__new/tests/test_manual_transcription.py b/kobo/apps/subsequences__new/tests/test_manual_transcription.py index e3c6c7e49a..ab2dd65ef2 100644 --- a/kobo/apps/subsequences__new/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences__new/tests/test_manual_transcription.py @@ -44,6 +44,41 @@ def test_invalid_transcript_data_fails_validation(): with pytest.raises(jsonschema.exceptions.ValidationError): action.validate_data(data) +def test_valid_result_passes_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranscriptionAction(xpath, params) + + first = {'language': 'fr', 'transcript': 'un'} + second = {'language': 'en', 'transcript': 'two'} + third = {'language': 'fr', 'transcript': 'trois'} + fourth = {} + fifth = {'language': 'en', 'transcript': 'fifth'} + mock_sup_det = {} + for data in first, second, third, fourth, fifth: + mock_sup_det = action.revise_field(mock_sup_det, data) + action.validate_result(mock_sup_det) + +def test_invalid_result_fails_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranscriptionAction(xpath, params) + + first = {'language': 'fr', 'transcript': 'un'} + second = {'language': 'en', 'transcript': 'two'} + third = {'language': 'fr', 'transcript': 'trois'} + fourth = {} + fifth = {'language': 'en', 'transcript': 'fifth'} + mock_sup_det = {} + for data in first, second, third, fourth, 
fifth: + mock_sup_det = action.revise_field(mock_sup_det, data) + + # erroneously add '_dateModified' onto a revision + mock_sup_det['_revisions'][0]['_dateModified'] = mock_sup_det['_revisions'][0]['_dateCreated'] + + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_result(mock_sup_det) + def test_transcript_is_stored_in_supplemental_details(): pass @@ -191,8 +226,6 @@ def test_setting_transcript_to_empty_object(): assert 'transcript' not in mock_sup_det assert mock_sup_det['_revisions'][0]['transcript'] == "Pas d'idée" - print(mock_sup_det) - def test_latest_revision_is_first(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] @@ -209,6 +242,3 @@ def test_latest_revision_is_first(): assert mock_sup_det['transcript'] == 'trois' assert mock_sup_det['_revisions'][0]['transcript'] == 'deux' assert mock_sup_det['_revisions'][1]['transcript'] == 'un' - - import json - print(json.dumps(mock_sup_det, indent=2)) From e82b0ec80827b86fae8d0e0e318da4336832f454 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Thu, 21 Aug 2025 17:12:05 -0400 Subject: [PATCH 021/138] even more --- .../actions/manual_transcription.py | 41 ++++++++++++++++++- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index f3b4d948e6..64281c507f 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -32,7 +32,7 @@ {'language': 'bn'}, {'language': 'es'}, ], - 'manual_translation': [{'language': 'fr'}], + 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], }, 'my_video_question': { 'manual_transcription': [{'language': 'en'}], @@ -42,12 +42,49 @@ }, }, } + +idea of example data in SubmissionExtras based on the above +{ + 'version': '20250820', + 'submission': '', + 'my_audio_question': { + 'manual_transcription': { + 'transcript': 'هائج', + 'language': 'ar', + '_dateCreated': '2025-08-21T20:55:42.012053Z', + '_dateModified': '2025-08-21T20:57:28.154567Z', + '_revisions': [ + { + 'transcript': 'فارغ', + 'language': 'ar', + '_dateCreated': '2025-08-21T20:55:42.012053Z', + } + ], + }, + # WIP 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], + }, + 'my_video_question': { + 'manual_transcription': { + 'transcript': 'sea horse sea hell', + 'language': 'en', + '_dateCreated': '2025-08-21T21:06:20.059117Z', + '_dateModified': '2025-08-21T21:06:20.059117Z', + }, + }, + 'my_number_question': { + 'number_multiplier': { + 'numberMultiplied': 99, + '_dateCreated': '2025-08-21T21:09:34.504546Z', + '_dateModified': '2025-08-21T21:09:34.504546Z', + }, + }, +} """ def utc_datetime_to_simplified_iso8601(dt): # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format - if dt.utcoffset(): + if dt.utcoffset() or not dt.tzinfo: raise NotImplementedError('Only UTC datetimes are supported') return dt.isoformat().replace("+00:00", "Z") From 
9f51715da2a5a05666b4a00d8b3e197526947cba Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Thu, 21 Aug 2025 17:13:01 -0400 Subject: [PATCH 022/138] Make result schema more dynamic --- .../actions/manual_transcription.py | 165 +++++++++--------- 1 file changed, 79 insertions(+), 86 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 5bd10f7c57..60cb2f424f 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -203,63 +203,64 @@ def result_schema(self): we need to solve the problem of storing multiple results for a single action """ - schema = { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'title': 'Transcript with revisions', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'language': {'$ref': '#/$defs/lang'}, - 'transcript': {'$ref': '#/$defs/transcript'}, - 'revisions': { - 'type': 'array', - 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, - }, - '_dateCreated': {'$ref': '#/$defs/dateTime'}, - '_dateModified': {'$ref': '#/$defs/dateTime'}, - }, - 'required': ['_dateCreated', '_dateModified'], - 'allOf': [ - { - '$ref': '#/$defs/lang_transcript_dependency' - } - ], - '$defs': { - 'lang': {'type': 'string', 'enum': self.languages}, - 'transcript': {'type': 'string'}, - 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'lang_transcript_dependency': { - 'allOf': [ - { - 'if': {'required': ['language']}, - 'then': {'required': ['transcript']} - }, - { - 'if': {'required': ['transcript']}, - 'then': {'required': ['language']} - } - ] - }, - 'revision': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'language': {'$ref': '#/$defs/lang'}, - 'transcript': {'$ref': '#/$defs/transcript'}, - '_dateCreated': {'$ref': '#/$defs/dateTime'}, - }, - 'required': ['_dateCreated'], - 'allOf': [ - { - "$ref": 
"#/$defs/lang_transcript_dependency" - } - ], - }, - }, - } + # We want schema to look like this at the end + # schema_orig = { + # '$schema': 'https://json-schema.org/draft/2020-12/schema', + # 'title': 'Transcript with revisions', + # 'type': 'object', + # 'additionalProperties': False, + # 'properties': { + # 'language': {'$ref': '#/$defs/lang'}, + # 'transcript': {'$ref': '#/$defs/transcript'}, + # 'revisions': { + # 'type': 'array', + # 'minItems': 1, + # 'items': {'$ref': '#/$defs/revision'}, + # }, + # '_dateCreated': {'$ref': '#/$defs/dateTime'}, + # '_dateModified': {'$ref': '#/$defs/dateTime'}, + # }, + # 'required': ['_dateCreated', '_dateModified'], + # 'allOf': [ + # { + # '$ref': '#/$defs/lang_transcript_dependency' + # } + # ], + # '$defs': { + # 'lang': {'type': 'string', 'enum': self.languages}, + # 'transcript': {'type': 'string'}, + # 'dateTime': {'type': 'string', 'format': 'date-time'}, + # 'lang_transcript_dependency': { + # 'allOf': [ + # { + # 'if': {'required': ['language']}, + # 'then': {'required': ['transcript']} + # }, + # { + # 'if': {'required': ['transcript']}, + # 'then': {'required': ['language']} + # } + # ] + # }, + # 'revision': { + # 'type': 'object', + # 'additionalProperties': False, + # 'properties': { + # 'language': {'$ref': '#/$defs/lang'}, + # 'transcript': {'$ref': '#/$defs/transcript'}, + # '_dateCreated': {'$ref': '#/$defs/dateTime'}, + # }, + # 'required': ['_dateCreated'], + # 'allOf': [ + # { + # "$ref": "#/$defs/lang_transcript_dependency" + # } + # ], + # }, + # }, + # } - schema_1 = { + result_schema_template = { '$schema': 'https://json-schema.org/draft/2020-12/schema', 'title': 'Transcript with revisions', 'type': 'object', @@ -287,38 +288,30 @@ def result_schema(self): }, } - data_schema = deepcopy(self.data_schema) - skipped_attributes = ['$schema', 'title', 'type'] - for key, value in data_schema.items(): - if key in skipped_attributes: - continue - if key in schema_1: - if isinstance(schema_1[key], dict): 
- schema_1[key].update(data_schema[key]) - elif isinstance(schema_1[key], list): - schema_1[key].extend(data_schema[key]) - else: - schema_1[key] = data_schema[key] - else: - schema_1[key] = data_schema[key] - - skipped_attributes = ['$schema', 'title', '$defs'] - destination_dict = schema_1['$defs']['revision'] - for key, value in data_schema.items(): - if key in skipped_attributes: - continue - - if key in destination_dict: - if isinstance(destination_dict[key], dict): - destination_dict[key].update(data_schema[key]) - elif isinstance(destination_dict[key], list): - destination_dict[key].extend(data_schema[key]) + def _inject_data_schema(destination_schema: dict, skipped_keys: list) -> dict: + + for key, value in self.data_schema.items(): + if key in skipped_keys: + continue + + if key in destination_schema: + if isinstance(destination_schema[key], dict): + destination_schema[key].update(self.data_schema[key]) + elif isinstance(destination_schema[key], list): + destination_schema[key].extend(self.data_schema[key]) + else: + destination_schema[key] = self.data_schema[key] else: - destination_dict[key] = data_schema[key] - else: - destination_dict[key] = data_schema[key] + destination_schema[key] = self.data_schema[key] + + # Inject data schema in result schema template + schema = deepcopy(result_schema_template) + _inject_data_schema(schema, ['$schema', 'title', 'type']) - assert schema_1 == schema + # Also inject data schema in the revision definition + _inject_data_schema( + schema['$defs']['revision'], ['$schema', 'title', '$defs'] + ) return schema From 5602de64653d71bf210570050a125fdd5c0b4cdd Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Thu, 21 Aug 2025 17:24:27 -0400 Subject: [PATCH 023/138] Comment out timezone detection in "utc_datetime_to_simplified_iso8601" --- kobo/apps/subsequences__new/actions/manual_transcription.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py 
b/kobo/apps/subsequences__new/actions/manual_transcription.py index 6d13fae90f..dc23955cb6 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -84,7 +84,7 @@ def utc_datetime_to_simplified_iso8601(dt): # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format - if dt.utcoffset() or not dt.tzinfo: + if dt.utcoffset(): # or not dt.tzinfo: raise NotImplementedError('Only UTC datetimes are supported') return dt.isoformat().replace("+00:00", "Z") From ac0ae7557acaca09ce2f1eb01841b39a716289ce Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Thu, 21 Aug 2025 17:26:09 -0400 Subject: [PATCH 024/138] Move result_schema to base class --- .../actions/manual_transcription.py | 241 +++++++++--------- 1 file changed, 120 insertions(+), 121 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index dc23955cb6..ac768d3c25 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -113,6 +113,126 @@ def validate_result(self, result): def record_repr(self, record: dict) -> dict: raise NotImplementedError() + @property + def result_schema(self): + """ + we also need a schema to define the final result that will be written + into SubmissionExtras + + we need to solve the problem of storing multiple results for a single action + """ + + # We want schema to look like this at the end + # schema_orig = { + # '$schema': 'https://json-schema.org/draft/2020-12/schema', + # 'title': 'Transcript with revisions', + # 'type': 'object', + # 'additionalProperties': False, + # 'properties': { + # 'language': {'$ref': '#/$defs/lang'}, + # 'transcript': {'$ref': '#/$defs/transcript'}, + # 'revisions': { + # 'type': 'array', + # 'minItems': 1, + # 'items': {'$ref': '#/$defs/revision'}, + # }, + # 
'_dateCreated': {'$ref': '#/$defs/dateTime'}, + # '_dateModified': {'$ref': '#/$defs/dateTime'}, + # }, + # 'required': ['_dateCreated', '_dateModified'], + # 'allOf': [ + # { + # '$ref': '#/$defs/lang_transcript_dependency' + # } + # ], + # '$defs': { + # 'lang': {'type': 'string', 'enum': self.languages}, + # 'transcript': {'type': 'string'}, + # 'dateTime': {'type': 'string', 'format': 'date-time'}, + # 'lang_transcript_dependency': { + # 'allOf': [ + # { + # 'if': {'required': ['language']}, + # 'then': {'required': ['transcript']} + # }, + # { + # 'if': {'required': ['transcript']}, + # 'then': {'required': ['language']} + # } + # ] + # }, + # 'revision': { + # 'type': 'object', + # 'additionalProperties': False, + # 'properties': { + # 'language': {'$ref': '#/$defs/lang'}, + # 'transcript': {'$ref': '#/$defs/transcript'}, + # '_dateCreated': {'$ref': '#/$defs/dateTime'}, + # }, + # 'required': ['_dateCreated'], + # 'allOf': [ + # { + # "$ref": "#/$defs/lang_transcript_dependency" + # } + # ], + # }, + # }, + # } + + result_schema_template = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.REVISIONS_FIELD: { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, + }, + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], + '$defs': { + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD], + } + }, + } + + def _inject_data_schema(destination_schema: dict, skipped_keys: list) -> dict: + + for key, value in self.data_schema.items(): + if key in skipped_keys: + continue + + if key in destination_schema: + if 
isinstance(destination_schema[key], dict): + destination_schema[key].update(self.data_schema[key]) + elif isinstance(destination_schema[key], list): + destination_schema[key].extend(self.data_schema[key]) + else: + destination_schema[key] = self.data_schema[key] + else: + destination_schema[key] = self.data_schema[key] + + # Inject data schema in result schema template + schema = deepcopy(result_schema_template) + _inject_data_schema(schema, ['$schema', 'title', 'type']) + + # Also inject data schema in the revision definition + _inject_data_schema( + schema['$defs']['revision'], ['$schema', 'title', '$defs'] + ) + + return schema + def revise_field(self, submission_extra: dict, edit: dict) -> dict: raise NotImplementedError @@ -252,127 +372,6 @@ def languages(self) -> list[str]: def record_repr(self, record: dict) -> dict: return record.get('transcript', '') - @property - def result_schema(self): - """ - we also need a schema to define the final result that will be written - into SubmissionExtras - - we need to solve the problem of storing multiple results for a single action - """ - - # We want schema to look like this at the end - # schema_orig = { - # '$schema': 'https://json-schema.org/draft/2020-12/schema', - # 'title': 'Transcript with revisions', - # 'type': 'object', - # 'additionalProperties': False, - # 'properties': { - # 'language': {'$ref': '#/$defs/lang'}, - # 'transcript': {'$ref': '#/$defs/transcript'}, - # 'revisions': { - # 'type': 'array', - # 'minItems': 1, - # 'items': {'$ref': '#/$defs/revision'}, - # }, - # '_dateCreated': {'$ref': '#/$defs/dateTime'}, - # '_dateModified': {'$ref': '#/$defs/dateTime'}, - # }, - # 'required': ['_dateCreated', '_dateModified'], - # 'allOf': [ - # { - # '$ref': '#/$defs/lang_transcript_dependency' - # } - # ], - # '$defs': { - # 'lang': {'type': 'string', 'enum': self.languages}, - # 'transcript': {'type': 'string'}, - # 'dateTime': {'type': 'string', 'format': 'date-time'}, - # 'lang_transcript_dependency': { 
- # 'allOf': [ - # { - # 'if': {'required': ['language']}, - # 'then': {'required': ['transcript']} - # }, - # { - # 'if': {'required': ['transcript']}, - # 'then': {'required': ['language']} - # } - # ] - # }, - # 'revision': { - # 'type': 'object', - # 'additionalProperties': False, - # 'properties': { - # 'language': {'$ref': '#/$defs/lang'}, - # 'transcript': {'$ref': '#/$defs/transcript'}, - # '_dateCreated': {'$ref': '#/$defs/dateTime'}, - # }, - # 'required': ['_dateCreated'], - # 'allOf': [ - # { - # "$ref": "#/$defs/lang_transcript_dependency" - # } - # ], - # }, - # }, - # } - - result_schema_template = { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.REVISIONS_FIELD: { - 'type': 'array', - 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, - }, - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], - '$defs': { - 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD], - } - }, - } - - def _inject_data_schema(destination_schema: dict, skipped_keys: list) -> dict: - - for key, value in self.data_schema.items(): - if key in skipped_keys: - continue - - if key in destination_schema: - if isinstance(destination_schema[key], dict): - destination_schema[key].update(self.data_schema[key]) - elif isinstance(destination_schema[key], list): - destination_schema[key].extend(self.data_schema[key]) - else: - destination_schema[key] = self.data_schema[key] - else: - destination_schema[key] = self.data_schema[key] - - # Inject data schema in result schema template - schema = deepcopy(result_schema_template) - _inject_data_schema(schema, ['$schema', 'title', 
'type']) - - # Also inject data schema in the revision definition - _inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] - ) - - return schema - - def revise_field(self, submission_extra: dict, edit: dict) -> dict: """ really, we want to generalize this to all actions. From 24cfa0d7659a27a49c4e161481f69a24a53acb54 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Thu, 21 Aug 2025 17:33:23 -0400 Subject: [PATCH 025/138] clean --- .../actions/manual_transcription.py | 24 ++++-- .../tests/test_manual_transcription.py | 84 +------------------ 2 files changed, 20 insertions(+), 88 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index ac768d3c25..941c5c222d 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -1,8 +1,14 @@ +import datetime import jsonschema from copy import deepcopy # from django.utils import timezone -from datetime import datetime as timezone +class FakeDjangoTimezoneUtil: + @staticmethod + def now(): + from zoneinfo import ZoneInfo + return datetime.datetime.now(tz=ZoneInfo('UTC')) +timezone = FakeDjangoTimezoneUtil() # from ..constants import TRANSCRIBABLE_SOURCE_TYPES # from ..actions.base import BaseAction @@ -61,7 +67,7 @@ } ], }, - # WIP 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], + 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], }, 'my_video_question': { 'manual_transcription': { @@ -82,14 +88,22 @@ """ -def utc_datetime_to_simplified_iso8601(dt): +def utc_datetime_to_js_str(dt: datetime.datetime) -> str: + """ + Return a string to represent a `datetime` following the simplification of + the ISO 8601 format used by JavaScript + """ # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format - if dt.utcoffset(): # or not dt.tzinfo: + if dt.utcoffset() or not 
dt.tzinfo: raise NotImplementedError('Only UTC datetimes are supported') return dt.isoformat().replace("+00:00", "Z") class BaseAction: + def something_to_get_the_data_back_out(self): + # might need to deal with multiple columns for one action + # ^ definitely will + raise NotImplementedError # is a leading underscore a good convention for marking things that must # not be set by the action result? alternatively, we could nest all the @@ -382,7 +396,7 @@ def revise_field(self, submission_extra: dict, edit: dict) -> dict: self.validate_data(edit) self.raise_for_any_leading_underscore_key(edit) - now_str = utc_datetime_to_simplified_iso8601(timezone.now()) + now_str = utc_datetime_to_js_str(timezone.now()) revision = deepcopy(submission_extra) new_record = deepcopy(edit) revisions = revision.pop(self.REVISIONS_FIELD, []) diff --git a/kobo/apps/subsequences__new/tests/test_manual_transcription.py b/kobo/apps/subsequences__new/tests/test_manual_transcription.py index ab2dd65ef2..ba4704765e 100644 --- a/kobo/apps/subsequences__new/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences__new/tests/test_manual_transcription.py @@ -6,17 +6,6 @@ from ..actions.manual_transcription import ManualTranscriptionAction -def cur_time(): - import datetime - from zoneinfo import ZoneInfo - - return ( - datetime.datetime.now(tz=ZoneInfo('UTC')) - .isoformat() - .replace("+00:00", "Z") - ) - - def test_valid_params_pass_validation(): params = [{'language': 'fr'}, {'language': 'es'}] ManualTranscriptionAction.validate_params(params) @@ -84,78 +73,7 @@ def test_transcript_is_stored_in_supplemental_details(): pass -def test_transcript_revisions_are_retained_in_supplemental_details__fake(): - fake_sup_det = {} - - def get_supplemental_details(): - return fake_sup_det - - def revise_supplemental_details(new): - existing = fake_sup_det # modify directly - revisions = existing.pop('_revisions', []) - existing['_dateCreated'] = existing['_dateModified'] - del existing['_dateModified'] 
- revisions.append(copy.deepcopy(existing)) - - # Ensure special keys starting with underscores cannot be overwritten - for k in list(new.keys()): # unsure if coercion needed - if k.startswith('_'): - del k # log a warning? - - existing.update(new) - existing['_dateModified'] = cur_time() - existing['_revisions'] = revisions - - return fake_sup_det - - first = {'language': 'en', 'transcript': 'No idea'} - second = {'language': 'fr', 'transcript': "Pas d'idée"} - - # now call imaginary method to store first transcript - fake_sup_det.update(first) - # is a leading underscore a good convention for marking things that must not be set by the action result? - # alternatively, we could nest all the action results inside some object - # or, we could nest all the non-action-result metadata-type things inside - # an object, and protect that from being overwritten by the action - fake_sup_det['_dateCreated'] = fake_sup_det['_dateModified'] = cur_time() - fake_sup_det['_revisions'] = [] - - sup_det = get_supplemental_details() - assert sup_det['language'] == 'en' - assert sup_det['transcript'] == 'No idea' - assert sup_det['_dateCreated'] == sup_det['_dateModified'] - assert sup_det['_revisions'] == [] - first_time = sup_det['_dateCreated'] - - # now call imaginary method to store second transcript - sup_det = revise_supplemental_details(second) - - assert len(sup_det['_revisions']) == 1 - - # the revision should encompass the first transcript - assert sup_det['_revisions'][0].items() >= first.items() - - # the revision should have a creation timestamp equal to that of the first - # transcript - assert sup_det['_revisions'][0]['_dateCreated'] == first_time - - # revisions should not list a modification timestamp - assert '_dateModified' not in sup_det['_revisions'] - - # the record itself (not revision) should have an unchanged creation - # timestamp - assert sup_det['_dateCreated'] == first_time - - # the record itself should have an updated modification timestamp - assert 
dateutil.parser.parse( - sup_det['_dateModified'] - ) > dateutil.parser.parse(sup_det['_dateCreated']) - - # the record itself should encompass the second transcript - assert sup_det.items() >= second.items() - - -def test_transcript_revisions_are_retained_in_supplemental_details__realish(): +def test_transcript_revisions_are_retained_in_supplemental_details(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] action = ManualTranscriptionAction(xpath, params) From 9b012f7da143f1daff142286c2a5f9b767c682bf Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Thu, 21 Aug 2025 18:28:50 -0400 Subject: [PATCH 026/138] add example data for manual translations --- .../actions/manual_transcription.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 941c5c222d..7abe7c8443 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -67,7 +67,27 @@ def now(): } ], }, - 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], + 'manual_translation': [ + { + 'language': 'en', + 'translation': 'berserk', + '_dateCreated': '2025-08-21T21:39:42.141306Z', + '_dateModified': '2025-08-21T21:39:42.141306Z', + }, + { + 'language': 'es', + 'translation': 'enloquecido', + '_dateCreated': '2025-08-21T21:40:54.644308Z', + '_dateModified': '2025-08-21T22:00:10.862880Z', + '_revisions': [ + { + 'translation': 'loco', + 'language': 'es', + '_dateCreated': '2025-08-21T21:40:54.644308Z', + } + ], + }, + ], }, 'my_video_question': { 'manual_transcription': { From 97a5b7e04fe22d06e3d9dc8ddad09512ef0cd9ab Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Thu, 21 Aug 2025 20:47:06 -0400 Subject: [PATCH 027/138] Add stripped-down SubmissionExtras model --- kobo/apps/subsequences__new/models.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 kobo/apps/subsequences__new/models.py diff --git a/kobo/apps/subsequences__new/models.py b/kobo/apps/subsequences__new/models.py new file mode 100644 index 0000000000..551284d699 --- /dev/null +++ b/kobo/apps/subsequences__new/models.py @@ -0,0 +1,22 @@ +# coding: utf-8 + +from django.db import models + +from kpi.models import Asset +from kpi.models.abstract_models import AbstractTimeStampedModel + + +class SubmissionExtras(AbstractTimeStampedModel): + + submission_uuid = models.CharField(max_length=249) + content = models.JSONField(default=dict) + asset = models.ForeignKey( + Asset, + related_name='submission_extras', + on_delete=models.CASCADE, + ) + + class Meta: + # ideally `submission_uuid` is universally unique, but its uniqueness + # per-asset is most important + unique_together = (('asset', 'submission_uuid'),) From 2f5c53104372cb5461fb3540eb731b9fdd1384f8 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Thu, 21 Aug 2025 21:43:11 -0400 Subject: [PATCH 028/138] =?UTF-8?q?add=20subsequences=20router=20thing=20t?= =?UTF-8?q?o=20process=20incoming=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit data for actions. 
doesn't do much yet, though, because you can't save a useful `advanced_schema` into any asset because of the outdated `ADVANCED_FEATURES_PARAMS_SCHEMA` --- .../subsequences__new/actions/__init__.py | 3 + .../actions/manual_transcription.py | 281 +----------------- kobo/apps/subsequences__new/router.py | 66 ++++ 3 files changed, 72 insertions(+), 278 deletions(-) create mode 100644 kobo/apps/subsequences__new/actions/__init__.py create mode 100644 kobo/apps/subsequences__new/router.py diff --git a/kobo/apps/subsequences__new/actions/__init__.py b/kobo/apps/subsequences__new/actions/__init__.py new file mode 100644 index 0000000000..c47a4c2eb0 --- /dev/null +++ b/kobo/apps/subsequences__new/actions/__init__.py @@ -0,0 +1,3 @@ +from manual_transcription import ManualTranscriptionAction + +ACTIONS = (ManualTranscriptionAction,) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 7abe7c8443..41ae6e52ee 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -2,6 +2,9 @@ import jsonschema from copy import deepcopy +from ..actions.base import BaseAction + + # from django.utils import timezone class FakeDjangoTimezoneUtil: @staticmethod @@ -10,284 +13,6 @@ def now(): return datetime.datetime.now(tz=ZoneInfo('UTC')) timezone = FakeDjangoTimezoneUtil() -# from ..constants import TRANSCRIBABLE_SOURCE_TYPES -# from ..actions.base import BaseAction - -""" -### All actions must have the following components - -* (check!) a unique identifier for the action -* three jsonschemas: - 1. (check!) one to validate the parameters used to configure the action - * `ADVANCED_FEATURES_PARAMS_SCHEMA` - 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) - * the result of `modify_jsonschema()` - 3. 
one to validate the result of the action - the result of `modify_jsonschema()` - * OH NO, this doesn't happen at all yet -* a handler that receives a submission (and other metadata) and processes it -""" - -""" -idea of example content in asset.advanced_features (what kind of actions are activated per question) -{ - 'version': '20250820', - 'schema': { - 'my_audio_question': { - 'manual_transcription': [ - {'language': 'ar'}, - {'language': 'bn'}, - {'language': 'es'}, - ], - 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], - }, - 'my_video_question': { - 'manual_transcription': [{'language': 'en'}], - }, - 'my_number_question': { - 'number_multiplier': [{'multiplier': 3}], - }, - }, -} - -idea of example data in SubmissionExtras based on the above -{ - 'version': '20250820', - 'submission': '', - 'my_audio_question': { - 'manual_transcription': { - 'transcript': 'هائج', - 'language': 'ar', - '_dateCreated': '2025-08-21T20:55:42.012053Z', - '_dateModified': '2025-08-21T20:57:28.154567Z', - '_revisions': [ - { - 'transcript': 'فارغ', - 'language': 'ar', - '_dateCreated': '2025-08-21T20:55:42.012053Z', - } - ], - }, - 'manual_translation': [ - { - 'language': 'en', - 'translation': 'berserk', - '_dateCreated': '2025-08-21T21:39:42.141306Z', - '_dateModified': '2025-08-21T21:39:42.141306Z', - }, - { - 'language': 'es', - 'translation': 'enloquecido', - '_dateCreated': '2025-08-21T21:40:54.644308Z', - '_dateModified': '2025-08-21T22:00:10.862880Z', - '_revisions': [ - { - 'translation': 'loco', - 'language': 'es', - '_dateCreated': '2025-08-21T21:40:54.644308Z', - } - ], - }, - ], - }, - 'my_video_question': { - 'manual_transcription': { - 'transcript': 'sea horse sea hell', - 'language': 'en', - '_dateCreated': '2025-08-21T21:06:20.059117Z', - '_dateModified': '2025-08-21T21:06:20.059117Z', - }, - }, - 'my_number_question': { - 'number_multiplier': { - 'numberMultiplied': 99, - '_dateCreated': '2025-08-21T21:09:34.504546Z', - '_dateModified': 
'2025-08-21T21:09:34.504546Z', - }, - }, -} -""" - - -def utc_datetime_to_js_str(dt: datetime.datetime) -> str: - """ - Return a string to represent a `datetime` following the simplification of - the ISO 8601 format used by JavaScript - """ - # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format - if dt.utcoffset() or not dt.tzinfo: - raise NotImplementedError('Only UTC datetimes are supported') - return dt.isoformat().replace("+00:00", "Z") - - -class BaseAction: - def something_to_get_the_data_back_out(self): - # might need to deal with multiple columns for one action - # ^ definitely will - raise NotImplementedError - - # is a leading underscore a good convention for marking things that must - # not be set by the action result? alternatively, we could nest all the - # action results inside some object or, we could nest all the - # non-action-result metadata-type things inside an object, and protect that - # from being overwritten by the action - DATE_CREATED_FIELD = '_dateCreated' - DATE_MODIFIED_FIELD = '_dateModified' - REVISIONS_FIELD = '_revisions' - - @classmethod - def validate_params(cls, params): - jsonschema.validate(params, cls.params_schema) - - def validate_data(self, data): - jsonschema.validate(data, self.data_schema) - - def validate_result(self, result): - jsonschema.validate(result, self.result_schema) - - def record_repr(self, record: dict) -> dict: - raise NotImplementedError() - - @property - def result_schema(self): - """ - we also need a schema to define the final result that will be written - into SubmissionExtras - - we need to solve the problem of storing multiple results for a single action - """ - - # We want schema to look like this at the end - # schema_orig = { - # '$schema': 'https://json-schema.org/draft/2020-12/schema', - # 'title': 'Transcript with revisions', - # 'type': 'object', - # 'additionalProperties': False, - # 'properties': { - # 'language': {'$ref': '#/$defs/lang'}, - # 'transcript': 
{'$ref': '#/$defs/transcript'}, - # 'revisions': { - # 'type': 'array', - # 'minItems': 1, - # 'items': {'$ref': '#/$defs/revision'}, - # }, - # '_dateCreated': {'$ref': '#/$defs/dateTime'}, - # '_dateModified': {'$ref': '#/$defs/dateTime'}, - # }, - # 'required': ['_dateCreated', '_dateModified'], - # 'allOf': [ - # { - # '$ref': '#/$defs/lang_transcript_dependency' - # } - # ], - # '$defs': { - # 'lang': {'type': 'string', 'enum': self.languages}, - # 'transcript': {'type': 'string'}, - # 'dateTime': {'type': 'string', 'format': 'date-time'}, - # 'lang_transcript_dependency': { - # 'allOf': [ - # { - # 'if': {'required': ['language']}, - # 'then': {'required': ['transcript']} - # }, - # { - # 'if': {'required': ['transcript']}, - # 'then': {'required': ['language']} - # } - # ] - # }, - # 'revision': { - # 'type': 'object', - # 'additionalProperties': False, - # 'properties': { - # 'language': {'$ref': '#/$defs/lang'}, - # 'transcript': {'$ref': '#/$defs/transcript'}, - # '_dateCreated': {'$ref': '#/$defs/dateTime'}, - # }, - # 'required': ['_dateCreated'], - # 'allOf': [ - # { - # "$ref": "#/$defs/lang_transcript_dependency" - # } - # ], - # }, - # }, - # } - - result_schema_template = { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.REVISIONS_FIELD: { - 'type': 'array', - 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, - }, - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], - '$defs': { - 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD], - } - }, - } - - def _inject_data_schema(destination_schema: dict, skipped_keys: list) -> dict: - - 
for key, value in self.data_schema.items(): - if key in skipped_keys: - continue - - if key in destination_schema: - if isinstance(destination_schema[key], dict): - destination_schema[key].update(self.data_schema[key]) - elif isinstance(destination_schema[key], list): - destination_schema[key].extend(self.data_schema[key]) - else: - destination_schema[key] = self.data_schema[key] - else: - destination_schema[key] = self.data_schema[key] - - # Inject data schema in result schema template - schema = deepcopy(result_schema_template) - _inject_data_schema(schema, ['$schema', 'title', 'type']) - - # Also inject data schema in the revision definition - _inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] - ) - - return schema - - def revise_field(self, submission_extra: dict, edit: dict) -> dict: - raise NotImplementedError - - @staticmethod - def raise_for_any_leading_underscore_key(d: dict): - """ - Keys with leading underscores are reserved for metadata like - `_dateCreated`, `_dateModified`, and `_revisions`. No key with a - leading underscore should be present in data POSTed by a client or - generated by an action. - - Schema validation should block invalid keys, but this method exists as - a redundant check to guard against schema mistakes. 
- """ - for k in list(d.keys()): - try: - match = k.startswith('_') - except AttributeError: - continue - if match: - raise Exception('An unexpected key with a leading underscore was found') class ManualTranscriptionAction(BaseAction): ID = 'manual_transcription' diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py new file mode 100644 index 0000000000..52ffc0be04 --- /dev/null +++ b/kobo/apps/subsequences__new/router.py @@ -0,0 +1,66 @@ +from kobo.apps.subsequences.models import ( + SubmissionExtras, +) # just bullshit for now + +from .actions import ACTIONS + +ids_to_actions = {a.ID: a for a in ACTIONS} + + +class InvalidAction(Exception): + """ + The referenced action does not exist or was not configured for the given + question XPath at the asset level + """ + + pass + + +class InvalidXPath(Exception): + """ + The referenced question XPath was not configured for supplemental data at + the asset level + """ + + pass + + +def handle_incoming_data(asset, data): + schema_version = data.pop('_version') + if schema_version != '20250820': + # TODO: migrate from old per-submission schema + raise NotImplementedError + + submission_uuid = data.pop('_submission') + supplemental_data = SubmissionExtras.objects.get_or_create( + asset=asset, submission_uuid=submission_uuid + ).content # lock it? 
+ + for question_xpath, data_for_this_question in data.items(): + if asset.advanced_features['_version'] != '20250820': + # TODO: migrate from old per-asset schema + raise NotImplementedError + try: + action_configs_for_this_question = asset.advanced_features[ + '_schema' + ][question_xpath] + except KeyError as e: + raise InvalidXPath from e + + for action_id, action_data in data_for_this_question.items(): + try: + action_class = ids_to_actions[action_id] + except KeyError as e: + raise InvalidAction from e + try: + action_params = action_configs_for_this_question[action_id] + except KeyError as e: + raise InvalidAction from e + + action = action_class(question_xpath, action_params) + # action.validate_data(action_data) # called by revise_field + action.revise_field(supplemental_data, action_data) + + SubmissionExtras.objects.filter( + asset=asset, submission_uuid=submission_uuid + ).update(content=supplemental_data) From c2ed3280e0af8bd6143b971c803f2a916844aec8 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Thu, 21 Aug 2025 21:56:20 -0400 Subject: [PATCH 029/138] note that submission uuid will be removed from POST data --- kobo/apps/subsequences__new/router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py index 52ffc0be04..488e4311c3 100644 --- a/kobo/apps/subsequences__new/router.py +++ b/kobo/apps/subsequences__new/router.py @@ -31,7 +31,7 @@ def handle_incoming_data(asset, data): # TODO: migrate from old per-submission schema raise NotImplementedError - submission_uuid = data.pop('_submission') + submission_uuid = data.pop('_submission') # not needed in POST data bc of nested endpoint supplemental_data = SubmissionExtras.objects.get_or_create( asset=asset, submission_uuid=submission_uuid ).content # lock it? 
From 17ea9e2c4eb0c1219d53d1e6bea5d37202c658b1 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Thu, 21 Aug 2025 22:25:40 -0400 Subject: [PATCH 030/138] WIP new viewset --- kobo/apps/subsequences/api_view.py | 2 ++ .../actions/manual_transcription.py | 3 +++ kobo/apps/subsequences__new/type_aliases.py | 8 ++++++++ kobo/apps/subsequences__new/utils/action_loader.py | 12 ++++++++++++ kpi/views/v2/data.py | 8 ++++++++ 5 files changed, 33 insertions(+) create mode 100644 kobo/apps/subsequences__new/type_aliases.py create mode 100644 kobo/apps/subsequences__new/utils/action_loader.py diff --git a/kobo/apps/subsequences/api_view.py b/kobo/apps/subsequences/api_view.py index afbfc421c1..8411936978 100644 --- a/kobo/apps/subsequences/api_view.py +++ b/kobo/apps/subsequences/api_view.py @@ -113,6 +113,8 @@ def get(self, request, asset_uid, format=None): def post(self, request, asset_uid, format=None): posted_data = request.data + + print('POSTED', posted_data, flush=True) schema = self.asset.get_advanced_submission_schema() try: validate(posted_data, schema) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 7abe7c8443..af59baedd5 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -134,6 +134,9 @@ def something_to_get_the_data_back_out(self): DATE_MODIFIED_FIELD = '_dateModified' REVISIONS_FIELD = '_revisions' + + + @classmethod def validate_params(cls, params): jsonschema.validate(params, cls.params_schema) diff --git a/kobo/apps/subsequences__new/type_aliases.py b/kobo/apps/subsequences__new/type_aliases.py new file mode 100644 index 0000000000..2cd7e34d9c --- /dev/null +++ b/kobo/apps/subsequences__new/type_aliases.py @@ -0,0 +1,8 @@ +from typing import Type, TypeAlias, Union + +from .actions.manual_transcription import ManualTranscriptionAction + +# A list of possible action classes 
+ActionClassType: TypeAlias = Union[ + Type[ManualTranscriptionAction], +] diff --git a/kobo/apps/subsequences__new/utils/action_loader.py b/kobo/apps/subsequences__new/utils/action_loader.py new file mode 100644 index 0000000000..c51105d4b9 --- /dev/null +++ b/kobo/apps/subsequences__new/utils/action_loader.py @@ -0,0 +1,12 @@ +from ..actions.manual_transcription import ManualTranscriptionAction +from ..type_aliases import ActionClassType + +ACTION_CLASS_ID_MAPPING = { + ManualTranscriptionAction.ID: ManualTranscriptionAction, +} + +def get_action_class(post_data: dict) -> ActionClassType: + question_xpath = next(iter(post_data)) + action_id = next(iter(post_data[question_xpath])) + action_cls = ACTION_CLASS_ID_MAPPING[action_id] + return question_xpath, action_cls, post_data[question_xpath][action_id] diff --git a/kpi/views/v2/data.py b/kpi/views/v2/data.py index 1d455788c4..43a9a974c0 100644 --- a/kpi/views/v2/data.py +++ b/kpi/views/v2/data.py @@ -20,6 +20,7 @@ from kobo.apps.audit_log.utils import SubmissionUpdate from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix from kobo.apps.openrosa.libs.utils.logger_tools import http_open_rosa_error_handler +from kobo.apps.subsequences__new.utils.action_loader import get_action_class from kpi.authentication import EnketoSessionAuthentication from kpi.constants import ( PERM_CHANGE_SUBMISSIONS, @@ -527,6 +528,13 @@ def retrieve(self, request, pk, *args, **kwargs): submission = list(submissions)[0] return Response(submission) + @action(detail=True, methods=['PATCH']) + def supplemental(self, request, submission_uuid, *args, **kwargs): + + # Do something with John's work + return Response({'detail': 'Not implemented'}) + + @action(detail=True, methods=['GET', 'PATCH', 'DELETE'], renderer_classes=[renderers.JSONRenderer], permission_classes=[SubmissionValidationStatusPermission]) From 371801add6128b30e6cdc557583b868f818a83b7 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Thu, 21 Aug 2025 23:21:27 -0400 Subject: [PATCH 031/138] =?UTF-8?q?PoC=20action-generated=20asset-level=20?= =?UTF-8?q?params=20schema=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit and saving action data into (for now) the old SubmissionExtras model --- .../subsequences__new/actions/__init__.py | 22 ++++++++++++++++++- .../actions/manual_transcription.py | 2 +- kobo/apps/subsequences__new/router.py | 20 ++++++++--------- kpi/models/asset.py | 4 +--- 4 files changed, 32 insertions(+), 16 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/__init__.py b/kobo/apps/subsequences__new/actions/__init__.py index c47a4c2eb0..d74ef53452 100644 --- a/kobo/apps/subsequences__new/actions/__init__.py +++ b/kobo/apps/subsequences__new/actions/__init__.py @@ -1,3 +1,23 @@ -from manual_transcription import ManualTranscriptionAction +from .manual_transcription import ManualTranscriptionAction ACTIONS = (ManualTranscriptionAction,) +ACTION_IDS_TO_CLASSES = {a.ID: a for a in ACTIONS} + +ADVANCED_FEATURES_PARAMS_SCHEMA = { # rename? 
+ 'properties': { + '_schema': { + 'additionalProperties': False, + 'patternProperties': { + # not the full complexity of XPath, but a slash-delimited path + # of valid XML tag names to convey group hierarchy + '^([A-Za-z_][A-Za-z0-9_-]*)(/[A-Za-z_][A-Za-z0-9_-]*)*$': { + 'additionalProperties': False, + 'properties': {a.ID: a.params_schema for a in ACTIONS}, + 'type': 'object', + } + }, + 'type': 'object', + }, + '_version': {'const': '20250820'}, + } +} diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 41ae6e52ee..51f138576a 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -2,7 +2,7 @@ import jsonschema from copy import deepcopy -from ..actions.base import BaseAction +from ..actions.base import BaseAction, utc_datetime_to_js_str # from django.utils import timezone diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py index 488e4311c3..1a4babaea7 100644 --- a/kobo/apps/subsequences__new/router.py +++ b/kobo/apps/subsequences__new/router.py @@ -1,10 +1,8 @@ from kobo.apps.subsequences.models import ( - SubmissionExtras, -) # just bullshit for now + SubmissionExtras, # just bullshit for now +) -from .actions import ACTIONS - -ids_to_actions = {a.ID: a for a in ACTIONS} +from .actions import ACTION_IDS_TO_CLASSES class InvalidAction(Exception): @@ -34,7 +32,7 @@ def handle_incoming_data(asset, data): submission_uuid = data.pop('_submission') # not needed in POST data bc of nested endpoint supplemental_data = SubmissionExtras.objects.get_or_create( asset=asset, submission_uuid=submission_uuid - ).content # lock it? + )[0].content # lock it? 
for question_xpath, data_for_this_question in data.items(): if asset.advanced_features['_version'] != '20250820': @@ -49,7 +47,7 @@ def handle_incoming_data(asset, data): for action_id, action_data in data_for_this_question.items(): try: - action_class = ids_to_actions[action_id] + action_class = ACTION_IDS_TO_CLASSES[action_id] except KeyError as e: raise InvalidAction from e try: @@ -59,8 +57,8 @@ def handle_incoming_data(asset, data): action = action_class(question_xpath, action_params) # action.validate_data(action_data) # called by revise_field - action.revise_field(supplemental_data, action_data) + supplemental_data = action.revise_field(supplemental_data, action_data) - SubmissionExtras.objects.filter( - asset=asset, submission_uuid=submission_uuid - ).update(content=supplemental_data) + SubmissionExtras.objects.filter( + asset=asset, submission_uuid=submission_uuid + ).update(content=supplemental_data) diff --git a/kpi/models/asset.py b/kpi/models/asset.py index ea5f2d2190..163997e227 100644 --- a/kpi/models/asset.py +++ b/kpi/models/asset.py @@ -18,9 +18,7 @@ from formpack.utils.json_hash import json_hash from formpack.utils.kobo_locking import strip_kobo_locking_profile from kobo.apps.reports.constants import DEFAULT_REPORTS_KEY, SPECIFIC_REPORTS_KEY -from kobo.apps.subsequences.advanced_features_params_schema import ( - ADVANCED_FEATURES_PARAMS_SCHEMA, -) +from kobo.apps.subsequences__new.actions import ADVANCED_FEATURES_PARAMS_SCHEMA from kobo.apps.subsequences.utils import ( advanced_feature_instances, advanced_submission_jsonschema, From 91628ca9ed5d7ddd9eb90b97aa95cf1921c0ddec Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 22 Aug 2025 09:31:27 -0400 Subject: [PATCH 032/138] Add usage limit check to action class --- kobo/apps/subsequences__new/actions/base.py | 0 .../actions/manual_transcription.py | 44 ++++++++++++++++--- kobo/apps/subsequences__new/router.py | 1 + 3 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 
kobo/apps/subsequences__new/actions/base.py diff --git a/kobo/apps/subsequences__new/actions/base.py b/kobo/apps/subsequences__new/actions/base.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index e50314ce74..b1e6c457fe 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -2,7 +2,12 @@ import jsonschema from copy import deepcopy -from ..actions.base import BaseAction, utc_datetime_to_js_str +from django.conf import settings + +from kobo.apps.kobo_auth.shortcuts import User +from kpi.exceptions import UsageLimitExceededException +from kpi.utils.usage_calculator import ServiceUsageCalculator +# from ..actions.base import BaseAction, utc_datetime_to_js_str # from django.utils import timezone @@ -121,7 +126,7 @@ def utc_datetime_to_js_str(dt: datetime.datetime) -> str: raise NotImplementedError('Only UTC datetimes are supported') return dt.isoformat().replace("+00:00", "Z") - +# TODO Move it to its own file "base.py" class BaseAction: def something_to_get_the_data_back_out(self): # might need to deal with multiple columns for one action @@ -138,8 +143,6 @@ def something_to_get_the_data_back_out(self): REVISIONS_FIELD = '_revisions' - - @classmethod def validate_params(cls, params): jsonschema.validate(params, cls.params_schema) @@ -161,7 +164,19 @@ def result_schema(self): we need to solve the problem of storing multiple results for a single action """ - raise NotImplementedError() + raise NotImplementedError() + + def check_limits(self, user: User): + + if not settings.STRIPE_ENABLED or not self._is_usage_limited: + return + + calculator = ServiceUsageCalculator(user) + balances = calculator.get_usage_balances() + + balance = balances[self._limit_identifier] + if balance and balance['exceeded']: + raise UsageLimitExceededException() def 
revise_field(self, submission_extra: dict, edit: dict) -> dict: raise NotImplementedError @@ -185,6 +200,21 @@ def raise_for_any_leading_underscore_key(d: dict): if match: raise Exception('An unexpected key with a leading underscore was found') + @property + def _is_usage_limited(self): + """ + Returns whether an action should check for usage limits. + """ + raise NotImplementedError() + + @property + def _limit_identifier(self): + # Example for automatic transcription + # + # from kobo.apps.organizations.constants import UsageType + # return UsageType.ASR_SECONDS + raise NotImplementedError() + class ManualTranscriptionAction(BaseAction): ID = 'manual_transcription' @@ -361,3 +391,7 @@ def revise_field(self, submission_extra: dict, edit: dict) -> dict: new_record[self.DATE_CREATED_FIELD] = record_creation_date return new_record + + @property + def _is_usage_limited(self): + return False diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py index 1a4babaea7..9a8ff51f61 100644 --- a/kobo/apps/subsequences__new/router.py +++ b/kobo/apps/subsequences__new/router.py @@ -56,6 +56,7 @@ def handle_incoming_data(asset, data): raise InvalidAction from e action = action_class(question_xpath, action_params) + action.check_limits(asset.owner) # action.validate_data(action_data) # called by revise_field supplemental_data = action.revise_field(supplemental_data, action_data) From 36f1449c9744de36aced9aea91e83a98df3a59d5 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 22 Aug 2025 09:34:24 -0400 Subject: [PATCH 033/138] Add comments --- kobo/apps/subsequences__new/actions/__init__.py | 1 + kobo/apps/subsequences__new/type_aliases.py | 2 ++ kobo/apps/subsequences__new/utils/action_loader.py | 1 + 3 files changed, 4 insertions(+) diff --git a/kobo/apps/subsequences__new/actions/__init__.py b/kobo/apps/subsequences__new/actions/__init__.py index d74ef53452..b09722df29 100644 --- a/kobo/apps/subsequences__new/actions/__init__.py +++ 
b/kobo/apps/subsequences__new/actions/__init__.py @@ -1,5 +1,6 @@ from .manual_transcription import ManualTranscriptionAction +# TODO, what about using a loader for every class in "actions" folder (except base.py)? ACTIONS = (ManualTranscriptionAction,) ACTION_IDS_TO_CLASSES = {a.ID: a for a in ACTIONS} diff --git a/kobo/apps/subsequences__new/type_aliases.py b/kobo/apps/subsequences__new/type_aliases.py index 2cd7e34d9c..1cb1166985 100644 --- a/kobo/apps/subsequences__new/type_aliases.py +++ b/kobo/apps/subsequences__new/type_aliases.py @@ -1,3 +1,5 @@ +# NOT USED anymore, to be removed + from typing import Type, TypeAlias, Union from .actions.manual_transcription import ManualTranscriptionAction diff --git a/kobo/apps/subsequences__new/utils/action_loader.py b/kobo/apps/subsequences__new/utils/action_loader.py index c51105d4b9..83e1a543b8 100644 --- a/kobo/apps/subsequences__new/utils/action_loader.py +++ b/kobo/apps/subsequences__new/utils/action_loader.py @@ -1,3 +1,4 @@ +# NOT USED anymore, to be removed from ..actions.manual_transcription import ManualTranscriptionAction from ..type_aliases import ActionClassType From 7fe16c5510fb534e99304672e412afb34bde4cad Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 22 Aug 2025 09:43:18 -0400 Subject: [PATCH 034/138] More comments --- kobo/apps/subsequences__new/router.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py index 9a8ff51f61..ef40fb8fc4 100644 --- a/kobo/apps/subsequences__new/router.py +++ b/kobo/apps/subsequences__new/router.py @@ -1,7 +1,7 @@ from kobo.apps.subsequences.models import ( SubmissionExtras, # just bullshit for now ) - +from kpi.models import Asset from .actions import ACTION_IDS_TO_CLASSES @@ -22,8 +22,12 @@ class InvalidXPath(Exception): pass - -def handle_incoming_data(asset, data): +# ChatGPT suggestions: +# - dispatch_action_payload +# - dispatch_incoming_data +# - 
process_action_request +# - run_action +def handle_incoming_data(asset: Asset, data: dict): schema_version = data.pop('_version') if schema_version != '20250820': # TODO: migrate from old per-submission schema From 7529adaf5a2c398fcc7045f8b276e024e72ed8f4 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Fri, 22 Aug 2025 10:24:38 -0400 Subject: [PATCH 035/138] add forgotten base.py --- kobo/apps/subsequences__new/actions/base.py | 282 ++++++++++++++++++++ 1 file changed, 282 insertions(+) diff --git a/kobo/apps/subsequences__new/actions/base.py b/kobo/apps/subsequences__new/actions/base.py index e69de29bb2..dced06e7ae 100644 --- a/kobo/apps/subsequences__new/actions/base.py +++ b/kobo/apps/subsequences__new/actions/base.py @@ -0,0 +1,282 @@ +import datetime +import jsonschema +from copy import deepcopy + +# from django.utils import timezone +class FakeDjangoTimezoneUtil: + @staticmethod + def now(): + from zoneinfo import ZoneInfo + return datetime.datetime.now(tz=ZoneInfo('UTC')) +timezone = FakeDjangoTimezoneUtil() + +""" +### All actions must have the following components + +* (check!) a unique identifier for the action +* three jsonschemas: + 1. (check!) one to validate the parameters used to configure the action + * `ADVANCED_FEATURES_PARAMS_SCHEMA` + 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) + * the result of `modify_jsonschema()` + 3. 
one to validate the result of the action - the result of `modify_jsonschema()` + * OH NO, this doesn't happen at all yet +* a handler that receives a submission (and other metadata) and processes it +""" + +""" +idea of example content in asset.advanced_features (what kind of actions are activated per question) +{ + '_version': '20250820', + '_schema': { + 'my_audio_question': { + 'manual_transcription': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], + }, + 'my_video_question': { + 'manual_transcription': [{'language': 'en'}], + }, + 'my_number_question': { + 'number_multiplier': [{'multiplier': 3}], + }, + }, +} + +idea of example data in SubmissionExtras based on the above +{ + '_version': '20250820', + '_submission': '', + 'my_audio_question': { + 'manual_transcription': { + 'transcript': 'هائج', + 'language': 'ar', + '_dateCreated': '2025-08-21T20:55:42.012053Z', + '_dateModified': '2025-08-21T20:57:28.154567Z', + '_revisions': [ + { + 'transcript': 'فارغ', + 'language': 'ar', + '_dateCreated': '2025-08-21T20:55:42.012053Z', + } + ], + }, + 'manual_translation': [ + { + 'language': 'en', + 'translation': 'berserk', + '_dateCreated': '2025-08-21T21:39:42.141306Z', + '_dateModified': '2025-08-21T21:39:42.141306Z', + }, + { + 'language': 'es', + 'translation': 'enloquecido', + '_dateCreated': '2025-08-21T21:40:54.644308Z', + '_dateModified': '2025-08-21T22:00:10.862880Z', + '_revisions': [ + { + 'translation': 'loco', + 'language': 'es', + '_dateCreated': '2025-08-21T21:40:54.644308Z', + } + ], + }, + ], + }, + 'my_video_question': { + 'manual_transcription': { + 'transcript': 'sea horse sea hell', + 'language': 'en', + '_dateCreated': '2025-08-21T21:06:20.059117Z', + '_dateModified': '2025-08-21T21:06:20.059117Z', + }, + }, + 'my_number_question': { + 'number_multiplier': { + 'numberMultiplied': 99, + '_dateCreated': '2025-08-21T21:09:34.504546Z', + '_dateModified': 
'2025-08-21T21:09:34.504546Z', + }, + }, +} +""" + + +def utc_datetime_to_js_str(dt: datetime.datetime) -> str: + """ + Return a string to represent a `datetime` following the simplification of + the ISO 8601 format used by JavaScript + """ + # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format + if dt.utcoffset() or not dt.tzinfo: + raise NotImplementedError('Only UTC datetimes are supported') + return dt.isoformat().replace("+00:00", "Z") + + +class BaseAction: + def something_to_get_the_data_back_out(self): + # might need to deal with multiple columns for one action + # ^ definitely will + raise NotImplementedError + + DATE_CREATED_FIELD = '_dateCreated' + DATE_MODIFIED_FIELD = '_dateModified' + REVISIONS_FIELD = '_revisions' + + @classmethod + def validate_params(cls, params): + jsonschema.validate(params, cls.params_schema) + + def validate_data(self, data): + jsonschema.validate(data, self.data_schema) + + def validate_result(self, result): + jsonschema.validate(result, self.result_schema) + + def record_repr(self, record: dict) -> dict: + raise NotImplementedError() + + @property + def result_schema(self): + """ + we also need a schema to define the final result that will be written + into SubmissionExtras + + we need to solve the problem of storing multiple results for a single action + """ + + # We want schema to look like this at the end + # schema_orig = { + # '$schema': 'https://json-schema.org/draft/2020-12/schema', + # 'title': 'Transcript with revisions', + # 'type': 'object', + # 'additionalProperties': False, + # 'properties': { + # 'language': {'$ref': '#/$defs/lang'}, + # 'transcript': {'$ref': '#/$defs/transcript'}, + # 'revisions': { + # 'type': 'array', + # 'minItems': 1, + # 'items': {'$ref': '#/$defs/revision'}, + # }, + # '_dateCreated': {'$ref': '#/$defs/dateTime'}, + # '_dateModified': {'$ref': '#/$defs/dateTime'}, + # }, + # 'required': ['_dateCreated', '_dateModified'], + # 'allOf': [ + # { + # '$ref': 
'#/$defs/lang_transcript_dependency' + # } + # ], + # '$defs': { + # 'lang': {'type': 'string', 'enum': self.languages}, + # 'transcript': {'type': 'string'}, + # 'dateTime': {'type': 'string', 'format': 'date-time'}, + # 'lang_transcript_dependency': { + # 'allOf': [ + # { + # 'if': {'required': ['language']}, + # 'then': {'required': ['transcript']} + # }, + # { + # 'if': {'required': ['transcript']}, + # 'then': {'required': ['language']} + # } + # ] + # }, + # 'revision': { + # 'type': 'object', + # 'additionalProperties': False, + # 'properties': { + # 'language': {'$ref': '#/$defs/lang'}, + # 'transcript': {'$ref': '#/$defs/transcript'}, + # '_dateCreated': {'$ref': '#/$defs/dateTime'}, + # }, + # 'required': ['_dateCreated'], + # 'allOf': [ + # { + # "$ref": "#/$defs/lang_transcript_dependency" + # } + # ], + # }, + # }, + # } + + result_schema_template = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.REVISIONS_FIELD: { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, + }, + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], + '$defs': { + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD], + } + }, + } + + def _inject_data_schema(destination_schema: dict, skipped_keys: list) -> dict: + + for key, value in self.data_schema.items(): + if key in skipped_keys: + continue + + if key in destination_schema: + if isinstance(destination_schema[key], dict): + destination_schema[key].update(self.data_schema[key]) + elif isinstance(destination_schema[key], list): + destination_schema[key].extend(self.data_schema[key]) + else: + 
destination_schema[key] = self.data_schema[key] + else: + destination_schema[key] = self.data_schema[key] + + # Inject data schema in result schema template + schema = deepcopy(result_schema_template) + _inject_data_schema(schema, ['$schema', 'title', 'type']) + + # Also inject data schema in the revision definition + _inject_data_schema( + schema['$defs']['revision'], ['$schema', 'title', '$defs'] + ) + + return schema + + def revise_field(self, submission_extra: dict, edit: dict) -> dict: + raise NotImplementedError + + @staticmethod + def raise_for_any_leading_underscore_key(d: dict): + """ + Keys with leading underscores are reserved for metadata like + `_dateCreated`, `_dateModified`, and `_revisions`. No key with a + leading underscore should be present in data POSTed by a client or + generated by an action. + + Schema validation should block invalid keys, but this method exists as + a redundant check to guard against schema mistakes. + """ + for k in list(d.keys()): + try: + match = k.startswith('_') + except AttributeError: + continue + if match: + raise Exception('An unexpected key with a leading underscore was found') From e28efb2342ec55b18c6cae9b8dc2a0f7251045cf Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Fri, 22 Aug 2025 10:41:30 -0400 Subject: [PATCH 036/138] continue cleanup from forgetting to add base.py --- kobo/apps/subsequences__new/actions/base.py | 55 +++- .../actions/manual_transcription.py | 239 +----------------- 2 files changed, 56 insertions(+), 238 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/base.py b/kobo/apps/subsequences__new/actions/base.py index dced06e7ae..8e6f48335b 100644 --- a/kobo/apps/subsequences__new/actions/base.py +++ b/kobo/apps/subsequences__new/actions/base.py @@ -1,14 +1,12 @@ import datetime -import jsonschema from copy import deepcopy -# from django.utils import timezone -class FakeDjangoTimezoneUtil: - @staticmethod - def now(): - from zoneinfo import ZoneInfo - return datetime.datetime.now(tz=ZoneInfo('UTC')) -timezone = FakeDjangoTimezoneUtil() +import jsonschema +from django.conf import settings + +from kobo.apps.kobo_auth.shortcuts import User +from kpi.exceptions import UsageLimitExceededException +from kpi.utils.usage_calculator import ServiceUsageCalculator """ ### All actions must have the following components @@ -113,7 +111,7 @@ def utc_datetime_to_js_str(dt: datetime.datetime) -> str: # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format if dt.utcoffset() or not dt.tzinfo: raise NotImplementedError('Only UTC datetimes are supported') - return dt.isoformat().replace("+00:00", "Z") + return dt.isoformat().replace('+00:00', 'Z') class BaseAction: @@ -228,11 +226,13 @@ def result_schema(self): self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, }, 'required': [self.DATE_CREATED_FIELD], - } + }, }, } - def _inject_data_schema(destination_schema: dict, skipped_keys: list) -> dict: + def _inject_data_schema( + destination_schema: dict, skipped_keys: list + ) -> dict: for key, value in self.data_schema.items(): if key in skipped_keys: @@ -259,6 +259,18 @@ def _inject_data_schema(destination_schema: dict, skipped_keys: list) -> dict: return schema + def 
check_limits(self, user: User): + + if not settings.STRIPE_ENABLED or not self._is_usage_limited: + return + + calculator = ServiceUsageCalculator(user) + balances = calculator.get_usage_balances() + + balance = balances[self._limit_identifier] + if balance and balance['exceeded']: + raise UsageLimitExceededException() + def revise_field(self, submission_extra: dict, edit: dict) -> dict: raise NotImplementedError @@ -279,4 +291,23 @@ def raise_for_any_leading_underscore_key(d: dict): except AttributeError: continue if match: - raise Exception('An unexpected key with a leading underscore was found') + raise Exception( + 'An unexpected key with a leading underscore was found' + ) + + @property + def _is_usage_limited(self): + """ + Returns whether an action should check for usage limits. + """ + raise NotImplementedError() + + @property + def _limit_identifier(self): + # Example for automatic transcription + # + # from kobo.apps.organizations.constants import UsageType + # return UsageType.ASR_SECONDS + raise NotImplementedError() + + diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index b1e6c457fe..4f780cd5f6 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -1,219 +1,8 @@ -import datetime -import jsonschema from copy import deepcopy -from django.conf import settings +from django.utils import timezone -from kobo.apps.kobo_auth.shortcuts import User -from kpi.exceptions import UsageLimitExceededException -from kpi.utils.usage_calculator import ServiceUsageCalculator -# from ..actions.base import BaseAction, utc_datetime_to_js_str - - -# from django.utils import timezone -class FakeDjangoTimezoneUtil: - @staticmethod - def now(): - from zoneinfo import ZoneInfo - return datetime.datetime.now(tz=ZoneInfo('UTC')) -timezone = FakeDjangoTimezoneUtil() - -# from ..constants import 
TRANSCRIBABLE_SOURCE_TYPES -# from ..actions.base import BaseAction - -""" -### All actions must have the following components - -* (check!) a unique identifier for the action -* three jsonschemas: - 1. (check!) one to validate the parameters used to configure the action - * `ADVANCED_FEATURES_PARAMS_SCHEMA` - 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) - * the result of `modify_jsonschema()` - 3. one to validate the result of the action - the result of `modify_jsonschema()` - * OH NO, this doesn't happen at all yet -* a handler that receives a submission (and other metadata) and processes it -""" - -""" -idea of example content in asset.advanced_features (what kind of actions are activated per question) -{ - 'version': '20250820', - 'schema': { - 'my_audio_question': { - 'manual_transcription': [ - {'language': 'ar'}, - {'language': 'bn'}, - {'language': 'es'}, - ], - 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], - }, - 'my_video_question': { - 'manual_transcription': [{'language': 'en'}], - }, - 'my_number_question': { - 'number_multiplier': [{'multiplier': 3}], - }, - }, -} - -idea of example data in SubmissionExtras based on the above -{ - 'version': '20250820', - 'submission': '', - 'my_audio_question': { - 'manual_transcription': { - 'transcript': 'هائج', - 'language': 'ar', - '_dateCreated': '2025-08-21T20:55:42.012053Z', - '_dateModified': '2025-08-21T20:57:28.154567Z', - '_revisions': [ - { - 'transcript': 'فارغ', - 'language': 'ar', - '_dateCreated': '2025-08-21T20:55:42.012053Z', - } - ], - }, - 'manual_translation': [ - { - 'language': 'en', - 'translation': 'berserk', - '_dateCreated': '2025-08-21T21:39:42.141306Z', - '_dateModified': '2025-08-21T21:39:42.141306Z', - }, - { - 'language': 'es', - 'translation': 'enloquecido', - '_dateCreated': '2025-08-21T21:40:54.644308Z', - '_dateModified': '2025-08-21T22:00:10.862880Z', - '_revisions': [ - { - 'translation': 
'loco', - 'language': 'es', - '_dateCreated': '2025-08-21T21:40:54.644308Z', - } - ], - }, - ], - }, - 'my_video_question': { - 'manual_transcription': { - 'transcript': 'sea horse sea hell', - 'language': 'en', - '_dateCreated': '2025-08-21T21:06:20.059117Z', - '_dateModified': '2025-08-21T21:06:20.059117Z', - }, - }, - 'my_number_question': { - 'number_multiplier': { - 'numberMultiplied': 99, - '_dateCreated': '2025-08-21T21:09:34.504546Z', - '_dateModified': '2025-08-21T21:09:34.504546Z', - }, - }, -} -""" - - -def utc_datetime_to_js_str(dt: datetime.datetime) -> str: - """ - Return a string to represent a `datetime` following the simplification of - the ISO 8601 format used by JavaScript - """ - # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format - if dt.utcoffset() or not dt.tzinfo: - raise NotImplementedError('Only UTC datetimes are supported') - return dt.isoformat().replace("+00:00", "Z") - -# TODO Move it to its own file "base.py" -class BaseAction: - def something_to_get_the_data_back_out(self): - # might need to deal with multiple columns for one action - # ^ definitely will - raise NotImplementedError - - # is a leading underscore a good convention for marking things that must - # not be set by the action result? 
alternatively, we could nest all the - # action results inside some object or, we could nest all the - # non-action-result metadata-type things inside an object, and protect that - # from being overwritten by the action - DATE_CREATED_FIELD = '_dateCreated' - DATE_MODIFIED_FIELD = '_dateModified' - REVISIONS_FIELD = '_revisions' - - - @classmethod - def validate_params(cls, params): - jsonschema.validate(params, cls.params_schema) - - def validate_data(self, data): - jsonschema.validate(data, self.data_schema) - - def validate_result(self, result): - jsonschema.validate(result, self.result_schema) - - def record_repr(self, record: dict) -> dict: - raise NotImplementedError() - - @property - def result_schema(self): - """ - we also need a schema to define the final result that will be written - into SubmissionExtras - - we need to solve the problem of storing multiple results for a single action - """ - raise NotImplementedError() - - def check_limits(self, user: User): - - if not settings.STRIPE_ENABLED or not self._is_usage_limited: - return - - calculator = ServiceUsageCalculator(user) - balances = calculator.get_usage_balances() - - balance = balances[self._limit_identifier] - if balance and balance['exceeded']: - raise UsageLimitExceededException() - - def revise_field(self, submission_extra: dict, edit: dict) -> dict: - raise NotImplementedError - - @staticmethod - def raise_for_any_leading_underscore_key(d: dict): - """ - Keys with leading underscores are reserved for metadata like - `_dateCreated`, `_dateModified`, and `_revisions`. No key with a - leading underscore should be present in data POSTed by a client or - generated by an action. - - Schema validation should block invalid keys, but this method exists as - a redundant check to guard against schema mistakes. 
- """ - for k in list(d.keys()): - try: - match = k.startswith('_') - except AttributeError: - continue - if match: - raise Exception('An unexpected key with a leading underscore was found') - - @property - def _is_usage_limited(self): - """ - Returns whether an action should check for usage limits. - """ - raise NotImplementedError() - - @property - def _limit_identifier(self): - # Example for automatic transcription - # - # from kobo.apps.organizations.constants import UsageType - # return UsageType.ASR_SECONDS - raise NotImplementedError() +from .base import BaseAction, utc_datetime_to_js_str class ManualTranscriptionAction(BaseAction): @@ -272,13 +61,9 @@ def data_schema(self): # for lack of a better name 'additionalProperties': False, 'properties': { 'language': {'$ref': '#/$defs/lang'}, - 'transcript': {'$ref': '#/$defs/transcript'} + 'transcript': {'$ref': '#/$defs/transcript'}, }, - 'allOf': [ - { - '$ref': '#/$defs/lang_transcript_dependency' - } - ], + 'allOf': [{'$ref': '#/$defs/lang_transcript_dependency'}], '$defs': { 'lang': {'type': 'string', 'enum': self.languages}, 'transcript': {'type': 'string'}, @@ -286,15 +71,15 @@ def data_schema(self): # for lack of a better name 'allOf': [ { 'if': {'required': ['language']}, - 'then': {'required': ['transcript']} + 'then': {'required': ['transcript']}, }, { 'if': {'required': ['transcript']}, - 'then': {'required': ['language']} - } + 'then': {'required': ['language']}, + }, ] - } - } + }, + }, } @property @@ -333,11 +118,13 @@ def result_schema(self): self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, }, 'required': [self.DATE_CREATED_FIELD], - } + }, }, } - def _inject_data_schema(destination_schema: dict, skipped_keys: list) -> dict: + def _inject_data_schema( + destination_schema: dict, skipped_keys: list + ) -> dict: for key, value in self.data_schema.items(): if key in skipped_keys: From 36ecc7bf9120c2d4253a8f27f5d193ae185015c2 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Fri, 22 Aug 2025 10:53:38 -0400 Subject: [PATCH 037/138] =?UTF-8?q?rename=20`=5Fschema`=20to=20`=5FactionC?= =?UTF-8?q?onfigs`;=20expect=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `submission_uuid` as argument from nested view instead of POST data --- kobo/apps/subsequences__new/actions/__init__.py | 2 +- kobo/apps/subsequences__new/router.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/__init__.py b/kobo/apps/subsequences__new/actions/__init__.py index b09722df29..df6f8d8e87 100644 --- a/kobo/apps/subsequences__new/actions/__init__.py +++ b/kobo/apps/subsequences__new/actions/__init__.py @@ -6,7 +6,7 @@ ADVANCED_FEATURES_PARAMS_SCHEMA = { # rename? 'properties': { - '_schema': { + '_actionConfigs': { 'additionalProperties': False, 'patternProperties': { # not the full complexity of XPath, but a slash-delimited path diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py index ef40fb8fc4..8be0f66089 100644 --- a/kobo/apps/subsequences__new/router.py +++ b/kobo/apps/subsequences__new/router.py @@ -27,13 +27,13 @@ class InvalidXPath(Exception): # - dispatch_incoming_data # - process_action_request # - run_action -def handle_incoming_data(asset: Asset, data: dict): +def handle_incoming_data(asset: Asset, submission_uuid: str, data: dict): schema_version = data.pop('_version') if schema_version != '20250820': # TODO: migrate from old per-submission schema raise NotImplementedError - submission_uuid = data.pop('_submission') # not needed in POST data bc of nested endpoint + # TODO: validate that such a submission even exists! supplemental_data = SubmissionExtras.objects.get_or_create( asset=asset, submission_uuid=submission_uuid )[0].content # lock it? 
@@ -44,7 +44,7 @@ def handle_incoming_data(asset: Asset, data: dict): raise NotImplementedError try: action_configs_for_this_question = asset.advanced_features[ - '_schema' + '_actionConfigs' ][question_xpath] except KeyError as e: raise InvalidXPath from e From 3f79f0e200b0e0e1bae592146db97bfde77dcb07 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 22 Aug 2025 13:17:59 -0400 Subject: [PATCH 038/138] WIP new endpoint --- .../subsequences__new/tests/api/__init__.py | 0 .../tests/api/v2/__init__.py | 0 .../subsequences__new/tests/api/v2/base.py | 86 ++++++++ .../tests/api/v2/test_permissions.py | 186 ++++++++++++++++++ kpi/views/v2/data.py | 58 +++++- 5 files changed, 325 insertions(+), 5 deletions(-) create mode 100644 kobo/apps/subsequences__new/tests/api/__init__.py create mode 100644 kobo/apps/subsequences__new/tests/api/v2/__init__.py create mode 100644 kobo/apps/subsequences__new/tests/api/v2/base.py create mode 100644 kobo/apps/subsequences__new/tests/api/v2/test_permissions.py diff --git a/kobo/apps/subsequences__new/tests/api/__init__.py b/kobo/apps/subsequences__new/tests/api/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/subsequences__new/tests/api/v2/__init__.py b/kobo/apps/subsequences__new/tests/api/v2/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/subsequences__new/tests/api/v2/base.py b/kobo/apps/subsequences__new/tests/api/v2/base.py new file mode 100644 index 0000000000..b7ca35632e --- /dev/null +++ b/kobo/apps/subsequences__new/tests/api/v2/base.py @@ -0,0 +1,86 @@ +import uuid +from copy import deepcopy +from unittest.mock import Mock, patch + +import pytest +from constance.test import override_config +from django.conf import settings +from django.test import override_settings +from django.urls import reverse +from google.cloud import translate_v3 +from jsonschema import validate +from rest_framework import status +from rest_framework.test import APITestCase + 
+from kobo.apps.kobo_auth.shortcuts import User +from kobo.apps.languages.models.language import Language, LanguageRegion +from kobo.apps.languages.models.transcription import ( + TranscriptionService, + TranscriptionServiceLanguageM2M, +) +from kobo.apps.languages.models.translation import ( + TranslationService, + TranslationServiceLanguageM2M, +) +from kobo.apps.openrosa.apps.logger.models import Instance +from kobo.apps.openrosa.apps.logger.xform_instance_parser import add_uuid_prefix +from kobo.apps.organizations.constants import UsageType +from kpi.constants import ( + PERM_ADD_SUBMISSIONS, + PERM_CHANGE_ASSET, + PERM_CHANGE_SUBMISSIONS, + PERM_PARTIAL_SUBMISSIONS, + PERM_VIEW_ASSET, + PERM_VIEW_SUBMISSIONS, +) +from kpi.models.asset import Asset +from kpi.tests.base_test_case import BaseTestCase +from kpi.tests.kpi_test_case import KpiTestCase +from kpi.urls.router_api_v2 import URL_NAMESPACE as ROUTER_URL_NAMESPACE +from kpi.utils.fuzzy_int import FuzzyInt +from kpi.utils.xml import ( + edit_submission_xml, + fromstring_preserve_root_xmlns, + xml_tostring, +) + + +class SubsequenceBaseTestCase(KpiTestCase): + + fixtures = ['test_data'] + URL_NAMESPACE = ROUTER_URL_NAMESPACE + + def setUp(self): + user = User.objects.get(username='someuser') + self.asset = Asset( + owner=user, + content={'survey': [{'type': 'audio', 'label': 'q1', 'name': 'q1'}]}, + ) + self.asset.advanced_features = {} + self.asset.save() + self.asset.deploy(backend='mock', active=True) + self.asset_uid = self.asset.uid + self.asset_url = reverse( + self._get_endpoint('asset-detail'), args=[self.asset_uid] + ) + + uuid_ = uuid.uuid4() + self.submission_uuid = str(uuid_) + + # add a submission + submission_data = { + 'q1': 'answer', + '_uuid': self.submission_uuid, + '_submitted_by': 'someuser', + } + + self.asset.deployment.mock_submissions([submission_data]) + self.client.force_login(user) + self.supplement_details_url = reverse( + self._get_endpoint('submission-supplemental'), + 
args=[self.asset.uid, self.submission_uuid] + ) + + def set_asset_advanced_features(self, features): + self.asset.advanced_features = features + self.asset.save(update_fields=['advanced_features']) diff --git a/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py b/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py new file mode 100644 index 0000000000..ac05eea4f0 --- /dev/null +++ b/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py @@ -0,0 +1,186 @@ +import pytest +from ddt import data, ddt, unpack +from rest_framework import status + +from kobo.apps.kobo_auth.shortcuts import User +from kobo.apps.subsequences__new.tests.api.v2.base import SubsequenceBaseTestCase +from kpi.constants import ( + PERM_CHANGE_SUBMISSIONS, + PERM_PARTIAL_SUBMISSIONS, + PERM_VIEW_SUBMISSIONS, +) +from kpi.utils.object_permission import get_anonymous_user + +@ddt +class SubsequencePermissionTestCase(SubsequenceBaseTestCase): + + @data( + # owner: Obviously, no need to share. + ( + 'anotheruser', + False, + status.HTTP_200_OK, + ), + # regular user with no permissions + ( + 'anotheruser', + False, + status.HTTP_404_NOT_FOUND, + ), + # regular user with view permission + ( + 'anotheruser', + True, + status.HTTP_200_OK, + ), + # admin user with no permissions + ( + 'adminuser', + False, + status.HTTP_404_NOT_FOUND, + ), + # admin user with view permissions + ( + 'adminuser', + True, + status.HTTP_200_OK, + ), + # anonymous user with no permissions + ( + 'anonymous', + False, + status.HTTP_404_NOT_FOUND, + ), + # anonymous user with view permissions + ( + 'anonymous', + True, + status.HTTP_200_OK, + ), + ) + @unpack + def test_can_read(self, username, shared, status_code): + user = get_anonymous_user() + self.client.logout() + if username != 'anonymous': + user = User.objects.get(username=username) + self.client.force_login(user) + + if shared: + self.asset.assign_perm(user, PERM_VIEW_SUBMISSIONS) + + response = self.client.get(self.supplement_details_url) + 
assert response.status_code == status_code + if status_code == status.HTTP_200_OK: + assert response.data == {} + + @data( + # owner: Obviously, no need to share. + ( + 'anotheruser', + False, + status.HTTP_200_OK, + ), + # regular user with no permissions + ( + 'anotheruser', + False, + status.HTTP_404_NOT_FOUND, + ), + # regular user with view permission + ( + 'anotheruser', + True, + status.HTTP_200_OK, + ), + # admin user with no permissions + ( + 'adminuser', + False, + status.HTTP_404_NOT_FOUND, + ), + # admin user with view permissions + ( + 'adminuser', + True, + status.HTTP_200_OK, + ), + # anonymous user with no permissions + ( + 'anonymous', + False, + status.HTTP_404_NOT_FOUND, + ), + # anonymous user with view permissions + ( + 'anonymous', + True, + status.HTTP_200_OK, + ), + ) + @unpack + def test_can_write(self, username, shared, status_code): + payload = { + '_version': '20250820', + 'q1': { + 'manual_transcription': { + 'language': 'es', + 'transcript': 'buenas noches', + } + }, + } + + user = get_anonymous_user() + self.client.logout() + if username != 'anonymous': + user = User.objects.get(username=username) + self.client.force_login(user) + + if shared: + self.asset.assign_perm(user, PERM_CHANGE_SUBMISSIONS) + + response = self.client.patch(self.supplement_details_url, data=payload) + assert response.status_code == status_code + if status_code == status.HTTP_200_OK: + assert response.data == {} + + +class SubsequencePartialPermissionTestCase(SubsequenceBaseTestCase): + """ + Ensure that users with partial change_submission permission cannot access or + update submission supplement data, especially for submissions they are not + authorized to view. 
+ """ + + def test_cannot_post_data(self): + anotheruser = User.objects.get(username='anotheruser') + partial_perms = { + PERM_CHANGE_SUBMISSIONS: [{'_submitted_by': anotheruser.username}] + } + self.asset.assign_perm( + anotheruser, PERM_PARTIAL_SUBMISSIONS, partial_perms=partial_perms + ) + self.client.force_login(anotheruser) + payload = { + '_version': '20250820', + 'q1': { + 'manual_transcription': { + 'language': 'es', + 'transcript': 'buenas noches', + } + }, + } + response = self.client.post(self.supplement_details_url, data=payload) + assert response.status_code == status.HTTP_404_NOT_FOUND + + def test_cannot_read_data(self): + anotheruser = User.objects.get(username='anotheruser') + partial_perms = { + PERM_VIEW_SUBMISSIONS: [{'_submitted_by': anotheruser.username}] + } + self.asset.assign_perm( + anotheruser, PERM_PARTIAL_SUBMISSIONS, partial_perms=partial_perms + ) + self.client.force_login(anotheruser) + response = self.client.get(self.supplement_details_url) + assert response.status_code == status.HTTP_404_NOT_FOUND diff --git a/kpi/views/v2/data.py b/kpi/views/v2/data.py index 43a9a974c0..eff862ac81 100644 --- a/kpi/views/v2/data.py +++ b/kpi/views/v2/data.py @@ -18,9 +18,9 @@ from kobo.apps.audit_log.base_views import AuditLoggedViewSet from kobo.apps.audit_log.models import AuditType from kobo.apps.audit_log.utils import SubmissionUpdate -from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix +from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix, \ + add_uuid_prefix from kobo.apps.openrosa.libs.utils.logger_tools import http_open_rosa_error_handler -from kobo.apps.subsequences__new.utils.action_loader import get_action_class from kpi.authentication import EnketoSessionAuthentication from kpi.constants import ( PERM_CHANGE_SUBMISSIONS, @@ -38,6 +38,7 @@ from kpi.models import Asset from kpi.paginators import DataPagination from kpi.permissions import ( + AdvancedSubmissionPermission, 
DuplicateSubmissionPermission, EditLinkSubmissionPermission, SubmissionPermission, @@ -528,12 +529,59 @@ def retrieve(self, request, pk, *args, **kwargs): submission = list(submissions)[0] return Response(submission) - @action(detail=True, methods=['PATCH']) + @action( + detail=True, + methods=['PATCH'], + permission_classes=[AdvancedSubmissionPermission] + ) def supplemental(self, request, submission_uuid, *args, **kwargs): - # Do something with John's work - return Response({'detail': 'Not implemented'}) + ### TO BE MOVED + from kobo.apps.subsequences__new.router import ( + handle_incoming_data, + InvalidAction, + InvalidXPath, + ) + def retrieve_supplemental_data(): + return { + "q1": { + "manual_transcription": { + "transcript": "I speak English, yes!", + "language": "en", + "dateModified": "2025-08-22T15:34:46Z", + "dateCreated": "2025-08-20T09:20:21Z", + "revisions": [ + { + "transcript": "No speak English :-(", + "dateCreated": "2025-08-20T09:20:21Z", + "language": "en" + } + ] + } + } + } + + ### END TO BE MOVED + + deployment = self._get_deployment() + try: + submission = next(deployment.get_submissions( + user=request.user, + query={'meta/rootUuid': add_uuid_prefix(submission_uuid)} + )) + except StopIteration: + raise Http404 + submission_root_uuid = submission[deployment.SUBMISSION_ROOT_UUID_XPATH] + + post_data = request.data + try: + handle_incoming_data(submission_root_uuid, post_data) + except InvalidAction: + raise serializers.ValidationError({'detail': 'Invalid action'}) + except InvalidXPath: + raise serializers.ValidationError({'detail': 'Invalid question name'}) + return Response(retrieve_supplemental_data(self.asset, submission_root_uuid)) @action(detail=True, methods=['GET', 'PATCH', 'DELETE'], renderer_classes=[renderers.JSONRenderer], From c8a9c65bb194f1843fab3f765c6fba42fc7a6c8d Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Fri, 22 Aug 2025 14:09:15 -0400 Subject: [PATCH 039/138] =?UTF-8?q?add=20method=20for=20retrieving=20suppl?= =?UTF-8?q?emental=20data=20at=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the submission level, and fix method for storing it --- kobo/apps/subsequences__new/actions/base.py | 45 ++++++++-- .../actions/manual_transcription.py | 32 +------ kobo/apps/subsequences__new/router.py | 87 +++++++++++++++++-- 3 files changed, 121 insertions(+), 43 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/base.py b/kobo/apps/subsequences__new/actions/base.py index 8e6f48335b..3b38993fc7 100644 --- a/kobo/apps/subsequences__new/actions/base.py +++ b/kobo/apps/subsequences__new/actions/base.py @@ -3,6 +3,7 @@ import jsonschema from django.conf import settings +from django.utils import timezone from kobo.apps.kobo_auth.shortcuts import User from kpi.exceptions import UsageLimitExceededException @@ -134,9 +135,6 @@ def validate_data(self, data): def validate_result(self, result): jsonschema.validate(result, self.result_schema) - def record_repr(self, record: dict) -> dict: - raise NotImplementedError() - @property def result_schema(self): """ @@ -271,8 +269,45 @@ def check_limits(self, user: User): if balance and balance['exceeded']: raise UsageLimitExceededException() - def revise_field(self, submission_extra: dict, edit: dict) -> dict: - raise NotImplementedError + def retrieve_data(self, action_data: dict) -> dict: + """ + `action_data` must be ONLY the data for this particular action + instance, not the entire SubmissionExtras caboodle + + descendant classes could override with special manipulation if needed + """ + return action_data + + def revise_field( + self, submission: dict, submission_supplement: dict, edit: dict + ) -> dict: + # maybe rename to revise_data? 
+ """ + for actions that may have lengthy data, are we content to store the + entirety of the data for each revision, or do we need some kind of + differencing system? + """ + self.validate_data(edit) + self.raise_for_any_leading_underscore_key(edit) + + now_str = utc_datetime_to_js_str(timezone.now()) + revision = deepcopy(submission_supplement) + new_record = deepcopy(edit) + revisions = revision.pop(self.REVISIONS_FIELD, []) + + revision_creation_date = revision.pop(self.DATE_MODIFIED_FIELD, now_str) + record_creation_date = revision.pop(self.DATE_CREATED_FIELD, now_str) + revision[self.DATE_CREATED_FIELD] = revision_creation_date + new_record[self.DATE_MODIFIED_FIELD] = now_str + + if submission_supplement: + revisions.insert(0, revision) + new_record[self.REVISIONS_FIELD] = revisions + + new_record[self.DATE_CREATED_FIELD] = record_creation_date + + return new_record + @staticmethod def raise_for_any_leading_underscore_key(d: dict): diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 4f780cd5f6..281b33e9c8 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -1,8 +1,6 @@ from copy import deepcopy -from django.utils import timezone - -from .base import BaseAction, utc_datetime_to_js_str +from .base import BaseAction class ManualTranscriptionAction(BaseAction): @@ -151,34 +149,6 @@ def _inject_data_schema( return schema - def revise_field(self, submission_extra: dict, edit: dict) -> dict: - """ - really, we want to generalize this to all actions. - for actions that may have lengthy data, are we content to store the - entirety of the data for each revision, or do we need some kind of - differencing system? 
- """ - self.validate_data(edit) - self.raise_for_any_leading_underscore_key(edit) - - now_str = utc_datetime_to_js_str(timezone.now()) - revision = deepcopy(submission_extra) - new_record = deepcopy(edit) - revisions = revision.pop(self.REVISIONS_FIELD, []) - - revision_creation_date = revision.pop(self.DATE_MODIFIED_FIELD, now_str) - record_creation_date = revision.pop(self.DATE_CREATED_FIELD, now_str) - revision[self.DATE_CREATED_FIELD] = revision_creation_date - new_record[self.DATE_MODIFIED_FIELD] = now_str - - if submission_extra: - revisions.insert(0, revision) - new_record[self.REVISIONS_FIELD] = revisions - - new_record[self.DATE_CREATED_FIELD] = record_creation_date - - return new_record - @property def _is_usage_limited(self): return False diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py index 8be0f66089..18374c5dda 100644 --- a/kobo/apps/subsequences__new/router.py +++ b/kobo/apps/subsequences__new/router.py @@ -1,7 +1,10 @@ +from typing import Optional + from kobo.apps.subsequences.models import ( SubmissionExtras, # just bullshit for now ) from kpi.models import Asset + from .actions import ACTION_IDS_TO_CLASSES @@ -22,26 +25,29 @@ class InvalidXPath(Exception): pass + # ChatGPT suggestions: # - dispatch_action_payload # - dispatch_incoming_data # - process_action_request # - run_action -def handle_incoming_data(asset: Asset, submission_uuid: str, data: dict): +def handle_incoming_data(asset: Asset, submission: dict, data: dict): + # it'd be better if this returned the same thing as retrieve_supplemental_data schema_version = data.pop('_version') if schema_version != '20250820': # TODO: migrate from old per-submission schema raise NotImplementedError - # TODO: validate that such a submission even exists! + if asset.advanced_features['_version'] != schema_version: + # TODO: migrate from old per-asset schema + raise NotImplementedError + + submission_uuid = submission['meta/rootUuid'] # constant? 
supplemental_data = SubmissionExtras.objects.get_or_create( asset=asset, submission_uuid=submission_uuid )[0].content # lock it? for question_xpath, data_for_this_question in data.items(): - if asset.advanced_features['_version'] != '20250820': - # TODO: migrate from old per-asset schema - raise NotImplementedError try: action_configs_for_this_question = asset.advanced_features[ '_actionConfigs' @@ -61,9 +67,76 @@ def handle_incoming_data(asset: Asset, submission_uuid: str, data: dict): action = action_class(question_xpath, action_params) action.check_limits(asset.owner) - # action.validate_data(action_data) # called by revise_field - supplemental_data = action.revise_field(supplemental_data, action_data) + action_supplemental_data = supplemental_data.setdefault( + question_xpath, {} + ).setdefault(action_id, {}) + action_supplemental_data = action.revise_field( + submission, action_supplemental_data, action_data + ) + supplemental_data['_version'] = schema_version SubmissionExtras.objects.filter( asset=asset, submission_uuid=submission_uuid ).update(content=supplemental_data) + + +def retrieve_supplemental_data(asset: Asset, submission_uuid: str) -> dict: + try: + supplemental_data = SubmissionExtras.objects.get( + asset=asset, submission_uuid=submission_uuid + ).content + except SubmissionExtras.DoesNotExist: + return {} + + schema_version = supplemental_data.pop('_version') + if schema_version != '20250820': + # TODO: migrate from old per-submission schema + raise NotImplementedError + + if asset.advanced_features['_version'] != schema_version: + # TODO: migrate from old per-asset schema + raise NotImplementedError + + processed_supplemental_data = {} + + for question_xpath, data_for_this_question in supplemental_data.items(): + processed_data_for_this_question = ( + processed_supplemental_data.setdefault(question_xpath, {}) + ) + action_configs = asset.advanced_features['_actionConfigs'] + try: + action_configs_for_this_question = action_configs[question_xpath] 
+ except KeyError: + # There's still supplemental data for this question at the + # submission level, but the question is no longer configured at the + # asset level. + # Allow this for now, but maybe forbid later and also forbid + # removing things from the asset-level action configuration? + # Actions could be disabled or hidden instead of being removed + continue + + for action_id, action_data in data_for_this_question.items(): + try: + action_class = ACTION_IDS_TO_CLASSES[action_id] + except KeyError: + # An action class present in the submission data no longer + # exists in the application code + # TODO: log an error + continue + try: + action_params = action_configs_for_this_question[action_id] + except KeyError: + # An action class present in the submission data is no longer + # configured at the asset level for this question + # Allow this for now, but maybe forbid later and also forbid + # removing things from the asset-level action configuration? + # Actions could be disabled or hidden instead of being removed + continue + + action = action_class(question_xpath, action_params) + processed_data_for_this_question[action_id] = action.retrieve_data( + action_data + ) + + processed_supplemental_data['_version'] = schema_version + return processed_supplemental_data From 7ef30fb95b910bb62961687b8320548316dcbf4e Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Fri, 22 Aug 2025 14:13:05 -0400 Subject: [PATCH 040/138] Add submission arg to tests for `revise_field()` --- .../tests/test_manual_transcription.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kobo/apps/subsequences__new/tests/test_manual_transcription.py b/kobo/apps/subsequences__new/tests/test_manual_transcription.py index ba4704765e..b68e358c15 100644 --- a/kobo/apps/subsequences__new/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences__new/tests/test_manual_transcription.py @@ -45,7 +45,7 @@ def test_valid_result_passes_validation(): fifth = {'language': 'en', 'transcript': 'fifth'} mock_sup_det = {} for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_field(mock_sup_det, data) + mock_sup_det = action.revise_field({}, mock_sup_det, data) action.validate_result(mock_sup_det) def test_invalid_result_fails_validation(): @@ -60,7 +60,7 @@ def test_invalid_result_fails_validation(): fifth = {'language': 'en', 'transcript': 'fifth'} mock_sup_det = {} for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_field(mock_sup_det, data) + mock_sup_det = action.revise_field({}, mock_sup_det, data) # erroneously add '_dateModified' onto a revision mock_sup_det['_revisions'][0]['_dateModified'] = mock_sup_det['_revisions'][0]['_dateCreated'] @@ -81,7 +81,7 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): first = {'language': 'en', 'transcript': 'No idea'} second = {'language': 'fr', 'transcript': "Pas d'idée"} - mock_sup_det = action.revise_field({}, first) + mock_sup_det = action.revise_field({}, {}, first) assert mock_sup_det['language'] == 'en' assert mock_sup_det['transcript'] == 'No idea' @@ -89,7 +89,7 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): assert '_revisions' not in mock_sup_det first_time = mock_sup_det['_dateCreated'] - mock_sup_det = action.revise_field(mock_sup_det, second) + mock_sup_det = 
action.revise_field({}, mock_sup_det, second) assert len(mock_sup_det['_revisions']) == 1 # the revision should encompass the first transcript @@ -122,10 +122,10 @@ def test_setting_transcript_to_empty_string(): first = {'language': 'fr', 'transcript': "Pas d'idée"} second = {'language': 'fr', 'transcript': ''} - mock_sup_det = action.revise_field({}, first) + mock_sup_det = action.revise_field({}, {}, first) assert mock_sup_det['transcript'] == "Pas d'idée" - mock_sup_det = action.revise_field(mock_sup_det, second) + mock_sup_det = action.revise_field({}, mock_sup_det, second) assert mock_sup_det['transcript'] == '' assert mock_sup_det['_revisions'][0]['transcript'] == "Pas d'idée" @@ -137,10 +137,10 @@ def test_setting_transcript_to_empty_object(): first = {'language': 'fr', 'transcript': "Pas d'idée"} second = {} - mock_sup_det = action.revise_field({}, first) + mock_sup_det = action.revise_field({}, {}, first) assert mock_sup_det['transcript'] == "Pas d'idée" - mock_sup_det = action.revise_field(mock_sup_det, second) + mock_sup_det = action.revise_field({}, mock_sup_det, second) assert 'transcript' not in mock_sup_det assert mock_sup_det['_revisions'][0]['transcript'] == "Pas d'idée" @@ -155,7 +155,7 @@ def test_latest_revision_is_first(): mock_sup_det = {} for data in first, second, third: - mock_sup_det = action.revise_field(mock_sup_det, data) + mock_sup_det = action.revise_field({}, mock_sup_det, data) assert mock_sup_det['transcript'] == 'trois' assert mock_sup_det['_revisions'][0]['transcript'] == 'deux' From 121341a2a152f9c1733fb28ebded549da2741c07 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Fri, 22 Aug 2025 14:29:49 -0400 Subject: [PATCH 041/138] =?UTF-8?q?Fix=20`handle=5Fincoming=5Fdata()`,=20a?= =?UTF-8?q?gain=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kobo/apps/subsequences__new/router.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py index 18374c5dda..4596dbdd67 100644 --- a/kobo/apps/subsequences__new/router.py +++ b/kobo/apps/subsequences__new/router.py @@ -31,9 +31,9 @@ class InvalidXPath(Exception): # - dispatch_incoming_data # - process_action_request # - run_action -def handle_incoming_data(asset: Asset, submission: dict, data: dict): +def handle_incoming_data(asset: Asset, submission: dict, incoming_data: dict): # it'd be better if this returned the same thing as retrieve_supplemental_data - schema_version = data.pop('_version') + schema_version = incoming_data.pop('_version') if schema_version != '20250820': # TODO: migrate from old per-submission schema raise NotImplementedError @@ -47,7 +47,7 @@ def handle_incoming_data(asset: Asset, submission: dict, data: dict): asset=asset, submission_uuid=submission_uuid )[0].content # lock it? 
- for question_xpath, data_for_this_question in data.items(): + for question_xpath, data_for_this_question in incoming_data.items(): try: action_configs_for_this_question = asset.advanced_features[ '_actionConfigs' @@ -67,12 +67,14 @@ def handle_incoming_data(asset: Asset, submission: dict, data: dict): action = action_class(question_xpath, action_params) action.check_limits(asset.owner) - action_supplemental_data = supplemental_data.setdefault( + question_supplemental_data = supplemental_data.setdefault( question_xpath, {} - ).setdefault(action_id, {}) + ) + action_supplemental_data = question_supplemental_data.setdefault(action_id, {}) action_supplemental_data = action.revise_field( submission, action_supplemental_data, action_data ) + question_supplemental_data[action_id] = action_supplemental_data supplemental_data['_version'] = schema_version SubmissionExtras.objects.filter( From 6289e4a6b4a19daf14a98b6d9b982012fa2c0ef3 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Fri, 22 Aug 2025 14:54:57 -0400 Subject: [PATCH 042/138] =?UTF-8?q?Make=20`handle=5Fincoming=5Fdata()`=20r?= =?UTF-8?q?eturn=20something,=20=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit effectively the same thing as if `retrieve_supplemental_data()` were called immediately afterwards --- kobo/apps/subsequences__new/router.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py index 4596dbdd67..0dedf0156a 100644 --- a/kobo/apps/subsequences__new/router.py +++ b/kobo/apps/subsequences__new/router.py @@ -1,4 +1,3 @@ -from typing import Optional from kobo.apps.subsequences.models import ( SubmissionExtras, # just bullshit for now @@ -31,7 +30,9 @@ class InvalidXPath(Exception): # - dispatch_incoming_data # - process_action_request # - run_action -def handle_incoming_data(asset: Asset, submission: dict, incoming_data: dict): 
+def handle_incoming_data( + asset: Asset, submission: dict, incoming_data: dict +) -> dict: # it'd be better if this returned the same thing as retrieve_supplemental_data schema_version = incoming_data.pop('_version') if schema_version != '20250820': @@ -47,6 +48,8 @@ def handle_incoming_data(asset: Asset, submission: dict, incoming_data: dict): asset=asset, submission_uuid=submission_uuid )[0].content # lock it? + retrieved_supplemental_data = {} + for question_xpath, data_for_this_question in incoming_data.items(): try: action_configs_for_this_question = asset.advanced_features[ @@ -70,17 +73,25 @@ def handle_incoming_data(asset: Asset, submission: dict, incoming_data: dict): question_supplemental_data = supplemental_data.setdefault( question_xpath, {} ) - action_supplemental_data = question_supplemental_data.setdefault(action_id, {}) + action_supplemental_data = question_supplemental_data.setdefault( + action_id, {} + ) action_supplemental_data = action.revise_field( submission, action_supplemental_data, action_data ) question_supplemental_data[action_id] = action_supplemental_data + retrieved_supplemental_data.setdefault(question_xpath, {})[ + action_id + ] = action.retrieve_data(action_supplemental_data) supplemental_data['_version'] = schema_version SubmissionExtras.objects.filter( asset=asset, submission_uuid=submission_uuid ).update(content=supplemental_data) + retrieved_supplemental_data['_version'] = schema_version + return retrieved_supplemental_data + def retrieve_supplemental_data(asset: Asset, submission_uuid: str) -> dict: try: @@ -99,11 +110,11 @@ def retrieve_supplemental_data(asset: Asset, submission_uuid: str) -> dict: # TODO: migrate from old per-asset schema raise NotImplementedError - processed_supplemental_data = {} + retrieved_supplemental_data = {} for question_xpath, data_for_this_question in supplemental_data.items(): processed_data_for_this_question = ( - processed_supplemental_data.setdefault(question_xpath, {}) + 
retrieved_supplemental_data.setdefault(question_xpath, {}) ) action_configs = asset.advanced_features['_actionConfigs'] try: @@ -140,5 +151,5 @@ def retrieve_supplemental_data(asset: Asset, submission_uuid: str) -> dict: action_data ) - processed_supplemental_data['_version'] = schema_version - return processed_supplemental_data + retrieved_supplemental_data['_version'] = schema_version + return retrieved_supplemental_data From 01248d1251cc4559b7b02af81fc96ee11b1d5862 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 22 Aug 2025 14:46:11 -0400 Subject: [PATCH 043/138] Replace the lookup field of data endpoint with "submission_id_or_root_uuid" --- .../tests/test_project_history_logs.py | 6 +- .../tests/test_organizations_api.py | 4 +- .../tests/api/v2/test_permissions.py | 10 +-- .../admin/test_attachment_trash_admin.py | 2 +- .../test_attachment_cleanup.py | 2 +- kpi/tests/api/v1/test_api_submissions.py | 4 +- kpi/tests/api/v2/test_api_attachments.py | 2 +- .../v2/test_api_attachments_delete_viewset.py | 4 +- .../v2/test_api_invalid_password_access.py | 2 +- kpi/tests/api/v2/test_api_submissions.py | 64 +++++++------- kpi/tests/utils/mixins.py | 6 +- kpi/urls/router_api_v1.py | 6 +- kpi/urls/router_api_v2.py | 6 +- kpi/views/v2/data.py | 86 +++++++++---------- 14 files changed, 95 insertions(+), 109 deletions(-) diff --git a/kobo/apps/audit_log/tests/test_project_history_logs.py b/kobo/apps/audit_log/tests/test_project_history_logs.py index 62a63f6d1a..b83292cda9 100644 --- a/kobo/apps/audit_log/tests/test_project_history_logs.py +++ b/kobo/apps/audit_log/tests/test_project_history_logs.py @@ -1520,7 +1520,7 @@ def test_log_created_for_duplicate_submission(self, duplicating_user): self._get_endpoint('submission-duplicate'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) # whoever performs the duplication request will be considered the submitter @@ -1654,7 +1654,7 @@ def 
test_update_single_submission_validation_status(self, username): 'api_v2:submission-validation-status', kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ), request_data={'validation_status.uid': 'validation_status_on_hold'}, @@ -1786,7 +1786,7 @@ def test_delete_single_submission(self): 'api_v2:submission-detail', kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ), request_data={}, diff --git a/kobo/apps/organizations/tests/test_organizations_api.py b/kobo/apps/organizations/tests/test_organizations_api.py index d3b4690df4..c13c3f3750 100644 --- a/kobo/apps/organizations/tests/test_organizations_api.py +++ b/kobo/apps/organizations/tests/test_organizations_api.py @@ -392,7 +392,7 @@ def setUp(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.submission['_id'], + 'submission_id_or_root_uuid': self.submission['_id'], }, ) self.submission_list_url = reverse( @@ -870,7 +870,7 @@ def setUp(self): self._get_endpoint('submission-validation-status'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.submission['_id'], + 'submission_id_or_root_uuid': self.submission['_id'], }, ) self.validation_statuses_url = reverse( diff --git a/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py b/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py index ac05eea4f0..768bb223a8 100644 --- a/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py +++ b/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py @@ -17,7 +17,7 @@ class SubsequencePermissionTestCase(SubsequenceBaseTestCase): @data( # owner: Obviously, no need to share. ( - 'anotheruser', + 'someuser', False, status.HTTP_200_OK, ), @@ -77,7 +77,7 @@ def test_can_read(self, username, shared, status_code): @data( # owner: Obviously, no need to share. 
( - 'anotheruser', + 'someuser', False, status.HTTP_200_OK, ), @@ -111,12 +111,6 @@ def test_can_read(self, username, shared, status_code): False, status.HTTP_404_NOT_FOUND, ), - # anonymous user with view permissions - ( - 'anonymous', - True, - status.HTTP_200_OK, - ), ) @unpack def test_can_write(self, username, shared, status_code): diff --git a/kobo/apps/trash_bin/tests/admin/test_attachment_trash_admin.py b/kobo/apps/trash_bin/tests/admin/test_attachment_trash_admin.py index 31a016e1f0..dcdab64237 100644 --- a/kobo/apps/trash_bin/tests/admin/test_attachment_trash_admin.py +++ b/kobo/apps/trash_bin/tests/admin/test_attachment_trash_admin.py @@ -123,7 +123,7 @@ def test_put_back_action_updates_is_deleted_flag(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.instance.pk, + 'submission_id_or_root_uuid': self.instance.pk, }, ) response = self.client.get(submission_detail_url) diff --git a/kobo/apps/trash_bin/tests/storage_cleanup/test_attachment_cleanup.py b/kobo/apps/trash_bin/tests/storage_cleanup/test_attachment_cleanup.py index 41db03ac2e..43994b8689 100644 --- a/kobo/apps/trash_bin/tests/storage_cleanup/test_attachment_cleanup.py +++ b/kobo/apps/trash_bin/tests/storage_cleanup/test_attachment_cleanup.py @@ -105,7 +105,7 @@ def test_auto_delete_excess_attachments_user_exceeds_limit(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.instance.pk, + 'submission_id_or_root_uuid': self.instance.pk, }, ) response = self.client.get(submission_detail_url) diff --git a/kpi/tests/api/v1/test_api_submissions.py b/kpi/tests/api/v1/test_api_submissions.py index a0163c405d..aec3a539b9 100644 --- a/kpi/tests/api/v1/test_api_submissions.py +++ b/kpi/tests/api/v1/test_api_submissions.py @@ -104,7 +104,7 @@ def test_delete_submission_as_owner(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': 
submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) @@ -122,7 +122,7 @@ def test_delete_submission_shared_as_anotheruser(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.delete(url, HTTP_ACCEPT='application/json') diff --git a/kpi/tests/api/v2/test_api_attachments.py b/kpi/tests/api/v2/test_api_attachments.py index 55096a4a1d..d82c2989b5 100644 --- a/kpi/tests/api/v2/test_api_attachments.py +++ b/kpi/tests/api/v2/test_api_attachments.py @@ -187,7 +187,7 @@ def test_duplicate_attachment_with_submission(self): self._get_endpoint('submission-duplicate'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.post(duplicate_url, {'format': 'json'}) diff --git a/kpi/tests/api/v2/test_api_attachments_delete_viewset.py b/kpi/tests/api/v2/test_api_attachments_delete_viewset.py index 84470e4c88..03dce7ea15 100644 --- a/kpi/tests/api/v2/test_api_attachments_delete_viewset.py +++ b/kpi/tests/api/v2/test_api_attachments_delete_viewset.py @@ -214,7 +214,7 @@ def test_delete_single_attachment_updates_is_deleted_flag_in_mongo(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.first_instance.pk, + 'submission_id_or_root_uuid': self.first_instance.pk, }, ) response = self.client.get(submission_detail_url) @@ -256,7 +256,7 @@ def test_delete_bulk_attachments_updates_is_deleted_flag_in_mongo(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.first_instance.pk, + 'submission_id_or_root_uuid': self.first_instance.pk, }, ) response = self.client.get(submission_detail_url) diff --git a/kpi/tests/api/v2/test_api_invalid_password_access.py b/kpi/tests/api/v2/test_api_invalid_password_access.py index 
fc2f2918ef..024b3dc14e 100644 --- a/kpi/tests/api/v2/test_api_invalid_password_access.py +++ b/kpi/tests/api/v2/test_api_invalid_password_access.py @@ -128,7 +128,7 @@ def _access_endpoints(self, access_granted: bool, headers: dict = {}): kwargs={ 'format': 'json', 'parent_lookup_asset': self.asset.uid, - 'pk': submission_id, + 'submission_id_or_root_uuid': submission_id, }, ), **headers, diff --git a/kpi/tests/api/v2/test_api_submissions.py b/kpi/tests/api/v2/test_api_submissions.py index 8ada142223..60e93e1591 100644 --- a/kpi/tests/api/v2/test_api_submissions.py +++ b/kpi/tests/api/v2/test_api_submissions.py @@ -726,7 +726,7 @@ def test_retrieve_submission_as_owner(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.get(url, {'format': 'json'}) @@ -743,7 +743,7 @@ def test_retrieve_submission_by_uuid(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_uuid'], + 'submission_id_or_root_uuid': submission['_uuid'], }, ) @@ -763,7 +763,7 @@ def test_retrieve_submission_not_shared_as_anotheruser(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.get(url, {'format': 'json'}) @@ -781,7 +781,7 @@ def test_retrieve_submission_shared_as_anotheruser(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.get(url, {'format': 'json'}) @@ -810,7 +810,7 @@ def test_retrieve_submission_with_partial_permissions_as_anotheruser(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': 
submission['_id'], }, ) response = self.client.get(url, {'format': 'json'}) @@ -822,7 +822,7 @@ def test_retrieve_submission_with_partial_permissions_as_anotheruser(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.get(url, {'format': 'json'}) @@ -846,7 +846,7 @@ def test_delete_not_existing_submission_as_owner(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': 9999, + 'submission_id_or_root_uuid': 9999, }, ) response = self.client.delete(url, HTTP_ACCEPT='application/json') @@ -866,7 +866,7 @@ def test_delete_submission_as_anonymous(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) @@ -886,7 +886,7 @@ def test_delete_submission_not_shared_as_anotheruser(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) @@ -906,7 +906,7 @@ def test_delete_submission_shared_as_anotheruser(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.delete(url, HTTP_ACCEPT='application/json') @@ -949,7 +949,7 @@ def test_delete_submission_with_partial_perms_as_anotheruser(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.delete( @@ -964,7 +964,7 @@ def test_delete_submission_with_partial_perms_as_anotheruser(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 
'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.delete( @@ -1122,7 +1122,7 @@ def test_attachments_rewrite(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) @@ -1177,7 +1177,7 @@ def test_inject_root_uuid_if_not_present(self): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.get(url, {'format': 'json'}) @@ -1206,21 +1206,21 @@ def setUp(self): self._get_endpoint('submission-enketo-edit'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.submission['_id'], + 'submission_id_or_root_uuid': self.submission['_id'], }, ) self.submission_url_legacy = reverse( self._get_endpoint('submission-enketo-edit-legacy'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.submission['_id'], + 'submission_id_or_root_uuid': self.submission['_id'], }, ) self.submission_redirect_url = reverse( self._get_endpoint('submission-enketo-edit-redirect'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.submission['_id'], + 'submission_id_or_root_uuid': self.submission['_id'], }, ) @@ -1364,7 +1364,7 @@ def test_get_edit_link_with_partial_perms_as_anotheruser(self): self._get_endpoint('submission-enketo-edit'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.get(url, {'format': 'json'}) @@ -1376,7 +1376,7 @@ def test_get_edit_link_with_partial_perms_as_anotheruser(self): self._get_endpoint('submission-enketo-edit'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) @@ -1495,7 +1495,7 @@ def test_get_multiple_edit_links_and_attempt_submit_edits(self): 
self._get_endpoint('submission-enketo-edit'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) self.client.get(edit_url, {'format': 'json'}) @@ -1597,7 +1597,7 @@ def test_edit_submission_with_xml_encoding_declaration(self): self._get_endpoint('submission-enketo-edit'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) @@ -1663,7 +1663,7 @@ def test_edit_submission_with_xml_missing_uuids(self): self._get_endpoint('submission-enketo-edit'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission_json['_id'], + 'submission_id_or_root_uuid': submission_json['_id'], }, ) @@ -1966,14 +1966,14 @@ def setUp(self): self._get_endpoint('submission-enketo-view'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.submission['_id'], + 'submission_id_or_root_uuid': self.submission['_id'], }, ) self.submission_view_redirect_url = reverse( self._get_endpoint('submission-enketo-view-redirect'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.submission['_id'], + 'submission_id_or_root_uuid': self.submission['_id'], }, ) assert 'redirect' in self.submission_view_redirect_url @@ -2082,7 +2082,7 @@ def test_get_view_link_with_partial_perms_as_anotheruser(self): self._get_endpoint('submission-enketo-view'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) @@ -2095,7 +2095,7 @@ def test_get_view_link_with_partial_perms_as_anotheruser(self): self._get_endpoint('submission-enketo-view'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) @@ -2139,7 +2139,7 @@ def setUp(self): self._get_endpoint('submission-duplicate'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.submission['_id'], + 'submission_id_or_root_uuid': 
self.submission['_id'], }, ) @@ -2307,7 +2307,7 @@ def test_duplicate_submission_as_anotheruser_with_partial_perms(self): self._get_endpoint('submission-duplicate'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.post(url, {'format': 'json'}) @@ -2319,7 +2319,7 @@ def test_duplicate_submission_as_anotheruser_with_partial_perms(self): self._get_endpoint('submission-duplicate'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.post(url, {'format': 'json'}) @@ -2598,7 +2598,7 @@ def setUp(self): self._get_endpoint('submission-validation-status'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.submission['_id'], + 'submission_id_or_root_uuid': self.submission['_id'], }, ) @@ -2772,7 +2772,7 @@ def test_edit_status_with_partial_perms_as_anotheruser(self): self._get_endpoint('submission-validation-status'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.patch(url, data=data) @@ -2784,7 +2784,7 @@ def test_edit_status_with_partial_perms_as_anotheruser(self): self._get_endpoint('submission-validation-status'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) response = self.client.patch(url, data=data) diff --git a/kpi/tests/utils/mixins.py b/kpi/tests/utils/mixins.py index 87d6f2fc7c..facd943673 100644 --- a/kpi/tests/utils/mixins.py +++ b/kpi/tests/utils/mixins.py @@ -152,7 +152,7 @@ def _delete_submission(self, submission: dict): self._get_endpoint('submission-detail'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': submission['_id'], + 'submission_id_or_root_uuid': submission['_id'], }, ) @@ -197,7 +197,7 @@ def _get_edit_link(self): 
self._get_endpoint('submission-enketo-edit'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.submission['_id'], + 'submission_id_or_root_uuid': self.submission['_id'], }, ) @@ -352,7 +352,7 @@ def _get_view_link(self): self._get_endpoint('submission-enketo-view'), kwargs={ 'parent_lookup_asset': self.asset.uid, - 'pk': self.submission['_id'], + 'submission_id_or_root_uuid': self.submission['_id'], }, ) diff --git a/kpi/urls/router_api_v1.py b/kpi/urls/router_api_v1.py index 16d9f2ed9f..d7e5a94c2c 100644 --- a/kpi/urls/router_api_v1.py +++ b/kpi/urls/router_api_v1.py @@ -69,17 +69,17 @@ # them correctly, often resulting in broken routes and schema generation errors. enketo_url_aliases = [ path( - 'assets//submissions//edit/', + 'assets//submissions//edit/', SubmissionViewSet.as_view({'get': 'enketo_edit'}), name='submission-enketo-edit-legacy', ), path( - 'assets//submissions//enketo/redirect/edit/', + 'assets//submissions//enketo/redirect/edit/', SubmissionViewSet.as_view({'get': 'enketo_edit'}), name='submission-enketo-edit-redirect', ), path( - 'assets//submissions//enketo/redirect/view/', + 'assets//submissions//enketo/redirect/view/', SubmissionViewSet.as_view({'get': 'enketo_view'}), name='submission-enketo-view-redirect', ), diff --git a/kpi/urls/router_api_v2.py b/kpi/urls/router_api_v2.py index 11ba0ebce3..55e358815d 100644 --- a/kpi/urls/router_api_v2.py +++ b/kpi/urls/router_api_v2.py @@ -210,17 +210,17 @@ def get_urls(self, *args, **kwargs): # them correctly, often resulting in broken routes and schema generation errors. 
enketo_url_aliases = [ path( - 'assets//data//edit/', + 'assets//data//edit/', DataViewSet.as_view({'get': 'enketo_edit'}, renderer_classes=[JSONRenderer]), name='submission-enketo-edit-legacy', ), path( - 'assets//data//enketo/redirect/edit/', + 'assets//data//enketo/redirect/edit/', DataViewSet.as_view({'get': 'enketo_edit'}, renderer_classes=[JSONRenderer]), name='submission-enketo-edit-redirect', ), path( - 'assets//data//enketo/redirect/view/', + 'assets//data//enketo/redirect/view/', DataViewSet.as_view({'get': 'enketo_view'}, renderer_classes=[JSONRenderer]), name='submission-enketo-view-redirect', ), diff --git a/kpi/views/v2/data.py b/kpi/views/v2/data.py index e824acfaa6..de2cbb5704 100644 --- a/kpi/views/v2/data.py +++ b/kpi/views/v2/data.py @@ -1,6 +1,7 @@ import copy import json import re +from typing import Union import requests from django.conf import settings @@ -19,8 +20,10 @@ from kobo.apps.audit_log.base_views import AuditLoggedViewSet from kobo.apps.audit_log.models import AuditType from kobo.apps.audit_log.utils import SubmissionUpdate -from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix, \ - add_uuid_prefix +from kobo.apps.openrosa.apps.logger.xform_instance_parser import ( + remove_uuid_prefix, + add_uuid_prefix, +) from kobo.apps.openrosa.libs.utils.logger_tools import http_open_rosa_error_handler from kpi.authentication import EnketoSessionAuthentication from kpi.constants import ( @@ -202,6 +205,7 @@ class DataViewSet( pagination_class = DataPagination log_type = AuditType.PROJECT_HISTORY logged_fields = [] + lookup_field = 'submission_id_or_root_uuid' @extend_schema( methods=['PATCH'], @@ -234,10 +238,10 @@ def bulk(self, request, *args, **kwargs): return Response(**response) - def destroy(self, request, pk, *args, **kwargs): + def destroy(self, request, submission_id_or_root_uuid: int, *args, **kwargs): deployment = self._get_deployment() - # Coerce to int because back end only finds matches with same type 
- submission_id = positive_int(pk) + # Coerce to int because the back-end only finds matches with the same type + submission_id = positive_int(submission_id_or_root_uuid) if deployment.delete_submission(submission_id, user=request.user): response = { @@ -258,13 +262,13 @@ def destroy(self, request, pk, *args, **kwargs): renderer_classes=[renderers.JSONRenderer], permission_classes=[DuplicateSubmissionPermission], ) - def duplicate(self, request, pk, *args, **kwargs): + def duplicate(self, request, submission_id_or_root_uuid: int, *args, **kwargs): """ Creates a duplicate of the submission with a given `pk` """ deployment = self._get_deployment() - # Coerce to int because the back end only finds matches with the same type - submission_id = positive_int(pk) + # Coerce to int because the back-end only finds matches with the same type + submission_id = positive_int(submission_id_or_root_uuid) original_submission = deployment.get_submission( submission_id=submission_id, user=request.user, fields=['_id', '_uuid'] ) @@ -317,8 +321,8 @@ def duplicate(self, request, pk, *args, **kwargs): permission_classes=[EditLinkSubmissionPermission], url_path='enketo/edit', ) - def enketo_edit(self, request, pk, *args, **kwargs): - submission_id = positive_int(pk) + def enketo_edit(self, request, submission_id_or_root_uuid: int, *args, **kwargs): + submission_id = positive_int(submission_id_or_root_uuid) enketo_response = self._get_enketo_link(request, submission_id, 'edit') if enketo_response.status_code in ( status.HTTP_201_CREATED, status.HTTP_200_OK @@ -353,8 +357,8 @@ def enketo_edit(self, request, pk, *args, **kwargs): permission_classes=[ViewSubmissionPermission], url_path='enketo/view', ) - def enketo_view(self, request, pk, *args, **kwargs): - submission_id = positive_int(pk) + def enketo_view(self, request, submission_id_or_root_uuid: int, *args, **kwargs): + submission_id = positive_int(submission_id_or_root_uuid) enketo_response = self._get_enketo_link(request, 
submission_id, 'view') return self._handle_enketo_redirect(request, enketo_response, *args, **kwargs) @@ -409,7 +413,13 @@ def list(self, request, *args, **kwargs): return Response(list(submissions)) - def retrieve(self, request, pk, *args, **kwargs): + def retrieve( + self, + request, + submission_id_or_root_uuid: Union[int, str], + *args, + **kwargs + ): """ Retrieve a submission by its primary key or its UUID. @@ -426,14 +436,11 @@ def retrieve(self, request, pk, *args, **kwargs): } filters = self._filter_mongo_query(request) - # Unfortunately, Django expects that the URL parameter is `pk`, - # its name cannot be changed (easily). - submission_id_or_uuid = pk try: - submission_id_or_uuid = positive_int(submission_id_or_uuid) + submission_id_or_root_uuid = positive_int(submission_id_or_root_uuid) except ValueError: if not re.match( - r'[a-z\d]{8}-([a-z\d]{4}-){3}[a-z\d]{12}', submission_id_or_uuid + r'[a-z\d]{8}-([a-z\d]{4}-){3}[a-z\d]{12}', submission_id_or_root_uuid ): raise Http404 @@ -443,10 +450,10 @@ def retrieve(self, request, pk, *args, **kwargs): raise serializers.ValidationError( {'query': t('Value must be valid JSON.')} ) - query['_uuid'] = submission_id_or_uuid + query['_uuid'] = submission_id_or_root_uuid filters['query'] = query else: - params['submission_ids'] = [submission_id_or_uuid] + params['submission_ids'] = [submission_id_or_root_uuid] # Join all parameters to be passed to `deployment.get_submissions()` params.update(filters) @@ -462,43 +469,26 @@ def retrieve(self, request, pk, *args, **kwargs): detail=True, methods=['PATCH'], permission_classes=[AdvancedSubmissionPermission], - lookup_field='submission_uuid', - lookup_url_kwarg='submission_uuid', ) - def supplemental(self, request, submission_uuid, *args, **kwargs): + def supplemental(self, request, submission_id_or_root_uuid: str, *args, **kwargs): + + # make it clear, a root uuid is expected here + submission_root_uuid = submission_id_or_root_uuid ### TO BE MOVED from 
kobo.apps.subsequences__new.router import ( handle_incoming_data, + retrieve_supplemental_data, InvalidAction, InvalidXPath, ) - def retrieve_supplemental_data(): - return { - "q1": { - "manual_transcription": { - "transcript": "I speak English, yes!", - "language": "en", - "dateModified": "2025-08-22T15:34:46Z", - "dateCreated": "2025-08-20T09:20:21Z", - "revisions": [ - { - "transcript": "No speak English :-(", - "dateCreated": "2025-08-20T09:20:21Z", - "language": "en" - } - ] - } - } - } - ### END TO BE MOVED deployment = self._get_deployment() try: submission = next(deployment.get_submissions( user=request.user, - query={'meta/rootUuid': add_uuid_prefix(submission_uuid)} + query={'meta/rootUuid': add_uuid_prefix(submission_root_uuid)} )) except StopIteration: raise Http404 @@ -507,7 +497,7 @@ def retrieve_supplemental_data(): post_data = request.data try: - handle_incoming_data(submission_root_uuid, post_data) + handle_incoming_data(self.asset, submission, post_data) except InvalidAction: raise serializers.ValidationError({'detail': 'Invalid action'}) except InvalidXPath: @@ -576,10 +566,12 @@ def retrieve_supplemental_data(): detail=True, methods=['GET', 'PATCH', 'DELETE'], permission_classes=[SubmissionValidationStatusPermission] ) - def validation_status(self, request, pk, *args, **kwargs): + def validation_status( + self, request, submission_id_or_root_uuid: int, *args, **kwargs + ): deployment = self._get_deployment() - # Coerce to int because back end only finds matches with same type - submission_id = positive_int(pk) + # Coerce to int because the back-end only finds matches with the same type + submission_id = positive_int(submission_id_or_root_uuid) if request.method == 'GET': json_response = deployment.get_validation_status( submission_id=submission_id, From 5a303b57581d3bc0277dc25bd7aac2781a0548a6 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Fri, 22 Aug 2025 16:33:33 -0400 Subject: [PATCH 044/138] Validate the entire submission supplement --- .../subsequences__new/actions/__init__.py | 19 ------------------- kobo/apps/subsequences__new/actions/base.py | 11 +++++++---- kobo/apps/subsequences__new/router.py | 16 +++++++++++++--- kpi/models/asset.py | 4 ++-- 4 files changed, 22 insertions(+), 28 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/__init__.py b/kobo/apps/subsequences__new/actions/__init__.py index df6f8d8e87..274ca102a5 100644 --- a/kobo/apps/subsequences__new/actions/__init__.py +++ b/kobo/apps/subsequences__new/actions/__init__.py @@ -3,22 +3,3 @@ # TODO, what about using a loader for every class in "actions" folder (except base.py)? ACTIONS = (ManualTranscriptionAction,) ACTION_IDS_TO_CLASSES = {a.ID: a for a in ACTIONS} - -ADVANCED_FEATURES_PARAMS_SCHEMA = { # rename? - 'properties': { - '_actionConfigs': { - 'additionalProperties': False, - 'patternProperties': { - # not the full complexity of XPath, but a slash-delimited path - # of valid XML tag names to convey group hierarchy - '^([A-Za-z_][A-Za-z0-9_-]*)(/[A-Za-z_][A-Za-z0-9_-]*)*$': { - 'additionalProperties': False, - 'properties': {a.ID: a.params_schema for a in ACTIONS}, - 'type': 'object', - } - }, - 'type': 'object', - }, - '_version': {'const': '20250820'}, - } -} diff --git a/kobo/apps/subsequences__new/actions/base.py b/kobo/apps/subsequences__new/actions/base.py index 3b38993fc7..24522f1e6f 100644 --- a/kobo/apps/subsequences__new/actions/base.py +++ b/kobo/apps/subsequences__new/actions/base.py @@ -278,10 +278,13 @@ def retrieve_data(self, action_data: dict) -> dict: """ return action_data - def revise_field( + def revise_field(self, *args, **kwargs): + # TODO: remove this alias + return self.revise_data(*args, **kwargs) + + def revise_data( self, submission: dict, submission_supplement: dict, edit: dict ) -> dict: - # maybe rename to revise_data? 
""" for actions that may have lengthy data, are we content to store the entirety of the data for each revision, or do we need some kind of @@ -306,6 +309,8 @@ def revise_field( new_record[self.DATE_CREATED_FIELD] = record_creation_date + self.validate_result(new_record) + return new_record @@ -344,5 +349,3 @@ def _limit_identifier(self): # from kobo.apps.organizations.constants import UsageType # return UsageType.ASR_SECONDS raise NotImplementedError() - - diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py index 0dedf0156a..0b16378aef 100644 --- a/kobo/apps/subsequences__new/router.py +++ b/kobo/apps/subsequences__new/router.py @@ -1,10 +1,10 @@ - from kobo.apps.subsequences.models import ( SubmissionExtras, # just bullshit for now ) from kpi.models import Asset from .actions import ACTION_IDS_TO_CLASSES +from .schemas import validate_submission_supplement class InvalidAction(Exception): @@ -30,10 +30,14 @@ class InvalidXPath(Exception): # - dispatch_incoming_data # - process_action_request # - run_action -def handle_incoming_data( +def handle_incoming_data(*args, **kwargs): + # TODO: remove this alias + return revise_supplemental_data(*args, **kwargs) + + +def revise_supplemental_data( asset: Asset, submission: dict, incoming_data: dict ) -> dict: - # it'd be better if this returned the same thing as retrieve_supplemental_data schema_version = incoming_data.pop('_version') if schema_version != '20250820': # TODO: migrate from old per-submission schema @@ -85,6 +89,7 @@ def handle_incoming_data( ] = action.retrieve_data(action_supplemental_data) supplemental_data['_version'] = schema_version + validate_submission_supplement(asset, supplemental_data) SubmissionExtras.objects.filter( asset=asset, submission_uuid=submission_uuid ).update(content=supplemental_data) @@ -126,6 +131,11 @@ def retrieve_supplemental_data(asset: Asset, submission_uuid: str) -> dict: # Allow this for now, but maybe forbid later and also forbid # 
removing things from the asset-level action configuration? # Actions could be disabled or hidden instead of being removed + + # FIXME: divergence between the asset-level configuration and + # submission-level supplemental data is going to cause schema + # validation failures! We defo need to forbid removal of actions + # and instead provide a way to mark them as deleted continue for action_id, action_data in data_for_this_question.items(): diff --git a/kpi/models/asset.py b/kpi/models/asset.py index 5a3e619e1a..8731658891 100644 --- a/kpi/models/asset.py +++ b/kpi/models/asset.py @@ -18,7 +18,6 @@ from taggit.utils import require_instance_manager from kobo.apps.reports.constants import DEFAULT_REPORTS_KEY, SPECIFIC_REPORTS_KEY -from kobo.apps.subsequences__new.actions import ADVANCED_FEATURES_PARAMS_SCHEMA from kobo.apps.subsequences.utils import ( advanced_feature_instances, advanced_submission_jsonschema, @@ -30,6 +29,7 @@ qpath_to_xpath, ) from kobo.apps.subsequences.utils.parse_known_cols import parse_known_cols +from kobo.apps.subsequences__new.schemas import ACTION_PARAMS_SCHEMA from kpi.constants import ( ASSET_TYPE_BLOCK, ASSET_TYPE_COLLECTION, @@ -1304,7 +1304,7 @@ def validate_advanced_features(self): jsonschema.validate( instance=self.advanced_features, - schema=ADVANCED_FEATURES_PARAMS_SCHEMA, + schema=ACTION_PARAMS_SCHEMA, ) @property From 0eef8596c8d5424b1b595e69e4d145201b299813 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Fri, 22 Aug 2025 17:10:06 -0400 Subject: [PATCH 045/138] =?UTF-8?q?Add=20forgotten=20file=E2=80=A6again?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kobo/apps/subsequences__new/schemas.py | 57 ++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 kobo/apps/subsequences__new/schemas.py diff --git a/kobo/apps/subsequences__new/schemas.py b/kobo/apps/subsequences__new/schemas.py new file mode 100644 index 0000000000..60f5fd924f --- /dev/null +++ b/kobo/apps/subsequences__new/schemas.py @@ -0,0 +1,57 @@ +import jsonschema + +from .actions import ACTION_IDS_TO_CLASSES, ACTIONS + +# not the full complexity of XPath, but a slash-delimited path of valid XML tag +# names to convey group hierarchy +QUESTION_XPATH_PATTERN = '^([A-Za-z_][A-Za-z0-9_-]*)(/[A-Za-z_][A-Za-z0-9_-]*)*$' + +ACTION_PARAMS_SCHEMA = { + 'additionalProperties': False, + 'properties': { + '_actionConfigs': { + 'additionalProperties': False, + 'patternProperties': { + QUESTION_XPATH_PATTERN: { + 'additionalProperties': False, + 'properties': {a.ID: a.params_schema for a in ACTIONS}, + 'type': 'object', + } + }, + 'type': 'object', + }, + '_version': {'const': '20250820'}, + }, + 'type': 'object', +} + + +def validate_submission_supplement(asset: 'kpi.models.Asset', supplement: dict): + jsonschema.validate(get_submission_supplement_schema(asset), supplement) + + +def get_submission_supplement_schema(asset: 'kpi.models.Asset') -> dict: + if asset.advanced_features.get('_version') != '20250820': + # TODO: migrate from old per-asset schema + raise NotImplementedError + + submission_supplement_schema = { + 'additionalProperties': False, + 'properties': {'_version': {'const': '20250820'}}, + 'type': 'object', + } + + for ( + question_xpath, + action_configs_for_this_question, + ) in asset.advanced_features['_actionConfigs'].items(): + for ( + action_id, + action_params, + ) in action_configs_for_this_question.items(): + 
action = ACTION_IDS_TO_CLASSES[action_id](question_xpath, action_params) + submission_supplement_schema['properties'].setdefault(question_xpath, {})[ + action_id + ] = action.result_schema + + return submission_supplement_schema From 7e601f741d242a3c9f8c02a030535238616a945e Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 22 Aug 2025 17:28:36 -0400 Subject: [PATCH 046/138] fix supplement unit tests --- .../subsequences__new/tests/api/v2/base.py | 6 ++- .../tests/api/v2/test_permissions.py | 44 ++++++++++++++++--- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/kobo/apps/subsequences__new/tests/api/v2/base.py b/kobo/apps/subsequences__new/tests/api/v2/base.py index b7ca35632e..9ad394e09c 100644 --- a/kobo/apps/subsequences__new/tests/api/v2/base.py +++ b/kobo/apps/subsequences__new/tests/api/v2/base.py @@ -83,4 +83,8 @@ def setUp(self): def set_asset_advanced_features(self, features): self.asset.advanced_features = features - self.asset.save(update_fields=['advanced_features']) + self.asset.save( + adjust_content=False, + create_version=False, + update_fields=['advanced_features'], + ) diff --git a/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py b/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py index 768bb223a8..9b63fbac60 100644 --- a/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py +++ b/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py @@ -1,5 +1,9 @@ +from datetime import datetime +from zoneinfo import ZoneInfo + import pytest from ddt import data, ddt, unpack +from freezegun import freeze_time from rest_framework import status from kobo.apps.kobo_auth.shortcuts import User @@ -37,7 +41,7 @@ class SubsequencePermissionTestCase(SubsequenceBaseTestCase): ( 'adminuser', False, - status.HTTP_404_NOT_FOUND, + status.HTTP_200_OK, ), # admin user with view permissions ( @@ -97,7 +101,7 @@ def test_can_read(self, username, shared, status_code): ( 'adminuser', False, - status.HTTP_404_NOT_FOUND, + 
status.HTTP_200_OK, ), # admin user with view permissions ( @@ -130,13 +134,41 @@ def test_can_write(self, username, shared, status_code): user = User.objects.get(username=username) self.client.force_login(user) + # Activate advanced features for the project + self.set_asset_advanced_features({ + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'manual_transcription': [ + {'language': 'es'}, + ] + } + } + }) + if shared: self.asset.assign_perm(user, PERM_CHANGE_SUBMISSIONS) - response = self.client.patch(self.supplement_details_url, data=payload) + frozen_datetime_now = datetime(2024, 4, 8, 15, 27, 0, tzinfo=ZoneInfo('UTC')) + with freeze_time(frozen_datetime_now): + response = self.client.patch( + self.supplement_details_url, data=payload, format='json' + ) + assert response.status_code == status_code if status_code == status.HTTP_200_OK: - assert response.data == {} + expected = { + '_version': '20250820', + 'q1': { + 'manual_transcription': { + '_dateCreated': '2024-04-08T15:27:00Z', + '_dateModified': '2024-04-08T15:27:00Z', + 'language': 'es', + 'transcript': 'buenas noches', + }, + }, + } + assert response.data == expected class SubsequencePartialPermissionTestCase(SubsequenceBaseTestCase): @@ -164,7 +196,9 @@ def test_cannot_post_data(self): } }, } - response = self.client.post(self.supplement_details_url, data=payload) + response = self.client.post( + self.supplement_details_url, data=payload, format='json' + ) assert response.status_code == status.HTTP_404_NOT_FOUND def test_cannot_read_data(self): From 8561cda327b794b79ddbce99117c124b2b3d9b0e Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 22 Aug 2025 17:28:52 -0400 Subject: [PATCH 047/138] Draft documentation --- kpi/docs/api/v2/data/supplement_retrieve.md | 1 + kpi/docs/api/v2/data/supplement_update.md | 1 + kpi/views/v2/data.py | 132 +++++++++++++------- 3 files changed, 88 insertions(+), 46 deletions(-) create mode 100644 kpi/docs/api/v2/data/supplement_retrieve.md create mode 100644 
kpi/docs/api/v2/data/supplement_update.md diff --git a/kpi/docs/api/v2/data/supplement_retrieve.md b/kpi/docs/api/v2/data/supplement_retrieve.md new file mode 100644 index 0000000000..c1ce431843 --- /dev/null +++ b/kpi/docs/api/v2/data/supplement_retrieve.md @@ -0,0 +1 @@ +## TBC Supplement retrieve diff --git a/kpi/docs/api/v2/data/supplement_update.md b/kpi/docs/api/v2/data/supplement_update.md new file mode 100644 index 0000000000..cd370eb445 --- /dev/null +++ b/kpi/docs/api/v2/data/supplement_update.md @@ -0,0 +1 @@ +## TBC Supplement update diff --git a/kpi/views/v2/data.py b/kpi/views/v2/data.py index de2cbb5704..8880fcfb42 100644 --- a/kpi/views/v2/data.py +++ b/kpi/views/v2/data.py @@ -85,7 +85,7 @@ type=str, location=OpenApiParameter.PATH, required=True, - description='UID of the parent asset', + description='Asset identifier', ), ], ) @@ -98,11 +98,11 @@ ), parameters=[ OpenApiParameter( - name='id', - type=int, + name='submission_id_or_root_uuid', + type=str, location=OpenApiParameter.PATH, required=True, - description='ID of the data', + description='Submission identifier', ), ], ), @@ -117,11 +117,11 @@ ), parameters=[ OpenApiParameter( - name='id', - type=int, + name='submission_id_or_root_uuid', + type=str, location=OpenApiParameter.PATH, required=True, - description='ID of the data', + description='Submission identifier', ), ], ), @@ -146,11 +146,11 @@ ), parameters=[ OpenApiParameter( - name='id', - type=int, + name='submission_id_or_root_uuid', + type=str, location=OpenApiParameter.PATH, required=True, - description='ID of the data', + description='Submission identifier', ), ], ), @@ -164,20 +164,22 @@ class DataViewSet( Available actions: - bulk → DELETE /api/v2/assets/ - bulk → PATCH /api/v2/asset_usage/ - - delete → DELETE /api/v2/asset_usage/{parent_lookup_asset}/data/{id} - - duplicate → POST /api/v2/asset_usage/{parent_lookup_asset}/data/duplicate # noqa - - list → GET /api/v2/asset_usage/{parent_lookup_asset}/data - - retrieve → GET 
/api/v2/asset_usage/{parent_lookup_asset}/data/{id} - - validation_status → GET /api/v2/asset_usage/{parent_lookup_asset}/data/{id}/validation_status # noqa - - validation_status → DELETE /api/v2/asset_usage/{parent_lookup_asset}/data/{id}/validation_status # noqa - - validation_status → PATCH /api/v2/asset_usage/{parent_lookup_asset}/data/{id}/validation_status # noqa - - validation_statuses → DELETE /api/v2/asset_usage/{parent_lookup_asset}/data/{id}/validation_statuses # noqa - - validation_statuses → PATCH /api/v2/asset_usage/{parent_lookup_asset}/data/{id}/validation_statuses # noqa - - enketo_edit → GET /api/v2/assets/{parent_lookup_asset}/data/{id}/edit/ - - enketo_edit → GET /api/v2/assets/{parent_lookup_asset}/data/{id}/enketo/edit/ - - enketo_edit → GET /api/v2/assets/{parent_lookup_asset}/data/{id}/enketo/redirect/edit/ - - enketo_view → GET /api/v2/assets/{parent_lookup_asset}/data/{id}/enketo/view/ - - enketo_view → GET /api/v2/assets/{parent_lookup_asset}/data/{id}/enketo/redirect/view/ + - delete → DELETE /api/v2/asset_usage/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/ + - duplicate → POST /api/v2/asset_usage/{parent_lookup_asset}/data/duplicate/ # noqa + - list → GET /api/v2/asset_usage/{parent_lookup_asset}/data/ + - retrieve → GET /api/v2/asset_usage/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/ + - validation_status → GET /api/v2/asset_usage/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/validation_status/ # noqa + - validation_status → DELETE /api/v2/asset_usage/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/validation_status/ # noqa + - validation_status → PATCH /api/v2/asset_usage/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/validation_status/ # noqa + - validation_statuses → DELETE /api/v2/asset_usage/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/validation_statuses/ # noqa + - validation_statuses → PATCH 
/api/v2/asset_usage/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/validation_statuses/ # noqa + - enketo_edit → GET /api/v2/assets/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/edit/ + - enketo_edit → GET /api/v2/assets/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/enketo/edit/ + - enketo_edit → GET /api/v2/assets/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/enketo/redirect/edit/ + - enketo_view → GET /api/v2/assets/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/enketo/view/ + - enketo_view → GET /api/v2/assets/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/enketo/redirect/view/ + - supplement → GET /api/v2/assets/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/supplement/ + - supplement → PATCH /api/v2/assets/{parent_lookup_asset}/data/{submission_id_or_root_uuid}/supplement/ Documentation: - docs/api/v2/data/bulk_delete.md @@ -193,6 +195,8 @@ class DataViewSet( - docs/api/v2/data/validation_statuses_update.md - docs/api/v2/data/enketo_view.md - docs/api/v2/data/enketo_edit.md + - docs/api/v2/data/supplement_retrieve.md + - docs/api/v2/data/supplement_update.md """ parent_model = Asset @@ -264,7 +268,7 @@ def destroy(self, request, submission_id_or_root_uuid: int, *args, **kwargs): ) def duplicate(self, request, submission_id_or_root_uuid: int, *args, **kwargs): """ - Creates a duplicate of the submission with a given `pk` + Creates a duplicate of the submission with a given `submission_id_or_root_uuid` """ deployment = self._get_deployment() # Coerce to int because the back-end only finds matches with the same type @@ -306,11 +310,11 @@ def duplicate(self, request, submission_id_or_root_uuid: int, *args, **kwargs): ), parameters=[ OpenApiParameter( - name='id', - type=int, + name='submission_id_or_root_uuid', + type=str, location=OpenApiParameter.PATH, required=True, - description='ID of the data', + description='Submission identifier', ), ], ) @@ -342,11 +346,11 @@ def enketo_edit(self, request, 
submission_id_or_root_uuid: int, *args, **kwargs) ), parameters=[ OpenApiParameter( - name='id', - type=int, + name='submission_id_or_root_uuid', + type=str, location=OpenApiParameter.PATH, required=True, - description='ID of the data', + description='Submission `id` or `rootUuid`', ), ], ) @@ -465,9 +469,39 @@ def retrieve( submission = list(submissions)[0] return Response(submission) + @extend_schema( + methods=['GET'], + description=read_md('kpi', 'data/supplement_retrieve.md'), + request={'application/json': DataValidationStatusUpdatePayload}, + responses=open_api_200_ok_response(DataValidationStatusUpdateResponse), + parameters=[ + OpenApiParameter( + name='submission_id_or_root_uuid', + type=str, + location=OpenApiParameter.PATH, + required=True, + description='Submission identifier', + ), + ], + ) + @extend_schema( + methods=['PATCH'], + description=read_md('kpi', 'data/supplement_update.md'), + request={'application/json': DataValidationStatusUpdatePayload}, + responses=open_api_200_ok_response(DataValidationStatusUpdateResponse), + parameters=[ + OpenApiParameter( + name='submission_id_or_root_uuid', + type=str, + location=OpenApiParameter.PATH, + required=True, + description='Submission identifier', + ), + ], + ) @action( detail=True, - methods=['PATCH'], + methods=['GET', 'PATCH'], permission_classes=[AdvancedSubmissionPermission], ) def supplemental(self, request, submission_id_or_root_uuid: str, *args, **kwargs): @@ -486,24 +520,30 @@ def supplemental(self, request, submission_id_or_root_uuid: str, *args, **kwargs deployment = self._get_deployment() try: - submission = next(deployment.get_submissions( + submission = list(deployment.get_submissions( user=request.user, query={'meta/rootUuid': add_uuid_prefix(submission_root_uuid)} - )) - except StopIteration: + ))[0] + except IndexError: raise Http404 submission_root_uuid = submission[deployment.SUBMISSION_ROOT_UUID_XPATH] + if request.method == 'GET': + return 
Response(retrieve_supplemental_data(self.asset, submission_root_uuid)) + post_data = request.data + try: - handle_incoming_data(self.asset, submission, post_data) + supplemental_data = handle_incoming_data( + self.asset, submission, post_data + ) except InvalidAction: raise serializers.ValidationError({'detail': 'Invalid action'}) except InvalidXPath: raise serializers.ValidationError({'detail': 'Invalid question name'}) - return Response(retrieve_supplemental_data(self.asset, submission_root_uuid)) + return Response(supplemental_data) @extend_schema( methods=['PATCH'], @@ -517,11 +557,11 @@ def supplemental(self, request, submission_id_or_root_uuid: str, *args, **kwargs ), parameters=[ OpenApiParameter( - name='id', - type=int, + name='submission_id_or_root_uuid', + type=str, location=OpenApiParameter.PATH, required=True, - description='ID of the data', + description='Submission identifier', ), ], ) @@ -534,11 +574,11 @@ def supplemental(self, request, submission_id_or_root_uuid: str, *args, **kwargs ), parameters=[ OpenApiParameter( - name='id', - type=int, + name='submission_id_or_root_uuid', + type=str, location=OpenApiParameter.PATH, required=True, - description='ID of the data', + description='Submission identifier', ), ], ) @@ -554,11 +594,11 @@ def supplemental(self, request, submission_id_or_root_uuid: str, *args, **kwargs ), parameters=[ OpenApiParameter( - name='id', - type=int, + name='submission_id_or_root_uuid', + type=str, location=OpenApiParameter.PATH, required=True, - description='ID of the data', + description='Submission identifier', ), ], ) From 598b00049e13c242dfe959bd2e742b71a27f2b3a Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Fri, 22 Aug 2025 17:36:58 -0400 Subject: [PATCH 048/138] Refactor 'routers' logic into new proxy model --- kobo/apps/subsequences__new/exceptions.py | 16 +++ kobo/apps/subsequences__new/models.py | 151 +++++++++++++++++--- kobo/apps/subsequences__new/router.py | 166 +--------------------- 3 files changed, 156 insertions(+), 177 deletions(-) create mode 100644 kobo/apps/subsequences__new/exceptions.py diff --git a/kobo/apps/subsequences__new/exceptions.py b/kobo/apps/subsequences__new/exceptions.py new file mode 100644 index 0000000000..d15edc89ee --- /dev/null +++ b/kobo/apps/subsequences__new/exceptions.py @@ -0,0 +1,16 @@ +class InvalidAction(Exception): + """ + The referenced action does not exist or was not configured for the given + question XPath at the asset level + """ + + pass + + +class InvalidXPath(Exception): + """ + The referenced question XPath was not configured for supplemental data at + the asset level + """ + + pass \ No newline at end of file diff --git a/kobo/apps/subsequences__new/models.py b/kobo/apps/subsequences__new/models.py index 551284d699..7d49e6c746 100644 --- a/kobo/apps/subsequences__new/models.py +++ b/kobo/apps/subsequences__new/models.py @@ -1,22 +1,141 @@ -# coding: utf-8 +from kobo.apps.subsequences.models import ( + SubmissionExtras, # just bullshit for now +) +from kpi.models import Asset +from .actions import ACTION_IDS_TO_CLASSES +from .exceptions import InvalidAction, InvalidXPath +from .schemas import validate_submission_supplement -from django.db import models +class SubmissionSupplement(SubmissionExtras): + class Meta(SubmissionExtras.Meta): + proxy = True + app_label = 'subsequences' -from kpi.models import Asset -from kpi.models.abstract_models import AbstractTimeStampedModel + def revise_data( + asset: Asset, submission: dict, incoming_data: dict + ) -> dict: + schema_version = incoming_data.pop('_version') + if schema_version != '20250820': + # TODO: migrate from old per-submission schema + raise 
NotImplementedError + + if asset.advanced_features['_version'] != schema_version: + # TODO: migrate from old per-asset schema + raise NotImplementedError + + submission_uuid = submission['meta/rootUuid'] # constant? + supplemental_data = SubmissionExtras.objects.get_or_create( + asset=asset, submission_uuid=submission_uuid + )[0].content # lock it? + + retrieved_supplemental_data = {} + + for question_xpath, data_for_this_question in incoming_data.items(): + try: + action_configs_for_this_question = asset.advanced_features[ + '_actionConfigs' + ][question_xpath] + except KeyError as e: + raise InvalidXPath from e + + for action_id, action_data in data_for_this_question.items(): + try: + action_class = ACTION_IDS_TO_CLASSES[action_id] + except KeyError as e: + raise InvalidAction from e + try: + action_params = action_configs_for_this_question[action_id] + except KeyError as e: + raise InvalidAction from e + + action = action_class(question_xpath, action_params) + action.check_limits(asset.owner) + question_supplemental_data = supplemental_data.setdefault( + question_xpath, {} + ) + action_supplemental_data = question_supplemental_data.setdefault( + action_id, {} + ) + action_supplemental_data = action.revise_field( + submission, action_supplemental_data, action_data + ) + question_supplemental_data[action_id] = action_supplemental_data + retrieved_supplemental_data.setdefault(question_xpath, {})[ + action_id + ] = action.retrieve_data(action_supplemental_data) + + supplemental_data['_version'] = schema_version + validate_submission_supplement(asset, supplemental_data) + SubmissionExtras.objects.filter( + asset=asset, submission_uuid=submission_uuid + ).update(content=supplemental_data) + + retrieved_supplemental_data['_version'] = schema_version + return retrieved_supplemental_data + + + def retrieve_data(asset: Asset, submission_uuid: str) -> dict: + try: + supplemental_data = SubmissionExtras.objects.get( + asset=asset, submission_uuid=submission_uuid + ).content 
+ except SubmissionExtras.DoesNotExist: + return {} + + schema_version = supplemental_data.pop('_version') + if schema_version != '20250820': + # TODO: migrate from old per-submission schema + raise NotImplementedError + + if asset.advanced_features['_version'] != schema_version: + # TODO: migrate from old per-asset schema + raise NotImplementedError + + retrieved_supplemental_data = {} + + for question_xpath, data_for_this_question in supplemental_data.items(): + processed_data_for_this_question = ( + retrieved_supplemental_data.setdefault(question_xpath, {}) + ) + action_configs = asset.advanced_features['_actionConfigs'] + try: + action_configs_for_this_question = action_configs[question_xpath] + except KeyError: + # There's still supplemental data for this question at the + # submission level, but the question is no longer configured at the + # asset level. + # Allow this for now, but maybe forbid later and also forbid + # removing things from the asset-level action configuration? + # Actions could be disabled or hidden instead of being removed + # FIXME: divergence between the asset-level configuration and + # submission-level supplemental data is going to cause schema + # validation failures! We defo need to forbid removal of actions + # and instead provide a way to mark them as deleted + continue -class SubmissionExtras(AbstractTimeStampedModel): + for action_id, action_data in data_for_this_question.items(): + try: + action_class = ACTION_IDS_TO_CLASSES[action_id] + except KeyError: + # An action class present in the submission data no longer + # exists in the application code + # TODO: log an error + continue + try: + action_params = action_configs_for_this_question[action_id] + except KeyError: + # An action class present in the submission data is no longer + # configured at the asset level for this question + # Allow this for now, but maybe forbid later and also forbid + # removing things from the asset-level action configuration? 
+ # Actions could be disabled or hidden instead of being removed + continue - submission_uuid = models.CharField(max_length=249) - content = models.JSONField(default=dict) - asset = models.ForeignKey( - Asset, - related_name='submission_extras', - on_delete=models.CASCADE, - ) + action = action_class(question_xpath, action_params) + processed_data_for_this_question[action_id] = action.retrieve_data( + action_data + ) - class Meta: - # ideally `submission_uuid` is universally unique, but its uniqueness - # per-asset is most important - unique_together = (('asset', 'submission_uuid'),) + retrieved_supplemental_data['_version'] = schema_version + return retrieved_supplemental_data \ No newline at end of file diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py index 0b16378aef..f58222f78a 100644 --- a/kobo/apps/subsequences__new/router.py +++ b/kobo/apps/subsequences__new/router.py @@ -1,165 +1,9 @@ -from kobo.apps.subsequences.models import ( - SubmissionExtras, # just bullshit for now -) -from kpi.models import Asset +from .models import SubmissionSupplement -from .actions import ACTION_IDS_TO_CLASSES -from .schemas import validate_submission_supplement - - -class InvalidAction(Exception): - """ - The referenced action does not exist or was not configured for the given - question XPath at the asset level - """ - - pass - - -class InvalidXPath(Exception): - """ - The referenced question XPath was not configured for supplemental data at - the asset level - """ - - pass - - -# ChatGPT suggestions: -# - dispatch_action_payload -# - dispatch_incoming_data -# - process_action_request -# - run_action def handle_incoming_data(*args, **kwargs): # TODO: remove this alias - return revise_supplemental_data(*args, **kwargs) - - -def revise_supplemental_data( - asset: Asset, submission: dict, incoming_data: dict -) -> dict: - schema_version = incoming_data.pop('_version') - if schema_version != '20250820': - # TODO: migrate from old 
per-submission schema - raise NotImplementedError - - if asset.advanced_features['_version'] != schema_version: - # TODO: migrate from old per-asset schema - raise NotImplementedError - - submission_uuid = submission['meta/rootUuid'] # constant? - supplemental_data = SubmissionExtras.objects.get_or_create( - asset=asset, submission_uuid=submission_uuid - )[0].content # lock it? - - retrieved_supplemental_data = {} - - for question_xpath, data_for_this_question in incoming_data.items(): - try: - action_configs_for_this_question = asset.advanced_features[ - '_actionConfigs' - ][question_xpath] - except KeyError as e: - raise InvalidXPath from e + return SubmissionSupplement.revise_data(*args, **kwargs) - for action_id, action_data in data_for_this_question.items(): - try: - action_class = ACTION_IDS_TO_CLASSES[action_id] - except KeyError as e: - raise InvalidAction from e - try: - action_params = action_configs_for_this_question[action_id] - except KeyError as e: - raise InvalidAction from e - - action = action_class(question_xpath, action_params) - action.check_limits(asset.owner) - question_supplemental_data = supplemental_data.setdefault( - question_xpath, {} - ) - action_supplemental_data = question_supplemental_data.setdefault( - action_id, {} - ) - action_supplemental_data = action.revise_field( - submission, action_supplemental_data, action_data - ) - question_supplemental_data[action_id] = action_supplemental_data - retrieved_supplemental_data.setdefault(question_xpath, {})[ - action_id - ] = action.retrieve_data(action_supplemental_data) - - supplemental_data['_version'] = schema_version - validate_submission_supplement(asset, supplemental_data) - SubmissionExtras.objects.filter( - asset=asset, submission_uuid=submission_uuid - ).update(content=supplemental_data) - - retrieved_supplemental_data['_version'] = schema_version - return retrieved_supplemental_data - - -def retrieve_supplemental_data(asset: Asset, submission_uuid: str) -> dict: - try: - 
supplemental_data = SubmissionExtras.objects.get( - asset=asset, submission_uuid=submission_uuid - ).content - except SubmissionExtras.DoesNotExist: - return {} - - schema_version = supplemental_data.pop('_version') - if schema_version != '20250820': - # TODO: migrate from old per-submission schema - raise NotImplementedError - - if asset.advanced_features['_version'] != schema_version: - # TODO: migrate from old per-asset schema - raise NotImplementedError - - retrieved_supplemental_data = {} - - for question_xpath, data_for_this_question in supplemental_data.items(): - processed_data_for_this_question = ( - retrieved_supplemental_data.setdefault(question_xpath, {}) - ) - action_configs = asset.advanced_features['_actionConfigs'] - try: - action_configs_for_this_question = action_configs[question_xpath] - except KeyError: - # There's still supplemental data for this question at the - # submission level, but the question is no longer configured at the - # asset level. - # Allow this for now, but maybe forbid later and also forbid - # removing things from the asset-level action configuration? - # Actions could be disabled or hidden instead of being removed - - # FIXME: divergence between the asset-level configuration and - # submission-level supplemental data is going to cause schema - # validation failures! 
We defo need to forbid removal of actions - # and instead provide a way to mark them as deleted - continue - - for action_id, action_data in data_for_this_question.items(): - try: - action_class = ACTION_IDS_TO_CLASSES[action_id] - except KeyError: - # An action class present in the submission data no longer - # exists in the application code - # TODO: log an error - continue - try: - action_params = action_configs_for_this_question[action_id] - except KeyError: - # An action class present in the submission data is no longer - # configured at the asset level for this question - # Allow this for now, but maybe forbid later and also forbid - # removing things from the asset-level action configuration? - # Actions could be disabled or hidden instead of being removed - continue - - action = action_class(question_xpath, action_params) - processed_data_for_this_question[action_id] = action.retrieve_data( - action_data - ) - - retrieved_supplemental_data['_version'] = schema_version - return retrieved_supplemental_data +def retrieve_supplemental_data(*args, **kwargs): + # TODO: remove this alias + return SubmissionSupplement.retrieve_data(*args, **kwargs) \ No newline at end of file From fe9811d021f33fc5afb636770c7db2df179b8229 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Fri, 22 Aug 2025 17:41:22 -0400 Subject: [PATCH 049/138] Update exceptions import --- kpi/views/v2/data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kpi/views/v2/data.py b/kpi/views/v2/data.py index 8880fcfb42..415368c454 100644 --- a/kpi/views/v2/data.py +++ b/kpi/views/v2/data.py @@ -513,6 +513,8 @@ def supplemental(self, request, submission_id_or_root_uuid: str, *args, **kwargs from kobo.apps.subsequences__new.router import ( handle_incoming_data, retrieve_supplemental_data, + ) + from kobo.apps.subsequences__new.exceptions import ( InvalidAction, InvalidXPath, ) From 178589b3f607f087dc98bc288ac8c310f79592fc Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Fri, 22 Aug 2025 18:14:55 -0400 Subject: [PATCH 050/138] Warn about deprecation; clean a few things --- kobo/apps/subsequences__new/actions/base.py | 2 ++ kobo/apps/subsequences__new/router.py | 3 +++ .../tests/test_manual_transcription.py | 20 +++++++++---------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/base.py b/kobo/apps/subsequences__new/actions/base.py index 24522f1e6f..159b9c6a42 100644 --- a/kobo/apps/subsequences__new/actions/base.py +++ b/kobo/apps/subsequences__new/actions/base.py @@ -280,6 +280,8 @@ def retrieve_data(self, action_data: dict) -> dict: def revise_field(self, *args, **kwargs): # TODO: remove this alias + import warnings + warnings.warn('Oh no, this method is going away!', DeprecationWarning) return self.revise_data(*args, **kwargs) def revise_data( diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences__new/router.py index f58222f78a..77b23dcec7 100644 --- a/kobo/apps/subsequences__new/router.py +++ b/kobo/apps/subsequences__new/router.py @@ -1,5 +1,8 @@ from .models import SubmissionSupplement +import warnings +warnings.warn('Oh no, this file is going away!', DeprecationWarning) + def handle_incoming_data(*args, **kwargs): # TODO: remove this alias return SubmissionSupplement.revise_data(*args, **kwargs) diff --git a/kobo/apps/subsequences__new/tests/test_manual_transcription.py b/kobo/apps/subsequences__new/tests/test_manual_transcription.py index b68e358c15..a51b48202c 100644 --- a/kobo/apps/subsequences__new/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences__new/tests/test_manual_transcription.py @@ -1,10 +1,10 @@ -import copy import dateutil import jsonschema import pytest from ..actions.manual_transcription import ManualTranscriptionAction +EMPTY_SUBMISSION = {} def test_valid_params_pass_validation(): params = [{'language': 'fr'}, {'language': 'es'}] @@ -45,7 +45,7 @@ def test_valid_result_passes_validation(): fifth = 
{'language': 'en', 'transcript': 'fifth'} mock_sup_det = {} for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_field({}, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) def test_invalid_result_fails_validation(): @@ -60,7 +60,7 @@ def test_invalid_result_fails_validation(): fifth = {'language': 'en', 'transcript': 'fifth'} mock_sup_det = {} for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_field({}, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) # erroneously add '_dateModified' onto a revision mock_sup_det['_revisions'][0]['_dateModified'] = mock_sup_det['_revisions'][0]['_dateCreated'] @@ -81,7 +81,7 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): first = {'language': 'en', 'transcript': 'No idea'} second = {'language': 'fr', 'transcript': "Pas d'idée"} - mock_sup_det = action.revise_field({}, {}, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, {}, first) assert mock_sup_det['language'] == 'en' assert mock_sup_det['transcript'] == 'No idea' @@ -89,7 +89,7 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): assert '_revisions' not in mock_sup_det first_time = mock_sup_det['_dateCreated'] - mock_sup_det = action.revise_field({}, mock_sup_det, second) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert len(mock_sup_det['_revisions']) == 1 # the revision should encompass the first transcript @@ -122,10 +122,10 @@ def test_setting_transcript_to_empty_string(): first = {'language': 'fr', 'transcript': "Pas d'idée"} second = {'language': 'fr', 'transcript': ''} - mock_sup_det = action.revise_field({}, {}, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, {}, first) assert mock_sup_det['transcript'] == "Pas d'idée" - mock_sup_det = action.revise_field({}, mock_sup_det, second) + 
mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert mock_sup_det['transcript'] == '' assert mock_sup_det['_revisions'][0]['transcript'] == "Pas d'idée" @@ -137,10 +137,10 @@ def test_setting_transcript_to_empty_object(): first = {'language': 'fr', 'transcript': "Pas d'idée"} second = {} - mock_sup_det = action.revise_field({}, {}, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, {}, first) assert mock_sup_det['transcript'] == "Pas d'idée" - mock_sup_det = action.revise_field({}, mock_sup_det, second) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert 'transcript' not in mock_sup_det assert mock_sup_det['_revisions'][0]['transcript'] == "Pas d'idée" @@ -155,7 +155,7 @@ def test_latest_revision_is_first(): mock_sup_det = {} for data in first, second, third: - mock_sup_det = action.revise_field({}, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) assert mock_sup_det['transcript'] == 'trois' assert mock_sup_det['_revisions'][0]['transcript'] == 'deux' From 06d72134078001381484855155ca32ad5b4bfb50 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Fri, 22 Aug 2025 18:16:25 -0400 Subject: [PATCH 051/138] Revert mistakenly committed hack --- kobo/apps/long_running_migrations/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kobo/apps/long_running_migrations/app.py b/kobo/apps/long_running_migrations/app.py index ebdf6e48aa..859374a92d 100644 --- a/kobo/apps/long_running_migrations/app.py +++ b/kobo/apps/long_running_migrations/app.py @@ -99,4 +99,4 @@ def check_must_complete_long_running_migrations(app_configs, **kwargs): ] -#register(check_must_complete_long_running_migrations, Tags.database) +register(check_must_complete_long_running_migrations, Tags.database) From 5d6517557e56733e687fb6545be9973d2c1a68aa Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 22 Aug 2025 19:10:01 -0400 Subject: [PATCH 052/138] Draft data supplement endpoint documentation --- kobo/apps/long_running_migrations/app.py | 2 +- .../subsequences__new/tests/api/v2/base.py | 2 +- kpi/schema_extensions/v2/data/extensions.py | 47 ++++++++++++++++++- kpi/schema_extensions/v2/data/fields.py | 4 ++ kpi/schema_extensions/v2/data/serializers.py | 16 +++++++ kpi/views/v2/data.py | 12 +++-- 6 files changed, 75 insertions(+), 8 deletions(-) diff --git a/kobo/apps/long_running_migrations/app.py b/kobo/apps/long_running_migrations/app.py index ebdf6e48aa..859374a92d 100644 --- a/kobo/apps/long_running_migrations/app.py +++ b/kobo/apps/long_running_migrations/app.py @@ -99,4 +99,4 @@ def check_must_complete_long_running_migrations(app_configs, **kwargs): ] -#register(check_must_complete_long_running_migrations, Tags.database) +register(check_must_complete_long_running_migrations, Tags.database) diff --git a/kobo/apps/subsequences__new/tests/api/v2/base.py b/kobo/apps/subsequences__new/tests/api/v2/base.py index 9ad394e09c..6dedff1fe5 100644 --- a/kobo/apps/subsequences__new/tests/api/v2/base.py +++ b/kobo/apps/subsequences__new/tests/api/v2/base.py @@ -77,7 +77,7 @@ def setUp(self): 
self.asset.deployment.mock_submissions([submission_data]) self.client.force_login(user) self.supplement_details_url = reverse( - self._get_endpoint('submission-supplemental'), + self._get_endpoint('submission-supplement'), args=[self.asset.uid, self.submission_uuid] ) diff --git a/kpi/schema_extensions/v2/data/extensions.py b/kpi/schema_extensions/v2/data/extensions.py index 847829a242..005a2ee3c5 100644 --- a/kpi/schema_extensions/v2/data/extensions.py +++ b/kpi/schema_extensions/v2/data/extensions.py @@ -1,4 +1,7 @@ -from drf_spectacular.extensions import OpenApiSerializerFieldExtension +from drf_spectacular.extensions import ( + OpenApiSerializerExtension, + OpenApiSerializerFieldExtension, +) from drf_spectacular.plumbing import ( build_array_type, build_basic_type, @@ -91,6 +94,48 @@ def map_serializer_field(self, auto_schema, direction): ) +class DataSupplementPayloadExtension(OpenApiSerializerExtension): + target_class = 'kpi.schema_extensions.v2.data.serializers.DataSupplementPayload' + + def map_serializer(self, auto_schema, direction): + return build_object_type( + properties={ + '_version': { + 'type': 'string', + 'example': '20250812', + }, + 'question_name_xpath': build_object_type( + additionalProperties=False, + properties={ + 'manual_transcription': build_object_type( + additionalProperties=False, + properties={ + 'language': GENERIC_STRING_SCHEMA, + 'transcript': GENERIC_STRING_SCHEMA, + }, + required=['language', 'transcript'], + ), + 'manual_translation': build_array_type( + schema=build_object_type( + additionalProperties=False, + properties={ + 'language': GENERIC_STRING_SCHEMA, + 'translation': GENERIC_STRING_SCHEMA, + }, + required=['language', 'translation'], + ), + min_length=1, + ) + }, + anyOf=[ + {'required': ['manual_transcription']}, + {'required': ['manual_translation']}, + ], + ), + } + ) + + class DataValidationPayloadFieldExtension(OpenApiSerializerFieldExtension): target_class = 
'kpi.schema_extensions.v2.data.fields.DataValidationPayloadField' diff --git a/kpi/schema_extensions/v2/data/fields.py b/kpi/schema_extensions/v2/data/fields.py index d98b73219b..e4d708e06b 100644 --- a/kpi/schema_extensions/v2/data/fields.py +++ b/kpi/schema_extensions/v2/data/fields.py @@ -23,6 +23,10 @@ class DataBulkUpdateResultField(serializers.ListField): pass +class DataSupplementPayloadField(serializers.JSONField): + pass + + class DataValidationPayloadField(serializers.JSONField): pass diff --git a/kpi/schema_extensions/v2/data/serializers.py b/kpi/schema_extensions/v2/data/serializers.py index 0351774458..26e6e99989 100644 --- a/kpi/schema_extensions/v2/data/serializers.py +++ b/kpi/schema_extensions/v2/data/serializers.py @@ -6,6 +6,7 @@ DataBulkDeleteField, DataBulkUpdatePayloadField, DataBulkUpdateResultField, + DataSupplementPayloadField, DataValidationPayloadField, EmptyListField, EmptyObjectField, @@ -69,6 +70,21 @@ }, ) +DataSupplementPayload = inline_serializer_class( + name='DataSupplementPayload', + fields={ + '_version': serializers.CharField(), + 'question_name_xpath': serializers.JSONField(), + } +) + + +DataSupplementResponse = inline_serializer_class( + name='DataSupplementResponse', + fields={} +) + + DataValidationStatusUpdatePayload = inline_serializer_class( name='DataValidationStatusUpdatePayload', fields={ diff --git a/kpi/views/v2/data.py b/kpi/views/v2/data.py index 8880fcfb42..6d699b30c8 100644 --- a/kpi/views/v2/data.py +++ b/kpi/views/v2/data.py @@ -56,6 +56,8 @@ DataBulkUpdateResponse, DataResponse, DataStatusesUpdate, + DataSupplementPayload, + DataSupplementResponse, DataValidationStatusesUpdatePayload, DataValidationStatusUpdatePayload, DataValidationStatusUpdateResponse, @@ -472,8 +474,7 @@ def retrieve( @extend_schema( methods=['GET'], description=read_md('kpi', 'data/supplement_retrieve.md'), - request={'application/json': DataValidationStatusUpdatePayload}, - 
responses=open_api_200_ok_response(DataValidationStatusUpdateResponse), + responses=open_api_200_ok_response(DataSupplementResponse), # TODO CHANGEME parameters=[ OpenApiParameter( name='submission_id_or_root_uuid', @@ -487,8 +488,8 @@ def retrieve( @extend_schema( methods=['PATCH'], description=read_md('kpi', 'data/supplement_update.md'), - request={'application/json': DataValidationStatusUpdatePayload}, - responses=open_api_200_ok_response(DataValidationStatusUpdateResponse), + request={'application/json': DataSupplementPayload}, + responses=open_api_200_ok_response(DataSupplementResponse), parameters=[ OpenApiParameter( name='submission_id_or_root_uuid', @@ -502,9 +503,10 @@ def retrieve( @action( detail=True, methods=['GET', 'PATCH'], + renderer_classes=[renderers.JSONRenderer], permission_classes=[AdvancedSubmissionPermission], ) - def supplemental(self, request, submission_id_or_root_uuid: str, *args, **kwargs): + def supplement(self, request, submission_id_or_root_uuid: str, *args, **kwargs): # make it clear, a root uuid is expected here submission_root_uuid = submission_id_or_root_uuid From 54daa42037046719f2c6ee2cda543c62c89fe8c4 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 22 Aug 2025 21:45:53 -0400 Subject: [PATCH 053/138] add drf-spectular schema and documentation --- kpi/schema_extensions/v2/data/extensions.py | 111 ++++++++++++++++++- kpi/schema_extensions/v2/data/serializers.py | 5 +- 2 files changed, 112 insertions(+), 4 deletions(-) diff --git a/kpi/schema_extensions/v2/data/extensions.py b/kpi/schema_extensions/v2/data/extensions.py index 005a2ee3c5..728c9f333a 100644 --- a/kpi/schema_extensions/v2/data/extensions.py +++ b/kpi/schema_extensions/v2/data/extensions.py @@ -111,9 +111,9 @@ def map_serializer(self, auto_schema, direction): additionalProperties=False, properties={ 'language': GENERIC_STRING_SCHEMA, - 'transcript': GENERIC_STRING_SCHEMA, + 'value': GENERIC_STRING_SCHEMA, }, - required=['language', 'transcript'], + 
required=['language', 'value'], ), 'manual_translation': build_array_type( schema=build_object_type( @@ -122,7 +122,7 @@ def map_serializer(self, auto_schema, direction): 'language': GENERIC_STRING_SCHEMA, 'translation': GENERIC_STRING_SCHEMA, }, - required=['language', 'translation'], + required=['language', 'value'], ), min_length=1, ) @@ -136,6 +136,111 @@ def map_serializer(self, auto_schema, direction): ) +class DataSupplementResponseExtension(OpenApiSerializerExtension): + target_class = 'kpi.schema_extensions.v2.data.serializers.DataSupplementResponse' + + def map_serializer(self, auto_schema, direction): + # Reusable building blocks to reduce redundancy + LANG_STR = GENERIC_STRING_SCHEMA + VALUE_STR = GENERIC_STRING_SCHEMA + DATETIME = build_basic_type(OpenApiTypes.DATETIME) + + # Constraint helper: "language" and "value" must be both present or both absent + PAIR_LANG_VALUE_BOTH_OR_NONE = { + 'anyOf': [ + {'required': ['language', 'value']}, # both present + { + 'not': { # forbid the cases where only one is present + 'anyOf': [ + {'required': ['language']}, + {'required': ['value']}, + ] + } + }, + ] + } + + # Shared properties for objects that carry a language/value pair + VALUE_PROPS = { + 'language': LANG_STR, + 'value': VALUE_STR, + } + + # Generic revision item: + # - requires _dateCreated; + # - language/value are coupled (both-or-none) + REVISION_ITEM = build_object_type( + additionalProperties=False, + properties={ + **VALUE_PROPS, + '_dateCreated': DATETIME, # Always required for a revision entry + }, + required=['_dateCreated'], + **PAIR_LANG_VALUE_BOTH_OR_NONE, + ) + + # Manual transcription object: + # - _dateCreated and _dateModified are always required + # - language/value: both-or-none + # - "revisions" is an array of REVISION_ITEMs + MANUAL_TRANSCRIPTION = build_object_type( + additionalProperties=False, + properties={ + **VALUE_PROPS, # Coupled via PAIR_LANG_VALUE_BOTH_OR_NONE + '_dateCreated': DATETIME, # Always required + 
'_dateModified': DATETIME, # Always required + 'revisions': build_array_type( + schema=REVISION_ITEM, + ), + }, + required=['_dateCreated', '_dateModified'], + **PAIR_LANG_VALUE_BOTH_OR_NONE, + ) + + # Each item in manual_translation: + # - requires _dateCreated + # - language/value: both-or-none + # - has a "_revisions" array with the same REVISION_ITEM structure + MANUAL_TRANSLATION_ITEM = build_object_type( + additionalProperties=False, + properties={ + **VALUE_PROPS, # Coupled via PAIR_LANG_VALUE_BOTH_OR_NONE + '_dateCreated': DATETIME, # Always required + '_revisions': build_array_type( + schema=REVISION_ITEM, + ), + }, + required=['_dateCreated'], + **PAIR_LANG_VALUE_BOTH_OR_NONE, + ) + + MANUAL_TRANSLATION = build_array_type( + schema=MANUAL_TRANSLATION_ITEM, + min_length=1, + ) + + return build_object_type( + properties={ + '_version': { + 'type': 'string', + 'example': '20250812', + }, + 'question_name_xpath': build_object_type( + additionalProperties=False, + properties={ + 'manual_transcription': MANUAL_TRANSCRIPTION, + 'manual_translation': MANUAL_TRANSLATION, + }, + # At least one of "manual_transcription" or "manual_translation" must be present + anyOf=[ + {'required': ['manual_transcription']}, + {'required': ['manual_translation']}, + ], + ), + } + ) + + class DataValidationPayloadFieldExtension(OpenApiSerializerFieldExtension): target_class = 'kpi.schema_extensions.v2.data.fields.DataValidationPayloadField' diff --git a/kpi/schema_extensions/v2/data/serializers.py b/kpi/schema_extensions/v2/data/serializers.py index 26e6e99989..ccd1a72a45 100644 --- a/kpi/schema_extensions/v2/data/serializers.py +++ b/kpi/schema_extensions/v2/data/serializers.py @@ -81,7 +81,10 @@ DataSupplementResponse = inline_serializer_class( name='DataSupplementResponse', - fields={} + fields={ + '_version': serializers.CharField(), + 'question_name_xpath': serializers.JSONField(), + } ) From 976e7b52a8b4fb8e85fc2d51c609c64f2cd476be Mon Sep 17 00:00:00 2001 From: Olivier Leger 
Date: Fri, 22 Aug 2025 21:52:49 -0400 Subject: [PATCH 054/138] Replace "transcript" with "value" to be consistent with other actions --- .../actions/manual_transcription.py | 10 ++-- .../tests/api/v2/test_permissions.py | 6 +- .../tests/test_manual_transcription.py | 56 +++++++++---------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 281b33e9c8..55d30a9b7a 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -48,7 +48,7 @@ def data_schema(self): # for lack of a better name { 'manual_transcription': { 'language': 'es', - 'transcript': 'Almorzamos muy bien hoy', + 'value': 'Almorzamos muy bien hoy', } } """ @@ -59,7 +59,7 @@ def data_schema(self): # for lack of a better name 'additionalProperties': False, 'properties': { 'language': {'$ref': '#/$defs/lang'}, - 'transcript': {'$ref': '#/$defs/transcript'}, + 'value': {'$ref': '#/$defs/transcript'}, }, 'allOf': [{'$ref': '#/$defs/lang_transcript_dependency'}], '$defs': { @@ -69,10 +69,10 @@ def data_schema(self): # for lack of a better name 'allOf': [ { 'if': {'required': ['language']}, - 'then': {'required': ['transcript']}, + 'then': {'required': ['value']}, }, { - 'if': {'required': ['transcript']}, + 'if': {'required': ['value']}, 'then': {'required': ['language']}, }, ] @@ -88,7 +88,7 @@ def languages(self) -> list[str]: return languages def record_repr(self, record: dict) -> dict: - return record.get('transcript', '') + return record.get('value', '') @property def result_schema(self): diff --git a/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py b/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py index 9b63fbac60..519bb06e94 100644 --- a/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py +++ 
b/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py @@ -123,7 +123,7 @@ def test_can_write(self, username, shared, status_code): 'q1': { 'manual_transcription': { 'language': 'es', - 'transcript': 'buenas noches', + 'value': 'buenas noches', } }, } @@ -164,7 +164,7 @@ def test_can_write(self, username, shared, status_code): '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:27:00Z', 'language': 'es', - 'transcript': 'buenas noches', + 'value': 'buenas noches', }, }, } @@ -192,7 +192,7 @@ def test_cannot_post_data(self): 'q1': { 'manual_transcription': { 'language': 'es', - 'transcript': 'buenas noches', + 'value': 'buenas noches', } }, } diff --git a/kobo/apps/subsequences__new/tests/test_manual_transcription.py b/kobo/apps/subsequences__new/tests/test_manual_transcription.py index a51b48202c..51a8e1f3f8 100644 --- a/kobo/apps/subsequences__new/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences__new/tests/test_manual_transcription.py @@ -21,7 +21,7 @@ def test_valid_transcript_data_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] action = ManualTranscriptionAction(xpath, params) - data = {'language': 'fr', 'transcript': 'Ne pas idée'} + data = {'language': 'fr', 'value': 'Aucune idée'} action.validate_data(data) @@ -29,7 +29,7 @@ def test_invalid_transcript_data_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] action = ManualTranscriptionAction(xpath, params) - data = {'language': 'en', 'transcript': 'No idea'} + data = {'language': 'en', 'value': 'No idea'} with pytest.raises(jsonschema.exceptions.ValidationError): action.validate_data(data) @@ -38,11 +38,11 @@ def test_valid_result_passes_validation(): params = [{'language': 'fr'}, {'language': 'en'}] action = ManualTranscriptionAction(xpath, params) - first = {'language': 'fr', 'transcript': 'un'} - 
second = {'language': 'en', 'transcript': 'two'} - third = {'language': 'fr', 'transcript': 'trois'} + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'en', 'value': 'two'} + third = {'language': 'fr', 'value': 'trois'} fourth = {} - fifth = {'language': 'en', 'transcript': 'fifth'} + fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = {} for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) @@ -53,11 +53,11 @@ def test_invalid_result_fails_validation(): params = [{'language': 'fr'}, {'language': 'en'}] action = ManualTranscriptionAction(xpath, params) - first = {'language': 'fr', 'transcript': 'un'} - second = {'language': 'en', 'transcript': 'two'} - third = {'language': 'fr', 'transcript': 'trois'} + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'en', 'value': 'two'} + third = {'language': 'fr', 'value': 'trois'} fourth = {} - fifth = {'language': 'en', 'transcript': 'fifth'} + fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = {} for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) @@ -78,13 +78,13 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): params = [{'language': 'fr'}, {'language': 'en'}] action = ManualTranscriptionAction(xpath, params) - first = {'language': 'en', 'transcript': 'No idea'} - second = {'language': 'fr', 'transcript': "Pas d'idée"} + first = {'language': 'en', 'value': 'No idea'} + second = {'language': 'fr', 'value': 'Aucune idée'} mock_sup_det = action.revise_data(EMPTY_SUBMISSION, {}, first) assert mock_sup_det['language'] == 'en' - assert mock_sup_det['transcript'] == 'No idea' + assert mock_sup_det['value'] == 'No idea' assert mock_sup_det['_dateCreated'] == mock_sup_det['_dateModified'] assert '_revisions' not in mock_sup_det first_time = mock_sup_det['_dateCreated'] @@ -119,44 +119,44 @@ def 
test_setting_transcript_to_empty_string(): params = [{'language': 'fr'}, {'language': 'en'}] action = ManualTranscriptionAction(xpath, params) - first = {'language': 'fr', 'transcript': "Pas d'idée"} - second = {'language': 'fr', 'transcript': ''} + first = {'language': 'fr', 'value': 'Aucune idée'} + second = {'language': 'fr', 'value': ''} mock_sup_det = action.revise_data(EMPTY_SUBMISSION, {}, first) - assert mock_sup_det['transcript'] == "Pas d'idée" + assert mock_sup_det['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) - assert mock_sup_det['transcript'] == '' - assert mock_sup_det['_revisions'][0]['transcript'] == "Pas d'idée" + assert mock_sup_det['value'] == '' + assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' def test_setting_transcript_to_empty_object(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] action = ManualTranscriptionAction(xpath, params) - first = {'language': 'fr', 'transcript': "Pas d'idée"} + first = {'language': 'fr', 'value': 'Aucune idée'} second = {} mock_sup_det = action.revise_data(EMPTY_SUBMISSION, {}, first) - assert mock_sup_det['transcript'] == "Pas d'idée" + assert mock_sup_det['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) - assert 'transcript' not in mock_sup_det - assert mock_sup_det['_revisions'][0]['transcript'] == "Pas d'idée" + assert 'value' not in mock_sup_det + assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' def test_latest_revision_is_first(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] action = ManualTranscriptionAction(xpath, params) - first = {'language': 'fr', 'transcript': 'un'} - second = {'language': 'fr', 'transcript': 'deux'} - third = {'language': 'fr', 'transcript': 'trois'} + first = {'language': 'fr', 'value': 'un'} + second = {'language': 
'fr', 'value': 'deux'} + third = {'language': 'fr', 'value': 'trois'} mock_sup_det = {} for data in first, second, third: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) - assert mock_sup_det['transcript'] == 'trois' - assert mock_sup_det['_revisions'][0]['transcript'] == 'deux' - assert mock_sup_det['_revisions'][1]['transcript'] == 'un' + assert mock_sup_det['value'] == 'trois' + assert mock_sup_det['_revisions'][0]['value'] == 'deux' + assert mock_sup_det['_revisions'][1]['value'] == 'un' From 2ee23e20664a2ed1e9f27d5872b8fafd4d6ddfd5 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 22 Aug 2025 23:11:54 -0400 Subject: [PATCH 055/138] Draft manual_translation --- kobo/apps/subsequences__new/actions/base.py | 159 ++++-------------- .../actions/manual_transcription.py | 28 +-- .../actions/manual_translation.py | 144 ++++++++++++++++ kobo/apps/subsequences__new/type_aliases.py | 10 -- .../subsequences__new/utils/action_loader.py | 13 -- 5 files changed, 182 insertions(+), 172 deletions(-) create mode 100644 kobo/apps/subsequences__new/actions/manual_translation.py delete mode 100644 kobo/apps/subsequences__new/type_aliases.py delete mode 100644 kobo/apps/subsequences__new/utils/action_loader.py diff --git a/kobo/apps/subsequences__new/actions/base.py b/kobo/apps/subsequences__new/actions/base.py index 159b9c6a42..47d532ee7d 100644 --- a/kobo/apps/subsequences__new/actions/base.py +++ b/kobo/apps/subsequences__new/actions/base.py @@ -125,6 +125,18 @@ def something_to_get_the_data_back_out(self): DATE_MODIFIED_FIELD = '_dateModified' REVISIONS_FIELD = '_revisions' + def check_limits(self, user: User): + + if not settings.STRIPE_ENABLED or not self._is_usage_limited: + return + + calculator = ServiceUsageCalculator(user) + balances = calculator.get_usage_balances() + + balance = balances[self._limit_identifier] + if balance and balance['exceeded']: + raise UsageLimitExceededException() + @classmethod def validate_params(cls, params): 
jsonschema.validate(params, cls.params_schema) @@ -143,131 +155,7 @@ def result_schema(self): we need to solve the problem of storing multiple results for a single action """ - - # We want schema to look like this at the end - # schema_orig = { - # '$schema': 'https://json-schema.org/draft/2020-12/schema', - # 'title': 'Transcript with revisions', - # 'type': 'object', - # 'additionalProperties': False, - # 'properties': { - # 'language': {'$ref': '#/$defs/lang'}, - # 'transcript': {'$ref': '#/$defs/transcript'}, - # 'revisions': { - # 'type': 'array', - # 'minItems': 1, - # 'items': {'$ref': '#/$defs/revision'}, - # }, - # '_dateCreated': {'$ref': '#/$defs/dateTime'}, - # '_dateModified': {'$ref': '#/$defs/dateTime'}, - # }, - # 'required': ['_dateCreated', '_dateModified'], - # 'allOf': [ - # { - # '$ref': '#/$defs/lang_transcript_dependency' - # } - # ], - # '$defs': { - # 'lang': {'type': 'string', 'enum': self.languages}, - # 'transcript': {'type': 'string'}, - # 'dateTime': {'type': 'string', 'format': 'date-time'}, - # 'lang_transcript_dependency': { - # 'allOf': [ - # { - # 'if': {'required': ['language']}, - # 'then': {'required': ['transcript']} - # }, - # { - # 'if': {'required': ['transcript']}, - # 'then': {'required': ['language']} - # } - # ] - # }, - # 'revision': { - # 'type': 'object', - # 'additionalProperties': False, - # 'properties': { - # 'language': {'$ref': '#/$defs/lang'}, - # 'transcript': {'$ref': '#/$defs/transcript'}, - # '_dateCreated': {'$ref': '#/$defs/dateTime'}, - # }, - # 'required': ['_dateCreated'], - # 'allOf': [ - # { - # "$ref": "#/$defs/lang_transcript_dependency" - # } - # ], - # }, - # }, - # } - - result_schema_template = { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.REVISIONS_FIELD: { - 'type': 'array', - 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, - }, - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - 
self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], - '$defs': { - 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD], - }, - }, - } - - def _inject_data_schema( - destination_schema: dict, skipped_keys: list - ) -> dict: - - for key, value in self.data_schema.items(): - if key in skipped_keys: - continue - - if key in destination_schema: - if isinstance(destination_schema[key], dict): - destination_schema[key].update(self.data_schema[key]) - elif isinstance(destination_schema[key], list): - destination_schema[key].extend(self.data_schema[key]) - else: - destination_schema[key] = self.data_schema[key] - else: - destination_schema[key] = self.data_schema[key] - - # Inject data schema in result schema template - schema = deepcopy(result_schema_template) - _inject_data_schema(schema, ['$schema', 'title', 'type']) - - # Also inject data schema in the revision definition - _inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] - ) - - return schema - - def check_limits(self, user: User): - - if not settings.STRIPE_ENABLED or not self._is_usage_limited: - return - - calculator = ServiceUsageCalculator(user) - balances = calculator.get_usage_balances() - - balance = balances[self._limit_identifier] - if balance and balance['exceeded']: - raise UsageLimitExceededException() + return NotImplementedError def retrieve_data(self, action_data: dict) -> dict: """ @@ -344,6 +232,27 @@ def _is_usage_limited(self): """ raise NotImplementedError() + def _inject_data_schema(self, destination_schema: dict, skipped_keys: list): + """ + Utility function to inject data schema into another schema to + avoid repeating the same schema. + Useful to produce result schema. 
+ """ + + for key, value in self.data_schema.items(): + if key in skipped_keys: + continue + + if key in destination_schema: + if isinstance(destination_schema[key], dict): + destination_schema[key].update(self.data_schema[key]) + elif isinstance(destination_schema[key], list): + destination_schema[key].extend(self.data_schema[key]) + else: + destination_schema[key] = self.data_schema[key] + else: + destination_schema[key] = self.data_schema[key] + @property def _limit_identifier(self): # Example for automatic transcription diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index 55d30a9b7a..c6263d95aa 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -43,8 +43,7 @@ def __init__(self, source_question_xpath, params): @property def data_schema(self): # for lack of a better name """ - (currently) POST to "/advanced_submission_post/aSsEtUiD" - POST to "/api/v2/assets//data//supplemental" # idk, rename? 
+ POST to "/api/v2/assets//data//supplemental" { 'manual_transcription': { 'language': 'es', @@ -93,7 +92,7 @@ def record_repr(self, record: dict) -> dict: @property def result_schema(self): - result_schema_template = { + schema = { '$schema': 'https://json-schema.org/draft/2020-12/schema', 'type': 'object', 'additionalProperties': False, @@ -120,30 +119,11 @@ def result_schema(self): }, } - def _inject_data_schema( - destination_schema: dict, skipped_keys: list - ) -> dict: - - for key, value in self.data_schema.items(): - if key in skipped_keys: - continue - - if key in destination_schema: - if isinstance(destination_schema[key], dict): - destination_schema[key].update(self.data_schema[key]) - elif isinstance(destination_schema[key], list): - destination_schema[key].extend(self.data_schema[key]) - else: - destination_schema[key] = self.data_schema[key] - else: - destination_schema[key] = self.data_schema[key] - # Inject data schema in result schema template - schema = deepcopy(result_schema_template) - _inject_data_schema(schema, ['$schema', 'title', 'type']) + self._inject_data_schema(schema, ['$schema', 'title', 'type']) # Also inject data schema in the revision definition - _inject_data_schema( + self.__inject_data_schema( schema['$defs']['revision'], ['$schema', 'title', '$defs'] ) diff --git a/kobo/apps/subsequences__new/actions/manual_translation.py b/kobo/apps/subsequences__new/actions/manual_translation.py new file mode 100644 index 0000000000..f8e5528465 --- /dev/null +++ b/kobo/apps/subsequences__new/actions/manual_translation.py @@ -0,0 +1,144 @@ +from copy import deepcopy + +from .base import BaseAction + + +class ManualTranslationAction(BaseAction): + ID = 'manual_translation' + + def __init__(self, source_question_xpath, params): + self.source_question_xpath = source_question_xpath + self.params = params + + """ + For an audio question called `my_audio_question` that's translated + into 3 languages, the schema for `Asset.advanced_features` might 
look + like: + 'my_audio_question': { + 'manual_translation': [ + {'language': 'fr'}, + {'language': 'es'}, + ], + } + + The `params_schema` attribute defines the shape of the array where each + element is an object with a single string property for the translation + language. + """ + params_schema = { + 'type': 'array', + 'items': { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + } + }, + 'required': ['language'], + 'type': 'object', + }, + } + + @property + def data_schema(self): # for lack of a better name + """ + POST to "/api/v2/assets//data//supplemental" + { + 'manual_translation': { + 'language': 'es', + 'value': 'Almorzamos muy bien hoy', + } + } + """ + + return { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'language': {'$ref': '#/$defs/lang'}, + 'value': {'$ref': '#/$defs/translation'}, + }, + 'allOf': [{'$ref': '#/$defs/lang_translation_dependency'}], + '$defs': { + 'lang': {'type': 'string', 'enum': self.languages}, + 'translation': {'type': 'string'}, + 'lang_translation_dependency': { + 'allOf': [ + { + 'if': {'required': ['language']}, + 'then': {'required': ['value']}, + }, + { + 'if': {'required': ['value']}, + 'then': {'required': ['language']}, + }, + ] + }, + }, + } + + @property + def languages(self) -> list[str]: + languages = [] + for individual_params in self.params: + languages.append(individual_params['language']) + return languages + + def record_repr(self, record: dict) -> dict: + return record.get('value', '') + + @property + def result_schema(self): + + localized_value_schema = { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.REVISIONS_FIELD: { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, + }, + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': 
[self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], + } + + # Inject data schema in result schema template + self._inject_data_schema(localized_value_schema, ['$schema', 'title', 'type']) + + # Move localized_value_schema definitions to main schema + localized_value_schema_defs = localized_value_schema.pop('$defs') + + schema = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'array', + 'additionalProperties': False, + 'items': {'$ref': '#/$defs/localized_value_schema'}, + '$defs': { + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD], + }, + 'localized_value_schema': localized_value_schema, + **localized_value_schema_defs, + }, + } + + # Also inject data schema in the revision definition + self._inject_data_schema( + schema['$defs']['revision'], ['$schema', 'title', '$defs'] + ) + + return schema + + @property + def _is_usage_limited(self): + return False diff --git a/kobo/apps/subsequences__new/type_aliases.py b/kobo/apps/subsequences__new/type_aliases.py deleted file mode 100644 index 1cb1166985..0000000000 --- a/kobo/apps/subsequences__new/type_aliases.py +++ /dev/null @@ -1,10 +0,0 @@ -# NOT USED anymore, to be removed - -from typing import Type, TypeAlias, Union - -from .actions.manual_transcription import ManualTranscriptionAction - -# A list of possible action classes -ActionClassType: TypeAlias = Union[ - Type[ManualTranscriptionAction], -] diff --git a/kobo/apps/subsequences__new/utils/action_loader.py b/kobo/apps/subsequences__new/utils/action_loader.py deleted file mode 100644 index 83e1a543b8..0000000000 --- a/kobo/apps/subsequences__new/utils/action_loader.py +++ /dev/null @@ -1,13 +0,0 @@ -# NOT USED anymore, to be removed -from ..actions.manual_transcription import ManualTranscriptionAction -from ..type_aliases import ActionClassType 
- -ACTION_CLASS_ID_MAPPING = { - ManualTranscriptionAction.ID: ManualTranscriptionAction, -} - -def get_action_class(post_data: dict) -> ActionClassType: - question_xpath = next(iter(post_data)) - action_id = next(iter(post_data[question_xpath])) - action_cls = ACTION_CLASS_ID_MAPPING[action_id] - return question_xpath, action_cls, post_data[question_xpath][action_id] From 8787be258ae38ad32c4d5fe8c814943949462a57 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Fri, 22 Aug 2025 23:26:43 -0400 Subject: [PATCH 056/138] =?UTF-8?q?Rename=20subsequences=20to=20subsequenc?= =?UTF-8?q?es=5F=5Fold=20and=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit subsequences__new to subsequences --- kobo/apps/subsequences/__init__.py | 17 - kobo/apps/subsequences/actions/__init__.py | 5 + kobo/apps/subsequences/actions/base.py | 330 ++++++++++++------ .../actions/manual_transcription.py | 142 ++++---- .../actions/manual_translation.py | 0 kobo/apps/subsequences/exceptions.py | 22 +- kobo/apps/subsequences/models.py | 201 +++++++---- .../router.py | 0 .../schemas.py | 0 .../google => tests/api}/__init__.py | 0 .../{migrations => tests/api/v2}/__init__.py | 0 .../tests/api/v2/base.py | 0 .../tests/api/v2/test_permissions.py | 0 .../tests/test_manual_transcription.py | 0 .../subsequences__new/actions/__init__.py | 5 - kobo/apps/subsequences__new/actions/base.py | 262 -------------- .../actions/manual_transcription.py | 134 ------- kobo/apps/subsequences__new/exceptions.py | 16 - kobo/apps/subsequences__new/models.py | 141 -------- .../README-draft.md | 0 .../README.md | 0 kobo/apps/subsequences__old/__init__.py | 17 + .../actions}/__init__.py | 0 .../actions/automatic_transcription.py | 0 kobo/apps/subsequences__old/actions/base.py | 128 +++++++ .../actions/keyword_search.py | 0 .../actions/manual_transcription.py | 134 +++++++ .../actions/number_doubler.py | 0 .../actions/qual.py | 0 .../actions/states.py | 0 
.../actions/translation.py | 0 .../actions/unknown_action.py | 0 .../advanced_features_params_schema.py | 0 .../api_view.py | 0 .../apps.py | 0 .../constants.py | 0 kobo/apps/subsequences__old/exceptions.py | 20 ++ .../integrations/__init__.py | 0 .../integrations/google}/__init__.py | 0 .../integrations/google/base.py | 0 .../integrations/google/google_transcribe.py | 0 .../integrations/google/google_translate.py | 0 .../integrations/google/utils.py | 0 .../integrations/misc.py | 0 .../integrations/translate.py | 0 .../jsonschemas/qual_schema.py | 0 .../migrations/0001_initial.py | 0 ...ique_together_asset_and_submission_uuid.py | 0 ..._submissionextras_date_created_and_more.py | 0 ...4_increase_subsequences_submission_uuid.py | 0 .../migrations}/__init__.py | 0 kobo/apps/subsequences__old/models.py | 64 ++++ .../prev.py | 0 .../scripts}/__init__.py | 0 ...vate_advanced_features_for_newest_asset.py | 0 ...add_qual_to_last_question_of_last_asset.py | 0 .../scripts/export_analysis_form.py | 0 .../scripts/recalc_latest_subex.py | 0 .../scripts/repop_known_cols.py | 0 .../scripts/subsequences_export.py | 0 .../tasks}/__init__.py | 0 .../tests}/__init__.py | 0 .../tests/test_known_cols_utils.py | 0 .../tests/test_nlp_integration.py | 0 .../tests/test_number_doubler.py | 0 .../tests/test_proj_advanced_features.py | 0 .../tests/test_submission_extras_api_post.py | 0 .../tests/test_submission_extras_content.py | 0 .../tests/test_submission_stream.py | 0 .../urls.py | 0 .../utils/__init__.py | 0 .../utils/deprecation.py | 0 .../determine_export_cols_with_values.py | 0 .../utils/parse_known_cols.py | 0 74 files changed, 819 insertions(+), 819 deletions(-) rename kobo/apps/{subsequences__new => subsequences}/actions/manual_translation.py (100%) rename kobo/apps/{subsequences__new => subsequences}/router.py (100%) rename kobo/apps/{subsequences__new => subsequences}/schemas.py (100%) rename kobo/apps/subsequences/{integrations/google => tests/api}/__init__.py (100%) rename 
kobo/apps/subsequences/{migrations => tests/api/v2}/__init__.py (100%) rename kobo/apps/{subsequences__new => subsequences}/tests/api/v2/base.py (100%) rename kobo/apps/{subsequences__new => subsequences}/tests/api/v2/test_permissions.py (100%) rename kobo/apps/{subsequences__new => subsequences}/tests/test_manual_transcription.py (100%) delete mode 100644 kobo/apps/subsequences__new/actions/__init__.py delete mode 100644 kobo/apps/subsequences__new/actions/base.py delete mode 100644 kobo/apps/subsequences__new/actions/manual_transcription.py delete mode 100644 kobo/apps/subsequences__new/exceptions.py delete mode 100644 kobo/apps/subsequences__new/models.py rename kobo/apps/{subsequences => subsequences__old}/README-draft.md (100%) rename kobo/apps/{subsequences => subsequences__old}/README.md (100%) create mode 100644 kobo/apps/subsequences__old/__init__.py rename kobo/apps/{subsequences/scripts => subsequences__old/actions}/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/automatic_transcription.py (100%) create mode 100644 kobo/apps/subsequences__old/actions/base.py rename kobo/apps/{subsequences => subsequences__old}/actions/keyword_search.py (100%) create mode 100644 kobo/apps/subsequences__old/actions/manual_transcription.py rename kobo/apps/{subsequences => subsequences__old}/actions/number_doubler.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/qual.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/states.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/translation.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/unknown_action.py (100%) rename kobo/apps/{subsequences => subsequences__old}/advanced_features_params_schema.py (100%) rename kobo/apps/{subsequences => subsequences__old}/api_view.py (100%) rename kobo/apps/{subsequences => subsequences__old}/apps.py (100%) rename kobo/apps/{subsequences => subsequences__old}/constants.py (100%) 
create mode 100644 kobo/apps/subsequences__old/exceptions.py rename kobo/apps/{subsequences => subsequences__old}/integrations/__init__.py (100%) rename kobo/apps/{subsequences/tasks => subsequences__old/integrations/google}/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/base.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/google_transcribe.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/google_translate.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/utils.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/misc.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/translate.py (100%) rename kobo/apps/{subsequences => subsequences__old}/jsonschemas/qual_schema.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0001_initial.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0003_alter_submissionextras_date_created_and_more.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0004_increase_subsequences_submission_uuid.py (100%) rename kobo/apps/{subsequences__new => subsequences__old/migrations}/__init__.py (100%) create mode 100644 kobo/apps/subsequences__old/models.py rename kobo/apps/{subsequences => subsequences__old}/prev.py (100%) rename kobo/apps/{subsequences__new/tests => subsequences__old/scripts}/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/activate_advanced_features_for_newest_asset.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/add_qual_to_last_question_of_last_asset.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/export_analysis_form.py (100%) rename kobo/apps/{subsequences => 
subsequences__old}/scripts/recalc_latest_subex.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/repop_known_cols.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/subsequences_export.py (100%) rename kobo/apps/{subsequences__new/tests/api => subsequences__old/tasks}/__init__.py (100%) rename kobo/apps/{subsequences__new/tests/api/v2 => subsequences__old/tests}/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_known_cols_utils.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_nlp_integration.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_number_doubler.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_proj_advanced_features.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_submission_extras_api_post.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_submission_extras_content.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_submission_stream.py (100%) rename kobo/apps/{subsequences => subsequences__old}/urls.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/deprecation.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/determine_export_cols_with_values.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/parse_known_cols.py (100%) diff --git a/kobo/apps/subsequences/__init__.py b/kobo/apps/subsequences/__init__.py index 5f46bdbac3..e69de29bb2 100644 --- a/kobo/apps/subsequences/__init__.py +++ b/kobo/apps/subsequences/__init__.py @@ -1,17 +0,0 @@ -''' -`kobo.apps.subsequences` --as in Sub(mission)Sequences is an app for defining -and following a sequence of actions or changes to a submission that has come -into kobo. 
- -models: -- SubmissionData: - Holds a JSONField with the "supplementalData" necessary to complete the - -tasks: -(things that are queued in celery for later action) - -needs writeup: - - how to develop / debug within this app - - description of tests - -''' diff --git a/kobo/apps/subsequences/actions/__init__.py b/kobo/apps/subsequences/actions/__init__.py index e69de29bb2..274ca102a5 100644 --- a/kobo/apps/subsequences/actions/__init__.py +++ b/kobo/apps/subsequences/actions/__init__.py @@ -0,0 +1,5 @@ +from .manual_transcription import ManualTranscriptionAction + +# TODO, what about using a loader for every class in "actions" folder (except base.py)? +ACTIONS = (ManualTranscriptionAction,) +ACTION_IDS_TO_CLASSES = {a.ID: a for a in ACTIONS} diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index f8dbe659aa..47d532ee7d 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -1,128 +1,262 @@ import datetime -from zoneinfo import ZoneInfo +from copy import deepcopy +import jsonschema +from django.conf import settings from django.utils import timezone -from kobo.apps.subsequences.constants import GOOGLETS, GOOGLETX +from kobo.apps.kobo_auth.shortcuts import User +from kpi.exceptions import UsageLimitExceededException +from kpi.utils.usage_calculator import ServiceUsageCalculator -ACTION_NEEDED = 'ACTION_NEEDED' -PASSES = 'PASSES' +""" +### All actions must have the following components + +* (check!) a unique identifier for the action +* three jsonschemas: + 1. (check!) one to validate the parameters used to configure the action + * `ADVANCED_FEATURES_PARAMS_SCHEMA` + 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) + * the result of `modify_jsonschema()` + 3. 
one to validate the result of the action - the result of `modify_jsonschema()` + * OH NO, this doesn't happen at all yet +* a handler that receives a submission (and other metadata) and processes it +""" + +""" +idea of example content in asset.advanced_features (what kind of actions are activated per question) +{ + '_version': '20250820', + '_schema': { + 'my_audio_question': { + 'manual_transcription': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], + }, + 'my_video_question': { + 'manual_transcription': [{'language': 'en'}], + }, + 'my_number_question': { + 'number_multiplier': [{'multiplier': 3}], + }, + }, +} + +idea of example data in SubmissionExtras based on the above +{ + '_version': '20250820', + '_submission': '', + 'my_audio_question': { + 'manual_transcription': { + 'transcript': 'هائج', + 'language': 'ar', + '_dateCreated': '2025-08-21T20:55:42.012053Z', + '_dateModified': '2025-08-21T20:57:28.154567Z', + '_revisions': [ + { + 'transcript': 'فارغ', + 'language': 'ar', + '_dateCreated': '2025-08-21T20:55:42.012053Z', + } + ], + }, + 'manual_translation': [ + { + 'language': 'en', + 'translation': 'berserk', + '_dateCreated': '2025-08-21T21:39:42.141306Z', + '_dateModified': '2025-08-21T21:39:42.141306Z', + }, + { + 'language': 'es', + 'translation': 'enloquecido', + '_dateCreated': '2025-08-21T21:40:54.644308Z', + '_dateModified': '2025-08-21T22:00:10.862880Z', + '_revisions': [ + { + 'translation': 'loco', + 'language': 'es', + '_dateCreated': '2025-08-21T21:40:54.644308Z', + } + ], + }, + ], + }, + 'my_video_question': { + 'manual_transcription': { + 'transcript': 'sea horse sea hell', + 'language': 'en', + '_dateCreated': '2025-08-21T21:06:20.059117Z', + '_dateModified': '2025-08-21T21:06:20.059117Z', + }, + }, + 'my_number_question': { + 'number_multiplier': { + 'numberMultiplied': 99, + '_dateCreated': '2025-08-21T21:09:34.504546Z', + '_dateModified': 
'2025-08-21T21:09:34.504546Z', + }, + }, +} +""" + + +def utc_datetime_to_js_str(dt: datetime.datetime) -> str: + """ + Return a string to represent a `datetime` following the simplification of + the ISO 8601 format used by JavaScript + """ + # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format + if dt.utcoffset() or not dt.tzinfo: + raise NotImplementedError('Only UTC datetimes are supported') + return dt.isoformat().replace('+00:00', 'Z') class BaseAction: - ID = None - _destination_field = '_supplementalDetails' + def something_to_get_the_data_back_out(self): + # might need to deal with multiple columns for one action + # ^ definitely will + raise NotImplementedError + + DATE_CREATED_FIELD = '_dateCreated' + DATE_MODIFIED_FIELD = '_dateModified' + REVISIONS_FIELD = '_revisions' + + def check_limits(self, user: User): + + if not settings.STRIPE_ENABLED or not self._is_usage_limited: + return + + calculator = ServiceUsageCalculator(user) + balances = calculator.get_usage_balances() - DATE_CREATED_FIELD = 'dateCreated' - DATE_MODIFIED_FIELD = 'dateModified' - DELETE = '⌫' + balance = balances[self._limit_identifier] + if balance and balance['exceeded']: + raise UsageLimitExceededException() - def __init__(self, params): - self.load_params(params) + @classmethod + def validate_params(cls, params): + jsonschema.validate(params, cls.params_schema) - def cur_time(self): - return datetime.datetime.now(tz=ZoneInfo('UTC')).strftime('%Y-%m-%dT%H:%M:%SZ') + def validate_data(self, data): + jsonschema.validate(data, self.data_schema) - def load_params(self, params): - raise NotImplementedError('subclass must define a load_params method') + def validate_result(self, result): + jsonschema.validate(result, self.result_schema) - def run_change(self, params): - raise NotImplementedError('subclass must define a run_change method') + @property + def result_schema(self): + """ + we also need a schema to define the final result that will be written + 
into SubmissionExtras - def check_submission_status(self, submission): - return PASSES + we need to solve the problem of storing multiple results for a single action + """ + return NotImplementedError - def modify_jsonschema(self, schema): - return schema + def retrieve_data(self, action_data: dict) -> dict: + """ + `action_data` must be ONLY the data for this particular action + instance, not the entire SubmissionExtras caboodle - def compile_revised_record(self, content, edits): + descendant classes could override with special manipulation if needed """ - a method that applies changes to a json structure and appends previous - changes to a revision history + return action_data + + def revise_field(self, *args, **kwargs): + # TODO: remove this alias + import warnings + warnings.warn('Oh no, this method is going away!', DeprecationWarning) + return self.revise_data(*args, **kwargs) + + def revise_data( + self, submission: dict, submission_supplement: dict, edit: dict + ) -> dict: + """ + for actions that may have lengthy data, are we content to store the + entirety of the data for each revision, or do we need some kind of + differencing system? """ + self.validate_data(edit) + self.raise_for_any_leading_underscore_key(edit) - # TODO: should this handle managing `DATE_CREATED_FIELD`, - # `DATE_MODIFIED_FIELD`, etc. instead of delegating that to - # `revise_record()` as it currently does? 
+ now_str = utc_datetime_to_js_str(timezone.now()) + revision = deepcopy(submission_supplement) + new_record = deepcopy(edit) + revisions = revision.pop(self.REVISIONS_FIELD, []) - if self.ID is None: - return content - for field_name, vals in edits.items(): - if field_name == 'submission': - continue + revision_creation_date = revision.pop(self.DATE_MODIFIED_FIELD, now_str) + record_creation_date = revision.pop(self.DATE_CREATED_FIELD, now_str) + revision[self.DATE_CREATED_FIELD] = revision_creation_date + new_record[self.DATE_MODIFIED_FIELD] = now_str + + if submission_supplement: + revisions.insert(0, revision) + new_record[self.REVISIONS_FIELD] = revisions - erecord = vals.get(self.ID) - o_keyval = content.get(field_name, {}) - for extra in [GOOGLETX, GOOGLETS]: - if extra in vals: - o_keyval[extra] = vals[extra] - content[field_name] = o_keyval + new_record[self.DATE_CREATED_FIELD] = record_creation_date - orecord = o_keyval.get(self.ID) - if erecord is None: + self.validate_result(new_record) + + return new_record + + + @staticmethod + def raise_for_any_leading_underscore_key(d: dict): + """ + Keys with leading underscores are reserved for metadata like + `_dateCreated`, `_dateModified`, and `_revisions`. No key with a + leading underscore should be present in data POSTed by a client or + generated by an action. + + Schema validation should block invalid keys, but this method exists as + a redundant check to guard against schema mistakes. 
+ """ + for k in list(d.keys()): + try: + match = k.startswith('_') + except AttributeError: continue - if self.is_auto_request(erecord): - content[field_name].update( - self.auto_request_repr(erecord) + if match: + raise Exception( + 'An unexpected key with a leading underscore was found' ) - continue - if orecord is None: - compiled_record = self.init_field(erecord) - elif not self.has_change(orecord, erecord): - continue - else: - compiled_record = self.revise_field(orecord, erecord) - o_keyval[self.ID] = compiled_record - content[field_name] = o_keyval - return content - def auto_request_repr(self, erecord): + @property + def _is_usage_limited(self): + """ + Returns whether an action should check for usage limits. + """ raise NotImplementedError() - def is_auto_request(self, erecord): - return self.record_repr(erecord) == 'GOOGLE' - - def init_field(self, edit): - edit[self.DATE_CREATED_FIELD] = \ - edit[self.DATE_MODIFIED_FIELD] = \ - str(timezone.now()).split('.')[0] - return {**edit, 'revisions': []} - - def revise_field(self, original, edit): - if self.record_repr(edit) == self.DELETE: - return {} - record = {**original} - revisions = record.pop('revisions', []) - if self.DATE_CREATED_FIELD in record: - del record[self.DATE_CREATED_FIELD] - edit[self.DATE_MODIFIED_FIELD] = \ - edit[self.DATE_CREATED_FIELD] = \ - str(timezone.now()).split('.')[0] - if len(revisions) > 0: - date_modified = revisions[-1].get(self.DATE_MODIFIED_FIELD) - edit[self.DATE_CREATED_FIELD] = date_modified - return {**edit, 'revisions': [record, *revisions]} - - def record_repr(self, record): - return record.get('value') - - def has_change(self, original, edit): - return self.record_repr(original) != self.record_repr(edit) + def _inject_data_schema(self, destination_schema: dict, skipped_keys: list): + """ + Utility function to inject data schema into another schema to + avoid repeating the same schema. + Useful to produce result schema. 
+ """ - @classmethod - def build_params(cls, *args, **kwargs): - raise NotImplementedError(f'{cls.__name__} has not implemented a build_params method') + for key, value in self.data_schema.items(): + if key in skipped_keys: + continue - def get_xpath(self, row): - # return the full path... - for name_field in ['xpath', 'name', '$autoname']: - if name_field in row: - return row[name_field] - return None + if key in destination_schema: + if isinstance(destination_schema[key], dict): + destination_schema[key].update(self.data_schema[key]) + elif isinstance(destination_schema[key], list): + destination_schema[key].extend(self.data_schema[key]) + else: + destination_schema[key] = self.data_schema[key] + else: + destination_schema[key] = self.data_schema[key] - @classmethod - def get_name(cls, row): - for name_field in ['name', '$autoname']: - if name_field in row: - return row[name_field] - return None + @property + def _limit_identifier(self): + # Example for automatic transcription + # + # from kobo.apps.organizations.constants import UsageType + # return UsageType.ASR_SECONDS + raise NotImplementedError() diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 1a64212763..c6263d95aa 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -1,51 +1,7 @@ -import jsonschema -from ..constants import TRANSCRIBABLE_SOURCE_TYPES -#from ..actions.base import BaseAction - -""" -### All actions must have the following components - -* (check!) a unique identifier for the action -* three jsonschemas: - 1. (check!) one to validate the parameters used to configure the action - * `ADVANCED_FEATURES_PARAMS_SCHEMA` - 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) - * the result of `modify_jsonschema()` - 3. 
one to validate the result of the action - the result of `modify_jsonschema()` - * OH NO, this doesn't happen at all yet -* a handler that receives a submission (and other metadata) and processes it -""" - -""" -idea of example content in asset.advanced_features (what kind of actions are activated per question) -{ - 'version': '20250820', - 'schema': { - 'my_audio_question': { - 'manual_transcription': [ - {'language': 'ar'}, - {'language': 'bn'}, - {'language': 'es'}, - ], - 'manual_translation': [{'language': 'fr'}], - }, - 'my_video_question': { - 'manual_transcription': [{'language': 'en'}], - }, - 'my_number_question': { - 'number_multiplier': [{'multiplier': 3}], - }, - }, -} -""" +from copy import deepcopy -class BaseAction: - @classmethod - def validate_params(cls, params): - jsonschema.validate(params, cls.params_schema) +from .base import BaseAction - def validate_data(self, data): - jsonschema.validate(data, self.data_schema) class ManualTranscriptionAction(BaseAction): ID = 'manual_transcription' @@ -87,48 +43,92 @@ def __init__(self, source_question_xpath, params): @property def data_schema(self): # for lack of a better name """ - (currently) POST to "/advanced_submission_post/aSsEtUiD" - POST to "/api/v2/assets//data//supplemental" # idk, rename? 
+ POST to "/api/v2/assets//data//supplemental" { 'manual_transcription': { 'language': 'es', - 'transcript': 'Almorzamos muy bien hoy', + 'value': 'Almorzamos muy bien hoy', } } """ + + return { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'language': {'$ref': '#/$defs/lang'}, + 'value': {'$ref': '#/$defs/transcript'}, + }, + 'allOf': [{'$ref': '#/$defs/lang_transcript_dependency'}], + '$defs': { + 'lang': {'type': 'string', 'enum': self.languages}, + 'transcript': {'type': 'string'}, + 'lang_transcript_dependency': { + 'allOf': [ + { + 'if': {'required': ['language']}, + 'then': {'required': ['value']}, + }, + { + 'if': {'required': ['value']}, + 'then': {'required': ['language']}, + }, + ] + }, + }, + } + + @property + def languages(self) -> list[str]: languages = [] for individual_params in self.params: languages.append(individual_params['language']) + return languages - return { + def record_repr(self, record: dict) -> dict: + return record.get('value', '') + + @property + def result_schema(self): + + schema = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', 'additionalProperties': False, 'properties': { - 'language': { - 'type': 'string', - 'enum': languages, + self.REVISIONS_FIELD: { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, }, - 'transcript': { - 'type': 'string', + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], + '$defs': { + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD], }, }, - 'required': ['language', 'transcript'], - 'type': 'object', } - @property - @classmethod - def 
result_schema(cls): - """ - we also need a schema to define the final result that will be written - into SubmissionExtras + # Inject data schema in result schema template + self._inject_data_schema(schema, ['$schema', 'title', 'type']) - we need to solve the problem of storing multiple results for a single action - """ - raise NotImplementedError + # Also inject data schema in the revision definition + self.__inject_data_schema( + schema['$defs']['revision'], ['$schema', 'title', '$defs'] + ) + return schema - def load_params(self, params): - """ - idk maybe we use this to read the language out of `Asset.advanced_features` - """ - self.possible_transcribed_fields = params['values'] + @property + def _is_usage_limited(self): + return False diff --git a/kobo/apps/subsequences__new/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py similarity index 100% rename from kobo/apps/subsequences__new/actions/manual_translation.py rename to kobo/apps/subsequences/actions/manual_translation.py diff --git a/kobo/apps/subsequences/exceptions.py b/kobo/apps/subsequences/exceptions.py index 541e4d3f9c..d15edc89ee 100644 --- a/kobo/apps/subsequences/exceptions.py +++ b/kobo/apps/subsequences/exceptions.py @@ -1,20 +1,16 @@ -class AudioTooLongError(Exception): - """Audio file is too long for specified speech service""" - +class InvalidAction(Exception): + """ + The referenced action does not exist or was not configured for the given + question XPath at the asset level + """ -class SubsequenceTimeoutError(Exception): pass -class TranscriptionResultsNotFound(Exception): +class InvalidXPath(Exception): """ - No results returned by specified transcription service + The referenced question XPath was not configured for supplemental data at + the asset level """ - -class TranslationAsyncResultAvailable(Exception): - pass - - -class TranslationResultsNotFound(Exception): - pass + pass \ No newline at end of file diff --git a/kobo/apps/subsequences/models.py 
b/kobo/apps/subsequences/models.py index e26957e8dc..7d49e6c746 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -1,64 +1,141 @@ -# coding: utf-8 +from kobo.apps.subsequences.models import ( + SubmissionExtras, # just bullshit for now +) +from kpi.models import Asset +from .actions import ACTION_IDS_TO_CLASSES +from .exceptions import InvalidAction, InvalidXPath +from .schemas import validate_submission_supplement -from django.db import models +class SubmissionSupplement(SubmissionExtras): + class Meta(SubmissionExtras.Meta): + proxy = True + app_label = 'subsequences' -from kpi.models import Asset -from kpi.models.abstract_models import AbstractTimeStampedModel -from .constants import GOOGLETS, GOOGLETX -from .utils.determine_export_cols_with_values import determine_export_cols_indiv - - -class SubmissionExtras(AbstractTimeStampedModel): - - submission_uuid = models.CharField(max_length=249) - content = models.JSONField(default=dict) - asset = models.ForeignKey( - Asset, - related_name='submission_extras', - on_delete=models.CASCADE, - ) - - class Meta: - # ideally `submission_uuid` is universally unique, but its uniqueness - # per-asset is most important - unique_together = (('asset', 'submission_uuid'),) - - def save(self, *args, **kwargs): - # We need to import these here because of circular imports - from .integrations.google.google_transcribe import GoogleTranscriptionService - from .integrations.google.google_translate import GoogleTranslationService - - features = self.asset.advanced_features - for xpath, vals in self.content.items(): - if 'transcript' in features: - options = vals.get(GOOGLETS, {}) - if options.get('status') == 'requested': - service = GoogleTranscriptionService(self) - vals[GOOGLETS] = service.process_data(xpath, vals) - if 'translation' in features: - options = vals.get(GOOGLETX, {}) - if options.get('status') == 'requested': - service = GoogleTranslationService(self) - vals[GOOGLETX] = 
service.process_data(xpath, vals) - - asset_changes = False - asset_known_cols = self.asset.known_cols - for kc in determine_export_cols_indiv(self.content): - if kc not in asset_known_cols: - asset_changes = True - asset_known_cols.append(kc) - - if asset_changes: - self.asset.known_cols = asset_known_cols - self.asset.save(create_version=False) - - super().save(*args, **kwargs) - - @property - def full_content(self): - _content = {} - _content.update(self.content) - _content.update({ - 'timestamp': str(self.date_created), - }) - return _content + def revise_data( + asset: Asset, submission: dict, incoming_data: dict + ) -> dict: + schema_version = incoming_data.pop('_version') + if schema_version != '20250820': + # TODO: migrate from old per-submission schema + raise NotImplementedError + + if asset.advanced_features['_version'] != schema_version: + # TODO: migrate from old per-asset schema + raise NotImplementedError + + submission_uuid = submission['meta/rootUuid'] # constant? + supplemental_data = SubmissionExtras.objects.get_or_create( + asset=asset, submission_uuid=submission_uuid + )[0].content # lock it? 
+ + retrieved_supplemental_data = {} + + for question_xpath, data_for_this_question in incoming_data.items(): + try: + action_configs_for_this_question = asset.advanced_features[ + '_actionConfigs' + ][question_xpath] + except KeyError as e: + raise InvalidXPath from e + + for action_id, action_data in data_for_this_question.items(): + try: + action_class = ACTION_IDS_TO_CLASSES[action_id] + except KeyError as e: + raise InvalidAction from e + try: + action_params = action_configs_for_this_question[action_id] + except KeyError as e: + raise InvalidAction from e + + action = action_class(question_xpath, action_params) + action.check_limits(asset.owner) + question_supplemental_data = supplemental_data.setdefault( + question_xpath, {} + ) + action_supplemental_data = question_supplemental_data.setdefault( + action_id, {} + ) + action_supplemental_data = action.revise_field( + submission, action_supplemental_data, action_data + ) + question_supplemental_data[action_id] = action_supplemental_data + retrieved_supplemental_data.setdefault(question_xpath, {})[ + action_id + ] = action.retrieve_data(action_supplemental_data) + + supplemental_data['_version'] = schema_version + validate_submission_supplement(asset, supplemental_data) + SubmissionExtras.objects.filter( + asset=asset, submission_uuid=submission_uuid + ).update(content=supplemental_data) + + retrieved_supplemental_data['_version'] = schema_version + return retrieved_supplemental_data + + + def retrieve_data(asset: Asset, submission_uuid: str) -> dict: + try: + supplemental_data = SubmissionExtras.objects.get( + asset=asset, submission_uuid=submission_uuid + ).content + except SubmissionExtras.DoesNotExist: + return {} + + schema_version = supplemental_data.pop('_version') + if schema_version != '20250820': + # TODO: migrate from old per-submission schema + raise NotImplementedError + + if asset.advanced_features['_version'] != schema_version: + # TODO: migrate from old per-asset schema + raise 
NotImplementedError + + retrieved_supplemental_data = {} + + for question_xpath, data_for_this_question in supplemental_data.items(): + processed_data_for_this_question = ( + retrieved_supplemental_data.setdefault(question_xpath, {}) + ) + action_configs = asset.advanced_features['_actionConfigs'] + try: + action_configs_for_this_question = action_configs[question_xpath] + except KeyError: + # There's still supplemental data for this question at the + # submission level, but the question is no longer configured at the + # asset level. + # Allow this for now, but maybe forbid later and also forbid + # removing things from the asset-level action configuration? + # Actions could be disabled or hidden instead of being removed + + # FIXME: divergence between the asset-level configuration and + # submission-level supplemental data is going to cause schema + # validation failures! We defo need to forbid removal of actions + # and instead provide a way to mark them as deleted + continue + + for action_id, action_data in data_for_this_question.items(): + try: + action_class = ACTION_IDS_TO_CLASSES[action_id] + except KeyError: + # An action class present in the submission data no longer + # exists in the application code + # TODO: log an error + continue + try: + action_params = action_configs_for_this_question[action_id] + except KeyError: + # An action class present in the submission data is no longer + # configured at the asset level for this question + # Allow this for now, but maybe forbid later and also forbid + # removing things from the asset-level action configuration? 
+ # Actions could be disabled or hidden instead of being removed + continue + + action = action_class(question_xpath, action_params) + processed_data_for_this_question[action_id] = action.retrieve_data( + action_data + ) + + retrieved_supplemental_data['_version'] = schema_version + return retrieved_supplemental_data \ No newline at end of file diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences/router.py similarity index 100% rename from kobo/apps/subsequences__new/router.py rename to kobo/apps/subsequences/router.py diff --git a/kobo/apps/subsequences__new/schemas.py b/kobo/apps/subsequences/schemas.py similarity index 100% rename from kobo/apps/subsequences__new/schemas.py rename to kobo/apps/subsequences/schemas.py diff --git a/kobo/apps/subsequences/integrations/google/__init__.py b/kobo/apps/subsequences/tests/api/__init__.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/__init__.py rename to kobo/apps/subsequences/tests/api/__init__.py diff --git a/kobo/apps/subsequences/migrations/__init__.py b/kobo/apps/subsequences/tests/api/v2/__init__.py similarity index 100% rename from kobo/apps/subsequences/migrations/__init__.py rename to kobo/apps/subsequences/tests/api/v2/__init__.py diff --git a/kobo/apps/subsequences__new/tests/api/v2/base.py b/kobo/apps/subsequences/tests/api/v2/base.py similarity index 100% rename from kobo/apps/subsequences__new/tests/api/v2/base.py rename to kobo/apps/subsequences/tests/api/v2/base.py diff --git a/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py b/kobo/apps/subsequences/tests/api/v2/test_permissions.py similarity index 100% rename from kobo/apps/subsequences__new/tests/api/v2/test_permissions.py rename to kobo/apps/subsequences/tests/api/v2/test_permissions.py diff --git a/kobo/apps/subsequences__new/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py similarity index 100% rename from 
kobo/apps/subsequences__new/tests/test_manual_transcription.py rename to kobo/apps/subsequences/tests/test_manual_transcription.py diff --git a/kobo/apps/subsequences__new/actions/__init__.py b/kobo/apps/subsequences__new/actions/__init__.py deleted file mode 100644 index 274ca102a5..0000000000 --- a/kobo/apps/subsequences__new/actions/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .manual_transcription import ManualTranscriptionAction - -# TODO, what about using a loader for every class in "actions" folder (except base.py)? -ACTIONS = (ManualTranscriptionAction,) -ACTION_IDS_TO_CLASSES = {a.ID: a for a in ACTIONS} diff --git a/kobo/apps/subsequences__new/actions/base.py b/kobo/apps/subsequences__new/actions/base.py deleted file mode 100644 index 47d532ee7d..0000000000 --- a/kobo/apps/subsequences__new/actions/base.py +++ /dev/null @@ -1,262 +0,0 @@ -import datetime -from copy import deepcopy - -import jsonschema -from django.conf import settings -from django.utils import timezone - -from kobo.apps.kobo_auth.shortcuts import User -from kpi.exceptions import UsageLimitExceededException -from kpi.utils.usage_calculator import ServiceUsageCalculator - -""" -### All actions must have the following components - -* (check!) a unique identifier for the action -* three jsonschemas: - 1. (check!) one to validate the parameters used to configure the action - * `ADVANCED_FEATURES_PARAMS_SCHEMA` - 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) - * the result of `modify_jsonschema()` - 3. 
one to validate the result of the action - the result of `modify_jsonschema()` - * OH NO, this doesn't happen at all yet -* a handler that receives a submission (and other metadata) and processes it -""" - -""" -idea of example content in asset.advanced_features (what kind of actions are activated per question) -{ - '_version': '20250820', - '_schema': { - 'my_audio_question': { - 'manual_transcription': [ - {'language': 'ar'}, - {'language': 'bn'}, - {'language': 'es'}, - ], - 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], - }, - 'my_video_question': { - 'manual_transcription': [{'language': 'en'}], - }, - 'my_number_question': { - 'number_multiplier': [{'multiplier': 3}], - }, - }, -} - -idea of example data in SubmissionExtras based on the above -{ - '_version': '20250820', - '_submission': '', - 'my_audio_question': { - 'manual_transcription': { - 'transcript': 'هائج', - 'language': 'ar', - '_dateCreated': '2025-08-21T20:55:42.012053Z', - '_dateModified': '2025-08-21T20:57:28.154567Z', - '_revisions': [ - { - 'transcript': 'فارغ', - 'language': 'ar', - '_dateCreated': '2025-08-21T20:55:42.012053Z', - } - ], - }, - 'manual_translation': [ - { - 'language': 'en', - 'translation': 'berserk', - '_dateCreated': '2025-08-21T21:39:42.141306Z', - '_dateModified': '2025-08-21T21:39:42.141306Z', - }, - { - 'language': 'es', - 'translation': 'enloquecido', - '_dateCreated': '2025-08-21T21:40:54.644308Z', - '_dateModified': '2025-08-21T22:00:10.862880Z', - '_revisions': [ - { - 'translation': 'loco', - 'language': 'es', - '_dateCreated': '2025-08-21T21:40:54.644308Z', - } - ], - }, - ], - }, - 'my_video_question': { - 'manual_transcription': { - 'transcript': 'sea horse sea hell', - 'language': 'en', - '_dateCreated': '2025-08-21T21:06:20.059117Z', - '_dateModified': '2025-08-21T21:06:20.059117Z', - }, - }, - 'my_number_question': { - 'number_multiplier': { - 'numberMultiplied': 99, - '_dateCreated': '2025-08-21T21:09:34.504546Z', - '_dateModified': 
'2025-08-21T21:09:34.504546Z', - }, - }, -} -""" - - -def utc_datetime_to_js_str(dt: datetime.datetime) -> str: - """ - Return a string to represent a `datetime` following the simplification of - the ISO 8601 format used by JavaScript - """ - # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format - if dt.utcoffset() or not dt.tzinfo: - raise NotImplementedError('Only UTC datetimes are supported') - return dt.isoformat().replace('+00:00', 'Z') - - -class BaseAction: - def something_to_get_the_data_back_out(self): - # might need to deal with multiple columns for one action - # ^ definitely will - raise NotImplementedError - - DATE_CREATED_FIELD = '_dateCreated' - DATE_MODIFIED_FIELD = '_dateModified' - REVISIONS_FIELD = '_revisions' - - def check_limits(self, user: User): - - if not settings.STRIPE_ENABLED or not self._is_usage_limited: - return - - calculator = ServiceUsageCalculator(user) - balances = calculator.get_usage_balances() - - balance = balances[self._limit_identifier] - if balance and balance['exceeded']: - raise UsageLimitExceededException() - - @classmethod - def validate_params(cls, params): - jsonschema.validate(params, cls.params_schema) - - def validate_data(self, data): - jsonschema.validate(data, self.data_schema) - - def validate_result(self, result): - jsonschema.validate(result, self.result_schema) - - @property - def result_schema(self): - """ - we also need a schema to define the final result that will be written - into SubmissionExtras - - we need to solve the problem of storing multiple results for a single action - """ - return NotImplementedError - - def retrieve_data(self, action_data: dict) -> dict: - """ - `action_data` must be ONLY the data for this particular action - instance, not the entire SubmissionExtras caboodle - - descendant classes could override with special manipulation if needed - """ - return action_data - - def revise_field(self, *args, **kwargs): - # TODO: remove this alias - import 
warnings - warnings.warn('Oh no, this method is going away!', DeprecationWarning) - return self.revise_data(*args, **kwargs) - - def revise_data( - self, submission: dict, submission_supplement: dict, edit: dict - ) -> dict: - """ - for actions that may have lengthy data, are we content to store the - entirety of the data for each revision, or do we need some kind of - differencing system? - """ - self.validate_data(edit) - self.raise_for_any_leading_underscore_key(edit) - - now_str = utc_datetime_to_js_str(timezone.now()) - revision = deepcopy(submission_supplement) - new_record = deepcopy(edit) - revisions = revision.pop(self.REVISIONS_FIELD, []) - - revision_creation_date = revision.pop(self.DATE_MODIFIED_FIELD, now_str) - record_creation_date = revision.pop(self.DATE_CREATED_FIELD, now_str) - revision[self.DATE_CREATED_FIELD] = revision_creation_date - new_record[self.DATE_MODIFIED_FIELD] = now_str - - if submission_supplement: - revisions.insert(0, revision) - new_record[self.REVISIONS_FIELD] = revisions - - new_record[self.DATE_CREATED_FIELD] = record_creation_date - - self.validate_result(new_record) - - return new_record - - - @staticmethod - def raise_for_any_leading_underscore_key(d: dict): - """ - Keys with leading underscores are reserved for metadata like - `_dateCreated`, `_dateModified`, and `_revisions`. No key with a - leading underscore should be present in data POSTed by a client or - generated by an action. - - Schema validation should block invalid keys, but this method exists as - a redundant check to guard against schema mistakes. - """ - for k in list(d.keys()): - try: - match = k.startswith('_') - except AttributeError: - continue - if match: - raise Exception( - 'An unexpected key with a leading underscore was found' - ) - - @property - def _is_usage_limited(self): - """ - Returns whether an action should check for usage limits. 
- """ - raise NotImplementedError() - - def _inject_data_schema(self, destination_schema: dict, skipped_keys: list): - """ - Utility function to inject data schema into another schema to - avoid repeating the same schema. - Useful to produce result schema. - """ - - for key, value in self.data_schema.items(): - if key in skipped_keys: - continue - - if key in destination_schema: - if isinstance(destination_schema[key], dict): - destination_schema[key].update(self.data_schema[key]) - elif isinstance(destination_schema[key], list): - destination_schema[key].extend(self.data_schema[key]) - else: - destination_schema[key] = self.data_schema[key] - else: - destination_schema[key] = self.data_schema[key] - - @property - def _limit_identifier(self): - # Example for automatic transcription - # - # from kobo.apps.organizations.constants import UsageType - # return UsageType.ASR_SECONDS - raise NotImplementedError() diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py deleted file mode 100644 index c6263d95aa..0000000000 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ /dev/null @@ -1,134 +0,0 @@ -from copy import deepcopy - -from .base import BaseAction - - -class ManualTranscriptionAction(BaseAction): - ID = 'manual_transcription' - - def __init__(self, source_question_xpath, params): - self.source_question_xpath = source_question_xpath - self.params = params - - """ - For an audio question called `my_audio_question` that's transcribed - into 3 languages, the schema for `Asset.advanced_features` might look - like: - 'my_audio_question': { - 'manual_transcription': [ - {'language': 'ar'}, - {'language': 'bn'}, - {'language': 'es'}, - ], - } - - The `params_schema` attribute defines the shape of the array where each - element is an object with a single string property for the transcript - language. 
- """ - params_schema = { - 'type': 'array', - 'items': { - 'additionalProperties': False, - 'properties': { - 'language': { - 'type': 'string', - } - }, - 'required': ['language'], - 'type': 'object', - }, - } - - @property - def data_schema(self): # for lack of a better name - """ - POST to "/api/v2/assets//data//supplemental" - { - 'manual_transcription': { - 'language': 'es', - 'value': 'Almorzamos muy bien hoy', - } - } - """ - - return { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'language': {'$ref': '#/$defs/lang'}, - 'value': {'$ref': '#/$defs/transcript'}, - }, - 'allOf': [{'$ref': '#/$defs/lang_transcript_dependency'}], - '$defs': { - 'lang': {'type': 'string', 'enum': self.languages}, - 'transcript': {'type': 'string'}, - 'lang_transcript_dependency': { - 'allOf': [ - { - 'if': {'required': ['language']}, - 'then': {'required': ['value']}, - }, - { - 'if': {'required': ['value']}, - 'then': {'required': ['language']}, - }, - ] - }, - }, - } - - @property - def languages(self) -> list[str]: - languages = [] - for individual_params in self.params: - languages.append(individual_params['language']) - return languages - - def record_repr(self, record: dict) -> dict: - return record.get('value', '') - - @property - def result_schema(self): - - schema = { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.REVISIONS_FIELD: { - 'type': 'array', - 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, - }, - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], - '$defs': { - 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.DATE_CREATED_FIELD: {'$ref': 
'#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD], - }, - }, - } - - # Inject data schema in result schema template - self._inject_data_schema(schema, ['$schema', 'title', 'type']) - - # Also inject data schema in the revision definition - self.__inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] - ) - - return schema - - @property - def _is_usage_limited(self): - return False diff --git a/kobo/apps/subsequences__new/exceptions.py b/kobo/apps/subsequences__new/exceptions.py deleted file mode 100644 index d15edc89ee..0000000000 --- a/kobo/apps/subsequences__new/exceptions.py +++ /dev/null @@ -1,16 +0,0 @@ -class InvalidAction(Exception): - """ - The referenced action does not exist or was not configured for the given - question XPath at the asset level - """ - - pass - - -class InvalidXPath(Exception): - """ - The referenced question XPath was not configured for supplemental data at - the asset level - """ - - pass \ No newline at end of file diff --git a/kobo/apps/subsequences__new/models.py b/kobo/apps/subsequences__new/models.py deleted file mode 100644 index 7d49e6c746..0000000000 --- a/kobo/apps/subsequences__new/models.py +++ /dev/null @@ -1,141 +0,0 @@ -from kobo.apps.subsequences.models import ( - SubmissionExtras, # just bullshit for now -) -from kpi.models import Asset -from .actions import ACTION_IDS_TO_CLASSES -from .exceptions import InvalidAction, InvalidXPath -from .schemas import validate_submission_supplement - -class SubmissionSupplement(SubmissionExtras): - class Meta(SubmissionExtras.Meta): - proxy = True - app_label = 'subsequences' - - def revise_data( - asset: Asset, submission: dict, incoming_data: dict - ) -> dict: - schema_version = incoming_data.pop('_version') - if schema_version != '20250820': - # TODO: migrate from old per-submission schema - raise NotImplementedError - - if asset.advanced_features['_version'] != schema_version: - # TODO: migrate from old per-asset schema - raise 
NotImplementedError - - submission_uuid = submission['meta/rootUuid'] # constant? - supplemental_data = SubmissionExtras.objects.get_or_create( - asset=asset, submission_uuid=submission_uuid - )[0].content # lock it? - - retrieved_supplemental_data = {} - - for question_xpath, data_for_this_question in incoming_data.items(): - try: - action_configs_for_this_question = asset.advanced_features[ - '_actionConfigs' - ][question_xpath] - except KeyError as e: - raise InvalidXPath from e - - for action_id, action_data in data_for_this_question.items(): - try: - action_class = ACTION_IDS_TO_CLASSES[action_id] - except KeyError as e: - raise InvalidAction from e - try: - action_params = action_configs_for_this_question[action_id] - except KeyError as e: - raise InvalidAction from e - - action = action_class(question_xpath, action_params) - action.check_limits(asset.owner) - question_supplemental_data = supplemental_data.setdefault( - question_xpath, {} - ) - action_supplemental_data = question_supplemental_data.setdefault( - action_id, {} - ) - action_supplemental_data = action.revise_field( - submission, action_supplemental_data, action_data - ) - question_supplemental_data[action_id] = action_supplemental_data - retrieved_supplemental_data.setdefault(question_xpath, {})[ - action_id - ] = action.retrieve_data(action_supplemental_data) - - supplemental_data['_version'] = schema_version - validate_submission_supplement(asset, supplemental_data) - SubmissionExtras.objects.filter( - asset=asset, submission_uuid=submission_uuid - ).update(content=supplemental_data) - - retrieved_supplemental_data['_version'] = schema_version - return retrieved_supplemental_data - - - def retrieve_data(asset: Asset, submission_uuid: str) -> dict: - try: - supplemental_data = SubmissionExtras.objects.get( - asset=asset, submission_uuid=submission_uuid - ).content - except SubmissionExtras.DoesNotExist: - return {} - - schema_version = supplemental_data.pop('_version') - if schema_version != 
'20250820': - # TODO: migrate from old per-submission schema - raise NotImplementedError - - if asset.advanced_features['_version'] != schema_version: - # TODO: migrate from old per-asset schema - raise NotImplementedError - - retrieved_supplemental_data = {} - - for question_xpath, data_for_this_question in supplemental_data.items(): - processed_data_for_this_question = ( - retrieved_supplemental_data.setdefault(question_xpath, {}) - ) - action_configs = asset.advanced_features['_actionConfigs'] - try: - action_configs_for_this_question = action_configs[question_xpath] - except KeyError: - # There's still supplemental data for this question at the - # submission level, but the question is no longer configured at the - # asset level. - # Allow this for now, but maybe forbid later and also forbid - # removing things from the asset-level action configuration? - # Actions could be disabled or hidden instead of being removed - - # FIXME: divergence between the asset-level configuration and - # submission-level supplemental data is going to cause schema - # validation failures! We defo need to forbid removal of actions - # and instead provide a way to mark them as deleted - continue - - for action_id, action_data in data_for_this_question.items(): - try: - action_class = ACTION_IDS_TO_CLASSES[action_id] - except KeyError: - # An action class present in the submission data no longer - # exists in the application code - # TODO: log an error - continue - try: - action_params = action_configs_for_this_question[action_id] - except KeyError: - # An action class present in the submission data is no longer - # configured at the asset level for this question - # Allow this for now, but maybe forbid later and also forbid - # removing things from the asset-level action configuration? 
- # Actions could be disabled or hidden instead of being removed - continue - - action = action_class(question_xpath, action_params) - processed_data_for_this_question[action_id] = action.retrieve_data( - action_data - ) - - retrieved_supplemental_data['_version'] = schema_version - return retrieved_supplemental_data \ No newline at end of file diff --git a/kobo/apps/subsequences/README-draft.md b/kobo/apps/subsequences__old/README-draft.md similarity index 100% rename from kobo/apps/subsequences/README-draft.md rename to kobo/apps/subsequences__old/README-draft.md diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences__old/README.md similarity index 100% rename from kobo/apps/subsequences/README.md rename to kobo/apps/subsequences__old/README.md diff --git a/kobo/apps/subsequences__old/__init__.py b/kobo/apps/subsequences__old/__init__.py new file mode 100644 index 0000000000..5f46bdbac3 --- /dev/null +++ b/kobo/apps/subsequences__old/__init__.py @@ -0,0 +1,17 @@ +''' +`kobo.apps.subsequences` --as in Sub(mission)Sequences is an app for defining +and following a sequence of actions or changes to a submission that has come +into kobo. 
+ +models: +- SubmissionData: + Holds a JSONField with the "supplementalData" necessary to complete the + +tasks: +(things that are queued in celery for later action) + +needs writeup: + - how to develop / debug within this app + - description of tests + +''' diff --git a/kobo/apps/subsequences/scripts/__init__.py b/kobo/apps/subsequences__old/actions/__init__.py similarity index 100% rename from kobo/apps/subsequences/scripts/__init__.py rename to kobo/apps/subsequences__old/actions/__init__.py diff --git a/kobo/apps/subsequences/actions/automatic_transcription.py b/kobo/apps/subsequences__old/actions/automatic_transcription.py similarity index 100% rename from kobo/apps/subsequences/actions/automatic_transcription.py rename to kobo/apps/subsequences__old/actions/automatic_transcription.py diff --git a/kobo/apps/subsequences__old/actions/base.py b/kobo/apps/subsequences__old/actions/base.py new file mode 100644 index 0000000000..f8dbe659aa --- /dev/null +++ b/kobo/apps/subsequences__old/actions/base.py @@ -0,0 +1,128 @@ +import datetime +from zoneinfo import ZoneInfo + +from django.utils import timezone + +from kobo.apps.subsequences.constants import GOOGLETS, GOOGLETX + +ACTION_NEEDED = 'ACTION_NEEDED' +PASSES = 'PASSES' + + +class BaseAction: + ID = None + _destination_field = '_supplementalDetails' + + DATE_CREATED_FIELD = 'dateCreated' + DATE_MODIFIED_FIELD = 'dateModified' + DELETE = '⌫' + + def __init__(self, params): + self.load_params(params) + + def cur_time(self): + return datetime.datetime.now(tz=ZoneInfo('UTC')).strftime('%Y-%m-%dT%H:%M:%SZ') + + def load_params(self, params): + raise NotImplementedError('subclass must define a load_params method') + + def run_change(self, params): + raise NotImplementedError('subclass must define a run_change method') + + def check_submission_status(self, submission): + return PASSES + + def modify_jsonschema(self, schema): + return schema + + def compile_revised_record(self, content, edits): + """ + a method that 
applies changes to a json structure and appends previous + changes to a revision history + """ + + # TODO: should this handle managing `DATE_CREATED_FIELD`, + # `DATE_MODIFIED_FIELD`, etc. instead of delegating that to + # `revise_record()` as it currently does? + + if self.ID is None: + return content + for field_name, vals in edits.items(): + if field_name == 'submission': + continue + + erecord = vals.get(self.ID) + o_keyval = content.get(field_name, {}) + for extra in [GOOGLETX, GOOGLETS]: + if extra in vals: + o_keyval[extra] = vals[extra] + content[field_name] = o_keyval + + orecord = o_keyval.get(self.ID) + if erecord is None: + continue + if self.is_auto_request(erecord): + content[field_name].update( + self.auto_request_repr(erecord) + ) + continue + if orecord is None: + compiled_record = self.init_field(erecord) + elif not self.has_change(orecord, erecord): + continue + else: + compiled_record = self.revise_field(orecord, erecord) + o_keyval[self.ID] = compiled_record + content[field_name] = o_keyval + return content + + def auto_request_repr(self, erecord): + raise NotImplementedError() + + def is_auto_request(self, erecord): + return self.record_repr(erecord) == 'GOOGLE' + + def init_field(self, edit): + edit[self.DATE_CREATED_FIELD] = \ + edit[self.DATE_MODIFIED_FIELD] = \ + str(timezone.now()).split('.')[0] + return {**edit, 'revisions': []} + + def revise_field(self, original, edit): + if self.record_repr(edit) == self.DELETE: + return {} + record = {**original} + revisions = record.pop('revisions', []) + if self.DATE_CREATED_FIELD in record: + del record[self.DATE_CREATED_FIELD] + edit[self.DATE_MODIFIED_FIELD] = \ + edit[self.DATE_CREATED_FIELD] = \ + str(timezone.now()).split('.')[0] + if len(revisions) > 0: + date_modified = revisions[-1].get(self.DATE_MODIFIED_FIELD) + edit[self.DATE_CREATED_FIELD] = date_modified + return {**edit, 'revisions': [record, *revisions]} + + def record_repr(self, record): + return record.get('value') + + def 
has_change(self, original, edit): + return self.record_repr(original) != self.record_repr(edit) + + @classmethod + def build_params(cls, *args, **kwargs): + raise NotImplementedError(f'{cls.__name__} has not implemented a build_params method') + + def get_xpath(self, row): + # return the full path... + for name_field in ['xpath', 'name', '$autoname']: + if name_field in row: + return row[name_field] + return None + + @classmethod + def get_name(cls, row): + for name_field in ['name', '$autoname']: + if name_field in row: + return row[name_field] + return None diff --git a/kobo/apps/subsequences/actions/keyword_search.py b/kobo/apps/subsequences__old/actions/keyword_search.py similarity index 100% rename from kobo/apps/subsequences/actions/keyword_search.py rename to kobo/apps/subsequences__old/actions/keyword_search.py diff --git a/kobo/apps/subsequences__old/actions/manual_transcription.py b/kobo/apps/subsequences__old/actions/manual_transcription.py new file mode 100644 index 0000000000..1a64212763 --- /dev/null +++ b/kobo/apps/subsequences__old/actions/manual_transcription.py @@ -0,0 +1,134 @@ +import jsonschema +from ..constants import TRANSCRIBABLE_SOURCE_TYPES +#from ..actions.base import BaseAction + +""" +### All actions must have the following components + +* (check!) a unique identifier for the action +* three jsonschemas: + 1. (check!) one to validate the parameters used to configure the action + * `ADVANCED_FEATURES_PARAMS_SCHEMA` + 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) + * the result of `modify_jsonschema()` + 3. 
one to validate the result of the action - the result of `modify_jsonschema()` + * OH NO, this doesn't happen at all yet +* a handler that receives a submission (and other metadata) and processes it +""" + +""" +idea of example content in asset.advanced_features (what kind of actions are activated per question) +{ + 'version': '20250820', + 'schema': { + 'my_audio_question': { + 'manual_transcription': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + 'manual_translation': [{'language': 'fr'}], + }, + 'my_video_question': { + 'manual_transcription': [{'language': 'en'}], + }, + 'my_number_question': { + 'number_multiplier': [{'multiplier': 3}], + }, + }, +} +""" + +class BaseAction: + @classmethod + def validate_params(cls, params): + jsonschema.validate(params, cls.params_schema) + + def validate_data(self, data): + jsonschema.validate(data, self.data_schema) + +class ManualTranscriptionAction(BaseAction): + ID = 'manual_transcription' + + def __init__(self, source_question_xpath, params): + self.source_question_xpath = source_question_xpath + self.params = params + + """ + For an audio question called `my_audio_question` that's transcribed + into 3 languages, the schema for `Asset.advanced_features` might look + like: + 'my_audio_question': { + 'manual_transcription': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + } + + The `params_schema` attribute defines the shape of the array where each + element is an object with a single string property for the transcript + language. + """ + params_schema = { + 'type': 'array', + 'items': { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + } + }, + 'required': ['language'], + 'type': 'object', + }, + } + + @property + def data_schema(self): # for lack of a better name + """ + (currently) POST to "/advanced_submission_post/aSsEtUiD" + POST to "/api/v2/assets//data//supplemental" # idk, rename? 
+ { + 'manual_transcription': { + 'language': 'es', + 'transcript': 'Almorzamos muy bien hoy', + } + } + """ + languages = [] + for individual_params in self.params: + languages.append(individual_params['language']) + + return { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + 'enum': languages, + }, + 'transcript': { + 'type': 'string', + }, + }, + 'required': ['language', 'transcript'], + 'type': 'object', + } + + @property + @classmethod + def result_schema(cls): + """ + we also need a schema to define the final result that will be written + into SubmissionExtras + + we need to solve the problem of storing multiple results for a single action + """ + raise NotImplementedError + + + def load_params(self, params): + """ + idk maybe we use this to read the language out of `Asset.advanced_features` + """ + self.possible_transcribed_fields = params['values'] diff --git a/kobo/apps/subsequences/actions/number_doubler.py b/kobo/apps/subsequences__old/actions/number_doubler.py similarity index 100% rename from kobo/apps/subsequences/actions/number_doubler.py rename to kobo/apps/subsequences__old/actions/number_doubler.py diff --git a/kobo/apps/subsequences/actions/qual.py b/kobo/apps/subsequences__old/actions/qual.py similarity index 100% rename from kobo/apps/subsequences/actions/qual.py rename to kobo/apps/subsequences__old/actions/qual.py diff --git a/kobo/apps/subsequences/actions/states.py b/kobo/apps/subsequences__old/actions/states.py similarity index 100% rename from kobo/apps/subsequences/actions/states.py rename to kobo/apps/subsequences__old/actions/states.py diff --git a/kobo/apps/subsequences/actions/translation.py b/kobo/apps/subsequences__old/actions/translation.py similarity index 100% rename from kobo/apps/subsequences/actions/translation.py rename to kobo/apps/subsequences__old/actions/translation.py diff --git a/kobo/apps/subsequences/actions/unknown_action.py 
b/kobo/apps/subsequences__old/actions/unknown_action.py similarity index 100% rename from kobo/apps/subsequences/actions/unknown_action.py rename to kobo/apps/subsequences__old/actions/unknown_action.py diff --git a/kobo/apps/subsequences/advanced_features_params_schema.py b/kobo/apps/subsequences__old/advanced_features_params_schema.py similarity index 100% rename from kobo/apps/subsequences/advanced_features_params_schema.py rename to kobo/apps/subsequences__old/advanced_features_params_schema.py diff --git a/kobo/apps/subsequences/api_view.py b/kobo/apps/subsequences__old/api_view.py similarity index 100% rename from kobo/apps/subsequences/api_view.py rename to kobo/apps/subsequences__old/api_view.py diff --git a/kobo/apps/subsequences/apps.py b/kobo/apps/subsequences__old/apps.py similarity index 100% rename from kobo/apps/subsequences/apps.py rename to kobo/apps/subsequences__old/apps.py diff --git a/kobo/apps/subsequences/constants.py b/kobo/apps/subsequences__old/constants.py similarity index 100% rename from kobo/apps/subsequences/constants.py rename to kobo/apps/subsequences__old/constants.py diff --git a/kobo/apps/subsequences__old/exceptions.py b/kobo/apps/subsequences__old/exceptions.py new file mode 100644 index 0000000000..541e4d3f9c --- /dev/null +++ b/kobo/apps/subsequences__old/exceptions.py @@ -0,0 +1,20 @@ +class AudioTooLongError(Exception): + """Audio file is too long for specified speech service""" + + +class SubsequenceTimeoutError(Exception): + pass + + +class TranscriptionResultsNotFound(Exception): + """ + No results returned by specified transcription service + """ + + +class TranslationAsyncResultAvailable(Exception): + pass + + +class TranslationResultsNotFound(Exception): + pass diff --git a/kobo/apps/subsequences/integrations/__init__.py b/kobo/apps/subsequences__old/integrations/__init__.py similarity index 100% rename from kobo/apps/subsequences/integrations/__init__.py rename to kobo/apps/subsequences__old/integrations/__init__.py 
diff --git a/kobo/apps/subsequences/tasks/__init__.py b/kobo/apps/subsequences__old/integrations/google/__init__.py similarity index 100% rename from kobo/apps/subsequences/tasks/__init__.py rename to kobo/apps/subsequences__old/integrations/google/__init__.py diff --git a/kobo/apps/subsequences/integrations/google/base.py b/kobo/apps/subsequences__old/integrations/google/base.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/base.py rename to kobo/apps/subsequences__old/integrations/google/base.py diff --git a/kobo/apps/subsequences/integrations/google/google_transcribe.py b/kobo/apps/subsequences__old/integrations/google/google_transcribe.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/google_transcribe.py rename to kobo/apps/subsequences__old/integrations/google/google_transcribe.py diff --git a/kobo/apps/subsequences/integrations/google/google_translate.py b/kobo/apps/subsequences__old/integrations/google/google_translate.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/google_translate.py rename to kobo/apps/subsequences__old/integrations/google/google_translate.py diff --git a/kobo/apps/subsequences/integrations/google/utils.py b/kobo/apps/subsequences__old/integrations/google/utils.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/utils.py rename to kobo/apps/subsequences__old/integrations/google/utils.py diff --git a/kobo/apps/subsequences/integrations/misc.py b/kobo/apps/subsequences__old/integrations/misc.py similarity index 100% rename from kobo/apps/subsequences/integrations/misc.py rename to kobo/apps/subsequences__old/integrations/misc.py diff --git a/kobo/apps/subsequences/integrations/translate.py b/kobo/apps/subsequences__old/integrations/translate.py similarity index 100% rename from kobo/apps/subsequences/integrations/translate.py rename to kobo/apps/subsequences__old/integrations/translate.py diff --git 
a/kobo/apps/subsequences/jsonschemas/qual_schema.py b/kobo/apps/subsequences__old/jsonschemas/qual_schema.py similarity index 100% rename from kobo/apps/subsequences/jsonschemas/qual_schema.py rename to kobo/apps/subsequences__old/jsonschemas/qual_schema.py diff --git a/kobo/apps/subsequences/migrations/0001_initial.py b/kobo/apps/subsequences__old/migrations/0001_initial.py similarity index 100% rename from kobo/apps/subsequences/migrations/0001_initial.py rename to kobo/apps/subsequences__old/migrations/0001_initial.py diff --git a/kobo/apps/subsequences/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py b/kobo/apps/subsequences__old/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py similarity index 100% rename from kobo/apps/subsequences/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py rename to kobo/apps/subsequences__old/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py diff --git a/kobo/apps/subsequences/migrations/0003_alter_submissionextras_date_created_and_more.py b/kobo/apps/subsequences__old/migrations/0003_alter_submissionextras_date_created_and_more.py similarity index 100% rename from kobo/apps/subsequences/migrations/0003_alter_submissionextras_date_created_and_more.py rename to kobo/apps/subsequences__old/migrations/0003_alter_submissionextras_date_created_and_more.py diff --git a/kobo/apps/subsequences/migrations/0004_increase_subsequences_submission_uuid.py b/kobo/apps/subsequences__old/migrations/0004_increase_subsequences_submission_uuid.py similarity index 100% rename from kobo/apps/subsequences/migrations/0004_increase_subsequences_submission_uuid.py rename to kobo/apps/subsequences__old/migrations/0004_increase_subsequences_submission_uuid.py diff --git a/kobo/apps/subsequences__new/__init__.py b/kobo/apps/subsequences__old/migrations/__init__.py similarity index 100% rename from kobo/apps/subsequences__new/__init__.py rename to 
kobo/apps/subsequences__old/migrations/__init__.py diff --git a/kobo/apps/subsequences__old/models.py b/kobo/apps/subsequences__old/models.py new file mode 100644 index 0000000000..e26957e8dc --- /dev/null +++ b/kobo/apps/subsequences__old/models.py @@ -0,0 +1,64 @@ +# coding: utf-8 + +from django.db import models + +from kpi.models import Asset +from kpi.models.abstract_models import AbstractTimeStampedModel +from .constants import GOOGLETS, GOOGLETX +from .utils.determine_export_cols_with_values import determine_export_cols_indiv + + +class SubmissionExtras(AbstractTimeStampedModel): + + submission_uuid = models.CharField(max_length=249) + content = models.JSONField(default=dict) + asset = models.ForeignKey( + Asset, + related_name='submission_extras', + on_delete=models.CASCADE, + ) + + class Meta: + # ideally `submission_uuid` is universally unique, but its uniqueness + # per-asset is most important + unique_together = (('asset', 'submission_uuid'),) + + def save(self, *args, **kwargs): + # We need to import these here because of circular imports + from .integrations.google.google_transcribe import GoogleTranscriptionService + from .integrations.google.google_translate import GoogleTranslationService + + features = self.asset.advanced_features + for xpath, vals in self.content.items(): + if 'transcript' in features: + options = vals.get(GOOGLETS, {}) + if options.get('status') == 'requested': + service = GoogleTranscriptionService(self) + vals[GOOGLETS] = service.process_data(xpath, vals) + if 'translation' in features: + options = vals.get(GOOGLETX, {}) + if options.get('status') == 'requested': + service = GoogleTranslationService(self) + vals[GOOGLETX] = service.process_data(xpath, vals) + + asset_changes = False + asset_known_cols = self.asset.known_cols + for kc in determine_export_cols_indiv(self.content): + if kc not in asset_known_cols: + asset_changes = True + asset_known_cols.append(kc) + + if asset_changes: + self.asset.known_cols = asset_known_cols 
+ self.asset.save(create_version=False) + + super().save(*args, **kwargs) + + @property + def full_content(self): + _content = {} + _content.update(self.content) + _content.update({ + 'timestamp': str(self.date_created), + }) + return _content diff --git a/kobo/apps/subsequences/prev.py b/kobo/apps/subsequences__old/prev.py similarity index 100% rename from kobo/apps/subsequences/prev.py rename to kobo/apps/subsequences__old/prev.py diff --git a/kobo/apps/subsequences__new/tests/__init__.py b/kobo/apps/subsequences__old/scripts/__init__.py similarity index 100% rename from kobo/apps/subsequences__new/tests/__init__.py rename to kobo/apps/subsequences__old/scripts/__init__.py diff --git a/kobo/apps/subsequences/scripts/activate_advanced_features_for_newest_asset.py b/kobo/apps/subsequences__old/scripts/activate_advanced_features_for_newest_asset.py similarity index 100% rename from kobo/apps/subsequences/scripts/activate_advanced_features_for_newest_asset.py rename to kobo/apps/subsequences__old/scripts/activate_advanced_features_for_newest_asset.py diff --git a/kobo/apps/subsequences/scripts/add_qual_to_last_question_of_last_asset.py b/kobo/apps/subsequences__old/scripts/add_qual_to_last_question_of_last_asset.py similarity index 100% rename from kobo/apps/subsequences/scripts/add_qual_to_last_question_of_last_asset.py rename to kobo/apps/subsequences__old/scripts/add_qual_to_last_question_of_last_asset.py diff --git a/kobo/apps/subsequences/scripts/export_analysis_form.py b/kobo/apps/subsequences__old/scripts/export_analysis_form.py similarity index 100% rename from kobo/apps/subsequences/scripts/export_analysis_form.py rename to kobo/apps/subsequences__old/scripts/export_analysis_form.py diff --git a/kobo/apps/subsequences/scripts/recalc_latest_subex.py b/kobo/apps/subsequences__old/scripts/recalc_latest_subex.py similarity index 100% rename from kobo/apps/subsequences/scripts/recalc_latest_subex.py rename to 
kobo/apps/subsequences__old/scripts/recalc_latest_subex.py diff --git a/kobo/apps/subsequences/scripts/repop_known_cols.py b/kobo/apps/subsequences__old/scripts/repop_known_cols.py similarity index 100% rename from kobo/apps/subsequences/scripts/repop_known_cols.py rename to kobo/apps/subsequences__old/scripts/repop_known_cols.py diff --git a/kobo/apps/subsequences/scripts/subsequences_export.py b/kobo/apps/subsequences__old/scripts/subsequences_export.py similarity index 100% rename from kobo/apps/subsequences/scripts/subsequences_export.py rename to kobo/apps/subsequences__old/scripts/subsequences_export.py diff --git a/kobo/apps/subsequences__new/tests/api/__init__.py b/kobo/apps/subsequences__old/tasks/__init__.py similarity index 100% rename from kobo/apps/subsequences__new/tests/api/__init__.py rename to kobo/apps/subsequences__old/tasks/__init__.py diff --git a/kobo/apps/subsequences__new/tests/api/v2/__init__.py b/kobo/apps/subsequences__old/tests/__init__.py similarity index 100% rename from kobo/apps/subsequences__new/tests/api/v2/__init__.py rename to kobo/apps/subsequences__old/tests/__init__.py diff --git a/kobo/apps/subsequences/tests/test_known_cols_utils.py b/kobo/apps/subsequences__old/tests/test_known_cols_utils.py similarity index 100% rename from kobo/apps/subsequences/tests/test_known_cols_utils.py rename to kobo/apps/subsequences__old/tests/test_known_cols_utils.py diff --git a/kobo/apps/subsequences/tests/test_nlp_integration.py b/kobo/apps/subsequences__old/tests/test_nlp_integration.py similarity index 100% rename from kobo/apps/subsequences/tests/test_nlp_integration.py rename to kobo/apps/subsequences__old/tests/test_nlp_integration.py diff --git a/kobo/apps/subsequences/tests/test_number_doubler.py b/kobo/apps/subsequences__old/tests/test_number_doubler.py similarity index 100% rename from kobo/apps/subsequences/tests/test_number_doubler.py rename to kobo/apps/subsequences__old/tests/test_number_doubler.py diff --git 
a/kobo/apps/subsequences/tests/test_proj_advanced_features.py b/kobo/apps/subsequences__old/tests/test_proj_advanced_features.py similarity index 100% rename from kobo/apps/subsequences/tests/test_proj_advanced_features.py rename to kobo/apps/subsequences__old/tests/test_proj_advanced_features.py diff --git a/kobo/apps/subsequences/tests/test_submission_extras_api_post.py b/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py similarity index 100% rename from kobo/apps/subsequences/tests/test_submission_extras_api_post.py rename to kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py diff --git a/kobo/apps/subsequences/tests/test_submission_extras_content.py b/kobo/apps/subsequences__old/tests/test_submission_extras_content.py similarity index 100% rename from kobo/apps/subsequences/tests/test_submission_extras_content.py rename to kobo/apps/subsequences__old/tests/test_submission_extras_content.py diff --git a/kobo/apps/subsequences/tests/test_submission_stream.py b/kobo/apps/subsequences__old/tests/test_submission_stream.py similarity index 100% rename from kobo/apps/subsequences/tests/test_submission_stream.py rename to kobo/apps/subsequences__old/tests/test_submission_stream.py diff --git a/kobo/apps/subsequences/urls.py b/kobo/apps/subsequences__old/urls.py similarity index 100% rename from kobo/apps/subsequences/urls.py rename to kobo/apps/subsequences__old/urls.py diff --git a/kobo/apps/subsequences/utils/__init__.py b/kobo/apps/subsequences__old/utils/__init__.py similarity index 100% rename from kobo/apps/subsequences/utils/__init__.py rename to kobo/apps/subsequences__old/utils/__init__.py diff --git a/kobo/apps/subsequences/utils/deprecation.py b/kobo/apps/subsequences__old/utils/deprecation.py similarity index 100% rename from kobo/apps/subsequences/utils/deprecation.py rename to kobo/apps/subsequences__old/utils/deprecation.py diff --git a/kobo/apps/subsequences/utils/determine_export_cols_with_values.py 
b/kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py similarity index 100% rename from kobo/apps/subsequences/utils/determine_export_cols_with_values.py rename to kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py diff --git a/kobo/apps/subsequences/utils/parse_known_cols.py b/kobo/apps/subsequences__old/utils/parse_known_cols.py similarity index 100% rename from kobo/apps/subsequences/utils/parse_known_cols.py rename to kobo/apps/subsequences__old/utils/parse_known_cols.py From 725d15d094f5a8b2583e8174fd17e600c518933d Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Fri, 22 Aug 2025 21:45:46 -0400 Subject: [PATCH 057/138] Rip out old subsequences references --- kobo/apps/subsequences/models.py | 27 ++- .../tests/api/v2/test_permissions.py | 2 +- kobo/urls.py | 1 - kpi/deployment_backends/openrosa_backend.py | 3 +- kpi/models/asset.py | 172 +----------------- kpi/models/import_export_task.py | 11 +- kpi/serializers/v2/asset.py | 14 -- kpi/views/v2/data.py | 4 +- 8 files changed, 34 insertions(+), 200 deletions(-) diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 7d49e6c746..1866e785ef 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -1,15 +1,32 @@ -from kobo.apps.subsequences.models import ( - SubmissionExtras, # just bullshit for now -) +from django.db import models + from kpi.models import Asset +from kpi.models.abstract_models import AbstractTimeStampedModel from .actions import ACTION_IDS_TO_CLASSES from .exceptions import InvalidAction, InvalidXPath from .schemas import validate_submission_supplement +class SubmissionExtras(AbstractTimeStampedModel): + # TODO: trash this and rename the model + submission_uuid = models.CharField(max_length=249) + content = models.JSONField(default=dict) + asset = models.ForeignKey( + Asset, + related_name='submission_extras', + on_delete=models.CASCADE, + ) + + class Meta: + # ideally `submission_uuid` is 
universally unique, but its uniqueness + # per-asset is most important + unique_together = (('asset', 'submission_uuid'),) + class SubmissionSupplement(SubmissionExtras): class Meta(SubmissionExtras.Meta): proxy = True - app_label = 'subsequences' + + def __repr__(self): + return f'Supplement for submission {self.submission_uuid}' def revise_data( asset: Asset, submission: dict, incoming_data: dict @@ -138,4 +155,4 @@ def retrieve_data(asset: Asset, submission_uuid: str) -> dict: ) retrieved_supplemental_data['_version'] = schema_version - return retrieved_supplemental_data \ No newline at end of file + return retrieved_supplemental_data diff --git a/kobo/apps/subsequences/tests/api/v2/test_permissions.py b/kobo/apps/subsequences/tests/api/v2/test_permissions.py index 519bb06e94..d1ca51405e 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_permissions.py +++ b/kobo/apps/subsequences/tests/api/v2/test_permissions.py @@ -7,7 +7,7 @@ from rest_framework import status from kobo.apps.kobo_auth.shortcuts import User -from kobo.apps.subsequences__new.tests.api.v2.base import SubsequenceBaseTestCase +from kobo.apps.subsequences.tests.api.v2.base import SubsequenceBaseTestCase from kpi.constants import ( PERM_CHANGE_SUBMISSIONS, PERM_PARTIAL_SUBMISSIONS, diff --git a/kobo/urls.py b/kobo/urls.py index a19bce0ca8..28f7e70fa7 100644 --- a/kobo/urls.py +++ b/kobo/urls.py @@ -60,7 +60,6 @@ r'^accounts/register/?', RedirectView.as_view(url='/accounts/signup/', permanent=False), ), - re_path(r'^', include('kobo.apps.subsequences.urls')), re_path(r'^', include('kpi.urls')), re_path(r'^', include('kobo.apps.openrosa.apps.main.urls')), re_path(r'^markdownx/', include('markdownx.urls')), diff --git a/kpi/deployment_backends/openrosa_backend.py b/kpi/deployment_backends/openrosa_backend.py index 4a15bc2cf3..fbc90a277c 100644 --- a/kpi/deployment_backends/openrosa_backend.py +++ b/kpi/deployment_backends/openrosa_backend.py @@ -46,7 +46,6 @@ from 
kobo.apps.openrosa.apps.viewer.models import ParsedInstance from kobo.apps.openrosa.libs.utils.logger_tools import create_instance, publish_xls_form from kobo.apps.openrosa.libs.utils.viewer_tools import get_mongo_userform_id -from kobo.apps.subsequences.utils import stream_with_extras from kobo.apps.trackers.models import NLPUsageCounter from kpi.constants import ( PERM_CHANGE_SUBMISSIONS, @@ -1567,7 +1566,7 @@ def __get_submissions_in_json( add_supplemental_details_to_query = False if add_supplemental_details_to_query: - mongo_cursor = stream_with_extras(mongo_cursor, self.asset) + raise NotImplementedError # FIXME all_attachment_xpaths = self.asset.get_all_attachment_xpaths() diff --git a/kpi/models/asset.py b/kpi/models/asset.py index 8731658891..06195d0bf4 100644 --- a/kpi/models/asset.py +++ b/kpi/models/asset.py @@ -18,18 +18,7 @@ from taggit.utils import require_instance_manager from kobo.apps.reports.constants import DEFAULT_REPORTS_KEY, SPECIFIC_REPORTS_KEY -from kobo.apps.subsequences.utils import ( - advanced_feature_instances, - advanced_submission_jsonschema, -) -from kobo.apps.subsequences.utils.deprecation import ( - get_sanitized_advanced_features, - get_sanitized_dict_keys, - get_sanitized_known_columns, - qpath_to_xpath, -) -from kobo.apps.subsequences.utils.parse_known_cols import parse_known_cols -from kobo.apps.subsequences__new.schemas import ACTION_PARAMS_SCHEMA +from kobo.apps.subsequences.schemas import ACTION_PARAMS_SCHEMA from kpi.constants import ( ASSET_TYPE_BLOCK, ASSET_TYPE_COLLECTION, @@ -539,73 +528,6 @@ def adjust_content_on_save(self): # Remove newlines and tabs (they are stripped in front end anyway) self.name = re.sub(r'[\n\t]+', '', _title) - def analysis_form_json(self, omit_question_types=None): - if omit_question_types is None: - omit_question_types = [] - - additional_fields = list(self._get_additional_fields()) - engines = dict(self._get_engines()) - output = {'engines': engines, 'additional_fields': additional_fields} - 
try: - number_doubler_field = self.advanced_features[ - 'number_doubler' - ]['number_doubler_fields'] # just a singular string lol - except KeyError: - pass - else: - additional_fields.append(dict( - # What do all these do? - label=f'{number_doubler_field} DOUBLED!', # understood - name=number_doubler_field + '__avoid_collision_with_source_question_name', # arbitrary? - dtpath=number_doubler_field, # unknown - type='doubled_number', # understood; xref with formpack `data_type_classes` - language='??', # only useful for transx? what does it do? - source=number_doubler_field, # probably understood; formpack field can reference e.g. for building labels - qpath=number_doubler_field, # probably understood; but compare to `source`? - settings='??', # only used by transx so far? - path=[number_doubler_field], # does this get `_supplementalDetails/` prepended to it? haven't looked yet - )) - try: - qual_survey = self.advanced_features['qual']['qual_survey'] - except KeyError: - return output - for qual_question in qual_survey: - # Surely some of this stuff is not actually used… - # (added to match extend_col_deets() from - # kobo/apps/subsequences/utils/parse_known_cols) - # - # See also injectSupplementalRowsIntoListOfRows() in - # assetUtils.ts - try: - xpath = qual_question['xpath'] - except KeyError: - xpath = self.get_xpath_from_qpath(qual_question['qpath']) - - field = dict( - label=qual_question['labels']['_default'], - name=f"{xpath}/{qual_question['uuid']}", - dtpath=f"{xpath}/{qual_question['uuid']}", - type=qual_question['type'], - # could say '_default' or the language of the transcript, - # but really that would be meaningless and misleading - language='??', - source=xpath, - xpath=f"{xpath}/{qual_question['uuid']}", - # seems not applicable given the transx questions describe - # manual vs. 
auto here and which engine was used - settings='??', - path=[xpath, qual_question['uuid']], - ) - if field['type'] in omit_question_types: - continue - try: - field['choices'] = qual_question['choices'] - except KeyError: - pass - additional_fields.append(field) - - return output - def clone(self, version_uid=None): # not currently used, but this is how "to_clone_dict" should work return Asset.objects.create(**self.to_clone_dict(version=version_uid)) @@ -654,31 +576,6 @@ def discoverable_when_public(self): return self.permissions.filter(permission__codename=PERM_DISCOVER_ASSET, user_id=settings.ANONYMOUS_USER_ID).exists() - def get_advanced_feature_instances(self): - return advanced_feature_instances(self.content, self.advanced_features) - - def get_advanced_submission_schema(self, url=None, content=False): - - if len(self.advanced_features) == 0: - NO_FEATURES_MSG = 'no advanced features activated for this form' - return {'type': 'object', '$description': NO_FEATURES_MSG} - - if advanced_features := get_sanitized_advanced_features(self): - self.advanced_features = advanced_features - - last_deployed_version = self.deployed_versions.first() - if content: - return advanced_submission_jsonschema( - content, self.advanced_features, url=url - ) - if last_deployed_version is None: - NO_DEPLOYMENT_MSG = 'asset needs a deployment for this feature' - return {'type': 'object', '$description': NO_DEPLOYMENT_MSG} - content = last_deployed_version.version_content - return advanced_submission_jsonschema( - content, self.advanced_features, url=url - ) - def get_all_attachment_xpaths(self) -> list: # We previously used `cache_for_request`, but it provides no benefit in Celery @@ -858,27 +755,11 @@ def get_partial_perms( return None - def get_xpath_from_qpath(self, qpath: str) -> str: - - # We could have used `cache_for_request` in the `qpath_to_xpath` utility, - # but it provides no benefit in Celery tasks. 
- # Instead, we use a "protected" property on the Asset model to cache the result - # during the lifetime of the asset instance. - qpaths_xpaths_mapping = getattr(self, '_qpaths_xpaths_mapping', {}) - - try: - xpath = qpaths_xpaths_mapping[qpath] - except KeyError: - qpaths_xpaths_mapping[qpath] = qpath_to_xpath(qpath, self) - xpath = qpaths_xpaths_mapping[qpath] - - setattr(self, '_qpaths_xpaths_mapping', qpaths_xpaths_mapping) - return xpath - @property def has_advanced_features(self): if self.advanced_features is None: return False + # FIXME: has dubious utility with new advanced_features that always have `_version`? return len(self.advanced_features) > 0 def has_subscribed_user(self, user_id): @@ -1218,36 +1099,6 @@ def update_search_field(self, **kwargs): self.search_field[key] = value jsonschema.validate(instance=self.search_field, schema=SEARCH_FIELD_SCHEMA) - def update_submission_extra(self, content, user=None): - submission_uuid = content.get('submission') - # the view had better have handled this - assert submission_uuid is not None - - # `select_for_update()` can only lock things that exist; make sure - # a `SubmissionExtras` exists for this submission before proceeding - self.submission_extras.get_or_create(submission_uuid=submission_uuid) - - with transaction.atomic(): - sub = ( - self.submission_extras.filter(submission_uuid=submission_uuid) - .select_for_update() - .first() - ) - instances = self.get_advanced_feature_instances() - if sub_extra_content := get_sanitized_dict_keys(sub.content, self): - sub.content = sub_extra_content - - compiled_content = {**sub.content} - - for instance in instances: - compiled_content = instance.compile_revised_record( - compiled_content, edits=content - ) - sub.content = compiled_content - sub.save() - - return sub - def update_languages(self, children=None): """ Updates object's languages by aggregating all its children's languages @@ -1299,9 +1150,6 @@ def validate_advanced_features(self): if 
self.advanced_features is None: self.advanced_features = {} - if advanced_features := get_sanitized_advanced_features(self): - self.advanced_features = advanced_features - jsonschema.validate( instance=self.advanced_features, schema=ACTION_PARAMS_SCHEMA, @@ -1335,22 +1183,6 @@ def version_number_and_date(self) -> str: return f'{count} {self.date_modified:(%Y-%m-%d %H:%M:%S)}' - def _get_additional_fields(self): - - # TODO Remove line below when when every asset is repopulated with `xpath` - self.known_cols = get_sanitized_known_columns(self) - - return parse_known_cols(self.known_cols) - - def _get_engines(self): - """ - engines are individual NLP services that can be used - """ - for instance in self.get_advanced_feature_instances(): - if hasattr(instance, 'engines'): - for key, val in instance.engines(): - yield key, val - def _populate_report_styles(self): default = self.report_styles.get(DEFAULT_REPORTS_KEY, {}) specifieds = self.report_styles.get(SPECIFIC_REPORTS_KEY, {}) diff --git a/kpi/models/import_export_task.py b/kpi/models/import_export_task.py index 525918491d..35b502a108 100644 --- a/kpi/models/import_export_task.py +++ b/kpi/models/import_export_task.py @@ -41,7 +41,6 @@ from werkzeug.http import parse_options_header from kobo.apps.reports.report_data import build_formpack -from kobo.apps.subsequences.utils import stream_with_extras from kpi.constants import ( ASSET_TYPE_COLLECTION, ASSET_TYPE_EMPTY, @@ -1040,16 +1039,18 @@ def get_export_object( ) if source.has_advanced_features: - submission_stream = stream_with_extras(submission_stream, source) + raise NotImplementedError # FIXME pack, submission_stream = build_formpack( source, submission_stream, self._fields_from_all_versions ) if source.has_advanced_features: - pack.extend_survey( - source.analysis_form_json(omit_question_types=['qual_note']) - ) + raise NotImplementedError # FIXME + ''' + pack.extend_survey(…) + omit_question_types=['qual_note'] + ''' # Wrap the submission stream in a 
generator that records the most # recent timestamp diff --git a/kpi/serializers/v2/asset.py b/kpi/serializers/v2/asset.py index 4df7911694..c6afc02643 100644 --- a/kpi/serializers/v2/asset.py +++ b/kpi/serializers/v2/asset.py @@ -346,9 +346,7 @@ class AssetSerializer(serializers.HyperlinkedModelSerializer): advanced_features = WriteableJsonWithSchemaField( schema_field=AdvancedFeatureField, required=False ) - advanced_submission_schema = serializers.SerializerMethodField() files = serializers.SerializerMethodField() - analysis_form_json = serializers.SerializerMethodField() xls_link = serializers.SerializerMethodField() summary = ReadOnlyFieldWithSchemaField(schema_field=SummaryField) xform_link = serializers.SerializerMethodField() @@ -438,8 +436,6 @@ class Meta: 'report_styles', 'report_custom', 'advanced_features', - 'advanced_submission_schema', - 'analysis_form_json', 'map_styles', 'map_custom', 'content', @@ -565,16 +561,6 @@ def get_files(self, obj): context=self.context, ).data - @extend_schema_field(AdvancedSubmissionSchemaField) - def get_advanced_submission_schema(self, obj): - req = self.context.get('request') - url = req.build_absolute_uri(f'/advanced_submission_post/{obj.uid}') - return obj.get_advanced_submission_schema(url=url) - - @extend_schema_field(AnalysisFormJsonField) - def get_analysis_form_json(self, obj): - return obj.analysis_form_json() - def get_deployment_status(self, obj: Asset) -> str: if deployment_status := obj.deployment_status: return deployment_status diff --git a/kpi/views/v2/data.py b/kpi/views/v2/data.py index c24f94eae4..a726b733d0 100644 --- a/kpi/views/v2/data.py +++ b/kpi/views/v2/data.py @@ -512,11 +512,11 @@ def supplement(self, request, submission_id_or_root_uuid: str, *args, **kwargs): submission_root_uuid = submission_id_or_root_uuid ### TO BE MOVED - from kobo.apps.subsequences__new.router import ( + from kobo.apps.subsequences.router import ( handle_incoming_data, retrieve_supplemental_data, ) - from 
kobo.apps.subsequences__new.exceptions import ( + from kobo.apps.subsequences.exceptions import ( InvalidAction, InvalidXPath, ) From ba361423cb420be8b3aabd596e80e9b66aa3b365 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Fri, 22 Aug 2025 23:13:25 -0400 Subject: [PATCH 058/138] Get data API working minimally --- kobo/apps/subsequences/models.py | 30 ++++++++++++++------ kobo/apps/subsequences/utils.py | 31 +++++++++++++++++++++ kpi/deployment_backends/openrosa_backend.py | 9 +++--- 3 files changed, 57 insertions(+), 13 deletions(-) create mode 100644 kobo/apps/subsequences/utils.py diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 1866e785ef..9997f7d869 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -1,6 +1,8 @@ from django.db import models -from kpi.models import Asset +from kobo.apps.openrosa.apps.logger.xform_instance_parser import ( + remove_uuid_prefix, +) from kpi.models.abstract_models import AbstractTimeStampedModel from .actions import ACTION_IDS_TO_CLASSES from .exceptions import InvalidAction, InvalidXPath @@ -11,7 +13,7 @@ class SubmissionExtras(AbstractTimeStampedModel): submission_uuid = models.CharField(max_length=249) content = models.JSONField(default=dict) asset = models.ForeignKey( - Asset, + 'kpi.Asset', related_name='submission_extras', on_delete=models.CASCADE, ) @@ -29,7 +31,7 @@ def __repr__(self): return f'Supplement for submission {self.submission_uuid}' def revise_data( - asset: Asset, submission: dict, incoming_data: dict + asset: 'kpi.Asset', submission: dict, incoming_data: dict ) -> dict: schema_version = incoming_data.pop('_version') if schema_version != '20250820': @@ -91,12 +93,22 @@ def revise_data( return retrieved_supplemental_data - def retrieve_data(asset: Asset, submission_uuid: str) -> dict: - try: - supplemental_data = SubmissionExtras.objects.get( - asset=asset, submission_uuid=submission_uuid - ).content - except 
SubmissionExtras.DoesNotExist: + def retrieve_data(asset: 'kpi.Asset', submission_root_uuid: str | None = None, prefetched_supplement: dict | None = None) -> dict: + if (submission_root_uuid is None) == (prefetched_supplement is None): + raise ValueError('Specify either `submission_root_uuid` or `prefetched_supplement`') + + if submission_root_uuid: + submission_uuid = remove_uuid_prefix(submission_root_uuid) + try: + supplemental_data = SubmissionExtras.objects.get( + asset=asset, submission_uuid=submission_uuid + ).content + except SubmissionExtras.DoesNotExist: + supplemental_data = None + else: + supplemental_data = prefetched_supplement + + if not supplemental_data: return {} schema_version = supplemental_data.pop('_version') diff --git a/kobo/apps/subsequences/utils.py b/kobo/apps/subsequences/utils.py new file mode 100644 index 0000000000..c9e1184550 --- /dev/null +++ b/kobo/apps/subsequences/utils.py @@ -0,0 +1,31 @@ +from collections import defaultdict +from copy import deepcopy +from typing import Generator + +from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix +from .models import SubmissionSupplement + +SUBMISSION_UUID_FIELD = 'meta/rootUuid' # FIXME: import from elsewhere +SUPPLEMENT_KEY = '_supplementalDetails' # leave unchanged for backwards compatibility + + +def stream_with_supplements(asset: 'kpi.models.Asset', submission_stream: Generator): + # FIXME: eww, this is bad, but maybe better than one query per submission? 
+ # Probably need to go up a few generators and grab an entire page of + # submissions and supplements, then yield each of those, and grab again from + # the database only once the page is exhausted + extras = dict( + SubmissionSupplement.objects.filter(asset=asset).values_list('submission_uuid', 'content') + ) + + if not asset.advanced_features: + yield from submission_stream + return + + for submission in submission_stream: + submission_uuid = remove_uuid_prefix(submission[SUBMISSION_UUID_FIELD]) + submission[SUPPLEMENT_KEY] = SubmissionSupplement.retrieve_data( + asset, + prefetched_supplement=extras.get(submission_uuid) + ) + yield submission diff --git a/kpi/deployment_backends/openrosa_backend.py b/kpi/deployment_backends/openrosa_backend.py index fbc90a277c..cf81b6b5b9 100644 --- a/kpi/deployment_backends/openrosa_backend.py +++ b/kpi/deployment_backends/openrosa_backend.py @@ -46,6 +46,7 @@ from kobo.apps.openrosa.apps.viewer.models import ParsedInstance from kobo.apps.openrosa.libs.utils.logger_tools import create_instance, publish_xls_form from kobo.apps.openrosa.libs.utils.viewer_tools import get_mongo_userform_id +from kobo.apps.subsequences.utils import stream_with_supplements from kobo.apps.trackers.models import NLPUsageCounter from kpi.constants import ( PERM_CHANGE_SUBMISSIONS, @@ -1558,15 +1559,15 @@ def __get_submissions_in_json( # Python-only attribute used by `kpi.views.v2.data.DataViewSet.list()` self.current_submission_count = total_count - add_supplemental_details_to_query = self.asset.has_advanced_features + add_supplements_to_query = self.asset.has_advanced_features fields = params.get('fields', []) if len(fields) > 0 and '_uuid' not in fields: # skip the query if submission '_uuid' is not even q'd from mongo - add_supplemental_details_to_query = False + add_supplements_to_query = False - if add_supplemental_details_to_query: - raise NotImplementedError # FIXME + if add_supplements_to_query: + mongo_cursor = 
stream_with_supplements(self.asset, mongo_cursor) all_attachment_xpaths = self.asset.get_all_attachment_xpaths() From 122e0d343fcf0fbbf94fba095be2d9ee6bcd8092 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Fri, 22 Aug 2025 23:18:14 -0400 Subject: [PATCH 059/138] Take teeny, tiny step toward reconnecting formpack --- kpi/models/import_export_task.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kpi/models/import_export_task.py b/kpi/models/import_export_task.py index 35b502a108..8c65b2d25f 100644 --- a/kpi/models/import_export_task.py +++ b/kpi/models/import_export_task.py @@ -41,6 +41,7 @@ from werkzeug.http import parse_options_header from kobo.apps.reports.report_data import build_formpack +from kobo.apps.subsequences.utils import stream_with_supplements from kpi.constants import ( ASSET_TYPE_COLLECTION, ASSET_TYPE_EMPTY, @@ -1039,7 +1040,9 @@ def get_export_object( ) if source.has_advanced_features: - raise NotImplementedError # FIXME + submission_stream = stream_with_supplements( + source, submission_stream + ) pack, submission_stream = build_formpack( source, submission_stream, self._fields_from_all_versions From 3f701bbd3d3d7f3cd789140a6046eb484f49e47f Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 22 Aug 2025 23:59:01 -0400 Subject: [PATCH 060/138] Make BaseAction.revise_data support lists --- .../subsequences__new/actions/__init__.py | 6 ++- kobo/apps/subsequences__new/actions/base.py | 40 +++++++++++++++++-- .../actions/manual_transcription.py | 5 +-- .../actions/manual_translation.py | 4 +- kobo/apps/subsequences__new/exceptions.py | 9 ++++- kobo/apps/subsequences__new/models.py | 3 +- 6 files changed, 53 insertions(+), 14 deletions(-) diff --git a/kobo/apps/subsequences__new/actions/__init__.py b/kobo/apps/subsequences__new/actions/__init__.py index 274ca102a5..f439fed567 100644 --- a/kobo/apps/subsequences__new/actions/__init__.py +++ b/kobo/apps/subsequences__new/actions/__init__.py @@ -1,5 +1,9 @@ from 
.manual_transcription import ManualTranscriptionAction +from .manual_translation import ManualTranslationAction # TODO, what about using a loader for every class in "actions" folder (except base.py)? -ACTIONS = (ManualTranscriptionAction,) +ACTIONS = ( + ManualTranscriptionAction, + ManualTranslationAction, +) ACTION_IDS_TO_CLASSES = {a.ID: a for a in ACTIONS} diff --git a/kobo/apps/subsequences__new/actions/base.py b/kobo/apps/subsequences__new/actions/base.py index 47d532ee7d..1913db7608 100644 --- a/kobo/apps/subsequences__new/actions/base.py +++ b/kobo/apps/subsequences__new/actions/base.py @@ -8,6 +8,7 @@ from kobo.apps.kobo_auth.shortcuts import User from kpi.exceptions import UsageLimitExceededException from kpi.utils.usage_calculator import ServiceUsageCalculator +from ..exceptions import InvalidItem """ ### All actions must have the following components @@ -125,6 +126,9 @@ def something_to_get_the_data_back_out(self): DATE_MODIFIED_FIELD = '_dateModified' REVISIONS_FIELD = '_revisions' + # Change my name, my parents hate me when I was born + item_reference_property = None + def check_limits(self, user: User): if not settings.STRIPE_ENABLED or not self._is_usage_limited: @@ -184,7 +188,22 @@ def revise_data( self.raise_for_any_leading_underscore_key(edit) now_str = utc_datetime_to_js_str(timezone.now()) - revision = deepcopy(submission_supplement) + item_index = None + submission_supplement_copy = deepcopy(submission_supplement) + if not self.item_reference_property: + revision = submission_supplement_copy + else: + needle = edit[self.item_reference_property] + revision = {} + if not isinstance(submission_supplement, list): + raise InvalidItem + + for idx, item in enumerate(submission_supplement): + if needle == item[self.item_reference_property]: + revision = deepcopy(item) + item_index = idx + break + new_record = deepcopy(edit) revisions = revision.pop(self.REVISIONS_FIELD, []) @@ -193,12 +212,25 @@ def revise_data( revision[self.DATE_CREATED_FIELD] = 
revision_creation_date new_record[self.DATE_MODIFIED_FIELD] = now_str - if submission_supplement: - revisions.insert(0, revision) - new_record[self.REVISIONS_FIELD] = revisions + if not self.item_reference_property: + if submission_supplement: + revisions.insert(0, revision) + new_record[self.REVISIONS_FIELD] = revisions + else: + if item_index is not None: + revisions.insert(0, revision) + new_record[self.REVISIONS_FIELD] = revisions new_record[self.DATE_CREATED_FIELD] = record_creation_date + if self.item_reference_property: + if item_index is None: + submission_supplement_copy.append(new_record) + else: + submission_supplement_copy[item_index] = new_record + + new_record = submission_supplement_copy + self.validate_result(new_record) return new_record diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py index c6263d95aa..21b5f12c3b 100644 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ b/kobo/apps/subsequences__new/actions/manual_transcription.py @@ -86,9 +86,6 @@ def languages(self) -> list[str]: languages.append(individual_params['language']) return languages - def record_repr(self, record: dict) -> dict: - return record.get('value', '') - @property def result_schema(self): @@ -123,7 +120,7 @@ def result_schema(self): self._inject_data_schema(schema, ['$schema', 'title', 'type']) # Also inject data schema in the revision definition - self.__inject_data_schema( + self._inject_data_schema( schema['$defs']['revision'], ['$schema', 'title', '$defs'] ) diff --git a/kobo/apps/subsequences__new/actions/manual_translation.py b/kobo/apps/subsequences__new/actions/manual_translation.py index f8e5528465..9348b5e173 100644 --- a/kobo/apps/subsequences__new/actions/manual_translation.py +++ b/kobo/apps/subsequences__new/actions/manual_translation.py @@ -5,6 +5,7 @@ class ManualTranslationAction(BaseAction): ID = 'manual_translation' + item_reference_property = 'language' 
def __init__(self, source_question_xpath, params): self.source_question_xpath = source_question_xpath @@ -85,9 +86,6 @@ def languages(self) -> list[str]: languages.append(individual_params['language']) return languages - def record_repr(self, record: dict) -> dict: - return record.get('value', '') - @property def result_schema(self): diff --git a/kobo/apps/subsequences__new/exceptions.py b/kobo/apps/subsequences__new/exceptions.py index d15edc89ee..3bfaae1331 100644 --- a/kobo/apps/subsequences__new/exceptions.py +++ b/kobo/apps/subsequences__new/exceptions.py @@ -7,10 +7,17 @@ class InvalidAction(Exception): pass +class InvalidItem(Exception): + """ + The referenced action does not contain a list of items + """ + + pass + class InvalidXPath(Exception): """ The referenced question XPath was not configured for supplemental data at the asset level """ - pass \ No newline at end of file + pass diff --git a/kobo/apps/subsequences__new/models.py b/kobo/apps/subsequences__new/models.py index 7d49e6c746..ee23f14aa5 100644 --- a/kobo/apps/subsequences__new/models.py +++ b/kobo/apps/subsequences__new/models.py @@ -11,6 +11,7 @@ class Meta(SubmissionExtras.Meta): proxy = True app_label = 'subsequences' + @staticmethod def revise_data( asset: Asset, submission: dict, incoming_data: dict ) -> dict: @@ -138,4 +139,4 @@ def retrieve_data(asset: Asset, submission_uuid: str) -> dict: ) retrieved_supplemental_data['_version'] = schema_version - return retrieved_supplemental_data \ No newline at end of file + return retrieved_supplemental_data From 8f39d0166a7b2c05ef2e3012bf65583484eace35 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Sat, 23 Aug 2025 00:09:04 -0400 Subject: [PATCH 061/138] Clean up --- kobo/apps/subsequences/actions/base.py | 16 ++++------------ kobo/apps/subsequences/models.py | 2 +- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 47d532ee7d..f46cbba553 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -116,10 +116,6 @@ def utc_datetime_to_js_str(dt: datetime.datetime) -> str: class BaseAction: - def something_to_get_the_data_back_out(self): - # might need to deal with multiple columns for one action - # ^ definitely will - raise NotImplementedError DATE_CREATED_FIELD = '_dateCreated' DATE_MODIFIED_FIELD = '_dateModified' @@ -150,10 +146,7 @@ def validate_result(self, result): @property def result_schema(self): """ - we also need a schema to define the final result that will be written - into SubmissionExtras - - we need to solve the problem of storing multiple results for a single action + must be implemented by subclasses """ return NotImplementedError @@ -162,7 +155,7 @@ def retrieve_data(self, action_data: dict) -> dict: `action_data` must be ONLY the data for this particular action instance, not the entire SubmissionExtras caboodle - descendant classes could override with special manipulation if needed + subclasses could override with special manipulation if needed """ return action_data @@ -176,9 +169,8 @@ def revise_data( self, submission: dict, submission_supplement: dict, edit: dict ) -> dict: """ - for actions that may have lengthy data, are we content to store the - entirety of the data for each revision, or do we need some kind of - differencing system? 
+ `submission` argument for future use by subclasses + this method might need to be made more friendly for overriding """ self.validate_data(edit) self.raise_for_any_leading_underscore_key(edit) diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 9997f7d869..906bb5098f 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -75,7 +75,7 @@ def revise_data( action_supplemental_data = question_supplemental_data.setdefault( action_id, {} ) - action_supplemental_data = action.revise_field( + action_supplemental_data = action.revise_data( submission, action_supplemental_data, action_data ) question_supplemental_data[action_id] = action_supplemental_data From d73dd89bfd870bf388577449cea1056b351b8f8f Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Sat, 23 Aug 2025 00:16:40 -0400 Subject: [PATCH 062/138] Lint and format --- kobo/apps/subsequences/actions/base.py | 6 +-- .../actions/manual_transcription.py | 1 - .../actions/manual_translation.py | 2 - kobo/apps/subsequences/exceptions.py | 2 +- kobo/apps/subsequences/models.py | 29 +++++++----- kobo/apps/subsequences/router.py | 7 ++- kobo/apps/subsequences/tests/api/v2/base.py | 39 +--------------- .../tests/api/v2/test_permissions.py | 22 +++++----- .../tests/test_manual_transcription.py | 16 +++++-- kobo/apps/subsequences/utils.py | 9 ++-- kpi/models/import_export_task.py | 8 ++-- kpi/urls/__init__.py | 3 +- kpi/views/v2/data.py | 44 +++++++++---------- 13 files changed, 80 insertions(+), 108 deletions(-) diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index f46cbba553..86ccdcf740 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -162,6 +162,7 @@ def retrieve_data(self, action_data: dict) -> dict: def revise_field(self, *args, **kwargs): # TODO: remove this alias import warnings + warnings.warn('Oh no, this method is going away!', DeprecationWarning) return 
self.revise_data(*args, **kwargs) @@ -195,7 +196,6 @@ def revise_data( return new_record - @staticmethod def raise_for_any_leading_underscore_key(d: dict): """ @@ -213,9 +213,7 @@ def raise_for_any_leading_underscore_key(d: dict): except AttributeError: continue if match: - raise Exception( - 'An unexpected key with a leading underscore was found' - ) + raise Exception('An unexpected key with a leading underscore was found') @property def _is_usage_limited(self): diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index c6263d95aa..b9910f68e7 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -1,4 +1,3 @@ -from copy import deepcopy from .base import BaseAction diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index f8e5528465..8ece0b99b9 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -1,5 +1,3 @@ -from copy import deepcopy - from .base import BaseAction diff --git a/kobo/apps/subsequences/exceptions.py b/kobo/apps/subsequences/exceptions.py index d15edc89ee..05278c6c8d 100644 --- a/kobo/apps/subsequences/exceptions.py +++ b/kobo/apps/subsequences/exceptions.py @@ -13,4 +13,4 @@ class InvalidXPath(Exception): the asset level """ - pass \ No newline at end of file + pass diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 906bb5098f..e951c37a59 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -1,13 +1,12 @@ from django.db import models -from kobo.apps.openrosa.apps.logger.xform_instance_parser import ( - remove_uuid_prefix, -) +from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix from kpi.models.abstract_models import AbstractTimeStampedModel from .actions import 
ACTION_IDS_TO_CLASSES from .exceptions import InvalidAction, InvalidXPath from .schemas import validate_submission_supplement + class SubmissionExtras(AbstractTimeStampedModel): # TODO: trash this and rename the model submission_uuid = models.CharField(max_length=249) @@ -23,6 +22,7 @@ class Meta: # per-asset is most important unique_together = (('asset', 'submission_uuid'),) + class SubmissionSupplement(SubmissionExtras): class Meta(SubmissionExtras.Meta): proxy = True @@ -30,9 +30,7 @@ class Meta(SubmissionExtras.Meta): def __repr__(self): return f'Supplement for submission {self.submission_uuid}' - def revise_data( - asset: 'kpi.Asset', submission: dict, incoming_data: dict - ) -> dict: + def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> dict: schema_version = incoming_data.pop('_version') if schema_version != '20250820': # TODO: migrate from old per-submission schema @@ -45,7 +43,9 @@ def revise_data( submission_uuid = submission['meta/rootUuid'] # constant? supplemental_data = SubmissionExtras.objects.get_or_create( asset=asset, submission_uuid=submission_uuid - )[0].content # lock it? + )[ + 0 + ].content # lock it? 
retrieved_supplemental_data = {} @@ -92,10 +92,15 @@ def revise_data( retrieved_supplemental_data['_version'] = schema_version return retrieved_supplemental_data - - def retrieve_data(asset: 'kpi.Asset', submission_root_uuid: str | None = None, prefetched_supplement: dict | None = None) -> dict: + def retrieve_data( + asset: 'kpi.Asset', + submission_root_uuid: str | None = None, + prefetched_supplement: dict | None = None, + ) -> dict: if (submission_root_uuid is None) == (prefetched_supplement is None): - raise ValueError('Specify either `submission_root_uuid` or `prefetched_supplement`') + raise ValueError( + 'Specify either `submission_root_uuid` or `prefetched_supplement`' + ) if submission_root_uuid: submission_uuid = remove_uuid_prefix(submission_root_uuid) @@ -123,8 +128,8 @@ def retrieve_data(asset: 'kpi.Asset', submission_root_uuid: str | None = None, p retrieved_supplemental_data = {} for question_xpath, data_for_this_question in supplemental_data.items(): - processed_data_for_this_question = ( - retrieved_supplemental_data.setdefault(question_xpath, {}) + processed_data_for_this_question = retrieved_supplemental_data.setdefault( + question_xpath, {} ) action_configs = asset.advanced_features['_actionConfigs'] try: diff --git a/kobo/apps/subsequences/router.py b/kobo/apps/subsequences/router.py index 77b23dcec7..5445066256 100644 --- a/kobo/apps/subsequences/router.py +++ b/kobo/apps/subsequences/router.py @@ -1,12 +1,15 @@ +import warnings + from .models import SubmissionSupplement -import warnings warnings.warn('Oh no, this file is going away!', DeprecationWarning) + def handle_incoming_data(*args, **kwargs): # TODO: remove this alias return SubmissionSupplement.revise_data(*args, **kwargs) + def retrieve_supplemental_data(*args, **kwargs): # TODO: remove this alias - return SubmissionSupplement.retrieve_data(*args, **kwargs) \ No newline at end of file + return SubmissionSupplement.retrieve_data(*args, **kwargs) diff --git 
a/kobo/apps/subsequences/tests/api/v2/base.py b/kobo/apps/subsequences/tests/api/v2/base.py index 6dedff1fe5..42c0722762 100644 --- a/kobo/apps/subsequences/tests/api/v2/base.py +++ b/kobo/apps/subsequences/tests/api/v2/base.py @@ -1,48 +1,11 @@ import uuid -from copy import deepcopy -from unittest.mock import Mock, patch -import pytest -from constance.test import override_config -from django.conf import settings -from django.test import override_settings from django.urls import reverse -from google.cloud import translate_v3 -from jsonschema import validate -from rest_framework import status -from rest_framework.test import APITestCase from kobo.apps.kobo_auth.shortcuts import User -from kobo.apps.languages.models.language import Language, LanguageRegion -from kobo.apps.languages.models.transcription import ( - TranscriptionService, - TranscriptionServiceLanguageM2M, -) -from kobo.apps.languages.models.translation import ( - TranslationService, - TranslationServiceLanguageM2M, -) -from kobo.apps.openrosa.apps.logger.models import Instance -from kobo.apps.openrosa.apps.logger.xform_instance_parser import add_uuid_prefix -from kobo.apps.organizations.constants import UsageType -from kpi.constants import ( - PERM_ADD_SUBMISSIONS, - PERM_CHANGE_ASSET, - PERM_CHANGE_SUBMISSIONS, - PERM_PARTIAL_SUBMISSIONS, - PERM_VIEW_ASSET, - PERM_VIEW_SUBMISSIONS, -) from kpi.models.asset import Asset -from kpi.tests.base_test_case import BaseTestCase from kpi.tests.kpi_test_case import KpiTestCase from kpi.urls.router_api_v2 import URL_NAMESPACE as ROUTER_URL_NAMESPACE -from kpi.utils.fuzzy_int import FuzzyInt -from kpi.utils.xml import ( - edit_submission_xml, - fromstring_preserve_root_xmlns, - xml_tostring, -) class SubsequenceBaseTestCase(KpiTestCase): @@ -78,7 +41,7 @@ def setUp(self): self.client.force_login(user) self.supplement_details_url = reverse( self._get_endpoint('submission-supplement'), - args=[self.asset.uid, self.submission_uuid] + args=[self.asset.uid, 
self.submission_uuid], ) def set_asset_advanced_features(self, features): diff --git a/kobo/apps/subsequences/tests/api/v2/test_permissions.py b/kobo/apps/subsequences/tests/api/v2/test_permissions.py index d1ca51405e..92f7a27178 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_permissions.py +++ b/kobo/apps/subsequences/tests/api/v2/test_permissions.py @@ -1,7 +1,6 @@ from datetime import datetime from zoneinfo import ZoneInfo -import pytest from ddt import data, ddt, unpack from freezegun import freeze_time from rest_framework import status @@ -15,6 +14,7 @@ ) from kpi.utils.object_permission import get_anonymous_user + @ddt class SubsequencePermissionTestCase(SubsequenceBaseTestCase): @@ -135,16 +135,18 @@ def test_can_write(self, username, shared, status_code): self.client.force_login(user) # Activate advanced features for the project - self.set_asset_advanced_features({ - '_version': '20250820', - '_actionConfigs': { - 'q1': { - 'manual_transcription': [ - {'language': 'es'}, - ] - } + self.set_asset_advanced_features( + { + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'manual_transcription': [ + {'language': 'es'}, + ] + } + }, } - }) + ) if shared: self.asset.assign_perm(user, PERM_CHANGE_SUBMISSIONS) diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py index 51a8e1f3f8..81f7c1dd4d 100644 --- a/kobo/apps/subsequences/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences/tests/test_manual_transcription.py @@ -6,6 +6,7 @@ EMPTY_SUBMISSION = {} + def test_valid_params_pass_validation(): params = [{'language': 'fr'}, {'language': 'es'}] ManualTranscriptionAction.validate_params(params) @@ -33,6 +34,7 @@ def test_invalid_transcript_data_fails_validation(): with pytest.raises(jsonschema.exceptions.ValidationError): action.validate_data(data) + def test_valid_result_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = 
[{'language': 'fr'}, {'language': 'en'}] @@ -48,6 +50,7 @@ def test_valid_result_passes_validation(): mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) + def test_invalid_result_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] @@ -63,7 +66,9 @@ def test_invalid_result_fails_validation(): mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) # erroneously add '_dateModified' onto a revision - mock_sup_det['_revisions'][0]['_dateModified'] = mock_sup_det['_revisions'][0]['_dateCreated'] + mock_sup_det['_revisions'][0]['_dateModified'] = mock_sup_det['_revisions'][0][ + '_dateCreated' + ] with pytest.raises(jsonschema.exceptions.ValidationError): action.validate_result(mock_sup_det) @@ -107,13 +112,14 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): assert mock_sup_det['_dateCreated'] == first_time # the record itself should have an updated modification timestamp - assert dateutil.parser.parse( - mock_sup_det['_dateModified'] - ) > dateutil.parser.parse(mock_sup_det['_dateCreated']) + assert dateutil.parser.parse(mock_sup_det['_dateModified']) > dateutil.parser.parse( + mock_sup_det['_dateCreated'] + ) # the record itself should encompass the second transcript assert mock_sup_det.items() >= second.items() + def test_setting_transcript_to_empty_string(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] @@ -129,6 +135,7 @@ def test_setting_transcript_to_empty_string(): assert mock_sup_det['value'] == '' assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' + def test_setting_transcript_to_empty_object(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] @@ -144,6 +151,7 @@ def test_setting_transcript_to_empty_object(): assert 'value' not in mock_sup_det 
assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' + def test_latest_revision_is_first(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] diff --git a/kobo/apps/subsequences/utils.py b/kobo/apps/subsequences/utils.py index c9e1184550..93bfccbbbe 100644 --- a/kobo/apps/subsequences/utils.py +++ b/kobo/apps/subsequences/utils.py @@ -1,5 +1,3 @@ -from collections import defaultdict -from copy import deepcopy from typing import Generator from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix @@ -15,7 +13,9 @@ def stream_with_supplements(asset: 'kpi.models.Asset', submission_stream: Genera # submissions and supplements, then yield each of those, and grab again from # the database only once the page is exhausted extras = dict( - SubmissionSupplement.objects.filter(asset=asset).values_list('submission_uuid', 'content') + SubmissionSupplement.objects.filter(asset=asset).values_list( + 'submission_uuid', 'content' + ) ) if not asset.advanced_features: @@ -25,7 +25,6 @@ def stream_with_supplements(asset: 'kpi.models.Asset', submission_stream: Genera for submission in submission_stream: submission_uuid = remove_uuid_prefix(submission[SUBMISSION_UUID_FIELD]) submission[SUPPLEMENT_KEY] = SubmissionSupplement.retrieve_data( - asset, - prefetched_supplement=extras.get(submission_uuid) + asset, prefetched_supplement=extras.get(submission_uuid) ) yield submission diff --git a/kpi/models/import_export_task.py b/kpi/models/import_export_task.py index 8c65b2d25f..3dc71425e6 100644 --- a/kpi/models/import_export_task.py +++ b/kpi/models/import_export_task.py @@ -1040,9 +1040,7 @@ def get_export_object( ) if source.has_advanced_features: - submission_stream = stream_with_supplements( - source, submission_stream - ) + submission_stream = stream_with_supplements(source, submission_stream) pack, submission_stream = build_formpack( source, submission_stream, 
self._fields_from_all_versions @@ -1050,10 +1048,10 @@ def get_export_object( if source.has_advanced_features: raise NotImplementedError # FIXME - ''' + """ pack.extend_survey(…) omit_question_types=['qual_note'] - ''' + """ # Wrap the submission stream in a generator that records the most # recent timestamp diff --git a/kpi/urls/__init__.py b/kpi/urls/__init__.py index 303f1be498..684ddbd814 100644 --- a/kpi/urls/__init__.py +++ b/kpi/urls/__init__.py @@ -11,7 +11,8 @@ from kpi.views.v2.authorized_application_user import AuthorizedApplicationUserViewSet from kpi.views.v2.logout import logout_from_all_devices from .router_api_v1 import urls_patterns as router_api_v1_urls -from .router_api_v2 import URL_NAMESPACE, urls_patterns as router_api_v2_urls +from .router_api_v2 import URL_NAMESPACE +from .router_api_v2 import urls_patterns as router_api_v2_urls # TODO: Give other apps their own `urls.py` files instead of importing their # views directly! See diff --git a/kpi/views/v2/data.py b/kpi/views/v2/data.py index a726b733d0..6f71acd792 100644 --- a/kpi/views/v2/data.py +++ b/kpi/views/v2/data.py @@ -21,8 +21,8 @@ from kobo.apps.audit_log.models import AuditType from kobo.apps.audit_log.utils import SubmissionUpdate from kobo.apps.openrosa.apps.logger.xform_instance_parser import ( - remove_uuid_prefix, add_uuid_prefix, + remove_uuid_prefix, ) from kobo.apps.openrosa.libs.utils.logger_tools import http_open_rosa_error_handler from kpi.authentication import EnketoSessionAuthentication @@ -233,8 +233,9 @@ class DataViewSet( ), ) @action( - detail=False, methods=['PATCH', 'DELETE'], - renderer_classes=[renderers.JSONRenderer] + detail=False, + methods=['PATCH', 'DELETE'], + renderer_classes=[renderers.JSONRenderer], ) def bulk(self, request, *args, **kwargs): if request.method == 'DELETE': @@ -420,11 +421,7 @@ def list(self, request, *args, **kwargs): return Response(list(submissions)) def retrieve( - self, - request, - submission_id_or_root_uuid: Union[int, str], - 
*args, - **kwargs + self, request, submission_id_or_root_uuid: Union[int, str], *args, **kwargs ): """ Retrieve a submission by its primary key or its UUID. @@ -474,7 +471,7 @@ def retrieve( @extend_schema( methods=['GET'], description=read_md('kpi', 'data/supplement_retrieve.md'), - responses=open_api_200_ok_response(DataSupplementResponse), # TODO CHANGEME + responses=open_api_200_ok_response(DataSupplementResponse), # TODO CHANGEME parameters=[ OpenApiParameter( name='submission_id_or_root_uuid', @@ -512,36 +509,36 @@ def supplement(self, request, submission_id_or_root_uuid: str, *args, **kwargs): submission_root_uuid = submission_id_or_root_uuid ### TO BE MOVED + from kobo.apps.subsequences.exceptions import InvalidAction, InvalidXPath from kobo.apps.subsequences.router import ( handle_incoming_data, retrieve_supplemental_data, ) - from kobo.apps.subsequences.exceptions import ( - InvalidAction, - InvalidXPath, - ) + ### END TO BE MOVED deployment = self._get_deployment() try: - submission = list(deployment.get_submissions( - user=request.user, - query={'meta/rootUuid': add_uuid_prefix(submission_root_uuid)} - ))[0] + submission = list( + deployment.get_submissions( + user=request.user, + query={'meta/rootUuid': add_uuid_prefix(submission_root_uuid)}, + ) + )[0] except IndexError: raise Http404 submission_root_uuid = submission[deployment.SUBMISSION_ROOT_UUID_XPATH] if request.method == 'GET': - return Response(retrieve_supplemental_data(self.asset, submission_root_uuid)) + return Response( + retrieve_supplemental_data(self.asset, submission_root_uuid) + ) post_data = request.data try: - supplemental_data = handle_incoming_data( - self.asset, submission, post_data - ) + supplemental_data = handle_incoming_data(self.asset, submission, post_data) except InvalidAction: raise serializers.ValidationError({'detail': 'Invalid action'}) except InvalidXPath: @@ -607,8 +604,9 @@ def supplement(self, request, submission_id_or_root_uuid: str, *args, **kwargs): ], ) 
@action( - detail=True, methods=['GET', 'PATCH', 'DELETE'], - permission_classes=[SubmissionValidationStatusPermission] + detail=True, + methods=['GET', 'PATCH', 'DELETE'], + permission_classes=[SubmissionValidationStatusPermission], ) def validation_status( self, request, submission_id_or_root_uuid: int, *args, **kwargs From 4612f4c1206252a6eacb7613b6e3c3b4a40d0655 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Sat, 23 Aug 2025 00:24:27 -0400 Subject: [PATCH 063/138] Stop mutating incoming data --- kobo/apps/subsequences/models.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index e951c37a59..2ac4704612 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -31,7 +31,7 @@ def __repr__(self): return f'Supplement for submission {self.submission_uuid}' def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> dict: - schema_version = incoming_data.pop('_version') + schema_version = incoming_data.get('_version') if schema_version != '20250820': # TODO: migrate from old per-submission schema raise NotImplementedError @@ -50,6 +50,10 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di retrieved_supplemental_data = {} for question_xpath, data_for_this_question in incoming_data.items(): + if question_xpath == '_version': + # FIXME: what's a better way? skip all leading underscore keys? + # pop off the known special keys first? 
+ continue try: action_configs_for_this_question = asset.advanced_features[ '_actionConfigs' From 9d9e46d2ce95e060c86d91e930a8662a8fdc2a5f Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Sat, 23 Aug 2025 00:33:43 -0400 Subject: [PATCH 064/138] Make SubmissionSupplement.revise_data support lists --- kobo/apps/subsequences__new/models.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kobo/apps/subsequences__new/models.py b/kobo/apps/subsequences__new/models.py index ee23f14aa5..1cfce0925c 100644 --- a/kobo/apps/subsequences__new/models.py +++ b/kobo/apps/subsequences__new/models.py @@ -54,10 +54,15 @@ def revise_data( question_supplemental_data = supplemental_data.setdefault( question_xpath, {} ) + default_action_supplemental_data = ( + {} + if action.item_reference_property is None + else [] + ) action_supplemental_data = question_supplemental_data.setdefault( - action_id, {} + action_id, default_action_supplemental_data ) - action_supplemental_data = action.revise_field( + action_supplemental_data = action.revise_data( submission, action_supplemental_data, action_data ) question_supplemental_data[action_id] = action_supplemental_data From 7dedf2766bec284ac33e900cba38ad548e406798 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Sat, 23 Aug 2025 00:34:14 -0400 Subject: [PATCH 065/138] Add FIXME for `revise_data()` bug --- kobo/apps/subsequences/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 2ac4704612..e78dcddcb3 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -93,6 +93,8 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di asset=asset, submission_uuid=submission_uuid ).update(content=supplemental_data) + # FIXME: bug! this will not return data from the other actions (and + # questions?) 
that were not affected by the revision retrieved_supplemental_data['_version'] = schema_version return retrieved_supplemental_data From fa636b829d252cbda64b836bbc0877ffc3b5457a Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Sat, 23 Aug 2025 00:45:29 -0400 Subject: [PATCH 066/138] Shuffle --- kobo/apps/subsequences/__init__.py | 17 - kobo/apps/subsequences/actions/__init__.py | 9 + kobo/apps/subsequences/actions/base.py | 362 +++++++++++++----- .../actions/manual_transcription.py | 139 ++++--- .../actions/manual_translation.py | 0 kobo/apps/subsequences/exceptions.py | 21 +- kobo/apps/subsequences/models.py | 207 +++++++--- .../router.py | 0 .../schemas.py | 0 .../google => tests/api}/__init__.py | 0 .../{migrations => tests/api/v2}/__init__.py | 0 .../tests/api/v2/base.py | 0 .../tests/api/v2/test_permissions.py | 0 .../tests/test_manual_transcription.py | 0 .../subsequences__new/actions/__init__.py | 9 - kobo/apps/subsequences__new/actions/base.py | 294 -------------- .../actions/manual_transcription.py | 131 ------- kobo/apps/subsequences__new/exceptions.py | 23 -- kobo/apps/subsequences__new/models.py | 147 ------- .../README-draft.md | 0 .../README.md | 0 kobo/apps/subsequences__old/__init__.py | 17 + .../actions}/__init__.py | 0 .../actions/automatic_transcription.py | 0 kobo/apps/subsequences__old/actions/base.py | 128 +++++++ .../actions/keyword_search.py | 0 .../actions/manual_transcription.py | 134 +++++++ .../actions/number_doubler.py | 0 .../actions/qual.py | 0 .../actions/states.py | 0 .../actions/translation.py | 0 .../actions/unknown_action.py | 0 .../advanced_features_params_schema.py | 0 .../api_view.py | 0 .../apps.py | 0 .../constants.py | 0 kobo/apps/subsequences__old/exceptions.py | 20 + .../integrations/__init__.py | 0 .../integrations/google}/__init__.py | 0 .../integrations/google/base.py | 0 .../integrations/google/google_transcribe.py | 0 .../integrations/google/google_translate.py | 0 .../integrations/google/utils.py | 0 
.../integrations/misc.py | 0 .../integrations/translate.py | 0 .../jsonschemas/qual_schema.py | 0 .../migrations/0001_initial.py | 0 ...ique_together_asset_and_submission_uuid.py | 0 ..._submissionextras_date_created_and_more.py | 0 ...4_increase_subsequences_submission_uuid.py | 0 .../migrations}/__init__.py | 0 kobo/apps/subsequences__old/models.py | 64 ++++ .../prev.py | 0 .../scripts}/__init__.py | 0 ...vate_advanced_features_for_newest_asset.py | 0 ...add_qual_to_last_question_of_last_asset.py | 0 .../scripts/export_analysis_form.py | 0 .../scripts/recalc_latest_subex.py | 0 .../scripts/repop_known_cols.py | 0 .../scripts/subsequences_export.py | 0 .../tasks}/__init__.py | 0 .../tests}/__init__.py | 0 .../tests/test_known_cols_utils.py | 0 .../tests/test_nlp_integration.py | 0 .../tests/test_number_doubler.py | 0 .../tests/test_proj_advanced_features.py | 0 .../tests/test_submission_extras_api_post.py | 0 .../tests/test_submission_extras_content.py | 0 .../tests/test_submission_stream.py | 0 .../urls.py | 0 .../utils/__init__.py | 0 .../utils/deprecation.py | 0 .../determine_export_cols_with_values.py | 0 .../utils/parse_known_cols.py | 0 74 files changed, 861 insertions(+), 861 deletions(-) rename kobo/apps/{subsequences__new => subsequences}/actions/manual_translation.py (100%) rename kobo/apps/{subsequences__new => subsequences}/router.py (100%) rename kobo/apps/{subsequences__new => subsequences}/schemas.py (100%) rename kobo/apps/subsequences/{integrations/google => tests/api}/__init__.py (100%) rename kobo/apps/subsequences/{migrations => tests/api/v2}/__init__.py (100%) rename kobo/apps/{subsequences__new => subsequences}/tests/api/v2/base.py (100%) rename kobo/apps/{subsequences__new => subsequences}/tests/api/v2/test_permissions.py (100%) rename kobo/apps/{subsequences__new => subsequences}/tests/test_manual_transcription.py (100%) delete mode 100644 kobo/apps/subsequences__new/actions/__init__.py delete mode 100644 
kobo/apps/subsequences__new/actions/base.py delete mode 100644 kobo/apps/subsequences__new/actions/manual_transcription.py delete mode 100644 kobo/apps/subsequences__new/exceptions.py delete mode 100644 kobo/apps/subsequences__new/models.py rename kobo/apps/{subsequences => subsequences__old}/README-draft.md (100%) rename kobo/apps/{subsequences => subsequences__old}/README.md (100%) create mode 100644 kobo/apps/subsequences__old/__init__.py rename kobo/apps/{subsequences/scripts => subsequences__old/actions}/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/automatic_transcription.py (100%) create mode 100644 kobo/apps/subsequences__old/actions/base.py rename kobo/apps/{subsequences => subsequences__old}/actions/keyword_search.py (100%) create mode 100644 kobo/apps/subsequences__old/actions/manual_transcription.py rename kobo/apps/{subsequences => subsequences__old}/actions/number_doubler.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/qual.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/states.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/translation.py (100%) rename kobo/apps/{subsequences => subsequences__old}/actions/unknown_action.py (100%) rename kobo/apps/{subsequences => subsequences__old}/advanced_features_params_schema.py (100%) rename kobo/apps/{subsequences => subsequences__old}/api_view.py (100%) rename kobo/apps/{subsequences => subsequences__old}/apps.py (100%) rename kobo/apps/{subsequences => subsequences__old}/constants.py (100%) create mode 100644 kobo/apps/subsequences__old/exceptions.py rename kobo/apps/{subsequences => subsequences__old}/integrations/__init__.py (100%) rename kobo/apps/{subsequences/tasks => subsequences__old/integrations/google}/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/base.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/google_transcribe.py 
(100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/google_translate.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/google/utils.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/misc.py (100%) rename kobo/apps/{subsequences => subsequences__old}/integrations/translate.py (100%) rename kobo/apps/{subsequences => subsequences__old}/jsonschemas/qual_schema.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0001_initial.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0003_alter_submissionextras_date_created_and_more.py (100%) rename kobo/apps/{subsequences => subsequences__old}/migrations/0004_increase_subsequences_submission_uuid.py (100%) rename kobo/apps/{subsequences__new => subsequences__old/migrations}/__init__.py (100%) create mode 100644 kobo/apps/subsequences__old/models.py rename kobo/apps/{subsequences => subsequences__old}/prev.py (100%) rename kobo/apps/{subsequences__new/tests => subsequences__old/scripts}/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/activate_advanced_features_for_newest_asset.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/add_qual_to_last_question_of_last_asset.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/export_analysis_form.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/recalc_latest_subex.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/repop_known_cols.py (100%) rename kobo/apps/{subsequences => subsequences__old}/scripts/subsequences_export.py (100%) rename kobo/apps/{subsequences__new/tests/api => subsequences__old/tasks}/__init__.py (100%) rename kobo/apps/{subsequences__new/tests/api/v2 => subsequences__old/tests}/__init__.py (100%) rename 
kobo/apps/{subsequences => subsequences__old}/tests/test_known_cols_utils.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_nlp_integration.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_number_doubler.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_proj_advanced_features.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_submission_extras_api_post.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_submission_extras_content.py (100%) rename kobo/apps/{subsequences => subsequences__old}/tests/test_submission_stream.py (100%) rename kobo/apps/{subsequences => subsequences__old}/urls.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/__init__.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/deprecation.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/determine_export_cols_with_values.py (100%) rename kobo/apps/{subsequences => subsequences__old}/utils/parse_known_cols.py (100%) diff --git a/kobo/apps/subsequences/__init__.py b/kobo/apps/subsequences/__init__.py index 5f46bdbac3..e69de29bb2 100644 --- a/kobo/apps/subsequences/__init__.py +++ b/kobo/apps/subsequences/__init__.py @@ -1,17 +0,0 @@ -''' -`kobo.apps.subsequences` --as in Sub(mission)Sequences is an app for defining -and following a sequence of actions or changes to a submission that has come -into kobo. 
- -models: -- SubmissionData: - Holds a JSONField with the "supplementalData" necessary to complete the - -tasks: -(things that are queued in celery for later action) - -needs writeup: - - how to develop / debug within this app - - description of tests - -''' diff --git a/kobo/apps/subsequences/actions/__init__.py b/kobo/apps/subsequences/actions/__init__.py index e69de29bb2..f439fed567 100644 --- a/kobo/apps/subsequences/actions/__init__.py +++ b/kobo/apps/subsequences/actions/__init__.py @@ -0,0 +1,9 @@ +from .manual_transcription import ManualTranscriptionAction +from .manual_translation import ManualTranslationAction + +# TODO, what about using a loader for every class in "actions" folder (except base.py)? +ACTIONS = ( + ManualTranscriptionAction, + ManualTranslationAction, +) +ACTION_IDS_TO_CLASSES = {a.ID: a for a in ACTIONS} diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index f8dbe659aa..1913db7608 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -1,128 +1,294 @@ import datetime -from zoneinfo import ZoneInfo +from copy import deepcopy +import jsonschema +from django.conf import settings from django.utils import timezone -from kobo.apps.subsequences.constants import GOOGLETS, GOOGLETX +from kobo.apps.kobo_auth.shortcuts import User +from kpi.exceptions import UsageLimitExceededException +from kpi.utils.usage_calculator import ServiceUsageCalculator +from ..exceptions import InvalidItem -ACTION_NEEDED = 'ACTION_NEEDED' -PASSES = 'PASSES' +""" +### All actions must have the following components + +* (check!) a unique identifier for the action +* three jsonschemas: + 1. (check!) one to validate the parameters used to configure the action + * `ADVANCED_FEATURES_PARAMS_SCHEMA` + 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) + * the result of `modify_jsonschema()` + 3. 
one to validate the result of the action - the result of `modify_jsonschema()` + * OH NO, this doesn't happen at all yet +* a handler that receives a submission (and other metadata) and processes it +""" + +""" +idea of example content in asset.advanced_features (what kind of actions are activated per question) +{ + '_version': '20250820', + '_schema': { + 'my_audio_question': { + 'manual_transcription': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], + }, + 'my_video_question': { + 'manual_transcription': [{'language': 'en'}], + }, + 'my_number_question': { + 'number_multiplier': [{'multiplier': 3}], + }, + }, +} + +idea of example data in SubmissionExtras based on the above +{ + '_version': '20250820', + '_submission': '', + 'my_audio_question': { + 'manual_transcription': { + 'transcript': 'هائج', + 'language': 'ar', + '_dateCreated': '2025-08-21T20:55:42.012053Z', + '_dateModified': '2025-08-21T20:57:28.154567Z', + '_revisions': [ + { + 'transcript': 'فارغ', + 'language': 'ar', + '_dateCreated': '2025-08-21T20:55:42.012053Z', + } + ], + }, + 'manual_translation': [ + { + 'language': 'en', + 'translation': 'berserk', + '_dateCreated': '2025-08-21T21:39:42.141306Z', + '_dateModified': '2025-08-21T21:39:42.141306Z', + }, + { + 'language': 'es', + 'translation': 'enloquecido', + '_dateCreated': '2025-08-21T21:40:54.644308Z', + '_dateModified': '2025-08-21T22:00:10.862880Z', + '_revisions': [ + { + 'translation': 'loco', + 'language': 'es', + '_dateCreated': '2025-08-21T21:40:54.644308Z', + } + ], + }, + ], + }, + 'my_video_question': { + 'manual_transcription': { + 'transcript': 'sea horse sea hell', + 'language': 'en', + '_dateCreated': '2025-08-21T21:06:20.059117Z', + '_dateModified': '2025-08-21T21:06:20.059117Z', + }, + }, + 'my_number_question': { + 'number_multiplier': { + 'numberMultiplied': 99, + '_dateCreated': '2025-08-21T21:09:34.504546Z', + '_dateModified': 
'2025-08-21T21:09:34.504546Z', + }, + }, +} +""" + + +def utc_datetime_to_js_str(dt: datetime.datetime) -> str: + """ + Return a string to represent a `datetime` following the simplification of + the ISO 8601 format used by JavaScript + """ + # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format + if dt.utcoffset() or not dt.tzinfo: + raise NotImplementedError('Only UTC datetimes are supported') + return dt.isoformat().replace('+00:00', 'Z') class BaseAction: - ID = None - _destination_field = '_supplementalDetails' + def something_to_get_the_data_back_out(self): + # might need to deal with multiple columns for one action + # ^ definitely will + raise NotImplementedError - DATE_CREATED_FIELD = 'dateCreated' - DATE_MODIFIED_FIELD = 'dateModified' - DELETE = '⌫' + DATE_CREATED_FIELD = '_dateCreated' + DATE_MODIFIED_FIELD = '_dateModified' + REVISIONS_FIELD = '_revisions' - def __init__(self, params): - self.load_params(params) + # Change my name, my parents hate me when I was born + item_reference_property = None - def cur_time(self): - return datetime.datetime.now(tz=ZoneInfo('UTC')).strftime('%Y-%m-%dT%H:%M:%SZ') + def check_limits(self, user: User): - def load_params(self, params): - raise NotImplementedError('subclass must define a load_params method') + if not settings.STRIPE_ENABLED or not self._is_usage_limited: + return - def run_change(self, params): - raise NotImplementedError('subclass must define a run_change method') + calculator = ServiceUsageCalculator(user) + balances = calculator.get_usage_balances() - def check_submission_status(self, submission): - return PASSES + balance = balances[self._limit_identifier] + if balance and balance['exceeded']: + raise UsageLimitExceededException() - def modify_jsonschema(self, schema): - return schema + @classmethod + def validate_params(cls, params): + jsonschema.validate(params, cls.params_schema) - def compile_revised_record(self, content, edits): + def validate_data(self, data): 
+ jsonschema.validate(data, self.data_schema) + + def validate_result(self, result): + jsonschema.validate(result, self.result_schema) + + @property + def result_schema(self): """ - a method that applies changes to a json structure and appends previous - changes to a revision history + we also need a schema to define the final result that will be written + into SubmissionExtras + + we need to solve the problem of storing multiple results for a single action """ + return NotImplementedError - # TODO: should this handle managing `DATE_CREATED_FIELD`, - # `DATE_MODIFIED_FIELD`, etc. instead of delegating that to - # `revise_record()` as it currently does? + def retrieve_data(self, action_data: dict) -> dict: + """ + `action_data` must be ONLY the data for this particular action + instance, not the entire SubmissionExtras caboodle - if self.ID is None: - return content - for field_name, vals in edits.items(): - if field_name == 'submission': - continue + descendant classes could override with special manipulation if needed + """ + return action_data + + def revise_field(self, *args, **kwargs): + # TODO: remove this alias + import warnings + warnings.warn('Oh no, this method is going away!', DeprecationWarning) + return self.revise_data(*args, **kwargs) + + def revise_data( + self, submission: dict, submission_supplement: dict, edit: dict + ) -> dict: + """ + for actions that may have lengthy data, are we content to store the + entirety of the data for each revision, or do we need some kind of + differencing system? 
+ """ + self.validate_data(edit) + self.raise_for_any_leading_underscore_key(edit) + + now_str = utc_datetime_to_js_str(timezone.now()) + item_index = None + submission_supplement_copy = deepcopy(submission_supplement) + if not self.item_reference_property: + revision = submission_supplement_copy + else: + needle = edit[self.item_reference_property] + revision = {} + if not isinstance(submission_supplement, list): + raise InvalidItem + + for idx, item in enumerate(submission_supplement): + if needle == item[self.item_reference_property]: + revision = deepcopy(item) + item_index = idx + break + + new_record = deepcopy(edit) + revisions = revision.pop(self.REVISIONS_FIELD, []) + + revision_creation_date = revision.pop(self.DATE_MODIFIED_FIELD, now_str) + record_creation_date = revision.pop(self.DATE_CREATED_FIELD, now_str) + revision[self.DATE_CREATED_FIELD] = revision_creation_date + new_record[self.DATE_MODIFIED_FIELD] = now_str + + if not self.item_reference_property: + if submission_supplement: + revisions.insert(0, revision) + new_record[self.REVISIONS_FIELD] = revisions + else: + if item_index is not None: + revisions.insert(0, revision) + new_record[self.REVISIONS_FIELD] = revisions - erecord = vals.get(self.ID) - o_keyval = content.get(field_name, {}) - for extra in [GOOGLETX, GOOGLETS]: - if extra in vals: - o_keyval[extra] = vals[extra] - content[field_name] = o_keyval + new_record[self.DATE_CREATED_FIELD] = record_creation_date - orecord = o_keyval.get(self.ID) - if erecord is None: + if self.item_reference_property: + if item_index is None: + submission_supplement_copy.append(new_record) + else: + submission_supplement_copy[item_index] = new_record + + new_record = submission_supplement_copy + + self.validate_result(new_record) + + return new_record + + + @staticmethod + def raise_for_any_leading_underscore_key(d: dict): + """ + Keys with leading underscores are reserved for metadata like + `_dateCreated`, `_dateModified`, and `_revisions`. 
No key with a + leading underscore should be present in data POSTed by a client or + generated by an action. + + Schema validation should block invalid keys, but this method exists as + a redundant check to guard against schema mistakes. + """ + for k in list(d.keys()): + try: + match = k.startswith('_') + except AttributeError: continue - if self.is_auto_request(erecord): - content[field_name].update( - self.auto_request_repr(erecord) + if match: + raise Exception( + 'An unexpected key with a leading underscore was found' ) - continue - if orecord is None: - compiled_record = self.init_field(erecord) - elif not self.has_change(orecord, erecord): - continue - else: - compiled_record = self.revise_field(orecord, erecord) - o_keyval[self.ID] = compiled_record - content[field_name] = o_keyval - return content - def auto_request_repr(self, erecord): + @property + def _is_usage_limited(self): + """ + Returns whether an action should check for usage limits. + """ raise NotImplementedError() - def is_auto_request(self, erecord): - return self.record_repr(erecord) == 'GOOGLE' - - def init_field(self, edit): - edit[self.DATE_CREATED_FIELD] = \ - edit[self.DATE_MODIFIED_FIELD] = \ - str(timezone.now()).split('.')[0] - return {**edit, 'revisions': []} - - def revise_field(self, original, edit): - if self.record_repr(edit) == self.DELETE: - return {} - record = {**original} - revisions = record.pop('revisions', []) - if self.DATE_CREATED_FIELD in record: - del record[self.DATE_CREATED_FIELD] - edit[self.DATE_MODIFIED_FIELD] = \ - edit[self.DATE_CREATED_FIELD] = \ - str(timezone.now()).split('.')[0] - if len(revisions) > 0: - date_modified = revisions[-1].get(self.DATE_MODIFIED_FIELD) - edit[self.DATE_CREATED_FIELD] = date_modified - return {**edit, 'revisions': [record, *revisions]} - - def record_repr(self, record): - return record.get('value') - - def has_change(self, original, edit): - return self.record_repr(original) != self.record_repr(edit) + def 
_inject_data_schema(self, destination_schema: dict, skipped_keys: list): + """ + Utility function to inject data schema into another schema to + avoid repeating the same schema. + Useful to produce result schema. + """ - @classmethod - def build_params(cls, *args, **kwargs): - raise NotImplementedError(f'{cls.__name__} has not implemented a build_params method') + for key, value in self.data_schema.items(): + if key in skipped_keys: + continue - def get_xpath(self, row): - # return the full path... - for name_field in ['xpath', 'name', '$autoname']: - if name_field in row: - return row[name_field] - return None + if key in destination_schema: + if isinstance(destination_schema[key], dict): + destination_schema[key].update(self.data_schema[key]) + elif isinstance(destination_schema[key], list): + destination_schema[key].extend(self.data_schema[key]) + else: + destination_schema[key] = self.data_schema[key] + else: + destination_schema[key] = self.data_schema[key] - @classmethod - def get_name(cls, row): - for name_field in ['name', '$autoname']: - if name_field in row: - return row[name_field] - return None + @property + def _limit_identifier(self): + # Example for automatic transcription + # + # from kobo.apps.organizations.constants import UsageType + # return UsageType.ASR_SECONDS + raise NotImplementedError() diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 1a64212763..21b5f12c3b 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -1,51 +1,7 @@ -import jsonschema -from ..constants import TRANSCRIBABLE_SOURCE_TYPES -#from ..actions.base import BaseAction +from copy import deepcopy -""" -### All actions must have the following components +from .base import BaseAction -* (check!) a unique identifier for the action -* three jsonschemas: - 1. (check!) 
one to validate the parameters used to configure the action - * `ADVANCED_FEATURES_PARAMS_SCHEMA` - 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) - * the result of `modify_jsonschema()` - 3. one to validate the result of the action - the result of `modify_jsonschema()` - * OH NO, this doesn't happen at all yet -* a handler that receives a submission (and other metadata) and processes it -""" - -""" -idea of example content in asset.advanced_features (what kind of actions are activated per question) -{ - 'version': '20250820', - 'schema': { - 'my_audio_question': { - 'manual_transcription': [ - {'language': 'ar'}, - {'language': 'bn'}, - {'language': 'es'}, - ], - 'manual_translation': [{'language': 'fr'}], - }, - 'my_video_question': { - 'manual_transcription': [{'language': 'en'}], - }, - 'my_number_question': { - 'number_multiplier': [{'multiplier': 3}], - }, - }, -} -""" - -class BaseAction: - @classmethod - def validate_params(cls, params): - jsonschema.validate(params, cls.params_schema) - - def validate_data(self, data): - jsonschema.validate(data, self.data_schema) class ManualTranscriptionAction(BaseAction): ID = 'manual_transcription' @@ -87,48 +43,89 @@ def __init__(self, source_question_xpath, params): @property def data_schema(self): # for lack of a better name """ - (currently) POST to "/advanced_submission_post/aSsEtUiD" - POST to "/api/v2/assets//data//supplemental" # idk, rename? 
+ POST to "/api/v2/assets//data//supplemental" { 'manual_transcription': { 'language': 'es', - 'transcript': 'Almorzamos muy bien hoy', + 'value': 'Almorzamos muy bien hoy', } } """ + + return { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'language': {'$ref': '#/$defs/lang'}, + 'value': {'$ref': '#/$defs/transcript'}, + }, + 'allOf': [{'$ref': '#/$defs/lang_transcript_dependency'}], + '$defs': { + 'lang': {'type': 'string', 'enum': self.languages}, + 'transcript': {'type': 'string'}, + 'lang_transcript_dependency': { + 'allOf': [ + { + 'if': {'required': ['language']}, + 'then': {'required': ['value']}, + }, + { + 'if': {'required': ['value']}, + 'then': {'required': ['language']}, + }, + ] + }, + }, + } + + @property + def languages(self) -> list[str]: languages = [] for individual_params in self.params: languages.append(individual_params['language']) + return languages - return { + @property + def result_schema(self): + + schema = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', 'additionalProperties': False, 'properties': { - 'language': { - 'type': 'string', - 'enum': languages, + self.REVISIONS_FIELD: { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, }, - 'transcript': { - 'type': 'string', + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], + '$defs': { + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD], }, }, - 'required': ['language', 'transcript'], - 'type': 'object', } - @property - @classmethod - def result_schema(cls): - """ - we also need a schema to define the final result that 
will be written - into SubmissionExtras + # Inject data schema in result schema template + self._inject_data_schema(schema, ['$schema', 'title', 'type']) - we need to solve the problem of storing multiple results for a single action - """ - raise NotImplementedError + # Also inject data schema in the revision definition + self._inject_data_schema( + schema['$defs']['revision'], ['$schema', 'title', '$defs'] + ) + return schema - def load_params(self, params): - """ - idk maybe we use this to read the language out of `Asset.advanced_features` - """ - self.possible_transcribed_fields = params['values'] + @property + def _is_usage_limited(self): + return False diff --git a/kobo/apps/subsequences__new/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py similarity index 100% rename from kobo/apps/subsequences__new/actions/manual_translation.py rename to kobo/apps/subsequences/actions/manual_translation.py diff --git a/kobo/apps/subsequences/exceptions.py b/kobo/apps/subsequences/exceptions.py index 541e4d3f9c..3bfaae1331 100644 --- a/kobo/apps/subsequences/exceptions.py +++ b/kobo/apps/subsequences/exceptions.py @@ -1,20 +1,23 @@ -class AudioTooLongError(Exception): - """Audio file is too long for specified speech service""" - +class InvalidAction(Exception): + """ + The referenced action does not exist or was not configured for the given + question XPath at the asset level + """ -class SubsequenceTimeoutError(Exception): pass -class TranscriptionResultsNotFound(Exception): +class InvalidItem(Exception): """ - No results returned by specified transcription service + The referenced action does not contain a list of items """ - -class TranslationAsyncResultAvailable(Exception): pass +class InvalidXPath(Exception): + """ + The referenced question XPath was not configured for supplemental data at + the asset level + """ -class TranslationResultsNotFound(Exception): pass diff --git a/kobo/apps/subsequences/models.py 
b/kobo/apps/subsequences/models.py index e26957e8dc..1cfce0925c 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -1,64 +1,147 @@ -# coding: utf-8 +from kobo.apps.subsequences.models import ( + SubmissionExtras, # just bullshit for now +) +from kpi.models import Asset +from .actions import ACTION_IDS_TO_CLASSES +from .exceptions import InvalidAction, InvalidXPath +from .schemas import validate_submission_supplement -from django.db import models +class SubmissionSupplement(SubmissionExtras): + class Meta(SubmissionExtras.Meta): + proxy = True + app_label = 'subsequences' -from kpi.models import Asset -from kpi.models.abstract_models import AbstractTimeStampedModel -from .constants import GOOGLETS, GOOGLETX -from .utils.determine_export_cols_with_values import determine_export_cols_indiv - - -class SubmissionExtras(AbstractTimeStampedModel): - - submission_uuid = models.CharField(max_length=249) - content = models.JSONField(default=dict) - asset = models.ForeignKey( - Asset, - related_name='submission_extras', - on_delete=models.CASCADE, - ) - - class Meta: - # ideally `submission_uuid` is universally unique, but its uniqueness - # per-asset is most important - unique_together = (('asset', 'submission_uuid'),) - - def save(self, *args, **kwargs): - # We need to import these here because of circular imports - from .integrations.google.google_transcribe import GoogleTranscriptionService - from .integrations.google.google_translate import GoogleTranslationService - - features = self.asset.advanced_features - for xpath, vals in self.content.items(): - if 'transcript' in features: - options = vals.get(GOOGLETS, {}) - if options.get('status') == 'requested': - service = GoogleTranscriptionService(self) - vals[GOOGLETS] = service.process_data(xpath, vals) - if 'translation' in features: - options = vals.get(GOOGLETX, {}) - if options.get('status') == 'requested': - service = GoogleTranslationService(self) - vals[GOOGLETX] = 
service.process_data(xpath, vals) - - asset_changes = False - asset_known_cols = self.asset.known_cols - for kc in determine_export_cols_indiv(self.content): - if kc not in asset_known_cols: - asset_changes = True - asset_known_cols.append(kc) - - if asset_changes: - self.asset.known_cols = asset_known_cols - self.asset.save(create_version=False) - - super().save(*args, **kwargs) - - @property - def full_content(self): - _content = {} - _content.update(self.content) - _content.update({ - 'timestamp': str(self.date_created), - }) - return _content + @staticmethod + def revise_data( + asset: Asset, submission: dict, incoming_data: dict + ) -> dict: + schema_version = incoming_data.pop('_version') + if schema_version != '20250820': + # TODO: migrate from old per-submission schema + raise NotImplementedError + + if asset.advanced_features['_version'] != schema_version: + # TODO: migrate from old per-asset schema + raise NotImplementedError + + submission_uuid = submission['meta/rootUuid'] # constant? + supplemental_data = SubmissionExtras.objects.get_or_create( + asset=asset, submission_uuid=submission_uuid + )[0].content # lock it? 
+ + retrieved_supplemental_data = {} + + for question_xpath, data_for_this_question in incoming_data.items(): + try: + action_configs_for_this_question = asset.advanced_features[ + '_actionConfigs' + ][question_xpath] + except KeyError as e: + raise InvalidXPath from e + + for action_id, action_data in data_for_this_question.items(): + try: + action_class = ACTION_IDS_TO_CLASSES[action_id] + except KeyError as e: + raise InvalidAction from e + try: + action_params = action_configs_for_this_question[action_id] + except KeyError as e: + raise InvalidAction from e + + action = action_class(question_xpath, action_params) + action.check_limits(asset.owner) + question_supplemental_data = supplemental_data.setdefault( + question_xpath, {} + ) + default_action_supplemental_data = ( + {} + if action.item_reference_property is None + else [] + ) + action_supplemental_data = question_supplemental_data.setdefault( + action_id, default_action_supplemental_data + ) + action_supplemental_data = action.revise_data( + submission, action_supplemental_data, action_data + ) + question_supplemental_data[action_id] = action_supplemental_data + retrieved_supplemental_data.setdefault(question_xpath, {})[ + action_id + ] = action.retrieve_data(action_supplemental_data) + + supplemental_data['_version'] = schema_version + validate_submission_supplement(asset, supplemental_data) + SubmissionExtras.objects.filter( + asset=asset, submission_uuid=submission_uuid + ).update(content=supplemental_data) + + retrieved_supplemental_data['_version'] = schema_version + return retrieved_supplemental_data + + + def retrieve_data(asset: Asset, submission_uuid: str) -> dict: + try: + supplemental_data = SubmissionExtras.objects.get( + asset=asset, submission_uuid=submission_uuid + ).content + except SubmissionExtras.DoesNotExist: + return {} + + schema_version = supplemental_data.pop('_version') + if schema_version != '20250820': + # TODO: migrate from old per-submission schema + raise NotImplementedError 
+ + if asset.advanced_features['_version'] != schema_version: + # TODO: migrate from old per-asset schema + raise NotImplementedError + + retrieved_supplemental_data = {} + + for question_xpath, data_for_this_question in supplemental_data.items(): + processed_data_for_this_question = ( + retrieved_supplemental_data.setdefault(question_xpath, {}) + ) + action_configs = asset.advanced_features['_actionConfigs'] + try: + action_configs_for_this_question = action_configs[question_xpath] + except KeyError: + # There's still supplemental data for this question at the + # submission level, but the question is no longer configured at the + # asset level. + # Allow this for now, but maybe forbid later and also forbid + # removing things from the asset-level action configuration? + # Actions could be disabled or hidden instead of being removed + + # FIXME: divergence between the asset-level configuration and + # submission-level supplemental data is going to cause schema + # validation failures! We defo need to forbid removal of actions + # and instead provide a way to mark them as deleted + continue + + for action_id, action_data in data_for_this_question.items(): + try: + action_class = ACTION_IDS_TO_CLASSES[action_id] + except KeyError: + # An action class present in the submission data no longer + # exists in the application code + # TODO: log an error + continue + try: + action_params = action_configs_for_this_question[action_id] + except KeyError: + # An action class present in the submission data is no longer + # configured at the asset level for this question + # Allow this for now, but maybe forbid later and also forbid + # removing things from the asset-level action configuration? 
+ # Actions could be disabled or hidden instead of being removed + continue + + action = action_class(question_xpath, action_params) + processed_data_for_this_question[action_id] = action.retrieve_data( + action_data + ) + + retrieved_supplemental_data['_version'] = schema_version + return retrieved_supplemental_data diff --git a/kobo/apps/subsequences__new/router.py b/kobo/apps/subsequences/router.py similarity index 100% rename from kobo/apps/subsequences__new/router.py rename to kobo/apps/subsequences/router.py diff --git a/kobo/apps/subsequences__new/schemas.py b/kobo/apps/subsequences/schemas.py similarity index 100% rename from kobo/apps/subsequences__new/schemas.py rename to kobo/apps/subsequences/schemas.py diff --git a/kobo/apps/subsequences/integrations/google/__init__.py b/kobo/apps/subsequences/tests/api/__init__.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/__init__.py rename to kobo/apps/subsequences/tests/api/__init__.py diff --git a/kobo/apps/subsequences/migrations/__init__.py b/kobo/apps/subsequences/tests/api/v2/__init__.py similarity index 100% rename from kobo/apps/subsequences/migrations/__init__.py rename to kobo/apps/subsequences/tests/api/v2/__init__.py diff --git a/kobo/apps/subsequences__new/tests/api/v2/base.py b/kobo/apps/subsequences/tests/api/v2/base.py similarity index 100% rename from kobo/apps/subsequences__new/tests/api/v2/base.py rename to kobo/apps/subsequences/tests/api/v2/base.py diff --git a/kobo/apps/subsequences__new/tests/api/v2/test_permissions.py b/kobo/apps/subsequences/tests/api/v2/test_permissions.py similarity index 100% rename from kobo/apps/subsequences__new/tests/api/v2/test_permissions.py rename to kobo/apps/subsequences/tests/api/v2/test_permissions.py diff --git a/kobo/apps/subsequences__new/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py similarity index 100% rename from 
kobo/apps/subsequences__new/tests/test_manual_transcription.py rename to kobo/apps/subsequences/tests/test_manual_transcription.py diff --git a/kobo/apps/subsequences__new/actions/__init__.py b/kobo/apps/subsequences__new/actions/__init__.py deleted file mode 100644 index f439fed567..0000000000 --- a/kobo/apps/subsequences__new/actions/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .manual_transcription import ManualTranscriptionAction -from .manual_translation import ManualTranslationAction - -# TODO, what about using a loader for every class in "actions" folder (except base.py)? -ACTIONS = ( - ManualTranscriptionAction, - ManualTranslationAction, -) -ACTION_IDS_TO_CLASSES = {a.ID: a for a in ACTIONS} diff --git a/kobo/apps/subsequences__new/actions/base.py b/kobo/apps/subsequences__new/actions/base.py deleted file mode 100644 index 1913db7608..0000000000 --- a/kobo/apps/subsequences__new/actions/base.py +++ /dev/null @@ -1,294 +0,0 @@ -import datetime -from copy import deepcopy - -import jsonschema -from django.conf import settings -from django.utils import timezone - -from kobo.apps.kobo_auth.shortcuts import User -from kpi.exceptions import UsageLimitExceededException -from kpi.utils.usage_calculator import ServiceUsageCalculator -from ..exceptions import InvalidItem - -""" -### All actions must have the following components - -* (check!) a unique identifier for the action -* three jsonschemas: - 1. (check!) one to validate the parameters used to configure the action - * `ADVANCED_FEATURES_PARAMS_SCHEMA` - 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) - * the result of `modify_jsonschema()` - 3. 
one to validate the result of the action - the result of `modify_jsonschema()` - * OH NO, this doesn't happen at all yet -* a handler that receives a submission (and other metadata) and processes it -""" - -""" -idea of example content in asset.advanced_features (what kind of actions are activated per question) -{ - '_version': '20250820', - '_schema': { - 'my_audio_question': { - 'manual_transcription': [ - {'language': 'ar'}, - {'language': 'bn'}, - {'language': 'es'}, - ], - 'manual_translation': [{'language': 'fr'}, {'language': 'en'}], - }, - 'my_video_question': { - 'manual_transcription': [{'language': 'en'}], - }, - 'my_number_question': { - 'number_multiplier': [{'multiplier': 3}], - }, - }, -} - -idea of example data in SubmissionExtras based on the above -{ - '_version': '20250820', - '_submission': '', - 'my_audio_question': { - 'manual_transcription': { - 'transcript': 'هائج', - 'language': 'ar', - '_dateCreated': '2025-08-21T20:55:42.012053Z', - '_dateModified': '2025-08-21T20:57:28.154567Z', - '_revisions': [ - { - 'transcript': 'فارغ', - 'language': 'ar', - '_dateCreated': '2025-08-21T20:55:42.012053Z', - } - ], - }, - 'manual_translation': [ - { - 'language': 'en', - 'translation': 'berserk', - '_dateCreated': '2025-08-21T21:39:42.141306Z', - '_dateModified': '2025-08-21T21:39:42.141306Z', - }, - { - 'language': 'es', - 'translation': 'enloquecido', - '_dateCreated': '2025-08-21T21:40:54.644308Z', - '_dateModified': '2025-08-21T22:00:10.862880Z', - '_revisions': [ - { - 'translation': 'loco', - 'language': 'es', - '_dateCreated': '2025-08-21T21:40:54.644308Z', - } - ], - }, - ], - }, - 'my_video_question': { - 'manual_transcription': { - 'transcript': 'sea horse sea hell', - 'language': 'en', - '_dateCreated': '2025-08-21T21:06:20.059117Z', - '_dateModified': '2025-08-21T21:06:20.059117Z', - }, - }, - 'my_number_question': { - 'number_multiplier': { - 'numberMultiplied': 99, - '_dateCreated': '2025-08-21T21:09:34.504546Z', - '_dateModified': 
'2025-08-21T21:09:34.504546Z', - }, - }, -} -""" - - -def utc_datetime_to_js_str(dt: datetime.datetime) -> str: - """ - Return a string to represent a `datetime` following the simplification of - the ISO 8601 format used by JavaScript - """ - # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format - if dt.utcoffset() or not dt.tzinfo: - raise NotImplementedError('Only UTC datetimes are supported') - return dt.isoformat().replace('+00:00', 'Z') - - -class BaseAction: - def something_to_get_the_data_back_out(self): - # might need to deal with multiple columns for one action - # ^ definitely will - raise NotImplementedError - - DATE_CREATED_FIELD = '_dateCreated' - DATE_MODIFIED_FIELD = '_dateModified' - REVISIONS_FIELD = '_revisions' - - # Change my name, my parents hate me when I was born - item_reference_property = None - - def check_limits(self, user: User): - - if not settings.STRIPE_ENABLED or not self._is_usage_limited: - return - - calculator = ServiceUsageCalculator(user) - balances = calculator.get_usage_balances() - - balance = balances[self._limit_identifier] - if balance and balance['exceeded']: - raise UsageLimitExceededException() - - @classmethod - def validate_params(cls, params): - jsonschema.validate(params, cls.params_schema) - - def validate_data(self, data): - jsonschema.validate(data, self.data_schema) - - def validate_result(self, result): - jsonschema.validate(result, self.result_schema) - - @property - def result_schema(self): - """ - we also need a schema to define the final result that will be written - into SubmissionExtras - - we need to solve the problem of storing multiple results for a single action - """ - return NotImplementedError - - def retrieve_data(self, action_data: dict) -> dict: - """ - `action_data` must be ONLY the data for this particular action - instance, not the entire SubmissionExtras caboodle - - descendant classes could override with special manipulation if needed - """ - return 
action_data - - def revise_field(self, *args, **kwargs): - # TODO: remove this alias - import warnings - warnings.warn('Oh no, this method is going away!', DeprecationWarning) - return self.revise_data(*args, **kwargs) - - def revise_data( - self, submission: dict, submission_supplement: dict, edit: dict - ) -> dict: - """ - for actions that may have lengthy data, are we content to store the - entirety of the data for each revision, or do we need some kind of - differencing system? - """ - self.validate_data(edit) - self.raise_for_any_leading_underscore_key(edit) - - now_str = utc_datetime_to_js_str(timezone.now()) - item_index = None - submission_supplement_copy = deepcopy(submission_supplement) - if not self.item_reference_property: - revision = submission_supplement_copy - else: - needle = edit[self.item_reference_property] - revision = {} - if not isinstance(submission_supplement, list): - raise InvalidItem - - for idx, item in enumerate(submission_supplement): - if needle == item[self.item_reference_property]: - revision = deepcopy(item) - item_index = idx - break - - new_record = deepcopy(edit) - revisions = revision.pop(self.REVISIONS_FIELD, []) - - revision_creation_date = revision.pop(self.DATE_MODIFIED_FIELD, now_str) - record_creation_date = revision.pop(self.DATE_CREATED_FIELD, now_str) - revision[self.DATE_CREATED_FIELD] = revision_creation_date - new_record[self.DATE_MODIFIED_FIELD] = now_str - - if not self.item_reference_property: - if submission_supplement: - revisions.insert(0, revision) - new_record[self.REVISIONS_FIELD] = revisions - else: - if item_index is not None: - revisions.insert(0, revision) - new_record[self.REVISIONS_FIELD] = revisions - - new_record[self.DATE_CREATED_FIELD] = record_creation_date - - if self.item_reference_property: - if item_index is None: - submission_supplement_copy.append(new_record) - else: - submission_supplement_copy[item_index] = new_record - - new_record = submission_supplement_copy - - 
self.validate_result(new_record) - - return new_record - - - @staticmethod - def raise_for_any_leading_underscore_key(d: dict): - """ - Keys with leading underscores are reserved for metadata like - `_dateCreated`, `_dateModified`, and `_revisions`. No key with a - leading underscore should be present in data POSTed by a client or - generated by an action. - - Schema validation should block invalid keys, but this method exists as - a redundant check to guard against schema mistakes. - """ - for k in list(d.keys()): - try: - match = k.startswith('_') - except AttributeError: - continue - if match: - raise Exception( - 'An unexpected key with a leading underscore was found' - ) - - @property - def _is_usage_limited(self): - """ - Returns whether an action should check for usage limits. - """ - raise NotImplementedError() - - def _inject_data_schema(self, destination_schema: dict, skipped_keys: list): - """ - Utility function to inject data schema into another schema to - avoid repeating the same schema. - Useful to produce result schema. 
- """ - - for key, value in self.data_schema.items(): - if key in skipped_keys: - continue - - if key in destination_schema: - if isinstance(destination_schema[key], dict): - destination_schema[key].update(self.data_schema[key]) - elif isinstance(destination_schema[key], list): - destination_schema[key].extend(self.data_schema[key]) - else: - destination_schema[key] = self.data_schema[key] - else: - destination_schema[key] = self.data_schema[key] - - @property - def _limit_identifier(self): - # Example for automatic transcription - # - # from kobo.apps.organizations.constants import UsageType - # return UsageType.ASR_SECONDS - raise NotImplementedError() diff --git a/kobo/apps/subsequences__new/actions/manual_transcription.py b/kobo/apps/subsequences__new/actions/manual_transcription.py deleted file mode 100644 index 21b5f12c3b..0000000000 --- a/kobo/apps/subsequences__new/actions/manual_transcription.py +++ /dev/null @@ -1,131 +0,0 @@ -from copy import deepcopy - -from .base import BaseAction - - -class ManualTranscriptionAction(BaseAction): - ID = 'manual_transcription' - - def __init__(self, source_question_xpath, params): - self.source_question_xpath = source_question_xpath - self.params = params - - """ - For an audio question called `my_audio_question` that's transcribed - into 3 languages, the schema for `Asset.advanced_features` might look - like: - 'my_audio_question': { - 'manual_transcription': [ - {'language': 'ar'}, - {'language': 'bn'}, - {'language': 'es'}, - ], - } - - The `params_schema` attribute defines the shape of the array where each - element is an object with a single string property for the transcript - language. 
- """ - params_schema = { - 'type': 'array', - 'items': { - 'additionalProperties': False, - 'properties': { - 'language': { - 'type': 'string', - } - }, - 'required': ['language'], - 'type': 'object', - }, - } - - @property - def data_schema(self): # for lack of a better name - """ - POST to "/api/v2/assets//data//supplemental" - { - 'manual_transcription': { - 'language': 'es', - 'value': 'Almorzamos muy bien hoy', - } - } - """ - - return { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'language': {'$ref': '#/$defs/lang'}, - 'value': {'$ref': '#/$defs/transcript'}, - }, - 'allOf': [{'$ref': '#/$defs/lang_transcript_dependency'}], - '$defs': { - 'lang': {'type': 'string', 'enum': self.languages}, - 'transcript': {'type': 'string'}, - 'lang_transcript_dependency': { - 'allOf': [ - { - 'if': {'required': ['language']}, - 'then': {'required': ['value']}, - }, - { - 'if': {'required': ['value']}, - 'then': {'required': ['language']}, - }, - ] - }, - }, - } - - @property - def languages(self) -> list[str]: - languages = [] - for individual_params in self.params: - languages.append(individual_params['language']) - return languages - - @property - def result_schema(self): - - schema = { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.REVISIONS_FIELD: { - 'type': 'array', - 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, - }, - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], - '$defs': { - 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD], - }, - }, - } - - # 
Inject data schema in result schema template - self._inject_data_schema(schema, ['$schema', 'title', 'type']) - - # Also inject data schema in the revision definition - self._inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] - ) - - return schema - - @property - def _is_usage_limited(self): - return False diff --git a/kobo/apps/subsequences__new/exceptions.py b/kobo/apps/subsequences__new/exceptions.py deleted file mode 100644 index 3bfaae1331..0000000000 --- a/kobo/apps/subsequences__new/exceptions.py +++ /dev/null @@ -1,23 +0,0 @@ -class InvalidAction(Exception): - """ - The referenced action does not exist or was not configured for the given - question XPath at the asset level - """ - - pass - - -class InvalidItem(Exception): - """ - The referenced action does not contain a list of items - """ - - pass - -class InvalidXPath(Exception): - """ - The referenced question XPath was not configured for supplemental data at - the asset level - """ - - pass diff --git a/kobo/apps/subsequences__new/models.py b/kobo/apps/subsequences__new/models.py deleted file mode 100644 index 1cfce0925c..0000000000 --- a/kobo/apps/subsequences__new/models.py +++ /dev/null @@ -1,147 +0,0 @@ -from kobo.apps.subsequences.models import ( - SubmissionExtras, # just bullshit for now -) -from kpi.models import Asset -from .actions import ACTION_IDS_TO_CLASSES -from .exceptions import InvalidAction, InvalidXPath -from .schemas import validate_submission_supplement - -class SubmissionSupplement(SubmissionExtras): - class Meta(SubmissionExtras.Meta): - proxy = True - app_label = 'subsequences' - - @staticmethod - def revise_data( - asset: Asset, submission: dict, incoming_data: dict - ) -> dict: - schema_version = incoming_data.pop('_version') - if schema_version != '20250820': - # TODO: migrate from old per-submission schema - raise NotImplementedError - - if asset.advanced_features['_version'] != schema_version: - # TODO: migrate from old per-asset schema - raise 
NotImplementedError - - submission_uuid = submission['meta/rootUuid'] # constant? - supplemental_data = SubmissionExtras.objects.get_or_create( - asset=asset, submission_uuid=submission_uuid - )[0].content # lock it? - - retrieved_supplemental_data = {} - - for question_xpath, data_for_this_question in incoming_data.items(): - try: - action_configs_for_this_question = asset.advanced_features[ - '_actionConfigs' - ][question_xpath] - except KeyError as e: - raise InvalidXPath from e - - for action_id, action_data in data_for_this_question.items(): - try: - action_class = ACTION_IDS_TO_CLASSES[action_id] - except KeyError as e: - raise InvalidAction from e - try: - action_params = action_configs_for_this_question[action_id] - except KeyError as e: - raise InvalidAction from e - - action = action_class(question_xpath, action_params) - action.check_limits(asset.owner) - question_supplemental_data = supplemental_data.setdefault( - question_xpath, {} - ) - default_action_supplemental_data = ( - {} - if action.item_reference_property is None - else [] - ) - action_supplemental_data = question_supplemental_data.setdefault( - action_id, default_action_supplemental_data - ) - action_supplemental_data = action.revise_data( - submission, action_supplemental_data, action_data - ) - question_supplemental_data[action_id] = action_supplemental_data - retrieved_supplemental_data.setdefault(question_xpath, {})[ - action_id - ] = action.retrieve_data(action_supplemental_data) - - supplemental_data['_version'] = schema_version - validate_submission_supplement(asset, supplemental_data) - SubmissionExtras.objects.filter( - asset=asset, submission_uuid=submission_uuid - ).update(content=supplemental_data) - - retrieved_supplemental_data['_version'] = schema_version - return retrieved_supplemental_data - - - def retrieve_data(asset: Asset, submission_uuid: str) -> dict: - try: - supplemental_data = SubmissionExtras.objects.get( - asset=asset, submission_uuid=submission_uuid - ).content - 
except SubmissionExtras.DoesNotExist: - return {} - - schema_version = supplemental_data.pop('_version') - if schema_version != '20250820': - # TODO: migrate from old per-submission schema - raise NotImplementedError - - if asset.advanced_features['_version'] != schema_version: - # TODO: migrate from old per-asset schema - raise NotImplementedError - - retrieved_supplemental_data = {} - - for question_xpath, data_for_this_question in supplemental_data.items(): - processed_data_for_this_question = ( - retrieved_supplemental_data.setdefault(question_xpath, {}) - ) - action_configs = asset.advanced_features['_actionConfigs'] - try: - action_configs_for_this_question = action_configs[question_xpath] - except KeyError: - # There's still supplemental data for this question at the - # submission level, but the question is no longer configured at the - # asset level. - # Allow this for now, but maybe forbid later and also forbid - # removing things from the asset-level action configuration? - # Actions could be disabled or hidden instead of being removed - - # FIXME: divergence between the asset-level configuration and - # submission-level supplemental data is going to cause schema - # validation failures! We defo need to forbid removal of actions - # and instead provide a way to mark them as deleted - continue - - for action_id, action_data in data_for_this_question.items(): - try: - action_class = ACTION_IDS_TO_CLASSES[action_id] - except KeyError: - # An action class present in the submission data no longer - # exists in the application code - # TODO: log an error - continue - try: - action_params = action_configs_for_this_question[action_id] - except KeyError: - # An action class present in the submission data is no longer - # configured at the asset level for this question - # Allow this for now, but maybe forbid later and also forbid - # removing things from the asset-level action configuration? 
- # Actions could be disabled or hidden instead of being removed - continue - - action = action_class(question_xpath, action_params) - processed_data_for_this_question[action_id] = action.retrieve_data( - action_data - ) - - retrieved_supplemental_data['_version'] = schema_version - return retrieved_supplemental_data diff --git a/kobo/apps/subsequences/README-draft.md b/kobo/apps/subsequences__old/README-draft.md similarity index 100% rename from kobo/apps/subsequences/README-draft.md rename to kobo/apps/subsequences__old/README-draft.md diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences__old/README.md similarity index 100% rename from kobo/apps/subsequences/README.md rename to kobo/apps/subsequences__old/README.md diff --git a/kobo/apps/subsequences__old/__init__.py b/kobo/apps/subsequences__old/__init__.py new file mode 100644 index 0000000000..5f46bdbac3 --- /dev/null +++ b/kobo/apps/subsequences__old/__init__.py @@ -0,0 +1,17 @@ +''' +`kobo.apps.subsequences` --as in Sub(mission)Sequences is an app for defining +and following a sequence of actions or changes to a submission that has come +into kobo. 
+ +models: +- SubmissionData: + Holds a JSONField with the "supplementalData" necessary to complete the + +tasks: +(things that are queued in celery for later action) + +needs writeup: + - how to develop / debug within this app + - description of tests + +''' diff --git a/kobo/apps/subsequences/scripts/__init__.py b/kobo/apps/subsequences__old/actions/__init__.py similarity index 100% rename from kobo/apps/subsequences/scripts/__init__.py rename to kobo/apps/subsequences__old/actions/__init__.py diff --git a/kobo/apps/subsequences/actions/automatic_transcription.py b/kobo/apps/subsequences__old/actions/automatic_transcription.py similarity index 100% rename from kobo/apps/subsequences/actions/automatic_transcription.py rename to kobo/apps/subsequences__old/actions/automatic_transcription.py diff --git a/kobo/apps/subsequences__old/actions/base.py b/kobo/apps/subsequences__old/actions/base.py new file mode 100644 index 0000000000..f8dbe659aa --- /dev/null +++ b/kobo/apps/subsequences__old/actions/base.py @@ -0,0 +1,128 @@ +import datetime +from zoneinfo import ZoneInfo + +from django.utils import timezone + +from kobo.apps.subsequences.constants import GOOGLETS, GOOGLETX + +ACTION_NEEDED = 'ACTION_NEEDED' +PASSES = 'PASSES' + + +class BaseAction: + ID = None + _destination_field = '_supplementalDetails' + + DATE_CREATED_FIELD = 'dateCreated' + DATE_MODIFIED_FIELD = 'dateModified' + DELETE = '⌫' + + def __init__(self, params): + self.load_params(params) + + def cur_time(self): + return datetime.datetime.now(tz=ZoneInfo('UTC')).strftime('%Y-%m-%dT%H:%M:%SZ') + + def load_params(self, params): + raise NotImplementedError('subclass must define a load_params method') + + def run_change(self, params): + raise NotImplementedError('subclass must define a run_change method') + + def check_submission_status(self, submission): + return PASSES + + def modify_jsonschema(self, schema): + return schema + + def compile_revised_record(self, content, edits): + """ + a method that 
applies changes to a json structure and appends previous + changes to a revision history + """ + + # TODO: should this handle managing `DATE_CREATED_FIELD`, + # `DATE_MODIFIED_FIELD`, etc. instead of delegating that to + # `revise_record()` as it currently does? + + if self.ID is None: + return content + for field_name, vals in edits.items(): + if field_name == 'submission': + continue + + erecord = vals.get(self.ID) + o_keyval = content.get(field_name, {}) + for extra in [GOOGLETX, GOOGLETS]: + if extra in vals: + o_keyval[extra] = vals[extra] + content[field_name] = o_keyval + + orecord = o_keyval.get(self.ID) + if erecord is None: + continue + if self.is_auto_request(erecord): + content[field_name].update( + self.auto_request_repr(erecord) + ) + continue + if orecord is None: + compiled_record = self.init_field(erecord) + elif not self.has_change(orecord, erecord): + continue + else: + compiled_record = self.revise_field(orecord, erecord) + o_keyval[self.ID] = compiled_record + content[field_name] = o_keyval + return content + + def auto_request_repr(self, erecord): + raise NotImplementedError() + + def is_auto_request(self, erecord): + return self.record_repr(erecord) == 'GOOGLE' + + def init_field(self, edit): + edit[self.DATE_CREATED_FIELD] = \ + edit[self.DATE_MODIFIED_FIELD] = \ + str(timezone.now()).split('.')[0] + return {**edit, 'revisions': []} + + def revise_field(self, original, edit): + if self.record_repr(edit) == self.DELETE: + return {} + record = {**original} + revisions = record.pop('revisions', []) + if self.DATE_CREATED_FIELD in record: + del record[self.DATE_CREATED_FIELD] + edit[self.DATE_MODIFIED_FIELD] = \ + edit[self.DATE_CREATED_FIELD] = \ + str(timezone.now()).split('.')[0] + if len(revisions) > 0: + date_modified = revisions[-1].get(self.DATE_MODIFIED_FIELD) + edit[self.DATE_CREATED_FIELD] = date_modified + return {**edit, 'revisions': [record, *revisions]} + + def record_repr(self, record): + return record.get('value') + + def 
has_change(self, original, edit): + return self.record_repr(original) != self.record_repr(edit) + + @classmethod + def build_params(cls, *args, **kwargs): + raise NotImplementedError(f'{cls.__name__} has not implemented a build_params method') + + def get_xpath(self, row): + # return the full path... + for name_field in ['xpath', 'name', '$autoname']: + if name_field in row: + return row[name_field] + return None + + @classmethod + def get_name(cls, row): + for name_field in ['name', '$autoname']: + if name_field in row: + return row[name_field] + return None diff --git a/kobo/apps/subsequences/actions/keyword_search.py b/kobo/apps/subsequences__old/actions/keyword_search.py similarity index 100% rename from kobo/apps/subsequences/actions/keyword_search.py rename to kobo/apps/subsequences__old/actions/keyword_search.py diff --git a/kobo/apps/subsequences__old/actions/manual_transcription.py b/kobo/apps/subsequences__old/actions/manual_transcription.py new file mode 100644 index 0000000000..1a64212763 --- /dev/null +++ b/kobo/apps/subsequences__old/actions/manual_transcription.py @@ -0,0 +1,134 @@ +import jsonschema +from ..constants import TRANSCRIBABLE_SOURCE_TYPES +#from ..actions.base import BaseAction + +""" +### All actions must have the following components + +* (check!) a unique identifier for the action +* three jsonschemas: + 1. (check!) one to validate the parameters used to configure the action + * `ADVANCED_FEATURES_PARAMS_SCHEMA` + 2. (check!) one to validate users' requests to invoke the action, which many contain content (e.g. a manual transcript) + * the result of `modify_jsonschema()` + 3. 
one to validate the result of the action - the result of `modify_jsonschema()` + * OH NO, this doesn't happen at all yet +* a handler that receives a submission (and other metadata) and processes it +""" + +""" +idea of example content in asset.advanced_features (what kind of actions are activated per question) +{ + 'version': '20250820', + 'schema': { + 'my_audio_question': { + 'manual_transcription': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + 'manual_translation': [{'language': 'fr'}], + }, + 'my_video_question': { + 'manual_transcription': [{'language': 'en'}], + }, + 'my_number_question': { + 'number_multiplier': [{'multiplier': 3}], + }, + }, +} +""" + +class BaseAction: + @classmethod + def validate_params(cls, params): + jsonschema.validate(params, cls.params_schema) + + def validate_data(self, data): + jsonschema.validate(data, self.data_schema) + +class ManualTranscriptionAction(BaseAction): + ID = 'manual_transcription' + + def __init__(self, source_question_xpath, params): + self.source_question_xpath = source_question_xpath + self.params = params + + """ + For an audio question called `my_audio_question` that's transcribed + into 3 languages, the schema for `Asset.advanced_features` might look + like: + 'my_audio_question': { + 'manual_transcription': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + } + + The `params_schema` attribute defines the shape of the array where each + element is an object with a single string property for the transcript + language. + """ + params_schema = { + 'type': 'array', + 'items': { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + } + }, + 'required': ['language'], + 'type': 'object', + }, + } + + @property + def data_schema(self): # for lack of a better name + """ + (currently) POST to "/advanced_submission_post/aSsEtUiD" + POST to "/api/v2/assets//data//supplemental" # idk, rename? 
+ { + 'manual_transcription': { + 'language': 'es', + 'transcript': 'Almorzamos muy bien hoy', + } + } + """ + languages = [] + for individual_params in self.params: + languages.append(individual_params['language']) + + return { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + 'enum': languages, + }, + 'transcript': { + 'type': 'string', + }, + }, + 'required': ['language', 'transcript'], + 'type': 'object', + } + + @property + @classmethod + def result_schema(cls): + """ + we also need a schema to define the final result that will be written + into SubmissionExtras + + we need to solve the problem of storing multiple results for a single action + """ + raise NotImplementedError + + + def load_params(self, params): + """ + idk maybe we use this to read the language out of `Asset.advanced_features` + """ + self.possible_transcribed_fields = params['values'] diff --git a/kobo/apps/subsequences/actions/number_doubler.py b/kobo/apps/subsequences__old/actions/number_doubler.py similarity index 100% rename from kobo/apps/subsequences/actions/number_doubler.py rename to kobo/apps/subsequences__old/actions/number_doubler.py diff --git a/kobo/apps/subsequences/actions/qual.py b/kobo/apps/subsequences__old/actions/qual.py similarity index 100% rename from kobo/apps/subsequences/actions/qual.py rename to kobo/apps/subsequences__old/actions/qual.py diff --git a/kobo/apps/subsequences/actions/states.py b/kobo/apps/subsequences__old/actions/states.py similarity index 100% rename from kobo/apps/subsequences/actions/states.py rename to kobo/apps/subsequences__old/actions/states.py diff --git a/kobo/apps/subsequences/actions/translation.py b/kobo/apps/subsequences__old/actions/translation.py similarity index 100% rename from kobo/apps/subsequences/actions/translation.py rename to kobo/apps/subsequences__old/actions/translation.py diff --git a/kobo/apps/subsequences/actions/unknown_action.py 
b/kobo/apps/subsequences__old/actions/unknown_action.py similarity index 100% rename from kobo/apps/subsequences/actions/unknown_action.py rename to kobo/apps/subsequences__old/actions/unknown_action.py diff --git a/kobo/apps/subsequences/advanced_features_params_schema.py b/kobo/apps/subsequences__old/advanced_features_params_schema.py similarity index 100% rename from kobo/apps/subsequences/advanced_features_params_schema.py rename to kobo/apps/subsequences__old/advanced_features_params_schema.py diff --git a/kobo/apps/subsequences/api_view.py b/kobo/apps/subsequences__old/api_view.py similarity index 100% rename from kobo/apps/subsequences/api_view.py rename to kobo/apps/subsequences__old/api_view.py diff --git a/kobo/apps/subsequences/apps.py b/kobo/apps/subsequences__old/apps.py similarity index 100% rename from kobo/apps/subsequences/apps.py rename to kobo/apps/subsequences__old/apps.py diff --git a/kobo/apps/subsequences/constants.py b/kobo/apps/subsequences__old/constants.py similarity index 100% rename from kobo/apps/subsequences/constants.py rename to kobo/apps/subsequences__old/constants.py diff --git a/kobo/apps/subsequences__old/exceptions.py b/kobo/apps/subsequences__old/exceptions.py new file mode 100644 index 0000000000..541e4d3f9c --- /dev/null +++ b/kobo/apps/subsequences__old/exceptions.py @@ -0,0 +1,20 @@ +class AudioTooLongError(Exception): + """Audio file is too long for specified speech service""" + + +class SubsequenceTimeoutError(Exception): + pass + + +class TranscriptionResultsNotFound(Exception): + """ + No results returned by specified transcription service + """ + + +class TranslationAsyncResultAvailable(Exception): + pass + + +class TranslationResultsNotFound(Exception): + pass diff --git a/kobo/apps/subsequences/integrations/__init__.py b/kobo/apps/subsequences__old/integrations/__init__.py similarity index 100% rename from kobo/apps/subsequences/integrations/__init__.py rename to kobo/apps/subsequences__old/integrations/__init__.py 
diff --git a/kobo/apps/subsequences/tasks/__init__.py b/kobo/apps/subsequences__old/integrations/google/__init__.py similarity index 100% rename from kobo/apps/subsequences/tasks/__init__.py rename to kobo/apps/subsequences__old/integrations/google/__init__.py diff --git a/kobo/apps/subsequences/integrations/google/base.py b/kobo/apps/subsequences__old/integrations/google/base.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/base.py rename to kobo/apps/subsequences__old/integrations/google/base.py diff --git a/kobo/apps/subsequences/integrations/google/google_transcribe.py b/kobo/apps/subsequences__old/integrations/google/google_transcribe.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/google_transcribe.py rename to kobo/apps/subsequences__old/integrations/google/google_transcribe.py diff --git a/kobo/apps/subsequences/integrations/google/google_translate.py b/kobo/apps/subsequences__old/integrations/google/google_translate.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/google_translate.py rename to kobo/apps/subsequences__old/integrations/google/google_translate.py diff --git a/kobo/apps/subsequences/integrations/google/utils.py b/kobo/apps/subsequences__old/integrations/google/utils.py similarity index 100% rename from kobo/apps/subsequences/integrations/google/utils.py rename to kobo/apps/subsequences__old/integrations/google/utils.py diff --git a/kobo/apps/subsequences/integrations/misc.py b/kobo/apps/subsequences__old/integrations/misc.py similarity index 100% rename from kobo/apps/subsequences/integrations/misc.py rename to kobo/apps/subsequences__old/integrations/misc.py diff --git a/kobo/apps/subsequences/integrations/translate.py b/kobo/apps/subsequences__old/integrations/translate.py similarity index 100% rename from kobo/apps/subsequences/integrations/translate.py rename to kobo/apps/subsequences__old/integrations/translate.py diff --git 
a/kobo/apps/subsequences/jsonschemas/qual_schema.py b/kobo/apps/subsequences__old/jsonschemas/qual_schema.py similarity index 100% rename from kobo/apps/subsequences/jsonschemas/qual_schema.py rename to kobo/apps/subsequences__old/jsonschemas/qual_schema.py diff --git a/kobo/apps/subsequences/migrations/0001_initial.py b/kobo/apps/subsequences__old/migrations/0001_initial.py similarity index 100% rename from kobo/apps/subsequences/migrations/0001_initial.py rename to kobo/apps/subsequences__old/migrations/0001_initial.py diff --git a/kobo/apps/subsequences/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py b/kobo/apps/subsequences__old/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py similarity index 100% rename from kobo/apps/subsequences/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py rename to kobo/apps/subsequences__old/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py diff --git a/kobo/apps/subsequences/migrations/0003_alter_submissionextras_date_created_and_more.py b/kobo/apps/subsequences__old/migrations/0003_alter_submissionextras_date_created_and_more.py similarity index 100% rename from kobo/apps/subsequences/migrations/0003_alter_submissionextras_date_created_and_more.py rename to kobo/apps/subsequences__old/migrations/0003_alter_submissionextras_date_created_and_more.py diff --git a/kobo/apps/subsequences/migrations/0004_increase_subsequences_submission_uuid.py b/kobo/apps/subsequences__old/migrations/0004_increase_subsequences_submission_uuid.py similarity index 100% rename from kobo/apps/subsequences/migrations/0004_increase_subsequences_submission_uuid.py rename to kobo/apps/subsequences__old/migrations/0004_increase_subsequences_submission_uuid.py diff --git a/kobo/apps/subsequences__new/__init__.py b/kobo/apps/subsequences__old/migrations/__init__.py similarity index 100% rename from kobo/apps/subsequences__new/__init__.py rename to 
kobo/apps/subsequences__old/migrations/__init__.py diff --git a/kobo/apps/subsequences__old/models.py b/kobo/apps/subsequences__old/models.py new file mode 100644 index 0000000000..e26957e8dc --- /dev/null +++ b/kobo/apps/subsequences__old/models.py @@ -0,0 +1,64 @@ +# coding: utf-8 + +from django.db import models + +from kpi.models import Asset +from kpi.models.abstract_models import AbstractTimeStampedModel +from .constants import GOOGLETS, GOOGLETX +from .utils.determine_export_cols_with_values import determine_export_cols_indiv + + +class SubmissionExtras(AbstractTimeStampedModel): + + submission_uuid = models.CharField(max_length=249) + content = models.JSONField(default=dict) + asset = models.ForeignKey( + Asset, + related_name='submission_extras', + on_delete=models.CASCADE, + ) + + class Meta: + # ideally `submission_uuid` is universally unique, but its uniqueness + # per-asset is most important + unique_together = (('asset', 'submission_uuid'),) + + def save(self, *args, **kwargs): + # We need to import these here because of circular imports + from .integrations.google.google_transcribe import GoogleTranscriptionService + from .integrations.google.google_translate import GoogleTranslationService + + features = self.asset.advanced_features + for xpath, vals in self.content.items(): + if 'transcript' in features: + options = vals.get(GOOGLETS, {}) + if options.get('status') == 'requested': + service = GoogleTranscriptionService(self) + vals[GOOGLETS] = service.process_data(xpath, vals) + if 'translation' in features: + options = vals.get(GOOGLETX, {}) + if options.get('status') == 'requested': + service = GoogleTranslationService(self) + vals[GOOGLETX] = service.process_data(xpath, vals) + + asset_changes = False + asset_known_cols = self.asset.known_cols + for kc in determine_export_cols_indiv(self.content): + if kc not in asset_known_cols: + asset_changes = True + asset_known_cols.append(kc) + + if asset_changes: + self.asset.known_cols = asset_known_cols 
+ self.asset.save(create_version=False) + + super().save(*args, **kwargs) + + @property + def full_content(self): + _content = {} + _content.update(self.content) + _content.update({ + 'timestamp': str(self.date_created), + }) + return _content diff --git a/kobo/apps/subsequences/prev.py b/kobo/apps/subsequences__old/prev.py similarity index 100% rename from kobo/apps/subsequences/prev.py rename to kobo/apps/subsequences__old/prev.py diff --git a/kobo/apps/subsequences__new/tests/__init__.py b/kobo/apps/subsequences__old/scripts/__init__.py similarity index 100% rename from kobo/apps/subsequences__new/tests/__init__.py rename to kobo/apps/subsequences__old/scripts/__init__.py diff --git a/kobo/apps/subsequences/scripts/activate_advanced_features_for_newest_asset.py b/kobo/apps/subsequences__old/scripts/activate_advanced_features_for_newest_asset.py similarity index 100% rename from kobo/apps/subsequences/scripts/activate_advanced_features_for_newest_asset.py rename to kobo/apps/subsequences__old/scripts/activate_advanced_features_for_newest_asset.py diff --git a/kobo/apps/subsequences/scripts/add_qual_to_last_question_of_last_asset.py b/kobo/apps/subsequences__old/scripts/add_qual_to_last_question_of_last_asset.py similarity index 100% rename from kobo/apps/subsequences/scripts/add_qual_to_last_question_of_last_asset.py rename to kobo/apps/subsequences__old/scripts/add_qual_to_last_question_of_last_asset.py diff --git a/kobo/apps/subsequences/scripts/export_analysis_form.py b/kobo/apps/subsequences__old/scripts/export_analysis_form.py similarity index 100% rename from kobo/apps/subsequences/scripts/export_analysis_form.py rename to kobo/apps/subsequences__old/scripts/export_analysis_form.py diff --git a/kobo/apps/subsequences/scripts/recalc_latest_subex.py b/kobo/apps/subsequences__old/scripts/recalc_latest_subex.py similarity index 100% rename from kobo/apps/subsequences/scripts/recalc_latest_subex.py rename to 
kobo/apps/subsequences__old/scripts/recalc_latest_subex.py diff --git a/kobo/apps/subsequences/scripts/repop_known_cols.py b/kobo/apps/subsequences__old/scripts/repop_known_cols.py similarity index 100% rename from kobo/apps/subsequences/scripts/repop_known_cols.py rename to kobo/apps/subsequences__old/scripts/repop_known_cols.py diff --git a/kobo/apps/subsequences/scripts/subsequences_export.py b/kobo/apps/subsequences__old/scripts/subsequences_export.py similarity index 100% rename from kobo/apps/subsequences/scripts/subsequences_export.py rename to kobo/apps/subsequences__old/scripts/subsequences_export.py diff --git a/kobo/apps/subsequences__new/tests/api/__init__.py b/kobo/apps/subsequences__old/tasks/__init__.py similarity index 100% rename from kobo/apps/subsequences__new/tests/api/__init__.py rename to kobo/apps/subsequences__old/tasks/__init__.py diff --git a/kobo/apps/subsequences__new/tests/api/v2/__init__.py b/kobo/apps/subsequences__old/tests/__init__.py similarity index 100% rename from kobo/apps/subsequences__new/tests/api/v2/__init__.py rename to kobo/apps/subsequences__old/tests/__init__.py diff --git a/kobo/apps/subsequences/tests/test_known_cols_utils.py b/kobo/apps/subsequences__old/tests/test_known_cols_utils.py similarity index 100% rename from kobo/apps/subsequences/tests/test_known_cols_utils.py rename to kobo/apps/subsequences__old/tests/test_known_cols_utils.py diff --git a/kobo/apps/subsequences/tests/test_nlp_integration.py b/kobo/apps/subsequences__old/tests/test_nlp_integration.py similarity index 100% rename from kobo/apps/subsequences/tests/test_nlp_integration.py rename to kobo/apps/subsequences__old/tests/test_nlp_integration.py diff --git a/kobo/apps/subsequences/tests/test_number_doubler.py b/kobo/apps/subsequences__old/tests/test_number_doubler.py similarity index 100% rename from kobo/apps/subsequences/tests/test_number_doubler.py rename to kobo/apps/subsequences__old/tests/test_number_doubler.py diff --git 
a/kobo/apps/subsequences/tests/test_proj_advanced_features.py b/kobo/apps/subsequences__old/tests/test_proj_advanced_features.py similarity index 100% rename from kobo/apps/subsequences/tests/test_proj_advanced_features.py rename to kobo/apps/subsequences__old/tests/test_proj_advanced_features.py diff --git a/kobo/apps/subsequences/tests/test_submission_extras_api_post.py b/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py similarity index 100% rename from kobo/apps/subsequences/tests/test_submission_extras_api_post.py rename to kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py diff --git a/kobo/apps/subsequences/tests/test_submission_extras_content.py b/kobo/apps/subsequences__old/tests/test_submission_extras_content.py similarity index 100% rename from kobo/apps/subsequences/tests/test_submission_extras_content.py rename to kobo/apps/subsequences__old/tests/test_submission_extras_content.py diff --git a/kobo/apps/subsequences/tests/test_submission_stream.py b/kobo/apps/subsequences__old/tests/test_submission_stream.py similarity index 100% rename from kobo/apps/subsequences/tests/test_submission_stream.py rename to kobo/apps/subsequences__old/tests/test_submission_stream.py diff --git a/kobo/apps/subsequences/urls.py b/kobo/apps/subsequences__old/urls.py similarity index 100% rename from kobo/apps/subsequences/urls.py rename to kobo/apps/subsequences__old/urls.py diff --git a/kobo/apps/subsequences/utils/__init__.py b/kobo/apps/subsequences__old/utils/__init__.py similarity index 100% rename from kobo/apps/subsequences/utils/__init__.py rename to kobo/apps/subsequences__old/utils/__init__.py diff --git a/kobo/apps/subsequences/utils/deprecation.py b/kobo/apps/subsequences__old/utils/deprecation.py similarity index 100% rename from kobo/apps/subsequences/utils/deprecation.py rename to kobo/apps/subsequences__old/utils/deprecation.py diff --git a/kobo/apps/subsequences/utils/determine_export_cols_with_values.py 
b/kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py similarity index 100% rename from kobo/apps/subsequences/utils/determine_export_cols_with_values.py rename to kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py diff --git a/kobo/apps/subsequences/utils/parse_known_cols.py b/kobo/apps/subsequences__old/utils/parse_known_cols.py similarity index 100% rename from kobo/apps/subsequences/utils/parse_known_cols.py rename to kobo/apps/subsequences__old/utils/parse_known_cols.py From 06df33aace476e2e290480fceaabe6f0b9fee23b Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Sat, 23 Aug 2025 01:51:57 -0400 Subject: [PATCH 067/138] Add forgotten staticmethod decorator --- kobo/apps/subsequences/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 6a0589de8d..54b2c0e8bd 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -30,6 +30,7 @@ class Meta(SubmissionExtras.Meta): def __repr__(self): return f'Supplement for submission {self.submission_uuid}' + @staticmethod def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> dict: schema_version = incoming_data.get('_version') if schema_version != '20250820': @@ -103,6 +104,7 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di retrieved_supplemental_data['_version'] = schema_version return retrieved_supplemental_data + @staticmethod def retrieve_data( asset: 'kpi.Asset', submission_root_uuid: str | None = None, From 7222f8ba35b44f278a4713b2363cf977837712a2 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Sat, 23 Aug 2025 01:52:49 -0400 Subject: [PATCH 068/138] Remove `uuid:` prefix in `revise_data()` --- kobo/apps/subsequences/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 54b2c0e8bd..55b450c737 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -41,7 +41,7 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di # TODO: migrate from old per-asset schema raise NotImplementedError - submission_uuid = submission['meta/rootUuid'] # constant? + submission_uuid = remove_uuid_prefix(submission['meta/rootUuid']) # constant? supplemental_data = SubmissionExtras.objects.get_or_create( asset=asset, submission_uuid=submission_uuid )[ From f1a5fe1666cee52938bcbbd6b8086bed00ee7f77 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Sun, 24 Aug 2025 11:18:55 -0400 Subject: [PATCH 069/138] Kill a little bit more old subsequence django app --- .../migrations/0001_initial.py | 0 ...nique_together_asset_and_submission_uuid.py | 0 ...r_submissionextras_date_created_and_more.py | 0 ...04_increase_subsequences_submission_uuid.py | 0 .../migrations/__init__.py | 0 .../actions/automatic_transcription.py | 2 +- kobo/apps/subsequences__old/actions/base.py | 2 +- .../actions/manual_transcription.py | 2 +- .../subsequences__old/actions/translation.py | 2 +- kobo/apps/subsequences__old/api_view.py | 10 +++++----- kobo/apps/subsequences__old/apps.py | 4 ++-- .../integrations/google/base.py | 4 ++-- kobo/apps/subsequences__old/models.py | 4 ++-- .../scripts/export_analysis_form.py | 6 +++--- .../scripts/recalc_latest_subex.py | 6 +++--- .../scripts/repop_known_cols.py | 14 +++++++------- .../scripts/subsequences_export.py | 6 +++--- .../tests/test_known_cols_utils.py | 2 +- .../tests/test_nlp_integration.py | 10 +++++----- .../tests/test_submission_extras_api_post.py | 18 +++++++++--------- 
.../tests/test_submission_stream.py | 12 ++++++------ .../utils/determine_export_cols_with_values.py | 2 +- kobo/settings/base.py | 1 + 23 files changed, 54 insertions(+), 53 deletions(-) rename kobo/apps/{subsequences__old => subsequences}/migrations/0001_initial.py (100%) rename kobo/apps/{subsequences__old => subsequences}/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py (100%) rename kobo/apps/{subsequences__old => subsequences}/migrations/0003_alter_submissionextras_date_created_and_more.py (100%) rename kobo/apps/{subsequences__old => subsequences}/migrations/0004_increase_subsequences_submission_uuid.py (100%) rename kobo/apps/{subsequences__old => subsequences}/migrations/__init__.py (100%) diff --git a/kobo/apps/subsequences__old/migrations/0001_initial.py b/kobo/apps/subsequences/migrations/0001_initial.py similarity index 100% rename from kobo/apps/subsequences__old/migrations/0001_initial.py rename to kobo/apps/subsequences/migrations/0001_initial.py diff --git a/kobo/apps/subsequences__old/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py b/kobo/apps/subsequences/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py similarity index 100% rename from kobo/apps/subsequences__old/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py rename to kobo/apps/subsequences/migrations/0002_non_nullable_unique_together_asset_and_submission_uuid.py diff --git a/kobo/apps/subsequences__old/migrations/0003_alter_submissionextras_date_created_and_more.py b/kobo/apps/subsequences/migrations/0003_alter_submissionextras_date_created_and_more.py similarity index 100% rename from kobo/apps/subsequences__old/migrations/0003_alter_submissionextras_date_created_and_more.py rename to kobo/apps/subsequences/migrations/0003_alter_submissionextras_date_created_and_more.py diff --git a/kobo/apps/subsequences__old/migrations/0004_increase_subsequences_submission_uuid.py 
b/kobo/apps/subsequences/migrations/0004_increase_subsequences_submission_uuid.py similarity index 100% rename from kobo/apps/subsequences__old/migrations/0004_increase_subsequences_submission_uuid.py rename to kobo/apps/subsequences/migrations/0004_increase_subsequences_submission_uuid.py diff --git a/kobo/apps/subsequences__old/migrations/__init__.py b/kobo/apps/subsequences/migrations/__init__.py similarity index 100% rename from kobo/apps/subsequences__old/migrations/__init__.py rename to kobo/apps/subsequences/migrations/__init__.py diff --git a/kobo/apps/subsequences__old/actions/automatic_transcription.py b/kobo/apps/subsequences__old/actions/automatic_transcription.py index 49464c6c06..acb73159f3 100644 --- a/kobo/apps/subsequences__old/actions/automatic_transcription.py +++ b/kobo/apps/subsequences__old/actions/automatic_transcription.py @@ -1,4 +1,4 @@ -from kobo.apps.subsequences.constants import GOOGLETS +from kobo.apps.subsequences__old.constants import GOOGLETS from ..constants import TRANSCRIBABLE_SOURCE_TYPES from ..actions.base import BaseAction, ACTION_NEEDED, PASSES diff --git a/kobo/apps/subsequences__old/actions/base.py b/kobo/apps/subsequences__old/actions/base.py index f8dbe659aa..4597d7dd4c 100644 --- a/kobo/apps/subsequences__old/actions/base.py +++ b/kobo/apps/subsequences__old/actions/base.py @@ -3,7 +3,7 @@ from django.utils import timezone -from kobo.apps.subsequences.constants import GOOGLETS, GOOGLETX +from kobo.apps.subsequences__old.constants import GOOGLETS, GOOGLETX ACTION_NEEDED = 'ACTION_NEEDED' PASSES = 'PASSES' diff --git a/kobo/apps/subsequences__old/actions/manual_transcription.py b/kobo/apps/subsequences__old/actions/manual_transcription.py index 1a64212763..c5e07d249f 100644 --- a/kobo/apps/subsequences__old/actions/manual_transcription.py +++ b/kobo/apps/subsequences__old/actions/manual_transcription.py @@ -120,7 +120,7 @@ def data_schema(self): # for lack of a better name def result_schema(cls): """ we also need a schema 
to define the final result that will be written - into SubmissionExtras + into SubmissionExtrasOld we need to solve the problem of storing multiple results for a single action """ diff --git a/kobo/apps/subsequences__old/actions/translation.py b/kobo/apps/subsequences__old/actions/translation.py index 2a838aae2a..8ca67aa44c 100644 --- a/kobo/apps/subsequences__old/actions/translation.py +++ b/kobo/apps/subsequences__old/actions/translation.py @@ -1,4 +1,4 @@ -from kobo.apps.subsequences.constants import GOOGLETX +from kobo.apps.subsequences__old.constants import GOOGLETX from ..constants import TRANSLATABLE_SOURCE_TYPES from ..actions.base import BaseAction diff --git a/kobo/apps/subsequences__old/api_view.py b/kobo/apps/subsequences__old/api_view.py index 26292425e4..6db0d35401 100644 --- a/kobo/apps/subsequences__old/api_view.py +++ b/kobo/apps/subsequences__old/api_view.py @@ -12,9 +12,9 @@ from kobo.apps.audit_log.models import AuditType from kobo.apps.openrosa.apps.logger.models import Instance from kobo.apps.organizations.constants import UsageType -from kobo.apps.subsequences.constants import GOOGLETS, GOOGLETX -from kobo.apps.subsequences.models import SubmissionExtras -from kobo.apps.subsequences.utils.deprecation import get_sanitized_dict_keys +from kobo.apps.subsequences__old.constants import GOOGLETS, GOOGLETX +from kobo.apps.subsequences__old.models import SubmissionExtrasOld +from kobo.apps.subsequences__old.utils.deprecation import get_sanitized_dict_keys from kpi.exceptions import UsageLimitExceededException from kpi.models import Asset from kpi.utils.usage_calculator import ServiceUsageCalculator @@ -138,6 +138,6 @@ def get_submission_processing(asset, s_uuid): submission_extra.content = content return Response(submission_extra.content) - except SubmissionExtras.DoesNotExist: - # submission might exist but no SubmissionExtras object has been created + except SubmissionExtrasOld.DoesNotExist: + # submission might exist but no SubmissionExtrasOld 
object has been created return Response({'info': f'nothing found for submission: {s_uuid}'}) diff --git a/kobo/apps/subsequences__old/apps.py b/kobo/apps/subsequences__old/apps.py index 1ad7274a83..9a9078401d 100644 --- a/kobo/apps/subsequences__old/apps.py +++ b/kobo/apps/subsequences__old/apps.py @@ -2,5 +2,5 @@ from django.apps import AppConfig -class SubsequencesConfig(AppConfig): - name = 'kobo.apps.subsequences' +class SubsequencesOldConfig(AppConfig): + name = 'kobo.apps.subsequences__old' diff --git a/kobo/apps/subsequences__old/integrations/google/base.py b/kobo/apps/subsequences__old/integrations/google/base.py index 8dcd4cf894..f55c7a7447 100644 --- a/kobo/apps/subsequences__old/integrations/google/base.py +++ b/kobo/apps/subsequences__old/integrations/google/base.py @@ -15,7 +15,7 @@ from kpi.utils.log import logging from ...constants import GOOGLE_CACHE_TIMEOUT, make_nlp_async_cache_key from ...exceptions import SubsequenceTimeoutError -from ...models import SubmissionExtras +from ...models import SubmissionExtrasOld from .utils import google_credentials_from_constance_config @@ -30,7 +30,7 @@ class GoogleService(ABC): API_VERSION = None API_RESOURCE = None - def __init__(self, submission: SubmissionExtras): + def __init__(self, submission: SubmissionExtrasOld): super().__init__() self.submission = submission self.asset = submission.asset diff --git a/kobo/apps/subsequences__old/models.py b/kobo/apps/subsequences__old/models.py index e26957e8dc..4185e21bc2 100644 --- a/kobo/apps/subsequences__old/models.py +++ b/kobo/apps/subsequences__old/models.py @@ -8,13 +8,13 @@ from .utils.determine_export_cols_with_values import determine_export_cols_indiv -class SubmissionExtras(AbstractTimeStampedModel): +class SubmissionExtrasOld: submission_uuid = models.CharField(max_length=249) content = models.JSONField(default=dict) asset = models.ForeignKey( Asset, - related_name='submission_extras', + related_name='submission_extras_old', on_delete=models.CASCADE, ) 
diff --git a/kobo/apps/subsequences__old/scripts/export_analysis_form.py b/kobo/apps/subsequences__old/scripts/export_analysis_form.py index fbfea28938..4b64a8d5dc 100644 --- a/kobo/apps/subsequences__old/scripts/export_analysis_form.py +++ b/kobo/apps/subsequences__old/scripts/export_analysis_form.py @@ -1,5 +1,5 @@ -from kobo.apps.subsequences.models import SubmissionExtras -from kobo.apps.subsequences.utils import stream_with_extras +from kobo.apps.subsequences__old.models import SubmissionExtrasOld +from kobo.apps.subsequences__old.utils import stream_with_extras from kobo.apps.reports.report_data import build_formpack @@ -11,7 +11,7 @@ def run(): - latest_xtra = SubmissionExtras.objects.last() + latest_xtra = SubmissionExtrasOld.objects.last() asset = latest_xtra.asset user = asset.owner submission_stream = asset.deployment.get_submissions( diff --git a/kobo/apps/subsequences__old/scripts/recalc_latest_subex.py b/kobo/apps/subsequences__old/scripts/recalc_latest_subex.py index f70dfecad7..bc8e8c51a7 100644 --- a/kobo/apps/subsequences__old/scripts/recalc_latest_subex.py +++ b/kobo/apps/subsequences__old/scripts/recalc_latest_subex.py @@ -2,13 +2,13 @@ from pprint import pprint -from kobo.apps.subsequences.models import SubmissionExtras -from kobo.apps.subsequences.utils.determine_export_cols_with_values import ( +from kobo.apps.subsequences__old.models import SubmissionExtrasOld +from kobo.apps.subsequences__old.utils.determine_export_cols_with_values import ( determine_export_cols_indiv ) def run(): - ss = SubmissionExtras.objects.last() + ss = SubmissionExtrasOld.objects.last() pprint( list( determine_export_cols_indiv(ss.content) diff --git a/kobo/apps/subsequences__old/scripts/repop_known_cols.py b/kobo/apps/subsequences__old/scripts/repop_known_cols.py index a1fbd40e82..9215612d8f 100644 --- a/kobo/apps/subsequences__old/scripts/repop_known_cols.py +++ b/kobo/apps/subsequences__old/scripts/repop_known_cols.py @@ -7,20 +7,20 @@ from django.core.paginator 
import Paginator -from kobo.apps.subsequences.models import SubmissionExtras -from kobo.apps.subsequences.utils.deprecation import ( +from kobo.apps.subsequences__old.models import SubmissionExtrasOld +from kobo.apps.subsequences__old.utils.deprecation import ( get_sanitized_dict_keys, get_sanitized_known_columns, ) -from kobo.apps.subsequences.utils.determine_export_cols_with_values import ( +from kobo.apps.subsequences__old.utils.determine_export_cols_with_values import ( determine_export_cols_with_values, ) from kpi.models.asset import Asset def migrate_subex_content( - sub_ex: SubmissionExtras, asset: Asset, save=True -) -> SubmissionExtras: + sub_ex: SubmissionExtrasOld, asset: Asset, save=True +) -> SubmissionExtrasOld: content_string = json.dumps(sub_ex.content) if '"translated"' in content_string: # migration content_string = content_string.replace( @@ -69,7 +69,7 @@ def migrate_advanced_features(asset, save=True): def run(asset_uid=None): if asset_uid == '!': - SubmissionExtras.objects.all().delete() + SubmissionExtrasOld.objects.all().delete() for asset in Asset.objects.exclude(advanced_features__exact={}).iterator(): asset.advanced_features = {} asset.save(create_version=False) @@ -116,7 +116,7 @@ def run(asset_uid=None): ) if updated_submission_extras: - SubmissionExtras.objects.bulk_update( + SubmissionExtrasOld.objects.bulk_update( updated_submission_extras, ['content'] ) else: diff --git a/kobo/apps/subsequences__old/scripts/subsequences_export.py b/kobo/apps/subsequences__old/scripts/subsequences_export.py index 4ea6c2de3b..e470e2cbdd 100644 --- a/kobo/apps/subsequences__old/scripts/subsequences_export.py +++ b/kobo/apps/subsequences__old/scripts/subsequences_export.py @@ -1,7 +1,7 @@ from kpi.models import Asset -from kobo.apps.subsequences.models import SubmissionExtras -from kobo.apps.subsequences.utils import stream_with_extras +from kobo.apps.subsequences__old.models import SubmissionExtrasOld +from kobo.apps.subsequences__old.utils import 
stream_with_extras from kobo.apps.reports.report_data import build_formpack @@ -38,7 +38,7 @@ def run_on_asset(asset): def run(asset_uid): if asset_uid is None: - asset = SubmissionExtras.ojects.last().asset + asset = SubmissionExtrasOld.ojects.last().asset else: asset = Asset.objects.get(uid=asset_uid) run_on_asset(asset) diff --git a/kobo/apps/subsequences__old/tests/test_known_cols_utils.py b/kobo/apps/subsequences__old/tests/test_known_cols_utils.py index 4ed79fb7b7..979f6acb0a 100644 --- a/kobo/apps/subsequences__old/tests/test_known_cols_utils.py +++ b/kobo/apps/subsequences__old/tests/test_known_cols_utils.py @@ -1,4 +1,4 @@ -from kobo.apps.subsequences.utils.parse_known_cols import parse_known_cols +from kobo.apps.subsequences__old.utils.parse_known_cols import parse_known_cols def test_known_cols_transc_duplicates(): diff --git a/kobo/apps/subsequences__old/tests/test_nlp_integration.py b/kobo/apps/subsequences__old/tests/test_nlp_integration.py index ca45c99fe9..253971b2be 100644 --- a/kobo/apps/subsequences__old/tests/test_nlp_integration.py +++ b/kobo/apps/subsequences__old/tests/test_nlp_integration.py @@ -14,7 +14,7 @@ PENDING, ) from ..constants import GOOGLETS, GOOGLETX -from ..models import SubmissionExtras +from ..models import SubmissionExtrasOld TEST_TRANSCRIPTION_SERVICES = [ 'acme_1_speech2text', @@ -90,8 +90,8 @@ def test_submission_status_before_change(): example_fs_key = [*sdeets.keys()][0] assert sdeets[example_fs_key] == PENDING - @patch('kobo.apps.subsequences.integrations.google.google_transcribe.GoogleTranscriptionService') - @patch('kobo.apps.subsequences.integrations.google.google_translate.GoogleTranslationService') + @patch('kobo.apps.subsequences__old.integrations.google.google_transcribe.GoogleTranscriptionService') + @patch('kobo.apps.subsequences__old.integrations.google.google_translate.GoogleTranslationService') def test_transcription_requested( self, mock_TranslationService, @@ -100,7 +100,7 @@ def 
test_transcription_requested( mock_transcript_object = Mock(process_data=Mock(return_value={})) mock_TranscriptionService.return_value = mock_transcript_object - submission = SubmissionExtras.objects.create( + submission = SubmissionExtrasOld.objects.create( asset = self.asset, submission_uuid='123abc', content={ @@ -116,7 +116,7 @@ def test_transcription_requested( mock_translation_object = Mock(process_data=Mock(return_value={})) mock_TranslationService.return_value = mock_translation_object - submission = SubmissionExtras.objects.create( + submission = SubmissionExtrasOld.objects.create( asset = self.asset, submission_uuid='1234abcd', content={ diff --git a/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py b/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py index bec625550e..73a86bd91e 100644 --- a/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py +++ b/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py @@ -43,7 +43,7 @@ xml_tostring, ) from ..constants import GOOGLETS, GOOGLETX -from ..models import SubmissionExtras +from ..models import SubmissionExtrasOld class BaseSubsequenceTestCase(APITestCase): @@ -568,7 +568,7 @@ def test_google_transcript_post(self, m1, m2): CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}}, ) @override_config(ASR_MT_INVITEE_USERNAMES='*') - @patch('kobo.apps.subsequences.integrations.google.google_translate.translate') + @patch('kobo.apps.subsequences__old.integrations.google.google_translate.translate') @patch('google.cloud.speech.SpeechClient') @patch('google.cloud.storage.Client') def test_google_services_usage_limit_checks(self, m1, m2, translate): @@ -605,7 +605,7 @@ def test_google_services_usage_limit_checks(self, m1, m2, translate): UsageType.MT_CHARACTERS: {'exceeded': True}, } with patch( - 'kobo.apps.subsequences.api_view.ServiceUsageCalculator.get_usage_balances', + 
'kobo.apps.subsequences__old.api_view.ServiceUsageCalculator.get_usage_balances', return_value=mock_balances, ): data = { @@ -630,7 +630,7 @@ def test_google_services_usage_limit_checks(self, m1, m2, translate): UsageType.MT_CHARACTERS: {'exceeded': False}, } with patch( - 'kobo.apps.subsequences.api_view.ServiceUsageCalculator.get_usage_balances', + 'kobo.apps.subsequences__old.api_view.ServiceUsageCalculator.get_usage_balances', return_value=mock_balances, ): data = { @@ -657,7 +657,7 @@ def test_google_services_usage_limit_checks(self, m1, m2, translate): CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}}, ) @override_config(ASR_MT_INVITEE_USERNAMES='*') - @patch('kobo.apps.subsequences.integrations.google.google_translate.translate') + @patch('kobo.apps.subsequences__old.integrations.google.google_translate.translate') @patch('google.cloud.speech.SpeechClient') @patch('google.cloud.storage.Client') def test_google_services_usage_limit_checks_disabled(self, m1, m2, translate): @@ -694,7 +694,7 @@ def test_google_services_usage_limit_checks_disabled(self, m1, m2, translate): UsageType.MT_CHARACTERS: {'exceeded': True}, } with patch( - 'kobo.apps.subsequences.api_view.ServiceUsageCalculator.get_usage_balances', + 'kobo.apps.subsequences__old.api_view.ServiceUsageCalculator.get_usage_balances', return_value=mock_balances, ): data = { @@ -723,7 +723,7 @@ def test_google_transcript_permissions(self): '_submitted_by': self.user.username } self.asset.deployment.mock_submissions([submission]) - SubmissionExtras.objects.create( + SubmissionExtrasOld.objects.create( submission_uuid=submission_id, content={'q1': {'transcript': {'value': 'hello'}}}, asset=self.asset @@ -744,8 +744,8 @@ def test_google_transcript_permissions(self): STRIPE_ENABLED=False, ) @override_config(ASR_MT_INVITEE_USERNAMES='*') - @patch('kobo.apps.subsequences.integrations.google.google_translate.translate') - 
@patch('kobo.apps.subsequences.integrations.google.base.storage') + @patch('kobo.apps.subsequences__old.integrations.google.google_translate.translate') + @patch('kobo.apps.subsequences__old.integrations.google.base.storage') def test_google_translate_post(self, storage, translate): url = reverse('advanced-submission-post', args=[self.asset.uid]) submission_id = 'abc123-def456' diff --git a/kobo/apps/subsequences__old/tests/test_submission_stream.py b/kobo/apps/subsequences__old/tests/test_submission_stream.py index 38aebdc22c..5a42e46726 100644 --- a/kobo/apps/subsequences__old/tests/test_submission_stream.py +++ b/kobo/apps/subsequences__old/tests/test_submission_stream.py @@ -5,8 +5,8 @@ from django.test import TestCase from kobo.apps.openrosa.apps.logger.exceptions import ConflictingSubmissionUUIDError -from kobo.apps.subsequences.models import SubmissionExtras -from kobo.apps.subsequences.utils import stream_with_extras +from kobo.apps.subsequences__old.models import SubmissionExtrasOld +from kobo.apps.subsequences__old.utils import stream_with_extras from kpi.models import Asset @@ -155,7 +155,7 @@ def _create_submission_extras(self): }, ] for subex in subexes: - SubmissionExtras.objects.create(**subex) + SubmissionExtrasOld.objects.create(**subex) def setUp(self): self._create_asset() @@ -182,7 +182,7 @@ def mock_submission_stream(): asset = Asset.objects.create() - SubmissionExtras.objects.create( + SubmissionExtrasOld.objects.create( asset=asset, submission_uuid='aaa', content={ @@ -213,7 +213,7 @@ def mock_submission_stream(): } }, ) - SubmissionExtras.objects.create( + SubmissionExtrasOld.objects.create( asset=asset, submission_uuid='bbb', content={ @@ -292,7 +292,7 @@ def test_stream_with_extras_handles_duplicated_submission_uuids(self): def test_stream_with_extras_ignores_empty_qual_responses(self): # Modify submission extras 'val' to be an empty string - submission_extras = SubmissionExtras.objects.get( + submission_extras = 
SubmissionExtrasOld.objects.get( submission_uuid='1c05898e-b43c-491d-814c-79595eb84e81' ) content = submission_extras.content diff --git a/kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py b/kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py index 7c31c7e04e..bb94bddbbb 100644 --- a/kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py +++ b/kobo/apps/subsequences__old/utils/determine_export_cols_with_values.py @@ -35,7 +35,7 @@ def get_lang_code(key, tvals): def determine_export_cols_indiv(sub_ex_content): """ - used primarily when a SubmissionExtras object is saved. + used primarily when a SubmissionExtrasOld object is saved. iterates through content to see which questions have transcripts/translations that need to end up in the export diff --git a/kobo/settings/base.py b/kobo/settings/base.py index a80fe4ef45..c74b3566d3 100644 --- a/kobo/settings/base.py +++ b/kobo/settings/base.py @@ -121,6 +121,7 @@ 'kobo.apps.superuser_stats.SuperuserStatsAppConfig', 'kobo.apps.service_health', 'kobo.apps.subsequences', + 'kobo.apps.subsequences__old', # Temporary hack to make pytest start 'constance', 'kobo.apps.hook.apps.HookAppConfig', 'django_celery_beat', From 7c806f066c002633fd66cef4274b67daba5139d3 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Sun, 24 Aug 2025 11:26:56 -0400 Subject: [PATCH 070/138] Make unit tests for refactored subsequence pass --- kobo/apps/subsequences/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kobo/apps/subsequences/utils.py b/kobo/apps/subsequences/utils.py index 93bfccbbbe..87f6a4a748 100644 --- a/kobo/apps/subsequences/utils.py +++ b/kobo/apps/subsequences/utils.py @@ -25,6 +25,6 @@ def stream_with_supplements(asset: 'kpi.models.Asset', submission_stream: Genera for submission in submission_stream: submission_uuid = remove_uuid_prefix(submission[SUBMISSION_UUID_FIELD]) submission[SUPPLEMENT_KEY] = SubmissionSupplement.retrieve_data( - asset, 
prefetched_supplement=extras.get(submission_uuid) + asset, prefetched_supplement=extras.get(submission_uuid, {}) ) yield submission From b4c8b466aad11677b63734c6c982647352181229 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Sun, 24 Aug 2025 18:11:00 -0400 Subject: [PATCH 071/138] Unit tests, unit tests everywhere!!! --- kobo/apps/subsequences/actions/base.py | 7 +- .../actions/manual_transcription.py | 18 +- .../actions/manual_translation.py | 18 +- kobo/apps/subsequences/constants.py | 2 + kobo/apps/subsequences/models.py | 5 +- .../subsequences/tests/api/v2/test_actions.py | 87 +++++ .../tests/api/v2/test_permissions.py | 2 +- kobo/apps/subsequences/tests/constants.py | 2 + .../tests/test_manual_transcription.py | 45 ++- .../tests/test_manual_translation.py | 188 +++++++++++ kobo/apps/subsequences/tests/test_models.py | 309 ++++++++++++++++++ kobo/apps/subsequences/utils.py | 18 +- 12 files changed, 639 insertions(+), 62 deletions(-) create mode 100644 kobo/apps/subsequences/constants.py create mode 100644 kobo/apps/subsequences/tests/api/v2/test_actions.py create mode 100644 kobo/apps/subsequences/tests/constants.py create mode 100644 kobo/apps/subsequences/tests/test_manual_translation.py create mode 100644 kobo/apps/subsequences/tests/test_models.py diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 4a0ab21add..fb33dfb5e2 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -49,7 +49,6 @@ idea of example data in SubmissionExtras based on the above { '_version': '20250820', - '_submission': '', 'my_audio_question': { 'manual_transcription': { 'transcript': 'هائج', @@ -137,6 +136,12 @@ def check_limits(self, user: User): if balance and balance['exceeded']: raise UsageLimitExceededException() + @property + def default_type(self): + if self.result_schema['type'] == 'array': + return [] + return {} + @classmethod def validate_params(cls, params): 
jsonschema.validate(params, cls.params_schema) diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 107ba0f604..72c99ac418 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -56,24 +56,12 @@ def data_schema(self): # for lack of a better name 'additionalProperties': False, 'properties': { 'language': {'$ref': '#/$defs/lang'}, - 'value': {'$ref': '#/$defs/transcript'}, + 'value': {'$ref': '#/$defs/value'}, }, - 'allOf': [{'$ref': '#/$defs/lang_transcript_dependency'}], + 'required': ['language', 'value'], '$defs': { 'lang': {'type': 'string', 'enum': self.languages}, - 'transcript': {'type': 'string'}, - 'lang_transcript_dependency': { - 'allOf': [ - { - 'if': {'required': ['language']}, - 'then': {'required': ['value']}, - }, - { - 'if': {'required': ['value']}, - 'then': {'required': ['language']}, - }, - ] - }, + 'value': {'type': ['string', 'null']}, }, } diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index b2f3b0ed05..856659b2b8 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -56,24 +56,12 @@ def data_schema(self): # for lack of a better name 'additionalProperties': False, 'properties': { 'language': {'$ref': '#/$defs/lang'}, - 'value': {'$ref': '#/$defs/translation'}, + 'value': {'$ref': '#/$defs/value'}, }, - 'allOf': [{'$ref': '#/$defs/lang_translation_dependency'}], + 'required': ['language', 'value'], '$defs': { 'lang': {'type': 'string', 'enum': self.languages}, - 'translation': {'type': 'string'}, - 'lang_translation_dependency': { - 'allOf': [ - { - 'if': {'required': ['language']}, - 'then': {'required': ['value']}, - }, - { - 'if': {'required': ['value']}, - 'then': {'required': ['language']}, - }, - ] - }, + 'value': {'type': ['string', 
'null']}, }, } diff --git a/kobo/apps/subsequences/constants.py b/kobo/apps/subsequences/constants.py new file mode 100644 index 0000000000..308941ac1e --- /dev/null +++ b/kobo/apps/subsequences/constants.py @@ -0,0 +1,2 @@ +SUBMISSION_UUID_FIELD = 'meta/rootUuid' # FIXME: import from elsewhere +SUPPLEMENT_KEY = '_supplementalDetails' # leave unchanged for backwards compatibility diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 55b450c737..4503eaea73 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -99,10 +99,7 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di asset=asset, submission_uuid=submission_uuid ).update(content=supplemental_data) - # FIXME: bug! this will not return data from the other actions (and - # questions?) that were not affected by the revision - retrieved_supplemental_data['_version'] = schema_version - return retrieved_supplemental_data + return supplemental_data @staticmethod def retrieve_data( diff --git a/kobo/apps/subsequences/tests/api/v2/test_actions.py b/kobo/apps/subsequences/tests/api/v2/test_actions.py new file mode 100644 index 0000000000..7a78f1e38f --- /dev/null +++ b/kobo/apps/subsequences/tests/api/v2/test_actions.py @@ -0,0 +1,87 @@ +from datetime import datetime +from zoneinfo import ZoneInfo + +from ddt import data, ddt, unpack +from freezegun import freeze_time +from rest_framework import status + +from kobo.apps.kobo_auth.shortcuts import User +from kobo.apps.subsequences.tests.api.v2.base import SubsequenceBaseTestCase +from kpi.constants import ( + PERM_CHANGE_SUBMISSIONS, + PERM_PARTIAL_SUBMISSIONS, + PERM_VIEW_SUBMISSIONS, +) +from kpi.utils.object_permission import get_anonymous_user + + +class SubmissionSupplementAPITestCase(SubsequenceBaseTestCase): + + def test_cannot_patch_if_action_is_invalid(self): + # FIXME fails because asset.advanced_features in empty + payload = { + '_version': '20250820', + 'q1': { + 
'manual_translation': { + 'language': 'es', + 'value': 'buenas noches', + } + }, + } + + # No actions activated at the asset level + response = self.client.patch( + self.supplement_details_url, data=payload, format='json' + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert 'Invalid question name' in str(response.data) + + # Activate manual transcription (even if payload asks for translation) + self.set_asset_advanced_features( + { + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'manual_transcription': [ + {'language': 'es'}, + ] + } + }, + } + ) + response = self.client.patch( + self.supplement_details_url, data=payload, format='json' + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert 'Invalid action' in str(response.data) + + def test_cannot_patch_with_invalid_payload(self): + self.set_asset_advanced_features( + { + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'manual_transcription': [ + {'language': 'es'}, + ] + } + }, + } + ) + + payload = { + '_version': '20250820', + 'q1': { + 'manual_translation': { + 'languageCode': 'es', # wrong attribute + 'value': 'buenas noches', + } + }, + } + + response = self.client.patch( + self.supplement_details_url, data=payload, format='json' + ) + + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert 'Invalid action' in str(response.data) diff --git a/kobo/apps/subsequences/tests/api/v2/test_permissions.py b/kobo/apps/subsequences/tests/api/v2/test_permissions.py index 92f7a27178..2cd5f8c9b0 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_permissions.py +++ b/kobo/apps/subsequences/tests/api/v2/test_permissions.py @@ -198,7 +198,7 @@ def test_cannot_post_data(self): } }, } - response = self.client.post( + response = self.client.patch( self.supplement_details_url, data=payload, format='json' ) assert response.status_code == status.HTTP_404_NOT_FOUND diff --git a/kobo/apps/subsequences/tests/constants.py 
b/kobo/apps/subsequences/tests/constants.py new file mode 100644 index 0000000000..aa0d5fdb03 --- /dev/null +++ b/kobo/apps/subsequences/tests/constants.py @@ -0,0 +1,2 @@ +EMPTY_SUBMISSION = {} +EMPTY_SUPPLEMENT = {} diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py index 81f7c1dd4d..390eb4e7d9 100644 --- a/kobo/apps/subsequences/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences/tests/test_manual_transcription.py @@ -2,10 +2,9 @@ import jsonschema import pytest +from .constants import EMPTY_SUBMISSION from ..actions.manual_transcription import ManualTranscriptionAction -EMPTY_SUBMISSION = {} - def test_valid_params_pass_validation(): params = [{'language': 'fr'}, {'language': 'es'}] @@ -22,9 +21,19 @@ def test_valid_transcript_data_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] action = ManualTranscriptionAction(xpath, params) + + # Trivial case data = {'language': 'fr', 'value': 'Aucune idée'} action.validate_data(data) + # No transcript + data = {'language': 'fr', 'value': ''} + action.validate_data(data) + + # Delete transcript + data = {'language': 'fr', 'value': None} + action.validate_data(data) + def test_invalid_transcript_data_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test @@ -34,6 +43,10 @@ def test_invalid_transcript_data_fails_validation(): with pytest.raises(jsonschema.exceptions.ValidationError): action.validate_data(data) + data = {} + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_data(data) + def test_valid_result_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test @@ -43,9 +56,9 @@ def test_valid_result_passes_validation(): first = {'language': 'fr', 'value': 'un'} second = {'language': 'en', 'value': 'two'} third = {'language': 'fr', 'value': 'trois'} - fourth = {} + fourth 
= {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = {} + mock_sup_det = action.default_type for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) @@ -59,9 +72,9 @@ def test_invalid_result_fails_validation(): first = {'language': 'fr', 'value': 'un'} second = {'language': 'en', 'value': 'two'} third = {'language': 'fr', 'value': 'trois'} - fourth = {} + fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = {} + mock_sup_det = action.default_type for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) @@ -74,10 +87,6 @@ def test_invalid_result_fails_validation(): action.validate_result(mock_sup_det) -def test_transcript_is_stored_in_supplemental_details(): - pass - - def test_transcript_revisions_are_retained_in_supplemental_details(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] @@ -86,7 +95,7 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): first = {'language': 'en', 'value': 'No idea'} second = {'language': 'fr', 'value': 'Aucune idée'} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, {}, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) assert mock_sup_det['language'] == 'en' assert mock_sup_det['value'] == 'No idea' @@ -105,7 +114,7 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): assert mock_sup_det['_revisions'][0]['_dateCreated'] == first_time # revisions should not list a modification timestamp - assert '_dateModified' not in mock_sup_det['_revisions'] + assert '_dateModified' not in mock_sup_det['_revisions'][0] # the record itself (not revision) should have an unchanged creation # timestamp @@ -128,7 +137,7 @@ def 
test_setting_transcript_to_empty_string(): first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 'fr', 'value': ''} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, {}, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) assert mock_sup_det['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) @@ -136,19 +145,19 @@ def test_setting_transcript_to_empty_string(): assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' -def test_setting_transcript_to_empty_object(): +def test_setting_transcript_to_none(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] action = ManualTranscriptionAction(xpath, params) first = {'language': 'fr', 'value': 'Aucune idée'} - second = {} + second = {'language': 'fr', 'value': None} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, {}, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) assert mock_sup_det['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) - assert 'value' not in mock_sup_det + assert mock_sup_det['value'] is None assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' @@ -161,7 +170,7 @@ def test_latest_revision_is_first(): second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = {} + mock_sup_det = action.default_type for data in first, second, third: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) diff --git a/kobo/apps/subsequences/tests/test_manual_translation.py b/kobo/apps/subsequences/tests/test_manual_translation.py new file mode 100644 index 0000000000..7d28f6609f --- /dev/null +++ b/kobo/apps/subsequences/tests/test_manual_translation.py @@ -0,0 +1,188 @@ +import dateutil +import jsonschema +import pytest + +from .constants import EMPTY_SUBMISSION +from 
..actions.manual_translation import ManualTranslationAction + +DEFAULT_SUPPLEMENT_DATA = [] + + +def test_valid_params_pass_validation(): + params = [{'language': 'fr'}, {'language': 'es'}] + ManualTranslationAction.validate_params(params) + + +def test_invalid_params_fail_validation(): + params = [{'language': 123}, {'language': 'es'}] + with pytest.raises(jsonschema.exceptions.ValidationError): + ManualTranslationAction.validate_params(params) + + +def test_valid_translation_data_passes_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = ManualTranslationAction(xpath, params) + # Trivial case + data = {'language': 'fr', 'value': 'Aucune idée'} + action.validate_data(data) + + # No transcript + data = {'language': 'fr', 'value': ''} + action.validate_data(data) + + # Delete transcript + data = {'language': 'fr', 'value': None} + action.validate_data(data) + + +def test_invalid_translation_data_fails_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = ManualTranslationAction(xpath, params) + + data = {'language': 'en', 'value': 'No idea'} + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_data(data) + + data = {} + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_data(data) + +def test_valid_result_passes_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranslationAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'en', 'value': 'two'} + third = {'language': 'fr', 'value': 'trois'} + fourth = {'language': 'fr', 'value': None} + fifth = {'language': 'en', 'value': 'fifth'} + mock_sup_det = action.default_type + for data in first, second, third, fourth, fifth: + mock_sup_det = 
action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + action.validate_result(mock_sup_det) + + +def test_invalid_result_fails_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranslationAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'en', 'value': 'two'} + third = {'language': 'fr', 'value': 'trois'} + fourth = {'language': 'fr', 'value': None} + fifth = {'language': 'en', 'value': 'fifth'} + mock_sup_det = action.default_type + for data in first, second, third, fourth, fifth: + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + + # erroneously add '_dateModified' onto a revision + first_revision = mock_sup_det[0]['_revisions'][0] + first_revision['_dateModified'] = first_revision['_dateCreated'] + + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_result(mock_sup_det) + + +def test_translation_revisions_are_retained_in_supplemental_details(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranslationAction(xpath, params) + + first = {'language': 'en', 'value': 'No idea'} + second = {'language': 'fr', 'value': 'Aucune idée'} + third = {'language': 'en', 'value': 'No clue'} + + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) + + assert len(mock_sup_det) == 1 + assert mock_sup_det[0]['language'] == 'en' + assert mock_sup_det[0]['value'] == 'No idea' + assert mock_sup_det[0]['_dateCreated'] == mock_sup_det[0]['_dateModified'] + assert '_revisions' not in mock_sup_det[0] + first_time = mock_sup_det[0]['_dateCreated'] + + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) + assert len(mock_sup_det) == 2 + assert mock_sup_det[1]['language'] == 'fr' + assert mock_sup_det[1]['value'] == 'Aucune idée' + assert 
mock_sup_det[1]['_dateCreated'] == mock_sup_det[1]['_dateModified'] + assert '_revisions' not in mock_sup_det[1] + + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, third) + assert len(mock_sup_det) == 2 + + # the revision should encompass the first translation + assert mock_sup_det[0]['_revisions'][0].items() >= first.items() + + # the revision should have a creation timestamp equal to that of the first + # translation + assert mock_sup_det[0]['_revisions'][0]['_dateCreated'] == first_time + + # revisions should not list a modification timestamp + assert '_dateModified' not in mock_sup_det[0]['_revisions'][0] + + # the record itself (not revision) should have an unchanged creation + # timestamp + assert mock_sup_det[0]['_dateCreated'] == first_time + + # the record itself should have an updated modification timestamp + assert dateutil.parser.parse(mock_sup_det[0]['_dateModified']) > dateutil.parser.parse( + mock_sup_det[0]['_dateCreated'] + ) + + # the record itself should encompass the second translation + assert mock_sup_det[0].items() >= third.items() + + +def test_setting_translation_to_empty_string(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranslationAction(xpath, params) + + first = {'language': 'fr', 'value': 'Aucune idée'} + second = {'language': 'fr', 'value': ''} + + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) + assert mock_sup_det[0]['value'] == 'Aucune idée' + + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) + assert mock_sup_det[0]['value'] == '' + assert mock_sup_det[0]['_revisions'][0]['value'] == 'Aucune idée' + + +def test_setting_translation_to_none(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranslationAction(xpath, params) + + first = {'language': 'fr', 'value': 'Aucune idée'} + 
second = {'language': 'fr', 'value': None} + + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) + assert mock_sup_det[0]['value'] == 'Aucune idée' + + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) + assert mock_sup_det[0]['value'] is None + assert mock_sup_det[0]['_revisions'][0]['value'] == 'Aucune idée' + + +def test_latest_revision_is_first(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranslationAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'fr', 'value': 'deux'} + third = {'language': 'fr', 'value': 'trois'} + + mock_sup_det = action.default_type + for data in first, second, third: + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + + assert mock_sup_det[0]['value'] == 'trois' + assert mock_sup_det[0]['_revisions'][0]['value'] == 'deux' + assert mock_sup_det[0]['_revisions'][1]['value'] == 'un' diff --git a/kobo/apps/subsequences/tests/test_models.py b/kobo/apps/subsequences/tests/test_models.py new file mode 100644 index 0000000000..9620e29251 --- /dev/null +++ b/kobo/apps/subsequences/tests/test_models.py @@ -0,0 +1,309 @@ +from copy import deepcopy +from datetime import datetime +from zoneinfo import ZoneInfo + +import pytest +from django.test import TestCase +from freezegun import freeze_time + +from kobo.apps.kobo_auth.shortcuts import User +from kpi.models import Asset +from .constants import EMPTY_SUPPLEMENT +from ..constants import SUBMISSION_UUID_FIELD +from ..exceptions import InvalidAction, InvalidXPath +from ..models import SubmissionSupplement + + +class SubmissionSupplementTestCase(TestCase): + + # Asset-level config. 
+ # - Allow manual transcription for Arabic + # - Allow manual translation for English and Spanish + ADVANCED_FEATURES = { + '_version': '20250820', + '_actionConfigs': { + 'group_name/question_name': { + 'manual_transcription': [{'language': 'ar'}], + 'manual_translation': [{'language': 'en'}, {'language': 'es'}], + } + }, + } + + EXPECTED_SUBMISSION_SUPPLEMENT = { + "_version": "20250820", + "group_name/question_name": { + "manual_transcription": { + "language": "ar", + "value": "فارغ", + "_dateCreated": "2024-04-08T15:27:00Z", + "_dateModified": "2024-04-08T15:31:00Z", + "_revisions": [ + { + "language": "ar", + "value": "هائج", + "_dateCreated": "2024-04-08T15:27:00Z", + } + ], + }, + "manual_translation": [ + { + "language": "en", + "value": "berserk", + "_dateCreated": "2024-04-08T15:27:00Z", + "_dateModified": "2024-04-08T15:27:00Z", + }, + { + "language": "es", + "value": "enloquecido", + "_dateCreated": "2024-04-08T15:29:00Z", + "_dateModified": "2024-04-08T15:32:00Z", + "_revisions": [ + { + "language": "es", + "value": "loco", + "_dateCreated": "2024-04-08T15:29:00Z", + } + ], + }, + ], + }, + } + + def setUp(self): + # Create owner user + self.owner = User.objects.create_user( + username='alice', + email='alice@example.com', + password='password', + ) + + # Create Asset with minimal advanced_features + self.asset = Asset.objects.create( + owner=self.owner, + name='Test Asset', + advanced_features=self.ADVANCED_FEATURES, + ) + + # Mock submission with minimal info needed for subsequence actions + self.submission_root_uuid = '123e4567-e89b-12d3-a456-426614174000' + self.submission = { + SUBMISSION_UUID_FIELD: self.submission_root_uuid, + 'group_name/question_name': 'audio.m4a', + } + + def test_retrieve_empty_data(self): + assert ( + SubmissionSupplement.retrieve_data( + self.asset, self.submission_root_uuid + ) + == EMPTY_SUPPLEMENT + ) + + def test_retrieve_data_with_invalid_arguments(self): + with pytest.raises(ValueError): + 
SubmissionSupplement.retrieve_data( + self.asset, submission_root_uuid=None, prefetched_supplement=None + ) + + def test_retrieve_data_with_stale_questions(self): + SubmissionSupplement.objects.create( + asset=self.asset, + submission_uuid=self.submission_root_uuid, + content=self.EXPECTED_SUBMISSION_SUPPLEMENT, + ) + advanced_features = deepcopy(self.ADVANCED_FEATURES) + config = advanced_features['_actionConfigs'].pop('group_name/question_name') + advanced_features['_actionConfigs']['group_name/renamed_question_name'] = config + submission_supplement = SubmissionSupplement.retrieve_data( + self.asset, self.submission_root_uuid + ) + assert submission_supplement == EMPTY_SUPPLEMENT + + def test_retrieve_data_from_migrated_data(self): + submission_supplement = { + "group_name/question_name": { + "transcript": { + "languageCode": "ar", + "value": "فارغ", + "dateCreated": "2024-04-08T15:27:00Z", + "dateModified": "2024-04-08T15:31:00Z", + "revisions": [ + { + "languageCode": "ar", + "value": "هائج", + "dateModified": "2024-04-08T15:27:00Z", + } + ], + }, + "translation": [ + { + "languageCode": "en", + "value": "berserk", + "dateCreated": "2024-04-08T15:27:00Z", + "dateModified": "2024-04-08T15:27:00Z", + }, + { + "languageCode": "es", + "value": "enloquecido", + "dateCreated": "2024-04-08T15:29:00Z", + "dateModified": "2024-04-08T15:32:00Z", + "revisions": [ + { + "languageCode": "es", + "value": "loco", + "dateModified": "2024-04-08T15:29:00Z", + } + ], + }, + ], + }, + } + + SubmissionSupplement.objects.create( + asset=self.asset, + submission_uuid=self.submission_root_uuid, + content=submission_supplement, + ) + submission_supplement = SubmissionSupplement.retrieve_data( + self.asset, submission_root_uuid=self.submission_root_uuid + ) + assert submission_supplement == self.EXPECTED_SUBMISSION_SUPPLEMENT + + + def test_retrieve_data_with_submission_root_uuid(self): + self.test_revise_data() + submission_supplement = SubmissionSupplement.retrieve_data( + 
self.asset, submission_root_uuid=self.submission_root_uuid + ) + assert submission_supplement == self.EXPECTED_SUBMISSION_SUPPLEMENT + + def test_revise_data(self): + assert not SubmissionSupplement.objects.filter( + submission_uuid=self.submission_root_uuid + ).exists() + + frozen_datetime_now = datetime( + 2024, 4, 8, 15, 27, 0, tzinfo=ZoneInfo('UTC') + ) + with freeze_time(frozen_datetime_now): + + # 1) First call with transcription (ar) and translation (en) + SubmissionSupplement.revise_data( + self.asset, + self.submission, + { + "_version": "20250820", + "group_name/question_name": { + "manual_transcription": { + "language": "ar", + "value": "هائج", + }, + "manual_translation": { + "language": "en", + "value": "berserk", + }, + }, + }, + ) + + # Make sure a SubmissionSupplement object has been created + assert SubmissionSupplement.objects.filter( + submission_uuid=self.submission_root_uuid + ).exists() + + # 2) Call with translation es = "loco" + frozen_datetime_now = datetime( + 2024, 4, 8, 15, 29, 0, tzinfo=ZoneInfo('UTC') + ) + with freeze_time(frozen_datetime_now): + SubmissionSupplement.revise_data( + self.asset, + self.submission, + { + "_version": "20250820", + "group_name/question_name": { + "manual_translation": { + "language": "es", + "value": "loco", + }, + }, + }, + ) + + assert ( + SubmissionSupplement.objects.filter( + submission_uuid=self.submission_root_uuid + ).count() + == 1 + ) + + # 3) Call with transcription ar = 'فارغ' + frozen_datetime_now = datetime( + 2024, 4, 8, 15, 31, 0, tzinfo=ZoneInfo('UTC') + ) + with freeze_time(frozen_datetime_now): + submission_supplement = SubmissionSupplement.revise_data( + self.asset, + self.submission, + { + "_version": "20250820", + "group_name/question_name": { + "manual_transcription": { + "language": "ar", + "value": 'فارغ', + }, + }, + }, + ) + + # 4) Call with translation es = "enloquecido" + frozen_datetime_now = datetime( + 2024, 4, 8, 15, 32, 0, tzinfo=ZoneInfo('UTC') + ) + with 
freeze_time(frozen_datetime_now): + submission_supplement = SubmissionSupplement.revise_data( + self.asset, + self.submission, + { + "_version": "20250820", + "group_name/question_name": { + "manual_translation": { + "language": "es", + "value": "enloquecido", + }, + }, + }, + ) + + assert submission_supplement == self.EXPECTED_SUBMISSION_SUPPLEMENT + + def test_revise_data_raise_error_wrong_action(self): + + with pytest.raises(InvalidAction): + SubmissionSupplement.revise_data( + self.asset, + self.submission, + { + "_version": "20250820", + "group_name/question_name": { + "my_other_action": {"param": "foo"} + }, + }, + ) + + def test_revise_data_raise_error_wrong_question_name(self): + + with pytest.raises(InvalidXPath): + SubmissionSupplement.revise_data( + self.asset, + self.submission, + { + "_version": "20250820", + "group_name/other_question_name": { + "manual_translation": { + "language": "en", + "value": "crazy", + } + }, + }, + ) diff --git a/kobo/apps/subsequences/utils.py b/kobo/apps/subsequences/utils.py index 87f6a4a748..3f14f63d29 100644 --- a/kobo/apps/subsequences/utils.py +++ b/kobo/apps/subsequences/utils.py @@ -1,27 +1,29 @@ from typing import Generator from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix +from .constants import SUPPLEMENT_KEY, SUBMISSION_UUID_FIELD from .models import SubmissionSupplement -SUBMISSION_UUID_FIELD = 'meta/rootUuid' # FIXME: import from elsewhere -SUPPLEMENT_KEY = '_supplementalDetails' # leave unchanged for backwards compatibility - - def stream_with_supplements(asset: 'kpi.models.Asset', submission_stream: Generator): + if not asset.advanced_features: + yield from submission_stream + return + # FIXME: eww, this is bad, but maybe better than one query per submission? 
# Probably need to go up a few generators and grab an entire page of # submissions and supplements, then yield each of those, and grab again from # the database only once the page is exhausted + + # 2025-08-24: oleger's comment: we could narrow down this query to submissions + # only available in the page (`page` as in pagination). No need to retrieve data for + # all submissions if we are only injecting supplement data for a portion of + # them. Question? How we do that without consuming the mongo cursor twice? extras = dict( SubmissionSupplement.objects.filter(asset=asset).values_list( 'submission_uuid', 'content' ) ) - if not asset.advanced_features: - yield from submission_stream - return - for submission in submission_stream: submission_uuid = remove_uuid_prefix(submission[SUBMISSION_UUID_FIELD]) submission[SUPPLEMENT_KEY] = SubmissionSupplement.retrieve_data( From 517232137ecd9bbb2a90822f83a244fb4c287d5c Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Sun, 24 Aug 2025 20:14:45 -0400 Subject: [PATCH 072/138] Introduce LookupConfig dataclass, remove "item_reference_property" --- kobo/apps/subsequences/actions/base.py | 33 ++-- .../actions/manual_transcription.py | 3 +- .../actions/manual_translation.py | 5 +- kobo/apps/subsequences/models.py | 12 +- .../subsequences/tests/api/v2/test_actions.py | 15 +- .../tests/test_manual_transcription.py | 21 +- .../tests/test_manual_translation.py | 23 ++- kobo/apps/subsequences/tests/test_models.py | 179 ++++++++---------- kobo/apps/subsequences/utils.py | 3 +- 9 files changed, 141 insertions(+), 153 deletions(-) diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index fb33dfb5e2..d3e9ea6f67 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -1,5 +1,6 @@ import datetime from copy import deepcopy +from dataclasses import dataclass import jsonschema from django.conf import settings @@ -115,14 +116,26 @@ def 
utc_datetime_to_js_str(dt: datetime.datetime) -> str: return dt.isoformat().replace('+00:00', 'Z') +@dataclass +class ActionLookupConfig: + """ + Defines how items in a result schema can be resolved. + - key: the dictionary field used to identify or match an item (e.g., "language"). + - default_type: the default container type to return when no items exist + (usually {} for objects or [] for arrays). + """ + + default_type: dict | list + key: str | None + + class BaseAction: DATE_CREATED_FIELD = '_dateCreated' DATE_MODIFIED_FIELD = '_dateModified' REVISIONS_FIELD = '_revisions' - # Change my name, my parents hate me when I was born - item_reference_property = None + lookup_config: ActionLookupConfig | None = None def check_limits(self, user: User): @@ -136,12 +149,6 @@ def check_limits(self, user: User): if balance and balance['exceeded']: raise UsageLimitExceededException() - @property - def default_type(self): - if self.result_schema['type'] == 'array': - return [] - return {} - @classmethod def validate_params(cls, params): jsonschema.validate(params, cls.params_schema) @@ -188,16 +195,16 @@ def revise_data( now_str = utc_datetime_to_js_str(timezone.now()) item_index = None submission_supplement_copy = deepcopy(submission_supplement) - if not self.item_reference_property: + if not isinstance(self.lookup_config.default_type, list): revision = submission_supplement_copy else: - needle = edit[self.item_reference_property] + needle = edit[self.lookup_config.key] revision = {} if not isinstance(submission_supplement, list): raise InvalidItem for idx, item in enumerate(submission_supplement): - if needle == item[self.item_reference_property]: + if needle == item[self.lookup_config.key]: revision = deepcopy(item) item_index = idx break @@ -210,7 +217,7 @@ def revise_data( revision[self.DATE_CREATED_FIELD] = revision_creation_date new_record[self.DATE_MODIFIED_FIELD] = now_str - if not self.item_reference_property: + if not isinstance(self.lookup_config.default_type, 
list): if submission_supplement: revisions.insert(0, revision) new_record[self.REVISIONS_FIELD] = revisions @@ -221,7 +228,7 @@ def revise_data( new_record[self.DATE_CREATED_FIELD] = record_creation_date - if self.item_reference_property: + if isinstance(self.lookup_config.default_type, list): if item_index is None: submission_supplement_copy.append(new_record) else: diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 72c99ac418..eb33f2657e 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -1,8 +1,9 @@ -from .base import BaseAction +from .base import BaseAction, ActionLookupConfig class ManualTranscriptionAction(BaseAction): ID = 'manual_transcription' + lookup_config = ActionLookupConfig({}, None) def __init__(self, source_question_xpath, params): self.source_question_xpath = source_question_xpath diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index 856659b2b8..9168313648 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -1,9 +1,10 @@ -from .base import BaseAction +from .base import BaseAction, ActionLookupConfig class ManualTranslationAction(BaseAction): ID = 'manual_translation' - item_reference_property = 'language' + + lookup_config = ActionLookupConfig([], 'language') def __init__(self, source_question_xpath, params): self.source_question_xpath = source_question_xpath diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 4503eaea73..3c55fe3e36 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -32,6 +32,10 @@ def __repr__(self): @staticmethod def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> dict: + + if not asset.advanced_features: + raise InvalidAction + 
schema_version = incoming_data.get('_version') if schema_version != '20250820': # TODO: migrate from old per-submission schema @@ -77,13 +81,9 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di question_supplemental_data = supplemental_data.setdefault( question_xpath, {} ) - default_action_supplemental_data = ( - {} - if action.item_reference_property is None - else [] - ) + action_supplemental_data = question_supplemental_data.setdefault( - action_id, default_action_supplemental_data + action_id, action.lookup_config.default_type ) action_supplemental_data = action.revise_data( submission, action_supplemental_data, action_data diff --git a/kobo/apps/subsequences/tests/api/v2/test_actions.py b/kobo/apps/subsequences/tests/api/v2/test_actions.py index 7a78f1e38f..569cb61937 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_actions.py +++ b/kobo/apps/subsequences/tests/api/v2/test_actions.py @@ -1,24 +1,11 @@ -from datetime import datetime -from zoneinfo import ZoneInfo - -from ddt import data, ddt, unpack -from freezegun import freeze_time from rest_framework import status -from kobo.apps.kobo_auth.shortcuts import User from kobo.apps.subsequences.tests.api.v2.base import SubsequenceBaseTestCase -from kpi.constants import ( - PERM_CHANGE_SUBMISSIONS, - PERM_PARTIAL_SUBMISSIONS, - PERM_VIEW_SUBMISSIONS, -) -from kpi.utils.object_permission import get_anonymous_user class SubmissionSupplementAPITestCase(SubsequenceBaseTestCase): def test_cannot_patch_if_action_is_invalid(self): - # FIXME fails because asset.advanced_features in empty payload = { '_version': '20250820', 'q1': { @@ -34,7 +21,7 @@ def test_cannot_patch_if_action_is_invalid(self): self.supplement_details_url, data=payload, format='json' ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert 'Invalid question name' in str(response.data) + assert 'Invalid action' in str(response.data) # Activate manual transcription (even if payload asks for translation) 
self.set_asset_advanced_features( diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py index 390eb4e7d9..95964b349d 100644 --- a/kobo/apps/subsequences/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences/tests/test_manual_transcription.py @@ -2,8 +2,8 @@ import jsonschema import pytest -from .constants import EMPTY_SUBMISSION from ..actions.manual_transcription import ManualTranscriptionAction +from .constants import EMPTY_SUBMISSION def test_valid_params_pass_validation(): @@ -58,7 +58,7 @@ def test_valid_result_passes_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = action.default_type + mock_sup_det = action.lookup_config.default_type for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) @@ -74,7 +74,7 @@ def test_invalid_result_fails_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = action.default_type + mock_sup_det = action.lookup_config.default_type for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) @@ -94,8 +94,9 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): first = {'language': 'en', 'value': 'No idea'} second = {'language': 'fr', 'value': 'Aucune idée'} - - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, action.lookup_config.default_type, first + ) assert mock_sup_det['language'] == 'en' assert mock_sup_det['value'] == 'No idea' @@ -137,7 +138,9 @@ def test_setting_transcript_to_empty_string(): first = {'language': 'fr', 'value': 'Aucune idée'} second = 
{'language': 'fr', 'value': ''} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, action.lookup_config.default_type, first + ) assert mock_sup_det['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) @@ -153,7 +156,9 @@ def test_setting_transcript_to_none(): first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 'fr', 'value': None} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, action.lookup_config.default_type, first + ) assert mock_sup_det['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) @@ -170,7 +175,7 @@ def test_latest_revision_is_first(): second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = action.default_type + mock_sup_det = action.lookup_config.default_type for data in first, second, third: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) diff --git a/kobo/apps/subsequences/tests/test_manual_translation.py b/kobo/apps/subsequences/tests/test_manual_translation.py index 7d28f6609f..7cccae9e98 100644 --- a/kobo/apps/subsequences/tests/test_manual_translation.py +++ b/kobo/apps/subsequences/tests/test_manual_translation.py @@ -2,8 +2,8 @@ import jsonschema import pytest -from .constants import EMPTY_SUBMISSION from ..actions.manual_translation import ManualTranslationAction +from .constants import EMPTY_SUBMISSION DEFAULT_SUPPLEMENT_DATA = [] @@ -49,6 +49,7 @@ def test_invalid_translation_data_fails_validation(): with pytest.raises(jsonschema.exceptions.ValidationError): action.validate_data(data) + def test_valid_result_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] @@ -59,7 +60,7 @@ def 
test_valid_result_passes_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = action.default_type + mock_sup_det = action.lookup_config.default_type for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) @@ -75,7 +76,7 @@ def test_invalid_result_fails_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = action.default_type + mock_sup_det = action.lookup_config.default_type for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) @@ -95,8 +96,7 @@ def test_translation_revisions_are_retained_in_supplemental_details(): first = {'language': 'en', 'value': 'No idea'} second = {'language': 'fr', 'value': 'Aucune idée'} third = {'language': 'en', 'value': 'No clue'} - - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.lookup_config.default_type, first) assert len(mock_sup_det) == 1 assert mock_sup_det[0]['language'] == 'en' @@ -130,9 +130,9 @@ def test_translation_revisions_are_retained_in_supplemental_details(): assert mock_sup_det[0]['_dateCreated'] == first_time # the record itself should have an updated modification timestamp - assert dateutil.parser.parse(mock_sup_det[0]['_dateModified']) > dateutil.parser.parse( - mock_sup_det[0]['_dateCreated'] - ) + assert dateutil.parser.parse( + mock_sup_det[0]['_dateModified'] + ) > dateutil.parser.parse(mock_sup_det[0]['_dateCreated']) # the record itself should encompass the second translation assert mock_sup_det[0].items() >= third.items() @@ -145,8 +145,7 @@ def test_setting_translation_to_empty_string(): first = {'language': 'fr', 'value': 'Aucune 
idée'} second = {'language': 'fr', 'value': ''} - - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.lookup_config.default_type, first) assert mock_sup_det[0]['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) @@ -162,7 +161,7 @@ def test_setting_translation_to_none(): first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 'fr', 'value': None} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.default_type, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.lookup_config.default_type, first) assert mock_sup_det[0]['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) @@ -179,7 +178,7 @@ def test_latest_revision_is_first(): second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = action.default_type + mock_sup_det = action.lookup_config.default_type for data in first, second, third: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) diff --git a/kobo/apps/subsequences/tests/test_models.py b/kobo/apps/subsequences/tests/test_models.py index 9620e29251..dd8537da97 100644 --- a/kobo/apps/subsequences/tests/test_models.py +++ b/kobo/apps/subsequences/tests/test_models.py @@ -8,10 +8,10 @@ from kobo.apps.kobo_auth.shortcuts import User from kpi.models import Asset -from .constants import EMPTY_SUPPLEMENT from ..constants import SUBMISSION_UUID_FIELD from ..exceptions import InvalidAction, InvalidXPath from ..models import SubmissionSupplement +from .constants import EMPTY_SUPPLEMENT class SubmissionSupplementTestCase(TestCase): @@ -30,38 +30,38 @@ class SubmissionSupplementTestCase(TestCase): } EXPECTED_SUBMISSION_SUPPLEMENT = { - "_version": "20250820", - "group_name/question_name": { - "manual_transcription": { - "language": "ar", - "value": "فارغ", - "_dateCreated": 
"2024-04-08T15:27:00Z", - "_dateModified": "2024-04-08T15:31:00Z", - "_revisions": [ + '_version': '20250820', + 'group_name/question_name': { + 'manual_transcription': { + 'language': 'ar', + 'value': 'فارغ', + '_dateCreated': '2024-04-08T15:27:00Z', + '_dateModified': '2024-04-08T15:31:00Z', + '_revisions': [ { - "language": "ar", - "value": "هائج", - "_dateCreated": "2024-04-08T15:27:00Z", + 'language': 'ar', + 'value': 'هائج', + '_dateCreated': '2024-04-08T15:27:00Z', } ], }, - "manual_translation": [ + 'manual_translation': [ { - "language": "en", - "value": "berserk", - "_dateCreated": "2024-04-08T15:27:00Z", - "_dateModified": "2024-04-08T15:27:00Z", + 'language': 'en', + 'value': 'berserk', + '_dateCreated': '2024-04-08T15:27:00Z', + '_dateModified': '2024-04-08T15:27:00Z', }, { - "language": "es", - "value": "enloquecido", - "_dateCreated": "2024-04-08T15:29:00Z", - "_dateModified": "2024-04-08T15:32:00Z", - "_revisions": [ + 'language': 'es', + 'value': 'enloquecido', + '_dateCreated': '2024-04-08T15:29:00Z', + '_dateModified': '2024-04-08T15:32:00Z', + '_revisions': [ { - "language": "es", - "value": "loco", - "_dateCreated": "2024-04-08T15:29:00Z", + 'language': 'es', + 'value': 'loco', + '_dateCreated': '2024-04-08T15:29:00Z', } ], }, @@ -93,9 +93,7 @@ def setUp(self): def test_retrieve_empty_data(self): assert ( - SubmissionSupplement.retrieve_data( - self.asset, self.submission_root_uuid - ) + SubmissionSupplement.retrieve_data(self.asset, self.submission_root_uuid) == EMPTY_SUPPLEMENT ) @@ -121,37 +119,37 @@ def test_retrieve_data_with_stale_questions(self): def test_retrieve_data_from_migrated_data(self): submission_supplement = { - "group_name/question_name": { - "transcript": { - "languageCode": "ar", - "value": "فارغ", - "dateCreated": "2024-04-08T15:27:00Z", - "dateModified": "2024-04-08T15:31:00Z", - "revisions": [ + 'group_name/question_name': { + 'transcript': { + 'languageCode': 'ar', + 'value': 'فارغ', + 'dateCreated': 
'2024-04-08T15:27:00Z', + 'dateModified': '2024-04-08T15:31:00Z', + 'revisions': [ { - "languageCode": "ar", - "value": "هائج", - "dateModified": "2024-04-08T15:27:00Z", + 'languageCode': 'ar', + 'value': 'هائج', + 'dateModified': '2024-04-08T15:27:00Z', } ], }, - "translation": [ + 'translation': [ { - "languageCode": "en", - "value": "berserk", - "dateCreated": "2024-04-08T15:27:00Z", - "dateModified": "2024-04-08T15:27:00Z", + 'languageCode': 'en', + 'value': 'berserk', + 'dateCreated': '2024-04-08T15:27:00Z', + 'dateModified': '2024-04-08T15:27:00Z', }, { - "languageCode": "es", - "value": "enloquecido", - "dateCreated": "2024-04-08T15:29:00Z", - "dateModified": "2024-04-08T15:32:00Z", - "revisions": [ + 'languageCode': 'es', + 'value': 'enloquecido', + 'dateCreated': '2024-04-08T15:29:00Z', + 'dateModified': '2024-04-08T15:32:00Z', + 'revisions': [ { - "languageCode": "es", - "value": "loco", - "dateModified": "2024-04-08T15:29:00Z", + 'languageCode': 'es', + 'value': 'loco', + 'dateModified': '2024-04-08T15:29:00Z', } ], }, @@ -169,7 +167,6 @@ def test_retrieve_data_from_migrated_data(self): ) assert submission_supplement == self.EXPECTED_SUBMISSION_SUPPLEMENT - def test_retrieve_data_with_submission_root_uuid(self): self.test_revise_data() submission_supplement = SubmissionSupplement.retrieve_data( @@ -182,9 +179,7 @@ def test_revise_data(self): submission_uuid=self.submission_root_uuid ).exists() - frozen_datetime_now = datetime( - 2024, 4, 8, 15, 27, 0, tzinfo=ZoneInfo('UTC') - ) + frozen_datetime_now = datetime(2024, 4, 8, 15, 27, 0, tzinfo=ZoneInfo('UTC')) with freeze_time(frozen_datetime_now): # 1) First call with transcription (ar) and translation (en) @@ -192,15 +187,15 @@ def test_revise_data(self): self.asset, self.submission, { - "_version": "20250820", - "group_name/question_name": { - "manual_transcription": { - "language": "ar", - "value": "هائج", + '_version': '20250820', + 'group_name/question_name': { + 'manual_transcription': { + 'language': 
'ar', + 'value': 'هائج', }, - "manual_translation": { - "language": "en", - "value": "berserk", + 'manual_translation': { + 'language': 'en', + 'value': 'berserk', }, }, }, @@ -212,19 +207,17 @@ def test_revise_data(self): ).exists() # 2) Call with translation es = "loco" - frozen_datetime_now = datetime( - 2024, 4, 8, 15, 29, 0, tzinfo=ZoneInfo('UTC') - ) + frozen_datetime_now = datetime(2024, 4, 8, 15, 29, 0, tzinfo=ZoneInfo('UTC')) with freeze_time(frozen_datetime_now): SubmissionSupplement.revise_data( self.asset, self.submission, { - "_version": "20250820", - "group_name/question_name": { - "manual_translation": { - "language": "es", - "value": "loco", + '_version': '20250820', + 'group_name/question_name': { + 'manual_translation': { + 'language': 'es', + 'value': 'loco', }, }, }, @@ -238,38 +231,34 @@ def test_revise_data(self): ) # 3) Call with transcription ar = 'فارغ' - frozen_datetime_now = datetime( - 2024, 4, 8, 15, 31, 0, tzinfo=ZoneInfo('UTC') - ) + frozen_datetime_now = datetime(2024, 4, 8, 15, 31, 0, tzinfo=ZoneInfo('UTC')) with freeze_time(frozen_datetime_now): submission_supplement = SubmissionSupplement.revise_data( self.asset, self.submission, { - "_version": "20250820", - "group_name/question_name": { - "manual_transcription": { - "language": "ar", - "value": 'فارغ', + '_version': '20250820', + 'group_name/question_name': { + 'manual_transcription': { + 'language': 'ar', + 'value': 'فارغ', }, }, }, ) # 4) Call with translation es = "enloquecido" - frozen_datetime_now = datetime( - 2024, 4, 8, 15, 32, 0, tzinfo=ZoneInfo('UTC') - ) + frozen_datetime_now = datetime(2024, 4, 8, 15, 32, 0, tzinfo=ZoneInfo('UTC')) with freeze_time(frozen_datetime_now): submission_supplement = SubmissionSupplement.revise_data( self.asset, self.submission, { - "_version": "20250820", - "group_name/question_name": { - "manual_translation": { - "language": "es", - "value": "enloquecido", + '_version': '20250820', + 'group_name/question_name': { + 'manual_translation': { 
+ 'language': 'es', + 'value': 'enloquecido', }, }, }, @@ -284,10 +273,8 @@ def test_revise_data_raise_error_wrong_action(self): self.asset, self.submission, { - "_version": "20250820", - "group_name/question_name": { - "my_other_action": {"param": "foo"} - }, + '_version': '20250820', + 'group_name/question_name': {'my_other_action': {'param': 'foo'}}, }, ) @@ -298,11 +285,11 @@ def test_revise_data_raise_error_wrong_question_name(self): self.asset, self.submission, { - "_version": "20250820", - "group_name/other_question_name": { - "manual_translation": { - "language": "en", - "value": "crazy", + '_version': '20250820', + 'group_name/other_question_name': { + 'manual_translation': { + 'language': 'en', + 'value': 'crazy', } }, }, diff --git a/kobo/apps/subsequences/utils.py b/kobo/apps/subsequences/utils.py index 3f14f63d29..feb62e1ba4 100644 --- a/kobo/apps/subsequences/utils.py +++ b/kobo/apps/subsequences/utils.py @@ -1,9 +1,10 @@ from typing import Generator from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix -from .constants import SUPPLEMENT_KEY, SUBMISSION_UUID_FIELD +from .constants import SUBMISSION_UUID_FIELD, SUPPLEMENT_KEY from .models import SubmissionSupplement + def stream_with_supplements(asset: 'kpi.models.Asset', submission_stream: Generator): if not asset.advanced_features: yield from submission_stream From b5f28e073fc57e68365b513d0b1c3da5d96fe292 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Sun, 24 Aug 2025 18:56:28 -0400 Subject: [PATCH 073/138] Prepare and arbitrate supplemental data for output --- kobo/apps/subsequences/actions/__init__.py | 2 + .../actions/automatic_transcription.py | 9 +++ kobo/apps/subsequences/actions/base.py | 39 ++++++++----- .../actions/manual_transcription.py | 30 ++++++++-- .../actions/manual_translation.py | 30 ++++++++-- kobo/apps/subsequences/models.py | 38 ++++++++++-- kobo/apps/subsequences/time_utils.py | 25 ++++++++ kobo/apps/subsequences/utils.py | 58 ++++++++++++++++++- kpi/models/asset.py | 5 ++ kpi/models/import_export_task.py | 9 +-- kpi/serializers/v2/asset.py | 1 + 11 files changed, 211 insertions(+), 35 deletions(-) create mode 100644 kobo/apps/subsequences/actions/automatic_transcription.py create mode 100644 kobo/apps/subsequences/time_utils.py diff --git a/kobo/apps/subsequences/actions/__init__.py b/kobo/apps/subsequences/actions/__init__.py index f439fed567..9797299438 100644 --- a/kobo/apps/subsequences/actions/__init__.py +++ b/kobo/apps/subsequences/actions/__init__.py @@ -1,8 +1,10 @@ +from .automatic_transcription import AutomaticTranscriptionAction from .manual_transcription import ManualTranscriptionAction from .manual_translation import ManualTranslationAction # TODO, what about using a loader for every class in "actions" folder (except base.py)? ACTIONS = ( + AutomaticTranscriptionAction, ManualTranscriptionAction, ManualTranslationAction, ) diff --git a/kobo/apps/subsequences/actions/automatic_transcription.py b/kobo/apps/subsequences/actions/automatic_transcription.py new file mode 100644 index 0000000000..4fe8590d2e --- /dev/null +++ b/kobo/apps/subsequences/actions/automatic_transcription.py @@ -0,0 +1,9 @@ +from .manual_transcription import ManualTranscriptionAction + + +class AutomaticTranscriptionAction(ManualTranscriptionAction): + ID = 'automatic_transcription' + # this doesn't do shit except give me a way to test manual vs. 
automatic + # transcripts for the same response and see if i can get the logic right for + # arbitrating based on acceptance dates + pass \ No newline at end of file diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index d3e9ea6f67..a67b141843 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -1,4 +1,3 @@ -import datetime from copy import deepcopy from dataclasses import dataclass @@ -10,6 +9,7 @@ from kpi.exceptions import UsageLimitExceededException from kpi.utils.usage_calculator import ServiceUsageCalculator from ..exceptions import InvalidItem +from ..time_utils import utc_datetime_to_js_str """ ### All actions must have the following components @@ -104,18 +104,6 @@ } """ - -def utc_datetime_to_js_str(dt: datetime.datetime) -> str: - """ - Return a string to represent a `datetime` following the simplification of - the ISO 8601 format used by JavaScript - """ - # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format - if dt.utcoffset() or not dt.tzinfo: - raise NotImplementedError('Only UTC datetimes are supported') - return dt.isoformat().replace('+00:00', 'Z') - - @dataclass class ActionLookupConfig: """ @@ -137,6 +125,10 @@ class BaseAction: lookup_config: ActionLookupConfig | None = None + def __init__(self, source_question_xpath, params): + self.source_question_xpath = source_question_xpath + self.params = params + def check_limits(self, user: User): if not settings.STRIPE_ENABLED or not self._is_usage_limited: @@ -149,6 +141,25 @@ def check_limits(self, user: User): if balance and balance['exceeded']: raise UsageLimitExceededException() + def get_output_fields(self) -> list[dict]: + """ + Returns a list of fields contributed by this action to outputted + submission data as shown in exports, the table view UI, etc. 
+ + For a manual transcription to French, this might look like: + [ + { + 'language': 'fr', + 'name': 'group_name/question_name/transcript__fr', + 'source': 'group_name/question_name', + 'type': 'transcript', + } + ] + + Must be implemented by subclasses. + """ + raise NotImplementedError() + @classmethod def validate_params(cls, params): jsonschema.validate(params, cls.params_schema) @@ -164,7 +175,7 @@ def result_schema(self): """ must be implemented by subclasses """ - return NotImplementedError + raise NotImplementedError() def retrieve_data(self, action_data: dict) -> dict: """ diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index eb33f2657e..251f956bba 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -5,10 +5,6 @@ class ManualTranscriptionAction(BaseAction): ID = 'manual_transcription' lookup_config = ActionLookupConfig({}, None) - def __init__(self, source_question_xpath, params): - self.source_question_xpath = source_question_xpath - self.params = params - """ For an audio question called `my_audio_question` that's transcribed into 3 languages, the schema for `Asset.advanced_features` might look @@ -39,6 +35,32 @@ def __init__(self, source_question_xpath, params): }, } + def _get_output_field_name(self, language: str) -> str: + language = language.split('-')[0] # ignore region if any + return f"{self.source_question_xpath}/transcription__{language}" + + def get_output_fields(self) -> list[dict]: + return [ + { + 'language': params['language'], + 'name': self._get_output_field_name(params['language']), + 'source': self.source_question_xpath, + 'type': 'transcript', + } for params in self.params + ] + + def transform_data_for_output(self, action_data: dict) -> list[dict]: + # keep next to `get_output_fields()` for now + + # Sir, there's only one current transcript per response + return { + 
self._get_output_field_name(action_data['language']): { + 'language': action_data['language'], + 'value': action_data['value'], + "_dateAccepted": action_data[self.DATE_MODIFIED_FIELD], + } + } + @property def data_schema(self): # for lack of a better name """ diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index 9168313648..a874a68178 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -3,13 +3,8 @@ class ManualTranslationAction(BaseAction): ID = 'manual_translation' - lookup_config = ActionLookupConfig([], 'language') - def __init__(self, source_question_xpath, params): - self.source_question_xpath = source_question_xpath - self.params = params - """ For an audio question called `my_audio_question` that's translated into 3 languages, the schema for `Asset.advanced_features` might look @@ -39,6 +34,31 @@ def __init__(self, source_question_xpath, params): }, } + def _get_output_field_name(self, language: str) -> str: + language = language.split('-')[0] # ignore region if any + return f"{self.source_question_xpath}/translation__{language}" + + def get_output_fields(self): + return [ + { + 'language': params['language'], + 'name': self._get_output_field_name(params['language']), + 'source': self.source_question_xpath, + 'type': 'translation', + } for params in self.params + ] + + def transform_data_for_output(self, action_data: list[dict]) -> list[dict]: + # keep next to `get_output_fields()` for now + return { + self._get_output_field_name(translation_data['language']): { + 'language': translation_data['language'], + 'value': translation_data['value'], + '_dateAccepted': translation_data[self.DATE_MODIFIED_FIELD], + } + for translation_data in action_data + } + @property def data_schema(self): # for lack of a better name """ diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 
3c55fe3e36..d7854d8b58 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -106,7 +106,15 @@ def retrieve_data( asset: 'kpi.Asset', submission_root_uuid: str | None = None, prefetched_supplement: dict | None = None, - ) -> dict: + for_output: bool = False, + ) -> dict | list[dict]: + """ + `for_output = True` returns a flattened and simplified list of columns + (field names) and values contributed by each enabled action, for use in + exports and the like. Where multiple actions attempt to provide the + same column, the most recently accepted action result is used as the + value + """ if (submission_root_uuid is None) == (prefetched_supplement is None): raise ValueError( 'Specify either `submission_root_uuid` or `prefetched_supplement`' @@ -136,6 +144,7 @@ def retrieve_data( raise NotImplementedError retrieved_supplemental_data = {} + data_for_output = {} for question_xpath, data_for_this_question in supplemental_data.items(): processed_data_for_this_question = retrieved_supplemental_data.setdefault( @@ -177,9 +186,30 @@ def retrieve_data( continue action = action_class(question_xpath, action_params) - processed_data_for_this_question[action_id] = action.retrieve_data( - action_data - ) + + retrieved_data = action.retrieve_data(action_data) + processed_data_for_this_question[action_id] = retrieved_data + if for_output: + # Arbitrate the output data so that each column is only + # represented once, and that the most recently accepted + # action result is used as the value + transformed_data = action.transform_data_for_output(retrieved_data) + for field_name, field_data in transformed_data.items(): + # Omit `_dateAccepted` from the output data + new_acceptance_date = field_data.pop('_dateAccepted', None) + if not new_acceptance_date: + # Never return unaccepted data + continue + existing_acceptance_date = data_for_output.get(field_name, {}).get('_dateAccepted') + if ( + not existing_acceptance_date + or existing_acceptance_date < 
new_acceptance_date + ): + data_for_output[field_name] = field_data retrieved_supplemental_data['_version'] = schema_version + + if for_output: + return data_for_output + return retrieved_supplemental_data diff --git a/kobo/apps/subsequences/time_utils.py b/kobo/apps/subsequences/time_utils.py new file mode 100644 index 0000000000..5ae9f9ddec --- /dev/null +++ b/kobo/apps/subsequences/time_utils.py @@ -0,0 +1,25 @@ +# idk if DRF is doing this work for us automatically, but if not, find another +# place where these utils already exist in the app + +# if they must stay here, probably need to move utils.py to utils/something.py +# and put this in utils/time.py. can't go together due to circular imports + +import datetime + +def utc_datetime_to_js_str(dt: datetime.datetime) -> str: + """ + Return a string to represent a `datetime` following the simplification of + the ISO 8601 format used by JavaScript + """ + # https://tc39.es/ecma262/multipage/numbers-and-dates.html#sec-date-time-string-format + if dt.utcoffset() or not dt.tzinfo: + raise NotImplementedError('Only UTC datetimes are supported') + return dt.isoformat().replace('+00:00', 'Z') + + +def js_str_to_datetime(js_str: str) -> datetime.datetime: + """ + Return a `datetime` from a string following the simplification of the ISO + 8601 format used by JavaScript + """ + return datetime.datetime.fromisoformat(js_str.replace('Z', '+00:00')) diff --git a/kobo/apps/subsequences/utils.py b/kobo/apps/subsequences/utils.py index feb62e1ba4..efb44c45a4 100644 --- a/kobo/apps/subsequences/utils.py +++ b/kobo/apps/subsequences/utils.py @@ -1,11 +1,65 @@ from typing import Generator from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix +from .actions import ACTION_IDS_TO_CLASSES from .constants import SUBMISSION_UUID_FIELD, SUPPLEMENT_KEY from .models import SubmissionSupplement -def stream_with_supplements(asset: 'kpi.models.Asset', submission_stream: Generator): +def 
get_supplemental_output_fields(asset: 'kpi.models.Asset') -> list[dict]: + """ + these are the fields added to exports, displayed in the table view, etc. + + multiple actions could result in only a single field, such as a manual + transcript and an automatic transcript for a given language only resulting + in one field in the output data + + Returns a list of fields contributed by all enabled actions (at the asset + level) to outputted submission data as shown in exports, the table view UI, + etc. + + Consider transcribing `group_name/question_name` into French, both manually + and automatically. The output fields need to contain only *one* unified + field for the French transcript: + [ + { + 'language': 'fr', + 'name': 'group_name/question_name/transcript_fr', + 'source': 'group_name/question_name', + 'type': 'transcript', + } + ] + + When it's time to get the data, we'll have to arbitrate between the manual + and automatic transcripts if both are ever present for a particular + submission. We'll do that by looking at the acceptance dates and letting + the most recent win + """ + advanced_features = asset.advanced_features + + if advanced_features.get('_version') != '20250820': + # TODO: add a migration to update the schema version + raise NotImplementedError() + + output_fields = [] + # FIXME: `_actionConfigs` is 👎 and should be dropped in favor of top-level configs, eh? 
+ # data already exists at the top level alongisde leading-underscore metadata like _version + for source_question_xpath, per_question_actions in advanced_features['_actionConfigs'].items(): + for action_id, action_config in per_question_actions.items(): + action = ACTION_IDS_TO_CLASSES[action_id](source_question_xpath, action_config) + output_fields.extend(action.get_output_fields()) + + # since we want transcripts always to come before translations, à la + # + # and we're lucky with alphabetical order, we can just sort by name + return sorted(output_fields, key=lambda field: field['name']) + + +def stream_with_supplements( + asset: 'kpi.models.Asset', + submission_stream: Generator, + for_output: bool = False +) -> Generator: if not asset.advanced_features: yield from submission_stream return @@ -28,6 +82,6 @@ def stream_with_supplements(asset: 'kpi.models.Asset', submission_stream: Genera for submission in submission_stream: submission_uuid = remove_uuid_prefix(submission[SUBMISSION_UUID_FIELD]) submission[SUPPLEMENT_KEY] = SubmissionSupplement.retrieve_data( - asset, prefetched_supplement=extras.get(submission_uuid, {}) + asset, for_output=for_output, prefetched_supplement=extras.get(submission_uuid, {}) ) yield submission diff --git a/kpi/models/asset.py b/kpi/models/asset.py index 06195d0bf4..c4d0cc32f9 100644 --- a/kpi/models/asset.py +++ b/kpi/models/asset.py @@ -19,6 +19,7 @@ from kobo.apps.reports.constants import DEFAULT_REPORTS_KEY, SPECIFIC_REPORTS_KEY from kobo.apps.subsequences.schemas import ACTION_PARAMS_SCHEMA +from kobo.apps.subsequences.utils import get_supplemental_output_fields from kpi.constants import ( ASSET_TYPE_BLOCK, ASSET_TYPE_COLLECTION, @@ -1044,6 +1045,10 @@ def set_deployment_status(self): else: self._deployment_status = AssetDeploymentStatus.DRAFT + @property + def supplemental_output_fields(self): + return get_supplemental_output_fields(self) + @property def tag_string(self): try: diff --git a/kpi/models/import_export_task.py 
b/kpi/models/import_export_task.py index 3dc71425e6..b6aab31b10 100644 --- a/kpi/models/import_export_task.py +++ b/kpi/models/import_export_task.py @@ -1040,18 +1040,15 @@ def get_export_object( ) if source.has_advanced_features: - submission_stream = stream_with_supplements(source, submission_stream) + submission_stream = stream_with_supplements(source, submission_stream, for_output=True) pack, submission_stream = build_formpack( source, submission_stream, self._fields_from_all_versions ) if source.has_advanced_features: - raise NotImplementedError # FIXME - """ - pack.extend_survey(…) - omit_question_types=['qual_note'] - """ + pack.extend_survey(source.supplemental_output_fields) + # FIXME: (when rebuilding support for qual) omit_question_types=['qual_note'] # Wrap the submission stream in a generator that records the most # recent timestamp diff --git a/kpi/serializers/v2/asset.py b/kpi/serializers/v2/asset.py index c6afc02643..1bd2ff8b7e 100644 --- a/kpi/serializers/v2/asset.py +++ b/kpi/serializers/v2/asset.py @@ -436,6 +436,7 @@ class Meta: 'report_styles', 'report_custom', 'advanced_features', + 'supplemental_output_fields', 'map_styles', 'map_custom', 'content', From 9945e565d2d94cac918c5f84ae5a23e43338bc9e Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Mon, 25 Aug 2025 02:31:06 -0400 Subject: [PATCH 074/138] =?UTF-8?q?Update=20formpack=20requirement=20for?= =?UTF-8?q?=20new=20supplemental=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit format --- dependencies/pip/dev_requirements.txt | 2 +- dependencies/pip/requirements.in | 2 +- dependencies/pip/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dependencies/pip/dev_requirements.txt b/dependencies/pip/dev_requirements.txt index 9f97617d38..7cf585814e 100644 --- a/dependencies/pip/dev_requirements.txt +++ b/dependencies/pip/dev_requirements.txt @@ -8,7 +8,7 @@ # via -r dependencies/pip/requirements.in -e git+https://github.com/trevoriancox/django-dont-vary-on.git@01a804122b7ddcdc22f50b40993f91c27b03bef6#egg=django-dont-vary-on # via -r dependencies/pip/requirements.in --e git+https://github.com/kobotoolbox/formpack.git@efddba5933955bc00dda538b305fd93a1482aa1d#egg=formpack +-e git+https://github.com/kobotoolbox/formpack.git@c3115c145b7da34d8ff78f27160dca964b66f1ec#egg=formpack # via -r dependencies/pip/requirements.in -e git+https://github.com/dimagi/python-digest@5c94bb74516b977b60180ee832765c0695ff2b56#egg=python_digest # via -r dependencies/pip/requirements.in diff --git a/dependencies/pip/requirements.in b/dependencies/pip/requirements.in index 450f1a1fcf..0e2cd75f6c 100644 --- a/dependencies/pip/requirements.in +++ b/dependencies/pip/requirements.in @@ -2,7 +2,7 @@ # https://github.com/bndr/pipreqs is a handy utility, too. # formpack --e git+https://github.com/kobotoolbox/formpack.git@efddba5933955bc00dda538b305fd93a1482aa1d#egg=formpack +-e git+https://github.com/kobotoolbox/formpack.git@c3115c145b7da34d8ff78f27160dca964b66f1ec#egg=formpack # More up-to-date version of django-digest than PyPI seems to have. # Also, python-digest is an unlisted dependency thereof. 
diff --git a/dependencies/pip/requirements.txt b/dependencies/pip/requirements.txt index 08ed5392da..b4a3fe7128 100644 --- a/dependencies/pip/requirements.txt +++ b/dependencies/pip/requirements.txt @@ -8,7 +8,7 @@ # via -r dependencies/pip/requirements.in -e git+https://github.com/trevoriancox/django-dont-vary-on.git@01a804122b7ddcdc22f50b40993f91c27b03bef6#egg=django-dont-vary-on # via -r dependencies/pip/requirements.in --e git+https://github.com/kobotoolbox/formpack.git@efddba5933955bc00dda538b305fd93a1482aa1d#egg=formpack +-e git+https://github.com/kobotoolbox/formpack.git@c3115c145b7da34d8ff78f27160dca964b66f1ec#egg=formpack # via -r dependencies/pip/requirements.in -e git+https://github.com/dimagi/python-digest@5c94bb74516b977b60180ee832765c0695ff2b56#egg=python_digest # via -r dependencies/pip/requirements.in From 89c486cded674585f9bd8b60c4c8784160213301 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Mon, 25 Aug 2025 10:35:51 -0400 Subject: [PATCH 075/138] =?UTF-8?q?Update=20formpack=20requirement=20for?= =?UTF-8?q?=20qualitative=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit analysis simplification --- dependencies/pip/dev_requirements.txt | 2 +- dependencies/pip/requirements.in | 4 ++-- dependencies/pip/requirements.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dependencies/pip/dev_requirements.txt b/dependencies/pip/dev_requirements.txt index 7cf585814e..49641b9e71 100644 --- a/dependencies/pip/dev_requirements.txt +++ b/dependencies/pip/dev_requirements.txt @@ -8,7 +8,7 @@ # via -r dependencies/pip/requirements.in -e git+https://github.com/trevoriancox/django-dont-vary-on.git@01a804122b7ddcdc22f50b40993f91c27b03bef6#egg=django-dont-vary-on # via -r dependencies/pip/requirements.in --e git+https://github.com/kobotoolbox/formpack.git@c3115c145b7da34d8ff78f27160dca964b66f1ec#egg=formpack +-e 
git+https://github.com/kobotoolbox/formpack.git@6b87821dabda94fc7d3a3467923b48fd6c96e239#egg=formpack # via -r dependencies/pip/requirements.in -e git+https://github.com/dimagi/python-digest@5c94bb74516b977b60180ee832765c0695ff2b56#egg=python_digest # via -r dependencies/pip/requirements.in diff --git a/dependencies/pip/requirements.in b/dependencies/pip/requirements.in index 0e2cd75f6c..eae610310c 100644 --- a/dependencies/pip/requirements.in +++ b/dependencies/pip/requirements.in @@ -2,7 +2,7 @@ # https://github.com/bndr/pipreqs is a handy utility, too. # formpack --e git+https://github.com/kobotoolbox/formpack.git@c3115c145b7da34d8ff78f27160dca964b66f1ec#egg=formpack +-e git+https://github.com/kobotoolbox/formpack.git@6b87821dabda94fc7d3a3467923b48fd6c96e239#egg=formpack # More up-to-date version of django-digest than PyPI seems to have. # Also, python-digest is an unlisted dependency thereof. @@ -114,4 +114,4 @@ djangorestframework-jsonp pandas # Api Documentation -drf-spectacular \ No newline at end of file +drf-spectacular diff --git a/dependencies/pip/requirements.txt b/dependencies/pip/requirements.txt index b4a3fe7128..9417e64c74 100644 --- a/dependencies/pip/requirements.txt +++ b/dependencies/pip/requirements.txt @@ -8,7 +8,7 @@ # via -r dependencies/pip/requirements.in -e git+https://github.com/trevoriancox/django-dont-vary-on.git@01a804122b7ddcdc22f50b40993f91c27b03bef6#egg=django-dont-vary-on # via -r dependencies/pip/requirements.in --e git+https://github.com/kobotoolbox/formpack.git@c3115c145b7da34d8ff78f27160dca964b66f1ec#egg=formpack +-e git+https://github.com/kobotoolbox/formpack.git@6b87821dabda94fc7d3a3467923b48fd6c96e239#egg=formpack # via -r dependencies/pip/requirements.in -e git+https://github.com/dimagi/python-digest@5c94bb74516b977b60180ee832765c0695ff2b56#egg=python_digest # via -r dependencies/pip/requirements.in From 43afcf9313db17c7359403491e9f657a0a248091 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Mon, 25 Aug 2025 11:33:30 -0400 Subject: [PATCH 076/138] Deduplicate in `supplemental_output_fields` --- kobo/apps/subsequences/utils.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/kobo/apps/subsequences/utils.py b/kobo/apps/subsequences/utils.py index efb44c45a4..abf4e91408 100644 --- a/kobo/apps/subsequences/utils.py +++ b/kobo/apps/subsequences/utils.py @@ -41,18 +41,32 @@ def get_supplemental_output_fields(asset: 'kpi.models.Asset') -> list[dict]: # TODO: add a migration to update the schema version raise NotImplementedError() - output_fields = [] + output_fields_by_name = {} # FIXME: `_actionConfigs` is 👎 and should be dropped in favor of top-level configs, eh? # data already exists at the top level alongisde leading-underscore metadata like _version - for source_question_xpath, per_question_actions in advanced_features['_actionConfigs'].items(): + for source_question_xpath, per_question_actions in advanced_features[ + '_actionConfigs' + ].items(): for action_id, action_config in per_question_actions.items(): - action = ACTION_IDS_TO_CLASSES[action_id](source_question_xpath, action_config) - output_fields.extend(action.get_output_fields()) + action = ACTION_IDS_TO_CLASSES[action_id]( + source_question_xpath, action_config + ) + for field in action.get_output_fields(): + try: + existing = output_fields_by_name[field['name']] + except KeyError: + output_fields_by_name[field['name']] = field + else: + # It's normal for multiple actions to contribute the same + # field, but they'd better be exactly the same! 
+ assert field == existing # since we want transcripts always to come before translations, à la # # and we're lucky with alphabetical order, we can just sort by name - return sorted(output_fields, key=lambda field: field['name']) + return sorted( + output_fields_by_name.values(), key=lambda field: field['name'] + ) def stream_with_supplements( From 8eba576b63c75fe543595e01ddc881125a374139 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Mon, 25 Aug 2025 12:00:55 -0400 Subject: [PATCH 077/138] WIP Draft automatic translation with Google --- kobo/apps/subsequences/actions/__init__.py | 2 + .../actions/automatic_google_transcription.py | 231 ++++++++++++++++++ kobo/apps/subsequences/actions/base.py | 100 ++++++-- .../actions/manual_transcription.py | 12 +- .../actions/manual_translation.py | 4 +- kobo/apps/subsequences/constants.py | 14 ++ kobo/apps/subsequences/exceptions.py | 18 ++ .../integrations}/__init__.py | 0 .../integrations/google/__init__.py | 0 .../integrations/google/base.py | 41 +++- .../integrations/google/google_transcribe.py | 110 +++++---- .../integrations/google/google_translate.py | 2 +- .../integrations/utils/__init__.py | 0 .../subsequences/integrations/utils/cache.py | 15 ++ .../integrations/utils/google.py} | 0 kobo/apps/subsequences/models.py | 31 ++- .../tests/test_manual_transcription.py | 12 +- .../tests/test_manual_translation.py | 12 +- kobo/apps/subsequences__old/api_view.py | 1 - 19 files changed, 501 insertions(+), 104 deletions(-) create mode 100644 kobo/apps/subsequences/actions/automatic_google_transcription.py rename kobo/apps/{subsequences__old/integrations/google => subsequences/integrations}/__init__.py (100%) create mode 100644 kobo/apps/subsequences/integrations/google/__init__.py rename kobo/apps/{subsequences__old => subsequences}/integrations/google/base.py (73%) rename kobo/apps/{subsequences__old => subsequences}/integrations/google/google_transcribe.py (64%) rename kobo/apps/{subsequences__old => 
subsequences}/integrations/google/google_translate.py (99%) create mode 100644 kobo/apps/subsequences/integrations/utils/__init__.py create mode 100644 kobo/apps/subsequences/integrations/utils/cache.py rename kobo/apps/{subsequences__old/integrations/google/utils.py => subsequences/integrations/utils/google.py} (100%) diff --git a/kobo/apps/subsequences/actions/__init__.py b/kobo/apps/subsequences/actions/__init__.py index 9797299438..9860abe206 100644 --- a/kobo/apps/subsequences/actions/__init__.py +++ b/kobo/apps/subsequences/actions/__init__.py @@ -1,9 +1,11 @@ +from .automatic_google_transcription import AutomaticGoogleTranscriptionAction from .automatic_transcription import AutomaticTranscriptionAction from .manual_transcription import ManualTranscriptionAction from .manual_translation import ManualTranslationAction # TODO, what about using a loader for every class in "actions" folder (except base.py)? ACTIONS = ( + AutomaticGoogleTranscriptionAction, AutomaticTranscriptionAction, ManualTranscriptionAction, ManualTranslationAction, diff --git a/kobo/apps/subsequences/actions/automatic_google_transcription.py b/kobo/apps/subsequences/actions/automatic_google_transcription.py new file mode 100644 index 0000000000..456523dbd1 --- /dev/null +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -0,0 +1,231 @@ +from kobo.apps.organizations.constants import UsageType +from .base import BaseAction, ActionClassConfig +from ..integrations.google.google_transcribe import GoogleTranscriptionService + + +class AutomaticGoogleTranscriptionAction(BaseAction): + ID = 'automatic_google_transcription' + action_class_config = ActionClassConfig({}, None, True) + + params_schema = { + 'type': 'array', + 'items': { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + } + }, + 'required': ['language'], + 'type': 'object', + }, + } + + @property + def automated_data_schema(self) -> dict: + """ + Schema rules: + + - The field 
`status` is always required and must be one of: + ["requested", "in_progress", "completed", "failed"]. + - If `status` == "done": + * The field `value` becomes required and must be a string. + - If `status` == "failed": + * The field `error` becomes required and must be a string. + - No additional properties are allowed beyond `language`, `status` and `value`. + """ + return { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'language': {'$ref': '#/$defs/lang'}, + 'status': {'$ref': '#/$defs/action_status'}, + 'value': {'$ref': '#/$defs/value'}, + 'error': {'$ref': '#/$defs/error'}, + 'accepted': {'$ref': '#/$defs/accepted'}, + }, + 'required': ['language', 'status'], + 'allOf': [ + # value must be present iff status == "complete" + {'$ref': '#/$defs/rule_value_presence_when_complete'}, + # error must be present iff status == "failed" + {'$ref': '#/$defs/rule_error_presence_when_failed'}, + # accepted must be present iff status == "complete" + {'$ref': '#/$defs/rule_accepted_only_when_complete'}, + ], + '$defs': { + 'lang': {'type': 'string', 'enum': self.languages}, + 'action_status': { + 'type': 'string', + 'enum': ['in_progress', 'complete', 'failed'], + }, + 'value': {'type': 'string'}, + 'error': {'type': 'string'}, + 'accepted': {'type': 'boolean'}, + + # If status == "complete" → require "value"; else "value" must be absent + 'rule_value_presence_when_complete': { + 'if': { + 'required': ['status'], + 'properties': {'status': {'const': 'complete'}}, + }, + 'then': {'required': ['value']}, + 'else': {'not': {'required': ['value']}}, + }, + + # If status == "failed" → require "error"; else "error" must be absent + 'rule_error_presence_when_failed': { + 'if': { + 'required': ['status'], + 'properties': {'status': {'const': 'failed'}}, + }, + 'then': {'required': ['error']}, + 'else': {'not': {'required': ['error']}}, + }, + + # If status == "complete" → accepted is allowed but 
optional + # Else → accepted must not be present + 'rule_accepted_only_when_complete': { + 'if': { + 'required': ['status'], + 'properties': {'status': {'const': 'complete'}}, + }, + 'then': {}, # no requirement: accepted may be present or absent + 'else': {'not': {'required': ['accepted']}}, + } + }, + } + + @property + def data_schema(self) -> dict: + """ + Schema rules: + + - The field `status` is always required and must be one of: + ["requested", "in_progress"]. + - `value` should not be present + - No additional properties are allowed beyond `language`, `status`. + """ + return { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'language': {'$ref': '#/$defs/lang'}, + 'accepted': {'$ref': '#/$defs/accepted'}, + }, + 'required': ['language'], + '$defs': { + 'lang': {'type': 'string', 'enum': self.languages}, + 'accepted': {'type': 'boolean'}, + }, + } + + @property + def languages(self) -> list[str]: + languages = [] + for individual_params in self.params: + languages.append(individual_params['language']) + return languages + + @property + def result_schema(self): + + schema = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.REVISIONS_FIELD: { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, + }, + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], + '$defs': { + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD], + }, + }, + 
} + + # Inject data schema in result schema template + self._inject_data_schema(schema, ['$schema', 'title', 'type']) + + # Also inject data schema in the revision definition + self._inject_data_schema( + schema['$defs']['revision'], ['$schema', 'title', '$defs'] + ) + + # FIXME _inject_data_schema does not merge nested children + schema['$defs']['action_status'] = { + 'action_status': { + 'type': 'string', + 'enum': ['in_progress', 'complete', 'error'], + }, + } + return schema + + def run_automatic_process( + self, + submission: dict, + submission_supplement: dict, + action_data: dict, + *args, + **kwargs, + ) -> dict: + """ + Run the automatic transcription process using the Google API. + + This method validates and processes the incoming `action_data` before it is + passed to `revise_data()`. If the payload indicates that the user accepts the + last completed transcription, the method returns early with the accepted data. + Otherwise, it triggers the external Google transcription service and returns + the processed result. + + Returns: + dict: Processed transcription data, ready to be merged and validated by + `revise_data()`. + """ + + # Validate `action_data` against schema rules before further processing. + # `revise_data()` will perform the final validation once merged with the + # supplement returned by this method. + self.validate_data(action_data) + self.raise_for_any_leading_underscore_key(action_data) + + # If the client explicitly provided "accepted", it means they only want to + # accept the last completed transcription. In this case, return immediately. + # `revise_data()` will handle merging and final validation of the acceptance. + accepted = action_data.get('accepted', None) + if ( + submission_supplement.get('status') == 'complete' + and accepted is not None + ): + return { + 'value': submission_supplement['value'], + 'status': 'complete', + } + + # Otherwise, call the Google transcription service to process the input data. 
+ service = GoogleTranscriptionService(submission, asset=kwargs['asset']) + service_data = service.process_data( + self.source_question_xpath, action_data + ) + + return service_data + + def _limit_identifier(self): + return UsageType.ASR_SECONDS diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index a67b141843..f68168cc25 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -105,7 +105,7 @@ """ @dataclass -class ActionLookupConfig: +class ActionClassConfig: """ Defines how items in a result schema can be resolved. - key: the dictionary field used to identify or match an item (e.g., "language"). @@ -115,15 +115,17 @@ class ActionLookupConfig: default_type: dict | list key: str | None + automatic: bool class BaseAction: DATE_CREATED_FIELD = '_dateCreated' DATE_MODIFIED_FIELD = '_dateModified' + DATE_ACCEPTED_FIELD = '_dateAccepted' REVISIONS_FIELD = '_revisions' - lookup_config: ActionLookupConfig | None = None + action_class_config: ActionClassConfig | None = None def __init__(self, source_question_xpath, params): self.source_question_xpath = source_question_xpath @@ -141,6 +143,14 @@ def check_limits(self, user: User): if balance and balance['exceeded']: raise UsageLimitExceededException() + @property + def automated_data_schema(self): + raise NotImplementedError + + @property + def data_schema(self): + raise NotImplementedError + def get_output_fields(self) -> list[dict]: """ Returns a list of fields contributed by this action to outputted @@ -160,13 +170,16 @@ def get_output_fields(self) -> list[dict]: """ raise NotImplementedError() - @classmethod - def validate_params(cls, params): - jsonschema.validate(params, cls.params_schema) + def validate_automated_data(self, data): + jsonschema.validate(data, self.automated_data_schema) def validate_data(self, data): jsonschema.validate(data, self.data_schema) + @classmethod + def validate_params(cls, params): + 
jsonschema.validate(params, cls.params_schema) + def validate_result(self, result): jsonschema.validate(result, self.result_schema) @@ -200,22 +213,31 @@ def revise_data( `submission` argument for future use by subclasses this method might need to be made more friendly for overriding """ - self.validate_data(edit) + + # Validate differently when automatic process ran, to allow internal fields + # but block them from user input. + if self.action_class_config.automatic: + self.validate_automated_data(edit) + accepted = edit.pop('accepted', None) + else: + self.validate_data(edit) + accepted = True + self.raise_for_any_leading_underscore_key(edit) now_str = utc_datetime_to_js_str(timezone.now()) item_index = None submission_supplement_copy = deepcopy(submission_supplement) - if not isinstance(self.lookup_config.default_type, list): + if not isinstance(self.action_class_config.default_type, list): revision = submission_supplement_copy else: - needle = edit[self.lookup_config.key] + needle = edit[self.action_class_config.key] revision = {} if not isinstance(submission_supplement, list): raise InvalidItem for idx, item in enumerate(submission_supplement): - if needle == item[self.lookup_config.key]: + if needle == item[self.action_class_config.key]: revision = deepcopy(item) item_index = idx break @@ -228,18 +250,39 @@ def revise_data( revision[self.DATE_CREATED_FIELD] = revision_creation_date new_record[self.DATE_MODIFIED_FIELD] = now_str - if not isinstance(self.lookup_config.default_type, list): + # If the default type is not a list, we handle a single record case. + if not isinstance(self.action_class_config.default_type, list): if submission_supplement: revisions.insert(0, revision) new_record[self.REVISIONS_FIELD] = revisions else: + # When the default type is a list, we are handling an item within it. 
if item_index is not None: revisions.insert(0, revision) new_record[self.REVISIONS_FIELD] = revisions new_record[self.DATE_CREATED_FIELD] = record_creation_date - if isinstance(self.lookup_config.default_type, list): + # For manual actions, always mark as accepted. + # For automatic actions, revert the just-created revision (remove it and + # reapply its dates) to avoid adding extra branching earlier in the method. + if self.action_class_config.automatic: + if accepted is not None: + revision = new_record[self.REVISIONS_FIELD].pop(0) + if not len(new_record[self.REVISIONS_FIELD]): + del new_record[self.REVISIONS_FIELD] + # reassign date + new_record[self.DATE_MODIFIED_FIELD] = revision[self.DATE_CREATED_FIELD] + if accepted: + new_record[self.DATE_ACCEPTED_FIELD] = now_str + else: + new_record[self.DATE_ACCEPTED_FIELD] = now_str + + if isinstance(self.action_class_config.default_type, list): + # Handle the case where the default type is a list: + # - If no index is provided, append the new record. + # - Otherwise, replace the record at the given index. + # Finally, update `new_record` to reference the full updated list. if item_index is None: submission_supplement_copy.append(new_record) else: @@ -270,12 +313,11 @@ def raise_for_any_leading_underscore_key(d: dict): if match: raise Exception('An unexpected key with a leading underscore was found') - @property - def _is_usage_limited(self): + def run_automatic_process(self, submission: dict, submission_supplement: dict, edit: dict, *args, **kwargs): """ - Returns whether an action should check for usage limits. + Update edit with automatic process """ - raise NotImplementedError() + raise NotImplementedError def _inject_data_schema(self, destination_schema: dict, skipped_keys: list): """ @@ -284,24 +326,34 @@ def _inject_data_schema(self, destination_schema: dict, skipped_keys: list): Useful to produce result schema. 
""" - for key, value in self.data_schema.items(): + schema_to_inject = ( + self.automated_data_schema + if self.action_class_config.automatic + else self.data_schema + ) + + for key, value in schema_to_inject.items(): if key in skipped_keys: continue if key in destination_schema: if isinstance(destination_schema[key], dict): - destination_schema[key].update(self.data_schema[key]) + destination_schema[key].update(schema_to_inject[key]) elif isinstance(destination_schema[key], list): - destination_schema[key].extend(self.data_schema[key]) + destination_schema[key].extend(schema_to_inject[key]) else: - destination_schema[key] = self.data_schema[key] + destination_schema[key] = schema_to_inject[key] else: - destination_schema[key] = self.data_schema[key] + destination_schema[key] = schema_to_inject[key] + + @property + def _is_usage_limited(self): + """ + Returns whether an action should check for usage limits. + """ + return self.action_class_config.automatic @property def _limit_identifier(self): - # Example for automatic transcription - # - # from kobo.apps.organizations.constants import UsageType - # return UsageType.ASR_SECONDS + # See AutomaticGoogleTranscriptionAction._limit_identifier() for example raise NotImplementedError() diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 251f956bba..811d1fe8a1 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -1,9 +1,11 @@ -from .base import BaseAction, ActionLookupConfig +from typing import Any + +from .base import BaseAction, ActionClassConfig class ManualTranscriptionAction(BaseAction): ID = 'manual_transcription' - lookup_config = ActionLookupConfig({}, None) + action_class_config = ActionClassConfig({}, None, False) """ For an audio question called `my_audio_question` that's transcribed @@ -49,7 +51,7 @@ def get_output_fields(self) -> list[dict]: } for params 
in self.params ] - def transform_data_for_output(self, action_data: dict) -> list[dict]: + def transform_data_for_output(self, action_data: dict) -> dict[str, dict[str, Any]]: # keep next to `get_output_fields()` for now # Sir, there's only one current transcript per response @@ -134,7 +136,3 @@ def result_schema(self): ) return schema - - @property - def _is_usage_limited(self): - return False diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index a874a68178..3b01a222b8 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -1,9 +1,9 @@ -from .base import BaseAction, ActionLookupConfig +from .base import BaseAction, ActionClassConfig class ManualTranslationAction(BaseAction): ID = 'manual_translation' - lookup_config = ActionLookupConfig([], 'language') + action_class_config = ActionClassConfig([], 'language', False) """ For an audio question called `my_audio_question` that's translated diff --git a/kobo/apps/subsequences/constants.py b/kobo/apps/subsequences/constants.py index 308941ac1e..724d5161cf 100644 --- a/kobo/apps/subsequences/constants.py +++ b/kobo/apps/subsequences/constants.py @@ -1,2 +1,16 @@ SUBMISSION_UUID_FIELD = 'meta/rootUuid' # FIXME: import from elsewhere SUPPLEMENT_KEY = '_supplementalDetails' # leave unchanged for backwards compatibility + +# Could allow more types in the future? 
See +# formpack.utils.replace_aliases.MEDIA_TYPES +TRANSCRIBABLE_SOURCE_TYPES = ['audio', 'video', 'background-audio'] +TRANSLATABLE_SOURCE_TYPES = TRANSCRIBABLE_SOURCE_TYPES + ['text'] +QUAL_SOURCE_TYPES = TRANSLATABLE_SOURCE_TYPES + + +ASYNC_TRANSLATION_DELAY_INTERVAL = 5 + +SUBSEQUENCES_ASYNC_CACHE_KEY = 'subsequences' +# Google speech api limits audio to ~480 Minutes* +# Processing time is not audio length, but it's an estimate +GOOGLE_CACHE_TIMEOUT = 28800 # 8 hours diff --git a/kobo/apps/subsequences/exceptions.py b/kobo/apps/subsequences/exceptions.py index 7b7b3d59c4..be147a30c5 100644 --- a/kobo/apps/subsequences/exceptions.py +++ b/kobo/apps/subsequences/exceptions.py @@ -1,3 +1,9 @@ +class AudioTooLongError(Exception): + """ + Audio file is too long for the specified speech service + """ + + class InvalidAction(Exception): """ The referenced action does not exist or was not configured for the given @@ -22,3 +28,15 @@ class InvalidXPath(Exception): """ pass + + +class SubsequenceTimeoutError(Exception): + pass + + +class TranslationAsyncResultAvailable(Exception): + pass + + +class TranslationResultsNotFound(Exception): + pass diff --git a/kobo/apps/subsequences__old/integrations/google/__init__.py b/kobo/apps/subsequences/integrations/__init__.py similarity index 100% rename from kobo/apps/subsequences__old/integrations/google/__init__.py rename to kobo/apps/subsequences/integrations/__init__.py diff --git a/kobo/apps/subsequences/integrations/google/__init__.py b/kobo/apps/subsequences/integrations/google/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/subsequences__old/integrations/google/base.py b/kobo/apps/subsequences/integrations/google/base.py similarity index 73% rename from kobo/apps/subsequences__old/integrations/google/base.py rename to kobo/apps/subsequences/integrations/google/base.py index f55c7a7447..218dfe7b1f 100644 --- a/kobo/apps/subsequences__old/integrations/google/base.py +++ 
b/kobo/apps/subsequences/integrations/google/base.py @@ -13,10 +13,15 @@ from kobo.apps.trackers.utils import update_nlp_counter from kpi.utils.log import logging -from ...constants import GOOGLE_CACHE_TIMEOUT, make_nlp_async_cache_key +from ...constants import ( + SUBMISSION_UUID_FIELD, + GOOGLE_CACHE_TIMEOUT, + SUBSEQUENCES_ASYNC_CACHE_KEY, +) from ...exceptions import SubsequenceTimeoutError -from ...models import SubmissionExtrasOld -from .utils import google_credentials_from_constance_config +# from ...models import SubmissionSupplement +# from ..utils.cache import generate_cache_key +from ..utils.google import google_credentials_from_constance_config class GoogleService(ABC): @@ -30,11 +35,10 @@ class GoogleService(ABC): API_VERSION = None API_RESOURCE = None - def __init__(self, submission: SubmissionExtrasOld): + def __init__(self, submission: dict, asset: 'kpi.models.Asset', *args, **kwargs): super().__init__() self.submission = submission - self.asset = submission.asset - self.user = submission.asset.owner + self.asset = asset # Need to retrieve the attachment content self.credentials = google_credentials_from_constance_config() self.storage_client = storage.Client(credentials=self.credentials) if settings.GS_BUCKET_NAME is None: @@ -71,10 +75,10 @@ def counter_name(self): def handle_google_operation( self, xpath: str, source_lang: str, target_lang: str, content: Any=None ) -> str: - submission_id = self.submission.submission_uuid - cache_key = make_nlp_async_cache_key( - self.user.pk, submission_id, xpath, source_lang, target_lang - ) + + # If cache_key is still present, the job is not complete (or it crashed). + # Fetch the latest update from Google API, but do not resend the same operation. 
+ cache_key = self._get_cache_key(xpath, source_lang, target_lang) if operation_name := cache.get(cache_key): google_service = discovery.build( self.API_NAME, self.API_VERSION, credentials=self.credentials @@ -91,7 +95,7 @@ def handle_google_operation( cache.delete(cache_key) return self.adapt_response(operation) else: - (response, amount) = self.begin_google_operation( + response, amount = self.begin_google_operation( xpath, source_lang, target_lang, content ) if isinstance(response, Operation): @@ -108,9 +112,12 @@ def handle_google_operation( cache.delete(cache_key) return self.adapt_response(result) + if isinstance(response, str): return response + return + @abstractmethod def process_data(self, xpath: str, options: dict) -> dict: pass @@ -122,3 +129,15 @@ def update_counters(self, amount) -> None: self.asset.owner_id, self.asset.id, ) + + def _get_cache_key(self, xpath: str, source_lang: str, target_lang: str | None) -> str: + submission_root_uuid = self.submission[SUBMISSION_UUID_FIELD] + action = 'transcribe' if target_lang is None else 'translate' + args = [self.asset.owner_id, submission_root_uuid, xpath, source_lang] + if target_lang is None: + args.insert(0, 'transcribe') + else: + args.insert(0, 'translate') + args.append(target_lang) + + return '::'.join(map(str, [SUBSEQUENCES_ASYNC_CACHE_KEY, *args])) diff --git a/kobo/apps/subsequences__old/integrations/google/google_transcribe.py b/kobo/apps/subsequences/integrations/google/google_transcribe.py similarity index 64% rename from kobo/apps/subsequences__old/integrations/google/google_transcribe.py rename to kobo/apps/subsequences/integrations/google/google_transcribe.py index 16626ca619..22adb79d73 100644 --- a/kobo/apps/subsequences__old/integrations/google/google_transcribe.py +++ b/kobo/apps/subsequences/integrations/google/google_transcribe.py @@ -7,17 +7,24 @@ import constance from django.conf import settings +from django.core.cache import cache from google.api_core.exceptions import 
InvalidArgument from google.cloud import speech from kpi.utils.log import logging - -from ...constants import GOOGLETS +from kpi.exceptions import ( + InvalidXPathException, + SubmissionNotFoundException, + XPathNotFoundException, + AttachmentNotFoundException, + NotSupportedFormatException, +) +from ...constants import SUBMISSION_UUID_FIELD from ...exceptions import ( AudioTooLongError, SubsequenceTimeoutError, - TranscriptionResultsNotFound, ) +from ..utils.cache import generate_cache_key from .base import GoogleService # https://cloud.google.com/speech-to-text/quotas#content @@ -31,12 +38,12 @@ class GoogleTranscriptionService(GoogleService): API_VERSION = 'v1' API_RESOURCE = 'operations' - def __init__(self, *args): + def __init__(self, submission: dict, asset: 'kpi.models.Asset', *args, **kwargs): """ This service takes a submission object as a GoogleService inheriting class. It uses google cloud transcript v1 API. """ - super().__init__(*args) + super().__init__(submission=submission, asset=asset, *args, **kwargs) self.destination_path = None def adapt_response(self, response: Union[dict, list]) -> str: @@ -68,9 +75,9 @@ def begin_google_operation( content: Any, ) -> tuple[str, int]: """ - Set up transcription operation + Set up the transcription operation """ - submission_uuid = self.submission.submission_uuid + submission_uuid = self.submission[SUBMISSION_UUID_FIELD] flac_content, duration = content total_seconds = int(duration.total_seconds()) @@ -104,7 +111,7 @@ def begin_google_operation( speech_results = speech_client.long_running_recognize( audio=audio, config=config ) - return (speech_results, total_seconds) + return speech_results, total_seconds @property def counter_name(self): @@ -116,29 +123,61 @@ def get_converted_audio( """ Converts attachment audio or video file to flac """ + attachment = self.asset.deployment.get_attachment( submission_uuid, user, xpath=xpath ) return attachment.get_transcoded_audio('flac', include_duration=True) - def 
process_data(self, xpath: str, vals: dict) -> dict:
-        autoparams = vals[GOOGLETS]
-        language_code = autoparams.get('languageCode')
-        region_code = autoparams.get('regionCode')
-        vals[GOOGLETS] = {
-            'status': 'in_progress',
-            'languageCode': language_code,
-            'regionCode': region_code,
-        }
-        region_or_language_code = region_code or language_code
+    def process_data(self, xpath: str, params: dict) -> dict:
+        # params.get('status') #language_code = autoparams.get('languageCode')
+        #region_code = autoparams.get('regionCode')
+        #vals[GOOGLETS] = {
+        #    'status': 'in_progress',
+        #    'language': language_code,
+        #    'regionCode': region_code,
+        #}
+        #region_or_language_code = region_code or language_code
+        language = params['language']
+
+        cache_key = self._get_cache_key(xpath, language, target_lang=None)
+        if cache.get(cache_key):
+            # Operation is still in progress, no need to process the audio file
+            converted_audio = None
+        else:
+            try:
+                converted_audio = self.get_converted_audio(
+                    xpath,
+                    self.submission[SUBMISSION_UUID_FIELD],
+                    self.asset.owner,
+                )
+            except SubmissionNotFoundException:
+                return {
+                    'status': 'failed',
+                    'error': 'Submission not found',
+                }
+            except AttachmentNotFoundException:
+                return {
+                    'status': 'failed',
+                    'error': 'Attachment not found',
+                }
+            except (InvalidXPathException, XPathNotFoundException):
+                return {
+                    'status': 'failed',
+                    'error': 'Invalid question name XPath',
+                }
+            except NotSupportedFormatException:
+                return {
+                    'status': 'failed',
+                    'error': 'Unsupported format',
+                }
+
         try:
-            flac_content, duration = self.get_converted_audio(
+            value = self.handle_google_operation(
                 xpath,
-                self.submission.submission_uuid,
-                self.user,
-            )
-            value = self.transcribe_file(
-                xpath, region_or_language_code, (flac_content, duration)
+                source_lang=language,
+                target_lang=None,
+                content=converted_audio,
             )
         except SubsequenceTimeoutError:
             logging.error(
@@ -146,36 +185,19 @@ def process_data(self, xpath: str, vals: dict) -> dict:
             )
             return {
'status': 'in_progress', - 'languageCode': language_code, - 'regionCode': region_code, } - except (TranscriptionResultsNotFound, InvalidArgument) as e: + except InvalidArgument as e: logging.error(f'No transcriptions found for xpath={xpath}') return { - 'status': 'error', - 'value': None, - 'responseJSON': { - 'error': f'Transcription failed with error {e}' - }, + 'status': 'failed', + 'error': f'Transcription failed with error {str(e)}' } return { 'status': 'complete', 'value': value, - 'languageCode': language_code, - 'regionCode': region_code, } - def transcribe_file( - self, xpath: str, source_lang: str, content: tuple[object, int] - ) -> str: - """ - Transcribe file with cache layer around Google operations - When speech api times out, rerun function with same params - to check if operation is finished and return results - """ - return self.handle_google_operation(xpath, source_lang, None, content) - def store_file(self, content): """ Store temporary file. Needed to avoid limits. diff --git a/kobo/apps/subsequences__old/integrations/google/google_translate.py b/kobo/apps/subsequences/integrations/google/google_translate.py similarity index 99% rename from kobo/apps/subsequences__old/integrations/google/google_translate.py rename to kobo/apps/subsequences/integrations/google/google_translate.py index 09468d97ef..41415b9d04 100644 --- a/kobo/apps/subsequences__old/integrations/google/google_translate.py +++ b/kobo/apps/subsequences/integrations/google/google_translate.py @@ -172,7 +172,7 @@ def get_unique_paths( ) return source_path, output_path - def process_data(self, xpath: str, vals: dict) -> dict: + def process_data(self, xpath: str, params: dict) -> dict: """ Translates the value for a given xpath and its json values. 
""" diff --git a/kobo/apps/subsequences/integrations/utils/__init__.py b/kobo/apps/subsequences/integrations/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/subsequences/integrations/utils/cache.py b/kobo/apps/subsequences/integrations/utils/cache.py new file mode 100644 index 0000000000..407a8edd81 --- /dev/null +++ b/kobo/apps/subsequences/integrations/utils/cache.py @@ -0,0 +1,15 @@ +from ...constants import SUBSEQUENCES_ASYNC_CACHE_KEY + +# TODO REMOVE ME, I'm not used anymore +def generate_cache_key( + user_id: int, + submission_uuid: str, + xpath: str, + source_lang: str, + target_lang: str +) -> str: + """ + Make a cache key from the parameters for NLP + """ + args = [user_id, submission_uuid, xpath, source_lang, target_lang] + return '-'.join(map(str, [SUBSEQUENCES_ASYNC_CACHE_KEY, *args])) diff --git a/kobo/apps/subsequences__old/integrations/google/utils.py b/kobo/apps/subsequences/integrations/utils/google.py similarity index 100% rename from kobo/apps/subsequences__old/integrations/google/utils.py rename to kobo/apps/subsequences/integrations/utils/google.py diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index d7854d8b58..d3c93c3f11 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -1,3 +1,5 @@ +from copy import deepcopy + from django.db import models from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix @@ -81,10 +83,35 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di question_supplemental_data = supplemental_data.setdefault( question_xpath, {} ) - action_supplemental_data = question_supplemental_data.setdefault( - action_id, action.lookup_config.default_type + action_id, action.action_class_config.default_type ) + + # If the action is automatic, run the external process first. 
+ # If status is still "in_progress", just return the current supplemental + # data with an updated version (job not finished). + # Otherwise, merge the service response into action_data. + # + # In all cases, call `revise_data` afterwards for final validation + # and to produce the updated supplemental data. + if action.action_class_config.automatic: + service_response = action.run_automatic_process( + submission, + action_supplemental_data, + action_data, + asset=asset, + ) + if ( + action_data.get('status') + == action_supplemental_data.get('status') + == 'in_progress' + ): + supplemental_data['_version'] = schema_version + return supplemental_data + else: + action_data = deepcopy(action_data) + action_data.update(service_response) + action_supplemental_data = action.revise_data( submission, action_supplemental_data, action_data ) diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py index 95964b349d..1bd0e859a7 100644 --- a/kobo/apps/subsequences/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences/tests/test_manual_transcription.py @@ -58,7 +58,7 @@ def test_valid_result_passes_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = action.lookup_config.default_type + mock_sup_det = action.action_class_config.default_type for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) @@ -74,7 +74,7 @@ def test_invalid_result_fails_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = action.lookup_config.default_type + mock_sup_det = action.action_class_config.default_type for data in first, second, third, fourth, fifth: mock_sup_det = 
action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) @@ -95,7 +95,7 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): first = {'language': 'en', 'value': 'No idea'} second = {'language': 'fr', 'value': 'Aucune idée'} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, action.lookup_config.default_type, first + EMPTY_SUBMISSION, action.action_class_config.default_type, first ) assert mock_sup_det['language'] == 'en' @@ -139,7 +139,7 @@ def test_setting_transcript_to_empty_string(): second = {'language': 'fr', 'value': ''} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, action.lookup_config.default_type, first + EMPTY_SUBMISSION, action.action_class_config.default_type, first ) assert mock_sup_det['value'] == 'Aucune idée' @@ -157,7 +157,7 @@ def test_setting_transcript_to_none(): second = {'language': 'fr', 'value': None} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, action.lookup_config.default_type, first + EMPTY_SUBMISSION, action.action_class_config.default_type, first ) assert mock_sup_det['value'] == 'Aucune idée' @@ -175,7 +175,7 @@ def test_latest_revision_is_first(): second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = action.lookup_config.default_type + mock_sup_det = action.action_class_config.default_type for data in first, second, third: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) diff --git a/kobo/apps/subsequences/tests/test_manual_translation.py b/kobo/apps/subsequences/tests/test_manual_translation.py index 7cccae9e98..19965f2900 100644 --- a/kobo/apps/subsequences/tests/test_manual_translation.py +++ b/kobo/apps/subsequences/tests/test_manual_translation.py @@ -60,7 +60,7 @@ def test_valid_result_passes_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = action.lookup_config.default_type + mock_sup_det = 
action.action_class_config.default_type for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) @@ -76,7 +76,7 @@ def test_invalid_result_fails_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = action.lookup_config.default_type + mock_sup_det = action.action_class_config.default_type for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) @@ -96,7 +96,7 @@ def test_translation_revisions_are_retained_in_supplemental_details(): first = {'language': 'en', 'value': 'No idea'} second = {'language': 'fr', 'value': 'Aucune idée'} third = {'language': 'en', 'value': 'No clue'} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.lookup_config.default_type, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.action_class_config.default_type, first) assert len(mock_sup_det) == 1 assert mock_sup_det[0]['language'] == 'en' @@ -145,7 +145,7 @@ def test_setting_translation_to_empty_string(): first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 'fr', 'value': ''} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.lookup_config.default_type, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.action_class_config.default_type, first) assert mock_sup_det[0]['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) @@ -161,7 +161,7 @@ def test_setting_translation_to_none(): first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 'fr', 'value': None} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.lookup_config.default_type, first) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.action_class_config.default_type, first) assert mock_sup_det[0]['value'] == 
'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) @@ -178,7 +178,7 @@ def test_latest_revision_is_first(): second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = action.lookup_config.default_type + mock_sup_det = action.action_class_config.default_type for data in first, second, third: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) diff --git a/kobo/apps/subsequences__old/api_view.py b/kobo/apps/subsequences__old/api_view.py index 6db0d35401..cb95c885c7 100644 --- a/kobo/apps/subsequences__old/api_view.py +++ b/kobo/apps/subsequences__old/api_view.py @@ -103,7 +103,6 @@ def get(self, request, asset_uid, format=None): def post(self, request, asset_uid, format=None): posted_data = request.data - print('POSTED', posted_data, flush=True) schema = self.asset.get_advanced_submission_schema() try: validate(posted_data, schema) From 5e9700b454383d1204b358de93b2b23ddfa908ef Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Mon, 25 Aug 2025 18:36:02 -0400 Subject: [PATCH 078/138] Use Base class for language related actions --- kobo/apps/subsequences/actions/__init__.py | 2 - .../actions/automatic_google_transcription.py | 25 +---------- .../actions/automatic_transcription.py | 9 ---- kobo/apps/subsequences/actions/base.py | 40 +++++++++++++++++ .../actions/manual_transcription.py | 45 +++---------------- .../actions/manual_translation.py | 42 ++--------------- 6 files changed, 51 insertions(+), 112 deletions(-) delete mode 100644 kobo/apps/subsequences/actions/automatic_transcription.py diff --git a/kobo/apps/subsequences/actions/__init__.py b/kobo/apps/subsequences/actions/__init__.py index 9860abe206..7e41e57aa1 100644 --- a/kobo/apps/subsequences/actions/__init__.py +++ b/kobo/apps/subsequences/actions/__init__.py @@ -1,12 +1,10 @@ from .automatic_google_transcription import AutomaticGoogleTranscriptionAction -from .automatic_transcription import 
AutomaticTranscriptionAction from .manual_transcription import ManualTranscriptionAction from .manual_translation import ManualTranslationAction # TODO, what about using a loader for every class in "actions" folder (except base.py)? ACTIONS = ( AutomaticGoogleTranscriptionAction, - AutomaticTranscriptionAction, ManualTranscriptionAction, ManualTranslationAction, ) diff --git a/kobo/apps/subsequences/actions/automatic_google_transcription.py b/kobo/apps/subsequences/actions/automatic_google_transcription.py index 456523dbd1..4545eb72b2 100644 --- a/kobo/apps/subsequences/actions/automatic_google_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -1,26 +1,12 @@ from kobo.apps.organizations.constants import UsageType -from .base import BaseAction, ActionClassConfig +from .base import ActionClassConfig, BaseLanguageAction from ..integrations.google.google_transcribe import GoogleTranscriptionService -class AutomaticGoogleTranscriptionAction(BaseAction): +class AutomaticGoogleTranscriptionAction(BaseLanguageAction): ID = 'automatic_google_transcription' action_class_config = ActionClassConfig({}, None, True) - params_schema = { - 'type': 'array', - 'items': { - 'additionalProperties': False, - 'properties': { - 'language': { - 'type': 'string', - } - }, - 'required': ['language'], - 'type': 'object', - }, - } - @property def automated_data_schema(self) -> dict: """ @@ -122,13 +108,6 @@ def data_schema(self) -> dict: }, } - @property - def languages(self) -> list[str]: - languages = [] - for individual_params in self.params: - languages.append(individual_params['language']) - return languages - @property def result_schema(self): diff --git a/kobo/apps/subsequences/actions/automatic_transcription.py b/kobo/apps/subsequences/actions/automatic_transcription.py deleted file mode 100644 index 4fe8590d2e..0000000000 --- a/kobo/apps/subsequences/actions/automatic_transcription.py +++ /dev/null @@ -1,9 +0,0 @@ -from .manual_transcription 
import ManualTranscriptionAction - - -class AutomaticTranscriptionAction(ManualTranscriptionAction): - ID = 'automatic_transcription' - # this doesn't do shit except give me a way to test manual vs. automatic - # transcripts for the same response and see if i can get the logic right for - # arbitrating based on acceptance dates - pass \ No newline at end of file diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index f68168cc25..9781f5e395 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -357,3 +357,43 @@ def _is_usage_limited(self): def _limit_identifier(self): # See AutomaticGoogleTranscriptionAction._limit_identifier() for example raise NotImplementedError() + + +class BaseLanguageAction(BaseAction): + + """ + For an audio question called `my_audio_question` that's transcribed + into 3 languages, the schema for `Asset.advanced_features` might look + like: + 'my_audio_question': { + 'language_action_id': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + } + + The `params_schema` attribute defines the shape of the array where each + element is an object with a single string property for the transcript + language. 
+ """ + params_schema = { + 'type': 'array', + 'items': { + 'additionalProperties': False, + 'properties': { + 'language': { + 'type': 'string', + } + }, + 'required': ['language'], + 'type': 'object', + }, + } + + @property + def languages(self) -> list[str]: + languages = [] + for individual_params in self.params: + languages.append(individual_params['language']) + return languages diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 811d1fe8a1..192eb521e2 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -1,42 +1,12 @@ from typing import Any -from .base import BaseAction, ActionClassConfig +from .base import ActionClassConfig, BaseLanguageAction -class ManualTranscriptionAction(BaseAction): +class ManualTranscriptionAction(BaseLanguageAction): ID = 'manual_transcription' action_class_config = ActionClassConfig({}, None, False) - """ - For an audio question called `my_audio_question` that's transcribed - into 3 languages, the schema for `Asset.advanced_features` might look - like: - 'my_audio_question': { - 'manual_transcription': [ - {'language': 'ar'}, - {'language': 'bn'}, - {'language': 'es'}, - ], - } - - The `params_schema` attribute defines the shape of the array where each - element is an object with a single string property for the transcript - language. 
- """ - params_schema = { - 'type': 'array', - 'items': { - 'additionalProperties': False, - 'properties': { - 'language': { - 'type': 'string', - } - }, - 'required': ['language'], - 'type': 'object', - }, - } - def _get_output_field_name(self, language: str) -> str: language = language.split('-')[0] # ignore region if any return f"{self.source_question_xpath}/transcription__{language}" @@ -59,7 +29,7 @@ def transform_data_for_output(self, action_data: dict) -> dict[str, dict[str, An self._get_output_field_name(action_data['language']): { 'language': action_data['language'], 'value': action_data['value'], - "_dateAccepted": action_data[self.DATE_MODIFIED_FIELD], + self.DATE_ACCEPTED_FIELD: action_data[self.DATE_MODIFIED_FIELD], } } @@ -90,13 +60,6 @@ def data_schema(self): # for lack of a better name }, } - @property - def languages(self) -> list[str]: - languages = [] - for individual_params in self.params: - languages.append(individual_params['language']) - return languages - @property def result_schema(self): @@ -112,6 +75,7 @@ def result_schema(self): }, self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, }, 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], '$defs': { @@ -121,6 +85,7 @@ def result_schema(self): 'additionalProperties': False, 'properties': { self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, }, 'required': [self.DATE_CREATED_FIELD], }, diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index 3b01a222b8..7a4523f19f 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -1,39 +1,10 @@ -from .base import BaseAction, ActionClassConfig +from .base import ActionClassConfig, BaseLanguageAction -class 
ManualTranslationAction(BaseAction): +class ManualTranslationAction(BaseLanguageAction): ID = 'manual_translation' action_class_config = ActionClassConfig([], 'language', False) - """ - For an audio question called `my_audio_question` that's translated - into 3 languages, the schema for `Asset.advanced_features` might look - like: - 'my_audio_question': { - 'manual_translation': [ - {'language': 'fr'}, - {'language': 'es'}, - ], - } - - The `params_schema` attribute defines the shape of the array where each - element is an object with a single string property for the translation - language. - """ - params_schema = { - 'type': 'array', - 'items': { - 'additionalProperties': False, - 'properties': { - 'language': { - 'type': 'string', - } - }, - 'required': ['language'], - 'type': 'object', - }, - } - def _get_output_field_name(self, language: str) -> str: language = language.split('-')[0] # ignore region if any return f"{self.source_question_xpath}/translation__{language}" @@ -86,13 +57,6 @@ def data_schema(self): # for lack of a better name }, } - @property - def languages(self) -> list[str]: - languages = [] - for individual_params in self.params: - languages.append(individual_params['language']) - return languages - @property def result_schema(self): @@ -107,6 +71,7 @@ def result_schema(self): }, self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, }, 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], } @@ -129,6 +94,7 @@ def result_schema(self): 'additionalProperties': False, 'properties': { self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, }, 'required': [self.DATE_CREATED_FIELD], }, From 75e42b69e4c35155bf5d29f889d34ce2cd949125 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Mon, 25 Aug 2025 18:41:40 -0400 Subject: [PATCH 079/138] Make unit tests support dateAccepted 
--- kobo/apps/subsequences/actions/base.py | 3 +++ kobo/apps/subsequences/actions/manual_translation.py | 7 +++++-- kobo/apps/subsequences/tests/api/v2/test_permissions.py | 1 + kobo/apps/subsequences/tests/test_models.py | 5 +++++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 9781f5e395..f4d4b12e2c 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -149,6 +149,9 @@ def automated_data_schema(self): @property def data_schema(self): + """ + Schema to validate payload POSTed to "/api/v2/assets//data//supplemental" # noqa + """ raise NotImplementedError def get_output_fields(self) -> list[dict]: diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index 7a4523f19f..efe2bf7945 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -1,3 +1,5 @@ +from typing import Any + from .base import ActionClassConfig, BaseLanguageAction @@ -19,7 +21,9 @@ def get_output_fields(self): } for params in self.params ] - def transform_data_for_output(self, action_data: list[dict]) -> list[dict]: + def transform_data_for_output( + self, action_data: list[dict] + ) -> dict[str, dict[str, Any]]: # keep next to `get_output_fields()` for now return { self._get_output_field_name(translation_data['language']): { @@ -33,7 +37,6 @@ def transform_data_for_output(self, action_data: list[dict]) -> list[dict]: @property def data_schema(self): # for lack of a better name """ - POST to "/api/v2/assets//data//supplemental" { 'manual_translation': { 'language': 'es', diff --git a/kobo/apps/subsequences/tests/api/v2/test_permissions.py b/kobo/apps/subsequences/tests/api/v2/test_permissions.py index 2cd5f8c9b0..100a0a5205 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_permissions.py +++ 
b/kobo/apps/subsequences/tests/api/v2/test_permissions.py @@ -165,6 +165,7 @@ def test_can_write(self, username, shared, status_code): 'manual_transcription': { '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:27:00Z', + '_dateAccepted': '2024-04-08T15:27:00Z', 'language': 'es', 'value': 'buenas noches', }, diff --git a/kobo/apps/subsequences/tests/test_models.py b/kobo/apps/subsequences/tests/test_models.py index dd8537da97..12b805ac1b 100644 --- a/kobo/apps/subsequences/tests/test_models.py +++ b/kobo/apps/subsequences/tests/test_models.py @@ -37,11 +37,13 @@ class SubmissionSupplementTestCase(TestCase): 'value': 'فارغ', '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:31:00Z', + '_dateAccepted': '2024-04-08T15:31:00Z', '_revisions': [ { 'language': 'ar', 'value': 'هائج', '_dateCreated': '2024-04-08T15:27:00Z', + '_dateAccepted': '2024-04-08T15:27:00Z', } ], }, @@ -51,17 +53,20 @@ class SubmissionSupplementTestCase(TestCase): 'value': 'berserk', '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:27:00Z', + '_dateAccepted': '2024-04-08T15:27:00Z', }, { 'language': 'es', 'value': 'enloquecido', '_dateCreated': '2024-04-08T15:29:00Z', '_dateModified': '2024-04-08T15:32:00Z', + '_dateAccepted': '2024-04-08T15:32:00Z', '_revisions': [ { 'language': 'es', 'value': 'loco', '_dateCreated': '2024-04-08T15:29:00Z', + '_dateAccepted': '2024-04-08T15:29:00Z', } ], }, From 4e614703a0b6f452a70997db408a12a8e0ec3d57 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Tue, 26 Aug 2025 07:42:10 -0400 Subject: [PATCH 080/138] Add support for locale --- .../actions/automatic_google_transcription.py | 4 +++ kobo/apps/subsequences/actions/base.py | 33 +++++++++++++++++++ .../actions/manual_transcription.py | 27 --------------- .../actions/manual_translation.py | 26 --------------- .../integrations/google/google_transcribe.py | 15 ++------- 5 files changed, 40 insertions(+), 65 deletions(-) diff --git 
a/kobo/apps/subsequences/actions/automatic_google_transcription.py b/kobo/apps/subsequences/actions/automatic_google_transcription.py index 4545eb72b2..86ca64a294 100644 --- a/kobo/apps/subsequences/actions/automatic_google_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -26,6 +26,7 @@ def automated_data_schema(self) -> dict: 'additionalProperties': False, 'properties': { 'language': {'$ref': '#/$defs/lang'}, + 'locale': {'$ref': '#/$defs/locale'}, 'status': {'$ref': '#/$defs/action_status'}, 'value': {'$ref': '#/$defs/value'}, 'error': {'$ref': '#/$defs/error'}, @@ -42,6 +43,7 @@ def automated_data_schema(self) -> dict: ], '$defs': { 'lang': {'type': 'string', 'enum': self.languages}, + 'locale': {'type': ['string', 'null']}, 'action_status': { 'type': 'string', 'enum': ['in_progress', 'complete', 'failed'], @@ -99,11 +101,13 @@ def data_schema(self) -> dict: 'additionalProperties': False, 'properties': { 'language': {'$ref': '#/$defs/lang'}, + 'locale': {'$ref': '#/$defs/locale'}, 'accepted': {'$ref': '#/$defs/accepted'}, }, 'required': ['language'], '$defs': { 'lang': {'type': 'string', 'enum': self.languages}, + 'locale': {'type': ['string', 'null']}, 'accepted': {'type': 'boolean'}, }, } diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index f4d4b12e2c..146d1fc9d6 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -234,6 +234,9 @@ def revise_data( if not isinstance(self.action_class_config.default_type, list): revision = submission_supplement_copy else: + # TODO: Multiple keys are not supported. + # Not a big issue for now since translation actions don’t use locale + # (yet?) and transcription actions only involve one occurrence at a time. 
needle = edit[self.action_class_config.key] revision = {} if not isinstance(submission_supplement, list): @@ -394,6 +397,36 @@ class BaseLanguageAction(BaseAction): }, } + @property + def data_schema(self): + """ + POST to "/api/v2/assets//data//supplemental/" + { + 'language_action_id': { + 'language': 'es', + 'locale': 'es-ES', + 'value': 'Almorzamos muy bien hoy', + } + } + """ + + return { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'language': {'$ref': '#/$defs/lang'}, + 'locale': {'$ref': '#/$defs/locale'}, + 'value': {'$ref': '#/$defs/value'}, + }, + 'required': ['language', 'value'], + '$defs': { + 'lang': {'type': 'string', 'enum': self.languages}, + 'value': {'type': ['string', 'null']}, + 'locale': {'type': ['string', 'null']}, + }, + } + @property def languages(self) -> list[str]: languages = [] diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 192eb521e2..3bfacaa0a3 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -33,33 +33,6 @@ def transform_data_for_output(self, action_data: dict) -> dict[str, dict[str, An } } - @property - def data_schema(self): # for lack of a better name - """ - POST to "/api/v2/assets//data//supplemental" - { - 'manual_transcription': { - 'language': 'es', - 'value': 'Almorzamos muy bien hoy', - } - } - """ - - return { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'language': {'$ref': '#/$defs/lang'}, - 'value': {'$ref': '#/$defs/value'}, - }, - 'required': ['language', 'value'], - '$defs': { - 'lang': {'type': 'string', 'enum': self.languages}, - 'value': {'type': ['string', 'null']}, - }, - } - @property def result_schema(self): diff --git 
a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index efe2bf7945..4feafc187c 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -34,32 +34,6 @@ def transform_data_for_output( for translation_data in action_data } - @property - def data_schema(self): # for lack of a better name - """ - { - 'manual_translation': { - 'language': 'es', - 'value': 'Almorzamos muy bien hoy', - } - } - """ - - return { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'language': {'$ref': '#/$defs/lang'}, - 'value': {'$ref': '#/$defs/value'}, - }, - 'required': ['language', 'value'], - '$defs': { - 'lang': {'type': 'string', 'enum': self.languages}, - 'value': {'type': ['string', 'null']}, - }, - } - @property def result_schema(self): diff --git a/kobo/apps/subsequences/integrations/google/google_transcribe.py b/kobo/apps/subsequences/integrations/google/google_transcribe.py index 22adb79d73..56b6f725f8 100644 --- a/kobo/apps/subsequences/integrations/google/google_transcribe.py +++ b/kobo/apps/subsequences/integrations/google/google_transcribe.py @@ -130,17 +130,8 @@ def get_converted_audio( return attachment.get_transcoded_audio('flac', include_duration=True) def process_data(self, xpath: str, params: dict) -> dict: - # params.get('status') #language_code = autoparams.get('languageCode') - #region_code = autoparams.get('regionCode') - #vals[GOOGLETS] = { - # 'status': 'in_progress', - # 'language': language_code, - # 'regionCode': region_code, - #} - #region_or_language_code = region_code or language_code - language = params['language'] - - cache_key = self._get_cache_key(xpath, language, target_lang=None) + source_language = params.get('locale') or params['language'] + cache_key = self._get_cache_key(xpath, source_language, target_lang=None) if cache.get(cache_key): 
# Operation is still in progress, no need to process the audio file converted_audio = None @@ -175,7 +166,7 @@ def process_data(self, xpath: str, params: dict) -> dict: try: value = self.handle_google_operation( xpath, - source_lang=language, + source_lang=source_language, target_lang=None, content=converted_audio, ) From f567a4fdc9e8cf849b7fe180bb0fd61c804adb10 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Tue, 26 Aug 2025 13:28:36 -0400 Subject: [PATCH 081/138] Improved process flow for automated actions --- .../actions/automatic_google_transcription.py | 58 ++++++++++--------- kobo/apps/subsequences/actions/base.py | 58 +++++++++++++------ kobo/apps/subsequences/models.py | 32 ++-------- 3 files changed, 77 insertions(+), 71 deletions(-) diff --git a/kobo/apps/subsequences/actions/automatic_google_transcription.py b/kobo/apps/subsequences/actions/automatic_google_transcription.py index 86ca64a294..12f56922e2 100644 --- a/kobo/apps/subsequences/actions/automatic_google_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -161,54 +161,58 @@ def result_schema(self): } return schema - def run_automatic_process( + @property + def _limit_identifier(self): + return UsageType.ASR_SECONDS + + def _run_automatic_process( self, submission: dict, submission_supplement: dict, action_data: dict, *args, - **kwargs, - ) -> dict: + ** kwargs, + ) -> dict | None: """ Run the automatic transcription process using the Google API. - This method validates and processes the incoming `action_data` before it is - passed to `revise_data()`. If the payload indicates that the user accepts the - last completed transcription, the method returns early with the accepted data. - Otherwise, it triggers the external Google transcription service and returns - the processed result. - - Returns: - dict: Processed transcription data, ready to be merged and validated by - `revise_data()`. 
+ This method is intended to be called by `revise_data()`, which will finalize + the validation and merging of `action_data`. If the user explicitly accepts + the last completed transcription, the method short-circuits and returns it + immediately. If the transcription request is still in progress, the method + returns None so that `revise_data()` can exit early and skip unnecessary + processing. Otherwise, it calls the Google API and returns the processed + result, ready to be passed back to `revise_data()`. """ - # Validate `action_data` against schema rules before further processing. - # `revise_data()` will perform the final validation once merged with the - # supplement returned by this method. - self.validate_data(action_data) - self.raise_for_any_leading_underscore_key(action_data) - - # If the client explicitly provided "accepted", it means they only want to - # accept the last completed transcription. In this case, return immediately. - # `revise_data()` will handle merging and final validation of the acceptance. + # If the client sent "accepted" while the supplement is already complete, + # return the completed transcription right away. `revise_data()` will handle + # the merge and final validation of this acceptance. accepted = action_data.get('accepted', None) if ( - submission_supplement.get('status') == 'complete' - and accepted is not None + submission_supplement.get('status') == 'complete' + and accepted is not None ): return { 'value': submission_supplement['value'], 'status': 'complete', } - # Otherwise, call the Google transcription service to process the input data. + # Otherwise, trigger the external Google transcription service. service = GoogleTranscriptionService(submission, asset=kwargs['asset']) service_data = service.process_data( self.source_question_xpath, action_data ) - return service_data + # If the transcription request is still running, stop processing here. 
+ # Returning None ensures that `revise_data()` will not be called afterwards. + if ( + accepted is None + and submission_supplement['status'] + == service_data['status'] + == 'in_progress' + ): + return None - def _limit_identifier(self): - return UsageType.ASR_SECONDS + # Normal case: return the processed transcription data. + return service_data diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 146d1fc9d6..a38a0be179 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -210,24 +210,41 @@ def revise_field(self, *args, **kwargs): return self.revise_data(*args, **kwargs) def revise_data( - self, submission: dict, submission_supplement: dict, edit: dict - ) -> dict: + self, + submission: dict, + submission_supplement: dict, + action_data: dict, + asset: 'kpi.models.Asset' = None, + ) -> dict | None: """ `submission` argument for future use by subclasses this method might need to be made more friendly for overriding """ - # Validate differently when automatic process ran, to allow internal fields - # but block them from user input. + self.validate_data(action_data) + self.raise_for_any_leading_underscore_key(action_data) + if self.action_class_config.automatic: - self.validate_automated_data(edit) - accepted = edit.pop('accepted', None) + # If the action is automatic, run the external process first. + if not (service_response := self.run_automatic_process( + submission, + submission_supplement, + action_data, + asset=asset, + )): + # If the service response is None, the automatic task is still running. + # Stop here to avoid processing data and creating redundant revisions. + return None + + # Otherwise, merge the service response into action_data and keep going + # the validation process. 
+ action_data = deepcopy(action_data) + action_data.update(service_response) + self.validate_automated_data(action_data) + accepted = action_data.pop('accepted', None) else: - self.validate_data(edit) accepted = True - self.raise_for_any_leading_underscore_key(edit) - now_str = utc_datetime_to_js_str(timezone.now()) item_index = None submission_supplement_copy = deepcopy(submission_supplement) @@ -237,7 +254,7 @@ def revise_data( # TODO: Multiple keys are not supported. # Not a big issue for now since translation actions don’t use locale # (yet?) and transcription actions only involve one occurrence at a time. - needle = edit[self.action_class_config.key] + needle = action_data[self.action_class_config.key] revision = {} if not isinstance(submission_supplement, list): raise InvalidItem @@ -248,7 +265,7 @@ def revise_data( item_index = idx break - new_record = deepcopy(edit) + new_record = deepcopy(action_data) revisions = revision.pop(self.REVISIONS_FIELD, []) revision_creation_date = revision.pop(self.DATE_MODIFIED_FIELD, now_str) @@ -319,12 +336,6 @@ def raise_for_any_leading_underscore_key(d: dict): if match: raise Exception('An unexpected key with a leading underscore was found') - def run_automatic_process(self, submission: dict, submission_supplement: dict, edit: dict, *args, **kwargs): - """ - Update edit with automatic process - """ - raise NotImplementedError - def _inject_data_schema(self, destination_schema: dict, skipped_keys: list): """ Utility function to inject data schema into another schema to @@ -364,6 +375,19 @@ def _limit_identifier(self): # See AutomaticGoogleTranscriptionAction._limit_identifier() for example raise NotImplementedError() + def _run_automatic_process( + self, + submission: dict, + submission_supplement: dict, + action_data: dict, + *args, + **kwargs, + ) -> dict | bool: + """ + Update action_data with automatic process + """ + raise NotImplementedError + class BaseLanguageAction(BaseAction): diff --git 
a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index d3c93c3f11..7322ec32da 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -87,34 +87,12 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di action_id, action.action_class_config.default_type ) - # If the action is automatic, run the external process first. - # If status is still "in_progress", just return the current supplemental - # data with an updated version (job not finished). - # Otherwise, merge the service response into action_data. - # - # In all cases, call `revise_data` afterwards for final validation - # and to produce the updated supplemental data. - if action.action_class_config.automatic: - service_response = action.run_automatic_process( - submission, - action_supplemental_data, - action_data, - asset=asset, - ) - if ( - action_data.get('status') - == action_supplemental_data.get('status') - == 'in_progress' - ): - supplemental_data['_version'] = schema_version - return supplemental_data - else: - action_data = deepcopy(action_data) - action_data.update(service_response) - - action_supplemental_data = action.revise_data( + if not (action_supplemental_data := action.revise_data( submission, action_supplemental_data, action_data - ) + )): + supplemental_data['_version'] = schema_version + return supplemental_data + question_supplemental_data[action_id] = action_supplemental_data retrieved_supplemental_data.setdefault(question_xpath, {})[ action_id From ad18e8d35977f73484ac4f231f17fcbc1cd465d6 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Tue, 26 Aug 2025 15:27:22 -0400 Subject: [PATCH 082/138] Make automatic process an intern call in revise_data --- .../actions/automatic_google_transcription.py | 89 +++++++++++++------ kobo/apps/subsequences/actions/base.py | 2 +- .../subsequences/integrations/google/base.py | 5 +- kpi/views/v2/data.py | 20 ++--- 4 files changed, 76 insertions(+), 40 deletions(-) diff 
--git a/kobo/apps/subsequences/actions/automatic_google_transcription.py b/kobo/apps/subsequences/actions/automatic_google_transcription.py index 12f56922e2..8c30002bac 100644 --- a/kobo/apps/subsequences/actions/automatic_google_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -34,11 +34,15 @@ def automated_data_schema(self) -> dict: }, 'required': ['language', 'status'], 'allOf': [ - # value must be present iff status == "complete" - {'$ref': '#/$defs/rule_value_presence_when_complete'}, + # value is required when status == "complete" + {'$ref': '#/$defs/rule_value_required_when_complete'}, + # value must be absent when status in {"in_progress","failed"} + {'$ref': '#/$defs/rule_value_forbidden_when_in_progress_or_failed'}, + # value is optional but must be null when status == "deleted" + {'$ref': '#/$defs/rule_value_null_only_when_deleted'}, # error must be present iff status == "failed" {'$ref': '#/$defs/rule_error_presence_when_failed'}, - # accepted must be present iff status == "complete" + # accepted allowed only when status == "complete" (optional) {'$ref': '#/$defs/rule_accepted_only_when_complete'}, ], '$defs': { @@ -46,23 +50,50 @@ def automated_data_schema(self) -> dict: 'locale': {'type': ['string', 'null']}, 'action_status': { 'type': 'string', - 'enum': ['in_progress', 'complete', 'failed'], + 'enum': ['in_progress', 'complete', 'failed', 'deleted'], }, - 'value': {'type': 'string'}, + 'value': {'type': ['string', 'null']}, 'error': {'type': 'string'}, 'accepted': {'type': 'boolean'}, - # If status == "complete" → require "value"; else "value" must be absent - 'rule_value_presence_when_complete': { + # --- Value rules --- + # If status == "complete" → require "value" (string or null) + 'rule_value_required_when_complete': { 'if': { 'required': ['status'], 'properties': {'status': {'const': 'complete'}}, }, 'then': {'required': ['value']}, - 'else': {'not': {'required': ['value']}}, + }, + # If status in 
{"in_progress","failed"} → forbid "value" + 'rule_value_forbidden_when_in_progress_or_failed': { + 'if': { + 'required': ['status'], + 'properties': { + 'status': {'enum': ['in_progress', 'failed']} + }, + }, + 'then': {'not': {'required': ['value']}}, + }, + # If status == "deleted" → "value" optional, but if present it MUST be null + 'rule_value_null_only_when_deleted': { + 'if': { + 'required': ['status'], + 'properties': {'status': {'const': 'deleted'}}, + }, + 'then': { + 'anyOf': [ + {'not': {'required': ['value']}}, # value absent + { # value present and null + 'properties': {'value': {'type': 'null'}}, + 'required': ['value'], + }, + ] + }, }, - # If status == "failed" → require "error"; else "error" must be absent + # --- Other field rules --- + # If status == "failed" → require "error"; else forbid it 'rule_error_presence_when_failed': { 'if': { 'required': ['status'], @@ -71,17 +102,15 @@ def automated_data_schema(self) -> dict: 'then': {'required': ['error']}, 'else': {'not': {'required': ['error']}}, }, - - # If status == "complete" → accepted is allowed but optional - # Else → accepted must not be present + # If status == "complete" → accepted allowed but optional; else forbid it 'rule_accepted_only_when_complete': { 'if': { 'required': ['status'], 'properties': {'status': {'const': 'complete'}}, }, - 'then': {}, # no requirement: accepted may be present or absent + 'then': {}, # optional 'else': {'not': {'required': ['accepted']}}, - } + }, }, } @@ -90,10 +119,13 @@ def data_schema(self) -> dict: """ Schema rules: - - The field `status` is always required and must be one of: - ["requested", "in_progress"]. - - `value` should not be present - - No additional properties are allowed beyond `language`, `status`. + - `language` is required. + - `value` is optional but, if present, it MUST be `null` (no other type allowed). + - `accepted` is optional. + - Mutual exclusion: `accepted` and `value` cannot be present at the same time. 
+ * If `value` is present (and thus equals null), `accepted` must be absent. + * If `accepted` is present, `value` must be absent. + - No additional properties are allowed beyond: `language`, `locale`, `value`, `accepted`. """ return { '$schema': 'https://json-schema.org/draft/2020-12/schema', @@ -102,13 +134,20 @@ def data_schema(self) -> dict: 'properties': { 'language': {'$ref': '#/$defs/lang'}, 'locale': {'$ref': '#/$defs/locale'}, + 'value': {'$ref': '#/$defs/value_null_only'}, 'accepted': {'$ref': '#/$defs/accepted'}, }, 'required': ['language'], + 'allOf': [ + # Forbid having both `accepted` and `value` at the same time + {'not': {'required': ['accepted', 'value']}}, + ], '$defs': { 'lang': {'type': 'string', 'enum': self.languages}, 'locale': {'type': ['string', 'null']}, 'accepted': {'type': 'boolean'}, + # Only null is permitted for `value` + 'value_null_only': {'type': 'null'}, }, } @@ -171,7 +210,7 @@ def _run_automatic_process( submission_supplement: dict, action_data: dict, *args, - ** kwargs, + **kwargs, ) -> dict | None: """ Run the automatic transcription process using the Google API. @@ -190,8 +229,8 @@ def _run_automatic_process( # the merge and final validation of this acceptance. accepted = action_data.get('accepted', None) if ( - submission_supplement.get('status') == 'complete' - and accepted is not None + submission_supplement.get('status') == 'complete' + and accepted is not None ): return { 'value': submission_supplement['value'], @@ -207,10 +246,10 @@ def _run_automatic_process( # If the transcription request is still running, stop processing here. # Returning None ensures that `revise_data()` will not be called afterwards. 
if ( - accepted is None - and submission_supplement['status'] - == service_data['status'] - == 'in_progress' + accepted is None + and submission_supplement['status'] + == service_data['status'] + == 'in_progress' ): return None diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index a38a0be179..8e54a893f0 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -226,7 +226,7 @@ def revise_data( if self.action_class_config.automatic: # If the action is automatic, run the external process first. - if not (service_response := self.run_automatic_process( + if not (service_response := self._run_automatic_process( submission, submission_supplement, action_data, diff --git a/kobo/apps/subsequences/integrations/google/base.py b/kobo/apps/subsequences/integrations/google/base.py index 218dfe7b1f..c2202fc86c 100644 --- a/kobo/apps/subsequences/integrations/google/base.py +++ b/kobo/apps/subsequences/integrations/google/base.py @@ -132,12 +132,11 @@ def update_counters(self, amount) -> None: def _get_cache_key(self, xpath: str, source_lang: str, target_lang: str | None) -> str: submission_root_uuid = self.submission[SUBMISSION_UUID_FIELD] - action = 'transcribe' if target_lang is None else 'translate' - args = [self.asset.owner_id, submission_root_uuid, xpath, source_lang] + args = [self.asset.owner_id, submission_root_uuid, xpath, source_lang.lower()] if target_lang is None: args.insert(0, 'transcribe') else: args.insert(0, 'translate') - args.append(target_lang) + args.append(target_lang.lower()) return '::'.join(map(str, [SUBSEQUENCES_ASYNC_CACHE_KEY, *args])) diff --git a/kpi/views/v2/data.py b/kpi/views/v2/data.py index 6f71acd792..26f4ddc691 100644 --- a/kpi/views/v2/data.py +++ b/kpi/views/v2/data.py @@ -4,6 +4,7 @@ from typing import Union import requests +import jsonschema from django.conf import settings from django.http import Http404, HttpResponseRedirect from 
django.utils.translation import gettext_lazy as t @@ -25,6 +26,7 @@ remove_uuid_prefix, ) from kobo.apps.openrosa.libs.utils.logger_tools import http_open_rosa_error_handler +from kobo.apps.subsequences.models import SubmissionSupplement from kpi.authentication import EnketoSessionAuthentication from kpi.constants import ( PERM_CHANGE_SUBMISSIONS, @@ -49,6 +51,7 @@ SubmissionValidationStatusPermission, ViewSubmissionPermission, ) +from kobo.apps.subsequences.exceptions import InvalidAction, InvalidXPath from kpi.renderers import SubmissionGeoJsonRenderer, SubmissionXMLRenderer from kpi.schema_extensions.v2.data.serializers import ( DataBulkDelete, @@ -508,15 +511,6 @@ def supplement(self, request, submission_id_or_root_uuid: str, *args, **kwargs): # make it clear, a root uuid is expected here submission_root_uuid = submission_id_or_root_uuid - ### TO BE MOVED - from kobo.apps.subsequences.exceptions import InvalidAction, InvalidXPath - from kobo.apps.subsequences.router import ( - handle_incoming_data, - retrieve_supplemental_data, - ) - - ### END TO BE MOVED - deployment = self._get_deployment() try: submission = list( @@ -532,17 +526,21 @@ def supplement(self, request, submission_id_or_root_uuid: str, *args, **kwargs): if request.method == 'GET': return Response( - retrieve_supplemental_data(self.asset, submission_root_uuid) + SubmissionSupplement.retrieve_data(self.asset, submission_root_uuid) ) post_data = request.data try: - supplemental_data = handle_incoming_data(self.asset, submission, post_data) + supplemental_data = SubmissionSupplement.revise_data( + self.asset, submission, post_data + ) except InvalidAction: raise serializers.ValidationError({'detail': 'Invalid action'}) except InvalidXPath: raise serializers.ValidationError({'detail': 'Invalid question name'}) + except jsonschema.exceptions.ValidationError: + raise serializers.ValidationError({'detail': 'Invalid payload'}) return Response(supplemental_data) From 3b1122dc2887c2a725cc8ba702e01ee7096e856e 
Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Tue, 26 Aug 2025 15:28:15 -0400 Subject: [PATCH 083/138] Unit tests for automatic Google transcription --- .../actions/automatic_google_transcription.py | 9 +- .../subsequences/tests/api/v2/test_actions.py | 87 +++++ .../test_automatic_google_transcription.py | 351 ++++++++++++++++++ .../tests/test_manual_transcription.py | 5 +- .../tests/test_manual_translation.py | 6 +- 5 files changed, 452 insertions(+), 6 deletions(-) create mode 100644 kobo/apps/subsequences/tests/test_automatic_google_transcription.py diff --git a/kobo/apps/subsequences/actions/automatic_google_transcription.py b/kobo/apps/subsequences/actions/automatic_google_transcription.py index 8c30002bac..d6c6d53584 100644 --- a/kobo/apps/subsequences/actions/automatic_google_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -237,6 +237,13 @@ def _run_automatic_process( 'status': 'complete', } + # TBC + if 'value' in action_data: + return { + 'value': action_data['value'], + 'status': 'deleted', + } + # Otherwise, trigger the external Google transcription service. service = GoogleTranscriptionService(submission, asset=kwargs['asset']) service_data = service.process_data( @@ -247,7 +254,7 @@ def _run_automatic_process( # Returning None ensures that `revise_data()` will not be called afterwards. 
if ( accepted is None - and submission_supplement['status'] + and submission_supplement.get('status') == service_data['status'] == 'in_progress' ): diff --git a/kobo/apps/subsequences/tests/api/v2/test_actions.py b/kobo/apps/subsequences/tests/api/v2/test_actions.py index 569cb61937..01d98df2ef 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_actions.py +++ b/kobo/apps/subsequences/tests/api/v2/test_actions.py @@ -1,5 +1,8 @@ +from unittest.mock import patch, MagicMock + from rest_framework import status +from kobo.apps.subsequences.models import SubmissionSupplement from kobo.apps.subsequences.tests.api.v2.base import SubsequenceBaseTestCase @@ -72,3 +75,87 @@ def test_cannot_patch_with_invalid_payload(self): assert response.status_code == status.HTTP_400_BAD_REQUEST assert 'Invalid action' in str(response.data) + + def test_automatic_google_transcription_forbidden_payload(self): + # First, set up the asset to allow automatic google transcription + self.set_asset_advanced_features({ + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'automatic_google_transcription': [ + {'language': 'es'}, + ] + } + }, + }) + + payload = { + '_version': '20250820', + 'q1': { + 'automatic_google_transcription': { + 'language': 'es', + 'value': 'some text', # forbidden field + } + }, + } + response = self.client.patch( + self.supplement_details_url, data=payload, format='json' + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert 'Invalid payload' in str(response.data) + + def test_cannot_accept_incomplete_automatic_translation(self): + # Set up the asset to allow automatic google transcription + self.set_asset_advanced_features({ + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'automatic_google_transcription': [ + {'language': 'es'}, + ] + } + }, + }) + + # Simulate in progress translation + mock_submission_supplement = { + "_version": "20250820", + "q1": { + "automatic_google_transcription": { + "status": "in_progress", + "language": 
"es", + "_dateCreated": "2025-08-25T21:17:35.535710Z", + "_dateModified": "2025-08-26T11:41:21.917338Z", + } + }, + } + SubmissionSupplement.objects.create( + submission_uuid=self.submission_uuid, + content=mock_submission_supplement, + asset=self.asset, + ) + + # Try to set 'accepted' status when translation is not complete + payload = { + '_version': '20250820', + 'q1': { + 'automatic_google_transcription': { + 'language': 'es', + 'accepted': True, + } + }, + } + + # Mock GoogleTranscriptionService + mock_service = MagicMock() + mock_service.process_data.return_value = {'status': 'in_progress'} + + with patch( + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa + return_value=mock_service + ): + response = self.client.patch( + self.supplement_details_url, data=payload, format='json' + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert 'Invalid payload' in str(response.data) diff --git a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py new file mode 100644 index 0000000000..eaf239d13e --- /dev/null +++ b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py @@ -0,0 +1,351 @@ +from unittest.mock import MagicMock, patch + +import dateutil +import jsonschema +import pytest + +from ..actions.automatic_google_transcription import AutomaticGoogleTranscriptionAction +from .constants import EMPTY_SUBMISSION + + +def test_valid_params_pass_validation(): + params = [{'language': 'fr'}, {'language': 'es'}] + AutomaticGoogleTranscriptionAction.validate_params(params) + + +def test_invalid_params_fail_validation(): + params = [{'language': 123}, {'language': 'es'}] + with pytest.raises(jsonschema.exceptions.ValidationError): + AutomaticGoogleTranscriptionAction.validate_params(params) + + +def test_valid_user_data_passes_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params 
= [{'language': 'fr'}, {'language': 'es'}] + + action = AutomaticGoogleTranscriptionAction(xpath, params) + + allowed_data = [ + # Trivial case + {'language': 'fr'}, + # Transcription with locale + {'language': 'fr', 'locale': 'fr-CA'}, + # Delete transcript + {'language': 'fr', 'value': None}, + # Delete transcript with locale + {'language': 'fr', 'locale': 'fr-CA', 'value': None}, + # Accept translation + {'language': 'fr', 'accepted': True}, + # Accept translation with locale + {'language': 'fr', 'locale': 'fr-CA', 'accepted': True}, + ] + + for data in allowed_data: + action.validate_data(data) + + +def test_valid_automated_translation_data_passes_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + + action = AutomaticGoogleTranscriptionAction(xpath, params) + + allowed_data = [ + # Trivial case + {'language': 'fr', 'value': 'Aucune idée', 'status': 'complete'}, + { + 'language': 'fr', + 'locale': 'fr-FR', + 'value': 'Aucune idée', + 'status': 'complete', + }, + # Delete transcript + {'language': 'fr', 'value': None, 'status': 'deleted'}, + {'language': 'fr', 'locale': 'fr-CA', 'value': None, 'status': 'deleted'}, + # Action in progress no value + {'language': 'es', 'status': 'in_progress'}, + {'language': 'es', 'locale': 'fr-CA', 'status': 'in_progress'}, + # Store error with status + {'language': 'es', 'status': 'failed', 'error': 'Translation failed'}, + { + 'language': 'es', + 'locale': 'fr-CA', + 'status': 'failed', + 'error': 'Translation failed', + }, + ] + + for data in allowed_data: + action.validate_automated_data(data) + + +def test_invalid_user_data_fails_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = AutomaticGoogleTranscriptionAction(xpath, params) + + invalid_data = [ + # Wrong language + {'language': 'en'}, + # Empty data + {}, + # Cannot push a translation + {'language': 
'fr', 'value': 'Aucune idée'}, + # Cannot push a translation + {'language': 'fr', 'value': 'Aucune idée', 'status': 'complete'}, + # Cannot push a translation + {'language': 'fr', 'value': 'Aucune idée', 'status': 'in_progress'}, + # Cannot push a translation + {'language': 'fr', 'value': 'Aucune idée', 'status': 'failed'}, + # Cannot push a status + {'language': 'fr', 'status': 'in_progress'}, + # Cannot pass value and accepted at the same time + {'language': 'fr', 'value': None, 'accepted': False} + ] + + for data in invalid_data: + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_data(data) + + +def test_invalid_automated_data_fails_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = AutomaticGoogleTranscriptionAction(xpath, params) + + invalid_data = [ + # Wrong language + {'language': 'es', 'value': 'No idea', 'status': 'complete'}, + # Cannot pass a value while in progress + {'language': 'en', 'value': 'No idea', 'status': 'in_progress'}, + {}, + # Cannot accept an empty translation + {'language': 'en', 'accepted': True}, + # Cannot deny an empty translation + {'language': 'en', 'accepted': False}, + # Cannot pass value and accepted at the same time + {'language': 'en', 'value': None, 'accepted': False}, + # Cannot have a value while in progress + {'language': 'en', 'value': 'No idea', 'status': 'in_progress'}, + # Missing error property + {'language': 'en', 'status': 'failed'}, + # Delete transcript without status + {'language': 'fr', 'value': None}, + # Delete transcript with locale without status + {'language': 'fr', 'locale': 'fr-CA', 'value': None}, + # failed with no status + {'language': 'es', 'error': 'Translation failed'}, + # failed with no error + {'language': 'es', 'status': 'failed'}, + ] + + for data in invalid_data: + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_automated_data(data) + + +def 
test_valid_result_passes_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = AutomaticGoogleTranscriptionAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'en', 'value': 'two'} + third = {'language': 'fr', 'value': 'trois'} + fourth = {'language': 'fr', 'accepted': True} + fifth = {'language': 'fr', 'value': None} + six = {'language': 'en', 'value': 'six'} + mock_sup_det = action.action_class_config.default_type + + mock_service = MagicMock() + with patch( + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa + return_value=mock_service + ): + for data in first, second, third, fourth, fifth, six: + value = data.get('value', '') + # The 'value' field is not allowed in the payload, except when its + # value is None. + if value: + del data['value'] + + mock_service.process_data.return_value = { + 'value': value, + 'status': 'complete' + } + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + + action.validate_result(mock_sup_det) + + assert '_dateAccepted' in mock_sup_det['_revisions'][1] + assert mock_sup_det['_revisions'][0]['status'] == 'deleted' + + +def test_acceptance_does_not_produce_revisions(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = AutomaticGoogleTranscriptionAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'fr', 'accepted': True} + third = {'language': 'fr', 'accepted': False} + mock_sup_det = action.action_class_config.default_type + + mock_service = MagicMock() + with patch( + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa + return_value=mock_service + ): + for data in first, second, third: + value = data.get('value', '') + # The 'value' field is not allowed in the payload, 
except when its + # value is None. + if value: + del data['value'] + + mock_service.process_data.return_value = { + 'value': value, + 'status': 'complete' + } + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + assert '_revisions' not in mock_sup_det + if data.get('value') is None: + is_date_accepted_present = mock_sup_det.get('_dateAccepted') is None + assert is_date_accepted_present is not bool(data.get('accepted')) + + action.validate_result(mock_sup_det) + + +def test_invalid_result_fails_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = AutomaticGoogleTranscriptionAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'en', 'value': 'two'} + third = {'language': 'fr', 'value': 'trois'} + fourth = {'language': 'fr', 'accepted': True} + fifth = {'language': 'fr', 'value': None} + six = {'language': 'en', 'value': 'six'} + mock_sup_det = action.action_class_config.default_type + + mock_service = MagicMock() + with patch( + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', + # noqa + return_value=mock_service + ): + for data in first, second, third, fourth, fifth, six: + value = data.get('value', '') + # The 'value' field is not allowed in the payload, except when its + # value is None. 
+ if value: + del data['value'] + + mock_service.process_data.return_value = { + 'value': value, + 'status': 'complete' + } + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + + action.validate_result(mock_sup_det) + + # erroneously add '_dateModified' onto a revision + first_revision = mock_sup_det['_revisions'][0] + first_revision['_dateModified'] = first_revision['_dateCreated'] + + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_result(mock_sup_det) + + +def test_transcription_revisions_are_retained_in_supplemental_details(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = AutomaticGoogleTranscriptionAction(xpath, params) + + first = {'language': 'en', 'value': 'No idea'} + second = {'language': 'fr', 'value': 'Aucune idée'} + mock_service = MagicMock() + with patch( + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', + # noqa + return_value=mock_service + ): + value = first.pop('value', None) + mock_service.process_data.return_value = { + 'value': value, + 'status': 'complete' + } + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, action.action_class_config.default_type, first + ) + + assert mock_sup_det['language'] == 'en' + assert mock_sup_det['value'] == 'No idea' + assert mock_sup_det['_dateCreated'] == mock_sup_det['_dateModified'] + assert '_revisions' not in mock_sup_det + first_time = mock_sup_det['_dateCreated'] + + with patch( + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa + return_value=mock_service + ): + value = second.pop('value', None) + mock_service.process_data.return_value = { + 'value': value, + 'status': 'complete' + } + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) + + assert len(mock_sup_det['_revisions']) == 1 + + # the revision should encompass the first transcript + assert 
mock_sup_det['_revisions'][0].items() >= first.items() + + # the revision should have a creation timestamp equal to that of the first + # transcript + assert mock_sup_det['_revisions'][0]['_dateCreated'] == first_time + + # revisions should not list a modification timestamp + assert '_dateModified' not in mock_sup_det['_revisions'][0] + + # the record itself (not revision) should have an unchanged creation + # timestamp + assert mock_sup_det['_dateCreated'] == first_time + + # the record itself should have an updated modification timestamp + assert dateutil.parser.parse(mock_sup_det['_dateModified']) > dateutil.parser.parse( + mock_sup_det['_dateCreated'] + ) + + # the record itself should encompass the second transcript + assert mock_sup_det.items() >= second.items() + + +def test_latest_revision_is_first(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = AutomaticGoogleTranscriptionAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'fr', 'value': 'deux'} + third = {'language': 'fr', 'value': 'trois'} + + mock_sup_det = action.action_class_config.default_type + mock_service = MagicMock() + with patch( + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', + # noqa + return_value=mock_service + ): + for data in first, second, third: + value = data.pop('value') + mock_service.process_data.return_value = { + 'value': value, + 'status': 'complete' + } + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + + assert mock_sup_det['value'] == 'trois' + assert mock_sup_det['_revisions'][0]['value'] == 'deux' + assert mock_sup_det['_revisions'][1]['value'] == 'un' diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py index 1bd0e859a7..7bfff6cc9e 100644 --- a/kobo/apps/subsequences/tests/test_manual_transcription.py +++ 
b/kobo/apps/subsequences/tests/test_manual_transcription.py @@ -30,7 +30,10 @@ def test_valid_transcript_data_passes_validation(): data = {'language': 'fr', 'value': ''} action.validate_data(data) - # Delete transcript + # Transcription with locale + data = {'language': 'fr', 'locale': 'fr-CA', 'value': 'Ché tu moé?'} + + # Tag transcript as deleted data = {'language': 'fr', 'value': None} action.validate_data(data) diff --git a/kobo/apps/subsequences/tests/test_manual_translation.py b/kobo/apps/subsequences/tests/test_manual_translation.py index 19965f2900..cdfe243d31 100644 --- a/kobo/apps/subsequences/tests/test_manual_translation.py +++ b/kobo/apps/subsequences/tests/test_manual_translation.py @@ -5,8 +5,6 @@ from ..actions.manual_translation import ManualTranslationAction from .constants import EMPTY_SUBMISSION -DEFAULT_SUPPLEMENT_DATA = [] - def test_valid_params_pass_validation(): params = [{'language': 'fr'}, {'language': 'es'}] @@ -27,11 +25,11 @@ def test_valid_translation_data_passes_validation(): data = {'language': 'fr', 'value': 'Aucune idée'} action.validate_data(data) - # No transcript + # No translations data = {'language': 'fr', 'value': ''} action.validate_data(data) - # Delete transcript + # Tag translation as deleted data = {'language': 'fr', 'value': None} action.validate_data(data) From 19e1d0f7ebfbe2ede804f80b583d0c9926be0f03 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Tue, 26 Aug 2025 16:57:36 -0400 Subject: [PATCH 084/138] fix: linter --- .../actions/automatic_google_transcription.py | 17 ++--- kobo/apps/subsequences/actions/base.py | 16 +++-- .../actions/manual_transcription.py | 5 +- .../actions/manual_translation.py | 5 +- .../subsequences/integrations/google/base.py | 7 ++- .../integrations/google/google_transcribe.py | 21 +++---- .../subsequences/integrations/utils/cache.py | 7 +-- .../subsequences/integrations/utils/google.py | 1 + kobo/apps/subsequences/models.py | 14 +++-- .../subsequences/tests/api/v2/test_actions.py | 62 
++++++++++--------- .../test_automatic_google_transcription.py | 32 ++++------ .../tests/test_manual_translation.py | 12 +++- kobo/apps/subsequences/time_utils.py | 1 + kobo/apps/subsequences/utils.py | 12 ++-- 14 files changed, 104 insertions(+), 108 deletions(-) diff --git a/kobo/apps/subsequences/actions/automatic_google_transcription.py b/kobo/apps/subsequences/actions/automatic_google_transcription.py index d6c6d53584..d3b700f350 100644 --- a/kobo/apps/subsequences/actions/automatic_google_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -1,6 +1,6 @@ from kobo.apps.organizations.constants import UsageType -from .base import ActionClassConfig, BaseLanguageAction from ..integrations.google.google_transcribe import GoogleTranscriptionService +from .base import ActionClassConfig, BaseLanguageAction class AutomaticGoogleTranscriptionAction(BaseLanguageAction): @@ -55,7 +55,6 @@ def automated_data_schema(self) -> dict: 'value': {'type': ['string', 'null']}, 'error': {'type': 'string'}, 'accepted': {'type': 'boolean'}, - # --- Value rules --- # If status == "complete" → require "value" (string or null) 'rule_value_required_when_complete': { @@ -69,9 +68,7 @@ def automated_data_schema(self) -> dict: 'rule_value_forbidden_when_in_progress_or_failed': { 'if': { 'required': ['status'], - 'properties': { - 'status': {'enum': ['in_progress', 'failed']} - }, + 'properties': {'status': {'enum': ['in_progress', 'failed']}}, }, 'then': {'not': {'required': ['value']}}, }, @@ -91,7 +88,6 @@ def automated_data_schema(self) -> dict: ] }, }, - # --- Other field rules --- # If status == "failed" → require "error"; else forbid it 'rule_error_presence_when_failed': { @@ -228,10 +224,7 @@ def _run_automatic_process( # return the completed transcription right away. `revise_data()` will handle # the merge and final validation of this acceptance. 
accepted = action_data.get('accepted', None) - if ( - submission_supplement.get('status') == 'complete' - and accepted is not None - ): + if submission_supplement.get('status') == 'complete' and accepted is not None: return { 'value': submission_supplement['value'], 'status': 'complete', @@ -246,9 +239,7 @@ def _run_automatic_process( # Otherwise, trigger the external Google transcription service. service = GoogleTranscriptionService(submission, asset=kwargs['asset']) - service_data = service.process_data( - self.source_question_xpath, action_data - ) + service_data = service.process_data(self.source_question_xpath, action_data) # If the transcription request is still running, stop processing here. # Returning None ensures that `revise_data()` will not be called afterwards. diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 8e54a893f0..e7af3478f9 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -104,6 +104,7 @@ } """ + @dataclass class ActionClassConfig: """ @@ -226,12 +227,14 @@ def revise_data( if self.action_class_config.automatic: # If the action is automatic, run the external process first. - if not (service_response := self._run_automatic_process( - submission, - submission_supplement, - action_data, - asset=asset, - )): + if not ( + service_response := self._run_automatic_process( + submission, + submission_supplement, + action_data, + asset=asset, + ) + ): # If the service response is None, the automatic task is still running. # Stop here to avoid processing data and creating redundant revisions. return None @@ -407,6 +410,7 @@ class BaseLanguageAction(BaseAction): element is an object with a single string property for the transcript language. 
""" + params_schema = { 'type': 'array', 'items': { diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 3bfacaa0a3..d50334b841 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -9,7 +9,7 @@ class ManualTranscriptionAction(BaseLanguageAction): def _get_output_field_name(self, language: str) -> str: language = language.split('-')[0] # ignore region if any - return f"{self.source_question_xpath}/transcription__{language}" + return f'{self.source_question_xpath}/transcription__{language}' def get_output_fields(self) -> list[dict]: return [ @@ -18,7 +18,8 @@ def get_output_fields(self) -> list[dict]: 'name': self._get_output_field_name(params['language']), 'source': self.source_question_xpath, 'type': 'transcript', - } for params in self.params + } + for params in self.params ] def transform_data_for_output(self, action_data: dict) -> dict[str, dict[str, Any]]: diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index 4feafc187c..bbf3cc7c3a 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -9,7 +9,7 @@ class ManualTranslationAction(BaseLanguageAction): def _get_output_field_name(self, language: str) -> str: language = language.split('-')[0] # ignore region if any - return f"{self.source_question_xpath}/translation__{language}" + return f'{self.source_question_xpath}/translation__{language}' def get_output_fields(self): return [ @@ -18,7 +18,8 @@ def get_output_fields(self): 'name': self._get_output_field_name(params['language']), 'source': self.source_question_xpath, 'type': 'translation', - } for params in self.params + } + for params in self.params ] def transform_data_for_output( diff --git a/kobo/apps/subsequences/integrations/google/base.py 
b/kobo/apps/subsequences/integrations/google/base.py index c2202fc86c..e378d43436 100644 --- a/kobo/apps/subsequences/integrations/google/base.py +++ b/kobo/apps/subsequences/integrations/google/base.py @@ -14,11 +14,12 @@ from kobo.apps.trackers.utils import update_nlp_counter from kpi.utils.log import logging from ...constants import ( - SUBMISSION_UUID_FIELD, GOOGLE_CACHE_TIMEOUT, + SUBMISSION_UUID_FIELD, SUBSEQUENCES_ASYNC_CACHE_KEY, ) from ...exceptions import SubsequenceTimeoutError + # from ...models import SubmissionSupplement # from ..utils.cache import generate_cache_key from ..utils.google import google_credentials_from_constance_config @@ -130,7 +131,9 @@ def update_counters(self, amount) -> None: self.asset.id, ) - def _get_cache_key(self, xpath: str, source_lang: str, target_lang: str | None) -> str: + def _get_cache_key( + self, xpath: str, source_lang: str, target_lang: str | None + ) -> str: submission_root_uuid = self.submission[SUBMISSION_UUID_FIELD] args = [self.asset.owner_id, submission_root_uuid, xpath, source_lang.lower()] if target_lang is None: diff --git a/kobo/apps/subsequences/integrations/google/google_transcribe.py b/kobo/apps/subsequences/integrations/google/google_transcribe.py index 56b6f725f8..cacde9721b 100644 --- a/kobo/apps/subsequences/integrations/google/google_transcribe.py +++ b/kobo/apps/subsequences/integrations/google/google_transcribe.py @@ -11,20 +11,16 @@ from google.api_core.exceptions import InvalidArgument from google.cloud import speech -from kpi.utils.log import logging from kpi.exceptions import ( + AttachmentNotFoundException, InvalidXPathException, + NotSupportedFormatException, SubmissionNotFoundException, XPathNotFoundException, - AttachmentNotFoundException, - NotSupportedFormatException, ) +from kpi.utils.log import logging from ...constants import SUBMISSION_UUID_FIELD -from ...exceptions import ( - AudioTooLongError, - SubsequenceTimeoutError, -) -from ..utils.cache import generate_cache_key +from 
...exceptions import AudioTooLongError, SubsequenceTimeoutError from .base import GoogleService # https://cloud.google.com/speech-to-text/quotas#content @@ -152,16 +148,13 @@ def process_data(self, xpath: str, params: dict) -> dict: 'status': 'failed', 'error': {f'Attachment not found'}, } - except (InvalidXPathException,XPathNotFoundException): + except (InvalidXPathException, XPathNotFoundException): return { 'status': 'failed', 'error': {f'Invalid question name XPath'}, } except NotSupportedFormatException: - return { - 'status': 'failed', - 'error': 'Unsupported format' - } + return {'status': 'failed', 'error': 'Unsupported format'} try: value = self.handle_google_operation( @@ -181,7 +174,7 @@ def process_data(self, xpath: str, params: dict) -> dict: logging.error(f'No transcriptions found for xpath={xpath}') return { 'status': 'failed', - 'error': f'Transcription failed with error {str(e)}' + 'error': f'Transcription failed with error {str(e)}', } return { diff --git a/kobo/apps/subsequences/integrations/utils/cache.py b/kobo/apps/subsequences/integrations/utils/cache.py index 407a8edd81..570946c4cd 100644 --- a/kobo/apps/subsequences/integrations/utils/cache.py +++ b/kobo/apps/subsequences/integrations/utils/cache.py @@ -1,12 +1,9 @@ from ...constants import SUBSEQUENCES_ASYNC_CACHE_KEY + # TODO REMOVE ME, I'm not used anymore def generate_cache_key( - user_id: int, - submission_uuid: str, - xpath: str, - source_lang: str, - target_lang: str + user_id: int, submission_uuid: str, xpath: str, source_lang: str, target_lang: str ) -> str: """ Make a cache key from the parameters for NLP diff --git a/kobo/apps/subsequences/integrations/utils/google.py b/kobo/apps/subsequences/integrations/utils/google.py index 23c661a83c..bdc4b1d161 100644 --- a/kobo/apps/subsequences/integrations/utils/google.py +++ b/kobo/apps/subsequences/integrations/utils/google.py @@ -7,6 +7,7 @@ def google_credentials_from_json_string(json_str): return 
Credentials.from_service_account_info(json.loads(json_str)) + def google_credentials_from_constance_config(): if json_str := constance.config.ASR_MT_GOOGLE_CREDENTIALS: return google_credentials_from_json_string(json_str) diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 7322ec32da..45fcd51de5 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -1,5 +1,3 @@ -from copy import deepcopy - from django.db import models from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix @@ -87,9 +85,11 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di action_id, action.action_class_config.default_type ) - if not (action_supplemental_data := action.revise_data( - submission, action_supplemental_data, action_data - )): + if not ( + action_supplemental_data := action.revise_data( + submission, action_supplemental_data, action_data + ) + ): supplemental_data['_version'] = schema_version return supplemental_data @@ -205,7 +205,9 @@ def retrieve_data( if not new_acceptance_date: # Never return unaccepted data continue - existing_acceptance_date = data_for_output.get(field_name, {}).get('_dateAccepted') + existing_acceptance_date = data_for_output.get( + field_name, {} + ).get('_dateAccepted') if ( not existing_acceptance_date or existing_acceptance_date < new_acceptance_date diff --git a/kobo/apps/subsequences/tests/api/v2/test_actions.py b/kobo/apps/subsequences/tests/api/v2/test_actions.py index 01d98df2ef..70676e5a94 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_actions.py +++ b/kobo/apps/subsequences/tests/api/v2/test_actions.py @@ -1,4 +1,4 @@ -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch from rest_framework import status @@ -78,16 +78,18 @@ def test_cannot_patch_with_invalid_payload(self): def test_automatic_google_transcription_forbidden_payload(self): # First, set up the asset to allow automatic 
google transcription - self.set_asset_advanced_features({ - '_version': '20250820', - '_actionConfigs': { - 'q1': { - 'automatic_google_transcription': [ - {'language': 'es'}, - ] - } - }, - }) + self.set_asset_advanced_features( + { + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'automatic_google_transcription': [ + {'language': 'es'}, + ] + } + }, + } + ) payload = { '_version': '20250820', @@ -106,26 +108,28 @@ def test_automatic_google_transcription_forbidden_payload(self): def test_cannot_accept_incomplete_automatic_translation(self): # Set up the asset to allow automatic google transcription - self.set_asset_advanced_features({ - '_version': '20250820', - '_actionConfigs': { - 'q1': { - 'automatic_google_transcription': [ - {'language': 'es'}, - ] - } - }, - }) + self.set_asset_advanced_features( + { + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'automatic_google_transcription': [ + {'language': 'es'}, + ] + } + }, + } + ) # Simulate in progress translation mock_submission_supplement = { - "_version": "20250820", - "q1": { - "automatic_google_transcription": { - "status": "in_progress", - "language": "es", - "_dateCreated": "2025-08-25T21:17:35.535710Z", - "_dateModified": "2025-08-26T11:41:21.917338Z", + '_version': '20250820', + 'q1': { + 'automatic_google_transcription': { + 'status': 'in_progress', + 'language': 'es', + '_dateCreated': '2025-08-25T21:17:35.535710Z', + '_dateModified': '2025-08-26T11:41:21.917338Z', } }, } @@ -152,7 +156,7 @@ def test_cannot_accept_incomplete_automatic_translation(self): with patch( 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa - return_value=mock_service + return_value=mock_service, ): response = self.client.patch( self.supplement_details_url, data=payload, format='json' diff --git a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py index 
eaf239d13e..f6dde7271e 100644 --- a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py +++ b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py @@ -100,7 +100,7 @@ def test_invalid_user_data_fails_validation(): # Cannot push a status {'language': 'fr', 'status': 'in_progress'}, # Cannot pass value and accepted at the same time - {'language': 'fr', 'value': None, 'accepted': False} + {'language': 'fr', 'value': None, 'accepted': False}, ] for data in invalid_data: @@ -160,7 +160,7 @@ def test_valid_result_passes_validation(): mock_service = MagicMock() with patch( 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa - return_value=mock_service + return_value=mock_service, ): for data in first, second, third, fourth, fifth, six: value = data.get('value', '') @@ -171,7 +171,7 @@ def test_valid_result_passes_validation(): mock_service.process_data.return_value = { 'value': value, - 'status': 'complete' + 'status': 'complete', } mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) @@ -194,7 +194,7 @@ def test_acceptance_does_not_produce_revisions(): mock_service = MagicMock() with patch( 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa - return_value=mock_service + return_value=mock_service, ): for data in first, second, third: value = data.get('value', '') @@ -205,7 +205,7 @@ def test_acceptance_does_not_produce_revisions(): mock_service.process_data.return_value = { 'value': value, - 'status': 'complete' + 'status': 'complete', } mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) assert '_revisions' not in mock_sup_det @@ -233,7 +233,7 @@ def test_invalid_result_fails_validation(): with patch( 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa - return_value=mock_service + return_value=mock_service, ): for data in first, second, third, fourth, fifth, 
six: value = data.get('value', '') @@ -244,7 +244,7 @@ def test_invalid_result_fails_validation(): mock_service.process_data.return_value = { 'value': value, - 'status': 'complete' + 'status': 'complete', } mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) @@ -269,13 +269,10 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): with patch( 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa - return_value=mock_service + return_value=mock_service, ): value = first.pop('value', None) - mock_service.process_data.return_value = { - 'value': value, - 'status': 'complete' - } + mock_service.process_data.return_value = {'value': value, 'status': 'complete'} mock_sup_det = action.revise_data( EMPTY_SUBMISSION, action.action_class_config.default_type, first ) @@ -288,13 +285,10 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): with patch( 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa - return_value=mock_service + return_value=mock_service, ): value = second.pop('value', None) - mock_service.process_data.return_value = { - 'value': value, - 'status': 'complete' - } + mock_service.process_data.return_value = {'value': value, 'status': 'complete'} mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert len(mock_sup_det['_revisions']) == 1 @@ -336,13 +330,13 @@ def test_latest_revision_is_first(): with patch( 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa - return_value=mock_service + return_value=mock_service, ): for data in first, second, third: value = data.pop('value') mock_service.process_data.return_value = { 'value': value, - 'status': 'complete' + 'status': 'complete', } mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) diff --git a/kobo/apps/subsequences/tests/test_manual_translation.py 
b/kobo/apps/subsequences/tests/test_manual_translation.py index cdfe243d31..e8d8139d97 100644 --- a/kobo/apps/subsequences/tests/test_manual_translation.py +++ b/kobo/apps/subsequences/tests/test_manual_translation.py @@ -94,7 +94,9 @@ def test_translation_revisions_are_retained_in_supplemental_details(): first = {'language': 'en', 'value': 'No idea'} second = {'language': 'fr', 'value': 'Aucune idée'} third = {'language': 'en', 'value': 'No clue'} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.action_class_config.default_type, first) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, action.action_class_config.default_type, first + ) assert len(mock_sup_det) == 1 assert mock_sup_det[0]['language'] == 'en' @@ -143,7 +145,9 @@ def test_setting_translation_to_empty_string(): first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 'fr', 'value': ''} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.action_class_config.default_type, first) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, action.action_class_config.default_type, first + ) assert mock_sup_det[0]['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) @@ -159,7 +163,9 @@ def test_setting_translation_to_none(): first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 'fr', 'value': None} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, action.action_class_config.default_type, first) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, action.action_class_config.default_type, first + ) assert mock_sup_det[0]['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) diff --git a/kobo/apps/subsequences/time_utils.py b/kobo/apps/subsequences/time_utils.py index 5ae9f9ddec..56627a1fb1 100644 --- a/kobo/apps/subsequences/time_utils.py +++ b/kobo/apps/subsequences/time_utils.py @@ -6,6 +6,7 @@ import datetime + def utc_datetime_to_js_str(dt: 
datetime.datetime) -> str: """ Return a string to represent a `datetime` following the simplification of diff --git a/kobo/apps/subsequences/utils.py b/kobo/apps/subsequences/utils.py index abf4e91408..c84ffe8e6c 100644 --- a/kobo/apps/subsequences/utils.py +++ b/kobo/apps/subsequences/utils.py @@ -64,15 +64,11 @@ def get_supplemental_output_fields(asset: 'kpi.models.Asset') -> list[dict]: # since we want transcripts always to come before translations, à la # # and we're lucky with alphabetical order, we can just sort by name - return sorted( - output_fields_by_name.values(), key=lambda field: field['name'] - ) + return sorted(output_fields_by_name.values(), key=lambda field: field['name']) def stream_with_supplements( - asset: 'kpi.models.Asset', - submission_stream: Generator, - for_output: bool = False + asset: 'kpi.models.Asset', submission_stream: Generator, for_output: bool = False ) -> Generator: if not asset.advanced_features: yield from submission_stream @@ -96,6 +92,8 @@ def stream_with_supplements( for submission in submission_stream: submission_uuid = remove_uuid_prefix(submission[SUBMISSION_UUID_FIELD]) submission[SUPPLEMENT_KEY] = SubmissionSupplement.retrieve_data( - asset, for_output=for_output, prefetched_supplement=extras.get(submission_uuid, {}) + asset, + for_output=for_output, + prefetched_supplement=extras.get(submission_uuid, {}), ) yield submission From 3250c218bdd7425b74dbcea5c6be85a744ff107f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 27 Aug 2025 12:22:27 -0400 Subject: [PATCH 085/138] Refactor: add more base classes and mixins --- .../actions/automatic_google_transcription.py | 184 +----------------- .../actions/automatic_google_translation.py | 168 ++++++++++++++++ kobo/apps/subsequences/actions/base.py | 146 +++++++++++++- .../actions/manual_transcription.py | 47 +---- .../actions/manual_translation.py | 65 +------ kobo/apps/subsequences/actions/mixins.py | 98 ++++++++++ 
.../subsequences/integrations/utils/cache.py | 12 -- 7 files changed, 426 insertions(+), 294 deletions(-) create mode 100644 kobo/apps/subsequences/actions/automatic_google_translation.py create mode 100644 kobo/apps/subsequences/actions/mixins.py delete mode 100644 kobo/apps/subsequences/integrations/utils/cache.py diff --git a/kobo/apps/subsequences/actions/automatic_google_transcription.py b/kobo/apps/subsequences/actions/automatic_google_transcription.py index d3b700f350..23f2ec82b2 100644 --- a/kobo/apps/subsequences/actions/automatic_google_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -1,191 +1,19 @@ from kobo.apps.organizations.constants import UsageType from ..integrations.google.google_transcribe import GoogleTranscriptionService -from .base import ActionClassConfig, BaseLanguageAction +from .base import ActionClassConfig, BaseAutomaticNLPAction +from .mixins import TranscriptionResultSchemaMixin +class AutomaticGoogleTranscriptionAction( + TranscriptionResultSchemaMixin, BaseAutomaticNLPAction +): -class AutomaticGoogleTranscriptionAction(BaseLanguageAction): ID = 'automatic_google_transcription' action_class_config = ActionClassConfig({}, None, True) - @property - def automated_data_schema(self) -> dict: - """ - Schema rules: - - - The field `status` is always required and must be one of: - ["requested", "in_progress", "completed", "failed"]. - - If `status` == "done": - * The field `value` becomes required and must be a string. - - If `status` == "failed": - * The field `error` becomes required and must be a string. - - No additional properties are allowed beyond `language`, `status` and `value`. 
- """ - return { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'language': {'$ref': '#/$defs/lang'}, - 'locale': {'$ref': '#/$defs/locale'}, - 'status': {'$ref': '#/$defs/action_status'}, - 'value': {'$ref': '#/$defs/value'}, - 'error': {'$ref': '#/$defs/error'}, - 'accepted': {'$ref': '#/$defs/accepted'}, - }, - 'required': ['language', 'status'], - 'allOf': [ - # value is required when status == "complete" - {'$ref': '#/$defs/rule_value_required_when_complete'}, - # value must be absent when status in {"in_progress","failed"} - {'$ref': '#/$defs/rule_value_forbidden_when_in_progress_or_failed'}, - # value is optional but must be null when status == "deleted" - {'$ref': '#/$defs/rule_value_null_only_when_deleted'}, - # error must be present iff status == "failed" - {'$ref': '#/$defs/rule_error_presence_when_failed'}, - # accepted allowed only when status == "complete" (optional) - {'$ref': '#/$defs/rule_accepted_only_when_complete'}, - ], - '$defs': { - 'lang': {'type': 'string', 'enum': self.languages}, - 'locale': {'type': ['string', 'null']}, - 'action_status': { - 'type': 'string', - 'enum': ['in_progress', 'complete', 'failed', 'deleted'], - }, - 'value': {'type': ['string', 'null']}, - 'error': {'type': 'string'}, - 'accepted': {'type': 'boolean'}, - # --- Value rules --- - # If status == "complete" → require "value" (string or null) - 'rule_value_required_when_complete': { - 'if': { - 'required': ['status'], - 'properties': {'status': {'const': 'complete'}}, - }, - 'then': {'required': ['value']}, - }, - # If status in {"in_progress","failed"} → forbid "value" - 'rule_value_forbidden_when_in_progress_or_failed': { - 'if': { - 'required': ['status'], - 'properties': {'status': {'enum': ['in_progress', 'failed']}}, - }, - 'then': {'not': {'required': ['value']}}, - }, - # If status == "deleted" → "value" optional, but if present it MUST be null - 
'rule_value_null_only_when_deleted': { - 'if': { - 'required': ['status'], - 'properties': {'status': {'const': 'deleted'}}, - }, - 'then': { - 'anyOf': [ - {'not': {'required': ['value']}}, # value absent - { # value present and null - 'properties': {'value': {'type': 'null'}}, - 'required': ['value'], - }, - ] - }, - }, - # --- Other field rules --- - # If status == "failed" → require "error"; else forbid it - 'rule_error_presence_when_failed': { - 'if': { - 'required': ['status'], - 'properties': {'status': {'const': 'failed'}}, - }, - 'then': {'required': ['error']}, - 'else': {'not': {'required': ['error']}}, - }, - # If status == "complete" → accepted allowed but optional; else forbid it - 'rule_accepted_only_when_complete': { - 'if': { - 'required': ['status'], - 'properties': {'status': {'const': 'complete'}}, - }, - 'then': {}, # optional - 'else': {'not': {'required': ['accepted']}}, - }, - }, - } - - @property - def data_schema(self) -> dict: - """ - Schema rules: - - - `language` is required. - - `value` is optional but, if present, it MUST be `null` (no other type allowed). - - `accepted` is optional. - - Mutual exclusion: `accepted` and `value` cannot be present at the same time. - * If `value` is present (and thus equals null), `accepted` must be absent. - * If `accepted` is present, `value` must be absent. - - No additional properties are allowed beyond: `language`, `locale`, `value`, `accepted`. 
- """ - return { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'language': {'$ref': '#/$defs/lang'}, - 'locale': {'$ref': '#/$defs/locale'}, - 'value': {'$ref': '#/$defs/value_null_only'}, - 'accepted': {'$ref': '#/$defs/accepted'}, - }, - 'required': ['language'], - 'allOf': [ - # Forbid having both `accepted` and `value` at the same time - {'not': {'required': ['accepted', 'value']}}, - ], - '$defs': { - 'lang': {'type': 'string', 'enum': self.languages}, - 'locale': {'type': ['string', 'null']}, - 'accepted': {'type': 'boolean'}, - # Only null is permitted for `value` - 'value_null_only': {'type': 'null'}, - }, - } - @property def result_schema(self): - schema = { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.REVISIONS_FIELD: { - 'type': 'array', - 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, - }, - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], - '$defs': { - 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD], - }, - }, - } - - # Inject data schema in result schema template - self._inject_data_schema(schema, ['$schema', 'title', 'type']) - - # Also inject data schema in the revision definition - self._inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] - ) + schema = super().result_schema # FIXME _inject_data_schema does not merge nested children schema['$defs']['action_status'] = { diff --git 
a/kobo/apps/subsequences/actions/automatic_google_translation.py b/kobo/apps/subsequences/actions/automatic_google_translation.py new file mode 100644 index 0000000000..f72d3b0eac --- /dev/null +++ b/kobo/apps/subsequences/actions/automatic_google_translation.py @@ -0,0 +1,168 @@ +from kobo.apps.organizations.constants import UsageType +from ..integrations.google.google_transcribe import GoogleTranscriptionService +from .base import ActionClassConfig, BaseAutomaticNLPAction + + +class AutomaticGoogleTranslationAction(BaseAutomaticNLPAction): + + ID = 'automatic_google_translation' + action_class_config = ActionClassConfig([], 'language', True) + + @property + def result_schema(self): + + ### + + localized_value_schema = { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.REVISIONS_FIELD: { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, + }, + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], + } + + # Inject data schema in result schema template + self._inject_data_schema(localized_value_schema, ['$schema', 'title', 'type']) + + # Move localized_value_schema definitions to main schema + localized_value_schema_defs = localized_value_schema.pop('$defs') + + schema = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'array', + 'additionalProperties': False, + 'items': {'$ref': '#/$defs/localized_value_schema'}, + '$defs': { + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD], + }, + 'localized_value_schema': localized_value_schema, + 
**localized_value_schema_defs, + }, + } + + # Also inject data schema in the revision definition + self._inject_data_schema( + schema['$defs']['revision'], ['$schema', 'title', '$defs'] + ) + + #### + + schema = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.REVISIONS_FIELD: { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, + }, + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], + '$defs': { + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD], + }, + }, + } + + # Inject data schema in result schema template + self._inject_data_schema(schema, ['$schema', 'title', 'type']) + + # Also inject data schema in the revision definition + self._inject_data_schema( + schema['$defs']['revision'], ['$schema', 'title', '$defs'] + ) + + # FIXME _inject_data_schema does not merge nested children + schema['$defs']['action_status'] = { + 'action_status': { + 'type': 'string', + 'enum': ['in_progress', 'complete', 'error'], + }, + } + return schema + + @property + def _limit_identifier(self): + return UsageType.MT_CHARACTERS + + def _run_automatic_process( + self, + submission: dict, + submission_supplement: dict, + action_data: dict, + *args, + **kwargs, + ) -> dict | None: + """ + Run the automatic transcription process using the Google API. + + This method is intended to be called by `revise_data()`, which will finalize + the validation and merging of `action_data`. 
If the user explicitly accepts + the last completed transcription, the method short-circuits and returns it + immediately. If the transcription request is still in progress, the method + returns None so that `revise_data()` can exit early and skip unnecessary + processing. Otherwise, it calls the Google API and returns the processed + result, ready to be passed back to `revise_data()`. + """ + + # If the client sent "accepted" while the supplement is already complete, + # return the completed transcription right away. `revise_data()` will handle + # the merge and final validation of this acceptance. + accepted = action_data.get('accepted', None) + if submission_supplement.get('status') == 'complete' and accepted is not None: + return { + 'value': submission_supplement['value'], + 'status': 'complete', + } + + # TBC + if 'value' in action_data: + return { + 'value': action_data['value'], + 'status': 'deleted', + } + + # Otherwise, trigger the external Google transcription service. + service = GoogleTranscriptionService(submission, asset=kwargs['asset']) + service_data = service.process_data(self.source_question_xpath, action_data) + + # If the transcription request is still running, stop processing here. + # Returning None ensures that `revise_data()` will not be called afterwards. + if ( + accepted is None + and submission_supplement.get('status') + == service_data['status'] + == 'in_progress' + ): + return None + + # Normal case: return the processed transcription data. 
+ return service_data diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index e7af3478f9..c9f7ca74c2 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -392,7 +392,7 @@ def _run_automatic_process( raise NotImplementedError -class BaseLanguageAction(BaseAction): +class BaseManualNLPAction(BaseAction): """ For an audio question called `my_audio_question` that's transcribed @@ -461,3 +461,147 @@ def languages(self) -> list[str]: for individual_params in self.params: languages.append(individual_params['language']) return languages + + +class BaseAutomaticNLPAction(BaseManualNLPAction): + + + @property + def automated_data_schema(self) -> dict: + """ + Schema rules: + + - The field `status` is always required and must be one of: + ["requested", "in_progress", "completed", "failed"]. + - If `status` == "done": + * The field `value` becomes required and must be a string. + - If `status` == "failed": + * The field `error` becomes required and must be a string. + - No additional properties are allowed beyond `language`, `status` and `value`. 
+ """ + return { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'language': {'$ref': '#/$defs/lang'}, + 'locale': {'$ref': '#/$defs/locale'}, + 'status': {'$ref': '#/$defs/action_status'}, + 'value': {'$ref': '#/$defs/value'}, + 'error': {'$ref': '#/$defs/error'}, + 'accepted': {'$ref': '#/$defs/accepted'}, + }, + 'required': ['language', 'status'], + 'allOf': [ + # value is required when status == "complete" + {'$ref': '#/$defs/rule_value_required_when_complete'}, + # value must be absent when status in {"in_progress","failed"} + {'$ref': '#/$defs/rule_value_forbidden_when_in_progress_or_failed'}, + # value is optional but must be null when status == "deleted" + {'$ref': '#/$defs/rule_value_null_only_when_deleted'}, + # error must be present iff status == "failed" + {'$ref': '#/$defs/rule_error_presence_when_failed'}, + # accepted allowed only when status == "complete" (optional) + {'$ref': '#/$defs/rule_accepted_only_when_complete'}, + ], + '$defs': { + 'lang': {'type': 'string', 'enum': self.languages}, + 'locale': {'type': ['string', 'null']}, + 'action_status': { + 'type': 'string', + 'enum': ['in_progress', 'complete', 'failed', 'deleted'], + }, + 'value': {'type': ['string', 'null']}, + 'error': {'type': 'string'}, + 'accepted': {'type': 'boolean'}, + # --- Value rules --- + # If status == "complete" → require "value" (string or null) + 'rule_value_required_when_complete': { + 'if': { + 'required': ['status'], + 'properties': {'status': {'const': 'complete'}}, + }, + 'then': {'required': ['value']}, + }, + # If status in {"in_progress","failed"} → forbid "value" + 'rule_value_forbidden_when_in_progress_or_failed': { + 'if': { + 'required': ['status'], + 'properties': {'status': {'enum': ['in_progress', 'failed']}}, + }, + 'then': {'not': {'required': ['value']}}, + }, + # If status == "deleted" → "value" optional, but if present it MUST be null + 
'rule_value_null_only_when_deleted': { + 'if': { + 'required': ['status'], + 'properties': {'status': {'const': 'deleted'}}, + }, + 'then': { + 'anyOf': [ + {'not': {'required': ['value']}}, # value absent + { # value present and null + 'properties': {'value': {'type': 'null'}}, + 'required': ['value'], + }, + ] + }, + }, + # --- Other field rules --- + # If status == "failed" → require "error"; else forbid it + 'rule_error_presence_when_failed': { + 'if': { + 'required': ['status'], + 'properties': {'status': {'const': 'failed'}}, + }, + 'then': {'required': ['error']}, + 'else': {'not': {'required': ['error']}}, + }, + # If status == "complete" → accepted allowed but optional; else forbid it + 'rule_accepted_only_when_complete': { + 'if': { + 'required': ['status'], + 'properties': {'status': {'const': 'complete'}}, + }, + 'then': {}, # optional + 'else': {'not': {'required': ['accepted']}}, + }, + }, + } + + @property + def data_schema(self) -> dict: + """ + Schema rules: + + - `language` is required. + - `value` is optional but, if present, it MUST be `null` (no other type allowed). + - `accepted` is optional. + - Mutual exclusion: `accepted` and `value` cannot be present at the same time. + * If `value` is present (and thus equals null), `accepted` must be absent. + * If `accepted` is present, `value` must be absent. + - No additional properties are allowed beyond: `language`, `locale`, `value`, `accepted`. 
+ """ + return { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'language': {'$ref': '#/$defs/lang'}, + 'locale': {'$ref': '#/$defs/locale'}, + 'value': {'$ref': '#/$defs/value_null_only'}, + 'accepted': {'$ref': '#/$defs/accepted'}, + }, + 'required': ['language'], + 'allOf': [ + # Forbid having both `accepted` and `value` at the same time + {'not': {'required': ['accepted', 'value']}}, + ], + '$defs': { + 'lang': {'type': 'string', 'enum': self.languages}, + 'locale': {'type': ['string', 'null']}, + 'accepted': {'type': 'boolean'}, + # Only null is permitted for `value` + 'value_null_only': {'type': 'null'}, + }, + } diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index d50334b841..1870d76778 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -1,9 +1,10 @@ from typing import Any -from .base import ActionClassConfig, BaseLanguageAction +from .base import ActionClassConfig, BaseManualNLPAction +from .mixins import TranscriptionResultSchemaMixin +class ManualTranscriptionAction(TranscriptionResultSchemaMixin, BaseManualNLPAction): -class ManualTranscriptionAction(BaseLanguageAction): ID = 'manual_transcription' action_class_config = ActionClassConfig({}, None, False) @@ -33,45 +34,3 @@ def transform_data_for_output(self, action_data: dict) -> dict[str, dict[str, An self.DATE_ACCEPTED_FIELD: action_data[self.DATE_MODIFIED_FIELD], } } - - @property - def result_schema(self): - - schema = { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.REVISIONS_FIELD: { - 'type': 'array', - 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, - }, - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_MODIFIED_FIELD: {'$ref': 
'#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], - '$defs': { - 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD], - }, - }, - } - - # Inject data schema in result schema template - self._inject_data_schema(schema, ['$schema', 'title', 'type']) - - # Also inject data schema in the revision definition - self._inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] - ) - - return schema diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index bbf3cc7c3a..c10c050582 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -1,9 +1,13 @@ from typing import Any -from .base import ActionClassConfig, BaseLanguageAction +from .base import ActionClassConfig, BaseManualNLPAction +from .mixins import TranslationResultSchemaMixin -class ManualTranslationAction(BaseLanguageAction): +class ManualTranslationAction( + TranslationResultSchemaMixin, BaseManualNLPAction +): + ID = 'manual_translation' action_class_config = ActionClassConfig([], 'language', False) @@ -34,60 +38,3 @@ def transform_data_for_output( } for translation_data in action_data } - - @property - def result_schema(self): - - localized_value_schema = { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.REVISIONS_FIELD: { - 'type': 'array', - 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, - }, - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 
'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], - } - - # Inject data schema in result schema template - self._inject_data_schema(localized_value_schema, ['$schema', 'title', 'type']) - - # Move localized_value_schema definitions to main schema - localized_value_schema_defs = localized_value_schema.pop('$defs') - - schema = { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'array', - 'additionalProperties': False, - 'items': {'$ref': '#/$defs/localized_value_schema'}, - '$defs': { - 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD], - }, - 'localized_value_schema': localized_value_schema, - **localized_value_schema_defs, - }, - } - - # Also inject data schema in the revision definition - self._inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] - ) - - return schema - - @property - def _is_usage_limited(self): - return False diff --git a/kobo/apps/subsequences/actions/mixins.py b/kobo/apps/subsequences/actions/mixins.py new file mode 100644 index 0000000000..1000a11c06 --- /dev/null +++ b/kobo/apps/subsequences/actions/mixins.py @@ -0,0 +1,98 @@ + +class TranscriptionResultSchemaMixin: + + @property + def result_schema(self): + schema = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.REVISIONS_FIELD: { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, + }, + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], + '$defs': { + 'dateTime': {'type': 
'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD], + }, + }, + } + + # Inject data schema in result schema template + self._inject_data_schema(schema, ['$schema', 'title', 'type']) + + # Also inject data schema in the revision definition + self._inject_data_schema( + schema['$defs']['revision'], ['$schema', 'title', '$defs'] + ) + + return schema + + +class TranslationResultSchemaMixin: + + @property + def result_schema(self): + localized_value_schema = { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.REVISIONS_FIELD: { + 'type': 'array', + 'minItems': 1, + 'items': {'$ref': '#/$defs/revision'}, + }, + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], + } + + # Inject data schema in result schema template + self._inject_data_schema(localized_value_schema, ['$schema', 'title', 'type']) + + # Move localized_value_schema definitions to main schema + localized_value_schema_defs = localized_value_schema.pop('$defs') + + schema = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'array', + 'additionalProperties': False, + 'items': {'$ref': '#/$defs/localized_value_schema'}, + '$defs': { + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'revision': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + }, + 'required': [self.DATE_CREATED_FIELD], + }, + 'localized_value_schema': localized_value_schema, + **localized_value_schema_defs, + }, + } + + # Also inject 
data schema in the revision definition + self._inject_data_schema( + schema['$defs']['revision'], ['$schema', 'title', '$defs'] + ) + + return schema diff --git a/kobo/apps/subsequences/integrations/utils/cache.py b/kobo/apps/subsequences/integrations/utils/cache.py deleted file mode 100644 index 570946c4cd..0000000000 --- a/kobo/apps/subsequences/integrations/utils/cache.py +++ /dev/null @@ -1,12 +0,0 @@ -from ...constants import SUBSEQUENCES_ASYNC_CACHE_KEY - - -# TODO REMOVE ME, I'm not used anymore -def generate_cache_key( - user_id: int, submission_uuid: str, xpath: str, source_lang: str, target_lang: str -) -> str: - """ - Make a cache key from the parameters for NLP - """ - args = [user_id, submission_uuid, xpath, source_lang, target_lang] - return '-'.join(map(str, [SUBSEQUENCES_ASYNC_CACHE_KEY, *args])) From 7d2f5bea160896be4fd58039fa89f25d2993e20d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 27 Aug 2025 12:37:40 -0400 Subject: [PATCH 086/138] Update docstrings to make base classes and mixins purpose more obvious --- .../actions/automatic_google_transcription.py | 1 - kobo/apps/subsequences/actions/base.py | 47 +++++++++++++------ kobo/apps/subsequences/actions/mixins.py | 12 +++++ 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/kobo/apps/subsequences/actions/automatic_google_transcription.py b/kobo/apps/subsequences/actions/automatic_google_transcription.py index 23f2ec82b2..73dc641e14 100644 --- a/kobo/apps/subsequences/actions/automatic_google_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -12,7 +12,6 @@ class AutomaticGoogleTranscriptionAction( @property def result_schema(self): - schema = super().result_schema # FIXME _inject_data_schema does not merge nested children diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index c9f7ca74c2..28b8a2a212 100644 --- a/kobo/apps/subsequences/actions/base.py +++ 
b/kobo/apps/subsequences/actions/base.py @@ -393,22 +393,29 @@ def _run_automatic_process( class BaseManualNLPAction(BaseAction): - """ - For an audio question called `my_audio_question` that's transcribed - into 3 languages, the schema for `Asset.advanced_features` might look - like: - 'my_audio_question': { - 'language_action_id': [ - {'language': 'ar'}, - {'language': 'bn'}, - {'language': 'es'}, - ], - } - - The `params_schema` attribute defines the shape of the array where each - element is an object with a single string property for the transcript - language. + Base class for all manual NLP actions. + + It defines a common `params_schema` that describes the set of languages + configured in `Asset.advanced_features`. For example, if an audio + question `my_audio_question` is transcribed into three languages, the + schema may look like: + + 'my_audio_question': { + 'language_action_id': [ + {'language': 'ar'}, + {'language': 'bn'}, + {'language': 'es'}, + ], + } + + Each element in `params_schema` is an object with a single `language` + property, enforcing the expected shape of the configuration. + + The `data_schema` property defines the JSON payload expected when + posting supplemental data for a submission. This includes the + transcription or translation result, identified by language and + optionally by locale. """ params_schema = { @@ -464,7 +471,17 @@ def languages(self) -> list[str]: class BaseAutomaticNLPAction(BaseManualNLPAction): + """ + Base class for all automated NLP actions. + Extends `BaseManualNLPAction`, reusing its `params_schema` for + consistency in language configuration, while adding automated-specific + schema definitions (`automated_data_schema` and `data_schema`). + + This ensures that both manual and automated actions share the same + validation rules for parameters, while automated actions introduce + their own structure for system-generated results. 
+ """ @property def automated_data_schema(self) -> dict: diff --git a/kobo/apps/subsequences/actions/mixins.py b/kobo/apps/subsequences/actions/mixins.py index 1000a11c06..fb1fb6249a 100644 --- a/kobo/apps/subsequences/actions/mixins.py +++ b/kobo/apps/subsequences/actions/mixins.py @@ -1,5 +1,11 @@ class TranscriptionResultSchemaMixin: + """ + Provides the `result_schema` property used by all transcription-related actions. + + This mixin centralizes the schema definition so that both manual and automated + transcription classes can reuse the same structure consistently. + """ @property def result_schema(self): @@ -44,6 +50,12 @@ def result_schema(self): class TranslationResultSchemaMixin: + """ + Provides the `result_schema` property used by all translation-related actions. + + This mixin centralizes the schema definition so that both manual and automated + translation classes can reuse the same structure consistently. + """ @property def result_schema(self): From 858d0c3b0cc0d133619fc1afbe29648b20793db0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 27 Aug 2025 14:26:56 -0400 Subject: [PATCH 087/138] Refactor for automatic external services --- .../actions/automatic_google_transcription.py | 59 +------ .../actions/automatic_google_translation.py | 154 ++---------------- kobo/apps/subsequences/actions/base.py | 82 +++++++++- kobo/apps/subsequences/constants.py | 3 +- .../integrations/google/google_transcribe.py | 15 +- .../integrations/google/google_translate.py | 68 ++++---- kobo/apps/subsequences/type_aliases.py | 10 ++ kobo/apps/subsequences/utils/__init__.py | 0 .../{utils.py => utils/supplement_data.py} | 6 +- .../{time_utils.py => utils/time.py} | 4 - kpi/deployment_backends/openrosa_backend.py | 2 +- kpi/models/asset.py | 2 +- kpi/models/import_export_task.py | 2 +- 13 files changed, 146 insertions(+), 261 deletions(-) create mode 100644 kobo/apps/subsequences/type_aliases.py create mode 100644 
kobo/apps/subsequences/utils/__init__.py rename kobo/apps/subsequences/{utils.py => utils/supplement_data.py} (95%) rename kobo/apps/subsequences/{time_utils.py => utils/time.py} (84%) diff --git a/kobo/apps/subsequences/actions/automatic_google_transcription.py b/kobo/apps/subsequences/actions/automatic_google_transcription.py index 73dc641e14..4f10525b40 100644 --- a/kobo/apps/subsequences/actions/automatic_google_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -2,6 +2,8 @@ from ..integrations.google.google_transcribe import GoogleTranscriptionService from .base import ActionClassConfig, BaseAutomaticNLPAction from .mixins import TranscriptionResultSchemaMixin +from ..type_aliases import NLPExternalServiceClass + class AutomaticGoogleTranscriptionAction( TranscriptionResultSchemaMixin, BaseAutomaticNLPAction @@ -10,6 +12,9 @@ class AutomaticGoogleTranscriptionAction( ID = 'automatic_google_transcription' action_class_config = ActionClassConfig({}, None, True) + def get_nlp_service_class(self) -> NLPExternalServiceClass: + return GoogleTranscriptionService + @property def result_schema(self): schema = super().result_schema @@ -26,57 +31,3 @@ def result_schema(self): @property def _limit_identifier(self): return UsageType.ASR_SECONDS - - def _run_automatic_process( - self, - submission: dict, - submission_supplement: dict, - action_data: dict, - *args, - **kwargs, - ) -> dict | None: - """ - Run the automatic transcription process using the Google API. - - This method is intended to be called by `revise_data()`, which will finalize - the validation and merging of `action_data`. If the user explicitly accepts - the last completed transcription, the method short-circuits and returns it - immediately. If the transcription request is still in progress, the method - returns None so that `revise_data()` can exit early and skip unnecessary - processing. 
Otherwise, it calls the Google API and returns the processed - result, ready to be passed back to `revise_data()`. - """ - - # If the client sent "accepted" while the supplement is already complete, - # return the completed transcription right away. `revise_data()` will handle - # the merge and final validation of this acceptance. - accepted = action_data.get('accepted', None) - if submission_supplement.get('status') == 'complete' and accepted is not None: - return { - 'value': submission_supplement['value'], - 'status': 'complete', - } - - # TBC - if 'value' in action_data: - return { - 'value': action_data['value'], - 'status': 'deleted', - } - - # Otherwise, trigger the external Google transcription service. - service = GoogleTranscriptionService(submission, asset=kwargs['asset']) - service_data = service.process_data(self.source_question_xpath, action_data) - - # If the transcription request is still running, stop processing here. - # Returning None ensures that `revise_data()` will not be called afterwards. - if ( - accepted is None - and submission_supplement.get('status') - == service_data['status'] - == 'in_progress' - ): - return None - - # Normal case: return the processed transcription data. 
- return service_data diff --git a/kobo/apps/subsequences/actions/automatic_google_translation.py b/kobo/apps/subsequences/actions/automatic_google_translation.py index f72d3b0eac..ac27920f40 100644 --- a/kobo/apps/subsequences/actions/automatic_google_translation.py +++ b/kobo/apps/subsequences/actions/automatic_google_translation.py @@ -1,104 +1,24 @@ from kobo.apps.organizations.constants import UsageType -from ..integrations.google.google_transcribe import GoogleTranscriptionService +from ..integrations.google.google_translate import GoogleTranslationService from .base import ActionClassConfig, BaseAutomaticNLPAction +from .mixins import TranslationResultSchemaMixin +from ..type_aliases import NLPExternalServiceClass -class AutomaticGoogleTranslationAction(BaseAutomaticNLPAction): +class AutomaticGoogleTranslationAction( + TranslationResultSchemaMixin, BaseAutomaticNLPAction +): ID = 'automatic_google_translation' action_class_config = ActionClassConfig([], 'language', True) + def get_nlp_service_class(self) -> NLPExternalServiceClass: + return GoogleTranslationService + @property def result_schema(self): - ### - - localized_value_schema = { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.REVISIONS_FIELD: { - 'type': 'array', - 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, - }, - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], - } - - # Inject data schema in result schema template - self._inject_data_schema(localized_value_schema, ['$schema', 'title', 'type']) - - # Move localized_value_schema definitions to main schema - localized_value_schema_defs = localized_value_schema.pop('$defs') - - schema = { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'array', - 'additionalProperties': False, - 'items': 
{'$ref': '#/$defs/localized_value_schema'}, - '$defs': { - 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD], - }, - 'localized_value_schema': localized_value_schema, - **localized_value_schema_defs, - }, - } - - # Also inject data schema in the revision definition - self._inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] - ) - - #### - - schema = { - '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.REVISIONS_FIELD: { - 'type': 'array', - 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, - }, - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], - '$defs': { - 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, - }, - 'required': [self.DATE_CREATED_FIELD], - }, - }, - } - - # Inject data schema in result schema template - self._inject_data_schema(schema, ['$schema', 'title', 'type']) - - # Also inject data schema in the revision definition - self._inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] - ) + schema = super().result_schema # FIXME _inject_data_schema does not merge nested children schema['$defs']['action_status'] = { @@ -112,57 +32,3 @@ def result_schema(self): @property def _limit_identifier(self): return UsageType.MT_CHARACTERS - - def _run_automatic_process( - 
self, - submission: dict, - submission_supplement: dict, - action_data: dict, - *args, - **kwargs, - ) -> dict | None: - """ - Run the automatic transcription process using the Google API. - - This method is intended to be called by `revise_data()`, which will finalize - the validation and merging of `action_data`. If the user explicitly accepts - the last completed transcription, the method short-circuits and returns it - immediately. If the transcription request is still in progress, the method - returns None so that `revise_data()` can exit early and skip unnecessary - processing. Otherwise, it calls the Google API and returns the processed - result, ready to be passed back to `revise_data()`. - """ - - # If the client sent "accepted" while the supplement is already complete, - # return the completed transcription right away. `revise_data()` will handle - # the merge and final validation of this acceptance. - accepted = action_data.get('accepted', None) - if submission_supplement.get('status') == 'complete' and accepted is not None: - return { - 'value': submission_supplement['value'], - 'status': 'complete', - } - - # TBC - if 'value' in action_data: - return { - 'value': action_data['value'], - 'status': 'deleted', - } - - # Otherwise, trigger the external Google transcription service. - service = GoogleTranscriptionService(submission, asset=kwargs['asset']) - service_data = service.process_data(self.source_question_xpath, action_data) - - # If the transcription request is still running, stop processing here. - # Returning None ensures that `revise_data()` will not be called afterwards. - if ( - accepted is None - and submission_supplement.get('status') - == service_data['status'] - == 'in_progress' - ): - return None - - # Normal case: return the processed transcription data. 
- return service_data diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 28b8a2a212..9a7df4b49f 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -9,7 +9,9 @@ from kpi.exceptions import UsageLimitExceededException from kpi.utils.usage_calculator import ServiceUsageCalculator from ..exceptions import InvalidItem -from ..time_utils import utc_datetime_to_js_str +from kobo.apps.subsequences.utils.time import utc_datetime_to_js_str +from ..type_aliases import NLPExternalServiceClass + """ ### All actions must have the following components @@ -228,7 +230,7 @@ def revise_data( if self.action_class_config.automatic: # If the action is automatic, run the external process first. if not ( - service_response := self._run_automatic_process( + service_response := self.run_automated_process( submission, submission_supplement, action_data, @@ -378,7 +380,7 @@ def _limit_identifier(self): # See AutomaticGoogleTranscriptionAction._limit_identifier() for example raise NotImplementedError() - def _run_automatic_process( + def _run_automated_process( self, submission: dict, submission_supplement: dict, @@ -622,3 +624,77 @@ def data_schema(self) -> dict: 'value_null_only': {'type': 'null'}, }, } + + def get_nlp_service_class(self) -> NLPExternalServiceClass: + """ + + """ + + raise NotImplementedError + + + def run_automated_process( + self, + submission: dict, + submission_supplement: dict, + action_data: dict, + *args, + **kwargs, + ) -> dict | None: + """ + Run the automated NLP process using the configured external service + (e.g., Google). + This method is intended to be called by `revise_data()`, which finalizes + the validation and merging of `action_data`. 
+ The underlying service is expected to implement a `process_data()` method + returning a dictionary with one of the following shapes: + + {'status': 'in_progress'} + {'status': 'failed', 'error': ''} + {'status': 'complete', 'value': ''} + + Behavior: + - If the user explicitly accepted the last completed result, the method + short-circuits and returns it immediately. + - If the service reports `in_progress`, the method returns `None` so that + `revise_data()` can exit early and avoid redundant processing. + - If the service returns `failed` or `complete`, the processed result is + returned and passed back to `revise_data()`. + """ + + # If the client sent "accepted" while the supplement is already complete, + # return the completed translation/transcription right away. `revise_data()` + # will handle the merge and final validation of this acceptance. + accepted = action_data.get('accepted', None) + if submission_supplement.get('status') == 'complete' and accepted is not None: + return { + 'value': submission_supplement['value'], + 'status': 'complete', + } + + # If the client explicitly removed a previously stored result, + # preserve the deletion by returning a `deleted` status instead + # of reprocessing with the automated service. + if 'value' in action_data: + return { + 'value': action_data['value'], + 'status': 'deleted', + } + + # Otherwise, trigger the external service. + NLPService = self.get_nlp_service_class() # noqa + service = NLPService(submission, asset=kwargs['asset']) + service_data = service.process_data(self.source_question_xpath, action_data) + + # If the request is still running, stop processing here. + # Returning None ensures that `revise_data()` will not be called afterwards. + if ( + accepted is None + and submission_supplement.get('status') + == service_data['status'] + == 'in_progress' + ): + return None + + # Normal case: return the processed transcription data. 
+ return service_data diff --git a/kobo/apps/subsequences/constants.py b/kobo/apps/subsequences/constants.py index 724d5161cf..af4bf7f4ec 100644 --- a/kobo/apps/subsequences/constants.py +++ b/kobo/apps/subsequences/constants.py @@ -7,10 +7,11 @@ TRANSLATABLE_SOURCE_TYPES = TRANSCRIBABLE_SOURCE_TYPES + ['text'] QUAL_SOURCE_TYPES = TRANSLATABLE_SOURCE_TYPES - ASYNC_TRANSLATION_DELAY_INTERVAL = 5 SUBSEQUENCES_ASYNC_CACHE_KEY = 'subsequences' + # Google speech api limits audio to ~480 Minutes* # Processing time is not audio length, but it's an estimate GOOGLE_CACHE_TIMEOUT = 28800 # 8 hours +GOOGLE_CODE = 'goog' diff --git a/kobo/apps/subsequences/integrations/google/google_transcribe.py b/kobo/apps/subsequences/integrations/google/google_transcribe.py index cacde9721b..cc087f04e1 100644 --- a/kobo/apps/subsequences/integrations/google/google_transcribe.py +++ b/kobo/apps/subsequences/integrations/google/google_transcribe.py @@ -139,20 +139,11 @@ def process_data(self, xpath: str, params: dict) -> dict: self.asset.owner, ) except SubmissionNotFoundException: - return { - 'status': 'failed', - 'error': {f'Submission not found'}, - } + return {'status': 'failed', 'error': 'Submission not found'} except AttachmentNotFoundException: - return { - 'status': 'failed', - 'error': {f'Attachment not found'}, - } + return {'status': 'failed', 'error': 'Attachment not found'} except (InvalidXPathException, XPathNotFoundException): - return { - 'status': 'failed', - 'error': {f'Invalid question name XPath'}, - } + return {'status': 'failed', 'error': 'Invalid question name XPath'} except NotSupportedFormatException: return {'status': 'failed', 'error': 'Unsupported format'} diff --git a/kobo/apps/subsequences/integrations/google/google_translate.py b/kobo/apps/subsequences/integrations/google/google_translate.py index 41415b9d04..91ab0f5f29 100644 --- a/kobo/apps/subsequences/integrations/google/google_translate.py +++ 
b/kobo/apps/subsequences/integrations/google/google_translate.py @@ -12,17 +12,15 @@ from kobo.apps.languages.models.translation import TranslationService from kpi.utils.log import logging -from ...constants import GOOGLE_CODE, GOOGLETX + +from .base import GoogleService +from ..utils.google import google_credentials_from_constance_config +from ...constants import SUBMISSION_UUID_FIELD, GOOGLE_CODE from ...exceptions import ( SubsequenceTimeoutError, TranslationAsyncResultAvailable, TranslationResultsNotFound, ) -from .base import GoogleService -from .utils import google_credentials_from_constance_config - -MAX_SYNC_CHARS = 30720 - def _hashed_strings(self, *strings): return md5(''.join(strings).encode()).hexdigest()[0:10] @@ -32,13 +30,15 @@ class GoogleTranslationService(GoogleService): API_NAME = 'translate' API_VERSION = 'v3' API_RESOURCE = 'projects.locations.operations' + MAX_SYNC_CHARS = 30720 - def __init__(self, *args): + def __init__(self, submission: dict, asset: 'kpi.models.Asset', *args, **kwargs): """ This service takes a submission object as a GoogleService inheriting - class. It uses google cloud translation v3 API. + class. It uses Google Cloud translation v3 API. 
""" - super().__init__(*args) + super().__init__(submission, asset, *args, **kwargs) + self.submission_root_uuid = self.submission[SUBMISSION_UUID_FIELD] self.translate_client = translate.TranslationServiceClient( credentials=google_credentials_from_constance_config() @@ -90,10 +90,10 @@ def begin_google_operation( # check if directory is not empty if stored_result := self.get_stored_result(target_lang, output_path): logging.info(f'Found stored results in {output_path=}') - return (stored_result, len(content)) + return stored_result, len(content) logging.info( - f'Starting async translation for {self.submission.submission_uuid=} {xpath=}' + f'Starting async translation for {self.submission_root_uuid=} {xpath=}' ) dest = self.bucket.blob(source_path) dest.upload_from_string(content) @@ -118,8 +118,8 @@ def begin_google_operation( } }, 'labels': { - 'username': self.user.username, - 'submission': self.submission.submission_uuid, + 'username': self.asset.owner.username, + 'submission': self.submission_root_uuid, # this needs to be lowercased to comply with google's API 'xpath': xpath.lower(), }, @@ -128,7 +128,7 @@ def begin_google_operation( response = self.translate_client.batch_translate_text( request=req_params ) - return (response, len(content)) + return response, len(content) @property def counter_name(self): @@ -159,9 +159,9 @@ def get_unique_paths( Returns source and output paths based on the parameters used and the current date. """ - submission_uuid = self.submission.submission_uuid + _hash = _hashed_strings( - self.submission.submission_uuid, xpath, self.user.username + self.submission_root_uuid, xpath, self.asset.owner.username ) _uniq_dir = f'{self.date_string}/{_hash}/{source_lang}/{target_lang}/' source_path = posixpath.join( @@ -176,14 +176,18 @@ def process_data(self, xpath: str, params: dict) -> dict: """ Translates the value for a given xpath and its json values. 
""" - autoparams = vals[GOOGLETX] + try: - content = vals['transcript']['value'] - source_lang = vals['transcript']['languageCode'] - target_lang = autoparams.get('languageCode') + content = params['transcript']['value'] + source_lang = params['transcript']['languageCode'] + target_lang = params['language'] except KeyError: - logging.exception('Error while setting up translation') - return {'status': 'error'} + message = 'Error while setting up translation' + logging.exception(message) + return { + 'status': 'failed', + 'error': message + } lang_service = TranslationService.objects.get(code=GOOGLE_CODE) try: @@ -196,32 +200,22 @@ def process_data(self, xpath: str, params: dict) -> dict: except SubsequenceTimeoutError: return { 'status': 'in_progress', - 'source': source_lang, - 'languageCode': target_lang, - 'value': None, } except (TranslationResultsNotFound, InvalidArgument) as e: logging.exception('Error when processing translation') return { - 'status': 'error', - 'value': None, - 'responseJSON': { - 'error': f'Translation failed with error {e}' - }, + 'status': 'failed', 'error': f'Translation failed with error {str(e)}' } except TranslationAsyncResultAvailable: - _, output_path = self.get_unique_paths( - xpath, source_lang, target_lang - ) + _, output_path = self.get_unique_paths(xpath, source_lang, target_lang) logging.info( - f'Fetching stored results for {self.submission.submission_uuid=} {xpath=}, {output_path=}' + f'Fetching stored results for {self.submission.submission_uuid=} ' + f'{xpath=}, {output_path=}' ) value = self.get_stored_result(target_lang, output_path) return { 'status': 'complete', - 'source': source_lang, - 'languageCode': target_lang, 'value': value, } @@ -236,7 +230,7 @@ def translate_content( Translates an input content string """ content_size = len(content) - if content_size <= MAX_SYNC_CHARS: + if content_size <= GoogleTranslationService.MAX_SYNC_CHARS: logging.info( f'Starting sync translation for {self.submission.submission_uuid=} 
{xpath=}' ) diff --git a/kobo/apps/subsequences/type_aliases.py b/kobo/apps/subsequences/type_aliases.py new file mode 100644 index 0000000000..4cc6999f2c --- /dev/null +++ b/kobo/apps/subsequences/type_aliases.py @@ -0,0 +1,10 @@ +from typing import Type, TypeAlias, Union + +from .integrations.google.google_transcribe import GoogleTranscriptionService +from .integrations.google.google_translate import GoogleTranslationService + +# A list of possible NLP external services +NLPExternalServiceClass: TypeAlias = Union[ + Type[GoogleTranscriptionService], + Type[GoogleTranslationService], +] diff --git a/kobo/apps/subsequences/utils/__init__.py b/kobo/apps/subsequences/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/subsequences/utils.py b/kobo/apps/subsequences/utils/supplement_data.py similarity index 95% rename from kobo/apps/subsequences/utils.py rename to kobo/apps/subsequences/utils/supplement_data.py index c84ffe8e6c..b20b7e29f6 100644 --- a/kobo/apps/subsequences/utils.py +++ b/kobo/apps/subsequences/utils/supplement_data.py @@ -1,9 +1,9 @@ from typing import Generator from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix -from .actions import ACTION_IDS_TO_CLASSES -from .constants import SUBMISSION_UUID_FIELD, SUPPLEMENT_KEY -from .models import SubmissionSupplement +from kobo.apps.subsequences.actions import ACTION_IDS_TO_CLASSES +from kobo.apps.subsequences.constants import SUBMISSION_UUID_FIELD, SUPPLEMENT_KEY +from kobo.apps.subsequences.models import SubmissionSupplement def get_supplemental_output_fields(asset: 'kpi.models.Asset') -> list[dict]: diff --git a/kobo/apps/subsequences/time_utils.py b/kobo/apps/subsequences/utils/time.py similarity index 84% rename from kobo/apps/subsequences/time_utils.py rename to kobo/apps/subsequences/utils/time.py index 56627a1fb1..61d44760d4 100644 --- a/kobo/apps/subsequences/time_utils.py +++ b/kobo/apps/subsequences/utils/time.py @@ -1,9 +1,5 @@ 
# idk if DRF is doing this work for us automatically, but if not, find another # place where these utils already exist in the app - -# if they must stay here, probably need to move utils.py to utils/something.py -# and put this in utils/time.py. can't go together due to circular imports - import datetime diff --git a/kpi/deployment_backends/openrosa_backend.py b/kpi/deployment_backends/openrosa_backend.py index cf81b6b5b9..9b553f17be 100644 --- a/kpi/deployment_backends/openrosa_backend.py +++ b/kpi/deployment_backends/openrosa_backend.py @@ -46,7 +46,7 @@ from kobo.apps.openrosa.apps.viewer.models import ParsedInstance from kobo.apps.openrosa.libs.utils.logger_tools import create_instance, publish_xls_form from kobo.apps.openrosa.libs.utils.viewer_tools import get_mongo_userform_id -from kobo.apps.subsequences.utils import stream_with_supplements +from kobo.apps.subsequences.utils.supplement_data import stream_with_supplements from kobo.apps.trackers.models import NLPUsageCounter from kpi.constants import ( PERM_CHANGE_SUBMISSIONS, diff --git a/kpi/models/asset.py b/kpi/models/asset.py index c4d0cc32f9..b36ddb4d81 100644 --- a/kpi/models/asset.py +++ b/kpi/models/asset.py @@ -19,7 +19,7 @@ from kobo.apps.reports.constants import DEFAULT_REPORTS_KEY, SPECIFIC_REPORTS_KEY from kobo.apps.subsequences.schemas import ACTION_PARAMS_SCHEMA -from kobo.apps.subsequences.utils import get_supplemental_output_fields +from kobo.apps.subsequences.utils.supplement_data import get_supplemental_output_fields from kpi.constants import ( ASSET_TYPE_BLOCK, ASSET_TYPE_COLLECTION, diff --git a/kpi/models/import_export_task.py b/kpi/models/import_export_task.py index b6aab31b10..bc691f8908 100644 --- a/kpi/models/import_export_task.py +++ b/kpi/models/import_export_task.py @@ -41,7 +41,7 @@ from werkzeug.http import parse_options_header from kobo.apps.reports.report_data import build_formpack -from kobo.apps.subsequences.utils import stream_with_supplements +from 
kobo.apps.subsequences.utils.supplement_data import stream_with_supplements from kpi.constants import ( ASSET_TYPE_COLLECTION, ASSET_TYPE_EMPTY, From d5713fd59cf73817fd7194eaa9e6fbf288d6788e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 27 Aug 2025 15:40:40 -0400 Subject: [PATCH 088/138] Make Automatic Google Translation a real thing --- kobo/apps/languages/models/translation.py | 1 - kobo/apps/subsequences/actions/__init__.py | 10 ++ .../actions/automatic_google_translation.py | 59 +++++++++- kobo/apps/subsequences/actions/base.py | 103 ++++++++++-------- kobo/apps/subsequences/exceptions.py | 4 +- .../integrations/google/google_translate.py | 22 ++-- kobo/apps/subsequences/models.py | 7 +- .../test_automatic_google_transcription.py | 14 +-- .../tests/test_manual_transcription.py | 20 ++-- .../tests/test_manual_translation.py | 22 ++-- 10 files changed, 175 insertions(+), 87 deletions(-) diff --git a/kobo/apps/languages/models/translation.py b/kobo/apps/languages/models/translation.py index 9384f1306f..03b63c13ed 100644 --- a/kobo/apps/languages/models/translation.py +++ b/kobo/apps/languages/models/translation.py @@ -14,7 +14,6 @@ class TranslationService(BaseLanguageService): def get_language_code(self, value: str) -> str: - try: through_obj = TranslationServiceLanguageM2M.objects.get( Q(region__code=value) | diff --git a/kobo/apps/subsequences/actions/__init__.py b/kobo/apps/subsequences/actions/__init__.py index 7e41e57aa1..63e4df8977 100644 --- a/kobo/apps/subsequences/actions/__init__.py +++ b/kobo/apps/subsequences/actions/__init__.py @@ -1,11 +1,21 @@ from .automatic_google_transcription import AutomaticGoogleTranscriptionAction +from .automatic_google_translation import AutomaticGoogleTranslationAction from .manual_transcription import ManualTranscriptionAction from .manual_translation import ManualTranslationAction # TODO, what about using a loader for every class in "actions" folder (except base.py)? 
ACTIONS = ( AutomaticGoogleTranscriptionAction, + AutomaticGoogleTranslationAction, ManualTranscriptionAction, ManualTranslationAction, ) + +TRANSCRIPTION_ACTIONS = ( + AutomaticGoogleTranscriptionAction, + ManualTranscriptionAction, +) + ACTION_IDS_TO_CLASSES = {a.ID: a for a in ACTIONS} + +TRANSCRIPTION_ACTION_IDS_TO_CLASSES = {a.ID: a for a in TRANSCRIPTION_ACTIONS} diff --git a/kobo/apps/subsequences/actions/automatic_google_translation.py b/kobo/apps/subsequences/actions/automatic_google_translation.py index ac27920f40..cf8718fa87 100644 --- a/kobo/apps/subsequences/actions/automatic_google_translation.py +++ b/kobo/apps/subsequences/actions/automatic_google_translation.py @@ -1,9 +1,13 @@ +from copy import deepcopy + +from dateutil import parser + from kobo.apps.organizations.constants import UsageType from ..integrations.google.google_translate import GoogleTranslationService from .base import ActionClassConfig, BaseAutomaticNLPAction from .mixins import TranslationResultSchemaMixin from ..type_aliases import NLPExternalServiceClass - +from ..exceptions import TranscriptionNotFound class AutomaticGoogleTranslationAction( TranslationResultSchemaMixin, BaseAutomaticNLPAction @@ -29,6 +33,59 @@ def result_schema(self): } return schema + def _get_action_data_dependency( + self, question_supplemental_data: dict, action_data: dict + ) -> dict: + """ + Retrieve and attach dependency data from another transcription action. + + This method searches `question_supplemental_data` for the most recent + transcription matching the base language of `action_data`. Regional + variants are not supported: only the language code is used to locate + the transcript. The found transcript (and locale if available) is then + added to `action_data` under the `transcript` field. 
+ """ + + # Avoid circular imports + from ..actions import TRANSCRIPTION_ACTION_IDS_TO_CLASSES + + transcript = transcript_language = None + last_date_modified = None + + # TODO Should we search only for accepted transcriptions? + for action_id in TRANSCRIPTION_ACTION_IDS_TO_CLASSES.keys(): + try: + question_supplemental_data[action_id]['value'] + except KeyError: + continue + + action_version = question_supplemental_data[action_id] + dependency_date_modified = parser.parse( + action_version[self.DATE_MODIFIED_FIELD] + ) + + if ( + not last_date_modified + or last_date_modified < dependency_date_modified + ): + last_date_modified = dependency_date_modified + transcript = action_version['value'] + transcript_language = ( + action_version.get('locale') or action_version['language'] + ) + + if transcript is None: + raise TranscriptionNotFound + + # Inject dependency property for translation service + action_data['dependency'] = { + 'value': transcript, + 'language': transcript_language, + } + + return action_data + + @property def _limit_identifier(self): return UsageType.MT_CHARACTERS diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 9a7df4b49f..deeb3ed017 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -215,7 +215,8 @@ def revise_field(self, *args, **kwargs): def revise_data( self, submission: dict, - submission_supplement: dict, + question_supplemental_data: dict, + action_supplement_data: dict, action_data: dict, asset: 'kpi.models.Asset' = None, ) -> dict | None: @@ -227,12 +228,33 @@ def revise_data( self.validate_data(action_data) self.raise_for_any_leading_underscore_key(action_data) + now_str = utc_datetime_to_js_str(timezone.now()) + item_index = None + action_supplement_data_copy = deepcopy(action_supplement_data) + if not isinstance(self.action_class_config.default_type, list): + revision = action_supplement_data_copy + else: + # TODO: Multiple keys are not 
supported. + # Not a big issue for now since translation actions don’t use locale + # (yet?) and transcription actions only involve one occurrence at a time. + needle = action_data[self.action_class_config.key] + revision = {} + if not isinstance(action_supplement_data, list): + raise InvalidItem + + for idx, item in enumerate(action_supplement_data): + if needle == item[self.action_class_config.key]: + revision = deepcopy(item) + item_index = idx + break + if self.action_class_config.automatic: # If the action is automatic, run the external process first. if not ( service_response := self.run_automated_process( submission, - submission_supplement, + question_supplemental_data, + revision, action_data, asset=asset, ) @@ -250,26 +272,6 @@ def revise_data( else: accepted = True - now_str = utc_datetime_to_js_str(timezone.now()) - item_index = None - submission_supplement_copy = deepcopy(submission_supplement) - if not isinstance(self.action_class_config.default_type, list): - revision = submission_supplement_copy - else: - # TODO: Multiple keys are not supported. - # Not a big issue for now since translation actions don’t use locale - # (yet?) and transcription actions only involve one occurrence at a time. - needle = action_data[self.action_class_config.key] - revision = {} - if not isinstance(submission_supplement, list): - raise InvalidItem - - for idx, item in enumerate(submission_supplement): - if needle == item[self.action_class_config.key]: - revision = deepcopy(item) - item_index = idx - break - new_record = deepcopy(action_data) revisions = revision.pop(self.REVISIONS_FIELD, []) @@ -280,7 +282,7 @@ def revise_data( # If the default type is not a list, we handle a single record case. 
if not isinstance(self.action_class_config.default_type, list): - if submission_supplement: + if action_supplement_data: revisions.insert(0, revision) new_record[self.REVISIONS_FIELD] = revisions else: @@ -312,11 +314,11 @@ def revise_data( # - Otherwise, replace the record at the given index. # Finally, update `new_record` to reference the full updated list. if item_index is None: - submission_supplement_copy.append(new_record) + action_supplement_data_copy.append(new_record) else: - submission_supplement_copy[item_index] = new_record + action_supplement_data_copy[item_index] = new_record - new_record = submission_supplement_copy + new_record = action_supplement_data_copy self.validate_result(new_record) @@ -341,6 +343,20 @@ def raise_for_any_leading_underscore_key(d: dict): if match: raise Exception('An unexpected key with a leading underscore was found') + def run_automated_process( + self, + submission: dict, + question_supplemental_data: dict, + action_supplement_data: dict, + action_data: dict, + *args, + **kwargs, + ) -> dict | bool: + """ + Update action_data with automatic process + """ + raise NotImplementedError + def _inject_data_schema(self, destination_schema: dict, skipped_keys: list): """ Utility function to inject data schema into another schema to @@ -380,19 +396,6 @@ def _limit_identifier(self): # See AutomaticGoogleTranscriptionAction._limit_identifier() for example raise NotImplementedError() - def _run_automated_process( - self, - submission: dict, - submission_supplement: dict, - action_data: dict, - *args, - **kwargs, - ) -> dict | bool: - """ - Update action_data with automatic process - """ - raise NotImplementedError - class BaseManualNLPAction(BaseAction): """ @@ -636,7 +639,8 @@ def get_nlp_service_class(self) -> NLPExternalServiceClass: def run_automated_process( self, submission: dict, - submission_supplement: dict, + question_supplemental_data: dict, + action_supplement_data: dict, action_data: dict, *args, **kwargs, @@ -666,9 +670,9 
@@ def run_automated_process( # return the completed translation/transcription right away. `revise_data()` # will handle the merge and final validation of this acceptance. accepted = action_data.get('accepted', None) - if submission_supplement.get('status') == 'complete' and accepted is not None: + if action_supplement_data.get('status') == 'complete' and accepted is not None: return { - 'value': submission_supplement['value'], + 'value': action_supplement_data['value'], 'status': 'complete', } @@ -681,16 +685,27 @@ def run_automated_process( 'status': 'deleted', } + if hasattr(self, '_get_action_data_dependency'): + action_data = self._get_action_data_dependency( + question_supplemental_data, action_data + ) + # Otherwise, trigger the external service. NLPService = self.get_nlp_service_class() # noqa service = NLPService(submission, asset=kwargs['asset']) - service_data = service.process_data(self.source_question_xpath, action_data) + service_data = service.process_data( + self.source_question_xpath, action_data + ) + + # Remove the 'dependency' flag from action_data since it is only used + # internally to resolve prerequisites and must not be kept in the final payload. + action_data.pop('dependency', None) # If the request is still running, stop processing here. # Returning None ensures that `revise_data()` will not be called afterwards. 
if ( accepted is None - and submission_supplement.get('status') + and action_supplement_data.get('status') == service_data['status'] == 'in_progress' ): diff --git a/kobo/apps/subsequences/exceptions.py b/kobo/apps/subsequences/exceptions.py index be147a30c5..418350f125 100644 --- a/kobo/apps/subsequences/exceptions.py +++ b/kobo/apps/subsequences/exceptions.py @@ -34,9 +34,9 @@ class SubsequenceTimeoutError(Exception): pass -class TranslationAsyncResultAvailable(Exception): +class TranscriptionNotFound(Exception): pass -class TranslationResultsNotFound(Exception): +class TranslationAsyncResultAvailable(Exception): pass diff --git a/kobo/apps/subsequences/integrations/google/google_translate.py b/kobo/apps/subsequences/integrations/google/google_translate.py index 91ab0f5f29..b02fe1e50c 100644 --- a/kobo/apps/subsequences/integrations/google/google_translate.py +++ b/kobo/apps/subsequences/integrations/google/google_translate.py @@ -10,6 +10,7 @@ from google.api_core.exceptions import InvalidArgument from google.cloud import translate_v3 as translate +from kobo.apps.languages.models.transcription import TranscriptionService from kobo.apps.languages.models.translation import TranslationService from kpi.utils.log import logging @@ -19,7 +20,6 @@ from ...exceptions import ( SubsequenceTimeoutError, TranslationAsyncResultAvailable, - TranslationResultsNotFound, ) def _hashed_strings(self, *strings): @@ -178,8 +178,8 @@ def process_data(self, xpath: str, params: dict) -> dict: """ try: - content = params['transcript']['value'] - source_lang = params['transcript']['languageCode'] + content = params['dependency']['value'] + source_lang = params['dependency']['language'] target_lang = params['language'] except KeyError: message = 'Error while setting up translation' @@ -189,19 +189,21 @@ def process_data(self, xpath: str, params: dict) -> dict: 'error': message } - lang_service = TranslationService.objects.get(code=GOOGLE_CODE) + transcription_lang_service = 
TranscriptionService.objects.get(code=GOOGLE_CODE) + translation_lang_service = TranslationService.objects.get(code=GOOGLE_CODE) + try: value = self.translate_content( xpath, - lang_service.get_language_code(source_lang), - lang_service.get_language_code(target_lang), + transcription_lang_service.get_language_code(source_lang), + translation_lang_service.get_language_code(target_lang), content, ) except SubsequenceTimeoutError: return { 'status': 'in_progress', } - except (TranslationResultsNotFound, InvalidArgument) as e: + except InvalidArgument as e: logging.exception('Error when processing translation') return { 'status': 'failed', 'error': f'Translation failed with error {str(e)}' @@ -209,7 +211,7 @@ def process_data(self, xpath: str, params: dict) -> dict: except TranslationAsyncResultAvailable: _, output_path = self.get_unique_paths(xpath, source_lang, target_lang) logging.info( - f'Fetching stored results for {self.submission.submission_uuid=} ' + f'Fetching stored results for {self.submission_root_uuid=} ' f'{xpath=}, {output_path=}' ) value = self.get_stored_result(target_lang, output_path) @@ -232,7 +234,7 @@ def translate_content( content_size = len(content) if content_size <= GoogleTranslationService.MAX_SYNC_CHARS: logging.info( - f'Starting sync translation for {self.submission.submission_uuid=} {xpath=}' + f'Starting sync translation for {self.submission_root_uuid=} {xpath=}' ) response = self.translate_client.translate_text( request={ @@ -241,7 +243,7 @@ def translate_content( 'target_language_code': target_lang, 'parent': self.translate_parent, 'mime_type': 'text/plain', - 'labels': {'username': self.user.username}, + 'labels': {'username': self.asset.owner.username}, } ) self.update_counters(content_size) diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 45fcd51de5..7c97ba022e 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -87,9 +87,14 @@ def revise_data(asset: 
'kpi.Asset', submission: dict, incoming_data: dict) -> di if not ( action_supplemental_data := action.revise_data( - submission, action_supplemental_data, action_data + submission, + question_supplemental_data, + action_supplemental_data, + action_data, + asset=asset, ) ): + # TODO is line below really needed? supplemental_data['_version'] = schema_version return supplemental_data diff --git a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py index f6dde7271e..8de4681a34 100644 --- a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py +++ b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py @@ -5,7 +5,7 @@ import pytest from ..actions.automatic_google_transcription import AutomaticGoogleTranscriptionAction -from .constants import EMPTY_SUBMISSION +from .constants import EMPTY_SUBMISSION, EMPTY_SUPPLEMENT def test_valid_params_pass_validation(): @@ -173,7 +173,7 @@ def test_valid_result_passes_validation(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) action.validate_result(mock_sup_det) @@ -207,7 +207,7 @@ def test_acceptance_does_not_produce_revisions(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) assert '_revisions' not in mock_sup_det if data.get('value') is None: is_date_accepted_present = mock_sup_det.get('_dateAccepted') is None @@ -246,7 +246,7 @@ def test_invalid_result_fails_validation(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) 
action.validate_result(mock_sup_det) @@ -274,7 +274,7 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): value = first.pop('value', None) mock_service.process_data.return_value = {'value': value, 'status': 'complete'} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, action.action_class_config.default_type, first + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first ) assert mock_sup_det['language'] == 'en' @@ -289,7 +289,7 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): ): value = second.pop('value', None) mock_service.process_data.return_value = {'value': value, 'status': 'complete'} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) assert len(mock_sup_det['_revisions']) == 1 @@ -338,7 +338,7 @@ def test_latest_revision_is_first(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) assert mock_sup_det['value'] == 'trois' assert mock_sup_det['_revisions'][0]['value'] == 'deux' diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py index 7bfff6cc9e..b961e40ed2 100644 --- a/kobo/apps/subsequences/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences/tests/test_manual_transcription.py @@ -3,7 +3,7 @@ import pytest from ..actions.manual_transcription import ManualTranscriptionAction -from .constants import EMPTY_SUBMISSION +from .constants import EMPTY_SUBMISSION, EMPTY_SUPPLEMENT def test_valid_params_pass_validation(): @@ -63,7 +63,7 @@ def test_valid_result_passes_validation(): fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = action.action_class_config.default_type for data in first, second, third, fourth, fifth: - 
mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) action.validate_result(mock_sup_det) @@ -79,7 +79,7 @@ def test_invalid_result_fails_validation(): fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = action.action_class_config.default_type for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) # erroneously add '_dateModified' onto a revision mock_sup_det['_revisions'][0]['_dateModified'] = mock_sup_det['_revisions'][0][ @@ -98,7 +98,7 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): first = {'language': 'en', 'value': 'No idea'} second = {'language': 'fr', 'value': 'Aucune idée'} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, action.action_class_config.default_type, first + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first ) assert mock_sup_det['language'] == 'en' @@ -107,7 +107,7 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): assert '_revisions' not in mock_sup_det first_time = mock_sup_det['_dateCreated'] - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) assert len(mock_sup_det['_revisions']) == 1 # the revision should encompass the first transcript @@ -142,11 +142,11 @@ def test_setting_transcript_to_empty_string(): second = {'language': 'fr', 'value': ''} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, action.action_class_config.default_type, first + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first ) assert mock_sup_det['value'] == 'Aucune idée' - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) + mock_sup_det = 
action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) assert mock_sup_det['value'] == '' assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' @@ -160,11 +160,11 @@ def test_setting_transcript_to_none(): second = {'language': 'fr', 'value': None} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, action.action_class_config.default_type, first + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first ) assert mock_sup_det['value'] == 'Aucune idée' - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) assert mock_sup_det['value'] is None assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' @@ -180,7 +180,7 @@ def test_latest_revision_is_first(): mock_sup_det = action.action_class_config.default_type for data in first, second, third: - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) assert mock_sup_det['value'] == 'trois' assert mock_sup_det['_revisions'][0]['value'] == 'deux' diff --git a/kobo/apps/subsequences/tests/test_manual_translation.py b/kobo/apps/subsequences/tests/test_manual_translation.py index e8d8139d97..ddef90c0ac 100644 --- a/kobo/apps/subsequences/tests/test_manual_translation.py +++ b/kobo/apps/subsequences/tests/test_manual_translation.py @@ -3,7 +3,7 @@ import pytest from ..actions.manual_translation import ManualTranslationAction -from .constants import EMPTY_SUBMISSION +from .constants import EMPTY_SUBMISSION, EMPTY_SUPPLEMENT def test_valid_params_pass_validation(): @@ -60,7 +60,7 @@ def test_valid_result_passes_validation(): fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = action.action_class_config.default_type for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + 
mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data) action.validate_result(mock_sup_det) @@ -76,7 +76,7 @@ def test_invalid_result_fails_validation(): fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = action.action_class_config.default_type for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data) # erroneously add '_dateModified' onto a revision first_revision = mock_sup_det[0]['_revisions'][0] @@ -95,7 +95,7 @@ def test_translation_revisions_are_retained_in_supplemental_details(): second = {'language': 'fr', 'value': 'Aucune idée'} third = {'language': 'en', 'value': 'No clue'} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, action.action_class_config.default_type, first + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first ) assert len(mock_sup_det) == 1 @@ -105,14 +105,14 @@ def test_translation_revisions_are_retained_in_supplemental_details(): assert '_revisions' not in mock_sup_det[0] first_time = mock_sup_det[0]['_dateCreated'] - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) assert len(mock_sup_det) == 2 assert mock_sup_det[1]['language'] == 'fr' assert mock_sup_det[1]['value'] == 'Aucune idée' assert mock_sup_det[1]['_dateCreated'] == mock_sup_det[1]['_dateModified'] assert '_revisions' not in mock_sup_det[1] - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, third) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, third) assert len(mock_sup_det) == 2 # the revision should encompass the first translation @@ -146,11 +146,11 @@ def test_setting_translation_to_empty_string(): first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 
'fr', 'value': ''} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, action.action_class_config.default_type, first + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first ) assert mock_sup_det[0]['value'] == 'Aucune idée' - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) assert mock_sup_det[0]['value'] == '' assert mock_sup_det[0]['_revisions'][0]['value'] == 'Aucune idée' @@ -164,11 +164,11 @@ def test_setting_translation_to_none(): second = {'language': 'fr', 'value': None} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, action.action_class_config.default_type, first + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first ) assert mock_sup_det[0]['value'] == 'Aucune idée' - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) assert mock_sup_det[0]['value'] is None assert mock_sup_det[0]['_revisions'][0]['value'] == 'Aucune idée' @@ -184,7 +184,7 @@ def test_latest_revision_is_first(): mock_sup_det = action.action_class_config.default_type for data in first, second, third: - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) assert mock_sup_det[0]['value'] == 'trois' assert mock_sup_det[0]['_revisions'][0]['value'] == 'deux' From 4215c6dee3acf4acbd2400a1d46fca12cdb4995d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 27 Aug 2025 19:07:22 -0400 Subject: [PATCH 089/138] Fix rootUuid with suffix --- kobo/apps/subsequences/integrations/google/base.py | 7 +++++-- .../subsequences/integrations/google/google_transcribe.py | 2 +- .../subsequences/integrations/google/google_translate.py | 2 +- 3 files changed, 7 insertions(+), 4 
deletions(-) diff --git a/kobo/apps/subsequences/integrations/google/base.py b/kobo/apps/subsequences/integrations/google/base.py index e378d43436..7a1eea8e61 100644 --- a/kobo/apps/subsequences/integrations/google/base.py +++ b/kobo/apps/subsequences/integrations/google/base.py @@ -11,6 +11,7 @@ from google.cloud import storage from googleapiclient import discovery +from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix from kobo.apps.trackers.utils import update_nlp_counter from kpi.utils.log import logging from ...constants import ( @@ -39,6 +40,9 @@ class GoogleService(ABC): def __init__(self, submission: dict, asset: 'kpi.models.Asset', *args, **kwargs): super().__init__() self.submission = submission + self.submission_root_uuid = remove_uuid_prefix( + self.submission[SUBMISSION_UUID_FIELD] + ) self.asset = asset # Need to retrieve the attachment content self.credentials = google_credentials_from_constance_config() self.storage_client = storage.Client(credentials=self.credentials) @@ -134,8 +138,7 @@ def update_counters(self, amount) -> None: def _get_cache_key( self, xpath: str, source_lang: str, target_lang: str | None ) -> str: - submission_root_uuid = self.submission[SUBMISSION_UUID_FIELD] - args = [self.asset.owner_id, submission_root_uuid, xpath, source_lang.lower()] + args = [self.asset.owner_id, self.submission_root_uuid, xpath, source_lang.lower()] if target_lang is None: args.insert(0, 'transcribe') else: diff --git a/kobo/apps/subsequences/integrations/google/google_transcribe.py b/kobo/apps/subsequences/integrations/google/google_transcribe.py index cc087f04e1..e6c11886b6 100644 --- a/kobo/apps/subsequences/integrations/google/google_transcribe.py +++ b/kobo/apps/subsequences/integrations/google/google_transcribe.py @@ -135,7 +135,7 @@ def process_data(self, xpath: str, params: dict) -> dict: try: converted_audio = self.get_converted_audio( xpath, - self.submission[SUBMISSION_UUID_FIELD], + self.submission_root_uuid, 
self.asset.owner, ) except SubmissionNotFoundException: diff --git a/kobo/apps/subsequences/integrations/google/google_translate.py b/kobo/apps/subsequences/integrations/google/google_translate.py index b02fe1e50c..af9ff32dde 100644 --- a/kobo/apps/subsequences/integrations/google/google_translate.py +++ b/kobo/apps/subsequences/integrations/google/google_translate.py @@ -12,6 +12,7 @@ from kobo.apps.languages.models.transcription import TranscriptionService from kobo.apps.languages.models.translation import TranslationService +from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix from kpi.utils.log import logging from .base import GoogleService @@ -38,7 +39,6 @@ def __init__(self, submission: dict, asset: 'kpi.models.Asset', *args, **kwargs) class. It uses Google Cloud translation v3 API. """ super().__init__(submission, asset, *args, **kwargs) - self.submission_root_uuid = self.submission[SUBMISSION_UUID_FIELD] self.translate_client = translate.TranslationServiceClient( credentials=google_credentials_from_constance_config() From 0dbf07453bc37d0168a4f19735e58b019ea632d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 27 Aug 2025 19:09:25 -0400 Subject: [PATCH 090/138] Fix: linter --- .../actions/automatic_google_transcription.py | 2 +- .../actions/automatic_google_translation.py | 13 ++---- kobo/apps/subsequences/actions/base.py | 12 ++---- .../actions/manual_transcription.py | 1 + .../actions/manual_translation.py | 4 +- kobo/apps/subsequences/actions/mixins.py | 1 - .../subsequences/integrations/google/base.py | 7 ++- .../integrations/google/google_translate.py | 20 +++------ .../test_automatic_google_transcription.py | 25 ++++++++--- .../tests/test_manual_transcription.py | 39 +++++++++++++---- .../tests/test_manual_translation.py | 43 ++++++++++++++----- 11 files changed, 105 insertions(+), 62 deletions(-) diff --git a/kobo/apps/subsequences/actions/automatic_google_transcription.py 
b/kobo/apps/subsequences/actions/automatic_google_transcription.py index 4f10525b40..f48c5a659c 100644 --- a/kobo/apps/subsequences/actions/automatic_google_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -1,8 +1,8 @@ from kobo.apps.organizations.constants import UsageType from ..integrations.google.google_transcribe import GoogleTranscriptionService +from ..type_aliases import NLPExternalServiceClass from .base import ActionClassConfig, BaseAutomaticNLPAction from .mixins import TranscriptionResultSchemaMixin -from ..type_aliases import NLPExternalServiceClass class AutomaticGoogleTranscriptionAction( diff --git a/kobo/apps/subsequences/actions/automatic_google_translation.py b/kobo/apps/subsequences/actions/automatic_google_translation.py index cf8718fa87..2be83e4795 100644 --- a/kobo/apps/subsequences/actions/automatic_google_translation.py +++ b/kobo/apps/subsequences/actions/automatic_google_translation.py @@ -1,13 +1,12 @@ -from copy import deepcopy - from dateutil import parser from kobo.apps.organizations.constants import UsageType +from ..exceptions import TranscriptionNotFound from ..integrations.google.google_translate import GoogleTranslationService +from ..type_aliases import NLPExternalServiceClass from .base import ActionClassConfig, BaseAutomaticNLPAction from .mixins import TranslationResultSchemaMixin -from ..type_aliases import NLPExternalServiceClass -from ..exceptions import TranscriptionNotFound + class AutomaticGoogleTranslationAction( TranslationResultSchemaMixin, BaseAutomaticNLPAction @@ -64,10 +63,7 @@ def _get_action_data_dependency( action_version[self.DATE_MODIFIED_FIELD] ) - if ( - not last_date_modified - or last_date_modified < dependency_date_modified - ): + if not last_date_modified or last_date_modified < dependency_date_modified: last_date_modified = dependency_date_modified transcript = action_version['value'] transcript_language = ( @@ -85,7 +81,6 @@ def _get_action_data_dependency( 
return action_data - @property def _limit_identifier(self): return UsageType.MT_CHARACTERS diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index deeb3ed017..5dd0a59752 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -6,13 +6,12 @@ from django.utils import timezone from kobo.apps.kobo_auth.shortcuts import User +from kobo.apps.subsequences.utils.time import utc_datetime_to_js_str from kpi.exceptions import UsageLimitExceededException from kpi.utils.usage_calculator import ServiceUsageCalculator from ..exceptions import InvalidItem -from kobo.apps.subsequences.utils.time import utc_datetime_to_js_str from ..type_aliases import NLPExternalServiceClass - """ ### All actions must have the following components @@ -629,13 +628,10 @@ def data_schema(self) -> dict: } def get_nlp_service_class(self) -> NLPExternalServiceClass: - """ - - """ + """ """ raise NotImplementedError - def run_automated_process( self, submission: dict, @@ -693,9 +689,7 @@ def run_automated_process( # Otherwise, trigger the external service. NLPService = self.get_nlp_service_class() # noqa service = NLPService(submission, asset=kwargs['asset']) - service_data = service.process_data( - self.source_question_xpath, action_data - ) + service_data = service.process_data(self.source_question_xpath, action_data) # Remove the 'dependency' flag from action_data since it is only used # internally to resolve prerequisites and must not be kept in the final payload. 
diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 1870d76778..452de0c1ce 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -3,6 +3,7 @@ from .base import ActionClassConfig, BaseManualNLPAction from .mixins import TranscriptionResultSchemaMixin + class ManualTranscriptionAction(TranscriptionResultSchemaMixin, BaseManualNLPAction): ID = 'manual_transcription' diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index c10c050582..fa16706333 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -4,9 +4,7 @@ from .mixins import TranslationResultSchemaMixin -class ManualTranslationAction( - TranslationResultSchemaMixin, BaseManualNLPAction -): +class ManualTranslationAction(TranslationResultSchemaMixin, BaseManualNLPAction): ID = 'manual_translation' action_class_config = ActionClassConfig([], 'language', False) diff --git a/kobo/apps/subsequences/actions/mixins.py b/kobo/apps/subsequences/actions/mixins.py index fb1fb6249a..b36fd3cfe7 100644 --- a/kobo/apps/subsequences/actions/mixins.py +++ b/kobo/apps/subsequences/actions/mixins.py @@ -1,4 +1,3 @@ - class TranscriptionResultSchemaMixin: """ Provides the `result_schema` property used by all transcription-related actions. 
diff --git a/kobo/apps/subsequences/integrations/google/base.py b/kobo/apps/subsequences/integrations/google/base.py index 7a1eea8e61..e2a6f79211 100644 --- a/kobo/apps/subsequences/integrations/google/base.py +++ b/kobo/apps/subsequences/integrations/google/base.py @@ -138,7 +138,12 @@ def update_counters(self, amount) -> None: def _get_cache_key( self, xpath: str, source_lang: str, target_lang: str | None ) -> str: - args = [self.asset.owner_id, self.submission_root_uuid, xpath, source_lang.lower()] + args = [ + self.asset.owner_id, + self.submission_root_uuid, + xpath, + source_lang.lower(), + ] if target_lang is None: args.insert(0, 'transcribe') else: diff --git a/kobo/apps/subsequences/integrations/google/google_translate.py b/kobo/apps/subsequences/integrations/google/google_translate.py index af9ff32dde..b737eae650 100644 --- a/kobo/apps/subsequences/integrations/google/google_translate.py +++ b/kobo/apps/subsequences/integrations/google/google_translate.py @@ -12,16 +12,12 @@ from kobo.apps.languages.models.transcription import TranscriptionService from kobo.apps.languages.models.translation import TranslationService -from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix from kpi.utils.log import logging - -from .base import GoogleService +from ...constants import GOOGLE_CODE +from ...exceptions import SubsequenceTimeoutError, TranslationAsyncResultAvailable from ..utils.google import google_credentials_from_constance_config -from ...constants import SUBMISSION_UUID_FIELD, GOOGLE_CODE -from ...exceptions import ( - SubsequenceTimeoutError, - TranslationAsyncResultAvailable, -) +from .base import GoogleService + def _hashed_strings(self, *strings): return md5(''.join(strings).encode()).hexdigest()[0:10] @@ -184,10 +180,7 @@ def process_data(self, xpath: str, params: dict) -> dict: except KeyError: message = 'Error while setting up translation' logging.exception(message) - return { - 'status': 'failed', - 'error': message - } + 
return {'status': 'failed', 'error': message} transcription_lang_service = TranscriptionService.objects.get(code=GOOGLE_CODE) translation_lang_service = TranslationService.objects.get(code=GOOGLE_CODE) @@ -206,7 +199,8 @@ def process_data(self, xpath: str, params: dict) -> dict: except InvalidArgument as e: logging.exception('Error when processing translation') return { - 'status': 'failed', 'error': f'Translation failed with error {str(e)}' + 'status': 'failed', + 'error': f'Translation failed with error {str(e)}', } except TranslationAsyncResultAvailable: _, output_path = self.get_unique_paths(xpath, source_lang, target_lang) diff --git a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py index 8de4681a34..4a716e1aa2 100644 --- a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py +++ b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py @@ -173,7 +173,9 @@ def test_valid_result_passes_validation(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data + ) action.validate_result(mock_sup_det) @@ -207,7 +209,9 @@ def test_acceptance_does_not_produce_revisions(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data + ) assert '_revisions' not in mock_sup_det if data.get('value') is None: is_date_accepted_present = mock_sup_det.get('_dateAccepted') is None @@ -246,7 +250,9 @@ def test_invalid_result_fails_validation(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, 
EMPTY_SUPPLEMENT, mock_sup_det, data + ) action.validate_result(mock_sup_det) @@ -274,7 +280,10 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): value = first.pop('value', None) mock_service.process_data.return_value = {'value': value, 'status': 'complete'} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first + EMPTY_SUBMISSION, + EMPTY_SUPPLEMENT, + action.action_class_config.default_type, + first, ) assert mock_sup_det['language'] == 'en' @@ -289,7 +298,9 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): ): value = second.pop('value', None) mock_service.process_data.return_value = {'value': value, 'status': 'complete'} - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second + ) assert len(mock_sup_det['_revisions']) == 1 @@ -338,7 +349,9 @@ def test_latest_revision_is_first(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data + ) assert mock_sup_det['value'] == 'trois' assert mock_sup_det['_revisions'][0]['value'] == 'deux' diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py index b961e40ed2..757c431d60 100644 --- a/kobo/apps/subsequences/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences/tests/test_manual_transcription.py @@ -63,7 +63,9 @@ def test_valid_result_passes_validation(): fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = action.action_class_config.default_type for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) + mock_sup_det = 
action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data + ) action.validate_result(mock_sup_det) @@ -79,7 +81,9 @@ def test_invalid_result_fails_validation(): fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = action.action_class_config.default_type for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data + ) # erroneously add '_dateModified' onto a revision mock_sup_det['_revisions'][0]['_dateModified'] = mock_sup_det['_revisions'][0][ @@ -98,7 +102,10 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): first = {'language': 'en', 'value': 'No idea'} second = {'language': 'fr', 'value': 'Aucune idée'} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first + EMPTY_SUBMISSION, + EMPTY_SUPPLEMENT, + action.action_class_config.default_type, + first, ) assert mock_sup_det['language'] == 'en' @@ -107,7 +114,9 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): assert '_revisions' not in mock_sup_det first_time = mock_sup_det['_dateCreated'] - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second + ) assert len(mock_sup_det['_revisions']) == 1 # the revision should encompass the first transcript @@ -142,11 +151,16 @@ def test_setting_transcript_to_empty_string(): second = {'language': 'fr', 'value': ''} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first + EMPTY_SUBMISSION, + EMPTY_SUPPLEMENT, + action.action_class_config.default_type, + first, ) assert mock_sup_det['value'] == 'Aucune idée' - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, 
EMPTY_SUPPLEMENT,mock_sup_det, second) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second + ) assert mock_sup_det['value'] == '' assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' @@ -160,11 +174,16 @@ def test_setting_transcript_to_none(): second = {'language': 'fr', 'value': None} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first + EMPTY_SUBMISSION, + EMPTY_SUPPLEMENT, + action.action_class_config.default_type, + first, ) assert mock_sup_det['value'] == 'Aucune idée' - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second + ) assert mock_sup_det['value'] is None assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' @@ -180,7 +199,9 @@ def test_latest_revision_is_first(): mock_sup_det = action.action_class_config.default_type for data in first, second, third: - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data + ) assert mock_sup_det['value'] == 'trois' assert mock_sup_det['_revisions'][0]['value'] == 'deux' diff --git a/kobo/apps/subsequences/tests/test_manual_translation.py b/kobo/apps/subsequences/tests/test_manual_translation.py index ddef90c0ac..12e968bb7b 100644 --- a/kobo/apps/subsequences/tests/test_manual_translation.py +++ b/kobo/apps/subsequences/tests/test_manual_translation.py @@ -60,7 +60,9 @@ def test_valid_result_passes_validation(): fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = action.action_class_config.default_type for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, 
mock_sup_det, data + ) action.validate_result(mock_sup_det) @@ -76,7 +78,9 @@ def test_invalid_result_fails_validation(): fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = action.action_class_config.default_type for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data + ) # erroneously add '_dateModified' onto a revision first_revision = mock_sup_det[0]['_revisions'][0] @@ -95,7 +99,10 @@ def test_translation_revisions_are_retained_in_supplemental_details(): second = {'language': 'fr', 'value': 'Aucune idée'} third = {'language': 'en', 'value': 'No clue'} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first + EMPTY_SUBMISSION, + EMPTY_SUPPLEMENT, + action.action_class_config.default_type, + first, ) assert len(mock_sup_det) == 1 @@ -105,14 +112,18 @@ def test_translation_revisions_are_retained_in_supplemental_details(): assert '_revisions' not in mock_sup_det[0] first_time = mock_sup_det[0]['_dateCreated'] - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second + ) assert len(mock_sup_det) == 2 assert mock_sup_det[1]['language'] == 'fr' assert mock_sup_det[1]['value'] == 'Aucune idée' assert mock_sup_det[1]['_dateCreated'] == mock_sup_det[1]['_dateModified'] assert '_revisions' not in mock_sup_det[1] - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, third) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, third + ) assert len(mock_sup_det) == 2 # the revision should encompass the first translation @@ -146,11 +157,16 @@ def test_setting_translation_to_empty_string(): first = {'language': 'fr', 'value': 'Aucune 
idée'} second = {'language': 'fr', 'value': ''} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first + EMPTY_SUBMISSION, + EMPTY_SUPPLEMENT, + action.action_class_config.default_type, + first, ) assert mock_sup_det[0]['value'] == 'Aucune idée' - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second + ) assert mock_sup_det[0]['value'] == '' assert mock_sup_det[0]['_revisions'][0]['value'] == 'Aucune idée' @@ -164,11 +180,16 @@ def test_setting_translation_to_none(): second = {'language': 'fr', 'value': None} mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, action.action_class_config.default_type, first + EMPTY_SUBMISSION, + EMPTY_SUPPLEMENT, + action.action_class_config.default_type, + first, ) assert mock_sup_det[0]['value'] == 'Aucune idée' - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, second) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second + ) assert mock_sup_det[0]['value'] is None assert mock_sup_det[0]['_revisions'][0]['value'] == 'Aucune idée' @@ -184,7 +205,9 @@ def test_latest_revision_is_first(): mock_sup_det = action.action_class_config.default_type for data in first, second, third: - mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT,mock_sup_det, data) + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data + ) assert mock_sup_det[0]['value'] == 'trois' assert mock_sup_det[0]['_revisions'][0]['value'] == 'deux' From 72f3395cb6ae0ca9fe618ce64c77889b6f445b0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 27 Aug 2025 23:40:04 -0400 Subject: [PATCH 091/138] Add unit tests for automatic_google_translation --- kobo/apps/subsequences/tests/constants.py | 9 + 
.../test_automatic_google_transcription.py | 37 +- .../test_automatic_google_translation.py | 404 ++++++++++++++++++ 3 files changed, 432 insertions(+), 18 deletions(-) create mode 100644 kobo/apps/subsequences/tests/test_automatic_google_translation.py diff --git a/kobo/apps/subsequences/tests/constants.py b/kobo/apps/subsequences/tests/constants.py index aa0d5fdb03..35bd514df3 100644 --- a/kobo/apps/subsequences/tests/constants.py +++ b/kobo/apps/subsequences/tests/constants.py @@ -1,2 +1,11 @@ EMPTY_SUBMISSION = {} EMPTY_SUPPLEMENT = {} +QUESTION_SUPPLEMENT = { + 'automatic_google_transcription': { + 'value': 'My audio has been transcribed', + 'language': 'en', + 'status': 'completed', + '_dateCreated': '2024-04-08T15:27:00Z', + '_dateModified': '2024-04-08T15:27:00Z', + } +} diff --git a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py index 4a716e1aa2..6d5e1ac6f5 100644 --- a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py +++ b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py @@ -110,25 +110,26 @@ def test_invalid_user_data_fails_validation(): def test_invalid_automated_data_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] + params = [{'language': 'fr'}, {'language': 'es'}] action = AutomaticGoogleTranscriptionAction(xpath, params) invalid_data = [ # Wrong language - {'language': 'es', 'value': 'No idea', 'status': 'complete'}, + {'language': 'en', 'value': 'No idea', 'status': 'complete'}, # Cannot pass a value while in progress - {'language': 'en', 'value': 'No idea', 'status': 'in_progress'}, + {'language': 'es', 'value': 'Ni idea', 'status': 'in_progress'}, + # Cannot pass an empty object {}, # Cannot accept an empty translation - {'language': 'en', 'accepted': True}, + {'language': 'es', 'accepted': True}, # Cannot deny an empty translation - 
{'language': 'en', 'accepted': False}, + {'language': 'es', 'accepted': False}, # Cannot pass value and accepted at the same time - {'language': 'en', 'value': None, 'accepted': False}, + {'language': 'es', 'value': None, 'accepted': False}, # Cannot have a value while in progress - {'language': 'en', 'value': 'No idea', 'status': 'in_progress'}, + {'language': 'es', 'value': 'Ni idea', 'status': 'in_progress'}, # Missing error property - {'language': 'en', 'status': 'failed'}, + {'language': 'es', 'status': 'failed'}, # Delete transcript without status {'language': 'fr', 'value': None}, # Delete transcript with locale without status @@ -146,15 +147,15 @@ def test_invalid_automated_data_fails_validation(): def test_valid_result_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] + params = [{'language': 'fr'}, {'language': 'es'}] action = AutomaticGoogleTranscriptionAction(xpath, params) first = {'language': 'fr', 'value': 'un'} - second = {'language': 'en', 'value': 'two'} + second = {'language': 'es', 'value': 'dos'} third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'accepted': True} fifth = {'language': 'fr', 'value': None} - six = {'language': 'en', 'value': 'six'} + six = {'language': 'es', 'value': 'seis'} mock_sup_det = action.action_class_config.default_type mock_service = MagicMock() @@ -222,15 +223,15 @@ def test_acceptance_does_not_produce_revisions(): def test_invalid_result_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] + params = [{'language': 'fr'}, {'language': 'es'}] action = AutomaticGoogleTranscriptionAction(xpath, params) first = {'language': 'fr', 'value': 'un'} - second = {'language': 'en', 'value': 'two'} + second = {'language': 'es', 'value': 'dos'} third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'accepted': True} fifth = 
{'language': 'fr', 'value': None} - six = {'language': 'en', 'value': 'six'} + six = {'language': 'es', 'value': 'seis'} mock_sup_det = action.action_class_config.default_type mock_service = MagicMock() @@ -266,10 +267,10 @@ def test_invalid_result_fails_validation(): def test_transcription_revisions_are_retained_in_supplemental_details(): xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] + params = [{'language': 'fr'}, {'language': 'es'}] action = AutomaticGoogleTranscriptionAction(xpath, params) - first = {'language': 'en', 'value': 'No idea'} + first = {'language': 'es', 'value': 'Ni idea'} second = {'language': 'fr', 'value': 'Aucune idée'} mock_service = MagicMock() with patch( @@ -286,8 +287,8 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): first, ) - assert mock_sup_det['language'] == 'en' - assert mock_sup_det['value'] == 'No idea' + assert mock_sup_det['language'] == 'es' + assert mock_sup_det['value'] == 'Ni idea' assert mock_sup_det['_dateCreated'] == mock_sup_det['_dateModified'] assert '_revisions' not in mock_sup_det first_time = mock_sup_det['_dateCreated'] diff --git a/kobo/apps/subsequences/tests/test_automatic_google_translation.py b/kobo/apps/subsequences/tests/test_automatic_google_translation.py new file mode 100644 index 0000000000..c47e1259c4 --- /dev/null +++ b/kobo/apps/subsequences/tests/test_automatic_google_translation.py @@ -0,0 +1,404 @@ +from unittest.mock import MagicMock, patch + +import dateutil +import jsonschema +import pytest + +from ..actions.automatic_google_translation import AutomaticGoogleTranslationAction +from .constants import EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, QUESTION_SUPPLEMENT +from ..exceptions import TranscriptionNotFound + + +def test_valid_params_pass_validation(): + params = [{'language': 'fr'}, {'language': 'es'}] + AutomaticGoogleTranslationAction.validate_params(params) + + +def test_invalid_params_fail_validation(): 
+ params = [{'language': 123}, {'language': 'es'}] + with pytest.raises(jsonschema.exceptions.ValidationError): + AutomaticGoogleTranslationAction.validate_params(params) + + +def test_valid_user_data_passes_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + + action = AutomaticGoogleTranslationAction(xpath, params) + + allowed_data = [ + # Trivial case + {'language': 'fr'}, + # Transcription with locale + {'language': 'fr', 'locale': 'fr-CA'}, + # Delete translation + {'language': 'fr', 'value': None}, + # Delete translation with locale + {'language': 'fr', 'locale': 'fr-CA', 'value': None}, + # Accept translation + {'language': 'fr', 'accepted': True}, + # Accept translation with locale + {'language': 'fr', 'locale': 'fr-CA', 'accepted': True}, + ] + + for data in allowed_data: + action.validate_data(data) + + +def test_valid_automated_translation_data_passes_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + + action = AutomaticGoogleTranslationAction(xpath, params) + + allowed_data = [ + # Trivial case + {'language': 'fr', 'value': 'Aucune idée', 'status': 'complete'}, + { + 'language': 'fr', + 'locale': 'fr-FR', + 'value': 'Aucune idée', + 'status': 'complete', + }, + # Delete translation + {'language': 'fr', 'value': None, 'status': 'deleted'}, + {'language': 'fr', 'locale': 'fr-CA', 'value': None, 'status': 'deleted'}, + # Action in progress no value + {'language': 'es', 'status': 'in_progress'}, + {'language': 'es', 'locale': 'fr-CA', 'status': 'in_progress'}, + # Store error with status + {'language': 'es', 'status': 'failed', 'error': 'Translation failed'}, + { + 'language': 'es', + 'locale': 'fr-CA', + 'status': 'failed', + 'error': 'Translation failed', + }, + ] + + for data in allowed_data: + action.validate_automated_data(data) + + +def test_invalid_user_data_fails_validation(): + xpath = 
'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = AutomaticGoogleTranslationAction(xpath, params) + + invalid_data = [ + # Wrong language + {'language': 'en'}, + # Empty data + {}, + # Cannot push a translation + {'language': 'fr', 'value': 'Aucune idée'}, + # Cannot push a translation + {'language': 'fr', 'value': 'Aucune idée', 'status': 'complete'}, + # Cannot push a translation + {'language': 'fr', 'value': 'Aucune idée', 'status': 'in_progress'}, + # Cannot push a translation + {'language': 'fr', 'value': 'Aucune idée', 'status': 'failed'}, + # Cannot push a status + {'language': 'fr', 'status': 'in_progress'}, + # Cannot pass value and accepted at the same time + {'language': 'fr', 'value': None, 'accepted': False}, + ] + + for data in invalid_data: + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_data(data) + + +def test_invalid_automated_data_fails_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = AutomaticGoogleTranslationAction(xpath, params) + + invalid_data = [ + # Wrong language + {'language': 'en', 'value': 'No idea', 'status': 'complete'}, + # Cannot pass a value while in progress + {'language': 'es', 'value': 'Ni idea', 'status': 'in_progress'}, + # Cannot pass an empty object + {}, + # Cannot accept an empty translation + {'language': 'es', 'accepted': True}, + # Cannot deny an empty translation + {'language': 'es', 'accepted': False}, + # Cannot pass value and accepted at the same time + {'language': 'es', 'value': None, 'accepted': False}, + # Cannot have a value while in progress + {'language': 'es', 'value': 'Ni idea', 'status': 'in_progress'}, + # Missing error property + {'language': 'es', 'status': 'failed'}, + # Delete translation without status + {'language': 'fr', 'value': None}, + # Delete translation with locale without status + {'language': 'fr', 
'locale': 'fr-CA', 'value': None}, + # failed with no status + {'language': 'es', 'error': 'Translation failed'}, + # failed with no error + {'language': 'es', 'status': 'failed'}, + ] + + for data in invalid_data: + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_automated_data(data) + + +def test_valid_result_passes_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = AutomaticGoogleTranslationAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'es', 'value': 'dos'} + third = {'language': 'fr', 'value': 'trois'} + fourth = {'language': 'fr', 'accepted': True} + fifth = {'language': 'fr', 'value': None} + six = {'language': 'es', 'value': 'seis'} + mock_sup_det = action.action_class_config.default_type + + mock_service = MagicMock() + with patch( + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + return_value=mock_service, + ): + for data in first, second, third, fourth, fifth, six: + value = data.get('value', '') + # The 'value' field is not allowed in the payload, except when its + # value is None. 
+ if value: + del data['value'] + + mock_service.process_data.return_value = { + 'value': value, + 'status': 'complete', + } + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, data + ) + + action.validate_result(mock_sup_det) + + assert '_dateAccepted' in mock_sup_det[0]['_revisions'][0] + assert mock_sup_det[0]['status'] == 'deleted' + assert mock_sup_det[0]['_revisions'][0]['status'] == 'complete' + assert mock_sup_det[1]['_revisions'][0]['status'] == 'complete' + + +def test_acceptance_does_not_produce_revisions(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = AutomaticGoogleTranslationAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'fr', 'accepted': True} + third = {'language': 'fr', 'accepted': False} + mock_sup_det = action.action_class_config.default_type + + mock_service = MagicMock() + with patch( + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + return_value=mock_service, + ): + for data in first, second, third: + value = data.get('value', '') + # The 'value' field is not allowed in the payload, except when its + # value is None. 
+ if value: + del data['value'] + + mock_service.process_data.return_value = { + 'value': value, + 'status': 'complete', + } + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, data + ) + assert '_revisions' not in mock_sup_det[0] + if data.get('value') is None: + is_date_accepted_present = mock_sup_det[0].get('_dateAccepted') is None + assert is_date_accepted_present is not bool(data.get('accepted')) + + action.validate_result(mock_sup_det) + + +def test_invalid_result_fails_validation(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = AutomaticGoogleTranslationAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'es', 'value': 'dos'} + third = {'language': 'fr', 'value': 'trois'} + fourth = {'language': 'fr', 'accepted': True} + fifth = {'language': 'fr', 'value': None} + six = {'language': 'es', 'value': 'seis'} + mock_sup_det = action.action_class_config.default_type + + mock_service = MagicMock() + with patch( + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', + # noqa + return_value=mock_service, + ): + for data in first, second, third, fourth, fifth, six: + value = data.get('value', '') + # The 'value' field is not allowed in the payload, except when its + # value is None. 
+ if value: + del data['value'] + + mock_service.process_data.return_value = { + 'value': value, + 'status': 'complete', + } + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, data + ) + + action.validate_result(mock_sup_det) + + # erroneously add '_dateModified' onto a revision + first_revision = mock_sup_det[0]['_revisions'][0] + first_revision['_dateModified'] = first_revision['_dateCreated'] + + with pytest.raises(jsonschema.exceptions.ValidationError): + action.validate_result(mock_sup_det) + + +def test_translation_revisions_are_retained_in_supplemental_details(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = AutomaticGoogleTranslationAction(xpath, params) + + first = {'language': 'es', 'value': 'Ni idea'} + second = {'language': 'fr', 'value': 'Aucune idée'} + third = {'language': 'es', 'value': 'Ninguna idea'} + + mock_service = MagicMock() + with patch( + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', + # noqa + return_value=mock_service, + ): + value = first.pop('value', None) + mock_service.process_data.return_value = {'value': value, 'status': 'complete'} + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, + QUESTION_SUPPLEMENT, + action.action_class_config.default_type, + first, + ) + + assert mock_sup_det[0]['language'] == 'es' + assert mock_sup_det[0]['value'] == 'Ni idea' + assert mock_sup_det[0]['_dateCreated'] == mock_sup_det[0]['_dateModified'] + assert '_revisions' not in mock_sup_det[0] + first_time = mock_sup_det[0]['_dateCreated'] + + with patch( + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + return_value=mock_service, + ): + value = second.pop('value', None) + mock_service.process_data.return_value = {'value': value, 'status': 'complete'} + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, 
mock_sup_det, second + ) + + assert len(mock_sup_det) == 2 + assert '_revision' not in mock_sup_det[0] + assert mock_sup_det[1]['language'] == 'fr' + assert mock_sup_det[1]['value'] == 'Aucune idée' + assert mock_sup_det[1]['_dateCreated'] == mock_sup_det[1]['_dateModified'] + assert '_revision' not in mock_sup_det[1] + + with patch( + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + return_value=mock_service, + ): + value = third.pop('value', None) + mock_service.process_data.return_value = {'value': value, 'status': 'complete'} + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, third + ) + + assert len(mock_sup_det) == 2 + + # the revision should encompass the first translation + assert mock_sup_det[0]['_revisions'][0].items() >= first.items() + + # the revision should have a creation timestamp equal to that of the first + # translation + assert mock_sup_det[0]['_revisions'][0]['_dateCreated'] == first_time + + # revisions should not list a modification timestamp + assert '_dateModified' not in mock_sup_det[0]['_revisions'][0] + + # the record itself (not revision) should have an unchanged creation + # timestamp + assert mock_sup_det[0]['_dateCreated'] == first_time + + # the record itself should have an updated modification timestamp + assert dateutil.parser.parse( + mock_sup_det[0]['_dateModified'] + ) > dateutil.parser.parse(mock_sup_det[0]['_dateCreated']) + + # the record itself should encompass the second translation + assert mock_sup_det[0].items() >= third.items() + + +def test_latest_revision_is_first(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = AutomaticGoogleTranslationAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + second = {'language': 'fr', 'value': 'deux'} + third = {'language': 'fr', 'value': 'trois'} + + mock_sup_det = 
action.action_class_config.default_type + mock_service = MagicMock() + with patch( + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', + # noqa + return_value=mock_service, + ): + for data in first, second, third: + value = data.pop('value') + mock_service.process_data.return_value = { + 'value': value, + 'status': 'complete', + } + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, data + ) + + assert mock_sup_det[0]['value'] == 'trois' + assert mock_sup_det[0]['_revisions'][0]['value'] == 'deux' + assert mock_sup_det[0]['_revisions'][1]['value'] == 'un' + +def test_cannot_revise_data_without_transcription(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = AutomaticGoogleTranslationAction(xpath, params) + + first = {'language': 'fr', 'value': 'un'} + mock_sup_det = action.action_class_config.default_type + mock_service = MagicMock() + with patch( + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + return_value=mock_service, + ): + mock_service.process_data.return_value = { + 'value': 'fr', + 'status': 'complete', + } + + with pytest.raises(TranscriptionNotFound): + # question supplement data is empty + mock_sup_det = action.revise_data( + EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, {'language': 'fr'} + ) From 0e3222148308aba199cfa07e353e9f4d73e92893 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 27 Aug 2025 23:59:29 -0400 Subject: [PATCH 092/138] Add validation unit tests on automatic google translation --- .../{test_actions.py => test_validation.py} | 104 ++++++++++++++++-- 1 file changed, 95 insertions(+), 9 deletions(-) rename kobo/apps/subsequences/tests/api/v2/{test_actions.py => test_validation.py} (61%) diff --git a/kobo/apps/subsequences/tests/api/v2/test_actions.py b/kobo/apps/subsequences/tests/api/v2/test_validation.py 
similarity index 61% rename from kobo/apps/subsequences/tests/api/v2/test_actions.py rename to kobo/apps/subsequences/tests/api/v2/test_validation.py index 70676e5a94..0012eb6339 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_actions.py +++ b/kobo/apps/subsequences/tests/api/v2/test_validation.py @@ -106,7 +106,7 @@ def test_automatic_google_transcription_forbidden_payload(self): assert response.status_code == status.HTTP_400_BAD_REQUEST assert 'Invalid payload' in str(response.data) - def test_cannot_accept_incomplete_automatic_translation(self): + def test_cannot_accept_incomplete_automatic_transcription(self): # Set up the asset to allow automatic google transcription self.set_asset_advanced_features( { @@ -121,16 +121,60 @@ def test_cannot_accept_incomplete_automatic_translation(self): } ) - # Simulate in progress translation - mock_submission_supplement = { + # Try to set 'accepted' status when translation is not complete + payload = { '_version': '20250820', 'q1': { 'automatic_google_transcription': { - 'status': 'in_progress', 'language': 'es', + 'accepted': True, + } + }, + } + + # Mock GoogleTranscriptionService and simulate in progress transcription + mock_service = MagicMock() + mock_service.process_data.return_value = {'status': 'in_progress'} + + with patch( + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa + return_value=mock_service, + ): + response = self.client.patch( + self.supplement_details_url, data=payload, format='json' + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert 'Invalid payload' in str(response.data) + + def test_cannot_accept_incomplete_automatic_translation(self): + # Set up the asset to allow automatic google transcription + self.set_asset_advanced_features( + { + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'automatic_google_transcription': [ + {'language': 'en'}, + ], + 'automatic_google_translation': [ + {'language': 'fr'}, + ] + } + 
}, + } + ) + + # Simulate a completed transcription, first. + mock_submission_supplement = { + '_version': '20250820', + 'q1': { + 'automatic_google_transcription': { + 'status': 'complete', + 'value': 'My audio has been transcribed', + 'language': 'en', '_dateCreated': '2025-08-25T21:17:35.535710Z', '_dateModified': '2025-08-26T11:41:21.917338Z', - } + }, }, } SubmissionSupplement.objects.create( @@ -143,19 +187,61 @@ def test_cannot_accept_incomplete_automatic_translation(self): payload = { '_version': '20250820', 'q1': { - 'automatic_google_transcription': { - 'language': 'es', + 'automatic_google_translation': { + 'language': 'fr', 'accepted': True, } }, } - # Mock GoogleTranscriptionService + # Mock GoogleTranscriptionService and simulate in progress translation mock_service = MagicMock() mock_service.process_data.return_value = {'status': 'in_progress'} with patch( - 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + return_value=mock_service, + ): + response = self.client.patch( + self.supplement_details_url, data=payload, format='json' + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert 'Invalid payload' in str(response.data) + + def test_cannot_request_translation_without_transcription(self): + # Set up the asset to allow automatic google transcription + self.set_asset_advanced_features( + { + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'automatic_google_transcription': [ + {'language': 'en'}, + ], + 'automatic_google_translation': [ + {'language': 'fr'}, + ] + } + }, + } + ) + + # Try to ask for translation + payload = { + '_version': '20250820', + 'q1': { + 'automatic_google_translation': { + 'language': 'fr', + } + }, + } + + # Mock GoogleTranscriptionService and simulate in progress translation + mock_service = MagicMock() + mock_service.process_data.return_value = 
{'status': 'in_progress'} + + with patch( + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): response = self.client.patch( From 98f551d2712300a6bd49bcb2ad7b0ac220432074 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 28 Aug 2025 09:12:17 -0400 Subject: [PATCH 093/138] Create new README --- kobo/apps/subsequences/README.md | 245 +++++++++++++++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 kobo/apps/subsequences/README.md diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md new file mode 100644 index 0000000000..a868ff74ed --- /dev/null +++ b/kobo/apps/subsequences/README.md @@ -0,0 +1,245 @@ +# Subsequence Actions – Supplement Processing Flow + +This document explains the full flow when a client submits a **supplement** payload to the API. +It covers how the payload is validated through the various schemas (`params_schema`, `data_schema`, `automated_data_schema`, `result_schema`), how external NLP services are invoked for automatic actions, and how revisions are created and persisted. + +--- + +## Table of Contents + +1. [Class Overview](#1-class-overview) +2. [Subsequence Workflow](#2-subsequence-workflow) + 1. [Enabling an Action](#21-enabling-an-action) + 2. [Add Submission Supplement](#22-add-submission-supplement) + 3. [Sequence Diagram (End-to-End Flow)](#23-sequence-diagram-end-to-end-flow) + 4. [Flowchart (Logic inside revise_data per Action)](#24-flowchart-logic-inside-revise_data-per-action) +3. [Where Schemas Apply](#3-where-schemas-apply) + +--- + +## 1. Class Overview + +> The following diagram shows the inheritance tree and how mixins provide `result_schema`. 
+ +```mermaid +classDiagram +direction TB + +%% ==== Bases ==== +class BaseAction { + <> + +automated_data_schema [abstract][property] + +data_schema [abstract][property] + +result_schema [abstract][property] + +retrieve_data() + +revise_data() + +run_automated_process() [abstract] +} + +class BaseManualNLPAction { + +params_schema [class-level attribute] + +data_schema [property] +} + +class BaseAutomaticNLPAction { + +automated_data_schema [property] + +data_schema [property] + +run_automated_process() + +get_nlp_service_class() [abstract] +} + +%% ==== Concrete ==== +class ManualTranscription +class ManualTranslation +class AutomaticGoogleTranscription +class AutomaticGoogleTranslation + +%% ==== Mixins (provide result_schema) ==== +class TranscriptionResultSchemaMixin { + +result_schema [property] +} +class TranslationResultSchemaMixin { + +result_schema [property] +} + +%% ==== Inheritance (bases) ==== +BaseAction <|-- BaseManualNLPAction +BaseManualNLPAction <|-- BaseAutomaticNLPAction + +%% ==== Inheritance (concretes) ==== +BaseManualNLPAction <|-- ManualTranscription +BaseManualNLPAction <|-- ManualTranslation +BaseAutomaticNLPAction <|-- AutomaticGoogleTranscription +BaseAutomaticNLPAction <|-- AutomaticGoogleTranslation + +%% ==== Mixins -> Concretes ==== +TranscriptionResultSchemaMixin <.. ManualTranscription : mixin +TranscriptionResultSchemaMixin <.. AutomaticGoogleTranscription : mixin +TranslationResultSchemaMixin <.. ManualTranslation : mixin +TranslationResultSchemaMixin <.. AutomaticGoogleTranslation : mixin +``` + +--- + +## 2. Subsequence Workflow + +### 2.1 Enabling an Action + +To enable an action on an Asset, its configuration must be added under +`Asset.advanced_features`. This configuration is used to **instantiate the +action** with its parameters and is validated against the action's +`params_schema`. 
+ +**Example: Enable Manual Transcription** + +PATCH the asset with: + +```json +{ + "_version": "20250820", + "_actionConfigs": { + "question_name_xpath": { + "action_id": , + "other_action_id": + } + } +} +``` + +**Example: Manual transcription in English and Spanish** + +```json +{ + "_version": "20250820", + "_actionConfigs": { + "audio_question": { + "manual_transcription": [{"language": "en"}, {"language": "es"}] + } + } +} +``` + +--- + +### 2.2 Add Submission Supplement + +You need to PATCH the submission supplement with this payload: + +#### Generic request + +``` +PATCH /api/v2/assets//data//supplement/ +``` + +```json +{ + "_version": "20250820", + "question_name_xpath": { + "action_id": + } +} +``` + +#### Example: Manual transcription in English + +```json +{ + "_version": "20250820", + "audio_question": { + "manual_transcription": { "language": "en", "value": "My transcript" } + } +} +``` + +--- + +### 2.3 Sequence Diagram (End-to-End Flow) + +> This diagram illustrates the complete call flow from the client request to persistence. 
+ +```mermaid +sequenceDiagram +autonumber +actor Client +participant API as KPI API +participant SS as SubmissionSupplement +participant Action as Action (Manual/Automatic) +participant Ext as NLP Service (if automatic) +participant DB as Database + +Client->>API: POST /assets//data//supplement +Note right of API: Parse payload & route + +API->>SS: SubmissionSupplement.revise_data(payload) + +loop For each action in _actionConfigs + SS->>Action: action.revise_data(one_action_payload) + Note right of Action: Validate with data_schema + + alt Action is automatic (BaseAutomaticNLPAction) + Action->>Action: run_automated_process() + Action->>Ext: Call external NLP service + Ext-->>Action: Response (augmented payload) + Action->>Action: Validate with automated_data_schema + end + + Action->>Action: Build new revision + Action->>Action: Validate with result_schema + Action->>DB: Save revision JSON +end + +SS-->>API: Aggregated result / status +API-->>Client: 200 OK (or error) +``` + +--- + +### 2.4 Flowchart (Logic inside `revise_data` per Action) + +> This diagram shows the decision tree when validating and processing a single action payload. + +```mermaid +flowchart TB + A[Incoming action payload] + B{Validate with data schema} + C{Is automatic action?} + D[Build revision] + G[Validate with result schema] + H[Save to DB] + I[Done] + F[Run automated process] + J[Validate with automated data schema] + E[Return 4xx error] + + A --> B + B -->|fail| E + B -->|ok| C + C -->|no| D + D --> G + G --> H + H --> I + C -->|yes| F + F --> J + J -->|fail| E + J -->|ok| D +``` + +--- + +## 3. Where Schemas Apply + +- **`params_schema`** (class-level attribute, `BaseManualNLPAction`) + Defines the schema for the parameters used to instantiate the action. + These parameters are configured when the action is enabled on the **Asset** + and are stored under `Asset.advanced_features`. 
+ > Example: `[ { "language": "en" }, { "language": "es" } ]` + +- **`data_schema`** (property) + Validates the **client payload** for a given action. + > Example: `{ "language": "en", "value": "My transcript" }` + +- **`automated_data_schema`** (property, automatic actions only) + Validates the **augmented payload** returned by the external service. + +- **`result_schema`** (property, via mixin) + Validates the **revision JSON** that is persisted and returned. From 1112b75ce44c46bae9a2d59abcfd0d3680fd0c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 28 Aug 2025 10:48:01 -0400 Subject: [PATCH 094/138] Update validation unit tests --- .../tests/api/v2/test_validation.py | 69 ++++++++++++------- .../test_automatic_google_translation.py | 52 ++++++++++++++ kpi/views/v2/data.py | 10 ++- 3 files changed, 107 insertions(+), 24 deletions(-) diff --git a/kobo/apps/subsequences/tests/api/v2/test_validation.py b/kobo/apps/subsequences/tests/api/v2/test_validation.py index 0012eb6339..dc5bda825e 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_validation.py +++ b/kobo/apps/subsequences/tests/api/v2/test_validation.py @@ -76,35 +76,58 @@ def test_cannot_patch_with_invalid_payload(self): assert response.status_code == status.HTTP_400_BAD_REQUEST assert 'Invalid action' in str(response.data) - def test_automatic_google_transcription_forbidden_payload(self): - # First, set up the asset to allow automatic google transcription - self.set_asset_advanced_features( - { - '_version': '20250820', - '_actionConfigs': { - 'q1': { - 'automatic_google_transcription': [ - {'language': 'es'}, - ] - } - }, - } - ) + def test_cannot_set_value_with_automated_actions(self): + # First, set up the asset to allow automated actions + advanced_features = { + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'automatic_google_transcription': [ + {'language': 'en'}, + ], + 'automatic_google_translation': [ + {'language': 'fr'}, + ] + } + }, + } + 
self.set_asset_advanced_features(advanced_features) - payload = { + # Simulate a completed transcription, first. + mock_submission_supplement = { '_version': '20250820', 'q1': { 'automatic_google_transcription': { - 'language': 'es', - 'value': 'some text', # forbidden field - } + 'status': 'complete', + 'value': 'My audio has been transcribed', + 'language': 'en', + '_dateCreated': '2025-08-25T21:17:35.535710Z', + '_dateModified': '2025-08-26T11:41:21.917338Z', + }, }, } - response = self.client.patch( - self.supplement_details_url, data=payload, format='json' + SubmissionSupplement.objects.create( + submission_uuid=self.submission_uuid, + content=mock_submission_supplement, + asset=self.asset, ) - assert response.status_code == status.HTTP_400_BAD_REQUEST - assert 'Invalid payload' in str(response.data) + automated_actions = advanced_features['_actionConfigs']['q1'].keys() + for automated_action in automated_actions: + payload = { + '_version': '20250820', + 'q1': { + automated_action: { + 'language': 'es', + 'value': 'some text', # forbidden field + } + }, + } + response = self.client.patch( + self.supplement_details_url, data=payload, format='json' + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert 'Invalid payload' in str(response.data) + def test_cannot_accept_incomplete_automatic_transcription(self): # Set up the asset to allow automatic google transcription @@ -248,4 +271,4 @@ def test_cannot_request_translation_without_transcription(self): self.supplement_details_url, data=payload, format='json' ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert 'Invalid payload' in str(response.data) + assert 'Cannot translate without transcription' in str(response.data) diff --git a/kobo/apps/subsequences/tests/test_automatic_google_translation.py b/kobo/apps/subsequences/tests/test_automatic_google_translation.py index c47e1259c4..7e0d9c686a 100644 --- a/kobo/apps/subsequences/tests/test_automatic_google_translation.py +++ 
b/kobo/apps/subsequences/tests/test_automatic_google_translation.py @@ -402,3 +402,55 @@ def test_cannot_revise_data_without_transcription(): mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, {'language': 'fr'} ) + +def test_find_the_most_recent_transcription(): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = AutomaticGoogleTranslationAction(xpath, params) + + question_supplement_data = { + 'automatic_google_transcription': { + 'value': 'My audio has been transcribed automatically', + 'language': 'en', + 'status': 'completed', + '_dateCreated': '2024-04-08T15:27:00Z', + '_dateModified': '2024-04-08T15:27:00Z', + }, + 'manual_transcription': { + 'value': 'My audio has been transcribed manually', + 'language': 'en', + 'locale': 'en-CA', + 'status': 'completed', + '_dateCreated': '2024-04-08T15:28:00Z', + '_dateModified': '2024-04-08T15:28:00Z', + }, + } + + # Manual transcription is the most recent + action_data = {} # not really relevant for this test + expected = { + 'dependency': { + 'value': 'My audio has been transcribed manually', + 'language': 'en-CA', + } + } + action_data = action._get_action_data_dependency( + question_supplement_data, action_data + ) + assert action_data == expected + + # Automated transcription is the most recent + action_data = {} + question_supplement_data['automatic_google_transcription'][ + '_dateModified' + ] = '2025-07-28T14:18:00Z' + expected = { + 'dependency': { + 'value': 'My audio has been transcribed automatically', + 'language': 'en', + } + } + action_data = action._get_action_data_dependency( + question_supplement_data, action_data + ) + assert action_data == expected diff --git a/kpi/views/v2/data.py b/kpi/views/v2/data.py index 26f4ddc691..5958d5f670 100644 --- a/kpi/views/v2/data.py +++ b/kpi/views/v2/data.py @@ -51,7 +51,11 @@ SubmissionValidationStatusPermission, ViewSubmissionPermission, ) -from 
kobo.apps.subsequences.exceptions import InvalidAction, InvalidXPath +from kobo.apps.subsequences.exceptions import ( + InvalidAction, + InvalidXPath, + TranscriptionNotFound, +) from kpi.renderers import SubmissionGeoJsonRenderer, SubmissionXMLRenderer from kpi.schema_extensions.v2.data.serializers import ( DataBulkDelete, @@ -541,6 +545,10 @@ def supplement(self, request, submission_id_or_root_uuid: str, *args, **kwargs): raise serializers.ValidationError({'detail': 'Invalid question name'}) except jsonschema.exceptions.ValidationError: raise serializers.ValidationError({'detail': 'Invalid payload'}) + except TranscriptionNotFound: + raise serializers.ValidationError( + {'detail': 'Cannot translate without transcription'} + ) return Response(supplemental_data) From f92da1d76ec4ebd8db918b4febce35f7da642128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 28 Aug 2025 10:53:18 -0400 Subject: [PATCH 095/138] Refactor: replace Automatic with Automated --- kobo/apps/subsequences/README.md | 28 +++++------ kobo/apps/subsequences/actions/__init__.py | 10 ++-- ...n.py => automated_google_transcription.py} | 8 ++-- ...ion.py => automated_google_translation.py} | 8 ++-- kobo/apps/subsequences/actions/base.py | 22 ++++----- .../tests/api/v2/test_validation.py | 36 +++++++------- kobo/apps/subsequences/tests/constants.py | 2 +- ...=> test_automated_google_transcription.py} | 36 +++++++------- ...y => test_automated_google_translation.py} | 48 +++++++++---------- .../subsequences/utils/supplement_data.py | 4 +- 10 files changed, 101 insertions(+), 101 deletions(-) rename kobo/apps/subsequences/actions/{automatic_google_transcription.py => automated_google_transcription.py} (80%) rename kobo/apps/subsequences/actions/{automatic_google_translation.py => automated_google_translation.py} (93%) rename kobo/apps/subsequences/tests/{test_automatic_google_transcription.py => test_automated_google_transcription.py} (91%) rename 
kobo/apps/subsequences/tests/{test_automatic_google_translation.py => test_automated_google_translation.py} (91%) diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md index a868ff74ed..55645980d8 100644 --- a/kobo/apps/subsequences/README.md +++ b/kobo/apps/subsequences/README.md @@ -1,7 +1,7 @@ # Subsequence Actions – Supplement Processing Flow This document explains the full flow when a client submits a **supplement** payload to the API. -It covers how the payload is validated through the various schemas (`params_schema`, `data_schema`, `automated_data_schema`, `result_schema`), how external NLP services are invoked for automatic actions, and how revisions are created and persisted. +It covers how the payload is validated through the various schemas (`params_schema`, `data_schema`, `automated_data_schema`, `result_schema`), how external NLP services are invoked for automated actions, and how revisions are created and persisted. --- @@ -41,7 +41,7 @@ class BaseManualNLPAction { +data_schema [property] } -class BaseAutomaticNLPAction { +class BaseAutomatedNLPAction { +automated_data_schema [property] +data_schema [property] +run_automated_process() @@ -51,8 +51,8 @@ class BaseAutomaticNLPAction { %% ==== Concrete ==== class ManualTranscription class ManualTranslation -class AutomaticGoogleTranscription -class AutomaticGoogleTranslation +class AutomatedGoogleTranscription +class AutomatedGoogleTranslation %% ==== Mixins (provide result_schema) ==== class TranscriptionResultSchemaMixin { @@ -64,19 +64,19 @@ class TranslationResultSchemaMixin { %% ==== Inheritance (bases) ==== BaseAction <|-- BaseManualNLPAction -BaseManualNLPAction <|-- BaseAutomaticNLPAction +BaseManualNLPAction <|-- BaseAutomatedNLPAction %% ==== Inheritance (concretes) ==== BaseManualNLPAction <|-- ManualTranscription BaseManualNLPAction <|-- ManualTranslation -BaseAutomaticNLPAction <|-- AutomaticGoogleTranscription -BaseAutomaticNLPAction <|-- 
AutomaticGoogleTranslation +BaseAutomatedNLPAction <|-- AutomatedGoogleTranscription +BaseAutomatedNLPAction <|-- AutomatedGoogleTranslation %% ==== Mixins -> Concretes ==== TranscriptionResultSchemaMixin <.. ManualTranscription : mixin -TranscriptionResultSchemaMixin <.. AutomaticGoogleTranscription : mixin +TranscriptionResultSchemaMixin <.. AutomatedGoogleTranscription : mixin TranslationResultSchemaMixin <.. ManualTranslation : mixin -TranslationResultSchemaMixin <.. AutomaticGoogleTranslation : mixin +TranslationResultSchemaMixin <.. AutomatedGoogleTranslation : mixin ``` --- @@ -163,8 +163,8 @@ autonumber actor Client participant API as KPI API participant SS as SubmissionSupplement -participant Action as Action (Manual/Automatic) -participant Ext as NLP Service (if automatic) +participant Action as Action (Manual/Automated) +participant Ext as NLP Service (if automated) participant DB as Database Client->>API: POST /assets//data//supplement @@ -176,7 +176,7 @@ loop For each action in _actionConfigs SS->>Action: action.revise_data(one_action_payload) Note right of Action: Validate with data_schema - alt Action is automatic (BaseAutomaticNLPAction) + alt Action is automated (BaseAutomatedNLPAction) Action->>Action: run_automated_process() Action->>Ext: Call external NLP service Ext-->>Action: Response (augmented payload) @@ -202,7 +202,7 @@ API-->>Client: 200 OK (or error) flowchart TB A[Incoming action payload] B{Validate with data schema} - C{Is automatic action?} + C{Is automated action?} D[Build revision] G[Validate with result schema] H[Save to DB] @@ -238,7 +238,7 @@ flowchart TB Validates the **client payload** for a given action. > Example: `{ "language": "en", "value": "My transcript" }` -- **`automated_data_schema`** (property, automatic actions only) +- **`automated_data_schema`** (property, automated actions only) Validates the **augmented payload** returned by the external service. 
- **`result_schema`** (property, via mixin) diff --git a/kobo/apps/subsequences/actions/__init__.py b/kobo/apps/subsequences/actions/__init__.py index 63e4df8977..19accc2b94 100644 --- a/kobo/apps/subsequences/actions/__init__.py +++ b/kobo/apps/subsequences/actions/__init__.py @@ -1,18 +1,18 @@ -from .automatic_google_transcription import AutomaticGoogleTranscriptionAction -from .automatic_google_translation import AutomaticGoogleTranslationAction +from .automated_google_transcription import AutomatedGoogleTranscriptionAction +from .automated_google_translation import AutomatedGoogleTranslationAction from .manual_transcription import ManualTranscriptionAction from .manual_translation import ManualTranslationAction # TODO, what about using a loader for every class in "actions" folder (except base.py)? ACTIONS = ( - AutomaticGoogleTranscriptionAction, - AutomaticGoogleTranslationAction, + AutomatedGoogleTranscriptionAction, + AutomatedGoogleTranslationAction, ManualTranscriptionAction, ManualTranslationAction, ) TRANSCRIPTION_ACTIONS = ( - AutomaticGoogleTranscriptionAction, + AutomatedGoogleTranscriptionAction, ManualTranscriptionAction, ) diff --git a/kobo/apps/subsequences/actions/automatic_google_transcription.py b/kobo/apps/subsequences/actions/automated_google_transcription.py similarity index 80% rename from kobo/apps/subsequences/actions/automatic_google_transcription.py rename to kobo/apps/subsequences/actions/automated_google_transcription.py index f48c5a659c..3cac032307 100644 --- a/kobo/apps/subsequences/actions/automatic_google_transcription.py +++ b/kobo/apps/subsequences/actions/automated_google_transcription.py @@ -1,15 +1,15 @@ from kobo.apps.organizations.constants import UsageType from ..integrations.google.google_transcribe import GoogleTranscriptionService from ..type_aliases import NLPExternalServiceClass -from .base import ActionClassConfig, BaseAutomaticNLPAction +from .base import ActionClassConfig, BaseAutomatedNLPAction from .mixins import 
TranscriptionResultSchemaMixin -class AutomaticGoogleTranscriptionAction( - TranscriptionResultSchemaMixin, BaseAutomaticNLPAction +class AutomatedGoogleTranscriptionAction( + TranscriptionResultSchemaMixin, BaseAutomatedNLPAction ): - ID = 'automatic_google_transcription' + ID = 'automated_google_transcription' action_class_config = ActionClassConfig({}, None, True) def get_nlp_service_class(self) -> NLPExternalServiceClass: diff --git a/kobo/apps/subsequences/actions/automatic_google_translation.py b/kobo/apps/subsequences/actions/automated_google_translation.py similarity index 93% rename from kobo/apps/subsequences/actions/automatic_google_translation.py rename to kobo/apps/subsequences/actions/automated_google_translation.py index 2be83e4795..9e78feb532 100644 --- a/kobo/apps/subsequences/actions/automatic_google_translation.py +++ b/kobo/apps/subsequences/actions/automated_google_translation.py @@ -4,15 +4,15 @@ from ..exceptions import TranscriptionNotFound from ..integrations.google.google_translate import GoogleTranslationService from ..type_aliases import NLPExternalServiceClass -from .base import ActionClassConfig, BaseAutomaticNLPAction +from .base import ActionClassConfig, BaseAutomatedNLPAction from .mixins import TranslationResultSchemaMixin -class AutomaticGoogleTranslationAction( - TranslationResultSchemaMixin, BaseAutomaticNLPAction +class AutomatedGoogleTranslationAction( + TranslationResultSchemaMixin, BaseAutomatedNLPAction ): - ID = 'automatic_google_translation' + ID = 'automated_google_translation' action_class_config = ActionClassConfig([], 'language', True) def get_nlp_service_class(self) -> NLPExternalServiceClass: diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 5dd0a59752..b16bd13f4c 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -117,7 +117,7 @@ class ActionClassConfig: default_type: dict | list key: str | None - automatic: bool + 
automated: bool class BaseAction: @@ -247,8 +247,8 @@ def revise_data( item_index = idx break - if self.action_class_config.automatic: - # If the action is automatic, run the external process first. + if self.action_class_config.automated: + # If the action is automated, run the external process first. if not ( service_response := self.run_automated_process( submission, @@ -258,7 +258,7 @@ def revise_data( asset=asset, ) ): - # If the service response is None, the automatic task is still running. + # If the service response is None, the automated task is still running. # Stop here to avoid processing data and creating redundant revisions. return None @@ -293,9 +293,9 @@ def revise_data( new_record[self.DATE_CREATED_FIELD] = record_creation_date # For manual actions, always mark as accepted. - # For automatic actions, revert the just-created revision (remove it and + # For automated actions, revert the just-created revision (remove it and # reapply its dates) to avoid adding extra branching earlier in the method. - if self.action_class_config.automatic: + if self.action_class_config.automated: if accepted is not None: revision = new_record[self.REVISIONS_FIELD].pop(0) if not len(new_record[self.REVISIONS_FIELD]): @@ -352,7 +352,7 @@ def run_automated_process( **kwargs, ) -> dict | bool: """ - Update action_data with automatic process + Update action_data with automated process """ raise NotImplementedError @@ -365,7 +365,7 @@ def _inject_data_schema(self, destination_schema: dict, skipped_keys: list): schema_to_inject = ( self.automated_data_schema - if self.action_class_config.automatic + if self.action_class_config.automated else self.data_schema ) @@ -388,11 +388,11 @@ def _is_usage_limited(self): """ Returns whether an action should check for usage limits. 
""" - return self.action_class_config.automatic + return self.action_class_config.automated @property def _limit_identifier(self): - # See AutomaticGoogleTranscriptionAction._limit_identifier() for example + # See AutomatedGoogleTranscriptionAction._limit_identifier() for example raise NotImplementedError() @@ -474,7 +474,7 @@ def languages(self) -> list[str]: return languages -class BaseAutomaticNLPAction(BaseManualNLPAction): +class BaseAutomatedNLPAction(BaseManualNLPAction): """ Base class for all automated NLP actions. diff --git a/kobo/apps/subsequences/tests/api/v2/test_validation.py b/kobo/apps/subsequences/tests/api/v2/test_validation.py index dc5bda825e..ba71c0c2da 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_validation.py +++ b/kobo/apps/subsequences/tests/api/v2/test_validation.py @@ -82,10 +82,10 @@ def test_cannot_set_value_with_automated_actions(self): '_version': '20250820', '_actionConfigs': { 'q1': { - 'automatic_google_transcription': [ + 'automated_google_transcription': [ {'language': 'en'}, ], - 'automatic_google_translation': [ + 'automated_google_translation': [ {'language': 'fr'}, ] } @@ -97,7 +97,7 @@ def test_cannot_set_value_with_automated_actions(self): mock_submission_supplement = { '_version': '20250820', 'q1': { - 'automatic_google_transcription': { + 'automated_google_transcription': { 'status': 'complete', 'value': 'My audio has been transcribed', 'language': 'en', @@ -130,13 +130,13 @@ def test_cannot_set_value_with_automated_actions(self): def test_cannot_accept_incomplete_automatic_transcription(self): - # Set up the asset to allow automatic google transcription + # Set up the asset to allow automated google transcription self.set_asset_advanced_features( { '_version': '20250820', '_actionConfigs': { 'q1': { - 'automatic_google_transcription': [ + 'automated_google_transcription': [ {'language': 'es'}, ] } @@ -148,7 +148,7 @@ def test_cannot_accept_incomplete_automatic_transcription(self): payload = { '_version': 
'20250820', 'q1': { - 'automatic_google_transcription': { + 'automated_google_transcription': { 'language': 'es', 'accepted': True, } @@ -160,7 +160,7 @@ def test_cannot_accept_incomplete_automatic_transcription(self): mock_service.process_data.return_value = {'status': 'in_progress'} with patch( - 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa + 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): response = self.client.patch( @@ -170,16 +170,16 @@ def test_cannot_accept_incomplete_automatic_transcription(self): assert 'Invalid payload' in str(response.data) def test_cannot_accept_incomplete_automatic_translation(self): - # Set up the asset to allow automatic google transcription + # Set up the asset to allow automated google actions self.set_asset_advanced_features( { '_version': '20250820', '_actionConfigs': { 'q1': { - 'automatic_google_transcription': [ + 'automated_google_transcription': [ {'language': 'en'}, ], - 'automatic_google_translation': [ + 'automated_google_translation': [ {'language': 'fr'}, ] } @@ -191,7 +191,7 @@ def test_cannot_accept_incomplete_automatic_translation(self): mock_submission_supplement = { '_version': '20250820', 'q1': { - 'automatic_google_transcription': { + 'automated_google_transcription': { 'status': 'complete', 'value': 'My audio has been transcribed', 'language': 'en', @@ -210,7 +210,7 @@ def test_cannot_accept_incomplete_automatic_translation(self): payload = { '_version': '20250820', 'q1': { - 'automatic_google_translation': { + 'automated_google_translation': { 'language': 'fr', 'accepted': True, } @@ -222,7 +222,7 @@ def test_cannot_accept_incomplete_automatic_translation(self): mock_service.process_data.return_value = {'status': 'in_progress'} with patch( - 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + 
'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): response = self.client.patch( @@ -232,16 +232,16 @@ def test_cannot_accept_incomplete_automatic_translation(self): assert 'Invalid payload' in str(response.data) def test_cannot_request_translation_without_transcription(self): - # Set up the asset to allow automatic google transcription + # Set up the asset to allow automated google actions self.set_asset_advanced_features( { '_version': '20250820', '_actionConfigs': { 'q1': { - 'automatic_google_transcription': [ + 'automated_google_transcription': [ {'language': 'en'}, ], - 'automatic_google_translation': [ + 'automated_google_translation': [ {'language': 'fr'}, ] } @@ -253,7 +253,7 @@ def test_cannot_request_translation_without_transcription(self): payload = { '_version': '20250820', 'q1': { - 'automatic_google_translation': { + 'automated_google_translation': { 'language': 'fr', } }, @@ -264,7 +264,7 @@ def test_cannot_request_translation_without_transcription(self): mock_service.process_data.return_value = {'status': 'in_progress'} with patch( - 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): response = self.client.patch( diff --git a/kobo/apps/subsequences/tests/constants.py b/kobo/apps/subsequences/tests/constants.py index 35bd514df3..ef0010d749 100644 --- a/kobo/apps/subsequences/tests/constants.py +++ b/kobo/apps/subsequences/tests/constants.py @@ -1,7 +1,7 @@ EMPTY_SUBMISSION = {} EMPTY_SUPPLEMENT = {} QUESTION_SUPPLEMENT = { - 'automatic_google_transcription': { + 'automated_google_transcription': { 'value': 'My audio has been transcribed', 'language': 'en', 'status': 'completed', diff --git a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py 
b/kobo/apps/subsequences/tests/test_automated_google_transcription.py similarity index 91% rename from kobo/apps/subsequences/tests/test_automatic_google_transcription.py rename to kobo/apps/subsequences/tests/test_automated_google_transcription.py index 6d5e1ac6f5..0fa0bd04d0 100644 --- a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py +++ b/kobo/apps/subsequences/tests/test_automated_google_transcription.py @@ -4,26 +4,26 @@ import jsonschema import pytest -from ..actions.automatic_google_transcription import AutomaticGoogleTranscriptionAction +from ..actions.automated_google_transcription import AutomatedGoogleTranscriptionAction from .constants import EMPTY_SUBMISSION, EMPTY_SUPPLEMENT def test_valid_params_pass_validation(): params = [{'language': 'fr'}, {'language': 'es'}] - AutomaticGoogleTranscriptionAction.validate_params(params) + AutomatedGoogleTranscriptionAction.validate_params(params) def test_invalid_params_fail_validation(): params = [{'language': 123}, {'language': 'es'}] with pytest.raises(jsonschema.exceptions.ValidationError): - AutomaticGoogleTranscriptionAction.validate_params(params) + AutomatedGoogleTranscriptionAction.validate_params(params) def test_valid_user_data_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranscriptionAction(xpath, params) + action = AutomatedGoogleTranscriptionAction(xpath, params) allowed_data = [ # Trivial case @@ -48,7 +48,7 @@ def test_valid_automated_translation_data_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranscriptionAction(xpath, params) + action = AutomatedGoogleTranscriptionAction(xpath, params) allowed_data = [ # Trivial case @@ -82,7 +82,7 @@ def test_valid_automated_translation_data_passes_validation(): def test_invalid_user_data_fails_validation(): xpath = 
'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranscriptionAction(xpath, params) + action = AutomatedGoogleTranscriptionAction(xpath, params) invalid_data = [ # Wrong language @@ -111,7 +111,7 @@ def test_invalid_user_data_fails_validation(): def test_invalid_automated_data_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranscriptionAction(xpath, params) + action = AutomatedGoogleTranscriptionAction(xpath, params) invalid_data = [ # Wrong language @@ -148,7 +148,7 @@ def test_invalid_automated_data_fails_validation(): def test_valid_result_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranscriptionAction(xpath, params) + action = AutomatedGoogleTranscriptionAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'es', 'value': 'dos'} @@ -160,7 +160,7 @@ def test_valid_result_passes_validation(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa + 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): for data in first, second, third, fourth, fifth, six: @@ -187,7 +187,7 @@ def test_valid_result_passes_validation(): def test_acceptance_does_not_produce_revisions(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] - action = AutomaticGoogleTranscriptionAction(xpath, params) + action = AutomatedGoogleTranscriptionAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'fr', 'accepted': True} @@ -196,7 +196,7 @@ def test_acceptance_does_not_produce_revisions(): mock_service = 
MagicMock() with patch( - 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa + 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): for data in first, second, third: @@ -224,7 +224,7 @@ def test_acceptance_does_not_produce_revisions(): def test_invalid_result_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranscriptionAction(xpath, params) + action = AutomatedGoogleTranscriptionAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'es', 'value': 'dos'} @@ -236,7 +236,7 @@ def test_invalid_result_fails_validation(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', + 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): @@ -268,13 +268,13 @@ def test_invalid_result_fails_validation(): def test_transcription_revisions_are_retained_in_supplemental_details(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranscriptionAction(xpath, params) + action = AutomatedGoogleTranscriptionAction(xpath, params) first = {'language': 'es', 'value': 'Ni idea'} second = {'language': 'fr', 'value': 'Aucune idée'} mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', + 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): @@ -294,7 +294,7 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): first_time = mock_sup_det['_dateCreated'] with patch( - 
'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa + 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): value = second.pop('value', None) @@ -331,7 +331,7 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): def test_latest_revision_is_first(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] - action = AutomaticGoogleTranscriptionAction(xpath, params) + action = AutomatedGoogleTranscriptionAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'fr', 'value': 'deux'} @@ -340,7 +340,7 @@ def test_latest_revision_is_first(): mock_sup_det = action.action_class_config.default_type mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', + 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): diff --git a/kobo/apps/subsequences/tests/test_automatic_google_translation.py b/kobo/apps/subsequences/tests/test_automated_google_translation.py similarity index 91% rename from kobo/apps/subsequences/tests/test_automatic_google_translation.py rename to kobo/apps/subsequences/tests/test_automated_google_translation.py index 7e0d9c686a..5163c4a9fc 100644 --- a/kobo/apps/subsequences/tests/test_automatic_google_translation.py +++ b/kobo/apps/subsequences/tests/test_automated_google_translation.py @@ -4,27 +4,27 @@ import jsonschema import pytest -from ..actions.automatic_google_translation import AutomaticGoogleTranslationAction +from ..actions.automated_google_translation import AutomatedGoogleTranslationAction from .constants import EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, QUESTION_SUPPLEMENT from ..exceptions import TranscriptionNotFound def test_valid_params_pass_validation(): params = 
[{'language': 'fr'}, {'language': 'es'}] - AutomaticGoogleTranslationAction.validate_params(params) + AutomatedGoogleTranslationAction.validate_params(params) def test_invalid_params_fail_validation(): params = [{'language': 123}, {'language': 'es'}] with pytest.raises(jsonschema.exceptions.ValidationError): - AutomaticGoogleTranslationAction.validate_params(params) + AutomatedGoogleTranslationAction.validate_params(params) def test_valid_user_data_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranslationAction(xpath, params) + action = AutomatedGoogleTranslationAction(xpath, params) allowed_data = [ # Trivial case @@ -49,7 +49,7 @@ def test_valid_automated_translation_data_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranslationAction(xpath, params) + action = AutomatedGoogleTranslationAction(xpath, params) allowed_data = [ # Trivial case @@ -83,7 +83,7 @@ def test_valid_automated_translation_data_passes_validation(): def test_invalid_user_data_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranslationAction(xpath, params) + action = AutomatedGoogleTranslationAction(xpath, params) invalid_data = [ # Wrong language @@ -112,7 +112,7 @@ def test_invalid_user_data_fails_validation(): def test_invalid_automated_data_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranslationAction(xpath, params) + action = AutomatedGoogleTranslationAction(xpath, params) invalid_data = [ # Wrong language @@ -149,7 +149,7 @@ def test_invalid_automated_data_fails_validation(): def test_valid_result_passes_validation(): xpath = 
'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranslationAction(xpath, params) + action = AutomatedGoogleTranslationAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'es', 'value': 'dos'} @@ -161,7 +161,7 @@ def test_valid_result_passes_validation(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): for data in first, second, third, fourth, fifth, six: @@ -190,7 +190,7 @@ def test_valid_result_passes_validation(): def test_acceptance_does_not_produce_revisions(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranslationAction(xpath, params) + action = AutomatedGoogleTranslationAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'fr', 'accepted': True} @@ -199,7 +199,7 @@ def test_acceptance_does_not_produce_revisions(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): for data in first, second, third: @@ -227,7 +227,7 @@ def test_acceptance_does_not_produce_revisions(): def test_invalid_result_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranslationAction(xpath, params) + action = AutomatedGoogleTranslationAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'es', 'value': 'dos'} @@ -239,7 +239,7 @@ def test_invalid_result_fails_validation(): mock_service = 
MagicMock() with patch( - 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', + 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): @@ -271,7 +271,7 @@ def test_invalid_result_fails_validation(): def test_translation_revisions_are_retained_in_supplemental_details(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomaticGoogleTranslationAction(xpath, params) + action = AutomatedGoogleTranslationAction(xpath, params) first = {'language': 'es', 'value': 'Ni idea'} second = {'language': 'fr', 'value': 'Aucune idée'} @@ -279,7 +279,7 @@ def test_translation_revisions_are_retained_in_supplemental_details(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', + 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): @@ -299,7 +299,7 @@ def test_translation_revisions_are_retained_in_supplemental_details(): first_time = mock_sup_det[0]['_dateCreated'] with patch( - 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): value = second.pop('value', None) @@ -316,7 +316,7 @@ def test_translation_revisions_are_retained_in_supplemental_details(): assert '_revision' not in mock_sup_det[1] with patch( - 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): value = third.pop('value', None) @@ -353,7 +353,7 @@ def test_translation_revisions_are_retained_in_supplemental_details(): def test_latest_revision_is_first(): xpath = 
'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] - action = AutomaticGoogleTranslationAction(xpath, params) + action = AutomatedGoogleTranslationAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'fr', 'value': 'deux'} @@ -362,7 +362,7 @@ def test_latest_revision_is_first(): mock_sup_det = action.action_class_config.default_type mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', + 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): @@ -383,13 +383,13 @@ def test_latest_revision_is_first(): def test_cannot_revise_data_without_transcription(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] - action = AutomaticGoogleTranslationAction(xpath, params) + action = AutomatedGoogleTranslationAction(xpath, params) first = {'language': 'fr', 'value': 'un'} mock_sup_det = action.action_class_config.default_type mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): mock_service.process_data.return_value = { @@ -406,10 +406,10 @@ def test_cannot_revise_data_without_transcription(): def test_find_the_most_recent_transcription(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] - action = AutomaticGoogleTranslationAction(xpath, params) + action = AutomatedGoogleTranslationAction(xpath, params) question_supplement_data = { - 'automatic_google_transcription': { + 'automated_google_transcription': { 'value': 'My audio has been transcribed automatically', 'language': 'en', 'status': 'completed', @@ -441,7 +441,7 @@ def 
test_find_the_most_recent_transcription(): # Automated transcription is the most recent action_data = {} - question_supplement_data['automatic_google_transcription'][ + question_supplement_data['automated_google_transcription'][ '_dateModified' ] = '2025-07-28T14:18:00Z' expected = { diff --git a/kobo/apps/subsequences/utils/supplement_data.py b/kobo/apps/subsequences/utils/supplement_data.py index b20b7e29f6..9a82fbead2 100644 --- a/kobo/apps/subsequences/utils/supplement_data.py +++ b/kobo/apps/subsequences/utils/supplement_data.py @@ -11,7 +11,7 @@ def get_supplemental_output_fields(asset: 'kpi.models.Asset') -> list[dict]: these are the fields added to exports, displayed in the table view, etc. multiple actions could result in only a single field, such as a manual - transcript and an automatic transcript for a given language only resulting + transcript and an automated transcript for a given language only resulting in one field in the output data Returns a list of fields contributed by all enabled actions (at the asset @@ -31,7 +31,7 @@ def get_supplemental_output_fields(asset: 'kpi.models.Asset') -> list[dict]: ] When it's time to get the data, we'll have to arbitrate between the manual - and automatic transcripts if both are ever present for a particular + and automated transcripts if both are ever present for a particular submission. 
We'll do that by looking at the acceptance dates and letting the most recent win """ From d7ac2ca5443f9e1336adc225637c94ef79df56a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 28 Aug 2025 13:31:05 -0400 Subject: [PATCH 096/138] Change content JSON structure from _revisions to _version --- kobo/apps/subsequences/README.md | 10 +- .../actions/automated_google_transcription.py | 2 +- .../actions/automated_google_translation.py | 4 +- kobo/apps/subsequences/actions/base.py | 193 ++++++++++-------- .../actions/manual_transcription.py | 2 +- .../actions/manual_translation.py | 4 +- kobo/apps/subsequences/actions/mixins.py | 52 ++--- .../subsequences/integrations/google/base.py | 2 - kobo/apps/subsequences/models.py | 2 +- kobo/apps/subsequences/tasks.py | 26 +++ .../tests/api/v2/test_permissions.py | 11 +- .../test_automated_google_transcription.py | 73 +++---- .../test_automated_google_translation.py | 85 ++++---- .../tests/test_manual_transcription.py | 56 +++-- .../tests/test_manual_translation.py | 87 ++++---- kobo/apps/subsequences/tests/test_models.py | 39 ++-- kobo/apps/subsequences__old/tasks/__init__.py | 0 17 files changed, 345 insertions(+), 303 deletions(-) create mode 100644 kobo/apps/subsequences/tasks.py delete mode 100644 kobo/apps/subsequences__old/tasks/__init__.py diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md index 55645980d8..b1b33b5941 100644 --- a/kobo/apps/subsequences/README.md +++ b/kobo/apps/subsequences/README.md @@ -1,7 +1,7 @@ # Subsequence Actions – Supplement Processing Flow This document explains the full flow when a client submits a **supplement** payload to the API. -It covers how the payload is validated through the various schemas (`params_schema`, `data_schema`, `automated_data_schema`, `result_schema`), how external NLP services are invoked for automated actions, and how revisions are created and persisted. 
+It covers how the payload is validated through the various schemas (`params_schema`, `data_schema`, `automated_data_schema`, `result_schema`), how external NLP services are invoked for automated actions, and how versions are created and persisted. --- @@ -183,9 +183,9 @@ loop For each action in _actionConfigs Action->>Action: Validate with automated_data_schema end - Action->>Action: Build new revision + Action->>Action: Build new version Action->>Action: Validate with result_schema - Action->>DB: Save revision JSON + Action->>DB: Save version JSON end SS-->>API: Aggregated result / status @@ -203,7 +203,7 @@ flowchart TB A[Incoming action payload] B{Validate with data schema} C{Is automated action?} - D[Build revision] + D[Build version] G[Validate with result schema] H[Save to DB] I[Done] @@ -242,4 +242,4 @@ flowchart TB Validates the **augmented payload** returned by the external service. - **`result_schema`** (property, via mixin) - Validates the **revision JSON** that is persisted and returned. + Validates the **version JSON** that is persisted and returned. 
diff --git a/kobo/apps/subsequences/actions/automated_google_transcription.py b/kobo/apps/subsequences/actions/automated_google_transcription.py index 3cac032307..b7aec4959f 100644 --- a/kobo/apps/subsequences/actions/automated_google_transcription.py +++ b/kobo/apps/subsequences/actions/automated_google_transcription.py @@ -10,7 +10,7 @@ class AutomatedGoogleTranscriptionAction( ): ID = 'automated_google_transcription' - action_class_config = ActionClassConfig({}, None, True) + action_class_config = ActionClassConfig(allow_multiple=False, automated=True) def get_nlp_service_class(self) -> NLPExternalServiceClass: return GoogleTranscriptionService diff --git a/kobo/apps/subsequences/actions/automated_google_translation.py b/kobo/apps/subsequences/actions/automated_google_translation.py index 9e78feb532..71887b9f95 100644 --- a/kobo/apps/subsequences/actions/automated_google_translation.py +++ b/kobo/apps/subsequences/actions/automated_google_translation.py @@ -13,7 +13,9 @@ class AutomatedGoogleTranslationAction( ): ID = 'automated_google_translation' - action_class_config = ActionClassConfig([], 'language', True) + action_class_config = ActionClassConfig( + allow_multiple=True, automated=True, action_data_key='language' + ) def get_nlp_service_class(self) -> NLPExternalServiceClass: return GoogleTranslationService diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index b16bd13f4c..cf7d5c3a14 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -9,7 +9,6 @@ from kobo.apps.subsequences.utils.time import utc_datetime_to_js_str from kpi.exceptions import UsageLimitExceededException from kpi.utils.usage_calculator import ServiceUsageCalculator -from ..exceptions import InvalidItem from ..type_aliases import NLPExternalServiceClass """ @@ -53,53 +52,77 @@ '_version': '20250820', 'my_audio_question': { 'manual_transcription': { - 'transcript': 'هائج', - 'language': 'ar', 
'_dateCreated': '2025-08-21T20:55:42.012053Z', '_dateModified': '2025-08-21T20:57:28.154567Z', - '_revisions': [ + '_versions': [ + { + 'transcript': 'هائج', + 'language': 'ar', + '_dateCreated': '2025-08-21T20:57:28.154567Z', + '_dateAccepted': '2025-08-21T20:57:28.154567Z', + }, { 'transcript': 'فارغ', 'language': 'ar', '_dateCreated': '2025-08-21T20:55:42.012053Z', + '_dateAccepted': '2025-08-21T20:55:42.012053Z', } ], }, - 'manual_translation': [ - { - 'language': 'en', - 'translation': 'berserk', - '_dateCreated': '2025-08-21T21:39:42.141306Z', - '_dateModified': '2025-08-21T21:39:42.141306Z', - }, - { - 'language': 'es', - 'translation': 'enloquecido', - '_dateCreated': '2025-08-21T21:40:54.644308Z', - '_dateModified': '2025-08-21T22:00:10.862880Z', - '_revisions': [ - { - 'translation': 'loco', - 'language': 'es', - '_dateCreated': '2025-08-21T21:40:54.644308Z', - } - ], + 'manual_translation': { + 'es': { + '_dateCreated': '2025-08-21T21:39:42.141306Z', + '_dateModified': '2025-08-21T21:40:54.644308Z', + '_versions': [ + { + 'value': 'enloquecido', + 'language': 'es', + '_dateCreated': '2025-08-21T21:40:54.644308Z', + '_dateAccepted': '2025-08-21T21:40:54.644308Z', + }, + { + 'value': 'loco', + 'language': 'es', + '_dateCreated': '2025-08-21T21:39:42.141306Z', + '_dateAccepted': '2025-08-21T21:39:42.141306Z', + } + ], + }, + 'fr': { + '_dateCreated': '2025-08-21T22:00:10.862880Z', + '_dateModified': '2025-08-21T22:00:10.862880Z', + '_versions': [ + { + 'translation': 'fou', + 'language': 'fr', + '_dateCreated': '2025-08-21T22:00:10.862880Z', + '_dateAccepted': '2025-08-21T22:00:10.862880Z', + } + ], + } }, ], }, 'my_video_question': { - 'manual_transcription': { - 'transcript': 'sea horse sea hell', - 'language': 'en', - '_dateCreated': '2025-08-21T21:06:20.059117Z', - '_dateModified': '2025-08-21T21:06:20.059117Z', - }, + '_dateCreated': '2025-08-21T21:06:20.059117Z', + '_dateModified': '2025-08-21T21:06:20.059117Z', + '_versions': [ + { + 'value': 'sea 
horse sea hell', + 'language': 'en', + '_dateCreated': '2025-08-21T21:06:20.059117Z', + '_dateAccepted': '2025-08-21T21:06:20.059117Z', + } + ], }, 'my_number_question': { 'number_multiplier': { - 'numberMultiplied': 99, '_dateCreated': '2025-08-21T21:09:34.504546Z', '_dateModified': '2025-08-21T21:09:34.504546Z', + '_versions': [ + 'value': 99, + '_dateCreated': '2025-08-21T21:09:34.504546Z', + ], }, }, } @@ -109,15 +132,18 @@ @dataclass class ActionClassConfig: """ - Defines how items in a result schema can be resolved. - - key: the dictionary field used to identify or match an item (e.g., "language"). - - default_type: the default container type to return when no items exist - (usually {} for objects or [] for arrays). + Configuration for how items in a result schema are resolved. + + - allow_multiple: Whether multiple items can share the same `action_data_key`. + - action_data_key: The field in `action_data` used to identify or match an item + when multiple entries are allowed (e.g., "language"). + - automated: Indicates whether the action relies on an external service + to generate data. 
""" - default_type: dict | list - key: str | None + allow_multiple: bool automated: bool + action_data_key: str | None = None class BaseAction: @@ -125,7 +151,7 @@ class BaseAction: DATE_CREATED_FIELD = '_dateCreated' DATE_MODIFIED_FIELD = '_dateModified' DATE_ACCEPTED_FIELD = '_dateAccepted' - REVISIONS_FIELD = '_revisions' + VERSION_FIELD = '_versions' action_class_config: ActionClassConfig | None = None @@ -215,7 +241,7 @@ def revise_data( self, submission: dict, question_supplemental_data: dict, - action_supplement_data: dict, + action_supplemental_data: dict, action_data: dict, asset: 'kpi.models.Asset' = None, ) -> dict | None: @@ -229,23 +255,21 @@ def revise_data( now_str = utc_datetime_to_js_str(timezone.now()) item_index = None - action_supplement_data_copy = deepcopy(action_supplement_data) - if not isinstance(self.action_class_config.default_type, list): - revision = action_supplement_data_copy - else: + + localized_action_supplemental_data = deepcopy(action_supplemental_data) + if self.action_class_config.allow_multiple: # TODO: Multiple keys are not supported. # Not a big issue for now since translation actions don’t use locale # (yet?) and transcription actions only involve one occurrence at a time. - needle = action_data[self.action_class_config.key] - revision = {} - if not isinstance(action_supplement_data, list): - raise InvalidItem + needle = action_data[self.action_class_config.action_data_key] + localized_action_supplemental_data = action_supplemental_data.get(needle, {}) - for idx, item in enumerate(action_supplement_data): - if needle == item[self.action_class_config.key]: - revision = deepcopy(item) - item_index = idx - break + try: + current_version = localized_action_supplemental_data.get( + self.VERSION_FIELD, [] + )[0] + except IndexError: + current_version = {} if self.action_class_config.automated: # If the action is automated, run the external process first. 
@@ -253,7 +277,7 @@ def revise_data( service_response := self.run_automated_process( submission, question_supplemental_data, - revision, + current_version, action_data, asset=asset, ) @@ -271,57 +295,51 @@ def revise_data( else: accepted = True - new_record = deepcopy(action_data) - revisions = revision.pop(self.REVISIONS_FIELD, []) + new_version = deepcopy(action_data) + new_version[self.DATE_CREATED_FIELD] = now_str + if self.DATE_CREATED_FIELD not in localized_action_supplemental_data: + localized_action_supplemental_data[self.DATE_CREATED_FIELD] = now_str + localized_action_supplemental_data[self.DATE_MODIFIED_FIELD] = now_str - revision_creation_date = revision.pop(self.DATE_MODIFIED_FIELD, now_str) - record_creation_date = revision.pop(self.DATE_CREATED_FIELD, now_str) - revision[self.DATE_CREATED_FIELD] = revision_creation_date - new_record[self.DATE_MODIFIED_FIELD] = now_str - - # If the default type is not a list, we handle a single record case. - if not isinstance(self.action_class_config.default_type, list): - if action_supplement_data: - revisions.insert(0, revision) - new_record[self.REVISIONS_FIELD] = revisions - else: - # When the default type is a list, we are handling an item within it. - if item_index is not None: - revisions.insert(0, revision) - new_record[self.REVISIONS_FIELD] = revisions - - new_record[self.DATE_CREATED_FIELD] = record_creation_date + localized_action_supplemental_data.setdefault( + self.VERSION_FIELD, [] + ).insert(0, new_version) # For manual actions, always mark as accepted. # For automated actions, revert the just-created revision (remove it and # reapply its dates) to avoid adding extra branching earlier in the method. 
if self.action_class_config.automated: if accepted is not None: - revision = new_record[self.REVISIONS_FIELD].pop(0) - if not len(new_record[self.REVISIONS_FIELD]): - del new_record[self.REVISIONS_FIELD] - # reassign date - new_record[self.DATE_MODIFIED_FIELD] = revision[self.DATE_CREATED_FIELD] + # Remove stale version + localized_action_supplemental_data[self.VERSION_FIELD].pop(0) if accepted: - new_record[self.DATE_ACCEPTED_FIELD] = now_str + localized_action_supplemental_data[self.VERSION_FIELD][0][ + self.DATE_ACCEPTED_FIELD + ] = now_str + else: + localized_action_supplemental_data[self.VERSION_FIELD][ + 0 + ].pop(self.DATE_ACCEPTED_FIELD, None) + else: - new_record[self.DATE_ACCEPTED_FIELD] = now_str + new_version[self.DATE_ACCEPTED_FIELD] = now_str - if isinstance(self.action_class_config.default_type, list): + if not self.action_class_config.allow_multiple: + new_action_supplement_data = localized_action_supplemental_data + else: + new_action_supplement_data = deepcopy(action_supplemental_data) # Handle the case where the default type is a list: # - If no index is provided, append the new record. # - Otherwise, replace the record at the given index. # Finally, update `new_record` to reference the full updated list. - if item_index is None: - action_supplement_data_copy.append(new_record) - else: - action_supplement_data_copy[item_index] = new_record - new_record = action_supplement_data_copy + new_action_supplement_data.update({ + needle: localized_action_supplemental_data + }) - self.validate_result(new_record) + self.validate_result(new_action_supplement_data) - return new_record + return new_action_supplement_data @staticmethod def raise_for_any_leading_underscore_key(d: dict): @@ -687,8 +705,9 @@ def run_automated_process( ) # Otherwise, trigger the external service. 
+ asset = kwargs['asset'] NLPService = self.get_nlp_service_class() # noqa - service = NLPService(submission, asset=kwargs['asset']) + service = NLPService(submission, asset=asset) service_data = service.process_data(self.source_question_xpath, action_data) # Remove the 'dependency' flag from action_data since it is only used diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 452de0c1ce..4e85d13ef8 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -7,7 +7,7 @@ class ManualTranscriptionAction(TranscriptionResultSchemaMixin, BaseManualNLPAction): ID = 'manual_transcription' - action_class_config = ActionClassConfig({}, None, False) + action_class_config = ActionClassConfig(allow_multiple=False, automated=False) def _get_output_field_name(self, language: str) -> str: language = language.split('-')[0] # ignore region if any diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index fa16706333..2f23c1a89c 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -7,7 +7,9 @@ class ManualTranslationAction(TranslationResultSchemaMixin, BaseManualNLPAction): ID = 'manual_translation' - action_class_config = ActionClassConfig([], 'language', False) + action_class_config = ActionClassConfig( + allow_multiple=True, automated=False, action_data_key='language' + ) def _get_output_field_name(self, language: str) -> str: language = language.split('-')[0] # ignore region if any diff --git a/kobo/apps/subsequences/actions/mixins.py b/kobo/apps/subsequences/actions/mixins.py index b36fd3cfe7..73922e2f4f 100644 --- a/kobo/apps/subsequences/actions/mixins.py +++ b/kobo/apps/subsequences/actions/mixins.py @@ -8,24 +8,30 @@ class TranscriptionResultSchemaMixin: @property def 
result_schema(self): + + # Move localized_value_schema definitions to main schema + if self.action_class_config.automated: + data_schema_defs = self.automated_data_schema.get('$defs', {}) + else: + data_schema_defs = self.data_schema.get('$defs', {}) + schema = { '$schema': 'https://json-schema.org/draft/2020-12/schema', 'type': 'object', 'additionalProperties': False, 'properties': { - self.REVISIONS_FIELD: { + self.VERSION_FIELD: { 'type': 'array', 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, + 'items': {'$ref': '#/$defs/version'}, }, self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, }, 'required': [self.DATE_CREATED_FIELD, self.DATE_MODIFIED_FIELD], '$defs': { 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { + 'version': { 'type': 'object', 'additionalProperties': False, 'properties': { @@ -34,15 +40,13 @@ def result_schema(self): }, 'required': [self.DATE_CREATED_FIELD], }, + **data_schema_defs, # Copy defs at the root level }, } - # Inject data schema in result schema template - self._inject_data_schema(schema, ['$schema', 'title', 'type']) - - # Also inject data schema in the revision definition + # Also inject data schema in the version definition self._inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] + schema['$defs']['version'], ['$schema', 'title', '$defs'] ) return schema @@ -62,32 +66,35 @@ def result_schema(self): 'type': 'object', 'additionalProperties': False, 'properties': { - self.REVISIONS_FIELD: { + self.VERSION_FIELD: { 'type': 'array', 'minItems': 1, - 'items': {'$ref': '#/$defs/revision'}, + 'items': {'$ref': '#/$defs/version'}, }, self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, self.DATE_MODIFIED_FIELD: {'$ref': '#/$defs/dateTime'}, - self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, }, 'required': [self.DATE_CREATED_FIELD, 
self.DATE_MODIFIED_FIELD], } - # Inject data schema in result schema template - self._inject_data_schema(localized_value_schema, ['$schema', 'title', 'type']) - # Move localized_value_schema definitions to main schema - localized_value_schema_defs = localized_value_schema.pop('$defs') + if self.action_class_config.automated: + data_schema_defs = self.automated_data_schema.get('$defs', {}) + else: + data_schema_defs = self.data_schema.get('$defs', {}) schema = { '$schema': 'https://json-schema.org/draft/2020-12/schema', - 'type': 'array', + 'type': 'object', 'additionalProperties': False, - 'items': {'$ref': '#/$defs/localized_value_schema'}, + 'properties': { + language: {'$ref': '#/$defs/dataActionKey'} + for language in self.languages + }, '$defs': { + 'dataActionKey': localized_value_schema, 'dateTime': {'type': 'string', 'format': 'date-time'}, - 'revision': { + 'version': { 'type': 'object', 'additionalProperties': False, 'properties': { @@ -96,14 +103,13 @@ def result_schema(self): }, 'required': [self.DATE_CREATED_FIELD], }, - 'localized_value_schema': localized_value_schema, - **localized_value_schema_defs, + **data_schema_defs, }, } - # Also inject data schema in the revision definition + # Also inject data schema in the version definition self._inject_data_schema( - schema['$defs']['revision'], ['$schema', 'title', '$defs'] + schema['$defs']['version'], ['$schema', 'title', '$defs'] ) return schema diff --git a/kobo/apps/subsequences/integrations/google/base.py b/kobo/apps/subsequences/integrations/google/base.py index e2a6f79211..79c4cc2d1c 100644 --- a/kobo/apps/subsequences/integrations/google/base.py +++ b/kobo/apps/subsequences/integrations/google/base.py @@ -21,8 +21,6 @@ ) from ...exceptions import SubsequenceTimeoutError -# from ...models import SubmissionSupplement -# from ..utils.cache import generate_cache_key from ..utils.google import google_credentials_from_constance_config diff --git a/kobo/apps/subsequences/models.py 
b/kobo/apps/subsequences/models.py index 7c97ba022e..e2b1cfc152 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -82,7 +82,7 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di question_xpath, {} ) action_supplemental_data = question_supplemental_data.setdefault( - action_id, action.action_class_config.default_type + action_id, {} ) if not ( diff --git a/kobo/apps/subsequences/tasks.py b/kobo/apps/subsequences/tasks.py new file mode 100644 index 0000000000..7f962d4c4a --- /dev/null +++ b/kobo/apps/subsequences/tasks.py @@ -0,0 +1,26 @@ + +from django.apps import apps +from django.conf import settings +from django.core.exceptions import ObjectDoesNotExist + +from kobo.apps.subsequences.exceptions import InvalidAction +from kobo.celery import celery_app + + + +@celery_app.task( + autoretry_for=(ObjectDoesNotExist,), + max_retries=settings.MAX_RETRIES_FOR_IMPORT_EXPORT_TASK, + retry_backoff=True, +) +def poll_run_automated_process(asset_id: int, submission: dict, action_data: dict): + # Avoid circular import + SubmissionSupplement = apps.get_model('subsequences', 'SubmissionSupplement') # noqa + try: + submission_supplement = SubmissionSupplement.revise_data( + asset_id, submission, action_data + ) + except InvalidAction: + return + + # submission diff --git a/kobo/apps/subsequences/tests/api/v2/test_permissions.py b/kobo/apps/subsequences/tests/api/v2/test_permissions.py index 100a0a5205..449d08393e 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_permissions.py +++ b/kobo/apps/subsequences/tests/api/v2/test_permissions.py @@ -165,9 +165,14 @@ def test_can_write(self, username, shared, status_code): 'manual_transcription': { '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:27:00Z', - '_dateAccepted': '2024-04-08T15:27:00Z', - 'language': 'es', - 'value': 'buenas noches', + '_versions': [ + { + '_dateCreated': '2024-04-08T15:27:00Z', + '_dateAccepted': 
'2024-04-08T15:27:00Z', + 'language': 'es', + 'value': 'buenas noches', + } + ], }, }, } diff --git a/kobo/apps/subsequences/tests/test_automated_google_transcription.py b/kobo/apps/subsequences/tests/test_automated_google_transcription.py index 0fa0bd04d0..c583bf7cdd 100644 --- a/kobo/apps/subsequences/tests/test_automated_google_transcription.py +++ b/kobo/apps/subsequences/tests/test_automated_google_transcription.py @@ -156,7 +156,7 @@ def test_valid_result_passes_validation(): fourth = {'language': 'fr', 'accepted': True} fifth = {'language': 'fr', 'value': None} six = {'language': 'es', 'value': 'seis'} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} mock_service = MagicMock() with patch( @@ -180,11 +180,11 @@ def test_valid_result_passes_validation(): action.validate_result(mock_sup_det) - assert '_dateAccepted' in mock_sup_det['_revisions'][1] - assert mock_sup_det['_revisions'][0]['status'] == 'deleted' + assert '_dateAccepted' in mock_sup_det['_versions'][2] + assert mock_sup_det['_versions'][1]['status'] == 'deleted' -def test_acceptance_does_not_produce_revisions(): +def test_acceptance_does_not_produce_versions(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] action = AutomatedGoogleTranscriptionAction(xpath, params) @@ -192,7 +192,7 @@ def test_acceptance_does_not_produce_revisions(): first = {'language': 'fr', 'value': 'un'} second = {'language': 'fr', 'accepted': True} third = {'language': 'fr', 'accepted': False} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} mock_service = MagicMock() with patch( @@ -213,9 +213,12 @@ def test_acceptance_does_not_produce_revisions(): mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data ) - assert '_revisions' not in mock_sup_det + assert '_versions' in mock_sup_det if data.get('value') is None: - is_date_accepted_present = mock_sup_det.get('_dateAccepted') 
is None + is_date_accepted_present = ( + mock_sup_det['_versions'][0].get('_dateAccepted') + is None + ) assert is_date_accepted_present is not bool(data.get('accepted')) action.validate_result(mock_sup_det) @@ -232,12 +235,11 @@ def test_invalid_result_fails_validation(): fourth = {'language': 'fr', 'accepted': True} fifth = {'language': 'fr', 'value': None} six = {'language': 'es', 'value': 'seis'} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', - # noqa + 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): for data in first, second, third, fourth, fifth, six: @@ -257,15 +259,15 @@ def test_invalid_result_fails_validation(): action.validate_result(mock_sup_det) - # erroneously add '_dateModified' onto a revision - first_revision = mock_sup_det['_revisions'][0] - first_revision['_dateModified'] = first_revision['_dateCreated'] + # erroneously add '_dateModified' onto a version + first_version = mock_sup_det['_versions'][0] + first_version['_dateModified'] = first_version['_dateCreated'] with pytest.raises(jsonschema.exceptions.ValidationError): action.validate_result(mock_sup_det) -def test_transcription_revisions_are_retained_in_supplemental_details(): +def test_transcription_versions_are_retained_in_supplemental_details(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] action = AutomatedGoogleTranscriptionAction(xpath, params) @@ -274,8 +276,7 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): second = {'language': 'fr', 'value': 'Aucune idée'} mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', - # noqa + 
'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): value = first.pop('value', None) @@ -283,14 +284,15 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, - action.action_class_config.default_type, + {}, first, ) - assert mock_sup_det['language'] == 'es' - assert mock_sup_det['value'] == 'Ni idea' + assert mock_sup_det['_versions'][0]['language'] == 'es' + assert mock_sup_det['_versions'][0]['value'] == 'Ni idea' assert mock_sup_det['_dateCreated'] == mock_sup_det['_dateModified'] - assert '_revisions' not in mock_sup_det + assert 'value' not in mock_sup_det + assert 'language' not in mock_sup_det first_time = mock_sup_det['_dateCreated'] with patch( @@ -303,19 +305,16 @@ def test_transcription_revisions_are_retained_in_supplemental_details(): EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second ) - assert len(mock_sup_det['_revisions']) == 1 - - # the revision should encompass the first transcript - assert mock_sup_det['_revisions'][0].items() >= first.items() + assert len(mock_sup_det['_versions']) == 2 - # the revision should have a creation timestamp equal to that of the first + # the first version should have a creation timestamp equal to that of the first # transcript - assert mock_sup_det['_revisions'][0]['_dateCreated'] == first_time + assert mock_sup_det['_versions'][1]['_dateCreated'] == first_time - # revisions should not list a modification timestamp - assert '_dateModified' not in mock_sup_det['_revisions'][0] + # versions should not list a modification timestamp + assert '_dateModified' not in mock_sup_det['_versions'][0] - # the record itself (not revision) should have an unchanged creation + # the record itself (not version) should have an unchanged creation # timestamp assert mock_sup_det['_dateCreated'] == first_time @@ -324,11 +323,8 @@ def 
test_transcription_revisions_are_retained_in_supplemental_details(): mock_sup_det['_dateCreated'] ) - # the record itself should encompass the second transcript - assert mock_sup_det.items() >= second.items() - -def test_latest_revision_is_first(): +def test_latest_version_is_first(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] action = AutomatedGoogleTranscriptionAction(xpath, params) @@ -337,11 +333,10 @@ def test_latest_revision_is_first(): second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', - # noqa + 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): for data in first, second, third: @@ -354,6 +349,6 @@ def test_latest_revision_is_first(): EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data ) - assert mock_sup_det['value'] == 'trois' - assert mock_sup_det['_revisions'][0]['value'] == 'deux' - assert mock_sup_det['_revisions'][1]['value'] == 'un' + assert mock_sup_det['_versions'][0]['value'] == 'trois' + assert mock_sup_det['_versions'][1]['value'] == 'deux' + assert mock_sup_det['_versions'][2]['value'] == 'un' diff --git a/kobo/apps/subsequences/tests/test_automated_google_translation.py b/kobo/apps/subsequences/tests/test_automated_google_translation.py index 5163c4a9fc..a53633d3e9 100644 --- a/kobo/apps/subsequences/tests/test_automated_google_translation.py +++ b/kobo/apps/subsequences/tests/test_automated_google_translation.py @@ -157,7 +157,7 @@ def test_valid_result_passes_validation(): fourth = {'language': 'fr', 'accepted': True} fifth = {'language': 'fr', 'value': None} six = {'language': 'es', 'value': 'seis'} - mock_sup_det = 
action.action_class_config.default_type + mock_sup_det = {} mock_service = MagicMock() with patch( @@ -181,13 +181,13 @@ def test_valid_result_passes_validation(): action.validate_result(mock_sup_det) - assert '_dateAccepted' in mock_sup_det[0]['_revisions'][0] - assert mock_sup_det[0]['status'] == 'deleted' - assert mock_sup_det[0]['_revisions'][0]['status'] == 'complete' - assert mock_sup_det[1]['_revisions'][0]['status'] == 'complete' + assert '_dateAccepted' in mock_sup_det['fr']['_versions'][1] + assert mock_sup_det['fr']['_versions'][0]['status'] == 'deleted' + assert mock_sup_det['es']['_versions'][1]['status'] == 'complete' + assert mock_sup_det['fr']['_versions'][-1]['status'] == 'complete' -def test_acceptance_does_not_produce_revisions(): +def test_acceptance_does_not_produce_versions(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] action = AutomatedGoogleTranslationAction(xpath, params) @@ -195,7 +195,7 @@ def test_acceptance_does_not_produce_revisions(): first = {'language': 'fr', 'value': 'un'} second = {'language': 'fr', 'accepted': True} third = {'language': 'fr', 'accepted': False} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} mock_service = MagicMock() with patch( @@ -216,9 +216,8 @@ def test_acceptance_does_not_produce_revisions(): mock_sup_det = action.revise_data( EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, data ) - assert '_revisions' not in mock_sup_det[0] if data.get('value') is None: - is_date_accepted_present = mock_sup_det[0].get('_dateAccepted') is None + is_date_accepted_present = mock_sup_det['fr']['_versions'][0].get('_dateAccepted') is None assert is_date_accepted_present is not bool(data.get('accepted')) action.validate_result(mock_sup_det) @@ -235,7 +234,7 @@ def test_invalid_result_fails_validation(): fourth = {'language': 'fr', 'accepted': True} fifth = {'language': 'fr', 'value': None} six = {'language': 'es', 'value': 
'seis'} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} mock_service = MagicMock() with patch( @@ -260,15 +259,15 @@ def test_invalid_result_fails_validation(): action.validate_result(mock_sup_det) - # erroneously add '_dateModified' onto a revision - first_revision = mock_sup_det[0]['_revisions'][0] - first_revision['_dateModified'] = first_revision['_dateCreated'] + # erroneously add '_dateModified' onto a version + first_version = mock_sup_det['fr']['_versions'][0] + first_version['_dateModified'] = first_version['_dateCreated'] with pytest.raises(jsonschema.exceptions.ValidationError): action.validate_result(mock_sup_det) -def test_translation_revisions_are_retained_in_supplemental_details(): +def test_translation_versions_are_retained_in_supplemental_details(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] action = AutomatedGoogleTranslationAction(xpath, params) @@ -288,15 +287,14 @@ def test_translation_revisions_are_retained_in_supplemental_details(): mock_sup_det = action.revise_data( EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, - action.action_class_config.default_type, + {}, first, ) - assert mock_sup_det[0]['language'] == 'es' - assert mock_sup_det[0]['value'] == 'Ni idea' - assert mock_sup_det[0]['_dateCreated'] == mock_sup_det[0]['_dateModified'] - assert '_revisions' not in mock_sup_det[0] - first_time = mock_sup_det[0]['_dateCreated'] + assert mock_sup_det['es']['_versions'][0]['language'] == 'es' + assert mock_sup_det['es']['_versions'][0]['value'] == 'Ni idea' + assert mock_sup_det['es']['_dateCreated'] == mock_sup_det['es']['_dateModified'] + first_time = mock_sup_det['es']['_versions'][0]['_dateCreated'] with patch( 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa @@ -308,12 +306,11 @@ def test_translation_revisions_are_retained_in_supplemental_details(): EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, second 
) - assert len(mock_sup_det) == 2 - assert '_revision' not in mock_sup_det[0] - assert mock_sup_det[1]['language'] == 'fr' - assert mock_sup_det[1]['value'] == 'Aucune idée' - assert mock_sup_det[1]['_dateCreated'] == mock_sup_det[1]['_dateModified'] - assert '_revision' not in mock_sup_det[1] + assert len(mock_sup_det.keys()) == 2 + + assert mock_sup_det['fr']['_versions'][0]['language'] == 'fr' + assert mock_sup_det['fr']['_versions'][0]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_dateCreated'] == mock_sup_det['fr']['_dateModified'] with patch( 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa @@ -325,32 +322,26 @@ def test_translation_revisions_are_retained_in_supplemental_details(): EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, third ) - assert len(mock_sup_det) == 2 - - # the revision should encompass the first translation - assert mock_sup_det[0]['_revisions'][0].items() >= first.items() + assert len(mock_sup_det.keys()) == 2 - # the revision should have a creation timestamp equal to that of the first + # the first version should have a creation timestamp equal to that of the first # translation - assert mock_sup_det[0]['_revisions'][0]['_dateCreated'] == first_time + assert mock_sup_det['es']['_versions'][-1]['_dateCreated'] == first_time - # revisions should not list a modification timestamp - assert '_dateModified' not in mock_sup_det[0]['_revisions'][0] + # versions should not list a modification timestamp + assert '_dateModified' not in mock_sup_det['es']['_versions'][0] - # the record itself (not revision) should have an unchanged creation + # the record itself (not version) should have an unchanged creation # timestamp - assert mock_sup_det[0]['_dateCreated'] == first_time + assert mock_sup_det['es']['_dateCreated'] == first_time # the record itself should have an updated modification timestamp assert dateutil.parser.parse( - mock_sup_det[0]['_dateModified'] - ) > 
dateutil.parser.parse(mock_sup_det[0]['_dateCreated']) - - # the record itself should encompass the second translation - assert mock_sup_det[0].items() >= third.items() + mock_sup_det['es']['_dateModified'] + ) > dateutil.parser.parse(mock_sup_det['es']['_dateCreated']) -def test_latest_revision_is_first(): +def test_latest_version_is_first(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] action = AutomatedGoogleTranslationAction(xpath, params) @@ -359,7 +350,7 @@ def test_latest_revision_is_first(): second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} mock_service = MagicMock() with patch( 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', @@ -376,9 +367,9 @@ def test_latest_revision_is_first(): EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, data ) - assert mock_sup_det[0]['value'] == 'trois' - assert mock_sup_det[0]['_revisions'][0]['value'] == 'deux' - assert mock_sup_det[0]['_revisions'][1]['value'] == 'un' + assert mock_sup_det['fr']['_versions'][0]['value'] == 'trois' + assert mock_sup_det['fr']['_versions'][1]['value'] == 'deux' + assert mock_sup_det['fr']['_versions'][2]['value'] == 'un' def test_cannot_revise_data_without_transcription(): xpath = 'group_name/question_name' # irrelevant for this test @@ -386,7 +377,7 @@ def test_cannot_revise_data_without_transcription(): action = AutomatedGoogleTranslationAction(xpath, params) first = {'language': 'fr', 'value': 'un'} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} mock_service = MagicMock() with patch( 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py index 757c431d60..fbf5b48cac 
100644 --- a/kobo/apps/subsequences/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences/tests/test_manual_transcription.py @@ -61,7 +61,7 @@ def test_valid_result_passes_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data @@ -79,14 +79,14 @@ def test_invalid_result_fails_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data ) # erroneously add '_dateModified' onto a revision - mock_sup_det['_revisions'][0]['_dateModified'] = mock_sup_det['_revisions'][0][ + mock_sup_det['_versions'][0]['_dateModified'] = mock_sup_det['_versions'][0][ '_dateCreated' ] @@ -94,7 +94,7 @@ def test_invalid_result_fails_validation(): action.validate_result(mock_sup_det) -def test_transcript_revisions_are_retained_in_supplemental_details(): +def test_transcript_versions_are_retained_in_supplemental_details(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] action = ManualTranscriptionAction(xpath, params) @@ -104,30 +104,27 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, - action.action_class_config.default_type, + {}, first, ) - assert mock_sup_det['language'] == 'en' - assert mock_sup_det['value'] == 'No idea' assert mock_sup_det['_dateCreated'] == mock_sup_det['_dateModified'] - assert '_revisions' not in 
mock_sup_det + assert len(mock_sup_det['_versions']) == 1 + assert mock_sup_det['_versions'][0]['language'] == 'en' + assert mock_sup_det['_versions'][0]['value'] == 'No idea' first_time = mock_sup_det['_dateCreated'] mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second ) - assert len(mock_sup_det['_revisions']) == 1 + assert len(mock_sup_det['_versions']) == 2 - # the revision should encompass the first transcript - assert mock_sup_det['_revisions'][0].items() >= first.items() - - # the revision should have a creation timestamp equal to that of the first + # the version should have a creation timestamp equal to that of the first # transcript - assert mock_sup_det['_revisions'][0]['_dateCreated'] == first_time + assert mock_sup_det['_versions'][-1]['_dateCreated'] == first_time - # revisions should not list a modification timestamp - assert '_dateModified' not in mock_sup_det['_revisions'][0] + # versions should not list a modification timestamp + assert '_dateModified' not in mock_sup_det['_versions'][0] # the record itself (not revision) should have an unchanged creation # timestamp @@ -138,9 +135,6 @@ def test_transcript_revisions_are_retained_in_supplemental_details(): mock_sup_det['_dateCreated'] ) - # the record itself should encompass the second transcript - assert mock_sup_det.items() >= second.items() - def test_setting_transcript_to_empty_string(): xpath = 'group_name/question_name' # irrelevant for this test @@ -153,16 +147,16 @@ def test_setting_transcript_to_empty_string(): mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, - action.action_class_config.default_type, + {}, first, ) - assert mock_sup_det['value'] == 'Aucune idée' + assert mock_sup_det['_versions'][0]['value'] == 'Aucune idée' mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second ) - assert mock_sup_det['value'] == '' - assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' + assert 
mock_sup_det['_versions'][0]['value'] == '' + assert mock_sup_det['_versions'][1]['value'] == 'Aucune idée' def test_setting_transcript_to_none(): @@ -176,16 +170,16 @@ def test_setting_transcript_to_none(): mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, - action.action_class_config.default_type, + {}, first, ) - assert mock_sup_det['value'] == 'Aucune idée' + assert mock_sup_det['_versions'][0]['value'] == 'Aucune idée' mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second ) - assert mock_sup_det['value'] is None - assert mock_sup_det['_revisions'][0]['value'] == 'Aucune idée' + assert mock_sup_det['_versions'][0]['value'] is None + assert mock_sup_det['_versions'][1]['value'] == 'Aucune idée' def test_latest_revision_is_first(): @@ -197,12 +191,12 @@ def test_latest_revision_is_first(): second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} for data in first, second, third: mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data ) - assert mock_sup_det['value'] == 'trois' - assert mock_sup_det['_revisions'][0]['value'] == 'deux' - assert mock_sup_det['_revisions'][1]['value'] == 'un' + assert mock_sup_det['_versions'][0]['value'] == 'trois' + assert mock_sup_det['_versions'][1]['value'] == 'deux' + assert mock_sup_det['_versions'][2]['value'] == 'un' diff --git a/kobo/apps/subsequences/tests/test_manual_translation.py b/kobo/apps/subsequences/tests/test_manual_translation.py index 12e968bb7b..9a3e046983 100644 --- a/kobo/apps/subsequences/tests/test_manual_translation.py +++ b/kobo/apps/subsequences/tests/test_manual_translation.py @@ -58,7 +58,7 @@ def test_valid_result_passes_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = 
action.action_class_config.default_type + mock_sup_det = {} for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data @@ -76,21 +76,21 @@ def test_invalid_result_fails_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} for data in first, second, third, fourth, fifth: mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data ) - # erroneously add '_dateModified' onto a revision - first_revision = mock_sup_det[0]['_revisions'][0] - first_revision['_dateModified'] = first_revision['_dateCreated'] + # erroneously add '_dateModified' onto a version + first_version = mock_sup_det['en']['_versions'][0] + first_version['_dateModified'] = first_version['_dateCreated'] with pytest.raises(jsonschema.exceptions.ValidationError): action.validate_result(mock_sup_det) -def test_translation_revisions_are_retained_in_supplemental_details(): +def test_translation_versions_are_retained_in_supplemental_details(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] action = ManualTranslationAction(xpath, params) @@ -101,52 +101,47 @@ def test_translation_revisions_are_retained_in_supplemental_details(): mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, - action.action_class_config.default_type, + {}, first, ) - assert len(mock_sup_det) == 1 - assert mock_sup_det[0]['language'] == 'en' - assert mock_sup_det[0]['value'] == 'No idea' - assert mock_sup_det[0]['_dateCreated'] == mock_sup_det[0]['_dateModified'] - assert '_revisions' not in mock_sup_det[0] - first_time = mock_sup_det[0]['_dateCreated'] + assert len(mock_sup_det.keys()) == 1 + assert '_versions' in mock_sup_det['en'] + assert 
mock_sup_det['en']['_versions'][0]['language'] == 'en' + assert mock_sup_det['en']['_versions'][0]['value'] == 'No idea' + assert mock_sup_det['en']['_dateCreated'] == mock_sup_det['en']['_dateModified'] + + first_time = mock_sup_det['en']['_dateCreated'] mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second ) - assert len(mock_sup_det) == 2 - assert mock_sup_det[1]['language'] == 'fr' - assert mock_sup_det[1]['value'] == 'Aucune idée' - assert mock_sup_det[1]['_dateCreated'] == mock_sup_det[1]['_dateModified'] - assert '_revisions' not in mock_sup_det[1] + assert len(mock_sup_det.keys()) == 2 + assert '_versions' in mock_sup_det['fr'] + assert mock_sup_det['fr']['_versions'][0]['language'] == 'fr' + assert mock_sup_det['fr']['_versions'][0]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_dateCreated'] == mock_sup_det['fr']['_dateModified'] mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, third ) - assert len(mock_sup_det) == 2 - - # the revision should encompass the first translation - assert mock_sup_det[0]['_revisions'][0].items() >= first.items() + assert len(mock_sup_det.keys()) == 2 - # the revision should have a creation timestamp equal to that of the first + # the first version should have a creation timestamp equal to that of the first # translation - assert mock_sup_det[0]['_revisions'][0]['_dateCreated'] == first_time + assert mock_sup_det['en']['_versions'][-1]['_dateCreated'] == first_time - # revisions should not list a modification timestamp - assert '_dateModified' not in mock_sup_det[0]['_revisions'][0] + # versions should not list a modification timestamp + assert '_dateModified' not in mock_sup_det['en']['_versions'][0] - # the record itself (not revision) should have an unchanged creation + # the record itself (not version) should have an unchanged creation # timestamp - assert mock_sup_det[0]['_dateCreated'] == first_time + assert 
mock_sup_det['en']['_dateCreated'] == first_time # the record itself should have an updated modification timestamp assert dateutil.parser.parse( - mock_sup_det[0]['_dateModified'] - ) > dateutil.parser.parse(mock_sup_det[0]['_dateCreated']) - - # the record itself should encompass the second translation - assert mock_sup_det[0].items() >= third.items() + mock_sup_det['en']['_dateModified'] + ) > dateutil.parser.parse(mock_sup_det['en']['_dateCreated']) def test_setting_translation_to_empty_string(): @@ -159,16 +154,16 @@ def test_setting_translation_to_empty_string(): mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, - action.action_class_config.default_type, + {}, first, ) - assert mock_sup_det[0]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_versions'][0]['value'] == 'Aucune idée' mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second ) - assert mock_sup_det[0]['value'] == '' - assert mock_sup_det[0]['_revisions'][0]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_versions'][0]['value'] == '' + assert mock_sup_det['fr']['_versions'][1]['value'] == 'Aucune idée' def test_setting_translation_to_none(): @@ -182,19 +177,19 @@ def test_setting_translation_to_none(): mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, - action.action_class_config.default_type, + {}, first, ) - assert mock_sup_det[0]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_versions'][0]['value'] == 'Aucune idée' mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second ) - assert mock_sup_det[0]['value'] is None - assert mock_sup_det[0]['_revisions'][0]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_versions'][0]['value'] is None + assert mock_sup_det['fr']['_versions'][1]['value'] == 'Aucune idée' -def test_latest_revision_is_first(): +def test_latest_version_is_first(): xpath = 'group_name/question_name' # irrelevant for this test params = 
[{'language': 'fr'}, {'language': 'en'}] action = ManualTranslationAction(xpath, params) @@ -203,12 +198,12 @@ def test_latest_revision_is_first(): second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = action.action_class_config.default_type + mock_sup_det = {} for data in first, second, third: mock_sup_det = action.revise_data( EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data ) - assert mock_sup_det[0]['value'] == 'trois' - assert mock_sup_det[0]['_revisions'][0]['value'] == 'deux' - assert mock_sup_det[0]['_revisions'][1]['value'] == 'un' + assert mock_sup_det['fr']['_versions'][0]['value'] == 'trois' + assert mock_sup_det['fr']['_versions'][1]['value'] == 'deux' + assert mock_sup_det['fr']['_versions'][2]['value'] == 'un' diff --git a/kobo/apps/subsequences/tests/test_models.py b/kobo/apps/subsequences/tests/test_models.py index 12b805ac1b..790b92b269 100644 --- a/kobo/apps/subsequences/tests/test_models.py +++ b/kobo/apps/subsequences/tests/test_models.py @@ -33,12 +33,15 @@ class SubmissionSupplementTestCase(TestCase): '_version': '20250820', 'group_name/question_name': { 'manual_transcription': { - 'language': 'ar', - 'value': 'فارغ', '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:31:00Z', - '_dateAccepted': '2024-04-08T15:31:00Z', - '_revisions': [ + '_versions': [ + { + 'language': 'ar', + 'value': 'فارغ', + '_dateCreated': '2024-04-08T15:31:00Z', + '_dateAccepted': '2024-04-08T15:31:00Z', + }, { 'language': 'ar', 'value': 'هائج', @@ -47,21 +50,27 @@ class SubmissionSupplementTestCase(TestCase): } ], }, - 'manual_translation': [ - { - 'language': 'en', - 'value': 'berserk', + 'manual_translation': { + 'en': { '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:27:00Z', - '_dateAccepted': '2024-04-08T15:27:00Z', + '_versions': [{ + 'language': 'en', + 'value': 'berserk', + '_dateCreated': '2024-04-08T15:27:00Z', + '_dateAccepted': '2024-04-08T15:27:00Z', + 
}], }, - { - 'language': 'es', - 'value': 'enloquecido', + 'es': { '_dateCreated': '2024-04-08T15:29:00Z', '_dateModified': '2024-04-08T15:32:00Z', - '_dateAccepted': '2024-04-08T15:32:00Z', - '_revisions': [ + '_versions': [ + { + 'language': 'es', + 'value': 'enloquecido', + '_dateCreated': '2024-04-08T15:32:00Z', + '_dateAccepted': '2024-04-08T15:32:00Z', + }, { 'language': 'es', 'value': 'loco', @@ -70,7 +79,7 @@ class SubmissionSupplementTestCase(TestCase): } ], }, - ], + }, }, } diff --git a/kobo/apps/subsequences__old/tasks/__init__.py b/kobo/apps/subsequences__old/tasks/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 From 3202533cb1215dffa7eb76bbea671f16385407db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 28 Aug 2025 18:51:00 -0400 Subject: [PATCH 097/138] Comments --- kobo/apps/subsequences/tasks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kobo/apps/subsequences/tasks.py b/kobo/apps/subsequences/tasks.py index 7f962d4c4a..d28a6abc14 100644 --- a/kobo/apps/subsequences/tasks.py +++ b/kobo/apps/subsequences/tasks.py @@ -1,3 +1,7 @@ +##### +# WIP: Unfinished business +# +##### from django.apps import apps from django.conf import settings From 2fd57700264567086a1ba5c5762feb1642a69be9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 29 Aug 2025 10:13:39 -0400 Subject: [PATCH 098/138] Add comments and draft logic for background updates --- kobo/apps/subsequences/actions/base.py | 39 +++++++++++++++++--------- kobo/apps/subsequences/models.py | 9 +++++- kobo/apps/subsequences/tasks.py | 32 +++++++++++++++++++++ 3 files changed, 65 insertions(+), 15 deletions(-) create mode 100644 kobo/apps/subsequences/tasks.py diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index b16bd13f4c..eec29f1aa8 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -10,6 +10,7 @@ from kpi.exceptions import 
UsageLimitExceededException from kpi.utils.usage_calculator import ServiceUsageCalculator from ..exceptions import InvalidItem +from ..tasks import poll_run_automated_process from ..type_aliases import NLPExternalServiceClass """ @@ -215,7 +216,7 @@ def revise_data( self, submission: dict, question_supplemental_data: dict, - action_supplement_data: dict, + action_supplemental_data: dict, action_data: dict, asset: 'kpi.models.Asset' = None, ) -> dict | None: @@ -229,19 +230,19 @@ def revise_data( now_str = utc_datetime_to_js_str(timezone.now()) item_index = None - action_supplement_data_copy = deepcopy(action_supplement_data) + action_supplemental_data_copy = deepcopy(action_supplemental_data) if not isinstance(self.action_class_config.default_type, list): - revision = action_supplement_data_copy + revision = action_supplemental_data_copy else: # TODO: Multiple keys are not supported. # Not a big issue for now since translation actions don’t use locale # (yet?) and transcription actions only involve one occurrence at a time. needle = action_data[self.action_class_config.key] revision = {} - if not isinstance(action_supplement_data, list): + if not isinstance(action_supplemental_data_copy, list): raise InvalidItem - for idx, item in enumerate(action_supplement_data): + for idx, item in enumerate(action_supplemental_data_copy): if needle == item[self.action_class_config.key]: revision = deepcopy(item) item_index = idx @@ -281,7 +282,7 @@ def revise_data( # If the default type is not a list, we handle a single record case. if not isinstance(self.action_class_config.default_type, list): - if action_supplement_data: + if action_supplemental_data: revisions.insert(0, revision) new_record[self.REVISIONS_FIELD] = revisions else: @@ -313,11 +314,11 @@ def revise_data( # - Otherwise, replace the record at the given index. # Finally, update `new_record` to reference the full updated list. 
if item_index is None: - action_supplement_data_copy.append(new_record) + action_supplemental_data_copy.append(new_record) else: - action_supplement_data_copy[item_index] = new_record + action_supplemental_data_copy[item_index] = new_record - new_record = action_supplement_data_copy + new_record = action_supplemental_data_copy self.validate_result(new_record) @@ -687,8 +688,9 @@ def run_automated_process( ) # Otherwise, trigger the external service. + asset = kwargs['asset'] NLPService = self.get_nlp_service_class() # noqa - service = NLPService(submission, asset=kwargs['asset']) + service = NLPService(submission, asset=asset) service_data = service.process_data(self.source_question_xpath, action_data) # Remove the 'dependency' flag from action_data since it is only used @@ -699,11 +701,20 @@ def run_automated_process( # Returning None ensures that `revise_data()` will not be called afterwards. if ( accepted is None - and action_supplement_data.get('status') - == service_data['status'] - == 'in_progress' + and service_data['status'] == 'in_progress' ): - return None + if action_supplement_data.get('status'): + return None + else: + # TODO Retry with Celery, make it work! + poll_run_automated_process.delay( + submission, + question_supplemental_data, + action_supplement_data, + action_data, + action_id=self.ID, + asset_id=asset.pk, + ) # Normal case: return the processed transcription data. 
return service_data diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 7c97ba022e..dc50cd4b52 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -3,6 +3,7 @@ from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix from kpi.models.abstract_models import AbstractTimeStampedModel from .actions import ACTION_IDS_TO_CLASSES +from .constants import SUBMISSION_UUID_FIELD from .exceptions import InvalidAction, InvalidXPath from .schemas import validate_submission_supplement @@ -45,7 +46,7 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di # TODO: migrate from old per-asset schema raise NotImplementedError - submission_uuid = remove_uuid_prefix(submission['meta/rootUuid']) # constant? + submission_uuid = remove_uuid_prefix(submission[SUBMISSION_UUID_FIELD]) # constant? supplemental_data = SubmissionExtras.objects.get_or_create( asset=asset, submission_uuid=submission_uuid )[ @@ -85,6 +86,12 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di action_id, action.action_class_config.default_type ) + # TODO: `action.revise_data()` may need `question_xpath` to retry when + # the action is automated and returns "in_progress" (see + # `tasks.py::poll_run_automated_progress()`). + # Also, `action_supplemental_data` seems redundant now that + # `question_supplemental_data` is passed; it could potentially be + # rebuilt inside `action.revise_data()`. if not ( action_supplemental_data := action.revise_data( submission, diff --git a/kobo/apps/subsequences/tasks.py b/kobo/apps/subsequences/tasks.py new file mode 100644 index 0000000000..c309dcdf62 --- /dev/null +++ b/kobo/apps/subsequences/tasks.py @@ -0,0 +1,32 @@ +from django.apps import apps +from kobo.apps.subsequences.exceptions import SubsequenceTimeoutError +from kobo.celery import celery_app + + +# TODO Adjust max_retries. 
Should be no longer than external service timeout. +@celery_app.task( + autoretry_for=(SubsequenceTimeoutError,), + retry_backoff=60, + max_retries=5, + retry_jitter=False, + queue='kpi_low_priority_queue', +) +def poll_run_automated_process( + submission: dict, + question_supplemental_data: dict, + action_supplement_data: dict, + action_data: dict, + action_id: str, + asset_id: int, +): + Asset = apps.get_model('kpi', 'Asset') # noqa: N806 + SubmissionSupplement = apps.get_model('subsequences', 'SubmissionSupplement') # noqa: N806 + # TODO Rebuild incoming data from question supplemental data. + # We are missing the question_name_xpath, see comment in + # `SupplementData.revise_data()` + incoming_data = {} + + asset = Asset.objects.defer('content').get(id=asset_id) + supplement_data = SubmissionSupplement.revise_data(asset, submission, incoming_data) + if supplement_data['status'] == 'in_progress': + raise SubsequenceTimeoutError From e81416cae8985870d1ee91851af93dd43b94c136 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Wed, 24 Sep 2025 10:52:52 -0400 Subject: [PATCH 099/138] Correct a few typos --- kobo/apps/subsequences/actions/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 355234570f..ab0cd76587 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -54,17 +54,18 @@ '_version': '20250820', 'my_audio_question': { 'manual_transcription': { + # TODO: think about wrapping in language dictionary like translations '_dateCreated': '2025-08-21T20:55:42.012053Z', '_dateModified': '2025-08-21T20:57:28.154567Z', '_versions': [ { - 'transcript': 'هائج', + 'value': 'هائج', 'language': 'ar', '_dateCreated': '2025-08-21T20:57:28.154567Z', '_dateAccepted': '2025-08-21T20:57:28.154567Z', }, { - 'transcript': 'فارغ', + 'value': 'فارغ', 'language': 'ar', '_dateCreated': '2025-08-21T20:55:42.012053Z', '_dateAccepted': '2025-08-21T20:55:42.012053Z', @@ -95,7 +96,7 @@ '_dateModified': '2025-08-21T22:00:10.862880Z', '_versions': [ { - 'translation': 'fou', + 'value': 'fou', 'language': 'fr', '_dateCreated': '2025-08-21T22:00:10.862880Z', '_dateAccepted': '2025-08-21T22:00:10.862880Z', From fe7c88e6e3f25d6fab542d903df41e14e225a445 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Wed, 24 Sep 2025 10:52:52 -0400 Subject: [PATCH 100/138] Correct a few typos --- kobo/apps/subsequences/actions/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 355234570f..ab0cd76587 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -54,17 +54,18 @@ '_version': '20250820', 'my_audio_question': { 'manual_transcription': { + # TODO: think about wrapping in language dictionary like translations '_dateCreated': '2025-08-21T20:55:42.012053Z', '_dateModified': '2025-08-21T20:57:28.154567Z', '_versions': [ { - 'transcript': 'هائج', + 'value': 'هائج', 'language': 'ar', '_dateCreated': '2025-08-21T20:57:28.154567Z', '_dateAccepted': '2025-08-21T20:57:28.154567Z', }, { - 'transcript': 'فارغ', + 'value': 'فارغ', 'language': 'ar', '_dateCreated': '2025-08-21T20:55:42.012053Z', '_dateAccepted': '2025-08-21T20:55:42.012053Z', @@ -95,7 +96,7 @@ '_dateModified': '2025-08-21T22:00:10.862880Z', '_versions': [ { - 'translation': 'fou', + 'value': 'fou', 'language': 'fr', '_dateCreated': '2025-08-21T22:00:10.862880Z', '_dateAccepted': '2025-08-21T22:00:10.862880Z', From ef087812b07974da32a115bfa7ecd2265a17643f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 24 Sep 2025 12:16:08 -0400 Subject: [PATCH 101/138] Add an unique identifier for each version --- kobo/apps/subsequences/actions/__init__.py | 7 - .../actions/automated_google_translation.py | 74 ++++---- kobo/apps/subsequences/actions/base.py | 33 ++-- kobo/apps/subsequences/actions/mixins.py | 8 +- .../tests/api/v2/test_permissions.py | 14 +- .../tests/api/v2/test_validation.py | 22 +-- kobo/apps/subsequences/tests/constants.py | 13 +- .../test_automated_google_translation.py | 38 +++-- kobo/apps/subsequences/tests/test_models.py | 160 ++++++++++-------- 9 files changed, 213 insertions(+), 156 deletions(-) diff --git 
a/kobo/apps/subsequences/actions/__init__.py b/kobo/apps/subsequences/actions/__init__.py index 19accc2b94..a34df26372 100644 --- a/kobo/apps/subsequences/actions/__init__.py +++ b/kobo/apps/subsequences/actions/__init__.py @@ -11,11 +11,4 @@ ManualTranslationAction, ) -TRANSCRIPTION_ACTIONS = ( - AutomatedGoogleTranscriptionAction, - ManualTranscriptionAction, -) - ACTION_IDS_TO_CLASSES = {a.ID: a for a in ACTIONS} - -TRANSCRIPTION_ACTION_IDS_TO_CLASSES = {a.ID: a for a in TRANSCRIPTION_ACTIONS} diff --git a/kobo/apps/subsequences/actions/automated_google_translation.py b/kobo/apps/subsequences/actions/automated_google_translation.py index 71887b9f95..7da8abe5ac 100644 --- a/kobo/apps/subsequences/actions/automated_google_translation.py +++ b/kobo/apps/subsequences/actions/automated_google_translation.py @@ -1,6 +1,8 @@ from dateutil import parser from kobo.apps.organizations.constants import UsageType +from ..actions.automated_google_transcription import AutomatedGoogleTranscriptionAction +from ..actions.manual_transcription import ManualTranscriptionAction from ..exceptions import TranscriptionNotFound from ..integrations.google.google_translate import GoogleTranslationService from ..type_aliases import NLPExternalServiceClass @@ -38,47 +40,61 @@ def _get_action_data_dependency( self, question_supplemental_data: dict, action_data: dict ) -> dict: """ - Retrieve and attach dependency data from another transcription action. - - This method searches `question_supplemental_data` for the most recent - transcription matching the base language of `action_data`. Regional - variants are not supported: only the language code is used to locate - the transcript. The found transcript (and locale if available) is then - added to `action_data` under the `transcript` field. + Attach the latest accepted transcript as a dependency for a translation action. + + Looks up prior transcription actions in `question_supplemental_data` and + selects the most recent accepted version. 
+ The chosen transcript is injected into `action_data['dependency']` with: + - 'value': transcript text + - 'language': preferred locale if present, else base language + - '_uuid': transcript UUID + + The search is restricted to known transcription action IDs (e.g., Google + automated and manual transcription). If none is found, raises + `TranscriptionNotFound`. """ - # Avoid circular imports - from ..actions import TRANSCRIPTION_ACTION_IDS_TO_CLASSES + # Action IDs that can provide a transcript dependency. + transcription_action_ids = ( + AutomatedGoogleTranscriptionAction.ID, + ManualTranscriptionAction.ID, + ) - transcript = transcript_language = None - last_date_modified = None + latest_version = None + latest_accepted_dt = None - # TODO Should we search only for accepted transcriptions? - for action_id in TRANSCRIPTION_ACTION_IDS_TO_CLASSES.keys(): - try: - question_supplemental_data[action_id]['value'] - except KeyError: + for action_id in transcription_action_ids: + # Each action's data is expected to store versions under "_versions". + action_supplemental_data = question_supplemental_data.get(action_id) + if not action_supplemental_data: continue - action_version = question_supplemental_data[action_id] - dependency_date_modified = parser.parse( - action_version[self.DATE_MODIFIED_FIELD] - ) + versions = action_supplemental_data.get(self.VERSION_FIELD) or [] + for version in versions: + # Skip versions without an acceptance timestamp. 
+ accepted_raw = version.get(self.DATE_ACCEPTED_FIELD) + if not accepted_raw: + continue + + accepted_dt = parser.parse(accepted_raw) - if not last_date_modified or last_date_modified < dependency_date_modified: - last_date_modified = dependency_date_modified - transcript = action_version['value'] - transcript_language = ( - action_version.get('locale') or action_version['language'] - ) + if latest_accepted_dt is None or accepted_dt > latest_accepted_dt: + latest_accepted_dt = accepted_dt + latest_version = version - if transcript is None: + if latest_version is None: raise TranscriptionNotFound + # Prefer a specific locale when available; otherwise use the base language. + language_or_locale = ( + latest_version.get('locale') or latest_version['language'] + ) + # Inject dependency property for translation service action_data['dependency'] = { - 'value': transcript, - 'language': transcript_language, + 'value': latest_version['value'], + 'language': language_or_locale, + self.UUID_FIELD: latest_version[self.UUID_FIELD], } return action_data diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index ab0cd76587..e313f8d6d5 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -1,3 +1,4 @@ +import uuid from copy import deepcopy from dataclasses import dataclass @@ -9,7 +10,6 @@ from kobo.apps.subsequences.utils.time import utc_datetime_to_js_str from kpi.exceptions import UsageLimitExceededException from kpi.utils.usage_calculator import ServiceUsageCalculator -from ..exceptions import InvalidItem from ..tasks import poll_run_automated_process from ..type_aliases import NLPExternalServiceClass @@ -63,12 +63,14 @@ 'language': 'ar', '_dateCreated': '2025-08-21T20:57:28.154567Z', '_dateAccepted': '2025-08-21T20:57:28.154567Z', + '_uuid': '4dcf9c9f-e503-4e5c-81f5-74250b295001', }, { 'value': 'فارغ', 'language': 'ar', '_dateCreated': '2025-08-21T20:55:42.012053Z', '_dateAccepted': 
'2025-08-21T20:55:42.012053Z', + '_uuid': '850e6359-50e8-4252-9895-e9669a27b1ea', } ], }, @@ -82,12 +84,14 @@ 'language': 'es', '_dateCreated': '2025-08-21T21:40:54.644308Z', '_dateAccepted': '2025-08-21T21:40:54.644308Z', + '_uuid': '22b04ce8-61c2-4383-836f-5d5f0ad73645', }, { 'value': 'loco', 'language': 'es', '_dateCreated': '2025-08-21T21:39:42.141306Z', '_dateAccepted': '2025-08-21T21:39:42.141306Z', + '_uuid': '13403918-6b53-4222-8f8f-27397b53e2ce', } ], }, @@ -100,6 +104,7 @@ 'language': 'fr', '_dateCreated': '2025-08-21T22:00:10.862880Z', '_dateAccepted': '2025-08-21T22:00:10.862880Z', + '_uuid': 'de6501fd-71c0-43fe-a569-b8407e50bc70', } ], } @@ -107,16 +112,19 @@ ], }, 'my_video_question': { - '_dateCreated': '2025-08-21T21:06:20.059117Z', - '_dateModified': '2025-08-21T21:06:20.059117Z', - '_versions': [ - { - 'value': 'sea horse sea hell', - 'language': 'en', - '_dateCreated': '2025-08-21T21:06:20.059117Z', - '_dateAccepted': '2025-08-21T21:06:20.059117Z', - } - ], + 'manual_transcription': { + '_dateCreated': '2025-08-21T21:06:20.059117Z', + '_dateModified': '2025-08-21T21:06:20.059117Z', + '_versions': [ + { + 'value': 'sea horse sea hell', + 'language': 'en', + '_dateCreated': '2025-08-21T21:06:20.059117Z', + '_dateAccepted': '2025-08-21T21:06:20.059117Z', + '_uuid': 'fec5a51d-bd12-4d61-86ba-c2e8507a2a93', + } + ], + } }, 'my_number_question': { 'number_multiplier': { @@ -125,6 +133,7 @@ '_versions': [ 'value': 99, '_dateCreated': '2025-08-21T21:09:34.504546Z', + '_uuid': '12345678-90ab-cdef-1234-567890abcdef', ], }, }, @@ -154,6 +163,7 @@ class BaseAction: DATE_CREATED_FIELD = '_dateCreated' DATE_MODIFIED_FIELD = '_dateModified' DATE_ACCEPTED_FIELD = '_dateAccepted' + UUID_FIELD = '_uuid' VERSION_FIELD = '_versions' action_class_config: ActionClassConfig | None = None @@ -300,6 +310,7 @@ def revise_data( new_version = deepcopy(action_data) new_version[self.DATE_CREATED_FIELD] = now_str + new_version[self.UUID_FIELD] = str(uuid.uuid4()) if 
self.DATE_CREATED_FIELD not in localized_action_supplemental_data: localized_action_supplemental_data[self.DATE_CREATED_FIELD] = now_str localized_action_supplemental_data[self.DATE_MODIFIED_FIELD] = now_str diff --git a/kobo/apps/subsequences/actions/mixins.py b/kobo/apps/subsequences/actions/mixins.py index 73922e2f4f..8b3a491d8c 100644 --- a/kobo/apps/subsequences/actions/mixins.py +++ b/kobo/apps/subsequences/actions/mixins.py @@ -37,9 +37,11 @@ def result_schema(self): 'properties': { self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.UUID_FIELD: {'$ref': '#/$defs/uuid'}, }, - 'required': [self.DATE_CREATED_FIELD], + 'required': [self.DATE_CREATED_FIELD, self.UUID_FIELD], }, + 'uuid': {'type': 'string', 'format': 'uuid'}, **data_schema_defs, # Copy defs at the root level }, } @@ -100,9 +102,11 @@ def result_schema(self): 'properties': { self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, + self.UUID_FIELD: {'$ref': '#/$defs/uuid'}, }, - 'required': [self.DATE_CREATED_FIELD], + 'required': [self.DATE_CREATED_FIELD, self.UUID_FIELD], }, + 'uuid': {'type': 'string', 'format': 'uuid'}, **data_schema_defs, }, } diff --git a/kobo/apps/subsequences/tests/api/v2/test_permissions.py b/kobo/apps/subsequences/tests/api/v2/test_permissions.py index 449d08393e..603a51128c 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_permissions.py +++ b/kobo/apps/subsequences/tests/api/v2/test_permissions.py @@ -1,4 +1,6 @@ +import uuid from datetime import datetime +from unittest.mock import patch from zoneinfo import ZoneInfo from ddt import data, ddt, unpack @@ -152,12 +154,15 @@ def test_can_write(self, username, shared, status_code): self.asset.assign_perm(user, PERM_CHANGE_SUBMISSIONS) frozen_datetime_now = datetime(2024, 4, 8, 15, 27, 0, tzinfo=ZoneInfo('UTC')) - with freeze_time(frozen_datetime_now): - response = self.client.patch( - 
self.supplement_details_url, data=payload, format='json' - ) + fixed_uuid = uuid.UUID('11111111-2222-3333-4444-555555555555') + with patch('uuid.uuid4', return_value=fixed_uuid): + with freeze_time(frozen_datetime_now): + response = self.client.patch( + self.supplement_details_url, data=payload, format='json' + ) assert response.status_code == status_code + if status_code == status.HTTP_200_OK: expected = { '_version': '20250820', @@ -171,6 +176,7 @@ def test_can_write(self, username, shared, status_code): '_dateAccepted': '2024-04-08T15:27:00Z', 'language': 'es', 'value': 'buenas noches', + '_uuid': '11111111-2222-3333-4444-555555555555', } ], }, diff --git a/kobo/apps/subsequences/tests/api/v2/test_validation.py b/kobo/apps/subsequences/tests/api/v2/test_validation.py index ba71c0c2da..9995da8835 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_validation.py +++ b/kobo/apps/subsequences/tests/api/v2/test_validation.py @@ -4,6 +4,7 @@ from kobo.apps.subsequences.models import SubmissionSupplement from kobo.apps.subsequences.tests.api.v2.base import SubsequenceBaseTestCase +from kobo.apps.subsequences.tests.constants import QUESTION_SUPPLEMENT class SubmissionSupplementAPITestCase(SubsequenceBaseTestCase): @@ -96,16 +97,9 @@ def test_cannot_set_value_with_automated_actions(self): # Simulate a completed transcription, first. mock_submission_supplement = { '_version': '20250820', - 'q1': { - 'automated_google_transcription': { - 'status': 'complete', - 'value': 'My audio has been transcribed', - 'language': 'en', - '_dateCreated': '2025-08-25T21:17:35.535710Z', - '_dateModified': '2025-08-26T11:41:21.917338Z', - }, - }, + 'q1': QUESTION_SUPPLEMENT } + SubmissionSupplement.objects.create( submission_uuid=self.submission_uuid, content=mock_submission_supplement, @@ -190,15 +184,7 @@ def test_cannot_accept_incomplete_automatic_translation(self): # Simulate a completed transcription, first. 
mock_submission_supplement = { '_version': '20250820', - 'q1': { - 'automated_google_transcription': { - 'status': 'complete', - 'value': 'My audio has been transcribed', - 'language': 'en', - '_dateCreated': '2025-08-25T21:17:35.535710Z', - '_dateModified': '2025-08-26T11:41:21.917338Z', - }, - }, + 'q1': QUESTION_SUPPLEMENT } SubmissionSupplement.objects.create( submission_uuid=self.submission_uuid, diff --git a/kobo/apps/subsequences/tests/constants.py b/kobo/apps/subsequences/tests/constants.py index ef0010d749..1793c0f95a 100644 --- a/kobo/apps/subsequences/tests/constants.py +++ b/kobo/apps/subsequences/tests/constants.py @@ -2,10 +2,17 @@ EMPTY_SUPPLEMENT = {} QUESTION_SUPPLEMENT = { 'automated_google_transcription': { - 'value': 'My audio has been transcribed', - 'language': 'en', - 'status': 'completed', '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:27:00Z', + '_versions': [ + { + 'value': 'My audio has been transcribed', + 'language': 'en', + 'status': 'completed', + '_dateCreated': '2025-08-21T20:57:28.154567Z', + '_dateAccepted': '2025-08-21T20:57:28.154567Z', + '_uuid': '4dcf9c9f-e503-4e5c-81f5-74250b295001', + }, + ] } } diff --git a/kobo/apps/subsequences/tests/test_automated_google_translation.py b/kobo/apps/subsequences/tests/test_automated_google_translation.py index a53633d3e9..76e9269aef 100644 --- a/kobo/apps/subsequences/tests/test_automated_google_translation.py +++ b/kobo/apps/subsequences/tests/test_automated_google_translation.py @@ -394,26 +394,40 @@ def test_cannot_revise_data_without_transcription(): EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, {'language': 'fr'} ) -def test_find_the_most_recent_transcription(): +def test_find_the_most_recent_accepted_transcription(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] action = AutomatedGoogleTranslationAction(xpath, params) question_supplement_data = { 'automated_google_transcription': { - 
'value': 'My audio has been transcribed automatically', - 'language': 'en', - 'status': 'completed', '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:27:00Z', + '_versions': [ + { + 'value': 'My audio has been transcribed automatically', + 'language': 'en', + 'status': 'completed', + '_dateCreated': '2024-04-08T15:27:00Z', + '_dateAccepted': '2024-04-08T15:27:00Z', + '_uuid': '4dcf9c9f-e503-4e5c-81f5-74250b295001', + }, + ], }, 'manual_transcription': { - 'value': 'My audio has been transcribed manually', - 'language': 'en', - 'locale': 'en-CA', - 'status': 'completed', '_dateCreated': '2024-04-08T15:28:00Z', '_dateModified': '2024-04-08T15:28:00Z', + '_versions': [ + { + 'value': 'My audio has been transcribed manually', + 'language': 'en', + 'locale': 'en-CA', + 'status': 'completed', + '_dateCreated': '2024-04-08T15:28:00Z', + '_dateAccepted': '2024-04-08T15:28:00Z', + '_uuid': 'd69b9263-04fd-45b4-b011-2e166cfefd4a', + }, + ], }, } @@ -423,6 +437,7 @@ def test_find_the_most_recent_transcription(): 'dependency': { 'value': 'My audio has been transcribed manually', 'language': 'en-CA', + '_uuid': 'd69b9263-04fd-45b4-b011-2e166cfefd4a', } } action_data = action._get_action_data_dependency( @@ -432,13 +447,14 @@ def test_find_the_most_recent_transcription(): # Automated transcription is the most recent action_data = {} - question_supplement_data['automated_google_transcription'][ - '_dateModified' - ] = '2025-07-28T14:18:00Z' + question_supplement_data['automated_google_transcription']['_versions'][0][ + '_dateAccepted' + ] = '2025-07-28T16:18:00Z' expected = { 'dependency': { 'value': 'My audio has been transcribed automatically', 'language': 'en', + '_uuid': '4dcf9c9f-e503-4e5c-81f5-74250b295001', } } action_data = action._get_action_data_dependency( diff --git a/kobo/apps/subsequences/tests/test_models.py b/kobo/apps/subsequences/tests/test_models.py index 790b92b269..dca8bf5dc5 100644 --- a/kobo/apps/subsequences/tests/test_models.py +++ 
b/kobo/apps/subsequences/tests/test_models.py @@ -1,5 +1,7 @@ +import uuid from copy import deepcopy from datetime import datetime +from unittest.mock import patch from zoneinfo import ZoneInfo import pytest @@ -41,12 +43,14 @@ class SubmissionSupplementTestCase(TestCase): 'value': 'فارغ', '_dateCreated': '2024-04-08T15:31:00Z', '_dateAccepted': '2024-04-08T15:31:00Z', + '_uuid': '51ff33a5-62d6-48ec-94b2-2dfb406e1dee', }, { 'language': 'ar', 'value': 'هائج', '_dateCreated': '2024-04-08T15:27:00Z', '_dateAccepted': '2024-04-08T15:27:00Z', + '_uuid': '123e4567-e89b-12d3-a456-426614174000', } ], }, @@ -59,6 +63,7 @@ class SubmissionSupplementTestCase(TestCase): 'value': 'berserk', '_dateCreated': '2024-04-08T15:27:00Z', '_dateAccepted': '2024-04-08T15:27:00Z', + '_uuid': '22b04ce8-61c2-4383-836f-5d5f0ad73645', }], }, 'es': { @@ -70,12 +75,15 @@ class SubmissionSupplementTestCase(TestCase): 'value': 'enloquecido', '_dateCreated': '2024-04-08T15:32:00Z', '_dateAccepted': '2024-04-08T15:32:00Z', + '_uuid': 'd69b9263-04fd-45b4-b011-2e166cfefd4a', }, { 'language': 'es', 'value': 'loco', '_dateCreated': '2024-04-08T15:29:00Z', '_dateAccepted': '2024-04-08T15:29:00Z', + '_uuid': '30d0f39c-a1dd-43fe-999a-844f12f83d31', + } ], }, @@ -193,92 +201,102 @@ def test_revise_data(self): submission_uuid=self.submission_root_uuid ).exists() - frozen_datetime_now = datetime(2024, 4, 8, 15, 27, 0, tzinfo=ZoneInfo('UTC')) - with freeze_time(frozen_datetime_now): + fake_uuids = [ + uuid.UUID('123e4567-e89b-12d3-a456-426614174000'), + uuid.UUID('22b04ce8-61c2-4383-836f-5d5f0ad73645'), + uuid.UUID('30d0f39c-a1dd-43fe-999a-844f12f83d31'), + uuid.UUID('51ff33a5-62d6-48ec-94b2-2dfb406e1dee'), + uuid.UUID('d69b9263-04fd-45b4-b011-2e166cfefd4a'), + ] - # 1) First call with transcription (ar) and translation (en) - SubmissionSupplement.revise_data( - self.asset, - self.submission, - { - '_version': '20250820', - 'group_name/question_name': { - 'manual_transcription': { - 'language': 'ar', - 
'value': 'هائج', - }, - 'manual_translation': { - 'language': 'en', - 'value': 'berserk', - }, - }, - }, - ) + with patch('uuid.uuid4', side_effect=fake_uuids): - # Make sure a SubmissionSupplement object has been created - assert SubmissionSupplement.objects.filter( - submission_uuid=self.submission_root_uuid - ).exists() + frozen_datetime_now = datetime(2024, 4, 8, 15, 27, 0, tzinfo=ZoneInfo('UTC')) + with freeze_time(frozen_datetime_now): - # 2) Call with translation es = "loco" - frozen_datetime_now = datetime(2024, 4, 8, 15, 29, 0, tzinfo=ZoneInfo('UTC')) - with freeze_time(frozen_datetime_now): - SubmissionSupplement.revise_data( - self.asset, - self.submission, - { - '_version': '20250820', - 'group_name/question_name': { - 'manual_translation': { - 'language': 'es', - 'value': 'loco', + # 1) First call with transcription (ar) and translation (en) + SubmissionSupplement.revise_data( + self.asset, + self.submission, + { + '_version': '20250820', + 'group_name/question_name': { + 'manual_transcription': { + 'language': 'ar', + 'value': 'هائج', + }, + 'manual_translation': { + 'language': 'en', + 'value': 'berserk', + }, }, }, - }, - ) + ) - assert ( - SubmissionSupplement.objects.filter( + # Make sure a SubmissionSupplement object has been created + assert SubmissionSupplement.objects.filter( submission_uuid=self.submission_root_uuid - ).count() - == 1 - ) + ).exists() - # 3) Call with transcription ar = 'فارغ' - frozen_datetime_now = datetime(2024, 4, 8, 15, 31, 0, tzinfo=ZoneInfo('UTC')) - with freeze_time(frozen_datetime_now): - submission_supplement = SubmissionSupplement.revise_data( - self.asset, - self.submission, - { - '_version': '20250820', - 'group_name/question_name': { - 'manual_transcription': { - 'language': 'ar', - 'value': 'فارغ', + # 2) Call with translation es = "loco" + frozen_datetime_now = datetime(2024, 4, 8, 15, 29, 0, tzinfo=ZoneInfo('UTC')) + with freeze_time(frozen_datetime_now): + SubmissionSupplement.revise_data( + self.asset, + 
self.submission, + { + '_version': '20250820', + 'group_name/question_name': { + 'manual_translation': { + 'language': 'es', + 'value': 'loco', + }, }, }, - }, + ) + + assert ( + SubmissionSupplement.objects.filter( + submission_uuid=self.submission_root_uuid + ).count() + == 1 ) - # 4) Call with translation es = "enloquecido" - frozen_datetime_now = datetime(2024, 4, 8, 15, 32, 0, tzinfo=ZoneInfo('UTC')) - with freeze_time(frozen_datetime_now): - submission_supplement = SubmissionSupplement.revise_data( - self.asset, - self.submission, - { - '_version': '20250820', - 'group_name/question_name': { - 'manual_translation': { - 'language': 'es', - 'value': 'enloquecido', + # 3) Call with transcription ar = 'فارغ' + frozen_datetime_now = datetime(2024, 4, 8, 15, 31, 0, tzinfo=ZoneInfo('UTC')) + with freeze_time(frozen_datetime_now): + submission_supplement = SubmissionSupplement.revise_data( + self.asset, + self.submission, + { + '_version': '20250820', + 'group_name/question_name': { + 'manual_transcription': { + 'language': 'ar', + 'value': 'فارغ', + }, }, }, - }, - ) + ) - assert submission_supplement == self.EXPECTED_SUBMISSION_SUPPLEMENT + # 4) Call with translation es = "enloquecido" + frozen_datetime_now = datetime(2024, 4, 8, 15, 32, 0, tzinfo=ZoneInfo('UTC')) + with freeze_time(frozen_datetime_now): + submission_supplement = SubmissionSupplement.revise_data( + self.asset, + self.submission, + { + '_version': '20250820', + 'group_name/question_name': { + 'manual_translation': { + 'language': 'es', + 'value': 'enloquecido', + }, + }, + }, + ) + + assert submission_supplement == self.EXPECTED_SUBMISSION_SUPPLEMENT def test_revise_data_raise_error_wrong_action(self): From 39dd383a2015fe34ce3dcde0df7cf1d959c7975d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 24 Sep 2025 15:39:22 -0400 Subject: [PATCH 102/138] Add dependency and more comments --- .../actions/automated_google_transcription.py | 92 +++++++++++ 
.../actions/automated_google_translation.py | 146 +++++++++++++++++- kobo/apps/subsequences/actions/base.py | 56 +++++-- kobo/apps/subsequences/exceptions.py | 8 +- .../integrations/google/google_translate.py | 4 +- .../test_automated_google_translation.py | 6 +- kobo/apps/subsequences/tests/test_models.py | 1 - 7 files changed, 294 insertions(+), 19 deletions(-) diff --git a/kobo/apps/subsequences/actions/automated_google_transcription.py b/kobo/apps/subsequences/actions/automated_google_transcription.py index b7aec4959f..8ebb5c1bd9 100644 --- a/kobo/apps/subsequences/actions/automated_google_transcription.py +++ b/kobo/apps/subsequences/actions/automated_google_transcription.py @@ -17,6 +17,98 @@ def get_nlp_service_class(self) -> NLPExternalServiceClass: @property def result_schema(self): + """ + JSON Schema for automated Google transcription results. + + The payload is a single-language object with: + - _dateCreated : required string (date-time) + - _dateModified: required string (date-time) + - _versions : array of version objects + + Validation rules for each version + ---------------- + • _dateCreated : always required. + • _uuid : always required. + • language : always required. + • status : always required (in_progress, complete, failed, deleted). + + • value + – required when status == "complete" (holds the transcript). + – must be absent when status is "in_progress" or "failed". + – may be absent or explicitly null when status == "deleted". + + • error + – required when status == "failed". + – must be absent for all other statuses. + + • accepted + – allowed only when status == "complete". 
+ + Examples + -------- + # In-progress (minimal) + { + "_dateCreated": "2025-09-24T10:45:00Z", + "_dateModified": "2025-09-24T10:45:00Z", + "_versions": [ + { + "_dateCreated": "2025-09-24T10:45:00Z", + "_uuid": "550e8400-e29b-41d4-a716-446655440000", + "language": "en", + "status": "in_progress" + } + ] + } + + # Complete (value required) + { + "_dateCreated": "2025-09-24T10:45:00Z", + "_dateModified": "2025-09-24T10:45:00Z", + "_versions": [ + { + "_dateCreated": "2025-09-24T10:45:00Z", + "_dateAccepted": "2025-09-24T10:46:10Z", + "_uuid": "4c0a0e9c-0f2c-4d8a-9c72-3a8d2f9a2a11", + "language": "en", + "locale": "en-CA", + "status": "complete", + "value": "Lunch was great today.", + "accepted": true + } + ] + } + + # Failed (error required, no value) + { + "_dateCreated": "2025-09-24T10:45:00Z", + "_dateModified": "2025-09-24T10:45:00Z", + "_versions": [ + { + "_dateCreated": "2025-09-24T10:45:00Z", + "_uuid": "9b1deb4d-5b15-4e8f-9f8b-7b3f5c6e4d21", + "language": "en", + "status": "failed", + "error": "Upstream service timeout." + } + ] + } + + # Deleted (value null or absent) + { + "_dateCreated": "2025-09-24T10:45:00Z", + "_dateModified": "2025-09-24T10:45:00Z", + "_versions": [ + { + "_dateCreated": "2025-09-24T10:45:00Z", + "_uuid": "7d444840-9dc0-11d1-b245-5ffdce74fad2", + "language": "en", + "status": "deleted", + "value": null + } + ] + } + """ + schema = super().result_schema # FIXME _inject_data_schema does not merge nested children diff --git a/kobo/apps/subsequences/actions/automated_google_translation.py b/kobo/apps/subsequences/actions/automated_google_translation.py index 7da8abe5ac..5428e912bf 100644 --- a/kobo/apps/subsequences/actions/automated_google_translation.py +++ b/kobo/apps/subsequences/actions/automated_google_translation.py @@ -24,6 +24,116 @@ def get_nlp_service_class(self) -> NLPExternalServiceClass: @property def result_schema(self): + """ + JSON Schema for automated Google translation results. 
+ + The payload is an object where each top-level key is a language code from + `self.languages` (e.g. "en") mapping to a dataActionKey object. Timestamps + are ISO-8601 `date-time` strings (e.g. "2025-09-24T10:45:00Z"). + + Validation rules for each version + ---------------- + • _dateCreated : always required. + • _uuid : always required. + • language : always required. + • status : always required (one of: in_progress, complete, failed, + deleted). + + • value + – required when status == "complete". + – must be absent when status is "in_progress" or "failed". + – may be absent or explicitly null when status == "deleted". + + • error + – required when status == "failed". + – must be absent for all other statuses. + + • accepted + – allowed only when status == "complete". + + • _dependency + – required when status is "complete" or "in_progress". + – must be absent for any other status. + + Examples + -------- + # In-progress (minimal) + { + "en": { + "_dateCreated": "2025-09-24T10:45:00Z", + "_dateModified": "2025-09-24T10:45:00Z", + "_versions": [ + { + "_dateCreated": "2025-09-24T10:45:00Z", + "_uuid": "550e8400-e29b-41d4-a716-446655440000", + "language": "en", + "status": "in_progress", + "_dependency": { + "_uuid": "16fd2706-8baf-433b-82eb-8c7fada847da", + "_actionId": "automated_google_transcription" + } + } + ] + } + } + + # Complete (value required) + { + "en": { + "_dateCreated": "2025-09-24T10:45:00Z", + "_dateModified": "2025-09-24T10:45:00Z", + "_versions": [ + { + "_dateCreated": "2025-09-24T10:45:00Z", + "_uuid": "4c0a0e9c-0f2c-4d8a-9c72-3a8d2f9a2a11", + "language": "en", + "locale": "en-CA", + "status": "complete", + "value": "Lunch was great today.", + "accepted": true, + "_dependency": { + "_uuid": "16fd2706-8baf-433b-82eb-8c7fada847da", + "_actionId": "automated_google_transcription" + } + } + ] + } + } + + # Failed (error required) + { + "en": { + "_dateCreated": "2025-09-24T10:45:00Z", + "_dateModified": "2025-09-24T10:45:00Z", + "_versions": [ 
+ { + "_dateCreated": "2025-09-24T10:45:00Z", + "_uuid": "9b1deb4d-5b15-4e8f-9f8b-7b3f5c6e4d21", + "language": "en", + "status": "failed", + "error": "Upstream service timeout." + } + ] + } + } + + # Deleted (value null or absent, no _dependency) + { + "en": { + "_dateCreated": "2025-09-24T10:45:00Z", + "_dateModified": "2025-09-24T10:45:00Z", + "_versions": [ + { + "_dateCreated": "2025-09-24T10:45:00Z", + "_uuid": "7d444840-9dc0-11d1-b245-5ffdce74fad2", + "language": "en", + "status": "deleted", + "value": null + } + ] + } + } + """ schema = super().result_schema @@ -34,6 +144,38 @@ def result_schema(self): 'enum': ['in_progress', 'complete', 'error'], }, } + + # Make "_dependency" property required if status is not deleted + schema['$defs']['version']['properties'].update( + { + self.DEPENDENCY_FIELD: { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.UUID_FIELD: {'$ref': '#/$defs/uuid'}, + self.ACTION_ID_FIELD: {'type': 'string'}, + }, + 'required': [self.UUID_FIELD, self.ACTION_ID_FIELD], + }, + } + ) + schema['$defs']['version']['allOf'].append( + { + 'if': { + 'properties': { + 'status': {'enum': ['complete', 'in_progress']} + }, + 'required': ['status'] + }, + 'then': { + 'required': [self.DEPENDENCY_FIELD] + }, + 'else': { + 'not': {'required': [self.DEPENDENCY_FIELD]} + } + } + ) + return schema def _get_action_data_dependency( @@ -81,6 +223,7 @@ def _get_action_data_dependency( if latest_accepted_dt is None or accepted_dt > latest_accepted_dt: latest_accepted_dt = accepted_dt latest_version = version + latest_version[self.ACTION_ID_FIELD] = action_id if latest_version is None: raise TranscriptionNotFound @@ -91,10 +234,11 @@ def _get_action_data_dependency( ) # Inject dependency property for translation service - action_data['dependency'] = { + action_data[self.DEPENDENCY_FIELD] = { 'value': latest_version['value'], 'language': language_or_locale, self.UUID_FIELD: latest_version[self.UUID_FIELD], + self.ACTION_ID_FIELD: 
latest_version.pop(self.ACTION_ID_FIELD), } return action_data diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index e313f8d6d5..654c259a51 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -160,9 +160,11 @@ class ActionClassConfig: class BaseAction: + ACTION_ID_FIELD = '_actionId' DATE_CREATED_FIELD = '_dateCreated' DATE_MODIFIED_FIELD = '_dateModified' DATE_ACCEPTED_FIELD = '_dateAccepted' + DEPENDENCY_FIELD = '_dependency' UUID_FIELD = '_uuid' VERSION_FIELD = '_versions' @@ -267,7 +269,6 @@ def revise_data( self.raise_for_any_leading_underscore_key(action_data) now_str = utc_datetime_to_js_str(timezone.now()) - item_index = None localized_action_supplemental_data = deepcopy(action_supplemental_data) if self.action_class_config.allow_multiple: @@ -301,16 +302,22 @@ def revise_data( # Otherwise, merge the service response into action_data and keep going # the validation process. - action_data = deepcopy(action_data) + + dependency_supplemental_data = action_data.pop(self.DEPENDENCY_FIELD, None) + # action_data = deepcopy(action_data) action_data.update(service_response) self.validate_automated_data(action_data) accepted = action_data.pop('accepted', None) else: + dependency_supplemental_data = None accepted = True new_version = deepcopy(action_data) new_version[self.DATE_CREATED_FIELD] = now_str new_version[self.UUID_FIELD] = str(uuid.uuid4()) + if dependency_supplemental_data: + new_version[self.DEPENDENCY_FIELD] = dependency_supplemental_data + if self.DATE_CREATED_FIELD not in localized_action_supplemental_data: localized_action_supplemental_data[self.DATE_CREATED_FIELD] = now_str localized_action_supplemental_data[self.DATE_MODIFIED_FIELD] = now_str @@ -472,12 +479,30 @@ class BaseManualNLPAction(BaseAction): def data_schema(self): """ POST to "/api/v2/assets//data//supplemental/" + The payload must be an object with: + - language : required string, one of the 
allowed languages (e.g. "fr", "es") + - value : required string or null + - locale : optional string or null (e.g. "fr-CA", "es-ES") + + Examples + -------- + # Minimal valid example (required fields only) { - 'language_action_id': { - 'language': 'es', - 'locale': 'es-ES', - 'value': 'Almorzamos muy bien hoy', - } + "language": "es", + "value": "Almorzamos muy bien hoy" + } + + # With explicit locale + { + "language": "fr", + "locale": "fr-CA", + "value": "Bonjour tout le monde" + } + + # Null value is allowed when data is intentionally deleted + { + "language": "fr", + "value": null } """ @@ -525,13 +550,14 @@ def automated_data_schema(self) -> dict: Schema rules: - The field `status` is always required and must be one of: - ["requested", "in_progress", "completed", "failed"]. - - If `status` == "done": + ["requested", "in_progress", "complete", "failed"]. + - If `status` == "complete": * The field `value` becomes required and must be a string. - If `status` == "failed": * The field `error` becomes required and must be a string. - No additional properties are allowed beyond `language`, `status` and `value`. """ + return { '$schema': 'https://json-schema.org/draft/2020-12/schema', 'type': 'object', @@ -635,6 +661,7 @@ def data_schema(self) -> dict: * If `accepted` is present, `value` must be absent. - No additional properties are allowed beyond: `language`, `locale`, `value`, `accepted`. """ + return { '$schema': 'https://json-schema.org/draft/2020-12/schema', 'type': 'object', @@ -724,9 +751,14 @@ def run_automated_process( service = NLPService(submission, asset=asset) service_data = service.process_data(self.source_question_xpath, action_data) - # Remove the 'dependency' flag from action_data since it is only used - # internally to resolve prerequisites and must not be kept in the final payload. 
- action_data.pop('dependency', None) + # Sanitize 'dependency' before persisting: keep only stable identifiers and drop + # all other fields (e.g., 'value', 'language', timestamps). + if dependency := action_data.pop(self.DEPENDENCY_FIELD, None): + action_data[self.DEPENDENCY_FIELD] = { + self.ACTION_ID_FIELD: dependency[self.ACTION_ID_FIELD], + self.UUID_FIELD: dependency[self.UUID_FIELD], + } + # If the request is still running, stop processing here. # Returning None ensures that `revise_data()` will not be called afterwards. diff --git a/kobo/apps/subsequences/exceptions.py b/kobo/apps/subsequences/exceptions.py index 418350f125..75baa2b443 100644 --- a/kobo/apps/subsequences/exceptions.py +++ b/kobo/apps/subsequences/exceptions.py @@ -3,6 +3,12 @@ class AudioTooLongError(Exception): Audio file is too long for the specified speech service """ + pass + + +class DependencyNotFound(Exception): + pass + class InvalidAction(Exception): """ @@ -34,7 +40,7 @@ class SubsequenceTimeoutError(Exception): pass -class TranscriptionNotFound(Exception): +class TranscriptionNotFound(DependencyNotFound): pass diff --git a/kobo/apps/subsequences/integrations/google/google_translate.py b/kobo/apps/subsequences/integrations/google/google_translate.py index b737eae650..cd7b84ba10 100644 --- a/kobo/apps/subsequences/integrations/google/google_translate.py +++ b/kobo/apps/subsequences/integrations/google/google_translate.py @@ -174,8 +174,8 @@ def process_data(self, xpath: str, params: dict) -> dict: """ try: - content = params['dependency']['value'] - source_lang = params['dependency']['language'] + content = params['_dependency']['value'] + source_lang = params['_dependency']['language'] target_lang = params['language'] except KeyError: message = 'Error while setting up translation' diff --git a/kobo/apps/subsequences/tests/test_automated_google_translation.py b/kobo/apps/subsequences/tests/test_automated_google_translation.py index 76e9269aef..54dcde2954 100644 --- 
a/kobo/apps/subsequences/tests/test_automated_google_translation.py +++ b/kobo/apps/subsequences/tests/test_automated_google_translation.py @@ -434,10 +434,11 @@ def test_find_the_most_recent_accepted_transcription(): # Manual transcription is the most recent action_data = {} # not really relevant for this test expected = { - 'dependency': { + '_dependency': { 'value': 'My audio has been transcribed manually', 'language': 'en-CA', '_uuid': 'd69b9263-04fd-45b4-b011-2e166cfefd4a', + '_actionId': 'manual_transcription', } } action_data = action._get_action_data_dependency( @@ -451,10 +452,11 @@ def test_find_the_most_recent_accepted_transcription(): '_dateAccepted' ] = '2025-07-28T16:18:00Z' expected = { - 'dependency': { + '_dependency': { 'value': 'My audio has been transcribed automatically', 'language': 'en', '_uuid': '4dcf9c9f-e503-4e5c-81f5-74250b295001', + '_actionId': 'automated_google_transcription', } } action_data = action._get_action_data_dependency( diff --git a/kobo/apps/subsequences/tests/test_models.py b/kobo/apps/subsequences/tests/test_models.py index dca8bf5dc5..8a3b0b67e5 100644 --- a/kobo/apps/subsequences/tests/test_models.py +++ b/kobo/apps/subsequences/tests/test_models.py @@ -83,7 +83,6 @@ class SubmissionSupplementTestCase(TestCase): '_dateCreated': '2024-04-08T15:29:00Z', '_dateAccepted': '2024-04-08T15:29:00Z', '_uuid': '30d0f39c-a1dd-43fe-999a-844f12f83d31', - } ], }, From 7ee999e28c4180182c10b9b8b9b4808dc229dd1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 24 Sep 2025 16:14:30 -0400 Subject: [PATCH 103/138] Fixed typo --- kobo/apps/subsequences/actions/base.py | 22 +++++++++++----------- kobo/apps/subsequences/models.py | 8 +++++--- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 654c259a51..918fb441be 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -346,21 
+346,21 @@ def revise_data( new_version[self.DATE_ACCEPTED_FIELD] = now_str if not self.action_class_config.allow_multiple: - new_action_supplement_data = localized_action_supplemental_data + new_action_supplemental_data = localized_action_supplemental_data else: - new_action_supplement_data = deepcopy(action_supplemental_data) + new_action_supplemental_data = deepcopy(action_supplemental_data) # Handle the case where the default type is a list: # - If no index is provided, append the new record. # - Otherwise, replace the record at the given index. # Finally, update `new_record` to reference the full updated list. - new_action_supplement_data.update({ + new_action_supplemental_data.update({ needle: localized_action_supplemental_data }) - self.validate_result(new_action_supplement_data) + self.validate_result(new_action_supplemental_data) - return new_action_supplement_data + return new_action_supplemental_data @staticmethod def raise_for_any_leading_underscore_key(d: dict): @@ -385,7 +385,7 @@ def run_automated_process( self, submission: dict, question_supplemental_data: dict, - action_supplement_data: dict, + action_supplemental_data: dict, action_data: dict, *args, **kwargs, @@ -695,7 +695,7 @@ def run_automated_process( self, submission: dict, question_supplemental_data: dict, - action_supplement_data: dict, + action_supplemental_data: dict, action_data: dict, *args, **kwargs, @@ -725,9 +725,9 @@ def run_automated_process( # return the completed translation/transcription right away. `revise_data()` # will handle the merge and final validation of this acceptance. 
accepted = action_data.get('accepted', None) - if action_supplement_data.get('status') == 'complete' and accepted is not None: + if action_supplemental_data.get('status') == 'complete' and accepted is not None: return { - 'value': action_supplement_data['value'], + 'value': action_supplemental_data['value'], 'status': 'complete', } @@ -766,14 +766,14 @@ def run_automated_process( accepted is None and service_data['status'] == 'in_progress' ): - if action_supplement_data.get('status'): + if action_supplemental_data.get('status'): return None else: # TODO Retry with Celery, make it work! poll_run_automated_process.delay( submission, question_supplemental_data, - action_supplement_data, + action_supplemental_data, action_data, action_id=self.ID, asset_id=asset.pk, diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 999825c84f..011310ab07 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -106,9 +106,11 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di return supplemental_data question_supplemental_data[action_id] = action_supplemental_data - retrieved_supplemental_data.setdefault(question_xpath, {})[ - action_id - ] = action.retrieve_data(action_supplemental_data) + + # 2025-09-24 oleger: What are the 3 lines below for? 
+ #retrieved_supplemental_data.setdefault(question_xpath, {})[ + # action_id + #] = action.retrieve_data(action_supplemental_data) supplemental_data['_version'] = schema_version validate_submission_supplement(asset, supplemental_data) From 9d3680c77066136917dbcdf7b5a6c80755a6f124 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 25 Sep 2025 17:12:00 -0400 Subject: [PATCH 104/138] WIP - With Celery --- .../openrosa/libs/utils/jsonbfield_helper.py | 165 +++++++++++++++--- kobo/apps/subsequences/actions/base.py | 91 +++++++--- kobo/apps/subsequences/models.py | 2 +- kobo/apps/subsequences/tasks.py | 101 +++++++++-- 4 files changed, 292 insertions(+), 67 deletions(-) diff --git a/kobo/apps/openrosa/libs/utils/jsonbfield_helper.py b/kobo/apps/openrosa/libs/utils/jsonbfield_helper.py index 1c8986c95a..0c4de477d2 100644 --- a/kobo/apps/openrosa/libs/utils/jsonbfield_helper.py +++ b/kobo/apps/openrosa/libs/utils/jsonbfield_helper.py @@ -1,34 +1,159 @@ -# coding: utf-8 -import json +# Python 3.10+ +from typing import Any +from django.db.models import F, Func, Value, JSONField, TextField +from django.db.models.functions import Coalesce +from django.contrib.postgres.fields import ArrayField +from django.db.models.fields.json import KeyTransform -from django.db.models.expressions import Func, Value +# --- SQL primitives ----------------------------------------------------------- + +class JsonbConcat(Func): + """Implements jsonb '||' operator (non-recursive merge).""" + arg_joiner = ' || ' + template = '%(expressions)s' + output_field = JSONField() + + +class JsonbSet(Func): + """ + Wraps jsonb_set(target, path text[], new_value, create_missing boolean). 
+ """ + function = 'jsonb_set' + output_field = JSONField() + + def __init__(self, target, path, new_value, create_missing: bool = True, **extra): + if not isinstance(path, (list, tuple)): + raise TypeError('path must be a list/tuple of keys') + super().__init__( + target, + Value(path, output_field=ArrayField(base_field=TextField())), + new_value, + Value(bool(create_missing)), + **extra, + ) +# --- Helpers ----------------------------------------------------------------- + +def _split_path_dunder(path: str) -> list[str]: + """ + Convert a Django-style '__' path into a list of keys. + Example: 'content__audio__foo' -> ['content', 'audio', 'foo']. + """ + s = (path or '').strip() + if not s: + raise ValueError('path must be a non-empty string when provided') + parts = s.split('__') + if any(p == '' for p in parts): + raise ValueError('invalid path: consecutive or trailing "__"') + return parts + + +def _json_key(path_list: list[str], base_expr): + """ + Build a nested KeyTransform chain: base->'k1'->'k2'->... + """ + expr = base_expr + for key in path_list: + expr = KeyTransform(key, expr) + return expr + + +def _merge_obj_at(expr, path_list: list[str], patch: dict, *, create_missing: bool = True): + """ + Deep-merge a dict 'patch' into an object at 'path_list'. + If the object at path doesn't exist, coalesce it to {} before merging. + """ + existing = Coalesce( + _json_key(path_list, expr), + Value({}, output_field=JSONField()), + output_field=JSONField(), + ) + new_value = JsonbConcat(existing, Value(patch, output_field=JSONField())) + return JsonbSet(expr, path_list, new_value, create_missing=create_missing) + + +def _set_at(expr, path_list: list[str], value: Any, *, create_missing: bool = True): + """ + Set any JSON value (scalar/array/object) at 'path_list'. 
+ """ + return JsonbSet(expr, path_list, Value(value, output_field=JSONField()), create_missing=create_missing) + + +# --- Public API --------------------------------------------------------------- + class ReplaceValues(Func): """ - Updates several properties at once of a JSONBField without overwriting the - whole document. - Avoids race conditions when document is saved in two different transactions - at the same time. - https://www.postgresql.org/docs/current/functions-json.html + Single-op JSONB updater with Django-style '__' paths and root merge. + + Usage: + # Root merge (non-destructive) when path=None (default) + MyModel.objects.update( + metadata=ReplaceValues('metadata', updates={'feature_flags': {'x': True}}) + ) - Notes from postgres docs: - > Does not operate recursively: only the top-level array or object - > structure is merged + # Merge nested dict at dunder path + MyModel.objects.update( + metadata=ReplaceValues( + 'metadata', + path='content__audio__automated_google_transcription', + updates={'status': 'done', 'text': 'Bonjour'}, + ) + ) + + # Set scalar at nested path + MyModel.objects.update( + metadata=ReplaceValues('metadata', path='flags__legacy', updates=False) + ) """ - arg_joiner = ' || ' - template = "%(expressions)s" - arity = 2 + output_field = JSONField() + template = '%(expressions)s' # render the built inner expression as-is + arg_joiner = ', ' def __init__( self, expression: str, - updates: dict, + *, + updates: Any, + path: str | None = None, + create_missing: bool = True, **extra, ): - super().__init__( - expression, - Value(json.dumps(updates)), - **extra, - ) + """ + Parameters + ---------- + expression : str + JSONB field name (e.g., 'metadata'). + updates : Any + - If path is None: must be a dict (will be merged at root, non-destructive). + - If path is provided: + * dict -> deep-merge at the given path + * non-dict -> set value at the given path + path : str | None + Django-style dunder path (e.g., 'a__b__c'). None means root. 
+ create_missing : bool + Create missing parents/keys when setting/merging. + """ + expr = F(expression) + + if path is None: + # Root-level operation: only dict-merge makes sense, to avoid nuking the whole JSON. + if not isinstance(updates, dict): + raise TypeError('When path=None, "updates" must be a dict for a root merge.') + # Top-level merge is equivalent to merging each root key individually + # to avoid clobbering existing nested objects. + for key, val in updates.items(): + if isinstance(val, dict): + expr = _merge_obj_at(expr, [key], val, create_missing=create_missing) + else: + expr = _set_at(expr, [key], val, create_missing=create_missing) + else: + # Nested path operation + path_list = _split_path_dunder(path) + if isinstance(updates, dict): + expr = _merge_obj_at(expr, path_list, updates, create_missing=create_missing) + else: + expr = _set_at(expr, path_list, updates, create_missing=create_missing) + + super().__init__(expr, **extra) diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 918fb441be..75158a4ccf 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -8,6 +8,7 @@ from kobo.apps.kobo_auth.shortcuts import User from kobo.apps.subsequences.utils.time import utc_datetime_to_js_str +from kobo.celery import celery_app from kpi.exceptions import UsageLimitExceededException from kpi.utils.usage_calculator import ServiceUsageCalculator from ..tasks import poll_run_automated_process @@ -214,7 +215,8 @@ def get_output_fields(self) -> list[dict]: Must be implemented by subclasses. 
""" - raise NotImplementedError() + # raise NotImplementedError() + return [] def validate_automated_data(self, data): jsonschema.validate(data, self.automated_data_schema) @@ -268,15 +270,11 @@ def revise_data( self.validate_data(action_data) self.raise_for_any_leading_underscore_key(action_data) - now_str = utc_datetime_to_js_str(timezone.now()) - - localized_action_supplemental_data = deepcopy(action_supplemental_data) - if self.action_class_config.allow_multiple: - # TODO: Multiple keys are not supported. - # Not a big issue for now since translation actions don’t use locale - # (yet?) and transcription actions only involve one occurrence at a time. - needle = action_data[self.action_class_config.action_data_key] - localized_action_supplemental_data = action_supplemental_data.get(needle, {}) + localized_action_supplemental_data, needle = ( + self.get_localized_action_supplemental_data( + action_supplemental_data, action_data + ) + ) try: current_version = localized_action_supplemental_data.get( @@ -302,9 +300,7 @@ def revise_data( # Otherwise, merge the service response into action_data and keep going # the validation process. - dependency_supplemental_data = action_data.pop(self.DEPENDENCY_FIELD, None) - # action_data = deepcopy(action_data) action_data.update(service_response) self.validate_automated_data(action_data) accepted = action_data.pop('accepted', None) @@ -312,6 +308,44 @@ def revise_data( dependency_supplemental_data = None accepted = True + return self.get_new_action_supplemental_data( + action_supplemental_data, + action_data, + dependency_supplemental_data, + accepted, + ) + + def get_localized_action_supplemental_data( + self, action_supplemental_data: dict, action_data: dict + ) -> tuple[dict, str | None]: + + localized_action_supplemental_data = deepcopy(action_supplemental_data) + needle = None + + if self.action_class_config.allow_multiple: + # TODO: Multiple keys are not supported. 
+ # Not a big issue for now since translation actions don’t use locale + # (yet?) and transcription actions only involve one occurrence at a time. + needle = action_data[self.action_class_config.action_data_key] + localized_action_supplemental_data = action_supplemental_data.get(needle, {}) + + return localized_action_supplemental_data, needle + + def get_new_action_supplemental_data( + self, + action_supplemental_data: dict, + action_data: dict, + dependency_supplemental_data: dict, + accepted: bool | None = None, + ) -> dict: + now_str = utc_datetime_to_js_str(timezone.now()) + + localized_action_supplemental_data, needle = ( + self.get_localized_action_supplemental_data( + action_supplemental_data, action_data + ) + ) + new_version = deepcopy(action_data) new_version[self.DATE_CREATED_FIELD] = now_str new_version[self.UUID_FIELD] = str(uuid.uuid4()) @@ -349,11 +383,6 @@ def revise_data( new_action_supplemental_data = localized_action_supplemental_data else: new_action_supplemental_data = deepcopy(action_supplemental_data) - # Handle the case where the default type is a list: - # - If no index is provided, append the new record. - # - Otherwise, replace the record at the given index. - # Finally, update `new_record` to reference the full updated list. - new_action_supplemental_data.update({ needle: localized_action_supplemental_data }) @@ -721,6 +750,8 @@ def run_automated_process( returned and passed back to `revise_data()`. """ + print('ACTION DATA', action_data, flush=True) + # If the client sent "accepted" while the supplement is already complete, # return the completed translation/transcription right away. `revise_data()` # will handle the merge and final validation of this acceptance. @@ -734,6 +765,7 @@ def run_automated_process( # If the client explicitly removed a previously stored result, # preserve the deletion by returning a `deleted` status instead # of reprocessing with the automated service. 
+ # TODO add comment for delete here if 'value' in action_data: return { 'value': action_data['value'], @@ -766,18 +798,23 @@ def run_automated_process( accepted is None and service_data['status'] == 'in_progress' ): - if action_supplemental_data.get('status'): + print('HERE', service_data, flush=True) + if action_supplemental_data.get('status') == 'in_progress': return None else: - # TODO Retry with Celery, make it work! - poll_run_automated_process.delay( - submission, - question_supplemental_data, - action_supplemental_data, - action_data, - action_id=self.ID, - asset_id=asset.pk, - ) + # Make Celery update in the background. + # Since Celery is calling the same code, we want to ensure + if not celery_app.current_worker_task: + poll_run_automated_process.apply_async( + kwargs={ + 'submission': submission, + 'action_data': action_data, + 'action_id': self.ID, + 'asset_id': asset.pk, + 'question_xpath': self.source_question_xpath, + }, + countdown=10, + ) # Normal case: return the processed transcription data. 
return service_data diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 011310ab07..66a54c22e3 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -78,7 +78,7 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di raise InvalidAction from e action = action_class(question_xpath, action_params) - action.check_limits(asset.owner) + # action.check_limits(asset.owner) question_supplemental_data = supplemental_data.setdefault( question_xpath, {} ) diff --git a/kobo/apps/subsequences/tasks.py b/kobo/apps/subsequences/tasks.py index 9477321130..8db8301d30 100644 --- a/kobo/apps/subsequences/tasks.py +++ b/kobo/apps/subsequences/tasks.py @@ -1,35 +1,98 @@ -########################### -# WIP: Unfinished business# -########################### +from celery.signals import task_failure + from django.apps import apps -from kobo.apps.subsequences.exceptions import SubsequenceTimeoutError from kobo.celery import celery_app +from kobo.apps.openrosa.libs.utils.jsonbfield_helper import ReplaceValues +from kobo.apps.subsequences.exceptions import SubsequenceTimeoutError +from .constants import SUBMISSION_UUID_FIELD +from .exceptions import InvalidAction, InvalidXPath +from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix -# TODO Adjust max_retries. Should be no longer than external service timeout. +# TODO Adjust max_retries. Should be no longer than external service timeout (28800 s). 
@celery_app.task( autoretry_for=(SubsequenceTimeoutError,), - retry_backoff=60, - max_retries=5, + retry_backoff=5, + retry_backoff_max=60, + max_retries=2, retry_jitter=False, queue='kpi_low_priority_queue', ) def poll_run_automated_process( + asset_id: int, submission: dict, - question_supplemental_data: dict, - action_supplement_data: dict, - action_data: dict, + question_xpath: str, action_id: str, - asset_id: int, + action_data: dict, ): Asset = apps.get_model('kpi', 'Asset') # noqa: N806 SubmissionSupplement = apps.get_model('subsequences', 'SubmissionSupplement') # noqa: N806 - # TODO Rebuild incoming data from question supplemental data. - # We are missing the question_name_xpath, see comment in - # `SupplementData.revise_data()` - incoming_data = {} - - asset = Asset.objects.defer('content').get(id=asset_id) + incoming_data = { + '_version': '20250820', + question_xpath: {action_id: action_data}, + } + asset = Asset.objects.only('pk', 'owner_id').get(id=asset_id) supplement_data = SubmissionSupplement.revise_data(asset, submission, incoming_data) - if supplement_data['status'] == 'in_progress': - raise SubsequenceTimeoutError + + last_action_version = supplement_data[question_xpath][action_id]['_versions'][0] + + if last_action_version['status'] == 'in_progress': + raise SubsequenceTimeoutError( + f'{action_id} is still in progress for submission ' + f'{submission[SUBMISSION_UUID_FIELD]}' + ) + + +@task_failure.connect(sender=poll_run_automated_process) +def poll_run_automated_process_failure(sender=None, **kwargs): + + # Avoid circular import + from .actions import ACTION_IDS_TO_CLASSES + Asset = apps.get_model('kpi', 'Asset') # noqa: N806 + SubmissionSupplement = apps.get_model('subsequences', 'SubmissionSupplement') # noqa: N806 + + asset_id = kwargs['kwargs']['asset_id'] + error = str(kwargs['exception']) + submission = kwargs['kwargs']['submission'] + question_xpath = kwargs['kwargs']['question_xpath'] + action_id = kwargs['kwargs']['action_id'] + 
action_data = kwargs['kwargs']['action_data'] + + asset = Asset.objects.only('pk', 'owner_id', 'advanced_features').get(id=asset_id) + + supplemental_data = SubmissionSupplement.retrieve_data( + asset, submission_root_uuid=submission[SUBMISSION_UUID_FIELD] + ) + # TODO Add failure to DB + if 'is still in progress for submission' in error: + error = 'Maximum retries exceeded.' + + action_class = ACTION_IDS_TO_CLASSES[action_id] + action_configs = asset.advanced_features['_actionConfigs'] + action_configs_for_this_question = action_configs[question_xpath] + action_params = action_configs_for_this_question[action_id] + action = action_class(question_xpath, action_params) + + action_supplemental_data = supplemental_data[question_xpath][action_id] + action_data.update({ + 'error': error, + 'status': 'failed', # TODO maybe add dependency? + }) + dependency_supplemental_data = {} + + new_action_supplemental_data = action.get_new_action_supplemental_data( + action_supplemental_data, action_data, dependency_supplemental_data + ) + + SubmissionSupplement.objects.filter() + submission_uuid = remove_uuid_prefix(submission[SUBMISSION_UUID_FIELD]) + + SubmissionSupplement.objects.filter( + asset=asset, submission_uuid=submission_uuid + ).update( + content=ReplaceValues( + 'content', + path=f'{question_xpath}__{action_id}', + updates=new_action_supplemental_data, + ) + ) From 8a78a77daffb1d6bf59ca663a91b03e7552c94f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 26 Sep 2025 13:39:38 -0400 Subject: [PATCH 105/138] Save errors when Google Timeout is reached --- .../commands/populate_submission_counters.py | 6 +- .../update_attachment_storage_bytes.py | 4 +- kobo/apps/openrosa/apps/logger/tasks.py | 4 +- .../openrosa/libs/utils/jsonbfield_helper.py | 159 ------------- kobo/apps/subsequences/constants.py | 5 + kobo/apps/subsequences/models.py | 20 +- kobo/apps/subsequences/tasks.py | 27 ++- kobo/apps/subsequences/utils/versioning.py | 5 + 
kpi/tests/test_django_orm_helper.py | 213 ++++++++++++++++++ kpi/utils/django_orm_helper.py | 208 ++++++++++++++--- 10 files changed, 434 insertions(+), 217 deletions(-) delete mode 100644 kobo/apps/openrosa/libs/utils/jsonbfield_helper.py create mode 100644 kobo/apps/subsequences/utils/versioning.py create mode 100644 kpi/tests/test_django_orm_helper.py diff --git a/kobo/apps/openrosa/apps/logger/management/commands/populate_submission_counters.py b/kobo/apps/openrosa/apps/logger/management/commands/populate_submission_counters.py index 1943294250..f82dfefd70 100644 --- a/kobo/apps/openrosa/apps/logger/management/commands/populate_submission_counters.py +++ b/kobo/apps/openrosa/apps/logger/management/commands/populate_submission_counters.py @@ -17,7 +17,7 @@ MonthlyXFormSubmissionCounter, ) from kobo.apps.openrosa.apps.main.models.user_profile import UserProfile -from kobo.apps.openrosa.libs.utils.jsonbfield_helper import ReplaceValues +from kpi.utils.django_orm_helper import UpdateJSONFieldAttributes class Command(BaseCommand): @@ -227,7 +227,7 @@ def release_old_locks(self): # Release any locks on the users' profile from getting submissions UserProfile.objects.all().update( - metadata=ReplaceValues( + metadata=UpdateJSONFieldAttributes( 'metadata', updates=updates, ), @@ -242,7 +242,7 @@ def update_user_profile(self, user: settings.AUTH_USER_MODEL): UserProfile.objects.filter( user_id=user.pk ).update( - metadata=ReplaceValues( + metadata=UpdateJSONFieldAttributes( 'metadata', updates=updates, ), diff --git a/kobo/apps/openrosa/apps/logger/management/commands/update_attachment_storage_bytes.py b/kobo/apps/openrosa/apps/logger/management/commands/update_attachment_storage_bytes.py index 18c85923d5..9810a7b400 100644 --- a/kobo/apps/openrosa/apps/logger/management/commands/update_attachment_storage_bytes.py +++ b/kobo/apps/openrosa/apps/logger/management/commands/update_attachment_storage_bytes.py @@ -14,7 +14,7 @@ from 
kobo.apps.openrosa.apps.logger.models.attachment import Attachment from kobo.apps.openrosa.apps.logger.models.xform import XForm from kobo.apps.openrosa.apps.main.models.user_profile import UserProfile -from kobo.apps.openrosa.libs.utils.jsonbfield_helper import ReplaceValues +from kpi.utils.django_orm_helper import UpdateJSONFieldAttributes class Command(BaseCommand): @@ -296,7 +296,7 @@ def _update_user_profile(self, user: settings.AUTH_USER_MODEL): UserProfile.objects.filter(user_id=user.pk).update( attachment_storage_bytes=Subquery(subquery), - metadata=ReplaceValues( + metadata=UpdateJSONFieldAttributes( 'metadata', updates=updates, ), diff --git a/kobo/apps/openrosa/apps/logger/tasks.py b/kobo/apps/openrosa/apps/logger/tasks.py index d6764e7377..330f033763 100644 --- a/kobo/apps/openrosa/apps/logger/tasks.py +++ b/kobo/apps/openrosa/apps/logger/tasks.py @@ -16,7 +16,7 @@ from django_redis import get_redis_connection from kobo.apps.kobo_auth.shortcuts import User -from kobo.apps.openrosa.libs.utils.jsonbfield_helper import ReplaceValues +from kpi.utils.django_orm_helper import UpdateJSONFieldAttributes from kobo.celery import celery_app from kpi.deployment_backends.kc_access.storage import ( default_kobocat_storage as default_storage, @@ -167,7 +167,7 @@ def fix_stale_submissions_suspended_flag(): if usernames: UserProfile.objects.filter(user__username__in=usernames).update( - metadata=ReplaceValues( + metadata=UpdateJSONFieldAttributes( 'metadata', updates={'submissions_suspended': False}, ), diff --git a/kobo/apps/openrosa/libs/utils/jsonbfield_helper.py b/kobo/apps/openrosa/libs/utils/jsonbfield_helper.py deleted file mode 100644 index 0c4de477d2..0000000000 --- a/kobo/apps/openrosa/libs/utils/jsonbfield_helper.py +++ /dev/null @@ -1,159 +0,0 @@ -# Python 3.10+ -from typing import Any -from django.db.models import F, Func, Value, JSONField, TextField -from django.db.models.functions import Coalesce -from django.contrib.postgres.fields import ArrayField 
-from django.db.models.fields.json import KeyTransform - - -# --- SQL primitives ----------------------------------------------------------- - -class JsonbConcat(Func): - """Implements jsonb '||' operator (non-recursive merge).""" - arg_joiner = ' || ' - template = '%(expressions)s' - output_field = JSONField() - - -class JsonbSet(Func): - """ - Wraps jsonb_set(target, path text[], new_value, create_missing boolean). - """ - function = 'jsonb_set' - output_field = JSONField() - - def __init__(self, target, path, new_value, create_missing: bool = True, **extra): - if not isinstance(path, (list, tuple)): - raise TypeError('path must be a list/tuple of keys') - super().__init__( - target, - Value(path, output_field=ArrayField(base_field=TextField())), - new_value, - Value(bool(create_missing)), - **extra, - ) - - -# --- Helpers ----------------------------------------------------------------- - -def _split_path_dunder(path: str) -> list[str]: - """ - Convert a Django-style '__' path into a list of keys. - Example: 'content__audio__foo' -> ['content', 'audio', 'foo']. - """ - s = (path or '').strip() - if not s: - raise ValueError('path must be a non-empty string when provided') - parts = s.split('__') - if any(p == '' for p in parts): - raise ValueError('invalid path: consecutive or trailing "__"') - return parts - - -def _json_key(path_list: list[str], base_expr): - """ - Build a nested KeyTransform chain: base->'k1'->'k2'->... - """ - expr = base_expr - for key in path_list: - expr = KeyTransform(key, expr) - return expr - - -def _merge_obj_at(expr, path_list: list[str], patch: dict, *, create_missing: bool = True): - """ - Deep-merge a dict 'patch' into an object at 'path_list'. - If the object at path doesn't exist, coalesce it to {} before merging. 
- """ - existing = Coalesce( - _json_key(path_list, expr), - Value({}, output_field=JSONField()), - output_field=JSONField(), - ) - new_value = JsonbConcat(existing, Value(patch, output_field=JSONField())) - return JsonbSet(expr, path_list, new_value, create_missing=create_missing) - - -def _set_at(expr, path_list: list[str], value: Any, *, create_missing: bool = True): - """ - Set any JSON value (scalar/array/object) at 'path_list'. - """ - return JsonbSet(expr, path_list, Value(value, output_field=JSONField()), create_missing=create_missing) - - -# --- Public API --------------------------------------------------------------- - -class ReplaceValues(Func): - """ - Single-op JSONB updater with Django-style '__' paths and root merge. - - Usage: - # Root merge (non-destructive) when path=None (default) - MyModel.objects.update( - metadata=ReplaceValues('metadata', updates={'feature_flags': {'x': True}}) - ) - - # Merge nested dict at dunder path - MyModel.objects.update( - metadata=ReplaceValues( - 'metadata', - path='content__audio__automated_google_transcription', - updates={'status': 'done', 'text': 'Bonjour'}, - ) - ) - - # Set scalar at nested path - MyModel.objects.update( - metadata=ReplaceValues('metadata', path='flags__legacy', updates=False) - ) - """ - output_field = JSONField() - template = '%(expressions)s' # render the built inner expression as-is - arg_joiner = ', ' - - def __init__( - self, - expression: str, - *, - updates: Any, - path: str | None = None, - create_missing: bool = True, - **extra, - ): - """ - Parameters - ---------- - expression : str - JSONB field name (e.g., 'metadata'). - updates : Any - - If path is None: must be a dict (will be merged at root, non-destructive). - - If path is provided: - * dict -> deep-merge at the given path - * non-dict -> set value at the given path - path : str | None - Django-style dunder path (e.g., 'a__b__c'). None means root. - create_missing : bool - Create missing parents/keys when setting/merging. 
- """ - expr = F(expression) - - if path is None: - # Root-level operation: only dict-merge makes sense, to avoid nuking the whole JSON. - if not isinstance(updates, dict): - raise TypeError('When path=None, "updates" must be a dict for a root merge.') - # Top-level merge is equivalent to merging each root key individually - # to avoid clobbering existing nested objects. - for key, val in updates.items(): - if isinstance(val, dict): - expr = _merge_obj_at(expr, [key], val, create_missing=create_missing) - else: - expr = _set_at(expr, [key], val, create_missing=create_missing) - else: - # Nested path operation - path_list = _split_path_dunder(path) - if isinstance(updates, dict): - expr = _merge_obj_at(expr, path_list, updates, create_missing=create_missing) - else: - expr = _set_at(expr, path_list, updates, create_missing=create_missing) - - super().__init__(expr, **extra) diff --git a/kobo/apps/subsequences/constants.py b/kobo/apps/subsequences/constants.py index af4bf7f4ec..2445db6e01 100644 --- a/kobo/apps/subsequences/constants.py +++ b/kobo/apps/subsequences/constants.py @@ -15,3 +15,8 @@ # Processing time is not audio length, but it's an estimate GOOGLE_CACHE_TIMEOUT = 28800 # 8 hours GOOGLE_CODE = 'goog' + +SCHEMA_VERSIONS = [ + '20250820', + None +] diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 66a54c22e3..43a39d1a08 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -3,7 +3,7 @@ from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix from kpi.models.abstract_models import AbstractTimeStampedModel from .actions import ACTION_IDS_TO_CLASSES -from .constants import SUBMISSION_UUID_FIELD +from .constants import SUBMISSION_UUID_FIELD, SCHEMA_VERSIONS from .exceptions import InvalidAction, InvalidXPath from .schemas import validate_submission_supplement @@ -38,11 +38,16 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di raise 
InvalidAction schema_version = incoming_data.get('_version') - if schema_version != '20250820': + + if schema_version not in SCHEMA_VERSIONS: + # TODO: raise error. Unknown version + raise NotImplementedError + + if schema_version != SCHEMA_VERSIONS[0]: # TODO: migrate from old per-submission schema raise NotImplementedError - if asset.advanced_features['_version'] != schema_version: + if asset.advanced_features.get('_version') != schema_version: # TODO: migrate from old per-asset schema raise NotImplementedError @@ -154,11 +159,16 @@ def retrieve_data( return {} schema_version = supplemental_data.pop('_version') - if schema_version != '20250820': + + if schema_version not in SCHEMA_VERSIONS: + # TODO: raise error. Unknown version + raise NotImplementedError + + if schema_version != SCHEMA_VERSIONS[0]: # TODO: migrate from old per-submission schema raise NotImplementedError - if asset.advanced_features['_version'] != schema_version: + if asset.advanced_features.get('_version') != schema_version: # TODO: migrate from old per-asset schema raise NotImplementedError diff --git a/kobo/apps/subsequences/tasks.py b/kobo/apps/subsequences/tasks.py index 8db8301d30..eeded52482 100644 --- a/kobo/apps/subsequences/tasks.py +++ b/kobo/apps/subsequences/tasks.py @@ -2,19 +2,26 @@ from django.apps import apps from kobo.celery import celery_app -from kobo.apps.openrosa.libs.utils.jsonbfield_helper import ReplaceValues +from kpi.utils.django_orm_helper import UpdateJSONFieldAttributes from kobo.apps.subsequences.exceptions import SubsequenceTimeoutError from .constants import SUBMISSION_UUID_FIELD -from .exceptions import InvalidAction, InvalidXPath from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix - - -# TODO Adjust max_retries. Should be no longer than external service timeout (28800 s). +from .utils.versioning import set_version + +# With retry_backoff=5 and retry_backoff_max=60, each retry waits: +# min(5 * 2^(n-1), 60) seconds. 
+# We also add an initial 10s delay before the first attempt. +# Total wait time must stay ≤ 28,800 s (GOOGLE_CACHE_TIMEOUT). +# Calculation: +# 10 (initial) + (5 + 10 + 20 + 40) + 60 * k ≤ 28,800 +# => k = floor((28,800 - 10 - 75) / 60) = 478 +# => max_retries = 4 (before cap) + 478 = 482 +# So set max_retries to 482 or less to stay within the 8-hour limit. @celery_app.task( autoretry_for=(SubsequenceTimeoutError,), retry_backoff=5, retry_backoff_max=60, - max_retries=2, + max_retries=482, retry_jitter=False, queue='kpi_low_priority_queue', ) @@ -27,10 +34,9 @@ def poll_run_automated_process( ): Asset = apps.get_model('kpi', 'Asset') # noqa: N806 SubmissionSupplement = apps.get_model('subsequences', 'SubmissionSupplement') # noqa: N806 - incoming_data = { - '_version': '20250820', + incoming_data = set_version({ question_xpath: {action_id: action_data}, - } + }) asset = Asset.objects.only('pk', 'owner_id').get(id=asset_id) supplement_data = SubmissionSupplement.revise_data(asset, submission, incoming_data) @@ -63,7 +69,6 @@ def poll_run_automated_process_failure(sender=None, **kwargs): supplemental_data = SubmissionSupplement.retrieve_data( asset, submission_root_uuid=submission[SUBMISSION_UUID_FIELD] ) - # TODO Add failure to DB if 'is still in progress for submission' in error: error = 'Maximum retries exceeded.' 
@@ -90,7 +95,7 @@ def poll_run_automated_process_failure(sender=None, **kwargs): SubmissionSupplement.objects.filter( asset=asset, submission_uuid=submission_uuid ).update( - content=ReplaceValues( + content=UpdateJSONFieldAttributes( 'content', path=f'{question_xpath}__{action_id}', updates=new_action_supplemental_data, diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py new file mode 100644 index 0000000000..2cd6f646b9 --- /dev/null +++ b/kobo/apps/subsequences/utils/versioning.py @@ -0,0 +1,5 @@ +from ..constants import SCHEMA_VERSIONS + +def set_version(schema: dict) -> dict: + schema['_version'] = SCHEMA_VERSIONS[0] + return schema diff --git a/kpi/tests/test_django_orm_helper.py b/kpi/tests/test_django_orm_helper.py new file mode 100644 index 0000000000..88526fb52d --- /dev/null +++ b/kpi/tests/test_django_orm_helper.py @@ -0,0 +1,213 @@ +from __future__ import annotations + +from django.test import TestCase +from kobo.apps.kobo_auth.shortcuts import User +from hub.models import ExtraUserDetail + +from kpi.utils.django_orm_helper import UpdateJSONFieldAttributes + + +class DjangoORMHelperTestCase(TestCase): + def setUp(self): + # Seed a user with initial JSON data on ExtraUserDetail + self.bob = User.objects.create_user(username='bob', password='password') + extra = self.bob.extra_details + extra.data['organization'] = "Bob's organization" + extra.data['name'] = 'Bob Loblaw' + extra.save() + + def _data(self): + self.bob.extra_details.refresh_from_db() + return self.bob.extra_details.data + + def test_update_property_root_level_merge_and_set(self): + """ + Root merge with a dict preserves existing keys; scalars are set at root. 
+ """ + + updates = { + 'country': {'code': 'CA', 'label': 'Canada'}, + 'sector': 'Humanitarian', + } + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes( + 'data', updates=updates + ) + ) + data = self._data() + assert data['organization'] == "Bob's organization" + assert data['name'] == 'Bob Loblaw' + assert data['country'] == {'code': 'CA', 'label': 'Canada'} + assert data['sector'] == 'Humanitarian' + + def test_update_nested_merge_dunder_preserves_siblings(self): + """ + Merging into a nested object keeps sibling keys intact. + """ + + # Seed nested subtree + seed = {'profile': {'address': {'street': 'Main', 'city': 'Montréal'}}} + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes('data', updates=seed) + ) + + # Merge into the same nested object (change city, add postal_code) + patch = {'city': 'Toronto', 'postal_code': 'M5H'} + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes( + 'data', path='profile__address', updates=patch + ) + ) + data = self._data() + assert data['profile']['address']['city'] == 'Toronto' + assert data['profile']['address']['postal_code'] == 'M5H' + # Sibling preserved + assert data['profile']['address']['street'] == 'Main' + + def test_update_nested_set_scalar_creates_missing_parents(self): + """ + Setting a scalar at a nested path creates missing parent objects. + """ + + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes( + 'data', path='flags__legacy', updates=False + ) + ) + data = self._data() + assert 'flags' in data + assert data['flags']['legacy'] is False + + def test_update_top_level_set_scalar_via_path(self): + """ + Setting a top-level scalar via a single-key dunder path. 
+ """ + + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes( + 'data', path='sector', updates='Humanitarian' + ) + ) + data = self._data() + assert data['sector'] == 'Humanitarian' + assert data['organization'] == "Bob's organization" + assert data['name'] == 'Bob Loblaw' + + def test_update_top_level_merge_via_path_single_key(self): + """ + Merging a dict at a top-level key via a dunder path. + """ + + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes( + 'data', path='preferences', updates={'theme': 'dark'} + ) + ) + data = self._data() + assert data['preferences']['theme'] == 'dark' + assert data['organization'] == "Bob's organization" + + def test_root_merge_does_not_clobber_existing_nested_objects(self): + """ + Root merge of {'profile': {...}} should not overwrite other 'profile' subkeys. + """ + + # Seed content under 'profile' + seed = {'profile': {'address': {'city': 'Montréal', 'street': 'Main'}}} + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes('data', updates=seed) + ) + + # Root merge adds a sibling key under 'profile' + root_updates = {'profile': {'bio': 'Hello, world!'}} + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes('data', updates=root_updates) + ) + data = self._data() + assert data['profile']['bio'] == 'Hello, world!' + assert data['profile']['address']['city'] == 'Montréal' + assert data['profile']['address']['street'] == 'Main' + + def test_nested_set_list_value(self): + """ + Setting a list value + """ + + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes( + 'data', path='tags', updates=['a', 'b'] + ) + ) + data = self._data() + assert data['tags'] == ['a', 'b'] + + def test_nested_merge_overwrites_conflicting_keys_only(self): + """ + Nested merge updates conflicting keys and keeps others. 
+ """ + # Seed country object + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes( + 'data', + updates={ + 'country': { + 'code': 'CA', + 'label': 'CanadA', + 'postal_code': 'H0H0H0', + } + }, + ) + ) + + # Merge: change label, add continent; keep code and postal_code + patch = {'label': 'Country of SantaClaus', 'continent': 'North America'} + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes( + 'data', path='country', updates=patch + ) + ) + data = self._data() + assert data['country']['code'] == 'CA' # preserved + assert data['country']['label'] == 'Country of SantaClaus' # replaced + assert data['country']['continent'] == 'North America' # added + assert data['country']['postal_code'] == 'H0H0H0' # preserved + + def test_nested_merge_creates_missing_parents(self): + """ + Merging a dict at a deep path should create all missing parents. + """ + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes( + 'data', + path='profile__contact', + updates={'email': 'bob@example.com'}, + ) + ) + data = self._data() + assert data['profile']['contact']['email'] == 'bob@example.com' + + def test_error_when_root_with_non_dict(self): + """ + Root operation requires a dict; passing a non-dict should raise TypeError. + """ + try: + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes('data', updates='not-a-dict') + ) + assert False, 'TypeError was expected but not raised' + except TypeError: + pass + + def test_error_invalid_dunder_path(self): + """ + Invalid dunder path should raise ValueError. 
+ """ + try: + ExtraUserDetail.objects.filter(user_id=self.bob.pk).update( + data=UpdateJSONFieldAttributes( + 'data', path='foo____bar', updates=1 + ) + ) + assert False, 'ValueError was expected but not raised' + except ValueError: + pass diff --git a/kpi/utils/django_orm_helper.py b/kpi/utils/django_orm_helper.py index ffd8b0b471..b0795083ff 100644 --- a/kpi/utils/django_orm_helper.py +++ b/kpi/utils/django_orm_helper.py @@ -1,11 +1,39 @@ -from __future__ import annotations - import json +from typing import Any +from django.db.models import F, Func, Value, JSONField, TextField +from django.db.models.functions import Coalesce +from django.contrib.postgres.fields import ArrayField +from django.db.models.fields.json import KeyTransform from django.db.models import Field, Lookup from django.db.models.expressions import Func, Value +class DeductUsageValue(Func): + + function = 'jsonb_set' + usage_value = "COALESCE(%(expressions)s ->> '%(keyname)s', '0')::int" + template = ( + '%(function)s(%(expressions)s,' + '\'{"%(keyname)s"}\',' + '(' + f'CASE WHEN {usage_value} > %(amount)s ' + f'THEN {usage_value} - %(amount)s ' + 'ELSE 0 ' + 'END ' + ')::text::jsonb)' + ) + arity = 1 + + def __init__(self, expression: str, keyname: str, amount: int, **extra): + super().__init__( + expression, + keyname=keyname, + amount=amount, + **extra, + ) + + @Field.register_lookup class InArray(Lookup): @@ -39,27 +67,32 @@ def __init__(self, expression: str, keyname: str, increment: int, **extra): ) -class DeductUsageValue(Func): +class JSONBConcat(Func): + """ + Implements jsonb '||' operator (non-recursive merge). + """ + + arg_joiner = ' || ' + template = '%(expressions)s' + output_field = JSONField() + + +class JSONBSet(Func): + """ + Wraps jsonb_set(target, path text[], new_value, create_missing boolean). 
+ """ function = 'jsonb_set' - usage_value = "COALESCE(%(expressions)s ->> '%(keyname)s', '0')::int" - template = ( - '%(function)s(%(expressions)s,' - '\'{"%(keyname)s"}\',' - '(' - f'CASE WHEN {usage_value} > %(amount)s ' - f'THEN {usage_value} - %(amount)s ' - 'ELSE 0 ' - 'END ' - ')::text::jsonb)' - ) - arity = 1 + output_field = JSONField() - def __init__(self, expression: str, keyname: str, amount: int, **extra): + def __init__(self, target, path, new_value, create_missing: bool = True, **extra): + if not isinstance(path, (list, tuple)): + raise TypeError('path must be a list/tuple of keys') super().__init__( - expression, - keyname=keyname, - amount=amount, + target, + Value(path, output_field=ArrayField(base_field=TextField())), + new_value, + Value(bool(create_missing)), **extra, ) @@ -102,7 +135,6 @@ def __init__( class RemoveJSONFieldAttribute(Func): - """ Remove attribute from models.JSONField. It supports nested attributes by targeting the attribute with its dotted path. @@ -129,28 +161,134 @@ def __init__( class UpdateJSONFieldAttributes(Func): """ - Updates several attributes at once of a models.JSONField without overwriting - the whole document. - Avoids race conditions when document is saved in two different transactions - at the same time. (i.e.: `Asset._deployment['status']`) - https://www.postgresql.org/docs/current/functions-json.html + Single-op JSONB updater using Django-style '__' paths and root merge. 
+ + Usage: + # Root merge (non-destructive) + MyModel.objects.update( + data=UpdateJSONFieldAttributes('data', updates={'feature_flags': {'x': True}}) + ) - Notes from postgres docs: - > Does not operate recursively: only the top-level array or object - > structure is merged + # Nested merge (dict) + MyModel.objects.update( + data=UpdateJSONFieldAttributes( + 'data', + path='profile__address', + updates={'city': 'Toronto', 'postal_code': 'M5H'}, + ) + ) + + # Nested set (scalar/array) + MyModel.objects.update( + data=UpdateJSONFieldAttributes('data', path='flags__legacy', updates=False) + ) """ - arg_joiner = ' || ' + output_field = JSONField() template = '%(expressions)s' - arity = 2 + arg_joiner = ', ' def __init__( self, expression: str, - updates: dict, + updates: Any, + path: str | None = None, **extra, ): - super().__init__( - expression, - Value(json.dumps(updates)), - **extra, + expr = F(expression) + + if path is None: + if not isinstance(updates, dict): + raise TypeError( + 'When path=None, "updates" must be a dict for a root merge.' + ) + + # Merge each top-level key independently (preserve existing siblings). + for key, val in updates.items(): + if isinstance(val, dict): + expr = _merge_obj_at(expr, [key], val) + else: + # Ensure parent exists (root key), then set scalar/array. + expr = _ensure_parents(expr, []) # no-op for root + expr = JSONBSet( + expr, + [key], + Value(val, output_field=JSONField()), + create_missing=True, + ) + else: + path_list = _split_path_dunder(path) + parent_path = path_list[:-1] + if isinstance(updates, dict): + # Make sure parents exist, then deep-merge at the object path. + expr = _ensure_parents(expr, parent_path) + expr = _merge_obj_at(expr, path_list, updates) + else: + # Make sure parents exist, then set scalar/array at leaf. 
+ expr = _ensure_parents(expr, parent_path) + expr = JSONBSet( + expr, + path_list, + Value(updates, output_field=JSONField()), + create_missing=True, + ) + + super().__init__(expr, **extra) + + +def _ensure_parents(expr, parents: list[str]): + """ + Ensure that each prefix in 'parents' exists and is an object. + For each prefix, set it to itself if present, otherwise to {}. + """ + + for i in range(len(parents)): + prefix = parents[: i + 1] + existing = Coalesce( + _json_key(prefix, expr), + Value({}, output_field=JSONField()), + output_field=JSONField(), ) + expr = JSONBSet(expr, prefix, existing, create_missing=True) + return expr + + +def _json_key(path_list: list[str], base_expr): + """ + Build a nested KeyTransform chain to access JSON keys. + Returns base_expr when path_list is empty. + """ + + expr = base_expr + for key in path_list: + expr = KeyTransform(key, expr) + return expr + + +def _merge_obj_at(expr, path_list: list[str], patch: dict): + """ + Deep-merge a dict 'patch' into an object at 'path_list'. + If object at the path doesn't exist, coalesce it to {} before merging. + """ + + existing = Coalesce( + _json_key(path_list, expr), + Value({}, output_field=JSONField()), + output_field=JSONField(), + ) + new_value = JSONBConcat(existing, Value(patch, output_field=JSONField())) + return JSONBSet(expr, path_list, new_value, create_missing=True) + + +def _split_path_dunder(path: str) -> list[str]: + """ + Convert a Django-style '__' path into a list of keys. + Example: 'content__audio__foo' -> ['content', 'audio', 'foo']. 
+ """ + + s = (path or '').strip() + if not s: + raise ValueError('path must be a non-empty string when provided') + parts = s.split('__') + if any(p == '' for p in parts): + raise ValueError('invalid path: consecutive or trailing "__"') + return parts From b7134edf07c8b7a537a5c6bcf26728b8106474ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 26 Sep 2025 16:03:19 -0400 Subject: [PATCH 106/138] Refactor dependencies system --- .../actions/automated_google_translation.py | 142 ++++++------ kobo/apps/subsequences/actions/base.py | 207 ++++++++++-------- kobo/apps/subsequences/models.py | 14 +- kobo/apps/subsequences/tasks.py | 2 - 4 files changed, 193 insertions(+), 172 deletions(-) diff --git a/kobo/apps/subsequences/actions/automated_google_translation.py b/kobo/apps/subsequences/actions/automated_google_translation.py index 5428e912bf..a4ec504190 100644 --- a/kobo/apps/subsequences/actions/automated_google_translation.py +++ b/kobo/apps/subsequences/actions/automated_google_translation.py @@ -19,9 +19,86 @@ class AutomatedGoogleTranslationAction( allow_multiple=True, automated=True, action_data_key='language' ) + def attach_action_dependency(self, action_data: dict): + """ + Attach the latest accepted transcript as a dependency for a translation action. + + Looks up prior transcription actions in `self._action_dependencies` and + selects the most recent accepted version. + The chosen transcript is injected into `action_data['dependency']` with: + - 'value': transcript text + - 'language': preferred locale if present, else base language + - '_uuid': transcript UUID + + If none is found, raises `TranscriptionNotFound`. + """ + + latest_version = latest_accepted_dt = None + + for action_id, action_supplemental_data in self._action_dependencies.items(): + + versions = action_supplemental_data.get(self.VERSION_FIELD) or [] + for version in versions: + # Skip versions without an acceptance timestamp. 
+ accepted_raw = version.get(self.DATE_ACCEPTED_FIELD) + if not accepted_raw: + continue + + accepted_dt = parser.parse(accepted_raw) + + if latest_accepted_dt is None or accepted_dt > latest_accepted_dt: + latest_accepted_dt = accepted_dt + latest_version = version + latest_version[self.ACTION_ID_FIELD] = action_id + + if latest_version is None: + raise TranscriptionNotFound + + # Prefer a specific locale when available; otherwise use the base language. + language_or_locale = ( + latest_version.get('locale') or latest_version['language'] + ) + + # Inject dependency property for translation service + action_data[self.DEPENDENCY_FIELD] = { + 'value': latest_version['value'], + 'language': language_or_locale, + self.UUID_FIELD: latest_version[self.UUID_FIELD], + self.ACTION_ID_FIELD: latest_version.pop(self.ACTION_ID_FIELD), + } + + return action_data + + def get_nlp_service_class(self) -> NLPExternalServiceClass: return GoogleTranslationService + def get_action_dependencies(self, question_supplemental_data: dict) -> dict: + """ + Return only the supplemental data required by this action. + + This method inspects the full `question_supplemental_data` payload + and extracts a subset containing only the actions on which the + current action relies (e.g., transcription results needed before a + translation). It never mutates the original dictionary and does not + include unrelated entries—only the minimal keys and values needed + for this action to run correctly. 
+ """ + + transcription_action_ids = ( + AutomatedGoogleTranscriptionAction.ID, + ManualTranscriptionAction.ID, + ) + + for action_id in transcription_action_ids: + + action_supplemental_data = question_supplemental_data.get(action_id) + if not action_supplemental_data: + continue + self._action_dependencies[action_id] = action_supplemental_data + + return self._action_dependencies + @property def result_schema(self): """ @@ -178,71 +255,6 @@ def result_schema(self): return schema - def _get_action_data_dependency( - self, question_supplemental_data: dict, action_data: dict - ) -> dict: - """ - Attach the latest accepted transcript as a dependency for a translation action. - - Looks up prior transcription actions in `question_supplemental_data` and - selects the most recent accepted version. - The chosen transcript is injected into `action_data['dependency']` with: - - 'value': transcript text - - 'language': preferred locale if present, else base language - - '_uuid': transcript UUID - - The search is restricted to known transcription action IDs (e.g., Google - automated and manual transcription). If none is found, raises - `TranscriptionNotFound`. - """ - - # Action IDs that can provide a transcript dependency. - transcription_action_ids = ( - AutomatedGoogleTranscriptionAction.ID, - ManualTranscriptionAction.ID, - ) - - latest_version = None - latest_accepted_dt = None - - for action_id in transcription_action_ids: - # Each action's data is expected to store versions under "_versions". - action_supplemental_data = question_supplemental_data.get(action_id) - if not action_supplemental_data: - continue - - versions = action_supplemental_data.get(self.VERSION_FIELD) or [] - for version in versions: - # Skip versions without an acceptance timestamp. 
- accepted_raw = version.get(self.DATE_ACCEPTED_FIELD) - if not accepted_raw: - continue - - accepted_dt = parser.parse(accepted_raw) - - if latest_accepted_dt is None or accepted_dt > latest_accepted_dt: - latest_accepted_dt = accepted_dt - latest_version = version - latest_version[self.ACTION_ID_FIELD] = action_id - - if latest_version is None: - raise TranscriptionNotFound - - # Prefer a specific locale when available; otherwise use the base language. - language_or_locale = ( - latest_version.get('locale') or latest_version['language'] - ) - - # Inject dependency property for translation service - action_data[self.DEPENDENCY_FIELD] = { - 'value': latest_version['value'], - 'language': language_or_locale, - self.UUID_FIELD: latest_version[self.UUID_FIELD], - self.ACTION_ID_FIELD: latest_version.pop(self.ACTION_ID_FIELD), - } - - return action_data - @property def _limit_identifier(self): return UsageType.MT_CHARACTERS diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 75158a4ccf..570e3a3501 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -1,6 +1,7 @@ import uuid from copy import deepcopy from dataclasses import dataclass +from typing import Optional import jsonschema from django.conf import settings @@ -171,9 +172,19 @@ class BaseAction: action_class_config: ActionClassConfig | None = None - def __init__(self, source_question_xpath, params): + def __init__( + self, + source_question_xpath: str, + params: list[dict], + asset: Optional['kpi.models.Asset'] = None, + ): self.source_question_xpath = source_question_xpath self.params = params + self.asset = asset + self._action_dependencies = {} + + def attach_action_dependency(self, action_data: dict): + pass def check_limits(self, user: User): @@ -198,6 +209,97 @@ def data_schema(self): """ raise NotImplementedError + def get_action_dependencies(self, question_supplemental_data: dict) -> dict | None: + """ + Return a 
mapping of supplemental data required by this action, or None. + + This method allows an action to declare which parts of + `question_supplemental_data` (or other context data) it depends on + in order to run correctly. By default it returns None, meaning the + action has no external dependencies. Subclasses can override this + to return a dictionary of prerequisite data—typically other actions’ + results or metadata—that must be present before executing this + action. + """ + + return None + + def get_localized_action_supplemental_data( + self, action_supplemental_data: dict, action_data: dict + ) -> tuple[dict, str | None]: + + localized_action_supplemental_data = deepcopy(action_supplemental_data) + needle = None + + if self.action_class_config.allow_multiple: + # TODO: Multiple keys are not supported. + # Not a big issue for now since translation actions don’t use locale + # (yet?) and transcription actions only involve one occurrence at a time. + needle = action_data[self.action_class_config.action_data_key] + localized_action_supplemental_data = action_supplemental_data.get(needle, {}) + + return localized_action_supplemental_data, needle + + def get_new_action_supplemental_data( + self, + action_supplemental_data: dict, + action_data: dict, + dependency_supplemental_data: dict, + accepted: bool | None = None, + ) -> dict: + now_str = utc_datetime_to_js_str(timezone.now()) + + localized_action_supplemental_data, needle = ( + self.get_localized_action_supplemental_data( + action_supplemental_data, action_data + ) + ) + + new_version = deepcopy(action_data) + new_version[self.DATE_CREATED_FIELD] = now_str + new_version[self.UUID_FIELD] = str(uuid.uuid4()) + if dependency_supplemental_data: + new_version[self.DEPENDENCY_FIELD] = dependency_supplemental_data + + if self.DATE_CREATED_FIELD not in localized_action_supplemental_data: + localized_action_supplemental_data[self.DATE_CREATED_FIELD] = now_str + localized_action_supplemental_data[self.DATE_MODIFIED_FIELD] 
= now_str + + localized_action_supplemental_data.setdefault( + self.VERSION_FIELD, [] + ).insert(0, new_version) + + # For manual actions, always mark as accepted. + # For automated actions, revert the just-created revision (remove it and + # reapply its dates) to avoid adding extra branching earlier in the method. + if self.action_class_config.automated: + if accepted is not None: + # Remove stale version + localized_action_supplemental_data[self.VERSION_FIELD].pop(0) + if accepted: + localized_action_supplemental_data[self.VERSION_FIELD][0][ + self.DATE_ACCEPTED_FIELD + ] = now_str + else: + localized_action_supplemental_data[self.VERSION_FIELD][ + 0 + ].pop(self.DATE_ACCEPTED_FIELD, None) + + else: + new_version[self.DATE_ACCEPTED_FIELD] = now_str + + if not self.action_class_config.allow_multiple: + new_action_supplemental_data = localized_action_supplemental_data + else: + new_action_supplemental_data = deepcopy(action_supplemental_data) + new_action_supplemental_data.update({ + needle: localized_action_supplemental_data + }) + + self.validate_result(new_action_supplemental_data) + + return new_action_supplemental_data + def get_output_fields(self) -> list[dict]: """ Returns a list of fields contributed by this action to outputted @@ -257,10 +359,9 @@ def revise_field(self, *args, **kwargs): def revise_data( self, submission: dict, - question_supplemental_data: dict, action_supplemental_data: dict, action_data: dict, - asset: 'kpi.models.Asset' = None, + action_dependencies: dict | None = None, ) -> dict | None: """ `submission` argument for future use by subclasses @@ -288,10 +389,9 @@ def revise_data( if not ( service_response := self.run_automated_process( submission, - question_supplemental_data, current_version, action_data, - asset=asset, + action_dependencies, ) ): # If the service response is None, the automated task is still running. 
@@ -315,82 +415,6 @@ def revise_data( accepted, ) - def get_localized_action_supplemental_data( - self, action_supplemental_data: dict, action_data: dict - ) -> tuple[dict, str | None]: - - localized_action_supplemental_data = deepcopy(action_supplemental_data) - needle = None - - if self.action_class_config.allow_multiple: - # TODO: Multiple keys are not supported. - # Not a big issue for now since translation actions don’t use locale - # (yet?) and transcription actions only involve one occurrence at a time. - needle = action_data[self.action_class_config.action_data_key] - localized_action_supplemental_data = action_supplemental_data.get(needle, {}) - - return localized_action_supplemental_data, needle - - def get_new_action_supplemental_data( - self, - action_supplemental_data: dict, - action_data: dict, - dependency_supplemental_data: dict, - accepted: bool | None = None, - ) -> dict: - now_str = utc_datetime_to_js_str(timezone.now()) - - localized_action_supplemental_data, needle = ( - self.get_localized_action_supplemental_data( - action_supplemental_data, action_data - ) - ) - - new_version = deepcopy(action_data) - new_version[self.DATE_CREATED_FIELD] = now_str - new_version[self.UUID_FIELD] = str(uuid.uuid4()) - if dependency_supplemental_data: - new_version[self.DEPENDENCY_FIELD] = dependency_supplemental_data - - if self.DATE_CREATED_FIELD not in localized_action_supplemental_data: - localized_action_supplemental_data[self.DATE_CREATED_FIELD] = now_str - localized_action_supplemental_data[self.DATE_MODIFIED_FIELD] = now_str - - localized_action_supplemental_data.setdefault( - self.VERSION_FIELD, [] - ).insert(0, new_version) - - # For manual actions, always mark as accepted. - # For automated actions, revert the just-created revision (remove it and - # reapply its dates) to avoid adding extra branching earlier in the method. 
- if self.action_class_config.automated: - if accepted is not None: - # Remove stale version - localized_action_supplemental_data[self.VERSION_FIELD].pop(0) - if accepted: - localized_action_supplemental_data[self.VERSION_FIELD][0][ - self.DATE_ACCEPTED_FIELD - ] = now_str - else: - localized_action_supplemental_data[self.VERSION_FIELD][ - 0 - ].pop(self.DATE_ACCEPTED_FIELD, None) - - else: - new_version[self.DATE_ACCEPTED_FIELD] = now_str - - if not self.action_class_config.allow_multiple: - new_action_supplemental_data = localized_action_supplemental_data - else: - new_action_supplemental_data = deepcopy(action_supplemental_data) - new_action_supplemental_data.update({ - needle: localized_action_supplemental_data - }) - - self.validate_result(new_action_supplemental_data) - - return new_action_supplemental_data - @staticmethod def raise_for_any_leading_underscore_key(d: dict): """ @@ -413,9 +437,9 @@ def raise_for_any_leading_underscore_key(d: dict): def run_automated_process( self, submission: dict, - question_supplemental_data: dict, action_supplemental_data: dict, action_data: dict, + action_dependencies: dict | None = None, *args, **kwargs, ) -> dict | bool: @@ -723,9 +747,9 @@ def get_nlp_service_class(self) -> NLPExternalServiceClass: def run_automated_process( self, submission: dict, - question_supplemental_data: dict, action_supplemental_data: dict, action_data: dict, + action_dependencies: dict | None = None, *args, **kwargs, ) -> dict | None: @@ -750,8 +774,6 @@ def run_automated_process( returned and passed back to `revise_data()`. """ - print('ACTION DATA', action_data, flush=True) - # If the client sent "accepted" while the supplement is already complete, # return the completed translation/transcription right away. `revise_data()` # will handle the merge and final validation of this acceptance. 
@@ -772,15 +794,11 @@ def run_automated_process( 'status': 'deleted', } - if hasattr(self, '_get_action_data_dependency'): - action_data = self._get_action_data_dependency( - question_supplemental_data, action_data - ) + self.attach_action_dependency(action_data) # Otherwise, trigger the external service. - asset = kwargs['asset'] NLPService = self.get_nlp_service_class() # noqa - service = NLPService(submission, asset=asset) + service = NLPService(submission, asset=self.asset) service_data = service.process_data(self.source_question_xpath, action_data) # Sanitize 'dependency' before persisting: keep only stable identifiers and drop @@ -791,29 +809,28 @@ def run_automated_process( self.UUID_FIELD: dependency[self.UUID_FIELD], } - # If the request is still running, stop processing here. # Returning None ensures that `revise_data()` will not be called afterwards. if ( accepted is None and service_data['status'] == 'in_progress' ): - print('HERE', service_data, flush=True) if action_supplemental_data.get('status') == 'in_progress': return None else: # Make Celery update in the background. # Since Celery is calling the same code, we want to ensure + # it does not recall itself. if not celery_app.current_worker_task: poll_run_automated_process.apply_async( kwargs={ 'submission': submission, 'action_data': action_data, 'action_id': self.ID, - 'asset_id': asset.pk, + 'asset_id': self.asset.pk, 'question_xpath': self.source_question_xpath, }, - countdown=10, + countdown=10, # Give it a small delay before retrying ) # Normal case: return the processed transcription data. 
diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index 43a39d1a08..b292538256 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -82,28 +82,22 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di except KeyError as e: raise InvalidAction from e - action = action_class(question_xpath, action_params) + action = action_class(question_xpath, action_params, asset) + # TODO REMOVE The comment below # action.check_limits(asset.owner) + question_supplemental_data = supplemental_data.setdefault( question_xpath, {} ) action_supplemental_data = question_supplemental_data.setdefault( action_id, {} ) - - # TODO: `action.revise_data()` may need `question_xpath` to retry when - # the action is automated and returns "in_progress" (see - # `tasks.py::poll_run_automated_progress()`). - # Also, `action_supplemental_data` seems redundant now that - # `question_supplemental_data` is passed; it could potentially be - # rebuilt inside `action.revise_data()`. + action.get_action_dependencies(question_supplemental_data) if not ( action_supplemental_data := action.revise_data( submission, - question_supplemental_data, action_supplemental_data, action_data, - asset=asset, ) ): # TODO is line below really needed? 
diff --git a/kobo/apps/subsequences/tasks.py b/kobo/apps/subsequences/tasks.py index eeded52482..fd922e4b42 100644 --- a/kobo/apps/subsequences/tasks.py +++ b/kobo/apps/subsequences/tasks.py @@ -89,9 +89,7 @@ def poll_run_automated_process_failure(sender=None, **kwargs): action_supplemental_data, action_data, dependency_supplemental_data ) - SubmissionSupplement.objects.filter() submission_uuid = remove_uuid_prefix(submission[SUBMISSION_UUID_FIELD]) - SubmissionSupplement.objects.filter( asset=asset, submission_uuid=submission_uuid ).update( From d70fc4047c33dac1b41d076da6b93c575e110025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Mon, 29 Sep 2025 15:16:45 -0400 Subject: [PATCH 107/138] Persist action dependency --- kobo/apps/subsequences/README.md | 12 +- .../actions/automated_google_transcription.py | 6 +- .../actions/automated_google_translation.py | 123 +----------- kobo/apps/subsequences/actions/base.py | 32 ++-- .../actions/manual_transcription.py | 4 +- .../actions/manual_translation.py | 4 +- kobo/apps/subsequences/actions/mixins.py | 156 ++++++++++++++- kobo/apps/subsequences/tests/constants.py | 23 ++- .../test_automated_google_transcription.py | 60 +++--- .../test_automated_google_translation.py | 180 ++++++------------ .../tests/test_manual_transcription.py | 51 ++--- .../tests/test_manual_translation.py | 109 +++++------ kobo/apps/subsequences/tests/test_models.py | 18 +- 13 files changed, 353 insertions(+), 425 deletions(-) diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md index b1b33b5941..280a90f9cf 100644 --- a/kobo/apps/subsequences/README.md +++ b/kobo/apps/subsequences/README.md @@ -55,10 +55,10 @@ class AutomatedGoogleTranscription class AutomatedGoogleTranslation %% ==== Mixins (provide result_schema) ==== -class TranscriptionResultSchemaMixin { +class TranscriptionActionMixin { +result_schema [property] } -class TranslationResultSchemaMixin { +class TranslationActionMixin { 
+result_schema [property] } @@ -73,10 +73,10 @@ BaseAutomatedNLPAction <|-- AutomatedGoogleTranscription BaseAutomatedNLPAction <|-- AutomatedGoogleTranslation %% ==== Mixins -> Concretes ==== -TranscriptionResultSchemaMixin <.. ManualTranscription : mixin -TranscriptionResultSchemaMixin <.. AutomatedGoogleTranscription : mixin -TranslationResultSchemaMixin <.. ManualTranslation : mixin -TranslationResultSchemaMixin <.. AutomatedGoogleTranslation : mixin +TranscriptionActionMixin <.. ManualTranscription : mixin +TranscriptionActionMixin <.. AutomatedGoogleTranscription : mixin +TranslationActionMixin <.. ManualTranslation : mixin +TranslationActionMixin <.. AutomatedGoogleTranslation : mixin ``` --- diff --git a/kobo/apps/subsequences/actions/automated_google_transcription.py b/kobo/apps/subsequences/actions/automated_google_transcription.py index 8ebb5c1bd9..59ec6fcfce 100644 --- a/kobo/apps/subsequences/actions/automated_google_transcription.py +++ b/kobo/apps/subsequences/actions/automated_google_transcription.py @@ -2,11 +2,11 @@ from ..integrations.google.google_transcribe import GoogleTranscriptionService from ..type_aliases import NLPExternalServiceClass from .base import ActionClassConfig, BaseAutomatedNLPAction -from .mixins import TranscriptionResultSchemaMixin +from .mixins import TranscriptionActionMixin class AutomatedGoogleTranscriptionAction( - TranscriptionResultSchemaMixin, BaseAutomatedNLPAction + TranscriptionActionMixin, BaseAutomatedNLPAction ): ID = 'automated_google_transcription' @@ -115,7 +115,7 @@ def result_schema(self): schema['$defs']['action_status'] = { 'action_status': { 'type': 'string', - 'enum': ['in_progress', 'complete', 'error'], + 'enum': ['in_progress', 'complete', 'failed', 'deleted'], }, } return schema diff --git a/kobo/apps/subsequences/actions/automated_google_translation.py b/kobo/apps/subsequences/actions/automated_google_translation.py index a4ec504190..695446d4c2 100644 --- 
a/kobo/apps/subsequences/actions/automated_google_translation.py +++ b/kobo/apps/subsequences/actions/automated_google_translation.py @@ -1,17 +1,12 @@ -from dateutil import parser - from kobo.apps.organizations.constants import UsageType -from ..actions.automated_google_transcription import AutomatedGoogleTranscriptionAction -from ..actions.manual_transcription import ManualTranscriptionAction -from ..exceptions import TranscriptionNotFound from ..integrations.google.google_translate import GoogleTranslationService from ..type_aliases import NLPExternalServiceClass from .base import ActionClassConfig, BaseAutomatedNLPAction -from .mixins import TranslationResultSchemaMixin +from .mixins import TranslationActionMixin class AutomatedGoogleTranslationAction( - TranslationResultSchemaMixin, BaseAutomatedNLPAction + TranslationActionMixin, BaseAutomatedNLPAction ): ID = 'automated_google_translation' @@ -19,86 +14,9 @@ class AutomatedGoogleTranslationAction( allow_multiple=True, automated=True, action_data_key='language' ) - def attach_action_dependency(self, action_data: dict): - """ - Attach the latest accepted transcript as a dependency for a translation action. - - Looks up prior transcription actions in `self._action_dependencies` and - selects the most recent accepted version. - The chosen transcript is injected into `action_data['dependency']` with: - - 'value': transcript text - - 'language': preferred locale if present, else base language - - '_uuid': transcript UUID - - If none is found, raises `TranscriptionNotFound`. - """ - - latest_version = latest_accepted_dt = None - - for action_id, action_supplemental_data in self._action_dependencies.items(): - - versions = action_supplemental_data.get(self.VERSION_FIELD) or [] - for version in versions: - # Skip versions without an acceptance timestamp. 
- accepted_raw = version.get(self.DATE_ACCEPTED_FIELD) - if not accepted_raw: - continue - - accepted_dt = parser.parse(accepted_raw) - - if latest_accepted_dt is None or accepted_dt > latest_accepted_dt: - latest_accepted_dt = accepted_dt - latest_version = version - latest_version[self.ACTION_ID_FIELD] = action_id - - if latest_version is None: - raise TranscriptionNotFound - - # Prefer a specific locale when available; otherwise use the base language. - language_or_locale = ( - latest_version.get('locale') or latest_version['language'] - ) - - # Inject dependency property for translation service - action_data[self.DEPENDENCY_FIELD] = { - 'value': latest_version['value'], - 'language': language_or_locale, - self.UUID_FIELD: latest_version[self.UUID_FIELD], - self.ACTION_ID_FIELD: latest_version.pop(self.ACTION_ID_FIELD), - } - - return action_data - - def get_nlp_service_class(self) -> NLPExternalServiceClass: return GoogleTranslationService - def get_action_dependencies(self, question_supplemental_data: dict) -> dict: - """ - Return only the supplemental data required by this action. - - This method inspects the full `question_supplemental_data` payload - and extracts a subset containing only the actions on which the - current action relies (e.g., transcription results needed before a - translation). It never mutates the original dictionary and does not - include unrelated entries—only the minimal keys and values needed - for this action to run correctly. - """ - - transcription_action_ids = ( - AutomatedGoogleTranscriptionAction.ID, - ManualTranscriptionAction.ID, - ) - - for action_id in transcription_action_ids: - - action_supplemental_data = question_supplemental_data.get(action_id) - if not action_supplemental_data: - continue - self._action_dependencies[action_id] = action_supplemental_data - - return self._action_dependencies - @property def result_schema(self): """ @@ -129,8 +47,8 @@ def result_schema(self): – allowed only when status == "complete". 
• _dependency - – required when status is "complete" or "in_progress". - – must be absent for any other status. + – must be absent when status is "deleted". + – required when this status is any other status. Examples -------- @@ -218,41 +136,10 @@ def result_schema(self): schema['$defs']['action_status'] = { 'action_status': { 'type': 'string', - 'enum': ['in_progress', 'complete', 'error'], + 'enum': ['in_progress', 'complete', 'failed', 'deleted'], }, } - # Make "_dependency" property required if status is not deleted - schema['$defs']['version']['properties'].update( - { - self.DEPENDENCY_FIELD: { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - self.UUID_FIELD: {'$ref': '#/$defs/uuid'}, - self.ACTION_ID_FIELD: {'type': 'string'}, - }, - 'required': [self.UUID_FIELD, self.ACTION_ID_FIELD], - }, - } - ) - schema['$defs']['version']['allOf'].append( - { - 'if': { - 'properties': { - 'status': {'enum': ['complete', 'in_progress']} - }, - 'required': ['status'] - }, - 'then': { - 'required': [self.DEPENDENCY_FIELD] - }, - 'else': { - 'not': {'required': [self.DEPENDENCY_FIELD]} - } - } - ) - return schema @property diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 570e3a3501..7c549a9918 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -361,7 +361,6 @@ def revise_data( submission: dict, action_supplemental_data: dict, action_data: dict, - action_dependencies: dict | None = None, ) -> dict | None: """ `submission` argument for future use by subclasses @@ -384,6 +383,8 @@ def revise_data( except IndexError: current_version = {} + self.attach_action_dependency(action_data) + if self.action_class_config.automated: # If the action is automated, run the external process first. 
if not ( @@ -391,7 +392,6 @@ def revise_data( submission, current_version, action_data, - action_dependencies, ) ): # If the service response is None, the automated task is still running. @@ -405,9 +405,17 @@ def revise_data( self.validate_automated_data(action_data) accepted = action_data.pop('accepted', None) else: - dependency_supplemental_data = None + dependency_supplemental_data = action_data.pop(self.DEPENDENCY_FIELD, None) accepted = True + if dependency_supplemental_data: + # Sanitize 'dependency' before persisting: keep only stable identifiers and + # drop all other fields (e.g., 'value', 'language', timestamps). + dependency_supplemental_data = { + self.ACTION_ID_FIELD: dependency_supplemental_data[self.ACTION_ID_FIELD], + self.UUID_FIELD: dependency_supplemental_data[self.UUID_FIELD], + } + return self.get_new_action_supplemental_data( action_supplemental_data, action_data, @@ -439,7 +447,6 @@ def run_automated_process( submission: dict, action_supplemental_data: dict, action_data: dict, - action_dependencies: dict | None = None, *args, **kwargs, ) -> dict | bool: @@ -749,7 +756,6 @@ def run_automated_process( submission: dict, action_supplemental_data: dict, action_data: dict, - action_dependencies: dict | None = None, *args, **kwargs, ) -> dict | None: @@ -794,21 +800,11 @@ def run_automated_process( 'status': 'deleted', } - self.attach_action_dependency(action_data) - # Otherwise, trigger the external service. NLPService = self.get_nlp_service_class() # noqa service = NLPService(submission, asset=self.asset) service_data = service.process_data(self.source_question_xpath, action_data) - # Sanitize 'dependency' before persisting: keep only stable identifiers and drop - # all other fields (e.g., 'value', 'language', timestamps). 
- if dependency := action_data.pop(self.DEPENDENCY_FIELD, None): - action_data[self.DEPENDENCY_FIELD] = { - self.ACTION_ID_FIELD: dependency[self.ACTION_ID_FIELD], - self.UUID_FIELD: dependency[self.UUID_FIELD], - } - # If the request is still running, stop processing here. # Returning None ensures that `revise_data()` will not be called afterwards. if ( @@ -822,10 +818,14 @@ def run_automated_process( # Since Celery is calling the same code, we want to ensure # it does not recall itself. if not celery_app.current_worker_task: + + celery_action_data = deepcopy(action_data) + celery_action_data.pop(self.DEPENDENCY_FIELD, None) + poll_run_automated_process.apply_async( kwargs={ 'submission': submission, - 'action_data': action_data, + 'action_data': celery_action_data, 'action_id': self.ID, 'asset_id': self.asset.pk, 'question_xpath': self.source_question_xpath, diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 4e85d13ef8..6ef65ea8eb 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -1,10 +1,10 @@ from typing import Any from .base import ActionClassConfig, BaseManualNLPAction -from .mixins import TranscriptionResultSchemaMixin +from .mixins import TranscriptionActionMixin -class ManualTranscriptionAction(TranscriptionResultSchemaMixin, BaseManualNLPAction): +class ManualTranscriptionAction(TranscriptionActionMixin, BaseManualNLPAction): ID = 'manual_transcription' action_class_config = ActionClassConfig(allow_multiple=False, automated=False) diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index 2f23c1a89c..1915a87f4c 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -1,10 +1,10 @@ from typing import Any from .base import ActionClassConfig, BaseManualNLPAction 
-from .mixins import TranslationResultSchemaMixin +from .mixins import TranslationActionMixin -class ManualTranslationAction(TranslationResultSchemaMixin, BaseManualNLPAction): +class ManualTranslationAction(TranslationActionMixin, BaseManualNLPAction): ID = 'manual_translation' action_class_config = ActionClassConfig( diff --git a/kobo/apps/subsequences/actions/mixins.py b/kobo/apps/subsequences/actions/mixins.py index 8b3a491d8c..5db88a4383 100644 --- a/kobo/apps/subsequences/actions/mixins.py +++ b/kobo/apps/subsequences/actions/mixins.py @@ -1,9 +1,14 @@ -class TranscriptionResultSchemaMixin: +from dateutil import parser + +from ..exceptions import TranscriptionNotFound + + +class TranscriptionActionMixin: """ - Provides the `result_schema` property used by all transcription-related actions. + Provides common methods and properties used by all transcription-related actions. - This mixin centralizes the schema definition so that both manual and automated - transcription classes can reuse the same structure consistently. + This mixin centralizes them so that both manual and automated transcription classes + can reuse the same structure consistently. """ @property @@ -54,14 +59,115 @@ def result_schema(self): return schema -class TranslationResultSchemaMixin: +class TranslationActionMixin: """ - Provides the `result_schema` property used by all translation-related actions. + Provides common methods and properties used by all translation-related actions. - This mixin centralizes the schema definition so that both manual and automated - translation classes can reuse the same structure consistently. + This mixin centralizes them so that both manual and automated translation classes + can reuse the same structure consistently. """ + def attach_action_dependency(self, action_data: dict): + """ + Attach the latest *accepted* transcript as a dependency for a translation + action. + + Selection logic: + - Scan `self._action_dependencies` for prior transcription actions. 
+ - Consider only versions that have a non-empty `DATE_ACCEPTED_FIELD`. + - Pick the version with the most recent acceptance timestamp. + - Prefer a specific `locale` if present; otherwise fall back to `language`. + + Side effects: + - Mutates and returns `action_data` by setting `action_data[DEPENDENCY_FIELD]` + to a sanitized dependency payload. + + Deletion guard: + - If the caller explicitly wants to delete the translation, i.e.: + `action_data['value']` equals `None`, this is treated no dependency is + attached. + + Injected payload (sanitized): + - 'value' : transcript text + - 'language' : locale if present, else base language + - '_uuid' : transcript UUID + - '_action_id' : source transcription action ID + + Raises: + - TranscriptionNotFound: if no accepted transcript is available. + """ + + latest_version = latest_accepted_dt = latest_version_action_id = None + + # If deletion has been requested, we do not want to attach any dependency. + if 'value' in action_data and action_data['value'] is None: + return action_data + + for action_id, action_supplemental_data in self._action_dependencies.items(): + + versions = action_supplemental_data.get(self.VERSION_FIELD) or [] + for version in versions: + # Skip versions without an acceptance timestamp. + accepted_raw = version.get(self.DATE_ACCEPTED_FIELD) + if not accepted_raw: + continue + + accepted_dt = parser.parse(accepted_raw) + + if latest_accepted_dt is None or accepted_dt > latest_accepted_dt: + latest_accepted_dt = accepted_dt + latest_version = version + latest_version_action_id = action_id + + if latest_version is None: + raise TranscriptionNotFound + + # Prefer a specific locale when available; otherwise use the base language. 
+ language_or_locale = ( + latest_version.get('locale') or latest_version['language'] + ) + + # Inject dependency property for translation service + action_data[self.DEPENDENCY_FIELD] = { + 'value': latest_version['value'], + 'language': language_or_locale, + self.UUID_FIELD: latest_version[self.UUID_FIELD], + self.ACTION_ID_FIELD: latest_version_action_id + } + + return action_data + + def get_action_dependencies(self, question_supplemental_data: dict) -> dict: + """ + Return only the supplemental data required by this action. + + This method inspects the full `question_supplemental_data` payload + and extracts a subset containing only the actions on which the + current action relies (e.g., transcription results needed before a + translation). It never mutates the original dictionary and does not + include unrelated entries—only the minimal keys and values needed + for this action to run correctly. + """ + + from ..actions.automated_google_transcription import ( + AutomatedGoogleTranscriptionAction + ) + from ..actions.manual_transcription import ManualTranscriptionAction + + transcription_action_ids = ( + AutomatedGoogleTranscriptionAction.ID, + ManualTranscriptionAction.ID, + ) + + for action_id in transcription_action_ids: + + action_supplemental_data = question_supplemental_data.get(action_id) + if not action_supplemental_data: + continue + self._action_dependencies[action_id] = action_supplemental_data + + return self._action_dependencies + @property def result_schema(self): localized_value_schema = { @@ -103,8 +209,40 @@ def result_schema(self): self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, self.UUID_FIELD: {'$ref': '#/$defs/uuid'}, + self.DEPENDENCY_FIELD: { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + self.UUID_FIELD: {'$ref': '#/$defs/uuid'}, + self.ACTION_ID_FIELD: {'type': 'string'}, + }, + 'required': [self.UUID_FIELD, self.ACTION_ID_FIELD], + }, }, - 
'required': [self.DATE_CREATED_FIELD, self.UUID_FIELD],
+ 'required': [
+ self.DATE_CREATED_FIELD,
+ self.UUID_FIELD,
+ ],
+ 'allOf': [
+ # Add conditional rule: `_dependency` is required unless `value`
+ # is explicitly null.
+ {
+ 'if': {
+ # If `value` exists and is null…
+ 'properties': {'value': {'type': 'null'}},
+ 'required': ['value']
+ },
+ # …then `_dependency` must be absent.
+ 'then': {
+ # When `value` is null, `_dependency` must be absent
+ 'not': {'required': ['_dependency']}
+ },
+ # Otherwise (value is absent or not null), `_dependency` is
+ # required.
+ 'else': {
+ 'required': ['_dependency']
+ }
+ }]
 },
 'uuid': {'type': 'string', 'format': 'uuid'},
 **data_schema_defs,
diff --git a/kobo/apps/subsequences/tests/constants.py b/kobo/apps/subsequences/tests/constants.py
index 1793c0f95a..a0208b885b 100644
--- a/kobo/apps/subsequences/tests/constants.py
+++ b/kobo/apps/subsequences/tests/constants.py
@@ -6,13 +6,28 @@
 '_dateModified': '2024-04-08T15:27:00Z',
 '_versions': [
 {
- 'value': 'My audio has been transcribed',
+ 'value': 'My audio has been transcribed automatically',
 'language': 'en',
 'status': 'completed',
- '_dateCreated': '2025-08-21T20:57:28.154567Z',
- '_dateAccepted': '2025-08-21T20:57:28.154567Z',
+ '_dateCreated': '2024-04-08T15:27:00Z',
+ '_dateAccepted': '2024-04-08T15:29:00Z',
 '_uuid': '4dcf9c9f-e503-4e5c-81f5-74250b295001',
 },
 ]
- }
+ },
+ 'manual_transcription': {
+ '_dateCreated': '2024-04-08T15:28:00Z',
+ '_dateModified': '2024-04-08T15:28:00Z',
+ '_versions': [
+ {
+ 'value': 'My audio has been transcribed manually',
+ 'language': 'en',
+ 'locale': 'en-CA',
+ 'status': 'completed',
+ '_dateCreated': '2024-04-08T15:28:00Z',
+ '_dateAccepted': '2024-04-08T15:28:00Z',
+ '_uuid': 'd69b9263-04fd-45b4-b011-2e166cfefd4a',
+ },
+ ],
+ },
 }
diff --git a/kobo/apps/subsequences/tests/test_automated_google_transcription.py b/kobo/apps/subsequences/tests/test_automated_google_transcription.py
index c583bf7cdd..3ef9f97671 100644
--- 
a/kobo/apps/subsequences/tests/test_automated_google_transcription.py
+++ b/kobo/apps/subsequences/tests/test_automated_google_transcription.py
@@ -22,7 +22,6 @@ def test_invalid_params_fail_validation():
 def test_valid_user_data_passes_validation():
 xpath = 'group_name/question_name' # irrelevant for this test
 params = [{'language': 'fr'}, {'language': 'es'}]
-
 action = AutomatedGoogleTranscriptionAction(xpath, params)
 
 allowed_data = [
@@ -34,9 +33,9 @@ def test_valid_user_data_passes_validation():
 {'language': 'fr', 'value': None},
 # Delete transcript with locale
 {'language': 'fr', 'locale': 'fr-CA', 'value': None},
- # Accept translation
+ # Accept transcript
 {'language': 'fr', 'accepted': True},
- # Accept translation with locale
+ # Accept transcript with locale
 {'language': 'fr', 'locale': 'fr-CA', 'accepted': True},
 ]
 
@@ -44,7 +43,7 @@
 action.validate_data(data)
 
 
-def test_valid_automated_translation_data_passes_validation():
+def test_valid_automated_transcription_data_passes_validation():
 xpath = 'group_name/question_name' # irrelevant for this test
 params = [{'language': 'fr'}, {'language': 'es'}]
 
@@ -66,12 +65,12 @@
 {'language': 'es', 'status': 'in_progress'},
 {'language': 'es', 'locale': 'fr-CA', 'status': 'in_progress'},
 # Store error with status
- {'language': 'es', 'status': 'failed', 'error': 'Translation failed'},
+ {'language': 'es', 'status': 'failed', 'error': 'Transcription failed'},
 {
 'language': 'es',
 'locale': 'fr-CA',
 'status': 'failed',
- 'error': 'Translation failed',
+ 'error': 'Transcription failed',
 },
 ]
 
@@ -89,13 +88,13 @@ def test_invalid_user_data_fails_validation():
 {'language': 'en'},
 # Empty data
 {},
- # Cannot push a translation
+ # Cannot push a transcription
 {'language': 'fr', 'value': 'Aucune idée'},
- # Cannot push a translation
+ # Cannot push a transcription
 {'language': 'fr', 'value': 'Aucune idée', 'status': 
'complete'}, - # Cannot push a translation + # Cannot push a transcription {'language': 'fr', 'value': 'Aucune idée', 'status': 'in_progress'}, - # Cannot push a translation + # Cannot push a transcription {'language': 'fr', 'value': 'Aucune idée', 'status': 'failed'}, # Cannot push a status {'language': 'fr', 'status': 'in_progress'}, @@ -120,9 +119,9 @@ def test_invalid_automated_data_fails_validation(): {'language': 'es', 'value': 'Ni idea', 'status': 'in_progress'}, # Cannot pass an empty object {}, - # Cannot accept an empty translation + # Cannot accept an empty transcription {'language': 'es', 'accepted': True}, - # Cannot deny an empty translation + # Cannot deny an empty transcription {'language': 'es', 'accepted': False}, # Cannot pass value and accepted at the same time {'language': 'es', 'value': None, 'accepted': False}, @@ -135,7 +134,7 @@ def test_invalid_automated_data_fails_validation(): # Delete transcript with locale without status {'language': 'fr', 'locale': 'fr-CA', 'value': None}, # failed with no status - {'language': 'es', 'error': 'Translation failed'}, + {'language': 'es', 'error': 'Transcription failed'}, # failed with no error {'language': 'es', 'status': 'failed'}, ] @@ -156,7 +155,7 @@ def test_valid_result_passes_validation(): fourth = {'language': 'fr', 'accepted': True} fifth = {'language': 'fr', 'value': None} six = {'language': 'es', 'value': 'seis'} - mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT mock_service = MagicMock() with patch( @@ -174,9 +173,7 @@ def test_valid_result_passes_validation(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) @@ -192,7 +189,7 @@ def test_acceptance_does_not_produce_versions(): first = {'language': 'fr', 'value': 'un'} second = {'language': 'fr', 'accepted': True} third = {'language': 'fr', 
'accepted': False} - mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT mock_service = MagicMock() with patch( @@ -210,9 +207,7 @@ def test_acceptance_does_not_produce_versions(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) assert '_versions' in mock_sup_det if data.get('value') is None: is_date_accepted_present = ( @@ -235,7 +230,7 @@ def test_invalid_result_fails_validation(): fourth = {'language': 'fr', 'accepted': True} fifth = {'language': 'fr', 'value': None} six = {'language': 'es', 'value': 'seis'} - mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT mock_service = MagicMock() with patch( @@ -253,9 +248,7 @@ def test_invalid_result_fails_validation(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) @@ -281,12 +274,7 @@ def test_transcription_versions_are_retained_in_supplemental_details(): ): value = first.pop('value', None) mock_service.process_data.return_value = {'value': value, 'status': 'complete'} - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, - EMPTY_SUPPLEMENT, - {}, - first, - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) assert mock_sup_det['_versions'][0]['language'] == 'es' assert mock_sup_det['_versions'][0]['value'] == 'Ni idea' @@ -301,9 +289,7 @@ def test_transcription_versions_are_retained_in_supplemental_details(): ): value = second.pop('value', None) mock_service.process_data.return_value = {'value': value, 'status': 'complete'} - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert 
len(mock_sup_det['_versions']) == 2 @@ -333,7 +319,7 @@ def test_latest_version_is_first(): second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT mock_service = MagicMock() with patch( 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa @@ -345,9 +331,7 @@ def test_latest_version_is_first(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) assert mock_sup_det['_versions'][0]['value'] == 'trois' assert mock_sup_det['_versions'][1]['value'] == 'deux' diff --git a/kobo/apps/subsequences/tests/test_automated_google_translation.py b/kobo/apps/subsequences/tests/test_automated_google_translation.py index 54dcde2954..fa5cf96a99 100644 --- a/kobo/apps/subsequences/tests/test_automated_google_translation.py +++ b/kobo/apps/subsequences/tests/test_automated_google_translation.py @@ -1,3 +1,4 @@ +from copy import deepcopy from unittest.mock import MagicMock, patch import dateutil @@ -21,10 +22,7 @@ def test_invalid_params_fail_validation(): def test_valid_user_data_passes_validation(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'es'}] - - action = AutomatedGoogleTranslationAction(xpath, params) + action = _get_action() allowed_data = [ # Trivial case @@ -46,10 +44,7 @@ def test_valid_user_data_passes_validation(): def test_valid_automated_translation_data_passes_validation(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'es'}] - - action = AutomatedGoogleTranslationAction(xpath, params) + action = _get_action() allowed_data = [ # Trivial case @@ -81,9 +76,7 @@ def test_valid_automated_translation_data_passes_validation(): def 
test_invalid_user_data_fails_validation(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranslationAction(xpath, params) + action = _get_action() invalid_data = [ # Wrong language @@ -110,9 +103,7 @@ def test_invalid_user_data_fails_validation(): def test_invalid_automated_data_fails_validation(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranslationAction(xpath, params) + action = _get_action() invalid_data = [ # Wrong language @@ -147,9 +138,7 @@ def test_invalid_automated_data_fails_validation(): def test_valid_result_passes_validation(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranslationAction(xpath, params) + action = _get_action() first = {'language': 'fr', 'value': 'un'} second = {'language': 'es', 'value': 'dos'} @@ -157,7 +146,7 @@ def test_valid_result_passes_validation(): fourth = {'language': 'fr', 'accepted': True} fifth = {'language': 'fr', 'value': None} six = {'language': 'es', 'value': 'seis'} - mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT mock_service = MagicMock() with patch( @@ -175,9 +164,7 @@ def test_valid_result_passes_validation(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) @@ -188,14 +175,12 @@ def test_valid_result_passes_validation(): def test_acceptance_does_not_produce_versions(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranslationAction(xpath, params) + action = _get_action() first = {'language': 'fr', 'value': 'un'} second = 
{'language': 'fr', 'accepted': True} third = {'language': 'fr', 'accepted': False} - mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT mock_service = MagicMock() with patch( @@ -214,19 +199,22 @@ def test_acceptance_does_not_produce_versions(): 'status': 'complete', } mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, data + EMPTY_SUBMISSION, mock_sup_det, data ) if data.get('value') is None: - is_date_accepted_present = mock_sup_det['fr']['_versions'][0].get('_dateAccepted') is None - assert is_date_accepted_present is not bool(data.get('accepted')) + is_date_accepted_present = ( + mock_sup_det['fr']['_versions'][0].get('_dateAccepted') + is None + ) + assert is_date_accepted_present is not bool( + data.get('accepted') + ) action.validate_result(mock_sup_det) def test_invalid_result_fails_validation(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranslationAction(xpath, params) + action = _get_action() first = {'language': 'fr', 'value': 'un'} second = {'language': 'es', 'value': 'dos'} @@ -234,7 +222,7 @@ def test_invalid_result_fails_validation(): fourth = {'language': 'fr', 'accepted': True} fifth = {'language': 'fr', 'value': None} six = {'language': 'es', 'value': 'seis'} - mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT mock_service = MagicMock() with patch( @@ -253,9 +241,7 @@ def test_invalid_result_fails_validation(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) @@ -268,9 +254,7 @@ def test_invalid_result_fails_validation(): def test_translation_versions_are_retained_in_supplemental_details(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'es'}] - 
action = AutomatedGoogleTranslationAction(xpath, params) + action = _get_action() first = {'language': 'es', 'value': 'Ni idea'} second = {'language': 'fr', 'value': 'Aucune idée'} @@ -284,12 +268,7 @@ def test_translation_versions_are_retained_in_supplemental_details(): ): value = first.pop('value', None) mock_service.process_data.return_value = {'value': value, 'status': 'complete'} - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, - QUESTION_SUPPLEMENT, - {}, - first, - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) assert mock_sup_det['es']['_versions'][0]['language'] == 'es' assert mock_sup_det['es']['_versions'][0]['value'] == 'Ni idea' @@ -302,9 +281,7 @@ def test_translation_versions_are_retained_in_supplemental_details(): ): value = second.pop('value', None) mock_service.process_data.return_value = {'value': value, 'status': 'complete'} - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, second - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert len(mock_sup_det.keys()) == 2 @@ -318,9 +295,7 @@ def test_translation_versions_are_retained_in_supplemental_details(): ): value = third.pop('value', None) mock_service.process_data.return_value = {'value': value, 'status': 'complete'} - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, third - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, third) assert len(mock_sup_det.keys()) == 2 @@ -342,15 +317,13 @@ def test_translation_versions_are_retained_in_supplemental_details(): def test_latest_version_is_first(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] - action = AutomatedGoogleTranslationAction(xpath, params) + action = _get_action() first = {'language': 'fr', 'value': 'un'} second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - 
mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT mock_service = MagicMock() with patch( 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', @@ -363,21 +336,16 @@ def test_latest_version_is_first(): 'value': value, 'status': 'complete', } - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, QUESTION_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) assert mock_sup_det['fr']['_versions'][0]['value'] == 'trois' assert mock_sup_det['fr']['_versions'][1]['value'] == 'deux' assert mock_sup_det['fr']['_versions'][2]['value'] == 'un' + def test_cannot_revise_data_without_transcription(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] - action = AutomatedGoogleTranslationAction(xpath, params) + action = _get_action(fetch_action_dependencies=False) - first = {'language': 'fr', 'value': 'un'} - mock_sup_det = {} mock_service = MagicMock() with patch( 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa @@ -389,49 +357,33 @@ def test_cannot_revise_data_without_transcription(): } with pytest.raises(TranscriptionNotFound): - # question supplement data is empty - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, {'language': 'fr'} - ) + action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, {'language': 'fr'}) + def test_find_the_most_recent_accepted_transcription(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] - action = AutomatedGoogleTranslationAction(xpath, params) + action = _get_action() - question_supplement_data = { - 'automated_google_transcription': { - '_dateCreated': '2024-04-08T15:27:00Z', - '_dateModified': '2024-04-08T15:27:00Z', - '_versions': [ - { - 'value': 'My audio has been transcribed automatically', - 'language': 'en', - 'status': 
'completed', - '_dateCreated': '2024-04-08T15:27:00Z', - '_dateAccepted': '2024-04-08T15:27:00Z', - '_uuid': '4dcf9c9f-e503-4e5c-81f5-74250b295001', - }, - ], - }, - 'manual_transcription': { - '_dateCreated': '2024-04-08T15:28:00Z', - '_dateModified': '2024-04-08T15:28:00Z', - '_versions': [ - { - 'value': 'My audio has been transcribed manually', - 'language': 'en', - 'locale': 'en-CA', - 'status': 'completed', - '_dateCreated': '2024-04-08T15:28:00Z', - '_dateAccepted': '2024-04-08T15:28:00Z', - '_uuid': 'd69b9263-04fd-45b4-b011-2e166cfefd4a', - }, - ], - }, + # Automated transcription is the most recent + action_data = {} + expected = { + '_dependency': { + 'value': 'My audio has been transcribed automatically', + 'language': 'en', + '_uuid': '4dcf9c9f-e503-4e5c-81f5-74250b295001', + '_actionId': 'automated_google_transcription', + } } + action_data = action.attach_action_dependency(action_data) + assert action_data == expected # Manual transcription is the most recent + question_supplement_data = deepcopy(QUESTION_SUPPLEMENT) + question_supplement_data['manual_transcription']['_versions'][0][ + '_dateAccepted' + ] = '2025-07-28T16:18:00Z' + action.get_action_dependencies(question_supplement_data) + + action_data = {} # not really relevant for this test expected = { '_dependency': { @@ -441,25 +393,15 @@ def test_find_the_most_recent_accepted_transcription(): '_actionId': 'manual_transcription', } } - action_data = action._get_action_data_dependency( - question_supplement_data, action_data - ) - assert action_data == expected - # Automated transcription is the most recent - action_data = {} - question_supplement_data['automated_google_transcription']['_versions'][0][ - '_dateAccepted' - ] = '2025-07-28T16:18:00Z' - expected = { - '_dependency': { - 'value': 'My audio has been transcribed automatically', - 'language': 'en', - '_uuid': '4dcf9c9f-e503-4e5c-81f5-74250b295001', - '_actionId': 'automated_google_transcription', - } - } - action_data = 
action._get_action_data_dependency( - question_supplement_data, action_data - ) + action_data = action.attach_action_dependency(action_data) assert action_data == expected + + +def _get_action(fetch_action_dependencies=True): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'es'}] + action = AutomatedGoogleTranslationAction(xpath, params) + if fetch_action_dependencies: + action.get_action_dependencies(QUESTION_SUPPLEMENT) + return action diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py index fbf5b48cac..cd1919a543 100644 --- a/kobo/apps/subsequences/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences/tests/test_manual_transcription.py @@ -61,11 +61,9 @@ def test_valid_result_passes_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) @@ -79,11 +77,9 @@ def test_invalid_result_fails_validation(): third = {'language': 'fr', 'value': 'trois'} fourth = {'language': 'fr', 'value': None} fifth = {'language': 'en', 'value': 'fifth'} - mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) # erroneously add '_dateModified' onto a revision mock_sup_det['_versions'][0]['_dateModified'] = mock_sup_det['_versions'][0][ @@ -101,12 +97,7 @@ def test_transcript_versions_are_retained_in_supplemental_details(): first 
= {'language': 'en', 'value': 'No idea'} second = {'language': 'fr', 'value': 'Aucune idée'} - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, - EMPTY_SUPPLEMENT, - {}, - first, - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) assert mock_sup_det['_dateCreated'] == mock_sup_det['_dateModified'] assert len(mock_sup_det['_versions']) == 1 @@ -114,9 +105,7 @@ def test_transcript_versions_are_retained_in_supplemental_details(): assert mock_sup_det['_versions'][0]['value'] == 'No idea' first_time = mock_sup_det['_dateCreated'] - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert len(mock_sup_det['_versions']) == 2 # the version should have a creation timestamp equal to that of the first @@ -144,17 +133,10 @@ def test_setting_transcript_to_empty_string(): first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 'fr', 'value': ''} - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, - EMPTY_SUPPLEMENT, - {}, - first, - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) assert mock_sup_det['_versions'][0]['value'] == 'Aucune idée' - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert mock_sup_det['_versions'][0]['value'] == '' assert mock_sup_det['_versions'][1]['value'] == 'Aucune idée' @@ -167,17 +149,10 @@ def test_setting_transcript_to_none(): first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 'fr', 'value': None} - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, - EMPTY_SUPPLEMENT, - {}, - first, - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) assert mock_sup_det['_versions'][0]['value'] == 'Aucune idée' - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, 
EMPTY_SUPPLEMENT, mock_sup_det, second - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert mock_sup_det['_versions'][0]['value'] is None assert mock_sup_det['_versions'][1]['value'] == 'Aucune idée' @@ -191,11 +166,9 @@ def test_latest_revision_is_first(): second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT for data in first, second, third: - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) assert mock_sup_det['_versions'][0]['value'] == 'trois' assert mock_sup_det['_versions'][1]['value'] == 'deux' diff --git a/kobo/apps/subsequences/tests/test_manual_translation.py b/kobo/apps/subsequences/tests/test_manual_translation.py index 9a3e046983..91073260bb 100644 --- a/kobo/apps/subsequences/tests/test_manual_translation.py +++ b/kobo/apps/subsequences/tests/test_manual_translation.py @@ -2,8 +2,9 @@ import jsonschema import pytest +from ..exceptions import TranscriptionNotFound from ..actions.manual_translation import ManualTranslationAction -from .constants import EMPTY_SUBMISSION, EMPTY_SUPPLEMENT +from .constants import EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, QUESTION_SUPPLEMENT def test_valid_params_pass_validation(): @@ -18,9 +19,8 @@ def test_invalid_params_fail_validation(): def test_valid_translation_data_passes_validation(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'es'}] - action = ManualTranslationAction(xpath, params) + action = _get_action() + # Trivial case data = {'language': 'fr', 'value': 'Aucune idée'} action.validate_data(data) @@ -35,11 +35,9 @@ def test_valid_translation_data_passes_validation(): def test_invalid_translation_data_fails_validation(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 
'fr'}, {'language': 'es'}] - action = ManualTranslationAction(xpath, params) + action = _get_action() - data = {'language': 'en', 'value': 'No idea'} + data = {'language': 'es', 'value': 'No idea'} with pytest.raises(jsonschema.exceptions.ValidationError): action.validate_data(data) @@ -49,9 +47,7 @@ def test_invalid_translation_data_fails_validation(): def test_valid_result_passes_validation(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] - action = ManualTranslationAction(xpath, params) + action = _get_action() first = {'language': 'fr', 'value': 'un'} second = {'language': 'en', 'value': 'two'} @@ -60,16 +56,12 @@ def test_valid_result_passes_validation(): fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = {} for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) action.validate_result(mock_sup_det) def test_invalid_result_fails_validation(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] - action = ManualTranslationAction(xpath, params) + action = _get_action() first = {'language': 'fr', 'value': 'un'} second = {'language': 'en', 'value': 'two'} @@ -78,9 +70,7 @@ def test_invalid_result_fails_validation(): fifth = {'language': 'en', 'value': 'fifth'} mock_sup_det = {} for data in first, second, third, fourth, fifth: - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) # erroneously add '_dateModified' onto a version first_version = mock_sup_det['en']['_versions'][0] @@ -91,19 +81,12 @@ def test_invalid_result_fails_validation(): def test_translation_versions_are_retained_in_supplemental_details(): - xpath = 'group_name/question_name' # 
irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] - action = ManualTranslationAction(xpath, params) + action = _get_action() first = {'language': 'en', 'value': 'No idea'} second = {'language': 'fr', 'value': 'Aucune idée'} third = {'language': 'en', 'value': 'No clue'} - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, - EMPTY_SUPPLEMENT, - {}, - first, - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) assert len(mock_sup_det.keys()) == 1 assert '_versions' in mock_sup_det['en'] @@ -113,18 +96,14 @@ def test_translation_versions_are_retained_in_supplemental_details(): first_time = mock_sup_det['en']['_dateCreated'] - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert len(mock_sup_det.keys()) == 2 assert '_versions' in mock_sup_det['fr'] assert mock_sup_det['fr']['_versions'][0]['language'] == 'fr' assert mock_sup_det['fr']['_versions'][0]['value'] == 'Aucune idée' assert mock_sup_det['fr']['_dateCreated'] == mock_sup_det['fr']['_dateModified'] - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, third - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, third) assert len(mock_sup_det.keys()) == 2 # the first version should have a creation timestamp equal to that of the first @@ -145,65 +124,63 @@ def test_translation_versions_are_retained_in_supplemental_details(): def test_setting_translation_to_empty_string(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] - action = ManualTranslationAction(xpath, params) + action = _get_action() first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 'fr', 'value': ''} - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, - EMPTY_SUPPLEMENT, - {}, - first, - ) + mock_sup_det = 
action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) assert mock_sup_det['fr']['_versions'][0]['value'] == 'Aucune idée' - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert mock_sup_det['fr']['_versions'][0]['value'] == '' assert mock_sup_det['fr']['_versions'][1]['value'] == 'Aucune idée' def test_setting_translation_to_none(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] - action = ManualTranslationAction(xpath, params) + action = _get_action() first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 'fr', 'value': None} - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, - EMPTY_SUPPLEMENT, - {}, - first, - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) assert mock_sup_det['fr']['_versions'][0]['value'] == 'Aucune idée' - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, second - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert mock_sup_det['fr']['_versions'][0]['value'] is None assert mock_sup_det['fr']['_versions'][1]['value'] == 'Aucune idée' def test_latest_version_is_first(): - xpath = 'group_name/question_name' # irrelevant for this test - params = [{'language': 'fr'}, {'language': 'en'}] - action = ManualTranslationAction(xpath, params) + action = _get_action() first = {'language': 'fr', 'value': 'un'} second = {'language': 'fr', 'value': 'deux'} third = {'language': 'fr', 'value': 'trois'} - mock_sup_det = {} + mock_sup_det = EMPTY_SUPPLEMENT for data in first, second, third: - mock_sup_det = action.revise_data( - EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, mock_sup_det, data - ) + mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) assert mock_sup_det['fr']['_versions'][0]['value'] == 'trois' assert 
mock_sup_det['fr']['_versions'][1]['value'] == 'deux' assert mock_sup_det['fr']['_versions'][2]['value'] == 'un' + + +def test_cannot_revise_data_without_transcription(): + action = _get_action(fetch_action_dependencies=False) + + with pytest.raises(TranscriptionNotFound): + action.revise_data( + EMPTY_SUBMISSION, + EMPTY_SUPPLEMENT, + {'language': 'fr', 'value': 'un'}, + ) + + +def _get_action(fetch_action_dependencies=True): + xpath = 'group_name/question_name' # irrelevant for this test + params = [{'language': 'fr'}, {'language': 'en'}] + action = ManualTranslationAction(xpath, params) + if fetch_action_dependencies: + action.get_action_dependencies(QUESTION_SUPPLEMENT) + return action diff --git a/kobo/apps/subsequences/tests/test_models.py b/kobo/apps/subsequences/tests/test_models.py index 8a3b0b67e5..6e49e1c530 100644 --- a/kobo/apps/subsequences/tests/test_models.py +++ b/kobo/apps/subsequences/tests/test_models.py @@ -40,7 +40,7 @@ class SubmissionSupplementTestCase(TestCase): '_versions': [ { 'language': 'ar', - 'value': 'فارغ', + 'value': 'مجنون', '_dateCreated': '2024-04-08T15:31:00Z', '_dateAccepted': '2024-04-08T15:31:00Z', '_uuid': '51ff33a5-62d6-48ec-94b2-2dfb406e1dee', @@ -64,6 +64,10 @@ class SubmissionSupplementTestCase(TestCase): '_dateCreated': '2024-04-08T15:27:00Z', '_dateAccepted': '2024-04-08T15:27:00Z', '_uuid': '22b04ce8-61c2-4383-836f-5d5f0ad73645', + '_dependency': { + '_uuid': '123e4567-e89b-12d3-a456-426614174000', + '_actionId': 'manual_transcription' + } }], }, 'es': { @@ -76,6 +80,10 @@ class SubmissionSupplementTestCase(TestCase): '_dateCreated': '2024-04-08T15:32:00Z', '_dateAccepted': '2024-04-08T15:32:00Z', '_uuid': 'd69b9263-04fd-45b4-b011-2e166cfefd4a', + '_dependency': { + '_uuid': '51ff33a5-62d6-48ec-94b2-2dfb406e1dee', + '_actionId': 'manual_transcription' + } }, { 'language': 'es', @@ -83,6 +91,10 @@ class SubmissionSupplementTestCase(TestCase): '_dateCreated': '2024-04-08T15:29:00Z', '_dateAccepted': 
'2024-04-08T15:29:00Z', '_uuid': '30d0f39c-a1dd-43fe-999a-844f12f83d31', + '_dependency': { + '_uuid': '123e4567-e89b-12d3-a456-426614174000', + '_actionId': 'manual_transcription' + } } ], }, @@ -261,7 +273,7 @@ def test_revise_data(self): == 1 ) - # 3) Call with transcription ar = 'فارغ' + # 3) Call with transcription ar = 'مجنون' frozen_datetime_now = datetime(2024, 4, 8, 15, 31, 0, tzinfo=ZoneInfo('UTC')) with freeze_time(frozen_datetime_now): submission_supplement = SubmissionSupplement.revise_data( @@ -272,7 +284,7 @@ def test_revise_data(self): 'group_name/question_name': { 'manual_transcription': { 'language': 'ar', - 'value': 'فارغ', + 'value': 'مجنون', }, }, }, From 0f29748d60158f582b04651cb3ef8f4fc43a0263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Mon, 29 Sep 2025 16:26:03 -0400 Subject: [PATCH 108/138] Test Celery is triggered when task is in progress --- .../test_automated_google_translation.py | 26 ++++++++++++++++++- .../tests/test_manual_translation.py | 2 ++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/kobo/apps/subsequences/tests/test_automated_google_translation.py b/kobo/apps/subsequences/tests/test_automated_google_translation.py index fa5cf96a99..c83ff1305a 100644 --- a/kobo/apps/subsequences/tests/test_automated_google_translation.py +++ b/kobo/apps/subsequences/tests/test_automated_google_translation.py @@ -8,6 +8,7 @@ from ..actions.automated_google_translation import AutomatedGoogleTranslationAction from .constants import EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, QUESTION_SUPPLEMENT from ..exceptions import TranscriptionNotFound +from ..tasks import poll_run_automated_process def test_valid_params_pass_validation(): @@ -398,10 +399,33 @@ def test_find_the_most_recent_accepted_transcription(): assert action_data == expected +def test_action_is_updated_in_background_if_in_progress(): + action = _get_action() + mock_service = MagicMock() + submission = {'meta/rootUuid': '123-abdc'} + + with patch( + 
'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa + return_value=mock_service, + ): + mock_service.process_data.return_value = {'status': 'in_progress'} + with patch( + 'kobo.apps.subsequences.actions.base.poll_run_automated_process' + ) as task_mock: + action.revise_data( + submission, EMPTY_SUPPLEMENT, {'language': 'fr'} + ) + + task_mock.apply_async.assert_called_once() + + def _get_action(fetch_action_dependencies=True): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranslationAction(xpath, params) + mock_asset = MagicMock() + mock_asset.pk = 1 + mock_asset.owner.pk = 1 + action = AutomatedGoogleTranslationAction(xpath, params, asset=mock_asset) if fetch_action_dependencies: action.get_action_dependencies(QUESTION_SUPPLEMENT) return action diff --git a/kobo/apps/subsequences/tests/test_manual_translation.py b/kobo/apps/subsequences/tests/test_manual_translation.py index 91073260bb..f774e290d4 100644 --- a/kobo/apps/subsequences/tests/test_manual_translation.py +++ b/kobo/apps/subsequences/tests/test_manual_translation.py @@ -177,6 +177,8 @@ def test_cannot_revise_data_without_transcription(): ) + + def _get_action(fetch_action_dependencies=True): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] From 1beb46f6149b2952f830a023dec22e7a6e8fb958 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Mon, 29 Sep 2025 16:53:07 -0400 Subject: [PATCH 109/138] Fix dependency field on error --- kobo/apps/subsequences/tasks.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/kobo/apps/subsequences/tasks.py b/kobo/apps/subsequences/tasks.py index fd922e4b42..dbfcb3e2e3 100644 --- a/kobo/apps/subsequences/tasks.py +++ b/kobo/apps/subsequences/tasks.py @@ -64,7 +64,11 @@ def poll_run_automated_process_failure(sender=None, 
**kwargs): action_id = kwargs['kwargs']['action_id'] action_data = kwargs['kwargs']['action_data'] - asset = Asset.objects.only('pk', 'owner_id', 'advanced_features').get(id=asset_id) + asset = ( + Asset.objects.only('pk', 'owner_id', 'advanced_features') + .select_related('owner') + .get(id=asset_id) + ) supplemental_data = SubmissionSupplement.retrieve_data( asset, submission_root_uuid=submission[SUBMISSION_UUID_FIELD] @@ -76,14 +80,19 @@ def poll_run_automated_process_failure(sender=None, **kwargs): action_configs = asset.advanced_features['_actionConfigs'] action_configs_for_this_question = action_configs[question_xpath] action_params = action_configs_for_this_question[action_id] - action = action_class(question_xpath, action_params) + action = action_class(question_xpath, action_params, asset=asset) + action.get_action_dependencies(supplemental_data[question_xpath]) action_supplemental_data = supplemental_data[question_xpath][action_id] action_data.update({ 'error': error, - 'status': 'failed', # TODO maybe add dependency? + 'status': 'failed', }) - dependency_supplemental_data = {} + # FIXME We assume that the last action is the one in progress but it could + # be another one. 
+ dependency_supplemental_data = action_supplemental_data['_versions'][0].get( + action.DEPENDENCY_FIELD + ) new_action_supplemental_data = action.get_new_action_supplemental_data( action_supplemental_data, action_data, dependency_supplemental_data From 4d6abfc4b6e0ef7080c41b15edb8e52a3e0e968a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Tue, 30 Sep 2025 09:16:35 -0400 Subject: [PATCH 110/138] Update README --- kobo/apps/subsequences/README.md | 76 ++++++++++++++++++++------ kobo/apps/subsequences/actions/base.py | 5 +- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md index 280a90f9cf..7e73fc2a01 100644 --- a/kobo/apps/subsequences/README.md +++ b/kobo/apps/subsequences/README.md @@ -42,8 +42,10 @@ class BaseManualNLPAction { } class BaseAutomatedNLPAction { + +attach_action_dependency() [abstract] +automated_data_schema [property] +data_schema [property] + +get_action_dependencies() [abstract] +run_automated_process() +get_nlp_service_class() [abstract] } @@ -59,6 +61,8 @@ class TranscriptionActionMixin { +result_schema [property] } class TranslationActionMixin { + +attach_action_dependency() + +get_action_dependencies() +result_schema [property] } @@ -155,7 +159,12 @@ PATCH /api/v2/assets//data//supplement/ ### 2.3 Sequence Diagram (End-to-End Flow) -> This diagram illustrates the complete call flow from the client request to persistence. +This section explains how the system handles a supplement from the initial +client request, through validation and optional background retries. + +#### 2.3.1 Sequence Diagram – End-to-End + +> The diagram shows the synchronous request until the first response. 
```mermaid sequenceDiagram @@ -165,6 +174,7 @@ participant API as KPI API participant SS as SubmissionSupplement participant Action as Action (Manual/Automated) participant Ext as NLP Service (if automated) +participant Celery as Celery Worker participant DB as Database Client->>API: POST /assets//data//supplement @@ -180,6 +190,9 @@ loop For each action in _actionConfigs Action->>Action: run_automated_process() Action->>Ext: Call external NLP service Ext-->>Action: Response (augmented payload) + alt status == "in_progress" + Action->>Celery: enqueue poll_automated_process task + end Action->>Action: Validate with automated_data_schema end @@ -194,34 +207,61 @@ API-->>Client: 200 OK (or error) --- -### 2.4 Flowchart (Logic inside `revise_data` per Action) +#### 2.3.2 Background Polling with Celery + +If run_automated_process receives a response like: + +```json +{"status": "in_progress"} +``` + + +a Celery task (e.g. poll_automated_process) is queued. +This task will periodically re-invoke the external service until the action’s +status becomes complete or a maximum retry limit is reached. +The task uses the same validation chain (automated_data_schema → result_schema) +before persisting the final revision. + +--- + +#### 2.3.3 Flowchart (Logic inside `revise_data` per Action) > This diagram shows the decision tree when validating and processing a single action payload. 
```mermaid flowchart TB A[Incoming action payload] - B{Validate with data schema} + B[Attach action dependency] C{Is automated action?} - D[Build version] - G[Validate with result schema] - H[Save to DB] - I[Done] - F[Run automated process] - J[Validate with automated data schema] - E[Return 4xx error] + D[Add dependency supplemental data if any] + E[Build version] + F[Validate with result schema] + G[Save to DB] + H[Done] + I[Run automated process] + J[Sanitize dependency supplemental data] + K[Validate with automated data schema] + L[Enqueue Celery task poll_automated_process] + M[Return 4xx error] + N{Status in_progress?} A --> B - B -->|fail| E - B -->|ok| C + B --> C C -->|no| D - D --> G + D --> E + E --> F + F --> G G --> H - H --> I - C -->|yes| F - F --> J - J -->|fail| E - J -->|ok| D + + C -->|yes| I + I --> N + N -->|yes| L + N -->|no| J + J --> K + K -->|fail| M + K -->|ok| E + + B -->|invalid dependency| M ``` --- diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 7c549a9918..7e4f542dfd 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -409,8 +409,9 @@ def revise_data( accepted = True if dependency_supplemental_data: - # Sanitize 'dependency' before persisting: keep only stable identifiers and - # drop all other fields (e.g., 'value', 'language', timestamps). + # Sanitize 'dependency' before persisting: keep only a reference of the + # dependency (with '_actionId' and '_uuid') and drop all other fields + # (e.g., 'value', 'language', timestamps). 
dependency_supplemental_data = { self.ACTION_ID_FIELD: dependency_supplemental_data[self.ACTION_ID_FIELD], self.UUID_FIELD: dependency_supplemental_data[self.UUID_FIELD], From b42f23dd2ca65e0cf0e28d115402c2cfd263ba2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Tue, 30 Sep 2025 14:42:50 -0400 Subject: [PATCH 111/138] Update README --- kobo/apps/subsequences/README.md | 165 ++++++++++++++++++++++++++++--- 1 file changed, 153 insertions(+), 12 deletions(-) diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md index 7e73fc2a01..3bd95093a6 100644 --- a/kobo/apps/subsequences/README.md +++ b/kobo/apps/subsequences/README.md @@ -268,18 +268,159 @@ flowchart TB ## 3. Where Schemas Apply -- **`params_schema`** (class-level attribute, `BaseManualNLPAction`) - Defines the schema for the parameters used to instantiate the action. - These parameters are configured when the action is enabled on the **Asset** - and are stored under `Asset.advanced_features`. - > Example: `[ { "language": "en" }, { "language": "es" } ]` +Every action relies on a set of schemas to validate its lifecycle: +- **`params_schema`** – defines how the action is instantiated and configured on the Asset. +- **`data_schema`** – validates the client payload sent in supplements. +- **`automated_data_schema`** – extends `data_schema` for automated actions by adding status and system-generated fields. +- **`result_schema`** – validates the persisted revision format, including metadata and version history. -- **`data_schema`** (property) - Validates the **client payload** for a given action. - > Example: `{ "language": "en", "value": "My transcript" }` +--- + +### 3.1 `params_schema` + +Defined on all classes inheriting from `BaseAction`. +It describes the configuration stored in `Asset.advanced_features` when an action is enabled. 
+ +**Example: enabling Manual Transcription in English and Spanish** + +```json +{ + "audio_question": { + "manual_transcription": [ + { "language": "en" }, + { "language": "es" } + ] + } +} +``` + +--- + +### 3.2 `data_schema` + +Validates the **client payload** sent for a supplement. +Each action has its own expected format: + +- **Manual Transcription** + ```json + { "language": "en", "value": "My transcript" } + ``` + +- **Manual Translation** + ```json + { "language": "en", "value": "My translation" } + ``` + +- **Automated Transcription / Automated Translation** + ```json + { "language": "en" } + ``` + +- **All actions – delete request** + ```json + { "language": "en", "value": null } + ``` + +--- + +### 3.3 `automated_data_schema` + +Used only for **automated actions** (`BaseAutomatedNLPAction`). +It validates the **augmented payload** returned by the external service. + +- **Example (complete)** + ```json + { "language": "en", "value": "My automated result", "status": "complete" } + ``` + +- **Example (in progress)** + ```json + { "language": "en", "status": "in_progress" } + ``` -- **`automated_data_schema`** (property, automated actions only) - Validates the **augmented payload** returned by the external service. +- **Example (deleted)** + ```json + { "language": "en", "status": "deleted", "value": null } + ``` + +- **Example (failed)** + ```json + { "language": "en", "status": "failed", "error": "Could not process action" } + ``` + +--- + +### 3.4 `result_schema` + +Validates the **revision JSON** persisted in the database. +The structure is the same for both manual and automated actions: + +- Metadata about the action itself (`_dateCreated`, `_dateModified`). +- A list of versions under `_versions`, each containing: + - The properties defined by `data_schema` (manual) or `automated_data_schema` (automated). + - Audit fields (`_dateCreated`, `_dateAccepted`, `_uuid`). 
+ +**Generic Example** + +```json +{ + "_dateCreated": "2025-08-21T20:55:42Z", + "_dateModified": "2025-08-21T20:57:28Z", + "_versions": [ + { + "language": "en", + "value": "My automated result", + "status": "complete", + "_dateCreated": "2025-08-21T20:57:28Z", + "_dateAccepted": "2025-08-21T20:57:28Z", + "_uuid": "4dcf9c9f-e503-4e5c-81f5-74250b295001" + }, + { + "language": "en", + "value": "Previous revision", + "status": "complete", + "_dateCreated": "2025-08-21T20:55:42Z", + "_dateAccepted": "2025-08-21T20:55:42Z", + "_uuid": "850e6359-50e8-4252-9895-e9669a27b1ea" + } + ] +} +``` + +> For manual actions, the inner version objects correspond to `data_schema`. +> +> For automated actions, they correspond to `automated_data_schema`. + +--- + +### 3.5 `result_schema` with dependencies + +Some actions depend on the result of other actions. +For example, a **translation** action requires an existing **transcription**. +In this case, a `_dependency` property is added to the persisted JSON. + +**Example: Automated Translation result depending on an Automated Transcription** + +```json +{ + "_dateCreated": "2025-09-01T12:15:42Z", + "_dateModified": "2025-09-01T12:17:28Z", + "_versions": [ + { + "language": "fr", + "value": "Mon audio a été traduit automatiquement", + "status": "complete", + "_dateCreated": "2025-09-01T12:17:28Z", + "_uuid": "91ab5f30-0f73-4e2e-b91f-8ad2f67a4729", + "_dependency": { + "_uuid": "4dcf9c9f-e503-4e5c-81f5-74250b295001", + "_actionId": "automated_google_transcription" + } + } + ] +} +``` -- **`result_schema`** (property, via mixin) - Validates the **version JSON** that is persisted and returned. +- The `_dependency` object references the transcription result that the translation was built upon. +- It reuses the UUID and action ID from the transcription’s persisted result, ensuring referential integrity. +- This allows clients to trace back a translation to the exact transcription version it relied on. 
From f550303a9f3c9230c56360f1d1408a788ccccaf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 1 Oct 2025 09:40:50 -0400 Subject: [PATCH 112/138] WIP: migrate advanced_features and submission supplements --- kobo/apps/subsequences/schemas.py | 10 +++-- .../subsequences/utils/supplement_data.py | 9 +++-- kobo/apps/subsequences/utils/versioning.py | 39 +++++++++++++++++++ kpi/deployment_backends/openrosa_backend.py | 4 +- kpi/models/asset.py | 7 ++++ kpi/views/v2/data.py | 4 +- 6 files changed, 63 insertions(+), 10 deletions(-) diff --git a/kobo/apps/subsequences/schemas.py b/kobo/apps/subsequences/schemas.py index 60f5fd924f..333ff79c32 100644 --- a/kobo/apps/subsequences/schemas.py +++ b/kobo/apps/subsequences/schemas.py @@ -1,6 +1,8 @@ import jsonschema from .actions import ACTION_IDS_TO_CLASSES, ACTIONS +from .constants import SCHEMA_VERSIONS +from .utils.versioning import migrate_advanced_features # not the full complexity of XPath, but a slash-delimited path of valid XML tag # names to convey group hierarchy @@ -31,13 +33,13 @@ def validate_submission_supplement(asset: 'kpi.models.Asset', supplement: dict): def get_submission_supplement_schema(asset: 'kpi.models.Asset') -> dict: - if asset.advanced_features.get('_version') != '20250820': - # TODO: migrate from old per-asset schema - raise NotImplementedError + + if migrated_schema := migrate_advanced_features(asset.advanced_features): + asset.advanced_features = migrated_schema submission_supplement_schema = { 'additionalProperties': False, - 'properties': {'_version': {'const': '20250820'}}, + 'properties': {'_version': {'const': SCHEMA_VERSIONS[0]}}, 'type': 'object', } diff --git a/kobo/apps/subsequences/utils/supplement_data.py b/kobo/apps/subsequences/utils/supplement_data.py index 9a82fbead2..41e9c1d2fb 100644 --- a/kobo/apps/subsequences/utils/supplement_data.py +++ b/kobo/apps/subsequences/utils/supplement_data.py @@ -2,8 +2,10 @@ from 
kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix from kobo.apps.subsequences.actions import ACTION_IDS_TO_CLASSES -from kobo.apps.subsequences.constants import SUBMISSION_UUID_FIELD, SUPPLEMENT_KEY +from kobo.apps.subsequences.constants import SUBMISSION_UUID_FIELD, SUPPLEMENT_KEY, \ + SCHEMA_VERSIONS from kobo.apps.subsequences.models import SubmissionSupplement +from kobo.apps.subsequences.utils.versioning import migrate_advanced_features def get_supplemental_output_fields(asset: 'kpi.models.Asset') -> list[dict]: @@ -37,9 +39,8 @@ def get_supplemental_output_fields(asset: 'kpi.models.Asset') -> list[dict]: """ advanced_features = asset.advanced_features - if advanced_features.get('_version') != '20250820': - # TODO: add a migration to update the schema version - raise NotImplementedError() + if migrated_schema := migrate_advanced_features(advanced_features): + asset.advanced_features = migrated_schema output_fields_by_name = {} # FIXME: `_actionConfigs` is 👎 and should be dropped in favor of top-level configs, eh? 
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py index 2cd6f646b9..aba7b21852 100644 --- a/kobo/apps/subsequences/utils/versioning.py +++ b/kobo/apps/subsequences/utils/versioning.py @@ -1,5 +1,44 @@ from ..constants import SCHEMA_VERSIONS + +def migrate_advanced_features(advanced_features: dict) -> dict | None: + + if advanced_features.get('_version') == SCHEMA_VERSIONS[0]: + return + + migrated_advanced_features = { + '_version': SCHEMA_VERSIONS[0], + '_actionConfigs': {} + } + + actionConfigs = migrated_advanced_features['_actionConfigs'] + for key, value in advanced_features.items(): + if ( + key == 'transcript' + and value + and 'languages' in value + and value['languages'] + ): + actionConfigs['manual_transcription'] = [ + {'language': language} for language in value['languages'] + ] + + if ( + key == 'translation' + and value + and 'languages' in value + and value['languages'] + ): + actionConfigs['manual_translation'] = [ + {'language': language} for language in value['languages'] + ] + + if key == 'qual': + raise NotImplementedError + + return migrated_advanced_features + + def set_version(schema: dict) -> dict: schema['_version'] = SCHEMA_VERSIONS[0] return schema diff --git a/kpi/deployment_backends/openrosa_backend.py b/kpi/deployment_backends/openrosa_backend.py index 3877929c5a..774caf1efa 100644 --- a/kpi/deployment_backends/openrosa_backend.py +++ b/kpi/deployment_backends/openrosa_backend.py @@ -342,6 +342,7 @@ def duplicate_submission( # very un-Pythonic! if element is not None: element.text = date_formatted + # Rely on `meta/instanceID` being present. 
If it's absent, something is # fishy enough to warrant raising an exception instead of continuing # silently @@ -389,6 +390,7 @@ def edit_submission( The returned Response should be in XML (expected format by Enketo Express) """ + user = request.user submission_xml = xml_submission_file.read() try: @@ -1518,7 +1520,7 @@ def __get_submissions_in_json( add_supplements_to_query = self.asset.has_advanced_features fields = params.get('fields', []) - if len(fields) > 0 and '_uuid' not in fields: + if len(fields) > 0 and self.SUBMISSION_ROOT_UUID_XPATH not in fields: # skip the query if submission '_uuid' is not even q'd from mongo add_supplements_to_query = False diff --git a/kpi/models/asset.py b/kpi/models/asset.py index 25b8011545..87b0f35e50 100644 --- a/kpi/models/asset.py +++ b/kpi/models/asset.py @@ -21,6 +21,7 @@ from kobo.apps.reports.constants import DEFAULT_REPORTS_KEY, SPECIFIC_REPORTS_KEY from kobo.apps.subsequences.schemas import ACTION_PARAMS_SCHEMA from kobo.apps.subsequences.utils.supplement_data import get_supplemental_output_fields +from kobo.apps.subsequences.utils.versioning import migrate_advanced_features from kpi.constants import ( ASSET_TYPE_BLOCK, ASSET_TYPE_COLLECTION, @@ -1154,6 +1155,12 @@ def validate_advanced_features(self): if self.advanced_features is None: self.advanced_features = {} + if migrated_schema := migrate_advanced_features(self.advanced_features): + self.advanced_features = migrated_schema + # We should save the new schema, but for debugging purposes, + # we don't yet! 
+ # self.save(update_fields=['advanced_features']) + jsonschema.validate( instance=self.advanced_features, schema=ACTION_PARAMS_SCHEMA, diff --git a/kpi/views/v2/data.py b/kpi/views/v2/data.py index cac0a5f120..6e5753981a 100644 --- a/kpi/views/v2/data.py +++ b/kpi/views/v2/data.py @@ -284,7 +284,9 @@ def duplicate(self, request, submission_id_or_root_uuid: int, *args, **kwargs): # Coerce to int because the back-end only finds matches with the same type submission_id = positive_int(submission_id_or_root_uuid) original_submission = deployment.get_submission( - submission_id=submission_id, user=request.user, fields=['_id', '_uuid'] + submission_id=submission_id, + user=request.user, + fields=['_id', '_uuid', 'meta/rootUuid'], ) with http_open_rosa_error_handler( From 0a92a24b339eb791f605e453a941a054333d56b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Tue, 21 Oct 2025 16:29:33 -0400 Subject: [PATCH 113/138] Reactivate limits --- kobo/apps/subsequences/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index b292538256..d66e97f25f 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -83,8 +83,7 @@ def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> di raise InvalidAction from e action = action_class(question_xpath, action_params, asset) - # TODO REMOVE The comment below - # action.check_limits(asset.owner) + action.check_limits(asset.owner) question_supplemental_data = supplemental_data.setdefault( question_xpath, {} From 9aec68091f080300122b6296fd2952224aed8ae5 Mon Sep 17 00:00:00 2001 From: Rebecca Graber Date: Wed, 5 Nov 2025 12:18:14 -0500 Subject: [PATCH 114/138] refactor(subsequences): rename "automated" to "automatic" (#6446) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### 💭 Notes Rename "automated_google_\*" actions to 
"automatIC_google_\*". Also renames a few other methods for clarity. The unit tests that are failing are failing on the base branch as well. --- kobo/apps/subsequences/README.md | 74 +++++++++---------- kobo/apps/subsequences/actions/__init__.py | 8 +- ...n.py => automatic_google_transcription.py} | 12 +-- ...ion.py => automatic_google_translation.py} | 16 ++-- kobo/apps/subsequences/actions/base.py | 62 ++++++++-------- .../actions/manual_transcription.py | 2 +- .../actions/manual_translation.py | 2 +- kobo/apps/subsequences/actions/mixins.py | 18 ++--- kobo/apps/subsequences/tasks.py | 6 +- .../tests/api/v2/test_validation.py | 42 +++++------ kobo/apps/subsequences/tests/constants.py | 2 +- ...=> test_automatic_google_transcription.py} | 44 +++++------ ...y => test_automatic_google_translation.py} | 45 +++++------ .../subsequences/utils/supplement_data.py | 4 +- 14 files changed, 166 insertions(+), 171 deletions(-) rename kobo/apps/subsequences/actions/{automated_google_transcription.py => automatic_google_transcription.py} (93%) rename kobo/apps/subsequences/actions/{automated_google_translation.py => automatic_google_translation.py} (90%) rename kobo/apps/subsequences/tests/{test_automated_google_transcription.py => test_automatic_google_transcription.py} (89%) rename kobo/apps/subsequences/tests/{test_automated_google_translation.py => test_automatic_google_translation.py} (91%) diff --git a/kobo/apps/subsequences/README.md b/kobo/apps/subsequences/README.md index 3bd95093a6..fbdaa2dee7 100644 --- a/kobo/apps/subsequences/README.md +++ b/kobo/apps/subsequences/README.md @@ -1,7 +1,7 @@ # Subsequence Actions – Supplement Processing Flow This document explains the full flow when a client submits a **supplement** payload to the API. 
-It covers how the payload is validated through the various schemas (`params_schema`, `data_schema`, `automated_data_schema`, `result_schema`), how external NLP services are invoked for automated actions, and how versions are created and persisted. +It covers how the payload is validated through the various schemas (`params_schema`, `data_schema`, `external_data_schema`, `result_schema`), how external NLP services are invoked for automatic actions, and how versions are created and persisted. --- @@ -28,12 +28,12 @@ direction TB %% ==== Bases ==== class BaseAction { <> - +automated_data_schema [abstract][property] + +external_data_schema [abstract][property] +data_schema [abstract][property] +result_schema [abstract][property] +retrieve_data() +revise_data() - +run_automated_process() [abstract] + +run_external_process() [abstract] } class BaseManualNLPAction { @@ -41,20 +41,20 @@ class BaseManualNLPAction { +data_schema [property] } -class BaseAutomatedNLPAction { +class BaseAutomaticNLPAction { +attach_action_dependency() [abstract] - +automated_data_schema [property] + +external_data_schema [property] +data_schema [property] +get_action_dependencies() [abstract] - +run_automated_process() + +run_external_process() +get_nlp_service_class() [abstract] } %% ==== Concrete ==== class ManualTranscription class ManualTranslation -class AutomatedGoogleTranscription -class AutomatedGoogleTranslation +class AutomaticGoogleTranscription +class AutomaticGoogleTranslation %% ==== Mixins (provide result_schema) ==== class TranscriptionActionMixin { @@ -68,19 +68,19 @@ class TranslationActionMixin { %% ==== Inheritance (bases) ==== BaseAction <|-- BaseManualNLPAction -BaseManualNLPAction <|-- BaseAutomatedNLPAction +BaseManualNLPAction <|-- BaseAutomaticNLPAction %% ==== Inheritance (concretes) ==== BaseManualNLPAction <|-- ManualTranscription BaseManualNLPAction <|-- ManualTranslation -BaseAutomatedNLPAction <|-- AutomatedGoogleTranscription -BaseAutomatedNLPAction <|-- 
AutomatedGoogleTranslation +BaseAutomaticNLPAction <|-- AutomaticGoogleTranscription +BaseAutomaticNLPAction <|-- AutomaticGoogleTranslation %% ==== Mixins -> Concretes ==== TranscriptionActionMixin <.. ManualTranscription : mixin -TranscriptionActionMixin <.. AutomatedGoogleTranscription : mixin +TranscriptionActionMixin <.. AutomaticGoogleTranscription : mixin TranslationActionMixin <.. ManualTranslation : mixin -TranslationActionMixin <.. AutomatedGoogleTranslation : mixin +TranslationActionMixin <.. AutomaticGoogleTranslation : mixin ``` --- @@ -172,8 +172,8 @@ autonumber actor Client participant API as KPI API participant SS as SubmissionSupplement -participant Action as Action (Manual/Automated) -participant Ext as NLP Service (if automated) +participant Action as Action (Manual/Automatic) +participant Ext as NLP Service (if automatic) participant Celery as Celery Worker participant DB as Database @@ -186,14 +186,14 @@ loop For each action in _actionConfigs SS->>Action: action.revise_data(one_action_payload) Note right of Action: Validate with data_schema - alt Action is automated (BaseAutomatedNLPAction) - Action->>Action: run_automated_process() + alt Action is automatic (BaseAutomaticNLPAction) + Action->>Action: run_external_process() Action->>Ext: Call external NLP service Ext-->>Action: Response (augmented payload) alt status == "in_progress" - Action->>Celery: enqueue poll_automated_process task + Action->>Celery: enqueue poll_external_process task end - Action->>Action: Validate with automated_data_schema + Action->>Action: Validate with external_data_schema end Action->>Action: Build new version @@ -209,17 +209,17 @@ API-->>Client: 200 OK (or error) #### 2.3.2 Background Polling with Celery -If run_automated_process receives a response like: +If run_external_process receives a response like: ```json {"status": "in_progress"} ``` -a Celery task (e.g. poll_automated_process) is queued. +a Celery task (e.g. poll_external_process) is queued. 
This task will periodically re-invoke the external service until the action’s status becomes complete or a maximum retry limit is reached. -The task uses the same validation chain (automated_data_schema → result_schema) +The task uses the same validation chain (external_data_schema → result_schema) before persisting the final revision. --- @@ -232,16 +232,16 @@ before persisting the final revision. flowchart TB A[Incoming action payload] B[Attach action dependency] - C{Is automated action?} + C{Is automatic action?} D[Add dependency supplemental data if any] E[Build version] F[Validate with result schema] G[Save to DB] H[Done] - I[Run automated process] + I[Run external process] J[Sanitize dependency supplemental data] - K[Validate with automated data schema] - L[Enqueue Celery task poll_automated_process] + K[Validate with external data schema] + L[Enqueue Celery task poll_external_process] M[Return 4xx error] N{Status in_progress?} @@ -271,7 +271,7 @@ flowchart TB Every action relies on a set of schemas to validate its lifecycle: - **`params_schema`** – defines how the action is instantiated and configured on the Asset. - **`data_schema`** – validates the client payload sent in supplements. -- **`automated_data_schema`** – extends `data_schema` for automated actions by adding status and system-generated fields. +- **`external_data_schema`** – extends `data_schema` for automatic actions by adding status and system-generated fields. - **`result_schema`** – validates the persisted revision format, including metadata and version history. 
--- @@ -311,7 +311,7 @@ Each action has its own expected format: { "language": "en", "value": "My translation" } ``` -- **Automated Transcription / Automated Translation** +- **Automatic Transcription / Automatic Translation** ```json { "language": "en" } ``` @@ -323,14 +323,14 @@ Each action has its own expected format: --- -### 3.3 `automated_data_schema` +### 3.3 `external_data_schema` -Used only for **automated actions** (`BaseAutomatedNLPAction`). +Used only for **automatic actions** (`BaseAutomaticNLPAction`). It validates the **augmented payload** returned by the external service. - **Example (complete)** ```json - { "language": "en", "value": "My automated result", "status": "complete" } + { "language": "en", "value": "My automatic result", "status": "complete" } ``` - **Example (in progress)** @@ -353,11 +353,11 @@ It validates the **augmented payload** returned by the external service. ### 3.4 `result_schema` Validates the **revision JSON** persisted in the database. -The structure is the same for both manual and automated actions: +The structure is the same for both manual and automatic actions: - Metadata about the action itself (`_dateCreated`, `_dateModified`). - A list of versions under `_versions`, each containing: - - The properties defined by `data_schema` (manual) or `automated_data_schema` (automated). + - The properties defined by `data_schema` (manual) or `external_data_schema` (automatic). - Audit fields (`_dateCreated`, `_dateAccepted`, `_uuid`). **Generic Example** @@ -369,7 +369,7 @@ The structure is the same for both manual and automated actions: "_versions": [ { "language": "en", - "value": "My automated result", + "value": "My automatic result", "status": "complete", "_dateCreated": "2025-08-21T20:57:28Z", "_dateAccepted": "2025-08-21T20:57:28Z", @@ -389,7 +389,7 @@ The structure is the same for both manual and automated actions: > For manual actions, the inner version objects correspond to `data_schema`. 
> -> For automated actions, they correspond to `automated_data_schema`. +> For automatic actions, they correspond to `external_data_schema`. --- @@ -399,7 +399,7 @@ Some actions depend on the result of other actions. For example, a **translation** action requires an existing **transcription**. In this case, a `_dependency` property is added to the persisted JSON. -**Example: Automated Translation result depending on an Automated Transcription** +**Example: Automatic Translation result depending on an Automatic Transcription** ```json { @@ -414,7 +414,7 @@ In this case, a `_dependency` property is added to the persisted JSON. "_uuid": "91ab5f30-0f73-4e2e-b91f-8ad2f67a4729", "_dependency": { "_uuid": "4dcf9c9f-e503-4e5c-81f5-74250b295001", - "_actionId": "automated_google_transcription" + "_actionId": "automatic_google_transcription" } } ] diff --git a/kobo/apps/subsequences/actions/__init__.py b/kobo/apps/subsequences/actions/__init__.py index a34df26372..e514a954ff 100644 --- a/kobo/apps/subsequences/actions/__init__.py +++ b/kobo/apps/subsequences/actions/__init__.py @@ -1,12 +1,12 @@ -from .automated_google_transcription import AutomatedGoogleTranscriptionAction -from .automated_google_translation import AutomatedGoogleTranslationAction +from .automatic_google_transcription import AutomaticGoogleTranscriptionAction +from .automatic_google_translation import AutomaticGoogleTranslationAction from .manual_transcription import ManualTranscriptionAction from .manual_translation import ManualTranslationAction # TODO, what about using a loader for every class in "actions" folder (except base.py)? 
ACTIONS = ( - AutomatedGoogleTranscriptionAction, - AutomatedGoogleTranslationAction, + AutomaticGoogleTranscriptionAction, + AutomaticGoogleTranslationAction, ManualTranscriptionAction, ManualTranslationAction, ) diff --git a/kobo/apps/subsequences/actions/automated_google_transcription.py b/kobo/apps/subsequences/actions/automatic_google_transcription.py similarity index 93% rename from kobo/apps/subsequences/actions/automated_google_transcription.py rename to kobo/apps/subsequences/actions/automatic_google_transcription.py index 59ec6fcfce..7bf6d1583b 100644 --- a/kobo/apps/subsequences/actions/automated_google_transcription.py +++ b/kobo/apps/subsequences/actions/automatic_google_transcription.py @@ -1,16 +1,16 @@ from kobo.apps.organizations.constants import UsageType from ..integrations.google.google_transcribe import GoogleTranscriptionService from ..type_aliases import NLPExternalServiceClass -from .base import ActionClassConfig, BaseAutomatedNLPAction +from .base import ActionClassConfig, BaseAutomaticNLPAction from .mixins import TranscriptionActionMixin -class AutomatedGoogleTranscriptionAction( - TranscriptionActionMixin, BaseAutomatedNLPAction +class AutomaticGoogleTranscriptionAction( + TranscriptionActionMixin, BaseAutomaticNLPAction ): - ID = 'automated_google_transcription' - action_class_config = ActionClassConfig(allow_multiple=False, automated=True) + ID = 'automatic_google_transcription' + action_class_config = ActionClassConfig(allow_multiple=False, automatic=True) def get_nlp_service_class(self) -> NLPExternalServiceClass: return GoogleTranscriptionService @@ -18,7 +18,7 @@ def get_nlp_service_class(self) -> NLPExternalServiceClass: @property def result_schema(self): """ - JSON Schema for automated Google transcription results. + JSON Schema for automatic Google transcription results. 
The payload is a single-language object with: - _dateCreated : required string (date-time) diff --git a/kobo/apps/subsequences/actions/automated_google_translation.py b/kobo/apps/subsequences/actions/automatic_google_translation.py similarity index 90% rename from kobo/apps/subsequences/actions/automated_google_translation.py rename to kobo/apps/subsequences/actions/automatic_google_translation.py index 695446d4c2..0c51930467 100644 --- a/kobo/apps/subsequences/actions/automated_google_translation.py +++ b/kobo/apps/subsequences/actions/automatic_google_translation.py @@ -1,17 +1,17 @@ from kobo.apps.organizations.constants import UsageType from ..integrations.google.google_translate import GoogleTranslationService from ..type_aliases import NLPExternalServiceClass -from .base import ActionClassConfig, BaseAutomatedNLPAction +from .base import ActionClassConfig, BaseAutomaticNLPAction from .mixins import TranslationActionMixin -class AutomatedGoogleTranslationAction( - TranslationActionMixin, BaseAutomatedNLPAction +class AutomaticGoogleTranslationAction( + TranslationActionMixin, BaseAutomaticNLPAction ): - ID = 'automated_google_translation' + ID = 'automatic_google_translation' action_class_config = ActionClassConfig( - allow_multiple=True, automated=True, action_data_key='language' + allow_multiple=True, automatic=True, action_data_key='language' ) def get_nlp_service_class(self) -> NLPExternalServiceClass: @@ -20,7 +20,7 @@ def get_nlp_service_class(self) -> NLPExternalServiceClass: @property def result_schema(self): """ - JSON Schema for automated Google translation results. + JSON Schema for automatic Google translation results. The payload is an object where each top-level key is a language code from `self.languages` (e.g. "en") mapping to a dataActionKey object. 
Timestamps @@ -65,7 +65,7 @@ def result_schema(self): "status": "in_progress", "_dependency": { "_uuid": "16fd2706-8baf-433b-82eb-8c7fada847da", - "_actionId": "automated_google_transcription" + "_actionId": "automatic_google_transcription" } } ] @@ -88,7 +88,7 @@ def result_schema(self): "accepted": true, "_dependency": { "_uuid": "16fd2706-8baf-433b-82eb-8c7fada847da", - "_actionId": "automated_google_transcription" + "_actionId": "automatic_google_transcription" } } ] diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 7e4f542dfd..5c07434173 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -12,7 +12,7 @@ from kobo.celery import celery_app from kpi.exceptions import UsageLimitExceededException from kpi.utils.usage_calculator import ServiceUsageCalculator -from ..tasks import poll_run_automated_process +from ..tasks import poll_run_external_process from ..type_aliases import NLPExternalServiceClass """ @@ -151,12 +151,12 @@ class ActionClassConfig: - allow_multiple: Whether multiple items can share the same `action_data_key`. - action_data_key: The field in `action_data` used to identify or match an item when multiple entries are allowed (e.g., "language"). - - automated: Indicates whether the action relies on an external service + - automatic: Indicates whether the action relies on an external service to generate data. """ allow_multiple: bool - automated: bool + automatic: bool action_data_key: str | None = None @@ -199,7 +199,7 @@ def check_limits(self, user: User): raise UsageLimitExceededException() @property - def automated_data_schema(self): + def external_data_schema(self): raise NotImplementedError @property @@ -270,9 +270,9 @@ def get_new_action_supplemental_data( ).insert(0, new_version) # For manual actions, always mark as accepted. 
- # For automated actions, revert the just-created revision (remove it and + # For automatic actions, revert the just-created revision (remove it and # reapply its dates) to avoid adding extra branching earlier in the method. - if self.action_class_config.automated: + if self.action_class_config.automatic: if accepted is not None: # Remove stale version localized_action_supplemental_data[self.VERSION_FIELD].pop(0) @@ -320,8 +320,8 @@ def get_output_fields(self) -> list[dict]: # raise NotImplementedError() return [] - def validate_automated_data(self, data): - jsonschema.validate(data, self.automated_data_schema) + def validate_external_data(self, data): + jsonschema.validate(data, self.external_data_schema) def validate_data(self, data): jsonschema.validate(data, self.data_schema) @@ -385,16 +385,16 @@ def revise_data( self.attach_action_dependency(action_data) - if self.action_class_config.automated: - # If the action is automated, run the external process first. + if self.action_class_config.automatic: + # If the action is automatic, run the external process first. if not ( - service_response := self.run_automated_process( + service_response := self.run_external_process( submission, current_version, action_data, ) ): - # If the service response is None, the automated task is still running. + # If the service response is None, the external task is still running. # Stop here to avoid processing data and creating redundant revisions. return None @@ -402,7 +402,7 @@ def revise_data( # the validation process. 
dependency_supplemental_data = action_data.pop(self.DEPENDENCY_FIELD, None) action_data.update(service_response) - self.validate_automated_data(action_data) + self.validate_external_data(action_data) accepted = action_data.pop('accepted', None) else: dependency_supplemental_data = action_data.pop(self.DEPENDENCY_FIELD, None) @@ -443,7 +443,7 @@ def raise_for_any_leading_underscore_key(d: dict): if match: raise Exception('An unexpected key with a leading underscore was found') - def run_automated_process( + def run_external_process( self, submission: dict, action_supplemental_data: dict, @@ -452,7 +452,7 @@ def run_automated_process( **kwargs, ) -> dict | bool: """ - Update action_data with automated process + Update action_data with external process """ raise NotImplementedError @@ -464,8 +464,8 @@ def _inject_data_schema(self, destination_schema: dict, skipped_keys: list): """ schema_to_inject = ( - self.automated_data_schema - if self.action_class_config.automated + self.external_data_schema + if self.action_class_config.automatic else self.data_schema ) @@ -488,11 +488,11 @@ def _is_usage_limited(self): """ Returns whether an action should check for usage limits. """ - return self.action_class_config.automated + return self.action_class_config.automatic @property def _limit_identifier(self): - # See AutomatedGoogleTranscriptionAction._limit_identifier() for example + # See AutomaticGoogleTranscriptionAction._limit_identifier() for example raise NotImplementedError() @@ -592,21 +592,21 @@ def languages(self) -> list[str]: return languages -class BaseAutomatedNLPAction(BaseManualNLPAction): +class BaseAutomaticNLPAction(BaseManualNLPAction): """ - Base class for all automated NLP actions. + Base class for all automatic NLP actions. Extends `BaseManualNLPAction`, reusing its `params_schema` for - consistency in language configuration, while adding automated-specific - schema definitions (`automated_data_schema` and `data_schema`). 
+ consistency in language configuration, while adding automatic-specific + schema definitions (`external_data_schema` and `data_schema`). - This ensures that both manual and automated actions share the same - validation rules for parameters, while automated actions introduce - their own structure for system-generated results. + This ensures that both manual and automatic actions share the same + validation rules for parameters, while automatic actions introduce + their own structure with additional system-generated fields. """ @property - def automated_data_schema(self) -> dict: + def external_data_schema(self) -> dict: """ Schema rules: @@ -752,7 +752,7 @@ def get_nlp_service_class(self) -> NLPExternalServiceClass: raise NotImplementedError - def run_automated_process( + def run_external_process( self, submission: dict, action_supplemental_data: dict, @@ -761,7 +761,7 @@ def run_automated_process( **kwargs, ) -> dict | None: """ - Run the automated NLP process using the configured external service + Run the automatic NLP process using the configured external service (e.g., Google). This method is intended to be called by `revise_data()`, which finalizes the validation and merging of `action_data`. @@ -793,7 +793,7 @@ def run_automated_process( # If the client explicitly removed a previously stored result, # preserve the deletion by returning a `deleted` status instead - # of reprocessing with the automated service. + # of reprocessing with the external service. 
# TODO add comment for delete here if 'value' in action_data: return { @@ -823,7 +823,7 @@ def run_automated_process( celery_action_data = deepcopy(action_data) celery_action_data.pop(self.DEPENDENCY_FIELD, None) - poll_run_automated_process.apply_async( + poll_run_external_process.apply_async( kwargs={ 'submission': submission, 'action_data': celery_action_data, diff --git a/kobo/apps/subsequences/actions/manual_transcription.py b/kobo/apps/subsequences/actions/manual_transcription.py index 6ef65ea8eb..02349775f7 100644 --- a/kobo/apps/subsequences/actions/manual_transcription.py +++ b/kobo/apps/subsequences/actions/manual_transcription.py @@ -7,7 +7,7 @@ class ManualTranscriptionAction(TranscriptionActionMixin, BaseManualNLPAction): ID = 'manual_transcription' - action_class_config = ActionClassConfig(allow_multiple=False, automated=False) + action_class_config = ActionClassConfig(allow_multiple=False, automatic=False) def _get_output_field_name(self, language: str) -> str: language = language.split('-')[0] # ignore region if any diff --git a/kobo/apps/subsequences/actions/manual_translation.py b/kobo/apps/subsequences/actions/manual_translation.py index 1915a87f4c..e7b4cb3667 100644 --- a/kobo/apps/subsequences/actions/manual_translation.py +++ b/kobo/apps/subsequences/actions/manual_translation.py @@ -8,7 +8,7 @@ class ManualTranslationAction(TranslationActionMixin, BaseManualNLPAction): ID = 'manual_translation' action_class_config = ActionClassConfig( - allow_multiple=True, automated=False, action_data_key='language' + allow_multiple=True, automatic=False, action_data_key='language' ) def _get_output_field_name(self, language: str) -> str: diff --git a/kobo/apps/subsequences/actions/mixins.py b/kobo/apps/subsequences/actions/mixins.py index 5db88a4383..333a4c4c45 100644 --- a/kobo/apps/subsequences/actions/mixins.py +++ b/kobo/apps/subsequences/actions/mixins.py @@ -7,7 +7,7 @@ class TranscriptionActionMixin: """ Provides common methods and properties used by 
all transcription-related actions. - This mixin centralizes them so that both manual and automated transcription classes + This mixin centralizes them so that both manual and automatic transcription classes can reuse the same structure consistently. """ @@ -15,8 +15,8 @@ class TranscriptionActionMixin: def result_schema(self): # Move localized_value_schema definitions to main schema - if self.action_class_config.automated: - data_schema_defs = self.automated_data_schema.get('$defs', {}) + if self.action_class_config.automatic: + data_schema_defs = self.external_data_schema.get('$defs', {}) else: data_schema_defs = self.data_schema.get('$defs', {}) @@ -63,7 +63,7 @@ class TranslationActionMixin: """ Provides common methods and properties used by all translation-related actions. - This mixin centralizes them so that both manual and automated translation classes + This mixin centralizes them so that both manual and automatic translation classes can reuse the same structure consistently. """ @@ -149,13 +149,13 @@ def get_action_dependencies(self, question_supplemental_data: dict) -> dict: for this action to run correctly. 
""" - from ..actions.automated_google_transcription import ( - AutomatedGoogleTranscriptionAction + from ..actions.automatic_google_transcription import ( + AutomaticGoogleTranscriptionAction ) from ..actions.manual_transcription import ManualTranscriptionAction transcription_action_ids = ( - AutomatedGoogleTranscriptionAction.ID, + AutomaticGoogleTranscriptionAction.ID, ManualTranscriptionAction.ID, ) @@ -186,8 +186,8 @@ def result_schema(self): } # Move localized_value_schema definitions to main schema - if self.action_class_config.automated: - data_schema_defs = self.automated_data_schema.get('$defs', {}) + if self.action_class_config.automatic: + data_schema_defs = self.external_data_schema.get('$defs', {}) else: data_schema_defs = self.data_schema.get('$defs', {}) diff --git a/kobo/apps/subsequences/tasks.py b/kobo/apps/subsequences/tasks.py index dbfcb3e2e3..8086402cbd 100644 --- a/kobo/apps/subsequences/tasks.py +++ b/kobo/apps/subsequences/tasks.py @@ -25,7 +25,7 @@ retry_jitter=False, queue='kpi_low_priority_queue', ) -def poll_run_automated_process( +def poll_run_external_process( asset_id: int, submission: dict, question_xpath: str, @@ -49,8 +49,8 @@ def poll_run_automated_process( ) -@task_failure.connect(sender=poll_run_automated_process) -def poll_run_automated_process_failure(sender=None, **kwargs): +@task_failure.connect(sender=poll_run_external_process) +def poll_run_external_process_failure(sender=None, **kwargs): # Avoid circular import from .actions import ACTION_IDS_TO_CLASSES diff --git a/kobo/apps/subsequences/tests/api/v2/test_validation.py b/kobo/apps/subsequences/tests/api/v2/test_validation.py index 9995da8835..f6a25eaa0a 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_validation.py +++ b/kobo/apps/subsequences/tests/api/v2/test_validation.py @@ -77,16 +77,16 @@ def test_cannot_patch_with_invalid_payload(self): assert response.status_code == status.HTTP_400_BAD_REQUEST assert 'Invalid action' in str(response.data) - def 
test_cannot_set_value_with_automated_actions(self): - # First, set up the asset to allow automated actions + def test_cannot_set_value_with_automatic_actions(self): + # First, set up the asset to allow automatic actions advanced_features = { '_version': '20250820', '_actionConfigs': { 'q1': { - 'automated_google_transcription': [ + 'automatic_google_transcription': [ {'language': 'en'}, ], - 'automated_google_translation': [ + 'automatic_google_translation': [ {'language': 'fr'}, ] } @@ -105,12 +105,12 @@ def test_cannot_set_value_with_automated_actions(self): content=mock_submission_supplement, asset=self.asset, ) - automated_actions = advanced_features['_actionConfigs']['q1'].keys() - for automated_action in automated_actions: + automatic_actions = advanced_features['_actionConfigs']['q1'].keys() + for automatic_action in automatic_actions: payload = { '_version': '20250820', 'q1': { - automated_action: { + automatic_action: { 'language': 'es', 'value': 'some text', # forbidden field } @@ -124,13 +124,13 @@ def test_cannot_set_value_with_automated_actions(self): def test_cannot_accept_incomplete_automatic_transcription(self): - # Set up the asset to allow automated google transcription + # Set up the asset to allow automatic google transcription self.set_asset_advanced_features( { '_version': '20250820', '_actionConfigs': { 'q1': { - 'automated_google_transcription': [ + 'automatic_google_transcription': [ {'language': 'es'}, ] } @@ -142,7 +142,7 @@ def test_cannot_accept_incomplete_automatic_transcription(self): payload = { '_version': '20250820', 'q1': { - 'automated_google_transcription': { + 'automatic_google_transcription': { 'language': 'es', 'accepted': True, } @@ -154,7 +154,7 @@ def test_cannot_accept_incomplete_automatic_transcription(self): mock_service.process_data.return_value = {'status': 'in_progress'} with patch( - 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa + 
'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): response = self.client.patch( @@ -164,16 +164,16 @@ def test_cannot_accept_incomplete_automatic_transcription(self): assert 'Invalid payload' in str(response.data) def test_cannot_accept_incomplete_automatic_translation(self): - # Set up the asset to allow automated google actions + # Set up the asset to allow automatic google actions self.set_asset_advanced_features( { '_version': '20250820', '_actionConfigs': { 'q1': { - 'automated_google_transcription': [ + 'automatic_google_transcription': [ {'language': 'en'}, ], - 'automated_google_translation': [ + 'automatic_google_translation': [ {'language': 'fr'}, ] } @@ -196,7 +196,7 @@ def test_cannot_accept_incomplete_automatic_translation(self): payload = { '_version': '20250820', 'q1': { - 'automated_google_translation': { + 'automatic_google_translation': { 'language': 'fr', 'accepted': True, } @@ -208,7 +208,7 @@ def test_cannot_accept_incomplete_automatic_translation(self): mock_service.process_data.return_value = {'status': 'in_progress'} with patch( - 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): response = self.client.patch( @@ -218,16 +218,16 @@ def test_cannot_accept_incomplete_automatic_translation(self): assert 'Invalid payload' in str(response.data) def test_cannot_request_translation_without_transcription(self): - # Set up the asset to allow automated google actions + # Set up the asset to allow automatic google actions self.set_asset_advanced_features( { '_version': '20250820', '_actionConfigs': { 'q1': { - 'automated_google_transcription': [ + 'automatic_google_transcription': [ {'language': 'en'}, ], - 'automated_google_translation': [ + 'automatic_google_translation': [ {'language': 'fr'}, ] } @@ 
-239,7 +239,7 @@ def test_cannot_request_translation_without_transcription(self): payload = { '_version': '20250820', 'q1': { - 'automated_google_translation': { + 'automatic_google_translation': { 'language': 'fr', } }, @@ -250,7 +250,7 @@ def test_cannot_request_translation_without_transcription(self): mock_service.process_data.return_value = {'status': 'in_progress'} with patch( - 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): response = self.client.patch( diff --git a/kobo/apps/subsequences/tests/constants.py b/kobo/apps/subsequences/tests/constants.py index a0208b885b..9c8b103937 100644 --- a/kobo/apps/subsequences/tests/constants.py +++ b/kobo/apps/subsequences/tests/constants.py @@ -1,7 +1,7 @@ EMPTY_SUBMISSION = {} EMPTY_SUPPLEMENT = {} QUESTION_SUPPLEMENT = { - 'automated_google_transcription': { + 'automatic_google_transcription': { '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:27:00Z', '_versions': [ diff --git a/kobo/apps/subsequences/tests/test_automated_google_transcription.py b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py similarity index 89% rename from kobo/apps/subsequences/tests/test_automated_google_transcription.py rename to kobo/apps/subsequences/tests/test_automatic_google_transcription.py index 3ef9f97671..e31d3035df 100644 --- a/kobo/apps/subsequences/tests/test_automated_google_transcription.py +++ b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py @@ -4,25 +4,25 @@ import jsonschema import pytest -from ..actions.automated_google_transcription import AutomatedGoogleTranscriptionAction +from ..actions.automatic_google_transcription import AutomaticGoogleTranscriptionAction from .constants import EMPTY_SUBMISSION, EMPTY_SUPPLEMENT def test_valid_params_pass_validation(): params = [{'language': 'fr'}, 
{'language': 'es'}] - AutomatedGoogleTranscriptionAction.validate_params(params) + AutomaticGoogleTranscriptionAction.validate_params(params) def test_invalid_params_fail_validation(): params = [{'language': 123}, {'language': 'es'}] with pytest.raises(jsonschema.exceptions.ValidationError): - AutomatedGoogleTranscriptionAction.validate_params(params) + AutomaticGoogleTranscriptionAction.validate_params(params) def test_valid_user_data_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranscriptionAction(xpath, params) + action = AutomaticGoogleTranscriptionAction(xpath, params) allowed_data = [ # Trivial case @@ -43,11 +43,11 @@ def test_valid_user_data_passes_validation(): action.validate_data(data) -def test_valid_automated_transcription_data_passes_validation(): +def test_valid_automatic_transcription_data_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranscriptionAction(xpath, params) + action = AutomaticGoogleTranscriptionAction(xpath, params) allowed_data = [ # Trivial case @@ -75,13 +75,13 @@ def test_valid_automated_transcription_data_passes_validation(): ] for data in allowed_data: - action.validate_automated_data(data) + action.validate_external_data(data) def test_invalid_user_data_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranscriptionAction(xpath, params) + action = AutomaticGoogleTranscriptionAction(xpath, params) invalid_data = [ # Wrong language @@ -107,10 +107,10 @@ def test_invalid_user_data_fails_validation(): action.validate_data(data) -def test_invalid_automated_data_fails_validation(): +def test_invalid_external_data_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = 
[{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranscriptionAction(xpath, params) + action = AutomaticGoogleTranscriptionAction(xpath, params) invalid_data = [ # Wrong language @@ -141,13 +141,13 @@ def test_invalid_automated_data_fails_validation(): for data in invalid_data: with pytest.raises(jsonschema.exceptions.ValidationError): - action.validate_automated_data(data) + action.validate_external_data(data) def test_valid_result_passes_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranscriptionAction(xpath, params) + action = AutomaticGoogleTranscriptionAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'es', 'value': 'dos'} @@ -159,7 +159,7 @@ def test_valid_result_passes_validation(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): for data in first, second, third, fourth, fifth, six: @@ -184,7 +184,7 @@ def test_valid_result_passes_validation(): def test_acceptance_does_not_produce_versions(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] - action = AutomatedGoogleTranscriptionAction(xpath, params) + action = AutomaticGoogleTranscriptionAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'fr', 'accepted': True} @@ -193,7 +193,7 @@ def test_acceptance_does_not_produce_versions(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): for data in first, second, third: @@ 
-222,7 +222,7 @@ def test_acceptance_does_not_produce_versions(): def test_invalid_result_fails_validation(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranscriptionAction(xpath, params) + action = AutomaticGoogleTranscriptionAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'es', 'value': 'dos'} @@ -234,7 +234,7 @@ def test_invalid_result_fails_validation(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): for data in first, second, third, fourth, fifth, six: @@ -263,13 +263,13 @@ def test_invalid_result_fails_validation(): def test_transcription_versions_are_retained_in_supplemental_details(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'es'}] - action = AutomatedGoogleTranscriptionAction(xpath, params) + action = AutomaticGoogleTranscriptionAction(xpath, params) first = {'language': 'es', 'value': 'Ni idea'} second = {'language': 'fr', 'value': 'Aucune idée'} mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): value = first.pop('value', None) @@ -284,7 +284,7 @@ def test_transcription_versions_are_retained_in_supplemental_details(): first_time = mock_sup_det['_dateCreated'] with patch( - 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): value = second.pop('value', 
None) @@ -313,7 +313,7 @@ def test_transcription_versions_are_retained_in_supplemental_details(): def test_latest_version_is_first(): xpath = 'group_name/question_name' # irrelevant for this test params = [{'language': 'fr'}, {'language': 'en'}] - action = AutomatedGoogleTranscriptionAction(xpath, params) + action = AutomaticGoogleTranscriptionAction(xpath, params) first = {'language': 'fr', 'value': 'un'} second = {'language': 'fr', 'value': 'deux'} @@ -322,7 +322,7 @@ def test_latest_version_is_first(): mock_sup_det = EMPTY_SUPPLEMENT mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_transcription.GoogleTranscriptionService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_transcription.GoogleTranscriptionService', # noqa return_value=mock_service, ): for data in first, second, third: diff --git a/kobo/apps/subsequences/tests/test_automated_google_translation.py b/kobo/apps/subsequences/tests/test_automatic_google_translation.py similarity index 91% rename from kobo/apps/subsequences/tests/test_automated_google_translation.py rename to kobo/apps/subsequences/tests/test_automatic_google_translation.py index c83ff1305a..c596c0561d 100644 --- a/kobo/apps/subsequences/tests/test_automated_google_translation.py +++ b/kobo/apps/subsequences/tests/test_automatic_google_translation.py @@ -5,21 +5,20 @@ import jsonschema import pytest -from ..actions.automated_google_translation import AutomatedGoogleTranslationAction +from ..actions.automatic_google_translation import AutomaticGoogleTranslationAction from .constants import EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, QUESTION_SUPPLEMENT from ..exceptions import TranscriptionNotFound -from ..tasks import poll_run_automated_process def test_valid_params_pass_validation(): params = [{'language': 'fr'}, {'language': 'es'}] - AutomatedGoogleTranslationAction.validate_params(params) + AutomaticGoogleTranslationAction.validate_params(params) def test_invalid_params_fail_validation(): 
params = [{'language': 123}, {'language': 'es'}] with pytest.raises(jsonschema.exceptions.ValidationError): - AutomatedGoogleTranslationAction.validate_params(params) + AutomaticGoogleTranslationAction.validate_params(params) def test_valid_user_data_passes_validation(): @@ -44,7 +43,7 @@ def test_valid_user_data_passes_validation(): action.validate_data(data) -def test_valid_automated_translation_data_passes_validation(): +def test_valid_automatic_translation_data_passes_validation(): action = _get_action() allowed_data = [ @@ -73,7 +72,7 @@ def test_valid_automated_translation_data_passes_validation(): ] for data in allowed_data: - action.validate_automated_data(data) + action.validate_external_data(data) def test_invalid_user_data_fails_validation(): @@ -103,7 +102,7 @@ def test_invalid_user_data_fails_validation(): action.validate_data(data) -def test_invalid_automated_data_fails_validation(): +def test_invalid_automatic_data_fails_validation(): action = _get_action() invalid_data = [ @@ -135,7 +134,7 @@ def test_invalid_automated_data_fails_validation(): for data in invalid_data: with pytest.raises(jsonschema.exceptions.ValidationError): - action.validate_automated_data(data) + action.validate_external_data(data) def test_valid_result_passes_validation(): @@ -151,7 +150,7 @@ def test_valid_result_passes_validation(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): for data in first, second, third, fourth, fifth, six: @@ -185,7 +184,7 @@ def test_acceptance_does_not_produce_versions(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): for 
data in first, second, third: @@ -227,8 +226,7 @@ def test_invalid_result_fails_validation(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', - # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): for data in first, second, third, fourth, fifth, six: @@ -263,8 +261,7 @@ def test_translation_versions_are_retained_in_supplemental_details(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', - # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): value = first.pop('value', None) @@ -277,7 +274,7 @@ def test_translation_versions_are_retained_in_supplemental_details(): first_time = mock_sup_det['es']['_versions'][0]['_dateCreated'] with patch( - 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): value = second.pop('value', None) @@ -291,7 +288,7 @@ def test_translation_versions_are_retained_in_supplemental_details(): assert mock_sup_det['fr']['_dateCreated'] == mock_sup_det['fr']['_dateModified'] with patch( - 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): value = third.pop('value', None) @@ -327,8 +324,7 @@ def test_latest_version_is_first(): mock_sup_det = EMPTY_SUPPLEMENT mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', - # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa 
return_value=mock_service, ): for data in first, second, third: @@ -349,7 +345,7 @@ def test_cannot_revise_data_without_transcription(): mock_service = MagicMock() with patch( - 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): mock_service.process_data.return_value = { @@ -364,14 +360,14 @@ def test_cannot_revise_data_without_transcription(): def test_find_the_most_recent_accepted_transcription(): action = _get_action() - # Automated transcription is the most recent + # Automatic transcription is the most recent action_data = {} expected = { '_dependency': { 'value': 'My audio has been transcribed automatically', 'language': 'en', '_uuid': '4dcf9c9f-e503-4e5c-81f5-74250b295001', - '_actionId': 'automated_google_transcription', + '_actionId': 'automatic_google_transcription', } } action_data = action.attach_action_dependency(action_data) @@ -384,7 +380,6 @@ def test_find_the_most_recent_accepted_transcription(): ] = '2025-07-28T16:18:00Z' action.get_action_dependencies(question_supplement_data) - action_data = {} # not really relevant for this test expected = { '_dependency': { @@ -405,12 +400,12 @@ def test_action_is_updated_in_background_if_in_progress(): submission = {'meta/rootUuid': '123-abdc'} with patch( - 'kobo.apps.subsequences.actions.automated_google_translation.GoogleTranslationService', # noqa + 'kobo.apps.subsequences.actions.automatic_google_translation.GoogleTranslationService', # noqa return_value=mock_service, ): mock_service.process_data.return_value = {'status': 'in_progress'} with patch( - 'kobo.apps.subsequences.actions.base.poll_run_automated_process' + 'kobo.apps.subsequences.actions.base.poll_run_automatic_process' ) as task_mock: action.revise_data( submission, EMPTY_SUPPLEMENT, {'language': 'fr'} @@ -425,7 +420,7 @@ def _get_action(fetch_action_dependencies=True): 
mock_asset = MagicMock() mock_asset.pk = 1 mock_asset.owner.pk = 1 - action = AutomatedGoogleTranslationAction(xpath, params, asset=mock_asset) + action = AutomaticGoogleTranslationAction(xpath, params, asset=mock_asset) if fetch_action_dependencies: action.get_action_dependencies(QUESTION_SUPPLEMENT) return action diff --git a/kobo/apps/subsequences/utils/supplement_data.py b/kobo/apps/subsequences/utils/supplement_data.py index 41e9c1d2fb..613bed6611 100644 --- a/kobo/apps/subsequences/utils/supplement_data.py +++ b/kobo/apps/subsequences/utils/supplement_data.py @@ -13,7 +13,7 @@ def get_supplemental_output_fields(asset: 'kpi.models.Asset') -> list[dict]: these are the fields added to exports, displayed in the table view, etc. multiple actions could result in only a single field, such as a manual - transcript and an automated transcript for a given language only resulting + transcript and an automatic transcript for a given language only resulting in one field in the output data Returns a list of fields contributed by all enabled actions (at the asset @@ -33,7 +33,7 @@ def get_supplemental_output_fields(asset: 'kpi.models.Asset') -> list[dict]: ] When it's time to get the data, we'll have to arbitrate between the manual - and automated transcripts if both are ever present for a particular + and automatic transcripts if both are ever present for a particular submission. We'll do that by looking at the acceptance dates and letting the most recent win """ From 548008b073ba68181059aa70167e7085f691cef3 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Wed, 1 Oct 2025 16:34:05 -0400 Subject: [PATCH 115/138] =?UTF-8?q?Add=20schemas=20for=20qualitative=20ana?= =?UTF-8?q?lysis=20and=20nest=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit action data within `_data` attribute for each version Two tests are failing as they were already on 0a92a24b339eb791f605e453a941a054333d56b0: FAILED test_models.py::SubmissionSupplementTestCase::test_retrieve_data_from_migrated_data - KeyError: '_version' FAILED test_models.py::SubmissionSupplementTestCase::test_retrieve_data_with_stale_questions - AssertionError: assert {'group_name/question_name': {'manual_translation': {'en': {'_versions': [{'_uuid': '22b04ce8-61c2-4383-836f-5d5f0ad73645', 'value': 'berserk',... --- .../actions/automatic_google_translation.py | 36 +- kobo/apps/subsequences/actions/base.py | 5 +- kobo/apps/subsequences/actions/mixins.py | 47 +- kobo/apps/subsequences/actions/qual.py | 263 +++++++ kobo/apps/subsequences/tests/constants.py | 20 +- .../test_automatic_google_transcription.py | 12 +- .../test_automatic_google_translation.py | 20 +- .../tests/test_manual_transcription.py | 22 +- .../tests/test_manual_translation.py | 26 +- kobo/apps/subsequences/tests/test_models.py | 30 +- kobo/apps/subsequences/tests/test_qual.py | 644 ++++++++++++++++++ kobo/apps/subsequences/utils/time.py | 3 + 12 files changed, 1038 insertions(+), 90 deletions(-) create mode 100644 kobo/apps/subsequences/actions/qual.py create mode 100644 kobo/apps/subsequences/tests/test_qual.py diff --git a/kobo/apps/subsequences/actions/automatic_google_translation.py b/kobo/apps/subsequences/actions/automatic_google_translation.py index 0c51930467..82b7bd4e9e 100644 --- a/kobo/apps/subsequences/actions/automatic_google_translation.py +++ b/kobo/apps/subsequences/actions/automatic_google_translation.py @@ -60,9 +60,11 @@ def result_schema(self): "_versions": [ { "_dateCreated": "2025-09-24T10:45:00Z", - "_uuid": 
"550e8400-e29b-41d4-a716-446655440000", - "language": "en", - "status": "in_progress", + "_data": { + "_uuid": "550e8400-e29b-41d4-a716-446655440000", + "language": "en", + "status": "in_progress" + }, "_dependency": { "_uuid": "16fd2706-8baf-433b-82eb-8c7fada847da", "_actionId": "automatic_google_transcription" @@ -81,11 +83,13 @@ def result_schema(self): { "_dateCreated": "2025-09-24T10:45:00Z", "_uuid": "4c0a0e9c-0f2c-4d8a-9c72-3a8d2f9a2a11", - "language": "en", - "locale": "en-CA", - "status": "complete", - "value": "Lunch was great today.", - "accepted": true, + "_data": { + "language": "en", + "locale": "en-CA", + "status": "complete", + "value": "Lunch was great today.", + "accepted": true + }, "_dependency": { "_uuid": "16fd2706-8baf-433b-82eb-8c7fada847da", "_actionId": "automatic_google_transcription" @@ -104,9 +108,11 @@ def result_schema(self): { "_dateCreated": "2025-09-24T10:45:00Z", "_uuid": "9b1deb4d-5b15-4e8f-9f8b-7b3f5c6e4d21", - "language": "en", - "status": "failed", - "error": "Upstream service timeout." + "_data": { + "language": "en", + "status": "failed", + "error": "Upstream service timeout." 
+ } } ] } @@ -121,9 +127,11 @@ def result_schema(self): { "_dateCreated": "2025-09-24T10:45:00Z", "_uuid": "7d444840-9dc0-11d1-b245-5ffdce74fad2", - "language": "en", - "status": "deleted", - "value": null + "_data": { + "language": "en", + "status": "deleted", + "value": null + } } ] } diff --git a/kobo/apps/subsequences/actions/base.py b/kobo/apps/subsequences/actions/base.py index 5c07434173..6ec6abd893 100644 --- a/kobo/apps/subsequences/actions/base.py +++ b/kobo/apps/subsequences/actions/base.py @@ -169,6 +169,7 @@ class BaseAction: DEPENDENCY_FIELD = '_dependency' UUID_FIELD = '_uuid' VERSION_FIELD = '_versions' + VERSION_DATA_FIELD = '_data' action_class_config: ActionClassConfig | None = None @@ -179,6 +180,7 @@ def __init__( asset: Optional['kpi.models.Asset'] = None, ): self.source_question_xpath = source_question_xpath + self.validate_params(params) self.params = params self.asset = asset self._action_dependencies = {} @@ -255,7 +257,7 @@ def get_new_action_supplemental_data( ) ) - new_version = deepcopy(action_data) + new_version = {self.VERSION_DATA_FIELD: deepcopy(action_data)} new_version[self.DATE_CREATED_FIELD] = now_str new_version[self.UUID_FIELD] = str(uuid.uuid4()) if dependency_supplemental_data: @@ -457,6 +459,7 @@ def run_external_process( raise NotImplementedError def _inject_data_schema(self, destination_schema: dict, skipped_keys: list): + raise Exception('This method is going away') """ Utility function to inject data schema into another schema to avoid repeating the same schema. 
diff --git a/kobo/apps/subsequences/actions/mixins.py b/kobo/apps/subsequences/actions/mixins.py index 333a4c4c45..65a3673f1b 100644 --- a/kobo/apps/subsequences/actions/mixins.py +++ b/kobo/apps/subsequences/actions/mixins.py @@ -1,3 +1,5 @@ +from copy import deepcopy + from dateutil import parser from ..exceptions import TranscriptionNotFound @@ -16,9 +18,12 @@ def result_schema(self): # Move localized_value_schema definitions to main schema if self.action_class_config.automatic: - data_schema_defs = self.external_data_schema.get('$defs', {}) + data_schema = self.external_data_schema else: - data_schema_defs = self.data_schema.get('$defs', {}) + data_schema = self.data_schema + data_schema = deepcopy(data_schema) + data_schema_defs = data_schema.pop('$defs') + data_schema.pop('$schema') # Also discard this prior to nesting schema = { '$schema': 'https://json-schema.org/draft/2020-12/schema', @@ -40,6 +45,7 @@ def result_schema(self): 'type': 'object', 'additionalProperties': False, 'properties': { + self.VERSION_DATA_FIELD: {'$ref': '#/$defs/dataSchema'}, self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, self.UUID_FIELD: {'$ref': '#/$defs/uuid'}, @@ -47,15 +53,11 @@ def result_schema(self): 'required': [self.DATE_CREATED_FIELD, self.UUID_FIELD], }, 'uuid': {'type': 'string', 'format': 'uuid'}, + 'dataSchema': data_schema, **data_schema_defs, # Copy defs at the root level }, } - # Also inject data schema in the version definition - self._inject_data_schema( - schema['$defs']['version'], ['$schema', 'title', '$defs'] - ) - return schema @@ -122,14 +124,16 @@ def attach_action_dependency(self, action_data: dict): if latest_version is None: raise TranscriptionNotFound + latest_version_data = latest_version.get(self.VERSION_DATA_FIELD, {}) + # Prefer a specific locale when available; otherwise use the base language. 
language_or_locale = ( - latest_version.get('locale') or latest_version['language'] + latest_version_data.get('locale') or latest_version_data['language'] ) # Inject dependency property for translation service action_data[self.DEPENDENCY_FIELD] = { - 'value': latest_version['value'], + 'value': latest_version_data['value'], 'language': language_or_locale, self.UUID_FIELD: latest_version[self.UUID_FIELD], self.ACTION_ID_FIELD: latest_version_action_id @@ -187,9 +191,12 @@ def result_schema(self): # Move localized_value_schema definitions to main schema if self.action_class_config.automatic: - data_schema_defs = self.external_data_schema.get('$defs', {}) + data_schema = self.external_data_schema else: - data_schema_defs = self.data_schema.get('$defs', {}) + data_schema = self.data_schema + data_schema = deepcopy(data_schema) + data_schema_defs = data_schema.pop('$defs') + data_schema.pop('$schema') # Also discard this prior to nesting schema = { '$schema': 'https://json-schema.org/draft/2020-12/schema', @@ -206,6 +213,7 @@ def result_schema(self): 'type': 'object', 'additionalProperties': False, 'properties': { + self.VERSION_DATA_FIELD: {'$ref': '#/$defs/dataSchema'}, self.DATE_CREATED_FIELD: {'$ref': '#/$defs/dateTime'}, self.DATE_ACCEPTED_FIELD: {'$ref': '#/$defs/dateTime'}, self.UUID_FIELD: {'$ref': '#/$defs/uuid'}, @@ -229,8 +237,15 @@ def result_schema(self): { 'if': { # If `value` exists and is null… - 'properties': {'value': {'type': 'null'}}, - 'required': ['value'] + 'properties': { + self.VERSION_DATA_FIELD: { + 'type': 'object', + 'properties': { + 'value': {'type': 'null'}, + }, + 'required': ['value'], + } + }, }, # …then `_dependency` must be absent. 
'then': { @@ -245,13 +260,9 @@ def result_schema(self): }] }, 'uuid': {'type': 'string', 'format': 'uuid'}, + 'dataSchema': data_schema, **data_schema_defs, }, } - # Also inject data schema in the version definition - self._inject_data_schema( - schema['$defs']['version'], ['$schema', 'title', '$defs'] - ) - return schema diff --git a/kobo/apps/subsequences/actions/qual.py b/kobo/apps/subsequences/actions/qual.py new file mode 100644 index 0000000000..de9dfab86d --- /dev/null +++ b/kobo/apps/subsequences/actions/qual.py @@ -0,0 +1,263 @@ +from copy import deepcopy +from typing import Any + +from .base import ActionClassConfig, BaseAction + + +class QualAction(BaseAction): + + ID = 'qual' + action_class_config = ActionClassConfig( + allow_multiple=True, automatic=False, action_data_key='uuid' + ) + + # JSON Schema definitions + + data_schema_definitions = { + 'qualCommon': { + # Remember that JSON Schema is subtractive + # These essential constraints are common to all qualitative + # analysis question types + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'uuid': {'$ref': '#/$defs/qualUuid'}, + # `value` is further restricted by the schemas for each type + 'value': {}, + }, + 'required': ['uuid', 'value'], + }, + 'qualInteger': { + 'type': 'object', + 'properties': { + 'value': {'type': ['integer', 'null']}, + }, + }, + 'qualSelectOne': { + 'type': 'object', + 'properties': { + 'value': {'type': 'string'}, + }, + }, + 'qualSelectMultiple': { + 'type': 'object', + 'properties': { + 'value': { + 'type': 'array', + 'items': {'type': 'string', 'minLength': 1}, + }, + }, + }, + 'qualTags': { + 'type': 'object', + 'properties': { + 'value': { + 'type': 'array', + 'items': {'type': 'string'}, + }, + }, + }, + 'qualText': { + 'type': 'object', + 'properties': { + 'value': { + 'type': 'string', + }, + }, + }, + } + params_schema_definitions = { + 'qualChoice': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'labels': 
{'$ref': '#/$defs/qualLabels'}, + 'uuid': {'$ref': '#/$defs/qualUuid'}, + 'options': {'type': 'object'}, + }, + 'required': ['labels', 'uuid'], + }, + 'qualLabels': { + 'type': 'object', + 'additionalProperties': False, + 'patternProperties': {'.+': {'type': 'string'}}, + }, + 'qualQuestion': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'uuid': {'$ref': '#/$defs/qualUuid'}, + 'type': {'$ref': '#/$defs/qualQuestionType'}, + 'labels': {'$ref': '#/$defs/qualLabels'}, + 'choices': { + 'type': 'array', + 'items': {'$ref': '#/$defs/qualChoice'}, + }, + 'options': {'type': 'object'}, + }, + 'required': ['uuid', 'type', 'labels'], + # Additionally require `choices` for the select types + 'if': { + 'properties': { + 'type': {'$ref': '#/$defs/qualSelectQuestionType'}, + } + }, + 'then': {'required': ['choices']}, + }, + 'qualQuestionType': { + 'type': 'string', + 'enum': [ + 'qualInteger', + 'qualSelectMultiple', + 'qualSelectOne', + 'qualTags', + 'qualText', + 'qualNote', # Takes no response data + ], + }, + 'qualSelectQuestionType': { + 'type': 'string', + 'enum': [ + 'qualSelectMultiple', + 'qualSelectOne', + ], + }, + } + shared_definitions = { + 'qualUuid': {'type': 'string', 'minLength': 1}, + } + + # JSON Schemas + + params_schema = { + 'type': 'array', + 'items': {'$ref': '#/$defs/qualQuestion'}, + '$defs': {**shared_definitions, **params_schema_definitions}, + } + + @property + def data_schema(self): + """ + POST to "/api/v2/assets//data//supplemental/" + { + '_version': '20250820', + 'question_name_xpath': { + 'qual': { + 'uuid': '24a68b0a-62fb-4122-8377-412810b2f45d', + 'value': 'pithy text', + } + }, + } + + …gets processed by our caller into just: + { + 'uuid': '24a68b0a-62fb-4122-8377-412810b2f45d', + 'value': 'pithy text', + } + + …which is what the schema returned by this function needs to validate + """ + schema = { + '$defs': { + **self.shared_definitions, + 'qualCommon': deepcopy(self.data_schema_definitions['qualCommon']) + 
}, + 'oneOf': [], + } + + for qual_item in self.params: + try: + data_schema_def = self.data_schema_definitions[ + qual_item['type'] + ] + except KeyError: + # Not all "question" types are allowed to receive responses + continue + + schema['$defs'][qual_item['type']] = data_schema_def + schema['oneOf'].append( + # TODO: resolve + # + # Concerns: + # 1. Is including only the schemas for types actually used in + # this asset's qualitative analysis form confusing? + # 2. Does using the definitions to save on bloat in the schema + # result in error messages that are too confusing? + # + # Note: a "good" (?) thing is that the choices are not really + # validated, so if we have allowed them to be deleted in the + # past (which we probably have), at least validation won't blow + # up for existing data + + { + 'allOf': [ + {'$ref': '#/$defs/qualCommon'}, + {'$ref': '#/$defs/' + qual_item['type']}, + { + 'type': 'object', + 'properties': { + 'uuid': {'const': qual_item['uuid']} + }, + }, + ], + } + ) + + return schema + + @property + def result_schema(self): + data_schema = deepcopy(self.data_schema) + data_schema_definitions = data_schema.pop('$defs') + schema = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + # Every question gets a property in the results + # TODO: Does `dataActionKey` make sense as a name? 
+ qual_item['uuid']: {'$ref': '#/$defs/dataActionKey'} + for qual_item in self.params + }, + '$defs': { + 'dataActionKey': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + '_versions': { + 'type': 'array', + 'minItems': 1, + 'items': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + '_data': {'$ref': '#/$defs/dataSchema'}, + '_dateCreated': {'$ref': '#/$defs/dateTime'}, + '_dateAccepted': {'$ref': '#/$defs/dateTime'}, + '_uuid': {'$ref': '#/$defs/uuid'}, + }, + 'required': ['_data', '_dateCreated', '_uuid'], + }, + }, + '_dateCreated': {'$ref': '#/$defs/dateTime'}, + '_dateModified': {'$ref': '#/$defs/dateTime'}, + }, + 'required': ['_dateCreated', '_dateModified'], + }, + 'dataSchema': data_schema, + **data_schema_definitions, + # FIXME: This junk should be in some global place + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'uuid': {'type': 'string', 'format': 'uuid'}, + ### + }, + } + return schema + + def get_output_fields(self): + raise NotImplementedError('Sorry!') + + def transform_data_for_output( + self, action_data: list[dict] + ) -> dict[str, dict[str, Any]]: + raise NotImplementedError('Sorry!') diff --git a/kobo/apps/subsequences/tests/constants.py b/kobo/apps/subsequences/tests/constants.py index 9c8b103937..2075fada4a 100644 --- a/kobo/apps/subsequences/tests/constants.py +++ b/kobo/apps/subsequences/tests/constants.py @@ -1,14 +1,18 @@ EMPTY_SUBMISSION = {} EMPTY_SUPPLEMENT = {} + +# What is a "question" supplement? 
QUESTION_SUPPLEMENT = { 'automatic_google_transcription': { '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:27:00Z', '_versions': [ { - 'value': 'My audio has been transcribed automatically', - 'language': 'en', - 'status': 'completed', + '_data': { + 'value': 'My audio has been transcribed automatically', + 'language': 'en', + 'status': 'completed', + }, '_dateCreated': '2024-04-08T15:27:00Z', '_dateAccepted': '2024-04-08T15:29:00Z', '_uuid': '4dcf9c9f-e503-4e5c-81f5-74250b295001', @@ -20,10 +24,12 @@ '_dateModified': '2024-04-08T15:28:00Z', '_versions': [ { - 'value': 'My audio has been transcribed manually', - 'language': 'en', - 'locale': 'en-CA', - 'status': 'completed', + '_data': { + 'value': 'My audio has been transcribed manually', + 'language': 'en', + 'locale': 'en-CA', + 'status': 'completed', + }, '_dateCreated': '2024-04-08T15:28:00Z', '_dateAccepted': '2024-04-08T15:28:00Z', '_uuid': 'd69b9263-04fd-45b4-b011-2e166cfefd4a', diff --git a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py index e31d3035df..8b62ce3361 100644 --- a/kobo/apps/subsequences/tests/test_automatic_google_transcription.py +++ b/kobo/apps/subsequences/tests/test_automatic_google_transcription.py @@ -178,7 +178,7 @@ def test_valid_result_passes_validation(): action.validate_result(mock_sup_det) assert '_dateAccepted' in mock_sup_det['_versions'][2] - assert mock_sup_det['_versions'][1]['status'] == 'deleted' + assert mock_sup_det['_versions'][1]['_data']['status'] == 'deleted' def test_acceptance_does_not_produce_versions(): @@ -276,8 +276,8 @@ def test_transcription_versions_are_retained_in_supplemental_details(): mock_service.process_data.return_value = {'value': value, 'status': 'complete'} mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) - assert mock_sup_det['_versions'][0]['language'] == 'es' - assert mock_sup_det['_versions'][0]['value'] 
== 'Ni idea' + assert mock_sup_det['_versions'][0]['_data']['language'] == 'es' + assert mock_sup_det['_versions'][0]['_data']['value'] == 'Ni idea' assert mock_sup_det['_dateCreated'] == mock_sup_det['_dateModified'] assert 'value' not in mock_sup_det assert 'language' not in mock_sup_det @@ -333,6 +333,6 @@ def test_latest_version_is_first(): } mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) - assert mock_sup_det['_versions'][0]['value'] == 'trois' - assert mock_sup_det['_versions'][1]['value'] == 'deux' - assert mock_sup_det['_versions'][2]['value'] == 'un' + assert mock_sup_det['_versions'][0]['_data']['value'] == 'trois' + assert mock_sup_det['_versions'][1]['_data']['value'] == 'deux' + assert mock_sup_det['_versions'][2]['_data']['value'] == 'un' diff --git a/kobo/apps/subsequences/tests/test_automatic_google_translation.py b/kobo/apps/subsequences/tests/test_automatic_google_translation.py index c596c0561d..50b5512dd9 100644 --- a/kobo/apps/subsequences/tests/test_automatic_google_translation.py +++ b/kobo/apps/subsequences/tests/test_automatic_google_translation.py @@ -169,9 +169,9 @@ def test_valid_result_passes_validation(): action.validate_result(mock_sup_det) assert '_dateAccepted' in mock_sup_det['fr']['_versions'][1] - assert mock_sup_det['fr']['_versions'][0]['status'] == 'deleted' - assert mock_sup_det['es']['_versions'][1]['status'] == 'complete' - assert mock_sup_det['fr']['_versions'][-1]['status'] == 'complete' + assert mock_sup_det['fr']['_versions'][0]['_data']['status'] == 'deleted' + assert mock_sup_det['es']['_versions'][1]['_data']['status'] == 'complete' + assert mock_sup_det['fr']['_versions'][-1]['_data']['status'] == 'complete' def test_acceptance_does_not_produce_versions(): @@ -268,8 +268,8 @@ def test_translation_versions_are_retained_in_supplemental_details(): mock_service.process_data.return_value = {'value': value, 'status': 'complete'} mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, 
first) - assert mock_sup_det['es']['_versions'][0]['language'] == 'es' - assert mock_sup_det['es']['_versions'][0]['value'] == 'Ni idea' + assert mock_sup_det['es']['_versions'][0]['_data']['language'] == 'es' + assert mock_sup_det['es']['_versions'][0]['_data']['value'] == 'Ni idea' assert mock_sup_det['es']['_dateCreated'] == mock_sup_det['es']['_dateModified'] first_time = mock_sup_det['es']['_versions'][0]['_dateCreated'] @@ -283,8 +283,8 @@ def test_translation_versions_are_retained_in_supplemental_details(): assert len(mock_sup_det.keys()) == 2 - assert mock_sup_det['fr']['_versions'][0]['language'] == 'fr' - assert mock_sup_det['fr']['_versions'][0]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_versions'][0]['_data']['language'] == 'fr' + assert mock_sup_det['fr']['_versions'][0]['_data']['value'] == 'Aucune idée' assert mock_sup_det['fr']['_dateCreated'] == mock_sup_det['fr']['_dateModified'] with patch( @@ -335,9 +335,9 @@ def test_latest_version_is_first(): } mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) - assert mock_sup_det['fr']['_versions'][0]['value'] == 'trois' - assert mock_sup_det['fr']['_versions'][1]['value'] == 'deux' - assert mock_sup_det['fr']['_versions'][2]['value'] == 'un' + assert mock_sup_det['fr']['_versions'][0]['_data']['value'] == 'trois' + assert mock_sup_det['fr']['_versions'][1]['_data']['value'] == 'deux' + assert mock_sup_det['fr']['_versions'][2]['_data']['value'] == 'un' def test_cannot_revise_data_without_transcription(): diff --git a/kobo/apps/subsequences/tests/test_manual_transcription.py b/kobo/apps/subsequences/tests/test_manual_transcription.py index cd1919a543..a04fb129eb 100644 --- a/kobo/apps/subsequences/tests/test_manual_transcription.py +++ b/kobo/apps/subsequences/tests/test_manual_transcription.py @@ -101,8 +101,8 @@ def test_transcript_versions_are_retained_in_supplemental_details(): assert mock_sup_det['_dateCreated'] == mock_sup_det['_dateModified'] assert 
len(mock_sup_det['_versions']) == 1 - assert mock_sup_det['_versions'][0]['language'] == 'en' - assert mock_sup_det['_versions'][0]['value'] == 'No idea' + assert mock_sup_det['_versions'][0]['_data']['language'] == 'en' + assert mock_sup_det['_versions'][0]['_data']['value'] == 'No idea' first_time = mock_sup_det['_dateCreated'] mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) @@ -134,11 +134,11 @@ def test_setting_transcript_to_empty_string(): second = {'language': 'fr', 'value': ''} mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) - assert mock_sup_det['_versions'][0]['value'] == 'Aucune idée' + assert mock_sup_det['_versions'][0]['_data']['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) - assert mock_sup_det['_versions'][0]['value'] == '' - assert mock_sup_det['_versions'][1]['value'] == 'Aucune idée' + assert mock_sup_det['_versions'][0]['_data']['value'] == '' + assert mock_sup_det['_versions'][1]['_data']['value'] == 'Aucune idée' def test_setting_transcript_to_none(): @@ -150,11 +150,11 @@ def test_setting_transcript_to_none(): second = {'language': 'fr', 'value': None} mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) - assert mock_sup_det['_versions'][0]['value'] == 'Aucune idée' + assert mock_sup_det['_versions'][0]['_data']['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) - assert mock_sup_det['_versions'][0]['value'] is None - assert mock_sup_det['_versions'][1]['value'] == 'Aucune idée' + assert mock_sup_det['_versions'][0]['_data']['value'] is None + assert mock_sup_det['_versions'][1]['_data']['value'] == 'Aucune idée' def test_latest_revision_is_first(): @@ -170,6 +170,6 @@ def test_latest_revision_is_first(): for data in first, second, third: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) - assert mock_sup_det['_versions'][0]['value'] == 
'trois' - assert mock_sup_det['_versions'][1]['value'] == 'deux' - assert mock_sup_det['_versions'][2]['value'] == 'un' + assert mock_sup_det['_versions'][0]['_data']['value'] == 'trois' + assert mock_sup_det['_versions'][1]['_data']['value'] == 'deux' + assert mock_sup_det['_versions'][2]['_data']['value'] == 'un' diff --git a/kobo/apps/subsequences/tests/test_manual_translation.py b/kobo/apps/subsequences/tests/test_manual_translation.py index f774e290d4..5dbc9d5a6d 100644 --- a/kobo/apps/subsequences/tests/test_manual_translation.py +++ b/kobo/apps/subsequences/tests/test_manual_translation.py @@ -90,8 +90,8 @@ def test_translation_versions_are_retained_in_supplemental_details(): assert len(mock_sup_det.keys()) == 1 assert '_versions' in mock_sup_det['en'] - assert mock_sup_det['en']['_versions'][0]['language'] == 'en' - assert mock_sup_det['en']['_versions'][0]['value'] == 'No idea' + assert mock_sup_det['en']['_versions'][0]['_data']['language'] == 'en' + assert mock_sup_det['en']['_versions'][0]['_data']['value'] == 'No idea' assert mock_sup_det['en']['_dateCreated'] == mock_sup_det['en']['_dateModified'] first_time = mock_sup_det['en']['_dateCreated'] @@ -99,8 +99,8 @@ def test_translation_versions_are_retained_in_supplemental_details(): mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) assert len(mock_sup_det.keys()) == 2 assert '_versions' in mock_sup_det['fr'] - assert mock_sup_det['fr']['_versions'][0]['language'] == 'fr' - assert mock_sup_det['fr']['_versions'][0]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_versions'][0]['_data']['language'] == 'fr' + assert mock_sup_det['fr']['_versions'][0]['_data']['value'] == 'Aucune idée' assert mock_sup_det['fr']['_dateCreated'] == mock_sup_det['fr']['_dateModified'] mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, third) @@ -129,11 +129,11 @@ def test_setting_translation_to_empty_string(): first = {'language': 'fr', 'value': 'Aucune idée'} second = {'language': 
'fr', 'value': ''} mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) - assert mock_sup_det['fr']['_versions'][0]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_versions'][0]['_data']['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) - assert mock_sup_det['fr']['_versions'][0]['value'] == '' - assert mock_sup_det['fr']['_versions'][1]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_versions'][0]['_data']['value'] == '' + assert mock_sup_det['fr']['_versions'][1]['_data']['value'] == 'Aucune idée' def test_setting_translation_to_none(): @@ -143,11 +143,11 @@ def test_setting_translation_to_none(): second = {'language': 'fr', 'value': None} mock_sup_det = action.revise_data(EMPTY_SUBMISSION, EMPTY_SUPPLEMENT, first) - assert mock_sup_det['fr']['_versions'][0]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_versions'][0]['_data']['value'] == 'Aucune idée' mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, second) - assert mock_sup_det['fr']['_versions'][0]['value'] is None - assert mock_sup_det['fr']['_versions'][1]['value'] == 'Aucune idée' + assert mock_sup_det['fr']['_versions'][0]['_data']['value'] is None + assert mock_sup_det['fr']['_versions'][1]['_data']['value'] == 'Aucune idée' def test_latest_version_is_first(): @@ -161,9 +161,9 @@ def test_latest_version_is_first(): for data in first, second, third: mock_sup_det = action.revise_data(EMPTY_SUBMISSION, mock_sup_det, data) - assert mock_sup_det['fr']['_versions'][0]['value'] == 'trois' - assert mock_sup_det['fr']['_versions'][1]['value'] == 'deux' - assert mock_sup_det['fr']['_versions'][2]['value'] == 'un' + assert mock_sup_det['fr']['_versions'][0]['_data']['value'] == 'trois' + assert mock_sup_det['fr']['_versions'][1]['_data']['value'] == 'deux' + assert mock_sup_det['fr']['_versions'][2]['_data']['value'] == 'un' def test_cannot_revise_data_without_transcription(): diff --git 
a/kobo/apps/subsequences/tests/test_models.py b/kobo/apps/subsequences/tests/test_models.py index 6e49e1c530..fc13c97f18 100644 --- a/kobo/apps/subsequences/tests/test_models.py +++ b/kobo/apps/subsequences/tests/test_models.py @@ -39,15 +39,19 @@ class SubmissionSupplementTestCase(TestCase): '_dateModified': '2024-04-08T15:31:00Z', '_versions': [ { - 'language': 'ar', - 'value': 'مجنون', + '_data': { + 'language': 'ar', + 'value': 'مجنون', + }, '_dateCreated': '2024-04-08T15:31:00Z', '_dateAccepted': '2024-04-08T15:31:00Z', '_uuid': '51ff33a5-62d6-48ec-94b2-2dfb406e1dee', }, { - 'language': 'ar', - 'value': 'هائج', + '_data': { + 'language': 'ar', + 'value': 'هائج', + }, '_dateCreated': '2024-04-08T15:27:00Z', '_dateAccepted': '2024-04-08T15:27:00Z', '_uuid': '123e4567-e89b-12d3-a456-426614174000', @@ -59,8 +63,10 @@ class SubmissionSupplementTestCase(TestCase): '_dateCreated': '2024-04-08T15:27:00Z', '_dateModified': '2024-04-08T15:27:00Z', '_versions': [{ - 'language': 'en', - 'value': 'berserk', + '_data': { + 'language': 'en', + 'value': 'berserk', + }, '_dateCreated': '2024-04-08T15:27:00Z', '_dateAccepted': '2024-04-08T15:27:00Z', '_uuid': '22b04ce8-61c2-4383-836f-5d5f0ad73645', @@ -75,8 +81,10 @@ class SubmissionSupplementTestCase(TestCase): '_dateModified': '2024-04-08T15:32:00Z', '_versions': [ { - 'language': 'es', - 'value': 'enloquecido', + '_data': { + 'language': 'es', + 'value': 'enloquecido', + }, '_dateCreated': '2024-04-08T15:32:00Z', '_dateAccepted': '2024-04-08T15:32:00Z', '_uuid': 'd69b9263-04fd-45b4-b011-2e166cfefd4a', @@ -86,8 +94,10 @@ class SubmissionSupplementTestCase(TestCase): } }, { - 'language': 'es', - 'value': 'loco', + '_data': { + 'language': 'es', + 'value': 'loco', + }, '_dateCreated': '2024-04-08T15:29:00Z', '_dateAccepted': '2024-04-08T15:29:00Z', '_uuid': '30d0f39c-a1dd-43fe-999a-844f12f83d31', diff --git a/kobo/apps/subsequences/tests/test_qual.py b/kobo/apps/subsequences/tests/test_qual.py new file mode 100644 index 
0000000000..a77020a243 --- /dev/null +++ b/kobo/apps/subsequences/tests/test_qual.py @@ -0,0 +1,644 @@ +from copy import deepcopy +from unittest import mock +import uuid + +from freezegun import freeze_time +import dateutil +import jsonschema +import pytest + +from ..actions.qual import QualAction +from .constants import EMPTY_SUBMISSION + + +class Fix: + """ + This class houses things that should probably be moved to a fixture + - - - + TODO: do we want `_dateAccepted` here? + TODO: forbid deletion of questions and choices + TODO: be a lot more diligent about deepcopying, e.g. in `def data_schema()` + + DECISION: discard `type`s from response data. Even pre-refactor, we already + had stuff like this (confirmed on Global): + { + 'val': '81c5c592-9c3f-4220-b7fc-ea1d758b6535', + 'type': 'qual_select_one', + 'uuid': '8ce9be67-ae6a-4eca-b1e5-2a9f7ac51341', + } + What benefit is `type` really adding here? If we lose track of the + `8ce9be67…` question, we're hosed anyway + """ + + # Action configuration + + fake_question_xpath = 'group_name/question_name' # irrelevant in tests + action_params = [ + { + 'type': 'qualInteger', + 'uuid': '1a2c8eb0-e2ec-4b3c-942a-c1a5410c081a', + 'labels': {'_default': 'How many characters appear in the story?'}, + }, + { + 'type': 'qualSelectMultiple', + 'uuid': '2e30bec7-4843-43c7-98bc-13114af230c5', + 'labels': {'_default': "What themes were present in the story?"}, + 'choices': [ + { + 'uuid': '2e24e6b4-bc3b-4e8e-b0cd-d8d3b9ca15b6', + 'labels': {'_default': 'Empathy'}, + }, + { + 'uuid': 'cb82919d-2948-4ccf-a488-359c5d5ee53a', + 'labels': {'_default': 'Competition'}, + }, + { + 'uuid': '8effe3b1-619e-4ada-be45-ebcea5af0aaf', + 'labels': {'_default': 'Apathy'}, + }, + ], + }, + { + 'type': 'qualSelectOne', + 'uuid': '1a8b748b-f470-4c40-bc09-ce2b1197f503', + 'labels': {'_default': 'Was this a first-hand account?'}, + 'choices': [ + { + 'uuid': '3c7aacdc-8971-482a-9528-68e64730fc99', + 'labels': {'_default': 'Yes'}, + }, + { + 'uuid': 
'7e31c6a5-5eac-464c-970c-62c383546a94', + 'labels': {'_default': 'No'}, + }, + ], + }, + { + 'type': 'qualTags', + 'uuid': 'e9b4e6d1-fdbb-4dc9-8b10-a9c3c388322f', + 'labels': {'_default': 'Tag any landmarks mentioned in the story'}, + }, + { + 'type': 'qualText', + 'uuid': '83acf2a7-8edc-4fd8-8b9f-f832ca3f18ad', + 'labels': {'_default': 'Add any further remarks'}, + }, + { + 'type': 'qualNote', + 'uuid': '5ef11d48-d7a3-432e-af83-8c2e9b1feb72', + 'labels': {'_default': 'Thanks for your diligence'}, + }, + ] + + # Data-related schemas + + expected_data_schema = { + '$defs': { + # TODO: use `'format': 'uuid'` and move to global? + 'qualUuid': {'type': 'string', 'minLength': 1}, + 'qualCommon': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'uuid': {'$ref': '#/$defs/qualUuid'}, + 'value': {}, + }, + 'required': ['uuid', 'value'], + }, + 'qualInteger': { + 'type': 'object', + 'properties': { + 'value': {'type': ['integer', 'null']}, + }, + }, + 'qualSelectMultiple': { + 'type': 'object', + 'properties': { + 'value': { + 'type': 'array', + 'items': {'type': 'string', 'minLength': 1}, + }, + }, + }, + 'qualSelectOne': { + 'type': 'object', + 'properties': { + 'value': {'type': 'string'}, + }, + }, + 'qualTags': { + 'type': 'object', + 'properties': { + 'value': {'type': 'array', 'items': {'type': 'string'}}, + }, + }, + 'qualText': { + 'type': 'object', + 'properties': { + 'value': {'type': 'string'}, + }, + }, + }, + 'oneOf': [ + { + 'allOf': [ + {'$ref': '#/$defs/qualCommon'}, + {'$ref': '#/$defs/qualInteger'}, + { + 'type': 'object', + 'properties': { + 'uuid': { + 'const': '1a2c8eb0-e2ec-4b3c-942a-c1a5410c081a' + } + }, + }, + ] + }, + { + 'allOf': [ + {'$ref': '#/$defs/qualCommon'}, + {'$ref': '#/$defs/qualSelectMultiple'}, + { + 'type': 'object', + 'properties': { + 'uuid': { + 'const': '2e30bec7-4843-43c7-98bc-13114af230c5' + } + }, + }, + ] + }, + { + 'allOf': [ + {'$ref': '#/$defs/qualCommon'}, + {'$ref': '#/$defs/qualSelectOne'}, + { 
+ 'type': 'object', + 'properties': { + 'uuid': { + 'const': '1a8b748b-f470-4c40-bc09-ce2b1197f503' + } + }, + }, + ] + }, + { + 'allOf': [ + {'$ref': '#/$defs/qualCommon'}, + {'$ref': '#/$defs/qualTags'}, + { + 'type': 'object', + 'properties': { + 'uuid': { + 'const': 'e9b4e6d1-fdbb-4dc9-8b10-a9c3c388322f' + } + }, + }, + ] + }, + { + 'allOf': [ + {'$ref': '#/$defs/qualCommon'}, + {'$ref': '#/$defs/qualText'}, + { + 'type': 'object', + 'properties': { + 'uuid': { + 'const': '83acf2a7-8edc-4fd8-8b9f-f832ca3f18ad' + } + }, + }, + ] + }, + ], + } + expected_result_schema = { + '$schema': 'https://json-schema.org/draft/2020-12/schema', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + '1a2c8eb0-e2ec-4b3c-942a-c1a5410c081a': { + '$ref': '#/$defs/dataActionKey' + }, + '2e30bec7-4843-43c7-98bc-13114af230c5': { + '$ref': '#/$defs/dataActionKey' + }, + '1a8b748b-f470-4c40-bc09-ce2b1197f503': { + '$ref': '#/$defs/dataActionKey' + }, + 'e9b4e6d1-fdbb-4dc9-8b10-a9c3c388322f': { + '$ref': '#/$defs/dataActionKey' + }, + '83acf2a7-8edc-4fd8-8b9f-f832ca3f18ad': { + '$ref': '#/$defs/dataActionKey' + }, + '5ef11d48-d7a3-432e-af83-8c2e9b1feb72': { + '$ref': '#/$defs/dataActionKey' + }, + }, + '$defs': { + 'dataActionKey': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + '_versions': { + 'type': 'array', + 'minItems': 1, + 'items': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + '_data': {'$ref': '#/$defs/dataSchema'}, + '_dateCreated': {'$ref': '#/$defs/dateTime'}, + '_dateAccepted': {'$ref': '#/$defs/dateTime'}, + '_uuid': {'$ref': '#/$defs/uuid'}, + }, + 'required': ['_data', '_dateCreated', '_uuid'], + }, + }, + '_dateCreated': {'$ref': '#/$defs/dateTime'}, + '_dateModified': {'$ref': '#/$defs/dateTime'}, + }, + 'required': ['_dateCreated', '_dateModified'], + }, + # Apologies for sacrificing clarity by not reproducing here the + # entire schema unadulterated, but the length was getting out of + # 
control + 'dataSchema': { + # Un-nest definitions + k: v + for k, v in expected_data_schema.items() + if k != '$defs' + }, + **expected_data_schema['$defs'], + 'dateTime': {'type': 'string', 'format': 'date-time'}, + 'uuid': {'type': 'string', 'format': 'uuid'}, + }, + } + + # Response data + + valid_filled_responses = [ + { + 'uuid': '1a2c8eb0-e2ec-4b3c-942a-c1a5410c081a', + # type is qualInteger + 'value': 3, + }, + { + 'uuid': '2e30bec7-4843-43c7-98bc-13114af230c5', + # type is qualSelectMultiple + 'value': [ + '2e24e6b4-bc3b-4e8e-b0cd-d8d3b9ca15b6', + 'cb82919d-2948-4ccf-a488-359c5d5ee53a', + ], + }, + { + 'uuid': '1a8b748b-f470-4c40-bc09-ce2b1197f503', + # type is qualSelectOne + 'value': '7e31c6a5-5eac-464c-970c-62c383546a94', + }, + { + 'uuid': 'e9b4e6d1-fdbb-4dc9-8b10-a9c3c388322f', + # type is qualTags + 'value': ['Quinobequin', 'Doughboy Donuts'], + }, + { + 'uuid': '83acf2a7-8edc-4fd8-8b9f-f832ca3f18ad', + # type is qualText + 'value': 'As the eagle and the wild goose see it', + }, + ] + valid_empty_responses = [ + { + 'uuid': '1a2c8eb0-e2ec-4b3c-942a-c1a5410c081a', + # type is qualInteger + 'value': None, + }, + { + 'uuid': '2e30bec7-4843-43c7-98bc-13114af230c5', + # type is qualSelectMultiple + 'value': [], + }, + { + 'uuid': '1a8b748b-f470-4c40-bc09-ce2b1197f503', + # type is qualSelectOne + 'value': '', + }, + { + 'uuid': 'e9b4e6d1-fdbb-4dc9-8b10-a9c3c388322f', + # type is qualTags + 'value': [], + }, + { + 'uuid': '83acf2a7-8edc-4fd8-8b9f-f832ca3f18ad', + # type is qualText + 'value': '', + }, + ] + invalid_responses = [ + 'garbage', + { + # type is qualText + 'value': 'missing uuid!', + }, + { + 'uuid': '83acf2a7-8edc-4fd8-8b9f-f832ca3f18ad', + # type is qualText + # missing value! 
+ }, + { + 'uuid': '5ef11d48-d7a3-432e-af83-8c2e9b1feb72', + # type is qualNote + 'value': 'unexpected response!', # notes take no responses + }, + { + 'uuid': '1a2c8eb0-e2ec-4b3c-942a-c1a5410c081a', + # type is qualInteger + 'value': 'not an integer', + }, + { + 'uuid': '2e30bec7-4843-43c7-98bc-13114af230c5', + # type is qualSelectMultiple + 'value': 'not an array', + }, + { + 'uuid': '1a8b748b-f470-4c40-bc09-ce2b1197f503', + # type is qualSelectOne + 'value': ['unexpected array'], + }, + { + 'uuid': 'e9b4e6d1-fdbb-4dc9-8b10-a9c3c388322f', + # type is qualTags + 'value': 'not an array', + }, + { + 'uuid': '83acf2a7-8edc-4fd8-8b9f-f832ca3f18ad', + # type is qualText + 'value': ['unexpected array'], + }, + { + 'uuid': '83acf2a7-8edc-4fd8-8b9f-f832ca3f18ad', + 'type': 'qualText', + 'value': 'the type is not to be included as an attribute', + }, + ] + + # Results, including multiple versions of responses + + result_mock_timestamp_sequence = [ + '2025-01-01T11:11:11Z', + '2025-01-02T11:11:11Z', + '2025-02-01T11:11:11Z', + '2025-02-02T11:11:11Z', + '2025-03-01T11:11:11Z', + '2025-03-02T11:11:11Z', + '2025-04-01T11:11:11Z', + '2025-04-02T11:11:11Z', + '2025-05-01T11:11:11Z', + '2025-05-02T11:11:11Z', + ] + result_mock_uuid_sequence = [ + 'a9a817c0-7208-4063-bab6-93c0a3a7615b', + '61d23cd7-ce2c-467b-ab26-0839226c714d', + '20dd5185-ee43-451f-8759-2f5185c3c912', + '409c690e-d148-4d80-8c73-51be941b33b0', + '49fbd509-e042-44ce-843c-db04485a0096', + '5799f662-76d7-49ab-9a1c-ae2c7d502a78', + 'c4fa8263-50c0-4252-9c9b-216ca338be13', + '64e59cc1-adaf-47a3-a068-550854d8f98f', + '909c62cf-d544-4926-8839-7f035c6c7483', + '15ccc864-0e83-48f2-be1d-dc2adb9297f4', + ] + expected_result_after_filled_and_empty_responses = { + '1a2c8eb0-e2ec-4b3c-942a-c1a5410c081a': { + '_dateCreated': '2025-01-01T11:11:11Z', + '_dateModified': '2025-01-02T11:11:11Z', + '_versions': [ + { + '_data': { + 'uuid': '1a2c8eb0-e2ec-4b3c-942a-c1a5410c081a', + 'value': None, # Empty response recorded last + }, + 
'_dateCreated': '2025-01-02T11:11:11Z', + '_dateAccepted': '2025-01-02T11:11:11Z', + '_uuid': '61d23cd7-ce2c-467b-ab26-0839226c714d', + }, + { + '_data': { + 'uuid': '1a2c8eb0-e2ec-4b3c-942a-c1a5410c081a', + 'value': 3, # Filled response recorded first + }, + '_dateCreated': '2025-01-01T11:11:11Z', + '_dateAccepted': '2025-01-01T11:11:11Z', + '_uuid': 'a9a817c0-7208-4063-bab6-93c0a3a7615b', + }, + ], + }, + '2e30bec7-4843-43c7-98bc-13114af230c5': { + '_dateCreated': '2025-02-01T11:11:11Z', + '_dateModified': '2025-02-02T11:11:11Z', + '_versions': [ + { + '_data': { + 'uuid': '2e30bec7-4843-43c7-98bc-13114af230c5', + 'value': [], + }, + '_dateCreated': '2025-02-02T11:11:11Z', + '_dateAccepted': '2025-02-02T11:11:11Z', + '_uuid': '409c690e-d148-4d80-8c73-51be941b33b0', + }, + { + '_data': { + 'uuid': '2e30bec7-4843-43c7-98bc-13114af230c5', + 'value': [ + '2e24e6b4-bc3b-4e8e-b0cd-d8d3b9ca15b6', + 'cb82919d-2948-4ccf-a488-359c5d5ee53a', + ], + }, + '_dateCreated': '2025-02-01T11:11:11Z', + '_dateAccepted': '2025-02-01T11:11:11Z', + '_uuid': '20dd5185-ee43-451f-8759-2f5185c3c912', + }, + ], + }, + '1a8b748b-f470-4c40-bc09-ce2b1197f503': { + '_dateCreated': '2025-03-01T11:11:11Z', + '_dateModified': '2025-03-02T11:11:11Z', + '_versions': [ + { + '_data': { + 'uuid': '1a8b748b-f470-4c40-bc09-ce2b1197f503', + 'value': '', + }, + '_dateCreated': '2025-03-02T11:11:11Z', + '_dateAccepted': '2025-03-02T11:11:11Z', + '_uuid': '5799f662-76d7-49ab-9a1c-ae2c7d502a78', + }, + { + '_data': { + 'uuid': '1a8b748b-f470-4c40-bc09-ce2b1197f503', + 'value': '7e31c6a5-5eac-464c-970c-62c383546a94', + }, + '_dateCreated': '2025-03-01T11:11:11Z', + '_dateAccepted': '2025-03-01T11:11:11Z', + '_uuid': '49fbd509-e042-44ce-843c-db04485a0096', + }, + ], + }, + 'e9b4e6d1-fdbb-4dc9-8b10-a9c3c388322f': { + '_dateCreated': '2025-04-01T11:11:11Z', + '_dateModified': '2025-04-02T11:11:11Z', + '_versions': [ + { + '_data': { + 'uuid': 'e9b4e6d1-fdbb-4dc9-8b10-a9c3c388322f', + 'value': [], + }, + 
'_dateCreated': '2025-04-02T11:11:11Z', + '_dateAccepted': '2025-04-02T11:11:11Z', + '_uuid': '64e59cc1-adaf-47a3-a068-550854d8f98f', + }, + { + '_data': { + 'uuid': 'e9b4e6d1-fdbb-4dc9-8b10-a9c3c388322f', + 'value': ['Quinobequin', 'Doughboy Donuts'], + }, + '_dateCreated': '2025-04-01T11:11:11Z', + '_dateAccepted': '2025-04-01T11:11:11Z', + '_uuid': 'c4fa8263-50c0-4252-9c9b-216ca338be13', + }, + ], + }, + '83acf2a7-8edc-4fd8-8b9f-f832ca3f18ad': { + '_dateCreated': '2025-05-01T11:11:11Z', + '_dateModified': '2025-05-02T11:11:11Z', + '_versions': [ + { + '_data': { + 'uuid': '83acf2a7-8edc-4fd8-8b9f-f832ca3f18ad', + 'value': '', + }, + '_dateCreated': '2025-05-02T11:11:11Z', + '_dateAccepted': '2025-05-02T11:11:11Z', + '_uuid': '15ccc864-0e83-48f2-be1d-dc2adb9297f4', + }, + { + '_data': { + 'uuid': '83acf2a7-8edc-4fd8-8b9f-f832ca3f18ad', + 'value': 'As the eagle and the wild goose see it', + }, + '_dateCreated': '2025-05-01T11:11:11Z', + '_dateAccepted': '2025-05-01T11:11:11Z', + '_uuid': '909c62cf-d544-4926-8839-7f035c6c7483', + }, + ], + }, + } + + +_action = QualAction( + source_question_xpath=Fix.fake_question_xpath, params=Fix.action_params +) + + +def test_param_validation(): + invalid_params = [ + { + 'type': 'qualSelectMultiple', + 'uuid': '2e30bec7-4843-43c7-98bc-13114af230c5', + 'labels': {'_default': "What themes were present in the story?"}, + # Oops, no choices! 
+ } + ] + with pytest.raises(jsonschema.exceptions.ValidationError): + # Instantiation must validate params + QualAction( + source_question_xpath=Fix.fake_question_xpath, params=invalid_params + ) + + +def test_data_schema_generation(): + generated_schema = _action.data_schema + assert generated_schema == Fix.expected_data_schema + + +def test_valid_filled_responses_pass_data_validation(): + for response in Fix.valid_filled_responses: + _action.validate_data(response) + + +def test_valid_empty_responses_pass_data_validation(): + for response in Fix.valid_empty_responses: + _action.validate_data(response) + + +def test_invalid_reponses_fail_data_validation(): + for response in Fix.invalid_responses: + with pytest.raises(jsonschema.exceptions.ValidationError): + _action.validate_data(response) + + +def test_result_schema_generation(): + generated_schema = _action.result_schema + assert generated_schema == Fix.expected_result_schema + + +def test_valid_result_passes_validation(): + _action.validate_result( + Fix.expected_result_after_filled_and_empty_responses + ) + + +def test_invalid_result_fails_validation(): + working_result = deepcopy( + Fix.expected_result_after_filled_and_empty_responses + ) + + # erroneously add '_dateModified' onto a version + first_version = working_result['1a2c8eb0-e2ec-4b3c-942a-c1a5410c081a'][ + '_versions' + ][0] + first_version['_dateModified'] = first_version['_dateCreated'] + + with pytest.raises(jsonschema.exceptions.ValidationError): + _action.validate_result(working_result) + + +def test_result_content(): + """ + For each question specified in `Fix.action_params`, record two responses: + 1. First, the corresponding response from `Fix.valid_filled_responses` + 2. 
Then, the empty response from `Fix.valid_empty_responses` + + Afterwards, verify the result against + `Fix.expected_result_after_filled_and_empty_responses` + """ + + # Sanity check the fixture data, since this test requires both the filled + # and empty response lists each to have one response per question + # (identified by its UUID) in the same order + filled_uuids = [x['uuid'] for x in Fix.valid_filled_responses] + empty_uuids = [x['uuid'] for x in Fix.valid_empty_responses] + assert filled_uuids == empty_uuids + + datetime_iter = iter( + (dateutil.parser.parse(dt) for dt in Fix.result_mock_timestamp_sequence) + ) + uuid_list = [uuid.UUID(u) for u in Fix.result_mock_uuid_sequence] + + accumulated_result = {} + + with mock.patch('uuid.uuid4', side_effect=uuid_list): + for filled_response, empty_response in zip( + Fix.valid_filled_responses, Fix.valid_empty_responses + ): + for response in filled_response, empty_response: + with freeze_time(next(datetime_iter)): + accumulated_result = _action.revise_data( + EMPTY_SUBMISSION, accumulated_result, response + ) + + assert ( + accumulated_result + == Fix.expected_result_after_filled_and_empty_responses + ) diff --git a/kobo/apps/subsequences/utils/time.py b/kobo/apps/subsequences/utils/time.py index 61d44760d4..0ce0dce4ce 100644 --- a/kobo/apps/subsequences/utils/time.py +++ b/kobo/apps/subsequences/utils/time.py @@ -18,5 +18,8 @@ def js_str_to_datetime(js_str: str) -> datetime.datetime: """ Return a `datetime` from a string following the simplification of the ISO 8601 format used by JavaScript + + TODO: trash this in favor of `dateutil.parser.parse` (and eventual support + by `datetime` itself)? """ return datetime.datetime.fromisoformat(js_str.replace('Z', '+00:00')) From 7023d277f9a23a9cd97167d63827f37b03e2be47 Mon Sep 17 00:00:00 2001 From: "John N. 
Milner" Date: Wed, 5 Nov 2025 12:53:01 -0500 Subject: [PATCH 116/138] Correct name of patched method --- .../subsequences/tests/test_automatic_google_translation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kobo/apps/subsequences/tests/test_automatic_google_translation.py b/kobo/apps/subsequences/tests/test_automatic_google_translation.py index 50b5512dd9..8df7dc1540 100644 --- a/kobo/apps/subsequences/tests/test_automatic_google_translation.py +++ b/kobo/apps/subsequences/tests/test_automatic_google_translation.py @@ -405,7 +405,7 @@ def test_action_is_updated_in_background_if_in_progress(): ): mock_service.process_data.return_value = {'status': 'in_progress'} with patch( - 'kobo.apps.subsequences.actions.base.poll_run_automatic_process' + 'kobo.apps.subsequences.actions.base.poll_run_external_process' ) as task_mock: action.revise_data( submission, EMPTY_SUPPLEMENT, {'language': 'fr'} From c0ac023501150065e846edcdf7489952b4c6b30c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Sat, 1 Nov 2025 14:23:47 -0400 Subject: [PATCH 117/138] fix(ci): pin pip<25.3 to restore compatibility with pip-tools 7.x (#6435) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### 📣 Summary Fixes a CI installation issue caused by an incompatibility between `pip` 25.3 and `pip-tools` 7.x. 
--- .github/workflows/openapi.yml | 2 +- .github/workflows/pytest.yml | 2 +- Dockerfile | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/openapi.yml b/.github/workflows/openapi.yml index f3854b83eb..fef5fdcfc1 100644 --- a/.github/workflows/openapi.yml +++ b/.github/workflows/openapi.yml @@ -50,7 +50,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install pip-tools - run: python -m pip install pip-tools==7.\* + run: python -m pip install --no-cache-dir --upgrade "pip<25.3" "pip-tools==7.*" - name: Update Debian package lists run: sudo DEBIAN_FRONTEND=noninteractive apt-get -y update diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 9ecc10b3d8..4e735b4310 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -47,7 +47,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Install pip-tools - run: python -m pip install pip-tools==7.\* + run: python -m pip install --no-cache-dir --upgrade "pip<25.3" "pip-tools==7.*" - name: Update Debian package lists run: sudo DEBIAN_FRONTEND=noninteractive apt-get -y update - name: Install Debian dependencies diff --git a/Dockerfile b/Dockerfile index 8e6aab2a68..8337bbbabf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,8 @@ ENV VIRTUAL_ENV=/opt/venv \ RUN python -m venv "$VIRTUAL_ENV" ENV PATH="$VIRTUAL_ENV/bin:$PATH" -RUN pip install --quiet pip-tools==7.\* +RUN python -m pip install --upgrade "pip<25.3" \ + && python -m pip install "pip-tools==7.*" COPY ./dependencies/pip/requirements.txt "${TMP_DIR}/pip_dependencies.txt" RUN pip-sync "${TMP_DIR}/pip_dependencies.txt" 1>/dev/null From 2b65aba7b4cd8296d4436909122ea4c2e110c0b7 Mon Sep 17 00:00:00 2001 From: Rebecca Graber Date: Tue, 25 Nov 2025 13:47:11 +0100 Subject: [PATCH 118/138] test(subsequences): fix broken unit tests (#6491) ### Notes Unit tests only. Skips tests that we eventually want to implement but don't have implementations for yet. 
DRF failures are unrelated to PR. The only substantive difference is in how we add supplements to duplicated submissions. The old `update_submission_extras` method has been removed so instead we just create a SubmissionSupplement object with the correct data. --------- Co-authored-by: John N. Milner --- .github/workflows/pytest.yml | 4 +- kobo/apps/audit_log/models.py | 6 +- kobo/apps/audit_log/tests/test_models.py | 6 +- .../tests/test_project_history_logs.py | 88 ++++++++----------- kobo/apps/subsequences/actions/__init__.py | 3 + kobo/apps/subsequences/actions/qual.py | 4 +- kobo/apps/subsequences/models.py | 6 +- kobo/apps/subsequences/schemas.py | 10 ++- .../tests/api/v2/test_permissions.py | 6 +- .../tests/api/v2/test_validation.py | 2 +- kobo/apps/subsequences/tests/test_models.py | 4 + .../subsequences/utils/supplement_data.py | 3 +- kobo/apps/subsequences/utils/versioning.py | 3 - .../test_attachment_cleanup.py | 14 +-- kpi/deployment_backends/base_backend.py | 8 +- kpi/deployment_backends/openrosa_backend.py | 11 ++- kpi/fixtures/asset_with_settings_and_qa.json | 21 +++-- kpi/tests/api/v2/test_api_submissions.py | 59 ++++++++++--- kpi/views/v2/asset.py | 2 +- 19 files changed, 151 insertions(+), 109 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 4e735b4310..954acffbb8 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -24,8 +24,8 @@ jobs: matrix: python-version: ['3.10'] extra-pytest-options: - - --ds kobo.settings.testing - - --ds kobo.settings.testing_no_stripe --ignore kobo/apps/stripe + - --ds kobo.settings.testing --ignore kobo/apps/subsequences__old + - --ds kobo.settings.testing_no_stripe --ignore kobo/apps/stripe --ignore kobo/apps/subsequences__old services: postgres: image: postgis/postgis:14-3.4 diff --git a/kobo/apps/audit_log/models.py b/kobo/apps/audit_log/models.py index c0827f5af2..81c688147a 100644 --- a/kobo/apps/audit_log/models.py +++ 
b/kobo/apps/audit_log/models.py @@ -403,7 +403,7 @@ def create_from_request(cls, request: WSGIRequest): 'submissions': cls._create_from_submission_request, 'submissions-list': cls._create_from_submission_request, 'submission-detail': cls._create_from_submission_request, - 'advanced-submission-post': cls._create_from_submission_extra_request, + 'submission-supplement': cls._create_from_submission_extra_request, } url_name = request.resolver_match.url_name method = url_name_to_action.get(url_name, None) @@ -572,7 +572,7 @@ def _create_from_detail_request(cls, request): 'settings': cls._handle_settings_change, 'data_sharing': cls._handle_sharing_change, 'content': cls._handle_content_change, - 'advanced_features.qual.qual_survey': cls._handle_qa_change, + 'advanced_features._actionConfigs': cls._handle_qa_change, } # additional metadata should generally follow the pattern @@ -655,7 +655,7 @@ def _create_from_submission_request(cls, request): @classmethod def _create_from_submission_extra_request(cls, request): - s_uuid = request._data['submission'] + s_uuid = request.resolver_match.kwargs['submission_id_or_root_uuid'] # have to fetch the instance here because we don't have access to it # anywhere else in the request instance = Instance.objects.filter( diff --git a/kobo/apps/audit_log/tests/test_models.py b/kobo/apps/audit_log/tests/test_models.py index 0da9274e65..f9f81bc56b 100644 --- a/kobo/apps/audit_log/tests/test_models.py +++ b/kobo/apps/audit_log/tests/test_models.py @@ -759,7 +759,7 @@ def test_create_from_bulk_requests_exits_on_malformed_request(self): ('settings', '_handle_settings_change'), ('data_sharing', '_handle_sharing_change'), ('content', '_handle_content_change'), - ('advanced_features.qual.qual_survey', '_handle_qa_change'), + ('advanced_features._actionConfigs', '_handle_qa_change'), ) @unpack def test_create_from_detail_request_plumbing(self, field, expected_method): @@ -770,9 +770,9 @@ def test_create_from_detail_request_plumbing(self, field, 
expected_method): 'settings': 'settings', 'data_sharing': 'sharing', 'content': 'content', - 'advanced_features.qual.qual_survey': 'survey', 'latest_version.uid': 'v12345', 'owner.username': 'someuser', + 'advanced_features._actionConfigs': {'some': 'stuff'}, } request.updated_data = {**request.initial_data, field: 'new'} with patch( @@ -790,7 +790,7 @@ def test_unexpected_fields_ignored_in_detail_request(self): 'settings': 'settings', 'data_sharing': 'sharing', 'content': 'content', - 'advanced_features.qual.qual_survey': 'survey', + 'advanced_features._actionConfigs': {'some': 'stuff'}, 'latest_version.uid': 'v12345', 'something_new': 'new', 'owner.username': 'someuser', diff --git a/kobo/apps/audit_log/tests/test_project_history_logs.py b/kobo/apps/audit_log/tests/test_project_history_logs.py index b83292cda9..817c73a40b 100644 --- a/kobo/apps/audit_log/tests/test_project_history_logs.py +++ b/kobo/apps/audit_log/tests/test_project_history_logs.py @@ -5,7 +5,6 @@ from unittest.mock import patch from xml.etree import ElementTree as ET -import jsonschema.exceptions import responses from ddt import data, ddt, unpack from django.conf import settings @@ -62,6 +61,7 @@ class TestProjectHistoryLogs(BaseAuditLogTestCase): """ fixtures = ['test_data', 'asset_with_settings_and_qa'] + URL_NAMESPACE = 'api_v2' def setUp(self): super().setUp() @@ -562,19 +562,17 @@ def test_update_content_creates_log(self, use_v2): def test_update_qa_creates_log(self): request_data = { 'advanced_features': { - 'qual': { - 'qual_survey': [ - { - 'type': 'qual_note', - 'uuid': '12345', - 'scope': 'by_question#survey', - 'xpath': 'q1', - 'labels': {'_default': 'QA Question'}, - # requests to remove a question just add this - # option rather than actually deleting anything - 'options': {'deleted': True}, - } - ] + '_version': '20250820', + '_actionConfigs': { + 'q1': { + 'qual': [ + { + 'type': 'qualText', + 'uuid': '12345', + 'labels': {'_default': 'Why?'}, + }, + ] + } } } } @@ -588,21 +586,9 
@@ def test_update_qa_creates_log(self): self.assertEqual( log_metadata['qa'][PROJECT_HISTORY_LOG_METADATA_FIELD_NEW], - request_data['advanced_features']['qual']['qual_survey'], + request_data['advanced_features']['_actionConfigs'], ) - def test_failed_qa_update_does_not_create_log(self): - # badly formatted QA dict should result in an error before update - request_data = {'advanced_features': {'qual': {'qual_survey': ['bad']}}} - with self.assertRaises(jsonschema.exceptions.ValidationError): - self.client.patch( - reverse('api_v2:asset-detail', kwargs={'uid': self.asset.uid}), - data=request_data, - format='json', - ) - - self.assertEqual(ProjectHistoryLog.objects.count(), 0) - @data(True, False) def test_register_service_creates_log(self, use_v2): request_data = { @@ -1569,7 +1555,7 @@ def test_update_one_submission_content(self, username): edit_submission_xml(xml_parsed, 'Q1', 'new answer') edited_submission = xml_tostring(xml_parsed) url = reverse( - self._get_endpoint('api_v2:assetsnapshot-submission-openrosa'), + self._get_endpoint('assetsnapshot-submission-openrosa'), args=(self.asset.snapshot().uid,), ) data = { @@ -1748,7 +1734,7 @@ def test_add_submission(self, anonymous, v1): endpoint = 'submissions-list' if v1 else 'submissions' kwargs = {'username': self.user.username} if not v1 else {} url = reverse( - self._get_endpoint(endpoint), + endpoint, kwargs=kwargs, ) data = {'xml_submission_file': SimpleUploadedFile('name.txt', ET.tostring(xml))} @@ -1868,22 +1854,22 @@ def test_update_qa_data(self, is_anonymous, expected_username): instance, submission = self._add_submission( 'adminuser' if not is_anonymous else None ) + question_uuid = self.asset.advanced_features['_actionConfigs']['q1']['qual'][0][ + 'uuid' + ] log_metadata = self._base_project_history_log_test( - method=self.client.post, + method=self.client.patch, url=reverse( - 'advanced-submission-post', - kwargs={'asset_uid': self.asset.uid}, + self._get_endpoint('submission-supplement'), + 
args=[self.asset.uid, submission['_uuid']], ), request_data={ - 'submission': submission['_uuid'], + '_version': '20250820', 'q1': { - 'qual': [ - { - 'type': 'qual_text', - 'uuid': '12345', - 'val': 'someval', - } - ] + 'qual': { + 'uuid': question_uuid, + 'value': 1, + } }, }, expected_action=AuditAction.MODIFY_QA_DATA, @@ -1898,6 +1884,9 @@ def test_update_qa_data_on_modified_instance(self): deployment = self.asset.deployment new_uuid = str(uuid.uuid4()) xml_parsed = fromstring_preserve_root_xmlns(instance.xml) + question_uuid = self.asset.advanced_features['_actionConfigs']['q1']['qual'][0][ + 'uuid' + ] edit_submission_xml( xml_parsed, deployment.SUBMISSION_DEPRECATED_UUID_XPATH, @@ -1917,21 +1906,18 @@ def test_update_qa_data_on_modified_instance(self): instance.uuid = new_uuid instance.save() log_metadata = self._base_project_history_log_test( - method=self.client.post, + method=self.client.patch, url=reverse( - 'advanced-submission-post', - kwargs={'asset_uid': self.asset.uid}, + self._get_endpoint('submission-supplement'), + args=[self.asset.uid, submission['_uuid']], ), request_data={ - 'submission': submission['_uuid'], + '_version': '20250820', 'q1': { - 'qual': [ - { - 'type': 'qual_text', - 'uuid': '12345', - 'val': 'someval', - } - ] + 'qual': { + 'uuid': question_uuid, + 'value': 1, + } }, }, expected_action=AuditAction.MODIFY_QA_DATA, diff --git a/kobo/apps/subsequences/actions/__init__.py b/kobo/apps/subsequences/actions/__init__.py index e514a954ff..93652fbdee 100644 --- a/kobo/apps/subsequences/actions/__init__.py +++ b/kobo/apps/subsequences/actions/__init__.py @@ -2,13 +2,16 @@ from .automatic_google_translation import AutomaticGoogleTranslationAction from .manual_transcription import ManualTranscriptionAction from .manual_translation import ManualTranslationAction +from .qual import QualAction # TODO, what about using a loader for every class in "actions" folder (except base.py)? 
+ ACTIONS = ( AutomaticGoogleTranscriptionAction, AutomaticGoogleTranslationAction, ManualTranscriptionAction, ManualTranslationAction, + QualAction, ) ACTION_IDS_TO_CLASSES = {a.ID: a for a in ACTIONS} diff --git a/kobo/apps/subsequences/actions/qual.py b/kobo/apps/subsequences/actions/qual.py index de9dfab86d..0cdfa15493 100644 --- a/kobo/apps/subsequences/actions/qual.py +++ b/kobo/apps/subsequences/actions/qual.py @@ -255,9 +255,9 @@ def result_schema(self): return schema def get_output_fields(self): - raise NotImplementedError('Sorry!') + return [] def transform_data_for_output( self, action_data: list[dict] ) -> dict[str, dict[str, Any]]: - raise NotImplementedError('Sorry!') + return [] diff --git a/kobo/apps/subsequences/models.py b/kobo/apps/subsequences/models.py index d66e97f25f..149f965136 100644 --- a/kobo/apps/subsequences/models.py +++ b/kobo/apps/subsequences/models.py @@ -3,7 +3,7 @@ from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix from kpi.models.abstract_models import AbstractTimeStampedModel from .actions import ACTION_IDS_TO_CLASSES -from .constants import SUBMISSION_UUID_FIELD, SCHEMA_VERSIONS +from .constants import SCHEMA_VERSIONS, SUBMISSION_UUID_FIELD from .exceptions import InvalidAction, InvalidXPath from .schemas import validate_submission_supplement @@ -34,7 +34,9 @@ def __repr__(self): @staticmethod def revise_data(asset: 'kpi.Asset', submission: dict, incoming_data: dict) -> dict: - if not asset.advanced_features: + if not asset.advanced_features or not asset.advanced_features.get( + '_actionConfigs' + ): raise InvalidAction schema_version = incoming_data.get('_version') diff --git a/kobo/apps/subsequences/schemas.py b/kobo/apps/subsequences/schemas.py index 333ff79c32..79da5a616e 100644 --- a/kobo/apps/subsequences/schemas.py +++ b/kobo/apps/subsequences/schemas.py @@ -1,3 +1,4 @@ +from copy import deepcopy import jsonschema from .actions import ACTION_IDS_TO_CLASSES, ACTIONS @@ -8,7 +9,14 @@ # 
names to convey group hierarchy QUESTION_XPATH_PATTERN = '^([A-Za-z_][A-Za-z0-9_-]*)(/[A-Za-z_][A-Za-z0-9_-]*)*$' +_action_params_schemas = {} +_action_params_defs = {} +for a in ACTIONS: + _action_params_schemas[a.ID] = deepcopy(a.params_schema) + _action_params_defs.update(_action_params_schemas[a.ID].pop('$defs', {})) + ACTION_PARAMS_SCHEMA = { + '$defs': _action_params_defs, 'additionalProperties': False, 'properties': { '_actionConfigs': { @@ -16,7 +24,7 @@ 'patternProperties': { QUESTION_XPATH_PATTERN: { 'additionalProperties': False, - 'properties': {a.ID: a.params_schema for a in ACTIONS}, + 'properties': _action_params_schemas, 'type': 'object', } }, diff --git a/kobo/apps/subsequences/tests/api/v2/test_permissions.py b/kobo/apps/subsequences/tests/api/v2/test_permissions.py index 603a51128c..675f187b00 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_permissions.py +++ b/kobo/apps/subsequences/tests/api/v2/test_permissions.py @@ -172,10 +172,12 @@ def test_can_write(self, username, shared, status_code): '_dateModified': '2024-04-08T15:27:00Z', '_versions': [ { + '_data': { + 'language': 'es', + 'value': 'buenas noches', + }, '_dateCreated': '2024-04-08T15:27:00Z', '_dateAccepted': '2024-04-08T15:27:00Z', - 'language': 'es', - 'value': 'buenas noches', '_uuid': '11111111-2222-3333-4444-555555555555', } ], diff --git a/kobo/apps/subsequences/tests/api/v2/test_validation.py b/kobo/apps/subsequences/tests/api/v2/test_validation.py index f6a25eaa0a..bf271051c4 100644 --- a/kobo/apps/subsequences/tests/api/v2/test_validation.py +++ b/kobo/apps/subsequences/tests/api/v2/test_validation.py @@ -20,7 +20,7 @@ def test_cannot_patch_if_action_is_invalid(self): }, } - # No actions activated at the asset level + # No actions activated for q1 response = self.client.patch( self.supplement_details_url, data=payload, format='json' ) diff --git a/kobo/apps/subsequences/tests/test_models.py b/kobo/apps/subsequences/tests/test_models.py index fc13c97f18..27b0758179 
100644 --- a/kobo/apps/subsequences/tests/test_models.py +++ b/kobo/apps/subsequences/tests/test_models.py @@ -146,6 +146,8 @@ def test_retrieve_data_with_invalid_arguments(self): self.asset, submission_root_uuid=None, prefetched_supplement=None ) + # skip until we actually fill out or delete this test + @pytest.mark.skip() def test_retrieve_data_with_stale_questions(self): SubmissionSupplement.objects.create( asset=self.asset, @@ -160,6 +162,8 @@ def test_retrieve_data_with_stale_questions(self): ) assert submission_supplement == EMPTY_SUPPLEMENT + # skip until we update how we migrate advanced_actions + @pytest.mark.skip() def test_retrieve_data_from_migrated_data(self): submission_supplement = { 'group_name/question_name': { diff --git a/kobo/apps/subsequences/utils/supplement_data.py b/kobo/apps/subsequences/utils/supplement_data.py index 613bed6611..b365e7b278 100644 --- a/kobo/apps/subsequences/utils/supplement_data.py +++ b/kobo/apps/subsequences/utils/supplement_data.py @@ -2,8 +2,7 @@ from kobo.apps.openrosa.apps.logger.xform_instance_parser import remove_uuid_prefix from kobo.apps.subsequences.actions import ACTION_IDS_TO_CLASSES -from kobo.apps.subsequences.constants import SUBMISSION_UUID_FIELD, SUPPLEMENT_KEY, \ - SCHEMA_VERSIONS +from kobo.apps.subsequences.constants import SUBMISSION_UUID_FIELD, SUPPLEMENT_KEY from kobo.apps.subsequences.models import SubmissionSupplement from kobo.apps.subsequences.utils.versioning import migrate_advanced_features diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py index aba7b21852..64935298d1 100644 --- a/kobo/apps/subsequences/utils/versioning.py +++ b/kobo/apps/subsequences/utils/versioning.py @@ -33,9 +33,6 @@ def migrate_advanced_features(advanced_features: dict) -> dict | None: {'language': language} for language in value['languages'] ] - if key == 'qual': - raise NotImplementedError - return migrated_advanced_features diff --git 
a/kobo/apps/trash_bin/tests/storage_cleanup/test_attachment_cleanup.py b/kobo/apps/trash_bin/tests/storage_cleanup/test_attachment_cleanup.py index 43994b8689..a2c6ba5dbb 100644 --- a/kobo/apps/trash_bin/tests/storage_cleanup/test_attachment_cleanup.py +++ b/kobo/apps/trash_bin/tests/storage_cleanup/test_attachment_cleanup.py @@ -1,7 +1,7 @@ -import pytest import uuid from unittest.mock import patch +import pytest from constance import config from constance.test import override_config from django.conf import settings @@ -15,7 +15,7 @@ from kobo.apps.stripe.utils.import_management import requires_stripe from kobo.apps.trash_bin.tasks.attachment import ( auto_delete_excess_attachments, - schedule_auto_attachment_cleanup_for_users + schedule_auto_attachment_cleanup_for_users, ) from kpi.tests.base_test_case import BaseTestCase from kpi.tests.mixins.create_asset_and_submission_mixin import AssetSubmissionTestMixin @@ -68,7 +68,7 @@ def test_auto_delete_excess_attachments_user_within_limit(self): } } with patch( - 'kobo.apps.subsequences.api_view.ServiceUsageCalculator.get_usage_balances', + 'kobo.apps.trash_bin.tasks.attachment.ServiceUsageCalculator.get_usage_balances', # noqa return_value=mock_balances, ): auto_delete_excess_attachments(self.owner.pk) @@ -91,7 +91,7 @@ def test_auto_delete_excess_attachments_user_exceeds_limit(self): }, } with patch( - 'kobo.apps.subsequences.api_view.ServiceUsageCalculator.get_usage_balances', # noqa + 'kobo.apps.trash_bin.tasks.attachment.ServiceUsageCalculator.get_usage_balances', # noqa return_value=mock_balances, ): auto_delete_excess_attachments(self.owner.pk) @@ -141,7 +141,7 @@ def test_auto_delete_trashes_minimum_attachments_to_meet_limit(self): }, } with patch( - 'kobo.apps.subsequences.api_view.ServiceUsageCalculator.get_usage_balances', # noqa + 'kobo.apps.trash_bin.tasks.attachment.ServiceUsageCalculator.get_usage_balances', # noqa return_value=mock_balances, ): auto_delete_excess_attachments(self.owner.pk) @@ -158,7 
+158,7 @@ def test_auto_delete_trashes_minimum_attachments_to_meet_limit(self): # Re-run the task and ensure no further deletions with patch( - 'kobo.apps.subsequences.api_view.ServiceUsageCalculator.get_usage_balances', # noqa + 'kobo.apps.trash_bin.tasks.attachment.ServiceUsageCalculator.get_usage_balances', # noqa return_value={ UsageType.STORAGE_BYTES: { 'effective_limit': limit_bytes, @@ -248,7 +248,7 @@ def test_auto_delete_excess_attachments_skips_if_lock_held(self): }, } with patch( - 'kobo.apps.subsequences.api_view.ServiceUsageCalculator.get_usage_balances', # noqa + 'kobo.apps.trash_bin.tasks.attachment.ServiceUsageCalculator.get_usage_balances', # noqa return_value=mock_balances, ): diff --git a/kpi/deployment_backends/base_backend.py b/kpi/deployment_backends/base_backend.py index 3d0205f0a8..82d008004f 100644 --- a/kpi/deployment_backends/base_backend.py +++ b/kpi/deployment_backends/base_backend.py @@ -25,6 +25,7 @@ from kobo.apps.openrosa.apps.logger.xform_instance_parser import add_uuid_prefix from kobo.apps.openrosa.libs.utils.common_tags import META_INSTANCE_ID, META_ROOT_UUID from kobo.apps.openrosa.libs.utils.logger_tools import http_open_rosa_error_handler +from kobo.apps.subsequences.models import SubmissionSupplement from kpi.constants import ( PERM_CHANGE_SUBMISSIONS, PERM_PARTIAL_SUBMISSIONS, @@ -226,8 +227,11 @@ def copy_submission_extras(self, origin_uuid: str, dest_uuid: str): ).first() if original_extras is not None: duplicated_extras = copy.deepcopy(original_extras.content) - duplicated_extras['submission'] = dest_uuid - self.asset.update_submission_extra(duplicated_extras) + SubmissionSupplement.objects.create( + asset=self.asset, + submission_uuid=dest_uuid, + content=duplicated_extras, + ) def create_enketo_survey_links_for_data_collectors(self): data_collector_tokens = list( diff --git a/kpi/deployment_backends/openrosa_backend.py b/kpi/deployment_backends/openrosa_backend.py index 774caf1efa..559c599550 100644 --- 
a/kpi/deployment_backends/openrosa_backend.py +++ b/kpi/deployment_backends/openrosa_backend.py @@ -1523,12 +1523,19 @@ def __get_submissions_in_json( if len(fields) > 0 and self.SUBMISSION_ROOT_UUID_XPATH not in fields: # skip the query if submission '_uuid' is not even q'd from mongo add_supplements_to_query = False + all_attachment_xpaths = self.asset.get_all_attachment_xpaths() + mongo_cursor = ( + self._inject_properties( + MongoHelper.to_readable_dict(submission), + request, + all_attachment_xpaths, + ) + for submission in mongo_cursor + ) if add_supplements_to_query: mongo_cursor = stream_with_supplements(self.asset, mongo_cursor) - all_attachment_xpaths = self.asset.get_all_attachment_xpaths() - return ( self._inject_properties( MongoHelper.to_readable_dict(submission), diff --git a/kpi/fixtures/asset_with_settings_and_qa.json b/kpi/fixtures/asset_with_settings_and_qa.json index aba9017bf0..2e7859caf1 100644 --- a/kpi/fixtures/asset_with_settings_and_qa.json +++ b/kpi/fixtures/asset_with_settings_and_qa.json @@ -41,18 +41,17 @@ "enabled": false }, "advanced_features": { - "qual": { - "qual_survey": [ - { - "type": "qual_note", - "uuid": "12345", - "scope": "by_question#survey", - "xpath": "q1", - "labels": { - "_default": "QA Question" + "_version": "20250820", + "_actionConfigs": { + "q1": { + "qual": [ + { + "type": "qualInteger", + "uuid": "1a2c8eb0-e2ec-4b3c-942a-c1a5410c081a", + "labels": {"_default": "How many characters appear in the story?"} } - } - ] + ] + } } }, "date_created": "2022-04-05T21:00:22.402Z", diff --git a/kpi/tests/api/v2/test_api_submissions.py b/kpi/tests/api/v2/test_api_submissions.py index c0c6def446..f1da19c78c 100644 --- a/kpi/tests/api/v2/test_api_submissions.py +++ b/kpi/tests/api/v2/test_api_submissions.py @@ -32,6 +32,7 @@ from kobo.apps.openrosa.libs.utils.common_tags import META_ROOT_UUID from kobo.apps.openrosa.libs.utils.logger_tools import dict2xform from kobo.apps.project_ownership.utils import create_invite +from 
kobo.apps.subsequences.models import SubmissionSupplement from kpi.constants import ( ASSET_TYPE_SURVEY, PERM_ADD_SUBMISSIONS, @@ -2328,33 +2329,63 @@ def test_duplicate_submission_as_anotheruser_with_partial_perms(self): def test_duplicate_submission_with_extras(self): dummy_extra = { + '_version': '20250820', 'q1': { - 'transcript': { - 'value': 'dummy transcription', - 'languageCode': 'en', + 'manual_transcription': { + '_dateCreated': '2025-01-01T00:00:00Z', + '_dateModified': '2025-01-01T00:00:00Z', + '_versions': [ + { + '_dateCreated': '2025-01-01T00:00:00Z', + '_dateAccepted': '2025-01-01T00:00:00Z', + '_data': { + 'value': 'dummy transcription', + 'langaugeCode': 'en', + }, + '_uuid': '12345', + } + ], }, - 'translation': { - 'tx1': { - 'value': 'dummy translation', - 'languageCode': 'xx', - } + 'manual_translation': { + '_dateCreated': '2025-01-01T00:00:00Z', + '_dateModified': '2025-01-01T00:00:00Z', + '_versions': [ + { + '_dateCreated': '2025-01-01T00:00:00Z', + '_dateAccepted': '2025-01-01T00:00:00Z', + '_data': { + 'value': 'dummy translation', + 'langaugeCode': 'xx', + }, + '_uuid': '678910', + } + ], }, }, - 'submission': self.submission['_uuid'] } - self.asset.update_submission_extra(dummy_extra) + SubmissionSupplement.objects.create( + submission_uuid=self.submission['_uuid'], + asset=self.asset, + content=dummy_extra, + ) response = self.client.post(self.submission_url, {'format': 'json'}) duplicated_submission = response.data duplicated_extra = self.asset.submission_extras.filter( submission_uuid=duplicated_submission['_uuid'] ).first() assert ( - duplicated_extra.content['q1']['translation']['tx1']['value'] - == dummy_extra['q1']['translation']['tx1']['value'] + duplicated_extra.content['q1']['manual_translation']['_versions'][0][ + '_data' + ]['value'] + == dummy_extra['q1']['manual_translation']['_versions'][0]['_data']['value'] ) assert ( - duplicated_extra.content['q1']['transcript']['value'] - == dummy_extra['q1']['transcript']['value'] + 
duplicated_extra.content['q1']['manual_transcription']['_versions'][0][ + '_data' + ]['value'] + == dummy_extra['q1']['manual_transcription']['_versions'][0]['_data'][ + 'value' + ] ) def test_duplicate_edited_submission(self): diff --git a/kpi/views/v2/asset.py b/kpi/views/v2/asset.py index 06dbeba17d..b16315e586 100644 --- a/kpi/views/v2/asset.py +++ b/kpi/views/v2/asset.py @@ -403,7 +403,7 @@ class AssetViewSet( 'latest_version.uid', 'data_sharing', 'content', - 'advanced_features.qual.qual_survey', + 'advanced_features._actionConfigs', 'owner.username', ] log_type = AuditType.PROJECT_HISTORY From 13afab7a645722dab7e98cfa75429712b1d0ed58 Mon Sep 17 00:00:00 2001 From: Rebecca Graber Date: Wed, 26 Nov 2025 07:51:32 -0500 Subject: [PATCH 119/138] feat(subsequences): add model and new endpoints for advanced actions DEV-1229 (#6492) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### 💭 Notes Add new QuestionAdvancedAction model and associated CRU endpoints. This PR does not involve actually using the models, though it does include audit logs for when users hit those endpoints. QuestionAdvancedAction logs cannot be deleted. Also includes the automatic migration of the `advanced_features` dict into corresponding QuestionAdvancedAction objects. For now it does not change the `advanced_features` dict since we are still using it, but eventually it will be updated to signal that the data in it has already been migrated and we should use the associated QuestionAdvancedAction models instead. The OpenAPI errors are pre-existing and will be dealt with at the branch level sometime before merging the full project branch. ### 👀 Preview steps 1. ℹ️ have an account and a project with an audio question 2. POST to `/api/v2/assets//advanced-features/` the following data: ``` { "action": "manual_transcription", "question_xpath":