Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions modules/odf_data_quality_dashboard/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . import models
19 changes: 19 additions & 0 deletions modules/odf_data_quality_dashboard/__manifest__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
    "name": "ODF Data Quality Dashboard",
    "version": "18.0.1.0.0",
    "summary": "Data Quality Dashboard for Odoo",
    "author": "OdooDataFlow",
    "website": "https://github.com/OdooDataFlow/odoo-data-flow",
    "license": "AGPL-3",
    "category": "Tools",
    # base_vat supplies the VAT validation helpers used by the partner
    # VAT quality check (vat_check_vies, button_vies_check, check_vat).
    "depends": ["base", "base_vat"],
    # Load order matters: security groups first, then the ACL file that
    # references them, then data records and views.
    "data": [
        "security/security.xml",
        "security/ir.model.access.csv",
        "data/res_users.xml",
        "data/scheduled_actions.xml",
        "views/data_quality_issue_views.xml",
        "views/menus.xml",
    ],
    "installable": True,
}
14 changes: 14 additions & 0 deletions modules/odf_data_quality_dashboard/data/res_users.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<odoo>
    <!-- noupdate: this technical user must not be overwritten on module upgrade. -->
    <data noupdate="1">

        <!-- Dedicated user that the daily data-quality cron runs as
             (see data/scheduled_actions.xml), so the checks execute with
             the restricted "Data Quality User" group rights. -->
        <record id="user_odf_data_quality_cron" model="res.users">
            <field name="name">Data Quality Cron User</field>
            <field name="login">data_quality_cron_user</field>
            <!-- This user is for programmatic access only and should not be used by a human -->

            <field name="groups_id" eval="[(6, 0, [ref('odf_data_quality_dashboard.group_odf_data_quality_user')])]"/>
        </record>

    </data>
</odoo>
16 changes: 16 additions & 0 deletions modules/odf_data_quality_dashboard/data/scheduled_actions.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="utf-8"?>
<odoo>
    <!-- noupdate: keep administrator changes to the schedule across upgrades. -->
    <data noupdate="1">

        <!-- Daily cron dispatching all data-quality checks through
             odf.data.quality.issue._run_data_quality_checks(). -->
        <record id="ir_cron_run_data_quality_checks" model="ir.cron">
            <field name="name">Data Quality: Run Checks</field>
            <field name="model_id" ref="model_odf_data_quality_issue"/>
            <field name="state">code</field>
            <field name="code">model._run_data_quality_checks()</field>
            <!-- Run as the dedicated technical user, not the superuser. -->
            <field name="user_id" ref="odf_data_quality_dashboard.user_odf_data_quality_cron"/>
            <field name="interval_number">1</field>
            <field name="interval_type">days</field>
        </record>

    </data>
</odoo>
1 change: 1 addition & 0 deletions modules/odf_data_quality_dashboard/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . import data_quality_issue
137 changes: 137 additions & 0 deletions modules/odf_data_quality_dashboard/models/data_quality_issue.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import logging
import time
from datetime import timedelta

from odoo import api, fields, models

_logger = logging.getLogger(__name__)

Comment on lines +1 to +5
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The _logger variable is used on line 112, but it is not imported or defined in this file. This will raise a NameError at runtime when the VIES check fails, causing the cron job to crash. To fix this, you need to import the logging module and initialize a logger instance at the top of the file.

import logging
import time
from datetime import timedelta

from odoo import api, fields, models

_logger = logging.getLogger(__name__)


class DataQualityIssue(models.Model):
    """Represents a data quality issue found in the system.

    This model stores records of data inconsistencies or errors,
    allowing users to track and resolve them in a structured manner.
    """

    _name = "odf.data.quality.issue"
    _description = "Data Quality Issue"

    # Short human-readable summary of the issue (shown in list/kanban views).
    name = fields.Char(
        string="Name",
        required=True,
    )
    # Free-form category label, e.g. "Invalid VAT".
    # NOTE(review): a Selection field would prevent typos and ease filtering;
    # confirm against existing data before changing the field type.
    issue_type = fields.Char(
        string="Issue Type",
    )
Comment on lines +20 to +22
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The issue_type field is a Char. For better data consistency and to provide a better user experience for filtering in the UI, consider changing this to a Selection field. This will prevent typos and ensure that issue types are standardized. You can define the selection in a dedicated method to keep it extensible.

Example:

    issue_type = fields.Selection(
        selection=[
            ('invalid_vat', 'Invalid VAT'),
        ],
        string="Issue Type",
    )

If you make this change, remember to update the hardcoded string 'Invalid VAT' on lines 80 and 131 to use the selection key (e.g., 'invalid_vat').

# Generic link to the record the issue was detected on; the candidate
# models are provided dynamically by _selection_related_record().
related_record = fields.Reference(
    selection="_selection_related_record",
    string="Related Record",
)
# Workflow state; the kanban dashboard groups issues by this field.
status = fields.Selection(
    selection=[
        ("new", "New"),
        ("in_progress", "In Progress"),
        ("resolved", "Resolved"),
    ],
    string="Status",
    default="new",
)
# Free-form remarks entered by the user working on the issue.
notes = fields.Text(
    string="Notes",
)

@api.model
def _get_data_quality_models(self):
    """Extensibility hook: models eligible for data quality checks.

    Returns a list of ``(model_name, label)`` tuples. Other modules can
    override this method and extend the returned list via ``super()`` to
    register additional models without touching this one.
    """
    return [
        ("res.partner", "Partner"),
        ("product.product", "Product"),
    ]

@api.model
def _selection_related_record(self):
    """Return the selection values for the ``related_record`` field."""
    return self._get_data_quality_models()
Comment on lines +41 to +46
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The list of models available for data quality checks is hardcoded. This makes it difficult for other modules to add new models to the check. To improve extensibility, you could move the list to a separate "hook" method that other modules can easily extend using super().

Example:

@api.model
def _get_data_quality_models(self):
    """Hook for extensibility. Returns a list of (model_name, model_description) tuples."""
    return [
        ("res.partner", "Partner"),
        ("product.product", "Product"),
    ]

@api.model
def _selection_related_record(self):
    """Return the list of models that can be checked."""
    return self._get_data_quality_models()


@api.model
def _run_data_quality_checks(self):
    """Dispatcher for all data quality checks (entry point of the daily cron).

    Each individual check is responsible for its own batching and for
    creating ``odf.data.quality.issue`` records for the problems it finds.
    """
    self._check_partner_vat()
Comment on lines +48 to +51
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The _run_data_quality_checks method acts as a dispatcher but directly calls check methods. This pattern requires modifying this method every time a new check is added, which is not ideal for extensibility, especially if other modules need to add their own checks. A more robust and extensible approach would be to use a registry pattern where check methods are discovered and called automatically. You could, for example, use a convention to name check methods with a specific prefix (e.g., _data_quality_check_*) and then dynamically find and execute them.


@api.model
def _check_partner_vat(self):
    """Check recently modified partners for invalid VAT numbers.

    Batch-oriented: partners are fetched in a single search, partners that
    already have an open issue are skipped, the (optional, remote) VIES
    check runs in small throttled batches, and all resulting issues are
    created in one ``create`` call.
    """
    yesterday = fields.Datetime.now() - timedelta(days=1)

    # 1. Fetch all partners modified recently that have a VAT number.
    #    ("vat", "!=", False) already excludes both NULL and "" values,
    #    so a separate ("vat", "!=", "") term is redundant.
    partners_to_check = self.env["res.partner"].search(
        [
            ("write_date", ">=", fields.Datetime.to_string(yesterday)),
            ("vat", "!=", False),
        ]
    )
    if not partners_to_check:
        return

    # 2. Skip partners that already have an unresolved "Invalid VAT"
    #    issue, so the nightly run does not create duplicates.
    existing_issues = self.env["odf.data.quality.issue"].search(
        [
            (
                "related_record",
                "in",
                [f"res.partner,{pid}" for pid in partners_to_check.ids],
            ),
            ("issue_type", "=", "Invalid VAT"),
            ("status", "!=", "resolved"),
        ]
    )
    partners_with_existing_issue = {
        issue.related_record.id
        for issue in existing_issues
        if issue.related_record
    }
    partners_to_validate = partners_to_check.filtered(
        lambda p: p.id not in partners_with_existing_issue
    )
    if not partners_to_validate:
        return

    # 3. Perform validation checks.
    invalid_partners = self.env["res.partner"]

    # 3a. Run the remote VIES check in small, throttled batches if it is
    #     enabled on the company. It is the most reliable check but it
    #     calls an external service, so it needs careful handling.
    if self.env.company.vat_check_vies:
        batch_size = 10  # Keep remote batches small to respect the service.
        for offset in range(0, len(partners_to_validate), batch_size):
            batch = partners_to_validate[offset : offset + batch_size]
            try:
                # button_vies_check (from base_vat) is designed to be
                # called on a recordset and iterates internally.
                batch.button_vies_check()
            except Exception:
                # The VIES service is remote and may be unavailable; skip
                # this batch but keep the cron alive. Log the full
                # traceback so the failure can be diagnosed.
                _logger.warning(
                    "VIES check failed for batch.", exc_info=True
                )
            time.sleep(1)  # Wait 1 second between batches to be safe.

    # 3b. Check the definitive validation status. The check_vat method in
    #     base_vat incorporates the VIES result when it was performed.
    #     NOTE: recordset.refresh() was removed from the ORM; use
    #     invalidate_recordset() to drop stale cached values instead.
    partners_to_validate.invalidate_recordset()
    for partner in partners_to_validate:
        if not partner.check_vat():
            invalid_partners |= partner

    # 4. Create issues for all invalid partners in one batch call.
    issues_to_create = [
        {
            "name": f"Invalid VAT number for '{partner.display_name}'",
            "issue_type": "Invalid VAT",
            "related_record": f"res.partner,{partner.id}",
        }
        for partner in invalid_partners
    ]
    if issues_to_create:
        self.env["odf.data.quality.issue"].create(issues_to_create)
8 changes: 8 additions & 0 deletions modules/odf_data_quality_dashboard/readme/CONFIGURE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

To configure this module, you need to assign users to the appropriate security groups. This module introduces two new groups to manage access:

* **Data Quality User:** Users in this group can view, create, and edit data quality issues. They have read, write, and create permissions. This group is intended for functional users who are responsible for correcting data.

* **Data Quality Manager:** Users in this group have full access to all data quality issues, including the ability to delete them. This group inherits all rights from the 'User' group and is intended for team leads or administrators.

To assign users to these groups, go to **Settings > Users & Companies > Users**, select a user, and under the 'Access Rights' tab, find the 'Data Quality' section to grant the desired permissions.
3 changes: 3 additions & 0 deletions modules/odf_data_quality_dashboard/readme/CONTEXT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The primary business need for this module is to streamline and accelerate data import processes. In many data migration or integration scenarios, a single invalid record (e.g., a contact with an incorrect VAT number) can cause an entire import batch to fail. This creates a bottleneck and requires technical intervention to fix the data and re-run the import.

This module solves that problem by adopting a non-blocking approach. Data is imported first, ensuring speed and efficiency. A separate, automated process then runs nightly to check for common data quality issues. These issues are flagged and presented in a user-friendly dashboard, transforming data validation from a blocking, technical task into a manageable, asynchronous workflow for functional users. This ensures that the data import pipeline remains fast and resilient, while still providing a robust mechanism for maintaining high data quality.
1 change: 1 addition & 0 deletions modules/odf_data_quality_dashboard/readme/CONTRIBUTORS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
- bosd
Empty file.
1 change: 1 addition & 0 deletions modules/odf_data_quality_dashboard/readme/DESCRIPTION.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This module provides a Data Quality Dashboard for Odoo. It helps improve data import workflows by decoupling data validation from the import process. Instead of blocking imports, it allows data to be imported quickly and then runs validation checks asynchronously. Issues found are logged as records in a 'Data Quality Issue' model, which are then displayed on a dashboard for users to review and resolve.
2 changes: 2 additions & 0 deletions modules/odf_data_quality_dashboard/readme/HISTORY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@


5 changes: 5 additions & 0 deletions modules/odf_data_quality_dashboard/readme/INSTALL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[ This file must only be present if there are very specific
installation instructions, such as installing non-python
dependencies. The audience is systems administrators. ]
Comment on lines +1 to +3
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This file contains placeholder text indicating it should only be present for specific installation instructions. Since there are no such instructions for this module, this file could be confusing for system administrators. It would be better to either remove this file or clear its content if you plan to add instructions later.



Empty file.
4 changes: 4 additions & 0 deletions modules/odf_data_quality_dashboard/readme/USAGE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

Once installed, a 'Data Quality Dashboard' menu will appear under the 'Data Flow' top-level menu. This dashboard displays data quality issues in a Kanban view, categorized by their status (New, In Progress, Resolved).

Users can click on an issue to view its details, including a direct link to the problematic record (e.g., the Partner with an invalid VAT). From there, users can correct the data. Once the issue is fixed, the status of the quality issue record can be moved to 'Resolved'.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,name,model_id:id,group_id:id,perm_read,perm_write,perm_create,perm_unlink
access_odf_data_quality_issue_user,odf.data.quality.issue.user,model_odf_data_quality_issue,odf_data_quality_dashboard.group_odf_data_quality_user,1,1,1,0
access_odf_data_quality_issue_manager,odf.data.quality.issue.manager,model_odf_data_quality_issue,odf_data_quality_dashboard.group_odf_data_quality_manager,1,1,1,1
23 changes: 23 additions & 0 deletions modules/odf_data_quality_dashboard/security/security.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<odoo noupdate="1">

    <!-- App category under which the two groups appear on the user form
         ("Access Rights" tab). -->
    <record id="module_category_data_quality" model="ir.module.category">
        <field name="name">Data Quality</field>
        <field name="description">User access levels for the Data Quality Dashboard module.</field>
        <field name="sequence">20</field>
    </record>

    <!-- Functional users: read/write/create on issues (see ir.model.access.csv). -->
    <record id="group_odf_data_quality_user" model="res.groups">
        <field name="name">User</field>
        <field name="category_id" ref="module_category_data_quality"/>
        <field name="implied_ids" eval="[(4, ref('base.group_user'))]"/>
    </record>

    <!-- Managers: inherit User rights and additionally may delete issues. -->
    <record id="group_odf_data_quality_manager" model="res.groups">
        <field name="name">Manager</field>
        <field name="category_id" ref="module_category_data_quality"/>
        <field name="implied_ids" eval="[(4, ref('group_odf_data_quality_user'))]"/>
        <field name="users" eval="[(4, ref('base.user_root')), (4, ref('base.user_admin'))]"/>
    </record>

</odoo>
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
<?xml version="1.0" encoding="utf-8"?>
<odoo>
    <data>

        <!-- Form View: full detail of a single issue, notes in a notebook page. -->
        <record id="view_odf_data_quality_issue_form" model="ir.ui.view">
            <field name="name">odf.data.quality.issue.form</field>
            <field name="model">odf.data.quality.issue</field>
            <field name="arch" type="xml">
                <form string="Data Quality Issue">
                    <sheet>
                        <group>
                            <field name="name"/>
                            <field name="issue_type"/>
                            <field name="related_record"/>
                            <field name="status"/>
                        </group>
                        <notebook>
                            <page string="Notes">
                                <field name="notes"/>
                            </page>
                        </notebook>
                    </sheet>
                </form>
            </field>
        </record>

        <!-- Kanban View: the dashboard itself, one column per status. -->
        <record id="view_odf_data_quality_issue_kanban" model="ir.ui.view">
            <field name="name">odf.data.quality.issue.kanban</field>
            <field name="model">odf.data.quality.issue</field>
            <field name="arch" type="xml">
                <kanban default_group_by="status">
                    <field name="status"/>
                    <templates>
                        <t t-name="kanban-box">
                            <div class="oe_kanban_global_click">
                                <div class="oe_kanban_details">
                                    <strong><field name="name"/></strong>
                                    <div>Type: <field name="issue_type"/></div>
                                    <div>Record: <field name="related_record"/></div>
                                </div>
                            </div>
                        </t>
                    </templates>
                </kanban>
            </field>
        </record>

        <!-- List View -->
        <record id="view_odf_data_quality_issue_list" model="ir.ui.view">
            <field name="name">odf.data.quality.issue.list</field>
            <field name="model">odf.data.quality.issue</field>
            <field name="arch" type="xml">
                <list string="Data Quality Issues">
                    <field name="name"/>
                    <field name="issue_type"/>
                    <field name="related_record"/>
                    <field name="status"/>
                </list>
            </field>
        </record>

        <!-- Action: opened from the menu; kanban first so the dashboard is default. -->
        <record id="action_odf_data_quality_issue" model="ir.actions.act_window">
            <field name="name">Data Quality Issues</field>
            <field name="res_model">odf.data.quality.issue</field>
            <field name="view_mode">kanban,list,form</field>
        </record>

    </data>
</odoo>
20 changes: 20 additions & 0 deletions modules/odf_data_quality_dashboard/views/menus.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="utf-8"?>
<odoo>
    <data>

        <!-- Top-level "Data Flow" menu shared as a root for this module. -->
        <menuitem
            id="menu_odf_root"
            name="Data Flow"
            sequence="10"/>

        <!-- Sub menu opening the issues action (kanban dashboard by default). -->
        <menuitem
            id="menu_odf_data_quality_dashboard"
            name="Data Quality Dashboard"
            parent="menu_odf_root"
            action="action_odf_data_quality_issue"
            sequence="10"/>

    </data>
</odoo>