From 5db411609e71f34392624f47d676f2d2dba0efd9 Mon Sep 17 00:00:00 2001
From: Samuel Tonks <60216815+Tonks684@users.noreply.github.com>
Date: Wed, 19 Jun 2024 12:55:39 +0100
Subject: [PATCH] Check Models Action and Front End showing Action findings
 (#687)

* Check Models Action

* test for nod20 actions

* toml updates

* entry point

* entry point

* entry point

* allow install true

* debugging check model outputs when unable to install package via pip

* debugging check model outputs when unable to install package via pip

* debugging check model outputs when unable to install package via pip

* subprocess.check_call shell=True

* model catalog checks

* model catalog checks

* model catalog checks

* model catalog checks requests

* model catalog action

* model catalog action

* check_models

* url updates

* url update VEdge

* print config

* Update path to use os.path.join

* model load successfully check

* flake8 updates

* flake8 updates

* flake8 updates

* workflow schedule updated

* updates to function description and return report

* check models formatting

* save action report as csv

* temporary change of run action on push so to check results

* check model report

* model report

* json fix

* stardist fix

* stardist fix

* json serializable

* flake8

* flake8

* update website with check models

* test

* json changes

* json changes

* json changes

* bug

* bug

* checkreport

* switch back to at

* removal of artefact saving in yml and updates to checkreport in Model Table

* turing to fork

* table not loading debugging

* removal of store artifact component of action

* model table now populates test outcomes, paths are set back to alan turing institute, and the store artifact step is removed (tested to make sure the action still runs successfully)

---------

Co-authored-by: Isabel Fenton <IFenton@users.noreply.github.com>
---
 .github/workflows/check-models.yml     |  36 +++++++++
 .pre-commit-config.yaml                |   2 +-
 frontend/src/ModelTable.jsx            | 106 +++++++++++++++++++++----
 pyproject.toml                         |   1 +
 src/scivision/catalog/check_models.py  |  99 +++++++++++++++++++++++
 src/scivision/catalog/data/models.json |  12 +--
 src/scivision/io/installer.py          |  10 ++-
 src/scivision/io/reader.py             |   4 +-
 8 files changed, 241 insertions(+), 29 deletions(-)
 create mode 100644 .github/workflows/check-models.yml
 create mode 100644 src/scivision/catalog/check_models.py

diff --git a/.github/workflows/check-models.yml b/.github/workflows/check-models.yml
new file mode 100644
index 00000000..16807194
--- /dev/null
+++ b/.github/workflows/check-models.yml
@@ -0,0 +1,36 @@
+name: Check models 
+on:
+  schedule:
+    - cron:  '18 3 * * *'  # every day at 03:18 (GitHub evaluates schedules in UTC)
+jobs:
+  check-models:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write  # presumably needed by the Create Release step - confirm
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Upgrade pip
+        run: python -m pip install --upgrade pip
+      - name: Install Scivision
+        run: pip install -e .
+      - name: Install chardet
+        run: pip install chardet 
+      - name: Check model catalog  # runs the scivision-check-models console script
+        run: scivision-check-models
+      # The report is not stored as a workflow artifact. Instead, the step
+      # below attaches check_models.js to a release with a fixed tag, so
+      # the file is always found under the same release tag.
+      # NOTE(review): allowUpdates / removeArtifacts presumably make each
+      # run replace the previous upload - confirm against the action docs.
+      - name: Create Release
+        uses: ncipollo/release-action@v1
+        with:
+          tag: model-checks-report-latest-release
+          name: Models checks report
+          commit: ${{ github.ref }}
+          allowUpdates: true
+          removeArtifacts: true
+          artifacts: check_models.js
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 246ff845..f9026ae0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
     - id: black
       pass_filenames: true
       args: [--line-length=79]
--   repo: https://gitlab.com/pycqa/flake8
+-   repo: https://github.com/PyCQA/flake8
     rev: 3.8.3
     hooks:
     - id: flake8
diff --git a/frontend/src/ModelTable.jsx b/frontend/src/ModelTable.jsx
index e1d6e504..8b817b8f 100644
--- a/frontend/src/ModelTable.jsx
+++ b/frontend/src/ModelTable.jsx
@@ -1,3 +1,5 @@
+import { useState } from "react";
+import useScript from "react-use-scripts";
 import DataTable from "react-data-table-component";
 
 import ModelNav from "./ModelNav.jsx";
@@ -11,10 +13,10 @@ import { TaskBadge } from "./badges.jsx";
 import models from "./catalog/data/models.json";
 
 // Component: Fragment containing definition items for the expanded
-// view of the model table, and the model page
+// view of the Model table and the page for one Model
 //
-// * data - one model
-function ModelDefinitionList({ data }) {
+// * data - one Model
+function ModelDefinitionList({ data }) {
   return (
     <dl className="row">
       <dt className="col-sm-3">Description</dt>
@@ -22,27 +24,51 @@ function ModelDefinitionList({ data }) {
         {data.description ? data.description : "(none provided)"}
       </dd>
 
-      <dt className="col-sm-3">Homepage</dt>
+      <dt className="col-sm-3">Location</dt>
       <dd className="col-sm-9">
         <a href={data.url}>{data.url}</a>
       </dd>
-
-      <dt className="col-sm-3">Install with pip</dt>
-      <dd className="col-sm-9">
-        <div>
-          <code>pip install {data.pkg_url}</code>
-        </div>
-      </dd>
     </dl>
   );
 }
 
+// Component: contents of the Models table, including the outcome of the
+// latest automated model check for each catalog entry
 function ModelTableContents() {
+  const [modelChecksReport, setModelChecksReport] = useState(null);
+
+  // Check outcome recorded for one model (e.g. "Pass" or "Fail").
+  // Returns "Unknown" while the report has not been loaded, when the
+  // loaded script did not define a usable report object, or when the
+  // report has no entry for this model name.
+  function modelCheckResult(name) {
+    // Falsy check (not `!== null`): the script may load without
+    // defining the report, which would leave the state undefined
+    if (!modelChecksReport || !modelChecksReport.report) {
+      return "Unknown";
+    }
+    const report = modelChecksReport.report[name];
+    return report !== undefined ? report.check_result : "Unknown";
+  }
+
+  function modelCheckTime() {
+    if (modelChecksReport) {
+      const time = new Date(modelChecksReport.time);
+      return time.toUTCString();
+    } else {
+      return "(never)";
+    }
+  }
+
+  function modelValidationTimeString() {
+    return `last run ${modelCheckTime()}`;
+  }
+
   const columns = [
     {
       name: "Thumbnail",
       width: "150px",
-      selector: (row) => model_thumbnails[`./${row.name}.jpg`] === undefined,
+      selector: (row) =>
+        model_thumbnails[`./${row.name}.jpg`] === undefined,
       sortable: true,
       cell: (row, index, column, id) => {
         const thumb = model_thumbnails[`./${row.name}.jpg`];
@@ -50,26 +76,74 @@ function ModelTableContents() {
       },
     },
     {
+      selector: (row) => row.name,
       name: "Name",
       sortable: true,
       grow: 0.5,
-      selector: (row) => row.name,
     },
     {
-      name: "Tasks",
       selector: (row) => row.tasks,
+      name: "Tasks",
       cell: (row, index, column, id) =>
         row.tasks.map((t) => <TaskBadge key={t} taskName={t} />),
     },
+    {
+      selector: (row) => {
+        const result = modelCheckResult(row.name);
+        if (result === "Pass") {
+          return (
+            <img
+              src="https://img.shields.io/badge/scivision_metadata-pass-green"
+              title="The metadata for this model was successfully loaded by scivision, from the location in the catalog"
+            />
+          );
+        } else if (result === "Fail") {
+          return (
+            <img
+              src="https://img.shields.io/badge/scivision_metadata-fail-red"
+              title="Scivision metadata (yaml) file for this model failed to load or was missing at the indicated location"
+            />
+          );
+        } else {
+          return (
+            <img
+              src="https://img.shields.io/badge/scivision_metadata-unknown-lightgray"
+              title="Could not access the result for this validation check"
+            />
+          );
+        }
+      },
+      name: (
+        <span
+          className="tooltip-available"
+          title={modelValidationTimeString()}
+        >
+          Validation checks
+        </span>
+      ),
+      grow: 0.5,
+    },
   ];
 
+  const check_models_script_url =
+    "https://github.com/alan-turing-institute/scivision/releases/download/model-checks-report-latest-release/check_models.js";
+
+  useScript({
+    src: check_models_script_url,
+    onReady: () => setModelChecksReport(window.global_CheckModelReport),
+    onError: () =>
+      console.log(
+        `Could not load latest model checks from ${check_models_script_url}`,
+      ),
+  });
+
   return (
     <DataTable
       columns={columns}
       data={models.entries}
       title=""
       expandableRowsComponent={(props) => (
-        <TableCardDropdown element={<ModelDefinitionList {...props} />} />
+        <TableCardDropdown element={<ModelDefinitionList {...props} />} />
       )}
       expandableRows
       expandableRowsHideExpander
@@ -79,7 +153,7 @@ function ModelTableContents() {
 }
 
 // Component: Models, table view
-// route: /models
+// route: /Models
 export default function ModelTable() {
   return (
     <>
diff --git a/pyproject.toml b/pyproject.toml
index 791f374e..b66de02e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,3 +57,4 @@ cloud_extra = ["scivision[gcs,s3]"]
 [project.scripts]
 scivision-catalog-json-schema = "scivision.catalog.gen_json_schema:entry_point"
 scivision-check-datasets = "scivision.catalog.check_datasets:entry_point"
+scivision-check-models = "scivision.catalog.check_models:entry_point"
\ No newline at end of file
diff --git a/src/scivision/catalog/check_models.py b/src/scivision/catalog/check_models.py
new file mode 100644
index 00000000..1a7f5b6e
--- /dev/null
+++ b/src/scivision/catalog/check_models.py
@@ -0,0 +1,99 @@
+"""
+Automated Models Checks
+
+Iterate through the model catalog and check each model in turn.
+If a model is not scivision_usable, check that the model url is accessible
+and log the check as passed if the response status is 200.
+Otherwise, load the model using load_pretrained_model and log as passed if successful.
+"""
+
+import logging
+import json
+import requests
+from datetime import datetime
+
+from scivision import default_catalog, load_pretrained_model
+from tqdm import tqdm
+
+# Logger for this module; its records are written to check_models.log
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+# Build the formatter first, then the file handler that uses it
+formatter = logging.Formatter('%(asctime)s : %(levelname)s : %(name)s : %(message)s')
+file_handler = logging.FileHandler('check_models.log')
+file_handler.setFormatter(formatter)
+logger.addHandler(file_handler)
+
+
+def check_models():
+    """
+    Check every model in the default catalog and build a JSON report.
+
+    Models flagged `scivision_usable` are loaded from their URL with
+    `load_pretrained_model` (with `allow_install=True`); for any other
+    model only the URL is requested, and HTTP status 200 is a pass.
+
+    Returns a JSON string holding the check `time` and a `report`
+    mapping each model name to a row with the keys
+    - url
+    - check_result ("Pass" or "Fail")
+    - response (status or error text; null when the model loaded)
+    """
+    # Load model catalog
+    model_catalog = default_catalog.models.to_dataframe()
+    rows = {}
+    for model in tqdm(model_catalog.itertuples()):
+        name = model.name
+        print(f'\nValidating: {name}')
+        if not model.scivision_usable:
+            try:
+                # Bound the wait so one dead host cannot stall the run
+                status = requests.get(model.url, timeout=30).status_code
+            except requests.RequestException as e:
+                # An unreachable URL is a failed check, not a crash
+                logger.exception("Automated Model Check has failed!")
+                status = f'request failed ({e})'
+            check_result = 'Pass' if status == 200 else 'Fail'
+            response = f'Scivision_usable = False but model url response: {status}'
+            print(f'Model is not scivision usable but model url response: {status}')
+        else:
+            try:
+                # Load regardless of the URL suffix, so that check_result
+                # and response are set on every pass through the loop
+                load_pretrained_model(model.url, allow_install=True)
+                print('Model Loaded Successfully')
+                check_result = "Pass"
+                response = None
+            except Exception as e:
+                print(e)
+                logger.exception("Automated Model Check has failed!")
+                check_result = "Fail"
+                # Keep the error text so the report says why it failed
+                response = str(e)
+
+        rows[name] = {
+            'url': model.url,
+            'check_result': check_result,
+            'response': response,
+        }
+
+    automated_checks_report = {
+        # Timezone-aware stamp, unambiguous for readers in other zones
+        "time": datetime.now().astimezone().isoformat(),
+        "report": rows
+    }
+    automated_checks_report_json = json.dumps(automated_checks_report)
+
+    return automated_checks_report_json
+
+
+def entry_point():
+    """Entry point for the 'scivision-check-models' command.
+
+    Runs `check_models` and writes the JSON report to 'check_models.js'
+    as the value of the JavaScript variable `global_CheckModelReport`.
+    """
+    report_json = check_models()
+    header = '// This file was generated automatically by check_models.py\n'
+    with open('check_models.js', 'w') as out:
+        out.write(header + f'var global_CheckModelReport = {report_json};\n')
diff --git a/src/scivision/catalog/data/models.json b/src/scivision/catalog/data/models.json
index 69f4cfdf..7b8ee286 100644
--- a/src/scivision/catalog/data/models.json
+++ b/src/scivision/catalog/data/models.json
@@ -97,7 +97,7 @@
       "tasks": [
         "classification"
       ],
-      "url": "https://github.com/alan-turing-institute/plankton-cefas-scivision",
+      "url": "https://github.com/alan-turing-institute/plankton-cefas-scivision/tree/main/",
       "pkg_url": "git+https://github.com/alan-turing-institute/plankton-cefas-scivision@main",
       "scivision_usable": true,
       "institution": [
@@ -118,7 +118,7 @@
       "tasks": [
         "segmentation"
       ],
-      "url": "https://github.com/MartinSJRogers/VEdge_Detector_scivision",
+      "url": "https://github.com/MartinSJRogers/VEdge_Detector_scivision/blob/main/",
       "pkg_url": "git+https://github.com/MartinSJRogers/VEdge_Detector_scivision@main",
       "scivision_usable": true,
       "institution": [
@@ -176,7 +176,7 @@
       "tasks": [
         "segmentation"
       ],
-      "url": "https://github.com/alan-turing-institute/scivision_huggingface_segmentation",
+      "url": "https://github.com/alan-turing-institute/scivision_huggingface_segmentation/tree/main",
       "pkg_url": "git+https://github.com/alan-turing-institute/scivision_huggingface_segmentation.git@main",
       "scivision_usable": true,
       "institution": [
@@ -191,7 +191,7 @@
       "tasks": [
         "object-detection"
       ],
-      "url": "https://github.com/alan-turing-institute/scivision_huggingface_objectdetection",
+      "url": "https://github.com/alan-turing-institute/scivision_huggingface_objectdetection/tree/main",
       "pkg_url": "git+https://github.com/alan-turing-institute/scivision_huggingface_objectdetection.git@main",
       "scivision_usable": true,
       "institution": [
@@ -227,7 +227,7 @@
       "tasks": [
         "classification"
       ],
-      "url": "https://github.com/alan-turing-institute/flower_classification_model",
+      "url": "https://github.com/alan-turing-institute/flower_classification_model/tree/main",
       "pkg_url": "git+https://github.com/alan-turing-institute/flower_classification_model.git@main",
       "scivision_usable": true,
       "institution": [
@@ -244,7 +244,7 @@
       "tasks": [
         "classification"
       ],
-      "url": "https://github.com/alan-turing-institute/butterfly_classification_model",
+      "url": "https://github.com/alan-turing-institute/butterfly_classification_model/tree/main/",
       "pkg_url": "git+https://github.com/alan-turing-institute/butterfly_classification_model.git@main",
       "scivision_usable": true,
       "institution": [
diff --git a/src/scivision/io/installer.py b/src/scivision/io/installer.py
index cdc78e9d..164b4bb2 100644
--- a/src/scivision/io/installer.py
+++ b/src/scivision/io/installer.py
@@ -30,10 +30,12 @@ def _install(package, pip_install_args=None):
 
     if pip_install_args is None:
         pip_install_args = []
-
-    subprocess.check_call(
-        [sys.executable, "-m", "pip", "install", *pip_install_args, package]
-    )
+    try:
+        subprocess.check_call(
+            [sys.executable, "-m", "pip", "install", *pip_install_args, package],
+        )
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError(f'command {e.cmd} return with error code {e.returncode}: {e.output}')
 
 
 def install_package(
diff --git a/src/scivision/io/reader.py b/src/scivision/io/reader.py
index 18d09008..97f09881 100644
--- a/src/scivision/io/reader.py
+++ b/src/scivision/io/reader.py
@@ -11,7 +11,6 @@
 from ..koala import koala
 from .installer import install_package
 from .wrapper import PretrainedModel, Datasource
-
 import warnings
 import xarray
 
@@ -157,7 +156,7 @@ def load_pretrained_model(
             ".yaml",
         )
     ):
-        path = path + ".scivision/model.yml"
+        path = f"{path}/.scivision/model.yml"
     # fsspec will throw an error if the path does not exist
     file = fsspec.open(path)
     # parse the config file:
@@ -165,6 +164,7 @@ def load_pretrained_model(
         stream = config_file.read()
         config = yaml.safe_load(stream)
     config_list = _get_model_configs(config, load_multiple, model_selection)
+    print(config_list)
     loaded_models = []
     for config in config_list:
         # make sure a model at least has an input to the function