Merged
Changes from 6 commits
8 changes: 8 additions & 0 deletions medcat-v2/medcat/__init__.py
@@ -1,7 +1,15 @@
from importlib.metadata import version as __version_method
from importlib.metadata import PackageNotFoundError as __PackageNotFoundError

from medcat.utils.check_for_updates import (
    check_for_updates as __check_for_updates)

try:
    __version__ = __version_method("medcat")
except __PackageNotFoundError:
    __version__ = "0.0.0-dev"


# NOTE: this will not always actually do the check
# it will only (by default) check once a week
__check_for_updates("medcat", __version__)
190 changes: 190 additions & 0 deletions medcat-v2/medcat/utils/check_for_updates.py
@@ -0,0 +1,190 @@
from typing import TypedDict
import json
import os
import time
import urllib.request
from pathlib import Path
from packaging.version import Version, InvalidVersion
import logging

from medcat.utils.defaults import (
    MEDCAT_DISABLE_VERSION_CHECK_ENVIRON, MEDCAT_PYPI_URL_ENVIRON,
    MEDCAT_MINOR_UPDATE_THRESHOLD_ENVIRON,
    MEDCAT_PATCH_UPDATE_THRESHOLD_ENVIRON,
    MEDCAT_VERSION_UPDATE_LOG_LEVEL_ENVIRON,
    MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL_ENVIRON,
)
from medcat.utils.defaults import (
    DEFAULT_PYPI_URL, DEFAULT_MINOR_FOR_INFO, DEFAULT_PATCH_FOR_INFO,
    DEFAULT_VERSION_INFO_LEVEL, DEFAULT_VERSION_INFO_YANKED_LEVEL)


DEFAULT_CACHE_PATH = Path.home() / ".cache" / "medcat_version.json"
Collaborator:
Personally - I'd rather make a medcat (cogstack?) folder in .cache, just the folder has so much use
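
A sketch of that suggestion (a hypothetical alternative, not what this diff implements):

# shared medcat cache folder instead of a single top-level file;
# the "version_check.json" file name here is made up for illustration
DEFAULT_CACHE_PATH = Path.home() / ".cache" / "medcat" / "version_check.json"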

# 1 week
DEFAULT_CHECK_INTERVAL = 7 * 24 * 3600


logger = logging.getLogger(__name__)


def log_info(msg: str, *args, yanked: bool = False, **kwargs):
    if yanked:
        lvl = os.environ.get(MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL_ENVIRON,
                             DEFAULT_VERSION_INFO_YANKED_LEVEL).upper()
    else:
        lvl = os.environ.get(MEDCAT_VERSION_UPDATE_LOG_LEVEL_ENVIRON,
                             DEFAULT_VERSION_INFO_LEVEL).upper()
    _level_map = {
        "NOTSET": logging.NOTSET,
        "DEBUG": logging.DEBUG,
        "INFO": logging.INFO,
        "WARN": logging.WARNING,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "CRITICAL": logging.CRITICAL,
        "FATAL": logging.FATAL,
    }
    level = _level_map.get(lvl, logging.INFO)
    logger.log(level, msg, *args, **kwargs)


def _get_env_int(name: str, default: int) -> int:
    try:
        return int(os.getenv(name, default))
    except ValueError:
        return default


def _should_check(cache_path: Path, check_interval: int) -> bool:
    if not cache_path.exists():
        return True
    try:
        with open(cache_path) as f:
            last_check = json.load(f)["last_check"]
        return time.time() - last_check > check_interval
    except Exception:
        return True


class UpdateCheckConfig(TypedDict):
    pkg_name: str
    cache_path: Path
    url: str
    enabled: bool
    minor_threshold: int
    patch_threshold: int
    timeout: float
    check_interval: int


def _get_config(pkg_name: str) -> UpdateCheckConfig:
    if os.getenv(MEDCAT_DISABLE_VERSION_CHECK_ENVIRON):
        return {
            "pkg_name": pkg_name,
            "enabled": False,
            "cache_path": Path("."),
            "url": "-1",
            "minor_threshold": -1,
            "patch_threshold": -1,
            "timeout": -1.0,
            "check_interval": -1,
        }
    base_url = os.getenv(MEDCAT_PYPI_URL_ENVIRON, DEFAULT_PYPI_URL).rstrip("/")
    url = f"{base_url}/{pkg_name}/json"
    minor_thresh = _get_env_int(MEDCAT_MINOR_UPDATE_THRESHOLD_ENVIRON,
                                DEFAULT_MINOR_FOR_INFO)
    patch_thresh = _get_env_int(MEDCAT_PATCH_UPDATE_THRESHOLD_ENVIRON,
                                DEFAULT_PATCH_FOR_INFO)
    # TODO: add env variables for timeout and default cache?
    return {
        "pkg_name": pkg_name,
        "enabled": True,
        "cache_path": DEFAULT_CACHE_PATH,
        "url": url,
        "minor_threshold": minor_thresh,
        "patch_threshold": patch_thresh,
        "timeout": 3.0,
        "check_interval": DEFAULT_CHECK_INTERVAL,
    }
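
# (Sketch for the TODO above, not part of this diff: the timeout and cache
# path could be read the same way via environment variables, e.g.
#     timeout = float(os.getenv("MEDCAT_VERSION_CHECK_TIMEOUT", "3.0"))
#     cache_path = Path(os.getenv("MEDCAT_VERSION_CHECK_CACHE",
#                                 str(DEFAULT_CACHE_PATH)))
# where both variable names are made up for illustration.)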


def check_for_updates(pkg_name: str, current_version: str):
    cnf = _get_config(pkg_name)
    if not cnf["enabled"]:
        return

    if not _should_check(cnf["cache_path"], cnf["check_interval"]):
        return

    try:
        with urllib.request.urlopen(cnf["url"],
                                    timeout=cnf["timeout"]) as r:
            data = json.load(r)
        releases = {
            v: files for v, files in data.get("releases", {}).items()
            if files  # skip empty entries
        }
    except Exception as e:
        log_info("Unable to check for update", exc_info=e)
        return

    # cache update time
    cnf["cache_path"].parent.mkdir(parents=True, exist_ok=True)
    with open(cnf["cache_path"], "w") as f:
        json.dump({"last_check": time.time()}, f)

    _do_check(cnf, releases, current_version)


def _do_check(cnf: UpdateCheckConfig, releases: dict,
              current_version: str):
    try:
        current = Version(current_version)
    except InvalidVersion:
        return
    pkg_name = cnf["pkg_name"]
    patch_thresh = cnf["patch_threshold"]
    minor_thresh = cnf["minor_threshold"]

    newer_minors, newer_patches = [], []
    yanked = False
    for v_str, files in releases.items():
        try:
            v = Version(v_str)
        except InvalidVersion:
            continue
        if v <= current:
            continue
        if any(f.get("yanked") for f in files):
            continue  # don’t count yanked releases in comparisons
        if v.major == current.major and v.minor == current.minor:
            newer_patches.append(v)
        elif v.major == current.major and v.minor > current.minor:
            newer_minors.append(v)

    # detect if current version is yanked
    for f in releases.get(current_version, []):
        if f.get("yanked"):
            reason = f.get("yanked_reason", "")
            msg = (f"⚠️ You are using a yanked version ({pkg_name} "
                   f"{current_version}). {reason}")
            log_info(msg, yanked=True)
            yanked = True
            break

    # report newer versions
    if len(newer_patches) >= patch_thresh:
        latest_patch = max(newer_patches)
        msg = (f"ℹ️ {pkg_name} {current_version} → {latest_patch} "
               f"({len(newer_patches)} newer patch releases available)")
        log_info(msg)
    elif len(newer_minors) >= minor_thresh:
        latest_minor = max(newer_minors)
        msg = (f"⚠️ {pkg_name} {current_version} → {latest_minor} "
               f"({len(newer_minors)} newer minor releases available)")
        log_info(msg)

    if yanked and not (newer_minors or newer_patches):
        msg = (f"⚠️ Your installed version {current_version} was yanked and "
               "has no newer stable releases yet.")
        log_info(msg, yanked=True)
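
The thresholds and log levels used above are all environment-driven (see the defaults.py additions below). A rough tuning sketch, with arbitrary example values:

import os

# Report a single newer patch release immediately, but only mention minor
# releases once five have accumulated (the default for both thresholds is 3).
os.environ["MEDCAT_PATCH_UPDATE_THRESHOLD"] = "1"
os.environ["MEDCAT_MINOR_UPDATE_THRESHOLD"] = "5"
# Emit the regular update notice at WARNING instead of the default INFO.
os.environ["MEDCAT_VERSION_UPDATE_LOG_LEVEL"] = "WARNING"

import medcat  # noqa: E402  -- the (at most weekly) check now uses these values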
14 changes: 14 additions & 0 deletions medcat-v2/medcat/utils/defaults.py
@@ -10,6 +10,20 @@
COMPONENTS_FOLDER = "saved_components"
AVOID_LEGACY_CONVERSION_ENVIRON = "MEDCAT_AVOID_LECACY_CONVERSION"

# version check
MEDCAT_DISABLE_VERSION_CHECK_ENVIRON = "MEDCAT_DISABLE_VERSION_CHECK"
Collaborator:
Can you add this to the docs as well?

The whole .md table in this PR would be great to add there.

Collaborator Author:

I've added it to the README.

However, there's a separate docs/main.md that seems to be (again) limping behind the README, but (mostly) mirrors it.
I think we should find a way to have it just automatically mirror the README.

Collaborator:

This is pretty pedantic - but can you enforce true/false here instead? Just feels like it will save the question "I set it to False but it is somehow disabled"

Collaborator:

Yeah I'd make it the same as AVOID_LEGACY_CONVERSION_ENVIRON just above
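
A sketch of what that could look like (hypothetical helper name; this assumes avoid_legacy_conversion, truncated below, compares against a literal "true"):

def version_check_disabled() -> bool:
    # accept only an explicit "true" rather than any non-empty string
    return os.environ.get(
        MEDCAT_DISABLE_VERSION_CHECK_ENVIRON, "false").lower() == "true"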

MEDCAT_PYPI_URL_ENVIRON = "MEDCAT_PYPI_URL"
DEFAULT_PYPI_URL = "https://pypi.org/pypi"
MEDCAT_MINOR_UPDATE_THRESHOLD_ENVIRON = "MEDCAT_MINOR_UPDATE_THRESHOLD"
DEFAULT_MINOR_FOR_INFO = 3
MEDCAT_PATCH_UPDATE_THRESHOLD_ENVIRON = "MEDCAT_PATCH_UPDATE_THRESHOLD"
DEFAULT_PATCH_FOR_INFO = 3
MEDCAT_VERSION_UPDATE_LOG_LEVEL_ENVIRON = "MEDCAT_VERSION_UPDATE_LOG_LEVEL"
DEFAULT_VERSION_INFO_LEVEL = "INFO"
MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL_ENVIRON = (
    "MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL")
DEFAULT_VERSION_INFO_YANKED_LEVEL = "WARNING"


def avoid_legacy_conversion() -> bool:
    return os.environ.get(
148 changes: 148 additions & 0 deletions medcat-v2/tests/utils/test_check_for_updates.py
@@ -0,0 +1,148 @@
import io
import json
import logging
import time
import unittest
from unittest.mock import patch
from pathlib import Path
from medcat.utils import check_for_updates


class TestVersionCheck(unittest.TestCase):

    def setUp(self):
        self.pkg = "medcat"
        self.current_version = "1.3.0"
        self.cache_path = Path("/tmp/fake_cache.json")

    def tearDown(self):
        if self.cache_path.exists():
            self.cache_path.unlink()

    # --- helpers ---
    def _make_releases(self, versions, yanked=None):
        """Return a fake releases dict."""
        yanked = yanked or {}
        return {
            v: [{"yanked": yanked.get(v, False)}]
            for v in versions
        }

    # 1. runs if cache missing
    @patch("medcat.utils.check_for_updates._do_check")
    @patch("medcat.utils.check_for_updates.urllib.request.urlopen")
    def test_runs_without_cache(self, mock_urlopen, mock_do_check):
        data = {"releases": self._make_releases(["1.3.1", "1.3.2", "1.4.0"])}
        mock_urlopen.return_value.__enter__.return_value = io.StringIO(
            json.dumps(data))
        with patch("medcat.utils.check_for_updates.DEFAULT_CACHE_PATH",
                   self.cache_path):
            check_for_updates.check_for_updates(self.pkg, self.current_version)
        mock_do_check.assert_called_once()

    # 2. runs if cache interval expired
    @patch("medcat.utils.check_for_updates._do_check")
    @patch("medcat.utils.check_for_updates.urllib.request.urlopen")
    def test_runs_if_interval_expired(self, mock_urlopen, mock_do_check):
        data = {"releases": self._make_releases(["1.3.1"])}
        mock_urlopen.return_value.__enter__.return_value = io.StringIO(
            json.dumps(data))
        # create old cache
        self.cache_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.cache_path, "w") as f:
            json.dump({"last_check": time.time() - (
                check_for_updates.DEFAULT_CHECK_INTERVAL + 1)}, f)
        with patch("medcat.utils.check_for_updates.DEFAULT_CACHE_PATH",
                   self.cache_path):
            check_for_updates.check_for_updates(self.pkg, self.current_version)
        mock_do_check.assert_called_once()

    # 3. doesn't run if cache still valid
    @patch("medcat.utils.check_for_updates._do_check")
    def test_does_not_run_if_interval_not_expired(self, mock_do_check):
        # recent cache
        self.cache_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.cache_path, "w") as f:
            json.dump({"last_check": time.time()}, f)
        with patch("medcat.utils.check_for_updates.DEFAULT_CACHE_PATH",
                   self.cache_path):
            check_for_updates.check_for_updates(self.pkg, self.current_version)
        mock_do_check.assert_not_called()

    # 4. info for 3+ patch versions
    @patch("medcat.utils.check_for_updates.log_info")
    def test_patch_threshold_triggered(self, mock_log):
        releases = self._make_releases(["1.3.1", "1.3.2", "1.3.3", "1.3.4"])
        cnf = {
            "pkg_name": self.pkg,
            "minor_threshold": 99,
            "patch_threshold": 3,
        }
        cnf.update(enabled=True, cache_path=self.cache_path, url="",
                   timeout=0, check_interval=0)
        check_for_updates._do_check(cnf, releases, self.current_version)
        self.assertTrue(any("patch releases available" in c[0][0]
                            for c in mock_log.call_args_list))

    # 5. info for 3+ minor versions
    @patch("medcat.utils.check_for_updates.log_info")
    def test_minor_threshold_triggered(self, mock_log):
        releases = self._make_releases(["1.4.0", "1.5.0", "1.6.0", "1.7.0"])
        cnf = {
            "pkg_name": self.pkg,
            "minor_threshold": 3,
            "patch_threshold": 99,
        }
        cnf.update(enabled=True, cache_path=self.cache_path, url="",
                   timeout=0, check_interval=0)
        check_for_updates._do_check(cnf, releases, self.current_version)
        self.assertTrue(any("minor releases available" in c[0][0]
                            for c in mock_log.call_args_list))

    # 6. env variable changes log level (regular)
    @patch.dict("os.environ", {
        "MEDCAT_VERSION_UPDATE_LOG_LEVEL": "ERROR"})
    def test_env_log_level_regular(self):
        msg = "Test"
        with patch.object(check_for_updates.logger, "log") as mock_log:
            check_for_updates.log_info(msg)
        self.assertEqual(mock_log.call_args[0][0], logging.ERROR)

    # 7. env variable changes log level (yanked)
    @patch.dict("os.environ", {
        "MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL": "CRITICAL"})
    def test_env_log_level_yanked(self):
        msg = "Yanked"
        with patch.object(check_for_updates.logger, "log") as mock_log:
            check_for_updates.log_info(msg, yanked=True)
        self.assertEqual(mock_log.call_args[0][0], logging.CRITICAL)

    # 8. yanked version triggers warning
    @patch("medcat.utils.check_for_updates.log_info")
    def test_yanked_version_logs(self, mock_log):
        releases = self._make_releases(["1.3.0"], yanked={"1.3.0": True})
        cnf = {
            "pkg_name": self.pkg,
            "minor_threshold": 99,
            "patch_threshold": 99,
        }
        cnf.update(enabled=True, cache_path=self.cache_path, url="",
                   timeout=0, check_interval=0)
        check_for_updates._do_check(cnf, releases, self.current_version)
        self.assertTrue(any("yanked version" in c[0][0]
                            for c in mock_log.call_args_list))

    # 9. invalid current version handled gracefully
    def test_invalid_current_version_does_not_raise(self):
        releases = self._make_releases(["1.2.0"])
        cnf = {
            "pkg_name": self.pkg,
            "minor_threshold": 99,
            "patch_threshold": 99,
        }
        cnf.update(enabled=True, cache_path=self.cache_path, url="",
                   timeout=0, check_interval=0)
        try:
            check_for_updates._do_check(cnf, releases, "not_a_version")
        except Exception as e:
            self.fail(f"Should not raise, but got {e!r}")