From e4a1cbed3dc2ba0db10649fba7e4b8700c4ccfcd Mon Sep 17 00:00:00 2001 From: "P.J. Finlay" Date: Sun, 18 Aug 2024 09:15:58 -0400 Subject: [PATCH] Expand configuration management system This commit copies some of the improvements from the v2 settings system to the master branch. The most important change is allowing settings to be configured from a JSON file. --- argostranslate/settings.py | 189 ++++++++++++++++++++++++++++++------- 1 file changed, 157 insertions(+), 32 deletions(-) diff --git a/argostranslate/settings.py b/argostranslate/settings.py index 4a756705..fefff78e 100644 --- a/argostranslate/settings.py +++ b/argostranslate/settings.py @@ -1,14 +1,42 @@ +import json import os from enum import Enum from pathlib import Path +from typing import Any, Dict -TRUE_VALUES = ["1", "TRUE", "True", "true"] +""" +Argos Translate can be configured using either environment variables or json file -debug = os.getenv("ARGOS_DEBUG") in TRUE_VALUES +### Environment variables +``` +export ARGOS_DEBUG="0" +export ARGOS_PACKAGE_INDEX="https://raw.githubusercontent.com/argosopentech/argospm-index/main/" +export ARGOS_PACKAGES_DIR="/home//.local/share/argos-translate/packages/" +export ARGOS_DEVICE_TYPE="cpu" -dev_mode = os.getenv("ARGOS_DEV_MODE") in TRUE_VALUES +``` + +### JSON + +# $HOME/.config/argos-translate/settings.json +``` +{ + "ARGOS_DEBUG": "0", + "ARGOS_PACKAGES_INDEX": "https://raw.githubusercontent.com/argosopentech/argospm-index/main/", + "ARGOS_PACKAGE_DIR": "/home//.local/share/argos-translate/packages/", + "ARGOS_DEVICE_TYPE": "cpu" +} +``` +""" + +""" +Importing argostranslate.settings will create the Argos Translate data directory (~/.local/share/argos-translate), +the Argos Translate config directory (~/.config/argos-translate), +and the Argos Translate cache directory (~/.local/cache/argos-translate) if they do not already exist. +""" home_dir = Path.home() + if "SNAP" in os.environ: home_dir = Path(os.environ["SNAP_USER_DATA"]) @@ -22,8 +50,11 @@ legacy_package_data_dir = Path( os.getenv("ARGOS_TRANSLATE_PACKAGES_DIR", default=data_dir / "packages") ) -package_data_dir = Path(os.getenv("ARGOS_PACKAGES_DIR", legacy_package_data_dir)) -os.makedirs(package_data_dir, exist_ok=True) + +config_dir = ( + Path(os.getenv("XDG_CONFIG_HOME", default=home_dir / ".config")) / "argos-translate" +) +os.makedirs(config_dir, exist_ok=True) cache_dir = ( Path(os.getenv("XDG_CACHE_HOME", default=home_dir / ".local" / "cache")) @@ -31,6 +62,85 @@ ) os.makedirs(cache_dir, exist_ok=True) + +downloads_dir = cache_dir / "downloads" +os.makedirs(downloads_dir, exist_ok=True) + +settings_file = config_dir / "settings.json" + + +def load_settings_dict() -> Dict[str, Any]: + settings_dict = dict() + if settings_file.exists(): + try: + with open(settings_file, "r") as settings_file_data: + settings_dict = json.load(settings_file_data) + assert isinstance( + settings_dict, dict + ), "settings.json should contain a dictionary" + except FileNotFoundError as e: + print(f"{settings_file} not found : FileNotFoundError {e}") + except json.JSONDecodeError as e: + print(f"Error decoding {settings_file}: JSONDecodeError {e}") + return settings_dict + + +def get_setting(key: str, default=None): + """Gets a setting from either environment variables or settings.json + + Settings from environment variables take precedence over settings.json + + Args: + key (str): Key value + default: The default setting value. Defaults to None. + + Returns: + The setting value + """ + value_from_environment = os.getenv(key) + value_from_file = load_settings_dict().get(key) + if value_from_environment is not None: + return value_from_environment + else: + if value_from_file is not None: + return value_from_file + return default + + +def set_setting(key: str, value): + """Sets a setting in the settings.json file. + + Args: + key (str): The key to set. + value: The value to set. + """ + settings = load_settings_dict() + settings[key] = value + with open(settings_file, "w") as settings_file_data: + json.dump(settings, settings_file_data, indent=4) + + +TRUE_VALUES = ["1", "TRUE", "True", "true", 1, True] + + +debug = get_setting("ARGOS_DEBUG") in TRUE_VALUES + +dev_mode = get_setting("ARGOS_DEV_MODE") in TRUE_VALUES + +package_index = get_setting( + "ARGOS_PACKAGE_INDEX", + default="https://raw.githubusercontent.com/argosopentech/argospm-index/main/", +) + +package_data_dir = Path( + get_setting("ARGOS_PACKAGES_DIR", default=data_dir / "packages") +) +os.makedirs(package_data_dir, exist_ok=True) + + +downloads_dir = cache_dir / "downloads" +os.makedirs(downloads_dir, exist_ok=True) + if not dev_mode: remote_repo = os.getenv( "ARGOS_PACKAGE_INDEX", @@ -41,12 +151,22 @@ "ARGOS_PACKAGE_INDEX", default="https://raw.githubusercontent.com/argosopentech/argospm-index-dev/main", ) -remote_package_index = remote_repo + "/index.json" + +remote_package_index = package_index + "index.json" + +local_package_index = data_dir / "index.json" experimental_enabled = os.getenv("ARGOS_EXPERIMENTAL_ENABLED") in TRUE_VALUES stanza_available = os.getenv("ARGOS_STANZA_AVAILABLE") in (TRUE_VALUES + [None]) +# Supported values: "cpu" and "cuda" +device = get_setting("ARGOS_DEVICE_TYPE", "cpu") + +# https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html +inter_threads = int(get_setting("ARGOS_INTER_THREADS", "1")) +intra_threads = int(get_setting("ARGOS_INTRA_THREADS", "0")) + class ModelProvider(Enum): OPENNMT = 0 @@ -59,15 +179,39 @@ class ModelProvider(Enum): "LIBRETRANSLATE": ModelProvider.LIBRETRANSLATE, "OPENAI": ModelProvider.OPENAI, } -model_provider = os.getenv("ARGOS_MODEL_PROVIDER", "OPENNMT") -assert model_provider in model_mapping.keys() -model_provider = model_mapping[model_provider] +model_provider = model_mapping[get_setting("ARGOS_MODEL_PROVIDER", default="OPENNMT")] -libretranslate_api_key = os.getenv("LIBRETRANSLATE_API_KEY", None) -openai_api_key = os.getenv("OPENAI_API_KEY", None) -downloads_dir = cache_dir / "downloads" -os.makedirs(downloads_dir, exist_ok=True) +# Sentence boundary detection +class ChunkType(Enum): + DEFAULT = 0 + ARGOSTRANSLATE = 1 + NONE = 2 # No sentence splitting + + +chunk_type_mapping = { + "DEFAULT": ChunkType.DEFAULT, + "ARGOSTRANSLATE": ChunkType.ARGOSTRANSLATE, + "NONE": ChunkType.NONE, +} +chunk_type = chunk_type_mapping[get_setting("ARGOS_CHUNK_TYPE", default="DEFAULT")] +if chunk_type == ChunkType.DEFAULT: + chunk_type = ChunkType.ARGOSTRANSLATE + + +libretranslate_api_key = get_setting("LIBRETRANSLATE_API_KEY", None) +openai_api_key = get_setting("OPENAI_API_KEY", None) + + +argos_translate_about_text = ( + "Argos Translate is an open source neural machine " + + "translation application created by Argos Open " + + "Technologies, LLC (www.argosopentech.com). " +) + +# Fix Intel bug +# https://github.com/argosopentech/argos-translate/issues/40 +os.environ["KMP_DUPLICATE_LIB_OK"] = "True" # Will search all of these directories for packages package_dirs = [package_data_dir] @@ -85,22 +229,3 @@ class ModelProvider(Enum): for package_dir in content_snap_packages.iterdir(): if package_dir.is_dir(): package_dirs.append(package_dir) - -local_package_index = cache_dir / "index.json" - -about_text = ( - "Argos Translate is an open source neural machine " - "translation application created by Argos Open " - "Technologies, LLC (www.argosopentech.com). " -) - -# Fix Intel bug -# https://github.com/argosopentech/argos-translate/issues/40 -os.environ["KMP_DUPLICATE_LIB_OK"] = "True" - -# Supported values: cpu and cuda -device = os.environ.get("ARGOS_DEVICE_TYPE", "cpu") - -# https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html -inter_threads = int(os.environ.get("ARGOS_INTER_THREADS", "1")) -intra_threads = int(os.environ.get("ARGOS_INTRA_THREADS", "0"))