2 changes: 2 additions & 0 deletions medcat-service/env/app.env
@@ -36,6 +36,8 @@ SERVER_PORT=5000
SERVER_WORKERS=1
SERVER_WORKER_TIMEOUT=300
SERVER_THREADS=1
SERVER_GUNICORN_MAX_REQUESTS=1000
SERVER_GUNICORN_MAX_REQUESTS_JITTER=50

# set the number of torch threads, this should be used ONLY if you are using CPUs and the default image
# set to -1 or 0 if you are using GPU
4 changes: 3 additions & 1 deletion medcat-service/env/app_deid.env
@@ -21,7 +21,7 @@ APP_MODEL_REL_PATH_LIST=
# MedCAT Model Pack path
# IMPORTANT: if this parameter has value IT WILL BE LOADED FIRST OVER EVERYTHING ELSE (CDB, Vocab, MetaCATs, etc.) declared above.
# Respect the same paths as above : /cat/models/model_pack_name.zip
APP_MEDCAT_MODEL_PACK=
APP_MEDCAT_MODEL_PACK=/cat/models/medcat_v2_deid_model_691c3f6a6e5400e7_686dfbf9c3c664e0.zip

# optionally, filter the reported concepts by CUIs
# APP_MODEL_CUI_FILTER_PATH=/cat/models/cui_filter.txt
@@ -36,6 +36,8 @@ SERVER_PORT=5000
SERVER_WORKERS=1
SERVER_WORKER_TIMEOUT=300
SERVER_THREADS=1
SERVER_GUNICORN_MAX_REQUESTS=1000
SERVER_GUNICORN_MAX_REQUESTS_JITTER=50

# set the number of torch threads, this should be used ONLY if you are using CPUs and the default image
# set to -1 or 0 if you are using GPU
4 changes: 2 additions & 2 deletions medcat-service/medcat_service/dependencies.py
@@ -10,14 +10,14 @@
log = logging.getLogger(__name__)


@lru_cache
@lru_cache(maxsize=1)
Collaborator:
Refer to the comment below for more detail.

Perhaps we can get away with something simpler for the settings singleton, i.e.:

```python
_def_settings: Optional[Settings] = None

def get_settings() -> Settings:
    global _def_settings
    if _def_settings is None:
        _def_settings = Settings()
    return _def_settings
```

That way the get_settings method will always return the same instance and thus the caching for get_medcat_processor should always result in the same instance since the argument is always the same.

Collaborator:
This makes sense to me; it feels right to make it explicitly a global singleton. I would keep that log line in from before and really confirm it never gets created again.

Collaborator:
Yeah, if we fully force a singleton settings instance, I fail to see how we could ever have multiple MedCATProcessor instances from the cached method.

But yes, keeping the log message does still make sense!
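
Putting the two suggestions above together, a minimal sketch of that module-level singleton with the log line retained could look like this (editorial illustration, not part of the diff; it assumes the Settings class and the log object already defined in this module):

```python
from typing import Optional

_def_settings: Optional[Settings] = None


def get_settings() -> Settings:
    """Return one shared Settings instance, creating it on first use."""
    global _def_settings
    if _def_settings is None:
        _def_settings = Settings()
        # Keep the log line so any repeated creation would show up in the logs.
        log.debug("Created settings: %s", _def_settings)
    return _def_settings
```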

def get_settings() -> Settings:
    settings = Settings()
    log.debug("Using settings: %s", settings)
    return settings


@lru_cache
@lru_cache(maxsize=1)
Collaborator:
I feel like this is kind of counterproductive. It may (effectively) fix the memory leak, but it doesn't actually fix the underlying issue, which, as far as I know, is that we expect only one instance of MedCatProcessor. Since the Settings class is frozen, one should always be using the same instance, and as such getting the same cached MedCatProcessor instance.

So I think the issue is in how FastAPI uses the dependency and, by doing so, seems to omit the lru_cache when calling the get_settings method.

Member Author:
This was just the easy way to enforce the MedCatProcessor singleton; we can switch to the lifespan manager then and see how it behaves.

Collaborator:
That's the thing though - it doesn't enforce a singleton. You'll still have multiple instances. They just won't be cached. And I understand the caching was the culprit for the memory leak, but having multiple instances doesn't seem to be the intended design.

Collaborator:
I agree that working out how there are multiple instances is the main thing.

In general I'm surprised by this being the fix, so I'm not really convinced this is the right answer.

I literally followed the FastAPI docs for settings here: https://fastapi.tiangolo.com/advanced/settings/#lru-cache-technical-details. The only difference I see is that I imported Settings directly from config instead of importing config.

I'd be really surprised if we have some situation that nobody else has encountered; FastAPI + torch is standard. The only unusual thing we do is that I set it to use gunicorn, just to keep the pre-existing torch thread code, so maybe there's an issue somewhere in there?

Collaborator:
Wildcard suggestion: we switch to plain uvicorn (and turn the logging up to info on those lru_cache methods) and confirm whether the error still occurs. My hope is that the FastAPI-copied code + torch + everything default = no issue. We could add a flag to the startup script to switch between uvicorn and gunicorn.
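
As an editorial aside, running the app directly under uvicorn for that comparison could look roughly like the sketch below (an assumption, not code from this PR; the host and port handling are illustrative):

```python
import os

import uvicorn

if __name__ == "__main__":
    # Serve the same ASGI app without gunicorn, single worker, for comparison.
    uvicorn.run(
        "medcat_service.main:app",
        host="0.0.0.0",
        port=int(os.environ.get("SERVER_PORT", "5000")),
        workers=1,
        log_level="info",
    )
```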

Member Author (@vladd-bit), Sep 19, 2025:
OK, clearly my use of the term 'singleton' was off, and yes, the proposed 'fix' is hacky, so it's update time. Regarding gunicorn with uvicorn: we are using one worker anyway, so it shouldn't be an issue unless there is some specific bug we haven't discovered yet. We could also try an alternative to uvicorn and just use ASGI middleware (though I'm not keen on that approach for now, and I haven't tested it yet). And with the singleton MedCatProcessor in place, if there are still memory leaks they will come from the CAT object itself. I will give it a test today.

def get_medcat_processor(settings: Annotated[Settings, Depends(get_settings)]) -> MedCatProcessor:
    log.debug("Creating new MedCat Processor using settings: %s", settings)
    return MedCatProcessor(settings)
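
As a sketch of the lifespan-manager idea raised in the discussion above (an assumption about how it could be wired up, not code from this PR; the MedCatProcessor import path is hypothetical), the processor could be built once at startup and shared via app.state:

```python
from contextlib import asynccontextmanager

from fastapi import FastAPI, Request

from medcat_service.dependencies import get_settings
# Hypothetical import path, shown only for illustration.
from medcat_service.nlp_processor import MedCatProcessor


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Create the single MedCatProcessor when the app starts...
    app.state.medcat_processor = MedCatProcessor(get_settings())
    yield
    # ...and drop the reference on shutdown.
    app.state.medcat_processor = None


app = FastAPI(lifespan=lifespan)


def get_medcat_processor(request: Request) -> MedCatProcessor:
    # Dependency that always hands out the one shared instance.
    return request.app.state.medcat_processor
```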
11 changes: 11 additions & 0 deletions medcat-service/start_service_production.sh
@@ -33,6 +33,15 @@ if [ -z ${SERVER_WORKER_TIMEOUT+x} ]; then
echo "SERVER_WORKER_TIMEOUT is unset -- setting to default (sec): $SERVER_WORKER_TIMEOUT";
fi

if [ -z ${SERVER_GUNICORN_MAX_REQUESTS+x} ]; then
SERVER_GUNICORN_MAX_REQUESTS=1000;
echo "SERVER_GUNICORN_MAX_REQUESTS is unset -- setting to default: $SERVER_GUNICORN_MAX_REQUESTS";
fi

if [ -z ${SERVER_GUNICORN_MAX_REQUESTS_JITTER+x} ]; then
SERVER_GUNICORN_MAX_REQUESTS_JITTER=50;
echo "SERVER_GUNICORN_MAX_REQUESTS_JITTER is unset -- setting to default: $SERVER_GUNICORN_MAX_REQUESTS_JITTER";
fi

SERVER_ACCESS_LOG_FORMAT="%(t)s [ACCESS] %(h)s \"%(r)s\" %(s)s \"%(f)s\" \"%(a)s\""

@@ -50,5 +59,7 @@ exec gunicorn \
--error-logfile=- \
--log-level info \
--config /cat/config.py \
--max-requests="$SERVER_GUNICORN_MAX_REQUESTS" \
--max-requests-jitter="$SERVER_GUNICORN_MAX_REQUESTS_JITTER" \
--worker-class uvicorn.workers.UvicornWorker \
medcat_service.main:app
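
For context on the new flags (editorial note, not part of the PR): gunicorn's --max-requests and --max-requests-jitter recycle each worker after roughly that many requests, with the jitter spreading restarts across workers so they don't all recycle at once. This bounds slow memory growth rather than fixing its cause. If these values were ever moved out of the command line, a minimal sketch of reading the same environment variables from a gunicorn config file (hypothetical, not the repo's /cat/config.py) could look like:

```python
# Hypothetical gunicorn config sketch -- not the repo's /cat/config.py.
import os

# Recycle each worker after roughly this many requests to bound memory growth.
max_requests = int(os.environ.get("SERVER_GUNICORN_MAX_REQUESTS", "1000"))

# Randomize the restart point per worker so workers don't all recycle at once.
max_requests_jitter = int(os.environ.get("SERVER_GUNICORN_MAX_REQUESTS_JITTER", "50"))
```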