diff --git a/deploy/doaj_gunicorn_config.py b/deploy/doaj_gunicorn_config.py index a08dd6ef62..fdeeb5b412 100644 --- a/deploy/doaj_gunicorn_config.py +++ b/deploy/doaj_gunicorn_config.py @@ -3,6 +3,12 @@ bind = "0.0.0.0:5050" workers = multiprocessing.cpu_count() * 6 + 1 proc_name = 'doaj' + +# Preload the app before forking workers to prevent race conditions +# during index initialization. When INITIALISE_INDEX is enabled, this ensures +# initialise_index() runs only once instead of once per worker. +preload_app = True + max_requests = 1000 # The maximum jitter to add to the max_requests setting. diff --git a/deploy/doaj_test_gunicorn_config.py b/deploy/doaj_test_gunicorn_config.py index 4c80d42f9e..748c2918e3 100644 --- a/deploy/doaj_test_gunicorn_config.py +++ b/deploy/doaj_test_gunicorn_config.py @@ -3,6 +3,12 @@ bind = "0.0.0.0:5050" workers = multiprocessing.cpu_count() * 3 + 1 proc_name = 'doaj (test)' + +# Preload the app before forking workers to prevent race conditions +# during index initialization. When INITIALISE_INDEX is enabled, this ensures +# initialise_index() runs only once instead of once per worker. +preload_app = True + max_requests = 1000 # The maximum jitter to add to the max_requests setting. 
diff --git a/deploy/supervisor/production-background/huey-events.conf b/deploy/supervisor/production-background/huey-events.conf index 2361dcc751..7fd73ee8d0 100644 --- a/deploy/supervisor/production-background/huey-events.conf +++ b/deploy/supervisor/production-background/huey-events.conf @@ -1,6 +1,7 @@ [program:huey-events] command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_events_queue.events_queue -environment= DOAJENV=production +environment= DOAJENV=production,INITIALISE_INDEX=False + user=cloo directory=/home/cloo/doaj stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log diff --git a/deploy/supervisor/production-background/huey-long-running.conf b/deploy/supervisor/production-background/huey-long-running.conf deleted file mode 100644 index 4e23fdbc7d..0000000000 --- a/deploy/supervisor/production-background/huey-long-running.conf +++ /dev/null @@ -1,9 +0,0 @@ -[program:huey-long-running] -command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_long_running.long_running -environment= DOAJENV=production -user=cloo -directory=/home/cloo/doaj -stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log -stderr_logfile=/var/log/supervisor/%(program_name)s-error.log -autostart=true -autorestart=true diff --git a/deploy/supervisor/production-background/huey-main.conf b/deploy/supervisor/production-background/huey-main.conf deleted file mode 100644 index e02c498351..0000000000 --- a/deploy/supervisor/production-background/huey-main.conf +++ /dev/null @@ -1,9 +0,0 @@ -[program:huey-main] -command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_main_queue.main_queue -environment= DOAJENV=production -user=cloo -directory=/home/cloo/doaj -stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log -stderr_logfile=/var/log/supervisor/%(program_name)s-error.log -autostart=true 
-autorestart=true diff --git a/deploy/supervisor/production-background/huey-scheduled-long.conf b/deploy/supervisor/production-background/huey-scheduled-long.conf index 705fe66901..f0c11e4aec 100644 --- a/deploy/supervisor/production-background/huey-scheduled-long.conf +++ b/deploy/supervisor/production-background/huey-scheduled-long.conf @@ -1,6 +1,6 @@ [program:huey-scheduled-long] command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_scheduled_long_queue.scheduled_long_queue -environment= DOAJENV=production +environment= DOAJENV=production,INITIALISE_INDEX=False user=cloo directory=/home/cloo/doaj stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log diff --git a/deploy/supervisor/production-background/huey-scheduled-short.conf b/deploy/supervisor/production-background/huey-scheduled-short.conf index a38439d834..f2620188a1 100644 --- a/deploy/supervisor/production-background/huey-scheduled-short.conf +++ b/deploy/supervisor/production-background/huey-scheduled-short.conf @@ -1,6 +1,6 @@ [program:huey-scheduled-short] command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_scheduled_short_queue.scheduled_short_queue -environment= DOAJENV=production +environment= DOAJENV=production,INITIALISE_INDEX=False user=cloo directory=/home/cloo/doaj stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log diff --git a/deploy/supervisor/test/huey-events.conf b/deploy/supervisor/test/huey-events.conf index bcb47109e2..fd005439e9 100644 --- a/deploy/supervisor/test/huey-events.conf +++ b/deploy/supervisor/test/huey-events.conf @@ -1,6 +1,6 @@ [program:huey-events] command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_events_queue.events_queue -environment= DOAJENV=test +environment= DOAJENV=test,INITIALISE_INDEX=False user=cloo directory=/home/cloo/doaj 
stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log diff --git a/deploy/supervisor/test/huey-long-running.conf b/deploy/supervisor/test/huey-long-running.conf deleted file mode 100644 index 081d53a767..0000000000 --- a/deploy/supervisor/test/huey-long-running.conf +++ /dev/null @@ -1,9 +0,0 @@ -[program:huey-long-running] -command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_long_running.long_running -environment= DOAJENV=test -user=cloo -directory=/home/cloo/doaj -stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log -stderr_logfile=/var/log/supervisor/%(program_name)s-error.log -autostart=true -autorestart=true diff --git a/deploy/supervisor/test/huey-main.conf b/deploy/supervisor/test/huey-main.conf deleted file mode 100644 index 488c3ab905..0000000000 --- a/deploy/supervisor/test/huey-main.conf +++ /dev/null @@ -1,9 +0,0 @@ -[program:huey-main] -command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_main_queue.main_queue -environment= DOAJENV=test -user=cloo -directory=/home/cloo/doaj -stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log -stderr_logfile=/var/log/supervisor/%(program_name)s-error.log -autostart=true -autorestart=true diff --git a/deploy/supervisor/test/huey-scheduled-long.conf b/deploy/supervisor/test/huey-scheduled-long.conf index 4f2d0b9dfc..d3574d59d0 100644 --- a/deploy/supervisor/test/huey-scheduled-long.conf +++ b/deploy/supervisor/test/huey-scheduled-long.conf @@ -1,6 +1,6 @@ [program:huey-scheduled-long] command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_scheduled_long_queue.scheduled_long_queue -environment= DOAJENV=test +environment= DOAJENV=test,INITIALISE_INDEX=False user=cloo directory=/home/cloo/doaj stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log diff --git a/deploy/supervisor/test/huey-scheduled-short.conf 
b/deploy/supervisor/test/huey-scheduled-short.conf index 1f4ab54764..00c4461513 100644 --- a/deploy/supervisor/test/huey-scheduled-short.conf +++ b/deploy/supervisor/test/huey-scheduled-short.conf @@ -1,6 +1,6 @@ [program:huey-scheduled-short] command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_scheduled_short_queue.scheduled_short_queue -environment= DOAJENV=test +environment= DOAJENV=test,INITIALISE_INDEX=False user=cloo directory=/home/cloo/doaj stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log diff --git a/portality/app.py b/portality/app.py index 9525513220..c3cbdfaf1c 100644 --- a/portality/app.py +++ b/portality/app.py @@ -96,7 +96,11 @@ # because that does not run if gunicorn is loading the app, as opposed # to the app being run directly by python portality/app.py # putting it here ensures it will run under any web server -initialise_index(app, es_connection) +# NOTE: With gunicorn preload_app=True, this runs once before worker forking, +# preventing race conditions. Index creation on startup is opt-in: it only runs +# when the INITIALISE_INDEX setting is truthy (settings.py defaults it to False). 
+if app.config.get('INITIALISE_INDEX', False): + initialise_index(app, es_connection) # serve static files from multiple potential locations # this allows us to override the standard static file handling with our own dynamic version diff --git a/portality/core.py b/portality/core.py index 066e7b65f2..901e393ef6 100644 --- a/portality/core.py +++ b/portality/core.py @@ -208,16 +208,41 @@ def put_mappings(conn, mappings, force_mappings=False): # Set up a new index and corresponding alias idx_name = altered_key + '-{}'.format(dates.now_str(dates.FMT_DATETIME_LONG)) + index_created = False try: resp = es_connection.indices.create(index=idx_name, body=mapping, request_timeout=app.config.get("ES_SOCKET_TIMEOUT", None)) print("Initialised index: {}".format(resp['index'])) + index_created = True except elasticsearch.exceptions.RequestError as e: - print('Could not create index: ' + str(e)) - - resp2 = es_connection.indices.put_alias(index=idx_name, name=altered_key) - print("Created alias: {:<25} -> {}, status {}".format(idx_name, altered_key, resp2)) + # Check if this is a race condition where another worker already created the index + if 'resource_already_exists_exception' in str(e): + print('Index {} already exists (race condition with another worker), skipping...'.format(idx_name)) + # Check if alias was already created by the other worker + if conn.indices.exists_alias(name=altered_key): + print("Alias {} already created by another worker, skipping alias creation".format(altered_key)) + continue + else: + # Index exists but alias doesn't - this shouldn't happen but let's try to create the alias + print("Index exists but alias doesn't, attempting to create alias...") + index_created = True + else: + print('Could not create index: ' + str(e)) + # Don't try to create alias if index creation failed for other reasons + continue + + # Only create alias if index was created successfully or already exists + if index_created: + try: + resp2 = 
es_connection.indices.put_alias(index=idx_name, name=altered_key) + print("Created alias: {:<25} -> {}, status {}".format(idx_name, altered_key, resp2)) + except elasticsearch.exceptions.RequestError as e: + # Handle race condition where another worker created the alias first + if 'invalid_alias_name_exception' in str(e) or 'resource_already_exists_exception' in str(e): + print("Alias {} already exists (created by another worker)".format(altered_key)) + else: + print('Could not create alias: ' + str(e)) def initialise_index(app, conn, only_mappings=None, force_mappings=False): diff --git a/portality/settings.py b/portality/settings.py index 779883c100..65d96a2dec 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -5,6 +5,14 @@ from portality import constants from portality.lib import paths + +def get_bool_env(var_name, default=False): + """Read a boolean from an environment variable: 'true', '1', 'yes', 'on' (any case) give True, any other set value gives False, unset gives default.""" + val = os.getenv(var_name) + if val is None: + return default + return val.lower() in ('true', '1', 'yes', 'on') + ########################################### # Application Version information # ~~->API:Feature~~ @@ -66,7 +74,7 @@ ELASTIC_SEARCH_DB_PREFIX = "doaj-" # note: include the separator ELASTIC_SEARCH_TEST_DB_PREFIX = "doajtest-" -INITIALISE_INDEX = True # whether or not to try creating the index and required index types on startup +INITIALISE_INDEX = get_bool_env('INITIALISE_INDEX', False) # whether to try creating required index types on startup; read from the INITIALISE_INDEX env var, off when unset ELASTIC_SEARCH_VERSION = "7.10.2" ELASTIC_SEARCH_SNAPSHOT_REPOSITORY = None ELASTIC_SEARCH_SNAPSHOT_TTL = 366 diff --git a/test.cfg b/test.cfg index 855dbd5e63..dc077246a0 100644 --- a/test.cfg +++ b/test.cfg @@ -1,3 +1,7 @@ +from portality.settings import get_bool_env + +INITIALISE_INDEX = get_bool_env('INITIALISE_INDEX', True) + ELASTICSEARCH_HOSTS = [{'host': 'localhost', 'port': 9200}] INDEX_PER_TYPE_SUBSTITUTE = '_doc' APP_MACHINES_INTERNAL_IPS = ["localhost:5050"]