Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions deploy/doaj_gunicorn_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
bind = "0.0.0.0:5050"
workers = multiprocessing.cpu_count() * 6 + 1
proc_name = 'doaj'

# Preload the app before forking workers to prevent race conditions
# during index initialization. This ensures initialise_index() runs
# only once instead of once per worker.
preload_app = True

max_requests = 1000

# The maximum jitter to add to the max_requests setting.
Expand Down
6 changes: 6 additions & 0 deletions deploy/doaj_test_gunicorn_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
bind = "0.0.0.0:5050"
workers = multiprocessing.cpu_count() * 3 + 1
proc_name = 'doaj (test)'

# Preload the app before forking workers to prevent race conditions
# during index initialization. This ensures initialise_index() runs
# only once instead of once per worker.
preload_app = True

max_requests = 1000

# The maximum jitter to add to the max_requests setting.
Expand Down
3 changes: 2 additions & 1 deletion deploy/supervisor/production-background/huey-events.conf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[program:huey-events]
command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_events_queue.events_queue
environment= DOAJENV=production
environment= DOAJENV=production,INITIALISE_INDEX=False

user=cloo
directory=/home/cloo/doaj
stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log
Expand Down

This file was deleted.

9 changes: 0 additions & 9 deletions deploy/supervisor/production-background/huey-main.conf

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[program:huey-scheduled-long]
command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_scheduled_long_queue.scheduled_long_queue
environment= DOAJENV=production
environment= DOAJENV=production,INITIALISE_INDEX=False
user=cloo
directory=/home/cloo/doaj
stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[program:huey-scheduled-short]
command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_scheduled_short_queue.scheduled_short_queue
environment= DOAJENV=production
environment= DOAJENV=production,INITIALISE_INDEX=False
user=cloo
directory=/home/cloo/doaj
stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log
Expand Down
2 changes: 1 addition & 1 deletion deploy/supervisor/test/huey-events.conf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[program:huey-events]
command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_events_queue.events_queue
environment= DOAJENV=test
environment= DOAJENV=test,INITIALISE_INDEX=False
user=cloo
directory=/home/cloo/doaj
stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log
Expand Down
9 changes: 0 additions & 9 deletions deploy/supervisor/test/huey-long-running.conf

This file was deleted.

9 changes: 0 additions & 9 deletions deploy/supervisor/test/huey-main.conf

This file was deleted.

2 changes: 1 addition & 1 deletion deploy/supervisor/test/huey-scheduled-long.conf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[program:huey-scheduled-long]
command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_scheduled_long_queue.scheduled_long_queue
environment= DOAJENV=test
environment= DOAJENV=test,INITIALISE_INDEX=False
user=cloo
directory=/home/cloo/doaj
stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log
Expand Down
2 changes: 1 addition & 1 deletion deploy/supervisor/test/huey-scheduled-short.conf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[program:huey-scheduled-short]
command=/home/cloo/doaj/venv/bin/python /home/cloo/doaj/venv/bin/huey_consumer.py -v portality.tasks.consumer_scheduled_short_queue.scheduled_short_queue
environment= DOAJENV=test
environment= DOAJENV=test,INITIALISE_INDEX=False
user=cloo
directory=/home/cloo/doaj
stdout_logfile=/var/log/supervisor/%(program_name)s-stdout.log
Expand Down
6 changes: 5 additions & 1 deletion portality/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,11 @@
# because that does not run if gunicorn is loading the app, as opposed
# to the app being run directly by python portality/app.py
# putting it here ensures it will run under any web server
initialise_index(app, es_connection)
# NOTE: With gunicorn preload_app=True, this runs once before worker forking,
# preventing race conditions. Index creation is opt-in: it only happens when
# INITIALISE_INDEX is truthy in the app config, so leave it unset/False to skip
# automatic index creation on startup (useful for production deployments).
if app.config.get('INITIALISE_INDEX', False):
initialise_index(app, es_connection)

# serve static files from multiple potential locations
# this allows us to override the standard static file handling with our own dynamic version
Expand Down
33 changes: 29 additions & 4 deletions portality/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,16 +208,41 @@ def put_mappings(conn, mappings, force_mappings=False):
# Set up a new index and corresponding alias
idx_name = altered_key + '-{}'.format(dates.now_str(dates.FMT_DATETIME_LONG))

index_created = False
try:
resp = es_connection.indices.create(index=idx_name,
body=mapping,
request_timeout=app.config.get("ES_SOCKET_TIMEOUT", None))
print("Initialised index: {}".format(resp['index']))
index_created = True
except elasticsearch.exceptions.RequestError as e:
print('Could not create index: ' + str(e))

resp2 = es_connection.indices.put_alias(index=idx_name, name=altered_key)
print("Created alias: {:<25} -> {}, status {}".format(idx_name, altered_key, resp2))
# Check if this is a race condition where another worker already created the index
if 'resource_already_exists_exception' in str(e):
print('Index {} already exists (race condition with another worker), skipping...'.format(idx_name))
# Check if alias was already created by the other worker
if conn.indices.exists_alias(name=altered_key):
print("Alias {} already created by another worker, skipping alias creation".format(altered_key))
continue
else:
# Index exists but alias doesn't - this shouldn't happen but let's try to create the alias
print("Index exists but alias doesn't, attempting to create alias...")
index_created = True
else:
print('Could not create index: ' + str(e))
# Don't try to create alias if index creation failed for other reasons
continue

# Only create alias if index was created successfully or already exists
if index_created:
try:
resp2 = es_connection.indices.put_alias(index=idx_name, name=altered_key)
print("Created alias: {:<25} -> {}, status {}".format(idx_name, altered_key, resp2))
except elasticsearch.exceptions.RequestError as e:
# Handle race condition where another worker created the alias first
if 'invalid_alias_name_exception' in str(e) or 'resource_already_exists_exception' in str(e):
print("Alias {} already exists (created by another worker)".format(altered_key))
else:
print('Could not create alias: ' + str(e))


def initialise_index(app, conn, only_mappings=None, force_mappings=False):
Expand Down
10 changes: 9 additions & 1 deletion portality/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
from portality import constants
from portality.lib import paths


def get_bool_env(var_name, default=False):
    """Read an environment variable and interpret it as a boolean.

    :param var_name: name of the environment variable to look up
    :param default: value returned when the variable is not set at all
    :return: ``default`` if the variable is unset; otherwise True when the
        value, ignoring case and surrounding whitespace, is one of
        'true', '1', 'yes' or 'on', and False for any other value
        (including the empty string)
    """
    val = os.getenv(var_name)
    if val is None:
        return default
    # Trim before comparing so values such as "True " or "1\n" (easy to end up
    # with when the variable comes from an env file or a process-manager
    # config) are still recognised as truthy.
    return val.strip().lower() in ('true', '1', 'yes', 'on')

###########################################
# Application Version information
# ~~->API:Feature~~
Expand Down Expand Up @@ -66,7 +74,7 @@
ELASTIC_SEARCH_DB_PREFIX = "doaj-" # note: include the separator
ELASTIC_SEARCH_TEST_DB_PREFIX = "doajtest-"

INITIALISE_INDEX = True # whether or not to try creating the index and required index types on startup
INITIALISE_INDEX = get_bool_env('INITIALISE_INDEX', False) # whether to try creating required index types on startup
ELASTIC_SEARCH_VERSION = "7.10.2"
ELASTIC_SEARCH_SNAPSHOT_REPOSITORY = None
ELASTIC_SEARCH_SNAPSHOT_TTL = 366
Expand Down
4 changes: 4 additions & 0 deletions test.cfg
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from portality.settings import get_bool_env

INITIALISE_INDEX = get_bool_env('INITIALISE_INDEX', True)

ELASTICSEARCH_HOSTS = [{'host': 'localhost', 'port': 9200}]
INDEX_PER_TYPE_SUBSTITUTE = '_doc'
APP_MACHINES_INTERNAL_IPS = ["localhost:5050"]
Expand Down