Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,21 @@ public class LuceneSail extends NotifyingSailWrapper {
*/
public static final String LUCENE_RAMDIR_KEY = "useramdir";

/**
* Set the key "fsyncInterval=<t>" as sail parameter to configure the interval in milliseconds in which fsync
* is called on the Lucene index. Set to 0 or a negative value to call fsync synchronously after each operation.
* Default is 0. Setting this parameter to a positive value will improve performance for frequent writes, but may
* cause the loss of the last few operations in case of a crash.
*/
public static final String FSYNC_INTERVAL_KEY = "fsyncInterval";

/**
* Set the key "fsyncMaxPendingFiles=<n>" as sail parameter to configure the maximum number of files pending
* to be fsynced. When this number is reached, a fsync is forced to limit memory usage. Default is 5000. This
* parameter only has an effect when {@link #FSYNC_INTERVAL_KEY} is set to a positive value.
*/
public static final String FSYNC_MAX_PENDING_FILES_KEY = "fsyncMaxPendingFiles";

/**
* Set the key "defaultNumDocs=<n>" as sail parameter to limit the maximum number of documents to return from
* a search query. The default is to return all documents. NB: this may involve extra cost for some SearchIndex
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*******************************************************************************
* Copyright (c) 2025 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.sail.lucene.impl;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Wrapper around a Lucene Directory that batches sync and metadata sync calls to be executed at a fixed interval.
*
* @author Piotr Sowiński
*/
class DelayedSyncDirectoryWrapper extends FilterDirectory {

final private Logger logger = LoggerFactory.getLogger(getClass());

final private ScheduledExecutorService scheduler;

final private AtomicBoolean needsMetadataSync = new AtomicBoolean(false);

final private AtomicReference<IOException> lastSyncException = new AtomicReference<>(null);

final private HashSet<String> pendingSyncs = new HashSet<>();

final private int maxPendingSyncs;

/**
* Creates a new instance of LuceneDirectoryWrapper.
*
* @param in the underlying directory
* @param fsyncInterval the interval in milliseconds writes after which a fsync is performed
* @param maxPendingSyncs the maximum number of pending syncs to accumulate before forcing a sync
*/
DelayedSyncDirectoryWrapper(Directory in, long fsyncInterval, int maxPendingSyncs) {
super(in);
assert fsyncInterval > 0;
assert maxPendingSyncs > 0;
this.maxPendingSyncs = maxPendingSyncs;
scheduler = Executors.newScheduledThreadPool(1);
scheduler.scheduleAtFixedRate(
this::doSync,
fsyncInterval,
fsyncInterval,
TimeUnit.MILLISECONDS
);
}

private void doSync() {
List<String> toSync;
synchronized (pendingSyncs) {
toSync = new ArrayList<>(pendingSyncs);
pendingSyncs.clear();
}
if (!toSync.isEmpty()) {
try {
super.sync(toSync);
} catch (IOException e) {
lastSyncException.set(e);
logger.error("IO error during a periodic sync of Lucene index files", e);
}
}
if (this.needsMetadataSync.getAndSet(false)) {
try {
super.syncMetaData();
} catch (IOException e) {
lastSyncException.set(e);
logger.error("IO error during a periodic sync of Lucene index metadata", e);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit worried that if for some reason there is a persistent issue, then we may end up logging continuously but never actually throwing an exception.

What would usually happen if an IO exception was thrown (with the original code)? Would it bring down the entire application or just a particular transaction?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would result in a transaction rollback:

luceneIndex.commit();
} catch (IOException | SailException e) {
logger.error("Rolling back", e);
luceneIndex.rollback();

We cannot do the same thing 1:1 with asynchronous fsyncs, because we don't wait for the result of the fsync. The next best thing we can do is to throw an exception on the next transaction.

I've added a bit of code for that, along with a test.

}
}
}

@Override
public void sync(Collection<String> names) throws IOException {
final IOException ex = lastSyncException.getAndSet(null);
if (ex != null) {
// Rethrow the last exception if there was one.
// This will fail the current transaction, and not the one that caused the original exception.
// But there is no other way to notify the caller of the error, as the sync is done asynchronously.
throw ex;
}
synchronized (pendingSyncs) {
pendingSyncs.addAll(names);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How big is this likely to grow? Should we have a hard limit (possibly configurable) so that we don't run out of memory before we sync?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I can tell, there is no limit on this, it depends on Lucene index size. I added a configurable limit for this, set to 5000 files by default – should be good enough. There is also a test for this.

if (pendingSyncs.size() >= maxPendingSyncs) {
// If we have accumulated too many pending syncs, do a sync right away
// to avoid excessive memory usage
doSync();
}
}
}

@Override
public void syncMetaData() throws IOException {
needsMetadataSync.set(true);
}

@Override
public void close() throws IOException {
// Finish the current sync task, if in progress and then shut down
try {
scheduler.shutdown();
} finally {
// Do a final sync of any remaining files
try {
doSync();
} finally {
super.close();
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
Expand Down Expand Up @@ -227,6 +226,23 @@ protected Directory createDirectory(Properties parameters) throws IOException {
throw new IOException("No luceneIndex set, and no '" + LuceneSail.LUCENE_DIR_KEY + "' or '"
+ LuceneSail.LUCENE_RAMDIR_KEY + "' parameter given. ");
}
long fsyncInterval = 0;
int maxPendingSyncs = 5000;
try {
var param = parameters.getProperty(LuceneSail.FSYNC_INTERVAL_KEY, "0");
fsyncInterval = Long.parseLong(param);
} catch (NumberFormatException e) {
logger.warn("Ignoring invalid {} parameter: {}", LuceneSail.FSYNC_INTERVAL_KEY, e.getMessage());
}
try {
var param = parameters.getProperty(LuceneSail.FSYNC_MAX_PENDING_FILES_KEY, "5000");
maxPendingSyncs = Integer.parseInt(param);
} catch (NumberFormatException e) {
logger.warn("Ignoring invalid {} parameter: {}", LuceneSail.FSYNC_MAX_PENDING_FILES_KEY, e.getMessage());
}
if (fsyncInterval > 0) {
dir = new DelayedSyncDirectoryWrapper(dir, fsyncInterval, maxPendingSyncs);
}
return dir;
}

Expand Down Expand Up @@ -385,10 +401,16 @@ public void shutDown() throws IOException {
}
} finally {
try {
IndexWriter toCloseIndexWriter = indexWriter;
indexWriter = null;
if (toCloseIndexWriter != null) {
toCloseIndexWriter.close();
try {
IndexWriter toCloseIndexWriter = indexWriter;
indexWriter = null;
if (toCloseIndexWriter != null) {
toCloseIndexWriter.close();
}
} finally {
// Close the directory -- if asynchronous fsync is used, this will clean
// up the scheduler thread too.
directory.close();
}
} finally {
if (!exceptions.isEmpty()) {
Expand Down
Loading
Loading