diff --git a/.github/agents/reviewer.agent.md b/.github/agents/reviewer.agent.md new file mode 100644 index 00000000..1347f137 --- /dev/null +++ b/.github/agents/reviewer.agent.md @@ -0,0 +1,87 @@ +--- +description: Code reviewer for the AGAVE codebase. Reviews diffs and individual files for correctness, style, and architectural fit. +tools: + [ + "codebase", + "search", + "usages", + "problems", + "changes", + "githubRepo", + "editFiles", + ] +--- + +# AGAVE Code Reviewer + +You review code changes in the AGAVE repository (C++17/Qt6 desktop app + Python client + JS web client). Be concise, specific, and cite file paths with line numbers. + +## Review Scope + +When invoked, determine what to review: + +1. If the user names files or a PR, review those. +2. If the reviewer names a branch, review all changes in that branch relative to its base branch +3. Otherwise, inspect uncommitted/staged changes via the changes tool. +4. For each changed file, read enough surrounding context to understand intent — do not review lines in isolation. + +## What To Check + +### Architecture + +- `renderlib/` must not depend on Qt. Flag any `Q*` types, `QObject`, signals/slots, or Qt headers leaking into `renderlib/`. +- GUI logic belongs in `agave_app/`. Rendering, I/O, scene, and serialization belong in `renderlib/`. It is preferable to have anything not necessary for the GUI to be pushed down into `renderlib/`. +- New commands must be added to **all** of: `renderlib/command.h`, `renderlib/command.cpp`, `agave_app/commandBuffer.cpp`, `test/test_commands.cpp`, `agave_pyclient/agave_pyclient/commandbuffer.py`, `agave_pyclient/agave_pyclient/agave.py`, `webclient/src/commandbuffer.ts`, `webclient/src/agave.ts`. Verify the integer ID is unique and consistent, and that the argument list/order matches across all locations. + +### C++ Style + +- C++17 only — no later-standard features. +- Classes / methods / enums: `PascalCase`. Member variables: `m_` prefix. +- Headers use `#pragma once`. +- Include order: local project headers → standard C++ → third-party → Qt. +- Prefer `const`, `constexpr`, references, and RAII. Flag raw `new`/`delete` outside ownership-transfer patterns already in use. +- Watch for missing `override`, unnecessary copies in range-for, signed/unsigned comparisons, and narrowing conversions (especially `size_t` ↔ `int`). +- We use clang-format with Mozilla-style (see .clang-format). Code should already be autoformatted. +- We use clang-tidy with a custom config (see .clang-tidy). Flag any warnings that are not explicitly disabled there. + +### Python Style + +- PEP 8 / `snake_case`. +- Should pass `ruff check`, `ruff format`. + +### Correctness & Safety + +- Qt signal/slot connections: confirm sender/receiver lifetimes and that lambdas capturing `this` are safe. +- OpenGL / GPU code in `renderlib/graphics/`: check resource cleanup, context currency, and that GL calls aren't made from non-GL threads. +- File I/O in `renderlib/io/`: validate bounds, handle malformed input, and avoid blocking the UI thread. +- Command protocol: `parse()` / `write()` field order must match the `CMD_ARGS` declaration exactly. +- OWASP-relevant issues: unchecked input sizes, path traversal in file loaders, integer overflow in image dimension math. + +### Tests + +- New commands require a round-trip test in `test/test_commands.cpp` (see `AGENTS.md` for the pattern). +- Non-trivial logic in `renderlib/` should have a Catch2 test. +- Python client changes should have a corresponding test in `agave_pyclient/tests/`. + +### Build & Versioning + +- New source files must be added to the relevant `CMakeLists.txt`. +- Version bumps go through `tbump` — flag manual edits to version strings. + +## Output Format + +Group findings by severity: + +- **Blocking** — bugs, protocol mismatches, broken builds, security issues. +- **Should fix** — style violations, missing tests, architectural drift. +- **Nits** — minor suggestions, naming, comments. + +For each finding, cite the file and line(s) and give a short rationale plus a concrete suggested change. If a finding is speculative, say so. + +End with a one-line overall recommendation: _approve_, _approve with comments_, or _request changes_. + +## Constraints + +- Do not rewrite the whole change. Suggest targeted edits. +- Do not run the build or tests unless asked. +- If you edit files, limit edits to the specific fixes you flagged; do not opportunistically refactor unrelated code. diff --git a/AGENTS.md b/AGENTS.md index 8a3459e9..56a8f8b9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -34,8 +34,8 @@ aqt install-qt --outputdir ~/Qt mac desktop 6.9.3 -m qtwebsockets qtimageformats export Qt6_DIR=~/Qt/6.9.3/macos mkdir build && cd build -cmake .. -make +cmake .. -G Ninja +cmake --build . ``` ### Windows @@ -90,7 +90,7 @@ clang-tidy -p build --fix renderlib/RenderSettings.cpp ### Python - PEP 8 / snake_case -- Tooling: black, flake8, pyright (see `pyrightconfig.json`) +- Tooling: - `ruff check`, `ruff format`. ## Conventions diff --git a/agave_app/CMakeLists.txt b/agave_app/CMakeLists.txt index 6f351a63..c7612d78 100644 --- a/agave_app/CMakeLists.txt +++ b/agave_app/CMakeLists.txt @@ -27,6 +27,12 @@ target_sources(agaveapp PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/CameraDockWidget.h" "${CMAKE_CURRENT_SOURCE_DIR}/CameraWidget.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/CameraWidget.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CacheSettings.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/CacheSettings.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CacheSettingsDockWidget.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/CacheSettingsDockWidget.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CacheSettingsWidget.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/CacheSettingsWidget.h" "${CMAKE_CURRENT_SOURCE_DIR}/citationDialog.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/citationDialog.h" "${CMAKE_CURRENT_SOURCE_DIR}/cgiparser.cpp" diff --git a/agave_app/CacheSettings.cpp b/agave_app/CacheSettings.cpp new file mode 100644 index 00000000..29a6a1b3 --- /dev/null +++ b/agave_app/CacheSettings.cpp @@ -0,0 +1,148 @@ +#include "CacheSettings.h" + +#include "renderlib/CacheManager.h" +#include "renderlib/Logging.h" +#include "renderlib/SystemInfo.h" + +#include +#include +#include + +#include + +namespace { + +::CacheConfig +toRenderlibConfig(const CacheSettingsData& data) +{ + ::CacheConfig config; + config.enabled = data.enabled; + config.enableDisk = data.enableDisk; + + std::uint64_t availableMem = SystemInfo::availableMemoryBytes(); + if (availableMem > 0) { + config.maxRamBytes = std::min(data.maxRamBytes, availableMem); + if (config.maxRamBytes < data.maxRamBytes) { + LOG_WARNING << "Cache RAM limit reduced from requested " << data.maxRamBytes << " to " << config.maxRamBytes + << " bytes to fit available memory."; + } + } else { + config.maxRamBytes = data.maxRamBytes; + } + + // The cache root is owned and writability-checked by CacheManager; clamp the + // disk limit against whatever filesystem it actually lives on. + std::uint64_t availableDisk = SystemInfo::availableDiskBytes(CacheManager::instance().getCacheDirectory()); + if (availableDisk > 0) { + config.maxDiskBytes = std::min(data.maxDiskBytes, availableDisk); + if (config.maxDiskBytes < data.maxDiskBytes && data.enableDisk) { + LOG_WARNING << "Cache disk limit reduced from requested " << data.maxDiskBytes << " to " << config.maxDiskBytes + << " bytes to fit available disk space."; + } + } else { + config.maxDiskBytes = data.maxDiskBytes; + } + + if (!config.enableDisk) { + config.maxDiskBytes = 0; + } + + return config; +} + +} // namespace + +CacheSettings::CacheSettings() = default; + +CacheSettingsData +CacheSettings::defaultSettings() const +{ + // Tunable defaults come from CacheSettingsData's in-class initializers. + return {}; +} + +std::string +CacheSettings::configPath() const +{ + QString baseDir = QStandardPaths::writableLocation(QStandardPaths::AppConfigLocation); + if (baseDir.isEmpty()) { + baseDir = QStandardPaths::writableLocation(QStandardPaths::AppDataLocation); + } + if (baseDir.isEmpty()) { + baseDir = QDir::currentPath(); + } + QDir().mkpath(baseDir); + return QDir(baseDir).filePath("cache_settings.json").toStdString(); +} + +CacheSettingsData +CacheSettings::load() +{ + CacheSettingsData data = defaultSettings(); + QString path = QString::fromStdString(configPath()); + QFile file(path); + if (!file.exists()) { + return data; + } + if (!file.open(QIODevice::ReadOnly)) { + return data; + } + + QByteArray raw = file.readAll(); + try { + nlohmann::json doc = nlohmann::json::parse(raw.toStdString()); + if (doc.contains("enabled")) { + data.enabled = doc["enabled"].get(); + } + if (doc.contains("enableDisk")) { + data.enableDisk = doc["enableDisk"].get(); + } + if (doc.contains("maxRamBytes")) { + data.maxRamBytes = doc["maxRamBytes"].get(); + } + if (doc.contains("maxDiskBytes")) { + data.maxDiskBytes = doc["maxDiskBytes"].get(); + } + } catch (...) { + return defaultSettings(); + } + + return data; +} + +bool +CacheSettings::save(const CacheSettingsData& data) const +{ + nlohmann::json doc; + doc["enabled"] = data.enabled; + doc["enableDisk"] = data.enableDisk; + doc["maxRamBytes"] = data.maxRamBytes; + doc["maxDiskBytes"] = data.maxDiskBytes; + + QString path = QString::fromStdString(configPath()); + QFile file(path); + if (!file.open(QIODevice::WriteOnly)) { + return false; + } + std::string out = doc.dump(2); + file.write(out.c_str(), static_cast(out.size())); + return true; +} + +void +CacheSettings::applyToRenderlib(const CacheSettingsData& data) const +{ + // The cache directory (and its writability) is settled once at startup in + // CacheManager::initialize(); if it wasn't writable the manager left its root + // unset, so a disk-enabled config here is simply honored as RAM-only. We only + // push the runtime tunables. + ::CacheConfig config = toRenderlibConfig(data); + LOG_INFO << "Cache config: enabled=" << (config.enabled ? 1 : 0) << " ram_bytes=" << config.maxRamBytes + << " disk_enabled=" << (config.enableDisk ? 1 : 0) << " disk_bytes=" << config.maxDiskBytes + << " cache_dir=" << CacheManager::instance().getCacheDirectory(); + CacheManager::instance().setConfig(config); + auto applied = CacheManager::instance().getConfig(); + LOG_INFO << "Cache config applied: enabled=" << (applied.enabled ? 1 : 0) << " ram_bytes=" << applied.maxRamBytes + << " disk_enabled=" << (applied.enableDisk ? 1 : 0) << " disk_bytes=" << applied.maxDiskBytes + << " cache_dir=" << CacheManager::instance().getCacheDirectory(); +} diff --git a/agave_app/CacheSettings.h b/agave_app/CacheSettings.h new file mode 100644 index 00000000..579cab17 --- /dev/null +++ b/agave_app/CacheSettings.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +struct CacheSettingsData +{ + bool enabled = true; + bool enableDisk = true; + std::uint64_t maxRamBytes = 4ULL * 1024ULL * 1024ULL * 1024ULL; + std::uint64_t maxDiskBytes = 100ULL * 1024ULL * 1024ULL * 1024ULL; +}; + +class CacheSettings +{ +public: + CacheSettings(); + + CacheSettingsData load(); + bool save(const CacheSettingsData& data) const; + + void applyToRenderlib(const CacheSettingsData& data) const; + + CacheSettingsData defaultSettings() const; + std::string configPath() const; +}; diff --git a/agave_app/CacheSettingsDockWidget.cpp b/agave_app/CacheSettingsDockWidget.cpp new file mode 100644 index 00000000..69af9bab --- /dev/null +++ b/agave_app/CacheSettingsDockWidget.cpp @@ -0,0 +1,8 @@ +#include "CacheSettingsDockWidget.h" + +CacheSettingsDockWidget::CacheSettingsDockWidget(QWidget* parent) + : QDockWidget(parent) +{ + setWindowTitle(tr("Advanced Cache Settings")); + setWidget(&m_settingsWidget); +} diff --git a/agave_app/CacheSettingsDockWidget.h b/agave_app/CacheSettingsDockWidget.h new file mode 100644 index 00000000..f7625ebb --- /dev/null +++ b/agave_app/CacheSettingsDockWidget.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#include "CacheSettingsWidget.h" + +class CacheSettingsDockWidget : public QDockWidget +{ + Q_OBJECT + +public: + explicit CacheSettingsDockWidget(QWidget* parent = nullptr); + + CacheSettingsWidget* widget() { return &m_settingsWidget; } + +private: + CacheSettingsWidget m_settingsWidget; +}; diff --git a/agave_app/CacheSettingsWidget.cpp b/agave_app/CacheSettingsWidget.cpp new file mode 100644 index 00000000..89d49ed3 --- /dev/null +++ b/agave_app/CacheSettingsWidget.cpp @@ -0,0 +1,62 @@ +#include "CacheSettingsWidget.h" + +#include "renderlib/CacheManager.h" + +#include + +CacheSettingsWidget::CacheSettingsWidget(QWidget* parent) + : QWidget(parent) +{ + auto* layout = new QFormLayout(this); + + m_enableCache = new QCheckBox(tr("Enable cache"), this); + m_enableDisk = new QCheckBox(tr("Enable disk cache"), this); + + m_ramLimitMB = new QSpinBox(this); + m_ramLimitMB->setRange(0, 1024 * 1024); + m_ramLimitMB->setSuffix(tr(" MB")); + m_ramLimitMB->setSingleStep(256); + + m_diskLimitGB = new QSpinBox(this); + m_diskLimitGB->setRange(0, 1024 * 1024); + m_diskLimitGB->setSuffix(tr(" GB")); + m_diskLimitGB->setSingleStep(10); + + m_cacheDirLabel = new QLabel(this); + // The cache directory is fixed (registered at startup); display it read-only. + m_cacheDirLabel->setText(QString::fromStdString(CacheManager::instance().getCacheDirectory())); + m_cacheDirLabel->setTextInteractionFlags(Qt::TextSelectableByMouse); + m_cacheDirLabel->setWordWrap(true); + + m_applyButton = new QPushButton(tr("Apply"), this); + m_clearDiskButton = new QPushButton(tr("Clear disk cache"), this); + + layout->addRow(m_enableCache); + layout->addRow(m_enableDisk); + layout->addRow(tr("RAM limit"), m_ramLimitMB); + layout->addRow(tr("Disk limit"), m_diskLimitGB); + layout->addRow(tr("Cache directory"), m_cacheDirLabel); + layout->addRow(QString(), m_applyButton); + layout->addRow(QString(), m_clearDiskButton); + setLayout(layout); +} + +void +CacheSettingsWidget::setSettings(const CacheSettingsData& data) +{ + m_enableCache->setChecked(data.enabled); + m_enableDisk->setChecked(data.enableDisk); + m_ramLimitMB->setValue(static_cast(data.maxRamBytes / (1024ULL * 1024ULL))); + m_diskLimitGB->setValue(static_cast(data.maxDiskBytes / (1024ULL * 1024ULL * 1024ULL))); +} + +CacheSettingsData +CacheSettingsWidget::getSettings() const +{ + CacheSettingsData data; + data.enabled = m_enableCache->isChecked(); + data.enableDisk = m_enableDisk->isChecked(); + data.maxRamBytes = static_cast(m_ramLimitMB->value()) * 1024ULL * 1024ULL; + data.maxDiskBytes = static_cast(m_diskLimitGB->value()) * 1024ULL * 1024ULL * 1024ULL; + return data; +} diff --git a/agave_app/CacheSettingsWidget.h b/agave_app/CacheSettingsWidget.h new file mode 100644 index 00000000..2da5cb36 --- /dev/null +++ b/agave_app/CacheSettingsWidget.h @@ -0,0 +1,32 @@ +#pragma once + +#include "CacheSettings.h" + +#include +#include +#include +#include +#include + +class CacheSettingsWidget : public QWidget +{ + Q_OBJECT + +public: + explicit CacheSettingsWidget(QWidget* parent = nullptr); + + void setSettings(const CacheSettingsData& data); + CacheSettingsData getSettings() const; + + QPushButton* applyButton() const { return m_applyButton; } + QPushButton* clearDiskButton() const { return m_clearDiskButton; } + +private: + QCheckBox* m_enableCache = nullptr; + QCheckBox* m_enableDisk = nullptr; + QSpinBox* m_ramLimitMB = nullptr; + QSpinBox* m_diskLimitGB = nullptr; + QLabel* m_cacheDirLabel = nullptr; + QPushButton* m_applyButton = nullptr; + QPushButton* m_clearDiskButton = nullptr; +}; diff --git a/agave_app/TimelineDockWidget.cpp b/agave_app/TimelineDockWidget.cpp index d4050b06..8ad1112b 100644 --- a/agave_app/TimelineDockWidget.cpp +++ b/agave_app/TimelineDockWidget.cpp @@ -82,16 +82,10 @@ QTimelineWidget::OnTimeChanged(int newTime) LoadSpec loadSpec = m_loadSpec; loadSpec.time = newTime; - if (!m_reader) { - m_reader = std::shared_ptr(FileReader::getReader(loadSpec.filepath, loadSpec.isImageSequence)); - if (!m_reader) { - LOG_ERROR << "Could not find a reader for file " << loadSpec.filepath; - return; - } - } - QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); - std::shared_ptr image = m_reader->loadFromFile(loadSpec); + // Reuse the reader created at file-open time so we skip re-opening the file + // and re-parsing metadata on every time-step. + std::shared_ptr image = FileReader::loadAndCache(loadSpec, m_reader); QApplication::restoreOverrideCursor(); if (!image) { // TODO FIXME if we fail to set the new time, then reset the GUI to previous time diff --git a/agave_app/agaveGui.cpp b/agave_app/agaveGui.cpp index 71bbbd8b..5d38c841 100644 --- a/agave_app/agaveGui.cpp +++ b/agave_app/agaveGui.cpp @@ -10,10 +10,12 @@ #include "renderlib/Logging.h" #include "renderlib/Status.h" #include "renderlib/VolumeDimensions.h" +#include "renderlib/CacheManager.h" #include "renderlib/io/FileReader.h" #include "renderlib/version.hpp" #include "AppearanceDockWidget.h" +#include "CacheSettingsDockWidget.h" #include "CameraDockWidget.h" #include "Serialize.h" #include "StatisticsDockWidget.h" @@ -98,6 +100,28 @@ agaveGui::agaveGui(QWidget* parent) createDockWindows(); setDockOptions(AllowTabbedDocks); + CacheSettingsData cacheData = m_cacheSettings.load(); + m_cacheSettingsDockWidget->widget()->setSettings(cacheData); + m_cacheSettings.applyToRenderlib(cacheData); + connect(m_cacheSettingsDockWidget->widget()->applyButton(), &QPushButton::clicked, this, [this]() { + CacheSettingsData data = m_cacheSettingsDockWidget->widget()->getSettings(); + m_cacheSettings.save(data); + m_cacheSettings.applyToRenderlib(data); + }); + connect(m_cacheSettingsDockWidget->widget()->clearDiskButton(), &QPushButton::clicked, this, [this]() { + // Show the directory that will actually be cleared. + QString cacheDir = QString::fromStdString(CacheManager::instance().getCacheDirectory()); + QMessageBox::StandardButton reply = + QMessageBox::question(this, + tr("Clear disk cache"), + tr("Delete all cached volume data in:\n%1\n\nThis cannot be undone.").arg(cacheDir), + QMessageBox::Yes | QMessageBox::No, + QMessageBox::No); + if (reply == QMessageBox::Yes) { + CacheManager::instance().clearDiskCache(); + } + }); + m_tabs = new QTabWidget(this); QHBoxLayout* mainLayout = new QHBoxLayout; @@ -368,6 +392,11 @@ agaveGui::createDockWindows() m_statisticsDockWidget->setAllowedAreas(Qt::AllDockWidgetAreas); addDockWidget(Qt::RightDockWidgetArea, m_statisticsDockWidget); + m_cacheSettingsDockWidget = new CacheSettingsDockWidget(this); + m_cacheSettingsDockWidget->setAllowedAreas(Qt::AllDockWidgetAreas); + addDockWidget(Qt::LeftDockWidgetArea, m_cacheSettingsDockWidget); + m_cacheSettingsDockWidget->setVisible(false); + m_viewMenu->addSeparator(); m_viewMenu->addAction(m_cameradock->toggleViewAction()); m_viewMenu->addSeparator(); @@ -376,6 +405,8 @@ agaveGui::createDockWindows() m_viewMenu->addAction(m_appearanceDockWidget->toggleViewAction()); m_viewMenu->addSeparator(); m_viewMenu->addAction(m_statisticsDockWidget->toggleViewAction()); + m_viewMenu->addSeparator(); + m_viewMenu->addAction(m_cacheSettingsDockWidget->toggleViewAction()); } QSlider* @@ -596,6 +627,7 @@ agaveGui::onRenderAction() m_glView->doneCurrent(); m_glView->setEnabled(false); m_glView->setUpdatesEnabled(false); + m_cacheSettingsDockWidget->setEnabled(false); if (m_captureSettings.width == 0 && m_captureSettings.height == 0) { m_captureSettings.width = m_glView->width(); m_captureSettings.height = m_glView->height(); @@ -630,6 +662,7 @@ agaveGui::onRenderAction() m_renderSettings.m_DirtyFlags.SetFlag(LightsDirty); m_renderSettings.m_DirtyFlags.SetFlag(RenderParamsDirty); m_renderSettings.m_DirtyFlags.SetFlag(TransferFunctionDirty); + m_cacheSettingsDockWidget->setEnabled(true); m_glView->setEnabled(true); m_glView->resizeGL(m_glView->width(), m_glView->height()); m_glView->setUpdatesEnabled(true); @@ -828,7 +861,7 @@ agaveGui::open(const std::string& file, const Serialize::ViewerState* vs, bool i // We can update the render and gui progressively as chunks are loaded. // Also, this would allow renders to be cancelled during loading. QApplication::setOverrideCursor(QCursor(Qt::WaitCursor)); - std::shared_ptr image = reader->loadFromFile(loadSpec); + std::shared_ptr image = FileReader::loadAndCache(loadSpec, reader); QApplication::restoreOverrideCursor(); if (!image) { LOG_DEBUG << "Failed to open " << file; diff --git a/agave_app/agaveGui.h b/agave_app/agaveGui.h index 4e97c3f9..e5f65d9b 100644 --- a/agave_app/agaveGui.h +++ b/agave_app/agaveGui.h @@ -3,6 +3,7 @@ #include "ui_agaveGui.h" #include "Camera.h" +#include "CacheSettings.h" #include "GLView3D.h" #include "QRenderSettings.h" #include "ViewerState.h" @@ -18,6 +19,7 @@ class QAppearanceDockWidget; class QCameraDockWidget; class QStatisticsDockWidget; class QTimelineDockWidget; +class CacheSettingsDockWidget; class IFileReader; class ViewToolbar; @@ -152,6 +154,7 @@ private slots: QAppearanceDockWidget* m_appearanceDockWidget; QStatisticsDockWidget* m_statisticsDockWidget; + CacheSettingsDockWidget* m_cacheSettingsDockWidget = nullptr; QTabWidget* m_tabs; GLView3D* m_glView; @@ -172,6 +175,8 @@ private slots: Scene m_appScene; int m_currentScene = 0; + CacheSettings m_cacheSettings; + QAction* m_recentFileActs[MaxRecentFiles]; QAction* m_recentFileSeparator; QAction* m_recentFileSubMenuAct; diff --git a/agave_app/main.cpp b/agave_app/main.cpp index 5a4d4f67..474b178c 100644 --- a/agave_app/main.cpp +++ b/agave_app/main.cpp @@ -1,6 +1,7 @@ #include "agaveGui.h" #include "mainwindow.h" +#include "renderlib/CacheManager.h" #include "renderlib/Logging.h" #include "renderlib/io/FileReader.h" #include "renderlib/renderlib.h" @@ -10,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -134,6 +136,22 @@ getUrlToOpen(const QUrl& agaveUrl) return ""; } +// Platform-appropriate cache directory for AGAVE's volume cache. Cache files +// are not user data, so the location is fixed (not user-configurable) and +// resolved once at startup, the same way the assets path is. +std::string +getCacheDirectory() +{ + QString baseDir = QStandardPaths::writableLocation(QStandardPaths::CacheLocation); + if (baseDir.isEmpty()) { + baseDir = QStandardPaths::writableLocation(QStandardPaths::AppDataLocation); + } + if (baseDir.isEmpty()) { + baseDir = QDir::currentPath(); + } + return QDir(baseDir).filePath("agave-cache").toStdString(); +} + class AgaveApplication : public QApplication { public: @@ -254,6 +272,12 @@ main(int argc, char* argv[]) return 0; } + // Register the cache directory once for the lifetime of the process, after + // renderlib has successfully initialized. Resolving the platform path needs + // the Qt application name (set above). Applies to both server and GUI mode; + // caching stays inert until a CacheConfig enables it. + CacheManager::initialize(getCacheDirectory()); + if (isServer) { QString configPath = parser.value(serverConfigOption); ServerParams p = readConfig(configPath); diff --git a/renderlib/CMakeLists.txt b/renderlib/CMakeLists.txt index c73b80ff..eef768be 100644 --- a/renderlib/CMakeLists.txt +++ b/renderlib/CMakeLists.txt @@ -24,6 +24,9 @@ target_sources(renderlib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/BoundingBox.h" "${CMAKE_CURRENT_SOURCE_DIR}/BoundingBoxTool.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/BoundingBoxTool.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CacheConfig.h" + "${CMAKE_CURRENT_SOURCE_DIR}/CacheManager.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/CacheManager.h" "${CMAKE_CURRENT_SOURCE_DIR}/CCamera.h" "${CMAKE_CURRENT_SOURCE_DIR}/CCamera.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/ClipPlaneTool.cpp" @@ -79,6 +82,8 @@ target_sources(renderlib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/Status.h" "${CMAKE_CURRENT_SOURCE_DIR}/StringUtil.h" "${CMAKE_CURRENT_SOURCE_DIR}/StringUtil.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/SystemInfo.h" + "${CMAKE_CURRENT_SOURCE_DIR}/SystemInfo.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/threading.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/threading.h" "${CMAKE_CURRENT_SOURCE_DIR}/Timeline.cpp" diff --git a/renderlib/CacheConfig.h b/renderlib/CacheConfig.h new file mode 100644 index 00000000..e3f19a5e --- /dev/null +++ b/renderlib/CacheConfig.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +// Runtime-tunable cache settings. The disk cache *location* is intentionally +// not part of this struct: it is resolved once at app startup and registered +// via CacheManager::initialize(). Keeping it out of CacheConfig means +// the per-apply settings (enable/limits) can change freely without ever +// re-pointing the cache root. +struct CacheConfig +{ + bool enabled = false; + bool enableDisk = false; + std::uint64_t maxRamBytes = 0; + std::uint64_t maxDiskBytes = 0; +}; diff --git a/renderlib/CacheManager.cpp b/renderlib/CacheManager.cpp new file mode 100644 index 00000000..e8721e1d --- /dev/null +++ b/renderlib/CacheManager.cpp @@ -0,0 +1,967 @@ +#include "CacheManager.h" + +#include "ImageXYZC.h" +#include "Logging.h" + +#include "tensorstore/array.h" +#include "tensorstore/context.h" +#include "tensorstore/open.h" +#include "tensorstore/tensorstore.h" + +// must include after tensorstore so that tensorstore picks up its own internal json impl +#include "json/json.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +// Marker file written into any directory we manage as our own disk cache root. +// clearDiskCache requires this file to be present before it will delete +// anything, which protects against the user pointing the cache dir at a path +// like "C:\" or "/home/me" and then clicking "Clear disk cache". +constexpr const char* kCacheMarkerFilename = ".agave-cache-dir"; + +inline void +hashCombine(std::size_t& seed, std::size_t value) +{ + seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +std::uint64_t +nowMillis() +{ + return static_cast( + std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count()); +} + +std::string +toHex(std::size_t value) +{ + std::ostringstream stream; + stream << std::hex << value; + return stream.str(); +} + +// Paths beginning with these schemes are treated as remote; we don't try to +// stat them and the cache key omits mtime/size for them. +bool +isRemotePath(const std::string& path) +{ + return path.rfind("http", 0) == 0 || path.rfind("s3:", 0) == 0 || path.rfind("gs:", 0) == 0; +} + +// Normalize a filepath into a canonical form for cache key generation. Goals: +// - "/some/dir/./foo", "/some//dir//foo", and "/some/dir/x/../foo" all +// produce the same key (lexically_normal collapses these). +// - On Windows, "C:/foo", "C:\foo", and "c:\foo" all produce the same key +// (path treats both separators; lowercase normalizes case). +// +// We deliberately use lexically_normal (purely textual) rather than +// weakly_canonical, because the latter resolves relative paths against the +// process CWD — which would make bare names like "my_in_memory_array" passed +// to loadFromArray_4D produce different keys when CWD changes. We also pass +// remote URLs through unchanged, since lexically_normal would mangle the +// "://" portion into "//". +std::string +normalizeFilepath(const std::string& path) +{ + if (path.empty() || isRemotePath(path)) { + return path; + } + std::filesystem::path p(path); + std::string normalized = p.lexically_normal().generic_string(); +#ifdef _WIN32 + // NTFS and FAT are conventionally case-insensitive. (Case-sensitive + // Windows configurations — per-directory case sensitivity flag, ReFS, + // WSL paths reached via \\wsl$ — will see incorrect cache hits between + // case-only-different paths. Accept this for the common case.) + std::transform(normalized.begin(), normalized.end(), normalized.begin(), [](unsigned char c) { + return static_cast(std::tolower(c)); + }); +#endif + return normalized; +} + +// Returns (mtime_ns, file_size). Either or both may be 0 if the path is +// remote, missing, or otherwise unreadable. For directories (zarr) file_size +// is 0; mtime is the directory's last_write_time, which most filesystems +// update when entries are added/removed at the top level. This is best-effort +// invalidation — a zarr whose chunks were rewritten without touching the +// root directory will not be invalidated by this check. +std::pair +statForKey(const std::string& path) +{ + if (path.empty() || isRemotePath(path)) { + return { 0, 0 }; + } + std::error_code ec; + std::filesystem::file_status status = std::filesystem::status(path, ec); + if (ec || !std::filesystem::exists(status)) { + return { 0, 0 }; + } + std::uint64_t mtimeNs = 0; + auto writeTime = std::filesystem::last_write_time(path, ec); + if (!ec) { + mtimeNs = static_cast( + std::chrono::duration_cast(writeTime.time_since_epoch()).count()); + } + std::uint64_t size = 0; + if (std::filesystem::is_regular_file(status)) { + auto s = std::filesystem::file_size(path, ec); + if (!ec) { + size = static_cast(s); + } + } + return { mtimeNs, size }; +} + +std::vector +channelNamesFromImage(const ImageXYZC& image) +{ + std::vector names; + names.reserve(image.sizeC()); + for (size_t i = 0; i < image.sizeC(); ++i) { + names.push_back(image.channel(static_cast(i))->m_name); + } + return names; +} + +} // namespace + +bool +CacheKey::operator==(const CacheKey& other) const +{ + return filepath == other.filepath && subpath == other.subpath && scene == other.scene && time == other.time && + channels == other.channels && minx == other.minx && maxx == other.maxx && miny == other.miny && + maxy == other.maxy && minz == other.minz && maxz == other.maxz && isImageSequence == other.isImageSequence && + fileMtimeNs == other.fileMtimeNs && fileSize == other.fileSize; +} + +std::size_t +CacheKeyHash::operator()(const CacheKey& key) const +{ + std::size_t seed = 0; + hashCombine(seed, std::hash{}(key.filepath)); + hashCombine(seed, std::hash{}(key.subpath)); + hashCombine(seed, std::hash{}(key.scene)); + hashCombine(seed, std::hash{}(key.time)); + hashCombine(seed, std::hash{}(key.minx)); + hashCombine(seed, std::hash{}(key.maxx)); + hashCombine(seed, std::hash{}(key.miny)); + hashCombine(seed, std::hash{}(key.maxy)); + hashCombine(seed, std::hash{}(key.minz)); + hashCombine(seed, std::hash{}(key.maxz)); + hashCombine(seed, std::hash{}(key.isImageSequence)); + hashCombine(seed, std::hash{}(key.fileMtimeNs)); + hashCombine(seed, std::hash{}(key.fileSize)); + for (auto ch : key.channels) { + hashCombine(seed, std::hash{}(ch)); + } + return seed; +} + +namespace { +// Storage for the process-wide singleton. A unique_ptr (rather than a Meyers +// static) lets initialize() inject the cache directory at construction time. +std::unique_ptr& +singletonSlot() +{ + static std::unique_ptr slot; + return slot; +} +} // namespace + +CacheManager::CacheManager(std::string cacheDir) + : m_cacheDir(std::move(cacheDir)) +{ +} + +bool +CacheManager::canWriteCacheDir(const std::string& path) +{ + if (path.empty()) { + return false; + } + + std::error_code ec; + std::filesystem::path dir(path); + if (!std::filesystem::exists(dir, ec)) { + if (!std::filesystem::create_directories(dir, ec) || ec) { + LOG_WARNING << "canWriteCacheDir: failed to create " << path << ": " << ec.message(); + return false; + } + } else if (!std::filesystem::is_directory(dir, ec) || ec) { + LOG_WARNING << "canWriteCacheDir: not a directory: " << path; + return false; + } + + std::filesystem::path testPath = dir / ".agave_cache_write_test"; + { + std::ofstream testFile(testPath, std::ios::binary | std::ios::trunc); + if (!testFile.is_open()) { + return false; + } + testFile << "test"; + if (!testFile.good()) { + testFile.close(); + std::filesystem::remove(testPath, ec); + return false; + } + } + std::filesystem::remove(testPath, ec); + return !ec; +} + +void +CacheManager::initialize(const std::string& cacheDir) +{ + if (singletonSlot()) { + throw std::logic_error("CacheManager::initialize() called more than once; the cache directory is fixed for the " + "lifetime of the process."); + } + + // Probe writability once, here, rather than on every config-apply: the cache + // root is fixed for the lifetime of the process. If the directory can't be + // created or written, leave the root unset so the disk tier stays inert + // regardless of what any later CacheConfig requests. + std::string root = cacheDir; + if (!root.empty() && !canWriteCacheDir(root)) { + LOG_WARNING << "Disk cache disabled: cache directory not writable: " << root; + root.clear(); + } + singletonSlot() = std::make_unique(root); +} + +CacheManager& +CacheManager::instance() +{ + auto& slot = singletonSlot(); + if (!slot) { + // initialize() was never called (e.g. a context that does no caching); + // fall back to a RAM-only, disk-inert manager. + slot = std::make_unique(std::string{}); + } + return *slot; +} + +std::string +CacheManager::getCacheDirectory() const +{ + std::scoped_lock lock(m_mutex); + return m_cacheDir; +} + +void +CacheManager::setConfig(const CacheConfig& config) +{ + CacheConfig configCopy; + std::string cacheDirCopy; + bool rebuildDiskIndex = false; + { + std::scoped_lock lock(m_mutex); + m_config = config; + if (!m_config.enabled) { + m_entries.clear(); + m_lruKeys.clear(); + m_currentRamBytes = 0; + m_diskEntries.clear(); + m_currentDiskBytes = 0; + m_diskIndexRoot.clear(); + return; + } + + if (!m_config.enableDisk || m_cacheDir.empty()) { + m_diskEntries.clear(); + m_currentDiskBytes = 0; + m_diskIndexRoot.clear(); + } else if (m_diskIndexRoot != m_cacheDir) { + m_diskEntries.clear(); + m_currentDiskBytes = 0; + m_diskIndexRoot = m_cacheDir; + rebuildDiskIndex = true; + configCopy = m_config; + cacheDirCopy = m_cacheDir; + } + } + + if (rebuildDiskIndex) { + loadDiskIndex(configCopy, cacheDirCopy); + evictDiskIfNeeded(configCopy, 0); + } + + std::scoped_lock lock(m_mutex); + evictIfNeededLocked(0); +} + +CacheConfig +CacheManager::getConfig() const +{ + std::scoped_lock lock(m_mutex); + return m_config; +} + +std::shared_ptr +CacheManager::findImage(const LoadSpec& loadSpec) +{ + CacheConfig configCopy; + std::string cacheDirCopy; + CacheKey key = makeKey(loadSpec); + { + std::scoped_lock lock(m_mutex); + configCopy = m_config; + cacheDirCopy = m_cacheDir; + if (m_config.enabled && m_config.maxRamBytes > 0) { + auto it = m_entries.find(key); + if (it != m_entries.end()) { + touchEntry(it->second.lruIt); + m_stats.ramHits++; + LOG_DEBUG << "Cache stats: ram_hits=" << m_stats.ramHits << " disk_hits=" << m_stats.diskHits + << " misses=" << m_stats.misses << " disk_writes=" << m_stats.diskWrites; + // NOTE: we deliberately do not refresh the matching DiskEntry's + // lastAccess (or its on-disk meta.json) on a RAM hit. The disk LRU + // is only consulted when an entry has fallen out of RAM, and + // loadFromDisk refreshes the disk lastAccess at that point — so + // within a session the disk bookkeeping is fresh whenever it + // actually matters. + // + // TODO: edge case — an entry that stays RAM-resident for an entire + // session never has its disk lastAccess bumped, so at the next + // session start it can look "older" than entries that were only + // served from disk in the previous session, and may be the first + // thing evicted from disk on cold start. If that ever shows up as + // a real cold-start problem, fix by bumping the in-memory + // DiskEntry.lastAccess here (no on-disk write needed) and letting + // the existing flush-on-eviction-or-shutdown path persist it. + return it->second.image; + } + } + } + + if (configCopy.enabled && configCopy.enableDisk && configCopy.maxDiskBytes > 0 && !cacheDirCopy.empty()) { + auto diskImage = loadFromDisk(key, configCopy, cacheDirCopy); + if (diskImage) { + { + std::scoped_lock lock(m_mutex); + m_stats.diskHits++; + LOG_DEBUG << "Cache stats: ram_hits=" << m_stats.ramHits << " disk_hits=" << m_stats.diskHits + << " misses=" << m_stats.misses << " disk_writes=" << m_stats.diskWrites; + } + storeImageInMemory(key, diskImage); + return diskImage; + } + } + + { + std::scoped_lock lock(m_mutex); + m_stats.misses++; + LOG_DEBUG << "Cache stats: ram_hits=" << m_stats.ramHits << " disk_hits=" << m_stats.diskHits + << " misses=" << m_stats.misses << " disk_writes=" << m_stats.diskWrites; + } + + return nullptr; +} + +void +CacheManager::storeImage(const LoadSpec& loadSpec, const std::shared_ptr& image) +{ + if (!image) { + return; + } + + CacheConfig configCopy; + std::string cacheDirCopy; + { + std::scoped_lock lock(m_mutex); + configCopy = m_config; + cacheDirCopy = m_cacheDir; + } + + const auto key = makeKey(loadSpec); + + if (configCopy.enabled && configCopy.enableDisk && configCopy.maxDiskBytes > 0 && !cacheDirCopy.empty()) { + storeToDisk(key, image, configCopy, cacheDirCopy); + } + + storeImageInMemory(key, image); +} + +void +CacheManager::clearMemoryCache() +{ + std::scoped_lock lock(m_mutex); + m_entries.clear(); + m_lruKeys.clear(); + m_currentRamBytes = 0; +} + +void +CacheManager::clearDiskCache() +{ + std::string cacheDir; + std::vector knownEntryPaths; + { + std::scoped_lock lock(m_mutex); + cacheDir = m_cacheDir; + + if (!isAgaveCacheDir(cacheDir)) { + LOG_WARNING << "Refusing to clear disk cache: directory missing AGAVE cache marker file (" << kCacheMarkerFilename + << "): " << cacheDir; + return; + } + + knownEntryPaths.reserve(m_diskEntries.size()); + for (const auto& kv : m_diskEntries) { + knownEntryPaths.push_back(kv.second.path); + } + m_diskEntries.clear(); + m_currentDiskBytes = 0; + } + + if (cacheDir.empty()) { + return; + } + + // Remove the per-entry subdirectories we know about. + for (const auto& path : knownEntryPaths) { + std::error_code ec; + std::filesystem::remove_all(path, ec); + } + + // Also remove any orphan per-entry subdirectories left behind by prior + // sessions or partial writes. We only touch subdirectories that contain a + // meta.json (i.e. look like cache entries we wrote) — anything else the user + // may have placed in the cache dir is preserved. + std::error_code dirEc; + for (auto it = std::filesystem::directory_iterator(cacheDir, dirEc); + it != std::filesystem::directory_iterator() && !dirEc; + it.increment(dirEc)) { + if (!it->is_directory()) { + continue; + } + std::filesystem::path metaPath = it->path() / "meta.json"; + std::error_code existEc; + if (std::filesystem::exists(metaPath, existEc)) { + std::error_code rmEc; + std::filesystem::remove_all(it->path(), rmEc); + } + } +} + +void +CacheManager::writeCacheMarker(const std::string& path) const +{ + if (path.empty()) { + return; + } + std::error_code ec; + std::filesystem::create_directories(path, ec); + std::filesystem::path marker = std::filesystem::path(path) / kCacheMarkerFilename; + std::error_code existEc; + if (std::filesystem::exists(marker, existEc)) { + return; + } + std::ofstream out(marker.string(), std::ios::trunc); + if (out) { + out << "AGAVE disk cache root. Safe to delete this directory and its contents.\n"; + } +} + +bool +CacheManager::isAgaveCacheDir(const std::string& path) const +{ + if (path.empty()) { + return false; + } + std::error_code ec; + std::filesystem::path marker = std::filesystem::path(path) / kCacheMarkerFilename; + return std::filesystem::exists(marker, ec); +} + +CacheManager::CacheStats +CacheManager::getStats() const +{ + std::scoped_lock lock(m_mutex); + return m_stats; +} + +void +CacheManager::resetStats() +{ + std::scoped_lock lock(m_mutex); + m_stats = CacheStats{}; +} + +CacheKey +CacheManager::makeKey(const LoadSpec& loadSpec) const +{ + CacheKey key; + key.filepath = normalizeFilepath(loadSpec.filepath); + key.subpath = loadSpec.subpath; + key.scene = loadSpec.scene; + key.time = loadSpec.time; + key.channels = loadSpec.channels; + key.minx = loadSpec.minx; + key.maxx = loadSpec.maxx; + key.miny = loadSpec.miny; + key.maxy = loadSpec.maxy; + key.minz = loadSpec.minz; + key.maxz = loadSpec.maxz; + key.isImageSequence = loadSpec.isImageSequence; + // Use the normalized filepath for stat() too so equivalent paths produce + // identical fileMtimeNs / fileSize. + auto stat = statForKey(key.filepath); + key.fileMtimeNs = stat.first; + key.fileSize = stat.second; + return key; +} + +std::string +CacheManager::keyToString(const CacheKey& key) const +{ + std::ostringstream stream; + stream << key.filepath << "|" << key.subpath << "|" << key.scene << "|" << key.time << "|"; + stream << key.minx << "," << key.maxx << "," << key.miny << "," << key.maxy << "," << key.minz << "," << key.maxz + << "|" << (key.isImageSequence ? 1 : 0) << "|"; + stream << "m=" << key.fileMtimeNs << ",s=" << key.fileSize << "|"; + for (size_t i = 0; i < key.channels.size(); ++i) { + if (i > 0) { + stream << ","; + } + stream << key.channels[i]; + } + return stream.str(); +} + +std::string +CacheManager::diskCacheId(const CacheKey& key) const +{ + std::size_t hashValue = std::hash{}(keyToString(key)); + return toHex(hashValue); +} + +std::uint64_t +CacheManager::estimateImageBytes(const ImageXYZC& image) const +{ + std::uint64_t bytesPerPixel = static_cast(ImageXYZC::IN_MEMORY_BPP / 8); + std::uint64_t numPixels = static_cast(image.sizeX()) * static_cast(image.sizeY()) * + static_cast(image.sizeZ()) * static_cast(image.sizeC()); + return numPixels * bytesPerPixel; +} + +void +CacheManager::touchEntry(std::list::iterator it) +{ + if (it == m_lruKeys.begin()) { + return; + } + m_lruKeys.splice(m_lruKeys.begin(), m_lruKeys, it); +} + +void +CacheManager::evictIfNeededLocked(std::uint64_t incomingBytes) +{ + if (m_config.maxRamBytes == 0) { + return; + } + + while (!m_lruKeys.empty() && (m_currentRamBytes + incomingBytes) > m_config.maxRamBytes) { + const CacheKey& key = m_lruKeys.back(); + auto it = m_entries.find(key); + if (it != m_entries.end()) { + m_currentRamBytes -= it->second.bytes; + m_entries.erase(it); + } + m_lruKeys.pop_back(); + } +} + +void +CacheManager::storeImageInMemory(const CacheKey& key, const std::shared_ptr& image) +{ + std::scoped_lock lock(m_mutex); + if (!m_config.enabled || m_config.maxRamBytes == 0) { + return; + } + + std::uint64_t bytes = estimateImageBytes(*image); + if (bytes == 0 || bytes > m_config.maxRamBytes) { + return; + } + + auto existing = m_entries.find(key); + if (existing != m_entries.end()) { + m_currentRamBytes -= existing->second.bytes; + m_lruKeys.erase(existing->second.lruIt); + m_entries.erase(existing); + } + + evictIfNeededLocked(bytes); + + m_lruKeys.push_front(key); + CacheEntry entry; + entry.image = image; + entry.bytes = bytes; + entry.lruIt = m_lruKeys.begin(); + m_entries.emplace(key, entry); + m_currentRamBytes += bytes; +} + +std::shared_ptr +CacheManager::loadFromDisk(const CacheKey& key, const CacheConfig& config, const std::string& cacheDir) +{ + if (!config.enableDisk || cacheDir.empty()) { + return nullptr; + } + + std::filesystem::path entryPath = std::filesystem::path(cacheDir) / diskCacheId(key); + std::filesystem::path metaPath = entryPath / "meta.json"; + std::filesystem::path dataPath = entryPath / "data.zarr"; + if (!std::filesystem::exists(metaPath) || !std::filesystem::exists(dataPath)) { + return nullptr; + } + + nlohmann::json meta; + try { + std::ifstream metaFile(metaPath.string()); + metaFile >> meta; + } catch (...) { + return nullptr; + } + + if (!meta.contains("key") || meta["key"].get() != keyToString(key)) { + return nullptr; + } + + if (!meta.contains("sizeX") || !meta.contains("sizeY") || !meta.contains("sizeZ") || !meta.contains("sizeC")) { + return nullptr; + } + + std::uint32_t sizeX = meta["sizeX"].get(); + std::uint32_t sizeY = meta["sizeY"].get(); + std::uint32_t sizeZ = meta["sizeZ"].get(); + std::uint32_t sizeC = meta["sizeC"].get(); + std::uint32_t bpp = meta.value("bpp", static_cast(ImageXYZC::IN_MEMORY_BPP)); + if (bpp != static_cast(ImageXYZC::IN_MEMORY_BPP)) { + LOG_ERROR << "Disk cache load: unsupported bpp " << bpp << " in " << metaPath.string() << " (expected " + << ImageXYZC::IN_MEMORY_BPP << "); skipping cache entry"; + return nullptr; + } + float sx = meta.value("physicalSizeX", 1.0f); + float sy = meta.value("physicalSizeY", 1.0f); + float sz = meta.value("physicalSizeZ", 1.0f); + std::string spatialUnits = meta.value("spatialUnits", std::string("units")); + + std::uint64_t bytes = static_cast(sizeX) * static_cast(sizeY) * + static_cast(sizeZ) * static_cast(sizeC) * + static_cast(bpp / 8); + if (bytes == 0) { + return nullptr; + } + + std::unique_ptr data(new uint8_t[bytes]); + + auto openFuture = + tensorstore::Open({ { "driver", "zarr3" }, { "kvstore", { { "driver", "file" }, { "path", dataPath.string() } } } }, + tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read); + const auto& result = openFuture.result(); + if (!result.ok()) { + return nullptr; + } + + auto store = result.value(); + std::vector shape = { sizeC, sizeZ, sizeY, sizeX }; + auto arr = tensorstore::Array(reinterpret_cast(data.get()), shape); + auto readResult = tensorstore::Read(store, tensorstore::UnownedToShared(arr)).result(); + if (!readResult.ok()) { + return nullptr; + } + + ImageXYZC* image = new ImageXYZC(sizeX, sizeY, sizeZ, sizeC, bpp, data.release(), sx, sy, sz, spatialUnits); + std::shared_ptr sharedImage(image); + + if (meta.contains("channelNames")) { + std::vector channelNames; + for (auto& item : meta["channelNames"]) { + channelNames.push_back(item.get()); + } + image->setChannelNames(channelNames); + } + + meta["lastAccess"] = nowMillis(); + try { + std::ofstream metaOut(metaPath.string(), std::ios::trunc); + metaOut << meta.dump(2); + } catch (...) { + // best effort + } + + { + std::scoped_lock lock(m_mutex); + auto it = m_diskEntries.find(diskCacheId(key)); + if (it != m_diskEntries.end()) { + it->second.lastAccess = meta["lastAccess"].get(); + } + } + + return sharedImage; +} + +void +CacheManager::storeToDisk(const CacheKey& key, + const std::shared_ptr& image, + const CacheConfig& config, + const std::string& cacheDir) +{ + if (!image || !config.enableDisk || cacheDir.empty()) { + return; + } + + std::uint64_t bytes = estimateImageBytes(*image); + if (bytes == 0) { + return; + } + if (bytes > config.maxDiskBytes) { + // A single image larger than the entire disk cap; refuse rather than + // evict everything and overshoot. + LOG_WARNING << "Disk cache: skipping store of " << bytes << " byte image — larger than disk cap " + << config.maxDiskBytes; + return; + } + + // Make room before writing, so we never temporarily exceed the cap on + // disk and don't waste a large write that would have to be undone. + evictDiskIfNeeded(config, bytes); + + writeCacheMarker(cacheDir); + + std::filesystem::path entryPath = std::filesystem::path(cacheDir) / diskCacheId(key); + std::filesystem::path dataPath = entryPath / "data.zarr"; + std::filesystem::path metaPath = entryPath / "meta.json"; + std::error_code ec; + std::filesystem::create_directories(entryPath, ec); + + std::uint32_t sizeX = static_cast(image->sizeX()); + std::uint32_t sizeY = static_cast(image->sizeY()); + std::uint32_t sizeZ = static_cast(image->sizeZ()); + std::uint32_t sizeC = static_cast(image->sizeC()); + std::uint32_t bpp = static_cast(ImageXYZC::IN_MEMORY_BPP); + + std::vector shape = { sizeC, sizeZ, sizeY, sizeX }; + std::vector chunkShape = { 1, + std::min(16, sizeZ), + std::min(256, sizeY), + std::min(256, sizeX) }; + + nlohmann::json schema = { { "dtype", "uint16" }, + { "domain", { { "shape", shape } } }, + { "chunk_layout", + { { "read_chunk", { { "shape", chunkShape } } }, + { "write_chunk", { { "shape", chunkShape } } } } } }; + + auto openFuture = tensorstore::Open( + { { "driver", "zarr3" }, + { "kvstore", { { "driver", "file" }, { "path", dataPath.string() } } }, + { "schema", schema } }, + tensorstore::OpenMode::create | tensorstore::OpenMode::open); + const auto& result = openFuture.result(); + if (!result.ok()) { + std::error_code rmEc; + std::filesystem::remove_all(entryPath, rmEc); + LOG_WARNING << "Disk cache store: tensorstore open failed for " << dataPath.string(); + return; + } + + auto store = result.value(); + auto arr = tensorstore::Array(reinterpret_cast(image->ptr()), shape); + auto writeResult = tensorstore::Write(tensorstore::UnownedToShared(arr), store).result(); + if (!writeResult.ok()) { + std::error_code rmEc; + std::filesystem::remove_all(entryPath, rmEc); + LOG_WARNING << "Disk cache store: tensorstore write failed for " << dataPath.string(); + return; + } + + std::uint64_t accessTime = nowMillis(); + nlohmann::json meta = { { "key", keyToString(key) }, + { "sizeX", sizeX }, + { "sizeY", sizeY }, + { "sizeZ", sizeZ }, + { "sizeC", sizeC }, + { "bpp", bpp }, + { "physicalSizeX", image->physicalSizeX() }, + { "physicalSizeY", image->physicalSizeY() }, + { "physicalSizeZ", image->physicalSizeZ() }, + { "spatialUnits", image->spatialUnits() }, + { "channelNames", channelNamesFromImage(*image) }, + { "bytes", bytes }, + { "lastAccess", accessTime } }; + + try { + std::ofstream metaOut(metaPath.string(), std::ios::trunc); + metaOut << meta.dump(2); + } catch (...) { + std::error_code rmEc; + std::filesystem::remove_all(entryPath, rmEc); + LOG_WARNING << "Disk cache store: meta.json write failed for " << metaPath.string(); + return; + } + + { + std::scoped_lock lock(m_mutex); + m_stats.diskWrites++; + DiskEntry entry; + entry.path = entryPath.string(); + entry.bytes = bytes; + entry.lastAccess = accessTime; + auto it = m_diskEntries.find(diskCacheId(key)); + if (it != m_diskEntries.end()) { + if (m_currentDiskBytes >= it->second.bytes) { + m_currentDiskBytes -= it->second.bytes; + } else { + m_currentDiskBytes = 0; + } + } + m_diskEntries[diskCacheId(key)] = entry; + m_currentDiskBytes += bytes; + } +} + +void +CacheManager::loadDiskIndex(const CacheConfig& config, const std::string& cacheDir) +{ + if (cacheDir.empty() || !config.enableDisk) { + return; + } + + std::filesystem::path root(cacheDir); + std::error_code ec; + std::filesystem::create_directories(root, ec); + if (!std::filesystem::exists(root)) { + return; + } + writeCacheMarker(cacheDir); + + std::unordered_map entries; + std::uint64_t totalBytes = 0; + + std::error_code iterEc; + for (const auto& dirEntry : std::filesystem::directory_iterator(root, iterEc)) { + if (iterEc) { + LOG_WARNING << "loadDiskIndex: error reading cache directory " << root.string() << ": " << iterEc.message(); + break; + } + if (!dirEntry.is_directory()) { + continue; + } + + std::filesystem::path metaPath = dirEntry.path() / "meta.json"; + if (!std::filesystem::exists(metaPath)) { + continue; + } + + try { + nlohmann::json meta; + std::ifstream metaFile(metaPath.string()); + metaFile >> meta; + if (!meta.contains("lastAccess")) { + continue; + } + + DiskEntry entry; + entry.path = dirEntry.path().string(); + entry.lastAccess = meta["lastAccess"].get(); + if (meta.contains("bytes")) { + entry.bytes = meta["bytes"].get(); + } else { + entry.bytes = directorySizeBytes(entry.path); + } + + std::string id = dirEntry.path().filename().string(); + entries[id] = entry; + totalBytes += entry.bytes; + } catch (...) { + continue; + } + } + + std::scoped_lock lock(m_mutex); + m_diskEntries = std::move(entries); + m_currentDiskBytes = totalBytes; +} + +void +CacheManager::evictDiskIfNeeded(const CacheConfig& config, std::uint64_t incomingBytes) +{ + if (!config.enableDisk || config.maxDiskBytes == 0) { + return; + } + + // Hold the lock for the entire eviction so a concurrent storeToDisk can't + // re-populate an entry we're about to delete on disk (which would + // otherwise let us nuke the fresh file). Each per-entry remove_all is a + // single small zarr directory, so keeping the lock held is acceptable. + std::scoped_lock lock(m_mutex); + + if ((m_currentDiskBytes + incomingBytes) <= config.maxDiskBytes) { + return; + } + + // Sort entries by lastAccess ascending so we evict the oldest first. + std::vector> byAge; + byAge.reserve(m_diskEntries.size()); + for (const auto& kv : m_diskEntries) { + byAge.emplace_back(kv.second.lastAccess, kv.first); + } + std::sort(byAge.begin(), byAge.end()); + + for (const auto& aged : byAge) { + if ((m_currentDiskBytes + incomingBytes) <= config.maxDiskBytes) { + break; + } + auto it = m_diskEntries.find(aged.second); + if (it == m_diskEntries.end()) { + continue; + } + std::string path = it->second.path; + std::uint64_t bytes = it->second.bytes; + if (m_currentDiskBytes >= bytes) { + m_currentDiskBytes -= bytes; + } else { + m_currentDiskBytes = 0; + } + m_diskEntries.erase(it); + + std::error_code ec; + std::filesystem::remove_all(path, ec); + if (ec) { + // On Windows this can fail if another thread (e.g. loadFromDisk) + // still holds tensorstore file handles into the same path. The + // bookkeeping has already been updated to reflect eviction; the + // stale files will be picked up by the next clearDiskCache. + LOG_WARNING << "Disk cache eviction: failed to remove " << path << ": " << ec.message(); + } + } +} + +std::uint64_t +CacheManager::directorySizeBytes(const std::string& path) const +{ + std::uint64_t total = 0; + std::error_code ec; + for (const auto& entry : std::filesystem::recursive_directory_iterator(path, ec)) { + if (entry.is_regular_file()) { + total += static_cast(entry.file_size()); + } + } + return total; +} diff --git a/renderlib/CacheManager.h b/renderlib/CacheManager.h new file mode 100644 index 00000000..1cbd48f7 --- /dev/null +++ b/renderlib/CacheManager.h @@ -0,0 +1,150 @@ +#pragma once + +#include "CacheConfig.h" +#include "IFileReader.h" + +#include +#include +#include +#include +#include +#include +#include + +class ImageXYZC; + +struct CacheKey +{ + std::string filepath; + std::string subpath; + std::uint32_t scene = 0; + std::uint32_t time = 0; + std::vector channels; + std::uint32_t minx = 0; + std::uint32_t maxx = 0; + std::uint32_t miny = 0; + std::uint32_t maxy = 0; + std::uint32_t minz = 0; + std::uint32_t maxz = 0; + bool isImageSequence = false; + // last_write_time of the filepath (or directory) at the time the key was + // built, expressed as nanoseconds since epoch. Zero for remote URLs and for + // paths we couldn't stat. Folding this into the key invalidates cache + // entries when the source file is overwritten. + std::uint64_t fileMtimeNs = 0; + // file_size of filepath at the time the key was built. Zero for + // directories (zarr) and remote URLs. + std::uint64_t fileSize = 0; + + bool operator==(const CacheKey& other) const; +}; + +struct CacheKeyHash +{ + std::size_t operator()(const CacheKey& key) const; +}; + +class CacheManager +{ +public: + struct CacheStats + { + std::uint64_t ramHits = 0; + std::uint64_t diskHits = 0; + std::uint64_t misses = 0; + std::uint64_t diskWrites = 0; + }; + + // Construct a cache rooted at `cacheDir`. An empty `cacheDir` disables the + // disk tier (the cache is RAM-only). The directory is fixed for the lifetime + // of the instance and is never derived from per-apply CacheConfig. Production + // code uses the process-wide singleton (initialize() + instance()); this + // constructor is public so tests can create isolated, throwaway caches with + // their own directories. + explicit CacheManager(std::string cacheDir = {}); + + // The process-wide singleton. initialize() creates it rooted at `cacheDir` + // and must be called exactly once, at app startup, before the cache is used; + // a second call throws std::logic_error. The platform-appropriate path is + // resolved by the GUI layer (renderlib has no Qt and so cannot resolve + // QStandardPaths itself) and injected here. If initialize() is never called, + // instance() lazily yields a RAM-only (disk-inert) manager. + static void initialize(const std::string& cacheDir); + static CacheManager& instance(); + std::string getCacheDirectory() const; + + void setConfig(const CacheConfig& config); + CacheConfig getConfig() const; + + std::shared_ptr findImage(const LoadSpec& loadSpec); + void storeImage(const LoadSpec& loadSpec, const std::shared_ptr& image); + // Drop all entries from the in-memory cache. Disk cache is untouched. + void clearMemoryCache(); + // Drop all entries from the disk cache (refuses if the cache directory is + // missing the AGAVE marker file). Memory cache is untouched. + void clearDiskCache(); + + CacheStats getStats() const; + void resetStats(); + +private: + // Verify that `path` is (or can be made) a writable directory. Creates the + // directory if it does not exist, then probes it by writing and deleting a + // small marker file. Returns false on any failure. Probed once, at + // initialize() time, since the cache root is fixed for the process lifetime. + static bool canWriteCacheDir(const std::string& path); + + CacheKey makeKey(const LoadSpec& loadSpec) const; + std::string keyToString(const CacheKey& key) const; + std::string diskCacheId(const CacheKey& key) const; + std::uint64_t estimateImageBytes(const ImageXYZC& image) const; + void touchEntry(std::list::iterator it); + // Precondition: caller must hold m_mutex. + void evictIfNeededLocked(std::uint64_t incomingBytes); + void storeImageInMemory(const CacheKey& key, const std::shared_ptr& image); + + std::shared_ptr loadFromDisk(const CacheKey& key, const CacheConfig& config, const std::string& cacheDir); + void storeToDisk(const CacheKey& key, + const std::shared_ptr& image, + const CacheConfig& config, + const std::string& cacheDir); + void loadDiskIndex(const CacheConfig& config, const std::string& cacheDir); + void evictDiskIfNeeded(const CacheConfig& config, std::uint64_t incomingBytes); + std::uint64_t directorySizeBytes(const std::string& path) const; + // Writes a marker file to a directory we manage as our own disk cache root. + // clearDiskCache refuses to delete anything unless this marker is present, + // protecting against accidental wipes of user-typed paths (e.g. "C:\"). + void writeCacheMarker(const std::string& path) const; + bool isAgaveCacheDir(const std::string& path) const; + + mutable std::mutex m_mutex; + CacheConfig m_config; + // The disk cache root, fixed at construction. Distinct from m_diskIndexRoot, + // which tracks the root the in-memory index was last built against (used to + // decide when a rebuild is needed). + const std::string m_cacheDir; + std::uint64_t m_currentRamBytes = 0; + std::list m_lruKeys; + + struct CacheEntry + { + std::shared_ptr image; + std::uint64_t bytes = 0; + std::list::iterator lruIt; + }; + + std::unordered_map m_entries; + + struct DiskEntry + { + std::string path; + std::uint64_t bytes = 0; + std::uint64_t lastAccess = 0; + }; + + std::unordered_map m_diskEntries; + std::uint64_t m_currentDiskBytes = 0; + std::string m_diskIndexRoot; + + CacheStats m_stats; +}; diff --git a/renderlib/ImageXYZC.h b/renderlib/ImageXYZC.h index d0fa4e48..67f21433 100644 --- a/renderlib/ImageXYZC.h +++ b/renderlib/ImageXYZC.h @@ -69,9 +69,9 @@ class ImageXYZC { public: // how many channels to enable on first load by default - static const int FIRST_N_CHANNELS = 1; + static constexpr int FIRST_N_CHANNELS = 1; - static const size_t IN_MEMORY_BPP = 16; + static constexpr size_t IN_MEMORY_BPP = 16; ImageXYZC(uint32_t x, uint32_t y, uint32_t z, diff --git a/renderlib/SystemInfo.cpp b/renderlib/SystemInfo.cpp new file mode 100644 index 00000000..d5a725a0 --- /dev/null +++ b/renderlib/SystemInfo.cpp @@ -0,0 +1,108 @@ +#include "SystemInfo.h" + +#include +#include +#include +#include + +#ifdef _WIN32 +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#elif defined(__linux__) +#include +#else // macOS +#include +#include +#endif + +namespace SystemInfo { + +std::uint64_t +availableMemoryBytes() +{ +#ifdef _WIN32 + MEMORYSTATUSEX statex; + statex.dwLength = sizeof(statex); + if (GlobalMemoryStatusEx(&statex)) { + return static_cast(statex.ullAvailPhys); + } + return 0; +#elif defined(__linux__) + // Prefer MemAvailable from /proc/meminfo: this is the kernel's own + // estimate of how much memory can be allocated without swapping, and + // unlike sysinfo.freeram it includes reclaimable page cache and slab. + // On a typical box freeram alone is under 5% of total even when most + // memory is reclaimable. + std::ifstream meminfo("/proc/meminfo"); + if (meminfo.is_open()) { + std::string line; + while (std::getline(meminfo, line)) { + // Format: "MemAvailable: 12345678 kB" + if (line.rfind("MemAvailable:", 0) == 0) { + std::istringstream iss(line.substr(13)); + std::uint64_t kb = 0; + iss >> kb; + if (kb > 0) { + return kb * 1024ULL; + } + break; + } + } + } + // Older kernels (<3.14) don't expose MemAvailable. Fall back to + // sysinfo, and at least add bufferram to freeram so we don't grossly + // under-report. + struct sysinfo info; + if (sysinfo(&info) == 0) { + std::uint64_t bytes = static_cast(info.freeram) + static_cast(info.bufferram); + return bytes * static_cast(info.mem_unit); + } + return 0; +#else // macOS + mach_msg_type_number_t count = HOST_VM_INFO64_COUNT; + vm_statistics64_data_t vm_stats; + mach_port_t host_port = mach_host_self(); + + if (host_statistics64(host_port, HOST_VM_INFO64, (host_info64_t)&vm_stats, &count) == KERN_SUCCESS) { + vm_size_t page_size = 0; + host_page_size(host_port, &page_size); + + // free_count alone is what's truly free right now; macOS keeps that + // small on purpose and uses inactive/purgeable/speculative pages as + // reclaimable buffer (this is what Activity Monitor reports as + // "Available"). Including them avoids massive under-reporting. + std::uint64_t reclaimablePages = static_cast(vm_stats.free_count) + + static_cast(vm_stats.inactive_count) + + static_cast(vm_stats.purgeable_count) + + static_cast(vm_stats.speculative_count); + return reclaimablePages * static_cast(page_size); + } + return 0; +#endif +} + +std::uint64_t +availableDiskBytes(const std::string& path) +{ + if (path.empty()) { + return 0; + } + std::error_code ec; + std::filesystem::space_info info = std::filesystem::space(path, ec); + if (ec) { + // path may not exist yet; try its parent. + std::filesystem::path parent = std::filesystem::path(path).parent_path(); + if (parent.empty()) { + return 0; + } + info = std::filesystem::space(parent, ec); + if (ec) { + return 0; + } + } + return static_cast(info.available); +} + +} // namespace SystemInfo diff --git a/renderlib/SystemInfo.h b/renderlib/SystemInfo.h new file mode 100644 index 00000000..c4c7ef02 --- /dev/null +++ b/renderlib/SystemInfo.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +namespace SystemInfo { + +// Returns an estimate of physical memory that can be allocated without +// swapping, in bytes. Returns 0 if the value cannot be determined. +// +// Per-platform definition: +// Linux: MemAvailable from /proc/meminfo (kernel's own estimate that +// includes reclaimable page cache and slab). Falls back to +// sysinfo (freeram + bufferram) on older kernels. +// macOS: free + inactive + purgeable + speculative pages, matching +// what Activity Monitor reports as "Available". +// Windows: MEMORYSTATUSEX::ullAvailPhys. +std::uint64_t availableMemoryBytes(); + +// Returns bytes available to a non-privileged user on the filesystem +// containing `path`. Returns 0 if `path` is empty or the value cannot +// be determined. +std::uint64_t availableDiskBytes(const std::string& path); + +} // namespace SystemInfo diff --git a/renderlib/command.cpp b/renderlib/command.cpp index 11b87bb6..788f3d58 100644 --- a/renderlib/command.cpp +++ b/renderlib/command.cpp @@ -571,14 +571,7 @@ SetTimeCommand::execute(ExecutionContext* c) std::shared_ptr image; try { - std::unique_ptr reader(FileReader::getReader(loadSpec.filepath, loadSpec.isImageSequence)); - if (!reader) { - LOG_ERROR << "Could not find a reader for file " << loadSpec.filepath; - image = nullptr; - return; - } - - image = reader->loadFromFile(loadSpec); + image = FileReader::loadAndCache(loadSpec); } catch (...) { LOG_ERROR << "Failed to load time " << m_data.m_time << " from file " << c->m_loadSpec.toString(); image = nullptr; @@ -670,7 +663,7 @@ LoadDataCommand::execute(ExecutionContext* c) c->m_loadSpec.maxz = m_data.m_zmax; // TODO can we load time sequences of separate files here? - std::unique_ptr reader(FileReader::getReader(m_data.m_path)); + std::shared_ptr reader(FileReader::getReader(m_data.m_path)); if (!reader) { LOG_ERROR << "Could not find a reader for file " << m_data.m_path; return; @@ -678,7 +671,7 @@ LoadDataCommand::execute(ExecutionContext* c) VolumeDimensions dims = reader->loadDimensions(m_data.m_path, m_data.m_scene); - std::shared_ptr image = reader->loadFromFile(c->m_loadSpec); + std::shared_ptr image = FileReader::loadAndCache(c->m_loadSpec, reader); if (!image) { return; } diff --git a/renderlib/io/FileReader.cpp b/renderlib/io/FileReader.cpp index 5aa33839..e0c2ceab 100644 --- a/renderlib/io/FileReader.cpp +++ b/renderlib/io/FileReader.cpp @@ -7,12 +7,10 @@ #include "FileReaderZarr.h" #include "ImageXYZC.h" #include "Logging.h" +#include "CacheManager.h" #include #include -#include - -std::map> FileReader::sPreloadedImageCache; // return file extension as lowercase std::string @@ -66,30 +64,34 @@ FileReader::getReader(const std::string& filepath, bool isImageSequence) } std::shared_ptr -FileReader::loadAndCache(const LoadSpec& loadSpec) +FileReader::loadAndCache(const LoadSpec& loadSpec, std::shared_ptr reader) { - // check cache first of all. - auto cached = sPreloadedImageCache.find(loadSpec.filepath); - if (cached != sPreloadedImageCache.end()) { - return cached->second; + auto cached = CacheManager::instance().findImage(loadSpec); + if (cached) { + return cached; } - VolumeDimensions dims; - std::shared_ptr image; - std::string filepath = loadSpec.filepath; + const std::string& filepath = loadSpec.filepath; - std::unique_ptr reader(FileReader::getReader(filepath)); + // Fall back to constructing a new reader if the caller didn't supply one. + // A reused reader can skip re-opening the file and re-parsing metadata (notably + // valuable for time-series stepping on OME-Zarr/TIFF/CZI). + std::shared_ptr ownedReader; if (!reader) { - LOG_ERROR << "Could not find a reader for file " << filepath; - return nullptr; + ownedReader.reset(FileReader::getReader(filepath, loadSpec.isImageSequence)); + if (!ownedReader) { + LOG_ERROR << "Could not find a reader for file " << filepath; + return nullptr; + } + reader = ownedReader; } image = reader->loadFromFile(loadSpec); if (image) { - sPreloadedImageCache[filepath] = image; + CacheManager::instance().storeImage(loadSpec, image); } return image; @@ -106,9 +108,11 @@ FileReader::loadFromArray_4D(uint8_t* dataArray, bool addToCache) { // check cache first of all. - auto cached = sPreloadedImageCache.find(name); - if (cached != sPreloadedImageCache.end()) { - return cached->second; + LoadSpec cacheSpec; + cacheSpec.filepath = name; + auto cached = CacheManager::instance().findImage(cacheSpec); + if (cached) { + return cached; } // assume data is in CZYX order: @@ -145,7 +149,9 @@ FileReader::loadFromArray_4D(uint8_t* dataArray, std::shared_ptr sharedImage(im); if (addToCache) { - sPreloadedImageCache[name] = sharedImage; + LoadSpec newSpec; + newSpec.filepath = name; + CacheManager::instance().storeImage(newSpec, sharedImage); } return sharedImage; } diff --git a/renderlib/io/FileReader.h b/renderlib/io/FileReader.h index 1effb508..ec41c6bc 100644 --- a/renderlib/io/FileReader.h +++ b/renderlib/io/FileReader.h @@ -2,7 +2,6 @@ #include "IFileReader.h" -#include #include #include #include @@ -19,7 +18,11 @@ class FileReader static IFileReader* getReader(const std::string& filepath, bool isImageSequence = false); - static std::shared_ptr loadAndCache(const LoadSpec& loadSpec); + // If `reader` is provided, it is reused to load the image (skipping the cost of + // re-opening the file and re-parsing metadata). If null, a new reader is constructed + // via getReader(). The result is stored in the CacheManager on success either way. + static std::shared_ptr loadAndCache(const LoadSpec& loadSpec, + std::shared_ptr reader = nullptr); static std::shared_ptr loadFromArray_4D(uint8_t* dataArray, std::vector shape, @@ -31,5 +34,4 @@ class FileReader bool addToCache = false); private: - static std::map> sPreloadedImageCache; }; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 42dd01e2..32b210b8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,6 +14,7 @@ target_include_directories(agave_test PUBLIC "${glm_SOURCE_DIR}" ) target_sources(agave_test PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/test_cacheManager.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/test_commands.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/test_histogram.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/test_main.cpp" diff --git a/test/test_cacheManager.cpp b/test/test_cacheManager.cpp new file mode 100644 index 00000000..ff8c550c --- /dev/null +++ b/test/test_cacheManager.cpp @@ -0,0 +1,678 @@ +#include + +#include "renderlib/CacheConfig.h" +#include "renderlib/CacheManager.h" +#include "renderlib/IFileReader.h" +#include "renderlib/ImageXYZC.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// These tests construct their own CacheManager instances (each with its own +// cache directory) rather than touching the process-wide singleton. That keeps +// every case fully isolated and means we never need to reach into CacheManager +// internals or reset shared state. + +namespace { + +// Bytes per pixel that CacheManager uses when estimating an image's RAM cost. +constexpr size_t kBytesPerPixel = ImageXYZC::IN_MEMORY_BPP / 8; + +// Build a minimal in-memory ImageXYZC for cache testing. Memory is owned by the +// returned ImageXYZC (it deletes m_data in its destructor). +std::shared_ptr +makeImage(uint32_t x, uint32_t y, uint32_t z, uint32_t c) +{ + const std::uint64_t bytes = static_cast(x) * y * z * c * kBytesPerPixel; + auto* data = new uint8_t[bytes]; + std::memset(data, 0, bytes); + return std::make_shared( + x, y, z, c, static_cast(ImageXYZC::IN_MEMORY_BPP), data, 1.0f, 1.0f, 1.0f, "units"); +} + +LoadSpec +makeSpec(const std::string& filepath, uint32_t time = 0) +{ + LoadSpec s; + s.filepath = filepath; + s.time = time; + return s; +} + +std::uint64_t +imageBytes(uint32_t x, uint32_t y, uint32_t z, uint32_t c) +{ + return static_cast(x) * y * z * c * kBytesPerPixel; +} + +CacheConfig +ramOnlyConfig(std::uint64_t maxRamBytes) +{ + CacheConfig cfg; + cfg.enabled = true; + cfg.enableDisk = false; + cfg.maxRamBytes = maxRamBytes; + cfg.maxDiskBytes = 0; + return cfg; +} + +CacheConfig +diskConfig(std::uint64_t maxRamBytes, std::uint64_t maxDiskBytes) +{ + CacheConfig cfg; + cfg.enabled = true; + cfg.enableDisk = true; + cfg.maxRamBytes = maxRamBytes; + cfg.maxDiskBytes = maxDiskBytes; + return cfg; +} + +// An image whose pixel bytes match a known pattern (data[i] = i & 0xFF), used +// to verify the disk round-trip didn't corrupt the data. +std::shared_ptr +makeImageWithPattern(uint32_t x, uint32_t y, uint32_t z, uint32_t c) +{ + const std::uint64_t bytes = static_cast(x) * y * z * c * kBytesPerPixel; + auto* data = new uint8_t[bytes]; + for (std::uint64_t i = 0; i < bytes; ++i) { + data[i] = static_cast(i & 0xFF); + } + return std::make_shared( + x, y, z, c, static_cast(ImageXYZC::IN_MEMORY_BPP), data, 1.0f, 1.0f, 1.0f, "units"); +} + +// RAII temporary directory for disk-cache tests. Each instance gets a unique +// path under the system temp dir and is removed on destruction. The counter +// guarantees uniqueness even within a single test process. +class TempCacheDir +{ +public: + TempCacheDir() + { + static std::atomic sCounter{ 0 }; + int n = sCounter.fetch_add(1); + m_path = std::filesystem::temp_directory_path() / ("agave_cache_test_" + std::to_string(n)); + std::error_code ec; + std::filesystem::remove_all(m_path, ec); + std::filesystem::create_directories(m_path, ec); + } + ~TempCacheDir() + { + std::error_code ec; + std::filesystem::remove_all(m_path, ec); + } + TempCacheDir(const TempCacheDir&) = delete; + TempCacheDir& operator=(const TempCacheDir&) = delete; + + std::string str() const { return m_path.string(); } + std::filesystem::path path() const { return m_path; } + +private: + std::filesystem::path m_path; +}; + +// Count subdirectories under `dir` (used to verify entry-dir counts after +// store/evict/clear operations). +int +countSubdirs(const std::filesystem::path& dir) +{ + int n = 0; + std::error_code ec; + for (auto it = std::filesystem::directory_iterator(dir, ec); !ec && it != std::filesystem::directory_iterator(); + it.increment(ec)) { + if (it->is_directory()) { + ++n; + } + } + return n; +} + +} // namespace + +TEST_CASE("CacheManager respects RAM limit and evicts LRU entries", "[cache]") +{ + // Each 4x4x4x1 image is 4*4*4*1*2 = 128 bytes. + const std::uint64_t oneImage = imageBytes(4, 4, 4, 1); + + // Fresh, RAM-only cache for each section (Catch2 re-runs the body per leaf). + CacheManager cache; + + SECTION("Store and retrieve a single image (RAM hit)") + { + cache.setConfig(ramOnlyConfig(oneImage * 4)); + + auto img = makeImage(4, 4, 4, 1); + auto spec = makeSpec("a"); + + cache.storeImage(spec, img); + auto retrieved = cache.findImage(spec); + + REQUIRE(retrieved != nullptr); + REQUIRE(retrieved.get() == img.get()); + + auto stats = cache.getStats(); + REQUIRE(stats.ramHits == 1); + REQUIRE(stats.misses == 0); + } + + SECTION("Miss returns null and increments miss counter") + { + cache.setConfig(ramOnlyConfig(oneImage * 4)); + + auto retrieved = cache.findImage(makeSpec("missing")); + REQUIRE(retrieved == nullptr); + REQUIRE(cache.getStats().misses == 1); + } + + SECTION("Cache stays under maxRamBytes when limit is reached") + { + // Limit is exactly 2 images. Storing a 3rd must evict. + cache.setConfig(ramOnlyConfig(oneImage * 2)); + + cache.storeImage(makeSpec("a"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("b"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("c"), makeImage(4, 4, 4, 1)); + + // "a" was the least recently used and must be evicted. + REQUIRE(cache.findImage(makeSpec("a")) == nullptr); + REQUIRE(cache.findImage(makeSpec("b")) != nullptr); + REQUIRE(cache.findImage(makeSpec("c")) != nullptr); + } + + SECTION("LRU ordering is updated on access (touched entry survives eviction)") + { + cache.setConfig(ramOnlyConfig(oneImage * 2)); + + cache.storeImage(makeSpec("a"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("b"), makeImage(4, 4, 4, 1)); + + // Touch "a" so that "b" becomes least recently used. + REQUIRE(cache.findImage(makeSpec("a")) != nullptr); + + cache.storeImage(makeSpec("c"), makeImage(4, 4, 4, 1)); + + REQUIRE(cache.findImage(makeSpec("a")) != nullptr); + REQUIRE(cache.findImage(makeSpec("b")) == nullptr); + REQUIRE(cache.findImage(makeSpec("c")) != nullptr); + } + + SECTION("Storing many images keeps total RAM usage bounded") + { + const std::uint64_t cap = oneImage * 3; + cache.setConfig(ramOnlyConfig(cap)); + + for (int i = 0; i < 20; ++i) { + cache.storeImage(makeSpec("file_" + std::to_string(i)), makeImage(4, 4, 4, 1)); + } + + // Count how many of the 20 inserts are still resident. With a cap of 3 + // images, no more than 3 can be present at once. + cache.resetStats(); + int present = 0; + for (int i = 0; i < 20; ++i) { + if (cache.findImage(makeSpec("file_" + std::to_string(i)))) { + present++; + } + } + REQUIRE(present <= 3); + // The most recently inserted entries should still be in the cache. + REQUIRE(present >= 1); + REQUIRE(cache.findImage(makeSpec("file_19")) != nullptr); + } + + SECTION("Image larger than maxRamBytes is not stored") + { + // Cap at less than the size of one image. + cache.setConfig(ramOnlyConfig(oneImage / 2)); + + cache.storeImage(makeSpec("too_big"), makeImage(4, 4, 4, 1)); + REQUIRE(cache.findImage(makeSpec("too_big")) == nullptr); + } + + SECTION("Re-storing the same key replaces the entry without exceeding the limit") + { + cache.setConfig(ramOnlyConfig(oneImage * 2)); + + auto first = makeImage(4, 4, 4, 1); + auto second = makeImage(4, 4, 4, 1); + auto spec = makeSpec("a"); + + cache.storeImage(spec, first); + cache.storeImage(spec, second); + + // Fill the cache up to the limit with another entry. If the duplicate + // key had double-counted bytes, this would evict "a". + cache.storeImage(makeSpec("b"), makeImage(4, 4, 4, 1)); + + auto retrieved = cache.findImage(spec); + REQUIRE(retrieved != nullptr); + REQUIRE(retrieved.get() == second.get()); + REQUIRE(cache.findImage(makeSpec("b")) != nullptr); + } + + SECTION("Disabling the cache clears all entries") + { + cache.setConfig(ramOnlyConfig(oneImage * 4)); + cache.storeImage(makeSpec("a"), makeImage(4, 4, 4, 1)); + REQUIRE(cache.findImage(makeSpec("a")) != nullptr); + + CacheConfig disabled; + disabled.enabled = false; + cache.setConfig(disabled); + + REQUIRE(cache.findImage(makeSpec("a")) == nullptr); + } + + SECTION("Shrinking the limit evicts existing entries") + { + cache.setConfig(ramOnlyConfig(oneImage * 4)); + cache.storeImage(makeSpec("a"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("b"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("c"), makeImage(4, 4, 4, 1)); + + // Reconfigure to a smaller limit; oldest entries must be evicted. + cache.setConfig(ramOnlyConfig(oneImage)); + + int present = 0; + for (const char* name : { "a", "b", "c" }) { + if (cache.findImage(makeSpec(name))) { + present++; + } + } + REQUIRE(present <= 1); + } + + SECTION("Reducing cache size immediately evicts LRU entries to fit") + { + // Fill the cache exactly to its 4-image cap. + cache.setConfig(ramOnlyConfig(oneImage * 4)); + cache.storeImage(makeSpec("a"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("b"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("c"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("d"), makeImage(4, 4, 4, 1)); + + // All four are resident. LRU order (most recent first) is: d, c, b, a. + // Reset stats so the find() calls below count cleanly. + cache.resetStats(); + + // User shrinks the cache to hold only 2 images. Eviction must occur + // synchronously inside setConfig() — not lazily on the next store. + cache.setConfig(ramOnlyConfig(oneImage * 2)); + + // The two least recently used entries ("a" and "b") must be gone, and + // the two most recently used ("c" and "d") must still be present. + REQUIRE(cache.findImage(makeSpec("a")) == nullptr); + REQUIRE(cache.findImage(makeSpec("b")) == nullptr); + REQUIRE(cache.findImage(makeSpec("c")) != nullptr); + REQUIRE(cache.findImage(makeSpec("d")) != nullptr); + + auto stats = cache.getStats(); + REQUIRE(stats.misses == 2); + REQUIRE(stats.ramHits == 2); + } + + SECTION("Reducing cache size below a single image evicts everything") + { + cache.setConfig(ramOnlyConfig(oneImage * 3)); + cache.storeImage(makeSpec("a"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("b"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("c"), makeImage(4, 4, 4, 1)); + + // Shrink below the size of a single entry — everything must go. + cache.setConfig(ramOnlyConfig(oneImage / 2)); + + REQUIRE(cache.findImage(makeSpec("a")) == nullptr); + REQUIRE(cache.findImage(makeSpec("b")) == nullptr); + REQUIRE(cache.findImage(makeSpec("c")) == nullptr); + } + + SECTION("clearMemoryCache() empties the cache") + { + cache.setConfig(ramOnlyConfig(oneImage * 4)); + cache.storeImage(makeSpec("a"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("b"), makeImage(4, 4, 4, 1)); + + cache.clearMemoryCache(); + + REQUIRE(cache.findImage(makeSpec("a")) == nullptr); + REQUIRE(cache.findImage(makeSpec("b")) == nullptr); + } +} + +TEST_CASE("CacheManager disk tier round-trips images and respects the disk cap", "[cache][disk]") +{ + // 4x4x4x1 uint16 image -> 128 raw bytes. On-disk zarr representation is a + // few KB once chunk + metadata files are accounted for, so a 1 MB cap is + // comfortably oversized for the round-trip and clear-cache tests, and we + // size the cap explicitly down to a couple of images for the eviction + // test. Total disk usage per test is well under a megabyte. + const std::uint64_t oneImage = imageBytes(4, 4, 4, 1); + + // Fresh temp dir + cache rooted at it for each section. + TempCacheDir tmp; + CacheManager cache(tmp.str()); + + SECTION("Enabling the disk tier writes the AGAVE marker file") + { + cache.setConfig(diskConfig(oneImage * 4, 1ULL * 1024 * 1024)); + + REQUIRE(std::filesystem::exists(tmp.path() / ".agave-cache-dir")); + } + + SECTION("Round-trip: store, drop RAM, find reloads from disk with bit-identical data") + { + cache.setConfig(diskConfig(oneImage * 4, 1ULL * 1024 * 1024)); + + auto img = makeImageWithPattern(4, 4, 4, 1); + auto spec = makeSpec("disk_roundtrip"); + cache.storeImage(spec, img); + + // Drop RAM cache; disk cache survives. + cache.clearMemoryCache(); + cache.resetStats(); + + auto found = cache.findImage(spec); + REQUIRE(found != nullptr); + // The reloaded image is a fresh instance, not the same shared_ptr. + REQUIRE(found.get() != img.get()); + + auto stats = cache.getStats(); + REQUIRE(stats.diskHits == 1); + REQUIRE(stats.ramHits == 0); + + REQUIRE(found->sizeX() == 4); + REQUIRE(found->sizeY() == 4); + REQUIRE(found->sizeZ() == 4); + REQUIRE(found->sizeC() == 1); + + const std::uint64_t bytes = imageBytes(4, 4, 4, 1); + REQUIRE(std::memcmp(img->ptr(), found->ptr(), bytes) == 0); + } + + SECTION("An image larger than maxDiskBytes is not written to disk") + { + // Cap is below a single image, so storeToDisk must refuse outright. + cache.setConfig(diskConfig(oneImage * 4, oneImage / 2)); + + auto spec = makeSpec("too_big_for_disk"); + cache.storeImage(spec, makeImage(4, 4, 4, 1)); + + // No entry subdirectory should have been created. + REQUIRE(countSubdirs(tmp.path()) == 0); + + // Drop RAM to force a disk-or-miss lookup; with no entry on disk we + // should get a miss. + cache.clearMemoryCache(); + REQUIRE(cache.findImage(spec) == nullptr); + } + + SECTION("Disk eviction removes the oldest entry to stay under the cap") + { + // Cap large enough for two entries' raw byte estimate but not three. + cache.setConfig(diskConfig(oneImage * 4, oneImage * 2)); + + cache.storeImage(makeSpec("disk_a"), makeImage(4, 4, 4, 1)); + // Sleep briefly so lastAccess timestamps are distinct on fast disks. + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + cache.storeImage(makeSpec("disk_b"), makeImage(4, 4, 4, 1)); + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + cache.storeImage(makeSpec("disk_c"), makeImage(4, 4, 4, 1)); + + // Drop RAM so finds have to go through the disk tier. + cache.clearMemoryCache(); + + REQUIRE(cache.findImage(makeSpec("disk_a")) == nullptr); + REQUIRE(cache.findImage(makeSpec("disk_b")) != nullptr); + REQUIRE(cache.findImage(makeSpec("disk_c")) != nullptr); + + // After eviction there should be at most two entry subdirectories on + // disk (the marker file is not a directory). + REQUIRE(countSubdirs(tmp.path()) <= 2); + } + + SECTION("clearDiskCache removes entry subdirectories but keeps the marker") + { + cache.setConfig(diskConfig(oneImage * 4, 1ULL * 1024 * 1024)); + + cache.storeImage(makeSpec("clear_a"), makeImage(4, 4, 4, 1)); + cache.storeImage(makeSpec("clear_b"), makeImage(4, 4, 4, 1)); + REQUIRE(countSubdirs(tmp.path()) >= 2); + + cache.clearDiskCache(); + + REQUIRE(countSubdirs(tmp.path()) == 0); + REQUIRE(std::filesystem::exists(tmp.path() / ".agave-cache-dir")); + } + + SECTION("clearDiskCache refuses to touch a directory without the AGAVE marker") + { + cache.setConfig(diskConfig(oneImage * 4, 1ULL * 1024 * 1024)); + + cache.storeImage(makeSpec("guarded_a"), makeImage(4, 4, 4, 1)); + int subdirsBefore = countSubdirs(tmp.path()); + REQUIRE(subdirsBefore >= 1); + + // Simulate a directory that doesn't belong to AGAVE by removing the + // marker file. clearDiskCache must refuse. + std::error_code ec; + std::filesystem::remove(tmp.path() / ".agave-cache-dir", ec); + REQUIRE_FALSE(ec); + + cache.clearDiskCache(); + + REQUIRE(countSubdirs(tmp.path()) == subdirsBefore); + } + + SECTION("A fresh cache instance rebuilds the disk index from an existing cache dir") + { + cache.setConfig(diskConfig(oneImage * 4, 1ULL * 1024 * 1024)); + cache.storeImage(makeSpec("persistent"), makeImage(4, 4, 4, 1)); + + // Simulate a session restart: a brand-new CacheManager rooted at the same + // directory must rebuild its disk index on first use and serve the + // persisted entry from disk. + CacheManager restarted(tmp.str()); + restarted.setConfig(diskConfig(oneImage * 4, 1ULL * 1024 * 1024)); + + auto found = restarted.findImage(makeSpec("persistent")); + REQUIRE(found != nullptr); + REQUIRE(restarted.getStats().diskHits == 1); + } +} + +TEST_CASE("CacheManager cache directory is fixed at construction", "[cache][disk]") +{ + const std::uint64_t oneImage = imageBytes(4, 4, 4, 1); + + SECTION("getCacheDirectory reflects the constructed path") + { + TempCacheDir tmp; + CacheManager cache(tmp.str()); + REQUIRE(cache.getCacheDirectory() == tmp.str()); + } + + SECTION("A cache constructed with no directory is RAM-only") + { + CacheManager cache; // empty cache directory + REQUIRE(cache.getCacheDirectory().empty()); + + // Even with a disk-enabled config, an empty root keeps the disk tier inert. + cache.setConfig(diskConfig(oneImage * 4, 1ULL * 1024 * 1024)); + + auto spec = makeSpec("no_dir"); + cache.storeImage(spec, makeImage(4, 4, 4, 1)); + + // RAM still serves it within the session... + REQUIRE(cache.findImage(spec) != nullptr); + + // ...but nothing was persisted to disk, so once RAM is dropped it misses. + cache.clearMemoryCache(); + REQUIRE(cache.findImage(spec) == nullptr); + } +} + +TEST_CASE("CacheManager invalidates entries when the source file mtime changes", "[cache][mtime]") +{ + CacheManager cache; + + // Use a real file on disk so the cache key picks up its mtime via + // std::filesystem::last_write_time. We bump the file's mtime explicitly + // (rather than sleeping and rewriting) so the test is deterministic on + // filesystems with coarse mtime resolution. + static std::atomic sCounter{ 0 }; + std::filesystem::path srcFile = std::filesystem::temp_directory_path() / + ("agave_cache_mtime_test_" + std::to_string(sCounter.fetch_add(1)) + ".bin"); + { + std::ofstream out(srcFile.string()); + out << "initial content"; + } + + cache.setConfig(ramOnlyConfig(imageBytes(4, 4, 4, 1) * 4)); + + LoadSpec spec; + spec.filepath = srcFile.string(); + cache.storeImage(spec, makeImage(4, 4, 4, 1)); + REQUIRE(cache.findImage(spec) != nullptr); + + // Bump the file's last_write_time well into the future; subsequent + // makeKey calls now produce a different key and must miss. + std::error_code ec; + auto futureTime = std::filesystem::last_write_time(srcFile, ec) + std::chrono::seconds(60); + REQUIRE_FALSE(ec); + std::filesystem::last_write_time(srcFile, futureTime, ec); + REQUIRE_FALSE(ec); + + REQUIRE(cache.findImage(spec) == nullptr); + + std::filesystem::remove(srcFile, ec); +} + +TEST_CASE("CacheManager normalizes equivalent filepaths to the same key", "[cache][normalize]") +{ + // These tests use synthetic paths that don't exist on disk; statForKey + // returns (0, 0) for them, so the cache key depends only on the + // (normalized) filepath string and the other LoadSpec fields. + const std::uint64_t oneImage = imageBytes(4, 4, 4, 1); + + CacheManager cache; + cache.setConfig(ramOnlyConfig(oneImage * 4)); + + SECTION("'./' segment is normalized away") + { + LoadSpec stored; + stored.filepath = "/some/dir/foo.tif"; + cache.storeImage(stored, makeImage(4, 4, 4, 1)); + + LoadSpec lookup; + lookup.filepath = "/some/dir/./foo.tif"; + REQUIRE(cache.findImage(lookup) != nullptr); + } + + SECTION("'..' segment is normalized away") + { + LoadSpec stored; + stored.filepath = "/some/dir/foo.tif"; + cache.storeImage(stored, makeImage(4, 4, 4, 1)); + + LoadSpec lookup; + lookup.filepath = "/some/dir/subdir/../foo.tif"; + REQUIRE(cache.findImage(lookup) != nullptr); + } + + SECTION("Duplicate path separators are collapsed") + { + LoadSpec stored; + stored.filepath = "/some/dir/foo.tif"; + cache.storeImage(stored, makeImage(4, 4, 4, 1)); + + LoadSpec lookup; + lookup.filepath = "/some//dir///foo.tif"; + REQUIRE(cache.findImage(lookup) != nullptr); + } + + SECTION("Bare names (no slashes) pass through unchanged") + { + LoadSpec stored; + stored.filepath = "in_memory_array_42"; + cache.storeImage(stored, makeImage(4, 4, 4, 1)); + + LoadSpec lookup; + lookup.filepath = "in_memory_array_42"; + REQUIRE(cache.findImage(lookup) != nullptr); + } + + SECTION("Remote URLs are passed through without normalization") + { + LoadSpec stored; + stored.filepath = "http://example.com/path/to/data.zarr"; + cache.storeImage(stored, makeImage(4, 4, 4, 1)); + + LoadSpec lookup; + lookup.filepath = "http://example.com/path/to/data.zarr"; + REQUIRE(cache.findImage(lookup) != nullptr); + } + + SECTION("Distinct paths still produce distinct keys (no false hits)") + { + LoadSpec stored; + stored.filepath = "/some/dir/foo.tif"; + cache.storeImage(stored, makeImage(4, 4, 4, 1)); + + LoadSpec lookup; + lookup.filepath = "/some/dir/bar.tif"; + REQUIRE(cache.findImage(lookup) == nullptr); + } + +#ifdef _WIN32 + SECTION("UNC paths normalize across slash style and case on Windows") + { + LoadSpec stored; + stored.filepath = "\\\\Server\\Share\\Path\\Foo.tif"; + cache.storeImage(stored, makeImage(4, 4, 4, 1)); + + // Same UNC path, lowercase, mixed slashes. + LoadSpec lookupSlash; + lookupSlash.filepath = "//server/share/path/foo.tif"; + REQUIRE(cache.findImage(lookupSlash) != nullptr); + + // Same UNC path, with a '..' segment in the relative tail. + LoadSpec lookupDotDot; + lookupDotDot.filepath = "\\\\server\\share\\path\\subdir\\..\\foo.tif"; + REQUIRE(cache.findImage(lookupDotDot) != nullptr); + + // Different UNC share — must NOT collide. + LoadSpec otherShare; + otherShare.filepath = "\\\\server\\othershare\\path\\foo.tif"; + REQUIRE(cache.findImage(otherShare) == nullptr); + } + + SECTION("Forward and back slashes are equivalent on Windows") + { + LoadSpec stored; + stored.filepath = "C:/some/dir/foo.tif"; + cache.storeImage(stored, makeImage(4, 4, 4, 1)); + + LoadSpec lookup; + lookup.filepath = "C:\\some\\dir\\foo.tif"; + REQUIRE(cache.findImage(lookup) != nullptr); + } + + SECTION("Case differences are equivalent on Windows") + { + LoadSpec stored; + stored.filepath = "C:/Some/Dir/Foo.tif"; + cache.storeImage(stored, makeImage(4, 4, 4, 1)); + + LoadSpec lookup; + lookup.filepath = "c:/some/dir/foo.tif"; + REQUIRE(cache.findImage(lookup) != nullptr); + } +#endif +}