Skip to content

Commit d02a434

Browse files
committed
[ntuple] Add some internal utilities to the storage layer
Will be used by the RNTupleAttributes.
1 parent b88782c commit d02a434

12 files changed

+142
-9
lines changed

tree/ntuple/inc/ROOT/RMiniFile.hxx

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,14 @@ class TVirtualStreamerInfo;
3232

3333
namespace ROOT {
3434

35+
class RNTupleWriteOptions;
36+
3537
namespace Internal {
38+
3639
class RRawFile;
37-
}
3840

39-
class RNTupleWriteOptions;
41+
TDirectory *GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer);
4042

41-
namespace Internal {
4243
/// Holds status information of an open ROOT file during writing
4344
struct RTFileControlBlock;
4445

@@ -68,9 +69,6 @@ private:
6869
/// Used when the file turns out to be a TFile container. The ntuplePath variable is either the ntuple name
6970
/// or an ntuple name preceded by a directory (`myNtuple` or `foo/bar/myNtuple` or `/foo/bar/myNtuple`)
7071
RResult<RNTuple> GetNTupleProper(std::string_view ntuplePath);
71-
/// Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary).
72-
RResult<RNTuple>
73-
GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen);
7472

7573
/// Searches for a key with the given name and type in the key index of the directory starting at offsetDir.
7674
/// The offset points to the start of the TDirectory DATA section, without the key and without the name and title
@@ -84,6 +82,9 @@ public:
8482
explicit RMiniFileReader(ROOT::Internal::RRawFile *rawFile);
8583
/// Extracts header and footer location for the RNTuple identified by ntupleName
8684
RResult<RNTuple> GetNTuple(std::string_view ntupleName);
85+
/// Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary).
86+
RResult<RNTuple>
87+
GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen);
8788
/// Reads a given byte range from the file into the provided memory buffer.
8889
/// If `nbytes > fMaxKeySize` it will perform chunked read from multiple blobs,
8990
/// whose addresses are listed at the end of the first chunk.
@@ -109,6 +110,8 @@ A stand-alone version of RNTuple can remove the TFile based writer.
109110
*/
110111
// clang-format on
111112
class RNTupleFileWriter {
113+
friend TDirectory *ROOT::Internal::GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer);
114+
112115
public:
113116
/// The key length of a blob. It is always a big key (version > 1000) with class name RBlob.
114117
static constexpr std::size_t kBlobKeyLen = 42;
@@ -254,7 +257,7 @@ public:
254257
void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset);
255258
/// Ensures that the streamer info records passed as argument are written to the file
256259
void UpdateStreamerInfos(const ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t &streamerInfos);
257-
/// Writes the RNTuple key to the file so that the header and footer keys can be found
260+
/// Writes the RNTuple key to the file so that the header and footer keys can be found.
258261
void Commit(int compression = RCompressionSetting::EDefaults::kUseGeneralPurpose);
259262
};
260263

tree/ntuple/inc/ROOT/RPageNullSink.hxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ public:
106106
void CommitStagedClusters(std::span<RStagedCluster>) final {}
107107
void CommitClusterGroup() final {}
108108
void CommitDatasetImpl() final {}
109+
/// Always returns nullptr; the requested RNTuple name is ignored.
std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view) const final { return nullptr; }
109110
};
110111

111112
} // namespace Internal

tree/ntuple/inc/ROOT/RPageSinkBuf.hxx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ public:
148148
void CommitDatasetImpl() final;
149149

150150
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
151+
152+
std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view newName) const final
153+
{
154+
return std::make_unique<RPageSinkBuf>(fInnerSink->CloneWithNewRNTuple(newName));
155+
}
151156
}; // RPageSinkBuf
152157

153158
} // namespace Internal

tree/ntuple/inc/ROOT/RPageStorage.hxx

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@ namespace ROOT {
4747
class RNTupleModel;
4848

4949
namespace Internal {
50-
51-
class RPageAllocator;
5250
class RColumn;
51+
class RMiniFileReader;
52+
class RPageAllocator;
5353
struct RNTupleModelChangeset;
5454

5555
enum class EPageStorageType {
@@ -313,6 +313,10 @@ public:
313313

314314
virtual ROOT::NTupleSize_t GetNEntries() const = 0;
315315

316+
/// Creates a new RPageSink linked to the same underlying storage as this, writing to a new RNTuple called `newName`.
317+
/// The existing sink will stay valid. The existing sink and the new one must not write concurrently.
318+
virtual std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view newName) const = 0;
319+
316320
/// Physically creates the storage container to hold the ntuple (e.g., a key in a TFile or an S3 bucket)
317321
/// Init() associates column handles to the columns referenced by the model
318322
void Init(RNTupleModel &model)
@@ -808,6 +812,8 @@ public:
808812
virtual std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
809813
LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) = 0;
810814

815+
/// Returns the RMiniFileReader used by this page source, if any. The default implementation returns nullptr;
/// derived sources that have a reader may override this.
virtual RMiniFileReader *GetUnderlyingReader() { return nullptr; }
816+
811817
/// Parallel decompression and unpacking of the pages in the given cluster. The unzipped pages are supposed
812818
/// to be preloaded in a page pool attached to the source. The method is triggered by the cluster pool's
813819
/// unzip thread. It is an optional optimization, the method can safely do nothing. In particular, the

tree/ntuple/inc/ROOT/RPageStorageDaos.hxx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@ protected:
141141
public:
142142
RPageSinkDaos(std::string_view ntupleName, std::string_view uri, const ROOT::RNTupleWriteOptions &options);
143143
~RPageSinkDaos() override;
144+
145+
std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view) const final;
144146
}; // class RPageSinkDaos
145147

146148
// clang-format off

tree/ntuple/inc/ROOT/RPageStorageFile.hxx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ public:
9898
RPageSinkFile(RPageSinkFile &&) = default;
9999
RPageSinkFile &operator=(RPageSinkFile &&) = default;
100100
~RPageSinkFile() override;
101+
102+
std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view) const final;
103+
104+
/// Returns the raw pointer obtained from fWriter.get(); the caller does not receive ownership through this call.
ROOT::Internal::RNTupleFileWriter *GetUnderlyingWriter() const { return fWriter.get(); }
101105
}; // class RPageSinkFile
102106

103107
// clang-format off
@@ -149,6 +153,8 @@ private:
149153
std::unique_ptr<ROOT::Internal::RCluster>
150154
PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector<RRawFile::RIOVec> &readRequests);
151155

156+
/// Returns the address of this source's fReader member.
RMiniFileReader *GetUnderlyingReader() final { return &fReader; }
157+
152158
protected:
153159
void LoadStructureImpl() final;
154160
ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
@@ -173,6 +179,11 @@ public:
173179
RPageSourceFile &operator=(RPageSourceFile &&) = delete;
174180
~RPageSourceFile() override;
175181

182+
/// Creates a new RPageSourceFile using the same underlying file as this one but referring to a different RNTuple,
183+
/// represented by `anchor`.
184+
std::unique_ptr<RPageSourceFile>
185+
OpenWithDifferentAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
186+
176187
void
177188
LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;
178189

tree/ntuple/src/RMiniFile.cxx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,3 +1608,11 @@ void ROOT::Internal::RNTupleFileWriter::WriteTFileSkeleton(int defaultCompressio
16081608
fileSimple.Write(&padding, sizeof(padding));
16091609
fileSimple.fKeyOffset = fileSimple.fFilePos;
16101610
}
1611+
1612+
/// Returns the TDirectory of `writer` when its fFile variant currently holds an RFileProper;
/// otherwise returns nullptr.
TDirectory *ROOT::Internal::GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer)
{
   auto *fileProper = std::get_if<ROOT::Internal::RNTupleFileWriter::RFileProper>(&writer.fFile);
   return fileProper ? fileProper->fDirectory : nullptr;
}

tree/ntuple/src/RNTupleParallelWriter.cxx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,10 @@ class RPageSynchronizingSink : public RPageSink {
111111
{
112112
throw ROOT::RException(R__FAIL("should never commit dataset via RPageSynchronizingSink"));
113113
}
114+
std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view) const final
115+
{
116+
throw ROOT::RException(R__FAIL("CloneWithNewRNTuple unavailable for RPageSynchronizingSink"));
117+
}
114118

115119
RSinkGuard GetSinkGuard() final { return RSinkGuard(fMutex); }
116120
};

tree/ntuple/src/RPageStorageDaos.cxx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,12 @@ void ROOT::Experimental::Internal::RPageSinkDaos::WriteNTupleAnchor()
483483
kDistributionKeyDefault, kAttributeKeyAnchor, kCidMetadata);
484484
}
485485

486+
std::unique_ptr<ROOT::Internal::RPageSink>
487+
ROOT::Experimental::Internal::RPageSinkDaos::CloneWithNewRNTuple(std::string_view) const
488+
{
489+
throw ROOT::RException(R__FAIL("this method is not available for the DAOS backend"));
490+
}
491+
486492
////////////////////////////////////////////////////////////////////////////////
487493

488494
ROOT::Experimental::Internal::RPageSourceDaos::RPageSourceDaos(std::string_view ntupleName, std::string_view uri,

tree/ntuple/src/RPageStorageFile.cxx

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,18 @@ void ROOT::Internal::RPageSinkFile::CommitDatasetImpl(unsigned char *serializedF
252252
fWriter->Commit(GetWriteOptions().GetCompression());
253253
}
254254

255+
std::unique_ptr<ROOT::Internal::RPageSink>
256+
ROOT::Internal::RPageSinkFile::CloneWithNewRNTuple(std::string_view newName) const
257+
{
258+
if (auto *dir = Internal::GetUnderlyingDirectory(*fWriter)) {
259+
auto opts = ROOT::RNTupleWriteOptions();
260+
opts.SetCompression(GetWriteOptions().GetCompression());
261+
return std::make_unique<ROOT::Internal::RPageSinkFile>(newName, *dir, opts);
262+
}
263+
// TODO: support this method also for non-TFile-based writers
264+
throw ROOT::RException(R__FAIL("cannot CloneWithNewRNTuple a non-TFile-based Sink."));
265+
}
266+
255267
////////////////////////////////////////////////////////////////////////////////
256268

257269
ROOT::Internal::RPageSourceFile::RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &opts)
@@ -305,6 +317,15 @@ ROOT::Internal::RPageSourceFile::CreateFromAnchor(const RNTuple &anchor, const R
305317
return pageSource;
306318
}
307319

320+
std::unique_ptr<ROOT::Internal::RPageSourceFile>
321+
ROOT::Internal::RPageSourceFile::OpenWithDifferentAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options)
322+
{
323+
auto pageSource = std::make_unique<RPageSourceFile>("", fFile->Clone(), options);
324+
pageSource->fAnchor = anchor;
325+
pageSource->fNTupleName = pageSource->fDescriptorBuilder.GetDescriptor().GetName();
326+
return pageSource;
327+
}
328+
308329
ROOT::Internal::RPageSourceFile::~RPageSourceFile() = default;
309330

310331
void ROOT::Internal::RPageSourceFile::LoadStructureImpl()

0 commit comments

Comments
 (0)