Skip to content

Commit 4d9bc2b

Browse files
author
Alex Dolski
committed
readDocument() reads native metadata
1 parent 4150b6e commit 4d9bc2b

File tree

2 files changed

+27
-8
lines changed

2 files changed

+27
-8
lines changed

src/main/java/edu/illinois/library/cantaloupe/processor/PdfBoxProcessor.java

+20 -8
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
import org.apache.pdfbox.cos.COSObject;
2222
import org.apache.pdfbox.pdmodel.DefaultResourceCache;
2323
import org.apache.pdfbox.pdmodel.PDDocument;
24+
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
2425
import org.apache.pdfbox.pdmodel.PDPage;
2526
import org.apache.pdfbox.pdmodel.common.PDMetadata;
2627
import org.apache.pdfbox.pdmodel.common.PDRectangle;
@@ -37,6 +38,7 @@
3738
import java.nio.file.Path;
3839
import java.util.Collections;
3940
import java.util.EnumSet;
41+
import java.util.HashMap;
4042
import java.util.Map;
4143
import java.util.Set;
4244

@@ -178,14 +180,24 @@ public void put(COSObject indirect, PDXObject xobject) {
178180
}
179181
});
180182

181-
// Read the document's XMP metadata.
182-
final PDMetadata pdfMetadata = doc.getDocumentCatalog().getMetadata();
183-
if (pdfMetadata != null) {
184-
try (InputStream is = pdfMetadata.exportXMPMetadata()) {
185-
ByteArrayOutputStream os = new ByteArrayOutputStream();
186-
is.transferTo(os);
187-
metadata = new Metadata();
188-
metadata.setXMP(os.toByteArray());
183+
metadata = new Metadata();
184+
{ // Read the document's native metadata.
185+
PDDocumentInformation info = doc.getDocumentInformation();
186+
Map<String, String> pdfMetadata = new HashMap<>();
187+
for (String key : info.getMetadataKeys()) {
188+
pdfMetadata.put(key, info.getPropertyStringValue(key).toString());
189+
}
190+
metadata.setNativeMetadata(pdfMetadata);
191+
}
192+
{ // Read the document's XMP metadata.
193+
PDMetadata pdfMetadata = doc.getDocumentCatalog().getMetadata();
194+
if (pdfMetadata != null) {
195+
try (InputStream is = pdfMetadata.exportXMPMetadata()) {
196+
ByteArrayOutputStream os = new ByteArrayOutputStream();
197+
is.transferTo(os);
198+
199+
metadata.setXMP(os.toByteArray());
200+
}
189201
}
190202
}
191203

src/test/java/edu/illinois/library/cantaloupe/processor/PdfBoxProcessorTest.java

+7
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,13 @@ void testProcessWithIllegalPageOptionThrowsException() throws Exception {
8888
() -> instance.process(ops, imageInfo, outputStream));
8989
}
9090

91+
@Test
92+
void testReadInfoNativeMetadataAwareness() throws Exception {
93+
instance.setSourceFile(TestUtil.getImage("pdf-xmp.pdf"));
94+
Info info = instance.readInfo();
95+
assertTrue(info.getMetadata().getNativeMetadata().isPresent());
96+
}
97+
9198
@Test
9299
void testReadInfoXMPAwareness() throws Exception {
93100
instance.setSourceFile(TestUtil.getImage("pdf-xmp.pdf"));

0 commit comments

Comments
 (0)