Skip to content
This repository was archived by the owner on Feb 27, 2023. It is now read-only.

Commit f29d4d2

Browse files
author
Scott Stafford
committed
#279 add TikaParserItemProcessor
1 parent 77c2a62 commit f29d4d2

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed
Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
package com.marklogic.spring.batch.item.file;
2+
3+
import com.marklogic.client.io.InputSourceHandle;
4+
import com.marklogic.client.io.marker.AbstractWriteHandle;
5+
import com.marklogic.spring.batch.item.file.support.TikaParser;
6+
import com.marklogic.spring.batch.item.processor.AbstractMarkLogicItemProcessor;
7+
import org.springframework.core.io.Resource;
8+
import org.xml.sax.InputSource;
9+
10+
import java.io.StringReader;
11+
12+
public class TikaParserItemProcessor extends AbstractMarkLogicItemProcessor<Resource> {
13+
14+
@Override
15+
public AbstractWriteHandle getContentHandle(Resource item) throws Exception {
16+
String parsedContent = TikaParser.parseToXML(item.getInputStream());
17+
InputSource inputSource = new InputSource(new StringReader(parsedContent.toString()));
18+
InputSourceHandle handle = new InputSourceHandle(inputSource);
19+
return handle;
20+
}
21+
22+
23+
}

0 commit comments

Comments
 (0)