Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion sip-app/src/main/java/eu/delving/sip/files/ReportWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

package eu.delving.sip.files;

import eu.delving.groovy.DiscardRecordException;
import eu.delving.groovy.MappingException;
import eu.delving.groovy.MetadataRecord;
import eu.delving.groovy.XmlNodePrinter;
Expand Down Expand Up @@ -50,6 +51,8 @@
*/

public class ReportWriter {

private final Object lock = new Object();
private File reportFile;
private File reportIndexFile;
private File reportConclusionFile;
Expand All @@ -74,12 +77,39 @@ public ReportWriter(File reportFile, File reportIndexFile, File reportConclusion
this.out = new OutputStreamWriter(count, "UTF-8");
}

public void invalid(MappingResult mappingResult, Exception e) throws IOException {
public void invalid(MappingResult mappingResult, Throwable e) throws IOException {
report(ReportType.INVALID, e.getMessage());
out.write(mappingResult.toXml());
terminate();
}

/**
 * Writes a report entry for a record that failed, choosing the entry from the kind of failure:
 * a {@link DiscardRecordException} goes to {@code discarded}, a {@link MappingException} goes to
 * {@code unexpected}, and anything else goes to {@code invalid}.
 * <p>
 * The report write performed here happens while holding this writer's private lock.
 *
 * @param metadataRecord the input record; passed on for discarded and unexpected failures
 * @param result         the mapping result; passed on only for the "invalid" case
 * @param e              the failure to record, must not be null
 * @return {@code false} when the record was deliberately discarded, {@code true} for every other failure
 * @throws NullPointerException if {@code e} is null
 * @throws RuntimeException     wrapping the {@link IOException} if writing the report fails; the original
 *                              failure {@code e} is attached to that IOException as a suppressed exception
 */
public boolean recordError(MetadataRecord metadataRecord, MappingResult result, Throwable e) {
    if (e == null) {
        // Name the offending argument so the stack trace is self-explanatory.
        throw new NullPointerException("e");
    }

    try {
        // A single lock acquisition covers the whole dispatch; each branch performs exactly one write.
        synchronized (lock) {
            if (e instanceof DiscardRecordException) {
                discarded(metadataRecord, e.getMessage());
                return false;
            }
            if (e instanceof MappingException) {
                unexpected(metadataRecord, (MappingException) e);
            } else {
                invalid(result, e);
            }
        }
    } catch (IOException ioe) {
        // Keep the failure that was being reported visible alongside the I/O problem.
        ioe.addSuppressed(e);
        throw new RuntimeException(ioe);
    }
    return true;
}

public void discarded(MetadataRecord inputRecord, String discardMessage) throws IOException {
report(ReportType.DISCARDED, discardMessage);
out.write("Reason: ");
Expand Down
4 changes: 2 additions & 2 deletions sip-app/src/main/java/eu/delving/sip/model/SipModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -441,11 +441,11 @@ private RecordScanner(ScanPredicate scanPredicate, Swing finished) {
public void run() {
try {
if (parser == null) {
parser = new MetadataParser(dataSetModel.getDataSet().openSourceInputStream(), statsModel.getRecordCount());
parser = new MetadataParser(dataSetModel.getDataSet().openSourceInputStream());
}
parser.setNotExhausted();
parser.setProgressListener(progressListener);
for (MetadataRecord metadataRecord = parser.nextRecord(); metadataRecord != null && !metadataRecord.isPoison(); metadataRecord = parser.nextRecord()) {
for (MetadataRecord metadataRecord = parser.nextRecord(); metadataRecord != null; metadataRecord = parser.nextRecord()) {
if (scanPredicate == null || scanPredicate.accept(metadataRecord)) {
for (ParseListener parseListener : parseListeners) {
parseListener.updatedRecord(metadataRecord);
Expand Down
139 changes: 72 additions & 67 deletions sip-app/src/main/java/eu/delving/sip/xml/AnalysisParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,24 @@
import eu.delving.sip.files.DataSet;
import eu.delving.sip.model.DataSetModel;
import eu.delving.stats.Stats;
import org.apache.commons.io.IOUtils;
import org.codehaus.stax2.XMLStreamReader2;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
import java.io.InputStream;

/**
* Analyzes XML input and compiles statistics. When analysis of a sourced data set fails, the source file
* is deleted and the listener is notified of the failure.
*
*/
public class AnalysisParser implements Work.LongTermWork, Work.DataSetWork {
public static final int ELEMENT_STEP = 10000;
private Stats stats = new Stats();
private Listener listener;
private DataSetModel dataSetModel;

private final Listener listener;
private final DataSetModel dataSetModel;
private final int maxUniqueValueLength;
private ProgressListener progressListener;

public interface Listener {
Expand All @@ -60,7 +60,7 @@ public interface Listener {
public AnalysisParser(DataSetModel dataSetModel, int maxUniqueValueLength, Listener listener) {
this.dataSetModel = dataSetModel;
this.listener = listener;
stats.maxUniqueValueLength = maxUniqueValueLength;
this.maxUniqueValueLength = maxUniqueValueLength;
}

@Override
Expand All @@ -82,71 +82,26 @@ public void setProgressListener(ProgressListener progressListener) {
@Override
public void run() {
try {
XMLInputFactory xmlif = XMLToolFactory.xmlInputFactory();
Path path = Path.create();
InputStream inputStream = null;
if (dataSetModel.isEmpty()) return;
try {
switch (dataSetModel.getDataSetState()) {
case SOURCED:
inputStream = dataSetModel.getDataSet().openSourceInputStream();
stats.freshStats();
break;
default:
throw new IllegalStateException("Unexpected state: " + dataSetModel.getDataSetState());
}
stats.name = dataSetModel.getDataSet().getDataSetFacts().get("name");
XMLStreamReader2 input = (XMLStreamReader2) xmlif.createXMLStreamReader(getClass().getName(), inputStream);
StringBuilder text = new StringBuilder();
int count = 0;
while (true) {
switch (input.getEventType()) {
case XMLEvent.START_ELEMENT:
if (++count % ELEMENT_STEP == 0) {
if (listener != null) progressListener.setProgress(count);
}
for (int walk = 0; walk < input.getNamespaceCount(); walk++) {
stats.recordNamespace(input.getNamespacePrefix(walk), input.getNamespaceURI(walk));
}
String chunk = text.toString().trim();
if (!chunk.isEmpty()) {
stats.recordValue(path, chunk);
}
text.setLength(0);
path = path.child(Tag.element(input.getName()));
if (input.getAttributeCount() > 0) {
for (int walk = 0; walk < input.getAttributeCount(); walk++) {
QName attributeName = input.getAttributeName(walk);
Path withAttr = path.child(Tag.attribute(attributeName));
stats.recordValue(withAttr, input.getAttributeValue(walk));
}
}
break;
case XMLEvent.CHARACTERS:
case XMLEvent.CDATA:
text.append(input.getText());
break;
case XMLEvent.END_ELEMENT:
// todo: stats.recordRecordEnd()
stats.recordValue(path, text.toString().trim());
text.setLength(0);
path = path.parent();
break;

switch (dataSetModel.getDataSetState()) {
case SOURCED:
Stats stats = new Stats();
stats.maxUniqueValueLength = maxUniqueValueLength;

try (InputStream in = dataSetModel.getDataSet().openSourceInputStream()) {
updateStats(stats, in, progressListener);
stats.name = dataSetModel.getDataSet().getDataSetFacts().get("name");
stats.finish();
listener.success(stats);
}
if (!input.hasNext()) break;
input.next();
}
}
finally {
IOUtils.closeQuietly(inputStream);
break;
default:
throw new IllegalStateException("Unexpected state: " + dataSetModel.getDataSetState());
}
stats.finish();
listener.success(stats);
}
catch (CancelException e) {
} catch (CancelException e) {
listener.failure("Cancellation", e);
}
catch (Exception e) {
} catch (Exception e) {
switch (dataSetModel.getDataSetState()) {
case SOURCED:
dataSetModel.getDataSet().deleteSource();
Expand All @@ -157,4 +112,54 @@ public void run() {
listener.failure("The imported file contains errors, the file has been deleted", e);
}
}

/**
 * Reads the XML from the given stream into the given statistics, without any progress reporting.
 * Delegates to the three-argument variant with no progress listener.
 *
 * @param stats       the statistics object to record into
 * @param inputStream the XML input; it is not closed by this method
 * @return the same {@code stats} instance that was passed in
 * @throws XMLStreamException if the XML cannot be read
 * @throws CancelException    declared for parity with the delegate
 */
public static Stats updateStats(Stats stats, InputStream inputStream) throws XMLStreamException, CancelException {
    final ProgressListener noProgress = null;
    return updateStats(stats, inputStream, noProgress);
}

/**
 * Walks the XML from the given stream event by event and records what it sees into {@code stats}.
 * <ul>
 * <li>Start element: records the element's namespace declarations, records any non-empty text collected
 * so far against the current path, descends into the element, and records each attribute value under
 * the element's attribute path.</li>
 * <li>Characters / CDATA: appended to the pending text.</li>
 * <li>End element: records the trimmed pending text (even when empty) against the element's path and
 * moves back up to the parent path.</li>
 * </ul>
 * Neither the stream nor the reader created here is closed by this method.
 *
 * @param stats       the statistics object to record into
 * @param inputStream the XML input
 * @param listener    told the running start-element count every {@code ELEMENT_STEP} start elements;
 *                    may be null
 * @return the same {@code stats} instance that was passed in
 * @throws XMLStreamException if the XML cannot be read
 * @throws CancelException    presumably raised by the progress listener to abort the walk (confirm
 *                            against ProgressListener)
 */
private static Stats updateStats(Stats stats, InputStream inputStream, ProgressListener listener) throws XMLStreamException, CancelException {
    XMLInputFactory factory = XMLToolFactory.xmlInputFactory();
    Path current = Path.create();
    XMLStreamReader2 reader = (XMLStreamReader2) factory.createXMLStreamReader(AnalysisParser.class.getName(), inputStream);
    StringBuilder pending = new StringBuilder();
    int startElements = 0;

    boolean more = true;
    while (more) {
        final int eventType = reader.getEventType();

        if (eventType == XMLEvent.START_ELEMENT) {
            startElements++;
            if (startElements % ELEMENT_STEP == 0 && listener != null) {
                listener.setProgress(startElements);
            }
            for (int ns = 0; ns < reader.getNamespaceCount(); ns++) {
                stats.recordNamespace(reader.getNamespacePrefix(ns), reader.getNamespaceURI(ns));
            }
            // Text seen before this child element belongs to the enclosing path.
            String leading = pending.toString().trim();
            if (!leading.isEmpty()) {
                stats.recordValue(current, leading);
            }
            pending.setLength(0);
            current = current.child(Tag.element(reader.getName()));
            for (int attr = 0; attr < reader.getAttributeCount(); attr++) {
                QName attrName = reader.getAttributeName(attr);
                Path attrPath = current.child(Tag.attribute(attrName));
                stats.recordValue(attrPath, reader.getAttributeValue(attr));
            }
        } else if (eventType == XMLEvent.CHARACTERS || eventType == XMLEvent.CDATA) {
            pending.append(reader.getText());
        } else if (eventType == XMLEvent.END_ELEMENT) {
            // todo: stats.recordRecordEnd()
            stats.recordValue(current, pending.toString().trim());
            pending.setLength(0);
            current = current.parent();
        }

        more = reader.hasNext();
        if (more) {
            reader.next();
        }
    }
    return stats;
}
}
Loading