Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ public class BibtexParser implements Parser {
private final MetaDataParser metaDataParser;
private final Map<String, String> parsedBibDeskGroups;

private StringBuilder currentEntryBuffer = new StringBuilder();

private GroupTreeNode bibDeskGroupTreeNode;
private String lastBrokenEntryText = null;

public BibtexParser(@NonNull ImportFormatPreferences importFormatPreferences, FileUpdateMonitor fileMonitor) {
this.importFormatPreferences = importFormatPreferences;
Expand Down Expand Up @@ -322,13 +325,14 @@ private void parseRemainingContent() {
database.setEpilog(dumpTextReadSoFarToString().trim());
}

private void parseAndAddEntry(String type) {
private void parseAndAddEntry(String type) throws RecoverableParseException, IOException {
int startLine = line;
int startColumn = column;
String commentsAndEntryTypeDefinition = "";
try {
// collect all comments and the entry type definition in front of the actual entry
// this is at least `@Type`
String commentsAndEntryTypeDefinition = dumpTextReadSoFarToString();
commentsAndEntryTypeDefinition = dumpTextReadSoFarToString();

// remove first newline
// this is appended by JabRef during writing automatically
Expand All @@ -339,26 +343,85 @@ private void parseAndAddEntry(String type) {
}

BibEntry entry = parseEntry(type);
String commentBeforeEntry = "";
if (lastBrokenEntryText != null && !lastBrokenEntryText.isEmpty()) {
commentBeforeEntry = lastBrokenEntryText;
lastBrokenEntryText = null;
}

commentBeforeEntry = commentBeforeEntry
+ commentsAndEntryTypeDefinition.substring(0, commentsAndEntryTypeDefinition.lastIndexOf('@'));
// store comments collected without type definition
entry.setCommentsBeforeEntry(
commentsAndEntryTypeDefinition.substring(0, commentsAndEntryTypeDefinition.lastIndexOf('@')));
commentBeforeEntry);

// store complete parsed serialization (comments, type definition + type contents)

String parsedSerialization = commentsAndEntryTypeDefinition + dumpTextReadSoFarToString();
entry.setParsedSerialization(parsedSerialization);

database.insertEntry(entry);
} catch (IOException ex) {
currentEntryBuffer = new StringBuilder();
} catch (RecoverableParseException ex) {
// This makes the parser more robust:
// If an exception is thrown when parsing an entry, drop the entry and try to resume parsing.
LOGGER.warn("Could not parse entry", ex);

String errorMessage = Localization.lang("Error occurred when parsing entry") + ": '" + ex.getMessage()
+ "'. " + "\n\n" + Localization.lang("JabRef skipped the entry.");

parserResult.addWarning(new ParserResult.Range(startLine, startColumn, line, column), errorMessage);
int safePos = ex.getRecoveryPosition();
int consumed = currentEntryBuffer.length();

String chunk = getPureTextFromFileSnapshot();
int totalLen = chunk.length();
int valueLen = currentEntryBuffer.length();
int headerLen = totalLen - valueLen;
if (headerLen < 0) {
headerLen = 0;
}

int cut = headerLen + safePos;
if (cut > totalLen) {
cut = totalLen;
}

String brokenBody = chunk.substring(0, cut);
String brokenEntryText = commentsAndEntryTypeDefinition + brokenBody;

lastBrokenEntryText = brokenEntryText;

// roll back to the start
for (int i = 0; i < consumed; i++) {
unread(currentEntryBuffer.charAt(consumed - 1 - i));
}

// go to safePos
for (int i = 0; i < safePos; i++) {
read();
}

int next = peek();
LOGGER.info(">>> RECOVERED NEXT CHAR = [{}] @ line {}", (char) next, line);
currentEntryBuffer = new StringBuilder();
dumpTextReadSoFarToString();
} catch (IOException ex) {
LOGGER.warn("Could not parse entry", ex);
String errorMessage = Localization.lang("Error occurred when parsing entry") + ": '" + ex.getMessage()
+ "'. " + "\n\n" + Localization.lang("JabRef skipped the entry.");
parserResult.addWarning(new ParserResult.Range(startLine, startColumn, line, column), errorMessage);
}
}

/**
 * Builds a string from the current contents of {@code pureTextFromFile}.
 * <p>
 * The elements are appended one after another in the iteration order of {@code pureTextFromFile};
 * the collection itself is not modified by this method.
 *
 * @return the concatenation of all elements currently held in {@code pureTextFromFile}
 */
private String getPureTextFromFileSnapshot() {
    final StringBuilder snapshot = new StringBuilder();
    // Resolves to StringBuilder.append(Object) for the boxed element, exactly like a direct call would.
    pureTextFromFile.forEach(snapshot::append);
    return snapshot.toString();
}

private void parseJabRefComment(Map<String, String> meta) {
StringBuilder buffer;
int startLine = line;
Expand Down Expand Up @@ -1087,6 +1150,7 @@ private StringBuilder parseBracketedFieldContent() throws IOException {
StringBuilder value = new StringBuilder();

consume('{');
currentEntryBuffer.append('{');

int brackets = 0;
char character;
Expand Down Expand Up @@ -1124,19 +1188,50 @@ private StringBuilder parseBracketedFieldContent() throws IOException {
if (isClosingBracket && (brackets == 0)) {
return value;
} else if (isEOFCharacter(character)) {
throw new IOException("Error in line " + line + ": EOF in mid-string");
String scanned = currentEntryBuffer.toString();
int pos = findRecoveryStart(scanned, scanned.length() - 1);
throw new RecoverableParseException(pos);
} else if ((character == '{') && (!isEscapeSymbol(lastCharacter))) {
brackets++;
} else if (isClosingBracket) {
brackets--;
}

currentEntryBuffer.append(character);
value.append(character);

lastCharacter = character;
}
}

/**
 * Scans {@code buffer} backwards from {@code failPos} looking for a position to resume parsing at.
 * <p>
 * Going right-to-left, every '}' is counted as pending and every '{' cancels one pending '}'.
 * An '@' seen while nothing is pending is remembered as a candidate entry start; because the scan
 * runs backwards, a later match overwrites an earlier one, so the leftmost candidate wins.
 * The scan stops at the first '{' that has no pending '}' left to cancel.
 *
 * @param buffer  the text to scan
 * @param failPos index in {@code buffer} at which the backward scan starts; a negative value skips the scan
 * @return the remembered '@' index when the scan stops at an uncancelled '{', or {@code failPos} if no '@'
 *         was remembered by then; {@code 0} if the start of the buffer is reached without such a '{'
 */
private int findRecoveryStart(String buffer, int failPos) {
    final char[] text = buffer.toCharArray();
    int pendingClosers = 0;
    int entryMarker = -1;

    int index = failPos;
    while (index >= 0) {
        final char current = text[index];

        if (current == '{') {
            if (pendingClosers == 0) {
                // This opener has no closer to its right: stop here.
                return entryMarker < 0 ? failPos : entryMarker;
            }
            pendingClosers--;
        } else if (current == '}') {
            pendingClosers++;
        } else if (current == '@' && pendingClosers == 0) {
            // Candidate entry start outside of any balanced brace pair seen so far.
            entryMarker = index;
        }

        index--;
    }

    return 0;
}

/**
 * Tells whether the given character is a backslash.
 *
 * @param character the character to test
 * @return {@code true} if {@code character} is {@code '\\'}, {@code false} otherwise
 */
private boolean isEscapeSymbol(char character) {
    final char backslash = '\\';
    return character == backslash;
}
Expand Down Expand Up @@ -1213,4 +1308,18 @@ private void consume(char firstOption, char secondOption) throws IOException {
+ " but received " + (char) character);
}
}

/**
 * Unchecked exception that carries an integer position along with the failure.
 * <p>
 * The position is supplied by the code that throws the exception and can be read back
 * through {@link #getRecoveryPosition()}; it is also embedded in the exception message.
 */
private static class RecoverableParseException extends RuntimeException {

    /** Position handed over by the thrower; never changes after construction. */
    private final int recoveryPosition;

    /**
     * @param recoveryPosition the position to report to whoever catches this exception
     */
    RecoverableParseException(int recoveryPosition) {
        super(describe(recoveryPosition));
        this.recoveryPosition = recoveryPosition;
    }

    /**
     * @return the position that was passed to the constructor
     */
    int getRecoveryPosition() {
        return this.recoveryPosition;
    }

    /** Builds the exception message for the given position. */
    private static String describe(int position) {
        return "Recoverable parse error at position " + position;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2247,4 +2247,47 @@ void parseInvalidBibDeskFilesResultsInWarnings() throws IOException {

assertEquals(List.of(firstEntry, secondEntry), result.getDatabase().getEntries());
}

/**
 * Checks that an entry with unbalanced braces does not stop the parser from reading the rest of the input.
 * <p>
 * The input holds three entries; the middle one ("bad") opens more braces than it closes.
 * The test expects exactly two parsed entries ("ok1" and "ok2") and expects the raw text of the
 * malformed entry to be returned by {@code getUserComments()} of the entry that follows it.
 */
@Test
public void unmatchedBracketsDoesNotStopParsing() throws Exception {
// Input: a valid entry, an entry whose title has unbalanced braces, and another valid entry.
// Note: "\\%" inside the text block is the two characters backslash and percent at runtime.
String entries = """
@Article{ok1,
title = {AAA}
}

@Article{bad,
title = {accuracy by 3 to 15{{\\%}
}

@Article{ok2,
title = {BBB}
}
""";

ParserResult result = parser.parse(Reader.of(entries));
BibDatabase database = result.getDatabase();
List<BibEntry> entryList = database.getEntries();

// Expected result for the first well-formed entry.
BibEntry firstEntry = new BibEntry(StandardEntryType.Article)
.withCitationKey("ok1")
.withField(StandardField.TITLE, "AAA");

// Text of the malformed entry, including the blank line after it,
// as it is expected to show up in front of "ok2".
String expectedComment = """
@Article{bad,
title = {accuracy by 3 to 15{{\\%}
}

""";

// Only the two well-formed entries may end up in the database.
assertEquals(2, entryList.size());

BibEntry parsedOk1 = entryList.getFirst();
BibEntry parsedOk2 = entryList.get(1);

// The entry before the malformed one must be parsed completely.
assertEquals(firstEntry, parsedOk1);

// The entry after the malformed one must still be parsed.
assertEquals("BBB", parsedOk2.getField(StandardField.TITLE).get());

// The skipped text must be attached to the following entry instead of being lost.
assertEquals(expectedComment, parsedOk2.getUserComments());
}
}