Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ public class BibtexParser implements Parser {
private final MetaDataParser metaDataParser;
private final Map<String, String> parsedBibDeskGroups;

private final StringBuilder currentEntryBuffer = new StringBuilder();

private GroupTreeNode bibDeskGroupTreeNode;

public BibtexParser(@NonNull ImportFormatPreferences importFormatPreferences, FileUpdateMonitor fileMonitor) {
Expand Down Expand Up @@ -322,7 +324,7 @@ private void parseRemainingContent() {
database.setEpilog(dumpTextReadSoFarToString().trim());
}

private void parseAndAddEntry(String type) {
private void parseAndAddEntry(String type) throws IOException {
int startLine = line;
int startColumn = column;
try {
Expand All @@ -349,13 +351,34 @@ private void parseAndAddEntry(String type) {
entry.setParsedSerialization(parsedSerialization);

database.insertEntry(entry);
currentEntryBuffer.setLength(0);
} catch (IOException ex) {
// This makes the parser more robust:
// If an exception is thrown when parsing an entry, drop the entry and try to resume parsing.
String msg = ex.getMessage();
LOGGER.warn("Could not parse entry", ex);
String errorMessage = Localization.lang("Error occurred when parsing entry") + ": '" + ex.getMessage()
+ "'. " + "\n\n" + Localization.lang("JabRef skipped the entry.");
parserResult.addWarning(new ParserResult.Range(startLine, startColumn, line, column), errorMessage);
if (msg != null && msg.startsWith("RECOVER:")) {
int safePos = Integer.parseInt(msg.substring("RECOVER:".length()));
int consumed = currentEntryBuffer.length();

// roll back to the start
for (int i = 0; i < consumed; i++) {
unread(currentEntryBuffer.charAt(consumed - 1 - i));
}

// go to safePos
for (int i = 0; i < safePos; i++) {
read();
}

int next = peek();
LOGGER.info(">>> RECOVERED NEXT CHAR = [{}] @ line {}", (char) next, line);
currentEntryBuffer.setLength(0);
dumpTextReadSoFarToString();
}
}
}

Expand Down Expand Up @@ -1087,6 +1110,7 @@ private StringBuilder parseBracketedFieldContent() throws IOException {
StringBuilder value = new StringBuilder();

consume('{');
currentEntryBuffer.append('{');

int brackets = 0;
char character;
Expand Down Expand Up @@ -1124,19 +1148,50 @@ private StringBuilder parseBracketedFieldContent() throws IOException {
if (isClosingBracket && (brackets == 0)) {
return value;
} else if (isEOFCharacter(character)) {
throw new IOException("Error in line " + line + ": EOF in mid-string");
String scanned = currentEntryBuffer.toString();
int pos = findRecoveryStart(scanned, scanned.length() - 1);
throw new IOException("RECOVER:" + pos);
} else if ((character == '{') && (!isEscapeSymbol(lastCharacter))) {
brackets++;
} else if (isClosingBracket) {
brackets--;
}

currentEntryBuffer.append(character);
value.append(character);

lastCharacter = character;
}
}

/**
 * Scans {@code buffer} backwards, starting at {@code failPos}, to pick the offset at which
 * parsing should resume after a bracket-balancing failure.
 * <p>
 * While walking towards the beginning of the buffer, every <code>}</code> is counted as pending and
 * every <code>{</code> cancels one pending <code>}</code>. Each {@code @} seen while nothing is
 * pending is remembered as a candidate entry start (a later, i.e. further-left, candidate replaces
 * an earlier one). The scan stops at the first <code>{</code> that has no pending <code>}</code>
 * to cancel.
 *
 * @param buffer  the text consumed so far for the current entry
 * @param failPos index in {@code buffer} at which the backwards scan starts; values past the end
 *                of the buffer are clamped to the last character instead of failing
 * @return the remembered {@code @} index if the scan stopped at an unmatched <code>{</code> and a
 *         candidate was recorded; the (clamped) start index if it stopped there without a
 *         candidate; {@code 0} if the beginning of the buffer was reached without finding an
 *         unmatched <code>{</code> (this includes an empty buffer or a negative {@code failPos})
 */
private int findRecoveryStart(String buffer, int failPos) {
    // Clamp so that an out-of-range failPos cannot index past the end of the buffer.
    int scanStart = Math.min(failPos, buffer.length() - 1);
    int pendingClosing = 0;
    int entryStart = -1;

    for (int i = scanStart; i >= 0; i--) {
        char c = buffer.charAt(i);

        if (c == '}') {
            pendingClosing++;
        } else if (c == '{') {
            if (pendingClosing == 0) {
                // Unmatched '{' found: stop scanning here.
                return (entryStart >= 0) ? entryStart : scanStart;
            }
            pendingClosing--;
        }

        // An '@' only counts as an entry start when no '}' is pending.
        // This check intentionally runs after the bracket bookkeeping for the same character.
        if ((c == '@') && (pendingClosing == 0)) {
            entryStart = i;
        }
    }

    return 0;
}

/**
 * Tells whether the given character is the backslash.
 *
 * @param character the character to inspect
 * @return {@code true} if {@code character} is a backslash, {@code false} otherwise
 */
private boolean isEscapeSymbol(char character) {
    final char backslash = '\\';
    return character == backslash;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2247,4 +2247,38 @@ void parseInvalidBibDeskFilesResultsInWarnings() throws IOException {

assertEquals(List.of(firstEntry, secondEntry), result.getDatabase().getEntries());
}

@Test
public void unmatchedBracketsDoesNotStopParsing() throws Exception {
String entries = """
@Article{ok1,
title = {AAA},
}

@Article{bad,
title = {accuracy by 3 to 15{{\\%}.
}
Comment on lines 2258 to 2260
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is lost — can it be recovered partially?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that is hard to do: I cannot think of a way to detect which parts to discard and which to keep. So I chose to drop the whole entry with the unbalanced bracket and keep the others.
image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My rescue is always to put the thing in the "comment" field of a new entry. I am not sure at which point escaping should be done.

The other alternative is to copy the whole string as a user comment to the next entry — the user comment is the part above @{entrytype}.


@Article{ok2,
title = {BBB},
}
""";

ParserResult result = parser.parse(Reader.of(entries));
BibDatabase database = result.getDatabase();
List<BibEntry> entryList = database.getEntries();

BibEntry firstEntry = new BibEntry(StandardEntryType.Article)
.withCitationKey("ok1")
.withField(StandardField.TITLE, "AAA");

BibEntry secondEntry = new BibEntry(StandardEntryType.Article)
.withCitationKey("ok2")
.withField(StandardField.TITLE, "BBB");

assertEquals(
List.of(firstEntry, secondEntry),
result.getDatabase().getEntries()
);
}
}