// Reviewer annotations for the change set below (these comment lines are not part of the patch itself).
//
// Purpose: the patch changes BibtexParser so that hitting EOF inside a bracketed field value no longer raises
// IOException("Error in line ...: EOF in mid-string"). parseBracketedFieldContent now appends the opening '{' and
// every character it adds to the value to the new field currentEntryBuffer and, on EOF, throws the new
// RecoverableParseException carrying the offset returned by findRecoveryStart(scanned, scanned.length() - 1).
// parseAndAddEntry catches that exception, adds a parser warning, stores
// commentsAndEntryTypeDefinition + a prefix of the pureTextFromFile snapshot in lastBrokenEntryText, pushes the
// buffered characters back with unread(), calls read() getRecoveryPosition() times, resets currentEntryBuffer and
// calls dumpTextReadSoFarToString() (result ignored). The next entry that parses successfully gets
// lastBrokenEntryText prepended to the text passed to setCommentsBeforeEntry. findRecoveryStart scans the buffer
// backwards, balancing '}' against '{', and returns the last '@' index seen at balance 0 (or failPos) once an
// unmatched '{' is found, otherwise 0. The test hunk adds unmatchedBracketsDoesNotStopParsing, which expects two
// parsed entries and the broken entry's text in the second entry's getUserComments().
//
// NOTE(review): the LOGGER.info(">>> RECOVERED NEXT CHAR = ...") call looks like leftover debugging output -
//   confirm whether it should be removed or lowered to debug level.
// NOTE(review): currentEntryBuffer is reset after a successful insert and in the RecoverableParseException
//   handler, but not in the plain IOException handler; in the hunks shown, characters buffered before such a
//   failure would still be in the buffer when the next entry is parsed - verify.
// NOTE(review): `consumed` and `valueLen` are both assigned currentEntryBuffer.length(); one looks redundant.
// NOTE(review): lastBrokenEntryText is only consumed when a later entry parses successfully; what happens to it
//   when the broken entry is the last one in the file is not visible in these hunks.
// NOTE(review): the new exception message ("Recoverable parse error at position N") no longer carries the line
//   number that the replaced IOException message contained, and it is what ends up in the user-facing warning.
// NOTE(review): RecoverableParseException extends RuntimeException, yet it is listed in the throws clause of
//   parseAndAddEntry, which also catches it; the added IOException is presumably needed for the
//   unread()/read()/peek() calls inside the handler - confirm.
// NOTE(review): getPureTextFromFileSnapshot iterates with a boxed `Character` loop variable; the element type of
//   pureTextFromFile is not visible here.
// NOTE(review): the patch's line breaks are collapsed and generic type arguments appear to be missing
//   (e.g. `Map parsedBibDeskGroups`, `List entryList`) - presumably an extraction artifact; confirm against the
//   original patch before applying.
diff --git a/jablib/src/main/java/org/jabref/logic/importer/fileformat/BibtexParser.java b/jablib/src/main/java/org/jabref/logic/importer/fileformat/BibtexParser.java index d9f8c9c0125..82a61e52f98 100644 --- a/jablib/src/main/java/org/jabref/logic/importer/fileformat/BibtexParser.java +++ b/jablib/src/main/java/org/jabref/logic/importer/fileformat/BibtexParser.java @@ -115,7 +115,10 @@ public class BibtexParser implements Parser { private final MetaDataParser metaDataParser; private final Map parsedBibDeskGroups; + private StringBuilder currentEntryBuffer = new StringBuilder(); + private GroupTreeNode bibDeskGroupTreeNode; + private String lastBrokenEntryText = null; public BibtexParser(@NonNull ImportFormatPreferences importFormatPreferences, FileUpdateMonitor fileMonitor) { this.importFormatPreferences = importFormatPreferences; @@ -322,13 +325,14 @@ private void parseRemainingContent() { database.setEpilog(dumpTextReadSoFarToString().trim()); } - private void parseAndAddEntry(String type) { + private void parseAndAddEntry(String type) throws RecoverableParseException, IOException { int startLine = line; int startColumn = column; + String commentsAndEntryTypeDefinition = ""; try { // collect all comments and the entry type definition in front of the actual entry // this is at least `@Type` - String commentsAndEntryTypeDefinition = dumpTextReadSoFarToString(); + commentsAndEntryTypeDefinition = dumpTextReadSoFarToString(); // remove first newline // this is appended by JabRef during writing automatically @@ -339,9 +343,17 @@ private void parseAndAddEntry(String type) { } BibEntry entry = parseEntry(type); + String commentBeforeEntry = ""; + if (lastBrokenEntryText != null && !lastBrokenEntryText.isEmpty()) { + commentBeforeEntry = lastBrokenEntryText; + lastBrokenEntryText = null; + } + + commentBeforeEntry = commentBeforeEntry + + commentsAndEntryTypeDefinition.substring(0, commentsAndEntryTypeDefinition.lastIndexOf('@')); // store comments collected without 
type definition entry.setCommentsBeforeEntry( - commentsAndEntryTypeDefinition.substring(0, commentsAndEntryTypeDefinition.lastIndexOf('@'))); + commentBeforeEntry); // store complete parsed serialization (comments, type definition + type contents) @@ -349,16 +361,67 @@ private void parseAndAddEntry(String type) { entry.setParsedSerialization(parsedSerialization); database.insertEntry(entry); - } catch (IOException ex) { + currentEntryBuffer = new StringBuilder(); + } catch (RecoverableParseException ex) { // This makes the parser more robust: // If an exception is thrown when parsing an entry, drop the entry and try to resume parsing. LOGGER.warn("Could not parse entry", ex); + + String errorMessage = Localization.lang("Error occurred when parsing entry") + ": '" + ex.getMessage() + + "'. " + "\n\n" + Localization.lang("JabRef skipped the entry."); + + parserResult.addWarning(new ParserResult.Range(startLine, startColumn, line, column), errorMessage); + int safePos = ex.getRecoveryPosition(); + int consumed = currentEntryBuffer.length(); + + String chunk = getPureTextFromFileSnapshot(); + int totalLen = chunk.length(); + int valueLen = currentEntryBuffer.length(); + int headerLen = totalLen - valueLen; + if (headerLen < 0) { + headerLen = 0; + } + + int cut = headerLen + safePos; + if (cut > totalLen) { + cut = totalLen; + } + + String brokenBody = chunk.substring(0, cut); + String brokenEntryText = commentsAndEntryTypeDefinition + brokenBody; + + lastBrokenEntryText = brokenEntryText; + + // roll back to the start + for (int i = 0; i < consumed; i++) { + unread(currentEntryBuffer.charAt(consumed - 1 - i)); + } + + // go to safePos + for (int i = 0; i < safePos; i++) { + read(); + } + + int next = peek(); + LOGGER.info(">>> RECOVERED NEXT CHAR = [{}] @ line {}", (char) next, line); + currentEntryBuffer = new StringBuilder(); + dumpTextReadSoFarToString(); + } catch (IOException ex) { + LOGGER.warn("Could not parse entry", ex); String errorMessage = 
Localization.lang("Error occurred when parsing entry") + ": '" + ex.getMessage() + "'. " + "\n\n" + Localization.lang("JabRef skipped the entry."); parserResult.addWarning(new ParserResult.Range(startLine, startColumn, line, column), errorMessage); } } + private String getPureTextFromFileSnapshot() { + StringBuilder sb = new StringBuilder(); + for (Character c : pureTextFromFile) { + sb.append(c); + } + return sb.toString(); + } + private void parseJabRefComment(Map meta) { StringBuilder buffer; int startLine = line; @@ -1087,6 +1150,7 @@ private StringBuilder parseBracketedFieldContent() throws IOException { StringBuilder value = new StringBuilder(); consume('{'); + currentEntryBuffer.append('{'); int brackets = 0; char character; @@ -1124,19 +1188,50 @@ private StringBuilder parseBracketedFieldContent() throws IOException { if (isClosingBracket && (brackets == 0)) { return value; } else if (isEOFCharacter(character)) { - throw new IOException("Error in line " + line + ": EOF in mid-string"); + String scanned = currentEntryBuffer.toString(); + int pos = findRecoveryStart(scanned, scanned.length() - 1); + throw new RecoverableParseException(pos); } else if ((character == '{') && (!isEscapeSymbol(lastCharacter))) { brackets++; } else if (isClosingBracket) { brackets--; } + currentEntryBuffer.append(character); value.append(character); lastCharacter = character; } } + private int findRecoveryStart(String buffer, int failPos) { + char[] chars = buffer.toCharArray(); + int unmatched = 0; + int lastEntryStart = -1; + + for (int i = failPos; i >= 0; i--) { + char c = chars[i]; + + if (c == '}') { + unmatched++; + } else if (c == '{') { + if (unmatched > 0) { + unmatched--; + } else { + // Found an unmatched '{', safe to stop trimming + return (lastEntryStart >= 0) ? 
lastEntryStart : failPos; + } + } + + // detect entry start: @ + if (c == '@' && unmatched == 0) { + lastEntryStart = i; + } + } + + return 0; + } + private boolean isEscapeSymbol(char character) { return '\\' == character; } @@ -1213,4 +1308,18 @@ private void consume(char firstOption, char secondOption) throws IOException { + " but received " + (char) character); } } + + private static class RecoverableParseException extends RuntimeException { + + private final int recoveryPosition; + + RecoverableParseException(int recoveryPosition) { + super("Recoverable parse error at position " + recoveryPosition); + this.recoveryPosition = recoveryPosition; + } + + int getRecoveryPosition() { + return recoveryPosition; + } + } } diff --git a/jablib/src/test/java/org/jabref/logic/importer/fileformat/BibtexParserTest.java b/jablib/src/test/java/org/jabref/logic/importer/fileformat/BibtexParserTest.java index 9bde0c834a9..70fe33e8b5b 100644 --- a/jablib/src/test/java/org/jabref/logic/importer/fileformat/BibtexParserTest.java +++ b/jablib/src/test/java/org/jabref/logic/importer/fileformat/BibtexParserTest.java @@ -2247,4 +2247,47 @@ void parseInvalidBibDeskFilesResultsInWarnings() throws IOException { assertEquals(List.of(firstEntry, secondEntry), result.getDatabase().getEntries()); } + + @Test + public void unmatchedBracketsDoesNotStopParsing() throws Exception { + String entries = """ + @Article{ok1, + title = {AAA} + } + + @Article{bad, + title = {accuracy by 3 to 15{{\\%} + } + + @Article{ok2, + title = {BBB} + } + """; + + ParserResult result = parser.parse(Reader.of(entries)); + BibDatabase database = result.getDatabase(); + List entryList = database.getEntries(); + + BibEntry firstEntry = new BibEntry(StandardEntryType.Article) + .withCitationKey("ok1") + .withField(StandardField.TITLE, "AAA"); + + String expectedComment = """ + @Article{bad, + title = {accuracy by 3 to 15{{\\%} + } + + """; + + assertEquals(2, entryList.size()); + + BibEntry parsedOk1 = 
entryList.getFirst(); + BibEntry parsedOk2 = entryList.get(1); + + assertEquals(firstEntry, parsedOk1); + + assertEquals("BBB", parsedOk2.getField(StandardField.TITLE).get()); + + assertEquals(expectedComment, parsedOk2.getUserComments()); + } }