Skip to content

Commit a8ba0c3

Browse files
committed
scanner: Add context field to patch_hunk_line for better context diff handling
Add enum patch_line_context and context field to struct patch_hunk_line to explicitly represent which file version a line belongs to:

- PATCH_CONTEXT_BOTH: Normal lines (applies to both old and new versions)
- PATCH_CONTEXT_OLD: Lines representing the old file state
- PATCH_CONTEXT_NEW: Lines representing the new file state

For context diffs, this eliminates ambiguity about changed lines ('!'):

- Old section lines are emitted with PATCH_CONTEXT_OLD
- New section lines are emitted with PATCH_CONTEXT_NEW
- Each line is emitted exactly once with appropriate context

This simplifies consumer logic by providing explicit context information instead of requiring manual handling of context diff dual-nature semantics.

Updated scanner_debug utility and documentation to reflect the new API. Fixed test expectations to match the corrected emission behavior.

Assisted-by: Cursor
1 parent 58d6f80 commit a8ba0c3

5 files changed

Lines changed: 67 additions & 11 deletions

File tree

README_scanner_debug.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,13 @@ Complete patch headers (file names, types, Git metadata)
5656
Hunk range information (`@@ -1,3 +1,3 @@` or `*** 1,3 ****`)
5757

5858
### HUNK_LINE
59-
Individual patch lines with type:
60-
- **Context (' ')**: Unchanged lines
61-
- **Added ('+')**: Added lines
62-
- **Removed ('-')**: Removed lines
63-
- **Changed ('!')**: Changed lines (context diffs)
59+
Individual patch lines with type and context:
60+
- **Context (' ')**: Unchanged lines (context: both)
61+
- **Added ('+')**: Added lines (context: both)
62+
- **Removed ('-')**: Removed lines (context: both)
63+
- **Changed ('!')**: Changed lines (context diffs only)
64+
- Each changed line is emitted exactly once: old-section lines with context "old", new-section lines with context "new"
65+
- The context tells consumers whether a '!' line belongs to the old or the new version of the file
6466

6567
### BINARY
6668
Binary patch markers (`Binary files differ`, `GIT binary patch`)
@@ -106,10 +108,10 @@ Scanner Debug Output for: example.patch
106108
Range: -1,3 +1,3
107109
108110
[HUNK_LINE] HUNK_LINE (line 4, pos 38)
109-
Type: Context (' ') Content: "line1\n"
111+
Type: Context (' ') Context: both Content: "line1\n"
110112
111113
[HUNK_LINE] HUNK_LINE (line 5, pos 45)
112-
Type: Removed ('-') Content: "old line\n"
114+
Type: Removed ('-') Context: both Content: "old line\n"
113115
114116
================================================================
115117
Summary: Processed 6 events, scanner finished normally

src/patch_scanner.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,11 @@ static int scanner_context_buffer_emit_next(patch_scanner_t *scanner, const patc
219219
if (scanner->context_buffer_emit_index < scanner->context_buffer_count) {
220220
/* Emit the next buffered line */
221221
scanner_init_content(scanner, PATCH_CONTENT_HUNK_LINE);
222-
scanner->current_content.data.line = &scanner->context_buffer[scanner->context_buffer_emit_index];
222+
223+
/* Get the buffered line - context was set correctly when buffered */
224+
struct patch_hunk_line *buffered_line = &scanner->context_buffer[scanner->context_buffer_emit_index];
225+
226+
scanner->current_content.data.line = buffered_line;
223227
*content = &scanner->current_content;
224228
scanner->context_buffer_emit_index++;
225229
return PATCH_SCAN_OK;
@@ -528,13 +532,16 @@ int patch_scanner_next(patch_scanner_t *scanner, const patch_content_t **content
528532

529533
/* For context diffs, check if we should buffer this line */
530534
if (scanner->context_buffering) {
531-
/* Buffer this line instead of emitting it */
535+
/* Buffer this line for later emission */
532536
result = scanner_context_buffer_add(scanner, &scanner->current_line);
533537
if (result != PATCH_SCAN_OK) {
534538
scanner->state = STATE_ERROR;
535539
return result;
536540
}
537-
/* Continue to next line without emitting */
541+
542+
/* All lines in old section are buffered for later emission - no immediate emission */
543+
544+
/* Continue to the next line without emitting */
538545
continue;
539546
}
540547

@@ -1396,6 +1403,22 @@ static int scanner_emit_hunk_line(patch_scanner_t *scanner, const char *line)
13961403
scanner->current_line.type = (enum patch_hunk_line_type)line_type;
13971404
scanner->current_line.position = scanner->current_position;
13981405

1406+
/* Set context based on line type and diff format */
1407+
if (line_type == '!' && scanner->current_headers.type == PATCH_TYPE_CONTEXT) {
1408+
/* For context diff changed lines, context depends on when we emit:
1409+
* - During buffering (old section): PATCH_CONTEXT_OLD
1410+
* - When not buffering (new section): PATCH_CONTEXT_NEW
1411+
*/
1412+
if (scanner->context_buffering) {
1413+
scanner->current_line.context = PATCH_CONTEXT_OLD;
1414+
} else {
1415+
scanner->current_line.context = PATCH_CONTEXT_NEW;
1416+
}
1417+
} else {
1418+
/* Normal lines apply to both old and new file versions */
1419+
scanner->current_line.context = PATCH_CONTEXT_BOTH;
1420+
}
1421+
13991422
/* Populate full line including prefix, excluding trailing newline */
14001423
scanner->current_line.line = line;
14011424
size_t line_len = strlen(line);

src/patch_scanner.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,13 @@ enum patch_hunk_line_type {
8686
PATCH_LINE_NO_NEWLINE = '\\' /* No newline marker */
8787
};
8888

89+
/* Context for patch lines (especially important for context diff changed lines) */
90+
enum patch_line_context {
91+
PATCH_CONTEXT_BOTH = 0, /* Normal lines (space, +, -, \) - applies to both old and new */
92+
PATCH_CONTEXT_OLD, /* This represents the "old" version of a changed line (!) */
93+
PATCH_CONTEXT_NEW /* This represents the "new" version of a changed line (!) */
94+
};
95+
8996
/**
9097
* Complete patch headers information.
9198
*
@@ -177,6 +184,19 @@ struct patch_hunk {
177184
* - PATCH_LINE_CHANGED ('!'): Line changed between files (context diffs only)
178185
* - PATCH_LINE_NO_NEWLINE ('\\'): Not a real line, indicates previous line has no newline
179186
*
187+
* CONTEXT HANDLING:
188+
* - context indicates which version of the file this line represents
189+
* - PATCH_CONTEXT_BOTH: Normal lines (applies to both old and new file versions)
190+
* - PATCH_CONTEXT_OLD: For PATCH_LINE_CHANGED, this is the "old" version of the line
191+
* - PATCH_CONTEXT_NEW: For PATCH_LINE_CHANGED, this is the "new" version of the line
192+
*
193+
* CONTEXT DIFF EMISSION:
194+
* - For context diffs, each changed line (!) is emitted exactly once, tagged by the section it came from:
195+
* 1. Lines from the old section: context = PATCH_CONTEXT_OLD
196+
* 2. Lines from the new section: context = PATCH_CONTEXT_NEW
197+
* - This allows consumers to easily filter for "before" vs "after" views
198+
* - Unified diffs don't have this behavior (changed lines appear as separate - and + lines)
199+
*
180200
* CONTENT HANDLING:
181201
* - line points to the FULL original line INCLUDING the +/- prefix character
182202
* - length is the byte length of the full line (includes prefix, excludes newline)
@@ -186,6 +206,7 @@ struct patch_hunk {
186206
*/
187207
struct patch_hunk_line {
188208
enum patch_hunk_line_type type; /* Line operation type (space, +, -, !, \) */
209+
enum patch_line_context context; /* Which file version this line represents */
189210
const char *line; /* Full original line INCLUDING prefix (NOT null-terminated) */
190211
size_t length; /* Length of full line in bytes (includes prefix, excludes newline) */
191212
long position; /* Byte offset in input where this line appears */

src/scanner_debug.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,16 @@ static void print_hunk_line_info(const struct patch_hunk_line *line)
423423
printf(" %sType:%s %s", C(COLOR_BOLD), C(COLOR_RESET),
424424
hunk_line_type_name(line->type));
425425

426+
/* Show context information */
427+
const char *context_name;
428+
switch (line->context) {
429+
case PATCH_CONTEXT_BOTH: context_name = "both"; break;
430+
case PATCH_CONTEXT_OLD: context_name = "old"; break;
431+
case PATCH_CONTEXT_NEW: context_name = "new"; break;
432+
default: context_name = "unknown"; break;
433+
}
434+
printf(" %sContext:%s %s", C(COLOR_BOLD), C(COLOR_RESET), context_name);
435+
426436
if (show_content && line->line && line->length > 0) {
427437
printf(" %sContent:%s ", C(COLOR_BOLD), C(COLOR_RESET));
428438
/* Extract content without prefix for display */

tests/scanner/test_basic.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1484,7 +1484,7 @@ static void test_context_diff_multi_hunk_parsing(void)
14841484
/* Basic structure validation */
14851485
assert(header_count == 1); /* file1 */
14861486
assert(hunk_header_count == 1); /* one hunk */
1487-
assert(change_line_count == 2); /* ! a and ! b */
1487+
assert(change_line_count == 2); /* ! a (old context), ! b (new context) */
14881488

14891489
/* The key assertions: change lines were found as HUNK_LINE (not NON-PATCH) */
14901490
assert(found_change_a == 1); /* ! a was parsed as HUNK_LINE */

0 commit comments

Comments
 (0)