Skip to content

Commit 6351030

Browse files
mihow and claude committed
fix: 2-digit year timestamp parsing, skip null timestamps, sync error reporting, captures columns
- Add YYMMDDHHMMSS (12-digit) regex pattern for cameras like Farmscape/NSCF that produce 2-digit year filenames
- Skip images with unparseable timestamps during sync instead of importing them with NULL — they can't be grouped into events anyway
- Wrap per-image sync processing in try/except, track failed count, and report it as a "Failed" stage param in job progress UI
- Warn on suspicious pre-2000 timestamps
- Add filename and path columns to captures table (hidden by default)
- Expose path field in SourceImage list serializer and sortable fields

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 0487cf9 commit 6351030

10 files changed

Lines changed: 79 additions & 7 deletions

File tree

ami/jobs/models.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,8 @@ def run(cls, job: "Job"):
606606
"""
607607

608608
job.progress.add_stage(cls.name)
609-
job.progress.add_stage_param(cls.key, "Total files", "")
609+
job.progress.add_stage_param(cls.key, "Total files", 0)
610+
job.progress.add_stage_param(cls.key, "Failed", 0)
610611
job.update_status(JobState.STARTED)
611612
job.started_at = datetime.datetime.now()
612613
job.finished_at = None

ami/main/api/serializers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1106,6 +1106,7 @@ class Meta:
11061106
"deployment",
11071107
"event",
11081108
"url",
1109+
"path",
11091110
# "thumbnail",
11101111
"timestamp",
11111112
"width",

ami/main/api/views.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,7 @@ class SourceImageViewSet(DefaultViewSet, ProjectMixin):
501501
"taxa_count",
502502
"deployment__name",
503503
"event__start",
504+
"path",
504505
]
505506
permission_classes = [ObjectPermission]
506507

ami/main/models.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,7 @@ def sync_captures(self, batch_size=1000, regroup_events_per_batch=False, job: "J
797797
s3_config = deployment.data_source.config
798798
total_size = 0
799799
total_files = 0
800+
failed = 0
800801
source_images = []
801802
django_batch_size = batch_size
802803
sql_batch_size = 1000
@@ -814,8 +815,34 @@ def sync_captures(self, batch_size=1000, regroup_events_per_batch=False, job: "J
814815
logger.debug(f"Processing file {file_index}: {obj}")
815816
if not obj:
816817
continue
817-
source_image = _create_source_image_for_sync(deployment, obj)
818+
try:
819+
source_image = _create_source_image_for_sync(deployment, obj)
820+
except Exception:
821+
failed += 1
822+
msg = f"Failed to process {obj.get('Key', '?')}"
823+
if job:
824+
job.logger.exception(msg)
825+
else:
826+
logger.exception(msg)
827+
continue
828+
818829
if source_image:
830+
# Skip images with unparseable timestamps — they can't be grouped into events
831+
if source_image.timestamp is None:
832+
failed += 1
833+
msg = f"No timestamp parsed from filename: {obj['Key']}"
834+
if job:
835+
job.logger.error(msg)
836+
else:
837+
logger.error(msg)
838+
continue
839+
elif source_image.timestamp.year < 2000:
840+
msg = f"Suspicious timestamp ({source_image.timestamp.year}) for: {obj['Key']}"
841+
if job:
842+
job.logger.warning(msg)
843+
else:
844+
logger.warning(msg)
845+
819846
total_files += 1
820847
total_size += obj.get("Size", 0)
821848
source_images.append(source_image)
@@ -827,7 +854,7 @@ def sync_captures(self, batch_size=1000, regroup_events_per_batch=False, job: "J
827854
source_images = []
828855
if job:
829856
job.logger.info(f"Processed {total_files} files")
830-
job.progress.update_stage(job.job_type().key, total_files=total_files)
857+
job.progress.update_stage(job.job_type().key, total_files=total_files, failed=failed)
831858
job.update_progress()
832859

833860
if source_images:
@@ -837,7 +864,7 @@ def sync_captures(self, batch_size=1000, regroup_events_per_batch=False, job: "J
837864
)
838865
if job:
839866
job.logger.info(f"Processed {total_files} files")
840-
job.progress.update_stage(job.job_type().key, total_files=total_files)
867+
job.progress.update_stage(job.job_type().key, total_files=total_files, failed=failed)
841868
job.update_progress()
842869

843870
_compare_totals_for_sync(deployment, total_files)

ami/utils/dates.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ def get_image_timestamp_from_filename(img_path, raise_error=False) -> datetime.d
3535
>>> # Snapshot date format from Wingscape camera from Newfoundland
3636
>>> get_image_timestamp_from_filename("Project_20230801023001_4393.JPG").strftime(out_fmt)
3737
'2023-08-01 02:30:01'
38+
>>> # 2-digit year format (e.g., Farmscape/NSCF cameras)
39+
>>> get_image_timestamp_from_filename("NSCF----_250927194802_0017.JPG").strftime(out_fmt)
40+
'2025-09-27 19:48:02'
3841
3942
"""
4043
name = pathlib.Path(img_path).stem
@@ -47,19 +50,30 @@ def get_image_timestamp_from_filename(img_path, raise_error=False) -> datetime.d
4750
two_groups_pattern = r"\d{8}[^\d]+\d{6}" # YYYYMMDD*HHMMSS
4851
# Allow single non-digit delimiters within components, and one or more between DD and HH
4952
delimited_pattern = r"\d{4}[^\d]\d{2}[^\d]\d{2}[^\d]+\d{2}[^\d]\d{2}[^\d]\d{2}" # YYYY*MM*DD*+HH*MM*SS
53+
# 2-digit year: YYMMDDHHMMSS (12 consecutive digits, bounded by non-digits or string edges)
54+
short_year_pattern = r"(?<!\d)\d{12}(?!\d)" # YYMMDDHHMMSS
5055

5156
# Combine patterns with OR '|' but keep them in their own groups
52-
pattern = re.compile(f"({consecutive_pattern})|({two_groups_pattern})|({delimited_pattern})")
57+
# Order matters: longer/more specific patterns first
58+
pattern = re.compile(
59+
f"({consecutive_pattern})|({two_groups_pattern})|({delimited_pattern})|({short_year_pattern})"
60+
)
5361

5462
match = pattern.search(name)
5563
if match:
5664
# Get the full string matched by any of the patterns
5765
matched_string = match.group(0)
58-
# Remove all non-digit characters to create YYYYMMDDHHMMSS
66+
# Remove all non-digit characters to create YYYYMMDDHHMMSS or YYMMDDHHMMSS
5967
consecutive_date_string = re.sub(r"[^\d]", "", matched_string)
6068

69+
# Determine format based on length (12 digits = 2-digit year, 14 = 4-digit year)
70+
if len(consecutive_date_string) == 12:
71+
fmt = "%y%m%d%H%M%S"
72+
else:
73+
fmt = strptime_format
74+
6175
try:
62-
date = datetime.datetime.strptime(consecutive_date_string, strptime_format)
76+
date = datetime.datetime.strptime(consecutive_date_string, fmt)
6377
except ValueError:
6478
pass
6579

ami/utils/tests.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ def test_extract_timestamps(self):
2121
("mothbox/2024_01_01 12_00_00.jpg", "2024-01-01 12:00:00"),
2222
("other_common/2024-01-01 12:00:00.jpg", "2024-01-01 12:00:00"),
2323
("other_common/2024-01-01T12:00:00.jpg", "2024-01-01 12:00:00"),
24+
# 2-digit year: YYMMDDHHMMSS (Farmscape/NSCF cameras)
25+
("farmscape/NSCF----_250927194802_0017.JPG", "2025-09-27 19:48:02"),
26+
("farmscape/NSCF----_251004210001_0041.JPG", "2025-10-04 21:00:01"),
2427
]
2528

2629
for filename, expected_date in filenames_and_expected_dates:

ui/src/data-services/models/capture.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,15 @@ export class Capture {
167167
return new Date(this._capture.timestamp)
168168
}
169169

170+
get path(): string {
171+
return this._capture.path ?? ''
172+
}
173+
174+
get filename(): string {
175+
const path = this.path
176+
return path ? path.split('/').pop() ?? path : ''
177+
}
178+
170179
get width(): number | null {
171180
return this._capture.width
172181
}

ui/src/pages/captures/capture-columns.tsx

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,18 @@ export const columns: (projectId: string) => TableColumn<Capture>[] = (
134134
<BasicTableCell value={item.dimensionsLabel} />
135135
),
136136
},
137+
{
138+
id: 'filename',
139+
name: translate(STRING.FIELD_LABEL_FILENAME),
140+
sortField: 'path',
141+
renderCell: (item: Capture) => <BasicTableCell value={item.filename} />,
142+
},
143+
{
144+
id: 'path',
145+
name: translate(STRING.FIELD_LABEL_PATH),
146+
sortField: 'path',
147+
renderCell: (item: Capture) => <BasicTableCell value={item.path} />,
148+
},
137149
{
138150
id: 'occurrences',
139151
name: translate(STRING.FIELD_LABEL_OCCURRENCES),

ui/src/pages/captures/captures.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ export const Captures = () => {
3333
session: true,
3434
size: true,
3535
dimensions: true,
36+
filename: false,
37+
path: false,
3638
})
3739
const { selectedView, setSelectedView } = useSelectedView('table')
3840
const { filters } = useFilters()

ui/src/utils/language.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ export enum STRING {
9696
FIELD_LABEL_EMAIL,
9797
FIELD_LABEL_ENDPOINT,
9898
FIELD_LABEL_ERRORS,
99+
FIELD_LABEL_FILENAME,
99100
FIELD_LABEL_FILE_SIZE,
100101
FIELD_LABEL_FINISHED_AT,
101102
FIELD_LABEL_FIRST_DATE,
@@ -386,6 +387,7 @@ const ENGLISH_STRINGS: { [key in STRING]: string } = {
386387
[STRING.FIELD_LABEL_EMAIL]: 'Email',
387388
[STRING.FIELD_LABEL_ENDPOINT]: 'Endpoint URL',
388389
[STRING.FIELD_LABEL_ERRORS]: 'Errors',
390+
[STRING.FIELD_LABEL_FILENAME]: 'Filename',
389391
[STRING.FIELD_LABEL_FILE_SIZE]: 'File size',
390392
[STRING.FIELD_LABEL_FINISHED_AT]: 'Finished at',
391393
[STRING.FIELD_LABEL_FIRST_DATE]: 'First date',

0 commit comments

Comments
 (0)