Skip to content

Commit

Permalink
Compile regular expressions.
Browse files — browse the repository at this point in the history
  • Loading branch information
delucchi-cmu committed Jan 8, 2025
1 parent 58f4630 commit 74c325f
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 12 deletions.
12 changes: 5 additions & 7 deletions src/hats_import/catalog/resume_plan.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -146,10 +146,9 @@ def get_remaining_map_keys(self):
list of mapping keys *not* found in files like /resume/path/mapping_key.npz
"""
prefix = file_io.get_upath(self.tmp_path) / self.HISTOGRAMS_DIR
done_indexes = [
int(re.match(r"map_(\d+).npz", path.name).group(1)) for path in prefix.glob("*.npz")
]
remaining_indexes = list(set(range(0, len(self.input_paths))).difference(set(done_indexes)))
map_file_pattern = re.compile(r"map_(\d+).npz")
done_indexes = [int(map_file_pattern.match(path.name).group(1)) for path in prefix.glob("*.npz")]
remaining_indexes = list(set(range(0, len(self.input_paths))) - (set(done_indexes)))
return [(f"map_{key}", self.input_paths[key]) for key in remaining_indexes]

def read_histogram(self, healpix_order):
Expand Down Expand Up @@ -214,9 +213,8 @@ def get_remaining_split_keys(self):
list of splitting keys *not* found in files like /resume/path/split_key.done
"""
prefix = file_io.get_upath(self.tmp_path) / self.SPLITTING_STAGE
done_indexes = [
int(re.match(r"split_(\d+)_done", path.name).group(1)) for path in prefix.glob("*_done")
]
split_file_pattern = re.compile(r"split_(\d+)_done")
done_indexes = [int(split_file_pattern.match(path.name).group(1)) for path in prefix.glob("*_done")]
remaining_indexes = list(set(range(0, len(self.input_paths))) - set(done_indexes))
return [(f"split_{key}", self.input_paths[key]) for key in remaining_indexes]

Expand Down
8 changes: 4 additions & 4 deletions src/hats_import/pipeline_resume_plan.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -119,8 +119,9 @@ def read_markers(self, stage_name: str) -> dict[str, list[str]]:
prefix = file_io.append_paths_to_pointer(self.tmp_path, stage_name)
result = {}
result_files = file_io.find_files_matching_path(prefix, "*_done")
done_file_pattern = re.compile(r"(.*)_done")
for file_path in result_files:
match = re.match(r"(.*)_done", str(file_path.name))
match = done_file_pattern.match(str(file_path.name))
if not match:
raise ValueError(f"Unexpected file found: {file_path.name}")
key = match.group(1)
Expand All @@ -136,9 +137,8 @@ def read_done_pixels(self, stage_name):
List[HealpixPixel] - all pixel keys found in done directory
"""
prefix = file_io.append_paths_to_pointer(self.tmp_path, stage_name)
pixel_tuples = [
re.match(r"(\d+)_(\d+)_done", path.name).group(1, 2) for path in prefix.glob("*_done")
]
done_file_pattern = re.compile(r"(\d+)_(\d+)_done")
pixel_tuples = [done_file_pattern.match(path.name).group(1, 2) for path in prefix.glob("*_done")]
return [HealpixPixel(int(match[0]), int(match[1])) for match in pixel_tuples]

def clean_resume_files(self):
Expand Down
3 changes: 2 additions & 1 deletion src/hats_import/soap/resume_plan.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -98,8 +98,9 @@ def get_sources_to_count(self, source_pixel_map=None):
self.source_pixel_map = source_pixel_map
if self.source_pixel_map is None:
raise ValueError("source_pixel_map not provided for progress tracking.")
count_file_pattern = re.compile(r"(\d+)_(\d+).csv")
counted_pixel_tuples = [
re.match(r"(\d+)_(\d+).csv", path.name).group(1, 2) for path in self.tmp_path.glob("*.csv")
count_file_pattern.match(path.name).group(1, 2) for path in self.tmp_path.glob("*.csv")
]
counted_pixels = [HealpixPixel(int(match[0]), int(match[1])) for match in counted_pixel_tuples]

Expand Down

0 comments on commit 74c325f

Please sign in to comment.