Skip to content

Commit 6423c19

Browse files
authored
Merge pull request #151 from kbase/dev-service
Fix bug in distributing files to containers
2 parents 392d7ba + 0bb5f16 commit 6423c19

File tree

2 files changed

+53
-13
lines changed

2 files changed

+53
-13
lines changed

cdmtaskservice/models.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -416,18 +416,13 @@ class Cluster(str, Enum):
416416

417417
FilesPerContainer = NamedTuple("FilesPerContainer", [
418418
("containers", int),
419-
("files_per_container", int),
420-
("last_container", int),
421419
("files", list[list[S3File | str]])
422420
])
423421
"""
424422
Information about splitting files to be processed among containers.
425423
426-
containers - the number of containers expected to be run; this is the mimimum of the specified
424+
containers - the number of containers expected to be run; this is the minimum of the specified
427425
container count and the number of files.
428-
files_per_container - the number of files to be run per container
429-
last_container - the remainder of files to be run in the last container. Always less than
430-
files_per_container.
431426
files - a list of lists of files, split up per container.
432427
"""
433428

@@ -609,12 +604,18 @@ def inputs_are_S3File(self) -> bool:
609604

610605
def get_files_per_container(self) -> FilesPerContainer:
611606
"""
612-
Returns the number of files to be run per container and the files, split up by container.
607+
Returns the container count and the files split up by container.
613608
"""
614609
containers = min(self.num_containers, len(self.input_files))
615-
fpc = math.ceil(len(self.input_files) / containers)
616-
files = [self.input_files[i:i + fpc] for i in range(0, fpc * containers, fpc)]
617-
return FilesPerContainer(containers, fpc, len(self.input_files) % fpc, files)
610+
fpc, extra_files = divmod(len(self.input_files), containers)
611+
infiles = list(self.input_files)
612+
files = []
613+
for i in range(1, containers + 1):
614+
# this seems dumb. It works though, make it pretty later
615+
fcount = fpc + (1 if i <= extra_files else 0)
616+
files.append(infiles[:fcount])
617+
infiles = infiles[fcount:]
618+
return FilesPerContainer(containers, files)
618619

619620

620621
class JobState(str, Enum):

test/models_test.py

+42-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,46 @@
1-
# TODO TEST add tests
1+
# TODO TEST add more tests
22

33
from cdmtaskservice import models # @UnusedImport
44

55

6-
def test_noop():
7-
pass
6+
def test_files_per_container():
7+
_files_per_container(1, 1, 1, [[1]])
8+
_files_per_container(1, 2, 1, [[1, 2]])
9+
_files_per_container(2, 1, 1, [[1]])
10+
_files_per_container(2, 2, 2, [[1], [2]])
11+
_files_per_container(3, 2, 2, [[1], [2]])
12+
_files_per_container(3, 3, 3, [[1], [2], [3]])
13+
_files_per_container(3, 4, 3, [[1, 2], [3], [4]])
14+
_files_per_container(3, 5, 3, [[1, 2], [3, 4], [5]])
15+
_files_per_container(3, 6, 3, [[1, 2], [3, 4], [5, 6]])
16+
_files_per_container(3, 7, 3, [[1, 2, 3], [4, 5], [6, 7]])
17+
18+
19+
def _files_per_container(containers: int, files: int, expcont: int, expfiles: list[list[int]]):
20+
ji = models.JobInput(
21+
cluster=models.Cluster.PERLMUTTER_JAWS,
22+
image="fakeimage",
23+
params=models.Parameters(),
24+
num_containers=containers,
25+
input_files=[f"foo/bar{i}" for i in range(1, files + 1)],
26+
output_dir="foo/bar"
27+
)
28+
fpc = ji.get_files_per_container()
29+
exp = []
30+
for lst in expfiles:
31+
exp.append([f"foo/bar{i}" for i in lst])
32+
assert fpc == models.FilesPerContainer(expcont, exp)
33+
34+
ji = models.JobInput(
35+
cluster=models.Cluster.PERLMUTTER_JAWS,
36+
image="fakeimage",
37+
params=models.Parameters(),
38+
num_containers=containers,
39+
input_files=[models.S3File(file=f"foo/bar{i}") for i in range(1, files + 1)],
40+
output_dir="foo/bar"
41+
)
42+
fpc = ji.get_files_per_container()
43+
exp = []
44+
for lst in expfiles:
45+
exp.append([models.S3File(file=f"foo/bar{i}") for i in lst])
46+
assert fpc == models.FilesPerContainer(expcont, exp)

0 commit comments

Comments
 (0)