Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,10 @@ help:
@uv run python -c "import re; \
[[print(f'\033[36m{m[0]:<20}\033[0m {m[1]}') for m in re.findall(r'^([a-zA-Z_-]+):.*?## (.*)$$', open(makefile).read(), re.M)] for makefile in ('$(MAKEFILE_LIST)').strip().split()]"

.PHONY: deploy
deploy: ## Deploy to site
@echo "🚀 Deploying ComposeAPI to Site"
@kubectl kustomize kustomize/overlays/compose-api-rke | kubectl apply -f -


.DEFAULT_GOAL := help
2 changes: 1 addition & 1 deletion compose_api/api/routers/templates/copasi.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"state": {
"time_course": {
"_type": "step",
"address": "local:pbest.registry.simulators.copasi_process.CopasiUTCStep",
"address": "local:pbsim_common.simulators.copasi_process.CopasiUTCStep",
"config": {
"model_source": "interesting.sbml",
"sim_start_time": {{ start_time }},
Expand Down
2 changes: 1 addition & 1 deletion compose_api/api/routers/templates/tellurium.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"state": {
"time_course": {
"_type": "step",
"address": "local:pbest.registry.simulators.tellurium_process.TelluriumUTCStep",
"address": "local:pbsim_common.simulators.tellurium_process.TelluriumUTCStep",
"config": {
"model_source": "interesting.sbml",
"sim_start_time": {{ start_time }},
Expand Down
5 changes: 4 additions & 1 deletion compose_api/common/hpc/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,16 @@ def get_sacct_format_string() -> str:
def from_sacct_formatted_output(cls, line: str) -> "SlurmJob":
# Split the line by delimiter
fields = line.strip().split("|")
job_state = fields[4]
if "cancelled" in job_state.lower(): # Has 'cancelled by <User-ID>' which trips up mappings from string to Enum
job_state = "CANCELLED" # so just set it to 'canceled'
# Map fields to model attributes
return cls(
job_id=int(fields[0]),
name=fields[1],
account=fields[2],
user_name=fields[3],
job_state=fields[4],
job_state=job_state,
start_time=fields[5],
end_time=fields[6],
elapsed=fields[7],
Expand Down
4 changes: 1 addition & 3 deletions compose_api/common/hpc/slurm_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@ async def get_job_status_squeue(self, job_ids: list[int] | None = None) -> list[
return slurm_jobs

async def get_job_status_sacct(self, job_ids: list[int] | None = None) -> list[SlurmJob]:
command = (
f'sacct -u $USER --parsable --delimiter="|" --noheader --format="{SlurmJob.get_sacct_format_string()}"'
)
command = f'sacct -u $USER --parsable --allocations --delimiter="|" --noheader --format="{SlurmJob.get_sacct_format_string()}"' # noqa: E501
if job_ids is not None:
job_ids_str = ",".join(map(str, job_ids)) if len(job_ids) > 1 else str(job_ids[0])
command = command + f" -j {job_ids_str}"
Expand Down
5 changes: 5 additions & 0 deletions compose_api/db/tables/hpc_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ class JobStatusDB(enum.Enum):
COMPLETED = "completed"
FAILED = "failed"
PENDING = "pending"
CANCELLED = "cancelled"
OUT_OF_MEMORY = "out_of_memory"
SUSPENDED = "suspended"
TIMEOUT = "timeout"
UNKNOWN = "unknown"

def to_job_status(self) -> JobStatus:
return JobStatus(self.value)
Expand Down
2 changes: 1 addition & 1 deletion compose_api/simulation/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ async def run_curated_pbif(
# Create OMEX with all necessary files
with tempfile.TemporaryDirectory(delete=False) as tmp_dir:
with zipfile.ZipFile(tmp_dir + "/input.omex", "w") as omex:
omex.writestr(data=templated_pbif, zinfo_or_arcname=f"{simulator_name}.pbif")
omex.writestr(data=templated_pbif, zinfo_or_arcname=f"{simulator_name}.pbg")
if use_interesting:
omex.write(loaded_sbml.absolute(), arcname="interesting.sbml")
else:
Expand Down
23 changes: 17 additions & 6 deletions compose_api/simulation/job_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,26 @@ async def update_running_jobs(self) -> None:
slurm_job = slurm_job_map.get(hpc_run.slurmjobid)
if not slurm_job or not slurm_job.job_state:
continue
new_status = JobStatus(slurm_job.job_state.lower())
if new_status == hpc_run.status:
logger.debug(f"HpcRun {hpc_run.database_id} is still running with status {new_status}")
continue
if hpc_run.status != new_status:
try:
new_status = JobStatus(slurm_job.job_state.lower())
if new_status == hpc_run.status:
logger.debug(f"HpcRun {hpc_run.database_id} is still running with status {new_status}")
continue
if hpc_run.status != new_status:
await self.database_service.get_hpc_db().update_hpcrun_status(
hpcrun_id=hpc_run.database_id, new_slurm_job=slurm_job
)
logger.info(f"Updated HpcRun {hpc_run.database_id} status to {new_status}")
except ValueError as e:
logger.exception(
f"Error updating HpcRun {hpc_run.database_id} to status {slurm_job.job_state.lower()}."
f" Setting status to UNKNOWN.",
exc_info=e,
)
slurm_job.job_state = JobStatus.UNKNOWN.upper()
await self.database_service.get_hpc_db().update_hpcrun_status(
hpcrun_id=hpc_run.database_id, new_slurm_job=slurm_job
)
logger.info(f"Updated HpcRun {hpc_run.database_id} status to {new_status}")

if slurm_job.job_id in self.internal_listeners:
self.internal_listeners[slurm_job.job_id].put_nowait(hpc_run)
Expand Down
5 changes: 5 additions & 0 deletions compose_api/simulation/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ class JobStatus(StrEnum):
COMPLETED = "completed"
FAILED = "failed"
PENDING = "pending"
CANCELLED = "cancelled"
OUT_OF_MEMORY = "out_of_memory"
SUSPENDED = "suspended"
TIMEOUT = "timeout"
UNKNOWN = "unknown"


class HpcRun(BaseModel):
Expand Down
4 changes: 2 additions & 2 deletions compose_api/simulation/simulation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ async def build_container(self, simulator_version: SimulatorVersion, random_str:
#SBATCH --cpus-per-task 1
#SBATCH --mem=4GB
#SBATCH --nodelist={settings.slurm_build_node}
#SBATCH --partition=general
#SBATCH --qos=general
#SBATCH --partition={settings.slurm_partition}
#SBATCH --qos={settings.slurm_qos}
#SBATCH --output={get_slurm_log_file(slurm_job_name=slurm_job_name)}

set -e
Expand Down
2 changes: 1 addition & 1 deletion compose_api/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.5"
__version__ = "0.3.8"
8 changes: 4 additions & 4 deletions kustomize/config/compose-api-rke/shared.env
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
SLURM_SUBMIT_HOST=hamantis.cam.uchc.edu
SLURM_SUBMIT_USER=crbmapi
#SLURM_SUBMIT_KEY_PATH=/run/secrets/slurmsubmitkeyfile/ssh-privatekey
SLURM_PARTITION=general
SLURM_NODE_LIST=mantis-039
SLURM_BUILD_NODE=mantis-007
SLURM_QOS=general
SLURM_PARTITION=vcell
SLURM_NODE_LIST=mantis-042
SLURM_BUILD_NODE=mantis-042
SLURM_QOS=vcell
SLURM_LOG_BASE_PATH=/home/FCAM/crbmapi/compose_api/prod/htclogs
SLURM_SBATCH_BASE_PATH=/home/FCAM/crbmapi/compose_api/prod/slurm_sbatch
HPC_IMAGE_BASE_PATH=/home/FCAM/crbmapi/compose_api/prod/images
Expand Down
2 changes: 1 addition & 1 deletion kustomize/overlays/compose-api-rke/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace: compose-api-rke

images:
- name: ghcr.io/biosimulations/compose-api
newTag: 0.3.4
newTag: 0.3.8
- name: docker.io/library/mongo
newTag: 8.0.12

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "compose-api"
version = "0.3.5"
version = "0.3.8"
description = "An API server for reproducible biological workflows and cosimulations."
authors = [{ name = "Jim Schaff", email = "[email protected]" }]
requires-python = ">=3.13.2,<4.0"
Expand Down Expand Up @@ -36,7 +36,7 @@ dependencies = [
"python-libsedml>=2.0.33",
"biosimulators-utils>=0.2.3",
"python-libsbml>=5.20.5",
"pbest==0.2.8",
"pbest==0.4.0",
]

[project.urls]
Expand Down
Binary file modified tests/fixtures/resources/phase_cycle.omex
Binary file not shown.
Binary file modified tests/fixtures/resources/readdy.omex
Binary file not shown.
Loading
Loading