Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop python-3.8 unittest in favor of adding 3.12. Update to python-3.10 for other workflows #1020

Merged
merged 1 commit into from
Mar 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/aws-batch-integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.9
python-version: "3.10"
architecture: x64
- name: Checkout TorchX
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/components-integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: "3.10"
architecture: x64
- name: Checkout TorchX
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/container.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: "3.10"
architecture: x64
- name: Checkout TorchX
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/gcp-batch-integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.9
python-version: "3.10"
architecture: x64
- name: Checkout TorchX
uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/kfp-integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: "3.10"
architecture: x64
- name: Checkout TorchX
uses: actions/checkout@v2
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/nightly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
- name: Setup Python 3.10
uses: actions/setup-python@v2
with:
python-version: '3.10'
python-version: "3.10"
architecture: x64
- name: Checkout TorchX
uses: actions/checkout@v2
Expand All @@ -22,7 +22,7 @@ jobs:
pip install -e .[dev]
pip install twine
- name: Run tests
run: pytest --cov=./ --cov-report=xml
run: pytest --cov=./ --cov-report=xml
- name: Push nightly
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pyre.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
run: |
set -eux
pip install -e .[dev]

- name: Init Lint Runner
lintrunner init
- name: Run Pyre
run: scripts/pyre.sh
2 changes: 1 addition & 1 deletion .github/workflows/python-unittests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
unittest:
strategy:
matrix:
python-version: [3.8, 3.9, "3.10", 3.11]
python-version: [3.9, "3.10", 3.11, 3.12]
platform: ["linux.20_04.4x"]
include:
- python-version: 3.9
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/slurm-local-integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: "3.10"
architecture: x64
- name: Checkout TorchX
uses: actions/checkout@v2
Expand Down
8 changes: 4 additions & 4 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ pytorch-lightning==2.3.1
tensorboard==2.14.0
sagemaker==2.224.4
torch-model-archiver>=0.4.2
torch==2.2.1
torchmetrics==0.10.3
torch>=2.6.0
torchmetrics==1.6.3
torchserve>=0.10.0
torchtext==0.17.1
torchvision==0.17.1
torchtext==0.18.0
torchvision==0.21.0
ts==0.5.1
ray[default]
wheel
Expand Down
4 changes: 1 addition & 3 deletions torchx/distributed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,7 @@ def local_device() -> torch.device:
if dist.is_initialized():
default_pg = _get_default_group()
return (
local_cuda_device()
if default_pg.options.backend == "nccl"
else torch.device("cpu")
local_cuda_device() if default_pg.name() == "nccl" else torch.device("cpu")
)
else:
return torch.device("cuda") if has_cuda_devices() else torch.device("cpu")
Expand Down
11 changes: 5 additions & 6 deletions torchx/examples/apps/lightning/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import torch
import torch.jit
from torch.nn import functional as F
from torchmetrics import Accuracy
from torchmetrics.classification import MulticlassAccuracy
from torchvision.models.resnet import BasicBlock, ResNet


Expand All @@ -44,13 +44,12 @@ def __init__(

# We use the torchvision resnet model with some small tweaks to match
# TinyImageNet.
m = ResNet(BasicBlock, layer_sizes)
m = ResNet(BasicBlock, layer_sizes, num_classes=200)
m.avgpool = torch.nn.AdaptiveAvgPool2d(1)
m.fc.out_features = 200
self.model: ResNet = m

self.train_acc = Accuracy()
self.val_acc = Accuracy()
self.train_acc = MulticlassAccuracy(num_classes=m.fc.out_features)
self.val_acc = MulticlassAccuracy(num_classes=m.fc.out_features)

# pyre-fixme[14]
def forward(self, x: torch.Tensor) -> torch.Tensor:
Expand All @@ -71,7 +70,7 @@ def validation_step(
def _step(
self,
step_name: str,
acc_metric: Accuracy,
acc_metric: MulticlassAccuracy,
batch: Tuple[torch.Tensor, torch.Tensor],
batch_idx: int,
) -> torch.Tensor:
Expand Down
11 changes: 9 additions & 2 deletions torchx/examples/apps/lightning/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import torch
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.distributed.elastic.multiprocessing import errors
from torchx.examples.apps.lightning.data import (
create_random_data,
download_data,
Expand All @@ -85,7 +86,12 @@ def parse_args(argv: List[str]) -> argparse.Namespace:
parser.add_argument(
"--batch_size", type=int, default=32, help="batch size to use for training"
)
parser.add_argument("--num_samples", type=int, default=10, help="num_samples")
parser.add_argument(
"--num_samples",
type=int,
default=32,
help="number of samples in the dataset",
)
parser.add_argument(
"--data_path",
type=str,
Expand Down Expand Up @@ -126,6 +132,7 @@ def get_model_checkpoint(args: argparse.Namespace) -> Optional[ModelCheckpoint]:
)


@errors.record
def main(argv: List[str]) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
args = parse_args(argv)
Expand All @@ -138,7 +145,7 @@ def main(argv: List[str]) -> None:
if not args.data_path:
data_path = os.path.join(tmpdir, "data")
os.makedirs(data_path)
create_random_data(data_path)
create_random_data(data_path, args.num_samples)
else:
data_path = download_data(args.data_path, tmpdir)

Expand Down
2 changes: 1 addition & 1 deletion torchx/runner/test/api_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ def test_get_schedulers(self, json_dumps_mock: MagicMock, _) -> None:
)
app = AppDef("sleeper", roles=[role])
runner.run(app, scheduler="local")
local_sched_mock.submit.called_once_with(app, {})
local_sched_mock.schedule.assert_called_once()

def test_run_from_module(self, _: str) -> None:
runner = get_runner(name="test_session")
Expand Down
10 changes: 8 additions & 2 deletions torchx/schedulers/test/ray_scheduler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,12 @@ def test_nonmatching_address(self) -> None:
):
_scheduler_with_client.submit(app=app, cfg={})

def _assertDictContainsSubset(self, expected, actual, msg=None):
# NB: implement unittest.TestCase.assertDictContainsSubsetNew() since it was removed in python-3.11
for key, value in expected.items():
self.assertIn(key, actual, msg)
self.assertEqual(actual[key], value, msg)

def test_client_with_headers(self) -> None:
# This tests only one option for the client. Different versions may have more options available.
headers = {"Authorization": "Bearer: token"}
Expand All @@ -398,7 +404,7 @@ def test_client_with_headers(self) -> None:
)
_scheduler_with_client = RayScheduler("client_session", ray_client)
scheduler_client = _scheduler_with_client._get_ray_client()
self.assertDictContainsSubset(scheduler_client._headers, headers)
self._assertDictContainsSubset(scheduler_client._headers, headers)

class RayClusterSetup:
_instance = None # pyre-ignore
Expand Down Expand Up @@ -606,7 +612,7 @@ def test_ray_driver_elasticity(self) -> None:
# 3-3
teriminal = (
driver._step()
) # pg 2 becomes availiable, but actor 2 shouldn't be executed
) # pg 2 becomes available, but actor 2 shouldn't be executed
self.assertEqual(teriminal, False)
self.assertEqual(len(driver.active_tasks), 0) # actor1 should be finished
self.assertEqual(driver.command_actors_count, 0)
Expand Down
Loading