Skip to content

Commit 0d782fb

Browse files
kiukchung authored and facebook-github-bot committed
Drop python-3.8 unittest in favor of adding 3.12. Update to python-3.10 for other workflows (#1020)
Summary: PyTorch dropped support for python-3.8 a while ago. This change makes TorchX consistent with PyTorch. This should also fix the GitHub workflow breakages shown at https://hud.pytorch.org/hud/pytorch/torchx/main

Changes:
1. Drops python-3.8 from the unittest matrix
2. Adds python-3.12 to the unittest matrix
3. Upgrades a few libraries (torch, torchvision, etc.) in dev-requirements.txt
4. Upgrades to python-3.10 in non-unittest workflows

Reviewed By: manav-a

Differential Revision: D71475986
1 parent 72e4895 commit 0d782fb

15 files changed

+741
-27
lines changed

.github/workflows/aws-batch-integration-tests.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ jobs:
1616
- name: Setup Python
1717
uses: actions/setup-python@v2
1818
with:
19-
python-version: 3.9
19+
python-version: "3.10"
2020
architecture: x64
2121
- name: Checkout TorchX
2222
uses: actions/checkout@v2

.github/workflows/components-integration-tests.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ jobs:
3535
- name: Setup Python
3636
uses: actions/setup-python@v2
3737
with:
38-
python-version: 3.8
38+
python-version: "3.10"
3939
architecture: x64
4040
- name: Checkout TorchX
4141
uses: actions/checkout@v2

.github/workflows/container.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ jobs:
1515
- name: Setup Python
1616
uses: actions/setup-python@v2
1717
with:
18-
python-version: 3.8
18+
python-version: "3.10"
1919
architecture: x64
2020
- name: Checkout TorchX
2121
uses: actions/checkout@v2

.github/workflows/gcp-batch-integration-tests.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ jobs:
1717
- name: Setup Python
1818
uses: actions/setup-python@v2
1919
with:
20-
python-version: 3.9
20+
python-version: "3.10"
2121
architecture: x64
2222
- name: Checkout TorchX
2323
uses: actions/checkout@v2

.github/workflows/kfp-integration-tests.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ jobs:
1313
- name: Setup Python
1414
uses: actions/setup-python@v2
1515
with:
16-
python-version: 3.8
16+
python-version: "3.10"
1717
architecture: x64
1818
- name: Checkout TorchX
1919
uses: actions/checkout@v2

.github/workflows/nightly.yaml

+2-2
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ jobs:
1212
- name: Setup Python 3.10
1313
uses: actions/setup-python@v2
1414
with:
15-
python-version: '3.10'
15+
python-version: "3.10"
1616
architecture: x64
1717
- name: Checkout TorchX
1818
uses: actions/checkout@v2
@@ -22,7 +22,7 @@ jobs:
2222
pip install -e .[dev]
2323
pip install twine
2424
- name: Run tests
25-
run: pytest --cov=./ --cov-report=xml
25+
run: pytest --cov=./ --cov-report=xml
2626
- name: Push nightly
2727
env:
2828
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}

.github/workflows/pyre.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ jobs:
2121
run: |
2222
set -eux
2323
pip install -e .[dev]
24-
24+
- name: Init Lint Runner
2525
lintrunner init
2626
- name: Run Pyre
2727
run: scripts/pyre.sh

.github/workflows/python-unittests.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ jobs:
1010
unittest:
1111
strategy:
1212
matrix:
13-
python-version: [3.8, 3.9, "3.10", 3.11]
13+
python-version: [3.9, "3.10", 3.11, 3.12]
1414
platform: ["linux.20_04.4x"]
1515
include:
1616
- python-version: 3.9

.github/workflows/slurm-local-integration-tests.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ jobs:
1919
- name: Setup Python
2020
uses: actions/setup-python@v2
2121
with:
22-
python-version: 3.8
22+
python-version: "3.10"
2323
architecture: x64
2424
- name: Checkout TorchX
2525
uses: actions/checkout@v2

dev-requirements.txt

+4-4
Original file line number | Diff line number | Diff line change
@@ -24,11 +24,11 @@ pytorch-lightning==2.3.1
2424
tensorboard==2.14.0
2525
sagemaker==2.224.4
2626
torch-model-archiver>=0.4.2
27-
torch==2.2.1
28-
torchmetrics==0.10.3
27+
torch>=2.6.0
28+
torchmetrics==1.6.3
2929
torchserve>=0.10.0
30-
torchtext==0.17.1
31-
torchvision==0.17.1
30+
torchtext==0.18.0
31+
torchvision==0.21.0
3232
ts==0.5.1
3333
ray[default]
3434
wheel

torchx/distributed/__init__.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -83,9 +83,7 @@ def local_device() -> torch.device:
8383
if dist.is_initialized():
8484
default_pg = _get_default_group()
8585
return (
86-
local_cuda_device()
87-
if default_pg.options.backend == "nccl"
88-
else torch.device("cpu")
86+
local_cuda_device() if default_pg.name() == "nccl" else torch.device("cpu")
8987
)
9088
else:
9189
return torch.device("cuda") if has_cuda_devices() else torch.device("cpu")

torchx/examples/apps/lightning/model.py

+5-6
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323
import torch
2424
import torch.jit
2525
from torch.nn import functional as F
26-
from torchmetrics import Accuracy
26+
from torchmetrics.classification import MulticlassAccuracy
2727
from torchvision.models.resnet import BasicBlock, ResNet
2828

2929

@@ -44,13 +44,12 @@ def __init__(
4444

4545
# We use the torchvision resnet model with some small tweaks to match
4646
# TinyImageNet.
47-
m = ResNet(BasicBlock, layer_sizes)
47+
m = ResNet(BasicBlock, layer_sizes, num_classes=200)
4848
m.avgpool = torch.nn.AdaptiveAvgPool2d(1)
49-
m.fc.out_features = 200
5049
self.model: ResNet = m
5150

52-
self.train_acc = Accuracy()
53-
self.val_acc = Accuracy()
51+
self.train_acc = MulticlassAccuracy(num_classes=m.fc.out_features)
52+
self.val_acc = MulticlassAccuracy(num_classes=m.fc.out_features)
5453

5554
# pyre-fixme[14]
5655
def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -71,7 +70,7 @@ def validation_step(
7170
def _step(
7271
self,
7372
step_name: str,
74-
acc_metric: Accuracy,
73+
acc_metric: MulticlassAccuracy,
7574
batch: Tuple[torch.Tensor, torch.Tensor],
7675
batch_idx: int,
7776
) -> torch.Tensor:

torchx/examples/apps/lightning/train.py

+9-2
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@
6060
import torch
6161
from pytorch_lightning.callbacks import ModelCheckpoint
6262
from pytorch_lightning.loggers import TensorBoardLogger
63+
from torch.distributed.elastic.multiprocessing import errors
6364
from torchx.examples.apps.lightning.data import (
6465
create_random_data,
6566
download_data,
@@ -85,7 +86,12 @@ def parse_args(argv: List[str]) -> argparse.Namespace:
8586
parser.add_argument(
8687
"--batch_size", type=int, default=32, help="batch size to use for training"
8788
)
88-
parser.add_argument("--num_samples", type=int, default=10, help="num_samples")
89+
parser.add_argument(
90+
"--num_samples",
91+
type=int,
92+
default=32,
93+
help="number of samples in the dataset",
94+
)
8995
parser.add_argument(
9096
"--data_path",
9197
type=str,
@@ -126,6 +132,7 @@ def get_model_checkpoint(args: argparse.Namespace) -> Optional[ModelCheckpoint]:
126132
)
127133

128134

135+
@errors.record
129136
def main(argv: List[str]) -> None:
130137
with tempfile.TemporaryDirectory() as tmpdir:
131138
args = parse_args(argv)
@@ -138,7 +145,7 @@ def main(argv: List[str]) -> None:
138145
if not args.data_path:
139146
data_path = os.path.join(tmpdir, "data")
140147
os.makedirs(data_path)
141-
create_random_data(data_path)
148+
create_random_data(data_path, args.num_samples)
142149
else:
143150
data_path = download_data(args.data_path, tmpdir)
144151

torchx/runner/test/api_test.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -601,7 +601,7 @@ def test_get_schedulers(self, json_dumps_mock: MagicMock, _) -> None:
601601
)
602602
app = AppDef("sleeper", roles=[role])
603603
runner.run(app, scheduler="local")
604-
local_sched_mock.submit.called_once_with(app, {})
604+
local_sched_mock.schedule.assert_called_once()
605605

606606
def test_run_from_module(self, _: str) -> None:
607607
runner = get_runner(name="test_session")

0 commit comments

Comments
 (0)