diff --git a/PhysicsTools/PyTorch/BuildFile.xml b/PhysicsTools/PyTorch/BuildFile.xml
deleted file mode 100644
index 511f4697bbabe..0000000000000
--- a/PhysicsTools/PyTorch/BuildFile.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
diff --git a/PhysicsTools/PyTorch/test/BuildFile.xml b/PhysicsTools/PyTorch/test/BuildFile.xml
index 0e49e06c50c12..dcd42286ea9bd 100644
--- a/PhysicsTools/PyTorch/test/BuildFile.xml
+++ b/PhysicsTools/PyTorch/test/BuildFile.xml
@@ -34,3 +34,26 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/PhysicsTools/PyTorch/test/create_simple_dnn.py b/PhysicsTools/PyTorch/test/create_simple_dnn.py
old mode 100644
new mode 100755
index aeb2a16449f75..4ff9cfe01b45d
--- a/PhysicsTools/PyTorch/test/create_simple_dnn.py
+++ b/PhysicsTools/PyTorch/test/create_simple_dnn.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
import sys
import os
import torch
diff --git a/PhysicsTools/PyTorch/test/test_simple_dnn.py b/PhysicsTools/PyTorch/test/test_simple_dnn.py
new file mode 100755
index 0000000000000..dd01f4a1d7ed9
--- /dev/null
+++ b/PhysicsTools/PyTorch/test/test_simple_dnn.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import torch
+
+if len(sys.argv) >= 2:
+    datadir = sys.argv[1]
+else:
+    thisdir = os.path.dirname(os.path.abspath(__file__))
+    datadir = os.path.join(os.path.dirname(thisdir), "bin", "data")
+
+ptfile = os.path.join(datadir, "simple_dnn.pt")
+print("loading:", ptfile)
+
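+# torch.jit.load restores the TorchScript module saved by create_simple_dnn.py;
+# eval() switches it to inference mode (relevant for layers such as dropout or batch norm)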
+tm = torch.jit.load(ptfile)
+tm.eval()
+
+# dummy input (same shape used during trace: 10)
+x = torch.ones(10)
+
+# optional: run on gpu if available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+tm.to(device)
+x = x.to(device)
+
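+# run the model under no_grad: inference only, no autograd graph is needed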
+with torch.no_grad():
+ y = tm(x)
+
+print("ok. output:", y.item())
+print("device:", device)
diff --git a/PhysicsTools/PyTorch/test/torch-control-flow.py b/PhysicsTools/PyTorch/test/torch-control-flow.py
new file mode 100755
index 0000000000000..106594be6d766
--- /dev/null
+++ b/PhysicsTools/PyTorch/test/torch-control-flow.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+
+import random
+import torch
+import math
+
+
+class DynamicNet(torch.nn.Module):
+ def __init__(self):
+ """
+ In the constructor we instantiate five parameters and assign them as members.
+ """
+ super().__init__()
+ self.a = torch.nn.Parameter(torch.randn(()))
+ self.b = torch.nn.Parameter(torch.randn(()))
+ self.c = torch.nn.Parameter(torch.randn(()))
+ self.d = torch.nn.Parameter(torch.randn(()))
+ self.e = torch.nn.Parameter(torch.randn(()))
+
+ def forward(self, x):
+ """
+        For the forward pass of the model, we randomly choose how many of the
+        orders 4 and 5 to include, and reuse the e parameter to compute the
+        contribution of these orders.
+
+ Since each forward pass builds a dynamic computation graph, we can use normal
+ Python control-flow operators like loops or conditional statements when
+ defining the forward pass of the model.
+
+ Here we also see that it is perfectly safe to reuse the same parameter many
+ times when defining a computational graph.
+ """
+ y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
+ for exp in range(4, random.randint(4, 6)):
+ y = y + self.e * x ** exp
+ return y
+
+ def string(self):
+ """
+        Just like any class in Python, you can also define custom methods on PyTorch modules.
+ """
+ return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'
+
+
+# Create Tensors to hold input and outputs.
+x = torch.linspace(-math.pi, math.pi, 2000)
+y = torch.sin(x)
+
+# Construct our model by instantiating the class defined above
+model = DynamicNet()
+
+# Construct our loss function and an Optimizer. Training this strange model with
+# vanilla stochastic gradient descent is tough, so we use momentum
+criterion = torch.nn.MSELoss(reduction='sum')
+optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
+for t in range(30000):
+ # Forward pass: Compute predicted y by passing x to the model
+ y_pred = model(x)
+
+ # Compute and print loss
+ loss = criterion(y_pred, y)
+ if t % 2000 == 1999:
+ print(t, loss.item())
+
+ # Zero gradients, perform a backward pass, and update the weights.
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+
+print(f'Result: {model.string()}')
diff --git a/PhysicsTools/PyTorch/test/torch-cpu-cuda.py b/PhysicsTools/PyTorch/test/torch-cpu-cuda.py
new file mode 100755
index 0000000000000..76170efd89b26
--- /dev/null
+++ b/PhysicsTools/PyTorch/test/torch-cpu-cuda.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+
+import time
+import torch
+
+def test(device):
+ print("Testing on", device)
+ x = torch.rand(8000, 8000, device=device)
+ y = torch.rand(8000, 8000, device=device)
+
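+    # CUDA kernels launch asynchronously, so synchronize before and after the
+    # matrix multiplication to get a meaningful wall-clock timing on the GPU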
+    if device == "cuda":
+        torch.cuda.synchronize()
+    t0 = time.time()
+    torch.mm(x, y)
+    if device == "cuda":
+        torch.cuda.synchronize()
+    print("Time:", time.time() - t0, "seconds")
+
+if torch.cuda.is_available():
+ test("cpu")
+ test("cuda")
+else:
+ print("no cuda")
+ exit(1)
diff --git a/PhysicsTools/PyTorch/test/torch-cuda.py b/PhysicsTools/PyTorch/test/torch-cuda.py
new file mode 100755
index 0000000000000..3e170581ffd0c
--- /dev/null
+++ b/PhysicsTools/PyTorch/test/torch-cuda.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+
+import torch
+print("cuda available:", torch.cuda.is_available())
+print("device count:", torch.cuda.device_count())
+print("current device:", torch.cuda.current_device() if torch.cuda.is_available() else None)
+print("device name:", torch.cuda.get_device_name() if torch.cuda.is_available() else None)
+
+# small compute test
+if torch.cuda.is_available():
+ x = torch.rand(10000, 10000, device="cuda")
+ y = torch.rand(10000, 10000, device="cuda")
+ z = torch.mm(x, y)
+ print("OK. Computed on CUDA. Result:", z[0][0].item())
+else:
+ print("NO CUDA")
+ exit(1)
diff --git a/PhysicsTools/PyTorch/test/torch-mini-nn.py b/PhysicsTools/PyTorch/test/torch-mini-nn.py
new file mode 100755
index 0000000000000..90df21de50a89
--- /dev/null
+++ b/PhysicsTools/PyTorch/test/torch-mini-nn.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+import sys
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+print("device:", device)
+if (device == "cpu") and (len(sys.argv) > 1) and (sys.argv[1] != "cpu"):
+    print("Unable to find accelerator", sys.argv[1])
+    sys.exit(1)
+
+# simple fully connected network
+model = nn.Sequential(
+ nn.Linear(1000, 2000),
+ nn.ReLU(),
+ nn.Linear(2000, 1)
+).to(device)
+
+# random data
+x = torch.randn(5000, 1000, device=device)
+y = torch.randn(5000, 1, device=device)
+
+opt = optim.Adam(model.parameters(), lr=1e-3)
+
+# train 5 steps
+for i in range(5):
+ opt.zero_grad()
+ pred = model(x)
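+    # mean squared error written out by hand (equivalent to nn.MSELoss)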
+ loss = ((pred - y)**2).mean()
+ loss.backward()
+ opt.step()
+ print("step:", i, "loss:", loss.item())
+
diff --git a/PhysicsTools/PyTorch/test/torch-tensor.py b/PhysicsTools/PyTorch/test/torch-tensor.py
new file mode 100755
index 0000000000000..8b2bfbc88c943
--- /dev/null
+++ b/PhysicsTools/PyTorch/test/torch-tensor.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+import torch
+import math
+import sys
+
+# We want to be able to train our model on an accelerator such as CUDA, MPS,
+# MTIA, or XPU. If the current accelerator is available, we will use it.
+# Otherwise, we use the CPU.
+
+dtype = torch.float
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
+print(f"Using {device} device")
+if (device == "cpu") and (len(sys.argv) > 1) and (sys.argv[1] != "cpu"):
+    print("Unable to find accelerator", sys.argv[1])
+    sys.exit(1)
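+# tensors created below default to the selected device after set_default_device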
+torch.set_default_device(device)
+
+# Create Tensors to hold input and outputs.
+# By default, requires_grad=False, which indicates that we do not need to
+# compute gradients with respect to these Tensors during the backward pass.
+x = torch.linspace(-1, 1, 2000, dtype=dtype)
+y = torch.exp(x)  # its Taylor expansion is 1 + x + (1/2!) x**2 + (1/3!) x**3 + ...
+
+# Create random Tensors for weights. For a third order polynomial, we need
+# 4 weights: y = a + b x + c x^2 + d x^3
+# Setting requires_grad=True indicates that we want to compute gradients with
+# respect to these Tensors during the backward pass.
+a = torch.randn((), dtype=dtype, requires_grad=True)
+b = torch.randn((), dtype=dtype, requires_grad=True)
+c = torch.randn((), dtype=dtype, requires_grad=True)
+d = torch.randn((), dtype=dtype, requires_grad=True)
+
+initial_loss = 1.
+learning_rate = 1e-5
+for t in range(5000):
+ # Forward pass: compute predicted y using operations on Tensors.
+ y_pred = a + b * x + c * x ** 2 + d * x ** 3
+
+ # Compute and print loss using operations on Tensors.
+ # Now loss is a Tensor of shape (1,)
+ # loss.item() gets the scalar value held in the loss.
+ loss = (y_pred - y).pow(2).sum()
+
+    # Calculate the initial loss, so we can report the loss relative to it
+    if t == 0:
+        initial_loss = loss.item()
+
+ if t % 100 == 99:
+ print(f'Iteration t = {t:4d} loss(t)/loss(0) = {round(loss.item()/initial_loss, 6):10.6f} a = {a.item():10.6f} b = {b.item():10.6f} c = {c.item():10.6f} d = {d.item():10.6f}')
+
+ # Use autograd to compute the backward pass. This call will compute the
+ # gradient of loss with respect to all Tensors with requires_grad=True.
+    # After this call a.grad, b.grad, c.grad and d.grad will be Tensors holding
+ # the gradient of the loss with respect to a, b, c, d respectively.
+ loss.backward()
+
+ # Manually update weights using gradient descent. Wrap in torch.no_grad()
+ # because weights have requires_grad=True, but we don't need to track this
+ # in autograd.
+ with torch.no_grad():
+ a -= learning_rate * a.grad
+ b -= learning_rate * b.grad
+ c -= learning_rate * c.grad
+ d -= learning_rate * d.grad
+
+ # Manually zero the gradients after updating weights
+ a.grad = None
+ b.grad = None
+ c.grad = None
+ d.grad = None
+
+print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')