chemprop · JacksonBurns · May 6, 2026 · Mar 19, 2026 · Mar 20, 2026 · Mar 26, 2026
diff --git a/chemprop_contrib/__init__.py b/chemprop_contrib/__init__.py
@@ -14,3 +14,10 @@
     __all__ += ["mcp"]
 except ImportError:
     pass
+
+try:
+    from chemprop_contrib import set2set
+
+    __all__ += ["set2set"]
+except ImportError:
+    pass
diff --git a/chemprop_contrib/set2set/LICENSE b/chemprop_contrib/set2set/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Marc Short
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/chemprop_contrib/set2set/README.md b/chemprop_contrib/set2set/README.md
@@ -0,0 +1,33 @@
+# `Set2Set Aggregation`
+
+This `chemprop-contrib` package implements the Set2Set aggregation method [1].
+
+See `test_set2set.py` for example usage. It is used like any other Chemprop aggregation method, except that the input dimension must be given explicitly and the output has twice the input dimension.
+
+The Set2Set aggregation operator performs the following operations:
+
+```math
+\begin{matrix}
+\mathbf{q}_t = \mathrm{LSTM}(\mathbf{q}^{*}_{t-1}) \\  
+\alpha_{i,t} = \mathrm{softmax}(\mathbf{h}_i \cdot \mathbf{q}_t) \\
+\mathbf{r}_t = \sum_{i=1}^N \alpha_{i,t} \mathbf{h}_i \\
+\mathbf{q}^{*}_t = \mathbf{q}_t \, \Vert \, \mathbf{r}_t
+\end{matrix}
+```
+
+where $\mathbf{q}^{*}_T$, taken after the final processing step $T$, is the output of the layer and has twice
+the dimensionality of the input.
+
+```
+Parameters
+----------
+in_channels : int
+    The size of each input sample.
+processing_steps : int, default=6
+    The number of processing steps.
+n_layers : int, default=3
+    The number of recurrent LSTM layers.
+```
+
+## References
+[1] O. Vinyals, S. Bengio, M. Kudlur, "Order Matters: Sequence to sequence for sets", February 2016, doi: [10.48550/arXiv.1511.06391](https://doi.org/10.48550/arXiv.1511.06391).
diff --git a/chemprop_contrib/set2set/__init__.py b/chemprop_contrib/set2set/__init__.py
@@ -0,0 +1,5 @@
+from chemprop_contrib.set2set.set2set import (
+    Set2Set,
+)
+
+__all__ = ["Set2Set"]
diff --git a/chemprop_contrib/set2set/set2set.py b/chemprop_contrib/set2set/set2set.py
@@ -0,0 +1,56 @@
+import torch
+from torch_geometric.utils import softmax
+from chemprop.nn import Aggregation
+
+
+class Set2Set(Aggregation):
+    def __init__(
+        self,
+        in_channels: int,
+        processing_steps: int = 6,
+        n_layers: int = 3,
+        dim: int = 0,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(dim, **kwargs)
+        self.in_channels = in_channels
+        self.out_channels = 2 * in_channels
+        self.processing_steps = processing_steps
+        self.lstm = torch.nn.LSTM(
+            self.out_channels, self.in_channels, num_layers=n_layers, **kwargs
+        )
+        self.reset_parameters()
+
+        self.hparams["in_channels"] = in_channels
+        self.hparams["processing_steps"] = processing_steps
+        self.hparams["n_layers"] = n_layers
+
+    def reset_parameters(self):
+        self.lstm.reset_parameters()
+
+    def forward(self, H: torch.Tensor, batch: torch.Tensor):
+        dim_size = batch.max().int() + 1
+        index_torch = batch.unsqueeze(1).repeat(1, H.shape[1])
+        h = (
+            H.new_zeros((self.lstm.num_layers, dim_size, H.size(-1)), dtype=H.dtype),
+            H.new_zeros((self.lstm.num_layers, dim_size, H.size(-1)), dtype=H.dtype),
+        )
+        q_star = H.new_zeros(dim_size, self.out_channels, dtype=H.dtype)
+
+        for _ in range(self.processing_steps):
+            q, h = self.lstm(q_star.unsqueeze(0), h)
+            q = q.view(dim_size, self.in_channels)
+            e = (H * q[batch]).sum(dim=-1, keepdim=True, dtype=H.dtype)
+            a = softmax(e, batch, None, dim=self.dim).to(H.dtype)
+            r = torch.zeros(
+                dim_size, H.shape[1], dtype=H.dtype, device=H.device
+            ).scatter_reduce_(
+                self.dim, index_torch, a * H, reduce="sum", include_self=False
+            )
+            q_star = torch.cat([q, r], dim=-1)
+
+        return q_star
+
+    def __repr__(self):
+        return f"{self.__class__.__name__}({self.in_channels}, {self.out_channels})"
diff --git a/chemprop_contrib/set2set/test_set2set.py b/chemprop_contrib/set2set/test_set2set.py
@@ -0,0 +1,66 @@
+import pandas as pd
+from chemprop.data import MoleculeDatapoint, MoleculeDataset, build_dataloader
+from chemprop.featurizers import SimpleMoleculeMolGraphFeaturizer
+from chemprop.models import MPNN
+from chemprop.nn import BondMessagePassing, RegressionFFN
+from chemprop.nn.transforms import UnscaleTransform
+from lightning import Trainer
+
+from chemprop_contrib.set2set import Set2Set
+
+
+def test_set2set():
+
+    HIDDEN_SIZE=8
+    featurizer = SimpleMoleculeMolGraphFeaturizer()
+
+    df = pd.DataFrame.from_dict(
+        dict(
+            smiles=["C" * i for i in range(1, 10)],
+            target=list(range(1, 10)),
+        )
+    )
+    smiles_col = "smiles"
+    target = df[["target"]].values
+    data = [
+        MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(df[smiles_col], target)
+    ]
+    dataset = MoleculeDataset(data, featurizer)
+    target_scaler = dataset.normalize_targets()
+    output_transform = UnscaleTransform.from_standard_scaler(target_scaler)
+    dataloader = build_dataloader(dataset)
+
+    mp = BondMessagePassing(d_h=HIDDEN_SIZE, depth=1)
+    agg = Set2Set(
+        in_channels=HIDDEN_SIZE,
+        processing_steps=6,
+        n_layers=3
+    )
+    fnn = RegressionFFN(
+        n_tasks=1,
+        input_dim=2*HIDDEN_SIZE,
+        hidden_dim=4,
+        n_layers=1,
+        output_transform=output_transform,
+    )
+    model = MPNN(
+        mp,
+        agg,
+        fnn,
+    )
+
+    trainer = Trainer(
+        max_epochs=1,
+        logger=False,
+        enable_checkpointing=False,
+        fast_dev_run=True,
+        enable_progress_bar=False,
+        enable_model_summary=False,
+        accelerator="cpu",
+        devices=1,
+    )
+    trainer.fit(model, dataloader)
+
+
+# Allow running this smoke test directly as a script, without pytest.
+if __name__ == "__main__":
+    test_set2set()
diff --git a/pyproject.toml b/pyproject.toml
@@ -10,6 +10,7 @@ authors = [
     {name = "The Chemprop Development Team", email="[email protected]"},
     {name = "Jackson Burns"},
     {name = "Shih-Cheng Li"},
+    {name = "Marc Short"},
 ]
 readme = "README.md"
 license = {text = "MIT unless otherwise noted in individual contribution files"}
@@ -41,6 +42,9 @@ mcp =[
     "pandas>=2.3.1",
     "requests>=2.32.4",
 ]
+set2set = [
+    "torch-geometric>=2.7.0",
+]
 
 [project.urls]
 source = "https://github.com/chemprop/chemprop-contrib"