
Commit ab45708

Merge pull request #1086 from JanFSchulte/softmaxfix_torch
Fix softmax parsing in pytorch and add test
2 parents f9a2412 + a306e3f commit ab45708

File tree: 3 files changed, +26 / −3 lines

hls4ml/converters/pytorch/core.py

Lines changed: 11 additions & 2 deletions
@@ -62,9 +62,13 @@ def parse_activation_layer(operation, layer_name, input_names, input_shapes, nod
             layer['activation'] = 'ThresholdedReLU'
             if layer['activ_param'] < 0:
                 raise Exception('negative threshold values not supported')
-
-        if hasattr(node, 'dim'):
+        if hasattr(class_object, 'dim'):
             layer['axis'] = class_object.dim
+        if layer['class_name'] == 'Softmax' and layer['axis'] is None:
+            layer['axis'] = -1
+        if 'IOType' in config:
+            if layer['class_name'] == 'Softmax' and config['IOType'] == 'io_stream' and layer['axis'] != -1:
+                raise Exception('dim needs to be -1 for io_stream')
     else:
         if layer['class_name'] in ['ReLU', 'Sigmoid', 'Tanh']:
             layer['class_name'] = 'Activation'
@@ -80,6 +84,11 @@ def parse_activation_layer(operation, layer_name, input_names, input_shapes, nod
             layer['activation'] = 'ThresholdedReLU'
         if 'dim' in node.kwargs:
             layer['axis'] = node.kwargs['dim']
+        if layer['class_name'] == 'Softmax' and layer['axis'] is None:
+            layer['axis'] = -1
+        if 'IOType' in config:
+            if layer['class_name'] == 'Softmax' and config['IOType'] == 'io_stream' and layer['axis'] != -1:
+                raise Exception('dim needs to be -1 for io_stream')

     output_shape = input_shapes[0]
     return layer, output_shape
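In short, both the module path and the functional path of the parser now treat a Softmax without an explicit `dim` as acting on the last axis, and reject a Softmax over any other axis when the project uses `io_stream`. A minimal usage sketch of where this surfaces during conversion; the exact `config_from_pytorch_model` / `convert_from_pytorch_model` signatures and the input-shape argument are assumptions and may differ between hls4ml versions:

```python
# Sketch only: converting a PyTorch Softmax through hls4ml.
# Signatures and the output_dir value are illustrative, not from this commit.
import torch.nn as nn
import hls4ml

model = nn.Sequential(nn.Linear(16, 8), nn.Softmax(dim=-1))
config = hls4ml.utils.config_from_pytorch_model(model)

# With io_type='io_stream' the parser now requires the softmax axis to be the
# last one; nn.Softmax(dim=0) here would raise "dim needs to be -1 for io_stream".
hls_model = hls4ml.converters.convert_from_pytorch_model(
    model,
    (None, 16),  # example input shape, assumed argument
    hls_config=config,
    io_type='io_stream',
    output_dir='hls4mlprj_softmax_example',  # illustrative path
)
```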

hls4ml/model/optimizer/passes/convert_to_channels_last.py

Lines changed: 5 additions & 1 deletion
@@ -94,7 +94,11 @@ def transform(self, model, node):
                 node.add_output_variable(shape, dims)

             # Have to transpose back before flattening to get correct order of elements in the flattened tensor
-            if isinstance(node, Reshape) and len(node.attributes['target_shape']) == 1:
+            if (
+                isinstance(node, Reshape)
+                and len(node.attributes['target_shape']) == 1
+                and not model.config.config['HLSConfig']['Model']['ChannelsLastConversion'] == "internal"
+            ):
                 previous_node = node.get_input_node(node.inputs[0])
                 input = previous_node.name
                 outshape = previous_node.get_output_variable().shape
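The extra transpose-before-flatten is now skipped when the channels-last handling is done "internally" rather than by the full conversion pass. A rough sketch of how that flag can end up in the key the pass checks; the `channels_last_conversion` keyword is an assumption about the config helper, so setting the dictionary key directly is shown as a fallback:

```python
# Sketch only: requesting 'internal' channels-last conversion so the Reshape
# special case above is bypassed. The keyword argument is an assumption;
# verify it against your hls4ml version.
config = hls4ml.utils.config_from_pytorch_model(model, channels_last_conversion='internal')
# Equivalent fallback: set the key the optimizer pass reads (stored under
# 'HLSConfig' once the model is converted).
config['Model']['ChannelsLastConversion'] = 'internal'
```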

test/pytest/test_pytorch_api.py

Lines changed: 10 additions & 0 deletions
@@ -63,6 +63,7 @@ def test_linear(backend, io_type):
 @pytest.mark.parametrize(
     "activation_function",
     [
+        nn.Softmax(dim=-1),
         nn.ReLU(),
         nn.Tanh(),
         nn.LeakyReLU(negative_slope=1.0),
@@ -119,6 +120,14 @@ def forward(self, x):
         return nn.functional.relu(x)


+class SoftmaxModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x):
+        return nn.functional.softmax(x, dim=-1)
+
+
 class TanHModel(nn.Module):
     def __init__(self):
         super().__init__()
@@ -162,6 +171,7 @@ def forward(self, x):
 @pytest.mark.parametrize(
     "activation_function",
     [
+        SoftmaxModel(),
         ReLuModel(),
         TanHModel(),
         LeakyReLuModel(),
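The new cases run through the same parametrized activation tests as the existing activations, once as an `nn.Softmax` module and once as the functional `SoftmaxModel`. Roughly, each case compares the hls4ml prediction against the PyTorch output on random input; a sketch of that check with illustrative names and tolerances (the real assertions live in test_pytorch_api.py):

```python
# Illustrative sketch of the comparison the parametrized test performs;
# function name and tolerance are assumptions, not copied from the test file.
import numpy as np
import torch


def check_against_pytorch(activation_function, hls_model, X_input):
    pytorch_prediction = activation_function(torch.Tensor(X_input)).detach().numpy()
    hls_prediction = hls_model.predict(X_input).reshape(pytorch_prediction.shape)
    np.testing.assert_allclose(hls_prediction, pytorch_prediction, rtol=0, atol=0.05)
```

Since the parametrized test ids contain the activation class name, just these cases can be selected with pytest's `-k` filter (e.g. `-k Softmax`).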
