-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
Copy pathrunner.py
76 lines (64 loc) · 2.64 KB
/
runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#
# SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
from collections import OrderedDict
from polygraphy import mod, util
from polygraphy.backend.base import BaseRunner
torch = mod.lazy_import("torch")
@mod.export()
class PytRunner(BaseRunner):
    """
    Runs inference using PyTorch.

    Inputs are copied to the current CUDA device on every inference call.
    The model itself is not moved to any device by this runner.
    """

    def __init__(self, model, input_metadata, output_names, name=None):
        """
        Args:
            model (Union[torch.nn.Module, Callable() -> torch.nn.Module]):
                    A torch.nn.Module or subclass or a callable that returns one.
            input_metadata (TensorMetadata): Mapping of input names to their data types and shapes.
            output_names (List[str]):
                    A list of output names of the model. This information is used by the
                    Comparator to determine which outputs to compare.


            name (str):
                    The human-readable name prefix to use for this runner.
                    A runner count and timestamp will be appended to this prefix.
        """
        super().__init__(name=name, prefix="pytorch-runner")
        # Keep the user-supplied model (or model factory) untouched; it is only
        # resolved into `self.model` when the runner is activated.
        self._model = model
        self.input_metadata = input_metadata
        self.output_names = output_names

    def activate_impl(self):
        """
        Resolves the model (invoking it first if a callable was provided) and
        switches it to evaluation mode.
        """
        self.model, _ = util.invoke_if_callable(self._model)
        self.model.eval()

    def get_input_metadata_impl(self):
        """
        Returns:
            TensorMetadata: The input metadata supplied to the constructor.
        """
        return self.input_metadata

    def infer_impl(self, feed_dict):
        """
        Runs the model on the provided inputs and records the elapsed time in
        ``self.inference_time`` (in seconds).

        Args:
            feed_dict (OrderedDict[str, numpy.ndarray]):
                    Mapping of input names to NumPy arrays. Values are paired with the
                    entries of ``input_metadata`` by position, so both must be in the
                    same order. Each array is cast to the data type from the metadata.

        Returns:
            OrderedDict[str, numpy.ndarray]:
                    Mapping of output names to outputs as NumPy arrays. Outputs are paired
                    with ``output_names`` by position.
        """
        with torch.no_grad():
            inputs = [
                torch.from_numpy(val.astype(dtype)).cuda()
                for (val, (dtype, _)) in zip(feed_dict.values(), self.input_metadata.values())
            ]

            # CUDA work is queued asynchronously. Synchronize before starting the
            # clock so pending input copies are not counted, and again before
            # stopping it so that the measurement covers the actual execution
            # rather than just the time needed to launch the kernels.
            torch.cuda.synchronize()
            start = time.perf_counter()
            outputs = self.model(*inputs)
            torch.cuda.synchronize()
            end = time.perf_counter()

        # A model with a single output may return a bare tensor. Iterating over a
        # tensor walks its first dimension, so wrap it to keep the output intact.
        if isinstance(outputs, torch.Tensor):
            outputs = (outputs,)

        out_dict = OrderedDict()
        for name, output in zip(self.output_names, outputs):
            out_dict[name] = output.detach().cpu().numpy()
        self.inference_time = end - start
        return out_dict

    def deactivate_impl(self):
        """
        Drops the runner's reference to the resolved model.
        """
        del self.model