
Commit 33e9e58

rchinna authored and claynerobison committed
Adding First version of Wide and Deep large dataset FP32 & INT8 model. (#195)
* Adding first version of Wide and Deep large dataset FP32 & INT8 model.
  Signed-off-by: Sneha Kola <[email protected]>
  Signed-off-by: Rajendrakumar Chinnaiyan <[email protected]>

* Resolved all the PR comments.
  Signed-off-by: Rajendrakumar Chinnaiyan <[email protected]>

* Removed FP32 and INT8 directories from the wide_deep_large_ds models dir, as both precisions use the same script.
  Signed-off-by: Rajendrakumar Chinnaiyan <[email protected]>
1 parent 30cdb0d commit 33e9e58

File tree

16 files changed: +871 −0 lines changed

benchmarks/README.md

Lines changed: 1 addition & 0 deletions
@@ -30,5 +30,6 @@ dependencies to be installed:
 | Object Detection | TensorFlow | R-FCN | Inference | [FP32](object_detection/tensorflow/rfcn/README.md#fp32-inference-instructions) |
 | Object Detection | TensorFlow | SSD-MobileNet | Inference | [FP32](object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions) |
 | Recommendation | TensorFlow | NCF | Inference | [FP32](recommendation/tensorflow/ncf/README.md#fp32-inference-instructions) |
+| Recommendation | TensorFlow | Wide & Deep Large Dataset | Inference | [Int8](recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) [FP32](recommendation/tensorflow/wide_deep_large_ds/README.md#fp32-inference-instructions) |
 | Recommendation | TensorFlow | Wide & Deep | Inference | [FP32](recommendation/tensorflow/wide_deep/README.md#fp32-inference-instructions) |
 | Text-to-Speech | TensorFlow | WaveNet | Inference | [FP32](text_to_speech/tensorflow/wavenet/README.md#fp32-inference-instructions) |

benchmarks/common/tensorflow/start.sh

Lines changed: 40 additions & 0 deletions
@@ -615,6 +615,44 @@ function wide_deep() {
  fi
}

+# Wide & Deep large dataset model
+function wide_deep_large_ds() {
+  export PYTHONPATH=${PYTHONPATH}:$(pwd):${MOUNT_BENCHMARK}
+
+  # Depending on the Ubuntu version, the tcmalloc preload library gets installed
+  # in different places, so pick the best available one from ldconfig and set it up.
+
+  TCMALLOC_LIB="libtcmalloc.so.4"
+  LIBTCMALLOC="$(ldconfig -p | grep $TCMALLOC_LIB | tr ' ' '\n' | grep /)"
+
+  if [[ -z "${LIBTCMALLOC}" ]]; then
+    echo "libtcmalloc.so.4 not found, trying to install"
+    apt-get update
+    apt-get install google-perftools --fix-missing -y
+  fi
+
+  LIBTCMALLOC="$(ldconfig -p | grep $TCMALLOC_LIB | tr ' ' '\n' | grep /)"
+  echo $LIBTCMALLOC
+  export LD_PRELOAD=$LIBTCMALLOC
+  if [[ -z "${LIBTCMALLOC}" ]]; then
+    echo "Failed to load $TCMALLOC_LIB"
+  fi
+
+  # Dataset file is required, see README for more information.
+  if [ "${DATASET_LOCATION_VOL}" == None ]; then
+    echo "Wide & Deep requires --data-location arg to be defined"
+    exit 1
+  fi
+
+  if [ ${PRECISION} == "int8" ] || [ ${PRECISION} == "fp32" ]; then
+    CMD="${CMD} --in-graph=${IN_GRAPH} --data-location=${DATASET_LOCATION}"
+    PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model
+  else
+    echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}"
+    exit 1
+  fi
+}
+
LOGFILE=${OUTPUT_DIR}/${LOG_FILENAME}
echo "Log output location: ${LOGFILE}"

@@ -653,6 +691,8 @@ elif [ ${MODEL_NAME} == "wavenet" ]; then
  wavenet
elif [ ${MODEL_NAME} == "wide_deep" ]; then
  wide_deep
+elif [ ${MODEL_NAME} == "wide_deep_large_ds" ]; then
+  wide_deep_large_ds
else
  echo "Unsupported model: ${MODEL_NAME}"
  exit 1
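To double-check that the `LD_PRELOAD` export added above actually takes effect inside the benchmark container, the loaded libraries of the running process can be inspected. This is only an illustrative sanity check, not part of the committed scripts:

```python
# Illustrative only (not part of this commit): verify on Linux that libtcmalloc
# was preloaded into the current process by scanning its memory maps.
def tcmalloc_loaded():
    with open("/proc/self/maps") as maps:
        return any("libtcmalloc" in line for line in maps)

if __name__ == "__main__":
    print("tcmalloc preloaded:", tcmalloc_loaded())
```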
benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md

Lines changed: 162 additions & 0 deletions

@@ -0,0 +1,162 @@
# Wide & Deep

This document has instructions for how to run the Wide & Deep (large dataset)
benchmark for the following modes/precisions:

* [INT8 inference](#int8-inference-instructions)
* [FP32 inference](#fp32-inference-instructions)

Benchmarking instructions and scripts for model training are coming later.

## INT8 Inference Instructions

1. Download the large <> income dataset from <>:

   To be updated post dataset approval.

2. Pre-process the downloaded dataset into TFRecords using [preprocess_csv_tfrecords.py](/models/recommendation/tensorflow/wide_deep_large_ds/dataset/preprocess_csv_tfrecords.py); a sketch of the general CSV-to-TFRecord pattern is shown after these steps:

   ```
   $ python3.6 preprocess_csv_tfrecords.py --csv-datafile eval.csv
   ```

3. Clone the [intelai/models](https://github.com/intelai/models) repo.

   This repo has the launch script for running benchmarks, which we will
   use in the next step.

   ```
   $ git clone https://github.com/IntelAI/models.git
   ```

4. Run the benchmarks:

   * To run the benchmark in latency mode, set `--batch-size 1`:

     ```
     $ cd /home/myuser/models/benchmarks

     $ python launch_benchmark.py \
         --model-name wide_deep_large_ds \
         --precision int8 \
         --mode inference \
         --framework tensorflow \
         --benchmark-only \
         --batch-size 1 \
         --socket-id 0 \
         --docker-image tensorflow/tensorflow:latest-mkl \
         --in-graph /root/user/wide_deep_files/int8_wide_deep_final.pb \
         --data-location /root/user/wide_deep_files/preprocessed_eval.tfrecords
     ```

   * To run the benchmark in throughput mode, set `--batch-size 1024`:

     ```
     $ cd /home/myuser/models/benchmarks

     $ python launch_benchmark.py \
         --model-name wide_deep_large_ds \
         --precision int8 \
         --mode inference \
         --framework tensorflow \
         --benchmark-only \
         --batch-size 1024 \
         --socket-id 0 \
         --docker-image tensorflow/tensorflow:latest-mkl \
         --in-graph /root/user/wide_deep_files/int8_wide_deep_final.pb \
         --data-location /root/user/wide_deep_files/preprocessed_eval.tfrecords
     ```

5. The log file is saved to the value of `--output-dir`.

   The tail of the log output when the benchmarking completes should look
   something like this:

   ```
   --------------------------------------------------
   Total test records : 2000000
   No of correct predicitons : 1549508
   Batch size is : 1024
   Number of batches : 1954
   Classification accuracy (%) : 77.4754
   Inference duration (seconds) : 1.9765
   Latency (millisecond/batch) : 0.000988
   Throughput is (records/sec) : 1151892.25
   --------------------------------------------------
   numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/inference.py --input-graph=/in_graph/int8_wide_deep_final.pb --inter-op-parallelism-threads=28 --intra-op-parallelism-threads=1 --omp-num-threads=1 --batch-size=1024 --kmp-blocktime=0 --datafile-path=/dataset
   Ran inference with batch size 1024
   Log location outside container: {--output-dir value}/benchmark_wide_deep_large_ds_inference_int8_20190225_061815.log
   ```
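The exact feature schema handled by `preprocess_csv_tfrecords.py` is not part of this view, but the conversion it performs follows the usual TensorFlow 1.x CSV-to-TFRecord pattern. A minimal sketch, assuming a purely numeric row layout (the real script's column names and types will differ):

```python
# Minimal, illustrative CSV -> TFRecord conversion (TF 1.x API).
# The real schema lives in preprocess_csv_tfrecords.py and is not reproduced here.
import csv
import tensorflow as tf

def csv_to_tfrecords(csv_file, tfrecords_file):
    with open(csv_file) as f, tf.python_io.TFRecordWriter(tfrecords_file) as writer:
        for row in csv.reader(f):
            # Hypothetical layout: store each row as a single float feature list.
            feature = {
                "values": tf.train.Feature(
                    float_list=tf.train.FloatList(value=[float(v) for v in row]))
            }
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())

csv_to_tfrecords("eval.csv", "preprocessed_eval.tfrecords")
```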

## FP32 Inference Instructions

1. Download the large <> income dataset from <>:

   To be updated post dataset approval.

2. Pre-process the downloaded dataset into TFRecords using [preprocess_csv_tfrecords.py](../../../../models/recommendation/tensorflow/wide_deep_large_ds/dataset/preprocess_csv_tfrecords.py):

   ```
   $ python3.6 preprocess_csv_tfrecords.py --csv-datafile eval.csv
   ```

3. Clone the [intelai/models](https://github.com/intelai/models) repo.

   This repo has the launch script for running benchmarks, which we will
   use in the next step.

   ```
   $ git clone https://github.com/IntelAI/models.git
   ```

4. Run the benchmarks:

   * To run the benchmark in latency mode, set `--batch-size 1`:

     ```
     $ cd /home/myuser/models/benchmarks

     $ python launch_benchmark.py \
         --model-name wide_deep_large_ds \
         --precision fp32 \
         --mode inference \
         --framework tensorflow \
         --benchmark-only \
         --batch-size 1 \
         --socket-id 0 \
         --docker-image tensorflow/tensorflow:latest-mkl \
         --in-graph /root/user/wide_deep_files/fp32_wide_deep_final.pb \
         --data-location /root/user/wide_deep_files/preprocessed_eval.tfrecords
     ```

   * To run the benchmark in throughput mode, set `--batch-size 1024`:

     ```
     $ cd /home/myuser/models/benchmarks

     $ python launch_benchmark.py \
         --model-name wide_deep_large_ds \
         --precision fp32 \
         --mode inference \
         --framework tensorflow \
         --benchmark-only \
         --batch-size 1024 \
         --socket-id 0 \
         --docker-image tensorflow/tensorflow:latest-mkl \
         --in-graph /root/user/wide_deep_files/fp32_wide_deep_final.pb \
         --data-location /root/user/wide_deep_files/preprocessed_eval.tfrecords
     ```

5. The log file is saved to the value of `--output-dir`.

   The tail of the log output when the benchmarking completes should look
   something like this (a short sketch of how the accuracy figure follows from the reported counts appears after this section):

   ```
   --------------------------------------------------
   Total test records : 2000000
   No of correct predicitons : 1550447
   Batch size is : 1024
   Number of batches : 1954
   Classification accuracy (%) : 77.5223
   Inference duration (seconds) : 3.4977
   Latency (millisecond/batch) : 0.001749
   Throughput is (records/sec) : 571802.228
   --------------------------------------------------
   numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/inference.py --input-graph=/in_graph/fp32_wide_deep_final.pb --inter-op-parallelism-threads=28 --intra-op-parallelism-threads=1 --omp-num-threads=1 --batch-size=1024 --kmp-blocktime=0 --datafile-path=/dataset
   Ran inference with batch size 1024
   Log location outside container: {--output-dir value}/benchmark_wide_deep_large_ds_inference_fp32_20190225_062206.log
   ```
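For reference, the `Classification accuracy (%)` line in both sample logs is simply the share of correct predictions over all test records. Checking it against the FP32 counts above:

```python
# Reproduce the accuracy line from the sample FP32 log (counts copied from the log above).
total_records = 2000000
correct_predictions = 1550447
accuracy_pct = 100.0 * correct_predictions / total_records
print("Classification accuracy (%) : %.4f" % accuracy_pct)  # ~77.5223, as reported
```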
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
#
# -*- coding: utf-8 -*-
#
# Copyright (c) 2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: EPL-2.0
#
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
#
# -*- coding: utf-8 -*-
#
# Copyright (c) 2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: EPL-2.0
#
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
#
# -*- coding: utf-8 -*-
#
# Copyright (c) 2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: EPL-2.0
#
Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
#
# -*- coding: utf-8 -*-
#
# Copyright (c) 2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: EPL-2.0
#

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from common.base_model_init import BaseModelInitializer
from common.base_model_init import set_env_var

import os
import time


class ModelInitializer(BaseModelInitializer):
    """Initialize the model and run the benchmark."""

    def __init__(self, args, custom_args, platform_util=None):
        super(ModelInitializer, self).__init__(args, custom_args, platform_util)

        self.benchmark_command = ""  # use default batch size if -1
        self.results_file_path = ""

        if self.args.batch_size == -1:
            self.args.batch_size = 1024

        # get number of cores if there is no core info passed in.
        if self.args.num_cores == -1:
            self.args.num_cores = self.platform_util.num_cores_per_socket()

        num_of_parallel_batches = self.platform_util.num_cores_per_socket()
        kmp_blocktime = "0"
        # Set KMP env vars, if they haven't already been set
        self.set_kmp_vars(kmp_settings="1", kmp_blocktime=kmp_blocktime,
                          kmp_affinity="noverbose,warnings,respect,granularity=core,none")
        # set num_inter_threads and num_intra_threads
        self.set_num_inter_intra_threads(num_inter_threads=self.args.num_cores, num_intra_threads=1)

        benchmark_script = os.path.join(
            self.args.intelai_models,
            self.args.mode,
            "inference.py")

        self.benchmark_command = self.get_numactl_command(self.args.socket_id)\
            + "python " + benchmark_script

        set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads)
        self.benchmark_command += " --input-graph=" + \
            self.args.input_graph + \
            " --inter-op-parallelism-threads=" + \
            str(self.args.num_inter_threads) + \
            " --intra-op-parallelism-threads=" + \
            str(self.args.num_intra_threads) + \
            " --omp-num-threads=" + \
            str(self.args.num_intra_threads) + \
            " --batch-size=" + \
            str(self.args.batch_size) + \
            " --num-of-parallel-batches=" + \
            str(num_of_parallel_batches) + \
            " --kmp-blocktime=" + \
            str(kmp_blocktime)

        # if the data location directory is not empty, then include the arg
        if self.args.data_location:
            self.benchmark_command += " --datafile-path=" + \
                self.args.data_location
        # if output results is enabled, generate a results file name and pass it to the inference script
        if self.args.output_results:
            self.results_filename = "{}_{}_{}_results_{}.txt".format(
                self.args.model_name, self.args.precision, self.args.mode,
                time.strftime("%Y%m%d_%H%M%S", time.gmtime()))
            self.results_file_path = os.path.join(self.args.output_dir, self.results_filename)
            self.benchmark_command += " --results-file-path {}".format(self.results_file_path)

    def run(self):
        if self.benchmark_command:
            print(self.benchmark_command)
            self.run_command(self.benchmark_command)
        if self.results_file_path:
            print("Inference results file in the output directory: {}".format(self.results_filename))

0 commit comments
