intel
diff --git a/‎benchmarks/README.md‎
Lines changed: 1 addition & 0 deletions b/‎benchmarks/README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎benchmarks/common/tensorflow/start.sh‎
Lines changed: 40 additions & 0 deletions b/‎benchmarks/common/tensorflow/start.sh‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md‎
Lines changed: 162 additions & 0 deletions b/‎benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md‎
Lines changed: 162 additions & 0 deletions
diff --git a/‎benchmarks/recommendation/tensorflow/wide_deep_large_ds/__init__.py‎
Lines changed: 19 additions & 0 deletions b/‎benchmarks/recommendation/tensorflow/wide_deep_large_ds/__init__.py‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/__init__.py‎
Lines changed: 19 additions & 0 deletions b/‎benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/__init__.py‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/__init__.py‎
Lines changed: 19 additions & 0 deletions b/‎benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/__init__.py‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py‎
Lines changed: 96 additions & 0 deletions b/‎benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py‎
Lines changed: 96 additions & 0 deletions
@@ -30,5 +30,6 @@ dependencies to be installed:
 | Object Detection       | TensorFlow    | R-FCN               | Inference | [FP32](object_detection/tensorflow/rfcn/README.md#fp32-inference-instructions) |
 | Object Detection       | TensorFlow    | SSD-MobileNet       | Inference | [FP32](object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions) |
 | Recommendation         | TensorFlow    | NCF                 | Inference | [FP32](recommendation/tensorflow/ncf/README.md#fp32-inference-instructions) |
+| Recommendation         | TensorFlow    | Wide & Deep Large Dataset | Inference | [Int8](recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) [FP32](recommendation/tensorflow/wide_deep_large_ds/README.md#fp32-inference-instructions) |
 | Recommendation         | TensorFlow    | Wide & Deep         | Inference | [FP32](recommendation/tensorflow/wide_deep/README.md#fp32-inference-instructions) |
 | Text-to-Speech         | TensorFlow    | WaveNet             | Inference | [FP32](text_to_speech/tensorflow/wavenet/README.md#fp32-inference-instructions) |
@@ -615,6 +615,44 @@ function wide_deep() {
     fi
 }
 
+# Wide & Deep large dataset model
+function wide_deep_large_ds() {
+    # Runs the Wide & Deep large dataset benchmark for int8 or fp32 precision.
+    # Reads: MOUNT_BENCHMARK, DATASET_LOCATION_VOL, DATASET_LOCATION, PRECISION,
+    #        IN_GRAPH, CMD and MODEL_NAME.
+    # Exits 1 when the dataset location is missing or the precision is unsupported.
+    export PYTHONPATH=${PYTHONPATH}:$(pwd):${MOUNT_BENCHMARK}
+
+    # The tcmalloc shared library is installed in different places depending on
+    # the Ubuntu version, so resolve its path through ldconfig instead of
+    # hard-coding it. Only the first match is kept: ldconfig may list the
+    # library once per architecture, and a newline-separated value is not a
+    # valid LD_PRELOAD list.
+    TCMALLOC_LIB="libtcmalloc.so.4"
+    LIBTCMALLOC="$(ldconfig -p | grep "${TCMALLOC_LIB}" | tr ' ' '\n' | grep / | head -n 1)"
+
+    if [[ -z "${LIBTCMALLOC}" ]]; then
+      echo "libtcmalloc.so.4 not found, trying to install"
+      apt-get update
+      apt-get install google-perftools --fix-missing -y
+      # Look the library up again now that the package may have been installed.
+      LIBTCMALLOC="$(ldconfig -p | grep "${TCMALLOC_LIB}" | tr ' ' '\n' | grep / | head -n 1)"
+    fi
+
+    if [[ -n "${LIBTCMALLOC}" ]]; then
+      echo "${LIBTCMALLOC}"
+      export LD_PRELOAD="${LIBTCMALLOC}"
+    else
+      # Best effort: keep going without tcmalloc and leave any LD_PRELOAD the
+      # caller already set untouched instead of overwriting it with an empty value.
+      echo "Failed to load $TCMALLOC_LIB"
+    fi
+
+    # Dataset file is required, see README for more information.
+    if [ "${DATASET_LOCATION_VOL}" == "None" ]; then
+      echo "Wide & Deep requires --data-location arg to be defined"
+      exit 1
+    fi
+
+    # Quote PRECISION so an empty or unset value reaches the "not supported"
+    # branch instead of making the test expression itself a syntax error.
+    if [ "${PRECISION}" == "int8" ] || [ "${PRECISION}" == "fp32" ]; then
+        CMD="${CMD} --in-graph=${IN_GRAPH} --data-location=${DATASET_LOCATION}"
+        PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model
+    else
+        echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}"
+        exit 1
+    fi
+}
+
 LOGFILE=${OUTPUT_DIR}/${LOG_FILENAME}
 echo "Log output location: ${LOGFILE}"
 
@@ -653,6 +691,8 @@ elif [ ${MODEL_NAME} == "wavenet" ]; then
   wavenet
 elif [ ${MODEL_NAME} == "wide_deep" ]; then
   wide_deep
+elif [ ${MODEL_NAME} == "wide_deep_large_ds" ]; then
+  wide_deep_large_ds  
 else
   echo "Unsupported model: ${MODEL_NAME}"
   exit 1
 
@@ -0,0 +1,162 @@
+# Wide & Deep Large Dataset
+
+This document has instructions for how to run the Wide & Deep large dataset
+benchmark for the following modes/precisions:
+
+* [INT8 inference](#int8-inference-instructions)
+* [FP32 inference](#fp32-inference-instructions)
+
+Benchmarking instructions and scripts for model training coming later.
+
+## INT8 Inference Instructions
+
+ 
+1. Download large <> dataset income dataset from <>: 
+   
+   To be updated post dataset approval
+       
+2. Pre-process the downloaded dataset to tfrecords using [preprocess_csv_tfrecords.py](/models/recommendation/tensorflow/wide_deep_large_ds/dataset/preprocess_csv_tfrecords.py)
+   
+    ```
+	$ python3.6 preprocess_csv_tfrecords.py --csv-datafile eval.csv 
+    ```
+   
+3. Clone the [intelai/models](https://github.com/intelai/models) repo.
+
+   This repo has the launch script for running benchmarks, which we will
+   use in the next step.
+
+    ```
+    $ git clone https://github.com/IntelAI/models.git
+    ```
+4. How to run benchmarks
+
+   * Running benchmarks in latency mode, set `--batch-size 1`
+       ``` 
+       $ cd /home/myuser/models/benchmarks
+
+       $ python launch_benchmark.py \
+            --model-name wide_deep_large_ds \
+            --precision int8 \
+            --mode inference \
+            --framework tensorflow \
+            --benchmark-only \
+            --batch-size 1 \
+            --socket-id 0 \
+            --docker-image tensorflow/tensorflow:latest-mkl \
+            --in-graph /root/user/wide_deep_files/int8_wide_deep_final.pb \
+            --data-location /root/user/wide_deep_files/preprocessed_eval.tfrecords 
+       ```
+   * Running benchmarks in throughput mode, set `--batch-size 1024`
+       ``` 
+       $ cd /home/myuser/models/benchmarks
+    
+       $ python launch_benchmark.py \
+            --model-name wide_deep_large_ds \
+            --precision int8 \
+            --mode inference \
+            --framework tensorflow \
+            --benchmark-only \
+            --batch-size 1024 \
+            --socket-id 0 \
+            --docker-image tensorflow/tensorflow:latest-mkl \
+            --in-graph /root/user/wide_deep_files/int8_wide_deep_final.pb \
+            --data-location /root/user/wide_deep_files/preprocessed_eval.tfrecords 
+       ```
+5. The log file is saved to the value of `--output-dir`.
+
+   The tail of the log output when the benchmarking completes should look
+   something like this:
+
+    ```
+    
+    --------------------------------------------------
+    Total test records           :  2000000
+    No of correct predicitons    :  1549508
+    Batch size is                :  1024
+    Number of batches            :  1954
+    Classification accuracy (%)  :  77.4754
+    Inference duration (seconds) :  1.9765
+    Latency (millisecond/batch)  :  0.000988
+    Throughput is (records/sec)  :  1151892.25
+    --------------------------------------------------
+    numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/inference.py --input-graph=/in_graph/int8_wide_deep_final.pb --inter-op-parallelism-threads=28 --intra-op-parallelism-threads=1 --omp-num-threads=1 --batch-size=1024 --kmp-blocktime=0 --datafile-path=/dataset
+    Ran inference with batch size 1024
+    Log location outside container:  {--output-dir value}/benchmark_wide_deep_large_ds_inference_int8_20190225_061815.log
+    ```
+
+## FP32 Inference Instructions
+
+1. Download large <> dataset income dataset from <>: 
+   
+   To be updated post dataset approval
+       
+2. Pre-process the downloaded dataset to tfrecords using [preprocess_csv_tfrecords.py](../../../../models/recommendation/tensorflow/wide_deep_large_ds/dataset/preprocess_csv_tfrecords.py)
+   
+   ```
+    $ python3.6 preprocess_csv_tfrecords.py --csv-datafile eval.csv 
+   ```
+3. Clone the [intelai/models](https://github.com/intelai/models) repo.
+
+   This repo has the launch script for running benchmarks, which we will
+   use in the next step.
+
+    ```
+    $ git clone https://github.com/IntelAI/models.git
+    ```
+4. How to run benchmarks
+
+   * Running benchmarks in latency mode, set `--batch-size 1`
+       ``` 
+       $ cd /home/myuser/models/benchmarks
+
+       $ python launch_benchmark.py \
+            --model-name wide_deep_large_ds \
+            --precision fp32 \
+            --mode inference \
+            --framework tensorflow \
+            --benchmark-only \
+            --batch-size 1 \
+            --socket-id 0 \
+            --docker-image tensorflow/tensorflow:latest-mkl \
+            --in-graph /root/user/wide_deep_files/fp32_wide_deep_final.pb \
+            --data-location /root/user/wide_deep_files/preprocessed_eval.tfrecords 
+       ```
+   * Running benchmarks in throughput mode, set `--batch-size 1024`
+       ``` 
+       $ cd /home/myuser/models/benchmarks
+    
+       $ python launch_benchmark.py \
+            --model-name wide_deep_large_ds \
+            --precision fp32 \
+            --mode inference \
+            --framework tensorflow \
+            --benchmark-only \
+            --batch-size 1024 \
+            --socket-id 0 \
+            --docker-image tensorflow/tensorflow:latest-mkl \
+            --in-graph /root/user/wide_deep_files/fp32_wide_deep_final.pb \
+            --data-location /root/user/wide_deep_files/preprocessed_eval.tfrecords 
+       ```
+5. The log file is saved to the value of `--output-dir`.
+
+   The tail of the log output when the benchmarking completes should look
+   something like this:
+
+    ```
+
+    --------------------------------------------------
+    Total test records           :  2000000
+    No of correct predicitons    :  1550447
+    Batch size is                :  1024
+    Number of batches            :  1954
+    Classification accuracy (%)  :  77.5223
+    Inference duration (seconds) :  3.4977
+    Latency (millisecond/batch)  :  0.001749
+    Throughput is (records/sec)  :  571802.228
+    --------------------------------------------------
+    numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/inference.py --input-graph=/in_graph/fp32_wide_deep_final.pb --inter-op-parallelism-threads=28 --intra-op-parallelism-threads=1 --omp-num-threads=1 --batch-size=1024 --kmp-blocktime=0 --datafile-path=/dataset
+    Ran inference with batch size 1024
+    Log location outside container: {--output-dir value}/benchmark_wide_deep_large_ds_inference_fp32_20190225_062206.log
+    
+    ```
@@ -0,0 +1,19 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: EPL-2.0
+#
@@ -0,0 +1,19 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: EPL-2.0
+#
@@ -0,0 +1,19 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: EPL-2.0
+#
@@ -0,0 +1,96 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: EPL-2.0
+#
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from common.base_model_init import BaseModelInitializer
+from common.base_model_init import set_env_var
+
+import os
+import time
+
+
+class ModelInitializer(BaseModelInitializer):
+    """initialize mode and run benchmark"""
+
+    def __init__(self, args, custom_args, platform_util=None):
+        super(ModelInitializer, self).__init__(args, custom_args, platform_util)
+
+        self.benchmark_command = ""  # use default batch size if -1
+        self.results_file_path = ""
+
+        if self.args.batch_size == -1:
+            self.args.batch_size = 1024
+
+        # get number of cores if there is no core info passed in.
+        if self.args.num_cores == -1:
+            self.args.num_cores = self.platform_util.num_cores_per_socket()
+
+        num_of_parallel_batches = self.platform_util.num_cores_per_socket()
+        kmp_blocktime = "0"
+        # Set KMP env vars, if they haven't already been set
+        self.set_kmp_vars(kmp_settings="1", kmp_blocktime=kmp_blocktime,
+                          kmp_affinity="noverbose,warnings,respect,granularity=core,none")
+        # set num_inter_threads and num_intra_threads
+        self.set_num_inter_intra_threads(num_inter_threads=self.args.num_cores, num_intra_threads=1)
+
+        benchmark_script = os.path.join(
+            self.args.intelai_models,
+            self.args.mode,
+            "inference.py")
+
+        self.benchmark_command = self.get_numactl_command(self.args.socket_id)\
+            + "python " + benchmark_script
+
+        set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads)
+        self.benchmark_command += " --input-graph=" + \
+                                  self.args.input_graph + \
+                                  " --inter-op-parallelism-threads=" + \
+                                  str(self.args.num_inter_threads) + \
+                                  " --intra-op-parallelism-threads=" + \
+                                  str(self.args.num_intra_threads) + \
+                                  " --omp-num-threads=" + \
+                                  str(self.args.num_intra_threads) + \
+                                  " --batch-size=" + \
+                                  str(self.args.batch_size) + \
+                                  " --num-of-parallel-batches=" + \
+                                  str(num_of_parallel_batches) + \
+                                  " --kmp-blocktime=" + \
+                                  str(kmp_blocktime)
+
+        # if the data location directory is not empty, then include the arg
+        if self.args.data_location:
+            self.benchmark_command += " --datafile-path=" + \
+                                      self.args.data_location
+            # if output results is enabled, generate a results file name and pass it to the inference script
+            if self.args.output_results:
+                self.results_filename = "{}_{}_{}_results_{}.txt".format(
+                    self.args.model_name, self.args.precision, self.args.mode,
+                    time.strftime("%Y%m%d_%H%M%S", time.gmtime()))
+                self.results_file_path = os.path.join(self.args.output_dir, self.results_filename)
+                self.benchmark_command += " --results-file-path {}".format(self.results_file_path)
+
+    def run(self):
+        if self.benchmark_command:
+            print(self.benchmark_command)
+            self.run_command(self.benchmark_command)
+            if self.results_file_path:
+                print("Inference results file in the output directory: {}".format(self.results_filename))