diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index 0f4e02abfe7452..87f835991f3697 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -3,7 +3,8 @@ Most Efficient Large Language Models for AI PC This page is regularly updated to help you identify the best-performing LLMs on the Intel® Core™ Ultra processor family and AI PCs. -The current data is as of OpenVINO 2024.6, 13 Dec. 2024. +The current data is as of OpenVINO 2025.0, 06 March 2025 (7-155H and 7-268V) +and OpenVINO 2024.6, 13 Dec. 2024 (9-288V). The tables below list the key performance indicators for inference on built-in GPUs. diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv index 9481b5619244e2..81ac6b4d4a3f9d 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv +++ b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv @@ -1,96 +1,147 @@ -Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec -opt-125m-gptq,INT4-MIXED,32,1116,25.8,8.1,123.5 -opt-125m-gptq,INT4-MIXED,1024,1187.1,75.2,8.2,122.0 -qwen2-0.5b,INT4-MIXED,32,1587.4,45.1,15.4,64.9 -qwen2-0.5b,INT4-MIXED,1024,1587.8,228.2,15.6,64.1 -tiny-llama-1.1b-chat,INT4-MIXED,32,1704.2,42.4,17.6,56.8 -tiny-llama-1.1b-chat,INT4-MIXED,1024,1616.3,489.2,18.9,52.9 -qwen2-0.5b,INT8-CW,32,1477.3,51.5,20.2,49.5 -qwen2-0.5b,INT8-CW,1024,1592,263.7,20.6,48.5 -tiny-llama-1.1b-chat,INT8-CW,32,1855.6,60.2,20.7,48.3 -tiny-llama-1.1b-chat,INT8-CW,1024,1992.6,618.2,21.7,46.1 -qwen2-1.5b,INT4-MIXED,32,2024.2,59.6,23.1,43.3 -bloomz-560m,FP16,1024,2773.1,647.8,23.8,42.0 -qwen2-1.5b,INT4-MIXED,1024,2177.7,577.4,23.8,42.0 -bloomz-560m,FP16,32,2582.7,44.2,25.1,39.8 -dolly-v2-3b,INT4-MIXED,32,2507.9,79.8,29.4,34.0 -phi-2,INT4-MIXED,32,2568.9,74.6,29.7,33.7 -qwen2-1.5b,INT8-CW,32,2577.3,81.6,30.5,32.8 -red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2489.4,69.9,30.5,32.8 -minicpm-1b-sft,INT4-MIXED,31,2442.1,84.7,31,32.3 -qwen2-1.5b,INT8-CW,1024,2739.8,773.3,31.2,32.1 -gemma-2b-it,INT4-MIXED,32,2998.2,103.5,31.4,31.8 -dolly-v2-3b,INT4-MIXED,1024,2508.1,1396.6,32,31.3 -gemma-2b-it,INT4-MIXED,1024,3171.5,822.3,32.2,31.1 -phi-2,INT4-MIXED,1024,2940.5,1395.3,32.2,31.1 -red-pajama-incite-chat-3b-v1,INT4-MIXED,1023,2489.6,1435.5,33.1,30.2 -minicpm-1b-sft,INT8-CW,31,2818.6,86.9,33.4,29.9 -stable-zephyr-3b-dpo,INT4-MIXED,32,2638.2,87.4,33.8,29.6 -stablelm-3b-4e1t,INT4-MIXED,32,2750.5,89.4,35.6,28.1 -stablelm-3b-4e1t,INT4-MIXED,1023,3115.5,1473.1,38.1,26.2 -phi-3-mini-4k-instruct,INT4-MIXED,32,3039.1,109.2,40.4,24.8 -phi-2,INT8-CW,32,3599.7,107.5,42.1,23.8 -gemma-2b-it,INT8-CW,32,3845.4,111.3,42.2,23.7 -dolly-v2-3b,INT8-CW,32,3596.4,110.1,42.5,23.5 -gemma-2b-it,INT8-CW,1024,3844.6,1183,43,23.3 -red-pajama-incite-chat-3b-v1,INT8-CW,32,3590,111,43.3,23.1 -phi-3-mini-4k-instruct,INT4-MIXED,1024,3467.6,1721.6,43.5,23.0 -stablelm-3b-4e1t,INT8-CW,32,3582.8,111,44.3,22.6 -stable-zephyr-3b-dpo,INT8-CW,32,3607.2,110.2,44.5,22.5 -phi-2,INT8-CW,1024,3982,1508,44.6,22.4 -dolly-v2-3b,INT8-CW,1024,3596.5,1529.1,44.9,22.3 -minicpm-1b-sft,FP16,31,3769.9,84,45.4,22.0 -red-pajama-incite-chat-3b-v1,INT8-CW,1023,3952,2064.5,45.7,21.9 -stablelm-3b-4e1t,INT8-CW,1023,3934.5,2286.3,46.8,21.4 -gpt-j-6b,INT4-MIXED,32,4443.5,159.3,56.7,17.6 -phi-3-mini-4k-instruct,INT8-CW,32,4545,117.1,57.6,17.4 -phi-3-mini-4k-instruct,INT8-CW,1024,4810.4,2068.8,60.5,16.5 -gpt-j-6b,INT4-MIXED,1024,4746.4,2397,60.6,16.5 -falcon-7b-instruct,INT4-MIXED,32,5014,203.7,61.3,16.3 -qwen2-7b,INT4-MIXED,32,5269.4,203.8,62.3,16.1 -codegen25-7b,INT4-MIXED,32,4641.1,170.6,63.5,15.7 -llama-2-7b-gptq,INT4-MIXED,32,4597.3,172.1,63.5,15.7 -falcon-7b-instruct,INT4-MIXED,1024,5230.6,2695.3,63.6,15.7 -qwen2-7b,INT4-MIXED,1024,5370.8,2505.9,63.9,15.6 -decilm-7b-instruct,INT4-MIXED,36,4614.2,301.1,65.3,15.3 -codegen25-7b,INT4-MIXED,1024,4641.9,2629.6,67.4,14.8 -llama-2-7b-gptq,INT4-MIXED,1024,4928.1,2584.3,67.6,14.8 -mistral-7b-v0.1,INT4-MIXED,32,4928.5,180.9,69.2,14.5 -llama-2-7b-chat-hf,INT4-MIXED,32,4985.7,160.3,69.5,14.4 -qwen-7b-chat-gptq,INT4-MIXED,32,5426.7,188.3,69.5,14.4 -llama-3-8b,INT4-MIXED,33,5473.4,285.7,70,14.3 -flan-t5-xxl,INT4-MIXED,33,19293.8,211.7,70.1,14.3 -llama-3-8b,INT4-MIXED,33,5389.2,281,70.8,14.1 -mistral-7b-v0.1,INT4-MIXED,1024,5225.4,2713.3,71.8,13.9 -zephyr-7b-beta,INT4-MIXED,32,5306.1,177.9,72.1,13.9 -llama-3-8b,INT4-MIXED,1025,5615.2,2937.8,72.4,13.8 -llama-3-8b,INT4-MIXED,1025,5531.7,2815.4,73.2,13.7 -llama-2-7b-chat-hf,INT4-MIXED,1024,5319.5,2736.2,73.6,13.6 -phi-2,FP16,32,6197,104.6,74.7,13.4 -zephyr-7b-beta,INT4-MIXED,1024,5306.4,2802.3,74.7,13.4 -qwen-7b-chat-gptq,INT4-MIXED,1024,5934.9,2606.9,75,13.3 -dolly-v2-3b,FP16,32,6195.1,105.3,75.3,13.3 -baichuan2-7b-chat,INT4-MIXED,32,5837.9,188.5,76.8,13.0 -red-pajama-incite-chat-3b-v1,FP16,32,6178.6,118,76.8,13.0 -gemma-7b-it,INT4-MIXED,32,6495.9,230.6,77,13.0 -stablelm-3b-4e1t,FP16,32,6174.2,105.9,77.1,13.0 -stable-zephyr-3b-dpo,FP16,32,6217.8,107.9,77.2,13.0 -glm-4-9b-chat,INT4-MIXED,32,6333.4,225,77.3,12.9 -phi-2,FP16,1024,6411.5,2065.2,77.3,12.9 -dolly-v2-3b,FP16,1024,6410.1,2075,77.7,12.9 -llama-3.1-8b,INT4-MIXED,32,6324.6,182.2,78.8,12.7 -red-pajama-incite-chat-3b-v1,FP16,1023,6394.2,2752.4,79.2,12.6 -stablelm-3b-4e1t,FP16,1023,6386.9,2953.3,79.5,12.6 -glm-4-9b-chat,INT4-MIXED,1024,6439.5,3282.2,80,12.5 -baichuan2-7b-chat,INT4-MIXED,1024,6174.1,2752.6,80.6,12.4 -gemma-7b-it,INT4-MIXED,1024,6795.4,3118.3,80.6,12.4 -llama-3.1-8b,INT4-MIXED,1024,6324.8,2865.7,81.3,12.3 -gpt-j-6b,INT8-CW,32,6793.2,167.6,85,11.8 -qwen-7b-chat,INT4-MIXED,32,7274.8,168.8,85.2,11.7 -gpt-j-6b,INT8-CW,1024,6793.3,2668.4,88.8,11.3 -qwen-7b-chat,INT4-MIXED,1024,7610.3,2991.9,90.6,11.0 -flan-t5-xxl,INT4-MIXED,1139,23514,540.8,94.9,10.5 -falcon-7b-instruct,INT8-CW,32,7764.1,181.3,95.5,10.5 -llama-2-7b-chat-hf,INT8-CW,32,7330.9,172,96.1,10.4 -falcon-7b-instruct,INT8-CW,1024,7987.4,3072.8,98.1,10.2 -qwen2-7b,INT8-CW,32,8175.3,211.3,99.6,10.0 +Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd token per sec (2nd lat^(-1)),,, +bloomz-560m,INT4,32,2123,36.1,12.5,80,,, +bloomz-560m,INT4,1024,2123.6,195,13.7,72.99270073,,, +tiny-llama-1.1b-chat,INT4,32,2249.2,36.8,13.9,71.94244604,,, +tiny-llama-1.1b-chat,INT4,1024,2249.9,427.8,15,66.66666667,,, +qwen2-0.5b,INT4,32,1800.7,44.7,15.4,64.93506494,,, +bloomz-560m,INT8,32,2273.5,39.5,15.4,64.93506494,,, +qwen2-0.5b,INT4,1024,1801.1,185.9,15.5,64.51612903,,, +bloomz-560m,INT8,1024,2471.6,213.3,15.8,63.29113924,,, +qwen2-0.5b,INT8,32,2000.1,37.9,18.2,54.94505495,,, +qwen2-0.5b,INT8,1024,2135.9,218,18.7,53.47593583,,, +bloomz-560m,FP16,32,3069.2,39.1,19.7,50.76142132,,, +qwen2-1.5b,INT4,32,2750.3,47.6,20,50,,, +tiny-llama-1.1b-chat,INT8,32,2441.6,49.4,20.5,48.7804878,,, +qwen2-1.5b,INT4,1024,2575.9,531.2,20.9,47.84688995,,, +bloomz-560m,FP16,1024,3057.5,232.7,21,47.61904762,,, +tiny-llama-1.1b-chat,INT8,1024,2431.7,523.6,21.5,46.51162791,,, +dolly-v2-3b,INT4,32,3178.8,75.4,27.1,36.900369,,, +minicpm-1b-sft,INT4,31,3131.5,74,27.6,36.23188406,,, +red-pajama-incite-chat-3b-v1,INT4,32,3057.5,67.1,27.6,36.23188406,,, +gemma-2b-it,INT4,32,3460.7,97.9,28.5,35.0877193,,, +minicpm-1b-sft,INT4,1014,3132,732.4,29,34.48275862,,, +qwen2-1.5b,INT8,32,3126.4,77.4,29.3,34.12969283,,, +gemma-2b-it,INT4,1024,3461.4,796.3,29.4,34.01360544,,, +qwen2-1.5b,INT8,1024,3126.8,660.3,30.1,33.22259136,,, +dolly-v2-3b,INT4,1024,3179,1171.9,31.8,31.44654088,,, +minicpm-1b-sft,INT8,31,3496,77.9,31.9,31.34796238,,, +red-pajama-incite-chat-3b-v1,INT4,1023,3057.7,1211,32.8,30.48780488,,, +minicpm-1b-sft,INT8,1014,3433.2,783.7,33.6,29.76190476,,, +phi-3-mini-4k-instruct,INT4,32,3534.8,96.6,36.6,27.32240437,,, +red-pajama-incite-chat-3b-v1,INT8,32,4099.8,107.3,42.3,23.64066194,,, +gemma-2b-it,INT8,32,4478.7,103.1,42.4,23.58490566,,, +minicpm-1b-sft,FP16,31,4157.5,75.7,42.7,23.41920375,,, +phi-3-mini-4k-instruct,INT4,1024,3535.3,1521.7,42.8,23.36448598,,, +dolly-v2-3b,INT8,32,4143.7,102,43.1,23.20185615,,, +gemma-2b-it,INT8,1024,4478.9,936.2,43.3,23.09468822,,, +minicpm-1b-sft,FP16,1014,4329.7,876.6,44.8,22.32142857,,, +red-pajama-incite-chat-3b-v1,INT8,1023,4412.8,1815.9,44.9,22.27171492,,, +dolly-v2-3b,INT8,1024,4143.8,1276.4,45.6,21.92982456,,, +chatglm3-6b,INT4,32,4746.8,149.6,50.6,19.76284585,,, +chatglm3-6b,INT4,1024,4747,2279.1,52.6,19.01140684,,, +flan-t5-xxl,INT4,33,13681.2,91.7,53.6,18.65671642,,, +phi-3-mini-4k-instruct,INT8,32,5041.3,110.9,56.9,17.57469244,,, +llama-2-7b-gptq,INT4,32,5115.9,168.1,57.8,17.30103806,,, +chatglm3-6b-gptq,INT4,32,5371.4,159.5,57.8,17.30103806,,, +decilm-7b-instruct,INT4,36,5415.9,230.5,58,17.24137931,,, +codegen25-7b,INT4,32,5110.5,161,59.1,16.92047377,,, +flan-t5-xxl,INT4,1139,16627.6,455.8,59.3,16.86340641,,, +qwen2-7b,INT4,32,5802.2,173.2,60.1,16.63893511,,, +phi-3-mini-4k-instruct,INT8,1024,5041.7,1812.4,60.2,16.61129568,,, +chatglm3-6b-gptq,INT4,1024,5748.7,2236,60.2,16.61129568,,, +falcon-7b-instruct,INT4,32,5495.1,181.3,60.3,16.58374793,,, +decilm-7b-instruct,INT4,1091,5237.4,2995.4,60.9,16.42036125,,, +qwen2-7b,INT4,1024,5758.2,2445.4,61.9,16.15508885,,, +falcon-7b-instruct,INT4,1024,5682.7,2718.5,62.6,15.97444089,,, +codegen25-7b,INT4,1024,5513.9,2500.7,63.2,15.82278481,,, +mistral-7b-v0.1,INT4,32,5475.8,178.5,64.7,15.45595054,,, +qwen-7b-chat-gptq,INT4,32,6115.4,174.2,64.8,15.43209877,,, +llama-3-8b,INT4,33,5964.2,238.4,65.2,15.33742331,,, +llama-3-8b,INT4,33,5870.5,239.8,65.3,15.31393568,,, +llama-2-7b-chat-hf,INT4,32,5493.5,157.4,65.4,15.29051988,,, +llama-2-7b-gptq,INT4,1024,5802.7,2547.3,65.4,15.29051988,,, +mistral-7b-v0.1,INT4,1024,5476,2684.8,67.2,14.88095238,,, +llama-3-8b,INT4,1025,6163.2,2842.9,67.6,14.79289941,,, +zephyr-7b-beta,INT4,32,5739.1,177.4,67.7,14.77104874,,, +llama-3-8b,INT4,1025,6069.4,2741.8,67.8,14.74926254,,, +llama-2-7b-chat-hf,INT4,1024,5494,2500.3,69.5,14.38848921,,, +zephyr-7b-beta,INT4,1024,5739.7,2671.4,71,14.08450704,,, +qwen-7b-chat-gptq,INT4,1024,6646.3,2596.9,73,13.69863014,,, +baichuan2-7b-chat,INT4,32,6385.1,159.5,73.1,13.67989056,,, +gemma-7b-it,INT4,32,7297.7,221.9,73.7,13.56852103,,, +dolly-v2-3b,FP16,32,6652.1,107.1,74.2,13.47708895,,, +red-pajama-incite-chat-3b-v1,FP16,32,6640.8,103.1,74.7,13.38688086,,, +llama-3.1-8b,INT4,32,6797.5,182.7,76.3,13.1061599,,, +glm-4-9b-chat,INT4,32,6805.1,215.5,76.4,13.08900524,,, +baichuan2-7b-chat,INT4,1024,6385.5,2597,77.3,12.93661061,,, +gemma-7b-it,INT4,1024,6974.7,3126,77.5,12.90322581,,, +dolly-v2-3b,FP16,1024,6652.2,1542.4,78.7,12.7064803,,, +red-pajama-incite-chat-3b-v1,FP16,1023,7120.4,2490.4,79.3,12.61034048,,, +llama-3.1-8b,INT4,1024,7114,2807.6,79.7,12.54705144,,, +glm-4-9b-chat,INT4,1024,6805.2,3197,79.7,12.54705144,,, +qwen-7b-chat,INT4,32,7255.7,156.2,81.2,12.31527094,,, +chatglm3-6b,INT8,32,7308.6,154.4,85.1,11.75088132,,, +qwen-7b-chat,INT4,1024,7827.7,2693.7,86.6,11.54734411,,, +chatglm3-6b,INT8,1024,7308.9,2486,87.4,11.4416476,,, +flan-t5-xxl,INT8,33,20923.9,170.5,91.7,10.90512541,,, +llama-2-7b-chat-hf,INT8,32,7838.4,157.9,94.8,10.54852321,,, +falcon-7b-instruct,INT8,32,8250,175.3,95.1,10.51524711,,, +codegen25-7b,INT8,32,7996.9,162.7,95.7,10.44932079,,, +falcon-7b-instruct,INT8,1024,8445.4,3055.4,97.5,10.25641026,,, +flan-t5-xxl,INT8,1139,24095.3,571.2,97.6,10.24590164,,, +qwen2-7b,INT8,32,8542.4,185.5,98.2,10.18329939,,, +llama-2-7b-chat-hf,INT8,1024,7838.6,3132.1,98.8,10.12145749,,, +qwen2-7b,INT8,1024,8543.5,3124.5,99.8,10.02004008,,, +codegen25-7b,INT8,1024,8453.5,3136,99.9,10.01001001,,, +decilm-7b-instruct,INT8,36,8088.5,244.9,100.7,9.930486594,,, +phi-3-mini-4k-instruct,FP16,32,8592.5,124.5,102.9,9.718172983,,, +decilm-7b-instruct,INT8,1091,8292.4,9951.9,103.5,9.661835749,,, +qwen-7b-chat,INT8,32,8991.1,169.7,103.7,9.643201543,,, +zephyr-7b-beta,INT8,32,8267.2,183.1,104.5,9.56937799,,, +mistral-7b-v0.1,INT8,32,8269.6,184.1,104.9,9.532888465,,, +zephyr-7b-beta,INT8,1024,8268.1,3379.7,107,9.345794393,,, +mistral-7b-v0.1,INT8,1024,8513.8,3394.1,107.4,9.310986965,,, +phi-3-mini-4k-instruct,FP16,1024,9157.2,2080.8,108.4,9.225092251,,, +qwen-7b-chat,INT8,1024,8991.4,3137.5,109,9.174311927,,, +llama-3-8b,INT8,33,9085.1,264.9,109.4,9.140767824,,, +llama-3.1-8b,INT8,32,9070.9,189.1,110.7,9.033423668,,, +baichuan2-13b-chat,INT4,32,10592.1,330.4,111.4,8.976660682,,, +llama-3-8b,INT8,1025,9085.2,9900.1,111.9,8.936550492,,, +llama-3.1-8b,INT8,1024,9071,3408.2,113.2,8.833922261,,, +phi-3-medium-4k-instruct,INT4,38,9009.6,443.3,116,8.620689655,,, +phi-3-medium-4k-instruct,INT4,1061,8935.4,5655.5,119.9,8.34028357,,, +baichuan2-7b-chat,INT8,32,8633.7,172.7,120.5,8.298755187,,, +baichuan2-7b-chat,INT8,1024,9135.7,3192.6,124.7,8.019246191,,, +gemma-7b-it,INT8,32,10087.5,223.2,125.2,7.987220447,,, +glm-4-9b-chat,INT8,32,10440,224.2,125.7,7.955449483,,, +gemma-7b-it,INT8,1024,9965.1,3723.4,129.1,7.745933385,,, +glm-4-9b-chat,INT8,1024,10440.1,4054.2,129.2,7.73993808,,, +starcoder,INT4,32,9738.6,599.6,177.5,5.633802817,,, +flan-t5-xxl,FP16,33,19273,553.7,188.1,5.316321106,,, +flan-t5-xxl,FP16,1139,24887.6,999,193.1,5.178663905,,, +phi-3-medium-4k-instruct,INT8,38,14453.1,1342.7,205.9,4.856726566,,, +phi-3-medium-4k-instruct,INT8,1061,14287.2,19763.6,210.9,4.741583689,,, +decilm-7b-instruct,FP16,36,14215.6,465.7,222,4.504504505,,, +decilm-7b-instruct,FP16,1091,14332.5,12122.8,225.6,4.432624113,,, +starcoder,INT8,32,8567.4,379.1,235.4,4.24808836,,, +llama-3.1-8b,FP16,32,15653.3,319.9,240.7,4.154549231,,, +starcoder,INT4,1024,9738.7,6736.5,241.1,4.147656574,,, +llama-3.1-8b,FP16,1024,17004.9,4679.8,245.7,4.07000407,,, +starcoder,INT8,1024,9829.9,8819.9,269.2,3.714710253,,, +lcm-dreamshaper-v7,INT4,32,5391.5,296.1,284.2,3.518648839,,, +lcm-dreamshaper-v7,INT4,1024,5779.1,305.6,284.3,3.517411185,,, +lcm-dreamshaper-v7,FP16,1024,5967.9,304.5,284.5,3.514938489,,, +lcm-dreamshaper-v7,FP16,32,5238.8,295.8,284.5,3.514938489,,, +lcm-dreamshaper-v7,INT8,32,4974.1,314.4,301.4,3.317850033,,, +lcm-dreamshaper-v7,INT8,1024,5622.3,323.9,301.7,3.314550878,,, +stable-diffusion-v2-1,FP16,1024,5942.7,475.7,444.7,2.248706993,,, +stable-diffusion-v2-1,FP16,32,5197.9,466.9,445.4,2.245172878,,, +baichuan2-13b-chat,INT4,1024,12879,5213.1,448.6,2.229157379,,, +stable-diffusion-v2-1,INT8,32,4723.6,484,455.9,2.193463479,,, +stable-diffusion-v2-1,INT8,1024,5458.1,489.4,456.2,2.192021043,,, +stable-diffusion-v1-5,FP16,1024,6573.2,576.6,550.6,1.816200509,,, +stable-diffusion-v1-5,FP16,32,5848.9,570.5,551.4,1.81356547,,, +stable-diffusion-v1-5,INT8,32,5581,603.9,587.7,1.701548409,,, +stable-diffusion-v1-5,INT8,1024,6258.2,612.9,589.4,1.696640652,,, +phi-3-medium-4k-instruct,FP16,38,27222.7,3293.8,1198.9,0.834097923,,, +phi-3-medium-4k-instruct,FP16,1061,28813.8,32882.8,1199.7,0.833541719,,, diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv index 625ff1d6fe5ed5..c864794fea022f 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv +++ b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv @@ -1,117 +1,149 @@ -Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec -opt-125m-gptq,INT4-MIXED,32,1150.2,35.1,8.2,122.0 -opt-125m-gptq,INT4-MIXED,1024,1228,67,8.2,122.0 -qwen2-0.5b,INT4-MIXED,1024,1596.2,83.6,14.4,69.4 -qwen2-0.5b,INT4-MIXED,32,1675.6,63.6,14.9,67.1 -qwen2-0.5b,INT8-CW,32,1857.5,56.9,15,66.7 -qwen2-0.5b,INT8-CW,1024,1663.5,87,15,66.7 -bloomz-560m,INT8-CW,32,1761.1,62.4,15.1,66.2 -tiny-llama-1.1b-chat,INT4-MIXED,1024,1687.9,158.7,15.3,65.4 -bloomz-560m,INT4-MIXED,32,1894.2,40.1,15.4,64.9 -tiny-llama-1.1b-chat,INT4-MIXED,32,1833,74.5,15.7,63.7 -bloomz-560m,INT8-CW,1024,1689.2,146.2,15.8,63.3 -bloomz-560m,INT4-MIXED,1024,1791,150.1,16.4,61.0 -tiny-llama-1.1b-chat,INT8-CW,32,2132.3,35.6,18.1,55.2 -bloomz-560m,FP16,32,2395,36,18.4,54.3 -tiny-llama-1.1b-chat,INT8-CW,1024,1986.4,149.3,19.2,52.1 -bloomz-560m,FP16,1024,2344.4,157.4,19.3,51.8 -qwen2-1.5b,INT4-MIXED,1024,2175.1,184.9,20.4,49.0 -qwen2-1.5b,INT4-MIXED,32,2066.2,94.9,20.6,48.5 -red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2599.8,118.1,25,40.0 -qwen2-1.5b,INT8-CW,32,2377.4,83.3,25.1,39.8 -qwen2-1.5b,INT8-CW,1024,2483.3,189.6,25.3,39.5 -gemma-2b-it,INT4-MIXED,32,2594.3,181.4,26.1,38.3 -phi-2,INT4-MIXED,32,2912.4,77.7,26.8,37.3 -gemma-2b-it,INT4-MIXED,1024,2594.4,248.2,26.9,37.2 -dolly-v2-3b,INT4-MIXED,32,2610.3,141.3,27,37.0 -stable-zephyr-3b-dpo,INT4-MIXED,32,2956.2,149.2,27.4,36.5 -minicpm-1b-sft,INT4-MIXED,31,2625.8,159.2,28.1,35.6 -red-pajama-incite-chat-3b-v1,INT4-MIXED,1023,3069.7,413.5,28.2,35.5 -minicpm-1b-sft,INT8-CW,31,2868.2,74.1,28.9,34.6 -dolly-v2-3b,INT4-MIXED,1024,3081.5,386,29.4,34.0 -phi-2,INT4-MIXED,1024,3136.2,340,29.6,33.8 -stablelm-3b-4e1t,INT4-MIXED,32,3035.9,150.5,30.6,32.7 -phi-3-mini-4k-instruct,INT4-MIXED,32,3373.2,57.9,32.6,30.7 -stablelm-3b-4e1t,INT4-MIXED,1023,3296.5,456.2,34.4,29.1 -phi-3-mini-4k-instruct,INT4-MIXED,1024,3707.1,432,36.1,27.7 -gemma-2b-it,INT8-CW,32,3370.5,203.8,36.6,27.3 -minicpm-1b-sft,FP16,31,3679.6,80.6,36.9,27.1 -gemma-2b-it,INT8-CW,1024,3503.2,258.5,37.9,26.4 -dolly-v2-3b,INT8-CW,32,3893.3,142.9,39.4,25.4 -red-pajama-incite-chat-3b-v1,INT8-CW,32,3760.7,117.2,39.4,25.4 -phi-2,INT8-CW,32,3765.6,121,39.7,25.2 -stablelm-3b-4e1t,INT8-CW,32,3641.2,123,39.9,25.1 -stable-zephyr-3b-dpo,INT8-CW,32,3743.3,120.1,39.9,25.1 -red-pajama-incite-chat-3b-v1,INT8-CW,1023,4083.1,422.9,41.9,23.9 -dolly-v2-3b,INT8-CW,1024,4211.5,384.1,42.2,23.7 -phi-2,INT8-CW,1024,4096.8,367.2,42.5,23.5 -stablelm-3b-4e1t,INT8-CW,1023,4086.6,459.9,43.5,23.0 -llama-2-7b-gptq,INT4-MIXED,32,4754.8,75.1,46.2,21.6 -codegen25-7b,INT4-MIXED,32,4738.5,74.9,46.9,21.3 -gpt-j-6b,INT4-MIXED,32,4506.5,221.4,47.3,21.1 -decilm-7b-instruct,INT4-MIXED,36,4794.9,199.3,48.5,20.6 -qwen-7b-chat-gptq,INT4-MIXED,32,5615.8,100.5,49.8,20.1 -falcon-7b-instruct,INT4-MIXED,32,4738,79.9,50.7,19.7 -phi-3-mini-4k-instruct,INT8-CW,32,4589.9,83,50.8,19.7 -llama-2-7b-gptq,INT4-MIXED,1024,5246,640,52.1,19.2 -llama-3-8b,INT4-MIXED,33,5475.8,114.7,52.2,19.2 -codegen25-7b,INT4-MIXED,1024,5241.9,643.7,52.5,19.0 -mistral-7b-v0.1,INT4-MIXED,32,5015.3,94.6,52.6,19.0 -qwen2-7b,INT4-MIXED,32,5330.7,86.3,52.7,19.0 -gpt-j-6b,INT4-MIXED,1024,4926.5,867.2,53.2,18.8 -llama-2-7b-chat-hf,INT4-MIXED,32,5100.7,78.7,54.2,18.5 -llama-3-8b,INT4-MIXED,33,5527.1,114.9,54.3,18.4 -phi-3-mini-4k-instruct,INT8-CW,1024,4959.2,450.6,54.6,18.3 -falcon-7b-instruct,INT4-MIXED,1024,4863.4,660.5,54.9,18.2 -qwen2-7b,INT4-MIXED,1024,5375.4,659.8,55.4,18.1 -mistral-7b-v0.1,INT4-MIXED,1024,5286.8,662.8,55.6,18.0 -llama-3-8b,INT4-MIXED,1025,5601,992.5,56.1,17.8 -llama-3-8b,INT4-MIXED,1025,5646.8,1047.1,56.7,17.6 -baichuan2-7b-chat,INT4-MIXED,32,5913.7,86.5,57.2,17.5 -zephyr-7b-beta,INT4-MIXED,32,5339.7,88.5,58.2,17.2 -qwen-7b-chat-gptq,INT4-MIXED,1024,6315.8,664.2,60.1,16.6 -glm-4-9b-chat,INT4-MIXED,32,6349.7,86.5,60.5,16.5 -llama-2-7b-chat-hf,INT4-MIXED,1024,5592.7,856.8,60.9,16.4 -zephyr-7b-beta,INT4-MIXED,1024,5459.1,898.6,61.6,16.2 -baichuan2-7b-chat,INT4-MIXED,1024,6410.3,942.2,63.5,15.7 -gemma-7b-it,INT4-MIXED,32,5816.3,104.5,63.5,15.7 -glm-4-9b-chat,INT4-MIXED,1024,6368.8,1128.2,63.8,15.7 -llama-3.1-8b,INT4-MIXED,32,6315.3,97.4,65,15.4 -llama-3.1-8b,INT4-MIXED,1024,6421.8,902.9,68.2,14.7 -gemma-7b-it,INT4-MIXED,1024,6233.2,1052.7,68.7,14.6 -qwen-7b-chat,INT4-MIXED,32,7320.5,132.3,68.8,14.5 -red-pajama-incite-chat-3b-v1,FP16,32,6318.9,79.2,70.7,14.1 -phi-2,FP16,32,6330.2,83.2,70.8,14.1 -dolly-v2-3b,FP16,32,6327.2,92.7,71.9,13.9 -stable-zephyr-3b-dpo,FP16,32,6356.4,79.8,72.2,13.9 -stablelm-3b-4e1t,FP16,32,6261.9,74.6,72.6,13.8 -phi-2,FP16,1024,6654.4,379.3,73.9,13.5 -red-pajama-incite-chat-3b-v1,FP16,1023,6640.3,442.6,74.4,13.4 -dolly-v2-3b,FP16,1024,6653.9,441.9,74.9,13.4 -qwen-7b-chat,INT4-MIXED,1024,7814.1,909.4,75.5,13.2 -stablelm-3b-4e1t,FP16,1023,6575.3,449.5,75.8,13.2 -falcon-7b-instruct,INT8-CW,32,7487.6,109.4,84.3,11.9 -gpt-j-6b,INT8-CW,32,6918.7,185.3,85.3,11.7 -llama-2-7b-chat-hf,INT8-CW,32,7494.7,110.6,87.9,11.4 -qwen2-7b,INT8-CW,32,8177.7,117.8,88.2,11.3 -falcon-7b-instruct,INT8-CW,1024,7621.2,675.4,88.3,11.3 -codegen25-7b,INT8-CW,32,7582.1,114.6,89,11.2 -qwen2-7b,INT8-CW,1024,8226.2,842,90.4,11.1 -gpt-j-6b,INT8-CW,1024,7353.1,1093.9,90.8,11.0 -phi-3-medium-4k-instruct,INT4-MIXED,38,8184.1,270.2,90.8,11.0 -qwen-7b-chat,INT8-CW,32,9223.8,138.4,91.3,11.0 -baichuan2-7b-chat,INT8-CW,32,8188.4,122.9,91.8,10.9 -phi-3-mini-4k-instruct,FP16,32,8311.5,98.2,92,10.9 -llama-2-7b-chat-hf,INT8-CW,1024,7984.3,874.9,92.8,10.8 -mistral-7b-v0.1,INT8-CW,32,7908.6,116.3,93.1,10.7 -baichuan2-13b-chat,INT4-MIXED,32,10016.5,165.7,93.2,10.7 -zephyr-7b-beta,INT8-CW,32,7812.6,117,93.4,10.7 -codegen25-7b,INT8-CW,1024,8074.3,870.2,94,10.6 -decilm-7b-instruct,INT8-CW,36,7885.2,181.4,94.9,10.5 -mistral-7b-v0.1,INT8-CW,1024,8023.7,906.4,95.7,10.4 -zephyr-7b-beta,INT8-CW,1024,7930.8,915.2,96.3,10.4 -phi-3-medium-4k-instruct,INT4-MIXED,1061,8384.5,2225.7,96.7,10.3 -baichuan2-7b-chat,INT8-CW,1024,8678.3,956.7,96.8,10.3 -llama-3.1-8b,INT8-CW,32,8615.4,121.6,97.7,10.2 -llama-3-8b,INT8-CW,33,8615.1,131.3,97.7,10.2 -phi-3-mini-4k-instruct,FP16,1024,8695.2,509,99.9,10.0 +Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd token per sec (2nd lat^(-1)),,, +tiny-llama-1.1b-chat,INT4,32,2176.5,31.7,9.6,104.1666667,,, +tiny-llama-1.1b-chat,INT4,1024,2261,132.4,10.2,98.03921569,,, +bloomz-560m,INT4,1024,2103,67.4,10.3,97.08737864,,, +bloomz-560m,INT4,32,1880.6,33.7,10.5,95.23809524,,, +qwen2-0.5b,INT4,1024,1679.5,63.2,10.8,92.59259259,,, +qwen2-0.5b,INT4,32,1577.1,36.3,10.9,91.74311927,,, +bloomz-560m,INT8,32,2015.6,30.3,10.9,91.74311927,,, +qwen2-0.5b,INT8,32,1869.8,31.7,11,90.90909091,,, +bloomz-560m,INT8,1024,2230.8,67.3,11.4,87.71929825,,, +qwen2-0.5b,INT8,1024,1951.1,68,11.9,84.03361345,,, +tiny-llama-1.1b-chat,INT8,32,2687.2,28.6,12.9,77.51937984,,, +qwen2-1.5b,INT4,1024,2368.7,167.6,13.5,74.07407407,,, +tiny-llama-1.1b-chat,INT8,1024,2530.6,127.8,13.7,72.99270073,,, +qwen2-1.5b,INT4,32,2480.4,43.1,13.9,71.94244604,,, +bloomz-560m,FP16,32,2654.3,29.6,14.5,68.96551724,,, +bloomz-560m,FP16,1024,2880.8,75.8,15.8,63.29113924,,, +qwen2-1.5b,INT8,32,2994.7,37.7,18.9,52.91005291,,, +red-pajama-incite-chat-3b-v1,INT4,32,3240.5,53.2,19.2,52.08333333,,, +qwen2-1.5b,INT8,1024,2893.3,163.6,19.6,51.02040816,,, +gemma-2b-it,INT4,32,3245,188.4,20.1,49.75124378,,, +minicpm-1b-sft,INT4,31,3024.6,68.3,20.4,49.01960784,,, +dolly-v2-3b,INT4,32,3301,66.3,20.4,49.01960784,,, +gemma-2b-it,INT4,1024,3022.4,231.3,21.5,46.51162791,,, +red-pajama-incite-chat-3b-v1,INT4,1023,3400.6,397.9,22.1,45.24886878,,, +minicpm-1b-sft,INT4,1014,2902.7,266.7,22.1,45.24886878,,, +dolly-v2-3b,INT4,1024,3442.1,377.2,23.1,43.29004329,,, +minicpm-1b-sft,INT8,31,3330,62.7,23.2,43.10344828,,, +minicpm-1b-sft,INT8,1014,3259.1,211,24.8,40.32258065,,, +phi-3-mini-4k-instruct,INT4,32,3662.9,44.3,26.3,38.02281369,,, +phi-3-mini-4k-instruct,INT4,1024,4000.4,417.4,29.9,33.44481605,,, +gemma-2b-it,INT8,32,3739.4,191.2,30.5,32.78688525,,, +red-pajama-incite-chat-3b-v1,INT8,32,4338.9,48.7,31.2,32.05128205,,, +minicpm-1b-sft,FP16,31,4195.6,63.5,31.5,31.74603175,,, +dolly-v2-3b,INT8,32,4438.1,63.2,32,31.25,,, +gemma-2b-it,INT8,1024,3910.8,248.1,32,31.25,,, +minicpm-1b-sft,FP16,1014,4123.4,229.6,33.3,30.03003003,,, +red-pajama-incite-chat-3b-v1,INT8,1023,4503.5,405.2,34.1,29.3255132,,, +chatglm3-6b,INT4,32,4909.8,52.2,34.2,29.23976608,,, +dolly-v2-3b,INT8,1024,4626,379.1,35.3,28.3286119,,, +chatglm3-6b,INT4,1024,5049.9,638.2,36,27.77777778,,, +llama-2-7b-gptq,INT4,32,5045.7,63.8,38.9,25.70694087,,, +codegen25-7b,INT4,32,5366.8,66,39.2,25.51020408,,, +decilm-7b-instruct,INT4,36,4964.3,123.3,39.9,25.06265664,,, +chatglm3-6b-gptq,INT4,32,5344,70.6,40.7,24.57002457,,, +decilm-7b-instruct,INT4,1091,4965,795.7,42.1,23.75296912,,, +qwen-7b-chat-gptq,INT4,32,5958.7,68.5,42.7,23.41920375,,, +chatglm3-6b-gptq,INT4,1024,6003.4,645.5,42.7,23.41920375,,, +phi-3-mini-4k-instruct,INT8,32,5021.3,51.3,43,23.25581395,,, +qwen2-7b,INT4,32,5653.2,69.4,43.8,22.83105023,,, +llama-2-7b-gptq,INT4,1024,5767,752.6,43.9,22.77904328,,, +codegen25-7b,INT4,1024,5757.8,811.5,44.2,22.62443439,,, +falcon-7b-instruct,INT4,32,5062.6,65.2,44.2,22.62443439,,, +llama-3-8b,INT4,33,5800.1,72.2,44.3,22.57336343,,, +llama-3-8b,INT4,33,5867.8,73,45,22.22222222,,, +mistral-7b-v0.1,INT4,32,5750.6,72.9,45.4,22.02643172,,, +qwen2-7b,INT4,1024,5654,762.2,45.7,21.88183807,,, +llama-3-8b,INT4,1025,5800.7,743.1,46.3,21.59827214,,, +phi-3-mini-4k-instruct,INT8,1024,5021.9,441.7,46.7,21.41327623,,, +llama-2-7b-chat-hf,INT4,32,5726.6,68.6,47,21.27659574,,, +llama-3-8b,INT4,1025,5868.4,761.1,47.5,21.05263158,,, +mistral-7b-v0.1,INT4,1024,5607.6,741.7,47.9,20.87682672,,, +falcon-7b-instruct,INT4,1024,5063.2,645.9,48.2,20.74688797,,, +qwen-7b-chat-gptq,INT4,1024,6647.1,757.2,48.6,20.57613169,,, +zephyr-7b-beta,INT4,32,6088,73.6,49.6,20.16129032,,, +baichuan2-7b-chat,INT4,32,6268.1,74.6,50.7,19.72386588,,, +glm-4-9b-chat,INT4,32,6987.4,79.1,51.5,19.41747573,,, +llama-2-7b-chat-hf,INT4,1024,6149.7,923.4,51.7,19.34235977,,, +zephyr-7b-beta,INT4,1024,6016.3,745.6,51.9,19.26782274,,, +gemma-7b-it,INT4,32,6418.3,89.4,53.1,18.83239171,,, +baichuan2-7b-chat,INT4,1024,6268.6,734.8,55.7,17.95332136,,, +llama-3.1-8b,INT4,32,6640.3,87.3,56,17.85714286,,, +glm-4-9b-chat,INT4,1024,6831.9,846.7,56.4,17.73049645,,, +llama-3.1-8b,INT4,1024,6904.8,933.2,58.9,16.97792869,,, +qwen-7b-chat,INT4,32,7172.3,77.7,59.9,16.69449082,,, +gemma-7b-it,INT4,1024,6821,749.7,60,16.66666667,,, +red-pajama-incite-chat-3b-v1,FP16,32,6798.6,66.3,64.6,15.47987616,,, +dolly-v2-3b,FP16,32,6737.8,68.2,65.5,15.26717557,,, +chatglm3-6b,INT8,32,7478.5,77.6,66.3,15.08295626,,, +qwen-7b-chat,INT4,1024,7851.6,758.8,66.5,15.03759398,,, +red-pajama-incite-chat-3b-v1,FP16,1023,7121.4,434.8,67.7,14.77104874,,, +dolly-v2-3b,FP16,1024,6738.7,402.8,68.5,14.59854015,,, +chatglm3-6b,INT8,1024,7397.6,654.2,68.8,14.53488372,,, +falcon-7b-instruct,INT8,32,7912.4,84.6,73.8,13.5501355,,, +qwen2-7b,INT8,32,8567.4,92.1,75,13.33333333,,, +llama-2-7b-chat-hf,INT8,32,7821.1,85.2,77.1,12.97016861,,, +codegen25-7b,INT8,32,8002.4,86.9,77.7,12.87001287,,, +baichuan2-13b-chat,INT4,32,10428.4,156.1,78.6,12.72264631,,, +qwen2-7b,INT8,1024,8797,885.1,78.6,12.72264631,,, +phi-3-medium-4k-instruct,INT4,38,8810.9,156.8,78.7,12.7064803,,, +decilm-7b-instruct,INT8,36,8369.7,119.9,78.8,12.69035533,,, +falcon-7b-instruct,INT8,1024,7912.4,969.3,79.7,12.54705144,,, +baichuan2-7b-chat,INT8,32,8498.5,94.6,81,12.34567901,,, +zephyr-7b-beta,INT8,32,8232.9,94.6,81.7,12.23990208,,, +mistral-7b-v0.1,INT8,32,8644,95,81.8,12.22493888,,, +qwen-7b-chat,INT8,32,8975.1,92.7,81.8,12.22493888,,, +decilm-7b-instruct,INT8,1091,8208.8,1025.7,82.3,12.15066829,,, +llama-2-7b-chat-hf,INT8,1024,7821.4,759.4,82.5,12.12121212,,, +codegen25-7b,INT8,1024,8003,923.1,83.5,11.9760479,,, +phi-3-mini-4k-instruct,FP16,32,8751.9,88.8,84.9,11.77856302,,, +mistral-7b-v0.1,INT8,1024,8488,781.6,85.1,11.75088132,,, +phi-3-medium-4k-instruct,INT4,1061,8946.2,2039.2,85.3,11.72332943,,, +zephyr-7b-beta,INT8,1024,8487.4,826.4,85.3,11.72332943,,, +llama-3.1-8b,INT8,32,9039,98.6,85.9,11.64144354,,, +llama-3-8b,INT8,33,9040.6,102.9,86.1,11.61440186,,, +baichuan2-7b-chat,INT8,1024,9331.5,836.8,86.5,11.56069364,,, +qwen-7b-chat,INT8,1024,9642.8,740.6,88.7,11.27395716,,, +phi-3-mini-4k-instruct,FP16,1024,9173.4,490.2,89,11.23595506,,, +llama-3-8b,INT8,1025,9041,1025.4,89.4,11.18568233,,, +llama-3.1-8b,INT8,1024,9302,885.9,89.6,11.16071429,,, +starcoder,INT4,32,9298.4,142.3,93.6,10.68376068,,, +gemma-7b-it,INT8,32,9662.7,114,94,10.63829787,,, +glm-4-9b-chat,INT8,32,10381.6,110.6,98,10.20408163,,, +gemma-7b-it,INT8,1024,10351.1,1005.5,99.4,10.06036217,,, +glm-4-9b-chat,INT8,1024,10545.1,1116.4,101.1,9.891196835,,, +lcm-dreamshaper-v7,INT8,32,4719.6,117.5,107.7,9.285051068,,, +lcm-dreamshaper-v7,INT8,1024,5279.1,119.4,108.1,9.250693802,,, +lcm-dreamshaper-v7,FP16,32,4907.3,118.5,109.7,9.115770283,,, +lcm-dreamshaper-v7,FP16,1024,5530.5,122.3,109.9,9.099181074,,, +lcm-dreamshaper-v7,INT4,1024,5443.8,121.4,110,9.090909091,,, +flan-t5-xxl,INT4,33,13636.5,85.8,110.2,9.074410163,,, +lcm-dreamshaper-v7,INT4,32,4790.7,120.3,110.9,9.017132552,,, +flan-t5-xxl,INT8,33,23408.6,471.4,128.3,7.794232268,,, +starcoder,INT4,1024,9716.5,1953.1,141.7,7.05716302,,, +phi-3-medium-4k-instruct,INT8,38,14470.9,219.9,149.9,6.671114076,,, +phi-3-medium-4k-instruct,INT8,1061,14471.2,2300,154.3,6.4808814,,, +decilm-7b-instruct,FP16,1091,14729.5,1426.8,162.3,6.161429452,,, +llama-3-8b,FP16,33,15955.7,237.9,168.8,5.924170616,,, +llama-3-8b,FP16,1025,16095.6,1384,172.7,5.790387956,,, +starcoder,INT8,32,15761.5,205.7,180.5,5.540166205,,, +stable-diffusion-v2-1,INT8,1024,5484.5,202.4,181.3,5.515719801,,, +stable-diffusion-v2-1,INT8,32,4830.1,194.9,181.9,5.497526113,,, +stable-diffusion-v2-1,FP16,32,5580,202.3,182.7,5.473453749,,, +stable-diffusion-v2-1,FP16,1024,6018,207.6,183.6,5.446623094,,, +stable-diffusion-v1-5,INT8,32,4732.6,218.9,207.8,4.812319538,,, +stable-diffusion-v1-5,INT8,1024,5212.1,219.8,208.5,4.79616307,,, +stable-diffusion-v1-5,FP16,1024,5524.4,227.1,210.8,4.743833017,,, +stable-diffusion-v1-5,FP16,32,4856.2,220.4,211.4,4.730368969,,, +decilm-7b-instruct,FP16,36,15039.6,223.1,226.3,4.418912947,,, +starcoder,INT8,1024,15763.6,2118.6,229.5,4.357298475,,, +flan-t5-xxl,INT4,1139,16177.9,270.1,236.5,4.22832981,,, +flan-t5-xxl,INT8,1139,26075.7,322.8,263.1,3.800836184,,, +baichuan2-13b-chat,INT4,1024,12977.1,1796.4,279.2,3.581661891,,, +baichuan2-13b-chat,INT8,32,15417,197.2,334.2,2.992220227,,, +llama-3.1-8b,FP16,32,16112.3,853.2,410.5,2.436053593,,, +llama-3.1-8b,FP16,1024,17452.2,1166.4,418.4,2.390057361,,, +baichuan2-13b-chat,INT8,1024,15611.5,1891.2,446.3,2.240645306,,, +phi-3-medium-4k-instruct,FP16,38,27161.6,2440.2,2280.7,0.438461876,,, +phi-3-medium-4k-instruct,FP16,1061,27505.4,3536.3,2285.2,0.43759846,,,