Skip to content

Commit

Permalink
update hugectr nbs (#728)
Browse files Browse the repository at this point in the history
  • Loading branch information
rnyak authored Apr 12, 2021
1 parent 8bfe307 commit f64211d
Show file tree
Hide file tree
Showing 2 changed files with 186 additions and 139 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,9 @@
"import numpy as np\n",
"\n",
"from os import path\n",
"from sklearn.model_selection import train_test_split"
"from sklearn.model_selection import train_test_split\n",
"\n",
"from nvtabular.utils import download_file"
]
},
{
Expand Down Expand Up @@ -149,13 +151,19 @@
"execution_count": 4,
"id": "mounted-temple",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"downloading ml-25m.zip: 262MB [00:43, 6.09MB/s] \n",
"unzipping files: 100%|██████████| 8/8 [00:09<00:00, 1.19s/files]\n"
]
}
],
"source": [
"if not path.exists(BASE_DIR + 'ml-25m'):\n",
" if not path.exists(BASE_DIR + 'ml-25m.zip'):\n",
" os.system(\"wget http://files.grouplens.org/datasets/movielens/ml-25m.zip\")\n",
" os.system(\"mv ml-25m.zip \" + BASE_DIR)\n",
" os.system(\"unzip \" + BASE_DIR + \"ml-25m.zip -d \" + BASE_DIR)"
"download_file(\"http://files.grouplens.org/datasets/movielens/ml-25m.zip\", \n",
" os.path.join(BASE_DIR, \"ml-25m.zip\"))"
]
},
{
Expand Down Expand Up @@ -400,7 +408,7 @@
{
"data": {
"text/plain": [
"34"
"60"
]
},
"execution_count": 9,
Expand Down Expand Up @@ -500,74 +508,74 @@
"<!-- 0 -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>0</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"272.64\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"272.64\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
"</g>\n",
"<!-- 5 -->\n",
"<g id=\"node6\" class=\"node\">\n",
"<title>5</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"272.64\" cy=\"-18\" rx=\"183.87\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"272.64\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">output cols=[userId, movieId, rating]</text>\n",
"</g>\n",
"<!-- 0&#45;&gt;5 -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>0&#45;&gt;5</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M272.64,-71.7C272.64,-63.98 272.64,-54.71 272.64,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"276.14,-46.1 272.64,-36.1 269.14,-46.1 276.14,-46.1\"/>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"143.64\" cy=\"-234\" rx=\"143.77\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"143.64\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">input cols=[userId, movieId]</text>\n",
"</g>\n",
"<!-- 3 -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>3</title>\n",
"<!-- 4 -->\n",
"<g id=\"node5\" class=\"node\">\n",
"<title>4</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"203.64\" cy=\"-162\" rx=\"59.59\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"203.64\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">Categorify</text>\n",
"</g>\n",
"<!-- 3&#45;&gt;0 -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>3&#45;&gt;0</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M219.99,-144.41C229.26,-135 240.98,-123.12 251.01,-112.94\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"253.75,-115.15 258.27,-105.57 248.76,-110.24 253.75,-115.15\"/>\n",
"<!-- 0&#45;&gt;4 -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>0&#45;&gt;4</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M158.16,-216.05C165.53,-207.46 174.62,-196.86 182.73,-187.4\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"185.4,-189.66 189.25,-179.79 180.08,-185.1 185.4,-189.66\"/>\n",
"</g>\n",
"<!-- 1 -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>1</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"372.64\" cy=\"-162\" rx=\"61.19\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"372.64\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">LambdaOp</text>\n",
"</g>\n",
"<!-- 1&#45;&gt;0 -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>1&#45;&gt;0</title>\n",
"<!-- 3 -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>3</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"272.64\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"272.64\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
"</g>\n",
"<!-- 1&#45;&gt;3 -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>1&#45;&gt;3</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M349.95,-145.12C334.78,-134.5 314.77,-120.49 298.91,-109.39\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"300.78,-106.43 290.58,-103.56 296.77,-112.16 300.78,-106.43\"/>\n",
"</g>\n",
"<!-- 2 -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>2</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"403.64\" cy=\"-234\" rx=\"98.58\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"403.64\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">input cols=[rating]</text>\n",
"</g>\n",
"<!-- 2&#45;&gt;1 -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>2&#45;&gt;1</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M395.97,-215.7C392.45,-207.73 388.18,-198.1 384.27,-189.26\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"387.46,-187.83 380.21,-180.1 381.06,-190.67 387.46,-187.83\"/>\n",
"</g>\n",
"<!-- 4 -->\n",
"<g id=\"node5\" class=\"node\">\n",
"<title>4</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"143.64\" cy=\"-234\" rx=\"143.77\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"143.64\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">input cols=[userId, movieId]</text>\n",
"<!-- 5 -->\n",
"<g id=\"node6\" class=\"node\">\n",
"<title>5</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"272.64\" cy=\"-18\" rx=\"183.87\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"272.64\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">output cols=[userId, movieId, rating]</text>\n",
"</g>\n",
"<!-- 3&#45;&gt;5 -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>3&#45;&gt;5</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M272.64,-71.7C272.64,-63.98 272.64,-54.71 272.64,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"276.14,-46.1 272.64,-36.1 269.14,-46.1 276.14,-46.1\"/>\n",
"</g>\n",
"<!-- 4&#45;&gt;3 -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>4&#45;&gt;3</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M158.16,-216.05C165.53,-207.46 174.62,-196.86 182.73,-187.4\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"185.4,-189.66 189.25,-179.79 180.08,-185.1 185.4,-189.66\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M219.99,-144.41C229.26,-135 240.98,-123.12 251.01,-112.94\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"253.75,-115.15 258.27,-105.57 248.76,-110.24 253.75,-115.15\"/>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.dot.Digraph at 0x7f16f453e580>"
"<graphviz.dot.Digraph at 0x7f3dffce09d0>"
]
},
"execution_count": 13,
Expand Down Expand Up @@ -635,8 +643,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 8.28 s, sys: 431 ms, total: 8.71 s\n",
"Wall time: 8.76 s\n"
"CPU times: user 884 ms, sys: 333 ms, total: 1.22 s\n",
"Wall time: 1.32 s\n"
]
}
],
Expand Down Expand Up @@ -1068,16 +1076,16 @@
"output_type": "stream",
"text": [
"===================================Model Init====================================\n",
"[11d22h07m20s][HUGECTR][INFO]: Global seed is 1182778607\n",
"[11d22h07m22s][HUGECTR][INFO]: Peer-to-peer access cannot be fully enabled.\n",
"[12d22h09m04s][HUGECTR][INFO]: Global seed is 2523917653\n",
"[12d22h09m06s][HUGECTR][INFO]: Peer-to-peer access cannot be fully enabled.\n",
"Device 0: Tesla V100-DGXS-16GB\n",
"[11d22h07m22s][HUGECTR][INFO]: num of DataReader workers: 1\n",
"[11d22h07m22s][HUGECTR][INFO]: num_internal_buffers 1\n",
"[11d22h07m22s][HUGECTR][INFO]: num_internal_buffers 1\n",
"[11d22h07m22s][HUGECTR][INFO]: Vocabulary size: 219128\n",
"[11d22h07m22s][HUGECTR][INFO]: max_vocabulary_size_per_gpu_=219128\n",
"[11d22h07m23s][HUGECTR][INFO]: gpu0 start to init embedding\n",
"[11d22h07m23s][HUGECTR][INFO]: gpu0 init embedding done\n",
"[12d22h09m06s][HUGECTR][INFO]: num of DataReader workers: 1\n",
"[12d22h09m06s][HUGECTR][INFO]: num_internal_buffers 1\n",
"[12d22h09m06s][HUGECTR][INFO]: num_internal_buffers 1\n",
"[12d22h09m06s][HUGECTR][INFO]: Vocabulary size: 219128\n",
"[12d22h09m06s][HUGECTR][INFO]: max_vocabulary_size_per_gpu_=219128\n",
"[12d22h09m07s][HUGECTR][INFO]: gpu0 start to init embedding\n",
"[12d22h09m07s][HUGECTR][INFO]: gpu0 init embedding done\n",
"==================================Model Summary==================================\n",
"Label Name Dense Name Sparse Name \n",
"label dense data1 \n",
Expand All @@ -1094,49 +1102,49 @@
"BinaryCrossEntropyLoss fc3, label loss \n",
"--------------------------------------------------------------------------------\n",
"=====================================Model Fit====================================\n",
"[11d22h70m23s][HUGECTR][INFO]: Use non-epoch mode with number of iterations: 2000\n",
"[11d22h70m23s][HUGECTR][INFO]: Training batchsize: 2048, evaluation batchsize: 2048\n",
"[11d22h70m23s][HUGECTR][INFO]: Evaluation interval: 200, snapshot interval: 1900\n",
"[11d22h70m23s][HUGECTR][INFO]: Iter: 100 Time(100 iters): 0.056858s Loss: 0.591121 lr:0.001000\n",
"[11d22h70m23s][HUGECTR][INFO]: Iter: 200 Time(100 iters): 0.053634s Loss: 0.564416 lr:0.001000\n",
"[11d22h70m23s][HUGECTR][INFO]: Evaluation, AUC: 0.743987\n",
"[11d22h70m23s][HUGECTR][INFO]: Eval Time for 160 iters: 0.038483s\n",
"[11d22h70m23s][HUGECTR][INFO]: Iter: 300 Time(100 iters): 0.102818s Loss: 0.566298 lr:0.001000\n",
"[11d22h70m23s][HUGECTR][INFO]: Iter: 400 Time(100 iters): 0.053446s Loss: 0.539269 lr:0.001000\n",
"[11d22h70m23s][HUGECTR][INFO]: Evaluation, AUC: 0.763787\n",
"[11d22h70m23s][HUGECTR][INFO]: Eval Time for 160 iters: 0.034829s\n",
"[11d22h70m23s][HUGECTR][INFO]: Iter: 500 Time(100 iters): 0.100928s Loss: 0.554708 lr:0.001000\n",
"[11d22h70m23s][HUGECTR][INFO]: Iter: 600 Time(100 iters): 0.053452s Loss: 0.539525 lr:0.001000\n",
"[11d22h70m23s][HUGECTR][INFO]: Evaluation, AUC: 0.772562\n",
"[11d22h70m23s][HUGECTR][INFO]: Eval Time for 160 iters: 0.034294s\n",
"[11d22h70m23s][HUGECTR][INFO]: Iter: 700 Time(100 iters): 0.089169s Loss: 0.533822 lr:0.001000\n",
"[11d22h70m23s][HUGECTR][INFO]: Iter: 800 Time(100 iters): 0.053729s Loss: 0.547485 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Evaluation, AUC: 0.779771\n",
"[11d22h70m24s][HUGECTR][INFO]: Eval Time for 160 iters: 0.045291s\n",
"[11d22h70m24s][HUGECTR][INFO]: Iter: 900 Time(100 iters): 0.099816s Loss: 0.521559 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Iter: 1000 Time(100 iters): 0.064345s Loss: 0.524825 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Evaluation, AUC: 0.783923\n",
"[11d22h70m24s][HUGECTR][INFO]: Eval Time for 160 iters: 0.034748s\n",
"[11d22h70m24s][HUGECTR][INFO]: Iter: 1100 Time(100 iters): 0.089246s Loss: 0.541518 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Iter: 1200 Time(100 iters): 0.053427s Loss: 0.517627 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Evaluation, AUC: 0.785172\n",
"[11d22h70m24s][HUGECTR][INFO]: Eval Time for 160 iters: 0.035040s\n",
"[11d22h70m24s][HUGECTR][INFO]: Iter: 1300 Time(100 iters): 0.089587s Loss: 0.532193 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Iter: 1400 Time(100 iters): 0.053467s Loss: 0.546165 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Evaluation, AUC: 0.790062\n",
"[11d22h70m24s][HUGECTR][INFO]: Eval Time for 160 iters: 0.046366s\n",
"[11d22h70m24s][HUGECTR][INFO]: Iter: 1500 Time(100 iters): 0.112205s Loss: 0.528746 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Iter: 1600 Time(100 iters): 0.053563s Loss: 0.518219 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Evaluation, AUC: 0.792964\n",
"[11d22h70m24s][HUGECTR][INFO]: Eval Time for 160 iters: 0.035604s\n",
"[11d22h70m24s][HUGECTR][INFO]: Iter: 1700 Time(100 iters): 0.090292s Loss: 0.513209 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Iter: 1800 Time(100 iters): 0.053419s Loss: 0.536347 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Evaluation, AUC: 0.793697\n",
"[11d22h70m24s][HUGECTR][INFO]: Eval Time for 160 iters: 0.034481s\n",
"[11d22h70m24s][HUGECTR][INFO]: Iter: 1900 Time(100 iters): 0.089039s Loss: 0.501846 lr:0.001000\n",
"[11d22h70m24s][HUGECTR][INFO]: Rank0: Dump hash table from GPU0\n",
"[11d22h70m24s][HUGECTR][INFO]: Rank0: Write hash table <key,value> pairs to file\n",
"[11d22h70m24s][HUGECTR][INFO]: Done\n"
"[12d22h90m70s][HUGECTR][INFO]: Use non-epoch mode with number of iterations: 2000\n",
"[12d22h90m70s][HUGECTR][INFO]: Training batchsize: 2048, evaluation batchsize: 2048\n",
"[12d22h90m70s][HUGECTR][INFO]: Evaluation interval: 200, snapshot interval: 1900\n",
"[12d22h90m70s][HUGECTR][INFO]: Iter: 100 Time(100 iters): 0.052433s Loss: 0.584569 lr:0.001000\n",
"[12d22h90m70s][HUGECTR][INFO]: Iter: 200 Time(100 iters): 0.050910s Loss: 0.574016 lr:0.001000\n",
"[12d22h90m70s][HUGECTR][INFO]: Evaluation, AUC: 0.742104\n",
"[12d22h90m70s][HUGECTR][INFO]: Eval Time for 160 iters: 0.037350s\n",
"[12d22h90m70s][HUGECTR][INFO]: Iter: 300 Time(100 iters): 0.097618s Loss: 0.567825 lr:0.001000\n",
"[12d22h90m70s][HUGECTR][INFO]: Iter: 400 Time(100 iters): 0.050943s Loss: 0.537596 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Evaluation, AUC: 0.759488\n",
"[12d22h90m80s][HUGECTR][INFO]: Eval Time for 160 iters: 0.032945s\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 500 Time(100 iters): 0.096795s Loss: 0.542408 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 600 Time(100 iters): 0.050967s Loss: 0.542498 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Evaluation, AUC: 0.773175\n",
"[12d22h90m80s][HUGECTR][INFO]: Eval Time for 160 iters: 0.032986s\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 700 Time(100 iters): 0.085280s Loss: 0.537160 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 800 Time(100 iters): 0.051053s Loss: 0.536568 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Evaluation, AUC: 0.778617\n",
"[12d22h90m80s][HUGECTR][INFO]: Eval Time for 160 iters: 0.044035s\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 900 Time(100 iters): 0.096313s Loss: 0.522038 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 1000 Time(100 iters): 0.061872s Loss: 0.527347 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Evaluation, AUC: 0.784214\n",
"[12d22h90m80s][HUGECTR][INFO]: Eval Time for 160 iters: 0.032451s\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 1100 Time(100 iters): 0.084576s Loss: 0.539346 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 1200 Time(100 iters): 0.050991s Loss: 0.540385 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Evaluation, AUC: 0.785587\n",
"[12d22h90m80s][HUGECTR][INFO]: Eval Time for 160 iters: 0.033604s\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 1300 Time(100 iters): 0.085920s Loss: 0.526508 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 1400 Time(100 iters): 0.050974s Loss: 0.529692 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Evaluation, AUC: 0.790832\n",
"[12d22h90m80s][HUGECTR][INFO]: Eval Time for 160 iters: 0.044729s\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 1500 Time(100 iters): 0.108554s Loss: 0.512485 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 1600 Time(100 iters): 0.050959s Loss: 0.553773 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Evaluation, AUC: 0.792876\n",
"[12d22h90m80s][HUGECTR][INFO]: Eval Time for 160 iters: 0.034639s\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 1700 Time(100 iters): 0.086896s Loss: 0.511820 lr:0.001000\n",
"[12d22h90m80s][HUGECTR][INFO]: Iter: 1800 Time(100 iters): 0.050913s Loss: 0.529587 lr:0.001000\n",
"[12d22h90m90s][HUGECTR][INFO]: Evaluation, AUC: 0.794456\n",
"[12d22h90m90s][HUGECTR][INFO]: Eval Time for 160 iters: 0.034695s\n",
"[12d22h90m90s][HUGECTR][INFO]: Iter: 1900 Time(100 iters): 0.086743s Loss: 0.520362 lr:0.001000\n",
"[12d22h90m90s][HUGECTR][INFO]: Rank0: Dump hash table from GPU0\n",
"[12d22h90m90s][HUGECTR][INFO]: Rank0: Write hash table <key,value> pairs to file\n",
"[12d22h90m90s][HUGECTR][INFO]: Done\n"
]
}
],
Expand Down Expand Up @@ -1387,7 +1395,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
"version": "3.8.8"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit f64211d

Please sign in to comment.