From 5bf633c3d7b4b979aebe81c3f3a52420c232056c Mon Sep 17 00:00:00 2001 From: Yulia Date: Wed, 22 Nov 2023 17:00:10 +0300 Subject: [PATCH 1/2] Add a notebook with a metrics --- Recsys_02.ipynb | 2244 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2244 insertions(+) create mode 100644 Recsys_02.ipynb diff --git a/Recsys_02.ipynb b/Recsys_02.ipynb new file mode 100644 index 00000000..42017d50 --- /dev/null +++ b/Recsys_02.ipynb @@ -0,0 +1,2244 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "1154553e8da249f7ac4bc6c56708fedd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_835e20f177ec4ffaa7e646c01524b50c", + "IPY_MODEL_0b17791df6ac4e2db361ccbbc094f192", + "IPY_MODEL_2cb58f1d01ce49d9af409da139c95797" + ], + "layout": "IPY_MODEL_8790ad7e4d5b4a9e9bd5c8db96cfa82c" + } + }, + "835e20f177ec4ffaa7e646c01524b50c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b4fa8c0b246a41f1a10ce80e2c75a365", + "placeholder": "​", + "style": "IPY_MODEL_614c47cccc1b47c4b9a37f2b5bad9a70", + "value": "kion dataset download: 100%" + } + }, + "0b17791df6ac4e2db361ccbbc094f192": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ce5223c6eb1e46c08b8d8416d8e8cc2a", + "max": 78795295, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_210f85b0cdc94aa1a582928a50cab553", + "value": 78795295 + } + }, + "2cb58f1d01ce49d9af409da139c95797": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d7f119e0465c413ca0711dca01ec3bb2", + "placeholder": "​", + "style": "IPY_MODEL_0f2fdbe723d24691addc6ece6f02f2d0", + "value": " 78.8M/78.8M [00:20<00:00, 262MiB/s]" + } + }, + "8790ad7e4d5b4a9e9bd5c8db96cfa82c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b4fa8c0b246a41f1a10ce80e2c75a365": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "614c47cccc1b47c4b9a37f2b5bad9a70": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ce5223c6eb1e46c08b8d8416d8e8cc2a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "210f85b0cdc94aa1a582928a50cab553": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d7f119e0465c413ca0711dca01ec3bb2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0f2fdbe723d24691addc6ece6f02f2d0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "source": [ + "!pip -q install rectools" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OX06xQ2FWS43", + "outputId": "99f57678-6269-4f48-9f5c-afe3daa0acc7" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.0/99.0 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.9/8.9 MB\u001b[0m \u001b[31m77.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "p7NMna_cVQxG" + }, + "outputs": [], + "source": [ + "import time\n", + "import logging\n", + "import requests\n", + "import pprint\n", + "from copy import deepcopy, copy\n", + "from typing import Dict, List, Tuple, Union, Callable, Any\n", + "import numpy as np\n", + "import pandas as pd\n", + "from tqdm.auto import tqdm\n", + "from IPython.display import display" + ] + }, + { + "cell_type": "code", + "source": [ + "import rectools\n", + "from rectools import Columns\n", + "from rectools.dataset import Interactions, Dataset, DenseFeatures\n", + "from rectools.model_selection import Splitter, TimeRangeSplitter\n", + "from rectools.models.base import ModelBase\n", + "from rectools.models import RandomModel, PopularModel\n", + "from rectools.metrics.base import MetricAtK\n", + "from rectools.metrics import (\n", + " Precision,\n", + " Recall,\n", + " MAP,\n", + " NDCG,\n", + " Serendipity,\n", + " MeanInvUserFreq,\n", + " IntraListDiversity,\n", + " PairwiseHammingDistanceCalculator,\n", + " calc_metrics,\n", + ")" + ], + "metadata": { + "id": "BaVJV2euWq83" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "url = 'https://github.com/irsafilo/KION_DATASET/raw/f69775be31fa5779907cf0a92ddedb70037fb5ae/data_original.zip'" + ], + "metadata": { + "id": "2GDmmYBB_5Az" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "req = requests.get(url, stream=True)\n", + "\n", + "with open('kion.zip', 'wb') as fd:\n", + " total_size_in_bytes = int(req.headers.get('Content-Length', 0))\n", + " progress_bar = tqdm(desc='kion dataset download', total=total_size_in_bytes, unit='iB', unit_scale=True)\n", + " for chunk in req.iter_content(chunk_size=2 ** 20):\n", + " progress_bar.update(len(chunk))\n", + " fd.write(chunk)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "1154553e8da249f7ac4bc6c56708fedd", + "835e20f177ec4ffaa7e646c01524b50c", + "0b17791df6ac4e2db361ccbbc094f192", + "2cb58f1d01ce49d9af409da139c95797", + "8790ad7e4d5b4a9e9bd5c8db96cfa82c", + "b4fa8c0b246a41f1a10ce80e2c75a365", + "614c47cccc1b47c4b9a37f2b5bad9a70", + "ce5223c6eb1e46c08b8d8416d8e8cc2a", + "210f85b0cdc94aa1a582928a50cab553", + "d7f119e0465c413ca0711dca01ec3bb2", + "0f2fdbe723d24691addc6ece6f02f2d0" + ] + }, + "id": "okHHWMWG_7-J", + "outputId": "f783fffc-56f5-4041-aa92-822a984f50ac" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "kion dataset download: 0%| | 0.00/78.8M [00:00\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_iditem_iddatetimeweightwatched_pct
017654995062021-05-11425072.0
169931716592021-05-298317100.0
265668371072021-05-09100.0
386461376382021-07-0514483100.0
496486895062021-04-306725100.0
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + " \n" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "users.head(5)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "mvOFvir3Assy", + "outputId": "1a187c3a-56bf-40f2-f7f3-6cb1151ed5f9" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " user_id age income sex kids_flg\n", + "0 973171 age_25_34 income_60_90 М 1\n", + "1 962099 age_18_24 income_20_40 М 0\n", + "2 1047345 age_45_54 income_40_60 Ж 0\n", + "3 721985 age_45_54 income_20_40 Ж 0\n", + "4 704055 age_35_44 income_60_90 Ж 0" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idageincomesexkids_flg
0973171age_25_34income_60_90М1
1962099age_18_24income_20_40М0
21047345age_45_54income_40_60Ж0
3721985age_45_54income_20_40Ж0
4704055age_35_44income_60_90Ж0
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "items.head(5)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 608 + }, + "id": "bCnZZ6bKA0yz", + "outputId": "6211c1a5-c754-4fda-c7a5-04d5d00f3977" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " item_id content_type title title_orig release_year \\\n", + "0 10711 film Поговори с ней Hable con ella 2002.0 \n", + "1 2508 film Голые перцы Search Party 2014.0 \n", + "2 10716 film Тактическая сила Tactical Force 2011.0 \n", + "3 7868 film 45 лет 45 Years 2015.0 \n", + "4 16268 film Все решает мгновение NaN 1978.0 \n", + "\n", + " genres countries for_kids \\\n", + "0 драмы, зарубежные, детективы, мелодрамы Испания NaN \n", + "1 зарубежные, приключения, комедии США NaN \n", + "2 криминал, зарубежные, триллеры, боевики, комедии Канада NaN \n", + "3 драмы, зарубежные, мелодрамы Великобритания NaN \n", + "4 драмы, спорт, советские, мелодрамы СССР NaN \n", + "\n", + " age_rating studios directors \\\n", + "0 16.0 NaN Педро Альмодовар \n", + "1 16.0 NaN Скот Армстронг \n", + "2 16.0 NaN Адам П. Калтраро \n", + "3 16.0 NaN Эндрю Хэй \n", + "4 12.0 Ленфильм Виктор Садовский \n", + "\n", + " actors \\\n", + "0 Адольфо Фернандес, Ана Фернандес, Дарио Гранди... \n", + "1 Адам Палли, Брайан Хаски, Дж.Б. Смув, Джейсон ... \n", + "2 Адриан Холмс, Даррен Шалави, Джерри Вассерман,... \n", + "3 Александра Риддлстон-Барретт, Джеральдин Джейм... \n", + "4 Александр Абдулов, Александр Демьяненко, Алекс... \n", + "\n", + " description \\\n", + "0 Мелодрама легендарного Педро Альмодовара «Пого... \n", + "1 Уморительная современная комедия на популярную... \n", + "2 Профессиональный рестлер Стив Остин («Все или ... \n", + "3 Шарлотта Рэмплинг, Том Кортни, Джеральдин Джей... \n", + "4 Расчетливая чаровница из советского кинохита «... \n", + "\n", + " keywords \n", + "0 Поговори, ней, 2002, Испания, друзья, любовь, ... \n", + "1 Голые, перцы, 2014, США, друзья, свадьбы, прео... \n", + "2 Тактическая, сила, 2011, Канада, бандиты, ганг... \n", + "3 45, лет, 2015, Великобритания, брак, жизнь, лю... \n", + "4 Все, решает, мгновение, 1978, СССР, сильные, ж... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
item_idcontent_typetitletitle_origrelease_yeargenrescountriesfor_kidsage_ratingstudiosdirectorsactorsdescriptionkeywords
010711filmПоговори с нейHable con ella2002.0драмы, зарубежные, детективы, мелодрамыИспанияNaN16.0NaNПедро АльмодоварАдольфо Фернандес, Ана Фернандес, Дарио Гранди...Мелодрама легендарного Педро Альмодовара «Пого...Поговори, ней, 2002, Испания, друзья, любовь, ...
12508filmГолые перцыSearch Party2014.0зарубежные, приключения, комедииСШАNaN16.0NaNСкот АрмстронгАдам Палли, Брайан Хаски, Дж.Б. Смув, Джейсон ...Уморительная современная комедия на популярную...Голые, перцы, 2014, США, друзья, свадьбы, прео...
210716filmТактическая силаTactical Force2011.0криминал, зарубежные, триллеры, боевики, комедииКанадаNaN16.0NaNАдам П. КалтрароАдриан Холмс, Даррен Шалави, Джерри Вассерман,...Профессиональный рестлер Стив Остин («Все или ...Тактическая, сила, 2011, Канада, бандиты, ганг...
37868film45 лет45 Years2015.0драмы, зарубежные, мелодрамыВеликобританияNaN16.0NaNЭндрю ХэйАлександра Риддлстон-Барретт, Джеральдин Джейм...Шарлотта Рэмплинг, Том Кортни, Джеральдин Джей...45, лет, 2015, Великобритания, брак, жизнь, лю...
416268filmВсе решает мгновениеNaN1978.0драмы, спорт, советские, мелодрамыСССРNaN12.0ЛенфильмВиктор СадовскийАлександр Абдулов, Александр Демьяненко, Алекс...Расчетливая чаровница из советского кинохита «...Все, решает, мгновение, 1978, СССР, сильные, ж...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Возьмем метрики: классификационные (Precision, Recall), метрики ранжирования (MAP, NDCG), специальные (Serendipity, Novelty)" + ], + "metadata": { + "id": "eUUF2kRlDL1k" + } + }, + { + "cell_type": "code", + "source": [ + "metrics = {\n", + " \"precision\": Precision,\n", + " \"recall\": Recall,\n", + " \"MAP\": MAP,\n", + " \"NDCG\": NDCG,\n", + " \"serendipity\": Serendipity,\n", + " \"novelty\": MeanInvUserFreq\n", + " }\n", + "\n", + "k = [1, 5, 10]" + ], + "metadata": { + "id": "s028iLIjA4BL" + }, + "execution_count": 68, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def compute_metrics(metrics, K):\n", + " metrics_at_k = {}\n", + "\n", + " for key, value in metrics.items():\n", + " kwargs = {}\n", + " if isinstance(value, tuple):\n", + " kwargs.update(**value[1])\n", + " value = value[0]\n", + "\n", + " metrics_at_k.update({f\"{key}@{k}\": value(k=k, **kwargs) for k in K})\n", + "\n", + " return metrics_at_k" + ], + "metadata": { + "id": "-X8nK9YuGeAy" + }, + "execution_count": 69, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "metrics_at_k = compute_metrics(metrics, k)" + ], + "metadata": { + "id": "hnHzx2RDGgil" + }, + "execution_count": 70, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "metrics_at_k" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BCNAO707P_3J", + "outputId": "f092c764-b467-408b-b997-75687a80344c" + }, + "execution_count": 71, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'precision@1': Precision(k=1),\n", + " 'precision@5': Precision(k=5),\n", + " 'precision@10': Precision(k=10),\n", + " 'recall@1': Recall(k=1),\n", + " 'recall@5': Recall(k=5),\n", + " 'recall@10': Recall(k=10),\n", + " 'MAP@1': MAP(k=1, divide_by_k=False),\n", + " 'MAP@5': MAP(k=5, divide_by_k=False),\n", + " 'MAP@10': MAP(k=10, divide_by_k=False),\n", + " 'NDCG@1': NDCG(k=1, log_base=2),\n", + " 'NDCG@5': NDCG(k=5, log_base=2),\n", + " 'NDCG@10': NDCG(k=10, log_base=2),\n", + " 'serendipity@1': Serendipity(k=1),\n", + " 'serendipity@5': Serendipity(k=5),\n", + " 'serendipity@10': Serendipity(k=10),\n", + " 'novelty@1': MeanInvUserFreq(k=1),\n", + " 'novelty@5': MeanInvUserFreq(k=5),\n", + " 'novelty@10': MeanInvUserFreq(k=10)}" + ] + }, + "metadata": {}, + "execution_count": 71 + } + ] + }, + { + "cell_type": "code", + "source": [ + "def computation_metrics(models, metrics, splitter, interactions, k):\n", + "\n", + " results = []\n", + " interactions_data = Interactions(interactions)\n", + " fold_iterator = splitter.split(interactions_data, collect_fold_stats=True)\n", + "\n", + " for train_ids, test_ids, fold_info in fold_iterator:\n", + "\n", + " df_train = interactions_data.df.iloc[train_ids]\n", + " df_test = interactions_data.df.iloc[test_ids][Columns.UserItem]\n", + "\n", + " dataset = Dataset.construct(df_train)\n", + " test_users = np.unique(df_test[Columns.User])\n", + "\n", + " catalog = df_train[Columns.Item].unique()\n", + "\n", + " for key, value in models.items():\n", + "\n", + " model = deepcopy(value)\n", + "\n", + " start = time.time()\n", + " model.fit(dataset)\n", + " stop = time.time()\n", + "\n", + " reco = model.recommend(users=test_users, dataset=dataset, k=k, filter_viewed=True)\n", + "\n", + " metric_values = calc_metrics(\n", + " metrics,\n", + " reco=reco,\n", + " interactions=df_test,\n", + " prev_interactions=df_train,\n", + " catalog=catalog,\n", + " )\n", + "\n", + " times = {\"model\": key, \"time\": stop - start}\n", + " times.update(metric_values)\n", + " results.append(times)\n", + "\n", + " results_df = pd.DataFrame(results)\n", + "\n", + " return results_df" + ], + "metadata": { + "id": "ykxIWnNcJcQ-" + }, + "execution_count": 72, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Сплиттер: 3 фолда для кросс-валидации по неделе, исключение холодных юзеров и айтемов и просмотренных айтемов" + ], + "metadata": { + "id": "zx8USwNZU85w" + } + }, + { + "cell_type": "code", + "source": [ + "splitter = TimeRangeSplitter(test_size=\"7D\", n_splits=3, filter_cold_users=True, filter_cold_items=True, filter_already_seen=True)" + ], + "metadata": { + "id": "VQDSnhLfIv8-" + }, + "execution_count": 73, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "models = {\"RandomModel\": RandomModel(), \"PopularModel\": PopularModel()}\n", + "\n", + "results = computation_metrics(models, metrics_at_k, splitter, interactions, 10)" + ], + "metadata": { + "id": "ev1D_mS8HjdI" + }, + "execution_count": 74, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "pd.DataFrame(results).groupby(\"model\").mean()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 164 + }, + "id": "vCJr-rNPHxtj", + "outputId": "e65feb8b-f340-474a-9157-f00fa2baf35f" + }, + "execution_count": 75, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " time precision@1 recall@1 precision@5 recall@5 \\\n", + "model \n", + "PopularModel 1.865071 0.076432 0.042720 0.052402 0.137413 \n", + "RandomModel 0.000046 0.000180 0.000066 0.000170 0.000318 \n", + "\n", + " precision@10 recall@10 NDCG@1 NDCG@5 NDCG@10 MAP@1 \\\n", + "model \n", + "PopularModel 0.033903 0.173492 0.076432 0.057932 0.043084 0.042720 \n", + "RandomModel 0.000166 0.000601 0.000180 0.000170 0.000167 0.000066 \n", + "\n", + " MAP@5 MAP@10 novelty@1 novelty@5 novelty@10 \\\n", + "model \n", + "PopularModel 0.078295 0.084109 2.377055 3.066979 3.713390 \n", + "RandomModel 0.000148 0.000185 15.613740 15.614079 15.613485 \n", + "\n", + " serendipity@1 serendipity@5 serendipity@10 \n", + "model \n", + "PopularModel 0.000002 0.000003 0.000002 \n", + "RandomModel 0.000006 0.000006 0.000006 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timeprecision@1recall@1precision@5recall@5precision@10recall@10NDCG@1NDCG@5NDCG@10MAP@1MAP@5MAP@10novelty@1novelty@5novelty@10serendipity@1serendipity@5serendipity@10
model
PopularModel1.8650710.0764320.0427200.0524020.1374130.0339030.1734920.0764320.0579320.0430840.0427200.0782950.0841092.3770553.0669793.7133900.0000020.0000030.000002
RandomModel0.0000460.0001800.0000660.0001700.0003180.0001660.0006010.0001800.0001700.0001670.0000660.0001480.00018515.61374015.61407915.6134850.0000060.0000060.000006
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 75 + } + ] + }, + { + "cell_type": "code", + "source": [ + "def visualized(model, dataset, users, features, items):\n", + "\n", + " if model.is_fitted == False:\n", + " raise Exception('model is not fitted')\n", + "\n", + " recommendations = model.recommend(users, dataset, 3, True)\n", + " interactions_data = dataset.interactions.df\n", + " history = interactions_data[interactions_data.user_id.isin(users)]\n", + "\n", + " for user in users:\n", + "\n", + " history_for_user = history[history.user_id.isin([user])]\n", + " hist = items.join(history_for_user.set_index('item_id'), on='item_id')[[\"user_id\"] + features]\n", + " hist_for_id = hist.loc[hist['user_id'].isin([user])]\n", + " print(f\"Пользователь с id {user} уже посмотрел:\")\n", + " print(hist_for_id)\n", + " print('')\n", + "\n", + " recommendations_for_id = model.recommend(users, dataset, 3, True)\n", + " rec = pd.merge(recommendations_for_id, items)[[\"user_id\"] + features]\n", + " rec_for_id = rec.loc[rec['user_id'].isin([user])]\n", + "\n", + " print(f\"Пользователю с id {user} можно порекомендовать:\")\n", + " print(rec_for_id)\n", + " print('')\n", + " print('')" + ], + "metadata": { + "id": "GccJWs1aQUmN" + }, + "execution_count": 92, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "model = RandomModel(random_state=32)\n", + "dataset = Dataset.construct(interactions)\n", + "model.fit(dataset)\n", + "users = np.array([666262, 672861, 955527])" + ], + "metadata": { + "id": "v0F4yRc_SB53" + }, + "execution_count": 93, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "visualized(model, dataset, users, [\"title\"], items)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HxekR0z_SFEZ", + "outputId": "10680310-1264-4874-faed-24f33258f54c" + }, + "execution_count": 94, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Пользователь с id 666262 уже посмотрел:\n", + " user_id title\n", + "11230 666262.0 Дом ночных призраков\n", + "\n", + "Пользователю с id 666262 можно порекомендовать:\n", + " user_id title\n", + "0 666262 Возвращение Будулая\n", + "1 666262 Новые приключения Аладдина (жестовым языком)\n", + "2 666262 Пропавшая грамота\n", + "\n", + "\n", + "Пользователь с id 672861 уже посмотрел:\n", + " user_id title\n", + "11182 672861.0 В ритме сердца\n", + "13578 672861.0 Медвежонок Винни и его друзья\n", + "\n", + "Пользователю с id 672861 можно порекомендовать:\n", + " user_id title\n", + "3 672861 Женщина в беде 3\n", + "4 672861 Гордость и предубеждение\n", + "5 672861 Болванчики\n", + "\n", + "\n", + "Пользователь с id 955527 уже посмотрел:\n", + " user_id title\n", + "8909 955527.0 Признание 5\n", + "\n", + "Пользователю с id 955527 можно порекомендовать:\n", + " user_id title\n", + "6 955527 Солдат Джейн\n", + "7 955527 Спеши любить\n", + "8 955527 Комната (жестовым языком)\n", + "\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "39Ce1wmoTNpZ" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From 693a41d4165d0dcab5bdb36fb2b34a6f33e7e42d Mon Sep 17 00:00:00 2001 From: Yulia Date: Wed, 22 Nov 2023 17:03:19 +0300 Subject: [PATCH 2/2] Sorry for this pr --- Recsys_02.ipynb | 2244 ----------------------------------------------- 1 file changed, 2244 deletions(-) delete mode 100644 Recsys_02.ipynb diff --git a/Recsys_02.ipynb b/Recsys_02.ipynb deleted file mode 100644 index 42017d50..00000000 --- a/Recsys_02.ipynb +++ /dev/null @@ -1,2244 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4" - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU", - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "1154553e8da249f7ac4bc6c56708fedd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_835e20f177ec4ffaa7e646c01524b50c", - "IPY_MODEL_0b17791df6ac4e2db361ccbbc094f192", - "IPY_MODEL_2cb58f1d01ce49d9af409da139c95797" - ], - "layout": "IPY_MODEL_8790ad7e4d5b4a9e9bd5c8db96cfa82c" - } - }, - "835e20f177ec4ffaa7e646c01524b50c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b4fa8c0b246a41f1a10ce80e2c75a365", - "placeholder": "​", - "style": "IPY_MODEL_614c47cccc1b47c4b9a37f2b5bad9a70", - "value": "kion dataset download: 100%" - } - }, - "0b17791df6ac4e2db361ccbbc094f192": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ce5223c6eb1e46c08b8d8416d8e8cc2a", - "max": 78795295, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_210f85b0cdc94aa1a582928a50cab553", - "value": 78795295 - } - }, - "2cb58f1d01ce49d9af409da139c95797": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d7f119e0465c413ca0711dca01ec3bb2", - "placeholder": "​", - "style": "IPY_MODEL_0f2fdbe723d24691addc6ece6f02f2d0", - "value": " 78.8M/78.8M [00:20<00:00, 262MiB/s]" - } - }, - "8790ad7e4d5b4a9e9bd5c8db96cfa82c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b4fa8c0b246a41f1a10ce80e2c75a365": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "614c47cccc1b47c4b9a37f2b5bad9a70": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ce5223c6eb1e46c08b8d8416d8e8cc2a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "210f85b0cdc94aa1a582928a50cab553": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "d7f119e0465c413ca0711dca01ec3bb2": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0f2fdbe723d24691addc6ece6f02f2d0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } - } - }, - "cells": [ - { - "cell_type": "code", - "source": [ - "!pip -q install rectools" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "OX06xQ2FWS43", - "outputId": "99f57678-6269-4f48-9f5c-afe3daa0acc7" - }, - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.0/99.0 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.9/8.9 MB\u001b[0m \u001b[31m77.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h" - ] - } - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "p7NMna_cVQxG" - }, - "outputs": [], - "source": [ - "import time\n", - "import logging\n", - "import requests\n", - "import pprint\n", - "from copy import deepcopy, copy\n", - "from typing import Dict, List, Tuple, Union, Callable, Any\n", - "import numpy as np\n", - "import pandas as pd\n", - "from tqdm.auto import tqdm\n", - "from IPython.display import display" - ] - }, - { - "cell_type": "code", - "source": [ - "import rectools\n", - "from rectools import Columns\n", - "from rectools.dataset import Interactions, Dataset, DenseFeatures\n", - "from rectools.model_selection import Splitter, TimeRangeSplitter\n", - "from rectools.models.base import ModelBase\n", - "from rectools.models import RandomModel, PopularModel\n", - "from rectools.metrics.base import MetricAtK\n", - "from rectools.metrics import (\n", - " Precision,\n", - " Recall,\n", - " MAP,\n", - " NDCG,\n", - " Serendipity,\n", - " MeanInvUserFreq,\n", - " IntraListDiversity,\n", - " PairwiseHammingDistanceCalculator,\n", - " calc_metrics,\n", - ")" - ], - "metadata": { - "id": "BaVJV2euWq83" - }, - "execution_count": 9, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "url = 'https://github.com/irsafilo/KION_DATASET/raw/f69775be31fa5779907cf0a92ddedb70037fb5ae/data_original.zip'" - ], - "metadata": { - "id": "2GDmmYBB_5Az" - }, - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "req = requests.get(url, stream=True)\n", - "\n", - "with open('kion.zip', 'wb') as fd:\n", - " total_size_in_bytes = int(req.headers.get('Content-Length', 0))\n", - " progress_bar = tqdm(desc='kion dataset download', total=total_size_in_bytes, unit='iB', unit_scale=True)\n", - " for chunk in req.iter_content(chunk_size=2 ** 20):\n", - " progress_bar.update(len(chunk))\n", - " fd.write(chunk)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 49, - "referenced_widgets": [ - "1154553e8da249f7ac4bc6c56708fedd", - "835e20f177ec4ffaa7e646c01524b50c", - "0b17791df6ac4e2db361ccbbc094f192", - "2cb58f1d01ce49d9af409da139c95797", - "8790ad7e4d5b4a9e9bd5c8db96cfa82c", - "b4fa8c0b246a41f1a10ce80e2c75a365", - "614c47cccc1b47c4b9a37f2b5bad9a70", - "ce5223c6eb1e46c08b8d8416d8e8cc2a", - "210f85b0cdc94aa1a582928a50cab553", - "d7f119e0465c413ca0711dca01ec3bb2", - "0f2fdbe723d24691addc6ece6f02f2d0" - ] - }, - "id": "okHHWMWG_7-J", - "outputId": "f783fffc-56f5-4041-aa92-822a984f50ac" - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "kion dataset download: 0%| | 0.00/78.8M [00:00\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iditem_iddatetimeweightwatched_pct
017654995062021-05-11425072.0
169931716592021-05-298317100.0
265668371072021-05-09100.0
386461376382021-07-0514483100.0
496486895062021-04-306725100.0
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - " \n" - ] - }, - "metadata": {}, - "execution_count": 14 - } - ] - }, - { - "cell_type": "code", - "source": [ - "users.head(5)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - }, - "id": "mvOFvir3Assy", - "outputId": "1a187c3a-56bf-40f2-f7f3-6cb1151ed5f9" - }, - "execution_count": 13, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " user_id age income sex kids_flg\n", - "0 973171 age_25_34 income_60_90 М 1\n", - "1 962099 age_18_24 income_20_40 М 0\n", - "2 1047345 age_45_54 income_40_60 Ж 0\n", - "3 721985 age_45_54 income_20_40 Ж 0\n", - "4 704055 age_35_44 income_60_90 Ж 0" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_idageincomesexkids_flg
0973171age_25_34income_60_90М1
1962099age_18_24income_20_40М0
21047345age_45_54income_40_60Ж0
3721985age_45_54income_20_40Ж0
4704055age_35_44income_60_90Ж0
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 13 - } - ] - }, - { - "cell_type": "code", - "source": [ - "items.head(5)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 608 - }, - "id": "bCnZZ6bKA0yz", - "outputId": "6211c1a5-c754-4fda-c7a5-04d5d00f3977" - }, - "execution_count": 15, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " item_id content_type title title_orig release_year \\\n", - "0 10711 film Поговори с ней Hable con ella 2002.0 \n", - "1 2508 film Голые перцы Search Party 2014.0 \n", - "2 10716 film Тактическая сила Tactical Force 2011.0 \n", - "3 7868 film 45 лет 45 Years 2015.0 \n", - "4 16268 film Все решает мгновение NaN 1978.0 \n", - "\n", - " genres countries for_kids \\\n", - "0 драмы, зарубежные, детективы, мелодрамы Испания NaN \n", - "1 зарубежные, приключения, комедии США NaN \n", - "2 криминал, зарубежные, триллеры, боевики, комедии Канада NaN \n", - "3 драмы, зарубежные, мелодрамы Великобритания NaN \n", - "4 драмы, спорт, советские, мелодрамы СССР NaN \n", - "\n", - " age_rating studios directors \\\n", - "0 16.0 NaN Педро Альмодовар \n", - "1 16.0 NaN Скот Армстронг \n", - "2 16.0 NaN Адам П. Калтраро \n", - "3 16.0 NaN Эндрю Хэй \n", - "4 12.0 Ленфильм Виктор Садовский \n", - "\n", - " actors \\\n", - "0 Адольфо Фернандес, Ана Фернандес, Дарио Гранди... \n", - "1 Адам Палли, Брайан Хаски, Дж.Б. Смув, Джейсон ... \n", - "2 Адриан Холмс, Даррен Шалави, Джерри Вассерман,... \n", - "3 Александра Риддлстон-Барретт, Джеральдин Джейм... \n", - "4 Александр Абдулов, Александр Демьяненко, Алекс... \n", - "\n", - " description \\\n", - "0 Мелодрама легендарного Педро Альмодовара «Пого... \n", - "1 Уморительная современная комедия на популярную... \n", - "2 Профессиональный рестлер Стив Остин («Все или ... \n", - "3 Шарлотта Рэмплинг, Том Кортни, Джеральдин Джей... \n", - "4 Расчетливая чаровница из советского кинохита «... \n", - "\n", - " keywords \n", - "0 Поговори, ней, 2002, Испания, друзья, любовь, ... \n", - "1 Голые, перцы, 2014, США, друзья, свадьбы, прео... \n", - "2 Тактическая, сила, 2011, Канада, бандиты, ганг... \n", - "3 45, лет, 2015, Великобритания, брак, жизнь, лю... \n", - "4 Все, решает, мгновение, 1978, СССР, сильные, ж... " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
item_idcontent_typetitletitle_origrelease_yeargenrescountriesfor_kidsage_ratingstudiosdirectorsactorsdescriptionkeywords
010711filmПоговори с нейHable con ella2002.0драмы, зарубежные, детективы, мелодрамыИспанияNaN16.0NaNПедро АльмодоварАдольфо Фернандес, Ана Фернандес, Дарио Гранди...Мелодрама легендарного Педро Альмодовара «Пого...Поговори, ней, 2002, Испания, друзья, любовь, ...
12508filmГолые перцыSearch Party2014.0зарубежные, приключения, комедииСШАNaN16.0NaNСкот АрмстронгАдам Палли, Брайан Хаски, Дж.Б. Смув, Джейсон ...Уморительная современная комедия на популярную...Голые, перцы, 2014, США, друзья, свадьбы, прео...
210716filmТактическая силаTactical Force2011.0криминал, зарубежные, триллеры, боевики, комедииКанадаNaN16.0NaNАдам П. КалтрароАдриан Холмс, Даррен Шалави, Джерри Вассерман,...Профессиональный рестлер Стив Остин («Все или ...Тактическая, сила, 2011, Канада, бандиты, ганг...
37868film45 лет45 Years2015.0драмы, зарубежные, мелодрамыВеликобританияNaN16.0NaNЭндрю ХэйАлександра Риддлстон-Барретт, Джеральдин Джейм...Шарлотта Рэмплинг, Том Кортни, Джеральдин Джей...45, лет, 2015, Великобритания, брак, жизнь, лю...
416268filmВсе решает мгновениеNaN1978.0драмы, спорт, советские, мелодрамыСССРNaN12.0ЛенфильмВиктор СадовскийАлександр Абдулов, Александр Демьяненко, Алекс...Расчетливая чаровница из советского кинохита «...Все, решает, мгновение, 1978, СССР, сильные, ж...
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 15 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "Возьмем метрики: классификационные (Precision, Recall), метрики ранжирования (MAP, NDCG), специальные (Serendipity, Novelty)" - ], - "metadata": { - "id": "eUUF2kRlDL1k" - } - }, - { - "cell_type": "code", - "source": [ - "metrics = {\n", - " \"precision\": Precision,\n", - " \"recall\": Recall,\n", - " \"MAP\": MAP,\n", - " \"NDCG\": NDCG,\n", - " \"serendipity\": Serendipity,\n", - " \"novelty\": MeanInvUserFreq\n", - " }\n", - "\n", - "k = [1, 5, 10]" - ], - "metadata": { - "id": "s028iLIjA4BL" - }, - "execution_count": 68, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def compute_metrics(metrics, K):\n", - " metrics_at_k = {}\n", - "\n", - " for key, value in metrics.items():\n", - " kwargs = {}\n", - " if isinstance(value, tuple):\n", - " kwargs.update(**value[1])\n", - " value = value[0]\n", - "\n", - " metrics_at_k.update({f\"{key}@{k}\": value(k=k, **kwargs) for k in K})\n", - "\n", - " return metrics_at_k" - ], - "metadata": { - "id": "-X8nK9YuGeAy" - }, - "execution_count": 69, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "metrics_at_k = compute_metrics(metrics, k)" - ], - "metadata": { - "id": "hnHzx2RDGgil" - }, - "execution_count": 70, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "metrics_at_k" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "BCNAO707P_3J", - "outputId": "f092c764-b467-408b-b997-75687a80344c" - }, - "execution_count": 71, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "{'precision@1': Precision(k=1),\n", - " 'precision@5': Precision(k=5),\n", - " 'precision@10': Precision(k=10),\n", - " 'recall@1': Recall(k=1),\n", - " 'recall@5': Recall(k=5),\n", - " 'recall@10': Recall(k=10),\n", - " 'MAP@1': MAP(k=1, divide_by_k=False),\n", - " 'MAP@5': MAP(k=5, divide_by_k=False),\n", - " 'MAP@10': MAP(k=10, divide_by_k=False),\n", - " 'NDCG@1': NDCG(k=1, log_base=2),\n", - " 'NDCG@5': NDCG(k=5, log_base=2),\n", - " 'NDCG@10': NDCG(k=10, log_base=2),\n", - " 'serendipity@1': Serendipity(k=1),\n", - " 'serendipity@5': Serendipity(k=5),\n", - " 'serendipity@10': Serendipity(k=10),\n", - " 'novelty@1': MeanInvUserFreq(k=1),\n", - " 'novelty@5': MeanInvUserFreq(k=5),\n", - " 'novelty@10': MeanInvUserFreq(k=10)}" - ] - }, - "metadata": {}, - "execution_count": 71 - } - ] - }, - { - "cell_type": "code", - "source": [ - "def computation_metrics(models, metrics, splitter, interactions, k):\n", - "\n", - " results = []\n", - " interactions_data = Interactions(interactions)\n", - " fold_iterator = splitter.split(interactions_data, collect_fold_stats=True)\n", - "\n", - " for train_ids, test_ids, fold_info in fold_iterator:\n", - "\n", - " df_train = interactions_data.df.iloc[train_ids]\n", - " df_test = interactions_data.df.iloc[test_ids][Columns.UserItem]\n", - "\n", - " dataset = Dataset.construct(df_train)\n", - " test_users = np.unique(df_test[Columns.User])\n", - "\n", - " catalog = df_train[Columns.Item].unique()\n", - "\n", - " for key, value in models.items():\n", - "\n", - " model = deepcopy(value)\n", - "\n", - " start = time.time()\n", - " model.fit(dataset)\n", - " stop = time.time()\n", - "\n", - " reco = model.recommend(users=test_users, dataset=dataset, k=k, filter_viewed=True)\n", - "\n", - " metric_values = calc_metrics(\n", - " metrics,\n", - " reco=reco,\n", - " interactions=df_test,\n", - " prev_interactions=df_train,\n", - " catalog=catalog,\n", - " )\n", - "\n", - " times = {\"model\": key, \"time\": stop - start}\n", - " times.update(metric_values)\n", - " results.append(times)\n", - "\n", - " results_df = pd.DataFrame(results)\n", - "\n", - " return results_df" - ], - "metadata": { - "id": "ykxIWnNcJcQ-" - }, - "execution_count": 72, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "Сплиттер: 3 фолда для кросс-валидации по неделе, исключение холодных юзеров и айтемов и просмотренных айтемов" - ], - "metadata": { - "id": "zx8USwNZU85w" - } - }, - { - "cell_type": "code", - "source": [ - "splitter = TimeRangeSplitter(test_size=\"7D\", n_splits=3, filter_cold_users=True, filter_cold_items=True, filter_already_seen=True)" - ], - "metadata": { - "id": "VQDSnhLfIv8-" - }, - "execution_count": 73, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "models = {\"RandomModel\": RandomModel(), \"PopularModel\": PopularModel()}\n", - "\n", - "results = computation_metrics(models, metrics_at_k, splitter, interactions, 10)" - ], - "metadata": { - "id": "ev1D_mS8HjdI" - }, - "execution_count": 74, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "pd.DataFrame(results).groupby(\"model\").mean()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 164 - }, - "id": "vCJr-rNPHxtj", - "outputId": "e65feb8b-f340-474a-9157-f00fa2baf35f" - }, - "execution_count": 75, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " time precision@1 recall@1 precision@5 recall@5 \\\n", - "model \n", - "PopularModel 1.865071 0.076432 0.042720 0.052402 0.137413 \n", - "RandomModel 0.000046 0.000180 0.000066 0.000170 0.000318 \n", - "\n", - " precision@10 recall@10 NDCG@1 NDCG@5 NDCG@10 MAP@1 \\\n", - "model \n", - "PopularModel 0.033903 0.173492 0.076432 0.057932 0.043084 0.042720 \n", - "RandomModel 0.000166 0.000601 0.000180 0.000170 0.000167 0.000066 \n", - "\n", - " MAP@5 MAP@10 novelty@1 novelty@5 novelty@10 \\\n", - "model \n", - "PopularModel 0.078295 0.084109 2.377055 3.066979 3.713390 \n", - "RandomModel 0.000148 0.000185 15.613740 15.614079 15.613485 \n", - "\n", - " serendipity@1 serendipity@5 serendipity@10 \n", - "model \n", - "PopularModel 0.000002 0.000003 0.000002 \n", - "RandomModel 0.000006 0.000006 0.000006 " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
timeprecision@1recall@1precision@5recall@5precision@10recall@10NDCG@1NDCG@5NDCG@10MAP@1MAP@5MAP@10novelty@1novelty@5novelty@10serendipity@1serendipity@5serendipity@10
model
PopularModel1.8650710.0764320.0427200.0524020.1374130.0339030.1734920.0764320.0579320.0430840.0427200.0782950.0841092.3770553.0669793.7133900.0000020.0000030.000002
RandomModel0.0000460.0001800.0000660.0001700.0003180.0001660.0006010.0001800.0001700.0001670.0000660.0001480.00018515.61374015.61407915.6134850.0000060.0000060.000006
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 75 - } - ] - }, - { - "cell_type": "code", - "source": [ - "def visualized(model, dataset, users, features, items):\n", - "\n", - " if model.is_fitted == False:\n", - " raise Exception('model is not fitted')\n", - "\n", - " recommendations = model.recommend(users, dataset, 3, True)\n", - " interactions_data = dataset.interactions.df\n", - " history = interactions_data[interactions_data.user_id.isin(users)]\n", - "\n", - " for user in users:\n", - "\n", - " history_for_user = history[history.user_id.isin([user])]\n", - " hist = items.join(history_for_user.set_index('item_id'), on='item_id')[[\"user_id\"] + features]\n", - " hist_for_id = hist.loc[hist['user_id'].isin([user])]\n", - " print(f\"Пользователь с id {user} уже посмотрел:\")\n", - " print(hist_for_id)\n", - " print('')\n", - "\n", - " recommendations_for_id = model.recommend(users, dataset, 3, True)\n", - " rec = pd.merge(recommendations_for_id, items)[[\"user_id\"] + features]\n", - " rec_for_id = rec.loc[rec['user_id'].isin([user])]\n", - "\n", - " print(f\"Пользователю с id {user} можно порекомендовать:\")\n", - " print(rec_for_id)\n", - " print('')\n", - " print('')" - ], - "metadata": { - "id": "GccJWs1aQUmN" - }, - "execution_count": 92, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "model = RandomModel(random_state=32)\n", - "dataset = Dataset.construct(interactions)\n", - "model.fit(dataset)\n", - "users = np.array([666262, 672861, 955527])" - ], - "metadata": { - "id": "v0F4yRc_SB53" - }, - "execution_count": 93, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "visualized(model, dataset, users, [\"title\"], items)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "HxekR0z_SFEZ", - "outputId": "10680310-1264-4874-faed-24f33258f54c" - }, - "execution_count": 94, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Пользователь с id 666262 уже посмотрел:\n", - " user_id title\n", - "11230 666262.0 Дом ночных призраков\n", - "\n", - "Пользователю с id 666262 можно порекомендовать:\n", - " user_id title\n", - "0 666262 Возвращение Будулая\n", - "1 666262 Новые приключения Аладдина (жестовым языком)\n", - "2 666262 Пропавшая грамота\n", - "\n", - "\n", - "Пользователь с id 672861 уже посмотрел:\n", - " user_id title\n", - "11182 672861.0 В ритме сердца\n", - "13578 672861.0 Медвежонок Винни и его друзья\n", - "\n", - "Пользователю с id 672861 можно порекомендовать:\n", - " user_id title\n", - "3 672861 Женщина в беде 3\n", - "4 672861 Гордость и предубеждение\n", - "5 672861 Болванчики\n", - "\n", - "\n", - "Пользователь с id 955527 уже посмотрел:\n", - " user_id title\n", - "8909 955527.0 Признание 5\n", - "\n", - "Пользователю с id 955527 можно порекомендовать:\n", - " user_id title\n", - "6 955527 Солдат Джейн\n", - "7 955527 Спеши любить\n", - "8 955527 Комната (жестовым языком)\n", - "\n", - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "39Ce1wmoTNpZ" - }, - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file