From a867fbe93d1ec82d0d794d81e4edb0cfb999210c Mon Sep 17 00:00:00 2001 From: Mattia Rigotti <4016834+matrig@users.noreply.github.com> Date: Mon, 22 Feb 2021 22:03:32 +0100 Subject: [PATCH 1/3] Add Financial Dataset from Riquelme et al. 2018, Deep Bayesian Bandits Showdown paper --- genrl/utils/__init__.py | 1 + genrl/utils/data_bandits/__init__.py | 1 + genrl/utils/data_bandits/financial_bandit.py | 112 +++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 genrl/utils/data_bandits/financial_bandit.py diff --git a/genrl/utils/__init__.py b/genrl/utils/__init__.py index b7f4070d..5a029958 100644 --- a/genrl/utils/__init__.py +++ b/genrl/utils/__init__.py @@ -3,6 +3,7 @@ CensusDataBandit, CovertypeDataBandit, DataBasedBandit, + FinancialDataBandit, MagicDataBandit, MushroomDataBandit, StatlogDataBandit, diff --git a/genrl/utils/data_bandits/__init__.py b/genrl/utils/data_bandits/__init__.py index 8ee3757c..0f9b36a3 100644 --- a/genrl/utils/data_bandits/__init__.py +++ b/genrl/utils/data_bandits/__init__.py @@ -2,6 +2,7 @@ from genrl.utils.data_bandits.base import DataBasedBandit from genrl.utils.data_bandits.census_bandit import CensusDataBandit from genrl.utils.data_bandits.covertype_bandit import CovertypeDataBandit +from genrl.utils.data_bandits.financial_bandit import FinancialDataBandit from genrl.utils.data_bandits.magic_bandit import MagicDataBandit from genrl.utils.data_bandits.mushroom_bandit import MushroomDataBandit from genrl.utils.data_bandits.statlog_bandit import StatlogDataBandit diff --git a/genrl/utils/data_bandits/financial_bandit.py b/genrl/utils/data_bandits/financial_bandit.py new file mode 100644 index 00000000..2ac8b3f4 --- /dev/null +++ b/genrl/utils/data_bandits/financial_bandit.py @@ -0,0 +1,112 @@ +from pathlib import Path +from typing import Tuple + +import numpy as np +import pandas as pd +import torch + +from genrl.utils.data_bandits.base import DataBasedBandit +from genrl.utils.data_bandits.utils import download_data 
URL = "https://storage.googleapis.com/bandits_datasets/raw_stock_contexts"


class FinancialDataBandit(DataBasedBandit):
    """A contextual bandit based on Financial Stock data.

    Every row of the stock dataset is used as a context vector. Rewards are
    synthetic: each action's mean reward is a random unit-norm linear function
    of the context, and Gaussian noise whose standard deviation grows with the
    action index is added on top.

    Source:
        https://github.com/tensorflow/models/tree/archive/research/deep_contextual_bandits

    Args:
        path (str, optional): Path to the data. Defaults to "./data/Financial/".
        download (bool, optional): Whether to download the data. Defaults to
            None, which is treated as False.
        force_download (bool, optional): Forwarded unchanged to
            ``download_data``; presumably forces a download even if the file
            exists (confirm against ``download_data``). Defaults to None.
        url (Union[str, None], optional): URL to download data from. Defaults
            to None which implies use of source URL.
        device (str): Device to use for tensor operations.
            "cpu" for cpu or "cuda" for cuda. Defaults to "cpu".

    Attributes:
        n_actions (int): Number of actions available.
        context_dim (int): The length of context vector.
        len (int): The number of examples (context, reward pairs) in the dataset.
        device (torch.device): Device to use for tensor operations.
        rewards (np.ndarray): Noisy reward of every action for every example.
        max_rewards (np.ndarray): Best achievable reward for every example.

    Raises:
        FileNotFoundError: If file is not found at specified path.
    """

    def __init__(self, **kwargs):
        super().__init__(kwargs.get("device", "cpu"))

        self.n_actions = 8

        path = kwargs.get("path", "./data/Financial/")
        download = kwargs.get("download", None)
        force_download = kwargs.get("force_download", None)
        # An explicit ``url=None`` must fall back to the source URL, exactly
        # like an omitted ``url`` (this is what the class docstring promises).
        url = kwargs.get("url")
        if url is None:
            url = URL

        if download:
            fpath = download_data(path, url, force_download)
        else:
            fpath = Path(path).joinpath("raw_stock_contexts")

        # The first line of the file is skipped and the remainder is parsed as
        # space-separated float32 values; rows containing NaN are dropped.
        self.df = pd.read_csv(
            fpath, header=None, skiprows=[0], sep=" ", dtype=np.float32
        ).dropna()

        # Every column is part of the context; rewards are generated separately.
        self.context_dim = self.df.shape[1]
        self.len = len(self.df)

        self._generate_rewards()

    def _generate_rewards(self) -> None:
        """Draw a fresh random linear reward model and evaluate it on all rows.

        Sets ``self.rewards`` (one column per action) and ``self.max_rewards``
        (row-wise maximum of ``self.rewards``).
        """
        # Additive noise level for each action: 0.01, 0.02, ..., 0.01 * n_actions.
        noise_stds = [0.01 * (i + 1) for i in range(self.n_actions)]

        # One weight vector per action, rescaled to unit L2 norm.
        betas = np.random.uniform(-1, 1, (self.context_dim, self.n_actions))
        betas /= np.linalg.norm(betas, axis=0)

        mean_rewards = np.dot(self.df.to_numpy(), betas)
        # ``noise_stds`` broadcasts along the last axis, i.e. one std per action.
        noise = np.random.normal(scale=noise_stds, size=mean_rewards.shape)

        self.rewards = mean_rewards + noise
        self.max_rewards = np.max(self.rewards, axis=1)

    def reset(self) -> torch.Tensor:
        """Reset bandit by shuffling the examples and get new context.

        The rewards are regenerated after shuffling, so every reset also draws
        new random reward weights and noise.

        Returns:
            torch.Tensor: Current context selected by bandit.
        """
        self._reset()
        self.df = self.df.sample(frac=1).reset_index(drop=True)
        self._generate_rewards()
        return self._get_context()

    def _compute_reward(self, action: int) -> Tuple[float, float]:
        """Compute the reward for a given action.

        Args:
            action (int): The action to compute reward for.

        Returns:
            Tuple[float, float]: Reward of ``action`` and the maximum reward
                over all actions, both for the current example.
        """
        # ``self.idx`` is not set in this class; presumably the base class
        # maintains it as the index of the current example.
        r = self.rewards[self.idx, action]
        max_r = self.max_rewards[self.idx]
        return r, max_r

    def _get_context(self) -> torch.Tensor:
        """Get the vector for current selected context.

        Returns:
            torch.Tensor: Current context vector.
        """
        # Hand torch a plain ndarray rather than a pandas Series.
        return torch.tensor(
            self.df.iloc[self.idx].to_numpy(),
            device=self.device,
            dtype=torch.float,
        )
URL = (
    "https://storage.googleapis.com/bandits_datasets/jester_data_40jokes_19181users.npy"
)


class JesterDataBandit(DataBasedBandit):
    """A contextual bandit based on the Jester Dataset of Goldberg et al., 2001.

    The data is a 2-D array. For every row, the first ``context_dim`` columns
    form the context and the remaining ``n_actions`` columns are the rewards
    of the individual actions.

    Source:
        https://github.com/tensorflow/models/tree/archive/research/deep_contextual_bandits

    Args:
        path (str, optional): Path to the data. Defaults to "./data/Jester/".
        download (bool, optional): Whether to download the data. Defaults to
            None, which is treated as False.
        force_download (bool, optional): Forwarded unchanged to
            ``download_data``; presumably forces a download even if the file
            exists (confirm against ``download_data``). Defaults to None.
        url (Union[str, None], optional): URL to download data from. Defaults
            to None which implies use of source URL.
        n_actions (int, optional): Number of trailing columns used as rewards.
            Defaults to 8.
        device (str): Device to use for tensor operations.
            "cpu" for cpu or "cuda" for cuda. Defaults to "cpu".

    Attributes:
        n_actions (int): Number of actions available.
        context_dim (int): The length of context vector.
        len (int): The number of examples (context, reward pairs) in the dataset.
        device (torch.device): Device to use for tensor operations.
        rewards (np.ndarray): Reward of every action for every example.
        max_rewards (np.ndarray): Best achievable reward for every example.

    Raises:
        FileNotFoundError: If file is not found at specified path.
        ValueError: If the loaded array is not 2-D or has too few columns to
            leave at least one context column besides the reward columns.
    """

    def __init__(self, **kwargs):
        super().__init__(kwargs.get("device", "cpu"))

        self.n_actions = kwargs.get("n_actions", 8)

        path = kwargs.get("path", "./data/Jester/")
        download = kwargs.get("download", None)
        force_download = kwargs.get("force_download", None)
        # An explicit ``url=None`` must fall back to the source URL, exactly
        # like an omitted ``url`` (this is what the class docstring promises).
        url = kwargs.get("url")
        if url is None:
            url = URL

        if download:
            fpath = download_data(path, url, force_download)
        else:
            fpath = Path(path).joinpath("jester_data_40jokes_19181users.npy")
        self.df = np.load(fpath)

        # Fail loudly instead of silently ending up with an empty or negative
        # context dimension.
        if self.df.ndim != 2 or not 0 < self.n_actions < self.df.shape[1]:
            raise ValueError(
                "Expected a 2-D array with more than n_actions columns, "
                f"got shape {self.df.shape} for n_actions={self.n_actions}"
            )

        self.context_dim = self.df.shape[1] - self.n_actions
        self.len = len(self.df)

        self._split_rewards()

    def _split_rewards(self) -> None:
        """Refresh ``rewards`` and ``max_rewards`` from the trailing columns."""
        self.rewards = self.df[:, self.context_dim :]
        self.max_rewards = np.max(self.rewards, axis=1)

    def reset(self) -> torch.Tensor:
        """Reset bandit by shuffling columns and rows and get new context.

        Columns are permuted first, so which columns act as context and which
        act as rewards changes on every reset; rows are shuffled afterwards.

        Returns:
            torch.Tensor: Current context selected by bandit.
        """
        self._reset()
        # Fancy indexing yields a new array, so the in-place row shuffle below
        # acts on that new array.
        self.df = self.df[:, np.random.permutation(self.df.shape[1])]
        np.random.shuffle(self.df)

        self._split_rewards()

        return self._get_context()

    def _compute_reward(self, action: int) -> Tuple[float, float]:
        """Compute the reward for a given action.

        Args:
            action (int): The action to compute reward for.

        Returns:
            Tuple[float, float]: Reward of ``action`` and the maximum reward
                over all actions, both for the current example.
        """
        # ``self.idx`` is not set in this class; presumably the base class
        # maintains it as the index of the current example.
        r = self.rewards[self.idx, action]
        max_r = self.max_rewards[self.idx]
        return r, max_r

    def _get_context(self) -> torch.Tensor:
        """Get the vector for current selected context.

        Returns:
            torch.Tensor: Current context vector.
        """
        # Only the leading ``context_dim`` columns are context; the trailing
        # columns are rewards and must not be exposed to the agent.
        return torch.tensor(
            self.df[self.idx, : self.context_dim],
            device=self.device,
            dtype=torch.float,
        )