v1.0 release of seqdecisionlib

luizamfsantos · Feb 2, 2019 · 95825ca · 95825ca
1 parent 53cef4e
commit 95825ca
Show file tree

Hide file tree

Showing 56 changed files with 8,569 additions and 1 deletion.
diff --git a/AdaptiveMarketPlanning/AdaptiveMarketPlanningDriverScript.py b/AdaptiveMarketPlanning/AdaptiveMarketPlanningDriverScript.py
@@ -0,0 +1,99 @@
+"""
+Adaptive Market Planning Driver Script
+
+"""
+
+from collections import namedtuple
+from AdaptiveMarketPlanningModel import AdaptiveMarketPlanningModel
+from AdaptiveMarketPlanningPolicy import AdaptiveMarketPlanningPolicy
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+if __name__ == "__main__":
+	# this is an example of creating a model and running a simulation for a certain trial size
+
+	# define state variables
+	state_names = ['order_quantity', 'counter']
+	init_state = {'order_quantity': 0, 'counter': 0}
+	decision_names = ['step_size']
+
+	# read in variables from excel file
+	file = 'Base parameters.xlsx'
+	raw_data = pd.ExcelFile(file)
+	data = raw_data.parse('parameters')
+	cost = data.iat[0, 2]
+	trial_size = np.rint(data.iat[1, 2]).astype(int)
+	price = data.iat[2, 2]
+	theta_step = data.iat[3, 2]
+	T = data.iat[4, 2]
+	reward_type = data.iat[5, 2]
+
+	# initialize model and store ordered quantities in an array
+	M = AdaptiveMarketPlanningModel(state_names, decision_names, init_state, T,reward_type, price, cost)
+	P = AdaptiveMarketPlanningPolicy(M, theta_step)
+
+	rewards_per_iteration = []
+	learning_list_per_iteration = []
+	for ite in list(range(trial_size)):
+		print("Starting iteration ", ite)
+		reward,learning_list = P.run_policy()
+		M.learning_list=[]
+		#print(learning_list)
+		rewards_per_iteration.append(reward)
+		learning_list_per_iteration.append(learning_list)
+		print("Ending iteration ", ite," Reward ",reward)
+
+
+	nElem = np.arange(1,trial_size+1)
+
+	rewards_per_iteration = np.array(rewards_per_iteration)
+	rewards_per_iteration_sum = rewards_per_iteration.cumsum()
+	rewards_per_iteration_cum_avg = rewards_per_iteration_sum/nElem
+
+	if (reward_type=="Cumulative"):
+		rewards_per_iteration_cum_avg = rewards_per_iteration_cum_avg/T
+		rewards_per_iteration = rewards_per_iteration/T
+
+	optimal_order_quantity = -np.log(cost/price) * 100
+	print("Optimal order_quantity for price {} and cost {} is {}".format(price,cost,optimal_order_quantity))
+	print("Reward type: {}, theta_step: {}, T: {} - Average reward over {} iteratios is: {}".format(reward_type,theta_step,T,trial_size,rewards_per_iteration_cum_avg[-1]))
+
+	ite = np.random.randint(0,trial_size)
+	order_quantity = learning_list_per_iteration[ite]
+	print("Order quantity for iteration {}".format(ite))
+	print(order_quantity)
+
+	#Ploting the reward
+	fig1, axsubs = plt.subplots(1,2,sharex=True,sharey=True)
+	fig1.suptitle("Reward type: {}, theta_step: {}, T: {}".format(reward_type,theta_step,T) )
+
+	axsubs[0].plot(nElem, rewards_per_iteration_cum_avg, 'g')
+	axsubs[0].set_title('Cum_average reward')
+
+	axsubs[1].plot(nElem, rewards_per_iteration, 'g')
+	axsubs[1].set_title('Reward per iteration')
+	#Create a big subplot
+	ax = fig1.add_subplot(111, frameon=False)
+	# hide tick and tick label of the big axes
+	plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
+	ax.set_ylabel('USD', labelpad=0) # Use argument `labelpad` to move label downwards.
+	ax.set_xlabel('Iterations', labelpad=10)
+	plt.show()
+
+
+
+
+	# ploting the analytical sol
+	plt.xlabel("Time")
+	plt.ylabel("Order quantity")
+	plt.title("Analytical vs learned ordered quantity - (iteration {})".format(ite))
+	time = np.arange(0, len(order_quantity))
+	plt.plot(time, time * 0 - np.log(cost/price) * 100, label = "Analytical solution")
+	plt.plot(time, order_quantity, label = "Kesten's Rule for theta_step {}".format(theta_step))
+	plt.legend()
+	plt.show()
+
+
+
diff --git a/AdaptiveMarketPlanning/AdaptiveMarketPlanningModel.py b/AdaptiveMarketPlanning/AdaptiveMarketPlanningModel.py
@@ -0,0 +1,118 @@
+"""
+Adaptive Market Planning Model class
+
+Adapted from code by Donghun Lee (c) 2018
+
+"""
+
+from collections import namedtuple
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+class AdaptiveMarketPlanningModel():
+	"""
+	Base class for model
+	"""
+
+	def __init__(self, state_names, x_names, s_0, T,reward_type,price = 1.0, cost = 1.0, exog_info_fn=None, transition_fn=None, objective_fn=None, seed=20180613):
+		"""
+		Initializes the model
+
+		:param state_names: list(str) - state variable dimension names
+		:param x_names: list(str) - decision variable dimension names
+        :param s_0: dict - need to contain at least information to populate initial state using s_names
+		:param price: float - price p
+		:param cost: float - cost c
+		:param exog_info_fn: function - calculates relevant exogenous information
+		:param transition_fn: function - takes in decision variables and exogenous information to describe how the state
+			   evolves
+		:param objective_fn: function - calculates contribution at time t
+        :param seed: int - seed for random number generator
+        """
+
+		self.init_args = {seed: seed}
+		self.prng = np.random.RandomState(seed)
+		self.init_state = s_0
+		self.T = T
+		self.reward_type = reward_type
+		self.state_names = state_names
+		self.x_names = x_names
+		self.State = namedtuple('State', state_names)
+		self.state = self.build_state(s_0)
+		self.Decision = namedtuple('Decision', x_names)
+		self.obj = 0.0
+		self.past_derivative = 0.0
+		self.cost = cost
+		self.price = price
+		self.t = 0
+		self.learning_list=[]
+
+
+
+	# this function gives a state containing all the state information needed
+	def build_state(self, info):
+		return self.State(*[info[k] for k in self.state_names])
+
+	# this function gives a decision 
+	def build_decision(self, info):
+		return self.Decision(*[info[k] for k in self.x_names])
+
+	# this function gives the exogenous information that is dependent on a random process
+	# computes the f_hat, chnage in the forecast over the horizon
+	def exog_info_fn(self, decision):
+		# return new demand based on a given distribution
+		return {"demand": self.prng.exponential(100)}
+
+	# this function takes in the decision and exogenous information to return
+	# new state
+	def transition_fn(self, decision, exog_info):
+
+		self.learning_list.append(self.state.order_quantity)
+
+		# compute derivative
+		derivative = self.price - self.cost if self.state.order_quantity < exog_info['demand'] else - self.cost
+		# update order quantity
+		new_order_quantity = max(0, self.state.order_quantity + decision.step_size * derivative)
+		print(' step ', decision.step_size)
+		print(' derivative ', derivative)
+		# count number of times derivative changes sign
+		new_counter = self.state.counter + 1 if self.past_derivative * derivative < 0 else self.state.counter
+		self.past_derivative = derivative
+
+
+
+		return {"order_quantity": new_order_quantity, "counter": new_counter}
+
+	# this function calculates how much money we make
+	def objective_fn(self, decision, exog_info):
+		self.order_quantity=self.state.order_quantity
+		obj_part = self.price * min(self.order_quantity, exog_info['demand']) - self.cost * self.state.order_quantity
+		return obj_part
+
+	# this method steps the process forward by one time increment by updating the sum of the contributions, the
+	# exogenous information and the state variable
+	def step(self, decision):
+		self.t_update()
+		exog_info = self.exog_info_fn(decision)
+		onestep_contribution = self.objective_fn(decision, exog_info)
+
+		print("t {}, Price {}, Demand {}, order_quantity {}, contribution {}".format(self.t,self.price,exog_info['demand'],self.order_quantity,onestep_contribution))
+
+		#Check if cumulative or terminal reward
+		if (self.reward_type == 'Cumulative'):
+			self.obj += onestep_contribution
+		else:
+			if (self.t == self.T):
+				self.obj = onestep_contribution
+
+
+		transition_info = self.transition_fn(decision, exog_info)
+		self.state = self.build_state(transition_info)
+
+
+
+	# Update method for time counter
+	def t_update(self):
+		self.t += 1
+		return self.t
diff --git a/AdaptiveMarketPlanning/AdaptiveMarketPlanningPolicy.py b/AdaptiveMarketPlanning/AdaptiveMarketPlanningPolicy.py
@@ -0,0 +1,51 @@
+"""
+Adaptive Market Planning Policy class
+
+"""
+
+from collections import namedtuple
+
+import numpy as np
+from copy import copy
+from AdaptiveMarketPlanningModel import AdaptiveMarketPlanningModel
+
+class AdaptiveMarketPlanningPolicy():
+	"""
+	Base class for policy
+	"""
+
+	def __init__(self, AdaptiveMarketPlanningModel, theta_step):
+		"""
+		Initializes the model
+
+		:param AdaptiveMarketPlanningModel: AdaptiveMarketPlanningModel - model to construct decision for
+		:param theta_step: float - theta step variable
+        """
+
+		self.M = AdaptiveMarketPlanningModel
+		self.theta_step = theta_step
+
+	# returns decision based on harmonic step size policy
+	def harmonic_rule(self):
+		return self.M.build_decision({'step_size': self.theta_step / (self.theta_step + self.M.t - 1)})
+
+	# returns decision based on Kesten's rule policy
+	def kesten_rule(self):
+		return self.M.build_decision({'step_size': self.theta_step / (self.theta_step + self.M.state.counter - 1)})
+
+	# returns decision based on a constant rule policy
+	def constant_rule(self):
+		return self.M.build_decision({'step_size': self.theta_step})
+
+	# returns decision based on a constant rule policy
+	def run_policy(self):
+		model_copy = copy(self.M)
+
+		for t in range(model_copy.T):	 
+			model_copy.step(AdaptiveMarketPlanningPolicy(model_copy, self.theta_step).kesten_rule())
+
+
+
+		return (model_copy.obj,model_copy.learning_list.copy())
+
+
diff --git a/AdaptiveMarketPlanning/Base parameters.xlsx b/AdaptiveMarketPlanning/Base parameters.xlsx
diff --git a/AdaptiveMarketPlanning/ParametricModel parameters.xlsx b/AdaptiveMarketPlanning/ParametricModel parameters.xlsx
diff --git a/AdaptiveMarketPlanning/ParametricModel.py b/AdaptiveMarketPlanning/ParametricModel.py
@@ -0,0 +1,82 @@
+"""
+Adaptive Market Planning Model for variable price subclass
+
+"""
+
+from collections import namedtuple
+from AdaptiveMarketPlanningModel import AdaptiveMarketPlanningModel
+
+import numpy as np
+
+class ParametricModel(AdaptiveMarketPlanningModel):
+	"""
+	Subclass for Adaptive Market Planning
+	"""
+
+	def __init__(self, state_names, x_names, s_0, T, reward_type, cost = 1.0, price_low = 1.0, price_high = 10.0, exog_info_fn=None, transition_fn=None, objective_fn=None, seed=20180613):
+		"""
+		Initializes the model
+
+		See Adaptive Market Planning Model for more details
+        """
+		super().__init__(state_names, x_names, s_0, T, reward_type,cost = cost, exog_info_fn=exog_info_fn, transition_fn=transition_fn, objective_fn=objective_fn, seed=seed)
+		self.past_derivative = np.array([0, 0, 0])
+		self.low = price_low
+		self.high = price_high
+		self.PRICE_PROCESS  ='RW'
+
+	# returns order quantity for a given price and theta vector
+	def order_quantity_fn(self, price, theta):
+		return max(0,theta[0] + theta[1] * price + theta[2] * price ** (-2))
+
+	# returns derivative for a given price and theta vector
+	def derivative_fn(self, price, theta):
+		return np.array([1, price, price ** (-2)])
+
+	# this function takes in the decision and exogenous information to return
+	# new state
+	def transition_fn(self, decision, exog_info):
+
+		self.learning_list.append(self.state.theta)
+		print(' theta ',self.state.theta)
+
+		# compute derivative and update theta
+		derivative = np.array([0, 0, 0])
+		if self.order_quantity_fn(self.state.price, self.state.theta) < exog_info['demand']:
+			derivative = (self.state.price - self.cost) * self.derivative_fn(self.state.price, self.state.theta)
+		else:
+			derivative = (- self.cost) * self.derivative_fn(self.state.price, self.state.theta)
+
+		new_theta = self.state.theta + decision.step_size * derivative
+
+		new_counter = self.state.counter + 1 if np.dot(self.past_derivative, derivative) < 0 else self.state.counter
+		print(' step ', decision.step_size)
+		print(' derivative ', derivative)
+		print('new theta ',new_theta)
+
+
+		self.past_derivative = derivative
+
+		# generate random price
+		if (self.PRICE_PROCESS  == 'RW'):
+			coin = self.prng.uniform()
+			delta = 0
+			if coin < .2:
+				delta = -1
+			elif coin >.8:
+				delta = 1
+
+			new_price = min(self.high,max(self.low,self.state.price + delta))
+		else:
+			new_price = self.prng.uniform(self.low, self.high)
+
+
+
+		return {"counter": new_counter, "price": new_price, "theta": new_theta}
+
+	# this function calculates how much money we make
+	def objective_fn(self, decision, exog_info):
+		self.price = self.state.price
+		self.order_quantity=self.order_quantity_fn(self.state.price, self.state.theta)
+		obj_part = self.state.price * min(self.order_quantity, exog_info['demand']) - self.cost * self.order_quantity
+		return obj_part