-
Notifications
You must be signed in to change notification settings - Fork 14
/
estimate.py
231 lines (199 loc) · 9.83 KB
/
estimate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# This code is from the paper:
# Berbeglia, G., Garassino, A., & Vulcano, G. (2018). A comparative empirical study of discrete choice models in retail operations. Available at SSRN 3136816.
# Electronic companion of the paper:
# "A Comparative Empirical Study of Discrete Choice Models in Retail Operations" by
# - Gerardo Berbeglia: Melbourne Business School, University of Melbourne, [email protected]
# - Agustin Garassino: School of Sciences, Universidad de Buenos Aires, and School of Business, Universidad Torcuato di Tella, Buenos Aires, Argentina, [email protected]
# - Gustavo Vulcano: School of Business, Universidad Torcuato di Tella, and CONICET, Buenos Aires, Argentina, [email protected]
# This code was written in Python and tested on Ubunutu Linux 17.04 operating system.
# It is designed with focus on its reusability and maintainability, taking advantage of several object oriented programming (OOP) features of Python.
# The implementation of the different algorithms is intended to be self-explanatory in terms of the names of the objects and functions used along the way.
import json
import os
import sys
from python_choice_models.estimation.market_explore.ranked_list import MIPMarketExplorer
from python_choice_models.estimation.expectation_maximization.markov_chain import MarkovChainExpectationMaximizationEstimator
from python_choice_models.estimation.expectation_maximization.ranked_list import RankedListExpectationMaximizationEstimator
from python_choice_models.estimation.expectation_maximization.latent_class import LatentClassExpectationMaximizationEstimator
from python_choice_models.estimation.maximum_likelihood.random_choice import RandomChoiceModelMaximumLikelihoodEstimator
from python_choice_models.estimation.maximum_likelihood import MaximumLikelihoodEstimator
from python_choice_models.estimation.maximum_likelihood.latent_class import LatentClassFrankWolfeEstimator
from python_choice_models.estimation.maximum_likelihood.ranked_list import RankedListMaximumLikelihoodEstimator
from python_choice_models.settings import Settings
from python_choice_models.models import Model, MixedLogitModel, MultinomialLogitModel, ExponomialModel, LatentClassModel, MarkovChainModel, MarkovChainRank2Model, NestedLogitModel, RandomChoiceModel, RankedListModel
from python_choice_models.transactions.base import Transaction
GLOBAL_TIME_LIMIT = 1800
NORMAL_SETTINGS = {
'linear_solver_partial_time_limit': None,
'non_linear_solver_partial_time_limit': None,
'solver_total_time_limit': 1800.0,
}
RANKED_LIST_SETTINGS = {
'linear_solver_partial_time_limit': 300,
'non_linear_solver_partial_time_limit': 300,
'solver_total_time_limit': 1800.0,
}
LATENT_CLASS_SETTINGS = {
'linear_solver_partial_time_limit': None,
'non_linear_solver_partial_time_limit': 300,
'solver_total_time_limit': 1800.0,
}
estimators = {
'max': {
'name': 'Standard Maximum Likelihood',
'models': {
'exp': {
'estimator': MaximumLikelihoodEstimator(),
'model_class': lambda products: ExponomialModel.simple_deterministic(products),
'name': 'Exponomial',
'settings': NORMAL_SETTINGS
},
'mkv': {
'estimator': MaximumLikelihoodEstimator(),
'model_class': lambda products: MarkovChainModel.simple_deterministic(products),
'name': 'Markov Chain',
'settings': NORMAL_SETTINGS
},
'mkv2': {
'estimator': MaximumLikelihoodEstimator(),
'model_class': lambda products: MarkovChainRank2Model.simple_deterministic(products),
'name': 'Markov Chain Rank 2',
'settings': NORMAL_SETTINGS
},
'mnl': {
'estimator': MaximumLikelihoodEstimator(),
'model_class': lambda products: MultinomialLogitModel.simple_deterministic(products),
'name': 'Multinomial Logit',
'settings': NORMAL_SETTINGS
},
'nl': {
'estimator': MaximumLikelihoodEstimator(),
'model_class': lambda products: NestedLogitModel.simple_deterministic_ordered_nests(products, [1, len(products) // 2, len(products) - (len(products) // 2) - 1]),
'name': 'Nested Logit',
'settings': NORMAL_SETTINGS
},
'mx': {
'estimator': MaximumLikelihoodEstimator(),
'model_class': lambda products: MixedLogitModel.simple_deterministic(products),
'name': 'Mixed Logit',
'settings': NORMAL_SETTINGS
},
'rnd': {
'estimator': RandomChoiceModelMaximumLikelihoodEstimator(),
'model_class': lambda products: RandomChoiceModel.simple_deterministic(products),
'name': 'Random Choice',
'settings': NORMAL_SETTINGS
},
'rl': {
'estimator': RankedListMaximumLikelihoodEstimator.with_this(MIPMarketExplorer()),
'model_class': lambda products: RankedListModel.simple_deterministic_independent(products),
'name': 'Ranked List',
'settings': RANKED_LIST_SETTINGS
},
'lc': {
'estimator': MaximumLikelihoodEstimator(),
'model_class': lambda products: LatentClassModel.simple_deterministic(products, 5),
'name': 'Latent Class',
'settings': RANKED_LIST_SETTINGS
}
}
},
'em': {
'name': 'Expectation Maximization',
'models': {
'mkv': {
'estimator': MarkovChainExpectationMaximizationEstimator(),
'model_class': lambda products: MarkovChainModel.simple_deterministic(products),
'name': 'Markov Chain',
'settings': NORMAL_SETTINGS
},
'rl': {
'estimator': RankedListExpectationMaximizationEstimator.with_this(MIPMarketExplorer()),
'model_class': lambda products: RankedListModel.simple_deterministic_independent(products),
'name': 'Ranked List',
'settings': RANKED_LIST_SETTINGS
},
'lc': {
'estimator': LatentClassExpectationMaximizationEstimator(),
'model_class': lambda products: LatentClassModel.simple_deterministic(products, 5),
'name': 'Latent Class',
'settings': RANKED_LIST_SETTINGS
}
}
},
'fw': {
'name': 'Frank Wolfe/Conditional Gradient',
'models': {
'lc': {
'estimator': LatentClassFrankWolfeEstimator(),
'model_class': lambda products: LatentClassModel.simple_deterministic(products, 1),
'name': 'Latent Class',
'settings': NORMAL_SETTINGS
},
}
}
}
def read_synthetic_instance(file_name):
with open(file_name, 'r') as f:
data = json.loads(f.read())
ground_truth = Model.from_data(data['ground_model'])
transactions = Transaction.from_json(data['transactions']['in_sample_transactions'])
return ground_truth, transactions
def print_usage():
print('Use:')
print(' - Run just one model: python estimate.py <estimation_method_code> <model_code> <input_file> <output_file>')
print(' - Example: python estimate.py max mnl examples/instances/30_periods_1_instance.json mnl_estimation.json')
print('')
print('Estimation methods codes:')
for estimation_method, method_info in estimators.items():
print(' - (%s) %s' % (estimation_method, method_info['name']))
print('')
print('Model codes and compatibilities with estimation methods:')
for estimation_method, method_info in sorted(estimators.items(), key=lambda x: x[0]):
print(' - (%s) %s' % (estimation_method, method_info['name']))
for model, model_info in sorted(method_info['models'].items(), key=lambda x: x[0]):
print(' - (%s) %s' % (model, model_info['name']))
print('')
def run(estimation_method, model, input_file, output_file):
print(' * Reading input file...')
ground_truth, transactions = read_synthetic_instance(input_file)
products = ground_truth.products
print(' - Amount of transactions: %s' % len(transactions))
print(' - Amount of products: %s' % len(products))
print(' * Retrieving estimation method...')
model_info = estimators[estimation_method]['models'][model]
print(' * Retrieving settings...')
Settings.new(
linear_solver_partial_time_limit=model_info['settings']['linear_solver_partial_time_limit'],
non_linear_solver_partial_time_limit=model_info['settings']['non_linear_solver_partial_time_limit'],
solver_total_time_limit=model_info['settings']['solver_total_time_limit'],
)
print(' * Creating initial solution...')
model = model_info['model_class'](products)
print(' * Starting estimation...')
if hasattr(model_info['estimator'], 'estimate_with_market_discovery'):
result = model_info['estimator'].estimate_with_market_discovery(model, transactions)
else:
result = model_info['estimator'].estimate(model, transactions)
rmse = ground_truth.soft_rmse_for(result)
print(' * Computing result...')
print('')
print('Soft RMSE: %s' % rmse)
print('')
result.save(output_file)
return rmse
def main():
try:
if len(sys.argv) == 5:
output_file = sys.argv[4]
input_file = sys.argv[3]
model = sys.argv[2]
estimation_method = sys.argv[1]
run(estimation_method, model, input_file, output_file)
else:
print('Wrong number of arguments!\n')
print_usage()
except KeyError as error:
print('Estimator not found!\n')
print_usage()
main()