Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions Scripts/ReTrainModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,30 @@
from Utils.util import *
import numpy as np
import os
from Utils.TreeVis import *


folder_list = ["Degradation0","Degradation1","Degradation5","Degradation10"]

RetrainModel = "Retrained"
RetrainModel = "Retrained0123"
RetrainFolder = "Retrained"


model = XGBoostClassifierModel(folder_list[0])
model.load_data("Datasets/"+folder_list[0]+"/"+folder_list[0]+"_Train/")
model.n_estimators = 15
model.train()
folder_list.pop(0)

for i,folder in enumerate(folder_list):
model.load_data("Datasets/"+folder+"/"+folder+"_Train/")
model.retrain()
model = XGBoostClassifierModel("Degredation_0")
#filepath = "/home/ryanm124/ml_hackathon/TrackQuality_package/Projects/Degradation0_Test/Models/Degradation0/"
filepath = "/home/ryanm124/ml_hackathon/TrackQuality_package/Scripts/Projects/Retrained/Models/"
model.load_model(filepath,"Retrained012Trim_XGB")
booster = model.model.get_booster()
treedict = convert(booster)
model.model.set_params(n_estimators=(60 - treedict["n_trees"]))
model.n_estimators = treedict["n_trees"]
datapath = "/home/ryanm124/ml_hackathon/TrackQuality_package/Datasets/Degradation3_Train/"
model.load_data(datapath)
model.retrain()

model.save_model("Projects/"+RetrainFolder+"/Models/",RetrainModel+"_XGB")
model.load_model("Projects/"+RetrainFolder+"/Models/",RetrainModel+"_XGB")

model.load_data("Datasets/Degradation10/Degradation10_Test/")
model.load_data("/home/ryanm124/ml_hackathon/TrackQuality_package/Datasets/Degradation4_Test/")
plot_model(model,"Projects/"+RetrainFolder+"/")
model.test()
model.evaluate(plot=True,save_dir="Projects/"+RetrainFolder+"/Plots/")
73 changes: 37 additions & 36 deletions Scripts/TrainModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,47 +4,48 @@
from Utils.util import *
import numpy as np
import os
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--Train", help = "Train Folder")
parser.add_argument("--Test", help = "Test Folder")
parser.add_argument("--Model", help = "Model file")
args = parser.parse_args()

setmatplotlib()

folder_list = ["Degradation0","Degradation1","Degradation5","Degradation10"]
name_list = ["Degradation0","Degradation1","Degradation5","Degradation10"]
# [folder_list.append("Degradation"+str(i)) for i in range(1,10)]
# [name_list.append("Degradation "+str(i)) for i in range(1,10)]
train_folder = args.Train
test_folder = args.Test
model_folder = args.Model
name = train_folder.split("_")[0]

plot_types = ["ROC","FPR","TPR","score"]

for i,folder in enumerate(folder_list):
os.system("mkdir Projects/"+folder)
os.system("mkdir Projects/"+folder+"/Plots")
os.system("mkdir Projects/"+folder+"/FW")
os.system("mkdir Projects/"+folder+"/Models")

# cutmodel = Binned_CutClassifierModel("Cut")
# cutmodel.load_data("Datasets/"+folder+"/"+folder+"_Test/")
# cutmodel.test()
# cutmodel.evaluate(plot=False,binned=False)
# cutmodel.full_save("Projects/"+folder+"/Models/"+folder+"_Cut/","Cut")
# cutmodel.full_load("Projects/"+folder+"/Models/"+folder+"_Cut/","Cut")

model = XGBoostClassifierModel(name_list[i])
model.load_data("Datasets/"+folder+"/"+folder+"_Train/")

model = XGBoostClassifierModel(name)
if args.Train is not None:
if not os.path.exists('Projects/'+train_folder):
os.system("mkdir Projects/"+train_folder)
os.system("mkdir Projects/"+train_folder+"/Plots")
os.system("mkdir Projects/"+train_folder+"/FW")
os.system("mkdir Projects/"+train_folder+"/Models")

model.load_data("Datasets/"+train_folder+"/")
model.train()
model.save_model("Projects/"+folder+"/Models/",folder+"_XGB")
model.load_model("Projects/"+folder+"/Models/",folder+"_XGB")

model.load_data("Datasets/"+folder+"/"+folder+"_Test/")
model.save_model("Projects/"+train_folder+"/Models/",train_folder+"_XGB")
model.load_model("Projects/"+train_folder+"/Models/",train_folder+"_XGB")
elif args.Model is not None:
model.load_model("Projects/"+model_folder+"/Models/",model_folder+"_XGB")

if args.Test is not None:
if not os.path.exists('Projects/'+test_folder):
os.system("mkdir Projects/"+test_folder)
os.system("mkdir Projects/"+test_folder+"/Plots")
os.system("mkdir Projects/"+test_folder+"/FW")
os.system("mkdir Projects/"+test_folder+"/Models")
model.load_data("Datasets/"+test_folder+"/")
model.test()
model.evaluate(plot=True,save_dir="Projects/"+folder+"/Plots/")
model.full_save("Projects/"+folder+"/Models/"+folder+"/",folder+"_XGB")
model.full_load("Projects/"+folder+"/Models/"+folder+"/",folder+"_XGB")

plot_model(model,"Projects/"+folder+"/")

# precisions = ['ap_fixed<12,6>','ap_fixed<11,6>','ap_fixed<11,5>','ap_fixed<10,6>','ap_fixed<10,5>','ap_fixed<10,4>']
precisions = ['ap_fixed<12,6>','ap_fixed<10,5>']
model.evaluate(plot=True,save_dir="Projects/"+test_folder+"/Plots/")
model.full_save("Projects/"+test_folder+"/Models/"+test_folder+"/",test_folder+"_XGB")
model.full_load("Projects/"+test_folder+"/Models/"+test_folder+"/",test_folder+"_XGB")
plot_model(model,"Projects/"+test_folder+"/")

synth_model(model,sim=True,hdl=True,hls=True,cpp=True,onnx=True,python=True,
test_events=10000,
precisions=precisions,
save_dir="Projects/"+folder+"/")
49 changes: 49 additions & 0 deletions Scripts/dumpTree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from Models.GBDTTrackQualityModel import XGBoostClassifierModel
from Datasets.Dataset import *
from Utils.util import *
from Utils.TreeVis import *
import numpy as np
from scipy.special import expit
import os
import matplotlib.animation as animation
import matplotlib.pyplot as plt
# Plot the first-node 'value' entry of every tree in the saved model (this
# script labels that quantity "Gain") against the tree index, then annotate
# each threshold line with the gain accumulated in the band that starts there.
model = XGBoostClassifierModel("Degredation_0")
model.load_model("/home/ryanm124/ml_hackathon/TrackQuality_package/Scripts/Projects/Retrained/Models/","Retrained0123_XGB")
treedict = convert(model.model.get_booster())

# Descending thresholds; band j collects trees until a gain drops below
# cutValues[j], at which point accumulation moves on to band j + 1.
cutValues = [2000,1000,400,0]
# Explicit float accumulator: np.zeros_like(cutValues) would inherit the
# integer dtype of the list and silently truncate every gain added to it.
gainSum = np.zeros(len(cutValues), dtype=float)
gains = []
treeNum = []
lastBand = len(cutValues) - 1
j = 0
for i in range(treedict['n_trees']):
    gain = treedict['trees'][i][0]['value'][0]
    print("Tree #: ",i)
    print(gain)
    gains.append(gain)
    treeNum.append(i)
    # Advance at most one band per tree, and never past the last band so a
    # gain below the final threshold cannot index outside gainSum.
    if j < lastBand and gain < cutValues[j]:
        j += 1
    gainSum[j] += gain

# percentGain[k] is band k+1's gain as a percentage of everything accumulated
# in the earlier bands. One entry is always produced per band so that
# percentGain[band - 1] stays aligned even when the earlier bands sum to 0.
percentGain = []
total = 0.0
for band, bandGain in enumerate(gainSum):
    if band != 0:
        if total != 0:
            percentGain.append((bandGain / total) * 100)
        else:
            percentGain.append(float("nan"))
    total += bandGain

fig, ax = plt.subplots()
ax.plot(treeNum, gains,marker="o")
# Guard against a model with no trees, where treeNum[-1] would raise.
xMax = treeNum[-1] if treeNum else 0
for band, value in enumerate(cutValues):
    ax.plot([0,xMax],[value,value],linestyle='dashed')
    if band != 0:
        # gainSum is now floating point, so format it instead of using str().
        plt.text(0,value+10,"{:.2f}".format(gainSum[band])+" {:4.2f}".format(percentGain[band-1])+"%",fontsize=12)

ax.set(xlabel='Tree #', ylabel='Gain',
       title='Gain vs Tree')
ax.set_ylim([0, 3000])
ax.grid()

fig.savefig("Gain.png")
24 changes: 24 additions & 0 deletions Scripts/maskTrees.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from Models.GBDTTrackQualityModel import XGBoostClassifierModel
from Datasets.Dataset import *
from Utils.util import *
from Utils.TreeVis import *
import numpy as np
from scipy.special import expit
import os
import matplotlib.animation as animation
# Trim a saved model: keep only the trees that come before the first tree
# whose first-node 'value' entry is below cut_value, then save the result
# under a new name in the same directory.
model = XGBoostClassifierModel("Degredation_0")
filepath = "/home/ryanm124/ml_hackathon/TrackQuality_package/Scripts/Projects/Retrained/Models/"
model.load_model(filepath,"Retrained012_XGB")
cut_value = 400.0

booster = model.model.get_booster()
treedict = convert(booster)

# Index of the first tree below the cut, or None when every tree passes.
# (The previous unused `trees` list iterated the whole booster for nothing.)
first_below_cut = next(
    (i for i in range(treedict['n_trees'])
     if treedict['trees'][i][0]['value'][0] < cut_value),
    None,
)
if first_below_cut is not None:
    # Slicing the booster keeps trees [0, first_below_cut).
    booster = booster[:first_below_cut]

model.model._Booster = booster
model.save_model(filepath,"Retrained012Trim_XGB")
80 changes: 80 additions & 0 deletions Scripts/plotScores.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from Models.GBDTTrackQualityModel import XGBoostClassifierModel, TFDFClassifierModel
from Models.CutTrackQualityModel import CutClassifierModel
from Datasets.Dataset import *
from Utils.util import *
from Utils.TreeVis import *
import numpy as np
from scipy.special import expit
import os
import matplotlib.animation as animation
import matplotlib.pyplot as plt
# For the Degradation0 model evaluated on the Degradation1 training set, find
# the label-1 ("real") and label-0 ("fake") events with the lowest and highest
# predictions, and plot each one's per-tree output against the tree index.
model = XGBoostClassifierModel("Degredation_0")
filepath = "/home/ryanm124/ml_hackathon/TrackQuality_package/Projects/Degradation0_Test/Models/Degradation0/"
model.load_model(filepath,"Degradation0_XGB")
model.load_data("/home/ryanm124/ml_hackathon/TrackQuality_package/Datasets/Degradation1_Train/")

treedict = convert(model.model.get_booster())
n_trees = treedict['n_trees']
x_tree = range(n_trees)
max_predict_fake = 0.0
min_predict_fake = 10.0
max_predict_real = 0.0
min_predict_real = 10.0
worst_fake_scores= np.zeros([n_trees])
worst_real_scores= np.zeros([n_trees])
best_fake_scores= np.zeros([n_trees])
best_real_scores= np.zeros([n_trees])
n_events = len(model.DataSet.X_train)
# Scan at most 10000 events, but never more than the dataset holds: slicing
# past the end yields an empty frame and `.values[0][0]` would then raise.
for event in range(min(10000, n_events)):
    if(event%1000==0):
        print("event:",event)
    temp_output_array = np.zeros([n_trees])
    accumulation = 0
    for i in range(n_trees):
        value = evaluateTree(model.DataSet.X_train[event:event+1],treedict['trees'][i][0],model.training_features)
        temp_output_array[i] = value
        accumulation += value

    # The prediction is the logistic function of the summed tree outputs.
    prediction = expit(accumulation)
    label = model.DataSet.y_train[event:event+1].values[0][0]
    if label == 1:
        if prediction < min_predict_real:
            min_predict_real = prediction
            worst_real_scores = temp_output_array
        if prediction > max_predict_real:
            max_predict_real = prediction
            best_real_scores = temp_output_array
    if label == 0:
        if prediction > max_predict_fake:
            max_predict_fake = prediction
            worst_fake_scores = temp_output_array
        if prediction < min_predict_fake:
            min_predict_fake = prediction
            best_fake_scores = temp_output_array

fig, ax = plt.subplots()
# Same figure/axes reused for all four images; the axes are cleared between
# plots exactly as before (no clear before the first, none after the last).
score_plots = [
    (worst_real_scores, "WorstReal.png"),
    (worst_fake_scores, "WorstFake.png"),
    (best_real_scores, "BestReal.png"),
    (best_fake_scores, "BestFake.png"),
]
for plot_index, (scores, filename) in enumerate(score_plots):
    if plot_index != 0:
        plt.cla()
    ax.plot(x_tree, scores,marker="o")
    ax.set(xlabel='Tree #', ylabel='Output',
           title='Output vs Tree')
    ax.grid()
    fig.savefig(filename)

print("max_predict_fake",max_predict_fake)
print("min_predict_real",min_predict_real)
print("min_predict_fake",min_predict_fake)
print("max_predict_real",max_predict_real)
87 changes: 87 additions & 0 deletions Scripts/plotScoresVsDeg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from Models.GBDTTrackQualityModel import XGBoostClassifierModel, TFDFClassifierModel
from Models.CutTrackQualityModel import CutClassifierModel
from Datasets.Dataset import *
from Utils.util import *
from Utils.TreeVis import *
import numpy as np
from scipy.special import expit
import os
import matplotlib.animation as animation
import matplotlib.pyplot as plt

def calcAvgDiff(scores):
    """Return the mean absolute difference between consecutive scores.

    Args:
        scores: A sliceable sequence of numbers (list or 1-D array).

    Returns:
        sum(|scores[k] - scores[k+1]|) / (len(scores) - 1), or 0.0 when
        there are fewer than two scores (no consecutive pair exists, and
        the plain formula would divide by zero for a single score).
    """
    n_pairs = len(scores) - 1
    if n_pairs < 1:
        return 0.0
    # Start from 0.0 so integer inputs still produce a float result.
    diff_sum = sum((abs(a - b) for a, b in zip(scores, scores[1:])), 0.0)
    return diff_sum / n_pairs


# Using the Degradation0 model, evaluate each Degradation1..10 training set in
# ten chunks of 1000 events. In every chunk, pick the label-1 ("real") and
# label-0 ("fake") events with the lowest/highest predictions, measure how
# much their per-tree outputs vary from tree index 40 onward, and plot the
# per-dataset mean of that variation.
model = XGBoostClassifierModel("Degredation_0")
filepath = "/home/ryanm124/ml_hackathon/TrackQuality_package/Projects/Degradation0_Test/Models/Degradation0/"
model.load_model(filepath,"Degradation0_XGB")
folder_list = ["Degradation"+str(level) for level in range(1,11)]
treedict = convert(model.model.get_booster())
n_trees = treedict['n_trees']
x_tree = range(n_trees)
avgDiffMean_worstReal = []
avgDiffMean_worstFake = []
avgDiffMean_bestReal = []
avgDiffMean_bestFake = []

for folder in folder_list:
    model.load_data("/home/ryanm124/ml_hackathon/TrackQuality_package/Datasets/"+folder+"_Train/")
    avgDiff_worstReal = []
    avgDiff_worstFake = []
    avgDiff_bestReal = []
    avgDiff_bestFake = []
    for chunk in range(10):
        # Extremes are reset for every chunk of 1000 events.
        max_predict_fake, min_predict_fake = 0.0, 10.0
        max_predict_real, min_predict_real = 0.0, 10.0
        worst_fake_scores = np.zeros([n_trees])
        worst_real_scores = np.zeros([n_trees])
        best_fake_scores = np.zeros([n_trees])
        best_real_scores = np.zeros([n_trees])
        n_events = len(model.DataSet.X_train)
        chunk_start = chunk*1000
        for event in range(chunk_start, chunk_start+1000):
            temp_output_array = np.zeros([n_trees])
            accumulation = 0
            for tree_index in range(n_trees):
                value = evaluateTree(model.DataSet.X_train[event:event+1],treedict['trees'][tree_index][0],model.training_features)
                temp_output_array[tree_index] = value
                accumulation += value

            # Prediction is the logistic function of the summed tree outputs.
            prediction = expit(accumulation)
            label = model.DataSet.y_train[event:event+1].values[0][0]
            if label == 1:
                if prediction < min_predict_real:
                    min_predict_real = prediction
                    worst_real_scores = temp_output_array
                if prediction > max_predict_real:
                    max_predict_real = prediction
                    best_real_scores = temp_output_array
            if label == 0:
                if prediction > max_predict_fake:
                    max_predict_fake = prediction
                    worst_fake_scores = temp_output_array
                if prediction < min_predict_fake:
                    min_predict_fake = prediction
                    best_fake_scores = temp_output_array
        avgDiff_worstReal.append(calcAvgDiff(worst_real_scores[40:]))
        avgDiff_bestReal.append(calcAvgDiff(best_real_scores[40:]))
        avgDiff_worstFake.append(calcAvgDiff(worst_fake_scores[40:]))
        avgDiff_bestFake.append(calcAvgDiff(best_fake_scores[40:]))
    # One mean per dataset, averaged over its ten chunks.
    avgDiffMean_worstReal.append(sum(avgDiff_worstReal)/len(avgDiff_worstReal))
    avgDiffMean_worstFake.append(sum(avgDiff_worstFake)/len(avgDiff_worstFake))
    avgDiffMean_bestReal.append(sum(avgDiff_bestReal)/len(avgDiff_bestReal))
    avgDiffMean_bestFake.append(sum(avgDiff_bestFake)/len(avgDiff_bestFake))

fig, ax = plt.subplots()
x_samples = range(1,11)
curves = [
    (avgDiffMean_worstReal, "Real Track Lowest Score"),
    (avgDiffMean_bestReal, "Real Track Highest Score"),
    (avgDiffMean_worstFake, "Fake Track Highest Score"),
    (avgDiffMean_bestFake, "Fake Track Lowest Score"),
]
for means, curve_label in curves:
    ax.plot(x_samples, means,marker="o",label=curve_label)
ax.set(xlabel='Degredation %', ylabel='Avg Diff',
       title='Average Difference vs Degredation')
ax.grid()
ax.legend()
fig.savefig("AverageDiff.png")

25 changes: 25 additions & 0 deletions Scripts/printTree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from Models.GBDTTrackQualityModel import XGBoostClassifierModel,TFDFClassifierModel
from Models.CutTrackQualityModel import CutClassifierModel
from Datasets.Dataset import *
from Utils.util import *
import numpy as np
import os
from Utils.TreeVis import *



# Load the trimmed model, set its n_estimators parameter to the number of
# trees it currently holds, and report the parameters together with how many
# trees remain before reaching 60.
RetrainModel = "Retrained0123"
RetrainFolder = "Retrained"

model = XGBoostClassifierModel("Degredation_0")
#filepath = "/home/ryanm124/ml_hackathon/TrackQuality_package/Projects/Degradation0_Test/Models/Degradation0/"
filepath = "/home/ryanm124/ml_hackathon/TrackQuality_package/Scripts/Projects/Retrained/Models/"
model.load_model(filepath,"Retrained012Trim_XGB")

booster = model.model.get_booster()
treedict = convert(booster)

datapath = "/home/ryanm124/ml_hackathon/TrackQuality_package/Datasets/Degradation3_Train/"
model.load_data(datapath)

current_trees = treedict["n_trees"]
model.model.set_params(n_estimators=current_trees)
params = model.model.get_params()
print(params)
print("trees to train",60-current_trees)
Loading