forked from qrfaction/toxic_competition
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
executable file
·64 lines (53 loc) · 1.61 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import pandas as pd
import numpy as np
# Directory prefix prepended to input file names (see get_corr).
PATH='data/'
def cal_mean():
    """Print summary statistics for each train/validation fold of the labels.

    Reads ``data/labels.csv``, splits its rows into 6 consecutive
    (unshuffled) folds and, for every fold, prints ``describe()`` of the
    training part followed by ``describe()`` of the validation part, so the
    two distributions can be compared by eye.
    """
    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20; the
    # replacement lives in ``sklearn.model_selection`` and takes the number of
    # folds as ``n_splits`` instead of the sample count plus ``n_folds``.
    from sklearn.model_selection import KFold

    labels = pd.read_csv('data/labels.csv')
    kf = KFold(n_splits=6, shuffle=False)
    # ``split`` yields positional index arrays, hence ``iloc`` below.
    for train_index, valid_index in kf.split(labels):
        tr_set = labels.iloc[train_index]
        valid_set = labels.iloc[valid_index]
        print(tr_set.describe())
        print(valid_set.describe())
def get_corr():
    """Print the pairwise correlation matrix of the level/score columns.

    Only the columns listed below are loaded from ``clean_train.csv``
    (located under ``PATH``); the result of ``DataFrame.corr()`` on them is
    written to stdout.
    """
    level_columns = [
        'toxicity_score_level',
        'quoting_attack_level',
        'recipient_attack_level',
        'third_party_attack_level',
        'other_attack_level',
        'toxicity_level',
        'attack_level',
    ]
    train_levels = pd.read_csv(PATH+'clean_train.csv', usecols=level_columns)
    correlations = train_levels.corr()
    print(correlations)
def bagging():
    """Blend several prediction files into one weighted-average submission.

    Each file named in the weight table is read as CSV, its six label
    columns are scaled by the file's weight and accumulated into a frame
    loaded from ``data/sample_submission.csv``. The accumulated columns are
    then divided by the sum of the weights and the result is written,
    gzip-compressed, to ``output.csv.gz``.
    """
    label_columns = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

    # Prediction file -> blending weight. Disabled entries are kept from
    # the original for reference.
    weights_by_file = {
        # 'GRU1.csv.gz':6,
        # 'GRU2.csv.gz':6,
        # 'GRU3.csv.gz':6,
        # 'frGRU1.csv.gz':4,
        # 'gloveGRU1.csv.gz':4,
        # 'kernel.csv.gz':26,
        # 'kernel2.csv.gz':10,
        # 'focalloss19.csv.gz':10,
        'base1.gz': 1,
        'base2.gz': 1,
    }

    blended = pd.read_csv('data/sample_submission.csv')
    # Start the accumulator from zero in every label column.
    blended[label_columns] = 0

    total_weight = 0
    for prediction_file in weights_by_file:
        file_weight = weights_by_file[prediction_file]
        predictions = pd.read_csv(prediction_file)
        blended[label_columns] += file_weight*predictions[label_columns]
        total_weight += file_weight

    # Normalise the weighted sum into a weighted mean.
    blended[label_columns] /= total_weight
    blended.to_csv('output.csv.gz', index=False, compression='gzip')
# Script driver: only the fold-statistics check runs; the remaining
# experiments are left disabled.
# NOTE(review): post_deal is not defined in the visible part of this file.
cal_mean()
# post_deal()
# bagging()
# get_corr()