-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathActivity1.py
183 lines (143 loc) · 3.98 KB
/
Activity1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 24 17:47:43 2018
@author: sanath
"""
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
def dist(x, y):
    """Return the Euclidean length of the vector ``(x, y)``.

    Args:
        x: First component; a scalar or a NumPy array.
        y: Second component; a scalar or an array that broadcasts with ``x``.

    Returns:
        ``sqrt(x**2 + y**2)``, evaluated element-wise for array inputs.
    """
    # np.hypot avoids forming x*x and y*y explicitly, so very large (or very
    # small) components no longer overflow (or underflow) before the root.
    return np.hypot(x, y)


# NOTE: the bare string below is a disabled duplicate of dist(); because it
# is only a string expression, the `rad` definition inside never executes.
"""
def rad(lx,ly):
res=np.sqrt((lx*lx)+(ly*ly))
return(res)
"""
def mode(arr):
    """Return the most frequent element of ``arr``.

    Ties are resolved in favour of the element that appears first in
    ``arr``.

    Args:
        arr: Any iterable of values (list, tuple, array, generator, ...).

    Returns:
        The most common element, or ``None`` when ``arr`` is empty or is
        not iterable at all.
    """
    from collections import Counter

    try:
        values = list(arr)
    except TypeError:
        # Not iterable (e.g. None): keep the "no answer" contract.
        return None
    if not values:
        return None

    try:
        counts = Counter(values)
    except TypeError:
        # Unhashable elements (e.g. nested lists) cannot be counted with a
        # Counter; fall back to the slower equality-based count.
        return max(values, key=values.count)

    # Counter preserves first-seen order among equal counts, so the first
    # element to reach the highest count wins.
    return counts.most_common(1)[0][0]
# ---------------------------------------------------------------------------
# Reading data
# ---------------------------------------------------------------------------
# Positional columns 1 and 2 of "Sheet1" are used as the X and Y values of
# each customer. `sheet_name` is the keyword pandas accepts (the older
# `sheetname` spelling has been removed).
dfs = pd.read_excel("Activity_1_Data.xlsx", sheet_name="Sheet1")
x = np.array(dfs.iloc[:, 1])
y = np.array(dfs.iloc[:, 2])

print("Initial Plot of Customer Distribution")
plt.figure(1)
plt.scatter(x, y)
plt.show()

# ---------------------------------------------------------------------------
# Descriptive statistics
# ---------------------------------------------------------------------------
print("___Activity on Data Visualisation___")
meanx = np.mean(x)
print("Mean of x data", meanx)
meany = np.mean(y)
print("Mean of Y data ", meany)
print("_________#####_____________")
medianx = np.median(x)
print("Median of X data", medianx)
mediany = np.median(y)
print("Median of Y data", mediany)
print("_________#####_____________")
modex = mode(list(x))
print("Mode of X data", modex)
modey = mode(list(y))
print("Mode of Y data", modey)
print("_________#####_____________")
stdx = np.std(x)
print("Standard deviation of X is", stdx)
print("Variance of X is ", stdx * stdx)
stdy = np.std(y)
print("Standard deviation of Y is ", stdy)
print("Variance of Y is ", stdy * stdy)
print("_________#####_____________")
print("\n")

# ---------------------------------------------------------------------------
# Choosing the new base location
# ---------------------------------------------------------------------------
# Most of the points (about 150 of 170) lie near the centre and roughly 20
# are far-away outliers, so the median is used as the new base location: it
# is less affected by those outliers than the mean, although with so many
# points concentrated near the centre the mean would not differ much.
print("Our New Base Location should be X= ", medianx, " Y=", mediany, ",to gain max customers")
print("\n")
# Distance of the new base from the origin (0, 0), which is what the script
# reports as the "initial location".
baseradius = dist(medianx, mediany)
print("Distance from initial location to New base location is", baseradius)
print("\n")

# ---------------------------------------------------------------------------
# Retaining the customers within a radius of 2 km of the new base
# ---------------------------------------------------------------------------
retain_radius = 2

l = len(x)  # name kept: the disabled block at the end of the file uses it
ncust = np.arange(1, l + 1)  # 1-based customer numbers, in row order

# Offsets of every customer from the new base; cast to float so the
# vectorised distance also works if the sheet produced object columns.
offset_x = np.asarray(medianx - x, dtype=float)
offset_y = np.asarray(mediany - y, dtype=float)
within_radius = dist(offset_x, offset_y) < retain_radius

# Retained customers: strictly closer than the radius.
clist = ncust[within_radius]
custx = x[within_radius]
custy = y[within_radius]

# Transferred customers: everyone else, still in ascending customer number.
tcust = ncust[~within_radius]
tcustlen = len(tcust)
tcustx = x[~within_radius]
tcusty = y[~within_radius]

print("Retained Customers plot")
plt.figure(2)
plt.scatter(custx, custy)
plt.show()

print("transferred Customers Plot")
plt.figure(3)
plt.scatter(tcustx, tcusty)
plt.show()

# ---------------------------------------------------------------------------
# Saving the two customer-number lists
# ---------------------------------------------------------------------------
for customer_ids, filename, sheet in (
    (clist, "Activity_1_Retain.xlsx", "Retained Customers"),
    (tcust, "Activity_1_Transfer.xlsx", "Transferred Customers"),
):
    try:
        pd.DataFrame(customer_ids).to_excel(filename, sheet_name=sheet, index=False)
    except Exception as err:
        # Saving is best-effort: report why it failed and carry on, without
        # also trapping KeyboardInterrupt/SystemExit as a bare except would.
        print("Could not save file", filename, err)
    else:
        print(filename, "file is successfully saved in ", os.getcwd(), "directory")

# NOTE: the bare string below is disabled code (it never executes). It keeps
# only the points with 0 < x < 4 and 0 < y < 4 and recomputes mean/median.
# NOTE(review): before re-enabling, its print calls should report meanx1,
# meany1, medianx1 and mediany1 (they currently print the unfiltered
# values), and it reuses figure number 3, which the transferred-customers
# plot above already uses.
"""
#Manual removal of Outliers
ya=[]
yb=[]
#data Visualisation
#Here Iam removing the outliers Manually by visualisation from graph
for i in range(l):
if(x[i]<4 and x[i] >0 and y[i] <4 and y[i]>0 ) :
ya.append(x[i])
yb.append(y[i])
# If needed
print("After removal of outliers")
meanx1=np.mean(ya)
print("Mean of X after removal of outliers",meanx)
meany1=np.mean(yb)
print("Mean of Y after removal of outliers",meany)
print("_________#####_____________")
medianx1=np.median(ya)
print("Median of X after removal of outliers",medianx)
mediany1=np.median(yb)
print("Median of Y after removal of outliers",mediany)
plt.figure(3)
plt.scatter(ya,yb)
plt.show()
"""