-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathurbain_traffic_sao_paulo.py
114 lines (89 loc) · 3.65 KB
/
urbain_traffic_sao_paulo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Sep 24 19:49:36 2022
@author: tarcisio
"""
import matplotlib.pyplot as plt
import pandas as pd
# Load the Sao Paulo urban-traffic data set; the CSV uses ';' as separator.
# The path is kept in one named constant so it is easy to spot and change.
DATA_PATH = (
    '/Users/tarcisio/Documents/Projects_python/behavior_urban_Traffic/'
    'Behavior of the urban traffic of the city of Sao Paulo in Brazil/'
    'Behavior of the urban traffic of the city of Sao Paulo in Brazil.csv'
)
traffic = pd.read_csv(DATA_PATH, sep=';')

# Quick look: first and last rows, then column dtypes and non-null counts.
print(traffic.head(5))
print(traffic.tail(5))
traffic.info()

# Clean the data: the slowness column is treated as text that uses ',' as
# the decimal mark, so swap it for '.' and convert the column to float.
# regex=False makes the literal replacement explicit, independent of the
# pandas version's default for `regex`.
traffic['Slowness in traffic (%)'] = (
    traffic['Slowness in traffic (%)']
    .str.replace(',', '.', regex=False)
    .astype(float)
)

# Summary statistics of the slowness column. They are printed explicitly
# (a bare expression shows nothing when the file runs as a script) and only
# after the conversion, so the statistics are numeric rather than textual.
print(traffic['Slowness in traffic (%)'].describe())
# Histogram of the slowness column, drawn directly with pyplot.
slowness = traffic['Slowness in traffic (%)']
plt.hist(slowness)
plt.show()

# The same histogram through the pandas Series.plot accessor, this time with
# an x-label and a title. The "Frequency" y-label is generated by default.
hist_ax = slowness.plot.hist()
hist_ax.set_xlabel('Slowness in traffic (%)')
hist_ax.set_title('Distribution of Slowness in traffic (%)')
plt.show()
# Compare the incident types by their totals. The incident columns are
# isolated by dropping the two non-incident columns, 'Hour (Coded)' and
# 'Slowness in traffic (%)', before summing.
incidents = traffic.drop(columns=['Hour (Coded)', 'Slowness in traffic (%)'])

# DataFrame.sum() returns a Series with one total per column. It is computed
# once and printed, since a bare expression shows nothing in a script.
incident_totals = incidents.sum()
print(incident_totals)

# Because the totals are a Series, Series.plot.barh() can draw them as a
# horizontal bar chart.
incident_totals.plot.barh()
plt.show()
# Scatter plot of slowness against 'Lack of electricity'.
# Reminder: on a DataFrame the chart is requested through the `.plot` accessor.
traffic.plot(
    kind='scatter',
    x='Slowness in traffic (%)',
    y='Lack of electricity',
)
plt.show()
# Incident totals restricted to the rows whose slowness is 20% or more.
is_slow = traffic["Slowness in traffic (%)"] >= 20
slowness_20_or_more = traffic.loc[is_slow]
incidents_20_or_more = slowness_20_or_more.drop(
    columns=['Hour (Coded)', 'Slowness in traffic (%)']
)
incident_frequencies = incidents_20_or_more.sum()

# Reminder: the bar chart is requested through the Series `.plot` accessor.
incident_frequencies.plot.barh()
plt.show()
# Plot the five weekdays, one chart per day.
# The table is cut into consecutive slices of 27 rows, paired in order with
# the weekday names (5 slices * 27 rows = rows 0..134).
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
traffic_per_day = {
    day: traffic.iloc[27 * position:27 * (position + 1)]
    for position, day in enumerate(days)
}

# One line chart per day; the shared y-range keeps the charts comparable.
for day, day_frame in traffic_per_day.items():
    day_frame.plot.line(x='Hour (Coded)', y='Slowness in traffic (%)')
    plt.title(day)
    plt.ylim([0, 25])
    plt.show()
# Another example: all five weekdays overlaid on a single chart.
# plt.plot is called directly so that every line is drawn on the same axes;
# the per-day DataFrame.plot calls used earlier in this script each produced
# a separate chart.
# `days` and `traffic_per_day` are reused exactly as built earlier in this
# script instead of being rebuilt here with identical values.
for day in days:
    day_frame = traffic_per_day[day]
    plt.plot(
        day_frame['Hour (Coded)'],
        day_frame['Slowness in traffic (%)'],
        label=day,
    )
plt.title('Difference by day')
plt.legend()
plt.show()
# Grid of charts: a single figure laid out as 3 rows x 2 columns.
plt.figure(figsize=(10, 12))

# Cells 1-5: one chart per weekday, all on the same y-range.
for cell, day in enumerate(days, start=1):
    day_frame = traffic_per_day[day]
    plt.subplot(3, 2, cell)
    plt.plot(day_frame['Hour (Coded)'], day_frame['Slowness in traffic (%)'])
    plt.title(day)
    plt.ylim([0, 25])
# Calling plt.show() here would display the grid with just these five charts.

# Cell 6: one extra chart in the grid with all the days overlaid.
plt.subplot(3, 2, 6)
for day in days:
    day_frame = traffic_per_day[day]
    plt.plot(
        day_frame['Hour (Coded)'],
        day_frame['Slowness in traffic (%)'],
        label=day,
    )
plt.ylim([0, 25])
plt.legend()
plt.show()