-
Notifications
You must be signed in to change notification settings - Fork 0
/
.Rhistory
512 lines (512 loc) · 20.2 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
# City names are stored in `city`.
city <- c("Beijing", "Lagos", "Paris", "Rio de Janeiro", "San Juan", "Toronto")
# Temperature values are stored in `temp`, one per city and in the same order.
temp <- c(35, 88, 42, 84, 81, 30)
# Build a data frame whose columns `name` and `temperature` hold the city
# names and the temperature values.
city_temps <- data.frame(
  name = city,
  temperature = temp
)
# Load the dslabs package and its `murders` data frame before using it;
# nothing earlier in this script makes `murders` available.
library(dslabs)
data(murders)
# Store the state names and the rank of each state's population.
states <- murders$state
ranks <- rank(murders$population)
# Create a separate data frame containing the state and rank values from murders.
my_df <- data.frame(states = states, ranks = ranks)
# --Data Frames, Ranks and Orders--
# This exercise is somewhat more challenging. We are going to repeat the previous exercise
# but this time order my_df so that the states are ordered from least populous to most.
# Create variables states and ranks to store the state names and ranks by population size
# respectively.
# Create an object ind that stores the indexes needed to order the population values,
# using the order command.
# For example we could define o <- order(murders$population)
# Create a data frame with both variables following the correct order.
# Use the bracket operator [ to re-order each column in the data frame.
# For example, states[o] orders the state names by population size.
# The columns of the data frame must be in the specific
# order and have the specific names: states, ranks.
# Define a variable states to be the state names from the murders data frame
states <- murders$state
# Define a variable ranks to determine the population size ranks
ranks <- rank(murders$population)
# Define a variable ind to store the indexes needed to order the population values
ind <- order(murders$population)
# Create a data frame my_df with the state name and its rank and ordered from least populous to most
my_df <- data.frame(states = states[ind], ranks = ranks[ind])
# Define a variable states to be the state names from the murders data frame
states <- murders$state
# `murders` is a dataset inside the dslabs package, not a package of its own,
# and there is no `dslabs()` function: attach the package, then load the data.
library(dslabs)
data(murders)
# --Ranks--
# Data frames can be created with the data.frame function.
# Temperature values are stored in temp.
temp <- c(35, 88, 42, 84, 81, 30)
# City names are stored in city.
city <- c("Beijing", "Lagos", "Paris", "Rio de Janeiro", "San Juan", "Toronto")
# Create a data frame containing the city names and temperature values.
city_temps <- data.frame(name = city, temperature = temp)
states <- murders$state
ranks <- rank(murders$population)
# Create a separate data frame containing the state and rank values from murders.
my_df <- data.frame(states = states, ranks = ranks)
# --Data Frames, Ranks and Orders--
# This exercise is somewhat more challenging. We are going to repeat the previous exercise
# but this time order my_df so that the states are ordered from least populous to most.
# Create variables states and ranks to store the state names and ranks by population size
# respectively.
# Create an object ind that stores the indexes needed to order the population values,
# using the order command.
# For example we could define o <- order(murders$population)
# Create a data frame with both variables following the correct order.
# Use the bracket operator [ to re-order each column in the data frame.
# For example, states[o] orders the state names by population size.
# The columns of the data frame must be in the specific
# order and have the specific names: states, ranks.
# Define a variable states to be the state names from the murders data frame
states <- murders$state
# Define a variable ranks to determine the population size ranks
ranks <- rank(murders$population)
# Define a variable ind to store the indexes needed to order the population values
ind <- order(murders$population)
# Create a data frame my_df with the state name and its rank and ordered from least populous to most
my_df <- data.frame(states = states[ind], ranks = ranks[ind])
# Attach dslabs and load its `murders` dataset. The original calls
# `library(murders)` / `dslabs(murders)` fail: no such package or function.
library(dslabs)
data(murders)
# --Ranks--
# Data frames can be created with the data.frame function.
# Temperature values are stored in temp.
temp <- c(35, 88, 42, 84, 81, 30)
# City names are stored in city.
city <- c("Beijing", "Lagos", "Paris", "Rio de Janeiro", "San Juan", "Toronto")
# Create a data frame containing the city names and temperature values.
city_temps <- data.frame(name = city, temperature = temp)
states <- murders$state
ranks <- rank(murders$population)
# Create a separate data frame containing the state and rank values from murders.
my_df <- data.frame(states = states, ranks = ranks)
# --Data Frames, Ranks and Orders--
# This exercise is somewhat more challenging. We are going to repeat the previous exercise
# but this time order my_df so that the states are ordered from least populous to most.
# Create variables states and ranks to store the state names and ranks by population size
# respectively.
# Create an object ind that stores the indexes needed to order the population values,
# using the order command.
# For example we could define o <- order(murders$population)
# Create a data frame with both variables following the correct order.
# Use the bracket operator [ to re-order each column in the data frame.
# For example, states[o] orders the state names by population size.
# The columns of the data frame must be in the specific
# order and have the specific names: states, ranks.
# Define a variable states to be the state names from the murders data frame
states <- murders$state
# Define a variable ranks to determine the population size ranks
ranks <- rank(murders$population)
# Define a variable ind to store the indexes needed to order the population values
ind <- order(murders$population)
# Create a data frame my_df with the state name and its rank and ordered from least populous to most
my_df <- data.frame(states = states[ind], ranks = ranks[ind])
# Attach dslabs and load its `murders` dataset. The data is loaded with
# data(), not with a `dslabs()` call, and `murder`/`murders` are not packages.
library(dslabs)
data(murders)
# --Ranks--
# Data frames can be created with the data.frame function.
# Temperature values are stored in temp.
temp <- c(35, 88, 42, 84, 81, 30)
# City names are stored in city.
city <- c("Beijing", "Lagos", "Paris", "Rio de Janeiro", "San Juan", "Toronto")
# Create a data frame containing the city names and temperature values.
city_temps <- data.frame(name = city, temperature = temp)
states <- murders$state
ranks <- rank(murders$population)
# Create a separate data frame containing the state and rank values from murders.
my_df <- data.frame(states = states, ranks = ranks)
# --Data Frames, Ranks and Orders--
# This exercise is somewhat more challenging. We are going to repeat the previous exercise
# but this time order my_df so that the states are ordered from least populous to most.
# Create variables states and ranks to store the state names and ranks by population size
# respectively.
# Create an object ind that stores the indexes needed to order the population values,
# using the order command.
# For example we could define o <- order(murders$population)
# Create a data frame with both variables following the correct order.
# Use the bracket operator [ to re-order each column in the data frame.
# For example, states[o] orders the state names by population size.
# The columns of the data frame must be in the specific
# order and have the specific names: states, ranks.
# Define a variable states to be the state names from the murders data frame
states <- murders$state
# Define a variable ranks to determine the population size ranks
ranks <- rank(murders$population)
# Define a variable ind to store the indexes needed to order the population values
ind <- order(murders$population)
# Create a data frame my_df with the state name and its rank and ordered from least populous to most
my_df <- data.frame(states = states[ind], ranks = ranks[ind])
# Show the ordered data frame.
my_df
# --Missing values (NA)--
library(dslabs)
# na_example is a vector that contains NA entries; load it with data().
data(na_example)
# Checking the structure
str(na_example)
na_example
# NA propagates through mean(), so the plain mean of a vector with NAs is NA.
mean(na_example)
# Use is.na to create a logical index ind that tells which entries are NA
ind <- is.na(na_example)
ind
# Determine how many NA ind has using the sum function
sum(ind)
# Small illustration: `!` negates a logical index, so x[!ind] keeps the
# entries whose index value is FALSE.
x <- c(1, 2, 3)
ind <- c(FALSE, TRUE, FALSE)
x[!ind]
# Recompute the NA index under its own name (ind was overwritten above).
ind2 <- is.na(na_example)
ind
ind2
# Average only the non-NA entries. Every call must close both the `[` and
# the `(`; the unbalanced attempts made the script unparseable.
mean(na_example[!ind2])
# --Factors & Tables--
# `murders` ships inside the dslabs package: attach dslabs, then load the data.
library(dslabs)
data(murders)
# --Factors--
# Using the str() command, we saw that the region column stores a factor.
# You can corroborate this by using the class command on the region column.
str(murders)
# --Factors & Tables--
library(dslabs)
# Datasets are loaded with data(); there is no `dslabs()` function.
data(murders)
# --Factors--
# Using the str() command, we saw that the region column stores a factor.
# You can corroborate this by using the class command on the region column.
str(murders)
# The function levels shows us the categories for the factor.
# We can see the class of the region variable using class
class(murders$region)
# Determine the number of regions included in this variable
a <- levels(murders$region)
length(a)
# --Table--
# The function table takes a vector as input and returns the frequency of
# each unique element in the vector.
# We will use the table function to answer this question.
# Use the table function in one line of code to
# create a table showing the number of states per region.
# The "c" in `c()` is short for "concatenate," which is the action of connecting items into a chain
# The function `c()` connects all of the strings within it into a single vector, which we can assign to `x`
x <- c("a", "a", "b", "b", "b", "c")
# Here is an example of what the table function does
table(x)
# Write one line of code to show the number of states per region
murder_regiontable <- table(murders$region)
# Show the table "murder_regiontable"
murder_regiontable
# --Factors & Tables--
library(dslabs)
# Datasets are loaded with data(); there is no `dslabs()` function.
data(murders)
# --Factors--
# Using the str() command, we saw that the region column stores a factor.
# You can corroborate this by using the class command on the region column.
str(murders)
# The function levels shows us the categories for the factor.
# It must be applied to the factor column itself: the data frame as a whole
# has no levels, so levels(murders) would not list the regions.
levels(murders$region)
# Determine the number of regions included in this variable
a <- levels(murders$region)
length(a)
x <- c("a", "a", "b", "b", "b", "c")
# Here is an example of what the table function does
table(x)
# Write one line of code to show the number of states per region
murder_regiontable <- table(murders$region)
murder_regiontable
# `murders` is a dataset of the dslabs package: attach dslabs, then load it.
library(dslabs)
data(murders)
# -- Sort --
states <- murders$state
# Sort the state names alphabetically
states <- sort(states)
# Show the first value
states[1]
# Use the full column name: `murders$pop` only works through `$` partial
# matching, which breaks silently if another column starting with "pop" appears.
pop <- murders$population
# Sort the object and save it in the same object
pop <- sort(pop)
# Report the smallest population size
pop[1]
library(dslabs)
# Datasets are loaded with data(); there is no `dslabs()` function.
data(murders)
# -- Sort --
states <- murders$state
# Sort the state names alphabetically
states <- sort(states)
# Show the first value
states[1]
# Use the full column name rather than relying on `$` partial matching of "pop".
pop <- murders$population
# Sort the object and save it in the same object
pop <- sort(pop)
# Report the smallest population size
pop[1]
states[1]
# Reset pop to the unsorted population column and show it.
pop <- murders$population
pop
# -- Order --
# order() returns the indexes needed to put its argument in increasing order,
# i.e. which rows of murders to visit to go from smallest to largest population.
# Access population from the dataset and store it in pop2
pop2 <- murders$population
ord <- order(pop2)
ord
# --Sequences--
# To create a vector containing the numbers from 32 to 99:
m <- 32:99
m
# Create a vector with the multiples of 7, smaller than 50.
seq(7, 49, 7)
# seq() can also generate sequences that increase by the same amount
# but are of a prespecified length, using the length.out argument.
# For example, this line of code
a <- seq(0, 100, length.out = 5) # produces the numbers 0, 25, 50, 75, 100.
# The function is spelled `length`; the misspelled `lenght(a)` fails because
# no such function exists.
length(a)
a
# --Numeric Vectors--
cost <- c(50, 75, 90, 100, 150)
# --Character Vectors--
food <- c("pizza", "burgers", "salads", "cheese", "pasta")
# --Connecting Numeric and Character Vectors--
# A numeric vector can be tied to a character vector of the same length by
# using the character values as the names of the numeric values.
# Here each cost value is labelled with its corresponding food item.
cost <- setNames(cost, food)
cost
# Numeric/Character Vectors and Connecting Numeric and Character Vectors
# --Numeric Vectors--
cost <- c(50, 75, 90, 100, 150)
# --Character Vectors--
food <- c("pizza", "burgers", "salads", "cheese", "pasta")
# --Connecting Numeric and Character Vectors--
# Label each numeric value with the matching character value.
cost <- setNames(cost, food)
cost
temp <- c(35, 88, 42, 84, 81, 30)
city <- c("Beijing", "Lagos", "Paris", "Rio de Janeiro", "San Juan", "Toronto")
temp <- setNames(temp, city)
temp
# --Subsetting Vectors--
# Square brackets select only some of the values of a vector.
# A run of consecutive positions is written with the : operator, so 3:5
# selects positions 3, 4 and 5 (the same as c(3, 4, 5)).
# Cost of the last 3 items in the food list:
cost[3:5]
# Temperatures of the first three cities in the list:
temp[1:3]
# Non-consecutive elements are selected by listing them inside c().
# Because both vectors are named, the elements can be picked by name:
# pizza and pasta are the 1st and 5th food items, Paris and San Juan the
# 3rd and 5th cities.
# Access the cost of pizza and pasta from our food list
cost_new <- cost[c("pizza", "pasta")]
# Access the temperatures of Paris and San Juan
temp_new <- temp[c("Paris", "San Juan")]
cost[3:5]
# Show the 3rd through the 5th element of the cost vector.
cost[3:5]
# Access the cost of pizza and pasta from our food list
cost_new <- cost[c("pizza", "pasta")]
cost
cost_new
# A column of a data frame can be extracted either with `$` or with
# `[["name"]]`; identical() returns TRUE when both results are exactly equal.
a <- murders[["abb"]]
b <- murders$abb
identical(a, b)
library(dslabs)
data(murders)
# The population column of murders can be reached the same two ways.
o <- murders[["population"]]
p <- murders$population
# Returns TRUE if the two values are equal to each other.
identical(o, p)
a <- murders[["abb"]]
b <- murders$abb
identical(a, b)
# R has another type of vector we have not described, the integer class.
# You can create an integer by adding the letter L after a whole number.
class(3L)
# Create a vector of values from 1 to 10 increasing by 0.5 and check its class.
# (This line had lost its comment marker, which made the script unparseable.)
class(seq(1, 10, 0.5))
# Integers can be created by appending "L" to a number.
class(3L)
3L - 3
# A number written without the L suffix is of class numeric.
class(1)
# Define the vector mx
mx <- c(1, 3, 5, "a")
# Note that mx is a character vector: mixing in "a" coerces every element
mx
# Converting back to numeric turns "a" into NA and raises a coercion warning.
mx <- as.numeric(mx)
mx
# Define x as the integers from 1 to 100.
x <- 1:100
# Compute the sum 1 + 1/2^2 + ... + 1/100^2
sum(1 / x^2)
# The infinite series 1/n^2 converges to pi^2 / 6 (not pi / 6), so the
# partial sum above should be close to this value.
pi^2 / 6
pi
# The sum 1 + 2 + ... + n equals n * (n + 1) / 2; check it for n = 100.
sum(x)
100 * 101 / 2
sum(1 / x^2)
x
# Murders per 100,000 inhabitants for every row of the murders data frame.
murder_rate <- with(murders, total / population * 100000)
# Compute the average murder rate using `mean` and store it in object named `avg`
avg <- mean(murder_rate)
# --Logical Vectors--
library(dslabs)
data(murders)
murder_rate <- with(murders, total / population * 100000)
# Logical vector: TRUE where the rate is below 1.
low <- murder_rate < 1
# Positions of the TRUE entries.
which(low)
murders$state[low]
# Combine two conditions element by element with `&`.
ind <- low & murders$region == "Northeast"
# States whose murder_rate is below 1 and whose region is Northeast
murders$state[ind]
# --Example--
# In a previous exercise we computed the murder rate for each state and the average of these numbers.
# How many states are below the average?
murder_rate <- with(murders, total / population * 100000)
# Compute the average murder rate using `mean` and store it in object named `avg`
avg <- mean(murder_rate)
# How many states have murder rates below avg? Summing a logical vector
# counts its TRUE entries.
x <- murder_rate < avg
sum(x)
avg <- mean(murder_rate)
avg
abbs <- c("AK", "MI", "IA")
# Match the abbs to the murders$abb and store in ind
ind <- match(abbs, murders$abb)
# Print state names from ind
murders$state[ind]
abbs <- c("AK", "MI", "IA")
ind <- match(abbs, murders$abb)
library(dslabs)
# Datasets are loaded with data(); there is no `dslabs()` function.
data(murders)
# --Match--
abbs <- c("AK", "MI", "IA")
# Indexes in murders$abb of the entries that match the values of the abbs vector
ind <- match(abbs, murders$abb)
murders$state[ind]
# --%in%--
# match() returns positions. When a logical answer is wanted instead (is each
# element of a first vector present in a second one?), use %in%.
# For example:
# x <- c(2, 3, 5)
# y <- c(1, 2, 3, 4)
# x %in% y
# gives two TRUE followed by a FALSE because 2 and 3 are in y but 5 is not.
# Store the 5 abbreviations in abbs. (remember that they are character vectors)
abbs <- c("MA", "ME", "MI", "MO", "MU")
# Check which entries of abbs are abbreviations in the murders data frame
is.element(abbs, murders$abb)
# --Example2--
# Still working with abbs <- c("MA", "ME", "MI", "MO", "MU"):
# negate the membership test with `!` and pass it to which() to get the index
# of the entries of abbs that are not abbreviations, then show those entries.
# Store the 5 abbreviations in abbs. (remember that they are character vectors)
abbs <- c("MA", "ME", "MI", "MO", "MU")
# Index of the abbreviations that are not part of the dataset, stored in `ind`
ind <- which(!is.element(abbs, murders$abb))
# Names of abbreviations in `ind`
abbs[ind]
# match() against the same vector; print the result and the state names it selects.
ind <- match(abbs, murders$abb)
ind
murders$state[ind]
x <- c(2, 3, 5)
y <- c(1, 2, 3, 4)
is.element(x, y)
abbs <- c("MA", "ME", "MI", "MO", "MU")
ind <- which(!is.element(abbs, murders$abb))
abbs[ind]
x <- c(88, 100, 83, 92, 94)
rank(-x)
library(dplyr)
# --dplyr--
library(dplyr)
library(dslabs)
data(murders)
# --mutate--
# rank(x) assigns 1 to the smallest value of the n-element vector x and n to
# the largest value.
# rank(-x) reverses this: the largest value gets 1 and the smallest gets n.
x <- c(88, 100, 83, 92, 94)
rank(-x)
# Murders per 100,000 inhabitants, then the rank with the highest rate first.
rate <- with(murders, total / population * 100000)
rank_rate <- rank(-rate)
# Store the rank as a new column of murders.
murders <- mutate(murders, rank = rank_rate)
murders
# -- select--
# With dplyr we can use select to show only certain columns.
# For example with this code we would only show the states and population sizes:
# select(murders, state, population)
# Load dplyr
library(dplyr)
library(dslabs)
# Datasets are loaded with data(); there is no `dslabs()` function.
data(murders)
# Use select to show the state names and abbreviations in murders.
select(murders, state, abb)
# --filter--
# The dplyr function filter is used to choose specific rows of the data frame to keep.
# Unlike select which is for columns, filter is for rows.
# For example you can show just the New York row like this:
filter(murders, state == "New York")
# You can use other logical vectors to filter rows.
# --Example--
# Use filter to show the top 5 states with the highest murder rates.
# After we add murder rate and rank, do not change the murders dataset,
# just show the result. Note that you can filter based on the rank column.
# Add the necessary columns
murders <- mutate(murders, rate = total / population * 100000, rank = rank(-rate))
# Filter to show the top 5 states with the highest murder rates
filter(murders, rank <= 5)
# --filter with " != "--
# We can remove rows using the != operator.
# For example to remove Florida we would do this:
no_florida <- filter(murders, state != "Florida")
# --Example--
# Create a new data frame called no_south that removes states from the South region.
# How many states are in this category? You can use the function nrow for this.
# Use filter to create a new data frame no_south
no_south <- filter(murders, region != "South")
# Use nrow() to calculate the number of rows
nrow(no_south)
# Use select to show the state names and abbreviations in murders.
# (This line had lost its leading "# Us", which made it a syntax error.)
select(murders, state, abb)
# --filter--
# Show only the row of murders that belongs to New York.
filter(murders, state == "New York")
# mutate() adds the rate and rank columns to the existing data frame.
murders <- mutate(murders, rate = total / population * 100000, rank = rank(-rate))
murders
# Keep every row whose region is not "South", then count and show those rows.
no_south <- filter(murders, region != "South")
nrow(no_south)
no_south
# any() returns TRUE if at least one value is TRUE;
# all() returns TRUE only if every value is TRUE.
z <- c(TRUE, TRUE, FALSE)
any(z)
all(z)
help("any")
# With no TRUE entry at all, any() returns FALSE.
z <- logical(3)
any(z)