generated from carpentries/workshop-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathday1.R
176 lines (110 loc) · 4.09 KB
/
day1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# Running code ----
# A line can be run from the script, or typed straight into the console.
6 * 7

# Function arguments ----
# Arguments are either obligatory or optional; optional ones have defaults.
# Positional call: the string is matched to the first argument of print().
print("Ciao Leipzig")
# Same call with the argument named explicitly.
print(x = "Ciao Leipzig")
# `quote` is optional; FALSE prints the string without quote marks.
print(x = "Ciao Leipzig", quote = FALSE)
print(x = "Hello Kitty")

# Assignment ----
# Store a result under a name with `<-`, then print the stored value.
six_times_seven <- 6 * 7
print(x = six_times_seven)
# Installing and loading packages ----
# Install ggplot2 only when it is missing, so that re-running the script does
# not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# library() attaches the package so its functions can be called by bare name.
library(ggplot2)

# Calling functions from a specific package ----
# Bare name: found because ggplot2 is attached.
ggplot()
# pkg::fun() spells out which package the function comes from.
ggplot2::ggplot()
# Data types and structures ----
# Numeric and character vectors
greetings_city_vec <- c("Ciao leipzig", "Goodbye Berlin", "Hi Dresden")
# Square brackets pick an element by position.
greetings_city_vec[3]
str(greetings_city_vec)
num_vec <- c(1, 2, 3)
str(num_vec)
chr_vec <- c("1", "2", "3", "HEDVIG")
# Strings that look like numbers are converted; "HEDVIG" cannot be, so it
# becomes NA and R emits a coercion warning.
as.numeric(chr_vec)

# What happens if the vector has nothing in it, but a comma?
# c(,) is an error, so neither `output` nor `empty_vec` gets created and the
# follow-up call fails as well. Both deliberate failures are wrapped in try():
# the error message is still shown, but source()-ing the script carries on
# instead of stopping here.
try(output <- empty_vec <- c(,))
try(as.numeric(empty_vec))

# Numbers converted to their string representation.
as.character(num_vec)
# Logical vectors ----
# Keep log_vec genuinely logical. Building it with as.numeric() would make the
# later as.numeric() conversion a no-op and as.character() return "1"/"0".
log_vec <- c(TRUE, FALSE)
# In arithmetic TRUE counts as 1 and FALSE as 0, so sum() counts the TRUEs.
sum(log_vec)
# Explicit conversion to numbers ...
num_log_vec <- as.numeric(log_vec)
# ... and to strings ("TRUE" / "FALSE").
as.character(log_vec)

# Character vectors are sorted alphabetically.
name_vec <- c("Angela", "Hedvig", "Ezequiel")
sort(name_vec)
# Factors ----
# A factor stores values as codes pointing into a fixed set of levels.
# Values that are not among the levels ("Bret", "Hedwig") become NA.
name_factor_vec <- factor(
  x = c("Angela", "Hedvig", "Ezequiel", "Hedvig", "Bret", "Hedwig"),
  levels = c("Ezequiel", "Hedvig", "Angela")
)
name_factor_vec
# Factors sort by the order of their levels, not alphabetically.
sort(name_factor_vec)

# sum() is not defined for factors or for character vectors, so both calls
# fail. They are wrapped in try() so the error message is still shown while
# source()-ing the script continues past them.
try(sum(name_factor_vec))
try(sum(chr_vec))

# Names are not numbers: converting them gives NA with a coercion warning.
names_as_numbers <- as.numeric(name_vec)
# Converting a factor returns its underlying integer level codes.
as.numeric(name_factor_vec)
# Lists are 1-dimensional objects where the items can be of different types.
# The variable is called `mixed_list` rather than `list` so that it does not
# mask the base function list() by name.
mixed_list <- list("hedvig", TRUE, 1)
str(mixed_list)
# Data frames ----
# Read the gapminder table directly from its URL (needs network access).
gapminder <- read.csv(
  file = "https://raw.githubusercontent.com/swcarpentry/r-novice-gapminder/main/episodes/data/gapminder_data.csv"
)
str(object = gapminder)

# Selecting a subset of a data frame ----
# Square brackets take [rows, columns]: here rows 3 to 6 and columns 4 to 6.
gapminder[3:6, 4:6]
# `$` extracts a single column as a vector.
sum(gapminder$pop)
# subset() keeps the rows for which the condition is TRUE.
not_afgan_gapminder <- subset(
  x = gapminder,
  subset = country != "Afghanistan"
)
# Different kinds of non-data ----
# c() with no arguments gives an empty object, "" is an empty string and NA
# marks a missing value.
empty <- c()
empty_2 <- c("", "", NA)
str(object = empty_2)
# useNA = "always" adds a count for NA to the table.
base::table(gapminder$country, useNA = "always")
table(empty_2, useNA = "always")

# Rows for a single country. The bare comparison on the next line shows the
# TRUE/FALSE vector that the condition produces.
afgan_gapminder <- subset(x = gapminder, subset = country == "Afghanistan")
gapminder$country == "Afghanistan"
afgan_gapminder$year

# Rows for a single year, followed by the population column of that subset.
year_1992_dataframe <- subset(x = gapminder, subset = year == 1992)
sum(year_1992_dataframe$pop)
year_1992_dataframe$pop
# Changing large numbers to strings that are easier for human eyes to parse ----
# format() returns character strings; big.mark inserts a thousands separator.
format(year_1992_dataframe$pop, big.mark = ",")
format(max(year_1992_dataframe$pop), big.mark = ",")

# What happens if we replace the column in the data frame with the formatted
# version, i.e. with character strings?
year_1992_dataframe$pop <- format(year_1992_dataframe$pop, big.mark = ",")
# The column is now character, so sum() fails. try() still shows the error
# message but lets source() continue with the rest of the script.
try(sum(year_1992_dataframe$pop))

# Picking out the particular row that meets a condition ----
# NOTE: pop holds strings from here on, so max() compares them as text rather
# than as numbers.
subset(year_1992_dataframe, pop == max(year_1992_dataframe$pop))
# A value that should not occur in the column: expected to match no rows.
subset(year_1992_dataframe, pop == 3333333333333333333333)
# == is exact and case-sensitive; this spelling differs from the
# "Afghanistan" used earlier, so it is expected to match no rows.
subset(year_1992_dataframe, country == "afganistan")
# The same kind of row selection written with square brackets.
year_1992_dataframe[year_1992_dataframe$pop == max(year_1992_dataframe$pop), ]
# Two ways of extracting one column.
year_1992_dataframe[, "country"]
year_1992_dataframe$country
# Column selected by position (presumably pop -- confirm with str()).
max(year_1992_dataframe[, 3])
str(year_1992_dataframe$pop)
# How max() and sort() treat non-numeric data types ----
chara_vector <- c("Hedvig", "Ezequiel")
max(chara_vector)
# A mix of upper and lower case letters, a digit, NA, NULL and strings that
# contain spaces.
letter_vector <- c(
  "A", "b", "AA", "B", "c", "3",
  NA, NULL, " b", " "
)
# na.rm = TRUE leaves the NA out of the comparison.
max(letter_vector, na.rm = TRUE)
sort(x = letter_vector)

# Inspect the 1992 subset and its pop column.
str(object = year_1992_dataframe)
table(year_1992_dataframe$pop)
unique(x = year_1992_dataframe$pop)

# Functions for testing whether something is of a given data type ----
is.numeric(letter_vector)
is.character(letter_vector)
is.factor(letter_vector)
# Same vector as before with NaN appended at the end.
letter_vector <- c(
  "A", "b", "AA", "B", "c", "3",
  NA, NULL, " b", " ", NaN
)
is.na(letter_vector)

# It is possible (but NOT advisable) to have spaces in variable names. If this
# happens, use backticks (and preferably rename as soon as possible).
`letter vector` <- c(
  "A", "b", "AA", "B", "c", "3",
  NA, NULL, " b", " ", NaN,
  NULL, NULL, NULL
)
print(`letter vector`)

# In quotes, the name is just a one-element character vector, not the object.
is.na("letter_vector")
length("letter_vector")

# Compare the lengths: NULL entries add nothing, NA counts as an element.
length(letter_vector)
length(`letter vector`)
length(`letter vector`)
length(c(NA, "hello"))
is.nan(`letter vector`)

# Mixing types in c() coerces everything to one common type.
str(c(1, "hedvig", TRUE))