forked from rdpeng/ExData_Plotting1
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot4.R
49 lines (40 loc) · 1.98 KB
/
plot4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
library(plyr)
library(ggplot2)
# Read the data file
## This first line will likely take a few seconds. Be patient!
NEI <- readRDS("data/summarySCC_PM25.rds")
SCC <- readRDS("data/Source_Classification_Code.rds")
# First extract all source codes corresponding to coal combustion
# The EI.Sector field looks like the most direct way to determine this
CoalCombustionSCC <- subset(SCC, EI.Sector %in% c("Fuel Comb - Comm/Institutional - Coal",
"Fuel Comb - Electric Generation - Coal",
"Fuel Comb - Industrial Boilers, ICEs - Coal"))
# This may omit some. For example, see 242
# Compare to Short.Name matching both Comb and Coal
CoalCombustionSCC1 <- subset(SCC, grepl("Comb", Short.Name) & grepl("Coal", Short.Name))
nrow(CoalCombustionSCC)
nrow(CoalCombustionSCC1)
d1 <- setdiff(CoalCombustionSCC, CoalCombustionSCC1)
d2 <- setdiff(CoalCombustionSCC1, CoalCombustionSCC)
length(d1)
length(d2)
# Above does not work correctly
d3 <- setdiff(CoalCombustionSCC$SCC, CoalCombustionSCC1$SCC)
d4 <- setdiff(CoalCombustionSCC1$SCC, CoalCombustionSCC$SCC)
length(d3)
length(d4)
# Given these differences I believe the best approach is to union the two sets
CoalCombustionSCCCodes <- union(CoalCombustionSCC$SCC, CoalCombustionSCC1$SCC)
length(CoalCombustionSCCCodes)
CoalCombustion <- subset(NEI, SCC %in% CoalCombustionSCCCodes)
coalCombustionPM25ByYear <- ddply(CoalCombustion, .(year, type), function(x) sum(x$Emissions))
colnames(coalCombustionPM25ByYear)[3] <- "Emissions"
png("plot4.png")
qplot(year, Emissions, data=coalCombustionPM25ByYear, color=type, geom="line") +
stat_summary(fun.y = "sum", fun.ymin = "sum", fun.ymax = "sum",
color = "black", aes(shape="total"), geom="line") +
geom_line(aes(size="total", shape = NA)) +
ggtitle(expression("Coal Combustion" ~ PM[2.5] ~ "Emissions by Source Type and Year")) +
xlab("Year") +
ylab(expression("Total" ~ PM[2.5] ~ "Emissions (tons)"))
dev.off()