Skip to content

Commit

Permalink
Now to bring on the big guns
Browse files Browse the repository at this point in the history
(the big guns are curve fitting and R-squared for defining our model's equation)

~lross2k~
  • Loading branch information
lross2k committed Jun 4, 2023
1 parent 72dce1e commit 32cace7
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 127 deletions.
46 changes: 46 additions & 0 deletions analysis.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Plot every sensor column of a data frame against its cycle count and save
# the figure as a 1280x720 PNG file.
#
# Args:
#   tmp:       data frame with a `Cycle` column; columns 6 onwards are drawn
#              as one line each (the first five columns are not plotted).
#   file.name: path of the PNG file to create.
#
# Returns: NULL, invisibly; the function is called for its side effect.
.PlotSensors <- function(tmp, file.name) {
  stopifnot(ncol(tmp) >= 6)

  png(file = file.name, width = 1280, height = 720)
  # Close the device even when plotting fails, so no graphics device leaks.
  on.exit(dev.off(), add = TRUE)
  # Wide right margin and xpd = TRUE leave room for the legend drawn outside
  # the plot region.
  par(mar = c(5.1, 4.8, 1.1, 8.1), xpd = TRUE)

  palette.size <- 500
  colores <- rainbow(palette.size)
  # Column 6 uses palette entry 100; column x > 6 uses entry 31 * x. The index
  # wraps around the palette so that data frames with more than 16 columns
  # still get a valid colour instead of NA.
  .SensorColor <- function(x) {
    if (x == 6) {
      colores[100]
    } else {
      colores[((31 * x - 1) %% palette.size) + 1]
    }
  }

  sensor.cols <- seq_len(ncol(tmp))[-(1:5)]
  sensor.colors <- vapply(sensor.cols, .SensorColor, character(1))

  plot(tmp$Cycle, tmp[, sensor.cols[1]], type = 'l', col = sensor.colors[1],
       ylim = range(as.matrix(tmp[sensor.cols])),
       ylab = 'Sensores', xlab = 'Ciclos', lwd = 4.0)
  # seq_along(...)[-1] is empty when there is a single sensor column, so the
  # loop is skipped instead of counting backwards.
  for (i in seq_along(sensor.cols)[-1]) {
    lines(tmp$Cycle, tmp[, sensor.cols[i]], type = 'l',
          col = sensor.colors[i], lwd = 4.0)
  }
  legend("topright", inset = c(-0.09, 0), box.col = "brown",
         bg = "yellow", box.lwd = 2,
         legend = names(tmp)[sensor.cols], fill = sensor.colors)
  invisible(NULL)
}

# Reverse a vector whose last element is smaller than its first one, so that
# the returned vector never ends lower than it starts.
#
# Args:
#   col: an atomic vector (typically the readings of one sensor).
#
# Returns: `col` reversed when its last value is below its first value,
#   otherwise `col` unchanged. Empty vectors, single values and vectors whose
#   first or last element is NA are returned unchanged.
.TransformDecreasing <- function(col) {
  n <- length(col)
  # With fewer than two elements there is no direction to compare.
  if (n < 2) {
    return(col)
  }
  first <- col[1]
  last <- col[n]
  # An NA endpoint would make the comparison NA and `if` would fail.
  if (is.na(first) || is.na(last)) {
    return(col)
  }
  if (last < first) rev(col) else col
}

# Prepare the sensor columns of a data frame for analysis.
#
# Every column after the fifth is, in this order:
#   1. reversed by .TransformDecreasing() when it ends lower than it starts,
#   2. replaced by the fitted values of a loess fit against `df$Cycle`,
#   3. divided by its own maximum.
# The first five columns are left untouched.
#
# Args:
#   df: data frame with a `Cycle` column and sensor readings from column 6 on.
#
# Returns: `df` with the sensor columns replaced by their processed values.
.FormatMotor <- function(df) {
  sensor.cols <- seq_len(ncol(df))[-(1:5)]
  if (length(sensor.cols) == 0) {
    return(df)
  }

  cycle <- df$Cycle
  .FormatSensor <- function(values) {
    values <- .TransformDecreasing(values)
    smoothed <- unname(predict(loess(values ~ cycle)))
    smoothed / max(smoothed)
  }

  # Working column by column (instead of apply() on df[, -c(1:5)]) also
  # handles the case of a single sensor column, where the subset would
  # otherwise collapse to a plain vector and apply() would fail.
  df[sensor.cols] <- lapply(df[sensor.cols], .FormatSensor)
  df
}

# Format the data of one motor, then export it as CSV and as a sensor plot.
#
# Args:
#   df:        data frame with a `Motor` column identifying each motor.
#   motor:     value of `df$Motor` selecting the motor to process.
#   file.name: output path without extension; '.csv' and '.png' are appended.
#
# Returns: the value of .PlotSensors(); called for its side effects (writes
#   `<file.name>.csv` and `<file.name>.png`).
TestMotor <- function(df, motor, file.name) {
  # Select the rows of the requested motor. The selection previously ignored
  # `motor` and always took the 260th group of the split, and it relied on a
  # global `Data_1` for the column names; subsetting `df` directly keeps its
  # own names.
  tmp <- df[df$Motor %in% motor, , drop = FALSE]
  if (nrow(tmp) == 0) {
    stop("No rows found for motor ", motor, call. = FALSE)
  }
  tmp <- .FormatMotor(tmp)

  write.csv2(tmp, paste(file.name, '.csv', sep = ''), row.names = FALSE)

  .PlotSensors(tmp, paste(file.name, '.png', sep = ''))
}
7 changes: 0 additions & 7 deletions denoise.R

This file was deleted.

37 changes: 37 additions & 0 deletions formatting.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Extract one element of a list of data frames (such as the result of
# split()) and give it the requested column names.
#
# Args:
#   data:     list of data frames.
#   index:    position (or name) of the element to extract.
#   col.name: character vector of column names to assign to the result.
#
# Returns: the selected element as a data frame named with `col.name`.
FormatSplit <- function(data, index, col.name) {
  # Use the arguments rather than a global `tmp[1]`, which made every call
  # return the first subset regardless of `index`.
  res <- as.data.frame(data[[index]])
  names(res) <- col.name
  res
}

# Compute, for every column, the sum of its five default quantiles
# (0 %, 25 %, 50 %, 75 %, 100 %) divided by five times its 100 % quantile.
#
# Args:
#   dataFrame: data frame (or matrix) whose columns are evaluated.
#   rowName:   row name given to the single-row result.
#
# Returns: a one-row data frame with one ratio per input column.
.CalcInsignificance <- function(dataFrame, rowName) {
  .QuantileRatio <- function(values) {
    q <- quantile(values)
    sum(q) / (q[5] * 5)
  }
  ratios <- apply(dataFrame, 2, .QuantileRatio)
  res <- data.frame(as.list(ratios))
  row.names(res) <- c(rowName)
  res
}

# Keep only the columns whose smallest value is above a threshold.
#
# Args:
#   sig:      data frame of ratios, one column per variable.
#   criteria: numeric threshold; a column is dropped when its minimum is not
#             greater than this value.
#
# Returns: `sig` restricted to the columns that pass, always as a data frame
#   (also when one or zero columns remain).
.FilterInsignificance <- function(sig, criteria) {
  passes <- apply(sig, 2, function(col) min(col) > criteria)
  # Drop only columns that definitely fail; a column whose test is NA is kept.
  drop.col <- !is.na(passes) & !passes
  # Logical indexing avoids `sig[, -integer(0)]`, which selects no columns at
  # all when nothing has to be dropped; drop = FALSE keeps a data frame when a
  # single column survives.
  sig[, !drop.col, drop = FALSE]
}

# Build the table of quantile ratios for six data subsets and keep only the
# columns that pass the threshold in every subset.
#
# Args:
#   Data_1 ... Data_6: data frames; their first five columns are skipped.
#   criteria:          threshold forwarded to .FilterInsignificance().
#
# Returns: the result of .FilterInsignificance() applied to a table with one
#   row per subset ("Subset_1" ... "Subset_6").
GetInsignificance <- function(Data_1, Data_2, Data_3,
                              Data_4, Data_5, Data_6,
                              criteria) {
  subsets <- list(Data_1, Data_2, Data_3, Data_4, Data_5, Data_6)
  labels <- c("Subset_1", "Subset_2", "Subset_3",
              "Subset_4", "Subset_5", "Subset_6")
  rows <- lapply(seq_along(subsets), function(i) {
    .CalcInsignificance(subsets[[i]][-c(1:5)], labels[i])
  })
  .FilterInsignificance(do.call(rbind, rows), criteria)
}

# Remove from a data frame every column that is named in `insig`.
#
# Args:
#   insig: data frame (or any named object) whose names are the columns to
#          remove.
#   df:    data frame to filter.
#
# Returns: `df` without the columns named in `insig`, always as a data frame.
RemoveByInsignificance <- function(insig, df) {
  # Exact name matching with %in% replaces the per-name regular expression:
  # it is not confused by "." in column names, tolerates names that are not
  # present in `df`, and keeps every column when `insig` has no names.
  df[, !(colnames(df) %in% names(insig)), drop = FALSE]
}
73 changes: 14 additions & 59 deletions main.R
Original file line number Diff line number Diff line change
@@ -1,73 +1,28 @@
Data <- read.csv("train_data.csv")
col.name <- names(Data)

source('formatting.R')
tmp <- split(Data, cut(Data$Height, c(-5,5,15,24,30,40,45)))
Data_1 <- as.data.frame(tmp[1])
Data_2 <- as.data.frame(tmp[2])
Data_3 <- as.data.frame(tmp[3])
Data_4 <- as.data.frame(tmp[4])
Data_5 <- as.data.frame(tmp[5])
Data_6 <- as.data.frame(tmp[6])
rm(Data)
Data_1 <- FormatSplit(tmp, 1, col.name)
Data_2 <- FormatSplit(tmp, 2, col.name)
Data_3 <- FormatSplit(tmp, 3, col.name)
Data_4 <- FormatSplit(tmp, 4, col.name)
Data_5 <- FormatSplit(tmp, 5, col.name)
Data_6 <- FormatSplit(tmp, 6, col.name)
rm(tmp)
names(Data_1) <- col.name
names(Data_2) <- col.name
names(Data_3) <- col.name
names(Data_4) <- col.name
names(Data_5) <- col.name
names(Data_6) <- col.name

# Compute, for every column, the sum of its five default quantiles
# (0 %, 25 %, 50 %, 75 %, 100 %) divided by five times its 100 % quantile.
#
# Args:
#   dataFrame: data frame (or matrix) whose columns are evaluated.
#   rowName:   row name given to the single-row result.
#
# Returns: a one-row data frame with one ratio per input column.
GetInsignificance <- function(dataFrame, rowName) {
  QuantileRatio <- function(values) {
    q <- quantile(values)
    sum(q) / (q[5] * 5)
  }
  ratios <- apply(dataFrame, 2, QuantileRatio)
  res <- data.frame(as.list(ratios))
  row.names(res) <- c(rowName)
  res
}

insig <- GetInsignificance(Data_1[-c(1:5)], "Subset_1")
insig <- rbind(insig, GetInsignificance(Data_2[-c(1:5)], "Subset_2"))
insig <- rbind(insig, GetInsignificance(Data_3[-c(1:5)], "Subset_3"))
insig <- rbind(insig, GetInsignificance(Data_4[-c(1:5)], "Subset_4"))
insig <- rbind(insig, GetInsignificance(Data_5[-c(1:5)], "Subset_5"))
insig <- rbind(insig, GetInsignificance(Data_6[-c(1:5)], "Subset_6"))

# Keep only the columns whose smallest value is above a threshold.
#
# Args:
#   sig:      data frame of ratios, one column per variable.
#   criteria: numeric threshold; a column is dropped when its minimum is not
#             greater than this value.
#
# Returns: `sig` restricted to the columns that pass, always as a data frame
#   (also when one or zero columns remain).
FilterInsignificance <- function(sig, criteria) {
  passes <- apply(sig, 2, function(col) min(col) > criteria)
  # Drop only columns that definitely fail; a column whose test is NA is kept.
  drop.col <- !is.na(passes) & !passes
  # Logical indexing avoids `sig[, -integer(0)]`, which selects no columns at
  # all when nothing has to be dropped; drop = FALSE keeps a data frame when a
  # single column survives.
  sig[, !drop.col, drop = FALSE]
}

insig <- FilterInsignificance(insig, 0.988)

# Remove from a data frame every column that is named in `insig`.
#
# Args:
#   insig: data frame (or any named object) whose names are the columns to
#          remove.
#   df:    data frame to filter.
#
# Returns: `df` without the columns named in `insig`, always as a data frame.
RemoveByInsignificance <- function(insig, df) {
  # Exact name matching with %in% replaces the per-name regular expression:
  # it is not confused by "." in column names, tolerates names that are not
  # present in `df`, and keeps every column when `insig` has no names.
  df[, !(colnames(df) %in% names(insig)), drop = FALSE]
}
insig <- GetInsignificance(Data_1, Data_2, Data_3,
Data_4, Data_5, Data_6,
0.988)

Data_1 <- RemoveByInsignificance(insig, Data_1)
Data_2 <- RemoveByInsignificance(insig, Data_2)
Data_3 <- RemoveByInsignificance(insig, Data_3)
Data_4 <- RemoveByInsignificance(insig, Data_4)
Data_5 <- RemoveByInsignificance(insig, Data_5)
Data_6 <- RemoveByInsignificance(insig, Data_6)
rm(insig)

# Reverse a vector whose last element is smaller than its first one, so that
# the returned vector never ends lower than it starts.
#
# Args:
#   col: an atomic vector (typically the readings of one sensor).
#
# Returns: `col` reversed when its last value is below its first value,
#   otherwise `col` unchanged. Empty vectors, single values and vectors whose
#   first or last element is NA are returned unchanged.
TransformDecreasing <- function(col) {
  n <- length(col)
  # With fewer than two elements there is no direction to compare.
  if (n < 2) {
    return(col)
  }
  first <- col[1]
  last <- col[n]
  # An NA endpoint would make the comparison NA and `if` would fail.
  if (is.na(first) || is.na(last)) {
    return(col)
  }
  if (last < first) rev(col) else col
}

# Prepare a motor data frame for plotting.
#
# The columns after the fifth are first passed through TransformDecreasing()
# and then replaced by the fitted values of a loess fit against `df$Cycle`.
# Finally every column of the data frame (including the first five) is
# divided by its own maximum.
#
# Args:
#   df: data frame with a `Cycle` column and sensor readings from column 6 on.
#
# Returns: a new data frame with the processed columns.
FormatMotor <- function(df) {
  SmoothAgainstCycle <- function(values) {
    fit <- loess(values ~ df$Cycle)
    predict(fit)
  }
  ScaleByMax <- function(values) {
    values / max(values)
  }

  df[, -(1:5)] <- apply(df[, -(1:5)], 2, TransformDecreasing)
  df[, -(1:5)] <- apply(df[, -(1:5)], 2, SmoothAgainstCycle)
  scaled <- apply(df, 2, ScaleByMax)
  data.frame(scaled)
}

tmp <- data.frame(as.list(split(Data_1, Data_1$Motor)[260]))
names(tmp) <- names(Data_1)
tmp <- FormatMotor(tmp)

source('plotting.R')
PlotSensors(tmp, 'motor260.png')
source('analysis.R')
TestMotor(Data_1, 1, 'Subset_1_Regulador_60_Motor_1')
5 changes: 0 additions & 5 deletions normalize.R

This file was deleted.

20 changes: 0 additions & 20 deletions plotting.R

This file was deleted.

29 changes: 5 additions & 24 deletions proceso.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -46,43 +46,23 @@ plot(Data100$Cycle, Data100$Height)
```

```{r}
tmp <- split(Data100, cut(Data100$Height, c(-10,5,15,25,39,55)))
Data100_1 <- as.data.frame(tmp[1])
Data100_2 <- as.data.frame(tmp[2])
Data100_3 <- as.data.frame(tmp[3])
Data100_4 <- as.data.frame(tmp[4])
Data100_5 <- as.data.frame(tmp[5])
names(Data100_1) <- names(Data)
names(Data100_2) <- names(Data)
names(Data100_3) <- names(Data)
names(Data100_4) <- names(Data)
names(Data100_5) <- names(Data)
splittedData <- split(Data, cut(Data$Height, c(-5,5,15,24,30,40,45)))
```

Los datos de estos sub grupos presentan comportamiento homogéneo

```{r echo=FALSE}
plot(Data100_5$Cycle, Data100_5$Height)
plottingData <- data.frame(splittedData[6])
names(plottingData) <- names(Data)
plot(plottingData$Cycle, plottingData$Height)
```

Se ha notado en todos los gráficos generados que, a partir del ciclo 200, hay una disminución en la densidad de datos, o eso aparenta. Se realiza un análisis general de cada subconjunto, aunque primero se debe determinar cuáles datos no vale la pena analizar. Algunos fáciles de descartar corresponden al motor, ciclo y regulador, ya que son valores discretos con variaciones conocidas que de momento no nos interesan, por ejemplo:

```{r}
summary(Data[,-c(3:4,6:39)])
```

Se separan los datos a mostrar por regiones, ya que son muchos sensores para mostrar todos en una sola salida. Es posible observar, para los datos a 60%, que la altura es casi constante, con variaciones de 0,01 unidades; el número Mach presenta variaciones ligeramente mayores, con 0,022 unidades, ambos variando hacia el tercer cuartil, y el sensor 1 es constante

```{r}
summary(Data60[,-c(1,2,5,9:39)])
```

El sensor 5 es constante y el 8 sólo tiene una mínima variación en el máximo, la cual no llega a ser significativa ni para el tercer cuartil

```{r}
summary(Data60[,-c(1:8,14:39)])
```

Para simplificar la discriminación de variables, se decide emplear únicamente los cuantiles: se suman los cinco valores (mínimo, primer cuartil, mediana, tercer cuartil y máximo) y se divide el resultado entre el valor máximo multiplicado por 5, para obtener un número sencillo que, si es 1, significa que la variable es constante y, por tanto, no es significativa para el análisis

```{r}
Expand Down Expand Up @@ -243,6 +223,7 @@ Separamos con base en los porcentajes del regulador
tmp <- split(Data, Data$Throttle)
Data60_1 <- as.data.frame(tmp[1])
Data100 <- as.data.frame(tmp[2])
rm(tmp)
names(Data60_1) <- names(Data)
names(Data100) <- names(Data)
```
Expand Down
4 changes: 0 additions & 4 deletions regresion.R

This file was deleted.

4 changes: 0 additions & 4 deletions rsquared-height.R

This file was deleted.

4 changes: 0 additions & 4 deletions rsquared-mach.R

This file was deleted.

0 comments on commit 32cace7

Please sign in to comment.