-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathselectSamples.R
52 lines (40 loc) · 2.15 KB
/
selectSamples.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#' Select a reproducible random subset of microarray samples
#'
#' Removes every row whose `time` equals "hour 2", groups the remaining rows
#' by the combination of `infection` and `agent`, and keeps at most four
#' randomly chosen rows from each group. Groups with four rows or fewer are
#' kept whole.
#'
#' @param allTargets A data frame containing at least the columns `sample`,
#'   `infection`, `time` and `agent`.
#' @param seed Value passed to `set.seed()` before sampling, so that the same
#'   seed always yields the same selection.
#' @return A data frame with the selected rows and the same columns as
#'   `allTargets`. Row names have the form `<sample>.<index>`, where `index`
#'   is the position of the first matching `sample` value in `allTargets`.
#'   If no row survives the time filter, a zero-row data frame is returned.
filter_microarray <- function(allTargets, seed = 77129830) {
  stopifnot(is.data.frame(allTargets))
  missing_cols <- setdiff(
    c("sample", "infection", "time", "agent"),
    names(allTargets)
  )
  if (length(missing_cols) > 0L) {
    stop(
      "allTargets is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Seed the RNG with the caller-supplied value (previously a hard-coded
  # constant was used, so the `seed` argument had no effect).
  # Note: this resets the global RNG state for the calling session.
  set.seed(seed)

  # Keep only the rows whose 'time' is not "hour 2" (rows with NA are dropped).
  filtered <- subset(allTargets, time != "hour 2")

  # Nothing left to sample from: return the empty frame instead of failing
  # later when assigning row names to a NULL result.
  if (nrow(filtered) == 0L) {
    return(filtered)
  }

  # One group per observed infection/agent combination. The factor is kept
  # outside the data frame so the input columns are never touched.
  group_id <- interaction(filtered$infection, filtered$agent, drop = TRUE)

  # Draw four rows at random from each group that has more than four.
  selected <- do.call(
    rbind,
    lapply(split(filtered, group_id), function(group_data) {
      n_rows <- nrow(group_data)
      if (n_rows > 4L) {
        group_data[sample(seq_len(n_rows), 4L), ]
      } else {
        group_data
      }
    })
  )

  # Label each selected row with its sample ID and its position in the input.
  original_indices <- match(selected$sample, allTargets$sample)
  rownames(selected) <- paste0(selected$sample, ".", original_indices)

  selected
}
# Build a mock targets table following the dataset description:
# 35 samples laid out as seven consecutive blocks of five rows each.
allTargets <- data.frame(
  sample = c(
    "GSM944831", "GSM944838", "GSM944845", "GSM944852", "GSM944859", # rows 1-5
    "GSM944833", "GSM944840", "GSM944847", "GSM944854", "GSM944861", # rows 6-10
    "GSM944834", "GSM944841", "GSM944848", "GSM944855", "GSM944862", # rows 11-15
    "GSM944832", "GSM944839", "GSM944846", "GSM944853", "GSM944860", # rows 16-20
    "GSM944835", "GSM944842", "GSM944849", "GSM944856", "GSM944863", # rows 21-25
    "GSM944836", "GSM944843", "GSM944850", "GSM944857", "GSM944864", # rows 26-30
    "GSM944837", "GSM944844", "GSM944851", "GSM944858", "GSM944865"  # rows 31-35
  ),
  infection = rep(c("uninfected", "S. aureus USA300"), times = c(15, 20)),
  time = rep(c("hour 0", "hour 2", "hour 24"), times = c(15, 5, 15)),
  agent = rep(
    c(
      "untreated", "linezolid", "vancomycin",
      "untreated", "linezolid", "vancomycin"
    ),
    times = c(5, 5, 5, 10, 5, 5)
  )
)
# Apply the function (replace the seed value with your UOC ID or another
# number that you can report in the document).
result <- filter_microarray(allTargets, seed = 77129830)
-----