Skip to content

Commit c031272

Browse files
committed
Curso Completo de ML
1 parent 353e35f commit c031272

File tree

10,173 files changed

+75452
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

10,173 files changed

+75452
-0
lines changed

.RData

2.53 KB
Binary file not shown.

.Rhistory

+512
Large diffs are not rendered by default.

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,5 @@ venv.bak/
102102

103103
# mypy
104104
.mypy_cache/
105+
.Rproj.user
106+
.DS_Store

datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/.Rhistory

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Country,Age,Salary,Purchased
2+
France,44,72000,No
3+
Spain,27,48000,Yes
4+
Germany,30,54000,No
5+
Spain,38,61000,No
6+
Germany,40,,Yes
7+
France,35,58000,Yes
8+
Spain,,52000,No
9+
France,48,79000,Yes
10+
Germany,50,83000,No
11+
France,37,67000,Yes
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
# Data preprocessing template - categorical data

# Import the dataset
dataset <- read.csv('Data.csv')


# Encode the categorical variables as factors.
# Each level is mapped, in the order listed, onto the matching numeric label.
dataset$Country <- factor(
  dataset$Country,
  levels = c("France", "Spain", "Germany"),
  labels = 1:3
)
dataset$Purchased <- factor(
  dataset$Purchased,
  levels = c("No", "Yes"),
  labels = 0:1
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 28 13:19:06 2019

@author: juangabriel
"""

# Preprocessing template - categorical data

# Import the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Import the data set: every column except the last is a feature and the
# last column is the target.
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Encode categorical data
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# OneHotEncoder(categorical_features=...) was removed in scikit-learn 0.22.
# A ColumnTransformer now selects the columns to one-hot encode. The encoder
# accepts string categories directly, so no LabelEncoder pass over column 0
# is needed beforehand.
onehotencoder = ColumnTransformer(
    [('categorical', OneHotEncoder(), [0])],
    remainder='passthrough',  # keep the other feature columns after the dummies
    sparse_threshold=0,       # always return a dense array
)
# The passthrough columns come from an object array, so cast the result to
# float to match the dense numeric matrix the old `.toarray()` call produced.
X = np.asarray(onehotencoder.fit_transform(X), dtype=float)

# Encode the target labels as integers.
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
# Data preprocessing template

# Import the dataset
dataset <- read.csv('Data.csv')
# dataset <- dataset[, 2:3]

# Split the data into a training set and a test set
# install.packages("caTools")
library(caTools)
set.seed(123)
# `split` is a logical vector: TRUE rows go to training, FALSE rows to testing.
split <- sample.split(dataset$Purchased, SplitRatio = 0.8)
training_set <- subset(dataset, split)
testing_set <- subset(dataset, !split)

# Feature scaling (disabled; uncomment to enable)
# training_set[, 2:3] <- scale(training_set[, 2:3])
# testing_set[, 2:3] <- scale(testing_set[, 2:3])
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 27 19:43:11 2019

@author: juangabriel
"""

# Preprocessing template

# Import the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Import the data set: every column except the last is a feature and the
# last column is the target.
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values


# Split the data set into a training set and a test set.
# 20% of the rows are held out; random_state pins the shuffle so the split
# is reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)


# Feature scaling (disabled; uncomment to enable).
# The scaler is fitted on the training set only and then applied to the
# test set.
# from sklearn.preprocessing import StandardScaler
# sc_X = StandardScaler()
# X_train = sc_X.fit_transform(X_train)
# X_test = sc_X.transform(X_test)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
# Data preprocessing template - missing data

# Import the dataset
dataset <- read.csv('Data.csv')


# Handle NA values: replace each missing entry with the mean of the
# non-missing values in the same column.
dataset$Age <- ifelse(
  is.na(dataset$Age),
  mean(dataset$Age, na.rm = TRUE),
  dataset$Age
)
dataset$Salary <- ifelse(
  is.na(dataset$Salary),
  mean(dataset$Salary, na.rm = TRUE),
  dataset$Salary
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 28 13:19:21 2019

@author: juangabriel
"""

# Preprocessing template - missing data

# Import the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Import the data set: every column except the last is a feature and the
# last column is the target.
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Handle missing values (NAs)
# sklearn.preprocessing.Imputer was removed in scikit-learn 0.22.
# SimpleImputer replaces it: it always works column-wise (no `axis` argument)
# and takes np.nan rather than the string "NaN" as the missing-value marker.
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
# Columns 1 and 2 are the ones imputed; each gap is filled with its column mean.
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])

0 commit comments

Comments
 (0)