Skip to content

Commit fde1ab7

Browse files
authored
Move Normalizer extension method from experimental to stable nuget and remove Normalizer generic APIs (dotnet#3118)
* Move Normalizer extension method from experimental to stable nuget. * Cleanup unused method in Normalizer Estimator. * remove normalizer estimator catalog methods that take enum as parameter. * Remove Microsoft.ML.Experimental references in CS files. * merge fix. * cleanup. * cleanup. * PR feedback.
1 parent 41a6308 commit fde1ab7

25 files changed

+155
-194
lines changed

docs/code/MlNetCookBook.md

+4-7
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ var cachedTrainData = mlContext.Data.Cache(trainData);
344344
var pipeline =
345345
// First 'normalize' the data (rescale to be
346346
// between -1 and 1 for all examples)
347-
mlContext.Transforms.Normalize("FeatureVector")
347+
mlContext.Transforms.NormalizeMinMax("FeatureVector")
348348
// We add a step for caching data in memory so that the downstream iterative training
349349
// algorithm can efficiently scan through the data multiple times. Otherwise, the following
350350
// trainer will load data from disk multiple times. The caching mechanism uses an on-demand strategy.
@@ -625,18 +625,15 @@ var trainData = mlContext.Data.LoadFromTextFile<IrisInputAllFeatures>(dataPath,
625625
separatorChar: ','
626626
);
627627

628-
// Apply all kinds of standard ML.NET normalization to the raw features.
628+
// Apply MinMax normalization to the raw features.
629629
var pipeline =
630-
mlContext.Transforms.Normalize(
631-
new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", fixZero: true),
632-
new NormalizingEstimator.MeanVarianceColumnOptions("MeanVarNormalized", "Features", fixZero: true),
633-
new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", maximumBinCount: 256));
630+
mlContext.Transforms.NormalizeMinMax("MinMaxNormalized", "Features");
634631

635632
// Let's train our normalization pipeline, and then apply it to the same data.
636633
var normalizedData = pipeline.Fit(trainData).Transform(trainData);
637634

638635
// Inspect one column of the resulting dataset.
639-
var meanVarValues = normalizedData.GetColumn<float[]>(normalizedData.Schema["MeanVarNormalized"]).ToArray();
636+
var meanVarValues = normalizedData.GetColumn<float[]>(normalizedData.Schema["MinMaxNormalized"]).ToArray();
640637
```
641638

642639
## How do I train my model on categorical data?

docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs

+3-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3-
using System.Linq;
43
using Microsoft.ML.Data;
5-
using Microsoft.ML.Transforms;
64

75
namespace Microsoft.ML.Samples.Dynamic
86
{
@@ -28,7 +26,7 @@ public static void Example()
2826
// 35 1 6-11yrs 1 3 32 5 ...
2927

3028
// A pipeline for normalizing the Induced column.
31-
var pipeline = ml.Transforms.Normalize("Induced");
29+
var pipeline = ml.Transforms.NormalizeMinMax("Induced");
3230
// The transformed (min-max normalized) data.
3331
var transformer = pipeline.Fit(trainData);
3432

@@ -58,8 +56,8 @@ public static void Example()
5856

5957
// Composing a different pipeline if we wanted to normalize more than one column at a time.
6058
// Using min-max scaling as the normalization mode.
61-
var multiColPipeline = ml.Transforms.Normalize("LogInduced", "Induced", NormalizingEstimator.NormalizationMode.LogMeanVariance)
62-
.Append(ml.Transforms.Normalize("LogSpontaneous", "Spontaneous", NormalizingEstimator.NormalizationMode.LogMeanVariance));
59+
var multiColPipeline = ml.Transforms.NormalizeMinMax("LogInduced", "Induced")
60+
.Append(ml.Transforms.NormalizeMinMax("LogSpontaneous", "Spontaneous"));
6361
// The transformed data.
6462
var multiColtransformer = multiColPipeline.Fit(trainData);
6563
var multiColtransformedData = multiColtransformer.Transform(trainData);

docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public static void Example()
1919
// Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0.
2020
// Then append a linear regression trainer.
2121
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
22-
.Append(mlContext.Transforms.Normalize("Features"))
22+
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
2323
.Append(mlContext.Regression.Trainers.Ols(
2424
labelColumnName: labelName, featureColumnName: "Features"));
2525
var model = pipeline.Fit(data);

docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public static void Example()
2121
// Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0.
2222
// Then append a logistic regression trainer.
2323
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
24-
.Append(mlContext.Transforms.Normalize("Features"))
24+
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
2525
.Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
2626
labelColumnName: labelName, featureColumnName: "Features"));
2727
var model = pipeline.Fit(data);

src/Microsoft.ML.Data/Transforms/Normalizer.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ internal static class Defaults
3939
public const long MaximumExampleCount = 1000000000;
4040
}
4141

42-
public enum NormalizationMode
42+
[BestFriend]
43+
internal enum NormalizationMode
4344
{
4445
/// <summary>
4546
/// Linear rescale such that minimum and maximum values are mapped between -1 and 1.

src/Microsoft.ML.Experimental/TransformsCatalogExtensions.cs

-112
This file was deleted.

src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
171171
"occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
172172
"capital-gain", "capital-loss", "hours-per-week"))
173173
// Min-max normalize all the features
174-
.Append(mlContext.Transforms.Normalize("Features"));
174+
.Append(mlContext.Transforms.NormalizeMinMax("Features"));
175175

176176
var data = loader.Load(dataFile);
177177
var featurizedData = pipeline.Fit(data).Transform(data);

0 commit comments

Comments
 (0)