|
| 1 | +using System; |
| 2 | +using System.Collections.Generic; |
| 3 | +using System.Text; |
| 4 | + |
namespace Microsoft.ML.Samples.Dynamic
{
    /// <summary>
    /// Sample showing how the 'NormalizeText' transform rewrites a string:
    /// lower-casing it and stripping diacritics, punctuation and numbers.
    /// </summary>
    public static class NormalizeText
    {
        /// <summary>
        /// Builds a text-normalizing transformer, runs one sample sentence
        /// through it and writes the normalized result to the console.
        /// </summary>
        public static void Example()
        {
            // The ML context is the entry point for all ML.NET operations
            // (exception tracking, logging and the source of randomness).
            var context = new MLContext();

            // The estimator created by 'NormalizeText' ('TextNormalizingEstimator')
            // is not trainable, so no training rows are needed. An empty list is
            // loaded purely so that the pipeline can see the input schema.
            var schemaOnlyView = context.Data.LoadFromEnumerable(new List<TextData>());

            // Describe the normalization (lower-case; drop diacritics, punctuation
            // and numbers) and fit it against the schema-only view.
            var normalizer = context.Transforms.Text
                .NormalizeText("NormalizedText", "Text",
                    Transforms.Text.TextNormalizingEstimator.CaseMode.Lower,
                    keepDiacritics: false,
                    keepPunctuations: false,
                    keepNumbers: false)
                .Fit(schemaOnlyView);

            // The prediction engine maps a single TextData instance to a
            // TransformedTextData instance using the fitted transformer.
            var engine = context.Model.CreatePredictionEngine<TextData, TransformedTextData>(normalizer);

            // Sentence mixing upper/lower case, diacritics, punctuation and digits.
            var sample = new TextData
            {
                Text = "ML.NET's NormalizeText API changes the case of the TEXT and removes/keeps diâcrîtîcs, punctuations, and/or numbers (123)."
            };

            // Run the transform on the sample and print the normalized text.
            var prediction = engine.Predict(sample);
            Console.WriteLine($"Normalized Text: {prediction.NormalizedText}");

            // Expected output:
            //   Normalized Text: mlnets normalizetext api changes the case of the text and removeskeeps diacritics punctuations andor numbers
        }

        /// <summary>
        /// Input row: the raw text to normalize.
        /// </summary>
        public class TextData
        {
            /// <summary>The original, un-normalized text.</summary>
            public string Text { get; set; }
        }

        /// <summary>
        /// Output row: the input text plus its normalized form.
        /// </summary>
        public class TransformedTextData : TextData
        {
            /// <summary>The text written to the "NormalizedText" output column.</summary>
            public string NormalizedText { get; set; }
        }
    }
}
0 commit comments