forked from dotnet/machinelearning-samples
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add onnx export (regression) sample (dotnet#891)
* add readme and project * update readme * add comments and update readme * Change name of folder * update readme + file structure * update readme for other onnx sample * update file paths * add section about exporting specific columns
- Loading branch information
Bri Achtman
authored
Apr 5, 2021
1 parent
8167306
commit 0936bf4
Showing
6 changed files
with
269 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 25 additions & 0 deletions
25
samples/csharp/getting-started/Regression_ONNXExport/ONNXExport.sln
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio Version 16 | ||
VisualStudioVersion = 16.0.31025.218 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ONNXExport", "ONNXExport\ONNXExport.csproj", "{6F92E373-62D0-41E7-83C9-93DC2AEC4ADB}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Release|Any CPU = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{6F92E373-62D0-41E7-83C9-93DC2AEC4ADB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{6F92E373-62D0-41E7-83C9-93DC2AEC4ADB}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{6F92E373-62D0-41E7-83C9-93DC2AEC4ADB}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{6F92E373-62D0-41E7-83C9-93DC2AEC4ADB}.Release|Any CPU.Build.0 = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
GlobalSection(ExtensibilityGlobals) = postSolution | ||
SolutionGuid = {247BB18A-BBED-49BB-8A61-218E13BFF44F} | ||
EndGlobalSection | ||
EndGlobal |
15 changes: 15 additions & 0 deletions
15
samples/csharp/getting-started/Regression_ONNXExport/ONNXExport/ONNXExport.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>net5.0</TargetFramework> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="Microsoft.ML" Version="1.5.5" /> | ||
<PackageReference Include="Microsoft.ML.OnnxConverter" Version="0.17.5" /> | ||
<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.7.0" /> | ||
<PackageReference Include="Microsoft.ML.OnnxTransformer" Version="1.5.5" /> | ||
</ItemGroup> | ||
|
||
</Project> |
141 changes: 141 additions & 0 deletions
141
samples/csharp/getting-started/Regression_ONNXExport/ONNXExport/Program.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Runtime.InteropServices.ComTypes; | ||
using Microsoft.ML; | ||
using Microsoft.ML.Data; | ||
|
||
namespace ONNXExport
{
    /// <summary>
    /// Console sample that trains a taxi-fare regression model with ML.NET, exports it to
    /// ONNX, scores the test set with both models, and prints the first few predictions
    /// from each so they can be compared side by side.
    /// </summary>
    class Program
    {
        // Single source of truth for the exported model file so that the export step and
        // the load step can never point at different files. The path is relative, so the
        // file is created in the process's current working directory.
        private const string OnnxModelPath = "taxi-fare-model.onnx";

        // Number of predictions printed for each model.
        private const int RowsToPrint = 5;

        private static readonly string TrainDataFilePath = GetDataFilePath("taxi-fare-train.csv");
        private static readonly string TestDataFilePath = GetDataFilePath("taxi-fare-test.csv");

        /// <summary>
        /// Trains the ML.NET model, converts it to ONNX, and prints predictions from both models.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var mlContext = new MLContext();

            // Load training data
            IDataView trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
                path: TrainDataFilePath,
                hasHeader: true,
                separatorChar: ',');

            // Load test data
            IDataView testDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
                path: TestDataFilePath,
                hasHeader: true,
                separatorChar: ',');

            // Create data processing pipeline for training ML.NET model:
            // one-hot encode the two string columns in place, then gather every input
            // column into the single "Features" column the trainer consumes.
            var dataProcessPipeline = mlContext.Transforms.Categorical.OneHotEncoding(new[]
                {
                    new InputOutputColumnPair("vendor_id", "vendor_id"),
                    new InputOutputColumnPair("payment_type", "payment_type")
                })
                .Append(mlContext.Transforms.Concatenate(
                    "Features",
                    new[] { "vendor_id", "payment_type", "rate_code", "passenger_count", "trip_time_in_secs", "trip_distance" }));

            // Set training algorithm and append to pipeline
            var trainer = mlContext.Regression.Trainers.Sdca(labelColumnName: "fare_amount", featureColumnName: "Features");
            var trainingPipeline = dataProcessPipeline.Append(trainer);

            // Train ML.NET model on training data
            ITransformer model = trainingPipeline.Fit(trainingDataView);

            // You need a transformer and input data to convert an ML.NET model to an ONNX model.
            // By default, the ONNX conversion will generate the ONNX file with the latest OpSet version.
            // The stream is closed before the file is read back below.
            using (var stream = File.Create(OnnxModelPath))
            {
                mlContext.Model.ConvertToOnnx(model, trainingDataView, stream);
            }

            // Now you can compare the results from the ML.NET and ONNX models

            // Create the pipeline using the ONNX file
            var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(OnnxModelPath);

            // Make sure to either use the 'using' clause or explicitly dispose the returned onnxTransformer to prevent memory leaks
            using var onnxTransformer = onnxEstimator.Fit(trainingDataView);

            // Inference on the test set with the ML.NET model
            var output = model.Transform(testDataView);

            // Inference on the test set with the ONNX model
            var onnxOutput = onnxTransformer.Transform(testDataView);

            // Get the Scores from ML.NET model
            var outScores = mlContext.Data.CreateEnumerable<ScoreValue>(output, reuseRowObject: false);

            // Get the Scores from ONNX model
            var onnxOutScores = mlContext.Data.CreateEnumerable<OnnxScoreValue>(onnxOutput, reuseRowObject: false);

            // Print Scores from ML.NET model
            PrintScore(outScores, RowsToPrint);

            // Print Scores from ONNX model
            PrintScore(onnxOutScores, RowsToPrint);
        }

        /// <summary>
        /// Builds the path of a data file that lives in the sibling
        /// Regression_TaxiFarePrediction sample, relative to the application's base directory.
        /// </summary>
        /// <param name="fileName">File name inside the sibling sample's Data folder.</param>
        /// <returns>The combined path to the requested data file.</returns>
        private static string GetDataFilePath(string fileName)
        {
            // Every segment is passed separately so Path.Combine inserts the platform's own
            // directory separator; a single literal with backslashes only resolves on Windows.
            return Path.Combine(
                AppDomain.CurrentDomain.BaseDirectory,
                "..", "..", "..", "..", "..",
                "Regression_TaxiFarePrediction",
                "TaxiFarePrediction",
                "Data",
                fileName);
        }

        /// <summary>
        /// Model input schema: one row of the taxi-fare CSV. LoadColumn gives the column's
        /// position in the file; ColumnName gives the name the pipeline refers to.
        /// </summary>
        public class ModelInput
        {
            [ColumnName("vendor_id"), LoadColumn(0)]
            public string Vendor_id { get; set; }

            [ColumnName("rate_code"), LoadColumn(1)]
            public float Rate_code { get; set; }

            [ColumnName("passenger_count"), LoadColumn(2)]
            public float Passenger_count { get; set; }

            [ColumnName("trip_time_in_secs"), LoadColumn(3)]
            public float Trip_time_in_secs { get; set; }

            [ColumnName("trip_distance"), LoadColumn(4)]
            public float Trip_distance { get; set; }

            [ColumnName("payment_type"), LoadColumn(5)]
            public string Payment_type { get; set; }

            // Used as the label column by the trainer.
            [ColumnName("fare_amount"), LoadColumn(6)]
            public float Fare_amount { get; set; }
        }

        /// <summary>
        /// Model output schema. Not referenced elsewhere in this file.
        /// </summary>
        public class ModelOutput
        {
            public float Score { get; set; }
        }

        /// <summary>
        /// Row shape used to read the Score column produced by the ML.NET model.
        /// </summary>
        private class ScoreValue
        {
            public float Score { get; set; }
        }

        /// <summary>
        /// Row shape used to read the Score column produced by the ONNX model, which is
        /// read as a vector buffer rather than as a single float.
        /// </summary>
        private class OnnxScoreValue
        {
            public VBuffer<float> Score { get; set; }
        }

        /// <summary>
        /// Prints the first <paramref name="numRows"/> scores produced by the ML.NET model.
        /// </summary>
        /// <param name="values">Predictions from the ML.NET model.</param>
        /// <param name="numRows">Maximum number of rows to print.</param>
        private static void PrintScore(IEnumerable<ScoreValue> values, int numRows)
        {
            WriteScores(
                "Predicted Scores with ML.NET model",
                values.Take(numRows).Select(value => value.Score));
        }

        /// <summary>
        /// Prints the first <paramref name="numRows"/> scores produced by the ONNX model.
        /// </summary>
        /// <param name="values">Predictions from the ONNX model.</param>
        /// <param name="numRows">Maximum number of rows to print.</param>
        private static void PrintScore(IEnumerable<OnnxScoreValue> values, int numRows)
        {
            // Only the first element of each score buffer is printed.
            WriteScores(
                "Predicted Scores with ONNX model",
                values.Take(numRows).Select(value => value.Score.GetItemOrDefault(0)));
        }

        /// <summary>
        /// Writes a header line followed by one left-aligned "Score" line per value.
        /// </summary>
        /// <param name="header">Line printed before the scores.</param>
        /// <param name="scores">Scores to print, one per line.</param>
        private static void WriteScores(string header, IEnumerable<float> scores)
        {
            Console.WriteLine(header);
            foreach (var score in scores)
            {
                Console.WriteLine("{0, -10} {1, -10}", "Score", score);
            }
        }
    }
}
86 changes: 86 additions & 0 deletions
86
samples/csharp/getting-started/Regression_ONNXExport/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
# Exporting an ML.NET model to ONNX | ||
|
||
| ML.NET version | API type | Status | App Type | Data type | Scenario | ML Task | Algorithms | | ||
|----------------|-------------------|-------------------------------|-------------|-----------|---------------------|---------------------------|-----------------------------| | ||
| v1.5.5 | Dynamic API | Up-to-date | Console app | .csv files | Price prediction | Regression | SDCA regression | | ||
|
||
In this sample, you'll see how to use ML.NET to train a regression model and then convert that model to the ONNX format. | ||
|
||
## Problem | ||
|
||
The Open Neural Network Exchange, i.e. [ONNX](http://onnx.ai/), is an open format to represent deep learning models. With ONNX, developers can move models between state-of-the-art tools and choose the combination that is best for them. ONNX is developed and supported by a community of partners. | ||
|
||
There may be times when you want to train a model with ML.NET and then convert to ONNX, for instance if you want to consume your model with WinML to take advantage of GPU inferencing in Windows applications. | ||
|
||
Not all ML.NET models can be converted to ONNX; it is dependent on the trainers and transforms in the training pipeline. For a list of supported trainers see the tables in the ML.NET [Algorithms Doc](https://docs.microsoft.com/dotnet/machine-learning/how-to-choose-an-ml-net-algorithm) and for a list of supported transforms check out the [Data transforms Doc](https://docs.microsoft.com/dotnet/machine-learning/resources/transforms). | ||
|
||
## Dataset | ||
|
||
This sample uses the [NYC Taxi Fare dataset](https://github.com/dotnet/machinelearning-samples/blob/main/datasets/README.md#nyc-taxi-fare) for training. | ||
|
||
## Solution | ||
|
||
The console application project `ONNXExport` can be used to train an ML.NET model that predicts the price of taxi fare based on several features such as distance travelled and number of passengers, to export that model to ONNX, and then to consume the ONNX model and make predictions with it. | ||
|
||
### NuGet Packages | ||
|
||
To export an ML.NET model to ONNX, you must install the following NuGet packages in your project: | ||
|
||
- Microsoft.ML.OnnxConverter | ||
|
||
You must also install: | ||
|
||
- Microsoft.ML for training the ML.NET model | ||
- Microsoft.ML.OnnxRuntime and Microsoft.ML.OnnxTransformer for scoring the ONNX model | ||
|
||
### Transforms and trainers | ||
|
||
This pipeline contains the following transforms and trainers which are all ONNX exportable: | ||
|
||
- OneHotEncoding transform | ||
- Concatenate transform | ||
- SDCA regression trainer | ||
|
||
### Code | ||
|
||
After training an ML.NET model, you can use the following code to convert to ONNX: | ||
|
||
```csharp | ||
using (var stream = File.Create("taxi-fare-model.onnx")) | ||
mlContext.Model.ConvertToOnnx(model, trainingDataView, stream); | ||
``` | ||
|
||
You need a transformer and input data to convert an ML.NET model to an ONNX model. By default, the ONNX conversion will generate the ONNX file with the latest OpSet version. | ||
|
||
After converting to ONNX, you can then consume the ONNX model with the following code: | ||
|
||
```csharp | ||
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(onnxModelPath); | ||
|
||
using var onnxTransformer = onnxEstimator.Fit(trainingDataView); | ||
|
||
var onnxOutput = onnxTransformer.Transform(testDataView); | ||
``` | ||
|
||
You should get the same results when comparing the ML.NET model and ONNX model on the same sample input. If you run the project, you should see output similar to the following in the console: | ||
|
||
```console | ||
Predicted Scores with ML.NET model | ||
Score 19.60645 | ||
Score 18.673796 | ||
Score 5.9175444 | ||
Score 4.8969507 | ||
Score 19.108932 | ||
Predicted Scores with ONNX model | ||
Score 19.60645 | ||
Score 18.673796 | ||
Score 5.9175444 | ||
Score 4.8969507 | ||
Score 19.108932 | ||
``` | ||
|
||
## Performance | ||
|
||
The default ML.NET to ONNX conversion is not optimal and produces extra graph outputs that are not needed for ML.NET usage. ONNX Runtime does a reverse depth-first search, which results in a lot of conversions of native memory to managed memory from ONNX Runtime to ML.NET and in the execution of more kernels than necessary. | ||
|
||
If you specify just the necessary graph outputs, it will only execute a subset of the graph. Thus, by eliminating all graph outputs except Score, you can improve inference performance. |