Hi !
This is a cool one: Machine Learning .Net (ML.Net) now supports ranking scenarios in AutoML. As a Machine Learning aficionado, I find this amazing. I can now process my problems with AutoML, and then learn the specifics of the best produced models.
Note: a couple of weeks ago, someone asked a question around ranking scenarios. My knowledge is low here, so I kindly shared a couple of starting points. With AutoML now supporting ranking scenarios, the response is completely different!
So I picked up the sample for the current version (1.5.1), the sample for standard ranking scenarios from ML.Net, and a data source based on a public dataset provided by Microsoft, originally from Microsoft Bing (see references), and I created this sample:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System. Linq;
using System. Net. Http. Headers;
using Microsoft. ML;
using Microsoft. ML. AutoML;
using Microsoft. ML. Data;
namespace ConsoleApp1
{
/// <summary>
/// Console sample that runs an ML.NET AutoML ranking experiment, lists the
/// models it produced, evaluates the best one on the test file, saves it and
/// uses it for two single-row predictions.
/// </summary>
public class Program
{
    // Tab-separated input files, resolved relative to the working directory.
    private const string TrainDataPath = @"data\train.txt";
    private const string TestDataPath = @"data\test.txt";

    // Destination file for the serialized best model.
    private const string ModelPath = @"Model.zip";

    // These must match the SearchData member names exactly: a stray leading
    // space makes AutoML look for columns that are not in the loaded data.
    private const string LabelColumnName = "Label";
    private const string GroupColumnName = "GroupId";

    // Time budget handed to the AutoML experiment, in seconds.
    private const uint ExperimentTime = 600;

    /// <summary>
    /// Entry point: prints a start banner, runs the experiment, prints an end banner.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Console.WriteLine(" Start …");
        Run();
        Console.WriteLine(" End");
    }

    /// <summary>
    /// Loads the train and test files, runs the AutoML ranking experiment for
    /// <see cref="ExperimentTime"/> seconds, prints every run, evaluates and
    /// saves the best model, then predicts two hand-made rows and waits for a key.
    /// </summary>
    public static void Run()
    {
        var mlContext = new MLContext();

        // STEP 1: Load data (tab-separated, no header row).
        var trainDataView = mlContext.Data.LoadFromTextFile<SearchData>(TrainDataPath, hasHeader: false, separatorChar: '\t');
        var testDataView = mlContext.Data.LoadFromTextFile<SearchData>(TestDataPath, hasHeader: false, separatorChar: '\t');

        // STEP 2: Run AutoML experiment.
        // NOTE(review): testDataView is passed as the second (validation) argument
        // here and is evaluated again in STEP 4 — confirm this reuse is intended.
        Console.WriteLine($" Running AutoML ranking experiment for {ExperimentTime} seconds… ");
        var experimentSettings = new RankingExperimentSettings { MaxExperimentTimeInSeconds = ExperimentTime };
        var columnInformation = new ColumnInformation
        {
            // Qualified so the right-hand side is clearly this class's constant.
            LabelColumnName = Program.LabelColumnName,
            GroupIdColumnName = GroupColumnName
        };
        var experimentResult = mlContext.Auto()
            .CreateRankingExperiment(experimentSettings)
            .Execute(trainDataView, testDataView, columnInformation);

        // STEP 3: Print every produced run and the best model's trainer.
        var bestRun = experimentResult.BestRun;
        // Materialize once: the sequence is both counted and iterated.
        var runDetails = experimentResult.RunDetails.ToList();
        Console.WriteLine(" ===================================================== ");
        Console.WriteLine($" Total models produced: {runDetails.Count} ");
        var i = 0;
        foreach (var runDetail in runDetails)
        {
            i++;
            Console.WriteLine($" {i} – TrainerName: {runDetail.TrainerName} ");
            Console.WriteLine($" Runtime In Seconds: {runDetail.RuntimeInSeconds} ");
            Console.WriteLine(" ");
            // Uncomment to print the validation metrics of each run:
            // PrintMetrics(runDetail.ValidationMetrics);
        }
        Console.WriteLine(" ");
        Console.WriteLine(" ===================================================== ");
        Console.WriteLine($" Best model's trainer: {bestRun.TrainerName} ");

        // STEP 4: Evaluate test data.
        var testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
        var testMetrics = mlContext.Ranking.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
        Console.WriteLine(" Metrics of best model on test data — ");
        PrintMetrics(testMetrics);

        // STEP 5: Save the best model for later deployment and inferencing.
        mlContext.Model.Save(bestRun.Model, trainDataView.Schema, ModelPath);

        // STEP 6: Create prediction engine from the best trained model.
        // The engine is disposable, so it is scoped to the predictions below.
        using (var predictionEngine = mlContext.Model.CreatePredictionEngine<SearchData, SearchDataPrediction>(bestRun.Model))
        {
            // STEP 7: Initialize a new test, and get the prediction.
            // Ranking models report their result in the Score column, so Score is
            // printed; Prediction is bound to a PredictedLabel column instead.
            var testPage = new SearchData
            {
                GroupId = " 1",
                Features = 9,
                Label = 1
            };
            var prediction = predictionEngine.Predict(testPage);
            Console.WriteLine($" Predicted rating for: {prediction.Score} ");

            // New page.
            testPage = new SearchData
            {
                GroupId = " 2",
                Features = 2,
                Label = 9
            };
            prediction = predictionEngine.Predict(testPage);
            Console.WriteLine($" Predicted: {prediction.Score} ");
        }

        Console.WriteLine(" Press any key to continue…");
        Console.ReadKey();
    }

    /// <summary>
    /// Writes the NDCG and DCG series of <paramref name="metrics"/> to the console,
    /// each value followed by a " – " separator, or " No metrics " when it is null.
    /// </summary>
    /// <param name="metrics">Ranking metrics to print; may be null.</param>
    private static void PrintMetrics(RankingMetrics metrics)
    {
        if (metrics is null)
        {
            Console.WriteLine(" No metrics ");
            return;
        }

        // Same text as folding from a " " seed: every value keeps its trailing separator.
        var ndcg = " " + string.Concat(metrics.NormalizedDiscountedCumulativeGains.Select(gain => gain + " – "));
        var dcg = " " + string.Concat(metrics.DiscountedCumulativeGains.Select(gain => gain + " – "));

        Console.WriteLine($" Normalized Discounted Cumulative Gains: {ndcg} ");
        Console.WriteLine($" Discounted Cumulative Gains: {dcg} ");
    }
}
/// <summary>
/// One input row as read from the text files: each field is bound to the
/// file column whose zero-based index is given in its LoadColumn attribute.
/// </summary>
internal class SearchData
{
    // File column 0.
    [LoadColumn(0)]
    public string GroupId;

    // File column 1.
    [LoadColumn(1)]
    public float Features;

    // File column 2.
    [LoadColumn(2)]
    public float Label;
}
/// <summary>
/// Output row produced by the prediction engine for a single SearchData input.
/// </summary>
class SearchDataPrediction
{
    // Bound to the "PredictedLabel" output column. The name must not carry a
    // leading space, otherwise it can never match a column of that name.
    // NOTE(review): ranking trainers are expected to report their result in
    // Score rather than PredictedLabel — confirm whether this member is needed.
    [ColumnName("PredictedLabel")]
    public float Prediction;

    // Bound by name to the "Score" output column.
    public float Score { get; set; }
}
}
The sample runs for 10 minutes and evaluates 33 models.
The test file is 266 MB and the training data file is 799 MB.
At the end, the best trainer is [FastTreeRanking].
The output is also very clear about the tested models and the best one. (I trimmed this to make it clearer).
Start ...
Running AutoML recommendation experiment for 600 seconds...
=====================================================
Total models produced: 33
1 - TrainerName: LightGbmRanking
Runtime In Seconds: 10.6167636
2 - TrainerName: FastTreeRanking
Runtime In Seconds: 11.1055165
3 - TrainerName: FastTreeRanking
Runtime In Seconds: 35.0196598
4 - TrainerName: FastTreeRanking
Runtime In Seconds: 6.0401781
...
=====================================================
Best model's trainer: FastTreeRanking
Press any key to continue...
Super cool feature !
References
Like this: Like Loading...
Related
7 comments