Buenas!
Siguiendo la serie de posts de Windows Machine Learning, hoy voy a repasar un poco una de las apps de ejemplo que podemos encontrar entre los ejemplos de GitHub de Windows Universal Samples.
Comencemos por uno de los más interesantes: [SqueezeNet Object Detection Sample].
Este ejemplo utiliza un modelo llamado SqueezeNet que permite identificar el objeto predominante en una imagen. El modelo de SqueezeNet está entrenado para reconocer más de 1000 objetos y la verdad es que funciona bastante bien. Por ejemplo, veamos qué valores retorna utilizando mi teclado y una taza.
Bastante bien. Sin embargo, frente a una mala foto de Venom, el resultado no es tan bueno.
Pues bien, momento de analizar un poco el código. Lo primero que tenemos que tener en cuenta es que necesitamos utilizar una versión de Windows 10 igual o superior a 17110. Actualmente esto significa ser parte del programa de Windows Insiders.
En mi caso, la versión que tengo actualmente es 17120.1.
Para utilizar el modelo SqueezeNet, nuestra UWP app se sirve de 2 archivos:
- Labels.json. Contiene una lista con todos los labels (objetos) registrados en el modelo
- SqueezeNet.onnx. Es el modelo de ML propiamente dicho
El funcionamiento de la app aprovecha la nueva API [Windows.AI.MachineLearning.Preview] para cargar el modelo ONNX, luego enlaza una imagen de entrada y procesa la salida de la evaluación para determinar los labels encontrados.
En el próximo post analizaré un poco el código C# de la app porque me he quedado sorprendido de lo simple que es el funcionamiento. Aquí va de regalo:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using Windows.AI.MachineLearning.Preview;
using Windows.Graphics.Imaging;
using Windows.Media;
using Windows.Storage;
using Windows.Storage.Pickers;
using Windows.Storage.Streams;
using Windows.UI.Core;
using Windows.UI.Xaml;
using Windows.UI.Xaml.Controls;
using Windows.UI.Xaml.Media.Imaging;
namespace SqueezeNetObjectDetection
{
    /// <summary>
    /// Main (and only) page of the sample. Loads the SqueezeNet ONNX model and
    /// its label file from the app package, lets the user pick an image, and
    /// displays the top-3 labels detected by the model together with their
    /// confidence values.
    /// </summary>
    public sealed partial class MainPage : Page
    {
        private const string _kModelFileName = "SqueezeNet.onnx";
        private const string _kLabelsFileName = "Labels.json";

        // Descriptors for the model's image input and tensor output, cached by LoadModelAsync.
        private ImageVariableDescriptorPreview _inputImageDescription;
        private TensorVariableDescriptorPreview _outputTensorDescription;

        // Null until LoadModelAsync succeeds; reset to null on load failure.
        private LearningModelPreview _model = null;

        // Labels parsed from Labels.json, indexed by model output position.
        private List<string> _labels = new List<string>();

        // Buffer bound to the model's output tensor; filled during evaluation.
        List<float> _outputVariableList = new List<float>();

        public MainPage()
        {
            this.InitializeComponent();
        }

        /// <summary>
        /// Load the label file and the ONNX model from the app package, then
        /// cache the model's input image descriptor and output tensor descriptor.
        /// On failure, the error is shown in StatusBlock and _model is reset to null.
        /// </summary>
        private async Task LoadModelAsync()
        {
            await Dispatcher.RunAsync(CoreDispatcherPriority.Normal, () => StatusBlock.Text = $"Loading {_kModelFileName} … patience ");
            try
            {
                // Parse labels from the label file; each useful line looks like `"index": "label"`.
                var file = await StorageFile.GetFileFromApplicationUriAsync(new Uri($"ms-appx:///Assets/{_kLabelsFileName}"));
                using (var inputStream = await file.OpenReadAsync())
                using (var classicStream = inputStream.AsStreamForRead())
                using (var streamReader = new StreamReader(classicStream))
                {
                    char[] charsToTrim = { '\"', ' ' };
                    while (streamReader.Peek() >= 0)
                    {
                        // BUG FIX: string.Trim returns a new string; the original
                        // discarded the result, so labels kept their quotes/spaces.
                        string line = streamReader.ReadLine().Trim(charsToTrim);
                        var indexAndLabel = line.Split(':');
                        if (indexAndLabel.Length == 2)
                        {
                            _labels.Add(indexAndLabel[1]);
                        }
                    }
                }

                // Load the ONNX model from the app package.
                var modelFile = await StorageFile.GetFileFromApplicationUriAsync(new Uri($"ms-appx:///Assets/{_kModelFileName}"));
                _model = await LearningModelPreview.LoadModelFromStorageFileAsync(modelFile);

                // Retrieve model input and output variable descriptions
                // (we already know the model takes an image in and outputs a tensor).
                List<ILearningModelVariableDescriptorPreview> inputFeatures = _model.Description.InputFeatures.ToList();
                List<ILearningModelVariableDescriptorPreview> outputFeatures = _model.Description.OutputFeatures.ToList();

                _inputImageDescription =
                    inputFeatures.FirstOrDefault(feature => feature.ModelFeatureKind == LearningModelFeatureKindPreview.Image)
                    as ImageVariableDescriptorPreview;
                _outputTensorDescription =
                    outputFeatures.FirstOrDefault(feature => feature.ModelFeatureKind == LearningModelFeatureKindPreview.Tensor)
                    as TensorVariableDescriptorPreview;
            }
            catch (Exception ex)
            {
                await Dispatcher.RunAsync(CoreDispatcherPriority.Normal, () => StatusBlock.Text = $"error: {ex.Message}");
                _model = null;
            }
        }

        /// <summary>
        /// Trigger the file picker and run the image evaluation.
        /// async void is acceptable here because this is a top-level event handler.
        /// </summary>
        /// <param name="sender">Event source (the Run button).</param>
        /// <param name="e">Routed event data.</param>
        private async void ButtonRun_Click(object sender, RoutedEventArgs e)
        {
            ButtonRun.IsEnabled = false;
            UIPreviewImage.Source = null;
            try
            {
                // Load the model off the UI thread.
                await Task.Run(async () => await LoadModelAsync());
                if (_model == null)
                {
                    // LoadModelAsync already reported the failure in StatusBlock.
                    ButtonRun.IsEnabled = true;
                    return;
                }

                // Trigger file picker to select an image file.
                FileOpenPicker fileOpenPicker = new FileOpenPicker();
                fileOpenPicker.SuggestedStartLocation = PickerLocationId.PicturesLibrary;
                fileOpenPicker.FileTypeFilter.Add(".jpg");
                fileOpenPicker.FileTypeFilter.Add(".png");
                fileOpenPicker.ViewMode = PickerViewMode.Thumbnail;
                StorageFile selectedStorageFile = await fileOpenPicker.PickSingleFileAsync();

                // BUG FIX: PickSingleFileAsync returns null when the user cancels
                // the picker; the original threw a NullReferenceException here.
                if (selectedStorageFile == null)
                {
                    ButtonRun.IsEnabled = true;
                    return;
                }

                SoftwareBitmap softwareBitmap;
                using (IRandomAccessStream stream = await selectedStorageFile.OpenAsync(FileAccessMode.Read))
                {
                    // Create the decoder from the stream.
                    BitmapDecoder decoder = await BitmapDecoder.CreateAsync(stream);

                    // Get the SoftwareBitmap representation of the file in BGRA8 format.
                    softwareBitmap = await decoder.GetSoftwareBitmapAsync();
                    softwareBitmap = SoftwareBitmap.Convert(softwareBitmap, BitmapPixelFormat.Bgra8, BitmapAlphaMode.Premultiplied);
                }

                // Display the image.
                SoftwareBitmapSource imageSource = new SoftwareBitmapSource();
                await imageSource.SetBitmapAsync(softwareBitmap);
                UIPreviewImage.Source = imageSource;

                // Encapsulate the image within a VideoFrame to be bound and evaluated.
                VideoFrame inputImage = VideoFrame.CreateWithSoftwareBitmap(softwareBitmap);

                // Evaluate the image off the UI thread.
                await Task.Run(async () => await EvaluateVideoFrameAsync(inputImage));
            }
            catch (Exception ex)
            {
                await Dispatcher.RunAsync(CoreDispatcherPriority.Normal, () => StatusBlock.Text = $"error: {ex.Message}");
                ButtonRun.IsEnabled = true;
            }
        }

        /// <summary>
        /// Bind the given VideoFrame to the model input, run the evaluation and
        /// display the top-3 detected labels (with confidence) in StatusBlock.
        /// Always re-enables the Run button before returning.
        /// </summary>
        /// <param name="inputFrame">Frame to evaluate; ignored when null.</param>
        private async Task EvaluateVideoFrameAsync(VideoFrame inputFrame)
        {
            if (inputFrame != null)
            {
                try
                {
                    // Create bindings for the input and output buffers.
                    LearningModelBindingPreview binding = new LearningModelBindingPreview(_model as LearningModelPreview);
                    binding.Bind(_inputImageDescription.Name, inputFrame);
                    binding.Bind(_outputTensorDescription.Name, _outputVariableList);

                    // Process the frame with the model.
                    LearningModelEvaluationResultPreview results = await _model.EvaluateAsync(binding, "test");
                    List<float> resultProbabilities = results.Outputs[_outputTensorDescription.Name] as List<float>;

                    // Find the top-3 classes with the highest confidence.
                    // BUG FIX: the original inserted into a slot without shifting the
                    // lower-ranked entries, so a late high score overwrote slot 0 and
                    // silently dropped the previous best instead of demoting it.
                    List<float> topProbabilities = new List<float>() { 0.0f, 0.0f, 0.0f };
                    List<int> topProbabilityLabelIndexes = new List<int>() { 0, 0, 0 };
                    for (int i = 0; i < resultProbabilities.Count; i++)
                    {
                        for (int j = 0; j < 3; j++)
                        {
                            if (resultProbabilities[i] > topProbabilities[j])
                            {
                                // Shift lower-ranked entries down before inserting at slot j.
                                for (int k = 2; k > j; k--)
                                {
                                    topProbabilities[k] = topProbabilities[k - 1];
                                    topProbabilityLabelIndexes[k] = topProbabilityLabelIndexes[k - 1];
                                }
                                topProbabilityLabelIndexes[j] = i;
                                topProbabilities[j] = resultProbabilities[i];
                                break;
                            }
                        }
                    }

                    // Display the result.
                    string message = "Predominant objects detected are:";
                    for (int i = 0; i < 3; i++)
                    {
                        message += $"\n{ _labels[topProbabilityLabelIndexes[i]]} with confidence of { topProbabilities[i]}";
                    }
                    await Dispatcher.RunAsync(CoreDispatcherPriority.Normal, () => StatusBlock.Text = message);
                }
                catch (Exception ex)
                {
                    await Dispatcher.RunAsync(CoreDispatcherPriority.Normal, () => StatusBlock.Text = $"error: {ex.Message}");
                }

                // Re-enable the button whether the evaluation succeeded or not.
                await Dispatcher.RunAsync(CoreDispatcherPriority.Normal, () => ButtonRun.IsEnabled = true);
            }
        }
    }
}
Happy Coding!
Saludos @ Burlington
El Bruno
References