Hi!
In previous posts, I shared how to host and chat with a Llama 2 model running locally with Ollama (view post).
Then I found OllamaSharp (NuGet package and repo).
OllamaSharp is a .NET binding for the Ollama API, making it easy to interact with Ollama using your favorite .NET languages.
So, I decided to try it and create Ollama-specific Chat Completion and Text Generation implementations for Semantic Kernel using this library.
The full test is a console app using both services with Semantic Kernel.

Text Generation Service
The Text Generation Service is the easy one: just implement the interface Microsoft.SemanticKernel.TextGeneration.ITextGenerationService. The code looks like this:
```csharp
// Copyright (c) 2024 Bruno Capuano
// Sample Text Generation Service for Ollama models
// Licensed under the MIT License.
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.TextGeneration;
using OllamaSharp;

namespace sk_ollamacsharp
{
    public class OllamaTextGenerationService : ITextGenerationService
    {
        // public properties for the model endpoint url and model name
        public string ModelUrl { get; set; }
        public string ModelName { get; set; }

        public IReadOnlyDictionary<string, object?> Attributes => throw new NotImplementedException();

        public IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            throw new NotImplementedException();
        }

        public async Task<IReadOnlyList<TextContent>> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            // send the prompt to the local Ollama endpoint and wrap the response
            var ollama = new OllamaApiClient(ModelUrl, ModelName);
            var completionResponse = await ollama.GetCompletion(prompt, null, cancellationToken);
            var textContent = new TextContent(completionResponse.Response);
            return new List<TextContent> { textContent };
        }
    }
}
```
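In this sample, GetStreamingTextContentsAsync simply throws. If you need the streaming member to return something, one minimal option is to await the full completion and yield it as a single chunk. This is just a sketch, not real token-by-token streaming (that would use OllamaSharp's streaming API instead):

```csharp
// A minimal sketch: satisfies the streaming contract by returning the
// full completion as a single StreamingTextContent chunk.
// Requires: using System.Runtime.CompilerServices;
public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(
    string prompt,
    PromptExecutionSettings? executionSettings = null,
    Kernel? kernel = null,
    [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    var ollama = new OllamaApiClient(ModelUrl, ModelName);
    var completionResponse = await ollama.GetCompletion(prompt, null, cancellationToken);
    yield return new StreamingTextContent(completionResponse.Response);
}
```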
Chat Completion Service
The Chat Completion Service requires the implementation of the IChatCompletionService interface. The code looks like this:
```csharp
// Copyright (c) 2024 Bruno Capuano
// Sample Chat Completion Service for Ollama models
// Licensed under the MIT License.
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using OllamaSharp;
using OllamaSharp.Models.Chat;

namespace sk_ollamacsharp
{
    public class OllamaChatCompletionService : IChatCompletionService
    {
        // public properties for the model endpoint url and model name
        public string ModelUrl { get; set; }
        public string ModelName { get; set; }

        public IReadOnlyDictionary<string, object?> Attributes => throw new NotImplementedException();

        public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            var ollama = new OllamaApiClient(ModelUrl, ModelName);
            var chat = new Chat(ollama, _ => { });

            // iterate through the chatHistory messages and forward the system
            // messages; earlier user/assistant turns are not replayed in this sample
            foreach (var message in chatHistory)
            {
                if (message.Role == AuthorRole.System)
                {
                    await chat.SendAs(ChatRole.System, message.Content);
                }
            }

            // send the last message (the user question) and keep the model's answer
            var lastMessage = chatHistory.LastOrDefault();
            string question = lastMessage.Content;
            var history = (await chat.Send(question, cancellationToken)).ToArray();
            var chatResponse = history.Last().Content;
            chatHistory.AddAssistantMessage(chatResponse);
            return chatHistory;
        }

        public IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            throw new NotImplementedException();
        }
    }
}
```
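Before wiring it into a kernel, the service can also be exercised on its own. A minimal sketch, assuming Ollama is listening on the default local port and the llama2 model is already pulled:

```csharp
// direct use of the chat completion service, outside Semantic Kernel
var chatService = new OllamaChatCompletionService
{
    ModelUrl = "http://localhost:11434",
    ModelName = "llama2"
};

var history = new ChatHistory();
history.AddSystemMessage("You reply in one short sentence.");
history.AddUserMessage("What is Semantic Kernel?");

var messages = await chatService.GetChatMessageContentsAsync(history);
Console.WriteLine(messages[^1].Content);
```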
Test Chat Completion and Text Generation Services
With both services implemented, we can now use Semantic Kernel to access them.
The following code:
- Creates two services, text and chat, both using the OllamaSharp implementation.
- Creates a Semantic Kernel builder, registers both services, and builds a kernel.
- Uses the kernel to run a text generation sample, and then a chat history sample.
- In the chat sample, also uses a system message to define the chat behavior for the conversation.
This is a test; there are a lot of improvements that can be made here.
```csharp
// Copyright (c) 2024 Bruno Capuano
// Sample console application to use a llama2 LLM running locally in Ubuntu with Semantic Kernel
// Licensed under the MIT License.
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.TextGeneration;
using sk_ollamacsharp;

// llama2 running locally with Ollama in Ubuntu (WSL)
var ollamaChat = new OllamaChatCompletionService();
ollamaChat.ModelUrl = "http://localhost:11434";
ollamaChat.ModelName = "llama2";

var ollamaText = new OllamaTextGenerationService();
ollamaText.ModelUrl = "http://localhost:11434";
ollamaText.ModelName = "llama2";

// semantic kernel builder: register both services and build the kernel
var builder = Kernel.CreateBuilder();
builder.Services.AddKeyedSingleton<IChatCompletionService>("ollamaChat", ollamaChat);
builder.Services.AddKeyedSingleton<ITextGenerationService>("ollamaText", ollamaText);
var kernel = builder.Build();

// text generation
var textGen = kernel.GetRequiredService<ITextGenerationService>();
var response = await textGen.GetTextContentsAsync("The weather in January in Toronto is usually ");
Console.WriteLine(response[^1].Text);

// chat completion, with a system message to define the chat behavior
var chat = kernel.GetRequiredService<IChatCompletionService>();
var history = new ChatHistory();
history.AddSystemMessage("You are a useful assistant that replies using a funny style and emojis. Your name is Goku.");
history.AddUserMessage("hi, who are you?");

// print the response
var result = await chat.GetChatMessageContentsAsync(history);
Console.WriteLine(result[^1].Content);
```
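Since both services are registered as keyed singletons, they can also be resolved by key, which becomes relevant once more than one implementation is registered; for example, `kernel.GetRequiredService<IChatCompletionService>("ollamaChat")` should return the OllamaSharp-based chat service.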
The full code is available here: https://github.com/elbruno/semantickernel-localLLMs. Note that the main README of the repo still needs to be updated.
Happy coding!
Greetings
El Bruno
More posts on my blog: ElBruno.com.
More info at https://beacons.ai/elbruno