diff --git a/Tests/Annotations/LlmRunner.cs b/Tests/Annotations/LlmRunner.cs index 61061a9..40011bf 100644 --- a/Tests/Annotations/LlmRunner.cs +++ b/Tests/Annotations/LlmRunner.cs @@ -1,171 +1,410 @@ -using Esiur.Schema.Llm; +using Esiur.Resource; +using Esiur.Schema.Llm; +using Esiur.Stores; using OpenAI; using OpenAI.Chat; using System; using System.ClientModel; using System.Collections.Generic; +using System.Diagnostics; using System.Net.NetworkInformation; using System.Text; using System.Text.Json; -namespace Esiur.Tests.Annotations +namespace Esiur.Tests.Annotations; + +//public sealed class TickState +//{ +// public int Load { get; set; } +// public int ErrorCount { get; set; } +// public bool Enabled { get; set; } +//} + +//public sealed class LlmDecision +//{ +// public string? Function { get; set; } +// public string? Reason { get; set; } +//} + + + +public sealed class LlmRunner { - public class LlmRunner + private static readonly HashSet ValidFunctions = new(StringComparer.OrdinalIgnoreCase) { - public async Task RunAsync(ServiceNode node, string endpoint, ApiKeyCredential apiKey, string modelName, - int tickDelayMs = 1000) + null, "Restart", "ResetErrors", "Enable", "Disable" + }; + + public async Task<(List Results, List Summary)> RunAsync( + + IReadOnlyList models, + int tickDelayMs = 1000) + { + + var wh = new Warehouse(); + + await wh.Put("store", new MemoryStore()); + + var allResults = new List(); + + var ticks = new List { - var client = new OpenAIClient(apiKey, new OpenAIClientOptions() { Endpoint = new Uri(endpoint) }); - var chat = client.GetChatClient("microsoft/phi-4"); + new() { Load = 35, ErrorCount = 0, Enabled = true }, + new() { Load = 88, ErrorCount = 1, Enabled = true }, + new() { Load = 42, ErrorCount = 4, Enabled = true }, + new() { Load = 18, ErrorCount = 0, Enabled = false }, + new() { Load = 91, ErrorCount = 5, Enabled = true }, + new() { Load = 25, ErrorCount = 0, Enabled = true } + }; + + var expectations = new List + { + 
new() { Tick = 1, AllowedFunctions = new HashSet<string?> { null }, Note = "Stable service; no action expected." }, + new() { Tick = 2, AllowedFunctions = new HashSet<string?> { "Restart" }, Note = "Overload; restart expected." }, + new() { Tick = 3, AllowedFunctions = new HashSet<string?> { "Restart", "ResetErrors" }, Note = "High error count; restart or reset is acceptable." }, + new() { Tick = 4, AllowedFunctions = new HashSet<string?> { "Enable" }, Note = "Service disabled; enable expected." }, + new() { Tick = 5, AllowedFunctions = new HashSet<string?> { "Restart" }, Note = "Overload and instability; restart expected." }, + new() { Tick = 6, AllowedFunctions = new HashSet<string?> { null }, Note = "Stable service; no action expected." } + }; + + foreach (var model in models) + { + Console.WriteLine($"=== Model: {model.Name} ({model.ModelName}) ==="); + + var client = new OpenAIClient( + model.ApiKey, + new OpenAIClientOptions { Endpoint = new Uri(model.Endpoint) }); + + var chat = client.GetChatClient(model.ModelName); + + Console.WriteLine($"Warming up {model.Name}..."); + + await InferAsync(chat, + "Return {\"function\":null,\"reason\":\"warmup\"}"); + + Console.WriteLine("Warmup done"); + + // Fresh node instance per model so results are independent. 
+ var node = await wh.Put("store/service-" + model.Name, new ServiceNode()); var typeModel = LlmTypeModel.FromTypeDef(node.Instance?.Definition); - var ticks = new List - { - new() { Load = 35, ErrorCount = 0, Enabled = true }, - new() { Load = 88, ErrorCount = 1, Enabled = true }, - new() { Load = 42, ErrorCount = 4, Enabled = true }, - new() { Load = 18, ErrorCount = 0, Enabled = false }, - new() { Load = 91, ErrorCount = 5, Enabled = true }, - new() { Load = 25, ErrorCount = 0, Enabled = true } - }; - for (int i = 0; i < ticks.Count; i++) { var tick = ticks[i]; + var expected = expectations[i]; - // Simulate property changes for this tick + // Apply tick state before inference node.Load = tick.Load; node.ErrorCount = tick.ErrorCount; node.Enabled = tick.Enabled; + var loadBefore = node.Load; + var errorBefore = node.ErrorCount; + var enabledBefore = node.Enabled; + var jsonModel = typeModel.ToJson(node); - Console.WriteLine($"Tick {i + 1}"); - Console.WriteLine($"State: Load={node.Load}, ErrorCount={node.ErrorCount}, Enabled={node.Enabled}"); + var prompt = BuildPrompt(jsonModel, i + 1); - var prompt = BuildPrompt(jsonModel, node, i + 1); + var sw = Stopwatch.StartNew(); + string raw = await InferAsync(chat, prompt); + sw.Stop(); - string llmRaw = await InferAsync(chat, prompt); - var decision = ParseDecision(llmRaw); + var parsedResult = ParseDecisionWithRepair(raw); - bool invoked = InvokeIfValid(node, decision?.Function); + var firstDecision = parsedResult.First; + var finalDecision = parsedResult.Final; - Console.WriteLine($"LLM: {llmRaw}"); - Console.WriteLine($"Invoked: {invoked}"); - Console.WriteLine($"After: Load={node.Load}, ErrorCount={node.ErrorCount}, Enabled={node.Enabled}"); - Console.WriteLine(new string('-', 60)); + var parsed = finalDecision != null; + var repaired = parsedResult.Repaired; + var jsonObjectCount = parsedResult.Count; + var firstPredicted = NormalizeFunction(firstDecision?.Function); + var predicted = 
NormalizeFunction(finalDecision?.Function); + + var allowed = ValidFunctions.Contains(predicted); + var correct = expected.AllowedFunctions.Contains(predicted); + + var invoked = false; + if (allowed) + invoked = InvokeIfValid(node, predicted); + + var result = new TickResult + { + Model = model.Name, + Tick = i + 1, + + LoadBefore = loadBefore, + ErrorCountBefore = errorBefore, + EnabledBefore = enabledBefore, + + RawResponse = raw, + FirstPredictedFunction = firstPredicted, + PredictedFunction = predicted, + Reason = finalDecision?.Reason, + + Parsed = parsed, + Allowed = allowed, + Correct = correct, + Repaired = repaired, + JsonObjectCount = jsonObjectCount, + Invoked = invoked, + LatencyMs = sw.Elapsed.TotalMilliseconds, + + LoadAfter = node.Load, + ErrorCountAfter = node.ErrorCount, + EnabledAfter = node.Enabled, + + ExpectedText = string.Join(" | ", expected.AllowedFunctions.Select(x => x ?? "null")) + }; + + allResults.Add(result); + + Console.WriteLine($"Tick {result.Tick}"); + Console.WriteLine($"Before: Load={result.LoadBefore}, ErrorCount={result.ErrorCountBefore}, Enabled={result.EnabledBefore}"); + Console.WriteLine($"Expected: {result.ExpectedText}"); + Console.WriteLine($"LLM: {result.RawResponse}"); + Console.WriteLine($"First: {result.FirstPredictedFunction ?? "null"}"); + Console.WriteLine($"Final: {result.PredictedFunction ?? 
"null"}"); + Console.WriteLine($"Parsed={result.Parsed}, Allowed={result.Allowed}, Correct={result.Correct}, Repaired={result.Repaired}, Invoked={result.Invoked}, Latency={result.LatencyMs:F1} ms"); + Console.WriteLine($"After: Load={result.LoadAfter}, ErrorCount={result.ErrorCountAfter}, Enabled={result.EnabledAfter}"); + Console.WriteLine(new string('-', 72)); await Task.Delay(tickDelayMs); } - } - async Task InferAsync( - ChatClient chat, - string prompt) + var summary = Summarize(allResults); + return (allResults, summary); + } + + private static async Task InferAsync(ChatClient chat, string prompt) + { + List messages = new() { + new SystemChatMessage( + "You control a distributed resource. " + + "Return raw JSON only with fields: function and reason. " + + "Do not wrap the response in markdown or code fences."), + new UserChatMessage(prompt) + }; - List messages = new List - { - new SystemChatMessage("You control a distributed resource. " + - "Return only JSON with fields: function and reason."), - new UserChatMessage(prompt) - }; + var result = await chat.CompleteChatAsync(messages); + return result.Value.Content[0].Text; + } - var result = await chat.CompleteChatAsync(messages); - - return result.Value.Content[0].Text; - } - private static string BuildPrompt(string typeDefJson, ServiceNode node, int tick) - { - return + private static string BuildPrompt(string typeDefJson, int tick) + { + return $@"You are given a runtime type definition for a distributed resource and its current state. Choose at most one function to call. Use only the functions defined in the type definition. Do not invent functions. -If no action is needed, return function as null. 
-Return only JSON in this format: -{{ ""function"": ""Restart|ResetErrors|Enable|Disable|null"", ""reason"": ""short explanation"" }} +Return ONLY valid JSON in this format: +{{ ""function"": ""<>"", ""reason"": ""short explanation"" }} +If the current state is normal and no action is needed, return: +{{ ""function"": null, ""reason"": ""..."" }}. -Type Definition: +Input: {typeDefJson}"; + } -//Current Tick: {tick} -//Current State: -//{{ -// ""Load"": {node.Load}, -// ""ErrorCount"": {node.ErrorCount}, -// ""Enabled"": {(node.Enabled ? "true" : "false")} -//}}"; + //private static LlmDecision? ParseDecision(string text) + //{ + // try + // { + // var json = ExtractJson(text); + + // return JsonSerializer.Deserialize( + // json, + // new JsonSerializerOptions + // { + // PropertyNameCaseInsensitive = true + // }); + // } + // catch + // { + // return null; + // } + //} + + private static (LlmDecision? First, LlmDecision? Final, bool Repaired, int Count) ParseDecisionWithRepair(string text) + { + var objects = ExtractJsonObjects(text); + + if (objects.Count == 0) + return (null, null, false, 0); + + var options = new JsonSerializerOptions + { + PropertyNameCaseInsensitive = true + }; + + LlmDecision? first = null; + LlmDecision? 
final = null; + + try { first = JsonSerializer.Deserialize(objects[0], options); } catch { } + try { final = JsonSerializer.Deserialize(objects[^1], options); } catch { } + + bool repaired = objects.Count > 1 && + NormalizeFunction(first?.Function) != NormalizeFunction(final?.Function); + + return (first, final, repaired, objects.Count); + } + private static List ExtractJsonObjects(string text) + { + var results = new List(); + + if (string.IsNullOrWhiteSpace(text)) + return results; + + text = text.Trim(); + + if (text.StartsWith("```", StringComparison.Ordinal)) + { + var firstNewline = text.IndexOf('\n'); + if (firstNewline >= 0) + text = text[(firstNewline + 1)..]; + + var lastFence = text.LastIndexOf("```", StringComparison.Ordinal); + if (lastFence >= 0) + text = text[..lastFence]; } - private static LlmDecision? ParseDecision(string text) - { - try - { - var json = ExtractJson(text); + int depth = 0; + int start = -1; - return JsonSerializer.Deserialize( - json, - new JsonSerializerOptions + for (int i = 0; i < text.Length; i++) + { + char c = text[i]; + + if (c == '{') + { + if (depth == 0) + start = i; + + depth++; + } + else if (c == '}') + { + if (depth > 0) + { + depth--; + + if (depth == 0 && start >= 0) { - PropertyNameCaseInsensitive = true - }); - } - catch - { - return null; + results.Add(text.Substring(start, i - start + 1)); + start = -1; + } + } } } - private static string ExtractJson(string text) + return results; + } + private static string ExtractJson(string text) + { + if (string.IsNullOrWhiteSpace(text)) + return "{}"; + + text = text.Trim(); + + if (text.StartsWith("```", StringComparison.Ordinal)) { - if (string.IsNullOrWhiteSpace(text)) - return "{}"; + var firstNewline = text.IndexOf('\n'); + if (firstNewline >= 0) + text = text[(firstNewline + 1)..]; - text = text.Trim(); - - if (text.StartsWith("```")) - { - var firstNewline = text.IndexOf('\n'); - if (firstNewline >= 0) - text = text[(firstNewline + 1)..]; - - var lastFence = 
text.LastIndexOf("```", StringComparison.Ordinal); - if (lastFence >= 0) - text = text[..lastFence]; - } - - return text.Trim(); + var lastFence = text.LastIndexOf("```", StringComparison.Ordinal); + if (lastFence >= 0) + text = text[..lastFence]; } - private static bool InvokeIfValid(ServiceNode node, string? functionName) + // Fallback: extract first JSON object if extra text exists. + int start = text.IndexOf('{'); + int end = text.LastIndexOf('}'); + if (start >= 0 && end > start) + text = text.Substring(start, end - start + 1); + + return text.Trim(); + } + + private static string? NormalizeFunction(string? functionName) + { + if (string.IsNullOrWhiteSpace(functionName) || + string.Equals(functionName, "null", StringComparison.OrdinalIgnoreCase)) + return null; + + return functionName.Trim(); + } + + private static bool InvokeIfValid(ServiceNode node, string? functionName) + { + if (functionName == null) + return false; + + switch (functionName) { - if (string.IsNullOrWhiteSpace(functionName) || - string.Equals(functionName, "null", StringComparison.OrdinalIgnoreCase)) + case "Restart": + node.Restart(); + return true; + + case "ResetErrors": + node.ResetErrors(); + return true; + + case "Enable": + node.Enable(); + return true; + + case "Disable": + node.Disable(); + return true; + + default: return false; - - switch (functionName) - { - case "Restart": - node.Restart(); - return true; - - case "ResetErrors": - node.ResetErrors(); - return true; - - case "Enable": - node.Enable(); - return true; - - case "Disable": - node.Disable(); - return true; - - default: - return false; - } } } + + private static List Summarize(List results) + { + return results + .GroupBy(r => r.Model) + .Select(g => + { + var latencies = g.Select(x => x.LatencyMs).OrderBy(x => x).ToList(); + + return new ModelSummary + { + Model = g.Key, + TotalTicks = g.Count(), + ParseRate = 100.0 * g.Count(x => x.Parsed) / g.Count(), + AllowedRate = 100.0 * g.Count(x => x.Allowed) / g.Count(), + 
CorrectRate = 100.0 * g.Count(x => x.Correct) / g.Count(), + MeanLatencyMs = g.Average(x => x.LatencyMs), + P95LatencyMs = Percentile(latencies, 0.95), + RepairRate = 100.0 * g.Count(x => x.Repaired) / g.Count(), + }; + }) + .OrderBy(x => x.Model) + .ToList(); + } + + private static double Percentile(List<double> sortedValues, double p) + { + if (sortedValues.Count == 0) + return 0; + + if (sortedValues.Count == 1) + return sortedValues[0]; + + double index = (sortedValues.Count - 1) * p; + int lower = (int)Math.Floor(index); + int upper = (int)Math.Ceiling(index); + + if (lower == upper) + return sortedValues[lower]; + + double weight = index - lower; + return sortedValues[lower] * (1 - weight) + sortedValues[upper] * weight; + } } \ No newline at end of file diff --git a/Tests/Annotations/ModelConfig.cs b/Tests/Annotations/ModelConfig.cs new file mode 100644 index 0000000..dfab0e3 --- /dev/null +++ b/Tests/Annotations/ModelConfig.cs @@ -0,0 +1,15 @@ +using System; +using System.ClientModel; +using System.Collections.Generic; +using System.Text; + +namespace Esiur.Tests.Annotations +{ + public sealed class ModelConfig + { + public string Name { get; set; } = ""; + public string Endpoint { get; set; } = ""; + public ApiKeyCredential ApiKey { get; set; } = default!; + public string ModelName { get; set; } = ""; + } +} diff --git a/Tests/Annotations/ModelSummary.cs b/Tests/Annotations/ModelSummary.cs new file mode 100644 index 0000000..0e56629 --- /dev/null +++ b/Tests/Annotations/ModelSummary.cs @@ -0,0 +1,21 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Esiur.Tests.Annotations +{ + public sealed class ModelSummary + { + public string Model { get; set; } = ""; + public int TotalTicks { get; set; } + + public double ParseRate { get; set; } + public double AllowedRate { get; set; } + public double CorrectRate { get; set; } + + public double MeanLatencyMs { get; set; } + public double P95LatencyMs { get; set; } + + public double 
RepairRate { get; set; } + } +} diff --git a/Tests/Annotations/Program.cs b/Tests/Annotations/Program.cs index 5e48536..a8afe04 100644 --- a/Tests/Annotations/Program.cs +++ b/Tests/Annotations/Program.cs @@ -7,38 +7,75 @@ using OpenAI.Chat; using System.ClientModel; using System.Data; -var wh = new Warehouse(); - -await wh.Put("store", new MemoryStore()); -var node = await wh.Put("store/service", new ServiceNode()); var endpoint = "http://localhost:1234/v1"; var credential = new ApiKeyCredential("lm-studio"); -//var client = new OpenAIClient(credential, new OpenAIClientOptions() { Endpoint = new Uri(endpoint) }); +////var client = new OpenAIClient(credential, new OpenAIClientOptions() { Endpoint = new Uri(endpoint) }); -//var chat = client.GetChatClient("microsoft/phi-4"); +////var chat = client.GetChatClient("microsoft/phi-4"); -var llmRunner = new LlmRunner(); +//var llmRunner = new LlmRunner(); -await llmRunner.RunAsync( - node, - endpoint, - credential, - "microsoft/phi-4" -); - -//List<ChatMessage> messages = new List<ChatMessage> -//{ -// new SystemChatMessage("You are a helpful assistant that only speaks in rhymes."), -// new UserChatMessage("What is the capital of France?") -//}; - -//// Send the entire conversation history -//ChatCompletion completion = chat.CompleteChat(messages); - -//var response = await chat.CompleteChatAsync( -// "Explain what Pi means" +//await llmRunner.RunAsync( +// node, +// endpoint, +// credential, +// "microsoft/phi-4" //); -//Console.WriteLine(response.Value.Content[0].Text); \ No newline at end of file +var runner = new LlmRunner(); + +var models = new List<ModelConfig> +{ + new() + { + Name = "Phi-4", + Endpoint = "http://localhost:1234/v1", + ApiKey = new ApiKeyCredential("lm-studio"), + ModelName = "microsoft/phi-4" + }, + new() + { + Name = "Qwen2.5-7B", + Endpoint = "http://localhost:1234/v1", + ApiKey = new ApiKeyCredential("lm-studio"), + ModelName = "qwen2.5-7b-instruct" + }, + new() + { + Name = "gpt-oss", + Endpoint = "http://localhost:1234/v1", + 
ApiKey = new ApiKeyCredential("lm-studio"), + ModelName = "openai/gpt-oss-20b" + }, + new() + { + Name = "qwen2.5-1.5b-instruct", + Endpoint = "http://localhost:1234/v1", + ApiKey = new ApiKeyCredential("lm-studio"), + ModelName = "qwen2.5-1.5b-instruct" + }, + new() + { + Name = "ministral-3-3b", + Endpoint = "http://localhost:1234/v1", + ApiKey = new ApiKeyCredential("lm-studio"), + ModelName = "mistralai/ministral-3-3b" + }, + new() + { + Name = "deepseek-r1-0528-qwen3-8b", + Endpoint = "http://localhost:1234/v1", + ApiKey = new ApiKeyCredential("lm-studio"), + ModelName = "deepseek/deepseek-r1-0528-qwen3-8b" + } +}; + +var (results, summary) = await runner.RunAsync( models.Skip(5).Take(1).ToArray(), + 250); + +foreach (var item in summary) +{ + Console.WriteLine($"{item.Model}: Correct={item.CorrectRate:F1}% Repair={item.RepairRate:F1}% Mean={item.MeanLatencyMs:F1} ms P95={item.P95LatencyMs:F1} ms"); +} \ No newline at end of file diff --git a/Tests/Annotations/ServiceNode.cs b/Tests/Annotations/ServiceNode.cs index 780193c..b078e7d 100644 --- a/Tests/Annotations/ServiceNode.cs +++ b/Tests/Annotations/ServiceNode.cs @@ -9,14 +9,15 @@ namespace Esiur.Tests.Annotations [Annotation("usage_rules", @"1.Choose at most one function per tick. 2. Use only functions defined in the functions list. 3. Do not invent properties or functions. - 4. Base the decision only on current property values and annotations.")] + 4. Base the decision only on current property values and annotations. + 5. Keep the service enabled as much as possible")] [Resource] public partial class ServiceNode { [Annotation("Current service load percentage from 0 to 100. Values above 80 indicate overload.")] [Export] int load; - [Annotation("Number of recent errors detected in the service. Values above 3 indicate instability.")] + [Annotation("Number of recent errors detected in the service. Values above 3 indicate instability. 
A value of 0 means no reset is needed")] [Export] int errorCount; [Annotation("True when the service is enabled and allowed to run. False means the service is disabled.")] @@ -30,13 +31,13 @@ namespace Esiur.Tests.Annotations Enabled = true; } - [Annotation("Clear the error counter when errors were temporary and a restart is not required.")] + [Annotation("Clear recent errors only when ErrorCount is greater than 0 and the service is otherwise stable.")] [Export] public void ResetErrors() { ErrorCount = 0; } - [Annotation("Enable the service if it is disabled and should be running.")] + [Annotation("Enable the service when Enabled is false.")] [Export] public void Enable() { Enabled = true; diff --git a/Tests/Annotations/TickExpectation.cs b/Tests/Annotations/TickExpectation.cs new file mode 100644 index 0000000..881be31 --- /dev/null +++ b/Tests/Annotations/TickExpectation.cs @@ -0,0 +1,13 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Esiur.Tests.Annotations +{ + public sealed class TickExpectation + { + public int Tick { get; set; } + public HashSet<string?> AllowedFunctions { get; set; } = new(); + public string Note { get; set; } = ""; + } +} diff --git a/Tests/Annotations/TickResult.cs b/Tests/Annotations/TickResult.cs new file mode 100644 index 0000000..1640972 --- /dev/null +++ b/Tests/Annotations/TickResult.cs @@ -0,0 +1,42 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Esiur.Tests.Annotations +{ + public sealed class TickResult + { + public string Model { get; set; } = ""; + public int Tick { get; set; } + + public int LoadBefore { get; set; } + public int ErrorCountBefore { get; set; } + public bool EnabledBefore { get; set; } + + public string RawResponse { get; set; } = ""; + public string? PredictedFunction { get; set; } + public string? 
Reason { get; set; } + + public bool Parsed { get; set; } + public bool Allowed { get; set; } + public bool Invoked { get; set; } + public bool Correct { get; set; } + + public double LatencyMs { get; set; } + + public int LoadAfter { get; set; } + public int ErrorCountAfter { get; set; } + public bool EnabledAfter { get; set; } + + public string ExpectedText { get; set; } = ""; + + + public bool Repaired { get; set; } + public int JsonObjectCount { get; set; } + public string? FirstFunction { get; set; } + public string? FinalFunction { get; set; } + + public string? FirstPredictedFunction { get; set; } + } + +} diff --git a/Tests/Annotations/TickState.cs b/Tests/Annotations/TickState.cs index c916a21..711d305 100644 --- a/Tests/Annotations/TickState.cs +++ b/Tests/Annotations/TickState.cs @@ -10,5 +10,4 @@ namespace Esiur.Tests.Annotations public int ErrorCount { get; set; } public bool Enabled { get; set; } } - }