2
0
mirror of https://github.com/esiur/esiur-dotnet.git synced 2026-04-04 04:18:22 +00:00
This commit is contained in:
2026-04-03 15:23:15 +03:00
parent d5c434b6af
commit 6cda0bd982
35 changed files with 2197 additions and 28 deletions

View File

@@ -0,0 +1,25 @@
using Esiur.Core;
using Esiur.Data;
using Esiur.Misc;
using Esiur.Protocol;
using Esiur.Resource;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Threading.Tasks;
using static System.Net.Mime.MediaTypeNames;
namespace Esiur.Tests.Annotations;
/// <summary>
/// Empty [Resource]-annotated partial class. The remaining members are
/// presumably emitted by the Esiur source generator (the project references
/// Esiur with OutputItemType="Analyzer") — confirm against the generated output.
/// </summary>
[Resource]
public partial class Agent;

View File

@@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>
  <ItemGroup>
    <!-- OpenAI .NET SDK; used to talk to a local LM Studio server via the OpenAI-compatible API. -->
    <PackageReference Include="OpenAI" Version="2.9.1" />
  </ItemGroup>
  <ItemGroup>
    <!-- OutputItemType="Analyzer" loads Esiur as a source generator as well as a library
         (generates the [Export]/[Resource] members used by the test classes). -->
    <ProjectReference Include="..\..\Esiur\Esiur.csproj" OutputItemType="Analyzer"/>
  </ItemGroup>
</Project>

View File

@@ -0,0 +1,12 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace Esiur.Tests.Annotations
{
/// <summary>
/// Shape of the JSON answer expected from the model:
/// <c>{ "function": "&lt;name&gt;"|null, "reason": "..." }</c>.
/// Deserialized case-insensitively by the runner.
/// </summary>
public sealed class LlmDecision
{
    /// Function name the model chose, or null for "no action".
    public string? Function { get; set; }

    /// Model's short explanation for the choice.
    public string? Reason { get; set; }
}
}

View File

@@ -0,0 +1,417 @@
using Esiur.Resource;
using Esiur.Schema.Llm;
using Esiur.Stores;
using OpenAI;
using OpenAI.Chat;
using System;
using System.ClientModel;
using System.Collections.Generic;
using System.Diagnostics;
using System.Net.NetworkInformation;
using System.Text;
using System.Text.Json;
namespace Esiur.Tests.Annotations;
//public sealed class TickState
//{
// public int Load { get; set; }
// public int ErrorCount { get; set; }
// public bool Enabled { get; set; }
//}
//public sealed class LlmDecision
//{
// public string? Function { get; set; }
// public string? Reason { get; set; }
//}
/// <summary>
/// Benchmarks chat models against a scripted sequence of service states.
/// Each tick the model is shown the node's type definition plus its current
/// state, must answer with at most one function name as raw JSON, and is
/// scored on parseability, validity, and correctness versus the expected
/// choice(s) for that tick.
/// </summary>
public sealed class LlmRunner
{
    // Function names the runner is willing to dispatch; null means "take no
    // action this tick". Case-insensitive so a model answering e.g. "restart"
    // is still treated as a valid (and invocable) choice.
    private static readonly HashSet<string?> ValidFunctions = new(StringComparer.OrdinalIgnoreCase)
    {
        null, "Restart", "ResetErrors", "Enable", "Disable"
    };

    /// <summary>
    /// Runs every model through the fixed six-tick scenario and returns the
    /// raw per-tick results together with a per-model summary.
    /// </summary>
    /// <param name="models">Chat endpoints/models to evaluate.</param>
    /// <param name="tickDelayMs">Pause between ticks, in milliseconds.</param>
    /// <returns>All per-tick results plus an aggregated per-model summary.</returns>
    public async Task<(List<TickResult> Results, List<ModelSummary> Summary)> RunAsync(
        IReadOnlyList<ModelConfig> models,
        int tickDelayMs = 1000)
    {
        var wh = new Warehouse();
        await wh.Put("store", new MemoryStore());

        var allResults = new List<TickResult>();

        // Scripted node states applied before each inference.
        var ticks = new List<TickState>
        {
            new() { Load = 35, ErrorCount = 0, Enabled = true },
            new() { Load = 88, ErrorCount = 1, Enabled = true },
            new() { Load = 42, ErrorCount = 4, Enabled = true },
            new() { Load = 18, ErrorCount = 0, Enabled = false },
            new() { Load = 91, ErrorCount = 5, Enabled = true },
            new() { Load = 25, ErrorCount = 0, Enabled = true }
        };

        // Acceptable answers per tick. The sets use OrdinalIgnoreCase so that
        // grading agrees with the case-insensitive ValidFunctions/InvokeIfValid
        // matching (previously "restart" counted as Allowed but not Correct).
        var expectations = new List<TickExpectation>
        {
            new() { Tick = 1, AllowedFunctions = new HashSet<string?>(StringComparer.OrdinalIgnoreCase) { null }, Note = "Stable service; no action expected." },
            new() { Tick = 2, AllowedFunctions = new HashSet<string?>(StringComparer.OrdinalIgnoreCase) { "Restart" }, Note = "Overload; restart expected." },
            new() { Tick = 3, AllowedFunctions = new HashSet<string?>(StringComparer.OrdinalIgnoreCase) { "Restart", "ResetErrors" }, Note = "High error count; restart or reset is acceptable." },
            new() { Tick = 4, AllowedFunctions = new HashSet<string?>(StringComparer.OrdinalIgnoreCase) { "Enable" }, Note = "Service disabled; enable expected." },
            new() { Tick = 5, AllowedFunctions = new HashSet<string?>(StringComparer.OrdinalIgnoreCase) { "Restart" }, Note = "Overload and instability; restart expected." },
            new() { Tick = 6, AllowedFunctions = new HashSet<string?>(StringComparer.OrdinalIgnoreCase) { null }, Note = "Stable service; no action expected." }
        };

        foreach (var model in models)
        {
            Console.WriteLine($"=== Model: {model.Name} ({model.ModelName}) ===");

            var client = new OpenAIClient(
                model.ApiKey,
                new OpenAIClientOptions { Endpoint = new Uri(model.Endpoint), });
            var chat = client.GetChatClient(model.ModelName);

            // One throwaway request so model load time is not billed to tick 1's latency.
            Console.WriteLine($"Warming up {model.Name}...");
            await InferAsync(chat,
                "Return {\"function\":null,\"reason\":\"warmup\"}");
            Console.WriteLine("Warmup done");

            // Fresh node instance per model so results are independent.
            var node = await wh.Put("store/service-" + model.Name, new ServiceNode());
            var typeModel = LlmTypeModel.FromTypeDef(node.Instance?.Definition);

            for (int i = 0; i < ticks.Count; i++)
            {
                var tick = ticks[i];
                var expected = expectations[i];

                // Apply tick state before inference
                node.Load = tick.Load;
                node.ErrorCount = tick.ErrorCount;
                node.Enabled = tick.Enabled;

                var loadBefore = node.Load;
                var errorBefore = node.ErrorCount;
                var enabledBefore = node.Enabled;

                var jsonModel = typeModel.ToJson(node);
                var prompt = BuildPrompt(jsonModel, i + 1);

                var sw = Stopwatch.StartNew();
                string raw = await InferAsync(chat, prompt);
                sw.Stop();

                // Chatty models sometimes emit several JSON objects; the last
                // one is treated as the final answer ("repair"), the first is
                // kept for comparison.
                var parsedResult = ParseDecisionWithRepair(raw);
                var firstDecision = parsedResult.First;
                var finalDecision = parsedResult.Final;
                var parsed = finalDecision != null;
                var repaired = parsedResult.Repaired;
                var jsonObjectCount = parsedResult.Count;

                var firstPredicted = NormalizeFunction(firstDecision?.Function);
                var predicted = NormalizeFunction(finalDecision?.Function);

                var allowed = ValidFunctions.Contains(predicted);
                var correct = expected.AllowedFunctions.Contains(predicted);

                // Only dispatch functions that exist on the node; a null choice
                // (no action) never invokes anything.
                var invoked = false;
                if (allowed)
                    invoked = InvokeIfValid(node, predicted);

                var result = new TickResult
                {
                    Model = model.Name,
                    Tick = i + 1,
                    LoadBefore = loadBefore,
                    ErrorCountBefore = errorBefore,
                    EnabledBefore = enabledBefore,
                    RawResponse = raw,
                    FirstPredictedFunction = firstPredicted,
                    PredictedFunction = predicted,
                    Reason = finalDecision?.Reason,
                    Parsed = parsed,
                    Allowed = allowed,
                    Correct = correct,
                    Repaired = repaired,
                    JsonObjectCount = jsonObjectCount,
                    Invoked = invoked,
                    LatencyMs = sw.Elapsed.TotalMilliseconds,
                    LoadAfter = node.Load,
                    ErrorCountAfter = node.ErrorCount,
                    EnabledAfter = node.Enabled,
                    ExpectedText = string.Join(" | ", expected.AllowedFunctions.Select(x => x ?? "null"))
                };

                allResults.Add(result);

                Console.WriteLine($"Tick {result.Tick}");
                Console.WriteLine($"Before: Load={result.LoadBefore}, ErrorCount={result.ErrorCountBefore}, Enabled={result.EnabledBefore}");
                Console.WriteLine($"Expected: {result.ExpectedText}");
                Console.WriteLine($"LLM: {result.RawResponse}");
                Console.WriteLine($"First: {result.FirstPredictedFunction ?? "null"}");
                Console.WriteLine($"Final: {result.PredictedFunction ?? "null"}");
                Console.WriteLine($"Parsed={result.Parsed}, Allowed={result.Allowed}, Correct={result.Correct}, Repaired={result.Repaired}, Invoked={result.Invoked}, Latency={result.LatencyMs:F1} ms");
                Console.WriteLine($"After: Load={result.LoadAfter}, ErrorCount={result.ErrorCountAfter}, Enabled={result.EnabledAfter}");
                Console.WriteLine(new string('-', 72));

                await Task.Delay(tickDelayMs);
            }
        }

        var summary = Summarize(allResults);
        return (allResults, summary);
    }

    /// <summary>
    /// Sends one chat completion request and returns the raw response text.
    /// The system message pins the "raw JSON only" output contract.
    /// </summary>
    private static async Task<string> InferAsync(ChatClient chat, string prompt)
    {
        List<ChatMessage> messages = new()
        {
            new SystemChatMessage(
                "You control a distributed resource. " +
                "Return raw JSON only with fields: function and reason. " +
                "Do not wrap the response in markdown or code fences."),
            new UserChatMessage(prompt)
        };

        var options = new ChatCompletionOptions
        {
            MaxOutputTokenCount = 800, // Sets the maximum number of tokens to generate in the response
            Temperature = 0.8f,
            // Other options like NucleusSamplingFactor (TopP), FrequencyPenalty, etc. can also be set here
        };

        var result = await chat.CompleteChatAsync(messages, options);
        return result.Value.Content[0].Text;
    }

    /// <summary>
    /// Builds the per-tick user prompt from the serialized type definition and
    /// state. <paramref name="tick"/> is currently unused by the template but
    /// kept so callers can pass the tick number without a signature change.
    /// </summary>
    private static string BuildPrompt(string typeDefJson, int tick)
    {
        return
$@"You are given a runtime type definition for a distributed resource and its current state.
Choose at most one function to call.
Use only the functions defined in the type definition.
Do not invent functions.
Return ONLY valid JSON in this format:
{{ ""function"": ""<<name>>"", ""reason"": ""short explanation"" }}
If the current state is normal and no action is needed, return:
{{ ""function"": null, ""reason"": ""..."" }}.
Input:
{typeDefJson}";
    }

    /// <summary>
    /// Extracts every top-level JSON object from the response and deserializes
    /// the first and last. "Repaired" means the model emitted multiple objects
    /// whose function choices disagree — the last one wins.
    /// </summary>
    private static (LlmDecision? First, LlmDecision? Final, bool Repaired, int Count) ParseDecisionWithRepair(string text)
    {
        var objects = ExtractJsonObjects(text);
        if (objects.Count == 0)
            return (null, null, false, 0);

        var options = new JsonSerializerOptions
        {
            PropertyNameCaseInsensitive = true
        };

        LlmDecision? first = null;
        LlmDecision? final = null;

        // Best-effort: a malformed candidate simply yields null rather than
        // failing the whole tick.
        try { first = JsonSerializer.Deserialize<LlmDecision>(objects[0], options); } catch { }
        try { final = JsonSerializer.Deserialize<LlmDecision>(objects[^1], options); } catch { }

        bool repaired = objects.Count > 1 &&
            NormalizeFunction(first?.Function) != NormalizeFunction(final?.Function);

        return (first, final, repaired, objects.Count);
    }

    /// <summary>
    /// Scans the text for balanced top-level {...} spans after stripping an
    /// optional markdown code fence. Brace depth is tracked naively — braces
    /// inside JSON string values would confuse it, which is acceptable for the
    /// short responses this benchmark expects.
    /// </summary>
    private static List<string> ExtractJsonObjects(string text)
    {
        var results = new List<string>();
        if (string.IsNullOrWhiteSpace(text))
            return results;

        text = text.Trim();

        // Strip a leading ``` fence (with optional language tag) and the
        // matching trailing fence, if present.
        if (text.StartsWith("```", StringComparison.Ordinal))
        {
            var firstNewline = text.IndexOf('\n');
            if (firstNewline >= 0)
                text = text[(firstNewline + 1)..];

            var lastFence = text.LastIndexOf("```", StringComparison.Ordinal);
            if (lastFence >= 0)
                text = text[..lastFence];
        }

        int depth = 0;
        int start = -1;

        for (int i = 0; i < text.Length; i++)
        {
            char c = text[i];
            if (c == '{')
            {
                if (depth == 0)
                    start = i;
                depth++;
            }
            else if (c == '}')
            {
                if (depth > 0)
                {
                    depth--;
                    if (depth == 0 && start >= 0)
                    {
                        results.Add(text.Substring(start, i - start + 1));
                        start = -1;
                    }
                }
            }
        }

        return results;
    }

    /// <summary>
    /// Maps empty/whitespace answers and the literal string "null" to a real
    /// null ("no action"); otherwise returns the trimmed function name.
    /// </summary>
    private static string? NormalizeFunction(string? functionName)
    {
        if (string.IsNullOrWhiteSpace(functionName) ||
            string.Equals(functionName, "null", StringComparison.OrdinalIgnoreCase))
            return null;

        return functionName.Trim();
    }

    /// <summary>
    /// Dispatches the chosen function on the node. Matching is case-insensitive,
    /// consistent with <see cref="ValidFunctions"/> — previously a lower-cased
    /// answer passed the Allowed check but silently failed to invoke here.
    /// </summary>
    private static bool InvokeIfValid(ServiceNode node, string? functionName)
    {
        if (functionName == null)
            return false;

        if (string.Equals(functionName, "Restart", StringComparison.OrdinalIgnoreCase))
        {
            node.Restart();
            return true;
        }
        if (string.Equals(functionName, "ResetErrors", StringComparison.OrdinalIgnoreCase))
        {
            node.ResetErrors();
            return true;
        }
        if (string.Equals(functionName, "Enable", StringComparison.OrdinalIgnoreCase))
        {
            node.Enable();
            return true;
        }
        if (string.Equals(functionName, "Disable", StringComparison.OrdinalIgnoreCase))
        {
            node.Disable();
            return true;
        }

        return false;
    }

    /// <summary>
    /// Aggregates per-tick results into per-model rates and latency stats,
    /// ordered by model name.
    /// </summary>
    private static List<ModelSummary> Summarize(List<TickResult> results)
    {
        return results
            .GroupBy(r => r.Model)
            .Select(g =>
            {
                var latencies = g.Select(x => x.LatencyMs).OrderBy(x => x).ToList();
                return new ModelSummary
                {
                    Model = g.Key,
                    TotalTicks = g.Count(),
                    ParseRate = 100.0 * g.Count(x => x.Parsed) / g.Count(),
                    AllowedRate = 100.0 * g.Count(x => x.Allowed) / g.Count(),
                    CorrectRate = 100.0 * g.Count(x => x.Correct) / g.Count(),
                    MeanLatencyMs = g.Average(x => x.LatencyMs),
                    P95LatencyMs = Percentile(latencies, 0.95),
                    RepairRate = 100.0 * g.Count(x => x.Repaired) / g.Count(),
                };
            })
            .OrderBy(x => x.Model)
            .ToList();
    }

    /// <summary>
    /// Linear-interpolated percentile over an already-sorted list.
    /// Returns 0 for an empty list.
    /// </summary>
    /// <param name="sortedValues">Values sorted ascending.</param>
    /// <param name="p">Percentile in [0, 1], e.g. 0.95.</param>
    private static double Percentile(List<double> sortedValues, double p)
    {
        if (sortedValues.Count == 0)
            return 0;
        if (sortedValues.Count == 1)
            return sortedValues[0];

        double index = (sortedValues.Count - 1) * p;
        int lower = (int)Math.Floor(index);
        int upper = (int)Math.Ceiling(index);

        if (lower == upper)
            return sortedValues[lower];

        // Weighted blend between the two neighboring ranks.
        double weight = index - lower;
        return sortedValues[lower] * (1 - weight) + sortedValues[upper] * weight;
    }
}

View File

@@ -0,0 +1,15 @@
using System;
using System.ClientModel;
using System.Collections.Generic;
using System.Text;
namespace Esiur.Tests.Annotations
{
/// <summary>
/// Connection settings for one model under test: a display name, the
/// OpenAI-compatible endpoint URL, its credential, and the server-side
/// model identifier.
/// </summary>
public sealed class ModelConfig
{
    /// Display name used in logs and result grouping.
    public string Name { get; set; } = "";

    /// Base URL of the OpenAI-compatible server (e.g. LM Studio).
    public string Endpoint { get; set; } = "";

    /// API key credential; must be assigned before use (default! sentinel).
    public ApiKeyCredential ApiKey { get; set; } = default!;

    /// Model identifier as known to the server.
    public string ModelName { get; set; } = "";
}
}

View File

@@ -0,0 +1,21 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace Esiur.Tests.Annotations
{
/// <summary>
/// Aggregated benchmark metrics for one model. Rates are percentages (0-100);
/// latencies are in milliseconds.
/// </summary>
public sealed class ModelSummary
{
    /// Model display name the metrics belong to.
    public string Model { get; set; } = "";

    /// Number of ticks the model was evaluated on.
    public int TotalTicks { get; set; }

    /// Percentage of ticks whose response parsed into a decision.
    public double ParseRate { get; set; }

    /// Percentage of ticks whose chosen function was a known one (or null).
    public double AllowedRate { get; set; }

    /// Percentage of ticks whose choice matched the expectation set.
    public double CorrectRate { get; set; }

    /// Mean inference latency in milliseconds.
    public double MeanLatencyMs { get; set; }

    /// 95th-percentile inference latency in milliseconds.
    public double P95LatencyMs { get; set; }

    /// Percentage of ticks where multiple, disagreeing JSON objects were emitted.
    public double RepairRate { get; set; }
}
}

View File

@@ -0,0 +1,120 @@
// The endpoint for LM Studio's local server
using Esiur.Resource;
using Esiur.Stores;
using Esiur.Tests.Annotations;
using OpenAI;
using OpenAI.Chat;
using System.ClientModel;
using System.Data;

var endpoint = "http://localhost:1234/v1";
//var endpoint = "http://127.0.0.1:22334/v1";

// LM Studio ignores the key's value but the SDK requires one.
var credential = new ApiKeyCredential("llm");

var runner = new LlmRunner();

// (display name, server-side model id) pairs; expanded into full configs
// below so the shared endpoint and credential are written once.
var catalog = new (string Name, string ModelName)[]
{
    ("phi-3-mini-4k-instruct-qnn-npu:2", "phi-3-mini-4k-instruct-qnn-npu:2"),
    ("phi-3.5-mini-instruct-qnn-npu:1", "phi-3.5-mini-instruct-qnn-npu:1"),
    ("qwen2.5-7b-instruct-qnn-npu:2", "qwen2.5-7b-instruct-qnn-npu:2"),
    ("deepseek-r1-distill-qwen-7b-qnn-npu:1", "deepseek-r1-distill-qwen-7b-qnn-npu:1"),
    ("qwen3-4b-2507", "qwen/qwen3-4b-2507"),
    ("gemma-3n-e4b", "google/gemma-3n-e4b"),
    ("qwen2.5-7b-instruct-1m", "qwen2.5-7b-instruct-1m"),
    ("Phi-4", "microsoft/phi-4"),
    ("Qwen2.5-7B", "qwen2.5-7b-instruct"),
    ("gpt-oss", "openai/gpt-oss-20b"),
    ("qwen2.5-1.5b-instruct", "qwen2.5-1.5b-instruct"),
    ("ministral-3-3b", "mistralai/ministral-3-3b"),
    ("deepseek-r1-0528-qwen3-8b", "deepseek/deepseek-r1-0528-qwen3-8b")
};

var models = catalog
    .Select(entry => new ModelConfig
    {
        Name = entry.Name,
        Endpoint = endpoint,
        ApiKey = credential,
        ModelName = entry.ModelName
    })
    .ToList();

// 250 ms between ticks keeps the run short while still giving a local
// server a moment to settle between requests.
var (results, summary) = await runner.RunAsync(models,
    250);

foreach (var item in summary)
{
    Console.WriteLine($"{item.Model}: Correct={item.CorrectRate:F1}% Repair={item.RepairRate:F1}% Mean={item.MeanLatencyMs:F1} ms P95={item.P95LatencyMs:F1} ms");
}

View File

@@ -0,0 +1,52 @@
using Esiur.Resource;
using System;
using System.Collections.Generic;
using System.Text;
namespace Esiur.Tests.Annotations
{
// Esiur resource used as the benchmark target. NOTE(review): the lowercase
// [Export] fields (load, errorCount, enabled) are referenced below through
// PascalCase properties (Load, ErrorCount, Enabled) — presumably generated by
// the Esiur analyzer referenced with OutputItemType="Analyzer"; confirm
// against the generated partial.
[Annotation("Represents a managed service node with load, error count, and enable state. Functions control service operation.")]
[Annotation("usage_rules", @"1.Choose at most one function per tick.
2. Use only functions defined in the functions list.
3. Do not invent properties or functions.
4. Base the decision only on current property values and annotations.
5. Keep the service enabled as much as possible")]
[Resource]
public partial class ServiceNode
{
    [Annotation("Current service load percentage from 0 to 100. Values above 80 indicate overload.")]
    [Export] int load;

    [Annotation("Number of recent errors detected in the service. Values above 3 indicate instability. A value of 0 means no reset is needed")]
    [Export] int errorCount;

    [Annotation("True when the service is enabled and allowed to run. False means the service is disabled.")]
    [Export] bool enabled;

    // Clears errors, drops load to a nominal 10, and (re-)enables the service.
    [Annotation("Restart the service when load is very high or when repeated errors indicate instability.")]
    [Export] public void Restart()
    {
        ErrorCount = 0;
        Load = 10;
        Enabled = true;
    }

    // Zeroes the error counter only; load and enable state are untouched.
    [Annotation("Clear recent errors only when ErrorCount is greater than 0 and the service is otherwise stable.")]
    [Export] public void ResetErrors()
    {
        ErrorCount = 0;
    }

    [Annotation("Enable the service when Enabled is false.")]
    [Export] public void Enable()
    {
        Enabled = true;
    }

    [Annotation("Disable the service if it should stop processing requests.")]
    [Export] public void Disable()
    {
        Enabled = false;
    }
}
}

View File

@@ -0,0 +1,13 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace Esiur.Tests.Annotations
{
/// <summary>
/// Grading rule for one tick: the set of function names (or null, meaning
/// "no action") that count as a correct answer, plus a human-readable note.
/// </summary>
public sealed class TickExpectation
{
    /// 1-based tick number this expectation applies to.
    public int Tick { get; set; }

    /// Acceptable function choices; a null entry means "no action" is correct.
    public HashSet<string?> AllowedFunctions { get; set; } = new();

    /// Rationale for the expectation, for logs/reports.
    public string Note { get; set; } = "";
}
}

View File

@@ -0,0 +1,42 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace Esiur.Tests.Annotations
{
/// <summary>
/// Everything recorded about a single model/tick evaluation: node state
/// before and after, the raw LLM response, parsing/grading flags, and latency.
/// </summary>
public sealed class TickResult
{
    /// Display name of the model that produced this result.
    public string Model { get; set; } = "";

    /// 1-based tick number.
    public int Tick { get; set; }

    /// Node load applied before inference.
    public int LoadBefore { get; set; }

    /// Node error count applied before inference.
    public int ErrorCountBefore { get; set; }

    /// Node enabled flag applied before inference.
    public bool EnabledBefore { get; set; }

    /// Verbatim text returned by the model.
    public string RawResponse { get; set; } = "";

    /// Normalized function from the LAST JSON object in the response.
    public string? PredictedFunction { get; set; }

    /// Model's stated reason for its final choice.
    public string? Reason { get; set; }

    /// True when the response yielded a deserializable decision.
    public bool Parsed { get; set; }

    /// True when the predicted function is one the runner recognizes (or null).
    public bool Allowed { get; set; }

    /// True when the function was actually dispatched on the node.
    public bool Invoked { get; set; }

    /// True when the prediction matched the tick's expectation set.
    public bool Correct { get; set; }

    /// Inference wall-clock time in milliseconds.
    public double LatencyMs { get; set; }

    /// Node load after any invocation.
    public int LoadAfter { get; set; }

    /// Node error count after any invocation.
    public int ErrorCountAfter { get; set; }

    /// Node enabled flag after any invocation.
    public bool EnabledAfter { get; set; }

    /// Human-readable rendering of the expectation set ("Restart | null", ...).
    public string ExpectedText { get; set; } = "";

    /// True when multiple JSON objects disagreed and the last one was used.
    public bool Repaired { get; set; }

    /// Number of top-level JSON objects found in the response.
    public int JsonObjectCount { get; set; }

    // NOTE(review): FirstFunction/FinalFunction appear unset by LlmRunner
    // (it populates FirstPredictedFunction/PredictedFunction instead) —
    // confirm whether these two are still needed.
    public string? FirstFunction { get; set; }
    public string? FinalFunction { get; set; }

    /// Normalized function from the FIRST JSON object in the response.
    public string? FirstPredictedFunction { get; set; }
}
}

View File

@@ -0,0 +1,13 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace Esiur.Tests.Annotations
{
/// <summary>
/// Scripted node state for one tick, applied to the ServiceNode before the
/// model is queried.
/// </summary>
public sealed class TickState
{
    /// Load percentage to set on the node (0-100).
    public int Load { get; set; }

    /// Error count to set on the node.
    public int ErrorCount { get; set; }

    /// Enabled flag to set on the node.
    public bool Enabled { get; set; }
}
}