diff --git a/Tests/AI/Annotations/LlmRunner.cs b/Tests/AI/Annotations/LlmRunner.cs index be4f1a0..383032c 100644 --- a/Tests/AI/Annotations/LlmRunner.cs +++ b/Tests/AI/Annotations/LlmRunner.cs @@ -13,19 +13,7 @@ using System.Text.Json; namespace Esiur.Tests.Annotations; -//public sealed class TickState -//{ -// public int Load { get; set; } -// public int ErrorCount { get; set; } -// public bool Enabled { get; set; } -//} - -//public sealed class LlmDecision -//{ -// public string? Function { get; set; } -// public string? Reason { get; set; } -//} - + public sealed class LlmRunner @@ -215,25 +203,7 @@ Input: {typeDefJson}"; } - //private static LlmDecision? ParseDecision(string text) - //{ - // try - // { - // var json = ExtractJson(text); - - // return JsonSerializer.Deserialize( - // json, - // new JsonSerializerOptions - // { - // PropertyNameCaseInsensitive = true - // }); - // } - // catch - // { - // return null; - // } - //} - + private static (LlmDecision? First, LlmDecision? Final, bool Repaired, int Count) ParseDecisionWithRepair(string text) { var objects = ExtractJsonObjects(text); diff --git a/Tests/Serialization/Gvwie/GeneratorPattern.cs b/Tests/Serialization/Gvwie/GeneratorPattern.cs index bea7ad3..0b24c3a 100644 --- a/Tests/Serialization/Gvwie/GeneratorPattern.cs +++ b/Tests/Serialization/Gvwie/GeneratorPattern.cs @@ -12,7 +12,9 @@ namespace Esiur.Tests.Gvwie Alternating, Small, Medium, + Large, Ascending, Clustering, + MixedClustering, } } diff --git a/Tests/Serialization/Gvwie/IntArrayGenerator.cs b/Tests/Serialization/Gvwie/IntArrayGenerator.cs index 6dfdc93..2a35775 100644 --- a/Tests/Serialization/Gvwie/IntArrayGenerator.cs +++ b/Tests/Serialization/Gvwie/IntArrayGenerator.cs @@ -122,6 +122,79 @@ public static class IntArrayGenerator } + public static int[] GenerateMixedClusteredInt32(int length, + int minRunSize = 3, + int maxRunSize = 10) + { + if (length <= 0) + return Array.Empty(); + + if (minRunSize < 1) minRunSize = 1; + if (maxRunSize < minRunSize) maxRunSize = minRunSize; + + var data = new int[length]; + + var remaining = new[] + { + length / 3 + (length % 3 > 0 ? 1 : 0), // int8-compatible values + length / 3 + (length % 3 > 1 ? 1 : 0), // int16-compatible values + length / 3, // int32-compatible values + }; + + int index = 0; + int bucket = 0; + + while (index < length) + { + var selected = -1; + for (int offset = 0; offset < remaining.Length; offset++) + { + var candidate = (bucket + offset) % remaining.Length; + if (remaining[candidate] > 0) + { + selected = candidate; + break; + } + } + + if (selected < 0) + break; + + var runSize = Math.Min(remaining[selected], rng.Next(minRunSize, maxRunSize + 1)); + FillMixedClusterRun(data, index, runSize, selected); + + index += runSize; + remaining[selected] -= runSize; + bucket = (selected + 1) % remaining.Length; + } + + return data; + } + + private static void FillMixedClusterRun(int[] data, int startIndex, int runSize, int bucket) + { + var start = bucket switch + { + 0 => NextRunStart(64, sbyte.MaxValue, runSize), + 1 => NextRunStart(128, short.MaxValue, runSize), + 2 => NextRunStart(8_388_608, int.MaxValue, runSize), + _ => throw new ArgumentOutOfRangeException(nameof(bucket), bucket, "Unknown mixed cluster bucket.") + }; + + for (int i = 0; i < runSize; i++) + data[startIndex + i] = start + i; + } + + private static int NextRunStart(int minValue, int maxValue, int runSize) + { + var maxStart = (long)maxValue - runSize + 1; + if (maxStart < minValue) + throw new ArgumentOutOfRangeException(nameof(runSize), runSize, "Run size is larger than the available value range."); + + return (int)rng.NextInt64(minValue, maxStart + 1); + } + + // Generate random int array of given length and distribution public static int[] GenerateInt32(int length, GeneratorPattern pattern = GeneratorPattern.Uniform) { @@ -130,20 +203,30 @@ public static class IntArrayGenerator switch (pattern) { case GeneratorPattern.Uniform: + case GeneratorPattern.Large: // Random values in [-range, range] for (int i = 0; i < length; i++) data[i] = rng.Next(int.MinValue, int.MaxValue); break; + case GeneratorPattern.Medium: + for (int i = 0; i < length; i++) + data[i] = rng.Next(short.MinValue, short.MaxValue); + break; + + case GeneratorPattern.Small: + // Focused on small magnitudes to test ZigZag fast path + for (int i = 0; i < length; i++) + //data[i] = rng.Next(-64, 65); + data[i] = rng.Next(sbyte.MinValue, sbyte.MaxValue); + break; + + case GeneratorPattern.Positive: for (int i = 0; i < length; i++) data[i] = rng.Next(0, int.MaxValue); break; - case GeneratorPattern.Medium: - for (int i = 0; i < length; i++) - data[i] = rng.Next(0, short.MaxValue); - break; //case GeneratorPattern.Large: // for (int i = 0; i < length; i++) @@ -164,13 +247,6 @@ public static class IntArrayGenerator } break; - case GeneratorPattern.Small: - // Focused on small magnitudes to test ZigZag fast path - for (int i = 0; i < length; i++) - //data[i] = rng.Next(-64, 65); - data[i] = rng.Next(sbyte.MinValue, sbyte.MaxValue); - break; - case GeneratorPattern.Ascending: { @@ -183,7 +259,7 @@ public static class IntArrayGenerator case GeneratorPattern.Clustering: { // Build ascending runs and cast to int, clamping to int bounds - var runs = GenerateRuns(length, 3, 50, ((long)int.MinValue), (long)int.MaxValue, true); + var runs = GenerateRuns(length, 3, 10, ((long)int.MinValue), (long)int.MaxValue, true, 10, 15); for (int i = 0; i < length; i++) { long v = runs[i]; @@ -194,6 +270,9 @@ public static class IntArrayGenerator } break; + case GeneratorPattern.MixedClustering: + return GenerateMixedClusteredInt32(length); + default: throw new ArgumentException($"Unknown pattern: {pattern}"); @@ -472,4 +551,4 @@ public static class IntArrayGenerator return data; } -} \ No newline at end of file +} diff --git a/Tests/Serialization/Gvwie/IntArrayRunner.cs b/Tests/Serialization/Gvwie/IntArrayRunner.cs index 4c02d27..494811d 100644 --- a/Tests/Serialization/Gvwie/IntArrayRunner.cs +++ b/Tests/Serialization/Gvwie/IntArrayRunner.cs @@ -29,93 +29,111 @@ namespace Esiur.Tests.Gvwie public void Run() { - const int TEST_ITERATIONS = 10; + const int TEST_ITERATIONS = 100; const int SAMPLE_SIZE = 100; - Console.WriteLine(",Esiur,Aligned,FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal"); + + var st = new StringBuilder(); + + st.AppendLine(",Esiur,Esiur(Aligned),FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal"); + Console.WriteLine(",Esiur,Esiur(Aligned),FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal"); - Console.Write("Cluster (Int32);"); + st.Append("Cluster (Int32),"); + Console.Write("Cluster (Int32),"); - PrintAverage( + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Clustering)), TEST_ITERATIONS) - ); + )); - Console.Write("Large (Int32);"); + st.Append("Mixed Cluster (Int32),"); + Console.Write("Mixed Cluster (Int32),"); - PrintAverage( + st.AppendLine(PrintAverage( + Average(() => CompareMixedClusteredInt32(SAMPLE_SIZE), TEST_ITERATIONS) + )); + + st.Append("Large (Int32),"); + Console.Write("Large (Int32),"); + + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Uniform)), TEST_ITERATIONS) - ); - - Console.Write("Medium (Int32);"); - PrintAverage( + )); + + st.Append("Medium (Int32),"); + Console.Write("Medium (Int32),"); + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Medium)), TEST_ITERATIONS) - ); + )); - Console.Write("Small (Int32);"); - PrintAverage( + st.Append("Small (Int32),"); + Console.Write("Small (Int32),"); + st.AppendLine( PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Small)), TEST_ITERATIONS) - ); + )); - - Console.Write("Negative (Int32);"); - PrintAverage( + st.Append("Negative (Int32),"); + Console.Write("Negative (Int32),"); + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Negative)), TEST_ITERATIONS) - ); + )); - - - - Console.Write("Alternating (Int32);"); - PrintAverage( + st.Append("Alternating (Int32),"); + Console.Write("Alternating (Int32),"); + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Alternating)), TEST_ITERATIONS) - ); + )); - Console.Write("Ascending (Int32);"); + st.Append("Ascending (Int32),"); + Console.Write("Ascending (Int32),"); - PrintAverage( + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Ascending)), TEST_ITERATIONS) - ); + )); + st.Append("Int64,"); + Console.Write("Int64,"); - - - - Console.Write("Int64;"); - - PrintAverage( + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateInt64(SAMPLE_SIZE)), TEST_ITERATIONS) - ); + )); - Console.Write("Int32;"); + st.Append("Int32,"); + Console.Write("Int32,"); - PrintAverage( + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE)), TEST_ITERATIONS) - ); + )); - Console.Write("Int16;"); + st.Append("Int16,"); + Console.Write("Int16,"); - PrintAverage( + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateInt16(SAMPLE_SIZE)), TEST_ITERATIONS) - ); + )); - Console.Write("UInt64;"); + st.Append("UInt64,"); + Console.Write("UInt64,"); - PrintAverage( + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateUInt64(SAMPLE_SIZE)), TEST_ITERATIONS) - ); + )); - Console.Write("UInt32;"); + st.Append("UInt32,"); + Console.Write("UInt32,"); - PrintAverage( + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateUInt32(SAMPLE_SIZE)), TEST_ITERATIONS) - ); + )); - Console.Write("UInt16;"); + st.Append("UInt16,"); + Console.Write("UInt16,"); - PrintAverage( + st.AppendLine(PrintAverage( Average(() => CompareInt(IntArrayGenerator.GenerateUInt16(SAMPLE_SIZE)), TEST_ITERATIONS) - ); + )); + + File.WriteAllLines("int_array_comparison.csv", st.ToString().Split(Environment.NewLine)); } // Generate CSV suitable for Office Word chart where the sample size varies. @@ -132,6 +150,7 @@ namespace Esiur.Tests.Gvwie var generators = new List<(string name, Func fn)>() { ("Int32_Clustering", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Clustering)), iterations)), + ("Int32_MixedClustering", (size, iterations) => Average(() => CompareMixedClusteredInt32(size), iterations)), ("Int32_Positive", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Positive)), iterations)), ("Int32_Negative", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Negative)), iterations)), ("Int32_Small", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Small)), iterations)), @@ -192,6 +211,19 @@ namespace Esiur.Tests.Gvwie } } + private static (int, int, int, int, int, int, int, int, int) CompareMixedClusteredInt32(int sampleSize) + { + var sample = IntArrayGenerator.GenerateInt32(sampleSize, GeneratorPattern.MixedClustering); + var result = CompareInt(sample); + var requiredCapacity = sampleSize * sizeof(int); + + if (result.Item1 >= requiredCapacity || result.Item2 >= requiredCapacity) + throw new InvalidOperationException( + $"Mixed clustered Int32 encoding produced {result.Item1} bytes ({result.Item2} aligned) for {sampleSize} elements; expected less than {requiredCapacity} bytes."); + + return result; + } + public static (int, int, int, int, int, int, int, int, int) CompareInt(long[] sample) { var intRoot = new ArrayRoot() { Values = sample }; @@ -432,7 +464,6 @@ namespace Esiur.Tests.Gvwie sum.Average(x => x.Item9) }; - Console.WriteLine($"{rt[0]},{rt[1]},{rt[2]},{rt[3]},{rt[4]},{rt[5]},{rt[6]},{rt[7]},{rt[8]}"); return rt; @@ -440,6 +471,11 @@ namespace Esiur.Tests.Gvwie static string PrintAverage(double[] values) { + + var rt = $"{values[0]},{values[1]},{values[2]},{values[3]},{values[4]},{values[5]},{values[6]},{values[7]},{values[8]}"; + + Console.WriteLine(rt); + // Determine winner (lowest average size) var names = new string[] { "Esiur", "Aligned", "FlatBuffer", "ProtoBuffer", "MessagePack", "BSON", "CBOR", "Avro", "Optimal" }; var min = values.SkipLast(1).Min(); @@ -458,7 +494,7 @@ namespace Esiur.Tests.Gvwie Console.ForegroundColor = ConsoleColor.White; - return "Unknown"; + return rt; } public static byte[] SerializeFlatBuffers(ArrayRoot array)