2
0
mirror of https://github.com/esiur/esiur-dotnet.git synced 2026-06-13 14:38:43 +00:00

Mixed clusters

This commit is contained in:
2026-06-07 23:19:38 +03:00
parent 0bac2f8a74
commit 8143da2eee
4 changed files with 184 additions and 97 deletions
+2 -32
View File
@@ -13,19 +13,7 @@ using System.Text.Json;
namespace Esiur.Tests.Annotations;
//public sealed class TickState
//{
// public int Load { get; set; }
// public int ErrorCount { get; set; }
// public bool Enabled { get; set; }
//}
//public sealed class LlmDecision
//{
// public string? Function { get; set; }
// public string? Reason { get; set; }
//}
public sealed class LlmRunner
@@ -215,25 +203,7 @@ Input:
{typeDefJson}";
}
//private static LlmDecision? ParseDecision(string text)
//{
// try
// {
// var json = ExtractJson(text);
// return JsonSerializer.Deserialize<LlmDecision>(
// json,
// new JsonSerializerOptions
// {
// PropertyNameCaseInsensitive = true
// });
// }
// catch
// {
// return null;
// }
//}
private static (LlmDecision? First, LlmDecision? Final, bool Repaired, int Count) ParseDecisionWithRepair(string text)
{
var objects = ExtractJsonObjects(text);
@@ -12,7 +12,9 @@ namespace Esiur.Tests.Gvwie
Alternating,
Small,
Medium,
Large,
Ascending,
Clustering,
MixedClustering,
}
}
+92 -13
View File
@@ -122,6 +122,79 @@ public static class IntArrayGenerator
}
/// <summary>
/// Builds an array made of short ascending runs whose values alternate between
/// three magnitude buckets (int8-, int16- and int32-compatible values).
/// </summary>
/// <param name="length">Number of elements to produce; values of zero or less yield an empty array.</param>
/// <param name="minRunSize">Smallest run length to draw; values below 1 are raised to 1.</param>
/// <param name="maxRunSize">Largest run length to draw; values below <paramref name="minRunSize"/> are raised to it.</param>
/// <returns>An array of exactly <paramref name="length"/> elements, roughly one third per bucket.</returns>
/// <remarks>
/// Each run is filled by <c>FillMixedClusterRun</c>, which asks <c>NextRunStart</c> for a start value
/// inside the bucket's range. <c>NextRunStart</c> throws <see cref="ArgumentOutOfRangeException"/> when a
/// run is longer than that range (the first bucket spans 64..sbyte.MaxValue), so very large
/// <paramref name="maxRunSize"/> values combined with a large <paramref name="length"/> can still fail there.
/// </remarks>
public static int[] GenerateMixedClusteredInt32(int length,
    int minRunSize = 3,
    int maxRunSize = 10)
{
    if (length <= 0)
        return Array.Empty<int>();

    if (minRunSize < 1) minRunSize = 1;
    if (maxRunSize < minRunSize) maxRunSize = minRunSize;

    // Random.Next takes an exclusive upper bound. "maxRunSize + 1" would wrap to
    // int.MinValue when maxRunSize is int.MaxValue and make Random.Next throw, so
    // saturate instead. Every draw is capped by the bucket quota below, which is
    // always far smaller than int.MaxValue, so the produced runs are unaffected.
    int runSizeUpperExclusive = maxRunSize == int.MaxValue ? int.MaxValue : maxRunSize + 1;

    var data = new int[length];

    // Per-bucket element quota; the remainder of length / 3 goes to the first buckets.
    var remaining = new[]
    {
        length / 3 + (length % 3 > 0 ? 1 : 0), // int8-compatible values
        length / 3 + (length % 3 > 1 ? 1 : 0), // int16-compatible values
        length / 3, // int32-compatible values
    };

    int index = 0;
    int bucket = 0;

    while (index < length)
    {
        // Starting at the preferred bucket, pick the first one that still has quota left.
        var selected = -1;
        for (int offset = 0; offset < remaining.Length; offset++)
        {
            var candidate = (bucket + offset) % remaining.Length;
            if (remaining[candidate] > 0)
            {
                selected = candidate;
                break;
            }
        }

        // Defensive: the quotas sum to length, so this is not expected to trigger.
        if (selected < 0)
            break;

        // Never let a run exceed what is left of the selected bucket's quota.
        var runSize = Math.Min(remaining[selected], rng.Next(minRunSize, runSizeUpperExclusive));
        FillMixedClusterRun(data, index, runSize, selected);

        index += runSize;
        remaining[selected] -= runSize;

        // Rotate so the next run prefers the following bucket.
        bucket = (selected + 1) % remaining.Length;
    }

    return data;
}
/// <summary>
/// Writes one ascending run (consecutive values, step 1) into <paramref name="data"/>.
/// </summary>
/// <param name="data">Destination array.</param>
/// <param name="startIndex">Index of the first element to write.</param>
/// <param name="runSize">Number of consecutive elements to write.</param>
/// <param name="bucket">Value-range selector: 0, 1 or 2.</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="bucket"/> is not 0, 1 or 2.</exception>
private static void FillMixedClusterRun(int[] data, int startIndex, int runSize, int bucket)
{
    // Choose the first value of the run from the range that belongs to the bucket.
    int firstValue;
    switch (bucket)
    {
        case 0:
            firstValue = NextRunStart(64, sbyte.MaxValue, runSize);
            break;
        case 1:
            firstValue = NextRunStart(128, short.MaxValue, runSize);
            break;
        case 2:
            firstValue = NextRunStart(8_388_608, int.MaxValue, runSize);
            break;
        default:
            throw new ArgumentOutOfRangeException(nameof(bucket), bucket, "Unknown mixed cluster bucket.");
    }

    // Lay down firstValue, firstValue + 1, ... for runSize elements.
    int step = 0;
    while (step < runSize)
    {
        data[startIndex + step] = firstValue + step;
        step++;
    }
}
/// <summary>
/// Draws a random start value such that a run of <paramref name="runSize"/> consecutive
/// values beginning there stays within [<paramref name="minValue"/>, <paramref name="maxValue"/>].
/// </summary>
/// <param name="minValue">Lowest permitted start value.</param>
/// <param name="maxValue">Highest value any element of the run may reach.</param>
/// <param name="runSize">Length of the run that will start at the returned value.</param>
/// <returns>A start value between <paramref name="minValue"/> and <c>maxValue - runSize + 1</c>, inclusive.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when the run cannot fit in the range.</exception>
private static int NextRunStart(int minValue, int maxValue, int runSize)
{
    // Computed in 64-bit so the arithmetic cannot overflow near int.MaxValue.
    long highestStart = (long)maxValue - runSize + 1;

    if (highestStart < minValue)
    {
        throw new ArgumentOutOfRangeException(nameof(runSize), runSize, "Run size is larger than the available value range.");
    }

    // NextInt64 excludes its upper bound, hence the + 1 to make highestStart reachable.
    long exclusiveUpper = highestStart + 1;
    long picked = rng.NextInt64(minValue, exclusiveUpper);
    return (int)picked;
}
// Generate random int array of given length and distribution
public static int[] GenerateInt32(int length, GeneratorPattern pattern = GeneratorPattern.Uniform)
{
@@ -130,20 +203,30 @@ public static class IntArrayGenerator
switch (pattern)
{
case GeneratorPattern.Uniform:
case GeneratorPattern.Large:
// Random values in [int.MinValue, int.MaxValue) (upper bound is exclusive)
for (int i = 0; i < length; i++)
data[i] = rng.Next(int.MinValue, int.MaxValue);
break;
case GeneratorPattern.Medium:
for (int i = 0; i < length; i++)
data[i] = rng.Next(short.MinValue, short.MaxValue);
break;
case GeneratorPattern.Small:
// Focused on small magnitudes to test ZigZag fast path
for (int i = 0; i < length; i++)
//data[i] = rng.Next(-64, 65);
data[i] = rng.Next(sbyte.MinValue, sbyte.MaxValue);
break;
case GeneratorPattern.Positive:
for (int i = 0; i < length; i++)
data[i] = rng.Next(0, int.MaxValue);
break;
case GeneratorPattern.Medium:
for (int i = 0; i < length; i++)
data[i] = rng.Next(0, short.MaxValue);
break;
//case GeneratorPattern.Large:
// for (int i = 0; i < length; i++)
@@ -164,13 +247,6 @@ public static class IntArrayGenerator
}
break;
case GeneratorPattern.Small:
// Focused on small magnitudes to test ZigZag fast path
for (int i = 0; i < length; i++)
//data[i] = rng.Next(-64, 65);
data[i] = rng.Next(sbyte.MinValue, sbyte.MaxValue);
break;
case GeneratorPattern.Ascending:
{
@@ -183,7 +259,7 @@ public static class IntArrayGenerator
case GeneratorPattern.Clustering:
{
// Build ascending runs and cast to int, clamping to int bounds
var runs = GenerateRuns(length, 3, 50, ((long)int.MinValue), (long)int.MaxValue, true);
var runs = GenerateRuns(length, 3, 10, ((long)int.MinValue), (long)int.MaxValue, true, 10, 15);
for (int i = 0; i < length; i++)
{
long v = runs[i];
@@ -194,6 +270,9 @@ public static class IntArrayGenerator
}
break;
case GeneratorPattern.MixedClustering:
return GenerateMixedClusteredInt32(length);
default:
throw new ArgumentException($"Unknown pattern: {pattern}");
@@ -472,4 +551,4 @@ public static class IntArrayGenerator
return data;
}
}
}
+88 -52
View File
@@ -29,93 +29,111 @@ namespace Esiur.Tests.Gvwie
public void Run()
{
const int TEST_ITERATIONS = 10;
const int TEST_ITERATIONS = 100;
const int SAMPLE_SIZE = 100;
Console.WriteLine(",Esiur,Aligned,FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal");
var st = new StringBuilder();
st.AppendLine(",Esiur,Esiur(Aligned),FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal");
Console.WriteLine(",Esiur,Esiur(Aligned),FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal");
Console.Write("Cluster (Int32);");
st.Append("Cluster (Int32),");
Console.Write("Cluster (Int32),");
PrintAverage(
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Clustering)), TEST_ITERATIONS)
);
));
Console.Write("Large (Int32);");
st.Append("Mixed Cluster (Int32),");
Console.Write("Mixed Cluster (Int32),");
PrintAverage(
st.AppendLine(PrintAverage(
Average(() => CompareMixedClusteredInt32(SAMPLE_SIZE), TEST_ITERATIONS)
));
st.Append("Large (Int32),");
Console.Write("Large (Int32),");
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Uniform)), TEST_ITERATIONS)
);
Console.Write("Medium (Int32);");
PrintAverage(
));
st.Append("Medium (Int32),");
Console.Write("Medium (Int32),");
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Medium)), TEST_ITERATIONS)
);
));
Console.Write("Small (Int32);");
PrintAverage(
st.Append("Small (Int32),");
Console.Write("Small (Int32),");
st.AppendLine( PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Small)), TEST_ITERATIONS)
);
));
Console.Write("Negative (Int32);");
PrintAverage(
st.Append("Negative (Int32),");
Console.Write("Negative (Int32),");
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Negative)), TEST_ITERATIONS)
);
));
Console.Write("Alternating (Int32);");
PrintAverage(
st.Append("Alternating (Int32),");
Console.Write("Alternating (Int32),");
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Alternating)), TEST_ITERATIONS)
);
));
Console.Write("Ascending (Int32);");
st.Append("Ascending (Int32),");
Console.Write("Ascending (Int32),");
PrintAverage(
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Ascending)), TEST_ITERATIONS)
);
));
st.Append("Int64,");
Console.Write("Int64,");
Console.Write("Int64;");
PrintAverage(
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateInt64(SAMPLE_SIZE)), TEST_ITERATIONS)
);
));
Console.Write("Int32;");
st.Append("Int32,");
Console.Write("Int32,");
PrintAverage(
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE)), TEST_ITERATIONS)
);
));
Console.Write("Int16;");
st.Append("Int16,");
Console.Write("Int16,");
PrintAverage(
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateInt16(SAMPLE_SIZE)), TEST_ITERATIONS)
);
));
Console.Write("UInt64;");
st.Append("UInt64,");
Console.Write("UInt64,");
PrintAverage(
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateUInt64(SAMPLE_SIZE)), TEST_ITERATIONS)
);
));
Console.Write("UInt32;");
st.Append("UInt32,");
Console.Write("UInt32,");
PrintAverage(
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateUInt32(SAMPLE_SIZE)), TEST_ITERATIONS)
);
));
Console.Write("UInt16;");
st.Append("UInt16,");
Console.Write("UInt16,");
PrintAverage(
st.AppendLine(PrintAverage(
Average(() => CompareInt(IntArrayGenerator.GenerateUInt16(SAMPLE_SIZE)), TEST_ITERATIONS)
);
));
File.WriteAllLines("int_array_comparison.csv", st.ToString().Split(Environment.NewLine));
}
// Generate CSV suitable for Office Word chart where the sample size varies.
@@ -132,6 +150,7 @@ namespace Esiur.Tests.Gvwie
var generators = new List<(string name, Func<int, int, double[]> fn)>()
{
("Int32_Clustering", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Clustering)), iterations)),
("Int32_MixedClustering", (size, iterations) => Average(() => CompareMixedClusteredInt32(size), iterations)),
("Int32_Positive", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Positive)), iterations)),
("Int32_Negative", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Negative)), iterations)),
("Int32_Small", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Small)), iterations)),
@@ -192,6 +211,19 @@ namespace Esiur.Tests.Gvwie
}
}
/// <summary>
/// Generates a mixed-clustered Int32 sample, runs the size comparison on it and
/// checks that the first two reported sizes are below the raw 4-bytes-per-element size.
/// </summary>
/// <param name="sampleSize">Number of elements in the generated sample.</param>
/// <returns>The tuple returned by <c>CompareInt</c> for the generated sample.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when either of the first two sizes is not smaller than <c>sampleSize * sizeof(int)</c>.
/// </exception>
private static (int, int, int, int, int, int, int, int, int) CompareMixedClusteredInt32(int sampleSize)
{
    var sizes = CompareInt(IntArrayGenerator.GenerateInt32(sampleSize, GeneratorPattern.MixedClustering));

    // Size of the same data stored as plain 32-bit integers.
    var rawByteCount = sampleSize * sizeof(int);

    var plainFits = sizes.Item1 < rawByteCount;
    var alignedFits = sizes.Item2 < rawByteCount;
    if (plainFits && alignedFits)
    {
        return sizes;
    }

    throw new InvalidOperationException(
        $"Mixed clustered Int32 encoding produced {sizes.Item1} bytes ({sizes.Item2} aligned) for {sampleSize} elements; expected less than {rawByteCount} bytes.");
}
public static (int, int, int, int, int, int, int, int, int) CompareInt(long[] sample)
{
var intRoot = new ArrayRoot<long>() { Values = sample };
@@ -432,7 +464,6 @@ namespace Esiur.Tests.Gvwie
sum.Average(x => x.Item9)
};
Console.WriteLine($"{rt[0]},{rt[1]},{rt[2]},{rt[3]},{rt[4]},{rt[5]},{rt[6]},{rt[7]},{rt[8]}");
return rt;
@@ -440,6 +471,11 @@ namespace Esiur.Tests.Gvwie
static string PrintAverage(double[] values)
{
var rt = $"{values[0]},{values[1]},{values[2]},{values[3]},{values[4]},{values[5]},{values[6]},{values[7]},{values[8]}";
Console.WriteLine(rt);
// Determine winner (lowest average size)
var names = new string[] { "Esiur", "Aligned", "FlatBuffer", "ProtoBuffer", "MessagePack", "BSON", "CBOR", "Avro", "Optimal" };
var min = values.SkipLast(1).Min();
@@ -458,7 +494,7 @@ namespace Esiur.Tests.Gvwie
Console.ForegroundColor = ConsoleColor.White;
return "Unknown";
return rt;
}
public static byte[] SerializeFlatBuffers<T>(ArrayRoot<T> array)