mirror of
https://github.com/esiur/esiur-dotnet.git
synced 2026-06-13 14:38:43 +00:00
Mixed clusters
This commit is contained in:
@@ -13,19 +13,7 @@ using System.Text.Json;
|
||||
|
||||
namespace Esiur.Tests.Annotations;
|
||||
|
||||
//public sealed class TickState
|
||||
//{
|
||||
// public int Load { get; set; }
|
||||
// public int ErrorCount { get; set; }
|
||||
// public bool Enabled { get; set; }
|
||||
//}
|
||||
|
||||
//public sealed class LlmDecision
|
||||
//{
|
||||
// public string? Function { get; set; }
|
||||
// public string? Reason { get; set; }
|
||||
//}
|
||||
|
||||
|
||||
|
||||
|
||||
public sealed class LlmRunner
|
||||
@@ -215,25 +203,7 @@ Input:
|
||||
{typeDefJson}";
|
||||
}
|
||||
|
||||
//private static LlmDecision? ParseDecision(string text)
|
||||
//{
|
||||
// try
|
||||
// {
|
||||
// var json = ExtractJson(text);
|
||||
|
||||
// return JsonSerializer.Deserialize<LlmDecision>(
|
||||
// json,
|
||||
// new JsonSerializerOptions
|
||||
// {
|
||||
// PropertyNameCaseInsensitive = true
|
||||
// });
|
||||
// }
|
||||
// catch
|
||||
// {
|
||||
// return null;
|
||||
// }
|
||||
//}
|
||||
|
||||
|
||||
private static (LlmDecision? First, LlmDecision? Final, bool Repaired, int Count) ParseDecisionWithRepair(string text)
|
||||
{
|
||||
var objects = ExtractJsonObjects(text);
|
||||
|
||||
@@ -12,7 +12,9 @@ namespace Esiur.Tests.Gvwie
|
||||
Alternating,
|
||||
Small,
|
||||
Medium,
|
||||
Large,
|
||||
Ascending,
|
||||
Clustering,
|
||||
MixedClustering,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,6 +122,79 @@ public static class IntArrayGenerator
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
/// Generates an array built from short ascending runs whose magnitudes rotate between
/// three buckets: int8-compatible, int16-compatible and int32-compatible values.
/// </summary>
/// <param name="length">Number of elements to produce; zero or negative yields an empty array.</param>
/// <param name="minRunSize">Smallest run length to draw; values below 1 are raised to 1.</param>
/// <param name="maxRunSize">
/// Largest run length to draw (inclusive); values below <paramref name="minRunSize"/> are raised to it.
/// A run is additionally shortened to the remaining budget of its bucket and to the number of
/// values the bucket's range can hold.
/// </param>
/// <returns>
/// An array of exactly <paramref name="length"/> elements, with the element count split as evenly
/// as possible between the three buckets.
/// </returns>
public static int[] GenerateMixedClusteredInt32(int length,
    int minRunSize = 3,
    int maxRunSize = 10)
{
    if (length <= 0)
        return Array.Empty<int>();

    // Keep both bounds at or below int.MaxValue - 1 so the exclusive upper bound
    // "maxRunSize + 1" handed to rng.Next below cannot overflow.
    minRunSize = Math.Clamp(minRunSize, 1, int.MaxValue - 1);
    maxRunSize = Math.Clamp(maxRunSize, minRunSize, int.MaxValue - 1);

    var data = new int[length];

    // Element budget per bucket; the remainder of length / 3 is handed to the
    // smaller-magnitude buckets first, so the budgets always sum to length.
    var remaining = new[]
    {
        length / 3 + (length % 3 > 0 ? 1 : 0), // int8-compatible values
        length / 3 + (length % 3 > 1 ? 1 : 0), // int16-compatible values
        length / 3,                            // int32-compatible values
    };

    int index = 0;
    int bucket = 0;

    while (index < length)
    {
        // Round-robin: starting at the preferred bucket, take the first one with budget left.
        var selected = -1;
        for (int offset = 0; offset < remaining.Length; offset++)
        {
            var candidate = (bucket + offset) % remaining.Length;
            if (remaining[candidate] > 0)
            {
                selected = candidate;
                break;
            }
        }

        // Defensive only: the budgets sum to length, so a bucket should always be available here.
        if (selected < 0)
            break;

        // Longest ascending run the bucket's value range can hold. These widths mirror the
        // [min, max] ranges FillMixedClusterRun draws its run start from; asking it for a
        // longer run makes NextRunStart throw ArgumentOutOfRangeException.
        var rangeCapacity = selected switch
        {
            0 => sbyte.MaxValue - 64 + 1,
            1 => short.MaxValue - 128 + 1,
            _ => int.MaxValue - 8_388_608 + 1,
        };

        var drawn = rng.Next(minRunSize, maxRunSize + 1);
        var runSize = Math.Min(Math.Min(remaining[selected], drawn), rangeCapacity);
        FillMixedClusterRun(data, index, runSize, selected);

        index += runSize;
        remaining[selected] -= runSize;

        // Prefer the next bucket for the following run so magnitudes keep alternating.
        bucket = (selected + 1) % remaining.Length;
    }

    return data;
}
|
||||
|
||||
/// <summary>
/// Writes one ascending run (first, first + 1, ...) of <paramref name="runSize"/> elements into
/// <paramref name="data"/> beginning at <paramref name="startIndex"/>. The first value is obtained
/// from NextRunStart using the value range that belongs to <paramref name="bucket"/>.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">The bucket is not 0, 1 or 2.</exception>
private static void FillMixedClusterRun(int[] data, int startIndex, int runSize, int bucket)
{
    int first;

    switch (bucket)
    {
        case 0:
            // Upper half of the positive sbyte range.
            first = NextRunStart(64, sbyte.MaxValue, runSize);
            break;

        case 1:
            // Above the sbyte range, up to short.MaxValue.
            first = NextRunStart(128, short.MaxValue, runSize);
            break;

        case 2:
            // From 2^23 up to int.MaxValue.
            first = NextRunStart(8_388_608, int.MaxValue, runSize);
            break;

        default:
            throw new ArgumentOutOfRangeException(nameof(bucket), bucket, "Unknown mixed cluster bucket.");
    }

    int written = 0;
    while (written < runSize)
    {
        data[startIndex + written] = first + written;
        written++;
    }
}
|
||||
|
||||
/// <summary>
/// Picks a starting value for an ascending run of <paramref name="runSize"/> consecutive values
/// so that the start is at least <paramref name="minValue"/> and the computed last admissible
/// start is <c>maxValue - runSize + 1</c>.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">
/// The last admissible start lies below <paramref name="minValue"/>, i.e. the run does not fit the range.
/// </exception>
private static int NextRunStart(int minValue, int maxValue, int runSize)
{
    // Computed as long so that maxValue == int.MaxValue cannot overflow.
    long lastStart = maxValue;
    lastStart -= runSize;
    lastStart += 1;

    if (minValue > lastStart)
        throw new ArgumentOutOfRangeException(nameof(runSize), runSize, "Run size is larger than the available value range.");

    // NOTE(review): rng is declared outside this block; presumably a System.Random, whose
    // NextInt64 upper bound is exclusive - hence the + 1 to make lastStart selectable.
    long exclusiveBound = lastStart + 1;
    long picked = rng.NextInt64(minValue, exclusiveBound);

    return (int)picked;
}
|
||||
|
||||
|
||||
// Generate random int array of given length and distribution
|
||||
public static int[] GenerateInt32(int length, GeneratorPattern pattern = GeneratorPattern.Uniform)
|
||||
{
|
||||
@@ -130,20 +203,30 @@ public static class IntArrayGenerator
|
||||
switch (pattern)
|
||||
{
|
||||
case GeneratorPattern.Uniform:
|
||||
case GeneratorPattern.Large:
|
||||
// Random values from int.MinValue up to int.MaxValue (for System.Random.Next the upper bound is exclusive)
|
||||
for (int i = 0; i < length; i++)
|
||||
data[i] = rng.Next(int.MinValue, int.MaxValue);
|
||||
break;
|
||||
|
||||
case GeneratorPattern.Medium:
|
||||
for (int i = 0; i < length; i++)
|
||||
data[i] = rng.Next(short.MinValue, short.MaxValue);
|
||||
break;
|
||||
|
||||
case GeneratorPattern.Small:
|
||||
// Focused on small magnitudes to test ZigZag fast path
|
||||
for (int i = 0; i < length; i++)
|
||||
//data[i] = rng.Next(-64, 65);
|
||||
data[i] = rng.Next(sbyte.MinValue, sbyte.MaxValue);
|
||||
break;
|
||||
|
||||
|
||||
case GeneratorPattern.Positive:
|
||||
for (int i = 0; i < length; i++)
|
||||
data[i] = rng.Next(0, int.MaxValue);
|
||||
break;
|
||||
|
||||
case GeneratorPattern.Medium:
|
||||
for (int i = 0; i < length; i++)
|
||||
data[i] = rng.Next(0, short.MaxValue);
|
||||
break;
|
||||
|
||||
//case GeneratorPattern.Large:
|
||||
// for (int i = 0; i < length; i++)
|
||||
@@ -164,13 +247,6 @@ public static class IntArrayGenerator
|
||||
}
|
||||
break;
|
||||
|
||||
case GeneratorPattern.Small:
|
||||
// Focused on small magnitudes to test ZigZag fast path
|
||||
for (int i = 0; i < length; i++)
|
||||
//data[i] = rng.Next(-64, 65);
|
||||
data[i] = rng.Next(sbyte.MinValue, sbyte.MaxValue);
|
||||
break;
|
||||
|
||||
|
||||
case GeneratorPattern.Ascending:
|
||||
{
|
||||
@@ -183,7 +259,7 @@ public static class IntArrayGenerator
|
||||
case GeneratorPattern.Clustering:
|
||||
{
|
||||
// Build ascending runs and cast to int, clamping to int bounds
|
||||
var runs = GenerateRuns(length, 3, 50, ((long)int.MinValue), (long)int.MaxValue, true);
|
||||
var runs = GenerateRuns(length, 3, 10, ((long)int.MinValue), (long)int.MaxValue, true, 10, 15);
|
||||
for (int i = 0; i < length; i++)
|
||||
{
|
||||
long v = runs[i];
|
||||
@@ -194,6 +270,9 @@ public static class IntArrayGenerator
|
||||
}
|
||||
break;
|
||||
|
||||
case GeneratorPattern.MixedClustering:
|
||||
return GenerateMixedClusteredInt32(length);
|
||||
|
||||
|
||||
default:
|
||||
throw new ArgumentException($"Unknown pattern: {pattern}");
|
||||
@@ -472,4 +551,4 @@ public static class IntArrayGenerator
|
||||
|
||||
return data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,93 +29,111 @@ namespace Esiur.Tests.Gvwie
|
||||
public void Run()
|
||||
{
|
||||
|
||||
const int TEST_ITERATIONS = 10;
|
||||
const int TEST_ITERATIONS = 100;
|
||||
const int SAMPLE_SIZE = 100;
|
||||
|
||||
Console.WriteLine(",Esiur,Aligned,FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal");
|
||||
|
||||
var st = new StringBuilder();
|
||||
|
||||
st.AppendLine(",Esiur,Esiur(Aligned),FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal");
|
||||
Console.WriteLine(",Esiur,Esiur(Aligned),FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal");
|
||||
|
||||
|
||||
Console.Write("Cluster (Int32);");
|
||||
st.Append("Cluster (Int32),");
|
||||
Console.Write("Cluster (Int32),");
|
||||
|
||||
PrintAverage(
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Clustering)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
Console.Write("Large (Int32);");
|
||||
st.Append("Mixed Cluster (Int32),");
|
||||
Console.Write("Mixed Cluster (Int32),");
|
||||
|
||||
PrintAverage(
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareMixedClusteredInt32(SAMPLE_SIZE), TEST_ITERATIONS)
|
||||
));
|
||||
|
||||
st.Append("Large (Int32),");
|
||||
Console.Write("Large (Int32),");
|
||||
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Uniform)), TEST_ITERATIONS)
|
||||
);
|
||||
|
||||
Console.Write("Medium (Int32);");
|
||||
PrintAverage(
|
||||
));
|
||||
|
||||
st.Append("Medium (Int32),");
|
||||
Console.Write("Medium (Int32),");
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Medium)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
Console.Write("Small (Int32);");
|
||||
PrintAverage(
|
||||
st.Append("Small (Int32),");
|
||||
Console.Write("Small (Int32),");
|
||||
st.AppendLine( PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Small)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
|
||||
Console.Write("Negative (Int32);");
|
||||
PrintAverage(
|
||||
st.Append("Negative (Int32),");
|
||||
Console.Write("Negative (Int32),");
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Negative)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
|
||||
|
||||
|
||||
Console.Write("Alternating (Int32);");
|
||||
PrintAverage(
|
||||
st.Append("Alternating (Int32),");
|
||||
Console.Write("Alternating (Int32),");
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Alternating)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
Console.Write("Ascending (Int32);");
|
||||
st.Append("Ascending (Int32),");
|
||||
Console.Write("Ascending (Int32),");
|
||||
|
||||
PrintAverage(
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Ascending)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
st.Append("Int64,");
|
||||
Console.Write("Int64,");
|
||||
|
||||
|
||||
|
||||
|
||||
Console.Write("Int64;");
|
||||
|
||||
PrintAverage(
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateInt64(SAMPLE_SIZE)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
Console.Write("Int32;");
|
||||
st.Append("Int32,");
|
||||
Console.Write("Int32,");
|
||||
|
||||
PrintAverage(
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
Console.Write("Int16;");
|
||||
st.Append("Int16,");
|
||||
Console.Write("Int16,");
|
||||
|
||||
PrintAverage(
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateInt16(SAMPLE_SIZE)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
Console.Write("UInt64;");
|
||||
st.Append("UInt64,");
|
||||
Console.Write("UInt64,");
|
||||
|
||||
PrintAverage(
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateUInt64(SAMPLE_SIZE)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
Console.Write("UInt32;");
|
||||
st.Append("UInt32,");
|
||||
Console.Write("UInt32,");
|
||||
|
||||
PrintAverage(
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateUInt32(SAMPLE_SIZE)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
Console.Write("UInt16;");
|
||||
st.Append("UInt16,");
|
||||
Console.Write("UInt16,");
|
||||
|
||||
PrintAverage(
|
||||
st.AppendLine(PrintAverage(
|
||||
Average(() => CompareInt(IntArrayGenerator.GenerateUInt16(SAMPLE_SIZE)), TEST_ITERATIONS)
|
||||
);
|
||||
));
|
||||
|
||||
File.WriteAllLines("int_array_comparison.csv", st.ToString().Split(Environment.NewLine));
|
||||
}
|
||||
|
||||
// Generate CSV suitable for Office Word chart where the sample size varies.
|
||||
@@ -132,6 +150,7 @@ namespace Esiur.Tests.Gvwie
|
||||
var generators = new List<(string name, Func<int, int, double[]> fn)>()
|
||||
{
|
||||
("Int32_Clustering", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Clustering)), iterations)),
|
||||
("Int32_MixedClustering", (size, iterations) => Average(() => CompareMixedClusteredInt32(size), iterations)),
|
||||
("Int32_Positive", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Positive)), iterations)),
|
||||
("Int32_Negative", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Negative)), iterations)),
|
||||
("Int32_Small", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Small)), iterations)),
|
||||
@@ -192,6 +211,19 @@ namespace Esiur.Tests.Gvwie
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Generates a mixed-cluster Int32 sample of <paramref name="sampleSize"/> elements, runs it
/// through CompareInt, and verifies that the first two reported sizes (plain and aligned, per the
/// error message below) are smaller than the raw 4-bytes-per-element size of the sample.
/// </summary>
/// <param name="sampleSize">Number of Int32 elements to generate and compare.</param>
/// <returns>The unmodified tuple of sizes returned by CompareInt.</returns>
/// <exception cref="InvalidOperationException">
/// Either of the first two sizes is not smaller than <c>sampleSize * sizeof(int)</c>.
/// </exception>
private static (int, int, int, int, int, int, int, int, int) CompareMixedClusteredInt32(int sampleSize)
{
    var sample = IntArrayGenerator.GenerateInt32(sampleSize, GeneratorPattern.MixedClustering);
    var result = CompareInt(sample);

    // An empty sample has a raw size of 0 bytes, which no encoding can undercut; skip the
    // sanity check instead of always failing with "expected less than 0 bytes".
    if (sampleSize <= 0)
        return result;

    // Widened to long so very large sample sizes cannot overflow the multiplication.
    long requiredCapacity = (long)sampleSize * sizeof(int);

    if (result.Item1 >= requiredCapacity || result.Item2 >= requiredCapacity)
        throw new InvalidOperationException(
            $"Mixed clustered Int32 encoding produced {result.Item1} bytes ({result.Item2} aligned) for {sampleSize} elements; expected less than {requiredCapacity} bytes.");

    return result;
}
|
||||
|
||||
public static (int, int, int, int, int, int, int, int, int) CompareInt(long[] sample)
|
||||
{
|
||||
var intRoot = new ArrayRoot<long>() { Values = sample };
|
||||
@@ -432,7 +464,6 @@ namespace Esiur.Tests.Gvwie
|
||||
sum.Average(x => x.Item9)
|
||||
};
|
||||
|
||||
Console.WriteLine($"{rt[0]},{rt[1]},{rt[2]},{rt[3]},{rt[4]},{rt[5]},{rt[6]},{rt[7]},{rt[8]}");
|
||||
|
||||
|
||||
return rt;
|
||||
@@ -440,6 +471,11 @@ namespace Esiur.Tests.Gvwie
|
||||
|
||||
static string PrintAverage(double[] values)
|
||||
{
|
||||
|
||||
var rt = $"{values[0]},{values[1]},{values[2]},{values[3]},{values[4]},{values[5]},{values[6]},{values[7]},{values[8]}";
|
||||
|
||||
Console.WriteLine(rt);
|
||||
|
||||
// Determine winner (lowest average size)
|
||||
var names = new string[] { "Esiur", "Aligned", "FlatBuffer", "ProtoBuffer", "MessagePack", "BSON", "CBOR", "Avro", "Optimal" };
|
||||
var min = values.SkipLast(1).Min();
|
||||
@@ -458,7 +494,7 @@ namespace Esiur.Tests.Gvwie
|
||||
|
||||
Console.ForegroundColor = ConsoleColor.White;
|
||||
|
||||
return "Unknown";
|
||||
return rt;
|
||||
}
|
||||
|
||||
public static byte[] SerializeFlatBuffers<T>(ArrayRoot<T> array)
|
||||
|
||||
Reference in New Issue
Block a user