diff --git a/Libraries/Esiur/Data/Gvwie/GVN.cs b/Libraries/Esiur/Data/Gvwie/GVN.cs new file mode 100644 index 0000000..f22bef3 --- /dev/null +++ b/Libraries/Esiur/Data/Gvwie/GVN.cs @@ -0,0 +1,1358 @@ +//////////using System; +//////////using System.Collections.Generic; +//////////using System.Runtime.CompilerServices; + +//////////namespace Esiur.Data.Gvwie; + +//////////public static class GroupInt32Codec +//////////{ +////////// private const byte ExtendedRaw32Header = 0xFF; + +////////// // ----------------- Encoder ----------------- +////////// public static byte[] Encode(IList values) +////////// { +////////// var dst = new List(values.Count * 2); +////////// int i = 0; + +////////// while (i < values.Count) +////////// { +////////// uint zz = ZigZag32(values[i]); + +////////// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits +////////// if (zz <= 0x7Fu) +////////// { +////////// dst.Add((byte)zz); +////////// i++; +////////// continue; +////////// } + +////////// int start = i; +////////// int width = WidthFromZigZag(zz); + +////////// // Extended raw 32-bit run: +////////// // 0xFF + varint(count-1) + count * 4-byte LE zigzag payload +////////// if (width == 4) +////////// { +////////// int count = 1; + +////////// while ((i + count) < values.Count) +////////// { +////////// uint z2 = ZigZag32(values[i + count]); +////////// if (WidthFromZigZag(z2) != 4) +////////// break; + +////////// count++; +////////// } + +////////// dst.Add(ExtendedRaw32Header); +////////// WriteVarUInt32(dst, (uint)(count - 1)); + +////////// for (int k = 0; k < count; k++) +////////// WriteLE(dst, ZigZag32(values[start + k]), 4); + +////////// i += count; +////////// continue; +////////// } + +////////// // Normal group: up to 32 items sharing a common width (1..3 bytes) +////////// int countNormal = 1; + +////////// while (countNormal < 32 && (i + countNormal) < values.Count) +////////// { +////////// uint z2 = ZigZag32(values[i + countNormal]); +////////// int w2 = 
WidthFromZigZag(z2); + +////////// // Stop before 4-byte values so extended mode can take them +////////// if (w2 == 4) +////////// break; + +////////// width = Math.Max(width, w2); +////////// countNormal++; +////////// } + +////////// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] +////////// byte header = 0x80; +////////// header |= (byte)(((countNormal - 1) & 0x1F) << 2); +////////// header |= (byte)((width - 1) & 0x03); +////////// dst.Add(header); + +////////// for (int k = 0; k < countNormal; k++) +////////// WriteLE(dst, ZigZag32(values[start + k]), width); + +////////// i += countNormal; +////////// } + +////////// return dst.ToArray(); +////////// } + +////////// // ----------------- Decoder ----------------- +////////// public static int[] Decode(ReadOnlySpan src) +////////// { +////////// var result = new List(); +////////// int pos = 0; + +////////// while (pos < src.Length) +////////// { +////////// byte h = src[pos++]; + +////////// if ((h & 0x80) == 0) +////////// { +////////// uint zz7 = (uint)(h & 0x7F); +////////// result.Add(UnZigZag32(zz7)); +////////// continue; +////////// } + +////////// // Extended raw 32-bit run +////////// if (h == ExtendedRaw32Header) +////////// { +////////// uint countMinus1 = ReadVarUInt32(src, ref pos); +////////// int count = checked((int)countMinus1 + 1); + +////////// for (int j = 0; j < count; j++) +////////// { +////////// uint raw = (uint)ReadLE(src, ref pos, 4); +////////// result.Add(UnZigZag32(raw)); +////////// } + +////////// continue; +////////// } + +////////// int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 +////////// int width = (h & 0x03) + 1; // 1..4 (though encoder uses 1..3 here) + +////////// for (int j = 0; j < countNormal; j++) +////////// { +////////// uint raw = (uint)ReadLE(src, ref pos, width); +////////// result.Add(UnZigZag32(raw)); +////////// } +////////// } + +////////// return result.ToArray(); +////////// } + +////////// // ----------------- Helpers ----------------- + 
+////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////////// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); + +////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////////// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); + +////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////////// private static int WidthFromZigZag(uint z) +////////// { +////////// if (z <= 0xFFu) return 1; +////////// if (z <= 0xFFFFu) return 2; +////////// if (z <= 0xFFFFFFu) return 3; +////////// return 4; +////////// } + +////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////////// private static void WriteLE(List dst, uint value, int width) +////////// { +////////// for (int i = 0; i < width; i++) +////////// dst.Add((byte)((value >> (8 * i)) & 0xFF)); +////////// } + +////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////////// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) +////////// { +////////// if ((uint)(pos + width) > (uint)src.Length) +////////// throw new ArgumentException("Buffer underflow while reading group payload."); + +////////// ulong v = 0; +////////// for (int i = 0; i < width; i++) +////////// v |= (ulong)src[pos++] << (8 * i); +////////// return v; +////////// } + +////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////////// private static void WriteVarUInt32(List dst, uint value) +////////// { +////////// while (value >= 0x80) +////////// { +////////// dst.Add((byte)(value | 0x80)); +////////// value >>= 7; +////////// } + +////////// dst.Add((byte)value); +////////// } + +////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////////// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) +////////// { +////////// uint result = 0; +////////// int shift = 0; + +////////// while (true) +////////// { +////////// if (pos >= src.Length) +////////// throw new ArgumentException("Buffer underflow while 
reading varint."); + +////////// byte b = src[pos++]; +////////// result |= (uint)(b & 0x7F) << shift; + +////////// if ((b & 0x80) == 0) +////////// return result; + +////////// shift += 7; +////////// if (shift >= 35) +////////// throw new ArgumentException("Varint is too long for UInt32."); +////////// } +////////// } +//////////} + + + + +////////using System; +////////using System.Collections.Generic; +////////using System.Runtime.CompilerServices; + +////////namespace Esiur.Data.Gvwie; + +////////public static class GroupInt32Codec +////////{ +//////// // ----------------- Encoder ----------------- +//////// public static byte[] Encode(IList values) +//////// { +//////// var dst = new List(values.Count * 2); +//////// int i = 0; + +//////// while (i < values.Count) +//////// { +//////// uint zz = ZigZag32(values[i]); + +//////// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits +//////// if (zz <= 0x7Fu) +//////// { +//////// dst.Add((byte)zz); +//////// i++; +//////// continue; +//////// } + +//////// int start = i; +//////// int width = WidthFromZigZag(zz); +//////// int count = 1; + +//////// // Build a run of same-width non-literal values +//////// while ((i + count) < values.Count) +//////// { +//////// uint z2 = ZigZag32(values[i + count]); + +//////// // Do not absorb literal-fast-path values into groups +//////// if (z2 <= 0x7Fu) +//////// break; + +//////// int w2 = WidthFromZigZag(z2); +//////// if (w2 != width) +//////// break; + +//////// count++; +//////// } + +//////// if (count <= 31) +//////// { +//////// // Short group: +//////// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] +//////// byte header = 0x80; +//////// header |= (byte)(((count - 1) & 0x1F) << 2); +//////// header |= (byte)((width - 1) & 0x03); +//////// dst.Add(header); +//////// } +//////// else +//////// { +//////// // Extended group: +//////// // Header: 1 | 11111 | (width-1)[2 bits] +//////// // Followed by varint(count - 32) +//////// byte header = 0x80; 
+//////// header |= 0x7C; // count bits = 11111 +//////// header |= (byte)((width - 1) & 0x03); +//////// dst.Add(header); +//////// WriteVarUInt32(dst, (uint)(count - 32)); +//////// } + +//////// // Payload: 'count' zigzag values, LE, 'width' bytes each +//////// for (int k = 0; k < count; k++) +//////// WriteLE(dst, ZigZag32(values[start + k]), width); + +//////// i += count; +//////// } + +//////// return dst.ToArray(); +//////// } + +//////// // ----------------- Decoder ----------------- +//////// public static int[] Decode(ReadOnlySpan src) +//////// { +//////// var result = new List(); +//////// int pos = 0; + +//////// while (pos < src.Length) +//////// { +//////// byte h = src[pos++]; + +//////// if ((h & 0x80) == 0) +//////// { +//////// // Fast path: 7-bit ZigZag in low bits +//////// uint zz7 = (uint)(h & 0x7F); +//////// result.Add(UnZigZag32(zz7)); +//////// continue; +//////// } + +//////// int countField = (h >> 2) & 0x1F; +//////// int width = (h & 0x03) + 1; + +//////// int count; +//////// if (countField == 31) +//////// { +//////// // Extended group length +//////// uint extra = ReadVarUInt32(src, ref pos); +//////// count = checked(32 + (int)extra); +//////// } +//////// else +//////// { +//////// count = countField + 1; +//////// } + +//////// for (int j = 0; j < count; j++) +//////// { +//////// uint raw = (uint)ReadLE(src, ref pos, width); +//////// int val = UnZigZag32(raw); +//////// result.Add(val); +//////// } +//////// } + +//////// return result.ToArray(); +//////// } + +//////// // ----------------- Helpers ----------------- + +//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//////// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); + +//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//////// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); + +//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//////// private static int WidthFromZigZag(uint z) 
+//////// { +//////// if (z <= 0xFFu) return 1; +//////// if (z <= 0xFFFFu) return 2; +//////// if (z <= 0xFFFFFFu) return 3; +//////// return 4; +//////// } + +//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//////// private static void WriteLE(List dst, uint value, int width) +//////// { +//////// for (int i = 0; i < width; i++) +//////// dst.Add((byte)((value >> (8 * i)) & 0xFF)); +//////// } + +//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//////// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) +//////// { +//////// if ((uint)(pos + width) > (uint)src.Length) +//////// throw new ArgumentException("Buffer underflow while reading group payload."); + +//////// ulong v = 0; +//////// for (int i = 0; i < width; i++) +//////// v |= (ulong)src[pos++] << (8 * i); +//////// return v; +//////// } + +//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//////// private static void WriteVarUInt32(List dst, uint value) +//////// { +//////// while (value >= 0x80) +//////// { +//////// dst.Add((byte)((value & 0x7F) | 0x80)); +//////// value >>= 7; +//////// } + +//////// dst.Add((byte)value); +//////// } + +//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//////// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) +//////// { +//////// uint result = 0; +//////// int shift = 0; + +//////// while (true) +//////// { +//////// if (pos >= src.Length) +//////// throw new ArgumentException("Buffer underflow while reading varint."); + +//////// byte b = src[pos++]; +//////// result |= (uint)(b & 0x7F) << shift; + +//////// if ((b & 0x80) == 0) +//////// return result; + +//////// shift += 7; +//////// if (shift >= 35) +//////// throw new ArgumentException("Varint is too long for UInt32."); +//////// } +//////// } +////////} + + +//////using System; +//////using System.Collections.Generic; +//////using System.Runtime.CompilerServices; + +//////namespace Esiur.Data.Gvwie; + +//////public static class 
GroupInt32Codec +//////{ +////// private const byte ExtendedRaw32Header = 0xFF; + +////// // ----------------- Encoder ----------------- +////// public static byte[] Encode(IList values) +////// { +////// var dst = new List(values.Count * 2); +////// int i = 0; + +////// while (i < values.Count) +////// { +////// uint zz = ZigZag32(values[i]); + +////// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits +////// if (zz <= 0x7Fu) +////// { +////// dst.Add((byte)zz); +////// i++; +////// continue; +////// } + +////// int start = i; +////// int width = WidthFromZigZag(zz); + +////// // Extended mode only for long width-4 runs +////// if (width == 4) +////// { +////// int runCount = 1; + +////// while ((i + runCount) < values.Count) +////// { +////// uint z2 = ZigZag32(values[i + runCount]); + +////// // Keep literals separate +////// if (z2 <= 0x7Fu) +////// break; + +////// if (WidthFromZigZag(z2) != 4) +////// break; + +////// runCount++; +////// } + +////// // Use extended mode only when it is actually longer than normal max group +////// if (runCount > 32) +////// { +////// dst.Add(ExtendedRaw32Header); +////// WriteVarUInt32(dst, (uint)(runCount - 33)); // 33 -> 0, 34 -> 1, ... 
+ +////// for (int k = 0; k < runCount; k++) +////// WriteLE(dst, ZigZag32(values[start + k]), 4); + +////// i += runCount; +////// continue; +////// } +////// } + +////// // Normal group: up to 32 items sharing a common width (1..4 bytes) +////// int count = 1; + +////// while (count < 32 && (i + count) < values.Count) +////// { +////// uint z2 = ZigZag32(values[i + count]); + +////// // Do not absorb literal-fast-path values into groups +////// if (z2 <= 0x7Fu) +////// break; + +////// int w2 = WidthFromZigZag(z2); +////// if (w2 != width) +////// break; + +////// count++; +////// } + +////// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] +////// byte header = 0x80; +////// header |= (byte)(((count - 1) & 0x1F) << 2); +////// header |= (byte)((width - 1) & 0x03); +////// dst.Add(header); + +////// for (int k = 0; k < count; k++) +////// WriteLE(dst, ZigZag32(values[start + k]), width); + +////// i += count; +////// } + +////// return dst.ToArray(); +////// } + +////// // ----------------- Decoder ----------------- +////// public static int[] Decode(ReadOnlySpan src) +////// { +////// var result = new List(); +////// int pos = 0; + +////// while (pos < src.Length) +////// { +////// byte h = src[pos++]; + +////// if ((h & 0x80) == 0) +////// { +////// // Fast path: 7-bit ZigZag in low bits +////// uint zz7 = (uint)(h & 0x7F); +////// result.Add(UnZigZag32(zz7)); +////// continue; +////// } + +////// // Extended raw width-4 run +////// if (h == ExtendedRaw32Header) +////// { +////// uint extra = ReadVarUInt32(src, ref pos); +////// int count = checked(33 + (int)extra); + +////// for (int j = 0; j < count; j++) +////// { +////// uint raw = (uint)ReadLE(src, ref pos, 4); +////// result.Add(UnZigZag32(raw)); +////// } + +////// continue; +////// } + +////// int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 +////// int width = (h & 0x03) + 1; // 1..4 + +////// for (int j = 0; j < countNormal; j++) +////// { +////// uint raw = (uint)ReadLE(src, ref pos, width); 
+////// result.Add(UnZigZag32(raw)); +////// } +////// } + +////// return result.ToArray(); +////// } + +////// // ----------------- Helpers ----------------- + +////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); + +////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); + +////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////// private static int WidthFromZigZag(uint z) +////// { +////// if (z <= 0xFFu) return 1; +////// if (z <= 0xFFFFu) return 2; +////// if (z <= 0xFFFFFFu) return 3; +////// return 4; +////// } + +////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////// private static void WriteLE(List dst, uint value, int width) +////// { +////// for (int i = 0; i < width; i++) +////// dst.Add((byte)((value >> (8 * i)) & 0xFF)); +////// } + +////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) +////// { +////// if ((uint)(pos + width) > (uint)src.Length) +////// throw new ArgumentException("Buffer underflow while reading group payload."); + +////// ulong v = 0; +////// for (int i = 0; i < width; i++) +////// v |= (ulong)src[pos++] << (8 * i); +////// return v; +////// } + +////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////// private static void WriteVarUInt32(List dst, uint value) +////// { +////// while (value >= 0x80) +////// { +////// dst.Add((byte)((value & 0x7F) | 0x80)); +////// value >>= 7; +////// } + +////// dst.Add((byte)value); +////// } + +////// [MethodImpl(MethodImplOptions.AggressiveInlining)] +////// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) +////// { +////// uint result = 0; +////// int shift = 0; + +////// while (true) +////// { +////// if (pos >= src.Length) +////// throw new ArgumentException("Buffer underflow while reading varint."); 
+ +////// byte b = src[pos++]; +////// result |= (uint)(b & 0x7F) << shift; + +////// if ((b & 0x80) == 0) +////// return result; + +////// shift += 7; +////// if (shift >= 35) +////// throw new ArgumentException("Varint is too long for UInt32."); +////// } +////// } +//////} + +////using System; +////using System.Collections.Generic; +////using System.Runtime.CompilerServices; + +////namespace Esiur.Data.Gvwie; + +////public static class GroupInt32Codec +////{ +//// private const byte RawInt32RunHeader = 0xFF; + +//// // ----------------- Encoder ----------------- +//// public static byte[] Encode(IList values) +//// { +//// var dst = new List(values.Count * 2); +//// int i = 0; + +//// while (i < values.Count) +//// { +//// uint zz = ZigZag32(values[i]); + +//// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits +//// if (zz <= 0x7Fu) +//// { +//// dst.Add((byte)zz); +//// i++; +//// continue; +//// } + +//// int start = i; +//// int width = WidthFromZigZag(zz); + +//// // Detect long full-width run and emit raw Int32 block instead of grouped width=4 +//// if (width == 4) +//// { +//// int runCount = 1; + +//// while ((i + runCount) < values.Count) +//// { +//// uint z2 = ZigZag32(values[i + runCount]); + +//// // keep literals separate +//// if (z2 <= 0x7Fu) +//// break; + +//// if (WidthFromZigZag(z2) != 4) +//// break; + +//// runCount++; +//// } + +//// // Threshold can be tuned; 33+ is a good starting point +//// if (runCount >= 33) +//// { +//// dst.Add(RawInt32RunHeader); +//// WriteVarUInt32(dst, (uint)runCount); + +//// for (int k = 0; k < runCount; k++) +//// WriteInt32LE(dst, values[start + k]); + +//// i += runCount; +//// continue; +//// } +//// } + +//// // Normal group: up to 32 items sharing the same width (1..4 bytes) +//// int count = 1; + +//// while (count < 32 && (i + count) < values.Count) +//// { +//// uint z2 = ZigZag32(values[i + count]); + +//// // do not absorb literal-fast-path values into groups +//// if (z2 <= 0x7Fu) +//// 
break; + +//// int w2 = WidthFromZigZag(z2); +//// if (w2 != width) +//// break; + +//// count++; +//// } + +//// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] +//// byte header = 0x80; +//// header |= (byte)(((count - 1) & 0x1F) << 2); +//// header |= (byte)((width - 1) & 0x03); +//// dst.Add(header); + +//// for (int k = 0; k < count; k++) +//// WriteLE(dst, ZigZag32(values[start + k]), width); + +//// i += count; +//// } + +//// return dst.ToArray(); +//// } + +//// // ----------------- Decoder ----------------- +//// public static int[] Decode(ReadOnlySpan src) +//// { +//// var result = new List(); +//// int pos = 0; + +//// while (pos < src.Length) +//// { +//// byte h = src[pos++]; + +//// if ((h & 0x80) == 0) +//// { +//// // Fast path: 7-bit ZigZag in low bits +//// uint zz7 = (uint)(h & 0x7F); +//// result.Add(UnZigZag32(zz7)); +//// continue; +//// } + +//// // Raw fixed-width Int32 run +//// if (h == RawInt32RunHeader) +//// { +//// uint countU = ReadVarUInt32(src, ref pos); +//// int count = checked((int)countU); + +//// for (int j = 0; j < count; j++) +//// result.Add(ReadInt32LE(src, ref pos)); + +//// continue; +//// } + +//// int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 +//// int width = (h & 0x03) + 1; // 1..4 + +//// for (int j = 0; j < countNormal; j++) +//// { +//// uint raw = (uint)ReadLE(src, ref pos, width); +//// result.Add(UnZigZag32(raw)); +//// } +//// } + +//// return result.ToArray(); +//// } + +//// // ----------------- Helpers ----------------- + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static int WidthFromZigZag(uint z) +//// { +//// if (z <= 0xFFu) return 1; +//// if (z <= 0xFFFFu) return 2; +//// if (z <= 0xFFFFFFu) 
return 3; +//// return 4; +//// } + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static void WriteLE(List dst, uint value, int width) +//// { +//// for (int i = 0; i < width; i++) +//// dst.Add((byte)((value >> (8 * i)) & 0xFF)); +//// } + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) +//// { +//// if ((uint)(pos + width) > (uint)src.Length) +//// throw new ArgumentException("Buffer underflow while reading group payload."); + +//// ulong v = 0; +//// for (int i = 0; i < width; i++) +//// v |= (ulong)src[pos++] << (8 * i); +//// return v; +//// } + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static void WriteInt32LE(List dst, int value) +//// { +//// uint u = unchecked((uint)value); +//// dst.Add((byte)(u & 0xFF)); +//// dst.Add((byte)((u >> 8) & 0xFF)); +//// dst.Add((byte)((u >> 16) & 0xFF)); +//// dst.Add((byte)((u >> 24) & 0xFF)); +//// } + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static int ReadInt32LE(ReadOnlySpan src, ref int pos) +//// { +//// if ((uint)(pos + 4) > (uint)src.Length) +//// throw new ArgumentException("Buffer underflow while reading raw Int32 payload."); + +//// uint u = +//// (uint)src[pos] +//// | ((uint)src[pos + 1] << 8) +//// | ((uint)src[pos + 2] << 16) +//// | ((uint)src[pos + 3] << 24); + +//// pos += 4; +//// return unchecked((int)u); +//// } + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static void WriteVarUInt32(List dst, uint value) +//// { +//// while (value >= 0x80) +//// { +//// dst.Add((byte)((value & 0x7F) | 0x80)); +//// value >>= 7; +//// } + +//// dst.Add((byte)value); +//// } + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) +//// { +//// uint result = 0; +//// int shift = 0; + +//// while (true) +//// { +//// if (pos >= src.Length) +//// throw new 
ArgumentException("Buffer underflow while reading varint."); + +//// byte b = src[pos++]; +//// result |= (uint)(b & 0x7F) << shift; + +//// if ((b & 0x80) == 0) +//// return result; + +//// shift += 7; +//// if (shift >= 35) +//// throw new ArgumentException("Varint is too long for UInt32."); +//// } +//// } +////} + + +//using System; +//using System.Collections.Generic; +//using System.Runtime.CompilerServices; + +//namespace Esiur.Data.Gvwie; + +//public static class GroupInt32Codec +//{ +// private const byte RawInt32RunHeader = 0xFF; + +// // ----------------- Encoder ----------------- +// public static byte[] Encode(IList values) +// { +// var dst = new List(values.Count * 2); +// int i = 0; + +// while (i < values.Count) +// { +// uint zz = ZigZag32(values[i]); + +// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits +// if (zz <= 0x7Fu) +// { +// dst.Add((byte)zz); +// i++; +// continue; +// } + +// int start = i; +// int width = WidthFromZigZag(zz); + +// // Detect long full-width run and emit raw Int32 block instead of grouped width=4 +// if (width == 4) +// { +// int runCount = 1; + +// while ((i + runCount) < values.Count) +// { +// uint z2 = ZigZag32(values[i + runCount]); + +// // keep literals separate +// if (z2 <= 0x7Fu) +// break; + +// if (WidthFromZigZag(z2) != 4) +// break; + +// runCount++; +// } + +// // Threshold can be tuned; 33+ is a good starting point +// if (runCount >= 33) +// { +// dst.Add(RawInt32RunHeader); +// WriteVarUInt32(dst, (uint)runCount); + +// for (int k = 0; k < runCount; k++) +// WriteInt32LE(dst, values[start + k]); + +// i += runCount; +// continue; +// } +// } + +// // Normal group: up to 32 items sharing the same width (1..4 bytes) +// int count = 1; + +// while (count < 32 && (i + count) < values.Count) +// { +// uint z2 = ZigZag32(values[i + count]); + +// // do not absorb literal-fast-path values into groups +// if (z2 <= 0x7Fu) +// break; + +// int w2 = WidthFromZigZag(z2); +// if (w2 != width) +// 
break; + +// count++; +// } + +// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] +// byte header = 0x80; +// header |= (byte)(((count - 1) & 0x1F) << 2); +// header |= (byte)((width - 1) & 0x03); +// dst.Add(header); + +// for (int k = 0; k < count; k++) +// WriteLE(dst, ZigZag32(values[start + k]), width); + +// i += count; +// } + +// return dst.ToArray(); +// } + +// // ----------------- Decoder ----------------- +// public static int[] Decode(ReadOnlySpan src) +// { +// var result = new List(); +// int pos = 0; + +// while (pos < src.Length) +// { +// byte h = src[pos++]; + +// if ((h & 0x80) == 0) +// { +// // Fast path: 7-bit ZigZag in low bits +// uint zz7 = (uint)(h & 0x7F); +// result.Add(UnZigZag32(zz7)); +// continue; +// } + +// // Raw fixed-width Int32 run +// if (h == RawInt32RunHeader) +// { +// uint countU = ReadVarUInt32(src, ref pos); +// int count = checked((int)countU); + +// for (int j = 0; j < count; j++) +// result.Add(ReadInt32LE(src, ref pos)); + +// continue; +// } + +// int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 +// int width = (h & 0x03) + 1; // 1..4 + +// for (int j = 0; j < countNormal; j++) +// { +// uint raw = (uint)ReadLE(src, ref pos, width); +// result.Add(UnZigZag32(raw)); +// } +// } + +// return result.ToArray(); +// } + +// // ----------------- Helpers ----------------- + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static int WidthFromZigZag(uint z) +// { +// if (z <= 0xFFu) return 1; +// if (z <= 0xFFFFu) return 2; +// if (z <= 0xFFFFFFu) return 3; +// return 4; +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static void WriteLE(List dst, uint value, int width) +// { +// for (int i = 0; i < width; 
i++) +// dst.Add((byte)((value >> (8 * i)) & 0xFF)); +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) +// { +// if ((uint)(pos + width) > (uint)src.Length) +// throw new ArgumentException("Buffer underflow while reading group payload."); + +// ulong v = 0; +// for (int i = 0; i < width; i++) +// v |= (ulong)src[pos++] << (8 * i); +// return v; +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static void WriteInt32LE(List dst, int value) +// { +// uint u = unchecked((uint)value); +// dst.Add((byte)(u & 0xFF)); +// dst.Add((byte)((u >> 8) & 0xFF)); +// dst.Add((byte)((u >> 16) & 0xFF)); +// dst.Add((byte)((u >> 24) & 0xFF)); +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static int ReadInt32LE(ReadOnlySpan src, ref int pos) +// { +// if ((uint)(pos + 4) > (uint)src.Length) +// throw new ArgumentException("Buffer underflow while reading raw Int32 payload."); + +// uint u = +// (uint)src[pos] +// | ((uint)src[pos + 1] << 8) +// | ((uint)src[pos + 2] << 16) +// | ((uint)src[pos + 3] << 24); + +// pos += 4; +// return unchecked((int)u); +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static void WriteVarUInt32(List dst, uint value) +// { +// while (value >= 0x80) +// { +// dst.Add((byte)((value & 0x7F) | 0x80)); +// value >>= 7; +// } + +// dst.Add((byte)value); +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) +// { +// uint result = 0; +// int shift = 0; + +// while (true) +// { +// if (pos >= src.Length) +// throw new ArgumentException("Buffer underflow while reading varint."); + +// byte b = src[pos++]; +// result |= (uint)(b & 0x7F) << shift; + +// if ((b & 0x80) == 0) +// return result; + +// shift += 7; +// if (shift >= 35) +// throw new ArgumentException("Varint is too long for UInt32."); +// } +// } +//} +using 
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;

namespace Esiur.Data.Gvwie;

/// <summary>
/// Variable-width codec for sequences of 32-bit signed integers.
/// </summary>
/// <remarks>
/// The stream is a sequence of records, each introduced by one header byte:
/// <list type="bullet">
/// <item><description>
/// Literal, <c>0vvvvvvv</c>: a single value whose ZigZag form fits in 7 bits.
/// </description></item>
/// <item><description>
/// Group, <c>1cccccww</c>: <c>ccccc + 1</c> values of <c>ww + 1</c> bytes each,
/// stored as little-endian ZigZag integers.
/// </description></item>
/// <item><description>
/// Raw run, <c>0xFF</c>: followed by varint(count) and <c>count</c> plain
/// (not ZigZag-mapped) little-endian Int32 values.
/// </description></item>
/// </list>
/// <c>0xFF</c> is also the bit pattern of "group of 32 four-byte values", so the encoder
/// never emits four-byte groups of more than 31 items.
/// </remarks>
public static class GroupInt32Codec
{
    /// <summary>Header byte that introduces a raw Int32 run.</summary>
    private const byte RawInt32RunHeader = 0xFF;

    /// <summary>Largest ZigZag value that is written as a single literal byte.</summary>
    private const uint MaxLiteralZigZag = 0x7Fu;

    /// <summary>Largest item count a group header can express (5-bit field, stored as count - 1).</summary>
    private const int MaxGroupCount = 32;

    /// <summary>Item cap for four-byte groups; 32 would produce the reserved header 0xFF.</summary>
    private const int MaxWidth4GroupCount = 31;

    // ----------------- Encoder -----------------

    /// <summary>
    /// Encodes <paramref name="values"/> into the grouped variable-width format.
    /// </summary>
    /// <param name="values">Values to encode; may be empty.</param>
    /// <returns>The encoded bytes (empty when <paramref name="values"/> is empty).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
    public static byte[] Encode(IList<int> values)
    {
        if (values == null)
            throw new ArgumentNullException(nameof(values));

        var dst = new List<byte>(values.Count * 2);
        int i = 0;

        while (i < values.Count)
        {
            uint zz = ZigZag32(values[i]);

            // Fast path: single byte (MSB=0) when the ZigZag value fits in 7 bits.
            if (zz <= MaxLiteralZigZag)
            {
                dst.Add((byte)zz);
                i++;
                continue;
            }

            int width = WidthFromZigZag(zz);
            int count;

            if (width == 4)
            {
                int runCount = CountRun(values, i, 4, int.MaxValue);

                // Both layouts carry the same 4 * runCount payload bytes, so only the
                // header overhead decides: 0xFF + varint(count) versus one header byte
                // per group of at most 31 items.
                int rawOverhead = 1 + VarUInt32Size((uint)runCount);
                int groupedOverhead = Width4GroupHeaderCount(runCount);

                if (rawOverhead < groupedOverhead)
                {
                    dst.Add(RawInt32RunHeader);
                    WriteVarUInt32(dst, (uint)runCount);

                    for (int k = 0; k < runCount; k++)
                        WriteInt32LE(dst, values[i + k]);

                    i += runCount;
                    continue;
                }

                // The run length is already known; just clamp it to one group.
                count = runCount < MaxWidth4GroupCount ? runCount : MaxWidth4GroupCount;
            }
            else
            {
                count = CountRun(values, i, width, MaxGroupCount);
            }

            // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits]
            dst.Add((byte)(0x80 | ((count - 1) << 2) | (width - 1)));

            for (int k = 0; k < count; k++)
                WriteLE(dst, ZigZag32(values[i + k]), width);

            i += count;
        }

        return dst.ToArray();
    }

    // ----------------- Decoder -----------------

    /// <summary>
    /// Decodes a buffer produced by <see cref="Encode"/>.
    /// </summary>
    /// <param name="src">Encoded bytes; may be empty.</param>
    /// <returns>The decoded values in their original order.</returns>
    /// <exception cref="ArgumentException">
    /// The buffer ends inside a record, or a run-length varint does not fit in 32 bits.
    /// </exception>
    public static int[] Decode(ReadOnlySpan<byte> src)
    {
        var result = new List<int>();
        int pos = 0;

        while (pos < src.Length)
        {
            byte h = src[pos++];

            // Literal: the header byte is the ZigZag value itself.
            if ((h & 0x80) == 0)
            {
                result.Add(UnZigZag32(h));
                continue;
            }

            if (h == RawInt32RunHeader)
            {
                uint rawCount = ReadVarUInt32(src, ref pos);

                // Validate the declared length against what is actually left before
                // reading anything, so a corrupt count fails fast with the same
                // exception as every other truncated-input case.
                if (rawCount > (uint)((src.Length - pos) / 4))
                    throw new ArgumentException("Buffer underflow while reading raw Int32 payload.");

                for (uint j = 0; j < rawCount; j++)
                    result.Add(ReadInt32LE(src, ref pos));

                continue;
            }

            int groupCount = ((h >> 2) & 0x1F) + 1;
            int width = (h & 0x03) + 1;

            for (int j = 0; j < groupCount; j++)
                result.Add(UnZigZag32((uint)ReadLE(src, ref pos, width)));
        }

        return result.ToArray();
    }

    // ----------------- Size helpers -----------------

    /// <summary>
    /// Counts the consecutive values starting at <paramref name="start"/> that are not
    /// literals and whose ZigZag width equals <paramref name="width"/>, stopping at
    /// <paramref name="limit"/>. The value at <paramref name="start"/> is assumed to
    /// qualify, so the result is at least 1.
    /// </summary>
    private static int CountRun(IList<int> values, int start, int width, int limit)
    {
        int count = 1;

        while (count < limit && (start + count) < values.Count)
        {
            uint z = ZigZag32(values[start + count]);

            // Literals stay on the one-byte fast path even inside a width-1 run.
            if (z <= MaxLiteralZigZag || WidthFromZigZag(z) != width)
                break;

            count++;
        }

        return count;
    }

    /// <summary>
    /// Number of group header bytes needed to store <paramref name="count"/> four-byte
    /// values, i.e. <paramref name="count"/> divided by 31, rounded up.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int Width4GroupHeaderCount(int count)
    {
        int groups = count / MaxWidth4GroupCount;
        if ((count % MaxWidth4GroupCount) != 0)
            groups++;

        return groups;
    }

    /// <summary>Number of bytes <see cref="WriteVarUInt32"/> emits for <paramref name="value"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int VarUInt32Size(uint value)
    {
        int size = 1;
        while (value >= 0x80)
        {
            value >>= 7;
            size++;
        }
        return size;
    }

    // ----------------- Helpers -----------------

    /// <summary>Maps a signed value to unsigned so that small magnitudes give small results.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static uint ZigZag32(int v) => unchecked((uint)((v << 1) ^ (v >> 31)));

    /// <summary>Inverse of <see cref="ZigZag32"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int UnZigZag32(uint u) => unchecked((int)((u >> 1) ^ (uint)-(int)(u & 1)));

    /// <summary>Minimum number of bytes (1..4) needed to store the ZigZag value <paramref name="z"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int WidthFromZigZag(uint z)
    {
        if (z <= 0xFFu) return 1;
        if (z <= 0xFFFFu) return 2;
        if (z <= 0xFFFFFFu) return 3;
        return 4;
    }

    /// <summary>Appends the low <paramref name="width"/> bytes of <paramref name="value"/>, least significant first.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void WriteLE(List<byte> dst, uint value, int width)
    {
        for (int i = 0; i < width; i++)
            dst.Add((byte)((value >> (8 * i)) & 0xFF));
    }

    /// <summary>Reads a <paramref name="width"/>-byte little-endian integer and advances <paramref name="pos"/>.</summary>
    /// <exception cref="ArgumentException">Fewer than <paramref name="width"/> bytes remain.</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static ulong ReadLE(ReadOnlySpan<byte> src, ref int pos, int width)
    {
        if ((uint)(pos + width) > (uint)src.Length)
            throw new ArgumentException("Buffer underflow while reading group payload.");

        ulong v = 0;
        for (int i = 0; i < width; i++)
            v |= (ulong)src[pos++] << (8 * i);
        return v;
    }

    /// <summary>Appends <paramref name="value"/> as four little-endian bytes (two's complement, no ZigZag).</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void WriteInt32LE(List<byte> dst, int value)
    {
        uint u = unchecked((uint)value);
        dst.Add((byte)(u & 0xFF));
        dst.Add((byte)((u >> 8) & 0xFF));
        dst.Add((byte)((u >> 16) & 0xFF));
        dst.Add((byte)((u >> 24) & 0xFF));
    }

    /// <summary>Reads four little-endian bytes as an Int32 and advances <paramref name="pos"/>.</summary>
    /// <exception cref="ArgumentException">Fewer than four bytes remain.</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int ReadInt32LE(ReadOnlySpan<byte> src, ref int pos)
    {
        if ((uint)(pos + 4) > (uint)src.Length)
            throw new ArgumentException("Buffer underflow while reading raw Int32 payload.");

        uint u =
            (uint)src[pos]
            | ((uint)src[pos + 1] << 8)
            | ((uint)src[pos + 2] << 16)
            | ((uint)src[pos + 3] << 24);

        pos += 4;
        return unchecked((int)u);
    }

    /// <summary>Appends <paramref name="value"/> as a base-128 varint, 7 bits per byte, low bits first.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void WriteVarUInt32(List<byte> dst, uint value)
    {
        while (value >= 0x80)
        {
            dst.Add((byte)((value & 0x7F) | 0x80));
            value >>= 7;
        }

        dst.Add((byte)value);
    }

    /// <summary>Reads a base-128 varint of at most five bytes and advances <paramref name="pos"/>.</summary>
    /// <exception cref="ArgumentException">
    /// The buffer ends inside the varint, or the encoded value does not fit in 32 bits.
    /// </exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static uint ReadVarUInt32(ReadOnlySpan<byte> src, ref int pos)
    {
        uint result = 0;

        for (int shift = 0; shift < 35; shift += 7)
        {
            if (pos >= src.Length)
                throw new ArgumentException("Buffer underflow while reading varint.");

            byte b = src[pos++];
            uint payload = (uint)(b & 0x7F);

            // The fifth byte lands at bit 28, so only its low 4 bits fit in a UInt32;
            // anything above would be shifted out and silently lost.
            if (shift == 28 && payload > 0x0F)
                throw new ArgumentException("Varint is too long for UInt32.");

            result |= payload << shift;

            if ((b & 0x80) == 0)
                return result;
        }

        // Five bytes consumed and the continuation bit is still set.
        throw new ArgumentException("Varint is too long for UInt32.");
    }
}
// \ No newline at end of file diff --git a/Libraries/Esiur/Data/Gvwie/GroupInt32Codec.cs b/Libraries/Esiur/Data/Gvwie/GroupInt32Codec.cs index 809dcf9..479b34e 100644 --- a/Libraries/Esiur/Data/Gvwie/GroupInt32Codec.cs +++ b/Libraries/Esiur/Data/Gvwie/GroupInt32Codec.cs @@ -1,25 +1,413 @@ -using System; +////using System; +////using System.Collections.Generic; +////using System.Linq; +////using System.Text; +////using System.Threading.Tasks; +////using System.Runtime.CompilerServices; +////using System.Collections; + +////namespace Esiur.Data.Gvwie; + +////public static class GroupInt32Codec +////{ +//// // ----------------- Encoder ----------------- +//// public static byte[] Encode(IList values) +//// { +//// //var values = value as int[]; + +//// var dst = new List(values.Count * 2); +//// int i = 0; + +//// while (i < values.Count) +//// { +//// uint zz = ZigZag32(values[i]); + +//// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits +//// if (zz <= 0x7Fu) +//// { +//// dst.Add((byte)zz); +//// i++; +//// continue; +//// } + +//// // Group: up to 32 items sharing a common width (1..4 bytes) +//// int start = i; +//// int count = 1; +//// int width = WidthFromZigZag(zz); + +//// while (count < 32 && (i + count) < values.Count) +//// { +//// uint z2 = ZigZag32(values[i + count]); +//// int w2 = WidthFromZigZag(z2); +//// width = Math.Max(width, w2); // widen as needed +//// count++; +//// } + +//// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] +//// byte header = 0x80; +//// header |= (byte)(((count - 1) & 0x1F) << 2); +//// header |=
(byte)((width - 1) & 0x03); +//// dst.Add(header); + +//// // Payload: 'count' zigzag values, LE, 'width' bytes each +//// for (int k = 0; k < count; k++) +//// WriteLE(dst, ZigZag32(values[start + k]), width); + +//// i += count; +//// } + +//// return dst.ToArray(); +//// } + +//// // ----------------- Decoder ----------------- +//// public static int[] Decode(ReadOnlySpan src) +//// { +//// var result = new List(); +//// int pos = 0; + +//// while (pos < src.Length) +//// { +//// byte h = src[pos++]; + +//// if ((h & 0x80) == 0) +//// { +//// // Fast path: 7-bit ZigZag in low bits +//// uint zz7 = (uint)(h & 0x7F); +//// result.Add(UnZigZag32(zz7)); +//// continue; +//// } + +//// int count = ((h >> 2) & 0x1F) + 1; // 1..32 +//// int width = (h & 0x03) + 1; // 1..4 + +//// for (int j = 0; j < count; j++) +//// { +//// uint raw = (uint)ReadLE(src, ref pos, width); +//// int val = UnZigZag32(raw); +//// result.Add(val); +//// } +//// } + +//// return result.ToArray(); +//// } + +//// // ----------------- Helpers ----------------- + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static int WidthFromZigZag(uint z) +//// { +//// if (z <= 0xFFu) return 1; +//// if (z <= 0xFFFFu) return 2; +//// if (z <= 0xFFFFFFu) return 3; +//// return 4; +//// } + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static void WriteLE(List dst, uint value, int width) +//// { +//// for (int i = 0; i < width; i++) +//// dst.Add((byte)((value >> (8 * i)) & 0xFF)); +//// } + +//// [MethodImpl(MethodImplOptions.AggressiveInlining)] +//// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) +//// { +//// if ((uint)(pos + width) > (uint)src.Length) +//// 
throw new ArgumentException("Buffer underflow while reading group payload."); + +//// ulong v = 0; +//// for (int i = 0; i < width; i++) +//// v |= (ulong)src[pos++] << (8 * i); +//// return v; +//// } +////} +//using System; +//using System.Collections.Generic; +//using System.Runtime.CompilerServices; + +//namespace Esiur.Data.Gvwie; + +//public static class GroupInt32Codec +//{ +// private const byte RawInt32RunHeader = 0xFF; + +// // ----------------- Encoder ----------------- +// public static byte[] Encode(IList values) +// { +// var dst = new List(values.Count * 2); +// int i = 0; + +// while (i < values.Count) +// { +// uint zz = ZigZag32(values[i]); + +// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits +// if (zz <= 0x7Fu) +// { +// dst.Add((byte)zz); +// i++; +// continue; +// } + +// int start = i; +// int width = WidthFromZigZag(zz); + +// // Detect long full-width run and emit raw Int32 block instead of grouped width=4 +// if (width == 4) +// { +// int runCount = 1; + +// while ((i + runCount) < values.Count) +// { +// uint z2 = ZigZag32(values[i + runCount]); + +// // keep literals separate +// if (z2 <= 0x7Fu) +// break; + +// if (WidthFromZigZag(z2) != 4) +// break; + +// runCount++; +// } + +// // Threshold can be tuned; 33+ is a good starting point +// if (runCount >= 33) +// { +// dst.Add(RawInt32RunHeader); +// WriteVarUInt32(dst, (uint)runCount); + +// for (int k = 0; k < runCount; k++) +// WriteInt32LE(dst, values[start + k]); + +// i += runCount; +// continue; +// } +// } + +// // Normal group: up to 32 items sharing the same width (1..4 bytes) +// int count = 1; + +// while (count < 32 && (i + count) < values.Count) +// { +// uint z2 = ZigZag32(values[i + count]); + +// // do not absorb literal-fast-path values into groups +// if (z2 <= 0x7Fu) +// break; + +// int w2 = WidthFromZigZag(z2); +// if (w2 != width) +// break; + +// count++; +// } + +// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] +// byte header = 0x80; +// 
header |= (byte)(((count - 1) & 0x1F) << 2); +// header |= (byte)((width - 1) & 0x03); +// dst.Add(header); + +// for (int k = 0; k < count; k++) +// WriteLE(dst, ZigZag32(values[start + k]), width); + +// i += count; +// } + +// return dst.ToArray(); +// } + +// // ----------------- Decoder ----------------- +// public static int[] Decode(ReadOnlySpan src) +// { +// var result = new List(); +// int pos = 0; + +// while (pos < src.Length) +// { +// byte h = src[pos++]; + +// if ((h & 0x80) == 0) +// { +// // Fast path: 7-bit ZigZag in low bits +// uint zz7 = (uint)(h & 0x7F); +// result.Add(UnZigZag32(zz7)); +// continue; +// } + +// // Raw fixed-width Int32 run +// if (h == RawInt32RunHeader) +// { +// uint countU = ReadVarUInt32(src, ref pos); +// int count = checked((int)countU); + +// for (int j = 0; j < count; j++) +// result.Add(ReadInt32LE(src, ref pos)); + +// continue; +// } + +// int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 +// int width = (h & 0x03) + 1; // 1..4 + +// for (int j = 0; j < countNormal; j++) +// { +// uint raw = (uint)ReadLE(src, ref pos, width); +// result.Add(UnZigZag32(raw)); +// } +// } + +// return result.ToArray(); +// } + +// // ----------------- Helpers ----------------- + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static int WidthFromZigZag(uint z) +// { +// if (z <= 0xFFu) return 1; +// if (z <= 0xFFFFu) return 2; +// if (z <= 0xFFFFFFu) return 3; +// return 4; +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static void WriteLE(List dst, uint value, int width) +// { +// for (int i = 0; i < width; i++) +// dst.Add((byte)((value >> (8 * i)) & 0xFF)); +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] 
+// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) +// { +// if ((uint)(pos + width) > (uint)src.Length) +// throw new ArgumentException("Buffer underflow while reading group payload."); + +// ulong v = 0; +// for (int i = 0; i < width; i++) +// v |= (ulong)src[pos++] << (8 * i); +// return v; +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static void WriteInt32LE(List dst, int value) +// { +// uint u = unchecked((uint)value); +// dst.Add((byte)(u & 0xFF)); +// dst.Add((byte)((u >> 8) & 0xFF)); +// dst.Add((byte)((u >> 16) & 0xFF)); +// dst.Add((byte)((u >> 24) & 0xFF)); +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static int ReadInt32LE(ReadOnlySpan src, ref int pos) +// { +// if ((uint)(pos + 4) > (uint)src.Length) +// throw new ArgumentException("Buffer underflow while reading raw Int32 payload."); + +// uint u = +// (uint)src[pos] +// | ((uint)src[pos + 1] << 8) +// | ((uint)src[pos + 2] << 16) +// | ((uint)src[pos + 3] << 24); + +// pos += 4; +// return unchecked((int)u); +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static void WriteVarUInt32(List dst, uint value) +// { +// while (value >= 0x80) +// { +// dst.Add((byte)((value & 0x7F) | 0x80)); +// value >>= 7; +// } + +// dst.Add((byte)value); +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) +// { +// uint result = 0; +// int shift = 0; + +// while (true) +// { +// if (pos >= src.Length) +// throw new ArgumentException("Buffer underflow while reading varint."); + +// byte b = src[pos++]; +// result |= (uint)(b & 0x7F) << shift; + +// if ((b & 0x80) == 0) +// return result; + +// shift += 7; +// if (shift >= 35) +// throw new ArgumentException("Varint is too long for UInt32."); +// } +// } +//} + + +using System; using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; 
using System.Runtime.CompilerServices; -using System.Collections; namespace Esiur.Data.Gvwie; public static class GroupInt32Codec { + private const byte RawInt32RunHeader = 0xFF; + private const int RawDecisionWindow = 256; + // ----------------- Encoder ----------------- public static byte[] Encode(IList values) { - //var values = value as int[]; - var dst = new List(values.Count * 2); int i = 0; while (i < values.Count) { + int remaining = values.Count - i; + + // Adaptive raw block decision on a bounded window + if (remaining >= 32) + { + int candidateCount = Math.Min(RawDecisionWindow, remaining); + + int rawSize = 1 + VarUInt32Size((uint)candidateCount) + candidateCount * 4; + int groupedSize = EstimateGroupedSize(values, i, candidateCount); + + if (rawSize < groupedSize) + { + dst.Add(RawInt32RunHeader); + WriteVarUInt32(dst, (uint)candidateCount); + + for (int k = 0; k < candidateCount; k++) + WriteInt32LE(dst, values[i + k]); + + i += candidateCount; + continue; + } + } + uint zz = ZigZag32(values[i]); // Fast path: single byte (MSB=0) when zigzag fits in 7 bits @@ -30,16 +418,25 @@ public static class GroupInt32Codec continue; } - // Group: up to 32 items sharing a common width (1..4 bytes) int start = i; - int count = 1; int width = WidthFromZigZag(zz); + int count = 1; - while (count < 32 && (i + count) < values.Count) + // 0xFF is reserved for raw Int32 blocks, so width=4 groups max out at 31 + int maxGroupCount = (width == 4) ? 
31 : 32; + + while (count < maxGroupCount && (i + count) < values.Count) { uint z2 = ZigZag32(values[i + count]); + + // keep literals separate + if (z2 <= 0x7Fu) + break; + int w2 = WidthFromZigZag(z2); - width = Math.Max(width, w2); // widen as needed + if (w2 != width) + break; + count++; } @@ -49,7 +446,6 @@ public static class GroupInt32Codec header |= (byte)((width - 1) & 0x03); dst.Add(header); - // Payload: 'count' zigzag values, LE, 'width' bytes each for (int k = 0; k < count; k++) WriteLE(dst, ZigZag32(values[start + k]), width); @@ -77,20 +473,87 @@ public static class GroupInt32Codec continue; } - int count = ((h >> 2) & 0x1F) + 1; // 1..32 - int width = (h & 0x03) + 1; // 1..4 + // Raw fixed-width Int32 run + if (h == RawInt32RunHeader) + { + uint countU = ReadVarUInt32(src, ref pos); + int count = checked((int)countU); - for (int j = 0; j < count; j++) + for (int j = 0; j < count; j++) + result.Add(ReadInt32LE(src, ref pos)); + + continue; + } + + int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 + int width = (h & 0x03) + 1; // 1..4 + + for (int j = 0; j < countNormal; j++) { uint raw = (uint)ReadLE(src, ref pos, width); - int val = UnZigZag32(raw); - result.Add(val); + result.Add(UnZigZag32(raw)); } } return result.ToArray(); } + // ----------------- Size Estimation ----------------- + + private static int EstimateGroupedSize(IList values, int start, int count) + { + int size = 0; + int i = start; + int end = start + count; + + while (i < end) + { + uint zz = ZigZag32(values[i]); + + if (zz <= 0x7Fu) + { + size += 1; + i++; + continue; + } + + int width = WidthFromZigZag(zz); + int groupCount = 1; + int maxGroupCount = (width == 4) ? 
31 : 32; + + while (groupCount < maxGroupCount && (i + groupCount) < end) + { + uint z2 = ZigZag32(values[i + groupCount]); + + if (z2 <= 0x7Fu) + break; + + int w2 = WidthFromZigZag(z2); + if (w2 != width) + break; + + groupCount++; + } + + size += 1 + groupCount * width; + i += groupCount; + } + + return size; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int VarUInt32Size(uint value) + { + int size = 1; + while (value >= 0x80) + { + value >>= 7; + size++; + } + return size; + } + // ----------------- Helpers ----------------- [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -126,4 +589,65 @@ public static class GroupInt32Codec v |= (ulong)src[pos++] << (8 * i); return v; } -} + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void WriteInt32LE(List dst, int value) + { + uint u = unchecked((uint)value); + dst.Add((byte)(u & 0xFF)); + dst.Add((byte)((u >> 8) & 0xFF)); + dst.Add((byte)((u >> 16) & 0xFF)); + dst.Add((byte)((u >> 24) & 0xFF)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int ReadInt32LE(ReadOnlySpan src, ref int pos) + { + if ((uint)(pos + 4) > (uint)src.Length) + throw new ArgumentException("Buffer underflow while reading raw Int32 payload."); + + uint u = + (uint)src[pos] + | ((uint)src[pos + 1] << 8) + | ((uint)src[pos + 2] << 16) + | ((uint)src[pos + 3] << 24); + + pos += 4; + return unchecked((int)u); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void WriteVarUInt32(List dst, uint value) + { + while (value >= 0x80) + { + dst.Add((byte)((value & 0x7F) | 0x80)); + value >>= 7; + } + + dst.Add((byte)value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) + { + uint result = 0; + int shift = 0; + + while (true) + { + if (pos >= src.Length) + throw new ArgumentException("Buffer underflow while reading varint."); + + byte b = src[pos++]; + result |= (uint)(b & 
0x7F) << shift; + + if ((b & 0x80) == 0) + return result; + + shift += 7; + if (shift >= 35) + throw new ArgumentException("Varint is too long for UInt32."); + } + } +} \ No newline at end of file diff --git a/Libraries/Esiur/Esiur.csproj b/Libraries/Esiur/Esiur.csproj index 41d23b6..3cff5d8 100644 --- a/Libraries/Esiur/Esiur.csproj +++ b/Libraries/Esiur/Esiur.csproj @@ -59,6 +59,7 @@ + @@ -73,6 +74,7 @@ + diff --git a/Tests/Serialization/Gvwie/GeneratorPattern.cs b/Tests/Serialization/Gvwie/GeneratorPattern.cs index d365e6d..7134da3 100644 --- a/Tests/Serialization/Gvwie/GeneratorPattern.cs +++ b/Tests/Serialization/Gvwie/GeneratorPattern.cs @@ -12,5 +12,6 @@ namespace Esiur.Tests.Gvwie Alternating, Small, Ascending, + Clustering, } } diff --git a/Tests/Serialization/Gvwie/IntArrayGenerator.cs b/Tests/Serialization/Gvwie/IntArrayGenerator.cs index 6f1899b..04de2a4 100644 --- a/Tests/Serialization/Gvwie/IntArrayGenerator.cs +++ b/Tests/Serialization/Gvwie/IntArrayGenerator.cs @@ -120,8 +120,7 @@ public static class IntArrayGenerator // Generate random int array of given length and distribution - public static int[] GenerateInt32(int length, GeneratorPattern pattern = GeneratorPattern.Uniform, - int range = int.MaxValue) + public static int[] GenerateInt32(int length, GeneratorPattern pattern = GeneratorPattern.Uniform) { var data = new int[length]; @@ -130,23 +129,23 @@ public static class IntArrayGenerator case GeneratorPattern.Uniform: // Random values in [-range, range] for (int i = 0; i < length; i++) - data[i] = rng.Next(-range, range); + data[i] = rng.Next(int.MinValue, int.MaxValue); break; case GeneratorPattern.Positive: for (int i = 0; i < length; i++) - data[i] = rng.Next(0, range); + data[i] = rng.Next(0, int.MaxValue); break; case GeneratorPattern.Negative: for (int i = 0; i < length; i++) - data[i] = -rng.Next(0, range); + data[i] = -rng.Next(0, int.MaxValue); break; case GeneratorPattern.Alternating: for (int i = 0; i < length; i++) { - int val =
rng.Next(0, range); + int val = rng.Next(0, int.MaxValue); data[i] = (i % 2 == 0) ? val : -val; } break; @@ -160,12 +159,27 @@ public static class IntArrayGenerator case GeneratorPattern.Ascending: { - int start = rng.Next(-range, range); + int start = rng.Next(int.MinValue, int.MaxValue); for (int i = 0; i < length; i++) data[i] = start + i; } break; + case GeneratorPattern.Clustering: + { + // Build ascending runs and cast to int, clamping to int bounds + var runs = GenerateRuns(length, 3, 50, ((long)int.MinValue), (long)int.MaxValue, true); + for (int i = 0; i < length; i++) + { + long v = runs[i]; + if (v > int.MaxValue) data[i] = int.MaxValue; + else if (v < int.MinValue) data[i] = int.MinValue; + else data[i] = (int)v; + } + } + break; + + default: throw new ArgumentException($"Unknown pattern: {pattern}"); } @@ -175,35 +189,48 @@ public static class IntArrayGenerator // Generate random int array of given length and distribution - public static uint[] GenerateUInt32(int length, string pattern = "uniform", + public static uint[] GenerateUInt32(int length, GeneratorPattern pattern = GeneratorPattern.Uniform, uint range = uint.MaxValue) { var data = new uint[length]; - switch (pattern.ToLower()) + switch (pattern) { - case "uniform": + case GeneratorPattern.Uniform: // Random values in [-range, range] for (int i = 0; i < length; i++) data[i] = (uint)rng.NextInt64(0, (long)range); break; - case "small": + case GeneratorPattern.Small: // Focused on small magnitudes to test ZigZag fast path for (int i = 0; i < length; i++) data[i] = (uint)rng.Next(0, 127); break; - - case "ascending": + case GeneratorPattern.Clustering: { - uint start = (uint)rng.NextInt64(0, (long)range); - for (uint i = 0; i < length; i++) - data[i] = start + i; + // Generate runs in a non-negative range and cast to uint + var runs = GenerateRuns(length, 3, 50, 0, (long)range, false); + for (int i = 0; i < length; i++) + { + long v = runs[i]; + if (v < 0) data[i] = 0u; + else if ((ulong)v > 
uint.MaxValue) data[i] = uint.MaxValue; + else data[i] = (uint)v; + } } break; + + case GeneratorPattern.Ascending: + uint start = (uint)rng.NextInt64(0, (long)range); + for (uint i = 0; i < length; i++) + data[i] = start + i; + + break; + default: throw new ArgumentException($"Unknown pattern: {pattern}"); } @@ -212,30 +239,42 @@ public static class IntArrayGenerator } // Generate random int array of given length and distribution - public static ulong[] GenerateUInt64(int length, string pattern = "uniform") + public static ulong[] GenerateUInt64(int length, GeneratorPattern pattern = GeneratorPattern.Uniform) { var data = new ulong[length]; - switch (pattern.ToLower()) + switch (pattern) { - case "uniform": + case GeneratorPattern.Uniform: // Random values in [-range, range] for (int i = 0; i < length; i++) data[i] = (ulong)rng.NextInt64(); break; - case "small": + case GeneratorPattern.Small: // Focused on small magnitudes to test ZigZag fast path for (int i = 0; i < length; i++) data[i] = (uint)rng.Next(0, 127); break; - case "ascending": + case GeneratorPattern.Ascending: + + uint start = (uint)rng.NextInt64(); + for (uint i = 0; i < length; i++) + data[i] = start + i; + + break; + + case GeneratorPattern.Clustering: { - uint start = (uint)rng.NextInt64(); - for (uint i = 0; i < length; i++) - data[i] = start + i; + var runs = GenerateRuns(length, 3, 50, 0, long.MaxValue, false); + for (int i = 0; i < length; i++) + { + long v = runs[i]; + if (v < 0) data[i] = 0UL; + else data[i] = (ulong)v; + } } break; @@ -246,31 +285,43 @@ public static class IntArrayGenerator return data; } - public static uint[] GenerateUInt16(int length, string pattern = "uniform", - ushort range = ushort.MaxValue) + public static uint[] GenerateUInt16(int length, GeneratorPattern pattern = GeneratorPattern.Uniform) { var data = new uint[length]; - switch (pattern.ToLower()) + switch (pattern) { - case "uniform": + case GeneratorPattern.Uniform: // Random values in [-range, range] for 
(int i = 0; i < length; i++) - data[i] = (ushort)rng.Next(0, range); + data[i] = (ushort)rng.Next(0, ushort.MaxValue); break; - case "small": + case GeneratorPattern.Small: // Focused on small magnitudes to test ZigZag fast path for (int i = 0; i < length; i++) data[i] = (uint)rng.Next(0, 127); break; - case "ascending": + case GeneratorPattern.Ascending: + + var start = (ushort)rng.Next(0, ushort.MaxValue); + for (uint i = 0; i < length; i++) + data[i] = start + i; + + break; + + case GeneratorPattern.Clustering: { - var start = (ushort)rng.Next(0, range); - for (uint i = 0; i < length; i++) - data[i] = start + i; + var runs = GenerateRuns(length, 3, 50, 0, ushort.MaxValue, false); + for (int i = 0; i < length; i++) + { + long v = runs[i]; + if (v < 0) data[i] = 0u; + else if (v > ushort.MaxValue) data[i] = ushort.MaxValue; + else data[i] = (uint)v; + } } break; @@ -282,7 +333,7 @@ public static class IntArrayGenerator } // Generate random int array of given length and distribution - public static long[] GenerateInt64(int length, GeneratorPattern pattern = GeneratorPattern.Uniform, + public static long[] GenerateInt64(int length, GeneratorPattern pattern = GeneratorPattern.Uniform, long range = long.MaxValue) { var data = new long[length]; @@ -328,6 +379,14 @@ public static class IntArrayGenerator } break; + case GeneratorPattern.Clustering: + { + var runs = GenerateRuns(length, 3, 50, -range, range, true); + for (int i = 0; i < length; i++) + data[i] = runs[i]; + } + break; + default: throw new ArgumentException($"Unknown pattern: {pattern}"); } @@ -335,7 +394,7 @@ public static class IntArrayGenerator return data; } - public static short[] GenerateInt16(int length, GeneratorPattern pattern = GeneratorPattern.Uniform, + public static short[] GenerateInt16(int length, GeneratorPattern pattern = GeneratorPattern.Uniform, short range = short.MaxValue) { var data = new short[length]; @@ -379,6 +438,19 @@ public static class IntArrayGenerator } break; + case 
GeneratorPattern.Clustering: + { + var runs = GenerateRuns(length, 3, 50, -range, range, true); + for (int i = 0; i < length; i++) + { + long v = runs[i]; + if (v > short.MaxValue) data[i] = short.MaxValue; + else if (v < short.MinValue) data[i] = short.MinValue; + else data[i] = (short)v; + } + } + break; + default: throw new ArgumentException($"Unknown pattern: {pattern}"); } diff --git a/Tests/Serialization/Gvwie/IntArrayRunner.cs b/Tests/Serialization/Gvwie/IntArrayRunner.cs index 57e62d2..522aeae 100644 --- a/Tests/Serialization/Gvwie/IntArrayRunner.cs +++ b/Tests/Serialization/Gvwie/IntArrayRunner.cs @@ -30,14 +30,14 @@ namespace Esiur.Tests.Gvwie const int TEST_ITERATIONS = 100; const int SAMPLE_SIZE = 100; - Console.WriteLine(";Esiur;FlatBuffer;ProtoBuffer;MessagePack;BSON;CBOR;Avro,Optimal"); + Console.WriteLine(";Esiur;FlatBuffer;ProtoBuffer;MessagePack;BSON;CBOR;Avro;Optimal"); Console.Write("Cluster (Int32);"); PrintAverage( - Average(() => CompareInt(IntArrayGenerator.GenerateRuns(SAMPLE_SIZE)), TEST_ITERATIONS) + Average(() => CompareInt(IntArrayGenerator.GenerateInt32(SAMPLE_SIZE, GeneratorPattern.Clustering)), TEST_ITERATIONS) ); Console.Write("Positive (Int32);"); @@ -111,61 +111,75 @@ namespace Esiur.Tests.Gvwie // Produces a CSV with header: SampleSize;Esiur;FlatBuffer;ProtoBuffer;MessagePack;BSON;CBOR;Avro;Optimal public void RunChart() { - var sizes = new int[] { 10, 100, 1000, 10000, 100000 }; + var sizes = Enumerable.Range(12, 10) + .Select(i => (int)Math.Pow(2, i)) + .ToArray(); + // Define generators to evaluate. Each entry maps a name to a function that // given a sample size returns the averages (double[]) by calling Average(...).
var generators = new List<(string name, Func fn)>() { - ("GenerateRuns", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateRuns(size)), iterations)), - ("Int32_Uniform", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Uniform)), iterations)), + ("Int32_Positive", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Positive)), iterations)), + + ("Int32_Clustering", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Clustering)), iterations)), ("Int32_Negative", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Negative)), iterations)), ("Int32_Small", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Small)), iterations)), ("Int32_Alternating", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Alternating)), iterations)), ("Int32_Ascending", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Ascending)), iterations)), - ("Int64", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt64(size)), iterations)), - ("Int32_Default", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size)), iterations)), - ("Int16", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt16(size)), iterations)), - ("UInt64", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateUInt64(size)), iterations)), - ("UInt32", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateUInt32(size)), iterations)), - ("UInt16", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateUInt16(size)), iterations)), + //("Int64", (size, iterations) => Average(() => 
CompareInt(IntArrayGenerator.GenerateInt64(size)), iterations)), + //("Int32", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size)), iterations)), + //("Int16", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt16(size)), iterations)), + //("UInt64", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateUInt64(size)), iterations)), + //("UInt32", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateUInt32(size)), iterations)), + //("UInt16", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateUInt16(size)), iterations)), }; foreach (var gen in generators) { var sb = new System.Text.StringBuilder(); - sb.AppendLine("SampleSize,Esiur,FlatBufferProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal"); + var sbr = new System.Text.StringBuilder(); + + sb.AppendLine("SampleSize,Esiur,FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal"); + sbr.AppendLine("SampleSize,Esiur,FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal"); foreach (var size in sizes) { // Choose iterations depending on size to keep total runtime reasonable - int iterations; - if (size <= 100) iterations = 1000; - else if (size <= 1000) iterations = 200; - else if (size <= 10000) iterations = 50; - else iterations = 10; + int iterations = 10; + //if (size <= 100) iterations = 1000; + //else if (size <= 1000) iterations = 200; + //else if (size <= 10000) iterations = 50; + //else iterations = 10; Console.WriteLine($"Running {gen.name} sample size={size}, iterations={iterations}..."); var averages = gen.fn(size, iterations); + PrintAverage(averages); sb.Append(size); + sbr.Append(size); for (int i = 0; i < averages.Length; i++) { sb.Append(','); sb.Append(Math.Round(averages[i])); + sbr.Append(','); + sbr.Append(((averages[i] - averages.Last()) / averages.Last()) * 100.0); } sb.AppendLine(); + sbr.AppendLine(); } var file = $"run_chart_{gen.name}.csv"; 
System.IO.File.WriteAllText(file, sb.ToString()); - Console.WriteLine($"Chart CSV written to: {file}"); + var file2 = $"optimal_chart_{gen.name}.csv"; + System.IO.File.WriteAllText(file2, sbr.ToString()); + + Console.WriteLine($"Chart CSV written to: {file} {file2}"); } } - public static (int, int, int, int, int, int, int, int) CompareInt(long[] sample) { var intRoot = new ArrayRoot() { Values = sample }; diff --git a/Tests/Serialization/Gvwie/Program.cs b/Tests/Serialization/Gvwie/Program.cs index d342b36..89f8b89 100644 --- a/Tests/Serialization/Gvwie/Program.cs +++ b/Tests/Serialization/Gvwie/Program.cs @@ -10,6 +10,6 @@ MessagePack.MessagePackSerializer.DefaultOptions = MessagePackSerializerOptions. var ints = new IntArrayRunner(); -ints.Run(); +//ints.Run(); ints.RunChart();