diff --git a/Libraries/Esiur/Data/Gvwie/GVN.cs b/Libraries/Esiur/Data/Gvwie/GVN.cs deleted file mode 100644 index f22bef3..0000000 --- a/Libraries/Esiur/Data/Gvwie/GVN.cs +++ /dev/null @@ -1,1358 +0,0 @@ -//////////using System; -//////////using System.Collections.Generic; -//////////using System.Runtime.CompilerServices; - -//////////namespace Esiur.Data.Gvwie; - -//////////public static class GroupInt32Codec -//////////{ -////////// private const byte ExtendedRaw32Header = 0xFF; - -////////// // ----------------- Encoder ----------------- -////////// public static byte[] Encode(IList values) -////////// { -////////// var dst = new List(values.Count * 2); -////////// int i = 0; - -////////// while (i < values.Count) -////////// { -////////// uint zz = ZigZag32(values[i]); - -////////// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits -////////// if (zz <= 0x7Fu) -////////// { -////////// dst.Add((byte)zz); -////////// i++; -////////// continue; -////////// } - -////////// int start = i; -////////// int width = WidthFromZigZag(zz); - -////////// // Extended raw 32-bit run: -////////// // 0xFF + varint(count-1) + count * 4-byte LE zigzag payload -////////// if (width == 4) -////////// { -////////// int count = 1; - -////////// while ((i + count) < values.Count) -////////// { -////////// uint z2 = ZigZag32(values[i + count]); -////////// if (WidthFromZigZag(z2) != 4) -////////// break; - -////////// count++; -////////// } - -////////// dst.Add(ExtendedRaw32Header); -////////// WriteVarUInt32(dst, (uint)(count - 1)); - -////////// for (int k = 0; k < count; k++) -////////// WriteLE(dst, ZigZag32(values[start + k]), 4); - -////////// i += count; -////////// continue; -////////// } - -////////// // Normal group: up to 32 items sharing a common width (1..3 bytes) -////////// int countNormal = 1; - -////////// while (countNormal < 32 && (i + countNormal) < values.Count) -////////// { -////////// uint z2 = ZigZag32(values[i + countNormal]); -////////// int w2 = 
WidthFromZigZag(z2); - -////////// // Stop before 4-byte values so extended mode can take them -////////// if (w2 == 4) -////////// break; - -////////// width = Math.Max(width, w2); -////////// countNormal++; -////////// } - -////////// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] -////////// byte header = 0x80; -////////// header |= (byte)(((countNormal - 1) & 0x1F) << 2); -////////// header |= (byte)((width - 1) & 0x03); -////////// dst.Add(header); - -////////// for (int k = 0; k < countNormal; k++) -////////// WriteLE(dst, ZigZag32(values[start + k]), width); - -////////// i += countNormal; -////////// } - -////////// return dst.ToArray(); -////////// } - -////////// // ----------------- Decoder ----------------- -////////// public static int[] Decode(ReadOnlySpan src) -////////// { -////////// var result = new List(); -////////// int pos = 0; - -////////// while (pos < src.Length) -////////// { -////////// byte h = src[pos++]; - -////////// if ((h & 0x80) == 0) -////////// { -////////// uint zz7 = (uint)(h & 0x7F); -////////// result.Add(UnZigZag32(zz7)); -////////// continue; -////////// } - -////////// // Extended raw 32-bit run -////////// if (h == ExtendedRaw32Header) -////////// { -////////// uint countMinus1 = ReadVarUInt32(src, ref pos); -////////// int count = checked((int)countMinus1 + 1); - -////////// for (int j = 0; j < count; j++) -////////// { -////////// uint raw = (uint)ReadLE(src, ref pos, 4); -////////// result.Add(UnZigZag32(raw)); -////////// } - -////////// continue; -////////// } - -////////// int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 -////////// int width = (h & 0x03) + 1; // 1..4 (though encoder uses 1..3 here) - -////////// for (int j = 0; j < countNormal; j++) -////////// { -////////// uint raw = (uint)ReadLE(src, ref pos, width); -////////// result.Add(UnZigZag32(raw)); -////////// } -////////// } - -////////// return result.ToArray(); -////////// } - -////////// // ----------------- Helpers ----------------- - 
-////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////////// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); - -////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////////// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); - -////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////////// private static int WidthFromZigZag(uint z) -////////// { -////////// if (z <= 0xFFu) return 1; -////////// if (z <= 0xFFFFu) return 2; -////////// if (z <= 0xFFFFFFu) return 3; -////////// return 4; -////////// } - -////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////////// private static void WriteLE(List dst, uint value, int width) -////////// { -////////// for (int i = 0; i < width; i++) -////////// dst.Add((byte)((value >> (8 * i)) & 0xFF)); -////////// } - -////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////////// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) -////////// { -////////// if ((uint)(pos + width) > (uint)src.Length) -////////// throw new ArgumentException("Buffer underflow while reading group payload."); - -////////// ulong v = 0; -////////// for (int i = 0; i < width; i++) -////////// v |= (ulong)src[pos++] << (8 * i); -////////// return v; -////////// } - -////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////////// private static void WriteVarUInt32(List dst, uint value) -////////// { -////////// while (value >= 0x80) -////////// { -////////// dst.Add((byte)(value | 0x80)); -////////// value >>= 7; -////////// } - -////////// dst.Add((byte)value); -////////// } - -////////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////////// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) -////////// { -////////// uint result = 0; -////////// int shift = 0; - -////////// while (true) -////////// { -////////// if (pos >= src.Length) -////////// throw new ArgumentException("Buffer underflow while 
reading varint."); - -////////// byte b = src[pos++]; -////////// result |= (uint)(b & 0x7F) << shift; - -////////// if ((b & 0x80) == 0) -////////// return result; - -////////// shift += 7; -////////// if (shift >= 35) -////////// throw new ArgumentException("Varint is too long for UInt32."); -////////// } -////////// } -//////////} - - - - -////////using System; -////////using System.Collections.Generic; -////////using System.Runtime.CompilerServices; - -////////namespace Esiur.Data.Gvwie; - -////////public static class GroupInt32Codec -////////{ -//////// // ----------------- Encoder ----------------- -//////// public static byte[] Encode(IList values) -//////// { -//////// var dst = new List(values.Count * 2); -//////// int i = 0; - -//////// while (i < values.Count) -//////// { -//////// uint zz = ZigZag32(values[i]); - -//////// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits -//////// if (zz <= 0x7Fu) -//////// { -//////// dst.Add((byte)zz); -//////// i++; -//////// continue; -//////// } - -//////// int start = i; -//////// int width = WidthFromZigZag(zz); -//////// int count = 1; - -//////// // Build a run of same-width non-literal values -//////// while ((i + count) < values.Count) -//////// { -//////// uint z2 = ZigZag32(values[i + count]); - -//////// // Do not absorb literal-fast-path values into groups -//////// if (z2 <= 0x7Fu) -//////// break; - -//////// int w2 = WidthFromZigZag(z2); -//////// if (w2 != width) -//////// break; - -//////// count++; -//////// } - -//////// if (count <= 31) -//////// { -//////// // Short group: -//////// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] -//////// byte header = 0x80; -//////// header |= (byte)(((count - 1) & 0x1F) << 2); -//////// header |= (byte)((width - 1) & 0x03); -//////// dst.Add(header); -//////// } -//////// else -//////// { -//////// // Extended group: -//////// // Header: 1 | 11111 | (width-1)[2 bits] -//////// // Followed by varint(count - 32) -//////// byte header = 0x80; 
-//////// header |= 0x7C; // count bits = 11111 -//////// header |= (byte)((width - 1) & 0x03); -//////// dst.Add(header); -//////// WriteVarUInt32(dst, (uint)(count - 32)); -//////// } - -//////// // Payload: 'count' zigzag values, LE, 'width' bytes each -//////// for (int k = 0; k < count; k++) -//////// WriteLE(dst, ZigZag32(values[start + k]), width); - -//////// i += count; -//////// } - -//////// return dst.ToArray(); -//////// } - -//////// // ----------------- Decoder ----------------- -//////// public static int[] Decode(ReadOnlySpan src) -//////// { -//////// var result = new List(); -//////// int pos = 0; - -//////// while (pos < src.Length) -//////// { -//////// byte h = src[pos++]; - -//////// if ((h & 0x80) == 0) -//////// { -//////// // Fast path: 7-bit ZigZag in low bits -//////// uint zz7 = (uint)(h & 0x7F); -//////// result.Add(UnZigZag32(zz7)); -//////// continue; -//////// } - -//////// int countField = (h >> 2) & 0x1F; -//////// int width = (h & 0x03) + 1; - -//////// int count; -//////// if (countField == 31) -//////// { -//////// // Extended group length -//////// uint extra = ReadVarUInt32(src, ref pos); -//////// count = checked(32 + (int)extra); -//////// } -//////// else -//////// { -//////// count = countField + 1; -//////// } - -//////// for (int j = 0; j < count; j++) -//////// { -//////// uint raw = (uint)ReadLE(src, ref pos, width); -//////// int val = UnZigZag32(raw); -//////// result.Add(val); -//////// } -//////// } - -//////// return result.ToArray(); -//////// } - -//////// // ----------------- Helpers ----------------- - -//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//////// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); - -//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//////// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); - -//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//////// private static int WidthFromZigZag(uint z) 
-//////// { -//////// if (z <= 0xFFu) return 1; -//////// if (z <= 0xFFFFu) return 2; -//////// if (z <= 0xFFFFFFu) return 3; -//////// return 4; -//////// } - -//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//////// private static void WriteLE(List dst, uint value, int width) -//////// { -//////// for (int i = 0; i < width; i++) -//////// dst.Add((byte)((value >> (8 * i)) & 0xFF)); -//////// } - -//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//////// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) -//////// { -//////// if ((uint)(pos + width) > (uint)src.Length) -//////// throw new ArgumentException("Buffer underflow while reading group payload."); - -//////// ulong v = 0; -//////// for (int i = 0; i < width; i++) -//////// v |= (ulong)src[pos++] << (8 * i); -//////// return v; -//////// } - -//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//////// private static void WriteVarUInt32(List dst, uint value) -//////// { -//////// while (value >= 0x80) -//////// { -//////// dst.Add((byte)((value & 0x7F) | 0x80)); -//////// value >>= 7; -//////// } - -//////// dst.Add((byte)value); -//////// } - -//////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//////// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) -//////// { -//////// uint result = 0; -//////// int shift = 0; - -//////// while (true) -//////// { -//////// if (pos >= src.Length) -//////// throw new ArgumentException("Buffer underflow while reading varint."); - -//////// byte b = src[pos++]; -//////// result |= (uint)(b & 0x7F) << shift; - -//////// if ((b & 0x80) == 0) -//////// return result; - -//////// shift += 7; -//////// if (shift >= 35) -//////// throw new ArgumentException("Varint is too long for UInt32."); -//////// } -//////// } -////////} - - -//////using System; -//////using System.Collections.Generic; -//////using System.Runtime.CompilerServices; - -//////namespace Esiur.Data.Gvwie; - -//////public static class 
GroupInt32Codec -//////{ -////// private const byte ExtendedRaw32Header = 0xFF; - -////// // ----------------- Encoder ----------------- -////// public static byte[] Encode(IList values) -////// { -////// var dst = new List(values.Count * 2); -////// int i = 0; - -////// while (i < values.Count) -////// { -////// uint zz = ZigZag32(values[i]); - -////// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits -////// if (zz <= 0x7Fu) -////// { -////// dst.Add((byte)zz); -////// i++; -////// continue; -////// } - -////// int start = i; -////// int width = WidthFromZigZag(zz); - -////// // Extended mode only for long width-4 runs -////// if (width == 4) -////// { -////// int runCount = 1; - -////// while ((i + runCount) < values.Count) -////// { -////// uint z2 = ZigZag32(values[i + runCount]); - -////// // Keep literals separate -////// if (z2 <= 0x7Fu) -////// break; - -////// if (WidthFromZigZag(z2) != 4) -////// break; - -////// runCount++; -////// } - -////// // Use extended mode only when it is actually longer than normal max group -////// if (runCount > 32) -////// { -////// dst.Add(ExtendedRaw32Header); -////// WriteVarUInt32(dst, (uint)(runCount - 33)); // 33 -> 0, 34 -> 1, ... 
- -////// for (int k = 0; k < runCount; k++) -////// WriteLE(dst, ZigZag32(values[start + k]), 4); - -////// i += runCount; -////// continue; -////// } -////// } - -////// // Normal group: up to 32 items sharing a common width (1..4 bytes) -////// int count = 1; - -////// while (count < 32 && (i + count) < values.Count) -////// { -////// uint z2 = ZigZag32(values[i + count]); - -////// // Do not absorb literal-fast-path values into groups -////// if (z2 <= 0x7Fu) -////// break; - -////// int w2 = WidthFromZigZag(z2); -////// if (w2 != width) -////// break; - -////// count++; -////// } - -////// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] -////// byte header = 0x80; -////// header |= (byte)(((count - 1) & 0x1F) << 2); -////// header |= (byte)((width - 1) & 0x03); -////// dst.Add(header); - -////// for (int k = 0; k < count; k++) -////// WriteLE(dst, ZigZag32(values[start + k]), width); - -////// i += count; -////// } - -////// return dst.ToArray(); -////// } - -////// // ----------------- Decoder ----------------- -////// public static int[] Decode(ReadOnlySpan src) -////// { -////// var result = new List(); -////// int pos = 0; - -////// while (pos < src.Length) -////// { -////// byte h = src[pos++]; - -////// if ((h & 0x80) == 0) -////// { -////// // Fast path: 7-bit ZigZag in low bits -////// uint zz7 = (uint)(h & 0x7F); -////// result.Add(UnZigZag32(zz7)); -////// continue; -////// } - -////// // Extended raw width-4 run -////// if (h == ExtendedRaw32Header) -////// { -////// uint extra = ReadVarUInt32(src, ref pos); -////// int count = checked(33 + (int)extra); - -////// for (int j = 0; j < count; j++) -////// { -////// uint raw = (uint)ReadLE(src, ref pos, 4); -////// result.Add(UnZigZag32(raw)); -////// } - -////// continue; -////// } - -////// int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 -////// int width = (h & 0x03) + 1; // 1..4 - -////// for (int j = 0; j < countNormal; j++) -////// { -////// uint raw = (uint)ReadLE(src, ref pos, width); 
-////// result.Add(UnZigZag32(raw)); -////// } -////// } - -////// return result.ToArray(); -////// } - -////// // ----------------- Helpers ----------------- - -////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); - -////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); - -////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////// private static int WidthFromZigZag(uint z) -////// { -////// if (z <= 0xFFu) return 1; -////// if (z <= 0xFFFFu) return 2; -////// if (z <= 0xFFFFFFu) return 3; -////// return 4; -////// } - -////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////// private static void WriteLE(List dst, uint value, int width) -////// { -////// for (int i = 0; i < width; i++) -////// dst.Add((byte)((value >> (8 * i)) & 0xFF)); -////// } - -////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) -////// { -////// if ((uint)(pos + width) > (uint)src.Length) -////// throw new ArgumentException("Buffer underflow while reading group payload."); - -////// ulong v = 0; -////// for (int i = 0; i < width; i++) -////// v |= (ulong)src[pos++] << (8 * i); -////// return v; -////// } - -////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////// private static void WriteVarUInt32(List dst, uint value) -////// { -////// while (value >= 0x80) -////// { -////// dst.Add((byte)((value & 0x7F) | 0x80)); -////// value >>= 7; -////// } - -////// dst.Add((byte)value); -////// } - -////// [MethodImpl(MethodImplOptions.AggressiveInlining)] -////// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) -////// { -////// uint result = 0; -////// int shift = 0; - -////// while (true) -////// { -////// if (pos >= src.Length) -////// throw new ArgumentException("Buffer underflow while reading varint."); 
- -////// byte b = src[pos++]; -////// result |= (uint)(b & 0x7F) << shift; - -////// if ((b & 0x80) == 0) -////// return result; - -////// shift += 7; -////// if (shift >= 35) -////// throw new ArgumentException("Varint is too long for UInt32."); -////// } -////// } -//////} - -////using System; -////using System.Collections.Generic; -////using System.Runtime.CompilerServices; - -////namespace Esiur.Data.Gvwie; - -////public static class GroupInt32Codec -////{ -//// private const byte RawInt32RunHeader = 0xFF; - -//// // ----------------- Encoder ----------------- -//// public static byte[] Encode(IList values) -//// { -//// var dst = new List(values.Count * 2); -//// int i = 0; - -//// while (i < values.Count) -//// { -//// uint zz = ZigZag32(values[i]); - -//// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits -//// if (zz <= 0x7Fu) -//// { -//// dst.Add((byte)zz); -//// i++; -//// continue; -//// } - -//// int start = i; -//// int width = WidthFromZigZag(zz); - -//// // Detect long full-width run and emit raw Int32 block instead of grouped width=4 -//// if (width == 4) -//// { -//// int runCount = 1; - -//// while ((i + runCount) < values.Count) -//// { -//// uint z2 = ZigZag32(values[i + runCount]); - -//// // keep literals separate -//// if (z2 <= 0x7Fu) -//// break; - -//// if (WidthFromZigZag(z2) != 4) -//// break; - -//// runCount++; -//// } - -//// // Threshold can be tuned; 33+ is a good starting point -//// if (runCount >= 33) -//// { -//// dst.Add(RawInt32RunHeader); -//// WriteVarUInt32(dst, (uint)runCount); - -//// for (int k = 0; k < runCount; k++) -//// WriteInt32LE(dst, values[start + k]); - -//// i += runCount; -//// continue; -//// } -//// } - -//// // Normal group: up to 32 items sharing the same width (1..4 bytes) -//// int count = 1; - -//// while (count < 32 && (i + count) < values.Count) -//// { -//// uint z2 = ZigZag32(values[i + count]); - -//// // do not absorb literal-fast-path values into groups -//// if (z2 <= 0x7Fu) -//// 
break; - -//// int w2 = WidthFromZigZag(z2); -//// if (w2 != width) -//// break; - -//// count++; -//// } - -//// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] -//// byte header = 0x80; -//// header |= (byte)(((count - 1) & 0x1F) << 2); -//// header |= (byte)((width - 1) & 0x03); -//// dst.Add(header); - -//// for (int k = 0; k < count; k++) -//// WriteLE(dst, ZigZag32(values[start + k]), width); - -//// i += count; -//// } - -//// return dst.ToArray(); -//// } - -//// // ----------------- Decoder ----------------- -//// public static int[] Decode(ReadOnlySpan src) -//// { -//// var result = new List(); -//// int pos = 0; - -//// while (pos < src.Length) -//// { -//// byte h = src[pos++]; - -//// if ((h & 0x80) == 0) -//// { -//// // Fast path: 7-bit ZigZag in low bits -//// uint zz7 = (uint)(h & 0x7F); -//// result.Add(UnZigZag32(zz7)); -//// continue; -//// } - -//// // Raw fixed-width Int32 run -//// if (h == RawInt32RunHeader) -//// { -//// uint countU = ReadVarUInt32(src, ref pos); -//// int count = checked((int)countU); - -//// for (int j = 0; j < count; j++) -//// result.Add(ReadInt32LE(src, ref pos)); - -//// continue; -//// } - -//// int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 -//// int width = (h & 0x03) + 1; // 1..4 - -//// for (int j = 0; j < countNormal; j++) -//// { -//// uint raw = (uint)ReadLE(src, ref pos, width); -//// result.Add(UnZigZag32(raw)); -//// } -//// } - -//// return result.ToArray(); -//// } - -//// // ----------------- Helpers ----------------- - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static int WidthFromZigZag(uint z) -//// { -//// if (z <= 0xFFu) return 1; -//// if (z <= 0xFFFFu) return 2; -//// if (z <= 0xFFFFFFu) 
return 3; -//// return 4; -//// } - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static void WriteLE(List dst, uint value, int width) -//// { -//// for (int i = 0; i < width; i++) -//// dst.Add((byte)((value >> (8 * i)) & 0xFF)); -//// } - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) -//// { -//// if ((uint)(pos + width) > (uint)src.Length) -//// throw new ArgumentException("Buffer underflow while reading group payload."); - -//// ulong v = 0; -//// for (int i = 0; i < width; i++) -//// v |= (ulong)src[pos++] << (8 * i); -//// return v; -//// } - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static void WriteInt32LE(List dst, int value) -//// { -//// uint u = unchecked((uint)value); -//// dst.Add((byte)(u & 0xFF)); -//// dst.Add((byte)((u >> 8) & 0xFF)); -//// dst.Add((byte)((u >> 16) & 0xFF)); -//// dst.Add((byte)((u >> 24) & 0xFF)); -//// } - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static int ReadInt32LE(ReadOnlySpan src, ref int pos) -//// { -//// if ((uint)(pos + 4) > (uint)src.Length) -//// throw new ArgumentException("Buffer underflow while reading raw Int32 payload."); - -//// uint u = -//// (uint)src[pos] -//// | ((uint)src[pos + 1] << 8) -//// | ((uint)src[pos + 2] << 16) -//// | ((uint)src[pos + 3] << 24); - -//// pos += 4; -//// return unchecked((int)u); -//// } - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static void WriteVarUInt32(List dst, uint value) -//// { -//// while (value >= 0x80) -//// { -//// dst.Add((byte)((value & 0x7F) | 0x80)); -//// value >>= 7; -//// } - -//// dst.Add((byte)value); -//// } - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) -//// { -//// uint result = 0; -//// int shift = 0; - -//// while (true) -//// { -//// if (pos >= src.Length) -//// throw new 
ArgumentException("Buffer underflow while reading varint."); - -//// byte b = src[pos++]; -//// result |= (uint)(b & 0x7F) << shift; - -//// if ((b & 0x80) == 0) -//// return result; - -//// shift += 7; -//// if (shift >= 35) -//// throw new ArgumentException("Varint is too long for UInt32."); -//// } -//// } -////} - - -//using System; -//using System.Collections.Generic; -//using System.Runtime.CompilerServices; - -//namespace Esiur.Data.Gvwie; - -//public static class GroupInt32Codec -//{ -// private const byte RawInt32RunHeader = 0xFF; - -// // ----------------- Encoder ----------------- -// public static byte[] Encode(IList values) -// { -// var dst = new List(values.Count * 2); -// int i = 0; - -// while (i < values.Count) -// { -// uint zz = ZigZag32(values[i]); - -// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits -// if (zz <= 0x7Fu) -// { -// dst.Add((byte)zz); -// i++; -// continue; -// } - -// int start = i; -// int width = WidthFromZigZag(zz); - -// // Detect long full-width run and emit raw Int32 block instead of grouped width=4 -// if (width == 4) -// { -// int runCount = 1; - -// while ((i + runCount) < values.Count) -// { -// uint z2 = ZigZag32(values[i + runCount]); - -// // keep literals separate -// if (z2 <= 0x7Fu) -// break; - -// if (WidthFromZigZag(z2) != 4) -// break; - -// runCount++; -// } - -// // Threshold can be tuned; 33+ is a good starting point -// if (runCount >= 33) -// { -// dst.Add(RawInt32RunHeader); -// WriteVarUInt32(dst, (uint)runCount); - -// for (int k = 0; k < runCount; k++) -// WriteInt32LE(dst, values[start + k]); - -// i += runCount; -// continue; -// } -// } - -// // Normal group: up to 32 items sharing the same width (1..4 bytes) -// int count = 1; - -// while (count < 32 && (i + count) < values.Count) -// { -// uint z2 = ZigZag32(values[i + count]); - -// // do not absorb literal-fast-path values into groups -// if (z2 <= 0x7Fu) -// break; - -// int w2 = WidthFromZigZag(z2); -// if (w2 != width) -// 
break; - -// count++; -// } - -// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] -// byte header = 0x80; -// header |= (byte)(((count - 1) & 0x1F) << 2); -// header |= (byte)((width - 1) & 0x03); -// dst.Add(header); - -// for (int k = 0; k < count; k++) -// WriteLE(dst, ZigZag32(values[start + k]), width); - -// i += count; -// } - -// return dst.ToArray(); -// } - -// // ----------------- Decoder ----------------- -// public static int[] Decode(ReadOnlySpan src) -// { -// var result = new List(); -// int pos = 0; - -// while (pos < src.Length) -// { -// byte h = src[pos++]; - -// if ((h & 0x80) == 0) -// { -// // Fast path: 7-bit ZigZag in low bits -// uint zz7 = (uint)(h & 0x7F); -// result.Add(UnZigZag32(zz7)); -// continue; -// } - -// // Raw fixed-width Int32 run -// if (h == RawInt32RunHeader) -// { -// uint countU = ReadVarUInt32(src, ref pos); -// int count = checked((int)countU); - -// for (int j = 0; j < count; j++) -// result.Add(ReadInt32LE(src, ref pos)); - -// continue; -// } - -// int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 -// int width = (h & 0x03) + 1; // 1..4 - -// for (int j = 0; j < countNormal; j++) -// { -// uint raw = (uint)ReadLE(src, ref pos, width); -// result.Add(UnZigZag32(raw)); -// } -// } - -// return result.ToArray(); -// } - -// // ----------------- Helpers ----------------- - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static int WidthFromZigZag(uint z) -// { -// if (z <= 0xFFu) return 1; -// if (z <= 0xFFFFu) return 2; -// if (z <= 0xFFFFFFu) return 3; -// return 4; -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static void WriteLE(List dst, uint value, int width) -// { -// for (int i = 0; i < width; 
i++) -// dst.Add((byte)((value >> (8 * i)) & 0xFF)); -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) -// { -// if ((uint)(pos + width) > (uint)src.Length) -// throw new ArgumentException("Buffer underflow while reading group payload."); - -// ulong v = 0; -// for (int i = 0; i < width; i++) -// v |= (ulong)src[pos++] << (8 * i); -// return v; -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static void WriteInt32LE(List dst, int value) -// { -// uint u = unchecked((uint)value); -// dst.Add((byte)(u & 0xFF)); -// dst.Add((byte)((u >> 8) & 0xFF)); -// dst.Add((byte)((u >> 16) & 0xFF)); -// dst.Add((byte)((u >> 24) & 0xFF)); -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static int ReadInt32LE(ReadOnlySpan src, ref int pos) -// { -// if ((uint)(pos + 4) > (uint)src.Length) -// throw new ArgumentException("Buffer underflow while reading raw Int32 payload."); - -// uint u = -// (uint)src[pos] -// | ((uint)src[pos + 1] << 8) -// | ((uint)src[pos + 2] << 16) -// | ((uint)src[pos + 3] << 24); - -// pos += 4; -// return unchecked((int)u); -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static void WriteVarUInt32(List dst, uint value) -// { -// while (value >= 0x80) -// { -// dst.Add((byte)((value & 0x7F) | 0x80)); -// value >>= 7; -// } - -// dst.Add((byte)value); -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) -// { -// uint result = 0; -// int shift = 0; - -// while (true) -// { -// if (pos >= src.Length) -// throw new ArgumentException("Buffer underflow while reading varint."); - -// byte b = src[pos++]; -// result |= (uint)(b & 0x7F) << shift; - -// if ((b & 0x80) == 0) -// return result; - -// shift += 7; -// if (shift >= 35) -// throw new ArgumentException("Varint is too long for UInt32."); -// } -// } -//} -using 
/// <summary>
/// Compact codec for sequences of 32-bit signed integers. Values are ZigZag-mapped so that
/// small magnitudes (positive or negative) become small unsigned numbers, then packed into
/// one of three unit kinds selected by a header byte.
/// </summary>
/// <remarks>
/// Unit kinds, identified by the first byte of each unit:
/// <list type="bullet">
/// <item><description>MSB = 0: literal. The low 7 bits are the ZigZag value itself.</description></item>
/// <item><description>0xFF: raw run. Followed by varint(count) and count plain little-endian Int32 values (not ZigZag-mapped).</description></item>
/// <item><description>Any other byte with MSB = 1: group. Layout is 1 | (count-1)[5 bits] | (width-1)[2 bits],
/// followed by count ZigZag values of width bytes each, little-endian.</description></item>
/// </list>
/// Because 0xFF would otherwise mean "32 items of width 4", width-4 groups are capped at 31 items.
/// </remarks>
public static class GroupInt32Codec
{
    /// <summary>Header byte that introduces a raw little-endian Int32 run.</summary>
    private const byte RawInt32RunHeader = 0xFF;

    /// <summary>Largest ZigZag value that fits the single-byte literal form.</summary>
    private const uint MaxLiteralZigZag = 0x7F;

    /// <summary>Maximum items in a group of width 1..3 (5-bit count field stores count-1).</summary>
    private const int MaxGroupCount = 32;

    /// <summary>Maximum items in a width-4 group; 32 would collide with <see cref="RawInt32RunHeader"/>.</summary>
    private const int MaxWidth4GroupCount = 31;

    // ----------------- Encoder -----------------

    /// <summary>Encodes <paramref name="values"/> into the grouped byte format.</summary>
    /// <param name="values">Values to encode; may be empty.</param>
    /// <returns>The encoded bytes (an empty array for empty input).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
    public static byte[] Encode(IList<int> values)
    {
        if (values is null)
        {
            throw new ArgumentNullException(nameof(values));
        }

        int total = values.Count;

        // Capacity is only a hint; avoid int overflow of "total * 2" on very large inputs.
        var dst = new List<byte>(total <= int.MaxValue / 2 ? total * 2 : total);
        int i = 0;

        while (i < total)
        {
            uint zz = ZigZag32(values[i]);

            // Fast path: single byte (MSB=0) when zigzag fits in 7 bits.
            if (zz <= MaxLiteralZigZag)
            {
                dst.Add((byte)zz);
                i++;
                continue;
            }

            int width = WidthFromZigZag(zz);

            if (width == 4)
            {
                // Measure the whole consecutive width-4 span, then pick the cheaper framing.
                // Both framings carry 4 payload bytes per item, so only the header overhead
                // differs; comparing overheads also avoids overflowing "runCount * 4".
                int runCount = CountRun(values, i, 4, int.MaxValue);
                int rawOverhead = 1 + VarUInt32Size((uint)runCount);
                int groupedOverhead = Width4GroupHeaderCount(runCount);

                if (rawOverhead < groupedOverhead)
                {
                    dst.Add(RawInt32RunHeader);
                    WriteVarUInt32(dst, (uint)runCount);

                    for (int k = 0; k < runCount; k++)
                    {
                        WriteInt32LE(dst, values[i + k]);
                    }

                    i += runCount;
                    continue;
                }
            }

            // Normal group: consecutive non-literal values that share the same width.
            int limit = width == 4 ? MaxWidth4GroupCount : MaxGroupCount;
            int count = CountRun(values, i, width, limit);

            // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits]
            dst.Add((byte)(0x80 | ((count - 1) << 2) | (width - 1)));

            for (int k = 0; k < count; k++)
            {
                WriteLE(dst, ZigZag32(values[i + k]), width);
            }

            i += count;
        }

        return dst.ToArray();
    }

    // ----------------- Decoder -----------------

    /// <summary>Decodes a buffer produced by <see cref="Encode"/>.</summary>
    /// <param name="src">Encoded bytes; may be empty.</param>
    /// <returns>The decoded values in their original order.</returns>
    /// <exception cref="ArgumentException">
    /// The buffer ends in the middle of a unit, or a varint does not fit in 32 bits.
    /// </exception>
    /// <exception cref="OverflowException">A raw-run count exceeds <see cref="int.MaxValue"/>.</exception>
    public static int[] Decode(ReadOnlySpan<byte> src)
    {
        var result = new List<int>();
        int pos = 0;

        while (pos < src.Length)
        {
            byte h = src[pos++];

            if ((h & 0x80) == 0)
            {
                // Literal: the header byte is the 7-bit ZigZag value.
                result.Add(UnZigZag32(h));
                continue;
            }

            if (h == RawInt32RunHeader)
            {
                uint countU = ReadVarUInt32(src, ref pos);
                int count = checked((int)countU);

                // Fail fast on a count the remaining bytes cannot satisfy, instead of
                // decoding item by item until the payload runs out.
                if (count > (src.Length - pos) / 4)
                {
                    throw new ArgumentException("Buffer underflow while reading raw Int32 payload.");
                }

                for (int j = 0; j < count; j++)
                {
                    result.Add(ReadInt32LE(src, ref pos));
                }

                continue;
            }

            int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32
            int width = (h & 0x03) + 1;              // 1..4

            for (int j = 0; j < countNormal; j++)
            {
                result.Add(UnZigZag32(ReadLE(src, ref pos, width)));
            }
        }

        return result.ToArray();
    }

    // ----------------- Size helpers -----------------

    /// <summary>
    /// Counts how many consecutive items starting at <paramref name="start"/> are non-literal
    /// and have exactly <paramref name="width"/> bytes, stopping at <paramref name="limit"/>.
    /// The item at <paramref name="start"/> is assumed to qualify and is always counted.
    /// </summary>
    private static int CountRun(IList<int> values, int start, int width, int limit)
    {
        int count = 1;

        while (count < limit && (start + count) < values.Count)
        {
            uint zz = ZigZag32(values[start + count]);

            // Literals stay on the one-byte fast path; a width change ends the run.
            if (zz <= MaxLiteralZigZag || WidthFromZigZag(zz) != width)
            {
                break;
            }

            count++;
        }

        return count;
    }

    /// <summary>Number of group headers needed to frame <paramref name="count"/> width-4 items.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int Width4GroupHeaderCount(int count)
    {
        int groups = count / MaxWidth4GroupCount;
        if ((count % MaxWidth4GroupCount) != 0)
        {
            groups++;
        }

        return groups;
    }

    /// <summary>Number of bytes <see cref="WriteVarUInt32"/> emits for <paramref name="value"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int VarUInt32Size(uint value)
    {
        int size = 1;
        while (value >= 0x80)
        {
            value >>= 7;
            size++;
        }

        return size;
    }

    // ----------------- Helpers -----------------

    /// <summary>Maps a signed value to unsigned so small magnitudes yield small numbers.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static uint ZigZag32(int v) => unchecked((uint)((v << 1) ^ (v >> 31)));

    /// <summary>Inverse of <see cref="ZigZag32"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int UnZigZag32(uint u) => unchecked((int)((u >> 1) ^ (uint)-(int)(u & 1)));

    /// <summary>Smallest byte count (1..4) able to hold the ZigZag value <paramref name="z"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int WidthFromZigZag(uint z)
    {
        if (z <= 0xFFu)
        {
            return 1;
        }

        if (z <= 0xFFFFu)
        {
            return 2;
        }

        if (z <= 0xFFFFFFu)
        {
            return 3;
        }

        return 4;
    }

    /// <summary>Appends the low <paramref name="width"/> bytes of <paramref name="value"/>, little-endian.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void WriteLE(List<byte> dst, uint value, int width)
    {
        for (int i = 0; i < width; i++)
        {
            dst.Add((byte)((value >> (8 * i)) & 0xFF));
        }
    }

    /// <summary>Reads a little-endian unsigned value of <paramref name="width"/> bytes and advances <paramref name="pos"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static uint ReadLE(ReadOnlySpan<byte> src, ref int pos, int width)
    {
        if (width > src.Length - pos)
        {
            throw new ArgumentException("Buffer underflow while reading group payload.");
        }

        uint v = 0;
        for (int i = 0; i < width; i++)
        {
            v |= (uint)src[pos++] << (8 * i);
        }

        return v;
    }

    /// <summary>Appends <paramref name="value"/> as four little-endian bytes, without ZigZag mapping.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void WriteInt32LE(List<byte> dst, int value)
    {
        WriteLE(dst, unchecked((uint)value), 4);
    }

    /// <summary>Reads four little-endian bytes as a plain Int32 and advances <paramref name="pos"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int ReadInt32LE(ReadOnlySpan<byte> src, ref int pos)
    {
        if (4 > src.Length - pos)
        {
            throw new ArgumentException("Buffer underflow while reading raw Int32 payload.");
        }

        uint u =
            (uint)src[pos]
            | ((uint)src[pos + 1] << 8)
            | ((uint)src[pos + 2] << 16)
            | ((uint)src[pos + 3] << 24);

        pos += 4;
        return unchecked((int)u);
    }

    /// <summary>Appends <paramref name="value"/> as a base-128 varint, least significant group first.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void WriteVarUInt32(List<byte> dst, uint value)
    {
        while (value >= 0x80)
        {
            dst.Add((byte)((value & 0x7F) | 0x80));
            value >>= 7;
        }

        dst.Add((byte)value);
    }

    /// <summary>Reads a base-128 varint of at most five bytes and advances <paramref name="pos"/>.</summary>
    /// <exception cref="ArgumentException">The buffer ends early or the value does not fit in 32 bits.</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static uint ReadVarUInt32(ReadOnlySpan<byte> src, ref int pos)
    {
        uint result = 0;

        for (int shift = 0; shift < 35; shift += 7)
        {
            if (pos >= src.Length)
            {
                throw new ArgumentException("Buffer underflow while reading varint.");
            }

            byte b = src[pos++];
            uint payload = (uint)(b & 0x7F);

            // The fifth byte may contribute only 4 bits (28 + 4 = 32); anything above
            // would be shifted out silently and yield a wrong value.
            if (shift == 28 && payload > 0x0F)
            {
                throw new ArgumentException("Varint is too long for UInt32.");
            }

            result |= payload << shift;

            if ((b & 0x80) == 0)
            {
                return result;
            }
        }

        throw new ArgumentException("Varint is too long for UInt32.");
    }
}
1 : 2; - while (count < 64 && (i + count) < values.Count) + // Build a run of same-width non-literal values + while ((i + count) < values.Count) { ushort z2 = ZigZag16(values[i + count]); - int w2 = (z2 <= 0xFFu) ? 1 : 2; - if (w2 > width) width = w2; // widen as needed + + // Do not absorb literal-fast-path values into groups + if (z2 <= 0x7F) + break; + + int w2 = WidthFromZigZag(z2); + if (w2 != width) + break; + count++; } - // Header: 1 | (count-1)[6 bits] | (width-1)[1 bit] - byte header = 0x80; - header |= (byte)(((count - 1) & 0x3F) << 1); - header |= (byte)((width - 1) & 0x01); - dst.Add(header); - - // Payload: count ZigZag magnitudes, LE, 'width' bytes each - for (int k = 0; k < count; k++) + if (count <= 63) { - ushort z = ZigZag16(values[start + k]); - WriteLE(dst, z, width); + // Short group: + // Header: 1 | (count-1)[6 bits] | (width-1)[1 bit] + byte header = 0x80; + header |= (byte)(((count - 1) & 0x3F) << 1); + header |= (byte)((width - 1) & 0x01); + dst.Add(header); } + else + { + // Extended group: + // Header: 1 | 111111 | (width-1)[1 bit] + // Followed by varint(count - 64) + byte header = 0x80; + header |= 0x7E; // count bits = 111111 + header |= (byte)((width - 1) & 0x01); + dst.Add(header); + WriteVarUInt32(dst, (uint)(count - 64)); + } + + // Payload: 'count' zigzag values, LE, 'width' bytes each + for (int k = 0; k < count; k++) + WriteLE(dst, ZigZag16(values[start + k]), width); i += count; } @@ -73,24 +98,30 @@ public static class GroupInt16Codec if ((h & 0x80) == 0) { - // Fast path: 7-bit ZigZag + // Fast path: 7-bit ZigZag in low bits ushort zz7 = (ushort)(h & 0x7F); result.Add(UnZigZag16(zz7)); continue; } - int count = ((h >> 1) & 0x3F) + 1; // 1..64 - int width = (h & 0x01) + 1; // 1..2 + int countField = (h >> 1) & 0x3F; + int width = (h & 0x01) + 1; // 1 or 2 + + int count; + if (countField == 63) + { + uint extra = ReadVarUInt32(src, ref pos); + count = checked(64 + (int)extra); + } + else + { + count = countField + 1; + } for 
(int j = 0; j < count; j++) { - uint raw = ReadLE(src, ref pos, width); - if (width > 2 && (raw >> 16) != 0) - throw new OverflowException("Decoded ZigZag value exceeds 16-bit range."); - - ushort u = (ushort)raw; - short val = UnZigZag16(u); - result.Add(val); + ushort raw = (ushort)ReadLE(src, ref pos, width); + result.Add(UnZigZag16(raw)); } } @@ -100,25 +131,22 @@ public static class GroupInt16Codec // ----------------- Helpers ----------------- [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static ushort ZigZag16(short v) - { - // (v << 1) ^ (v >> 15), result as unsigned 16-bit - return (ushort)(((uint)(ushort)v << 1) ^ (uint)((int)v >> 15)); - } + private static ushort ZigZag16(short v) => (ushort)((v << 1) ^ (v >> 15)); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static short UnZigZag16(ushort u) + private static short UnZigZag16(ushort u) => (short)((u >> 1) ^ (ushort)-(short)(u & 1)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int WidthFromZigZag(ushort z) { - // (u >> 1) ^ -(u & 1), narrowed to 16-bit signed - return (short)((u >> 1) ^ (ushort)-(short)(u & 1)); + return z <= 0xFF ? 
1 : 2; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void WriteLE(List dst, ushort value, int width) { - // width is 1 or 2 - dst.Add((byte)(value & 0xFF)); - if (width == 2) dst.Add((byte)(value >> 8)); + for (int i = 0; i < width; i++) + dst.Add((byte)((value >> (8 * i)) & 0xFF)); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -127,11 +155,44 @@ public static class GroupInt16Codec if ((uint)(pos + width) > (uint)src.Length) throw new ArgumentException("Buffer underflow while reading group payload."); - uint v = src[pos++]; - if (width == 2) - { - v |= (uint)src[pos++] << 8; - } + uint v = 0; + for (int i = 0; i < width; i++) + v |= (uint)src[pos++] << (8 * i); return v; } -} + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void WriteVarUInt32(List dst, uint value) + { + while (value >= 0x80) + { + dst.Add((byte)((value & 0x7F) | 0x80)); + value >>= 7; + } + + dst.Add((byte)value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) + { + uint result = 0; + int shift = 0; + + while (true) + { + if (pos >= src.Length) + throw new ArgumentException("Buffer underflow while reading varint."); + + byte b = src[pos++]; + result |= (uint)(b & 0x7F) << shift; + + if ((b & 0x80) == 0) + return result; + + shift += 7; + if (shift >= 35) + throw new ArgumentException("Varint is too long for UInt32."); + } + } +} \ No newline at end of file diff --git a/Libraries/Esiur/Data/Gvwie/GroupInt32Codec.cs b/Libraries/Esiur/Data/Gvwie/GroupInt32Codec.cs index 479b34e..c25b08a 100644 --- a/Libraries/Esiur/Data/Gvwie/GroupInt32Codec.cs +++ b/Libraries/Esiur/Data/Gvwie/GroupInt32Codec.cs @@ -1,372 +1,4 @@ -////using System; -////using System.Collections.Generic; -////using System.Linq; -////using System.Text; -////using System.Threading.Tasks; -////using System.Runtime.CompilerServices; -////using System.Collections; - -////namespace Esiur.Data.Gvwie; - 
-////public static class GroupInt32Codec -////{ -//// // ----------------- Encoder ----------------- -//// public static byte[] Encode(IList values) -//// { -//// //var values = value as int[]; - -//// var dst = new List(values.Count * 2); -//// int i = 0; - -//// while (i < values.Count) -//// { -//// uint zz = ZigZag32(values[i]); - -//// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits -//// if (zz <= 0x7Fu) -//// { -//// dst.Add((byte)zz); -//// i++; -//// continue; -//// } - -//// // Group: up to 32 items sharing a common width (1..4 bytes) -//// int start = i; -//// int count = 1; -//// int width = WidthFromZigZag(zz); - -//// while (count < 32 && (i + count) < values.Count) -//// { -//// uint z2 = ZigZag32(values[i + count]); -//// int w2 = WidthFromZigZag(z2); -//// width = Math.Max(width, w2); // widen as needed -//// count++; -//// } - -//// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] -//// byte header = 0x80; -//// header |= (byte)(((count - 1) & 0x1F) << 2); -//// header |= (byte)((width - 1) & 0x03); -//// dst.Add(header); - -//// // Payload: 'count' zigzag values, LE, 'width' bytes each -//// for (int k = 0; k < count; k++) -//// WriteLE(dst, ZigZag32(values[start + k]), width); - -//// i += count; -//// } - -//// return dst.ToArray(); -//// } - -//// // ----------------- Decoder ----------------- -//// public static int[] Decode(ReadOnlySpan src) -//// { -//// var result = new List(); -//// int pos = 0; - -//// while (pos < src.Length) -//// { -//// byte h = src[pos++]; - -//// if ((h & 0x80) == 0) -//// { -//// // Fast path: 7-bit ZigZag in low bits -//// uint zz7 = (uint)(h & 0x7F); -//// result.Add(UnZigZag32(zz7)); -//// continue; -//// } - -//// int count = ((h >> 2) & 0x1F) + 1; // 1..32 -//// int width = (h & 0x03) + 1; // 1..4 - -//// for (int j = 0; j < count; j++) -//// { -//// uint raw = (uint)ReadLE(src, ref pos, width); -//// int val = UnZigZag32(raw); -//// result.Add(val); -//// } -//// } - -//// return 
result.ToArray(); -//// } - -//// // ----------------- Helpers ----------------- - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static int WidthFromZigZag(uint z) -//// { -//// if (z <= 0xFFu) return 1; -//// if (z <= 0xFFFFu) return 2; -//// if (z <= 0xFFFFFFu) return 3; -//// return 4; -//// } - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static void WriteLE(List dst, uint value, int width) -//// { -//// for (int i = 0; i < width; i++) -//// dst.Add((byte)((value >> (8 * i)) & 0xFF)); -//// } - -//// [MethodImpl(MethodImplOptions.AggressiveInlining)] -//// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) -//// { -//// if ((uint)(pos + width) > (uint)src.Length) -//// throw new ArgumentException("Buffer underflow while reading group payload."); - -//// ulong v = 0; -//// for (int i = 0; i < width; i++) -//// v |= (ulong)src[pos++] << (8 * i); -//// return v; -//// } -////} -//using System; -//using System.Collections.Generic; -//using System.Runtime.CompilerServices; - -//namespace Esiur.Data.Gvwie; - -//public static class GroupInt32Codec -//{ -// private const byte RawInt32RunHeader = 0xFF; - -// // ----------------- Encoder ----------------- -// public static byte[] Encode(IList values) -// { -// var dst = new List(values.Count * 2); -// int i = 0; - -// while (i < values.Count) -// { -// uint zz = ZigZag32(values[i]); - -// // Fast path: single byte (MSB=0) when zigzag fits in 7 bits -// if (zz <= 0x7Fu) -// { -// dst.Add((byte)zz); -// i++; -// continue; -// } - -// int start = i; -// int width = WidthFromZigZag(zz); - -// // Detect long full-width run and emit raw Int32 block instead of grouped width=4 -// if 
(width == 4) -// { -// int runCount = 1; - -// while ((i + runCount) < values.Count) -// { -// uint z2 = ZigZag32(values[i + runCount]); - -// // keep literals separate -// if (z2 <= 0x7Fu) -// break; - -// if (WidthFromZigZag(z2) != 4) -// break; - -// runCount++; -// } - -// // Threshold can be tuned; 33+ is a good starting point -// if (runCount >= 33) -// { -// dst.Add(RawInt32RunHeader); -// WriteVarUInt32(dst, (uint)runCount); - -// for (int k = 0; k < runCount; k++) -// WriteInt32LE(dst, values[start + k]); - -// i += runCount; -// continue; -// } -// } - -// // Normal group: up to 32 items sharing the same width (1..4 bytes) -// int count = 1; - -// while (count < 32 && (i + count) < values.Count) -// { -// uint z2 = ZigZag32(values[i + count]); - -// // do not absorb literal-fast-path values into groups -// if (z2 <= 0x7Fu) -// break; - -// int w2 = WidthFromZigZag(z2); -// if (w2 != width) -// break; - -// count++; -// } - -// // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] -// byte header = 0x80; -// header |= (byte)(((count - 1) & 0x1F) << 2); -// header |= (byte)((width - 1) & 0x03); -// dst.Add(header); - -// for (int k = 0; k < count; k++) -// WriteLE(dst, ZigZag32(values[start + k]), width); - -// i += count; -// } - -// return dst.ToArray(); -// } - -// // ----------------- Decoder ----------------- -// public static int[] Decode(ReadOnlySpan src) -// { -// var result = new List(); -// int pos = 0; - -// while (pos < src.Length) -// { -// byte h = src[pos++]; - -// if ((h & 0x80) == 0) -// { -// // Fast path: 7-bit ZigZag in low bits -// uint zz7 = (uint)(h & 0x7F); -// result.Add(UnZigZag32(zz7)); -// continue; -// } - -// // Raw fixed-width Int32 run -// if (h == RawInt32RunHeader) -// { -// uint countU = ReadVarUInt32(src, ref pos); -// int count = checked((int)countU); - -// for (int j = 0; j < count; j++) -// result.Add(ReadInt32LE(src, ref pos)); - -// continue; -// } - -// int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 -// int width 
= (h & 0x03) + 1; // 1..4 - -// for (int j = 0; j < countNormal; j++) -// { -// uint raw = (uint)ReadLE(src, ref pos, width); -// result.Add(UnZigZag32(raw)); -// } -// } - -// return result.ToArray(); -// } - -// // ----------------- Helpers ----------------- - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static uint ZigZag32(int v) => (uint)((v << 1) ^ (v >> 31)); - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static int WidthFromZigZag(uint z) -// { -// if (z <= 0xFFu) return 1; -// if (z <= 0xFFFFu) return 2; -// if (z <= 0xFFFFFFu) return 3; -// return 4; -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static void WriteLE(List dst, uint value, int width) -// { -// for (int i = 0; i < width; i++) -// dst.Add((byte)((value >> (8 * i)) & 0xFF)); -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) -// { -// if ((uint)(pos + width) > (uint)src.Length) -// throw new ArgumentException("Buffer underflow while reading group payload."); - -// ulong v = 0; -// for (int i = 0; i < width; i++) -// v |= (ulong)src[pos++] << (8 * i); -// return v; -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static void WriteInt32LE(List dst, int value) -// { -// uint u = unchecked((uint)value); -// dst.Add((byte)(u & 0xFF)); -// dst.Add((byte)((u >> 8) & 0xFF)); -// dst.Add((byte)((u >> 16) & 0xFF)); -// dst.Add((byte)((u >> 24) & 0xFF)); -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static int ReadInt32LE(ReadOnlySpan src, ref int pos) -// { -// if ((uint)(pos + 4) > (uint)src.Length) -// throw new ArgumentException("Buffer underflow while reading raw Int32 payload."); - -// uint u = -// (uint)src[pos] -// | ((uint)src[pos + 1] << 8) -// | 
((uint)src[pos + 2] << 16) -// | ((uint)src[pos + 3] << 24); - -// pos += 4; -// return unchecked((int)u); -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static void WriteVarUInt32(List dst, uint value) -// { -// while (value >= 0x80) -// { -// dst.Add((byte)((value & 0x7F) | 0x80)); -// value >>= 7; -// } - -// dst.Add((byte)value); -// } - -// [MethodImpl(MethodImplOptions.AggressiveInlining)] -// private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) -// { -// uint result = 0; -// int shift = 0; - -// while (true) -// { -// if (pos >= src.Length) -// throw new ArgumentException("Buffer underflow while reading varint."); - -// byte b = src[pos++]; -// result |= (uint)(b & 0x7F) << shift; - -// if ((b & 0x80) == 0) -// return result; - -// shift += 7; -// if (shift >= 35) -// throw new ArgumentException("Varint is too long for UInt32."); -// } -// } -//} - - -using System; +using System; using System.Collections.Generic; using System.Runtime.CompilerServices; @@ -374,40 +6,15 @@ namespace Esiur.Data.Gvwie; public static class GroupInt32Codec { - private const byte RawInt32RunHeader = 0xFF; - private const int RawDecisionWindow = 256; // ----------------- Encoder ----------------- - public static byte[] Encode(IList values) + public static byte[] Encode(IList values, bool use4for3 = false) { var dst = new List(values.Count * 2); int i = 0; while (i < values.Count) { - int remaining = values.Count - i; - - // Adaptive raw block decision on a bounded window - if (remaining >= 32) - { - int candidateCount = Math.Min(RawDecisionWindow, remaining); - - int rawSize = 1 + VarUInt32Size((uint)candidateCount) + candidateCount * 4; - int groupedSize = EstimateGroupedSize(values, i, candidateCount); - - if (rawSize < groupedSize) - { - dst.Add(RawInt32RunHeader); - WriteVarUInt32(dst, (uint)candidateCount); - - for (int k = 0; k < candidateCount; k++) - WriteInt32LE(dst, values[i + k]); - - i += candidateCount; - continue; - } - } - uint 
zz = ZigZag32(values[i]); // Fast path: single byte (MSB=0) when zigzag fits in 7 bits @@ -419,33 +26,47 @@ public static class GroupInt32Codec } int start = i; - int width = WidthFromZigZag(zz); + int width = WidthFromZigZag(zz, use4for3); int count = 1; - // 0xFF is reserved for raw Int32 blocks, so width=4 groups max out at 31 - int maxGroupCount = (width == 4) ? 31 : 32; - - while (count < maxGroupCount && (i + count) < values.Count) + // Build a run of same-width non-literal values + while ((i + count) < values.Count) { uint z2 = ZigZag32(values[i + count]); - // keep literals separate + // Do not absorb literal-fast-path values into groups if (z2 <= 0x7Fu) break; - int w2 = WidthFromZigZag(z2); + int w2 = WidthFromZigZag(z2, use4for3); if (w2 != width) break; count++; } - // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] - byte header = 0x80; - header |= (byte)(((count - 1) & 0x1F) << 2); - header |= (byte)((width - 1) & 0x03); - dst.Add(header); + if (count <= 31) + { + // Short group: + // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] + byte header = 0x80; + header |= (byte)(((count - 1) & 0x1F) << 2); + header |= (byte)((width - 1) & 0x03); + dst.Add(header); + } + else + { + // Extended group: + // Header: 1 | 11111 | (width-1)[2 bits] + // Followed by varint(count - 32) + byte header = 0x80; + header |= 0x7C; // count bits = 11111 + header |= (byte)((width - 1) & 0x03); + dst.Add(header); + WriteVarUInt32(dst, (uint)(count - 32)); + } + // Payload: 'count' zigzag values, LE, 'width' bytes each for (int k = 0; k < count; k++) WriteLE(dst, ZigZag32(values[start + k]), width); @@ -473,87 +94,32 @@ public static class GroupInt32Codec continue; } - // Raw fixed-width Int32 run - if (h == RawInt32RunHeader) + int countField = (h >> 2) & 0x1F; + int width = (h & 0x03) + 1; + + int count; + if (countField == 31) { - uint countU = ReadVarUInt32(src, ref pos); - int count = checked((int)countU); - - for (int j = 0; j < count; j++) - 
result.Add(ReadInt32LE(src, ref pos)); - - continue; + // Extended group length + uint extra = ReadVarUInt32(src, ref pos); + count = checked(32 + (int)extra); + } + else + { + count = countField + 1; } - int countNormal = ((h >> 2) & 0x1F) + 1; // 1..32 - int width = (h & 0x03) + 1; // 1..4 - - for (int j = 0; j < countNormal; j++) + for (int j = 0; j < count; j++) { uint raw = (uint)ReadLE(src, ref pos, width); - result.Add(UnZigZag32(raw)); + int val = UnZigZag32(raw); + result.Add(val); } } return result.ToArray(); } - // ----------------- Size Estimation ----------------- - - private static int EstimateGroupedSize(IList values, int start, int count) - { - int size = 0; - int i = start; - int end = start + count; - - while (i < end) - { - uint zz = ZigZag32(values[i]); - - if (zz <= 0x7Fu) - { - size += 1; - i++; - continue; - } - - int width = WidthFromZigZag(zz); - int groupCount = 1; - int maxGroupCount = (width == 4) ? 31 : 32; - - while (groupCount < maxGroupCount && (i + groupCount) < end) - { - uint z2 = ZigZag32(values[i + groupCount]); - - if (z2 <= 0x7Fu) - break; - - int w2 = WidthFromZigZag(z2); - if (w2 != width) - break; - - groupCount++; - } - - size += 1 + groupCount * width; - i += groupCount; - } - - return size; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int VarUInt32Size(uint value) - { - int size = 1; - while (value >= 0x80) - { - value >>= 7; - size++; - } - return size; - } - // ----------------- Helpers ----------------- [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -563,11 +129,11 @@ public static class GroupInt32Codec private static int UnZigZag32(uint u) => (int)((u >> 1) ^ (uint)-(int)(u & 1)); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int WidthFromZigZag(uint z) + private static int WidthFromZigZag(uint z, bool aligned = false) { if (z <= 0xFFu) return 1; if (z <= 0xFFFFu) return 2; - if (z <= 0xFFFFFFu) return 3; + if (z <= 0xFFFFFFu) return aligned ? 
4 : 3; return 4; } @@ -590,32 +156,6 @@ public static class GroupInt32Codec return v; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void WriteInt32LE(List dst, int value) - { - uint u = unchecked((uint)value); - dst.Add((byte)(u & 0xFF)); - dst.Add((byte)((u >> 8) & 0xFF)); - dst.Add((byte)((u >> 16) & 0xFF)); - dst.Add((byte)((u >> 24) & 0xFF)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int ReadInt32LE(ReadOnlySpan src, ref int pos) - { - if ((uint)(pos + 4) > (uint)src.Length) - throw new ArgumentException("Buffer underflow while reading raw Int32 payload."); - - uint u = - (uint)src[pos] - | ((uint)src[pos + 1] << 8) - | ((uint)src[pos + 2] << 16) - | ((uint)src[pos + 3] << 24); - - pos += 4; - return unchecked((int)u); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void WriteVarUInt32(List dst, uint value) { diff --git a/Libraries/Esiur/Data/Gvwie/GroupInt64Codec.cs b/Libraries/Esiur/Data/Gvwie/GroupInt64Codec.cs index 47fa19a..2a595bd 100644 --- a/Libraries/Esiur/Data/Gvwie/GroupInt64Codec.cs +++ b/Libraries/Esiur/Data/Gvwie/GroupInt64Codec.cs @@ -1,8 +1,140 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +//using System; +//using System.Collections.Generic; +//using System.Linq; +//using System.Text; +//using System.Threading.Tasks; +//using System; +//using System.Collections.Generic; +//using System.Runtime.CompilerServices; + +//namespace Esiur.Data.Gvwie; + +//public static class GroupInt64Codec +//{ +// // ----------------- Encoder ----------------- +// public static byte[] Encode(IList values) +// { +// var dst = new List(values.Count * 2); +// int i = 0; + +// while (i < values.Count) +// { +// ulong zz = ZigZag64(values[i]); + +// // Fast path: 1 byte when ZigZag fits in 7 bits +// if (zz <= 0x7Ful) +// { +// dst.Add((byte)zz); // MSB = 0 implicitly +// i++; +// continue; +// } + +// // Group 
path: up to 16 items sharing a common width (1..8 bytes) +// int start = i; +// int count = 1; +// int width = WidthFromZigZag(zz); + +// while (count < 16 && (i + count) < values.Count) +// { +// ulong z2 = ZigZag64(values[i + count]); +// int w2 = WidthFromZigZag(z2); +// width = Math.Max(width, w2); // widen as needed +// count++; +// } + +// // Header: 1 | (count-1)[4 bits] | (width-1)[3 bits] +// byte header = 0x80; +// header |= (byte)(((count - 1) & 0x0F) << 3); +// header |= (byte)((width - 1) & 0x07); +// dst.Add(header); + +// // Payload: 'count' ZigZag values, LE, 'width' bytes each +// for (int k = 0; k < count; k++) +// { +// ulong z = ZigZag64(values[start + k]); +// WriteLE(dst, z, width); +// } + +// i += count; +// } + +// return dst.ToArray(); +// } + +// // ----------------- Decoder ----------------- +// public static long[] Decode(ReadOnlySpan src) +// { +// var result = new List(); +// int pos = 0; + +// while (pos < src.Length) +// { +// byte h = src[pos++]; + +// if ((h & 0x80) == 0) +// { +// // Fast path: 7-bit ZigZag +// ulong zz7 = (ulong)(h & 0x7F); +// result.Add(UnZigZag64(zz7)); +// continue; +// } + +// int count = ((h >> 3) & 0x0F) + 1; // 1..16 +// int width = (h & 0x07) + 1; // 1..8 + +// for (int j = 0; j < count; j++) +// { +// ulong raw = ReadLE(src, ref pos, width); +// long val = UnZigZag64(raw); +// result.Add(val); +// } +// } + +// return result.ToArray(); +// } + +// // ----------------- Helpers ----------------- + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static ulong ZigZag64(long v) => (ulong)((v << 1) ^ (v >> 63)); + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static long UnZigZag64(ulong u) => (long)((u >> 1) ^ (ulong)-(long)(u & 1)); + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static int WidthFromZigZag(ulong z) +// { +// if (z <= 0xFFUL) return 1; +// if (z <= 0xFFFFUL) return 2; +// if (z <= 0xFFFFFFUL) return 3; +// if (z <= 0xFFFFFFFFUL) 
return 4; +// if (z <= 0xFFFFFFFFFFUL) return 5; +// if (z <= 0xFFFFFFFFFFFFUL) return 6; +// if (z <= 0xFFFFFFFFFFFFFFUL) return 7; +// return 8; +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static void WriteLE(List dst, ulong value, int width) +// { +// for (int i = 0; i < width; i++) +// dst.Add((byte)((value >> (8 * i)) & 0xFF)); +// } + +// [MethodImpl(MethodImplOptions.AggressiveInlining)] +// private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) +// { +// if ((uint)(pos + width) > (uint)src.Length) +// throw new ArgumentException("Buffer underflow while reading group payload."); + +// ulong v = 0; +// for (int i = 0; i < width; i++) +// v |= (ulong)src[pos++] << (8 * i); +// return v; +// } +//} + + using System; using System.Collections.Generic; using System.Runtime.CompilerServices; @@ -11,8 +143,18 @@ namespace Esiur.Data.Gvwie; public static class GroupInt64Codec { + // Header layout for grouped values: + // 1 | cccc | www + // + // MSB = 1 => grouped form + // cccc = 0..14 => short count = cccc + 1 (1..15) + // cccc = 15 => extended count, followed by varint(count - 16) + // www = 0..7 => width = www + 1 (1..8) + // + // MSB = 0 => literal fast path for ZigZag values in 7 bits + // ----------------- Encoder ----------------- - public static byte[] Encode(IList values) + public static byte[] Encode(IList values, bool aligned = false) { var dst = new List(values.Count * 2); int i = 0; @@ -21,39 +163,58 @@ public static class GroupInt64Codec { ulong zz = ZigZag64(values[i]); - // Fast path: 1 byte when ZigZag fits in 7 bits + // Fast path: single byte (MSB=0) when zigzag fits in 7 bits if (zz <= 0x7Ful) { - dst.Add((byte)zz); // MSB = 0 implicitly + dst.Add((byte)zz); i++; continue; } - // Group path: up to 16 items sharing a common width (1..8 bytes) int start = i; + int width = WidthFromZigZag(zz, aligned); int count = 1; - int width = WidthFromZigZag(zz); - while (count < 16 && (i + count) < values.Count) + 
// Build a run of same-width non-literal values + while ((i + count) < values.Count) { ulong z2 = ZigZag64(values[i + count]); - int w2 = WidthFromZigZag(z2); - width = Math.Max(width, w2); // widen as needed + + // Do not absorb literal-fast-path values into groups + if (z2 <= 0x7Ful) + break; + + int w2 = WidthFromZigZag(z2, aligned); + if (w2 != width) + break; + count++; } - // Header: 1 | (count-1)[4 bits] | (width-1)[3 bits] - byte header = 0x80; - header |= (byte)(((count - 1) & 0x0F) << 3); - header |= (byte)((width - 1) & 0x07); - dst.Add(header); - - // Payload: 'count' ZigZag values, LE, 'width' bytes each - for (int k = 0; k < count; k++) + if (count <= 15) { - ulong z = ZigZag64(values[start + k]); - WriteLE(dst, z, width); + // Short group: + // Header: 1 | (count-1)[4 bits] | (width-1)[3 bits] + byte header = 0x80; + header |= (byte)(((count - 1) & 0x0F) << 3); + header |= (byte)((width - 1) & 0x07); + dst.Add(header); } + else + { + // Extended group: + // Header: 1 | 1111 | (width-1)[3 bits] + // Followed by varint(count - 16) + byte header = 0x80; + header |= 0x78; // count bits = 1111 + header |= (byte)((width - 1) & 0x07); + dst.Add(header); + WriteVarUInt32(dst, checked((uint)(count - 16))); + } + + // Payload: 'count' zigzag values, LE, 'width' bytes each + for (int k = 0; k < count; k++) + WriteLE(dst, ZigZag64(values[start + k]), width); i += count; } @@ -73,14 +234,26 @@ public static class GroupInt64Codec if ((h & 0x80) == 0) { - // Fast path: 7-bit ZigZag + // Fast path: 7-bit ZigZag in low bits ulong zz7 = (ulong)(h & 0x7F); result.Add(UnZigZag64(zz7)); continue; } - int count = ((h >> 3) & 0x0F) + 1; // 1..16 - int width = (h & 0x07) + 1; // 1..8 + int countField = (h >> 3) & 0x0F; + int width = (h & 0x07) + 1; + + int count; + if (countField == 15) + { + // Extended group length + uint extra = ReadVarUInt32(src, ref pos); + count = checked(16 + (int)extra); + } + else + { + count = countField + 1; + } for (int j = 0; j < count; j++) { 
@@ -102,16 +275,17 @@ public static class GroupInt64Codec private static long UnZigZag64(ulong u) => (long)((u >> 1) ^ (ulong)-(long)(u & 1)); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int WidthFromZigZag(ulong z) + private static int WidthFromZigZag(ulong z, bool aligned = false) { - if (z <= 0xFFUL) return 1; - if (z <= 0xFFFFUL) return 2; - if (z <= 0xFFFFFFUL) return 3; - if (z <= 0xFFFFFFFFUL) return 4; - if (z <= 0xFFFFFFFFFFUL) return 5; - if (z <= 0xFFFFFFFFFFFFUL) return 6; - if (z <= 0xFFFFFFFFFFFFFFUL) return 7; + if (z <= 0xFFul) return 1; + if (z <= 0xFFFFul) return 2; + if (z <= 0xFFFFFFul) return aligned ? 4 : 3; + if (z <= 0xFFFFFFFFul) return 4; + if (z <= 0xFFFFFFFFFFul) return aligned ? 8 : 5; + if (z <= 0xFFFFFFFFFFFFul) return aligned ? 8 : 6; + if (z <= 0xFFFFFFFFFFFFFFul) return aligned ? 8 : 7; return 8; + } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -132,4 +306,39 @@ public static class GroupInt64Codec v |= (ulong)src[pos++] << (8 * i); return v; } -} + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void WriteVarUInt32(List dst, uint value) + { + while (value >= 0x80) + { + dst.Add((byte)((value & 0x7F) | 0x80)); + value >>= 7; + } + + dst.Add((byte)value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) + { + uint result = 0; + int shift = 0; + + while (true) + { + if (pos >= src.Length) + throw new ArgumentException("Buffer underflow while reading varint."); + + byte b = src[pos++]; + result |= (uint)(b & 0x7F) << shift; + + if ((b & 0x80) == 0) + return result; + + shift += 7; + if (shift >= 35) + throw new ArgumentException("Varint is too long for UInt32."); + } + } +} \ No newline at end of file diff --git a/Libraries/Esiur/Data/Gvwie/GroupUInt16Codec.cs b/Libraries/Esiur/Data/Gvwie/GroupUInt16Codec.cs index f74d1e4..25de2d1 100644 --- a/Libraries/Esiur/Data/Gvwie/GroupUInt16Codec.cs +++ 
b/Libraries/Esiur/Data/Gvwie/GroupUInt16Codec.cs @@ -1,20 +1,25 @@ using System; using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; using System.Runtime.CompilerServices; namespace Esiur.Data.Gvwie; - public static class GroupUInt16Codec { + // Header layout: + // 1 | cccccc | w + // + // MSB = 1 => grouped form + // cccccc = 0..62 => short count = cccccc + 1 (1..63) + // cccccc = 63 => extended count, followed by varint(count - 64) + // w = 0 => width = 1 byte + // w = 1 => width = 2 bytes + // + // MSB = 0 => literal fast path for values in 7 bits + // ----------------- Encoder ----------------- public static byte[] Encode(IList values) { - if (values is null) throw new ArgumentNullException(nameof(values)); - var dst = new List(values.Count * 2); int i = 0; @@ -22,38 +27,58 @@ public static class GroupUInt16Codec { ushort v = values[i]; - // Fast path: single byte for 0..127 + // Fast path: single byte (MSB=0) when value fits in 7 bits if (v <= 0x7F) { - dst.Add((byte)v); // MSB=0 implicitly + dst.Add((byte)v); i++; continue; } - // Group path: up to 16 items sharing a common width (1..2 bytes for uint16) int start = i; + int width = WidthFromValue(v); // 1 or 2 int count = 1; - int width = WidthFromUnsigned(v); - while (count < 16 && (i + count) < values.Count) + // Build a run of same-width non-literal values + while ((i + count) < values.Count) { ushort v2 = values[i + count]; - int w2 = WidthFromUnsigned(v2); - if (w2 > width) width = w2; // widen group if needed + + // Do not absorb literal-fast-path values into groups + if (v2 <= 0x7F) + break; + + int w2 = WidthFromValue(v2); + if (w2 != width) + break; + count++; } - // Header: 1 | (count-1)[4b] | (width-1)[3b] - byte header = 0x80; - header |= (byte)(((count - 1) & 0xF) << 3); - header |= (byte)((width - 1) & 0x7); - dst.Add(header); - - // Payload - for (int k = 0; k < count; k++) + if (count <= 63) { - WriteLE(dst, values[start + k], width); + // 
Short group: + // Header: 1 | (count-1)[6 bits] | (width-1)[1 bit] + byte header = 0x80; + header |= (byte)(((count - 1) & 0x3F) << 1); + header |= (byte)((width - 1) & 0x01); + dst.Add(header); } + else + { + // Extended group: + // Header: 1 | 111111 | (width-1)[1 bit] + // Followed by varint(count - 64) + byte header = 0x80; + header |= 0x7E; // count bits = 111111 + header |= (byte)((width - 1) & 0x01); + dst.Add(header); + WriteVarUInt32(dst, (uint)(count - 64)); + } + + // Payload: 'count' values, LE, 'width' bytes each + for (int k = 0; k < count; k++) + WriteLE(dst, values[start + k], width); i += count; } @@ -73,23 +98,29 @@ public static class GroupUInt16Codec if ((h & 0x80) == 0) { - // Fast path byte (0..127) - result.Add(h); + // Fast path: literal 7-bit unsigned value + result.Add((ushort)(h & 0x7F)); continue; } - int count = ((h >> 3) & 0xF) + 1; // 1..16 - int width = (h & 0x7) + 1; // 1..8 (expect 1..2) + int countField = (h >> 1) & 0x3F; + int width = (h & 0x01) + 1; // 1 or 2 - if (width > 2) - throw new NotSupportedException($"Width {width} bytes exceeds uint16 capacity."); + int count; + if (countField == 63) + { + uint extra = ReadVarUInt32(src, ref pos); + count = checked(64 + (int)extra); + } + else + { + count = countField + 1; + } for (int j = 0; j < count; j++) { - uint val = (uint)ReadLE(src, ref pos, width); - if (val > 0xFFFFu) - throw new OverflowException("Decoded value exceeds UInt16 range."); - result.Add((ushort)val); + ushort raw = (ushort)ReadLE(src, ref pos, width); + result.Add(raw); } } @@ -97,25 +128,64 @@ public static class GroupUInt16Codec } // ----------------- Helpers ----------------- + [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int WidthFromUnsigned(ushort v) => (v <= 0xFF) ? 1 : 2; + private static int WidthFromValue(ushort v) + { + return v <= 0xFF ? 
1 : 2; + } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void WriteLE(List dst, ushort value, int width) { - // width is 1 or 2 - dst.Add((byte)(value & 0xFF)); - if (width == 2) dst.Add((byte)(value >> 8)); + for (int i = 0; i < width; i++) + dst.Add((byte)((value >> (8 * i)) & 0xFF)); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) + private static uint ReadLE(ReadOnlySpan src, ref int pos, int width) { - if (pos + width > src.Length) - throw new ArgumentException("Buffer underflow while reading payload."); + if ((uint)(pos + width) > (uint)src.Length) + throw new ArgumentException("Buffer underflow while reading group payload."); - ulong v = src[pos++]; // first byte (LSB) - if (width == 2) v |= (ulong)src[pos++] << 8; + uint v = 0; + for (int i = 0; i < width; i++) + v |= (uint)src[pos++] << (8 * i); return v; } -} + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void WriteVarUInt32(List dst, uint value) + { + while (value >= 0x80) + { + dst.Add((byte)((value & 0x7F) | 0x80)); + value >>= 7; + } + + dst.Add((byte)value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) + { + uint result = 0; + int shift = 0; + + while (true) + { + if (pos >= src.Length) + throw new ArgumentException("Buffer underflow while reading varint."); + + byte b = src[pos++]; + result |= (uint)(b & 0x7F) << shift; + + if ((b & 0x80) == 0) + return result; + + shift += 7; + if (shift >= 35) + throw new ArgumentException("Varint is too long for UInt32."); + } + } +} \ No newline at end of file diff --git a/Libraries/Esiur/Data/Gvwie/GroupUInt32Codec.cs b/Libraries/Esiur/Data/Gvwie/GroupUInt32Codec.cs index 5aa4099..292dd56 100644 --- a/Libraries/Esiur/Data/Gvwie/GroupUInt32Codec.cs +++ b/Libraries/Esiur/Data/Gvwie/GroupUInt32Codec.cs @@ -1,19 +1,24 @@ using System; using 
System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; using System.Runtime.CompilerServices; namespace Esiur.Data.Gvwie; public static class GroupUInt32Codec { - // ----------------- Encoder ----------------- - public static byte[] Encode(IList values) - { - if (values is null) throw new ArgumentNullException(nameof(values)); + // Header layout: + // 1 | ccccc | ww + // + // MSB = 1 => grouped form + // ccccc = 0..30 => short count = ccccc + 1 (1..31) + // ccccc = 31 => extended count, followed by varint(count - 32) + // ww = 0..3 => width = ww + 1 (1..4) + // + // MSB = 0 => literal fast path for values in 7 bits + // ----------------- Encoder ----------------- + public static byte[] Encode(IList values, bool aligned = false) + { var dst = new List(values.Count * 2); int i = 0; @@ -21,38 +26,58 @@ public static class GroupUInt32Codec { uint v = values[i]; - // Fast path: single byte for 0..127 + // Fast path: single byte (MSB=0) when value fits in 7 bits if (v <= 0x7Fu) { - dst.Add((byte)v); // MSB=0 implicitly + dst.Add((byte)v); i++; continue; } - // Group path: up to 16 items sharing a common width (1..4 bytes for uint32) int start = i; + int width = WidthFromValue(v, aligned); int count = 1; - int width = WidthFromUnsigned(v); - while (count < 16 && (i + count) < values.Count) + // Build a run of same-width non-literal values + while ((i + count) < values.Count) { uint v2 = values[i + count]; - int w2 = WidthFromUnsigned(v2); - if (w2 > width) width = w2; + + // Do not absorb literal-fast-path values into groups + if (v2 <= 0x7Fu) + break; + + int w2 = WidthFromValue(v2, aligned); + if (w2 != width) + break; + count++; } - // Header: 1 | (count-1)[4b] | (width-1)[3b] - byte header = 0x80; - header |= (byte)(((count - 1) & 0xF) << 3); - header |= (byte)((width - 1) & 0x7); - dst.Add(header); - - // Payload - for (int k = 0; k < count; k++) + if (count <= 31) { - WriteLE(dst, values[start + k], width); + // Short 
group: + // Header: 1 | (count-1)[5 bits] | (width-1)[2 bits] + byte header = 0x80; + header |= (byte)(((count - 1) & 0x1F) << 2); + header |= (byte)((width - 1) & 0x03); + dst.Add(header); } + else + { + // Extended group: + // Header: 1 | 11111 | (width-1)[2 bits] + // Followed by varint(count - 32) + byte header = 0x80; + header |= 0x7C; // count bits = 11111 + header |= (byte)((width - 1) & 0x03); + dst.Add(header); + WriteVarUInt32(dst, (uint)(count - 32)); + } + + // Payload: 'count' values, LE, 'width' bytes each + for (int k = 0; k < count; k++) + WriteLE(dst, values[start + k], width); i += count; } @@ -72,21 +97,29 @@ public static class GroupUInt32Codec if ((h & 0x80) == 0) { - // Fast path byte (0..127) - result.Add(h); + // Fast path: literal 7-bit unsigned value + result.Add((uint)(h & 0x7F)); continue; } - int count = ((h >> 3) & 0xF) + 1; // 1..16 - int width = (h & 0x7) + 1; // 1..8 (we expect 1..4) + int countField = (h >> 2) & 0x1F; + int width = (h & 0x03) + 1; // 1..4 - if (width > 4) - throw new NotSupportedException($"Width {width} bytes exceeds uint32 capacity."); + int count; + if (countField == 31) + { + uint extra = ReadVarUInt32(src, ref pos); + count = checked(32 + (int)extra); + } + else + { + count = countField + 1; + } for (int j = 0; j < count; j++) { - uint val = (uint)ReadLE(src, ref pos, width); - result.Add(val); + uint raw = (uint)ReadLE(src, ref pos, width); + result.Add(raw); } } @@ -94,12 +127,13 @@ public static class GroupUInt32Codec } // ----------------- Helpers ----------------- + [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int WidthFromUnsigned(uint v) + private static int WidthFromValue(uint v, bool aligned = false) { if (v <= 0xFFu) return 1; if (v <= 0xFFFFu) return 2; - if (v <= 0xFFFFFFu) return 3; + if (v <= 0xFFFFFFu) return aligned ? 
4 : 3; return 4; } @@ -113,13 +147,47 @@ public static class GroupUInt32Codec [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) { - if (pos + width > src.Length) - throw new ArgumentException("Buffer underflow while reading payload."); + if ((uint)(pos + width) > (uint)src.Length) + throw new ArgumentException("Buffer underflow while reading group payload."); ulong v = 0; for (int i = 0; i < width; i++) v |= (ulong)src[pos++] << (8 * i); - return v; } -} + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void WriteVarUInt32(List dst, uint value) + { + while (value >= 0x80) + { + dst.Add((byte)((value & 0x7F) | 0x80)); + value >>= 7; + } + + dst.Add((byte)value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) + { + uint result = 0; + int shift = 0; + + while (true) + { + if (pos >= src.Length) + throw new ArgumentException("Buffer underflow while reading varint."); + + byte b = src[pos++]; + result |= (uint)(b & 0x7F) << shift; + + if ((b & 0x80) == 0) + return result; + + shift += 7; + if (shift >= 35) + throw new ArgumentException("Varint is too long for UInt32."); + } + } +} \ No newline at end of file diff --git a/Libraries/Esiur/Data/Gvwie/GroupUInt64Codec.cs b/Libraries/Esiur/Data/Gvwie/GroupUInt64Codec.cs index b246e96..9196965 100644 --- a/Libraries/Esiur/Data/Gvwie/GroupUInt64Codec.cs +++ b/Libraries/Esiur/Data/Gvwie/GroupUInt64Codec.cs @@ -1,25 +1,24 @@ using System; using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using System; -using System.Collections.Generic; using System.Runtime.CompilerServices; namespace Esiur.Data.Gvwie; -using System; -using System.Collections.Generic; -using System.Runtime.CompilerServices; - public static class GroupUInt64Codec { - // ----------------- Encoder ----------------- - public static byte[] 
Encode(IList values) - { - if (values is null) throw new ArgumentNullException(nameof(values)); + // Header layout: + // 1 | cccc | www + // + // MSB = 1 => grouped form + // cccc = 0..14 => short count = cccc + 1 (1..15) + // cccc = 15 => extended count, followed by varint(count - 16) + // www = 0..7 => width = www + 1 (1..8) + // + // MSB = 0 => literal fast path for values in 7 bits + // ----------------- Encoder ----------------- + public static byte[] Encode(IList values, bool aligned = false) + { var dst = new List(values.Count * 2); int i = 0; @@ -27,34 +26,56 @@ public static class GroupUInt64Codec { ulong v = values[i]; - // Fast path: single byte for 0..127 - if (v <= 0x7FUL) + // Fast path: single byte (MSB=0) when value fits in 7 bits + if (v <= 0x7Ful) { - dst.Add((byte)v); // MSB = 0 implicitly + dst.Add((byte)v); i++; continue; } - // Group path: up to 16 items sharing max width (1..8 bytes) int start = i; + int width = WidthFromValue(v, aligned); int count = 1; - int width = WidthFromUnsigned(v); - while (count < 16 && (i + count) < values.Count) + // Build a run of same-width non-literal values + while ((i + count) < values.Count) { ulong v2 = values[i + count]; - int w2 = WidthFromUnsigned(v2); - if (w2 > width) width = w2; + + // Do not absorb literal-fast-path values into groups + if (v2 <= 0x7Ful) + break; + + int w2 = WidthFromValue(v2, aligned); + if (w2 != width) + break; + count++; } - // Header: 1 | (count-1)[4b] | (width-1)[3b] - byte header = 0x80; - header |= (byte)(((count - 1) & 0xF) << 3); - header |= (byte)((width - 1) & 0x7); - dst.Add(header); + if (count <= 15) + { + // Short group: + // Header: 1 | (count-1)[4 bits] | (width-1)[3 bits] + byte header = 0x80; + header |= (byte)(((count - 1) & 0x0F) << 3); + header |= (byte)((width - 1) & 0x07); + dst.Add(header); + } + else + { + // Extended group: + // Header: 1 | 1111 | (width-1)[3 bits] + // Followed by varint(count - 16) + byte header = 0x80; + header |= 0x78; // count bits = 
1111 + header |= (byte)((width - 1) & 0x07); + dst.Add(header); + WriteVarUInt32(dst, checked((uint)(count - 16))); + } - // Payload + // Payload: 'count' values, LE, 'width' bytes each for (int k = 0; k < count; k++) WriteLE(dst, values[start + k], width); @@ -76,21 +97,29 @@ public static class GroupUInt64Codec if ((h & 0x80) == 0) { - // Fast path byte (0..127) - result.Add(h); + // Fast path: literal 7-bit unsigned value + result.Add((ulong)(h & 0x7F)); continue; } - int count = ((h >> 3) & 0xF) + 1; // 1..16 - int width = (h & 0x7) + 1; // 1..8 + int countField = (h >> 3) & 0x0F; + int width = (h & 0x07) + 1; // 1..8 - if (width < 1 || width > 8) - throw new NotSupportedException($"Invalid width {width} in header."); + int count; + if (countField == 15) + { + uint extra = ReadVarUInt32(src, ref pos); + count = checked(16 + (int)extra); + } + else + { + count = countField + 1; + } for (int j = 0; j < count; j++) { - ulong val = ReadLE(src, ref pos, width); - result.Add(val); + ulong raw = ReadLE(src, ref pos, width); + result.Add(raw); } } @@ -98,16 +127,17 @@ public static class GroupUInt64Codec } // ----------------- Helpers ----------------- + [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int WidthFromUnsigned(ulong v) + private static int WidthFromValue(ulong v, bool aligned = false) { - if (v <= 0xFFUL) return 1; - if (v <= 0xFFFFUL) return 2; - if (v <= 0xFFFFFFUL) return 3; - if (v <= 0xFFFFFFFFUL) return 4; - if (v <= 0xFFFFFFFFFFUL) return 5; - if (v <= 0xFFFFFFFFFFFFUL) return 6; - if (v <= 0xFFFFFFFFFFFFFFUL) return 7; + if (v <= 0xFFul) return 1; + if (v <= 0xFFFFul) return 2; + if (v <= 0xFFFFFFul) return aligned ? 4 : 3; + if (v <= 0xFFFFFFFFul) return 4; + if (v <= 0xFFFFFFFFFFul) return aligned ? 8: 5; + if (v <= 0xFFFFFFFFFFFFul) return aligned ? 8: 6; + if (v <= 0xFFFFFFFFFFFFFFul) return aligned ? 
8 : 7; return 8; } @@ -121,13 +151,47 @@ public static class GroupUInt64Codec [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ulong ReadLE(ReadOnlySpan src, ref int pos, int width) { - if (pos + width > src.Length) - throw new ArgumentException("Buffer underflow while reading payload."); + if ((uint)(pos + width) > (uint)src.Length) + throw new ArgumentException("Buffer underflow while reading group payload."); ulong v = 0; for (int i = 0; i < width; i++) v |= (ulong)src[pos++] << (8 * i); - return v; } -} + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void WriteVarUInt32(List dst, uint value) + { + while (value >= 0x80) + { + dst.Add((byte)((value & 0x7F) | 0x80)); + value >>= 7; + } + + dst.Add((byte)value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint ReadVarUInt32(ReadOnlySpan src, ref int pos) + { + uint result = 0; + int shift = 0; + + while (true) + { + if (pos >= src.Length) + throw new ArgumentException("Buffer underflow while reading varint."); + + byte b = src[pos++]; + result |= (uint)(b & 0x7F) << shift; + + if ((b & 0x80) == 0) + return result; + + shift += 7; + if (shift >= 35) + throw new ArgumentException("Varint is too long for UInt32."); + } + } +} \ No newline at end of file diff --git a/Libraries/Esiur/Esiur.csproj b/Libraries/Esiur/Esiur.csproj index 3cff5d8..41d23b6 100644 --- a/Libraries/Esiur/Esiur.csproj +++ b/Libraries/Esiur/Esiur.csproj @@ -59,7 +59,6 @@ - @@ -74,7 +73,6 @@ - diff --git a/Tests/Serialization/Gvwie/IntArrayGenerator.cs b/Tests/Serialization/Gvwie/IntArrayGenerator.cs index 04de2a4..de04bca 100644 --- a/Tests/Serialization/Gvwie/IntArrayGenerator.cs +++ b/Tests/Serialization/Gvwie/IntArrayGenerator.cs @@ -11,7 +11,7 @@ public static class IntArrayGenerator - private static readonly Random rng = new Random(24241564); + private static Random rng = new Random(24241564); /// /// Generate an array composed of ascending runs (consecutive integers). 
@@ -23,6 +23,9 @@ public static class IntArrayGenerator /// - allowNegative: if false, generated values will be non-negative /// - minGap / maxGap: approximate gap between runs (large gaps produce the jump examples) /// + /// + public static void InitRng() => rng = new Random(24241564); + public static long[] GenerateRuns(int length, int minRunSize = 3, int maxRunSize = 8, diff --git a/Tests/Serialization/Gvwie/IntArrayRunner.cs b/Tests/Serialization/Gvwie/IntArrayRunner.cs index 104faa0..000330b 100644 --- a/Tests/Serialization/Gvwie/IntArrayRunner.cs +++ b/Tests/Serialization/Gvwie/IntArrayRunner.cs @@ -3,6 +3,7 @@ using FlatSharp; using FlatSharp.Attributes; using MessagePack; using MongoDB.Bson; +using Org.BouncyCastle.Asn1.X509; using PeterO.Cbor; using ProtoBuf; using SolTechnology.Avro; @@ -10,6 +11,7 @@ using System; using System.Buffers; using System.Collections.Generic; using System.Text; +using static System.Runtime.InteropServices.JavaScript.JSType; namespace Esiur.Tests.Gvwie { @@ -111,7 +113,7 @@ namespace Esiur.Tests.Gvwie // Produces a CSV with header: SampleSize;Esiur;FlatBuffer;ProtoBuffer;MessagePack;BSON;CBOR;Avro;Optimal public void RunChart() { - var sizes = Enumerable.Range(12, 21) + var sizes = Enumerable.Range(0, 21) .Select(i => (int)Math.Pow(2, i)) .ToArray(); @@ -119,20 +121,21 @@ namespace Esiur.Tests.Gvwie // Define generators to evaluate. Each entry maps a name to a function that // given a sample size returns the averages (double[]) by calling Average(...). 
var generators = new List<(string name, Func fn)>() - { - ("Int32_Positive", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Positive)), iterations)), - + { ("Int32_Clustering", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Clustering)), iterations)), + ("Int32_Positive", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Positive)), iterations)), ("Int32_Negative", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Negative)), iterations)), ("Int32_Small", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Small)), iterations)), ("Int32_Alternating", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Alternating)), iterations)), ("Int32_Ascending", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size, GeneratorPattern.Ascending)), iterations)), - //("Int64", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt64(size)), iterations)), - //("Int32", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size)), iterations)), - //("Int16", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt16(size)), iterations)), - //("UInt64", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateUInt64(size)), iterations)), - //("UInt32", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateUInt32(size)), iterations)), - //("UInt16", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateUInt16(size)), iterations)), + ("Int32", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt32(size)), iterations)), + ("UInt32", (size, iterations) => Average(() => 
CompareInt(IntArrayGenerator.GenerateUInt32(size)), iterations)), + + ("Int16", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt16(size)), iterations)), + ("UInt16", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateUInt16(size)), iterations)), + ("Int64", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateInt64(size)), iterations)), + ("UInt64", (size, iterations) => Average(() => CompareInt(IntArrayGenerator.GenerateUInt64(size)), iterations)), + }; foreach (var gen in generators) @@ -140,13 +143,13 @@ namespace Esiur.Tests.Gvwie var sb = new System.Text.StringBuilder(); var sbr = new System.Text.StringBuilder(); - sb.AppendLine("SampleSize,Esiur,FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal"); - sbr.AppendLine("SampleSize,Esiur,FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal"); + sb.AppendLine("SampleSize,Esiur,Aligned,FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal"); + sbr.AppendLine("SampleSize,Esiur,Aligned,FlatBuffer,ProtoBuffer,MessagePack,BSON,CBOR,Avro,Optimal"); foreach (var size in sizes) { // Choose iterations depending on size to keep total runtime reasonable - int iterations = 10; + int iterations = 100; //if (size <= 100) iterations = 1000; //else if (size <= 1000) iterations = 200; //else if (size <= 10000) iterations = 50; @@ -180,11 +183,12 @@ namespace Esiur.Tests.Gvwie } } - public static (int, int, int, int, int, int, int, int) CompareInt(long[] sample) + public static (int, int, int, int, int, int, int, int, int) CompareInt(long[] sample) { var intRoot = new ArrayRoot() { Values = sample }; var esiur = GroupInt64Codec.Encode(sample); + var esiurAligned = GroupInt64Codec.Encode(sample, true); var messagePack = MessagePackSerializer.Serialize(sample); var flatBuffer = SerializeFlatBuffers(intRoot); @@ -203,18 +207,20 @@ namespace Esiur.Tests.Gvwie var optimal = OptimalSignedEnocding(sample); 
//Console.WriteLine($"{esiur.Length};{flatBuffer.Length};{protoBuffer.Length};{messagePack.Length};{bson.Length};{cbor.Length};{avro.Length};{optimal}"); - return (esiur.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); + return (esiur.Length, esiurAligned.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); } - public static (int, int, int, int, int, int, int, int) CompareInt(int[] sample) + public static (int, int, int, int, int, int, int, int, int) CompareInt(int[] sample) { var intRoot = new ArrayRoot() { Values = sample }; var esiur = GroupInt32Codec.Encode(sample); + var esiurAligned = GroupInt32Codec.Encode(sample, true); var messagePack = MessagePackSerializer.Serialize(sample); var flatBuffer = SerializeFlatBuffers(intRoot); + using var ms = new MemoryStream(); Serializer.Serialize(ms, sample); var protoBuffer = ms.ToArray(); @@ -229,16 +235,17 @@ namespace Esiur.Tests.Gvwie var optimal = OptimalSignedEnocding(sample.Select(x => (long)x).ToArray()); //Console.WriteLine($"{esiur.Length};{flatBuffer.Length};{protoBuffer.Length};{messagePack.Length};{bson.Length};{cbor.Length};{avro.Length};{optimal}"); - return (esiur.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); + return (esiur.Length, esiurAligned.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); } - public static (int, int, int, int, int, int, int, int) CompareInt(short[] sample) + public static (int, int, int, int, int, int, int, int, int) CompareInt(short[] sample) { var intRoot = new ArrayRoot() { Values = sample }; var esiur = GroupInt16Codec.Encode(sample); + var esiurAligned = esiur;// GroupInt16Codec.Encode(sample, true); var messagePack = MessagePackSerializer.Serialize(sample); var flatBuffer = SerializeFlatBuffers(intRoot); @@ -256,15 
+263,16 @@ namespace Esiur.Tests.Gvwie var optimal = OptimalSignedEnocding(sample.Select(x => (long)x).ToArray()); //Console.WriteLine($"{esiur.Length};{flatBuffer.Length};{protoBuffer.Length};{messagePack.Length};{bson.Length};{cbor.Length};{avro.Length};{optimal}"); - return (esiur.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); + return (esiur.Length, esiurAligned.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); } - public static (int, int, int, int, int, int, int, int) CompareInt(uint[] sample) + public static (int, int, int, int, int, int, int, int, int) CompareInt(uint[] sample) { var intRoot = new ArrayRoot() { Values = sample }; var esiur = GroupUInt32Codec.Encode(sample); + var esiurAligned = GroupUInt32Codec.Encode(sample, true); var messagePack = MessagePackSerializer.Serialize(sample); var flatBuffer = SerializeFlatBuffers(intRoot); @@ -287,15 +295,16 @@ namespace Esiur.Tests.Gvwie var optimal = OptimalUnsignedEnocding(sample.Select(x => (ulong)x).ToArray()); //Console.WriteLine($"{esiur.Length};{flatBuffer.Length};{protoBuffer.Length};{messagePack.Length};{bson.Length};{cbor.Length};{avro.Length};{optimal}"); - return (esiur.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); + return (esiur.Length, esiurAligned.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); } - public static (int, int, int, int, int, int, int, int) CompareInt(ulong[] sample) + public static (int, int, int, int, int, int, int, int, int) CompareInt(ulong[] sample) { var intRoot = new ArrayRoot() { Values = sample }; var esiur = GroupUInt64Codec.Encode(sample); + var esiurPadded = GroupUInt64Codec.Encode(sample, true); var messagePack = MessagePackSerializer.Serialize(sample); var flatBuffer = SerializeFlatBuffers(intRoot); 
@@ -315,14 +324,15 @@ namespace Esiur.Tests.Gvwie var optimal = OptimalUnsignedEnocding(sample); //Console.WriteLine($"{esiur.Length};{flatBuffer.Length};{protoBuffer.Length};{messagePack.Length};{bson.Length};{cbor.Length};{avro.Length};{optimal}"); - return (esiur.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); + return (esiur.Length, esiurPadded.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); } - public static (int, int, int, int, int, int, int, int) CompareInt(ushort[] sample) + public static (int, int, int, int, int, int, int, int, int) CompareInt(ushort[] sample) { var intRoot = new ArrayRoot() { Values = sample }; var esiur = GroupUInt16Codec.Encode(sample); + var esiurAligned = esiur;// GroupUInt16Codec.Encode(sample, true); var messagePack = MessagePackSerializer.Serialize(sample); var flatBuffer = SerializeFlatBuffers(intRoot); @@ -340,7 +350,7 @@ namespace Esiur.Tests.Gvwie var optimal = OptimalUnsignedEnocding(sample.Select(x => (ulong)x).ToArray()); //Console.WriteLine($"{esiur.Length};{flatBuffer.Length};{protoBuffer.Length};{messagePack.Length};{bson.Length};{cbor.Length};{avro.Length};{optimal}"); - return (esiur.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); + return (esiur.Length, esiurAligned.Length, flatBuffer.Length, protoBuffer.Length, messagePack.Length, bson.Length, cbor.Length, avro.Length, optimal); } @@ -393,25 +403,27 @@ namespace Esiur.Tests.Gvwie } - static double[] Average(Func<(int, int, int, int, int, int, int, int)> call, int count) + static double[] Average(Func<(int, int, int, int, int, int, int, int, int)> call, int count) { - var sum = new List<(int, int, int, int, int, int, int, int)>(); + var sum = new List<(int, int, int, int, int, int, int, int, int)>(); for (var i = 0; i < count; i++) sum.Add(call()); - var rt = new 
double[]{ sum.Average(x => x.Item1), + var rt = new double[]{ + sum.Average(x => x.Item1), sum.Average(x => x.Item2), sum.Average(x => x.Item3), sum.Average(x => x.Item4), sum.Average(x => x.Item5), sum.Average(x => x.Item6), sum.Average(x => x.Item7), - sum.Average(x => x.Item8) + sum.Average(x => x.Item8), + sum.Average(x => x.Item9) }; - Console.WriteLine($"{rt[0]};{rt[1]};{rt[2]};{rt[3]};{rt[4]};{rt[5]};{rt[6]};{rt[7]}"); + Console.WriteLine($"{rt[0]};{rt[1]};{rt[2]};{rt[3]};{rt[4]};{rt[5]};{rt[6]};{rt[7]};{rt[8]}"); return rt; @@ -420,23 +432,23 @@ namespace Esiur.Tests.Gvwie static string PrintAverage(double[] values) { // Determine winner (lowest average size) - var names = new string[] { "Esiur", "FlatBuffer", "ProtoBuffer", "MessagePack", "BSON", "CBOR", "Avro", "Optimal" }; + var names = new string[] { "Esiur", "Aligned", "FlatBuffer", "ProtoBuffer", "MessagePack", "BSON", "CBOR", "Avro", "Optimal" }; var min = values.SkipLast(1).Min(); - var idx = Array.IndexOf(values, min); - if (idx >= 0 && idx < names.Length) + + int[] indexes = values.Select((value, index) => new { value, index }) + .Where(x => x.value == min) + .Select(x => x.index) + .ToArray(); + + foreach(var index in indexes) { - if (idx == 0) - - Console.ForegroundColor = ConsoleColor.Green; - else - Console.ForegroundColor = ConsoleColor.Red; - - Console.WriteLine($"Winner: {names[idx]} ({min:F0})"); - Console.ForegroundColor = ConsoleColor.White; - - return names[idx]; + Console.ForegroundColor = index < 2 ? 
ConsoleColor.Green + : ConsoleColor.Red; + Console.WriteLine($"Winner: {names[index]} ({min:F0})"); } + Console.ForegroundColor = ConsoleColor.White; + return "Unknown"; } @@ -447,5 +459,10 @@ namespace Esiur.Tests.Gvwie return buffer.Take(len).ToArray(); } + public static T[] DeserializeFlatBuffers(byte[] buffer) + { + var root = FlatBufferSerializer.Default.Parse>( buffer); + return root.Values.ToArray(); + } } } diff --git a/Tests/Serialization/Gvwie/Program.cs b/Tests/Serialization/Gvwie/Program.cs index 89f8b89..7f80951 100644 --- a/Tests/Serialization/Gvwie/Program.cs +++ b/Tests/Serialization/Gvwie/Program.cs @@ -10,6 +10,8 @@ MessagePack.MessagePackSerializer.DefaultOptions = MessagePackSerializerOptions. var ints = new IntArrayRunner(); -//ints.Run(); +IntArrayGenerator.InitRng(); +ints.Run(); +IntArrayGenerator.InitRng(); ints.RunChart();