From 9a74a01bdfb6880ef234284f984ad406c5949300 Mon Sep 17 00:00:00 2001 From: Ahmed Zamil Date: Mon, 27 Mar 2023 15:12:40 +0300 Subject: [PATCH] Huffman base2/3 --- Esiur.Analysis.Test/Program.cs | 7 +- Esiur.Analysis/Coding/Arithmetic.cs | 12 ++ Esiur.Analysis/Coding/CodeSet.cs | 53 +++++++ Esiur.Analysis/Coding/Codec.cs | 15 -- Esiur.Analysis/Coding/Functions.cs | 19 +++ Esiur.Analysis/Coding/Huffman.cs | 190 +++++++++++++++----------- Esiur.Analysis/Coding/IStreamCodec.cs | 18 +++ Esiur.Analysis/Coding/Symbol.cs | 50 +++++++ 8 files changed, 268 insertions(+), 96 deletions(-) create mode 100644 Esiur.Analysis/Coding/Arithmetic.cs create mode 100644 Esiur.Analysis/Coding/CodeSet.cs delete mode 100644 Esiur.Analysis/Coding/Codec.cs create mode 100644 Esiur.Analysis/Coding/Functions.cs create mode 100644 Esiur.Analysis/Coding/IStreamCodec.cs create mode 100644 Esiur.Analysis/Coding/Symbol.cs diff --git a/Esiur.Analysis.Test/Program.cs b/Esiur.Analysis.Test/Program.cs index 2318c68..db93172 100644 --- a/Esiur.Analysis.Test/Program.cs +++ b/Esiur.Analysis.Test/Program.cs @@ -45,16 +45,17 @@ namespace Esiur.Analysis.Test static void Main() { - var msg = Encoding.ASCII.GetBytes("A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED"); + var msg = Encoding.ASCII.GetBytes("A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED").Select(x => CodeWord.FromByte(x)).ToArray();// ()); - var codec = new Huffman(msg, 0, (uint)msg.Length); + // convert msg to codewords + var codec = new Huffman(msg, 0, (uint)msg.Length); var enc = codec.Encode(msg, 0, (uint) msg.Length); var dec = codec.Decode(enc, 0, (uint)enc.Length); //var code = codec.Encode(); - var ds = codec.DecisionTree.Decide(new bool[] { true, true, true, true }, 0); + //var ds = codec.DecisionTree.Decide(new bool[] { true, true, true, true }, 0); Console.WriteLine(); diff --git a/Esiur.Analysis/Coding/Arithmetic.cs b/Esiur.Analysis/Coding/Arithmetic.cs new file mode 100644 index 0000000..fe56af7 --- /dev/null +++ 
b/Esiur.Analysis/Coding/Arithmetic.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Esiur.Analysis.Coding +{ + public class Arithmetic where T : System.Enum + { + + + } +} diff --git a/Esiur.Analysis/Coding/CodeSet.cs b/Esiur.Analysis/Coding/CodeSet.cs new file mode 100644 index 0000000..80763f8 --- /dev/null +++ b/Esiur.Analysis/Coding/CodeSet.cs @@ -0,0 +1,53 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Esiur.Analysis.Coding +{ + + public class CodeSet where T : System.Enum + { + + public T[] Elements { get; private set; } + + public int ElementsCount { get; private set; } + + public CodeSet() + { + var values = System.Enum.GetValues(typeof(T)); + Elements = new T[values.Length]; + ElementsCount = values.Length; + values.CopyTo(Elements, 0); + } + } + + //public interface IBaseValue + //{ + // public T Value { get; set; } + // public T[] Allowed { get; set; } + //} + + + public enum Base2: byte + { + Zero, + One + } + + public enum Base3 : byte + { + Zero, + One, + Two + } + + //public struct BinaryValue : IBaseValue + //{ + // public Base2 Value { get; set; } + //} + + //public struct TernaryValue : IBaseValue + //{ + // public Base3 Value { get; set; } + //} +} diff --git a/Esiur.Analysis/Coding/Codec.cs b/Esiur.Analysis/Coding/Codec.cs deleted file mode 100644 index 1db0823..0000000 --- a/Esiur.Analysis/Coding/Codec.cs +++ /dev/null @@ -1,15 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace Esiur.Analysis.Coding -{ - public interface ICodec - { - - public byte[] Encode(byte[] source, uint offset, uint length); - - public byte[] Decode(byte[] source, uint offset, uint length); - - } -} diff --git a/Esiur.Analysis/Coding/Functions.cs b/Esiur.Analysis/Coding/Functions.cs new file mode 100644 index 0000000..2050531 --- /dev/null +++ b/Esiur.Analysis/Coding/Functions.cs @@ -0,0 +1,19 @@ +using System; +using 
System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Esiur.Analysis.Coding +{ + public static class Functions + { + public static double Entropy(int[] frequencies) + { + double total = frequencies.Sum(); + /* Shannon entropy in bits: -sum(p * log2(p)) with p = count / total; zero counts are skipped because log2(0) is undefined */ + return frequencies.Where(x => x > 0).Sum(x => -(x / total) * Log2(x / total)); + } + + public static double Log2(double value) => Math.Log10(value) / Math.Log10(2); + } +} diff --git a/Esiur.Analysis/Coding/Huffman.cs b/Esiur.Analysis/Coding/Huffman.cs index 0707a1d..55029be 100644 --- a/Esiur.Analysis/Coding/Huffman.cs +++ b/Esiur.Analysis/Coding/Huffman.cs @@ -10,9 +10,11 @@ using System.Xml.Linq; namespace Esiur.Analysis.Coding { - public class Huffman : ICodec + public class Huffman : IStreamCodec where T : System.Enum { + public CodeSet CodeSet { get; } = new CodeSet(); + public class Node { public TKey Key { get; set; } // decision maker (bit) @@ -103,20 +105,16 @@ namespace Esiur.Analysis.Coding } } - public class HuffmanTable - { - public Dictionary ForwardMap { get; set; } = new Dictionary(); - public Dictionary BackwardMap { get; set; } = new Dictionary(); - } - public Tree DecisionTree { get; set; } - public Huffman(byte[] source, uint offset, uint length) + public Tree, int> DecisionTree { get; set; } + + public Huffman(CodeWord[] source, uint offset, uint length) { //var freq = new int[byte.MaxValue + 1]; - var freq = new Dictionary(); + var freq = new Dictionary, int>(); // var root = new Branch>(); @@ -128,29 +126,37 @@ namespace Esiur.Analysis.Coding else freq.Add(source[i], 1); - var nodes = freq.OrderBy(x => x.Value).Select(x => new Node() - { Frequency = x.Value, Key = false, Value = x.Key }).ToList(); + var nodes = freq.OrderBy(x => x.Value).Select(x => new Node, int>() + { Frequency = x.Value, Key = default(T), Value = x.Key }).ToList(); //var leafs = nodes.ToList(); while (nodes.Count() > 1) { - var decision = nodes.Take(2).ToList(); + var decision = nodes.Take(CodeSet.ElementsCount).ToList(); - decision[1].Key = true; +
//decision[1].Key = true; - var branch = new Node + var branch = new Node, int> { - Branches = new Dictionary>() - { - [decision[0].Key] = decision[0], - [decision[1].Key] = decision[1] - }, - Key = false, + Branches = new Dictionary, int>>(), + //{ + // [decision[0].Key] = decision[0], + // [decision[1].Key] = decision[1] + //}, + Key = CodeSet.Elements.First(), Frequency = decision[0].Frequency + decision[1].Frequency }; + + // assign values + for (var i = 0; i < decision.Count; i++) + { + branch.Branches.Add(CodeSet.Elements[i], decision[i]); + decision[i].Key = CodeSet.Elements[i]; + } + decision[0].Parent = branch; decision[1].Parent = branch; @@ -159,76 +165,104 @@ namespace Esiur.Analysis.Coding // create tree - DecisionTree = new Tree(nodes[0]); + DecisionTree = new Tree, int>(nodes[0]); Console.WriteLine(); } - public byte[] Encode(byte[] source, uint offset, uint length) + public T[] Encode(CodeWord[] source, uint offset, uint length) { + var rt = new List(); var end = offset + length; - var seq = new List(); - for (var i = offset; i < end; i++) + for(var i = offset; i < end; i++) { - seq.AddRange(DecisionTree.Leafs[source[i]].Sequence); - } - - - var str = (String.Join("", seq.Select(x => x ? "1" : "0"))); - - // convert sequence to bytes - //var bits = new BitArray(seq.ToArray()); - - - var rt = new byte[(seq.Count - 1) / 8 + 1]; - var dst = 0; - - for (var i = 0; i < rt.Length; i++) - { - for (var j = 7; j >= 0; j--) - { - if (dst >= seq.Count) - break; - - if (seq[dst++]) - rt[i] |= (byte)(0x1 << j); - } - } - - // bits.CopyTo(rt, 0); - return rt; - } - - public byte[] Decode(byte[] source, uint offset, uint length) - { - - var rt = new List(); - - var bits = new bool[length * 8]; - var end = offset + length; - - - - var dst = 0; - for (var i = offset; i < end; i++) - { - for (var j = 7; j >= 0; j--) - { - bits[dst++] = ((source[i] >> j) & 0x1) > 0 ? 
true : false; - } - } - - uint b = 0; - while (b < bits.Length) - { - var (len, value) = DecisionTree.Decide(bits, b); - rt.Add(value); - b += len; + rt.AddRange(DecisionTree.Leafs[source[i]].Sequence); } return rt.ToArray(); } + + public CodeWord[] Decode(T[] source, uint offset, uint length) + { + var rt = new List>(); + + uint processed = 0; + while (processed < length) + { + var (len, value) = DecisionTree.Decide(source, offset); + rt.Add(value); + processed += len; + offset += len; + } + + return rt.ToArray(); + + } + + //public byte[] Encode(byte[] source, uint offset, uint length) + //{ + // var end = offset + length; + + // var seq = new List(); + // for (var i = offset; i < end; i++) + // { + // seq.AddRange(DecisionTree.Leafs[source[i]].Sequence); + // } + + + // //var str = (String.Join("", seq.Select(x => x ? "1" : "0"))); + + // // convert sequence to bytes + + // var rt = new byte[(seq.Count - 1) / 8 + 1]; + // var dst = 0; + + // for (var i = 0; i < rt.Length; i++) + // { + // for (var j = 7; j >= 0; j--) + // { + // if (dst >= seq.Count) + // break; + + // if (seq[dst++]) + // rt[i] |= (byte)(0x1 << j); + // } + // } + + // // bits.CopyTo(rt, 0); + // return rt; + //} + + //public T[] Decode(T[] source, uint offset, uint length) + //{ + + // var rt = new List(); + + // var bits = new bool[length * 8]; + // var end = offset + length; + + + + // var dst = 0; + // for (var i = offset; i < end; i++) + // { + // for (var j = 7; j >= 0; j--) + // { + // bits[dst++] = ((source[i] >> j) & 0x1) > 0 ? 
true : false; + // } + // } + + // uint b = 0; + // while (b < bits.Length) + // { + // var (len, value) = DecisionTree.Decide(bits, b); + // rt.Add(value); + // b += len; + // } + + // return rt.ToArray(); + //} } } diff --git a/Esiur.Analysis/Coding/IStreamCodec.cs b/Esiur.Analysis/Coding/IStreamCodec.cs new file mode 100644 index 0000000..01cda37 --- /dev/null +++ b/Esiur.Analysis/Coding/IStreamCodec.cs @@ -0,0 +1,18 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Esiur.Analysis.Coding +{ + public interface IStreamCodec + { + + //public byte[] Encode(byte[] source, uint offset, uint length); + + //public byte[] Decode(byte[] source, uint offset, uint length); + + public T[] Encode(CodeWord[] source, uint offset, uint length); + public CodeWord[] Decode(T[] source, uint offset, uint length); + + } +} diff --git a/Esiur.Analysis/Coding/Symbol.cs b/Esiur.Analysis/Coding/Symbol.cs new file mode 100644 index 0000000..5caf774 --- /dev/null +++ b/Esiur.Analysis/Coding/Symbol.cs @@ -0,0 +1,50 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Esiur.Analysis.Coding +{ + + public struct CodeWord + { + public T[] Word; + int hashCode; + + public override bool Equals(object obj) + { + if (obj is CodeWord) + return Word.SequenceEqual(((CodeWord)obj).Word); + return false; + } + + public override int GetHashCode() + { + return ToString().GetHashCode(); + } + + + + public static CodeWord FromByte(byte b) + { + var word = new Base2[8]; + for(var i = 0; i < 8; i++) + { + word[i] = (b & (0x1 << i)) > 0 ? Base2.One : Base2.Zero; + } + + return new CodeWord() { Word = word }; + } + + public override string ToString() + { + return String.Join(" ", Word); + } + } + + public class Symbol + { + public double Probability { get; set; } + public CodeWord Word { get; set; } + } +}