using System;
using System.Text;

// Console harness for the sample encoding and fallback classes in this file
public class Test
{
    static void Main()
    {
        const string sample = "Abcdefghij..0123456789 $!@# >> €ÄÖñ";

        // First pass: leave the encoding's own fallback in place
        RunTest(sample, null);
        Console.WriteLine();

        // Second pass: swap in the numeric-entity fallback
        RunTest(sample, new EncoderNumberFallback());
    }

    // Encodes s, dumps the resulting bytes and decodes them again, printing
    // every intermediate result.  A null encFallback keeps the encoding's
    // existing encoder fallback.
    static void RunTest(String s, EncoderFallback encFallback)
    {
        Encoding codec = PrepareEncoding(encFallback);

        Console.WriteLine("Start String:");
        Console.WriteLine(s);

        int maxBytes = codec.GetMaxByteCount(s.Length);
        Console.WriteLine("Max Byte Count: " + maxBytes);

        int predictedBytes = codec.GetByteCount(s);
        Console.WriteLine("Predicted Byte Count: " + predictedBytes);

        byte[] encoded = codec.GetBytes(s);
        Console.WriteLine("Encoded Bytes:");
        DumpBytes(encoded);

        int decodedChars = codec.GetCharCount(encoded);
        Console.WriteLine("Decoded Char Count: " + decodedChars);

        Console.WriteLine("Decoded Round Trip:");
        string roundTrip = codec.GetString(encoded);
        Console.WriteLine(roundTrip);
    }

    // Builds the encoding under test.  When a fallback is supplied the
    // encoding is cloned first so that its EncoderFallback can be replaced.
    static Encoding PrepareEncoding(EncoderFallback encFallback)
    {
        Encoding baseline = new IicsaEncoding();
        if (encFallback == null)
        {
            return baseline;
        }

        Encoding writable = (Encoding)baseline.Clone();
        writable.EncoderFallback = encFallback;
        return writable;
    }

    // Prints one line per byte: position, hex value and the byte cast to a char
    static void DumpBytes(byte[] bytes)
    {
        int position = 0;
        foreach (byte value in bytes)
        {
            Console.WriteLine("{0:d2}: 0x{1:x2} ('{2}')", position, value, (char)value);
            position++;
        }
    }
}

//
// WARNING: This example is provided AS-IS and without much testing
//
// This is an example of overriding an encoding
// To be simple, this just reverses a-z, A-Z & 0-9 in ASCII.
//
// Note that I am striving for clarity here.  Some things could be
// done more efficiently.
// Additionally, error handling is deliberately minimal: the two max-count
// methods validate their argument and guard against integer overflow, but
// the conversion methods simply rely on the runtime's array bounds checks.
//
public class IicsaEncoding : Encoding
{
    // Get the max byte count for an input character stream.
    // We're basically a 1:1 encoding, however the encoder fallback could
    // provide more than 1 character for each unknown input character.
    // (Other encodings may also need to add one for a left over high
    // surrogate; this one does not.)
    //
    // Throws ArgumentOutOfRangeException if charCount is negative or the
    // result does not fit in an int.
    public override int GetMaxByteCount(int charCount)
    {
        if (charCount < 0)
        {
            throw new ArgumentOutOfRangeException("charCount", "Count must not be negative.");
        }

        // Start by assuming a 1:1 relationship.  Work in 64 bits so that the
        // multiplication below cannot silently wrap around.
        long byteCount = charCount;

        // The EncoderFallback tells us chars, not bytes, because it doesn't
        // know about our encoding.  That doesn't matter here: every fallback
        // char becomes exactly one byte (itself or a '?').
        int fallbackChars = this.EncoderFallback.MaxCharCount;
        if (fallbackChars > 0)
        {
            byteCount *= fallbackChars;
        }

        if (byteCount > int.MaxValue)
        {
            throw new ArgumentOutOfRangeException("charCount", "Resulting byte count exceeds Int32.MaxValue.");
        }

        return (int)byteCount;
    }

    // Get the max char count for an input byte stream.
    // We're basically a 1:1 encoding, but bytes over 0x7f are unknown, so
    // the decoder fallback is used for those and may produce several chars.
    // (Other encodings may also need to account for bytes left over from a
    // previous decoder call; this one does not.)
    //
    // Throws ArgumentOutOfRangeException if byteCount is negative or the
    // result does not fit in an int.
    public override int GetMaxCharCount(int byteCount)
    {
        if (byteCount < 0)
        {
            throw new ArgumentOutOfRangeException("byteCount", "Count must not be negative.");
        }

        long charCount = byteCount;

        // Decoding is governed by the DECODER fallback, which already speaks
        // in chars.  The encoder fallback must not influence this value.
        int fallbackChars = this.DecoderFallback.MaxCharCount;
        if (fallbackChars > 0)
        {
            charCount *= fallbackChars;
        }

        if (charCount > int.MaxValue)
        {
            throw new ArgumentOutOfRangeException("byteCount", "Resulting char count exceeds Int32.MaxValue.");
        }

        return (int)charCount;
    }

    // Returns the exact number of bytes GetBytes would write for
    // chars[index] through chars[index + count - 1].
    public override int GetByteCount(char[] chars, int index, int count)
    {
        // Reserve one byte per input char; fallbacks adjust this below
        int byteCount = count;

        // We might need a fallback buffer
        EncoderFallbackBuffer fallbackBuffer = this.EncoderFallback.CreateFallbackBuffer();

        for (int i = index; i < index + count; i++)
        {
            // Characters 0x00 through 0x7f map to exactly one byte
            if (chars[i] <= 0x7f)
            {
                continue;
            }

            // Unknown character: give back the byte reserved for it and
            // count whatever the fallback produces instead.
            byteCount--;
            fallbackBuffer.Fallback(chars[i], i);

            // GetNextChar returns 0 once the fallback is exhausted.  Every
            // fallback char costs one byte (the char itself or a '?').
            while (fallbackBuffer.GetNextChar() != 0)
            {
                byteCount++;
            }
        }

        return byteCount;
    }

    // Encodes chars[charIndex] through chars[charIndex + charCount - 1] into
    // bytes starting at byteIndex and returns the number of bytes written.
    // Writing past the end of bytes surfaces as the runtime's own
    // IndexOutOfRangeException.
    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
    {
        // Remember where we started so we can report the count
        int startIndex = byteIndex;

        // We might need a fallback buffer
        EncoderFallbackBuffer fallbackBuffer = this.EncoderFallback.CreateFallbackBuffer();

        for (int i = charIndex; i < charIndex + charCount; i++)
        {
            // Characters 0x00 through 0x7f are encoded directly
            if (chars[i] <= 0x7f)
            {
                bytes[byteIndex++] = EncodeChar(chars[i]);
                continue;
            }

            // Unknown character: emit whatever the fallback supplies
            fallbackBuffer.Fallback(chars[i], i);

            // GetNextChar returns 0 once the fallback is exhausted.
            // EncodeChar turns any fallback char that is still unknown into '?'.
            char c;
            while ((c = fallbackBuffer.GetNextChar()) != 0)
            {
                bytes[byteIndex++] = EncodeChar(c);
            }
        }

        return byteIndex - startIndex;
    }

    // Returns the exact number of chars GetChars would write for
    // bytes[index] through bytes[index + count - 1].
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        // Reserve one char per input byte; fallbacks adjust this below
        int charCount = count;

        // We might need a fallback buffer
        DecoderFallbackBuffer fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();

        for (int i = index; i < index + count; i++)
        {
            // Bytes 0x00 through 0x7f map to exactly one char
            if (bytes[i] <= 0x7f)
            {
                continue;
            }

            // Unknown byte: give back the char reserved for it
            charCount--;

            // Fallback takes an array of the unknown bytes; we fall them
            // back one at a time.
            fallbackBuffer.Fallback(new byte[] { bytes[i] }, i);

            // Add a char for each char the fallback supplies
            while (fallbackBuffer.GetNextChar() != 0)
            {
                charCount++;
            }
        }

        return charCount;
    }

    // Decodes bytes[byteIndex] through bytes[byteIndex + byteCount - 1] into
    // chars starting at charIndex and returns the number of chars written.
    // Writing past the end of chars surfaces as the runtime's own
    // IndexOutOfRangeException.
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        // Remember where we started so we can report the count
        int startIndex = charIndex;

        // We might need a fallback buffer
        DecoderFallbackBuffer fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();

        for (int i = byteIndex; i < byteIndex + byteCount; i++)
        {
            // Bytes 0x00 through 0x7f are decoded directly
            if (bytes[i] <= 0x7f)
            {
                chars[charIndex++] = DecodeByte(bytes[i]);
                continue;
            }

            // Unknown byte: Fallback takes an array of the unknown bytes;
            // we fall them back one at a time.
            fallbackBuffer.Fallback(new byte[] { bytes[i] }, i);

            // Decoder fallback chars are already chars, so copy them as-is
            char c;
            while ((c = fallbackBuffer.GetNextChar()) != 0)
            {
                chars[charIndex++] = c;
            }
        }

        return charIndex - startIndex;
    }

    // Shared mapping for both directions: reverses the order of 0-9, a-z and
    // A-Z and leaves every other value alone.  Applying it twice yields the
    // original value, which is why encoding and decoding can share it.
    private static int MirrorAlphanumeric(int code)
    {
        // Reverse the numbers
        if (code >= '0' && code <= '9')
        {
            return '9' - (code - '0');
        }

        // Reverse the lower case letters
        if (code >= 'a' && code <= 'z')
        {
            return 'z' - (code - 'a');
        }

        // Reverse the upper case letters
        if (code >= 'A' && code <= 'Z')
        {
            return 'Z' - (code - 'A');
        }

        return code;
    }

    // Maps one char to its encoded byte.  Anything above 0x7f becomes '?'.
    private static byte EncodeChar(char c)
    {
        if (c > 0x7f)
        {
            return (byte)'?';
        }

        return (byte)MirrorAlphanumeric(c);
    }

    // Maps one encoded byte back to its char.  Anything above 0x7f becomes '?'.
    private static char DecodeByte(byte b)
    {
        if (b > 0x7f)
        {
            return '?';
        }

        return (char)MirrorAlphanumeric(b);
    }
}

// Encoder fallback that replaces an unencodable character with its decimal
// code in the form &#12345;
public class EncoderNumberFallback : EncoderFallback
{
    public override EncoderFallbackBuffer CreateFallbackBuffer()
    {
        return new EncoderNumberFallbackBuffer();
    }

    // Maximum number of characters that this fallback could return for a
    // single input character
    public override int MaxCharCount
    {
        get
        {
            // A char is at most 65535, so the longest form is "&#" + five
            // digits + ";", e.g. &#34567; which is 8 characters.
            return 8;
        }
    }
}

// Buffer that hands out the characters of one numeric fallback string at a time
public sealed class EncoderNumberFallbackBuffer : EncoderFallbackBuffer
{
    // The fallback string currently being handed out
    private String strFallback = String.Empty;

    // Number of characters of strFallback not yet returned; it keeps being
    // decremented by GetNextChar and is negative once the terminating 0 has
    // been returned (and before any fallback has been started).
    int fallbackCount = -1;

    // Index in strFallback of the character most recently returned
    int fallbackIndex = -1;

    // Construction
    public EncoderNumberFallbackBuffer()
    {
    }

    // Starts a fallback for a single unknown character.
    // Returns true if there is at least one fallback character to read.
    // Throws ArgumentException if a previous fallback still has unread
    // characters (a recursive fallback).
    public override bool Fallback(char charUnknown, int index)
    {
        ThrowIfFallbackPending();
        return Load(String.Format("&#{0};", (int)charUnknown));
    }

    // Starts a fallback for an unknown surrogate pair.  (This example didn't
    // really expect surrogates.)  Each half is written as its own entity, so
    // the output is twice as long, but it also covers two input chars, which
    // keeps EncoderNumberFallback.MaxCharCount correct on a per-char basis.
    // Throws ArgumentException if a previous fallback still has unread
    // characters (a recursive fallback).
    public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index)
    {
        ThrowIfFallbackPending();
        return Load(String.Format("&#{0};&#{1};", (int)charUnknownHigh, (int)charUnknownLow));
    }

    // Returns the next fallback character, or (char)0 when none are left.
    public override char GetNextChar()
    {
        // The counter is allowed to go below zero: 0 means the character
        // returned by this call is the last one, negative means nothing is left.
        this.fallbackCount--;
        this.fallbackIndex++;

        if (this.fallbackCount < 0)
        {
            return (char)0;
        }

        return this.strFallback[this.fallbackIndex];
    }

    // Steps back one position so the previous character can be read again.
    // Returns false if there is nothing to step back over.
    public override bool MovePrevious()
    {
        // Only possible if at least one GetNextChar call has been made and we
        // are no further than one step past the last character.
        if (this.fallbackCount >= -1 && this.fallbackIndex >= 0)
        {
            this.fallbackIndex--;
            this.fallbackCount++;
            return true;
        }

        return false;
    }

    // How many characters are left to output?
    public override int Remaining
    {
        get
        {
            // The counter is negative when nothing is pending; report that as 0.
            return (this.fallbackCount < 0) ? 0 : this.fallbackCount;
        }
    }

    // Rejects a new fallback while characters of the previous one are still
    // unread; that means the fallback output itself is being fallen back.
    private void ThrowIfFallbackPending()
    {
        if (this.fallbackCount >= 1)
        {
            throw new ArgumentException("Recursive Fallback Exception");
        }
    }

    // Installs text as the current fallback string and rewinds the cursor.
    private bool Load(String text)
    {
        this.strFallback = text;
        this.fallbackCount = text.Length;
        this.fallbackIndex = -1;
        return this.fallbackCount != 0;
    }
}