Some time ago I wrote a piece about using the free ICSharpCode.SharpZipLib Zip library to compress a text file that represented a table of cities, states, and zipcodes into a kind of "control" library that would contain the lookup CSV style exported database table as an Embedded Resource, extract it at runtime, and split the resulting string array into an ADO.NET DataTable complete with Primary Key and all the methods needed to look up a City and State by Zipcode, or a list of City, State, and Zipcodes matching an entered City name with optional State abbreviation. This article represents some improvements on this, and uses LZMA (7-Zip) compression, which achieves better compression ratios and features very fast decompression.
There are many applications for this type of arrangement. I remember at one point that Mikhail Arkhipov of Microsoft had created an ASP.NET User Control that called into the NOAA Weather service to return weather information and icons. However, this service requires the longitude and latitude of the location to return results. So what I did was to export my Zipcode table (which includes lat / long columns) to a text file and was able to create a completely self -contained Weather control where all you needed to supply was a Zipcode, and it would find the latitude/longitude data and forward it to the NOAA WebService.
Unfortunately, NOAA has once again changed their schema, so it probably doesn't work any longer, but I mention this as one of many possible solutions that can use the technique.
The LZMA code here comes from the 7-Zip SDK; all one needs to do is figure out the encoder parameters, which I've explained with comments in the code. In addition, I have flattened out all the required LZMA classes so they can be imported "whole" into your own creation and therefore there will be no need for a separate assembly.
My control includes not only all the LZMA classes but also my "SevenZipHelper" class that provides static methods to allow you to compress a resource, decompress a resource, and extract an embedded, compressed text resource from an assembly as a byte array, which can easily be converted to the original delimited text file string.
Let's take a look at the SevenZipHelper class, which should give you an idea of what is necessary to use LZMA Compression:
using System;
using System.IO;
using System.Reflection;
namespace SevenZip.Compression.LZMA
{
/// <summary>
/// Static helpers around the LZMA <c>Encoder</c>/<c>Decoder</c> classes: compress a byte
/// array, decompress a byte array, and extract an LZMA-compressed embedded resource from
/// an assembly.
/// </summary>
/// <remarks>
/// A buffer produced by <see cref="Compress"/> and consumed by <see cref="Decompress"/>
/// is laid out as: the encoder's coder properties (read back as 5 bytes), the
/// uncompressed length as 8 little-endian bytes, then the LZMA payload.
/// </remarks>
public static class SevenZipHelper
{
    // Number of coder-property bytes Decompress reads from the start of the input.
    private const int PropertiesLength = 5;

    // Number of bytes used to store the uncompressed length (little-endian).
    private const int SizeFieldLength = 8;

    // Dictionary size in bytes: 1 << 23 (2^23 = 8MB).
    private const int dictionary = 1 << 23;

    // Whether an End Of Stream marker is written. Not needed here because the
    // uncompressed length is stored in the header.
    private const bool eos = false;

    /*
    Encoder properties, in the order they appear in propIDs / properties below:

    DictionarySize - exponent range [0,28], default: 23 (2^23 = 8MB)
    PosStateBits   - number of pos bits, [0, 4], default: 2
                     Intended for periodical data when the period is equal to 2^value.
    LitContextBits - number of literal context bits, [0, 8], default: 3
                     Sometimes 4 gives a gain for big files. Use 0 for 32-bit data.
    LitPosBits     - number of literal pos bits, [0, 4], default: 0
                     Intended for periodical data when the period is equal to 2^value.
                     For example, for 32-bit (4 bytes) periodical data you can use 2.
                     Often it's better to set LitContextBits to 0 if you change this.
    Algorithm      - default: 2
    NumFastBytes   - number of fast bytes, [5, 255], default: 128
                     Usually a big number gives a little bit better compression ratio
                     and a slower compression process.
    MatchFinder    - default: "bt4"
    EndMarker      - write End Of Stream marker
    */
    private static readonly CoderPropID[] propIDs =
    {
        CoderPropID.DictionarySize,
        CoderPropID.PosStateBits,
        CoderPropID.LitContextBits,
        CoderPropID.LitPosBits,
        CoderPropID.Algorithm,
        CoderPropID.NumFastBytes,
        CoderPropID.MatchFinder,
        CoderPropID.EndMarker
    };

    // These are the default properties, keeping it simple for now.
    // Each entry must line up with the entry at the same index in propIDs.
    private static readonly object[] properties =
    {
        (Int32)(dictionary),
        (Int32)(2),   /* PosStateBits 2 */
        (Int32)(3),   /* LitContextBits 3 */
        (Int32)(0),   /* LitPosBits 0 */
        (Int32)(2),   /* Algorithm 2 */
        (Int32)(128), /* NumFastBytes 128 */
        "bt4",        /* MatchFinder "bt4" */
        eos           /* EndMarker eos */
    };

    /// <summary>
    /// Compresses a byte array with LZMA using the default encoder properties.
    /// </summary>
    /// <param name="inputBytes">The data to compress.</param>
    /// <returns>
    /// The coder properties, followed by the 8-byte little-endian length of
    /// <paramref name="inputBytes"/>, followed by the compressed payload.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputBytes"/> is null.</exception>
    public static byte[] Compress(byte[] inputBytes)
    {
        if (inputBytes == null)
        {
            throw new ArgumentNullException("inputBytes");
        }

        using (MemoryStream inStream = new MemoryStream(inputBytes))
        using (MemoryStream outStream = new MemoryStream())
        {
            Encoder encoder = new Encoder();
            encoder.SetCoderProperties(propIDs, properties);
            encoder.WriteCoderProperties(outStream);

            // Store the uncompressed length, least significant byte first,
            // so Decompress knows how much output to produce.
            long fileSize = inStream.Length;
            for (int i = 0; i < SizeFieldLength; i++)
            {
                outStream.WriteByte((byte)(fileSize >> (8 * i)));
            }

            encoder.Code(inStream, outStream, -1, -1, null);
            return outStream.ToArray();
        }
    }

    /// <summary>
    /// Reads an embedded resource from an assembly and returns its decompressed content.
    /// </summary>
    /// <param name="assembly">The assembly that contains the embedded resource.</param>
    /// <param name="resourceName">The full manifest name of the resource.</param>
    /// <returns>The decompressed bytes of the resource.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="assembly"/> is null.</exception>
    /// <exception cref="System.Resources.MissingManifestResourceException">
    /// No resource named <paramref name="resourceName"/> exists in the assembly.
    /// </exception>
    /// <exception cref="EndOfStreamException">
    /// The resource stream ended before its reported length was read.
    /// </exception>
    public static byte[] GetDecompressedResourceFromAssembly(Assembly assembly, string resourceName)
    {
        if (assembly == null)
        {
            throw new ArgumentNullException("assembly");
        }

        using (Stream resourceStream = assembly.GetManifestResourceStream(resourceName))
        {
            // GetManifestResourceStream returns null (it does not throw) for an unknown name.
            if (resourceStream == null)
            {
                throw new System.Resources.MissingManifestResourceException(
                    "Embedded resource '" + resourceName + "' was not found in assembly '" +
                    assembly.FullName + "'.");
            }

            byte[] compressed = new byte[(int)resourceStream.Length];

            // Stream.Read may return fewer bytes than requested, so keep reading
            // until the buffer is full.
            int offset = 0;
            while (offset < compressed.Length)
            {
                int read = resourceStream.Read(compressed, offset, compressed.Length - offset);
                if (read <= 0)
                {
                    throw new EndOfStreamException(
                        "Embedded resource '" + resourceName + "' ended unexpectedly.");
                }
                offset += read;
            }

            return Decompress(compressed);
        }
    }

    /// <summary>
    /// Decompresses a buffer in the layout produced by <see cref="Compress"/>.
    /// </summary>
    /// <param name="inputBytes">
    /// Coder properties (5 bytes), uncompressed length (8 bytes, little-endian),
    /// then the LZMA payload.
    /// </param>
    /// <returns>The decompressed bytes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputBytes"/> is null.</exception>
    /// <exception cref="InvalidDataException">
    /// The input is too short to contain the properties and length header.
    /// </exception>
    public static byte[] Decompress(byte[] inputBytes)
    {
        if (inputBytes == null)
        {
            throw new ArgumentNullException("inputBytes");
        }

        using (MemoryStream inStream = new MemoryStream(inputBytes))
        using (MemoryStream outStream = new MemoryStream())
        {
            byte[] coderProperties = new byte[PropertiesLength];
            if (inStream.Read(coderProperties, 0, PropertiesLength) != PropertiesLength)
            {
                throw new InvalidDataException("input .lzma is too short");
            }

            // Reassemble the little-endian uncompressed length.
            long outSize = 0;
            for (int i = 0; i < SizeFieldLength; i++)
            {
                int v = inStream.ReadByte();
                if (v < 0)
                {
                    throw new InvalidDataException("Can't Read 1");
                }
                outSize |= ((long)(byte)v) << (8 * i);
            }

            Decoder decoder = new Decoder();
            decoder.SetDecoderProperties(coderProperties);

            // Everything after the header is compressed payload.
            long compressedSize = inStream.Length - inStream.Position;
            decoder.Code(inStream, outStream, compressedSize, outSize, null);
            return outStream.ToArray();
        }
    }
}
}Basically what my library does is extract the embedded resource and "rehydrate" it into a DataSet:
using System;
using System.Collections;
using System.Data;
using System.Reflection;
using System.Text;
using SevenZip.Compression.LZMA;
namespace ZipCodeControl
{
/// <summary>
/// In-memory Zip / City / State lookup. The table is loaded once, in the static
/// constructor, from the LZMA-compressed embedded resource
/// "ZipCodeControl.Zipcodes.dat", whose decompressed content is text with one
/// '|'-delimited record per line.
/// </summary>
public static class ZipCodeLookup
{
    // Manifest name of the compressed, embedded data file.
    private const string ResourceName = "ZipCodeControl.Zipcodes.dat";

    /// <summary>
    /// The full lookup table with string columns Zip (primary key), City and State.
    /// </summary>
    public static DataTable zipCodeTable = new DataTable();

    /// <summary>
    /// Decompresses the embedded data file and loads it into <see cref="zipCodeTable"/>.
    /// </summary>
    static ZipCodeLookup()
    {
        // Get the resource, decompressed, as byte[]
        byte[] raw = SevenZipHelper.GetDecompressedResourceFromAssembly(
            Assembly.GetExecutingAssembly(), ResourceName);

        // Convert byte array to string. Kept in a local (not a static field) so the
        // full text can be garbage-collected once the table has been built.
        string text = Encoding.UTF8.GetString(raw);

        // Encoding.GetString does not strip a byte-order mark; drop it so it
        // cannot become part of the first zip code.
        if (text.Length > 0 && text[0] == '\uFEFF')
        {
            text = text.Substring(1);
        }

        // add required columns to our DataTable
        zipCodeTable.Columns.Add("Zip", typeof(string));
        zipCodeTable.Columns.Add("City", typeof(string));
        zipCodeTable.Columns.Add("State", typeof(string));

        // Set PrimaryKey for lookups (this also makes the Zip column unique)
        zipCodeTable.PrimaryKey = new DataColumn[] { zipCodeTable.Columns["Zip"] };

        // de-mangle the string into DataRows, and add to table
        foreach (string rawLine in text.Split('\n'))
        {
            // Tolerate CRLF line endings so '\r' does not end up in the last column.
            string line = rawLine.TrimEnd('\r');

            // Skip blank lines (e.g. after a trailing newline): they would become
            // rows with an empty zip, and a second one would violate the primary key.
            if (line.Length == 0)
            {
                continue;
            }

            object[] fields = line.Split('|');
            zipCodeTable.Rows.Add(fields);
        }

        // Bake it.
        zipCodeTable.AcceptChanges();
    }

    /// <summary>
    /// Finds all entries whose city equals <paramref name="city"/>, optionally
    /// restricted to states containing <paramref name="state"/>.
    /// </summary>
    /// <param name="city">City name to match. Null is treated as an empty string.</param>
    /// <param name="state">
    /// Optional state abbreviation (matched with LIKE '%state%'). Null, empty or
    /// whitespace-only means "any state".
    /// </param>
    /// <returns>
    /// A DataSet holding one table with columns Zip, City and State; the table is
    /// empty when nothing matches.
    /// </returns>
    public static DataSet FindZipCodeByCityState(string city, string state)
    {
        string filter = "city = '" + EscapeLiteral(city) + "'";

        string trimmedState = state == null ? String.Empty : state.Trim();
        if (trimmedState.Length > 0)
        {
            filter += " AND state LIKE '%" + EscapeLikePattern(trimmedState) + "%'";
        }

        return BuildResult(zipCodeTable.Select(filter));
    }

    /// <summary>
    /// Finds the entry for the given zip code.
    /// </summary>
    /// <param name="zip">Zip code to look up. Null is treated as an empty string.</param>
    /// <returns>
    /// A DataSet holding one table with columns Zip, City and State; the table is
    /// empty when nothing matches.
    /// </returns>
    public static DataSet FindCityStateByZipCode(string zip)
    {
        // Use the Select method to find all rows matching the filter.
        string filter = "zip='" + EscapeLiteral(zip) + "'";
        return BuildResult(zipCodeTable.Select(filter));
    }

    // Copies the matched rows into a fresh Zip/City/State table wrapped in a DataSet.
    private static DataSet BuildResult(DataRow[] matches)
    {
        DataTable result = new DataTable();
        result.Columns.Add("Zip");
        result.Columns.Add("City");
        result.Columns.Add("State");

        foreach (DataRow match in matches)
        {
            result.Rows.Add(match.ItemArray);
        }

        DataSet ds = new DataSet();
        ds.Tables.Add(result);
        return ds;
    }

    // Escapes a value for use inside a single-quoted filter-expression literal:
    // each single quote is doubled, so names like O'Fallon do not break the filter.
    private static string EscapeLiteral(string value)
    {
        return value == null ? String.Empty : value.Replace("'", "''");
    }

    // Escapes a value for use inside a single-quoted LIKE pattern: single quotes are
    // doubled and the wildcard/bracket characters * % [ ] are wrapped in brackets so
    // they match literally.
    private static string EscapeLikePattern(string value)
    {
        StringBuilder escaped = new StringBuilder(value.Length + 8);
        foreach (char c in value)
        {
            switch (c)
            {
                case '*':
                case '%':
                case '[':
                case ']':
                    escaped.Append('[').Append(c).Append(']');
                    break;
                case '\'':
                    escaped.Append("''");
                    break;
                default:
                    escaped.Append(c);
                    break;
            }
        }
        return escaped.ToString();
    }
}
}
Included in the downloadable solution are the complete library, a "Tester" Windows Forms app:

Finally, there is a second Windows Forms app that allows you to browse for a file and select it, and the file is compressed and saved next to the original with ".lz" appended. At that point all you need to do is include the compressed file in your assembly as an "Embedded Resource", and you can use the provided static helper method to extract it. What you do with the resource at that point is entirely up to you.
I hope this concept is useful to you!
Download the VS.NET 2005 Solution ( 1.74 MB) |