// A compact text compressor based on frequency analysis
//
// The printable characters are divided into 7 tables. One table of common
// characters, and 6 tables of uncommon characters. Four bits will either
// specify one of the common characters, or the uncommon table to use in
// conjunction with the following four bits. The common characters are:
//	<spc> e t a o i n s h r
//
// Copyright 2009 Jesse Dutton

// The lookup tables
//
// zip_lookup holds the ten common characters; a nibble value of 0-9 is an
// index into it.
//
// zip_repl holds the characters below 32 that can be stored. zip_repl[k] is
// written as the two-nibble form of zip_lookup[k]; the encoder always uses
// the one-nibble form for those four characters, so the two-nibble form is
// free to carry this second meaning. zip_repl[0] (NUL) marks end of text.
const char zip_lookup[]= {' ', 'e', 't', 'a', 'o', 'i', 'n', 's', 'h', 'r'};
const char zip_repl[] = {0, '\t', '\n', '\r'};

// Expand nibble-packed text into a NUL-terminated string.
//
//	dest	output buffer
//	dlen	size of dest in bytes, terminating NUL included
//	offset	index of the first byte of src to read (its high nibble first)
//	src	packed input
//	slen	number of readable bytes in src
//
// Decoding stops when dest has room only for the terminator, when src is
// exhausted, or when the end-of-text marker is decoded. At most dlen bytes
// of dest are written. If dlen <= 0 nothing is written at all.
//
// Returns the number of characters stored, not counting the terminator.
int decomp(char *dest, int dlen, int offset, unsigned char *src, int slen){
	int i = 0;	// characters stored so far
	int high = 1;	// non-zero: the next nibble is the high half of the byte
	int c = 0;	// 0, or the base value of the selected secondary table
	int j;

	// no room even for the terminator
	if (dlen <= 0)
		return 0;

	// a negative offset would read before the start of src
	if (offset < 0){
		dest[0] = 0;
		return 0;
	}

	// keep one byte of dest free for the terminator written below
	while (i < dlen - 1 && offset < slen){
		// fetch the next nibble; the byte is finished after its low half
		if (high)
			c += src[offset] >> 4;
		else
			c += src[offset++] & 0xf;
		high = !high;

		if (c < 10){
			// value from main table
			dest[i++] = zip_lookup[c];
			c = 0;
		} else if (c >= 32) {
			// value from a secondary table; the two-nibble forms of the
			// first four common characters stand for the low characters
			for (j=0; j < 4; j++){
				if (zip_lookup[j] == c){
					c = zip_repl[j];
					break;
				}
			}
			// end-of-text marker
			if (c == 0)
				break;
			dest[i++] = (char)c;
			c = 0;
		} else {
			// switch tables: nibbles 10..15 select bases 32, 48, ... 112;
			// the next nibble is added to the base
			c = ((c-8) * 16) & 0x7f;
		}
	}

	dest[i] = 0;
	return i;
}

#ifdef COMPRESS
// State of the nibble stream being produced
struct compress_writer{
	unsigned char *dest;	// output buffer
	int dlen;		// size of dest in bytes
	int offset;		// index of the byte currently being filled
	unsigned char top;	// non-zero: next nibble goes in the high half
	int half_bytes;		// characters that were stored as a single nibble
};

// This writes four bits without knowing what they mean.
//
// Only the low four bits of nibl are used. The high half of a byte is
// written first; writing it also clears the low half, so dest does not have
// to be zeroed beforehand. Writing the low half completes the byte and moves
// on to the next one.
//
// Returns 0 on success, 1 if dest is full (nothing is written in that case).
static int write_nibl(struct compress_writer *w, unsigned char nibl){
	if (w->offset >= w->dlen)
		return 1;

	// must be four bits
	nibl = nibl & 0xf;

	// write it to the appropriate half byte
	if (w->top)
		w->dest[w->offset] = (unsigned char)(nibl << 4);
	else
		w->dest[w->offset++] |= nibl;
	w->top = (unsigned char)~w->top;

	return 0;
}

// Emit c in the two-nibble form: a table selector followed by the position
// of c inside that table. Returns 0 on success and 1 when c does not belong
// to any secondary table or a nibble could not be written.
static int write_char(struct compress_writer *w, char c){
	unsigned char selector = (c/16) + 8;
	unsigned char position;

	// only selectors 10 through 15 name a secondary table
	if (!(selector >= 10 && selector <= 15))
		return 1;
	position = c % 16;

	// selector first, then the position within the table
	if (write_nibl(w, selector) != 0)
		return 1;
	return write_nibl(w, position);
}

// Store one character, using the shortest form available for it.
// Returns 0 on success, 1 on failure.
static int encode(struct compress_writer *w, char c){
	int k;

	// a common character costs a single nibble: its index in the table
	for (k = 0; k < 10; k++){
		if (zip_lookup[k] != c)
			continue;
		if (write_nibl(w, k))
			return 1;
		w->half_bytes++;
		return 0;
	}

	// a replacement character is stored as the two-nibble form of the
	// common character that shares its index
	for (k = 0; k < 4; k++){
		if (zip_repl[k] == c)
			return write_char(w, zip_lookup[k]);
	}

	// everything else is stored in the two-nibble form as is
	return write_char(w, c);
}

#include <stdio.h>
// Compress slen characters of src into dest.
//
//	dest	output buffer; callers should pass a zero-filled region, since
//		nibbles may be OR-ed into the bytes already there
//	dlen	size of dest in bytes
//	offset	index in dest of the first byte to write
//	src	text to compress
//	slen	number of characters of src to compress
//
// Returns the index in dest just past the last byte written, or 0 if a
// character could not be encoded or dest ran out of room. A line of
// statistics is printed to stderr on success.
int compress(unsigned char *dest, int dlen, int offset, char *src, int slen){
	int i;
	struct compress_writer w;
	
	w.dest = dest;
	w.dlen = dlen;
	w.offset = offset;
	w.top = ~0;
	w.half_bytes = 0;

	for (i=0; i<slen; i++){
		if (encode(&w, src[i]))
			return 0;
	}

	// An odd number of nibbles leaves the low half of the last byte unused.
	// Left at zero it would decode as index 0 of the common table, i.e. a
	// spurious trailing space. Fill it with a table-select nibble instead:
	// a decoder that runs out of input right after one emits nothing.
	if (!w.top){
		w.dest[w.offset] |= 0xf;
		w.offset++;
	}

	fprintf(stderr, "compressed %i bytes out of %i\n", w.half_bytes, slen);
	return w.offset;
}

#endif

