/* This is an implementation of a tiny compression and decompression algorithm
 * intended for embedded applications where space and memory are concerns. In
 * real world use, I envision a table of strings, compressed as a whole and
 * decompressed one string at a time as needed by the application.
 *
 * This algorithm is designed for 7-bit ASCII text. The eighth bit is stripped
 * out when the strings are stored. Each 7 bits is either a literal character
 * or a back reference. A back reference consists of 21 bits that start with
 * the bits 01 to distinguish the sequence from a printable character, followed
 * by an 12-bit offset, and a 7-bit length.
 *
 * Remember to compile -Os. On i386 -m32 this yields a 324 byte decompressor.
 */



/* For clarity, a byte is 8-bits long always. A bat is a 7-bit chunk. A batnum
 * is an offset into the strings list, in bats. We use the unbat function to
 * place a bat into an 8-bit container.
 */
#if defined COMPRESSOR
#include <stdlib.h>	// for malloc and friends
#include <string.h>	// memcpy

static int batpack(unsigned char **dest, unsigned int *destlen, char *src,
		unsigned int srclen)
{
	unsigned int packed_size;
	unsigned char * d = *dest;
	int i;

	if (!d)
		*destlen = 0;
	
	// allocate as needed
	packed_size = ((srclen + 1) >> 3) * 7;
	if ((srclen + 1) & 0x7)
		packed_size += 7;
	if (*destlen < packed_size){
		d = realloc(d, packed_size);
		if (!d)
			return 1;
		*dest = d;
		*destlen = packed_size;
	}

	// pack
	while (srclen){
		for (i=0; i<8; i++){
			if (!srclen){
				*d++ = 0;
				continue;
			}
			switch (i){
			case 1:
			case 2:
			case 3:
			case 4:
			case 5:
			case 6:
				*d++ |= (*src & 0x7f) >> (7-i);
			case 0:
				*d = (*src++ << (i+1));
				srclen--;
				break;
			case 7:
				*d++ |= *src++ & 0x7f;
				srclen--;
				break;
			}
		}
	}

	return 0;
}
#endif

//  [-------+][++++++--][-----+++][++++----][---+++++][++------][-+++++++]
static unsigned char unbat(unsigned char *strings, unsigned int batnum){
	unsigned int ring, mask;
	unsigned short ret;

	// the pattern of bats placed into bytes repeats every 56-bits, or
	// seven bytes. We call the 7-byte chunk the ring that the bat is in.
	ring = (batnum >> 3)*7;
	mask = batnum & 7;

	if (mask == 7)
		ret = strings[ring + 6];	// don't overflow
	else if (mask == 0)
		ret = (strings[ring]) >> 1;	// or underflow
	else {
		if (mask > 1)
			ring = ring + mask - 1;
		ret = ((strings[ring]) << 8) | (strings[ring+1]);
		ret = ret >> (mask + 1);
	}

	return (ret & 0x7f);
}

struct __untinyzip_work {
	char *dest;
	unsigned int len;
	unsigned char *s;
	unsigned int batnum;
	unsigned char c;
};

// worry about stack size since we are recursing
static void __untinyzip_real(struct __untinyzip_work *w){
	unsigned int offset, oldlen, oldbatnum;

	while (w->len){
		w->c = unbat(w->s, w->batnum);

		// handle back reference
		if (((w->c & 70) >> 4) == 1){
			// extract offset
			offset = (w->c & 0xf) << 7;
			w->c = unbat(w->s, ++w->batnum) & 0x7f;
			offset = offset | w->c;
			// get len
			oldlen = w->len;
			w->len = unbat(w->s, ++w->batnum);
			if (w->len > oldlen)
				w->len = oldlen;
			oldlen -= w->len;
			// recurse
			w->batnum = w->batnum - offset - 2;
			oldbatnum = ++w->batnum;
			__untinyzip_real(w);
			// restore vars
			w->batnum = oldbatnum;
			w->len = oldlen;
		} else {
			*(w->dest++) = w->c;
			w->len --;
		}

		// always last char added, despite recursion
		if (w->c == 0)
			return;
	}
}

int untinyzip(char *dest, unsigned int len, unsigned char *s,
		unsigned int batnum)
{
	struct __untinyzip_work w;

	if (!dest)
		return 1;
	
	w.dest = dest;
	w.len = len;
	w.s = s;
	w.batnum = batnum;

	__untinyzip_real(&w);

	if (w.c)
		return 1;
	return 0;
}

#if defined COMPRESSOR

struct tinyzip_ref{
	unsigned int batnum;
	unsigned int len;
};


static int strstr_with_ref(unsigned char *dest, unsigned int dlen,
		unsigned int batnum, char *src, unsigned int slen)
{
	unsigned int offset, len, reflen;
	int count=0, ret=0;

	while (batnum + count < dlen && count < slen){
		if (dest[batnum + count] == src[count]) {
			count++;
			ret++;
		} else if (dest[batnum + count] >> 4 == 1){
			// a back reference -- get offset, len
			offset = (dest[batnum + count++] & 0xf) << 7;
			offset |= (dest[batnum + count++] & 0x7f);
			offset = batnum - offset;
			len = (dest[batnum + count++] & 0x7f);
			// recurse
			reflen = strstr_with_ref(dest, dlen, offset, src+count,
				slen - count < len ? slen - count : len);
			ret += reflen;
			if (reflen < len)
				break;
		} else {
			break;
		}
	}

	if (ret < 4)
		return 0;
	return ret;
}

static int findref(struct tinyzip_ref *ref, unsigned char *dest,
		unsigned int dlen, char *src, unsigned int slen)
{
	int i, match;

	ref->len = 0;
	for (i=0; i<dlen; i++){
		match = strstr_with_ref(dest, dlen, i, src, slen);
		if (match > ref->len){
			ref->len = match;
			ref->batnum = i;
		}
	}

	return ref->len;
}


unsigned int tinyzip(unsigned char *dest, unsigned int dlen, char *src,
		unsigned int slen)
{
	unsigned int used=0, clen, offset;
	char *cur, *end;
	struct tinyzip_ref ref;

	ref.batnum = 0;
	ref.len = 0;
	end = src + slen;
	clen = slen;
	cur = src;
	while (cur < end){
		if (findref(&ref, dest, used, cur, clen)){
			offset = (used - ref.batnum) & 0x7ff;
			dest[used++] = (1 << 5) | (offset >> 7);
			dest[used++] = offset & 0x7f;
			dest[used++] = ref.len & 0x7f;
			clen -= ref.len;
			cur += ref.len;
		} else {
			dest[used++] = *cur++;
			clen--;
		}
	}

	cur = malloc(used);
	memcpy(cur, dest, used);
	batpack(&dest, &dlen, cur, used);
	free(cur);
	
	// return len of batpack
	if (used & 0x7)
		return ((used*7) >> 3) + 1;
	return ((used * 7) >> 3);
}

#endif


