/*
 * ratCode.c --
 *
 *	This file contains basic support for decoding and encoding of
 *	strings coded in various MIME-encodings.
 *
 * TkRat software and its included text is Copyright 1996 by Martin Forssen.
 *
 * The full text of the legal notice is contained in the file called
 * COPYRIGHT, included with this distribution.
 */

#include "rat.h"

/*
 * Hexadecimal digits in value order: the position of a digit in this
 * string is its numeric value. Used for quoted-printable coding.
 * The array holds the 16 digits plus the terminating null.
 */
char alphabetHEX[17] =
	"0123456789"
	"ABCDEF";

/*
 * The base64 alphabet in value order (positions 0-63), followed by the
 * padding character '=' at position 64 and the terminating null.
 */
static char alphabet64[66] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/"
	"=";

/*
 *----------------------------------------------------------------------
 *
 * RatHeaderHexValue --
 *
 *	Converts one hexadecimal digit (upper or lower case) to its value.
 *
 * Results:
 *	The value 0-15, or -1 if the character is not a hexadecimal digit.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

static int
RatHeaderHexValue(int c)
{
    if (c >= '0' && c <= '9') {
	return c - '0';
    }
    if (c >= 'A' && c <= 'F') {
	return c - 'A' + 10;
    }
    if (c >= 'a' && c <= 'f') {
	return c - 'a' + 10;
    }
    return -1;
}

/*
 *----------------------------------------------------------------------
 *
 * RatDecodeHeader --
 *
 *      Decodes a header line encoded according to rfc1522.
 *
 *	The string is scanned for encoded words of the form
 *	=?charset?Q?text?= or =?charset?B?text?=. The string must be
 *	writable since it is temporarily modified while it is scanned
 *	(it is restored before the function returns).
 *
 * Results:
 *	Returns a list of lists. Each sublist consists of two elements;
 *	the first is the encoding and the other is the data. Text outside
 *	encoded words is reported with the encoding "us-ascii"; such text
 *	is dropped when it follows an encoded word and RatIsEmpty()
 *	reports it as empty. All pieces should be concatenated.
 *	The return value is a pointer to a buffer which will remain valid
 *	until the next call to RatDecodeHeader. An empty string is
 *	returned if the argument is NULL or empty.
 *
 *	The decoded text of one encoded word is limited to
 *	sizeof(buf)-1 bytes; anything beyond that is discarded. In Q
 *	encoded text an '=' which is not followed by two hexadecimal
 *	digits is copied unchanged. If the base64 decoder fails the
 *	encoded text is passed on undecoded.
 *
 * Side effects:
 *	Overwrites the static buffer used in the previous call.
 *
 *
 *----------------------------------------------------------------------
 */

char*
RatDecodeHeader(Tcl_Interp *interp, char *string)
{
    static Tcl_DString ds;
    static int init = 0;
    unsigned char buf[1024];
    char *point = string, *sPtr, *ePtr, c, *decoded;
    unsigned long length;
    int i, hi, lo, lastDec = 0;
    int limit = (int)sizeof(buf) - 1;	/* Room left for the final null */
    Tcl_RegExp regexp;

    if (!string || !*string) {
	return "";
    }

    /*
     * The first time we initialize the DString, otherwise we just clean it.
     * We also compile our regular expression
     */
    if (!init) {
	Tcl_DStringInit(&ds);
	init = 1;
    } else {
	Tcl_DStringSetLength(&ds, 0);
    }

    regexp = Tcl_RegExpCompile(interp,"=\\?([^?]+)\\?(q|Q|b|B)\\?([^? ]*)\\?=");

    /*
     * Only a positive return value means that a match was found; errors
     * are reported as a negative value and must end the scan. If the
     * expression could not be compiled the whole string is passed on as
     * plain text below.
     */
    while (regexp != NULL
	    && Tcl_RegExpExec(interp, regexp, point, string) > 0) {
	/*
	 * Emit the plain text in front of the encoded word
	 */
	Tcl_RegExpRange(regexp, 0, &sPtr, &ePtr);
	if (sPtr != point) {
	    c = *sPtr;
	    *sPtr = '\0';
	    if (!lastDec || !RatIsEmpty(point)) {
		Tcl_DStringStartSublist(&ds);
		Tcl_DStringAppendElement(&ds, "us-ascii");
		Tcl_DStringAppendElement(&ds, point);
		Tcl_DStringEndSublist(&ds);
	    }
	    *sPtr = c;
	}
	point = ePtr;

	/*
	 * First element of the sublist: the charset name
	 */
	Tcl_RegExpRange(regexp, 1, &sPtr, &ePtr);
	c = *ePtr;
	*ePtr = '\0';
	Tcl_DStringStartSublist(&ds);
	Tcl_DStringAppendElement(&ds, sPtr);
	*ePtr = c;

	/*
	 * Second element: the decoded text. c holds the encoding letter.
	 */
	Tcl_RegExpRange(regexp, 2, &sPtr, &ePtr);
	c = *sPtr;
	Tcl_RegExpRange(regexp, 3, &sPtr, &ePtr);
	if ('b' == c || 'B' == c) {
	    /*
	     * NOTE(review): rfc822_base64 presumably returns a newly
	     * allocated buffer which is never released here; confirm
	     * against the c-client documentation and free it if so.
	     */
	    decoded = (char*)rfc822_base64(sPtr, ePtr-sPtr, &length);
	    if (NULL == decoded) {
		decoded = sPtr;
		length = ePtr-sPtr;
	    }
	    if (length > (unsigned long)limit) {
		length = limit;
	    }
	    memmove(buf, decoded, length);
	    buf[length] = '\0';
	} else {
	    for (i=0; sPtr < ePtr && i < limit; i++, sPtr++) {
		if ('_' == *sPtr) {
		    buf[i] = ' ';
		} else if ('=' == *sPtr && ePtr-sPtr > 2
			&& (hi = RatHeaderHexValue(sPtr[1])) >= 0
			&& (lo = RatHeaderHexValue(sPtr[2])) >= 0) {
		    buf[i] = (unsigned char)((hi<<4) + lo);
		    sPtr += 2;
		} else {
		    buf[i] = *sPtr;
		}
	    }
	    buf[i] = '\0';
	}
	Tcl_DStringAppendElement(&ds, (char*)buf);
	Tcl_DStringEndSublist(&ds);
	lastDec = 1;
    }

    /*
     * Whatever remains after the last encoded word is plain text
     */
    if (*point) {
	Tcl_DStringStartSublist(&ds);
	Tcl_DStringAppendElement(&ds, "us-ascii");
	Tcl_DStringAppendElement(&ds, point);
	Tcl_DStringEndSublist(&ds);
    }
    return Tcl_DStringValue(&ds);
}


/*
 *----------------------------------------------------------------------
 *
 * RatDecodeHeaderFull --
 *
 *      Builds a displayable string from a decoded header. The string
 *	argument is parsed as a list of sublists where each sublist holds
 *	an encoding name and the data in that encoding (the format
 *	described for the result of RatDecodeHeader). Data whose encoding
 *	is not compatible with the charset found in the global variable
 *	option(charset) is passed through RatConvertEncoding. If that
 *	variable is not set "us-ascii" is used.
 *
 * Results:
 *	A pointer to a static storage area where a decoded copy of the
 *	string is kept. The result is always null terminated and is
 *	truncated if it does not fit in the storage area. Sublists with
 *	fewer than two elements and lists which can not be parsed are
 *	ignored.
 *
 * Side effects:
 *	Overwrites the static buffer used in the previous call.
 *
 *
 *----------------------------------------------------------------------
 */

char*
RatDecodeHeaderFull(Tcl_Interp *interp, char *string)
{
    char *encoding = Tcl_GetVar2(interp, "option", "charset", TCL_GLOBAL_ONLY);
    static char buf[1024];
    int i, largc, eargc;
    size_t used = 0, n;
    char **largv, **eargv, *text;

    buf[0] = '\0';
    if (!encoding) {
	encoding = "us-ascii";
    }
    if (!string || TCL_OK != Tcl_SplitList(interp, string, &largc, &largv)) {
	return buf;
    }
    for (i=0; i<largc && used < sizeof(buf)-1; i++) {
	if (TCL_OK != Tcl_SplitList(interp, largv[i], &eargc, &eargv)) {
	    continue;
	}
	if (eargc >= 2) {
	    if (RatEncodingCompat(interp, eargv[0], encoding)) {
		text = eargv[1];
	    } else {
		text = (char*)RatConvertEncoding(interp, eargv[0], encoding,
			(unsigned char*)eargv[1]);
	    }

	    /*
	     * Copy as much as fits and always leave room for the null
	     */
	    n = strlen(text);
	    if (n > sizeof(buf)-1-used) {
		n = sizeof(buf)-1-used;
	    }
	    memcpy(&buf[used], text, n);
	    used += n;
	    buf[used] = '\0';
	}
	ckfree((char*)eargv);
    }

    ckfree((char*)largv);
    return buf;
}

/*
 *----------------------------------------------------------------------
 *
 * RatDecode --
 *
 *	General decoding interface. It takes as arguments a chunk of data,
 *	the encoding the data is in. And returns a new malloced block of
 *	decoded data. The decoded data will not have any \r or \0 in it
 *	\0 will be changed to the string \0, unless the toCharset parameter
 *	is NULL. If that is the case the data is assumed to be wanted
 *	in raw binary form.
 *	It is also possible to get this routine to do some character set
 *	transformation, but this is not yet implemented.
 *
 * Results:
 *	A block of decoded data. It is the callers responsibility to free
 *	this data.
 *
 * Side effects:
 *	None.
 *
 *
 *----------------------------------------------------------------------
 */

unsigned char*
RatDecode(Tcl_Interp *interp, unsigned char *data, int inLength,
	RatEncoding encoding, int *outLength, char *isCharset, char *toCharset)
{
    unsigned char *dest, *src, buf[64], lbuf[4];
    int allocated, dataIndex = 0, index, srcLength, dstIndex;

    dest = (unsigned char*) ckalloc(inLength * sizeof(char)+1);
    dstIndex = 0;
    allocated = inLength;

    while (dataIndex < inLength) {
	switch (encoding) {
	case RAT_BASE64:
		src = buf;
		for (srcLength = 0; dataIndex < inLength
			&& srcLength < sizeof(buf)-2;) {
		    for (index=0; dataIndex<inLength && index<4; dataIndex++) {
			if (strchr(alphabet64, data[dataIndex])) {
			    lbuf[index++] = strchr(alphabet64, data[dataIndex])
				    - alphabet64;
			}
		    }
		    if (0 == index) {
			continue;
		    }
		    src[srcLength++] = lbuf[0] << 2 | ((lbuf[1]>>4)&0x3);
		    if (strchr(alphabet64, '=')-alphabet64 != lbuf[2]) {
			src[srcLength++] = lbuf[1] << 4 | ((lbuf[2]>>2)&0xf);
			if (strchr(alphabet64, '=')-alphabet64 != lbuf[3]) {
			    src[srcLength++] = lbuf[2] << 6 | (lbuf[3]&0x3f);
			}
		    }
		}
		break;
	case RAT_QP:
		src = buf;
		for (srcLength = 0; dataIndex < inLength &&
			srcLength < sizeof(buf); ) {
		    if ('=' == data[dataIndex]) {
			if ('\r' == data[dataIndex+1]) {
			    dataIndex += 3;
			} else if ('\n' == data[dataIndex+1]) {
			    dataIndex += 2;
			} else {
			    src[srcLength++] = 16*(strchr(alphabetHEX,
				    data[dataIndex+1])-alphabetHEX)
				    + strchr(alphabetHEX,
				    data[dataIndex+2])-alphabetHEX;
			    dataIndex += 3;
			}
		    } else {
			src[srcLength++] = data[dataIndex++];
		    }
		}
		break;
	case RAT_7BIT:	/* fallthrough */
	case RAT_8BIT:	/* fallthrough */
	case RAT_BINARY:/* fallthrough */
	case RAT_UNKOWN:/* fallthrough */
	default:
		src = data;
		srcLength = inLength;
		dataIndex = inLength;
		break;
	}
	if (toCharset) {
	    for (index = 0; index < srcLength; index++) {
		if (allocated - dstIndex < 2) {
		    allocated += 64;
		    dest = (unsigned char*)REALLOC(dest, allocated);
		}
		if ('\r' == src[index]) {
		    continue;
		} else if ('\0' == src[index]) {
		    dest[dstIndex++] = '\\';
		    dest[dstIndex++] = '0';
		} else {
		    dest[dstIndex++] = src[index];
		}
	    }
	    dest[dstIndex] = '\0';
	    if (strcasecmp(toCharset, isCharset) &&
		    !RatEncodingCompat(interp, isCharset, toCharset)) {
		unsigned char *conv;

		conv = RatConvertEncoding(interp, isCharset, toCharset, dest);
		if (allocated < (int)strlen((char*)conv)+1) {
		    allocated = strlen((char*)conv)+1;
		    dest = (unsigned char*)REALLOC(dest,allocated);
		}
		strcpy((char*)dest, (char*)conv);
		dstIndex = strlen((char*)dest);
	    }
	} else {
	    memcpy((char*)&dest[dstIndex], src, srcLength);
	    dstIndex += srcLength;
	}
    }
    *outLength = dstIndex;
    dest[dstIndex] = '\0';
    return dest;
}

/*
 *----------------------------------------------------------------------
 *
 * RatEncodeHeaderLine --
 *
 *	Encodes one header line according to MIME, using the Q encoding.
 *	If doAll is non zero the entire data field will be encoded,
 *	otherwise just the words which contain characters with the eighth
 *	bit set. A word is encoded together with the space or tab in
 *	front of it. Lines which become too long are folded by inserting
 *	a newline followed by a space.
 *	The nameLength argument should tell how long the header name is in
 *	characters. This is so that the line folding can do its job properly.
 *	The charset argument must not be NULL.
 *
 * Results:
 *	A block of encoded header line. It is the callers responsibility to
 *	free this block later with a call to ckfree(). NULL is returned if
 *	line is NULL or empty.
 *
 * Side effects:
 *	None.
 *
 *
 *----------------------------------------------------------------------
 */

unsigned char*
RatEncodeHeaderLine(unsigned char *line, char *charset, int nameLength,
	int doAll)
{
    unsigned char *buf, *srcPtr, *dstPtr, *startPtr, *endPtr;
    unsigned char *wordSrc, *wordDst;
    int length = nameLength, wordLength;
    size_t lineLength, charsetLength;

    if (!line || !*line) {
	return NULL;
    }

    /*
     * Allocate for the worst case. Every source byte gives at most three
     * bytes of output, preceded by at most one fold inside an encoded
     * word (strlen(charset)+9 bytes), and every encoded word adds at
     * most strlen(charset)+8 bytes for its start and end. There can not
     * be more encoded words than source bytes. The extra space at the
     * end covers the terminating nulls written by sprintf and strcpy.
     */
    lineLength = strlen((char*)line);
    charsetLength = strlen(charset);
    dstPtr = buf = (unsigned char*)ckalloc(
	    lineLength*(2*charsetLength+20) + charsetLength + 16);

    /*
     * wordSrc, wordDst and wordLength remember where the current word
     * started (at the whitespace in front of it, or at the start of the
     * line) in the source, in the output and in the line length count.
     * They are used to back up when the word turns out to need encoding.
     * Counting the characters backwards does not work since a fold may
     * have been inserted in the output in the middle of the word.
     */
    wordSrc = line;
    wordDst = buf;
    wordLength = nameLength;

    for (srcPtr = line; *srcPtr;) {
	if (0x80 & *srcPtr) {
	    if (doAll) {
		startPtr = line;
		dstPtr = buf;
		endPtr = startPtr + lineLength;
		length = nameLength;
	    } else {
		startPtr = wordSrc;
		dstPtr = wordDst;
		length = wordLength;
		endPtr = srcPtr+1;
		while (*endPtr && *endPtr != ' ' && *endPtr != '\t') {
		    endPtr++;
		}
	    }
	    sprintf((char*)dstPtr, "=?%s?Q?", charset);
	    length += strlen((char*)dstPtr);
	    if (length > 70) {
		sprintf((char*)dstPtr, "\n =?%s?Q?", charset);
		/* Count what is on the new line, i.e. skip the newline */
		length = strlen((char*)dstPtr+1);
	    }
	    dstPtr += strlen((char*)dstPtr);
	    for (srcPtr = startPtr; srcPtr < endPtr; srcPtr++) {
		if (length > 70) {
		    /*
		     * End this encoded word and continue in a new one on
		     * the next line. The "?=\n" is not part of that line.
		     */
		    sprintf((char*)dstPtr, "?=\n =?%s?Q?", charset);
		    length = strlen((char*)dstPtr+3);
		    dstPtr += strlen((char*)dstPtr);
		}
		if (' ' == *srcPtr) {
		    *dstPtr++ = '_';
		    length++;
		} else if (*srcPtr & 0x80 || *srcPtr == '_'
			|| *srcPtr == '?' || *srcPtr == '=') {
		    *dstPtr++ = '=';
		    *dstPtr++ = alphabetHEX[*srcPtr >> 4];
		    *dstPtr++ = alphabetHEX[*srcPtr & 0x0f];
		    length += 3;
		} else {
		    *dstPtr++ = *srcPtr;
		    length++;
		}
	    }
	    *dstPtr++ = '?';
	    *dstPtr++ = '=';
	    length += 2;
	} else {
	    if (' ' == *srcPtr || '\t' == *srcPtr) {
		wordSrc = srcPtr;
		wordDst = dstPtr;
		wordLength = length;
	    }
	    *dstPtr++ = *srcPtr++;
	    length++;
	    if (length > 75) {
		strcpy((char*)dstPtr, "\n ");
		dstPtr += 2;
		length = 1;
	    }
	}
    }
    *dstPtr = '\0';
    return buf;
}

/*
 *----------------------------------------------------------------------
 *
 * RatConvertEncoding --
 *
 *	Tries to convert the encoding of the given data. The only
 *	conversion done is to US-ASCII: when both encodings are reported
 *	as compatible with "us-ascii" by RatEncodingCompat(), or the
 *	target encoding is "7bit", every character with the eighth bit
 *	set is replaced with a question mark. In all other cases the data
 *	is replaced with a message which tells that it can not be
 *	displayed.
 *
 * Results:
 *	A pointer to a static area containing the converted data. This
 *	area may be overwritten by the next call to RatConvertEncoding.
 *
 * Side effects:
 *	The static area is enlarged when needed.
 *
 *----------------------------------------------------------------------
 */

unsigned char*
RatConvertEncoding(Tcl_Interp *interp, char *fromEncoding, char *toEncoding,
	unsigned char *data)
{
    static unsigned char *scratch = NULL;
    static int scratchSize = 0;
    char *fallbackFormat =
	    "[Here was data encoded in %s, which can not be displayed]";
    unsigned char *in, *out;
    int needed;

    /*
     * Fallback, used unless only characters compatible with US-ASCII
     * are wanted.
     */
    if (!((RatEncodingCompat(interp, "us-ascii", fromEncoding)
	    && RatEncodingCompat(interp, "us-ascii", toEncoding))
	    || !strcmp("7bit", toEncoding))) {
	needed = (int)(strlen(fallbackFormat)+strlen(fromEncoding)+1);
	if (scratchSize < needed) {
	    scratchSize = needed;
	    scratch = (unsigned char*)REALLOC(scratch, scratchSize);
	}
	sprintf((char*)scratch, fallbackFormat, fromEncoding);
	return scratch;
    }

    /*
     * Copy the data and replace everything outside US-ASCII
     */
    needed = (int)strlen((char*)data)+1;
    if (scratchSize < needed) {
	scratchSize = needed;
	scratch = (unsigned char*)REALLOC(scratch, scratchSize);
    }
    for (in = data, out = scratch; *in; in++, out++) {
	*out = (0x80 & *in) ? '?' : *in;
    }
    *out = '\0';
    return scratch;
}
