/* This file is part of the YAZ toolkit.
 * Copyright (C) 1995-2012 Index Data
 * See the file LICENSE for details.
 */
/**
 * \file
 * \brief MARC-8 decoding
 *
 * MARC-8 reference:
 *  http://www.loc.gov/marc/specifications/speccharmarc8.html
 */

#if HAVE_CONFIG_H
#include <config.h>
#endif

#include <assert.h>
#include <errno.h>
#include <string.h>

#include <yaz/xmalloc.h>
#include "iconv-p.h"

/**
 * \brief Per-conversion state of the MARC-8 decoder.
 *
 * Holds the currently designated character sets and a small queue of
 * combining characters that were read ahead of their base character.
 */
struct decoder_data {
    /* final character of the set designated as G0; used for bytes < 128 */
    int g0_mode;
    /* final character of the set designated as G1; used for bytes >= 128 */
    int g1_mode;

    /* index of the next queued combining character to hand out */
    int comb_offset;
    /* number of valid entries in comb_x / comb_no_read */
    int comb_size;
    /* queued combining characters, in the order they were read */
    unsigned long comb_x[8];
    /* input byte count reported for the corresponding comb_x entry */
    size_t comb_no_read[8];
};

/* Per-character-set conversion handlers (generated by charconv.tcl).
 * The two hex digits in each name are the code of the final character
 * that selects the set in yaz_read_marc8_comb:
 *   42 'B', 45 'E', 67 'g', 62 'b', 70 'p', 32 '2',
 *   4E 'N', 51 'Q', 33 '3', 34 '4', 53 'S', 31 '1'.
 */
yaz_conv_func_t yaz_marc8_42_conv;
yaz_conv_func_t yaz_marc8_45_conv;
yaz_conv_func_t yaz_marc8_67_conv;
yaz_conv_func_t yaz_marc8_62_conv;
yaz_conv_func_t yaz_marc8_70_conv;
yaz_conv_func_t yaz_marc8_32_conv;
yaz_conv_func_t yaz_marc8_4E_conv;
yaz_conv_func_t yaz_marc8_51_conv;
yaz_conv_func_t yaz_marc8_33_conv;
yaz_conv_func_t yaz_marc8_34_conv;
yaz_conv_func_t yaz_marc8_53_conv;
yaz_conv_func_t yaz_marc8_31_conv;


/* Forward declaration: reads one character (after any leading escape
 * sequences) and reports through *comb whether it is a combining
 * character. Defined further down; needed here by read_marc8.
 */
static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
                                         struct decoder_data *data,
                                         unsigned char *inp,
                                         size_t inbytesleft, size_t *no_read,
                                         int *comb);

/**
 * \brief Read handler for MARC-8: returns one character per call.
 *
 * In the input, combining characters come before the character they
 * belong to. This handler reads ahead over the combining characters,
 * queues them in the decoder state and returns the character that follows
 * them first; the queued combining characters are returned by the
 * subsequent calls. Every returned character reports only its own byte
 * count in *no_read, so the counts add up to the length of the whole
 * sequence once all of its characters have been returned.
 *
 * \param cd          conversion handle (receives the error code)
 * \param d           decoder whose data member is a struct decoder_data
 * \param inp         input bytes
 * \param inbytesleft number of bytes available at inp
 * \param no_read     receives the byte count for the returned character
 * \return the character read, or 0 if none could be produced (*no_read is
 *         0 when an error code was set)
 */
static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
                                unsigned char *inp,
                                size_t inbytesleft, size_t *no_read)
{
    struct decoder_data *data = (struct decoder_data *) d->data;
    /* capacity of the combining queue, taken from the array itself */
    const int comb_max =
        (int) (sizeof(data->comb_x) / sizeof(data->comb_x[0]));
    unsigned long x;

    if (data->comb_offset < data->comb_size)
    {
        /* Hand out the next queued combining character.
           Double-diacritic second halves need no special treatment here:
           per the original note, they have no entry in the conversion
           tables, so the handlers return 0 with a byte count of 1 and the
           byte is simply skipped by the caller. */
        *no_read = data->comb_no_read[data->comb_offset];
        x = data->comb_x[data->comb_offset];
        data->comb_offset++;
        return x;
    }

    data->comb_offset = 0;
    data->comb_size = 0;
    for (;;)
    {
        int comb = 0;

        if (inbytesleft == 0 && data->comb_size)
        {
            /* combining characters without the character they apply to.
               NOTE(review): the characters queued so far stay pending on
               this path - confirm how callers retry after EINVAL. */
            yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
            x = 0;
            *no_read = 0;
            break;
        }
        x = yaz_read_marc8_comb(cd, data, inp, inbytesleft, no_read, &comb);
        if (!comb || !x)
            break;
        /* Queue is full: return this character as the leading one instead
           of storing it. Previously the last queued character was returned
           here AND kept in the queue, so it was emitted twice and its
           bytes were counted twice. */
        if (data->comb_size == comb_max)
            break;
        data->comb_x[data->comb_size] = x;
        data->comb_no_read[data->comb_size] = *no_read;
        data->comb_size++;
        inp += *no_read;
        inbytesleft = inbytesleft - *no_read;
    }
    return x;
}

/**
 * \brief Read handler for the "MARC8s" variant.
 *
 * Works like read_marc8, but when exactly one combining character is
 * queued it asks yaz_iso_8859_1_lookup_x12 for a replacement of the pair
 * (presumably a precomposed ISO-8859-1 character - see that helper).
 * On success the replacement is returned, the byte count of the combining
 * character is added to *no_read and the queue is emptied.
 *
 * \param cd          conversion handle
 * \param d           decoder whose data member is a struct decoder_data
 * \param inp         input bytes
 * \param inbytesleft number of bytes available at inp
 * \param no_read     receives the number of bytes accounted for
 * \return the character read (possibly replaced), or 0
 */
static unsigned long read_marc8s(yaz_iconv_t cd, yaz_iconv_decoder_t d,
                                 unsigned char *inp,
                                 size_t inbytesleft, size_t *no_read)
{
    struct decoder_data *state = (struct decoder_data *) d->data;
    unsigned long ch = read_marc8(cd, d, inp, inbytesleft, no_read);

    /* only a character with exactly one queued companion is a candidate */
    if (ch == 0 || state->comb_size != 1)
        return ch;

    if (yaz_iso_8859_1_lookup_x12(ch, state->comb_x[0], &ch))
    {
        /* the pair is returned as one character: account for both */
        *no_read += state->comb_no_read[0];
        state->comb_size = 0;
    }
    return ch;
}

/**
 * \brief Reads a single MARC-8 character, honouring escape sequences.
 *
 * Any escape sequences (introduced by byte 27) in front of the character
 * are consumed first; each one stores its final character as the new G0
 * or G1 set in \a data. The character itself is then converted by the
 * handler of the active set: G0 for bytes below 128, G1 otherwise.
 *
 * \param cd          conversion handle (receives the error code)
 * \param data        decoder state; g0_mode / g1_mode may be updated
 * \param inp         input bytes
 * \param inbytesleft number of bytes available at inp
 * \param no_read     receives the bytes consumed: escapes plus character
 * \param comb        set by the conversion handler; cleared before the
 *                    handler is called, untouched for a blank or when no
 *                    character follows the escape sequences
 * \return the converted character; 0 when no character is available,
 *         when the handler has no mapping, or on error.
 *         On a truncated escape sequence YAZ_ICONV_EINVAL is set and on an
 *         unknown character set YAZ_ICONV_EILSEQ; *no_read is 0 in both
 *         cases.
 */
static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
                                         struct decoder_data *data,
                                         unsigned char *inp,
                                         size_t inbytesleft, size_t *no_read,
                                         int *comb)
{
    unsigned long ch;
    size_t used = 0;
    int charset;

    *no_read = 0;

    /* Escape sequences: parse with a scratch cursor and commit only when
       the sequence is complete. */
    while (inbytesleft > 0 && *inp == 27)
    {
        unsigned char *p = inp + 1;
        size_t left = inbytesleft - 1;
        int *target = &data->g0_mode;

        if (left == 0)
            goto incomplete;
        if (*p == '$') /* multi-byte set marker */
        {
            p++;
            left--;
        }
        if (left == 0)
            goto incomplete;
        switch (*p)
        {
        case '(':
        case ',':      /* designates G0 */
            p++;
            left--;
            break;
        case ')':
        case '-':      /* designates G1 */
            p++;
            left--;
            target = &data->g1_mode;
            break;
        default:       /* no designator: G0 is the target */
            break;
        }
        if (left == 0)
            goto incomplete;
        if (*p == '!') /* extra intermediate used for ANSEL */
        {
            p++;
            left--;
        }
        if (left == 0)
            goto incomplete;

        *target = *p;  /* final character names the set */
        p++;
        left--;

        *no_read += inbytesleft - left;
        inp = p;
        inbytesleft = left;
    }

    /* nothing behind the escape sequences (or no input at all) */
    if (inbytesleft == 0)
        return 0;

    /* a blank is returned as is, before any character set is consulted */
    if (*inp == ' ')
    {
        ++*no_read;
        return ' ';
    }

    *comb = 0;
    charset = (*inp >= 128) ? data->g1_mode : data->g0_mode;

    switch (charset)
    {
    case 'B':  /* Basic ASCII */
    case 's':  /* ASCII */
        ch = yaz_marc8_42_conv(inp, inbytesleft, &used, comb, 127, 0);
        break;
    case 'E':  /* ANSEL */
        ch = yaz_marc8_45_conv(inp, inbytesleft, &used, comb, 127, 128);
        break;
    case 'g':  /* Greek */
        ch = yaz_marc8_67_conv(inp, inbytesleft, &used, comb, 127, 0);
        break;
    case 'b':  /* Subscripts */
        ch = yaz_marc8_62_conv(inp, inbytesleft, &used, comb, 127, 0);
        break;
    case 'p':  /* Superscripts */
        ch = yaz_marc8_70_conv(inp, inbytesleft, &used, comb, 127, 0);
        break;
    case '2':  /* Basic Hebrew */
        ch = yaz_marc8_32_conv(inp, inbytesleft, &used, comb, 127, 0);
        break;
    case 'N':  /* Basic Cyrillic */
        ch = yaz_marc8_4E_conv(inp, inbytesleft, &used, comb, 127, 0);
        break;
    case 'Q':  /* Extended Cyrillic */
        ch = yaz_marc8_51_conv(inp, inbytesleft, &used, comb, 127, 0);
        break;
    case '3':  /* Basic Arabic */
        ch = yaz_marc8_33_conv(inp, inbytesleft, &used, comb, 127, 0);
        break;
    case '4':  /* Extended Arabic */
        ch = yaz_marc8_34_conv(inp, inbytesleft, &used, comb, 127, 0);
        break;
    case 'S':  /* Greek */
        ch = yaz_marc8_53_conv(inp, inbytesleft, &used, comb, 127, 0);
        break;
    case '1':  /* Chinese, Japanese, Korean (EACC) */
        ch = yaz_marc8_31_conv(inp, inbytesleft, &used, comb, 127, 0);
        break;
    default:   /* unknown set: report and consume nothing */
        *no_read = 0;
        yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
        return 0;
    }
    *no_read += used;
    return ch;

incomplete:
    /* input ended inside an escape sequence: consume nothing */
    *no_read = 0;
    yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
    return 0;
}


/**
 * \brief Init handler: puts the decoder state back to its defaults.
 *
 * G0 is set to 'B' and G1 to 'E' (the Basic ASCII and ANSEL cases of the
 * mode switch), and the queue of combining characters is emptied.
 * The cd, inp, inbytesleft and no_read arguments are not used.
 *
 * \param d decoder whose data member is a struct decoder_data
 * \return always 0
 */
static size_t init_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
                         unsigned char *inp,
                         size_t inbytesleft, size_t *no_read)
{
    struct decoder_data *state = (struct decoder_data *) d->data;

    /* no combining characters pending */
    state->comb_size = 0;
    state->comb_offset = 0;

    /* default character sets */
    state->g1_mode = 'E';
    state->g0_mode = 'B';

    return 0;
}

/**
 * \brief Destroy handler: releases the decoder state attached to \a d.
 *
 * The data member of \a d is freed with xfree; it is not reset afterwards.
 */
void destroy_marc8(yaz_iconv_decoder_t d)
{
    struct decoder_data *state = (struct decoder_data *) d->data;

    xfree(state);
}

/**
 * \brief Sets up \a d as a MARC-8 decoder if \a fromcode names one.
 *
 * "MARC8" and "ANSEL" select read_marc8, "MARC8s" selects read_marc8s
 * (names are compared with yaz_matchstr). On a match, decoder state is
 * allocated and attached to \a d together with the init and destroy
 * handlers.
 *
 * \param fromcode name of the source character set
 * \param d        decoder to fill in
 * \return \a d when fromcode was recognised, 0 otherwise (nothing is
 *         allocated in that case)
 */
yaz_iconv_decoder_t yaz_marc8_decoder(const char *fromcode,
                                      yaz_iconv_decoder_t d)
{
    struct decoder_data *data;

    if (!yaz_matchstr(fromcode, "MARC8") || !yaz_matchstr(fromcode, "ANSEL"))
        d->read_handle = read_marc8;
    else if (!yaz_matchstr(fromcode, "MARC8s"))
        d->read_handle = read_marc8s;
    else
        return 0;

    data = (struct decoder_data *) xmalloc(sizeof(*data));

    /* Start from the same defaults the init handler establishes, so the
       state is never indeterminate, even if a handler is reached before
       the init handler has run. */
    data->g0_mode = 'B';
    data->g1_mode = 'E';
    data->comb_offset = 0;
    data->comb_size = 0;

    d->data = data;
    d->init_handle = init_marc8;
    d->destroy_handle = destroy_marc8;
    return d;
}


/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */

