Logo Search packages:      
Sourcecode: glibc version File versions

gconv_simple.c

/* Simple transformations functions.
   Copyright (C) 1997-2002, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <byteswap.h>
#include <dlfcn.h>
#include <endian.h>
#include <errno.h>
#include <gconv.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <sys/param.h>
#include <gconv_int.h>

#define BUILTIN_ALIAS(s1, s2) /* nothing */
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
                         MinF, MaxF, MinT, MaxT) \
  extern int Fct (struct __gconv_step *, struct __gconv_step_data *,          \
              __const unsigned char **, __const unsigned char *,        \
              unsigned char **, size_t *, int, int);
#include "gconv_builtin.h"


#ifndef EILSEQ
# define EILSEQ EINVAL
#endif


/* Specialized conversion function for a single byte to INTERNAL, recognizing
   only ASCII characters.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  if (c < 0x80)
    return c;
  else
    return WEOF;
}


/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal ucs4 format and the real UCS4
   format is, if any, the endianess.  The Unicode/ISO 10646 says that
   unless some higher protocol specifies it differently, the byte
   order is big endian.*/
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       4
#define MIN_NEEDED_TO         4
#define FROM_DIRECTION        1
#define FROM_LOOP       internal_ucs4_loop
#define TO_LOOP               internal_ucs4_loop /* This is not used.  */
#define FUNCTION_NAME         __gconv_transform_internal_ucs4


static inline int
internal_ucs4_loop (struct __gconv_step *step,
                struct __gconv_step_data *step_data,
                const unsigned char **inptrp, const unsigned char *inend,
                unsigned char **outptrp, unsigned char *outend,
                size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    *((uint32_t *) outptr)++ = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}

#ifndef _STRING_ARCH_unaligned
static inline int
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                        struct __gconv_step_data *step_data,
                        const unsigned char **inptrp,
                        const unsigned char *inend,
                        unsigned char **outptrp, unsigned char *outend,
                        size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif


static inline int
internal_ucs4_loop_single (struct __gconv_step *step,
                     struct __gconv_step_data *step_data,
                     const unsigned char **inptrp,
                     const unsigned char *inend,
                     unsigned char **outptrp, unsigned char *outend,
                     size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];

  *outptrp += 4;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* XXX unaligned */
  *(*((uint32_t **) outptrp)++) = state->__value.__wch;
#else
# error "This endianess is not supported."
#endif

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}

#include <iconv/skeleton.c>


/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
   for the other direction we have to check for correct values here.  */
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       4
#define MIN_NEEDED_TO         4
#define FROM_DIRECTION        1
#define FROM_LOOP       ucs4_internal_loop
#define TO_LOOP               ucs4_internal_loop /* This is not used.  */
#define FUNCTION_NAME         __gconv_transform_ucs4_internal


static inline int
ucs4_internal_loop (struct __gconv_step *step,
                struct __gconv_step_data *step_data,
                const unsigned char **inptrp, const unsigned char *inend,
                unsigned char **outptrp, unsigned char *outend,
                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
      {
        /* The value is too large.  We don't try transliteration here since
           this is not an error because of the lack of possibilities to
           represent the result.  This is a genuine bug in the input since
           UCS4 does not allow such values.  */
        if (irreversible == NULL)
          /* We are transliterating, don't try to correct anything.  */
          return __GCONV_ILLEGAL_INPUT;

        if (flags & __GCONV_IGNORE_ERRORS)
          {
            /* Just ignore this character.  */
            ++*irreversible;
            continue;
          }

        *inptrp = inptr;
        *outptrp = outptr;
        return __GCONV_ILLEGAL_INPUT;
      }

      *((uint32_t *) outptr)++ = inval;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}

#ifndef _STRING_ARCH_unaligned
static inline int
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                        struct __gconv_step_data *step_data,
                        const unsigned char **inptrp,
                        const unsigned char *inend,
                        unsigned char **outptrp, unsigned char *outend,
                        size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      if (__builtin_expect (inptr[0] > 0x80, 0))
      {
        /* The value is too large.  We don't try transliteration here since
           this is not an error because of the lack of possibilities to
           represent the result.  This is a genuine bug in the input since
           UCS4 does not allow such values.  */
        if (irreversible == NULL)
          /* We are transliterating, don't try to correct anything.  */
          return __GCONV_ILLEGAL_INPUT;

        if (flags & __GCONV_IGNORE_ERRORS)
          {
            /* Just ignore this character.  */
            ++*irreversible;
            continue;
          }

        *inptrp = inptr;
        *outptrp = outptr;
        return __GCONV_ILLEGAL_INPUT;
      }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif


static inline int
ucs4_internal_loop_single (struct __gconv_step *step,
                     struct __gconv_step_data *step_data,
                     const unsigned char **inptrp,
                     const unsigned char *inend,
                     unsigned char **outptrp, unsigned char *outend,
                     size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x80,
                  0))
    {
      /* The value is too large.  We don't try transliteration here since
       this is not an error because of the lack of possibilities to
       represent the result.  This is a genuine bug in the input since
       UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
      {
        *inptrp -= cnt - (state->__count & 7);
        return __GCONV_ILLEGAL_INPUT;
      }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}

#include <iconv/skeleton.c>


/* Similarly for the little endian form.  */
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       4
#define MIN_NEEDED_TO         4
#define FROM_DIRECTION        1
#define FROM_LOOP       internal_ucs4le_loop
#define TO_LOOP               internal_ucs4le_loop /* This is not used.  */
#define FUNCTION_NAME         __gconv_transform_internal_ucs4le


static inline int
internal_ucs4le_loop (struct __gconv_step *step,
                  struct __gconv_step_data *step_data,
                  const unsigned char **inptrp, const unsigned char *inend,
                  unsigned char **outptrp, unsigned char *outend,
                  size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    *((uint32_t *) outptr)++ = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}

#ifndef _STRING_ARCH_unaligned
static inline int
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                        struct __gconv_step_data *step_data,
                        const unsigned char **inptrp,
                        const unsigned char *inend,
                        unsigned char **outptrp, unsigned char *outend,
                        size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif


static inline int
internal_ucs4le_loop_single (struct __gconv_step *step,
                       struct __gconv_step_data *step_data,
                       const unsigned char **inptrp,
                       const unsigned char *inend,
                       unsigned char **outptrp, unsigned char *outend,
                       size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];

  *outptrp += 4;
#else
  /* XXX unaligned */
  *(*((uint32_t **) outptrp)++) = state->__value.__wch;
#endif

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}

#include <iconv/skeleton.c>


/* And finally from UCS4-LE to the internal encoding.  */
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       4
#define MIN_NEEDED_TO         4
#define FROM_DIRECTION        1
#define FROM_LOOP       ucs4le_internal_loop
#define TO_LOOP               ucs4le_internal_loop /* This is not used.  */
#define FUNCTION_NAME         __gconv_transform_ucs4le_internal


static inline int
ucs4le_internal_loop (struct __gconv_step *step,
                  struct __gconv_step_data *step_data,
                  const unsigned char **inptrp, const unsigned char *inend,
                  unsigned char **outptrp, unsigned char *outend,
                  size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
      {
        /* The value is too large.  We don't try transliteration here since
           this is not an error because of the lack of possibilities to
           represent the result.  This is a genuine bug in the input since
           UCS4 does not allow such values.  */
        if (irreversible == NULL)
          /* We are transliterating, don't try to correct anything.  */
          return __GCONV_ILLEGAL_INPUT;

        if (flags & __GCONV_IGNORE_ERRORS)
          {
            /* Just ignore this character.  */
            ++*irreversible;
            continue;
          }

        return __GCONV_ILLEGAL_INPUT;
      }

      *((uint32_t *) outptr)++ = inval;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}

#ifndef _STRING_ARCH_unaligned
static inline int
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                        struct __gconv_step_data *step_data,
                        const unsigned char **inptrp,
                        const unsigned char *inend,
                        unsigned char **outptrp, unsigned char *outend,
                        size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      if (__builtin_expect (inptr[3] > 0x80, 0))
      {
        /* The value is too large.  We don't try transliteration here since
           this is not an error because of the lack of possibilities to
           represent the result.  This is a genuine bug in the input since
           UCS4 does not allow such values.  */
        if (irreversible == NULL)
          /* We are transliterating, don't try to correct anything.  */
          return __GCONV_ILLEGAL_INPUT;

        if (flags & __GCONV_IGNORE_ERRORS)
          {
            /* Just ignore this character.  */
            ++*irreversible;
            continue;
          }

        *inptrp = inptr;
        *outptrp = outptr;
        return __GCONV_ILLEGAL_INPUT;
      }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif


static inline int
ucs4le_internal_loop_single (struct __gconv_step *step,
                       struct __gconv_step_data *step_data,
                       const unsigned char **inptrp,
                       const unsigned char *inend,
                       unsigned char **outptrp, unsigned char *outend,
                       size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x80,
                  0))
    {
      /* The value is too large.  We don't try transliteration here since
       this is not an error because of the lack of possibilities to
       represent the result.  This is a genuine bug in the input since
       UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
      return __GCONV_ILLEGAL_INPUT;
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}

#include <iconv/skeleton.c>


/* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       1
#define MIN_NEEDED_TO         4
#define FROM_DIRECTION        1
#define FROM_LOOP       ascii_internal_loop
#define TO_LOOP               ascii_internal_loop /* This is not used.  */
#define FUNCTION_NAME         __gconv_transform_ascii_internal
#define ONE_DIRECTION         1

#define MIN_NEEDED_INPUT      MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT     MIN_NEEDED_TO
#define LOOPFCT               FROM_LOOP
#define BODY \
  {                                                         \
    if (__builtin_expect (*inptr > '\x7f', 0))                          \
      {                                                           \
      /* The value is too large.  We don't try transliteration here since   \
         this is not an error because of the lack of possibilities to         \
         represent the result.  This is a genuine bug in the input since    \
         ASCII does not allow such values.  */                    \
      STANDARD_FROM_LOOP_ERR_HANDLER (1);                         \
      }                                                           \
    else                                                    \
      /* It's an one byte sequence.  */                                 \
      *((uint32_t *) outptr)++ = *inptr++;                              \
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>


/* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       4
#define MIN_NEEDED_TO         1
#define FROM_DIRECTION        1
#define FROM_LOOP       internal_ascii_loop
#define TO_LOOP               internal_ascii_loop /* This is not used.  */
#define FUNCTION_NAME         __gconv_transform_internal_ascii
#define ONE_DIRECTION         1

#define MIN_NEEDED_INPUT      MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT     MIN_NEEDED_TO
#define LOOPFCT               FROM_LOOP
#define BODY \
  {                                                         \
    if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0))       \
      {                                                           \
      UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4);             \
      STANDARD_TO_LOOP_ERR_HANDLER (4);                           \
      }                                                           \
    else                                                    \
      /* It's an one byte sequence.  */                                 \
      *outptr++ = *((const uint32_t *) inptr)++;                        \
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>


/* Convert from the internal (UCS4-like) format to UTF-8.  */
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       4
#define MIN_NEEDED_TO         1
#define MAX_NEEDED_TO         6
#define FROM_DIRECTION        1
#define FROM_LOOP       internal_utf8_loop
#define TO_LOOP               internal_utf8_loop /* This is not used.  */
#define FUNCTION_NAME         __gconv_transform_internal_utf8
#define ONE_DIRECTION         1

#define MIN_NEEDED_INPUT      MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT     MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT     MAX_NEEDED_TO
#define LOOPFCT               FROM_LOOP
#define BODY \
  {                                                         \
    uint32_t wc = *((const uint32_t *) inptr);                          \
                                                            \
    if (wc < 0x80)                                                \
      /* It's an one byte sequence.  */                                 \
      *outptr++ = (unsigned char) wc;                                   \
    else if (__builtin_expect (wc <= 0x7fffffff, 1))                    \
      {                                                           \
      size_t step;                                                \
      char *start;                                                \
                                                            \
      for (step = 2; step < 6; ++step)                            \
        if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0)               \
          break;                                            \
                                                            \
      if (__builtin_expect (outptr + step > outend, 0))                 \
        {                                                   \
          /* Too long.  */                                        \
          result = __GCONV_FULL_OUTPUT;                           \
          break;                                            \
        }                                                   \
                                                            \
      start = outptr;                                             \
      *outptr = (unsigned char) (~0xff >> step);                        \
      outptr += step;                                             \
      --step;                                                     \
      do                                                    \
        {                                                   \
          start[step] = 0x80 | (wc & 0x3f);                             \
          wc >>= 6;                                               \
        }                                                   \
      while (--step > 0);                                         \
      start[0] |= wc;                                             \
      }                                                           \
    else                                                    \
      {                                                           \
      STANDARD_TO_LOOP_ERR_HANDLER (4);                           \
      }                                                           \
                                                            \
    inptr += 4;                                                   \
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>


/* Convert from UTF-8 to the internal (UCS4-like) format.  */
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       1
#define MAX_NEEDED_FROM       6
#define MIN_NEEDED_TO         4
#define FROM_DIRECTION        1
#define FROM_LOOP       utf8_internal_loop
#define TO_LOOP               utf8_internal_loop /* This is not used.  */
#define FUNCTION_NAME         __gconv_transform_utf8_internal
#define ONE_DIRECTION         1

#define MIN_NEEDED_INPUT      MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT      MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT     MIN_NEEDED_TO
#define LOOPFCT               FROM_LOOP
#define BODY \
  {                                                         \
    uint32_t ch;                                            \
    uint_fast32_t cnt;                                            \
    uint_fast32_t i;                                              \
                                                            \
    /* Next input byte.  */                                       \
    ch = *inptr;                                            \
                                                            \
    if (ch < 0x80)                                                \
      {                                                           \
      /* One byte sequence.  */                                   \
      cnt = 1;                                              \
      ++inptr;                                              \
      }                                                           \
    else                                                    \
      {                                                           \
      if (ch >= 0xc2 && ch < 0xe0)                                \
        {                                                   \
          /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
             otherwise the wide character could have been represented         \
             using a single byte.  */                                   \
          cnt = 2;                                                \
          ch &= 0x1f;                                             \
        }                                                   \
        else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))             \
        {                                                   \
          /* We expect three bytes.  */                           \
          cnt = 3;                                                \
          ch &= 0x0f;                                             \
        }                                                   \
      else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))               \
        {                                                   \
          /* We expect four bytes.  */                            \
          cnt = 4;                                                \
          ch &= 0x07;                                             \
        }                                                   \
      else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))               \
        {                                                   \
          /* We expect five bytes.  */                            \
          cnt = 5;                                                \
          ch &= 0x03;                                             \
        }                                                   \
      else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1))               \
        {                                                   \
          /* We expect six bytes.  */                                   \
          cnt = 6;                                                \
          ch &= 0x01;                                             \
        }                                                   \
      else                                                  \
        {                                                   \
          int skipped;                                      \
                                                            \
          /* Search the end of this ill-formed UTF-8 character.  This         \
             is the next byte with (x & 0xc0) != 0x80.  */              \
          skipped = 0;                                      \
          do                                                      \
            ++skipped;                                      \
          while (inptr + skipped < inend                          \
               && (*(inptr + skipped) & 0xc0) == 0x80             \
               && skipped < 5);                                   \
                                                            \
          STANDARD_FROM_LOOP_ERR_HANDLER (skipped);                     \
        }                                                   \
                                                            \
      if (__builtin_expect (inptr + cnt > inend, 0))                    \
        {                                                   \
          /* We don't have enough input.  But before we report that check   \
             that all the bytes are correct.  */                        \
          for (i = 1; inptr + i < inend; ++i)                           \
            if ((inptr[i] & 0xc0) != 0x80)                              \
            break;                                                \
                                                            \
          if (__builtin_expect (inptr + i == inend, 1))                 \
            {                                                     \
            result = __GCONV_INCOMPLETE_INPUT;                    \
            break;                                                \
            }                                                     \
                                                            \
          STANDARD_FROM_LOOP_ERR_HANDLER (i);                           \
        }                                                   \
                                                            \
      /* Read the possible remaining bytes.  */                   \
      for (i = 1; i < cnt; ++i)                                   \
        {                                                   \
          uint32_t byte = inptr[i];                               \
                                                            \
          if ((byte & 0xc0) != 0x80)                                    \
            /* This is an illegal encoding.  */                   \
            break;                                                \
                                                            \
          ch <<= 6;                                               \
          ch |= byte & 0x3f;                                      \
        }                                                   \
                                                            \
      /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.                 \
         If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could         \
         have been represented with fewer than cnt bytes.  */                 \
      if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))                 \
        {                                                   \
          /* This is an illegal encoding.  */                           \
          STANDARD_FROM_LOOP_ERR_HANDLER (i);                           \
        }                                                   \
                                                            \
      inptr += cnt;                                               \
      }                                                           \
                                                            \
    /* Now adjust the pointers and store the result.  */                \
    *((uint32_t *) outptr)++ = ch;                                \
  }
#define LOOP_NEED_FLAGS

#define STORE_REST \
  {                                                         \
    /* We store the remaining bytes while converting them into the UCS4       \
       format.  We can assume that the first byte in the buffer is            \
       correct and that it requires a larger number of bytes than there       \
       are in the input buffer.  */                               \
    wint_t ch = **inptrp;                                         \
    size_t cnt, r;                                                \
                                                            \
    state->__count = inend - *inptrp;                                   \
                                                            \
    if (ch >= 0xc2 && ch < 0xe0)                                  \
      {                                                           \
      /* We expect two bytes.  The first byte cannot be 0xc0 or         \
         0xc1, otherwise the wide character could have been             \
         represented using a single byte.  */                           \
      cnt = 2;                                              \
      ch &= 0x1f;                                           \
      }                                                           \
    else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))                       \
      {                                                           \
      /* We expect three bytes.  */                               \
      cnt = 3;                                              \
      ch &= 0x0f;                                           \
      }                                                           \
    else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))                       \
      {                                                           \
      /* We expect four bytes.  */                                \
      cnt = 4;                                              \
      ch &= 0x07;                                           \
      }                                                           \
    else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))                       \
      {                                                           \
      /* We expect five bytes.  */                                \
      cnt = 5;                                              \
      ch &= 0x03;                                           \
      }                                                           \
    else                                                    \
      {                                                           \
      /* We expect six bytes.  */                                 \
      cnt = 6;                                              \
      ch &= 0x01;                                           \
      }                                                           \
                                                            \
    /* The first byte is already consumed.  */                          \
    r = cnt - 1;                                            \
    while (++(*inptrp) < inend)                                         \
      {                                                           \
      ch <<= 6;                                             \
      ch |= **inptrp & 0x3f;                                      \
      --r;                                                  \
      }                                                           \
                                                            \
    /* Shift for the so far missing bytes.  */                          \
    ch <<= r * 6;                                           \
                                                            \
    /* Store the number of bytes expected for the entire sequence.  */        \
    state->__count |= cnt << 8;                                         \
                                                            \
    /* Store the value.  */                                       \
    state->__value.__wch = ch;                                          \
  }

#define UNPACK_BYTES \
  {                                                         \
    static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };  \
    wint_t wch = state->__value.__wch;                                  \
    size_t ntotal = state->__count >> 8;                          \
                                                            \
    inlen = state->__count & 255;                                 \
                                                            \
    bytebuf[0] = inmask[ntotal - 2];                                    \
                                                            \
    do                                                            \
      {                                                           \
      if (--ntotal < inlen)                                       \
        bytebuf[ntotal] = 0x80 | (wch & 0x3f);                    \
      wch >>= 6;                                            \
      }                                                           \
    while (ntotal > 1);                                           \
                                                            \
    bytebuf[0] |= wch;                                            \
  }

#define CLEAR_STATE \
  state->__count = 0


#include <iconv/loop.c>
#include <iconv/skeleton.c>


/* Convert from UCS2 to the internal (UCS4-like) format.  */
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       2
#define MIN_NEEDED_TO         4
#define FROM_DIRECTION        1
#define FROM_LOOP       ucs2_internal_loop
#define TO_LOOP               ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME         __gconv_transform_ucs2_internal
#define ONE_DIRECTION         1

#define MIN_NEEDED_INPUT      MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT     MIN_NEEDED_TO
#define LOOPFCT               FROM_LOOP
#define BODY \
  {                                                         \
    uint16_t u1 = *((const uint16_t *) inptr);                          \
                                                            \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))              \
      {                                                           \
      /* Surrogate characters in UCS-2 input are not valid.  Reject           \
         them.  (Catching this here is not security relevant.)  */            \
      STANDARD_FROM_LOOP_ERR_HANDLER (2);                         \
      }                                                           \
                                                            \
    *((uint32_t *) outptr)++ = u1;                                \
    inptr += 2;                                                   \
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>


/* Convert from the internal (UCS4-like) format to UCS2.  */
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       4
#define MIN_NEEDED_TO         2
#define FROM_DIRECTION        1
#define FROM_LOOP       internal_ucs2_loop
#define TO_LOOP               internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME         __gconv_transform_internal_ucs2
#define ONE_DIRECTION         1

#define MIN_NEEDED_INPUT      MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT     MIN_NEEDED_TO
#define LOOPFCT               FROM_LOOP
#define BODY \
  {                                                         \
    uint32_t val = *((const uint32_t *) inptr);                         \
                                                            \
    if (__builtin_expect (val >= 0x10000, 0))                           \
      {                                                           \
      UNICODE_TAG_HANDLER (val, 4);                               \
      STANDARD_TO_LOOP_ERR_HANDLER (4);                           \
      }                                                           \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))       \
      {                                                           \
      /* Surrogate characters in UCS-4 input are not valid.             \
         We must catch this, because the UCS-2 output might be          \
         interpreted as UTF-16 by other programs.  If we let                  \
         surrogates pass through, attackers could make a security       \
         hole exploit by synthesizing any desired plane 1-16                  \
         character.  */                                     \
      result = __GCONV_ILLEGAL_INPUT;                                   \
      if (! ignore_errors_p ())                                   \
        break;                                              \
      inptr += 4;                                           \
      ++*irreversible;                                      \
      continue;                                             \
      }                                                           \
    else                                                    \
      {                                                           \
      *((uint16_t *) outptr)++ = val;                                   \
      inptr += 4;                                           \
      }                                                           \
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>


/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       2
#define MIN_NEEDED_TO         4
#define FROM_DIRECTION        1
#define FROM_LOOP       ucs2reverse_internal_loop
#define TO_LOOP               ucs2reverse_internal_loop/* This is not used.*/
#define FUNCTION_NAME         __gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION         1

#define MIN_NEEDED_INPUT      MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT     MIN_NEEDED_TO
#define LOOPFCT               FROM_LOOP
#define BODY \
  {                                                         \
    uint16_t u1 = bswap_16 (*((const uint16_t *) inptr));               \
                                                            \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))              \
      {                                                           \
      /* Surrogate characters in UCS-2 input are not valid.  Reject           \
         them.  (Catching this here is not security relevant.)  */            \
      if (! ignore_errors_p ())                                   \
        {                                                   \
          result = __GCONV_ILLEGAL_INPUT;                         \
          break;                                            \
        }                                                   \
      inptr += 2;                                           \
      ++*irreversible;                                      \
      continue;                                             \
      }                                                           \
                                                            \
    *((uint32_t *) outptr)++ = u1;                                \
    inptr += 2;                                                   \
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>


/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
#define DEFINE_INIT           0
#define DEFINE_FINI           0
#define MIN_NEEDED_FROM       4
#define MIN_NEEDED_TO         2
#define FROM_DIRECTION        1
#define FROM_LOOP       internal_ucs2reverse_loop
#define TO_LOOP               internal_ucs2reverse_loop/* This is not used.*/
#define FUNCTION_NAME         __gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION         1

#define MIN_NEEDED_INPUT      MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT     MIN_NEEDED_TO
#define LOOPFCT               FROM_LOOP
#define BODY \
  {                                                         \
    uint32_t val = *((const uint32_t *) inptr);                         \
    if (__builtin_expect (val >= 0x10000, 0))                           \
      {                                                           \
      UNICODE_TAG_HANDLER (val, 4);                               \
      STANDARD_TO_LOOP_ERR_HANDLER (4);                           \
      }                                                           \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))       \
      {                                                           \
      /* Surrogate characters in UCS-4 input are not valid.             \
         We must catch this, because the UCS-2 output might be          \
         interpreted as UTF-16 by other programs.  If we let                  \
         surrogates pass through, attackers could make a security       \
         hole exploit by synthesizing any desired plane 1-16                  \
         character.  */                                     \
      if (! ignore_errors_p ())                                   \
        {                                                   \
          result = __GCONV_ILLEGAL_INPUT;                         \
          break;                                            \
        }                                                   \
      inptr += 4;                                           \
      ++*irreversible;                                      \
      continue;                                             \
      }                                                           \
    else                                                    \
      {                                                           \
      *((uint16_t *) outptr)++ = bswap_16 (val);                        \
      inptr += 4;                                           \
      }                                                           \
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>

Generated by  Doxygen 1.6.0   Back to index