OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [tags/] [gnu-src/] [newlib-1.18.0/] [newlib-1.18.0-or32-1.0rc2/] [newlib/] [libc/] [iconv/] [ces/] [utf-16.c] - Rev 207

Go to most recent revision | Compare with Previous | Blame | View Log

/*
 * Copyright (c) 2003-2004, Artem B. Bityuckiy
 * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "cesbi.h"
 
#if defined (ICONV_TO_UCS_CES_UTF_16) \
 || defined (ICONV_FROM_UCS_CES_UTF_16)
 
#include <_ansi.h>
#include <reent.h>
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include "../lib/local.h"
#include "../lib/ucsconv.h"
#include "../lib/endian.h"
 
/*
 * On input UTF-16 converter interpret BOM and uses Big Endian byte order if BOM
 * is absent. UTF-16 converter outputs in System Endian and adds correspondent
 * BOM as first code. UTF-16LE and UTF-16BE converters ignore BOM on input and
 * don't output BOM.
 */
 
#define UTF16_UNDEFINED     0x00
#define UTF16_BIG_ENDIAN    0x01
#define UTF16_LITTLE_ENDIAN 0x02
#define UTF16_SYSTEM_ENDIAN 0x04
#define UTF16_BOM_WRITTEN   0x08
 
#define UTF16_BOM 0xFEFF
 
#define UTF_16   "utf_16"
#define UTF_16BE "utf_16be"
#define UTF_16LE "utf_16le"
 
static size_t
_DEFUN(utf_16_close, (rptr, data),
                     struct _reent *rptr _AND
                     _VOID_PTR data)
{
  _free_r(rptr, data);
  return 0;
}
 
#if defined (ICONV_FROM_UCS_CES_UTF_16)
static _VOID_PTR
_DEFUN(utf_16_init_from_ucs, (rptr, encoding),
                             struct _reent *rptr _AND
                             _CONST char *encoding)
{
  int *data;
 
  if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
    return (_VOID_PTR)NULL;
 
  if (strcmp (encoding, UTF_16LE) == 0)
    *data = UTF16_LITTLE_ENDIAN;
  else if (strcmp (encoding, UTF_16BE) == 0)
    *data = UTF16_BIG_ENDIAN;
  else
    *data = UTF16_SYSTEM_ENDIAN;
 
  return (_VOID_PTR)data;
}
 
static size_t
_DEFUN(utf_16_convert_from_ucs, (data, in, outbuf, outbytesleft),
                                _VOID_PTR data         _AND
                                register ucs4_t in     _AND
                                unsigned char **outbuf _AND
                                size_t *outbytesleft)
{
  register ucs2_t *cp;
  register size_t bytes;
  register int *state;
 
  if (in > 0x0010FFFF || (in >= 0x0000D800 && in <= 0x0000DFFF)
      || in == 0x0000FFFF || in == 0x0000FFFE)
    return (size_t)ICONV_CES_INVALID_CHARACTER; 
 
  state = (int *)data;
  bytes = (*state == UTF16_SYSTEM_ENDIAN) ? sizeof (ucs2_t) * 2 
                                          : sizeof (ucs2_t);
 
  if (in > 0x0000FFFF)
    bytes += sizeof (ucs2_t);
 
  if (*outbytesleft < bytes)
    return (size_t)ICONV_CES_NOSPACE;
 
  cp = (ucs2_t *)*outbuf;
 
  if (*state == UTF16_SYSTEM_ENDIAN)
    {
      *cp++ = UTF16_BOM;
      *state |= UTF16_BOM_WRITTEN;
    }
 
  if (in < 0x00010000)
    {
      switch (*state)
        {
          case UTF16_LITTLE_ENDIAN:
            *cp = ICONV_HTOLES ((ucs2_t)in);
            break;
          case UTF16_BIG_ENDIAN:
            *cp = ICONV_HTOBES ((ucs2_t)in);
            break;
          case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN):
            *cp = (ucs2_t)in;
            break;
        }
    }
  else
    {
      ucs2_t w1, w2;
 
      /* Process surrogate pair */
      in -= 0x00010000;
      w1 = ((ucs2_t)((in >> 10)) & 0x03FF) | 0xD800;
      w2 = (ucs2_t)(in & 0x000003FF) | 0xDC00;
 
      switch (*state)
        {
          case UTF16_LITTLE_ENDIAN:
            *cp++ = ICONV_HTOLES (w1);
            *cp = ICONV_HTOLES (w2);
            break;
          case UTF16_BIG_ENDIAN:
            *cp++ = ICONV_HTOBES (w1);
            *cp = ICONV_HTOBES (w2);
            break;
          case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN):
            *cp++ = w1;
            *cp = w2;
            break;
        }
    }
 
  *outbuf += bytes;
  *outbytesleft -= bytes;
 
  return bytes;
}
#endif /* ICONV_FROM_UCS_CES_UTF_16 */
 
#if defined (ICONV_TO_UCS_CES_UTF_16)
static _VOID_PTR
_DEFUN(utf_16_init_to_ucs, (rptr, encoding),
                           struct _reent *rptr _AND
                           _CONST char *encoding)
{
  int *data;
 
  if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
    return (_VOID_PTR)NULL;
 
  if (strcmp (encoding, UTF_16BE) == 0)
    *data = UTF16_BIG_ENDIAN;
  else if (strcmp (encoding, UTF_16LE) == 0)
    *data = UTF16_LITTLE_ENDIAN;
  else
    *data = UTF16_UNDEFINED;
 
  return (_VOID_PTR)data;
}
 
static ucs4_t
_DEFUN(utf_16_convert_to_ucs, (data, inbuf, inbytesleft),
                              _VOID_PTR data               _AND
                              _CONST unsigned char **inbuf _AND
                              size_t *inbytesleft)
{
  register ucs2_t w1;
  register ucs2_t w2;
  register ucs2_t *cp;
  int *state;
  ucs4_t res;
  int bytes = sizeof (ucs2_t);
 
  if (*inbytesleft < bytes)
    return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
 
  state = (int *)data;
  cp = ((ucs2_t *)*inbuf);
 
  if (*state == UTF16_UNDEFINED)
    {
      if (*cp == ICONV_HTOLES(UTF16_BOM))
        *state = UTF16_LITTLE_ENDIAN;
      else
        *state = UTF16_BIG_ENDIAN;
 
     if (   *cp == ICONV_HTOBES (UTF16_BOM)
         || *cp == ICONV_HTOLES (UTF16_BOM))
       {
         if (*inbytesleft < (bytes += sizeof (ucs2_t)))
           return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
         cp += 1;
       }
    }
 
  if (*state == UTF16_LITTLE_ENDIAN)      
    w1 = ICONV_LETOHS (*cp);
  else
    w1 = ICONV_BETOHS (*cp);
 
  if (w1  < 0xD800 || w1 > 0xDFFF)
    {
      if (w1 == 0xFFFF || w1 == 0xFFFE)
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
      res = (ucs4_t)w1;
    }
  else
    {
      /* Process surrogate pair */
      if (*inbytesleft < (bytes += 2))
        return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
 
      if (w1 > 0xDBFF)
        /* Broken surrogate character */
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
 
      cp += 1;
 
      if (*state == UTF16_LITTLE_ENDIAN)      
        w2 = ICONV_LETOHS (*cp);
      else
        w2 = ICONV_BETOHS (*cp);
 
      if (w2 < 0xDC00 || w2 > 0xDFFF)
        /* Broken surrogate character */
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
 
      res = (ucs4_t)(w2 & 0x03FF) | ((ucs4_t)(w1 & 0x03FF) << 10);
      res += 0x00010000;
    }
 
  *inbuf += bytes;
  *inbytesleft -= bytes;
 
  return res;
}
#endif /* ICONV_TO_UCS_CES_UTF_16 */
 
static int
_DEFUN(utf_16_get_mb_cur_max, (data),
                              _VOID_PTR data)
{
  return 6;
}
 
#if defined (ICONV_TO_UCS_CES_UTF_16)
_CONST iconv_to_ucs_ces_handlers_t
_iconv_to_ucs_ces_handlers_utf_16 = 
{
  utf_16_init_to_ucs,
  utf_16_close,
  utf_16_get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  utf_16_convert_to_ucs
};
#endif
 
#if defined (ICONV_FROM_UCS_CES_UTF_16)
_CONST iconv_from_ucs_ces_handlers_t
_iconv_from_ucs_ces_handlers_utf_16 =
{
  utf_16_init_from_ucs,
  utf_16_close,
  utf_16_get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  utf_16_convert_from_ucs
};
#endif
 
#endif /* ICONV_TO_UCS_CES_UTF_16 || ICONV_FROM_UCS_CES_UTF_16 */
 
 

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.