diff -Naurp libmbfl-1.0.1/AUTHORS libmbfl-1.0.1.oden/AUTHORS --- libmbfl-1.0.1/AUTHORS 2004-02-04 05:17:51.000000000 +0100 +++ libmbfl-1.0.1.oden/AUTHORS 2008-07-05 08:52:04.000000000 +0200 @@ -1,10 +1,13 @@ -Den V. Tsopa <tdv@edisoft.ru> -Hironori Sato <satoh@jpnnet.com> Marcus Boerger <helly@php.net> -Moriyoshi Koizumi <moriyoshi@php.net> +Hayk Chamyan <hamshen@gmail.com> +Wez Furlong <wez@thebrainroom.com> Rui Hirokawa <hirokawa@php.net> Shigeru Kanemoto <sgk@happysize.co.jp> -Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> -Tateyama <tateyan@amy.hi-ho.ne.jp> U. Kenkichi <kenkichi@axes.co.jp> -Wez Furlong <wez@thebrainroom.com> +Moriyoshi Koizumi <moriyoshi@php.net> +Hironori Sato <satoh@jpnnet.com> +Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> +Tateyama <tateyan@amy.hi-ho.ne.jp> +Den V. Tsopa <tdv@edisoft.ru> +Maksym Veremeyenko <verem@m1stereo.tv> +Haluk AKIN <halukakin@gmail.com> diff -Naurp libmbfl-1.0.1/buildconf libmbfl-1.0.1.oden/buildconf --- libmbfl-1.0.1/buildconf 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/buildconf 2003-08-23 08:18:44.000000000 +0200 @@ -0,0 +1,6 @@ +#!/bin/sh +libtoolize -c -f --automake +aclocal +autoheader +automake -a -c --foreign +autoconf diff -Naurp libmbfl-1.0.1/config.h.in libmbfl-1.0.1.oden/config.h.in --- libmbfl-1.0.1/config.h.in 2006-01-21 05:10:56.000000000 +0100 +++ libmbfl-1.0.1.oden/config.h.in 2010-03-12 05:55:37.000000000 +0100 @@ -50,6 +50,10 @@ /* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + /* Name of package */ #undef PACKAGE @@ -65,6 +69,9 @@ /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME +/* Define to the home page for this package. */ +#undef PACKAGE_URL + /* Define to the version of this package. */ #undef PACKAGE_VERSION @@ -83,5 +90,5 @@ /* Define to rpl_realloc if the replacement function should be used. 
*/ #undef realloc -/* Define to `unsigned' if <sys/types.h> does not define. */ +/* Define to `unsigned int' if <sys/types.h> does not define. */ #undef size_t diff -Naurp libmbfl-1.0.1/configure.in libmbfl-1.0.1.oden/configure.in --- libmbfl-1.0.1/configure.in 2006-01-21 05:04:54.000000000 +0100 +++ libmbfl-1.0.1.oden/configure.in 2011-11-06 13:40:59.346405302 +0100 @@ -1,10 +1,12 @@ # Process this file with autoconf to produce a configure script. AC_INIT(mbfl/mbfilter.c) -AM_INIT_AUTOMAKE(libmbfl, 1.0.1) +AM_INIT_AUTOMAKE(libmbfl, 1.1.0) AC_CONFIG_SRCDIR(mbfl/mbfilter.c) AM_CONFIG_HEADER(config.h) -SHLIB_VERSION="1:0:0" +# SHLIB isn't a version number but the API reference +# Read http://www.gnu.org/software/libtool/manual/libtool.html#Updating-version-info +SHLIB_VERSION="1:1:0" AC_SUBST(SHLIB_VERSION) # Checks for programs. @@ -34,5 +36,9 @@ if test "$FETCH_VIA_FTP" = "curl"; then FETCH_VIA_FTP="curl -O" fi -AC_CONFIG_FILES([Makefile mbfl/Makefile filters/Makefile nls/Makefile]) +AC_CONFIG_FILES([ + Makefile + mbfl/Makefile + filters/Makefile + nls/Makefile]) AC_OUTPUT diff -Naurp libmbfl-1.0.1/filters/Makefile.am libmbfl-1.0.1.oden/filters/Makefile.am --- libmbfl-1.0.1/filters/Makefile.am 2005-03-22 21:30:21.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/Makefile.am 2010-03-12 05:55:37.000000000 +0100 @@ -1,8 +1,149 @@ -EXTRA_DIST=Makefile.bcc32 mk_sb_tbl.awk +EXTRA_DIST=mk_sb_tbl.awk noinst_LTLIBRARIES=libmbfl_filters.la INCLUDES=-I../mbfl libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION) -libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c 
mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c mbfilter_armscii8.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_armscii8.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_armscii8.h unicode_table_uhc.h 
+libmbfl_filters_la_SOURCES=mbfilter_cp936.c \ + mbfilter_hz.c \ + mbfilter_euc_tw.c \ + mbfilter_big5.c \ + mbfilter_euc_jp.c \ + mbfilter_jis.c \ + mbfilter_iso8859_1.c \ + mbfilter_iso8859_2.c \ + mbfilter_cp1254.c \ + mbfilter_cp1252.c \ + mbfilter_cp1251.c \ + mbfilter_ascii.c \ + mbfilter_iso8859_3.c \ + mbfilter_iso8859_4.c \ + mbfilter_iso8859_5.c \ + mbfilter_iso8859_6.c \ + mbfilter_iso8859_7.c \ + mbfilter_iso8859_8.c \ + mbfilter_iso8859_9.c \ + mbfilter_iso8859_10.c \ + mbfilter_iso8859_13.c \ + mbfilter_iso8859_14.c \ + mbfilter_iso8859_15.c \ + mbfilter_iso8859_16.c \ + mbfilter_htmlent.c \ + mbfilter_byte2.c \ + mbfilter_byte4.c \ + mbfilter_uuencode.c \ + mbfilter_base64.c \ + mbfilter_sjis.c \ + mbfilter_sjis_open.c \ + mbfilter_7bit.c \ + mbfilter_qprint.c \ + mbfilter_ucs4.c \ + mbfilter_ucs2.c \ + mbfilter_utf32.c \ + mbfilter_utf16.c \ + mbfilter_utf8.c \ + mbfilter_utf7.c \ + mbfilter_utf7imap.c \ + mbfilter_euc_jp_win.c \ + mbfilter_cp932.c \ + mbfilter_cp51932.c \ + mbfilter_euc_cn.c \ + mbfilter_euc_kr.c \ + mbfilter_uhc.c \ + mbfilter_iso2022_jp_ms.c \ + mbfilter_iso2022_kr.c \ + mbfilter_cp866.c \ + mbfilter_koi8r.c \ + mbfilter_koi8u.c \ + mbfilter_armscii8.c \ + mbfilter_cp850.c \ + mbfilter_cp5022x.c \ + mbfilter_tl_jisx0201_jisx0208.c \ + html_entities.c \ + cp932_table.h \ + html_entities.h \ + mbfilter_7bit.h \ + mbfilter_ascii.h \ + mbfilter_base64.h \ + mbfilter_big5.h \ + mbfilter_byte2.h \ + mbfilter_byte4.h \ + mbfilter_cp1251.h \ + mbfilter_cp1252.h \ + mbfilter_cp1254.h \ + mbfilter_cp866.h \ + mbfilter_cp932.h \ + mbfilter_cp936.h \ + mbfilter_euc_cn.h \ + mbfilter_euc_jp.h \ + mbfilter_euc_jp_win.h \ + mbfilter_euc_kr.h \ + mbfilter_euc_tw.h \ + mbfilter_htmlent.h \ + mbfilter_hz.h \ + mbfilter_iso2022_jp_ms.h \ + mbfilter_iso2022_kr.h \ + mbfilter_iso8859_1.h \ + mbfilter_iso8859_10.h \ + mbfilter_iso8859_13.h \ + mbfilter_iso8859_14.h \ + mbfilter_iso8859_15.h \ + mbfilter_iso8859_16.h \ + mbfilter_iso8859_2.h \ + 
mbfilter_iso8859_3.h \ + mbfilter_iso8859_4.h \ + mbfilter_iso8859_5.h \ + mbfilter_iso8859_6.h \ + mbfilter_iso8859_7.h \ + mbfilter_iso8859_8.h \ + mbfilter_iso8859_9.h \ + mbfilter_jis.h \ + mbfilter_koi8r.h \ + mbfilter_koi8u.h \ + mbfilter_armscii8.h \ + mbfilter_qprint.h \ + mbfilter_sjis.h \ + mbfilter_sjis_open.h \ + mbfilter_ucs2.h \ + mbfilter_ucs4.h \ + mbfilter_uhc.h \ + mbfilter_utf16.h \ + mbfilter_utf32.h \ + mbfilter_utf7.h \ + mbfilter_utf7imap.h \ + mbfilter_utf8.h \ + mbfilter_uuencode.h \ + mbfilter_cp5022x.h \ + mbfilter_cp51932.h \ + mbfilter_cp850.h \ + mbfilter_tl_jisx0201_jisx0208.h \ + unicode_prop.h \ + unicode_table_big5.h \ + unicode_table_cns11643.h \ + unicode_table_cp1251.h \ + unicode_table_cp1252.h \ + unicode_table_cp1254.h \ + unicode_table_cp866.h \ + unicode_table_cp932_ext.h \ + unicode_table_cp936.h \ + unicode_table_iso8859_10.h \ + unicode_table_iso8859_13.h \ + unicode_table_iso8859_14.h \ + unicode_table_iso8859_15.h \ + unicode_table_iso8859_16.h \ + unicode_table_iso8859_2.h \ + unicode_table_iso8859_3.h \ + unicode_table_iso8859_4.h \ + unicode_table_iso8859_5.h \ + unicode_table_iso8859_6.h \ + unicode_table_iso8859_7.h \ + unicode_table_iso8859_8.h \ + unicode_table_iso8859_9.h \ + unicode_table_jis.h \ + unicode_table_koi8r.h \ + unicode_table_koi8u.h \ + unicode_table_armscii8.h \ + unicode_table_cp850.h \ + unicode_table_uhc.h \ + translit_kana_jisx0201_jisx0208.h mbfilter_iso8859_2.c: unicode_table_iso8859_2.h diff -Naurp libmbfl-1.0.1/filters/mbfilter_cp1254.c libmbfl-1.0.1.oden/filters/mbfilter_cp1254.c --- libmbfl-1.0.1/filters/mbfilter_cp1254.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_cp1254.c 2008-07-05 09:36:24.000000000 +0200 @@ -0,0 +1,157 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
+ * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Haluk AKIN <halukakin@gmail.com> + * + */ +/* + * The source code included in this files was separated from mbfilter_ru.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_cp1254.h" +#include "unicode_table_cp1254.h" + +static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL}; + +const mbfl_encoding mbfl_encoding_cp1254 = { + mbfl_no_encoding_cp1254, + "Windows-1254", + "Windows-1254", + (const char *(*)[])&mbfl_encoding_cp1254_aliases, + NULL, + MBFL_ENCTYPE_SBCS +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp1254 = { + mbfl_no_encoding_cp1254, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp1254 +}; + +const struct mbfl_convert_vtbl vtbl_cp1254_wchar = { + mbfl_no_encoding_cp1254, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_cp1254_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp1254 = { + mbfl_no_encoding_wchar, + 
mbfl_no_encoding_cp1254, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp1254, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* + * wchar => cp1254 + */ +int +mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter) +{ + int s, n; + + if (c < 0x80) { + s = c; + } else { + s = -1; + n = cp1254_ucs_table_len-1; + while (n >= 0) { + if (c == cp1254_ucs_table[n] && c != 0xfffe) { + s = cp1254_ucs_table_min + n; + break; + } + n--; + } + if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP1254) { + s = c & MBFL_WCSPLANE_MASK; + } + } + + if (s >= 0) { + CK((*filter->output_function)(s, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +/* + * cp1254 => wchar + */ +int +mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter) +{ + int s; + + if (c >= 0 && c < cp1254_ucs_table_min) { + s = c; + } else if (c >= cp1254_ucs_table_min && c < 0x100) { + s = cp1254_ucs_table[c - cp1254_ucs_table_min]; + if (s <= 0) { + s = c; + s &= MBFL_WCSPLANE_MASK; + s |= MBFL_WCSPLANE_CP1254; + } + } else { + s = c; + s &= MBFL_WCSGROUP_MASK; + s |= MBFL_WCSGROUP_THROUGH; + } + + CK((*filter->output_function)(s, filter->data)); + + return c; +} + +/* We only distinguish the MS extensions to ISO-8859-1. + * Actually, this is pretty much a NO-OP, since the identification + * system doesn't allow us to discriminate between a positive match, + * a possible match and a definite non-match. + * The problem here is that cp1254 looks like SJIS for certain chars. 
+ * */ +static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xff) + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; +} + + diff -Naurp libmbfl-1.0.1/filters/mbfilter_cp1254.h libmbfl-1.0.1.oden/filters/mbfilter_cp1254.h --- libmbfl-1.0.1/filters/mbfilter_cp1254.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_cp1254.h 2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,43 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Haluk AKIN <halukakin@gmail.com> + * + */ +/* + * the source code included in this files was separated from mbfilter.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. 
+ * + */ + +#ifndef MBFL_MBFILTER_CP1254_H +#define MBFL_MBFILTER_CP1254_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_cp1254; +extern const struct mbfl_identify_vtbl vtbl_identify_cp1254; +extern const struct mbfl_convert_vtbl vtbl_cp1254_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp1254; + +int mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CP1254_H */ diff -Naurp libmbfl-1.0.1/filters/mbfilter_cp5022x.c libmbfl-1.0.1.oden/filters/mbfilter_cp5022x.c --- libmbfl-1.0.1/filters/mbfilter_cp5022x.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_cp5022x.c 2010-03-12 05:55:37.000000000 +0100 @@ -0,0 +1,1299 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: Moriyoshi Koizumi <koizumi@gree.co.jp> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_cp5022x.h" +#include "mbfilter_jis.h" +#include "mbfilter_tl_jisx0201_jisx0208.h" + +#include "unicode_table_cp932_ext.h" +#include "unicode_table_jis.h" +#include "cp932_table.h" + +typedef struct _mbfl_filt_conv_wchar_cp50220_ctx { + mbfl_filt_tl_jisx0201_jisx0208_param tl_param; + mbfl_convert_filter last; +} mbfl_filt_conv_wchar_cp50220_ctx; + +static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter); +static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter); +static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter); +static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter); +static void mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt); +static void mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt); +static void mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest); + +const mbfl_encoding mbfl_encoding_jis_ms = { + mbfl_no_encoding_jis_ms, + "JIS-ms", + "ISO-2022-JP", + NULL, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const mbfl_encoding mbfl_encoding_cp50220 = { + mbfl_no_encoding_cp50220, + "CP50220", + "ISO-2022-JP", + (const char *(*)[])NULL, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const mbfl_encoding mbfl_encoding_cp50220raw = { + mbfl_no_encoding_cp50220raw, + "CP50220raw", + "ISO-2022-JP", + (const char *(*)[])NULL, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const mbfl_encoding mbfl_encoding_cp50221 = { + mbfl_no_encoding_cp50221, + "CP50221", + "ISO-2022-JP", + 
NULL, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const mbfl_encoding mbfl_encoding_cp50222 = { + mbfl_no_encoding_cp50222, + "CP50222", + "ISO-2022-JP", + NULL, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const struct mbfl_identify_vtbl vtbl_identify_jis_ms = { + mbfl_no_encoding_jis_ms, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_jis_ms +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp50220 = { + mbfl_no_encoding_cp50220, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp50220 +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp50220raw = { + mbfl_no_encoding_cp50220raw, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp50220 +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp50221 = { + mbfl_no_encoding_cp50221, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp50221 +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp50222 = { + mbfl_no_encoding_cp50222, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp50222 +}; + +const struct mbfl_convert_vtbl vtbl_jis_ms_wchar = { + mbfl_no_encoding_jis_ms, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_jis_ms_wchar, + mbfl_filt_conv_common_flush, +}; + +const struct mbfl_convert_vtbl vtbl_wchar_jis_ms = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_jis_ms, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_jis_ms, + mbfl_filt_conv_any_jis_flush +}; + +const struct mbfl_convert_vtbl vtbl_cp50220_wchar = { + mbfl_no_encoding_cp50220, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_jis_ms_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp50220 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp50220, + mbfl_filt_conv_wchar_cp50220_ctor, + 
mbfl_filt_conv_wchar_cp50220_dtor, + mbfl_filt_conv_wchar_cp50221, + mbfl_filt_conv_any_jis_flush, + mbfl_filt_conv_wchar_cp50220_copy +}; + +const struct mbfl_convert_vtbl vtbl_cp50220raw_wchar = { + mbfl_no_encoding_cp50220raw, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_jis_ms_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp50220raw = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp50220raw, + mbfl_filt_conv_wchar_cp50220_ctor, + mbfl_filt_conv_wchar_cp50220_dtor, + mbfl_filt_conv_wchar_cp50220raw, + mbfl_filt_conv_any_jis_flush, + mbfl_filt_conv_wchar_cp50220_copy +}; + +const struct mbfl_convert_vtbl vtbl_cp50221_wchar = { + mbfl_no_encoding_cp50221, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_jis_ms_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp50221 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp50221, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp50221, + mbfl_filt_conv_any_jis_flush +}; + +const struct mbfl_convert_vtbl vtbl_cp50222_wchar = { + mbfl_no_encoding_cp50222, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_jis_ms_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp50222 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp50222, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp50222, + mbfl_filt_conv_wchar_cp50222_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* + * JIS-ms => wchar + */ +int +mbfl_filt_conv_jis_ms_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0x90: X 
0212 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (c == 0x0e) { /* "kana in" */ + filter->status = 0x20; + } else if (c == 0x0f) { /* "kana out" */ + filter->status = 0; + } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */ + CK((*filter->output_function)(0xa5, filter->data)); + } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */ + CK((*filter->output_function)(0x203e, filter->data)); + } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ + CK((*filter->output_function)(0xff40 + c, filter->data)); + } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else { + w = c & MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0x91: X 0212 second char */ + case 1: + filter->status &= ~0xf; + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; + if (filter->status == 0x80) { + if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* both bounds must be the ext3 table's own */ + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= 94 * 94 && s < 114 * 94) { + /* user-defined => PUA (Microsoft extended) */ + w = (s & 0xff) + ((s >> 8) - 94) * 94 + 0xe000; + } else if (s >= 212 * 94 && s < 222 * 94) { + /* user-defined => PUA (G3 
85 - 94 Ku) */ + w = (s & 0xff) + ((s >> 8) - 212) * 94 + 0xe000 + 10 * 94; + } else { + w = 0; + } + if (w <= 0) { + w = (c1 << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0208; + } + } else { + if (s >= 0 && s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + } else { + w = 0; + } + if (w <= 0) { + w = (c1 << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0212; + } + } + CK((*filter->output_function)(w, filter->data)); + } else if (c == 0x1b) { + filter->status += 2; + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = (c1 << 8) | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + /* ESC */ +/* case 0x02: */ +/* case 0x12: */ +/* case 0x22: */ +/* case 0x82: */ +/* case 0x92: */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + goto retry; + } + break; + + /* ESC $ */ +/* case 0x03: */ +/* case 0x13: */ +/* case 0x23: */ +/* case 0x83: */ +/* case 0x93: */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + goto retry; + } + break; + + /* ESC $ ( */ +/* case 0x04: */ +/* case 0x14: */ +/* case 0x24: */ +/* case 0x84: */ +/* case 0x94: */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x44) { /* 'D' */ + filter->status = 0x90; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; 
+ } + break; + + /* ESC ( */ +/* case 0x05: */ +/* case 0x15: */ +/* case 0x25: */ +/* case 0x85: */ +/* case 0x95: */ + case 5: + if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +/* + * wchar => JIS + */ +int +mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter) +{ + int c1, s; + + s = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) { + /* PUE => Microsoft extended (pseudo 95ku - 114ku) */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - 0xe000; + s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21); + } else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) { + /* PUE => JISX0212 user-defined (G3 85ku - 94ku) */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - (0xe000 + 10 * 94); + s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1); + } + + /* do some transliteration */ + if (s <= 0) { + c1 = c & ~MBFL_WCSPLANE_MASK; + if (c1 == MBFL_WCSPLANE_JIS0208) { + s = c & MBFL_WCSPLANE_MASK; + } else if (c1 == MBFL_WCSPLANE_JIS0212) { + s = c & MBFL_WCSPLANE_MASK; + s |= 0x8080; + } else if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0x203e) { /* OVER LINE */ + s 
= 0x1007e; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + } + if (s <= 0 || s >= 0x8080 && s < 0x10000) { + int i; + s = -1; + + for (i = 0; + i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + const int oh = cp932ext1_ucs_table_min / 94; + + if (c == cp932ext1_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + + if (s < 0) { + const int oh = cp932ext2_ucs_table_min / 94; + const int cp932ext2_ucs_table_size = + cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + for (i = 0; i < cp932ext2_ucs_table_size; i++) { + if (c == cp932ext2_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + } + + if (s < 0) { + const int cp932ext3_ucs_table_size = + cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + const int limit = cp932ext3_ucs_table_size > + cp932ext3_eucjp_table_size ? 
+ cp932ext3_eucjp_table_size: + cp932ext3_ucs_table_size; + for (i = 0; i < limit; i++) { + if (c == cp932ext3_ucs_table[i]) { + s = cp932ext3_eucjp_table[i]; + break; + } + } + } + + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0; + CK((*filter->output_function)(s, filter->data)); + } else if (s < 0x100) { /* kana */ + if ((filter->status & 0xff00) != 0x100) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ + } + filter->status = 0x100; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else if (s < 0x8080) { /* X 0208 */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0x200; + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else if (s < 0x10000) { /* X 0212 */ + if ((filter->status & 0xff00) != 0x300) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x44, filter->data)); /* 'D' */ + } + filter->status = 0x300; + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) != 0x400) { + 
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +/* + * wchar => CP50220 + */ +static void +mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt) +{ + mbfl_filt_conv_wchar_cp50220_ctx *ctx; + + mbfl_filt_conv_common_ctor(filt); + + ctx = mbfl_malloc(sizeof(mbfl_filt_conv_wchar_cp50220_ctx)); + if (ctx == NULL) { + mbfl_filt_conv_common_dtor(filt); + return; + } + + ctx->tl_param.mode = MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_GLUE; + + ctx->last = *filt; + ctx->last.opaque = ctx; + ctx->last.data = filt->data; + filt->filter_function = vtbl_tl_jisx0201_jisx0208.filter_function; + filt->filter_flush = vtbl_tl_jisx0201_jisx0208.filter_flush; + filt->output_function = (int(*)(int, void *))ctx->last.filter_function; + filt->flush_function = (int(*)(void *))ctx->last.filter_flush; + filt->data = &ctx->last; + filt->opaque = ctx; + vtbl_tl_jisx0201_jisx0208.filter_ctor(filt); +} + +static void +mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest) +{ + mbfl_filt_conv_wchar_cp50220_ctx *ctx; + + *dest = *src; + ctx = mbfl_malloc(sizeof(mbfl_filt_conv_wchar_cp50220_ctx)); + if (ctx != NULL) { + *ctx = *(mbfl_filt_conv_wchar_cp50220_ctx*)src->opaque; + } + + dest->opaque = ctx; + dest->data = &ctx->last; +} + +static void +mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt) +{ + vtbl_tl_jisx0201_jisx0208.filter_dtor(filt); + + if (filt->opaque != NULL) { + mbfl_free(filt->opaque); + } + + mbfl_filt_conv_common_dtor(filt); +} + +/* + * wchar => cp50220raw + */ +int +mbfl_filt_conv_wchar_cp50220raw(int c, mbfl_convert_filter *filter) 
+{ + if (c & MBFL_WCSPLANE_JIS0208) { + const int s = c & MBFL_WCSPLANE_MASK; + + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0x200; + } + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + return c; + } else { + return mbfl_filt_conv_wchar_cp50221(c, filter); + } +} + +/* + * wchar => CP50221 + */ +int +mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) { + /* PUE => Microsoft extended */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - 0xe000; + s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21); + } else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) { + /* PUE => JISX0212 user-defined (G3 85ku - 94ku) */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - (0xe000 + 10 * 94); + s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1); + } + + if (s <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0x203e) { /* OVER LINE */ + s = 0x1007e; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS 
*/ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + } + if (s <= 0 || s >= 0x8080 && s < 0x10000) { + int i; + s = -1; + + for (i = 0; + i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + i++) { + const int oh = cp932ext1_ucs_table_min / 94; + + if (c == cp932ext1_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + + if (s < 0) { + const int oh = cp932ext2_ucs_table_min / 94; + const int cp932ext2_ucs_table_size = + cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + for (i = 0; i < cp932ext2_ucs_table_size; i++) { + if (c == cp932ext2_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + } + + if (s < 0) { + const int cp932ext3_ucs_table_size = + cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + const int limit = cp932ext3_ucs_table_size > + cp932ext3_eucjp_table_size ? 
+ cp932ext3_eucjp_table_size: + cp932ext3_ucs_table_size; + for (i = 0; i < limit; i++) { + if (c == cp932ext3_ucs_table[i]) { + s = cp932ext3_eucjp_table[i]; + break; + } + } + } + + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0; + } + CK((*filter->output_function)(s, filter->data)); + } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ + if ((filter->status & 0xff00) != 0x500) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ + filter->status = 0x500; + } + CK((*filter->output_function)(s - 0x80, filter->data)); + } else if (s < 0x8080) { /* X 0208 */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0x200; + } + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else if (s < 0x10000) { /* X0212 */ + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) != 0x400) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } else { + if (filter->illegal_mode != 
MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +/* + * wchar => CP50222 + */ +int +mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) +{ + int s; + + s = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) { + /* PUE => Microsoft extended */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - 0xe000; + s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21); + } else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) { + /* PUE => JISX0212 user-defined (G3 85ku - 94ku) */ + /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ + s = c - (0xe000 + 10 * 94); + s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1); + } + + if (s <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0x203e) { /* OVER LINE */ + s = 0x1007e; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + } + if (s <= 0 || s >= 0x8080 && s < 0x10000) { + int i; + s = -1; + + for (i = 0; + i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + const int oh = cp932ext1_ucs_table_min / 94; + + if 
(c == cp932ext1_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + + if (s <= 0) { + const int oh = cp932ext2_ucs_table_min / 94; + const int cp932ext2_ucs_table_size = + cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + for (i = 0; i < cp932ext2_ucs_table_size; i++) { + if (c == cp932ext2_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + } + + if (s <= 0) { + const int cp932ext3_ucs_table_size = + cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + const int limit = cp932ext3_ucs_table_size > + cp932ext3_eucjp_table_size ? + cp932ext3_eucjp_table_size: + cp932ext3_ucs_table_size; + for (i = 0; i < limit; i++) { + if (c == cp932ext3_ucs_table[i]) { + s = cp932ext3_eucjp_table[i]; + break; + } + } + } + + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SO */ + filter->status = 0; + } else if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0; + } + CK((*filter->output_function)(s, filter->data)); + } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ + if ((filter->status & 0xff00) != 0x500) { + CK((*filter->output_function)(0x0e, filter->data)); /* SI */ + filter->status = 0x500; + } + CK((*filter->output_function)(s - 0x80, filter->data)); + } else if (s < 0x8080) { /* X 0208 */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SO */ + filter->status = 0; + } + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); 
/* 'B' */ + filter->status = 0x200; + } + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else if (s < 0x10000) { /* X0212 */ + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SO */ + filter->status = 0; + } + if ((filter->status & 0xff00) != 0x400) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +int +mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter) +{ + /* back to latin */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SO */ + } else if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status &= 0xff; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + + +static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0x90: X 0212 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (c == 0x0e) { /* "kana in" */ + filter->status = 0x20; + } else if (c == 0x0f) { /* "kana out" */ + filter->status = 0; + } 
else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0x91: X 0212 second char */ + case 1: + filter->status &= ~0xf; + if (c == 0x1b) { + goto retry; + } else if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ ( */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x44) { /* 'D' */ + filter->status = 0x90; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x80: X 0208 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + 
} else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ + case 1: + if (c == 0x1b) { + filter->status++; + } else { + filter->status &= ~0xf; + if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x80: X 0208 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ + case 1: + if (c == 0x1b) { + filter->status++; + } else { + filter->status &= ~0xf; + if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else { + filter->flag = 
1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x80: X 0208 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ + case 1: + if (c == 0x1b) { + filter->status++; + } else { + filter->status &= ~0xf; + if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + + + diff -Naurp libmbfl-1.0.1/filters/mbfilter_cp5022x.h libmbfl-1.0.1.oden/filters/mbfilter_cp5022x.h --- libmbfl-1.0.1/filters/mbfilter_cp5022x.h 1970-01-01 01:00:00.000000000 
+0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_cp5022x.h 2010-03-12 05:55:37.000000000 +0100 @@ -0,0 +1,64 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this file was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. 
+ * + */ + +#ifndef MBFL_MBFILTER_CP50221_h +#define MBFL_MBFILTER_CP50221_h + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_jis_ms; +extern const mbfl_encoding mbfl_encoding_cp50220; +extern const mbfl_encoding mbfl_encoding_cp50220raw; +extern const mbfl_encoding mbfl_encoding_cp50221; +extern const mbfl_encoding mbfl_encoding_cp50222; +extern const struct mbfl_identify_vtbl vtbl_identify_jis_ms; +extern const struct mbfl_identify_vtbl vtbl_identify_cp50220; +extern const struct mbfl_identify_vtbl vtbl_identify_cp50220raw; +extern const struct mbfl_identify_vtbl vtbl_identify_cp50221; +extern const struct mbfl_identify_vtbl vtbl_identify_cp50222; +extern const struct mbfl_convert_vtbl vtbl_jis_ms_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_jis_ms; +extern const struct mbfl_convert_vtbl vtbl_cp50220_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp50220; +extern const struct mbfl_convert_vtbl vtbl_cp50220raw_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp50220raw; +extern const struct mbfl_convert_vtbl vtbl_cp50221_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp50221; +extern const struct mbfl_convert_vtbl vtbl_cp50222_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp50222; + +int mbfl_filt_conv_jis_ms_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp50220raw(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CP50221_h */ diff -Naurp libmbfl-1.0.1/filters/mbfilter_cp51932.c libmbfl-1.0.1.oden/filters/mbfilter_cp51932.c --- libmbfl-1.0.1/filters/mbfilter_cp51932.c 1970-01-01 01:00:00.000000000 +0100 +++ 
libmbfl-1.0.1.oden/filters/mbfilter_cp51932.c 2010-03-12 05:55:37.000000000 +0100 @@ -0,0 +1,345 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this file was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_cp51932.h" + +#include "unicode_table_cp932_ext.h" +#include "unicode_table_jis.h" +#include "cp932_table.h" + +static int mbfl_filt_ident_cp51932(int c, mbfl_identify_filter *filter); + +static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + + +static const char *mbfl_encoding_cp51932_aliases[] = {"cp51932", NULL}; + +const struct mbfl_identify_vtbl vtbl_identify_cp51932 = { + mbfl_no_encoding_cp51932, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp51932 +}; + +const mbfl_encoding mbfl_encoding_cp51932 = { + mbfl_no_encoding_cp51932, + "CP51932", + "CP51932", + (const char *(*)[])&mbfl_encoding_cp51932_aliases, + mblen_table_eucjp, + MBFL_ENCTYPE_MBCS +}; + +const struct mbfl_convert_vtbl vtbl_cp51932_wchar = { + mbfl_no_encoding_cp51932, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_cp51932_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp51932 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp51932, + 
mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp51932, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +#define sjistoidx(c1, c2) \ + (((c1) > 0x9f) \ + ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \ + : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))) +#define idxtoeuc1(c) (((c) / 94) + 0xa1) +#define idxtoeuc2(c) (((c) % 94) + 0xa1) + +/* + * cp51932 => wchar + */ +int +mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xff) { /* CP932 first char */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else { + w = c & MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + case 1: /* got first half */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xa0 && c < 0xff) { + w = 0; + s = (c1 - 0xa1)*94 + c - 0xa1; + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 
(89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + } + if (w <= 0) { + w = ((c1 & 0x7f) << 8) | (c & 0x7f); + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_WINCP932; + } + CK((*filter->output_function)(w, filter->data)); + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = (c1 << 8) | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + case 2: /* got 0x8e, X0201 kana */ + filter->status = 0; + if (c > 0xa0 && c < 0xe0) { + w = 0xfec0 + c; + CK((*filter->output_function)(w, filter->data)); + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = 0x8e00 | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +/* + * wchar => cp51932 + */ +int +mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1; + + s1 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + if (s1 >= 0x8080) s1 = -1; /* we don't support JIS X0213 */ + if (s1 <= 0) { + c1 = c & ~MBFL_WCSPLANE_MASK; + if (c1 == MBFL_WCSPLANE_WINCP932) { + s1 = c & MBFL_WCSPLANE_MASK; + if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */ + s1 = -1; + } + } else if (c1 == MBFL_WCSPLANE_JIS0208) { + s1 = c & MBFL_WCSPLANE_MASK; + if ((s1 >= ((85 + 0x20) << 8) && /* 85ku - 94ku */ + s1 <= ((88 
+ 0x20) << 8)) ||/* IBM extension */ + (s1 >= ((93 + 0x20) << 8) && /* 89ku - 92ku */ + s1 <= ((94 + 0x20) << 8))) { + s1 = -1; + } + } else if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x005c; /* YEN SIGN */ + } else if (c == 0x203e) { /* OVER LINE */ + s1 = 0x007e; /* FULLWIDTH MACRON */ + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } else { + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 < 0) { + c1 = 0; + c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext2_ucs_table[c1]) { + s1 = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21); + break; + } + c1++; + } + } + } + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x100) { /* kana */ + CK((*filter->output_function)(0x8e, filter->data)); + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x8080) { /* X 0208 */ + CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + 
CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +static int mbfl_filt_ident_cp51932(int c, mbfl_identify_filter *filter) +{ + switch (filter->status) { + case 0: /* latin */ + if (c >= 0 && c < 0x80) { /* ok */ + ; + } else if (c > 0xa0 && c < 0xff) { /* kanji first char */ + filter->status = 1; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else { /* bad */ + filter->flag = 1; + } + break; + + case 1: /* got first half */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + case 2: /* got 0x8e */ + if (c < 0xa1 || c > 0xdf) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + default: + filter->status = 0; + break; + } + + return c; +} + + diff -Naurp libmbfl-1.0.1/filters/mbfilter_cp51932.h libmbfl-1.0.1.oden/filters/mbfilter_cp51932.h --- libmbfl-1.0.1/filters/mbfilter_cp51932.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_cp51932.h 2007-09-26 17:44:16.000000000 +0200 @@ -0,0 +1,43 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter_ja.h + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifndef MBFL_MBFILTER_CP51932_H +#define MBFL_MBFILTER_CP51932_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_cp51932; +extern const struct mbfl_identify_vtbl vtbl_identify_cp51932; +extern const struct mbfl_convert_vtbl vtbl_cp51932_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp51932; + +int mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CP51932_H */ diff -Naurp libmbfl-1.0.1/filters/mbfilter_cp850.c libmbfl-1.0.1.oden/filters/mbfilter_cp850.c --- libmbfl-1.0.1/filters/mbfilter_cp850.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_cp850.c 2009-03-18 18:44:25.000000000 +0100 @@ -0,0 +1,147 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Den V. Tsopa <tdv@edisoft.ru> + * Adaption for CP850: D. Giffeler <dg@artegic.de> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_cp850.h" +#include "unicode_table_cp850.h" + +static int mbfl_filt_ident_cp850(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_cp850_aliases[] = {"CP850", "CP-850", "IBM-850", NULL}; + +const mbfl_encoding mbfl_encoding_cp850 = { + mbfl_no_encoding_cp850, + "CP850", + "CP850", + (const char *(*)[])&mbfl_encoding_cp850_aliases, + NULL, + MBFL_ENCTYPE_SBCS +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp850 = { + mbfl_no_encoding_cp850, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp850 +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp850 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp850, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp850, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_cp850_wchar = { + mbfl_no_encoding_cp850, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_cp850_wchar, + mbfl_filt_conv_common_flush +}; +/* CK: make the enclosing function return -1 as soon as statement evaluates to a negative value. */ +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* + * cp850 => wchar: values below cp850_ucs_table_min pass through unchanged, values up to 0xff go through cp850_ucs_table (entries <= 0 are passed on tagged with MBFL_WCSPLANE_CP850), anything else is tagged MBFL_WCSGROUP_THROUGH; returns c, or -1 via CK when the output function fails + */ +int +mbfl_filt_conv_cp850_wchar(int c, mbfl_convert_filter *filter) +{ + int s; + + if (c >= 0 && c < cp850_ucs_table_min) { + s = c; + } else if (c >= cp850_ucs_table_min && c < 0x100) { + s = cp850_ucs_table[c - cp850_ucs_table_min]; + if (s <= 0) { + s = c; + s &= MBFL_WCSPLANE_MASK; + s |= MBFL_WCSPLANE_CP850; + } + } else { + s = c; + s &= MBFL_WCSGROUP_MASK; + s |=
MBFL_WCSGROUP_THROUGH; + } + + CK((*filter->output_function)(s, filter->data)); + + return c; +} + +/* + * wchar => cp850: values below 0x80 are taken as-is (negative ones end up on the illegal-output path); others are found by a reverse linear scan of cp850_ucs_table, or unmasked when tagged with MBFL_WCSPLANE_CP850; values with no mapping go to mbfl_filt_conv_illegal_output unless illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE + */ +int +mbfl_filt_conv_wchar_cp850(int c, mbfl_convert_filter *filter) +{ + int s, n; + + if (c < 0x80) { + s = c; + } else { + s = -1; + n = cp850_ucs_table_len-1; + while (n >= 0) { + if (c == cp850_ucs_table[n]) { + s = cp850_ucs_table_min + n; + break; + } + n--; + } + if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP850) { + s = c & MBFL_WCSPLANE_MASK; + } + } + + if (s >= 0) { + CK((*filter->output_function)(s, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} +/* Identify filter for CP850: clears filter->flag when 0x80 <= c < 0xff and sets it to 1 otherwise; always returns c. NOTE(review): this flags every value below 0x80 and lets a later high byte clear an earlier flag - confirm this is intended. */ +static int mbfl_filt_ident_cp850(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xff) + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; +} + + diff -Naurp libmbfl-1.0.1/filters/mbfilter_cp850.h libmbfl-1.0.1.oden/filters/mbfilter_cp850.h --- libmbfl-1.0.1/filters/mbfilter_cp850.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_cp850.h 2009-03-18 18:44:25.000000000 +0100 @@ -0,0 +1,37 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Den V. Tsopa <tdv@edisoft.ru> + * Adaption for CP850: D. Giffeler <dg@artegic.de> + * + */ + +#ifndef MBFL_MBFILTER_CP850_H +#define MBFL_MBFILTER_CP850_H +/* NOTE(review): unlike mbfilter_cp51932.h this header does not include mbfilter.h itself; it presumably relies on the includer to declare mbfl_encoding and mbfl_convert_filter first - confirm. */ +extern const mbfl_encoding mbfl_encoding_cp850; +extern const struct mbfl_identify_vtbl vtbl_identify_cp850; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp850; +extern const struct mbfl_convert_vtbl vtbl_cp850_wchar; + +int mbfl_filt_conv_cp850_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp850(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CP850_H */ diff -Naurp libmbfl-1.0.1/filters/mbfilter_cp932.c libmbfl-1.0.1.oden/filters/mbfilter_cp932.c --- libmbfl-1.0.1/filters/mbfilter_cp932.c 2002-12-20 19:17:31.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_cp932.c 2010-03-12 05:55:37.000000000 +0100 @@ -37,7 +37,7 @@ #include "unicode_table_cp932_ext.h" #include "unicode_table_jis.h" -static int mbfl_filt_ident_sjiswin(int c, mbfl_identify_filter *filter); +static int mbfl_filt_ident_cp932(int c, mbfl_identify_filter *filter); static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -58,39 +58,39 @@ static const unsigned char mblen_table_s 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }; -static const char *mbfl_encoding_sjis_win_aliases[] = {"SJIS-open", "CP932", "Windows-31J", "MS_Kanji", NULL}; +static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", NULL}; -const mbfl_encoding mbfl_encoding_sjis_win = { - mbfl_no_encoding_sjis_win, - "SJIS-win", +const mbfl_encoding mbfl_encoding_cp932 = { + mbfl_no_encoding_cp932, + "CP932", "Shift_JIS", - (const char
*(*)[])&mbfl_encoding_sjis_win_aliases, + (const char *(*)[])&mbfl_encoding_cp932_aliases, mblen_table_sjis, MBFL_ENCTYPE_MBCS }; -const struct mbfl_identify_vtbl vtbl_identify_sjiswin = { - mbfl_no_encoding_sjis_win, +const struct mbfl_identify_vtbl vtbl_identify_cp932 = { + mbfl_no_encoding_cp932, mbfl_filt_ident_common_ctor, mbfl_filt_ident_common_dtor, - mbfl_filt_ident_sjiswin + mbfl_filt_ident_cp932 }; -const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = { - mbfl_no_encoding_sjis_win, +const struct mbfl_convert_vtbl vtbl_cp932_wchar = { + mbfl_no_encoding_cp932, mbfl_no_encoding_wchar, mbfl_filt_conv_common_ctor, mbfl_filt_conv_common_dtor, - mbfl_filt_conv_sjiswin_wchar, + mbfl_filt_conv_cp932_wchar, mbfl_filt_conv_common_flush }; -const struct mbfl_convert_vtbl vtbl_wchar_sjiswin = { +const struct mbfl_convert_vtbl vtbl_wchar_cp932 = { mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis_win, + mbfl_no_encoding_cp932, mbfl_filt_conv_common_ctor, mbfl_filt_conv_common_dtor, - mbfl_filt_conv_wchar_sjiswin, + mbfl_filt_conv_wchar_cp932, mbfl_filt_conv_common_flush }; @@ -144,7 +144,7 @@ const struct mbfl_convert_vtbl vtbl_wcha * SJIS-win => wchar */ int -mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter) +mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter) { int c1, s, s1, s2, w; @@ -167,7 +167,7 @@ mbfl_filt_conv_sjiswin_wchar(int c, mbfl case 1: /* kanji second char */ filter->status = 0; c1 = filter->cache; - if (c > 0x39 && c < 0xfd && c != 0x7f) { + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { w = 0; SJIS_DECODE(c1, c, s1, s2); s = (s1 - 0x21)*94 + s2 - 0x21; @@ -229,7 +229,7 @@ mbfl_filt_conv_sjiswin_wchar(int c, mbfl * wchar => SJIS-win */ int -mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter) +mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter) { int c1, c2, s1, s2; @@ -261,9 +261,9 @@ mbfl_filt_conv_wchar_sjiswin(int c, mbfl s1 = c & MBFL_WCSPLANE_MASK; s1 |= 0x8080; } else if (c == 0xa5) { /* YEN SIGN */ - 
s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + s1 = 0x005c; /* YEN SIGN */ } else if (c == 0x203e) { /* OVER LINE */ - s1 = 0x2131; /* FULLWIDTH MACRON */ + s1 = 0x007e; /* 0x7e, no longer FULLWIDTH MACRON (0x2131) */ } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s1 = 0x2140; } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ @@ -327,7 +327,7 @@ mbfl_filt_conv_wchar_sjiswin(int c, mbfl return c; } -static int mbfl_filt_ident_sjiswin(int c, mbfl_identify_filter *filter) +static int mbfl_filt_ident_cp932(int c, mbfl_identify_filter *filter) { if (filter->status) { /* kanji second char */ if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */ diff -Naurp libmbfl-1.0.1/filters/mbfilter_cp932.h libmbfl-1.0.1.oden/filters/mbfilter_cp932.h --- libmbfl-1.0.1/filters/mbfilter_cp932.h 2002-12-20 19:17:31.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_cp932.h 2010-03-12 05:55:37.000000000 +0100 @@ -32,12 +32,12 @@ #include "mbfilter.h" -extern const mbfl_encoding mbfl_encoding_sjis_win; -extern const struct mbfl_identify_vtbl vtbl_identify_sjiswin; -extern const struct mbfl_convert_vtbl vtbl_sjiswin_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjiswin; +extern const mbfl_encoding mbfl_encoding_cp932; +extern const struct mbfl_identify_vtbl vtbl_identify_cp932; +extern const struct mbfl_convert_vtbl vtbl_cp932_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp932; -int mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter); #endif /* MBFL_MBFILTER_CP932_H */ diff -Naurp libmbfl-1.0.1/filters/mbfilter_cp936.c libmbfl-1.0.1.oden/filters/mbfilter_cp936.c --- libmbfl-1.0.1/filters/mbfilter_cp936.c 2002-12-20 19:17:31.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_cp936.c 2009-02-02 05:53:10.000000000 +0100 @@ -192,7 +192,7 @@
mbfl_filt_conv_wchar_cp936(int c, mbfl_c } } if (s >= 0) { - if (s < 0x80) { /* latin */ + if (s <= 0x80) { /* latin */ CK((*filter->output_function)(s, filter->data)); } else { CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); diff -Naurp libmbfl-1.0.1/filters/mbfilter_euc_jp_win.c libmbfl-1.0.1.oden/filters/mbfilter_euc_jp_win.c --- libmbfl-1.0.1/filters/mbfilter_euc_jp_win.c 2002-12-20 19:17:31.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_euc_jp_win.c 2010-02-28 10:33:05.000000000 +0100 @@ -60,7 +60,8 @@ static const unsigned char mblen_table_e }; -static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", NULL}; +static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", + "eucJP-ms", NULL}; const struct mbfl_identify_vtbl vtbl_identify_eucjpwin = { mbfl_no_encoding_eucjp_win, @@ -152,8 +153,8 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbf w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ w = jisx0208_ucs_table[s]; - } else if (s >= (84*94)) { /* user (85ku - 94ku) */ - w = s - (84*94) + 0xe000; + } else if (s >= (84 * 94)) { /* user (85ku - 94ku) */ + w = s - (84 * 94) + 0xe000; } } if (w <= 0) { @@ -203,6 +204,9 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbf s = (c1 - 0xa1)*94 + c - 0xa1; if (s >= 0 && s < jisx0212_ucs_table_size) { w = jisx0212_ucs_table[s]; + if (w == 0x007e) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } } else if (s >= (82*94) && s < (84*94)) { /* vender ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */ s = (c1<< 8) | c; w = 0; @@ -221,6 +225,9 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbf } else { w = 0; } + if (w == 0x00A6) { + w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ + } if (w <= 0) { w = ((c1 & 0x7f) << 8) | (c & 0x7f); w &= MBFL_WCSPLANE_MASK; @@ -273,6 +280,9 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbf c2 = s1%94 + 0xa1; s1 = (c1 << 8) | c2; } + if (s1 == 0xa2f1) { + s1 = 0x2d62; /* NUMERO SIGN */ + } if (s1 <= 0) { c1 = c & 
~MBFL_WCSPLANE_MASK; if (c1 == MBFL_WCSPLANE_WINCP932) { @@ -310,13 +320,17 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbf s1 = 0x2172; } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ s1 = 0x224c; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; } else { s1 = -1; c1 = 0; c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + const int oh = cp932ext1_ucs_table_min / 94; /* table-derived row offset replacing the hard-coded 0x2d */ + if (c == cp932ext1_ucs_table[c1]) { - s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + s1 = ((c1 / 94 + oh + 0x21) << 8) + (c1 % 94 + 0x21); break; } c1++; diff -Naurp libmbfl-1.0.1/filters/mbfilter_htmlent.c libmbfl-1.0.1.oden/filters/mbfilter_htmlent.c --- libmbfl-1.0.1/filters/mbfilter_htmlent.c 2005-02-21 11:09:40.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_htmlent.c 2010-03-12 05:55:37.000000000 +0100 @@ -146,6 +146,11 @@ int mbfl_filt_conv_html_enc_flush(mbfl_c { filter->status = 0; filter->opaque = NULL; + + if (filter->flush_function != NULL) { + (*filter->flush_function)(filter->data); + } + return 0; } @@ -186,18 +191,58 @@ int mbfl_filt_conv_html_dec(int c, mbfl_ } } else { if (c == ';') { - buffer[filter->status] = 0; if (buffer[1]=='#') { - /* numeric entity */ - for (pos=2; pos<filter->status; pos++) { - ent = ent*10 + (buffer[pos] - '0'); + if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) { + if (filter->status > 3) { + /* hexadecimal numeric entity */ + for (pos=3; pos<filter->status; pos++) { + int v = buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else if (v >= 'A' && v <= 'F') { + v = v - 'A' + 10; + } else if (v >= 'a' && v <= 'f') { + v = v - 'a' + 10; + } else { + ent = -1; + break; + } + ent = ent * 16 + v; /* NOTE(review): nothing visible here bounds ent; a long digit run could overflow int unless the buffer size limits it - confirm */ + } + } else { + ent = -1; + } + } else { + /* decimal numeric entity */ + if (filter->status > 2) { + for (pos=2; pos<filter->status; pos++) { + int v = buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else { + ent = -1; + break; + } + ent = ent*10 + v; /* NOTE(review): same unbounded accumulation as the hexadecimal branch - confirm */ + } + } else { +
ent = -1; + } + } + if (ent >= 0 && ent < 0x110000) { /* emit only values in 0..0x10ffff; otherwise replay the buffered text and c verbatim */ + CK((*filter->output_function)(ent, filter->data)); + } else { + for (pos = 0; pos < filter->status; pos++) { + CK((*filter->output_function)(buffer[pos], filter->data)); + } + CK((*filter->output_function)(c, filter->data)); } - CK((*filter->output_function)(ent, filter->data)); filter->status = 0; /*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/ } else { /* named entity */ - entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; + buffer[filter->status] = 0; + entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; while (entity->name) { if (!strcmp(buffer+1, entity->name)) { ent = entity->code; @@ -232,8 +277,7 @@ int mbfl_filt_conv_html_dec(int c, mbfl_ mbfl_filt_conv_html_dec_flush(filter); if (c=='&') { - filter->status = 1; - buffer[0] = '&'; + buffer[filter->status++] = '&'; } } } @@ -244,17 +288,25 @@ int mbfl_filt_conv_html_dec(int c, mbfl_ int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter) { int status, pos = 0; - char *buffer; + unsigned char *buffer; + int err = 0; - buffer = (char*)filter->opaque; + buffer = (unsigned char*)filter->opaque; status = filter->status; + filter->status = 0; + /* flush fragments */ while (status--) { - CK((*filter->output_function)(buffer[pos++], filter->data)); + int e = (*filter->output_function)(buffer[pos++], filter->data); + if (e != 0) + err = e; /* remember the failure but keep flushing the remaining buffered values */ } - filter->status = 0; - /*filter->buffer = 0; of cause NOT*/ - return 0; + + if (filter->flush_function != NULL) { + (*filter->flush_function)(filter->data); /* NOTE(review): the result of flush_function is discarded; only output errors reach the caller - confirm intended */ + } + + return err; } diff -Naurp libmbfl-1.0.1/filters/mbfilter_iso2022_jp_ms.c libmbfl-1.0.1.oden/filters/mbfilter_iso2022_jp_ms.c --- libmbfl-1.0.1/filters/mbfilter_iso2022_jp_ms.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_iso2022_jp_ms.c 2010-03-12 05:55:37.000000000 +0100 @@ -0,0 +1,528 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c)
1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this file was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_iso2022_jp_ms.h" + +#include "unicode_table_cp932_ext.h" +#include "unicode_table_jis.h" +#include "cp932_table.h" + +static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL}; + +const mbfl_encoding mbfl_encoding_2022jpms = { + mbfl_no_encoding_2022jpms, + "ISO-2022-JP-MS", + "ISO-2022-JP", + (const char *(*)[])&mbfl_encoding_2022jpms_aliases, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const struct mbfl_identify_vtbl vtbl_identify_2022jpms = { + mbfl_no_encoding_2022jpms, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_2022jpms +}; + +const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = { + mbfl_no_encoding_2022jpms, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_2022jpms_wchar, +
mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_2022jpms, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_2022jpms, + mbfl_filt_conv_any_2022jpms_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) +/* sjistoidx: linear index of a two-byte pair at 188 cells per lead byte; lead bytes above 0x9f are rebased from 0xc1 instead of 0x81 and trail bytes above 0x7e are shifted down by one. idxtojis1 and idxtojis2 split an index into a 94-cell row and cell, each offset by 0x21. */ +#define sjistoidx(c1, c2) \ + (((c1) > 0x9f) \ + ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \ + : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))) +#define idxtojis1(c) (((c) / 94) + 0x21) +#define idxtojis2(c) (((c) % 94) + 0x21) + +/* + * ISO-2022-JP-MS => wchar (the low nibble of filter->status is the escape-sequence parsing step, the remaining bits select the active character set as listed in the case comments) + */ +int +mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0xa0: UDC */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ + CK((*filter->output_function)(0xff40 + c, filter->data)); + } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */ + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else { + w = c & MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0xa1: UDC second char */ + case 1: + w = 0; + filter->status &= ~0xf; + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; + if (filter->status == 0x80) { + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ +
} else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else { + w = 0; + } + } + if (w <= 0) { /* no mapping found: pass the raw byte pair through tagged with the JIS0208 plane */ + w = (c1 << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0208; + } + CK((*filter->output_function)(w, filter->data)); + } else { + if (c1 > 0x20 && c1 < 0x35) { /* UDC: lead bytes 0x21-0x34 map linearly onto 0xe000 + row*94 + cell */ + w = 0xe000 + (c1 - 0x21)*94 + c - 0x21; + } + if (w <= 0) { + w = (((c1 - 0x21) + 0x7f) << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0208; + } + CK((*filter->output_function)(w, filter->data)); + } + } else if (c == 0x1b) { + filter->status += 2; + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = (c1 << 8) | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + /* ESC */ +/* case 0x02: */ +/* case 0x12: */ +/* case 0x22: */ +/* case 0x82: */ +/* case 0xa2: */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + goto retry; /* not a recognised sequence: ESC has been emitted literally, now reprocess c in the base state */ + } + break; + + /* ESC $ */ +/* case 0x03: */ +/* case 0x13: */ +/* case 0x23: */ +/* case 0x83: */ +/* case 0xa3: */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ +
filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + goto retry; + } + break; + + /* ESC $ ( */ +/* case 0x04: */ +/* case 0x14: */ +/* case 0x24: */ +/* case 0x84: */ +/* case 0xa4: */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x3f) { /* '?' */ + filter->status = 0xa0; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + /* ESC ( */ +/* case 0x05: */ +/* case 0x15: */ +/* case 0x25: */ +/* case 0x85: */ +/* case 0xa5: */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ /* handled like B: status 0x10 is never assigned here */ + filter->status = 0; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} +/* Convert offset c, counted from sjistoidx(0xfa, 0x40), into a two-byte JIS value: the index is moved down by one of three range-specific amounts (boundaries at trail bytes 0x5c, 0x55 and 0x40 of lead 0xfa) and then split with idxtojis1 and idxtojis2. */ +static int +cp932ext3_cp932ext2_jis(int c) +{ + int idx; + + idx = sjistoidx(0xfa, 0x40) + c; + if (idx >= sjistoidx(0xfa, 0x5c)) + idx -= sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40); + else if (idx >= sjistoidx(0xfa, 0x55)) + idx -= sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa); + else if (idx >= sjistoidx(0xfa, 0x40)) + idx -= sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef); + return idxtojis1(idx) << 8 | idxtojis2(idx); +} + +/* + * wchar => ISO-2022-JP-MS + */ +int +mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1, s2; + + s1 = 0; + s2 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min &&
c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ + s1 = c - 0xe000; + c1 = s1/94 + 0x7f; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + } + if (s1 <= 0) { + c1 = c & ~MBFL_WCSPLANE_MASK; + if (c1 == MBFL_WCSPLANE_WINCP932) { + s1 = c & MBFL_WCSPLANE_MASK; + s2 = 1; /* s2 = 1 exempts this value from the X 0212 rejection test below */ + } else if (c1 == MBFL_WCSPLANE_JIS0208) { + s1 = c & MBFL_WCSPLANE_MASK; + } else if (c1 == MBFL_WCSPLANE_JIS0212) { + s1 = c & MBFL_WCSPLANE_MASK; + s1 |= 0x8080; + } else if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + } else if (c == 0x203e) { /* OVER LINE */ + s1 = 0x2131; /* FULLWIDTH MACRON */ + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } + } + if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); /* NOTE(review): row 0x2d is hard-coded here, while the eucJP-win hunk in this patch derives it from cp932ext1_ucs_table_min - confirm whether the two should match */ + break; + } + c1++; + } + if (s1 <= 0) { + c1 = 0; + c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext3_ucs_table[c1]) { + s1 = cp932ext3_cp932ext2_jis(c1); + break; + } + c1++; + } + } + if (c == 0)
{ + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + if (s1 >= 0) { /* filter->status & 0xff00 records which designation was last written: 0 = ESC ( B, 0x100 = ESC ( I, 0x200 = ESC $ B, 0x800 = ESC $ ( ? - the sequence is only re-emitted when the target set changes */ + if (s1 < 0x80) { /* latin */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + CK((*filter->output_function)(s1, filter->data)); + filter->status = 0; + } else if (s1 > 0xa0 && s1 < 0xe0) { /* kana */ + if ((filter->status & 0xff00) != 0x100) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ + } + filter->status = 0x100; + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } else if (s1 < 0x7e7f) { /* X 0208 */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0x200; + CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } else if (s1 < 0x927f) { /* UDC */ /* NOTE(review): s1 >= 0x927f matches no branch, so nothing is written and the illegal-output path is not taken - confirm */ + if ((filter->status & 0xff00) != 0x800) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x3f, filter->data)); /* '?'
*/ + } + filter->status = 0x800; + CK((*filter->output_function)(((s1 >> 8) - 0x5e) & 0x7f, filter->data)); + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} +/* Flush for the ISO-2022-JP-MS encoder: when the upper status byte is non-zero it writes ESC ( B, then clears that byte and chains to filter->flush_function if one is set. Returns the chained result, 0 when there is no flush_function, or -1 via CK when writing fails. */ +int +mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter) +{ + /* back to latin */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + + filter->status &= 0xff; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} +/* Identify filter for ISO-2022-JP-MS: walks the status values named in the case comments below (low nibble = escape parsing step) without producing output, and sets filter->flag to 1 on values or escape sequences it does not accept. Always returns c. */ +static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0xa0: UDC */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0xa1: UDC second char */ + case 1: + filter->status &= ~0xf; + if (c == 0x1b) { + goto retry; + } else if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->flag = 1; /* bad */ +
filter->status &= ~0xf; + goto retry; /* flag is already set; re-examine c as ordinary input in the base state */ + } + break; + + /* ESC $ ( */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x3f) { /* '?' */ + filter->status = 0xa0; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} diff -Naurp libmbfl-1.0.1/filters/mbfilter_iso2022_jp_ms.h libmbfl-1.0.1.oden/filters/mbfilter_iso2022_jp_ms.h --- libmbfl-1.0.1/filters/mbfilter_iso2022_jp_ms.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_iso2022_jp_ms.h 2007-09-26 17:44:16.000000000 +0200 @@ -0,0 +1,44 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this file was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifndef MBFL_MBFILTER_ISO2022_JP_MS_H +#define MBFL_MBFILTER_ISO2022_JP_MS_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_2022jpms; +extern const struct mbfl_identify_vtbl vtbl_identify_2022jpms; +extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms; + +int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_ISO2022_JP_MS_H */ diff -Naurp libmbfl-1.0.1/filters/mbfilter_iso2022_kr.c libmbfl-1.0.1.oden/filters/mbfilter_iso2022_kr.c --- libmbfl-1.0.1/filters/mbfilter_iso2022_kr.c 2002-12-20 19:17:31.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_iso2022_kr.c 2010-03-12 05:55:37.000000000 +0100 @@ -276,7 +276,13 @@ mbfl_filt_conv_any_2022kr_flush(mbfl_con if ((filter->status & 0xff00) != 0) { CK((*filter->output_function)(0x0f, filter->data)); /* SI */ } + filter->status &= 0xff; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + return 0; } diff -Naurp libmbfl-1.0.1/filters/mbfilter_jis.c libmbfl-1.0.1.oden/filters/mbfilter_jis.c --- libmbfl-1.0.1/filters/mbfilter_jis.c 2002-12-20 19:17:31.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_jis.c 2010-03-12 05:55:37.000000000 +0100 @@ -478,6 +478,11 @@ mbfl_filt_conv_any_jis_flush(mbfl_conver CK((*filter->output_function)(0x42, filter->data)); /* 'B'
*/ } filter->status &= 0xff; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + return 0; } diff -Naurp libmbfl-1.0.1/filters/mbfilter_koi8u.c libmbfl-1.0.1.oden/filters/mbfilter_koi8u.c --- libmbfl-1.0.1/filters/mbfilter_koi8u.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_koi8u.c 2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,146 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Maksym Veremeyenko <verem@m1.tv> + * + * Based on mbfilter_koi8r.c code + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_koi8u.h" +#include "unicode_table_koi8u.h" + +static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_koi8u_aliases[] = {"KOI8-U", "KOI8U", NULL}; + +const mbfl_encoding mbfl_encoding_koi8u = { + mbfl_no_encoding_koi8u, + "KOI8-U", + "KOI8-U", + (const char *(*)[])&mbfl_encoding_koi8u_aliases, + NULL, + MBFL_ENCTYPE_SBCS +}; + +const struct mbfl_identify_vtbl vtbl_identify_koi8u = { + mbfl_no_encoding_koi8u, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_koi8u +}; + +const struct mbfl_convert_vtbl vtbl_wchar_koi8u = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_koi8u, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_koi8u, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_koi8u_wchar = { + mbfl_no_encoding_koi8u, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_koi8u_wchar, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* + * koi8u => wchar + */ +int +mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter) +{ + int s; + + if (c >= 0 && c < koi8u_ucs_table_min) { + s = c; + } else if (c >= koi8u_ucs_table_min && c < 0x100) { + s = koi8u_ucs_table[c - koi8u_ucs_table_min]; + if (s <= 0) { + s = c; + s &= MBFL_WCSPLANE_MASK; + s |= MBFL_WCSPLANE_KOI8U; + } + } else { + s = c; + s &= MBFL_WCSGROUP_MASK; + s |= MBFL_WCSGROUP_THROUGH; + } + + 
CK((*filter->output_function)(s, filter->data)); + + return c; +} + +/* + * wchar => koi8u + */ +int +mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter) +{ + int s, n; + + if (c < 0x80) { + s = c; + } else { + s = -1; + n = koi8u_ucs_table_len-1; + while (n >= 0) { + if (c == koi8u_ucs_table[n]) { + s = koi8u_ucs_table_min + n; + break; + } + n--; + } + if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_KOI8U) { + s = c & MBFL_WCSPLANE_MASK; + } + } + + if (s >= 0) { + CK((*filter->output_function)(s, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xff) + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; +} diff -Naurp libmbfl-1.0.1/filters/mbfilter_koi8u.h libmbfl-1.0.1.oden/filters/mbfilter_koi8u.h --- libmbfl-1.0.1/filters/mbfilter_koi8u.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_koi8u.h 2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,47 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Maksym Veremeyenko <verem@m1.tv> + * + * Based on mbfilter_koi8r.h code + * + */ + +#ifndef MBFL_MBFILTER_KOI8U_H +#define MBFL_MBFILTER_KOI8U_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_koi8u; +extern const struct mbfl_identify_vtbl vtbl_identify_koi8u; +extern const struct mbfl_convert_vtbl vtbl_wchar_koi8u; +extern const struct mbfl_convert_vtbl vtbl_koi8u_wchar; + +int mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_KOI8U_H */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + */ diff -Naurp libmbfl-1.0.1/filters/mbfilter_sjis.c libmbfl-1.0.1.oden/filters/mbfilter_sjis.c --- libmbfl-1.0.1/filters/mbfilter_sjis.c 2002-12-20 19:17:31.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_sjis.c 2009-02-16 02:57:37.000000000 +0100 @@ -167,7 +167,7 @@ mbfl_filt_conv_sjis_wchar(int c, mbfl_co case 1: /* kanji second char */ filter->status = 0; c1 = filter->cache; - if (c > 0x39 && c < 0xfd && c != 0x7f) { + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { SJIS_DECODE(c1, c, s1, s2); w = (s1 - 0x21)*94 + s2 - 0x21; if (w >= 0 && w < jisx0208_ucs_table_size) { diff -Naurp libmbfl-1.0.1/filters/mbfilter_sjis_open.c libmbfl-1.0.1.oden/filters/mbfilter_sjis_open.c --- libmbfl-1.0.1/filters/mbfilter_sjis_open.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_sjis_open.c 2010-03-12 05:55:37.000000000 +0100 @@ -0,0 +1,350 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
+ * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * the source code included in this files was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_sjis_open.h" + +#include "unicode_table_cp932_ext.h" +#include "unicode_table_jis.h" + +static int mbfl_filt_ident_sjis_open(int c, mbfl_identify_filter *filter); + +static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +static const char *mbfl_encoding_sjis_open_aliases[] = {"SJIS-open", "SJIS-ms", NULL}; + +const mbfl_encoding mbfl_encoding_sjis_open = { + mbfl_no_encoding_sjis_open, + "SJIS-win", + "Shift_JIS", + (const char *(*)[])&mbfl_encoding_sjis_open_aliases, + mblen_table_sjis, + MBFL_ENCTYPE_MBCS +}; + +const struct mbfl_identify_vtbl vtbl_identify_sjis_open = { + mbfl_no_encoding_sjis_open, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_sjis_open +}; + +const struct mbfl_convert_vtbl vtbl_sjis_open_wchar = { + mbfl_no_encoding_sjis_open, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_sjis_open_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_sjis_open = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis_open, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_sjis_open, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +#define SJIS_ENCODE(c1,c2,s1,s2) \ + do { \ + s1 = c1; \ + s1--; \ + s1 >>= 1; \ + if ((c1) < 0x5f) { \ + s1 += 0x71; \ + } else { \ + s1 += 0xb1; \ + } \ + s2 = c2; \ + if ((c1) & 1) { \ + if ((c2) < 0x60) { \ + s2--; \ + } \ + s2 += 0x20; \ + } else { \ + s2 += 0x7e; \ + } \ + } while (0) + +#define SJIS_DECODE(c1,c2,s1,s2) \ + do { \ + s1 = c1; \ + if (s1 < 0xa0) { \ + s1 -= 0x81; \ + } else { \ + s1 -= 0xc1; \ + } \ + s1 <<= 1; \ + s1 += 0x21; \ + s2 = c2; \ + if (s2 < 0x9f) { \ + if (s2 < 0x7f) { \ + s2++; \ + } \ + s2 -= 0x20; \ + } else { \ + s1++; \ + s2 -= 0x7e; \ + } \ + } while (0) + + +/* + * SJIS-win => wchar + */ +int +mbfl_filt_conv_sjis_open_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, s1, s2, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, 
filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ + filter->status = 1; + filter->cache = c; + } else { + w = c & MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + case 1: /* kanji second char */ + filter->status = 0; + c1 = filter->cache; + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { + w = 0; + SJIS_DECODE(c1, c, s1, s2); + s = (s1 - 0x21)*94 + s2 - 0x21; + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */ + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */ + w = s - (94*94) + 0xe000; + } + } + if (w <= 0) { + w = (s1 << 8) | s2; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_WINCP932; + } + CK((*filter->output_function)(w, filter->data)); + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); 
+ } else { + w = (c1 << 8) | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +/* + * wchar => SJIS-win + */ +int +mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1, s2; + + s1 = 0; + s2 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ + s1 = c - 0xe000; + c1 = s1/94 + 0x7f; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + if (s1 <= 0) { + c1 = c & ~MBFL_WCSPLANE_MASK; + if (c1 == MBFL_WCSPLANE_WINCP932) { + s1 = c & MBFL_WCSPLANE_MASK; + s2 = 1; + } else if (c1 == MBFL_WCSPLANE_JIS0208) { + s1 = c & MBFL_WCSPLANE_MASK; + } else if (c1 == MBFL_WCSPLANE_JIS0212) { + s1 = c & MBFL_WCSPLANE_MASK; + s1 |= 0x8080; + } else if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + } else if (c == 0x203e) { /* OVER LINE */ + s1 = 0x2131; /* FULLWIDTH MACRON */ + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } + } + if ((s1 <= 0) || (s1 >= 0x8080 && s2 
== 0)) { /* not found or X 0212 */ + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 <= 0) { + c1 = 0; + c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext3_ucs_table[c1]) { + s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + } + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + if (s1 >= 0) { + if (s1 < 0x100) { /* latin or kana */ + CK((*filter->output_function)(s1, filter->data)); + } else { /* kanji */ + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +static int mbfl_filt_ident_sjis_open(int c, mbfl_identify_filter *filter) +{ + if (filter->status) { /* kanji second char */ + if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + } else if (c >= 0 && c < 0x80) { /* latin ok */ + ; + } else if (c > 0xa0 && c < 0xe0) { /* kana ok */ + ; + } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ + filter->status = 1; + } else { /* bad */ + filter->flag = 1; + } + + return c; +} + + diff -Naurp libmbfl-1.0.1/filters/mbfilter_sjis_open.h libmbfl-1.0.1.oden/filters/mbfilter_sjis_open.h --- libmbfl-1.0.1/filters/mbfilter_sjis_open.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_sjis_open.h 2010-03-12 05:55:37.000000000 +0100 @@ -0,0 +1,43 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
+ * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * the source code included in this files was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifndef MBFL_MBFILTER_SJIS_OPEN_H +#define MBFL_MBFILTER_SJIS_OPEN_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_sjis_open; +extern const struct mbfl_identify_vtbl vtbl_identify_sjis_open; +extern const struct mbfl_convert_vtbl vtbl_sjis_open_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_open; + +int mbfl_filt_conv_sjis_open_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_SJIS_OPEN_H */ diff -Naurp libmbfl-1.0.1/filters/mbfilter_tl_jisx0201_jisx0208.c libmbfl-1.0.1.oden/filters/mbfilter_tl_jisx0201_jisx0208.c --- libmbfl-1.0.1/filters/mbfilter_tl_jisx0201_jisx0208.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_tl_jisx0201_jisx0208.c 2010-03-12 05:55:37.000000000 +0100 @@ -0,0 +1,302 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
+ * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: Moriyoshi Koizumi <koizumi@gree.co.jp> + * + */ + +#include "mbfl_allocators.h" +#include "mbfilter_tl_jisx0201_jisx0208.h" +#include "translit_kana_jisx0201_jisx0208.h" + +void +mbfl_filt_tl_jisx0201_jisx0208_init(mbfl_convert_filter *filt) +{ + mbfl_filt_conv_common_ctor(filt); +} + +void +mbfl_filt_tl_jisx0201_jisx0208_cleanup(mbfl_convert_filter *filt) +{ +} + +int +mbfl_filt_tl_jisx0201_jisx0208(int c, mbfl_convert_filter *filt) +{ + int s, n; + int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode; + + s = c; + + if ((mode & MBFL_FILT_TL_HAN2ZEN_ALL) + && c >= 0x21 && c <= 0x7d && c != 0x22 && c != 0x27 && c != 0x5c) { + /* all except <"> <'> <\> <~> */ + s = c + 0xfee0; + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_ALPHA) && + ((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a))) { + /* alpha */ + s = c + 0xfee0; + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_NUMERIC) && + c >= 0x30 && c <= 0x39) { + /* num */ + s = c + 0xfee0; + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_SPACE) && c == 0x20) { + /* space */ + s = 0x3000; + } + + if (mode & + (MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_HIRAGANA)) { + /* hankaku kana to zenkaku kana */ + if ((mode & 
MBFL_FILT_TL_HAN2ZEN_KATAKANA) && + (mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) { + /* hankaku kana to zenkaku katakana and glue voiced sound mark */ + if (c >= 0xff61 && c <= 0xff9f) { + if (filt->status) { + n = (filt->cache - 0xff60) & 0x3f; + if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { + filt->status = 0; + s = 0x3001 + hankana2zenkana_table[n]; + } else if (c == 0xff9e && n == 19) { + filt->status = 0; + s = 0x30f4; + } else if (c == 0xff9f && (n >= 42 && n <= 46)) { + filt->status = 0; + s = 0x3002 + hankana2zenkana_table[n]; + } else { + filt->status = 1; + filt->cache = c; + s = 0x3000 + hankana2zenkana_table[n]; + } + } else { + filt->status = 1; + filt->cache = c; + return c; + } + } else { + if (filt->status) { + n = (filt->cache - 0xff60) & 0x3f; + filt->status = 0; + (*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data); + } + } + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA) && + (mode & MBFL_FILT_TL_HAN2ZEN_GLUE)) { + /* hankaku kana to zenkaku hirangana and glue voiced sound mark */ + if (c >= 0xff61 && c <= 0xff9f) { + if (filt->status) { + n = (filt->cache - 0xff60) & 0x3f; + if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { + filt->status = 0; + s = 0x3001 + hankana2zenhira_table[n]; + } else if (c == 0xff9f && (n >= 42 && n <= 46)) { + filt->status = 0; + s = 0x3002 + hankana2zenhira_table[n]; + } else { + filt->status = 1; + filt->cache = c; + s = 0x3000 + hankana2zenhira_table[n]; + } + } else { + filt->status = 1; + filt->cache = c; + return c; + } + } else { + if (filt->status) { + n = (filt->cache - 0xff60) & 0x3f; + filt->status = 0; + (*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data); + } + } + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_KATAKANA) && + c >= 0xff61 && c <= 0xff9f) { + /* hankaku kana to zenkaku katakana */ + s = 0x3000 + hankana2zenkana_table[c - 0xff60]; + } else if ((mode & MBFL_FILT_TL_HAN2ZEN_HIRAGANA) + && c >= 0xff61 && c <= 0xff9f) { + /* 
hankaku kana to zenkaku hirangana */ + s = 0x3000 + hankana2zenhira_table[c - 0xff60]; + } + } + + if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT1) { + /* special ascii to symbol */ + if (c == 0x5c) { + s = 0xffe5; /* FULLWIDTH YEN SIGN */ + } else if (c == 0xa5) { /* YEN SIGN */ + s = 0xffe5; /* FULLWIDTH YEN SIGN */ + } else if (c == 0x7e) { + s = 0xffe3; /* FULLWIDTH MACRON */ + } else if (c == 0x203e) { /* OVERLINE */ + s = 0xffe3; /* FULLWIDTH MACRON */ + } else if (c == 0x27) { + s = 0x2019; /* RIGHT SINGLE QUOTATION MARK */ + } else if (c == 0x22) { + s = 0x201d; /* RIGHT DOUBLE QUOTATION MARK */ + } + } else if (mode & MBFL_FILT_TL_HAN2ZEN_COMPAT2) { + /* special ascii to symbol */ + if (c == 0x5c) { + s = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (c == 0x7e) { + s = 0xff5e; /* FULLWIDTH TILDE */ + } else if (c == 0x27) { + s = 0xff07; /* FULLWIDTH APOSTROPHE */ + } else if (c == 0x22) { + s = 0xff02; /* FULLWIDTH QUOTATION MARK */ + } + } + + if (mode & 0xf0) { /* zenkaku to hankaku */ + if ((mode & 0x10) && c >= 0xff01 && c <= 0xff5d && c != 0xff02 && c != 0xff07 && c!= 0xff3c) { /* all except <"> <'> <\> <~> */ + s = c - 0xfee0; + } else if ((mode & 0x20) && ((c >= 0xff21 && c <= 0xff3a) || (c >= 0xff41 && c <= 0xff5a))) { /* alpha */ + s = c - 0xfee0; + } else if ((mode & 0x40) && (c >= 0xff10 && c <= 0xff19)) { /* num */ + s = c - 0xfee0; + } else if ((mode & 0x80) && (c == 0x3000)) { /* spase */ + s = 0x20; + } else if ((mode & 0x10) && (c == 0x2212)) { /* MINUS SIGN */ + s = 0x2d; + } + } + + if (mode & + (MBFL_FILT_TL_ZEN2HAN_KATAKANA | MBFL_FILT_TL_ZEN2HAN_HIRAGANA)) { + /* Zenkaku kana to hankaku kana */ + if ((mode & MBFL_FILT_TL_ZEN2HAN_KATAKANA) && + c >= 0x30a1 && c <= 0x30f4) { + /* Zenkaku katakana to hankaku kana */ + n = c - 0x30a1; + if (zenkana2hankana_table[n][1] != 0) { + (filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data); + s = 0xff00 + zenkana2hankana_table[n][1]; + } else { + s = 0xff00 + 
zenkana2hankana_table[n][0]; + } + } else if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRAGANA) && + c >= 0x3041 && c <= 0x3093) { + /* Zenkaku hirangana to hankaku kana */ + n = c - 0x3041; + if (zenkana2hankana_table[n][1] != 0) { + (filt->output_function)(0xff00 + zenkana2hankana_table[n][0], filt->data); + s = 0xff00 + zenkana2hankana_table[n][1]; + } else { + s = 0xff00 + zenkana2hankana_table[n][0]; + } + } else if (c == 0x3001) { + s = 0xff64; /* HALFWIDTH IDEOGRAPHIC COMMA */ + } else if (c == 0x3002) { + s = 0xff61; /* HALFWIDTH IDEOGRAPHIC FULL STOP */ + } else if (c == 0x300c) { + s = 0xff62; /* HALFWIDTH LEFT CORNER BRACKET */ + } else if (c == 0x300d) { + s = 0xff63; /* HALFWIDTH RIGHT CORNER BRACKET */ + } else if (c == 0x309b) { + s = 0xff9e; /* HALFWIDTH KATAKANA VOICED SOUND MARK */ + } else if (c == 0x309c) { + s = 0xff9f; /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */ + } else if (c == 0x30fc) { + s = 0xff70; /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */ + } else if (c == 0x30fb) { + s = 0xff65; /* HALFWIDTH KATAKANA MIDDLE DOT */ + } + } else if (mode & (MBFL_FILT_TL_ZEN2HAN_HIRA2KANA + | MBFL_FILT_TL_ZEN2HAN_KANA2HIRA)) { + if ((mode & MBFL_FILT_TL_ZEN2HAN_HIRA2KANA) && + c >= 0x3041 && c <= 0x3093) { + /* Zenkaku hirangana to Zenkaku katakana */ + s = c + 0x60; + } else if ((mode & MBFL_FILT_TL_ZEN2HAN_KANA2HIRA) && + c >= 0x30a1 && c <= 0x30f3) { + /* Zenkaku katakana to Zenkaku hirangana */ + s = c - 0x60; + } + } + + if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT1) { /* special symbol to ascii */ + if (c == 0xffe5) { /* FULLWIDTH YEN SIGN */ + s = 0x5c; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x5c; + } else if (c == 0xffe3) { /* FULLWIDTH MACRON */ + s = 0x7e; + } else if (c == 0x203e) { /* OVERLINE */ + s = 0x7e; + } else if (c == 0x2018) { /* LEFT SINGLE QUOTATION MARK*/ + s = 0x27; + } else if (c == 0x2019) { /* RIGHT SINGLE QUOTATION MARK */ + s = 0x27; + } else if (c == 0x201c) { /* LEFT DOUBLE QUOTATION MARK */ + s = 
0x22; + } else if (c == 0x201d) { /* RIGHT DOUBLE QUOTATION MARK */ + s = 0x22; + } + } + + if (mode & MBFL_FILT_TL_ZEN2HAN_COMPAT2) { /* special symbol to ascii */ + if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x5c; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s = 0x7e; + } else if (c == 0xff07) { /* FULLWIDTH APOSTROPHE */ + s = 0x27; + } else if (c == 0xff02) { /* FULLWIDTH QUOTATION MARK */ + s = 0x22; + } + } + + return (*filt->output_function)(s, filt->data); +} + +int +mbfl_filt_tl_jisx0201_jisx0208_flush(mbfl_convert_filter *filt) +{ + int ret, n; + int mode = ((mbfl_filt_tl_jisx0201_jisx0208_param *)filt->opaque)->mode; + + ret = 0; + if (filt->status) { + n = (filt->cache - 0xff60) & 0x3f; + if (mode & 0x100) { /* hankaku kana to zenkaku katakana */ + ret = (*filt->output_function)(0x3000 + hankana2zenkana_table[n], filt->data); + } else if (mode & 0x200) { /* hankaku kana to zenkaku hirangana */ + ret = (*filt->output_function)(0x3000 + hankana2zenhira_table[n], filt->data); + } + filt->status = 0; + } + + if (filt->flush_function != NULL) { + return (*filt->flush_function)(filt->data); + } + + return ret; +} + +const struct mbfl_convert_vtbl vtbl_tl_jisx0201_jisx0208 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_wchar, + mbfl_filt_tl_jisx0201_jisx0208_init, + mbfl_filt_tl_jisx0201_jisx0208_cleanup, + mbfl_filt_tl_jisx0201_jisx0208, + mbfl_filt_tl_jisx0201_jisx0208_flush +}; + diff -Naurp libmbfl-1.0.1/filters/mbfilter_tl_jisx0201_jisx0208.h libmbfl-1.0.1.oden/filters/mbfilter_tl_jisx0201_jisx0208.h --- libmbfl-1.0.1/filters/mbfilter_tl_jisx0201_jisx0208.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_tl_jisx0201_jisx0208.h 2010-03-12 05:55:37.000000000 +0100 @@ -0,0 +1,79 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
+ * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: Moriyoshi Koizumi <koizumi@gree.co.jp> + * + */ + +#ifndef MBFILTER_TL_KANA_JISX0201_JISX0208_H +#define MBFILTER_TL_KANA_JISX0201_JISX0208_H + +#include "mbfl_convert.h" + +#define MBFL_FILT_TL_HAN2ZEN_ALL 0x00000001 +#define MBFL_FILT_TL_HAN2ZEN_ALPHA 0x00000002 +#define MBFL_FILT_TL_HAN2ZEN_NUMERIC 0x00000004 +#define MBFL_FILT_TL_HAN2ZEN_SPACE 0x00000008 +#define MBFL_FILT_TL_ZEN2HAN_ALL 0x00000010 +#define MBFL_FILT_TL_ZEN2HAN_ALPHA 0x00000020 +#define MBFL_FILT_TL_ZEN2HAN_NUMERIC 0x00000040 +#define MBFL_FILT_TL_ZEN2HAN_SPACE 0x00000080 +#define MBFL_FILT_TL_HAN2ZEN_KATAKANA 0x00000100 +#define MBFL_FILT_TL_HAN2ZEN_HIRAGANA 0x00000200 +#define MBFL_FILT_TL_HAN2ZEN_GLUE 0x00000800 +#define MBFL_FILT_TL_ZEN2HAN_KATAKANA 0x00001000 +#define MBFL_FILT_TL_ZEN2HAN_HIRAGANA 0x00002000 +#define MBFL_FILT_TL_ZEN2HAN_HIRA2KANA 0x00010000 +#define MBFL_FILT_TL_ZEN2HAN_KANA2HIRA 0x00020000 +#define MBFL_FILT_TL_HAN2ZEN_COMPAT1 0x00100000 +#define MBFL_FILT_TL_ZEN2HAN_COMPAT1 0x00200000 +#define MBFL_FILT_TL_HAN2ZEN_COMPAT2 0x00400000 +#define MBFL_FILT_TL_ZEN2HAN_COMPAT2 0x00800000 +#define MBFL_FILT_TL_HAN2ZEN_MASK ( \ + MBFL_FILT_TL_HAN2ZEN_ALL |\ + MBFL_FILT_TL_HAN2ZEN_ALPHA |\ + 
MBFL_FILT_TL_HAN2ZEN_NUMERIC |\ + MBFL_FILT_TL_HAN2ZEN_SPACE |\ + MBFL_FILT_TL_HAN2ZEN_KATAKANA |\ + MBFL_FILT_TL_HAN2ZEN_HIRAGANA |\ + MBFL_FILT_TL_HAN2ZEN_GLUE |\ + MBFL_FILT_TL_HAN2ZEN_COMPAT1 |\ + MBFL_FILT_TL_HAN2ZEN_COMPAT2) +#define MBFL_FILT_TL_ZEN2HAN_MASK ( \ + MBFL_FILT_TL_ZEN2HAN_ALL | \ + MBFL_FILT_TL_ZEN2HAN_ALPHA | \ + MBFL_FILT_TL_ZEN2HAN_NUMERIC | \ + MBFL_FILT_TL_ZEN2HAN_SPACE | \ + MBFL_FILT_TL_ZEN2HAN_KATAKANA | \ + MBFL_FILT_TL_ZEN2HAN_HIRAGANA | \ + MBFL_FILT_TL_ZEN2HAN_HIRA2KANA | \ + MBFL_FILT_TL_ZEN2HAN_KANA2HIRA | \ + MBFL_FILT_TL_ZEN2HAN_COMPAT1 | \ + MBFL_FILT_TL_ZEN2HAN_COMPAT2) + + +typedef struct _mbfl_filt_tl_jisx0201_jisx0208_param { + mbfl_convert_filter *next_filter; + int mode; +} mbfl_filt_tl_jisx0201_jisx0208_param; + +extern const struct mbfl_convert_vtbl vtbl_tl_jisx0201_jisx0208; + +#endif /* MBFILTER_TL_KANA_JISX0201_JISX0208_H */ diff -Naurp libmbfl-1.0.1/filters/mbfilter_utf16.c libmbfl-1.0.1.oden/filters/mbfilter_utf16.c --- libmbfl-1.0.1/filters/mbfilter_utf16.c 2002-12-20 19:17:31.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_utf16.c 2009-09-11 10:22:19.000000000 +0200 @@ -127,7 +127,7 @@ int mbfl_filt_conv_utf16_wchar(int c, mb int n, endian; endian = filter->status & 0xff00; - switch (filter->status & 0xff) { + switch (filter->status & 0x0f) { case 0: if (endian) { n = c & 0xff; @@ -144,15 +144,8 @@ int mbfl_filt_conv_utf16_wchar(int c, mb n = c & 0xff; } n |= filter->cache & 0xffff; - filter->status &= ~0xff; - if (n == 0xfffe) { - if (endian) { - filter->status = 0; /* big-endian */ - } else { - filter->status = 0x100; /* little-endian */ - } - CK((*filter->output_function)(0xfeff, filter->data)); - } else if (n >= 0xd800 && n < 0xdc00) { + filter->status &= ~0x0f; + if (n >= 0xd800 && n < 0xdc00) { filter->cache = ((n & 0x3ff) << 16) + 0x400000; } else if (n >= 0xdc00 && n < 0xe000) { n &= 0x3ff; @@ -166,7 +159,21 @@ int mbfl_filt_conv_utf16_wchar(int c, mb CK((*filter->output_function)(n, 
filter->data)); } } else { + int is_first = filter->status & 0x10; filter->cache = 0; + filter->status |= 0x10; + if (!is_first) { + if (n == 0xfffe) { + if (endian) { + filter->status &= ~0x100; /* big-endian */ + } else { + filter->status |= 0x100; /* little-endian */ + } + break; + } else if (n == 0xfeff) { + break; + } + } CK((*filter->output_function)(n, filter->data)); } break; diff -Naurp libmbfl-1.0.1/filters/mbfilter_utf32.c libmbfl-1.0.1.oden/filters/mbfilter_utf32.c --- libmbfl-1.0.1/filters/mbfilter_utf32.c 2002-12-20 19:17:31.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_utf32.c 2009-02-24 14:19:09.000000000 +0100 @@ -171,7 +171,9 @@ int mbfl_filt_conv_utf32_wchar(int c, mb CK((*filter->output_function)(0xfeff, filter->data)); } else { filter->status &= ~0xff; - CK((*filter->output_function)(n, filter->data)); + if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { + CK((*filter->output_function)(n, filter->data)); + } } break; } @@ -201,7 +203,9 @@ int mbfl_filt_conv_utf32be_wchar(int c, } else { filter->status = 0; n = (c & 0xff) | filter->cache; - CK((*filter->output_function)(n, filter->data)); + if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { + CK((*filter->output_function)(n, filter->data)); + } } return c; } @@ -211,7 +215,7 @@ int mbfl_filt_conv_utf32be_wchar(int c, */ int mbfl_filt_conv_wchar_utf32be(int c, mbfl_convert_filter *filter) { - if (c >= 0 && c < MBFL_WCSGROUP_UCS4MAX) { + if (c >= 0 && c < MBFL_WCSPLANE_UTF32MAX) { CK((*filter->output_function)((c >> 24) & 0xff, filter->data)); CK((*filter->output_function)((c >> 16) & 0xff, filter->data)); CK((*filter->output_function)((c >> 8) & 0xff, filter->data)); @@ -247,7 +251,9 @@ int mbfl_filt_conv_utf32le_wchar(int c, } else { filter->status = 0; n = ((c & 0xff) << 24) | filter->cache; - CK((*filter->output_function)(n, filter->data)); + if (n < MBFL_WCSPLANE_UTF32MAX && (n < 0xd800 || n > 0xdfff)) { + CK((*filter->output_function)(n, filter->data)); + 
} } return c; } @@ -257,7 +263,7 @@ int mbfl_filt_conv_utf32le_wchar(int c, */ int mbfl_filt_conv_wchar_utf32le(int c, mbfl_convert_filter *filter) { - if (c >= 0 && c < MBFL_WCSGROUP_UCS4MAX) { + if (c >= 0 && c < MBFL_WCSPLANE_UTF32MAX) { CK((*filter->output_function)(c & 0xff, filter->data)); CK((*filter->output_function)((c >> 8) & 0xff, filter->data)); CK((*filter->output_function)((c >> 16) & 0xff, filter->data)); diff -Naurp libmbfl-1.0.1/filters/mbfilter_utf7.c libmbfl-1.0.1.oden/filters/mbfilter_utf7.c --- libmbfl-1.0.1/filters/mbfilter_utf7.c 2002-12-20 19:17:31.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_utf7.c 2010-03-12 05:55:37.000000000 +0100 @@ -405,6 +405,11 @@ int mbfl_filt_conv_wchar_utf7_flush(mbfl CK((*filter->output_function)(0x2d, filter->data)); /* '-' */ break; } + + if (filter->flush_function != NULL) { + (*filter->flush_function)(filter->data); + } + return 0; } diff -Naurp libmbfl-1.0.1/filters/mbfilter_utf8.c libmbfl-1.0.1.oden/filters/mbfilter_utf8.c --- libmbfl-1.0.1/filters/mbfilter_utf8.c 2002-12-20 19:17:32.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mbfilter_utf8.c 2009-09-11 23:26:18.000000000 +0200 @@ -106,7 +106,8 @@ int mbfl_filt_conv_utf8_wchar(int c, mbf } filter->status = 0; } else if (c < 0xc0) { - switch (filter->status & 0xff) { + int status = filter->status & 0xff; + switch (status) { case 0x10: /* 2byte code 2nd char */ case 0x21: /* 3byte code 3rd char */ case 0x32: /* 4byte code 4th char */ @@ -114,7 +115,11 @@ int mbfl_filt_conv_utf8_wchar(int c, mbf case 0x54: /* 6byte code 6th char */ filter->status = 0; s = filter->cache | (c & 0x3f); - if (s >= 0x80) { + if ((status == 0x10 && s >= 0x80) || + (status == 0x21 && s >= 0x800 && (s < 0xd800 || s > 0xdfff)) || + (status == 0x32 && s >= 0x10000) || + (status == 0x43 && s >= 0x200000) || + (status == 0x54 && s >= 0x4000000 && s < MBFL_WCSGROUP_UCS4MAX)) { CK((*filter->output_function)(s, filter->data)); } break; @@ -215,7 +220,7 @@ static int 
mbfl_filt_ident_utf8(int c, m if (c < 0x80) { if (c < 0) { filter->flag = 1; /* bad */ - } else if (c != 0 && filter->status) { + } else if (filter->status) { filter->flag = 1; /* bad */ } filter->status = 0; diff -Naurp libmbfl-1.0.1/filters/mk_sb_tbl.awk libmbfl-1.0.1.oden/filters/mk_sb_tbl.awk --- libmbfl-1.0.1/filters/mk_sb_tbl.awk 2005-02-21 08:53:17.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/mk_sb_tbl.awk 2005-02-21 08:57:08.000000000 +0100 @@ -1,6 +1,6 @@ #!/usr/bin/awk -f # -# $Id: mk_sb_tbl.awk,v 1.1.2.3 2005/02/21 07:53:17 moriyoshi Exp $ +# $Id: mk_sb_tbl.awk 180308 2005-02-21 07:57:08Z moriyoshi $ # # Description: a script that generates a single byte code set to Unicode # mapping table. diff -Naurp libmbfl-1.0.1/filters/translit_kana_jisx0201_jisx0208.h libmbfl-1.0.1.oden/filters/translit_kana_jisx0201_jisx0208.h --- libmbfl-1.0.1/filters/translit_kana_jisx0201_jisx0208.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/translit_kana_jisx0201_jisx0208.h 2010-03-12 05:55:37.000000000 +0100 @@ -0,0 +1,67 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: Moriyoshi Koizumi <koizumi@gree.co.jp> + * + */ + +#ifndef TRANSLIT_KANA_JISX0201_JISX0208_H +#define TRANSLIT_KANA_JISX0201_JISX0208_H + +static const unsigned char hankana2zenkana_table[64] = { + 0x00,0x02,0x0C,0x0D,0x01,0xFB,0xF2,0xA1,0xA3,0xA5, + 0xA7,0xA9,0xE3,0xE5,0xE7,0xC3,0xFC,0xA2,0xA4,0xA6, + 0xA8,0xAA,0xAB,0xAD,0xAF,0xB1,0xB3,0xB5,0xB7,0xB9, + 0xBB,0xBD,0xBF,0xC1,0xC4,0xC6,0xC8,0xCA,0xCB,0xCC, + 0xCD,0xCE,0xCF,0xD2,0xD5,0xD8,0xDB,0xDE,0xDF,0xE0, + 0xE1,0xE2,0xE4,0xE6,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, + 0xEF,0xF3,0x9B,0x9C +}; + +static const unsigned char hankana2zenhira_table[64] = { + 0x00,0x02,0x0C,0x0D,0x01,0xFB,0x92,0x41,0x43,0x45, + 0x47,0x49,0x83,0x85,0x87,0x63,0xFC,0x42,0x44,0x46, + 0x48,0x4A,0x4B,0x4D,0x4F,0x51,0x53,0x55,0x57,0x59, + 0x5B,0x5D,0x5F,0x61,0x64,0x66,0x68,0x6A,0x6B,0x6C, + 0x6D,0x6E,0x6F,0x72,0x75,0x78,0x7B,0x7E,0x7F,0x80, + 0x81,0x82,0x84,0x86,0x88,0x89,0x8A,0x8B,0x8C,0x8D, + 0x8F,0x93,0x9B,0x9C +}; +static const unsigned char zenkana2hankana_table[84][2] = { + {0x67,0x00},{0x71,0x00},{0x68,0x00},{0x72,0x00},{0x69,0x00}, + {0x73,0x00},{0x6A,0x00},{0x74,0x00},{0x6B,0x00},{0x75,0x00}, + {0x76,0x00},{0x76,0x9E},{0x77,0x00},{0x77,0x9E},{0x78,0x00}, + {0x78,0x9E},{0x79,0x00},{0x79,0x9E},{0x7A,0x00},{0x7A,0x9E}, + {0x7B,0x00},{0x7B,0x9E},{0x7C,0x00},{0x7C,0x9E},{0x7D,0x00}, + {0x7D,0x9E},{0x7E,0x00},{0x7E,0x9E},{0x7F,0x00},{0x7F,0x9E}, + {0x80,0x00},{0x80,0x9E},{0x81,0x00},{0x81,0x9E},{0x6F,0x00}, + {0x82,0x00},{0x82,0x9E},{0x83,0x00},{0x83,0x9E},{0x84,0x00}, + {0x84,0x9E},{0x85,0x00},{0x86,0x00},{0x87,0x00},{0x88,0x00}, + {0x89,0x00},{0x8A,0x00},{0x8A,0x9E},{0x8A,0x9F},{0x8B,0x00}, + {0x8B,0x9E},{0x8B,0x9F},{0x8C,0x00},{0x8C,0x9E},{0x8C,0x9F}, + 
{0x8D,0x00},{0x8D,0x9E},{0x8D,0x9F},{0x8E,0x00},{0x8E,0x9E}, + {0x8E,0x9F},{0x8F,0x00},{0x90,0x00},{0x91,0x00},{0x92,0x00}, + {0x93,0x00},{0x6C,0x00},{0x94,0x00},{0x6D,0x00},{0x95,0x00}, + {0x6E,0x00},{0x96,0x00},{0x97,0x00},{0x98,0x00},{0x99,0x00}, + {0x9A,0x00},{0x9B,0x00},{0x9C,0x00},{0x9C,0x00},{0x72,0x00}, + {0x74,0x00},{0x66,0x00},{0x9D,0x00},{0x73,0x9E} +}; + +#endif /* TRANSLIT_KANA_JISX0201_JISX0208_H */ diff -Naurp libmbfl-1.0.1/filters/unicode_table_cp1254.h libmbfl-1.0.1.oden/filters/unicode_table_cp1254.h --- libmbfl-1.0.1/filters/unicode_table_cp1254.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/unicode_table_cp1254.h 2008-07-05 09:30:01.000000000 +0200 @@ -0,0 +1,51 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The authors of this file: PHP3 internationalization team + * You can contact the primary author 金本 茂 <sgk@happysize.co.jp>. 
+ * + */ + +#ifndef UNICODE_TABLE_CP1254_H +#define UNICODE_TABLE_CP1254_H +/* cp1254 to Unicode table */ +static const unsigned short cp1254_ucs_table[] = { + 0x20ac, 0xfffe, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, + 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0xfffe, 0xfffe, 0xfffe, + 0xfffe, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0xfffe, 0xfffe, 0x0178, + 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, + 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, + 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, + 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, + 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff +}; +static const int cp1254_ucs_table_min = 0x80; +static const int cp1254_ucs_table_len = (sizeof (cp1254_ucs_table) / sizeof (unsigned short)); +static const int cp1254_ucs_table_max = 0x80 + (sizeof (cp1254_ucs_table) / sizeof (unsigned short)); + +#endif /* UNICODE_TABLE_CP1254_H */ diff -Naurp libmbfl-1.0.1/filters/unicode_table_cp850.h libmbfl-1.0.1.oden/filters/unicode_table_cp850.h --- libmbfl-1.0.1/filters/unicode_table_cp850.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/unicode_table_cp850.h 2009-03-18 18:44:25.000000000 +0100 @@ -0,0 +1,52 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Den V. Tsopa <tdv@edisoft.ru> + * Adaption for CP850: D. Giffeler <dg@artegic.de> + * + */ + +#ifndef UNICODE_TABLE_CP850_H +#define UNICODE_TABLE_CP850_H + +/* cp850_DOS to Unicode table */ +static const unsigned short cp850_ucs_table[] = { + 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7 +, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5 +, 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9 +, 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192 +, 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba +, 0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb +, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0 +, 0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510 +, 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3 +, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4 +, 0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce +, 0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580 +, 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe +, 0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4 +, 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 
0x00a7, 0x00f7, 0x00b8 +, 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0 +}; +static const int cp850_ucs_table_min = 0x80; +static const int cp850_ucs_table_len = (sizeof (cp850_ucs_table) / sizeof (unsigned short)); +static const int cp850_ucs_table_max = 0x80 + (sizeof (cp850_ucs_table) / sizeof (unsigned short)); + +#endif /* UNICODE_TABLE_CP850_H */ diff -Naurp libmbfl-1.0.1/filters/unicode_table_koi8u.h libmbfl-1.0.1.oden/filters/unicode_table_koi8u.h --- libmbfl-1.0.1/filters/unicode_table_koi8u.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/filters/unicode_table_koi8u.h 2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,166 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Maksym Veremeyenko <verem@m1.tv> + * + */ + +#ifndef UNICODE_TABLE_KOI8U_H +#define UNICODE_TABLE_KOI8U_H + +/* KOI8-U (RFC2319) to Unicode */ +static const unsigned short koi8u_ucs_table[] = { + 0x2500, /* BOX DRAWINGS LIGHT HORIZONTAL */ + 0x2502, /* BOX DRAWINGS LIGHT VERTICAL */ + 0x250C, /* BOX DRAWINGS LIGHT DOWN AND RIGHT */ + 0x2510, /* BOX DRAWINGS LIGHT DOWN AND LEFT */ + 0x2514, /* BOX DRAWINGS LIGHT UP AND RIGHT */ + 0x2518, /* BOX DRAWINGS LIGHT UP AND LEFT */ + 0x251C, /* BOX DRAWINGS LIGHT VERTICAL AND RIGHT */ + 0x2524, /* BOX DRAWINGS LIGHT VERTICAL AND LEFT */ + 0x252C, /* BOX DRAWINGS LIGHT DOWN AND HORIZONTAL */ + 0x2534, /* BOX DRAWINGS LIGHT UP AND HORIZONTAL */ + 0x253C, /* BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL */ + 0x2580, /* UPPER HALF BLOCK */ + 0x2584, /* LOWER HALF BLOCK */ + 0x2588, /* FULL BLOCK */ + 0x258C, /* LEFT HALF BLOCK */ + 0x2590, /* RIGHT HALF BLOCK */ + 0x2591, /* LIGHT SHADE */ + 0x2592, /* MEDIUM SHADE */ + 0x2593, /* DARK SHADE */ + 0x2320, /* TOP HALF INTEGRAL */ + 0x25A0, /* BLACK SQUARE */ + 0x2219, /* BULLET OPERATOR */ + 0x221A, /* SQUARE ROOT */ + 0x2248, /* ALMOST EQUAL TO */ + 0x2264, /* LESS THAN OR EQUAL TO */ + 0x2265, /* GREATER THAN OR EQUAL TO */ + 0x00A0, /* NO-BREAK SPACE */ + 0x2321, /* BOTTOM HALF INTEGRAL */ + 0x00B0, /* DEGREE SIGN */ + 0x00B2, /* SUPERSCRIPT TWO */ + 0x00B7, /* MIDDLE DOT */ + 0x00F7, /* DIVISION SIGN */ + 0x2550, /* BOX DRAWINGS DOUBLE HORIZONTAL */ + 0x2551, /* BOX DRAWINGS DOUBLE VERTICAL */ + 0x2552, /* BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE */ + 0x0451, /* CYRILLIC SMALL LETTER IO */ + 0x0454, /* CYRILLIC SMALL LETTER UKRAINIAN IE */ + 0x2554, /* BOX DRAWINGS DOUBLE DOWN AND RIGHT */ + 
0x0456, /* CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I */ + 0x0457, /* CYRILLIC SMALL LETTER YI (UKRAINIAN) */ + 0x2557, /* BOX DRAWINGS DOUBLE DOWN AND LEFT */ + 0x2558, /* BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE */ + 0x2559, /* BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE */ + 0x255A, /* BOX DRAWINGS DOUBLE UP AND RIGHT */ + 0x255B, /* BOX DRAWINGS UP SINGLE AND LEFT DOUBLE */ + 0x0491, /* CYRILLIC SMALL LETTER GHE WITH UPTURN */ + 0x255D, /* BOX DRAWINGS DOUBLE UP AND LEFT */ + 0x255E, /* BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE */ + 0x255F, /* BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE */ + 0x2560, /* BOX DRAWINGS DOUBLE VERTICAL AND RIGHT */ + 0x2561, /* BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE */ + 0x0401, /* CYRILLIC CAPITAL LETTER IO */ + 0x0404, /* CYRILLIC CAPITAL LETTER UKRAINIAN IE */ + 0x2563, /* BOX DRAWINGS DOUBLE VERTICAL AND LEFT */ + 0x0406, /* CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I */ + 0x0407, /* CYRILLIC CAPITAL LETTER YI (UKRAINIAN) */ + 0x2566, /* BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL */ + 0x2567, /* BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE */ + 0x2568, /* BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE */ + 0x2569, /* BOX DRAWINGS DOUBLE UP AND HORIZONTAL */ + 0x256A, /* BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE */ + 0x0490, /* CYRILLIC CAPITAL LETTER GHE WITH UPTURN */ + 0x256C, /* BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL */ + 0x00A9, /* COPYRIGHT SIGN */ + 0x044E, /* CYRILLIC SMALL LETTER YU */ + 0x0430, /* CYRILLIC SMALL LETTER A */ + 0x0431, /* CYRILLIC SMALL LETTER BE */ + 0x0446, /* CYRILLIC SMALL LETTER TSE */ + 0x0434, /* CYRILLIC SMALL LETTER DE */ + 0x0435, /* CYRILLIC SMALL LETTER IE */ + 0x0444, /* CYRILLIC SMALL LETTER EF */ + 0x0433, /* CYRILLIC SMALL LETTER GHE */ + 0x0445, /* CYRILLIC SMALL LETTER KHA */ + 0x0438, /* CYRILLIC SMALL LETTER I */ + 0x0439, /* CYRILLIC SMALL LETTER SHORT I */ + 0x043A, /* CYRILLIC SMALL LETTER KA */ + 0x043B, /* CYRILLIC SMALL LETTER EL */ + 0x043C, /* CYRILLIC 
SMALL LETTER EM */ + 0x043D, /* CYRILLIC SMALL LETTER EN */ + 0x043E, /* CYRILLIC SMALL LETTER O */ + 0x043F, /* CYRILLIC SMALL LETTER PE */ + 0x044F, /* CYRILLIC SMALL LETTER YA */ + 0x0440, /* CYRILLIC SMALL LETTER ER */ + 0x0441, /* CYRILLIC SMALL LETTER ES */ + 0x0442, /* CYRILLIC SMALL LETTER TE */ + 0x0443, /* CYRILLIC SMALL LETTER U */ + 0x0436, /* CYRILLIC SMALL LETTER ZHE */ + 0x0432, /* CYRILLIC SMALL LETTER VE */ + 0x044C, /* CYRILLIC SMALL LETTER SOFT SIGN */ + 0x044B, /* CYRILLIC SMALL LETTER YERU */ + 0x0437, /* CYRILLIC SMALL LETTER ZE */ + 0x0448, /* CYRILLIC SMALL LETTER SHA */ + 0x044D, /* CYRILLIC SMALL LETTER E */ + 0x0449, /* CYRILLIC SMALL LETTER SHCHA */ + 0x0447, /* CYRILLIC SMALL LETTER CHE */ + 0x044A, /* CYRILLIC SMALL LETTER HARD SIGN */ + 0x042E, /* CYRILLIC CAPITAL LETTER YU */ + 0x0410, /* CYRILLIC CAPITAL LETTER A */ + 0x0411, /* CYRILLIC CAPITAL LETTER BE */ + 0x0426, /* CYRILLIC CAPITAL LETTER TSE */ + 0x0414, /* CYRILLIC CAPITAL LETTER DE */ + 0x0415, /* CYRILLIC CAPITAL LETTER IE */ + 0x0424, /* CYRILLIC CAPITAL LETTER EF */ + 0x0413, /* CYRILLIC CAPITAL LETTER GHE */ + 0x0425, /* CYRILLIC CAPITAL LETTER KHA */ + 0x0418, /* CYRILLIC CAPITAL LETTER I */ + 0x0419, /* CYRILLIC CAPITAL LETTER SHORT I */ + 0x041A, /* CYRILLIC CAPITAL LETTER KA */ + 0x041B, /* CYRILLIC CAPITAL LETTER EL */ + 0x041C, /* CYRILLIC CAPITAL LETTER EM */ + 0x041D, /* CYRILLIC CAPITAL LETTER EN */ + 0x041E, /* CYRILLIC CAPITAL LETTER O */ + 0x041F, /* CYRILLIC CAPITAL LETTER PE */ + 0x042F, /* CYRILLIC CAPITAL LETTER YA */ + 0x0420, /* CYRILLIC CAPITAL LETTER ER */ + 0x0421, /* CYRILLIC CAPITAL LETTER ES */ + 0x0422, /* CYRILLIC CAPITAL LETTER TE */ + 0x0423, /* CYRILLIC CAPITAL LETTER U */ + 0x0416, /* CYRILLIC CAPITAL LETTER ZHE */ + 0x0412, /* CYRILLIC CAPITAL LETTER VE */ + 0x042C, /* CYRILLIC CAPITAL LETTER SOFT SIGN */ + 0x042B, /* CYRILLIC CAPITAL LETTER YERU */ + 0x0417, /* CYRILLIC CAPITAL LETTER ZE */ + 0x0428, /* CYRILLIC CAPITAL LETTER SHA */ + 
0x042D, /* CYRILLIC CAPITAL LETTER E */ + 0x0429, /* CYRILLIC CAPITAL LETTER SHCHA */ + 0x0427, /* CYRILLIC CAPITAL LETTER CHE */ + 0x042A /* CYRILLIC CAPITAL LETTER HARD SIGN */ +}; +static const int koi8u_ucs_table_min = 0x80; +static const int koi8u_ucs_table_len = (sizeof (koi8u_ucs_table) / sizeof (unsigned short)); +static const int koi8u_ucs_table_max = 0x80 + (sizeof (koi8u_ucs_table) / sizeof (unsigned short)); + + + +#endif /* UNICODE_TABLE_KOI8U_H */ + diff -Naurp libmbfl-1.0.1/Makefile.am libmbfl-1.0.1.oden/Makefile.am --- libmbfl-1.0.1/Makefile.am 2004-02-04 04:06:18.000000000 +0100 +++ libmbfl-1.0.1.oden/Makefile.am 2011-11-06 13:39:30.786251024 +0100 @@ -1,5 +1,6 @@ -EXTRA_DIST=AUTHORS DISCLAIMER LICENSE Makefile.bcc32 \ - config.h.bcc32 config.h.vc6 \ - libmbfl.dsp libmbfl.dsw libmbfl.sln libmbfl.vcproj mbfl.rc \ - mksbcc32.bat rules.mak.bcc32 +AUTOMAKE_OPTIONS=dejagnu +DEJATOOL=conv_encoding conv_kana strwidth strcut +LANG=C +EXTRA_DIST=AUTHORS DISCLAIMER LICENSE SUBDIRS = nls filters mbfl +CLEANFILES=*.log *.sum diff -Naurp libmbfl-1.0.1/mbfl/eaw_table.h libmbfl-1.0.1.oden/mbfl/eaw_table.h --- libmbfl-1.0.1/mbfl/eaw_table.h 2006-01-21 05:16:47.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/eaw_table.h 2010-03-12 05:55:37.000000000 +0100 @@ -2,16 +2,35 @@ static const struct { int begin; int end; } mbfl_eaw_table[] = { - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 } + { 0x1100, 0x1159 }, + { 0x115f, 0x115f }, + { 0x2329, 0x232a }, + { 0x2e80, 0x2e99 }, + { 0x2e9b, 0x2ef3 }, + { 0x2f00, 0x2fd5 }, + { 0x2ff0, 0x2ffb }, + { 0x3000, 0x303e }, + { 0x3041, 0x3096 }, + { 0x3099, 0x30ff }, + { 0x3105, 0x312c }, + { 0x3131, 0x318e }, + { 0x3190, 0x31b7 }, + { 0x31f0, 0x321e }, + { 0x3220, 0x3243 }, + { 0x3250, 0x327d }, + { 0x327f, 0x32fe }, + {
0x3300, 0x4db5 }, + { 0x4e00, 0x9fa5 }, + { 0xa000, 0xa48c }, + { 0xa490, 0xa4c6 }, + { 0xac00, 0xd7a3 }, + { 0xf900, 0xfa2d }, + { 0xfa30, 0xfa6a }, + { 0xfe30, 0xfe52 }, + { 0xfe54, 0xfe66 }, + { 0xfe68, 0xfe6b }, + { 0xff01, 0xff60 }, + { 0xffe0, 0xffe6 }, + { 0x20000, 0x2fffd }, + { 0x30000, 0x3fffd } }; diff -Naurp libmbfl-1.0.1/mbfl/Makefile.am libmbfl-1.0.1.oden/mbfl/Makefile.am --- libmbfl-1.0.1/mbfl/Makefile.am 2004-06-29 14:13:37.000000000 +0200 +++ libmbfl-1.0.1.oden/mbfl/Makefile.am 2010-03-12 05:55:37.000000000 +0100 @@ -1,12 +1,37 @@ -EXTRA_DIST=Makefile.bcc32 mk_eaw_tbl.awk +EXTRA_DIST=mk_eaw_tbl.awk lib_LTLIBRARIES=libmbfl.la -libmbfl_la_SOURCES=mbfilter.c mbfl_string.c mbfl_language.c mbfl_encoding.c mbfl_convert.c mbfl_ident.c mbfl_memory_device.c mbfl_allocators.c mbfl_filter_output.c mbfilter_pass.c mbfilter_wchar.c mbfilter_8bit.c eaw_table.h +libmbfl_la_SOURCES=mbfilter.c \ + mbfl_string.c \ + mbfl_language.c \ + mbfl_encoding.c \ + mbfl_convert.c \ + mbfl_ident.c \ + mbfl_memory_device.c \ + mbfl_allocators.c \ + mbfl_filter_output.c \ + mbfilter_pass.c \ + mbfilter_wchar.c \ + mbfilter_8bit.c \ + eaw_table.h libmbfl_filters_la=../filters/libmbfl_filters.la libmbfl_nls_la=../nls/libmbfl_nls.la libmbfl_la_LIBADD=$(libmbfl_filters_la) $(libmbfl_nls_la) libmbfl_la_LDFLAGS=-version-info $(SHLIB_VERSION) libmbfl_includedir=$(includedir)/mbfl -libmbfl_include_HEADERS=mbfilter.h mbfl_consts.h mbfl_encoding.h mbfl_language.h mbfl_string.h mbfl_convert.h mbfl_ident.h mbfl_memory_device.h mbfl_allocators.h mbfl_defs.h mbfl_filter_output.h mbfilter_pass.h mbfilter_wchar.h mbfilter_8bit.h +libmbfl_include_HEADERS=mbfilter.h \ + mbfl_consts.h \ + mbfl_encoding.h \ + mbfl_language.h \ + mbfl_string.h \ + mbfl_convert.h \ + mbfl_ident.h \ + mbfl_memory_device.h \ + mbfl_allocators.h \ + mbfl_defs.h \ + mbfl_filter_output.h \ + mbfilter_pass.h \ + mbfilter_wchar.h \ + mbfilter_8bit.h mbfilter.c: eaw_table.h diff -Naurp libmbfl-1.0.1/mbfl/mbfilter.c 
libmbfl-1.0.1.oden/mbfl/mbfilter.c --- libmbfl-1.0.1/mbfl/mbfilter.c 2006-01-21 04:35:09.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/mbfilter.c 2011-09-24 04:20:38.000000000 +0200 @@ -102,6 +102,7 @@ #include "mbfilter.h" #include "mbfl_filter_output.h" #include "mbfilter_pass.h" +#include "filters/mbfilter_tl_jisx0201_jisx0208.h" #include "eaw_table.h" @@ -149,11 +150,15 @@ mbfl_buffer_converter_new( convd->filter1 = NULL; convd->filter2 = NULL; if (mbfl_convert_filter_get_vtbl(convd->from->no_encoding, convd->to->no_encoding) != NULL) { - convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, 0, &convd->device); + convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device); } else { - convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, 0, &convd->device); + convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device); if (convd->filter2 != NULL) { - convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))convd->filter2->filter_function, NULL, convd->filter2); + convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, + mbfl_no_encoding_wchar, + (int (*)(int, void*))convd->filter2->filter_function, + (int (*)(void*))convd->filter2->filter_flush, + convd->filter2); if (convd->filter1 == NULL) { mbfl_convert_filter_delete(convd->filter2); } @@ -331,6 +336,24 @@ mbfl_buffer_converter_feed_result(mbfl_b return mbfl_memory_device_result(&convd->device, result); } +int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd) +{ + int num_illegalchars = 0; + + if (convd == NULL) { + return 0; + } + + if (convd->filter1 != NULL) { + num_illegalchars += convd->filter1->num_illegalchar; + } + + if (convd->filter2 != NULL) { + 
num_illegalchars += convd->filter2->num_illegalchar; + } + + return (num_illegalchars); +} /* * encoding detector @@ -371,9 +394,9 @@ mbfl_encoding_detector_new(enum mbfl_no_ } identd->filter_list_size = num; - /* set strict flag */ - identd->strict = strict; - + /* set strict flag */ + identd->strict = strict; + return identd; } @@ -444,25 +467,24 @@ enum mbfl_no_encoding mbfl_encoding_dete while (n >= 0) { filter = identd->filter_list[n]; if (!filter->flag) { - if (identd->strict && filter->status) { - continue; - } - encoding = filter->encoding->no_encoding; + if (!identd->strict || !filter->status) { + encoding = filter->encoding->no_encoding; + } } n--; } - /* fallback judge */ - if (encoding == mbfl_no_encoding_invalid) { - n = identd->filter_list_size - 1; - while (n >= 0) { - filter = identd->filter_list[n]; - if (!filter->flag) { - encoding = filter->encoding->no_encoding; - } - n--; + /* fallback judge */ + if (encoding == mbfl_no_encoding_invalid) { + n = identd->filter_list_size - 1; + while (n >= 0) { + filter = identd->filter_list[n]; + if (!filter->flag) { + encoding = filter->encoding->no_encoding; + } + n--; } - } + } } return encoding; @@ -594,7 +616,7 @@ mbfl_identify_encoding(mbfl_string *stri for (i = 0; i < num; i++) { filter = &flist[i]; if (!filter->flag) { - if (strict && filter->status) { + if (strict && filter->status) { continue; } encoding = filter->encoding; @@ -602,16 +624,16 @@ mbfl_identify_encoding(mbfl_string *stri } } - /* fall-back judge */ - if (!encoding) { - for (i = 0; i < num; i++) { - filter = &flist[i]; - if (!filter->flag) { - encoding = filter->encoding; - break; - } - } - } + /* fall-back judge */ + if (!encoding) { + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag && (!strict || !filter->status)) { + encoding = filter->encoding; + break; + } + } + } /* cleanup */ /* dtors should be called in reverse order */ @@ -762,7 +784,7 @@ retry: for (;;) { pc->found_pos++; p = h; - m = pc->needle.buffer; + 
m = (int *)pc->needle.buffer; n = pc->needle_pos - 1; while (n > 0 && *p == *m) { n--; @@ -841,78 +863,203 @@ mbfl_strpos( int offset, int reverse) { - int n, result; - unsigned char *p; - mbfl_convert_filter *filter; - struct collector_strpos_data pc; + int result; + mbfl_string _haystack_u8, _needle_u8; + const mbfl_string *haystack_u8, *needle_u8 = NULL; /* stays NULL if we bail out to the cleanup label before the needle is converted */ + const unsigned char *u8_tbl; - if (haystack == NULL || needle == NULL) { + if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) { return -8; } - /* needle is converted into wchar */ - mbfl_wchar_device_init(&pc.needle); - filter = mbfl_convert_filter_new( - needle->no_encoding, - mbfl_no_encoding_wchar, - mbfl_wchar_device_output, 0, &pc.needle); - if (filter == NULL) { - return -4; + + { + const mbfl_encoding *u8_enc; + u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8); + if (u8_enc == NULL || u8_enc->mblen_table == NULL) { + return -8; + } + u8_tbl = u8_enc->mblen_table; } - p = needle->val; - n = needle->len; - if (p != NULL) { - while (n > 0) { - if ((*filter->filter_function)(*p++, filter) < 0) { - break; - } - n--; + + if (haystack->no_encoding != mbfl_no_encoding_utf8) { + mbfl_string_init(&_haystack_u8); + haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8); + if (haystack_u8 == NULL) { + result = -4; + goto out; } + } else { + haystack_u8 = haystack; } - mbfl_convert_filter_flush(filter); - mbfl_convert_filter_delete(filter); - pc.needle_len = pc.needle.pos; - if (pc.needle.buffer == NULL) { - return -4; + + if (needle->no_encoding != mbfl_no_encoding_utf8) { + mbfl_string_init(&_needle_u8); + needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8); + if (needle_u8 == NULL) { + result = -4; + goto out; + } + } else { + needle_u8 = needle; } - if (pc.needle_len <= 0) { - mbfl_wchar_device_clear(&pc.needle); - return -2; + + if (needle_u8->len < 1) { + result = -8; + goto out; } - /* initialize filter and collector data */
- filter = mbfl_convert_filter_new( - haystack->no_encoding, - mbfl_no_encoding_wchar, - collector_strpos, 0, &pc); - if (filter == NULL) { - mbfl_wchar_device_clear(&pc.needle); - return -4; + + result = -1; + if (haystack_u8->len < needle_u8->len) { + goto out; } - pc.start = offset; - pc.output = 0; - pc.needle_pos = 0; - pc.found_pos = 0; - pc.matched_pos = -1; - /* feed data */ - p = haystack->val; - n = haystack->len; - if (p != NULL) { - while (n > 0) { - if ((*filter->filter_function)(*p++, filter) < 0) { - pc.matched_pos = -4; - break; + if (!reverse) { + unsigned int jtbl[1 << (sizeof(unsigned char) * 8)]; + unsigned int needle_u8_len = needle_u8->len; + unsigned int i; + const unsigned char *p, *q, *e; + const unsigned char *haystack_u8_val = haystack_u8->val, + *needle_u8_val = needle_u8->val; + for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) { + jtbl[i] = needle_u8_len + 1; + } + for (i = 0; i < needle_u8_len - 1; ++i) { + jtbl[needle_u8_val[i]] = needle_u8_len - i; + } + e = haystack_u8_val + haystack_u8->len; + p = haystack_u8_val; + while (--offset >= 0) { + if (p >= e) { + result = -16; + goto out; } - if (pc.matched_pos >= 0 && !reverse) { - break; + p += u8_tbl[*p]; + } + p += needle_u8_len; + if (p > e) { + goto out; + } + while (p <= e) { + const unsigned char *pv = p; + q = needle_u8_val + needle_u8_len; + for (;;) { + if (q == needle_u8_val) { + result = 0; + while (p > haystack_u8_val) { + unsigned char c = *--p; + if (c < 0x80) { + ++result; + } else if ((c & 0xc0) != 0x80) { + ++result; + } + } + goto out; + } + if (*--q != *--p) { + break; + } + } + p += jtbl[*p]; + if (p <= pv) { + p = pv + 1; + } + } + } else { + unsigned int jtbl[1 << (sizeof(unsigned char) * 8)]; + unsigned int needle_u8_len = needle_u8->len, needle_len = 0; + unsigned int i; + const unsigned char *p, *e, *q, *qe; + const unsigned char *haystack_u8_val = haystack_u8->val, + *needle_u8_val = needle_u8->val; + for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) { + 
jtbl[i] = needle_u8_len; + } + for (i = needle_u8_len - 1; i > 0; --i) { + unsigned char c = needle_u8_val[i]; + jtbl[c] = i; + if (c < 0x80) { + ++needle_len; + } else if ((c & 0xc0) != 0x80) { + ++needle_len; + } + } + { + unsigned char c = needle_u8_val[0]; + if (c < 0x80) { + ++needle_len; + } else if ((c & 0xc0) != 0x80) { + ++needle_len; + } + } + e = haystack_u8_val; + p = e + haystack_u8->len; + qe = needle_u8_val + needle_u8_len; + if (offset < 0) { + if (-offset > needle_len) { + offset += needle_len; + while (offset < 0) { + unsigned char c; + if (p <= e) { + result = -16; + goto out; + } + c = *(--p); + if (c < 0x80) { + ++offset; + } else if ((c & 0xc0) != 0x80) { + ++offset; + } + } + } + } else { + const unsigned char *ee = haystack_u8_val + haystack_u8->len; + while (--offset >= 0) { + if (e >= ee) { + result = -16; + goto out; + } + e += u8_tbl[*e]; + } + } + if (p < e + needle_u8_len) { + goto out; + } + p -= needle_u8_len; + while (p >= e) { + const unsigned char *pv = p; + q = needle_u8_val; + for (;;) { + if (q == qe) { + result = 0; + p -= needle_u8_len; + while (p > haystack_u8_val) { + unsigned char c = *--p; + if (c < 0x80) { + ++result; + } else if ((c & 0xc0) != 0x80) { + ++result; + } + } + goto out; + } + if (*q != *p) { + break; + } + ++p, ++q; + } + p -= jtbl[*p]; + if (p >= pv) { + p = pv - 1; } - n--; } } - mbfl_convert_filter_flush(filter); - result = pc.matched_pos; - mbfl_convert_filter_delete(filter); - mbfl_wchar_device_clear(&pc.needle); - +out: + if (haystack_u8 == &_haystack_u8) { + mbfl_string_clear(&_haystack_u8); + } + if (needle_u8 == &_needle_u8) { + mbfl_string_clear(&_needle_u8); + } return result; } @@ -990,6 +1137,7 @@ mbfl_substr_count( if (pc.matched_pos >= 0) { ++result; pc.matched_pos = -1; + pc.needle_pos = 0; } n--; } @@ -1054,10 +1202,10 @@ mbfl_substr( len = string->len; start = from; end = from + length; - if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_MWC2LE)) { + if (encoding->flag & 
(MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { start *= 2; end = start + length*2; - } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_MWC4LE)) { + } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { start *= 4; end = start + length*4; } else if (encoding->mblen_table != NULL) { @@ -1184,7 +1332,6 @@ mbfl_substr( return result; } - /* * strcut */ @@ -1196,183 +1343,284 @@ mbfl_strcut( int length) { const mbfl_encoding *encoding; - int n, m, k, len, start, end; - unsigned char *p, *w; - const unsigned char *mbtab; mbfl_memory_device device; - mbfl_convert_filter *encoder, *encoder_tmp, *decoder, *decoder_tmp; + + /* validate the parameters */ + if (string == NULL || string->val == NULL || result == NULL) { + return NULL; + } + + if (from < 0 || length < 0) { + return NULL; + } + + if (from >= string->len) { + from = string->len; + } encoding = mbfl_no2encoding(string->no_encoding); - if (encoding == NULL || string == NULL || result == NULL) { + if (encoding == NULL) { return NULL; } + mbfl_string_init(result); result->no_language = string->no_language; result->no_encoding = string->no_encoding; - if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) || - encoding->mblen_table != NULL) { - len = string->len; - start = from; - end = from + length; + if ((encoding->flag & (MBFL_ENCTYPE_SBCS + | MBFL_ENCTYPE_WCS2BE + | MBFL_ENCTYPE_WCS2LE + | MBFL_ENCTYPE_WCS4BE + | MBFL_ENCTYPE_WCS4LE)) + || encoding->mblen_table != NULL) { + const unsigned char *start = NULL; + const unsigned char *end = NULL; + unsigned char *w; + unsigned int sz; + if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { - start /= 2; - start *= 2; - end = length/2; - end *= 2; - end += start; + from &= -2; + + if (from + length >= string->len) { + length = string->len - from; + } + + start = string->val + from; + end = start + (length & -2); } else if (encoding->flag & 
(MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { - start /= 4; - start *= 4; - end = length/4; - end *= 4; - end += start; + from &= -4; + + if (from + length >= string->len) { + length = string->len - from; + } + + start = string->val + from; + end = start + (length & -4); + } else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) { + if (from + length >= string->len) { + length = string->len - from; + } + + start = string->val + from; + end = start + length; } else if (encoding->mblen_table != NULL) { - mbtab = encoding->mblen_table; - start = 0; - end = 0; - n = 0; - p = string->val; - if (p != NULL) { - /* search start position */ - for (;;) { - m = mbtab[*p]; - n += m; - p += m; - if (n > from) { - break; - } - start = n; - } - /* search end position */ - k = start + length; - if (k >= (int)string->len) { - end = string->len; - } else { - end = start; - while (n <= k) { - end = n; - m = mbtab[*p]; - n += m; - p += m; - } + const unsigned char *mbtab = encoding->mblen_table; + const unsigned char *p, *q; + int m; + + /* search start position */ + for (m = 0, p = string->val, q = p + from; + p < q; p += (m = mbtab[*p])); + + if (p > q) { + p -= m; + } + + start = p; + + /* search end position */ + if ((start - string->val) + length >= (int)string->len) { + end = string->val + string->len; + } else { + for (q = p + length; p < q; p += (m = mbtab[*p])); + + if (p > q) { + p -= m; } + end = p; } + } else { + /* never reached */ + return NULL; } - if (start > len) { - start = len; - } - if (start < 0) { - start = 0; - } - if (end > len) { - end = len; - } - if (end < 0) { - end = 0; - } - if (start > end) { - start = end; - } /* allocate memory and copy string */ - n = end - start; - result->len = 0; - result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char)); - if (w != NULL) { - result->len = n; - p = &(string->val[start]); - while (n > 0) { - *w++ = *p++; - n--; - } - *w++ = '\0'; - *w++ = '\0'; - *w++ = '\0'; - *w = '\0'; - } else { - result = NULL; + 
sz = end - start; + if ((w = (unsigned char*)mbfl_calloc(sz + 8, + sizeof(unsigned char))) == NULL) { + return NULL; } + + memcpy(w, start, sz); + w[sz] = '\0'; + w[sz + 1] = '\0'; + w[sz + 2] = '\0'; + w[sz + 3] = '\0'; + + result->val = w; + result->len = sz; } else { - /* wchar filter */ - encoder = mbfl_convert_filter_new( - string->no_encoding, - mbfl_no_encoding_wchar, - mbfl_filter_output_null, 0, 0); - encoder_tmp = mbfl_convert_filter_new( - string->no_encoding, - mbfl_no_encoding_wchar, - mbfl_filter_output_null, 0, 0); + mbfl_convert_filter *encoder = NULL; + mbfl_convert_filter *decoder = NULL; + const unsigned char *p, *q, *r; + struct { + mbfl_convert_filter encoder; + mbfl_convert_filter decoder; + const unsigned char *p; + int pos; + } bk, _bk; + /* output code filter */ - decoder = mbfl_convert_filter_new( - mbfl_no_encoding_wchar, - string->no_encoding, - mbfl_memory_device_output, 0, &device); - decoder_tmp = mbfl_convert_filter_new( - mbfl_no_encoding_wchar, - string->no_encoding, - mbfl_memory_device_output, 0, &device); - if (encoder == NULL || encoder_tmp == NULL || decoder == NULL || decoder_tmp == NULL) { - mbfl_convert_filter_delete(encoder); - mbfl_convert_filter_delete(encoder_tmp); + if (!(decoder = mbfl_convert_filter_new( + mbfl_no_encoding_wchar, + string->no_encoding, + mbfl_memory_device_output, 0, &device))) { + return NULL; + } + + /* wchar filter */ + if (!(encoder = mbfl_convert_filter_new( + string->no_encoding, + mbfl_no_encoding_wchar, + mbfl_filter_output_null, + NULL, NULL))) { mbfl_convert_filter_delete(decoder); - mbfl_convert_filter_delete(decoder_tmp); return NULL; } + mbfl_memory_device_init(&device, length + 8, 0); - k = 0; - n = 0; + p = string->val; - if (p != NULL) { - /* seartch start position */ - while (n < from) { - (*encoder->filter_function)(*p++, encoder); - n++; - } + + /* search start position */ + for (q = string->val + from; p < q; p++) { + (*encoder->filter_function)(*p, encoder); + } + + /* switch the 
drain direction */ + encoder->output_function = (int(*)(int,void *))decoder->filter_function; + encoder->flush_function = (int(*)(void *))decoder->filter_flush; + encoder->data = decoder; + + q = string->val + string->len; + + /* save the encoder, decoder state and the pointer */ + mbfl_convert_filter_copy(decoder, &_bk.decoder); + mbfl_convert_filter_copy(encoder, &_bk.encoder); + _bk.p = p; + _bk.pos = device.pos; + + if (length > q - p) { + length = q - p; + } + + if (length >= 20) { /* output a little shorter than "length" */ - encoder->output_function = mbfl_filter_output_pipe; - encoder->data = decoder; - k = length - 20; - len = string->len; - while (n < len && device.pos < k) { - (*encoder->filter_function)(*p++, encoder); - n++; - } - /* detect end position */ - for (;;) { - /* backup current state */ - k = device.pos; - mbfl_convert_filter_copy(encoder, encoder_tmp); - mbfl_convert_filter_copy(decoder, decoder_tmp); - if (n >= len) { - break; - } - /* feed 1byte and flush */ + /* XXX: the constant "20" was determined purely on the heuristics. 
*/ + for (r = p + length - 20; p < r; p++) { (*encoder->filter_function)(*p, encoder); + } + + /* if the offset of the resulting string exceeds the length, + * then restore the state */ + if (device.pos > length) { + p = _bk.p; + device.pos = _bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&_bk.decoder, decoder); + mbfl_convert_filter_copy(&_bk.encoder, encoder); + bk = _bk; + } else { + /* save the encoder, decoder state and the pointer */ + mbfl_convert_filter_copy(decoder, &bk.decoder); + mbfl_convert_filter_copy(encoder, &bk.encoder); + bk.p = p; + bk.pos = device.pos; + + /* flush the stream */ (*encoder->filter_flush)(encoder); - (*decoder->filter_flush)(decoder); + + /* if the offset of the resulting string exceeds the length, + * then restore the state */ if (device.pos > length) { - break; + bk.decoder.filter_dtor(&bk.decoder); + bk.encoder.filter_dtor(&bk.encoder); + + p = _bk.p; + device.pos = _bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&_bk.decoder, decoder); + mbfl_convert_filter_copy(&_bk.encoder, encoder); + bk = _bk; + } else { + _bk.decoder.filter_dtor(&_bk.decoder); + _bk.encoder.filter_dtor(&_bk.encoder); + + p = bk.p; + device.pos = bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&bk.decoder, decoder); + mbfl_convert_filter_copy(&bk.encoder, encoder); } - /* restore filter and re-feed data */ - device.pos = k; - mbfl_convert_filter_copy(encoder_tmp, encoder); - mbfl_convert_filter_copy(decoder_tmp, decoder); - (*encoder->filter_function)(*p, encoder); - p++; - n++; } - device.pos = k; - mbfl_convert_filter_copy(encoder_tmp, encoder); - mbfl_convert_filter_copy(decoder_tmp, decoder); - mbfl_convert_filter_flush(encoder); - mbfl_convert_filter_flush(decoder); + } else { + bk = _bk; + } + + /* detect end position */ + while (p < q) { + (*encoder->filter_function)(*p, encoder); + + if 
(device.pos > length) { + /* restore filter */ + p = bk.p; + device.pos = bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&bk.decoder, decoder); + mbfl_convert_filter_copy(&bk.encoder, encoder); + break; + } + + p++; + + /* backup current state */ + mbfl_convert_filter_copy(decoder, &_bk.decoder); + mbfl_convert_filter_copy(encoder, &_bk.encoder); + _bk.pos = device.pos; + _bk.p = p; + + (*encoder->filter_flush)(encoder); + + if (device.pos > length) { + _bk.decoder.filter_dtor(&_bk.decoder); + _bk.encoder.filter_dtor(&_bk.encoder); + + /* restore filter */ + p = bk.p; + device.pos = bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&bk.decoder, decoder); + mbfl_convert_filter_copy(&bk.encoder, encoder); + break; + } + + bk.decoder.filter_dtor(&bk.decoder); + bk.encoder.filter_dtor(&bk.encoder); + + p = _bk.p; + device.pos = _bk.pos; + decoder->filter_dtor(decoder); + encoder->filter_dtor(encoder); + mbfl_convert_filter_copy(&_bk.decoder, decoder); + mbfl_convert_filter_copy(&_bk.encoder, encoder); + + bk = _bk; } + + (*encoder->filter_flush)(encoder); + + bk.decoder.filter_dtor(&bk.decoder); + bk.encoder.filter_dtor(&bk.encoder); + result = mbfl_memory_device_result(&device, result); + mbfl_convert_filter_delete(encoder); - mbfl_convert_filter_delete(encoder_tmp); mbfl_convert_filter_delete(decoder); - mbfl_convert_filter_delete(decoder_tmp); } return result; @@ -1589,276 +1837,6 @@ mbfl_strimwidth( return result; } - - -/* - * convert Hankaku and Zenkaku - */ -struct collector_hantozen_data { - mbfl_convert_filter *next_filter; - int mode; - int status; - int cache; -}; - -static const unsigned char hankana2zenkata_table[64] = { - 0x00,0x02,0x0C,0x0D,0x01,0xFB,0xF2,0xA1,0xA3,0xA5, - 0xA7,0xA9,0xE3,0xE5,0xE7,0xC3,0xFC,0xA2,0xA4,0xA6, - 0xA8,0xAA,0xAB,0xAD,0xAF,0xB1,0xB3,0xB5,0xB7,0xB9, - 0xBB,0xBD,0xBF,0xC1,0xC4,0xC6,0xC8,0xCA,0xCB,0xCC, - 
0xCD,0xCE,0xCF,0xD2,0xD5,0xD8,0xDB,0xDE,0xDF,0xE0, - 0xE1,0xE2,0xE4,0xE6,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, - 0xEF,0xF3,0x9B,0x9C -}; -static const unsigned char hankana2zenhira_table[64] = { - 0x00,0x02,0x0C,0x0D,0x01,0xFB,0x92,0x41,0x43,0x45, - 0x47,0x49,0x83,0x85,0x87,0x63,0xFC,0x42,0x44,0x46, - 0x48,0x4A,0x4B,0x4D,0x4F,0x51,0x53,0x55,0x57,0x59, - 0x5B,0x5D,0x5F,0x61,0x64,0x66,0x68,0x6A,0x6B,0x6C, - 0x6D,0x6E,0x6F,0x72,0x75,0x78,0x7B,0x7E,0x7F,0x80, - 0x81,0x82,0x84,0x86,0x88,0x89,0x8A,0x8B,0x8C,0x8D, - 0x8F,0x93,0x9B,0x9C -}; -static const unsigned char zenkana2hankana_table[84][2] = { - {0x67,0x00},{0x71,0x00},{0x68,0x00},{0x72,0x00},{0x69,0x00}, - {0x73,0x00},{0x6A,0x00},{0x74,0x00},{0x6B,0x00},{0x75,0x00}, - {0x76,0x00},{0x76,0x9E},{0x77,0x00},{0x77,0x9E},{0x78,0x00}, - {0x78,0x9E},{0x79,0x00},{0x79,0x9E},{0x7A,0x00},{0x7A,0x9E}, - {0x7B,0x00},{0x7B,0x9E},{0x7C,0x00},{0x7C,0x9E},{0x7D,0x00}, - {0x7D,0x9E},{0x7E,0x00},{0x7E,0x9E},{0x7F,0x00},{0x7F,0x9E}, - {0x80,0x00},{0x80,0x9E},{0x81,0x00},{0x81,0x9E},{0x6F,0x00}, - {0x82,0x00},{0x82,0x9E},{0x83,0x00},{0x83,0x9E},{0x84,0x00}, - {0x84,0x9E},{0x85,0x00},{0x86,0x00},{0x87,0x00},{0x88,0x00}, - {0x89,0x00},{0x8A,0x00},{0x8A,0x9E},{0x8A,0x9F},{0x8B,0x00}, - {0x8B,0x9E},{0x8B,0x9F},{0x8C,0x00},{0x8C,0x9E},{0x8C,0x9F}, - {0x8D,0x00},{0x8D,0x9E},{0x8D,0x9F},{0x8E,0x00},{0x8E,0x9E}, - {0x8E,0x9F},{0x8F,0x00},{0x90,0x00},{0x91,0x00},{0x92,0x00}, - {0x93,0x00},{0x6C,0x00},{0x94,0x00},{0x6D,0x00},{0x95,0x00}, - {0x6E,0x00},{0x96,0x00},{0x97,0x00},{0x98,0x00},{0x99,0x00}, - {0x9A,0x00},{0x9B,0x00},{0x9C,0x00},{0x9C,0x00},{0x72,0x00}, - {0x74,0x00},{0x66,0x00},{0x9D,0x00},{0x73,0x9E} -}; - -static int -collector_hantozen(int c, void* data) -{ - int s, mode, n; - struct collector_hantozen_data *pc = (struct collector_hantozen_data*)data; - - s = c; - mode = pc->mode; - - if (mode & 0xf) { /* hankaku to zenkaku */ - if ((mode & 0x1) && c >= 0x21 && c <= 0x7d && c != 0x22 && c != 0x27 && c != 0x5c) { /* all except <"> <'> <\> 
<~> */ - s = c + 0xfee0; - } else if ((mode & 0x2) && ((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a))) { /* alpha */ - s = c + 0xfee0; - } else if ((mode & 0x4) && c >= 0x30 && c <= 0x39) { /* num */ - s = c + 0xfee0; - } else if ((mode & 0x8) && c == 0x20) { /* spase */ - s = 0x3000; - } - } - - if (mode & 0xf0) { /* zenkaku to hankaku */ - if ((mode & 0x10) && c >= 0xff01 && c <= 0xff5d && c != 0xff02 && c != 0xff07 && c!= 0xff3c) { /* all except <"> <'> <\> <~> */ - s = c - 0xfee0; - } else if ((mode & 0x20) && ((c >= 0xff21 && c <= 0xff3a) || (c >= 0xff41 && c <= 0xff5a))) { /* alpha */ - s = c - 0xfee0; - } else if ((mode & 0x40) && (c >= 0xff10 && c <= 0xff19)) { /* num */ - s = c - 0xfee0; - } else if ((mode & 0x80) && (c == 0x3000)) { /* spase */ - s = 0x20; - } else if ((mode & 0x10) && (c == 0x2212)) { /* MINUS SIGN */ - s = 0x2d; - } - } - - if (mode & 0x300) { /* hankaku kana to zenkaku kana */ - if ((mode & 0x100) && (mode & 0x800)) { /* hankaku kana to zenkaku katakana and glue voiced sound mark */ - if (c >= 0xff61 && c <= 0xff9f) { - if (pc->status) { - n = (pc->cache - 0xff60) & 0x3f; - if (c == 0xff9e && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { - pc->status = 0; - s = 0x3001 + hankana2zenkata_table[n]; - } else if (c == 0xff9e && n == 19) { - pc->status = 0; - s = 0x30f4; - } else if (c == 0xff9f && (n >= 42 && n <= 46)) { - pc->status = 0; - s = 0x3002 + hankana2zenkata_table[n]; - } else { - pc->status = 1; - pc->cache = c; - s = 0x3000 + hankana2zenkata_table[n]; - } - } else { - pc->status = 1; - pc->cache = c; - return c; - } - } else { - if (pc->status) { - n = (pc->cache - 0xff60) & 0x3f; - pc->status = 0; - (*pc->next_filter->filter_function)(0x3000 + hankana2zenkata_table[n], pc->next_filter); - } - } - } else if ((mode & 0x200) && (mode & 0x800)) { /* hankaku kana to zenkaku hirangana and glue voiced sound mark */ - if (c >= 0xff61 && c <= 0xff9f) { - if (pc->status) { - n = (pc->cache - 0xff60) & 0x3f; - if (c == 0xff9e 
&& ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { - pc->status = 0; - s = 0x3001 + hankana2zenhira_table[n]; - } else if (c == 0xff9f && (n >= 42 && n <= 46)) { - pc->status = 0; - s = 0x3002 + hankana2zenhira_table[n]; - } else { - pc->status = 1; - pc->cache = c; - s = 0x3000 + hankana2zenhira_table[n]; - } - } else { - pc->status = 1; - pc->cache = c; - return c; - } - } else { - if (pc->status) { - n = (pc->cache - 0xff60) & 0x3f; - pc->status = 0; - (*pc->next_filter->filter_function)(0x3000 + hankana2zenhira_table[n], pc->next_filter); - } - } - } else if ((mode & 0x100) && c >= 0xff61 && c <= 0xff9f) { /* hankaku kana to zenkaku katakana */ - s = 0x3000 + hankana2zenkata_table[c - 0xff60]; - } else if ((mode & 0x200) && c >= 0xff61 && c <= 0xff9f) { /* hankaku kana to zenkaku hirangana */ - s = 0x3000 + hankana2zenhira_table[c - 0xff60]; - } - } - - if (mode & 0x3000) { /* Zenkaku kana to hankaku kana */ - if ((mode & 0x1000) && c >= 0x30a1 && c <= 0x30f4) { /* Zenkaku katakana to hankaku kana */ - n = c - 0x30a1; - if (zenkana2hankana_table[n][1] != 0) { - (*pc->next_filter->filter_function)(0xff00 + zenkana2hankana_table[n][0], pc->next_filter); - s = 0xff00 + zenkana2hankana_table[n][1]; - } else { - s = 0xff00 + zenkana2hankana_table[n][0]; - } - } else if ((mode & 0x2000) && c >= 0x3041 && c <= 0x3093) { /* Zenkaku hirangana to hankaku kana */ - n = c - 0x3041; - if (zenkana2hankana_table[n][1] != 0) { - (*pc->next_filter->filter_function)(0xff00 + zenkana2hankana_table[n][0], pc->next_filter); - s = 0xff00 + zenkana2hankana_table[n][1]; - } else { - s = 0xff00 + zenkana2hankana_table[n][0]; - } - } else if (c == 0x3001) { - s = 0xff64; /* HALFWIDTH IDEOGRAPHIC COMMA */ - } else if (c == 0x3002) { - s = 0xff61; /* HALFWIDTH IDEOGRAPHIC FULL STOP */ - } else if (c == 0x300c) { - s = 0xff62; /* HALFWIDTH LEFT CORNER BRACKET */ - } else if (c == 0x300d) { - s = 0xff63; /* HALFWIDTH RIGHT CORNER BRACKET */ - } else if (c == 0x309b) { - s = 0xff9e; /* 
HALFWIDTH KATAKANA VOICED SOUND MARK */ - } else if (c == 0x309c) { - s = 0xff9f; /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */ - } else if (c == 0x30fc) { - s = 0xff70; /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */ - } else if (c == 0x30fb) { - s = 0xff65; /* HALFWIDTH KATAKANA MIDDLE DOT */ - } - } else if (mode & 0x30000) { - if ((mode & 0x10000) && c >= 0x3041 && c <= 0x3093) { /* Zenkaku hirangana to Zenkaku katakana */ - s = c + 0x60; - } else if ((mode & 0x20000) && c >= 0x30a1 && c <= 0x30f3) { /* Zenkaku katakana to Zenkaku hirangana */ - s = c - 0x60; - } - } - - if (mode & 0x100000) { /* special ascii to symbol */ - if (c == 0x5c) { - s = 0xffe5; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xa5) { /* YEN SIGN */ - s = 0xffe5; /* FULLWIDTH YEN SIGN */ - } else if (c == 0x7e) { - s = 0xffe3; /* FULLWIDTH MACRON */ - } else if (c == 0x203e) { /* OVERLINE */ - s = 0xffe3; /* FULLWIDTH MACRON */ - } else if (c == 0x27) { - s = 0x2019; /* RIGHT SINGLE QUOTATION MARK */ - } else if (c == 0x22) { - s = 0x201d; /* RIGHT DOUBLE QUOTATION MARK */ - } - } else if (mode & 0x200000) { /* special symbol to ascii */ - if (c == 0xffe5) { /* FULLWIDTH YEN SIGN */ - s = 0x5c; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x5c; - } else if (c == 0xffe3) { /* FULLWIDTH MACRON */ - s = 0x7e; - } else if (c == 0x203e) { /* OVERLINE */ - s = 0x7e; - } else if (c == 0x2018) { /* LEFT SINGLE QUOTATION MARK*/ - s = 0x27; - } else if (c == 0x2019) { /* RIGHT SINGLE QUOTATION MARK */ - s = 0x27; - } else if (c == 0x201c) { /* LEFT DOUBLE QUOTATION MARK */ - s = 0x22; - } else if (c == 0x201d) { /* RIGHT DOUBLE QUOTATION MARK */ - s = 0x22; - } - } - - if (mode & 0x400000) { /* special ascii to symbol */ - if (c == 0x5c) { - s = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (c == 0x7e) { - s = 0xff5e; /* FULLWIDTH TILDE */ - } else if (c == 0x27) { - s = 0xff07; /* FULLWIDTH APOSTROPHE */ - } else if (c == 0x22) { - s = 0xff02; /* FULLWIDTH 
QUOTATION MARK */ - } - } else if (mode & 0x800000) { /* special symbol to ascii */ - if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x5c; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x7e; - } else if (c == 0xff07) { /* FULLWIDTH APOSTROPHE */ - s = 0x27; - } else if (c == 0xff02) { /* FULLWIDTH QUOTATION MARK */ - s = 0x22; - } - } - - return (*pc->next_filter->filter_function)(s, pc->next_filter); -} - -static int -collector_hantozen_flush(struct collector_hantozen_data *pc) -{ - int ret, n; - - ret = 0; - if (pc->status) { - n = (pc->cache - 0xff60) & 0x3f; - if (pc->mode & 0x100) { /* hankaku kana to zenkaku katakana */ - ret = (*pc->next_filter->filter_function)(0x3000 + hankana2zenkata_table[n], pc->next_filter); - } else if (pc->mode & 0x200) { /* hankaku kana to zenkaku hirangana */ - ret = (*pc->next_filter->filter_function)(0x3000 + hankana2zenhira_table[n], pc->next_filter); - } - pc->status = 0; - } - - return ret; -} - mbfl_string * mbfl_ja_jp_hantozen( mbfl_string *string, @@ -1869,39 +1847,67 @@ mbfl_ja_jp_hantozen( unsigned char *p; const mbfl_encoding *encoding; mbfl_memory_device device; - struct collector_hantozen_data pc; - mbfl_convert_filter *decoder; - mbfl_convert_filter *encoder; + mbfl_convert_filter *decoder = NULL; + mbfl_convert_filter *encoder = NULL; + mbfl_convert_filter *tl_filter = NULL; + mbfl_convert_filter *next_filter = NULL; + mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL; - /* initialize */ + /* validate parameters */ if (string == NULL || result == NULL) { return NULL; } + encoding = mbfl_no2encoding(string->no_encoding); if (encoding == NULL) { return NULL; } + mbfl_memory_device_init(&device, string->len, 0); mbfl_string_init(result); + result->no_language = string->no_language; result->no_encoding = string->no_encoding; + decoder = mbfl_convert_filter_new( - mbfl_no_encoding_wchar, - string->no_encoding, - mbfl_memory_device_output, 0, &device); + mbfl_no_encoding_wchar, + string->no_encoding, + 
mbfl_memory_device_output, 0, &device); + if (decoder == NULL) { + goto out; + } + next_filter = decoder; + + param = + (mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param)); + if (param == NULL) { + goto out; + } + + param->mode = mode; + + tl_filter = mbfl_convert_filter_new2( + &vtbl_tl_jisx0201_jisx0208, + (int(*)(int, void*))next_filter->filter_function, + (int(*)(void*))next_filter->filter_flush, + next_filter); + if (tl_filter == NULL) { + mbfl_free(param); + goto out; + } + + tl_filter->opaque = param; + next_filter = tl_filter; + encoder = mbfl_convert_filter_new( - string->no_encoding, - mbfl_no_encoding_wchar, - collector_hantozen, 0, &pc); - if (decoder == NULL || encoder == NULL) { - mbfl_convert_filter_delete(encoder); - mbfl_convert_filter_delete(decoder); - return NULL; + string->no_encoding, + mbfl_no_encoding_wchar, + (int(*)(int, void*))next_filter->filter_function, + (int(*)(void*))next_filter->filter_flush, + next_filter); + if (encoder == NULL) { + goto out; } - pc.next_filter = decoder; - pc.mode = mode; - pc.status = 0; - pc.cache = 0; /* feed data */ p = string->val; @@ -1916,11 +1922,22 @@ mbfl_ja_jp_hantozen( } mbfl_convert_filter_flush(encoder); - collector_hantozen_flush(&pc); - mbfl_convert_filter_flush(decoder); result = mbfl_memory_device_result(&device, result); - mbfl_convert_filter_delete(encoder); - mbfl_convert_filter_delete(decoder); +out: + if (tl_filter != NULL) { + if (tl_filter->opaque != NULL) { + mbfl_free(tl_filter->opaque); + } + mbfl_convert_filter_delete(tl_filter); + } + + if (decoder != NULL) { + mbfl_convert_filter_delete(decoder); + } + + if (encoder != NULL) { + mbfl_convert_filter_delete(encoder); + } return result; } @@ -2685,6 +2702,53 @@ collector_decode_htmlnumericentity(int c return c; } +int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter) +{ + struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter; + 
int n, s, r, d; + + if (pc->status) { + switch (pc->status) { + case 1: /* '&' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + break; + case 2: /* '#' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + break; + case 3: /* '0'-'9' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + + s = pc->cache; + r = 1; + n = pc->digit; + while (n > 0) { + r *= 10; + n--; + } + s %= r; + r /= 10; + while (r > 0) { + d = s/r; + s %= r; + r /= 10; + (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); + } + + break; + default: + break; + } + } + + pc->status = 0; + pc->cache = 0; + pc->digit = 0; + + return 0; +} + mbfl_string * mbfl_html_numeric_entity( mbfl_string *string, @@ -2722,7 +2786,8 @@ mbfl_html_numeric_entity( encoder = mbfl_convert_filter_new( string->no_encoding, mbfl_no_encoding_wchar, - collector_decode_htmlnumericentity, 0, &pc); + collector_decode_htmlnumericentity, + (int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc); } if (pc.decoder == NULL || encoder == NULL) { mbfl_convert_filter_delete(encoder); diff -Naurp libmbfl-1.0.1/mbfl/mbfilter.h libmbfl-1.0.1.oden/mbfl/mbfilter.h --- libmbfl-1.0.1/mbfl/mbfilter.h 2006-01-21 04:36:56.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/mbfilter.h 2010-03-12 05:55:37.000000000 +0100 @@ -99,11 +99,19 @@ #include "mbfl_ident.h" /* + * version information + */ +#define MBFL_VERSION_MAJOR 1 +#define MBFL_VERSION_MINOR 1 +#define MBFL_VERSION_TEENY 0 + +/* * convert filter */ #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE 0 #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR 1 #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG 2 +#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY 3 /* * buffering converter @@ -129,6 +137,7 @@ MBFLAPI extern int mbfl_buffer_converter MBFLAPI extern mbfl_string * 
mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result); MBFLAPI extern mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result); MBFLAPI extern mbfl_string * mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result); +MBFLAPI extern int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd); /* * encoding detector diff -Naurp libmbfl-1.0.1/mbfl/mbfl_consts.h libmbfl-1.0.1.oden/mbfl/mbfl_consts.h --- libmbfl-1.0.1/mbfl/mbfl_consts.h 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/mbfl_consts.h 2009-03-18 18:44:25.000000000 +0100 @@ -47,6 +47,7 @@ /* wchar plane, special charactor */ #define MBFL_WCSPLANE_MASK 0xffff #define MBFL_WCSPLANE_UCS2MAX 0x00010000 +#define MBFL_WCSPLANE_UTF32MAX 0x00110000 #define MBFL_WCSPLANE_SUPMIN 0x00010000 #define MBFL_WCSPLANE_SUPMAX 0x00200000 #define MBFL_WCSPLANE_JIS0208 0x70e10000 /* JIS HEX : 2121h - 7E7Eh */ @@ -72,10 +73,13 @@ #define MBFL_WCSPLANE_CNS11643 0x70f50000 /* 2121h - 9898h */ #define MBFL_WCSPLANE_UHC 0x70f60000 /* 8141h - fefeh */ #define MBFL_WCSPLANE_CP1251 0x70f70000 -#define MBFL_WCSPLANE_CP866 0x70f80000 +#define MBFL_WCSPLANE_CP866 0x70f80000 #define MBFL_WCSPLANE_KOI8R 0x70f90000 #define MBFL_WCSPLANE_8859_16 0x70fa0000 /* 00h - FFh */ #define MBFL_WCSPLANE_ARMSCII8 0x70fb0000 +#define MBFL_WCSPLANE_KOI8U 0x70fc0000 +#define MBFL_WCSPLANE_CP1254 0x70fd0000 /* 00h - FFh */ +#define MBFL_WCSPLANE_CP850 0x70fe0000 /* 00h - FFh */ #define MBFL_WCSGROUP_MASK 0xffffff #define MBFL_WCSGROUP_UCS4MAX 0x70000000 #define MBFL_WCSGROUP_WCHARMAX 0x78000000 diff -Naurp libmbfl-1.0.1/mbfl/mbfl_convert.c libmbfl-1.0.1.oden/mbfl/mbfl_convert.c --- libmbfl-1.0.1/mbfl/mbfl_convert.c 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/mbfl_convert.c 2010-03-12 05:55:37.000000000 +0100 @@ -51,16 +51,22 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" 
#include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_sjis_open.h" +#include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" #include "filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" #include "filters/mbfilter_cp866.h" #include "filters/mbfilter_cp932.h" #include "filters/mbfilter_cp936.h" #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" +#include "filters/mbfilter_cp1254.h" +#include "filters/mbfilter_cp5022x.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -89,8 +95,7 @@ #include "filters/mbfilter_ucs2.h" #include "filters/mbfilter_htmlent.h" #include "filters/mbfilter_armscii8.h" - -static void mbfl_convert_filter_reset_vtbl(mbfl_convert_filter *filter); +#include "filters/mbfilter_cp850.h" /* hex character table "0123456789ABCDEF" */ static char mbfl_hexchar_table[] = { @@ -104,14 +109,22 @@ const struct mbfl_convert_vtbl *mbfl_con &vtbl_wchar_eucjp, &vtbl_sjis_wchar, &vtbl_wchar_sjis, + &vtbl_sjis_open_wchar, + &vtbl_wchar_sjis_open, + &vtbl_cp51932_wchar, + &vtbl_wchar_cp51932, &vtbl_jis_wchar, &vtbl_wchar_jis, + &vtbl_jis_ms_wchar, + &vtbl_wchar_jis_ms, &vtbl_2022jp_wchar, &vtbl_wchar_2022jp, + &vtbl_2022jpms_wchar, + &vtbl_wchar_2022jpms, &vtbl_eucjpwin_wchar, &vtbl_wchar_eucjpwin, - &vtbl_sjiswin_wchar, - &vtbl_wchar_sjiswin, + &vtbl_cp932_wchar, + &vtbl_wchar_cp932, &vtbl_euccn_wchar, &vtbl_wchar_euccn, &vtbl_cp936_wchar, @@ -134,8 +147,20 @@ const struct mbfl_convert_vtbl *mbfl_con &vtbl_wchar_cp866, &vtbl_koi8r_wchar, &vtbl_wchar_koi8r, + &vtbl_koi8u_wchar, + &vtbl_wchar_koi8u, &vtbl_cp1252_wchar, &vtbl_wchar_cp1252, + &vtbl_cp1254_wchar, + &vtbl_wchar_cp1254, + &vtbl_cp50220_wchar, + &vtbl_wchar_cp50220, + &vtbl_cp50220raw_wchar, + &vtbl_wchar_cp50220raw, + 
&vtbl_cp50221_wchar, + &vtbl_wchar_cp50221, + &vtbl_cp50222_wchar, + &vtbl_wchar_cp50222, &vtbl_ascii_wchar, &vtbl_wchar_ascii, &vtbl_8859_1_wchar, @@ -211,10 +236,54 @@ const struct mbfl_convert_vtbl *mbfl_con &vtbl_wchar_byte2le, &vtbl_armscii8_wchar, &vtbl_wchar_armscii8, + &vtbl_cp850_wchar, + &vtbl_wchar_cp850, &vtbl_pass, NULL }; +static int +mbfl_convert_filter_common_init( + mbfl_convert_filter *filter, + enum mbfl_no_encoding from, + enum mbfl_no_encoding to, + const struct mbfl_convert_vtbl *vtbl, + int (*output_function)(int, void* ), + int (*flush_function)(void*), + void* data) +{ + /* encoding structure */ + if ((filter->from = mbfl_no2encoding(from)) == NULL) { + return 1; + } + + if ((filter->to = mbfl_no2encoding(to)) == NULL) { + return 1; + } + + if (output_function != NULL) { + filter->output_function = output_function; + } else { + filter->output_function = mbfl_filter_output_null; + } + + filter->flush_function = flush_function; + filter->data = data; + filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; + filter->illegal_substchar = 0x3f; /* '?' 
*/ + filter->num_illegalchar = 0; + filter->filter_ctor = vtbl->filter_ctor; + filter->filter_dtor = vtbl->filter_dtor; + filter->filter_function = vtbl->filter_function; + filter->filter_flush = vtbl->filter_flush; + filter->filter_copy = vtbl->filter_copy; + + (*filter->filter_ctor)(filter); + + return 0; +} + + mbfl_convert_filter * mbfl_convert_filter_new( enum mbfl_no_encoding from, @@ -224,6 +293,13 @@ mbfl_convert_filter_new( void* data) { mbfl_convert_filter * filter; + const struct mbfl_convert_vtbl *vtbl; + + vtbl = mbfl_convert_filter_get_vtbl(from, to); + + if (vtbl == NULL) { + vtbl = &vtbl_pass; + } /* allocate */ filter = (mbfl_convert_filter *)mbfl_malloc(sizeof(mbfl_convert_filter)); @@ -231,31 +307,39 @@ mbfl_convert_filter_new( return NULL; } - /* encoding structure */ - filter->from = mbfl_no2encoding(from); - filter->to = mbfl_no2encoding(to); - if (filter->from == NULL) { - filter->from = &mbfl_encoding_pass; - } - if (filter->to == NULL) { - filter->to = &mbfl_encoding_pass; + if (mbfl_convert_filter_common_init(filter, from, to, vtbl, + output_function, flush_function, data)) { + mbfl_free(filter); + return NULL; } - if (output_function != NULL) { - filter->output_function = output_function; - } else { - filter->output_function = mbfl_filter_output_null; + return filter; +} + +mbfl_convert_filter * +mbfl_convert_filter_new2( + const struct mbfl_convert_vtbl *vtbl, + int (*output_function)(int, void* ), + int (*flush_function)(void*), + void* data) +{ + mbfl_convert_filter * filter; + + if (vtbl == NULL) { + vtbl = &vtbl_pass; } - filter->flush_function = flush_function; - filter->data = data; - filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; - filter->illegal_substchar = 0x3f; /* '?' 
*/ - /* setup the function table */ - mbfl_convert_filter_reset_vtbl(filter); + /* allocate */ + filter = (mbfl_convert_filter *)mbfl_malloc(sizeof(mbfl_convert_filter)); + if (filter == NULL) { + return NULL; + } - /* constructor */ - (*filter->filter_ctor)(filter); + if (mbfl_convert_filter_common_init(filter, vtbl->from, vtbl->to, vtbl, + output_function, flush_function, data)) { + mbfl_free(filter); + return NULL; + } return filter; } @@ -285,38 +369,32 @@ mbfl_convert_filter_flush(mbfl_convert_f void mbfl_convert_filter_reset(mbfl_convert_filter *filter, enum mbfl_no_encoding from, enum mbfl_no_encoding to) { + const struct mbfl_convert_vtbl *vtbl; + /* destruct old filter */ (*filter->filter_dtor)(filter); - /* resset filter member */ - filter->from = mbfl_no2encoding(from); - filter->to = mbfl_no2encoding(to); + vtbl = mbfl_convert_filter_get_vtbl(from, to); - /* set the vtbl */ - mbfl_convert_filter_reset_vtbl(filter); + if (vtbl == NULL) { + vtbl = &vtbl_pass; + } - /* construct new filter */ - (*filter->filter_ctor)(filter); + mbfl_convert_filter_common_init(filter, from, to, vtbl, + filter->output_function, filter->flush_function, filter->data); } void mbfl_convert_filter_copy( mbfl_convert_filter *src, - mbfl_convert_filter *dist) + mbfl_convert_filter *dest) { - dist->filter_ctor = src->filter_ctor; - dist->filter_dtor = src->filter_dtor; - dist->filter_function = src->filter_function; - dist->filter_flush = src->filter_flush; - dist->output_function = src->output_function; - dist->flush_function = src->flush_function; - dist->data = src->data; - dist->status = src->status; - dist->cache = src->cache; - dist->from = src->from; - dist->to = src->to; - dist->illegal_mode = src->illegal_mode; - dist->illegal_substchar = src->illegal_substchar; + if (src->filter_copy != NULL) { + src->filter_copy(src, dest); + return; + } + + *dest = *src; } int mbfl_convert_filter_devcat(mbfl_convert_filter *filter, mbfl_memory_device *src) @@ -349,22 +427,6 @@ int 
mbfl_convert_filter_strcat(mbfl_conv return 0; } -#if 0 -static int -mbfl_convert_filter_strncat(mbfl_convert_filter *filter, const unsigned char *p, - int n) -{ - while (n > 0) { - if ((*filter->filter_function)(*p++, filter) < 0) { - return -1; - } - n--; - } - - return n; -} -#endif - /* illegal character output function for conv-filter */ int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) @@ -428,10 +490,43 @@ mbfl_filt_conv_illegal_output(int c, mbf } } break; + case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: + if (c >= 0) { + if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */ + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x"); + if (ret < 0) + break; + + m = 0; + r = 28; + while (r >= 0) { + n = (c >> r) & 0xf; + if (n || m) { + m = 1; + ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter); + if (ret < 0) { + break; + } + } + r -= 4; + } + if (ret < 0) { + break; + } + if (m == 0) { + ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter); + } + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";"); + } else { + ret = (*filter->filter_function)(filter->illegal_substchar, filter); + } + } + break; default: break; } filter->illegal_mode = mode_backup; + filter->num_illegalchar++; return ret; } @@ -446,8 +541,8 @@ const struct mbfl_convert_vtbl * mbfl_co to == mbfl_no_encoding_7bit) { from = mbfl_no_encoding_8bit; } else if (from == mbfl_no_encoding_base64 || - from == mbfl_no_encoding_qprint || - from == mbfl_no_encoding_uuencode) { + from == mbfl_no_encoding_qprint || + from == mbfl_no_encoding_uuencode) { to = mbfl_no_encoding_8bit; } @@ -461,22 +556,6 @@ const struct mbfl_convert_vtbl * mbfl_co return NULL; } - -static void mbfl_convert_filter_reset_vtbl(mbfl_convert_filter *filter) -{ - const struct mbfl_convert_vtbl *vtbl; - - vtbl = mbfl_convert_filter_get_vtbl(filter->from->no_encoding, filter->to->no_encoding); - if (vtbl == NULL) { - vtbl = &vtbl_pass; - } - - filter->filter_ctor = 
vtbl->filter_ctor; - filter->filter_dtor = vtbl->filter_dtor; - filter->filter_function = vtbl->filter_function; - filter->filter_flush = vtbl->filter_flush; -} - /* * commonly used constructor and destructor */ @@ -490,6 +569,10 @@ int mbfl_filt_conv_common_flush(mbfl_con { filter->status = 0; filter->cache = 0; + + if (filter->flush_function != NULL) { + (*filter->flush_function)(filter->data); + } return 0; } diff -Naurp libmbfl-1.0.1/mbfl/mbfl_convert.h libmbfl-1.0.1.oden/mbfl/mbfl_convert.h --- libmbfl-1.0.1/mbfl/mbfl_convert.h 2005-02-21 11:09:40.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/mbfl_convert.h 2010-03-12 05:55:37.000000000 +0100 @@ -40,6 +40,7 @@ typedef struct _mbfl_convert_filter mbfl struct _mbfl_convert_filter { void (*filter_ctor)(mbfl_convert_filter *filter); void (*filter_dtor)(mbfl_convert_filter *filter); + void (*filter_copy)(mbfl_convert_filter *src, mbfl_convert_filter *dest); int (*filter_function)(int c, mbfl_convert_filter *filter); int (*filter_flush)(mbfl_convert_filter *filter); int (*output_function)(int c, void *data); @@ -51,6 +52,7 @@ struct _mbfl_convert_filter { const mbfl_encoding *to; int illegal_mode; int illegal_substchar; + int num_illegalchar; void *opaque; }; @@ -61,6 +63,7 @@ struct mbfl_convert_vtbl { void (*filter_dtor)(mbfl_convert_filter *filter); int (*filter_function)(int c, mbfl_convert_filter *filter); int (*filter_flush)(mbfl_convert_filter *filter); + void (*filter_copy)(mbfl_convert_filter *src, mbfl_convert_filter *dest); }; MBFLAPI extern const struct mbfl_convert_vtbl *mbfl_convert_filter_list[]; @@ -71,6 +74,11 @@ MBFLAPI extern mbfl_convert_filter *mbfl int (*output_function)(int, void *), int (*flush_function)(void *), void *data ); +MBFLAPI extern mbfl_convert_filter *mbfl_convert_filter_new2( + const struct mbfl_convert_vtbl *vtbl, + int (*output_function)(int, void *), + int (*flush_function)(void *), + void *data ); MBFLAPI extern void mbfl_convert_filter_delete(mbfl_convert_filter *filter); 
MBFLAPI extern int mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter); MBFLAPI extern int mbfl_convert_filter_flush(mbfl_convert_filter *filter); diff -Naurp libmbfl-1.0.1/mbfl/mbfl_defs.h libmbfl-1.0.1.oden/mbfl/mbfl_defs.h --- libmbfl-1.0.1/mbfl/mbfl_defs.h 2003-08-25 03:15:33.000000000 +0200 +++ libmbfl-1.0.1.oden/mbfl/mbfl_defs.h 2008-07-20 20:26:02.000000000 +0200 @@ -44,9 +44,13 @@ #define MBFLAPI __declspec(dllexport) #else #define MBFLAPI __declspec(dllimport) -#endif +#endif /* MBFL_DLL_EXPORT */ +#else +#if defined(__GNUC__) && __GNUC__ >= 4 +#define MBFLAPI __attribute__((visibility("default"))) #else #define MBFLAPI -#endif +#endif /* defined(__GNUC__) && __GNUC__ >= 4 */ +#endif /* WIN32 */ #endif /* MBFL_DEFS_H */ diff -Naurp libmbfl-1.0.1/mbfl/mbfl_encoding.c libmbfl-1.0.1.oden/mbfl/mbfl_encoding.c --- libmbfl-1.0.1/mbfl/mbfl_encoding.c 2006-01-21 04:39:51.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/mbfl_encoding.c 2010-03-12 05:55:37.000000000 +0100 @@ -57,16 +57,22 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_sjis_open.h" +#include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" #include "filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" #include "filters/mbfilter_cp866.h" #include "filters/mbfilter_cp932.h" #include "filters/mbfilter_cp936.h" #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" +#include "filters/mbfilter_cp1254.h" +#include "filters/mbfilter_cp5022x.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -96,6 +102,7 @@ #include "filters/mbfilter_ucs2.h" #include "filters/mbfilter_htmlent.h" #include "filters/mbfilter_armscii8.h" +#include 
"filters/mbfilter_cp850.h" #ifndef HAVE_STRCASECMP #ifdef HAVE_STRICMP @@ -148,10 +155,14 @@ static const mbfl_encoding *mbfl_encodin &mbfl_encoding_euc_jp, &mbfl_encoding_sjis, &mbfl_encoding_eucjp_win, - &mbfl_encoding_sjis_win, + &mbfl_encoding_sjis_open, + &mbfl_encoding_cp932, + &mbfl_encoding_cp51932, &mbfl_encoding_jis, &mbfl_encoding_2022jp, + &mbfl_encoding_2022jpms, &mbfl_encoding_cp1252, + &mbfl_encoding_cp1254, &mbfl_encoding_8859_1, &mbfl_encoding_8859_2, &mbfl_encoding_8859_3, @@ -177,7 +188,14 @@ static const mbfl_encoding *mbfl_encodin &mbfl_encoding_cp1251, &mbfl_encoding_cp866, &mbfl_encoding_koi8r, + &mbfl_encoding_koi8u, &mbfl_encoding_armscii8, + &mbfl_encoding_cp850, + &mbfl_encoding_jis_ms, + &mbfl_encoding_cp50220, + &mbfl_encoding_cp50220raw, + &mbfl_encoding_cp50221, + &mbfl_encoding_cp50222, NULL }; diff -Naurp libmbfl-1.0.1/mbfl/mbfl_encoding.h libmbfl-1.0.1.oden/mbfl/mbfl_encoding.h --- libmbfl-1.0.1/mbfl/mbfl_encoding.h 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/mbfl_encoding.h 2010-03-12 05:55:37.000000000 +0100 @@ -68,11 +68,14 @@ enum mbfl_no_encoding { mbfl_no_encoding_euc_jp, mbfl_no_encoding_sjis, mbfl_no_encoding_eucjp_win, - mbfl_no_encoding_sjis_win, - mbfl_no_encoding_sjis_mac, + mbfl_no_encoding_sjis_open, + mbfl_no_encoding_cp932, + mbfl_no_encoding_cp51932, mbfl_no_encoding_jis, mbfl_no_encoding_2022jp, + mbfl_no_encoding_2022jpms, mbfl_no_encoding_cp1252, + mbfl_no_encoding_cp1254, mbfl_no_encoding_8859_1, mbfl_no_encoding_8859_2, mbfl_no_encoding_8859_3, @@ -97,8 +100,15 @@ enum mbfl_no_encoding { mbfl_no_encoding_cp1251, mbfl_no_encoding_cp866, mbfl_no_encoding_koi8r, + mbfl_no_encoding_koi8u, mbfl_no_encoding_8859_16, mbfl_no_encoding_armscii8, + mbfl_no_encoding_cp850, + mbfl_no_encoding_jis_ms, + mbfl_no_encoding_cp50220, + mbfl_no_encoding_cp50220raw, + mbfl_no_encoding_cp50221, + mbfl_no_encoding_cp50222, mbfl_no_encoding_charset_max }; diff -Naurp libmbfl-1.0.1/mbfl/mbfl_filter_output.c 
libmbfl-1.0.1.oden/mbfl/mbfl_filter_output.c --- libmbfl-1.0.1/mbfl/mbfl_filter_output.c 2002-12-20 20:36:28.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/mbfl_filter_output.c 2010-03-12 05:55:37.000000000 +0100 @@ -41,9 +41,17 @@ int mbfl_filter_output_pipe(int c, void* return (*filter->filter_function)(c, filter); } +int mbfl_filter_output_pipe_flush(void *data) +{ + mbfl_convert_filter *filter = (mbfl_convert_filter*)data; + if (filter->filter_flush != NULL) { + return (*filter->filter_flush)(filter); + } + + return 0; +} + int mbfl_filter_output_null(int c, void* data) { return c; } - - diff -Naurp libmbfl-1.0.1/mbfl/mbfl_filter_output.h libmbfl-1.0.1.oden/mbfl/mbfl_filter_output.h --- libmbfl-1.0.1/mbfl/mbfl_filter_output.h 2003-08-25 03:15:33.000000000 +0200 +++ libmbfl-1.0.1.oden/mbfl/mbfl_filter_output.h 2010-03-12 05:55:37.000000000 +0100 @@ -32,6 +32,7 @@ #define MBFL_FILTER_OUTPUT_H MBFLAPI extern int mbfl_filter_output_pipe(int c, void* data); +MBFLAPI extern int mbfl_filter_output_pipe_flush(void* data); MBFLAPI extern int mbfl_filter_output_null(int c, void* data); #endif /* MBFL_FILTER_OUTPUT_H */ diff -Naurp libmbfl-1.0.1/mbfl/mbfl_ident.c libmbfl-1.0.1.oden/mbfl/mbfl_ident.c --- libmbfl-1.0.1/mbfl/mbfl_ident.c 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/mbfl_ident.c 2010-03-12 05:55:37.000000000 +0100 @@ -50,16 +50,22 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_sjis_open.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" #include "filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" #include "filters/mbfilter_cp866.h" #include "filters/mbfilter_cp932.h" #include "filters/mbfilter_cp936.h" #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" +#include 
"filters/mbfilter_cp1254.h" +#include "filters/mbfilter_cp51932.h" +#include "filters/mbfilter_cp5022x.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -88,6 +94,7 @@ #include "filters/mbfilter_ucs2.h" #include "filters/mbfilter_htmlent.h" #include "filters/mbfilter_armscii8.h" +#include "filters/mbfilter_cp850.h" static const struct mbfl_identify_vtbl vtbl_identify_false = { mbfl_no_encoding_pass, @@ -102,10 +109,13 @@ static const struct mbfl_identify_vtbl * &vtbl_identify_ascii, &vtbl_identify_eucjp, &vtbl_identify_sjis, + &vtbl_identify_sjis_open, &vtbl_identify_eucjpwin, - &vtbl_identify_sjiswin, + &vtbl_identify_cp932, &vtbl_identify_jis, &vtbl_identify_2022jp, + &vtbl_identify_2022jpms, + &vtbl_identify_cp51932, &vtbl_identify_euccn, &vtbl_identify_cp936, &vtbl_identify_hz, @@ -117,7 +127,9 @@ static const struct mbfl_identify_vtbl * &vtbl_identify_cp1251, &vtbl_identify_cp866, &vtbl_identify_koi8r, + &vtbl_identify_koi8u, &vtbl_identify_cp1252, + &vtbl_identify_cp1254, &vtbl_identify_8859_1, &vtbl_identify_8859_2, &vtbl_identify_8859_3, @@ -132,6 +144,11 @@ static const struct mbfl_identify_vtbl * &vtbl_identify_8859_14, &vtbl_identify_8859_15, &vtbl_identify_armscii8, + &vtbl_identify_cp850, + &vtbl_identify_jis_ms, + &vtbl_identify_cp50220, + &vtbl_identify_cp50221, + &vtbl_identify_cp50222, &vtbl_identify_false, NULL }; diff -Naurp libmbfl-1.0.1/mbfl/mbfl_language.c libmbfl-1.0.1.oden/mbfl/mbfl_language.c --- libmbfl-1.0.1/mbfl/mbfl_language.c 2006-01-21 04:40:18.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/mbfl_language.c 2008-07-05 08:52:04.000000000 +0200 @@ -57,6 +57,7 @@ #include "nls/nls_uni.h" #include "nls/nls_de.h" #include "nls/nls_ru.h" +#include "nls/nls_ua.h" #include "nls/nls_en.h" #include "nls/nls_hy.h" #include "nls/nls_tr.h" @@ -77,6 +78,7 @@ static const mbfl_language *mbfl_languag &mbfl_language_english, &mbfl_language_german, &mbfl_language_russian, + 
&mbfl_language_ukrainian, &mbfl_language_armenian, &mbfl_language_turkish, &mbfl_language_neutral, diff -Naurp libmbfl-1.0.1/mbfl/mbfl_language.h libmbfl-1.0.1.oden/mbfl/mbfl_language.h --- libmbfl-1.0.1/mbfl/mbfl_language.h 2006-01-21 04:40:34.000000000 +0100 +++ libmbfl-1.0.1.oden/mbfl/mbfl_language.h 2008-07-05 08:52:04.000000000 +0200 @@ -57,6 +57,7 @@ enum mbfl_no_language { mbfl_no_language_simplified_chinese, /* zh-cn */ mbfl_no_language_traditional_chinese, /* zh-tw */ mbfl_no_language_russian, /* ru */ + mbfl_no_language_ukrainian, /* ua */ mbfl_no_language_armenian, /* hy */ mbfl_no_language_turkish, /* tr */ mbfl_no_language_max diff -Naurp libmbfl-1.0.1/mbfl/mk_eaw_tbl.awk libmbfl-1.0.1.oden/mbfl/mk_eaw_tbl.awk diff -Naurp libmbfl-1.0.1/nls/Makefile.am libmbfl-1.0.1.oden/nls/Makefile.am --- libmbfl-1.0.1/nls/Makefile.am 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl-1.0.1.oden/nls/Makefile.am 2010-03-12 05:55:37.000000000 +0100 @@ -1,5 +1,25 @@ -EXTRA_DIST=Makefile.bcc32 noinst_LTLIBRARIES=libmbfl_nls.la INCLUDES=-I../mbfl libmbfl_nls_la_LDFLAGS=-version-info $(SHLIB_VERSION) -libmbfl_nls_la_SOURCES=nls_ja.c nls_de.c nls_en.c nls_hy.c nls_kr.c nls_ru.c nls_zh.c nls_uni.c nls_neutral.c nls_ja.h nls_de.h nls_en.h nls_hy.h nls_kr.h nls_ru.h nls_zh.h nls_uni.h nls_neutral.h +libmbfl_nls_la_SOURCES=nls_ja.c \ + nls_de.c \ + nls_en.c \ + nls_hy.c \ + nls_tr.c \ + nls_kr.c \ + nls_ru.c \ + nls_ua.c \ + nls_zh.c \ + nls_uni.c \ + nls_neutral.c \ + nls_ja.h \ + nls_de.h \ + nls_en.h \ + nls_hy.h \ + nls_tr.h \ + nls_kr.h \ + nls_ru.h \ + nls_ua.h \ + nls_zh.h \ + nls_uni.h \ + nls_neutral.h diff -Naurp libmbfl-1.0.1/nls/nls_hy.c libmbfl-1.0.1.oden/nls/nls_hy.c --- libmbfl-1.0.1/nls/nls_hy.c 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl-1.0.1.oden/nls/nls_hy.c 2008-02-16 09:32:26.000000000 +0100 @@ -11,7 +11,7 @@ const mbfl_language mbfl_language_armenian = { mbfl_no_language_armenian , - "Armenian ", + "Armenian", "hy", NULL, mbfl_no_encoding_armscii8, diff 
-Naurp libmbfl-1.0.1/nls/nls_ru.c libmbfl-1.0.1.oden/nls/nls_ru.c --- libmbfl-1.0.1/nls/nls_ru.c 2002-12-24 19:28:44.000000000 +0100 +++ libmbfl-1.0.1.oden/nls/nls_ru.c 2008-07-05 08:52:04.000000000 +0200 @@ -1,20 +1,20 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_STDDEF_H -#include <stddef.h> -#endif - -#include "mbfilter.h" -#include "nls_ru.h" - -const mbfl_language mbfl_language_russian = { - mbfl_no_language_russian, - "Russian", - "ru", - NULL, - mbfl_no_encoding_koi8r, - mbfl_no_encoding_qprint, - mbfl_no_encoding_8bit -}; +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfilter.h" +#include "nls_ru.h" + +const mbfl_language mbfl_language_russian = { + mbfl_no_language_russian, + "Russian", + "ru", + NULL, + mbfl_no_encoding_koi8r, + mbfl_no_encoding_qprint, + mbfl_no_encoding_8bit +}; diff -Naurp libmbfl-1.0.1/nls/nls_ru.h libmbfl-1.0.1.oden/nls/nls_ru.h --- libmbfl-1.0.1/nls/nls_ru.h 2002-12-07 20:20:44.000000000 +0100 +++ libmbfl-1.0.1.oden/nls/nls_ru.h 2007-09-26 17:44:16.000000000 +0200 @@ -1,9 +1,9 @@ -#ifndef MBFL_NLS_RU_H -#define MBFL_NLS_RU_H - -#include "mbfilter.h" -#include "nls_ru.h" - -extern const mbfl_language mbfl_language_russian; - -#endif /* MBFL_NLS_RU_H */ +#ifndef MBFL_NLS_RU_H +#define MBFL_NLS_RU_H + +#include "mbfilter.h" +#include "nls_ru.h" + +extern const mbfl_language mbfl_language_russian; + +#endif /* MBFL_NLS_RU_H */ diff -Naurp libmbfl-1.0.1/nls/nls_tr.c libmbfl-1.0.1.oden/nls/nls_tr.c --- libmbfl-1.0.1/nls/nls_tr.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/nls/nls_tr.c 2007-09-26 17:44:16.000000000 +0200 @@ -0,0 +1,21 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfilter.h" +#include "nls_tr.h" + +const mbfl_language mbfl_language_turkish = { + mbfl_no_language_turkish, + "Turkish", + "tr", + NULL, + mbfl_no_encoding_8859_9, + 
mbfl_no_encoding_qprint, + mbfl_no_encoding_8bit +}; + diff -Naurp libmbfl-1.0.1/nls/nls_tr.h libmbfl-1.0.1.oden/nls/nls_tr.h --- libmbfl-1.0.1/nls/nls_tr.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/nls/nls_tr.h 2007-09-26 17:44:16.000000000 +0200 @@ -0,0 +1,8 @@ +#ifndef MBFL_NLS_TR_H +#define MBFL_NLS_TR_H + +#include "mbfilter.h" + +extern const mbfl_language mbfl_language_turkish; + +#endif /* MBFL_NLS_TR_H */ diff -Naurp libmbfl-1.0.1/nls/nls_ua.c libmbfl-1.0.1.oden/nls/nls_ua.c --- libmbfl-1.0.1/nls/nls_ua.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/nls/nls_ua.c 2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,22 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + + +#include "mbfilter.h" +#include "nls_ua.h" + +const mbfl_language mbfl_language_ukrainian = { + mbfl_no_language_ukrainian, + "Ukrainian", + "ua", + NULL, + mbfl_no_encoding_koi8u, + mbfl_no_encoding_qprint, + mbfl_no_encoding_8bit +}; diff -Naurp libmbfl-1.0.1/nls/nls_ua.h libmbfl-1.0.1.oden/nls/nls_ua.h --- libmbfl-1.0.1/nls/nls_ua.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl-1.0.1.oden/nls/nls_ua.h 2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,9 @@ +#ifndef MBFL_NLS_UA_H +#define MBFL_NLS_UA_H + +#include "mbfilter.h" +#include "nls_ua.h" + +extern const mbfl_language mbfl_language_ukrainian; + +#endif /* MBFL_NLS_UA_H */