This patch describes the changes made in ActivePerl build 805 over the official Perl v5.8.0 sources from CPAN. Summary of changes in build 805: * Make "perl -V" output reflect ActiveState build. * Add Win32::BuildNumber() for compatibility. * Add resources to perl.exe and perl58.dll. Detailed logs are at the end of this file. The ActivePerl Release Notes contain an informal summary of these changes. These can be viewed at: http://www.ActiveState.com/ActivePerl/docs/CHANGES.html The included patch may be applied to Perl v5.8.0 sources using the GNU patch utility. e.g: % cd perl-5.8.0 % patch -lNp1 < this_file --------------------------------------------------------------------------- diff -ruN perl-5.8.0/BuildInfo.h AP805_source/BuildInfo.h --- perl-5.8.0/BuildInfo.h Wed Dec 31 16:00:00 1969 +++ AP805_source/BuildInfo.h Tue Feb 4 23:02:56 2003 @@ -0,0 +1,25 @@ +/* BuildInfo.h + * + * Copyright (c) 1998-2002 ActiveState Corp. All rights reserved. + * + */ + +#ifndef ___BuildInfo__h___ +#define ___BuildInfo__h___ + +#define PRODUCT_BUILD_NUMBER "805" +#define PERLFILEVERSION "5,8,0,805\0" +#define PERLRC_VERSION 5,8,0,805 +#define ACTIVEPERL_CHANGELIST "" +#define PERLPRODUCTVERSION "Build " PRODUCT_BUILD_NUMBER ACTIVEPERL_CHANGELIST "\0" +#define PERLPRODUCTNAME "ActivePerl\0" + +#define PERL_VENDORLIB_NAME "ActiveState" + +#define ACTIVEPERL_VERSION "Built " __TIME__ " " __DATE__ "\n" +#define ACTIVEPERL_LOCAL_PATCHES_ENTRY "ActivePerl Build " PRODUCT_BUILD_NUMBER ACTIVEPERL_CHANGELIST +#define BINARY_BUILD_NOTICE printf("\n\ +Binary build " PRODUCT_BUILD_NUMBER ACTIVEPERL_CHANGELIST " provided by ActiveState Corp. 
http://www.ActiveState.com\n\ +" ACTIVEPERL_VERSION "\n"); + +#endif /* ___BuildInfo__h___ */ diff -ruN perl-5.8.0/Configure AP805_source/Configure --- perl-5.8.0/Configure Thu Jul 18 15:55:15 2002 +++ AP805_source/Configure Tue Feb 4 23:03:06 2003 @@ -9378,6 +9378,8 @@ eval $inlibc : Look for GNU-cc style attribute checking +case "$d_attribut" in +'') echo " " echo "Checking whether your compiler can handle __attribute__ ..." >&4 $cat >attrib.c <<'EOCP' @@ -9396,6 +9398,9 @@ echo "Your C compiler doesn't seem to understand __attribute__ at all." val="$undef" fi +;; +*) val="$d_attribut" ;; +esac set d_attribut eval $setvar $rm -f attrib* diff -ruN perl-5.8.0/INSTALL AP805_source/INSTALL --- perl-5.8.0/INSTALL Thu Jul 18 10:03:49 2002 +++ AP805_source/INSTALL Tue Feb 4 23:03:06 2003 @@ -1550,10 +1550,22 @@ installed. It installs a /usr/local/include/arpa/inet.h that refers to these symbols. Versions of BIND later than 8.1 do not install inet.h in that location and avoid the errors. You should probably update to a -newer version of BIND. If you can't, you can either link with the -updated resolver library provided with BIND 8.1 or rename -/usr/local/bin/arpa/inet.h during the Perl build and test process to -avoid the problem. +newer version of BIND (and remove the files the old one left behind). +If you can't, you can either link with the updated resolver library provided +with BIND 8.1 or rename /usr/local/bin/arpa/inet.h during the Perl build and +test process to avoid the problem. + +=item *_r() prototype NOT found + +On a related note, if you see a bunch of complaints like the above about +reentrant functions - specifically networking-related ones - being present +but without prototypes available, check to see if BIND 8.1 (or possibly +other BIND 8 versions) is (or has been) installed. They install +header files such as netdb.h into places such as /usr/local/include (or into +another directory as specified at build/install time), at least optionally. 
+Remove them or put them in someplace that isn't in the C preprocessor's +header file include search path (determined by -I options plus defaults, +normally /usr/include). =item #error "No DATAMODEL_NATIVE specified" diff -ruN perl-5.8.0/MANIFEST AP805_source/MANIFEST --- perl-5.8.0/MANIFEST Thu Jul 18 13:05:05 2002 +++ AP805_source/MANIFEST Tue Feb 4 23:03:06 2003 @@ -7,6 +7,7 @@ beos/beos.c BeOS port beos/beosish.h BeOS port beos/nm.c BeOS port +BuildInfo.h ActivePerl build information bytecode.pl Produces ext/ByteLoader/byterun.h, ext/ByteLoader/byterun.c and ext/B/Asmdata.pm cc_runtime.h Macros need by runtime of compiler-generated code cflags.SH A script that emits C compilation flags per file @@ -202,41 +203,42 @@ ext/DynaLoader/README Dynamic Loader notes and intro ext/DynaLoader/XSLoader_pm.PL Simple XS Loader perl module ext/Encode/AUTHORS List of authors -ext/Encode/bin/enc2xs Encode module generator -ext/Encode/bin/piconv iconv by perl +ext/Encode/bin/enc2xs Encode module generator +ext/Encode/bin/piconv iconv by perl ext/Encode/bin/ucm2table Table Generator for testing -ext/Encode/bin/ucmlint A UCM Lint utility -ext/Encode/bin/unidump Unicode Dump like hexdump(1) -ext/Encode/Byte/Byte.pm Encode extension -ext/Encode/Byte/Makefile.PL Encode extension +ext/Encode/bin/ucmlint A UCM Lint utility +ext/Encode/bin/ucmsort A UCM sort utility +ext/Encode/bin/unidump Unicode Dump like hexdump(1) +ext/Encode/Byte/Byte.pm Encode extension +ext/Encode/Byte/Makefile.PL Encode extension ext/Encode/Changes Change Log ext/Encode/CN/CN.pm Encode extension ext/Encode/CN/Makefile.PL Encode extension -ext/Encode/EBCDIC/EBCDIC.pm Encode extension -ext/Encode/EBCDIC/Makefile.PL Encode extension +ext/Encode/EBCDIC/EBCDIC.pm Encode extension +ext/Encode/EBCDIC/Makefile.PL Encode extension ext/Encode/encengine.c Encode extension -ext/Encode/Encode.pm Mother of all Encode extensions +ext/Encode/Encode.pm Mother of all Encode extensions ext/Encode/Encode.xs Encode extension 
-ext/Encode/Encode/Changes.e2x Skeleton file for enc2xs +ext/Encode/Encode/Changes.e2x Skeleton file for enc2xs ext/Encode/Encode/ConfigLocal_PM.e2x Skeleton file for enc2xs -ext/Encode/Encode/encode.h Encode extension header file +ext/Encode/Encode/encode.h Encode extension header file ext/Encode/Encode/Makefile_PL.e2x Skeleton file for enc2xs -ext/Encode/Encode/README.e2x Skeleton file for enc2xs -ext/Encode/Encode/_PM.e2x Skeleton file for enc2xs -ext/Encode/Encode/_T.e2x Skeleton file for enc2xs -ext/Encode/encoding.pm Perl Pragmactic Module +ext/Encode/Encode/README.e2x Skeleton file for enc2xs +ext/Encode/Encode/_PM.e2x Skeleton file for enc2xs +ext/Encode/Encode/_T.e2x Skeleton file for enc2xs +ext/Encode/encoding.pm Perl Pragmatic Module ext/Encode/JP/JP.pm Encode extension ext/Encode/JP/Makefile.PL Encode extension ext/Encode/KR/KR.pm Encode extension -ext/Encode/KR/Makefile.PL Encode extension -ext/Encode/lib/Encode/Alias.pm Encode extension +ext/Encode/KR/Makefile.PL Encode extension +ext/Encode/lib/Encode/Alias.pm Encode extension ext/Encode/lib/Encode/CJKConstants.pm Encode extension ext/Encode/lib/Encode/CN/HZ.pm Encode extension -ext/Encode/lib/Encode/Config.pm Encode configuration module -ext/Encode/lib/Encode/Encoder.pm OO Encoder +ext/Encode/lib/Encode/Config.pm Encode configuration module +ext/Encode/lib/Encode/Encoder.pm OO Encoder ext/Encode/lib/Encode/Encoding.pm Encode extension ext/Encode/lib/Encode/Guess.pm Encode Extension -ext/Encode/lib/Encode/JP/H2Z.pm Encode extension +ext/Encode/lib/Encode/JP/H2Z.pm Encode extension ext/Encode/lib/Encode/JP/JIS7.pm Encode extension ext/Encode/lib/Encode/KR/2022_KR.pm Encode extension ext/Encode/lib/Encode/MIME/Header.pm Encode extension @@ -245,37 +247,38 @@ ext/Encode/Makefile.PL Encode extension makefile writer ext/Encode/MANIFEST Encode extension ext/Encode/README Encode extension -ext/Encode/Symbol/Makefile.PL Encode extension -ext/Encode/Symbol/Symbol.pm Encode extension -ext/Encode/t/Aliases.t 
test script -ext/Encode/t/at-cn.t test script -ext/Encode/t/at-tw.t test script +ext/Encode/Symbol/Makefile.PL Encode extension +ext/Encode/Symbol/Symbol.pm Encode extension +ext/Encode/t/Aliases.t test script +ext/Encode/t/at-cn.t test script +ext/Encode/t/at-tw.t test script ext/Encode/t/big5-eten.enc test data ext/Encode/t/big5-eten.utf test data ext/Encode/t/big5-hkscs.enc test data ext/Encode/t/big5-hkscs.utf test data -ext/Encode/t/CJKT.t test script -ext/Encode/t/Encode.t test script -ext/Encode/t/Encoder.t test script -ext/Encode/t/encoding.t test script -ext/Encode/t/fallback.t test script -ext/Encode/t/gb2312.enc test data -ext/Encode/t/gb2312.utf test data -ext/Encode/t/grow.t test script -ext/Encode/t/guess.t test script +ext/Encode/t/CJKT.t test script +ext/Encode/t/Encode.t test script +ext/Encode/t/Encoder.t test script +ext/Encode/t/encoding.t test script +ext/Encode/t/fallback.t test script +ext/Encode/t/gb2312.enc test data +ext/Encode/t/gb2312.utf test data +ext/Encode/t/grow.t test script +ext/Encode/t/guess.t test script ext/Encode/t/jisx0201.enc test data ext/Encode/t/jisx0201.utf test data ext/Encode/t/jisx0208.enc test data ext/Encode/t/jisx0208.utf test data ext/Encode/t/jisx0212.enc test data ext/Encode/t/jisx0212.utf test data -ext/Encode/t/jperl.t test script +ext/Encode/t/jperl.t test script ext/Encode/t/ksc5601.enc test data ext/Encode/t/ksc5601.utf test data ext/Encode/t/mime-header.t test script -ext/Encode/t/perlio.t test script +ext/Encode/t/perlio.t test script +ext/Encode/t/rt.pl test script ext/Encode/t/unibench.pl benchmark script -ext/Encode/t/Unicode.t test script +ext/Encode/t/Unicode.t test script ext/Encode/TW/Makefile.PL Encode extension ext/Encode/TW/TW.pm Encode extension ext/Encode/ucm/8859-1.ucm Unicode Character Map @@ -569,6 +572,7 @@ ext/re/re.xs re extension external subroutines ext/Safe/safe1.t See if Safe works ext/Safe/safe2.t See if Safe works +ext/Safe/safe3.t See if Safe works ext/SDBM_File/Makefile.PL SDBM 
extension makefile writer ext/SDBM_File/sdbm.t See if SDBM_File works ext/SDBM_File/sdbm/biblio SDBM kit @@ -2325,6 +2329,7 @@ t/comp/hints.t See if %^H works t/comp/multiline.t See if multiline strings work t/comp/package.t See if packages work +t/comp/parser.t See if the parser works in edge cases t/comp/proto.t See if function prototypes work t/comp/redef.t See if we get correct warnings on redefined subs t/comp/require.t See if require works @@ -2545,12 +2550,14 @@ t/op/reverse.t See if reverse operator works t/op/re_tests Regular expressions for regexp.t t/op/runlevel.t See if die() works from perl_call_*() +t/op/sig.t See if signals work t/op/sleep.t See if sleep works t/op/sort.t See if sort works t/op/splice.t See if splice works t/op/split.t See if split works t/op/sprintf.t See if sprintf works t/op/srand.t See if srand works +t/op/stash.t See if %:: stashes work t/op/stat.t See if stat works t/op/study.t See if study works t/op/subst.t See if substitution works @@ -2631,6 +2638,10 @@ t/uni/lower.t See if Unicode casing works t/uni/sprintf.t See if Unicode sprintf works t/uni/title.t See if Unicode casing works +t/uni/tr_7jis.t See if Unicode tr/// works +t/uni/tr_eucjp.t See if Unicode tr/// works +t/uni/tr_sjis.t See if Unicode tr/// works +t/uni/tr_utf8.t See if Unicode tr/// works t/uni/upper.t See if Unicode casing works t/win32/longpath.t Test if Win32::GetLongPathName() works t/win32/system.t See if system works in Win* @@ -2742,6 +2753,7 @@ win32/des_fcrypt.patch Win32 port win32/distclean.bat Remove _ALL_ files not listed here in MANIFEST win32/dl_win32.xs Win32 port +win32/fcrypt.c Eric Young's crypt() implementation win32/FindExt.pm Scan for extensions win32/genmk95.pl Perl code to generate command.com-usable makefile.95 win32/include/arpa/inet.h Win32 port @@ -2751,6 +2763,8 @@ win32/Makefile Win32 makefile for NMAKE (Visual C++ build) win32/makefile.mk Win32 makefile for DMAKE (BC++, VC++ builds) win32/mdelete.bat multifile delete 
+win32/perldll.rc Resource file for perl5X.dll +win32/perlexe.rc Resource file for perl.exe win32/perlglob.c Win32 port win32/perlhost.h Perl "host" implementation win32/perllib.c Win32 port diff -ruN perl-5.8.0/Porting/apply AP805_source/Porting/apply --- perl-5.8.0/Porting/apply Sat Jun 1 10:02:48 2002 +++ AP805_source/Porting/apply Tue Feb 4 23:03:07 2003 @@ -2,7 +2,7 @@ my $file = pop(@ARGV); my %meta; $ENV{'P4PORT'} = 'bactrian:1667'; -$ENV{'P4CLIENT'} = 'camel-linux'; +$ENV{'P4CLIENT'} = 'ni-s'; open(FILE,$file) || die "Cannot open $file:$!"; while (<FILE>) { @@ -69,3 +69,4 @@ _exit(exec $cmd); } } + diff -ruN perl-5.8.0/README.aix AP805_source/README.aix --- perl-5.8.0/README.aix Tue Jun 4 17:54:57 2002 +++ AP805_source/README.aix Tue Feb 4 23:03:07 2003 @@ -170,6 +170,10 @@ Follow the messages ... and you're done. +If you like a more web-like approach, a good start point can be +http://www14.software.ibm.com/webapp/download/downloadaz.jsp and click +"C for AIX", and follow the instructions. + =head2 Using GNU's gcc for building perl Using gcc-3.0 (tested with 3.0.4) now works out of the box, as do diff -ruN perl-5.8.0/README.hpux AP805_source/README.hpux --- perl-5.8.0/README.hpux Tue Jun 4 19:46:41 2002 +++ AP805_source/README.hpux Tue Feb 4 23:03:07 2003 @@ -38,6 +38,24 @@ If you perform a new installation, then Perl will be installed automatically. +=head2 Using perl from HP's porting centre + +HP porting centre tries very hard to keep up with customer demand and +release updates from the Open Source community. Having precompiled +Perl binaries available is obvious. + +The HP porting centres are limited in what systems they are allowed +to port to and they usually choose the two most recent OS versions +available. This means that at the moment of writing, there are only +HPUX-11.00 and 11-20/22 (IA64) ports available on the porting centres.
+ +HP has asked the porting centre to move Open Source binaries +from /opt to /usr/local, so binaries produced since the start +of July 2002 are located in /usr/local. + +One of HP porting centres URL's is http://hpux.connect.org.uk/ +The port currently available is built with GNU gcc. + =head2 Compiling Perl 5 on HP-UX When compiling Perl, you must use an ANSI C compiler. The C compiler @@ -64,6 +82,17 @@ The most recent version of PA-RISC at the time of this document's last update is 2.0. +A complete list of models at the time the OS was built is in the file +/usr/sam/lib/mo/sched.models. The first column corresponds to the last +part of the output of the "model" command. The second column is the +PA-RISC version and the third column is the exact chip type used. +(Start browsing at the bottom to prevent confusion ;-) + + # model + 9000/800/L1000-44 + # grep L1000-44 /usr/sam/lib/mo/sched.models + L1000-44 2.0 PA8500 + =head2 PA-RISC 1.0 The original version of PA-RISC, HP no longer sells any system with this chip. @@ -105,9 +134,18 @@ D280, D370, D380, D390, D650, J220, J2240, J280, J282, J400, J410, J5000, J5500XM, J5600, J7000, J7600, K250, K260, K260-EG, K270, K360, K370, K380, K450, K460, K460-EG, K460-XP, K470, K570, K580, L1000, - L2000, L3000, N4000, R380, R390, RP2400, RP2430, RP2450, RP2470, - RP5400, RP5430, RP5450, RP5470, RP7400, RP7410, RP8400, SD16000, - SD32000, SD64000, T540, T600, V2000, V2200, V2250, V2500, V2600 + L2000, L3000, N4000, R380, R390, RP2400, RP2405, RP2430, RP2450, + RP2470, RP5400, RP5405, RP5430, RP5450, RP5470, RP7400, RP7405, + RP7410, RP8400, SD16000, SD32000, SD64000, T540, T600, V2000, V2200, + V2250, V2500, V2600 + +Just before HP took over Compaq, some systems were renamed. Visit +http://www.hp.com/products1/servers/server_names.html to see what +the changes are, or will be. + + HP 9000 A-Class servers, now renamed HP Server rp2400 series. + HP 9000 L-Class servers, now renamed HP Server rp5400 series. 
+ HP 9000 N-Class servers, now renamed HP Server rp7400. =head2 Itanium @@ -115,13 +153,7 @@ date of this document's last update, the following systems contain Itanium chips (this is very likely to be out of date): - RX4610, RX9610 - -A complete list of models at the time the OS was built is in the file -/opt/langtools/lib/sched.models. The first column corresponds to the -output of the "uname -m" command (without the leading "9000/"). The -second column is the PA-RISC version and the third column is the exact -chip type used. (Start browsing at the bottom to prevent confusion ;-) + RX2600, RX4610, RX5670, RX9610 =head2 Portability Between PA-RISC Versions @@ -266,6 +298,12 @@ find the GNU binutils package. (Browse through the list, because there are often multiple versions of the same package available). +Above mentioned distributions are depots. H.Merijn Brand has made prebuilt +gcc binaries available on https://www.beepz.com/personal/merijn/ for +HP-UX 10.20 and HP-UX 11.00 in both 32- and 64-bit versions. These are +bzipped tar archives that also include recent GNU binutils and GNU gdb. +Read the instructions on that page to rebuild gcc using itself. + Building a 64bit capable gcc from source is possible only when you have the HP C-ANSI C compiler available, which you should use anyway when building perl. 
@@ -457,8 +495,8 @@ #0 0xc004216c in () from /usr/lib/libc.2 #1 0xc00d7550 in __nss_src_state_destr () from /usr/lib/libc.2 #2 0xc00d7768 in __nss_src_state_destr () from /usr/lib/libc.2 - #3 0xc00d78a8 in nss_delete () from /usr/lib/libc.2 - #4 0xc01126d8 in endpwent () from /usr/lib/libc.2 + #3 0xc00d78a8 in nss_delete () from /usr/lib/libc.2 + #4 0xc01126d8 in endpwent () from /usr/lib/libc.2 #5 0xd1950 in Perl_pp_epwent () from ./perl #6 0x94d3c in Perl_runops_standard () from ./perl #7 0x23728 in S_run_body () from ./perl @@ -469,7 +507,7 @@ bug seems to be to create add to the file F</etc/nsswitch.conf> (at least) the following lines - group: files + group: files passwd: files Whether you are using NIS does not matter. Amazingly enough, @@ -484,6 +522,6 @@ =head1 DATE -Version 0.6.6: 2002-05-30 +Version 0.6.7: 2002-09-05 =cut diff -ruN perl-5.8.0/README.win32 AP805_source/README.win32 --- perl-5.8.0/README.win32 Tue Feb 4 23:10:06 2003 +++ AP805_source/README.win32 Tue Feb 4 23:03:08 2003 @@ -138,15 +138,26 @@ Perl. Make sure you are building within one of the "Build Environment" shells available after you install the Platform SDK from the Start Menu. -=item Mingw32 with GCC +=item MinGW32 with gcc -GCC-2.95.2 binaries can be downloaded from: +The latest release of MinGW (at the time of writing) is 2.0.0, which comes +with gcc-3.2, and can be downloaded here: - ftp://ftp.xraylith.wisc.edu/pub/khan/gnu-win32/mingw32/ + http://sourceforge.net/projects/mingw + +Perl compiles with earlier releases of gcc (2.95 and up) that can be +downloaded from the same place. If you use gcc-3.2, comment out the +line: + + USE_GCC_V3_2 *= define + +in win32\makefile.mk You also need dmake. See L</"Make"> above on how to get it. -The GCC-2.95.2 bundle comes with Mingw32 libraries and headers. +=item MinGW release 1 + +The MinGW-1.1 bundle comes with gcc-2.95.3. Make sure you install the binaries that work with MSVCRT.DLL as indicated in the README for the GCC bundle.
You may need to set up a few environment @@ -694,6 +705,14 @@ have to set the PAGER environment variable to use a specific pager. "perldoc -f foo" will print information about the perl operator "foo". + +One common mistake when using this port with a GUI library like C<Tk> +is assuming that Perl's normal behavior of opening a command-line +window will go away. This isn't the case. If you want to start a copy +of C<perl> without opening a command-line window, use the C<wperl> +executable built during the installation process. Usage is exactly +the same as normal C<perl> on Win32, except that options like C<-h> +don't work (since they need a command-line window to print to). If you find bugs in perl, you can run C<perlbug> to create a bug report (you may have to send it manually if C<perlbug> cannot diff -ruN perl-5.8.0/XSUB.h AP805_source/XSUB.h --- perl-5.8.0/XSUB.h Sat Jun 1 15:19:47 2002 +++ AP805_source/XSUB.h Tue Feb 4 23:03:08 2003 @@ -339,9 +339,9 @@ # define putenv PerlEnv_putenv # define getenv PerlEnv_getenv # define uname PerlEnv_uname -# define stdin PerlSIO_stdin() -# define stdout PerlSIO_stdout() -# define stderr PerlSIO_stderr() +# define stdin PerlSIO_stdin +# define stdout PerlSIO_stdout +# define stderr PerlSIO_stderr # define fopen PerlIO_open # define fclose PerlIO_close # define feof PerlIO_eof @@ -360,8 +360,8 @@ # define setbuf PerlSIO_setbuf # define setvbuf PerlSIO_setvbuf # define setlinebuf PerlSIO_setlinebuf -# define stdoutf PerlIO_stdoutf -# define vfprintf PerlIO_vprintf +# define stdoutf PerlSIO_stdoutf +# define vfprintf PerlSIO_vprintf # define ftell PerlIO_tell # define fseek PerlIO_seek # define fgetpos PerlIO_getpos diff -ruN perl-5.8.0/doio.c AP805_source/doio.c --- perl-5.8.0/doio.c Tue Jul 9 13:57:22 2002 +++ AP805_source/doio.c Tue Feb 4 23:03:08 2003 @@ -325,6 +325,7 @@ } if (num_svs && (SvIOK(*svp) || (SvPOK(*svp) && looks_like_number(*svp)))) { fd = SvUV(*svp); + num_svs = 0; } else if (isDIGIT(*type)) { /*SUPPRESS 530*/ @@ -920,8 +921,8 @@ if (PerlProc_pipe(fd)
< 0) goto badexit; - IoIFP(rstio) = PerlIO_fdopen(fd[0], "r"); - IoOFP(wstio) = PerlIO_fdopen(fd[1], "w"); + IoIFP(rstio) = PerlIO_fdopen(fd[0], "r"PIPESOCK_MODE); + IoOFP(wstio) = PerlIO_fdopen(fd[1], "w"PIPESOCK_MODE); IoOFP(rstio) = IoIFP(rstio); IoIFP(wstio) = IoOFP(wstio); IoTYPE(rstio) = IoTYPE_RDONLY; @@ -1519,7 +1520,7 @@ while (*t && isSPACE(*t)) ++t; - if (!*t && (dup2(1,2) != -1)) { + if (!*t && (PerlLIO_dup2(1,2) != -1)) { s[-2] = '\0'; break; } diff -ruN perl-5.8.0/dump.c AP805_source/dump.c --- perl-5.8.0/dump.c Tue Jun 18 13:26:48 2002 +++ AP805_source/dump.c Tue Feb 4 23:03:09 2003 @@ -843,13 +843,15 @@ if (mg->mg_flags) { Perl_dump_indent(aTHX_ level, file, " MG_FLAGS = 0x%02X\n", mg->mg_flags); - if (mg->mg_flags & MGf_TAINTEDDIR) + if (mg->mg_type == PERL_MAGIC_envelem && + mg->mg_flags & MGf_TAINTEDDIR) Perl_dump_indent(aTHX_ level, file, " TAINTEDDIR\n"); if (mg->mg_flags & MGf_REFCOUNTED) Perl_dump_indent(aTHX_ level, file, " REFCOUNTED\n"); if (mg->mg_flags & MGf_GSKIP) Perl_dump_indent(aTHX_ level, file, " GSKIP\n"); - if (mg->mg_flags & MGf_MINMATCH) + if (mg->mg_type == PERL_MAGIC_regex_global && + mg->mg_flags & MGf_MINMATCH) Perl_dump_indent(aTHX_ level, file, " MINMATCH\n"); } if (mg->mg_obj) { @@ -1020,7 +1022,7 @@ sv_catpv(d, "TYPED,"); break; } - if (SvPOK(sv) && SvUTF8(sv)) + if ((SvPOK(sv) || SvPOKp(sv)) && SvUTF8(sv)) sv_catpv(d, "UTF8"); if (*(SvEND(d) - 1) == ',') diff -ruN perl-5.8.0/ext/B/B/Concise.pm AP805_source/ext/B/B/Concise.pm --- perl-5.8.0/ext/B/B/Concise.pm Sat Jun 1 10:02:52 2002 +++ AP805_source/ext/B/B/Concise.pm Tue Feb 4 23:03:10 2003 @@ -534,7 +534,7 @@ # Why these are different for MacOS? Does it matter? my $cop_seq_mnum = $^O eq 'MacOS' ? 12 : 11; -my $seq_mnum = $^O eq 'MacOS' ? 100 : 84; +my $seq_mnum = $^O eq 'MacOS' ? 
102 : 86; $cop_seq_base = svref_2object(eval 'sub{0;}')->START->cop_seq + $cop_seq_mnum; $seq_base = svref_2object(eval 'sub{}')->START->seq + $seq_mnum; diff -ruN perl-5.8.0/ext/B/B.pm AP805_source/ext/B/B.pm --- perl-5.8.0/ext/B/B.pm Sat Jun 1 10:02:52 2002 +++ AP805_source/ext/B/B.pm Tue Feb 4 23:03:09 2003 @@ -236,7 +236,7 @@ package B::Section; my $output_fh; my %sections; - + sub new { my ($class, $section, $symtable, $default) = @_; $output_fh ||= FileHandle->new_tmpfile; @@ -244,7 +244,7 @@ $sections{$section} = $obj; return $obj; } - + sub get { my ($class, $section) = @_; return $sections{$section}; @@ -272,12 +272,12 @@ my $section = shift; return $section->[2]; } - + sub default { my $section = shift; return $section->[3]; } - + sub output { my ($section, $fh, $format) = @_; my $name = $section->name; @@ -324,6 +324,186 @@ things as SVs, OPs and the internal symbol table and syntax tree of a program. +=head1 OVERVIEW + +The C module contains a set of utility functions for querying the +current state of the Perl interpreter; typically these functions +return objects from the B::SV and B::OP classes, or their derived +classes. These classes in turn define methods for querying the +resulting objects about their own internal state. + +=head1 Utility Functions + +The C module exports a variety of functions: some are simple +utility functions, others provide a Perl program with a way to +get an initial "handle" on an internal object. + +=head2 Functions Returning C, C, C, and C objects + +For descriptions of the class hierachy of these objects and the +methods that can be called on them, see below, L<"OVERVIEW OF +CLASSES"> and L<"SV-RELATED CLASSES">. + +=over 4 + +=item sv_undef + +Returns the SV object corresponding to the C variable C. + +=item sv_yes + +Returns the SV object corresponding to the C variable C. + +=item sv_no + +Returns the SV object corresponding to the C variable C. 
+ +=item svref_2object(SVREF) + +Takes a reference to any Perl value, and turns the referred-to value +into an object in the appropriate B::OP-derived or B::SV-derived +class. Apart from functions such as C, this is the primary +way to get an initial "handle" on an internal perl data structure +which can then be followed with the other access methods. + +=item amagic_generation + +Returns the SV object corresponding to the C variable C. + +=item C + +Returns the AV object (i.e. in class B::AV) representing INIT blocks. + +=item begin_av + +Returns the AV object (i.e. in class B::AV) representing BEGIN blocks. + +=item end_av + +Returns the AV object (i.e. in class B::AV) representing END blocks. + +=item comppadlist + +Returns the AV object (i.e. in class B::AV) of the global comppadlist. + +=item regex_padav + +Only when perl was compiled with ithreads. + +=item C + +Return the (faked) CV corresponding to the main part of the Perl +program. + +=back + +=head2 Functions for Examining the Symbol Table + +=over 4 + +=item walksymtable(SYMREF, METHOD, RECURSE, PREFIX) + +Walk the symbol table starting at SYMREF and call METHOD on each +symbol (a B::GV object) visited. When the walk reaches package +symbols (such as "Foo::") it invokes RECURSE, passing in the symbol +name, and only recurses into the package if that sub returns true. + +PREFIX is the name of the SYMREF you're walking. + +For example: + + # Walk CGI's symbol table calling print_subs on each symbol. + # Recurse only into CGI::Util:: + walksymtable(\%CGI::, 'print_subs', sub { $_[0] eq 'CGI::Util::' }, + 'CGI::'); + +print_subs() is a B::GV method you have declared. Also see L<"B::GV +Methods">, below. + +=back + +=head2 Functions Returning C objects or for walking op trees + +For descriptions of the class hierachy of these objects and the +methods that can be called on them, see below, L<"OVERVIEW OF +CLASSES"> and L<"OP-RELATED CLASSES">. + +=over 4 + +=item main_root + +Returns the root op (i.e. 
an object in the appropriate B::OP-derived +class) of the main part of the Perl program. + +=item main_start + +Returns the starting op of the main part of the Perl program. + +=item walkoptree(OP, METHOD) + +Does a tree-walk of the syntax tree based at OP and calls METHOD on +each op it visits. Each node is visited before its children. If +C (see below) has been called to turn debugging on then +the method C is called on each op before METHOD is +called. + +=item walkoptree_debug(DEBUG) + +Returns the current debugging flag for C. If the optional +DEBUG argument is non-zero, it sets the debugging flag to that. See +the description of C above for what the debugging flag +does. + +=back + +=head2 Miscellaneous Utility Functions + +=over 4 + +=item ppname(OPNUM) + +Return the PP function name (e.g. "pp_add") of op number OPNUM. + +=item hash(STR) + +Returns a string in the form "0x..." representing the value of the +internal hash function used by perl on string STR. + +=item cast_I32(I) + +Casts I to the internal I32 type used by that perl. + +=item minus_c + +Does the equivalent of the C<-c> command-line option. Obviously, this +is only useful in a BEGIN block or else the flag is set too late. + +=item cstring(STR) + +Returns a double-quote-surrounded escaped version of STR which can +be used as a string in C source code. + +=item perlstring(STR) + +Returns a double-quote-surrounded escaped version of STR which can +be used as a string in Perl source code. + +=item class(OBJ) + +Returns the class of an object without the part of the classname +preceding the first C<"::">. This is used to turn C<"B::UNOP"> into +C<"UNOP"> for example. + +=item threadsv_names + +In a perl compiled for threads, this returns a list of the special +per-thread threadsv variables. + +=back + + + + =head1 OVERVIEW OF CLASSES The C structures used by Perl's internals to hold SV and OP @@ -331,9 +511,12 @@ class hierarchy and the C module gives access to them via a true object hierarchy. 
Structure fields which point to other objects (whether types of SV or types of OP) are represented by the C -module as Perl objects of the appropriate class. The bulk of the C -module is the methods for accessing fields of these structures. Note -that all access is read-only: you cannot modify the internals by +module as Perl objects of the appropriate class. + +The bulk of the C module is the methods for accessing fields of +these structures. + +Note that all access is read-only. You cannot modify the internals by using this module. =head2 SV-RELATED CLASSES @@ -341,15 +524,40 @@ B::IV, B::NV, B::RV, B::PV, B::PVIV, B::PVNV, B::PVMG, B::BM, B::PVLV, B::AV, B::HV, B::CV, B::GV, B::FM, B::IO. These classes correspond in the obvious way to the underlying C structures of similar names. The -inheritance hierarchy mimics the underlying C "inheritance". Access -methods correspond to the underlying C macros for field access, +inheritance hierarchy mimics the underlying C "inheritance": + + B::SV + | + +--------------+----------------------+ + | | | + B::PV B::IV B::RV + | \ / \ + | \ / \ + | B::PVIV B::NV + \ / + \____ __/ + \ / + B::PVNV + | + | + B::PVMG + | + +------+-----+----+------+-----+-----+ + | | | | | | | + B::PVLV B::BM B::AV B::GV B::HV B::CV B::IO + | + | + B::FM + + +Access methods correspond to the underlying C macros for field access, usually with the leading "class indication" prefix removed (Sv, Av, Hv, ...). The leading prefix is only left in cases where its removal would cause a clash in method name. For example, C stays as-is since its abbreviation would clash with the "superclass" method C (corresponding to the C function C). 
-=head2 B::SV METHODS +=head2 B::SV Methods =over 4 @@ -359,7 +567,7 @@ =back -=head2 B::IV METHODS +=head2 B::IV Methods =over 4 @@ -387,7 +595,7 @@ =back -=head2 B::NV METHODS +=head2 B::NV Methods =over 4 @@ -397,7 +605,7 @@ =back -=head2 B::RV METHODS +=head2 B::RV Methods =over 4 @@ -405,7 +613,7 @@ =back -=head2 B::PV METHODS +=head2 B::PV Methods =over 4 @@ -434,7 +642,7 @@ =back -=head2 B::PVMG METHODS +=head2 B::PVMG Methods =over 4 @@ -444,7 +652,7 @@ =back -=head2 B::MAGIC METHODS +=head2 B::MAGIC Methods =over 4 @@ -473,7 +681,7 @@ =back -=head2 B::PVLV METHODS +=head2 B::PVLV Methods =over 4 @@ -487,7 +695,7 @@ =back -=head2 B::BM METHODS +=head2 B::BM Methods =over 4 @@ -501,7 +709,7 @@ =back -=head2 B::GV METHODS +=head2 B::GV Methods =over 4 @@ -556,7 +764,7 @@ =back -=head2 B::IO METHODS +=head2 B::IO Methods =over 4 @@ -595,7 +803,7 @@ =back -=head2 B::AV METHODS +=head2 B::AV Methods =over 4 @@ -611,7 +819,7 @@ =back -=head2 B::CV METHODS +=head2 B::CV Methods =over 4 @@ -643,7 +851,7 @@ =back -=head2 B::HV METHODS +=head2 B::HV Methods =over 4 @@ -665,15 +873,32 @@ =head2 OP-RELATED CLASSES -B::OP, B::UNOP, B::BINOP, B::LOGOP, B::LISTOP, B::PMOP, -B::SVOP, B::PADOP, B::PVOP, B::CVOP, B::LOOP, B::COP. -These classes correspond in -the obvious way to the underlying C structures of similar names. The -inheritance hierarchy mimics the underlying C "inheritance". Access -methods correspond to the underlying C structre field names, with the -leading "class indication" prefix removed (op_). +C, C, C, C, C, C, +C, C, C, C, C, C. -=head2 B::OP METHODS +These classes correspond in the obvious way to the underlying C +structures of similar names. The inheritance hierarchy mimics the +underlying C "inheritance": + + B::OP + | + +---------------+--------+--------+------+ + | | | | | + B::UNOP B::SVOP B::PADOP B::CVOP B::COP + ,' `-. + / `--. + B::BINOP B::LOGOP + | + | + B::LISTOP + ,' `. 
+ / \ + B::LOOP B::PMOP + +Access methods correspond to the underlying C structre field names, +with the leading "class indication" prefix (C<"op_">) removed. + +=head2 B::OP Methods =over 4 @@ -739,7 +964,7 @@ =back -=head2 B::PMOP METHODS +=head2 B::PMOP Methods =over 4 @@ -791,7 +1016,7 @@ =back -=head2 B::LOOP METHODS +=head2 B::LOOP Methods =over 4 @@ -803,7 +1028,7 @@ =back -=head2 B::COP METHODS +=head2 B::COP Methods =over 4 @@ -821,148 +1046,6 @@ =back -=head1 FUNCTIONS EXPORTED BY C - -The C module exports a variety of functions: some are simple -utility functions, others provide a Perl program with a way to -get an initial "handle" on an internal object. - -=over 4 - -=item main_cv - -Return the (faked) CV corresponding to the main part of the Perl -program. - -=item init_av - -Returns the AV object (i.e. in class B::AV) representing INIT blocks. - -=item begin_av - -Returns the AV object (i.e. in class B::AV) representing BEGIN blocks. - -=item end_av - -Returns the AV object (i.e. in class B::AV) representing END blocks. - -=item main_root - -Returns the root op (i.e. an object in the appropriate B::OP-derived -class) of the main part of the Perl program. - -=item main_start - -Returns the starting op of the main part of the Perl program. - -=item comppadlist - -Returns the AV object (i.e. in class B::AV) of the global comppadlist. - -=item regex_padav - -Only when perl was compiled with ithreads. - -=item sv_undef - -Returns the SV object corresponding to the C variable C. - -=item sv_yes - -Returns the SV object corresponding to the C variable C. - -=item sv_no - -Returns the SV object corresponding to the C variable C. - -=item amagic_generation - -Returns the SV object corresponding to the C variable C. - -=item walkoptree(OP, METHOD) - -Does a tree-walk of the syntax tree based at OP and calls METHOD on -each op it visits. Each node is visited before its children. If -C (q.v.) 
has been called to turn debugging on then -the method C is called on each op before METHOD is -called. - -=item walkoptree_debug(DEBUG) - -Returns the current debugging flag for C. If the optional -DEBUG argument is non-zero, it sets the debugging flag to that. See -the description of C above for what the debugging flag -does. - -=item walksymtable(SYMREF, METHOD, RECURSE, PREFIX) - -Walk the symbol table starting at SYMREF and call METHOD on each -symbol (a B::GV object) visited. When the walk reaches package -symbols (such as "Foo::") it invokes RECURSE, passing in the symbol -name, and only recurses into the package if that sub returns true. - -PREFIX is the name of the SYMREF you're walking. - -For example... - - # Walk CGI's symbol table calling print_subs on each symbol. - # Only recurse into CGI::Util:: - walksymtable(\%CGI::, 'print_subs', sub { $_[0] eq 'CGI::Util::' }, - 'CGI::'); - -print_subs() is a B::GV method you have declared. - - -=item svref_2object(SV) - -Takes any Perl variable and turns it into an object in the -appropriate B::OP-derived or B::SV-derived class. Apart from functions -such as C, this is the primary way to get an initial -"handle" on an internal perl data structure which can then be followed -with the other access methods. - -=item ppname(OPNUM) - -Return the PP function name (e.g. "pp_add") of op number OPNUM. - -=item hash(STR) - -Returns a string in the form "0x..." representing the value of the -internal hash function used by perl on string STR. - -=item cast_I32(I) - -Casts I to the internal I32 type used by that perl. - - -=item minus_c - -Does the equivalent of the C<-c> command-line option. Obviously, this -is only useful in a BEGIN block or else the flag is set too late. - - -=item cstring(STR) - -Returns a double-quote-surrounded escaped version of STR which can -be used as a string in C source code. 
- -=item perlstring(STR) - -Returns a double-quote-surrounded escaped version of STR which can -be used as a string in Perl source code. - -=item class(OBJ) - -Returns the class of an object without the part of the classname -preceding the first "::". This is used to turn "B::UNOP" into -"UNOP" for example. - -=item threadsv_names - -In a perl compiled for threads, this returns a list of the special -per-thread threadsv variables. - -=back =head1 AUTHOR diff -ruN perl-5.8.0/ext/Devel/DProf/DProf.xs AP805_source/ext/Devel/DProf/DProf.xs --- perl-5.8.0/ext/Devel/DProf/DProf.xs Sat Jun 1 10:02:53 2002 +++ AP805_source/ext/Devel/DProf/DProf.xs Tue Feb 4 23:03:10 2003 @@ -84,7 +84,7 @@ U32 dprof_ticks; char* out_file_name; /* output file (defaults to tmon.out) */ PerlIO* fp; /* pointer to tmon.out file */ - long TIMES_LOCATION; /* Where in the file to store the time totals */ + Off_t TIMES_LOCATION; /* Where in the file to store the time totals */ int SAVE_STACK; /* How much data to buffer until end of run */ int prof_pid; /* pid of profiled process */ struct tms prof_start; @@ -297,7 +297,7 @@ SV *Sub = GvSV(PL_DBsub); /* name of current sub */ if (g_SAVE_STACK) { - if (g_profstack_ix + 5 > g_profstack_max) { + if (g_profstack_ix + 10 > g_profstack_max) { g_profstack_max = g_profstack_max * 3 / 2; Renew(g_profstack, g_profstack_max, PROFANY); } diff -ruN perl-5.8.0/ext/Devel/PPPort/PPPort.pm AP805_source/ext/Devel/PPPort/PPPort.pm --- perl-5.8.0/ext/Devel/PPPort/PPPort.pm Sat Jun 1 10:02:53 2002 +++ AP805_source/ext/Devel/PPPort/PPPort.pm Tue Feb 4 23:03:10 2003 @@ -434,6 +434,15 @@ # define aTHX_ #endif +/* IV could also be a quad (say, a long long), but Perls + * capable of those should have IVSIZE already. */ +#if !defined(IVSIZE) && defined(LONGSIZE) +# define IVSIZE LONGSIZE +#endif +#ifndef IVSIZE +# define IVSIZE 4 /* A bold guess, but the best we can make. 
*/ +#endif + #ifndef UVSIZE # define UVSIZE IVSIZE #endif @@ -649,7 +658,6 @@ #else /* single interpreter */ - #define START_MY_CXT static my_cxt_t my_cxt; #define dMY_CXT_SV dNOOP #define dMY_CXT dNOOP @@ -718,6 +726,18 @@ # endif #else # define SvPVbyte SvPV +#endif + +#ifndef SvPV_nolen +# define SvPV_nolen(sv) \ + ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ + ? SvPVX(sv) : sv_2pv_nolen(sv)) + static char * + sv_2pv_nolen(pTHX_ register SV *sv) + { + STRLEN n_a; + return sv_2pv(sv, &n_a); + } #endif #endif /* _P_P_PORTABILITY_H_ */ diff -ruN perl-5.8.0/ext/Encode/AUTHORS AP805_source/ext/Encode/AUTHORS --- perl-5.8.0/ext/Encode/AUTHORS Sat Jun 1 10:02:54 2002 +++ AP805_source/ext/Encode/AUTHORS Tue Feb 4 23:03:11 2003 @@ -9,7 +9,7 @@ # # This list is in alphabetical order. -- -Andreas J. Koenig +Andreas J. Koenig Anton Tagunov Autrijus Tang Benjamin Goldberg @@ -21,9 +21,11 @@ Graham Barr Gurusamy Sarathy H.Merijn Brand +Hugo van der Sanden Jarkko Hietaniemi Jungshik Shin Laszlo Molnar +MORIYAMA Masayuki Mark-Jason Dominus Mattia Barbon Michael G Schwern diff -ruN perl-5.8.0/ext/Encode/Changes AP805_source/ext/Encode/Changes --- perl-5.8.0/ext/Encode/Changes Sat Jun 1 11:27:38 2002 +++ AP805_source/ext/Encode/Changes Tue Feb 4 23:03:11 2003 @@ -1,9 +1,130 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 1.75 2002/06/01 18:07:49 dankogai Exp dankogai $ +# $Id: Changes,v 1.83 2002/11/18 17:28:49 dankogai Exp dankogai $ # -$Revision: 1.75 $ $Date: 2002/06/01 18:07:49 $ +$Revision: 1.83 $ $Date: 2002/11/18 17:28:49 $ +! Encode.xs lib/Encode/JIS7.pm + Even more patches from Inaba-san has been applied. With this + patch t/uni/tr_7jis.t and t/uni/t_utf8.t of bleedperl will work. + Message-Id: <20021115105514D.inaba.hiroto@toshiba-it.co.jp> + +1.82 2002/11/14 23:06:12 +! Encode.xs + Encode::utf8 (XS Version) assertion botch first found in Cygwin, + later found in perls w/ -Dusemymalloc was fixed by NC. 
+ Message-Id: <20021114210349.GA288@Bagpuss.unfortu.net> + +1.81 2002/11/08 18:29:27 +! Encode.pm Encode.xs + Non-XS version of Encode::utf8 is back (with XS being default). + Encode::predefine_encodings(0) to turn off XS. + This is primarily to cope w/ Cygwin smoke but Sadahiro-san has + found that it was Test::More causing the problem, not Encode. + But I have already made it configurable so it may be useful in + some rare cases.... + Message-Id: <20021107210110.2EE4.BQW10602@nifty.com>, et al. +! bin/enc2xs + The ingenious patch by Nicholas Clark that reduces shlib sizes by + 50% with no penalty and backward compatibility preserved, is in. + Message-Id: <20021103231324.GE288@Bagpuss.unfortu.net> + +1.80 2002/10/21 20:39:09 +! Encode.xs t/mime-header.t + Even more patches from NI-XS regarding Encode::utf8->decode(). + And one more test to t/mime-header.t to prove it + Message-Id: + +1.79 2002/10/21 06:05:37 +! Encode.xs + Further patches from NI-XS. Encode::utf8->decode() now checks the + value of utf8 flag of the argument. As a result, the fix to + lib/Encode/MIME/Header.pm is no longer neccessary but since it did + no harm (even speedwise) I'll leave it unreverted. +! ucm/cp949.ucm ucm/cp950.ucm + U+20AC EURO SIGN + U+00AE REGISTERED SIGN + were missing as a result of 1.78. Discovered by Moriyama-san. + Moriyama-san has also developed a test script that compares + (en|de)coded results to the corresponding Win32 API result and + all cp9?? maps are now verified. + Message-Id: <20021021025220.3AED.MSYK@mtg.biglobe.ne.jp> + +1.78 2002/10/20 15:44:00 +! lib/Encode/MIME/Header.pm + fixed so that it works with new Encode::utf8 +! Encode.pm Encode.xs + Encode::utf8 is now in Encode.xs by Nick In-XS. This allows + :encoding(UTF-8) to handle partial chars at end of buffers + correctly. + Message-Id: <20021020134935.2079.3@bactrian.ni-s.u-net.com> +! lib/Encode/Supported.pod + More nitpickings applied. ++ t/rt.pl MANIFEST +! 
t/CJKT.t + Moriyama-san has discovered a serious bug in t/CJKT.t; its roundtrip + tests were completely useless. To redeem that and get the peace of + mind again, I wrote t/rt.pl to test ALL '|0' ENTRIES in all + ucm/*.ucm Since this script takes too long to finish (30 seconds on + PIII-800MHz, FreeBSD), it is deliberately excluded from 'make test' + but you can easily run that by either renaming it or: + perl -Mblib t/rt.pl + Message-Id: <20021019065420.0C48.MSYK@mtg.biglobe.ne.jp> +! ucm/cp936.ucm ucm/cp949.ucm ucm/cp950.ucm + Other CJKT cp9?? also updated according to the URI below; + http://www.microsoft.com/typography/unicode/cscp.htm ++ bin/ucmsort MANIFEST + ucmsort is a crude utility that sorts CHARMAP entries in UCM files + to proper order. intended for hardcore develpers only. +! ucm/cp932.ucm JP/JP.pm AUTHORS + CP932 mapping which was based upon the mapping file at unicode.org + was found obsolete by MORIYAMA Masayuki msyk@mtg.biglobe.ne.jp>. He + has also supplied the patch so he was added to AUTHORS. +! lib/Encode/Supported.pod + ISO-8859-11 != TIS 620 + == TIS 620 + \xA0 ( ) + Message-Id: + + +1.77 2002/10/06 03:27:02 +! t/jperl.t + * Modified to accomodate up and comming patch by Inaba-san that + will fix tr/// needing eval qq{} + Message-Id: <9F78A19C-D6C3-11D6-BAC6-0003939A104C@dan.co.jp> +! encoding.pm + * pod fixes/enhancements to reflect the changes above +! lib/Encode/Alias.pm + "Encode::TW is correct, Encode::Alias not." - /Autrijus/ + Message-Id: <20021001015648.GB18710@not.autrijus.org> + +1.76 2002/08/25 15:09:51 +! t/big5-eten.utf + To reflect ucm change by Autrijus. t/big5-eten.enc was regenerated + but naturally identical to previous version -- dankogai +! ucm/big5-eten.ucm + Codepoint fixes -- autrijus + Message-Id: <20020805040236.GC5220@not.autrijus.org> += * + copied everything under perl-5.8.0/ext/Encode to make sure Encode + is in sync w/ perl core +! 
t/CJKT.t t/guess.t + Change 17175 by jhi@alpha on 2002/06/10 23:24:42 + Now that binmode(FH) does implicit ":bytes" revisit + the failing tests. The worrisome one is the Digest::MD5 + test-- how will it fare in CRLF lands now? +! t/CJKT.t t/guess.t + From: Radu Greab + Date: Mon, 10 Jun 2002 00:40:34 +0300 + Message-Id: <200206092140.g59LeYn15745@ix.netsoft.ro> + Fixes for en_US.UTF-8 failures, all but ext/PerlIO/t/fallback.t + ones which I cannot figure out. +! lib/Encode/Alias.pm + Subject: [Encode PATCH] spurious warning + From: Nicholas Clark + Date: Sun, 2 Jun 2002 20:26:22 +0100 + Message-ID: <20020602192619.GA320@Bagpuss.unfortu.net> + +1.75 2002/06/01 18:07:49 ! lib/Encode/Alias.pm t/Alias.t lib/Encode/Supported.pod TW/TW.pm glibc compliance cited by Autrijus. http://www.li18nux.org/docs/html/CodesetAliasTable-V10.html @@ -679,7 +800,7 @@ Typo fixes and improvements by jhi Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al. -1.11 $Date: 2002/06/01 18:07:49 $ +1.11 2002/03/31 22:12:13 + t/encoding.t + t/jperl.t ! 
MANIFEST diff -ruN perl-5.8.0/ext/Encode/Encode.pm AP805_source/ext/Encode/Encode.pm --- perl-5.8.0/ext/Encode/Encode.pm Wed Jul 17 06:20:39 2002 +++ AP805_source/ext/Encode/Encode.pm Tue Feb 4 23:03:11 2003 @@ -1,9 +1,9 @@ # -# $Id: Encode.pm,v 1.75 2002/06/01 18:07:42 dankogai Exp $ +# $Id: Encode.pm,v 1.83 2002/11/18 17:28:29 dankogai Exp $ # package Encode; use strict; -our $VERSION = do { my @r = (q$Revision: 1.75 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.83 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; our $DEBUG = 0; use XSLoader (); XSLoader::load(__PACKAGE__, $VERSION); @@ -191,7 +191,7 @@ return $str; } -predefine_encodings(); +predefine_encodings(1); # # This is to restore %Encoding if really needed; @@ -199,6 +199,8 @@ sub predefine_encodings{ use Encode::Encoding; + no warnings 'redefine'; + my $use_xs = shift; if ($ON_EBCDIC) { # was in Encode::UTF_EBCDIC package Encode::UTF_EBCDIC; @@ -243,21 +245,29 @@ # was in Encode::utf8 package Encode::utf8; push @Encode::utf8::ISA, 'Encode::Encoding'; - *decode = sub{ - my ($obj,$octets,$chk) = @_; - my $str = Encode::decode_utf8($octets); - if (defined $str) { + # + if ($use_xs){ + $DEBUG and warn __PACKAGE__, " XS on"; + *decode = \&decode_xs; + *encode = \&encode_xs; + }else{ + $DEBUG and warn __PACKAGE__, " XS off"; + *decode = sub{ + my ($obj,$octets,$chk) = @_; + my $str = Encode::decode_utf8($octets); + if (defined $str) { + $_[1] = '' if $chk; + return $str; + } + return undef; + }; + *encode = sub { + my ($obj,$string,$chk) = @_; + my $octets = Encode::encode_utf8($string); $_[1] = '' if $chk; - return $str; - } - return undef; - }; - *encode = sub { - my ($obj,$string,$chk) = @_; - my $octets = Encode::encode_utf8($string); - $_[1] = '' if $chk; - return $octets; - }; + return $octets; + }; + } $Encode::Encoding{utf8} = bless {Name => "utf8"} => "Encode::utf8"; } diff -ruN perl-5.8.0/ext/Encode/Encode.xs AP805_source/ext/Encode/Encode.xs --- 
perl-5.8.0/ext/Encode/Encode.xs Sat Jun 1 10:02:54 2002 +++ AP805_source/ext/Encode/Encode.xs Tue Feb 4 23:03:11 2003 @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 1.46 2002/05/20 15:25:44 dankogai Exp dankogai $ + $Id: Encode.xs,v 1.52 2002/11/18 17:28:49 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT @@ -238,6 +238,134 @@ return dst; } +MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ + +void +Method_decode_xs(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: +{ + STRLEN slen; + U8 *s = (U8 *) SvPV(src, slen); + U8 *e = (U8 *) SvEND(src); + SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ + SvPOK_only(dst); + SvCUR_set(dst,0); + if (SvUTF8(src)) { + s = utf8_to_bytes(s,&slen); + if (s) { + SvCUR_set(src,slen); + SvUTF8_off(src); + e = s+slen; + } + else { + croak("Cannot decode string with wide characters"); + } + } + while (s < e) { + if (UTF8_IS_INVARIANT(*s) || UTF8_IS_START(*s)) { + U8 skip = UTF8SKIP(s); + if ((s + skip) > e) { + /* Partial character - done */ + break; + } + else if (is_utf8_char(s)) { + /* Whole char is good */ + sv_catpvn(dst,(char *)s,skip); + s += skip; + continue; + } + else { + /* starts ok but isn't "good" */ + } + } + else { + /* Invalid start byte */ + } + /* If we get here there is something wrong with alleged UTF-8 */ + if (check & ENCODE_DIE_ON_ERR){ + Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", (UV)*s); + XSRETURN(0); + } + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner(aTHX_ packWARN(WARN_UTF8), + ERR_DECODE_NOMAP, "utf8", (UV)*s); + } + if (check & ENCODE_RETURN_ON_ERR) { + break; + } + if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + SV* perlqq = newSVpvf("\\x%02" UVXf, (UV)*s); + sv_catsv(dst, perlqq); + SvREFCNT_dec(perlqq); + } else { + sv_catpv(dst, FBCHAR_UTF8); + } + s++; + } + *SvEND(dst) = '\0'; + + /* Clear out translated part of source unless asked not to */ + if (check && !(check & ENCODE_LEAVE_SRC)){ + slen = e-s; + if (slen) { + sv_setpvn(src, (char*)s, slen); + } + 
SvCUR_set(src, slen); + } + SvUTF8_on(dst); + ST(0) = sv_2mortal(dst); + XSRETURN(1); +} + +void +Method_encode_xs(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: +{ + STRLEN slen; + U8 *s = (U8 *) SvPV(src, slen); + U8 *e = (U8 *) SvEND(src); + SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ + if (SvUTF8(src)) { + /* Already encoded - trust it and just copy the octets */ + sv_setpvn(dst,(char *)s,(e-s)); + s = e; + } + else { + /* Native bytes - can always encode */ + U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ + while (s < e) { + UV uv = NATIVE_TO_UNI((UV) *s++); + if (UNI_IS_INVARIANT(uv)) + *d++ = (U8)UTF_TO_NATIVE(uv); + else { + *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); + *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); + } + } + SvCUR_set(dst, d- (U8 *)SvPVX(dst)); + *SvEND(dst) = '\0'; + } + + /* Clear out translated part of source unless asked not to */ + if (check && !(check & ENCODE_LEAVE_SRC)){ + slen = e-s; + if (slen) { + sv_setpvn(src, (char*)s, slen); + } + SvCUR_set(src, slen); + } + SvPOK_only(dst); + SvUTF8_off(dst); + ST(0) = sv_2mortal(dst); + XSRETURN(1); +} + MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ PROTOTYPES: ENABLE @@ -260,6 +388,9 @@ CODE: { encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + if (SvUTF8(src)) { + sv_utf8_downgrade(src, FALSE); + } ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); SvUTF8_on(ST(0)); XSRETURN(1); diff -ruN perl-5.8.0/ext/Encode/MANIFEST AP805_source/ext/Encode/MANIFEST --- perl-5.8.0/ext/Encode/MANIFEST Sat Jun 1 10:02:55 2002 +++ AP805_source/ext/Encode/MANIFEST Tue Feb 4 23:03:11 2003 @@ -33,6 +33,7 @@ bin/piconv iconv by perl bin/ucm2table Table Generator for testing bin/ucmlint A UCM Lint utility +bin/ucmsort Sorts UCM lines bin/unidump Unicode Dump like hexdump(1) encengine.c Encode extension encoding.pm Perl Pragmactic Module @@ -77,6 +78,7 @@ t/ksc5601.utf test data t/mime-header.t test script t/perlio.t test script +t/rt.pl even more test script 
t/unibench.pl benchmark script ucm/8859-1.ucm Unicode Character Map ucm/8859-10.ucm Unicode Character Map diff -ruN perl-5.8.0/ext/Encode/bin/enc2xs AP805_source/ext/Encode/bin/enc2xs --- perl-5.8.0/ext/Encode/bin/enc2xs Sat Jun 1 11:27:38 2002 +++ AP805_source/ext/Encode/bin/enc2xs Tue Feb 4 23:03:11 2003 @@ -6,9 +6,10 @@ require Config; import Config; } use strict; +use warnings; use Getopt::Std; my @orig_ARGV = @ARGV; -our $VERSION = do { my @r = (q$Revision: 1.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.31 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # These may get re-ordered. # RAW is a do_now as inserted by &enter @@ -186,7 +187,7 @@ print C "#include \n"; print C "#define U8 U8\n"; } - print C "#include \"encode.h\"\n"; + print C "#include \"encode.h\"\n\n"; } elsif ($cname =~ /\.enc$/) @@ -204,6 +205,9 @@ my %encoding; my %strings; +my $string_acc; +my %strings_in_acc; + my $saved = 0; my $subsave = 0; my $strings = 0; @@ -250,8 +254,19 @@ foreach my $name (sort cmp_name keys %encoding) { my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}}; - output(\*C,$name.'_utf8',$e2u); - output(\*C,'utf8_'.$name,$u2e); + process($name.'_utf8',$e2u); + addstrings(\*C,$e2u); + + process('utf8_'.$name,$u2e); + addstrings(\*C,$u2e); + } + outbigstring(\*C,"enctable"); + foreach my $name (sort cmp_name keys %encoding) + { + my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}}; + outtable(\*C,$e2u, "enctable"); + outtable(\*C,$u2e, "enctable"); + # push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep)); } foreach my $enc (sort cmp_name keys %encoding) @@ -596,43 +611,6 @@ } } - -sub outstring -{ - my ($fh,$name,$s) = @_; - my $sym = $strings{$s}; - if ($sym) - { - $saved += length($s); - } - else - { - if ($opt{'O'}) { - foreach my $o (keys %strings) - { - next unless (my $i = index($o,$s)) >= 0; - $sym = $strings{$o}; - # gcc things that 0x0e+0x10 (anything with e+) starts to look like - # a 
hexadecimal floating point constant. Silly gcc. Only p - # introduces a floating point constant. Put the space in to stop it - # getting confused. - $sym .= sprintf(" +0x%02x",$i) if ($i); - $subsave += length($s); - return $strings{$s} = $sym; - } - } - $strings{$s} = $sym = $name; - $strings += length($s); - my $definition = sprintf "static const U8 %s[%d] = { ",$name,length($s); - # Maybe we should assert that these are all <256. - $definition .= join(',',unpack "C*",$s); - # We have a single long line. Split it at convenient commas. - $definition =~ s/(.{74,77},)/$1\n/g; - print $fh "$definition };\n\n"; - } - return $sym; -} - sub process { my ($name,$a) = @_; @@ -693,7 +671,8 @@ $a->{'Entries'} = \@ent; } -sub outtable + +sub addstrings { my ($fh,$a) = @_; my $name = $a->{'Cname'}; @@ -701,20 +680,98 @@ foreach my $b (@{$a->{'Entries'}}) { next unless $b->[AGG_OUT_LEN]; - my $s = $b->[AGG_MIN_IN]; - my $e = $b->[AGG_MAX_IN]; - outstring($fh,sprintf("%s__%02x_%02x",$name,$s,$e),$b->[AGG_OUT_BYTES]); + $strings{$b->[AGG_OUT_BYTES]} = undef; } if ($a->{'Forward'}) { my $var = $^O eq 'MacOS' ? 'extern' : 'static'; - print $fh "\n$var encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n"; + print $fh "$var encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n"; } + $a->{'DoneStrings'} = 1; + foreach my $b (@{$a->{'Entries'}}) + { + my ($s,$e,$out,$t,$end,$l) = @$b; + addstrings($fh,$t) unless $t->{'DoneStrings'}; + } +} + +sub outbigstring +{ + my ($fh,$name) = @_; + + $string_acc = ''; + + # Make the big string in the string accumulator. Longest first, on the hope + # that this makes it more likely that we find the short strings later on. + # Not sure if it helps sorting strings of the same length lexcically. 
+ foreach my $s (sort {length $b <=> length $a || $a cmp $b} keys %strings) { + my $index = index $string_acc, $s; + if ($index >= 0) { + $saved += length($s); + $strings_in_acc{$s} = $index; + } else { + OPTIMISER: { + if ($opt{'O'}) { + my $sublength = length $s; + while (--$sublength > 0) { + # progressively lop characters off the end, to see if the start of + # the new string overlaps the end of the accumulator. + if (substr ($string_acc, -$sublength) + eq substr ($s, 0, $sublength)) { + $subsave += $sublength; + $strings_in_acc{$s} = length ($string_acc) - $sublength; + # append the last bit on the end. + $string_acc .= substr ($s, $sublength); + last OPTIMISER; + } + # or if the end of the new string overlaps the start of the + # accumulator + next unless substr ($string_acc, 0, $sublength) + eq substr ($s, -$sublength); + # well, the last $sublength characters of the accumulator match. + # so as we're prepending to the accumulator, need to shift all our + # existing offsets forwards + $_ += $sublength foreach values %strings_in_acc; + $subsave += $sublength; + $strings_in_acc{$s} = 0; + # append the first bit on the start. + $string_acc = substr ($s, 0, -$sublength) . $string_acc; + last OPTIMISER; + } + } + # Optimiser (if it ran) found nothing, so just going have to tack the + # whole thing on the end. + $strings_in_acc{$s} = length $string_acc; + $string_acc .= $s; + }; + } + } + + $strings = length $string_acc; + my $definition = "\nstatic const U8 $name\[$strings] = { " . + join(',',unpack "C*",$string_acc); + # We have a single long line. Split it at convenient commas. + print $fh $1, "\n" while $definition =~ /\G(.{74,77},)/gcs; + print $fh substr ($definition, pos $definition), " };\n"; +} + +sub findstring { + my ($name,$s) = @_; + my $offset = $strings_in_acc{$s}; + die "Can't find string " . join (',',unpack "C*",$s) . 
" in accumulator" + unless defined $offset; + "$name + $offset"; +} + +sub outtable +{ + my ($fh,$a,$bigname) = @_; + my $name = $a->{'Cname'}; $a->{'Done'} = 1; foreach my $b (@{$a->{'Entries'}}) { my ($s,$e,$out,$t,$end,$l) = @$b; - outtable($fh,$t) unless $t->{'Done'}; + outtable($fh,$t,$bigname) unless $t->{'Done'}; } print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"] = {\n"; foreach my $b (@{$a->{'Entries'}}) @@ -724,7 +781,7 @@ print $fh "{"; if ($l) { - printf $fh outstring($fh,'',$out); + printf $fh findstring($bigname,$out); } else { @@ -736,14 +793,6 @@ print $fh "};\n"; } -sub output -{ - my ($fh,$name,$a) = @_; - process($name,$a); - # Sub-tables - outtable($fh,$a); -} - sub output_enc { my ($fh,$name,$a) = @_; @@ -857,7 +906,7 @@ ); sub find_e2x{ - eval { require File::Find }; + eval { require File::Find; }; my (@inc, %e2x_dir); for my $inc (@INC){ push @inc, $inc unless $inc eq '.'; #skip current dir @@ -869,6 +918,7 @@ = lstat($_) or return; -f _ or return; if (/^.*\.e2x$/o){ + no warnings 'once'; $e2x_dir{$File::Find::dir} ||= $mtime; } return; @@ -927,6 +977,7 @@ eval { require "Encode/$f"; }; $@ and die "Can't require Encode/$f: $@\n"; for my $enc (Encode->encodings()){ + no warnings 'once'; $in_core{$enc} and next; $Encode::Config::ExtModule{$enc} and next; my $mod = "Encode/$f"; diff -ruN perl-5.8.0/ext/Encode/bin/piconv AP805_source/ext/Encode/bin/piconv --- perl-5.8.0/ext/Encode/bin/piconv Sat Jun 1 11:27:38 2002 +++ AP805_source/ext/Encode/bin/piconv Tue Feb 4 23:03:11 2003 @@ -1,5 +1,5 @@ #!./perl -# $Id: piconv,v 1.25 2002/06/01 18:07:49 dankogai Exp dankogai $ +# $Id: piconv,v 1.25 2002/06/01 18:07:49 dankogai Exp $ # use 5.8.0; use strict; diff -ruN perl-5.8.0/ext/Encode/bin/ucmsort AP805_source/ext/Encode/bin/ucmsort --- perl-5.8.0/ext/Encode/bin/ucmsort Wed Dec 31 16:00:00 1969 +++ AP805_source/ext/Encode/bin/ucmsort Tue Feb 4 23:03:11 2003 @@ -0,0 +1,31 @@ +#!/usr/local/bin/perl +# +# $Id: ucmsort,v 0.1 2002/10/18 
16:08:28 dankogai Exp $ +# +use strict; +my @lines; +my ($head, $tail); +while (<>){ + unless (m/^[0] cmp $b->[0] # Unicode descending order + or $a->[2] cmp $b->[2] # fallback descending order + or $a->[1] cmp $b->[1] # Encoding descending order + } + @lines) { + print join(" " => @$_), "\n"; +} +print $tail; +__END__ diff -ruN perl-5.8.0/ext/Encode/encoding.pm AP805_source/ext/Encode/encoding.pm --- perl-5.8.0/ext/Encode/encoding.pm Tue Jul 16 13:56:55 2002 +++ AP805_source/ext/Encode/encoding.pm Tue Feb 4 23:03:11 2003 @@ -1,5 +1,5 @@ package encoding; -our $VERSION = do { my @r = (q$Revision: 1.35 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.37 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode; use strict; @@ -144,7 +144,7 @@ s/\bCamel\b/$Rakuda/; The B pragma also modifies the filehandle disciplines of -STDIN, STDOUT, and STDERR to the specified encoding. Therefore, +STDIN and STDOUT to the specified encoding. Therefore, use encoding "euc-jp"; my $message = "Camel is the symbol of perl.\n"; @@ -236,6 +236,53 @@ resort to \x{....} just to spell your name in a native encoding. So feel free to put your strings in your encoding in quotes and regexes. + +=head2 tr/// with ranges remain unaffected + +The B pragma works by decoding string literals in +C and so forth. As of perl 5.8.0, this +does not apply to C. Therefore, + + use encoding 'euc-jp'; + #.... 
+ $kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/; + # -------- -------- -------- -------- + +Does not work as + + $kana =~ tr/\x{3041}-\x{3093}/\x{30a1}-\x{30f3}/; + +=over + +=item Legend of characters above + + utf8 euc-jp charnames::viacode() + ----------------------------------------- + \x{3041} \xA4\xA1 HIRAGANA LETTER SMALL A + \x{3093} \xA4\xF3 HIRAGANA LETTER N + \x{30a1} \xA5\xA1 KATAKANA LETTER SMALL A + \x{30f3} \xA5\xF3 KATAKANA LETTER N + +=back + +=head3 workaround to tr///; + +You can, however, achieve the same as simply as follows; + + use encoding 'euc-jp'; + # .... + eval qq{ \$kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/ }; + +Note the C expression is surronded by C. The idea behind +is the same as classic idiom that makes C 'interpolate'. + + tr/$from/$to/; # wrong! + eval qq{ tr/$from/$to/ }; # workaround. + +Nevertheless, in case of B pragma even C is affected so +C not being decoded was obviously against the will of Perl5 +Porters. In future version of perl, this counter-intuitive behaviour +of C will be fixed so C trick will be unneccesary. 
=head1 Non-ASCII Identifiers and Filter option diff -ruN perl-5.8.0/ext/Encode/lib/Encode/Alias.pm AP805_source/ext/Encode/lib/Encode/Alias.pm --- perl-5.8.0/ext/Encode/lib/Encode/Alias.pm Wed Jun 5 07:09:12 2002 +++ AP805_source/ext/Encode/lib/Encode/Alias.pm Tue Feb 4 23:03:11 2003 @@ -1,7 +1,7 @@ package Encode::Alias; use strict; use Encode; -our $VERSION = do { my @r = (q$Revision: 1.32 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.34 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; our $DEBUG = 0; use base qw(Exporter); @@ -219,7 +219,7 @@ define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' ); # for Encode::TW define_alias( qr/\bbig-?5$/i => '"big5-eten"' ); - define_alias( qr/\bbig5-?et(?:en)$/i => '"big5-eten"' ); + define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' ); define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' ); define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' ); define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' ); diff -ruN perl-5.8.0/ext/Encode/lib/Encode/JP/JIS7.pm AP805_source/ext/Encode/lib/Encode/JP/JIS7.pm --- perl-5.8.0/ext/Encode/lib/Encode/JP/JIS7.pm Sat Jun 1 10:02:55 2002 +++ AP805_source/ext/Encode/lib/Encode/JP/JIS7.pm Tue Feb 4 23:03:11 2003 @@ -1,7 +1,7 @@ package Encode::JP::JIS7; use strict; -our $VERSION = do { my @r = (q$Revision: 1.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.9 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -62,21 +62,23 @@ # JIS<->EUC +our $re_scan_jis = qr{ + (?:($RE{JIS_0212})|$RE{JIS_0208}|($RE{ISO_ASC})|($RE{JIS_KANA}))([^\e]*) +}x; sub jis_euc { + local ${^ENCODING}; my $r_str = shift; - $$r_str =~ s( - ($RE{JIS_0212}|$RE{JIS_0208}|$RE{ISO_ASC}|$RE{JIS_KANA}) - ([^\e]*) - ) + $$r_str =~ s($re_scan_jis) { - my ($esc, $chunk) = ($1, $2); - if ($esc !~ /$RE{ISO_ASC}/o) { + my ($esc_0212, $esc_asc, $esc_kana, $chunk) = + ($1, $2, $3, $4); + if (!$esc_asc) { $chunk 
=~ tr/\x21-\x7e/\xa1-\xfe/; - if ($esc =~ /$RE{JIS_KANA}/o) { + if ($esc_kana) { $chunk =~ s/([\xa1-\xdf])/\x8e$1/og; } - elsif ($esc =~ /$RE{JIS_0212}/o) { + elsif ($esc_0212) { $chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og; } } diff -ruN perl-5.8.0/ext/Encode/lib/Encode/MIME/Header.pm AP805_source/ext/Encode/lib/Encode/MIME/Header.pm --- perl-5.8.0/ext/Encode/lib/Encode/MIME/Header.pm Sat Jun 1 10:02:55 2002 +++ AP805_source/ext/Encode/lib/Encode/MIME/Header.pm Tue Feb 4 23:03:11 2003 @@ -1,7 +1,7 @@ package Encode::MIME::Header; use strict; # use warnings; -our $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode qw(find_encoding encode_utf8); use MIME::Base64; @@ -74,7 +74,8 @@ my $enc = shift; my $d = find_encoding($enc) or croak(Unknown encoding "$enc"); my $db64 = decode_base64(shift); - return $d->decode($db64, Encode::FB_PERLQQ); + return $d->name eq 'utf8' ? + Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ); } sub decode_q{ @@ -82,7 +83,8 @@ my $d = find_encoding($enc) or croak(Unknown encoding "$enc"); $q =~ s/_/ /go; $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego; - return $d->decode($q, Encode::FB_PERLQQ); + return $d->name eq 'utf8' ? 
+ Encode::decode_utf8($q) : $d->decode($q, Encode::FB_PERLQQ); } my $especials = diff -ruN perl-5.8.0/ext/Encode/lib/Encode/Supported.pod AP805_source/ext/Encode/lib/Encode/Supported.pod --- perl-5.8.0/ext/Encode/lib/Encode/Supported.pod Sat Jun 1 11:27:38 2002 +++ AP805_source/ext/Encode/lib/Encode/Supported.pod Tue Feb 4 23:03:11 2003 @@ -120,8 +120,8 @@ MacCroatian MacRomanian MacRumanian - Latin3 [1] iso-8859-3 - Latin4 [2] iso-8859-4 + Latin3[1] iso-8859-3 + Latin4[2] iso-8859-4 Cyrillics iso-8859-5 cp855 cp1251 MacCyrillic (See also next section) cp866 MacUkrainian Arabic iso-8859-6 cp864 cp1256 MacArabic @@ -133,7 +133,7 @@ Nordics iso-8859-10 cp865 cp861 MacIcelandic MacSami - Thai iso-8859-11 [3] cp874 MacThai + Thai iso-8859-11[3] cp874 MacThai (iso-8859-12 is nonexistent. Reserved for Indics?) Baltics iso-8859-13 cp775 cp1257 Celtics iso-8859-14 @@ -144,7 +144,7 @@ [1] Esperanto, Maltese, and Turkish. Turkish is now on 8859-9. [2] Baltics. Now on 8859-10, except for Latvian. - [3] Also know as TIS 620. + [3] TIS 620 + Non-Breaking Space (0xA0 / U+00A0) [4] Nicknamed Latin0; the Euro sign as well as French and Finnish letters that are missing from 8859-1 were added. @@ -182,9 +182,9 @@ Note that Vietnamese is listed above. Also read "Encoding vs Charset" below. Also note that these are implemented in distinct modules by -countries, due the the size concerns (simplified Chinese is mapped +countries, due to the size concerns (simplified Chinese is mapped to 'CN', continental China, while traditional Chinese is mapped to -'TW', Taiwan). Please refer to their respective documentataion pages. +'TW', Taiwan). Please refer to their respective documentation pages. =over 4 @@ -241,7 +241,7 @@ =item Encode::HanExtra -- More Chinese via CPAN -Due to size concerns, additional Chinese encodings below are +Due to the size concerns, additional Chinese encodings below are distributed separately on CPAN, under the name Encode::HanExtra. 
Standard DOS/Win Macintosh Comment/Reference @@ -301,7 +301,8 @@ =item Encode::MIME::Header Strictly speaking, MIME header encoding documented in RFC 2047 is more -of encapsulation than encoding. But included anyway. +of encapsulation than encoding. However, their support in modern +world is imperative so they are supported. ---------------------------------------------------------------- MIME-Header [RFC2047] @@ -686,7 +687,7 @@ scheme; this is also what is used as an identifier in MIME "charset=" parameters, and registered in the IANA charset registry ... (Note that this is NOT a term used by other standards bodies, such as ISO). - [RFC 2277] + [RFC 2277] =item EUC diff -ruN perl-5.8.0/ext/Encode/t/CJKT.t AP805_source/ext/Encode/t/CJKT.t --- perl-5.8.0/ext/Encode/t/CJKT.t Mon Jun 10 17:14:09 2002 +++ AP805_source/ext/Encode/t/CJKT.t Tue Feb 4 23:03:11 2003 @@ -20,7 +20,8 @@ $| = 1; } use strict; -use Test::More tests => 73; +use Test::More tests => 42; +#use Test::More tests => 73; #use Test::More qw(no_plan); use Encode; use File::Basename; @@ -92,10 +93,12 @@ } close $src; + my $unisave = $uni; eval{ $txt = $transcoder->encode($uni,1) }; $@ and print $@; ok(defined($txt), "encode $charset"); $seq++; is(length($uni), 0, "encode $charset completely"); $seq++; + $uni = $unisave; open $dst,">$dst_enc" or die "$dst_utf : $!"; binmode($dst); @@ -105,10 +108,5 @@ or ($DEBUG and rename $dst_enc, "$dst_enc.$seq"); $seq++; - for my $canon (@{$Charset{$charset}}){ - is($uni, decode($canon, encode($canon, $uni)), - "RT/$charset/$canon"); - $seq++; - } unlink($dst_utf, $dst_enc); } diff -ruN perl-5.8.0/ext/Encode/t/big5-eten.utf AP805_source/ext/Encode/t/big5-eten.utf --- perl-5.8.0/ext/Encode/t/big5-eten.utf Sat Jun 1 10:02:55 2002 +++ AP805_source/ext/Encode/t/big5-eten.utf Tue Feb 4 23:03:12 2003 @@ -193,8 +193,8 @@ 0xc7c0: ツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャ 0xc7e0: ヤュユョヨラリルレロヮワヰヱヲンヴヵヶАБВГДЕЁЖЗИЙК 0xc840: ЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзий -0xc860: 
клмнопрстуфхцчшщъыьэюя⇧↸↹‌乚 刂 -0xc8a0: 冈❠ +0xc860: клмнопрстуфхцчшщъыьэюя⇧↸↹𠃌乚𠂊刂 +0xc8a0: 冈𧘇 0xc8c0: ¬¦'"㈱№℡゛゜⺀⺄⺆⺇⺈⺊⺌⺍⺕⺜ 0xc8e0: ⺝⺥⺧⺪⺬⺮⺶⺼⺾⻆⻊⻌⻍⻏⻖⻗⻞⻣ ʃɐɛɔɵœøŋʊɪ 0xc940: 乂乜凵匚厂万丌乇亍囗兀屮彳丏冇与丮亓仂仉仈冘勼卬厹圠夃夬尐巿旡殳 diff -ruN perl-5.8.0/ext/Encode/t/jperl.t AP805_source/ext/Encode/t/jperl.t --- perl-5.8.0/ext/Encode/t/jperl.t Sat Jun 1 10:02:56 2002 +++ AP805_source/ext/Encode/t/jperl.t Tue Feb 4 23:03:12 2003 @@ -1,5 +1,5 @@ # -# $Id: jperl.t,v 1.24 2002/04/26 03:02:04 dankogai Exp $ +# $Id: jperl.t,v 1.25 2002/10/06 03:27:02 dankogai Exp $ # # This script is written in euc-jp @@ -23,7 +23,8 @@ no utf8; # we have raw Japanese encodings here use strict; -use Test::More tests => 18; +#use Test::More tests => 18; +use Test::More tests => 15; # black magic tests commented out my $Debug = shift; no encoding; # ensure @@ -60,14 +61,18 @@ } # should've been isnt() but no scoping is suported -- yet ok(! defined(${^ENCODING}), q{not scoped yet}); -{ - # now let's try some real black magic! - local(${^ENCODING}) = Encode::find_encoding("euc-jp"); - my $str = "\xbe\xae\xbb\xf4\x20\xc3\xc6"; - is (length($str), 4, q{black magic:length}); - is ($str, $Enamae, q{black magic:eq}); -} -ok(! defined(${^ENCODING}), q{out of black magic}); + +# +# The following tests are commented out to accomodate +# Inaba-San's patch to make tr/// work w/o eval qq{} +#{ +# # now let's try some real black magic! +# local(${^ENCODING}) = Encode::find_encoding("euc-jp"); +# my $str = "\xbe\xae\xbb\xf4\x20\xc3\xc6"; +# is (length($str), 4, q{black magic:length}); +# is ($str, $Enamae, q{black magic:eq}); +#} +#ok(! 
defined(${^ENCODING}), q{out of black magic}); use bytes; is (length($Namae), 10); diff -ruN perl-5.8.0/ext/Encode/t/mime-header.t AP805_source/ext/Encode/t/mime-header.t --- perl-5.8.0/ext/Encode/t/mime-header.t Sat Jun 1 10:02:56 2002 +++ AP805_source/ext/Encode/t/mime-header.t Tue Feb 4 23:03:12 2003 @@ -1,5 +1,5 @@ # -# $Id: mime-header.t,v 1.5 2002/05/23 19:10:10 dankogai Exp $ +# $Id: mime-header.t,v 1.6 2002/10/21 19:47:47 dankogai Exp $ # This script is written in utf8 # BEGIN { @@ -23,7 +23,7 @@ use strict; #use Test::More qw(no_plan); -use Test::More tests => 6; +use Test::More tests => 7; use_ok("Encode::MIME::Header"); my $eheader =<<'EOS'; @@ -41,9 +41,20 @@ Subject: If you can read this you understand the example. EOS -is(Encode::decode('MIME-Header', $eheader), $dheader, "decode (RFC2047)"); +is(Encode::decode('MIME-Header', $eheader), $dheader, "decode ASCII (RFC2047)"); use utf8; + +my $uheader =<<'EOS'; +From: =?US-ASCII?Q?Keith_Moore?= +To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= +CC: =?ISO-8859-1?Q?Andr=E9?= Pirard +Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= + =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= +EOS + +is(Encode::decode('MIME-Header', $uheader), $dheader, "decode UTF-8 (RFC2047)"); + $dheader=<<'EOS'; From: 小飼 弾 diff -ruN perl-5.8.0/ext/Encode/t/rt.pl AP805_source/ext/Encode/t/rt.pl --- perl-5.8.0/ext/Encode/t/rt.pl Wed Dec 31 16:00:00 1969 +++ AP805_source/ext/Encode/t/rt.pl Tue Feb 4 23:03:12 2003 @@ -0,0 +1,75 @@ +#!/usr/local/bin/perl +# +# $Id: rt.pl,v 1.2 2002/11/08 18:29:27 dankogai Exp $ +# + +BEGIN { + my $ucmdir = "ucm"; + if ($ENV{'PERL_CORE'}){ + chdir 't'; + unshift @INC, '../lib'; + $ucmdir = "../ext/Encode/ucm"; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + use strict; + require Test::More; + our $DEBUG; + our @ucm; + 
unless(@ARGV){ + use File::Spec; + Test::More->import(tests => 103); + opendir my $dh, $ucmdir or die "$ucmdir:$!"; + @ucm = + map {File::Spec->catfile($ucmdir, $_) } + sort grep {/\.ucm$/o} readdir($dh); + closedir $dh; + }else{ + Test::More->import("no_plan"); + $DEBUG = 1; + @ucm = @ARGV; + } +} + +use strict; +use Encode qw/encode decode/; +our $DEBUG; +our @ucm; + +for my $ucm (@ucm){ + my ($name, $nchar, $nrt, $nok) = rttest($ucm); + $nok += 0; + ok($nok == 0, "$ucm => $name ($nchar, $nrt, $nok)"); +} + +sub rttest{ + my $ucm = shift; + my ($name, $nchar, $nrt, $nok); + open my $rfh, "<$ucm" or die "$ucm:$!"; + # \x00 |0 # + while(<$rfh>){ + s/#.*//o; /^$/ and next; + unless ($name){ + /^\s+"([^\"]+)"/io or next; + $name = $1 and next; + }else{ + /^\s+(\S+)\s+\|(\d)/io or next; + $nchar++; + $3 == 0 or next; + $nrt++; + my $uni = chr(hex($1)); + my $enc = eval qq{ "$2" }; + decode($name, $enc) eq $uni or $nok++; + encode($name, $uni) eq $enc or $nok++; + } + } + return($name, $nchar, $nrt, $nok); +} +__END__ diff -ruN perl-5.8.0/ext/Encode/ucm/big5-eten.ucm AP805_source/ext/Encode/ucm/big5-eten.ucm --- perl-5.8.0/ext/Encode/ucm/big5-eten.ucm Sat Jun 1 10:02:57 2002 +++ AP805_source/ext/Encode/ucm/big5-eten.ucm Tue Feb 4 23:03:13 2003 @@ -1,5 +1,5 @@ # -# $Id: big5-eten.ucm,v 1.2 2002/04/22 03:41:13 dankogai Exp $ +# $Id: big5-eten.ucm,v 1.3 2002/08/25 15:09:51 dankogai Exp $ # # ./compile -n big5-eten -o Encode/big5-eten.ucm Encode/big5-eten.enc "big5-eten" @@ -313,8 +313,8 @@ \xC8\x74 |0 \xC8\x75 |0 \xC8\x5B |0 - \xC8\x7C |0 - \xC8\x7A |0 + \xC8\x7C |0 + \xC8\x7A |0 \xA1\x56 |0 \xA1\x58 |0 \xA1\xA5 |0 @@ -500,7 +500,7 @@ \xA1\xF0 |0 \xA1\xF1 |0 \xC6\xE6 |0 - \xC8\xA4 |0 + \xC8\xA4 |0 \xC8\xD6 |0 \xC8\xD7 |0 \xC8\xD8 |0 diff -ruN perl-5.8.0/ext/Encode/ucm/cp932.ucm AP805_source/ext/Encode/ucm/cp932.ucm --- perl-5.8.0/ext/Encode/ucm/cp932.ucm Sat Jun 1 10:02:58 2002 +++ AP805_source/ext/Encode/ucm/cp932.ucm Tue Feb 4 23:03:14 2003 @@ -1,8 +1,8 @@ # -# $Id: 
cp932.ucm,v 1.20 2002/04/04 19:50:53 dankogai Exp $ +# $Id: cp932.ucm,v 1.21 2002/10/20 15:44:00 dankogai Exp $ # # Original table can be obtained at -# http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT +# http://www.microsoft.com/typography/unicode/932.txt # "cp932" 1 @@ -137,6 +137,7 @@ \x7D |0 # RIGHT CURLY BRACKET \x7E |0 # TILDE \x7F |0 # DELETE + \x80 |0 # \x81\x98 |0 # SECTION SIGN \x81\x4E |0 # DIAERESIS \x81\x8B |0 # DEGREE SIGN @@ -274,29 +275,51 @@ \x81\x8D |0 # DOUBLE PRIME \x81\xA6 |0 # REFERENCE MARK \x81\x8E |0 # DEGREE CELSIUS - \xFA\x59 |0 # NUMERO SIGN - \xFA\x5A |0 # TELEPHONE SIGN + \x87\x82 |0 # NUMERO SIGN + \xFA\x59 |3 # NUMERO SIGN + \x87\x84 |0 # TELEPHONE SIGN + \xFA\x5A |3 # TELEPHONE SIGN \x81\xF0 |0 # ANGSTROM SIGN - \xFA\x4A |0 # ROMAN NUMERAL ONE - \xFA\x4B |0 # ROMAN NUMERAL TWO - \xFA\x4C |0 # ROMAN NUMERAL THREE - \xFA\x4D |0 # ROMAN NUMERAL FOUR - \xFA\x4E |0 # ROMAN NUMERAL FIVE - \xFA\x4F |0 # ROMAN NUMERAL SIX - \xFA\x50 |0 # ROMAN NUMERAL SEVEN - \xFA\x51 |0 # ROMAN NUMERAL EIGHT - \xFA\x52 |0 # ROMAN NUMERAL NINE - \xFA\x53 |0 # ROMAN NUMERAL TEN + \x87\x54 |0 # ROMAN NUMERAL ONE + \xFA\x4A |3 # ROMAN NUMERAL ONE + \x87\x55 |0 # ROMAN NUMERAL TWO + \xFA\x4B |3 # ROMAN NUMERAL TWO + \x87\x56 |0 # ROMAN NUMERAL THREE + \xFA\x4C |3 # ROMAN NUMERAL THREE + \x87\x57 |0 # ROMAN NUMERAL FOUR + \xFA\x4D |3 # ROMAN NUMERAL FOUR + \x87\x58 |0 # ROMAN NUMERAL FIVE + \xFA\x4E |3 # ROMAN NUMERAL FIVE + \x87\x59 |0 # ROMAN NUMERAL SIX + \xFA\x4F |3 # ROMAN NUMERAL SIX + \x87\x5A |0 # ROMAN NUMERAL SEVEN + \xFA\x50 |3 # ROMAN NUMERAL SEVEN + \x87\x5B |0 # ROMAN NUMERAL EIGHT + \xFA\x51 |3 # ROMAN NUMERAL EIGHT + \x87\x5C |0 # ROMAN NUMERAL NINE + \xFA\x52 |3 # ROMAN NUMERAL NINE + \x87\x5D |0 # ROMAN NUMERAL TEN + \xFA\x53 |3 # ROMAN NUMERAL TEN \xFA\x40 |0 # SMALL ROMAN NUMERAL ONE + \xEE\xEF |3 # SMALL ROMAN NUMERAL ONE \xFA\x41 |0 # SMALL ROMAN NUMERAL TWO + \xEE\xF0 |3 # SMALL ROMAN NUMERAL TWO \xFA\x42 |0 # SMALL 
ROMAN NUMERAL THREE + \xEE\xF1 |3 # SMALL ROMAN NUMERAL THREE \xFA\x43 |0 # SMALL ROMAN NUMERAL FOUR + \xEE\xF2 |3 # SMALL ROMAN NUMERAL FOUR \xFA\x44 |0 # SMALL ROMAN NUMERAL FIVE + \xEE\xF3 |3 # SMALL ROMAN NUMERAL FIVE \xFA\x45 |0 # SMALL ROMAN NUMERAL SIX + \xEE\xF4 |3 # SMALL ROMAN NUMERAL SIX \xFA\x46 |0 # SMALL ROMAN NUMERAL SEVEN + \xEE\xF5 |3 # SMALL ROMAN NUMERAL SEVEN \xFA\x47 |0 # SMALL ROMAN NUMERAL EIGHT + \xEE\xF6 |3 # SMALL ROMAN NUMERAL EIGHT \xFA\x48 |0 # SMALL ROMAN NUMERAL NINE + \xEE\xF7 |3 # SMALL ROMAN NUMERAL NINE \xFA\x49 |0 # SMALL ROMAN NUMERAL TEN + \xEE\xF8 |3 # SMALL ROMAN NUMERAL TEN \x81\xA9 |0 # LEFTWARDS ARROW \x81\xAA |0 # UPWARDS ARROW \x81\xA8 |0 # RIGHTWARDS ARROW @@ -310,25 +333,34 @@ \x81\xB8 |0 # ELEMENT OF \x81\xB9 |0 # CONTAINS AS MEMBER \x87\x94 |0 # N-ARY SUMMATION - \x87\x95 |0 # SQUARE ROOT + \x81\xE3 |0 # SQUARE ROOT + \x87\x95 |3 # SQUARE ROOT \x81\xE5 |0 # PROPORTIONAL TO \x81\x87 |0 # INFINITY \x87\x98 |0 # RIGHT ANGLE - \x87\x97 |0 # ANGLE + \x81\xDA |0 # ANGLE + \x87\x97 |3 # ANGLE \x81\x61 |0 # PARALLEL TO \x81\xC8 |0 # LOGICAL AND \x81\xC9 |0 # LOGICAL OR - \x87\x9B |0 # INTERSECTION - \x87\x9C |0 # UNION - \x87\x92 |0 # INTEGRAL + \x81\xBF |0 # INTERSECTION + \x87\x9B |3 # INTERSECTION + \x81\xBE |0 # UNION + \x87\x9C |3 # UNION + \x81\xE7 |0 # INTEGRAL + \x87\x92 |3 # INTEGRAL \x81\xE8 |0 # DOUBLE INTEGRAL \x87\x93 |0 # CONTOUR INTEGRAL \x81\x88 |0 # THEREFORE - \xFA\x5B |0 # BECAUSE + \x81\xE6 |0 # BECAUSE + \x87\x9A |3 # BECAUSE + \xFA\x5B |3 # BECAUSE \x81\xE4 |0 # REVERSED TILDE - \x87\x90 |0 # APPROXIMATELY EQUAL TO OR THE IMAGE OF + \x81\xE0 |0 # APPROXIMATELY EQUAL TO OR THE IMAGE OF + \x87\x90 |3 # APPROXIMATELY EQUAL TO OR THE IMAGE OF \x81\x82 |0 # NOT EQUAL TO - \x87\x91 |0 # IDENTICAL TO + \x81\xDF |0 # IDENTICAL TO + \x87\x91 |3 # IDENTICAL TO \x81\x85 |0 # LESS-THAN OVER EQUAL TO \x81\x86 |0 # GREATER-THAN OVER EQUAL TO \x81\xE1 |0 # MUCH LESS-THAN @@ -337,7 +369,8 @@ \x81\xBD |0 # SUPERSET OF 
\x81\xBA |0 # SUBSET OF OR EQUAL TO \x81\xBB |0 # SUPERSET OF OR EQUAL TO - \x87\x96 |0 # UP TACK + \x81\xDB |0 # UP TACK + \x87\x96 |3 # UP TACK \x87\x99 |0 # RIGHT TRIANGLE \x81\xDC |0 # ARC \x87\x40 |0 # CIRCLED DIGIT ONE @@ -608,10 +641,16 @@ \x83\x95 |0 # KATAKANA LETTER SMALL KA \x83\x96 |0 # KATAKANA LETTER SMALL KE \x81\x45 |0 # KATAKANA MIDDLE DOT + \x85\x40 |3 # KATAKANA MIDDLE DOT + \x86\x40 |3 # KATAKANA MIDDLE DOT + \xEB\x40 |3 # KATAKANA MIDDLE DOT + \xEC\x40 |3 # KATAKANA MIDDLE DOT + \xEF\x40 |3 # KATAKANA MIDDLE DOT \x81\x5B |0 # KATAKANA-HIRAGANA PROLONGED SOUND MARK \x81\x52 |0 # KATAKANA ITERATION MARK \x81\x53 |0 # KATAKANA VOICED ITERATION MARK - \xFA\x58 |0 # PARENTHESIZED IDEOGRAPH STOCK + \x87\x8A |0 # PARENTHESIZED IDEOGRAPH STOCK + \xFA\x58 |3 # PARENTHESIZED IDEOGRAPH STOCK \x87\x8B |0 # PARENTHESIZED IDEOGRAPH HAVE \x87\x8C |0 # PARENTHESIZED IDEOGRAPH REPRESENT \x87\x85 |0 # CIRCLED IDEOGRAPH HIGH @@ -669,6 +708,7 @@ \x97\xBC |0 # CJK UNIFIED IDEOGRAPH \x95\xC0 |0 # CJK UNIFIED IDEOGRAPH \xFA\x68 |0 # CJK UNIFIED IDEOGRAPH + \xED\x4C |3 # CJK UNIFIED IDEOGRAPH \x98\xA2 |0 # CJK UNIFIED IDEOGRAPH \x92\x86 |0 # CJK UNIFIED IDEOGRAPH \x98\xA3 |0 # CJK UNIFIED IDEOGRAPH @@ -754,6 +794,7 @@ \x98\xBE |0 # CJK UNIFIED IDEOGRAPH \x98\xC0 |0 # CJK UNIFIED IDEOGRAPH \xFA\x69 |0 # CJK UNIFIED IDEOGRAPH + \xED\x4D |3 # CJK UNIFIED IDEOGRAPH \x91\xE3 |0 # CJK UNIFIED IDEOGRAPH \x97\xDF |0 # CJK UNIFIED IDEOGRAPH \x88\xC8 |0 # CJK UNIFIED IDEOGRAPH @@ -765,9 +806,12 @@ \x98\xC1 |0 # CJK UNIFIED IDEOGRAPH \x94\x43 |0 # CJK UNIFIED IDEOGRAPH \xFA\x6A |0 # CJK UNIFIED IDEOGRAPH + \xED\x4E |3 # CJK UNIFIED IDEOGRAPH \xFA\x6B |0 # CJK UNIFIED IDEOGRAPH + \xED\x4F |3 # CJK UNIFIED IDEOGRAPH \x8A\xE9 |0 # CJK UNIFIED IDEOGRAPH \xFA\x6C |0 # CJK UNIFIED IDEOGRAPH + \xED\x50 |3 # CJK UNIFIED IDEOGRAPH \x98\xC2 |0 # CJK UNIFIED IDEOGRAPH \x88\xC9 |0 # CJK UNIFIED IDEOGRAPH \x8C\xDE |0 # CJK UNIFIED IDEOGRAPH @@ -784,6 +828,7 @@ \x97\xE0 |0 # CJK UNIFIED 
IDEOGRAPH \x90\x4C |0 # CJK UNIFIED IDEOGRAPH \xFA\x6D |0 # CJK UNIFIED IDEOGRAPH + \xED\x51 |3 # CJK UNIFIED IDEOGRAPH \x8E\x66 |0 # CJK UNIFIED IDEOGRAPH \x8E\x97 |0 # CJK UNIFIED IDEOGRAPH \x89\xBE |0 # CJK UNIFIED IDEOGRAPH @@ -798,6 +843,7 @@ \x91\xCC |0 # CJK UNIFIED IDEOGRAPH \x89\xBD |0 # CJK UNIFIED IDEOGRAPH \xFA\x6E |0 # CJK UNIFIED IDEOGRAPH + \xED\x52 |3 # CJK UNIFIED IDEOGRAPH \x98\xC7 |0 # CJK UNIFIED IDEOGRAPH \x97\x5D |0 # CJK UNIFIED IDEOGRAPH \x98\xC3 |0 # CJK UNIFIED IDEOGRAPH @@ -818,15 +864,19 @@ \x98\xD2 |0 # CJK UNIFIED IDEOGRAPH \x98\xCA |0 # CJK UNIFIED IDEOGRAPH \xFA\x70 |0 # CJK UNIFIED IDEOGRAPH + \xED\x54 |3 # CJK UNIFIED IDEOGRAPH \x97\xE1 |0 # CJK UNIFIED IDEOGRAPH \x8E\x98 |0 # CJK UNIFIED IDEOGRAPH \x98\xCB |0 # CJK UNIFIED IDEOGRAPH \x98\xD0 |0 # CJK UNIFIED IDEOGRAPH \xFA\x6F |0 # CJK UNIFIED IDEOGRAPH + \xED\x53 |3 # CJK UNIFIED IDEOGRAPH \xFA\x72 |0 # CJK UNIFIED IDEOGRAPH + \xED\x56 |3 # CJK UNIFIED IDEOGRAPH \x98\xD3 |0 # CJK UNIFIED IDEOGRAPH \x98\xCC |0 # CJK UNIFIED IDEOGRAPH \xFA\x71 |0 # CJK UNIFIED IDEOGRAPH + \xED\x55 |3 # CJK UNIFIED IDEOGRAPH \x8B\x9F |0 # CJK UNIFIED IDEOGRAPH \x88\xCB |0 # CJK UNIFIED IDEOGRAPH \x8B\xA0 |0 # CJK UNIFIED IDEOGRAPH @@ -842,8 +892,10 @@ \x91\xA3 |0 # CJK UNIFIED IDEOGRAPH \x89\xE2 |0 # CJK UNIFIED IDEOGRAPH \xFA\x61 |0 # CJK UNIFIED IDEOGRAPH + \xED\x45 |3 # CJK UNIFIED IDEOGRAPH \x8F\x72 |0 # CJK UNIFIED IDEOGRAPH \xFA\x73 |0 # CJK UNIFIED IDEOGRAPH + \xED\x57 |3 # CJK UNIFIED IDEOGRAPH \x98\xD7 |0 # CJK UNIFIED IDEOGRAPH \x98\xDC |0 # CJK UNIFIED IDEOGRAPH \x98\xDA |0 # CJK UNIFIED IDEOGRAPH @@ -867,6 +919,7 @@ \x89\xB4 |0 # CJK UNIFIED IDEOGRAPH \x98\xEA |0 # CJK UNIFIED IDEOGRAPH \xFA\x76 |0 # CJK UNIFIED IDEOGRAPH + \xED\x5A |3 # CJK UNIFIED IDEOGRAPH \x98\xE4 |0 # CJK UNIFIED IDEOGRAPH \x98\xED |0 # CJK UNIFIED IDEOGRAPH \x91\x71 |0 # CJK UNIFIED IDEOGRAPH @@ -880,9 +933,11 @@ \x8C\xF3 |0 # CJK UNIFIED IDEOGRAPH \x98\xDF |0 # CJK UNIFIED IDEOGRAPH \xFA\x77 |0 # CJK UNIFIED 
IDEOGRAPH + \xED\x5B |3 # CJK UNIFIED IDEOGRAPH \x8E\xD8 |0 # CJK UNIFIED IDEOGRAPH \x98\xE7 |0 # CJK UNIFIED IDEOGRAPH \xFA\x75 |0 # CJK UNIFIED IDEOGRAPH + \xED\x59 |3 # CJK UNIFIED IDEOGRAPH \x95\xED |0 # CJK UNIFIED IDEOGRAPH \x92\x6C |0 # CJK UNIFIED IDEOGRAPH \x98\xE3 |0 # CJK UNIFIED IDEOGRAPH @@ -896,9 +951,12 @@ \x8B\xE4 |0 # CJK UNIFIED IDEOGRAPH \x8C\x90 |0 # CJK UNIFIED IDEOGRAPH \xFA\x74 |0 # CJK UNIFIED IDEOGRAPH + \xED\x58 |3 # CJK UNIFIED IDEOGRAPH \xFA\x7A |0 # CJK UNIFIED IDEOGRAPH + \xED\x5E |3 # CJK UNIFIED IDEOGRAPH \x98\xEE |0 # CJK UNIFIED IDEOGRAPH \xFA\x78 |0 # CJK UNIFIED IDEOGRAPH + \xED\x5C |3 # CJK UNIFIED IDEOGRAPH \x98\xEF |0 # CJK UNIFIED IDEOGRAPH \x98\xF3 |0 # CJK UNIFIED IDEOGRAPH \x88\xCC |0 # CJK UNIFIED IDEOGRAPH @@ -911,6 +969,7 @@ \x8C\x92 |0 # CJK UNIFIED IDEOGRAPH \x98\xF6 |0 # CJK UNIFIED IDEOGRAPH \xFA\x79 |0 # CJK UNIFIED IDEOGRAPH + \xED\x5D |3 # CJK UNIFIED IDEOGRAPH \x8E\xC3 |0 # CJK UNIFIED IDEOGRAPH \x91\xA4 |0 # CJK UNIFIED IDEOGRAPH \x92\xE3 |0 # CJK UNIFIED IDEOGRAPH @@ -922,6 +981,7 @@ \x96\x54 |0 # CJK UNIFIED IDEOGRAPH \x8C\x86 |0 # CJK UNIFIED IDEOGRAPH \xFA\x7B |0 # CJK UNIFIED IDEOGRAPH + \xED\x5F |3 # CJK UNIFIED IDEOGRAPH \x8E\x50 |0 # CJK UNIFIED IDEOGRAPH \x94\xF5 |0 # CJK UNIFIED IDEOGRAPH \x98\xF9 |0 # CJK UNIFIED IDEOGRAPH @@ -943,6 +1003,7 @@ \x96\x6C |0 # CJK UNIFIED IDEOGRAPH \x99\x44 |0 # CJK UNIFIED IDEOGRAPH \xFA\x7D |0 # CJK UNIFIED IDEOGRAPH + \xED\x61 |3 # CJK UNIFIED IDEOGRAPH \x97\xBB |0 # CJK UNIFIED IDEOGRAPH \x99\x45 |0 # CJK UNIFIED IDEOGRAPH \x99\x48 |0 # CJK UNIFIED IDEOGRAPH @@ -951,6 +1012,7 @@ \x99\x47 |0 # CJK UNIFIED IDEOGRAPH \x99\x49 |0 # CJK UNIFIED IDEOGRAPH \xFA\x7C |0 # CJK UNIFIED IDEOGRAPH + \xED\x60 |3 # CJK UNIFIED IDEOGRAPH \x99\x4B |0 # CJK UNIFIED IDEOGRAPH \x99\x4A |0 # CJK UNIFIED IDEOGRAPH \x95\xC6 |0 # CJK UNIFIED IDEOGRAPH @@ -984,6 +1046,7 @@ \x90\xE6 |0 # CJK UNIFIED IDEOGRAPH \x8C\xF5 |0 # CJK UNIFIED IDEOGRAPH \xFA\x7E |0 # CJK UNIFIED IDEOGRAPH + \xED\x62 
|3 # CJK UNIFIED IDEOGRAPH \x8D\x8E |0 # CJK UNIFIED IDEOGRAPH \x99\x5B |0 # CJK UNIFIED IDEOGRAPH \x96\xC6 |0 # CJK UNIFIED IDEOGRAPH @@ -995,6 +1058,7 @@ \x8A\x95 |0 # CJK UNIFIED IDEOGRAPH \x99\x5D |0 # CJK UNIFIED IDEOGRAPH \xFA\x80 |0 # CJK UNIFIED IDEOGRAPH + \xED\x63 |3 # CJK UNIFIED IDEOGRAPH \x93\xFC |0 # CJK UNIFIED IDEOGRAPH \x91\x53 |0 # CJK UNIFIED IDEOGRAPH \x99\x5F |0 # CJK UNIFIED IDEOGRAPH @@ -1027,6 +1091,7 @@ \x8F\xE7 |0 # CJK UNIFIED IDEOGRAPH \x8E\xCA |0 # CJK UNIFIED IDEOGRAPH \xFA\x81 |0 # CJK UNIFIED IDEOGRAPH + \xED\x64 |3 # CJK UNIFIED IDEOGRAPH \x8A\xA5 |0 # CJK UNIFIED IDEOGRAPH \x99\x6E |0 # CJK UNIFIED IDEOGRAPH \x99\x6C |0 # CJK UNIFIED IDEOGRAPH @@ -1047,6 +1112,7 @@ \x97\xE2 |0 # CJK UNIFIED IDEOGRAPH \x99\x77 |0 # CJK UNIFIED IDEOGRAPH \xFA\x82 |0 # CJK UNIFIED IDEOGRAPH + \xED\x65 |3 # CJK UNIFIED IDEOGRAPH \x90\xA6 |0 # CJK UNIFIED IDEOGRAPH \x99\x78 |0 # CJK UNIFIED IDEOGRAPH \x8F\x79 |0 # CJK UNIFIED IDEOGRAPH @@ -1065,6 +1131,7 @@ \x99\x7D |0 # CJK UNIFIED IDEOGRAPH \x93\xE2 |0 # CJK UNIFIED IDEOGRAPH \xFA\x83 |0 # CJK UNIFIED IDEOGRAPH + \xED\x66 |3 # CJK UNIFIED IDEOGRAPH \x99\x7E |0 # CJK UNIFIED IDEOGRAPH \x99\x80 |0 # CJK UNIFIED IDEOGRAPH \x8A\x4D |0 # CJK UNIFIED IDEOGRAPH @@ -1087,6 +1154,7 @@ \x8C\x59 |0 # CJK UNIFIED IDEOGRAPH \x99\x85 |0 # CJK UNIFIED IDEOGRAPH \xFA\x84 |0 # CJK UNIFIED IDEOGRAPH + \xED\x67 |3 # CJK UNIFIED IDEOGRAPH \x97\xF1 |0 # CJK UNIFIED IDEOGRAPH \x8F\x89 |0 # CJK UNIFIED IDEOGRAPH \x94\xBB |0 # CJK UNIFIED IDEOGRAPH @@ -1139,16 +1207,19 @@ \x99\x99 |0 # CJK UNIFIED IDEOGRAPH \x97\xCD |0 # CJK UNIFIED IDEOGRAPH \xFA\x85 |0 # CJK UNIFIED IDEOGRAPH + \xED\x68 |3 # CJK UNIFIED IDEOGRAPH \x8C\xF7 |0 # CJK UNIFIED IDEOGRAPH \x89\xC1 |0 # CJK UNIFIED IDEOGRAPH \x97\xF2 |0 # CJK UNIFIED IDEOGRAPH \xFA\x86 |0 # CJK UNIFIED IDEOGRAPH + \xED\x69 |3 # CJK UNIFIED IDEOGRAPH \x8F\x95 |0 # CJK UNIFIED IDEOGRAPH \x93\x77 |0 # CJK UNIFIED IDEOGRAPH \x8D\x85 |0 # CJK UNIFIED IDEOGRAPH \x99\xA0 |0 # CJK UNIFIED 
IDEOGRAPH \x99\xA1 |0 # CJK UNIFIED IDEOGRAPH \xFB\x77 |0 # CJK UNIFIED IDEOGRAPH + \xEE\x5B |3 # CJK UNIFIED IDEOGRAPH \x97\xE3 |0 # CJK UNIFIED IDEOGRAPH \x98\x4A |0 # CJK UNIFIED IDEOGRAPH \x99\xA3 |0 # CJK UNIFIED IDEOGRAPH @@ -1156,6 +1227,7 @@ \x99\xA2 |0 # CJK UNIFIED IDEOGRAPH \x8A\x4E |0 # CJK UNIFIED IDEOGRAPH \xFA\x87 |0 # CJK UNIFIED IDEOGRAPH + \xED\x6A |3 # CJK UNIFIED IDEOGRAPH \x99\xA4 |0 # CJK UNIFIED IDEOGRAPH \x96\x75 |0 # CJK UNIFIED IDEOGRAPH \x92\xBA |0 # CJK UNIFIED IDEOGRAPH @@ -1168,6 +1240,7 @@ \x8A\xA8 |0 # CJK UNIFIED IDEOGRAPH \x96\xB1 |0 # CJK UNIFIED IDEOGRAPH \xFA\x88 |0 # CJK UNIFIED IDEOGRAPH + \xED\x6B |3 # CJK UNIFIED IDEOGRAPH \x8F\x9F |0 # CJK UNIFIED IDEOGRAPH \x99\xA7 |0 # CJK UNIFIED IDEOGRAPH \x95\xE5 |0 # CJK UNIFIED IDEOGRAPH @@ -1186,11 +1259,13 @@ \x8C\xF9 |0 # CJK UNIFIED IDEOGRAPH \x96\xDC |0 # CJK UNIFIED IDEOGRAPH \xFA\x89 |0 # CJK UNIFIED IDEOGRAPH + \xED\x6C |3 # CJK UNIFIED IDEOGRAPH \x96\xE6 |0 # CJK UNIFIED IDEOGRAPH \x93\xF5 |0 # CJK UNIFIED IDEOGRAPH \x95\xEF |0 # CJK UNIFIED IDEOGRAPH \x99\xB0 |0 # CJK UNIFIED IDEOGRAPH \xFA\x8A |0 # CJK UNIFIED IDEOGRAPH + \xED\x6D |3 # CJK UNIFIED IDEOGRAPH \x99\xB1 |0 # CJK UNIFIED IDEOGRAPH \x99\xB3 |0 # CJK UNIFIED IDEOGRAPH \x99\xB5 |0 # CJK UNIFIED IDEOGRAPH @@ -1205,6 +1280,7 @@ \x8B\xA7 |0 # CJK UNIFIED IDEOGRAPH \x99\xB8 |0 # CJK UNIFIED IDEOGRAPH \xFA\x8B |0 # CJK UNIFIED IDEOGRAPH + \xED\x6E |3 # CJK UNIFIED IDEOGRAPH \x94\xD9 |0 # CJK UNIFIED IDEOGRAPH \x99\xB9 |0 # CJK UNIFIED IDEOGRAPH \x99\xBA |0 # CJK UNIFIED IDEOGRAPH @@ -1241,6 +1317,7 @@ \x88\xF3 |0 # CJK UNIFIED IDEOGRAPH \x8A\xEB |0 # CJK UNIFIED IDEOGRAPH \xFA\x8C |0 # CJK UNIFIED IDEOGRAPH + \xED\x6F |3 # CJK UNIFIED IDEOGRAPH \x91\xA6 |0 # CJK UNIFIED IDEOGRAPH \x8B\x70 |0 # CJK UNIFIED IDEOGRAPH \x97\x91 |0 # CJK UNIFIED IDEOGRAPH @@ -1251,6 +1328,7 @@ \x99\xCA |0 # CJK UNIFIED IDEOGRAPH \x96\xEF |0 # CJK UNIFIED IDEOGRAPH \xFA\x8D |0 # CJK UNIFIED IDEOGRAPH + \xED\x70 |3 # CJK UNIFIED IDEOGRAPH 
\x99\xCB |0 # CJK UNIFIED IDEOGRAPH \x97\xD0 |0 # CJK UNIFIED IDEOGRAPH \x8C\xFA |0 # CJK UNIFIED IDEOGRAPH @@ -1264,6 +1342,7 @@ \x99\xCF |0 # CJK UNIFIED IDEOGRAPH \x99\xD0 |0 # CJK UNIFIED IDEOGRAPH \xFA\x8E |0 # CJK UNIFIED IDEOGRAPH + \xED\x71 |3 # CJK UNIFIED IDEOGRAPH \x8C\xB5 |0 # CJK UNIFIED IDEOGRAPH \x99\xD1 |0 # CJK UNIFIED IDEOGRAPH \x8B\x8E |0 # CJK UNIFIED IDEOGRAPH @@ -1282,6 +1361,7 @@ \x8F\x96 |0 # CJK UNIFIED IDEOGRAPH \x94\xBE |0 # CJK UNIFIED IDEOGRAPH \xFA\x8F |0 # CJK UNIFIED IDEOGRAPH + \xED\x72 |3 # CJK UNIFIED IDEOGRAPH \x99\xD5 |0 # CJK UNIFIED IDEOGRAPH \x89\x62 |0 # CJK UNIFIED IDEOGRAPH \x91\x70 |0 # CJK UNIFIED IDEOGRAPH @@ -1358,6 +1438,7 @@ \x99\xF2 |0 # CJK UNIFIED IDEOGRAPH \x99\xF4 |0 # CJK UNIFIED IDEOGRAPH \xFA\x92 |0 # CJK UNIFIED IDEOGRAPH + \xED\x75 |3 # CJK UNIFIED IDEOGRAPH \x8D\xEE |0 # CJK UNIFIED IDEOGRAPH \x98\x61 |0 # CJK UNIFIED IDEOGRAPH \x99\xE9 |0 # CJK UNIFIED IDEOGRAPH @@ -1365,11 +1446,13 @@ \x99\xF3 |0 # CJK UNIFIED IDEOGRAPH \x99\xEE |0 # CJK UNIFIED IDEOGRAPH \xFA\x91 |0 # CJK UNIFIED IDEOGRAPH + \xED\x74 |3 # CJK UNIFIED IDEOGRAPH \x99\xF6 |0 # CJK UNIFIED IDEOGRAPH \x9A\x42 |0 # CJK UNIFIED IDEOGRAPH \x99\xF8 |0 # CJK UNIFIED IDEOGRAPH \x99\xFC |0 # CJK UNIFIED IDEOGRAPH \xFA\x93 |0 # CJK UNIFIED IDEOGRAPH + \xED\x76 |3 # CJK UNIFIED IDEOGRAPH \x9A\x40 |0 # CJK UNIFIED IDEOGRAPH \x99\xF9 |0 # CJK UNIFIED IDEOGRAPH \x9A\x5D |0 # CJK UNIFIED IDEOGRAPH @@ -1399,6 +1482,7 @@ \x9A\x4D |0 # CJK UNIFIED IDEOGRAPH \x9A\x4A |0 # CJK UNIFIED IDEOGRAPH \xFA\x94 |0 # CJK UNIFIED IDEOGRAPH + \xED\x77 |3 # CJK UNIFIED IDEOGRAPH \x89\x53 |0 # CJK UNIFIED IDEOGRAPH \x8D\xB4 |0 # CJK UNIFIED IDEOGRAPH \x90\x4F |0 # CJK UNIFIED IDEOGRAPH @@ -1432,6 +1516,7 @@ \x9A\x66 |0 # CJK UNIFIED IDEOGRAPH \x91\x50 |0 # CJK UNIFIED IDEOGRAPH \xFA\x95 |0 # CJK UNIFIED IDEOGRAPH + \xED\x78 |3 # CJK UNIFIED IDEOGRAPH \x9A\x68 |0 # CJK UNIFIED IDEOGRAPH \x8D\x41 |0 # CJK UNIFIED IDEOGRAPH \x9A\x5E |0 # CJK UNIFIED IDEOGRAPH @@ -1559,9 
+1644,11 @@ \x8D\xBF |0 # CJK UNIFIED IDEOGRAPH \x8D\x42 |0 # CJK UNIFIED IDEOGRAPH \xFA\x96 |0 # CJK UNIFIED IDEOGRAPH + \xED\x79 |3 # CJK UNIFIED IDEOGRAPH \x9A\xB1 |0 # CJK UNIFIED IDEOGRAPH \x8D\xA3 |0 # CJK UNIFIED IDEOGRAPH \xFA\x97 |0 # CJK UNIFIED IDEOGRAPH + \xED\x7A |3 # CJK UNIFIED IDEOGRAPH \x92\x52 |0 # CJK UNIFIED IDEOGRAPH \x9A\xAE |0 # CJK UNIFIED IDEOGRAPH \x92\xD8 |0 # CJK UNIFIED IDEOGRAPH @@ -1577,13 +1664,16 @@ \x9A\xB7 |0 # CJK UNIFIED IDEOGRAPH \x9A\xB8 |0 # CJK UNIFIED IDEOGRAPH \xFA\x98 |0 # CJK UNIFIED IDEOGRAPH + \xED\x7B |3 # CJK UNIFIED IDEOGRAPH \x9A\xB9 |0 # CJK UNIFIED IDEOGRAPH \x9A\xB6 |0 # CJK UNIFIED IDEOGRAPH \x9A\xAF |0 # CJK UNIFIED IDEOGRAPH \x9A\xBA |0 # CJK UNIFIED IDEOGRAPH \x9A\xBB |0 # CJK UNIFIED IDEOGRAPH \xFA\x9A |0 # CJK UNIFIED IDEOGRAPH + \xED\x7D |3 # CJK UNIFIED IDEOGRAPH \xFA\x99 |0 # CJK UNIFIED IDEOGRAPH + \xED\x7C |3 # CJK UNIFIED IDEOGRAPH \x96\x84 |0 # CJK UNIFIED IDEOGRAPH \x8F\xE9 |0 # CJK UNIFIED IDEOGRAPH \x9A\xBD |0 # CJK UNIFIED IDEOGRAPH @@ -1644,11 +1734,13 @@ \x91\x9D |0 # CJK UNIFIED IDEOGRAPH \x92\xC4 |0 # CJK UNIFIED IDEOGRAPH \xFA\x9D |0 # CJK UNIFIED IDEOGRAPH + \xED\x81 |3 # CJK UNIFIED IDEOGRAPH \x9A\xD0 |0 # CJK UNIFIED IDEOGRAPH \x96\x6E |0 # CJK UNIFIED IDEOGRAPH \x9A\xD1 |0 # CJK UNIFIED IDEOGRAPH \x9A\xD6 |0 # CJK UNIFIED IDEOGRAPH \xFA\x9E |0 # CJK UNIFIED IDEOGRAPH + \xED\x82 |3 # CJK UNIFIED IDEOGRAPH \x95\xAD |0 # CJK UNIFIED IDEOGRAPH \x9A\xD5 |0 # CJK UNIFIED IDEOGRAPH \x9A\xCF |0 # CJK UNIFIED IDEOGRAPH @@ -1688,6 +1780,7 @@ \x95\xCF |0 # CJK UNIFIED IDEOGRAPH \x9A\xE8 |0 # CJK UNIFIED IDEOGRAPH \xFA\x9F |0 # CJK UNIFIED IDEOGRAPH + \xED\x83 |3 # CJK UNIFIED IDEOGRAPH \x89\xC4 |0 # CJK UNIFIED IDEOGRAPH \x9A\xE9 |0 # CJK UNIFIED IDEOGRAPH \x97\x5B |0 # CJK UNIFIED IDEOGRAPH @@ -1720,16 +1813,20 @@ \x9A\xF4 |0 # CJK UNIFIED IDEOGRAPH \x8C\x5F |0 # CJK UNIFIED IDEOGRAPH \xFA\xA0 |0 # CJK UNIFIED IDEOGRAPH + \xED\x84 |3 # CJK UNIFIED IDEOGRAPH \x96\x7A |0 # CJK UNIFIED IDEOGRAPH 
\x9A\xF3 |0 # CJK UNIFIED IDEOGRAPH \x93\x85 |0 # CJK UNIFIED IDEOGRAPH \x9A\xF7 |0 # CJK UNIFIED IDEOGRAPH \x9A\xF6 |0 # CJK UNIFIED IDEOGRAPH \xFA\xA1 |0 # CJK UNIFIED IDEOGRAPH + \xED\x85 |3 # CJK UNIFIED IDEOGRAPH \xFA\xA2 |0 # CJK UNIFIED IDEOGRAPH + \xED\x86 |3 # CJK UNIFIED IDEOGRAPH \x9A\xF9 |0 # CJK UNIFIED IDEOGRAPH \x9A\xF8 |0 # CJK UNIFIED IDEOGRAPH \xFA\xA3 |0 # CJK UNIFIED IDEOGRAPH + \xED\x87 |3 # CJK UNIFIED IDEOGRAPH \x89\x9C |0 # CJK UNIFIED IDEOGRAPH \x9A\xFA |0 # CJK UNIFIED IDEOGRAPH \x8F\xA7 |0 # CJK UNIFIED IDEOGRAPH @@ -1754,12 +1851,14 @@ \x9B\x42 |0 # CJK UNIFIED IDEOGRAPH \x9B\x45 |0 # CJK UNIFIED IDEOGRAPH \xFA\xA4 |0 # CJK UNIFIED IDEOGRAPH + \xED\x88 |3 # CJK UNIFIED IDEOGRAPH \x91\xC3 |0 # CJK UNIFIED IDEOGRAPH \x96\x57 |0 # CJK UNIFIED IDEOGRAPH \x93\x69 |0 # CJK UNIFIED IDEOGRAPH \x9B\x46 |0 # CJK UNIFIED IDEOGRAPH \x96\x85 |0 # CJK UNIFIED IDEOGRAPH \xFA\xA5 |0 # CJK UNIFIED IDEOGRAPH + \xED\x89 |3 # CJK UNIFIED IDEOGRAPH \x8D\xC8 |0 # CJK UNIFIED IDEOGRAPH \x8F\xA8 |0 # CJK UNIFIED IDEOGRAPH \x9B\x47 |0 # CJK UNIFIED IDEOGRAPH @@ -1844,6 +1943,7 @@ \x8D\x45 |0 # CJK UNIFIED IDEOGRAPH \x9B\x73 |0 # CJK UNIFIED IDEOGRAPH \xFA\xA6 |0 # CJK UNIFIED IDEOGRAPH + \xED\x8A |3 # CJK UNIFIED IDEOGRAPH \x8E\x9A |0 # CJK UNIFIED IDEOGRAPH \x91\xB6 |0 # CJK UNIFIED IDEOGRAPH \x9B\x74 |0 # CJK UNIFIED IDEOGRAPH @@ -1897,6 +1997,7 @@ \x97\x65 |0 # CJK UNIFIED IDEOGRAPH \x8F\x68 |0 # CJK UNIFIED IDEOGRAPH \xFA\xA7 |0 # CJK UNIFIED IDEOGRAPH + \xED\x8B |3 # CJK UNIFIED IDEOGRAPH \x8E\xE2 |0 # CJK UNIFIED IDEOGRAPH \x9B\x83 |0 # CJK UNIFIED IDEOGRAPH \x8A\xF1 |0 # CJK UNIFIED IDEOGRAPH @@ -1910,6 +2011,7 @@ \x8B\xF5 |0 # CJK UNIFIED IDEOGRAPH \x9B\x86 |0 # CJK UNIFIED IDEOGRAPH \xFA\xA9 |0 # CJK UNIFIED IDEOGRAPH + \xED\x8D |3 # CJK UNIFIED IDEOGRAPH \x8A\xB0 |0 # CJK UNIFIED IDEOGRAPH \x90\x51 |0 # CJK UNIFIED IDEOGRAPH \x9B\x8B |0 # CJK UNIFIED IDEOGRAPH @@ -1924,6 +2026,7 @@ \x90\x52 |0 # CJK UNIFIED IDEOGRAPH \x9B\x8D |0 # CJK UNIFIED IDEOGRAPH 
\xFA\xAA |0 # CJK UNIFIED IDEOGRAPH + \xED\x8E |3 # CJK UNIFIED IDEOGRAPH \x97\xBE |0 # CJK UNIFIED IDEOGRAPH \x9B\x8E |0 # CJK UNIFIED IDEOGRAPH \x9B\x90 |0 # CJK UNIFIED IDEOGRAPH @@ -1951,6 +2054,7 @@ \x90\xEB |0 # CJK UNIFIED IDEOGRAPH \x8F\xAE |0 # CJK UNIFIED IDEOGRAPH \xFA\xAB |0 # CJK UNIFIED IDEOGRAPH + \xED\x8F |3 # CJK UNIFIED IDEOGRAPH \x9B\x96 |0 # CJK UNIFIED IDEOGRAPH \x9B\x97 |0 # CJK UNIFIED IDEOGRAPH \x96\xDE |0 # CJK UNIFIED IDEOGRAPH @@ -1996,6 +2100,7 @@ \x9B\xA9 |0 # CJK UNIFIED IDEOGRAPH \x89\xAA |0 # CJK UNIFIED IDEOGRAPH \xFA\xAC |0 # CJK UNIFIED IDEOGRAPH + \xED\x90 |3 # CJK UNIFIED IDEOGRAPH \x91\x5A |0 # CJK UNIFIED IDEOGRAPH \x8A\xE2 |0 # CJK UNIFIED IDEOGRAPH \x9B\xAB |0 # CJK UNIFIED IDEOGRAPH @@ -2006,6 +2111,7 @@ \x9B\xAF |0 # CJK UNIFIED IDEOGRAPH \x8A\xDD |0 # CJK UNIFIED IDEOGRAPH \xFA\xAD |0 # CJK UNIFIED IDEOGRAPH + \xED\x91 |3 # CJK UNIFIED IDEOGRAPH \x9B\xAC |0 # CJK UNIFIED IDEOGRAPH \x9B\xAE |0 # CJK UNIFIED IDEOGRAPH \x9B\xB1 |0 # CJK UNIFIED IDEOGRAPH @@ -2021,6 +2127,7 @@ \x95\xF5 |0 # CJK UNIFIED IDEOGRAPH \x95\xF4 |0 # CJK UNIFIED IDEOGRAPH \xFA\xAE |0 # CJK UNIFIED IDEOGRAPH + \xED\x92 |3 # CJK UNIFIED IDEOGRAPH \x93\x87 |0 # CJK UNIFIED IDEOGRAPH \x9B\xB6 |0 # CJK UNIFIED IDEOGRAPH \x8F\x73 |0 # CJK UNIFIED IDEOGRAPH @@ -2040,18 +2147,22 @@ \x9B\xBE |0 # CJK UNIFIED IDEOGRAPH \x9B\xC2 |0 # CJK UNIFIED IDEOGRAPH \xFA\xAF |0 # CJK UNIFIED IDEOGRAPH + \xED\x93 |3 # CJK UNIFIED IDEOGRAPH \x95\xF6 |0 # CJK UNIFIED IDEOGRAPH \xFA\xB2 |0 # CJK UNIFIED IDEOGRAPH + \xED\x96 |3 # CJK UNIFIED IDEOGRAPH \x9B\xC9 |0 # CJK UNIFIED IDEOGRAPH \x9B\xC6 |0 # CJK UNIFIED IDEOGRAPH \x9B\xC8 |0 # CJK UNIFIED IDEOGRAPH \x97\x92 |0 # CJK UNIFIED IDEOGRAPH \x9B\xC7 |0 # CJK UNIFIED IDEOGRAPH \xFA\xB0 |0 # CJK UNIFIED IDEOGRAPH + \xED\x94 |3 # CJK UNIFIED IDEOGRAPH \x9B\xBD |0 # CJK UNIFIED IDEOGRAPH \x90\x93 |0 # CJK UNIFIED IDEOGRAPH \x9B\xCA |0 # CJK UNIFIED IDEOGRAPH \xFA\xB3 |0 # CJK UNIFIED IDEOGRAPH + \xED\x97 |3 # CJK UNIFIED 
IDEOGRAPH \x8D\xB5 |0 # CJK UNIFIED IDEOGRAPH \x9B\xCB |0 # CJK UNIFIED IDEOGRAPH \x9B\xCC |0 # CJK UNIFIED IDEOGRAPH @@ -2067,7 +2178,9 @@ \x9B\xD3 |0 # CJK UNIFIED IDEOGRAPH \x9B\xD6 |0 # CJK UNIFIED IDEOGRAPH \xFA\xB4 |0 # CJK UNIFIED IDEOGRAPH + \xED\x98 |3 # CJK UNIFIED IDEOGRAPH \xFA\xB5 |0 # CJK UNIFIED IDEOGRAPH + \xED\x99 |3 # CJK UNIFIED IDEOGRAPH \x97\xE4 |0 # CJK UNIFIED IDEOGRAPH \x9B\xD7 |0 # CJK UNIFIED IDEOGRAPH \x9B\xD4 |0 # CJK UNIFIED IDEOGRAPH @@ -2075,6 +2188,7 @@ \x8A\xDE |0 # CJK UNIFIED IDEOGRAPH \x9B\xD9 |0 # CJK UNIFIED IDEOGRAPH \xFA\xB6 |0 # CJK UNIFIED IDEOGRAPH + \xED\x9A |3 # CJK UNIFIED IDEOGRAPH \x9B\xDB |0 # CJK UNIFIED IDEOGRAPH \x9B\xDA |0 # CJK UNIFIED IDEOGRAPH \x9B\xDC |0 # CJK UNIFIED IDEOGRAPH @@ -2220,6 +2334,7 @@ \x92\x6F |0 # CJK UNIFIED IDEOGRAPH \x92\xED |0 # CJK UNIFIED IDEOGRAPH \xFA\xB7 |0 # CJK UNIFIED IDEOGRAPH + \xED\x9B |3 # CJK UNIFIED IDEOGRAPH \x96\xED |0 # CJK UNIFIED IDEOGRAPH \x8C\xB7 |0 # CJK UNIFIED IDEOGRAPH \x8C\xCA |0 # CJK UNIFIED IDEOGRAPH @@ -2228,6 +2343,7 @@ \x9C\x5E |0 # CJK UNIFIED IDEOGRAPH \x8E\xE3 |0 # CJK UNIFIED IDEOGRAPH \xFA\xB8 |0 # CJK UNIFIED IDEOGRAPH + \xED\x9C |3 # CJK UNIFIED IDEOGRAPH \x92\xA3 |0 # CJK UNIFIED IDEOGRAPH \x8B\xAD |0 # CJK UNIFIED IDEOGRAPH \x9C\x59 |0 # CJK UNIFIED IDEOGRAPH @@ -2235,6 +2351,7 @@ \x92\x65 |0 # CJK UNIFIED IDEOGRAPH \x9C\x5A |0 # CJK UNIFIED IDEOGRAPH \xFA\x67 |0 # CJK UNIFIED IDEOGRAPH + \xED\x4B |3 # CJK UNIFIED IDEOGRAPH \x9C\x5B |0 # CJK UNIFIED IDEOGRAPH \x8B\xAE |0 # CJK UNIFIED IDEOGRAPH \x9C\x5C |0 # CJK UNIFIED IDEOGRAPH @@ -2250,6 +2367,7 @@ \x8C\x60 |0 # CJK UNIFIED IDEOGRAPH \x95\x46 |0 # CJK UNIFIED IDEOGRAPH \xFA\xB9 |0 # CJK UNIFIED IDEOGRAPH + \xED\x9D |3 # CJK UNIFIED IDEOGRAPH \x8D\xCA |0 # CJK UNIFIED IDEOGRAPH \x95\x56 |0 # CJK UNIFIED IDEOGRAPH \x92\xA4 |0 # CJK UNIFIED IDEOGRAPH @@ -2291,6 +2409,7 @@ \x93\xBF |0 # CJK UNIFIED IDEOGRAPH \x92\xA5 |0 # CJK UNIFIED IDEOGRAPH \xFA\xBA |0 # CJK UNIFIED IDEOGRAPH + \xED\x9E |3 # CJK 
UNIFIED IDEOGRAPH \x93\x4F |0 # CJK UNIFIED IDEOGRAPH \x9C\x74 |0 # CJK UNIFIED IDEOGRAPH \x8B\x4A |0 # CJK UNIFIED IDEOGRAPH @@ -2305,6 +2424,7 @@ \x89\x9E |0 # CJK UNIFIED IDEOGRAPH \x9C\x7A |0 # CJK UNIFIED IDEOGRAPH \xFA\xBB |0 # CJK UNIFIED IDEOGRAPH + \xED\x9F |3 # CJK UNIFIED IDEOGRAPH \x92\x89 |0 # CJK UNIFIED IDEOGRAPH \x9C\x77 |0 # CJK UNIFIED IDEOGRAPH \x89\xF5 |0 # CJK UNIFIED IDEOGRAPH @@ -2350,6 +2470,7 @@ \x9C\x99 |0 # CJK UNIFIED IDEOGRAPH \x9C\x8B |0 # CJK UNIFIED IDEOGRAPH \xFA\xBC |0 # CJK UNIFIED IDEOGRAPH + \xED\xA0 |3 # CJK UNIFIED IDEOGRAPH \x9C\x8F |0 # CJK UNIFIED IDEOGRAPH \x9C\x7E |0 # CJK UNIFIED IDEOGRAPH \x89\xF8 |0 # CJK UNIFIED IDEOGRAPH @@ -2370,8 +2491,10 @@ \x9C\x9D |0 # CJK UNIFIED IDEOGRAPH \x9C\x9F |0 # CJK UNIFIED IDEOGRAPH \xFA\xBD |0 # CJK UNIFIED IDEOGRAPH + \xED\xA1 |3 # CJK UNIFIED IDEOGRAPH \x8E\xBB |0 # CJK UNIFIED IDEOGRAPH \xFA\xBE |0 # CJK UNIFIED IDEOGRAPH + \xED\xA2 |3 # CJK UNIFIED IDEOGRAPH \x9C\xA5 |0 # CJK UNIFIED IDEOGRAPH \x92\xEE |0 # CJK UNIFIED IDEOGRAPH \x9C\x9B |0 # CJK UNIFIED IDEOGRAPH @@ -2402,10 +2525,12 @@ \x98\x66 |0 # CJK UNIFIED IDEOGRAPH \x9C\xA9 |0 # CJK UNIFIED IDEOGRAPH \xFA\xC0 |0 # CJK UNIFIED IDEOGRAPH + \xED\xA4 |3 # CJK UNIFIED IDEOGRAPH \x9C\xAF |0 # CJK UNIFIED IDEOGRAPH \x8D\x9B |0 # CJK UNIFIED IDEOGRAPH \x90\xC9 |0 # CJK UNIFIED IDEOGRAPH \xFA\xBF |0 # CJK UNIFIED IDEOGRAPH + \xED\xA3 |3 # CJK UNIFIED IDEOGRAPH \x88\xD2 |0 # CJK UNIFIED IDEOGRAPH \x9C\xA8 |0 # CJK UNIFIED IDEOGRAPH \x9C\xA6 |0 # CJK UNIFIED IDEOGRAPH @@ -2415,6 +2540,7 @@ \x91\xC4 |0 # CJK UNIFIED IDEOGRAPH \x9C\xBB |0 # CJK UNIFIED IDEOGRAPH \xFA\xC2 |0 # CJK UNIFIED IDEOGRAPH + \xED\xA6 |3 # CJK UNIFIED IDEOGRAPH \x91\x7A |0 # CJK UNIFIED IDEOGRAPH \x9C\xB6 |0 # CJK UNIFIED IDEOGRAPH \x9C\xB3 |0 # CJK UNIFIED IDEOGRAPH @@ -2432,18 +2558,22 @@ \x9C\xBD |0 # CJK UNIFIED IDEOGRAPH \x88\xD3 |0 # CJK UNIFIED IDEOGRAPH \xFA\xC3 |0 # CJK UNIFIED IDEOGRAPH + \xED\xA7 |3 # CJK UNIFIED IDEOGRAPH \x9C\xB1 |0 # CJK UNIFIED 
IDEOGRAPH \x8B\xF0 |0 # CJK UNIFIED IDEOGRAPH \x88\xA4 |0 # CJK UNIFIED IDEOGRAPH \x8A\xB4 |0 # CJK UNIFIED IDEOGRAPH \xFA\xC1 |0 # CJK UNIFIED IDEOGRAPH + \xED\xA5 |3 # CJK UNIFIED IDEOGRAPH \x9C\xB9 |0 # CJK UNIFIED IDEOGRAPH \x9C\xC1 |0 # CJK UNIFIED IDEOGRAPH \x9C\xC0 |0 # CJK UNIFIED IDEOGRAPH \x9C\xC5 |0 # CJK UNIFIED IDEOGRAPH \xFA\xC5 |0 # CJK UNIFIED IDEOGRAPH + \xED\xA9 |3 # CJK UNIFIED IDEOGRAPH \x9C\xC6 |0 # CJK UNIFIED IDEOGRAPH \xFA\xC4 |0 # CJK UNIFIED IDEOGRAPH + \xED\xA8 |3 # CJK UNIFIED IDEOGRAPH \x9C\xC4 |0 # CJK UNIFIED IDEOGRAPH \x9C\xC7 |0 # CJK UNIFIED IDEOGRAPH \x9C\xBF |0 # CJK UNIFIED IDEOGRAPH @@ -2489,6 +2619,7 @@ \x9C\xDC |0 # CJK UNIFIED IDEOGRAPH \x9C\xD9 |0 # CJK UNIFIED IDEOGRAPH \xFA\xC6 |0 # CJK UNIFIED IDEOGRAPH + \xED\xAA |3 # CJK UNIFIED IDEOGRAPH \x9C\xD8 |0 # CJK UNIFIED IDEOGRAPH \x9C\xDD |0 # CJK UNIFIED IDEOGRAPH \x95\xAE |0 # CJK UNIFIED IDEOGRAPH @@ -2534,6 +2665,7 @@ \x89\xE4 |0 # CJK UNIFIED IDEOGRAPH \x89\xFA |0 # CJK UNIFIED IDEOGRAPH \xFA\xC7 |0 # CJK UNIFIED IDEOGRAPH + \xED\xAB |3 # CJK UNIFIED IDEOGRAPH \x9C\xFB |0 # CJK UNIFIED IDEOGRAPH \x88\xBD |0 # CJK UNIFIED IDEOGRAPH \x90\xCA |0 # CJK UNIFIED IDEOGRAPH @@ -2594,6 +2726,7 @@ \x94\xB2 |0 # CJK UNIFIED IDEOGRAPH \x91\xF0 |0 # CJK UNIFIED IDEOGRAPH \xFA\xC8 |0 # CJK UNIFIED IDEOGRAPH + \xED\xAC |3 # CJK UNIFIED IDEOGRAPH \x94\xE2 |0 # CJK UNIFIED IDEOGRAPH \x9D\xAB |0 # CJK UNIFIED IDEOGRAPH \x95\xF8 |0 # CJK UNIFIED IDEOGRAPH @@ -2716,6 +2849,7 @@ \x8A\xF6 |0 # CJK UNIFIED IDEOGRAPH \x89\x87 |0 # CJK UNIFIED IDEOGRAPH \xFA\xC9 |0 # CJK UNIFIED IDEOGRAPH + \xED\xAD |3 # CJK UNIFIED IDEOGRAPH \x9D\x88 |0 # CJK UNIFIED IDEOGRAPH \x97\x68 |0 # CJK UNIFIED IDEOGRAPH \x9D\x8C |0 # CJK UNIFIED IDEOGRAPH @@ -2737,6 +2871,7 @@ \x9D\x97 |0 # CJK UNIFIED IDEOGRAPH \x93\x45 |0 # CJK UNIFIED IDEOGRAPH \xFA\xCA |0 # CJK UNIFIED IDEOGRAPH + \xED\xAE |3 # CJK UNIFIED IDEOGRAPH \x9D\x94 |0 # CJK UNIFIED IDEOGRAPH \x96\x80 |0 # CJK UNIFIED IDEOGRAPH \x9D\x95 |0 # CJK UNIFIED 
IDEOGRAPH @@ -2750,6 +2885,7 @@ \x9D\x99 |0 # CJK UNIFIED IDEOGRAPH \x94\x51 |0 # CJK UNIFIED IDEOGRAPH \xFA\xCB |0 # CJK UNIFIED IDEOGRAPH + \xED\xAF |3 # CJK UNIFIED IDEOGRAPH \x93\xB3 |0 # CJK UNIFIED IDEOGRAPH \x93\x50 |0 # CJK UNIFIED IDEOGRAPH \x9D\x9B |0 # CJK UNIFIED IDEOGRAPH @@ -2768,6 +2904,7 @@ \x9D\xA2 |0 # CJK UNIFIED IDEOGRAPH \x91\x80 |0 # CJK UNIFIED IDEOGRAPH \xFA\xCC |0 # CJK UNIFIED IDEOGRAPH + \xED\xB0 |3 # CJK UNIFIED IDEOGRAPH \x9D\xA0 |0 # CJK UNIFIED IDEOGRAPH \x9D\x5E |0 # CJK UNIFIED IDEOGRAPH \x9D\xA4 |0 # CJK UNIFIED IDEOGRAPH @@ -2811,6 +2948,7 @@ \x9D\xC1 |0 # CJK UNIFIED IDEOGRAPH \x9D\xC4 |0 # CJK UNIFIED IDEOGRAPH \xFA\xCD |0 # CJK UNIFIED IDEOGRAPH + \xED\xB1 |3 # CJK UNIFIED IDEOGRAPH \x95\x71 |0 # CJK UNIFIED IDEOGRAPH \x8B\x7E |0 # CJK UNIFIED IDEOGRAPH \x9D\xC3 |0 # CJK UNIFIED IDEOGRAPH @@ -2883,11 +3021,13 @@ \x89\xA0 |0 # CJK UNIFIED IDEOGRAPH \x9D\xDF |0 # CJK UNIFIED IDEOGRAPH \xFA\xCE |0 # CJK UNIFIED IDEOGRAPH + \xED\xB2 |3 # CJK UNIFIED IDEOGRAPH \x8D\x56 |0 # CJK UNIFIED IDEOGRAPH \x9D\xDE |0 # CJK UNIFIED IDEOGRAPH \x8D\xA9 |0 # CJK UNIFIED IDEOGRAPH \x8F\xB8 |0 # CJK UNIFIED IDEOGRAPH \xFA\xD1 |0 # CJK UNIFIED IDEOGRAPH + \xED\xB5 |3 # CJK UNIFIED IDEOGRAPH \x9D\xDD |0 # CJK UNIFIED IDEOGRAPH \x8F\xB9 |0 # CJK UNIFIED IDEOGRAPH \x96\xBE |0 # CJK UNIFIED IDEOGRAPH @@ -2895,22 +3035,28 @@ \x88\xD5 |0 # CJK UNIFIED IDEOGRAPH \x90\xCC |0 # CJK UNIFIED IDEOGRAPH \xFA\xCF |0 # CJK UNIFIED IDEOGRAPH + \xED\xB3 |3 # CJK UNIFIED IDEOGRAPH \x9D\xE4 |0 # CJK UNIFIED IDEOGRAPH \xFA\xD3 |0 # CJK UNIFIED IDEOGRAPH + \xED\xB7 |3 # CJK UNIFIED IDEOGRAPH \x90\xAF |0 # CJK UNIFIED IDEOGRAPH \x89\x66 |0 # CJK UNIFIED IDEOGRAPH \xFA\xD4 |0 # CJK UNIFIED IDEOGRAPH + \xED\xB8 |3 # CJK UNIFIED IDEOGRAPH \x8F\x74 |0 # CJK UNIFIED IDEOGRAPH \x96\x86 |0 # CJK UNIFIED IDEOGRAPH \x8D\xF0 |0 # CJK UNIFIED IDEOGRAPH \x8F\xBA |0 # CJK UNIFIED IDEOGRAPH \xFA\xD2 |0 # CJK UNIFIED IDEOGRAPH + \xED\xB6 |3 # CJK UNIFIED IDEOGRAPH \x90\xA5 |0 # CJK 
UNIFIED IDEOGRAPH \xFA\x63 |0 # CJK UNIFIED IDEOGRAPH + \xED\x47 |3 # CJK UNIFIED IDEOGRAPH \x9D\xE3 |0 # CJK UNIFIED IDEOGRAPH \x9D\xE1 |0 # CJK UNIFIED IDEOGRAPH \x9D\xE2 |0 # CJK UNIFIED IDEOGRAPH \xFA\xD0 |0 # CJK UNIFIED IDEOGRAPH + \xED\xB4 |3 # CJK UNIFIED IDEOGRAPH \x92\x8B |0 # CJK UNIFIED IDEOGRAPH \x9E\x45 |0 # CJK UNIFIED IDEOGRAPH \x9D\xE8 |0 # CJK UNIFIED IDEOGRAPH @@ -2922,13 +3068,16 @@ \x9D\xE5 |0 # CJK UNIFIED IDEOGRAPH \x8E\x4E |0 # CJK UNIFIED IDEOGRAPH \xFA\xD6 |0 # CJK UNIFIED IDEOGRAPH + \xED\xBA |3 # CJK UNIFIED IDEOGRAPH \xFA\xD7 |0 # CJK UNIFIED IDEOGRAPH + \xED\xBB |3 # CJK UNIFIED IDEOGRAPH \x9D\xEA |0 # CJK UNIFIED IDEOGRAPH \x9D\xE9 |0 # CJK UNIFIED IDEOGRAPH \x9D\xEE |0 # CJK UNIFIED IDEOGRAPH \x9D\xEF |0 # CJK UNIFIED IDEOGRAPH \x9D\xEB |0 # CJK UNIFIED IDEOGRAPH \xFA\xD5 |0 # CJK UNIFIED IDEOGRAPH + \xED\xB9 |3 # CJK UNIFIED IDEOGRAPH \x8A\x41 |0 # CJK UNIFIED IDEOGRAPH \x9D\xEC |0 # CJK UNIFIED IDEOGRAPH \x9D\xED |0 # CJK UNIFIED IDEOGRAPH @@ -2937,6 +3086,7 @@ \x8C\x69 |0 # CJK UNIFIED IDEOGRAPH \x9D\xF0 |0 # CJK UNIFIED IDEOGRAPH \xFA\xD9 |0 # CJK UNIFIED IDEOGRAPH + \xED\xBD |3 # CJK UNIFIED IDEOGRAPH \x90\xB0 |0 # CJK UNIFIED IDEOGRAPH \x8F\xBB |0 # CJK UNIFIED IDEOGRAPH \x92\x71 |0 # CJK UNIFIED IDEOGRAPH @@ -2952,19 +3102,23 @@ \x88\xC3 |0 # CJK UNIFIED IDEOGRAPH \x9D\xF6 |0 # CJK UNIFIED IDEOGRAPH \xFA\xDA |0 # CJK UNIFIED IDEOGRAPH + \xED\xBE |3 # CJK UNIFIED IDEOGRAPH \x9D\xF7 |0 # CJK UNIFIED IDEOGRAPH \xFA\xDB |0 # CJK UNIFIED IDEOGRAPH + \xED\xBF |3 # CJK UNIFIED IDEOGRAPH \x92\xA8 |0 # CJK UNIFIED IDEOGRAPH \x97\xEF |0 # CJK UNIFIED IDEOGRAPH \x8E\x62 |0 # CJK UNIFIED IDEOGRAPH \x95\xE9 |0 # CJK UNIFIED IDEOGRAPH \xFA\xDC |0 # CJK UNIFIED IDEOGRAPH + \xED\xC0 |3 # CJK UNIFIED IDEOGRAPH \x96\x5C |0 # CJK UNIFIED IDEOGRAPH \x9E\x41 |0 # CJK UNIFIED IDEOGRAPH \x9D\xF9 |0 # CJK UNIFIED IDEOGRAPH \x9D\xFC |0 # CJK UNIFIED IDEOGRAPH \x9D\xFB |0 # CJK UNIFIED IDEOGRAPH \xFA\xDD |0 # CJK UNIFIED IDEOGRAPH + \xED\xC1 |3 # CJK 
UNIFIED IDEOGRAPH \x9D\xF8 |0 # CJK UNIFIED IDEOGRAPH \x9E\x40 |0 # CJK UNIFIED IDEOGRAPH \x93\xDC |0 # CJK UNIFIED IDEOGRAPH @@ -2986,7 +3140,9 @@ \x8F\x91 |0 # CJK UNIFIED IDEOGRAPH \x91\x82 |0 # CJK UNIFIED IDEOGRAPH \xFA\xDE |0 # CJK UNIFIED IDEOGRAPH + \xED\xC2 |3 # CJK UNIFIED IDEOGRAPH \xFA\x66 |0 # CJK UNIFIED IDEOGRAPH + \xED\x4A |3 # CJK UNIFIED IDEOGRAPH \x99\xD6 |0 # CJK UNIFIED IDEOGRAPH \x91\x5D |0 # CJK UNIFIED IDEOGRAPH \x91\x5C |0 # CJK UNIFIED IDEOGRAPH @@ -2998,6 +3154,7 @@ \x95\xFC |0 # CJK UNIFIED IDEOGRAPH \x95\x9E |0 # CJK UNIFIED IDEOGRAPH \xFA\xDF |0 # CJK UNIFIED IDEOGRAPH + \xED\xC3 |3 # CJK UNIFIED IDEOGRAPH \x9E\x4B |0 # CJK UNIFIED IDEOGRAPH \x8D\xF1 |0 # CJK UNIFIED IDEOGRAPH \x92\xBD |0 # CJK UNIFIED IDEOGRAPH @@ -3043,6 +3200,7 @@ \x9E\x5C |0 # CJK UNIFIED IDEOGRAPH \x97\x88 |0 # CJK UNIFIED IDEOGRAPH \xFA\xE1 |0 # CJK UNIFIED IDEOGRAPH + \xED\xC5 |3 # CJK UNIFIED IDEOGRAPH \x9E\x61 |0 # CJK UNIFIED IDEOGRAPH \x8D\x59 |0 # CJK UNIFIED IDEOGRAPH \x94\x74 |0 # CJK UNIFIED IDEOGRAPH @@ -3079,7 +3237,9 @@ \x9E\x6D |0 # CJK UNIFIED IDEOGRAPH \x9E\x73 |0 # CJK UNIFIED IDEOGRAPH \xFA\xE2 |0 # CJK UNIFIED IDEOGRAPH + \xED\xC6 |3 # CJK UNIFIED IDEOGRAPH \xFA\xE4 |0 # CJK UNIFIED IDEOGRAPH + \xED\xC8 |3 # CJK UNIFIED IDEOGRAPH \x91\xC6 |0 # CJK UNIFIED IDEOGRAPH \x95\xBF |0 # CJK UNIFIED IDEOGRAPH \x9E\x75 |0 # CJK UNIFIED IDEOGRAPH @@ -3109,6 +3269,7 @@ \x96\x8F |0 # CJK UNIFIED IDEOGRAPH \x8A\x60 |0 # CJK UNIFIED IDEOGRAPH \xFA\xE5 |0 # CJK UNIFIED IDEOGRAPH + \xED\xC9 |3 # CJK UNIFIED IDEOGRAPH \x92\xCC |0 # CJK UNIFIED IDEOGRAPH \x93\xC8 |0 # CJK UNIFIED IDEOGRAPH \x89\x68 |0 # CJK UNIFIED IDEOGRAPH @@ -3132,6 +3293,7 @@ \x8C\x6A |0 # CJK UNIFIED IDEOGRAPH \x93\x8D |0 # CJK UNIFIED IDEOGRAPH \xFA\xE6 |0 # CJK UNIFIED IDEOGRAPH + \xED\xCA |3 # CJK UNIFIED IDEOGRAPH \x9E\x79 |0 # CJK UNIFIED IDEOGRAPH \x88\xC4 |0 # CJK UNIFIED IDEOGRAPH \x9E\x7C |0 # CJK UNIFIED IDEOGRAPH @@ -3139,6 +3301,7 @@ \x8B\xCB |0 # CJK UNIFIED IDEOGRAPH \x8C\x4B |0 
# CJK UNIFIED IDEOGRAPH \xFA\xE3 |0 # CJK UNIFIED IDEOGRAPH + \xED\xC7 |3 # CJK UNIFIED IDEOGRAPH \x8A\xBA |0 # CJK UNIFIED IDEOGRAPH \x8B\x6A |0 # CJK UNIFIED IDEOGRAPH \x9E\x82 |0 # CJK UNIFIED IDEOGRAPH @@ -3181,11 +3344,13 @@ \x8A\xFC |0 # CJK UNIFIED IDEOGRAPH \x9E\xB0 |0 # CJK UNIFIED IDEOGRAPH \xFA\x64 |0 # CJK UNIFIED IDEOGRAPH + \xED\x48 |3 # CJK UNIFIED IDEOGRAPH \x96\xC7 |0 # CJK UNIFIED IDEOGRAPH \x9E\x97 |0 # CJK UNIFIED IDEOGRAPH \x8A\xFB |0 # CJK UNIFIED IDEOGRAPH \x9E\x9E |0 # CJK UNIFIED IDEOGRAPH \xFA\xE7 |0 # CJK UNIFIED IDEOGRAPH + \xED\xCB |3 # CJK UNIFIED IDEOGRAPH \x96\x5F |0 # CJK UNIFIED IDEOGRAPH \x9E\x9F |0 # CJK UNIFIED IDEOGRAPH \x9E\xA1 |0 # CJK UNIFIED IDEOGRAPH @@ -3243,6 +3408,7 @@ \x9E\xBE |0 # CJK UNIFIED IDEOGRAPH \x93\xE8 |0 # CJK UNIFIED IDEOGRAPH \xFA\xE9 |0 # CJK UNIFIED IDEOGRAPH + \xED\xCD |3 # CJK UNIFIED IDEOGRAPH \x9E\xC2 |0 # CJK UNIFIED IDEOGRAPH \x9E\xB5 |0 # CJK UNIFIED IDEOGRAPH \x8B\xC6 |0 # CJK UNIFIED IDEOGRAPH @@ -3265,6 +3431,7 @@ \x98\x50 |0 # CJK UNIFIED IDEOGRAPH \x9E\xD5 |0 # CJK UNIFIED IDEOGRAPH \xFA\xEB |0 # CJK UNIFIED IDEOGRAPH + \xED\xCF |3 # CJK UNIFIED IDEOGRAPH \x90\x59 |0 # CJK UNIFIED IDEOGRAPH \x9E\xD4 |0 # CJK UNIFIED IDEOGRAPH \x9E\xD3 |0 # CJK UNIFIED IDEOGRAPH @@ -3291,6 +3458,7 @@ \x9E\xCD |0 # CJK UNIFIED IDEOGRAPH \x9E\xD7 |0 # CJK UNIFIED IDEOGRAPH \xFA\xEC |0 # CJK UNIFIED IDEOGRAPH + \xED\xD0 |3 # CJK UNIFIED IDEOGRAPH \x9E\xDF |0 # CJK UNIFIED IDEOGRAPH \x9E\xD8 |0 # CJK UNIFIED IDEOGRAPH \x9E\xE5 |0 # CJK UNIFIED IDEOGRAPH @@ -3321,6 +3489,7 @@ \x8A\x7E |0 # CJK UNIFIED IDEOGRAPH \x9E\xD1 |0 # CJK UNIFIED IDEOGRAPH \xFA\xED |0 # CJK UNIFIED IDEOGRAPH + \xED\xD1 |3 # CJK UNIFIED IDEOGRAPH \x8F\xBF |0 # CJK UNIFIED IDEOGRAPH \x9E\xEE |0 # CJK UNIFIED IDEOGRAPH \x9E\xF5 |0 # CJK UNIFIED IDEOGRAPH @@ -3329,6 +3498,7 @@ \x92\x4D |0 # CJK UNIFIED IDEOGRAPH \x9E\xEB |0 # CJK UNIFIED IDEOGRAPH \xFA\xEF |0 # CJK UNIFIED IDEOGRAPH + \xED\xD3 |3 # CJK UNIFIED IDEOGRAPH \x9E\xF0 |0 # CJK UNIFIED 
IDEOGRAPH \x9E\xF4 |0 # CJK UNIFIED IDEOGRAPH \x8B\xB4 |0 # CJK UNIFIED IDEOGRAPH @@ -3339,10 +3509,13 @@ \x9E\xF1 |0 # CJK UNIFIED IDEOGRAPH \x9E\xF3 |0 # CJK UNIFIED IDEOGRAPH \xFA\xEE |0 # CJK UNIFIED IDEOGRAPH + \xED\xD2 |3 # CJK UNIFIED IDEOGRAPH \x9E\xED |0 # CJK UNIFIED IDEOGRAPH \xFA\xF0 |0 # CJK UNIFIED IDEOGRAPH + \xED\xD4 |3 # CJK UNIFIED IDEOGRAPH \x9E\xEF |0 # CJK UNIFIED IDEOGRAPH \xFA\xF1 |0 # CJK UNIFIED IDEOGRAPH + \xED\xD5 |3 # CJK UNIFIED IDEOGRAPH \x8A\x80 |0 # CJK UNIFIED IDEOGRAPH \x92\x68 |0 # CJK UNIFIED IDEOGRAPH \x9E\xFA |0 # CJK UNIFIED IDEOGRAPH @@ -3370,7 +3543,9 @@ \x9F\x48 |0 # CJK UNIFIED IDEOGRAPH \x9F\x4A |0 # CJK UNIFIED IDEOGRAPH \xFA\xF2 |0 # CJK UNIFIED IDEOGRAPH + \xED\xD6 |3 # CJK UNIFIED IDEOGRAPH \xFA\xF3 |0 # CJK UNIFIED IDEOGRAPH + \xED\xD7 |3 # CJK UNIFIED IDEOGRAPH \x94\xA5 |0 # CJK UNIFIED IDEOGRAPH \x9F\x4D |0 # CJK UNIFIED IDEOGRAPH \x9F\x51 |0 # CJK UNIFIED IDEOGRAPH @@ -3454,6 +3629,7 @@ \x9F\x79 |0 # CJK UNIFIED IDEOGRAPH \x94\xE4 |0 # CJK UNIFIED IDEOGRAPH \xFA\xF4 |0 # CJK UNIFIED IDEOGRAPH + \xED\xD8 |3 # CJK UNIFIED IDEOGRAPH \x94\xF9 |0 # CJK UNIFIED IDEOGRAPH \x96\xD1 |0 # CJK UNIFIED IDEOGRAPH \x9F\x7A |0 # CJK UNIFIED IDEOGRAPH @@ -3475,6 +3651,7 @@ \x89\x69 |0 # CJK UNIFIED IDEOGRAPH \x94\xC3 |0 # CJK UNIFIED IDEOGRAPH \xFA\xF5 |0 # CJK UNIFIED IDEOGRAPH + \xED\xD9 |3 # CJK UNIFIED IDEOGRAPH \x92\xF3 |0 # CJK UNIFIED IDEOGRAPH \x8F\x60 |0 # CJK UNIFIED IDEOGRAPH \x8B\x81 |0 # CJK UNIFIED IDEOGRAPH @@ -3484,6 +3661,7 @@ \x8A\xBE |0 # CJK UNIFIED IDEOGRAPH \x89\x98 |0 # CJK UNIFIED IDEOGRAPH \xFA\xF6 |0 # CJK UNIFIED IDEOGRAPH + \xED\xDA |3 # CJK UNIFIED IDEOGRAPH \x93\xF0 |0 # CJK UNIFIED IDEOGRAPH \x9F\x87 |0 # CJK UNIFIED IDEOGRAPH \x8D\x5D |0 # CJK UNIFIED IDEOGRAPH @@ -3492,6 +3670,7 @@ \x9F\x91 |0 # CJK UNIFIED IDEOGRAPH \x9F\x8A |0 # CJK UNIFIED IDEOGRAPH \xFA\xF8 |0 # CJK UNIFIED IDEOGRAPH + \xED\xDC |3 # CJK UNIFIED IDEOGRAPH \x91\xBF |0 # CJK UNIFIED IDEOGRAPH \x8B\x82 |0 # CJK UNIFIED IDEOGRAPH 
\x9F\x92 |0 # CJK UNIFIED IDEOGRAPH @@ -3502,6 +3681,7 @@ \x9F\x8B |0 # CJK UNIFIED IDEOGRAPH \x97\x80 |0 # CJK UNIFIED IDEOGRAPH \xFA\xF7 |0 # CJK UNIFIED IDEOGRAPH + \xED\xDB |3 # CJK UNIFIED IDEOGRAPH \x92\xBE |0 # CJK UNIFIED IDEOGRAPH \x93\xD7 |0 # CJK UNIFIED IDEOGRAPH \x9F\x8C |0 # CJK UNIFIED IDEOGRAPH @@ -3537,6 +3717,7 @@ \x9F\x99 |0 # CJK UNIFIED IDEOGRAPH \x9F\xA2 |0 # CJK UNIFIED IDEOGRAPH \xFA\xF9 |0 # CJK UNIFIED IDEOGRAPH + \xED\xDD |3 # CJK UNIFIED IDEOGRAPH \x9F\xA0 |0 # CJK UNIFIED IDEOGRAPH \x9F\x9B |0 # CJK UNIFIED IDEOGRAPH \x96\x41 |0 # CJK UNIFIED IDEOGRAPH @@ -3550,6 +3731,7 @@ \x9F\x96 |0 # CJK UNIFIED IDEOGRAPH \x89\x6A |0 # CJK UNIFIED IDEOGRAPH \xFA\xFA |0 # CJK UNIFIED IDEOGRAPH + \xED\xDE |3 # CJK UNIFIED IDEOGRAPH \x97\x6D |0 # CJK UNIFIED IDEOGRAPH \x9F\xAE |0 # CJK UNIFIED IDEOGRAPH \x9F\xAD |0 # CJK UNIFIED IDEOGRAPH @@ -3584,22 +3766,26 @@ \x8A\x5C |0 # CJK UNIFIED IDEOGRAPH \x95\x82 |0 # CJK UNIFIED IDEOGRAPH \xFA\xFC |0 # CJK UNIFIED IDEOGRAPH + \xED\xE0 |3 # CJK UNIFIED IDEOGRAPH \x97\x81 |0 # CJK UNIFIED IDEOGRAPH \x8A\x43 |0 # CJK UNIFIED IDEOGRAPH \x90\x5A |0 # CJK UNIFIED IDEOGRAPH \x9F\xB3 |0 # CJK UNIFIED IDEOGRAPH \x9F\xB8 |0 # CJK UNIFIED IDEOGRAPH \xFA\xFB |0 # CJK UNIFIED IDEOGRAPH + \xED\xDF |3 # CJK UNIFIED IDEOGRAPH \x8F\xC1 |0 # CJK UNIFIED IDEOGRAPH \x97\x4F |0 # CJK UNIFIED IDEOGRAPH \x9F\xB5 |0 # CJK UNIFIED IDEOGRAPH \x9F\xB0 |0 # CJK UNIFIED IDEOGRAPH \x9F\xB6 |0 # CJK UNIFIED IDEOGRAPH \xFB\x40 |0 # CJK UNIFIED IDEOGRAPH + \xED\xE1 |3 # CJK UNIFIED IDEOGRAPH \x97\xDC |0 # CJK UNIFIED IDEOGRAPH \x93\x93 |0 # CJK UNIFIED IDEOGRAPH \x93\xC0 |0 # CJK UNIFIED IDEOGRAPH \xFB\x41 |0 # CJK UNIFIED IDEOGRAPH + \xED\xE2 |3 # CJK UNIFIED IDEOGRAPH \x8A\x55 |0 # CJK UNIFIED IDEOGRAPH \x89\x74 |0 # CJK UNIFIED IDEOGRAPH \x9F\xBC |0 # CJK UNIFIED IDEOGRAPH @@ -3612,6 +3798,7 @@ \x97\xD2 |0 # CJK UNIFIED IDEOGRAPH \x9F\xC3 |0 # CJK UNIFIED IDEOGRAPH \xFB\x42 |0 # CJK UNIFIED IDEOGRAPH + \xED\xE3 |3 # CJK UNIFIED IDEOGRAPH 
\x8F\x69 |0 # CJK UNIFIED IDEOGRAPH \x9F\xC5 |0 # CJK UNIFIED IDEOGRAPH \x9F\xCA |0 # CJK UNIFIED IDEOGRAPH @@ -3628,14 +3815,17 @@ \x9F\xCC |0 # CJK UNIFIED IDEOGRAPH \x90\x5B |0 # CJK UNIFIED IDEOGRAPH \xFB\x44 |0 # CJK UNIFIED IDEOGRAPH + \xED\xE5 |3 # CJK UNIFIED IDEOGRAPH \x8F\x7E |0 # CJK UNIFIED IDEOGRAPH \x95\xA3 |0 # CJK UNIFIED IDEOGRAPH \x8D\xAC |0 # CJK UNIFIED IDEOGRAPH \xFB\x43 |0 # CJK UNIFIED IDEOGRAPH + \xED\xE4 |3 # CJK UNIFIED IDEOGRAPH \x9F\xB9 |0 # CJK UNIFIED IDEOGRAPH \x9F\xC7 |0 # CJK UNIFIED IDEOGRAPH \x93\x59 |0 # CJK UNIFIED IDEOGRAPH \xFB\x45 |0 # CJK UNIFIED IDEOGRAPH + \xED\xE6 |3 # CJK UNIFIED IDEOGRAPH \x90\xB4 |0 # CJK UNIFIED IDEOGRAPH \x8A\x89 |0 # CJK UNIFIED IDEOGRAPH \x8D\xCF |0 # CJK UNIFIED IDEOGRAPH @@ -3656,6 +3846,7 @@ \x88\xAD |0 # CJK UNIFIED IDEOGRAPH \x89\x51 |0 #