Unicode::Japanese::JA(3pm) User Contributed Perl Documentation Unicode::Japanese::JA(3pm)
NAME
Unicode::Japanese::JA - XXXXXXXXXX
XX
use Unicode::Japanese;
use Unicode::Japanese qw(unijp);
# convert utf8 -> sjis
print Unicode::Japanese->new($str)->sjis;
print unijp($str)->sjis; # same as above.
# convert sjis -> utf8
print Unicode::Japanese->new($str,'sjis')->get;
# convert sjis (imode_EMOJI) -> utf8
print Unicode::Japanese->new($str,'sjis-imode')->get;
# convert zenkaku (utf8) -> hankaku (utf8)
print Unicode::Japanese->new($str)->z2h->get;
XX
Unicode::Japanese XXXXXXXXXXXXXXXXXXXXXXXXXXX
XX
o Unicode::Japanese XXXXXXXXXUTF-8 XXXXXXXXXXX
o XS XX/XXXXXXXXXXXXXXXX XS XXXXXXXXXXXXXXXXX No-XS XXXXXXXXXXXXXXXXXXXX (Japanese.pm XXXXXXXXXXXXXX)X
o XXXXXXXXXXXXXXXXXXXXXXXXXXXX
o XXXX (DoCoMo i-modeXKDDI AU, Softbank Mobile, ASTEL dot-i) XXXXX Unicode XXXXXXXXXXXXXXXXDB XXXXXXXXXXXXXXX
o XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
o SJIS XX MS-CP932 XXXXX Unicode XXXXXXXXXXXX
o Unicode -> SJISXXXEUC-JP/JISX XXXXXXXXSJIS XXXXXXXXXX &#dddd; XXXXXXXXX XXXUnicodeXXXXXXXXXXXXXXX '?'XXXXX. XX, XXXXXXXXXXXX,
XXXXXXXXXXXX'?'XXXXX.
o Perl-5.8.0 XXXXXX, utf8 XXXXXXXXXXXXX. utf-8 `XXX'X XXXXX utf8() XXXXX, utf-8 `XX'X XXXXX getu() XXXXXXXXX.
get() XXXXXXXXXX utf-8 `XXX'X XXXXX (XXXXXXXXXXXXXXXXX).
sjis(), jis(), utf8(), etc. XXXXXXXXXXXXXXX. new, set, getcode XXXXXXXXX, utf8-flagged/bytes XXXXXX.
XXXXXXXX
o perl 5.10.x, 5.8.x, etc. (5.004 XX).
o (XXXXOK) C XXXXX. XXXXXXXX XS X Pure Perl XXXXXXXXXX. C XXXXXXXXXXXX, Unicode::Japanese X Pure Perl XXXXXXXXXXXXXXXXXX.
o (XXXXOK) XXXXX Test.pm XX Test::More.
XXXXXXXXXXXXXXXXXX.
XXXX
$s = Unicode::Japanese->new($str [, $icode [, $encode]])
XXX Unicode::Japanese XXXXXXXXXXXXX
XXXXXXXXXXXX"set" XXXXXXXXXXX
$s = unijp($str [, $icode [, $encode]])
Unicode::Japanese->new(...) XXX.
$s->set($str [, $icode [, $encode]])
$str: XXX
$icode: XXXXXXXXXXXXXXXX 'utf8'
$encode: XXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXX XXXXXXXXXXXXX UTF-8 XXXXXXXX
XXXXXXXXXX:
auto
utf8 ucs2 ucs4
utf16-be utf16-le utf16
utf32-be utf32-le utf32
sjis cp932 euc euc-jp jis
sjis-imode sjis-imode1 sjis-imode2
utf8-imode utf8-imode1 utf8-imode2
sjis-doti sjis-doti1
sjis-jsky sjis-jsky1 sjis-jsky2
jis-jsky jis-jsky1 jis-jsky2
utf8-jsky utf8-jsky1 utf8-jsky2
sjis-au sjis-au1 sjis-au2
jis-au jis-au1 jis-au2
sjis-icon-au sjis-icon-au1 sjis-icon-au2
euc-icon-au euc-icon-au1 euc-icon-au2
jis-icon-au jis-icon-au1 jis-icon-au2
utf8-icon-au utf8-icon-au1 utf8-icon-au2
ascii binary
( XXX.)
XXXXXXXXXXXXXXXX'auto' XXXXXXXXXXXXXX 'auto' XXXXXXXXXXXXXgetcode() XXXXXXX XXXXXX
XXXXXXXXXXXX'base64' XXXXXXXXX base64 XXXXXXXXXbase64 XXXXXXXXX Unicode::Japanese XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 'binary' XXXXXXX
sjis-imodeXsjis-dotiXXXXXXXXXX &#dddd; X XXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXX
sjis, utf8 XXXXXXXXXXXXXXXXXsjisX sjis-auXsjis-doti XXXXXXXXXXXXXXXXXsjis-auX XXXXXX
$str = $s->get
$str: XXX(UTF-8)
XXXX UTF-8 XXXXXXXXXXX
XXX `XXX' X XXXXXX, XXXXXXXXXXXXXXXXX.
XXXXXXXXX utf8() XXXXX, XXXXXXXX getu() XXXXXXXXXXXXXXXXX.
$str = $s->getu
$str: XXX(UTF-8)
XXXX UTF-8 XXXXXXXXXXX
Perl-5.8.0 XXXXXXX, utf-8 XXXXXXX utf-8 XXXXXX XXXX.
$code = $s->getcode($str)
$str: XXX
$code: XXXXXXXXXXX
XXXXXXX($str)XXXXXXXXXXXXXXX
XXXXXX, XXXX, XXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
(PurePerlX)
1. UTF-32 X BOM XXXXXutf32 XXXXXXX
2. UTF-16 X BOM XXXXXutf16 XXXXXXX
3. UTF-32BE XXXXXXXXXXXutf32-be XXXXXXX
4. UTF-32LE XXXXXXXXXXXutf32-le XXXXXXX
5. ESC XX XXX 8 XXXXXXXXXXXXXXXXXXXXXXXascii XXXXX XXESC XXXX ASCII XXXX (0x00-0x1F XX 0x7F) X ascii XXXXXXXXX XX
6. JISXXXXXXXXXXXXXXXXXXXjis XXXXXXX
7. J-PHONE XXXXXXXXXXXXXsjis-jsky XXXXXXX
8. EUC-JP XXXXXXXXXXXXXXeuc XXXXXXX
9. SJIS XXXXXXXXXXXXXXsjis XXXXXXX
10. SJIS XXXX au XXXXXXXXXXXXXXXsjis-au XXXXXXX
11. SJIS X i-mode XXXXXXXXXXXXXXXsjis-imode XXXXXXX
12. SJIS X dot-i XXXXXXXXXXXXXXXsjis-doti XXXXXXX
13. UTF-8 XXXXXXXXXXXutf8 XXXXXXX
14. XXXXXXXXXXXXXXXunknown XXXXXXX
(XSX)
1. UTF-32 X BOM XXXXXutf32 XXXXXXX
2. UTF-16 X BOM XXXXXutf16 XXXXXXX
3. XXXXXXXXXX, XXXXXXXXXXXXXXXXXXXXXXXXXX.
ascii / euc / sjis / jis / utf8 / utf32-be / utf32-le / sjis-jsky / sjis-imode / sjis-au / sjis-doti
4. XXXXXXXXXXXXXXX, XXXXXXXXXXXXXX, XXXXXXXX.
utf32-be / utf32-le / ascii / jis / euc / sjis / sjis-jsky / sjis-imode / sjis-au / sjis-doti / utf8
5. XXXXXXXXXXXXXXXunknown XXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXX
o UTF-8 XXXXXXSJISXXXXXXXXXXXXXXXXXX
o UCS2 XXXXXXXXXXXX
o UTF-16 X BOM XXXXXXXXXXXXXXX
o XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
&#dddd; XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XSXPurePerlXX, XXXXXXXXXXXXXXXXX, XXXXXXXXXXXXXXXX. XX, XXXXXXXXXXXXXsjisXXX, PurePerlXXsjisXXXXXXX XSXXXXXXXX. XXXsjis-
jskyXXXXXXXXXXXXX. XX, XX XXXXXXXXXXXXX, euc-jpXXXXX, XXXXXXXXXXXXXXXXX XXXXXX.
$code = $s->getcodelist($str)
$str: XXX
$code: XXXXXXXXXXX
XXXXXXX($str)XXXXXXXXXXXXXXX
getcode XXXX, XXXXXXXXXXXXXXX XXXXXXX.
$str = $s->conv($ocode, $encode)
$ocode: XXXXX (XXXXXX)
utf8 ucs2 ucs4 utf16
sjis cp932 euc euc-jp jis
sjis-imode sjis-imode1 sjis-imode2
utf8-imode utf8-imode1 utf8-imode2
sjis-doti sjis-doti1
sjis-jsky sjis-jsky1 sjis-jsky2
jis-jsky jis-jsky1 jis-jsky2
utf8-jsky utf8-jsky1 utf8-jsky2
sjis-au sjis-au1 sjis-au2
jis-au jis-au1 jis-au2
sjis-icon-au sjis-icon-au1 sjis-icon-au2
euc-icon-au euc-icon-au1 euc-icon-au2
jis-icon-au jis-icon-au1 jis-icon-au2
utf8-icon-au utf8-icon-au1 utf8-icon-au2
binary
( XXX.)
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
$encode: XXXXXXXXXXXXXX
$str: XXX
XXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXX'base64' XXXXXXXXX base64 XXXXXXXXXbase64 XXXXXXXX XXXXXXXXXX
perl-5.8.0 XXXXXX, XXX utf-8 XXXXXXXXXXXXXXXXX.
$s->tag2bin
XXXXXXXXX &#dddd; XXXXXXXXXXXXXXXXXXXXXXXXX
$s->z2h
XXXXXXXXXXXX
$s->h2z
XXXXXXXXXXXX
$s->hira2kata
XXXXXXXXXXXXXXXX
$s->kata2hira
XXXXXXXXXXXXXXXX
$str = $s->jis
$str: JIS XXXXXXXXXXXXXXX
XXXX JISXISO-2022-JPX XXXXXXXXXXX
$str = $s->euc
$str: euc-jp XXXXXXXXXXXXXXX
XXXX EUC-JP XXXXXXXXXXX
$str = $s->utf8
$str: utf-8 XXXXXXXXXXXXXXX
XXXX UTF-8 XXXXXXXXXXX
perl-5.8.0 XXXXXXX, XXXXXXXXX.
$str = $s->ucs2
$str: ucs2 XXXXXXXXXXXXXXX
XXXX UCS2 XXXXXXXXXXX
$str = $s->ucs4
$str: ucs4 XXXXXXXXXXXXXXX
XXXX UCS4 XXXXXXXXXXX
$str = $s->utf16
$str: utf-16 XXXXXXXXXXXXXXX
XXXX UTF-16 XXXXXXXXXXX BOMXXXXXXX XXXXXXXXXXXXXXXXXX
$str = $s->sjis
$str: sjis XXXXXXXXXXXXXXX
XXXX SJISXMS-CP932X XXXXXXXXXXX
$str = $s->sjis_imode
$str: sjis/imodeXXX XXXXXXXXXXXXXXX
XXXX i-mode XXXXX SJIS XXXXXXXXXXX XXXimodeXXXXXXXX.
$str = $s->sjis_imode1
$str: sjis/imode XXX XXXXXXXXXXXXXXX
XXXX i-mode XXXXX SJIS XXXXXXXXXXX XXXXXXXXXXXXX.
$str = $s->sjis_imode2
$str: sjis/imode XXX XXXXXXXXXXXXXXX
XXXX i-mode XXXXX SJIS XXXXXXXXXXX XXXXX, XXXXXXXXXX.
$str = $s->sjis_doti
$str: sjis/dot-i XXX XXXXXXXXXXXXXXX
XXXX dot-i XXXXX SJIS XXXXXXXXXXX
$str = $s->sjis_jsky
$str: sjis/j-sky XXX XXXXXXXXXXXXXXX
XXXX j-sky XXXXX SJIS XXXXXXXXXXX XXXj-skyXXX(VERSION 0.15 XX, jsky2)XXXXX.
$str = $s->sjis_jsky1
$str: sjis/j-sky XXX XXXXXXXXXXXXXXX
XXXX j-sky XXXXX SJIS XXXXXXXXXXX Page 1X3 XXXXXXXXXXX.
$str = $s->sjis_jsky2
$str: sjis/j-sky XXX XXXXXXXXXXXXXXX
XXXX j-sky XXXXX SJIS XXXXXXXXXXX Page 1X6 XXXXXXXXX.
$str = $s->sjis_icon_au
$str: sjis/AU iconXX XXXXXXXXXXXXXXX
XXXX AU XXXXX SJIS XXXXXXXXXXX
$str_arrayref = $s->strcut($len)
$len: XXXXXXX(XXXX)
$str_arrayref: XXX
$lenXXXXXXXXX(XX)XXXXXXXXXXXXXXXX
XXXXXXX, utf-8 XXXXXXXutf-8XXXXX.
$len = $s->strlen
$len: XXXXXXX
UTF-8 XXXXXX length() XXXXXXXXXXXXXXXXX 3 XXXXXXXXXXX XXXXXXXXXXXXXXXX SJIS XXXXXXXXXXXXXXXXXX 2 XXXXXX
$s->join_csv(@values);
@values: XXXXX
XXX CSV XXXXXXXXXXXXXXXXXXXXX XXXXXXXXXX("\n")XXXXXXXX
@values = $s->split_csv;
@values: XXXXX
XXXXXXXXXXXXXXXXXX CSV XXXXXXXXXXXXXX XXXXXXXXXXX("\n")XXXXXXXXXXXXXXXX
XXX binary XXXXX utf-8 XXXXXXXX. binary XXXXXXXXXXXXXXX.
XXXXXXXXXXXXXXXXX
+---------------+----+-----+-------+
|encoding | in | out | guess |
+---------------+----+-----+-------+
|auto : OK : -- | ----- |
+---------------+----+-----+-------+
|utf8 : OK : OK | OK |
|ucs2 : OK : OK | ----- |
|ucs4 : OK : OK | ----- |
|utf16-be : OK : -- | ----- |
|utf16-le : OK : -- | ----- |
|utf16 : OK : OK | OK(#) |
|utf32-be : OK : -- | OK |
|utf32-le : OK : -- | OK |
|utf32 : OK : -- | OK(#) |
+---------------+----+-----+-------+
|sjis : OK : OK | OK |
|cp932 : OK : OK | ----- |
|euc : OK : OK | OK |
|euc-jp : OK : OK | ----- |
|jis : OK : OK | OK |
+---------------+----+-----+-------+
|sjis-imode : OK : OK | OK |
|sjis-imode1 : OK : OK | ----- |
|sjis-imode2 : OK : OK | ----- |
|utf8-imode : OK : OK | ----- |
|utf8-imode1 : OK : OK | ----- |
|utf8-imode2 : OK : OK | ----- |
+---------------+----+-----+-------+
|sjis-doti : OK : OK | OK |
|sjis-doti1 : OK : OK | ----- |
+---------------+----+-----+-------+
|sjis-jsky : OK : OK | OK |
|sjis-jsky1 : OK : OK | ----- |
|sjis-jsky2 : OK : OK | ----- |
|jis-jsky : OK : OK | ----- |
|jis-jsky1 : OK : OK | ----- |
|jis-jsky2 : OK : OK | ----- |
|utf8-jsky : OK : OK | ----- |
|utf8-jsky1 : OK : OK | ----- |
|utf8-jsky2 : OK : OK | ----- |
+---------------+----+-----+-------+
|sjis-au : OK : OK | OK |
|sjis-au1 : OK : OK | ----- |
|sjis-au2 : OK : OK | ----- |
|jis-au : OK : OK | ----- |
|jis-au1 : OK : OK | ----- |
|jis-au2 : OK : OK | ----- |
|sjis-icon-au : OK : OK | ----- |
|sjis-icon-au1 : OK : OK | ----- |
|sjis-icon-au2 : OK : OK | ----- |
|euc-icon-au : OK : OK | ----- |
|euc-icon-au1 : OK : OK | ----- |
|euc-icon-au2 : OK : OK | ----- |
|jis-icon-au : OK : OK | ----- |
|jis-icon-au1 : OK : OK | ----- |
|jis-icon-au2 : OK : OK | ----- |
|utf8-icon-au : OK : OK | ----- |
|utf8-icon-au1 : OK : OK | ----- |
|utf8-icon-au2 : OK : OK | ----- |
+---------------+----+-----+-------+
|ascii : OK : -- | OK |
|binary : OK : OK | ----- |
+---------------+----+-----+-------+
(#): guessed only when the input has a BOM.
XXXXXXXX
1. utf32 (#)
2. utf16 (#)
3. utf32-be
4. utf32-le
5. ascii
6. jis
7. sjis-jsky (pp)
8. euc
9. sjis
10. sjis-jsky (xs)
11. sjis-au
12. sjis-imode
13. sjis-doti
14. utf8
15. unknown
DESCRIPTION OF UNICODE MAPPING
Unicode XXXXXXXXXXXXXXXXXXXX
Shift_JIS
MS-CP932 XXX Unicode XXXXXXXXXXXX XXXXXXXXXXXXXURLXXXXXXXXXXXX
<ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT>
Unicode XX SJIS XXXXXXXXXXXXXXXXXXXXXXXXX XXXXX &#dddd; XXXXXXXXX XXXXXXXXXXX?XXXXXXXXX
XXXXXXXX SJIS XXXXXXXXXXXXXXXXXXXXXX?XXXXXXXXX
EUC-JP/ISO-2022-JP
XXSJISXXXXXXXXXXXUnicode XXXXXXXXXX XXXXXSJIS XXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXX
DoCoMo i-mode
F800 - F9FF XXXXXXXXXXXXXXXXXXU+0FF800 - U+0FF9FF XXXXXXXXXXXXX
ASTEL dot-i
F000 - F4FF XXXXXXXXXXXXXXXXXXU+0FF000 - U+0FF4FF XXXXXXXXXXXXX
J-PHONE J-SKY
J-SKY XXXXXXXXXXXXXXXX "\e$" XXXXXXX1XXXXX 1XXXXXXX2XXXXX"\x0f"XXXXXXX 1XXXXXXXXXXXXXXXXX2XXXXXXXXXXXXXXXX XXXXXXXXXXXX
XX1XXXXX2XXXXXXXX1XXXXXXXX4500 - 47FF XXXXX U+0FFB00 - U+0FFDFF XXXXXXXXXXXXX
Unicode::Japanese XXXUnicode XX J-SKY XXXXXXXXXXXXXX 1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
AU
XXXXXXXXXXXXU+0FF500 - U+0FF6FF XXXXXXXXXXXXX
PurePerl mode
use Unicode::Japanese qw(PurePerl);
use XXXXX 'PurePerl' XXXXXXX, XSXXXXXXXXXXXXXXXXXX.
XX
XXXXXX "bug-unicode-japanese at rt.cpan.org" XX XXXXXXXX. XXXX http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Unicode-Japanese
<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Unicode-Japanese>. XXX web XXXXXXXXXXXXXXXXXX. XXXXXXXXXX, XXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXX.
o EUC-JPXJIS XXXXXSJIS XXXXXXXX UTF-8 XXXXXXXXX SJIS XXXXXXXXXXXXXXXXXXXXXXXXXXX
o XSXXXXXXXXXXEUC-JPXSJIS(XXXXX)XXXXXXXXX \e XXXXXXXEUC-JPXSJIS XXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX
o Japanese.pm XXXXXXXXXXXXXXXXXXFTP X ASCII XXXX XXXXXXXXXXXXXXX
XXXX
XXXXXXXXXXXXXXX perldoc XXXXXXXXXXXXXX.
perldoc Unicode::Japanese
XX, XXXXXXXXXXXXXXXX:
o AnnoCPAN: Annotated CPAN documentation
http://annocpan.org/dist/Unicode-Japanese <http://annocpan.org/dist/Unicode-Japanese>
o CPAN Ratings
http://cpanratings.perl.org/d/Unicode-Japanese <http://cpanratings.perl.org/d/Unicode-Japanese>
o RT: CPAN's request tracker
http://rt.cpan.org/NoAuth/Bugs.html?Dist=Unicode-Japanese <http://rt.cpan.org/NoAuth/Bugs.html?Dist=Unicode-Japanese>
o Search CPAN
http://search.cpan.org/dist/Unicode-Japanese <http://search.cpan.org/dist/Unicode-Japanese>
CREDITS
Thanks very much to:
NAKAYAMA Nao
SUGIURA Tatsuki & Debian JP Project
XXXXXXXXXX
Copyright 2001-2008 SANO Taku (SAWATARI Mikage) and YAMASHINA Hio, all rights reserved.
XXXXXXXXXXXXXXXXXXXXXXXX Perl XXX XXXXXX XXXXXXXXXXXXXXXXXXX.
perl v5.14.2 2008-08-05 Unicode::Japanese::JA(3pm)