package jcode; ;###################################################################### ;# ;# jcode.pl: Perl library for Japanese character code conversion ;# ;# Copyright (c) 1995,1996,1997 Kazumasa Utashiro <[email protected]> ;# Internet Initiative Japan Inc. ;# 1-4 Sanban-cho, Chiyoda-ku, Tokyo 102, Japan ;# ;# Copyright (c) 1992,1993,1994 Kazumasa Utashiro ;# Software Research Associates, Inc. ;# ;# Original version was developed under the name of [email protected] ;# February 1992 and it was called kconv.pl at the beginning. This ;# address was a pen name for group of individuals and it is no longer ;# valid. ;# ;# Use and redistribution for any purpose, without significant ;# modification, is granted as long as all copyright notices are ;# retained. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ;# ANY EXPRESS OR IMPLIED WARRANTIES ARE DISCLAIMED. ;# ;; $rcsid = q$Id: jcode.pl,v 2.3 1997/02/23 14:12:26 utashiro Exp $; ;# ;###################################################################### ;# ;# INTERFACE: ;# ;# &jcode'getcode(*line) ;# Return 'jis', 'sjis', 'euc' or undef according to ;# Japanese character code in $line. Return 'binary' if ;# the data has non-character code.
;###################################################################### ;# ;# INTERFACE: ;# ;# &jcode'getcode(*line) ;# Return 'jis', 'sjis', 'euc' or undef according to the ;# Japanese character code in $line. Return 'binary' if ;# the data has non-character code. ;# ;# Code detection between euc and sjis is very difficult, ;# sometimes impossible, and may even lead to a wrong result ;# when the data includes JIS X0201 KANA characters. So JIS ;# X0201 KANA is ignored for automatic code detection. ;# ;# &jcode'convert(*line, $ocode [, $icode [, $option]]) ;# Convert the line in any Japanese code to the specified ;# code in the second argument $ocode. $ocode can be any ;# of "jis", "sjis" or "euc", or use "noconv" when you ;# don't want the code conversion. Input code is ;# recognized automatically from the line itself when ;# $icode is not supplied (JIS X0201 KANA is ignored. ;# See above). $icode can also be specified, but the xxx2yyy ;# routines are more efficient when both codes are known. ;# ;# It returns a list of a pointer to the conversion subroutine ;# and the input code. This means that the routine returns ;# the input code of the line in scalar context. ;# ;# See next paragraph for $option parameter. ;# ;# &jcode'xxx2yyy(*line [, $option]) ;# Convert the Japanese code from xxx to yyy. Strings xxx ;# and yyy are any combination of "jis", "euc" or ;# "sjis". They return the *approximate* number of converted ;# bytes. So return value 0 means the line was not ;# converted at all. ;# ;# Optional parameter $option is used to specify optional ;# conversion method. String "z" is for JIS X0201 KANA ;# to X0208 KANA, and "h" is for reverse. ;# ;# $jcode'convf{'xxx', 'yyy'} ;# The value of this associative array is pointer to the
;# SAMPLES ;# ;# Convert any Kanji code to JIS and print each line with its code name. ;# ;# while (<>) { ;# $code = &jcode'convert(*_, 'jis'); ;# print $code, "\t", $_; ;# } ;# ;# Convert all lines to JIS according to the first recognized line. ;# ;# while (<>) { ;# print, next unless /[\033\200-\377]/; ;# (*f, $icode) = &jcode'convert(*_, 'jis'); ;# print; ;# defined(&f) || next; ;# while (<>) { &f(*_); print; } ;# last; ;# } ;# ;# The safest way of JIS conversion. ;# ;# while (<>) { ;# ($matched, $code) = &jcode'getcode(*_); ;# print, next unless (@buf || $matched); ;# push(@buf, $_); ;# next unless $code; ;# eval "&jcode'${code}2jis(*_), print while (\$_ = shift(\@buf));"; ;# eval "&jcode'${code}2jis(*_), print while (\$_ = <>);"; ;# last;