|
#!/usr/bin/perl
|
|
#
|
|
# wwwkan1.pl - translate kanji or compounds in Japanese HTML.
|
|
# Copyright (C) 1997,1998 Tuomas J. Lukka. All rights reserved.
|
|
|
|
# Directory to the kanji dictionary database
|
|
$libdir = "/my/home/dir/japanese_files/";
|
|
# The url of this CGI-script, for mangling the links on the page
|
|
$my_url = "http://komodo.media.mit.edu/~tjl/cgi-bin/wwwkan1.cgi";
|
|
|
|
# Link types to substitute.
|
|
# 0 = absolute, 1 = through us.
|
|
%links = (a => ['href', 1], img => ['src', 0],
|
|
form => ['action', 1], link => ['href', 1],
|
|
frame => ['src', 1]);
|
|
|
|
# ---- main program
|
|
|
|
use CGI;
|
|
use LWP::Simple;
|
|
use HTML::Parse;
|
|
use URI::URL;
|
|
use Fcntl;
|
|
use AnyDBM_File;
|
|
tie %kanji,AnyDBM_File, "$libdir/kanji.dbmx", O_RDONLY, 0;
|
|
$query = new CGI;
|
|
print $query->header,
|
|
"CONVERTED By TJL's kanji explainer on ",`date`,'. Mail comments to \
|
|
lukka@fas.harvard.edu.<P>',
|
|
$query->startform(),
|
|
"<b>Go To:</b> ",
|
|
$query->textfield(-name => 'url',
|
|
-default => 'http://www.yahoo.co.jp/',
|
|
-size => 50),
|
|
$query->submit('Action','Doit'),
|
|
$query->endform,
|
|
"<HR>\n";
|
|
|
|
# Get the original document from the net.
|
|
$url = $query->param('url');
|
|
$doc = get $url;
|
|
|
|
# Substitute web addresses so that text documents are fetched with
|
|
# this script and pictures are fetched directly.
|
|
$h = parse_html($doc);
|
|
$h->traverse( sub {
|
|
my($e, $start) = @_;
|
|
return 1 unless $start;
|
|
my $attr = $links{lc $e->tag} or return 1;
|
|
my $url = $e->attr($attr->[0]) or return 1;
|
|
$e->attr($attr->[0], ($attr->[1] ? getlink($url) : absli\
|
|
nk($url)));
|
|
}, 1);
|
|
$doc = $h->as_HTML;
|
|
|
|
# Substitute the explanations on each line and print it.
|
|
for ( split "\n", $doc ) {
|
|
s/((?:[\x80-\xFF][\x40-\xFF])+)/explainstr($1)/ge;
|
|
print;
|
|
}
|
|
exit;
|
|
# SUBROUTINES
|
|
# Make an absolute URL from a relative URL in the original document
|
|
sub abslink {
|
|
return (new URI::URL($_[0]))->abs($url)->as_string;
|
|
}
|
|
|
|
# Make a new URL which gets a document through our translation service\
|
|
.
|
|
sub getlink {
|
|
my $url_to = (new URI::URL($_[0]))->abs($url);
|
|
my $proxy_url = new URI::URL($my_url);
|
|
$proxy_url->query_form(url => $url_to->as_string);
|
|
return $proxy_url->as_string;
|
|
}
|
|
|
|
# Insert explanations into a string of kanjis
|
|
sub explainstr {
|
|
my $str = @_;
|
|
my $res = "";
|
|
my ($pos, $mlen, $s);
|
|
for ( $pos = 0; $pos < length($str); $pos += $mlen ) {
|
|
my $expl;
|
|
$mlen = 20;
|
|
while (!defined($expl = $kanji{$s=(substr(($str),$pos,$mlen))}\
|
|
)
|
|
and $mlen > 2) {
|
|
$mlen -= 2;
|
|
}
|
|
$res .= $s;
|
|
if (defined $expl) {
|
|
$res .= " <small><[[[".($expl)."]]]></smal\
|
|
l> ";
|
|
}
|
|
}
|
|
return $res;
|
|
}
|