|
#!/usr/local/bin/perl
|
|
# File: mangler.cgi
|
|
|
|
use LWP::UserAgent;
|
|
use HTML::Parse;
|
|
use HTTP::Status;
|
|
use CGI qw(:standard :html3);
|
|
$ICON = "pow.gif";
|
|
|
|
srand();
|
|
|
|
$url_to_mangle = param('mangle') if request_method() eq 'POST';
|
|
|
|
print header();
|
|
if ($url_to_mangle && mangle($url_to_mangle)) {
|
|
; # nothing to do
|
|
} else {
|
|
prompt_for_url();
|
|
}
|
|
# ---------------------------------------------------
|
|
# THIS SECTION IS WHERE URLs ARE FETCHED AND MANGLED
|
|
# ---------------------------------------------------
|
|
sub mangle {
|
|
my $url = shift;
|
|
my $agent = new LWP::UserAgent;
|
|
my $request = new HTTP::Request('GET',$url);
|
|
my $response = $agent->request($request);
|
|
|
|
unless ($response->isSuccess) {
|
|
print h1('Error Fetching URL'),
|
|
"An error occurred while fetching the document located a\
|
|
t ",
|
|
a({href=>$url},"$url."),
|
|
p(),
|
|
"The error was ",strong(statusMessage($response->code\
|
|
)),".",
|
|
hr();
|
|
return undef;
|
|
}
|
|
|
|
# make sure that it's an HTML document!
|
|
my $type = $response->header('Content-type');
|
|
unless ($type eq 'text/html') {
|
|
print h1("Document isn't an HTML File!"),
|
|
"The URL ",a({href=>$url},"$url"),
|
|
" is a document of type ",em($type),". ",
|
|
"Please choose an HTML file to mangle.",
|
|
hr();
|
|
return undef;
|
|
}
|
|
print start_html(-title=>'Mangled Document',
|
|
-xbase=>$url),
|
|
div({-align=>CENTER},
|
|
h1("The Mangler"),
|
|
strong(a({-href=>$url},$url))
|
|
),
|
|
p(),
|
|
a({-href=>self_url()},"Mangle another page"),hr();
|
|
|
|
my $parse_tree = parse_html($response->content);
|
|
$parse_tree->traverse(\&swallow);
|
|
$parse_tree->traverse(\®urgitate);
|
|
$parse_tree->delete();
|
|
1;
|
|
}
|
|
sub swallow {
|
|
my ($node,$start,$depth) = @_;
|
|
return 1 if ref($node);
|
|
return &Travesty::swallow($node);
|
|
}
|
|
sub regurgitate {
|
|
my ($node,$start,$depth) = @_;
|
|
if (ref($node)) {
|
|
return 1 if $node->tag =~ /^(html|head|body)/i;
|
|
return 0 if $node->isInside('head');
|
|
&Travesty::reset() if $start;
|
|
print $node->starttag if $start;
|
|
print $node->endtag unless $start;
|
|
} else {
|
|
my @words = split(/\s+/,$node);
|
|
print &Travesty::regurgitate(scalar(@words));
|
|
}
|
|
1;
|
|
}
|
|
|
|
# ---------------------------------------------------
|
|
# THIS SECTION IS WHERE THE PROMPT IS CREATED
|
|
# ---------------------------------------------------
|
|
sub prompt_for_url {
|
|
print start_html('The Mangler'),
|
|
-e $ICON ? img({-src=>$ICON,-align=>LEFT}): '',
|
|
h1('The Mangler'),
|
|
"Enter the URL of an HTML page and press ",em("Mangle. "),
|
|
"For best results, choose a document that contains several p\
|
|
ages of text. ",
|
|
"Very large documents may take a long time to process, so ha\
|
|
ve patience.",
|
|
start_form(),
|
|
textfield(-name=>'mangle',-size=>60),
|
|
submit(-value=>'Mangle'),
|
|
end_form(),
|
|
hr(),
|
|
address(
|
|
"Author: ",
|
|
a({-href=>'http://www.genome.wi.mit.edu/~lstein/'\
|
|
},'Lincoln D. Stein'),
|
|
),
|
|
end_html();
|
|
}
|
|
# --------------- modifications of the travesty code from Perl's eg/ d\
|
|
irectory ------
|
|
package Travesty;
|
|
|
|
sub swallow {
|
|
my $string = shift;
|
|
$string =~ tr/\n/ /s;
|
|
|
|
push(@ary,split(/\s+/,$string));
|
|
while ($#ary > 1) {
|
|
$a = $p;
|
|
$p = $n;
|
|
$w = shift(@ary);
|
|
$n = $num{$w};
|
|
if ($n eq '') {
|
|
push(@word,$w);
|
|
$n = pack('S',$#word);
|
|
$num{$w} = $n;
|
|
}
|
|
$lookup{$a . $p} .= $n;
|
|
}
|
|
1;
|
|
}
|
|
|
|
sub reset {
|
|
my($key) = each(%lookup);
|
|
($a,$p) = (substr($key,0,2),substr($key,2,2));
|
|
}
|
|
sub regurgitate {
|
|
my $words = shift;
|
|
my $result = '';
|
|
while (--$words >= 0) {
|
|
|
|
$n = $lookup{$a . $p};
|
|
($foo,$n) = each(%lookup) if $n eq '';
|
|
$n = substr($n,int(rand(length($n))) & 0177776,2);
|
|
$a = $p;
|
|
$p = $n;
|
|
($w) = unpack('S',$n);
|
|
$w = $word[$w];
|
|
|
|
# most of this formatting stuff is only relevant for <PRE&g\
|
|
t; text,
|
|
# but we leave it in for that purpose
|
|
$col += length($w) + 1;
|
|
if ($col >= 65) {
|
|
$col = 0;
|
|
$result .= "\n";
|
|
} else {
|
|
$result .= ' ';
|
|
}
|
|
$result .= $w;
|
|
if ($w =~ /\.$/) {
|
|
if (rand() < .1) {
|
|
$result .= "\n";
|
|
$col = 80;
|
|
}
|
|
}
|
|
}
|
|
return $result;
|
|
}
|