# # -*- Perl -*- # chm (windows help/compressed html) filter for namazu # Copyright (C) 2007 Alexander Oelzant # using parts from zip.pl, which is # Copyright (C) 2004 MATSUMURA Namihiko # 2004 Yukio USUDA # 2004-2005 Namazu Project All rights reserved. # # This is free software with ABSOLUTELY NO WARRANTY. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either versions 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA # 02111-1307, USA # package chm; use strict; require 'util.pl'; sub mediatype() { return ('application/x-chm'); } my $archmagepath="archmage"; sub status() { return 'yes' if (util::checkcmd($archmagepath)); return 'no'; } sub recursive() { return 0; } sub pre_codeconv() { return 0; } sub post_codeconv () { return 0; } sub add_magic ($) { my ($magic) = @_; $magic->addFileExts('\\.chm', 'application/x-chm'); $magic->addFileExts('\\.CHM', 'application/x-chm'); $magic->addMagicEntry("0\tstring\tITSF\003\000\000\000\x60\000\000\000\001\000\000\000\tapplication/x-chm"); return; } sub filter ($$$$$) { my ($orig_cfile, $contref, $weighted_str, $headings, $fields) = @_; my $tmpfile; my $uniqnumber = int(rand(10000)); do { $tmpfile = util::tmpnam('NMZ.chm' . substr("000$uniqnumber", -4)); $uniqnumber++; } while (-f $tmpfile); { my $fh = util::efopen("> $tmpfile"); print $fh $$contref; util::fclose($fh); } $$contref =""; my $err = undef; $err = unchm_filter($tmpfile, $contref, $weighted_str, $headings, $fields); unlink($tmpfile); return $err; } sub unchm_filter ($$$$$) { my ($status,@cmd); my ($tmpfile, $contref, $weighted_str, $headings, $fields) = @_; util::vprint("Processing chm file ... (using '$archmagepath')\n"); my $tmpfile2 = util::tmpnam('NMZ.chm_dir'); my $tmpfile3 = util::tmpnam('NMZ.chm_stdout'); my $tmpfile4 = util::tmpnam('NMZ.chm_stderr'); # as opposed to clit, archmage does not want a dir # mkdir($tmpfile2); @cmd = ("$archmagepath", "$tmpfile","$tmpfile2"); util::vprint("starting archmage"); system("rm","-rf","$tmpfile2"); $status = util::syscmd( command => \@cmd, option => { "stdout" => $tmpfile3, "stderr" => $tmpfile4, #"stderr" => "/dev/null", }, ); util::vprint("archmage output:"); my ($in,$infile); for $infile ($tmpfile3,$tmpfile4) { open I,"<$infile"; while ($in=) { util::vprint($in); } close I; } if ($status == 0) { my $summary = util::readfile("$tmpfile3"); # codeconv::toeuc(\$summary); codeconv::codeconv_document(\$summary); #$$contref .= $summary . " "; } else { unlink($tmpfile); unlink($tmpfile3); unlink($tmpfile4); system("rm","-rf","$tmpfile2"); return 'Unable to convert chm file (maybe copying protection)'; } unlink($tmpfile3); unlink($tmpfile4); my ($fname,$dname,@dirs); @dirs="$tmpfile2"; while ($dname=pop(@dirs)) { util::vprint("traversing $dname"); opendir DIR,"$dname"; foreach $fname (readdir(DIR)){ if ("$fname" =~ /^\.|\.\.$/) { next; } $fname="$dname/$fname"; if ( -d $fname) { push @dirs,"$fname"; } else { stat $fname; my $size = [stat($fname)]->[7]; if ($size == 0) { util::dprint("$fname: filesize is 0"); } elsif ($size > $conf::FILE_SIZE_MAX) { util::dprint("$fname: Too large chm file"); } elsif ($fname =~ m!^($conf::DENY_FILE)$!i ) { # codeconv::toeuc(\$fname); codeconv::codeconv_document(\$fname); util::vprint(sprintf(_("Denied: %s"), $fname)); } elsif ($fname !~ m!^($conf::ALLOW_FILE)$!i) { # codeconv::toeuc(\$fname); codeconv::codeconv_document(\$fname); util::vprint(sprintf(_("Not allowed: %s"), $fname)); } else { my $con = ""; util::vprint("processing $fname"); my $err = chm::nesting_filter($fname, \$con, $weighted_str); if (defined $err) { util::dprint("filter/chm.pl gets error message \"$err\""); } $$contref .= $con . " "; } } }; closedir DIR; chdir ".."; } system("rm","-rf","$tmpfile2"); return undef; } sub nesting_filter ($$$){ my ($filename, $contref, $weighted_str) = @_; my $err = undef; my $dummy_shelterfname = ""; my $headings = ""; my %fields; my $mmtype = undef; my ($kanji, $mtype) = mknmz::apply_filter(\$filename, $contref, $weighted_str, \$headings, \%fields, $dummy_shelterfname, $mmtype); if ($mtype =~ /; x-system=unsupported$/){ $$contref = ""; $err = $mtype; }elsif ($mtype =~ /; x-error=(.*)$/){ $$contref = ""; $err = $1; }else{ gfilter::show_filter_debug_info($contref, $weighted_str, \%fields, \$headings); for my $field (keys %fields) { $$contref .= " ". $fields{$field}; } $$contref .= util::readfile("$filename"); } return $err; } 1;