#!/usr/bin/perl # -T use strict; use warnings; use Date::Parse; use Encode; use HTML::Entities qw/:DEFAULT encode_entities_numeric /; use LWP; # status/global exec my $debug=1; # file/uri setup $ENV{"HOME"} =~ /(.*)/; my $publichtml="$1/public_html/"; my $filehtml="nblog_real.html"; my $filerss1="nblog.rdf"; #my $dir="mute_psychiatrist"; my $dir="blog"; my $urlhome="http://oelzant.priv.at/~aoe"; my $entryregex="[0-9].*.html"; my $template="$publichtml/$dir/template_nblog.html"; # to contain #UPDATED# and #ITEMLIST# # desc my $author="Alexander Oelzant <alexander\@oelzant.priv.at>"; my $title="aoe: Not a 'blog"; my $desc="various rants and musings"; my $image="http://oelzant.priv.at/~aoe/images/trash/alex3sm_small.jpg"; my $starttime=iso8601date(0); # content"style" my %substs=( "#UPDATED#" => '"

last updated ".time."

"', "#AUTHOR#" => '"$author"', "#DESC#" => '"$desc"', "#IMAGE#" => '"$image"', ); # Create an RFC822 compliant date (current time) sub iso8601date { my ($time)=@_; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($time); my @date = ($year+1900,$mon+1,$mday,$hour,$min,$sec); return sprintf("%d-%02d-%02dT%02d:%02d:%02dZ", @date); } sub html_escape { my ($in)=@_; $in =~ s/>/\>/msg; $in =~ s/\n"; } return << "EOHEAD"; $title $urlhome/$filehtml various rants and musings $author $author $iso8601date en hourly 1 $starttime $list EOHEAD } sub parse_file { my ($in, $lang, $file, $filebase); $file=shift; $in=cat($file); $file =~ m#.*/([^/]+)$#; $filebase="$urlhome/$dir/$1"; if ($in =~ /]*lang="([^"]+)"/) { $lang=$1; } else { $lang=undef; } if ($in =~ m#

(.*?)

.*?((?:]*>.*?

\s*)+)#msi) { my ($head,$body)=($1,$2); if (length($body) > 1100) { # && ($body =~ m#]*>.*?

\s*$#i) ) { $body =~ s#^((
|[^<>]){0,800})\b(
|[^<>])*

.*#$1&\#160;#msi; $body .= ""; if ($lang =~ /de/) { $body .= "(weiterlesen ...)

"; } else { $body .= "(read more ...)

"; } #$body =~ s#]*>.*?

\s*$##i; #$body =~ s#$# #mg; } my $filedate=datefromname($filebase); my $filemod=[stat $file]->[9]; $head =~ s/^\s+//msg; $head =~ s/\s+$//msg; $body =~ s/^\s+//msg; $body =~ s/\s+$//msg; print "$filebase: ($head) $body\n" if $debug; return ([$filebase,$filedate,$filemod,$head,$lang,$body]); } else { warn "no h1 in $filebase"; } return undef; } sub cat { my ($file)=@_; open I,$file || die "open $file";; my $oldifs=$/; $/=undef; my $in=; close $file; $/=$oldifs; return $in; } # converts yyyymmdd.html or ssssssssssss.html to unix timestamp sub datefromname { my $r; my $f=shift; # strip dir if ($f =~ m#/([^/]+)$#) { $f=$1; } print "$f => "; if ($f =~ /^(\d\d\d\d)(\d\d)(\d\d)(?!\d).*\.html$/) { $r=str2time("$1-$2-$3"); } elsif ($f =~ /^(\d+)(?!\d).*\.html/) { $r=$1; } else { $r="0 but should be an error"; } print "$r\n"; return $r; } sub parse_dir { my ($file,$in,@entries,@entry); my ($thisdir)=@_; opendir DIR,$thisdir; while ($file=readdir(DIR)) { if ($file =~ /$entryregex/) { my $tuple = parse_file("$thisdir/$file"); if ($tuple && @$tuple == 6) { push @entries,$tuple; } else { print "$tuple unparseable\n"; } } } return \@entries; } sub format_item_rss1 { my ($url,$itime,$mtime,$head,$lang,$body)=@_; $lang = "un" unless $lang; #$head = encode("utf-8",decode("iso-8859-1", decode_entities($head))); $head = encode_entities_numeric($head); # encode("utf-8",decode("iso-8859-1", decode_entities($head)))); #$body = html_escape(encode_entities(decode("iso-8859-1", decode_entities($body)))); # kludge: absolutize links $body =~ s#="/~aoe#="$urlhome#msg; $body = encode_entities_numeric(decode("iso-8859-1", $body)); my $parsdate=iso8601date($itime); return <<"EOITEM"; $head $url $body $author $parsdate $lang $parsdate EOITEM } sub format_item_html1 { my ($url,$itime,$mtime,$head,$lang,$body)=@_; print "format_item_html: $url,$itime,$mtime,$head,$lang\n"; $lang = "un" unless $lang; my $parsdate=localtime($itime); my $parsmod=localtime($mtime); $body =~ s#]+)>(.*?)#$2#sg; $body =~ s#href="[^"]*"##sg; $body =~ s#(][^>]*>|>))(.*?)(
)#$1$2$3#sg; #$body =~ s#^(.*)$#
$1
#ms; #$body =~ s#]*>##g; return "

$head
$body
($lang) $itime ($parsdate)

"; } sub format_item_html { my ($url,$itime,$mtime,$head,$lang,$body)=@_; print "format_item_html: $url,$itime,$mtime,$head,$lang\n"; $lang = "un" unless $lang; my $parsdate=localtime($itime); my $parsmod=localtime($mtime); #$body =~ s#]*>##g; return "

$head
$body
($lang) $itime ($parsdate)

"; } sub write_file_html { my ($name,$template,$entriesr)=@_; print "write_file_html: @$entriesr\n"; my $outfile=cat($template); my $items=""; for my $subst (keys(%substs)) { my $repl=$substs{$subst}; $outfile =~ s/$subst/eval $repl/emsg; } for (@$entriesr) { $items .= format_item_html(@$_); } $items .= ""; $outfile =~ s/#ITEMLIST#/$items/g; print "write_file_html: writing $name\n"; open O,">$name" || die "open >$name"; print O $outfile; close O; $outfile=cat($template); $items=""; for my $subst (keys(%substs)) { my $repl=$substs{$subst}; $outfile =~ s/$subst/eval $repl/emsg; } for (@$entriesr) { $items .= format_item_html1(@$_); } $items .= ""; $outfile =~ s/#ITEMLIST#/$items/g; $name =~ s/\.html$/1.html/; print "write_file_html: writing $name\n"; open O,">$name" || die "open >$name"; print O "$outfile"; close O; return 0; } sub write_file_rss1 { my ($name,$entriesr)=@_; my $outfile=rss_head($entriesr); for my $subst (keys(%substs)) { my $repl=$substs{$subst}; $outfile =~ s/$subst/eval $repl/emsg; } for (@$entriesr) { $outfile .= format_item_rss1(@$_); } $outfile .= ""; open O,">$name" || die "open >$name"; print O $outfile; close O; } #my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, # $atime,$mtime,$ctime,$blksize,$blocks) = # stat "$publichtml/$dir"; my $age_dir=[stat "$publichtml/$dir"]->[9]; my $age_html=[stat "$publichtml/$filehtml"]->[9]; my $age_rss1=[stat "$publichtml/$filerss1"]->[9]; # check whether files older than dir ... if (grep (/./,@ARGV) || $age_html < $age_dir || $age_rss1 < $age_dir) { my $items=parse_dir("$publichtml/$dir"); $items=[sort {$$b[1] <=> $$a[1]} @{$items}]; print "writing $publichtml/$filehtml\n"; write_file_html("$publichtml/$filehtml",$template,$items); print "writing $publichtml/$filerss1\n"; write_file_rss1("$publichtml/$filerss1",$items); my $ua=LWP::UserAgent->new; $ua->agent("$0 0.1"); # my $url = "http://rpc.weblogs.com/pingSiteForm?name=$title&url=http%3A%2F%2Foelzant.priv.at%2F%7Eaoe%2F$filerss1"; my $url = "http://rpc.weblogs.com/pingSiteForm?name=$title&xmlurl=http%3A%2F%2Foelzant.priv.at%2F%7Eaoe%2F$filerss1&url=http%3A%2F%2Foelzant.priv.at%2F%7Eaoe%2F$filehtml"; print "url: $url\n"; $url =~ /(.*)/; my $cleanurl=$1; my $req = HTTP::Request->new(GET => "$cleanurl"); my $res=$ua->request($req); print "response: ",$res->status_line(),"\n"; } else { print "nothing to update\n"; } #parse_file();