#! /usr/bin/perl -w -I/usr/lib/perl5
# A perl program to sweep usenet newsgroups. MIME encoded attachments,
# e.g. JPEGs, are saved as files.
# Must have program 'munpack' to do the MIME decode.
# Command line usage:
#   "news12k.pl news.group.server news.group.name [FirstArticleNo] [LastArticleNo]"
#
# For every article in the selected range the script:
#   1. fetches the headers and extracts Message-ID, Subject, From, Lines and
#      the first e-mail address found in the From header;
#   2. saves the full article to $ARCHIVEDIR/<group>/<article number>;
#   3. appends an "insert into article_rec" statement to $LISTDIR/<group>.sql;
#   4. when the Lines header exceeds $lineslimit, runs munpack on the saved
#      article and, if munpack reports an image, appends an
#      "insert into jpg_rec" statement;
#   5. removes the saved article file.
# Afterwards the generated SQL file is fed to psql and a notification mail
# is sent.

use strict;
use warnings;
use News::NNTPClient;

my ($newserver, $grpname, $articlestart, $articlestop) = @ARGV;

my $USAGE = "Usage: $0 news.group.server news.group.name "
          . "[FirstArticleNo] [LastArticleNo]\n";

die $USAGE
    unless defined $newserver && length $newserver
        && defined $grpname   && length $grpname;

# The article bounds end up in file names and SQL, so insist on plain digits.
for my $bound ($articlestart, $articlestop) {
    die $USAGE
        if defined $bound && length $bound && $bound !~ /\A\d+\z/;
}

#my $HOMEDIR='/home/rossw/internet/news';
my $LISTDIR    = '/k/news';
my $ARCHIVEDIR = "$LISTDIR/archive";
my $PGDIR      = '/i/pg/bin';
my $debuglevel = 2;
my $portno     = 119;
my $lineslimit = 50;

# Start the counters at 0 so the summary prints numbers even when nothing
# was processed.
my $headcount    = 0;
my $addrcount    = 0;
my $articlecount = 0;
my $jpgcount     = 0;

print "RENEGADE NEWS READER PROGRAM I\n";

my $sqlfile = "$LISTDIR/$grpname.sql";
open(my $dbinsert, '>', $sqlfile)
    or die "ERROR -- Can't open file $grpname.sql for write: $!\n";

# Create the news object and login(authenticate) if necessary
my $c = News::NNTPClient->new($newserver, $portno, $debuglevel);
#$c->authinfo("renegade","rescue99");

my ($first, $last) = $c->group($grpname);
die "ERROR -- Can't select group $grpname on $newserver\n"
    unless defined $first && defined $last;

# Narrow the server's range with the optional command line bounds.
# A bound of 0 (or an empty string) counts as "not given".
if ($articlestart && $articlestart > $first) { $first = $articlestart; }
if ($articlestop  && $articlestop  < $last)  { $last  = $articlestop; }

my $sqlgrp = sql_text($grpname);

ARTICLE: for my $artno ($first + 0 .. $last + 0) {
    my @headtext = $c->head($artno);

    # 221 is the NNTP "headers follow" reply; anything else means the
    # article is not available, so move on to the next number.
    next ARTICLE unless $c->code == 221;
    $headcount++;

    # A fresh hash per article: fields missing from this article must not
    # inherit the values of the previous one.
    my %record;

    # Header names are matched at the start of the line only, so that e.g.
    # "Subject: Re: From: x" or "X-Lines: 3" cannot be mistaken for the
    # From/Lines header.
    for my $h (@headtext) {
        if ($h =~ /^Message-ID: <(.*)>/i) {
            $record{ID} = sql_text($1);
        }
        elsif ($h =~ /^Subject: (.+)/i) {
            $record{SUBJ} = sql_text($1);
        }
        elsif ($h =~ /^Lines: (\d+)/i) {
            $record{LINES} = $1;
        }
        elsif ($h =~ /^From: (.+)/i) {
            $record{FROM} = sql_text($1);

            # \w{2,} rather than \w{2,3}: a fixed upper bound silently
            # truncated addresses whose last domain label is longer
            # than three characters.
            if ($record{FROM}
                =~ /([a-zA-Z0-9_\-\.]+\@[a-zA-Z0-9_\-\.]+\.\w{2,})/)
            {
                $addrcount++;
                $record{ADDR} = $1;
            }
        }
    }

    # Absent headers become empty SQL strings.
    for my $field (qw(ID SUBJ FROM ADDR)) {
        $record{$field} = '' unless defined $record{$field};
    }

    my $artfile = "$ARCHIVEDIR/$grpname/$artno";
    open(my $art, '>', $artfile)
        or die "ERROR -- Can't open news file $artno for write: $!\n";
    my @article = $c->article($artno);
    $articlecount++;
    print {$art} @article;
    #print @article;
    close($art)
        or die "ERROR -- Can't finish writing news file $artno: $!\n";

    # An empty article file ends the whole session.
    if (-z $artfile) {
        print "Zero file size error on group $grpname, article# $artno, end session.";
        last ARTICLE;
    }

    print {$dbinsert}
        "insert into article_rec (grp,xref,id,subj,frm,addr,tstamp) \n",
        "values ('$sqlgrp',$artno,'$record{ID}','$record{SUBJ}',",
        "'$record{FROM}','$record{ADDR}','now');\n";

    # Only articles longer than $lineslimit are handed to munpack.
    if (defined $record{LINES} && $record{LINES} > $lineslimit) {
        my $result = run_capture("$LISTDIR/munpack", '-C',
                                 "$ARCHIVEDIR/$grpname", $artno);
        print "munpack result=$result\n";

        # Pick the first "<filename> (image..." entry out of munpack's output.
        if ($result
            =~ /\n?([a-zA-Z0-9_\-\.]+\.[a-zA-Z0-9]{1,3})\s\(image/)
        {
            my $imgname = $1;
            print "save and DB insert image file $imgname\n";
            print {$dbinsert}
                "insert into jpg_rec (grp, xref, filename, tstamp) ",
                "values ('$sqlgrp',$artno,'$imgname','now');\n";
            $jpgcount++;
        }
    }

    unlink($artfile)
        or warn "WARNING -- Can't remove news file $artno: $!\n";
}    # end - for each article

close($dbinsert)
    or die "ERROR -- Can't finish writing file $grpname.sql: $!\n";

# Load the generated statements into the rnews database.
run_capture("$PGDIR/psql", '-d', 'rnews', '-f', $sqlfile);

print "headcount=$headcount\naddrcount=$addrcount\n"
    . "articlecount=$articlecount\njpgcount=$jpgcount\n";

# NOTE(review): this mail is sent even when the loop stopped early on a
# zero-size article -- confirm that is intended.
run_capture('/home/rossw/internet/sendmail/mailbottext',
            'rwinters@domain.com',
            "Newsgroup $grpname completed successfully.");

# sql_text($text)
# Returns $text made safe for use inside a single-quoted SQL literal by
# replacing every single quote with a double quote. undef becomes ''.
# NOTE(review): backslashes are passed through unchanged; whether that is
# safe depends on how the database server treats backslashes in string
# literals -- confirm.
sub sql_text {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/'/"/g;
    return $text;
}

# run_capture(@command)
# Runs @command without going through a shell (so arguments are never
# re-parsed) and returns everything it wrote to standard output. Returns ''
# and emits a warning when the program cannot be started.
sub run_capture {
    my @command = @_;

    my $pid = open(my $out, '-|', @command);
    unless (defined $pid) {
        warn "ERROR -- Can't run $command[0]: $!\n";
        return '';
    }

    local $/;    # slurp the whole output in one read
    my $text = <$out>;
    close($out);

    return defined $text ? $text : '';
}