#! /usr/local/bin/perl # # Author: James Brister -- berkeley-unix -- # Start Date: Sat, 10 Oct 1998 21:40:11 +0200 # Project: INN # File: pullnews.pl # RCSId: $Id: pullnews.in,v 1.2.2.2 2000/09/20 15:21:51 kondou Exp $ # Time-stamp: # Description: A very simple pull feeder. Connects to multiple remote # machines (in the guise of a reader), and pulls over articles # and feeds them to the local server (in the guise of a feeder). # # Uses a simple configuration file: $HOME/.pullnews to define # which machines to pull articles from and which groups at each # machine to pull over. There is no support yet for more specific # configurations like cross-posted newsgroups to kill etc. # # A configuration file looks like: # # data.pa.vix.com # news.software.nntp 0 0 # comp.lang.c 0 0 # news.uu.net username passwd # uunet.announce 0 0 # uunet.help 0 0 # # hostname line has no leading space on it and an optional # username and password after the hostname.and all the # subsequent group lines for that host must have leading # spaces. The two integers on the group line will be updated by # the program when it runs. They are the unix time the group was # accessed, and the highest numbered article that was pulled # over. # # # NOTE NOTE NOTE NOTE: # # The Packages Net::NNTP is required *AND* the function Net::NNTP::new is # redefined in this file. If you're using a new release of Net::NTTP *AND* # if the Net::NNTP::new function supplied there does NOT call the # $obj->reader() function, then you can remove the redefinition in here. # # Net::NNTP is part of the libnet bundle by Graham Barr and is available # from CPAN or his site: # # http://www.connect.net/gbarr/ # # TODO # - Have option to reset the highwater marks to match whatever # is on the remote server. # - Have an option to reset the highwater marks to zero. # - Have an option to add a group to the config. # - Be able to specify articles to drop if they match a crossposted # group or regexp. # require 5.004; $0 =~ s!.*/!!; my $rcsID =<<'EOM'; $Id: pullnews.in,v 1.2.2.2 2000/09/20 15:21:51 kondou Exp $ EOM $SIG{INT} = \&outtaHere ; $SIG{QUIT} = \&bail ; use Net::NNTP; use Getopt::Std ; use IO::Handle; use Fcntl; use Fcntl qw(:flock); use strict; my $usage = $0; my $defaultConfig = "$ENV{HOME}/.pullnews"; my $defaultPort = 119; my $defaultHost = "localhost"; $usage =~ s!.*/!!; $usage .= " [ -h -q -r file -g groups -c config -s host -p port ] -g groups specifies a collection of groups to get. The value must be a single argument with commas between group names: -g comp.lang.c,comp.lang.lisp,comp.lang.python the groups must be defined in the config file somewhere. Only the hosts that carry those groups will be contacted. -c config specifies the configuration file instead of the default of $ENV{HOME}/.pullnews -s host specifies the hostname to feed articles to (default is $defaultHost) -p port specifies the port to connect to to feed articles (default is: $defaultPort). -r file instead of feeding to a server $0 will instead create an rnews-compatible file. -q $0 will normally be verbose about what it's doing. This option will make it quiet. -h prints this message. "; use vars qw($opt_q $opt_r $opt_s $opt_c $opt_g $opt_p $opt_h); getopts("r:c:s:qg:p:h") || die $usage; die $usage if $opt_h; my @groupsToGet = (); # empty list means all groups in config file. my $rnews = $opt_r; my $groupFile = $opt_c || $defaultConfig; my $localServer = $opt_s || $defaultHost ; my $localPort = $opt_p || $defaultPort; my $quiet = $opt_q; die "can\'t have both ``-s'' and ``-r''\n" if $opt_s && $opt_r; die "``-p'' value not an integer: $opt_p\n" if $localPort !~ m!^\d+$!; @groupsToGet = map { s!^\s*(\S+)s*!$1!; $_ } split (",", $opt_g) if $opt_g; $| = 1 ; my $servers = {} ; my $sname = undef ; my %fed = () ; my %refused = (); my %rejected = (); my $pulled = {} ; my %passwd = (); if ($rnews) { open RNEWS, ">$rnews" || die "cant open rnews-format ouptut: $rnews: $!\n"; if ($rnews eq "-") { open LOG, ">/dev/null" || die "can\'t open /dev/null!: $!\n"; } else { open LOG, ">&STDOUT" || die "can't dup stdout!: $!\n"; } } else { open LOG, ">&STDOUT" || die "can't dup stdout!: $!\n"; } my $oldfh = select ; $| = 1; select LOG ; $| = 1; select $oldfh; my $lockfile = $ENV{HOME} . "/.pullnews.pid"; sysopen (LOCK, "$lockfile", O_RDWR | O_CREAT, 0700) || die "cant create lock file ($lockfile): $!\n"; $oldfh = select ; select LOCK ; $| = 1; select $oldfh; if (!flock (LOCK, LOCK_EX | LOCK_NB)) { seek LOCK, 0, 0; my $otherpid = ; chomp $otherpid; die "Another pullnews (pid: $otherpid) seems to be running.\n"; } print LOCK "$$\n"; print LOG "Starting: ", scalar(localtime(time)), "\n\n" unless $quiet; if (@groupsToGet && ! $quiet) { print LOG "Checking for specific groups:\n"; map { printf LOG "\t%s\n", $_ } @groupsToGet ; print LOG "\n"; } open FILE, "<$groupFile" || die "cant open group file $groupFile\n" ; while () { next if m!^\s*\#! || m!^\s*$! ; if (m!^(\S+)\s*((\S+)\s+(\S+))?$!) { $sname = $1 ; $servers->{$sname} = {} ; $passwd{$sname} = [ $3, $4 ] if ($3 ne ""); } elsif (m!^\s+(\S+)\s+(\d+)\s+(\d+)!) { my ($group,$date,$high) = ($1,$2,$3) ; $servers->{$sname}->{$group} = [ $date, $high ]; } elsif (m!^\s+(\S+)\s*$!) { # assume this is a new group my ($group,$date,$high) = ($1,0,0) ; print LOG "Looking for new group $group on $sname\n" unless $quiet ; $servers->{$sname}->{$group} = [ $date, $high ]; } else { die "Fatal error in $groupFile: $.: $_\n" ; } } close FILE ; my @servers = (@ARGV || sort keys %$servers) ; die "No servers!\n" if ! @servers ; my $localcxn; if ( ! $rnews ) { print LOG "Connecting to downstream host: $localServer " . "port: $localPort ..." unless $quiet; my %localopts = ("Port" => "$localPort"); $localcxn = Net::NNTP->new($localServer, %localopts) || die "Cant connect to server $localServer\n" ; } if ( !$quiet ) { print LOG "done.\n\n"; print LOG "Legend: ``.'' is an article the downstream server refused\n"; print LOG " ``*'' is an article the downstream server rejected\n"; print LOG " ``+'' is an article the downstream server accepted\n"; print LOG " ``x'' is an article the upstream server couldn't "; print LOG "give out.\n"; print LOG "\n"; } foreach my $server (@servers) { my ($username, $passwd); if (@groupsToGet > 0) { my $ok; foreach my $sgroup (keys %{$servers->{$server}}) { $ok = 1 if grep($_ eq $sgroup, @groupsToGet); } if (! $ok) { # user gave -g and the server doesn't have those groups warn "Skipping server $server. Doesn't have specified groups\n"; next; } } if (exists $passwd{$server}) { ($username, $passwd) = @{$passwd{$server}} ; } if (!exists($servers->{$server})) { warn "No such upstream host $server configured.\n" ; next ; } my $shash = $servers->{$server} ; print LOG "connecting to upstream server $server..." unless $quiet ; my $upstream = Net::NNTP->new($server) ; if (!$upstream) { print LOG "failed." unless $quiet; warn "cant connect to upstream server $server: $!\n" ; next ; } else { print LOG "done.\n" unless $quiet ; } if (!$upstream->reader()) { warn sprintf ("Cant issue MODE READER command: %s %s\n", $upstream->code(), $upstream->message()); warn "We\'ll try anyway\n" ; } if ($username && !$upstream->authinfo($username, $passwd)) { warn sprintf ("failed to authorize: %s %s\n", $upstream->code(), $upstream->message()); next; } foreach my $group (sort keys %{$servers->{$server}}) { next if (@groupsToGet && !grep ($_ eq $group, @groupsToGet)); last if !crossFeedGroup ($upstream,$localcxn,$server,$group,$shash) ; } $upstream->quit() ; } saveConfig () ; stats() unless $quiet ; print LOG "\nDone ", scalar(localtime(time)), "\n" unless $quiet; exit (0) ; ######################### sub stats { my $ltotal ; my $reftotal ; my $rejtotal ; my $sum; map { $reftotal += $refused{$_} } keys %refused; map { $rejtotal += $rejected{$_} } keys %rejected; map { $ltotal += $fed{$_} } keys %fed ; $sum = $reftotal + $rejtotal + $ltotal; printf LOG "\n%d article%s were offered to server on $localServer\n", $sum, ($sum != 1 ? "s" : "") ; return if ($sum == 0); printf LOG "%d article%s accepted\n", $ltotal, ($ltotal != 1 ? "s were" : " was") if ($ltotal != 0); printf LOG "%d article%s refused\n", $reftotal, ($reftotal != 1 ? "s were" : " was") if ($reftotal != 0); printf LOG "%d article%s rejected\n", $rejtotal, ($rejtotal != 1 ? "s were" : " was") if ($rejtotal != 0); map { print LOG "\nUpstream server $_:\n" ; my $server = $_; my $width = 0; map { $width = length if length > $width; } sort keys %{$pulled->{$server}}; map { printf LOG "\t%${width}s %d\n", $_, $pulled->{$server}->{$_}; } sort keys %{$pulled->{$server}}; } sort keys %{$pulled} ; } sub saveConfig { $SIG{INT} = $SIG{QUIT} = 'IGNORE'; open FILE,">$groupFile" || die "cant open $groupFile: $!\n" ; my $server ; my $group ; print LOG "\nSaving config\n" unless $quiet ; print FILE "# Format: (date is epoch seconds)\n" ; print FILE "# hostname [username passwd]\n" ; print FILE "# group date high\n" ; foreach $server (sort keys %$servers) { print FILE "$server" ; if (defined $passwd{$server}) { printf FILE " %s %s", $passwd{$server}->[0], $passwd{$server}->[1]; } print FILE "\n"; foreach $group (sort keys %{$servers->{$server}}) { my ($date,$high) = @{$servers->{$server}->{$group}} ; printf FILE "\t%s %d %d\n",$group,$date,$high ; } } close FILE ; } sub outtaHere { saveConfig() ; exit (0) ; } sub bail { warn "received QUIT signal. Not saving config.\n"; exit (0); } sub crossFeedGroup { my ($fromServer,$toServer,$server,$group,$shash) = @_ ; my ($date,$high) = @{$shash->{$group}} ; my ($prevDate,$prevHigh) = @{$shash->{$group}} ; my ($narticles,$first,$last,$name) = $fromServer->group($group); my $count ; my $code ; my $startTime = time; if (!defined($narticles)) { # group command failed. warn sprintf ("Group command failed: %s %s\n", $fromServer->code(), $fromServer->message()); return undef; } printf LOG "\n%s:\n", $name; printf LOG "\tlast checked: %s\n", scalar(localtime($prevDate)); printf LOG "\t%d articles available. First %d Last %d\n", $narticles, $first, $last ; printf LOG "\tOur current highest: %d", $prevHigh, ; return 0 if ! $name ; if ($narticles == 0) { print LOG " (nothing to get)\n"; return 1 ; } if ($prevHigh == -1 || $last <= $prevHigh) { # we connected OK but there's nothing there, or we just want # to reset our highwater mark. $shash->{$group} = [ time, $high ]; print LOG " (nothing to get)\n"; return 1 ; } else { my $toget = (($last - $prevHigh) < $narticles ? $last - $prevHigh : $narticles); printf LOG " (%d to get)\n", $toget; } my $i; for ($i = ($first > $high ? $first : $high + 1) ; $i <= $last ; $i++) { $count++ ; my $article = $fromServer->article($i) ; if ($article) { my $msgid ; my $headers = 1; my $idx; for ($idx = 0 ; $idx < @{$article} ; $idx++) { if ($article->[$idx] =~ m!^message-id:\s*(\S+)!i) { $msgid = $1 ; } # catch some of the more common problems with articles. if ($article->[$idx] =~ m!^\s+\n$!) { $article->[$idx] = "\n"; warn "Fixing bad header line: $article->[$idx]\n"; } last if ($article->[$idx] eq "\n"); } if (!$msgid) { warn "No message-id found in article\n" ; next ; } $pulled->{$server}->{$group}++; if ($rnews) { my $len = 0; map { $len += length($_) } @{$article}; printf RNEWS "#! rnews %d\n", $len; map { print RNEWS $_ } @{$article}; print LOG "+" unless $quiet; } else { if (!$toServer->ihave($msgid,$article)) { my $code = $toServer->code() ; if ($code == 435) { print LOG "." unless $quiet; $refused{$group}++; } elsif ($code == 437) { print LOG "*" unless $quiet; $rejected{$group}++; } else { warn "Transfer to local server failed: ", $toServer->message,"\n" ; $toServer->quit() ; saveConfig() ; exit (1); } } else { print LOG "+" unless $quiet; $fed{$group}++ ; } } $shash->{$group} = [ time, $i ]; } else { print LOG "x" unless $quiet; ## printf LOG ("\nDEBUGGING %d %d\n", $fromServer->code(), ## $fromServer->message()); } print LOG "\n" if (!$quiet && (($count % 50) == 0)) ; } print LOG "\n" unless $quiet; printf LOG "%s article%s retrieved in %d seconds\n", $count, ($count == 1 ? "" : "s"), (time - $startTime + 1); return 1; } package Net::NNTP ; ## Slightly modified implementation of the Net::NNTP::new function. The ## original definition automatically sent a MODE READER command over which ## breaks when trying to feed INN via IHAVE. sub new { my $self = shift; my $type = ref($self) || $self; my $host = shift if @_ % 2; my %arg = @_; my $obj; $host ||= $ENV{NNTPSERVER} || $ENV{NEWSHOST}; my $hosts = defined $host ? [ $host ] : $NetConfig{nntp_hosts}; @{$hosts} = qw(news) unless @{$hosts}; my $h; foreach $h (@{$hosts}) { $obj = $type->SUPER::new(PeerAddr => ($host = $h), PeerPort => $arg{Port} || 'nntp(119)', Proto => 'tcp', Timeout => defined $arg{Timeout} ? $arg{Timeout} : 120 ) and last; } return undef unless defined $obj; ${*$obj}{'net_nntp_host'} = $host; $obj->autoflush(1); $obj->debug(exists $arg{Debug} ? $arg{Debug} : undef); unless ($obj->response() == CMD_OK) { $obj->close; return undef; } ##++ brister removed the bit below. ## my $c = $obj->code; ## my @m = $obj->message; ## ## # if server is INN and we have transfer rights the we are currently ## # talking to innd not nnrpd ## if($obj->reader) ## { ## # If reader suceeds the we need to consider this code to determine postok ## $c = $obj->code; ## } ## else ## { ## # I want to ignore this failure, so restore the previous status. ## $obj->set_status($c,\@m); ## } ## ${*$obj}{'net_nntp_post'} = $c >= 200 && $c <= 209 ? 1 : 0; ##-- $obj; }