/usr/share/perl5/WWW/RobotRules/AnyDBM_File.pm is in libwww-robotrules-perl 6.01-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | package WWW::RobotRules::AnyDBM_File;
require WWW::RobotRules;
@ISA = qw(WWW::RobotRules);
$VERSION = "6.00";
use Carp ();
use AnyDBM_File;
use Fcntl;
use strict;
=head1 NAME
WWW::RobotRules::AnyDBM_File - Persistent RobotRules
=head1 SYNOPSIS
require WWW::RobotRules::AnyDBM_File;
require LWP::RobotUA;
# Create a robot useragent that uses a diskcaching RobotRules
my $rules = WWW::RobotRules::AnyDBM_File->new( 'my-robot/1.0', 'cachefile' );
my $ua = LWP::RobotUA->new( 'my-robot/1.0', 'me@foo.com', $rules );
# Then just use $ua as usual
$res = $ua->request($req);
=head1 DESCRIPTION
This is a subclass of I<WWW::RobotRules> that uses the AnyDBM_File
package to implement persistent diskcaching of F<robots.txt> and host
visit information.
The constructor (the new() method) takes an extra argument specifying
the name of the DBM file to use. If the DBM file already exists, then
you can specify undef as agent name as the name can be obtained from
the DBM database.
=cut
# Constructor.
#
#   $class - class to bless the new object into
#   $ua    - robot agent name; may be false if the DBM file already
#            holds a name under the "|ua-name|" key
#   $file  - name of the DBM file (required)
#
# Ties $self->{'dbm'} to the DBM file, creating the file if needed.
# Croaks when no filename is given, when the file cannot be opened, or
# when no agent name is available from either the argument or the file.
sub new {
    my ($class, $ua, $file) = @_;
    Carp::croak('WWW::RobotRules::AnyDBM_File filename required')
        if !$file;

    my $self = bless { 'filename' => $file }, $class;

    tie(%{ $self->{'dbm'} }, 'AnyDBM_File', $file, O_CREAT | O_RDWR, 0640)
        or Carp::croak("Can't open $file: $!");

    if ($ua) {
        # An explicit name was supplied: record it through agent().
        $self->agent($ua);
        return $self;
    }

    # No name supplied: one must already be stored in the DBM file.
    my $stored = $self->{'dbm'}{"|ua-name|"};
    Carp::croak("No agent name specified") unless $stored;

    return $self;
}
# Get or set the agent name stored in the DBM file under "|ua-name|".
#
#   $newname - optional new agent name; a trailing version number
#              (e.g. "/1.0") is stripped before it is compared/stored
#
# When the (stripped) new name differs from the stored one, everything
# cached so far is considered stale: the DBM file is reopened with
# O_TRUNC, the hash is emptied and the new name is stored.
#
# Returns the name that was stored before the call.
# Croaks if the DBM file cannot be reopened.
sub agent {
    my ($self, $newname) = @_;
    my $old = $self->{'dbm'}{"|ua-name|"};

    if (defined $newname) {
        # Lose the version.  The dot is escaped so that only a real
        # "digits.digits" version suffix is removed.
        $newname =~ s!/?\s*\d+\.\d+\s*$!!;

        unless ($old && $old eq $newname) {
            # Old info is now stale.
            my $file = $self->{'filename'};
            untie %{$self->{'dbm'}};
            # Check the re-tie: if it failed silently the object would
            # carry on with a plain in-memory hash and nothing would be
            # persisted any more.
            tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_TRUNC|O_RDWR, 0640
                or Carp::croak("Can't open $file: $!");
            # Empty the hash explicitly as well as truncating the file.
            %{$self->{'dbm'}} = ();
            $self->{'dbm'}{"|ua-name|"} = $newname;
        }
    }

    return $old;
}
# Return the visit count recorded for $netloc, or 0 when there is no
# (true) visit record.  The count is the first ";"-separated field of
# the "$netloc|vis" entry.
sub no_visits {
    my ($self, $netloc) = @_;
    my $record = $self->{'dbm'}{"$netloc|vis"}
        or return 0;
    return (split(/;\s*/, $record))[0];
}
# Return the time of the last recorded visit to $netloc, i.e. the second
# ";"-separated field of the "$netloc|vis" entry, or undef when there is
# no (true) visit record.
sub last_visit {
    my ($self, $netloc) = @_;
    my $record = $self->{'dbm'}{"$netloc|vis"};
    return $record ? (split(/;\s*/, $record))[1] : undef;
}
# Get and optionally set the expiry time stored for $netloc.
#
#   $fresh - optional new expiry time; when defined it is stored under
#            "$netloc|exp" followed by "; " and its localtime() string
#
# Returns the previously stored value with everything from the first
# ";" onward removed.
sub fresh_until {
    my ($self, $netloc, $fresh) = @_;
    my $key = "$netloc|exp";

    my $old = $self->{'dbm'}{$key};
    $old =~ s/;.*// if $old;    # remove cleartext

    if (defined $fresh) {
        my $cleartext = localtime($fresh);
        $self->{'dbm'}{$key} = "$fresh; $cleartext";
    }

    return $old;
}
# Record a visit to $netloc.
#
#   $time - time of the visit; the current time is used when false
#
# The "$netloc|vis" entry holds "count; time; cleartext time".  The
# count is incremented, and the stored time never moves backwards: if
# the previously recorded time is later than $time, it is kept.
sub visit {
    my ($self, $netloc, $time) = @_;
    $time = time unless $time;

    my $key   = "$netloc|vis";
    my $count = 0;

    if (my $previous = $self->{'dbm'}{$key}) {
        ($count, my $last) = split(/;\s*/, $previous);
        if ($last > $time) {
            $time = $last;
        }
    }

    $count++;
    $self->{'dbm'}{$key} = "$count; $time; " . localtime($time);
}
# Append @rules to the rules stored for $netloc.
#
# Rules live under the consecutive keys "$netloc|r1", "$netloc|r2", ...
# The first unused slot is located with defined() rather than a truth
# test, so that an already stored rule that happens to be false ('' or
# '0') is not mistaken for a free slot and overwritten.
sub push_rules {
    my ($self, $netloc, @rules) = @_;

    my $cnt = 1;
    $cnt++ while defined $self->{'dbm'}{"$netloc|r$cnt"};

    for my $rule (@rules) {
        $self->{'dbm'}{"$netloc|r$cnt"} = $rule;
        $cnt++;
    }

    return;
}
# Delete every rule stored for $netloc, i.e. the consecutive keys
# "$netloc|r1", "$netloc|r2", ...
#
# The loop runs while the slot is defined, not while it is true:
# stopping at a false rule ('' or '0') would leave the rules after it
# in the database, where they could resurface as stale entries once the
# lower slots are filled again.
sub clear_rules {
    my ($self, $netloc) = @_;

    my $cnt = 1;
    while (defined $self->{'dbm'}{"$netloc|r$cnt"}) {
        delete $self->{'dbm'}{"$netloc|r$cnt"};
        $cnt++;
    }

    return;
}
# Return the list of rules stored for $netloc, read from the
# consecutive keys "$netloc|r1", "$netloc|r2", ... in order.
#
# Reading stops at the first undefined slot.  A truth test is
# deliberately not used: it would cut the list short at a stored rule
# that is '' or '0'.
sub rules {
    my ($self, $netloc) = @_;

    my @rules;
    my $cnt = 1;
    while (defined(my $rule = $self->{'dbm'}{"$netloc|r$cnt"})) {
        push @rules, $rule;
        $cnt++;
    }

    return @rules;
}
# Intentionally a no-op: does nothing and returns nothing.
sub dump {
    return;
}
1;
=head1 SEE ALSO
L<WWW::RobotRules>, L<LWP::RobotUA>
=head1 AUTHORS
Hakan Ardo E<lt>hakan@munin.ub2.lu.se>, Gisle Aas E<lt>aas@sn.no>
=cut
|