#!/usr/bin/perl
# Do users look at a paper and then look at the ones it references?
# usage: followupcitations3 < ../db/nodupdloads
# V3: not just within a single session, but over all of a user's usage
# Author: Ian Hickman
#
# Input 1: ../db/d_cites, one whitespace-separated pair of paper ids per line.
#          NOTE(review): the pair is presumably "citing-paper cited-paper";
#          confirm against whatever generates d_cites.
# Input 2: the download log on STDIN.  Field 1 is treated as the user id and
#          field 7 as the requested path; consecutive lines of the same user
#          are treated as one group.
# Output:  for every download of a paper that an earlier download by the same
#          user refers to, one line "<earlier-paper> <this-paper>" on STDOUT.
use strict;
use warnings;

# Paper id -> array ref of the paper ids paired with it in d_cites.
my %references_of;

# Paper id -> array ref of papers already downloaded by the current user
# that refer to it.  Emptied whenever the user id changes.
my %sources_for;

# Running totals.  They only feed the (commented-out) summary at the bottom.
my ($line, $dloads, $refs, $dloadsminussession, $dloadsession, $lastuser)
    = (0, 0, 0, 0, 0, 0);

# open the list of citations (whatciteswhat)
print STDERR "opening citation db\n";
open(my $cites_fh, '<', '../db/d_cites') or die "Cant open: $!";
while (my $record = <$cites_fh>) {
    my ($from, $to) = split ' ', $record;
    next unless defined $to;    # blank or one-column line: nothing to record
    push @{ $references_of{$from} }, $to;
}
close($cites_fh) or die "Cant close: $!";

print STDERR "doing rest\n";
while (my $record = <STDIN>) {
    my @field = split ' ', $record;

    # Short lines are tolerated: missing fields behave as empty strings.
    my $user = defined $field[0] ? $field[0] : '';
    my $url  = defined $field[6] ? $field[6] : '';

    if ($lastuser ne $user) {
        # between users: forget everything the previous user looked at
        %sources_for  = ();
        $dloadsession = 0;
    }
    $lastuser = $user;
    $line++;

    # Only requests that look like a paper download are of interest.
    next unless $url =~ /html|ps|pdf|e-print|ftp|dvi/;

    # The first download of each user is excluded from $dloadsminussession:
    # it is decremented here and incremented again below.
    if ($dloadsession == 0) {
        $dloadsminussession--;
        $dloadsession = 1;
    }

    # Build the unique paper id "<archive>/<7-digit number>".  The patterns
    # are matched against the whole log line, not just the path field.
    my ($papernum) = $record =~ /(\d{7})/;
    my $paperfield;
    if ($url =~ /papers/) {
        ($paperfield) = $record =~ /\/ftp\/([\w-]+)\.?.*\/papers/;
    }
    else {
        ($paperfield) = $record =~ /\/.*\/([\w-]+)\.?.*\/\??\d{7}/;
    }

    # A failed capture contributes an empty component instead of a warning.
    my $paper = (defined $paperfield ? $paperfield : '') . "/"
              . (defined $papernum   ? $papernum   : '');

    $dloads++;
    $dloadsminussession++;

    # Was this paper referenced by something the user downloaded earlier?
    if (exists $sources_for{$paper}) {
        for my $earlier (@{ $sources_for{$paper} }) {
            print "$earlier $paper\n";
        }
        $refs++;
    }

    # Register every reference of this paper so that a later download of
    # one of them is recognised as a follow-up.  The loop covers ALL
    # references; it previously started at index 1 and so skipped the first.
    for my $cited (@{ $references_of{$paper} || [] }) {
        push @{ $sources_for{$cited} }, $paper;
    }
}

#print"Total Downloads: $dloads\n";
#print"Downloads minus Sessions: $dloadsminussession\n";
#print"Total References: $refs\n";
#print"% followedup: ".(($refs/$dloadsminussession)*100)."%\n";