#!/usr/bin/perl
# Do users look at a paper then look at the ones it references
# all in one session?
#
# usage: followupcitations3 < ../db/all
#
# V3 not just in this session, but over all user usage
# Author: Ian Hickman
#
# Inputs:
#   - the file "whatciteswhat" in the current directory: whitespace-separated
#     pairs, one per line; the first field is recorded as citing the second.
#   - the usage log on STDIN (or files named on the command line). Only field 0
#     (used as the user id) and field 6 (used as the requested URL) are read.
#     NOTE(review): consecutive lines with the same field 0 are treated as one
#     user's activity, so the log is presumably sorted by user -- confirm.
#
# Output (STDOUT): one line per download of a paper that is cited by a paper
# the same user downloaded earlier, followed by three summary totals.

use strict;
use warnings;

# Return true when the requested URL looks like a paper download, i.e. it
# mentions one of the known formats/locations anywhere in the string.
sub is_paper_download {
    my ($request) = @_;
    return $request =~ /html|ps|pdf|e-print|ftp|dvi/ ? 1 : 0;
}

# Extract (archive, number) for the paper requested on a log line.
# The number is the first run of seven digits on the line; the archive name is
# taken from the path, using a different pattern when the URL contains
# "papers". Both patterns are applied to the whole log line, not only to the
# URL field. Parts that cannot be found are returned as empty strings.
sub paper_id_parts {
    my ( $record, $request ) = @_;

    my ($number) = $record =~ /(\d{7})/;

    my $archive;
    if ( $request =~ /papers/ ) {
        ($archive) = $record =~ m{/ftp/([\w-]+)\.?.*/papers};
    }
    else {
        ($archive) = $record =~ m{/.*/([\w-]+)\.?.*/\??\d{7}};
    }

    $archive = '' unless defined $archive;
    $number  = '' unless defined $number;
    return ( $archive, $number );
}

my $citation_db = "whatciteswhat";

# citing paper id => space-separated list of the paper ids it cites
my %citedto;

# paper id => space-separated list of papers already downloaded by the current
# user that cite it
my %citesinsession;

my $line_number        = 0;     # log lines read so far
my $dloads             = 0;     # downloads whose archive name contains "hep"
my $dloadsminussession = 0;     # "hep" downloads minus one per user with any download
my $refs               = 0;     # downloads that followed up a citation
my $session_counted    = 0;     # true once the current user has been subtracted
my $last_user          = '';

# open the list of citations (whatciteswhat)
print STDERR "opening citation db\n";
open( my $citation_fh, '<', $citation_db )
    or die "Cant open $citation_db: $!";
while ( my $entry = <$citation_fh> ) {
    my ( $citing, $cited ) = split ' ', $entry;
    next unless defined $cited;    # skip blank or one-field lines
    $citedto{$citing} .= $cited . " ";
}
close($citation_fh) or die "Cant close $citation_db: $!";

print STDERR "doing rest\n";
while ( my $record = <> ) {
    my @field   = split ' ', $record;
    my $user    = defined $field[0] ? $field[0] : '';
    my $request = defined $field[6] ? $field[6] : '';

    if ( $last_user ne $user ) {
        # between sessions: forget what the previous user downloaded
        %citesinsession  = ();
        $session_counted = 0;
    }
    $last_user = $user;

    # in a session
    $line_number++;

    next unless is_paper_download($request);

    # The first download by each user is discounted once from the
    # "minus sessions" total.
    if ( !$session_counted ) {
        $dloadsminussession--;
        $session_counted = 1;
    }

    # $paper is the unique id of the paper being downloaded
    my ( $archive, $number ) = paper_id_parts( $record, $request );
    my $paper = $archive . "/" . $number;

    if ( $archive =~ /hep/ ) {
        $dloads++;
        $dloadsminussession++;
    }

    # check table to see if this paper is cited by an earlier download
    if ( exists $citesinsession{$paper} ) {
        print "$line_number user: $user sources: $citesinsession{$paper} target: $paper\n";
        $refs++;
    }

    # add all target citations to table (every cited paper, including the
    # first one in the list)
    next unless defined $citedto{$paper};
    for my $cited ( split ' ', $citedto{$paper} ) {
        $citesinsession{$cited} .= $paper . " ";
    }
}

print "Total Downloads: $dloads\n";
print "Downloads minus Sessions: $dloadsminussession\n";
print "Total References: $refs\n";