#!/usr/bin/perl
# Counts the papers whose first submission falls inside a fixed date window,
# and how many of those papers were downloaded within their first month
# (31 days) after that submission.  Both counts are printed; the percentage
# is hits / papers.
#
# Usage:  firsthit < ../db/alldownloads
# Author: Ian Hickman

use strict;
use warnings;
use Time::Local;

# Month abbreviations as used in the download log, mapped to the zero-based
# month numbers that timelocal() expects.
my %months = (
    Jan => 0, Feb => 1, Mar => 2,  Apr => 3,
    May => 4, Jun => 5, Jul => 6,  Aug => 7,
    Sep => 8, Oct => 9, Nov => 10, Dec => 11,
);

# Submission window, exclusive at both ends.  timelocal() months are
# zero-based, so these are 24 Jul 1999 and 9 May 2000 at local midnight.
# Computed once here rather than once per input line.
my $window_start = timelocal( 0, 0, 0, 24, 6, 1999 );
my $window_end   = timelocal( 0, 0, 0, 9,  4, 2000 );

my $seconds_per_day  = 3600 * 24;
my $first_month_days = 31;

# Run the report only when executed as a script, so the helper subs below
# can be loaded and exercised on their own.
main() unless caller;

# Entry point: loads the submission dates, scans the download log on STDIN
# and prints the paper and hit counts.
sub main {
    print "opening\n";
    print " $window_start\n";
    print " $window_end\n";

    my ( $first_submit_of, $papers )
        = load_first_submissions("../db/d_firstsubmit");
    print "$papers\n";

    my $hits = 0;
    while ( my $line = <STDIN> ) {
        my @field = split ' ', $line;

        # Field 7 carries the requested path, field 4 the timestamp.
        next unless defined $field[6];

        my $paper = paper_key( $field[6] );
        next unless defined $paper && exists $first_submit_of->{$paper};

        my $hit_time = parse_hit_time( $field[3] );
        if ( !defined $hit_time ) {
            warn "firsthit: line $.: cannot parse timestamp '$field[3]'\n";
            next;
        }

        my $days = days_after_submission( $first_submit_of->{$paper},
            $hit_time );
        $hits++ if $days < $first_month_days;

        # Only the first logged hit of each paper is considered.
        # NOTE(review): this presumes the log is in chronological order.
        delete $first_submit_of->{$paper};
    }

    print "papers: $papers hits: $hits";
    return;
}

# Reads "<epoch-seconds> <paper-key>" lines from $path and keeps the entries
# whose timestamp lies strictly inside the submission window.
# Returns ( \%first_submit_of, $count_of_accepted_lines ).
# Dies if the file cannot be opened or closed.
sub load_first_submissions {
    my ($path) = @_;

    my %first_submit_of;
    my $papers = 0;

    open my $fh, '<', $path or die "Cant open $path: $!";
    while ( my $line = <$fh> ) {
        my ( $submitted, $paper ) = split ' ', $line;
        next unless defined $paper;    # blank or truncated line

        if ( $submitted > $window_start && $submitted < $window_end ) {
            $first_submit_of{$paper} = $submitted;
            $papers++;
        }
    }
    close $fh or die "Cant close $path: $!";

    return ( \%first_submit_of, $papers );
}

# Builds the "<field>/<7-digit id>" key for a requested path, or returns
# undef when either part cannot be extracted.
sub paper_key {
    my ($url) = @_;

    my ($paperid) = $url =~ /(\d{7})/;

    my $paperfield;
    if ( $url =~ /papers/ ) {
        ($paperfield) = $url =~ m{/ftp/([\w-]+)\.?.*/papers};
    }
    else {
        ($paperfield) = $url =~ m{/.*/([\w-]+)\.?.*/\??\d{7}};
    }

    return unless defined $paperid && defined $paperfield;
    return "$paperfield/$paperid";
}

# Converts a log timestamp such as "[24/Jul/1999:12:34:56" to epoch seconds
# in local time.  Returns undef when the text does not match, the month
# abbreviation is unknown, or timelocal() rejects the date.
sub parse_hit_time {
    my ($stamp) = @_;
    return unless defined $stamp;

    my ( $day, $monthtext, $year, $hour, $min, $sec )
        = $stamp =~ /.(\d\d).(\w+).(\d{4}).(\d\d).(\d\d).(\d\d)/;
    return unless defined $sec && exists $months{$monthtext};

    # timelocal() croaks on out-of-range values; treat that as unparseable.
    my $epoch = eval {
        timelocal( $sec, $min, $hour, $day, $months{$monthtext}, $year );
    };
    return $epoch;
}

# Whole days from submission to hit, rounded to the nearest day.  Positive
# when the hit came after the submission.
sub days_after_submission {
    my ( $submit_time, $hit_time ) = @_;
    return roundoff( ( $hit_time - $submit_time ) / $seconds_per_day );
}

# Rounds to the nearest integer, halves away from zero.
sub roundoff {
    my ($num) = @_;

    my $whole = int $num;               # truncates toward zero
    my $frac  = abs( $num - $whole );

    return $whole if $frac < 0.5;
    return $num < 0 ? $whole - 1 : $whole + 1;
}

# True value so the file can also be loaded with require/do.
1;