#!/usr/bin/perl
# Author: Ian Hickman
#
# Builds "age at hit" distributions for papers, one per rank bucket.
#
# Inputs
#   ../../d_firstsubmit  one record per line; field 0 is the first-submission
#                        date as yyyymmdd, field 2 is the paper key.  The key
#                        has to match the "<field>/<7-digit id>" string built
#                        from each hit below for the lookup to succeed.
#   myrankedpapers       one record per line; field 0 is the paper key,
#                        field 1 is its rank (rounds to 3 = top, 2 = middle,
#                        1 = bottom, anything else = unknown).
#   hit log              whitespace-separated lines read from the files named
#                        on the command line, or STDIN when there are none.
#                        Field 3 carries the date, field 6 the request path.
#                        NOTE(review): presumably a server access log, and the
#                        original input handle was lost from the source, so
#                        "<>" is an assumption - confirm.
#
# Outputs
#   topdistribution, middledistribution, bottomdistribution,
#   unknowndistribution: "age_in_days<TAB>fraction_of_bucket_hits" per line,
#   sorted by age.
#
# Lines that cannot be dated, hits on unknown papers and hits that predate
# the submission are reported on STDERR and left out of the distributions.

use strict;
use warnings;
use Time::Local;

# Month abbreviation -> zero-based month number, as timelocal() expects.
my %months = (
    Jan => 0, Feb => 1, Mar => 2, Apr => 3, May => 4,  Jun => 5,
    Jul => 6, Aug => 7, Sep => 8, Oct => 9, Nov => 10, Dec => 11,
);

my %subtimes;    # paper key -> epoch of local midnight on the submission day
my %subdates;    # paper key -> submission date exactly as read (yyyymmdd)
my %rank;        # paper key -> rank exactly as read

# Rounded rank -> bucket name; any other rank lands in "unknown".
my %bucket_of_rank = ( 3 => 'top', 2 => 'middle', 1 => 'bottom' );
my @buckets = qw(top middle bottom unknown);

my %histogram;    # bucket -> { age in days -> number of hits }
my %total;        # bucket -> number of hits
for my $bucket (@buckets) {
    $histogram{$bucket} = {};
    $total{$bucket}     = 0;
}

print STDERR "opening papers\n";
openpapers();
print STDERR "opening ranks\n";
openranks();
print STDERR "doing the rest\n";

my $line = 0;    # zero-based index of the log line, used in diagnostics
while ( my $entry = <> ) {
    my @field = split ' ', $entry;

    # get the date of the hit (nearest day)
    my $stamp   = defined $field[3] ? $field[3] : '';
    my $hittime = hit_day_start($stamp);
    if ( !defined $hittime ) {
        print STDERR "$line $stamp bad date\n";
        next;
    }

    # get the unique id of the paper
    my $request = defined $field[6] ? $field[6] : '';
    my $paper   = paper_key($request);

    # get the date the paper was first submitted (nearest day)
    if ( !exists $subtimes{$paper} ) {
        print STDERR "$line $paper $request not found\n";
        next;
    }

    # Both times are local midnights; a local day is not always 24 hours
    # long (e.g. across a daylight-saving change), so the quotient can be
    # fractional and is rounded before it is used as a histogram key.
    my $diff = ( $hittime - $subtimes{$paper} ) / ( 3600 * 24 );
    if ( $diff < 0 ) {
        print STDERR "$line $diff $stamp $subdates{$paper} $paper\n";
        next;
    }

    my $bucket = $bucket_of_rank{ roundoff( $rank{$paper} ) };
    $bucket = 'unknown' unless defined $bucket;
    $histogram{$bucket}{ roundoff($diff) }++;
    $total{$bucket}++;
}
continue {
    # Runs after "next" as well, so every input line is counted.
    $line++;
}

# save the hit distribution of every bucket
for my $bucket (@buckets) {
    save_distribution( "${bucket}distribution", $histogram{$bucket},
        $total{$bucket} );
}

# Write one normalised histogram to $filename.
#   $filename   output file, truncated if it already exists
#   $count_for  hash ref: age in days -> number of hits
#   $total      number of hits in the bucket (the divisor)
# The file is created even when the histogram is empty.  $total is only used
# when there is at least one key, and it is then at least 1.
# Dies if the file cannot be opened or closed.
sub save_distribution {
    my ( $filename, $count_for, $total ) = @_;

    open( my $out, '>', $filename ) or die "Cant open: $!";
    foreach my $age ( sort { $a <=> $b } keys %{$count_for} ) {
        print {$out} "$age\t" . ( $count_for->{$age} / $total ) . "\n";
    }
    close($out) or die "Cant close: $!";
    return;
}

# Convert the date field of a hit into the epoch of local midnight that day.
# The field is expected to look like <char>dd<char>Mon<char>yyyy..., for
# example "[12/Mar/2001:10:11:12".
# Returns nothing (undef in scalar context) when the field does not have
# that shape or the month abbreviation is not in %months.  timelocal() still
# croaks on impossible calendar dates such as day 31 in a 30-day month.
sub hit_day_start {
    my ($stamp) = @_;
    return unless defined $stamp;

    my ( $day, $monthtext, $year ) = $stamp =~ /^.(\d\d).(\w+).(\d{4}).*/;
    return unless defined $day && exists $months{$monthtext};

    return timelocal( 0, 0, 0, $day, $months{$monthtext}, $year );
}

# Build the "<field>/<id>" lookup key from the request path of a hit.
# <id> is the first run of seven digits; <field> is taken from the path
# component after "/ftp/" when the path mentions "papers", otherwise from the
# component in front of the seven-digit id.  A part that cannot be extracted
# is left empty, so the key will not be found and the caller reports the hit.
sub paper_key {
    my ($request) = @_;

    my ($paperid) = $request =~ /(\d{7})/;
    my $paperfield;
    if ( $request =~ /papers/ ) {
        ($paperfield) = $request =~ /\/ftp\/([\w-]+)\.?.*\/papers/;
    }
    else {
        ($paperfield) = $request =~ /\/.*\/([\w-]+)\.?.*\/\??\d{7}/;
    }

    $paperfield = '' unless defined $paperfield;
    $paperid    = '' unless defined $paperid;
    return "$paperfield/$paperid";
}

# Round a number to the nearest integer, halves away from zero.
# An undefined or empty argument counts as 0.
sub roundoff {
    my ($num) = @_;
    return 0 unless defined $num && length $num;

    my $whole    = int $num;
    my $fraction = $num - $whole;
    return $whole + 1 if $fraction >= 0.5;
    return $whole - 1 if $fraction <= -0.5;
    return $whole;
}

# Load the first-submission date of every paper into %subtimes / %subdates.
# Records without a key field, and records whose first eight characters are
# not an all-digit yyyymmdd date with a month above 00, are skipped.  When a
# key occurs more than once the first record wins and "clash" is printed to
# STDOUT for each later one.
# Dies if the list cannot be opened or closed; timelocal() croaks on
# impossible calendar dates.
sub openpapers {
    open( my $paperlist, '<', "../../d_firstsubmit" )
        or die "Cant open paperlist: $!";

    # loop through list of papers getting date and paperid
    while ( my $record = <$paperlist> ) {
        my @field = split ' ', $record;
        next unless @field >= 3;

        my ( $date, $key ) = @field[ 0, 2 ];
        my ( $year, $month, $day ) = $date =~ /(....)(..)(..)/;
        next unless defined $year;
        next if grep { /\D/ } $year, $month, $day;
        next unless $month > 0;

        if ( exists $subtimes{$key} ) {
            print "clash\n";
            next;
        }
        $subtimes{$key} = timelocal( 0, 0, 0, $day, $month - 1, $year );
        $subdates{$key} = $date;
    }

    close($paperlist) or die "Cant close paper list: $!";
    return;
}

# Load the rank of every ranked paper into %rank (field 0 = key, field 1 =
# rank).  A later record for the same key overwrites the earlier one.
# Dies if the file cannot be opened or closed.
sub openranks {
    open( my $ranks, '<', "myrankedpapers" ) or die "Cant open: $!";

    while ( my $record = <$ranks> ) {
        my @field = split ' ', $record;
        next unless @field;    # blank line
        $rank{ $field[0] } = $field[1];
    }

    close($ranks) or die "Cant close: $!";
    return;
}