#!/usr/bin/env perl
#
# "trec_eval" compatible program for NTCIR 5 Patent Classification Task
#
# 2005.08.22: A bug in the F-measure calculation has been fixed. When
# averaging, the F-measures of documents without confident categories
# had not been included in the sum.
# 2005.02.14: initial version (M. Iwayama)
#
# usage
# trec_eval.pl [-q] [-c] [-f] [-m] trec_rel_file trec_top_file
#
# options
# -q: print results for each query
# -c: print in the CSV format
# -f: print F-measures for confident results (see below)
# -m: print micro averaged interpolated precisions (ranking driven)
#
# trec_rel_file
#
# topic_id \t dummy \t document_id \t rel \n
#
# trec_top_file
#
# topic_id \t dummy \t document_id \t ranking \t similarity \t run_id \n
#
# NTCIR5 patent classification task uses the "dummy" field as
# representing the confidence (1=confident, 0=unconfident) of each
# result. F-measure is calculated by collecting confident results.
#
# Results (lines) in "trec_top_file" should be grouped by the same
# topic (i.e., "topic id"). Within each group, the order of results
# is preserved and not re-sorted by the program. The program only
# checks if the results are in ascending order based on "ranking".
# Note that the original "trec_eval" program re-sorts results by
# "similarity", where ties are broken by the lexicographical order
# of "document id".
#
# micro averaged interpolated precision
#
# Assume that there are N topics. For each topic, the program
# collects K top-ranked results. Consequently, we have N*K results
# for all the topics. Here, K ranges from 1 to the maximum ranking,
# say X, in all the topics. For each of the X points, the program
# calculates recall/precision values, and these values are used for
# calculating interpolated precisions at predefined points of recalls
# (11 points by default).
#
# Modify this array if you want to calculate interpolated precisions
# at different points from those of the standard 11 points. The elements
# should be in ascending order.
#
# Recall levels at which interpolated precision is reported. The same list
# is used for the per-topic, macro averaged and micro averaged tables.
@i_precision_table = (0.00,0.10,0.20,0.30,0.40,0.50,0.60,0.70,0.80,0.90,1.00);
# Alternative: 21 recall levels in steps of 0.05.
#@i_precision_table = (0.00,0.05,0.10,0.15,0.20,0.25,0.30,0.35,0.40,0.45,0.50,0.55,0.60,0.65,0.70,0.75,0.80,0.85,0.90,0.95,1.00);
# Modify this array if you want to calculate precisions at any points
# where the specified numbers of documents are retrieved. The elements
# should be in ascending order.
#
@k_precision_table = (5,10,15,20,30,100,200,500,1000);
# Alternative: a finer set of cutoffs near the top of the ranking.
#@k_precision_table = (1,2,3,4,5,10,15,20,30,100,200,300,1000);
# Maximum number of results scored per topic; once a topic has this many
# counted results, its remaining lines are read but not evaluated.
$MAXRET = 1000;
# usage
#
# Print the command-line synopsis to STDERR and terminate the program with
# exit status 1. Takes no arguments and never returns.
#
# The synopsis lists -c (CSV output), which is the flag the option parser
# actually accepts.
sub usage {
    printf STDERR ("trec_eval.pl [-q] [-c] [-f] [-m] trec_rel_file trec_top_file\n");
    exit 1;
}
# variables
#
# Output switches; the option parser sets the four command-line flags to 1.
$opt_print_topic_by_topic = 0;    # -q
$opt_print_csv = 0;               # -c
$opt_print_csv_title = 1; # set to 0 unless you need CSV description row
$opt_print_f_measure = 0;         # -f
$opt_print_micro_average = 0;     # -m

# Tables filled while reading the two input files.
undef(%rel_info);     # {topic_id, document_id} => defined for relevant documents
undef(%topinfo);
undef(%topiclist);    # topic_id => number of lines seen in trec_rel_file
undef(%n_rel);        # topic_id => number of relevant documents

# Run-wide accumulators, updated once per evaluated topic by aggregation().
$n_topic = 0;
$n_topic_confident = 0;
$a_precision_macro = 0.0;
$r_precision_macro = 0.0;
$recall_confident_macro = 0.0;
$precision_confident_macro = 0.0;
$f_measure_confident_macro = 0.0;
undef(@i_precision_macro);
undef(@k_precision_macro);
$n_ret_macro = 0;
$n_rel_macro = 0;
$n_rel_ret_macro = 0;
$n_ret_confident_macro = 0;
$n_rel_ret_confident_macro = 0;

# $n_rel_ret_at[K]: relevant documents found at position K, summed over all
# topics (input for the micro averaged table).
undef(@n_rel_ret_at);
# options
#
# Map each recognised switch to the global flag it turns on.
my %flag_for = (
    "-q" => \$opt_print_topic_by_topic,
    "-c" => \$opt_print_csv,
    "-f" => \$opt_print_f_measure,
    "-m" => \$opt_print_micro_average,
);

# Consume leading switches; the first argument that is not a known switch
# ends option processing and is treated as a file name.
while (@ARGV) {
    my $opt = $ARGV[0];
    if ($opt =~ /^-{1,2}h(elp)?/) {
        usage();
    }
    last unless exists $flag_for{$opt};
    ${ $flag_for{$opt} } = 1;
    shift @ARGV;
}

# Exactly two file names must remain.
if (@ARGV != 2) {
    usage();
}
($trec_rel_file, $trec_top_file) = @ARGV;

# The run id printed in CSV output is the last path component of the result
# file. Only read $1 when the match succeeded; otherwise (for example a path
# ending in "/") fall back to the path as given.
if ($trec_top_file =~ /([^\/]+)$/) {
    $trec_top_file_base = $1;
} else {
    $trec_top_file_base = $trec_top_file;
}
# opening files
#
# Open both inputs read-only on the handles TR (trec_rel_file) and TT
# (trec_top_file). The three-argument form takes the names literally, so a
# name containing characters such as ">" or "|" cannot change the open mode.
# The system error text is appended to the message to show why an open failed.
open(TR, '<', $trec_rel_file) || die "can not open \"$trec_rel_file\": $!\n";
open(TT, '<', $trec_top_file) || die "can not open \"$trec_top_file\": $!\n";
# reading trec_rel_file
#
# Read the relevance judgments line by line from TR. Every topic that occurs
# is registered in %topiclist; documents with a positive "rel" value are
# recorded in %rel_info and counted in %n_rel.
while (<TR>) {
    chomp;
    # Fields: topic_id, dummy, document_id, rel (split on whitespace;
    # anything after the fourth field is ignored).
    my ($topic_id, $dummy, $document_id, $rel_val) = split;
    # A blank line yields no fields; skip it instead of registering an
    # empty topic id.
    next unless defined($topic_id);
    $topiclist{$topic_id}++;
    if ($rel_val > 0) {
        $rel_info{$topic_id, $document_id} = $dummy;
        $n_rel{$topic_id}++;
    }
}
close(TR);
# printing CSV description row
#
# Emit the CSV description row: collect every column title in output order,
# then write them as a single comma separated line.
if ($opt_print_csv && $opt_print_csv_title) {
    my @columns = ("Runid", "Queryid(Num)", "Retrieved", "Relevant", "Rel_ret");

    # interpolated precision columns, one per recall level
    push(@columns, @i_precision_table);
    push(@columns, "A-Precision");

    # precision-at-K columns, one per cutoff
    push(@columns, @k_precision_table);
    push(@columns, "R-Precision");

    # columns present only with -f
    if ($opt_print_f_measure) {
        push(@columns,
             "Num_query(confident)",
             "Retrieved(confident)",
             "Rel_ret(confident)",
             "Recall(confident)",
             "Precision(confident)",
             "F-measure(confident)");
    }

    # columns present only with -m: the recall levels once more
    if ($opt_print_micro_average) {
        push(@columns, @i_precision_table);
    }

    printf("%s\n", join(",", @columns));
}
# reading trec_top_file (main loop)
#
# Read the run results line by line from TT. Lines are expected to be grouped
# by topic; when the topic id changes, the finished topic is handed to
# aggregation() and the per-topic counters are reset.
undef(%submitted_topic);
$pre_topic_id = "";
$line = 0;
while (<TT>) {
    $line++;
    chomp;
    # Fields: topic_id, dummy (confidence), document_id, ranking, then
    # similarity and run_id, which are not used here.
    my ($topic_id, $dummy, $document_id, $rank) = split;
    # Skip blank lines and results for topics absent from trec_rel_file.
    if (!defined($topic_id) || !defined($topiclist{$topic_id})) {
        next;
    }
    # the end of the current topic
    #
    if ($pre_topic_id ne $topic_id) {
        if ($pre_topic_id ne "") {
            aggregation($pre_topic_id); # aggregation for the finished topic
        }
        # checking for the new topic: each topic may form only one group
        #
        if (defined($submitted_topic{$topic_id})) {
            die "$line: topic $topic_id is already submitted.\n";
        }
        $submitted_topic{$topic_id} = 1;
        # initializing for the new topic
        #
        $n_ret = 0;
        $n_ret_confident = 0;
        $n_rel_ret = 0;
        $n_rel_ret_confident = 0;
        $a_precision = 0.0;
        # Reset R-precision as well; otherwise a topic that returns fewer
        # results than it has relevant documents would inherit the previous
        # topic's value.
        # NOTE(review): 0 mirrors how precision-at-K is left empty when the
        # cutoff is never reached; confirm whether rel_ret/R is wanted instead.
        $r_precision = 0.0;
        undef(@i_precision);
        undef(@k_precision);
        $pre_rank = 0;
    }
    # checking the order of documents (ranks are in ascending order)
    #
    if ($rank < $pre_rank) { # ties are OK
        die "$line: rank $rank should be in ascending order.\n";
    }
    # Only topics with relevant documents are scored, and at most $MAXRET
    # results per topic.
    if ($n_rel{$topic_id} > 0 && $n_ret < $MAXRET) {
        $n_ret++;
        if ($dummy == 1) {
            $n_ret_confident++;
        }
        # calculation of interpolated precision and average precision
        #
        if (defined($rel_info{$topic_id, $document_id})) {
            $n_rel_ret++;
            if ($dummy == 1) {
                $n_rel_ret_confident++;
            }
            $n_rel_ret_at[$n_ret]++; # for micro averaging
            my $recall = $n_rel_ret / $n_rel{$topic_id};
            my $precision = $n_rel_ret / $n_ret;
            $a_precision += $precision;
            # Every recall level at or below the current recall keeps the
            # highest precision seen so far.
            for (my $i = 0; $i < @i_precision_table && $i_precision_table[$i] <= $recall; $i++) {
                if ($precision > $i_precision[$i]) {
                    $i_precision[$i] = $precision; # interpolation
                }
            }
        }
        # calculation of "precision after K (retrieved) documents"
        #
        for (my $i = 0; $i < @k_precision_table; $i++) {
            if ($n_ret == $k_precision_table[$i]) {
                $k_precision[$i] = $n_rel_ret / $n_ret;
            }
        }
        # calculation of R-Precision
        #
        if ($n_ret == $n_rel{$topic_id}) {
            $r_precision = $n_rel_ret / $n_ret;
        }
    }
    # storing current information for the next loop
    #
    $pre_topic_id = $topic_id;
    $pre_rank = $rank;
}
# aggregation for the last topic, if any result line was accepted at all
if ($pre_topic_id ne "") {
    aggregation($pre_topic_id);
}
close(TT);
# averaging and printing
#
# Print the run-wide summary: totals, macro averaged measures, and optionally
# the confident-level measures (-f) and the micro averaged table (-m).
#
# All macro averages divide by the number of evaluated topics. When no topic
# was evaluated every accumulator is still 0, so dividing by 1 instead
# reports zeros rather than dying with "Illegal division by zero".
my $n_topic_div = $n_topic;
if ($n_topic_div == 0) {
    printf STDERR ("warning: no topic was evaluated; all averages are reported as 0\n");
    $n_topic_div = 1;
}

if ($opt_print_csv) {
    printf("%s,%d,%d,%d,%d", $trec_top_file_base, $n_topic, $n_ret_macro, $n_rel_macro, $n_rel_ret_macro);
} else {
    printf("\n");
    printf("Qeuryid (Num): %8s\n", $n_topic);
    printf("Total number of documents over all queries\n");
    printf(" Retrieved: %8s\n", $n_ret_macro);
    printf(" Relevant: %8s\n", $n_rel_macro);
    printf(" Rel_ret: %8s\n", $n_rel_ret_macro);
    printf("Interpolated Recall - Precision Averages:\n");
}

# macro averaged interpolated precision at each recall level
for (my $i = 0; $i < @i_precision_table; $i++) {
    if ($opt_print_csv) {
        printf(",%.4f", $i_precision_macro[$i] / $n_topic_div);
    } else {
        printf(" at %.2f %.4f \n", $i_precision_table[$i], $i_precision_macro[$i] / $n_topic_div);
    }
}

# mean average precision
if ($opt_print_csv) {
    printf(",%.4f", $a_precision_macro / $n_topic_div);
} else {
    printf("Average precision (non-interpolated) for all rel docs(averaged over queries)\n");
    printf(" %.4f \n", $a_precision_macro / $n_topic_div);
    printf("Precision:\n");
}

# macro averaged precision after K retrieved documents
for (my $i = 0; $i < @k_precision_table; $i++) {
    if ($opt_print_csv) {
        printf(",%.4f", $k_precision_macro[$i] / $n_topic_div);
    } else {
        printf(" At %4s docs: %.4f\n", $k_precision_table[$i], $k_precision_macro[$i] / $n_topic_div);
    }
}

# macro averaged R-precision
if ($opt_print_csv) {
    printf(",%.4f", $r_precision_macro / $n_topic_div);
} else {
    printf("R-Precision (precision after R (= num_rel for a query) docs retrieved):\n");
    printf(" Exact: %.4f\n", $r_precision_macro / $n_topic_div);
}

# measures restricted to confident results (-f); averaged over all topics
if ($opt_print_f_measure) {
    if ($opt_print_csv) {
        printf(",%d,%d,%d,%.4f,%.4f,%.4f", $n_topic, $n_ret_confident_macro, $n_rel_ret_confident_macro, $recall_confident_macro / $n_topic_div, $precision_confident_macro / $n_topic_div, $f_measure_confident_macro / $n_topic_div);
    } else {
        printf("Number of Qeury (confident): %d\n", $n_topic);
        printf("Evaluation at the confident level\n");
        printf(" Retrieved(confident): %6d\n", $n_ret_confident_macro);
        printf(" Rel_Ret(confident): %6d\n", $n_rel_ret_confident_macro);
        printf(" Recall(confident): %.4f\n", $recall_confident_macro / $n_topic_div);
        printf(" Precision(confident): %.4f\n", $precision_confident_macro / $n_topic_div);
        printf(" F-measure(confident): %.4f\n", $f_measure_confident_macro / $n_topic_div);
    }
}

# micro averaged interpolated precision (-m)
if ($opt_print_micro_average) {
    undef(@i_precision_micro);
    my $n_rel_ret_micro = 0;
    # Walk the cutoffs K = 1 .. highest position at which a relevant document
    # was found. @n_rel_ret_at is only filled for topics that are also
    # counted in $n_topic and $n_rel_macro, so both are positive whenever
    # this loop runs.
    for (my $k = 1; $k <= $#n_rel_ret_at; $k++) {
        $n_rel_ret_micro += $n_rel_ret_at[$k];
        my $recall = $n_rel_ret_micro / $n_rel_macro;
        # K results are assumed for each of the $n_topic topics
        my $precision = $n_rel_ret_micro / ($k * $n_topic_div);
        for (my $j = 0; $j < @i_precision_table && $i_precision_table[$j] <= $recall; $j++) {
            if ($precision > $i_precision_micro[$j]) {
                $i_precision_micro[$j] = $precision; # interpolation
            }
        }
    }
    if (!$opt_print_csv) {
        printf("Interpolated Recall - Precision Averages (micro):\n");
    }
    for (my $i = 0; $i < @i_precision_table; $i++) {
        if ($opt_print_csv) {
            printf(",%.4f", $i_precision_micro[$i]);
        } else {
            printf(" at %.2f %.4f \n", $i_precision_table[$i], $i_precision_micro[$i]);
        }
    }
}

# terminate the CSV summary row
if ($opt_print_csv) {
    printf("\n");
}
# aggregation and printing for each topic
#
# Fold the counters of one finished topic into the run-wide accumulators and,
# when -q is given, print the per-topic report (text or CSV).
#
# Argument:
#   $topic_id - id of the topic whose results have just been read.
#
# Reads the per-topic globals $n_ret, $n_rel_ret, $a_precision, @i_precision,
# @k_precision, $r_precision, $n_ret_confident and $n_rel_ret_confident, and
# updates $n_topic together with the *_macro accumulators. A topic without
# relevant documents in %n_rel is ignored: nothing is printed or accumulated.
sub aggregation {
    my ($topic_id) = @_;
    return unless $n_rel{$topic_id} > 0;

    my $verbose = $opt_print_topic_by_topic;

    # per-topic totals
    if ($verbose) {
        if ($opt_print_csv) {
            printf("%s,%s,%d,%d,%d", $trec_top_file_base, $topic_id, $n_ret, $n_rel{$topic_id}, $n_rel_ret);
        } else {
            printf("\n");
            printf("Qeuryid (Num): %8s\n", $topic_id);
            printf("Total number of documents over all queries\n");
            printf(" Retrieved: %8s\n", $n_ret);
            printf(" Relevant: %8s\n", $n_rel{$topic_id});
            printf(" Rel_ret: %8s\n", $n_rel_ret);
            printf("Interpolated Recall - Precision Averages:\n");
        }
    }
    $n_topic++;
    $n_ret_macro += $n_ret;
    $n_rel_macro += $n_rel{$topic_id};
    $n_rel_ret_macro += $n_rel_ret;

    # interpolated precision at each recall level; levels that were never
    # reached are unset and count as 0
    for (my $i = 0; $i < @i_precision_table; $i++) {
        if ($verbose) {
            if ($opt_print_csv) {
                printf(",%.4f", $i_precision[$i]);
            } else {
                printf(" at %.2f %.4f \n", $i_precision_table[$i], $i_precision[$i]);
            }
        }
        $i_precision_macro[$i] += $i_precision[$i];
    }

    # average precision: sum of precisions at relevant documents divided by
    # the number of relevant documents of the topic
    my $avg_precision = $a_precision / $n_rel{$topic_id};
    if ($verbose) {
        if ($opt_print_csv) {
            printf(",%.4f", $avg_precision);
        } else {
            printf("Average precision (non-interpolated) for all rel docs(averaged over queries)\n");
            printf(" %.4f \n", $avg_precision);
            printf("Precision:\n");
        }
    }
    $a_precision_macro += $avg_precision;

    # precision after K retrieved documents; cutoffs that were never reached
    # are unset and count as 0
    for (my $i = 0; $i < @k_precision_table; $i++) {
        if ($verbose) {
            if ($opt_print_csv) {
                printf(",%.4f", $k_precision[$i]);
            } else {
                printf(" At %4s docs: %.4f\n", $k_precision_table[$i], $k_precision[$i]);
            }
        }
        $k_precision_macro[$i] += $k_precision[$i];
    }

    # R-precision
    if ($verbose) {
        if ($opt_print_csv) {
            printf(",%.4f", $r_precision);
        } else {
            printf("R-Precision (precision after R (= num_rel for a query) docs retrieved):\n");
            printf(" Exact: %.4f\n", $r_precision);
        }
    }
    $r_precision_macro += $r_precision;

    # Measures over the confident results only. A topic without confident
    # results contributes 0 for all three, and F stays 0 when recall or
    # precision is 0 (avoids dividing by zero in the harmonic mean).
    my $recall_confident = 0.0;
    my $precision_confident = 0.0;
    my $f_measure_confident = 0.0;
    if ($n_ret_confident > 0) {
        $recall_confident = $n_rel_ret_confident / $n_rel{$topic_id};
        $precision_confident = $n_rel_ret_confident / $n_ret_confident;
        if ($recall_confident > 0 && $precision_confident > 0) {
            $f_measure_confident = 2.0 / ((1.0 / $recall_confident) + (1.0 / $precision_confident));
        }
    }
    if ($verbose && $opt_print_f_measure) {
        if ($opt_print_csv) {
            printf(",1,%d,%d,%.4f,%.4f,%.4f", $n_ret_confident, $n_rel_ret_confident, $recall_confident, $precision_confident, $f_measure_confident);
        } else {
            printf("Evaluation at the confident level\n");
            printf(" Retrieved(confident): %6d\n", $n_ret_confident);
            printf(" Rel_Ret(confident): %6d\n", $n_rel_ret_confident);
            printf(" Recall(confident): %.4f\n", $recall_confident);
            printf(" Precision(confident): %.4f\n", $precision_confident);
            printf(" F-measure(confident): %.4f\n", $f_measure_confident);
        }
    }
    $n_ret_confident_macro += $n_ret_confident;
    $n_rel_ret_confident_macro += $n_rel_ret_confident;
    $recall_confident_macro += $recall_confident;
    $precision_confident_macro += $precision_confident;
    $f_measure_confident_macro += $f_measure_confident;

    # terminate the per-topic CSV row
    if ($opt_print_csv && $verbose) {
        printf("\n");
    }
}