#!/usr/bin/env perl # # "trec_eval" compatible program for NTCIR 5 Patent Classification Task # # 2005.08.22: A bug for F-measure calculation has been fixed. In averaging # process, we had not summed the F-measures of the documents # without confident categories. # 2005.02.14: initial version (M. Iwayama) # # usage # trec_eval.pl [-q] [-c] [-f] [-m] trec_rel_file trec_top_file # # options # -q: print results for each query # -c: print in the CSV format # -f: print F-measures for confident results (see below) # -m: print micro averaged interpolated precisions (ranking driven) # # trec_rel_file # # topic_id \t dummy \t document_id \t rel \n # # trec_top_file # # topic_id \t dummy \t document_id \t ranking \t similarity \t run_id \n # # NTCIR5 patent classification task uses the "dummy" field as # representing the confidence (1=confident, 0=unconfident) of each # result. F-measure is calculated by collecting confident results. # # Results (lines) in "trec_top_file" should be grouped by the same # topic (i.e., "topic id"). Within each group, the order of results # is preserved and not re-sorted by the program. The program only # checks if the results are in ascending order based on "ranking". # Note that the original "trec_eval" program re-sorts results by # "similarity", where ties are broken by the lexicographical order # of "document id". # # micro averaged interpolated precision # # Assume that there are N topics. For each topic, the program # collects K top-ranked results. Consequently, we have N*K results # for all the topics. Here, K ranges from 1 to the maximum ranking, # say X, in all the topics. For each of the X points, the program # calculates recall/precision values, and these values are used for # calculating interpolated precisions at predefined points of recalls # (11 points by default). # # Modify this array if you want to calculate interpolated precisions # at different points from those of the standard 11 points. The elements # should be in ascending order. # @i_precision_table = (0.00,0.10,0.20,0.30,0.40,0.50,0.60,0.70,0.80,0.90,1.00); #@i_precision_table = (0.00,0.05,0.10,0.15,0.20,0.25,0.30,0.35,0.40,0.45,0.50,0.55,0.60,0.65,0.70,0.75,0.80,0.85,0.90,0.95,1.00); # Modify this array if you want to calculate precisions at any points # where the specified numbers of documents are retrieved. The elements # should be in ascending order. # @k_precision_table = (5,10,15,20,30,100,200,500,1000); #@k_precision_table = (1,2,3,4,5,10,15,20,30,100,200,300,1000); $MAXRET = 1000; # usage # sub usage { printf STDERR ("trec_eval.pl [-q] [-o] [-f] [-m] trec_rel_file trec_top_file\n"); exit 1; } # variables # $opt_print_topic_by_topic = 0; $opt_print_csv = 0; $opt_print_csv_title = 1; # set to 0 unless you need CSV description row $opt_print_f_measure = 0; $opt_print_micro_average = 0; undef(%relinfo); undef(%topinfo); undef(%topiclist); undef(%n_rel); $n_topic = 0; $n_topic_confident = 0; $a_precision_macro = 0.0; $r_precision_macro = 0.0; $recal_confident_macro = 0.0; $precision_confident_macro = 0.0; $f_measure_confident_macro = 0.0; undef(@i_precision_macro); undef(@k_precision_macro); $n_ret_macro = 0; $n_rel_macro = 0; $n_rel_ret_macro = 0; $n_ret_confident_macro = 0; $n_rel_ret_confident_macro = 0; undef(@n_rel_ret_at); # options # while (@ARGV) { my $opt = $ARGV[0]; if ($opt =~ /^-{1,2}h(elp)?/) { &usage; } elsif ($opt eq "-q") { $opt_print_topic_by_topic = 1; } elsif ($opt eq "-c") { $opt_print_csv = 1; } elsif ($opt eq "-f") { $opt_print_f_measure = 1; } elsif ($opt eq "-m") { $opt_print_micro_average = 1; } else { last; } shift @ARGV; } if (@ARGV != 2) { &usage; } ($trec_rel_file, $trec_top_file) = @ARGV; $trec_top_file =~ /([^\/]+)$/; $trec_top_file_base = $1; # opening files # open(TR, $trec_rel_file) || die "can not open \"$trec_rel_file\"\n"; open(TT, $trec_top_file) || die "can not open \"$trec_top_file\"\n"; # reading trec_rel_file # while () { chomp; (my $topic_id, my $dummy, my $document_id, my $rel_val, my @field) = split; $topiclist{$topic_id}++; if ($rel_val > 0) { $rel_info{$topic_id, $document_id} = $dummy; $n_rel{$topic_id}++; } } close(TR); # printing CSV description row # if ($opt_print_csv && $opt_print_csv_title) { printf("Runid,Queryid(Num),Retrieved,Relevant,Rel_ret"); for (my $i = 0; $i < @i_precision_table; $i++) { printf(",%s", $i_precision_table[$i]); } printf(",A-Precision"); for (my $i = 0; $i < @k_precision_table; $i++) { printf(",%s", $k_precision_table[$i]); } printf(",R-Precision"); if ($opt_print_f_measure) { printf(",Num_query(confident),Retrieved(confident),Rel_ret(confident),Recall(confident),Precision(confident),F-measure(confident)"); } if ($opt_print_micro_average) { for (my $i = 0; $i < @i_precision_table; $i++) { printf(",%s", $i_precision_table[$i]); } } printf("\n"); } # reading trec_top_file (main loop) # undef(%submitted_topic); $pre_topic_id = ""; $line = 0; while () { $line++; chomp; (my $topic_id, my $dummy, my $document_id, my $rank, my $sim, my $run_id, my @field) = split; if (!defined($topiclist{$topic_id})) { next; } # the end of the current topic # if ($pre_topic_id ne $topic_id) { if ($pre_topic_id ne "") { &aggregation($pre_topic_id); # aggregation for the current topic } # checking for the new topic # if (defined($submitted_topic{$topic_id})) { die "$line: topic $topic_id is already submitted.\n"; } $submitted_topic{$topic_id} = 1; # initializing for the new topic # $n_ret = 0; $n_ret_confident = 0; $n_rel_ret = 0; $n_rel_ret_confident = 0; $a_precision = 0.0; undef(@i_precision); undef(@k_precision); $pre_rank = 0; } # checking the order of documents (ranks are in ascending order) # if ($rank < $pre_rank) { # ties are OK die "$line: rank $rank should be in ascending order.\n"; } if ($n_rel{$topic_id} > 0 && $n_ret < $MAXRET) { $n_ret++; if ($dummy == 1) { $n_ret_confident++; } # calculation of interpolated precision and average precision # if (defined($rel_info{$topic_id, $document_id})) { $n_rel_ret++; if ($dummy == 1) { $n_rel_ret_confident++; } $n_rel_ret_at[$n_ret]++; # for micro averaging my $recall = $n_rel_ret / $n_rel{$topic_id}; my $precision = $n_rel_ret / $n_ret; $a_precision += $precision; for (my $i = 0; $i < @i_precision_table && $i_precision_table[$i] <= $recall; $i++) { if ($precision > $i_precision[$i]) { $i_precision[$i] = $precision; # interporation } } } # calculation of "precision after K (retrieved) documents" # for (my $i = 0; $i < @k_precision_table; $i++) { if ($n_ret == $k_precision_table[$i]) { $k_precision[$i] = $n_rel_ret / $n_ret; } } # calculation of R-Precision # if ($n_ret == $n_rel{$topic_id}) { $r_precision = $n_rel_ret / $n_ret; } } # storing current information for the next loop # $pre_topic_id = $topic_id; $pre_rank = $rank; } &aggregation($pre_topic_id); # aggregation for the previous topic close(TT); # averaging and printing # if ($opt_print_csv) { printf("%s,%d,%d,%d,%d", $trec_top_file_base, $n_topic, $n_ret_macro, $n_rel_macro, $n_rel_ret_macro); } else { printf("\n"); printf("Qeuryid (Num): %8s\n", $n_topic); printf("Total number of documents over all queries\n"); printf(" Retrieved: %8s\n", $n_ret_macro); printf(" Relevant: %8s\n", $n_rel_macro); printf(" Rel_ret: %8s\n", $n_rel_ret_macro); printf("Interpolated Recall - Precision Averages:\n"); } for (my $i = 0; $i < @i_precision_table; $i++) { if ($opt_print_csv) { printf(",%.4f", $i_precision_macro[$i] / $n_topic); } else { printf(" at %.2f %.4f \n", $i_precision_table[$i], $i_precision_macro[$i] / $n_topic); } } if ($opt_print_csv) { printf(",%.4f", $a_precision_macro / $n_topic); } else { printf("Average precision (non-interpolated) for all rel docs(averaged over queries)\n"); printf(" %.4f \n", $a_precision_macro / $n_topic); printf("Precision:\n"); } for (my $i = 0; $i < @k_precision_table; $i++) { if ($opt_print_csv) { printf(",%.4f", $k_precision_macro[$i] / $n_topic); } else { printf(" At %4s docs: %.4f\n", $k_precision_table[$i], $k_precision_macro[$i] / $n_topic); } } if ($opt_print_csv) { printf(",%.4f", $r_precision_macro / $n_topic); } else { printf("R-Precision (precision after R (= num_rel for a query) docs retrieved):\n"); printf(" Exact: %.4f\n", $r_precision_macro / $n_topic); } if ($opt_print_f_measure) { if ($opt_print_csv) { printf(",%d,%d,%d,%.4f,%.4f,%.4f", $n_topic, $n_ret_confident_macro, $n_rel_ret_confident_macro, $recall_confident_macro / $n_topic, $precision_confident_macro / $n_topic, $f_measure_confident_macro / $n_topic); } else { printf("Number of Qeury (confident): %d\n", $n_topic); printf("Evaluation at the confident level\n"); printf(" Retrieved(confident): %6d\n", $n_ret_confident_macro); printf(" Rel_Ret(confident): %6d\n", $n_rel_ret_confident_macro); printf(" Recall(confident): %.4f\n", $recall_confident_macro / $n_topic); printf(" Precision(confident): %.4f\n", $precision_confident_macro / $n_topic); printf(" F-measure(confident): %.4f\n", $f_measure_confident_macro / $n_topic); } } if ($opt_print_micro_average) { undef(@i_precision_micro); my $n_rel_ret_micro = 0; for (my $i = 1; $i <= @n_rel_ret_at; $i++) { $n_rel_ret_micro += $n_rel_ret_at[$i]; my $recall = $n_rel_ret_micro / $n_rel_macro; my $precision = $n_rel_ret_micro / ($i * $n_topic); for (my $j = 0; $j < @i_precision_table && $i_precision_table[$j] <= $recall; $j++) { if ($precision > $i_precision_micro[$j]) { $i_precision_micro[$j] = $precision; # interporation } } } if ($opt_print_csv) { ; } else { printf("Interpolated Recall - Precision Averages (micro):\n"); } for (my $i = 0; $i < @i_precision_table; $i++) { if ($opt_print_csv) { printf(",%.4f", $i_precision_micro[$i]); } else { printf(" at %.2f %.4f \n", $i_precision_table[$i], $i_precision_micro[$i]); } } } if ($opt_print_csv) { printf("\n"); } # aggregation and printing for each topic # sub aggregation { local($topic_id) = @_; if ($n_rel{$topic_id} > 0) { if ($opt_print_topic_by_topic) { if ($opt_print_csv) { printf("%s,%s,%d,%d,%d", $trec_top_file_base, $topic_id, $n_ret, $n_rel{$topic_id}, $n_rel_ret); } else { printf("\n"); printf("Qeuryid (Num): %8s\n", $topic_id); printf("Total number of documents over all queries\n"); printf(" Retrieved: %8s\n", $n_ret); printf(" Relevant: %8s\n", $n_rel{$topic_id}); printf(" Rel_ret: %8s\n", $n_rel_ret); printf("Interpolated Recall - Precision Averages:\n"); } } $n_topic++; $n_ret_macro += $n_ret; $n_rel_macro += $n_rel{$topic_id}; $n_rel_ret_macro += $n_rel_ret; for (my $i = 0; $i < @i_precision_table; $i++) { if ($opt_print_topic_by_topic) { if ($opt_print_csv) { printf(",%.4f", $i_precision[$i]); } else { printf(" at %.2f %.4f \n", $i_precision_table[$i], $i_precision[$i]); } } @i_precision_macro[$i] += $i_precision[$i]; } if ($opt_print_topic_by_topic) { if ($opt_print_csv) { printf(",%.4f", $a_precision / $n_rel{$topic_id}); } else { printf("Average precision (non-interpolated) for all rel docs(averaged over queries)\n"); printf(" %.4f \n", $a_precision / $n_rel{$topic_id}); printf("Precision:\n"); } } $a_precision_macro += ($a_precision / $n_rel{$topic_id}); for (my $i = 0; $i < @k_precision_table; $i++) { if ($opt_print_topic_by_topic) { if ($opt_print_csv) { printf(",%.4f", $k_precision[$i]); } else { printf(" At %4s docs: %.4f\n", $k_precision_table[$i], $k_precision[$i]); } } @k_precision_macro[$i] += $k_precision[$i]; } if ($opt_print_topic_by_topic) { if ($opt_print_csv) { printf(",%.4f", $r_precision); } else { printf("R-Precision (precision after R (= num_rel for a query) docs retrieved):\n"); printf(" Exact: %.4f\n", $r_precision); } } $r_precision_macro += $r_precision; my $recall_confident = 0.0; my $precision_confident = 0.0; my $f_measure_confident = 0.0; if ($n_ret_confident > 0) { $recall_confident = $n_rel_ret_confident / $n_rel{$topic_id}; $precision_confident = $n_rel_ret_confident / $n_ret_confident; if ($recall_confident > 0 && $precision_confident > 0) { $f_measure_confident = 2.0 / (( 1.0 / $recall_confident) + (1.0 / $precision_confident)); } } if ($opt_print_topic_by_topic) { if ($opt_print_f_measure) { if ($opt_print_csv) { printf(",1,%d,%d,%.4f,%.4f,%.4f", $n_ret_confident, $n_rel_ret_confident, $recall_confident, $precision_confident, $f_measure_confident); } else { printf("Evaluation at the confident level\n"); printf(" Retrieved(confident): %6d\n", $n_ret_confident); printf(" Rel_Ret(confident): %6d\n", $n_rel_ret_confident); printf(" Recall(confident): %.4f\n", $recall_confident); printf(" Precision(confident): %.4f\n", $precision_confident); printf(" F-measure(confident): %.4f\n", $f_measure_confident); } } } $n_ret_confident_macro += $n_ret_confident; $n_rel_ret_confident_macro += $n_rel_ret_confident; $recall_confident_macro += $recall_confident; $precision_confident_macro += $precision_confident; $f_measure_confident_macro += $f_measure_confident; if ($opt_print_csv && $opt_print_topic_by_topic) { printf("\n"); } } }