#!/usr/bin/env perl
#
# "trec_eval" compatible program for NTCIR 5 Patent Classification Task
#
# 2005.08.22: A bug for F-measure calculation has been fixed. In averaging
#             process, we had not summed the F-measures of the documents
#             without confident categories.
# 2005.02.14: initial version (M. Iwayama)
#
# usage
#   trec_eval.pl [-q] [-c] [-f] [-m] trec_rel_file trec_top_file
#
# options
#   -q: print results for each query
#   -c: print in the CSV format
#   -f: print F-measures for confident results (see below)
#   -m: print micro averaged interpolated precisions (ranking driven)
#
# trec_rel_file
#
#     topic_id \t dummy \t document_id \t rel \n
#
# trec_top_file
#
#     topic_id \t dummy \t document_id \t ranking \t similarity \t run_id \n
#
#   NTCIR5 patent classification task uses the "dummy" field as
#   representing the confidence (1=confident, 0=unconfident) of each
#   result. F-measure is calculated by collecting confident results.
#
#   Results (lines) in "trec_top_file" should be grouped by the same
#   topic (i.e., "topic id"). Within each group, the order of results
#   is preserved and not re-sorted by the program. The program only
#   checks if the results are in ascending order based on "ranking".
#   Note that the original "trec_eval" program re-sorts results by
#   "similarity", where ties are broken by the lexicographical order
#   of "document id".
#
# micro averaged interpolated precision
#
#   Assume that there are N topics. For each topic, the program
#   collects K top-ranked results. Consequently, we have N*K results
#   for all the topics. Here, K ranges from 1 to the maximum ranking,
#   say X, in all the topics. For each of the X points, the program
#   calculates recall/precision values, and these values are used for
#   calculating interpolated precisions at predefined points of recalls
#   (11 points by default).
#

# Modify this array if you want to calculate interpolated precisions
# at different points from those of the standard 11 points. The elements
# should be in ascending order.
#
@i_precision_table = (0.00,0.10,0.20,0.30,0.40,0.50,0.60,0.70,0.80,0.90,1.00);
#@i_precision_table = (0.00,0.05,0.10,0.15,0.20,0.25,0.30,0.35,0.40,0.45,0.50,0.55,0.60,0.65,0.70,0.75,0.80,0.85,0.90,0.95,1.00);

# Modify this array if you want to calculate precisions at any points
# where the specified numbers of documents are retrieved. The elements
# should be in ascending order.
#
@k_precision_table = (5,10,15,20,30,100,200,500,1000);
#@k_precision_table = (1,2,3,4,5,10,15,20,30,100,200,300,1000);

$MAXRET = 1000;

# usage
#
sub usage {
  printf STDERR ("trec_eval.pl [-q] [-o] [-f] [-m] trec_rel_file trec_top_file\n");
  exit 1;
}

# variables
#
$opt_print_topic_by_topic = 0;
$opt_print_csv = 0;
$opt_print_csv_title = 1;  # set to 0 unless you need CSV description row
$opt_print_f_measure = 0;
$opt_print_micro_average = 0;
undef(%relinfo);
undef(%topinfo);
undef(%topiclist);
undef(%n_rel);
$n_topic = 0;
$n_topic_confident = 0;
$a_precision_macro = 0.0;
$r_precision_macro = 0.0;
$recal_confident_macro = 0.0;
$precision_confident_macro = 0.0;
$f_measure_confident_macro = 0.0;
undef(@i_precision_macro);
undef(@k_precision_macro);
$n_ret_macro = 0;
$n_rel_macro = 0;
$n_rel_ret_macro = 0;
$n_ret_confident_macro = 0;
$n_rel_ret_confident_macro = 0;
undef(@n_rel_ret_at);

# options
#
while (@ARGV) {
  my $opt = $ARGV[0];
  if ($opt =~ /^-{1,2}h(elp)?/) {
    &usage;
  } elsif ($opt eq "-q") {
    $opt_print_topic_by_topic = 1;
  } elsif ($opt eq "-c") {
    $opt_print_csv = 1;
  } elsif ($opt eq "-f") {
    $opt_print_f_measure = 1;
  } elsif ($opt eq "-m") {
    $opt_print_micro_average = 1;
  } else {
    last;
  }
  shift @ARGV;
}
if (@ARGV != 2) {
  &usage;
}
($trec_rel_file, $trec_top_file) = @ARGV;
$trec_top_file =~ /([^\/]+)$/;
$trec_top_file_base = $1;

# opening files
#
open(TR, $trec_rel_file) || die "can not open \"$trec_rel_file\"\n";
open(TT, $trec_top_file) || die "can not open \"$trec_top_file\"\n";

# reading trec_rel_file
#
while (<TR>) {
  chomp;
  (my $topic_id, my $dummy, my $document_id, my $rel_val, my @field) = split;
  $topiclist{$topic_id}++;
  if ($rel_val > 0) {
    $rel_info{$topic_id, $document_id} = $dummy;
    $n_rel{$topic_id}++;
  }
}
close(TR);

# printing CSV description row
#
if ($opt_print_csv && $opt_print_csv_title) {
  printf("Runid,Queryid(Num),Retrieved,Relevant,Rel_ret");
  for (my $i = 0; $i < @i_precision_table; $i++) {
    printf(",%s", $i_precision_table[$i]);
  }
  printf(",A-Precision");
  for (my $i = 0; $i < @k_precision_table; $i++) {
    printf(",%s", $k_precision_table[$i]);
  }
  printf(",R-Precision");
  if ($opt_print_f_measure) {
    printf(",Num_query(confident),Retrieved(confident),Rel_ret(confident),Recall(confident),Precision(confident),F-measure(confident)");
  }
  if ($opt_print_micro_average) {
    for (my $i = 0; $i < @i_precision_table; $i++) {
      printf(",%s", $i_precision_table[$i]);
    }
  }
  printf("\n");
}

# reading trec_top_file (main loop)
#
undef(%submitted_topic);
$pre_topic_id = "";
$line = 0;
while (<TT>) {
  $line++;
  chomp;
  (my $topic_id, my $dummy, my $document_id, my $rank, my $sim, my $run_id, my @field) = split;

  if (!defined($topiclist{$topic_id})) {
    next;
  }

  # the end of the current topic
  #
  if ($pre_topic_id ne $topic_id) {
    if ($pre_topic_id ne "") {
      &aggregation($pre_topic_id);  # aggregation for the current topic
    }

    # checking for the new topic
    #
    if (defined($submitted_topic{$topic_id})) {
      die "$line: topic $topic_id is already submitted.\n";
    }
    $submitted_topic{$topic_id} = 1;

    # initializing for the new topic
    #
    $n_ret = 0;
    $n_ret_confident = 0;
    $n_rel_ret = 0;
    $n_rel_ret_confident = 0;
    $a_precision = 0.0;
    undef(@i_precision);
    undef(@k_precision);
    $pre_rank = 0;
  }

  # checking the order of documents (ranks are in ascending order)
  #
  if ($rank < $pre_rank) {  # ties are OK
    die "$line: rank $rank should be in ascending order.\n";
  }

  if ($n_rel{$topic_id} > 0 && $n_ret < $MAXRET) {

    $n_ret++;
    if ($dummy == 1) {
      $n_ret_confident++;
    }

    # calculation of interpolated precision and average precision
    #
    if (defined($rel_info{$topic_id, $document_id})) {
      $n_rel_ret++;
      if ($dummy == 1) {
	$n_rel_ret_confident++;
      }
      $n_rel_ret_at[$n_ret]++;  # for micro averaging
      my $recall = $n_rel_ret / $n_rel{$topic_id};
      my $precision = $n_rel_ret / $n_ret;
      $a_precision += $precision;
      for (my $i = 0; $i < @i_precision_table && $i_precision_table[$i] <= $recall; $i++) {
	if ($precision > $i_precision[$i]) {
	  $i_precision[$i] = $precision;  # interporation
	}
      }
    }

    # calculation of "precision after K (retrieved) documents"
    #
    for (my $i = 0; $i < @k_precision_table; $i++) {
      if ($n_ret == $k_precision_table[$i]) {
	$k_precision[$i] = $n_rel_ret / $n_ret;
      }
    }

    # calculation of R-Precision
    #
    if ($n_ret == $n_rel{$topic_id}) {
      $r_precision = $n_rel_ret / $n_ret;
    }
  }

  # storing current information for the next loop
  #
  $pre_topic_id = $topic_id;
  $pre_rank = $rank;
}
&aggregation($pre_topic_id);  # aggregation for the previous topic
close(TT);

# averaging and printing
#
if ($opt_print_csv) {
  printf("%s,%d,%d,%d,%d", $trec_top_file_base, $n_topic, $n_ret_macro, $n_rel_macro, $n_rel_ret_macro);
} else {
  printf("\n");
  printf("Qeuryid (Num): %8s\n", $n_topic);
  printf("Total number of documents over all queries\n");
  printf("    Retrieved: %8s\n", $n_ret_macro);
  printf("    Relevant:  %8s\n", $n_rel_macro);
  printf("    Rel_ret:   %8s\n", $n_rel_ret_macro);
  printf("Interpolated Recall - Precision Averages:\n");
}
for (my $i = 0; $i < @i_precision_table; $i++) {
  if ($opt_print_csv) {
    printf(",%.4f", $i_precision_macro[$i] / $n_topic);
  } else {
    printf("    at %.2f       %.4f \n", $i_precision_table[$i], $i_precision_macro[$i] / $n_topic);
  }
}
if ($opt_print_csv) {
  printf(",%.4f", $a_precision_macro / $n_topic);
} else {
  printf("Average precision (non-interpolated) for all rel docs(averaged over queries)\n");
  printf("                  %.4f \n", $a_precision_macro / $n_topic);
  printf("Precision:\n");
}
for (my $i = 0; $i < @k_precision_table; $i++) {
  if ($opt_print_csv) {
    printf(",%.4f", $k_precision_macro[$i] / $n_topic);
  } else {
    printf("  At %4s docs:   %.4f\n", $k_precision_table[$i], $k_precision_macro[$i] / $n_topic);
  }
}
if ($opt_print_csv) {
  printf(",%.4f", $r_precision_macro / $n_topic);
} else {
  printf("R-Precision (precision after R (= num_rel for a query) docs retrieved):\n");
  printf("    Exact:        %.4f\n", $r_precision_macro / $n_topic);
}
if ($opt_print_f_measure) {
  if ($opt_print_csv) {
    printf(",%d,%d,%d,%.4f,%.4f,%.4f", $n_topic, $n_ret_confident_macro, $n_rel_ret_confident_macro, $recall_confident_macro / $n_topic, $precision_confident_macro / $n_topic, $f_measure_confident_macro / $n_topic);
  } else {
    printf("Number of Qeury (confident): %d\n", $n_topic);
    printf("Evaluation at the confident level\n");
    printf("    Retrieved(confident):  %6d\n", $n_ret_confident_macro);
    printf("    Rel_Ret(confident):    %6d\n", $n_rel_ret_confident_macro);
    printf("    Recall(confident):     %.4f\n", $recall_confident_macro / $n_topic);
    printf("    Precision(confident):  %.4f\n", $precision_confident_macro / $n_topic);
    printf("    F-measure(confident):  %.4f\n", $f_measure_confident_macro / $n_topic);
  }
}
if ($opt_print_micro_average) {
  undef(@i_precision_micro);
  my $n_rel_ret_micro = 0;
  for (my $i = 1; $i <= @n_rel_ret_at; $i++) {
    $n_rel_ret_micro += $n_rel_ret_at[$i];
    my $recall = $n_rel_ret_micro / $n_rel_macro;
    my $precision = $n_rel_ret_micro / ($i * $n_topic);
    for (my $j = 0; $j < @i_precision_table && $i_precision_table[$j] <= $recall; $j++) {
      if ($precision > $i_precision_micro[$j]) {
	$i_precision_micro[$j] = $precision;  # interporation
      }
    }
  }
  if ($opt_print_csv) {
    ;
  } else {
    printf("Interpolated Recall - Precision Averages (micro):\n");
  }
  for (my $i = 0; $i < @i_precision_table; $i++) {
    if ($opt_print_csv) {
      printf(",%.4f", $i_precision_micro[$i]);
    } else {
      printf("    at %.2f       %.4f \n", $i_precision_table[$i], $i_precision_micro[$i]);
    }
  }
}
if ($opt_print_csv) {
  printf("\n");
}

# aggregation and printing for each topic
#
sub aggregation {
  local($topic_id) = @_;
  if ($n_rel{$topic_id} > 0) {
    if ($opt_print_topic_by_topic) {
      if ($opt_print_csv) {
	printf("%s,%s,%d,%d,%d", $trec_top_file_base, $topic_id, $n_ret, $n_rel{$topic_id}, $n_rel_ret);
      } else {
	printf("\n");
	printf("Qeuryid (Num): %8s\n", $topic_id);
	printf("Total number of documents over all queries\n");
	printf("    Retrieved: %8s\n", $n_ret);
	printf("    Relevant:  %8s\n", $n_rel{$topic_id});
	printf("    Rel_ret:   %8s\n", $n_rel_ret);
	printf("Interpolated Recall - Precision Averages:\n");
      }
    }
    $n_topic++;
    $n_ret_macro += $n_ret;
    $n_rel_macro += $n_rel{$topic_id};
    $n_rel_ret_macro += $n_rel_ret;
    for (my $i = 0; $i < @i_precision_table; $i++) {
      if ($opt_print_topic_by_topic) {
	if ($opt_print_csv) {
	  printf(",%.4f", $i_precision[$i]);
	} else {
	  printf("    at %.2f       %.4f \n", $i_precision_table[$i], $i_precision[$i]);
	}
      }
      @i_precision_macro[$i] += $i_precision[$i];
    }
    if ($opt_print_topic_by_topic) {
      if ($opt_print_csv) {
	printf(",%.4f", $a_precision / $n_rel{$topic_id});
      } else {
	printf("Average precision (non-interpolated) for all rel docs(averaged over queries)\n");
	printf("                  %.4f \n", $a_precision / $n_rel{$topic_id});
	printf("Precision:\n");
      }
    }
    $a_precision_macro += ($a_precision / $n_rel{$topic_id});
    for (my $i = 0; $i < @k_precision_table; $i++) {
      if ($opt_print_topic_by_topic) {
	if ($opt_print_csv) {
	  printf(",%.4f", $k_precision[$i]);
	} else {
	  printf("  At %4s docs:   %.4f\n", $k_precision_table[$i], $k_precision[$i]);
	}
      }
      @k_precision_macro[$i] += $k_precision[$i];
    }
    if ($opt_print_topic_by_topic) {
      if ($opt_print_csv) {
	printf(",%.4f", $r_precision);
      } else {
	printf("R-Precision (precision after R (= num_rel for a query) docs retrieved):\n");
	printf("    Exact:        %.4f\n", $r_precision);
      }
    }
    $r_precision_macro += $r_precision;
    my $recall_confident = 0.0;
    my $precision_confident = 0.0;
    my $f_measure_confident = 0.0;
    if ($n_ret_confident > 0) {
      $recall_confident = $n_rel_ret_confident / $n_rel{$topic_id};
      $precision_confident = $n_rel_ret_confident / $n_ret_confident;
      if ($recall_confident > 0 && $precision_confident > 0) {
	$f_measure_confident = 2.0 / (( 1.0 / $recall_confident) + (1.0 / $precision_confident));
      }
    }
    if ($opt_print_topic_by_topic) {
      if ($opt_print_f_measure) {
	if ($opt_print_csv) {
	  printf(",1,%d,%d,%.4f,%.4f,%.4f", $n_ret_confident, $n_rel_ret_confident, $recall_confident, $precision_confident, $f_measure_confident);
	} else {
	  printf("Evaluation at the confident level\n");
	  printf("    Retrieved(confident):  %6d\n", $n_ret_confident);
	  printf("    Rel_Ret(confident):    %6d\n", $n_rel_ret_confident);
	  printf("    Recall(confident):     %.4f\n", $recall_confident);
	  printf("    Precision(confident):  %.4f\n", $precision_confident);
	  printf("    F-measure(confident):  %.4f\n", $f_measure_confident);
	}
      }
    }
    $n_ret_confident_macro += $n_ret_confident;
    $n_rel_ret_confident_macro += $n_rel_ret_confident;
    $recall_confident_macro += $recall_confident;
    $precision_confident_macro += $precision_confident;
    $f_measure_confident_macro += $f_measure_confident;
    if ($opt_print_csv && $opt_print_topic_by_topic) {
      printf("\n");
    }
  }
}
