#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;
use FileHandle;

my ($usage) = <<EOT
 [perl] xx.pl -g <gold-file> -s <sys-file> -o <eval-file> -v <verbose>

EOT
;
our ($opt_g, $opt_s, $opt_o, $opt_v);
getopts('g:s:o:v:');

if (!defined $opt_s || !defined $opt_o || !defined $opt_g || !defined $opt_v){
	die $usage;
}
my $gf = open_file($opt_g, "r");
my $sf = open_file($opt_s, "r");
my $of = open_file($opt_o, "w");
my $vf = open_file($opt_v, "w");


my $word_ctr_g = 0;
my $word_ctr_s = 0;
my $word_correct_ctr = 0;
my $wordpos_correct_ctr = 0;

my @sent_g = ();
my @sent_s = ();

while( read_sent($gf, \@sent_g) && read_sent($sf, \@sent_s) )
{
   my %map_g = ();
   my %map_s = ();
   compose_map(\@sent_g, \%map_g);
   compose_map(\@sent_s, \%map_s);
   $word_ctr_g += scalar keys %map_g;
   $word_ctr_s += scalar keys %map_s;
   foreach my $bound(keys %map_s) {
       if (defined $map_g{$bound}) {
           ++$word_correct_ctr;
           if ($map_s{$bound} eq $map_g{$bound}) {
               ++$wordpos_correct_ctr;
           }
       }
   }
}

die "zero words!!" if ($word_ctr_g == 0 && $word_ctr_s == 0);

my $word_prec = 100. * $word_correct_ctr / $word_ctr_s;
my $word_recall = 100. * $word_correct_ctr / $word_ctr_g;
my $word_F1 = 2 * $word_prec * $word_recall / ($word_prec + $word_recall);
my $wordpos_prec = 100. * $wordpos_correct_ctr / $word_ctr_s;
my $wordpos_recall = 100. * $wordpos_correct_ctr / $word_ctr_g;
my $wordpos_F1 = 2 * $wordpos_prec * $wordpos_recall / ($wordpos_prec + $wordpos_recall);

printf $of "gold word ctr: %d\n", $word_ctr_g;
printf $of "sys word ctr: %d\n", $word_ctr_s;
printf $of "correct word ctr: %d\n", $word_correct_ctr;
printf $of "correct word&pos ctr: %d\n", $wordpos_correct_ctr;
printf $of "wordseg:P = %.3f R = %.3f F1 = %.3f\n", $word_prec, $word_recall, $word_F1;
printf $of "wordpos:P = %.3f R = %.3f F1 = %.3f\n", $wordpos_prec, $wordpos_recall, $wordpos_F1;


close_file($of);
close_file($gf);
close_file($sf);
close_file($vf);

sub read_sent{
    my ($file_handle, $sent_ref) = @_;
    @{$sent_ref} = ();

    while(my $line = <$file_handle>){
        $line =~s/^\s+//;
        $line =~s/\s+$//;
        last if($line eq "");
        push(@{$sent_ref}, $line);
    }
    return scalar @{$sent_ref};
}


sub open_file {
    my $path = shift;
    my $mode = shift;
    my $fh = FileHandle->new($path, $mode);
    die "Can't open $path $!\n " if !defined $fh;
    return $fh;
}

sub close_file {
    my $fh = shift;
    $fh->close();
}

sub compose_map {
    my ($s_ref, $m_ref) = @_;
    %{$m_ref} = ();
    my @bies = ();
    my @pos = ();
    my $flag_this_should_start_word = 1;
    my $word_start_posi = 0;
    for my $i(0..$#{$s_ref}) {
        my $line = $s_ref->[$i];
        my @vec = split("\t", $line);
		##print $vf "format may wrong: $line\n" if (scalar @vec != 10);
		my @vec2 = split('\^', $vec[3]);
		##print $vf "format may wrong: $line\n" if (scalar @vec2 != 2);

		#print $vec[3]."=========\n";
		#print $vec2[0]."\n";
		if($vec2[0] eq "M") {
			$vec2[0] = "I";
		}
        push(@bies, $vec2[0]);
        push(@pos, "NN");

AGAIN:
        if ($bies[$i] eq "S") {
            if (!$flag_this_should_start_word) {
                printf $vf "format error: should continue a new word at word %d, change S to E\n", $i+1;
                $bies[$i] = "E";
                $pos[$i] = $pos[$i-1];
                goto AGAIN;
            }

            my $bound = "$i:$i";
            $m_ref->{$bound} = $pos[$i];
        } elsif ($bies[$i] eq "B") {
            if (!$flag_this_should_start_word) {
                printf $vf "format error: should continue a new word at word %d, change B to I\n", $i+1;
                $bies[$i] = "I";
                $pos[$i] = $pos[$i-1];
                goto AGAIN;
            }
            if ($i == scalar @{$s_ref} - 1) {  # the end of the sentence S
                printf $vf "format error: last word is B, change to S\n";
                $bies[$i] = "S";
                goto AGAIN;
            }

            $flag_this_should_start_word = 0;
            $word_start_posi = $i;
        } elsif ($bies[$i] eq "I") {
            if ($i == scalar @{$s_ref} - 1) {  # the end of the sentence I -> E
                printf $vf "format error: last word is I, change to E\n";
                $bies[$i] = "E";
                $pos[$i] = $pos[$i-1];
                goto AGAIN;
            }
            if ($flag_this_should_start_word) {
                printf $vf "format error: should start a new word at word %d, change I to B\n", $i+1;
                $bies[$i] = "B";
                goto AGAIN;
            }

            if ($pos[$i] ne $pos[$i-1]) {
                printf $vf "format error: POS not match at word %d, change %s to %s\n", $i+1, $pos[$i], $pos[$i-1];
                $pos[$i] = $pos[$i-1];
            }
        } elsif ($bies[$i] eq "E") {
            if ($flag_this_should_start_word) {
                printf $vf "format error: should start a new word at word %d, change E to S\n", $i+1;
                $bies[$i] = "S";
                goto AGAIN;
            }

            if ($pos[$i] ne $pos[$i-1]) {
                printf $vf "format error: POS not match at word %d, change %s to %s\n", $i+1, $pos[$i], $pos[$i-1];
                $pos[$i] = $pos[$i-1];
            }
            $flag_this_should_start_word = 1;
            my $bound = "$word_start_posi:$i";
            $m_ref->{$bound} = $pos[$i];
        }
    }
}




