#!/usr/bin/python
# encoding: utf-8


import sys

class CharMismatchError(ValueError):
    """Signal that the two inputs hold different characters on the same row.

    Attributes:
        line_count: 1-based number of the offending row (blank rows included).
        sentence_count: number of blank separator rows seen before that row.
    """

    def __init__(self, line_count, sentence_count):
        ValueError.__init__(
            self,
            "char error at line %d (sentence %d)"
            % (line_count, sentence_count))
        self.line_count = line_count
        self.sentence_count = sentence_count


def _segment_tag(fields):
    """Return the first character of column 3, with 'M' folded into 'I'."""
    tag = fields[3][0]
    return 'I' if tag == 'M' else tag


def count_tag_matches(lines1, lines2):
    """Count rows whose tags agree between two tab-separated line streams.

    Rows are paired positionally and iteration stops at the end of the
    shorter input.  A row that is blank in either input is counted as a
    sentence separator and skipped.  For every other row, column 1 (the
    character) must be identical in both inputs, and the first character of
    column 3 (the tag, with 'M' treated as 'I') is compared.

    Args:
        lines1: iterable of text lines, e.g. an open file.
        lines2: iterable of text lines aligned row by row with ``lines1``.

    Returns:
        A ``(correct_char_count, char_count)`` tuple: the number of rows
        with equal tags and the number of non-blank rows compared.

    Raises:
        CharMismatchError: the character column differs on some row.
        IndexError: a non-blank row has fewer than four columns.
    """
    char_count, correct_char_count = 0, 0
    line_count, sentence_count = 0, 0
    for line1, line2 in zip(lines1, lines2):
        line_count += 1
        # Strip only the line terminator so CRLF files are handled as well;
        # a row holding just "\n" or "\r\n" separates sentences.
        if not line1.rstrip('\r\n') or not line2.rstrip('\r\n'):
            sentence_count += 1
            continue
        fields1 = line1.split('\t')
        fields2 = line2.split('\t')
        tag1 = _segment_tag(fields1)
        tag2 = _segment_tag(fields2)
        if fields1[1] != fields2[1]:
            raise CharMismatchError(line_count, sentence_count)
        if tag1 == tag2:
            correct_char_count += 1
        char_count += 1
    return correct_char_count, char_count


def main(argv):
    """Compare the two files named in ``argv`` and print the tag accuracy.

    Args:
        argv: command-line vector; ``argv[1]`` and ``argv[2]`` are the paths
            of the two files to compare.

    Returns:
        Process exit status: 0 on success, 1 when the files disagree on a
        character or contain no comparable rows, 2 on a usage error.
    """
    if len(argv) < 3:
        sys.stderr.write("usage: python <script> FILE1 FILE2\n")
        return 2

    # The context manager closes both files on every path, including the
    # mismatch path that previously left them open.
    with open(argv[1], 'r') as file1, open(argv[2], 'r') as file2:
        try:
            correct_char_count, char_count = count_tag_matches(file1, file2)
        except CharMismatchError as err:
            # Single-argument print() emits the same text on Python 2 and 3.
            print("char error!")
            print("%d \t %d" % (err.line_count, err.sentence_count))
            return 1  # non-zero so callers can detect the failure

    if char_count == 0:
        # Avoid ZeroDivisionError on empty or blank-only input.
        sys.stderr.write("no characters to compare\n")
        return 1

    accuracy = 1.0 * correct_char_count / char_count
    print("word seg accuracy: %s" % accuracy)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
