#!/usr/bin/env python

import os
import sys
import argparse

import subprocess
import re

# Validate a student's CYK parser output against a reference parser.
#
# Both parsers are run as "<binary> <grammar> cyk" with the sentence file on
# stdin. In the default (undergraduate) mode the two sets of parses are
# compared after removing all digits and dots; in --grad mode the student's
# first output line must be one of the highest-probability reference parses.

# Matches what gets removed from a parse before comparing: dots and digit runs.
NUMBER_RE = re.compile(r"\.|\d+")
# Matches a decimal probability, optionally followed by an exponent.
PROBABILITY_RE = re.compile(r"\d+\.\d+(?:[eE][-+]?\d+)?")


def parse_arguments(argv=None):
    """Parse the command line (``sys.argv[1:]`` when ``argv`` is None)."""
    a = argparse.ArgumentParser(description='validate parses for parsing assignment')
    a.add_argument("--binary", help='binary to use', default='./parser', dest="bin")
    a.add_argument("--ref", help='reference solution to use', default='./parser_reference', dest="ref")
    a.add_argument("--sentence", help='sentence to try and parse', default='sentence', dest="sen")
    a.add_argument("--grammar", help='grammar to use', default='fish.cnf', dest="grammar")
    a.add_argument("--grad", help='whether or not to use graduate student mode', default=False, action="store_true", dest="grad")
    return a.parse_args(argv)


def run_parser(binary, grammar, sentence_path):
    """Run ``binary grammar cyk`` with the sentence file on stdin.

    Returns:
        Everything the parser wrote to stdout, as text.
    """
    command = [binary, grammar, "cyk"]
    with open(sentence_path, "r") as sentence:
        # universal_newlines=True yields text (not bytes) on Python 2 and 3.
        process = subprocess.Popen(command, stdin=sentence,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
        # communicate() drains the pipe while waiting; calling wait() first
        # can deadlock once the child fills the OS pipe buffer.
        return process.communicate()[0]


def strip_numbers(text):
    """Return ``text`` with every dot and digit removed."""
    return NUMBER_RE.sub("", text)


def missing_parses(parses, other):
    """Return the items of ``parses`` (in order) that do not occur in ``other``."""
    known = set(other)
    return [parse for parse in parses if parse not in known]


def undergraduate_report(student_output, reference_output):
    """Compare the two outputs with numbers stripped.

    Returns:
        The lines to print: one error message plus the offending parse for
        every parse that appears in only one of the two outputs.
    """
    student_parses = strip_numbers(student_output).split('\n')
    reference_parses = strip_numbers(reference_output).split('\n')

    lines = []
    # parses in the student output that aren't in the reference output
    for parse in missing_parses(student_parses, reference_parses):
        lines.append("error: student output contained this parse that reference lacked")
        lines.append(parse)
    # parses in the reference output that aren't in the student output
    for parse in missing_parses(reference_parses, student_parses):
        lines.append("error: reference output contained this parse that student lacked")
        lines.append(parse)
    return lines


def rank_reference_parses(reference_lines):
    """Return ``(stripped_parse, probability)`` pairs, most probable first.

    Lines without a decimal probability are ignored; the first probability on
    a line is the one used.
    """
    ranked = []
    for line in reference_lines:
        found = PROBABILITY_RE.search(line)
        if found:
            ranked.append((strip_numbers(line), float(found.group(0))))
    # Stable ascending sort followed by a reversal, so ties come out in the
    # reverse of their order in the reference output.
    ranked.sort(key=lambda entry: entry[1])
    ranked.reverse()
    return ranked


def graduate_report(student_output, reference_output):
    """Check that the student's first parse is a most-probable reference parse.

    Returns:
        ``(lines, completed)``: the lines to print, and False when the check
        could not be carried out (no reference parse had a probability, or
        the student's parse does not occur in the reference output).
    """
    ranked = rank_reference_parses(reference_output.split('\n'))
    if not ranked:
        return ["Error - reference output contained no parse with a probability"], False

    # there may be several best parses because of ties
    best_probability = ranked[0][1]
    best_lines = [parse + " with probability " + str(prob)
                  for parse, prob in ranked if prob == best_probability]

    # find the student's parse in the ranked list (first hit = highest probability)
    student_parse = strip_numbers(student_output.split('\n')[0])
    student_prob = next((prob for parse, prob in ranked if parse == student_parse), None)
    if student_prob is None:
        lines = [student_parse, "Error - student parse not found in reference output"]
        return lines + best_lines, False

    lines = [str(student_prob)]
    if student_prob != best_probability:
        lines.append(student_parse)
        lines.append("Error - incorrect parse selected")
        lines.extend(best_lines)
    return lines, True


def main(argv=None):
    """Run both parsers, print the validation report and return an exit status."""
    arguments = parse_arguments(argv)

    # running student solution, then reference solution
    student_output = run_parser(arguments.bin, arguments.grammar, arguments.sen)
    reference_output = run_parser(arguments.ref, arguments.grammar, arguments.sen)

    if arguments.grad:
        lines, completed = graduate_report(student_output, reference_output)
    else:
        # for undergraduates, the probability is stripped before comparing
        lines, completed = undergraduate_report(student_output, reference_output), True

    for line in lines:
        print(line)
    if not completed:
        return 1

    # done everything
    print("Done")
    return 0


if __name__ == "__main__":
    sys.exit(main())