/*---------------------------------------------------------------------------------------------------------------------
- File      : patch_aligner.cc                                                            Project ELSE, EPFL - DI/LIA -
-                                                                       Evaluation in Language and Speech Engineering -
- Author    : Seydoux Florian   Creation date : 17 Sept 1999                                                          -
- Eulogist  : -                 Approval date : -                  Version: 0.1                                       -
-                                                                                                                     -
- Descript. : Temporary tool: converts the output of the LIMSI aligner to the ELSE specification.                     -
-                                                                                                                     -
- Requested : -                                                                                                       -
-                                                                                                                     -
- Gaps      :                                                                                                         -
-                                                                                                                     -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- Rev. date | Reviser               | Revise's description                                                            -
- - - - - - + - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- ../../....| ........              | ...                                                                             -
---------------------------------------------------------------------------------------------------------------------*/

#ifndef PATCH_ALIGNEUR
#define PATCH_ALIGNEUR

#include <fstream>
#include <iostream>
#include <algorithm>
#include <cmath>
#include <limits.h>
#include "globaldef.h"
#include "grace_tags.h"

#ifdef _USE_NAMESPACES
    namespace Else { 
#endif // _USE_NAMESPACES

// One entry of the reference side of the aligner output.
// Every data member is public; readFrom() fills them in and clear() resets them.
class ReferenceEntry {
public:
    ReferenceEntry();

    // Puts the entry back into its blank state: flags false, numbers 0, strings empty.
    void clear();

    // Parses the next reference entry from `in`.
    // Returns true once the '>' ending the opening tag has been reached (token and tag
    // are then extracted), false otherwise.
    bool readFrom(istream& in);

    bool            fuzzy;   // true when the entry carries an 'I' (id) attribute
    bool            linked;  // true when the id is followed by an 'L' (link list) attribute
    int             id;      // number read after the 'I' attribute
    int             link;    // among the listed link ids, the one closest to `id`
    VeryLongNatural loc;     // number read after the first '=' of the entry
    conform_string  token;   // text between the opening tag's '>' and the next '<'
    conform_string  tag;     // text between the following '>' and the next '<'
};

// One entry of the system side of the aligner output.
// Every data member is public; readFrom() fills them in and clear() resets them.
class SystemEntry {
public:
    SystemEntry();

    // Puts the entry back into its blank state: location 0, strings empty.
    void clear();

    // Parses the next system entry from `in`.
    // Returns false when the end of the stream was hit while reading, true otherwise.
    bool readFrom(istream& in);

    conform_string  token;  // text between the opening tag's '>' and the next '<'
    conform_string  tag;    // text between the following '>' and the next '<'
    VeryLongNatural loc;    // optional number found in the opening tag (0 when absent)
};
    

// Sequence of (reference entry, system entry) pairs, kept in input order.
typedef vector<pair<ReferenceEntry, SystemEntry> > Buffer;

// A link is only followed when |link - id| is strictly below this bound (see flushBuffer).
const int         deltaMax = 4;

// Program-wide state.
Buffer            buf;       // entries of the fuzzy zone currently being collected
conform_string    confFile;  // configuration file name, taken from the first argument
UserConfiguration config;    // configuration loaded from `confFile`
HtmlCase          caseId;    // case header; its number is incremented for each emitted pair
bool              oldin;     // set by the -oldin option
bool              oldout;    // set by the -oldout option

// Functions defined further down in this file.
void dumpHelp();
void checkParams(Parameters&);
void flushBuffer();
void compute(const ReferenceEntry& ref, const SystemEntry& sys, const bool err = false);
int main(int argc, char* argv[]);

// Writes the usage text to the standard error stream and then terminates the
// program with exit status 0.  This function never returns to its caller.
void dumpHelp()
{
    static const char* const helpText =
        "Description: (Evaluation task grp, alpha release)\n"
        "  Patch the standard input (assume output of the limsi's aligner) to the standard output,\n"
        "  into the newly defined format for Else evaluation. Also remove non-evaluated zone.\n\n"
        "Usage:  stdin -> patch_aligneur <config> [-oldout][-oldin] -> stdout.\n"
        "        <config>  : the reference configuration file.\n"
        "         -oldout  : the output is made in old format (USB without link)\n"
        "         -oldin   : if the input is in USB mode.\n";

    cerr << helpText;
    exit(0);
}


// Interprets the command-line parameters.
//
// - With no parameter, or when the first one is a help request
//   ("?", "-?", "-h", "-help", "--help"), the usage text is printed through
//   dumpHelp(), which terminates the program.
// - Otherwise the first parameter is stored in `confFile` and removed from `params`.
// - Each remaining parameter may be "-oldin" or "-oldout" and sets the matching global
//   flag; anything else is reported on the standard error stream and then ignored.
//
// Both flags are reset to false before the parameters are examined.
void checkParams(Parameters& params)
{
    oldin = false;
    oldout = false;

    if ( (params.size() < 1)
        || !strcmp(params.front(), "?")
        || !strcmp(params.front(), "-?")
        || !strcmp(params.front(), "-h")
        || !strcmp(params.front(), "-help")
        || !strcmp(params.front(), "--help")
        ) dumpHelp();

    confFile = params.front();
    params.erase(params.begin());

    for (Parameters::const_iterator currentArg = params.begin(); currentArg != params.end(); ++currentArg) {
        if (!strcmp(*currentArg, "-oldin")) {
            oldin = true;
        } else if (!strcmp(*currentArg, "-oldout")) {
            oldout = true;
        } else {
            // The hint names this program (see the usage text), not another tool.
            cerr << "Invalid argument: " << *currentArg
                 << "\nTry 'patch_aligneur --help' for more information!\n";
        }
    }
}

void compute(const ReferenceEntry& ref, const SystemEntry& sys, const bool err)
{
	string emptyRef("??");
	if ((ref.tag != emptyRef) && !ref.tag.empty() && !sys.tag.empty())
	{
		++caseId.caseNo.attribute;
		VeryLongNatural loc(0);
		string segorig;
		string sysTag;
		string refTag;

		if (err) refTag = config.unalignedTag; 
		else refTag = ref.tag;

		if (oldin)
		{
			loc = sys.loc;
			sysTag = sys.tag;
		} else {
			string::size_type sep1, sep2;
			sep1 = sys.tag.find("//");
			sysTag = sys.tag.substr(0, sep1);
			if (sep1 != sys.tag.npos) 
			{
				sep2 = sys.tag.find('(', sep1);
				segorig = sys.tag.substr(sep1+2, sep2-sep1-2);
				istringstream locstr(sys.tag.substr(sep2+1));
				locstr >> loc;
				for (string::iterator c = segorig.begin(); c != segorig.end(); ++c)
					if ((*c) == '/')  *c = '|';
			}
		}
		if (oldout) {
			cout << "<TOK NL=" << ref.loc << " >" << ref.token << "<TAG>" << refTag 
			     << "</TOK><TOK NR=" << loc << " >" << sys.token << "<TAG>" << sysTag << "</TOK>\n";
		} else  {
			caseId.write(cout);
			HtmlToken(ref.loc, refTag, string(), ref.token).write(cout);
			HtmlToken(loc, sysTag, segorig, sys.token).write(cout);
			cout << endl;
		}

	}
}

void flushBuffer()
{
	for (Buffer::const_iterator i = buf.begin(); i != buf.end(); ++i) {
		if (i->first.linked) {
			if (abs(i->first.link - i->first.id) < deltaMax) 
				compute(i->first, buf[i->first.link].second, false); 
			else
				compute(i->first, i->second, true);
		} else compute(i->first, i->second, true);
	}
	buf.clear();
}

int main(int argc, char* argv[])
{
	ReferenceEntry refIn;
	SystemEntry    sysIn;
	
	bool out(true);
	bool fuzzyZone(false);
	caseId.caseNo.attribute = 0;
	
    try {
    Parameters params(argc, argv);
    checkParams(params);
    if (!config.readFrom(confFile, config.Reference, false))
        msg.seriousError("Aligner-Patch",
                         "Configuration file for the input format is not readeable, or it has too serious error(s)");

	if (oldin) while (refIn.readFrom(cin) && sysIn.readFrom(cin))
		compute(refIn, sysIn, false);
	else while (refIn.readFrom(cin) && sysIn.readFrom(cin))	{
		if (!fuzzyZone)
		{
			if (refIn.fuzzy) {
				fuzzyZone = true;
				buf.push_back(make_pair(refIn, sysIn));
		    } else compute(refIn, sysIn, false);
		} else {
			if (refIn.fuzzy) {
				buf.push_back(make_pair(refIn, sysIn));
			} else {
				fuzzyZone = false;
				flushBuffer();
				compute(refIn, sysIn, false);
			}
		}
	}
	flushBuffer();
	} catch (exception& e) { cerr << "Exception occurs: " << e.what() << endl; abort(); }
}


// Builds an entry in the same state clear() produces, so that a freshly constructed
// object never exposes indeterminate flags or numbers (e.g. when it is copied
// before the first readFrom()).  The two strings start out empty by default.
ReferenceEntry::ReferenceEntry() : fuzzy(false), linked(false), id(0), link(0), loc(0) { }

// Resets the entry: both flags become false, id / link / loc become 0 and the
// token and tag strings are emptied.
void
ReferenceEntry::clear()
{
    fuzzy  = false;
    linked = false;

    id   = 0;
    link = 0;
    loc  = 0;

    tag.clear();
    token.clear();
}

// Reads the next reference entry from `in`.
//
// The entry is cleared first.  Characters are then skipped (whitespace ignored) up
// to the first 'I', '>' or '=':
//   '=' : the number that follows is stored in `loc`, then scanning resumes up to
//         the next 'I' or '>';
//   'I' : the entry is fuzzy; one character is skipped, `id` is read, and if the
//         next non-blank character is 'L' a comma-separated list of link ids is
//         read (after skipping three characters).  `link` keeps the listed id
//         closest to `id` (the first one wins on a tie);
//   '>' : `token` is the text up to the next '<', `tag` the text between the
//         following '>' and the next '<'; the closing tag is skipped.
//
// Returns true when the '>' ending the opening tag was reached, false otherwise
// (end of input, or a malformed entry).
bool
ReferenceEntry::readFrom(istream& in)
{
    char c(' ');
    clear();

    // Empty loop body on purpose: the condition does the scanning.
    while ((in >> c) && (c != 'I') && (c != '>') && (c != '='));
    if (c == '=')
    {
        in >> loc;
        while ((in >> c) && (c != 'I') && (c != '>'));
    }
    if (c == 'I')
    {
        fuzzy = true;
        in.ignore(1);  // skip 'I[=]'
        in >> id >> ws;
        in >> c;
        if (c == 'L')
        {
            int lnk(0);
            link = INT_MAX;
            linked = true;
            in.ignore(3); // skip 'L[NK=]'
            do
            {
                // A failed extraction leaves `c` untouched; if it still holds ','
                // the loop would never end, so stop as soon as the stream fails.
                if (!(in >> lnk >> c))
                    break;
                if (abs(lnk - id) < abs(link - id))
                    link = lnk;
            } while (c == ',');
        }
    }
    if (c == '>')
    {
        getline(in, token, '<');
        in.ignore(SKIP_UNTIL_EOF, '>');  // skip '<TAG>'
        getline(in, tag, '<');
        in.ignore(SKIP_UNTIL_EOF, '>'); // skip '</TOK>'
        return true;
    }
    return false;
}

// Builds an entry in the same state clear() produces: location 0 and empty strings,
// so that a freshly constructed object never exposes an indeterminate location.
SystemEntry::SystemEntry() : loc(0)
{}

// Resets the entry: token and tag are emptied and the location is set back to 0.
void
SystemEntry::clear()
{
    tag.clear();
    token.clear();
    loc = 0;
}

// Reads the next system entry from `in`.
//
// The entry is cleared, then the input is skipped up to and including the next '<'
// plus five more characters (presumably "TOK N" -- the old output format written by
// compute() starts its system entry with "<TOK NR="; confirm against real input).
// Depending on the character found there:
//   'R' : two characters are skipped and, if a digit follows, `loc` is read;
//   '=' : one character is skipped and `loc` is read;
//   anything else : `loc` keeps the value 0.
// The rest of the opening tag is skipped, `token` is the text up to the next '<',
// `tag` the text between the following '>' and the next '<', and the closing tag
// is skipped.
//
// Returns false when the end of the stream was reached while reading, true otherwise.
bool 
SystemEntry::readFrom(istream& in)
{
    clear();

    in.ignore(SKIP_UNTIL_EOF, '<');
    in.ignore(5);

    switch (in.peek())
    {
    case 'R':
        in.ignore(2);
        if (isdigit(in.peek()))
            in >> loc;
        break;

    case '=':
        in.ignore(1);
        in >> loc;
        break;

    default:
        break;
    }

    in.ignore(SKIP_UNTIL_EOF, '>');  // skip the rest of the opening tag
    getline(in, token, '<');
    in.ignore(SKIP_UNTIL_EOF, '>');  // skip '<TAG>'
    getline(in, tag, '<');
    in.ignore(SKIP_UNTIL_EOF, '>');  // skip '</TOK>'

    const bool hitEnd = in.eof();
    return !hitEnd;
}


// Closes namespace Else.  The guard must be the very macro that guards the opening
// of the namespace near the top of the file (_USE_NAMESPACES, single leading
// underscore); otherwise the closing brace is dropped whenever the namespace is opened.
#ifdef _USE_NAMESPACES
} // namespace Else
#endif // _USE_NAMESPACES

#endif // defined PATCH_ALIGNEUR
        

