/*---------------------------------------------------------------------------------------------------------------------
- File      : grace_tools.h                                                               Project ELSE, EPFL - DI/LIA -
-                                                                       Evaluation in Language and Speech Engineering -
- Author    : Seydoux Florian   Creation date : 29 June 1999                                                          -
- Eulogist  : -                 Approval date : -                  Version: 0.1                                       -
-                                                                                                                     -
- Descript. : Divers utilitaires devant trouver place ailleurs                                                        -
-                                                                                                                     -
- Requested : -                                                                                                       -
-                                                                                                                     -
- Gaps      : o)                                                                                                      -
-                                                                                                                     -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- Rev. date | Reviser               | Revise's description                                                            -
- - - - - - + - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- ../../....| ........              | ...                                                                             -
---------------------------------------------------------------------------------------------------------------------*/

#ifndef GRACE_TOOLS_H
#define GRACE_TOOLS_H

#include <map>
#include <deque>
#include <vector>
#include <string>
#include <cassert>
#include <cstring>
#include <stdexcept>
#include <iomanip>
#include <iostream>
#include <fstream>

#include "globaldef.h"
#include "dataflow.h"
#include "messages_manager.h"
#include "flow_basic_operators.h"
#include "datas.h"

#ifdef _USE_NAMESPACES
    namespace Else { 
#endif // _USE_NAMESPACES

// Error messages:
//    errors must be locatable in the input files...
//    since the data flow passes through operators, simply counting characters/words does not give
//    information that is usable with respect to the input files.
//    Nor can we simply count the characters read from those files, because there is a delay between
//    reading and error detection (the 'pipe' has to be filled, and its length is variable!).
//    The solution sketched here associates an index number with each character (anchoring technique).
//    Unfortunately, this implies redesigning the hierarchy of flow operators...
//    (Currently the implicit-conversion trick is used, but that is not clean.)
//    The structure therefore needs rethinking: it may no longer be 'templatised', but rather use
//    a high-level abstract class, specialised case by case. (To be seen; perhaps mix the two techniques...
//    the case needs a 'fine-grained' analysis.)



// Selects which character index a getCharIndex() call returns: the first or the last character of the
// most recently produced item, or the character currently being processed.
// The enumerators are used directly as subscripts into 3-element index arrays, so their values matter.
enum CharacterIndex { FirstOfToken, LastOfToken, Current }; // Used for array indexing !

class GraceTokenizer: public CharFlowAgent {

//
// Produit un flux de string a partir d'un flux de char, constitue de tokens Grace
// 
// Symbole: _DBG_TOKENIZER pour obtenir le dump de la classification des caracteres.


    friend ostream& operator<<(ostream& os, const GraceTokenizer& tok);

public:
    GraceTokenizer(StringFlowListener& nextAgent = nullStringFlowListener)
    : state(Separator),
      token(),
      buffer(),
      newToken(false),
      nextAgent(&nextAgent),
      tokenId(0),
      start(true)
    {
        memset(index, 0, sizeof(VeryLongNatural[3]));         // Warning... not very safe !
        memset(privateIndex, 0, sizeof(VeryLongNatural[2]));
    }

    void setNextAgent(StringFlowListener& nextAgent) 
    {
        this->nextAgent = &nextAgent;
    }

//    bool haveNewToken() const
//    {
//        return newToken;
//    }

    const string& getLastToken() const
    {
        return token;
    }

    const VeryLongNatural& getLastTokenId() const
    {
        return tokenId;
    }

    const VeryLongNatural& getCharIndex(const CharacterIndex who = Current) const
    {
        return index[who];
    }

    void computeString(const string& str)
    {
        for (string::const_iterator i = str.begin(); i != str.end(); ++i)
            (*this) <<= (*i);
    }

    void computeIStream(istream& input)
    {
        char c;
        while (input.get(c)) (*this) <<= c;
    }

    void clear()
    {
        token.clear();
        buffer.clear();
        state = Separator;
    }
    
    // Interface DelayedFlowAgent ...........................................
    void flush()
    {
        if (! buffer.empty()) makeNewToken();
        tokenId = 0;
        memset(index, 0, sizeof(VeryLongNatural[3]));         // Warning... not very safe !
        memset(privateIndex, 0, sizeof(VeryLongNatural[2]));
        start = true;
        state = Separator;
    }

    // Interface CharFlowAgent ............................................
    void computeFlowItem(const ichar& data) 
    {
        CharCategory category(categorize(data));
        newToken = false;

        index[Current] = data.i;
        if (start) {
            privateIndex[FirstOfToken] = privateIndex[LastOfToken] = data.i;
            start = false;
        }

        switch (state) {
            case Alphabetic    :switch (categorize(data)) {
                                case Alphabetic:// state = Alphabetic;
                                                buffer.push_back(data);
                                                break;
                                                
                                case Numeric :  state = Numeric;
                                                makeNewToken(); // assume that the buffer is cleaning
                                                buffer.push_back(data);
                                                privateIndex[FirstOfToken] = data.i;
                                                break;
                                                
                                case Other :    state = Other;
                                                makeNewToken();
                                                buffer.push_back(data);
                                                privateIndex[FirstOfToken] = data.i;
                                                break;
                                                
                                case Separator: state = Separator;    
                                                makeNewToken();
                                                break;
                                                
                                default :       throw domain_error(string("GraceTokenizer: invalid char's category"));
                                                break;
                                }
                                break;
                                
            case Numeric    :   switch (categorize(data)) {
                                case Alphabetic:state = Alphabetic;
                                                makeNewToken();
                                                buffer.push_back(data);
                                                privateIndex[FirstOfToken] = data.i;
                                                break;
                                                
                                case Numeric :  // state = Numeric;
                                                buffer.push_back(data);
                                                break;
                                                
                                case Other :    state = Other;
                                                makeNewToken();
                                                buffer.push_back(data);
                                                privateIndex[FirstOfToken] = data.i;
                                                break;
                                                
                                case Separator: state = Separator;    
                                                makeNewToken();
                                                break;
                                                
                                default :       throw domain_error(string("GraceTokenizer: invalid char's category"));
                                                break;
                                }
                                break;

            case Other      :   makeNewToken();
                                state = categorize(data);
                                if (state != Separator) {
                                    buffer.push_back(data);
                                    privateIndex[FirstOfToken] = data.i;
                                }
                                break;

            case Separator  :   state = categorize(data);
                                if (state != Separator) {
                                   buffer.push_back(data);
                                   privateIndex[FirstOfToken] = data.i;
                                }
                                break;

            default         :   throw domain_error(string("GraceTokenizer: invalid internal state"));
                                break;
        }
        privateIndex[LastOfToken] = data.i;
    }

protected:
    void makeNewToken()
    {
        ++tokenId;
        newToken = true;
        token = buffer;
        buffer.clear();
        index[FirstOfToken] = privateIndex[FirstOfToken];
        index[LastOfToken] = privateIndex[LastOfToken];
//
//      Updating Output's StringsFlow ...
//       (Control is explicitly given to the next operator, and so on... 
//      
        (*nextAgent) <<= token;
    }

private:
    CharCategory          state;
    bool                  newToken;
    conform_string        token;
    conform_string        buffer;
    StringFlowListenerPtr nextAgent;
    VeryLongNatural       tokenId;
    VeryLongNatural       index[3];
    VeryLongNatural       privateIndex[2];
    bool                  start;
};

// Receiver of a flow of lemmas: line comments, (token, tag) pairs, and (token, tag, comment) triples.
// To be merged with the other Flow classes (original author's note).
class LemmesFlowListener {
public:
    // Polymorphic base: a virtual destructor makes deletion through a LemmesFlowListener* well defined.
    virtual ~LemmesFlowListener() { }

    // Called for a comment standing on its own line.
    virtual void computeFlowItem(const Comment& lineCmt) = 0;
    // Called for a token with its tag.
    virtual void computeFlowItem(const Token& token, const LocalisedTag& tag) = 0;
    // Called for a token with its tag and a trailing comment.
    virtual void computeFlowItem(const Token& token, const LocalisedTag& tag, const Comment& cmt) = 0;

    // The other flow classes are fed with operator<<=, but an operator takes a single right-hand operand
    // and cannot carry two or three items; compute() plays that role here.
    void compute(const Comment& lineCmt) { computeFlowItem(lineCmt); }
    void compute(const Token& token, const LocalisedTag& tag) { computeFlowItem(token,tag); }
    void compute(const Token& token, const LocalisedTag& tag, const Comment& cmt) { computeFlowItem(token,tag,cmt); }
};
// A lemma listener that can additionally be flushed; flush() is left to the concrete classes.
class LemmesFlowAgent : public LemmesFlowListener {
public:
    // Pure virtual: what flushing means is defined by each implementation.
    virtual void flush() = 0;
};

// Raw pointer aliases for the two lemma-flow interfaces.
typedef LemmesFlowListener* LemmesFlowListenerPtr;
typedef LemmesFlowAgent* LemmesFlowAgentPtr;

// Listener that silently discards everything it receives.
// The overrides are private; they are reached through the LemmesFlowListener interface.
class NullLemmesFlowListener : public LemmesFlowListener {
private:
    void computeFlowItem(const Comment&) {}
    void computeFlowItem(const Token&, const LocalisedTag&) {}
    void computeFlowItem(const Token&, const LocalisedTag&, const Comment&) {}
};

// Agent that silently discards everything it receives and whose flush() does nothing.
// The overrides are private; they are reached through the LemmesFlowAgent / LemmesFlowListener interfaces.
class NullLemmesFlowAgent : public LemmesFlowAgent {
private:
    void flush() {}
    void computeFlowItem(const Comment&) {}
    void computeFlowItem(const Token&, const LocalisedTag&) {}
    void computeFlowItem(const Token&, const LocalisedTag&, const Comment&) {}
};

// Shared do-nothing instances; nullLemmesFlowListener is the default next agent of Lemmatizeur.
// NOTE(review): these are non-inline object definitions in a header, so including this header from
// more than one translation unit would presumably give duplicate-symbol link errors -- confirm how the
// header is used before relying on it elsewhere.
NullLemmesFlowListener nullLemmesFlowListener;
NullLemmesFlowAgent nullLemmesFlowAgent;


// Lemma listener that serialises what it receives onto a string flow, inserting the separators
// (EndToken, EndTag, StartCmt, EndCmt) obtained from the user configuration.
// When skipComment is true, comments are dropped from the output.
class TaggedCorpusWriter: public LemmesFlowListener {
public:
    // Keeps references to config and output: both must outlive the writer.
    TaggedCorpusWriter(const UserConfiguration& config, StringFlowListener& output, const bool skipComment = false)
    : outFlow(output),          // initialisers listed in member declaration order
      config(config),
      skipCmt(skipComment)
    { }

    // Writes a stand-alone comment surrounded by the comment separators (nothing if comments are skipped).
    void write(const Comment& lineCmt)
    {
//      msg.message(string("Write <Cmt:") + lineCmt + string(">"), DEBUG_MSG);
        if (skipCmt) return;

        outFlow <<= config(StartCmt, false);
        outFlow <<= lineCmt;
        outFlow <<= config(EndCmt, false);
    }

    // Writes "token EndToken tag EndTag".
    void write(const Token& token, const LocalisedTag& tag)
    {
//      msg.message(string("Write <") + token + string(",") + tag + string(">"), DEBUG_MSG);
        outFlow <<= token;
        outFlow <<= config(EndToken, false);
        outFlow <<= tag;
        outFlow <<= config(EndTag, false);
    }

    // Writes "token EndToken tag <TAB> StartCmt cmt EndCmt"; falls back to the two-argument form when
    // comments are skipped.
    void write(const Token& token, const LocalisedTag& tag, const Comment& cmt)
    {
//      msg.message(string("Write <") + token + string(",") + tag + string(",") + cmt + string(">"), DEBUG_MSG);
        if (skipCmt) {
            write(token, tag);
            return;
        }

        outFlow <<= token;
        outFlow <<= config(EndToken, false);
        outFlow <<= tag;
        outFlow <<= string(1,'\t');
        outFlow <<= config(StartCmt, false);
        outFlow <<= cmt;
        // The 'false' flag used to sit outside the call (comma operator), so the separator was fetched
        // with the default flag, unlike every other separator written by this class.
        outFlow <<= config(EndCmt, false);
    }

    // LemmesFlowListener interface: each item is simply written out.
    void computeFlowItem(const Comment& lineCmt)                                          { write(lineCmt); }
    void computeFlowItem(const Token& token, const LocalisedTag& tag)                     { write(token, tag); }
    void computeFlowItem(const Token& token, const LocalisedTag& tag, const Comment& cmt) { write(token, tag, cmt); }

public:
    StringFlowListener& outFlow;        // destination of the serialised text
    const UserConfiguration& config;    // source of the separator strings
    const bool skipCmt;                 // true: comments are not written
};


// Kind of the last piece of data produced by TaggedCorpusInterpreteur: a comment, a token, a tag,
// or a tag that is followed by a comment. Marked obsolete by the original author, but still used below.
enum DataType { Comment_, Token_, Tag_, TagWithComment_ };    // Obsolete

// Splits a flow of indexed characters (a tagged corpus) into tokens, tags and comments.
//
// One PatternTracker per separator (EndToken ... EndCmt, taken from the user configuration) watches
// the incoming characters; each tracker is paired with the string buffer it was constructed with.
// A four-state machine (_Uncertain, _Token, _Tag, _Comment) decides which trackers are fed. Each
// completed piece is stored as the "last data", typed with a DataType, and forwarded to the next
// agent with operator<<=.
//
// NOTE(review): the PatternTracker objects allocated in the constructor are never deleted in the code
// visible here -- confirm ownership; a destructor (and a copy policy) is probably missing.
// NOTE(review): sep has 4 slots and is subscripted by SeparatorId, which presumably requires
// EndToken..EndCmt to be the values 0..3 -- verify against the enum declaration.
class TaggedCorpusInterpreteur: public CharFlowAgent { // Transcoding is missing
    friend ostream& operator<<(ostream&, const TaggedCorpusInterpreteur&);
    enum State { _Uncertain, _Token, _Tag, _Comment };

public:
    // Builds one tracker per separator from config and starts in the _Uncertain state.
    // nextAgent receives every completed piece of data; it defaults to the null listener.
    TaggedCorpusInterpreteur(const UserConfiguration& config, StringFlowListener& nextAgent = nullStringFlowListener)
    : newData(false),
      state(_Uncertain),
      lastRead(),
      lastReadType(Comment_),
      nextAgent(&nextAgent),
      caseId(0),
      start(true)
    {
        for (SeparatorId i = EndToken; i <= EndCmt; i = static_cast<SeparatorId>(i + 1))
            sep[i].first = new PatternTracker(config(i,false),sep[i].second);
        for (Natural i = 0; i < 3; ++i) index[i] = 0;
        for (Natural i = 0; i < 2; ++i) privateIndex[i] = 0;
    }

    // Redirects the produced data to another listener.
    void setNextAgent(StringFlowListener& nextAgent) 
    {
        this->nextAgent = &nextAgent;
    }
    
    // True when the last processed character completed a piece of data.
    const bool& haveNewData() const
    {
        return newData;
    }

    // Text of the last completed piece of data.
    const string& getLastData() const
    {
        return lastRead;
    }

    // Kind of the last completed piece of data.
    const DataType& getLastDataType() const
    {
        return lastReadType;
    }

    // Counter incremented each time a tag (with or without comment) is completed.
    const VeryLongNatural& getLastDataId() const
    {
        return caseId;
    }

    // Index of the first/last character of the last data, or of the character currently processed.
    const VeryLongNatural& getCharIndex(const CharacterIndex who = Current) const
    {
        return index[who];
    }

    // First and last character indexes of the last completed piece of data.
    const LocalisedData getBounds() const
    {
        return LocalisedData(index[FirstOfToken], index[LastOfToken]);
    }

    // Ends the current input: reports and emits whatever is pending for the current state, then resets
    // the state, counters, indexes, trackers and buffers.
    // Throws domain_error if the internal state is not a known value.
    void flush()
    {
        switch (state) {
            case _Uncertain      : if (!sep[StartCmt].first->empty()) {
                                       sep[StartCmt].first->flush();
                                       errorStream << "Warning, residual datas (token/cmt):" << sep[StartCmt].second << '\n';
                                   }
                                   break;

            case _Token          : sep[EndToken].first->flush();
                                   errorStream << "Warning, unfinished token: " << sep[EndToken].second << '\n';
                                   makeNewData(Token_, sep[EndToken].second);
                                   break;
                                
            case _Tag            : sep[EndTag].first->flush();
                                   errorStream << "Warning, unfinished tag: " << sep[EndTag].second << '\n';
                                   makeNewData(Tag_, sep[EndTag].second);
                                   break;

            case _Comment        : sep[EndCmt].first->flush();
                                   errorStream << "Warning, unfinished comment: " << sep[EndCmt].second <<
                                      " at " << privateIndex[FirstOfToken]  << '\n';
                                   makeNewData( Comment_, sep[EndCmt].second);
                                   break;
                                
            default              : throw domain_error(string("TaggedCorpusReader: invalid internal state (flush)"));
                                   break;
        }
        // Back to the initial condition. Note that the published indexes are zeroed here, so bounds
        // queried after flush() returns are all zero.
        state = _Uncertain;
        newData = false;
        caseId = 0;
        start = true;
        for (Natural i = 0; i < 3; ++i) index[i] = 0;
        for (Natural i = 0; i < 2; ++i) privateIndex[i] = 0;
        for (SeparatorId i = EndToken; i <= EndCmt; i = static_cast<SeparatorId>(i + 1)) {
            sep[i].first->flush();
            sep[i].second.clear();
        }
    }
    
    // Processes one indexed character: feeds the trackers relevant to the current state and, when a
    // separator pattern is recognised, emits the accumulated data and moves to the next state.
    // Throws domain_error if the internal state is not a known value.
    void computeFlowItem(const ichar& data)
    {
        if (start) {
            privateIndex[FirstOfToken] = privateIndex[LastOfToken] = data.i;
            start = false;
        }
        index[Current] = data.i;
        newData = false;
        switch (state) {
            // Start of an entry: it may be a comment or a token, so both trackers are fed until the
            // comment-start tracker is full (decision) or the end-of-token pattern is seen.
            case _Uncertain      : (*sep[StartCmt].first) <<= data;
                                   (*sep[EndToken].first) <<= data;
                                   if (sep[StartCmt].first->full()) {
                                       if (sep[StartCmt].first->patternFound()) {
                                           state = _Comment;
                                           sep[EndToken].first->clear();
                                           sep[EndToken].second.clear();
                                           break;
                                       } else {
                                           state = _Token;
                                       }
                                       sep[StartCmt].first->clear();
                                   }
                                   if (sep[EndToken].first->patternFound()) {
                                       state = _Tag;
                                       sep[EndToken].first->clear();
                                       sep[StartCmt].first->clear();
                                       sep[StartCmt].second.clear();
                                       makeNewData(Token_, sep[EndToken].second);
                                       privateIndex[FirstOfToken] = index[Current];
                                   }
                                   break;
                                
            // Inside a token: wait for the end-of-token separator.
            case _Token          : (*sep[EndToken].first) <<= data;
                                   if (sep[EndToken].first->patternFound()) {
                                       state = _Tag;
                                       sep[EndToken].first->clear();
                                       makeNewData(Token_, sep[EndToken].second);
                                       privateIndex[FirstOfToken] = index[Current];
                                   }
                                   break;
                                
            // Inside a tag: it ends either with the end-of-tag separator or with the start of a comment.
            case _Tag            : (*sep[EndTag].first) <<= data;
                                   (*sep[StartCmt].first) <<= data;
                                   if (sep[EndTag].first->patternFound() || sep[StartCmt].first->patternFound()) {
                                       if (sep[EndTag].first->patternFound()) {
                                           conform_string& tag(sep[EndTag].second);
                                           tag.ws_back();
                                           state = _Uncertain;
                                           makeNewData(Tag_, tag);
                                       } else {
                                           conform_string& tag(sep[StartCmt].second);
                                           tag.ws_back();
                                           state = _Comment;
                                           makeNewData(TagWithComment_, tag);
                                       }
                                       sep[EndTag].first->clear();
                                       sep[StartCmt].first->clear();
                                       sep[EndTag].second.clear();
                                       sep[StartCmt].second.clear();
                                       privateIndex[FirstOfToken] = index[Current];
                                   }
                                   break;
                                
            // Inside a comment: wait for the end-of-comment separator.
            case _Comment        : (*sep[EndCmt].first) <<= data;
                                   if (sep[EndCmt].first->patternFound()) {
                                       state = _Uncertain;
                                       sep[EndCmt].first->clear();
                                       makeNewData(Comment_, sep[EndCmt].second);
                                       privateIndex[FirstOfToken] = index[Current];
                                   }
                                   break;

            default              : throw domain_error(string("TaggedCorpusReader: invalid internal state"));
                                   break;
        }
        privateIndex[LastOfToken] = data.i;
    }

protected:

    // Publishes buffer as the last data of kind dtype (the buffer is emptied), publishes its bounds,
    // counts it if it is a tag, and hands control to the next agent.
    virtual void makeNewData(const DataType& dtype, conform_string& buffer)
    {
        newData = true;
        lastRead = buffer;
        buffer.clear();
        index[FirstOfToken] = privateIndex[FirstOfToken];
        index[LastOfToken] = privateIndex[LastOfToken];
        // Records the kind of data, and counts one more case for a tag with or without comment.
        if ( ((lastReadType = dtype) == Tag_) || (dtype == TagWithComment_)) ++caseId;
//
//      Updating Output's StringsFlow ...
//       (Control is explicitly given to the next operator, and so on... 
//      
        (*nextAgent) <<= lastRead;
    }

protected:

    // A separator tracker together with the buffer it was constructed with.
    typedef pair<PatternTrackerPtr, CharFlowString> Patterns;

    bool                  newData;          // true when the last character completed a piece of data
    State                 state;            // current state of the parser
    conform_string        lastRead;         // text of the last completed piece of data
    DataType              lastReadType;     // kind of the last completed piece of data
    Patterns              sep[4];           // trackers and buffers, subscripted by SeparatorId
    StringFlowListenerPtr nextAgent;        // receiver of completed data
    VeryLongNatural       caseId;           // number of tags completed so far
    VeryLongNatural       index[3];         // published indexes, subscripted by CharacterIndex
    VeryLongNatural       privateIndex[2];  // first/last index of the data being accumulated
    bool                  start;            // true until the first character has been seen
};

// Assembles the token / tag / comment strings produced by a TaggedCorpusInterpreteur into lemmas
// (comment alone, token+tag, or token+tag+comment) and forwards them to a LemmesFlowListener.
//
// The object is at once the character entry point (characters are passed to the embedded
// interpreter) and the string listener of that interpreter. Sequence errors in the input are
// reported on errorStream and the affected lemma is forwarded with a warning comment.
class Lemmatizeur : public CharFlowAgent, public StringFlowListener {
    enum State { _WaitTokenOrCmt, _WaitTag, _WaitEndLineCmt };
public:
    // sourceName is only used in diagnostics; nextAgent receives the assembled lemmas.
    Lemmatizeur(const UserConfiguration& config, const string& sourceName = string(),
                LemmesFlowListener& nextAgent = nullLemmesFlowListener)

    : tci(config, *this),
      sourceName(sourceName),
      nextAgent(&nextAgent),
      state(_WaitTokenOrCmt),
      token(),
      tag(),
      cmt()
    { }

    void setSourceName(const string& sourceName)
    {
        this->sourceName = sourceName;
    }

    const string& getSourceName() const
    {
        return (sourceName);
    }

    // Ends the current input: flushes the interpreter (which may deliver one last string), reports and
    // forwards an incomplete lemma if one is pending, then returns to the initial state.
    // Throws domain_error if the internal state is not a known value.
    void flush()
    {
        tci.flush();
        // NOTE(review): tci.flush() zeroes the interpreter's published indexes, so the position printed
        // by "<< tci" in the two messages below is all zeros.
        switch (state)
        {
            case _WaitTokenOrCmt :  // Ok, safe.
                 break;

            case _WaitTag :
                errorStream << "ERROR <B.2.1. - Corrupt tagged file> '" << sourceName << "' " << tci
                            << " Missing tag for token '" << token << "'\n";
                (*nextAgent).compute(token, LocalisedTag(), Comment(WARN_HEAD + string("missing tag (eof).")));
                break;

            case _WaitEndLineCmt :
                errorStream << "WARNING <b.2.2. - Corrupt tagged file> '" << sourceName << "' " << tci
                            << " Missing end line comment for lemme (" << static_cast<string>(token) << ','
                            << static_cast<string>(tag) << ").\n";
                (*nextAgent).compute(token, tag, Comment(WARN_HEAD + string("missing end line comment (eof).")));
                break;

            default :
                throw domain_error(INVALID_STATE);
                break;
        }
        // The interpreter has been reset by its own flush(); reset this state machine too, so that a
        // reused Lemmatizeur does not treat the next input as the continuation of an unfinished lemma.
        state = _WaitTokenOrCmt;
        cmt.clear();
    }

    // Character entry point: every character goes through the embedded interpreter.
    void computeFlowItem(const ichar& data)
    {
        tci <<= data;
    }

    // String entry point, called by the interpreter each time it completes a piece of data; the kind
    // of data is read back from the interpreter.
    // Throws domain_error on an unknown data kind or internal state.
    void computeFlowItem(const string& data)
    {
        switch (state) {
            case _WaitTokenOrCmt :
                switch (tci.getLastDataType()) {
                    case Comment_ :
                        (*nextAgent).compute(Comment(data, tci.getBounds()));
                        break;

                    case Token_ :
                        token = Token(data, tci.getBounds());
                        state = _WaitTag;
                        break;

                    case Tag_ :
                        errorStream << "ERROR <B.1.1. - Incorrect Sequence> '" << sourceName << "' " << tci
                                    << " The tag '" << data << "' is given, but a token or a comment was waited.\n";
                        // No token belongs to this tag: do not forward the previous lemma's token.
                        token.clear();
                        (*nextAgent).compute(token,
                                             LocalisedTag(data, tci.getBounds()),
                                             Comment(WARN_HEAD + string("missing token")));
                        break;

                    case TagWithComment_ :
                        errorStream << "ERROR <B.1.1. - Incorrect Sequence> '" << sourceName << "' " << tci
                                    << " The tag '" << data << "' is given, but a token or a comment was waited.\n";
                        token.clear();
                        // Remember the tag just read; it is forwarded once its comment has arrived.
                        tag = LocalisedTag(data, tci.getBounds());
                        cmt = WARN_HEAD + "missing token";
                        state = _WaitEndLineCmt;
                        break;

                    default :
                        throw domain_error(INVALID_RESULT);
                        break;
                }
                break;

            case _WaitTag :
                switch (tci.getLastDataType()) {
                    case Comment_ :
                        errorStream << "ERROR <B.1.2. - Incorrect Sequence> '" << sourceName << "' " << tci
                                    << " The comment '" << data << "' is given, but a tag was waited.\n";
                        cmt = WARN_HEAD + string("comment unauthorized");
                        break;

                    case Token_ :
                        errorStream << "ERROR <B.1.3. - Incorrect Sequence> '" << sourceName << "' " << tci
                                    << " The token '" << data << "' is given, but a tag (for '" << token
                                    << "') was waited.\n";
                        // Forward the pending token without tag, then start over with the new token.
                        (*nextAgent).compute(token, LocalisedTag(), cmt.append(WARN_HEAD).append(string("missing tag")));
                        cmt.clear();
                        token = Token(data, tci.getBounds());
                        break;

                    case Tag_ :
                        if (cmt.empty()) (*nextAgent).compute(token, LocalisedTag(data, tci.getBounds()));
                        else {
                            (*nextAgent).compute(token, LocalisedTag(data, tci.getBounds()), cmt);
                            cmt.clear();
                        }
                        state = _WaitTokenOrCmt;
                        break;

                    case TagWithComment_ :
                         tag = LocalisedTag(data, tci.getBounds());
                         state = _WaitEndLineCmt;
                         break;

                    default :
                        throw domain_error(INVALID_RESULT); break;
                }
                break;

            case _WaitEndLineCmt :
                switch (tci.getLastDataType()) {
                    case Comment_ :
                        if (cmt.empty()) (*nextAgent).compute(token, tag, Comment(data, tci.getBounds()));
                        else {
                            // A warning is pending: append it to the comment read from the input.
                            (*nextAgent).compute(token, tag, Comment(data+cmt, tci.getBounds()));
                            cmt.clear();
                        }
                        // token.clear();
                        // tag.clear();
                        state = _WaitTokenOrCmt;
                        break;

                    default :
                        THROW(domain_error(INVALID_RESULT));
                        break;
                }
                break;

            default :
                THROW(domain_error(INVALID_STATE));
                break;
        }
    }

public:
    static const string WARN_HEAD;        // prefix of the warning comments attached to lemmas
    static const string INVALID_STATE;    // exception text: unknown internal state
    static const string INVALID_RESULT;   // exception text: unexpected data kind from the interpreter

protected:
    TaggedCorpusInterpreteur  tci;          // embedded interpreter; this object is its listener
    string                    sourceName;   // name shown in diagnostics
    LemmesFlowListener*       nextAgent;    // receiver of the assembled lemmas
    State                     state;        // what the state machine is waiting for
    Token                     token;        // token of the lemma being assembled
    LocalisedTag              tag;          // tag of the lemma being assembled
    Comment                   cmt;          // pending warning comment, empty if none
};
// Definitions of Lemmatizeur's static message strings.
// NOTE(review): these are non-inline definitions in a header; including the header from several
// translation units would presumably produce duplicate-symbol link errors -- confirm.
const string Lemmatizeur::WARN_HEAD(" [LEMMATIZEUR-WARNING] ");
const string Lemmatizeur::INVALID_STATE("Lemmatizeur: invalid internal state");
const string Lemmatizeur::INVALID_RESULT("Lemmatizeur: invalid result from TaggedCorpusInterpreteur");

class TaggedCorpusReader : public LemmesFlowListener {
    friend ostream& operator<<(ostream&, const TaggedCorpusReader&);
public:
    TaggedCorpusReader(const UserConfiguration& config, const string& fileName)
    : fileName(fileName),
      from(fileName.c_str(), ios::in),
      lemmatiseur(config, fileName, *this),
      count(0),
      endReached(false),
      lemmePtr(NULL),
      readMore(false)
    {
        if (!from) {
//          errorStream << "ERROR <A.0.1 - File couldn't be open> The tagged file '" << fileName << "' can't be open\n";
            endReached = true;
        }
    }

    ~TaggedCorpusReader()
    {
        from.close();
        delete lemmePtr;
    }

    const string& getFileName() const
    {
        return fileName;
    }

    operator const bool() const
    {
        return (!endReached && (from.peek() != EOF));
    }

    LongNatural location() const
    {
        return from.tellg();
    }

    const LemmeAccess readNextLemme() // return 0 if eof reached. Skip comments.
    {
        LemmeBaseAccess readed;
        do {
            readed = readNextEntry();
        //  } while ((readed) && (typeid(readed) != typeid(LemmeAccess)));
        } while ((readed) && (! (dynamic_cast<LemmeAccess>(readed))));
        return dynamic_cast<LemmeAccess>(readed);
    }

    // Reads characters from the file and feeds them to the lemmatiser until one
    // of the computeFlowItem() callbacks stops the loop, or until the input is
    // exhausted.
    //
    // Returns the entry stored by the callback, or 0 if none was produced or if
    // the end of the input had already been reached by an earlier call.
    // The entry returned by the previous call is destroyed first.
    // On the call that hits the end of the file the lemmatiser is flushed; an
    // entry produced by that flush is still returned by that same call.
    const LemmeBaseAccess readNextEntry()
    {
        if (endReached)
            return 0;

        delete lemmePtr;
        lemmePtr = 0;
        readMore = true;
        while (!endReached && readMore) {
            char c;
            if (from.get(c)) {
                //
                // Warning: the cast must be to a reference.
                // static_cast<CharListener>(lemmatiseur)<<=(ichar(c,++count))
                // compiles without error (gcc 2.8.1) but raises
                // 'pure virtual methode called' at run time.
                //
                static_cast<CharFlowListener&>(lemmatiseur)<<=(ichar(c, ++count)); // invokes all needed methods.
            } else {
                endReached = true;
                lemmatiseur.flush();
            }
        }
        return lemmePtr;
    }
    

// private:
    // Callback: stores a stand-alone comment line as the current entry and
    // stops the read loop of readNextEntry().
    void computeFlowItem(const Comment& lineCmt)
    {
        // lemmePtr owns its entry. Release any entry that was delivered but not
        // yet handed out, so that it is not leaked when it gets overwritten.
        // The pointer is reset before allocating so that it never dangles if
        // the allocation throws.
        delete lemmePtr;
        lemmePtr = 0;
        lemmePtr = new SingleComment(lineCmt);
        readMore = false; // FlowControl: stop the read process
    }

    // Callback: stores a token/tag pair as the current entry and stops the
    // read loop of readNextEntry().
    void computeFlowItem(const Token& token, const LocalisedTag& tag)
    {
        // lemmePtr owns its entry. Release any entry that was delivered but not
        // yet handed out, so that it is not leaked when it gets overwritten.
        // The pointer is reset before allocating so that it never dangles if
        // the allocation throws.
        delete lemmePtr;
        lemmePtr = 0;
        lemmePtr = new Lemme(token, tag);
        readMore = false; // stop the read process
    }
    
    // Callback: stores a token/tag pair together with its trailing comment as
    // the current entry and stops the read loop of readNextEntry().
    void computeFlowItem(const Token& token, const LocalisedTag& tag, const Comment& cmt)
    {
        // lemmePtr owns its entry. Release any entry that was delivered but not
        // yet handed out, so that it is not leaked when it gets overwritten.
        // The pointer is reset before allocating so that it never dangles if
        // the allocation throws.
        delete lemmePtr;
        lemmePtr = 0;
        lemmePtr = new Lemme(token, tag, cmt);
        readMore = false; // stop the read process
    }

private:
    // NOTE: members are constructed in the order they are declared here,
    // whatever the order of the constructor's initialiser list.

    // Name of the tagged file; returned by getFileName().
    string           fileName;
    // Running character counter: incremented for every character read and
    // passed to the lemmatiser along with that character.
    VeryLongNatural  count;
    // Input stream on the tagged file.
    mutable ifstream from;          // mutable because tellg() is not const.
    // Receives the characters read from `from`; constructed with a reference to
    // this reader (presumably to call computeFlowItem() back — confirm in Lemmatizeur).
    Lemmatizeur      lemmatiseur;
    // Set when the file could not be opened or when a read failed.
    bool             endReached;
    // Entry produced by the last computeFlowItem() call. Owned by the reader:
    // deleted at the start of the next readNextEntry() and in the destructor.
    LemmeBasePtr     lemmePtr;
    // Loop flag of readNextEntry(); cleared by the computeFlowItem() callbacks.
    bool             readMore;
};

 
// Writes `loc` as "[from:to]", each bound zero-padded to 15 columns.
// The fill character of `os` is left as the caller had set it.
ostream& operator<<(ostream& os, const LocalisedData& loc)
{
    // setfill() is sticky (unlike setw()): remember the caller's fill character
    // and restore it, so that later padded output on the same stream is not
    // unexpectedly filled with zeros.
    const char callerFill = os.fill();
    os << '[' << setfill('0') << setw(15) << loc.from 
       << ':' << setfill('0') << setw(15) << loc.to << ']';
    os.fill(callerFill);
    return os;
}


// Writes the character span of the tokenizer's current token as
// "[first:last]", each index zero-padded to 15 columns.
// The fill character of `os` is left as the caller had set it.
ostream& operator<<(ostream& os, const GraceTokenizer& tok)
{
    // setfill() is sticky (unlike setw()): remember the caller's fill character
    // and restore it, so that later padded output on the same stream is not
    // unexpectedly filled with zeros.
    const char callerFill = os.fill();
    os << '[' << setfill('0') << setw(15) << tok.getCharIndex(FirstOfToken) 
       << ':' << setfill('0') << setw(15) << tok.getCharIndex(LastOfToken) << ']';
    os.fill(callerFill);
    return os;
}

// No longer needed.
// Writes the character span reported by `tci` as "[first:last]", each index
// zero-padded to 15 columns.
// The fill character of `os` is left as the caller had set it.
ostream& operator<<(ostream& os, const TaggedCorpusInterpreteur& tci)
{
    // setfill() is sticky (unlike setw()): remember the caller's fill character
    // and restore it, so that later padded output on the same stream is not
    // unexpectedly filled with zeros.
    const char callerFill = os.fill();
    os << '[' << setfill('0') << setw(15) << tci.getCharIndex(FirstOfToken) 
       << ':' << setfill('0') << setw(15) << tci.getCharIndex(LastOfToken) << ']';
    os.fill(callerFill);
    return os;
}

// Writes the reader's current file position as "(position)", zero-padded to
// 15 columns.
// The fill character of `os` is left as the caller had set it.
ostream& operator<<(ostream& os, const TaggedCorpusReader& tcr)
{
    // setfill() is sticky (unlike setw()): remember the caller's fill character
    // and restore it, so that later padded output on the same stream is not
    // unexpectedly filled with zeros.
    const char callerFill = os.fill();
    // tellg() is not a const member function, but `from` is declared mutable
    // in TaggedCorpusReader, so no const_cast is required to call it here.
    os << '(' << setfill('0') << setw(15) << tcr.from.tellg() << ')';
    os.fill(callerFill);
    return os;
}

//
// Merger
//
// String-flow listener that receives tokens one by one through
// computeFlowItem() and, depending on its mode:
//   - padding mode: writes every received token with the placeholder tag "??";
//   - merging mode: pairs every received token with the next entry read from
//     the tagged corpus, copies that entry to the writer and reports orphan
//     tokens, mismatching tokens and (optionally) invalid tags.
//
class Merger : public GraceTools, public StringFlowListener {
    enum State { _Padding, _Merging };
    enum Errs { _WrongToken, _Orphan };
public:
    // tagsReader : source of the tagged entries ('external' specification, multi-corpus).
    // tagsWriter : destination of the merged output.
    // tokenizer  : used to locate the current token in error messages
    //              (should eventually become an internal detail).
    // table      : tag table used to validate tags when `chktags` is set.
    // refCorpus  : name of the reference corpus, used in error locations.
    // chktags    : when true, tags of matching tokens are checked against `table`.
    // padding    : when true the merger starts in padding mode, otherwise in merging mode.
    Merger(TaggedCorpusReader& tagsReader,   // 'external' specification (multi-corpus)
           TaggedCorpusWriter& tagsWriter,   // ok
           const GraceTokenizer& tokenizer,  // ok, but should even be made internal.
           const TagsMappingTable& table,    // ok
           const string refCorpus,           // 'external' spec.
           const bool chktags = false,       // ok
           const bool padding = true)        // 'external' spec.
    // Initialisers are listed in member declaration order, which is the order
    // in which they are executed.
    : state((padding ? _Padding : _Merging)),
      tagsReader(tagsReader),
      tagsWriter(tagsWriter),
      count(0),
      tagsTable(table),
      tokenizer(tokenizer),
      chktags(chktags),
      // A const member must be initialised. No constructor parameter exists
      // for this flag and nothing in this class reads it, so it is set to false.
      stopOnFirstError(false),
      refCorpus(refCorpus),
      taggedCorpus(tagsReader.getFileName())
    { }

    // Name of this tool, used as a prefix in the warnings written to the output.
    const string& nickname() const
    {
        return TOOLS_NICKNAME;
    }
    
    // Identifier of this tool, passed to the message facility.
    const unsigned char& id() const
    {
        return TOOLS_ID;
    }

    // Toggles between padding mode and merging mode.
    void switchMode()
    {
        // The enumerators are 0 and 1, so a bitwise complement (~state) yields
        // -1 or -2, which is no valid State and would make computeFlowItem()
        // throw. Toggle explicitly instead.
        state = (state == _Padding ? _Merging : _Padding);
    }

    // Selects padding mode (true) or merging mode (false).
    void setPaddingMode(const bool padding = true)
    {
        state = (padding ? _Padding : _Merging);
    }

    // True when the merger is in padding mode.
    bool isInPaddingMode() const
    {
        return (state == _Padding);
    }

    // Handles one token `data` received from the string flow.
    // Throws domain_error (through THROW) if the internal state is invalid.
    void computeFlowItem(const string& data)
    {
        switch (state)
        {
            case _Padding :
                // Realigner & checker must be corrected ................  
                tagsWriter.write(Token(data), LocalisedTag("??")); // To be replaced by LocalisedTag()
                break;
                                
            case _Merging :
//
// The following is wrapped into a block, because gcc (2.8.1) has problems with a declaration statement
// directly inside a switch: without '{' and '}' around the statements, gcc says that the next label
// (default) is not correct (jump to case label), and warns about the declaration of 'readed'
// (cross initialisation of the variable).
// 
            
            {
                const LemmeBaseAccess readed(tagsReader.readNextEntry()); 
                // Either RTTI is used to specialise the actions, or the
                // 'knowledge' of how the merger works has to be moved into the
                // 'Lemme' hierarchy, with all the problems this raises for
                // dumping information in case of error, ...
                // 
                // So, until something better is found, RTTI is used.

                // No more entries in the tagged corpus: the token is an orphan.
                if (!readed)
                {
                    tagsWriter.write(data, string(), nickname() + string(" WARNING: orphan token"));
                    msg.error(id(), 2, 0,
                              string("Orphan token"),
                              string("The token '") + data + string("' seems to be orphan."),
                              Location(&taggedCorpus, 
                                       tagsReader.location()),
                              Location(&refCorpus,
                                       tokenizer.getCharIndex(FirstOfToken),
                                       tokenizer.getCharIndex(LastOfToken)));
                    break;
                }
                // Stand-alone comment: copy it, then process the same token
                // again against the following entry.
                if (const SingleCommentAccess singleCmtPtr = dynamic_cast<SingleCommentAccess>(readed))
                {
                    tagsWriter.write(singleCmtPtr->cmt);
                    computeFlowItem(data); // re-run.
                    break;
                }
                if (const LemmeAccess lemmePtr = dynamic_cast<LemmeAccess>(readed))
                {
                    if (lemmePtr->tok != data) {
                        // The tagged token differs from the expected one: copy
                        // the entry with a warning appended to its comment.
                        tagsWriter.write(lemmePtr->tok, 
                                         lemmePtr->tag,
                                         lemmePtr->cmt + string("  >>>> ") + nickname() 
                                             + string(" WARNING: waited token was: ") + data);
                        msg.error(id(), 2, 1,
                                  string("Wrong token"),
                                  string("Waited token '") + data + string("', obtained: '") 
                                      + static_cast<const string>(lemmePtr->tok) + string("'."),
                                  Location(&taggedCorpus, lemmePtr->tok.from, lemmePtr->tok.to),
                                  Location(&refCorpus,
                                           tokenizer.getCharIndex(FirstOfToken),
                                           tokenizer.getCharIndex(LastOfToken)));

//
// It might be useful to measure/quantify the difference between the expected string and the one
// received: when that difference is small, the expected string would be substituted and a warning
// emitted (at least as a comment in the recomposed file).
//
// Difference: the whole word upper/lower case -> 3
// Otherwise, for each letter:
// upper/lower case -> 1
// otherwise -> 2
// Delta = 3; in other words, taking 'Aujourd'hui' as the reference:
// AUJOURD'HUI  -> 3 (ok, substitution)
// aujourd'hui  -> 1 (ok, subst)
// Aujourdhui   -> 2 (ok, subst)
// Aujourd'huiy -> 2 (ok, subst)
// demain       -> 24 (no)
// 
// Problem: aAujourd'hui must give 2, and not 26 !
// 

                    } else {
                        // 
                        // Tags can now be checked without interfering with the messages issued from the token check.
                        // 
                        if (chktags && !tagsTable.checkRef(*this, taggedCorpus, lemmePtr->tag))
                            tagsWriter.write(lemmePtr->tok,
                                             lemmePtr->tag,
                                             lemmePtr->cmt + string("  >>>> ") + nickname() 
                                                 + string("  ERROR: The tag is not valid !"));
                        else if (lemmePtr->cmt.empty())
                            tagsWriter.write(lemmePtr->tok, lemmePtr->tag);
                        else tagsWriter.write(lemmePtr->tok, lemmePtr->tag, lemmePtr->cmt);
                    }
                }
                break;
            }

            default :
                THROW(domain_error(string("Recomposer: invalid internal state")));
                break;
        }
   }   

private:
    static const string        TOOLS_NICKNAME;
    static const unsigned char TOOLS_ID;

    // The former `istream& tagged` member has been removed: it was initialised
    // from itself (no stream is ever passed to the constructor) and never used.
    State                   state;             // current mode (padding or merging)
    TaggedCorpusReader&     tagsReader;
    TaggedCorpusWriter&     tagsWriter;
    VeryLongNatural         count;             // not used yet; meant for a future istreamToCharFlow class.
    const TagsMappingTable& tagsTable;
    const GraceTokenizer&   tokenizer;
    const bool              chktags;
    const bool              stopOnFirstError;  // not read anywhere in this class
    const string            refCorpus;
    const string&           taggedCorpus;      // refers to the reader's file name
}; 
const string Merger::TOOLS_NICKNAME("Merger");
const unsigned char Merger::TOOLS_ID('M');


// Placement of the interlaced string relative to the strings forwarded by a TokenOutput.
enum Position { Before, After, Between, Around };
class TokenOutput : public StringFlowAgent {
public:
    TokenOutput(const string& mixed = string(), const Position& pos = After,
                StringFlowListener& nextListener = nullStringFlowListener)
    : entrelaced(mixed),
      pos(pos),
      first(true),
      nextListener(&nextListener)
    { }

    void flush()
    {
        if (!first && (pos == Around))
            (*nextListener) <<= entrelaced;
        first = true;
    }

    void computeFlowItem(const string& str)
    {
        if (first) {
            first = false;
            if ((pos == Before) || (pos == Around))
                (*nextListener) <<= entrelaced;
        }
        (*nextListener) <<= str;
        if (pos == After)
            (*nextListener) <<= entrelaced;
    }
    
private:
    string                entrelaced;
    Position              pos;
    bool                  first;
    StringFlowListenerPtr nextListener;
};

    

#ifdef _USE_NAMESPACES
}
#endif // _USE_NAMESPACES

#endif // GRACE_TOOLS_H

