/*---------------------------------------------------------------------------------------------------------------------
- File      : tags.h                                                                      Project ELSE, EPFL - DI/LIA -
-                                                                       Evaluation in Language and Speech Engineering -
- Author    : Seydoux Florian   Creation date : 20 Sep 1999                                                           -
- Eulogist  : -                 Approval date : -                  Version: 0.1                                       -
-                                                                                                                     -
- Descript. : Define all representation of a tag (low and high level)                                                 -
-             Segmentation           - the segmentation information associated with a tag (better to call it a glue)  -
-             BasicTag (lowLevel)    - a specialised string  - all tags form                                          -
-             AtomicTag (middle)     - an alias to BasicTag, but conceptually a very atomic tag (only a name)         -
-             LocalisedTag (middle)  - a BasicTag with origin info (used only in intensive checking)                  -
-             ContractedTag (high)   - a contraction of n atomic tag, with one segm. info (one for all atomic tags)   -
-               (vector<Atomic>)       Ex:  a+b+c+d/i.j -> (like) (a+b+c+d)/i.j == a Grace atomic tag                 -
-             CompositTag (high)     - a complex tag, with alternatives of contracted tag                             -
-               (set<Contracted>)      Ex: a/1.2|b+c|d                                                                -
-                                                                                                                     -
- Requested : Symbol UNCHECKED_TG must be set if unchecked BasicTag are given to CompositTag (and/or ContractedTag)   -
-                                                                                                                     -
- Gaps      : o) Perhaps it's better to separate segmented and non segmented tags (with a set of specialized class)   -
-                because we need to use a non segmented tag in the mapping table, and segmented with the 'real' tags. -
-                (Then neutral segmentation will be unnecessary.)                                                     -
-             o) Without specialisation, neutral/invalid segmentation can be defined more efficiently, with boolean   -
-                attribute (perhaps compacted in only one byte, with the 'specific' information).                     -
-                                                                                                                     -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- Rev. date | Reviser               | Revise's description                                                            -
- - - - - - + - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- ../../....| ........              | ...                                                                             -
---------------------------------------------------------------------------------------------------------------------*/

#ifndef TAGS_H
#define TAGS_H

#include <set>
#include "globaldef.h"

// When the build defines _USE_NAMESPACES, the declarations that follow are placed in namespace Else.
#ifdef _USE_NAMESPACES
    namespace Else { 
#endif // _USE_NAMESPACES

// Define 'UNCHECKED_TAG' to enable the check for multiple/invalid segmentations in a contracted tag,
// i.e. <A/i1.j1+B/i2.j2> -> <(A+B)/0.0> (A+B with an invalid segmentation).
// If UNCHECKED_TAG is not defined, <T1+...+Tn/i.j+...Tm> -> <(T1+..+Tn)/i.j>.
// (Warning: UNCHECKED_TAG is used in an independent compilation unit, so tags.cc must be recompiled
// whenever its definition changes.)
// NOTE(review): the file header spells this symbol UNCHECKED_TG -- confirm which spelling tags.cc tests.

//
// Conventions: the relation between the Grace terminology and the one adopted here is not quite
//              straightforward, since the Grace terminology appears to conflate atomic tags with
//              stand-alone contracted forms.
//
//     <tag> -> tag is atomic                 (no equivalent definition in Grace)
//     {tag} -> tag is a set of alternatives  (Grace: composite tag)
//     [tag] -> tag is a contracted form      (Grace: atomic tag)
//
//

// Segmentation information attached to a tag (the "/i.j" suffix in the tag examples of this header).
// NOTE(review): the mapping i -> segmentRank, j -> numberOfSegments is inferred from the constructor
// parameter names (rank, total); confirm against tags.cc.
struct Segmentation {
    // ---- Data -------------------------------------------------------------------------------
    // (Declaration order of the three fields is kept as-is: it fixes layout and initialisation order.)
    bool         specific;          // NOTE(review): meaning not visible in this header -- see tags.cc.
    ShortNatural segmentRank;
    ShortNatural numberOfSegments;

    // ---- Distinguished values (defined in another compilation unit) ---------------------------
    static const Segmentation NeutralSegmentation; // Equal to every segmentation except InvalidSegmentation.
    static const Segmentation InvalidSegmentation; // Never equal to anything (not even to itself).

    // ---- Construction / re-initialisation ------------------------------------------------------
    Segmentation();
    Segmentation(const ShortNatural rank, const ShortNatural total);
    void reset();

    // ---- Comparison ------------------------------------------------------------------------------
    // Per the notes on the distinguished values, == is deliberately not a plain field-wise equality
    // (Neutral matches everything valid, Invalid matches nothing).
    // NOTE(review): deepEqual presumably is the strict field-wise comparison -- confirm in tags.cc.
    bool operator==(const Segmentation& sgm) const;
    bool operator!=(const Segmentation& sgm) const;
    bool operator<(const Segmentation& sgm) const;
    bool deepEqual(const Segmentation& sgm) const;
};
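// Original namespace-scope form of the two constants, kept commented out because it triggered a
// gcc 2.8.1 bug (internal compiler error); apparently replaced by the static members of Segmentation.
// const Segmentation NeutralSegmentation(0,0); // Equal with all segmentations, except InvalidSegmentation.
// const Segmentation InvalidSegmentation(MAX_SHORT_NATURAL,0); // Never equal (even with itself).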


// Low-level representation shared by every tag form: a conform_string with tag-specific queries.
class BasicTag : public conform_string {
public:
    // ---- Construction ------------------------------------------------------------------------
    BasicTag();
    BasicTag(const string& tag);    // not 'explicit': a string converts implicitly to a BasicTag

    // ---- Shape queries -------------------------------------------------------------------------
    // NOTE(review): going by the examples in the file header (a+b+c+d/i.j, a/1.2|b+c|d), these
    // presumably test for '|' alternatives, '+' contraction and a '/i.j' suffix respectively, with
    // isAtomic meaning "none of those" -- confirm in tags.cc.
    bool isAtomic() const;
    bool isSegmented() const;
    bool isContracted() const;
    bool isMultiple() const;

    // ---- Validation ----------------------------------------------------------------------------
    // TODO (from the original author): get rid of the unknownSeq parameter.
    bool checkSyntax(const GraceTools& tools,
                     const string& source,
                     const VeryLongNatural x0,
                     const string& unknownSeq) const;
};

// An atomic tag is the same type as BasicTag; the alias only expresses intent (a bare tag name).
typedef BasicTag AtomicTag;

// A BasicTag that also carries origin information (LocalisedData); per the file header it is used
// only when intensive checking is enabled.
class LocalisedTag : public BasicTag, public LocalisedData {
public:
    // ---- Construction ------------------------------------------------------------------------
    LocalisedTag();
    LocalisedTag(const string& tag);
    LocalisedTag(const string& tag, const LocalisedData& location);
    LocalisedTag(const string& tag,
                 const VeryLongNatural& from,
                 const VeryLongNatural& to);

    // ---- Validation ----------------------------------------------------------------------------
    // Takes one argument fewer than BasicTag::checkSyntax and hides that overload on LocalisedTag
    // objects (neither function is virtual).
    bool checkSyntax(const GraceTools& tools,
                     const string& source,
                     const string& unknown) const;
};

typedef vector<AtomicTag> AtomicTags;

// High-level contracted tag: a sequence of atomic tags sharing ONE segmentation, i.e. "A+B /i.j".
//
// Caveats visible from this declaration:
//  - The class derives publicly from vector<AtomicTag>, and push_back() below hides (does not
//    override) the vector member; calling push_back through an AtomicTags& bypasses it.
//  - 'segmentation' and 'len' are public data members.
class ContractedTag: public AtomicTags { // Implement A+B /i.j
friend ostream& operator<<(ostream&, const ContractedTag&);
public:
    ContractedTag();
	ContractedTag(const BasicTag& tags);    // not 'explicit': a BasicTag converts implicitly
	bool isSegmented() const;
	// NOTE(review): presumably returns a reference to 'len' -- confirm in tags.cc.
	const AtomicTag::size_type& length() const;
	conform_string str() const;
	void push_back(const AtomicTag& tag);   // hides AtomicTags::push_back
	bool operator<(const ContractedTag& tag) const; // ordering used by set<ContractedTag>
	bool isSameBase(const ContractedTag& tag) const; // compare tags without segmentation
	bool isSameBase(const BasicTag& tag) const;
private:
	ostream& out(ostream& os) const;
public:
	Segmentation segmentation;
	AtomicTag::size_type len;
};
// Namespace-scope declaration of the stream inserter befriended above. A friend declaration alone
// makes the function reachable only through argument-dependent lookup; declaring it here matches
// what this header already does for AlternatedTagsSet.
ostream& operator<<(ostream& os, const ContractedTag& tag);

typedef const ContractedTag* ContractedTagCPtr;

typedef set<ContractedTag> ContractedTagsSet;

// Set of alternative contracted tags, kept without duplicates:
//   A/1.2|B|A|B/1.1 -> A|A/1.2|B
// insert() below hides (does not override) the insert overloads of set<ContractedTag>.
class AlternatedTagsSet : public ContractedTagsSet {
    friend ostream& operator<<(ostream& os, const AlternatedTagsSet&);

public:
    // ---- Construction ------------------------------------------------------------------------
    AlternatedTagsSet();
    AlternatedTagsSet(const BasicTag& tags);    // not 'explicit': a BasicTag converts implicitly

    // ---- Modification --------------------------------------------------------------------------
    void insert(const ContractedTag& tag);

    // ---- Queries -------------------------------------------------------------------------------
    // NOTE(review): length() presumably returns a reference to 'len' -- confirm in tags.cc.
    const Natural& length() const;
    conform_string str() const;

private:
    ostream& out(ostream& os) const;

    Natural len;
};

// Stream inserter for AlternatedTagsSet (befriended by the class above).
ostream& operator<<(ostream& os, const AlternatedTagsSet& tag);


// Closes namespace Else. The guard must be spelled exactly like the one that opens the namespace
// near the top of this header (_USE_NAMESPACES); otherwise the namespace is left open whenever
// namespaces are enabled.
#ifdef _USE_NAMESPACES
} // namespace Else
#endif // _USE_NAMESPACES

#endif // TAGS_H

