#include "FGen.h"
#include <iterator>
using namespace std;

#include "StringMap.h"
#include "Options.h"
#include "CharUtils.h"
#include "CppAssert.h"
using namespace egstra;

#define CONF_DEFAULT "p|pu|pb|pch"

namespace dparser {
	void FGen::process_options()
	{
		int tmp;
		string strtmp;
		_chartype_file = "chartype.txt";
		if(options::get("chartype-file", strtmp)) {
			_chartype_file = strtmp;
		}

		load_chartype(); 

		_fcutoff = 1;
		if(options::get("fcutoff", tmp)) {
			_fcutoff = tmp;
		}

		_use_joint_features = true;
		_use_separate_features = true;
		if(options::get("use-joint-features", tmp)) {
			_use_joint_features = (1 == tmp);
		}
		if(options::get("use-separate-features", tmp)) {
			_use_separate_features = (1 == tmp);
		}
		cerr << "joint-feat: " << _use_joint_features << endl;
		cerr << "separ-feat: " << _use_separate_features << endl;

		if(!_use_joint_features && !_use_separate_features) {
			cerr << "\n\nAt least one of the two needs to be used: joint-feat or separate-feat!" << endl;
			exit(-1);
		}
		
		_use_lexicon_feature = false;
		/* Add lexicon feature, created by chaojy*/
		if(options::get("use-lexicon-feature", tmp)){
			_use_lexicon_feature = tmp;
		}

	}

	/*
	 * Append the tag-independent ("PU-") feature strings for position node_id
	 * of inst to feats_str.
	 *
	 * Nothing is emitted for position 0 or for the position equal to
	 * inst->size(). Otherwise the features cover, in this fixed order:
	 *   - the form at the position and its neighbours within a window of two
	 *     (single forms, adjacent pairs, skip pairs, triples),
	 *   - character types (getCharType) of the form and its direct neighbours,
	 *   - equality indicators between the form and its neighbours,
	 *   - when _use_lexicon_feature is set, the begin/middle/end lexicon values
	 *     ("PL-" features) of the position and of its direct neighbours; a
	 *     lexicon value of "0" produces no feature.
	 *
	 * A neighbour that would be position 0 or that lies beyond the last form
	 * is represented by NO_FORM (forms) or "0" (lexicon values).
	 *
	 * inst      : instance providing forms and the lexicon vectors
	 * node_id   : position, 0 <= node_id <= inst->size()
	 * feats_str : output list; features are appended, the list is not cleared
	 */
	void FGen::addPOSFeature_unigram( const Instance *inst, const int node_id, list<string> &feats_str ) const
	{
		assert(node_id >= 0 && node_id <= inst->size());
		if (0 == node_id || inst->size() == node_id) return;

		const string prefix = "PU-";
		string feat;

		// Forms in a window of two around the position.
		const string &c0 = inst->forms[node_id];
		const string &c_L1 = node_id > 1 ? inst->forms[node_id-1] : NO_FORM;
		const string &c_R1 = node_id < inst->size() - 1 ? inst->forms[node_id+1] : NO_FORM;
		const string &c_L2 = node_id > 2 ? inst->forms[node_id-2] : NO_FORM;
		const string &c_R2 = node_id < inst->size() - 2 ? inst->forms[node_id+2] : NO_FORM;

		// Character types; a missing neighbour keeps the NO_FORM marker.
		string t0 = getCharType(c0);
		string t_L1 = (c_L1 == NO_FORM ? NO_FORM : getCharType(c_L1));
		string t_R1 = (c_R1 == NO_FORM ? NO_FORM : getCharType(c_R1));

		// Single forms.
		feat = prefix + "c0=" + c0;		    feats_str.push_back(feat);
		feat = prefix + "c_L1=" + c_L1;		    feats_str.push_back(feat);
		feat = prefix + "c_L2=" + c_L2;		    feats_str.push_back(feat);
		feat = prefix + "c_R1=" + c_R1;		    feats_str.push_back(feat);
		feat = prefix + "c_R2=" + c_R2;		    feats_str.push_back(feat);

		// Adjacent pairs.
		feat = prefix + "c_L21=" + c_L2 + FEAT_SEP + c_L1;  feats_str.push_back(feat);
		feat = prefix + "c_L10=" + c_L1 + FEAT_SEP + c0;    feats_str.push_back(feat);
		feat = prefix + "c_R01=" + c0   + FEAT_SEP + c_R1;    feats_str.push_back(feat);
		feat = prefix + "c_R12=" + c_R1 + FEAT_SEP + c_R2;    feats_str.push_back(feat);

		// Skip pairs.
		feat = prefix + "c_L20="  + c_L2 + FEAT_SEP + c0;    feats_str.push_back(feat);
		feat = prefix + "c_L1R1=" + c_L1 + FEAT_SEP + c_R1;    feats_str.push_back(feat);
		feat = prefix + "c_R02="  + c0   + FEAT_SEP + c_R2;    feats_str.push_back(feat);

		// Triples.
		feat = prefix + "c_L210="  + c_L2 + FEAT_SEP + c_L1 + FEAT_SEP + c0;      feats_str.push_back(feat);
		feat = prefix + "c_L10R1=" + c_L1 + FEAT_SEP + c0   + FEAT_SEP + c_R1;    feats_str.push_back(feat);
		feat = prefix + "c_R012="  + c0   + FEAT_SEP + c_R1 + FEAT_SEP + c_R2;    feats_str.push_back(feat);

		// Character-type features.
		feat = prefix + "t0=" + t0;		        feats_str.push_back(feat);
		feat = prefix + "t_L1=" + t_L1;		    feats_str.push_back(feat);
		feat = prefix + "t_R1=" + t_R1;		    feats_str.push_back(feat);

		feat = prefix + "t_L10=" + t_L1 + FEAT_SEP + t0;      feats_str.push_back(feat);
		feat = prefix + "t_R01=" + t0   + FEAT_SEP + t_R1;    feats_str.push_back(feat);

		feat = prefix + "t_L10R1=" + t_L1 + FEAT_SEP + t0 + FEAT_SEP + t_R1;    feats_str.push_back(feat);

		// Equality indicators (emitted only when the comparison holds).
		if (c_L2 == c0) { feat = prefix + "c_L2==c0";  feats_str.push_back(feat); }
		if (c_L1 == c0) { feat = prefix + "c_L1==c0";  feats_str.push_back(feat); }
		if (c_R1 == c0) { feat = prefix + "c_R1==c0";  feats_str.push_back(feat); }
		if (c_R2 == c0) { feat = prefix + "c_R2==c0";  feats_str.push_back(feat); }
		if (c_L1 == c_R1) { feat = prefix + "c_L1==c_R1";  feats_str.push_back(feat); }

		if(_use_lexicon_feature){
			const string pprefix = "PL-"  + FEAT_SEP;

			// Begin / middle / end lexicon values of the position itself.
			const string &FB_c0 = inst->fbegin_vec[node_id];
			const string &FM_c0 = inst->fmiddle_vec[node_id];
			const string &FE_c0 = inst->fend_vec[node_id];

			// ... of the left neighbour ("0" when there is none).
			const string &FB_L1 =  node_id > 1 ? inst->fbegin_vec[node_id-1] : "0";
			const string &FM_L1 =  node_id > 1 ? inst->fmiddle_vec[node_id-1] : "0";
			const string &FE_L1 =  node_id > 1 ? inst->fend_vec[node_id-1] : "0";

			// ... of the right neighbour ("0" when there is none).
			// FM_R1 and FE_R1 read the middle and end vectors; they used to
			// read fbegin_vec as well, which made all three right-neighbour
			// features identical.
			const string &FB_R1 =  node_id < inst->size() - 1 ? inst->fbegin_vec[node_id+1] : "0";
			const string &FM_R1 =  node_id < inst->size() - 1 ? inst->fmiddle_vec[node_id+1] : "0";
			const string &FE_R1 =  node_id < inst->size() - 1 ? inst->fend_vec[node_id+1] : "0";

			if(FB_c0 != "0") { feat = pprefix + "FB_c0=" + FB_c0;	feats_str.push_back(feat); }
			if(FM_c0 != "0") { feat = pprefix + "FM_c0=" + FM_c0;	feats_str.push_back(feat); }
			if(FE_c0 != "0") { feat = pprefix + "FE_c0=" + FE_c0;	feats_str.push_back(feat); }

			if(FB_L1 != "0") { feat = pprefix + "FB_L1=" + FB_L1;	feats_str.push_back(feat); }
			if(FM_L1 != "0") { feat = pprefix + "FM_L1=" + FM_L1;	feats_str.push_back(feat); }
			if(FE_L1 != "0") { feat = pprefix + "FE_L1=" + FE_L1;	feats_str.push_back(feat); }

			if(FB_R1 != "0") { feat = pprefix + "FB_R1=" + FB_R1;	feats_str.push_back(feat); }
			if(FM_R1 != "0") { feat = pprefix + "FM_R1=" + FM_R1;	feats_str.push_back(feat); }
			if(FE_R1 != "0") { feat = pprefix + "FE_R1=" + FE_R1;	feats_str.push_back(feat);	}
		}
	}


	void FGen::addPOSFeature_bigram( const Instance *inst, const int node_id, const string &cpostag_L1, list<string> &feats_str ) const
	{
		assert(node_id >= 0 && node_id <= inst->size());
		string prefix = "PB-";
		string feat;

		const string &c0 = (node_id == inst->size() ? NO_FORM : inst->forms[node_id]);
		const string &c_L1 = node_id > 1 ? inst->forms[node_id-1] : NO_FORM;
		//const string &c_R1 = node_id < inst->size() - 1 ? inst->forms[node_id+1] : NO_FORM;

        feat = prefix + "t_L1=" + cpostag_L1; feats_str.push_back(feat);
		feat = prefix + "t_L1+c0=" + cpostag_L1 + FEAT_SEP + c0; feats_str.push_back(feat);
	//	feat = prefix + "t_L1+cL1=" + cpostag_L1 + FEAT_SEP + c_L1; feats_str.push_back(feat);
	//	feat = prefix + "t_L1+cR1=" + cpostag_L1 + FEAT_SEP + c_R1; feats_str.push_back(feat);
        feat = prefix + "t_L1+c_L1+c0=" + cpostag_L1 + FEAT_SEP + c_L1 + FEAT_SEP + c0; feats_str.push_back(feat);
    //  feat = prefix + "t_L1+c0+c_R1=" + cpostag_L1 + FEAT_SEP + c0 + FEAT_SEP + c_R1; feats_str.push_back(feat);
    //    feat = prefix + "t_L1+cL1+c_R1=" + cpostag_L1 + FEAT_SEP + c_L1 + FEAT_SEP + c_R1; feats_str.push_back(feat);
    //    feat = prefix + "t_L1+cL1+c0+c_R1=" + cpostag_L1 + FEAT_SEP + c_L1 + FEAT_SEP + c0 + FEAT_SEP + c_R1; feats_str.push_back(feat);
/*
		string t0 = getCharType(c0);
		string t_L1 = (c_L1 == NO_FORM ? NO_FORM : getCharType(c_L1));
		string t_R1 = (c_R1 == NO_FORM ? NO_FORM : getCharType(c_R1));
		feat = prefix + "t_L1+tL10R1=" + cpostag_L1 + FEAT_SEP + t_L1 + FEAT_SEP + t0 + FEAT_SEP + t_R1;    feats_str.push_back(feat);
		feat = prefix + "t_L1+tL1R1=" + cpostag_L1 + FEAT_SEP + t_L1 + FEAT_SEP + t_R1;    feats_str.push_back(feat);
		feat = prefix + "t_L1+tL10=" + cpostag_L1 + FEAT_SEP + t_L1 + FEAT_SEP + t0;    feats_str.push_back(feat);
		feat = prefix + "t_L1+t0R1=" + cpostag_L1 + FEAT_SEP + t0 + FEAT_SEP + t_R1;    feats_str.push_back(feat);
		feat = prefix + "t_L1+tL1=" + cpostag_L1 + FEAT_SEP + t_L1;    feats_str.push_back(feat);
		feat = prefix + "t_L1+t0=" + cpostag_L1 + FEAT_SEP + t0;    feats_str.push_back(feat);
		feat = prefix + "t_L1+tR1=" + cpostag_L1 + FEAT_SEP + t_R1;    feats_str.push_back(feat);

	*	feat = prefix + "t_L1+cL1+c0=" + cpostag_L1 + FEAT_SEP + c_L1 + c0; feats_str.push_back(feat);
		feat = prefix + "t_L1+c0+cR1=" + cpostag_L1 + FEAT_SEP + c0 + c_R1; feats_str.push_back(feat);
		feat = prefix + "t_L1+cL1+cR1=" + cpostag_L1 + FEAT_SEP + c_L1 + c_R1; feats_str.push_back(feat);
		//feat = prefix + "t_L1+cL10R1=" + cpostag_L1 + FEAT_SEP + c_L1 + c0 + c_R1; feats_str.push_back(feat);
    */
/*		vector<string> vec;
		simpleTokenize(cpostag_L1, vec, "^");
		if (vec.size() == 2) {
			feat = "PB-A=" + vec[0]; feats_str.push_back(feat);
			feat = "PB-B=" + vec[1]; feats_str.push_back(feat);
		}
*/	}


	/*
	 * Print the option summary of this feature generator to stderr, followed
	 * by an optional caller-supplied message.
	 *
	 * The list includes every option read by process_options() together with
	 * the defaults applied there.
	 *
	 * mesg : extra text printed after the summary; may be NULL, in which case
	 *        nothing extra is printed (streaming a null char pointer is
	 *        undefined behaviour).
	 */
	void FGen::usage(const char* const mesg) const {
		cerr << _name << " options:" << endl;
		cerr << " --fdictdir=<str> : pathname to feature-dictionary directory" << endl;
		cerr << " --fcutoff=<int>  : minimum feature count (default 1)" << endl;
		cerr << " --chartype-file=<str> : character-type table file (default \"chartype.txt\")" << endl;
		cerr << " --use-joint-features=<0|1>    : use joint-tag features (default 1)" << endl;
		cerr << " --use-separate-features=<0|1> : use separate-tag features (default 1)" << endl;
		cerr << " --use-lexicon-feature=<0|1>   : use lexicon features (default 0)" << endl;
		cerr << " --fconf=\"<flag>|...\" : feature-configuration flags" << endl;
		cerr << "     p    : pos features" << endl;
		cerr << "     pu   : pos unigram features" << endl;
		cerr << "     pb   : pos bigram features" << endl;
		// NOTE(review): CONF_DEFAULT spells this flag "pch" while the help text
		// says "pc" -- confirm which spelling the fconf parser accepts.
		cerr << "     pc   : pos char-based features" << endl;

		cerr << "   (default \"" << CONF_DEFAULT << "\")" << endl;
		cerr << endl;
		if (mesg != NULL) {
			cerr << mesg << endl;
		}
	}

	/*
	 * Release the feature vectors and score tables stored in inst.
	 *
	 * First, for every position 0..inst->size(), the per-position base
	 * vectors are deallocated element by element (bigram bases only when
	 * their container is non-empty). Afterwards every container itself is
	 * deallocated. The per-tag vectors in fvec_unigram_* and fvec_bigram_* are
	 * not deallocated element by element; only their containers are released.
	 *
	 * inst : instance whose feature/probability storage is released
	 */
	void FGen::dealloc_fvec_prob( Instance * const inst ) const
	{
		const int last_pos = inst->size();

		// Step 1: element-wise release of the base vectors.
		for (int pos = 0; pos <= last_pos; ++pos) {
			inst->fvec_unigram_base[pos].dealloc();

			if (inst->fvec_bigram_base_joint.size() > 0) {
				for (int tag = 0; tag < tag_number(); ++tag)
					inst->fvec_bigram_base_joint[pos][tag].dealloc();
			}

			if (inst->fvec_bigram_base_a.size() > 0) {
				for (int tag = 0; tag < tag_number_a(); ++tag)
					inst->fvec_bigram_base_a[pos][tag].dealloc();
			}

			if (inst->fvec_bigram_base_b.size() > 0) {
				for (int tag = 0; tag < tag_number_b(); ++tag)
					inst->fvec_bigram_base_b[pos][tag].dealloc();
			}
		}

		// Step 2: release the containers -- base vectors ...
		inst->fvec_unigram_base.dealloc();
		inst->fvec_bigram_base_joint.dealloc();
		inst->fvec_bigram_base_a.dealloc();
		inst->fvec_bigram_base_b.dealloc();

		// ... per-tag unigram vectors ...
		inst->fvec_unigram_joint.dealloc();
		inst->fvec_unigram_a.dealloc();
		inst->fvec_unigram_b.dealloc();

		// ... per-tag-pair bigram vectors ...
		inst->fvec_bigram_joint.dealloc();
		inst->fvec_bigram_a.dealloc();
		inst->fvec_bigram_b.dealloc();

		// ... unigram scores ...
		inst->prob_unigram_joint.dealloc();
		inst->prob_unigram_a.dealloc();
		inst->prob_unigram_b.dealloc();

		// ... and bigram scores.
		inst->prob_bigram_joint.dealloc();
		inst->prob_bigram_a.dealloc();
		inst->prob_bigram_b.dealloc();
	}

	/*
	 * Allocate and fill all feature vectors (and reset all score tables) of
	 * inst for the positions 0..inst->size().
	 *
	 * For every position one unigram base vector is mapped through
	 * _dict_unigram; for every (position, left tag) one bigram base vector is
	 * mapped through the matching bigram dictionary. The per-tag vectors then
	 * copy the idx/n/val fields of their base vector and differ only in
	 * `offset`, which selects the tag-specific segment
	 * (segment start + dictionary dimensionality * tag id).
	 * NOTE(review): idx/val are presumably pointers, i.e. per-tag vectors share
	 * the storage of the base vector -- confirm against the fvec definition.
	 *
	 * Score tables are initialised to DOUBLE_NEGATIVE_INFINITY. Joint and
	 * separate (a/b) structures are only built when the corresponding
	 * _use_*_features flag is set.
	 *
	 * inst : instance to populate
	 */
	void FGen::create_all_feature_vectors( Instance * const inst )
	{
		// Copies the index/value fields of `src` into `dst` and points `dst`
		// at the segment starting at `segment_offset`.
		struct FvecAlias {
			static void bind(fvec * const dst, fvec * const src, const int segment_offset) {
				dst->idx = src->idx;
				dst->n = src->n;
				dst->val = src->val;
				dst->offset = segment_offset;
			}
		};

		const int len = inst->size();
		const int ntag = tag_number();
		const int ntagA = tag_number_a();
		const int ntagB = tag_number_b();
		list<string> feats_str;

		// ---- allocation --------------------------------------------------
		inst->fvec_unigram_base.resize(len+1);
		inst->fvec_unigram_base = fvec();

		assert(_use_joint_features || _use_separate_features);

		if (_use_joint_features) {
			inst->fvec_bigram_base_joint.resize(len+1, ntag);
			inst->fvec_bigram_base_joint = fvec();

			inst->fvec_unigram_joint.resize(len+1, ntag);
			inst->prob_unigram_joint.resize(len+1, ntag);
			inst->fvec_unigram_joint = fvec();
			inst->prob_unigram_joint = DOUBLE_NEGATIVE_INFINITY;

			inst->fvec_bigram_joint.resize(len+1, ntag, ntag);
			inst->prob_bigram_joint.resize(len+1, ntag, ntag);
			inst->fvec_bigram_joint = fvec();
			inst->prob_bigram_joint = DOUBLE_NEGATIVE_INFINITY;
		}

		if (_use_separate_features) {
			// tag set "a"
			inst->fvec_bigram_base_a.resize(len+1, ntagA);
			inst->fvec_bigram_base_a = fvec();

			inst->fvec_unigram_a.resize(len+1, ntagA);
			inst->prob_unigram_a.resize(len+1, ntagA);
			inst->fvec_unigram_a = fvec();
			inst->prob_unigram_a = DOUBLE_NEGATIVE_INFINITY;

			inst->fvec_bigram_a.resize(len+1, ntagA, ntagA);
			inst->prob_bigram_a.resize(len+1, ntagA, ntagA);
			inst->fvec_bigram_a = fvec();
			inst->prob_bigram_a = DOUBLE_NEGATIVE_INFINITY;

			// tag set "b"
			inst->fvec_bigram_base_b.resize(len+1, ntagB);
			inst->fvec_bigram_base_b = fvec();

			inst->fvec_unigram_b.resize(len+1, ntagB);
			inst->prob_unigram_b.resize(len+1, ntagB);
			inst->fvec_unigram_b = fvec();
			inst->prob_unigram_b = DOUBLE_NEGATIVE_INFINITY;

			inst->fvec_bigram_b.resize(len+1, ntagB, ntagB);
			inst->prob_bigram_b.resize(len+1, ntagB, ntagB);
			inst->fvec_bigram_b = fvec();
			inst->prob_bigram_b = DOUBLE_NEGATIVE_INFINITY;
		}

		// ---- feature extraction -------------------------------------------
		for (int pos = 0; pos <= len; ++pos) {
			// Unigram: one base vector per position, aliased once per tag.
			fvec * const uni_base = &inst->fvec_unigram_base[pos];
			feats_str.clear();
			addPOSFeature_unigram(inst, pos, feats_str);
			_dict_unigram.map_all(uni_base, 0, feats_str, _generation_mode);

			if (_use_joint_features) {
				for (int tag = 0; tag < ntag; ++tag) {
					FvecAlias::bind(&inst->fvec_unigram_joint[pos][tag], uni_base,
						_offset_unigram_joint + _dim_unigram * tag);
				}
			}
			if (_use_separate_features) {
				for (int tag = 0; tag < ntagA; ++tag) {
					FvecAlias::bind(&inst->fvec_unigram_a[pos][tag], uni_base,
						_offset_unigram_a + _dim_unigram * tag);
				}
				for (int tag = 0; tag < ntagB; ++tag) {
					FvecAlias::bind(&inst->fvec_unigram_b[pos][tag], uni_base,
						_offset_unigram_b + _dim_unigram * tag);
				}
			}

			// Bigram (joint): one base vector per (position, left tag),
			// aliased once per current tag.
			if (_use_joint_features) {
				for (int left = 0; left < ntag; ++left) {
					fvec * const bi_base = &inst->fvec_bigram_base_joint[pos][left];
					feats_str.clear();
					addPOSFeature_bigram(inst, pos, pos_id_2_str(left), feats_str);
					_dict_bigram_joint.map_all(bi_base, 0, feats_str, _generation_mode);

					for (int tag = 0; tag < ntag; ++tag) {
						FvecAlias::bind(&inst->fvec_bigram_joint[pos][left][tag], bi_base,
							_offset_bigram_joint + _dim_bigram_joint * tag);
					}
				}
			}

			if (_use_separate_features) {
				// Bigram (a)
				for (int left = 0; left < ntagA; ++left) {
					fvec * const bi_base = &inst->fvec_bigram_base_a[pos][left];
					feats_str.clear();
					addPOSFeature_bigram(inst, pos, pos_id_2_str_a(left), feats_str);
					_dict_bigram_a.map_all(bi_base, 0, feats_str, _generation_mode);

					for (int tag = 0; tag < ntagA; ++tag) {
						FvecAlias::bind(&inst->fvec_bigram_a[pos][left][tag], bi_base,
							_offset_bigram_a + _dim_bigram_a * tag);
					}
				}
				// Bigram (b)
				for (int left = 0; left < ntagB; ++left) {
					fvec * const bi_base = &inst->fvec_bigram_base_b[pos][left];
					feats_str.clear();
					addPOSFeature_bigram(inst, pos, pos_id_2_str_b(left), feats_str);
					_dict_bigram_b.map_all(bi_base, 0, feats_str, _generation_mode);

					for (int tag = 0; tag < ntagB; ++tag) {
						FvecAlias::bind(&inst->fvec_bigram_b[pos][left][tag], bi_base,
							_offset_bigram_b + _dim_bigram_b * tag);
					}
				}
			}
		}
	}

	/*
	 * Add the features fired by the tag sequence cpostags to sp_fv, each
	 * weighted by scale.
	 *
	 * For every position i in 1..inst->size():
	 *   - the unigram features of position i are added in the segment of the
	 *     tag at i (joint segment and/or the a/b segments, depending on the
	 *     _use_*_features flags);
	 *   - the bigram features of position i, conditioned on the tag at i-1,
	 *     are added in the segment of the tag at i. For the a/b variants the
	 *     left tag is split on '^' and its first/second part is used.
	 * Position inst->size() is tagged with DUMMY_CPOSTAG.
	 *
	 * Every left tag must split into exactly two parts on '^', neither of
	 * them "*" (asserted).
	 *
	 * inst     : instance providing the forms
	 * sp_fv    : sparse vector the features are accumulated into
	 * cpostags : tag per position; read at indices 0..inst->size()-1
	 * scale    : weight applied to every added feature (documented default 1.0)
	 */
	void FGen::create_all_pos_features_according_to_tree(const Instance * const inst, sparsevec &sp_fv, const vector<string> &cpostags, const double scale /*= 1.0*/)
	{
		const int len = inst->size();
		fvec fv;
		list<string> feats_str;

		for (int i = 1; i <= len; ++i) {
			// cpostags is indexed only for i < len: the tag at position len is
			// always DUMMY_CPOSTAG, and cpostags is not guaranteed here to hold
			// an entry at index len.
			const string tag = (i == len) ? string(DUMMY_CPOSTAG) : cpostags[i];
			const int t_joint = get_pos_id(tag);
			const int t_a = joint_id_2_a[t_joint];
			const int t_b = joint_id_2_b[t_joint];

			{ // unigram: mapped once, then re-targeted via fv.offset
				const int offset = _offset_unigram_joint + _dim_unigram * t_joint;
				feats_str.clear();
				addPOSFeature_unigram(inst, i, feats_str);
				_dict_unigram.map_all(&fv, offset, feats_str, _generation_mode);
				if (_use_joint_features) {
					parameters::sparse_add(sp_fv, &fv, scale);
				}
				if (_use_separate_features) {
					fv.offset = _offset_unigram_a + _dim_unigram * t_a;
					parameters::sparse_add(sp_fv, &fv, scale);

					fv.offset = _offset_unigram_b + _dim_unigram * t_b;
					parameters::sparse_add(sp_fv, &fv, scale);
				}
				fv.dealloc();
			}

			const string &tagL = cpostags[i-1];
			if (_use_joint_features) { // bigram-joint
				const int offset = _offset_bigram_joint + _dim_bigram_joint * t_joint;
				feats_str.clear();
				addPOSFeature_bigram(inst, i, tagL, feats_str);
				_dict_bigram_joint.map_all(&fv, offset, feats_str, _generation_mode);
				parameters::sparse_add(sp_fv, &fv, scale);
				fv.dealloc();
			}

			// The left tag must be a fully specified "a^b" pair.
			vector<string> vecL;
			simpleTokenize(tagL, vecL, "^");
			assert(vecL.size() == 2);
			assert(vecL[0] != "*" && vecL[1] != "*");

			if (_use_separate_features) {
				{ // bigram-a: conditioned on the first part of the left tag
					const int offset = _offset_bigram_a + _dim_bigram_a * t_a;
					feats_str.clear();
					addPOSFeature_bigram(inst, i, vecL[0], feats_str);
					_dict_bigram_a.map_all(&fv, offset, feats_str, _generation_mode);
					parameters::sparse_add(sp_fv, &fv, scale);
					fv.dealloc();
				}
				{ // bigram-b: conditioned on the second part of the left tag
					const int offset = _offset_bigram_b + _dim_bigram_b * t_b;
					feats_str.clear();
					addPOSFeature_bigram(inst, i, vecL[1], feats_str);
					_dict_bigram_b.map_all(&fv, offset, feats_str, _generation_mode);
					parameters::sparse_add(sp_fv, &fv, scale);
					fv.dealloc();
				}
			}
		}
	}

	void FGen::create_all_pos_features_when_create_dict(const Instance * const inst, const bool collect_word)
	{
		const int len = inst->size();
		fvec fv;
		list<string> feats_str;
		const int offset = 0;

		for (int i = 0; i <= len; ++i) {
			feats_str.clear();
			addPOSFeature_unigram(inst, i, feats_str);
			_dict_unigram.map_all(&fv, offset, feats_str, _generation_mode);
			fv.dealloc();

			if (collect_word && i < len) get_word_id(inst->forms[i]);

			vector<string> tags;
            if (i == 0) continue;

			//if (i == len) {
			//	tags.push_back(DUMMY_CPOSTAG);
			//} else {
				if (!inst->constrained_tags_str.empty()) {
					tags = inst->constrained_tags_str[i-1];
				} else {
					tags.push_back(inst->cpostags[i-1]);
				}
			//}

			for (int j = 0; j < tags.size(); ++j) {
				vector<string> vec;
				simpleTokenize(tags[j], vec, "^");
				assert(vec.size() == 2);
				if (vec[0] == "*" || vec[1] == "*") continue;
				get_pos_id(tags[j]);				
				get_pos_id_a(vec[0]);
				get_pos_id_b(vec[1]);

				if (_use_joint_features) {
					feats_str.clear();
					addPOSFeature_bigram(inst, i, tags[j], feats_str);
					_dict_bigram_joint.map_all(&fv, offset, feats_str, _generation_mode);
					fv.dealloc();
				}

				if (_use_separate_features) {
					feats_str.clear();
					addPOSFeature_bigram(inst, i, vec[0], feats_str);
					_dict_bigram_a.map_all(&fv, offset, feats_str, _generation_mode);
					fv.dealloc();
					feats_str.clear();
					addPOSFeature_bigram(inst, i, vec[1], feats_str);
					_dict_bigram_b.map_all(&fv, offset, feats_str, _generation_mode);
					fv.dealloc();
				}
			}
		}
	}

	void FGen::save_dictionaries( const string &dictdir ) /*const*/
	{
		assert(!_generation_mode);
		cerr << _name << " : saving feature dictionaries to \""
			<< dictdir << "\"" << endl;

		_word_dict.save(dictdir + "/word.dict.gz");
		_dict_joint.save(dictdir + "/pos.dict-joint.gz");
		_dict_a.save(dictdir + "/pos.dict-a.gz");
		_dict_b.save(dictdir + "/pos.dict-b.gz");

		_dict_unigram.save(dictdir + "/feat.unigram.gz");
		if (_use_joint_features) {
			_dict_bigram_joint.save(dictdir + "/feat.bigram-joint.gz");
		}
		if (_use_separate_features) {
			_dict_bigram_a.save(dictdir + "/feat.bigram-a.gz");
			_dict_bigram_b.save(dictdir + "/feat.bigram-b.gz");
		}
	}

	/*
	 * Load the word, tag and feature dictionaries from files below dictdir
	 * and derive everything that depends on them:
	 *   - the id -> string tables of the three tag dictionaries,
	 *   - the joint-tag -> a-tag / b-tag id maps (joint tag strings are split
	 *     on '^' and must have exactly two parts),
	 *   - the dictionary dimensionalities and the start offset of each
	 *     segment, laid out in the order
	 *     unigram-joint, unigram-a, unigram-b, bigram-joint, bigram-a, bigram-b,
	 *   - the total feature dimensionality.
	 * A summary is printed to stderr. Must not be called while
	 * _generation_mode is set (asserted).
	 *
	 * The statement order matters: each tag count is used right after its
	 * dictionary has been loaded.
	 *
	 * dictdir : directory the files are read from
	 */
	void FGen::load_dictionaries( const string &dictdir )
	{
		assert(!_generation_mode);
		cerr << _name << " : loading feature dictionaries from \""
			<< dictdir << "\""; print_time();

		// Word and tag dictionaries are loaded with 0 as second argument;
		// the feature dictionaries further down are loaded with _fcutoff.
		_word_dict.load(dictdir + "/word.dict.gz", 0);
		_dict_joint.load(dictdir + "/pos.dict-joint.gz", 0);

		// id -> string table for joint tags: size it, reset the entries to
		// NULL, then let the dictionary fill in its keys.
		_id_2_str_joint.resize(tag_number());
		_id_2_str_joint = NULL;
		_dict_joint.collect_keys(_id_2_str_joint.c_buf(), tag_number());

		// Same for tag set "a".
		_dict_a.load(dictdir + "/pos.dict-a.gz", 0);
		_id_2_str_a.resize(tag_number_a());
		_id_2_str_a = NULL;
		_dict_a.collect_keys(_id_2_str_a.c_buf(), tag_number_a());

		// Same for tag set "b".
		_dict_b.load(dictdir + "/pos.dict-b.gz", 0);
		_id_2_str_b.resize(tag_number_b());
		_id_2_str_b = NULL;
		_dict_b.collect_keys(_id_2_str_b.c_buf(), tag_number_b());

		// Map every joint tag id to the ids of its two '^'-separated parts.
		joint_id_2_a.resize(tag_number());
		joint_id_2_b.resize(tag_number());
		for (int t = 0; t < tag_number(); ++t) {
			vector<string> vec;
			simpleTokenize(pos_id_2_str(t), vec, "^");
			assert(vec.size() == 2);
			joint_id_2_a[t] = get_pos_id_a(vec[0]);
			joint_id_2_b[t] = get_pos_id_b(vec[1]);
		}

		// Feature dictionaries; bigram dictionaries only for enabled sets.
		_dict_unigram.load(dictdir + "/feat.unigram.gz", _fcutoff);
		if (_use_joint_features) {
			_dict_bigram_joint.load(dictdir + "/feat.bigram-joint.gz", _fcutoff);
		}
		if (_use_separate_features) {
			_dict_bigram_a.load(dictdir + "/feat.bigram-a.gz", _fcutoff);
			_dict_bigram_b.load(dictdir + "/feat.bigram-b.gz", _fcutoff);
		}


		// Dimensionalities are read from all four feature dictionaries,
		// including bigram dictionaries that were not loaded above.
		_dim_unigram = _dict_unigram.dimensionality();
		_dim_bigram_joint = _dict_bigram_joint.dimensionality();
		_dim_bigram_a = _dict_bigram_a.dimensionality();
		_dim_bigram_b = _dict_bigram_b.dimensionality();

		// Segment layout: each segment holds (dictionary dimensionality x tag
		// count) entries and starts where the previous one ends. All six
		// segments are laid out regardless of the _use_*_features flags.
		_offset_unigram_joint = 0;
		_offset_unigram_a = _offset_unigram_joint + _dim_unigram * tag_number();
		_offset_unigram_b = _offset_unigram_a + _dim_unigram * tag_number_a();
		_offset_bigram_joint = _offset_unigram_b + _dim_unigram * tag_number_b();
		_offset_bigram_a = _offset_bigram_joint + _dim_bigram_joint * tag_number();
		_offset_bigram_b = _offset_bigram_a + _dim_bigram_a * tag_number_a();
		_total_feature_dim = _offset_bigram_b + _dim_bigram_b * tag_number_b();

		// Summary on stderr.
		cerr << "word        number: " << _word_dict.dimensionality() << endl;
		cerr << "pos (joint) number: " << _dict_joint.dimensionality() << endl;
		cerr << "pos (a)     number: " << _dict_a.dimensionality() << endl;
		cerr << "pos (b)     number: " << _dict_b.dimensionality() << endl;
		cerr << "pos (unigram)   feature dimensionality: " << _dim_unigram << endl;
		cerr << "pos (bigram-j)  feature dimensionality: " << _dim_bigram_joint << endl;
		cerr << "pos (bigram-a)  feature dimensionality: " << _dim_bigram_a << endl;
		cerr << "pos (bigram-b)  feature dimensionality: " << _dim_bigram_b << endl;
		cerr << "pos uni-joint feature start offset: " << _offset_unigram_joint << endl;
		cerr << "pos uni-a     feature start offset: " << _offset_unigram_a << endl;
		cerr << "pos uni-b     feature start offset: " << _offset_unigram_b << endl;
		cerr << "pos  bi-joint feature start offset: " << _offset_bigram_joint << endl;
		cerr << "pos  bi-a     feature start offset: " << _offset_bigram_a << endl;
		cerr << "pos  bi-b     feature start offset: " << _offset_bigram_b << endl;

		cerr << "\ntotal       feature dimensionality: " << _total_feature_dim << endl;
		cerr << "\n done!"; print_time();
	}

	/*
	 * Pass the tags (and optionally the forms) of positions
	 * 1..inst->size()-1 to get_pos_id / get_word_id.
	 *
	 * When the instance carries constrained tag candidates, every candidate
	 * of a position is passed to get_pos_id; otherwise the single cpostags
	 * entry of the position is used. Requires _generation_mode (asserted).
	 *
	 * inst         : instance to scan
	 * collect_word : also pass the forms to get_word_id (documented default false)
	 */
	void FGen::collect_word_postag( Instance * const inst, const bool collect_word/*=false*/ )
	{
		assert(_generation_mode);

		const int length = inst->size();
		const bool has_candidates = !inst->constrained_tags_str.empty();

		for (int pos = 1; pos < length; ++pos) {
			if (collect_word) get_word_id(inst->forms[pos]);

			if (has_candidates) {
				const vector<string> &candidates = inst->constrained_tags_str[pos];
				for (vector<string>::const_iterator it = candidates.begin(); it != candidates.end(); ++it) {
					get_pos_id(*it);
				}
				continue;
			}

			get_pos_id(inst->cpostags[pos]);
		}
	}

} // namespace dparser


