#ifndef _DEP_PIPE_
#define _DEP_PIPE_

#pragma once

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

#include "Instance.h"
#include "CONLLReader.h"
#include "CONLLWriter.h"
#include "common.h"

#include "Options.h"
using namespace egstra;
using namespace std;

namespace dparser {

	class IOPipe
	{
	private:
		vector<int> m_vecInstIdxToRead;
		vector<Instance *> m_instances;
		vector<size_t> m_instances_posi;
		size_t _start_id;

		string m_inf_name;
		string m_outf_name;
		CONLLReader *m_reader;
		CONLLWriter *m_writer;

		bool _constrained_tag;
		bool _use_instances_posi;
		size_t _inf_current_posi;


		bool _copy_cpostag_from_postag;
		bool _english;
		
		bool _use_guide_postag;
		bool _use_guide_postag_prob;


		//lexicon feature, created by chaojy
		int _maxwordlen;
		bool _use_lexicon_feature;
		string _lexicon_path;
		map<string, int> _lexicon;

	public:
		IOPipe() : m_reader(0), m_writer(0) {}

		~IOPipe()
		{
			dealloc_instance();
			closeInputFile();
			closeOutputFile();
		}

		void dealloc_instance() {
			for (int i = 0; i < m_instances.size(); ++i) {
				assert(m_instances[i]);
				delete m_instances[i];
				m_instances[i] = 0;
			}
			m_instances_posi.clear();
			m_instances.clear();
			m_vecInstIdxToRead.clear();
		}

		const string &in_file_name() const {
			return m_inf_name;
		}

		void use_instances_posi(bool flag) { _use_instances_posi = flag; }
		bool use_instances_posi() const { return _use_instances_posi; }
		
		void set_constrained_tag_flag(bool flag) { _constrained_tag = flag; }

		void process_options() {
			_use_instances_posi = true;
			_inf_current_posi = 0;
			_constrained_tag = false;

			int tmp;
			string strtmp;
			if(options::get("use-file-position-when-read-instance", tmp)) {
				_use_instances_posi = (1 == tmp);
			}


			_copy_cpostag_from_postag = false;
			if(options::get("copy-cpostag-from-postag", tmp)) {
				_copy_cpostag_from_postag = (1 == tmp);
			}

			_english = true;
			if(options::get("english", tmp)) {
				_english = (1 == tmp);
			}

			_use_guide_postag = false;
			_use_guide_postag_prob = false;
			if(options::get("use-guide-postag", tmp)) {
				_use_guide_postag = (1 == tmp);
			}
			if (_use_guide_postag) {
				if(options::get("use-guide-postag-prob", tmp)) {
					_use_guide_postag_prob = (1 == tmp);
				}
			}

			_use_lexicon_feature = false;
			_lexicon_path = ".";
			/* Add lexicon feature, created by chaojy*/
			if(options::get("use-lexicon-feature", tmp)){
				_use_lexicon_feature = tmp;
			}

			if(_use_lexicon_feature){
				if(options::get("lexicon-path", strtmp)){
					_lexicon_path = strtmp;
				}
			}
		}

		const string &input_filename() { return m_inf_name; }
		const string &output_filename() { return m_outf_name; }

		int openInputFile(const char *filename) {
			m_inf_name = filename;
			m_reader = new CONLLReader();
			if (!m_reader) {
				string str = "IOPipe::IOPipe() create reader error";
				cerr << str << endl;
				throw(str);
			}
			_inf_current_posi = 0;
			return m_reader->openFile(filename); 
		}

		void closeInputFile() {	
			if (m_reader) {
				m_reader->closeFile();
				delete m_reader;
				m_reader = 0;
			}
		}

		int openOutputFile(const char *filename) { 
			m_outf_name = filename;
			m_writer = new CONLLWriter();
			if (!m_writer) {
				string str = "IOPipe::IOPipe() create writer error";
				cerr << str << endl;
				throw(str);
			}
			return m_writer->openFile(filename);
		}

		void closeOutputFile() { 
			if (m_writer) {
				m_writer->closeFile();
				delete m_writer;
				m_writer = 0;
			}
		}

		void getInstancesFromInputFile(const int startId = 0, const int maxInstNum=-1, const int instMaxLen=-1);

		void shuffleTrainInstances() {
			random_shuffle(m_vecInstIdxToRead.begin(), m_vecInstIdxToRead.end());
		}

		void preprocessInstance( Instance *inst );

		int getInstanceNum() const {
			return _use_instances_posi ? m_instances_posi.size() : m_instances.size();
		}

		Instance *getInstance(const int instIdx) {
			if (instIdx < 0 || instIdx >= m_vecInstIdxToRead.size()) {
				cerr << "\nIOPipe::getInstance instIdx range err: " << instIdx << endl;
				return 0;
			}
			const int id = m_vecInstIdxToRead[instIdx];
			const int global_id = _start_id + id;
			if (_use_instances_posi) {
				size_t posi = m_instances_posi[ id ];
				Instance *inst = m_reader->getNext(global_id, posi);
				preprocessInstance(inst);
				return inst;
			} else {
				return m_instances[ id ];
			}
		}

		void fillVecInstIdxToRead() {
			m_vecInstIdxToRead.clear();
			m_vecInstIdxToRead.resize(getInstanceNum());
			for (int i = 0; i < getInstanceNum(); ++i) m_vecInstIdxToRead[i] = i;
		}

		int writeInstance(const Instance *inst) {
			return m_writer->write(inst);
		}

		//lexicon feature, created by chaojy
		void set_lexicon_feature_list(Instance *inst);
		void load_lexicon(const string &file);
		void dealloc_lexicon() {
			_lexicon.clear();
			_maxwordlen = 0;
		}
		void get_word(const Instance *inst, int start, int end, string& word) const {
			word = "";
			for (int i = start; i <= end; ++i){
				word += inst->forms[i];
			}
		}
	};
}

#endif


