#include <iostream>
#include <list>
#include <vector>
#include <string>
#include <sstream>
#include <stdexcept>
#include <boost/lexical_cast.hpp>
#include "aligncode.h"
#include "profile.h"
#include "tree.h"
#include "parser.h"
#include "distance.h"
#include "bootstrap.h"
#include "countedsets.h"
#include "bionj_clean.h"
#include "traits.hpp"

using namespace profdist;
using namespace std;
using namespace boost;
struct settings
{
  size_t boot_threshold, id_threshold, num_bootstrap;
  ExpMType expm;
  CorrectionModel cor;
  FZero maximization;
  string q_matrix;
  FileType file_type;
  TraitType trait_type;
  settings()
    : boot_threshold(75), id_threshold(95), num_bootstrap(100), expm(Taylor), cor(Jukes), maximization(Robust)
      , file_type(Fasta), trait_type(Rna) {}
};

/**
 * Prints all settings in human readable form, one "Label:value" pair per line.
 *
 * The enum members are used directly as indices into the name tables below.
 * NOTE(review): this assumes the enumerators are declared in exactly the
 * order of the table entries, starting at 0 - confirm against the enum
 * declarations.
 *
 * @param o   stream to write to
 * @param obj settings to print
 * @return o, to allow chaining
 */
std::ostream & operator<<( std::ostream & o, settings const& obj )
{
  static const char * const exp_names[] = { "Pade", "Taylor" };
  static const char * const maximization_names[] = { "Derivation", "Newton", "Robust", "Parabolic" };
  static const char * const correction_names[] = {
    "Jukes Cantor", "Kimura Parameter 2", "GTR", "LogDet", "Uncorrected P Distance"
  };
  static const char * const file_type_names[] = {
    "Fasta", "Embl", "Bdb", "Marna", "Forester", "Ct", "DistanceMatrix", "Newick", "RateMatrix"
  };
  static const char * const trait_names[] = { "Rna", "RnaStructure", "Proteine" };

  o << "Bootstrap Threshold:" << obj.boot_threshold << std::endl
    << "Identity Threshold:" << obj.id_threshold << std::endl
    << "Num Bootstraps:" << obj.num_bootstrap << std::endl
    << "Matrix Exp:" << exp_names[obj.expm] << std::endl
    << "Function Maximization:" << maximization_names[obj.maximization] << std::endl
    << "Distance Correction:" << correction_names[obj.cor] << std::endl
    << "File Type:" << file_type_names[obj.file_type] << std::endl
    << "Trait Type:" << trait_names[obj.trait_type] << std::endl
    << "Matrix:" << obj.q_matrix << std::endl;
  return o;
}

template<typename Traits>
void run_algorithm( alignment & sequences, settings const & s ) {
  

  std::vector<std::string> names;
  ofstream trees("trees.tre");
  profdist::AlignCode<Traits>  code;
  code.read_sequences( sequences );
  sequences.clear();
  names = code.get_sequence_names();

  profdist::distance_matrix mat( code.get_num_sequences(), code.get_num_sequences(), 0.0);

  cout << "Distance Matrix" << endl;
  typename Traits::rate_matrix Q(0);
  // If correction model is GTR read in the rate matrix into the
  // rate matrix struct Q.
  if( s.cor== GTR ) {
    ifstream in( s.q_matrix.c_str() );
    if( ! in ) throw runtime_error("could not open rate matrix");
    read_rate_matrix<Traits>( in, Q );
    if( in.fail() ) throw runtime_error("could not parse rate matrix");
  }
  
  compute_distance( code, mat, Q, s.cor, s.expm, s.maximization );
  { ofstream dist("matrix_0.mat"); dist << mat << endl; }

  profdist::identical_seq_set ident;
  code.get_identical_sequences( ident, float(s.id_threshold) / 100.0f );

  std::cout << "Identical pairs:" << ident.size()/2 << std::endl;

  std::cout << "Writing distance tree." << std::endl;
  ProgressSink bla;
  bionj( mat, names, trees, bla  );
  trees << std::endl;

  cout <<"Bootstrap process" << std::endl;
  CountedSets c_split_sets;
  profdist::OStreamSink bs_sink(cout, s.num_bootstrap ); 
  bs_sink.update(1, "Bootstrap-Distance-Bionj" );
  for( size_t i = 0; i < s.num_bootstrap ; ++i ){ 
    profdist::AlignCode<Traits> bs;
    bootstrap( code, bs );
    compute_distance( bs, mat, Q, s.cor, s.expm, s.maximization );
    profdist::bionj( mat, c_split_sets, bla );
    bs_sink.update(i+1, "Bootstrap-Distance-Bionj" );
  }

  c_split_sets.consense( code.get_num_sequences() );

  cout << std::endl << "Generating and printing tree\n";

  Tree tree( c_split_sets, code.get_num_sequences(), s.num_bootstrap );

  tree.print( trees, names, s.num_bootstrap );
  trees << std::endl;


  tree_types::profile_map profiles_found;
  profdist::profile_set known_profiles; // starts out empty
  tree.find_profile_first( profiles_found, known_profiles, ident, s.boot_threshold , true );

  cout << "Profiles found:"  << code.get_num_sequences() - profiles_found.size() << std::endl;

  profdist::Profile profile( code, profiles_found );

  code.clear();

  {
    profdist::distance_matrix temp;
    profdist::compute_distance<Traits>( profile, temp, Q, s.cor, s.expm, s.maximization);
    ofstream temp_mat_out( "matrix_1.mat" );
    temp_mat_out << temp << endl;

    ofstream temp_desc_out( "matrix_1.desc" );
    std::size_t prof_index = 0;
    for( tree_types::profile_map::const_iterator it = profiles_found.begin(), e = profiles_found.end();  it != e; ++it, ++prof_index ) {
      temp_desc_out << prof_index;
      if( it->second->get_split_set().empty() )
        temp_desc_out << ", Profile " << it->second->get_reference_position() << " from last round.";
      else {
        temp_desc_out << ", composed by previous profiles: ";
        for( Node::set_type::const_iterator s_it = it->second->get_split_set().begin(), s_e = it->second->get_split_set().end(); s_it != s_e; ++s_it )
          temp_desc_out << *s_it << ", ";
      }
      temp_desc_out << std::endl;
    }
  }

  cout << "Starting Mainloop " << endl;

  {
    size_t num_profiles = profile.get_num_profiles()
      , num_new_profiles = 1
      , index = 2;

    while( num_new_profiles && num_profiles > 3 ) {
      CountedSets split_sets;
      profdist::distance_matrix matrix( num_profiles, num_profiles, 0.0 );
      cout <<"Bootstrap process\n" << std::flush;
      for( size_t i = 0; i < s.num_bootstrap ; ++i ){ 
        profdist::Profile bs( profile.get_num_sites(), profile.get_num_profiles() );
        bootstrap( profile, bs );
        profdist::compute_distance<Traits> ( bs, matrix, Q, s.cor, s.expm, s.maximization);
        profdist::bionj( matrix, split_sets, bla );
        bs_sink.update(i+1,"Bootstrap-Distance-Bionj");
      }

      split_sets.consense( profile.get_num_profiles() );

      Tree tree( split_sets, profile.get_num_profiles(), s.num_bootstrap );
      
      cout  << std::endl << "Printing tree\n";
      tree.union_tree( profiles_found );
      tree.print( trees, names, s.num_bootstrap );

      profiles_found.clear();
      profdist::identical_seq_set ident;
      profile.get_identical_sequences( ident, float(s.id_threshold)/100.0f );
      tree.find_profile( profiles_found, known_profiles, ident, s.boot_threshold, true );
      cout << "Identical pairs:"  << ident.size()/2 << std::endl;
      profile.refine( profiles_found );

      {
        profdist::distance_matrix temp;
        profdist::compute_distance<Traits>( profile, temp, Q, s.cor, s.expm, s.maximization);
        ofstream temp_mat_out( ("matrix_" + lexical_cast<std::string>(index++) + ".mat").c_str() );
        temp_mat_out << temp << endl;

        ofstream temp_desc_out( ("matrix_" + lexical_cast<std::string>(index-1) + ".desc").c_str() );
        std::size_t prof_index = 0;
        for( tree_types::profile_map::const_iterator it = profiles_found.begin(), e = profiles_found.end();  it != e; ++it, ++prof_index ) {
          temp_desc_out << prof_index;
          if( it->second->get_split_set().empty() )
            temp_desc_out << ", Profile " << it->second->get_reference_position() << " from last round.";
          else {
            temp_desc_out << ", composed by previous profiles: ";
            for( Node::set_type::const_iterator s_it = it->second->get_split_set().begin(), s_e = it->second->get_split_set().end(); s_it != s_e; ++s_it )
              temp_desc_out << *s_it << ", ";
          }
          temp_desc_out << std::endl;
        }

      }

      num_new_profiles = num_profiles - profile.get_num_profiles();
      cout << "Profiles found:"  << num_new_profiles << std::endl;
      num_profiles = profile.get_num_profiles();
    }
  }

}

/**
 * Parses the input file and runs the algorithm with the matching traits.
 *
 * Forester, Bdb and Marna files are always processed with
 * rna_structure_traits. For Embl and Fasta files the traits are chosen by
 * s.trait_type (Rna or Proteine).
 *
 * @param filename path of the alignment file
 * @param s        settings; file_type selects the parser
 * @throws runtime_error if s.file_type has no parser here, or if
 *         s.trait_type is neither Rna nor Proteine for an Embl/Fasta file
 */
void call_algorithm( string const& filename, settings const& s ) {
  cout << "Parsing Alignment" << endl;
  alignment a;
  switch(s.file_type) {
    case Embl: parse_embl(filename,a ); break;
    case Fasta:parse_fasta(filename, a ); break;
    case Forester: parse_forester( filename,  a); run_algorithm<rna_structure_traits>( a, s ); return;
    case Bdb:  parse_bdb( filename, a); run_algorithm<rna_structure_traits>( a, s ); return;
    case Marna: parse_marna(filename, a); run_algorithm<rna_structure_traits>( a, s ); return;
    // Without this, an unhandled file type would run the algorithm on an
    // alignment that was never filled.
    default: throw runtime_error("unsupported file type");
  }
  switch(s.trait_type ){
    case Rna: run_algorithm<rna_traits>(a,s); return;
    case Proteine: run_algorithm<protein_traits>(a,s);return;
    default: throw runtime_error("bad data type specified");
  }
}

/**
 * Program entry point.
 *
 * Without arguments the usage text is printed. Otherwise every argument
 * except the last one is scanned for options (each recognized option
 * consumes the argument following it), and the last argument is taken as the
 * input file name. Arguments that match no option are skipped.
 *
 * After parsing, boot_threshold is converted from a percentage into an
 * absolute bootstrap count; a percentage above 100 is mapped to
 * num_bootstrap + 1. The random generator is seeded with time(0) unless a
 * seed option was given.
 *
 * @return 0 on success, 1 if the usage text was printed or an exception was
 *         caught
 */
int main( int argc, char ** argv)
{
  try{
    if(argc <= 1 ) {
      cout << "Usage: prof [OPTIONS] file" << endl;
      cout << "Results are printed into trees.tre, Distance Matrices are printed into matrix_N.mat" << endl;
      cout << "String parameters may be shorten as long as they stay distinctive, case is ignored." << endl;
      cout << "-n,-num-bootstrap <NUMBER>\t\tNumber of bootstraps (default:100)"<< endl;
      cout << "-b,-boot-threshold <NUMBER>\t\tdefault: 75" << endl;
      cout << "-i,-id-threshold <NUMBER>\t\tdefault: 95" << endl;
      cout << "-q,-rate-matrix <PATH>\t\t" << endl;
      cout << "-m,-f-maximization <CHAR>\t\tFunction maximization (D)erivation, (N)ewton, default: (R)obust, (P)arabolic" << endl;
      cout << "-c,-correction-model <CHAR>\t\tCorrection Model: default: (J)ukes, (G)TR, (L)ogdet, (U)ncorrected" << endl;
      cout << "-e,-matrix-exp <CHAR>\t\tApproximation Method: default: (T)aylor, (P)ade" << endl;
      cout << "-f,-file-type <CHARS>\t\tdefault: (Fa)sta; (E)mbl; (Fo)rester; (B)db; (M)arna;" << endl;
      cout << "-d,-data-type <CHAR>\t\tdefault: (R)na; (S)tructure, (P)roteine"  << endl;
      cout << "-s,-seed <NUMBER>\t\twill use <NUMBER> for srand, or time(0) otherwise"  << endl;
      return 1;
    }
    else {
      bool srand_init = false;
      settings s;
      // The loop stops before the last argument, so argv[++i] below never
      // reads past argv[argc-1].
      for(int i = 1; i < argc-1; ++ i ) {
        if( argv[i] == string("-num-bootstrap") || argv[i] == string("-n") )
          s.num_bootstrap = lexical_cast<size_t>(argv[++i]);
        else if( argv[i] == string("-boot-threshold") || argv[i] == string("-b"))
          s.boot_threshold = lexical_cast<size_t>(argv[++i]);
        else if( argv[i] == string("-id-threshold") || argv[i] == string("-i"))
          s.id_threshold = lexical_cast<size_t>(argv[++i]);
        else if( argv[i] == string("-rate-matrix") || argv[i] == string("-q"))
          s.q_matrix = argv[++i];
        // "-seed" is the spelling shown in the usage text; "-srand" is kept
        // for backward compatibility.
        else if( argv[i] == string("-seed") || argv[i] == string("-srand") || argv[i] == string("-s")) {
          // Assign the outer flag (do not redeclare it), otherwise the
          // time(0) seeding below would override the user supplied seed.
          srand_init = true;
          srand(lexical_cast<int>(argv[++i]));
        }
        else if( argv[i] == string("-f-maximization") || argv[i] == string("-m"))
          switch(argv[++i][0] ) {
            case 'd':
            case 'D': s.maximization = Derivation; break;
            case 'n':
            case 'N': s.maximization = NewtonMethod; break;
            case 'r':
            case 'R': s.maximization = Robust; break;
            case 'p':
            case 'P': s.maximization = Parabolic; break;
            default: throw runtime_error("unkown function maximization");
          }
        else if( argv[i] == string("-matrix-exp") || argv[i] == string("-e"))
          switch(argv[++i][0] ) {
            case 'p':
            case 'P': s.expm = Pade; break;
            case 't':
            case 'T': s.expm = Taylor; break;
            default: throw runtime_error("unkown matrix exponential approximation");
          }
        else if( argv[i] == string("-correction-model") || argv[i] == string("-c"))
          switch(argv[++i][0] ) {
            case 'j':
            case 'J': s.cor = Jukes; break;
            case 'K':
            case 'k': s.cor = Kimura; break;
            case 'G':
            case 'g': s.cor = GTR; break;
            case 'L':
            case 'l': s.cor = LogDet; break;
            case 'U':
            case 'u': s.cor = UncorrectedP; break;
            default: throw runtime_error("unkown correction model");
          }
        else if( argv[i] == string("-file-type") || argv[i] == string("-f")) {
          std::cout << argv[i+1] << std::endl;
          switch(argv[++i][0] ) {
            case 'f':
            case 'F':
              {
                // Fasta and Forester share the first letter; the second
                // character decides (a one-letter value yields '\0' here and
                // ends up in the default branch).
                switch(argv[i][1]) {
                  case 'a':
                  case 'A': s.file_type = Fasta; break;
                  case 'o':
                  case 'O': s.file_type = Forester; break;
                  default: throw runtime_error("unkown file  type");
                }
                break;
              }
            case 'e':
            case 'E': s.file_type = Embl; break;
            case 'b':
            case 'B': s.file_type = Bdb; break;
            case 'm':
            case 'M': s.file_type = Marna; break;
            default: throw runtime_error("unkown file type");
          }
        }
        else if( argv[i] == string("-data-type") || argv[i] == string("-d"))
          switch(argv[++i][0] ) {
            case 'R':
            case 'r': s.trait_type = Rna; break;
            case 'p':
            case 'P': s.trait_type = Proteine; break;
            case 'S':
            case 's': s.trait_type = RnaStructure; break;
            default: throw runtime_error("unkown data type");
          }

      }

      // Convert the percentage into an absolute number of bootstraps.
      if( s.boot_threshold > 100 )
        s.boot_threshold = s.num_bootstrap + 1;
      else
        s.boot_threshold = std::size_t( float(s.boot_threshold * s.num_bootstrap) / 100.0f );

      if(! srand_init )
        srand(time(0));
      std::cout << s;
      string filename = argv[argc-1];
      call_algorithm( filename, s );
      return 0;
    }
  }
  catch( exception const& e ) {
    cout << e.what() << std::endl;
    return 1;
  } catch( error const& ) {
    // NOTE(review): this error type is declared outside this file; nothing
    // is printed for it here - confirm whether it reports itself.
    return 1;
  } catch(...)  {
    cout << "Unknown exception."<< endl;
    return 1;
  }
}
