#include <cstddef>
#include <exception>
#include <fstream>
#include <iostream>
#include <list>
#include <sstream>
#include <string>
#include <vector>

#include "aligncode.h"
#include "profile.h"
#include "tree.h"
#include "parser.h"
#include "distance.h"
#include "bootstrap.h"
#include "countedsets.h"
#include "bionj_clean.h"
#include "traits.hpp"

using namespace std;
int main( int argc, char ** argv)
{
  size_t identify_threshold = 70
    , num_bootstrap = 12
    , bootstrap_threshold = size_t(0.85f*float(num_bootstrap))
    ;
  profdist::protein_traits::rate_matrix Q(0.0);

  profdist::CorrectionModel model = profdist::Jukes;
  profdist::ExpMType expm_type = profdist::Pade;
  profdist::FZero f_max = profdist::Derivation;

  
  if( argc == 2 ) {
    try {
      profdist::alignment sequences;
      parse_fasta( argv[1], sequences );
      
      profdist::AlignCode<profdist::protein_traits>  code;
      code.read_sequences( sequences );

      profdist::distance_matrix mat( code.get_num_sequences(), code.get_num_sequences(), 0.0);
      
      compute_distance( code, mat, Q, model, expm_type, f_max );
      cout << "Distance Matrix:\n" << mat << endl;

      
      profdist::identical_seq_set ident;
      code.get_identical_sequences( ident, 0.70f);

      if( ! ident.empty() ) {
        cout << "\nIdentical Sequences when applying identity threshold of 70\n";
        size_t id = ident.begin()->first;
        for( profdist::identical_seq_set::const_iterator it = ident.begin(), e = ident.end(); it != e; ++it ) {
          if( id != it->first ) { cout << '\n'; id = it->first; }
          cout << '('<< it->first << ", " << it->second << ") ";
        }
        cout << endl;
      }
      else {
        cout << "\nNo identical Sequences found wehen applying identity threshold of 70"<< endl;
      }

      profdist::ProgressSink sink;
      profdist::bionj( mat, code.get_sequence_names(), cout, sink );

      CountedSets c_split_sets;
      for( std::size_t i = 0; i < num_bootstrap ; ++i ){ 
        profdist::AlignCode<profdist::protein_traits> bs;
        bootstrap( code, bs );
        compute_distance( bs, mat, Q, model, expm_type, f_max );
        profdist::bionj( mat, c_split_sets, sink );
      }

      cout << "Generating consense out of " << num_bootstrap << " bootstraps.\nPre Consense:\n";
      c_split_sets.print(cout);

      c_split_sets.consense( code.get_num_sequences() );
      cout << "Consense:\n";
      c_split_sets.print(cout);

      cout << endl << "Building a tree from consense:\n";

      Tree tree( c_split_sets, code.get_num_sequences(), num_bootstrap );

      tree.print( cout, code.get_sequence_names(), num_bootstrap );


      tree_types::profile_map profiles_found;
      profdist::profile_set known_profiles; // starts out empty
      tree.find_profile( profiles_found, known_profiles, ident, bootstrap_threshold, true );

      {
        size_t index = 0;
        for( tree_types::profile_map::const_iterator it = profiles_found.begin(), e = profiles_found.end();  it != e; ++it, ++index )
        {
          ostringstream fname;
          fname << "algign_pnj_test_profiles_found_" << index << ".dot";
          ofstream debug( fname.str().c_str() );
          if(debug) {
            debug << "digraph {\n node [shape=Mrecord];\n";
            it->second->print_graphviz_debug( debug, code.get_sequence_names());
            debug << "}\n";
          }
        }
      }

      cout << "Create Profile using nodes found:\n";
      profdist::Profile profile( code, profiles_found );

      cout << profile << endl;

      cout << "Starting Mainloop " << endl;

      {
        size_t num_profiles = profile.get_num_profiles()
          , num_new_profiles = 1
          , index = 0;
        
        while( num_new_profiles && num_profiles > 3 ) {
          CountedSets split_sets;
          profdist::distance_matrix matrix( num_profiles, num_profiles, 0.0 );
          for( std::size_t i = 0; i < num_bootstrap ; ++i ){ 
            profdist::Profile bs( profile.get_num_sites(), profile.get_num_profiles() );
            bootstrap( profile, bs );
            profdist::compute_distance<profdist::protein_traits> ( bs, matrix, Q, model, expm_type, f_max );
            profdist::bionj( matrix, split_sets, sink );
          }
          
          cout << "Num Profiles: " << profile.get_num_profiles() << " FOUND WAS : " << profiles_found.size() << endl;
          split_sets.consense( profile.get_num_profiles() );
          cout << "Generating consense out of " << num_bootstrap << " bootstraps on profile.\n";
          split_sets.print(cout);

          Tree tree( split_sets, profile.get_num_profiles(), num_bootstrap );
          cout << "Consense tree is then:" << endl;
          tree.print( cout, code.get_sequence_names(), num_bootstrap  );
          {
            ofstream debug( "tree_before_union.dot" );
            if(debug) {
              tree.print_graphviz_debug( debug, code.get_sequence_names() );
            }
          }


          tree.union_tree( profiles_found );
          cout << "Union tree is then:" << endl;
          tree.print( cout, code.get_sequence_names(), num_bootstrap );

          profiles_found.clear();
          profdist::identical_seq_set ident;
          profile.get_identical_sequences( ident, 0.95f );
          tree.find_profile( profiles_found, known_profiles, ident, bootstrap_threshold, true );
          profile.refine( profiles_found );
          
          num_new_profiles = num_profiles - profile.get_num_profiles();
          num_profiles = profile.get_num_profiles();
        }
      }
      
    } catch( exception const& e ) {
      return 1;
    } catch( error const& e) {
      return 1;
    } catch(...)  {
      cout << "Unknown exception."<< endl;
      return 1;
    }


    return 0;
  }
  else cout << "Add a file parameter!" << endl;
  return 1;
}
