#ifndef PROFDIST_DISTANCE_INL_INCLUDED
#define PROFDIST_DISTANCE_INL_INCLUDED 
/***************************************************************************
 *   Copyright (C) 2005 by Andreas Pokorny                                 *
 *   andreas.pokorny@biozentrum.uni-wuerzburg.de                           *
 *                                                                         *
 *   This file is part of profdist and cbcanalyzer                         *
 *                                                                         *
 *   Both profdist and cbcanalyzer are free software; you can redistribute * 
 *   it and/or modify it under the terms of the GNU General Public License * 
 *   as published by the Free Software Foundation; either version 2 of the * 
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   Profdist and cbcanalyzer are distributed in the hope that it will be  *
 *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty   *
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <fstream>
#include <iomanip>
#include <ios>
#include <istream>
#include <iterator>
#include <ostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <boost/algorithm/string/replace.hpp>
#include "pair_iterator.h"
#include "debug.h"
#include "correction.h"

namespace profdist {

namespace detail {
  /**
   * Adds one to mat[i][j], but only when both indices address a cell inside
   * the N x N matrix. Out-of-range indices are silently ignored.
   */
  template<size_t N>
  inline void checked_inc( fixed_matrix<size_t,N,N> & mat, size_t i, size_t j ) {
    if( i >= N || j >= N )
      return;
    ++mat[i][j];
  }

  /**
   * Subtracts one from mat[i][j], but only when both indices address a cell
   * inside the N x N matrix. Out-of-range indices are silently ignored.
   */
  template<size_t N>
  inline void checked_dec( fixed_matrix<size_t,N,N> & mat, size_t i, size_t j ) {
    if( i >= N || j >= N )
      return;
    --mat[i][j];
  }

}

namespace detail {

  /**
   * Throws std::logic_error stating that the distance between the two named
   * sequences is not a finite number.
   *
   * @param first_name   name of the first sequence of the failing pair
   * @param second_name  name of the second sequence of the failing pair
   */
  template<typename NameT>
  void throw_non_finite_distance( NameT const& first_name, NameT const& second_name )
  {
    std::ostringstream er;
    er << "Distance calculation of sequences " << first_name << " and " 
      << second_name << " resulted into a non finite number.\n"
      << "Either the alignment contains to many gaps, or is too divergent for this correction method.";
    throw std::logic_error(er.str());
  }

  /**
   * Derives the count matrix of the sequence pair (first + 1, second + 1)
   * from the difference lists both sequences keep against the first
   * (reference) sequence.
   *
   * @param source            alignment providing the difference lists and the
   *                          reference elements
   * @param first             difference-list index of the first sequence
   * @param second            difference-list index of the second sequence
   * @param reference_counts  count matrix of the first sequence against the
   *                          reference; only its diagonal is read
   * @param pair_counts       receives the result; the caller hands it in
   *                          freshly constructed
   */
  template<typename Traits, typename SourceT, typename CountMatrixT>
  void build_pair_count_matrix( SourceT const& source, std::size_t first, std::size_t second,
      CountMatrixT const& reference_counts, CountMatrixT & pair_counts )
  {
    // Start from the diagonal of the reference comparison of the first sequence.
    for( std::size_t k = 0; k < Traits::num_relevant_elements; ++k )
      pair_counts[k][k] = reference_counts[k][k];

    typedef typename SourceT::const_diff_iterator diff_iterator;
    diff_iterator it_a = source.begin_difference( first );
    diff_iterator a_end = source.end_difference( first );
    diff_iterator it_b = source.begin_difference( second );
    diff_iterator b_end = source.end_difference( second );

    // Merge both difference lists by position (->first); ->second is the
    // element found at that position and is used as a matrix index.
    for( ;; )
    {
      bool const a_left = ( it_a != a_end );
      bool const b_left = ( it_b != b_end );
      if( !a_left && !b_left )
        break;

      if( a_left && b_left && it_a->first == it_b->first )
      {
        // Both sequences differ from the reference at this position.
        checked_inc( pair_counts, it_a->second, it_b->second );
        ++it_a;
        ++it_b;
      }
      else if( a_left && ( !b_left || it_a->first < it_b->first ) )
      {
        // Only the first sequence differs; the second one carries the
        // reference element here.
        checked_inc( pair_counts, it_a->second, source.get_reference_element( it_a->first ) );
        ++it_a;
      }
      else
      {
        // Only the second sequence differs. The position was counted on the
        // diagonal copied above, so that diagonal count is taken back.
        // NOTE(review): the pair is stored as (b, ref) although the first
        // sequence holds ref here; this is harmless only if correction()
        // treats the count matrix symmetrically - confirm.
        std::size_t const ref = source.get_reference_element( it_b->first );
        checked_inc( pair_counts, it_b->second, ref );
        checked_dec( pair_counts, ref, ref );
        ++it_b;
      }
    }
  }

}

/**
 * Fills `matrix` with the corrected pairwise distances of all sequences in
 * `source`, reporting progress through `prg`.
 *
 * Row/column 0 is computed from the count matrices stored in `source`; every
 * other pair uses a count matrix derived from the difference lists.
 *
 * @param source    alignment in difference encoding
 * @param matrix    resized to num_sequences x num_sequences and filled
 * @param Q         rate matrix handed to correction()
 * @param model     correction model handed to correction()
 * @param expmType  matrix exponential variant handed to correction()
 * @param fMax      handed to correction() unchanged
 * @param prg       updated after every finished pair; when update() returns
 *                  false the computation stops and `matrix` stays partially
 *                  filled
 * @throws std::logic_error if correction() yields a non-finite value
 */
template<typename Traits>
void compute_distance( AlignCode<Traits> const& source, profdist::distance_matrix & matrix, typename Traits::rate_matrix const& Q, profdist::CorrectionModel model, profdist::ExpMType expmType, profdist::FZero fMax, ProgressSink & prg )
{
  using namespace detail;
  typedef typename AlignCode<Traits>::count_matrix count_matrix;

  std::size_t const num_seq = source.get_num_sequences();
  matrix.resize( num_seq, num_seq, 0.0 );

  // Fewer than two sequences means there is no pair to compare. Returning
  // here also keeps the unsigned "num_seq - 1" style arithmetic below from
  // wrapping around on an empty alignment.
  if( num_seq < 2 )
    return;

  std::size_t const max_steps = ( num_seq - 1 ) * ( num_seq - 2 ) / 2;
  std::size_t num_steps = 0;

  for( std::size_t i = 0; i + 1 < num_seq; ++i )
  {
    // Count matrix of the first sequence against sequence i + 1.
    count_matrix const& A = source.get_matrix( i );
    double const to_reference = correction( A, Q, model, expmType, fMax );
    if( !std::isfinite( to_reference ) )
      detail::throw_non_finite_distance( source.get_sequence_name( 0 ), source.get_sequence_name( i + 1 ) );
    matrix( 0, i + 1 ) = matrix( i + 1, 0 ) = to_reference;

    for( std::size_t j = i + 1; j + 1 < num_seq; ++j )
    {
      count_matrix B( 0U );
      detail::build_pair_count_matrix<Traits>( source, i, j, A, B );

      double const value = correction( B, Q, model, expmType, fMax );
      if( !std::isfinite( value ) )
      {
        prg.update( max_steps, "Calculation haltet." );
        detail::throw_non_finite_distance( source.get_sequence_name( i + 1 ), source.get_sequence_name( j + 1 ) );
      }
      matrix( i + 1, j + 1 ) = value;
      matrix( j + 1, i + 1 ) = value;

      ++num_steps;
      bool const cont = ( num_steps == max_steps )
        ? prg.update( num_steps, "Distance matrix calculated!" )
        : prg.update( num_steps );
      if( !cont )
        return;
    }
  }
}

/**
 * Fills `matrix` with the corrected pairwise distances of all sequences in
 * `source`, without progress reporting.
 *
 * Row/column 0 is computed from the count matrices stored in `source`; every
 * other pair uses a count matrix derived from the difference lists.
 *
 * @param source    alignment in difference encoding
 * @param matrix    resized to num_sequences x num_sequences and filled
 * @param Q         rate matrix handed to correction()
 * @param model     correction model handed to correction()
 * @param expmType  matrix exponential variant handed to correction()
 * @param fMax      handed to correction() unchanged
 * @throws std::logic_error if correction() yields a non-finite value
 */
template<typename Traits>
void compute_distance( AlignCode<Traits> const& source, profdist::distance_matrix & matrix, typename Traits::rate_matrix const& Q, profdist::CorrectionModel model, profdist::ExpMType expmType, profdist::FZero fMax )
{
  using namespace detail;
  typedef typename AlignCode<Traits>::count_matrix count_matrix;

  std::size_t const num_seq = source.get_num_sequences();
  matrix.resize( num_seq, num_seq, 0.0 );

  // Nothing to compare; also avoids unsigned wrap-around of "num_seq - 1".
  if( num_seq < 2 )
    return;

  /*
   * Iterate through all count matrices of the AlignCode. Index 0 refers to
   * the second sequence of the alignment, so index i stands for sequence
   * i + 1.
   */
  for( std::size_t i = 0; i + 1 < num_seq; ++i )
  {
    count_matrix const& A = source.get_matrix( i );
    double const to_reference = correction( A, Q, model, expmType, fMax );
    if( !std::isfinite( to_reference ) )
      detail::throw_non_finite_distance( source.get_sequence_name( 0 ), source.get_sequence_name( i + 1 ) );
    matrix( 0, i + 1 ) = matrix( i + 1, 0 ) = to_reference;

    for( std::size_t j = i + 1; j + 1 < num_seq; ++j )
    {
      count_matrix B( 0U );
      detail::build_pair_count_matrix<Traits>( source, i, j, A, B );

      double const value = correction( B, Q, model, expmType, fMax );
      if( !std::isfinite( value ) )
        detail::throw_non_finite_distance( source.get_sequence_name( i + 1 ), source.get_sequence_name( j + 1 ) );
      matrix( i + 1, j + 1 ) = value;
      matrix( j + 1, i + 1 ) = value;
    }
  }
}

namespace detail {
  /**
   * Moves `open` forward to the next '(' and `close` to the ')' that
   * balances it, keeping `positions` in step with both iterators.
   *
   * The function is meant to be called repeatedly with the same arguments:
   * `open == close` marks the very first call (nothing reported yet), any
   * later call first steps past the '(' reported previously.
   *
   * @param open       in/out: search start, ends on the found '('
   * @param close      in/out: ends on the matching ')', or on `end` when the
   *                   brace is never closed
   * @param end        end of the iterated range
   * @param positions  in/out: `first` is advanced together with `open`,
   *                   `second` restarts at `first` and is advanced together
   *                   with `close`
   * @return true when a complete pair was found, false when the range is
   *         exhausted or the brace found has no matching ')'
   */
  template<typename Iterator1, typename Iterator2> 
  bool get_next_brace_pair( 
      pair_iterator<const char, Iterator1, Iterator2> & open, 
      pair_iterator<const char, Iterator1, Iterator2> & close, 
      pair_iterator<const char, Iterator1, Iterator2> & end, 
      std::pair<std::size_t, std::size_t> & positions
      )
  {
    if( open == end || close == end ) 
      return false;

    // Not the first call: leave the '(' that was reported last time.
    if( open != close )
    {
      ++open;
      ++positions.first;
    }

    // The end test has to come first, `end` must never be dereferenced.
    while( open != end && *open != '(' )
    {
      ++open; 
      ++positions.first;
    }

    if( open == end )
      return false;

    positions.second = positions.first;
    close = open;

    // Number of braces opened after `open` that still wait for their ')'.
    std::size_t depth = 0;

    for( ;; )
    {
      ++close;
      ++positions.second;

      // Ran off the range without balancing the brace; check before the
      // dereference below.
      if( close == end )
        return false;

      if( *close == ')' )
      {
        if( depth == 0 )
          return true;
        --depth;
      }
      else if( *close == '(' )
        ++depth;
    }
  }
}

/**
 * Counts, for every pair of sequences in `source`, the brace pairs that sit
 * at identical open and close positions in both sequences and for which
 * `detect` returns 1. The count is stored symmetrically in `matrix`.
 *
 * @param source  sequences together with their brace (fold) strings
 * @param matrix  replaced by a num_sequences x num_sequences matrix of counts
 * @param detect  callable taking the dereferenced sequence iterators at the
 *                open and close position of the first sequence, followed by
 *                those of the second sequence; a result of 1 counts the pair
 */
template<typename Detector>
void compute_distance( sequence_data< brace_fold_data< num_sequences<base_adaptor > > > const& source, profdist::distance_matrix & matrix, Detector const& detect )
{
  typedef sequence_data< brace_fold_data< num_sequences<base_adaptor > > > source_type;
  typedef pair_iterator<const char, typename source_type::const_brace_iterator, typename source_type::const_sequence_iterator> brace_pair_iterator;
  
  std::size_t num_seq = source.get_num_sequences();
  matrix = profdist::distance_matrix( num_seq, num_seq, 0.0 ); 

  std::size_t i_index = 0;
  for( typename source_type::const_sequence_id i = source.begin(), e = source.end(); 
      i != e; ++i, ++i_index )
  {
    typename source_type::const_sequence_id k = i;
    ++k;
    for(std::size_t k_index = i_index + 1; k != e; ++k, ++k_index )
    {
      std::pair<std::size_t, std::size_t> i_pos(0,0), k_pos(0,0);

      brace_pair_iterator
        i_open( source.brace_begin( i ), source.sequence_begin( i ) )
        , i_close( i_open )
        , i_end( source.brace_end( i ), source.sequence_end( i ) ) 
        , k_open( source.brace_begin( k ), source.sequence_begin( k ) )
        , k_close( k_open )
        , k_end( source.brace_end( k ), source.sequence_end( k ) );

      // Fetch the first brace pair of both sequences before comparing.
      // Comparing against the initial (0,0) placeholder would skip a pair of
      // sequence k that opens at column 0 before sequence i has produced any
      // pair at all, so a match at column 0 would never be counted.
      bool i_valid = detail::get_next_brace_pair( i_open, i_close, i_end, i_pos );
      bool k_valid = detail::get_next_brace_pair( k_open, k_close, k_end, k_pos );

      while( i_valid && k_valid )
      {
        if( i_pos.first < k_pos.first )
          i_valid = detail::get_next_brace_pair( i_open, i_close, i_end, i_pos );
        else if( k_pos.first < i_pos.first )
          k_valid = detail::get_next_brace_pair( k_open, k_close, k_end, k_pos );
        else
        {
          // Same opening column: the pair counts only if it also closes at
          // the same column and the detector accepts the four elements.
          if( i_pos.second == k_pos.second && 1 == detect( *i_open.second(), *i_close.second(), *k_open.second(), *k_close.second() ) )
          {
            ++matrix( i_index, k_index );
            ++matrix( k_index, i_index );
          }
          i_valid = detail::get_next_brace_pair( i_open, i_close, i_end, i_pos );
          k_valid = detail::get_next_brace_pair( k_open, k_close, k_end, k_pos );
        }
      }
    }
  }
}

namespace detail {
  /// Number of characters of a name stored as std::string.
  inline std::size_t get_size( std::string const& text ) 
  {
    return text.size();
  }

  /// Number of elements of a name stored as an iterator range [first, second).
  template<typename IteratorT>
  inline std::size_t get_size( std::pair<IteratorT, IteratorT> const& range ) 
  {
    return std::distance( range.first, range.second );
  }

  /// Strict "shorter than" ordering on names, based on get_size().
  template<typename ValueT>
  bool compare_pair_length( ValueT const& l, ValueT const& r) 
  {
    std::size_t const left_size = get_size( l );
    std::size_t const right_size = get_size( r );
    return left_size < right_size;
  }

  /// A std::string name is printable as it is; returns a copy.
  inline std::string prepare_for_print( std::string const& text )
  {
    return text;
  }

  /// Copies the iterator range [first, second) into a printable std::string.
  template<typename IteratorT>
  inline std::string prepare_for_print( std::pair<IteratorT, IteratorT> const& range )
  {
    std::string text( range.first, range.second );
    return text;
  }
}

template<typename IteratorT>
std::ostream& print_distance_matrix( std::ostream& out, profdist::distance_matrix const& matrix, IteratorT begin, IteratorT end, bool FixedPoint = false, bool WellFormated = true  )
{
  std::size_t str_size = 0, f_size = 0;
  if(WellFormated)
  {
    IteratorT biggest = std::max_element( begin, end, &detail::compare_pair_length<typename IteratorT::value_type>);
    str_size = detail::get_size( *biggest );
    
    if( FixedPoint )
    {
      double max_d = *(std::max_element( matrix.begin(), matrix.end() ));
      f_size = std::size_t( std::log10( max_d ) ) + 1 ;
    }
    else 
    {
      out.setf( ios::scientific, ios::floatfield ); 
      out.setf( ios::right, ios::adjustfield ); 
      out.setf( ios::showpoint ); 
      out.precision( 5 ); 
      f_size = 12;
    }
  }
 
  out << matrix.nRows() << '\n';
  std::size_t i = 0; 
  while(begin != end)
  {
    out << setw(str_size) << boost::replace_all_copy( detail::prepare_for_print( *begin ), " ", "_" );
    for( std::size_t j = 0; j < matrix.nRows(); ++j ) 
    {
      out << ' '<< setw(f_size) <<  matrix(i,j);
    }
    out << '\n';
    ++begin; 
    ++i;
  }

  return out;
}

/**
 * Writes `matrix` to `out` as a separated table: a header line with the
 * number of rows followed by all names, then one line per name holding the
 * name and the values of the matching matrix row.
 *
 * The range is expected to hold one name per matrix row; this is not
 * checked. Format flags and precision set on `out` are left in place.
 *
 * @param out         destination stream
 * @param matrix      distance matrix to print
 * @param begin       first name (std::string or iterator pair)
 * @param end         one past the last name
 * @param sep         field separator written before every field but the first
 * @param FixedPoint  when false, values are printed in scientific notation
 *                    with precision 5; when true the stream's current
 *                    floating point format is used unchanged
 * @return out
 */
template<typename IteratorT>
std::ostream& print_distance_matrix_tsv( std::ostream& out, profdist::distance_matrix const& matrix, IteratorT begin, IteratorT end, char sep = '\t', bool FixedPoint = false )
{
  if( ! FixedPoint )
  {
    out.setf( std::ios::scientific, std::ios::floatfield ); 
    out.setf( std::ios::right, std::ios::adjustfield ); 
    out.setf( std::ios::showpoint ); 
    out.precision( 5 ); 
  }
 
  // Header line: row count, then every name.
  out << matrix.nRows();
  for( IteratorT name = begin; name != end; ++name )
    out << sep << detail::prepare_for_print( *name );
  out << '\n'; 

  // One line per name with the values of the matrix row of the same index.
  for( std::size_t i = 0; begin != end; ++begin, ++i )
  {
    out << detail::prepare_for_print( *begin );
    for( std::size_t j = 0; j < matrix.nRows(); ++j ) 
    {
      out << sep << matrix(i,j);
    }
    out << '\n';
  }

  return out;
}


/**
 * Reads a distance matrix from `in`: a leading count `num`, followed by up
 * to `num` rows that each consist of a whitespace-free name and `num`
 * values.
 *
 * `matrix` is replaced by a zero-filled num x num matrix as soon as the
 * count has been read; every name read is written through `begin`. Reading
 * stops at the first extraction that fails, in which case `in` carries the
 * failure state and the remaining cells keep their zero value.
 *
 * @param in      source stream
 * @param matrix  receives the values
 * @param begin   output iterator receiving the row names as std::string
 * @return in
 */
template<typename InsertIteratorT>
std::istream& read_distance_matrix( std::istream& in, profdist::distance_matrix & matrix, InsertIteratorT begin )
{
  std::size_t num;
  in >> std::ws;
  if( !( in >> num ) )
    return in;

  matrix = profdist::distance_matrix( num, num, 0.0 );

  std::string name;
  for( std::size_t row = 0; row < num; ++row )
  {
    if( !( in >> std::ws >> name >> std::ws ) )
      break;
    *begin++ = name;

    double val;
    for( std::size_t col = 0; col < num; ++col )
    {
      // A failed read leaves the stream failed, which also ends the row loop.
      if( !( in >> val ) )
        break;
      matrix( row, col ) = val;
    }
  }
  return in;
}

/**
 * Reads a square rate matrix from `in`: a leading dimension that has to
 * equal Traits::num_relevant_elements, followed by the values in row-major
 * order.
 *
 * The failbit of `in` is set when the dimension is missing or different, or
 * when fewer values than required could be read. Values read before a
 * failure stay stored in `matrix`.
 *
 * @param in      source stream
 * @param matrix  receives the values
 * @return in
 */
template<typename Traits>
std::istream& read_rate_matrix( std::istream& in, typename Traits::rate_matrix & matrix )
{
  std::size_t const dim = Traits::num_relevant_elements;
  std::size_t const total = dim * dim;

  in >> std::ws;
  std::size_t num;
  if( !( in >> num ) || num != dim )
  {
    in.setstate( std::ios::failbit );
    return in;
  }

  std::size_t filled = 0;
  for( ; filled != total; ++filled )
  {
    if( !( in >> matrix[filled / dim][filled % dim] ) )
      break;
  }

  if( filled != total )
    in.setstate( std::ios::failbit );
  return in;
}

/**
 * Fills `matrix` with the corrected pairwise distances of all profiles in
 * `source`.
 *
 * For every profile pair a matrix is accumulated over all sites from the
 * products of the per-site values of both profiles and then handed to
 * correction().
 *
 * @param source    profiles to compare
 * @param matrix    resized to num_profiles x num_profiles and filled
 *                  symmetrically
 * @param Q         rate matrix handed to correction()
 * @param model     correction model handed to correction()
 * @param expmType  matrix exponential variant handed to correction()
 * @param fMax      handed to correction() unchanged
 * @throws std::logic_error if correction() yields a non-finite value
 */
template<typename Traits>
void compute_distance( Profile const& source, profdist::distance_matrix & matrix, typename Traits::rate_matrix const& Q, profdist::CorrectionModel model, profdist::ExpMType expmType, profdist::FZero fMax )
{
  std::size_t num_profiles = source.get_num_profiles();
  matrix.resize( num_profiles, num_profiles, 0.0 );
  std::size_t num_sites = source.get_num_sites(); 
  
  // "i + 1 < num_profiles" instead of "i != num_profiles - 1": the latter
  // wraps around for an empty profile set and would never terminate.
  for( std::size_t i = 0; i + 1 < num_profiles; ++i )
    for( std::size_t j = i + 1; j < num_profiles; ++j ) {
      
      Profile::ConstSingleProfile i_prof( source[i] )
        , j_prof( source[j] );
     
      // Accumulate, over all sites, the product of the value of element A in
      // profile i and the value of element B in profile j.
      typename Traits::rate_matrix cm( 0.0 );
      for( std::size_t site_index = 0; site_index != num_sites; ++site_index )
        for( std::size_t A = 0;  A != Traits::num_relevant_elements; ++ A) {
          double val = i_prof.get_value( site_index, A );
          for( std::size_t B = 0 ; B != Traits::num_relevant_elements; ++ B) 
            cm[A][B] = cm[A][B] + val* j_prof.get_value( site_index, B );
        }
#ifdef PROFDIST_DEBUG
      // Debug aid: dumps the matrix of the current pair, overwriting the
      // file on every pair.
      std::ofstream N_mat("N_matrix");
      N_mat << cm;
      N_mat.close();
#endif
      double value = correction( cm , Q, model, expmType, fMax );

      if( std::isfinite( value ) ) 
        matrix(i,j) = matrix(j,i) = value;
      else {
        std::ostringstream er;
        er << "Distance calculation of profiles " << i << " and " << j << " resulted into a non finite number.\n"
          << "Either the alignment contains to many gaps, or is too divergent for this correction method.";
        throw std::logic_error(er.str());
      }

    }
}



}

#endif

