/***************************************************************************
 *   Copyright (C) 2005 by Andreas Pokorny                                 *
 *   andreas.pokorny@biozentrum.uni-wuerzburg.de                           *
 *                                                                         *
 *   This file is part of profdist and cbcanalyzer                         *
 *                                                                         *
 *   Both profdist and cbcanalyzer are free software; you can redistribute * 
 *   it and/or modify it under the terms of the GNU General Public License * 
 *   as published by the Free Software Foundation; either version 2 of the * 
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   Profdist and cbcanalyzer are distributed in the hope that it will be  *
 *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty   *
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#include <string>
#include <stdexcept>
// File is included by aligncode header

 //------------------------ const_sequence_iterator -------------------------------
/*
 * Default constructor: creates a detached iterator. The alignment pointer
 * is null, sequence index and position are zero, and the cursor into the
 * difference list is left default-constructed.
 */
template<typename Traits>
profdist::AlignCode<Traits>::const_sequence_iterator::const_sequence_iterator( )
  : ref( 0 )
  , seq_index( 0 )
  , position( 0 )
{}


/*
 * Creates an iterator standing on site 0 of sequence `seq` of alignment `r`.
 *
 * Sequence index 0 addresses the reference sequence, which has no
 * difference list of its own; its cursor is parked on the end of the first
 * difference list. Any other index k uses difference list k-1 and starts at
 * that list's first entry.
 *
 * When the alignment holds no difference lists at all, the cursor stays
 * default-constructed instead of indexing an empty vector. This is safe
 * because increment(), decrement() and dereference() never touch the cursor
 * for sequence index 0.
 */
template<typename Traits>
profdist::AlignCode<Traits>::const_sequence_iterator::const_sequence_iterator( AlignCode<Traits> const& r, size_t seq )
  : ref(&r), seq_index(seq), position(0)
{
  if( seq_index != 0 )
    it = ref->alignment_codes[seq_index-1].begin();
  else if( !ref->alignment_codes.empty() )
    it = ref->alignment_codes[0].end();
}

/*
 * Builds an iterator from explicit state: the owning alignment, the
 * sequence index, the site position and the matching cursor into that
 * sequence's difference list. The values are stored as given; nothing is
 * validated or re-synchronised here.
 */
template<typename Traits>
profdist::AlignCode<Traits>::const_sequence_iterator::const_sequence_iterator( 
    AlignCode<Traits> const& r,
    size_t seq,
    size_t pos,
    typename profdist::AlignCode<Traits>::d_list::const_iterator const& it )
  : ref( &r )
  , seq_index( seq )
  , position( pos )
  , it( it )
{}



/*
 * Moves one site forward. For non-reference sequences the difference-list
 * cursor is pushed on until it either reaches the end of the list or stands
 * on an entry whose site index is not smaller than the new position.
 */
template<typename Traits>
void 
profdist::AlignCode<Traits>::const_sequence_iterator::increment() 
{
  ++position;

  // The reference sequence (index 0) has no difference list to follow.
  if( seq_index == 0 )
    return;

  typename d_list::const_iterator const last = ref->alignment_codes[seq_index-1].end();
  for( ; it != last && it->first < position; ++it )
  {
    // skip entries that lie before the current site
  }
}

/*
 * Moves one site backward. For non-reference sequences the difference-list
 * cursor is walked back until it reaches the first entry of the list or an
 * entry whose site index is not greater than the new position.
 *
 * An empty difference list has begin() == end(); in that case the cursor
 * must not be decremented at all, so the step off end() is taken only when
 * the list actually has an element to step onto.
 *
 * The position itself is not range checked; decrementing at position 0 is
 * the caller's error.
 */
template<typename Traits>
void
profdist::AlignCode<Traits>::const_sequence_iterator::decrement()
{
  --position;
  if( seq_index != 0 )
  {
    typename d_list::const_iterator const first = ref->alignment_codes[seq_index-1].begin();
    typename d_list::const_iterator const last  = ref->alignment_codes[seq_index-1].end();

    // Step off the past-the-end position, but never before begin().
    if( it == last && it != first )
      --it;

    while( it != first && position < it->first )
      --it;
  }
}

/*
 * Two iterators compare equal when they refer to the same alignment object,
 * the same sequence index and the same site position. The difference-list
 * cursor takes no part in the comparison.
 */
template<typename Traits>
bool 
profdist::AlignCode<Traits>::const_sequence_iterator::equal( typename profdist::AlignCode<Traits>::const_sequence_iterator const& r) const
{
  if( ref != r.ref || seq_index != r.seq_index )
    return false;
  return position == r.position;
}

/*
 * Moves the iterator by `diff` sites; negative values move backward.
 *
 * The position is first moved to within one site of the target, and the
 * final step is taken with increment() or decrement() so that the
 * difference-list cursor is re-synchronised in the right direction.
 *
 * The previous implementation computed `position -= diff - 1` for negative
 * values, which moved the position forward, and then incremented once more.
 */
template<typename Traits>
void
profdist::AlignCode<Traits>::const_sequence_iterator::advance( long diff )
{
  if( diff > 0 )
  {
    position += static_cast<size_t>( diff - 1 );
    increment();
  }
  else if( diff < 0 )
  {
    // -(diff + 1) is non-negative and cannot overflow, even for LONG_MIN.
    position -= static_cast<size_t>( -( diff + 1 ) );
    decrement();
  }
}

/*
 * Returns the element at the current site. If the iterator walks a
 * non-reference sequence and its cursor stands on a difference entry for
 * exactly this site, that entry's element is returned; in every other case
 * the element of the reference sequence at this site is returned.
 */
template<typename Traits>
typename profdist::AlignCode<Traits>::element_type const
profdist::AlignCode<Traits>::const_sequence_iterator::dereference() const 
{
  if( seq_index != 0 )
  {
    typename d_list::const_iterator const last = ref->alignment_codes[seq_index-1].end();
    if( it != last && it->first == position )
      return it->second;
  }
  return ref->reference_sequence[position];
}

// --------------------------------------------------------------------------------------
// ALIGNCODE IMPL :

/*
 * Default constructor: every member is default-constructed, which gives an
 * alignment without sequences and without sites.
 */
template<typename Traits>
profdist::AlignCode<Traits>::AlignCode( )
{}

/*
 * Creates an alignment with room for `num_sequences` sequences of
 * `num_sites` sites each.
 *
 * The first sequence is the reference sequence and is filled with
 * Traits::gap. Each further sequence gets an empty difference list and a
 * zero-initialised count matrix. All names start as "not available".
 *
 * `num_sequences == 0` yields no difference lists and no count matrices;
 * without the guard the unsigned `num_sequences - 1` would wrap around and
 * request an absurdly large vector.
 */
template<typename Traits>
profdist::AlignCode<Traits>::AlignCode( size_t num_sequences, size_t num_sites )
  : alignment_codes( num_sequences ? num_sequences - 1 : 0, typename profdist::AlignCode<Traits>::d_list() )
  , sequence_names( num_sequences, std::string("not available"))
  , reference_sequence( num_sites, Traits::gap )
{
  count_matrix base(0U);
  // gap/gap pairs are intentionally not counted:
  // base( Traits::gap, Traits::gap) = num_sites; 
  count_matrices.resize( num_sequences ? num_sequences - 1 : 0, base );
}

/*
 * Number of sites (columns) in the alignment, i.e. the length of the
 * reference sequence.
 */
template<typename Traits>
size_t profdist::AlignCode<Traits>::get_num_sites() const
{
  size_t const sites = reference_sequence.size();
  return sites;
}

/*
 * Returns the element of the reference sequence at `position`.
 * The index is checked by an assertion only.
 */
template<typename Traits>
typename profdist::AlignCode<Traits>::element_type profdist::AlignCode<Traits>::get_reference_element( size_t position ) const 
{
  assert( position < reference_sequence.size() );
  element_type const element = reference_sequence[position];
  return element;
}

/*
 * Number of sequences including the reference sequence: one more than the
 * number of difference lists. Without any difference list the result is 0.
 */
template<typename Traits>
size_t profdist::AlignCode<Traits>::get_num_sequences() const
{
  if( alignment_codes.empty() )
    return 0;
  return alignment_codes.size() + 1;
}

/*
 * Drops all stored data: difference lists, sequence names, count matrices
 * and the reference sequence.
 */
template<typename Traits>
void profdist::AlignCode<Traits>::clear()
{
  alignment_codes.clear();
  sequence_names.clear();
  count_matrices.clear();
  reference_sequence.clear();
}


/*
 * Empties all difference lists, zeroes all count matrices and then resizes
 * the containers for `num_sequences` sequences of `num_sites` sites.
 *
 * The reference sequence and the sequence names are only resized; entries
 * that already existed keep their previous content.
 *
 * `num_sequences == 0` yields no difference lists and no count matrices;
 * without the guard the unsigned `num_sequences - 1` would wrap around and
 * request an absurdly large vector.
 */
template<typename Traits>
void profdist::AlignCode<Traits>::clear_resize( size_t num_sequences, size_t num_sites )
{
  count_matrix base(0U);
  size_t const num_diff_lists = num_sequences ? num_sequences - 1 : 0;

  // Reset the per-sequence data that is currently stored.
  if( get_num_sequences() )
  {
    for( size_t i = 0, e = get_num_sequences()-1; i != e; ++i )
    {
      alignment_codes[i].clear();
      count_matrices[i] = base;
    }
  }

  alignment_codes.resize( num_diff_lists );
  reference_sequence.resize( num_sites );
  sequence_names.resize( num_sequences);
  count_matrices.resize( num_diff_lists, base );
}

/*
 * Changes the dimensions of the alignment while keeping the data that still
 * fits.
 *
 *  - Same dimensions: nothing happens.
 *  - num_sequences == 0: everything is cleared.
 *  - num_sites == 0: the reference sequence is emptied and all difference
 *    lists and count matrices are reset.
 *  - Otherwise: newly added sequences consist of gaps on the sites that
 *    already existed, newly added sites are gaps in the reference sequence,
 *    and removed sites are taken out of the count matrices and difference
 *    lists of the remaining sequences.
 *
 * Count matrices are incremented elsewhere only for pairs in which both
 * elements are below Traits::num_relevant_elements, so the decrement for
 * removed sites applies the same filter.
 */
template<typename Traits>
void profdist::AlignCode<Traits>::resize( size_t num_sequences, size_t num_sites )
{
  if( num_sequences == get_num_sequences() && num_sites == get_num_sites() ) 
    return;
  if( num_sequences == 0 )
  {
    clear();
    return;
  }
  if( num_sites == 0 ) 
  {
    reference_sequence.clear();
    sequence_names.resize( num_sequences, std::string("not available"));
    alignment_codes.resize( num_sequences - 1);
    count_matrix base(0U);
    count_matrices.resize( num_sequences - 1, base);
    for( size_t i = 0; i < alignment_codes.size(); ++i )
    {
      alignment_codes[i].clear();
      count_matrices[i] = base;
    }
    return;
  }
  
  bool const bigger = num_sites > get_num_sites();

  // Number of difference lists that exist both before and after the resize.
  size_t ac_size = min( get_num_sequences(), num_sequences );
  if(ac_size)
    --ac_size;

  // vector::resize keeps the old elements; when growing it appends copies
  // of the element passed as second argument.
  count_matrix base(0U);
  d_list empty_list;
  if( get_num_sequences() < num_sequences ) 
  {
    // Template for new sequences: a gap on every site that is kept.
    for( size_t i = 0, e = min( get_num_sites(), num_sites ); i < e; ++i )
      empty_list.push_back( a_pair( i, Traits::gap ) );
  }

  count_matrices.resize( num_sequences  - 1, base );
  sequence_names.resize( num_sequences, std::string("not available") ); 
  alignment_codes.resize( num_sequences - 1, empty_list );

  if( !bigger )
  {
    // Take the removed sites out of the count matrices ...
    for( size_t i = 0; i < ac_size; ++i )
    {
      size_t pos = num_sites;
      for( const_sequence_iterator it = begin(i+1) + num_sites, e = end(i+1);
          it != e; ++it, ++pos )
      {
        element_type const ref_item = reference_sequence[pos];
        element_type const item = *it;
        if( ref_item < Traits::num_relevant_elements && item < Traits::num_relevant_elements )
          --count_matrices[i]( ref_item, item );
      }
    }

    // ... and crop the difference entries that lie behind the new end.
    for( size_t i = 0; i < alignment_codes.size(); ++i )
    {
      d_list &ref = alignment_codes[i];
      while( !ref.empty() && ref.back().first >= num_sites )
        ref.pop_back();
    }
  }
  // When growing, only gap/gap pairs are added; those are not counted, so
  // the count matrices need no adjustment.

  // Finally adjust the reference sequence; new sites become gaps.
  reference_sequence.resize( num_sites, Traits::gap );
}


//--------------------------------------AlignCode<Traits>--again---------------------------------------

/*
 * Returns an iterator to the first site of the sequence with the given
 * index; index 0 is the reference sequence.
 */
template<typename Traits>
typename profdist::AlignCode<Traits>::const_sequence_iterator 
profdist::AlignCode<Traits>::begin( size_t sequence_index ) const
{
  const_sequence_iterator first( *this, sequence_index );
  return first;
}

/*
 * Returns the past-the-end iterator of the sequence with the given index.
 *
 * Its position is the number of sites. The difference-list cursor is the
 * end of the sequence's own difference list; for the reference sequence
 * (index 0), which has no list, the end of the first difference list is
 * used, the same cursor that index 1 gets.
 *
 * If no difference list exists at all, a value-initialised cursor is used
 * for index 0 instead of indexing an empty vector. Iterator comparison only
 * looks at alignment, sequence index and position, so this does not affect
 * loop termination.
 */
template<typename Traits>
typename profdist::AlignCode<Traits>::const_sequence_iterator 
profdist::AlignCode<Traits>::end( size_t sequence_index ) const
{
  typedef typename d_list::const_iterator diff_cursor;

  diff_cursor tail = diff_cursor();
  if( sequence_index != 0 )
    tail = alignment_codes[sequence_index-1].end();
  else if( !alignment_codes.empty() )
    tail = alignment_codes[0].end();

  return const_sequence_iterator( *this, sequence_index, reference_sequence.size(), tail );
}


/*
 * Returns an iterator to the first entry of difference list `i`.
 * Note that `i` indexes the difference lists directly, so list `i` belongs
 * to sequence `i + 1`. The index is checked by an assertion, like in the
 * other indexed accessors of this class.
 */
template<typename Traits>
typename profdist::AlignCode<Traits>::const_diff_iterator
profdist::AlignCode<Traits>::begin_difference( size_t i ) const
{
  assert( i < alignment_codes.size() && "Illegal difference list index" );
  return alignment_codes[i].begin();
}

/*
 * Returns the past-the-end iterator of difference list `i`.
 * Note that `i` indexes the difference lists directly, so list `i` belongs
 * to sequence `i + 1`. The index is checked by an assertion, like in the
 * other indexed accessors of this class.
 */
template<typename Traits>
typename profdist::AlignCode<Traits>::const_diff_iterator
profdist::AlignCode<Traits>::end_difference( size_t i ) const
{
  assert( i < alignment_codes.size() && "Illegal difference list index" );
  return alignment_codes[i].end();
}

/*
 * Returns the stored name of the sequence with the given index.
 * The index is checked by an assertion only.
 */
template<typename Traits>
std::string const& 
profdist::AlignCode<Traits>::get_sequence_name( size_t sequence_index ) const
{
  assert( sequence_index < sequence_names.size() && "Illegal sequence index" );
  std::string const& name = sequence_names[sequence_index];
  return name;
}

/*
 * Replaces the stored name of the sequence with the given index by a copy
 * of `s`. The index is checked by an assertion only.
 */
template<typename Traits>
void 
profdist::AlignCode<Traits>::set_sequence_name( std::string const& s, size_t sequence_index )
{
  assert( sequence_index < sequence_names.size() && "Illegal sequence index" );
  std::string & slot = sequence_names[sequence_index];
  slot = s;
}

/*
 * Returns count matrix `i`. The index is checked by an assertion only.
 */
template<typename Traits>
typename profdist::AlignCode<Traits>::count_matrix const& 
profdist::AlignCode<Traits>::get_matrix( size_t i ) const
{
  assert( i < count_matrices.size() && "Illegal matrix index" );
  count_matrix const& matrix = count_matrices[i];
  return matrix;
}

// Disabled variant of push_back taking a single character: it translates
// the character through `mapper.dna2code`, appends the result to the
// reference sequence and increments the [item][item] entry of every count
// matrix. NOTE(review): `mapper` is not referenced by any enabled code in
// this file; the block is kept for reference only and is not compiled.
#if 0
template<typename Traits>
void profdist::AlignCode<Traits>::push_back( char item )
{
  item = mapper.dna2code[item];
  
  reference_sequence.push_back(item);
  if( get_num_sequences() )
    for( vector<profdist::count_matrix>::iterator i = count_matrices.begin(), 
        e = count_matrices.end(); i < e; ++i )
      ++((*i)[item][item]);
}
#endif

/*
 * Appends one alignment column.
 *
 * `items` holds one element per sequence: items[0] goes to the reference
 * sequence, items[k] belongs to sequence k. For every non-reference
 * sequence a difference entry is stored when its element differs from the
 * reference element, and its count matrix is incremented when both elements
 * are below Traits::num_relevant_elements.
 *
 * Throws logic_error when the number of items does not match the number of
 * sequence names, or when `items` is empty (there is no reference element
 * to append in that case; previously items[0] was read regardless).
 */
template<typename Traits>
void profdist::AlignCode<Traits>::push_back( vector<typename AlignCode<Traits>::element_type> const& items )
{
  assert( items.size() == sequence_names.size() && "Sequence items and available sequences do not match");
  if( items.size() != sequence_names.size() )
    throw logic_error("Sequence items and available sequences do not match");
  if( items.empty() )
    throw logic_error("Cannot append an empty column of sequence items");

  element_type const ref_item = items[0];
  assert( ref_item < Traits::num_elements && "Item out of bounds");

  reference_sequence.push_back( ref_item );
  if( !get_num_sequences() )
    return;

  // Index of the site that was just appended.
  size_t const site = reference_sequence.size() - 1;

  typename vector<typename AlignCode<Traits>::count_matrix>::iterator c_it = count_matrices.begin();
  typename vector< d_list >::iterator a_it = alignment_codes.begin();
  for( typename vector<element_type>::const_iterator it = ++(items.begin()), end = items.end();
      it != end; ++it, ++a_it, ++c_it )
  {
    element_type const item = *it;
    assert( item < Traits::num_elements && "Item out of bounds");

    if( item != ref_item )
      a_it->push_back( a_pair( site, item ) );

    if( ref_item < Traits::num_relevant_elements && item < Traits::num_relevant_elements )
      ++((*c_it)[ref_item][item]);
  }
}

// Disabled debugging aid: prints the number of sequences and sites, the
// reference sequence, and then one line per difference list with its
// (site, element) pairs, translating elements through `mapper.code2dna`.
// NOTE(review): `get_num_sequences() - 1` wraps around when the alignment
// has no sequences, and `d_list::const_iterator` would need `typename` to
// compile inside this template; fix both before re-enabling the block.
#if 0
template<typename Traits>
void profdist::AlignCode<Traits>::debug( std::ostream & out ) const 
{
  out << "Aligncode: " << get_num_sequences() << " seq " << get_num_sites() << " sites\nref:";
  for( std::size_t i = 0; i < get_num_sites(); ++i ) 
    out << mapper.code2dna[reference_sequence[i]];

  for( std::size_t i = 0; i < get_num_sequences() - 1; ++i ) 
  {
    out << '\n';
    for( d_list::const_iterator it = alignment_codes[i].begin(), e = alignment_codes[i].end(); it!=e; ++it )
      out << '(' << it->first << ", " << mapper.code2dna[it->second] << ')'; 
  }
  out << '\n';
}
#endif 

/*
 * Collects all pairs of sequences whose share of differing sites lies below
 * the threshold `1 - min(|percentual_identity|, 1)`.
 *
 * Every matching pair is inserted into `ids` in both orders. Index 0 is the
 * reference sequence, index k > 0 the sequence stored in difference list
 * k-1.
 *
 *  - Against the reference sequence, the number of differing sites is the
 *    length of the difference list.
 *  - Between two other sequences, both (site-ordered) difference lists are
 *    merged: a site listed in only one of them differs, and a site listed
 *    in both differs when the stored elements are unequal.
 *
 * The pairwise loop is bounded with `index_1 + 1 < size`; the former
 * `index_1 < size - 1` wrapped around for an alignment without difference
 * lists and made the loop run practically forever.
 */
template<typename Traits>
void 
profdist::AlignCode<Traits>::get_identical_sequences( profdist::identical_seq_set & ids, float percentual_identity  ) const
{
  float const difference_threshold = 1.0f - std::min( std::abs(percentual_identity) , 1.0f );
  float const num_sites = float(reference_sequence.size());
  std::size_t const num_lists = alignment_codes.size();

  // Every sequence against the reference sequence.
  for( std::size_t i = 0; i != num_lists; ++i )
    if( difference_threshold > float(alignment_codes[i].size()) / num_sites )
    {
      ids.insert( std::make_pair(0, i + 1) );
      ids.insert( std::make_pair(i+1, 0));
    }

  // Every unordered pair of non-reference sequences.
  for( std::size_t index_1 = 0; index_1 + 1 < num_lists; ++index_1 ) {
    for( std::size_t index_2 = index_1+1; index_2 < num_lists; ++index_2 ) {
      const_diff_iterator b_1 = begin_difference( index_1 )
        , e_1 = end_difference( index_1 )
        , b_2 = begin_difference( index_2 )
        , e_2 = end_difference( index_2 );

      std::size_t counter = 0;
      while( b_1 != e_1 && b_2 != e_2 ) {
        if( b_1->first == b_2->first )
        {
          // Both differ from the reference here; compare them directly.
          counter += (b_1->second != b_2->second );
          ++b_1;
          ++b_2;
        }
        else if( b_1->first < b_2->first )
        {
          ++counter;
          ++b_1;
        }
        else
        {
          ++counter;
          ++b_2;
        }
      }
      // Whatever is left in one list has no counterpart in the other.
      for( ; b_1 != e_1; ++b_1 ) ++counter;
      for( ; b_2 != e_2; ++b_2 ) ++counter;

      if( difference_threshold > float(counter) / num_sites ) {
        ids.insert( std::make_pair(index_1+1, index_2+1));
        ids.insert( std::make_pair(index_2+1, index_1+1));
      }
    }
  }
}

/*
 * Gives read-only access to the complete list of sequence names.
 */
template<typename Traits>
std::vector<std::string> const& 
profdist::AlignCode<Traits>::get_sequence_names() const
{
  std::vector<std::string> const& names = sequence_names;
  return names;
}

/*
 * Replaces the content of this object with the given alignment.
 *
 * The first sequence becomes the reference sequence. Every further sequence
 * is stored as the list of sites on which it differs from the reference,
 * together with a count matrix of (reference element, sequence element)
 * pairs; only pairs in which both elements are below
 * Traits::num_relevant_elements are counted.
 *
 * An empty alignment clears the object. Throws runtime_error when the
 * sequences do not all have the same length.
 */
template<typename Traits>
void 
profdist::AlignCode<Traits>::read_sequences( profdist::alignment const& seq ) 
{
  using namespace std;
  typedef alignment::const_iterator const_it;
  
  size_t const num_seq = seq.size();  // the first sequence is stored separately

  // Nothing to read: without this guard the code below would dereference
  // seq.begin() of an empty alignment.
  if( num_seq == 0 )
  {
    clear();
    return;
  }
  
  /*
   * Test if all sequences in the alignment have the same length as the
   * first one. If not, an exception is thrown to indicate this error.
   * The comparison is driven by the sequence number, so a first sequence
   * of length zero is checked like any other length.
   */
  size_t max_length = 0;
  size_t seq_num = 0;
  for( const_it b = seq.begin(), e = seq.end(); b != e; ++b, ++seq_num )
  {
    if( seq_num == 0 )
      max_length = b->sequence_length;
    else if( max_length != b->sequence_length )
    {
      std::ostringstream out;
      out << "Alignment broken, sequence " << seq_num << "(" << std::string(b->id.first, b->id.second) << ") has different size (" << b->sequence_length << ")";
      throw runtime_error(out.str());
    }
  }

  // clear and resize
  clear_resize( num_seq, max_length);

  /*
   * Initializing the reference sequence from the first sequence.
   */
  size_t i = 0;
  for( typename Traits::cursor it = Traits::begin( *seq.begin() ), e = Traits::end( *seq.begin() ); 
      it != e; ++it, ++i ) {
    // Same bound as in push_back: valid elements are below num_elements.
    assert( *it < Traits::num_elements && "Item out of bounds");
    reference_sequence[i] = *it;
  }
  
  /*
   * Store a copy of every sequence name, in the order the sequences appear.
   */
  i = 0;
  for( const_it it = seq.begin(), e = seq.end(); it != e; ++it, ++i )
    sequence_names[i] = string(it->id.first, it->id.second );
  
  /*
   * Create the differences of every sequence to the reference sequence.
   * The first sequence is skipped because it is the reference sequence.
   */
  i = 0;
  for( const_it it = ++(seq.begin()), e = seq.end(); it != e; ++it, ++i )
  {
    size_t position = 0; // current site
    for( typename Traits::cursor item_it = Traits::begin(*it), item_end = Traits::end(*it);
        item_it != item_end; ++item_it, ++position )
    {
      /*
       * If the element at the current site differs from the reference
       * sequence, add an entry for this site to the difference list.
       */
      if( *item_it != reference_sequence[position] )
        alignment_codes[i].push_back(a_pair( position, *item_it ) );

      /*
       * Update the count matrix of this sequence. Equal and differing
       * pairs are both counted, as long as both elements are relevant.
       */
      if( reference_sequence[position] < Traits::num_relevant_elements && *item_it < Traits::num_relevant_elements )
        ++count_matrices[i](reference_sequence[position], *item_it );
    }
  }
}


/*
 * Writes all sequences of `obj` to `file` in the requested format and
 * returns the stream.
 *
 * Fasta: a ">name" line followed by the whole sequence on one line.
 *
 * Embl: an "ID name" line, an "SQ Sequence <sites> BP;" line, then the
 * sequence in lines of 60 elements (indented by four blanks, grouped in
 * blocks of six), and finally a "//" terminator line.
 *
 * The Embl branch used to emit no line break after the SQ header or between
 * 60-element lines, and none before "//" when the number of sites was zero
 * or a multiple of 60. Each of these now starts on its own line.
 */
template<typename Traits>
std::ostream& profdist::write_file( profdist::AlignCode<Traits> & obj, std::ostream& file, profdist::FileType t )
{
  typedef typename profdist::AlignCode<Traits>::const_sequence_iterator seq_iterator;
  size_t const line_width  = 60;
  size_t const block_width = 6;

  switch( t )
  {
    case profdist::Fasta:
    {
      for( size_t i = 0; i < obj.get_num_sequences(); ++i )
      {
        file << ">" << obj.get_sequence_name( i ) << "\n";

        for( seq_iterator it = obj.begin(i), e = obj.end(i); it != e; ++it )
          file << Traits::get_char( *it );
        file << '\n';
      }
    }
    break;
    case profdist::Embl:
    {
      for( size_t i = 0; i < obj.get_num_sequences(); ++i )
      {
        file << "ID " << obj.get_sequence_name( i ) << "\n";
        file << "SQ Sequence "  << obj.get_num_sites() << " BP;\n";

        size_t site_index = 0;
        for( seq_iterator it = obj.begin(i), e = obj.end(i); it != e; ++it, ++site_index )
        {
          size_t const column = site_index % line_width;
          if( column == 0 ) file << "    ";
          else if( column % block_width == 0 ) file << ' ';
          file << Traits::get_char( *it );
          // A full line is terminated right after its last element.
          if( column + 1 == line_width ) file << '\n';
        }
        // Terminate a trailing, incomplete line.
        if( site_index % line_width ) file << '\n';
        file << "//\n";
      }
    }
    break;
  }
  return file;
}


/*
 * Moves the sequences of the next alignment from the front of `seq` into a
 * temporary alignment and loads that into `obj`.
 *
 * If `num_bootstrap * alignment_size == num_sequences`, up to
 * `num_sequences` sequences are taken. Otherwise sequences are taken until
 * the id of the first taken sequence shows up again (compare_range) or
 * `seq` runs empty.
 * NOTE(review): in the first case `num_sequences` sequences are consumed by
 * a single call; confirm against the callers that this is the size of one
 * replicate and not the size of the whole input.
 *
 * Throws std::runtime_error when no sequence could be taken. Previously
 * front()/pop_front() were called on an empty alignment in that situation.
 */
template<typename Traits>
void 
profdist::read_from_bootstrap( AlignCode<Traits> & obj, alignment & seq, size_t num_sequences, size_t num_bootstrap, size_t alignment_size ) 
{
  alignment temp( seq.get_store() );
  if( num_bootstrap * alignment_size  == num_sequences ) 
  {
    for( size_t i = 0; i != num_sequences && !seq.empty(); ++i ) 
    {
      temp.push_back( seq.front() );
      seq.pop_front();
    }
  }
  else if( !seq.empty() )
  {
    temp.push_back( seq.front() );
    seq.pop_front();

    // The id of the first sequence marks the start of the next alignment.
    alignment::value_type::string_range name = temp.front().id;

    while( !seq.empty() && !compare_range( seq.front().id, name) )
    {
      temp.push_back( seq.front() );
      seq.pop_front();
    }
  }

  if( temp.empty() )
    throw std::runtime_error( "No sequences available for reading a bootstrap alignment" );

  obj.read_sequences( temp );
}



template<typename Traits>
std::ostream & profdist::operator<<( std::ostream & out, AlignCode<Traits> const& obj )
{
  out << "ALIGNCODE: " << obj.get_num_sequences() << " SEQ " << obj.get_num_sites() << " SITES\n";
  for(std::size_t i = 0, e = obj.get_num_sequences(); i!=e;++i ){
    out << i << ":";
    for( typename AlignCode<Traits>::const_sequence_iterator it = obj.begin(i), e = obj.end(i); 
        it != e; ++it ) {
      out << Traits::get_char(*it);
    }
    out << '\n';
  }
  out << std::endl;
}

#include "parser.h"
/*
 * Parses the file `filename` with the parser that belongs to file type `t`
 * (parse_fasta or parse_embl) and loads the resulting sequences into `obj`.
 * For any other file type no parser runs and `obj` receives an empty
 * alignment.
 */
template<typename Traits>
void profdist::parse_file( profdist::AlignCode<Traits> & obj, std::string const& filename, profdist::FileType t )
{
  profdist::alignment sequences; 

  if( t == profdist::Fasta )
    parse_fasta( filename, sequences );
  else if( t == profdist::Embl )
    parse_embl( filename, sequences );

  obj.read_sequences( sequences );
}




