/***************************************************************************
 *   Copyright (C) 2005 by Andreas Pokorny                                 *
 *   andreas.pokorny@biozentrum.uni-wuerzburg.de                           *
 *                                                                         *
 *   This file is part of profdist and cbcanalyzer                         *
 *                                                                         *
 *   Both profdist and cbcanalyzer are free software; you can redistribute * 
 *   it and/or modify it under the terms of the GNU General Public License * 
 *   as published by the Free Software Foundation; either version 2 of the * 
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   Profdist and cbcanalyzer are distributed in the hope that it will be  *
 *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty   *
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/


#include "aligncode.h"

namespace profdist {
	
	/**
	 * Writes every sequence of an RNA structure alignment to a stream.
	 *
	 * Fasta: for each sequence a ">name" line, one line with the characters
	 * produced by rna_structure_traits::get_char and one line with the
	 * characters produced by rna_structure_traits::get_fold_char.
	 *
	 * Embl: for each sequence an "ID" line, an "SQ" header line, the sequence
	 * data in lines of 60 sites (four leading blanks, one blank between
	 * groups of six sites) and a terminating "//" line. The header, every
	 * data line and the terminator each end with a newline, so the terminator
	 * always starts on its own line, also when the number of sites is a
	 * multiple of 60.
	 *
	 * Any other file type writes nothing.
	 *
	 * \param obj   alignment to write
	 * \param file  stream that receives the text; flushed once before returning
	 * \param t     requested output format
	 * \return      the stream passed in as \p file
	 */
	std::ostream& write_file(
		profdist::AlignCode<profdist::rna_structure_traits> & obj,
		std::ostream& file,
		profdist::FileType t )
	{
		typedef profdist::AlignCode<profdist::rna_structure_traits>::const_sequence_iterator site_iterator;

		switch( t )
		{
			case profdist::Fasta:
			{
				for( size_t i = 0; i < obj.get_num_sequences(); ++i )
				{
					file << ">" << obj.get_sequence_name( i ) << '\n';

					// First pass over the sites: the sequence characters.
					for( site_iterator it = obj.begin(i), e = obj.end(i); it != e; ++it )
						file << profdist::rna_structure_traits::get_char( *it );
					file << '\n';

					// Second pass over the same sites: the fold characters.
					for( site_iterator it = obj.begin(i), e = obj.end(i); it != e; ++it )
						file << profdist::rna_structure_traits::get_fold_char( *it );
					file << '\n';
				}
			}
			break;
			case profdist::Embl:
			{
				for( size_t i = 0; i < obj.get_num_sequences(); ++i )
				{
					file << "ID " << obj.get_sequence_name( i ) << "\n";
					file << "SQ Sequence "  << obj.get_num_sites() << " BP;\n";

					size_t site_index = 0;
					for( site_iterator it = obj.begin(i), e = obj.end(i); it != e; ++it, ++site_index )
					{
						if( site_index%60 == 0 )
						{
							// Finish the previous data line before indenting the next one.
							if( site_index )
								file << '\n';
							file << "    ";
						}
						else if( site_index%6 == 0 )
							file << ' ';
						// NOTE(review): groups of six sites are kept from the original
						// code; the EMBL flat file layout uses blocks of ten - confirm
						// against whatever reads these files.
						file << profdist::rna_structure_traits::get_char( *it );
					}

					// TODO: the structure information has to be put somewhere in
					// the embl file, but who knows where?

					// Close the last data line, if any sites were written at all.
					if( site_index )
						file << '\n';
					file << "//\n";
				}
			}
			break;
			default:
				// Unknown format: nothing is written.
			break;
		}

		// One flush for the whole output instead of one per line.
		file.flush();
		return file;
	}
	
}

#if 0
#include <algorithm>
#include <utility>
#include <stdexcept>
#include "aligncode.h"

using namespace std;
using namespace profdist;

/** Creates an alignment object; no member is initialised explicitly. */
profdist::AlignCode::AlignCode()
{}

/**
 * Creates an alignment of \p num_sequences sequences with \p num_sites sites
 * in which the reference sequence consists of gap codes only.
 *
 * One difference list and one count matrix is created for every sequence
 * except the first one (the reference); the lists start empty and every
 * matrix starts with \p num_sites in its gap/gap cell. All sequence names are
 * set to "not available".
 *
 * A \p num_sequences of zero yields no difference lists and no count
 * matrices instead of wrapping around in the unsigned subtraction.
 *
 * \param num_sequences  number of sequences, including the reference
 * \param num_sites      number of sites of every sequence
 */
profdist::AlignCode::AlignCode( size_t num_sequences, size_t num_sites )
  : alignment_codes( num_sequences ? num_sequences - 1 : 0, profdist::AlignCode::d_list() )
  , sequence_names( num_sequences, std::string("not available"))
  , reference_sequence( num_sites, mapper.dna2code['-'] )
{
  count_matrix base(0U);
  base(mapper.dna2code['-'], mapper.dna2code['-']) = num_sites; 
  count_matrices.resize( num_sequences ? num_sequences - 1 : 0, base );
}

/** \return the length of the reference sequence, i.e. the number of sites. */
size_t profdist::AlignCode::get_num_sites() const
{
  const size_t site_count = reference_sequence.size();
  return site_count;
}


/**
 * \return zero when no difference list is stored, otherwise the number of
 *         difference lists plus one for the reference sequence.
 */
size_t profdist::AlignCode::get_num_sequences() const
{
  if( alignment_codes.empty() )
    return 0;

  return alignment_codes.size() + 1;
}

/** Empties the difference lists, sequence names, count matrices and the reference sequence. */
void profdist::AlignCode::clear()
{
  // The four containers are independent of each other, the order is irrelevant.
  alignment_codes.clear();
  sequence_names.clear();
  count_matrices.clear();
  reference_sequence.clear();
}


/**
 * Resets the difference data and resizes all containers.
 *
 * Existing difference lists are emptied and existing count matrices are
 * overwritten with a matrix constructed from 0U before the containers are
 * resized. The reference sequence and the sequence names are only resized;
 * elements that survive the resize keep their old values.
 *
 * A \p num_sequences of zero leaves no difference lists and no count
 * matrices instead of wrapping around in the unsigned subtraction.
 *
 * \param num_sequences  number of sequences, including the reference
 * \param num_sites      number of sites of every sequence
 */
void profdist::AlignCode::clear_resize( size_t num_sequences, size_t num_sites )
{
  // Every sequence but the reference owns a difference list and a count matrix.
  const size_t num_differences = num_sequences ? num_sequences - 1 : 0;

  count_matrix base(0U);
  if( get_num_sequences() )
  {
    for( size_t i = 0, e = get_num_sequences()-1; i != e; ++i )
    {
      alignment_codes[i].clear();
      count_matrices[i] = base;
    }
  }
  alignment_codes.resize( num_differences );
  reference_sequence.resize( num_sites );
  sequence_names.resize( num_sequences);
  count_matrices.resize( num_differences, base );
}

/**
 * Changes the number of sequences and sites while keeping the data that
 * still fits into the new dimensions.
 *
 * - Nothing happens when both dimensions already match.
 * - Zero sequences clears the object.
 * - Zero sites empties the reference sequence and all difference lists and
 *   resets all count matrices.
 * - Otherwise added sequences consist of gaps only, added sites are gaps in
 *   every sequence, and removed sites are subtracted from the count matrices
 *   and dropped from the difference lists.
 *
 * \param num_sequences  new number of sequences, including the reference
 * \param num_sites      new number of sites
 */
void profdist::AlignCode::resize( size_t num_sequences, size_t num_sites )
{
  if( num_sequences == get_num_sequences() && num_sites == get_num_sites() ) 
    return;
  if( num_sequences == 0 )
  {
    clear();
    return;
  }
  if( num_sites == 0 ) 
  {
    reference_sequence.clear();
    sequence_names.resize( num_sequences, std::string("not available"));
    alignment_codes.resize( num_sequences - 1);
    count_matrix base(0U);
    count_matrices.resize( num_sequences - 1, base);
    for( size_t i = 0; i < alignment_codes.size(); ++i )
    {
      alignment_codes[i].clear();
      count_matrices[i] = base;
    }
    return;
  }
  
  const bool bigger = num_sites > get_num_sites();
  // Number of appended sites. It is zero when the alignment shrinks or keeps
  // its length; an unconditional subtraction would wrap around for unsigned
  // values and end up in the gap/gap cell of the new count matrices.
  const size_t difference = bigger ? num_sites - get_num_sites() : 0;
  // Number of already existing difference lists that survive the resize.
  size_t ac_size = min( get_num_sequences(), num_sequences );
  size_t gap = mapper.dna2code['-'];
  if(ac_size)
    --ac_size;
  // the gcc-stl will keep the data, even when the new size is bigger
  // the stl vector will copy the old data
  // if the new size is bigger it will add copies of the element supplied in the second parameter
  count_matrix base(0U);
  d_list empty_list;
  if( get_num_sequences() < num_sequences ) 
  {
    // Template for the added sequences: a gap at every site.
    base(gap, gap) = difference; 
    for( size_t i = 0, e = min( get_num_sites(), num_sites ); i < e; ++i )
    {
      // Only sites where the reference is not a gap itself are differences;
      // push_back and read_sequences store differing sites only as well.
      if( reference_sequence[i] != mapper.dna2code['-'] )
        empty_list.push_back( a_pair( i, gap ) );
      ++base( reference_sequence[i], gap );
    }
  }

  count_matrices.resize( num_sequences  - 1, base );
  sequence_names.resize( num_sequences, std::string("not available") ); 
  alignment_codes.resize( num_sequences - 1, empty_list );

  // depending on the new sequence length, we have to alter the remaining count matrices
  if( !bigger )
  {
    // First we reduce the count matrices by the sites that get cut off
    for( size_t i = 0; i < ac_size; ++i )
    {
      size_t pos  = 0;
      for( const_sequence_iterator it = begin(i+1) + num_sites, e = end(i+1);
          it != e; ++it, ++pos )
        --count_matrices[i]( reference_sequence[pos + num_sites], *it );
    }

    // Now we crop the old data: drop every difference at or behind the new end
    for( size_t i = 0; i < alignment_codes.size(); ++i )
    {
      d_list &ref = alignment_codes[i];
      while( !ref.empty() && ref.back().first >= num_sites )
        ref.pop_back();
    }
  }
  else 
  {
    // We only have to increase the gap-gap values in the count matrices
    for( size_t i = 0; i < ac_size; ++i )
      count_matrices[i](gap,gap) += difference; 
  }

  // now we can adjust the reference sequence 
  reference_sequence.resize( num_sites, mapper.dna2code['-'] );
}


//--------------------------------------AlignCode--again---------------------------------------

/**
 * \param sequence_index  index of the sequence to iterate over
 * \return a read-only iterator built from this object and \p sequence_index
 */
profdist::AlignCode::const_sequence_iterator profdist::AlignCode::begin( size_t sequence_index ) const
{
  const_sequence_iterator first_site( *this, sequence_index );
  return first_site;
}

/**
 * \param sequence_index  index of the sequence to iterate over
 * \return a read-only iterator positioned at the number of sites
 */
profdist::AlignCode::const_sequence_iterator profdist::AlignCode::end( size_t sequence_index ) const
{
  // Sequence n > 0 uses difference list n-1; sequence 0 is handed the end of
  // the first difference list.
  size_t list_index = 0;
  if( sequence_index )
    list_index = sequence_index - 1;

  return const_sequence_iterator( *this, sequence_index, reference_sequence.size(), 
      alignment_codes[list_index].end() );
}

/**
 * \param sequence_index  index of the sequence to iterate over
 * \return an iterator built from this object and \p sequence_index
 */
profdist::AlignCode::sequence_iterator profdist::AlignCode::begin( size_t sequence_index )
{
  sequence_iterator first_site( *this, sequence_index );
  return first_site;
}

/**
 * \param sequence_index  index of the sequence to iterate over
 * \return an iterator positioned at the number of sites
 */
profdist::AlignCode::sequence_iterator profdist::AlignCode::end( size_t sequence_index )
{
  // Sequence n > 0 uses difference list n-1; sequence 0 is handed the end of
  // the first difference list.
  size_t list_index = 0;
  if( sequence_index )
    list_index = sequence_index - 1;

  return sequence_iterator( *this, sequence_index, reference_sequence.size(), 
      alignment_codes[list_index].end() );
}

/**
 * \param i  index of a difference list (not checked)
 * \return iterator to the first entry of difference list \p i
 */
profdist::AlignCode::const_diff_iterator profdist::AlignCode::begin_difference( size_t i ) const
{
  d_list const& differences = alignment_codes[i];
  return differences.begin();
}

/**
 * \param i  index of a difference list (not checked)
 * \return iterator behind the last entry of difference list \p i
 */
profdist::AlignCode::const_diff_iterator profdist::AlignCode::end_difference( size_t i ) const
{
  d_list const& differences = alignment_codes[i];
  return differences.end();
}

/**
 * \param sequence_index  index of the sequence (not checked)
 * \return reference to the stored name of that sequence
 */
std::string const& profdist::AlignCode::get_sequence_name( size_t sequence_index ) const
{
  std::string const& name = sequence_names[sequence_index];
  return name;
}

/**
 * Replaces the stored name of a sequence.
 *
 * \param s               new name
 * \param sequence_index  index of the sequence (not checked)
 */
void profdist::AlignCode::set_sequence_name( std::string const& s, size_t sequence_index )
{
  std::string & slot = sequence_names[sequence_index];
  slot = s;
}

/**
 * \param i  index into the count matrices (not checked)
 * \return reference to count matrix \p i
 */
profdist::count_matrix const& profdist::AlignCode::get_matrix( size_t i ) const
{
  profdist::count_matrix const& matrix = count_matrices[i];
  return matrix;
}

void profdist::AlignCode::push_back( char item )
{
  item = mapper.dna2code[item];
  
  reference_sequence.push_back(item);
  if( get_num_sequences() )
    for( vector<profdist::count_matrix>::iterator i = count_matrices.begin(), 
        e = count_matrices.end(); i < e; ++i )
      ++((*i)[item][item]);
}

/**
 * Appends one site (one alignment column) to all sequences.
 *
 * The first element of \p items belongs to the reference sequence, the
 * following elements to the remaining sequences in order. Each character is
 * translated with mapper.dna2code. A difference entry is stored for every
 * sequence whose code differs from the reference code, and every count
 * matrix is incremented at (reference code, sequence code).
 *
 * \param items  one character per sequence
 * \throws std::logic_error  if \p items is empty or its size differs from
 *                           the number of sequence names
 */
void profdist::AlignCode::push_back( vector<char> const& items )
{
  if( items.size() != sequence_names.size() )
    throw logic_error("Sequence items and available sequences do not match");
  // Without sequences both sizes are zero and there is no reference item to read.
  if( items.empty() )
    throw logic_error("Cannot append an empty set of sequence items");
  char ref_item = mapper.dna2code[items[0]];

  reference_sequence.push_back( ref_item );
  if( get_num_sequences() )
  {
    vector<profdist::count_matrix>::iterator c_it = count_matrices.begin();
    vector< d_list >::iterator a_it = alignment_codes.begin();
    // Skip the reference item; difference lists and count matrices run in parallel.
    for( vector<char>::const_iterator it = ++(items.begin()), end = items.end() ;
        it != end ; ++ it, ++a_it, ++c_it )
    {
      char item = mapper.dna2code[*it];

      if( item != ref_item )
        a_it->push_back( a_pair( reference_sequence.size() - 1, item )  );

      ++((*c_it)[ref_item][item]);
    }
  }
}

void profdist::AlignCode::debug( std::ostream & out ) const 
{
  out << "Aligncode: " << get_num_sequences() << " seq " << get_num_sites() << " sites\nref:";
  for( std::size_t i = 0; i < get_num_sites(); ++i ) 
    out << mapper.code2dna[reference_sequence[i]];

  for( std::size_t i = 0; i < get_num_sequences() - 1; ++i ) 
  {
    out << '\n';
    for( d_list::const_iterator it = alignment_codes[i].begin(), e = alignment_codes[i].end(); it!=e; ++it )
      out << '(' << it->first << ", " << mapper.code2dna[it->second] << ')'; 
  }
  out << '\n';
}

/**
 * Collects all pairs of sequences that count as identical.
 *
 * The allowed fraction of differing sites is
 * 1 - min(|percentual_identity|, 1). A pair is reported when its fraction of
 * differing sites is strictly below that value. Every reported pair is
 * inserted in both orders; index 0 denotes the reference sequence.
 *
 * NOTE(review): because the comparison is strict, a percentual_identity of
 * 1.0 reports no pair at all, not even sequences without any difference -
 * confirm that this is intended.
 *
 * \param ids                  set receiving the index pairs
 * \param percentual_identity  required identity as a fraction
 */
void profdist::AlignCode::get_identical_sequences( profdist::identical_seq_set & ids, float percentual_identity  ) const
{
  float difference_threshold = 1.0f - std::min( std::abs(percentual_identity) , 1.0f );

  // Reference against every other sequence: the length of a difference list
  // is the number of differing sites.
  for( std::size_t i = 0, e = alignment_codes.size(); i != e; ++ i )
    if( difference_threshold > float(alignment_codes[i].size()) / float(reference_sequence.size()) )
    {
      ids.insert( std::make_pair(0, i + 1) );
      ids.insert( std::make_pair(i+1, 0));
    }

  // All pairs of non-reference sequences. "index_1 + 1 < size" instead of
  // "index_1 < size - 1" so that an empty container does not wrap around.
  for( std::size_t index_1 = 0; index_1 + 1 < alignment_codes.size(); ++index_1 ) {
    for(  std::size_t index_2 = index_1+1; index_2 < alignment_codes.size(); ++index_2 ){
      const_diff_iterator b_1 = begin_difference( index_1 )
        , e_1 = end_difference( index_1 )
        , b_2 = begin_difference( index_2 )
        , e_2 = end_difference( index_2 );
      std::size_t counter = 0;
      // Walk both difference lists in step, ordered by site position.
      while( b_1 != e_1 && b_2 != e_2 ) {
        if( b_1->first == b_2->first ) 
        {
          // Both differ from the reference here; they differ from each
          // other only if their codes are not the same.
          counter += (b_1->second != b_2->second );
          ++b_1;
          ++b_2;
        }
        else if( b_1->first < b_2->first )  ++counter, ++b_1;
        else                                ++counter, ++b_2;
      }
      // Whatever is left in one list has no counterpart in the other.
      while( b_1 != e_1 )++counter, ++b_1;
      while( b_2 != e_2 )++counter, ++b_2;

      if( difference_threshold >  float(counter) / float(reference_sequence.size())  ){
        ids.insert( std::make_pair(index_1+1, index_2+1));
        ids.insert( std::make_pair(index_2+1, index_1+1));
      }
        
    }
  }

}

/** \return reference to the vector holding all sequence names. */
std::vector<std::string> const& profdist::AlignCode::get_sequence_names() const
{
  std::vector<std::string> const& names = sequence_names;
  return names;
}

/**
 * Replaces the content of this object with the sequences of a parsed
 * alignment.
 *
 * The first sequence becomes the reference sequence. For every further
 * sequence the sites that differ from the reference are stored as difference
 * entries and the matching count matrix is filled. Sequence data that ends
 * before the common length is reached is padded with gaps.
 *
 * An empty alignment clears the object.
 *
 * \param seq  parsed alignment
 * \throws std::runtime_error  if a sequence length differs from a non-zero
 *                             length seen earlier
 */
void profdist::AlignCode::read_sequences( profdist::alignment const& seq ) 
{
  using namespace std;
  typedef alignment::value_type sequence_t;
  typedef alignment::const_iterator const_it;
  typedef list<sequence_t::string_range>::const_iterator range_it;

  // Nothing to read: there is no first sequence to use as reference.
  if( seq.begin() == seq.end() )
  {
    clear();
    return;
  }
  
  size_t num_seq = seq.size();  // first sequence is stored separately

  // get length of sequences
  size_t max_length = 0;
  for( const_it b = seq.begin(), e = seq.end(); b != e; ++b )
  {
    if( max_length && max_length != b->sequence_length )
      throw runtime_error("Alignment broken, sequences have different size");
    max_length = max( max_length, b->sequence_length  );
  }

  // clear and resize
  clear_resize( num_seq, max_length);

  // The reference sequence: convert and copy each site of every string range
  // of the first sequence.
  size_t i = 0;
  for( range_it r_it = seq.begin()->sequence_data.begin(), r_e = seq.begin()->sequence_data.end(); r_it != r_e ; ++r_it )
    for( sequence_t::iterator site = r_it->first; i != max_length && site != r_it->second; ++site,++i )
      reference_sequence[i] = mapper.dna2code[*site];

  // Pad a too short first sequence with gaps.
  while( i != max_length )
    reference_sequence[i++] = mapper.dna2code['-'];

  // Getting sequence names, and creating differences to reference sequence
  {
    i = 0;
    // walk through every sequence
    for( const_it it = seq.begin(), e = seq.end(); it != e; ++it, ++i )
      sequence_names[i] = string(it->id.first, it->id.second ); // make a copy of the name 
    
    // skipping first sequence:
    i = 0;
    for( const_it it = ++(seq.begin()), e = seq.end(); it != e; ++it, ++i )
    {
      size_t position = 0; // variable to track current position
      for( range_it r_it = it->sequence_data.begin(), r_e = it->sequence_data.end(); r_it != r_e ; ++r_it ) // every string range of the sequence
      {
        for( sequence_t::iterator site = r_it->first; position != max_length && site != r_it->second; ++site,++position )
        {
          // a site differing from the reference gets an entry in alignment_codes
          if( mapper.dna2code[*site] != reference_sequence[position] )
            alignment_codes[i].push_back(a_pair( position, mapper.dna2code[*site] ) );

          // update count matrices
          ++count_matrices[i](reference_sequence[position], mapper.dna2code[*site] );
        }

      }
      const char gap = mapper.dna2code['-'];
      // Pad too short sequences with gaps
      while ( position != max_length )
      {
        if( gap != reference_sequence[position] ) // then add an entry into alignment_code 
          alignment_codes[i].push_back(a_pair( position, gap ) );
        // update count matrices
        ++count_matrices[i](reference_sequence[position], gap );
        ++position;
      }

    }
  }

}



/**
 * Moves the sequences of the next alignment from the front of \p seq into
 * \p obj.
 *
 * If num_bootstrap * alignment_size equals num_sequences, exactly
 * \p num_sequences entries are taken from the front of \p seq. Otherwise the
 * first entry is taken, followed by all entries up to (but excluding) the
 * next one for which compare_range reports that its id matches the id of
 * the first entry.
 *
 * NOTE(review): the first branch takes num_sequences entries, not
 * alignment_size entries - confirm against the callers which of the two is
 * the size of a single alignment.
 *
 * \param obj             receives the sequences via read_sequences
 * \param seq             parsed sequences; the consumed entries are removed
 * \param num_sequences   see above
 * \param num_bootstrap   see above
 * \param alignment_size  see above
 * \throws std::runtime_error  if \p seq runs out of entries
 */
void profdist::read_from_bootstrap( AlignCode & obj, alignment & seq, size_t num_sequences, size_t num_bootstrap, size_t alignment_size ) 
{
  alignment temp( seq.get_store() );
  if( num_bootstrap * alignment_size  == num_sequences ) 
  {
    for( size_t i = 0; i != num_sequences; ++i ) 
    {
      // front() and pop_front() must not be called on an empty container
      if( seq.empty() )
        throw std::runtime_error("Alignment broken, less sequences available than expected");
      temp.push_back( seq.front() );
      seq.pop_front();
    }
  }
  else {
    if( seq.empty() )
      throw std::runtime_error("Alignment broken, no sequences available");
    temp.push_back( seq.front() );
    seq.pop_front();

    // The id of the first sequence marks the start of the next alignment.
    alignment::value_type::string_range name = temp.front().id;

    while( !seq.empty() && !compare_range( seq.front().id, name) )
    {
      temp.push_back( seq.front() );
      seq.pop_front();
    }
      
  }
  obj.read_sequences( temp );
}
#endif

