/***************************************************************************
 *   Copyright (C) 2005 by Andreas Pokorny                                 *
 *   andreas.pokorny@biozentrum.uni-wuerzburg.de                           *
 *                                                                         *
 *   This file is part of profdist and cbcanalyzer                         *
 *                                                                         *
 *   Both profdist and cbcanalyzer are free software; you can redistribute *
 *   it and/or modify it under the terms of the GNU General Public License *
 *   as published by the Free Software Foundation; either version 2 of the *
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   Profdist and cbcanalyzer are distributed in the hope that it will be  *
 *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty   *
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/


/**
 * @file parsed_sequence.h defines a few raw structures which are 
 * used when parsing FASTA, EMBL, or other formats. These structures
 * have to be lightweight, and are supposed to be used for simple 
 * transportation from parser to user. 
 * @author Andreas Pokorny
 */

#ifndef PROFDIST_LIB_PARSED_SEQUENCE_H_INCLUDED
#define PROFDIST_LIB_PARSED_SEQUENCE_H_INCLUDED

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <list>
#include <set>
#include <string>
#include <utility>

namespace profdist {

/**
 * Lightweight record describing one parsed sequence entry.
 *
 * Nothing is copied out of the parsed input: every piece of text is stored
 * as a pair of iterators (a string_range). The record holds the position of
 * the sequence identifier, its classification (a list of positions) and the
 * position of the sequence itself, i.e. the greatest common denominator
 * (plus the classification) of the fasta, embl and the different ct formats.
 *
 * @tparam Iterator iterator type used to refer to the parsed input
 */
template<typename Iterator>
struct sequence
{
  /// Iterator type the record was instantiated with.
  typedef Iterator iterator;
  /// A piece of text, stored as a pair of iterators.
  typedef std::pair<iterator,iterator> string_range;

  /// Creates a record whose sequence_length is 0.
  sequence();

  /// Returns the stored sequence_length.
  std::size_t get_sequence_length() const;

  /// Empties all lists and sets sequence_length back to 0.
  void clear();

  /// Position of the sequence identifier.
  string_range id;
  /// Classification of the sequence, as a list of positions.
  std::list<string_range> classification;
  /// Position(s) of the sequence itself.
  std::list<string_range> sequence_data;
  /// Presumably fold information kept as text ranges - TODO confirm with the parsers.
  std::list<string_range> char_fold_data;
  /// Fold information (the original author noted this might better be a vector).
  std::list<std::size_t> fold_data;
  /// Length value reported by get_sequence_length().
  std::size_t sequence_length;
};


#if 0
// Disabled draft, compiled out by the surrounding "#if 0".
// It walks all index pairs (i, j) with i < j over `sequences`, but the inner
// loop body is empty, so nothing is ever written to `ids`.
// Presumably meant to collect the index pairs of sequences whose identity
// reaches `percentual_identity` - TODO confirm before re-enabling.
// NOTE(review): DNAMapper and get_mapper_instance() are not declared in this
// header, and `e - 1` wraps around when `sequences` is empty.
template<typename Iterator> 
void get_identical_sequences( std::list<sequence<Iterator> > const& sequences
    , std::set<std::pair<std::size_t,std::size_t> > & ids 
    , float percentual_identity) {
 DNAMapper & mapper  = get_mapper_instance();
 for(std::size_t i = 0, e = sequences.size(); i != e - 1; ++ i ) {
   for(std::size_t j = i + 1; j != e; ++ j ) {
   }
 }
}
#endif

/*template< typename IteratorT >
std::ostream& operator<<( std::ostream & out, std::pair<IteratorT,IteratorT> const& r )
{ out.write( &*(r.first), &*(r.second) -&*(r.first) ); return out; }*/

/**
 * Accessor for the stored sequence length.
 *
 * @return the current value of the sequence_length member; nothing is
 *         recomputed from the stored ranges.
 */
template<typename Iterator>
size_t sequence<Iterator>::get_sequence_length() const
{
  return this->sequence_length;
}

/**
 * Empties the record so it can be filled again.
 *
 * All four lists are emptied and sequence_length is set to 0.
 * Note that the id range is not touched by this function.
 */
template<typename Iterator>
void sequence<Iterator>::clear()
{
  // Lists are emptied in member declaration order; the calls are independent.
  this->classification.clear();
  this->sequence_data.clear();
  this->char_fold_data.clear();
  this->fold_data.clear();

  this->sequence_length = 0;
}

/**
 * Creates an empty record: the id range is value-initialized, all lists are
 * empty and sequence_length is 0.
 */
template<typename Iterator>
sequence<Iterator>::sequence()
  : id()
  , classification()
  , sequence_data()
  , char_fold_data()
  , fold_data()
  , sequence_length(0)
{
}

/**
 * Tests whether two iterator ranges contain the same elements.
 *
 * @param a first range, given as a [first, second) pair of iterators
 * @param b second range, given as a [first, second) pair of iterators
 * @return true if both ranges have the same length and all corresponding
 *         elements compare equal (two empty ranges are equal), false
 *         otherwise.
 */
template<typename IteratorT>
bool compare_range( std::pair<IteratorT,IteratorT> const& a, std::pair<IteratorT,IteratorT> const& b )
{
  // Ranges of different length can never match. Checking this first also
  // guarantees that std::equal stays inside b.
  if( std::distance( a.first, a.second ) != std::distance( b.first, b.second ) )
    return false;

  // Only elements inside the ranges are compared; the end iterators are never
  // dereferenced, so whatever follows the ranges cannot influence the result.
  return std::equal( a.first, a.second, b.first );
}

/**
 * Ordering predicate that sorts sequences by their identifier.
 *
 * The id ranges of both sequences are compared lexicographically, element by
 * element, using operator<.
 *
 * @param left  sequence on the left-hand side of the comparison
 * @param right sequence on the right-hand side of the comparison
 * @return true if the id of left orders strictly before the id of right.
 *         Sequences with equal ids are not "less" than each other, which
 *         keeps the predicate a strict weak ordering as sorting requires.
 */
template <typename IteratorT>
bool by_name_sorter( sequence<IteratorT> const& left, sequence<IteratorT> const& right ) 
{
  // A proper prefix orders before the longer id; identical ids yield false.
  return std::lexicographical_compare( left.id.first, left.id.second
                                     , right.id.first, right.id.second );
}
}

#endif

