/***************************************************************************
 *   Copyright (C) 2005 by Andreas Pokorny                                 *
 *   andreas.pokorny@biozentrum.uni-wuerzburg.de                           *
 *                                                                         *
 *   This file is part of profdist and cbcanalyzer                         *
 *                                                                         *
 *   Both profdist and cbcanalyzer are free software; you can redistribute * 
 *   it and/or modify it under the terms of the GNU General Public License * 
 *   as published by the Free Software Foundation; either version 2 of the * 
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   Profdist and cbcanalyzer are distributed in the hope that it will be  *
 *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty   *
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#ifndef PROFDIST_ALIGNCODE_H_D
#define PROFDIST_ALIGNCODE_H_D

#include <vector>
#include <list>
#include <assert.h>
#include <string>
#include <cctype>
#include "parsed_sequence.h"
#include "parsed_alignment.hpp"
#include "dnamapper.h"
#include "types.h"
#include "traits.hpp"


namespace profdist {
/**
 * AlignCode creates a reference sequence out of all sequences of one alignment, in order to
 * store only the differences of each sequence to the reference sequence. Currently the
 * first sequence is used as the reference sequence. This adds a worst case to the class: when,
 * for example, the first sequence totally differs from all other sequences, while these might
 * have more similarities among themselves, the AlignCode class needs more memory than a normal
 * plain storage. There is a solution: store another, virtual sequence (in addition, because the
 * first sequence is still needed for the count matrices).
 *
 * In that case update the iterators! - Better: redesign the iterators to share the same implementation
 * by using the curiously recurring template idiom.
 *
 * AlignCode stores the acids not in their ANSI character representation, but in a code of 16 numbers
 * starting at zero. For readable i/o use dnamapper.
 */
template<typename Traits  = rna_traits>
class AlignCode
{
  private:
  public:
    /**
     * The basic element type, i.e. the encoded representation of a single
     * sequence element as defined by the Traits class.
     */
    typedef typename Traits::encoding_type element_type;

    /**
     * Represents a count matrix belonging to a specific sequence.
     * It is a square matrix of size_t counters with
     * Traits::num_relevant_elements rows and columns.
     */
    typedef fixed_matrix<size_t,Traits::num_relevant_elements,Traits::num_relevant_elements>  count_matrix;

    /**
     * a_pair is used in the sequence representations to indicate
     * at which position the sequence differs from the
     * reference sequence, and by which character.
     * first is the position, second the element at that position.
     */
    typedef std::pair<size_t,element_type> a_pair;

    /**
     * d_list represents a sequence by its difference entries
     * to the reference sequence. These differences are objects
     * of type a_pair, which store the position and the
     * character at this position.
     */
    typedef std::list<a_pair> d_list;

    /**
     * Constructs an empty AlignCode object.
     */
    AlignCode();

    /**
     * Constructs an AlignCode object with num_sequences sequences, each having
     * num_sites gaps as entries.
     * @param num_sequences number of sequences to create
     * @param num_sites number of elements per sequence
     */
    AlignCode( size_t num_sequences, size_t num_sites );

    /**
     * Returns the number of sequences stored in this object.
     */
    size_t get_num_sequences() const;

    /**
     * Returns the number of elements in a sequence stored in this object.
     */
    size_t get_num_sites() const;

    /**
     * Returns the element of the reference sequence at the supplied position.
     * @param position position of the sequence element
     * @returns sequence element of reference sequence
     */
    element_type get_reference_element( size_t position ) const;

    /**
     * Clears all data structures of this object.
     */
    void clear();

    /**
     * Creates empty structures for num_sequences new sequences
     * and sets the reference sequence to a size of num_sites elements.
     * @param num_sequences number of sequences
     * @param num_sites number of elements of the reference sequence
     */
    void resize( size_t num_sequences, size_t num_sites );

    /**
     * Initializes this object from a list of sequences.
     * @param seq the parsed alignment to read the sequences from
     */
    void read_sequences( alignment const& seq );

    /**
     * Wraps the heterogeneous data structure of AlignCode into an
     * STL-compatible random access iterator.
     *
     * The member functions dereference, equal, increment, decrement, advance
     * and distance_to are the core operations that boost::iterator_facade
     * uses to build the public iterator interface.
     *
     * Note that the facade's Reference parameter is "element_type const",
     * so dereferencing yields the element by value, not by reference.
     */
    struct const_sequence_iterator 
      : boost::iterator_facade<const_sequence_iterator,element_type const, std::random_access_iterator_tag, element_type const> 
    {
      AlignCode const* ref;   ///< The AlignCode object this iterator walks on.
      size_t seq_index;       ///< Index of the sequence this iterator belongs to.
      size_t position;        ///< Current position within the sequence.
      typename d_list::const_iterator it; ///< Stores an iterator pointing on alignment_code.

      /**
       * Default constructor.
       */
      const_sequence_iterator();

      /**
       * Constructs an iterator pointing on the first element of the sequence;
       * only used by the constant begin( size_t ).
       * @param r the AlignCode object to iterate on
       * @param seq index of the sequence
       */
      const_sequence_iterator( AlignCode const& r, size_t seq );

      /**
       * Constructs an iterator pointing on the last element of the sequence;
       * only used by the constant end( size_t ).
       * @param r the AlignCode object to iterate on
       * @param seq index of the sequence
       * @param pos position within the sequence
       * @param it iterator into the difference list of the sequence
       */
      const_sequence_iterator( AlignCode const& r, size_t seq, size_t pos, typename d_list::const_iterator const& it );

      /// Core operation: yields the element at the current position (by value).
      element_type const dereference()	const;
      /// Core operation: compares this iterator with other for equality.
      bool equal( const_sequence_iterator const& other )const;
      /// Core operation: moves the iterator one element forward.
      void increment();
      /// Core operation: moves the iterator one element backward.
      void decrement();
      /// Core operation: moves the iterator by the given signed offset.
      void advance( long );
      /// Core operation: distance to other.
      /// NOTE(review): boost::iterator_facade documents distance_to as being
      /// convertible to the iterator's difference type; this declaration
      /// returns void, so iterator subtraction cannot yield a value. Confirm
      /// against the definition in aligncode.inl before changing it.
      void distance_to( const_sequence_iterator const& other ) const;
    };


#if 0
    Unused code .. 
    /**
     * Wraps the heterogeneous data structure of AlignCode into an
     * STL-compatible random access iterator.
     * This is the non-const version of const_sequence_iterator,
     * so you can change elements of the alignment here.
     * TODO: Not complete yet, that iterator needs a proxy class
     * that tracks changes to the sequence element changed.
     */
    struct sequence_iterator : 
      public std::iterator<std::random_access_iterator_tag, char> 
    {
     
      AlignCode * ref;
      size_t seq_index;
      size_t position;
      mutable d_list::iterator it;
      /**
       * CharProxy detects changes to the char returned by the non-const
       * iterator. This class needs a lot of testing; issues could arise
       * for which removing the operator bool is the only solution, which
       * again will cause problems. E.g. it is impossible to get a char
       * reference and forward that one to a different function that could
       * change it.
       *
       * Test if further operators are needed!
       *
       * Watch out! This is still a work-in-progress.
       */
      struct CharProxy {
        sequence_iterator const& it;
        explicit CharProxy( sequence_iterator const& it );

        /**
         * This op= handles updates to the internal data structures of AlignCode,
         * thus the sequences won't get invalidated here. The parameter
         * will be converted into the code used within storage if it is a readable
         * character; otherwise it will be interpreted as an already converted character.
         */
        CharProxy const& operator=( char const& rhs ) const;
 
        /**
         * Test when this could break with expected behaviour!
         */
       // operator char() const;
      };

      CharProxy local_proxy;
      
      sequence_iterator(  );
      /**
       * Constructs an iterator pointing on the first element of the sequence;
       * only used by the non-constant begin( size_t ).
       */
      sequence_iterator( AlignCode & r, size_t seq );
      /**
       * Constructs an iterator pointing on the last element of the sequence;
       * only used by the non-constant end( size_t ).
       */
      sequence_iterator( AlignCode & r, size_t seq, size_t pos, d_list::iterator const& it );

      sequence_iterator & operator++();
      sequence_iterator operator++( int );
      sequence_iterator & operator--();
      sequence_iterator operator--( int );
      sequence_iterator operator+( long diff ) const;
      sequence_iterator operator-( long diff ) const;
      sequence_iterator & operator+=( long diff );
      sequence_iterator & operator-=( long diff );

      CharProxy const& operator*() const;

    };

    /**
     * Iterator to walk on all sequences.
     * TODO: Rethink..
     */
    struct AlignCodeIterator{
      
    };

    friend class sequence_iterator;
    friend class CharProxy;
#endif

    // Grants the iterator access to the private storage declared below.
    friend class const_sequence_iterator;

    /**
     * @brief returns the name of a sequence
     * @param sequence_index the index of the sequence
     * Index 0 points on the first and reference sequence.
     */
    std::string const& get_sequence_name( size_t sequence_index ) const;

    /**
     * @brief returns all names in a vector
     * Index 0 points on the first and reference sequence. Many of the profdist
     * algorithms need a vector of strings as an additional parameter.
     */
    std::vector<std::string> const& get_sequence_names() const;
    
    /**
     * @brief sets a sequence name
     * @param s the new name of that sequence
     * @param sequence_index the index of the sequence
     * Index 0 points on the first and reference sequence.
     */
    void set_sequence_name( std::string const& s, size_t sequence_index );

    /**
     * @brief returns a const iterator pointing on the beginning of a certain sequence
     * @param sequence_index the index of the sequence to iterate on
     * Index 0 points on the first/reference sequence.
     */
    const_sequence_iterator begin( size_t sequence_index ) const;
    /**
     * @brief returns a const iterator pointing behind the end of a certain sequence
     * @param sequence_index the index of the sequence to iterate on
     * Index 0 points on the first/reference sequence. The position of the iterator
     * is one step behind the last element of that sequence.
     */
    const_sequence_iterator end( size_t sequence_index ) const;

    /**
     * @brief returns an iterator pointing on the beginning of a certain sequence
     * @param sequence_index the index of the sequence to iterate on
     * Index 0 points on the first/reference sequence. Note: This iterator does
     * not behave like a normal iterator; changes are done through a proxy object.
     * Do not try to cast the dereferenced object to char!
     * (Currently disabled, see the declaration commented out below.)
     */
    //sequence_iterator begin( size_t sequence_index );
    /**
     * @brief returns an iterator pointing behind the end of a certain sequence
     * @param sequence_index the index of the sequence to iterate on
     * Index 0 points on the first/reference sequence. Note: This iterator does
     * not behave like a normal iterator; changes are done through a proxy object.
     * Do not try to cast the dereferenced object to char!
     *
     * The position of the iterator is one step behind the last element of that sequence.
     * (Currently disabled, see the declaration commented out below.)
     */
    //sequence_iterator end( size_t sequence_index );

    /**
     * @brief Returns the count matrix of the first and the ith sequence.
     * @param i index of the other sequence, compared to the first
     * Index 0 points onto the second sequence.
     */
    count_matrix const& get_matrix( size_t i ) const;

#if 0 
    /**
     * @brief Adds a sequence element to the end of each sequence.
     * @param item the new sequence item
     * The new sequence item may be converted into the reduced code
     * or may be in the ANSI representation of DNA.
     */
    void push_back( char item );
#endif

    /**
     * @brief Adds sequence elements to the end of each sequence.
     * @param items the new sequence items, one per sequence
     * @throws logic_error if items is too small, or too big
     * The new sequence items must already be converted into the reduced code.
     *
     * items.size() must match the number of sequences.
     */
    void push_back( std::vector<element_type> const& items );

    // typedef d_list::iterator diff_iterator; // might be too dangerous
    /**
     * Read-only iterator over the difference entries (a_pair) of one sequence.
     */
    typedef typename d_list::const_iterator const_diff_iterator;

    /**
     * @brief returns a const iterator on the first difference of the i+1th sequence to the first sequence.
     * @param i index into the difference lists
     */
    const_diff_iterator begin_difference( std::size_t i ) const;

    /**
     * @brief returns a const iterator behind the last difference of the i+1th sequence to the first sequence.
     * @param i index into the difference lists
     */
    const_diff_iterator end_difference( std::size_t i ) const;


    /**
     * Prints the reference sequence and the difference vectors into the stream. 
     */
    //void debug( std::ostream & out ) const;

    /**
     * Reports identical sequences through the output parameter ids.
     * @param ids receives the result
     * @param percentual_identity identity threshold
     * NOTE(review): whether percentual_identity is a fraction or a percentage,
     * and whether ids is cleared first, is not visible in this header -
     * confirm in aligncode.inl.
     */
    void get_identical_sequences( profdist::identical_seq_set & ids, float percentual_identity  ) const;

  private:
    
    std::vector<d_list> alignment_codes; ///< vector with alignment codes (difference lists)
    std::vector<std::string > sequence_names; ///< vector with sequence names
    std::vector<count_matrix> count_matrices; ///< vector with count matrices N
    // NOTE(review): stored as char although get_reference_element() returns
    // element_type - confirm that Traits::encoding_type always fits in a char.
    std::vector<char > reference_sequence; ///< vector with first sequence

    /**
     * This method does not act like resize, since it does not ensure consistent count
     * matrices or aligncodes. For internal use only, when data structures get reset
     * by read_sequences. Aligncodes and count_matrices get cleared, and later resized.
     * @param num_sequences number of sequences
     * @param num_sites number of elements per sequence
     */
    void clear_resize( size_t num_sequences, size_t num_sites );

};


/**
 * Prints the contents of the AlignCode object in a fixed well-formed format. 
 * The output will look like this:<br/>
 * ALIGNCODE: &lt;NUM_SEQ&gt; SEQ &lt;NUM_SITES&gt; SITES\n<br/>
 * 0:&lt;SEQUENCE&gt;\n<br/>
 * 1:&lt;SEQUENCE&gt;\n<br/>
 * ...
 * N-1:&lt;SEQUENCE&gt;\n<br/>
 *
 * @param out the stream to print into
 * @param obj the AlignCode object to print
 */
template<typename Traits>
std::ostream & operator<<( std::ostream & out, AlignCode<Traits> const& obj );

/**
 * @brief parses aligncode from a file
 * @param obj the AlignCode object that receives the parsed data
 * @param filename name of the file to parse
 * @param t type of the file, currently just Fasta and Embl, see doc of FileType
 * @throws runtime_error when parsing fails 
 */
template<typename Traits>
void parse_file( AlignCode<Traits> & obj, std::string const& filename, profdist::FileType t );

/**
 * @brief write aligncode into a filestream 
 * @param obj the AlignCode object to write
 * @param file the output stream to write into
 * @param t type of the file, currently just Fasta and Embl, see doc of FileType
 * @returns an output stream reference (presumably file itself - confirm in aligncode.inl)
 */
template<typename Traits>
std::ostream& write_file( AlignCode<Traits> & obj, std::ostream& file, profdist::FileType t );

/**
 * @brief non-template overload of write_file for AlignCode objects using rna_structure_traits
 * @param obj the AlignCode object to write
 * @param file the output stream to write into
 * @param t type of the file, see doc of FileType
 * NOTE(review): this is a plain (non-template, non-inline) function declared in a
 * header; its definition is not visible here - confirm it is defined exactly once.
 */
std::ostream& write_file(AlignCode<profdist::rna_structure_traits>& obj, std::ostream& file, profdist::FileType t);

//std::ostream& write_file( AlignCode<rna_traits> & obj, std::ostream& file, profdist::FileType t );

/**
 * @brief write aligncode into a filestream 
 * @param obj 
 * @param filename
 * @param t type of the file, currently just Fasta and Embl, see doc of FileType
 */
//std::ostream& write_file( AlignCode<protein_traits> & obj, std::ostream& file, profdist::FileType t );

/**
 * @brief reads an alignment from a parsed bootstrap
 * @param obj the aligncode object that receives the bootstrap block
 * @param seq the parsed sequences 
 * @param num_sequences number of sequences parsed
 * @param num_bootstrap number of bootstraps
 * @param alignment_size number of sequences per alignment
 */
template<typename Traits>
void read_from_bootstrap( AlignCode<Traits> & obj, alignment & seq, size_t num_sequences, size_t num_bootstrap, size_t alignment_size );

}

#include "aligncode.inl"

#endif

