Cassiopee  1.0
Suffix indexer and search tool
Cassiopee.h
1 #include <iostream>
2 #include <fstream>
3 #include <list>
4 #include <map>
5 
6 #include "tree/tree.hh"
7 
8 #include "CassiopeeConfig.h"
9 
10 #include <boost/archive/text_oarchive.hpp>
11 #include <boost/archive/text_iarchive.hpp>
12 #include <boost/archive/binary_oarchive.hpp>
13 #include <boost/archive/binary_iarchive.hpp>
14 // Provide an implementation of serialize for std::list
15 #include <boost/serialization/list.hpp>
16 
17 using namespace std;
18 
19 
20 
21 
// Match: record describing one hit, made of a position and three
// integer counters. The constructor is only declared here; its definition
// is not part of this listing.
25 class Match {
26 public:
27 
    // Equality: two matches compare equal when their pos fields are equal
    // and the sums in + del are equal. subst is not part of the comparison.
31  bool operator==(const Match& p) const {
32  return pos == p.pos && in+del == p.in + p.del;
33  }
34 
    // presumably the number of insertions in this match — confirm in the .cpp
38  int in;
    // presumably the number of deletions in this match — confirm in the .cpp
42  int del;
    // presumably the number of substitutions in this match — confirm in the .cpp
46  int subst;
47 
48 
    // Position associated with the match (compared by operator==).
49  long pos;
50 
    // Default constructor; initial field values are set in the definition,
    // which is not visible here.
51  Match();
52 
53 
54 };
55 
// TreeNode: value type stored in the tree<TreeNode> container used by the
// indexer. It is serializable through Boost.Serialization via the private
// serialize() member template below.
65 class TreeNode {
66 public:
    // Character held by this node.
70  char c;
71 
72 
    // Positions attached to this node (archived by serialize()).
76  list<long> positions;
77 
    // NOTE(review): exact meaning is defined by the indexer implementation,
    // which is not visible in this listing — confirm before relying on it.
83  long next_pos;
84 
    // Restored declaration: serialize() archives next_length and the
    // listing's index records "long next_length" at Cassiopee.h:88, but the
    // line itself was missing from the rendered listing.
88  long next_length;
89 
90 
    // Construct a node from a character.
96  TreeNode(char nc);
97 
    // Construct a node from a character and a position.
104  TreeNode(char nc, long pos);
    // Default constructor (needed so nodes can be created before being
    // filled, e.g. when reading from an archive — presumably; confirm).
105  TreeNode();
106 
107 
108 private:
    // Grants Boost.Serialization access to the private serialize() below.
109  friend class boost::serialization::access;
    // Reads or writes the node through the archive, field by field, in the
    // order c, next_pos, next_length, positions. The version argument is
    // unused.
110  template<class Archive>
111  void serialize(Archive & ar, const unsigned int /*version*/)
112  {
113  ar & c;
114  ar & next_pos;
115  ar & next_length;
116  ar & positions;
117  }
118 
119 };
120 
121 
122 
// NOTE(review): this #ifndef/#define/#endif region encloses only the
// commented-out operator<< for TreeNode. The class declarations of this
// header lie outside of it, so the macro does not act as an include guard
// for them.
123 #ifndef __CASSIOPEE_H_
124 #define __CASSIOPEE_H_
125 //inline std::ostream& operator<<(std::ostream &strm, const TreeNode &a) {
126 // return strm << "TreeNode(" << a.c << ")";
127 //}
128 #endif
129 
130 
// CassieIndexer: builds, stores, saves and loads a tree<TreeNode> index for
// the sequence file given to the constructor.
// The class header (Cassiopee.h:136) and the members do_reduction (:198)
// and max_index_depth (:221) were absent from the rendered listing and are
// restored here from the listing's own index.
136 class CassieIndexer {
137 public:
138 
139 
    // Create an indexer for the file at the given path.
145  CassieIndexer(const char* path);
146 
147  ~CassieIndexer();
148 
    // Save the index (destination is decided by the implementation).
152  void save();
153 
    // Load a previously saved index.
157  void load();
158 
159 
    // Produce a graph of the tree; the overload limits output to a depth.
    // NOTE(review): output format/location not visible here — confirm.
163  void graph();
164 
170  void graph(int depth);
171 
    // Return the suffix starting at position pos as a string.
178  string getSuffix(long pos);
179 
    // Build the index.
183  void index();
184 
    // Access the underlying tree.
188  tree<TreeNode>* getTree();
189 
    // Matches held by the indexer (by value, unlike CassieSearch::matches).
193  list<Match> matches;
194 
    // presumably enables the "reduction" optimisation of the tree — confirm
198  bool do_reduction;
199 
    // Return the character found at position pos of the current suffix
    // (presumably; confirm against the implementation).
203  char getCharAtSuffix(long pos);
204 
    // Insert into the tree the data for position pos.
210  void filltree(long pos);
211 
    // NOTE(review): depth bound; who sets it and when is not visible here.
215  long max_depth;
216 
    // presumably the maximum depth used while indexing — confirm
221  long max_index_depth;
222 
    // presumably the length of the indexed sequence — confirm
223  long seq_length;
224 
    // Tell whether the index came from a saved file (see loaded_from_file).
228  bool index_loaded_from_file();
229 
230 private:
    // Flat list of nodes, presumably the form used for (de)serialization.
231  list<TreeNode> serialized_nodes;
232 
233  bool loaded_from_file;
234 
    // Path given to the constructor and the stream opened on it
    // (presumably; the constructor body is not visible here).
235  const char* filename;
236  ifstream seqstream;
    // The index itself.
237  tree<TreeNode> tr;
238 
    // Suffix buffer bookkeeping: MAX_SUFFIX is const and therefore must be
    // set in the constructor's initializer list.
239  const long MAX_SUFFIX;
240  long suffix_position;
241  char* suffix;
242 
    // Helper for graph(): writes one node to myfile and returns a counter.
246  long graphNode(tree<TreeNode>::iterator node, long counter, ofstream& myfile, int maxdepth);
247 
    // Load the suffix at pos and return a pointer to it.
    // NOTE(review): ownership of the returned buffer is not visible here.
251  char* loadSuffix(long pos);
252 
253 
    // Reset the suffix buffer state.
257  void reset_suffix();
258 
259 
260 
    // Insert a suffix in the tree, either starting from node sib or (second
    // overload) without a start node.
268  void fillTreeWithSuffix(tree<TreeNode>::iterator sib, long suffix_pos, long pos);
269  void fillTreeWithSuffix(long suffix_pos, long pos);
270 
271 
272 
273 };
274 
275 
// CassiopeeUtils: holder for static helper functions.
// The class header (Cassiopee.h:279) was absent from the rendered listing
// and is restored here from the listing's own index.
279 class CassiopeeUtils {
280 public:
    // Transform the FASTA input named by `in` and write the result to `out`.
    // NOTE(review): both arguments are presumably file paths and the exact
    // transformation is defined in the implementation — confirm there.
284  static void transform_fasta(const string in, const string out);
285 };
286 
// Ambiguous: static helper comparing two characters with support for
// ambiguity codes. Only declarations are visible here; the tables and the
// comparison rules are defined in the implementation file.
290 class Ambiguous {
291 public:
    // Return whether characters a and b are considered equal.
299  static bool isequal(char a, char b);
300 private:
    // Helper taking a character, a table of characters b and its length len.
    // presumably returns true when a is found in b — confirm in the .cpp
309  static bool ismatchequal(char a, const char b[], int len);
    // One table per code letter (K, M, R, Y, S, W, B, V, H, D, N).
    // presumably the IUPAC nucleotide ambiguity codes, each table listing
    // the characters the code may stand for — confirm in the .cpp
310  static const char K_MATCH[];
311  static const char M_MATCH[];
312  static const char R_MATCH[];
313  static const char Y_MATCH[];
314  static const char S_MATCH[];
315  static const char W_MATCH[];
316  static const char B_MATCH[];
317  static const char V_MATCH[];
318  static const char H_MATCH[];
319  static const char D_MATCH[];
320  static const char N_MATCH[];
321 };
322 
323 
// per_position: comparison functor ordering Match pointers by ascending pos.
// The declaration line (Cassiopee.h:324) was absent from the rendered
// listing and is restored here from the listing's own index; `struct` is
// used because operator() carries no access specifier and has to be
// callable from outside.
324 struct per_position
325 {
    // Strict "less than" on the pos field of the two pointed-to matches.
    // Both pointers are dereferenced without a null check.
326  inline bool operator() (const Match* struct1, const Match* struct2)
327  {
328  return (struct1->pos < struct2->pos);
329  }
330 };
331 
// CassieSearch: runs pattern searches against the tree held by a
// CassieIndexer and collects the resulting Match objects.
// The class header (Cassiopee.h:336) and the members match_limits (:358),
// max_indel (:418) and max_subst (:422) were absent from the rendered
// listing and are restored here from the listing's own index.
336 class CassieSearch {
337 
338 public:
    // Create a search object working on the given indexer.
344  CassieSearch(CassieIndexer* index_ref);
345 
346  ~CassieSearch();
347 
    // Remove duplicate entries from matches (see same_match below for the
    // equality used — presumably; confirm in the implementation).
351  void removeDuplicates();
352 
    // String-to-string mapping.
    // NOTE(review): presumably character morphisms applied while comparing
    // — confirm in the implementation.
353  map<std::string, string> morphisms;
354 
    // NOTE(review): purpose and ownership of this pointer are not visible
    // in this listing — confirm in the implementation.
358  Match* match_limits;
359 
    // Matches found by the search, stored as pointers.
    // NOTE(review): ownership/lifetime of the pointed-to objects is not
    // visible here.
363  list<Match*> matches;
364 
    // presumably enables ambiguity-aware comparison — confirm
369  bool ambiguity;
370 
    // presumably the maximum number of N characters allowed — confirm
374  int nmax;
375 
    // Search mode selector; the meaning of each value is defined by the
    // implementation, not visible here.
383  int mode;
384 
    // Search for a pattern; `clear` presumably controls whether previous
    // matches are discarded first — confirm.
391  void search(string suffix, bool clear);
392 
393 
    // Search for a single pattern.
400  void search(string suffix);
401 
    // Search for several patterns given as an array of strings.
    // NOTE(review): the array length is not passed; how the end is detected
    // is not visible here.
407  void search(string suffixes[]);
408 
    // Compare two characters according to the current search settings.
412  bool isequal(char a,char b);
413 
414 
    // presumably the maximum number of insertions/deletions allowed — confirm
418  int max_indel;
    // presumably the maximum number of substitutions allowed — confirm
422  int max_subst;
423 
    // presumably the length of the pattern being searched — confirm
424  long pattern_length;
425 
    // Sort the matches (see per_position for a position-based ordering —
    // presumably the one used; confirm).
429  void sort();
430 
431 private:
432 
    // Equality predicate on Match pointers: dereferences both and applies
    // Match::operator==. No null check is performed.
436  static bool same_match (Match* first, Match* second)
437  { return ( *first == *second ); }
438 
    // Indexer supplied to the constructor (presumably; confirm).
439  CassieIndexer* indexer;
440 
    // Collect matches reachable from node sib, tagging them with the given
    // substitution/insertion/deletion counts.
449  void getMatchesFromNode(tree<TreeNode>::iterator sib, const int nbSubst, const int nbIn, const int nbDel);
450 
451 
452 
    // Search the pattern from position suffix_pos under node root, carrying
    // the running error counters.
464  void searchAtNode(string suffix, const long suffix_pos, const tree<TreeNode>::iterator root, int nbSubst, int nbIn, int nbDel, int nbN);
465 
    // Same as above with an explicit start node.
477  void searchAtNode(string suffix, const long suffix_pos, const tree<TreeNode>::iterator root, const tree<TreeNode>::iterator start_node, int nbSubst, int nbIn, int nbDel, int nbN);
478 
    // Search within a reduced part of the tree.
    // NOTE(review): meaning of the boolean result is not visible here.
483  bool searchAtreduction(const string suffix, const tree<TreeNode>::iterator sib, long counter, long tree_reducted_pos, int nbSubst, int nbIn, int nbDel, int nbN);
484 
485 
486 };
TreeNode::positions
list< long > positions
Definition: Cassiopee.h:76
per_position
Definition: Cassiopee.h:324
CassieIndexer
Definition: Cassiopee.h:136
CassieSearch::max_subst
int max_subst
Definition: Cassiopee.h:422
Match::operator==
bool operator==(const Match &p) const
Definition: Cassiopee.h:31
CassieIndexer::matches
list< Match > matches
Definition: Cassiopee.h:193
CassieSearch::ambiguity
bool ambiguity
Definition: Cassiopee.h:369
CassieSearch::nmax
int nmax
Definition: Cassiopee.h:374
TreeNode::next_length
long next_length
Definition: Cassiopee.h:88
TreeNode::next_pos
long next_pos
Definition: Cassiopee.h:83
CassieSearch::match_limits
Match * match_limits
Definition: Cassiopee.h:358
Match
Definition: Cassiopee.h:25
TreeNode
Definition: Cassiopee.h:65
CassiopeeUtils
Definition: Cassiopee.h:279
CassieIndexer::max_depth
long max_depth
Definition: Cassiopee.h:215
Match::subst
int subst
Definition: Cassiopee.h:46
CassieIndexer::max_index_depth
long max_index_depth
Definition: Cassiopee.h:221
CassieSearch::mode
int mode
Definition: Cassiopee.h:383
Match::in
int in
Definition: Cassiopee.h:38
CassieSearch::matches
list< Match * > matches
Definition: Cassiopee.h:363
Ambiguous
Definition: Cassiopee.h:290
CassieSearch::max_indel
int max_indel
Definition: Cassiopee.h:418
Match::del
int del
Definition: Cassiopee.h:42
CassieIndexer::do_reduction
bool do_reduction
Definition: Cassiopee.h:198
CassieSearch
Definition: Cassiopee.h:336
TreeNode::c
char c
Definition: Cassiopee.h:70