Hitmap 1.3
 All Data Structures Namespaces Files Functions Variables Typedefs Friends Macros Groups Pages
SWseq.c
Go to the documentation of this file.
1 
28 /*
29  * <license>
30  *
31  * Hitmap v1.2
32  *
33  * This software is provided to enhance knowledge and encourage progress in the scientific
34  * community. It should be used only for research and educational purposes. Any reproduction
35  * or use for commercial purpose, public redistribution, in source or binary forms, with or
36  * without modifications, is NOT ALLOWED without the previous authorization of the copyright
37  * holder. The origin of this software must not be misrepresented; you must not claim that you
38  * wrote the original software. If you use this software for any purpose (e.g. publication),
39  * a reference to the software package and the authors must be included.
40  *
41  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY
42  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
43  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
44  * THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
45  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
46  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
48  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
49  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50  *
51  * Copyright (c) 2007-2015, Trasgo Group, Universidad de Valladolid.
52  * All rights reserved.
53  *
54  * More information on http://trasgo.infor.uva.es/
55  *
56  * </license>
57 */
58 
59 // Hitmap Include
60 #include <hitmap.h>
61 
62 // Include the common variables and utilities for all the versions
63 #include "SWcommon.h"
64 
65 // Include the common variables and utilities for the Hitmap versions
66 #include "SWcommon_hit.h"
67 
68 
69 // **********************************************************************
70 // Declarations of the different phases
71 // **********************************************************************
72 
77 
81 void phase_comp_hmatrix();
82 
86 void phase_comp_back();
87 
88 
89 // **********************************************************************
90 // Global variables
91 // **********************************************************************
92 
93 // Protein sequences
94 HitTile_aa_t p1;
95 HitTile_aa_t p2;
96 
97 // Similarity matrix
98 HitTile_h_t H;
99 #define h(i,j) (hit_tileElemAtNoStride(H,2,i,j))
100 
101 // Traceback matrices
102 HitTile_trace_t xTraceback;
103 HitTile_trace_t yTraceback;
104 #define xback(i,j) (hit_tileElemAtNoStride(xTraceback,2,i,j))
105 #define yback(i,j) (hit_tileElemAtNoStride(yTraceback,2,i,j))
106 
107 // Output proteins with the alignment
108 HitTile_aa_t out1;
109 HitTile_aa_t out2;
110 
111 // X and Y coordinates of the maximum value (End coordinates of the match)
112 int xMax;
113 int yMax;
114 
115 // Begin coordinates of the match
116 int xBegin;
117 int yBegin;
118 
119 // Match lengths (total, sequence 1, sequence 2)
123 
124 // Output match begin
126 
127 
128 
129 
133 int main(int argc, char *argv[]) {
134 
135  // **********************************************************************
136  // Hitmap init
137  // **********************************************************************
138  // We need this just for the clocks.
139  hit_comInit( &argc, &argv );
140 
141  // **********************************************************************
142  // Check input parameters
143  // **********************************************************************
144  input_parameters(argc, argv);
145 
146 
147  // **********************************************************************
148  // Timers
149  // **********************************************************************
150  HitClock time_init;
151  HitClock time_comp;
153  HitClock time_comp_read;
154  HitClock time_comp_hmatrix;
155  HitClock time_comp_back;
157  hit_clockReset(time_comp_read);
158  hit_clockReset(time_comp_hmatrix);
159  hit_clockReset(time_comp_back);
160 
161 
162  // **********************************************************************
163  // INIT PHASE: Structure allocation and PAM matrix load
164  // **********************************************************************
165 
166  // Init time
167  hit_clockStart(time_init);
168 
169  // read the PAM matrix
170  initPAM(pam_name);
171 
172  // Open the proteins files
175 
176  // Allocate memory for the proteins
177  hit_tileDomainAlloc(&p1, aa_t, 1, size[0]+1);
178  hit_tileDomainAlloc(&p2, aa_t, 1, size[1]+1);
179 
180  // Similarity matrix
181  hit_tileDomainAlloc(&H, h_t, 2, size[0]+1, size[1]+1);
182 
183  // Traceback matrices
184  hit_tileDomainAlloc(&xTraceback, trace_t, 2 ,size[0]+1, size[1]+1);
185  hit_tileDomainAlloc(&yTraceback, trace_t, 2 ,size[0]+1, size[1]+1);
186 
187  // Allocate size of the output
188  hit_tileDomainAlloc(&out1, aa_t, 1 ,size[0]+size[1]+1);
189  hit_tileDomainAlloc(&out2, aa_t, 1 ,size[0]+size[1]+1);
190 
191  // Init time
192  hit_clockStop(time_init);
193 
194 
195  // **********************************************************************
196  // WHOLE COMPUTATION PHASE: iteration of: Read, H matrix, Backtracking
197  // **********************************************************************
198 
199  // Computation time
200  hit_clockStart(time_comp);
201 
202  int iter;
203  for(iter=0;iter<iterations;iter++){
204 
205 
206 #ifdef DEBUG
207  printf("=== Iteration: %d/%d ===\n\n",iter,iterations);
208 #endif
209 
210  // **********************************************************************
211  // READ PHASE
212  // **********************************************************************
213  hit_clockContinue(time_comp_read);
215  hit_clockStop(time_comp_read);
216 
217 
218  // **********************************************************************
219  // H MATRIX PHASE
220  // **********************************************************************
221  hit_clockContinue(time_comp_hmatrix);
223  hit_clockStop(time_comp_hmatrix);
224 
225 
226  // **********************************************************************
227  // BACKTRACKING PHASE
228  // **********************************************************************
229  hit_clockContinue(time_comp_back);
230  phase_comp_back();
231  hit_clockStop(time_comp_back);
232 
233  } // Iterations Loop
234 
235  // Computation time
236  hit_clockStop(time_comp);
237 
238 
239 
240  // **********************************************************************
241  // Show results
242  // **********************************************************************
243 
244  printf("=== Result ===\n");
245  printf("Init time: %f\n",hit_clockGetSeconds(time_init));
246  printf("Comp time: %f\n",hit_clockGetSeconds(time_comp));
247  printf(" Read: %f\n",hit_clockGetSeconds(time_comp_read));
248  printf(" H matrix: %f\n",hit_clockGetSeconds(time_comp_hmatrix));
249  printf(" Backtracking: %f\n\n",hit_clockGetSeconds(time_comp_back));
250 
251  printf("Last Match length of A: %d\n", match_length1);
252  printf("Last Match length of B: %d\n", match_length2);
253  printf("Last Alignment length: %d\n", match_length);
254 
255  // Print the result for small inputsets
256  if(match_length < 50){
257  printf("\n");
258  printf("Last MCH1: "); printProteinMatch(p1,xBegin,xMax);
259  printf("Last MCH2: "); printProteinMatch(p2,yBegin,yMax);
260  printf("\n");
261 
262  printf("Last OUT1: "); printProtein(out1,match_pos,match_length);
263  printf("Last OUT2: "); printProtein(out2,match_pos,match_length);
264  } else {
265  printf("\nInputset too big to print to stdout\n");
266  }
267 
268  if( pfile1.rewind || pfile2.rewind ){
269  printf("\nSmall inputset, it has been rewinded:\n"
270  " %s: %8d times\n"
271  " %s: %8d times\n", prot_name1,pfile1.rewind, prot_name2, pfile2.rewind);
272  }
273 
274 
275 
276  // **********************************************************************
277  // Free all the resources
278  // **********************************************************************
279 
280  // Free all the resources
281  hit_tileFree(p1);
282  hit_tileFree(p2);
285  hit_tileFree(H);
288 
289 
290  // We need this just for the clocks.
291  hit_comFinalize();
292 
293  return EXIT_SUCCESS;
294 
295 }
296 
297 
298 
299 
300 void phase_read_sequences(){
301 
302  // Read the proteins
303  readProtein(&pfile1, &p1, size[0]);
304  readProtein(&pfile2, &p2, size[1]);
305 
306 
307 #ifdef DEBUG
308  printf("PRT1: "); printProtein(p1,0,size[0]);
309  printf("PRT2: "); printProtein(p2,0,size[1]);
310  printf("\n");
311 #endif
312 
313 
314 }
315 
316 
317 
318 void phase_comp_hmatrix(){
319 
320 
321  // Compute the Similarity Matrix (H)
322 
323  // Initialize values
324  int i,j;
325  for (i=0;i<=size[0];i++){
326  xback(i,0) = -1;
327  yback(i,0) = -1;
328  h(i,0)=0;
329  }
330 
331  for (j=0;j<=size[1];j++){
332  xback(0,j) = -1;
333  yback(0,j) = -1;
334  h(0,j)=0;
335  }
336 
337 
338  // Maximum values to retrieve the alignment
339  h_t Max = -1;
340  xMax = -1;
341  yMax = -1;
342 
343  // Perform wavefront algorithm
344  for(i=1; i<size[0]+1; i++){
345  for(j=1; j<size[1]+1; j++){
346 
347  // Get the scores
348  h_t diag = h(i-1,j-1) + pam(hit_tileElemAt(p1,1,i),hit_tileElemAt(p2,1,j));
349  h_t down = h(i-1,j ) + gapPenalty;
350  h_t right = h(i ,j-1) + gapPenalty;
351 
352  // Calculate the maximum
353  int idx;
354  h_t max = MAX4(diag,down,right,0,&idx);
355  h(i,j) = max;
356 
357  // Set the back trace variables.
358  if ( idx == 0 ){ // max == diag
359  xback(i,j) = (trace_t) (i-1);
360  yback(i,j) = (trace_t) (j-1);
361  } else if ( idx == 1 ) { // max == down
362  xback(i,j) = (trace_t) (i-1);
363  yback(i,j) = (trace_t) j;
364  } else if ( idx == 2 ) { // max == right
365  xback(i,j) = (trace_t) i;
366  yback(i,j) = (trace_t) (j-1);
367  } else { // max == 0
368  xback(i,j) = -1;
369  yback(i,j) = -1;
370  }
371 
372  // Maximum
373  if(max >= Max){
374  xMax = i;
375  yMax = j;
376  Max = max;
377  }
378 
379  } // End loop j;
380  } // End loop i;
381 
382 
383 #ifdef DEBUG
384  // Print
385  printf(" ");
386  for (j=0;j<=size[1];j++){
387  if(j==0)
388  printf(" -");
389  else
390  printf("%6c",AA2char(hit_tileElemAt(p2,1,j)));
391  }
392  printf("\n");
393  for (i=0;i<=size[0];i++){
394 
395  if(i==0)
396  printf("- |");
397  else
398  printf("%c |",AA2char(hit_tileElemAt(p1,1,i)));
399 
400  for (j=0;j<=size[1];j++){
401  printf("%6.1f",(double)h(i,j));
402  }
403  printf("\n");
404  }
405  printf("\n");
406 #endif
407 
408 
409 #ifdef DEBUG
410  // Print
411  printf(" ");
412  for (j=0;j<=size[1];j++){
413  if(j==0)
414  printf(" -");
415  else
416  printf("%8c",AA2char(hit_tileElemAt(p2,1,j)));
417  }
418  printf("\n");
419  for (i=0;i<=size[0];i++){
420 
421  if(i==0)
422  printf("- |");
423  else
424  printf("%c |",AA2char(hit_tileElemAt(p1,1,i)));
425 
426  for (j=0;j<=size[1];j++){
427  printf("(%2d,%2d) ",xback(i,j),yback(i,j));
428  }
429  printf("\n");
430  }
431  printf("\n");
432 #endif
433 
434 
435 
436 }
437 
438 
439 
440 
441 void phase_comp_back(){
442 
443 
444  // reset to max point to do alignment
445  // We start at the maximum point
446  int i=xMax;
447  int j=yMax;
448 
449  // Position of the output proteins.
450  int pos=size[0]+size[1]+1;
451  int outsize=0;
452 
453  while (i>0 && j>0 && h(i,j) > 0){
454 
455  int movex = (i>xback(i,j));
456  int movey = (j>yback(i,j));
457 
458  pos--;
459  outsize++;
460 
461  if (movex && movey) {
462  hit_tileElemAt(out1,1,pos)=hit_tileElemAt(p1,1,i);
463  hit_tileElemAt(out2,1,pos)=hit_tileElemAt(p2,1,j);
464  } else if (movey) {
465  hit_tileElemAt(out1,1,pos)=GAP_AA;
466  hit_tileElemAt(out2,1,pos)=hit_tileElemAt(p2,1,j);
467  } else if (movex) {
468  hit_tileElemAt(out1,1,pos)=hit_tileElemAt(p1,1,i);
469  hit_tileElemAt(out2,1,pos)=GAP_AA;
470  }
471 
472  // Move to the next point.
473  int tempi=i;
474  int tempj=j;
475  i=xback(tempi,tempj);
476  j=yback(tempi,tempj);
477  }
478 
479  pos--;
480 
481  match_length = outsize;
482  match_length1 = xMax - i;
483  match_length2 = yMax - j;
484 
485  xBegin = i;
486  yBegin = j;
487 
488  match_pos = pos;
489 
490 #ifdef DEBUG
491 
492  printf("Match length of A: %d\n", match_length1);
493  printf("Match length of B: %d\n", match_length2);
494  printf("Alignment length: %d\n", match_length);
495  printf("\n");
496 
497  // Print the result for small inputsets
498  if(match_length < 50){
499  printf("\n");
500  printf("MCH1: "); printProteinMatch(p1,xBegin,xMax);
501  printf("MCH2: "); printProteinMatch(p2,yBegin,yMax);
502  printf("\n");
503 
504  printf("OUT1: "); printProtein(out1,match_pos,outsize);
505  printf("OUT2: "); printProtein(out2,match_pos,outsize);
506  printf("\n");
507  }
508 
509 #endif
510 
511 }
512 
513 
514 
HitTile_h_t H
Definition: SWpar.c:243
#define xback(i, j)
Definition: SWseq.c:104
HitTile_aa_t p1
Definition: SWpar.c:206
double h_t
Definition: SWcommon.h:61
int match_pos
Definition: SWpar.c:227
int iter
Definition: mmult_bit.c:68
#define hit_tileElemAt(var, ndims,...)
Definition: hit_tile.h:519
void printProteinMatch(HitTile_aa_t p, int begin, int end)
Definition: SWcommon_hit.c:90
HitTile_aa_t out1
Definition: SWpar.c:210
void readProtein(ProteinFile *fprotein, HitTile_aa_t *protein, int psize)
Definition: SWcommon_hit.c:48
int rewind
Definition: SWcommon.h:205
#define pam(a, b)
Definition: SWcommon.h:181
void phase_comp_back()
Definition: SWpar.c:955
void openProtein(ProteinFile *fprotein, char *filename)
Definition: SWcommon.c:241
#define hit_clockContinue(c)
Definition: hit_utils.h:121
char * prot_name1
Definition: SWcommon.c:49
int yMax
Definition: SWpar.c:215
#define hit_tileDomainAlloc(newVarP, baseType, numDims,...)
Definition: hit_tile.h:336
double gapPenalty
Definition: SWcommon.c:54
void input_parameters(int argc, char *argv[])
Definition: SWcommon.c:75
ProteinFile pfile1
Definition: SWcommon.c:63
HitTile_trace_t yTraceback
Definition: SWpar.c:248
#define h(i, j)
Definition: SWseq.c:99
#define hit_clockReset(c)
Definition: hit_utils.h:78
void initPAM(char *filename)
Definition: SWcommon.c:128
int size[2]
Definition: SWcommon.c:57
int match_length
Definition: SWpar.c:222
void printProtein(HitTile_aa_t p, int offset, int psize)
Definition: SWcommon_hit.c:78
int match_length2
Definition: SWpar.c:224
void hit_comInit(int *pargc, char **pargv[])
Definition: hit_com.c:111
ProteinFile pfile2
Definition: SWcommon.c:64
short aa_t
Definition: SWcommon.h:59
HitTile_trace_t xTraceback
Definition: SWpar.c:247
#define hit_clockStop(c)
Definition: hit_utils.h:109
int main(int argc, char *argv[])
Definition: cannonAsync.c:62
#define iterations
Definition: mg.c:119
HitTile_aa_t p2
Definition: SWpar.c:207
#define yback(i, j)
Definition: SWseq.c:105
char * pam_name
Definition: SWcommon.c:51
int match_length1
Definition: SWpar.c:223
int xMax
Definition: SWpar.c:214
short trace_t
Definition: SWcommon.h:63
int xBegin
Definition: SWpar.c:218
#define hit_tileFree(var)
Definition: hit_tile.h:369
#define GAP_AA
Definition: SWcommon.h:243
HitTile_aa_t out2
Definition: SWpar.c:211
#define max(a, b)
Definition: cannonAsync.c:47
char * prot_name2
Definition: SWcommon.c:50
int yBegin
Definition: SWpar.c:219
void hit_comFinalize()
Definition: hit_com.c:159
#define hit_clockGetSeconds(c)
Definition: hit_utils.h:190
void phase_read_sequences()
Definition: SWpar.c:619
#define hit_clockStart(c)
Definition: hit_utils.h:87
void phase_comp_hmatrix()
Definition: SWpar.c:708