Hitmap 1.3
 All Data Structures Namespaces Files Functions Variables Typedefs Friends Macros Groups Pages
SWseq_ref.c
Go to the documentation of this file.
1 
33 /*
34  * <license>
35  *
36  * Hitmap v1.2
37  *
38  * This software is provided to enhance knowledge and encourage progress in the scientific
39  * community. It should be used only for research and educational purposes. Any reproduction
40  * or use for commercial purpose, public redistribution, in source or binary forms, with or
41  * without modifications, is NOT ALLOWED without the previous authorization of the copyright
42  * holder. The origin of this software must not be misrepresented; you must not claim that you
43  * wrote the original software. If you use this software for any purpose (e.g. publication),
44  * a reference to the software package and the authors must be included.
45  *
46  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY
47  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
48  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
49  * THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
50  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
53  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
54  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
55  *
56  * Copyright (c) 2007-2015, Trasgo Group, Universidad de Valladolid.
57  * All rights reserved.
58  *
59  * More information on http://trasgo.infor.uva.es/
60  *
61  * </license>
62 */
63 
64 // Include the common variables and utilities for all the versions
65 #include "SWcommon.h"
66 
67 // Include the common variables and utilities for the C reference versions
68 #include "SWcommon_ref.h"
69 
70 
71 
72 // **********************************************************************
73 // Declarations of the different phases
74 // **********************************************************************
75 
80 
84 void phase_comp_hmatrix();
85 
89 void phase_comp_back();
90 
91 
92 // **********************************************************************
93 // Global variables
94 // **********************************************************************
95 
96 // Protein sequences
97 aa_t * p1;
98 aa_t * p2;
99 
100 // Similarity matrix
101 h_t * H;
102 #define h(i,j) (H[((i)*(size[1]+1))+(j)])
103 
104 // Traceback matrices
107 #define xback(i,j) (xTraceback[((i)*(size[1]+1))+(j)])
108 #define yback(i,j) (yTraceback[((i)*(size[1]+1))+(j)])
109 
110 // Output proteins with the alignment
113 
114 // X and Y coordinates of the maximum value (End coordinates of the match)
115 int xMax;
116 int yMax;
117 
118 // Begin coordinates of the match
119 int xBegin;
120 int yBegin;
121 
122 // Match lengths (total, sequence 1, sequence 2)
126 
127 // Output match begin
129 
130 
134 int main(int argc, char * argv[]){
135 
136 
137  // **********************************************************************
138  // Check input parameters
139  // **********************************************************************
140  input_parameters(argc, argv);
141 
142 
143  // **********************************************************************
144  // Timers
145  // **********************************************************************
146  double time_initS;
147  double time_initE;
148  double time_compS;
149  double time_compE;
151  double time_phaseS;
152  double time_phaseE;
154  double time_comp_read = 0;
155  double time_comp_hmatrix = 0;
156  double time_comp_back = 0;
159  // **********************************************************************
160  // INIT PHASE: Structure allocation and PAM matrix load
161  // **********************************************************************
162 
163  // Init time
164  time_initS = currentTime();
165 
166  // read the PAM matrix
167  initPAM(pam_name);
168 
169  // Open the proteins files
172 
173  // Allocate memory for the proteins
174  p1 = malloc(sizeof(aa_t)*((size_t)(size[0]+1)));
175  p2 = malloc(sizeof(aa_t)*((size_t)(size[1]+1)));
176  CHECK_NULL(p1);
177  CHECK_NULL(p2);
178 
179  // Similarity matrix
180  H = malloc(sizeof(h_t)*(size_t)((size[0]+1)*(size[1]+1)));
181  CHECK_NULL(H);
182 
183  // Traceback matrices
184  xTraceback = malloc(sizeof(trace_t)*(size_t)((size[0]+1)*(size[1]+1)));
185  yTraceback = malloc(sizeof(trace_t)*(size_t)((size[0]+1)*(size[1]+1)));
188 
189  // Allocate size of the output
190  out1 = malloc(sizeof(aa_t)*((size_t)(size[0]+size[1]+1)));
191  out2 = malloc(sizeof(aa_t)*((size_t)(size[0]+size[1]+1)));
192 
193  // Init time
194  time_initE = currentTime();
195 
196 
197  // **********************************************************************
198  // WHOLE COMPUTATION PHASE: iteration of: Read, H matrix, Backtracking
199  // **********************************************************************
200 
201  // Computation time
202  time_compS = currentTime();
203 
204  int iter;
205  for(iter=0;iter<iterations;iter++){
206 
207 
208 #ifdef DEBUG
209  printf("=== Iteration: %d/%d ===\n\n",iter,iterations);
210 #endif
211 
212  // **********************************************************************
213  // READ PHASE
214  // **********************************************************************
215  time_phaseS = currentTime();
217  time_phaseE = currentTime();
218  time_comp_read += (time_phaseE-time_phaseS);
219 
220  // **********************************************************************
221  // H MATRIX PHASE
222  // **********************************************************************
223  time_phaseS = currentTime();
225  time_phaseE = currentTime();
226  time_comp_hmatrix += (time_phaseE-time_phaseS);
227 
228  // **********************************************************************
229  // BACKTRACKING PHASE
230  // **********************************************************************
231  time_phaseS = currentTime();
232  phase_comp_back();
233  time_phaseE = currentTime();
234  time_comp_back += (time_phaseE-time_phaseS);
235 
236  } // Iterations Loop
237 
238  // Computation time
239  time_compE = currentTime();
240 
241 
242 
243  // **********************************************************************
244  // Show results
245  // **********************************************************************
246 
247  printf("=== Result ===\n");
248  printf("Init time: %f\n",time_initE-time_initS);
249  printf("Comp time: %f\n",time_compE-time_compS);
250  printf(" Read: %f\n",time_comp_read);
251  printf(" H matrix: %f\n",time_comp_hmatrix);
252  printf(" Backtracking: %f\n\n",time_comp_back);
253 
254  printf("Last Match length of A: %d\n", match_length1);
255  printf("Last Match length of B: %d\n", match_length2);
256  printf("Last Alignment length: %d\n", match_length);
257 
258  // Print the result for small inputsets
259  if(match_length < 50){
260  printf("\n");
261  printf("Last MCH1: "); printProteinMatch(p1,size[0],xBegin,xMax);
262  printf("Last MCH2: "); printProteinMatch(p2,size[1],yBegin,yMax);
263  printf("\n");
264 
265  printf("Last OUT1: "); printProtein(&out1[match_pos],match_length);
266  printf("Last OUT2: "); printProtein(&out2[match_pos],match_length);
267  } else {
268  printf("\nInputset too big to print to stdout\n");
269  }
270 
271  if( pfile1.rewind || pfile2.rewind ){
272  printf("\nSmall inputset, it has been rewinded:\n"
273  " %s: %8d times\n"
274  " %s: %8d times\n", prot_name1,pfile1.rewind, prot_name2, pfile2.rewind);
275  }
276 
277 
278  // **********************************************************************
279  // Free all the resources
280  // **********************************************************************
281 
284  free(p1);
285  free(p2);
286  free(xTraceback);
287  free(yTraceback);
288  free(H);
289  free(out1);
290  free(out2);
291 
292  // Exit
293  return EXIT_SUCCESS;
294 }
295 
296 
297 
298 
299 
300 void phase_read_sequences(){
301 
302  // Read the proteins
303  readProtein(&pfile1, p1, size[0]);
304  readProtein(&pfile2, p2, size[1]);
305 
306 #ifdef DEBUG
307  printf("PRT1: "); printProtein(p1,size[0]);
308  printf("PRT2: "); printProtein(p2,size[1]);
309  printf("\n");
310 #endif
311 
312 
313 }
314 
315 
316 
317 void phase_comp_hmatrix(){
318 
319  // Compute the Similarity Matrix (H)
320 
321  // Initialize values
322  int i,j;
323  for (i=0;i<=size[0];i++){
324  xback(i,0) = -1;
325  yback(i,0) = -1;
326  h(i,0)=0;
327  }
328 
329  for (j=0;j<=size[1];j++){
330  xback(0,j) = -1;
331  yback(0,j) = -1;
332  h(0,j)=0;
333  }
334 
335 
336  // Maximum values to retrieve the alignment
337  h_t Max = -1;
338  xMax = -1;
339  yMax = -1;
340 
341  // Perform wavefront algorithm
342  for(i=1; i<size[0]+1; i++){
343  for(j=1; j<size[1]+1; j++){
344 
345  // Get the scores
346  h_t diag = h(i-1,j-1) + pam(p1[i],p2[j]);
347  h_t down = h(i-1,j ) + gapPenalty;
348  h_t right = h(i ,j-1) + gapPenalty;
349 
350  // Calculate the maximum
351  int idx;
352  h_t max = MAX4(diag,down,right,0,&idx);
353  h(i,j) = max;
354 
355  // Set the back trace variables.
356  if ( idx == 0 ){ // max == diag
357  xback(i,j) = (trace_t) (i-1);
358  yback(i,j) = (trace_t) (j-1);
359  } else if ( idx == 1 ) { // max == down
360  xback(i,j) = (trace_t) (i-1);
361  yback(i,j) = (trace_t) j;
362  } else if ( idx == 2 ) { // max == right
363  xback(i,j) = (trace_t) i;
364  yback(i,j) = (trace_t) (j-1);
365  } else { // max == 0
366  xback(i,j) = -1;
367  yback(i,j) = -1;
368  }
369 
370  // Maximum
371  if(max >= Max){
372  xMax = i;
373  yMax = j;
374  Max = max;
375  }
376 
377  } // End loop j;
378  } // End loop i;
379 
380 
381 
382 #ifdef DEBUG
383  // Print
384  printf(" ");
385  for (j=0;j<=size[1];j++){
386  if(j==0)
387  printf(" -");
388  else
389  printf("%6c",AA2char(p2[j]));
390  }
391  printf("\n");
392  for (i=0;i<=size[0];i++){
393 
394  if(i==0)
395  printf("- |");
396  else
397  printf("%c |",AA2char(p1[i]));
398 
399  for (j=0;j<=size[1];j++){
400  printf("%6.1f",(double)h(i,j));
401  }
402  printf("\n");
403  }
404  printf("\n");
405 #endif
406 
407 
408 #ifdef DEBUG
409  // Print
410  printf(" ");
411  for (j=0;j<=size[1];j++){
412  if(j==0)
413  printf(" -");
414  else
415  printf("%8c",AA2char(p2[j]));
416  }
417  printf("\n");
418  for (i=0;i<=size[0];i++){
419 
420  if(i==0)
421  printf("- |");
422  else
423  printf("%c |",AA2char(p1[i]));
424 
425  for (j=0;j<=size[1];j++){
426  printf("(%2d,%2d) ",xback(i,j),yback(i,j));
427  }
428  printf("\n");
429  }
430  printf("\n");
431 #endif
432 
433 }
434 
435 
436 
437 void phase_comp_back(){
438 
439  // reset to max point to do alignment
440  // We start at the maximum point
441  int i=xMax;
442  int j=yMax;
443 
444  // Position of the output proteins.
445  int pos=size[0]+size[1]+1;
446  int outsize=0;
447 
448  while (i>0 && j>0 && h(i,j) > 0){
449 
450  int movex = (i>xback(i,j));
451  int movey = (j>yback(i,j));
452 
453  pos--;
454  outsize++;
455 
456  if (movex && movey) {
457  out1[pos]=p1[i];
458  out2[pos]=p2[j];
459  } else if (movey) {
460  out1[pos]=GAP_AA;
461  out2[pos]=p2[j];
462  } else if (movex) {
463  out1[pos]=p1[i];
464  out2[pos]=GAP_AA;
465  }
466 
467  // Move to the next point.
468  int tempi=i;
469  int tempj=j;
470  i=xback(tempi,tempj);
471  j=yback(tempi,tempj);
472  }
473 
474  pos--;
475 
476  match_length = outsize;
477  match_length1 = xMax - i;
478  match_length2 = yMax - j;
479 
480  xBegin = i;
481  yBegin = j;
482 
483  match_pos = pos;
484 
485 
486 #ifdef DEBUG
487 
488  printf("Match length of A: %d\n", match_length1);
489  printf("Match length of B: %d\n", match_length2);
490  printf("Alignment length: %d\n", match_length);
491  printf("\n");
492 
493  // Print the result for small inputsets
494  if(match_length < 50){
495  printf("\n");
496  printf("MCH1: "); printProteinMatch(p1,size[0],xBegin,xMax);
497  printf("MCH2: "); printProteinMatch(p2,size[1],yBegin,yMax);
498  printf("\n");
499 
500  printf("OUT1: "); printProtein(&out1[match_pos],outsize);
501  printf("OUT2: "); printProtein(&out2[match_pos],outsize);
502  printf("\n");
503  }
504 
505 #endif
506 
507 }
508 
509 
510 
511 
512 
HitTile_h_t H
Definition: SWpar.c:243
HitTile_aa_t p1
Definition: SWpar.c:206
double h_t
Definition: SWcommon.h:61
#define h(i, j)
Definition: SWseq_ref.c:102
int match_pos
Definition: SWpar.c:227
int iter
Definition: mmult_bit.c:68
void printProteinMatch(HitTile_aa_t p, int begin, int end)
Definition: SWcommon_hit.c:90
HitTile_aa_t out1
Definition: SWpar.c:210
void closeProtein(ProteinFile *fprotein)
Definition: SWcommon.c:255
void readProtein(ProteinFile *fprotein, HitTile_aa_t *protein, int psize)
Definition: SWcommon_hit.c:48
int rewind
Definition: SWcommon.h:205
#define CHECK_NULL(check)
Definition: SWcommon.h:275
#define pam(a, b)
Definition: SWcommon.h:181
void phase_comp_back()
Definition: SWpar.c:955
void openProtein(ProteinFile *fprotein, char *filename)
Definition: SWcommon.c:241
char * prot_name1
Definition: SWcommon.c:49
int yMax
Definition: SWpar.c:215
double gapPenalty
Definition: SWcommon.c:54
void input_parameters(int argc, char *argv[])
Definition: SWcommon.c:75
ProteinFile pfile1
Definition: SWcommon.c:63
HitTile_trace_t yTraceback
Definition: SWpar.c:248
void initPAM(char *filename)
Definition: SWcommon.c:128
int size[2]
Definition: SWcommon.c:57
int match_length
Definition: SWpar.c:222
void printProtein(HitTile_aa_t p, int offset, int psize)
Definition: SWcommon_hit.c:78
int match_length2
Definition: SWpar.c:224
ProteinFile pfile2
Definition: SWcommon.c:64
short aa_t
Definition: SWcommon.h:59
HitTile_trace_t xTraceback
Definition: SWpar.c:247
int main(int argc, char *argv[])
Definition: cannonAsync.c:62
#define iterations
Definition: mg.c:119
HitTile_aa_t p2
Definition: SWpar.c:207
char * pam_name
Definition: SWcommon.c:51
int match_length1
Definition: SWpar.c:223
#define yback(i, j)
Definition: SWseq_ref.c:108
int xMax
Definition: SWpar.c:214
short trace_t
Definition: SWcommon.h:63
int xBegin
Definition: SWpar.c:218
#define GAP_AA
Definition: SWcommon.h:243
HitTile_aa_t out2
Definition: SWpar.c:211
double currentTime()
Definition: SWcommon_ref.c:106
#define max(a, b)
Definition: cannonAsync.c:47
char * prot_name2
Definition: SWcommon.c:50
int yBegin
Definition: SWpar.c:219
#define xback(i, j)
Definition: SWseq_ref.c:107
void phase_read_sequences()
Definition: SWpar.c:619
void phase_comp_hmatrix()
Definition: SWpar.c:708