Hitmap 1.3
 All Data Structures Namespaces Files Functions Variables Typedefs Friends Macros Groups Pages
SWcommon.c
Go to the documentation of this file.
1 
9 /*
10  * <license>
11  *
12  * Hitmap v1.2
13  *
14  * This software is provided to enhance knowledge and encourage progress in the scientific
15  * community. It should be used only for research and educational purposes. Any reproduction
16  * or use for commercial purpose, public redistribution, in source or binary forms, with or
17  * without modifications, is NOT ALLOWED without the previous authorization of the copyright
18  * holder. The origin of this software must not be misrepresented; you must not claim that you
19  * wrote the original software. If you use this software for any purpose (e.g. publication),
20  * a reference to the software package and the authors must be included.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY
23  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
25  * THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Copyright (c) 2007-2015, Trasgo Group, Universidad de Valladolid.
33  * All rights reserved.
34  *
35  * More information on http://trasgo.infor.uva.es/
36  *
37  * </license>
38 */
39 
40 // Include the header
41 #include "SWcommon.h"
42 
43 
44 // **********************************************************************
45 // Global variables definition
46 // **********************************************************************
47 
48 // Protein and PAM matrix names
49 char * prot_name1;
50 char * prot_name2;
51 char * pam_name ;
52 
53 // Gap penalty
55 
56 // Size of the input proteins
58 
59 // Number of iterations
61 
62 // Protein files.
65 
66 // The PAM matrix.
68 // Variable for the PAM Matrix.
70 
71 
75 void input_parameters(int argc, char * argv[]){
76 
77  if (argc < 4 || argc > 8){
78  fprintfroot(stderr,
79  "Usage: %s protein1 protein2 PAM [GapPenalty] [SIZE/SIZE1] [SIZE2] [ITERS]\n"
80  "\t protein1: file with the first protein.\n"
81  "\t protein2: file with the second protein.\n"
82  "\t PAM: Point accepted mutation (PAM) Matrix.\n"
83  "\t GapPenalty: Penalty for gaps. Default = %.1f.\n"
84  "\t SIZE/1: Size limit of the proteins of just the first one. Default=%d.\n"
85  "\t SIZE2: Size limit of the second protein. Default=%d.\n"
86  "\t ITERS Number of iterations. Default = %d. Reads another sequence from the files.\n",
87  argv[0],
89  exit(EXIT_FAILURE);
90  }
91 
92  // Read the parameters
93  prot_name1 = argv[1];
94  prot_name2 = argv[2];
95  pam_name = argv[3];
96 
97  if(argc > 4) sscanf(argv[4], "%lf", &gapPenalty);
98  if(argc > 5){
99  size[0] = atoi(argv[5]);
100  size[1] = size[0];
101  }
102  if(argc > 6){
103  size[1] = atoi(argv[6]);
104  }
105  if(argc > 7){
106  iterations = atoi(argv[7]);
107  }
108 
109  // Print summary parameters
110  printfroot("=== Smith Waterman alignment parameters ===\n"
111  " Protein1: %s, size: %d\n"
112  " Protein2: %s, size: %d\n"
113  " Iterations: %d\n"
114  " PAM Matrix: %s\n"
115  " Gap penalty: %f\n\n"
117 
118 }
119 
120 
121 
122 
128 void initPAM(char * filename){
129 
130  // Set the default values
131  pam_matrix.num_aa = 0;
132  int i,j;
133  for(i=0; i<MAX_NUM_AA+1; i++){
134  aa_names.AA2char[i] = NOTFOUND_CHAR;
135  }
136  for(i=0; i<256; i++){
137  aa_names.char2AA[i] = NOTFOUND_AA;
138  }
139 
140  // Open the file
141  FILE * file = fopen(filename,"r");
142  if(file == NULL){
143  exit_error("PAM Matrix read error");
144  }
145 
146  // Read the names and init the pam_matrix.
147  char name;
148  while(fscanf(file, "%c", &name) != EOF){
149  if(name == '\n') break;
150  aa_names.AA2char[pam_matrix.num_aa] = (char)tolower(name);
151  aa_names.char2AA[toupper(name)] = (aa_t) pam_matrix.num_aa;
152  aa_names.char2AA[tolower(name)] = (aa_t) pam_matrix.num_aa;
153  pam_matrix.num_aa ++;
154  }
155 
156  // Set the Gap Values
157  aa_names.AA2char[GAP_AA] = GAP_CHAR;
158  aa_names.char2AA[GAP_CHAR] = GAP_AA;
159 
160 #ifdef DEBUG
161  fflush(stdout);
162 
163  int col = 0;
164 
165  // print
166  printfroot("\n");
167  printfroot("=== AA2char ===\n");
168  for(i=0; i<MAX_NUM_AA+1; i++){
169  printfroot("AA2char(%3d)=%c ",i,AA2char((aa_t)i));
170  col++;
171  if(col==5){
172  col = 0;
173  printfroot("\n");
174  }
175  }
176  printfroot("\n\n");
177 
178 
179  // print just the ASCII printable characters
180  col = 0;
181  printfroot("=== char2AA === (only ASCII printable characters) \n");
182  for(i=' '; i<'~'; i++){
183  printfroot("char2AA(%c)=%3d ",(char)i,char2AA((char)i));
184  col++;
185  if(col==5){
186  col = 0;
187  printfroot("\n");
188  }
189  }
190  printfroot("\n\n");
191 #endif
192 
193 
194  // Read the values
195  double temp;
196  for (i=0; i<pam_matrix.num_aa; i++){
197  for (j=0;j<=i;j++) {
198  if (fscanf(file, "%lf", &temp) == EOF) {
199  exit_error("PAM file error");
200  }
201  pam(i,j)= (h_t) temp;
202  }
203  }
204 
205  // Symmetrify
206  for (i=0;i<pam_matrix.num_aa;i++){
207  for (j=i+1;j<pam_matrix.num_aa;j++){
208  pam(i,j)=pam(j,i);
209  }
210  }
211 
212 #ifdef DEBUG
213  // Print
214  printfroot("=== PAM Matrix ===\n");
215  printfroot(" ");
216  for (j=0;j<pam_matrix.num_aa;j++){
217  printfroot("%6c",AA2char((aa_t)j));
218  }
219  printfroot("\n");
220  for (i=0;i<pam_matrix.num_aa;i++){
221  printfroot("%c |",AA2char((aa_t)i));
222  for (j=0;j<pam_matrix.num_aa;j++){
223  printfroot("%6.1f",pam(i,j));
224  }
225  printfroot("\n");
226  }
227  printfroot("\n");
228  fflush(stdout);
229 #endif
230 
231  // Close the file
232  fclose(file);
233 
234 }
235 
236 
237 
241 void openProtein(ProteinFile * fprotein, char * filename){
242 
243  // Open the file
244  fprotein->file = fopen(filename,"r");
245  if(fprotein->file == NULL){
246  exit_error("Protein file error");
247  }
248  fprotein->rewind=0;
249 }
250 
251 
255 void closeProtein(ProteinFile * fprotein){
256  fclose(fprotein->file);
257 }
258 
259 
263 void exit_error(const char * s){
264  fprintf(stderr,"%s\n",s);
265  exit(EXIT_FAILURE);
266 }
267 
268 
269 
270 
271 #if defined(REFPAR) || defined(HITPAR)
272 
273 
278 void HMaxLoc(void * inPVoid, void * inoutPVoid, int *len, MPI_Datatype * dptr){
279 
280  NOT_USED(dptr);
281 
282  double_3int * in = (double_3int*) inPVoid;
283  double_3int * inout = (double_3int*) inoutPVoid;
284 
285  int i;
286 
287  for (i=0; i< *len; ++i) {
288 
289  if(in->val > inout->val){
290  inout[i] = in[i];
291  } else if ( in[i].val == inout[i].val ) {
292 
293  if(in[i].i > inout[i].i || (in[i].i == inout[i].i && in[i].j > inout[i].j) ){
294  inout[i] = in[i];
295  }
296 
297  }
298 
299  // in->val < inout->val: do nothing
300 
301  }
302 }
303 
304 
305 
306 #endif
307 
308 
309 
310 
311 
312 #ifdef REFPAR
313 
314 
316 extern int rank;
317 
321 int is_root(){
322  return rank == 0;
323 }
324 
325 
326 #elif HITPAR
327 
328 // Include the hitmap header.
329 #include <hitmap.h>
330 
334 int is_root(){
335  return hit_Rank == 0;
336 }
337 
338 
339 #endif
340 
341 
342 
343 
344 
345 
346 
#define fprintfroot(...)
Definition: SWcommon.h:140
double h_t
Definition: SWcommon.h:61
int num_aa
Definition: SWcommon.h:167
#define NOT_USED(p)
Definition: SWcommon.h:271
Definition: SWcommon.h:165
#define hit_Rank
Definition: hit_com.h:140
#define DEFAULT_SIZE
Definition: SWcommon.h:89
void closeProtein(ProteinFile *fprotein)
Definition: SWcommon.c:255
int rewind
Definition: SWcommon.h:205
AAName aa_names
Definition: SWcommon.c:69
#define pam(a, b)
Definition: SWcommon.h:181
void openProtein(ProteinFile *fprotein, char *filename)
Definition: SWcommon.c:241
char * prot_name1
Definition: SWcommon.c:49
PAM pam_matrix
Definition: SWcommon.c:67
double gapPenalty
Definition: SWcommon.c:54
int rank
Definition: SWpar_ref.c:181
#define MAX_NUM_AA
Definition: SWcommon.h:87
#define NOTFOUND_CHAR
Definition: SWcommon.h:241
void exit_error(const char *s)
Definition: SWcommon.c:263
void input_parameters(int argc, char *argv[])
Definition: SWcommon.c:75
ProteinFile pfile1
Definition: SWcommon.c:63
void initPAM(char *filename)
Definition: SWcommon.c:128
int size[2]
Definition: SWcommon.c:57
ProteinFile pfile2
Definition: SWcommon.c:64
short aa_t
Definition: SWcommon.h:59
#define s
#define iterations
Definition: mg.c:119
char * pam_name
Definition: SWcommon.c:51
#define is_root()
Definition: SWcommon.h:123
char AA2char[MAX_NUM_AA+1]
Definition: SWcommon.h:234
#define GAP_AA
Definition: SWcommon.h:243
#define printfroot(...)
Definition: SWcommon.h:135
#define GAP_CHAR
Definition: SWcommon.h:245
#define NOTFOUND_AA
Definition: SWcommon.h:239
char * prot_name2
Definition: SWcommon.c:50
#define DEFAULT_GAPPENALTY
Definition: SWcommon.h:91
aa_t char2AA[256]
Definition: SWcommon.h:232
FILE * file
Definition: SWcommon.h:204
#define DEFAULT_ITERATIONS
Definition: SWcommon.h:93