yamcot 1.0.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1476 @@
1
+ #define _CRT_SECURE_NO_WARNINGS
2
+
3
+ #include <stdio.h>
4
+ #include <stdlib.h>
5
+ #include <string.h>
6
+ #include <math.h>
7
+ #include <time.h>
8
+ #include <ctype.h>
9
+
10
// Two-argument minimum / maximum.
// The whole expansion is parenthesised and carries no trailing ';', so the
// macros can be used inside larger expressions (e.g. `2 * Max(a, b)`).
// Each argument may be evaluated twice: do not pass expressions with side effects.
#define Min(a,b) (((a)>(b))? (b):(a))
#define Max(a,b) (((a)>(b))? (a):(b))
12
+ #define SEQLEN 12000
13
+ #define MATLEN 50 //max matrix length
14
+ #define SPACLEN 100 //max spacer length
15
+ #define ARGLEN 300 //max argv length
16
+ #define OLIGNUM 4// di 16 mono 4
17
+ #define DIM 100
18
+
19
// One entry of the merged threshold list of the two models.
struct qbs {
	double err; // ERR score
	int mod;    // model 0 or 1
};

// qsort() comparator for qbs records: orders by `err` in DECREASING order.
// Returns -1 when X1 has the larger err, 1 when it has the smaller one and
// 0 when both are equal. The records are only read, so they are accessed
// through pointers-to-const instead of casting the constness away.
int compare_qbs(const void* X1, const void* X2)
{
	const qbs* lhs = static_cast<const qbs*>(X1);
	const qbs* rhs = static_cast<const qbs*>(X2);
	if (lhs->err > rhs->err) return -1;
	if (lhs->err < rhs->err) return 1;
	return 0;
}
33
// Returns the 0-based index of the n-th occurrence (counting from 1) of
// character `c` in `str`, scanning left to right, or -1 when the string
// holds fewer than n occurrences.
int StrNStr(char* str, char c, int n)
{
	const int total = (int)strlen(str);
	int hits = 0;
	int pos = 0;
	while (pos < total)
	{
		// the counter only advances on a match
		if (str[pos] == c && ++hits == n) return pos;
		++pos;
	}
	return -1;
}
47
// Returns the 0-based index of the n-th occurrence (counting from 1) of
// character `c` in `str`, scanning from the END of the string towards its
// start, or -1 when the string holds fewer than n occurrences.
int StrEndNStr(char* str, char c, int n)
{
	int hits = 0;
	int pos = (int)strlen(str);
	while (pos-- > 0)
	{
		// the counter only advances on a match
		if (str[pos] == c && ++hits == n) return pos;
	}
	return -1;
}
61
// Converts the ASCII upper-case letters of `d` to lower case, in place, and
// returns `d`.
// Only 'A'..'Z' are modified. The previous version added 32 to every
// character below 'a', which also rewrote digits, whitespace, line ends and
// punctuation.
char* TransStr(char* d)
{
	for (char* p = d; *p != '\0'; ++p)
	{
		if (*p >= 'A' && *p <= 'Z') *p = (char)(*p - 'A' + 'a');
	}
	return d;
}
73
// Converts the ASCII lower-case letters of `d` to upper case, in place, and
// returns `d`.
// Only 'a'..'z' are modified. The previous version subtracted 32 from every
// character >= 'a', which also rewrote '{', '|', '}' and '~'.
char* TransStrBack(char* d)
{
	for (char* p = d; *p != '\0'; ++p)
	{
		if (*p >= 'a' && *p <= 'z') *p = (char)(*p - 'a' + 'A');
	}
	return d;
}
85
// Removes every occurrence of character `c` from `str`, in place.
// The surviving characters keep their relative order and the string is
// re-terminated after the last one.
void DelChar(char* str, char c)
{
	char* out = str;                       // next write position
	for (const char* in = str; *in != '\0'; ++in)
	{
		if (*in != c) *out++ = *in;        // the write position never passes the read position
	}
	*out = '\0';
}
97
// Complement of a single nucleotide letter; case is preserved for
// a/c/g/t/n, every other character maps to 'n'.
static char ComplLetter(char base)
{
	switch (base)
	{
	case 'a': return 't';
	case 't': return 'a';
	case 'c': return 'g';
	case 'g': return 'c';
	case 'A': return 'T';
	case 'T': return 'A';
	case 'C': return 'G';
	case 'G': return 'C';
	case 'N': return 'N';
	case 'n': return 'n';
	default: return 'n';
	}
}

// Replaces `d` by its reverse complement, in place. Always returns 1.
// The string is reversed by swapping its two ends while complementing them,
// so no temporary copy is allocated. The previous version copied the string
// with `new` and tested the result against NULL, a check that can never fire
// because plain `new` reports failure by throwing.
int ComplStr(char* d)
{
	const size_t len = strlen(d);
	if (len == 0) return 1;
	size_t lo = 0;
	size_t hi = len - 1;
	while (lo < hi)
	{
		const char left = ComplLetter(d[lo]);
		d[lo] = ComplLetter(d[hi]);
		d[hi] = left;
		++lo;
		--hi;
	}
	if (lo == hi) d[lo] = ComplLetter(d[lo]); // middle letter of an odd-length string
	return 1;
}
130
// Extracts field number `nstol` (0-based) of the `sep`-separated string `str`
// into `ret`, a buffer of `size` bytes.
// Returns 1 on success and -1 when `str` has fewer than `nstol` separators
// (or when `size` is 0). The last field runs up to the end of the string.
// The result is always NUL-terminated and never longer than size-1
// characters: a longer field is truncated. The previous version copied the
// whole field regardless of `size` and could write past the end of `ret`.
int UnderStolStr(char* str, int nstol, char* ret, size_t size, char sep)
{
	if (size == 0) return -1;
	memset(ret, '\0', size);
	// skip `nstol` separators to reach the first character of the field
	const char* begin = str;
	int passed = 0;
	while (passed < nstol)
	{
		if (*begin == '\0') return -1;
		if (*begin == sep) passed++;
		begin++;
	}
	// the field ends at the next separator or at the end of the string
	const char* end = begin;
	while (*end != '\0' && *end != sep) end++;
	size_t len = (size_t)(end - begin);
	if (len > size - 1) len = size - 1;
	memcpy(ret, begin, len);
	ret[len] = '\0';
	return 1;
}
157
+ #include "fasta_to_plain.h"
158
+ #include "pfm_to_pwm.h"
159
+ //#include "pwm_rec.h"
160
+ //#include "pfm_similarity.h"
161
+
162
// One record of a `city` model table.
struct due {
	double buf; // weight of the record; the scanner adds buf * (match fraction) to the score
	int sta;    // first window position examined for this record
	int end;    // last window position examined for this record
	int num;    // dinucleotide code (4 * first letter + second letter) the record matches
	void get_copy(due* a);
};

// Copies all four fields of this record into *a.
void due::get_copy(due* a)
{
	due& target = *a;
	target.buf = buf;
	target.sta = sta;
	target.end = end;
	target.num = num;
}
177
+
178
+ //set of dinucleotides
179
+ struct city {// one motif model loaded by get_file(): header fields plus a table of `due` records
180
+ char site[300];// first line of the model file (get_file removes the '\n')
181
+ int size;// number of records in tot[] that get_file() fills and sort_all() sorts
182
+ int len;// third line of the model file; main() uses it as the motif length
183
+ double min;// fourth line of the model file; the scanner subtracts it from the raw score
184
+ double raz;// fifth line of the model file; the scanner divides the shifted score by it
185
+ struct due tot[DIM];// record storage with room for DIM entries
186
+ void get_copy(city* a);// copies this object into *a
187
+ void sort_all(void);// qsort()s tot[0..size) with compare_due
188
+ int get_file(char* file);// loads the model from a text file; returns 1 on success, -1 on failure
189
+ //void city::fprint_tab(char *file);
190
+ }sta;// the declaration also defines a file-scope object named `sta`
191
+ int city::get_file(char* file)
192
+ {
193
+ FILE* in;
194
+ if ((in = fopen(file, "rt")) == NULL)
195
+ {
196
+ printf("Input file %s can't be opened!", file);
197
+ return -1;
198
+ }
199
+ char d[300];
200
+ fgets(d, sizeof(d), in);
201
+ DelChar(d,'\n');
202
+ strcpy(site, d);
203
+ fgets(d, sizeof(d), in);
204
+ size = atoi(d);
205
+ fgets(d, sizeof(d), in);
206
+ len = atoi(d);
207
+ fgets(d, sizeof(d), in);
208
+ min = atof(d);
209
+ fgets(d, sizeof(d), in);
210
+ raz = atof(d);
211
+ char sep = '\t', s[30];
212
+ int i, test;
213
+ for (i = 0; i < size; i++)
214
+ {
215
+ fgets(d, sizeof(d), in);
216
+ tot[i].sta = atoi(d);
217
+ test = UnderStolStr(d, 1, s, sizeof(s), sep);
218
+ if (test == -1) { printf("Wrong format %s\n", d); return(-1); }
219
+ tot[i].end = atoi(s);
220
+ test = UnderStolStr(d, 2, s, sizeof(s), sep);
221
+ if (test == -1) { printf("Wrong format %s\n", d); return(-1); }
222
+ tot[i].buf = atof(s);
223
+ test = UnderStolStr(d, 3, s, sizeof(s), sep);
224
+ if (test == -1) { printf("Wrong format %s\n", d); return(-1); }
225
+ tot[i].num = atoi(s);
226
+ }
227
+ fclose(in);
228
+ return 1;
229
+ }
230
+ void city::get_copy(city* a)
231
+ {
232
+ strcpy(a->site, site);
233
+ a->size = size;
234
+ a->min = min;
235
+ a->len = len;
236
+ a->raz = raz;
237
+ int i;
238
+ for (i = 0; i < size; i++)
239
+ {
240
+ tot[i].get_copy(&a->tot[i]);
241
+ }
242
+ }
243
+ int compare_due(const void* X1, const void* X2)
244
+ {
245
+ struct due* S1 = (struct due*)X1;
246
+ struct due* S2 = (struct due*)X2;
247
+ if (S1->sta - S2->sta > 0)return 1;
248
+ if (S1->sta - S2->sta < 0)return -1;
249
+ if (S1->end - S2->end > 0)return 1;
250
+ if (S1->end - S2->end < 0)return -1;
251
+ if (S1->num - S2->num > 0)return 1;
252
+ if (S1->num - S2->num < 0)return -1;
253
+ return 0;
254
+ }
255
+ void city::sort_all(void)
256
+ {
257
+ qsort((void*)tot, size, sizeof(tot[0]), compare_due);
258
+ }
259
// Maps a lower-case nucleotide letter to its code: a=0, c=1, g=2, t=3.
// 'n' gives -1 and every other character gives -2.
int IdeLet(char c)
{
	if (c == 'a') return 0;
	if (c == 'c') return 1;
	if (c == 'g') return 2;
	if (c == 't') return 3;
	if (c == 'n') return -1;
	return -2;
}
272
// Encodes every window of `word` letters of `d` as a base-4 number
// (a=0, c=1, g=2, t=3, first letter most significant) and stores the code of
// the window starting at position i in sost[i]. (In this file it is called
// with word = 1, i.e. one code per nucleotide.)
// As before, the first 4^word entries of `sost` are zeroed first, so `sost`
// must hold at least max(4^word, strlen(d) - word + 1) ints.
// A window containing any character other than a/c/g/t gets the code -1.
// The previous version kept the code of the preceding letter in that case
// when word > 1, and read past its power table for word > 6.
void GetSostPro(char* d, int word, int* sost)
{
	const char letter[] = "acgt";
	const int lens = (int)strlen(d);
	int size = 1;
	for (int k = 0; k < word; k++) size *= 4;
	for (int i = 0; i < size; i++) sost[i] = 0;
	for (int i = 0; i + word <= lens; i++)
	{
		int code = 0;
		bool known = true;
		for (int j = 0; j < word; j++)
		{
			int let = -1; // looked up afresh for every letter
			for (int k = 0; k < 4; k++)
			{
				if (d[i + j] == letter[k]) { let = k; break; }
			}
			if (let < 0) { known = false; break; }
			code = code * 4 + let;
		}
		sost[i] = known ? code : -1;
	}
}
297
+ void PWMScore(double **pwm, double& min, double& raz, int len1)
298
+ {
299
+ int i, j;
300
+ for (i = 0; i < len1; i++)
301
+ {
302
+ double pwmmin = 100;
303
+ double pwmmax = -100;
304
+ for (j = 0; j < OLIGNUM; j++)
305
+ {
306
+ if (pwm[i][j] < pwmmin)pwmmin = pwm[i][j];
307
+ if (pwm[i][j] > pwmmax)pwmmax = pwm[i][j];
308
+ }
309
+ raz += pwmmax;
310
+ min += pwmmin;
311
+ }
312
+ raz -= min;
313
+ }
314
+ int PWM_SGA_rec_real(double ***pwm, double min[2], double raz[2], city sta[2], int model_type[2], int nthr_dist[2], double** thr_all, double** fpr_all, char*** seq, int olen[2], int nseq,
315
+ int shift, int length_fasta_max, char *file_hist, char* file_prc, int yes_out_hist, int yes_out_prc,double *auprc_final_all, double *auprc_final_over,
316
+ double &auprc_final_over1,double &auprc_final_all1,double *shift_final_all,double *shift_final_over,char *strand_final_all,char *strand_final_over)
317
+ {
318
+ int i, j, k, n, m;
319
+ int compl1, kmin, kmax;
320
+ int cod[MATLEN];
321
+ char d[MATLEN];
322
+ int word = 1;
323
+ int nthr_dist1[2], olen1[2];
324
+ double thr_cr[2];
325
+ int nthr_dist_two = nthr_dist[0] + nthr_dist[1];
326
+ if (olen[0] < olen[1]) { kmin = 0; kmax = 1; }
327
+ else { kmin = 1; kmax = 0; }
328
+ double wshift_ov[3];
329
+ int n_shift[3];
330
+ for (i = 0; i < 3; i++) n_shift[i] = 2 * shift;
331
+ int j_ov1[3], j_ov2[3];
332
+ int kpairs[3][2];
333
+ kpairs[0][0] = kpairs[1][0] = kpairs[1][1] = kmax;
334
+ kpairs[0][1] = kpairs[2][0] = kpairs[2][1] = kmin;
335
+ double** wshift;
336
+ wshift = new double* [3];
337
+ if (wshift == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
338
+ for(i = 0; i < 3; i++)
339
+ {
340
+ int kk[2];
341
+ int chet[2];
342
+ for (k = 0; k < 2; k++)
343
+ {
344
+ kk[k] = kpairs[i][k];
345
+ if (olen[kk[k]] % 2 == 0)chet[k] = 1;//4etno
346
+ else chet[k] = 0;//ne4etno
347
+ }
348
+ if (chet[0] == chet[1])n_shift[i]++;
349
+ if (chet[0] == chet[1])// -1 0 +1
350
+ {
351
+ if (chet[0] == 0)wshift_ov[i] = (double)(olen[kk[0]] - 1) / 2;
352
+ else wshift_ov[i] = (double)olen[kk[0]] / 2;
353
+ }
354
+ else //-0.5 +0.5
355
+ {
356
+ if (chet[kk[0]] == 1)wshift_ov[i] = (double)olen[kk[0]] / 2 - 0.5;
357
+ else wshift_ov[i] = (double)(olen[kk[0]] - 1) / 2 + 0.5;
358
+ }
359
+ //shift = n_shift / 2;
360
+ wshift[i] = new double[n_shift[i]];
361
+ if (wshift[i] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
362
+ if (chet[0] == chet[1])
363
+ {
364
+ wshift[i][0] = -shift;
365
+ }
366
+ else
367
+ {
368
+ wshift[i][0] = 0.5 - shift;
369
+ }
370
+ for (k = 1; k < n_shift[i]; k++)wshift[i][k] = wshift[i][k - 1] + 1;
371
+ for (k = 0; k < n_shift[i]; k++)
372
+ {
373
+ if (wshift[i][k] == -wshift_ov[i])
374
+ {
375
+ j_ov1[i] = k; continue;
376
+ }
377
+ if (wshift[i][k] == wshift_ov[i])
378
+ {
379
+ j_ov2[i] = k; break;
380
+ }
381
+ }
382
+ }
383
+ double*** auprc;
384
+ auprc = new double** [3];
385
+ for (j = 0; j < 3; j++)
386
+ {
387
+ auprc[j] = new double* [2];
388
+ if (auprc[j] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
389
+ for (k = 0; k < 2; k++)
390
+ {
391
+ auprc[j][k] = new double[n_shift[j]];
392
+ if (auprc[j][k] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
393
+ }
394
+ }
395
+ for (j = 0; j < 3; j++)for (k = 0; k < 2; k++)for (m = 0; m < n_shift[j]; m++)auprc[j][k][m] = 0;
396
+ double half_win[2];//double shift of center relative to start
397
+ for (k = 0; k < 2; k++)half_win[k] = ((double)olen[k] - 1) / 2;
398
+ int* inx_self[2];
399
+ for (k = 0; k < 2; k++)
400
+ {
401
+ inx_self[k] = new int[nthr_dist[k]];
402
+ if (inx_self[k] == NULL) { fprintf(stderr, "Inx_self Out of memory..."); return -1; }
403
+ }
404
+ for (k = 0; k < 2; k++)for (i = 0; i < nthr_dist[k]; i++)inx_self[k][i] = nthr_dist_two;
405
+ qbs* errs;
406
+ errs = new qbs[nthr_dist_two];
407
+ if (errs == NULL) { fprintf(stderr, "Inx_self Out of memory..."); return -1; }
408
+ j = 0;
409
+ for (k = 0; k < 2; k++)
410
+ {
411
+ for (i = 0; i < nthr_dist[k]; i++)
412
+ {
413
+ errs[j].err = fpr_all[k][i];
414
+ // errs[j].m0 = i;
415
+ errs[j].mod = k;
416
+ j++;
417
+ }
418
+ }
419
+ qsort(errs, nthr_dist_two, sizeof(errs[0]), compare_qbs);
420
+ {
421
+ int cou[2];
422
+ for (k = 0; k < 2; k++)cou[k] = 0;
423
+ for (k = 0; k < nthr_dist_two; k++)
424
+ {
425
+ int model = errs[k].mod;
426
+ //int model1 = 1 - errs[k].mod;
427
+ inx_self[model][cou[model]] = k;
428
+ //inx_cross[model1][cou[model1]] = k;
429
+ cou[model]++;
430
+ }
431
+ }
432
+ int **** tp;
433
+ double ** fp;
434
+ tp = new int*** [3];
435
+ if (tp == NULL) { fprintf(stderr, "TP Out of memory..."); return -1; }
436
+
437
+ for (j = 0; j < 3; j++)
438
+ {
439
+ tp[j] = new int** [2];
440
+ if (tp[j] == NULL) { puts("TP Out of memory..."); return -1; }
441
+
442
+ for (k = 0; k < 2; k++)
443
+ {
444
+ tp[j][k] = new int* [n_shift[j]];
445
+ if (tp[j][k] == NULL) { puts("TP Out of memory..."); return -1; } // Исправлено
446
+
447
+ for (i = 0; i < n_shift[j]; i++)
448
+ {
449
+ tp[j][k][i] = new int[nthr_dist_two];
450
+ if (tp[j][k][i] == NULL) { fprintf(stderr, "TP Out of memory..."); return -1; }
451
+ }
452
+ }
453
+ }
454
+ for (j = 0; j < 3; j++)for (k = 0; k < 2; k++)for (i = 0; i < n_shift[j]; i++)for (m = 0; m < nthr_dist_two; m++)tp[j][k][i][m] = 0;
455
+ fp = new double* [3];
456
+ if (fp == NULL) { fprintf(stderr, "FP Out of memory..."); return -1; }
457
+ for (j = 0; j < 3; j++)
458
+ {
459
+ fp[j] = new double[nthr_dist_two];
460
+ if (fp[j] == NULL) { fprintf(stderr, "FP Out of memory..."); return -1; }
461
+ }
462
+ for (j = 0; j < 3; j++)for (m = 0; m < nthr_dist_two; m++)fp[j][m] = 0;
463
+ int*** tp_tot;
464
+ double fp_tot[3] = { 0,0,0 };
465
+ tp_tot = new int** [3];
466
+ if (tp_tot == NULL) { fprintf(stderr, "TP_tot Out of memory..."); return -1; }
467
+ for (j = 0; j < 3; j++)
468
+ {
469
+ tp_tot[j] = new int* [2];
470
+ if (tp_tot[j] == NULL) { puts("TP_tot Out of memory..."); return -1; }
471
+ for (k = 0; k < 2; k++)
472
+ {
473
+ tp_tot[j][k] = new int[n_shift[j]];
474
+ if (tp_tot[j][k] == NULL) { puts("TP_tot Out of memory..."); return -1; }
475
+ }
476
+ }
477
+ for (j = 0; j < 3; j++) for (k = 0; k < 2; k++)for (i = 0; i < n_shift[j]; i++)tp_tot[j][k][i] = 0;
478
+ int*** err_inx;
479
+ err_inx = new int** [2];
480
+ if (err_inx == NULL) { puts("Err_inx Out of memory..."); return -1; }
481
+ for (k = 0; k < 2; k++)
482
+ {
483
+ err_inx[k] = new int* [2];
484
+ if (err_inx[k] == NULL) { puts("Err_inx Out of memory..."); return -1; }
485
+ for (i = 0; i < 2; i++)
486
+ {
487
+ err_inx[k][i] = new int[length_fasta_max];
488
+ if (err_inx[k][i] == NULL) { fprintf(stderr, "Err_inx Out of memory..."); return -1; }
489
+ }
490
+ }
491
+ for (k = 0; k < 2; k++)for (i = 0; i < 2; i++)for (j = 0; j < length_fasta_max; j++)err_inx[k][i][j] = nthr_dist_two;
492
+ for (n = 0; n < 2; n++)
493
+ {
494
+ nthr_dist1[n] = nthr_dist[n] - 1;
495
+ olen1[n] = olen[n] - 1;
496
+ thr_cr[n] = thr_all[n][nthr_dist1[n]];
497
+ }
498
+ for (n = 0; n < nseq; n++)
499
+ {
500
+ //if ((n + 1) % 100 == 0)printf("\b\b\b\b\b\b\b%7d", n + 1);
501
+ int len_pro1 = strlen(seq[0][n]);
502
+ for (k = 0; k < 2; k++)
503
+ {
504
+ int len21 = len_pro1 - olen[k];
505
+ // for (j = 0; j < 2; j++)for (i = 0; i <= len21; i++)err_inx[k][j][i] = nthr_dist_two;
506
+ for (i = 0; i <= len21; i++)
507
+ {
508
+ int index = nthr_dist_two;
509
+ double sco2 = 0;
510
+ int gom = 0;
511
+ for (compl1 = 0; compl1 < 2; compl1++)
512
+ {
513
+ int ista;
514
+ if (compl1 == 0)ista = i;
515
+ else ista = len21 - i;
516
+ strncpy(d, &seq[compl1][n][ista], olen[k]);
517
+ d[olen[k]] = '\0';
518
+ if (strstr(d, "n") != NULL) { gom = 1; break; }
519
+ double score = 0;
520
+ if (model_type[k] == 0)
521
+ {
522
+ GetSostPro(d, word, cod);
523
+ for (j = 0; j < olen[k]; j++)
524
+ {
525
+ score += pwm[k][j][cod[j]];
526
+ }
527
+ score -= min[k];
528
+ score /= raz[k];
529
+ }
530
+ else
531
+ {
532
+ for (j = 0; j < sta[k].size; j++)
533
+ {
534
+ int rlenj = (sta[k].tot[j].end - sta[k].tot[j].sta + 1);
535
+ double fm = 0;
536
+ for (m = sta[k].tot[j].sta; m <= sta[k].tot[j].end; m++)
537
+ {
538
+ int cod = 4 * IdeLet(d[m]) + IdeLet(d[m + 1]);
539
+ if (sta[k].tot[j].num == cod) { fm++; }
540
+ }
541
+ if (fm != 0)
542
+ {
543
+ fm /= rlenj;
544
+ score += sta[k].tot[j].buf * fm;
545
+ }
546
+ }
547
+ score -= sta[k].min;
548
+ score /= sta[k].raz;
549
+ }
550
+ if (score > sco2)sco2 = score;
551
+ if (gom == 0)
552
+ {
553
+ if (sco2 >= thr_cr[k])
554
+ {
555
+ if (sco2 >= thr_all[k][0])
556
+ {
557
+ index = 0;
558
+ //break;
559
+ }
560
+ else
561
+ {
562
+ for (j = 1; j < nthr_dist[k]; j++)
563
+ {
564
+ if (sco2 >= thr_all[k][j] && sco2 < thr_all[k][j - 1])
565
+ {
566
+ index = j;
567
+ break;
568
+ }
569
+ }
570
+ }
571
+ }
572
+ //if (index == 0)break;
573
+ err_inx[k][compl1][i] = index;
574
+ }
575
+ }
576
+ }
577
+ }
578
+ int len21[2];
579
+ for (k = 0; k < 2; k++)
580
+ {
581
+ len21[k] = len_pro1 - olen[k];
582
+ }
583
+ /*for (k = 0; k < 2; k++)
584
+ {
585
+ printf("%d\n",k);
586
+ for (i = 0; i <= len21[k]; i++)
587
+ {
588
+ for (j = 0; j < 2; j++)if(err_inx[k][j][i]<nthr_dist_two)printf("%.1f_%d_%d ",half_win[j] + i, j, err_inx[k][j][i]);
589
+ if ((i + 1) % 200 == 0)printf("\n");
590
+ }
591
+ printf("\n");
592
+ } */
593
+ for(j = 0; j < 3 ; j++)
594
+ {
595
+ int n_shift1 = n_shift[j] - 1;
596
+ int j1 = kpairs[j][0];
597
+ int j2 = kpairs[j][1];
598
+ for (m = 0; m <= len21[j1]; m++)
599
+ {
600
+ {
601
+ double cent_win1 = m + half_win[j1];
602
+ int inx1[2];
603
+ for(k = 0; k < 2 ; k++)inx1[k] = err_inx[j1][k][m];
604
+ int mini1 = Min(inx1[0], inx1[1]);
605
+ if (mini1 == nthr_dist_two)continue;
606
+ for (i = 0; i <= len21[j2]; i++)
607
+ {
608
+ int inx2[2];
609
+ for (k = 0; k < 2; k++)inx2[k] = err_inx[j2][k][i];
610
+ int mini2 = Min(inx2[0], inx2[1]);
611
+ if (mini2 == nthr_dist_two)continue;
612
+ double cent_win2 = i + half_win[j2];
613
+ double cent_dif = cent_win2 - cent_win1;
614
+ int ori1, ori2;
615
+ for (ori1 = 0; ori1 < 2; ori1++)
616
+ {
617
+ for (ori2 = 0; ori2 < 2; ori2++)
618
+ {
619
+ if (inx1[ori1] == nthr_dist_two || inx2[ori2] == nthr_dist_two)continue;
620
+ int ori_type;
621
+ if (ori1 == ori2)ori_type = 0;//Direct
622
+ else ori_type = 1; //Invert or Evert
623
+ int k1 = inx_self[j1][inx1[ori1]];
624
+ int k2 = inx_self[j2][inx2[ori2]];
625
+ if (cent_dif >= wshift[j][0] && cent_dif <= wshift[j][n_shift1])
626
+ {
627
+ int shift_pos;
628
+ if (ori1 == 0)shift_pos = (int)(cent_win2 - cent_win1 - wshift[j][0]);//Direct12 Invert
629
+ else shift_pos = (int)(cent_win1 - cent_win2 - wshift[j][0]);//Direct21 Evert
630
+ tp_tot[j][ori_type][shift_pos]+=2;
631
+ tp[j][ori_type][shift_pos][k1]++;
632
+ tp[j][ori_type][shift_pos][k2]++;
633
+ }
634
+ else
635
+ {
636
+ double cent_diff = fabs(cent_dif);
637
+ if(cent_diff <= n_shift[j])
638
+ {
639
+ fp_tot[j] += 2;
640
+ fp[j][k1]++;
641
+ fp[j][k2]++;
642
+ }
643
+ }
644
+ }
645
+ }
646
+ }
647
+ }
648
+ }
649
+ }
650
+ }
651
+ FILE* out_hist;
652
+ FILE* out_prc;
653
+ out_hist = NULL;
654
+ out_prc = NULL;
655
+ if (yes_out_hist == 1)
656
+ {
657
+ if ((out_hist = fopen(file_hist, "wt")) == NULL)
658
+ {
659
+ printf("Output file can't be opened!\n");
660
+ return -1;
661
+ }
662
+ }
663
+ if (yes_out_prc == 1)
664
+ {
665
+ if ((out_prc = fopen(file_prc, "wt")) == NULL)
666
+ {
667
+ printf("Output file can't be opened!\n");
668
+ return -1;
669
+ }
670
+ }
671
+ for (j = 0; j < 3; j++)
672
+ {
673
+ if (yes_out_hist == 1)
674
+ {
675
+ if (j == 0)fprintf(out_hist, "Heterotypic\t");
676
+ else fprintf(out_hist, "Homotypic\t");
677
+ fprintf(out_hist, "Motif1\t%d\tMotif2\t%d\n", olen[kpairs[j][0]], olen[kpairs[j][1]]);
678
+ for (i = 0; i < n_shift[j]; i++)
679
+ {
680
+ fprintf(out_hist, "\t%.1f", wshift[j][i]);
681
+ }
682
+ fprintf(out_hist, "\n");
683
+ }
684
+ {
685
+ int del = n_shift[j] * 2;
686
+ fp_tot[j] /= del;
687
+ for (i = 0; i < nthr_dist_two; i++)fp[j][i] /= del;
688
+ }
689
+ /*printf("\n");
690
+ for (k = 0; k < 2; k++)
691
+ {
692
+ printf("%d TP\t",k);
693
+ for (m = 0; m < n_shift; m++)
694
+ {
695
+ printf("%.f ", tp_tot[k][m]);
696
+ }
697
+ printf("\t\t");
698
+ printf("%d FP\t", k);
699
+ //for (m = 0; m < n_shift; m++)
700
+ {
701
+ printf("%.f ", fp_tot);
702
+ }
703
+ printf("\n");
704
+ } */
705
+ for (k = 0; k < 2; k++)
706
+ {
707
+ if (yes_out_prc == 1)
708
+ {
709
+ if (j == 0)fprintf(out_prc, "Heterotypic\t");
710
+ else fprintf(out_prc, "Homotypic\t");
711
+ fprintf(out_prc, "Motif1\t%d\tMotif2\t%d\n", olen[kpairs[j][0]], olen[kpairs[j][1]]);
712
+ if (k == 0)
713
+ {
714
+ fprintf(out_hist, "Direct ShortLong,LongShort");
715
+ }
716
+ else
717
+ {
718
+ fprintf(out_hist, "Evered Inverted");
719
+ }
720
+ }
721
+ for (m = 0; m < n_shift[j]; m++)
722
+ {
723
+ if (yes_out_prc == 1)
724
+ {
725
+ if (k == 0)
726
+ {
727
+ if (wshift[j][m] < 0)fprintf(out_prc, "\tDirect ShortLong");
728
+ else
729
+ {
730
+ if (wshift[j][m] > 0)fprintf(out_prc, "\tDirect LongShort");
731
+ else fprintf(out_prc, "\tDirect Exact");
732
+ }
733
+ }
734
+ else
735
+ {
736
+ if (wshift[j][m] < 0)fprintf(out_prc, "\tEverted");
737
+ else
738
+ {
739
+ if (wshift[j][m] > 0)fprintf(out_prc, "\tInverted");
740
+ else fprintf(out_prc, "\tReverse Exact");
741
+ }
742
+ }
743
+ fprintf(out_prc, " %.1f\n", wshift[j][m]);
744
+ }
745
+ {
746
+ int nthr_dist_two1 = nthr_dist_two - 1;
747
+ double tpr_pred = 0, prec_pred = 1, tpr = 0;
748
+ double dtp = 0, dfp = 0;
749
+ int count_pr = 0, count_roc = 0;
750
+ double tp_sum = 0, fp_sum = 0;
751
+ if (yes_out_prc == 1)fprintf(out_prc, "%f\t%f\n", tpr_pred, prec_pred);
752
+ for (i = 0; i < nthr_dist_two; i++)
753
+ {
754
+ dtp += (double)tp[j][k][m][i];
755
+ dfp += fp[j][i];
756
+ if (dtp > 0 && (i == nthr_dist_two1 || errs[i + 1].err != errs[i].err))
757
+ {
758
+ tp_sum += dtp;
759
+ fp_sum += dfp;
760
+ double prec_cur = tp_sum / (tp_sum + fp_sum);
761
+ tpr = tp_sum / tp_tot[j][k][m];
762
+ double prec_av = (prec_pred + prec_cur) / 2;
763
+ double dauprc = dtp * prec_av / tp_tot[j][k][m];
764
+ if (yes_out_prc == 1)fprintf(out_prc, "%f\t%f\n", tpr, prec_cur);
765
+ prec_pred = prec_cur;
766
+ tpr_pred = tpr;
767
+ auprc[j][k][m] += dauprc;
768
+ dtp = 0;
769
+ dfp = 0;
770
+ count_pr++;
771
+ }
772
+ }
773
+ }
774
+ if (yes_out_hist == 1)fprintf(out_hist, "\t%f", auprc[j][k][m]);
775
+ }
776
+ if (yes_out_hist == 1)fprintf(out_hist, "\n");
777
+ }
778
+ }
779
+ if (yes_out_prc == 1)fclose(out_prc);
780
+ if (yes_out_hist == 1)fclose(out_hist);
781
+ double auprc_max[3][2] = { { -1,-1 },{ -1,-1 },{ -1,-1 } };
782
+ double auprc_ov[3][2] = { { -1,-1 },{ -1,-1 },{ -1,-1 } };
783
+ //int j_best[3][2] = { { 0,0 },{ 0,0 },{ 0,0 } };
784
+ //int j_ov[3][2] = { { 0,0 },{ 0,0 },{ 0,0 } };
785
+ char cepi[] = "+-";
786
+ for (j = 0; j < 3; j++)
787
+ {
788
+ for (i = 0; i < n_shift[j]; i++)
789
+ {
790
+ if (wshift[j][i] >= -wshift_ov[j] && wshift[j][i] <= wshift_ov[j])
791
+ {
792
+ for (k = 0; k < 2; k++)
793
+ {
794
+ if (auprc[j][k][i] > auprc_ov[j][k])
795
+ {
796
+ auprc_ov[j][k] = auprc[j][k][i];
797
+ strand_final_over[j] = cepi[k];
798
+ shift_final_over[j] = wshift[j][i];
799
+ auprc_final_over[j] = auprc[j][k][i];
800
+ }
801
+ }
802
+ }
803
+ }
804
+ for (i = 0; i < n_shift[j]; i++)
805
+ {
806
+ for (k = 0; k < 2; k++)
807
+ {
808
+ if (auprc[j][k][i] > auprc_max[j][k])
809
+ {
810
+ auprc_final_all[j] = auprc_max[j][k] = auprc[j][k][i];
811
+ strand_final_all[j] = cepi[k];
812
+ shift_final_all[j] = wshift[j][i];
813
+ }
814
+ }
815
+ }
816
+ }
817
+ /*for (j = 0; j < 3; j++)
818
+ {
819
+ auprc_final_all[j] = Max(auprc_max[j][0], auprc_max[j][1]);
820
+ auprc_final_over[j] = Max(auprc_ov[j][0], auprc_ov[j][1]);
821
+ if (auprc_max[j][0] >= auprc_max[j][1])
822
+ {
823
+ strand_final_all[j] = '+';
824
+ shift_final_all[j] = wshift[j][j_best[j][0]];
825
+ }
826
+ else
827
+ {
828
+ strand_final_all[j] = '-';
829
+ shift_final_all[j] = wshift[j][j_best[j][1]];
830
+ }
831
+ if (auprc_ov[j][0] >= auprc_ov[j][1])
832
+ {
833
+ strand_final_over[j] = '+';
834
+ shift_final_over[j] = wshift[j][j_ov[j][0]];
835
+ }
836
+ else
837
+ {
838
+ strand_final_over[j] = '-';
839
+ shift_final_over[j] = wshift[j][j_ov[j][1]];
840
+ }
841
+ }*/
842
+ double maxi = Max(auprc_final_over[1], auprc_final_over[2]);
843
+ auprc_final_over1 = auprc_final_over[0] / maxi;
844
+ if (auprc_final_over1 > 1)auprc_final_over1 = 1;
845
+ maxi = Max(auprc_final_all[1], auprc_final_all[2]);
846
+ auprc_final_all1 = auprc_final_all[0] / maxi;
847
+ if (auprc_final_all1 > 1)auprc_final_all1 = 1;
848
+ delete[] errs;
849
+ for (k = 0; k < 2; k++)
850
+ {
851
+ for (j = 0; j < 2; j++)
852
+ {
853
+ delete[] err_inx[k][j];
854
+ }
855
+ delete[] err_inx[k];
856
+ }
857
+ delete[] err_inx;
858
+ for (j = 0; j < 3; j++)
859
+ {
860
+ for (k = 0; k < 2; k++)
861
+ {
862
+ for (i = 0; i < n_shift[j]; i++)delete[] tp[j][k][i];
863
+ delete[] tp[j][k];
864
+ }
865
+ delete[] tp[j];
866
+ }
867
+ delete[] tp;
868
+ for (j = 0; j < 3; j++)delete[] fp[j];
869
+ delete[] fp;
870
+ for (j = 0; j < 3; j++)
871
+ {
872
+ for (k = 0; k < 2; k++)delete[] tp_tot[j][k];
873
+ delete[] tp_tot[j];
874
+ }
875
+ delete[] tp_tot;
876
+ for (j = 0; j < 3; j++)
877
+ {
878
+ for (k = 0; k < 2; k++)
879
+ {
880
+ delete[] auprc[j][k];
881
+ }
882
+ delete[] auprc[j];
883
+ }
884
+ delete[] auprc;
885
+ for (k = 0; k < 2; k++)
886
+ {
887
+ delete[] inx_self[k];
888
+ }
889
+ for (j = 0; j < 3; j++)delete[] wshift[j];
890
+ delete[] wshift;
891
+ return 1;
892
+ }
893
// Extracts field number `nstol` (0-based) of the `sep`-separated string `str`
// into `ret`, a buffer of `size` bytes.
// Returns 1 on success and -1 when `str` has fewer than `nstol` separators
// (or when `size` is 0). The last field runs up to the end of the string.
// The result is always NUL-terminated and never longer than size-1
// characters: a longer field is truncated. The previous version copied the
// whole field regardless of `size` and could write past the end of `ret`.
int UnderStol(char* str, int nstol, char* ret, size_t size, char sep)
{
	if (size == 0) return -1;
	memset(ret, 0, size);
	// skip `nstol` separators to reach the first character of the field
	const char* begin = str;
	int passed = 0;
	while (passed < nstol)
	{
		if (*begin == '\0') return -1;
		if (*begin == sep) passed++;
		begin++;
	}
	// the field ends at the next separator or at the end of the string
	const char* end = begin;
	while (*end != '\0' && *end != sep) end++;
	size_t len = (size_t)(end - begin);
	if (len > size - 1) len = size - 1;
	memcpy(ret, begin, len);
	ret[len] = '\0';
	return 1;
}
920
+ int main(int argc, char* argv[])
921
+ {
922
+ int i, k, mot;
923
+ char file_fasta[ARGLEN], file_model[2][ARGLEN], type_model[2][4], file_table[2][ARGLEN];
924
+ char file_hist[ARGLEN], file_prc[ARGLEN], file_short_all[ARGLEN], file_short_over[ARGLEN], file_sta_long[ARGLEN];
925
+ char*** seq;// peaks
926
+ double*** pwm;
927
+ city sta[2];
928
+ int model_type[2] = { -1,-1 };// 0 pwm 1 sga
929
+
930
+ if (argc != 17)
931
+ {
932
+ fprintf(stderr, "Syntax error: %s 1file_fasta 2motif1_type 3motif2_type 4file_motif1_matrix 5file_motif2_matrix 6file_motif1_table 7file_motif2_table 8int max_shift_of_motif_centers", argv[0]);
933
+ fprintf(stderr, "9double pvalue_thr 10file out_hist 11yes,0no out_hist 12file_out_prc 13int 1yes,0no out_prc 14file_out_short_over 15file_out_short_all 16file_out_sta_detailed\n");
934
+ return -1;
935
+ }
936
+ strcpy(file_fasta, argv[1]);
937
+ strcpy(type_model[0], argv[2]);//pwm or sga - type
938
+ strcpy(type_model[1], argv[3]);//pwm or sga - type
939
+ strcpy(file_model[0], argv[4]);//pwm or sga - matrix
940
+ strcpy(file_model[1], argv[5]);//pwm or sga - matrix
941
+ strcpy(file_table[0], argv[6]);//pwm or sga - thr err table
942
+ strcpy(file_table[1], argv[7]);//pwm or sga - thr err table
943
+ int shift = atoi(argv[8]); // shift of motifs
944
+ double pvalue = atof(argv[9]); //threshold of expected recogntion rate
945
+ double pvalue_lg = -log10(pvalue);
946
+ strcpy(file_hist, argv[10]);
947
+ int yes_out_hist = atoi(argv[11]);
948
+ strcpy(file_prc, argv[12]);
949
+ int yes_out_prc = atoi(argv[13]);
950
+ strcpy(file_short_over, argv[14]);
951
+ strcpy(file_short_all, argv[15]);
952
+ strcpy(file_sta_long, argv[16]);
953
+
954
+ {
955
+ char pwm1[] = "pwm", pwm2[] = "PWM", sga1[] = "sga", sga2[] = "SGA";
956
+ for (i = 0; i < 2; i++)
957
+ {
958
+ if (strcmp(type_model[i], pwm1) == 0 || strcmp(type_model[i], pwm2) == 0)
959
+ {
960
+ model_type[i] = 0;
961
+ }
962
+ if (strcmp(type_model[i], sga1) == 0 || strcmp(type_model[i], sga2) == 0)
963
+ {
964
+ model_type[i] = 1;
965
+ }
966
+
967
+ }
968
+ for (i = 0; i < 2; i++)
969
+ {
970
+ if (model_type[i] == -1)
971
+ {
972
+ printf("Model type %d %s is not recognized\n", i + 1, type_model[i]);
973
+ exit(1);
974
+ }
975
+ }
976
+
977
+ }
978
+ int length_fasta_max = 0, nseq_real = 0;
979
+ seq = NULL;
980
+ int ftp = fasta_to_plain0(file_fasta, length_fasta_max, nseq_real);
981
+ if (ftp == -1)
982
+ {
983
+ fprintf(stderr, "Error: Fasta file %s error\n", file_fasta);
984
+ return -1;
985
+ }
986
+ int* peak_len_real;
987
+ peak_len_real = new int[nseq_real];
988
+ if (peak_len_real == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
989
+
990
+ seq = new char** [2];
991
+ if (seq == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
992
+ for (k = 0; k < 2; k++)
993
+ {
994
+ seq[k] = new char* [nseq_real];
995
+ if (seq[k] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
996
+ for (i = 0; i < nseq_real; i++)
997
+ {
998
+ int length_fasta_max1 = length_fasta_max + 1;
999
+ seq[k][i] = new char[length_fasta_max1];
1000
+ if (seq[k][i] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
1001
+ memset(seq[k][i], '\0', length_fasta_max1);
1002
+ }
1003
+ }
1004
+ pwm = new double** [2];
1005
+ if (pwm == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
1006
+ for (k = 0; k < 2; k++)
1007
+ {
1008
+ pwm[k] = new double* [MATLEN];
1009
+ if (pwm[k] == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
1010
+ for (i = 0; i < MATLEN; i++)
1011
+ {
1012
+ pwm[k][i] = new double[OLIGNUM];
1013
+ if (pwm[k][i] == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
1014
+ }
1015
+ }
1016
+
1017
+ ftp = fasta_to_plain1(file_fasta, length_fasta_max, nseq_real, seq, peak_len_real);
1018
+ if (ftp == -1)
1019
+ {
1020
+ fprintf(stderr, "File %s error 2nd stage\n", file_fasta);
1021
+ return -1;
1022
+ }
1023
+ int olen[2];
1024
+ int nthr_dist[2];
1025
+ double min[2] = { 0,0 }, raz[2] = { 0,0 };
1026
+
1027
+ double** thr_all;
1028
+ thr_all = new double* [2];
1029
+ if (thr_all == NULL) { fprintf(stderr, "Thr_all Out of memory..."); return -1; }
1030
+ double** fpr_all;
1031
+ fpr_all = new double* [2];
1032
+ if (fpr_all == NULL) { fprintf(stderr, "Thr_all Out of memory..."); return -1; }
1033
+ for (mot = 0; mot < 2; mot++)
1034
+ {
1035
+ //printf("Mot %d\n", mot);
1036
+ nthr_dist[mot] = 0;
1037
+ FILE* in_tab;
1038
+ if ((in_tab = fopen(file_table[mot], "rt")) == NULL)
1039
+ {
1040
+ printf("Input file %s can't be opened!", file_table[mot]);
1041
+ return -1;
1042
+ }
1043
+ char d[ARGLEN];
1044
+ //fgets(d, sizeof(d), in_tab);//header
1045
+ while (fgets(d, sizeof(d), in_tab) != NULL)
1046
+ {
1047
+ char c = d[0];
1048
+ char sep = '\t';
1049
+ if (c == '-' || isdigit(c))
1050
+ {
1051
+ char s[30];
1052
+ int test = UnderStol(d, 1, s, sizeof(s), sep);
1053
+ if (test == -1) { printf("Wrong format %s\n", d); exit(1); }
1054
+ nthr_dist[mot]++;
1055
+ double fprx = atof(s);
1056
+ if (fprx < pvalue_lg)break;
1057
+ }
1058
+ }
1059
+ rewind(in_tab);
1060
+ thr_all[mot] = new double[nthr_dist[mot]];
1061
+ if (thr_all[mot] == NULL) { puts("thr_all Out of memory..."); return -1; }
1062
+ fpr_all[mot] = new double[nthr_dist[mot]];
1063
+ if (fpr_all[mot] == NULL) { puts("fpr_all Out of memory..."); return -1; }
1064
+ k = 0;
1065
+ while (fgets(d, sizeof(d), in_tab) != NULL)
1066
+ {
1067
+ char c = d[0];
1068
+ if (c == '-' || isdigit(c))
1069
+ {
1070
+ char s[30];
1071
+ char sep = '\t';
1072
+ int test = UnderStol(d, 1, s, sizeof(s), sep);
1073
+ if (test == -1) { printf("Wrong format %s\n", d); exit(1); }
1074
+ thr_all[mot][k] = atof(d);
1075
+ fpr_all[mot][k] = atof(s);
1076
+ if(fpr_all[mot][k] < pvalue_lg)break;
1077
+ k++;
1078
+ }
1079
+ }
1080
+ fclose(in_tab);
1081
+ }
1082
+ for (mot = 0; mot < 2; mot++)
1083
+ {
1084
+ //printf("Mot %d\n", mot);
1085
+ if (model_type[mot] == 0)
1086
+ {
1087
+ int test = pfm_to_pwm(file_model[mot], pwm[mot]);
1088
+ if (test == -1)return -1;
1089
+ else olen[mot] = test;
1090
+ PWMScore(pwm[mot], min[mot], raz[mot], olen[mot]);
1091
+ }
1092
+ else
1093
+ {
1094
+ if (sta[mot].get_file(file_model[mot]) == -1)
1095
+ {
1096
+ printf("Site %s function not found!", file_model[mot]);
1097
+ exit(1);
1098
+ }
1099
+ olen[mot] = sta[mot].len;
1100
+ }
1101
+ }
1102
+ double auprc_final_all[3], auprc_final_over[3];
1103
+ double auprc_final_over1 = 0, auprc_final_all1 = 0;
1104
+ char strand_final_all[4], strand_final_over[4];
1105
+ for (k = 0; k < 3; k++)strand_final_all[k] = strand_final_over[k] = '+';
1106
+ strand_final_all[3] = strand_final_over[3] = '\0';
1107
+ double shift_final_all[3] = {0,0,0}, shift_final_over[3] = { 0,0,0 };
1108
+ for (k = 0; k < 3; k++)shift_final_all[k] = shift_final_over[k] = 0;
1109
+ PWM_SGA_rec_real(pwm, min, raz, sta, model_type, nthr_dist, thr_all, fpr_all, seq, olen, nseq_real, shift, length_fasta_max, file_hist, file_prc, yes_out_hist,yes_out_prc,
1110
+ auprc_final_all, auprc_final_over, auprc_final_over1, auprc_final_all1, shift_final_all, shift_final_over, strand_final_all, strand_final_over);
1111
+ FILE* out_sta_long;
1112
+ if ((out_sta_long = fopen(file_sta_long, "at")) == NULL)
1113
+ {
1114
+ printf("Output file can't be opened!\n");
1115
+ exit(1);
1116
+ }
1117
+ fprintf(out_sta_long, "%s\t%s", file_model[0], file_model[1]);
1118
+ fprintf(out_sta_long, "\tOverlap\t%f", auprc_final_over1);
1119
+ fprintf(out_sta_long, "\tAll\t%f", auprc_final_all1);
1120
+ fprintf(out_sta_long, "\tHeterotypic");
1121
+ //overap
1122
+ fprintf(out_sta_long, "\t%f", auprc_final_over[0]);
1123
+ fprintf(out_sta_long, "\t%.1f", shift_final_over[0]);
1124
+ fprintf(out_sta_long, "\t%c", strand_final_over[0]);
1125
+ //all
1126
+ fprintf(out_sta_long, "\t%f", auprc_final_all[0]);
1127
+ fprintf(out_sta_long, "\t%.1f", shift_final_all[0]);
1128
+ fprintf(out_sta_long, "\t%c", strand_final_all[0]);
1129
+ fprintf(out_sta_long, "\tHomotypic");
1130
+ //overap
1131
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%f", auprc_final_over[i]);
1132
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%.1f", shift_final_over[i]);
1133
+ fprintf(out_sta_long, "\t");
1134
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "%c", strand_final_over[i]);
1135
+ //all
1136
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%f", auprc_final_all[i]);
1137
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%.1f", shift_final_all[i]);
1138
+ fprintf(out_sta_long, "\t");
1139
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "%c", strand_final_all[i]);
1140
+ fprintf(out_sta_long, "\n");
1141
+ fclose(out_sta_long);
1142
+ FILE* out_short_over;
1143
+ if ((out_short_over = fopen(file_short_over, "at")) == NULL)
1144
+ {
1145
+ printf("Output file can't be opened!\n");
1146
+ exit(1);
1147
+ }
1148
+ fprintf(out_short_over, "\t%f", auprc_final_over1);
1149
+ fclose(out_short_over);
1150
+
1151
+ FILE* out_short_all;
1152
+ if ((out_short_all = fopen(file_short_all, "at")) == NULL)
1153
+ {
1154
+ printf("Output file can't be opened!\n");
1155
+ exit(1);
1156
+ }
1157
+ fprintf(out_short_all, "\t%f", auprc_final_all1);
1158
+ fclose(out_short_all);
1159
+
1160
+ for (k = 0; k < 2; k++)
1161
+ {
1162
+ delete[] thr_all[k];
1163
+ }
1164
+ delete[] thr_all;
1165
+ for (k = 0; k < 2; k++)
1166
+ {
1167
+ delete[] fpr_all[k];
1168
+ }
1169
+ delete[] fpr_all;
1170
+ delete[] peak_len_real;
1171
+ for (k = 0; k < 2; k++)
1172
+ {
1173
+ for (i = 0; i < nseq_real; i++)
1174
+ {
1175
+ delete[] seq[k][i];
1176
+ }
1177
+ delete[] seq[k];
1178
+ }
1179
+ delete[] seq;
1180
+ for (k = 0; k < 2; k++)
1181
+ {
1182
+ for (i = 0; i < MATLEN; i++)
1183
+ {
1184
+ delete[] pwm[k][i];
1185
+ }
1186
+ delete[] pwm[k];
1187
+ }
1188
+ delete[] pwm;
1189
+ return 0;
1190
+ }
1191
+
1192
+ // Function to run the main computational logic
1193
+ extern "C" int run_motali_cpp(
1194
+ const char* file_fasta,
1195
+ const char* type_model_1,
1196
+ const char* type_model_2,
1197
+ const char* file_model_1,
1198
+ const char* file_model_2,
1199
+ const char* file_table_1,
1200
+ const char* file_table_2,
1201
+ int shift,
1202
+ double pvalue,
1203
+ const char* file_hist,
1204
+ int yes_out_hist,
1205
+ const char* file_prc,
1206
+ int yes_out_prc,
1207
+ const char* file_short_over,
1208
+ const char* file_short_all,
1209
+ const char* file_sta_long
1210
+ ) {
1211
+ int i, k, mot;
1212
+ char file_fasta_local[ARGLEN], file_model[2][ARGLEN], type_model[2][4], file_table[2][ARGLEN];
1213
+ char file_hist_local[ARGLEN], file_prc_local[ARGLEN], file_short_all_local[ARGLEN], file_short_over_local[ARGLEN], file_sta_long_local[ARGLEN];
1214
+ char*** seq;// peaks
1215
+ double*** pwm;
1216
+ city sta[2];
1217
+ int model_type[2] = { -1,-1 };// 0 pwm 1 sga
1218
+
1219
+ // Copy input parameters to local variables
1220
+ strcpy(file_fasta_local, file_fasta);
1221
+ strcpy(type_model[0], type_model_1);//pwm or sga - type
1222
+ strcpy(type_model[1], type_model_2);//pwm or sga - type
1223
+ strcpy(file_model[0], file_model_1);//pwm or sga - matrix
1224
+ strcpy(file_model[1], file_model_2);//pwm or sga - matrix
1225
+ strcpy(file_table[0], file_table_1);//pwm or sga - thr err table
1226
+ strcpy(file_table[1], file_table_2);//pwm or sga - thr err table
1227
+ strcpy(file_hist_local, file_hist);
1228
+ strcpy(file_prc_local, file_prc);
1229
+ strcpy(file_short_over_local, file_short_over);
1230
+ strcpy(file_short_all_local, file_short_all);
1231
+ strcpy(file_sta_long_local, file_sta_long);
1232
+
1233
+ double pvalue_lg = -log10(pvalue);
1234
+
1235
+ {
1236
+ char pwm1[] = "pwm", pwm2[] = "PWM", sga1[] = "sga", sga2[] = "SGA";
1237
+ for (i = 0; i < 2; i++)
1238
+ {
1239
+ if (strcmp(type_model[i], pwm1) == 0 || strcmp(type_model[i], pwm2) == 0)
1240
+ {
1241
+ model_type[i] = 0;
1242
+ }
1243
+ if (strcmp(type_model[i], sga1) == 0 || strcmp(type_model[i], sga2) == 0)
1244
+ {
1245
+ model_type[i] = 1;
1246
+ }
1247
+
1248
+ }
1249
+ for (i = 0; i < 2; i++)
1250
+ {
1251
+ if (model_type[i] == -1)
1252
+ {
1253
+ printf("Model type %d %s is not recognized\n", i + 1, type_model[i]);
1254
+ return -1; // Changed from exit(1) to return -1
1255
+ }
1256
+ }
1257
+
1258
+ }
1259
+ int length_fasta_max = 0, nseq_real = 0;
1260
+ seq = NULL;
1261
+ int ftp = fasta_to_plain0(file_fasta_local, length_fasta_max, nseq_real);
1262
+ if (ftp == -1)
1263
+ {
1264
+ fprintf(stderr, "Error: Fasta file %s error\n", file_fasta_local);
1265
+ return -1;
1266
+ }
1267
+ int* peak_len_real;
1268
+ peak_len_real = new int[nseq_real];
1269
+ if (peak_len_real == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
1270
+
1271
+ seq = new char** [2];
1272
+ if (seq == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
1273
+ for (k = 0; k < 2; k++)
1274
+ {
1275
+ seq[k] = new char* [nseq_real];
1276
+ if (seq[k] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
1277
+ for (i = 0; i < nseq_real; i++)
1278
+ {
1279
+ int length_fasta_max1 = length_fasta_max + 1;
1280
+ seq[k][i] = new char[length_fasta_max1];
1281
+ if (seq[k][i] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
1282
+ memset(seq[k][i], '\0', length_fasta_max1);
1283
+ }
1284
+ }
1285
+ pwm = new double** [2];
1286
+ if (pwm == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
1287
+ for (k = 0; k < 2; k++)
1288
+ {
1289
+ pwm[k] = new double* [MATLEN];
1290
+ if (pwm[k] == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
1291
+ for (i = 0; i < MATLEN; i++)
1292
+ {
1293
+ pwm[k][i] = new double[OLIGNUM];
1294
+ if (pwm[k][i] == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
1295
+ }
1296
+ }
1297
+
1298
+ ftp = fasta_to_plain1(file_fasta_local, length_fasta_max, nseq_real, seq, peak_len_real);
1299
+ if (ftp == -1)
1300
+ {
1301
+ fprintf(stderr, "File %s error 2nd stage\n", file_fasta_local);
1302
+ return -1;
1303
+ }
1304
+ int olen[2];
1305
+ int nthr_dist[2];
1306
+ double min[2] = { 0,0 }, raz[2] = { 0,0 };
1307
+
1308
+ double** thr_all;
1309
+ thr_all = new double* [2];
1310
+ if (thr_all == NULL) { fprintf(stderr, "Thr_all Out of memory..."); return -1; }
1311
+ double** fpr_all;
1312
+ fpr_all = new double* [2];
1313
+ if (fpr_all == NULL) { fprintf(stderr, "Thr_all Out of memory..."); return -1; }
1314
+ for (mot = 0; mot < 2; mot++)
1315
+ {
1316
+ //printf("Mot %d\n", mot);
1317
+ nthr_dist[mot] = 0;
1318
+ FILE* in_tab;
1319
+ if ((in_tab = fopen(file_table[mot], "rt")) == NULL)
1320
+ {
1321
+ printf("Input file %s can't be opened!", file_table[mot]);
1322
+ return -1;
1323
+ }
1324
+ char d[ARGLEN];
1325
+ //fgets(d, sizeof(d), in_tab);//header
1326
+ while (fgets(d, sizeof(d), in_tab) != NULL)
1327
+ {
1328
+ char c = d[0];
1329
+ char sep = '\t';
1330
+ if (c == '-' || isdigit(c))
1331
+ {
1332
+ char s[30];
1333
+ int test = UnderStol(d, 1, s, sizeof(s), sep);
1334
+ if (test == -1) { printf("Wrong format %s\n", d); return -1; } // Changed from exit(1) to return -1
1335
+ nthr_dist[mot]++;
1336
+ double fprx = atof(s);
1337
+ if (fprx < pvalue_lg)break;
1338
+ }
1339
+ }
1340
+ rewind(in_tab);
1341
+ thr_all[mot] = new double[nthr_dist[mot]];
1342
+ if (thr_all[mot] == NULL) { puts("thr_all Out of memory..."); return -1; }
1343
+ fpr_all[mot] = new double[nthr_dist[mot]];
1344
+ if (fpr_all[mot] == NULL) { puts("fpr_all Out of memory..."); return -1; }
1345
+ k = 0;
1346
+ while (fgets(d, sizeof(d), in_tab) != NULL)
1347
+ {
1348
+ char c = d[0];
1349
+ if (c == '-' || isdigit(c))
1350
+ {
1351
+ char s[30];
1352
+ char sep = '\t';
1353
+ int test = UnderStol(d, 1, s, sizeof(s), sep);
1354
+ if (test == -1) { printf("Wrong format %s\n", d); return -1; } // Changed from exit(1) to return -1
1355
+ thr_all[mot][k] = atof(d);
1356
+ fpr_all[mot][k] = atof(s);
1357
+ if(fpr_all[mot][k] < pvalue_lg)break;
1358
+ k++;
1359
+ }
1360
+ }
1361
+ fclose(in_tab);
1362
+ }
1363
+ for (mot = 0; mot < 2; mot++)
1364
+ {
1365
+ //printf("Mot %d\n", mot);
1366
+ if (model_type[mot] == 0)
1367
+ {
1368
+ int test = pfm_to_pwm(file_model[mot], pwm[mot]);
1369
+ if (test == -1)return -1;
1370
+ else olen[mot] = test;
1371
+ PWMScore(pwm[mot], min[mot], raz[mot], olen[mot]);
1372
+ }
1373
+ else
1374
+ {
1375
+ if (sta[mot].get_file(file_model[mot]) == -1)
1376
+ {
1377
+ printf("Site %s function not found!", file_model[mot]);
1378
+ return -1; // Changed from exit(1) to return -1
1379
+ }
1380
+ olen[mot] = sta[mot].len;
1381
+ }
1382
+ }
1383
+ double auprc_final_all[3], auprc_final_over[3];
1384
+ double auprc_final_over1 = 0, auprc_final_all1 = 0;
1385
+ char strand_final_all[4], strand_final_over[4];
1386
+ for (k = 0; k < 3; k++)strand_final_all[k] = strand_final_over[k] = '+';
1387
+ strand_final_all[3] = strand_final_over[3] = '\0';
1388
+ double shift_final_all[3] = {0,0,0}, shift_final_over[3] = { 0,0,0 };
1389
+ for (k = 0; k < 3; k++)shift_final_all[k] = shift_final_over[k] = 0;
1390
+ PWM_SGA_rec_real(pwm, min, raz, sta, model_type, nthr_dist, thr_all, fpr_all, seq, olen, nseq_real, shift, length_fasta_max, file_hist_local, file_prc_local, yes_out_hist, yes_out_prc,
1391
+ auprc_final_all, auprc_final_over, auprc_final_over1, auprc_final_all1, shift_final_all, shift_final_over, strand_final_all, strand_final_over);
1392
+
1393
+ // Output results to files
1394
+ FILE* out_sta_long;
1395
+ if ((out_sta_long = fopen(file_sta_long_local, "at")) == NULL)
1396
+ {
1397
+ printf("Output file can't be opened!\n");
1398
+ return -1; // Changed from exit(1) to return -1
1399
+ }
1400
+ fprintf(out_sta_long, "%s\t%s", file_model[0], file_model[1]);
1401
+ fprintf(out_sta_long, "\tOverlap\t%f", auprc_final_over1);
1402
+ fprintf(out_sta_long, "\tAll\t%f", auprc_final_all1);
1403
+ fprintf(out_sta_long, "\tHeterotypic");
1404
+ //overap
1405
+ fprintf(out_sta_long, "\t%f", auprc_final_over[0]);
1406
+ fprintf(out_sta_long, "\t%.1f", shift_final_over[0]);
1407
+ fprintf(out_sta_long, "\t%c", strand_final_over[0]);
1408
+ //all
1409
+ fprintf(out_sta_long, "\t%f", auprc_final_all[0]);
1410
+ fprintf(out_sta_long, "\t%.1f", shift_final_all[0]);
1411
+ fprintf(out_sta_long, "\t%c", strand_final_all[0]);
1412
+ fprintf(out_sta_long, "\tHomotypic");
1413
+ //overap
1414
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%f", auprc_final_over[i]);
1415
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%.1f", shift_final_over[i]);
1416
+ fprintf(out_sta_long, "\t");
1417
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "%c", strand_final_over[i]);
1418
+ //all
1419
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%f", auprc_final_all[i]);
1420
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%.1f", shift_final_all[i]);
1421
+ fprintf(out_sta_long, "\t");
1422
+ for (i = 1; i < 3; i++)fprintf(out_sta_long, "%c", strand_final_all[i]);
1423
+ fprintf(out_sta_long, "\n");
1424
+ fclose(out_sta_long);
1425
+
1426
+ FILE* out_short_over;
1427
+ if ((out_short_over = fopen(file_short_over_local, "at")) == NULL)
1428
+ {
1429
+ printf("Output file can't be opened!\n");
1430
+ return -1; // Changed from exit(1) to return -1
1431
+ }
1432
+ fprintf(out_short_over, "\t%f", auprc_final_over1);
1433
+ fclose(out_short_over);
1434
+
1435
+ FILE* out_short_all;
1436
+ if ((out_short_all = fopen(file_short_all_local, "at")) == NULL)
1437
+ {
1438
+ printf("Output file can't be opened!\n");
1439
+ return -1; // Changed from exit(1) to return -1
1440
+ }
1441
+ fprintf(out_short_all, "\t%f", auprc_final_all1);
1442
+ fclose(out_short_all);
1443
+
1444
+ // Cleanup allocated memory
1445
+ for (k = 0; k < 2; k++)
1446
+ {
1447
+ delete[] thr_all[k];
1448
+ }
1449
+ delete[] thr_all;
1450
+ for (k = 0; k < 2; k++)
1451
+ {
1452
+ delete[] fpr_all[k];
1453
+ }
1454
+ delete[] fpr_all;
1455
+ delete[] peak_len_real;
1456
+ for (k = 0; k < 2; k++)
1457
+ {
1458
+ for (i = 0; i < nseq_real; i++)
1459
+ {
1460
+ delete[] seq[k][i];
1461
+ }
1462
+ delete[] seq[k];
1463
+ }
1464
+ delete[] seq;
1465
+ for (k = 0; k < 2; k++)
1466
+ {
1467
+ for (i = 0; i < MATLEN; i++)
1468
+ {
1469
+ delete[] pwm[k][i];
1470
+ }
1471
+ delete[] pwm[k];
1472
+ }
1473
+ delete[] pwm;
1474
+
1475
+ return 0;
1476
+ }