yamcot 1.0.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yamcot/__init__.py +46 -0
- yamcot/_core/__init__.py +17 -0
- yamcot/_core/_core.cp310-win_amd64.pyd +0 -0
- yamcot/_core/bindings.cpp +28 -0
- yamcot/_core/core_functions.h +29 -0
- yamcot/_core/fasta_to_plain.h +182 -0
- yamcot/_core/mco_prc.cpp +1476 -0
- yamcot/_core/pfm_to_pwm.h +130 -0
- yamcot/cli.py +621 -0
- yamcot/comparison.py +1066 -0
- yamcot/execute.py +97 -0
- yamcot/functions.py +787 -0
- yamcot/io.py +522 -0
- yamcot/models.py +1161 -0
- yamcot/pipeline.py +402 -0
- yamcot/ragged.py +126 -0
- yamcot-1.0.0.dist-info/METADATA +433 -0
- yamcot-1.0.0.dist-info/RECORD +21 -0
- yamcot-1.0.0.dist-info/WHEEL +5 -0
- yamcot-1.0.0.dist-info/entry_points.txt +3 -0
- yamcot-1.0.0.dist-info/licenses/LICENSE +21 -0
yamcot/_core/mco_prc.cpp
ADDED
|
@@ -0,0 +1,1476 @@
|
|
|
1
|
+
#define _CRT_SECURE_NO_WARNINGS
|
|
2
|
+
|
|
3
|
+
#include <stdio.h>
|
|
4
|
+
#include <stdlib.h>
|
|
5
|
+
#include <string.h>
|
|
6
|
+
#include <math.h>
|
|
7
|
+
#include <time.h>
|
|
8
|
+
#include <ctype.h>
|
|
9
|
+
|
|
10
|
+
// Smaller / larger of two values. The whole expansion is parenthesized and
// carries no trailing ';' so the macros can be used inside larger expressions.
// Each argument may be evaluated twice: do not pass expressions with side effects.
#define Min(a,b) (((a)>(b)) ? (b) : (a))
#define Max(a,b) (((a)>(b)) ? (a) : (b))
#define SEQLEN 12000
#define MATLEN 50 //max matrix length
#define SPACLEN 100 //max spacer length
#define ARGLEN 300 //max argv length
#define OLIGNUM 4 //number of matrix columns per position: 16 for dinucleotides, 4 for mononucleotides
#define DIM 100
|
|
18
|
+
|
|
19
|
+
// One entry of the pooled threshold list that is sorted with compare_qbs.
struct qbs {
	double err; // ERR score (the sort key)
	int mod;    // model the entry belongs to: 0 or 1
};
|
|
25
|
+
int compare_qbs(const void* X1, const void* X2)//decrease
|
|
26
|
+
{
|
|
27
|
+
struct qbs* S1 = (struct qbs*)X1;
|
|
28
|
+
struct qbs* S2 = (struct qbs*)X2;
|
|
29
|
+
if (S1->err - S2->err > 0)return -1;
|
|
30
|
+
if (S1->err - S2->err < 0)return 1;
|
|
31
|
+
return 0;
|
|
32
|
+
}
|
|
33
|
+
// Finds the n-th occurrence of c in str, counting from the start.
// Returns its 0-based index, or -1 when str holds fewer than n occurrences
// (which is always the case for n <= 0).
int StrNStr(char* str, char c, int n)
{
	const int total = (int)strlen(str);
	int seen = 0;
	int pos;
	for (pos = 0; pos < total; pos++)
	{
		if (str[pos] != c) continue;
		seen++;
		if (seen == n) return pos;
	}
	return -1;
}
|
|
47
|
+
// Finds the n-th occurrence of c in str, counting backwards from the end.
// Returns its 0-based index, or -1 when str holds fewer than n occurrences
// (which is always the case for n <= 0).
int StrEndNStr(char* str, char c, int n)
{
	int seen = 0;
	int pos = (int)strlen(str);
	while (pos > 0)
	{
		pos--;
		if (str[pos] != c) continue;
		seen++;
		if (seen == n) return pos;
	}
	return -1;
}
|
|
61
|
+
// Converts the ASCII upper-case letters of d to lower case, in place.
// Every other byte (digits, punctuation, whitespace, bytes >= 0x80) is left
// untouched; the previous version added 32 to every byte below 'a' and so
// corrupted such characters.
// Returns d.
char* TransStr(char* d)
{
	size_t i;
	for (i = 0; d[i] != '\0'; i++)
	{
		if (d[i] >= 'A' && d[i] <= 'Z') d[i] = (char)(d[i] + ('a' - 'A'));
	}
	return d;
}
|
|
73
|
+
// Converts the ASCII lower-case letters of d to upper case, in place.
// Every other byte is left untouched; the previous version subtracted 32 from
// every byte >= 'a', which also altered '{', '|', '}', '~' and DEL.
// Returns d.
char* TransStrBack(char* d)
{
	size_t i;
	for (i = 0; d[i] != '\0'; i++)
	{
		if (d[i] >= 'a' && d[i] <= 'z') d[i] = (char)(d[i] - ('a' - 'A'));
	}
	return d;
}
|
|
85
|
+
// Removes every occurrence of c from str in place, closing the gaps and
// re-terminating the shortened string.
void DelChar(char* str, char c)
{
	char* dst = str;
	const char* src;
	for (src = str; *src != '\0'; src++)
	{
		if (*src == c) continue;
		*dst = *src;
		dst++;
	}
	*dst = '\0';
}
|
|
97
|
+
// Complement of a single nucleotide letter, preserving case for a/c/g/t/n.
// Any other character maps to 'n'.
static char ComplBase(char base)
{
	switch (base)
	{
	case 'a': return 't';
	case 't': return 'a';
	case 'c': return 'g';
	case 'g': return 'c';
	case 'A': return 'T';
	case 'T': return 'A';
	case 'C': return 'G';
	case 'G': return 'C';
	case 'N': return 'N';
	case 'n': return 'n';
	default: return 'n';
	}
}
// Replaces d with its reverse complement, in place.
// Works from both ends towards the middle, so no temporary copy of the
// sequence is allocated (the former heap copy and its unreachable
// out-of-memory branch are gone).
// Always returns 1.
int ComplStr(char* d)
{
	size_t head = 0;
	size_t tail = strlen(d); // one past the last unprocessed character
	while (tail - head >= 2)
	{
		const char left = ComplBase(d[head]);
		d[head] = ComplBase(d[tail - 1]);
		d[tail - 1] = left;
		head++;
		tail--;
	}
	// odd length: the middle letter stays in place and is only complemented
	if (tail - head == 1) d[head] = ComplBase(d[head]);
	return 1;
}
|
|
130
|
+
// Copies column number nstol (0-based) of the sep-delimited line str into ret.
//   str   - NUL-terminated input line; not modified
//   nstol - index of the wanted column; column 0 starts at the beginning of str
//   ret   - destination buffer, zero-filled before anything is copied
//   size  - capacity of ret in bytes; the copy is truncated to size - 1
//           characters so ret is never overrun (previously size was ignored)
//   sep   - column separator
// The last column extends to the end of str, including any trailing newline.
// Returns 1 on success, -1 when str has no column nstol, nstol is negative
// or size is 0.
int UnderStolStr(char* str, int nstol, char* ret, size_t size, char sep)
{
	size_t pos = 0;
	size_t len = 0;
	int col;
	if (size == 0) return -1;
	memset(ret, '\0', size);
	if (nstol < 0) return -1;
	// skip nstol separators; running out of them means the column is absent
	for (col = 0; col < nstol; col++)
	{
		while (str[pos] != '\0' && str[pos] != sep) pos++;
		if (str[pos] == '\0') return -1;
		pos++;
	}
	while (str[pos + len] != '\0' && str[pos + len] != sep) len++;
	if (len > size - 1) len = size - 1;
	memcpy(ret, &str[pos], len);
	ret[len] = '\0';
	return 1;
}
|
|
157
|
+
#include "fasta_to_plain.h"
|
|
158
|
+
#include "pfm_to_pwm.h"
|
|
159
|
+
//#include "pwm_rec.h"
|
|
160
|
+
//#include "pfm_similarity.h"
|
|
161
|
+
|
|
162
|
+
// One row of the table loaded by city::get_file.
struct due {
	double buf; // weight of the row (third tab-separated column)
	int sta;    // first position covered by the row (first column)
	int end;    // last position covered by the row (second column)
	int num;    // code the row is matched against (fourth column)
	// Copies all four fields of this row into *a.
	void get_copy(due* a);
};
|
|
170
|
+
// Copies every field of this row (num, sta, buf, end) into *a.
void due::get_copy(due* a)
{
	*a = *this;
}
|
|
177
|
+
|
|
178
|
+
//set of dinucleotides
|
|
179
|
+
struct city {
|
|
180
|
+
char site[300];
|
|
181
|
+
int size;
|
|
182
|
+
int len;
|
|
183
|
+
double min;
|
|
184
|
+
double raz;
|
|
185
|
+
struct due tot[DIM];
|
|
186
|
+
void get_copy(city* a);
|
|
187
|
+
void sort_all(void);
|
|
188
|
+
int get_file(char* file);
|
|
189
|
+
//void city::fprint_tab(char *file);
|
|
190
|
+
}sta;
|
|
191
|
+
int city::get_file(char* file)
|
|
192
|
+
{
|
|
193
|
+
FILE* in;
|
|
194
|
+
if ((in = fopen(file, "rt")) == NULL)
|
|
195
|
+
{
|
|
196
|
+
printf("Input file %s can't be opened!", file);
|
|
197
|
+
return -1;
|
|
198
|
+
}
|
|
199
|
+
char d[300];
|
|
200
|
+
fgets(d, sizeof(d), in);
|
|
201
|
+
DelChar(d,'\n');
|
|
202
|
+
strcpy(site, d);
|
|
203
|
+
fgets(d, sizeof(d), in);
|
|
204
|
+
size = atoi(d);
|
|
205
|
+
fgets(d, sizeof(d), in);
|
|
206
|
+
len = atoi(d);
|
|
207
|
+
fgets(d, sizeof(d), in);
|
|
208
|
+
min = atof(d);
|
|
209
|
+
fgets(d, sizeof(d), in);
|
|
210
|
+
raz = atof(d);
|
|
211
|
+
char sep = '\t', s[30];
|
|
212
|
+
int i, test;
|
|
213
|
+
for (i = 0; i < size; i++)
|
|
214
|
+
{
|
|
215
|
+
fgets(d, sizeof(d), in);
|
|
216
|
+
tot[i].sta = atoi(d);
|
|
217
|
+
test = UnderStolStr(d, 1, s, sizeof(s), sep);
|
|
218
|
+
if (test == -1) { printf("Wrong format %s\n", d); return(-1); }
|
|
219
|
+
tot[i].end = atoi(s);
|
|
220
|
+
test = UnderStolStr(d, 2, s, sizeof(s), sep);
|
|
221
|
+
if (test == -1) { printf("Wrong format %s\n", d); return(-1); }
|
|
222
|
+
tot[i].buf = atof(s);
|
|
223
|
+
test = UnderStolStr(d, 3, s, sizeof(s), sep);
|
|
224
|
+
if (test == -1) { printf("Wrong format %s\n", d); return(-1); }
|
|
225
|
+
tot[i].num = atoi(s);
|
|
226
|
+
}
|
|
227
|
+
fclose(in);
|
|
228
|
+
return 1;
|
|
229
|
+
}
|
|
230
|
+
// Copies this model into *a: the site string, the four header values and the
// first size rows of tot. Rows of a->tot beyond size are left as they were.
void city::get_copy(city* a)
{
	strcpy(a->site, site);
	a->size = size;
	a->min = min;
	a->len = len;
	a->raz = raz;
	for (int row = 0; row < size; row++)
	{
		a->tot[row] = tot[row];
	}
}
|
|
243
|
+
int compare_due(const void* X1, const void* X2)
|
|
244
|
+
{
|
|
245
|
+
struct due* S1 = (struct due*)X1;
|
|
246
|
+
struct due* S2 = (struct due*)X2;
|
|
247
|
+
if (S1->sta - S2->sta > 0)return 1;
|
|
248
|
+
if (S1->sta - S2->sta < 0)return -1;
|
|
249
|
+
if (S1->end - S2->end > 0)return 1;
|
|
250
|
+
if (S1->end - S2->end < 0)return -1;
|
|
251
|
+
if (S1->num - S2->num > 0)return 1;
|
|
252
|
+
if (S1->num - S2->num < 0)return -1;
|
|
253
|
+
return 0;
|
|
254
|
+
}
|
|
255
|
+
void city::sort_all(void)
|
|
256
|
+
{
|
|
257
|
+
qsort((void*)tot, size, sizeof(tot[0]), compare_due);
|
|
258
|
+
}
|
|
259
|
+
// Maps a lower-case nucleotide letter to its numeric code:
// 'a' -> 0, 'c' -> 1, 'g' -> 2, 't' -> 3, 'n' -> -1, anything else -> -2.
int IdeLet(char c)
{
	if (c == 'a') return 0;
	if (c == 'c') return 1;
	if (c == 'g') return 2;
	if (c == 't') return 3;
	if (c == 'n') return -1;
	return -2;
}
|
|
272
|
+
// Encodes every window of `word` letters of d as a base-4 number.
//   d    - NUL-terminated sequence; only the letters "acgt" are recognised
//   word - window length (1 = single nucleotides)
//   sost - output: the first 4^word entries are zeroed, then sost[i] receives
//          the code of the window starting at position i for every
//          i in 0 .. strlen(d) - word. The caller must provide room for
//          max(4^word, strlen(d) - word + 1) ints.
// The first letter of a window is the most significant digit (a=0, c=1, g=2, t=3).
// A window containing any unrecognised letter gets the code -1. Previously an
// unrecognised letter reused the digit of the letter examined just before it,
// which produced a valid-looking but wrong code for word > 1; for word == 1
// the result is unchanged.
void GetSostPro(char* d, int word, int* sost)
{
	static const char letter[] = "acgt";
	const int lens = (int)strlen(d);
	int size = 1;
	int i, j, k;
	for (k = 0; k < word; k++) size *= 4;
	for (i = 0; i < size; i++) sost[i] = 0;
	for (i = 0; i + word <= lens; i++)
	{
		int code = 0;
		for (j = 0; j < word; j++)
		{
			int let = -1; // reset for every letter so nothing leaks from the previous one
			for (k = 0; k < 4; k++)
			{
				if (d[i + j] == letter[k]) { let = k; break; }
			}
			if (let < 0) { code = -1; break; }
			code = code * 4 + let;
		}
		sost[i] = code;
	}
}
|
|
297
|
+
void PWMScore(double **pwm, double& min, double& raz, int len1)
|
|
298
|
+
{
|
|
299
|
+
int i, j;
|
|
300
|
+
for (i = 0; i < len1; i++)
|
|
301
|
+
{
|
|
302
|
+
double pwmmin = 100;
|
|
303
|
+
double pwmmax = -100;
|
|
304
|
+
for (j = 0; j < OLIGNUM; j++)
|
|
305
|
+
{
|
|
306
|
+
if (pwm[i][j] < pwmmin)pwmmin = pwm[i][j];
|
|
307
|
+
if (pwm[i][j] > pwmmax)pwmmax = pwm[i][j];
|
|
308
|
+
}
|
|
309
|
+
raz += pwmmax;
|
|
310
|
+
min += pwmmin;
|
|
311
|
+
}
|
|
312
|
+
raz -= min;
|
|
313
|
+
}
|
|
314
|
+
int PWM_SGA_rec_real(double ***pwm, double min[2], double raz[2], city sta[2], int model_type[2], int nthr_dist[2], double** thr_all, double** fpr_all, char*** seq, int olen[2], int nseq,
|
|
315
|
+
int shift, int length_fasta_max, char *file_hist, char* file_prc, int yes_out_hist, int yes_out_prc,double *auprc_final_all, double *auprc_final_over,
|
|
316
|
+
double &auprc_final_over1,double &auprc_final_all1,double *shift_final_all,double *shift_final_over,char *strand_final_all,char *strand_final_over)
|
|
317
|
+
{
|
|
318
|
+
int i, j, k, n, m;
|
|
319
|
+
int compl1, kmin, kmax;
|
|
320
|
+
int cod[MATLEN];
|
|
321
|
+
char d[MATLEN];
|
|
322
|
+
int word = 1;
|
|
323
|
+
int nthr_dist1[2], olen1[2];
|
|
324
|
+
double thr_cr[2];
|
|
325
|
+
int nthr_dist_two = nthr_dist[0] + nthr_dist[1];
|
|
326
|
+
if (olen[0] < olen[1]) { kmin = 0; kmax = 1; }
|
|
327
|
+
else { kmin = 1; kmax = 0; }
|
|
328
|
+
double wshift_ov[3];
|
|
329
|
+
int n_shift[3];
|
|
330
|
+
for (i = 0; i < 3; i++) n_shift[i] = 2 * shift;
|
|
331
|
+
int j_ov1[3], j_ov2[3];
|
|
332
|
+
int kpairs[3][2];
|
|
333
|
+
kpairs[0][0] = kpairs[1][0] = kpairs[1][1] = kmax;
|
|
334
|
+
kpairs[0][1] = kpairs[2][0] = kpairs[2][1] = kmin;
|
|
335
|
+
double** wshift;
|
|
336
|
+
wshift = new double* [3];
|
|
337
|
+
if (wshift == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
338
|
+
for(i = 0; i < 3; i++)
|
|
339
|
+
{
|
|
340
|
+
int kk[2];
|
|
341
|
+
int chet[2];
|
|
342
|
+
for (k = 0; k < 2; k++)
|
|
343
|
+
{
|
|
344
|
+
kk[k] = kpairs[i][k];
|
|
345
|
+
if (olen[kk[k]] % 2 == 0)chet[k] = 1;//4etno
|
|
346
|
+
else chet[k] = 0;//ne4etno
|
|
347
|
+
}
|
|
348
|
+
if (chet[0] == chet[1])n_shift[i]++;
|
|
349
|
+
if (chet[0] == chet[1])// -1 0 +1
|
|
350
|
+
{
|
|
351
|
+
if (chet[0] == 0)wshift_ov[i] = (double)(olen[kk[0]] - 1) / 2;
|
|
352
|
+
else wshift_ov[i] = (double)olen[kk[0]] / 2;
|
|
353
|
+
}
|
|
354
|
+
else //-0.5 +0.5
|
|
355
|
+
{
|
|
356
|
+
if (chet[kk[0]] == 1)wshift_ov[i] = (double)olen[kk[0]] / 2 - 0.5;
|
|
357
|
+
else wshift_ov[i] = (double)(olen[kk[0]] - 1) / 2 + 0.5;
|
|
358
|
+
}
|
|
359
|
+
//shift = n_shift / 2;
|
|
360
|
+
wshift[i] = new double[n_shift[i]];
|
|
361
|
+
if (wshift[i] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
362
|
+
if (chet[0] == chet[1])
|
|
363
|
+
{
|
|
364
|
+
wshift[i][0] = -shift;
|
|
365
|
+
}
|
|
366
|
+
else
|
|
367
|
+
{
|
|
368
|
+
wshift[i][0] = 0.5 - shift;
|
|
369
|
+
}
|
|
370
|
+
for (k = 1; k < n_shift[i]; k++)wshift[i][k] = wshift[i][k - 1] + 1;
|
|
371
|
+
for (k = 0; k < n_shift[i]; k++)
|
|
372
|
+
{
|
|
373
|
+
if (wshift[i][k] == -wshift_ov[i])
|
|
374
|
+
{
|
|
375
|
+
j_ov1[i] = k; continue;
|
|
376
|
+
}
|
|
377
|
+
if (wshift[i][k] == wshift_ov[i])
|
|
378
|
+
{
|
|
379
|
+
j_ov2[i] = k; break;
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
double*** auprc;
|
|
384
|
+
auprc = new double** [3];
|
|
385
|
+
for (j = 0; j < 3; j++)
|
|
386
|
+
{
|
|
387
|
+
auprc[j] = new double* [2];
|
|
388
|
+
if (auprc[j] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
389
|
+
for (k = 0; k < 2; k++)
|
|
390
|
+
{
|
|
391
|
+
auprc[j][k] = new double[n_shift[j]];
|
|
392
|
+
if (auprc[j][k] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
for (j = 0; j < 3; j++)for (k = 0; k < 2; k++)for (m = 0; m < n_shift[j]; m++)auprc[j][k][m] = 0;
|
|
396
|
+
double half_win[2];//double shift of center relative to start
|
|
397
|
+
for (k = 0; k < 2; k++)half_win[k] = ((double)olen[k] - 1) / 2;
|
|
398
|
+
int* inx_self[2];
|
|
399
|
+
for (k = 0; k < 2; k++)
|
|
400
|
+
{
|
|
401
|
+
inx_self[k] = new int[nthr_dist[k]];
|
|
402
|
+
if (inx_self[k] == NULL) { fprintf(stderr, "Inx_self Out of memory..."); return -1; }
|
|
403
|
+
}
|
|
404
|
+
for (k = 0; k < 2; k++)for (i = 0; i < nthr_dist[k]; i++)inx_self[k][i] = nthr_dist_two;
|
|
405
|
+
qbs* errs;
|
|
406
|
+
errs = new qbs[nthr_dist_two];
|
|
407
|
+
if (errs == NULL) { fprintf(stderr, "Inx_self Out of memory..."); return -1; }
|
|
408
|
+
j = 0;
|
|
409
|
+
for (k = 0; k < 2; k++)
|
|
410
|
+
{
|
|
411
|
+
for (i = 0; i < nthr_dist[k]; i++)
|
|
412
|
+
{
|
|
413
|
+
errs[j].err = fpr_all[k][i];
|
|
414
|
+
// errs[j].m0 = i;
|
|
415
|
+
errs[j].mod = k;
|
|
416
|
+
j++;
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
qsort(errs, nthr_dist_two, sizeof(errs[0]), compare_qbs);
|
|
420
|
+
{
|
|
421
|
+
int cou[2];
|
|
422
|
+
for (k = 0; k < 2; k++)cou[k] = 0;
|
|
423
|
+
for (k = 0; k < nthr_dist_two; k++)
|
|
424
|
+
{
|
|
425
|
+
int model = errs[k].mod;
|
|
426
|
+
//int model1 = 1 - errs[k].mod;
|
|
427
|
+
inx_self[model][cou[model]] = k;
|
|
428
|
+
//inx_cross[model1][cou[model1]] = k;
|
|
429
|
+
cou[model]++;
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
int **** tp;
|
|
433
|
+
double ** fp;
|
|
434
|
+
tp = new int*** [3];
|
|
435
|
+
if (tp == NULL) { fprintf(stderr, "TP Out of memory..."); return -1; }
|
|
436
|
+
|
|
437
|
+
for (j = 0; j < 3; j++)
|
|
438
|
+
{
|
|
439
|
+
tp[j] = new int** [2];
|
|
440
|
+
if (tp[j] == NULL) { puts("TP Out of memory..."); return -1; }
|
|
441
|
+
|
|
442
|
+
for (k = 0; k < 2; k++)
|
|
443
|
+
{
|
|
444
|
+
tp[j][k] = new int* [n_shift[j]];
|
|
445
|
+
if (tp[j][k] == NULL) { puts("TP Out of memory..."); return -1; } // Исправлено
|
|
446
|
+
|
|
447
|
+
for (i = 0; i < n_shift[j]; i++)
|
|
448
|
+
{
|
|
449
|
+
tp[j][k][i] = new int[nthr_dist_two];
|
|
450
|
+
if (tp[j][k][i] == NULL) { fprintf(stderr, "TP Out of memory..."); return -1; }
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
for (j = 0; j < 3; j++)for (k = 0; k < 2; k++)for (i = 0; i < n_shift[j]; i++)for (m = 0; m < nthr_dist_two; m++)tp[j][k][i][m] = 0;
|
|
455
|
+
fp = new double* [3];
|
|
456
|
+
if (fp == NULL) { fprintf(stderr, "FP Out of memory..."); return -1; }
|
|
457
|
+
for (j = 0; j < 3; j++)
|
|
458
|
+
{
|
|
459
|
+
fp[j] = new double[nthr_dist_two];
|
|
460
|
+
if (fp[j] == NULL) { fprintf(stderr, "FP Out of memory..."); return -1; }
|
|
461
|
+
}
|
|
462
|
+
for (j = 0; j < 3; j++)for (m = 0; m < nthr_dist_two; m++)fp[j][m] = 0;
|
|
463
|
+
int*** tp_tot;
|
|
464
|
+
double fp_tot[3] = { 0,0,0 };
|
|
465
|
+
tp_tot = new int** [3];
|
|
466
|
+
if (tp_tot == NULL) { fprintf(stderr, "TP_tot Out of memory..."); return -1; }
|
|
467
|
+
for (j = 0; j < 3; j++)
|
|
468
|
+
{
|
|
469
|
+
tp_tot[j] = new int* [2];
|
|
470
|
+
if (tp_tot[j] == NULL) { puts("TP_tot Out of memory..."); return -1; }
|
|
471
|
+
for (k = 0; k < 2; k++)
|
|
472
|
+
{
|
|
473
|
+
tp_tot[j][k] = new int[n_shift[j]];
|
|
474
|
+
if (tp_tot[j][k] == NULL) { puts("TP_tot Out of memory..."); return -1; }
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
for (j = 0; j < 3; j++) for (k = 0; k < 2; k++)for (i = 0; i < n_shift[j]; i++)tp_tot[j][k][i] = 0;
|
|
478
|
+
int*** err_inx;
|
|
479
|
+
err_inx = new int** [2];
|
|
480
|
+
if (err_inx == NULL) { puts("Err_inx Out of memory..."); return -1; }
|
|
481
|
+
for (k = 0; k < 2; k++)
|
|
482
|
+
{
|
|
483
|
+
err_inx[k] = new int* [2];
|
|
484
|
+
if (err_inx[k] == NULL) { puts("Err_inx Out of memory..."); return -1; }
|
|
485
|
+
for (i = 0; i < 2; i++)
|
|
486
|
+
{
|
|
487
|
+
err_inx[k][i] = new int[length_fasta_max];
|
|
488
|
+
if (err_inx[k][i] == NULL) { fprintf(stderr, "Err_inx Out of memory..."); return -1; }
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
for (k = 0; k < 2; k++)for (i = 0; i < 2; i++)for (j = 0; j < length_fasta_max; j++)err_inx[k][i][j] = nthr_dist_two;
|
|
492
|
+
for (n = 0; n < 2; n++)
|
|
493
|
+
{
|
|
494
|
+
nthr_dist1[n] = nthr_dist[n] - 1;
|
|
495
|
+
olen1[n] = olen[n] - 1;
|
|
496
|
+
thr_cr[n] = thr_all[n][nthr_dist1[n]];
|
|
497
|
+
}
|
|
498
|
+
for (n = 0; n < nseq; n++)
|
|
499
|
+
{
|
|
500
|
+
//if ((n + 1) % 100 == 0)printf("\b\b\b\b\b\b\b%7d", n + 1);
|
|
501
|
+
int len_pro1 = strlen(seq[0][n]);
|
|
502
|
+
for (k = 0; k < 2; k++)
|
|
503
|
+
{
|
|
504
|
+
int len21 = len_pro1 - olen[k];
|
|
505
|
+
// for (j = 0; j < 2; j++)for (i = 0; i <= len21; i++)err_inx[k][j][i] = nthr_dist_two;
|
|
506
|
+
for (i = 0; i <= len21; i++)
|
|
507
|
+
{
|
|
508
|
+
int index = nthr_dist_two;
|
|
509
|
+
double sco2 = 0;
|
|
510
|
+
int gom = 0;
|
|
511
|
+
for (compl1 = 0; compl1 < 2; compl1++)
|
|
512
|
+
{
|
|
513
|
+
int ista;
|
|
514
|
+
if (compl1 == 0)ista = i;
|
|
515
|
+
else ista = len21 - i;
|
|
516
|
+
strncpy(d, &seq[compl1][n][ista], olen[k]);
|
|
517
|
+
d[olen[k]] = '\0';
|
|
518
|
+
if (strstr(d, "n") != NULL) { gom = 1; break; }
|
|
519
|
+
double score = 0;
|
|
520
|
+
if (model_type[k] == 0)
|
|
521
|
+
{
|
|
522
|
+
GetSostPro(d, word, cod);
|
|
523
|
+
for (j = 0; j < olen[k]; j++)
|
|
524
|
+
{
|
|
525
|
+
score += pwm[k][j][cod[j]];
|
|
526
|
+
}
|
|
527
|
+
score -= min[k];
|
|
528
|
+
score /= raz[k];
|
|
529
|
+
}
|
|
530
|
+
else
|
|
531
|
+
{
|
|
532
|
+
for (j = 0; j < sta[k].size; j++)
|
|
533
|
+
{
|
|
534
|
+
int rlenj = (sta[k].tot[j].end - sta[k].tot[j].sta + 1);
|
|
535
|
+
double fm = 0;
|
|
536
|
+
for (m = sta[k].tot[j].sta; m <= sta[k].tot[j].end; m++)
|
|
537
|
+
{
|
|
538
|
+
int cod = 4 * IdeLet(d[m]) + IdeLet(d[m + 1]);
|
|
539
|
+
if (sta[k].tot[j].num == cod) { fm++; }
|
|
540
|
+
}
|
|
541
|
+
if (fm != 0)
|
|
542
|
+
{
|
|
543
|
+
fm /= rlenj;
|
|
544
|
+
score += sta[k].tot[j].buf * fm;
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
score -= sta[k].min;
|
|
548
|
+
score /= sta[k].raz;
|
|
549
|
+
}
|
|
550
|
+
if (score > sco2)sco2 = score;
|
|
551
|
+
if (gom == 0)
|
|
552
|
+
{
|
|
553
|
+
if (sco2 >= thr_cr[k])
|
|
554
|
+
{
|
|
555
|
+
if (sco2 >= thr_all[k][0])
|
|
556
|
+
{
|
|
557
|
+
index = 0;
|
|
558
|
+
//break;
|
|
559
|
+
}
|
|
560
|
+
else
|
|
561
|
+
{
|
|
562
|
+
for (j = 1; j < nthr_dist[k]; j++)
|
|
563
|
+
{
|
|
564
|
+
if (sco2 >= thr_all[k][j] && sco2 < thr_all[k][j - 1])
|
|
565
|
+
{
|
|
566
|
+
index = j;
|
|
567
|
+
break;
|
|
568
|
+
}
|
|
569
|
+
}
|
|
570
|
+
}
|
|
571
|
+
}
|
|
572
|
+
//if (index == 0)break;
|
|
573
|
+
err_inx[k][compl1][i] = index;
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
int len21[2];
|
|
579
|
+
for (k = 0; k < 2; k++)
|
|
580
|
+
{
|
|
581
|
+
len21[k] = len_pro1 - olen[k];
|
|
582
|
+
}
|
|
583
|
+
/*for (k = 0; k < 2; k++)
|
|
584
|
+
{
|
|
585
|
+
printf("%d\n",k);
|
|
586
|
+
for (i = 0; i <= len21[k]; i++)
|
|
587
|
+
{
|
|
588
|
+
for (j = 0; j < 2; j++)if(err_inx[k][j][i]<nthr_dist_two)printf("%.1f_%d_%d ",half_win[j] + i, j, err_inx[k][j][i]);
|
|
589
|
+
if ((i + 1) % 200 == 0)printf("\n");
|
|
590
|
+
}
|
|
591
|
+
printf("\n");
|
|
592
|
+
} */
|
|
593
|
+
for(j = 0; j < 3 ; j++)
|
|
594
|
+
{
|
|
595
|
+
int n_shift1 = n_shift[j] - 1;
|
|
596
|
+
int j1 = kpairs[j][0];
|
|
597
|
+
int j2 = kpairs[j][1];
|
|
598
|
+
for (m = 0; m <= len21[j1]; m++)
|
|
599
|
+
{
|
|
600
|
+
{
|
|
601
|
+
double cent_win1 = m + half_win[j1];
|
|
602
|
+
int inx1[2];
|
|
603
|
+
for(k = 0; k < 2 ; k++)inx1[k] = err_inx[j1][k][m];
|
|
604
|
+
int mini1 = Min(inx1[0], inx1[1]);
|
|
605
|
+
if (mini1 == nthr_dist_two)continue;
|
|
606
|
+
for (i = 0; i <= len21[j2]; i++)
|
|
607
|
+
{
|
|
608
|
+
int inx2[2];
|
|
609
|
+
for (k = 0; k < 2; k++)inx2[k] = err_inx[j2][k][i];
|
|
610
|
+
int mini2 = Min(inx2[0], inx2[1]);
|
|
611
|
+
if (mini2 == nthr_dist_two)continue;
|
|
612
|
+
double cent_win2 = i + half_win[j2];
|
|
613
|
+
double cent_dif = cent_win2 - cent_win1;
|
|
614
|
+
int ori1, ori2;
|
|
615
|
+
for (ori1 = 0; ori1 < 2; ori1++)
|
|
616
|
+
{
|
|
617
|
+
for (ori2 = 0; ori2 < 2; ori2++)
|
|
618
|
+
{
|
|
619
|
+
if (inx1[ori1] == nthr_dist_two || inx2[ori2] == nthr_dist_two)continue;
|
|
620
|
+
int ori_type;
|
|
621
|
+
if (ori1 == ori2)ori_type = 0;//Direct
|
|
622
|
+
else ori_type = 1; //Invert or Evert
|
|
623
|
+
int k1 = inx_self[j1][inx1[ori1]];
|
|
624
|
+
int k2 = inx_self[j2][inx2[ori2]];
|
|
625
|
+
if (cent_dif >= wshift[j][0] && cent_dif <= wshift[j][n_shift1])
|
|
626
|
+
{
|
|
627
|
+
int shift_pos;
|
|
628
|
+
if (ori1 == 0)shift_pos = (int)(cent_win2 - cent_win1 - wshift[j][0]);//Direct12 Invert
|
|
629
|
+
else shift_pos = (int)(cent_win1 - cent_win2 - wshift[j][0]);//Direct21 Evert
|
|
630
|
+
tp_tot[j][ori_type][shift_pos]+=2;
|
|
631
|
+
tp[j][ori_type][shift_pos][k1]++;
|
|
632
|
+
tp[j][ori_type][shift_pos][k2]++;
|
|
633
|
+
}
|
|
634
|
+
else
|
|
635
|
+
{
|
|
636
|
+
double cent_diff = fabs(cent_dif);
|
|
637
|
+
if(cent_diff <= n_shift[j])
|
|
638
|
+
{
|
|
639
|
+
fp_tot[j] += 2;
|
|
640
|
+
fp[j][k1]++;
|
|
641
|
+
fp[j][k2]++;
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
}
|
|
651
|
+
FILE* out_hist;
|
|
652
|
+
FILE* out_prc;
|
|
653
|
+
out_hist = NULL;
|
|
654
|
+
out_prc = NULL;
|
|
655
|
+
if (yes_out_hist == 1)
|
|
656
|
+
{
|
|
657
|
+
if ((out_hist = fopen(file_hist, "wt")) == NULL)
|
|
658
|
+
{
|
|
659
|
+
printf("Output file can't be opened!\n");
|
|
660
|
+
return -1;
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
if (yes_out_prc == 1)
|
|
664
|
+
{
|
|
665
|
+
if ((out_prc = fopen(file_prc, "wt")) == NULL)
|
|
666
|
+
{
|
|
667
|
+
printf("Output file can't be opened!\n");
|
|
668
|
+
return -1;
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
for (j = 0; j < 3; j++)
|
|
672
|
+
{
|
|
673
|
+
if (yes_out_hist == 1)
|
|
674
|
+
{
|
|
675
|
+
if (j == 0)fprintf(out_hist, "Heterotypic\t");
|
|
676
|
+
else fprintf(out_hist, "Homotypic\t");
|
|
677
|
+
fprintf(out_hist, "Motif1\t%d\tMotif2\t%d\n", olen[kpairs[j][0]], olen[kpairs[j][1]]);
|
|
678
|
+
for (i = 0; i < n_shift[j]; i++)
|
|
679
|
+
{
|
|
680
|
+
fprintf(out_hist, "\t%.1f", wshift[j][i]);
|
|
681
|
+
}
|
|
682
|
+
fprintf(out_hist, "\n");
|
|
683
|
+
}
|
|
684
|
+
{
|
|
685
|
+
int del = n_shift[j] * 2;
|
|
686
|
+
fp_tot[j] /= del;
|
|
687
|
+
for (i = 0; i < nthr_dist_two; i++)fp[j][i] /= del;
|
|
688
|
+
}
|
|
689
|
+
/*printf("\n");
|
|
690
|
+
for (k = 0; k < 2; k++)
|
|
691
|
+
{
|
|
692
|
+
printf("%d TP\t",k);
|
|
693
|
+
for (m = 0; m < n_shift; m++)
|
|
694
|
+
{
|
|
695
|
+
printf("%.f ", tp_tot[k][m]);
|
|
696
|
+
}
|
|
697
|
+
printf("\t\t");
|
|
698
|
+
printf("%d FP\t", k);
|
|
699
|
+
//for (m = 0; m < n_shift; m++)
|
|
700
|
+
{
|
|
701
|
+
printf("%.f ", fp_tot);
|
|
702
|
+
}
|
|
703
|
+
printf("\n");
|
|
704
|
+
} */
|
|
705
|
+
for (k = 0; k < 2; k++)
|
|
706
|
+
{
|
|
707
|
+
if (yes_out_prc == 1)
|
|
708
|
+
{
|
|
709
|
+
if (j == 0)fprintf(out_prc, "Heterotypic\t");
|
|
710
|
+
else fprintf(out_prc, "Homotypic\t");
|
|
711
|
+
fprintf(out_prc, "Motif1\t%d\tMotif2\t%d\n", olen[kpairs[j][0]], olen[kpairs[j][1]]);
|
|
712
|
+
if (k == 0)
|
|
713
|
+
{
|
|
714
|
+
fprintf(out_hist, "Direct ShortLong,LongShort");
|
|
715
|
+
}
|
|
716
|
+
else
|
|
717
|
+
{
|
|
718
|
+
fprintf(out_hist, "Evered Inverted");
|
|
719
|
+
}
|
|
720
|
+
}
|
|
721
|
+
for (m = 0; m < n_shift[j]; m++)
|
|
722
|
+
{
|
|
723
|
+
if (yes_out_prc == 1)
|
|
724
|
+
{
|
|
725
|
+
if (k == 0)
|
|
726
|
+
{
|
|
727
|
+
if (wshift[j][m] < 0)fprintf(out_prc, "\tDirect ShortLong");
|
|
728
|
+
else
|
|
729
|
+
{
|
|
730
|
+
if (wshift[j][m] > 0)fprintf(out_prc, "\tDirect LongShort");
|
|
731
|
+
else fprintf(out_prc, "\tDirect Exact");
|
|
732
|
+
}
|
|
733
|
+
}
|
|
734
|
+
else
|
|
735
|
+
{
|
|
736
|
+
if (wshift[j][m] < 0)fprintf(out_prc, "\tEverted");
|
|
737
|
+
else
|
|
738
|
+
{
|
|
739
|
+
if (wshift[j][m] > 0)fprintf(out_prc, "\tInverted");
|
|
740
|
+
else fprintf(out_prc, "\tReverse Exact");
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
fprintf(out_prc, " %.1f\n", wshift[j][m]);
|
|
744
|
+
}
|
|
745
|
+
{
|
|
746
|
+
int nthr_dist_two1 = nthr_dist_two - 1;
|
|
747
|
+
double tpr_pred = 0, prec_pred = 1, tpr = 0;
|
|
748
|
+
double dtp = 0, dfp = 0;
|
|
749
|
+
int count_pr = 0, count_roc = 0;
|
|
750
|
+
double tp_sum = 0, fp_sum = 0;
|
|
751
|
+
if (yes_out_prc == 1)fprintf(out_prc, "%f\t%f\n", tpr_pred, prec_pred);
|
|
752
|
+
for (i = 0; i < nthr_dist_two; i++)
|
|
753
|
+
{
|
|
754
|
+
dtp += (double)tp[j][k][m][i];
|
|
755
|
+
dfp += fp[j][i];
|
|
756
|
+
if (dtp > 0 && (i == nthr_dist_two1 || errs[i + 1].err != errs[i].err))
|
|
757
|
+
{
|
|
758
|
+
tp_sum += dtp;
|
|
759
|
+
fp_sum += dfp;
|
|
760
|
+
double prec_cur = tp_sum / (tp_sum + fp_sum);
|
|
761
|
+
tpr = tp_sum / tp_tot[j][k][m];
|
|
762
|
+
double prec_av = (prec_pred + prec_cur) / 2;
|
|
763
|
+
double dauprc = dtp * prec_av / tp_tot[j][k][m];
|
|
764
|
+
if (yes_out_prc == 1)fprintf(out_prc, "%f\t%f\n", tpr, prec_cur);
|
|
765
|
+
prec_pred = prec_cur;
|
|
766
|
+
tpr_pred = tpr;
|
|
767
|
+
auprc[j][k][m] += dauprc;
|
|
768
|
+
dtp = 0;
|
|
769
|
+
dfp = 0;
|
|
770
|
+
count_pr++;
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
}
|
|
774
|
+
if (yes_out_hist == 1)fprintf(out_hist, "\t%f", auprc[j][k][m]);
|
|
775
|
+
}
|
|
776
|
+
if (yes_out_hist == 1)fprintf(out_hist, "\n");
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
if (yes_out_prc == 1)fclose(out_prc);
|
|
780
|
+
if (yes_out_hist == 1)fclose(out_hist);
|
|
781
|
+
double auprc_max[3][2] = { { -1,-1 },{ -1,-1 },{ -1,-1 } };
|
|
782
|
+
double auprc_ov[3][2] = { { -1,-1 },{ -1,-1 },{ -1,-1 } };
|
|
783
|
+
//int j_best[3][2] = { { 0,0 },{ 0,0 },{ 0,0 } };
|
|
784
|
+
//int j_ov[3][2] = { { 0,0 },{ 0,0 },{ 0,0 } };
|
|
785
|
+
char cepi[] = "+-";
|
|
786
|
+
for (j = 0; j < 3; j++)
|
|
787
|
+
{
|
|
788
|
+
for (i = 0; i < n_shift[j]; i++)
|
|
789
|
+
{
|
|
790
|
+
if (wshift[j][i] >= -wshift_ov[j] && wshift[j][i] <= wshift_ov[j])
|
|
791
|
+
{
|
|
792
|
+
for (k = 0; k < 2; k++)
|
|
793
|
+
{
|
|
794
|
+
if (auprc[j][k][i] > auprc_ov[j][k])
|
|
795
|
+
{
|
|
796
|
+
auprc_ov[j][k] = auprc[j][k][i];
|
|
797
|
+
strand_final_over[j] = cepi[k];
|
|
798
|
+
shift_final_over[j] = wshift[j][i];
|
|
799
|
+
auprc_final_over[j] = auprc[j][k][i];
|
|
800
|
+
}
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
for (i = 0; i < n_shift[j]; i++)
|
|
805
|
+
{
|
|
806
|
+
for (k = 0; k < 2; k++)
|
|
807
|
+
{
|
|
808
|
+
if (auprc[j][k][i] > auprc_max[j][k])
|
|
809
|
+
{
|
|
810
|
+
auprc_final_all[j] = auprc_max[j][k] = auprc[j][k][i];
|
|
811
|
+
strand_final_all[j] = cepi[k];
|
|
812
|
+
shift_final_all[j] = wshift[j][i];
|
|
813
|
+
}
|
|
814
|
+
}
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
/*for (j = 0; j < 3; j++)
|
|
818
|
+
{
|
|
819
|
+
auprc_final_all[j] = Max(auprc_max[j][0], auprc_max[j][1]);
|
|
820
|
+
auprc_final_over[j] = Max(auprc_ov[j][0], auprc_ov[j][1]);
|
|
821
|
+
if (auprc_max[j][0] >= auprc_max[j][1])
|
|
822
|
+
{
|
|
823
|
+
strand_final_all[j] = '+';
|
|
824
|
+
shift_final_all[j] = wshift[j][j_best[j][0]];
|
|
825
|
+
}
|
|
826
|
+
else
|
|
827
|
+
{
|
|
828
|
+
strand_final_all[j] = '-';
|
|
829
|
+
shift_final_all[j] = wshift[j][j_best[j][1]];
|
|
830
|
+
}
|
|
831
|
+
if (auprc_ov[j][0] >= auprc_ov[j][1])
|
|
832
|
+
{
|
|
833
|
+
strand_final_over[j] = '+';
|
|
834
|
+
shift_final_over[j] = wshift[j][j_ov[j][0]];
|
|
835
|
+
}
|
|
836
|
+
else
|
|
837
|
+
{
|
|
838
|
+
strand_final_over[j] = '-';
|
|
839
|
+
shift_final_over[j] = wshift[j][j_ov[j][1]];
|
|
840
|
+
}
|
|
841
|
+
}*/
|
|
842
|
+
double maxi = Max(auprc_final_over[1], auprc_final_over[2]);
|
|
843
|
+
auprc_final_over1 = auprc_final_over[0] / maxi;
|
|
844
|
+
if (auprc_final_over1 > 1)auprc_final_over1 = 1;
|
|
845
|
+
maxi = Max(auprc_final_all[1], auprc_final_all[2]);
|
|
846
|
+
auprc_final_all1 = auprc_final_all[0] / maxi;
|
|
847
|
+
if (auprc_final_all1 > 1)auprc_final_all1 = 1;
|
|
848
|
+
delete[] errs;
|
|
849
|
+
for (k = 0; k < 2; k++)
|
|
850
|
+
{
|
|
851
|
+
for (j = 0; j < 2; j++)
|
|
852
|
+
{
|
|
853
|
+
delete[] err_inx[k][j];
|
|
854
|
+
}
|
|
855
|
+
delete[] err_inx[k];
|
|
856
|
+
}
|
|
857
|
+
delete[] err_inx;
|
|
858
|
+
for (j = 0; j < 3; j++)
|
|
859
|
+
{
|
|
860
|
+
for (k = 0; k < 2; k++)
|
|
861
|
+
{
|
|
862
|
+
for (i = 0; i < n_shift[j]; i++)delete[] tp[j][k][i];
|
|
863
|
+
delete[] tp[j][k];
|
|
864
|
+
}
|
|
865
|
+
delete[] tp[j];
|
|
866
|
+
}
|
|
867
|
+
delete[] tp;
|
|
868
|
+
for (j = 0; j < 3; j++)delete[] fp[j];
|
|
869
|
+
delete[] fp;
|
|
870
|
+
for (j = 0; j < 3; j++)
|
|
871
|
+
{
|
|
872
|
+
for (k = 0; k < 2; k++)delete[] tp_tot[j][k];
|
|
873
|
+
delete[] tp_tot[j];
|
|
874
|
+
}
|
|
875
|
+
delete[] tp_tot;
|
|
876
|
+
for (j = 0; j < 3; j++)
|
|
877
|
+
{
|
|
878
|
+
for (k = 0; k < 2; k++)
|
|
879
|
+
{
|
|
880
|
+
delete[] auprc[j][k];
|
|
881
|
+
}
|
|
882
|
+
delete[] auprc[j];
|
|
883
|
+
}
|
|
884
|
+
delete[] auprc;
|
|
885
|
+
for (k = 0; k < 2; k++)
|
|
886
|
+
{
|
|
887
|
+
delete[] inx_self[k];
|
|
888
|
+
}
|
|
889
|
+
for (j = 0; j < 3; j++)delete[] wshift[j];
|
|
890
|
+
delete[] wshift;
|
|
891
|
+
return 1;
|
|
892
|
+
}
|
|
893
|
+
// Copies column number nstol (0-based) of the sep-delimited line str into ret.
// Same contract as UnderStolStr, defined earlier in this file.
//   str   - NUL-terminated input line; not modified
//   nstol - index of the wanted column; column 0 starts at the beginning of str
//   ret   - destination buffer, zero-filled before anything is copied
//   size  - capacity of ret in bytes; the copy is truncated to size - 1
//           characters so ret is never overrun (previously size was ignored)
//   sep   - column separator
// The last column extends to the end of str, including any trailing newline.
// Returns 1 on success, -1 when str has no column nstol, nstol is negative
// or size is 0.
int UnderStol(char* str, int nstol, char* ret, size_t size, char sep)
{
	size_t pos = 0;
	size_t len = 0;
	int col;
	if (size == 0) return -1;
	memset(ret, 0, size);
	if (nstol < 0) return -1;
	// skip nstol separators; running out of them means the column is absent
	for (col = 0; col < nstol; col++)
	{
		while (str[pos] != '\0' && str[pos] != sep) pos++;
		if (str[pos] == '\0') return -1;
		pos++;
	}
	while (str[pos + len] != '\0' && str[pos + len] != sep) len++;
	if (len > size - 1) len = size - 1;
	memcpy(ret, &str[pos], len);
	ret[len] = '\0';
	return 1;
}
|
|
920
|
+
int main(int argc, char* argv[])
|
|
921
|
+
{
|
|
922
|
+
int i, k, mot;
|
|
923
|
+
char file_fasta[ARGLEN], file_model[2][ARGLEN], type_model[2][4], file_table[2][ARGLEN];
|
|
924
|
+
char file_hist[ARGLEN], file_prc[ARGLEN], file_short_all[ARGLEN], file_short_over[ARGLEN], file_sta_long[ARGLEN];
|
|
925
|
+
char*** seq;// peaks
|
|
926
|
+
double*** pwm;
|
|
927
|
+
city sta[2];
|
|
928
|
+
int model_type[2] = { -1,-1 };// 0 pwm 1 sga
|
|
929
|
+
|
|
930
|
+
if (argc != 17)
|
|
931
|
+
{
|
|
932
|
+
fprintf(stderr, "Syntax error: %s 1file_fasta 2motif1_type 3motif2_type 4file_motif1_matrix 5file_motif2_matrix 6file_motif1_table 7file_motif2_table 8int max_shift_of_motif_centers", argv[0]);
|
|
933
|
+
fprintf(stderr, "9double pvalue_thr 10file out_hist 11yes,0no out_hist 12file_out_prc 13int 1yes,0no out_prc 14file_out_short_over 15file_out_short_all 16file_out_sta_detailed\n");
|
|
934
|
+
return -1;
|
|
935
|
+
}
|
|
936
|
+
strcpy(file_fasta, argv[1]);
|
|
937
|
+
strcpy(type_model[0], argv[2]);//pwm or sga - type
|
|
938
|
+
strcpy(type_model[1], argv[3]);//pwm or sga - type
|
|
939
|
+
strcpy(file_model[0], argv[4]);//pwm or sga - matrix
|
|
940
|
+
strcpy(file_model[1], argv[5]);//pwm or sga - matrix
|
|
941
|
+
strcpy(file_table[0], argv[6]);//pwm or sga - thr err table
|
|
942
|
+
strcpy(file_table[1], argv[7]);//pwm or sga - thr err table
|
|
943
|
+
int shift = atoi(argv[8]); // shift of motifs
|
|
944
|
+
double pvalue = atof(argv[9]); //threshold of expected recogntion rate
|
|
945
|
+
double pvalue_lg = -log10(pvalue);
|
|
946
|
+
strcpy(file_hist, argv[10]);
|
|
947
|
+
int yes_out_hist = atoi(argv[11]);
|
|
948
|
+
strcpy(file_prc, argv[12]);
|
|
949
|
+
int yes_out_prc = atoi(argv[13]);
|
|
950
|
+
strcpy(file_short_over, argv[14]);
|
|
951
|
+
strcpy(file_short_all, argv[15]);
|
|
952
|
+
strcpy(file_sta_long, argv[16]);
|
|
953
|
+
|
|
954
|
+
{
|
|
955
|
+
char pwm1[] = "pwm", pwm2[] = "PWM", sga1[] = "sga", sga2[] = "SGA";
|
|
956
|
+
for (i = 0; i < 2; i++)
|
|
957
|
+
{
|
|
958
|
+
if (strcmp(type_model[i], pwm1) == 0 || strcmp(type_model[i], pwm2) == 0)
|
|
959
|
+
{
|
|
960
|
+
model_type[i] = 0;
|
|
961
|
+
}
|
|
962
|
+
if (strcmp(type_model[i], sga1) == 0 || strcmp(type_model[i], sga2) == 0)
|
|
963
|
+
{
|
|
964
|
+
model_type[i] = 1;
|
|
965
|
+
}
|
|
966
|
+
|
|
967
|
+
}
|
|
968
|
+
for (i = 0; i < 2; i++)
|
|
969
|
+
{
|
|
970
|
+
if (model_type[i] == -1)
|
|
971
|
+
{
|
|
972
|
+
printf("Model type %d %s is not recognized\n", i + 1, type_model[i]);
|
|
973
|
+
exit(1);
|
|
974
|
+
}
|
|
975
|
+
}
|
|
976
|
+
|
|
977
|
+
}
|
|
978
|
+
int length_fasta_max = 0, nseq_real = 0;
|
|
979
|
+
seq = NULL;
|
|
980
|
+
int ftp = fasta_to_plain0(file_fasta, length_fasta_max, nseq_real);
|
|
981
|
+
if (ftp == -1)
|
|
982
|
+
{
|
|
983
|
+
fprintf(stderr, "Error: Fasta file %s error\n", file_fasta);
|
|
984
|
+
return -1;
|
|
985
|
+
}
|
|
986
|
+
int* peak_len_real;
|
|
987
|
+
peak_len_real = new int[nseq_real];
|
|
988
|
+
if (peak_len_real == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
989
|
+
|
|
990
|
+
seq = new char** [2];
|
|
991
|
+
if (seq == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
992
|
+
for (k = 0; k < 2; k++)
|
|
993
|
+
{
|
|
994
|
+
seq[k] = new char* [nseq_real];
|
|
995
|
+
if (seq[k] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
996
|
+
for (i = 0; i < nseq_real; i++)
|
|
997
|
+
{
|
|
998
|
+
int length_fasta_max1 = length_fasta_max + 1;
|
|
999
|
+
seq[k][i] = new char[length_fasta_max1];
|
|
1000
|
+
if (seq[k][i] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
1001
|
+
memset(seq[k][i], '\0', length_fasta_max1);
|
|
1002
|
+
}
|
|
1003
|
+
}
|
|
1004
|
+
pwm = new double** [2];
|
|
1005
|
+
if (pwm == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
|
|
1006
|
+
for (k = 0; k < 2; k++)
|
|
1007
|
+
{
|
|
1008
|
+
pwm[k] = new double* [MATLEN];
|
|
1009
|
+
if (pwm[k] == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
|
|
1010
|
+
for (i = 0; i < MATLEN; i++)
|
|
1011
|
+
{
|
|
1012
|
+
pwm[k][i] = new double[OLIGNUM];
|
|
1013
|
+
if (pwm[k][i] == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
|
|
1014
|
+
}
|
|
1015
|
+
}
|
|
1016
|
+
|
|
1017
|
+
ftp = fasta_to_plain1(file_fasta, length_fasta_max, nseq_real, seq, peak_len_real);
|
|
1018
|
+
if (ftp == -1)
|
|
1019
|
+
{
|
|
1020
|
+
fprintf(stderr, "File %s error 2nd stage\n", file_fasta);
|
|
1021
|
+
return -1;
|
|
1022
|
+
}
|
|
1023
|
+
int olen[2];
|
|
1024
|
+
int nthr_dist[2];
|
|
1025
|
+
double min[2] = { 0,0 }, raz[2] = { 0,0 };
|
|
1026
|
+
|
|
1027
|
+
double** thr_all;
|
|
1028
|
+
thr_all = new double* [2];
|
|
1029
|
+
if (thr_all == NULL) { fprintf(stderr, "Thr_all Out of memory..."); return -1; }
|
|
1030
|
+
double** fpr_all;
|
|
1031
|
+
fpr_all = new double* [2];
|
|
1032
|
+
if (fpr_all == NULL) { fprintf(stderr, "Thr_all Out of memory..."); return -1; }
|
|
1033
|
+
for (mot = 0; mot < 2; mot++)
|
|
1034
|
+
{
|
|
1035
|
+
//printf("Mot %d\n", mot);
|
|
1036
|
+
nthr_dist[mot] = 0;
|
|
1037
|
+
FILE* in_tab;
|
|
1038
|
+
if ((in_tab = fopen(file_table[mot], "rt")) == NULL)
|
|
1039
|
+
{
|
|
1040
|
+
printf("Input file %s can't be opened!", file_table[mot]);
|
|
1041
|
+
return -1;
|
|
1042
|
+
}
|
|
1043
|
+
char d[ARGLEN];
|
|
1044
|
+
//fgets(d, sizeof(d), in_tab);//header
|
|
1045
|
+
while (fgets(d, sizeof(d), in_tab) != NULL)
|
|
1046
|
+
{
|
|
1047
|
+
char c = d[0];
|
|
1048
|
+
char sep = '\t';
|
|
1049
|
+
if (c == '-' || isdigit(c))
|
|
1050
|
+
{
|
|
1051
|
+
char s[30];
|
|
1052
|
+
int test = UnderStol(d, 1, s, sizeof(s), sep);
|
|
1053
|
+
if (test == -1) { printf("Wrong format %s\n", d); exit(1); }
|
|
1054
|
+
nthr_dist[mot]++;
|
|
1055
|
+
double fprx = atof(s);
|
|
1056
|
+
if (fprx < pvalue_lg)break;
|
|
1057
|
+
}
|
|
1058
|
+
}
|
|
1059
|
+
rewind(in_tab);
|
|
1060
|
+
thr_all[mot] = new double[nthr_dist[mot]];
|
|
1061
|
+
if (thr_all[mot] == NULL) { puts("thr_all Out of memory..."); return -1; }
|
|
1062
|
+
fpr_all[mot] = new double[nthr_dist[mot]];
|
|
1063
|
+
if (fpr_all[mot] == NULL) { puts("fpr_all Out of memory..."); return -1; }
|
|
1064
|
+
k = 0;
|
|
1065
|
+
while (fgets(d, sizeof(d), in_tab) != NULL)
|
|
1066
|
+
{
|
|
1067
|
+
char c = d[0];
|
|
1068
|
+
if (c == '-' || isdigit(c))
|
|
1069
|
+
{
|
|
1070
|
+
char s[30];
|
|
1071
|
+
char sep = '\t';
|
|
1072
|
+
int test = UnderStol(d, 1, s, sizeof(s), sep);
|
|
1073
|
+
if (test == -1) { printf("Wrong format %s\n", d); exit(1); }
|
|
1074
|
+
thr_all[mot][k] = atof(d);
|
|
1075
|
+
fpr_all[mot][k] = atof(s);
|
|
1076
|
+
if(fpr_all[mot][k] < pvalue_lg)break;
|
|
1077
|
+
k++;
|
|
1078
|
+
}
|
|
1079
|
+
}
|
|
1080
|
+
fclose(in_tab);
|
|
1081
|
+
}
|
|
1082
|
+
for (mot = 0; mot < 2; mot++)
|
|
1083
|
+
{
|
|
1084
|
+
//printf("Mot %d\n", mot);
|
|
1085
|
+
if (model_type[mot] == 0)
|
|
1086
|
+
{
|
|
1087
|
+
int test = pfm_to_pwm(file_model[mot], pwm[mot]);
|
|
1088
|
+
if (test == -1)return -1;
|
|
1089
|
+
else olen[mot] = test;
|
|
1090
|
+
PWMScore(pwm[mot], min[mot], raz[mot], olen[mot]);
|
|
1091
|
+
}
|
|
1092
|
+
else
|
|
1093
|
+
{
|
|
1094
|
+
if (sta[mot].get_file(file_model[mot]) == -1)
|
|
1095
|
+
{
|
|
1096
|
+
printf("Site %s function not found!", file_model[mot]);
|
|
1097
|
+
exit(1);
|
|
1098
|
+
}
|
|
1099
|
+
olen[mot] = sta[mot].len;
|
|
1100
|
+
}
|
|
1101
|
+
}
|
|
1102
|
+
double auprc_final_all[3], auprc_final_over[3];
|
|
1103
|
+
double auprc_final_over1 = 0, auprc_final_all1 = 0;
|
|
1104
|
+
char strand_final_all[4], strand_final_over[4];
|
|
1105
|
+
for (k = 0; k < 3; k++)strand_final_all[k] = strand_final_over[k] = '+';
|
|
1106
|
+
strand_final_all[3] = strand_final_over[3] = '\0';
|
|
1107
|
+
double shift_final_all[3] = {0,0,0}, shift_final_over[3] = { 0,0,0 };
|
|
1108
|
+
for (k = 0; k < 3; k++)shift_final_all[k] = shift_final_over[k] = 0;
|
|
1109
|
+
PWM_SGA_rec_real(pwm, min, raz, sta, model_type, nthr_dist, thr_all, fpr_all, seq, olen, nseq_real, shift, length_fasta_max, file_hist, file_prc, yes_out_hist,yes_out_prc,
|
|
1110
|
+
auprc_final_all, auprc_final_over, auprc_final_over1, auprc_final_all1, shift_final_all, shift_final_over, strand_final_all, strand_final_over);
|
|
1111
|
+
FILE* out_sta_long;
|
|
1112
|
+
if ((out_sta_long = fopen(file_sta_long, "at")) == NULL)
|
|
1113
|
+
{
|
|
1114
|
+
printf("Output file can't be opened!\n");
|
|
1115
|
+
exit(1);
|
|
1116
|
+
}
|
|
1117
|
+
fprintf(out_sta_long, "%s\t%s", file_model[0], file_model[1]);
|
|
1118
|
+
fprintf(out_sta_long, "\tOverlap\t%f", auprc_final_over1);
|
|
1119
|
+
fprintf(out_sta_long, "\tAll\t%f", auprc_final_all1);
|
|
1120
|
+
fprintf(out_sta_long, "\tHeterotypic");
|
|
1121
|
+
//overap
|
|
1122
|
+
fprintf(out_sta_long, "\t%f", auprc_final_over[0]);
|
|
1123
|
+
fprintf(out_sta_long, "\t%.1f", shift_final_over[0]);
|
|
1124
|
+
fprintf(out_sta_long, "\t%c", strand_final_over[0]);
|
|
1125
|
+
//all
|
|
1126
|
+
fprintf(out_sta_long, "\t%f", auprc_final_all[0]);
|
|
1127
|
+
fprintf(out_sta_long, "\t%.1f", shift_final_all[0]);
|
|
1128
|
+
fprintf(out_sta_long, "\t%c", strand_final_all[0]);
|
|
1129
|
+
fprintf(out_sta_long, "\tHomotypic");
|
|
1130
|
+
//overap
|
|
1131
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%f", auprc_final_over[i]);
|
|
1132
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%.1f", shift_final_over[i]);
|
|
1133
|
+
fprintf(out_sta_long, "\t");
|
|
1134
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "%c", strand_final_over[i]);
|
|
1135
|
+
//all
|
|
1136
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%f", auprc_final_all[i]);
|
|
1137
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%.1f", shift_final_all[i]);
|
|
1138
|
+
fprintf(out_sta_long, "\t");
|
|
1139
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "%c", strand_final_all[i]);
|
|
1140
|
+
fprintf(out_sta_long, "\n");
|
|
1141
|
+
fclose(out_sta_long);
|
|
1142
|
+
FILE* out_short_over;
|
|
1143
|
+
if ((out_short_over = fopen(file_short_over, "at")) == NULL)
|
|
1144
|
+
{
|
|
1145
|
+
printf("Output file can't be opened!\n");
|
|
1146
|
+
exit(1);
|
|
1147
|
+
}
|
|
1148
|
+
fprintf(out_short_over, "\t%f", auprc_final_over1);
|
|
1149
|
+
fclose(out_short_over);
|
|
1150
|
+
|
|
1151
|
+
FILE* out_short_all;
|
|
1152
|
+
if ((out_short_all = fopen(file_short_all, "at")) == NULL)
|
|
1153
|
+
{
|
|
1154
|
+
printf("Output file can't be opened!\n");
|
|
1155
|
+
exit(1);
|
|
1156
|
+
}
|
|
1157
|
+
fprintf(out_short_all, "\t%f", auprc_final_all1);
|
|
1158
|
+
fclose(out_short_all);
|
|
1159
|
+
|
|
1160
|
+
for (k = 0; k < 2; k++)
|
|
1161
|
+
{
|
|
1162
|
+
delete[] thr_all[k];
|
|
1163
|
+
}
|
|
1164
|
+
delete[] thr_all;
|
|
1165
|
+
for (k = 0; k < 2; k++)
|
|
1166
|
+
{
|
|
1167
|
+
delete[] fpr_all[k];
|
|
1168
|
+
}
|
|
1169
|
+
delete[] fpr_all;
|
|
1170
|
+
delete[] peak_len_real;
|
|
1171
|
+
for (k = 0; k < 2; k++)
|
|
1172
|
+
{
|
|
1173
|
+
for (i = 0; i < nseq_real; i++)
|
|
1174
|
+
{
|
|
1175
|
+
delete[] seq[k][i];
|
|
1176
|
+
}
|
|
1177
|
+
delete[] seq[k];
|
|
1178
|
+
}
|
|
1179
|
+
delete[] seq;
|
|
1180
|
+
for (k = 0; k < 2; k++)
|
|
1181
|
+
{
|
|
1182
|
+
for (i = 0; i < MATLEN; i++)
|
|
1183
|
+
{
|
|
1184
|
+
delete[] pwm[k][i];
|
|
1185
|
+
}
|
|
1186
|
+
delete[] pwm[k];
|
|
1187
|
+
}
|
|
1188
|
+
delete[] pwm;
|
|
1189
|
+
return 0;
|
|
1190
|
+
}
|
|
1191
|
+
|
|
1192
|
+
// Function to run the main computational logic
|
|
1193
|
+
extern "C" int run_motali_cpp(
|
|
1194
|
+
const char* file_fasta,
|
|
1195
|
+
const char* type_model_1,
|
|
1196
|
+
const char* type_model_2,
|
|
1197
|
+
const char* file_model_1,
|
|
1198
|
+
const char* file_model_2,
|
|
1199
|
+
const char* file_table_1,
|
|
1200
|
+
const char* file_table_2,
|
|
1201
|
+
int shift,
|
|
1202
|
+
double pvalue,
|
|
1203
|
+
const char* file_hist,
|
|
1204
|
+
int yes_out_hist,
|
|
1205
|
+
const char* file_prc,
|
|
1206
|
+
int yes_out_prc,
|
|
1207
|
+
const char* file_short_over,
|
|
1208
|
+
const char* file_short_all,
|
|
1209
|
+
const char* file_sta_long
|
|
1210
|
+
) {
|
|
1211
|
+
int i, k, mot;
|
|
1212
|
+
char file_fasta_local[ARGLEN], file_model[2][ARGLEN], type_model[2][4], file_table[2][ARGLEN];
|
|
1213
|
+
char file_hist_local[ARGLEN], file_prc_local[ARGLEN], file_short_all_local[ARGLEN], file_short_over_local[ARGLEN], file_sta_long_local[ARGLEN];
|
|
1214
|
+
char*** seq;// peaks
|
|
1215
|
+
double*** pwm;
|
|
1216
|
+
city sta[2];
|
|
1217
|
+
int model_type[2] = { -1,-1 };// 0 pwm 1 sga
|
|
1218
|
+
|
|
1219
|
+
// Copy input parameters to local variables
|
|
1220
|
+
strcpy(file_fasta_local, file_fasta);
|
|
1221
|
+
strcpy(type_model[0], type_model_1);//pwm or sga - type
|
|
1222
|
+
strcpy(type_model[1], type_model_2);//pwm or sga - type
|
|
1223
|
+
strcpy(file_model[0], file_model_1);//pwm or sga - matrix
|
|
1224
|
+
strcpy(file_model[1], file_model_2);//pwm or sga - matrix
|
|
1225
|
+
strcpy(file_table[0], file_table_1);//pwm or sga - thr err table
|
|
1226
|
+
strcpy(file_table[1], file_table_2);//pwm or sga - thr err table
|
|
1227
|
+
strcpy(file_hist_local, file_hist);
|
|
1228
|
+
strcpy(file_prc_local, file_prc);
|
|
1229
|
+
strcpy(file_short_over_local, file_short_over);
|
|
1230
|
+
strcpy(file_short_all_local, file_short_all);
|
|
1231
|
+
strcpy(file_sta_long_local, file_sta_long);
|
|
1232
|
+
|
|
1233
|
+
double pvalue_lg = -log10(pvalue);
|
|
1234
|
+
|
|
1235
|
+
{
|
|
1236
|
+
char pwm1[] = "pwm", pwm2[] = "PWM", sga1[] = "sga", sga2[] = "SGA";
|
|
1237
|
+
for (i = 0; i < 2; i++)
|
|
1238
|
+
{
|
|
1239
|
+
if (strcmp(type_model[i], pwm1) == 0 || strcmp(type_model[i], pwm2) == 0)
|
|
1240
|
+
{
|
|
1241
|
+
model_type[i] = 0;
|
|
1242
|
+
}
|
|
1243
|
+
if (strcmp(type_model[i], sga1) == 0 || strcmp(type_model[i], sga2) == 0)
|
|
1244
|
+
{
|
|
1245
|
+
model_type[i] = 1;
|
|
1246
|
+
}
|
|
1247
|
+
|
|
1248
|
+
}
|
|
1249
|
+
for (i = 0; i < 2; i++)
|
|
1250
|
+
{
|
|
1251
|
+
if (model_type[i] == -1)
|
|
1252
|
+
{
|
|
1253
|
+
printf("Model type %d %s is not recognized\n", i + 1, type_model[i]);
|
|
1254
|
+
return -1; // Changed from exit(1) to return -1
|
|
1255
|
+
}
|
|
1256
|
+
}
|
|
1257
|
+
|
|
1258
|
+
}
|
|
1259
|
+
int length_fasta_max = 0, nseq_real = 0;
|
|
1260
|
+
seq = NULL;
|
|
1261
|
+
int ftp = fasta_to_plain0(file_fasta_local, length_fasta_max, nseq_real);
|
|
1262
|
+
if (ftp == -1)
|
|
1263
|
+
{
|
|
1264
|
+
fprintf(stderr, "Error: Fasta file %s error\n", file_fasta_local);
|
|
1265
|
+
return -1;
|
|
1266
|
+
}
|
|
1267
|
+
int* peak_len_real;
|
|
1268
|
+
peak_len_real = new int[nseq_real];
|
|
1269
|
+
if (peak_len_real == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
1270
|
+
|
|
1271
|
+
seq = new char** [2];
|
|
1272
|
+
if (seq == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
1273
|
+
for (k = 0; k < 2; k++)
|
|
1274
|
+
{
|
|
1275
|
+
seq[k] = new char* [nseq_real];
|
|
1276
|
+
if (seq[k] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
1277
|
+
for (i = 0; i < nseq_real; i++)
|
|
1278
|
+
{
|
|
1279
|
+
int length_fasta_max1 = length_fasta_max + 1;
|
|
1280
|
+
seq[k][i] = new char[length_fasta_max1];
|
|
1281
|
+
if (seq[k][i] == NULL) { fprintf(stderr, "Error: Not of memory..."); return -1; }
|
|
1282
|
+
memset(seq[k][i], '\0', length_fasta_max1);
|
|
1283
|
+
}
|
|
1284
|
+
}
|
|
1285
|
+
pwm = new double** [2];
|
|
1286
|
+
if (pwm == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
|
|
1287
|
+
for (k = 0; k < 2; k++)
|
|
1288
|
+
{
|
|
1289
|
+
pwm[k] = new double* [MATLEN];
|
|
1290
|
+
if (pwm[k] == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
|
|
1291
|
+
for (i = 0; i < MATLEN; i++)
|
|
1292
|
+
{
|
|
1293
|
+
pwm[k][i] = new double[OLIGNUM];
|
|
1294
|
+
if (pwm[k][i] == NULL) { fprintf(stderr, "PWM Out of memory..."); return -1; }
|
|
1295
|
+
}
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
ftp = fasta_to_plain1(file_fasta_local, length_fasta_max, nseq_real, seq, peak_len_real);
|
|
1299
|
+
if (ftp == -1)
|
|
1300
|
+
{
|
|
1301
|
+
fprintf(stderr, "File %s error 2nd stage\n", file_fasta_local);
|
|
1302
|
+
return -1;
|
|
1303
|
+
}
|
|
1304
|
+
int olen[2];
|
|
1305
|
+
int nthr_dist[2];
|
|
1306
|
+
double min[2] = { 0,0 }, raz[2] = { 0,0 };
|
|
1307
|
+
|
|
1308
|
+
double** thr_all;
|
|
1309
|
+
thr_all = new double* [2];
|
|
1310
|
+
if (thr_all == NULL) { fprintf(stderr, "Thr_all Out of memory..."); return -1; }
|
|
1311
|
+
double** fpr_all;
|
|
1312
|
+
fpr_all = new double* [2];
|
|
1313
|
+
if (fpr_all == NULL) { fprintf(stderr, "Thr_all Out of memory..."); return -1; }
|
|
1314
|
+
for (mot = 0; mot < 2; mot++)
|
|
1315
|
+
{
|
|
1316
|
+
//printf("Mot %d\n", mot);
|
|
1317
|
+
nthr_dist[mot] = 0;
|
|
1318
|
+
FILE* in_tab;
|
|
1319
|
+
if ((in_tab = fopen(file_table[mot], "rt")) == NULL)
|
|
1320
|
+
{
|
|
1321
|
+
printf("Input file %s can't be opened!", file_table[mot]);
|
|
1322
|
+
return -1;
|
|
1323
|
+
}
|
|
1324
|
+
char d[ARGLEN];
|
|
1325
|
+
//fgets(d, sizeof(d), in_tab);//header
|
|
1326
|
+
while (fgets(d, sizeof(d), in_tab) != NULL)
|
|
1327
|
+
{
|
|
1328
|
+
char c = d[0];
|
|
1329
|
+
char sep = '\t';
|
|
1330
|
+
if (c == '-' || isdigit(c))
|
|
1331
|
+
{
|
|
1332
|
+
char s[30];
|
|
1333
|
+
int test = UnderStol(d, 1, s, sizeof(s), sep);
|
|
1334
|
+
if (test == -1) { printf("Wrong format %s\n", d); return -1; } // Changed from exit(1) to return -1
|
|
1335
|
+
nthr_dist[mot]++;
|
|
1336
|
+
double fprx = atof(s);
|
|
1337
|
+
if (fprx < pvalue_lg)break;
|
|
1338
|
+
}
|
|
1339
|
+
}
|
|
1340
|
+
rewind(in_tab);
|
|
1341
|
+
thr_all[mot] = new double[nthr_dist[mot]];
|
|
1342
|
+
if (thr_all[mot] == NULL) { puts("thr_all Out of memory..."); return -1; }
|
|
1343
|
+
fpr_all[mot] = new double[nthr_dist[mot]];
|
|
1344
|
+
if (fpr_all[mot] == NULL) { puts("fpr_all Out of memory..."); return -1; }
|
|
1345
|
+
k = 0;
|
|
1346
|
+
while (fgets(d, sizeof(d), in_tab) != NULL)
|
|
1347
|
+
{
|
|
1348
|
+
char c = d[0];
|
|
1349
|
+
if (c == '-' || isdigit(c))
|
|
1350
|
+
{
|
|
1351
|
+
char s[30];
|
|
1352
|
+
char sep = '\t';
|
|
1353
|
+
int test = UnderStol(d, 1, s, sizeof(s), sep);
|
|
1354
|
+
if (test == -1) { printf("Wrong format %s\n", d); return -1; } // Changed from exit(1) to return -1
|
|
1355
|
+
thr_all[mot][k] = atof(d);
|
|
1356
|
+
fpr_all[mot][k] = atof(s);
|
|
1357
|
+
if(fpr_all[mot][k] < pvalue_lg)break;
|
|
1358
|
+
k++;
|
|
1359
|
+
}
|
|
1360
|
+
}
|
|
1361
|
+
fclose(in_tab);
|
|
1362
|
+
}
|
|
1363
|
+
for (mot = 0; mot < 2; mot++)
|
|
1364
|
+
{
|
|
1365
|
+
//printf("Mot %d\n", mot);
|
|
1366
|
+
if (model_type[mot] == 0)
|
|
1367
|
+
{
|
|
1368
|
+
int test = pfm_to_pwm(file_model[mot], pwm[mot]);
|
|
1369
|
+
if (test == -1)return -1;
|
|
1370
|
+
else olen[mot] = test;
|
|
1371
|
+
PWMScore(pwm[mot], min[mot], raz[mot], olen[mot]);
|
|
1372
|
+
}
|
|
1373
|
+
else
|
|
1374
|
+
{
|
|
1375
|
+
if (sta[mot].get_file(file_model[mot]) == -1)
|
|
1376
|
+
{
|
|
1377
|
+
printf("Site %s function not found!", file_model[mot]);
|
|
1378
|
+
return -1; // Changed from exit(1) to return -1
|
|
1379
|
+
}
|
|
1380
|
+
olen[mot] = sta[mot].len;
|
|
1381
|
+
}
|
|
1382
|
+
}
|
|
1383
|
+
double auprc_final_all[3], auprc_final_over[3];
|
|
1384
|
+
double auprc_final_over1 = 0, auprc_final_all1 = 0;
|
|
1385
|
+
char strand_final_all[4], strand_final_over[4];
|
|
1386
|
+
for (k = 0; k < 3; k++)strand_final_all[k] = strand_final_over[k] = '+';
|
|
1387
|
+
strand_final_all[3] = strand_final_over[3] = '\0';
|
|
1388
|
+
double shift_final_all[3] = {0,0,0}, shift_final_over[3] = { 0,0,0 };
|
|
1389
|
+
for (k = 0; k < 3; k++)shift_final_all[k] = shift_final_over[k] = 0;
|
|
1390
|
+
PWM_SGA_rec_real(pwm, min, raz, sta, model_type, nthr_dist, thr_all, fpr_all, seq, olen, nseq_real, shift, length_fasta_max, file_hist_local, file_prc_local, yes_out_hist, yes_out_prc,
|
|
1391
|
+
auprc_final_all, auprc_final_over, auprc_final_over1, auprc_final_all1, shift_final_all, shift_final_over, strand_final_all, strand_final_over);
|
|
1392
|
+
|
|
1393
|
+
// Output results to files
|
|
1394
|
+
FILE* out_sta_long;
|
|
1395
|
+
if ((out_sta_long = fopen(file_sta_long_local, "at")) == NULL)
|
|
1396
|
+
{
|
|
1397
|
+
printf("Output file can't be opened!\n");
|
|
1398
|
+
return -1; // Changed from exit(1) to return -1
|
|
1399
|
+
}
|
|
1400
|
+
fprintf(out_sta_long, "%s\t%s", file_model[0], file_model[1]);
|
|
1401
|
+
fprintf(out_sta_long, "\tOverlap\t%f", auprc_final_over1);
|
|
1402
|
+
fprintf(out_sta_long, "\tAll\t%f", auprc_final_all1);
|
|
1403
|
+
fprintf(out_sta_long, "\tHeterotypic");
|
|
1404
|
+
//overap
|
|
1405
|
+
fprintf(out_sta_long, "\t%f", auprc_final_over[0]);
|
|
1406
|
+
fprintf(out_sta_long, "\t%.1f", shift_final_over[0]);
|
|
1407
|
+
fprintf(out_sta_long, "\t%c", strand_final_over[0]);
|
|
1408
|
+
//all
|
|
1409
|
+
fprintf(out_sta_long, "\t%f", auprc_final_all[0]);
|
|
1410
|
+
fprintf(out_sta_long, "\t%.1f", shift_final_all[0]);
|
|
1411
|
+
fprintf(out_sta_long, "\t%c", strand_final_all[0]);
|
|
1412
|
+
fprintf(out_sta_long, "\tHomotypic");
|
|
1413
|
+
//overap
|
|
1414
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%f", auprc_final_over[i]);
|
|
1415
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%.1f", shift_final_over[i]);
|
|
1416
|
+
fprintf(out_sta_long, "\t");
|
|
1417
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "%c", strand_final_over[i]);
|
|
1418
|
+
//all
|
|
1419
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%f", auprc_final_all[i]);
|
|
1420
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "\t%.1f", shift_final_all[i]);
|
|
1421
|
+
fprintf(out_sta_long, "\t");
|
|
1422
|
+
for (i = 1; i < 3; i++)fprintf(out_sta_long, "%c", strand_final_all[i]);
|
|
1423
|
+
fprintf(out_sta_long, "\n");
|
|
1424
|
+
fclose(out_sta_long);
|
|
1425
|
+
|
|
1426
|
+
FILE* out_short_over;
|
|
1427
|
+
if ((out_short_over = fopen(file_short_over_local, "at")) == NULL)
|
|
1428
|
+
{
|
|
1429
|
+
printf("Output file can't be opened!\n");
|
|
1430
|
+
return -1; // Changed from exit(1) to return -1
|
|
1431
|
+
}
|
|
1432
|
+
fprintf(out_short_over, "\t%f", auprc_final_over1);
|
|
1433
|
+
fclose(out_short_over);
|
|
1434
|
+
|
|
1435
|
+
FILE* out_short_all;
|
|
1436
|
+
if ((out_short_all = fopen(file_short_all_local, "at")) == NULL)
|
|
1437
|
+
{
|
|
1438
|
+
printf("Output file can't be opened!\n");
|
|
1439
|
+
return -1; // Changed from exit(1) to return -1
|
|
1440
|
+
}
|
|
1441
|
+
fprintf(out_short_all, "\t%f", auprc_final_all1);
|
|
1442
|
+
fclose(out_short_all);
|
|
1443
|
+
|
|
1444
|
+
// Cleanup allocated memory
|
|
1445
|
+
for (k = 0; k < 2; k++)
|
|
1446
|
+
{
|
|
1447
|
+
delete[] thr_all[k];
|
|
1448
|
+
}
|
|
1449
|
+
delete[] thr_all;
|
|
1450
|
+
for (k = 0; k < 2; k++)
|
|
1451
|
+
{
|
|
1452
|
+
delete[] fpr_all[k];
|
|
1453
|
+
}
|
|
1454
|
+
delete[] fpr_all;
|
|
1455
|
+
delete[] peak_len_real;
|
|
1456
|
+
for (k = 0; k < 2; k++)
|
|
1457
|
+
{
|
|
1458
|
+
for (i = 0; i < nseq_real; i++)
|
|
1459
|
+
{
|
|
1460
|
+
delete[] seq[k][i];
|
|
1461
|
+
}
|
|
1462
|
+
delete[] seq[k];
|
|
1463
|
+
}
|
|
1464
|
+
delete[] seq;
|
|
1465
|
+
for (k = 0; k < 2; k++)
|
|
1466
|
+
{
|
|
1467
|
+
for (i = 0; i < MATLEN; i++)
|
|
1468
|
+
{
|
|
1469
|
+
delete[] pwm[k][i];
|
|
1470
|
+
}
|
|
1471
|
+
delete[] pwm[k];
|
|
1472
|
+
}
|
|
1473
|
+
delete[] pwm;
|
|
1474
|
+
|
|
1475
|
+
return 0;
|
|
1476
|
+
}
|