yamcot 1.0.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
yamcot/__init__.py ADDED
@@ -0,0 +1,46 @@
1
+ """
2
+ unimotifcomparator
3
+ ==================
4
+
5
+ This package provides a modular and extensible framework for de‑novo motif
6
+ discovery, motif comparison and performance evaluation in ChIP‑seq‑like
7
+ applications. The design emphasises object oriented abstractions and clear
8
+ interfaces so that new motif discovery tools, motif comparison strategies
9
+ and evaluation metrics can be added with minimal effort. All code is
10
+ intended for educational and fundamental research use on open, anonymised
11
+ datasets only and must not be applied to sensitive or identifiable data.
12
+
13
+ The top level modules expose the following key components:
14
+
15
+ ``io``
16
+ Functions for reading and writing biological sequences in FASTA format
17
+ and motifs in MEME format.
18
+
19
+ ``models``
20
+ Motif model classes. A generic :class:`MotifModel` describes the common
21
+ behaviour of sequence motifs, while concrete subclasses such as
22
+ :class:`PWMMotif` implement specific scoring logic.
23
+
24
+ ``discovery``
25
+ Base classes and concrete implementations of motif discovery tools.
26
+
27
+ ``comparison``
28
+ Base classes and concrete implementations of motif comparison metrics.
29
+
30
+ ``evaluation``
31
+ Classes for computing ROC/PR curves and their associated summary
32
+ statistics.
33
+
34
+ ``pipeline``
35
+ High level orchestrators that perform bootstrapping, odd/even
36
+ cross‑validation, motif comparison and final motif selection.
37
+
38
+ ``cli``
39
+ An example command line interface exposing the pipeline to end users.
40
+
41
+ This modular structure makes it straightforward to integrate alternative
42
+ motif models (e.g. BaMM or SiteGA), comparison methods (e.g. Jaccard
43
+ index, overlap coefficient or recognition‑function correlation) and
44
+ performance metrics. See individual modules for detailed API
45
+ documentation.
46
+ """
@@ -0,0 +1,17 @@
1
# Bind the compiled C++ backend; if it is unavailable, install a stub that
# reports why as soon as somebody tries to call it.
try:
    from . import _core  # compiled extension module shipped inside the package

    run_motali_cpp = _core.run_motali_cpp
except (ImportError, AttributeError) as exc:
    # The name bound by ``except ... as`` is unbound when the handler exits, so
    # the exception is kept under a module-level name for the stub below.
    _import_error = exc
    _core = None

    def run_motali_cpp(*args, **kwargs):
        """Stand-in for the native entry point; always raises ``ImportError``."""
        reason = (
            "The C++ extension '_core' is not installed or could not be loaded. "
            "Please ensure the package was built correctly. "
            f"Original error: {_import_error}"
        )
        raise ImportError(reason)
Binary file
@@ -0,0 +1,28 @@
1
+ #include <nanobind/nanobind.h>
2
+ #include <nanobind/stl/string.h>
3
+
4
+ #include "core_functions.h"
5
+
6
+ namespace nb = nanobind;
7
+
8
+ NB_MODULE(_core, m) {
9
+ m.doc() = "C++ backend for yamcot";
10
+
11
+ m.def("run_motali_cpp", &run_motali_cpp,
12
+ nb::arg("file_fasta"),
13
+ nb::arg("type_model_1"),
14
+ nb::arg("type_model_2"),
15
+ nb::arg("file_model_1"),
16
+ nb::arg("file_model_2"),
17
+ nb::arg("file_table_1"),
18
+ nb::arg("file_table_2"),
19
+ nb::arg("shift"),
20
+ nb::arg("threshold"),
21
+ nb::arg("file_hist"),
22
+ nb::arg("yes_out_hist"),
23
+ nb::arg("file_prc"),
24
+ nb::arg("yes_out_prc"),
25
+ nb::arg("file_short_over"),
26
+ nb::arg("file_short_all"),
27
+ nb::arg("file_sta_long"));
28
+ }
@@ -0,0 +1,29 @@
1
+ #pragma once
2
+
3
+ #ifdef __cplusplus
4
+ extern "C" {
5
+ #endif
6
+
7
/**
 * Main computational entry point of the native backend.
 *
 * Only the declaration lives in this header, so the meaning of each parameter
 * is not visible here; the names are the only hint and should be confirmed
 * against the implementation.
 *
 * NOTE(review): the ninth parameter is named `pvalue` here, while the nanobind
 * module registers the same position under the keyword "threshold".
 *
 * @return an int status code (convention not visible here -- TODO confirm).
 */
int run_motali_cpp(
    const char *file_fasta,
    const char *type_model_1,
    const char *type_model_2,
    const char *file_model_1,
    const char *file_model_2,
    const char *file_table_1,
    const char *file_table_2,
    int shift,
    double pvalue,
    const char *file_hist,
    int yes_out_hist,
    const char *file_prc,
    int yes_out_prc,
    const char *file_short_over,
    const char *file_short_all,
    const char *file_sta_long);
26
+
27
+ #ifdef __cplusplus
28
+ }
29
+ #endif
@@ -0,0 +1,182 @@
1
/*
 * Lower-case a single ASCII letter.
 *
 * Only 'A'..'Z' are mapped to 'a'..'z'. Every other byte (digits, punctuation,
 * NUL, bytes outside ASCII) is returned unchanged; shifting those by 32 would
 * silently turn them into unrelated characters.
 */
char TransStr(char x)
{
    if (x >= 'A' && x <= 'Z')
    {
        return (char)(x + ('a' - 'A'));
    }
    return x;
}
7
+ int fasta_to_plain_genome(char* file_in_fasta, int motif_len_min, int& all_pos, int &nseq, int &len)
8
+ {
9
+ char l[SEQLEN], d[SEQLEN], head[400];
10
+ int fl = 0;
11
+ FILE* in;
12
+
13
+ if ((in = fopen(file_in_fasta, "rt")) == NULL)
14
+ {
15
+ printf("Input file %s can't be opened\n", file_in_fasta);
16
+ return -1;
17
+ }
18
+ char symbol = fgetc(in);
19
+ rewind(in);
20
+ all_pos = nseq = len = 0;
21
+ while (nseq >= 0)
22
+ {
23
+ if (fgets(l, sizeof(l), in) == NULL) fl = -1;
24
+ if (*l == '\n' && fl != -1)continue;
25
+ if (((*l == symbol) || (fl == -1)) && (fl != 0))
26
+ {
27
+ int lenx = (int)strlen(d);
28
+ if (lenx > len)len = lenx;
29
+ all_pos += (lenx-motif_len_min);
30
+ nseq++;
31
+ // int check = CheckStr(file, d, n, 1, outlog);
32
+ //if (check != -1)
33
+ if (fl == -1)
34
+ {
35
+ fclose(in);
36
+ break;
37
+ }
38
+ }
39
+ if (*l == symbol)
40
+ {
41
+ memset(head, 0, sizeof(head));
42
+ DelChar(l, '\n');
43
+ strcpy(head, l);
44
+ fl = 0; continue;
45
+ }
46
+ if (fl == 0)
47
+ {
48
+ memset(d, 0, sizeof(d));
49
+ DelChar(l, '\n');
50
+ strcpy(d, l);
51
+ fl = 1; continue;
52
+ }
53
+ if (strlen(d) + strlen(l) > sizeof(d))
54
+ {
55
+ printf("Size is large...");
56
+ printf("l:%s\nstrlen(l):%zu\n", l, strlen(l));
57
+ printf("d:%s\nstrlen(d):%zu\n", d, strlen(d));
58
+ exit(1);
59
+ }
60
+ DelChar(l, '\n');
61
+ strcat(d, l);
62
+ }
63
+ return 1;
64
+ }
65
+ int fasta_to_plain0(char *file_in_fasta, int &length_fasta_max, int &nseq_fasta)
66
+ {
67
+ char head[200];
68
+ FILE *in;
69
+
70
+ if((in=fopen(file_in_fasta,"rt"))==NULL)
71
+ {
72
+ printf("Input file %s can't be opened\n",file_in_fasta);
73
+ return -1;
74
+ }
75
+ char c, symbol = '>';
76
+ nseq_fasta=0;
77
+ int len=0;
78
+ //double sum_len=0;
79
+ int fl=1;
80
+ length_fasta_max=0;
81
+ do
82
+ {
83
+ c=getc(in);
84
+ if(c==EOF)fl=-1;
85
+ if(c==symbol || fl==-1)
86
+ {
87
+ if(nseq_fasta>0)
88
+ {
89
+ if(len>length_fasta_max)length_fasta_max=len;
90
+ if(len>SEQLEN)
91
+ {
92
+ printf("Sequence N %d too long... %d nt\n",nseq_fasta,len);
93
+ return -1;
94
+ }
95
+ }
96
+ if(fl!=-1)
97
+ {
98
+ nseq_fasta++;
99
+ len=0;
100
+ }
101
+ if(fl==1)
102
+ {
103
+ fgets(head,sizeof(head),in);
104
+ continue;
105
+ }
106
+ }
107
+ if(c=='\n')continue;
108
+ if(c=='\t')continue;
109
+ if(c==' ')continue;
110
+ if(c=='\r')continue;
111
+ len++;
112
+ }
113
+ while(fl==1);
114
+ fclose(in);
115
+ return 1;
116
+ }
117
+ int fasta_to_plain1(char *file_in_fasta, int length_fasta_max, int nseq_fasta, char ***seq, int *peak_len)
118
+ {
119
+ int fl=1, n=0, len=0;
120
+ char head[200];
121
+ char c, symbol = '>';
122
+ char alfavit4[]="acgtnACGTN";
123
+ FILE *in;
124
+ if((in=fopen(file_in_fasta,"rt"))==NULL)
125
+ {
126
+ printf("Input file %s can't be opened\n",file_in_fasta);
127
+ return -1;
128
+ }
129
+ do
130
+ {
131
+ c=getc(in);
132
+ if(c==EOF)
133
+ {
134
+ fl=-1;
135
+ }
136
+ if(c==symbol || fl==-1)
137
+ {
138
+ if(len>0)
139
+ {
140
+ peak_len[n]=len;
141
+ seq[0][n][len]='\0';
142
+ strncpy(seq[1][n],seq[0][n],len);
143
+ seq[1][n][len]='\0';
144
+ ComplStr(seq[1][n]);
145
+ {
146
+ if(fl!=-1)
147
+ {
148
+ n++;
149
+ len=0;
150
+ }
151
+ }
152
+ }
153
+ else
154
+ {
155
+ if(n>0 && fl!=-1)
156
+ {
157
+ printf("Peak length error! peak %d\n",n+1);
158
+ return -1;
159
+ }
160
+ }
161
+ if(fl==-1)break;
162
+ if(fl==1)
163
+ {
164
+ fgets(head,sizeof(head),in);
165
+ continue;
166
+ }
167
+ }
168
+ if(c=='\n')continue;
169
+ if(c=='\t')continue;
170
+ if(c==' ')continue;
171
+ if(c=='\r')continue;
172
+ if(strchr(alfavit4,c)!=NULL)
173
+ {
174
+ c=TransStr(c);
175
+ seq[0][n][len++]=c;
176
+ }
177
+ else seq[0][n][len++]='n';
178
+ }
179
+ while(fl==1);
180
+ fclose(in);
181
+ return 1;
182
+ }