submine 0.1.1__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. submine/__init__.py +37 -0
  2. submine/algorithms/__init__.py +23 -0
  3. submine/algorithms/base.py +143 -0
  4. submine/algorithms/gspan.py +156 -0
  5. submine/algorithms/gspan_cpp.cpython-312-darwin.so +0 -0
  6. submine/algorithms/sopagrami.py +250 -0
  7. submine/algorithms/sopagrami_cpp.cpython-312-darwin.so +0 -0
  8. submine/api.py +134 -0
  9. submine/backends/__init__.py +0 -0
  10. submine/backends/gspan/CMakeLists.txt +65 -0
  11. submine/backends/gspan/dfs.cpp +98 -0
  12. submine/backends/gspan/graph.cpp +165 -0
  13. submine/backends/gspan/gspan.cpp +776 -0
  14. submine/backends/gspan/gspan.h +296 -0
  15. submine/backends/gspan/ismin.cpp +124 -0
  16. submine/backends/gspan/main.cpp +106 -0
  17. submine/backends/gspan/misc.cpp +177 -0
  18. submine/backends/gspan/python_bindings.cpp +133 -0
  19. submine/backends/sopagrami/cpp/CMakeLists.txt +44 -0
  20. submine/backends/sopagrami/cpp/include/alg.hpp +150 -0
  21. submine/backends/sopagrami/cpp/include/common/timer.hpp +18 -0
  22. submine/backends/sopagrami/cpp/src/alg.cpp +805 -0
  23. submine/backends/sopagrami/cpp/src/dump.cpp +262 -0
  24. submine/backends/sopagrami/cpp/src/main.cpp +94 -0
  25. submine/backends/sopagrami/cpp/src/python_bindings.cpp +123 -0
  26. submine/cli/__init__.py +6 -0
  27. submine/cli/main.py +87 -0
  28. submine/core/__init__.py +12 -0
  29. submine/core/graph.py +179 -0
  30. submine/core/result.py +121 -0
  31. submine/datasets/__init__.py +11 -0
  32. submine/datasets/loaders.py +145 -0
  33. submine/errors.py +41 -0
  34. submine/io/__init__.py +30 -0
  35. submine/io/common.py +173 -0
  36. submine/io/gexf.py +88 -0
  37. submine/io/gspan.py +268 -0
  38. submine/io/sopagrami.py +143 -0
  39. submine/io/transcode.py +147 -0
  40. submine/registry.py +8 -0
  41. submine/utils/__init__.py +6 -0
  42. submine/utils/checks.py +115 -0
  43. submine/utils/logging.py +41 -0
  44. submine-0.1.1.dist-info/METADATA +178 -0
  45. submine-0.1.1.dist-info/RECORD +47 -0
  46. submine-0.1.1.dist-info/WHEEL +6 -0
  47. submine-0.1.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,262 @@
1
+ #include "alg.hpp"
2
+
3
+ #include <filesystem>
4
+ #include <fstream>
5
+ #include <sstream>
6
+ #include <functional>
7
+ #include <climits>
8
+ namespace fs = std::filesystem;
9
+
10
+ namespace algo {
11
+ // ---- utilities to reuse your edge-check logic ----
12
+ static inline bool ok_edge_map(const algo::DataGraph& G,
13
+ const algo::Pattern::PEdge& e,
14
+ int va, int vb, // pattern endpoints
15
+ int ga, int gb) // graph nodes mapped to (va,vb)
16
+ {
17
+ if (e.dir == 1) {
18
+ if (e.a == va && e.b == vb) return G.has_edge(ga, gb, e.el);
19
+ if (e.a == vb && e.b == va) return G.has_edge(gb, ga, e.el);
20
+ return true;
21
+ } else {
22
+ if ((e.a == va && e.b == vb) || (e.a == vb && e.b == va))
23
+ return G.has_edge(ga, gb, e.el) || G.has_edge(gb, ga, e.el);
24
+ return true;
25
+ }
26
+ }
27
+
28
+ // Forward check: quick “does gi have any neighbor candidate for each incident edge?”
29
+ static bool forward_ok(const algo::DataGraph& G, const algo::Pattern& P,
30
+ int v, int gi,
31
+ const std::vector<int>& assign,
32
+ const std::vector<std::vector<int>>& dom)
33
+ {
34
+ for (const auto& e : P.pedges){
35
+ if (e.a!=v && e.b!=v) continue;
36
+ int w = (e.a==v ? e.b : e.a);
37
+
38
+ if (assign[w] != -1){
39
+ if (e.a==v){ if (!ok_edge_map(G,e,e.a,e.b,gi,assign[w])) return false; }
40
+ else { if (!ok_edge_map(G,e,e.a,e.b,assign[w],gi)) return false; }
41
+ continue;
42
+ }
43
+
44
+ bool okN = false;
45
+ for (int gj : dom[w]){
46
+ if (e.a==v){ if (ok_edge_map(G,e,e.a,e.b,gi,gj)) { okN=true; break; } }
47
+ else { if (ok_edge_map(G,e,e.a,e.b,gj,gi)) { okN=true; break; } }
48
+ }
49
+ if (!okN) return false;
50
+ }
51
+ return true;
52
+ }
53
+ static bool find_embedding_with_fixed(const algo::DataGraph& G,
54
+ const algo::Pattern& P,
55
+ int fixVar, int fixNode,
56
+ std::vector<int>& assignment)
57
+ {
58
+ const int k = (int)P.vlab.size();
59
+ assignment.assign(k, -1);
60
+
61
+ // Build label-consistent domains
62
+ std::vector<std::vector<int>> dom(k);
63
+ for (int i=0;i<k;++i){
64
+ auto it = G.lab2nodes.find(P.vlab[i]);
65
+ if (it == G.lab2nodes.end()) return false;
66
+ dom[i].assign(it->second.begin(), it->second.end());
67
+ if (dom[i].empty()) return false;
68
+ }
69
+
70
+ // Fix x_fixVar = fixNode
71
+ assignment[fixVar] = fixNode;
72
+ std::vector<char> used(G.vlabels.size(), 0);
73
+ used[fixNode] = 1;
74
+
75
+ auto choose = [&](){
76
+ int best=-1, bestCnt=INT_MAX;
77
+ for (int v=0; v<k; ++v){
78
+ if (assignment[v]!=-1) continue;
79
+ int cnt=0;
80
+ for (int gi : dom[v]){
81
+ if (used[gi]) continue;
82
+ if (forward_ok(G,P,v,gi,assignment,dom)){ ++cnt; if (cnt>=bestCnt) break; }
83
+ }
84
+ if (cnt < bestCnt){ best=v; bestCnt=cnt; }
85
+ }
86
+ return best;
87
+ };
88
+
89
+ std::function<bool()> dfs = [&](){
90
+ for (int i=0;i<k;++i) if (assignment[i]==-1) goto not_done;
91
+ return true;
92
+ not_done:
93
+ int v = choose(); if (v==-1) return false;
94
+ for (int gi : dom[v]){
95
+ if (used[gi]) continue;
96
+ if (!forward_ok(G,P,v,gi,assignment,dom)) continue;
97
+ assignment[v]=gi; used[gi]=1;
98
+ if (dfs()) return true;
99
+ used[gi]=0; assignment[v]=-1;
100
+ }
101
+ return false;
102
+ };
103
+
104
+ return dfs();
105
+ }
106
+
107
+ // For each pattern vertex i, collect up to `max_per_vertex` graph node IDs
108
+ // that participate in at least one full embedding (MNI “image set”).
109
+ // If max_per_vertex < 0 => no cap.
110
+ static std::vector<std::vector<int>>
111
+ collect_mni_image_sets(const algo::DataGraph& G,
112
+ const algo::Pattern& P,
113
+ int max_per_vertex = 100)
114
+ {
115
+ const int k = (int)P.vlab.size();
116
+ std::vector<std::vector<int>> images(k);
117
+
118
+ // Domains by label
119
+ std::vector<std::vector<int>> dom(k);
120
+ for (int i=0;i<k;++i){
121
+ auto it = G.lab2nodes.find(P.vlab[i]);
122
+ if (it == G.lab2nodes.end()) return images;
123
+ dom[i].assign(it->second.begin(), it->second.end());
124
+ }
125
+
126
+ // For each pattern variable v, test each u in dom[v] by trying to find one embedding
127
+ for (int v=0; v<k; ++v){
128
+ int kept = 0;
129
+ for (int u : dom[v]){
130
+ std::vector<int> a;
131
+ if (find_embedding_with_fixed(G, P, v, u, a)){
132
+ images[v].push_back(u);
133
+ ++kept;
134
+ if (max_per_vertex >= 0 && kept >= max_per_vertex) break;
135
+ }
136
+ }
137
+ }
138
+ return images;
139
+ }
140
+
141
+
142
+
143
+ // per-vertex images CSV (patternIndex, graphNodeId) ---
144
+ static void write_pattern_images_csv(const algo::Pattern& P,
145
+ const std::vector<std::vector<int>>& images,
146
+ const std::string& path_csv)
147
+ {
148
+ std::ofstream out(path_csv);
149
+ if (!out) return;
150
+ out << "pattern_vertex,graph_node_id\n";
151
+ for (size_t i=0;i<images.size();++i){
152
+ for (int u : images[i]){
153
+ out << i << "," << u << "\n";
154
+ }
155
+ }
156
+ }
157
+
158
+
159
+ // --- NEW: sample embeddings CSV (one row per embedding, columns are pattern vertex order) ---
160
+ static void write_sample_embeddings_csv(const algo::Pattern& P,
161
+ const std::vector<std::vector<int>>& emb,
162
+ const std::string& path_csv)
163
+ {
164
+ std::ofstream out(path_csv);
165
+ if (!out) return;
166
+ // header
167
+ out << "emb_id";
168
+ for (size_t i=0;i<P.vlab.size();++i) out << ",v" << i;
169
+ out << "\n";
170
+ for (size_t i=0;i<emb.size();++i){
171
+ out << i;
172
+ for (int id : emb[i]) out << "," << id;
173
+ out << "\n";
174
+ }
175
+ }
176
+
177
+ // _____________________________________________________
178
// Escapes a label for use inside a double-quoted DOT string: every double
// quote and every backslash is prefixed with a backslash; all other
// characters are copied unchanged.
static std::string sanitize_dot(const std::string& s){
    std::string escaped;
    escaped.reserve(s.size() * 2);   // worst case: every character is escaped
    for (const char ch : s) {
        switch (ch) {
            case '"':
            case '\\':
                escaped += '\\';
                break;
            default:
                break;
        }
        escaped += ch;
    }
    return escaped;
}
186
+
187
+ static void write_pattern_as_lg(const algo::Pattern& P, const std::string& path){
188
+ std::ofstream out(path);
189
+ if (!out) return;
190
+ for (size_t i=0;i<P.vlab.size();++i) out << "v " << i << " " << P.vlab[i] << "\n";
191
+ for (const auto& e : P.pedges) out << "e " << e.a << " " << e.b << " " << e.el << "\n";
192
+ }
193
+
194
+ static void write_pattern_as_dot(const algo::Pattern& P, bool directed, const std::string& path){
195
+ std::ofstream out(path);
196
+ if (!out) return;
197
+ out << (directed ? "digraph G {\n" : "graph G {\n");
198
+ // nodes
199
+ for (size_t i=0;i<P.vlab.size();++i){
200
+ out << " " << i << " [shape=circle,label=\"" << sanitize_dot(P.vlab[i]) << "\"];\n";
201
+ }
202
+ // edges
203
+ for (const auto& e : P.pedges){
204
+ const bool use_arrow = directed || e.dir==1;
205
+ out << " " << e.a << (use_arrow ? " -> " : " -- ") << e.b
206
+ << " [label=\"" << sanitize_dot(e.el) << "\"];\n";
207
+ }
208
+ out << "}\n";
209
+ }
210
+
211
+ void dump_patterns_to_dir(
212
+ const Output& out,
213
+ const std::string& dump_dir,
214
+ bool directed,
215
+ const DataGraph& G,
216
+ bool dump_images_csv,
217
+ int max_images_per_vertex,
218
+ bool dump_sample_embeddings,
219
+ int sample_limit
220
+ ) {
221
+ fs::create_directories(dump_dir);
222
+
223
+ // ---- index.tsv ----
224
+ std::ofstream idx(fs::path(dump_dir) / "index.tsv");
225
+ idx << "id\tk\tm\tfull_support\tkey\tlg_path\tdot_path\n";
226
+
227
+ for (size_t i=0; i<out.frequent_patterns.size(); ++i){
228
+ const auto& f = out.frequent_patterns[i];
229
+ const size_t k = f.pat.vlab.size();
230
+ const size_t m = f.pat.pedges.size();
231
+
232
+ std::string base = dump_dir + "/pat_" + std::to_string(i)
233
+ + "_k" + std::to_string(k)
234
+ + "_e" + std::to_string(m)
235
+ + "_full" + std::to_string(f.full_support);
236
+ std::string lgp = base + ".lg";
237
+ std::string dotp = base + ".dot";
238
+
239
+ // always write shape artifacts
240
+ write_pattern_as_lg (f.pat, lgp);
241
+ write_pattern_as_dot(f.pat, directed, dotp);
242
+
243
+ // optionally: image sets (can be heavy)
244
+ if (dump_images_csv){
245
+ auto images = collect_mni_image_sets(G, f.pat, max_images_per_vertex);
246
+ write_pattern_images_csv(f.pat, images, base + ".images.csv");
247
+ }
248
+
249
+ // optionally: sample full embeddings (disabled in your current code; left stub)
250
+ if (dump_sample_embeddings){
251
+ std::vector<std::vector<int>> samples;
252
+ // enumerate_embeddings(G, f.pat, sample_limit, samples); // not implelemnted yet
253
+ write_sample_embeddings_csv(f.pat, samples, base + ".emb.csv");
254
+ }
255
+
256
+ idx << i << '\t' << k << '\t' << m << '\t'
257
+ << f.full_support << '\t' << f.pat.key()
258
+ << '\t' << lgp << '\t' << dotp << "\n";
259
+ }
260
+ }
261
+
262
+ } // namespace algo
@@ -0,0 +1,94 @@
1
+ #include "alg.hpp"
2
+ #include <iostream>
3
+ #include <string>
4
+ #include <fstream>
5
+ #include <sstream>
6
+ #include <string>
7
+ #include <vector>
8
+ #include <iomanip>
9
+ #include <filesystem>
10
+ #include <functional>
11
+ #include <climits>
12
+
13
+ using namespace algo;
14
+ namespace fs = std::filesystem;
15
+
16
+
17
+
18
+ int main(int argc, char** argv){
19
+ // Usage:
20
+ // run <graph.lg> [tau] [directed(0/1)] [sorted(0/1)] [threads]
21
+ //
22
+ // Defaults:
23
+ // tau=2, directed=0, sorted=1 (SoGraMi ordering), threads=4
24
+ if (argc < 2){
25
+ std::cerr
26
+ << "Usage: run <graph.lg> [tau] [directed(0/1)] [sorted(0/1)] [threads]\n"
27
+ << " [dump_dir] [dump_images(0/1)] [max_images_per_vertex]\n"
28
+ << " [dump_emb(0/1)] [sample_limit]\n";
29
+ return 1;
30
+ }
31
+
32
+
33
+ const std::string path = argv[1];
34
+ const int tau = (argc > 2 ? std::stoi(argv[2]) : 2);
35
+ const bool directed = (argc > 3 ? (std::stoi(argv[3]) != 0) : false);
36
+ const bool sorted = (argc > 4 ? (std::stoi(argv[4]) != 0) : true); // default: SoGraMi sorted
37
+ const int threads = (argc > 5 ? std::stoi(argv[5]) : 4); // default: 4
38
+
39
+ DataGraph G;
40
+ G.load_from_lg(path, directed);
41
+
42
+ // Graph stats
43
+ std::cout << "Graph loaded: |V|=" << G.vlabels.size() << ", |E|=";
44
+ long long edge_count = 0;
45
+ for (const auto& adj_list : G.adj) edge_count += (long long)adj_list.size();
46
+ if (!directed) edge_count /= 2;
47
+ std::cout << edge_count << "\n";
48
+
49
+ // Params
50
+ Params p;
51
+ p.tau = tau;
52
+ p.directed = directed;
53
+ p.sorted_seeds = sorted; // SoGraMi ordering toggle
54
+ p.num_threads = threads; // run_sopagrami <=0 will default to all available
55
+ p.compute_full_support = true;
56
+
57
+ std::cout << "Settings: tau=" << p.tau
58
+ << " directed=" << (p.directed?1:0)
59
+ << " sorted=" << (p.sorted_seeds?1:0)
60
+ << " threads=" << p.num_threads
61
+ << "\n\n";
62
+
63
+ // Run
64
+ auto out = run_sopagrami(G, p);
65
+
66
+ // Output
67
+ std::cout << "Frequent patterns: " << out.frequent_patterns.size() << "\n";
68
+ for (const auto& f : out.frequent_patterns){
69
+ std::cout << "k=" << f.pat.vlab.size()
70
+ << " |E|=" << f.pat.pedges.size()
71
+ << " full=" << f.full_support
72
+ << " key=" << f.pat.key() << "\n";
73
+ }
74
+ //dump patterns to dir
75
+
76
+ std::string dump_dir = (argc > 6 ? argv[6] : "");
77
+ bool dump_images_csv = (argc > 7 ? (std::stoi(argv[7]) != 0) : false);
78
+ int max_images_per_vertex = (argc > 8 ? std::stoi(argv[8]) : 200);
79
+ bool dump_sample_embeddings = (argc > 9 ? (std::stoi(argv[9]) != 0) : false);
80
+ int sample_limit = (argc > 10 ? std::stoi(argv[10]) : 50);
81
+
82
+ if (!dump_dir.empty()){
83
+ dump_patterns_to_dir(out, dump_dir, p.directed, G,
84
+ dump_images_csv, max_images_per_vertex,
85
+ dump_sample_embeddings, sample_limit);
86
+ std::cout << "Wrote pattern files to: " << dump_dir
87
+ << " (index.tsv, .lg, .dot"
88
+ << (dump_images_csv ? ", .images.csv" : "")
89
+ << (dump_sample_embeddings ? ", .emb.csv" : "")
90
+ << ")\n";
91
+ }
92
+
93
+ return 0;
94
+ }
@@ -0,0 +1,123 @@
1
+ // python_bindings.cpp
2
+ #include "alg.hpp"
3
+ #include <pybind11/pybind11.h>
4
+ #include <pybind11/stl.h>
5
+
6
+ namespace py = pybind11;
7
+ using namespace algo;
8
+
9
+ // Expose: run on a .lg file, optionally dump pattern artifacts to a directory,
10
+ // and return patterns as a list of dicts.
11
+ py::list run_on_lg_file(
12
+ const std::string& path,
13
+ int tau,
14
+ bool directed,
15
+ bool sorted_seeds,
16
+ int num_threads,
17
+ bool compute_full_support,
18
+ const std::string& out_dir,
19
+ bool dump_images_csv,
20
+ int max_images_per_vertex,
21
+ bool dump_sample_embeddings,
22
+ int sample_limit
23
+ ) {
24
+ DataGraph G;
25
+ G.load_from_lg(path, directed);
26
+
27
+ Params p;
28
+ p.tau = tau;
29
+ p.directed = directed;
30
+ p.sorted_seeds = sorted_seeds;
31
+ p.num_threads = num_threads;
32
+ p.compute_full_support = compute_full_support;
33
+
34
+ Output out = run_sopagrami(G, p);
35
+
36
+ // Optional side-effect: dump pattern files to directory
37
+ if (!out_dir.empty()) {
38
+ dump_patterns_to_dir(
39
+ out,
40
+ out_dir,
41
+ p.directed,
42
+ G,
43
+ dump_images_csv,
44
+ max_images_per_vertex,
45
+ dump_sample_embeddings,
46
+ sample_limit
47
+ );
48
+ }
49
+
50
+ // Return patterns to Python
51
+ py::list py_patterns;
52
+
53
+ for (const auto& f : out.frequent_patterns) {
54
+ const Pattern& P = f.pat;
55
+
56
+ py::dict d;
57
+ d["node_labels"] = P.vlab; // std::vector<std::string>
58
+
59
+ py::list edges;
60
+ for (const auto& e : P.pedges) {
61
+ // (a, b, label, dir) dir: 0 undirected, 1 a->b (per your comment)
62
+ edges.append(py::make_tuple(e.a, e.b, e.el, e.dir));
63
+ }
64
+
65
+ d["edges"] = std::move(edges);
66
+ d["full_support"] = f.full_support;
67
+ d["key"] = P.key();
68
+
69
+ py_patterns.append(std::move(d));
70
+ }
71
+
72
+ return py_patterns;
73
+ }
74
+
75
+ PYBIND11_MODULE(sopagrami_cpp, m) {
76
+ m.doc() = "pybind11 bindings for SoPaGraMi (C++17)";
77
+
78
+ m.def(
79
+ "run_on_lg_file",
80
+ &run_on_lg_file,
81
+ py::arg("path"),
82
+ py::arg("tau") = 2,
83
+ py::arg("directed") = false,
84
+ py::arg("sorted_seeds") = true,
85
+ py::arg("num_threads") = 0,
86
+ py::arg("compute_full_support") = true,
87
+
88
+ // dump-related args
89
+ py::arg("out_dir") = std::string("result"),
90
+ py::arg("dump_images_csv") = false,
91
+ py::arg("max_images_per_vertex")= 200,
92
+ py::arg("dump_sample_embeddings")= false,
93
+ py::arg("sample_limit") = 50,
94
+
95
+ R"doc(
96
+ Run SoPaGraMi on an input .lg graph.
97
+
98
+ Parameters
99
+ ----------
100
+ path : str
101
+ Path to input .lg file.
102
+ tau : int, default=2
103
+ directed : bool, default=False
104
+ sorted_seeds : bool, default=True
105
+ num_threads : int, default=0
106
+ 0 means "use default / auto" as implemented in C++ core.
107
+ compute_full_support : bool, default=True
108
+
109
+ out_dir : str, default=""
110
+ If non-empty, dumps pattern artifacts to this directory:
111
+ index.tsv, per-pattern .lg, .dot, plus optional .images.csv and .emb.csv.
112
+ dump_images_csv : bool, default=False
113
+ max_images_per_vertex : int, default=200
114
+ dump_sample_embeddings : bool, default=False
115
+ sample_limit : int, default=50
116
+
117
+ Returns
118
+ -------
119
+ list[dict]
120
+ Each dict contains: node_labels, edges, full_support, key.
121
+ )doc"
122
+ );
123
+ }
@@ -0,0 +1,6 @@
1
+ """Command line interface for submine.
2
+
3
+ This subpackage defines the entry point for the optional `submine`
4
+ command line tool. Users can run mining algorithms from the shell
5
+ without writing Python code. See :mod:`submine.cli.main` for details.
6
+ """
submine/cli/main.py ADDED
@@ -0,0 +1,87 @@
1
+ """Entry point for the submine command line interface.
2
+
3
+ Use this CLI to run frequent subgraph mining algorithms from the shell.
4
+ It supports selecting an algorithm, loading a dataset and specifying
5
+ common parameters such as the minimum support threshold. The results
6
+ are printed to standard output.
7
+
8
+ Example::
9
+
10
+ python -m submine.cli.main --algorithm gspan --dataset toy --min-support 2
11
+
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import argparse
17
+ from typing import List
18
+
19
+ from .. import get_algorithm, load_dataset
20
+
21
+
22
def _positive_int(text: str) -> int:
    """argparse ``type`` callable: parse ``text`` as an integer >= 1."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}")
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {value}")
    return value


def parse_args(argv: List[str] | None = None) -> argparse.Namespace:
    """Parse command line arguments for the submine CLI.

    Parameters
    ----------
    argv:
        Argument list to parse. ``None`` makes argparse read ``sys.argv[1:]``.

    Returns
    -------
    argparse.Namespace
        With attributes ``algorithm``, ``dataset``, ``min_support``,
        ``top_k`` and ``verbose``.

    Notes
    -----
    ``--min-support`` is documented as a positive integer, so zero and
    negative values are rejected here with a normal argparse usage error
    (exit status 2) instead of being passed on to the miner.
    """
    parser = argparse.ArgumentParser(description="Run frequent subgraph mining algorithms")
    parser.add_argument(
        "--algorithm",
        "-a",
        required=True,
        help="Name of the algorithm to run (e.g., gspan, grami)"
    )
    parser.add_argument(
        "--dataset",
        "-d",
        default="toy",
        help="Dataset name to load (e.g., toy, mutag, enzymes)"
    )
    parser.add_argument(
        "--min-support",
        "-s",
        type=_positive_int,
        default=1,
        help="Minimum support threshold (positive integer)"
    )
    parser.add_argument(
        "--top-k",
        "-k",
        type=int,
        default=5,
        help="Print the top K subgraphs by support"
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Enable verbose logging"
    )
    return parser.parse_args(argv)
57
+
58
+
59
def main(argv: List[str] | None = None) -> None:
    """Run the CLI: load a dataset, run the chosen miner, print the top results.

    Parameters
    ----------
    argv:
        Argument list forwarded to :func:`parse_args`; ``None`` means the
        process arguments.

    Raises
    ------
    SystemExit
        With a human-readable message when the dataset cannot be loaded, the
        algorithm lookup raises ``KeyError``, or mining fails.
    """
    args = parse_args(argv)
    # Load dataset
    try:
        graphs = load_dataset(args.dataset)
    except Exception as e:
        raise SystemExit(f"Failed to load dataset '{args.dataset}': {e}")
    # Instantiate algorithm
    try:
        miner = get_algorithm(args.algorithm, verbose=args.verbose)
    except KeyError as e:
        # str(KeyError) is the repr of its argument, which would wrap the
        # message in an extra pair of quotes; use the raw argument instead.
        message = e.args[0] if e.args else f"Unknown algorithm '{args.algorithm}'"
        raise SystemExit(str(message))
    # Run mining
    try:
        result = miner.mine(graphs, min_support=args.min_support)
    except NotImplementedError as e:
        raise SystemExit(str(e))
    except Exception as e:
        raise SystemExit(f"Error while running algorithm '{args.algorithm}': {e}")
    # Print results
    top = result.top_k(args.top_k)
    print(f"Found {len(result)} frequent subgraphs (displaying top {len(top)})")
    for rank, found in enumerate(top, start=1):
        # Provide a simple textual representation
        print(f"#{rank}: support={found.support}, nodes={found.pattern.number_of_nodes()}, edges={found.pattern.number_of_edges()}")
84
+
85
+
86
+ if __name__ == "__main__":
87
+ main()
@@ -0,0 +1,12 @@
1
+ """Core data structures for the submine library.
2
+
3
+ This subpackage contains fundamental classes used throughout the
4
+ library. Currently the primary exported objects are
5
+ :class:`~submine.core.graph.Graph` for representing graphs and
6
+ :class:`~submine.core.result.SubgraphPattern` and :class:`~submine.core.result.MiningResult` for storing mining results.
7
+ """
8
+
9
+ from .graph import Graph
10
+ from .result import SubgraphPattern, MiningResult
11
+
12
+ __all__ = ["Graph", "SubgraphPattern", "MiningResult"]