submine 0.1.1__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. submine/__init__.py +37 -0
  2. submine/algorithms/__init__.py +23 -0
  3. submine/algorithms/base.py +143 -0
  4. submine/algorithms/gspan.py +156 -0
  5. submine/algorithms/gspan_cpp.cpython-312-darwin.so +0 -0
  6. submine/algorithms/sopagrami.py +250 -0
  7. submine/algorithms/sopagrami_cpp.cpython-312-darwin.so +0 -0
  8. submine/api.py +134 -0
  9. submine/backends/__init__.py +0 -0
  10. submine/backends/gspan/CMakeLists.txt +65 -0
  11. submine/backends/gspan/dfs.cpp +98 -0
  12. submine/backends/gspan/graph.cpp +165 -0
  13. submine/backends/gspan/gspan.cpp +776 -0
  14. submine/backends/gspan/gspan.h +296 -0
  15. submine/backends/gspan/ismin.cpp +124 -0
  16. submine/backends/gspan/main.cpp +106 -0
  17. submine/backends/gspan/misc.cpp +177 -0
  18. submine/backends/gspan/python_bindings.cpp +133 -0
  19. submine/backends/sopagrami/cpp/CMakeLists.txt +44 -0
  20. submine/backends/sopagrami/cpp/include/alg.hpp +150 -0
  21. submine/backends/sopagrami/cpp/include/common/timer.hpp +18 -0
  22. submine/backends/sopagrami/cpp/src/alg.cpp +805 -0
  23. submine/backends/sopagrami/cpp/src/dump.cpp +262 -0
  24. submine/backends/sopagrami/cpp/src/main.cpp +94 -0
  25. submine/backends/sopagrami/cpp/src/python_bindings.cpp +123 -0
  26. submine/cli/__init__.py +6 -0
  27. submine/cli/main.py +87 -0
  28. submine/core/__init__.py +12 -0
  29. submine/core/graph.py +179 -0
  30. submine/core/result.py +121 -0
  31. submine/datasets/__init__.py +11 -0
  32. submine/datasets/loaders.py +145 -0
  33. submine/errors.py +41 -0
  34. submine/io/__init__.py +30 -0
  35. submine/io/common.py +173 -0
  36. submine/io/gexf.py +88 -0
  37. submine/io/gspan.py +268 -0
  38. submine/io/sopagrami.py +143 -0
  39. submine/io/transcode.py +147 -0
  40. submine/registry.py +8 -0
  41. submine/utils/__init__.py +6 -0
  42. submine/utils/checks.py +115 -0
  43. submine/utils/logging.py +41 -0
  44. submine-0.1.1.dist-info/METADATA +178 -0
  45. submine-0.1.1.dist-info/RECORD +47 -0
  46. submine-0.1.1.dist-info/WHEEL +6 -0
  47. submine-0.1.1.dist-info/licenses/LICENSE +21 -0
submine/api.py ADDED
@@ -0,0 +1,134 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Iterable, Union, Sequence, Optional
5
+ import networkx as nx
6
+
7
+ import inspect
8
+ from .utils.checks import assert_regular_file
9
+ from .errors import SubmineInputError, ParameterValidationError
10
+
11
+
12
+ from .core.graph import Graph
13
+ from . import get_mining_algorithm as get_algorithm
14
+ from .core.result import MiningResult, SubgraphPattern
15
+
16
# A single graph, either the package's own Graph or a networkx graph.
GraphLike = Union[Graph, nx.Graph]

# Anything accepted as a source of graphs: one Graph, an iterable or sequence
# of Graphs, or a filesystem location given as a Path or a plain string.
GraphSourceLike = Union[
    Graph,
    Iterable[Graph],
    Sequence[Graph],
    Path,
    str,
    # later: DB handles, etc.
]
25
+
26
+
27
+ def _accepted_kwargs(callable_obj):
28
+ sig = inspect.signature(callable_obj)
29
+ accepted = set()
30
+ has_var_kw = False
31
+ for name, p in sig.parameters.items():
32
+ if p.kind == inspect.Parameter.VAR_KEYWORD:
33
+ has_var_kw = True
34
+ elif p.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.KEYWORD_ONLY):
35
+ accepted.add(name)
36
+ return accepted, has_var_kw
37
+
38
def _normalize_graph_source(source: GraphSourceLike) -> Iterable[Graph]:
    """Turn any supported graph source into an iterable of ``Graph`` objects.

    * a single ``Graph`` is wrapped in a one-element list;
    * a ``str`` or ``Path`` is loaded from disk with ``load_graphs``;
    * any other iterable is returned as an iterator, without inspecting its
      items.

    Raises:
        TypeError: if ``source`` is none of the above.
    """
    if isinstance(source, Graph):
        return [source]

    if isinstance(source, (str, Path)):
        # Imported lazily, only when a file actually has to be read.
        from .io.transcode import load_graphs

        return load_graphs(Path(source))

    # Anything iterable (list, generator, custom source, ...) passes through.
    # TODO: sanity check items, but can be lazy.
    try:
        graphs = iter(source)  # type: ignore
    except TypeError:
        graphs = None

    if graphs is None:
        raise TypeError(f"Cannot interpret {type(source)} as a graph source")
    return graphs
60
+
61
+
62
def mine_subgraphs(
    data: GraphSourceLike,
    algorithm: str,
    min_support: int,
    **algo_params,
) -> MiningResult:
    """High-level convenience function for users.

    `data` can be:
    - a single Graph
    - an iterable of Graphs
    - a path to a graph dataset on disk

    Each extra keyword argument is routed by name: to the algorithm's
    ``mine()`` when ``mine()`` declares it (also when the constructor declares
    it too), otherwise to the constructor when the constructor declares it.
    A ``**kwargs`` catch-all on either signature does not make an undeclared
    name acceptable.

    When `data` is a path and the miner exposes ``expected_input_format``,
    the file is handed to ``mine_native()``; it is first transcoded into a
    temporary file if it is not already in that format.

    Raises:
        ParameterValidationError: if an extra keyword argument is declared by
            neither the constructor nor ``mine()``, or if `min_support` is not
            a positive integer.
    """
    AlgoCls = get_algorithm(algorithm)

    # Split kwargs between __init__ and mine().
    init_keys, _ = _accepted_kwargs(AlgoCls.__init__)
    mine_keys, _ = _accepted_kwargs(AlgoCls.mine)
    # Signatures of the unbound methods list ``self``; it must never be
    # treated as a user-settable parameter.
    init_keys = init_keys - {"self"}
    mine_keys = mine_keys - {"self"}

    init_params = {}
    run_params = {}
    unknown = []
    for k, v in algo_params.items():
        if k in mine_keys:
            # Policy choice: a name accepted by both is a runtime override.
            run_params[k] = v
        elif k in init_keys:
            init_params[k] = v
        else:
            unknown.append(k)

    if unknown:
        raise ParameterValidationError(
            f"Unsupported parameters for algorithm='{algorithm}': {sorted(unknown)}"
        )

    # Validate before building the miner so a bad value never triggers
    # constructor work. bool is a subclass of int, so reject it explicitly.
    if (
        isinstance(min_support, bool)
        or not isinstance(min_support, int)
        or min_support <= 0
    ):
        raise ParameterValidationError(f"min_support must be a positive integer; got {min_support!r}")

    miner = AlgoCls(**init_params)

    # If user provided a path, and the miner declares a native on-disk format,
    # transcode directly to that format (only when needed) and call mine_native().
    if isinstance(data, (str, Path)):
        from .io.transcode import detect_format, transcode_path
        from .io.common import temporary_directory

        src_path = assert_regular_file(Path(data))
        expected = getattr(miner, "expected_input_format", None)
        if expected is not None:
            # Detection is best-effort: on failure src_fmt stays None and is
            # passed to transcode_path as such.
            src_fmt: Optional[str]
            try:
                src_fmt = detect_format(src_path)
            except Exception:
                src_fmt = None

            if src_fmt == expected:
                return miner.mine_native(src_path, min_support=min_support, **run_params)

            # Not in the miner's native format: transcode once to native file.
            with temporary_directory() as tmp:
                suffix = ".lg" if expected == "lg" else ".data"
                native_path = tmp / f"native{suffix}"
                transcode_path(src_path, native_path, dst_fmt=expected, src_fmt=src_fmt)
                return miner.mine_native(native_path, min_support=min_support, **run_params)

    graphs = _normalize_graph_source(data)
    return miner.mine(graphs, min_support=min_support, **run_params)
File without changes
@@ -0,0 +1,65 @@
1
# Build script for the gSpan backend: a standalone CLI executable (gspan_cli)
# and the pybind11 extension module (gspan_cpp) that is installed into the
# wheel under submine/algorithms.
cmake_minimum_required(VERSION 3.16)
project(gspan_cpp_bindings LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
include(FetchContent)
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module)

# ---- pybind11 (installed copy preferred, fetched otherwise) ----

# Try system / vcpkg / conda / installed pybind11 first
find_package(pybind11 CONFIG QUIET)

if (NOT pybind11_FOUND)
  message(STATUS "pybind11 not found via find_package; fetching with FetchContent...")

  FetchContent_Declare(
    pybind11
    GIT_REPOSITORY https://github.com/pybind/pybind11.git
    GIT_TAG v2.12.0
  )
  FetchContent_MakeAvailable(pybind11)
endif()

# Sources shared by the CLI and the Python module.
set(GSPAN_SOURCES
  dfs.cpp
  graph.cpp
  gspan.cpp
  ismin.cpp
  misc.cpp
)

add_executable(gspan_cli
  main.cpp
  ${GSPAN_SOURCES}
)
target_include_directories(gspan_cli PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})

pybind11_add_module(gspan_cpp
  python_bindings.cpp
  ${GSPAN_SOURCES}
)
target_include_directories(gspan_cpp PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
set_target_properties(gspan_cpp PROPERTIES PREFIX "")

# Warnings
if (MSVC)
  target_compile_options(gspan_cli PRIVATE /W4)
  target_compile_options(gspan_cpp PRIVATE /W4)
else()
  target_compile_options(gspan_cli PRIVATE -Wall -Wextra -Wformat)
  target_compile_options(gspan_cpp PRIVATE -Wall -Wextra -Wformat)
endif()

# ---- Install into the wheel ----
# Only the Python module is installed; gspan_cli has no install rule.
install(TARGETS gspan_cpp
  LIBRARY DESTINATION "submine/algorithms"
  RUNTIME DESTINATION "submine/algorithms"
  ARCHIVE DESTINATION "submine/algorithms"
)
@@ -0,0 +1,98 @@
1
+ /*
2
+ $Id: dfs.cpp,v 1.3 2004/05/21 05:50:13 taku-ku Exp $;
3
+
4
+ Copyright (C) 2004 Taku Kudo, All rights reserved.
5
+ This is free software with ABSOLUTELY NO WARRANTY.
6
+
7
+ This program is free software; you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation; either version 2 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program; if not, write to the Free Software
19
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20
+ 02111-1307, USA
21
+ */
22
+ #include "gspan.h"
23
+ #include <cstring>
24
+ #include <string>
25
+ #include <iterator>
26
+ #include <set>
27
+
28
+ namespace GSPAN {
29
+
30
+ /* Build a DFS code from a given graph.
31
+ */
32
+ void
33
+ DFSCode::fromGraph (Graph &g)
34
+ {
35
+ clear ();
36
+
37
+ EdgeList edges;
38
+ for (unsigned int from = 0 ; from < g.size () ; ++from) {
39
+ if (get_forward_root (g, g[from], edges) == false)
40
+ continue;
41
+
42
+ for (EdgeList::iterator it = edges.begin () ; it != edges.end () ; ++it)
43
+ push (from, (*it)->to, g[(*it)->from].label, (*it)->elabel, g[(*it)->to].label);
44
+ }
45
+ }
46
+
47
+ bool DFSCode::toGraph (Graph &g)
48
+ {
49
+ g.clear ();
50
+
51
+ for (DFSCode::iterator it = begin(); it != end(); ++it) {
52
+ g.resize (std::max (it->from, it->to) + 1);
53
+
54
+ if (it->fromlabel != -1)
55
+ g[it->from].label = it->fromlabel;
56
+ if (it->tolabel != -1)
57
+ g[it->to].label = it->tolabel;
58
+
59
+ g[it->from].push (it->from, it->to, it->elabel);
60
+ if (g.directed == false)
61
+ g[it->to].push (it->to, it->from, it->elabel);
62
+ }
63
+
64
+ g.buildEdge ();
65
+
66
+ return (true);
67
+ }
68
+
69
+ unsigned int
70
+ DFSCode::nodeCount (void)
71
+ {
72
+ unsigned int nodecount = 0;
73
+
74
+ for (DFSCode::iterator it = begin() ; it != end() ; ++it)
75
+ nodecount = std::max (nodecount, (unsigned int) (std::max (it->from, it->to) + 1));
76
+
77
+ return (nodecount);
78
+ }
79
+
80
+
81
+ std::ostream &DFSCode::write (std::ostream &os)
82
+ {
83
+ if (size() == 0) return os;
84
+
85
+ os << "(" << (*this)[0].fromlabel << ") " << (*this)[0].elabel << " (0f" << (*this)[0].tolabel << ")";
86
+
87
+ for (unsigned int i = 1; i < size(); ++i) {
88
+ if ((*this)[i].from < (*this)[i].to) {
89
+ os << " " << (*this)[i].elabel << " (" << (*this)[i].from << "f" << (*this)[i].tolabel << ")";
90
+ } else {
91
+ os << " " << (*this)[i].elabel << " (b" << (*this)[i].to << ")";
92
+ }
93
+ }
94
+
95
+ return os;
96
+ }
97
+ }
98
+
@@ -0,0 +1,165 @@
1
+ /*
2
+ $Id: graph.cpp,v 1.4 2004/05/21 05:50:13 taku-ku Exp $;
3
+
4
+ Copyright (C) 2004 Taku Kudo, All rights reserved.
5
+ This is free software with ABSOLUTELY NO WARRANTY.
6
+
7
+ This program is free software; you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation; either version 2 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program; if not, write to the Free Software
19
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20
+ 02111-1307, USA
21
+ */
22
#include "gspan.h"
#include <algorithm>
#include <cstring>
#include <iterator>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <strstream>
#include <tuple>

#include <assert.h>
30
+
31
+
32
+ namespace GSPAN {
33
+
34
/* Split the NUL-terminated string `str` on whitespace, convert each token to
 * T with stream extraction, and write the results through `iterator`.
 *
 * Uses std::istringstream rather than the deprecated std::istrstream.
 */
template <class T, class Iterator>
void tokenize (const char *str, Iterator iterator)
{
  const std::string text (str);
  std::istringstream is (text);
  std::copy (std::istream_iterator <T> (is), std::istream_iterator <T> (), iterator);
}
40
+
41
+ void Graph::buildEdge ()
42
+ {
43
+ char buf[512];
44
+ std::map <std::string, unsigned int> tmp;
45
+
46
+ unsigned int id = 0;
47
+ for (int from = 0; from < (int)size (); ++from) {
48
+ for (Vertex::edge_iterator it = (*this)[from].edge.begin ();
49
+ it != (*this)[from].edge.end (); ++it)
50
+ {
51
+ if (directed || from <= it->to)
52
+ std::sprintf (buf, "%d %d %d", from, it->to, it->elabel);
53
+ else
54
+ std::sprintf (buf, "%d %d %d", it->to, from, it->elabel);
55
+
56
+ // Assign unique id's for the edges.
57
+ if (tmp.find (buf) == tmp.end()) {
58
+ it->id = id;
59
+ tmp[buf] = id;
60
+ ++id;
61
+ } else {
62
+ it->id = tmp[buf];
63
+ }
64
+ }
65
+ }
66
+
67
+ edge_size_ = id;
68
+ }
69
+
70
+ std::istream &Graph::read (std::istream &is)
71
+ {
72
+ std::vector <std::string> result;
73
+ char line[1024];
74
+
75
+ clear ();
76
+
77
+ while (true) {
78
+
79
+ unsigned int pos = is.tellg ();
80
+ if (! is.getline (line, 1024))
81
+ break;
82
+ result.clear ();
83
+ tokenize<std::string>(line, std::back_inserter (result));
84
+
85
+ if (result.empty()) {
86
+ // do nothing
87
+ } else if (result[0] == "t") {
88
+ if (! empty()) { // use as delimiter
89
+ is.seekg (pos, std::ios_base::beg);
90
+ break;
91
+ } else {
92
+ /*
93
+ * y = atoi (result[3].c_str());
94
+ */
95
+ }
96
+ } else if (result[0] == "v" && result.size() >= 3) {
97
+ unsigned int id = atoi (result[1].c_str());
98
+ this->resize (id + 1);
99
+ (*this)[id].label = atoi (result[2].c_str());
100
+ } else if (result[0] == "e" && result.size() >= 4) {
101
+ int from = atoi (result[1].c_str());
102
+ int to = atoi (result[2].c_str());
103
+ int elabel = atoi (result[3].c_str());
104
+
105
+ if ((int)size () <= from || (int)size () <= to) {
106
+ std::cerr << "Format Error: define vertex lists before edges" << std::endl;
107
+ exit (-1);
108
+ }
109
+
110
+ (*this)[from].push (from, to, elabel);
111
+ if (directed == false)
112
+ (*this)[to].push (to, from, elabel);
113
+ }
114
+ }
115
+
116
+ buildEdge ();
117
+
118
+ return is;
119
+ }
120
+
121
+ std::ostream &Graph::write (std::ostream &os)
122
+ {
123
+ char buf[512];
124
+ std::set <std::string> tmp;
125
+
126
+ for (int from = 0; from < (int)size (); ++from) {
127
+ os << "v " << from << " " << (*this)[from].label << std::endl;
128
+
129
+ for (Vertex::edge_iterator it = (*this)[from].edge.begin ();
130
+ it != (*this)[from].edge.end (); ++it) {
131
+ if (directed || from <= it->to) {
132
+ std::sprintf (buf, "%d %d %d", from, it->to, it->elabel);
133
+ } else {
134
+ std::sprintf (buf, "%d %d %d", it->to, from, it->elabel);
135
+ }
136
+ tmp.insert (buf);
137
+ }
138
+ }
139
+
140
+ for (std::set<std::string>::iterator it = tmp.begin(); it != tmp.end(); ++it) {
141
+ os << "e " << *it << std::endl;
142
+ }
143
+
144
+ return os;
145
+ }
146
+
147
+ void Graph::check (void)
148
+ {
149
+ /* Check all indices
150
+ */
151
+ for (int from = 0 ; from < (int)size () ; ++from) {
152
+ //mexPrintf ("check vertex %d, label %d\n", from, (*this)[from].label);
153
+
154
+ for (Vertex::edge_iterator it = (*this)[from].edge.begin ();
155
+ it != (*this)[from].edge.end (); ++it)
156
+ {
157
+ //mexPrintf (" check edge from %d to %d, label %d\n", it->from, it->to, it->elabel);
158
+ assert (it->from >= 0 && it->from < size ());
159
+ assert (it->to >= 0 && it->to < size ());
160
+ }
161
+ }
162
+ }
163
+
164
+ }
165
+