submine 0.1.0__cp311-cp311-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. submine/__init__.py +37 -0
  2. submine/algorithms/__init__.py +23 -0
  3. submine/algorithms/base.py +143 -0
  4. submine/algorithms/gspan.py +156 -0
  5. submine/algorithms/gspan_cpp.cpython-311-x86_64-linux-musl.so +0 -0
  6. submine/algorithms/sopagrami.py +250 -0
  7. submine/algorithms/sopagrami_cpp.cpython-311-x86_64-linux-musl.so +0 -0
  8. submine/api.py +134 -0
  9. submine/backends/__init__.py +0 -0
  10. submine/backends/gspan/CMakeLists.txt +65 -0
  11. submine/backends/gspan/dfs.cpp +98 -0
  12. submine/backends/gspan/graph.cpp +165 -0
  13. submine/backends/gspan/gspan.cpp +776 -0
  14. submine/backends/gspan/gspan.h +296 -0
  15. submine/backends/gspan/ismin.cpp +124 -0
  16. submine/backends/gspan/main.cpp +106 -0
  17. submine/backends/gspan/misc.cpp +177 -0
  18. submine/backends/gspan/python_bindings.cpp +133 -0
  19. submine/backends/sopagrami/cpp/CMakeLists.txt +44 -0
  20. submine/backends/sopagrami/cpp/include/alg.hpp +150 -0
  21. submine/backends/sopagrami/cpp/include/common/timer.hpp +18 -0
  22. submine/backends/sopagrami/cpp/src/alg.cpp +805 -0
  23. submine/backends/sopagrami/cpp/src/dump.cpp +262 -0
  24. submine/backends/sopagrami/cpp/src/main.cpp +94 -0
  25. submine/backends/sopagrami/cpp/src/python_bindings.cpp +123 -0
  26. submine/cli/__init__.py +6 -0
  27. submine/cli/main.py +87 -0
  28. submine/core/__init__.py +12 -0
  29. submine/core/graph.py +179 -0
  30. submine/core/result.py +121 -0
  31. submine/datasets/__init__.py +11 -0
  32. submine/datasets/loaders.py +145 -0
  33. submine/errors.py +41 -0
  34. submine/io/__init__.py +30 -0
  35. submine/io/common.py +173 -0
  36. submine/io/gexf.py +88 -0
  37. submine/io/gspan.py +268 -0
  38. submine/io/sopagrami.py +143 -0
  39. submine/io/transcode.py +147 -0
  40. submine/registry.py +8 -0
  41. submine/utils/__init__.py +6 -0
  42. submine/utils/checks.py +115 -0
  43. submine/utils/logging.py +41 -0
  44. submine-0.1.0.dist-info/METADATA +178 -0
  45. submine-0.1.0.dist-info/RECORD +49 -0
  46. submine-0.1.0.dist-info/WHEEL +5 -0
  47. submine-0.1.0.dist-info/licenses/LICENSE +21 -0
  48. submine.libs/libgcc_s-2298274a.so.1 +0 -0
  49. submine.libs/libstdc++-08d5c7eb.so.6.0.33 +0 -0
submine/__init__.py ADDED
@@ -0,0 +1,37 @@
+ # submine/__init__.py
+ from __future__ import annotations
+
+ __version__ = "0.1.0"
+
+ from .registry import available_algorithms
+ from .errors import (
+     SubmineError,
+     SubmineInputError,
+     ParameterValidationError,
+     BackendUnavailableError,
+     BackendExecutionError,
+     ResourceLimitError,
+ )
+
+
+ # Import algorithms so they register themselves via @register
+ # (you can add more as you implement them)
+ from .algorithms import gspan  # noqa: F401
+ # from .algorithms import grami  # noqa: F401
+ from .algorithms import sopagrami  # noqa: F401
+ # ...
+
+
+ def get_mining_algorithm(name: str):
+     key = name.lower()
+     try:
+         return available_algorithms[key]
+     except KeyError:
+         raise ValueError(
+             f"Unknown algorithm '{name}'. "
+             f"Available: {sorted(available_algorithms.keys())}"
+         )
+
+
+ def list_algorithms():
+     return sorted(available_algorithms.keys())
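For readers skimming the diff, a minimal usage sketch of the two helpers added above, assuming the wheel is installed and importable as `submine` (the algorithm names come from the miners registered later in this diff):

    import submine

    # Names of all miners registered at import time, e.g. ["gspan", "sopagrami"].
    print(submine.list_algorithms())

    # Look up a miner class by name; unknown names raise ValueError.
    GSpan = submine.get_mining_algorithm("gspan")
    miner = GSpan(min_support=2, verbose=True)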
submine/algorithms/__init__.py ADDED
@@ -0,0 +1,23 @@
+ """Algorithm implementations for submine.
+
+ Each submodule in this package implements a specific subgraph mining
+ algorithm. Modules are expected to define a subclass of
+ :class:`~submine.algorithms.base.SubgraphMiner` and register it via
+ :func:`~submine.algorithms.base.register`. Registered algorithms will
+ automatically appear in :func:`submine.get_mining_algorithm`.
+
+ To avoid the cost of importing heavy dependencies at module import
+ time, algorithm modules should not perform expensive setup at the top
+ level. Instead they should defer initialization to the constructor or
+ :meth:`SubgraphMiner.check_availability`.
+
+ """
+
+ from .base import SubgraphMiner  # noqa: F401
+
+ # Import algorithm modules so they can register themselves when this
+ # package is imported. Additional algorithms can be added here.
+ from .gspan import GSpanMiner  # noqa: F401
+ from .sopagrami import SoPaGraMiMiner  # noqa: F401
+
+ __all__ = ["SubgraphMiner", "GSpanMiner", "SoPaGraMiMiner"]
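The registration convention described in the docstring above, sketched with a toy miner (`EchoMiner` and its trivial behaviour are illustrative only; the `MiningResult` keyword arguments mirror how the class is constructed elsewhere in this diff):

    from submine.algorithms.base import SubgraphMiner, register
    from submine.core.result import MiningResult

    @register
    class EchoMiner(SubgraphMiner):
        """Toy miner that returns no patterns; it exists only to show the hook."""
        name = "echo"

        def mine(self, graphs, min_support, **kwargs):
            # A real miner would enumerate frequent subgraphs here.
            return MiningResult(patterns=[], algorithm=self.name,
                                params={"min_support": min_support},
                                runtime=0.0, metadata={})

    # Once this module is imported, "echo" shows up in submine.list_algorithms().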
submine/algorithms/base.py ADDED
@@ -0,0 +1,143 @@
+ # submine/algorithms/base.py
+ from __future__ import annotations
+
+ import subprocess
+ from abc import ABC, abstractmethod
+ from pathlib import Path
+ from typing import Iterable, List, Optional
+
+
+ from ..registry import available_algorithms
+ from ..core.graph import Graph
+ from ..core.result import MiningResult
+ from ..utils.logging import get_logger
+ from ..errors import BackendExecutionError, ParameterValidationError
+
+ __all__ = ["SubgraphMiner", "register"]
+
+
+ class SubgraphMiner(ABC):
+     name: str = "base"
+
+     # Native file input contract
+     # --------------------------
+     # Some algorithms consume an on-disk dataset in a specific format.
+     # If `expected_input_format` is set (e.g., "lg" or "gspan"), the high-level
+     # API can transcode user-provided files to this format and call `mine_native`.
+     #
+     # By default, miners operate on in-memory Graph objects via `mine()`.
+     expected_input_format: str | None = None
+     multi_graph_policy: str = "reject"  # reject | batch | merge (reserved)
+
+     # Weight handling
+     # ---------------
+     # Most classical subgraph miners operate on *labeled* (unweighted) graphs.
+     # We therefore make weight support explicit. If the input graph contains
+     # weights and the algorithm does not support them, the weight_strategy
+     # controls what happens.
+     supports_weighted: bool = False
+     weight_strategy: str = "ignore"  # one of: ignore | reject
+
+     def __init__(self, verbose: bool = False) -> None:
+         self.verbose = verbose
+         self.logger = get_logger(self.__class__.__name__)
+         if self.verbose:
+             self.logger.setLevel("DEBUG")
+
+     def _handle_weights(self, graphs: Iterable[Graph]) -> Iterable[Graph]:
+         """Apply the configured weight strategy to input graphs.
+
+         - If the algorithm supports weights: pass through.
+         - If it does not and the graph is weighted:
+           * ignore: drop weights (treat as unweighted)
+           * reject: raise
+         """
+         for g in graphs:
+             if getattr(g, "is_weighted", False) and not self.supports_weighted:
+                 if self.weight_strategy == "reject":
+                     raise ValueError(
+                         f"Algorithm '{self.name}' does not support weighted graphs; "
+                         "set weight_strategy='ignore' to drop weights explicitly."
+                     )
+                 # ignore: drop weights
+                 g.edge_weights = None
+             yield g
+
+     @abstractmethod
+     def mine(self, graphs: Iterable[Graph], min_support: int, **kwargs) -> MiningResult:
+         raise NotImplementedError
+
+     def mine_native(self, path: str | Path, min_support: int, **kwargs) -> MiningResult:
+         """Run the miner on a native on-disk dataset.
+
+         Miners with `expected_input_format != None` should override this method.
+         The default implementation indicates that the miner does not accept a
+         native path entrypoint.
+         """
+         raise NotImplementedError(
+             f"Algorithm '{self.name}' does not implement mine_native(); "
+             "use mine(graphs=...) instead."
+         )
+
+     def check_availability(self) -> None:
+         return None
+
+     def run_external(
+         self,
+         cmd: List[str],
+         *,
+         cwd: Optional[Path] = None,
+         timeout_s: int = 300,
+         env: Optional[dict[str, str]] = None,
+     ) -> str:
+         """Run an external command defensively.
+
+         - Uses ``shell=False`` implicitly (we pass a list).
+         - Applies a default timeout to avoid hung processes.
+         - Captures stdout/stderr for error reporting.
+         """
+         if not cmd or not isinstance(cmd[0], str):
+             raise ParameterValidationError("cmd must be a non-empty list of strings")
+
+         # Basic hardening against accidental injection via newlines/NULs.
+         for part in cmd:
+             if not isinstance(part, str):
+                 raise TypeError("All cmd parts must be strings")
+             if "\x00" in part or "\n" in part or "\r" in part:
+                 raise ParameterValidationError("Unsafe characters in command argument")
+
+         self.logger.debug("Running external command: %s", " ".join(cmd))
+         completed = subprocess.run(
+             cmd,
+             cwd=cwd,
+             text=True,
+             capture_output=True,
+             timeout=timeout_s,
+             env=env,
+             check=False,
+             close_fds=True,
+         )
+         self.logger.debug("Command stdout: %s", completed.stdout)
+         if completed.returncode != 0:
+             self.logger.error("Command failed with stderr: %s", completed.stderr)
+             raise BackendExecutionError(
+                 f"Command '{' '.join(cmd)}' failed with exit code {completed.returncode}\n"
+                 f"stderr:\n{completed.stderr}"
+             )
+         return completed.stdout
+
+
+ def register(cls: type[SubgraphMiner]) -> type[SubgraphMiner]:
+     if not issubclass(cls, SubgraphMiner):
+         raise TypeError("Only subclasses of SubgraphMiner can be registered")
+
+     name = getattr(cls, "name", None)
+     if not isinstance(name, str):
+         raise TypeError("Subgraph miner must define a string 'name' attribute")
+
+     key = name.lower()
+     if key in available_algorithms:
+         raise ValueError(f"Algorithm '{name}' is already registered")
+
+     available_algorithms[key] = cls
+     return cls
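A short sketch of the weight policy documented in the class comments above (`weighted_graph` is a stand-in for any `Graph` whose `is_weighted` attribute is true; `GSpanMiner` is used only because it keeps the default `supports_weighted = False`):

    from submine.algorithms.gspan import GSpanMiner

    miner = GSpanMiner(min_support=2)
    assert not miner.supports_weighted and miner.weight_strategy == "ignore"

    # Default policy: weights on input graphs are dropped and mining proceeds.
    # result = miner.mine([weighted_graph], min_support=2)

    # Strict policy: weighted input raises instead of being silently coerced.
    miner.weight_strategy = "reject"
    # miner.mine([weighted_graph], min_support=2)  # -> ValueError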
submine/algorithms/gspan.py ADDED
@@ -0,0 +1,156 @@
+ from __future__ import annotations
+
+ from pathlib import Path
+ from typing import List, Optional
+
+ from ..utils.checks import safe_read_text, assert_regular_file
+ from ..errors import ParameterValidationError
+
+ import tempfile
+ import time
+
+ from .base import SubgraphMiner, register
+ from ..core.graph import Graph
+ from ..core.result import MiningResult, SubgraphPattern
+ from ..io.gspan import write_gspan_dataset, convert_gspan_graph
+
+
+ @register
+ class GSpanMiner(SubgraphMiner):
+     name = "gspan"
+     expected_input_format = "gspan"
+
+     def __init__(
+         self,
+         min_support: int = 2,
+         directed: bool = False,
+         min_vertices: int = 1,
+         max_vertices: Optional[int] = None,
+         visualize: bool = False,
+         write_out: bool = True,
+         verbose: bool = False,
+     ) -> None:
+         super().__init__(verbose=verbose)
+         # Parameter validation (publish-safe defaults)
+         if not isinstance(min_support, int) or min_support <= 0:
+             raise ParameterValidationError(f"min_support must be a positive int; got {min_support!r}")
+         if not isinstance(min_vertices, int) or min_vertices < 1:
+             raise ParameterValidationError(f"min_vertices must be an int >= 1; got {min_vertices!r}")
+         if max_vertices is not None:
+             if not isinstance(max_vertices, int) or max_vertices < min_vertices:
+                 raise ParameterValidationError(
+                     f"max_vertices must be None or an int >= min_vertices ({min_vertices}); got {max_vertices!r}"
+                 )
+         self.min_support = min_support
+         self.directed = directed
+         self.min_vertices = min_vertices
+         self.max_vertices = max_vertices
+         self.visualize = visualize
+         self.write_out = write_out
+
+
+     def _run_on_dataset(self, db_path: Path, support: int):
+         from . import gspan_cpp as gspan_mine
+
+         t0 = time.time()
+
+         db_path = assert_regular_file(db_path)
+         data = safe_read_text(db_path)
+
+         # TODO: plumb through additional kwargs once exposed by the binding.
+         res = gspan_mine.mine_from_string(
+             data,
+             minsup=support,
+             directed=self.directed,
+             maxpat_min=self.min_vertices,
+             maxpat_max=self.max_vertices if self.max_vertices is not None else 0xFFFFFFFF,
+         )
+
+         runtime = time.time() - t0
+         return runtime, res
+
+     def mine(self, graphs: List[Graph], min_support: Optional[int] = None, **kwargs) -> MiningResult:
+         graphs = list(self._handle_weights(graphs))
+         support = int(min_support if min_support is not None else self.min_support)
+
+         with tempfile.TemporaryDirectory() as tmpdir:
+             tmpdir_path = Path(tmpdir)
+             db_path = tmpdir_path / "gspan_db.data"
+
+             # Write the graphs in gSpan format.
+             write_gspan_dataset(graphs, db_path)
+             runtime, gs = self._run_on_dataset(db_path, support)
+
+             patterns = []
+             for pid, rec in enumerate(gs):
+                 pattern_graph = Graph(edges=rec["edges"], nodes=rec["nodes"])
+                 pattern_support = rec["support"]
+                 patterns.append(
+                     SubgraphPattern(
+                         pid=pid,
+                         graph=pattern_graph,
+                         support=pattern_support,
+                         frequency=None,
+                         occurrences=[],  # can be filled in later if embeddings are tracked
+                         attributes={
+                             "num_vertices": pattern_graph.number_of_nodes(),
+                             "graph_ids": rec["graph_ids"],
+                         },
+                     )
+                 )
+
+
+             return MiningResult(
+                 patterns=patterns,
+                 algorithm=self.name,
+                 params=dict(
+                     min_support=support,
+                     directed=self.directed,
+                     min_vertices=self.min_vertices,
+                     max_vertices=self.max_vertices,
+                     visualize=self.visualize,
+                     write_out=self.write_out,
+                 ),
+                 runtime=runtime,
+                 metadata={"backend": "gspan-mining"},
+             )
+
+     def mine_native(self, path: str | Path, min_support: int, **kwargs) -> MiningResult:
+         """Run gSpan directly on a user-supplied gSpan dataset file."""
+         db_path = Path(path)
+         support = int(min_support)
+         runtime, gs = self._run_on_dataset(db_path, support)
+
+         patterns = []
+         for pid, rec in enumerate(gs):
+             pattern_graph = Graph(edges=rec["edges"], nodes=rec["nodes"])
+             pattern_support = rec["support"]
+             patterns.append(
+                 SubgraphPattern(
+                     pid=pid,
+                     graph=pattern_graph,
+                     support=pattern_support,
+                     frequency=None,
+                     occurrences=[],  # can be filled in later if embeddings are tracked
+                     attributes={
+                         "num_vertices": pattern_graph.number_of_nodes(),
+                         "graph_ids": rec["graph_ids"],
+                     },
+                 )
+             )
+
+         return MiningResult(
+             patterns=patterns,
+             algorithm=self.name,
+             params=dict(
+                 min_support=support,
+                 directed=self.directed,
+                 min_vertices=self.min_vertices,
+                 max_vertices=self.max_vertices,
+                 visualize=self.visualize,
+                 write_out=self.write_out,
+                 input_format="gspan",
+             ),
+             runtime=runtime,
+             metadata={"backend": "gspan-mining", "input_dataset": str(db_path)},
+         )
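A usage sketch for the miner above; the dataset path is illustrative, and attribute access on the result assumes `MiningResult` exposes its constructor fields (its definition in `submine/core/result.py` is not shown in this excerpt):

    from submine.algorithms.gspan import GSpanMiner

    miner = GSpanMiner(min_support=3, directed=False, max_vertices=6)
    result = miner.mine_native("graphs.gspan", min_support=3)  # hypothetical file

    for pat in result.patterns:
        print(pat.pid, pat.support, pat.attributes["num_vertices"])
    print(f"{len(result.patterns)} frequent subgraphs in {result.runtime:.2f}s")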
submine/algorithms/sopagrami.py ADDED
@@ -0,0 +1,250 @@
+ # submine/algorithms/sopagrami.py
+ from __future__ import annotations
+
+ import tempfile
+ import time
+ from pathlib import Path
+ from typing import Iterable, List, Optional
+
+ from .base import SubgraphMiner, register
+ from ..core.graph import Graph
+ from ..core.result import MiningResult, SubgraphPattern
+ from ..io.sopagrami import read_lg, write_lg
+ from ..errors import BackendUnavailableError, ParameterValidationError
+
+
+
+
+ @register
+ class SoPaGraMiMiner(SubgraphMiner):
+     """
+     Python wrapper around the C++ SoPaGraMi implementation.
+
+     Note: SoPaGraMi mines frequent subgraphs from a *single* large graph,
+     not a dataset of many graphs.
+     """
+     name = "sopagrami"
+     expected_input_format = "lg"
+     multi_graph_policy = "reject"
+
+     def __init__(
+         self,
+         tau: int = 2,
+         directed: bool = False,
+         sorted_seeds: bool = True,
+         num_threads: int = 0,
+         compute_full_support: bool = True,
+         verbose: bool = False,
+     ) -> None:
+         super().__init__(verbose=verbose)
+         # Parameter validation (publish-safe defaults)
+         if not isinstance(tau, int) or tau < 1:
+             raise ParameterValidationError(f"tau must be an int >= 1; got {tau!r}")
+         if not isinstance(num_threads, int) or num_threads < 0:
+             raise ParameterValidationError(f"num_threads must be an int >= 0; got {num_threads!r}")
+         self.tau = tau
+         self.directed = directed
+         self.sorted_seeds = sorted_seeds
+         self.num_threads = num_threads
+         self.compute_full_support = compute_full_support
+
+     def check_availability(self):
+         try:
+             from . import sopagrami_cpp
+         except ImportError as e:
+             raise BackendUnavailableError("SoPaGraMi backend not available") from e
+
+     def mine(
+         self,
+         graphs: Iterable[Graph],
+         min_support: Optional[int] = None,
+         out_dir: Optional[str] = None,
+         dump_images_csv: bool = False,
+         max_images_per_vertex: int = 50,
+         dump_sample_embeddings: bool = False,
+     ) -> MiningResult:
+         self.check_availability()
+
+         # Handle weights explicitly (the SoPaGraMi backend treats graphs as labeled, not weighted).
+         graphs = self._handle_weights(graphs)
+         # SoPaGraMi expects a single graph.
+         graphs_list = list(graphs)
+         if len(graphs_list) != 1:
+             raise ValueError(
+                 "SoPaGraMiMiner currently expects exactly one Graph (single large graph). "
+                 f"Got {len(graphs_list)}."
+             )
+         G = graphs_list[0]
+
+         tau = int(min_support if min_support is not None else self.tau)
+
+         with tempfile.TemporaryDirectory() as tmpdir:
+             tmpdir_path = Path(tmpdir)
+             lg_path = tmpdir_path / "graph.lg"
+
+             # 1) Write the graph as .lg.
+             write_lg(G, lg_path, directed=self.directed)
+
+             # 2) Call the C++ binding.
+             runtime, patterns_raw = self._run_backend_on_lg(
+                 lg_path, tau=tau, out_dir=out_dir, dump_images_csv=dump_images_csv,
+                 max_images_per_vertex=max_images_per_vertex, dump_sample_embeddings=dump_sample_embeddings)
+
+             # 3) Convert to our SubgraphPattern representation.
+             patterns: List[SubgraphPattern] = []
+             for pid, pd in enumerate(patterns_raw):
+                 node_labels = list(pd["node_labels"])
+                 edges_raw = list(pd["edges"])
+                 support = int(pd["full_support"])
+                 key = pd["key"]
+
+                 # SoPaGraMi pattern node IDs are 0..k-1.
+                 k = len(node_labels)
+                 nodes = list(range(k))
+
+                 # Build our Graph for the pattern.
+                 pat_edges = []
+                 edge_labels = {}
+                 for (a, b, el, dirflag) in edges_raw:
+                     a = int(a)
+                     b = int(b)
+                     # Our Graph is undirected; we store the undirected edge,
+                     # and put direction info into the label if needed.
+                     u, v = (a, b) if a <= b else (b, a)
+                     pat_edges.append((u, v))
+
+                     label = el
+                     if self.directed and dirflag == 1:
+                         # encode direction in the label for now
+                         label = f"{el}->"
+                     edge_labels[(u, v)] = label
+
+                 node_label_map = {i: lbl for i, lbl in enumerate(node_labels)}
+
+                 pat_graph = Graph(
+                     nodes=nodes,
+                     edges=pat_edges,
+                     node_labels=node_label_map,
+                     edge_labels=edge_labels,
+                 )
+
+                 patterns.append(
+                     SubgraphPattern(
+                         pid=pid,
+                         graph=pat_graph,
+                         support=support,
+                         frequency=None,
+                         occurrences=[],
+                         attributes={
+                             "key": key,
+                             "k": k,
+                             "num_edges": len(pat_edges),
+                         },
+                     )
+                 )
+
+             return MiningResult(
+                 patterns=patterns,
+                 algorithm=self.name,
+                 params={
+                     "tau": tau,
+                     "directed": self.directed,
+                     "sorted_seeds": self.sorted_seeds,
+                     "num_threads": self.num_threads,
+                     "compute_full_support": self.compute_full_support,
+                 },
+                 runtime=runtime,
+                 metadata={"backend": "sopagrami_cpp"},
+             )
+
+     def mine_native(self, lg_path: str | Path, min_support: Optional[int] = None, out_dir: Optional[str] = None,
+                     dump_images_csv: bool = False, max_images_per_vertex: int = 50,
+                     dump_sample_embeddings: bool = False) -> MiningResult:
+         """Run SoPaGraMi directly on a user-supplied ``.lg`` file.
+
+         This avoids re-parsing/re-writing the file, which is important for
+         large graphs and for preserving any optional attributes present in the
+         original ``.lg``.
+         """
+         self.check_availability()
+         lg_path = Path(lg_path)
+         if lg_path.suffix.lower() != ".lg":
+             raise ValueError(f"Expected a .lg file for SoPaGraMi; got: {lg_path}")
+
+         tau = int(min_support if min_support is not None else self.tau)
+         runtime, patterns_raw = self._run_backend_on_lg(
+             lg_path, tau=tau, out_dir=out_dir, dump_images_csv=dump_images_csv,
+             max_images_per_vertex=max_images_per_vertex, dump_sample_embeddings=dump_sample_embeddings)
+
+         # Convert patterns (same as in mine()).
+         patterns: List[SubgraphPattern] = []
+         for pid, pd in enumerate(patterns_raw):
+             node_labels = list(pd["node_labels"])
+             edges_raw = list(pd["edges"])
+             support = int(pd["full_support"])
+             key = pd["key"]
+
+             k = len(node_labels)
+             nodes = list(range(k))
+
+             pat_edges = []
+             edge_labels = {}
+             for (a, b, el, dirflag) in edges_raw:
+                 a = int(a)
+                 b = int(b)
+                 u, v = (a, b) if a <= b else (b, a)
+                 pat_edges.append((u, v))
+                 label = el
+                 if self.directed and dirflag == 1:
+                     label = f"{el}->"
+                 edge_labels[(u, v)] = label
+
+             node_label_map = {i: lbl for i, lbl in enumerate(node_labels)}
+             pat_graph = Graph(nodes=nodes, edges=pat_edges, node_labels=node_label_map, edge_labels=edge_labels)
+
+             patterns.append(
+                 SubgraphPattern(
+                     pid=pid,
+                     graph=pat_graph,
+                     support=support,
+                     frequency=None,
+                     occurrences=[],
+                     attributes={"key": key, "k": k, "num_edges": len(pat_edges)},
+                 )
+             )
+
+         return MiningResult(
+             patterns=patterns,
+             algorithm=self.name,
+             params={
+                 "tau": tau,
+                 "directed": self.directed,
+                 "sorted_seeds": self.sorted_seeds,
+                 "num_threads": self.num_threads,
+                 "compute_full_support": self.compute_full_support,
+                 "input_format": "lg",
+             },
+             runtime=runtime,
+             metadata={"backend": "sopagrami_cpp", "input_lg": str(lg_path)},
+         )
+
+     def _run_backend_on_lg(self, lg_path: Path, tau: int, out_dir: Optional[str] = None, dump_images_csv: bool = False,
+                            max_images_per_vertex: int = 50, dump_sample_embeddings: bool = False):
+         from . import sopagrami_cpp
+         t0 = time.time()
+         self.logger.debug("Running SoPaGraMi on %s", lg_path)
+         if out_dir is None:
+             out_dir = "sopagrami_result"
+         patterns_raw = sopagrami_cpp.run_on_lg_file(
+             str(lg_path),
+             tau=tau,
+             directed=self.directed,
+             sorted_seeds=self.sorted_seeds,
+             num_threads=self.num_threads,
+             compute_full_support=self.compute_full_support,
+             dump_images_csv=dump_images_csv,
+             out_dir=out_dir,
+             max_images_per_vertex=max_images_per_vertex,
+             dump_sample_embeddings=dump_sample_embeddings,
+         )
+         return time.time() - t0, patterns_raw
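And a matching sketch for SoPaGraMi on a single large graph in `.lg` format; the file name and threshold are illustrative, and the backend-specific dump options are simply forwarded to `sopagrami_cpp.run_on_lg_file` as shown above:

    from submine.algorithms.sopagrami import SoPaGraMiMiner

    miner = SoPaGraMiMiner(tau=100, num_threads=4)
    result = miner.mine_native("network.lg", min_support=100,
                               out_dir="sopagrami_result", dump_images_csv=False)

    for pat in result.patterns:
        print(pat.attributes["key"], pat.support)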