strobe-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
strobe/__init__.py ADDED
@@ -0,0 +1,26 @@
+ try:
+     from strobe._version import __version__
+ except ImportError:
+     __version__ = "0.0.1.dev0"
+
+ from strobe.analysis import (
+     activity_statistics,
+     check_conformance,
+     discover_dfg,
+     discover_process_model,
+     throughput_times,
+ )
+ from strobe.instrumentation import EventLog, StrobePlugin
+ from strobe.visualization import launch_dashboard
+
+ __all__ = [
+     "__version__",
+     "StrobePlugin",
+     "EventLog",
+     "discover_dfg",
+     "discover_process_model",
+     "check_conformance",
+     "throughput_times",
+     "activity_statistics",
+     "launch_dashboard",
+ ]
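The top-level exports cover the whole instrument, analyse, and check loop. A minimal end-to-end sketch on a synthetic log (the case IDs, activity names, and durations below are made up for illustration)::

    from datetime import datetime, timedelta, timezone

    from strobe import (
        EventLog,
        activity_statistics,
        check_conformance,
        discover_process_model,
    )

    # Build a tiny synthetic log: two cases with the same three-step trace.
    log = EventLog()
    t0 = datetime.now(timezone.utc)
    for case in ("case-1", "case-2"):
        log.add_event(case, "agent:root", t0, duration_s=0.1)
        log.add_event(case, "tool:search", t0 + timedelta(seconds=1), duration_s=0.4)
        log.add_event(case, "llm:gemini", t0 + timedelta(seconds=2), duration_s=1.2)

    df = log.to_dataframe()                      # pm4py-formatted event log
    net, im, fm = discover_process_model(df)     # Inductive Miner by default
    print(check_conformance(df, net, im, fm))    # fitness / precision / generalization / simplicity
    print(activity_statistics(df))               # per-activity counts and durations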
strobe/_version.py ADDED
@@ -0,0 +1,34 @@
+ # file generated by setuptools-scm
+ # don't change, don't track in version control
+
+ __all__ = [
+     "__version__",
+     "__version_tuple__",
+     "version",
+     "version_tuple",
+     "__commit_id__",
+     "commit_id",
+ ]
+
+ TYPE_CHECKING = False
+ if TYPE_CHECKING:
+     from typing import Tuple
+     from typing import Union
+
+     VERSION_TUPLE = Tuple[Union[int, str], ...]
+     COMMIT_ID = Union[str, None]
+ else:
+     VERSION_TUPLE = object
+     COMMIT_ID = object
+
+ version: str
+ __version__: str
+ __version_tuple__: VERSION_TUPLE
+ version_tuple: VERSION_TUPLE
+ commit_id: COMMIT_ID
+ __commit_id__: COMMIT_ID
+
+ __version__ = version = '0.0.1'
+ __version_tuple__ = version_tuple = (0, 0, 1)
+
+ __commit_id__ = commit_id = None
strobe/analysis/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from .conformance import check_conformance
+ from .discovery import discover_dfg, discover_process_model
+ from .performance import activity_statistics, throughput_times
+
+ __all__ = [
+     "discover_dfg",
+     "discover_process_model",
+     "check_conformance",
+     "throughput_times",
+     "activity_statistics",
+ ]
strobe/analysis/conformance.py ADDED
@@ -0,0 +1,38 @@
+ from __future__ import annotations
+
+ import pandas as pd
+ import pm4py
+
+
+ def check_conformance(
+     df: pd.DataFrame,
+     net,
+     initial_marking,
+     final_marking,
+ ) -> dict[str, float]:
+     """Run token-based replay conformance checking.
+
+     Parameters
+     ----------
+     df:
+         pm4py-formatted event log DataFrame.
+     net, initial_marking, final_marking:
+         Petri net model (e.g. from :func:`~strobe.analysis.discover_process_model`).
+
+     Returns
+     -------
+     dict with keys ``fitness``, ``precision``, ``generalization``, ``simplicity``.
+     """
+     fitness = pm4py.fitness_token_based_replay(df, net, initial_marking, final_marking)
+     precision = pm4py.precision_token_based_replay(
+         df, net, initial_marking, final_marking
+     )
+     generalization = pm4py.generalization_tbr(df, net, initial_marking, final_marking)
+     simplicity = pm4py.simplicity_petri_net(net, initial_marking, final_marking)
+
+     return {
+         "fitness": fitness.get("average_trace_fitness", float("nan")),
+         "precision": float(precision),
+         "generalization": float(generalization),
+         "simplicity": float(simplicity),
+     }
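A typical use of check_conformance is replaying a fresh run against a model mined from a known-good baseline. A hedged sketch; the file names and the 0.9 fitness cut-off are illustrative, not part of the package::

    import pm4py

    from strobe.analysis import check_conformance, discover_process_model

    # Mine a reference model from a run that is known to behave well.
    reference_df = pm4py.read_xes("baseline_run.xes")        # illustrative path
    net, im, fm = discover_process_model(reference_df, noise_threshold=0.2)

    # Replay today's run against that model and flag drift.
    new_df = pm4py.read_xes("todays_run.xes")                 # illustrative path
    scores = check_conformance(new_df, net, im, fm)
    if scores["fitness"] < 0.9:                                # arbitrary threshold
        print("Agent behaviour drifted from the baseline:", scores)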
strobe/analysis/discovery.py ADDED
@@ -0,0 +1,48 @@
+ from __future__ import annotations
+
+ from typing import Literal
+
+ import pandas as pd
+ import pm4py
+
+
+ def discover_dfg(
+     df: pd.DataFrame,
+ ) -> tuple[dict, dict, dict]:
+     """Discover a directly-follows graph from *df*.
+
+     Returns
+     -------
+     (dfg, start_activities, end_activities)
+     """
+     return pm4py.discover_dfg(df)
+
+
+ def discover_process_model(
+     df: pd.DataFrame,
+     algorithm: Literal["inductive", "alpha"] = "inductive",
+     noise_threshold: float = 0.0,
+ ) -> tuple:
+     """Discover a Petri net from *df*.
+
+     Parameters
+     ----------
+     algorithm:
+         ``"inductive"`` uses the Inductive Miner (default);
+         ``"alpha"`` uses the Alpha Miner.
+     noise_threshold:
+         Noise filtering threshold passed to the Inductive Miner (ignored for
+         the Alpha Miner).
+
+     Returns
+     -------
+     (net, initial_marking, final_marking)
+     """
+     if algorithm == "inductive":
+         return pm4py.discover_petri_net_inductive(df, noise_threshold=noise_threshold)
+     elif algorithm == "alpha":
+         return pm4py.discover_petri_net_alpha(df)
+     else:
+         raise ValueError(
+             f"Unknown algorithm: {algorithm!r}. Choose 'inductive' or 'alpha'."
+         )
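Both functions are thin wrappers over pm4py's simplified discovery interface; for example (the XES path is illustrative)::

    import pm4py

    from strobe.analysis import discover_dfg, discover_process_model

    df = pm4py.read_xes("run.xes")                 # illustrative path; any pm4py-formatted log works
    dfg, start_acts, end_acts = discover_dfg(df)   # dfg maps (src, tgt) -> frequency
    net, im, fm = discover_process_model(df, algorithm="alpha")   # or the default Inductive Miner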
strobe/analysis/performance.py ADDED
@@ -0,0 +1,52 @@
+ from __future__ import annotations
+
+ import pandas as pd
+
+ from strobe.instrumentation.event_log import EventLog
+
+
+ def throughput_times(df: pd.DataFrame) -> pd.Series:
+     """Compute per-case wall-clock duration (last event − first event).
+
+     Returns
+     -------
+     pd.Series indexed by case ID, values are :class:`~datetime.timedelta`.
+     """
+     ts_col = EventLog.TIMESTAMP
+     case_col = EventLog.CASE_ID
+
+     grouped = df.groupby(case_col)[ts_col]
+     return grouped.max() - grouped.min()
+
+
+ def activity_statistics(df: pd.DataFrame) -> pd.DataFrame:
+     """Compute per-activity execution statistics using ``strobe:duration_s``.
+
+     Columns: ``activity``, ``count``, ``mean_duration_s``, ``min_duration_s``,
+     ``max_duration_s``.
+
+     If the ``strobe:duration_s`` column is absent, the duration columns contain
+     ``NaN``.
+     """
+     activity_col = EventLog.ACTIVITY
+     duration_col = "strobe:duration_s"
+
+     if duration_col not in df.columns:
+         counts = df.groupby(activity_col).size().rename("count")
+         stats = counts.to_frame()
+         stats["mean_duration_s"] = float("nan")
+         stats["min_duration_s"] = float("nan")
+         stats["max_duration_s"] = float("nan")
+         # Match the column naming of the duration-aware branch below.
+         return stats.reset_index().rename(columns={activity_col: "activity"})
+
+     stats = (
+         df.groupby(activity_col)[duration_col]
+         .agg(
+             count="count",
+             mean_duration_s="mean",
+             min_duration_s="min",
+             max_duration_s="max",
+         )
+         .reset_index()
+     )
+     stats = stats.rename(columns={activity_col: "activity"})
+     return stats
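Both helpers are plain pandas group-bys over the columns written by EventLog; for example (the XES path is illustrative, and any pm4py-formatted log works)::

    import pm4py

    from strobe.analysis import activity_statistics, throughput_times

    df = pm4py.read_xes("run.xes")                 # illustrative path
    tt = throughput_times(df)                      # pd.Series of timedeltas, indexed by case ID
    print(tt.dt.total_seconds().describe())
    print(activity_statistics(df).sort_values("mean_duration_s", ascending=False))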
strobe/instrumentation/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .event_log import EventLog
+ from .plugin import StrobePlugin
+
+ __all__ = ["EventLog", "StrobePlugin"]
strobe/instrumentation/event_log.py ADDED
@@ -0,0 +1,76 @@
+ from __future__ import annotations
+
+ from datetime import datetime
+ from pathlib import Path
+
+ import pandas as pd
+ import pm4py
+
+
+ class EventLog:
+     """Internal accumulator that stores events and exports to XES / DataFrame."""
+
+     CASE_ID = "case:concept:name"
+     ACTIVITY = "concept:name"
+     TIMESTAMP = "time:timestamp"
+
+     def __init__(self) -> None:
+         self._events: list[dict] = []
+
+     def add_event(
+         self,
+         case_id: str,
+         activity: str,
+         timestamp: datetime,
+         **attrs,
+     ) -> None:
+         """Append one event to the log.
+
+         Extra keyword arguments are stored under a ``strobe:`` namespace prefix
+         so they survive XES round-trips.
+         """
+         event: dict = {
+             self.CASE_ID: case_id,
+             self.ACTIVITY: activity,
+             self.TIMESTAMP: timestamp,
+         }
+         for key, value in attrs.items():
+             namespaced = key if key.startswith("strobe:") else f"strobe:{key}"
+             event[namespaced] = value
+         self._events.append(event)
+
+     def to_dataframe(self) -> pd.DataFrame:
+         """Return a pm4py-compatible DataFrame."""
+         if not self._events:
+             df = pd.DataFrame(columns=[self.CASE_ID, self.ACTIVITY, self.TIMESTAMP])
+         else:
+             df = pd.DataFrame(self._events)
+         df = pm4py.format_dataframe(
+             df,
+             case_id=self.CASE_ID,
+             activity_key=self.ACTIVITY,
+             timestamp_key=self.TIMESTAMP,
+         )
+         return df
+
+     def write_xes(self, path: str | Path) -> None:
+         """Export the log to an XES file at *path*."""
+         pm4py.write_xes(self.to_dataframe(), str(path))
+
+     @classmethod
+     def read_xes(cls, path: str | Path) -> "EventLog":
+         """Load an XES file and return a new :class:`EventLog`."""
+         df = pm4py.read_xes(str(path))
+         log = cls()
+         for _, row in df.iterrows():
+             case_id = row[cls.CASE_ID]
+             activity = row[cls.ACTIVITY]
+             timestamp = row[cls.TIMESTAMP]
+             extra = {
+                 k: v
+                 for k, v in row.items()
+                 if k not in (cls.CASE_ID, cls.ACTIVITY, cls.TIMESTAMP)
+                 and not k.startswith("@@")
+             }
+             log.add_event(case_id, activity, timestamp, **extra)
+         return log
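The accumulator can also be used on its own, without the ADK plugin; a small round-trip sketch (case/activity names and the output path are illustrative)::

    from datetime import datetime, timezone

    from strobe.instrumentation import EventLog

    log = EventLog()
    log.add_event("case-1", "tool:search", datetime.now(timezone.utc), duration_s=0.42)
    log.add_event("case-1", "llm:gemini", datetime.now(timezone.utc), input_tokens=120)

    df = log.to_dataframe()          # extra kwargs appear as strobe:* columns
    log.write_xes("run.xes")         # illustrative output path
    restored = EventLog.read_xes("run.xes")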
strobe/instrumentation/plugin.py ADDED
@@ -0,0 +1,131 @@
+ from __future__ import annotations
+
+ import json
+ from datetime import datetime, timezone
+
+ import pandas as pd
+ from google.adk.plugins.base_plugin import BasePlugin
+
+ from .event_log import EventLog
+
+
+ class StrobePlugin(BasePlugin):
+     """ADK plugin that captures tool, LLM, and agent callbacks as XES events."""
+
+     def __init__(self) -> None:
+         super().__init__(name="strobe")
+         self._log = EventLog()
+         self._pending: dict[tuple, datetime] = {}
+
+     # ── Tool callbacks ──────────────────────────────────────────────────────
+
+     async def before_tool_callback(self, tool, tool_args, tool_context):
+         key = (tool_context.invocation_id, tool_context.function_call_id)
+         self._pending[key] = datetime.now(timezone.utc)
+
+     async def after_tool_callback(self, tool, tool_args, tool_context, tool_response):
+         key = (tool_context.invocation_id, tool_context.function_call_id)
+         start = self._pending.pop(key, None)
+         now = datetime.now(timezone.utc)
+         duration = (now - start).total_seconds() if start is not None else None
+
+         attrs: dict = {}
+         if start is not None:
+             attrs["start_time"] = start.isoformat()
+         if duration is not None:
+             attrs["duration_s"] = duration
+         try:
+             attrs["tool_args"] = json.dumps(tool_args)
+         except (TypeError, ValueError):
+             attrs["tool_args"] = str(tool_args)
+         try:
+             attrs["tool_result"] = json.dumps(tool_response)
+         except (TypeError, ValueError):
+             attrs["tool_result"] = str(tool_response)
+
+         self._log.add_event(
+             case_id=tool_context.invocation_id,
+             activity=f"tool:{tool.name}",
+             timestamp=now,
+             **attrs,
+         )
+
+     # ── LLM callbacks ───────────────────────────────────────────────────────
+
+     async def before_model_callback(self, callback_context, llm_request):
+         key = (callback_context.invocation_id, "llm")
+         self._pending[key] = datetime.now(timezone.utc)
+
+     async def after_model_callback(self, callback_context, llm_response):
+         key = (callback_context.invocation_id, "llm")
+         start = self._pending.pop(key, None)
+         now = datetime.now(timezone.utc)
+         duration = (now - start).total_seconds() if start is not None else None
+
+         model_name = getattr(llm_response, "model", None) or getattr(
+             llm_response, "model_version", None
+         )
+
+         attrs: dict = {}
+         if start is not None:
+             attrs["start_time"] = start.isoformat()
+         if duration is not None:
+             attrs["duration_s"] = duration
+         if model_name:
+             attrs["model_name"] = str(model_name)
+
+         usage = getattr(llm_response, "usage_metadata", None)
+         if usage is not None:
+             input_tokens = getattr(usage, "prompt_token_count", None)
+             output_tokens = getattr(usage, "candidates_token_count", None)
+             if input_tokens is not None:
+                 attrs["input_tokens"] = input_tokens
+             if output_tokens is not None:
+                 attrs["output_tokens"] = output_tokens
+
+         activity = f"llm:{model_name}" if model_name else "llm"
+         self._log.add_event(
+             case_id=callback_context.invocation_id,
+             activity=activity,
+             timestamp=now,
+             **attrs,
+         )
+
+     # ── Agent callbacks ──────────────────────────────────────────────────────
+
+     async def before_agent_callback(self, callback_context):
+         agent_name = getattr(callback_context, "agent_name", "unknown")
+         key = (callback_context.invocation_id, f"agent:{agent_name}")
+         self._pending[key] = datetime.now(timezone.utc)
+
+     async def after_agent_callback(self, callback_context):
+         agent_name = getattr(callback_context, "agent_name", "unknown")
+         key = (callback_context.invocation_id, f"agent:{agent_name}")
+         start = self._pending.pop(key, None)
+         now = datetime.now(timezone.utc)
+         duration = (now - start).total_seconds() if start is not None else None
+
+         attrs: dict = {}
+         if start is not None:
+             attrs["start_time"] = start.isoformat()
+         if duration is not None:
+             attrs["duration_s"] = duration
+
+         self._log.add_event(
+             case_id=callback_context.invocation_id,
+             activity=f"agent:{agent_name}",
+             timestamp=now,
+             **attrs,
+         )
+
+     # ── Export ───────────────────────────────────────────────────────────────
+
+     @property
+     def event_log(self) -> EventLog:
+         return self._log
+
+     def to_dataframe(self) -> pd.DataFrame:
+         return self._log.to_dataframe()
+
+     def write_xes(self, path) -> None:
+         self._log.write_xes(path)
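The plugin is meant to be handed to an ADK runner, which then drives the callbacks above; each invocation becomes one XES case keyed by its invocation_id. A rough sketch, assuming a google-adk version whose InMemoryRunner accepts a plugins argument; my_agent stands in for your own agent definition::

    from google.adk.runners import InMemoryRunner

    from strobe import StrobePlugin

    plugin = StrobePlugin()
    # Assumed signature: recent ADK releases accept a list of plugins here.
    runner = InMemoryRunner(agent=my_agent, app_name="demo", plugins=[plugin])

    # ... drive the runner over your sessions/queries as usual ...

    plugin.write_xes("agent_run.xes")      # illustrative output path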
strobe/visualization/__init__.py ADDED
@@ -0,0 +1,17 @@
+ from .app import launch_dashboard
+ from .plots import (
+     plot_activity_statistics,
+     plot_conformance,
+     plot_dfg,
+     plot_petri_net,
+     plot_throughput_times,
+ )
+
+ __all__ = [
+     "launch_dashboard",
+     "plot_dfg",
+     "plot_petri_net",
+     "plot_throughput_times",
+     "plot_activity_statistics",
+     "plot_conformance",
+ ]
strobe/visualization/app.py ADDED
@@ -0,0 +1,174 @@
+ """Streamlit dashboard for strobe event-log analysis.
+
+ Run directly::
+
+     streamlit run strobe/visualization/app.py
+
+ Or from Python via :func:`launch_dashboard`.
+ """
+
+ from __future__ import annotations
+
+ import os
+ import subprocess
+ import tempfile
+ from pathlib import Path
+
+
+ def launch_dashboard(xes_path: str | Path | None = None) -> subprocess.Popen:
+     """Launch the Streamlit dashboard in a subprocess.
+
+     Parameters
+     ----------
+     xes_path:
+         Optional path to a ``.xes`` file. When provided, the dashboard will
+         load it automatically via the ``STROBE_XES_PATH`` environment variable.
+
+     Returns
+     -------
+     The :class:`subprocess.Popen` handle for the launched process.
+     """
+     env = os.environ.copy()
+     if xes_path is not None:
+         env["STROBE_XES_PATH"] = str(xes_path)
+
+     app_file = Path(__file__).resolve()
+     return subprocess.Popen(
+         ["streamlit", "run", str(app_file)],
+         env=env,
+     )
+
+
+ # ---------------------------------------------------------------------------
+ # Everything below only runs when this file is executed by Streamlit.
+ # ---------------------------------------------------------------------------
+
+
+ def _run_app() -> None:  # pragma: no cover
+     from typing import Literal
+
+     import pandas as pd
+     import pm4py
+     import streamlit as st
+
+     from strobe.analysis.conformance import check_conformance
+     from strobe.analysis.discovery import discover_dfg, discover_process_model
+     from strobe.analysis.performance import activity_statistics, throughput_times
+     from strobe.visualization.plots import (
+         plot_activity_statistics,
+         plot_conformance,
+         plot_dfg,
+         plot_petri_net,
+         plot_throughput_times,
+     )
+
+     st.set_page_config(page_title="strobe dashboard", layout="wide")
+     st.title("strobe — Process Mining Dashboard")
+
+     # ------------------------------------------------------------------
+     # Sidebar: data source + discovery options
+     # ------------------------------------------------------------------
+     with st.sidebar:
+         st.header("Data")
+         env_path = os.environ.get("STROBE_XES_PATH")
+         uploaded = st.file_uploader("Upload an XES file", type=["xes"])
+
+         xes_source: bytes | None = None
+         if uploaded is not None:
+             xes_source = uploaded.read()
+         elif env_path:
+             st.info(f"Using env: {env_path}")
+             xes_source = Path(env_path).read_bytes()
+
+         st.header("Discovery")
+         algorithm: Literal["inductive", "alpha"] = st.selectbox(
+             "Algorithm", ["inductive", "alpha"]
+         )
+         noise_threshold = 0.0
+         if algorithm == "inductive":
+             noise_threshold = st.slider(
+                 "Noise threshold", min_value=0.0, max_value=1.0, value=0.0, step=0.05
+             )
+
+     if xes_source is None:
+         st.info("Upload an XES file in the sidebar to begin.")
+         st.stop()
+
+     # ------------------------------------------------------------------
+     # Load + format event log (cached)
+     # ------------------------------------------------------------------
+     @st.cache_data(show_spinner="Loading event log…")
+     def _load_df(raw: bytes, algo: str, noise: float) -> pd.DataFrame:
+         # algo/noise are unused for loading; they keep the cache key aligned
+         # with _discover below.
+         with tempfile.NamedTemporaryFile(suffix=".xes", delete=False) as f:
+             f.write(raw)
+             tmp_path = f.name
+         df = pm4py.read_xes(tmp_path)
+         os.unlink(tmp_path)  # remove the temporary copy once parsed
+         return df
+
+     @st.cache_data(show_spinner="Discovering process model…")
+     def _discover(raw: bytes, algo: Literal["inductive", "alpha"], noise: float):
+         df = _load_df(raw, algo, noise)
+         dfg_result = discover_dfg(df)
+         model_result = discover_process_model(df, algorithm=algo, noise_threshold=noise)
+         return df, dfg_result, model_result
+
+     df, (dfg, start_acts, end_acts), (net, im, fm) = _discover(
+         xes_source, algorithm, noise_threshold
+     )
+
+     # ------------------------------------------------------------------
+     # Tabs
+     # ------------------------------------------------------------------
+     tab_model, tab_throughput, tab_activities, tab_conformance = st.tabs(
+         ["Process model", "Throughput", "Activities", "Conformance"]
+     )
+
+     with tab_model:
+         col1, col2 = st.columns(2)
+         with col1:
+             st.subheader("Directly-Follows Graph")
+             st.plotly_chart(
+                 plot_dfg(dfg, start_acts, end_acts), use_container_width=True
+             )
+         with col2:
+             st.subheader("Petri Net")
+             st.plotly_chart(plot_petri_net(net, im, fm), use_container_width=True)
+
+     with tab_throughput:
+         st.subheader("Per-case throughput times")
+         tt = throughput_times(df)
+         st.plotly_chart(plot_throughput_times(tt), use_container_width=True)
+         st.dataframe(
+             tt.rename("duration").dt.total_seconds().rename("duration_s").reset_index()
+         )
+
+     with tab_activities:
+         st.subheader("Activity statistics")
+         stats = activity_statistics(df)
+         st.plotly_chart(plot_activity_statistics(stats), use_container_width=True)
+         st.dataframe(stats)
+
+     with tab_conformance:
+         st.subheader("Conformance scores")
+
+         @st.cache_data(show_spinner="Running conformance check…")
+         def _conformance(
+             raw: bytes, algo: Literal["inductive", "alpha"], noise: float
+         ) -> dict[str, float]:
+             df2, _, (net2, im2, fm2) = _discover(raw, algo, noise)
+             return check_conformance(df2, net2, im2, fm2)
+
+         scores = _conformance(xes_source, algorithm, noise_threshold)
+         st.plotly_chart(plot_conformance(scores), use_container_width=True)
+
+         col_fit, col_prec, col_gen, col_simp = st.columns(4)
+         col_fit.metric("Fitness", f"{scores['fitness']:.3f}")
+         col_prec.metric("Precision", f"{scores['precision']:.3f}")
+         col_gen.metric("Generalization", f"{scores['generalization']:.3f}")
+         col_simp.metric("Simplicity", f"{scores['simplicity']:.3f}")
+
+
+ if __name__ == "__main__" or os.environ.get(
+     "STREAMLIT_SCRIPT_RUN_CTX"
+ ):  # pragma: no cover
+     _run_app()
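From Python, launch_dashboard is a non-blocking wrapper around the streamlit CLI (which must be on PATH); the XES path below is illustrative::

    from strobe import launch_dashboard

    proc = launch_dashboard("agent_run.xes")    # returns the Popen handle immediately
    # ... browse the dashboard at the URL Streamlit prints ...
    proc.terminate()                            # stop the dashboard when finished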
strobe/visualization/plots.py ADDED
@@ -0,0 +1,339 @@
+ """Pure Plotly figure factories — no Streamlit dependency."""
+
+ from __future__ import annotations
+
+ from collections import defaultdict, deque
+
+ import networkx as nx
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ from plotly.subplots import make_subplots
+
+
+ def _hierarchical_layout(G: nx.DiGraph, spacing: float = 2.0) -> dict:
+     """Compute hierarchical (top-down flowchart) positions for a directed graph.
+
+     Uses BFS from source nodes to assign layers, arranging them top-to-bottom
+     for a flowchart-like appearance. Falls back to spring layout if no sources
+     are found.
+     """
+     # Assign nodes to layers using BFS from sources
+     in_degree = dict(G.in_degree())
+     sources = [n for n in G.nodes() if in_degree[n] == 0]
+
+     if not sources:
+         # No sources found (all nodes in cycles), use spring layout fallback
+         return nx.spring_layout(G, seed=42)
+
+     layer_assignment = {}
+     queue = deque(sources)
+     max_allowed_layer = max(len(G) - 1, 0)
+
+     for source in sources:
+         layer_assignment[source] = 0
+
+     while queue:
+         node = queue.popleft()
+
+         for successor in G.successors(node):
+             # Assign to max layer of predecessors + 1, capped at the node count
+             # so that cycles cannot keep pushing layers (and this loop) forever.
+             max_pred_layer = max(
+                 (layer_assignment.get(pred, -1) for pred in G.predecessors(successor)),
+                 default=-1,
+             )
+             new_layer = min(max_pred_layer + 1, max_allowed_layer)
+
+             # Only re-enqueue a node when its layer actually moved down; this
+             # guarantees termination even when the graph contains cycles.
+             if new_layer > layer_assignment.get(successor, -1):
+                 layer_assignment[successor] = new_layer
+                 queue.append(successor)
+
+     # Nodes unreachable from any source (e.g. inside an isolated cycle) still
+     # need coordinates; park them in the top layer.
+     for node in G.nodes():
+         layer_assignment.setdefault(node, 0)
+
+     # Group nodes by layer
+     layers_dict = defaultdict(list)
+     for node, layer in layer_assignment.items():
+         layers_dict[layer].append(node)
+
+     # Compute positions: layers go top-to-bottom, nodes spread left-right
+     pos = {}
+     max_layer = max(layers_dict.keys()) if layers_dict else 0
+
+     for layer, nodes in sorted(layers_dict.items()):
+         y = max_layer - layer  # top-down: layer 0 at top
+         num_nodes = len(nodes)
+         for i, node in enumerate(nodes):
+             # Spread nodes horizontally
+             x = (i - num_nodes / 2) * spacing
+             pos[node] = (x, y)
+
+     return pos
+
+
+ def plot_dfg(dfg: dict, start_activities: dict, end_activities: dict) -> go.Figure:
+     """Return an interactive DFG figure.
+
+     Edge width and colour encode frequency. Hover shows the frequency count.
+     """
+     G = nx.DiGraph()
+     for (src, tgt), freq in dfg.items():
+         G.add_edge(src, tgt, freq=freq)
+     for act in list(start_activities) + list(end_activities):
+         if act not in G:
+             G.add_node(act)
+
+     pos = _hierarchical_layout(G)
+
+     max_freq = max((d["freq"] for _, _, d in G.edges(data=True)), default=1)
+
+     edge_traces = []
+     for src, tgt, data in G.edges(data=True):
+         freq = data["freq"]
+         x0, y0 = pos[src]
+         x1, y1 = pos[tgt]
+         width = 1 + 5 * freq / max_freq
+         color = f"rgba(31,119,180,{0.3 + 0.7 * freq / max_freq:.2f})"
+         edge_traces.append(
+             go.Scatter(
+                 x=[x0, x1, None],
+                 y=[y0, y1, None],
+                 mode="lines",
+                 line=dict(width=width, color=color),
+                 hoverinfo="text",
+                 text=f"{src} → {tgt}: {freq}",
+                 showlegend=False,
+             )
+         )
+
+     node_x, node_y, node_text, node_hover = [], [], [], []
+     node_colors = []
+     for node in G.nodes():
+         x, y = pos[node]
+         node_x.append(x)
+         node_y.append(y)
+         node_text.append(node)
+         if node in start_activities and node in end_activities:
+             label = f"{node}<br>(start+end)"
+             node_colors.append("purple")
+         elif node in start_activities:
+             label = f"{node}<br>(start)"
+             node_colors.append("green")
+         elif node in end_activities:
+             label = f"{node}<br>(end)"
+             node_colors.append("red")
+         else:
+             label = node
+             node_colors.append("steelblue")
+         node_hover.append(label)
+
+     node_trace = go.Scatter(
+         x=node_x,
+         y=node_y,
+         mode="markers+text",
+         text=node_text,
+         textposition="top center",
+         hovertext=node_hover,
+         hoverinfo="text",
+         marker=dict(size=20, color=node_colors, line=dict(width=2, color="white")),
+         showlegend=False,
+     )
+
+     fig = go.Figure(data=edge_traces + [node_trace])
+     fig.update_layout(
+         title="Directly-Follows Graph",
+         xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+         yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+         plot_bgcolor="white",
+         margin=dict(l=20, r=20, t=40, b=20),
+     )
+     return fig
+
+
+ def plot_petri_net(net, initial_marking, final_marking) -> go.Figure:
+     """Return an interactive Petri net figure.
+
+     Places are rendered as circles; transitions as squares.
+     Source/sink places are highlighted in green/red.
+     """
+     G = nx.DiGraph()
+     place_ids = {}
+     trans_ids = {}
+
+     source_places = set(initial_marking.keys())
+     sink_places = set(final_marking.keys())
+
+     for place in net.places:
+         node_id = f"p:{place.name}"
+         place_ids[place] = node_id
+         G.add_node(node_id, kind="place", name=place.name)
+
+     for trans in net.transitions:
+         node_id = f"t:{trans.name}"
+         trans_ids[trans] = node_id
+         label = trans.label if trans.label else f"τ({trans.name})"
+         G.add_node(node_id, kind="transition", name=label)
+
+     for arc in net.arcs:
+         src = arc.source
+         tgt = arc.target
+         src_id = place_ids.get(src) or trans_ids.get(src)
+         tgt_id = place_ids.get(tgt) or trans_ids.get(tgt)
+         if src_id and tgt_id:
+             G.add_edge(src_id, tgt_id)
+
+     pos = _hierarchical_layout(G)
+
+     edge_traces = []
+     for src_id, tgt_id in G.edges():
+         x0, y0 = pos[src_id]
+         x1, y1 = pos[tgt_id]
+         edge_traces.append(
+             go.Scatter(
+                 x=[x0, x1, None],
+                 y=[y0, y1, None],
+                 mode="lines",
+                 line=dict(width=1.5, color="gray"),
+                 hoverinfo="none",
+                 showlegend=False,
+             )
+         )
+
+     place_x, place_y, place_text, place_colors = [], [], [], []
+     for place, node_id in place_ids.items():
+         x, y = pos[node_id]
+         place_x.append(x)
+         place_y.append(y)
+         place_text.append(place.name)
+         if place in source_places and place in sink_places:
+             place_colors.append("purple")
+         elif place in source_places:
+             place_colors.append("green")
+         elif place in sink_places:
+             place_colors.append("red")
+         else:
+             place_colors.append("steelblue")
+
+     place_trace = go.Scatter(
+         x=place_x,
+         y=place_y,
+         mode="markers+text",
+         text=place_text,
+         textposition="top center",
+         hoverinfo="text",
+         marker=dict(
+             symbol="circle",
+             size=18,
+             color=place_colors,
+             line=dict(width=2, color="white"),
+         ),
+         name="Places",
+     )
+
+     trans_x, trans_y, trans_text = [], [], []
+     for trans, node_id in trans_ids.items():
+         x, y = pos[node_id]
+         trans_x.append(x)
+         trans_y.append(y)
+         label = trans.label if trans.label else f"τ({trans.name})"
+         trans_text.append(label)
+
+     trans_trace = go.Scatter(
+         x=trans_x,
+         y=trans_y,
+         mode="markers+text",
+         text=trans_text,
+         textposition="top center",
+         hoverinfo="text",
+         marker=dict(
+             symbol="square",
+             size=16,
+             color="orange",
+             line=dict(width=2, color="white"),
+         ),
+         name="Transitions",
+     )
+
+     fig = go.Figure(data=edge_traces + [place_trace, trans_trace])
+     fig.update_layout(
+         title="Petri Net",
+         xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+         yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+         plot_bgcolor="white",
+         margin=dict(l=20, r=20, t=40, b=20),
+     )
+     return fig
+
+
+ def plot_throughput_times(series: pd.Series) -> go.Figure:
+     """Return a violin + box plot of per-case throughput times (in seconds)."""
+     durations_s = series.dt.total_seconds()
+     fig = px.violin(
+         y=durations_s,
+         box=True,
+         points="all",
+         labels={"y": "Duration (s)"},
+         title="Throughput Times",
+     )
+     fig.update_layout(margin=dict(l=20, r=20, t=40, b=20))
+     return fig
+
+
+ def plot_activity_statistics(df: pd.DataFrame) -> go.Figure:
+     """Return a dual-axis grouped bar chart: count (left) + mean duration (right)."""
+     fig = make_subplots(specs=[[{"secondary_y": True}]])
+
+     activities = df["activity"] if "activity" in df.columns else df.iloc[:, 0]
+
+     fig.add_trace(
+         go.Bar(
+             x=activities,
+             y=df["count"],
+             name="Count",
+             marker_color="steelblue",
+         ),
+         secondary_y=False,
+     )
+     fig.add_trace(
+         go.Bar(
+             x=activities,
+             y=df["mean_duration_s"],
+             name="Mean duration (s)",
+             marker_color="darkorange",
+         ),
+         secondary_y=True,
+     )
+
+     fig.update_layout(
+         title="Activity Statistics",
+         barmode="group",
+         margin=dict(l=20, r=20, t=40, b=20),
+     )
+     fig.update_yaxes(title_text="Count", secondary_y=False)
+     fig.update_yaxes(title_text="Mean duration (s)", secondary_y=True)
+     return fig
+
+
+ def plot_conformance(scores: dict[str, float]) -> go.Figure:
+     """Return a horizontal bar chart of the four conformance metrics."""
+     metrics = ["fitness", "precision", "generalization", "simplicity"]
+     values = [scores.get(m, 0.0) for m in metrics]
+     colors = [f"rgba({int(255 * (1 - v))},{int(200 * v)},80,0.85)" for v in values]
+
+     fig = go.Figure(
+         go.Bar(
+             x=values,
+             y=metrics,
+             orientation="h",
+             marker=dict(color=colors),
+             text=[f"{v:.3f}" for v in values],
+             textposition="outside",
+             hovertemplate="%{y}: %{x:.3f}<extra></extra>",
+         )
+     )
+     fig.update_layout(
+         title="Conformance Scores",
+         xaxis=dict(range=[0, 1.1], title="Score"),
+         margin=dict(l=20, r=60, t=40, b=20),
+     )
+     return fig
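The figure factories have no Streamlit dependency, so they also work in notebooks or scripts; a toy example with a hand-written DFG (activity names and counts are made up)::

    from strobe.visualization import plot_dfg

    dfg = {("agent:root", "tool:search"): 5, ("tool:search", "llm:gemini"): 5}
    fig = plot_dfg(
        dfg,
        start_activities={"agent:root": 5},
        end_activities={"llm:gemini": 5},
    )
    fig.show()                               # or fig.write_html("dfg.html")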
strobe-0.0.1.dist-info/METADATA ADDED
@@ -0,0 +1,10 @@
+ Metadata-Version: 2.4
+ Name: strobe
+ Version: 0.0.1
+ Summary: Process Mining & Agent Instrumentation for AI Agent Frameworks
+ Requires-Python: >=3.13
+ Requires-Dist: google-adk>=1.0.0
+ Requires-Dist: pandas>=2.0.0
+ Requires-Dist: plotly>=5.18.0
+ Requires-Dist: pm4py>=2.7.0
+ Requires-Dist: streamlit>=1.32.0
strobe-0.0.1.dist-info/RECORD ADDED
@@ -0,0 +1,15 @@
+ strobe/__init__.py,sha256=32IUlfzGttYEFQ2kSGvL4MFcj84jEL0luUspHwPav4I,577
+ strobe/_version.py,sha256=qf6R-J7-UyuABBo8c0HgaquJ8bejVbf07HodXgwAwgQ,704
+ strobe/analysis/__init__.py,sha256=ZwFhPegU6eCDUGvCkWHEb-v_v3sAvyzqhoHMuOmhsrc,307
+ strobe/analysis/conformance.py,sha256=bCNSCmd1p0uus-7yqAu0UwaDe9cNqm-dd9Bo93ixEwk,1148
+ strobe/analysis/discovery.py,sha256=fIAjBXEmLl7FaJnmOTSQaPdplJuPqWYBLexdQe1UOP8,1203
+ strobe/analysis/performance.py,sha256=TPE1TdVeeXgyQDuAaEaR0MWETIkx92ZmDo2bpSckZ2M,1536
+ strobe/instrumentation/__init__.py,sha256=0oF5981MhcrlzOiAu-r1C2A9UfFAAA3E3pyx-sQmv4s,105
+ strobe/instrumentation/event_log.py,sha256=kopYmAFbUt6rCeXQr7NjoQW2BGxh4Ov1kLa5xwb2hAQ,2328
+ strobe/instrumentation/plugin.py,sha256=F1yeOQlatGVGhEqTbJ80hGq8mk9Vc92XYACS_NCbdGU,5236
+ strobe/visualization/__init__.py,sha256=TcmYuPkRanbKUP1OlSaZ6qOxpqrbHyUj2IQCwPCK6ws,332
+ strobe/visualization/app.py,sha256=9pG3cSbz-IO5KzWOrFr-HHMK1T1R-XC0SyuRytmCCGg,6118
+ strobe/visualization/plots.py,sha256=MbXK2qAY02yA0uU_f9aBgGvn3ZKtV5uvPjuzslfnV4g,10504
+ strobe-0.0.1.dist-info/METADATA,sha256=a-LwOF591VoUwyQxKtwR3mOM19srNVabrxTG1ptddUc,299
+ strobe-0.0.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+ strobe-0.0.1.dist-info/RECORD,,
strobe-0.0.1.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.29.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any