waldiez 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of waldiez might be problematic.

Files changed (79)
  1. waldiez/_version.py +1 -1
  2. waldiez/cli.py +5 -27
  3. waldiez/exporter.py +0 -13
  4. waldiez/exporting/agent/exporter.py +38 -0
  5. waldiez/exporting/agent/extras/__init__.py +2 -0
  6. waldiez/exporting/agent/extras/doc_agent_extras.py +366 -0
  7. waldiez/exporting/agent/extras/group_member_extras.py +3 -2
  8. waldiez/exporting/agent/processor.py +113 -15
  9. waldiez/exporting/chats/processor.py +2 -21
  10. waldiez/exporting/chats/utils/common.py +66 -1
  11. waldiez/exporting/chats/utils/group.py +6 -3
  12. waldiez/exporting/chats/utils/nested.py +1 -1
  13. waldiez/exporting/chats/utils/sequential.py +25 -9
  14. waldiez/exporting/chats/utils/single.py +8 -6
  15. waldiez/exporting/core/context.py +0 -12
  16. waldiez/exporting/core/extras/agent_extras/standard_extras.py +3 -1
  17. waldiez/exporting/core/extras/base.py +20 -17
  18. waldiez/exporting/core/extras/path_resolver.py +39 -41
  19. waldiez/exporting/core/extras/serializer.py +16 -1
  20. waldiez/exporting/core/protocols.py +17 -0
  21. waldiez/exporting/core/types.py +6 -9
  22. waldiez/exporting/flow/execution_generator.py +56 -21
  23. waldiez/exporting/flow/exporter.py +1 -4
  24. waldiez/exporting/flow/factory.py +0 -9
  25. waldiez/exporting/flow/file_generator.py +6 -0
  26. waldiez/exporting/flow/orchestrator.py +27 -21
  27. waldiez/exporting/flow/utils/__init__.py +0 -2
  28. waldiez/exporting/flow/utils/common.py +15 -96
  29. waldiez/exporting/flow/utils/importing.py +4 -0
  30. waldiez/io/mqtt.py +33 -14
  31. waldiez/io/redis.py +18 -13
  32. waldiez/io/structured.py +9 -4
  33. waldiez/io/utils.py +32 -0
  34. waldiez/io/ws.py +8 -2
  35. waldiez/models/__init__.py +6 -0
  36. waldiez/models/agents/__init__.py +8 -0
  37. waldiez/models/agents/agent/agent.py +136 -38
  38. waldiez/models/agents/agent/agent_type.py +3 -2
  39. waldiez/models/agents/agents.py +10 -0
  40. waldiez/models/agents/doc_agent/__init__.py +13 -0
  41. waldiez/models/agents/doc_agent/doc_agent.py +126 -0
  42. waldiez/models/agents/doc_agent/doc_agent_data.py +149 -0
  43. waldiez/models/agents/doc_agent/rag_query_engine.py +127 -0
  44. waldiez/models/chat/chat_message.py +1 -1
  45. waldiez/models/flow/flow.py +13 -2
  46. waldiez/models/model/__init__.py +2 -2
  47. waldiez/models/model/_aws.py +75 -0
  48. waldiez/models/model/_llm.py +516 -0
  49. waldiez/models/model/_price.py +30 -0
  50. waldiez/models/model/model.py +45 -2
  51. waldiez/models/model/model_data.py +2 -83
  52. waldiez/models/tool/predefined/_duckduckgo.py +123 -0
  53. waldiez/models/tool/predefined/_google.py +31 -9
  54. waldiez/models/tool/predefined/_perplexity.py +161 -0
  55. waldiez/models/tool/predefined/_searxng.py +152 -0
  56. waldiez/models/tool/predefined/_tavily.py +46 -9
  57. waldiez/models/tool/predefined/_wikipedia.py +26 -6
  58. waldiez/models/tool/predefined/_youtube.py +36 -8
  59. waldiez/models/tool/predefined/registry.py +6 -0
  60. waldiez/models/waldiez.py +12 -0
  61. waldiez/runner.py +184 -382
  62. waldiez/running/__init__.py +2 -4
  63. waldiez/running/base_runner.py +136 -118
  64. waldiez/running/environment.py +61 -17
  65. waldiez/running/post_run.py +70 -14
  66. waldiez/running/pre_run.py +42 -0
  67. waldiez/running/protocol.py +42 -48
  68. waldiez/running/run_results.py +5 -5
  69. waldiez/running/standard_runner.py +429 -0
  70. waldiez/running/timeline_processor.py +1166 -0
  71. waldiez/utils/version.py +12 -1
  72. {waldiez-0.5.2.dist-info → waldiez-0.5.4.dist-info}/METADATA +61 -63
  73. {waldiez-0.5.2.dist-info → waldiez-0.5.4.dist-info}/RECORD +77 -66
  74. waldiez/running/import_runner.py +0 -424
  75. waldiez/running/subprocess_runner.py +0 -100
  76. {waldiez-0.5.2.dist-info → waldiez-0.5.4.dist-info}/WHEEL +0 -0
  77. {waldiez-0.5.2.dist-info → waldiez-0.5.4.dist-info}/entry_points.txt +0 -0
  78. {waldiez-0.5.2.dist-info → waldiez-0.5.4.dist-info}/licenses/LICENSE +0 -0
  79. {waldiez-0.5.2.dist-info → waldiez-0.5.4.dist-info}/licenses/NOTICE.md +0 -0
waldiez/running/timeline_processor.py (new file)
@@ -0,0 +1,1166 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2024 - 2025 Waldiez and contributors.
+# pylint: skip-file
+# pyright: reportArgumentType=false,reportUnknownVariableType=false
+# pyright: reportUnknownMemberType=false,reportUnknownArgumentType=false
+# flake8: noqa: C901
+"""
+Timeline Analysis Data Processor.
+
+Processes CSV files and outputs JSON structure for timeline visualization.
+"""
+
+import json
+import os
+import re
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import pandas as pd
+
+from waldiez.logger import WaldiezLogger
+
+if TYPE_CHECKING:
+    Series = pd.Series[Any]
+else:
+    Series = pd.Series
+
+# Color palettes
+AGENT_COLORS = [
+    "#FF6B35",
+    "#4A90E2",
+    "#7ED321",
+    "#9013FE",
+    "#FF9500",
+    "#FF3B30",
+    "#007AFF",
+    "#34C759",
+    "#AF52DE",
+    "#FF9F0A",
+    "#FF2D92",
+    "#5AC8FA",
+    "#30D158",
+    "#BF5AF2",
+    "#FFD60A",
+    "#FF453A",
+    "#64D2FF",
+    "#32D74B",
+    "#DA70D6",
+    "#FFD23F",
+]
+
+ACTIVITY_COLORS = {
+    "human_input_waiting": "#FF8C00",
+    "user_thinking": "#87CEEB",
+    "agent_transition": "#FF7043",
+    "tool_call": "#4CAF50",
+    "function_call": "#9C27B0",
+    "processing": "#BDBDBD",
+    "session": "#8B5CF6",
+}
+
+DEFAULT_AGENT_COLOR = "#E5E7EB"
+
+LOG = WaldiezLogger()
+
+
+class TimelineProcessor:
+    """Class to process timeline data from CSV files."""
+
+    agents_data: pd.DataFrame | None
+    chat_data: pd.DataFrame | None
+    events_data: pd.DataFrame | None
+    functions_data: pd.DataFrame | None
+
+    def __init__(self) -> None:
+        """Initialize the TimelineProcessor with empty data attributes."""
+        self.agents_data = None
+        self.chat_data = None
+        self.events_data = None
+        self.functions_data = None
+
+    def is_missing_or_nan(self, value: Any) -> bool:
+        """Check if a value is missing, NaN, or empty.
+
+        Parameters
+        ----------
+        value : Any
+            The value to check.
+
+        Returns
+        -------
+        bool
+            True if the value is missing, NaN, or empty; False otherwise.
+        """
+        if pd.isna(value):  # pyright: ignore
+            return True
+        if isinstance(value, str) and (
+            value.strip() == "" or value.lower() == "nan"
+        ):
+            return True
+        return False
+
+    def fill_missing_agent_names(
+        self, data: pd.DataFrame | None, name_column: str = "source_name"
+    ) -> pd.DataFrame | None:
+        """Fill missing agent names with the previous valid name.
+
+        Parameters
+        ----------
+        data : pd.DataFrame | None
+            DataFrame containing agent names.
+        name_column : str, optional
+            The column name containing agent names, by default "source_name".
+
+        Returns
+        -------
+        pd.DataFrame | None
+            DataFrame with missing agent names filled.
+        """
+        if data is None or data.empty:
+            return data
+
+        data = data.copy()
+        last_valid_name: str | None = None
+
+        for idx in range(len(data)):
+            current_name = data.iloc[idx][name_column]
+
+            if self.is_missing_or_nan(current_name):
+                if last_valid_name is not None:
+                    column = data.columns.get_loc(name_column)
+                    data.iloc[idx, column] = last_valid_name  # type: ignore[index]
+                    LOG.debug(
+                        "Row %d: Replaced missing agent name with '%s'",
+                        idx,
+                        last_valid_name,
+                    )
+                else:
+                    # If no previous valid name, use a default
+                    default_name = "unknown_agent"
+                    column = data.columns.get_loc(name_column)
+                    data.iloc[idx, column] = default_name  # type: ignore[index]
+                    last_valid_name = default_name
+                    LOG.debug(
+                        "Row %d: Used default agent name '%s'",
+                        idx,
+                        default_name,
+                    )
+            else:
+                last_valid_name = current_name
+
+        return data
+
+    def fill_missing_agent_data(self) -> None:
+        """Fill missing agent names in agents_data."""
+        if self.agents_data is None:
+            return
+
+        self.agents_data = self.fill_missing_agent_names(
+            self.agents_data, "name"
+        )
+
+    def load_csv_files(
+        self,
+        agents_file: str | None = None,
+        chat_file: str | None = None,
+        events_file: str | None = None,
+        functions_file: str | None = None,
+    ) -> None:
+        """Load CSV files into pandas DataFrames.
+
+        Parameters
+        ----------
+        agents_file : str | None
+            Path to the agents CSV file.
+        chat_file : str | None
+            Path to the chat CSV file.
+        events_file : str | None
+            Path to the events CSV file.
+        functions_file : str | None
+            Path to the functions CSV file.
+        """
+        if agents_file:
+            self.agents_data = pd.read_csv(agents_file)
+            LOG.info("Loaded agents data: %d rows", len(self.agents_data))
+            # Fill missing agent names
+            self.fill_missing_agent_data()
+
+        if chat_file:
+            self.chat_data = pd.read_csv(chat_file)
+            LOG.info("Loaded chat data: %d rows", len(self.chat_data))
+            # Fill missing agent names in chat data
+            self.chat_data = self.fill_missing_agent_names(
+                self.chat_data, "source_name"
+            )
+
+        if events_file:
+            self.events_data = pd.read_csv(events_file)
+            LOG.info("Loaded events data: %d rows", len(self.events_data))
+
+        if functions_file:
+            self.functions_data = pd.read_csv(functions_file)
+            LOG.info("Loaded functions data: %d rows", len(self.functions_data))
+
+    def parse_date(self, date_str: str) -> pd.Timestamp:
+        """Parse date string to datetime.
+
+        Parameters
+        ----------
+        date_str : str
+            The date string to parse.
+
+        Returns
+        -------
+        pd.Timestamp
+            The parsed datetime.
+        """
+        try:
+            return pd.to_datetime(date_str)
+        except Exception:
+            coerced = pd.to_datetime(date_str, errors="coerce")
+            if isinstance(coerced, pd.Timestamp):
+                return coerced
+            return pd.Timestamp("1970-01-01")
+
+    def generate_agent_colors(self, agent_names: list[str]) -> dict[str, str]:
+        """Generate color mapping for agents.
+
+        Parameters
+        ----------
+        agent_names : list[str]
+            List of agent names.
+
+        Returns
+        -------
+        dict[str, str]
+            Mapping of agent names to their assigned colors.
+        """
+        colors = {}
+        for i, agent in enumerate(agent_names):
+            colors[agent] = AGENT_COLORS[i % len(AGENT_COLORS)]
+        return colors
+
+    def extract_token_info(
+        self,
+        request_str: Any,
+        response_str: Any,
+    ) -> dict[str, int]:
+        """Extract token information from request/response strings.
+
+        Parameters
+        ----------
+        request_str : Any
+            The request string containing token usage information.
+        response_str : Any
+            The response string containing token usage information.
+
+        Returns
+        -------
+        dict[str, int]
+            A dictionary containing the extracted token information.
+        """
+        prompt_tokens = 0
+        completion_tokens = 0
+        total_tokens = 0
+        try:
+            # Try to parse as JSON first
+            if (
+                request_str
+                and isinstance(request_str, str)
+                and request_str.strip().startswith("{")
+            ):
+                request_data = json.loads(request_str)
+                if "usage" in request_data:
+                    prompt_tokens = request_data["usage"].get(
+                        "prompt_tokens", 0
+                    )
+                elif "prompt_tokens" in request_data:
+                    prompt_tokens = request_data["prompt_tokens"]
+                elif "messages" in request_data:
+                    # Estimate tokens from content length
+                    content_length = sum(
+                        len(msg.get("content", ""))
+                        for msg in request_data["messages"]
+                        if "content" in msg and msg["content"]
+                    )
+                    prompt_tokens = max(1, content_length // 4)
+
+            if (
+                response_str
+                and isinstance(response_str, str)
+                and response_str.strip().startswith("{")
+            ):
+                response_data = json.loads(response_str)
+                if "usage" in response_data:
+                    prompt_tokens = response_data["usage"].get(
+                        "prompt_tokens", prompt_tokens
+                    )
+                    completion_tokens = response_data["usage"].get(
+                        "completion_tokens", 0
+                    )
+                    total_tokens = response_data["usage"].get(
+                        "total_tokens", prompt_tokens + completion_tokens
+                    )
+        except json.JSONDecodeError:
+            # Malformed JSON: keep whatever counts were extracted so far
+            pass
+
+        if total_tokens == 0 and (prompt_tokens > 0 or completion_tokens > 0):
+            total_tokens = prompt_tokens + completion_tokens
+
+        return {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+        }
+
+    def extract_llm_model(
+        self, agent_name: str, request_str: Any = None
+    ) -> str:
+        """Extract LLM model from agent data or request.
+
+        Parameters
+        ----------
+        agent_name : str
+            The name of the agent.
+        request_str : Any, optional
+            The request string that may contain the model name.
+
+        Returns
+        -------
+        str
+            The extracted LLM model name.
+        """
+        # Handle missing/nan agent names
+        if self.is_missing_or_nan(agent_name):
+            agent_name = "unknown_agent"
+
+        # First try to extract from request_str (chat_completions.csv)
+        if request_str:
+            model = self._extract_model_from_text(str(request_str))
+            if model != "Unknown":
+                return model
+
+        # Then try to extract from agents data
+        if self.agents_data is not None:
+            agent_row = self.agents_data[self.agents_data["name"] == agent_name]
+            if not agent_row.empty and "init_args" in agent_row.columns:
+                init_args = str(agent_row.iloc[0]["init_args"])
+                model = self._extract_model_from_text(init_args)
+                if model != "Unknown":
+                    return model
+
+        return "Unknown"
+
+    def _extract_model_from_text(self, text: Any) -> str:
+        """Extract model name from text using dynamic parsing.
+
+        Parameters
+        ----------
+        text : Any
+            The text to extract the model name from.
+
+        Returns
+        -------
+        str
+            The extracted model name.
+        """
+        if not text or not isinstance(text, str):
+            return "Unknown"
+
+        try:
+            # Try JSON parsing first
+            if text.strip().startswith("{"):
+                model = self._extract_model_from_json(text)
+                if model != "Unknown":
+                    return model
+        except json.JSONDecodeError:
+            pass
+
+        # Use dynamic regex patterns to catch any model-like strings
+        model = self._extract_model_with_regex(text)
+        if model != "Unknown":
+            return model
+
+        return "Unknown"
+
+    def _extract_model_from_json(self, text: str) -> str:
+        """Extract model from JSON text using comprehensive key search.
+
+        Parameters
+        ----------
+        text : str
+            The JSON text to extract the model name from.
+
+        Returns
+        -------
+        str
+            The extracted model name.
+        """
+        try:
+            parsed = json.loads(text)
+
+            # Direct model keys
+            model_keys = [
+                "model",
+                "llm_model",
+                "engine",
+                "model_name",
+                "model_id",
+            ]
+            for key in model_keys:
+                if key in parsed and isinstance(parsed[key], str):
+                    return parsed[key]
+
+            # Nested searches for different structures
+            # Structure 1: config_list array (from agents.csv)
+            if "config_list" in parsed and isinstance(
+                parsed["config_list"], list
+            ):
+                for config in parsed["config_list"]:
+                    if isinstance(config, dict) and "model" in config:
+                        return config["model"]
+
+            # Structure 2: llm_config._model.config_list (from agents.csv)
+            if "llm_config" in parsed:
+                llm_config = parsed["llm_config"]
+                if isinstance(llm_config, dict):
+                    if "_model" in llm_config and isinstance(
+                        llm_config["_model"], dict
+                    ):
+                        model_config = llm_config["_model"]
+                        if "config_list" in model_config and isinstance(
+                            model_config["config_list"], list
+                        ):
+                            for config in model_config["config_list"]:
+                                if (
+                                    isinstance(config, dict)
+                                    and "model" in config
+                                ):
+                                    return config["model"]
+                        # Also check direct model keys in _model
+                        for key in model_keys:
+                            if key in model_config and isinstance(
+                                model_config[key], str
+                            ):
+                                return model_config[key]
+
+                    # Check llm_config level for model keys
+                    for key in model_keys:
+                        if key in llm_config and isinstance(
+                            llm_config[key], str
+                        ):
+                            return llm_config[key]
+
+            # Structure 3: recursive search for any model key in nested objects
+
+            model = recursive_search(parsed, model_keys)
+            if model != "Unknown":
+                return model
+
+        except (json.JSONDecodeError, AttributeError, TypeError):
+            pass
+
+        return "Unknown"
+
+    def _extract_model_with_regex(self, text: str) -> str:
+        """Extract model using flexible regex patterns.
+
+        Parameters
+        ----------
+        text : str
+            The input text from which to extract the model name.
+
+        Returns
+        -------
+        str
+            The extracted model name or "Unknown" if not found.
+        """
+        # Dynamic patterns that can catch various model names
+        patterns = [
+            # OpenAI models - flexible to catch versions like gpt-4.1, gpt-4o...
+            r"\bgpt-[0-9]+(?:\.[0-9]+)?[a-zA-Z]*(?:-[a-zA-Z0-9]+)*\b",
+            # Claude models - flexible for various versions
+            r"\bclaude-[0-9]+(?:\.[0-9]+)?(?:-[a-zA-Z0-9]+)*\b",
+            # Gemini models
+            r"\bgemini-[a-zA-Z0-9]+(?:-[a-zA-Z0-9]+)*\b",
+            # Generic model patterns
+            r"\b[a-zA-Z]+-[0-9]+(?:\.[0-9]+)?[a-zA-Z]*(?:-[a-zA-Z0-9]+)*\b",
+            # Anthropic models
+            r"\b(?:anthropic|claude)[/_-][a-zA-Z0-9]+(?:[._-][a-zA-Z0-9]+)*\b",
+            # Other common patterns
+            r"\b(?:llama|mistral|falcon|vicuna|alpaca)[/_-]?[0-9]+[a-zA-Z]*(?:[._-][a-zA-Z0-9]+)*\b",
+            # Cohere models
+            r"\bcommand[/_-]?[a-zA-Z0-9]*\b",
+            # Generic AI model patterns
+            r"\b[a-zA-Z]+(?:ai|ml|model)[/_-]?[0-9]+[a-zA-Z]*\b",
+        ]
+
+        for pattern in patterns:
+            matches = re.findall(pattern, text, re.IGNORECASE)
+            if matches:
+                # Prefer the longest match among the candidates
+                best_match = max(matches, key=len)
+                return best_match
+
+        # Last resort: look for any word that might be a model name
+        # This catches custom or unknown models
+        model_indicators = [
+            r'model["\']?\s*[:=]\s*["\']?([a-zA-Z0-9._-]+)',
+            r'engine["\']?\s*[:=]\s*["\']?([a-zA-Z0-9._-]+)',
+            r'"model"\s*:\s*"([^"]+)"',
+            r"'model'\s*:\s*'([^']+)'",
+        ]
+
+        for pattern in model_indicators:
+            matches = re.findall(pattern, text, re.IGNORECASE)
+            if matches:
+                return matches[0]
+
+        return "Unknown"
+
+    def is_human_input_waiting_period(
+        self,
+        prev_session: Series,
+        current_session: Series,
+        gap_duration: float,
+    ) -> bool:
+        """Detect if gap represents human input waiting.
+
+        Parameters
+        ----------
+        prev_session : Series
+            The previous session data.
+        current_session : Series
+            The current session data.
+        gap_duration : float
+            The duration of the gap to analyze.
+
+        Returns
+        -------
+        bool
+            True if gap likely represents human input waiting, False otherwise.
+        """
+        if gap_duration < 1.0:  # Reduced threshold for better detection
+            return False
+
+        if self.events_data is None:
+            return False
+
+        # Get events around the gap period
+        prev_end = self.parse_date(prev_session["end_time"])
+        current_start = self.parse_date(current_session["start_time"])
+
+        # Look for user message events right after the gap (within 1 second)
+        after_gap_window = current_start + pd.Timedelta(seconds=1)
+
+        user_events_after_gap = self.events_data[
+            (pd.to_datetime(self.events_data["timestamp"]) >= current_start)
+            & (
+                pd.to_datetime(self.events_data["timestamp"])
+                <= after_gap_window
+            )
+            & (self.events_data["event_name"] == "received_message")
+        ]
+
+        # Check if any of these events contain user messages
+        user_message_found = False
+        for _, event in user_events_after_gap.iterrows():
+            if self.is_user_message_event(event):
+                user_message_found = True
+                break
+
+        if user_message_found:
+            return True
+
+        # Alternative check: look for gaps that are longer and likely represent
+        # user thinking time
+        # This catches cases where user input detection might be missed
+        if gap_duration > 5.0:  # Longer gaps are more likely to be user input
+            # Check if there are any user messages in the broader timeline
+            # around this gap
+            broader_window_start = prev_end - pd.Timedelta(seconds=2)
+            broader_window_end = current_start + pd.Timedelta(seconds=5)
+
+            broader_events = self.events_data[
+                (
+                    pd.to_datetime(self.events_data["timestamp"])
+                    >= broader_window_start
+                )
+                & (
+                    pd.to_datetime(self.events_data["timestamp"])
+                    <= broader_window_end
+                )
+            ]
+
+            # Look for user message patterns
+            for _, event in broader_events.iterrows():
+                if self.is_user_message_event(event):
+                    return True
+
+        return False
+
+    def is_user_message_event(self, event: Series) -> bool:
+        """Check if an event represents a user message.
+
+        Parameters
+        ----------
+        event : Series
+            The event data to check.
+
+        Returns
+        -------
+        bool
+            True if the event represents a user message, False otherwise.
+        """
+        json_state = event.get("json_state", "")
+        if not json_state or not isinstance(json_state, str):
+            return False
+
+        try:
+            parsed = json.loads(json_state)
+            # Check for user role in message
+            if parsed.get("message", {}).get("role") == "user":
+                return True
+            # Check for customer sender (another indicator of user input)
+            if parsed.get("sender") == "customer":
+                return True
+        except (json.JSONDecodeError, AttributeError, TypeError):
+            pass
+
+        return False
+
+    def categorize_gap_activity(
+        self,
+        prev_session: Series,
+        current_session: Series,
+        gap_duration: float,
+    ) -> dict[str, Any]:
+        """Categorize what happened during a gap.
+
+        Parameters
+        ----------
+        prev_session : Series
+            The previous session data.
+        current_session : Series
+            The current session data.
+        gap_duration : float
+            The duration of the gap in seconds.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dictionary categorizing the gap activity.
+        """
+        # First check for human input waiting period
+        if self.is_human_input_waiting_period(
+            prev_session, current_session, gap_duration
+        ):
+            return {
+                "type": "human_input_waiting",
+                "label": "👤 Human Input",
+                "detail": f"Waiting for user ({gap_duration:.1f}s)",
+            }
+
+        # Check for function calls during gap
+        if self.functions_data is not None:
+            prev_end = self.parse_date(prev_session["end_time"])
+            current_start = self.parse_date(current_session["start_time"])
+
+            gap_functions = self.functions_data[
+                (pd.to_datetime(self.functions_data["timestamp"]) >= prev_end)
+                & (
+                    pd.to_datetime(self.functions_data["timestamp"])
+                    <= current_start
+                )
+            ]
+
+            if not gap_functions.empty:
+                primary_function = gap_functions.iloc[0]["function_name"]
+
+                if (
+                    "transfer" in primary_function.lower()
+                    or "switch" in primary_function.lower()
+                ):
+                    detail = (
+                        f"{primary_function} → {current_session['source_name']}"
+                    )
+                    return {
+                        "type": "agent_transition",
+                        "label": "🔄 Transfer",
+                        "detail": detail,
+                    }
+                else:
+                    return {
+                        "type": "tool_call",
+                        "label": f"🛠️ {primary_function.replace('_', ' ')}",
+                        "detail": "Tool execution",
+                    }
+
+        # Check if agent changed
+        if prev_session["source_name"] != current_session["source_name"]:
+            detail = (
+                f"{prev_session['source_name']} → "
+                f"{current_session['source_name']}"
+            )
+            return {
+                "type": "agent_transition",
+                "label": "🔄 Agent Switch",
+                "detail": detail,
+            }
+
+        # Longer gaps without clear indicators
+        # may still be user input.
+        # This provides a fallback for cases where
+        # the user input detection might miss them.
+        if gap_duration > 8.0:  # Longer gaps are more likely to be user input
+            return {
+                "type": "human_input_waiting",
+                "label": "👤 Likely User Input",
+                "detail": f"Probable user input ({gap_duration:.1f}s)",
+            }
+
+        return {
+            "type": "processing",
+            "label": "⚙️ Processing",
+            "detail": f"Processing ({gap_duration:.1f}s)",
+        }
+
+    def compress_timeline(
+        self,
+    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]], float, float]:
+        """Create compressed timeline from chat data.
+
+        Processes chat data and generates a compressed timeline with gaps,
+        sessions, and cost information.
+
+        Returns
+        -------
+        tuple[list[dict[str, Any]], list[dict[str, Any]], float, float]
+            A tuple containing:
+            - Compressed timeline as a list of dictionaries.
+            - Cost timeline as a list of dictionaries.
+            - Total compressed time.
+            - Cumulative cost.
+
+        Raises
+        ------
+        ValueError
+            If chat data is not provided.
+        """
+        if self.chat_data is None:
+            raise ValueError("Chat data is required")
+
+        LOG.info("Starting timeline compression...")
+
+        # Sort by start time and calculate durations
+        chat_sorted = self.chat_data.copy()
+        chat_sorted["start_time"] = pd.to_datetime(chat_sorted["start_time"])
+        chat_sorted["end_time"] = pd.to_datetime(chat_sorted["end_time"])
+        chat_sorted = chat_sorted.sort_values("start_time")
+        chat_sorted["duration"] = (
+            chat_sorted["end_time"] - chat_sorted["start_time"]
+        ).dt.total_seconds()
+
+        LOG.info(
+            "Sorted chat data by start time. Total sessions: %d",
+            len(chat_sorted),
+        )
+
+        timeline: list[dict[str, Any]] = []
+        cost_timeline: list[dict[str, Any]] = []
+        current_compressed_time = 0.0
+        cumulative_cost = 0.0
+        session_id = 1
+
+        for _i, (_idx, row) in enumerate(chat_sorted.iterrows()):
+            try:
+                # Get agent name and handle missing values
+                agent_name = row["source_name"]
+                if self.is_missing_or_nan(agent_name):
+                    agent_name = "unknown_agent"
+
+                LOG.debug(
+                    "Processing session %d: %s",
+                    session_id,
+                    agent_name,
+                )
+                start_compressed = current_compressed_time
+                gap_before = 0
+                gap_activity = None
+
+                if session_id > 1:  # Not the first session
+                    prev_row = chat_sorted.iloc[session_id - 2]  # Previous row
+                    gap_duration = (
+                        row["start_time"] - prev_row["end_time"]
+                    ).total_seconds()
+
+                    gap_activity = self.categorize_gap_activity(
+                        prev_row, row, gap_duration
+                    )
+
+                    # Determine compressed gap duration
+                    if gap_activity["type"] == "human_input_waiting":
+                        compressed_gap = 1.0
+                        gap_before = gap_duration
+                    elif gap_duration > 2.0 and gap_activity["type"] in [
+                        "processing",
+                        "user_thinking",
+                    ]:
+                        compressed_gap = 2.0
+                        gap_before = gap_duration
+                    else:
+                        compressed_gap = gap_duration
+                        gap_before = gap_duration
+
+                    # Add gap to timeline if significant
+                    if gap_before > 0.1:
+                        gap_start = current_compressed_time
+                        gap_end = gap_start + compressed_gap
+
+                        timeline.append(
+                            {
+                                "id": f"gap_{session_id - 1}",
+                                "type": "gap",
+                                "gap_type": gap_activity["type"],
+                                "start": gap_start,
+                                "end": gap_end,
+                                "duration": compressed_gap,
+                                "value": compressed_gap,
+                                "real_duration": gap_before,
+                                "compressed": gap_activity["type"]
+                                == "human_input_waiting"
+                                or (
+                                    gap_duration > 2.0
+                                    and gap_activity["type"]
+                                    in ["processing", "user_thinking"]
+                                ),
+                                "color": ACTIVITY_COLORS.get(
+                                    gap_activity["type"],
+                                    ACTIVITY_COLORS["processing"],
+                                ),
+                                "label": (
+                                    gap_activity["label"]
+                                    + f" ({gap_before:.1f}s)"
+                                    if gap_before != compressed_gap
+                                    else gap_activity["label"]
+                                ),
+                                "y_position": session_id - 0.5,
+                            }
+                        )
+
+                        current_compressed_time += compressed_gap
+                        start_compressed = current_compressed_time
+
+                end_compressed = start_compressed + row["duration"]
+
+                # Extract token info with error handling
+                try:
+                    token_info = self.extract_token_info(
+                        row.get("request", ""), row.get("response", "")
+                    )
+                except Exception as e:
+                    LOG.error(
+                        "Error extracting token info for session %s: %s",
+                        session_id,
+                        e,
+                    )
+                    token_info = {
+                        "prompt_tokens": 0,
+                        "completion_tokens": 0,
+                        "total_tokens": 0,
+                    }
+
+                # Add session to timeline
+                timeline.append(
+                    {
+                        "id": f"session_{session_id}",
+                        "type": "session",
+                        "start": start_compressed,
+                        "end": end_compressed,
+                        "duration": row["duration"],
+                        "value": row["duration"],
+                        "agent": agent_name,
+                        # Will be updated if agents data available
+                        "agent_class": agent_name,
+                        "cost": row.get("cost", 0),
+                        "tokens": token_info["total_tokens"],
+                        "prompt_tokens": token_info["prompt_tokens"],
+                        "completion_tokens": token_info["completion_tokens"],
+                        "events": 1,  # Placeholder
+                        "color": DEFAULT_AGENT_COLOR,  # Will be updated later
+                        "label": f"S{session_id}: {agent_name}",
+                        "is_cached": bool(row.get("is_cached", False)),
+                        "y_position": session_id,
+                        "llm_model": self.extract_llm_model(
+                            agent_name, row.get("request", "")
+                        ),
+                        "session_id": row.get(
+                            "session_id", f"session_{session_id}"
+                        ),
+                        "real_start_time": row["start_time"].strftime(
+                            "%H:%M:%S"
+                        ),
+                        "request": row.get("request", ""),
+                        "response": row.get("response", ""),
+                    }
+                )
+
+                # Add to cost timeline
+                cumulative_cost += row.get("cost", 0)
+                cost_timeline.append(
+                    {
+                        "time": start_compressed + row["duration"] / 2,
+                        "cumulative_cost": cumulative_cost,
+                        "session_cost": row.get("cost", 0),
+                        "session_id": session_id,
+                    }
+                )
+
+                current_compressed_time = end_compressed
+                session_id += 1
+
+            except Exception as e:
+                LOG.error(
+                    "Error processing session %d: %s",
+                    session_id,
+                    e,
+                )
+                LOG.error("Row data: %s", dict(row))
+                raise
+
+        # Finalize timeline
+        if not timeline:
+            LOG.warning("No valid sessions found in chat data.")
+            return [], [], 0.0, 0.0
+        LOG.info(
+            "Timeline compression complete. Generated %d items.",
+            len(timeline),
+        )
+        return timeline, cost_timeline, current_compressed_time, cumulative_cost
+
+    def process_timeline(self) -> dict[str, Any]:
+        """Timeline processing function.
+
+        Processes chat data and generates a timeline with summary statistics.
+
+        Returns
+        -------
+        dict
+            A dictionary containing the processed timeline and related data.
+        """
+        if self.chat_data is None:
+            raise ValueError("Chat data is required for processing")
+
+        timeline, cost_timeline, total_time, total_cost = (
+            self.compress_timeline()
+        )
+
+        # Get unique agents and assign colors
+        # (filter out any remaining NaN values)
+        agents_in_timeline = list(
+            {
+                item["agent"]
+                for item in timeline
+                if item["type"] == "session"
+                and not self.is_missing_or_nan(item["agent"])
+            }
+        )
+        agent_colors = self.generate_agent_colors(agents_in_timeline)
+
+        # Update timeline with colors and agent classes
+        for item in timeline:
+            if item["type"] == "session":
+                agent_name = item["agent"]
+                if self.is_missing_or_nan(agent_name):
+                    agent_name = "unknown_agent"
+                    item["agent"] = agent_name
+
+                item["color"] = agent_colors.get(
+                    agent_name, DEFAULT_AGENT_COLOR
+                )
+
+                # Update agent class if agents data available
+                if self.agents_data is not None:
+                    agent_row = self.agents_data[
+                        self.agents_data["name"] == agent_name
+                    ]
+                    if not agent_row.empty and "class" in agent_row.columns:
+                        agent_class = agent_row.iloc[0]["class"]
+                        if not self.is_missing_or_nan(agent_class):
+                            item["agent_class"] = agent_class
+
+        # Create agents list
+        agents: list[dict[str, Any]] = []
+        for agent_name in agents_in_timeline:
+            if self.is_missing_or_nan(agent_name):
+                continue
+
+            agent_class = agent_name  # Default
+            if self.agents_data is not None:
+                agent_row = self.agents_data[
+                    self.agents_data["name"] == agent_name
+                ]
+                if not agent_row.empty and "class" in agent_row.columns:
+                    agent_class_value = agent_row.iloc[0]["class"]
+                    if not self.is_missing_or_nan(agent_class_value):
+                        agent_class = agent_class_value
+
+            agents.append(
+                {
+                    "name": agent_name,
+                    "class": agent_class,
+                    "color": agent_colors.get(agent_name, DEFAULT_AGENT_COLOR),
+                }
+            )
+
+        # Calculate summary statistics
+        sessions = [item for item in timeline if item["type"] == "session"]
+        gaps = [item for item in timeline if item["type"] == "gap"]
+
+        total_tokens = sum(session["tokens"] for session in sessions)
+        gaps_compressed = sum(1 for gap in gaps if gap["compressed"])
+        time_saved = sum(
+            gap["real_duration"] - gap["duration"]
+            for gap in gaps
+            if gap["compressed"]
+        )
+
+        # Get model statistics
+        model_stats = {}
+        for session in sessions:
+            model = session.get("llm_model", "Unknown")
+            if model not in model_stats:
+                model_stats[model] = {"count": 0, "tokens": 0, "cost": 0}
+            model_stats[model]["count"] += 1
+            model_stats[model]["tokens"] += session.get("tokens", 0)
+            model_stats[model]["cost"] += session.get("cost", 0)
+
+        summary = {
+            "total_sessions": len(sessions),
+            "total_time": total_time,
+            "total_cost": total_cost,
+            "total_agents": len(agents_in_timeline),
+            "total_events": sum(session["events"] for session in sessions),
+            "total_tokens": total_tokens,
+            "avg_cost_per_session": (
+                total_cost / len(sessions) if sessions else 0
+            ),
+            "compression_info": {
+                "gaps_compressed": gaps_compressed,
+                "time_saved": time_saved,
+            },
+            "model_stats": model_stats,
+        }
+
+        # Create metadata
+        max_time = max([item["end"] for item in timeline]) if timeline else 0
+        max_cost = (
+            max([point["cumulative_cost"] for point in cost_timeline])
+            if cost_timeline
+            else 0
+        )
+
+        metadata = {
+            "time_range": [0, max_time * 1.1],
+            "cost_range": [0, max_cost * 1.1],
+            "colors": {
+                "human_input": ACTIVITY_COLORS["human_input_waiting"],
+                "processing": ACTIVITY_COLORS["processing"],
+                "agent_transition": ACTIVITY_COLORS["agent_transition"],
+                "cost_line": "#E91E63",
+            },
+        }
+
+        return {
+            "timeline": timeline,
+            "cost_timeline": cost_timeline,
+            "summary": summary,
+            "metadata": metadata,
+            "agents": agents,
+        }
+
+    @staticmethod
+    def get_short_results(results: dict[str, Any]) -> dict[str, Any]:
+        """Remove request/response from the timeline entries.
+
+        Parameters
+        ----------
+        results : dict[str, Any]
+            The original results dictionary.
+
+        Returns
+        -------
+        dict[str, Any]
+            The modified results dictionary with shortened timeline.
+        """
+        new_results = results.copy()
+        new_results["timeline"] = []
+        for item in results["timeline"]:
+            new_item = item.copy()
+            # Remove request and response fields
+            new_item.pop("request", None)
+            new_item.pop("response", None)
+            new_results["timeline"].append(new_item)
+        return new_results
+
+    @staticmethod
+    def get_files(logs_dir: Path | str) -> dict[str, str | None]:
+        """Get all CSV files in the specified directory.
+
+        Parameters
+        ----------
+        logs_dir : Path | str
+            The directory to search for CSV files.
+
+        Returns
+        -------
+        dict[str, str | None]
+            A dictionary mapping CSV file names to their paths
+            or None if not found.
+        """
+        agents_file = os.path.join(logs_dir, "agents.csv")
+        chat_file = os.path.join(logs_dir, "chat_completions.csv")
+        events_file = os.path.join(logs_dir, "events.csv")
+        functions_file = os.path.join(logs_dir, "function_calls.csv")
+
+        return {
+            "agents": agents_file if os.path.exists(agents_file) else None,
+            "chat": chat_file if os.path.exists(chat_file) else None,
+            "events": events_file if os.path.exists(events_file) else None,
+            "functions": (
+                functions_file if os.path.exists(functions_file) else None
+            ),
+        }
+
+
+def recursive_search(obj: Any, keys_to_find: list[str]) -> str:
+    """Recursively search for keys in a nested structure.
+
+    Parameters
+    ----------
+    obj : Any
+        The object to search within.
+    keys_to_find : list[str]
+        The keys to search for.
+
+    Returns
+    -------
+    str
+        The found value or "Unknown" if not found.
+    """
+    if isinstance(obj, dict):
+        for key in keys_to_find:
+            if key in obj and isinstance(obj[key], str) and obj[key].strip():
+                return obj[key]
+        for value in obj.values():
+            result = recursive_search(value, keys_to_find)
+            if result != "Unknown":
+                return result
+    elif isinstance(obj, list):
+        for item in obj:
+            result = recursive_search(item, keys_to_find)
+            if result != "Unknown":
+                return result
+    return "Unknown"
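
For orientation, a minimal sketch of how the new module might be driven end to end, based only on the methods shown in the diff above. This is not part of the release; the logs directory and output filename are hypothetical, while TimelineProcessor, get_files, load_csv_files, process_timeline, and get_short_results are as defined in the new file:

    # Hypothetical driver for waldiez/running/timeline_processor.py
    import json
    from pathlib import Path

    from waldiez.running.timeline_processor import TimelineProcessor

    logs_dir = Path("./waldiez_logs")  # assumed location of the CSV logs
    files = TimelineProcessor.get_files(logs_dir)

    processor = TimelineProcessor()
    processor.load_csv_files(
        agents_file=files["agents"],
        chat_file=files["chat"],
        events_file=files["events"],
        functions_file=files["functions"],
    )

    # chat_completions.csv is required; the other CSVs enrich the timeline.
    results = processor.process_timeline()
    short = TimelineProcessor.get_short_results(results)

    with open("timeline.json", "w", encoding="utf-8") as f:
        json.dump(short, f, indent=2)

Note that process_timeline raises ValueError when no chat data was loaded, so chat_completions.csv is the only required input; agents, events, and function-call logs only improve agent classes, gap categorization, and tool-call attribution.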