workspaces-euc-mcp-server 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,799 @@
1
+ # Copyright bengroeneveldsg. Licensed under the Apache License, Version 2.0 (the "License").
2
+ # You may not use this file except in compliance with the License.
3
+ # A copy of the License is located at http://www.apache.org/licenses/LICENSE-2.0
4
+ """Troubleshooting & triage tools (read-only, IAM Tier 0).
5
+
6
+ Each tool correlates several AWS signals (resource state, directory health, CloudWatch telemetry,
7
+ auto-scaling activity) into a single synthesized diagnosis with severity-ranked findings and
8
+ recommendations — rather than returning raw API output. All collection is best-effort: a failing
9
+ signal is recorded and the diagnosis proceeds with what it could gather.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import re
15
+ from datetime import UTC, datetime, timedelta
16
+ from typing import Any
17
+
18
+ from .. import consts
19
+ from ..clients import ClientFactory
20
+ from ..models import Diagnosis, DirectoryHealthReport, Finding, ServiceError
21
+ from ._common import read_only, try_call
22
+
23
# AWS Directory Service directory IDs look like d-xxxxxxxxxx (ten lowercase hex digits).
# WorkSpaces Pools and other WorkSpaces-managed directories use other prefixes (e.g. wsd-...)
# that are NOT backed by AWS Directory Service, so ds:DescribeDirectories rejects them.
# The pattern ends with \Z rather than $ because $ also matches just before a trailing
# newline, which would let "d-xxxxxxxxxx\n" through to the Directory Service call.
_AWS_DS_DIRECTORY_ID = re.compile(r"^d-[0-9a-f]{10}\Z")

# WorkSpace states that indicate the desktop itself is broken.
_UNHEALTHY_WORKSPACE_STATES = {"ERROR", "UNHEALTHY", "IMPAIRED"}
# States where the desktop is intentionally not running (AutoStop) but recoverable.
_STOPPED_WORKSPACE_STATES = {"STOPPED", "SUSPENDED"}

# Finding severity -> numeric rank (higher is worse), and rank -> overall status label.
_SEVERITY_RANK = {"info": 0, "warning": 1, "critical": 2}
_RANK_STATUS = {0: "healthy", 1: "degraded", 2: "unhealthy"}
35
+
36
+
37
def _overall_status(findings: list[Finding]) -> str:
    """Collapse a list of findings into a single overall status label.

    Returns ``"unknown"`` when there are no findings at all. Otherwise each finding's
    severity is mapped through ``_SEVERITY_RANK`` (an unrecognised severity counts as rank 0)
    and the label for the highest rank is looked up in ``_RANK_STATUS``.
    """
    if not findings:
        return "unknown"
    ranks = [_SEVERITY_RANK.get(item.severity, 0) for item in findings]
    return _RANK_STATUS[max(ranks)]
42
+
43
+
44
+ def _metric_stat(
45
+ cloudwatch: Any,
46
+ namespace: str,
47
+ metric_name: str,
48
+ dimensions: dict[str, str],
49
+ lookback_hours: int,
50
+ stat: str = "Sum",
51
+ ) -> float | None:
52
+ """Fetch a single aggregated CloudWatch metric value over the lookback window."""
53
+ end = datetime.now(UTC)
54
+ start = end - timedelta(hours=lookback_hours)
55
+ response = cloudwatch.get_metric_data(
56
+ MetricDataQueries=[
57
+ {
58
+ "Id": "m1",
59
+ "MetricStat": {
60
+ "Metric": {
61
+ "Namespace": namespace,
62
+ "MetricName": metric_name,
63
+ "Dimensions": [{"Name": k, "Value": v} for k, v in dimensions.items()],
64
+ },
65
+ "Period": 3600,
66
+ "Stat": stat,
67
+ },
68
+ "ReturnData": True,
69
+ }
70
+ ],
71
+ StartTime=start,
72
+ EndTime=end,
73
+ )
74
+ values = response.get("MetricDataResults", [{}])[0].get("Values", [])
75
+ if not values:
76
+ return None
77
+ if stat == "Sum":
78
+ return float(sum(values))
79
+ if stat == "Maximum":
80
+ return float(max(values))
81
+ if stat == "Minimum":
82
+ return float(min(values))
83
+ return float(sum(values) / len(values))
84
+
85
+
86
+ # --------------------------------------------------------------------------------------
87
+ # WorkSpaces Personal connectivity
88
+ # --------------------------------------------------------------------------------------
89
+
90
+
91
def diagnose_workspace_connectivity_core(
    factory: ClientFactory,
    workspace_id: str,
    region: str | None,
    lookback_hours: int = 24,
) -> Diagnosis:
    """Diagnose why a WorkSpaces Personal desktop may be unreachable.

    Gathers, in order: the WorkSpace description, its connection status, the health of its
    directory (via ``_diagnose_directory_into``), and the ``ConnectionFailure`` /
    ``ConnectionAttempt`` CloudWatch metrics. Every call goes through ``try_call`` so a
    failing signal lands in ``errors`` instead of aborting the diagnosis.

    Args:
        factory: Client factory used to build the WorkSpaces and CloudWatch clients.
        workspace_id: Identifier of the WorkSpace to diagnose.
        region: Region passed to the client factory and echoed in the result.
        lookback_hours: Window, in hours, for the CloudWatch connection metrics.

    Returns:
        A ``Diagnosis`` whose status is derived from the worst finding severity, or a
        ``not_found`` / ``unknown`` diagnosis when the WorkSpace could not be described.
    """
    errors: list[ServiceError] = []
    findings: list[Finding] = []
    signals: dict[str, object] = {}

    workspaces = factory.client(consts.WORKSPACES_API, region=region)

    described = try_call(
        errors,
        consts.PRODUCT_WORKSPACES_PERSONAL,
        "DescribeWorkspaces",
        lambda: workspaces.describe_workspaces(WorkspaceIds=[workspace_id]),
        default={},
    )
    items = (described or {}).get("Workspaces", [])
    if not items:
        # An empty result with no recorded error means "does not exist"; with an error we
        # cannot tell, so the status is "unknown".
        return Diagnosis(
            target_type=consts.PRODUCT_WORKSPACES_PERSONAL,
            target_id=workspace_id,
            region=region,
            status="not_found" if not errors else "unknown",
            summary=f"WorkSpace {workspace_id} was not found in {region or 'the region'}."
            if not errors
            else f"Could not retrieve WorkSpace {workspace_id}.",
            findings=findings,
            errors=errors,
        )

    ws = items[0]
    state = ws.get("State", "UNKNOWN")
    directory_id = ws.get("DirectoryId")
    signals["state"] = state
    signals["user_name"] = ws.get("UserName")
    signals["computer_name"] = ws.get("ComputerName")
    signals["directory_id"] = directory_id
    signals["compute_type"] = ws.get("WorkspaceProperties", {}).get("ComputeTypeName")
    signals["running_mode"] = ws.get("WorkspaceProperties", {}).get("RunningMode")

    if state in _UNHEALTHY_WORKSPACE_STATES:
        findings.append(
            Finding(
                severity="critical",
                title=f"WorkSpace is in {state} state",
                detail=f"The desktop reports {state}, so connections will fail.",
                recommendation="Reboot the WorkSpace; if it persists, rebuild or restore it.",
            )
        )
    elif state in {"TERMINATING", "TERMINATED"}:
        # A WorkSpace that is being deleted never returns to AVAILABLE, so it must not fall
        # through to the "transitional, retry later" branch and be reported as healthy.
        findings.append(
            Finding(
                severity="critical",
                title=f"WorkSpace is {state}",
                detail="The WorkSpace is being or has been deleted, so it can no longer "
                "accept connections.",
                recommendation="Provision a new WorkSpace for the user if access is still "
                "required.",
            )
        )
    elif state in _STOPPED_WORKSPACE_STATES:
        findings.append(
            Finding(
                severity="warning",
                title=f"WorkSpace is {state}",
                detail="An AutoStop WorkSpace is powered off and starts on connect; a failed start "
                "would present as an inability to connect.",
                recommendation="Confirm it resumes on connect; check start failures otherwise.",
            )
        )
    elif state == "AVAILABLE":
        findings.append(
            Finding(
                severity="info",
                title="WorkSpace state is AVAILABLE",
                detail="The desktop itself is healthy and reachable.",
            )
        )
    else:
        findings.append(
            Finding(
                severity="info",
                title=f"WorkSpace is in transitional state {state}",
                detail="The desktop is mid-transition; retry once it reaches AVAILABLE.",
            )
        )

    conn = try_call(
        errors,
        consts.PRODUCT_WORKSPACES_PERSONAL,
        "DescribeWorkspacesConnectionStatus",
        lambda: workspaces.describe_workspaces_connection_status(WorkspaceIds=[workspace_id]),
        default={},
    )
    conn_items = (conn or {}).get("WorkspacesConnectionStatus", [])
    if conn_items:
        conn_state = conn_items[0].get("ConnectionState", "UNKNOWN")
        signals["connection_state"] = conn_state
        # Stringified so the signal stays a plain value whatever type the API returned.
        signals["last_known_user_connection"] = str(
            conn_items[0].get("LastKnownUserConnectionTimestamp", "")
        )
        if conn_state == "CONNECTED":
            findings.append(
                Finding(
                    severity="info",
                    title="A user is currently connected",
                    detail="The WorkSpace shows an active connection right now.",
                )
            )

    if directory_id:
        _diagnose_directory_into(
            factory,
            region,
            directory_id,
            findings,
            errors,
            signals_prefix="directory_",
            signals=signals,
        )

    cloudwatch = factory.client(consts.CLOUDWATCH_API, region=region)
    failures = try_call(
        errors,
        "Amazon CloudWatch",
        "GetMetricData",
        lambda: _metric_stat(
            cloudwatch,
            "AWS/WorkSpaces",
            "ConnectionFailure",
            {"WorkspaceId": workspace_id},
            lookback_hours,
        ),
    )
    attempts = try_call(
        errors,
        "Amazon CloudWatch",
        "GetMetricData",
        lambda: _metric_stat(
            cloudwatch,
            "AWS/WorkSpaces",
            "ConnectionAttempt",
            {"WorkspaceId": workspace_id},
            lookback_hours,
        ),
    )
    if failures is not None:
        signals["connection_failures"] = failures
        signals["connection_attempts"] = attempts
        if failures > 0:
            # Only show the failures/attempts ratio when an attempt count is available.
            ratio = f" ({failures:.0f}/{attempts:.0f} attempts)" if attempts else ""
            findings.append(
                Finding(
                    severity="warning",
                    title=f"{failures:.0f} connection failures in {lookback_hours}h{ratio}",
                    detail="Repeated connection failures suggest a client, network, or directory "
                    "problem rather than the desktop state.",
                    recommendation="Check the client/network path and directory health below.",
                )
            )
    elif attempts is not None:
        # Keep the attempt count even when the failure metric could not be collected.
        signals["connection_attempts"] = attempts

    status = _overall_status(findings)
    return Diagnosis(
        target_type=consts.PRODUCT_WORKSPACES_PERSONAL,
        target_id=workspace_id,
        region=region,
        status=status,
        summary=_summarize(status, f"WorkSpace {workspace_id}"),
        signals=signals,
        findings=findings,
        errors=errors,
    )
255
+
256
+
257
+ # --------------------------------------------------------------------------------------
258
+ # Directory health (shared dependency + standalone tool)
259
+ # --------------------------------------------------------------------------------------
260
+
261
+
262
def _diagnose_directory_into(
    factory: ClientFactory,
    region: str | None,
    directory_id: str,
    findings: list[Finding],
    errors: list[ServiceError],
    signals_prefix: str = "",
    signals: dict[str, object] | None = None,
) -> None:
    """Append directory-health findings (registration + Directory Service stage).

    First checks the directory's WorkSpaces registration state. If the id has the AWS
    Directory Service shape (``_AWS_DS_DIRECTORY_ID``), the Directory Service stage is then
    checked as well; otherwise the directory is reported as WorkSpaces-managed and the
    function returns early.

    Args:
        factory: Client factory used to build the WorkSpaces and Directory Service clients.
        region: Region passed to the client factory.
        directory_id: Directory to evaluate.
        findings: List that findings are appended to (mutated in place).
        errors: List handed to ``try_call`` for recording failed calls (mutated in place).
        signals_prefix: Prefix put in front of the signal keys written by this function.
        signals: Optional dict that receives ``registration_state`` and ``stage`` signals.
    """
    workspaces = factory.client(consts.WORKSPACES_API, region=region)
    # NOTE(review): assumes try_call appends to `errors` when the call fails — confirm. The
    # count is used below to tell "lookup failed" apart from "lookup returned nothing".
    errors_before = len(errors)
    reg = try_call(
        errors,
        consts.PRODUCT_WORKSPACES_PERSONAL,
        "DescribeWorkspaceDirectories",
        lambda: workspaces.describe_workspace_directories(DirectoryIds=[directory_id]),
        default={},
    )
    dirs = (reg or {}).get("Directories", [])
    if dirs:
        reg_state = dirs[0].get("State", "UNKNOWN")
        if signals is not None:
            signals[f"{signals_prefix}registration_state"] = reg_state
        if reg_state != "REGISTERED":
            findings.append(
                Finding(
                    severity="critical",
                    title=f"Directory {directory_id} is {reg_state}, not REGISTERED",
                    detail="WorkSpaces cannot broker connections through a directory that is not "
                    "registered.",
                    recommendation="Re-register the directory with WorkSpaces.",
                )
            )
    elif len(errors) == errors_before:
        # The lookup succeeded but returned no entry. Without this finding an unknown id
        # could end up with only informational findings and be reported as healthy.
        findings.append(
            Finding(
                severity="warning",
                title=f"Directory {directory_id} is not registered with WorkSpaces",
                detail="DescribeWorkspaceDirectories returned no entry for this directory id.",
                recommendation="Verify the directory id and region, or register the directory "
                "with WorkSpaces.",
            )
        )

    # fullmatch so that an id followed by a newline is not treated as a Directory Service id.
    if not _AWS_DS_DIRECTORY_ID.fullmatch(directory_id):
        # WorkSpaces-managed (e.g. Pools) directory — no AWS Directory Service stage to check.
        if signals is not None:
            signals[f"{signals_prefix}stage"] = "N/A (WorkSpaces-managed)"
        findings.append(
            Finding(
                severity="info",
                title=f"Directory {directory_id} is WorkSpaces-managed",
                detail="This directory is not backed by AWS Directory Service, so there is no "
                "Directory Service stage to evaluate; registration state is used instead.",
            )
        )
        return

    stage_resp = try_call(
        errors,
        "AWS Directory Service",
        "DescribeDirectories",
        # The client is built inside the callable so a failure to create it is handled by
        # try_call like any other failed signal.
        lambda: factory.client(consts.DIRECTORY_API, region=region).describe_directories(
            DirectoryIds=[directory_id]
        ),
        default={},
    )
    desc = (stage_resp or {}).get("DirectoryDescriptions", [])
    if desc:
        stage = desc[0].get("Stage", "Unknown")
        if signals is not None:
            signals[f"{signals_prefix}stage"] = stage
        if stage != "Active":
            findings.append(
                Finding(
                    severity="critical",
                    title=f"Directory {directory_id} stage is {stage}",
                    detail=f"AWS Directory Service reports the directory as {stage}; this blocks "
                    "authentication and connections.",
                    recommendation="Investigate the directory in AWS Directory Service "
                    "(DNS, domain controllers, networking).",
                )
            )
        else:
            findings.append(
                Finding(
                    severity="info",
                    title=f"Directory {directory_id} is Active",
                    detail="The backing directory is healthy.",
                )
            )
343
+
344
+
345
def check_directory_health_core(
    factory: ClientFactory,
    directory_id: str | None,
    region: str | None,
) -> DirectoryHealthReport:
    """Build a health report for one directory or for every WorkSpaces-registered directory.

    When ``directory_id`` is given only that directory is diagnosed. Otherwise the registered
    directories are listed first, following ``NextToken`` so that directories beyond the
    first page of results are included, and each one is diagnosed in turn.

    Args:
        factory: Client factory used to build the WorkSpaces client.
        directory_id: A specific directory id, or ``None``/empty to check all of them.
        region: Region passed to the client factory and echoed in the report.

    Returns:
        A ``DirectoryHealthReport`` with one ``Diagnosis`` per directory. Errors from the
        listing call go on the report; errors from diagnosing a directory go on that
        directory's ``Diagnosis``.
    """
    errors: list[ServiceError] = []
    workspaces = factory.client(consts.WORKSPACES_API, region=region)

    directory_ids: list[str] = []
    if directory_id:
        directory_ids.append(directory_id)
    else:

        def _collect_registered_ids() -> dict[str, Any]:
            # Ids are appended to the enclosing list page by page, so pages already fetched
            # are kept even if a later page request fails.
            request: dict[str, Any] = {}
            while True:
                page = workspaces.describe_workspace_directories(**request) or {}
                for entry in page.get("Directories", []):
                    found = entry.get("DirectoryId")
                    if found:
                        directory_ids.append(found)
                token = page.get("NextToken")
                if not token:
                    return page
                request = {"NextToken": token}

        try_call(
            errors,
            consts.PRODUCT_WORKSPACES_PERSONAL,
            "DescribeWorkspaceDirectories",
            _collect_registered_ids,
            default={},
        )

    diagnoses: list[Diagnosis] = []
    for did in directory_ids:
        # Fresh containers per directory so findings and errors never leak across entries.
        findings: list[Finding] = []
        signals: dict[str, object] = {}
        dir_errors: list[ServiceError] = []
        _diagnose_directory_into(factory, region, did, findings, dir_errors, signals=signals)
        status = _overall_status(findings)
        diagnoses.append(
            Diagnosis(
                target_type="WorkSpaces directory",
                target_id=did,
                region=region,
                status=status,
                summary=_summarize(status, f"Directory {did}"),
                signals=signals,
                findings=findings,
                errors=dir_errors,
            )
        )

    return DirectoryHealthReport(region=region, directories=diagnoses, errors=errors)
387
+
388
+
389
+ # --------------------------------------------------------------------------------------
390
+ # WorkSpaces Applications fleet
391
+ # --------------------------------------------------------------------------------------
392
+
393
+
394
def diagnose_application_fleet_core(
    factory: ClientFactory,
    fleet_name: str,
    region: str | None,
    lookback_hours: int = 24,
) -> Diagnosis:
    """Diagnose the health and capacity of a WorkSpaces Applications fleet.

    Gathers, in order: the fleet description (state, fleet errors, compute capacity),
    Application Auto Scaling activities for the fleet, and the ``InsufficientCapacityError``
    CloudWatch metric. Every call goes through ``try_call`` so a failing signal lands in
    ``errors`` instead of aborting the diagnosis.

    Args:
        factory: Client factory used to build the service clients.
        fleet_name: Name of the fleet to diagnose.
        region: Region passed to the client factory and echoed in the result.
        lookback_hours: Window, in hours, applied to the CloudWatch metric and to the
            scaling activities that are considered.

    Returns:
        A ``Diagnosis`` whose status is derived from the worst finding severity, or a
        ``not_found`` / ``unknown`` diagnosis when the fleet could not be described.
    """
    errors: list[ServiceError] = []
    findings: list[Finding] = []
    signals: dict[str, object] = {}

    appstream = factory.client(consts.APPSTREAM_API, region=region)
    described = try_call(
        errors,
        consts.PRODUCT_WORKSPACES_APPLICATIONS,
        "DescribeFleets",
        lambda: appstream.describe_fleets(Names=[fleet_name]),
        default={},
    )
    fleets = (described or {}).get("Fleets", [])
    if not fleets:
        # An empty result with no recorded error means "does not exist"; with an error we
        # cannot tell, so the status is "unknown".
        return Diagnosis(
            target_type=consts.PRODUCT_WORKSPACES_APPLICATIONS,
            target_id=fleet_name,
            region=region,
            status="not_found" if not errors else "unknown",
            summary=f"Fleet {fleet_name} was not found in {region or 'the region'}."
            if not errors
            else f"Could not retrieve fleet {fleet_name}.",
            findings=findings,
            errors=errors,
        )

    fleet = fleets[0]
    state = fleet.get("State", "UNKNOWN")
    signals["state"] = state

    if state == "STOPPED":
        findings.append(
            Finding(
                severity="warning",
                title="Fleet is STOPPED",
                detail="A stopped fleet serves no sessions.",
                recommendation="Start the fleet if users need access.",
            )
        )
    elif state in {"STARTING", "STOPPING"}:
        findings.append(
            Finding(
                severity="info",
                title=f"Fleet is {state}",
                detail="The fleet is mid-transition.",
            )
        )
    elif state == "RUNNING":
        findings.append(
            Finding(
                severity="info",
                title="Fleet is RUNNING",
                detail="The fleet is active.",
            )
        )

    for err in fleet.get("FleetErrors", []) or []:
        findings.append(
            Finding(
                severity="critical",
                title=f"Fleet error: {err.get('ErrorCode', 'Unknown')}",
                detail=err.get("ErrorMessage", "No message provided."),
                recommendation="Resolve the underlying error (IAM role, image, or networking).",
            )
        )

    # `or {}` also covers a key that is present but null.
    capacity = fleet.get("ComputeCapacityStatus") or {}
    desired = capacity.get("Desired")
    running = capacity.get("Running")
    in_use = capacity.get("InUse")
    available = capacity.get("Available")
    signals["capacity"] = {
        "desired": desired,
        "running": running,
        "in_use": in_use,
        "available": available,
    }
    if available == 0 and running and in_use is not None and in_use >= running:
        findings.append(
            Finding(
                severity="critical",
                title="Fleet capacity is exhausted",
                detail=f"All {running} running instances are in use (0 available); new sessions "
                "will be rejected.",
                recommendation="Raise desired capacity or enable/extend auto scaling.",
            )
        )
    elif desired is not None and running is not None and running < desired:
        findings.append(
            Finding(
                severity="warning",
                title=f"Fleet is below desired capacity ({running}/{desired})",
                detail="Fewer instances are running than desired; users may queue while it scales.",
                recommendation="Check scaling activity and instance launch errors.",
            )
        )

    scaling = try_call(
        errors,
        "Application Auto Scaling",
        "DescribeScalingActivities",
        lambda: factory.client(
            "application-autoscaling", region=region
        ).describe_scaling_activities(
            ServiceNamespace="appstream", ResourceId=f"fleet/{fleet_name}"
        ),
        default={},
    )
    window_start = datetime.now(UTC) - timedelta(hours=lookback_hours)

    def _in_window(activity: dict[str, Any]) -> bool:
        # Activities without a usable timezone-aware StartTime are kept rather than
        # silently dropped.
        started = activity.get("StartTime")
        if not isinstance(started, datetime) or started.tzinfo is None:
            return True
        return started >= window_start

    recent = [a for a in (scaling or {}).get("ScalingActivities", []) if _in_window(a)]
    # Pending / InProgress activities have not finished yet, so they are not failures.
    not_failed = (None, "Successful", "Pending", "InProgress")
    failed_scaling = [a for a in recent if a.get("StatusCode") not in not_failed]
    if failed_scaling:
        signals["failed_scaling_activities"] = len(failed_scaling)
        # NOTE(review): takes the first entry as the most recent — confirm the API ordering.
        latest = failed_scaling[0]
        findings.append(
            Finding(
                severity="warning",
                title=f"{len(failed_scaling)} recent scaling activities did not succeed",
                detail=f"Most recent: {latest.get('StatusCode')} — "
                f"{latest.get('StatusMessage', 'no message')}.",
                recommendation="Review service limits and the fleet's scaling policy.",
            )
        )

    cloudwatch = factory.client(consts.CLOUDWATCH_API, region=region)
    insufficient = try_call(
        errors,
        "Amazon CloudWatch",
        "GetMetricData",
        lambda: _metric_stat(
            cloudwatch,
            "AWS/AppStream",
            "InsufficientCapacityError",
            {"Fleet": fleet_name},
            lookback_hours,
        ),
    )
    # Truthiness on purpose: both "no data" (None) and a zero count produce no finding.
    if insufficient:
        signals["insufficient_capacity_errors"] = insufficient
        findings.append(
            Finding(
                severity="critical",
                title=f"{insufficient:.0f} insufficient-capacity errors in {lookback_hours}h",
                detail="Users were denied sessions because no capacity was available.",
                recommendation="Increase capacity or auto-scaling headroom.",
            )
        )

    status = _overall_status(findings)
    return Diagnosis(
        target_type=consts.PRODUCT_WORKSPACES_APPLICATIONS,
        target_id=fleet_name,
        region=region,
        status=status,
        summary=_summarize(status, f"Fleet {fleet_name}"),
        signals=signals,
        findings=findings,
        errors=errors,
    )
558
+
559
+
560
+ def _summarize(status: str, subject: str) -> str:
561
+ return {
562
+ "healthy": f"{subject} looks healthy.",
563
+ "degraded": f"{subject} is degraded — see findings.",
564
+ "unhealthy": f"{subject} is unhealthy — see critical findings.",
565
+ "unknown": f"{subject} could not be fully assessed.",
566
+ "not_found": f"{subject} was not found.",
567
+ }.get(status, f"{subject}: {status}.")
568
+
569
+
570
+ # --------------------------------------------------------------------------------------
571
+ # WorkSpaces Pools
572
+ # --------------------------------------------------------------------------------------
573
+
574
+
575
+ def diagnose_pool_core(
576
+ factory: ClientFactory,
577
+ pool_id: str,
578
+ region: str | None,
579
+ lookback_hours: int = 24,
580
+ ) -> Diagnosis:
581
+ errors: list[ServiceError] = []
582
+ findings: list[Finding] = []
583
+ signals: dict[str, object] = {}
584
+
585
+ workspaces = factory.client(consts.WORKSPACES_API, region=region)
586
+ described = try_call(
587
+ errors,
588
+ consts.PRODUCT_WORKSPACES_POOLS,
589
+ "DescribeWorkspacesPools",
590
+ lambda: workspaces.describe_workspaces_pools(PoolIds=[pool_id]),
591
+ default={},
592
+ )
593
+ pools = (described or {}).get("WorkspacesPools", [])
594
+ if not pools:
595
+ return Diagnosis(
596
+ target_type=consts.PRODUCT_WORKSPACES_POOLS,
597
+ target_id=pool_id,
598
+ region=region,
599
+ status="not_found" if not errors else "unknown",
600
+ summary=f"Pool {pool_id} was not found in {region or 'the region'}."
601
+ if not errors
602
+ else f"Could not retrieve pool {pool_id}.",
603
+ findings=findings,
604
+ errors=errors,
605
+ )
606
+
607
+ pool = pools[0]
608
+ state = pool.get("State", "UNKNOWN")
609
+ directory_id = pool.get("DirectoryId")
610
+ signals["state"] = state
611
+ signals["running_mode"] = pool.get("RunningMode")
612
+ signals["directory_id"] = directory_id
613
+
614
+ if state == "STOPPED":
615
+ findings.append(
616
+ Finding(
617
+ severity="warning",
618
+ title="Pool is STOPPED",
619
+ detail="A stopped pool serves no sessions.",
620
+ recommendation="Start the pool if users need access.",
621
+ )
622
+ )
623
+ elif state in {"STARTING", "STOPPING", "UPDATING"}:
624
+ findings.append(
625
+ Finding(severity="info", title=f"Pool is {state}", detail="The pool is mid-transition.")
626
+ )
627
+ elif state == "RUNNING":
628
+ findings.append(
629
+ Finding(severity="info", title="Pool is RUNNING", detail="The pool is active.")
630
+ )
631
+
632
+ for err in pool.get("Errors", []) or []:
633
+ findings.append(
634
+ Finding(
635
+ severity="critical",
636
+ title=f"Pool error: {err.get('ErrorCode', 'Unknown')}",
637
+ detail=err.get("ErrorMessage", "No message provided."),
638
+ recommendation="Resolve the underlying error (directory, networking, or bundle).",
639
+ )
640
+ )
641
+
642
+ cap = pool.get("CapacityStatus", {})
643
+ desired = cap.get("DesiredUserSessions")
644
+ actual = cap.get("ActualUserSessions")
645
+ active = cap.get("ActiveUserSessions")
646
+ available = cap.get("AvailableUserSessions")
647
+ signals["capacity"] = {
648
+ "desired": desired,
649
+ "actual": actual,
650
+ "active": active,
651
+ "available": available,
652
+ }
653
+ if available == 0 and active and actual is not None and active >= actual:
654
+ findings.append(
655
+ Finding(
656
+ severity="critical",
657
+ title="Pool session capacity is exhausted",
658
+ detail=f"All {actual} session slots are in use (0 available); new sessions will "
659
+ "be rejected.",
660
+ recommendation="Raise desired capacity or enable/extend auto scaling.",
661
+ )
662
+ )
663
+ elif desired is not None and actual is not None and actual < desired:
664
+ findings.append(
665
+ Finding(
666
+ severity="warning",
667
+ title=f"Pool is below desired capacity ({actual}/{desired})",
668
+ detail="Fewer session slots are available than desired; users may queue while it "
669
+ "scales.",
670
+ recommendation="Check directory health and scaling activity.",
671
+ )
672
+ )
673
+
674
+ if directory_id:
675
+ _diagnose_directory_into(
676
+ factory,
677
+ region,
678
+ directory_id,
679
+ findings,
680
+ errors,
681
+ signals_prefix="directory_",
682
+ signals=signals,
683
+ )
684
+
685
+ cloudwatch = factory.client(consts.CLOUDWATCH_API, region=region)
686
+ util = try_call(
687
+ errors,
688
+ "Amazon CloudWatch",
689
+ "GetMetricData",
690
+ lambda: _metric_stat(
691
+ cloudwatch,
692
+ "AWS/WorkSpaces",
693
+ "UserSessionsCapacityUtilization",
694
+ {consts.WORKSPACES_POOL_DIMENSION: pool_id},
695
+ lookback_hours,
696
+ stat="Maximum",
697
+ ),
698
+ )
699
+ if util is not None:
700
+ signals["peak_utilization_percent"] = util
701
+
702
+ status = _overall_status(findings)
703
+ return Diagnosis(
704
+ target_type=consts.PRODUCT_WORKSPACES_POOLS,
705
+ target_id=pool_id,
706
+ region=region,
707
+ status=status,
708
+ summary=_summarize(status, f"Pool {pool_id}"),
709
+ signals=signals,
710
+ findings=findings,
711
+ errors=errors,
712
+ )
713
+
714
+
715
+ # --------------------------------------------------------------------------------------
716
+ # Registration
717
+ # --------------------------------------------------------------------------------------
718
+
719
+
720
def register(mcp: Any, factory: ClientFactory) -> None:
    """Attach the four read-only diagnostics tools to the FastMCP application.

    Each tool is a thin async wrapper: it falls back to ``factory.region`` when no region is
    passed, calls the matching ``*_core`` function, and returns the model dumped to a dict.
    """
    # NOTE(review): the inner function names, signatures and docstrings are presumably read
    # by the MCP framework as tool metadata, so they are kept verbatim — confirm before
    # editing them.

    async def diagnose_workspace_connectivity(
        workspace_id: str, region: str | None = None, lookback_hours: int = 24
    ) -> dict[str, Any]:
        """Diagnose why a WorkSpaces Personal desktop may be unreachable.

        Correlates the WorkSpace state, live connection status, backing directory health, and
        recent CloudWatch connection metrics into a single verdict with severity-ranked findings
        and recommendations. Read-only.

        Args:
            workspace_id: The WorkSpace ID (e.g. ws-xxxxxxxxx).
            region: AWS region. Defaults to the server's configured region.
            lookback_hours: Window for CloudWatch connection metrics (default 24).
        """
        effective_region = region or factory.region
        return diagnose_workspace_connectivity_core(
            factory, workspace_id, effective_region, lookback_hours
        ).model_dump()

    async def diagnose_application_fleet(
        fleet_name: str, region: str | None = None, lookback_hours: int = 24
    ) -> dict[str, Any]:
        """Diagnose a WorkSpaces Applications (formerly AppStream 2.0) fleet's health and capacity.

        Use this for any "AppStream" fleet request — WorkSpaces Applications is the rebranded
        AppStream 2.0 (same service/API). Correlates fleet state, fleet errors, compute capacity,
        auto-scaling activity, and recent insufficient-capacity CloudWatch errors into a single
        verdict. Read-only.

        Args:
            fleet_name: The fleet name.
            region: AWS region. Defaults to the server's configured region.
            lookback_hours: Window for CloudWatch capacity metrics (default 24).
        """
        effective_region = region or factory.region
        return diagnose_application_fleet_core(
            factory, fleet_name, effective_region, lookback_hours
        ).model_dump()

    async def check_directory_health(
        directory_id: str | None = None, region: str | None = None
    ) -> dict[str, Any]:
        """Check the health of WorkSpaces-registered directories.

        Reports registration state and AWS Directory Service stage for one directory, or all
        WorkSpaces-registered directories in the region when no id is given. Read-only.

        Args:
            directory_id: A specific directory id, or omit to check all registered directories.
            region: AWS region. Defaults to the server's configured region.
        """
        effective_region = region or factory.region
        return check_directory_health_core(factory, directory_id, effective_region).model_dump()

    async def diagnose_pool(
        pool_id: str, region: str | None = None, lookback_hours: int = 24
    ) -> dict[str, Any]:
        """Diagnose a WorkSpaces Pool's health and session capacity.

        Correlates pool state, pool errors, user-session capacity, backing directory health, and
        recent CloudWatch session-capacity utilization into a single verdict. Read-only.

        Args:
            pool_id: The WorkSpaces Pool ID (wspool-...).
            region: AWS region. Defaults to the server's configured region.
            lookback_hours: Window for CloudWatch utilization (default 24).
        """
        effective_region = region or factory.region
        return diagnose_pool_core(
            factory, pool_id, effective_region, lookback_hours
        ).model_dump()

    # Registered in a fixed order, each with a read-only annotation carrying its title.
    tools_with_titles = (
        (diagnose_workspace_connectivity, "Diagnose WorkSpace connectivity"),
        (diagnose_application_fleet, "Diagnose Applications fleet"),
        (check_directory_health, "Check directory health"),
        (diagnose_pool, "Diagnose WorkSpaces Pool"),
    )
    for tool, title in tools_with_titles:
        mcp.add_tool(tool, annotations=read_only(title))