vmware-aria-logs 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vmware_aria_logs-0.1.0/.env.example +17 -0
- vmware_aria_logs-0.1.0/.gitignore +21 -0
- vmware_aria_logs-0.1.0/LICENSE +21 -0
- vmware_aria_logs-0.1.0/PKG-INFO +131 -0
- vmware_aria_logs-0.1.0/README.md +112 -0
- vmware_aria_logs-0.1.0/glama.json +6 -0
- vmware_aria_logs-0.1.0/pyproject.toml +39 -0
- vmware_aria_logs-0.1.0/smithery.yaml +49 -0
- vmware_aria_logs-0.1.0/src/vmware_aria_logs/__init__.py +3 -0
- vmware_aria_logs-0.1.0/src/vmware_aria_logs/analysis/__init__.py +0 -0
- vmware_aria_logs-0.1.0/src/vmware_aria_logs/analysis/events.py +95 -0
- vmware_aria_logs-0.1.0/src/vmware_aria_logs/analysis/incidents.py +81 -0
- vmware_aria_logs-0.1.0/src/vmware_aria_logs/clients/__init__.py +0 -0
- vmware_aria_logs-0.1.0/src/vmware_aria_logs/clients/loginsight.py +215 -0
- vmware_aria_logs-0.1.0/src/vmware_aria_logs/clients/vrops.py +123 -0
- vmware_aria_logs-0.1.0/src/vmware_aria_logs/server.py +265 -0
- vmware_aria_logs-0.1.0/tests/test_clients.py +497 -0
- vmware_aria_logs-0.1.0/tests/test_events.py +123 -0
- vmware_aria_logs-0.1.0/tests/test_incidents.py +91 -0
- vmware_aria_logs-0.1.0/tests/test_server.py +198 -0
- vmware_aria_logs-0.1.0/uv.lock +988 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# VMware Aria Operations for Logs MCP Server
|
|
2
|
+
# Copy to .env and fill in your values
|
|
3
|
+
|
|
4
|
+
# Required: Log Insight connection
|
|
5
|
+
LI_BASE_URL=https://loginsight.example.com
|
|
6
|
+
LI_USERNAME=admin
|
|
7
|
+
LI_PASSWORD=your-password
|
|
8
|
+
LI_PROVIDER=Local
|
|
9
|
+
LI_VERIFY_TLS=false
|
|
10
|
+
LI_TIMEOUT_SEC=30
|
|
11
|
+
|
|
12
|
+
# Optional: VMware Aria Operations (vROps) correlation
|
|
13
|
+
# VROPS_BASE_URL=https://vrops.example.com
|
|
14
|
+
# VROPS_USERNAME=admin
|
|
15
|
+
# VROPS_PASSWORD=your-password
|
|
16
|
+
# VROPS_AUTH_SOURCE=local
|
|
17
|
+
# VROPS_VERIFY_TLS=false
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 apollion69
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vmware-aria-logs
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server for VMware Aria Operations for Logs (formerly vRealize Log Insight)
|
|
5
|
+
Project-URL: Homepage, https://github.com/apollion69/vmware-aria-logs
|
|
6
|
+
Project-URL: Repository, https://github.com/apollion69/vmware-aria-logs
|
|
7
|
+
Project-URL: Issues, https://github.com/apollion69/vmware-aria-logs/issues
|
|
8
|
+
Author: apollion69
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: aria,log-insight,mcp,observability,vmware,vrli
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: System :: Monitoring
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Requires-Dist: mcp[cli]>=1.0.0
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# VMware Aria Operations for Logs — MCP Server
|
|
21
|
+
|
|
22
|
+
[PyPI version](https://pypi.org/project/vmware-aria-logs/)
|
|
23
|
+
[Supported Python versions](https://pypi.org/project/vmware-aria-logs/)
|
|
24
|
+
[License: MIT](LICENSE)
|
|
25
|
+
[Smithery](https://smithery.ai/server/vmware-aria-logs)
|
|
26
|
+
|
|
27
|
+
MCP server for querying and analyzing logs from **VMware Aria Operations for Logs** (formerly vRealize Log Insight). Provides log search, mass incident detection, and optional VMware Aria Operations (vROps) correlation.
|
|
28
|
+
|
|
29
|
+
Built for use with [Claude Code](https://claude.ai/code), [Claude Desktop](https://claude.ai/download), [LobeChat](https://github.com/lobehub/lobe-chat), and any MCP-compatible client.
|
|
30
|
+
|
|
31
|
+
## Features
|
|
32
|
+
|
|
33
|
+
- **Log Search** — Query events with time range, text filters, and field constraints via Log Insight API v2
|
|
34
|
+
- **Incident Detection** — Signature-based clustering to identify mass log incidents (Stormbreaker engine)
|
|
35
|
+
- **API Surface Probe** — Detect appliance version and available API endpoints
|
|
36
|
+
- **Dashboard Listing** — Enumerate saved dashboards (legacy vRLIC API, deprecated on 8.18+)
|
|
37
|
+
- **vROps Correlation** — Cross-reference log entities with Aria Operations resources and alerts
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
### Install via uvx (recommended)
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
uvx vmware-aria-logs
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Install via pip
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install vmware-aria-logs
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Run with environment variables
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
export LI_BASE_URL=https://loginsight.example.com
|
|
57
|
+
export LI_USERNAME=admin
|
|
58
|
+
export LI_PASSWORD=your-password
|
|
59
|
+
export LI_PROVIDER=Local
|
|
60
|
+
|
|
61
|
+
vmware-aria-logs
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## MCP Tools
|
|
65
|
+
|
|
66
|
+
| Tool | Description |
|
|
67
|
+
|------|-------------|
|
|
68
|
+
| `query_events` | Search log events with time range, text filter, field constraints |
|
|
69
|
+
| `get_version` | Get appliance version and probe API surface |
|
|
70
|
+
| `list_dashboards` | List saved dashboards (legacy vRLIC API, deprecated on 8.18+) |
|
|
71
|
+
| `detect_incidents` | Mass incident detection via signature clustering |
|
|
72
|
+
| `find_vrops_resources` | Find entities in Aria Operations by name |
|
|
73
|
+
| `get_vrops_alerts` | Get alerts for specific vROps resources |
|
|
74
|
+
|
|
75
|
+
## Configuration
|
|
76
|
+
|
|
77
|
+
### Required Environment Variables
|
|
78
|
+
|
|
79
|
+
| Variable | Description | Default |
|
|
80
|
+
|----------|-------------|---------|
|
|
81
|
+
| `LI_BASE_URL` | Log Insight appliance URL | — |
|
|
82
|
+
| `LI_USERNAME` | API username | `admin` |
|
|
83
|
+
| `LI_PASSWORD` | API password | — |
|
|
84
|
+
| `LI_PROVIDER` | Auth provider (Local, ActiveDirectory) | `Local` |
|
|
85
|
+
| `LI_VERIFY_TLS` | Verify TLS certificates | `false` |
|
|
86
|
+
|
|
87
|
+
### Optional (vROps Correlation)
|
|
88
|
+
|
|
89
|
+
| Variable | Description | Default |
|
|
90
|
+
|----------|-------------|---------|
|
|
91
|
+
| `VROPS_BASE_URL` | Aria Operations URL | — |
|
|
92
|
+
| `VROPS_USERNAME` | vROps username | `admin` |
|
|
93
|
+
| `VROPS_PASSWORD` | vROps password | — |
|
|
94
|
+
| `VROPS_AUTH_SOURCE` | Auth source | `local` |
|
|
95
|
+
|
|
96
|
+
## Claude Code / MCP Client Configuration
|
|
97
|
+
|
|
98
|
+
```json
|
|
99
|
+
{
|
|
100
|
+
"mcpServers": {
|
|
101
|
+
"aria-logs": {
|
|
102
|
+
"command": "uvx",
|
|
103
|
+
"args": ["vmware-aria-logs"],
|
|
104
|
+
"env": {
|
|
105
|
+
"LI_BASE_URL": "https://loginsight.example.com",
|
|
106
|
+
"LI_USERNAME": "admin",
|
|
107
|
+
"LI_PASSWORD": "your-password"
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Why This Server?
|
|
115
|
+
|
|
116
|
+
VMware Aria Operations for Logs (Log Insight) is widely deployed in enterprise VMware environments, but lacks modern AI-assisted log analysis tooling. This MCP server bridges that gap:
|
|
117
|
+
|
|
118
|
+
- **Zero dependencies** beyond the MCP SDK — uses Python stdlib `urllib` for HTTP
|
|
119
|
+
- **Stormbreaker engine** — unique signature-based clustering that finds mass incidents humans miss
|
|
120
|
+
- **vROps correlation** — cross-reference log events with infrastructure health in a single conversation
|
|
121
|
+
- **Works on v8.x+** — tested on Aria Operations for Logs 8.18.3; degrades gracefully where legacy APIs are deprecated
|
|
122
|
+
|
|
123
|
+
## Also Available On
|
|
124
|
+
|
|
125
|
+
- [Smithery](https://smithery.ai/server/vmware-aria-logs)
|
|
126
|
+
- [Glama](https://glama.ai/mcp/servers/apollion69/vmware-aria-logs)
|
|
127
|
+
- [PyPI](https://pypi.org/project/vmware-aria-logs/)
|
|
128
|
+
|
|
129
|
+
## License
|
|
130
|
+
|
|
131
|
+
MIT
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# VMware Aria Operations for Logs — MCP Server
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://pypi.org/project/vmware-aria-logs/)
|
|
4
|
+
[Supported Python versions](https://pypi.org/project/vmware-aria-logs/)
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
[Smithery](https://smithery.ai/server/vmware-aria-logs)
|
|
7
|
+
|
|
8
|
+
MCP server for querying and analyzing logs from **VMware Aria Operations for Logs** (formerly vRealize Log Insight). Provides log search, mass incident detection, and optional VMware Aria Operations (vROps) correlation.
|
|
9
|
+
|
|
10
|
+
Built for use with [Claude Code](https://claude.ai/code), [Claude Desktop](https://claude.ai/download), [LobeChat](https://github.com/lobehub/lobe-chat), and any MCP-compatible client.
|
|
11
|
+
|
|
12
|
+
## Features
|
|
13
|
+
|
|
14
|
+
- **Log Search** — Query events with time range, text filters, and field constraints via Log Insight API v2
|
|
15
|
+
- **Incident Detection** — Signature-based clustering to identify mass log incidents (Stormbreaker engine)
|
|
16
|
+
- **API Surface Probe** — Detect appliance version and available API endpoints
|
|
17
|
+
- **Dashboard Listing** — Enumerate saved dashboards (legacy vRLIC API, deprecated on 8.18+)
|
|
18
|
+
- **vROps Correlation** — Cross-reference log entities with Aria Operations resources and alerts
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
### Install via uvx (recommended)
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
uvx vmware-aria-logs
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Install via pip
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install vmware-aria-logs
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Run with environment variables
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
export LI_BASE_URL=https://loginsight.example.com
|
|
38
|
+
export LI_USERNAME=admin
|
|
39
|
+
export LI_PASSWORD=your-password
|
|
40
|
+
export LI_PROVIDER=Local
|
|
41
|
+
|
|
42
|
+
vmware-aria-logs
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## MCP Tools
|
|
46
|
+
|
|
47
|
+
| Tool | Description |
|
|
48
|
+
|------|-------------|
|
|
49
|
+
| `query_events` | Search log events with time range, text filter, field constraints |
|
|
50
|
+
| `get_version` | Get appliance version and probe API surface |
|
|
51
|
+
| `list_dashboards` | List saved dashboards (legacy vRLIC API, deprecated on 8.18+) |
|
|
52
|
+
| `detect_incidents` | Mass incident detection via signature clustering |
|
|
53
|
+
| `find_vrops_resources` | Find entities in Aria Operations by name |
|
|
54
|
+
| `get_vrops_alerts` | Get alerts for specific vROps resources |
|
|
55
|
+
|
|
56
|
+
## Configuration
|
|
57
|
+
|
|
58
|
+
### Required Environment Variables
|
|
59
|
+
|
|
60
|
+
| Variable | Description | Default |
|
|
61
|
+
|----------|-------------|---------|
|
|
62
|
+
| `LI_BASE_URL` | Log Insight appliance URL | — |
|
|
63
|
+
| `LI_USERNAME` | API username | `admin` |
|
|
64
|
+
| `LI_PASSWORD` | API password | — |
|
|
65
|
+
| `LI_PROVIDER` | Auth provider (Local, ActiveDirectory) | `Local` |
|
|
66
|
+
| `LI_VERIFY_TLS` | Verify TLS certificates | `false` |
|
|
67
|
+
|
|
68
|
+
### Optional (vROps Correlation)
|
|
69
|
+
|
|
70
|
+
| Variable | Description | Default |
|
|
71
|
+
|----------|-------------|---------|
|
|
72
|
+
| `VROPS_BASE_URL` | Aria Operations URL | — |
|
|
73
|
+
| `VROPS_USERNAME` | vROps username | `admin` |
|
|
74
|
+
| `VROPS_PASSWORD` | vROps password | — |
|
|
75
|
+
| `VROPS_AUTH_SOURCE` | Auth source | `local` |
|
|
76
|
+
|
|
77
|
+
## Claude Code / MCP Client Configuration
|
|
78
|
+
|
|
79
|
+
```json
|
|
80
|
+
{
|
|
81
|
+
"mcpServers": {
|
|
82
|
+
"aria-logs": {
|
|
83
|
+
"command": "uvx",
|
|
84
|
+
"args": ["vmware-aria-logs"],
|
|
85
|
+
"env": {
|
|
86
|
+
"LI_BASE_URL": "https://loginsight.example.com",
|
|
87
|
+
"LI_USERNAME": "admin",
|
|
88
|
+
"LI_PASSWORD": "your-password"
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Why This Server?
|
|
96
|
+
|
|
97
|
+
VMware Aria Operations for Logs (Log Insight) is widely deployed in enterprise VMware environments, but lacks modern AI-assisted log analysis tooling. This MCP server bridges that gap:
|
|
98
|
+
|
|
99
|
+
- **Zero dependencies** beyond the MCP SDK — uses Python stdlib `urllib` for HTTP
|
|
100
|
+
- **Stormbreaker engine** — unique signature-based clustering that finds mass incidents humans miss
|
|
101
|
+
- **vROps correlation** — cross-reference log events with infrastructure health in a single conversation
|
|
102
|
+
- **Works on v8.x+** — tested on Aria Operations for Logs 8.18.3; degrades gracefully where legacy APIs are deprecated
|
|
103
|
+
|
|
104
|
+
## Also Available On
|
|
105
|
+
|
|
106
|
+
- [Smithery](https://smithery.ai/server/vmware-aria-logs)
|
|
107
|
+
- [Glama](https://glama.ai/mcp/servers/apollion69/vmware-aria-logs)
|
|
108
|
+
- [PyPI](https://pypi.org/project/vmware-aria-logs/)
|
|
109
|
+
|
|
110
|
+
## License
|
|
111
|
+
|
|
112
|
+
MIT
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "vmware-aria-logs"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "MCP server for VMware Aria Operations for Logs (formerly vRealize Log Insight)"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
requires-python = ">=3.11"
|
|
8
|
+
authors = [{ name = "apollion69" }]
|
|
9
|
+
keywords = ["mcp", "vmware", "aria", "log-insight", "vrli", "observability"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"License :: OSI Approved :: MIT License",
|
|
13
|
+
"Programming Language :: Python :: 3",
|
|
14
|
+
"Topic :: System :: Monitoring",
|
|
15
|
+
]
|
|
16
|
+
dependencies = [
|
|
17
|
+
"mcp[cli]>=1.0.0",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[project.urls]
|
|
21
|
+
Homepage = "https://github.com/apollion69/vmware-aria-logs"
|
|
22
|
+
Repository = "https://github.com/apollion69/vmware-aria-logs"
|
|
23
|
+
Issues = "https://github.com/apollion69/vmware-aria-logs/issues"
|
|
24
|
+
|
|
25
|
+
[project.scripts]
|
|
26
|
+
vmware-aria-logs = "vmware_aria_logs.server:main"
|
|
27
|
+
|
|
28
|
+
[build-system]
|
|
29
|
+
requires = ["hatchling"]
|
|
30
|
+
build-backend = "hatchling.build"
|
|
31
|
+
|
|
32
|
+
[tool.hatch.build.targets.wheel]
|
|
33
|
+
packages = ["src/vmware_aria_logs"]
|
|
34
|
+
|
|
35
|
+
[dependency-groups]
|
|
36
|
+
dev = [
|
|
37
|
+
"pytest>=9.0.2",
|
|
38
|
+
"pytest-cov>=7.1.0",
|
|
39
|
+
]
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
|
|
2
|
+
startCommand:
|
|
3
|
+
type: stdio
|
|
4
|
+
configSchema:
|
|
5
|
+
type: object
|
|
6
|
+
required:
|
|
7
|
+
- liBaseUrl
|
|
8
|
+
- liPassword
|
|
9
|
+
properties:
|
|
10
|
+
liBaseUrl:
|
|
11
|
+
type: string
|
|
12
|
+
description: "VMware Aria Operations for Logs URL (e.g. https://loginsight.example.com)"
|
|
13
|
+
liUsername:
|
|
14
|
+
type: string
|
|
15
|
+
default: "admin"
|
|
16
|
+
description: "API username"
|
|
17
|
+
liPassword:
|
|
18
|
+
type: string
|
|
19
|
+
description: "API password"
|
|
20
|
+
liProvider:
|
|
21
|
+
type: string
|
|
22
|
+
default: "Local"
|
|
23
|
+
description: "Auth provider (Local, ActiveDirectory)"
|
|
24
|
+
vropsBaseUrl:
|
|
25
|
+
type: string
|
|
26
|
+
description: "Aria Operations URL for cross-correlation (optional)"
|
|
27
|
+
vropsUsername:
|
|
28
|
+
type: string
|
|
29
|
+
default: "admin"
|
|
30
|
+
description: "vROps username"
|
|
31
|
+
vropsPassword:
|
|
32
|
+
type: string
|
|
33
|
+
description: "vROps password"
|
|
34
|
+
commandFunction: |-
|
|
35
|
+
(config) => ({
|
|
36
|
+
command: 'uvx',
|
|
37
|
+
args: ['vmware-aria-logs'],
|
|
38
|
+
env: {
|
|
39
|
+
LI_BASE_URL: config.liBaseUrl,
|
|
40
|
+
LI_USERNAME: config.liUsername || 'admin',
|
|
41
|
+
LI_PASSWORD: config.liPassword,
|
|
42
|
+
LI_PROVIDER: config.liProvider || 'Local',
|
|
43
|
+
...(config.vropsBaseUrl ? {
|
|
44
|
+
VROPS_BASE_URL: config.vropsBaseUrl,
|
|
45
|
+
VROPS_USERNAME: config.vropsUsername || 'admin',
|
|
46
|
+
VROPS_PASSWORD: config.vropsPassword || ''
|
|
47
|
+
} : {})
|
|
48
|
+
}
|
|
49
|
+
})
|
|
File without changes
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Event extraction, normalization, and deduplication."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import re
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
# Patterns for the variable fragments of a log line. Each one is swapped for a
# fixed placeholder so that otherwise-identical messages share one signature.
_UUID_RE = re.compile(
    r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b"
)
_HEX_LONG_RE = re.compile(r"\b[0-9a-fA-F]{16,}\b")  # 16+ hex digits
_NUMBER_RE = re.compile(r"\b\d{4,}\b")  # integers of 4+ digits
_IP_RE = re.compile(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b")  # dotted quad
_PATH_RE = re.compile(r"/[\w./-]{20,}")  # "/" followed by 20+ path characters

# (pattern, placeholder) pairs in application order. The order is significant:
# UUIDs are replaced before the generic number rule, which could otherwise
# rewrite an all-digit UUID segment and prevent the UUID from matching.
_SUBSTITUTIONS = (
    (_UUID_RE, "<UUID>"),
    (_HEX_LONG_RE, "<HEX>"),
    (_IP_RE, "<IP>"),
    (_PATH_RE, "<PATH>"),
    (_NUMBER_RE, "<N>"),
)


def normalize_text(text: str) -> str:
    """Collapse the variable parts of a log line into placeholders.

    UUIDs, long hex strings, dotted-quad addresses, long slash-prefixed paths
    and integers of four or more digits are replaced, in that order, with
    ``<UUID>``, ``<HEX>``, ``<IP>``, ``<PATH>`` and ``<N>``.

    Args:
        text: Raw log text.

    Returns:
        The text with every matched fragment replaced by its placeholder.
    """
    signature = text
    for pattern, placeholder in _SUBSTITUTIONS:
        signature = pattern.sub(placeholder, signature)
    return signature
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def event_signature(event: dict[str, Any], *, include_source: bool = False) -> str:
    """Compute a stable signature hash for an event.

    The event text is resolved with ``extract_text`` so that events carrying
    their message under ``message``/``msg``/``log``/``raw`` or inside
    ``fields`` get a signature derived from that message. Reading only the
    ``text`` key would give every such event the hash of the empty string and
    merge unrelated events into a single cluster.

    Args:
        event: Event dict with text/source fields.
        include_source: If True, signature includes source hostname.
            Use False (default) for mass incident detection across hosts.
            Use True for per-host deduplication.

    Returns:
        The first 16 hex characters of the SHA-256 digest of the normalized
        text, prefixed with ``"<source>::"`` when ``include_source`` is True.
    """
    # Fall back to the previous coercion so a non-string ``text`` value still
    # contributes to the signature when no string message can be extracted.
    text = extract_text(event) or str(event.get("text") or "")
    normalized = normalize_text(text)
    if include_source:
        # Same fallback idea for the source: prefer the shared extractor and
        # keep the old ``source``/``hostname`` coercion for non-string values.
        source = extract_source(event) or str(
            event.get("source") or event.get("hostname") or ""
        )
        key = f"{source}::{normalized}"
    else:
        key = normalized
    return hashlib.sha256(key.encode("utf-8")).hexdigest()[:16]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def extract_text(event: dict[str, Any]) -> str:
    """Return the primary message text of an event, or ``""`` if none is found.

    Lookup order:
      1. The first non-empty string among the top-level keys ``text``,
         ``message``, ``msg``, ``log``, ``raw``.
      2. The ``content`` of the first dict in ``event["fields"]`` whose
         ``name`` is ``text``, ``message`` or ``msg`` (case-insensitive) and
         whose content is non-empty once converted to a string.
    """
    for key in ("text", "message", "msg", "log", "raw"):
        candidate = event.get(key)
        if isinstance(candidate, str) and candidate:
            return candidate

    entries = event.get("fields") or []
    if not isinstance(entries, list):
        return ""

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        label = str(entry.get("name") or "").lower()
        body = str(entry.get("content") or "")
        if body and label in ("text", "message", "msg"):
            return body
    return ""
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def extract_source(event: dict[str, Any]) -> str:
    """Return the source hostname of an event, or ``""`` if none is found.

    Top-level keys ``source``, ``hostname``, ``host`` and ``agent`` are tried
    first (first non-empty string wins). Failing that, ``event["fields"]`` is
    scanned for a dict named ``source``, ``hostname`` or ``host``
    (case-insensitive) with non-empty content.
    """
    top_level_keys = ("source", "hostname", "host", "agent")
    field_names = ("source", "hostname", "host")

    for candidate in map(event.get, top_level_keys):
        if isinstance(candidate, str) and candidate:
            return candidate

    entries = event.get("fields") or []
    if isinstance(entries, list):
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            label = str(entry.get("name") or "").lower()
            value = str(entry.get("content") or "")
            if value and label in field_names:
                return value
    return ""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def dedupe_events(events: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Remove duplicate events based on text + source + timestamp.

    Two events are duplicates when they have the same extracted source, the
    same stringified ``timestamp`` and the same first 200 characters of
    extracted text. The first occurrence is kept and input order is preserved.

    The key is a tuple rather than an MD5 digest of a ``:``-joined string:
    a tuple cannot confuse ``("a:b", "c")`` with ``("a", "b:c")``, and it
    avoids ``hashlib.md5``, which may be unavailable on FIPS-restricted
    Python builds.

    Args:
        events: Event dicts, possibly containing repeats.

    Returns:
        A new list holding the first occurrence of each distinct event.
    """
    seen: set[tuple[str, str, str]] = set()
    result: list[dict[str, Any]] = []
    for event in events:
        key = (
            extract_source(event),
            str(event.get("timestamp") or ""),
            extract_text(event)[:200],  # only the text prefix takes part in the key
        )
        if key not in seen:
            seen.add(key)
            result.append(event)
    return result
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Mass incident detection via signature clustering."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import Counter, defaultdict
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from .events import event_signature, extract_source, extract_text, normalize_text
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class MassIncident:
    """A cluster of events sharing the same normalized signature.

    The dataclass is frozen, so fields cannot be reassigned after
    construction. ``affected_sources`` is still a plain list and can be
    mutated in place; treat it as read-only.
    """

    # Signature string shared by every event in the cluster.
    signature: str
    # Placeholder-normalized form of the cluster's message text.
    normalized_text: str
    # Number of events in the cluster.
    event_count: int
    # Source names observed for the cluster's events.
    affected_sources: list[str]
    # Example of the raw message text from the cluster.
    sample_text: str
    blast_radius: int  # number of unique sources
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def detect_mass_incidents(
    events: list[dict[str, Any]],
    *,
    mass_threshold: int = 5,
    max_incidents: int = 50,
) -> list[MassIncident]:
    """Group events by signature and return those exceeding the mass threshold.

    Events are bucketed by ``event_signature(event)`` (source excluded), so
    the same message seen on many hosts lands in one bucket. Buckets smaller
    than ``mass_threshold`` are dropped.

    Args:
        events: Raw event dicts from Log Insight API.
        mass_threshold: Minimum event count to qualify as a mass incident.
        max_incidents: Maximum number of incidents to return (ranked by count).
            Zero or a negative value yields an empty list.

    Returns:
        List of MassIncident objects, sorted by event_count descending.
        Incidents with equal counts keep the order in which their signature
        first appeared in ``events``.
    """
    sig_events: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for event in events:
        sig_events[event_signature(event)].append(event)

    incidents: list[MassIncident] = []
    for sig, group in sig_events.items():
        if len(group) < mass_threshold:
            continue
        # Extract each event's source once; events with no source are ignored.
        sources = sorted({src for src in map(extract_source, group) if src})
        # The first event of the bucket supplies both the sample and the
        # normalized text, so nothing is computed for buckets that were dropped.
        sample = extract_text(group[0])
        incidents.append(MassIncident(
            signature=sig,
            normalized_text=normalize_text(sample),
            event_count=len(group),
            affected_sources=sources,
            sample_text=sample[:500],
            blast_radius=len(sources),
        ))

    # list.sort is stable, including with reverse=True, so ties keep bucket order.
    incidents.sort(key=lambda i: i.event_count, reverse=True)
    # Clamp the limit: a negative slice stop would drop incidents from the
    # tail instead of limiting the result.
    return incidents[:max(max_incidents, 0)]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def incidents_to_dicts(incidents: list[MassIncident]) -> list[dict[str, Any]]:
    """Turn incidents into plain dicts suitable for serialization.

    Each dict carries the keys ``signature``, ``normalized_text``,
    ``event_count``, ``blast_radius``, ``affected_sources`` and
    ``sample_text``, in that order, read from the incident's attributes of the
    same names. The ``affected_sources`` value is the incident's own list,
    not a copy.
    """
    keys = (
        "signature",
        "normalized_text",
        "event_count",
        "blast_radius",
        "affected_sources",
        "sample_text",
    )
    rows: list[dict[str, Any]] = []
    for incident in incidents:
        rows.append({key: getattr(incident, key) for key in keys})
    return rows
|
|
File without changes
|