spectra-server 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spectra_server-1.0.0/.env.example +14 -0
- spectra_server-1.0.0/.gitignore +35 -0
- spectra_server-1.0.0/Makefile +4 -0
- spectra_server-1.0.0/PKG-INFO +200 -0
- spectra_server-1.0.0/README.md +185 -0
- spectra_server-1.0.0/app.py +18 -0
- spectra_server-1.0.0/pyproject.toml +34 -0
- spectra_server-1.0.0/pyrightconfig.json +9 -0
- spectra_server-1.0.0/requirements.txt +5 -0
- spectra_server-1.0.0/schema.sql +80 -0
- spectra_server-1.0.0/spectra/__init__.py +35 -0
- spectra_server-1.0.0/spectra/app.py +144 -0
- spectra_server-1.0.0/spectra/bigquery_client.py +120 -0
- spectra_server-1.0.0/spectra/cli.py +67 -0
- spectra_server-1.0.0/spectra/config.py +35 -0
- spectra_server-1.0.0/spectra/events.py +97 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# BigQuery (required for event storage; table name = event.account_id)
|
|
2
|
+
BIGQUERY_PROJECT_ID=your-gcp-project
|
|
3
|
+
BIGQUERY_DATASET=analytics
|
|
4
|
+
|
|
5
|
+
# Server
|
|
6
|
+
HOST=0.0.0.0
|
|
7
|
+
PORT=8000
|
|
8
|
+
|
|
9
|
+
# CORS - comma-separated origins, or * for all
|
|
10
|
+
CORS_ORIGINS=*
|
|
11
|
+
|
|
12
|
+
# Google Auth (for BigQuery) - one of:
|
|
13
|
+
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
|
|
14
|
+
# GOOGLE_APPLICATION_CREDENTIALS_JSON=base64-encoded-service-account-json (used when GOOGLE_APPLICATION_CREDENTIALS is not set)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
.venv/
|
|
3
|
+
__pycache__/
|
|
4
|
+
*.py[cod]
|
|
5
|
+
*.egg-info/
|
|
6
|
+
*.egg
|
|
7
|
+
.sqlite/
|
|
8
|
+
|
|
9
|
+
# Node / TypeScript
|
|
10
|
+
.npmrc
|
|
11
|
+
node_modules/
|
|
12
|
+
dist/
|
|
13
|
+
*.tsbuildinfo
|
|
14
|
+
|
|
15
|
+
# Env & secrets
|
|
16
|
+
.env
|
|
17
|
+
.env.local
|
|
18
|
+
secrets/
|
|
19
|
+
|
|
20
|
+
# IDE
|
|
21
|
+
.idea/
|
|
22
|
+
.vscode/
|
|
23
|
+
|
|
24
|
+
# OS
|
|
25
|
+
.DS_Store
|
|
26
|
+
|
|
27
|
+
# Logs
|
|
28
|
+
*.log
|
|
29
|
+
|
|
30
|
+
# Testing
|
|
31
|
+
tests/
|
|
32
|
+
|
|
33
|
+
# AI
|
|
34
|
+
.cursor/
|
|
35
|
+
planning/
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: spectra-server
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Analytics event ingestion server for Spectra
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: fastapi>=0.104.0
|
|
8
|
+
Requires-Dist: google-cloud-bigquery>=3.0.0
|
|
9
|
+
Requires-Dist: pydantic>=2.0.0
|
|
10
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
11
|
+
Requires-Dist: uvicorn[standard]>=0.24.0
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# spectra-server
|
|
17
|
+
|
|
18
|
+
The Spectra event ingestion server. Receives analytics events from the [Spectra tracker script](../script/README.md) and writes them to Google BigQuery.
|
|
19
|
+
|
|
20
|
+
Built with [FastAPI](https://fastapi.tiangolo.com/) and [Pydantic](https://docs.pydantic.dev/).
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install spectra-server
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Requires Python 3.11+.
|
|
29
|
+
|
|
30
|
+
## Running the server
|
|
31
|
+
|
|
32
|
+
### Option A — CLI (quickest)
|
|
33
|
+
|
|
34
|
+
Set the required environment variables (see [Configuration](#configuration)), then:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
spectra-server
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Available flags:
|
|
41
|
+
|
|
42
|
+
| Flag | Default | Description |
|
|
43
|
+
|------|---------|-------------|
|
|
44
|
+
| `--host` | `0.0.0.0` | Bind host (overrides `HOST` env var) |
|
|
45
|
+
| `--port` | `8000` | Bind port (overrides `PORT` env var) |
|
|
46
|
+
| `--reload` | off | Enable auto-reload (development only) |
|
|
47
|
+
| `--workers` | `1` | Number of worker processes |
|
|
48
|
+
|
|
49
|
+
### Option B — uvicorn directly
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
uvicorn spectra:app --host 0.0.0.0 --port 8000
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Configuration
|
|
56
|
+
|
|
57
|
+
Copy `.env.example` to `.env` and fill in the values. The server reads configuration from environment variables (loaded via `python-dotenv`):
|
|
58
|
+
|
|
59
|
+
| Variable | Required | Description |
|
|
60
|
+
|----------|----------|-------------|
|
|
61
|
+
| `BIGQUERY_PROJECT_ID` | Yes (for storage) | GCP project ID |
|
|
62
|
+
| `BIGQUERY_DATASET` | Yes (for storage) | BigQuery dataset name |
|
|
63
|
+
| `GOOGLE_APPLICATION_CREDENTIALS` | One of the two | Path to a service account JSON file |
|
|
64
|
+
| `GOOGLE_APPLICATION_CREDENTIALS_JSON` | One of the two | Base64-encoded service account JSON |
|
|
65
|
+
| `HOST` | No | Bind host (default: `0.0.0.0`) |
|
|
66
|
+
| `PORT` | No | Bind port (default: `8000`) |
|
|
67
|
+
| `CORS_ORIGINS` | No | Comma-separated allowed origins, or `*` (default: `*`) |
|
|
68
|
+
|
|
69
|
+
If neither BigQuery variable is set the server starts successfully and discards events — useful for local development without a GCP project.
|
|
70
|
+
|
|
71
|
+
## API
|
|
72
|
+
|
|
73
|
+
### `POST /track`
|
|
74
|
+
|
|
75
|
+
Ingest one or more events.
|
|
76
|
+
|
|
77
|
+
**Headers**
|
|
78
|
+
|
|
79
|
+
| Header | Description |
|
|
80
|
+
|--------|-------------|
|
|
81
|
+
| `X-Account-ID` | Tenant / BigQuery table name. Can also be passed in the request body. |
|
|
82
|
+
| `Content-Type` | `application/json` or `text/plain` (plain text avoids CORS preflight for `navigator.sendBeacon`) |
|
|
83
|
+
|
|
84
|
+
**Body**
|
|
85
|
+
|
|
86
|
+
```json
|
|
87
|
+
{
|
|
88
|
+
"account_id": "optional_if_provided_in_header",
|
|
89
|
+
"events": [
|
|
90
|
+
{ "name": "page_view", "timestamp": "2024-01-01T00:00:00Z", "properties": {} }
|
|
91
|
+
]
|
|
92
|
+
}
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
**Response**
|
|
96
|
+
|
|
97
|
+
```json
|
|
98
|
+
{ "status": "ok", "count": 2 }
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### `GET /health`
|
|
102
|
+
|
|
103
|
+
Returns server status and whether BigQuery is configured.
|
|
104
|
+
|
|
105
|
+
```json
|
|
106
|
+
{ "status": "ok", "bigquery": "configured" }
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Extending the server
|
|
110
|
+
|
|
111
|
+
The `create_app()` factory lets you mount your own middleware — API key validation, rate limiting, authentication, request logging, etc. — without forking the codebase.
|
|
112
|
+
|
|
113
|
+
### Adding middleware
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
# myapp.py
|
|
117
|
+
from spectra import create_app
|
|
118
|
+
from my_auth import APIKeyMiddleware
|
|
119
|
+
|
|
120
|
+
app = create_app(
|
|
121
|
+
middleware=[
|
|
122
|
+
(APIKeyMiddleware, {"header": "X-API-Key", "keys": ["sk-live-..."]}),
|
|
123
|
+
]
|
|
124
|
+
)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Then run it:
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
uvicorn myapp:app --host 0.0.0.0 --port 8000
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Middleware entries are `(MiddlewareClass, kwargs_dict)` tuples. The first entry in the list is the outermost layer (runs first on incoming requests).
|
|
134
|
+
|
|
135
|
+
### API key validation example
|
|
136
|
+
|
|
137
|
+
Here is a minimal Starlette middleware that validates a bearer token:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
# auth.py
|
|
141
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
142
|
+
from starlette.requests import Request
|
|
143
|
+
from starlette.responses import JSONResponse
|
|
144
|
+
|
|
145
|
+
class APIKeyMiddleware(BaseHTTPMiddleware):
|
|
146
|
+
def __init__(self, app, *, keys: list[str], header: str = "Authorization"):
|
|
147
|
+
super().__init__(app)
|
|
148
|
+
self.keys = set(keys)
|
|
149
|
+
self.header = header
|
|
150
|
+
|
|
151
|
+
async def dispatch(self, request: Request, call_next):
|
|
152
|
+
if request.url.path == "/health":
|
|
153
|
+
return await call_next(request)
|
|
154
|
+
token = request.headers.get(self.header, "").removeprefix("Bearer ").strip()
|
|
155
|
+
if token not in self.keys:
|
|
156
|
+
return JSONResponse({"detail": "Unauthorized"}, status_code=401)
|
|
157
|
+
return await call_next(request)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
# myapp.py
|
|
162
|
+
from spectra import create_app
|
|
163
|
+
from auth import APIKeyMiddleware
|
|
164
|
+
|
|
165
|
+
app = create_app(
|
|
166
|
+
middleware=[
|
|
167
|
+
(APIKeyMiddleware, {"keys": ["sk-live-abc123"], "header": "Authorization"}),
|
|
168
|
+
]
|
|
169
|
+
)
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Overriding FastAPI settings
|
|
173
|
+
|
|
174
|
+
Any keyword argument not recognised by `create_app` is forwarded to `FastAPI()`:
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
app = create_app(
|
|
178
|
+
cors_origins=["https://myapp.com"],
|
|
179
|
+
docs_url=None, # disable Swagger UI in production
|
|
180
|
+
redoc_url=None,
|
|
181
|
+
)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Local development
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
git clone https://github.com/mvallejo3/spectra.git
|
|
188
|
+
cd spectra/server
|
|
189
|
+
|
|
190
|
+
python -m venv .venv && source .venv/bin/activate
|
|
191
|
+
pip install -e ".[dev]"
|
|
192
|
+
|
|
193
|
+
cp .env.example .env # fill in your values
|
|
194
|
+
|
|
195
|
+
make start # uvicorn app:app --reload
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## License
|
|
199
|
+
|
|
200
|
+
[MIT](../LICENSE)
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# spectra-server
|
|
2
|
+
|
|
3
|
+
The Spectra event ingestion server. Receives analytics events from the [Spectra tracker script](../script/README.md) and writes them to Google BigQuery.
|
|
4
|
+
|
|
5
|
+
Built with [FastAPI](https://fastapi.tiangolo.com/) and [Pydantic](https://docs.pydantic.dev/).
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install spectra-server
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Requires Python 3.11+.
|
|
14
|
+
|
|
15
|
+
## Running the server
|
|
16
|
+
|
|
17
|
+
### Option A — CLI (quickest)
|
|
18
|
+
|
|
19
|
+
Set the required environment variables (see [Configuration](#configuration)), then:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
spectra-server
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Available flags:
|
|
26
|
+
|
|
27
|
+
| Flag | Default | Description |
|
|
28
|
+
|------|---------|-------------|
|
|
29
|
+
| `--host` | `0.0.0.0` | Bind host (overrides `HOST` env var) |
|
|
30
|
+
| `--port` | `8000` | Bind port (overrides `PORT` env var) |
|
|
31
|
+
| `--reload` | off | Enable auto-reload (development only) |
|
|
32
|
+
| `--workers` | `1` | Number of worker processes |
|
|
33
|
+
|
|
34
|
+
### Option B — uvicorn directly
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
uvicorn spectra:app --host 0.0.0.0 --port 8000
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Configuration
|
|
41
|
+
|
|
42
|
+
Copy `.env.example` to `.env` and fill in the values. The server reads configuration from environment variables (loaded via `python-dotenv`):
|
|
43
|
+
|
|
44
|
+
| Variable | Required | Description |
|
|
45
|
+
|----------|----------|-------------|
|
|
46
|
+
| `BIGQUERY_PROJECT_ID` | Yes (for storage) | GCP project ID |
|
|
47
|
+
| `BIGQUERY_DATASET` | Yes (for storage) | BigQuery dataset name |
|
|
48
|
+
| `GOOGLE_APPLICATION_CREDENTIALS` | One of the two | Path to a service account JSON file |
|
|
49
|
+
| `GOOGLE_APPLICATION_CREDENTIALS_JSON` | One of the two | Base64-encoded service account JSON |
|
|
50
|
+
| `HOST` | No | Bind host (default: `0.0.0.0`) |
|
|
51
|
+
| `PORT` | No | Bind port (default: `8000`) |
|
|
52
|
+
| `CORS_ORIGINS` | No | Comma-separated allowed origins, or `*` (default: `*`) |
|
|
53
|
+
|
|
54
|
+
If neither BigQuery variable is set the server starts successfully and discards events — useful for local development without a GCP project.
|
|
55
|
+
|
|
56
|
+
## API
|
|
57
|
+
|
|
58
|
+
### `POST /track`
|
|
59
|
+
|
|
60
|
+
Ingest one or more events.
|
|
61
|
+
|
|
62
|
+
**Headers**
|
|
63
|
+
|
|
64
|
+
| Header | Description |
|
|
65
|
+
|--------|-------------|
|
|
66
|
+
| `X-Account-ID` | Tenant / BigQuery table name. Can also be passed in the request body. |
|
|
67
|
+
| `Content-Type` | `application/json` or `text/plain` (plain text avoids CORS preflight for `navigator.sendBeacon`) |
|
|
68
|
+
|
|
69
|
+
**Body**
|
|
70
|
+
|
|
71
|
+
```json
|
|
72
|
+
{
|
|
73
|
+
"account_id": "optional_if_provided_in_header",
|
|
74
|
+
"events": [
|
|
75
|
+
{ "name": "page_view", "timestamp": "2024-01-01T00:00:00Z", "properties": {} }
|
|
76
|
+
]
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**Response**
|
|
81
|
+
|
|
82
|
+
```json
|
|
83
|
+
{ "status": "ok", "count": 2 }
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### `GET /health`
|
|
87
|
+
|
|
88
|
+
Returns server status and whether BigQuery is configured.
|
|
89
|
+
|
|
90
|
+
```json
|
|
91
|
+
{ "status": "ok", "bigquery": "configured" }
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Extending the server
|
|
95
|
+
|
|
96
|
+
The `create_app()` factory lets you mount your own middleware — API key validation, rate limiting, authentication, request logging, etc. — without forking the codebase.
|
|
97
|
+
|
|
98
|
+
### Adding middleware
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
# myapp.py
|
|
102
|
+
from spectra import create_app
|
|
103
|
+
from my_auth import APIKeyMiddleware
|
|
104
|
+
|
|
105
|
+
app = create_app(
|
|
106
|
+
middleware=[
|
|
107
|
+
(APIKeyMiddleware, {"header": "X-API-Key", "keys": ["sk-live-..."]}),
|
|
108
|
+
]
|
|
109
|
+
)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Then run it:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
uvicorn myapp:app --host 0.0.0.0 --port 8000
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Middleware entries are `(MiddlewareClass, kwargs_dict)` tuples. The first entry in the list is the outermost layer (runs first on incoming requests).
|
|
119
|
+
|
|
120
|
+
### API key validation example
|
|
121
|
+
|
|
122
|
+
Here is a minimal Starlette middleware that validates a bearer token:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
# auth.py
|
|
126
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
127
|
+
from starlette.requests import Request
|
|
128
|
+
from starlette.responses import JSONResponse
|
|
129
|
+
|
|
130
|
+
class APIKeyMiddleware(BaseHTTPMiddleware):
|
|
131
|
+
def __init__(self, app, *, keys: list[str], header: str = "Authorization"):
|
|
132
|
+
super().__init__(app)
|
|
133
|
+
self.keys = set(keys)
|
|
134
|
+
self.header = header
|
|
135
|
+
|
|
136
|
+
async def dispatch(self, request: Request, call_next):
|
|
137
|
+
if request.url.path == "/health":
|
|
138
|
+
return await call_next(request)
|
|
139
|
+
token = request.headers.get(self.header, "").removeprefix("Bearer ").strip()
|
|
140
|
+
if token not in self.keys:
|
|
141
|
+
return JSONResponse({"detail": "Unauthorized"}, status_code=401)
|
|
142
|
+
return await call_next(request)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
# myapp.py
|
|
147
|
+
from spectra import create_app
|
|
148
|
+
from auth import APIKeyMiddleware
|
|
149
|
+
|
|
150
|
+
app = create_app(
|
|
151
|
+
middleware=[
|
|
152
|
+
(APIKeyMiddleware, {"keys": ["sk-live-abc123"], "header": "Authorization"}),
|
|
153
|
+
]
|
|
154
|
+
)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Overriding FastAPI settings
|
|
158
|
+
|
|
159
|
+
Any keyword argument not recognised by `create_app` is forwarded to `FastAPI()`:
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
app = create_app(
|
|
163
|
+
cors_origins=["https://myapp.com"],
|
|
164
|
+
docs_url=None, # disable Swagger UI in production
|
|
165
|
+
redoc_url=None,
|
|
166
|
+
)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Local development
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
git clone https://github.com/mvallejo3/spectra.git
|
|
173
|
+
cd spectra/server
|
|
174
|
+
|
|
175
|
+
python -m venv .venv && source .venv/bin/activate
|
|
176
|
+
pip install -e ".[dev]"
|
|
177
|
+
|
|
178
|
+
cp .env.example .env # fill in your values
|
|
179
|
+
|
|
180
|
+
make start # uvicorn app:app --reload
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## License
|
|
184
|
+
|
|
185
|
+
[MIT](../LICENSE)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Local development entry point.
|
|
2
|
+
|
|
3
|
+
This module re-exports the default ``app`` instance from the ``spectra``
|
|
4
|
+
package so that the Makefile target ``uvicorn app:app`` continues to work
|
|
5
|
+
without modification.
|
|
6
|
+
|
|
7
|
+
For production or custom deployments, import from the package directly::
|
|
8
|
+
|
|
9
|
+
from spectra import app # default instance
|
|
10
|
+
from spectra import create_app # factory for custom middleware
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from spectra import app # noqa: F401
|
|
14
|
+
from spectra.config import HOST, PORT
|
|
15
|
+
|
|
16
|
+
if __name__ == "__main__":
|
|
17
|
+
import uvicorn
|
|
18
|
+
uvicorn.run(app, host=HOST, port=PORT)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "spectra-server"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "Analytics event ingestion server for Spectra"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = { text = "MIT" }
|
|
7
|
+
requires-python = ">=3.11"
|
|
8
|
+
dependencies = [
|
|
9
|
+
"fastapi>=0.104.0",
|
|
10
|
+
"python-dotenv>=1.0.0",
|
|
11
|
+
"google-cloud-bigquery>=3.0.0",
|
|
12
|
+
"uvicorn[standard]>=0.24.0",
|
|
13
|
+
"pydantic>=2.0.0",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.scripts]
|
|
17
|
+
spectra-server = "spectra.cli:main"
|
|
18
|
+
|
|
19
|
+
[build-system]
|
|
20
|
+
requires = ["hatchling"]
|
|
21
|
+
build-backend = "hatchling.build"
|
|
22
|
+
|
|
23
|
+
[tool.hatch.build.targets.wheel]
|
|
24
|
+
packages = ["spectra"]
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
dev = [
|
|
28
|
+
"mypy>=1.0.0",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[tool.mypy]
|
|
32
|
+
python_version = "3.11"
|
|
33
|
+
strict = true
|
|
34
|
+
warn_return_any = true
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
-- BigQuery table schema for Spectra analytics events
|
|
2
|
+
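-- Aligned with planning/events.md. Run in BigQuery, replacing the dataset as needed and `events` with the account ID (the server writes each account's events to a table named after its account_id).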
|
|
3
|
+
|
|
4
|
+
CREATE TABLE IF NOT EXISTS `analytics.events` (
|
|
5
|
+
-- Base (required)
|
|
6
|
+
event_id STRING NOT NULL,
|
|
7
|
+
event_timestamp TIMESTAMP NOT NULL,
|
|
8
|
+
event_name STRING NOT NULL,
|
|
9
|
+
session_id STRING NOT NULL,
|
|
10
|
+
-- Base (optional)
|
|
11
|
+
page_url STRING,
|
|
12
|
+
user_agent STRING,
|
|
13
|
+
spectra_version STRING,
|
|
14
|
+
account_id STRING,
|
|
15
|
+
-- 1. Page Context
|
|
16
|
+
page_title STRING,
|
|
17
|
+
page_path STRING,
|
|
18
|
+
page_hostname STRING,
|
|
19
|
+
referrer STRING,
|
|
20
|
+
referrer_domain STRING,
|
|
21
|
+
previous_page_url STRING,
|
|
22
|
+
page_type STRING,
|
|
23
|
+
canonical_url STRING,
|
|
24
|
+
language STRING,
|
|
25
|
+
-- 2. User Context
|
|
26
|
+
user_id STRING,
|
|
27
|
+
anonymous_id STRING,
|
|
28
|
+
user_type STRING,
|
|
29
|
+
-- 3. Traffic & Attribution
|
|
30
|
+
utm_source STRING,
|
|
31
|
+
utm_medium STRING,
|
|
32
|
+
utm_campaign STRING,
|
|
33
|
+
utm_term STRING,
|
|
34
|
+
utm_content STRING,
|
|
35
|
+
gclid STRING,
|
|
36
|
+
fbclid STRING,
|
|
37
|
+
ttclid STRING,
|
|
38
|
+
traffic_source STRING,
|
|
39
|
+
traffic_medium STRING,
|
|
40
|
+
campaign_id STRING,
|
|
41
|
+
ad_group_id STRING,
|
|
42
|
+
creative_id STRING,
|
|
43
|
+
landing_page STRING,
|
|
44
|
+
first_touch_source STRING,
|
|
45
|
+
first_touch_medium STRING,
|
|
46
|
+
first_touch_campaign STRING,
|
|
47
|
+
-- 4. Device & Technical
|
|
48
|
+
device_type STRING,
|
|
49
|
+
browser STRING,
|
|
50
|
+
browser_version STRING,
|
|
51
|
+
operating_system STRING,
|
|
52
|
+
os_version STRING,
|
|
53
|
+
screen_resolution STRING,
|
|
54
|
+
viewport_size STRING,
|
|
55
|
+
timezone STRING,
|
|
56
|
+
connection_type STRING,
|
|
57
|
+
-- 5. Timestamp & Timing
|
|
58
|
+
event_date STRING,
|
|
59
|
+
event_time STRING,
|
|
60
|
+
local_time STRING,
|
|
61
|
+
time_on_page FLOAT64,
|
|
62
|
+
-- Click
|
|
63
|
+
element_tag STRING,
|
|
64
|
+
element_id STRING,
|
|
65
|
+
element_classes STRING,
|
|
66
|
+
element_text STRING,
|
|
67
|
+
element_href STRING,
|
|
68
|
+
position_x INT64,
|
|
69
|
+
position_y INT64,
|
|
70
|
+
-- Scroll
|
|
71
|
+
scroll_depth_pct FLOAT64,
|
|
72
|
+
scroll_y INT64,
|
|
73
|
+
page_height INT64,
|
|
74
|
+
viewport_height INT64,
|
|
75
|
+
-- Form
|
|
76
|
+
form_id STRING,
|
|
77
|
+
form_action STRING,
|
|
78
|
+
form_method STRING,
|
|
79
|
+
field_count INT64
|
|
80
|
+
);
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Spectra analytics ingestion server.
|
|
2
|
+
|
|
3
|
+
Quickstart
|
|
4
|
+
----------
|
|
5
|
+
Install the package and run the built-in CLI::
|
|
6
|
+
|
|
7
|
+
pip install spectra-server
|
|
8
|
+
spectra-server
|
|
9
|
+
|
|
10
|
+
Custom middleware / extending the server
|
|
11
|
+
-----------------------------------------
|
|
12
|
+
Use :func:`create_app` to obtain a FastAPI instance with your own middleware
|
|
13
|
+
injected before running it::
|
|
14
|
+
|
|
15
|
+
# myapp.py
|
|
16
|
+
from spectra import create_app
|
|
17
|
+
from my_auth import APIKeyMiddleware
|
|
18
|
+
|
|
19
|
+
app = create_app(
|
|
20
|
+
middleware=[
|
|
21
|
+
(APIKeyMiddleware, {"header": "X-API-Key", "keys": ["sk-..."]}),
|
|
22
|
+
]
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
Then point uvicorn at your module::
|
|
26
|
+
|
|
27
|
+
uvicorn myapp:app --host 0.0.0.0 --port 8000
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from spectra.app import create_app
|
|
31
|
+
from fastapi import FastAPI
|
|
32
|
+
|
|
33
|
+
app: FastAPI = create_app()
|
|
34
|
+
|
|
35
|
+
__all__ = ["app", "create_app"]
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Factory for the Spectra FastAPI application.
|
|
2
|
+
|
|
3
|
+
Developers who want to extend the server can call :func:`create_app` and
|
|
4
|
+
inject their own middleware (e.g. API-key validation, rate limiting, auth):
|
|
5
|
+
|
|
6
|
+
.. code-block:: python
|
|
7
|
+
|
|
8
|
+
from spectra import create_app
|
|
9
|
+
from my_auth import APIKeyMiddleware
|
|
10
|
+
|
|
11
|
+
app = create_app(
|
|
12
|
+
middleware=[
|
|
13
|
+
(APIKeyMiddleware, {"header": "X-API-Key", "keys": ["sk-..."]}),
|
|
14
|
+
]
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
Then run with ``uvicorn mymodule:app``.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import asyncio
|
|
23
|
+
import json
|
|
24
|
+
import logging
|
|
25
|
+
from typing import Any
|
|
26
|
+
|
|
27
|
+
from fastapi import FastAPI, HTTPException, Request
|
|
28
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
29
|
+
from pydantic import ValidationError
|
|
30
|
+
|
|
31
|
+
from spectra.bigquery_client import insert_events
|
|
32
|
+
from spectra.config import CORS_ORIGINS, bigquery_configured
|
|
33
|
+
from spectra.events import Event
|
|
34
|
+
|
|
35
|
+
_MiddlewareEntry = tuple[type[Any], dict[str, Any]]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def create_app(
    *,
    cors_origins: list[str] | None = None,
    middleware: list[_MiddlewareEntry] | None = None,
    **fastapi_kwargs: Any,
) -> FastAPI:
    """Create and return a configured Spectra FastAPI application.

    Args:
        cors_origins: List of allowed CORS origins. Defaults to the
            ``CORS_ORIGINS`` value imported from ``spectra.config``.
        middleware: Additional Starlette/FastAPI middleware to mount, each
            expressed as ``(MiddlewareClass, kwargs_dict)``. Middleware is
            applied in the order given (i.e. the first entry is the outermost
            of the custom layers). CORS middleware is always added before any
            custom entries, so custom middleware wraps the CORS layer.
        **fastapi_kwargs: Extra keyword arguments forwarded to
            :class:`fastapi.FastAPI` (e.g. ``title``, ``docs_url``).

    Returns:
        A FastAPI app exposing ``POST /track`` and ``GET /health``.
    """
    fastapi_kwargs.setdefault("title", "Spectra JS")
    fastapi_kwargs.setdefault("description", "Event ingestion API for Spectra JS")

    app = FastAPI(**fastapi_kwargs)
    logger = logging.getLogger(__name__)

    origins = cors_origins if cors_origins is not None else CORS_ORIGINS
    app.add_middleware(
        CORSMiddleware,
        allow_origins=origins,
        allow_credentials=False,
        allow_methods=["GET", "POST", "OPTIONS"],
        allow_headers=["*"],
    )

    # Starlette applies middleware in reverse registration order (last added =
    # outermost). To preserve the intuitive "first entry = outermost" contract
    # we reverse the list before adding.
    for cls, kwargs in reversed(middleware or []):
        app.add_middleware(cls, **kwargs)

    # The event loop only holds weak references to tasks, so keep a strong
    # reference to every in-flight insert until its done-callback has run.
    pending_inserts: set[asyncio.Task[None]] = set()

    def _on_insert_done(task: asyncio.Task[None]) -> None:
        """Release the finished insert task and log its failure, if any."""
        pending_inserts.discard(task)
        if task.cancelled():
            # CancelledError is not an Exception subclass; check explicitly
            # instead of letting it escape from the callback.
            logger.warning("Background BigQuery insert was cancelled")
            return
        exc = task.exception()
        if exc is not None:
            logger.error("Background BigQuery insert failed: %s", exc, exc_info=exc)

    @app.post("/track")
    async def ingest_events(request: Request) -> dict[str, Any]:
        """Ingest events and store them in BigQuery.

        Payload: ``{ account_id?: string, events: [...] }``

        ``account_id`` can be provided via the ``X-Account-ID`` header or in
        the request body; the header wins when both are present. The body is
        parsed as JSON regardless of the declared content type, so both
        ``application/json`` and ``text/plain`` work (the latter avoids a CORS
        preflight for ``navigator.sendBeacon`` requests).

        Raises:
            HTTPException: 400 for an undecodable or non-JSON body, a payload
                without an ``events`` array, a non-string body ``account_id``,
                an event that fails validation, or a missing account ID while
                BigQuery is configured.
        """
        header_account_id = (request.headers.get("X-Account-ID") or "").strip() or None
        body_bytes = await request.body()
        try:
            payload = json.loads(body_bytes)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            # json.loads decodes bytes itself, so a non-UTF-8 body raises
            # UnicodeDecodeError rather than JSONDecodeError.
            raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e
        if not isinstance(payload, dict):
            raise HTTPException(status_code=400, detail="Expected object with 'events' array")
        raw_events = payload.get("events")
        if not isinstance(raw_events, list):
            raise HTTPException(status_code=400, detail="Expected 'events' array")

        account_id = header_account_id
        if account_id is None:
            body_account_id = payload.get("account_id")
            # Falsy values are treated as "not provided"; any other non-string
            # would otherwise fail on .strip() and surface as a 500.
            if body_account_id and not isinstance(body_account_id, str):
                raise HTTPException(status_code=400, detail="'account_id' must be a string")
            account_id = (body_account_id or "").strip() or None

        events: list[Event] = []
        for i, item in enumerate(raw_events):
            try:
                events.append(Event.model_validate(item))
            except ValidationError as e:
                raise HTTPException(
                    status_code=400, detail={"index": i, "errors": e.errors()}
                ) from e

        if not events:
            return {"status": "ok", "count": 0}

        if not bigquery_configured():
            return {
                "status": "ok",
                "message": "BigQuery not configured; events discarded",
                "count": len(events),
            }

        if not account_id:
            raise HTTPException(status_code=400, detail="X-Account-ID header is required")

        # Run the blocking insert in a worker thread and respond immediately;
        # failures are reported through the done-callback, not to the client.
        task = asyncio.create_task(asyncio.to_thread(insert_events, events, account_id))
        pending_inserts.add(task)
        task.add_done_callback(_on_insert_done)

        return {"status": "ok", "count": len(events)}

    @app.get("/health")
    async def health() -> dict[str, str]:
        """Health check endpoint reporting whether BigQuery is configured."""
        return {
            "status": "ok",
            "bigquery": "configured" if bigquery_configured() else "not_configured",
        }

    return app
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""BigQuery client for inserting analytics events."""
|
|
2
|
+
import base64
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
# Adding a Pyright ignore for the unresolved import.
|
|
8
|
+
# The local venv lacks these dependencies (pip install failed due to SSL),
|
|
9
|
+
# so the linter can't resolve the package.
|
|
10
|
+
import google.cloud.bigquery as bigquery # pyright: ignore[reportMissingImports]
|
|
11
|
+
from google.oauth2 import credentials # pyright: ignore[reportMissingImports]
|
|
12
|
+
from google.oauth2 import service_account # pyright: ignore[reportMissingImports]
|
|
13
|
+
|
|
14
|
+
from spectra.config import BIGQUERY_DATASET, BIGQUERY_PROJECT_ID
|
|
15
|
+
from spectra.events import Event, event_to_row
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _get_credentials():
    """Build explicit Google credentials from the environment, if any.

    Returns:
        ``None`` when ``GOOGLE_APPLICATION_CREDENTIALS`` is set or when neither
        credentials variable is set, so the caller passes no explicit
        credentials. Otherwise a credentials object built from the
        base64-encoded JSON in ``GOOGLE_APPLICATION_CREDENTIALS_JSON``: user
        credentials when its ``type`` is ``"authorized_user"``, service-account
        credentials for anything else.

    Raises:
        ValueError: If ``GOOGLE_APPLICATION_CREDENTIALS_JSON`` is not valid
            base64-encoded UTF-8 JSON, or does not decode to a JSON object.
    """
    if os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
        return None  # Let ADC use the file path
    json_b64 = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS_JSON")
    if not json_b64:
        return None

    try:
        info = json.loads(base64.b64decode(json_b64).decode("utf-8"))
    except ValueError as exc:
        # binascii.Error, UnicodeDecodeError and json.JSONDecodeError are all
        # ValueError subclasses; re-raise naming the variable (never its value).
        raise ValueError(
            "GOOGLE_APPLICATION_CREDENTIALS_JSON must be base64-encoded JSON"
        ) from exc
    if not isinstance(info, dict):
        raise ValueError(
            "GOOGLE_APPLICATION_CREDENTIALS_JSON must decode to a JSON object"
        )

    if info.get("type") == "authorized_user":
        return credentials.Credentials(
            token=None,
            refresh_token=info.get("refresh_token"),
            token_uri="https://oauth2.googleapis.com/token",
            client_id=info.get("client_id"),
            client_secret=info.get("client_secret"),
        )
    return service_account.Credentials.from_service_account_info(info)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
_client: bigquery.Client | None = None
|
|
38
|
+
_ACCOUNT_ID_RE = re.compile(r"^[A-Za-z0-9_]+$")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_client() -> bigquery.Client:
    """Return the module-wide BigQuery client, creating it on first use.

    The client is built for ``BIGQUERY_PROJECT_ID`` (an empty string when that
    is unset) and is given explicit credentials only when
    ``_get_credentials()`` returns some. The instance is stored in the
    module-level ``_client`` and reused by later calls.
    """
    global _client
    if _client is not None:
        return _client

    explicit_creds = _get_credentials()
    client_options: dict = {"project": BIGQUERY_PROJECT_ID or ""}
    if explicit_creds is not None:
        client_options["credentials"] = explicit_creds
    _client = bigquery.Client(**client_options)
    return _client
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def insert_events(events: list[Event], account_id: str | None = None) -> None:
    """Insert events into the BigQuery table named after ``account_id``.

    The destination is ``BIGQUERY_PROJECT_ID.BIGQUERY_DATASET.<account_id>``
    and every row is stamped with ``account_id`` before insertion. An empty
    ``events`` list is a no-op.

    Args:
        events: Validated events to insert.
        account_id: Tenant identifier used as the table name. Required when
            ``events`` is non-empty; restricted to letters, digits and
            underscores, the same rule ``fetch_events`` enforces.

    Raises:
        ValueError: If ``account_id`` is missing or contains other characters,
            or if the BigQuery project/dataset is not configured.
        RuntimeError: If BigQuery reports per-row insert errors.
    """
    if not events:
        return
    if not account_id:
        raise ValueError("account_id header is required")
    # account_id originates from a request header/body and becomes part of the
    # table ID, so reject anything outside the allowed character set up front.
    if not _ACCOUNT_ID_RE.fullmatch(account_id):
        raise ValueError("account_id may only contain letters, numbers, and underscores")
    if not BIGQUERY_PROJECT_ID or not BIGQUERY_DATASET:
        raise ValueError("BigQuery is not configured")
    client = get_client()

    table_id = f"{BIGQUERY_PROJECT_ID}.{BIGQUERY_DATASET}.{account_id}"
    rows = [event_to_row(e.model_copy(update={"account_id": account_id})) for e in events]
    errors = client.insert_rows_json(table_id, rows)
    if errors:
        raise RuntimeError(f"BigQuery insert failed: {errors}")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def fetch_events(account_id: str, limit: int = 1000) -> list[dict]:
    """Return the newest events stored for one account.

    Reads from ``BIGQUERY_PROJECT_ID.BIGQUERY_DATASET.<account_id>``, ordered
    by ``event_timestamp`` descending, and returns at most ``limit`` rows as
    plain dicts.

    Raises:
        ValueError: If ``account_id`` is empty or contains characters other
            than letters, digits and underscores, if ``limit`` is not
            positive, or if the BigQuery project/dataset is not configured.
    """
    if not account_id:
        raise ValueError("account_id is required")
    if _ACCOUNT_ID_RE.fullmatch(account_id) is None:
        raise ValueError("account_id may only contain letters, numbers, and underscores")
    if limit <= 0:
        raise ValueError("limit must be greater than 0")
    if not (BIGQUERY_PROJECT_ID and BIGQUERY_DATASET):
        raise ValueError("BigQuery is not configured")

    client = get_client()
    source_table = f"{BIGQUERY_PROJECT_ID}.{BIGQUERY_DATASET}.{account_id}"
    # The table name is interpolated (it was validated above); the row limit is
    # passed as a bound query parameter.
    sql = f"SELECT * FROM `{source_table}` ORDER BY event_timestamp DESC LIMIT @limit"
    limit_param = bigquery.ScalarQueryParameter("limit", "INT64", limit)
    config = bigquery.QueryJobConfig(query_parameters=[limit_param])
    result_rows = client.query(sql, job_config=config).result()
    return [dict(record.items()) for record in result_rows]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def fetch_all_events(limit: int = 5000) -> list[dict]:
    """
    Return recent analytics events across every table in the dataset.

    Queries the wildcard table BIGQUERY_PROJECT_ID.BIGQUERY_DATASET.*,
    ordered by event_timestamp (newest first) and capped at ``limit`` rows.

    Args:
        limit: Maximum number of rows to return; must be positive.

    Returns:
        One dict per row, built from the row's items.

    Raises:
        ValueError: If limit is not positive, or if the BigQuery
            project/dataset settings are empty.
    """
    if limit <= 0:
        raise ValueError("limit must be greater than 0")
    if not (BIGQUERY_PROJECT_ID and BIGQUERY_DATASET):
        raise ValueError("BigQuery is not configured")

    bq = get_client()
    sql = (
        f"SELECT * FROM `{BIGQUERY_PROJECT_ID}.{BIGQUERY_DATASET}.*` "
        "ORDER BY event_timestamp DESC LIMIT @limit"
    )
    # The row cap is passed as a named query parameter rather than inlined.
    limit_param = bigquery.ScalarQueryParameter("limit", "INT64", limit)
    config = bigquery.QueryJobConfig(query_parameters=[limit_param])

    events: list[dict] = []
    for record in bq.query(sql, job_config=config).result():
        events.append(dict(record.items()))
    return events
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Command-line entry point for the Spectra server.
|
|
2
|
+
|
|
3
|
+
Installed as the ``spectra-server`` script via ``[project.scripts]``.
|
|
4
|
+
|
|
5
|
+
Usage::
|
|
6
|
+
|
|
7
|
+
spectra-server
|
|
8
|
+
spectra-server --host 0.0.0.0 --port 8080
|
|
9
|
+
spectra-server --reload
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import sys
|
|
16
|
+
|
|
17
|
+
from spectra.config import HOST, PORT
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``spectra-server`` command."""
    cli = argparse.ArgumentParser(
        prog="spectra-server",
        description="Run the Spectra event ingestion server.",
    )
    cli.add_argument(
        "--host",
        default=HOST,
        help=f"Bind host (default: {HOST}, override with HOST env var)",
    )
    cli.add_argument(
        "--port",
        type=int,
        default=PORT,
        help=f"Bind port (default: {PORT}, override with PORT env var)",
    )
    cli.add_argument(
        "--reload",
        action="store_true",
        default=False,
        help="Enable auto-reload (development only)",
    )
    cli.add_argument(
        "--workers",
        type=int,
        default=1,
        help="Number of worker processes (default: 1, incompatible with --reload)",
    )
    return cli


def main() -> None:
    """Parse command-line options and launch the server under uvicorn.

    Exits with status 1 (after printing a hint to stderr) when uvicorn
    cannot be imported.
    """
    options = _build_parser().parse_args()

    # Imported lazily so a missing uvicorn yields a friendly message
    # instead of a traceback at module import time.
    try:
        import uvicorn
    except ImportError:
        print("uvicorn is required. Install it with: pip install uvicorn[standard]", file=sys.stderr)
        sys.exit(1)

    # --reload and multiple workers are mutually exclusive; reload wins.
    worker_count = 1 if options.reload else options.workers

    uvicorn.run(
        "spectra:app",
        host=options.host,
        port=options.port,
        reload=options.reload,
        workers=worker_count,
    )


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Server configuration from environment variables."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from dotenv import load_dotenv
|
|
6
|
+
|
|
7
|
+
load_dotenv()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _require(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        ValueError: If the variable is unset or set to an empty string.
    """
    value = os.environ.get(name)
    if value:
        return value
    raise ValueError(f"Required environment variable {name} is not set")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _opt(name: str, default: str = "") -> str:
    """Return environment variable ``name``, or ``default`` when it is unset.

    A variable that is set to an empty string is returned as-is (the
    default applies only when the variable is absent).
    """
    return os.getenv(name, default)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# BigQuery
# Both settings are optional at import time; an empty string means
# "not configured" and is checked by the code that talks to BigQuery.
BIGQUERY_PROJECT_ID = _opt("BIGQUERY_PROJECT_ID")
BIGQUERY_DATASET = _opt("BIGQUERY_DATASET")

# Server
HOST = _opt("HOST", "0.0.0.0")
PORT = int(_opt("PORT", "8000"))

# CORS (comma-separated origins, or * for all)
# Each entry is stripped and empty entries are dropped, so a value such as
# "https://a.example, https://b.example" (space after the comma) or a
# trailing comma does not produce origins that can never match a request.
CORS_ORIGINS = [
    origin.strip()
    for origin in _opt("CORS_ORIGINS", "*").split(",")
    if origin.strip()
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def bigquery_configured() -> bool:
    """Report whether both the BigQuery project ID and dataset are non-empty."""
    return all((BIGQUERY_PROJECT_ID, BIGQUERY_DATASET))
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Event models and BigQuery ingestion."""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Event(BaseModel):
    """Analytics event - aligned with planning/events.md schema."""

    # Only the four identifying fields below are required; every other
    # field is optional and defaults to None.

    # --- Base: required ---
    event_id: str
    event_timestamp: str
    event_name: str
    session_id: str

    # --- Base: optional ---
    page_url: str | None = None
    user_agent: str | None = None
    spectra_version: str | None = None
    account_id: str | None = None

    # --- 1. Page context ---
    page_title: str | None = None
    page_path: str | None = None
    page_hostname: str | None = None
    referrer: str | None = None
    referrer_domain: str | None = None
    previous_page_url: str | None = None
    page_type: str | None = None
    canonical_url: str | None = None
    language: str | None = None

    # --- 2. User context ---
    user_id: str | None = None
    anonymous_id: str | None = None
    user_type: str | None = None

    # --- 3. Traffic and attribution ---
    utm_source: str | None = None
    utm_medium: str | None = None
    utm_campaign: str | None = None
    utm_term: str | None = None
    utm_content: str | None = None
    gclid: str | None = None
    fbclid: str | None = None
    ttclid: str | None = None
    traffic_source: str | None = None
    traffic_medium: str | None = None
    campaign_id: str | None = None
    ad_group_id: str | None = None
    creative_id: str | None = None
    landing_page: str | None = None
    first_touch_source: str | None = None
    first_touch_medium: str | None = None
    first_touch_campaign: str | None = None

    # --- 4. Device and technical ---
    device_type: str | None = None
    browser: str | None = None
    browser_version: str | None = None
    operating_system: str | None = None
    os_version: str | None = None
    screen_resolution: str | None = None
    viewport_size: str | None = None
    timezone: str | None = None
    connection_type: str | None = None

    # --- 5. Timestamp and timing ---
    event_date: str | None = None
    event_time: str | None = None
    local_time: str | None = None
    time_on_page: float | None = None

    # --- Click events ---
    element_tag: str | None = None
    element_id: str | None = None
    element_classes: str | None = None
    element_text: str | None = None
    element_href: str | None = None
    position_x: int | None = None
    position_y: int | None = None

    # --- Scroll events ---
    scroll_depth_pct: float | None = None
    scroll_y: int | None = None
    page_height: int | None = None
    viewport_height: int | None = None

    # --- Form events ---
    form_id: str | None = None
    form_action: str | None = None
    form_method: str | None = None
    field_count: int | None = None
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def event_to_row(event: Event) -> dict:
    """Serialize an event into a plain dict suitable for a BigQuery row.

    Unset optional fields are kept in the dict with a value of None.
    """
    row = event.model_dump()
    return row
|