spectra-server 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
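+ # BigQuery (required for event storage; each account's events are written to the table named after its account ID, taken from the X-Account-ID header or the body's account_id)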
2
+ BIGQUERY_PROJECT_ID=your-gcp-project
3
+ BIGQUERY_DATASET=analytics
4
+
5
+ # Server
6
+ HOST=0.0.0.0
7
+ PORT=8000
8
+
9
+ # CORS - comma-separated origins, or * for all
10
+ CORS_ORIGINS=*
11
+
12
+ # Google Auth (for BigQuery) - one of:
13
+ # GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
14
+ # GOOGLE_APPLICATION_CREDENTIALS_JSON=base64-encoded-service-account-json (used when GOOGLE_APPLICATION_CREDENTIALS is not set)
@@ -0,0 +1,35 @@
1
+ # Python
2
+ .venv/
3
+ __pycache__/
4
+ *.py[cod]
5
+ *.egg-info/
6
+ *.egg
7
+ .sqlite/
8
+
9
+ # Node / TypeScript
10
+ .npmrc
11
+ node_modules/
12
+ dist/
13
+ *.tsbuildinfo
14
+
15
+ # Env & secrets
16
+ .env
17
+ .env.local
18
+ secrets/
19
+
20
+ # IDE
21
+ .idea/
22
+ .vscode/
23
+
24
+ # OS
25
+ .DS_Store
26
+
27
+ # Logs
28
+ *.log
29
+
30
+ # Testing
31
+ tests/
32
+
33
+ # AI
34
+ .cursor/
35
+ planning/
@@ -0,0 +1,4 @@
1
+ .PHONY: start
2
+
3
+ start:
4
+ uvicorn app:app --reload --host 0.0.0.0 --port 8000
@@ -0,0 +1,200 @@
1
+ Metadata-Version: 2.4
2
+ Name: spectra-server
3
+ Version: 1.0.0
4
+ Summary: Analytics event ingestion server for Spectra
5
+ License: MIT
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: fastapi>=0.104.0
8
+ Requires-Dist: google-cloud-bigquery>=3.0.0
9
+ Requires-Dist: pydantic>=2.0.0
10
+ Requires-Dist: python-dotenv>=1.0.0
11
+ Requires-Dist: uvicorn[standard]>=0.24.0
12
+ Provides-Extra: dev
13
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
14
+ Description-Content-Type: text/markdown
15
+
16
+ # spectra-server
17
+
18
+ The Spectra event ingestion server. Receives analytics events from the [Spectra tracker script](../script/README.md) and writes them to Google BigQuery.
19
+
20
+ Built with [FastAPI](https://fastapi.tiangolo.com/) and [Pydantic](https://docs.pydantic.dev/).
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install spectra-server
26
+ ```
27
+
28
+ Requires Python 3.11+.
29
+
30
+ ## Running the server
31
+
32
+ ### Option A — CLI (quickest)
33
+
34
+ Set the required environment variables (see [Configuration](#configuration)), then:
35
+
36
+ ```bash
37
+ spectra-server
38
+ ```
39
+
40
+ Available flags:
41
+
42
+ | Flag | Default | Description |
43
+ |------|---------|-------------|
44
+ | `--host` | `0.0.0.0` | Bind host (overrides `HOST` env var) |
45
+ | `--port` | `8000` | Bind port (overrides `PORT` env var) |
46
+ | `--reload` | off | Enable auto-reload (development only) |
47
+ | `--workers` | `1` | Number of worker processes |
48
+
49
+ ### Option B — uvicorn directly
50
+
51
+ ```bash
52
+ uvicorn spectra:app --host 0.0.0.0 --port 8000
53
+ ```
54
+
55
+ ## Configuration
56
+
57
+ Copy `.env.example` to `.env` and fill in the values. The server reads configuration from environment variables (loaded via `python-dotenv`):
58
+
59
+ | Variable | Required | Description |
60
+ |----------|----------|-------------|
61
+ | `BIGQUERY_PROJECT_ID` | Yes (for storage) | GCP project ID |
62
+ | `BIGQUERY_DATASET` | Yes (for storage) | BigQuery dataset name |
63
+ | `GOOGLE_APPLICATION_CREDENTIALS` | One of the two | Path to a service account JSON file |
64
+ | `GOOGLE_APPLICATION_CREDENTIALS_JSON` | One of the two | Base64-encoded service account JSON |
65
+ | `HOST` | No | Bind host (default: `0.0.0.0`) |
66
+ | `PORT` | No | Bind port (default: `8000`) |
67
+ | `CORS_ORIGINS` | No | Comma-separated allowed origins, or `*` (default: `*`) |
68
+
69
+ If either `BIGQUERY_PROJECT_ID` or `BIGQUERY_DATASET` is unset, the server still starts successfully but discards events — useful for local development without a GCP project.
70
+
71
+ ## API
72
+
73
+ ### `POST /track`
74
+
75
+ Ingest one or more events.
76
+
77
+ **Headers**
78
+
79
+ | Header | Description |
80
+ |--------|-------------|
81
+ | `X-Account-ID` | Tenant / BigQuery table name. Can also be passed in the request body. |
82
+ | `Content-Type` | `application/json` or `text/plain` (plain text avoids CORS preflight for `navigator.sendBeacon`) |
83
+
84
+ **Body**
85
+
86
+ ```json
87
+ {
88
+ "account_id": "optional_if_provided_in_header",
89
+ "events": [
90
+ { "event_id": "evt_123", "event_timestamp": "2024-01-01T00:00:00Z", "event_name": "page_view", "session_id": "sess_456" }
91
+ ]
92
+ }
93
+ ```
94
+
95
+ **Response**
96
+
97
+ ```json
98
+ { "status": "ok", "count": 1 }
99
+ ```
100
+
101
+ ### `GET /health`
102
+
103
+ Returns server status and whether BigQuery is configured.
104
+
105
+ ```json
106
+ { "status": "ok", "bigquery": "configured" }
107
+ ```
108
+
109
+ ## Extending the server
110
+
111
+ The `create_app()` factory lets you mount your own middleware — API key validation, rate limiting, authentication, request logging, etc. — without forking the codebase.
112
+
113
+ ### Adding middleware
114
+
115
+ ```python
116
+ # myapp.py
117
+ from spectra import create_app
118
+ from my_auth import APIKeyMiddleware
119
+
120
+ app = create_app(
121
+ middleware=[
122
+ (APIKeyMiddleware, {"header": "X-API-Key", "keys": ["sk-live-..."]}),
123
+ ]
124
+ )
125
+ ```
126
+
127
+ Then run it:
128
+
129
+ ```bash
130
+ uvicorn myapp:app --host 0.0.0.0 --port 8000
131
+ ```
132
+
133
+ Middleware entries are `(MiddlewareClass, kwargs_dict)` tuples. The first entry in the list is the outermost layer (runs first on incoming requests).
134
+
135
+ ### API key validation example
136
+
137
+ Here is a minimal Starlette middleware that validates a bearer token:
138
+
139
+ ```python
140
+ # auth.py
141
+ from starlette.middleware.base import BaseHTTPMiddleware
142
+ from starlette.requests import Request
143
+ from starlette.responses import JSONResponse
144
+
145
+ class APIKeyMiddleware(BaseHTTPMiddleware):
146
+ def __init__(self, app, *, keys: list[str], header: str = "Authorization"):
147
+ super().__init__(app)
148
+ self.keys = set(keys)
149
+ self.header = header
150
+
151
+ async def dispatch(self, request: Request, call_next):
152
+ if request.url.path == "/health":
153
+ return await call_next(request)
154
+ token = request.headers.get(self.header, "").removeprefix("Bearer ").strip()
155
+ if token not in self.keys:
156
+ return JSONResponse({"detail": "Unauthorized"}, status_code=401)
157
+ return await call_next(request)
158
+ ```
159
+
160
+ ```python
161
+ # myapp.py
162
+ from spectra import create_app
163
+ from auth import APIKeyMiddleware
164
+
165
+ app = create_app(
166
+ middleware=[
167
+ (APIKeyMiddleware, {"keys": ["sk-live-abc123"], "header": "Authorization"}),
168
+ ]
169
+ )
170
+ ```
171
+
172
+ ### Overriding FastAPI settings
173
+
174
+ Any keyword argument not recognised by `create_app` is forwarded to `FastAPI()`:
175
+
176
+ ```python
177
+ app = create_app(
178
+ cors_origins=["https://myapp.com"],
179
+ docs_url=None, # disable Swagger UI in production
180
+ redoc_url=None,
181
+ )
182
+ ```
183
+
184
+ ## Local development
185
+
186
+ ```bash
187
+ git clone https://github.com/mvallejo3/spectra.git
188
+ cd spectra/server
189
+
190
+ python -m venv .venv && source .venv/bin/activate
191
+ pip install -e ".[dev]"
192
+
193
+ cp .env.example .env # fill in your values
194
+
195
+ make start # uvicorn app:app --reload
196
+ ```
197
+
198
+ ## License
199
+
200
+ [MIT](../LICENSE)
@@ -0,0 +1,185 @@
1
+ # spectra-server
2
+
3
+ The Spectra event ingestion server. Receives analytics events from the [Spectra tracker script](../script/README.md) and writes them to Google BigQuery.
4
+
5
+ Built with [FastAPI](https://fastapi.tiangolo.com/) and [Pydantic](https://docs.pydantic.dev/).
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install spectra-server
11
+ ```
12
+
13
+ Requires Python 3.11+.
14
+
15
+ ## Running the server
16
+
17
+ ### Option A — CLI (quickest)
18
+
19
+ Set the required environment variables (see [Configuration](#configuration)), then:
20
+
21
+ ```bash
22
+ spectra-server
23
+ ```
24
+
25
+ Available flags:
26
+
27
+ | Flag | Default | Description |
28
+ |------|---------|-------------|
29
+ | `--host` | `0.0.0.0` | Bind host (overrides `HOST` env var) |
30
+ | `--port` | `8000` | Bind port (overrides `PORT` env var) |
31
+ | `--reload` | off | Enable auto-reload (development only) |
32
+ | `--workers` | `1` | Number of worker processes |
33
+
34
+ ### Option B — uvicorn directly
35
+
36
+ ```bash
37
+ uvicorn spectra:app --host 0.0.0.0 --port 8000
38
+ ```
39
+
40
+ ## Configuration
41
+
42
+ Copy `.env.example` to `.env` and fill in the values. The server reads configuration from environment variables (loaded via `python-dotenv`):
43
+
44
+ | Variable | Required | Description |
45
+ |----------|----------|-------------|
46
+ | `BIGQUERY_PROJECT_ID` | Yes (for storage) | GCP project ID |
47
+ | `BIGQUERY_DATASET` | Yes (for storage) | BigQuery dataset name |
48
+ | `GOOGLE_APPLICATION_CREDENTIALS` | One of the two | Path to a service account JSON file |
49
+ | `GOOGLE_APPLICATION_CREDENTIALS_JSON` | One of the two | Base64-encoded service account JSON |
50
+ | `HOST` | No | Bind host (default: `0.0.0.0`) |
51
+ | `PORT` | No | Bind port (default: `8000`) |
52
+ | `CORS_ORIGINS` | No | Comma-separated allowed origins, or `*` (default: `*`) |
53
+
54
+ If either `BIGQUERY_PROJECT_ID` or `BIGQUERY_DATASET` is unset, the server still starts successfully but discards events — useful for local development without a GCP project.
55
+
56
+ ## API
57
+
58
+ ### `POST /track`
59
+
60
+ Ingest one or more events.
61
+
62
+ **Headers**
63
+
64
+ | Header | Description |
65
+ |--------|-------------|
66
+ | `X-Account-ID` | Tenant / BigQuery table name. Can also be passed in the request body. |
67
+ | `Content-Type` | `application/json` or `text/plain` (plain text avoids CORS preflight for `navigator.sendBeacon`) |
68
+
69
+ **Body**
70
+
71
+ ```json
72
+ {
73
+ "account_id": "optional_if_provided_in_header",
74
+ "events": [
75
+ { "event_id": "evt_123", "event_timestamp": "2024-01-01T00:00:00Z", "event_name": "page_view", "session_id": "sess_456" }
76
+ ]
77
+ }
78
+ ```
79
+
80
+ **Response**
81
+
82
+ ```json
83
+ { "status": "ok", "count": 1 }
84
+ ```
85
+
86
+ ### `GET /health`
87
+
88
+ Returns server status and whether BigQuery is configured.
89
+
90
+ ```json
91
+ { "status": "ok", "bigquery": "configured" }
92
+ ```
93
+
94
+ ## Extending the server
95
+
96
+ The `create_app()` factory lets you mount your own middleware — API key validation, rate limiting, authentication, request logging, etc. — without forking the codebase.
97
+
98
+ ### Adding middleware
99
+
100
+ ```python
101
+ # myapp.py
102
+ from spectra import create_app
103
+ from my_auth import APIKeyMiddleware
104
+
105
+ app = create_app(
106
+ middleware=[
107
+ (APIKeyMiddleware, {"header": "X-API-Key", "keys": ["sk-live-..."]}),
108
+ ]
109
+ )
110
+ ```
111
+
112
+ Then run it:
113
+
114
+ ```bash
115
+ uvicorn myapp:app --host 0.0.0.0 --port 8000
116
+ ```
117
+
118
+ Middleware entries are `(MiddlewareClass, kwargs_dict)` tuples. The first entry in the list is the outermost layer (runs first on incoming requests).
119
+
120
+ ### API key validation example
121
+
122
+ Here is a minimal Starlette middleware that validates a bearer token:
123
+
124
+ ```python
125
+ # auth.py
126
+ from starlette.middleware.base import BaseHTTPMiddleware
127
+ from starlette.requests import Request
128
+ from starlette.responses import JSONResponse
129
+
130
+ class APIKeyMiddleware(BaseHTTPMiddleware):
131
+ def __init__(self, app, *, keys: list[str], header: str = "Authorization"):
132
+ super().__init__(app)
133
+ self.keys = set(keys)
134
+ self.header = header
135
+
136
+ async def dispatch(self, request: Request, call_next):
137
+ if request.url.path == "/health":
138
+ return await call_next(request)
139
+ token = request.headers.get(self.header, "").removeprefix("Bearer ").strip()
140
+ if token not in self.keys:
141
+ return JSONResponse({"detail": "Unauthorized"}, status_code=401)
142
+ return await call_next(request)
143
+ ```
144
+
145
+ ```python
146
+ # myapp.py
147
+ from spectra import create_app
148
+ from auth import APIKeyMiddleware
149
+
150
+ app = create_app(
151
+ middleware=[
152
+ (APIKeyMiddleware, {"keys": ["sk-live-abc123"], "header": "Authorization"}),
153
+ ]
154
+ )
155
+ ```
156
+
157
+ ### Overriding FastAPI settings
158
+
159
+ Any keyword argument not recognised by `create_app` is forwarded to `FastAPI()`:
160
+
161
+ ```python
162
+ app = create_app(
163
+ cors_origins=["https://myapp.com"],
164
+ docs_url=None, # disable Swagger UI in production
165
+ redoc_url=None,
166
+ )
167
+ ```
168
+
169
+ ## Local development
170
+
171
+ ```bash
172
+ git clone https://github.com/mvallejo3/spectra.git
173
+ cd spectra/server
174
+
175
+ python -m venv .venv && source .venv/bin/activate
176
+ pip install -e ".[dev]"
177
+
178
+ cp .env.example .env # fill in your values
179
+
180
+ make start # uvicorn app:app --reload
181
+ ```
182
+
183
+ ## License
184
+
185
+ [MIT](../LICENSE)
@@ -0,0 +1,18 @@
1
+ """Local development entry point.
2
+
3
+ This module re-exports the default ``app`` instance from the ``spectra``
4
+ package so that the Makefile target ``uvicorn app:app`` continues to work
5
+ without modification.
6
+
7
+ For production or custom deployments, import from the package directly::
8
+
9
+ from spectra import app # default instance
10
+ from spectra import create_app # factory for custom middleware
11
+ """
12
+
13
+ from spectra import app # noqa: F401
14
+ from spectra.config import HOST, PORT
15
+
16
if __name__ == "__main__":
    # Direct execution (``python app.py``): serve the re-exported app on the
    # host and port taken from spectra.config.
    from uvicorn import run

    run(app, port=PORT, host=HOST)
@@ -0,0 +1,34 @@
1
+ [project]
2
+ name = "spectra-server"
3
+ version = "1.0.0"
4
+ description = "Analytics event ingestion server for Spectra"
5
+ readme = "README.md"
6
+ license = { text = "MIT" }
7
+ requires-python = ">=3.11"
8
+ dependencies = [
9
+ "fastapi>=0.104.0",
10
+ "python-dotenv>=1.0.0",
11
+ "google-cloud-bigquery>=3.0.0",
12
+ "uvicorn[standard]>=0.24.0",
13
+ "pydantic>=2.0.0",
14
+ ]
15
+
16
+ [project.scripts]
17
+ spectra-server = "spectra.cli:main"
18
+
19
+ [build-system]
20
+ requires = ["hatchling"]
21
+ build-backend = "hatchling.build"
22
+
23
+ [tool.hatch.build.targets.wheel]
24
+ packages = ["spectra"]
25
+
26
+ [project.optional-dependencies]
27
+ dev = [
28
+ "mypy>=1.0.0",
29
+ ]
30
+
31
+ [tool.mypy]
32
+ python_version = "3.11"
33
+ strict = true
34
+ warn_return_any = true
@@ -0,0 +1,9 @@
1
+ {
2
+ "venvPath": ".",
3
+ "venv": ".venv",
4
+ "include": ["spectra"],
5
+ "pythonVersion": "3.11",
6
+ "typeCheckingMode": "basic",
7
+ "reportMissingImports": true,
8
+ "reportMissingTypeStubs": false
9
+ }
@@ -0,0 +1,5 @@
1
+ fastapi>=0.104.0
2
+ google-cloud-bigquery>=3.0.0
3
+ uvicorn[standard]>=0.24.0
4
+ pydantic>=2.0.0
5
+ python-dotenv>=1.0.0
@@ -0,0 +1,80 @@
1
+ -- BigQuery table schema for Spectra analytics events
2
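+ -- Aligned with planning/events.md. Run in BigQuery once per account: replace `analytics` with your dataset and `events` with the account ID, because the server writes each account's events to <dataset>.<account_id>.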
3
+
4
+ CREATE TABLE IF NOT EXISTS `analytics.events` (
5
+ -- Base (required)
6
+ event_id STRING NOT NULL,
7
+ event_timestamp TIMESTAMP NOT NULL,
8
+ event_name STRING NOT NULL,
9
+ session_id STRING NOT NULL,
10
+ -- Base (optional)
11
+ page_url STRING,
12
+ user_agent STRING,
13
+ spectra_version STRING,
14
+ account_id STRING,
15
+ -- 1. Page Context
16
+ page_title STRING,
17
+ page_path STRING,
18
+ page_hostname STRING,
19
+ referrer STRING,
20
+ referrer_domain STRING,
21
+ previous_page_url STRING,
22
+ page_type STRING,
23
+ canonical_url STRING,
24
+ language STRING,
25
+ -- 2. User Context
26
+ user_id STRING,
27
+ anonymous_id STRING,
28
+ user_type STRING,
29
+ -- 3. Traffic & Attribution
30
+ utm_source STRING,
31
+ utm_medium STRING,
32
+ utm_campaign STRING,
33
+ utm_term STRING,
34
+ utm_content STRING,
35
+ gclid STRING,
36
+ fbclid STRING,
37
+ ttclid STRING,
38
+ traffic_source STRING,
39
+ traffic_medium STRING,
40
+ campaign_id STRING,
41
+ ad_group_id STRING,
42
+ creative_id STRING,
43
+ landing_page STRING,
44
+ first_touch_source STRING,
45
+ first_touch_medium STRING,
46
+ first_touch_campaign STRING,
47
+ -- 4. Device & Technical
48
+ device_type STRING,
49
+ browser STRING,
50
+ browser_version STRING,
51
+ operating_system STRING,
52
+ os_version STRING,
53
+ screen_resolution STRING,
54
+ viewport_size STRING,
55
+ timezone STRING,
56
+ connection_type STRING,
57
+ -- 5. Timestamp & Timing
58
+ event_date STRING,
59
+ event_time STRING,
60
+ local_time STRING,
61
+ time_on_page FLOAT64,
62
+ -- Click
63
+ element_tag STRING,
64
+ element_id STRING,
65
+ element_classes STRING,
66
+ element_text STRING,
67
+ element_href STRING,
68
+ position_x INT64,
69
+ position_y INT64,
70
+ -- Scroll
71
+ scroll_depth_pct FLOAT64,
72
+ scroll_y INT64,
73
+ page_height INT64,
74
+ viewport_height INT64,
75
+ -- Form
76
+ form_id STRING,
77
+ form_action STRING,
78
+ form_method STRING,
79
+ field_count INT64
80
+ );
@@ -0,0 +1,35 @@
1
+ """Spectra analytics ingestion server.
2
+
3
+ Quickstart
4
+ ----------
5
+ Install the package and run the built-in CLI::
6
+
7
+ pip install spectra-server
8
+ spectra-server
9
+
10
+ Custom middleware / extending the server
11
+ -----------------------------------------
12
+ Use :func:`create_app` to obtain a FastAPI instance with your own middleware
13
+ injected before running it::
14
+
15
+ # myapp.py
16
+ from spectra import create_app
17
+ from my_auth import APIKeyMiddleware
18
+
19
+ app = create_app(
20
+ middleware=[
21
+ (APIKeyMiddleware, {"header": "X-API-Key", "keys": ["sk-..."]}),
22
+ ]
23
+ )
24
+
25
+ Then point uvicorn at your module::
26
+
27
+ uvicorn myapp:app --host 0.0.0.0 --port 8000
28
+ """
29
+
30
+ from spectra.app import create_app
31
+ from fastapi import FastAPI
32
+
33
# Default application instance, built with no custom middleware.
# NOTE: the ``from spectra.app import create_app`` import earlier in this
# module binds the ``spectra.app`` submodule as the attribute ``app`` on this
# package; the assignment below deliberately rebinds that attribute to the
# FastAPI instance, so it must stay after the import.
app: FastAPI = create_app()

# Public API of the package: the default instance and the factory.
__all__ = ["app", "create_app"]
@@ -0,0 +1,144 @@
1
+ """Factory for the Spectra FastAPI application.
2
+
3
+ Developers who want to extend the server can call :func:`create_app` and
4
+ inject their own middleware (e.g. API-key validation, rate limiting, auth):
5
+
6
+ .. code-block:: python
7
+
8
+ from spectra import create_app
9
+ from my_auth import APIKeyMiddleware
10
+
11
+ app = create_app(
12
+ middleware=[
13
+ (APIKeyMiddleware, {"header": "X-API-Key", "keys": ["sk-..."]}),
14
+ ]
15
+ )
16
+
17
+ Then run with ``uvicorn mymodule:app``.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import asyncio
23
+ import json
24
+ import logging
25
+ from typing import Any
26
+
27
+ from fastapi import FastAPI, HTTPException, Request
28
+ from fastapi.middleware.cors import CORSMiddleware
29
+ from pydantic import ValidationError
30
+
31
+ from spectra.bigquery_client import insert_events
32
+ from spectra.config import CORS_ORIGINS, bigquery_configured
33
+ from spectra.events import Event
34
+
35
+ _MiddlewareEntry = tuple[type[Any], dict[str, Any]]
36
+
37
+
38
def create_app(
    *,
    cors_origins: list[str] | None = None,
    middleware: list[_MiddlewareEntry] | None = None,
    **fastapi_kwargs: Any,
) -> FastAPI:
    """Create and return a configured Spectra FastAPI application.

    Args:
        cors_origins: List of allowed CORS origins. Defaults to the
            ``CORS_ORIGINS`` value from :mod:`spectra.config`.
        middleware: Additional Starlette/FastAPI middleware to mount, each
            expressed as ``(MiddlewareClass, kwargs_dict)``. The first entry
            is the outermost of the custom layers. CORS middleware is
            registered before any custom entry, so it sits *inside* all of
            them. NOTE(review): a custom middleware that short-circuits a
            request therefore answers without passing through the CORS layer.
        **fastapi_kwargs: Extra keyword arguments forwarded to
            :class:`fastapi.FastAPI` (e.g. ``title``, ``docs_url``).
            ``title`` and ``description`` get Spectra defaults when omitted.

    Returns:
        A FastAPI app exposing ``POST /track`` and ``GET /health``.
    """
    fastapi_kwargs.setdefault("title", "Spectra JS")
    fastapi_kwargs.setdefault("description", "Event ingestion API for Spectra JS")

    app = FastAPI(**fastapi_kwargs)

    origins = cors_origins if cors_origins is not None else CORS_ORIGINS
    app.add_middleware(
        CORSMiddleware,
        allow_origins=origins,
        allow_credentials=False,
        allow_methods=["GET", "POST", "OPTIONS"],
        allow_headers=["*"],
    )

    # Starlette applies middleware in reverse registration order (last added =
    # outermost). To preserve the intuitive "first entry = outermost" contract
    # we reverse the list before adding.
    for cls, kwargs in reversed(middleware or []):
        app.add_middleware(cls, **kwargs)

    logger = logging.getLogger(__name__)
    # The event loop only keeps weak references to tasks; holding them here
    # prevents a fire-and-forget insert from being garbage-collected mid-flight.
    pending_inserts: set[asyncio.Task[None]] = set()

    def _on_insert_done(task: asyncio.Task[None]) -> None:
        """Release the finished insert task and log its failure, if any."""
        pending_inserts.discard(task)
        if task.cancelled():
            # task.exception() would raise CancelledError for a cancelled task.
            return
        exc = task.exception()
        if exc is not None:
            logger.error("Background BigQuery insert failed: %s", exc, exc_info=exc)

    @app.post("/track")
    async def ingest_events(request: Request) -> dict[str, Any]:
        """Ingest events and store them in BigQuery.

        Payload: ``{ account_id?: string, events: [...] }``

        ``account_id`` can be provided via the ``X-Account-ID`` header or in
        the request body; the header wins when both are present. Accepts
        ``application/json`` or ``text/plain`` (the latter avoids a CORS
        preflight for ``navigator.sendBeacon`` requests).

        Responds with HTTP 400 for malformed JSON, a non-object payload, a
        missing ``events`` array, a non-string ``account_id``, an event that
        fails validation, or (when BigQuery is configured) a missing account
        ID. The BigQuery insert runs in the background; its failures are
        logged rather than reported to the client.
        """
        header_account_id = (request.headers.get("X-Account-ID") or "").strip() or None
        body_bytes = await request.body()
        try:
            payload = json.loads(body_bytes)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            # json.loads decodes bytes itself; invalid UTF-8 raises
            # UnicodeDecodeError, which is not a JSONDecodeError.
            raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e
        if not isinstance(payload, dict):
            raise HTTPException(status_code=400, detail="Expected object with 'events' array")
        raw_events = payload.get("events")
        if not isinstance(raw_events, list):
            raise HTTPException(status_code=400, detail="Expected 'events' array")

        account_id = header_account_id
        if account_id is None:
            body_account_id = payload.get("account_id") or ""
            if not isinstance(body_account_id, str):
                # e.g. a numeric account_id; calling .strip() on it would crash.
                raise HTTPException(status_code=400, detail="'account_id' must be a string")
            account_id = body_account_id.strip() or None

        events: list[Event] = []
        for i, item in enumerate(raw_events):
            try:
                events.append(Event.model_validate(item))
            except ValidationError as e:
                raise HTTPException(
                    status_code=400, detail={"index": i, "errors": e.errors()}
                ) from e

        if not events:
            return {"status": "ok", "count": 0}

        if not bigquery_configured():
            return {
                "status": "ok",
                "message": "BigQuery not configured; events discarded",
                "count": len(events),
            }

        if not account_id:
            raise HTTPException(status_code=400, detail="X-Account-ID header is required")

        # insert_events is blocking, so it runs in a worker thread and the
        # response is sent without waiting for it.
        task = asyncio.create_task(asyncio.to_thread(insert_events, events, account_id))
        pending_inserts.add(task)
        task.add_done_callback(_on_insert_done)

        return {"status": "ok", "count": len(events)}

    @app.get("/health")
    async def health() -> dict[str, str]:
        """Health check endpoint reporting whether BigQuery is configured."""
        return {
            "status": "ok",
            "bigquery": "configured" if bigquery_configured() else "not_configured",
        }

    return app
@@ -0,0 +1,120 @@
1
+ """BigQuery client for inserting analytics events."""
2
+ import base64
3
+ import json
4
+ import os
5
+ import re
6
+
7
+ # Pyright ignores for unresolved imports:
8
+ # when the dependencies are not installed in the local venv (e.g. pip install
9
+ # failed due to SSL), the linter cannot resolve these packages.
10
+ import google.cloud.bigquery as bigquery # pyright: ignore[reportMissingImports]
11
+ from google.oauth2 import credentials # pyright: ignore[reportMissingImports]
12
+ from google.oauth2 import service_account # pyright: ignore[reportMissingImports]
13
+
14
+ from spectra.config import BIGQUERY_DATASET, BIGQUERY_PROJECT_ID
15
+ from spectra.events import Event, event_to_row
16
+
17
+
18
+ def _get_credentials():
19
+ """Return credentials from GOOGLE_APPLICATION_CREDENTIALS or GOOGLE_APPLICATION_CREDENTIALS_JSON."""
20
+ if os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
21
+ return None # Let ADC use the file path
22
+ json_b64 = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS_JSON")
23
+ if json_b64:
24
+ info = json.loads(base64.b64decode(json_b64).decode("utf-8"))
25
+ if info.get("type") == "authorized_user":
26
+ return credentials.Credentials(
27
+ token=None,
28
+ refresh_token=info.get("refresh_token"),
29
+ token_uri="https://oauth2.googleapis.com/token",
30
+ client_id=info.get("client_id"),
31
+ client_secret=info.get("client_secret"),
32
+ )
33
+ return service_account.Credentials.from_service_account_info(info)
34
+ return None
35
+
36
+
37
# Lazily created, module-wide BigQuery client (see get_client).
_client: bigquery.Client | None = None
# Account IDs double as table names; only this character set is accepted.
_ACCOUNT_ID_RE = re.compile(r"^[A-Za-z0-9_]+$")


def get_client() -> bigquery.Client:
    """Return the shared BigQuery client, creating it on first use.

    The client is built for ``BIGQUERY_PROJECT_ID`` and, when
    ``_get_credentials`` yields explicit credentials, with those credentials.
    The instance is cached in the module-level ``_client``; no locking is
    performed around its creation.
    """
    global _client
    if _client is not None:
        return _client

    explicit_creds = _get_credentials()
    client_kwargs: dict = {"project": BIGQUERY_PROJECT_ID or ""}
    if explicit_creds is not None:
        client_kwargs["credentials"] = explicit_creds
    _client = bigquery.Client(**client_kwargs)
    return _client
51
+
52
+
53
def insert_events(events: list[Event], account_id: str | None = None) -> None:
    """Insert a batch of events into the account's BigQuery table.

    The destination is ``BIGQUERY_PROJECT_ID.BIGQUERY_DATASET.<account_id>``
    and every row has its ``account_id`` column set to the given account.

    Args:
        events: Validated events to store. An empty list is a no-op.
        account_id: Account identifier used as the table name. Must match the
            same pattern ``fetch_events`` enforces (letters, digits and
            underscores), so anything written here can also be read back.

    Raises:
        ValueError: If ``account_id`` is missing or malformed, or BigQuery is
            not configured.
        RuntimeError: If BigQuery reports per-row insert errors.
    """
    if not events:
        return
    if not account_id:
        raise ValueError("account_id header is required")
    # The value originates from a request header/body; refuse anything that is
    # not a plain identifier before using it as a table name.
    if not _ACCOUNT_ID_RE.fullmatch(account_id):
        raise ValueError("account_id may only contain letters, numbers, and underscores")
    if not BIGQUERY_PROJECT_ID or not BIGQUERY_DATASET:
        raise ValueError("BigQuery is not configured")
    client = get_client()

    table_id = f"{BIGQUERY_PROJECT_ID}.{BIGQUERY_DATASET}.{account_id}"
    # Stamp the resolved account onto each row, overriding whatever the
    # client put in the event itself.
    rows = [event_to_row(e.model_copy(update={"account_id": account_id})) for e in events]
    errors = client.insert_rows_json(table_id, rows)
    if errors:
        raise RuntimeError(f"BigQuery insert failed: {errors}")
68
+
69
+
70
def fetch_events(account_id: str, limit: int = 1000) -> list[dict]:
    """Return the newest rows from a single account's event table.

    The table queried is ``BIGQUERY_PROJECT_ID.BIGQUERY_DATASET.<account_id>``;
    rows are ordered by ``event_timestamp`` descending and capped at *limit*.

    Raises:
        ValueError: If ``account_id`` is empty or contains characters other
            than letters, digits and underscores, if ``limit`` is not
            positive, or if BigQuery is not configured.
    """
    if not account_id:
        raise ValueError("account_id is required")
    # The id is interpolated into the SQL text below, so only a conservative
    # character set is accepted.
    if _ACCOUNT_ID_RE.fullmatch(account_id) is None:
        raise ValueError("account_id may only contain letters, numbers, and underscores")
    if limit <= 0:
        raise ValueError("limit must be greater than 0")
    if not (BIGQUERY_PROJECT_ID and BIGQUERY_DATASET):
        raise ValueError("BigQuery is not configured")

    bq = get_client()
    table_id = f"{BIGQUERY_PROJECT_ID}.{BIGQUERY_DATASET}.{account_id}"
    sql = f"SELECT * FROM `{table_id}` ORDER BY event_timestamp DESC LIMIT @limit"
    limit_param = bigquery.ScalarQueryParameter("limit", "INT64", limit)
    config = bigquery.QueryJobConfig(query_parameters=[limit_param])
    result_rows = bq.query(sql, job_config=config).result()

    fetched: list[dict] = []
    for row in result_rows:
        fetched.append(dict(row.items()))
    return fetched
98
+
99
+
100
def fetch_all_events(limit: int = 5000) -> list[dict]:
    """Return the newest rows across every table in the configured dataset.

    Uses the wildcard table ``BIGQUERY_PROJECT_ID.BIGQUERY_DATASET.*``, ordered
    by ``event_timestamp`` descending and capped at *limit*.

    Raises:
        ValueError: If ``limit`` is not positive or BigQuery is not configured.
    """
    if limit <= 0:
        raise ValueError("limit must be greater than 0")
    if not (BIGQUERY_PROJECT_ID and BIGQUERY_DATASET):
        raise ValueError("BigQuery is not configured")

    bq = get_client()
    wildcard_table = f"{BIGQUERY_PROJECT_ID}.{BIGQUERY_DATASET}.*"
    sql = f"SELECT * FROM `{wildcard_table}` ORDER BY event_timestamp DESC LIMIT @limit"
    limit_param = bigquery.ScalarQueryParameter("limit", "INT64", limit)
    config = bigquery.QueryJobConfig(query_parameters=[limit_param])
    result_rows = bq.query(sql, job_config=config).result()

    fetched: list[dict] = []
    for row in result_rows:
        fetched.append(dict(row.items()))
    return fetched
@@ -0,0 +1,67 @@
1
+ """Command-line entry point for the Spectra server.
2
+
3
+ Installed as the ``spectra-server`` script via ``[project.scripts]``.
4
+
5
+ Usage::
6
+
7
+ spectra-server
8
+ spectra-server --host 0.0.0.0 --port 8080
9
+ spectra-server --reload
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import argparse
15
+ import sys
16
+
17
+ from spectra.config import HOST, PORT
18
+
19
+
20
def main(argv: list[str] | None = None) -> None:
    """Parse arguments and start the uvicorn server.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default, used by the installed ``spectra-server`` script)
            reads the process arguments.
    """
    parser = argparse.ArgumentParser(
        prog="spectra-server",
        description="Run the Spectra event ingestion server.",
    )
    parser.add_argument(
        "--host",
        default=HOST,
        help=f"Bind host (default: {HOST}, override with HOST env var)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=PORT,
        help=f"Bind port (default: {PORT}, override with PORT env var)",
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        default=False,
        help="Enable auto-reload (development only)",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=1,
        help="Number of worker processes (default: 1, incompatible with --reload)",
    )
    args = parser.parse_args(argv)

    try:
        import uvicorn
    except ImportError:
        print("uvicorn is required. Install it with: pip install uvicorn[standard]", file=sys.stderr)
        sys.exit(1)

    workers = args.workers
    if args.reload and workers != 1:
        # --reload forces a single process; say so instead of silently
        # dropping the explicit --workers value.
        print("--workers is ignored when --reload is enabled", file=sys.stderr)
        workers = 1

    uvicorn.run(
        "spectra:app",
        host=args.host,
        port=args.port,
        reload=args.reload,
        workers=workers,
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,35 @@
1
+ """Server configuration from environment variables."""
2
+
3
+ import os
4
+
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+
9
+
10
def _require(name: str) -> str:
    """Return the value of environment variable *name*.

    Raises:
        ValueError: If the variable is unset or set to an empty string.
    """
    value = os.environ.get(name)
    if value:
        return value
    raise ValueError(f"Required environment variable {name} is not set")
15
+
16
+
17
def _opt(name: str, default: str = "") -> str:
    """Return environment variable *name*, or *default* when it is not set."""
    return os.environ.get(name, default)


def _parse_origins(raw: str) -> list[str]:
    """Split a comma-separated origin list, trimming whitespace and dropping empty entries."""
    return [origin for origin in (part.strip() for part in raw.split(",")) if origin]


# BigQuery
BIGQUERY_PROJECT_ID = _opt("BIGQUERY_PROJECT_ID")
BIGQUERY_DATASET = _opt("BIGQUERY_DATASET")

# Server
HOST = _opt("HOST", "0.0.0.0")
# ``or`` also covers PORT being set but empty (e.g. ``PORT=`` in .env), where
# int("") would otherwise raise at import time.
PORT = int(_opt("PORT") or "8000")

# CORS (comma-separated origins, or * for all). Entries are trimmed so that
# "https://a.com, https://b.com" matches both origins.
CORS_ORIGINS = _parse_origins(_opt("CORS_ORIGINS", "*"))
31
+
32
+
33
def bigquery_configured() -> bool:
    """Report whether both the BigQuery project ID and dataset are set."""
    if not BIGQUERY_PROJECT_ID:
        return False
    return bool(BIGQUERY_DATASET)
@@ -0,0 +1,97 @@
1
+ """Event models and BigQuery ingestion."""
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
class Event(BaseModel):
    """Analytics event - aligned with planning/events.md schema.

    Only the four base fields below are required; every other field is
    optional and defaults to ``None``. Field names mirror the columns of the
    BigQuery events table.
    """

    # Base (required)
    event_id: str
    # Kept as a string by this model; no timestamp parsing happens here.
    event_timestamp: str
    event_name: str
    session_id: str

    # Base (optional)
    page_url: str | None = None
    user_agent: str | None = None
    spectra_version: str | None = None
    # NOTE(review): presumably overwritten at insert time with the account
    # resolved from the request - confirm against the BigQuery insert path.
    account_id: str | None = None

    # 1. Page Context
    page_title: str | None = None
    page_path: str | None = None
    page_hostname: str | None = None
    referrer: str | None = None
    referrer_domain: str | None = None
    previous_page_url: str | None = None
    page_type: str | None = None
    canonical_url: str | None = None
    language: str | None = None

    # 2. User Context
    user_id: str | None = None
    anonymous_id: str | None = None
    user_type: str | None = None

    # 3. Traffic & Attribution
    utm_source: str | None = None
    utm_medium: str | None = None
    utm_campaign: str | None = None
    utm_term: str | None = None
    utm_content: str | None = None
    gclid: str | None = None
    fbclid: str | None = None
    ttclid: str | None = None
    traffic_source: str | None = None
    traffic_medium: str | None = None
    campaign_id: str | None = None
    ad_group_id: str | None = None
    creative_id: str | None = None
    landing_page: str | None = None
    first_touch_source: str | None = None
    first_touch_medium: str | None = None
    first_touch_campaign: str | None = None

    # 4. Device & Technical
    device_type: str | None = None
    browser: str | None = None
    browser_version: str | None = None
    operating_system: str | None = None
    os_version: str | None = None
    screen_resolution: str | None = None
    viewport_size: str | None = None
    timezone: str | None = None
    connection_type: str | None = None

    # 5. Timestamp & Timing
    event_date: str | None = None
    event_time: str | None = None
    local_time: str | None = None
    time_on_page: float | None = None

    # Click-specific
    element_tag: str | None = None
    element_id: str | None = None
    element_classes: str | None = None
    element_text: str | None = None
    element_href: str | None = None
    position_x: int | None = None
    position_y: int | None = None

    # Scroll-specific
    scroll_depth_pct: float | None = None
    scroll_y: int | None = None
    page_height: int | None = None
    viewport_height: int | None = None

    # Form-specific
    form_id: str | None = None
    form_action: str | None = None
    form_method: str | None = None
    field_count: int | None = None
93
+
94
+
95
def event_to_row(event: Event) -> dict:
    """Dump *event* into a plain dict for use as a BigQuery row.

    Fields left unset stay ``None`` in the resulting dict.
    """
    row = event.model_dump()
    return row