uaforger 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uaforger-0.1.0/MANIFEST.in +13 -0
- uaforger-0.1.0/PKG-INFO +184 -0
- uaforger-0.1.0/README.md +159 -0
- uaforger-0.1.0/pyproject.toml +41 -0
- uaforger-0.1.0/setup.cfg +4 -0
- uaforger-0.1.0/uaforge/__init__.py +1 -0
- uaforger-0.1.0/uaforge/core/__init__.py +0 -0
- uaforger-0.1.0/uaforge/core/alias_sampler.py +97 -0
- uaforger-0.1.0/uaforge/core/client_hints.py +232 -0
- uaforger-0.1.0/uaforge/core/generator.py +410 -0
- uaforger-0.1.0/uaforge/core/versioning.py +189 -0
- uaforger-0.1.0/uaforge/data/__init__.py +5 -0
- uaforger-0.1.0/uaforge/data/android_device_specs.json +8127 -0
- uaforger-0.1.0/uaforge/data/chrome_versions.json +1 -0
- uaforger-0.1.0/uaforge/data/chromium_versions.json +1 -0
- uaforger-0.1.0/uaforge/data/device_models.json +1 -0
- uaforger-0.1.0/uaforge/data/edge_versions.json +1 -0
- uaforger-0.1.0/uaforge/data/loader.py +326 -0
- uaforger-0.1.0/uaforge/data/mappings.py +19 -0
- uaforger-0.1.0/uaforge/data/market_share.json +1 -0
- uaforger-0.1.0/uaforge/data/opera_versions.json +1 -0
- uaforger-0.1.0/uaforge/data/os_distribution.json +1 -0
- uaforger-0.1.0/uaforge/exceptions.py +14 -0
- uaforger-0.1.0/uaforge/models/__init__.py +0 -0
- uaforger-0.1.0/uaforge/models/enums.py +37 -0
- uaforger-0.1.0/uaforge/models/objects.py +121 -0
- uaforger-0.1.0/uaforger.egg-info/PKG-INFO +184 -0
- uaforger-0.1.0/uaforger.egg-info/SOURCES.txt +28 -0
- uaforger-0.1.0/uaforger.egg-info/dependency_links.txt +1 -0
- uaforger-0.1.0/uaforger.egg-info/top_level.txt +1 -0
uaforger-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: uaforger
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Statistically-weighted User-Agent + Sec-CH-UA Client-Hint generator backed by real-world market-share data.
|
|
5
|
+
Author-email: Sarper AVCI <sarperavci20@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/sarperavci/UAForge
|
|
8
|
+
Project-URL: Source, https://github.com/sarperavci/UAForge
|
|
9
|
+
Project-URL: Issues, https://github.com/sarperavci/UAForge/issues
|
|
10
|
+
Keywords: user-agent,client-hints,fingerprint,browser,scraping,automation
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
# UAForge
|
|
27
|
+
|
|
28
|
+
**Enterprise-grade, deterministic User Agent & Client Hint generator based on real-world browser statistics**
|
|
29
|
+
|
|
30
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
31
|
+
[License: MIT](LICENSE)
|
|
32
|
+
[Data source: caniuse.com](https://caniuse.com/usage-table)
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
Most "random user-agent" libraries are broken. They generate outdated browser strings, mix incompatible OS combinations, or lack the modern headers that fingerprinting systems now check.
|
|
37
|
+
|
|
38
|
+
**UAForge takes a different approach.** Instead of picking random strings, it simulates real users based on **statistical probability**. If Chrome 143 on Android holds 40% global market share, UAForge generates that identity 40% of the time.
|
|
39
|
+
|
|
40
|
+
It also generates matching **Client Hints (`Sec-CH-UA`)** headers automatically—allowing your automation to pass modern fingerprinting checks that go beyond the legacy User-Agent string.
|
|
41
|
+
|
|
42
|
+
### Key Features
|
|
43
|
+
|
|
44
|
+
* **Statistically Accurate** — Weighted by real-world global usage data, updated weekly from caniuse.com
|
|
45
|
+
* **Smart Correlations** — Enforces valid browser↔OS mappings (no Safari on Windows)
|
|
46
|
+
* **Real Hardware** — Injects actual device models (Pixel 9, Galaxy S24, etc.) for mobile agents
|
|
47
|
+
* **Client Hints** — Generates Sec-CH-UA, Sec-CH-UA-Mobile, Sec-CH-UA-Platform, and GREASE tokens
|
|
48
|
+
* **Deterministic** — Seed support for consistent, reproducible identities across sessions
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install git+https://github.com/sarperavci/uaforge.git
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
*Note:* The repository does not include the market-share data files because of their size. They are downloaded automatically from the release assets during installation.
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
## Quick Start
|
|
61
|
+
|
|
62
|
+
The API is designed to be simple. You generate an "Identity" object, which contains everything you need for your requests.
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from uaforge.core.generator import UserAgentGenerator
|
|
66
|
+
|
|
67
|
+
# 1. Initialize the generator
|
|
68
|
+
agent = UserAgentGenerator()
|
|
69
|
+
|
|
70
|
+
# 2. Generate an identity
|
|
71
|
+
identity = agent.generate()
|
|
72
|
+
|
|
73
|
+
# 3. Get the headers (includes User-Agent AND Client Hints)
|
|
74
|
+
headers = identity.get_headers()
|
|
75
|
+
full_client_hints = identity.get_all_client_hints()
|
|
76
|
+
|
|
77
|
+
# Use with requests/httpx
|
|
78
|
+
# response = requests.get("https://httpbin.org/headers", headers=headers)
|
|
79
|
+
|
|
80
|
+
print(f"Browser: {identity.meta_browser.value}")
|
|
81
|
+
print(f"OS: {identity.meta_os.value}")
|
|
82
|
+
print(headers)
|
|
83
|
+
print(full_client_hints)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Sample Output
|
|
87
|
+
|
|
88
|
+
Default Headers:
|
|
89
|
+
|
|
90
|
+
```json
|
|
91
|
+
{
|
|
92
|
+
"User-Agent": "Mozilla/5.0 (Linux; Android 13; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Mobile Safari/537.36",
|
|
93
|
+
"Sec-CH-UA": "\"Not A Brand\";v=\"99\", \"Chromium\";v=\"144\", \"Google Chrome\";v=\"144\"",
|
|
94
|
+
"Sec-CH-UA-Mobile": "?1",
|
|
95
|
+
"Sec-CH-UA-Platform": "\"Android\""
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Full Client Hints:
|
|
100
|
+
|
|
101
|
+
```json
|
|
102
|
+
{
|
|
103
|
+
"Sec-CH-UA": "\"Not A Brand\";v=\"99\", \"Chromium\";v=\"144\", \"Google Chrome\";v=\"144\"",
|
|
104
|
+
"Sec-CH-UA-Mobile": "?1",
|
|
105
|
+
"Sec-CH-UA-Platform": "\"Android\"",
|
|
106
|
+
"Sec-CH-UA-Full-Version-List": "\"Not A Brand\";v=\"99.0.0.0\", \"Chromium\";v=\"144.0.7559.59\", \"Google Chrome\";v=\"144.0.7559.59\"",
|
|
107
|
+
"Sec-CH-UA-Platform-Version": "13.0.0",
|
|
108
|
+
"Sec-CH-UA-Model": "\"Pixel 9\"",
|
|
109
|
+
"Sec-CH-UA-Arch": "\"arm\"",
|
|
110
|
+
"Sec-CH-UA-Bitness": "\"64\"",
|
|
111
|
+
"Sec-CH-UA-Full-Version": "\"144.0.7559.59\"",
|
|
112
|
+
"Sec-CH-UA-Form-Factors": "\"Mobile\"",
|
|
113
|
+
"Sec-CH-UA-WoW64": "?0",
|
|
114
|
+
"Sec-CH-Prefers-Color-Scheme": "dark"
|
|
115
|
+
}
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Advanced Usage
|
|
119
|
+
|
|
120
|
+
### Deterministic Generation (Sessions)
|
|
121
|
+
If you are managing long-running sessions, you need the User Agent to stay consistent across restarts. Use a seed.
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
# The identity generated here will ALWAYS be the same for seed 42
|
|
125
|
+
user = UserAgentGenerator(seed=42).generate()
|
|
126
|
+
|
|
127
|
+
print(user.user_agent)
|
|
128
|
+
# Useful for associating a UA with a specific database UserID
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Accessing Granular Data
|
|
132
|
+
Sometimes you need just the OS version or just the device model for analytics.
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
identity = agent.generate()
|
|
136
|
+
|
|
137
|
+
if identity.meta_device == "mobile":
|
|
138
|
+
    print(f"Device Model: {identity.ch_model}") # e.g. "Pixel 8 Pro"
|
|
139
|
+
print(f"Architecture: {identity.ch_arch}") # e.g. "arm"
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## How it works
|
|
143
|
+
|
|
144
|
+
### The Data Sources
|
|
145
|
+
|
|
146
|
+
We don't guess. We utilize three distinct data layers:
|
|
147
|
+
|
|
148
|
+
1. **`market_share.json`**: Global browser usage stats (Chrome, Safari, Edge, Firefox).
|
|
149
|
+
2. **`os_distribution.json`**: The probability of an OS given a specific browser (e.g., Safari is 100% macOS/iOS, but Chrome is split between Windows, Mac, Linux, and Android).
|
|
150
|
+
3. **`device_models.json`**: A curated list of ~500 real-world mobile device fingerprints.
|
|
151
|
+
|
|
152
|
+
## Maintenance & Updates
|
|
153
|
+
|
|
154
|
+
The browser ecosystem moves fast. Market share data is **automatically updated weekly** via GitHub Actions by parsing the latest data from [caniuse.com](https://caniuse.com/usage-table).
|
|
155
|
+
|
|
156
|
+
You can also trigger a manual update by running:
|
|
157
|
+
```bash
|
|
158
|
+
python scripts/parse_caniuse.py
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Current Market Share Distribution
|
|
162
|
+
|
|
163
|
+
The table below shows the aggregated browser market share from the current dataset. Data is sourced from caniuse.com and updated automatically.
|
|
164
|
+
|
|
165
|
+
| Browser | Market Share |
|
|
166
|
+
|---------|-------------|
|
|
167
|
+
| Chrome for Android | 57.48% |
|
|
168
|
+
| Chrome (Desktop) | 17.61% |
|
|
169
|
+
| iOS Safari | 12.43% |
|
|
170
|
+
| Edge | 4.60% |
|
|
171
|
+
| Safari (Desktop) | 2.20% |
|
|
172
|
+
| Samsung Internet | 1.73% |
|
|
173
|
+
| Firefox | 1.62% |
|
|
174
|
+
| Opera Mobile | 0.80% |
|
|
175
|
+
| UC Browser | 0.57% |
|
|
176
|
+
| Opera | 0.39% |
|
|
177
|
+
| Firefox for Android | 0.31% |
|
|
178
|
+
| IE | 0.26% |
|
|
179
|
+
|
|
180
|
+
*Last updated: 03-05-2026*
|
|
181
|
+
|
|
182
|
+
## License
|
|
183
|
+
|
|
184
|
+
MIT License. Feel free to use this in your commercial scrapers, bots or testing suites.
|
uaforger-0.1.0/README.md
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# UAForge
|
|
2
|
+
|
|
3
|
+
**Enterprise-grade, deterministic User Agent & Client Hint generator based on real-world browser statistics**
|
|
4
|
+
|
|
5
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
[Data source: caniuse.com](https://caniuse.com/usage-table)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
Most "random user-agent" libraries are broken. They generate outdated browser strings, mix incompatible OS combinations, or lack the modern headers that fingerprinting systems now check.
|
|
12
|
+
|
|
13
|
+
**UAForge takes a different approach.** Instead of picking random strings, it simulates real users based on **statistical probability**. If Chrome 143 on Android holds 40% global market share, UAForge generates that identity 40% of the time.
|
|
14
|
+
|
|
15
|
+
It also generates matching **Client Hints (`Sec-CH-UA`)** headers automatically—allowing your automation to pass modern fingerprinting checks that go beyond the legacy User-Agent string.
|
|
16
|
+
|
|
17
|
+
### Key Features
|
|
18
|
+
|
|
19
|
+
* **Statistically Accurate** — Weighted by real-world global usage data, updated weekly from caniuse.com
|
|
20
|
+
* **Smart Correlations** — Enforces valid browser↔OS mappings (no Safari on Windows)
|
|
21
|
+
* **Real Hardware** — Injects actual device models (Pixel 9, Galaxy S24, etc.) for mobile agents
|
|
22
|
+
* **Client Hints** — Generates Sec-CH-UA, Sec-CH-UA-Mobile, Sec-CH-UA-Platform, and GREASE tokens
|
|
23
|
+
* **Deterministic** — Seed support for consistent, reproducible identities across sessions
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install git+https://github.com/sarperavci/uaforge.git
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
*Note:* The repository does not include the market-share data files because of their size. They are downloaded automatically from the release assets during installation.
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
## Quick Start
|
|
36
|
+
|
|
37
|
+
The API is designed to be simple. You generate an "Identity" object, which contains everything you need for your requests.
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from uaforge.core.generator import UserAgentGenerator
|
|
41
|
+
|
|
42
|
+
# 1. Initialize the generator
|
|
43
|
+
agent = UserAgentGenerator()
|
|
44
|
+
|
|
45
|
+
# 2. Generate an identity
|
|
46
|
+
identity = agent.generate()
|
|
47
|
+
|
|
48
|
+
# 3. Get the headers (includes User-Agent AND Client Hints)
|
|
49
|
+
headers = identity.get_headers()
|
|
50
|
+
full_client_hints = identity.get_all_client_hints()
|
|
51
|
+
|
|
52
|
+
# Use with requests/httpx
|
|
53
|
+
# response = requests.get("https://httpbin.org/headers", headers=headers)
|
|
54
|
+
|
|
55
|
+
print(f"Browser: {identity.meta_browser.value}")
|
|
56
|
+
print(f"OS: {identity.meta_os.value}")
|
|
57
|
+
print(headers)
|
|
58
|
+
print(full_client_hints)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Sample Output
|
|
62
|
+
|
|
63
|
+
Default Headers:
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
{
|
|
67
|
+
"User-Agent": "Mozilla/5.0 (Linux; Android 13; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Mobile Safari/537.36",
|
|
68
|
+
"Sec-CH-UA": "\"Not A Brand\";v=\"99\", \"Chromium\";v=\"144\", \"Google Chrome\";v=\"144\"",
|
|
69
|
+
"Sec-CH-UA-Mobile": "?1",
|
|
70
|
+
"Sec-CH-UA-Platform": "\"Android\""
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Full Client Hints:
|
|
75
|
+
|
|
76
|
+
```json
|
|
77
|
+
{
|
|
78
|
+
"Sec-CH-UA": "\"Not A Brand\";v=\"99\", \"Chromium\";v=\"144\", \"Google Chrome\";v=\"144\"",
|
|
79
|
+
"Sec-CH-UA-Mobile": "?1",
|
|
80
|
+
"Sec-CH-UA-Platform": "\"Android\"",
|
|
81
|
+
"Sec-CH-UA-Full-Version-List": "\"Not A Brand\";v=\"99.0.0.0\", \"Chromium\";v=\"144.0.7559.59\", \"Google Chrome\";v=\"144.0.7559.59\"",
|
|
82
|
+
"Sec-CH-UA-Platform-Version": "13.0.0",
|
|
83
|
+
"Sec-CH-UA-Model": "\"Pixel 9\"",
|
|
84
|
+
"Sec-CH-UA-Arch": "\"arm\"",
|
|
85
|
+
"Sec-CH-UA-Bitness": "\"64\"",
|
|
86
|
+
"Sec-CH-UA-Full-Version": "\"144.0.7559.59\"",
|
|
87
|
+
"Sec-CH-UA-Form-Factors": "\"Mobile\"",
|
|
88
|
+
"Sec-CH-UA-WoW64": "?0",
|
|
89
|
+
"Sec-CH-Prefers-Color-Scheme": "dark"
|
|
90
|
+
}
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Advanced Usage
|
|
94
|
+
|
|
95
|
+
### Deterministic Generation (Sessions)
|
|
96
|
+
If you are managing long-running sessions, you need the User Agent to stay consistent across restarts. Use a seed.
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
# The identity generated here will ALWAYS be the same for seed 42
|
|
100
|
+
user = UserAgentGenerator(seed=42).generate()
|
|
101
|
+
|
|
102
|
+
print(user.user_agent)
|
|
103
|
+
# Useful for associating a UA with a specific database UserID
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Accessing Granular Data
|
|
107
|
+
Sometimes you need just the OS version or just the device model for analytics.
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
identity = agent.generate()
|
|
111
|
+
|
|
112
|
+
if identity.meta_device == "mobile":
|
|
113
|
+
    print(f"Device Model: {identity.ch_model}") # e.g. "Pixel 8 Pro"
|
|
114
|
+
print(f"Architecture: {identity.ch_arch}") # e.g. "arm"
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## How it works
|
|
118
|
+
|
|
119
|
+
### The Data Sources
|
|
120
|
+
|
|
121
|
+
We don't guess. We utilize three distinct data layers:
|
|
122
|
+
|
|
123
|
+
1. **`market_share.json`**: Global browser usage stats (Chrome, Safari, Edge, Firefox).
|
|
124
|
+
2. **`os_distribution.json`**: The probability of an OS given a specific browser (e.g., Safari is 100% macOS/iOS, but Chrome is split between Windows, Mac, Linux, and Android).
|
|
125
|
+
3. **`device_models.json`**: A curated list of ~500 real-world mobile device fingerprints.
|
|
126
|
+
|
|
127
|
+
## Maintenance & Updates
|
|
128
|
+
|
|
129
|
+
The browser ecosystem moves fast. Market share data is **automatically updated weekly** via GitHub Actions by parsing the latest data from [caniuse.com](https://caniuse.com/usage-table).
|
|
130
|
+
|
|
131
|
+
You can also trigger a manual update by running:
|
|
132
|
+
```bash
|
|
133
|
+
python scripts/parse_caniuse.py
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Current Market Share Distribution
|
|
137
|
+
|
|
138
|
+
The table below shows the aggregated browser market share from the current dataset. Data is sourced from caniuse.com and updated automatically.
|
|
139
|
+
|
|
140
|
+
| Browser | Market Share |
|
|
141
|
+
|---------|-------------|
|
|
142
|
+
| Chrome for Android | 57.48% |
|
|
143
|
+
| Chrome (Desktop) | 17.61% |
|
|
144
|
+
| iOS Safari | 12.43% |
|
|
145
|
+
| Edge | 4.60% |
|
|
146
|
+
| Safari (Desktop) | 2.20% |
|
|
147
|
+
| Samsung Internet | 1.73% |
|
|
148
|
+
| Firefox | 1.62% |
|
|
149
|
+
| Opera Mobile | 0.80% |
|
|
150
|
+
| UC Browser | 0.57% |
|
|
151
|
+
| Opera | 0.39% |
|
|
152
|
+
| Firefox for Android | 0.31% |
|
|
153
|
+
| IE | 0.26% |
|
|
154
|
+
|
|
155
|
+
*Last updated: 03-05-2026*
|
|
156
|
+
|
|
157
|
+
## License
|
|
158
|
+
|
|
159
|
+
MIT License. Feel free to use this in your commercial scrapers, bots or testing suites.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "uaforger"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Statistically-weighted User-Agent + Sec-CH-UA Client-Hint generator backed by real-world market-share data."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
authors = [{ name = "Sarper AVCI", email = "sarperavci20@gmail.com" }]
|
|
12
|
+
license = { text = "MIT" }
|
|
13
|
+
keywords = ["user-agent", "client-hints", "fingerprint", "browser", "scraping", "automation"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Programming Language :: Python",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Topic :: Internet :: WWW/HTTP",
|
|
26
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
27
|
+
]
|
|
28
|
+
dependencies = []
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/sarperavci/UAForge"
|
|
32
|
+
Source = "https://github.com/sarperavci/UAForge"
|
|
33
|
+
Issues = "https://github.com/sarperavci/UAForge/issues"
|
|
34
|
+
|
|
35
|
+
[tool.setuptools.packages.find]
|
|
36
|
+
where = ["."]
|
|
37
|
+
include = ["uaforge*"]
|
|
38
|
+
exclude = ["scripts*", "bench*", "tests*"]
|
|
39
|
+
|
|
40
|
+
[tool.setuptools.package-data]
|
|
41
|
+
"uaforge" = ["data/*.json", "data/*.txt"]
|
uaforger-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
File without changes
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class AliasSampler:
    """
    O(1) weighted random sampler using Vose's Alias Method.

    Construction preprocesses the weights into two parallel tables
    (``prob`` and ``alias``) in a single linear pass; every draw afterwards
    costs one fair die roll plus one biased coin flip.

    Usage:
        sampler = AliasSampler(weights, rng)
        index = sampler.sample()  # O(1) per call
    """

    def __init__(self, weights: List[float], rng=None):
        """
        Preprocess weights into alias table.

        Args:
            weights: List of weights (need not sum to 1, will be normalized).
                Every weight must be finite and non-negative.
            rng: Random instance to use (defaults to random module)

        Raises:
            ValueError: If ``weights`` is empty, its sum is not positive, or
                any individual weight is negative, NaN or infinite.
        """
        self.rng = rng if rng is not None else random
        n = len(weights)

        if n == 0:
            raise ValueError("Cannot create AliasSampler with empty weights")

        self.n = n

        total = sum(weights)
        if total <= 0:
            raise ValueError("Sum of weights must be positive")

        # A negative (or NaN/inf) weight can hide behind a positive total and
        # would silently yield a corrupt table, so reject it explicitly.
        # The chained comparison is False for NaN as well.
        infinity = float("inf")
        for weight in weights:
            if not (0 <= weight < infinity):
                raise ValueError("Weights must be finite and non-negative")

        # Scale so the average probability is exactly 1 (sum equals n), which
        # is the form the alias construction works on.
        scaled = [w * n / total for w in weights]

        # Alias tables. Each slot aliases itself by default so a slot that is
        # finalized with probability 1.0 can never redirect to another index.
        self.prob = [0.0] * n
        self.alias = list(range(n))

        # Partition into under-full (< 1) and over-full (>= 1) slots.
        small = [i for i, p in enumerate(scaled) if p < 1.0]
        large = [i for i, p in enumerate(scaled) if p >= 1.0]

        # Build alias table: top up each under-full slot from an over-full one.
        while small and large:
            under = small.pop()
            over = large.pop()

            self.prob[under] = scaled[under]
            self.alias[under] = over

            # The donor gave away (1 - scaled[under]) of its mass.
            scaled[over] = scaled[over] + scaled[under] - 1.0

            if scaled[over] < 1.0:
                small.append(over)
            else:
                large.append(over)

        # Remaining items (due to floating point, both could have leftovers)
        # are treated as exactly full.
        for leftover in large:
            self.prob[leftover] = 1.0
        for leftover in small:
            self.prob[leftover] = 1.0

    def sample(self, rand: random.Random = None) -> int:
        """
        Sample an index in O(1) time.

        Args:
            rand: Optional random instance to use instead of self.rng

        Returns:
            Sampled index
        """
        rng = rand if rand is not None else self.rng
        # Generate fair die roll
        slot = rng.randrange(self.n)
        # Flip biased coin: keep the slot itself or fall through to its alias
        if rng.random() < self.prob[slot]:
            return slot
        return self.alias[slot]

    def sample_n(self, n: int, rand: random.Random = None) -> List[int]:
        """
        Sample n indices efficiently.

        Args:
            n: Number of indices to draw.
            rand: Optional random instance to use instead of self.rng

        Returns:
            List of n sampled indices (empty when n <= 0)
        """
        return [self.sample(rand) for _ in range(n)]
|