uaforger 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ include README.md
2
+ include LICENSE
3
+
4
+ recursive-include uaforge/data *.json *.txt
5
+
6
+ global-exclude __pycache__
7
+ global-exclude *.py[cod]
8
+ global-exclude *.so
9
+ global-exclude .DS_Store
10
+
11
+ prune scripts
12
+ prune bench
13
+ prune tests
@@ -0,0 +1,184 @@
1
+ Metadata-Version: 2.4
2
+ Name: uaforger
3
+ Version: 0.1.0
4
+ Summary: Statistically-weighted User-Agent + Sec-CH-UA Client-Hint generator backed by real-world market-share data.
5
+ Author-email: Sarper AVCI <sarperavci20@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/sarperavci/UAForge
8
+ Project-URL: Source, https://github.com/sarperavci/UAForge
9
+ Project-URL: Issues, https://github.com/sarperavci/UAForge/issues
10
+ Keywords: user-agent,client-hints,fingerprint,browser,scraping,automation
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Internet :: WWW/HTTP
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+
26
+ # UAForge
27
+
28
+ **Enterprise-grade, deterministic User Agent & Client Hint generator based on real-world browser statistics**
29
+
30
+ [![Python Version](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
31
+ [![License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
32
+ [![Auto-Updated](https://img.shields.io/badge/data-auto--updated%20weekly-orange)](https://caniuse.com/usage-table)
33
+
34
+ ---
35
+
36
+ Most "random user-agent" libraries are broken. They generate outdated browser strings, mix incompatible OS combinations, or lack the modern headers that fingerprinting systems now check.
37
+
38
+ **UAForge takes a different approach.** Instead of picking random strings, it simulates real users based on **statistical probability**. If Chrome 143 on Android holds 40% global market share, UAForge generates that identity 40% of the time.
39
+
40
+ It also generates matching **Client Hints (`Sec-CH-UA`)** headers automatically—allowing your automation to pass modern fingerprinting checks that go beyond the legacy User-Agent string.
41
+
42
+ ### Key Features
43
+
44
+ * **Statistically Accurate** — Weighted by real-world global usage data, updated weekly from caniuse.com
45
+ * **Smart Correlations** — Enforces valid browser↔OS mappings (no Safari on Windows)
46
+ * **Real Hardware** — Injects actual device models (Pixel 9, Galaxy S24, etc.) for mobile agents
47
+ * **Client Hints** — Generates Sec-CH-UA, Sec-CH-UA-Mobile, Sec-CH-UA-Platform, and GREASE tokens
48
+ * **Deterministic** — Seed support for consistent, reproducible identities across sessions
49
+
50
+
51
+ ## Installation
52
+
53
+ ```bash
54
+ pip install git+https://github.com/sarperavci/uaforge.git
55
+ ```
56
+
57
+ *Note:* The repository does not include the market-share data files due to size constraints. They are downloaded automatically from the release assets during installation.
58
+
59
+
60
+ ## Quick Start
61
+
62
+ The API is designed to be simple. You generate an "Identity" object, which contains everything you need for your requests.
63
+
64
+ ```python
65
+ from uaforge.core.generator import UserAgentGenerator
66
+
67
+ # 1. Initialize the generator
68
+ agent = UserAgentGenerator()
69
+
70
+ # 2. Generate an identity
71
+ identity = agent.generate()
72
+
73
+ # 3. Get the headers (includes User-Agent AND Client Hints)
74
+ headers = identity.get_headers()
75
+ full_client_hints = identity.get_all_client_hints()
76
+
77
+ # Use with requests/httpx
78
+ # response = requests.get("https://httpbin.org/headers", headers=headers)
79
+
80
+ print(f"Browser: {identity.meta_browser.value}")
81
+ print(f"OS: {identity.meta_os.value}")
82
+ print(headers)
83
+ print(full_client_hints)
84
+ ```
85
+
86
+ ### Sample Output
87
+
88
+ Default Headers:
89
+
90
+ ```json
91
+ {
92
+ "User-Agent": "Mozilla/5.0 (Linux; Android 13; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Mobile Safari/537.36",
93
+ "Sec-CH-UA": "\"Not A Brand\";v=\"99\", \"Chromium\";v=\"144\", \"Google Chrome\";v=\"144\"",
94
+ "Sec-CH-UA-Mobile": "?1",
95
+ "Sec-CH-UA-Platform": "\"Android\""
96
+ }
97
+ ```
98
+
99
+ Full Client Hints:
100
+
101
+ ```json
102
+ {
103
+ "Sec-CH-UA": "\"Not A Brand\";v=\"99\", \"Chromium\";v=\"144\", \"Google Chrome\";v=\"144\"",
104
+ "Sec-CH-UA-Mobile": "?1",
105
+ "Sec-CH-UA-Platform": "\"Android\"",
106
+ "Sec-CH-UA-Full-Version-List": "\"Not A Brand\";v=\"99.0.0.0\", \"Chromium\";v=\"144.0.7559.59\", \"Google Chrome\";v=\"144.0.7559.59\"",
107
+ "Sec-CH-UA-Platform-Version": "13.0.0",
108
+ "Sec-CH-UA-Model": "\"Pixel 9\"",
109
+ "Sec-CH-UA-Arch": "\"arm\"",
110
+ "Sec-CH-UA-Bitness": "\"64\"",
111
+ "Sec-CH-UA-Full-Version": "\"144.0.7559.59\"",
112
+ "Sec-CH-UA-Form-Factors": "\"Mobile\"",
113
+ "Sec-CH-UA-WoW64": "?0",
114
+ "Sec-CH-Prefers-Color-Scheme": "dark"
115
+ }
116
+ ```
117
+
118
+ ## Advanced Usage
119
+
120
+ ### Deterministic Generation (Sessions)
121
+ If you are managing long-running sessions, you need the User Agent to stay consistent across restarts. Use a seed.
122
+
123
+ ```python
124
+ # The identity generated here will ALWAYS be the same for seed 42
125
+ user = UserAgentGenerator(seed=42).generate()
126
+
127
+ print(user.user_agent)
128
+ # Useful for associating a UA with a specific database UserID
129
+ ```
130
+
131
+ ### Accessing Granular Data
132
+ Sometimes you need just the OS version or just the device model for analytics.
133
+
134
+ ```python
135
+ identity = agent.generate()
136
+
137
+ if identity.meta_device == "mobile":
138
+ print(f"Device Model: {identity.ch_model}") # e.g. "Pixel 8 Pro"
139
+ print(f"Architecture: {identity.ch_arch}") # e.g. "arm"
140
+ ```
141
+
142
+ ## How it works
143
+
144
+ ### The Data Sources
145
+
146
+ We don't guess. We utilize three distinct data layers:
147
+
148
+ 1. **`market_share.json`**: Global browser usage stats (Chrome, Safari, Edge, Firefox).
149
+ 2. **`os_distribution.json`**: The probability of an OS given a specific browser (e.g., Safari is 100% macOS/iOS, but Chrome is split between Windows, Mac, Linux, and Android).
150
+ 3. **`device_models.json`**: A curated list of ~500 real-world mobile device fingerprints.
151
+
152
+ ## Maintenance & Updates
153
+
154
+ The browser ecosystem moves fast. Market share data is **automatically updated weekly** via GitHub Actions by parsing the latest data from [caniuse.com](https://caniuse.com/usage-table).
155
+
156
+ You can also trigger a manual update by running:
157
+ ```bash
158
+ python scripts/parse_caniuse.py
159
+ ```
160
+
161
+ ## Current Market Share Distribution
162
+
163
+ The table below shows the aggregated browser market share from the current dataset. Data is sourced from caniuse.com and updated automatically.
164
+
165
+ | Browser | Market Share |
166
+ |---------|-------------|
167
+ | Chrome for Android | 57.48% |
168
+ | Chrome (Desktop) | 17.61% |
169
+ | iOS Safari | 12.43% |
170
+ | Edge | 4.60% |
171
+ | Safari (Desktop) | 2.20% |
172
+ | Samsung Internet | 1.73% |
173
+ | Firefox | 1.62% |
174
+ | Opera Mobile | 0.80% |
175
+ | UC Browser | 0.57% |
176
+ | Opera | 0.39% |
177
+ | Firefox for Android | 0.31% |
178
+ | IE | 0.26% |
179
+
180
+ *Last updated: 03-05-2026*
181
+
182
+ ## License
183
+
184
+ MIT License. Feel free to use this in your commercial scrapers, bots or testing suites.
@@ -0,0 +1,159 @@
1
+ # UAForge
2
+
3
+ **Enterprise-grade, deterministic User Agent & Client Hint generator based on real-world browser statistics**
4
+
5
+ [![Python Version](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
6
+ [![License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
7
+ [![Auto-Updated](https://img.shields.io/badge/data-auto--updated%20weekly-orange)](https://caniuse.com/usage-table)
8
+
9
+ ---
10
+
11
+ Most "random user-agent" libraries are broken. They generate outdated browser strings, mix incompatible OS combinations, or lack the modern headers that fingerprinting systems now check.
12
+
13
+ **UAForge takes a different approach.** Instead of picking random strings, it simulates real users based on **statistical probability**. If Chrome 143 on Android holds 40% global market share, UAForge generates that identity 40% of the time.
14
+
15
+ It also generates matching **Client Hints (`Sec-CH-UA`)** headers automatically—allowing your automation to pass modern fingerprinting checks that go beyond the legacy User-Agent string.
16
+
17
+ ### Key Features
18
+
19
+ * **Statistically Accurate** — Weighted by real-world global usage data, updated weekly from caniuse.com
20
+ * **Smart Correlations** — Enforces valid browser↔OS mappings (no Safari on Windows)
21
+ * **Real Hardware** — Injects actual device models (Pixel 9, Galaxy S24, etc.) for mobile agents
22
+ * **Client Hints** — Generates Sec-CH-UA, Sec-CH-UA-Mobile, Sec-CH-UA-Platform, and GREASE tokens
23
+ * **Deterministic** — Seed support for consistent, reproducible identities across sessions
24
+
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install git+https://github.com/sarperavci/uaforge.git
30
+ ```
31
+
32
+ *Note:* The repository does not include the market-share data files due to size constraints. They are downloaded automatically from the release assets during installation.
33
+
34
+
35
+ ## Quick Start
36
+
37
+ The API is designed to be simple. You generate an "Identity" object, which contains everything you need for your requests.
38
+
39
+ ```python
40
+ from uaforge.core.generator import UserAgentGenerator
41
+
42
+ # 1. Initialize the generator
43
+ agent = UserAgentGenerator()
44
+
45
+ # 2. Generate an identity
46
+ identity = agent.generate()
47
+
48
+ # 3. Get the headers (includes User-Agent AND Client Hints)
49
+ headers = identity.get_headers()
50
+ full_client_hints = identity.get_all_client_hints()
51
+
52
+ # Use with requests/httpx
53
+ # response = requests.get("https://httpbin.org/headers", headers=headers)
54
+
55
+ print(f"Browser: {identity.meta_browser.value}")
56
+ print(f"OS: {identity.meta_os.value}")
57
+ print(headers)
58
+ print(full_client_hints)
59
+ ```
60
+
61
+ ### Sample Output
62
+
63
+ Default Headers:
64
+
65
+ ```json
66
+ {
67
+ "User-Agent": "Mozilla/5.0 (Linux; Android 13; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Mobile Safari/537.36",
68
+ "Sec-CH-UA": "\"Not A Brand\";v=\"99\", \"Chromium\";v=\"144\", \"Google Chrome\";v=\"144\"",
69
+ "Sec-CH-UA-Mobile": "?1",
70
+ "Sec-CH-UA-Platform": "\"Android\""
71
+ }
72
+ ```
73
+
74
+ Full Client Hints:
75
+
76
+ ```json
77
+ {
78
+ "Sec-CH-UA": "\"Not A Brand\";v=\"99\", \"Chromium\";v=\"144\", \"Google Chrome\";v=\"144\"",
79
+ "Sec-CH-UA-Mobile": "?1",
80
+ "Sec-CH-UA-Platform": "\"Android\"",
81
+ "Sec-CH-UA-Full-Version-List": "\"Not A Brand\";v=\"99.0.0.0\", \"Chromium\";v=\"144.0.7559.59\", \"Google Chrome\";v=\"144.0.7559.59\"",
82
+ "Sec-CH-UA-Platform-Version": "13.0.0",
83
+ "Sec-CH-UA-Model": "\"Pixel 9\"",
84
+ "Sec-CH-UA-Arch": "\"arm\"",
85
+ "Sec-CH-UA-Bitness": "\"64\"",
86
+ "Sec-CH-UA-Full-Version": "\"144.0.7559.59\"",
87
+ "Sec-CH-UA-Form-Factors": "\"Mobile\"",
88
+ "Sec-CH-UA-WoW64": "?0",
89
+ "Sec-CH-Prefers-Color-Scheme": "dark"
90
+ }
91
+ ```
92
+
93
+ ## Advanced Usage
94
+
95
+ ### Deterministic Generation (Sessions)
96
+ If you are managing long-running sessions, you need the User Agent to stay consistent across restarts. Use a seed.
97
+
98
+ ```python
99
+ # The identity generated here will ALWAYS be the same for seed 42
100
+ user = UserAgentGenerator(seed=42).generate()
101
+
102
+ print(user.user_agent)
103
+ # Useful for associating a UA with a specific database UserID
104
+ ```
105
+
106
+ ### Accessing Granular Data
107
+ Sometimes you need just the OS version or just the device model for analytics.
108
+
109
+ ```python
110
+ identity = agent.generate()
111
+
112
+ if identity.meta_device == "mobile":
113
+ print(f"Device Model: {identity.ch_model}") # e.g. "Pixel 8 Pro"
114
+ print(f"Architecture: {identity.ch_arch}") # e.g. "arm"
115
+ ```
116
+
117
+ ## How it works
118
+
119
+ ### The Data Sources
120
+
121
+ We don't guess. We utilize three distinct data layers:
122
+
123
+ 1. **`market_share.json`**: Global browser usage stats (Chrome, Safari, Edge, Firefox).
124
+ 2. **`os_distribution.json`**: The probability of an OS given a specific browser (e.g., Safari is 100% macOS/iOS, but Chrome is split between Windows, Mac, Linux, and Android).
125
+ 3. **`device_models.json`**: A curated list of ~500 real-world mobile device fingerprints.
126
+
127
+ ## Maintenance & Updates
128
+
129
+ The browser ecosystem moves fast. Market share data is **automatically updated weekly** via GitHub Actions by parsing the latest data from [caniuse.com](https://caniuse.com/usage-table).
130
+
131
+ You can also trigger a manual update by running:
132
+ ```bash
133
+ python scripts/parse_caniuse.py
134
+ ```
135
+
136
+ ## Current Market Share Distribution
137
+
138
+ The table below shows the aggregated browser market share from the current dataset. Data is sourced from caniuse.com and updated automatically.
139
+
140
+ | Browser | Market Share |
141
+ |---------|-------------|
142
+ | Chrome for Android | 57.48% |
143
+ | Chrome (Desktop) | 17.61% |
144
+ | iOS Safari | 12.43% |
145
+ | Edge | 4.60% |
146
+ | Safari (Desktop) | 2.20% |
147
+ | Samsung Internet | 1.73% |
148
+ | Firefox | 1.62% |
149
+ | Opera Mobile | 0.80% |
150
+ | UC Browser | 0.57% |
151
+ | Opera | 0.39% |
152
+ | Firefox for Android | 0.31% |
153
+ | IE | 0.26% |
154
+
155
+ *Last updated: 03-05-2026*
156
+
157
+ ## License
158
+
159
+ MIT License. Feel free to use this in your commercial scrapers, bots or testing suites.
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "uaforger"
7
+ version = "0.1.0"
8
+ description = "Statistically-weighted User-Agent + Sec-CH-UA Client-Hint generator backed by real-world market-share data."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ authors = [{ name = "Sarper AVCI", email = "sarperavci20@gmail.com" }]
12
+ license = { text = "MIT" }
13
+ keywords = ["user-agent", "client-hints", "fingerprint", "browser", "scraping", "automation"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Topic :: Internet :: WWW/HTTP",
26
+ "Topic :: Software Development :: Libraries :: Python Modules",
27
+ ]
28
+ dependencies = []
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/sarperavci/UAForge"
32
+ Source = "https://github.com/sarperavci/UAForge"
33
+ Issues = "https://github.com/sarperavci/UAForge/issues"
34
+
35
+ [tool.setuptools.packages.find]
36
+ where = ["."]
37
+ include = ["uaforge*"]
38
+ exclude = ["scripts*", "bench*", "tests*"]
39
+
40
+ [tool.setuptools.package-data]
41
+ "uaforge" = ["data/*.json", "data/*.txt"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
File without changes
@@ -0,0 +1,97 @@
1
+ import random
2
+ from typing import List
3
+
4
+
5
+ class AliasSampler:
6
+ """
7
+ O(1) weighted random sampler using Vose's Alias Method.
8
+
9
+ Usage:
10
+ sampler = AliasSampler(weights, rng)
11
+ index = sampler.sample() # O(1) per call
12
+ """
13
+
14
+ def __init__(self, weights: List[float], rng=None):
15
+ """
16
+ Preprocess weights into alias table.
17
+
18
+ Args:
19
+ weights: List of weights (need not sum to 1, will be normalized)
20
+ rng: Random instance to use (defaults to random module)
21
+ """
22
+ self.rng = rng if rng is not None else random
23
+ n = len(weights)
24
+
25
+ if n == 0:
26
+ raise ValueError("Cannot create AliasSampler with empty weights")
27
+
28
+ self.n = n
29
+
30
+ # Normalize weights
31
+ total = sum(weights)
32
+ if total <= 0:
33
+ raise ValueError("Sum of weights must be positive")
34
+
35
+ # probabilities normalized to sum to n (for the algorithm)
36
+ prob = [w * n / total for w in weights]
37
+
38
+ # Alias tables
39
+ self.prob = [0.0] * n
40
+ self.alias = [0] * n
41
+
42
+ # Partition into small and large
43
+ small = []
44
+ large = []
45
+
46
+ for i, p in enumerate(prob):
47
+ if p < 1.0:
48
+ small.append(i)
49
+ else:
50
+ large.append(i)
51
+
52
+ # Build alias table
53
+ while small and large:
54
+ l = small.pop()
55
+ g = large.pop()
56
+
57
+ self.prob[l] = prob[l]
58
+ self.alias[l] = g
59
+
60
+ prob[g] = prob[g] + prob[l] - 1.0
61
+
62
+ if prob[g] < 1.0:
63
+ small.append(g)
64
+ else:
65
+ large.append(g)
66
+
67
+ # Remaining items (due to floating point, both could have leftovers)
68
+ while large:
69
+ g = large.pop()
70
+ self.prob[g] = 1.0
71
+
72
+ while small:
73
+ l = small.pop()
74
+ self.prob[l] = 1.0
75
+
76
+ def sample(self, rand: random.Random = None) -> int:
77
+ """
78
+ Sample an index in O(1) time.
79
+
80
+ Args:
81
+ rand: Optional random instance to use instead of self.rng
82
+
83
+ Returns:
84
+ Sampled index
85
+ """
86
+ rng = rand if rand is not None else self.rng
87
+ # Generate fair die roll
88
+ i = rng.randrange(self.n)
89
+ # Flip biased coin
90
+ if rng.random() < self.prob[i]:
91
+ return i
92
+ else:
93
+ return self.alias[i]
94
+
95
+ def sample_n(self, n: int) -> List[int]:
96
+ """Sample n indices efficiently."""
97
+ return [self.sample() for _ in range(n)]