ticker-classifier 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ from .classifier import TickerClassifier
2
+
3
+ __all__ = ["TickerClassifier"]
4
+ __version__ = "0.1.0"
File without changes
@@ -0,0 +1,217 @@
1
+ from collections import defaultdict
2
+ from typing import Dict, List
3
+
4
+ import aiohttp
5
+ import requests
6
+
7
+
8
class CoinGeckoClient:
    """Look up crypto market caps for ticker symbols via the CoinGecko API.

    The client keeps an in-memory ``_crypto_map`` that maps uppercase symbols
    to a list of CoinGecko ids. The map is populated lazily by
    `_load_map_sync` or `_load_map_async` when a price lookup is requested.
    """

    # Number of coin ids sent per ``simple/price`` request.
    _CHUNK_SIZE = 200
    # Only the first N ids sharing one symbol are considered as candidates.
    _MAX_COLLISIONS = 10

    def __init__(self):
        """Initialize the client with its endpoints and an empty symbol cache."""
        self.list_url = "https://api.coingecko.com/api/v3/coins/list"
        self.price_url = "https://api.coingecko.com/api/v3/simple/price"
        self._crypto_map = None  # { 'BTC': ['bitcoin', 'bitcoin-token'], ... }

    @staticmethod
    def _build_map(data):
        """Group the coin-list payload into an uppercase symbol -> ids mapping."""
        crypto_map = defaultdict(list)
        for coin in data:
            crypto_map[coin["symbol"].upper()].append(coin["id"])
        return crypto_map

    @staticmethod
    def _price_params(chunk):
        """Build the query parameters for one ``simple/price`` request."""
        return {
            "ids": ",".join(chunk),
            "vs_currencies": "usd",
            "include_market_cap": "true",
        }

    def _chunks(self, ids):
        """Yield successive slices of `ids` of at most `_CHUNK_SIZE` items."""
        for start in range(0, len(ids), self._CHUNK_SIZE):
            yield ids[start : start + self._CHUNK_SIZE]

    def _load_map_sync(self):
        """Load the CoinGecko symbol->id map synchronously.

        Fetches the full coin list and populates `_crypto_map`. If the map
        is already populated this is a no-op. An empty map (for example after
        a failed load) is falsy, so the next call tries again.

        Errors
        ------
        Any exception raised while fetching/parsing is caught and the map
        falls back to an empty dict.
        """
        if self._crypto_map:
            return
        try:
            resp = requests.get(self.list_url, timeout=10)
            self._crypto_map = self._build_map(resp.json())
        except Exception:
            # Best effort: without the map no crypto candidates are produced.
            self._crypto_map = {}

    async def _load_map_async(self, session: aiohttp.ClientSession):
        """Asynchronously load the CoinGecko symbol->id map.

        Parameters
        ----------
        session : aiohttp.ClientSession
            Active aiohttp session used for making the HTTP request.

        Notes
        -----
        Async counterpart to `_load_map_sync`. Returns immediately when the
        map is already populated. Exceptions are caught and the map is set
        to an empty dict on failure.
        """
        if self._crypto_map:
            return
        try:
            async with session.get(self.list_url) as resp:
                data = await resp.json()
            self._crypto_map = self._build_map(data)
        except Exception:
            self._crypto_map = {}

    # The return annotation is quoted so it is not evaluated at definition
    # time: subscripting the builtin ``tuple`` raises TypeError on Python 3.8,
    # which the package metadata declares as supported.
    def _get_candidate_ids(
        self, symbols: List[str]
    ) -> "tuple[List[str], Dict[str, str]]":
        """Return candidate CoinGecko ids for a list of symbols.

        Parameters
        ----------
        symbols : list[str]
            Uppercase ticker symbols to map to CoinGecko ids.

        Returns
        -------
        ids : list[str]
            Flat list of candidate CoinGecko ids (limited to the first 10
            collisions per symbol).
        id_to_parent : dict
            Mapping of coin id -> original symbol (parent) used to group
            results later.
        """
        ids = []
        id_to_parent = {}
        if not self._crypto_map:
            return ids, id_to_parent

        for sym in symbols:
            # ``.get`` never triggers the defaultdict factory, so unknown
            # symbols do not create empty entries in the map.
            for cid in self._crypto_map.get(sym, ())[: self._MAX_COLLISIONS]:
                ids.append(cid)
                id_to_parent[cid] = sym
        return ids, id_to_parent

    def get_prices_sync(self, symbols: List[str]) -> Dict[str, Dict]:
        """Synchronous market-cap lookup for a list of symbols.

        Ensures the symbol->id map is loaded, finds candidate ids for the
        requested symbols and retrieves USD prices and market caps in
        chunks. The highest market cap candidate per symbol is selected by
        `_process_response`.

        Parameters
        ----------
        symbols : list[str]
            Uppercase ticker symbols to look up.

        Returns
        -------
        dict[str, dict]
            Mapping of symbol -> {"market_cap": ..., "name": ..., "id": ...}
            for matches found. Returns an empty dict if nothing matched.
        """
        self._load_map_sync()
        results = {}
        ids, id_map = self._get_candidate_ids(symbols)
        if not ids:
            return results

        for chunk in self._chunks(ids):
            try:
                resp = requests.get(
                    self.price_url,
                    params=self._price_params(chunk),
                    timeout=10,
                )
                self._process_response(resp.json(), id_map, results)
            except Exception:
                # Best effort: a failing chunk is skipped, others still count.
                continue
        return results

    async def get_prices_async(
        self, session: aiohttp.ClientSession, symbols: List[str]
    ) -> Dict[str, Dict]:
        """Asynchronously retrieve market caps for symbols.

        Parameters
        ----------
        session : aiohttp.ClientSession
            Active aiohttp session used to make HTTP requests.
        symbols : list[str]
            Uppercase ticker symbols to query.

        Returns
        -------
        dict[str, dict]
            Mapping of symbol -> {"market_cap": ..., "name": ..., "id": ...}.

        Notes
        -----
        Uses `_load_map_async` and requests CoinGecko in chunks. Failures
        for a chunk are swallowed and processing continues.
        """
        await self._load_map_async(session)
        results = {}
        ids, id_map = self._get_candidate_ids(symbols)
        if not ids:
            return results

        for chunk in self._chunks(ids):
            try:
                async with session.get(
                    self.price_url, params=self._price_params(chunk)
                ) as resp:
                    data = await resp.json()
                self._process_response(data, id_map, results)
            except Exception:
                continue
        return results

    def _process_response(self, data, id_map, results):
        """Process a CoinGecko price response and update `results` in place.

        Parameters
        ----------
        data : dict
            JSON-decoded response from the CoinGecko simple/price endpoint.
        id_map : dict
            Mapping of coin id -> parent symbol used to group results.
        results : dict
            Mutable mapping updated with the best candidate per parent
            symbol (highest market cap wins). Candidates whose market cap
            is missing, null or zero are never recorded.
        """
        for cid, val in data.items():
            parent = id_map.get(cid)
            if not parent or not isinstance(val, dict):
                continue
            # A null market cap must count as 0: comparing None with a number
            # raises TypeError, which the callers swallow, silently dropping
            # every remaining coin of the chunk.
            mcap = val.get("usd_market_cap") or 0
            best_so_far = results.get(parent, {}).get("market_cap", 0)
            if mcap > best_so_far:
                results[parent] = {
                    "market_cap": mcap,
                    "name": cid.title(),
                    "id": cid,
                }
@@ -0,0 +1,225 @@
1
+ """Yahoo Finance API helpers.
2
+
3
+ This module provides a small client for obtaining the cookie/crumb
4
+ credentials required by Yahoo Finance endpoints and for fetching quote
5
+ data. Both synchronous (requests) and asynchronous (aiohttp) helpers
6
+ are provided.
7
+
8
+ Notes
9
+ -----
10
+ Docstrings follow the NumPy documentation style.
11
+ """
12
+
13
+ from typing import Any, Dict, List, Optional
14
+
15
+ import aiohttp
16
+ import requests
17
+
18
# Host serving the Yahoo Finance query API; the endpoints below derive from it.
API_BASE = "https://query2.finance.yahoo.com"
# Requested first so Yahoo hands out the cookies used for the crumb request.
COOKIE_URL = "https://fc.yahoo.com"
CRUMB_URL = f"{API_BASE}/v1/test/getcrumb"
QUOTE_URL = f"{API_BASE}/v7/finance/quote"

# Browser-like User-Agent sent with every request to Yahoo.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    )
}
26
+
27
+
28
class YahooClient:
    """Fetch Yahoo Finance quotes using the cookie + crumb authentication flow.

    Credentials are cached in memory on the instance so repeated requests
    do not need to re-authenticate on every call. They are cleared when a
    quote request answers with HTTP 401.
    """

    def __init__(self):
        """Initialize a new :class:`YahooClient` with no cached credentials."""
        self.credentials: Optional[Dict[str, Any]] = None

    @staticmethod
    def _parse_quotes(data) -> Dict[str, Dict]:
        """Index the items of a quote response by upper-case symbol.

        Returns an empty dict when the payload has no
        ``quoteResponse.result`` list (for example an error body).
        """
        quotes = {}
        if not isinstance(data, dict):
            return quotes
        response = data.get("quoteResponse") or {}
        for item in response.get("result") or []:
            quotes[item["symbol"].upper()] = item
        return quotes

    def _get_credentials_sync(self):
        """Fetch cookie and crumb synchronously.

        The method performs the two-step Yahoo flow:
        1. Request ``COOKIE_URL`` so Yahoo sets its cookies.
        2. Request the crumb token from ``CRUMB_URL`` with those cookies.

        Returns
        -------
        dict or None
            Dictionary with keys ``'cookie'`` and ``'crumb'`` when
            successful, otherwise ``None``.
        """
        if self.credentials:
            return self.credentials

        try:
            # A Session keeps the cookies set on every hop (redirects
            # included); Response.cookies only holds those of the final
            # response.
            with requests.Session() as http:
                http.get(COOKIE_URL, headers=HEADERS, timeout=5)
                response_crumb = http.get(CRUMB_URL, headers=HEADERS, timeout=5)
                cookies = http.cookies

            crumb = response_crumb.text
            # Only a successful response carries a crumb; an error body must
            # not be cached as if it were a token.
            if response_crumb.status_code == 200 and crumb:
                self.credentials = {"cookie": cookies, "crumb": crumb}
        except Exception as e:
            print(f"Yahoo Auth Error (Sync): {e}")

        return self.credentials

    def get_quotes_sync(self, symbols: List[str]) -> Dict[str, Dict]:
        """Get quotes synchronously for the provided symbols.

        Parameters
        ----------
        symbols : list of str
            List of ticker symbols to query (e.g. ``['AAPL', 'MSFT']``).

        Returns
        -------
        dict
            Mapping from upper-case symbol to the quote data dictionary
            returned by Yahoo. Returns an empty dict if no symbols are
            provided or if a failure occurs.
        """
        results = {}
        if not symbols:
            return results

        creds = self._get_credentials_sync()
        if not creds:
            print("Skipping Yahoo Sync: No credentials.")
            return results

        try:
            params = {"symbols": ",".join(symbols), "crumb": creds["crumb"]}
            resp = requests.get(
                QUOTE_URL,
                params=params,
                cookies=creds["cookie"],
                headers=HEADERS,
                timeout=5,
            )

            if resp.status_code == 200:
                results.update(self._parse_quotes(resp.json()))
            elif resp.status_code == 401:
                # Credentials expired? Clear them for next time.
                self.credentials = None
                print("Yahoo 401 Unauthorized (Sync). Credentials cleared.")
        except Exception as e:
            print(f"Yahoo Sync Request Error: {e}")

        return results

    async def _get_credentials_async(self, session: aiohttp.ClientSession):
        """Fetch cookie and crumb asynchronously.

        Parameters
        ----------
        session : aiohttp.ClientSession
            Session used for both requests. Its cookie jar collects the
            cookies Yahoo sets, and they are copied out of it afterwards.

        Returns
        -------
        dict or None
            Dictionary with keys ``'cookie'`` and ``'crumb'`` when
            successful, otherwise ``None``.
        """
        if self.credentials:
            return self.credentials

        try:
            # 1. Get cookies: read the body so the response is fully
            # processed before the crumb request reuses the session.
            async with session.get(COOKIE_URL, headers=HEADERS) as resp:
                await resp.read()

            # 2. Get crumb: the session now holds the cookies from step 1.
            async with session.get(CRUMB_URL, headers=HEADERS) as resp:
                crumb_status = resp.status
                crumb = await resp.text()

            # Only a successful response carries a crumb.
            if crumb_status == 200 and crumb:
                # Copy the cookies into a plain dict because they are passed
                # explicitly on later quote requests.
                # NOTE(review): newer aiohttp releases expect a yarl.URL
                # here rather than a str; confirm against the pinned version.
                cookies = {
                    k: v.value
                    for k, v in session.cookie_jar.filter_cookies(CRUMB_URL).items()
                }
                self.credentials = {"cookie": cookies, "crumb": crumb}
        except Exception as e:
            print(f"Yahoo Auth Error (Async): {e}")

        return self.credentials

    async def get_quotes_async(
        self, session: aiohttp.ClientSession, symbols: List[str]
    ) -> Dict[str, Dict]:
        """Asynchronously get quotes for the provided symbols.

        Parameters
        ----------
        session : aiohttp.ClientSession
            Active aiohttp session used to perform the requests.
        symbols : list of str
            List of ticker symbols to query.

        Returns
        -------
        dict
            Mapping from upper-case symbol to the quote data dictionary
            returned by Yahoo. Returns an empty dict if no symbols are
            provided or if a failure occurs.
        """
        results = {}
        if not symbols:
            return results

        creds = await self._get_credentials_async(session)
        if not creds:
            print("Skipping Yahoo Async: No credentials.")
            return results

        try:
            params = {"symbols": ",".join(symbols), "crumb": creds["crumb"]}
            async with session.get(
                QUOTE_URL, params=params, cookies=creds["cookie"], headers=HEADERS
            ) as resp:
                if resp.status == 200:
                    results.update(self._parse_quotes(await resp.json()))
                elif resp.status == 401:
                    self.credentials = None
                    print("Yahoo 401 Unauthorized (Async). Credentials cleared.")
        except Exception as e:
            print(f"Yahoo Async Request Error: {e}")

        return results
@@ -0,0 +1,240 @@
1
+ import asyncio
2
+ from typing import Dict, List
3
+
4
+ import aiohttp
5
+
6
+ from .apis.coingecko import CoinGeckoClient
7
+ from .apis.yahoo import YahooClient
8
+ from .constants import MAJOR_FOREX, MINOR_FOREX, SHORTCUTS
9
+ from .db.cache import TickerCache
10
+
11
+
12
class TickerClassifier:
    """Classify ticker-like symbols as stock, crypto, forex or unknown.

    Signals from Yahoo Finance, CoinGecko and the forex constants are
    scored against each other per symbol; non-``Unknown`` results are
    persisted in a `TickerCache`.
    """

    # 1 MILLION USD THRESHOLD
    # A crypto smaller than this is treated as noise when it wins the duel.
    _MIN_CRYPTO_MCAP = 1_000_000

    def __init__(self, db_name: str = "ticker_cache.db", hours_to_expire: int = 24):
        """Create a TickerClassifier instance.

        Parameters
        ----------
        db_name : str, optional
            SQLite filename for the `TickerCache`, by default "ticker_cache.db".
        hours_to_expire : int, optional
            Hours after which cached entries expire, by default 24.
        """
        self.cache = TickerCache(db_name, hours_to_expire)
        self.yahoo = YahooClient()
        self.cg = CoinGeckoClient()

    @staticmethod
    def _pick_winner(scores, min_crypto_mcap=1_000_000):
        """Return the winning source name, or ``"unknown"`` if none scored.

        The highest score wins (ties resolve in the order stock, crypto,
        forex). A winning crypto below `min_crypto_mcap` is treated as
        noise: the best remaining stock/forex signal takes over, and only
        if neither scored is the symbol reported as unknown.
        """
        winner = max(("stock", "crypto", "forex"), key=lambda k: scores[k])
        if winner == "crypto" and scores["crypto"] < min_crypto_mcap:
            # Falling back keeps a valid stock (e.g. the 250k base score for
            # an equity without market cap) from being discarded together
            # with the junk token that out-scored it.
            winner = max(("stock", "forex"), key=lambda k: scores[k])
        if scores[winner] <= 0:
            return "unknown"
        return winner

    def _process_duel(
        self, to_process: List[str], yahoo_data: Dict, crypto_data: Dict
    ) -> Dict:
        """Resolve competing category signals for each symbol.

        Three sources are considered per symbol: stock (Yahoo), crypto
        (CoinGecko) and forex (membership in the forex constants). Each
        receives a numeric score (market cap or heuristic weight) and
        `_pick_winner` determines the final classification.

        Parameters
        ----------
        to_process : list[str]
            Uppercase symbols to evaluate.
        yahoo_data : dict
            Mapping of symbol -> Yahoo quote dict (as returned by `YahooClient`).
        crypto_data : dict
            Mapping of symbol -> CoinGecko-derived dict containing at least
            a `market_cap` key.

        Returns
        -------
        dict
            Mapping of symbol -> final classification dict. Unknown symbols
            carry only `category` and `ticker`; others also carry `name`,
            `market_cap`, `yahoo_lookup`, `alternatives` and `source`.
        """
        processed = {}

        for sym in to_process:
            scores = {"stock": 0, "crypto": 0, "forex": 0, "details": {}}
            details = scores["details"]

            # 1. Forex heuristics: fixed weights, majors outrank everything.
            if sym in MAJOR_FOREX:
                scores["forex"] = 100_000_000_000_000
            elif sym in MINOR_FOREX:
                scores["forex"] = 50_000_000
            if scores["forex"]:
                details["forex"] = {
                    "type": "Forex",
                    "name": f"{sym} Currency",
                    "market_cap": None,
                }

            # 2. Stock data
            if sym in yahoo_data:
                info = yahoo_data[sym]
                qtype = info.get("quoteType", "UNKNOWN")
                # ``or 0`` also covers an explicit null market cap.
                raw_mcap = info.get("marketCap") or 0

                if qtype == "INDEX":
                    score = 50_000_000_000
                elif qtype == "FUTURE":
                    score = 10_000_000_000
                elif raw_mcap == 0 and qtype in ("EQUITY", "ETF"):
                    # A valid stock object without market cap gets a base
                    # score so it beats tiny cryptos (assume micro-cap).
                    score = 250_000
                else:
                    score = raw_mcap

                scores["stock"] = score
                details["stock"] = {
                    "type": qtype,
                    "name": info.get("shortName") or info.get("longName"),
                    "market_cap": raw_mcap,
                }

            # 3. Crypto data
            if sym in crypto_data:
                info = crypto_data[sym]
                mcap = info.get("market_cap") or 0
                scores["crypto"] = mcap
                details["crypto"] = {
                    "type": "Crypto",
                    "name": info.get("name"),
                    "market_cap": mcap,
                }

            # 4. Resolve and construct the result
            winner = self._pick_winner(scores, self._MIN_CRYPTO_MCAP)
            if winner == "unknown":
                processed[sym] = {"category": "Unknown", "ticker": sym}
                continue

            chosen = details.get(winner, {})
            alternatives = [
                k
                for k in ("stock", "crypto", "forex")
                if scores[k] > 0 and k != winner
            ]

            if winner == "crypto":
                y_look = f"{sym}-USD"
            elif winner == "forex":
                y_look = f"{sym}USD=X"
            else:
                y_look = sym

            processed[sym] = {
                # Stocks report Yahoo's quote type instead of "stock".
                "category": winner if winner != "stock" else chosen.get("type"),
                "ticker": sym,
                "name": chosen.get("name"),
                "market_cap": chosen.get("market_cap"),
                "yahoo_lookup": y_look,
                "alternatives": alternatives,
                "source": "api",
            }
        return processed

    @staticmethod
    def _normalize(symbols):
        """Return the distinct upper-cased, stripped, non-blank symbols."""
        return list({s.upper().strip() for s in symbols if s.strip()})

    @staticmethod
    def _partition(unique, cached):
        """Split symbols into resolved results and symbols needing lookup.

        Shortcuts take precedence over cached entries; everything else is
        returned in the second element for an API lookup.
        """
        results_map = {}
        to_process = []
        for sym in unique:
            if sym in SHORTCUTS:
                results_map[sym] = {**SHORTCUTS[sym], "source": "shortcut"}
            elif sym in cached:
                results_map[sym] = cached[sym]
            else:
                to_process.append(sym)
        return results_map, to_process

    def classify(self, symbols: List[str]) -> List[Dict]:
        """Synchronously classify a list of ticker-like symbols.

        Parameters
        ----------
        symbols : list[str]
            Symbols to classify (may contain duplicates or mixed case).

        Returns
        -------
        list[dict]
            Classification dictionaries aligned with the input order.
            Blank inputs yield `None`.
        """
        unique = self._normalize(symbols)
        cached = self.cache.get_many(unique)
        results_map, to_process = self._partition(unique, cached)

        if to_process:
            y_res = self.yahoo.get_quotes_sync(to_process)
            c_res = self.cg.get_prices_sync(to_process)
            processed = self._process_duel(to_process, y_res, c_res)
            self.cache.save_many(processed)
            results_map.update(processed)

        return [results_map.get(s.upper().strip()) for s in symbols]

    async def classify_async(self, symbols: List[str]) -> List[Dict]:
        """Asynchronously classify a list of ticker-like symbols.

        Parameters
        ----------
        symbols : list[str]
            List of symbols to classify. Input order is preserved in the
            returned list.

        Returns
        -------
        list[dict]
            Classification results aligned with the input list. Blank
            inputs yield `None`.
        """
        unique = self._normalize(symbols)

        # Cache read runs in the default executor to avoid blocking the loop.
        loop = asyncio.get_running_loop()
        cached = await loop.run_in_executor(None, self.cache.get_many, unique)
        results_map, to_process = self._partition(unique, cached)

        if to_process:
            async with aiohttp.ClientSession() as session:
                # Both providers are queried concurrently on one session.
                y_res, c_res = await asyncio.gather(
                    self.yahoo.get_quotes_async(session, to_process),
                    self.cg.get_prices_async(session, to_process),
                )

            processed = self._process_duel(to_process, y_res, c_res)

            # Cache write also runs in the executor.
            await loop.run_in_executor(None, self.cache.save_many, processed)
            results_map.update(processed)

        return [results_map.get(s.upper().strip()) for s in symbols]
@@ -0,0 +1,192 @@
1
# Currency codes that receive the highest forex weight in the classifier.
MAJOR_FOREX = set("USD EUR JPY GBP AUD CAD CHF CNY HKD NZD".split())

# Remaining currency codes, scored with the lower "minor" forex weight.
MINOR_FOREX = set(
    """
    AED AFN ALL AMD ANG AOA ARS AWG AZN
    BAM BBD BDT BGN BHD BIF BMD BND BOB BRL BSD BTN BWP BYN BZD
    CLP COP CRC CUP CVE CZK
    DJF DKK DOP DZD
    EGP ERN ETB
    FJD FKP
    GEL GHS GIP GMD GNF GTQ GYD
    HNL HRK HTG HUF
    IDR ILS INR IQD IRR ISK
    JMD JOD
    KES KGS KHR KMF KPW KRW KWD KYD KZT
    LAK LBP LKR LRD LSL LYD
    MAD MDL MGA MKD MMK MNT MOP MRU MUR MVR MWK MXN MYR MZN
    NAD NGN NIO NOK NPR
    OMR
    PAB PEN PGK PHP PKR PLN PYG
    QAR
    RON RSD RUB RWF
    SAR SBD SCR SDG SEK SGD SHP SLL SOS SRD SSP STN SYP SZL
    THB TJS TMT TND TOP TRY TTD TWD TZS
    UAH UGX UYU UZS
    VES VND VUV
    WST
    XAF XCD XOF XPF
    YER
    ZAR ZMW
    """.split()
)

# Well-known tickers that are answered directly, without any API lookup.
# Each row is (ticker, category, display name, Yahoo lookup symbol).
SHORTCUTS = {
    ticker: {
        "category": category,
        "ticker": ticker,
        "name": name,
        "yahoo_lookup": yahoo_lookup,
    }
    for ticker, category, name, yahoo_lookup in (
        ("DXY", "Index", "US Dollar Index", "DX-Y.NYB"),
        ("VIX", "Index", "CBOE Volatility Index", "^VIX"),
        ("GOLD", "Commodity", "Gold", "GC=F"),
        ("SILVER", "Commodity", "Silver", "SI=F"),
        ("OIL", "Commodity", "Crude Oil", "CL=F"),
        ("SPX", "Index", "S&P 500", "^GSPC"),
        ("SPY", "ETF", "SPDR S&P 500 ETF Trust", "SPY"),
    )
}
File without changes
@@ -0,0 +1,93 @@
1
+ import json
2
+ import sqlite3
3
+ from datetime import datetime, timedelta
4
+ from typing import Any, Dict, List
5
+
6
+
7
class TickerCache:
    """Tiny SQLite-backed cache of classification results keyed by symbol.

    Every operation opens its own short-lived connection and closes it
    again, so no connection is held between calls.
    """

    # Symbols per SELECT. SQLite caps the number of bound parameters per
    # statement, so large lookups are split into batches of this size.
    _BATCH_SIZE = 500

    def __init__(self, db_name: str, hours_to_expire: int):
        """Initialize the ticker cache and make sure its table exists.

        Parameters
        ----------
        db_name : str
            Path or name of the SQLite database file used for caching.
        hours_to_expire : int
            Number of hours after which cached entries are considered expired.
        """
        self.db_name = db_name
        self.hours_to_expire = hours_to_expire
        self._init_db()

    def _init_db(self):
        """Create the `tickers` table if it does not already exist.

        The table stores `symbol` as the primary key, the JSON-serialized
        `data` blob and an ISO-formatted `updated_at` timestamp.
        """
        conn = sqlite3.connect(self.db_name)
        try:
            # ``with conn`` only commits or rolls back; the connection has to
            # be closed explicitly, hence the surrounding try/finally.
            with conn:
                conn.execute(
                    """
                    CREATE TABLE IF NOT EXISTS tickers (
                        symbol TEXT PRIMARY KEY,
                        data TEXT,
                        updated_at TEXT
                    )
                    """
                )
        finally:
            conn.close()

    def get_many(self, symbols: List[str]) -> Dict[str, Any]:
        """Retrieve multiple cached ticker entries that are not expired.

        Parameters
        ----------
        symbols : list[str]
            Symbols to fetch from cache (any iterable of strings is
            accepted). If empty, returns an empty dict.

        Returns
        -------
        dict[str, Any]
            Mapping of symbol -> deserialized cache object. Each returned
            object has its `source` key set to `'cache'`.
        """
        wanted = list(symbols)
        if not wanted:
            return {}

        # Timestamps are naive local-time ISO strings (as written by
        # `save_many`), so they order correctly when compared as text.
        cutoff = (datetime.now() - timedelta(hours=self.hours_to_expire)).isoformat()
        results = {}

        conn = sqlite3.connect(self.db_name)
        try:
            for start in range(0, len(wanted), self._BATCH_SIZE):
                batch = wanted[start : start + self._BATCH_SIZE]
                placeholders = ",".join("?" * len(batch))
                # Only the placeholder list is interpolated; every value is
                # passed as a bound parameter.
                query = f"SELECT symbol, data FROM tickers WHERE symbol IN ({placeholders}) AND updated_at > ?"
                for symbol, payload in conn.execute(query, [*batch, cutoff]):
                    entry = json.loads(payload)
                    entry["source"] = "cache"
                    results[symbol] = entry
        finally:
            conn.close()
        return results

    def save_many(self, items: Dict[str, Any]):
        """Save multiple items to the cache.

        Parameters
        ----------
        items : dict[str, Any]
            Mapping of symbol -> item dict. Items with `category == 'Unknown'`
            are not persisted. The optional `source` key is stripped before
            saving. Existing rows for the same symbol are replaced.
        """
        if not items:
            return

        now = datetime.now().isoformat()
        rows = [
            (
                symbol,
                json.dumps({k: v for k, v in item.items() if k != "source"}),
                now,
            )
            for symbol, item in items.items()
            if item.get("category") != "Unknown"
        ]
        if not rows:
            return

        conn = sqlite3.connect(self.db_name)
        try:
            with conn:
                conn.executemany(
                    "INSERT OR REPLACE INTO tickers (symbol, data, updated_at) VALUES (?, ?, ?)",
                    rows,
                )
        finally:
            conn.close()
@@ -0,0 +1,175 @@
1
+ Metadata-Version: 2.4
2
+ Name: ticker_classifier
3
+ Version: 0.1.0
4
+ Summary: A robust stock, crypto, and forex classifier with async support.
5
+ Author-email: Stephan Akkerman <stephan@akkerman.ai>
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: requests
10
+ Requires-Dist: aiohttp
11
+ Dynamic: license-file
12
+
13
+ # ticker-classifier
14
+
15
+ <!-- Add a banner here like: https://github.com/StephanAkkerman/fintwit-bot/blob/main/img/logo/fintwit-banner.png -->
16
+
17
+ ---
18
+ <!-- Adjust the link of the first and second badges to your own repo -->
19
+ <p align="center">
20
+ <img alt="GitHub Actions Workflow Status" src="https://img.shields.io/github/actions/workflow/status/StephanAkkerman/ticker-classifier/pyversions.yml?label=python%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13&logo=python&style=flat-square">
21
+ <img src="https://img.shields.io/github/license/StephanAkkerman/ticker-classifier.svg?color=brightgreen" alt="License">
22
+ <a href="https://github.com/psf/black"><img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Code style: black"></a>
23
+ </p>
24
+
25
+ ## Introduction
26
+
27
+ `ticker-classifier` is a small Python library for classifying ticker-like symbols (for example `AAPL`, `BTC`, `EUR`, `GOLD`) into a simple market/category representation.
28
+ It uses Yahoo Finance for equities, CoinGecko for cryptocurrencies and a few heuristics for currencies/commodities. The output indicates the most likely category, a display name, market cap when available, and a `yahoo_lookup` value to fetch further data if desired.
29
+
30
+ ## Table of Contents 🗂
31
+
32
+ - [Key Features](#key-features)
33
+ - [Installation](#installation)
34
+ - [Usage](#usage)
35
+ - [API](#api)
36
+ - [Development](#development)
37
+ - [Release and Versioning](#release-and-versioning)
38
+ - [Citation](#citation)
39
+ - [Contributing](#contributing)
40
+ - [License](#license)
41
+
42
+ ## Key Features 🔑
43
+
44
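+ - Classify symbols as equity, crypto, forex, commodity, index or `Unknown`. Stock-like results report Yahoo's quote type (for example `EQUITY`), while crypto and forex results are reported as `crypto` and `forex`.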
45
+ - Uses multiple public APIs and simple heuristics to make robust decisions.
46
+ - Provides both synchronous and asynchronous APIs.
47
+ - Lightweight disk cache to avoid repeated lookups (`TickerCache`).
48
+
49
+ ## Installation ⚙️
50
+
51
+ Install the dependencies with pip from the provided `requirements.txt`, or install the package directly from the repository to get the latest changes:
52
+
53
+ ```bash
54
+ pip install -r requirements.txt
55
+ ```
56
+
57
+ or
58
+
59
+ ```bash
60
+ pip install git+https://github.com/StephanAkkerman/ticker-classifier.git
61
+ ```
62
+
63
+ ## Usage ⌨️
64
+
65
+ Basic synchronous usage:
66
+
67
+ ```python
68
+ from ticker_classifier.classifier import TickerClassifier
69
+
70
+ classifier = TickerClassifier()
71
+ symbols = ["AAPL", "BTC", "EUR", "GOLD", "UNKNOWN123"]
72
+ results = classifier.classify(symbols)
73
+ for r in results:
74
+ print(r)
75
+ ```
76
+
77
+ Example asynchronous usage:
78
+
79
+ ```python
80
+ import asyncio
81
+ from ticker_classifier.classifier import TickerClassifier
82
+
83
+ async def main():
84
+ classifier = TickerClassifier()
85
+ symbols = ["AAPL", "BTC", "ETH", "JPY"]
86
+ results = await classifier.classify_async(symbols)
87
+ for r in results:
88
+ print(r)
89
+
90
+ asyncio.run(main())
91
+ ```
92
+
93
+ The output for each symbol is a dictionary like:
94
+
95
+ ```python
96
+ {'category': 'EQUITY', 'ticker': 'AAPL', 'name': 'Apple Inc.', 'market_cap': 4029017227264, 'yahoo_lookup': 'AAPL', 'alternatives': ['crypto'], 'source': 'api'}
97
+ {'category': 'crypto', 'ticker': 'BTC', 'name': 'Bitcoin', 'market_cap': 1736590593460.9607, 'yahoo_lookup': 'BTC-USD', 'alternatives': ['stock'], 'source': 'api'}
98
+ {'category': 'crypto', 'ticker': 'ETH', 'name': 'Ethereum', 'market_cap': 338145915081.1455, 'yahoo_lookup': 'ETH-USD', 'alternatives': ['stock'], 'source': 'cache'}
99
+ {'category': 'forex', 'ticker': 'JPY', 'name': 'JPY Currency', 'market_cap': None, 'yahoo_lookup': 'JPYUSD=X', 'alternatives': ['stock'], 'source': 'cache'}
100
+ ```
101
+
102
+ Notes
103
+ - The classifier caches positive classifications (non-`Unknown`) in an
104
+ SQLite database (default `ticker_cache.db`) for `24` hours by default.
105
+ - You can customize the cache filename and expiry by passing `db_name` and
106
+ `hours_to_expire` to `TickerClassifier`.
107
+
108
+ ## API
109
+
110
+ - `ticker_classifier.classifier.TickerClassifier`
111
+ - `classify(symbols: List[str]) -> List[dict]` – synchronous classification.
112
+ - `classify_async(symbols: List[str]) -> List[dict]` – async classification.
113
+ - `ticker_classifier.apis.yahoo.YahooClient` – low-level Yahoo quote fetcher (sync + async helpers).
114
+ - `ticker_classifier.apis.coingecko.CoinGeckoClient` – crypto lookup + market cap helpers (sync + async).
115
+ - `ticker_classifier.db.cache.TickerCache` – tiny SQLite-backed cache used by `TickerClassifier`.
116
+
117
+ ## Development
118
+
119
+ Run the formatting and linting tools you prefer (the project uses the `black` code style).
120
+
121
+ Run a quick smoke check by running the `classifier.py` module directly:
122
+
123
+ ```powershell
124
+ & .venv\Scripts\python.exe ticker_classifier\classifier.py
125
+ ```
126
+
127
+ If you add tests, run them with your chosen test runner (e.g. `pytest`).
128
+
129
+ ## Release and Versioning
130
+
131
+ This package is published to PyPI through GitHub Actions:
132
+
133
+ - Workflow: `.github/workflows/publish.yml`
134
+ - Trigger: GitHub Release published
135
+ - Publisher: `pypa/gh-action-pypi-publish` using trusted publishing (OIDC)
136
+
137
+ Release flow:
138
+
139
+ 1. Update version in `pyproject.toml`.
140
+ 2. Update `ticker_classifier/__init__.py` `__version__` to match.
141
+ 3. Commit and push.
142
+ 4. Create a GitHub release with tag `vX.Y.Z` (or `X.Y.Z`).
143
+
144
+ The publish workflow validates that the release tag version matches `pyproject.toml` before uploading to PyPI.
145
+
146
+ ## Citation ✍️
147
+ If you use this project in your research, please cite as follows (adjust
148
+ metadata accordingly):
149
+
150
+ ```bibtex
151
+ @misc{ticker-classifier,
152
+ author = {Stephan Akkerman},
153
+ title = {ticker-classifier},
154
+ year = {2025},
155
+ publisher = {GitHub},
156
+ howpublished = {\url{https://github.com/StephanAkkerman/ticker-classifier}}
157
+ }
158
+ ```
159
+
160
+ ## Contributing 🛠
161
+
162
+ Contributions are welcome. Suggested workflow:
163
+
164
+ 1. Fork the repository and create a feature branch.
165
+ 2. Run tests and format your changes with `black`.
166
+ 3. Open a pull request with a clear description of the change.
167
+
168
+ Please open issues for feature requests or bugs and include a small
169
+ reproducible example when possible.
170
+
171
+ ![https://github.com/StephanAkkerman/ticker-classifier/graphs/contributors](https://contributors-img.firebaseapp.com/image?repo=StephanAkkerman/ticker-classifier)
172
+
173
+ ## License 📜
174
+
175
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,13 @@
1
+ ticker_classifier/__init__.py,sha256=X35OW2LFeV0GXY-lkZKNVOpiIMHm2gEsXxs5txD17-A,95
2
+ ticker_classifier/classifier.py,sha256=QrZK1QZCSDWtZFeFzUNhuC3o-Pmc8rMOoyrcpcs0BlQ,8918
3
+ ticker_classifier/constants.py,sha256=MzTG-PeFm8EjtVjbPtBGKnyWaeR5Arh7rAkvRaUyyNU,2682
4
+ ticker_classifier/apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ ticker_classifier/apis/coingecko.py,sha256=a7hwT52-hGc-u6TSLQ2li06qqkVpRecuWrcmcJdpy7I,7603
6
+ ticker_classifier/apis/yahoo.py,sha256=4TmCfK9prMZ7VCc6ZoWqAgBCRTICOlK02ewNH-W7Xpo,7696
7
+ ticker_classifier/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ ticker_classifier/db/cache.py,sha256=oTd_aGJ-NU0Jyf1SnpuKeALKcNg3GW6W0RziwuzGMiM,3257
9
+ ticker_classifier-0.1.0.dist-info/licenses/LICENSE,sha256=M4a6e_RgNGKdxALQ8kkRG280OdA6w5Y3RLHwneYEbq4,1073
10
+ ticker_classifier-0.1.0.dist-info/METADATA,sha256=ZV9ecU1pLhO6UgK8nBwzWk848xXg8RVjsFahq7arMks,6358
11
+ ticker_classifier-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
12
+ ticker_classifier-0.1.0.dist-info/top_level.txt,sha256=aaVnIjqeRswEK3Ph2fWR_9GgI16UCON513u7YDX9mmQ,18
13
+ ticker_classifier-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Stephan Akkerman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ ticker_classifier