ticker-classifier 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ticker_classifier/__init__.py +4 -0
- ticker_classifier/apis/__init__.py +0 -0
- ticker_classifier/apis/coingecko.py +217 -0
- ticker_classifier/apis/yahoo.py +225 -0
- ticker_classifier/classifier.py +240 -0
- ticker_classifier/constants.py +192 -0
- ticker_classifier/db/__init__.py +0 -0
- ticker_classifier/db/cache.py +93 -0
- ticker_classifier-0.1.0.dist-info/METADATA +175 -0
- ticker_classifier-0.1.0.dist-info/RECORD +13 -0
- ticker_classifier-0.1.0.dist-info/WHEEL +5 -0
- ticker_classifier-0.1.0.dist-info/licenses/LICENSE +21 -0
- ticker_classifier-0.1.0.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from typing import Dict, List
|
|
3
|
+
|
|
4
|
+
import aiohttp
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CoinGeckoClient:
    """Resolve ticker symbols to CoinGecko coins and their USD market caps.

    The client lazily downloads CoinGecko's full coin list, groups coin ids
    by upper-cased symbol, and then asks the ``simple/price`` endpoint for
    the market cap of every candidate id. When several coins share a
    symbol, the one with the highest market cap wins.

    Both a synchronous (``requests``) and an asynchronous (``aiohttp``)
    code path are provided; they share all parsing logic.
    """

    # Seconds allowed for each HTTP request (sync and async alike).
    REQUEST_TIMEOUT = 10
    # Maximum number of coin ids sent in one simple/price request.
    CHUNK_SIZE = 200
    # Maximum number of same-symbol coins considered per ticker.
    MAX_COLLISIONS = 10

    def __init__(self):
        """Initialize CoinGecko client.

        Sets up the endpoints used for retrieving the coin list and simple
        price information and initializes the (initially unloaded)
        symbol -> ids cache.

        Notes
        -----
        ``_crypto_map`` maps uppercase symbols to a list of CoinGecko ids.
        It is populated lazily by `_load_map_sync` or `_load_map_async`
        when a price lookup is requested.
        """
        self.list_url = "https://api.coingecko.com/api/v3/coins/list"
        self.price_url = "https://api.coingecko.com/api/v3/simple/price"
        self._crypto_map = None  # { 'BTC': ['bitcoin', 'bitcoin-token'], ... }

    @staticmethod
    def _build_map(data) -> Dict[str, List[str]]:
        """Group CoinGecko ids by upper-cased symbol.

        Parameters
        ----------
        data : list
            JSON-decoded payload of the coin list endpoint; each entry is
            expected to carry ``"symbol"`` and ``"id"`` keys.

        Returns
        -------
        dict[str, list[str]]
            Plain dict (not a ``defaultdict``) so later lookups can never
            create keys by accident.

        Raises
        ------
        ValueError
            If the payload is not a list (e.g. an error object).
        """
        if not isinstance(data, list):
            raise ValueError("unexpected coin list payload")
        grouped = defaultdict(list)
        for coin in data:
            try:
                symbol = coin["symbol"].upper()
                coin_id = coin["id"]
            except (KeyError, TypeError, AttributeError):
                # Skip a single malformed entry instead of discarding
                # the whole map.
                continue
            grouped[symbol].append(coin_id)
        return dict(grouped)

    def _load_map_sync(self):
        """Load the CoinGecko symbol->id map synchronously.

        No-op when the map is already populated. Any failure while
        fetching or parsing leaves the map as an empty dict, which is
        falsy, so the next call will try again.
        """
        if self._crypto_map:
            return
        try:
            resp = requests.get(self.list_url, timeout=self.REQUEST_TIMEOUT)
            self._crypto_map = self._build_map(resp.json())
        except Exception:
            # Best effort: callers treat an empty map as "no crypto data".
            self._crypto_map = {}

    async def _load_map_async(self, session: aiohttp.ClientSession):
        """Asynchronously load the CoinGecko symbol->id map.

        Parameters
        ----------
        session : aiohttp.ClientSession
            Active aiohttp session used for making the HTTP request.

        Notes
        -----
        Async counterpart of `_load_map_sync` with the same caching and
        failure semantics. The request uses the same timeout as the
        synchronous path.
        """
        if self._crypto_map:
            return
        try:
            timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT)
            async with session.get(self.list_url, timeout=timeout) as resp:
                payload = await resp.json()
            self._crypto_map = self._build_map(payload)
        except Exception:
            # Best effort: callers treat an empty map as "no crypto data".
            self._crypto_map = {}

    def _get_candidate_ids(
        self, symbols: List[str]
    ) -> tuple[List[str], Dict[str, str]]:
        """Return candidate CoinGecko ids for a list of symbols.

        Parameters
        ----------
        symbols : list[str]
            Uppercase ticker symbols to map to CoinGecko ids.

        Returns
        -------
        ids : list[str]
            Flat list of candidate CoinGecko ids (limited to the first
            ``MAX_COLLISIONS`` ids per symbol, without duplicates).
        id_to_parent : dict
            Mapping of coin id -> original symbol (parent) used to group
            results later.
        """
        ids = []
        id_to_parent = {}
        if not self._crypto_map:
            return ids, id_to_parent

        for sym in symbols:
            for cid in self._crypto_map.get(sym, ())[: self.MAX_COLLISIONS]:
                if cid in id_to_parent:
                    # Repeated input symbol: do not request the id twice.
                    continue
                ids.append(cid)
                id_to_parent[cid] = sym
        return ids, id_to_parent

    def _chunks(self, ids: List[str]):
        """Yield successive slices of `ids` of at most ``CHUNK_SIZE`` items."""
        for start in range(0, len(ids), self.CHUNK_SIZE):
            yield ids[start : start + self.CHUNK_SIZE]

    @staticmethod
    def _price_params(chunk: List[str]) -> Dict[str, str]:
        """Build the simple/price query parameters for one chunk of ids."""
        return {
            "ids": ",".join(chunk),
            "vs_currencies": "usd",
            "include_market_cap": "true",
        }

    def get_prices_sync(self, symbols: List[str]) -> Dict[str, Dict]:
        """Synchronous market-cap lookup for a list of symbols.

        Ensures the symbol->id map is loaded, collects candidate ids for
        the requested symbols and queries CoinGecko in chunks. The highest
        market cap candidate per symbol is selected by `_process_response`.

        Parameters
        ----------
        symbols : list[str]
            Uppercase ticker symbols to look up.

        Returns
        -------
        dict[str, dict]
            Mapping of symbol -> {"market_cap": ..., "name": ..., "id": ...}
            for matches found. Returns an empty dict if nothing matched.
        """
        self._load_map_sync()
        results = {}
        ids, id_map = self._get_candidate_ids(symbols)

        for chunk in self._chunks(ids):
            try:
                resp = requests.get(
                    self.price_url,
                    params=self._price_params(chunk),
                    timeout=self.REQUEST_TIMEOUT,
                )
                self._process_response(resp.json(), id_map, results)
            except Exception:
                # Deliberate best effort: a failed chunk must not discard
                # the results of the other chunks.
                continue
        return results

    async def get_prices_async(
        self, session: aiohttp.ClientSession, symbols: List[str]
    ) -> Dict[str, Dict]:
        """Asynchronously retrieve market caps for symbols.

        Parameters
        ----------
        session : aiohttp.ClientSession
            Active aiohttp session used to make HTTP requests.
        symbols : list[str]
            Uppercase ticker symbols to query.

        Returns
        -------
        dict[str, dict]
            Mapping of symbol -> {"market_cap": ..., "name": ..., "id": ...}.

        Notes
        -----
        Uses `_load_map_async` and requests CoinGecko in chunks. Failures
        for a chunk are swallowed and processing continues.
        """
        await self._load_map_async(session)
        results = {}
        ids, id_map = self._get_candidate_ids(symbols)
        timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT)

        for chunk in self._chunks(ids):
            try:
                async with session.get(
                    self.price_url,
                    params=self._price_params(chunk),
                    timeout=timeout,
                ) as resp:
                    payload = await resp.json()
                self._process_response(payload, id_map, results)
            except Exception:
                # Deliberate best effort, see get_prices_sync.
                continue
        return results

    def _process_response(self, data, id_map, results):
        """Process a CoinGecko price response and update results.

        Parameters
        ----------
        data : dict
            JSON-decoded response from the CoinGecko simple/price endpoint
            (coin id -> {"usd": ..., "usd_market_cap": ...}).
        id_map : dict
            Mapping of coin id -> parent symbol used to group results.
        results : dict
            Mutable mapping that is updated in-place with the best
            candidate per parent symbol (highest market cap wins).

        Notes
        -----
        A missing or null market cap counts as 0, so such a coin can never
        replace a candidate and never aborts processing of the remaining
        coins. Coins whose market cap is 0 are not recorded at all.
        """
        if not isinstance(data, dict):
            return
        for cid, val in data.items():
            parent = id_map.get(cid)
            if not parent or not isinstance(val, dict):
                continue
            # `or 0` also covers an explicit JSON null.
            mcap = val.get("usd_market_cap") or 0
            if mcap > results.get(parent, {}).get("market_cap", 0):
                results[parent] = {
                    "market_cap": mcap,
                    # The price endpoint returns no display name, so one
                    # is derived from the id.
                    "name": cid.title(),
                    "id": cid,
                }
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""Yahoo Finance API helpers.
|
|
2
|
+
|
|
3
|
+
This module provides a small client for obtaining the cookie/crumb
|
|
4
|
+
credentials required by Yahoo Finance endpoints and for fetching quote
|
|
5
|
+
data. Both synchronous (requests) and asynchronous (aiohttp) helpers
|
|
6
|
+
are provided.
|
|
7
|
+
|
|
8
|
+
Notes
|
|
9
|
+
-----
|
|
10
|
+
Docstrings follow the NumPy documentation style.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
import aiohttp
|
|
16
|
+
import requests
|
|
17
|
+
|
|
18
|
+
# Yahoo Finance endpoints. The crumb and quote URLs live under API_BASE;
# the cookie is obtained from a separate host.
API_BASE = "https://query2.finance.yahoo.com"
COOKIE_URL = "https://fc.yahoo.com"
CRUMB_URL = f"{API_BASE}/v1/test/getcrumb"
QUOTE_URL = f"{API_BASE}/v7/finance/quote"

# Browser-like User-Agent sent with every Yahoo request.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    )
}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class YahooClient:
    """Small Yahoo Finance quote client with cookie/crumb authentication.

    The quote endpoint is called with a cookie and a matching "crumb"
    token. Both are fetched once and kept in ``self.credentials`` until a
    quote request answers ``401``; the credentials are then dropped so
    the next call authenticates again.
    """

    # Seconds allowed for each HTTP request (sync and async alike).
    REQUEST_TIMEOUT = 5

    def __init__(self):
        """Initialize a new :class:`YahooClient`.

        The client maintains an in-memory cache of authentication
        credentials (cookie + crumb) so repeated requests do not need to
        re-authenticate on every call.
        """
        self.credentials: Optional[Dict[str, Any]] = None

    @staticmethod
    def _parse_quotes(data) -> Dict[str, Dict]:
        """Extract quote items from a decoded quote response.

        Parameters
        ----------
        data : dict
            JSON-decoded body; quotes are read from
            ``data["quoteResponse"]["result"]``.

        Returns
        -------
        dict
            Mapping from upper-case symbol to the raw quote item. Payloads
            of an unexpected shape and items without a symbol are ignored
            instead of aborting the whole batch.
        """
        quotes = {}
        if not isinstance(data, dict):
            return quotes
        quote_response = data.get("quoteResponse")
        if not isinstance(quote_response, dict):
            return quotes
        for item in quote_response.get("result") or []:
            symbol = item.get("symbol") if isinstance(item, dict) else None
            if symbol:
                quotes[symbol.upper()] = item
        return quotes

    def _get_credentials_sync(self):
        """Fetch cookie and crumb synchronously.

        The method performs the two-step Yahoo flow synchronously:
        1. Request the cookie from ``COOKIE_URL`` (allowing redirects).
        2. Request the crumb token from ``CRUMB_URL`` using the
           collected cookies.

        Returns
        -------
        dict or None
            Dictionary with keys ``'cookie'`` and ``'crumb'`` when
            successful, otherwise ``None``.
        """
        if self.credentials:
            return self.credentials

        try:
            # A Session accumulates cookies set anywhere along the redirect
            # chain; a bare response object only exposes the cookies of the
            # final hop.
            with requests.Session() as http:
                # 1. Get Cookies (allow redirects)
                http.get(COOKIE_URL, headers=HEADERS, timeout=self.REQUEST_TIMEOUT)

                # 2. Get Crumb (the session sends the cookies from step 1)
                response_crumb = http.get(
                    CRUMB_URL, headers=HEADERS, timeout=self.REQUEST_TIMEOUT
                )
                cookies = http.cookies

            crumb = response_crumb.text.strip()
            # Only a successful response carries a crumb; an error body
            # must not be cached as a credential.
            if response_crumb.status_code == 200 and crumb:
                self.credentials = {"cookie": cookies, "crumb": crumb}
        except Exception as e:
            print(f"Yahoo Auth Error (Sync): {e}")

        return self.credentials

    def get_quotes_sync(self, symbols: List[str]) -> Dict[str, Dict]:
        """Get quotes synchronously for the provided symbols.

        Parameters
        ----------
        symbols : list of str
            List of ticker symbols to query (e.g. ``['AAPL', 'MSFT']``).

        Returns
        -------
        dict
            Mapping from upper-case symbol to the quote data dictionary
            returned by Yahoo. Returns an empty dict if no symbols are
            provided or if a failure occurs.
        """
        results = {}
        if not symbols:
            return results

        creds = self._get_credentials_sync()
        if not creds:
            print("Skipping Yahoo Sync: No credentials.")
            return results

        try:
            params = {"symbols": ",".join(symbols), "crumb": creds["crumb"]}

            resp = requests.get(
                QUOTE_URL,
                params=params,
                cookies=creds["cookie"],
                headers=HEADERS,
                timeout=self.REQUEST_TIMEOUT,
            )

            if resp.status_code == 200:
                results.update(self._parse_quotes(resp.json()))
            elif resp.status_code == 401:
                # Credentials expired? Clear them for next time.
                self.credentials = None
                print("Yahoo 401 Unauthorized (Sync). Credentials cleared.")

        except Exception as e:
            print(f"Yahoo Sync Request Error: {e}")

        return results

    async def _get_credentials_async(self, session: aiohttp.ClientSession):
        """Fetch cookie and crumb asynchronously.

        Parameters
        ----------
        session : aiohttp.ClientSession
            An aiohttp session used to make the requests. The session's
            cookie jar collects the cookies set by the first request and
            sends them with the crumb request.

        Returns
        -------
        dict or None
            Dictionary with keys ``'cookie'`` and ``'crumb'`` when
            successful, otherwise ``None``.
        """
        if self.credentials:
            return self.credentials

        try:
            timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT)

            # 1. Get Cookies
            async with session.get(
                COOKIE_URL, headers=HEADERS, timeout=timeout
            ) as resp:
                await resp.read()  # Read body to ensure cookies are processed

            # 2. Get Crumb
            # The session now holds the cookies from step 1
            async with session.get(
                CRUMB_URL, headers=HEADERS, timeout=timeout
            ) as resp:
                crumb_ok = resp.status == 200
                crumb = (await resp.text()).strip()

            # Only a successful response carries a crumb; an error body
            # must not be cached as a credential.
            if crumb_ok and crumb:
                # Copy the cookies out of the session jar into a plain dict
                # so they can be passed explicitly with later requests.
                # NOTE(review): newer aiohttp releases may prefer a
                # yarl.URL argument here - confirm against the pinned
                # aiohttp version.
                cookies = {
                    k: v.value
                    for k, v in session.cookie_jar.filter_cookies(CRUMB_URL).items()
                }
                self.credentials = {"cookie": cookies, "crumb": crumb}

        except Exception as e:
            print(f"Yahoo Auth Error (Async): {e}")

        return self.credentials

    async def get_quotes_async(
        self, session: aiohttp.ClientSession, symbols: List[str]
    ) -> Dict[str, Dict]:
        """Asynchronously get quotes for the provided symbols.

        Parameters
        ----------
        session : aiohttp.ClientSession
            Active aiohttp session used to perform the requests.
        symbols : list of str
            List of ticker symbols to query.

        Returns
        -------
        dict
            Mapping from upper-case symbol to the quote data dictionary
            returned by Yahoo. Returns an empty dict if no symbols are
            provided or if a failure occurs.
        """
        results = {}
        if not symbols:
            return results

        creds = await self._get_credentials_async(session)
        if not creds:
            print("Skipping Yahoo Async: No credentials.")
            return results

        try:
            params = {"symbols": ",".join(symbols), "crumb": creds["crumb"]}

            # aiohttp allows passing cookies as a dict
            async with session.get(
                QUOTE_URL,
                params=params,
                cookies=creds["cookie"],
                headers=HEADERS,
                timeout=aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT),
            ) as resp:
                if resp.status == 200:
                    results.update(self._parse_quotes(await resp.json()))
                elif resp.status == 401:
                    # Credentials expired? Clear them for next time.
                    self.credentials = None
                    print("Yahoo 401 Unauthorized (Async). Credentials cleared.")

        except Exception as e:
            print(f"Yahoo Async Request Error: {e}")

        return results
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import Dict, List
|
|
3
|
+
|
|
4
|
+
import aiohttp
|
|
5
|
+
|
|
6
|
+
from .apis.coingecko import CoinGeckoClient
|
|
7
|
+
from .apis.yahoo import YahooClient
|
|
8
|
+
from .constants import MAJOR_FOREX, MINOR_FOREX, SHORTCUTS
|
|
9
|
+
from .db.cache import TickerCache
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TickerClassifier:
    """Classify ticker-like symbols as stock, crypto or forex instruments.

    Each symbol is looked up in the static ``SHORTCUTS`` table, then in the
    local cache, and only then resolved through Yahoo Finance and
    CoinGecko. Competing answers are settled by `_process_duel`.
    """

    def __init__(self, db_name: str = "ticker_cache.db", hours_to_expire: int = 24):
        """Create a TickerClassifier instance.

        Parameters
        ----------
        db_name : str, optional
            SQLite filename for the `TickerCache`, by default "ticker_cache.db".
        hours_to_expire : int, optional
            Hours after which cached entries expire, by default 24.
        """
        self.cache = TickerCache(db_name, hours_to_expire)
        self.yahoo = YahooClient()
        self.cg = CoinGeckoClient()

    @staticmethod
    def _normalize(symbols: List[str]) -> List[str]:
        """Return the distinct, non-blank, upper-cased symbols.

        First-seen order is kept so the outgoing API requests are
        deterministic for a given input.
        """
        return list(dict.fromkeys(s.upper().strip() for s in symbols if s.strip()))

    @staticmethod
    def _split_known(unique: List[str], cached: Dict):
        """Separate symbols that are already resolved from those that are not.

        Parameters
        ----------
        unique : list[str]
            Normalized symbols.
        cached : dict
            Mapping of symbol -> cached classification.

        Returns
        -------
        results_map : dict
            Classifications taken from ``SHORTCUTS`` (tagged with
            ``source == "shortcut"``) or from `cached`. Shortcuts take
            precedence over cached entries.
        to_process : list[str]
            Symbols that still need an API lookup.
        """
        results_map = {}
        to_process = []
        for sym in unique:
            if sym in SHORTCUTS:
                results_map[sym] = {**SHORTCUTS[sym], "source": "shortcut"}
            elif sym in cached:
                results_map[sym] = cached[sym]
            else:
                to_process.append(sym)
        return results_map, to_process

    @staticmethod
    def _align(symbols: List[str], results_map: Dict) -> List[Dict]:
        """Map every input symbol to its result (``None`` when absent)."""
        return [results_map.get(s.upper().strip()) for s in symbols]

    def _process_duel(
        self, to_process: List[str], yahoo_data: Dict, crypto_data: Dict
    ) -> Dict:
        """Resolve competing category signals for each symbol.

        The classifier considers three possible sources for each symbol:
        stock (Yahoo), crypto (CoinGecko), and forex (heuristics). Each source
        receives a numeric score (market cap or heuristic weight) and the
        highest-scoring source determines the final classification.

        Parameters
        ----------
        to_process : list[str]
            Uppercase symbols to evaluate.
        yahoo_data : dict
            Mapping of symbol -> Yahoo quote dict (as returned by `YahooClient`).
        crypto_data : dict
            Mapping of symbol -> CoinGecko-derived dict containing at least
            a `market_cap` key.

        Returns
        -------
        dict
            Mapping of symbol -> final classification dict. Resolved
            symbols carry `category`, `ticker`, `name`, `market_cap`,
            `yahoo_lookup`, `alternatives` and `source`; unresolved ones
            only `category == "Unknown"` and `ticker`.

        Notes
        -----
        A crypto candidate below ``MIN_CRYPTO_MCAP`` is treated as noise:
        it never wins. If a stock or forex candidate exists, that one is
        used instead; otherwise the symbol is reported as ``Unknown``.
        Missing or null market caps are scored as 0.
        """
        # 1 MILLION USD THRESHOLD
        # A crypto smaller than this is treated as "Noise" (junk token that
        # merely shares the ticker).
        MIN_CRYPTO_MCAP = 1_000_000
        categories = ("stock", "crypto", "forex")

        processed = {}
        for sym in to_process:
            scores = {"stock": 0, "crypto": 0, "forex": 0}
            details = {}

            # 1. Forex Heuristics
            if sym in MAJOR_FOREX:
                scores["forex"] = 100_000_000_000_000
            elif sym in MINOR_FOREX:
                scores["forex"] = 50_000_000
            if scores["forex"]:
                details["forex"] = {
                    "type": "Forex",
                    "name": f"{sym} Currency",
                    "market_cap": None,
                }

            # 2. Stock Data
            if sym in yahoo_data:
                info = yahoo_data[sym]
                qtype = info.get("quoteType", "UNKNOWN")
                raw_mcap = info.get("marketCap", 0)
                # `or 0` guards against an explicit null market cap, which
                # would otherwise break the score comparison below.
                score = raw_mcap or 0

                # Boost logic
                if qtype == "INDEX":
                    score = 50_000_000_000
                elif qtype == "FUTURE":
                    score = 10_000_000_000
                elif score == 0 and qtype in ("EQUITY", "ETF"):
                    # Valid stock object but mcap is missing/0: give it a
                    # base score (assume at least micro-cap stock).
                    score = 250_000

                scores["stock"] = score
                details["stock"] = {
                    "type": qtype,
                    "name": info.get("shortName") or info.get("longName"),
                    "market_cap": raw_mcap,
                }

            # 3. Crypto Data
            if sym in crypto_data:
                info = crypto_data[sym]
                mcap = info.get("market_cap") or 0
                scores["crypto"] = mcap
                details["crypto"] = {
                    "type": "Crypto",
                    "name": info.get("name"),
                    "market_cap": mcap,
                }

            # 4. Resolve (ties go to the earlier entry of `categories`)
            winner = max(categories, key=scores.__getitem__)

            if winner == "crypto" and scores["crypto"] < MIN_CRYPTO_MCAP:
                # Tiny token: prefer a real stock/forex match if there is
                # one; with no other candidate, "Unknown" is better than a
                # $1,000 junk token for "NVDA".
                fallback = max(("stock", "forex"), key=scores.__getitem__)
                winner = fallback if scores[fallback] > 0 else "unknown"

            # Construct Result
            if winner == "unknown" or scores[winner] == 0:
                processed[sym] = {"category": "Unknown", "ticker": sym}
                continue

            chosen = details.get(winner, {})
            alternatives = [k for k in categories if scores[k] > 0 and k != winner]

            y_look = sym
            if winner == "crypto":
                y_look = f"{sym}-USD"
            elif winner == "forex":
                # NOTE(review): for sym == "USD" this yields "USDUSD=X" -
                # confirm whether that lookup symbol is intended.
                y_look = f"{sym}USD=X"

            processed[sym] = {
                "category": winner if winner != "stock" else chosen.get("type"),
                "ticker": sym,
                "name": chosen.get("name"),
                "market_cap": chosen.get("market_cap"),
                "yahoo_lookup": y_look,
                "alternatives": alternatives,
                "source": "api",
            }
        return processed

    def classify(self, symbols: List[str]) -> List[Dict]:
        """Synchronously classify a list of ticker-like symbols.

        Parameters
        ----------
        symbols : list[str]
            Symbols (may contain duplicates or mixed case). The returned
            list preserves the order of the input list with each element
            replaced by its classification dict or `None`.

        Returns
        -------
        list[dict]
            List of classification dictionaries aligned with the input order.
        """
        unique = self._normalize(symbols)

        # Cache check
        cached = self.cache.get_many(unique)
        results_map, to_process = self._split_known(unique, cached)

        if to_process:
            y_res = self.yahoo.get_quotes_sync(to_process)
            c_res = self.cg.get_prices_sync(to_process)
            processed = self._process_duel(to_process, y_res, c_res)
            self.cache.save_many(processed)
            results_map.update(processed)

        return self._align(symbols, results_map)

    async def classify_async(self, symbols: List[str]) -> List[Dict]:
        """Asynchronously classify a list of ticker-like symbols.

        Parameters
        ----------
        symbols : list[str]
            List of symbols to classify. Input order is preserved in the
            returned list.

        Returns
        -------
        list[dict]
            Classification results aligned with the input list; entries
            are `None` for blank input symbols.
        """
        unique = self._normalize(symbols)

        # Cache Read (Run in thread to avoid blocking loop)
        loop = asyncio.get_running_loop()
        cached = await loop.run_in_executor(None, self.cache.get_many, unique)
        results_map, to_process = self._split_known(unique, cached)

        if to_process:
            async with aiohttp.ClientSession() as session:
                # Both providers are queried concurrently on one session.
                y_res, c_res = await asyncio.gather(
                    self.yahoo.get_quotes_async(session, to_process),
                    self.cg.get_prices_async(session, to_process),
                )

            processed = self._process_duel(to_process, y_res, c_res)

            # Cache Write (Run in thread)
            await loop.run_in_executor(None, self.cache.save_many, processed)
            results_map.update(processed)

        return self._align(symbols, results_map)
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
# Currency codes scored as major forex instruments by the classifier.
MAJOR_FOREX = {
    "USD", "EUR", "JPY", "GBP", "AUD",
    "CAD", "CHF", "CNY", "HKD", "NZD",
}

# Remaining currency codes recognised as (minor) forex instruments.
MINOR_FOREX = {
    "AED", "AFN", "ALL", "AMD", "ANG", "AOA", "ARS", "AWG", "AZN",
    "BAM", "BBD", "BDT", "BGN", "BHD", "BIF", "BMD", "BND", "BOB", "BRL",
    "BSD", "BTN", "BWP", "BYN", "BZD",
    "CLP", "COP", "CRC", "CUP", "CVE", "CZK",
    "DJF", "DKK", "DOP", "DZD",
    "EGP", "ERN", "ETB",
    "FJD", "FKP",
    "GEL", "GHS", "GIP", "GMD", "GNF", "GTQ", "GYD",
    "HNL", "HRK", "HTG", "HUF",
    "IDR", "ILS", "INR", "IQD", "IRR", "ISK",
    "JMD", "JOD",
    "KES", "KGS", "KHR", "KMF", "KPW", "KRW", "KWD", "KYD", "KZT",
    "LAK", "LBP", "LKR", "LRD", "LSL", "LYD",
    "MAD", "MDL", "MGA", "MKD", "MMK", "MNT", "MOP", "MRU", "MUR", "MVR",
    "MWK", "MXN", "MYR", "MZN",
    "NAD", "NGN", "NIO", "NOK", "NPR",
    "OMR",
    "PAB", "PEN", "PGK", "PHP", "PKR", "PLN", "PYG",
    "QAR",
    "RON", "RSD", "RUB", "RWF",
    "SAR", "SBD", "SCR", "SDG", "SEK", "SGD", "SHP", "SLL", "SOS", "SRD",
    "SSP", "STN", "SYP", "SZL",
    "THB", "TJS", "TMT", "TND", "TOP", "TRY", "TTD", "TWD", "TZS",
    "UAH", "UGX", "UYU", "UZS",
    "VES", "VND", "VUV",
    "WST",
    "XAF", "XCD", "XOF", "XPF",
    "YER",
    "ZAR", "ZMW",
}

# Hard-wired classifications for well-known tickers. Each entry gives the
# category, the ticker itself, a display name and the symbol to use for a
# Yahoo lookup.
SHORTCUTS = {
    "DXY": {
        "category": "Index",
        "ticker": "DXY",
        "name": "US Dollar Index",
        "yahoo_lookup": "DX-Y.NYB",
    },
    "VIX": {
        "category": "Index",
        "ticker": "VIX",
        "name": "CBOE Volatility Index",
        "yahoo_lookup": "^VIX",
    },
    "GOLD": {
        "category": "Commodity",
        "ticker": "GOLD",
        "name": "Gold",
        "yahoo_lookup": "GC=F",
    },
    "SILVER": {
        "category": "Commodity",
        "ticker": "SILVER",
        "name": "Silver",
        "yahoo_lookup": "SI=F",
    },
    "OIL": {
        "category": "Commodity",
        "ticker": "OIL",
        "name": "Crude Oil",
        "yahoo_lookup": "CL=F",
    },
    "SPX": {
        "category": "Index",
        "ticker": "SPX",
        "name": "S&P 500",
        "yahoo_lookup": "^GSPC",
    },
    "SPY": {
        "category": "ETF",
        "ticker": "SPY",
        "name": "SPDR S&P 500 ETF Trust",
        "yahoo_lookup": "SPY",
    },
}
|
|
File without changes
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sqlite3
|
|
3
|
+
from datetime import datetime, timedelta
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TickerCache:
|
|
8
|
+
def __init__(self, db_name: str, hours_to_expire: int):
    """Set up a SQLite-backed ticker cache.

    Parameters
    ----------
    db_name : str
        Path or name of the SQLite database file used for caching.
    hours_to_expire : int
        Age in hours beyond which a cached entry is considered expired.
    """
    self.hours_to_expire = hours_to_expire
    self.db_name = db_name
    # Create the backing table right away so later reads and writes can
    # assume it exists.
    self._init_db()
|
|
21
|
+
|
|
22
|
+
def _init_db(self):
    """Create the `tickers` table if it does not already exist.

    The table stores `symbol` as the primary key, the JSON-serialized
    `data` blob and an ISO-formatted `updated_at` timestamp.

    Notes
    -----
    Using a sqlite3 connection as a context manager only commits or
    rolls back the transaction; it does not close the connection. The
    connection is therefore closed explicitly here.
    """
    conn = sqlite3.connect(self.db_name)
    try:
        with conn:  # commit on success, roll back on error
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS tickers (
                    symbol TEXT PRIMARY KEY,
                    data TEXT,
                    updated_at TEXT
                )
                """
            )
    finally:
        conn.close()
|
|
38
|
+
|
|
39
|
+
def get_many(self, symbols: List[str]) -> Dict[str, Any]:
|
|
40
|
+
"""Retrieve multiple cached ticker entries that are not expired.
|
|
41
|
+
|
|
42
|
+
Parameters
|
|
43
|
+
----------
|
|
44
|
+
symbols : list[str]
|
|
45
|
+
List of symbol strings to fetch from cache. If empty, returns an
|
|
46
|
+
empty dict.
|
|
47
|
+
|
|
48
|
+
Returns
|
|
49
|
+
-------
|
|
50
|
+
dict[str, Any]
|
|
51
|
+
Mapping of symbol -> deserialized cache object. Each returned
|
|
52
|
+
object will have a `source` key set to `'cache'`.
|
|
53
|
+
"""
|
|
54
|
+
if not symbols:
|
|
55
|
+
return {}
|
|
56
|
+
cutoff = (datetime.now() - timedelta(hours=self.hours_to_expire)).isoformat()
|
|
57
|
+
results = {}
|
|
58
|
+
|
|
59
|
+
with sqlite3.connect(self.db_name) as conn:
|
|
60
|
+
cursor = conn.cursor()
|
|
61
|
+
placeholders = ",".join("?" * len(symbols))
|
|
62
|
+
query = f"SELECT symbol, data FROM tickers WHERE symbol IN ({placeholders}) AND updated_at > ?"
|
|
63
|
+
cursor.execute(query, symbols + [cutoff])
|
|
64
|
+
for s, d in cursor.fetchall():
|
|
65
|
+
results[s] = json.loads(d)
|
|
66
|
+
results[s]["source"] = "cache"
|
|
67
|
+
return results
|
|
68
|
+
|
|
69
|
+
def save_many(self, items: Dict[str, Any]):
|
|
70
|
+
"""Save multiple items to the cache.
|
|
71
|
+
|
|
72
|
+
Parameters
|
|
73
|
+
----------
|
|
74
|
+
items : dict[str, Any]
|
|
75
|
+
Mapping of symbol -> item dict. Items with `category == 'Unknown'`
|
|
76
|
+
are not persisted. The optional `source` key is stripped before
|
|
77
|
+
saving.
|
|
78
|
+
"""
|
|
79
|
+
if not items:
|
|
80
|
+
return
|
|
81
|
+
with sqlite3.connect(self.db_name) as conn:
|
|
82
|
+
cursor = conn.cursor()
|
|
83
|
+
now = datetime.now().isoformat()
|
|
84
|
+
data_tuples = []
|
|
85
|
+
for s, d in items.items():
|
|
86
|
+
if d.get("category") != "Unknown":
|
|
87
|
+
clean = {k: v for k, v in d.items() if k != "source"}
|
|
88
|
+
data_tuples.append((s, json.dumps(clean), now))
|
|
89
|
+
if data_tuples:
|
|
90
|
+
cursor.executemany(
|
|
91
|
+
"INSERT OR REPLACE INTO tickers (symbol, data, updated_at) VALUES (?, ?, ?)",
|
|
92
|
+
data_tuples,
|
|
93
|
+
)
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ticker_classifier
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A robust stock, crypto, and forex classifier with async support.
|
|
5
|
+
Author-email: Stephan Akkerman <stephan@akkerman.ai>
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: requests
|
|
10
|
+
Requires-Dist: aiohttp
|
|
11
|
+
Dynamic: license-file
|
|
12
|
+
|
|
13
|
+
# ticker-classifier
|
|
14
|
+
|
|
15
|
+
<!-- Add a banner here like: https://github.com/StephanAkkerman/fintwit-bot/blob/main/img/logo/fintwit-banner.png -->
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
<!-- Adjust the link of the first and second badges to your own repo -->
|
|
19
|
+
<p align="center">
|
|
20
|
+
<img alt="GitHub Actions Workflow Status" src="https://img.shields.io/github/actions/workflow/status/StephanAkkerman/ticker-classifier/pyversions.yml?label=python%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13&logo=python&style=flat-square">
|
|
21
|
+
<img src="https://img.shields.io/github/license/StephanAkkerman/ticker-classifier.svg?color=brightgreen" alt="License">
|
|
22
|
+
<a href="https://github.com/psf/black"><img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Code style: black"></a>
|
|
23
|
+
</p>
|
|
24
|
+
|
|
25
|
+
## Introduction
|
|
26
|
+
|
|
27
|
+
`ticker-classifier` is a small Python library for classifying ticker-like symbols (for example `AAPL`, `BTC`, `EUR`, `GOLD`) into a simple market/category representation.
|
|
28
|
+
It uses Yahoo Finance for equities, CoinGecko for cryptocurrencies and a few heuristics for currencies/commodities. The output indicates the most likely category, a display name, market cap when available, and a `yahoo_lookup` value to fetch further data if desired.
|
|
29
|
+
|
|
30
|
+
## Table of Contents 🗂
|
|
31
|
+
|
|
32
|
+
- [Key Features](#key-features)
|
|
33
|
+
- [Installation](#installation)
|
|
34
|
+
- [Usage](#usage)
|
|
35
|
+
- [API](#api)
|
|
36
|
+
- [Development](#development)
|
|
37
|
+
- [Release and Versioning](#release-and-versioning)
|
|
38
|
+
- [Citation](#citation)
|
|
39
|
+
- [Contributing](#contributing)
|
|
40
|
+
- [License](#license)
|
|
41
|
+
|
|
42
|
+
## Key Features 🔑
|
|
43
|
+
|
|
44
|
+
- Classify symbols as `Equity`, `Crypto`, `Forex`, `Commodity`, `Index` or `Unknown`.
|
|
45
|
+
- Uses multiple public APIs and simple heuristics to make robust decisions.
|
|
46
|
+
- Provides both synchronous and asynchronous APIs.
|
|
47
|
+
- Lightweight disk cache to avoid repeated lookups (`TickerCache`).
|
|
48
|
+
|
|
49
|
+
## Installation ⚙️
|
|
50
|
+
|
|
51
|
+
Install the dependencies with pip using the provided `requirements.txt`, or install the package directly from the repository to get the latest changes:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install -r requirements.txt
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
or
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install git+https://github.com/StephanAkkerman/ticker-classifier.git
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Usage ⌨️
|
|
64
|
+
|
|
65
|
+
Basic synchronous usage:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from ticker_classifier.classifier import TickerClassifier
|
|
69
|
+
|
|
70
|
+
classifier = TickerClassifier()
|
|
71
|
+
symbols = ["AAPL", "BTC", "EUR", "GOLD", "UNKNOWN123"]
|
|
72
|
+
results = classifier.classify(symbols)
|
|
73
|
+
for r in results:
|
|
74
|
+
print(r)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Example asynchronous usage:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
import asyncio
|
|
81
|
+
from ticker_classifier.classifier import TickerClassifier
|
|
82
|
+
|
|
83
|
+
async def main():
|
|
84
|
+
classifier = TickerClassifier()
|
|
85
|
+
symbols = ["AAPL", "BTC", "ETH", "JPY"]
|
|
86
|
+
results = await classifier.classify_async(symbols)
|
|
87
|
+
for r in results:
|
|
88
|
+
print(r)
|
|
89
|
+
|
|
90
|
+
asyncio.run(main())
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
The output for each symbol is a dictionary like:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
{'category': 'EQUITY', 'ticker': 'AAPL', 'name': 'Apple Inc.', 'market_cap': 4029017227264, 'yahoo_lookup': 'AAPL', 'alternatives': ['crypto'], 'source': 'api'}
|
|
97
|
+
{'category': 'crypto', 'ticker': 'BTC', 'name': 'Bitcoin', 'market_cap': 1736590593460.9607, 'yahoo_lookup': 'BTC-USD', 'alternatives': ['stock'], 'source': 'api'}
|
|
98
|
+
{'category': 'crypto', 'ticker': 'ETH', 'name': 'Ethereum', 'market_cap': 338145915081.1455, 'yahoo_lookup': 'ETH-USD', 'alternatives': ['stock'], 'source': 'cache'}
|
|
99
|
+
{'category': 'forex', 'ticker': 'JPY', 'name': 'JPY Currency', 'market_cap': None, 'yahoo_lookup': 'JPYUSD=X', 'alternatives': ['stock'], 'source': 'cache'}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Notes
|
|
103
|
+
- The classifier caches positive classifications (non-`Unknown`) in an
|
|
104
|
+
SQLite database (default `ticker_cache.db`) for `24` hours by default.
|
|
105
|
+
- You can customize the cache filename and expiry by passing `db_name` and
|
|
106
|
+
`hours_to_expire` to `TickerClassifier`.
|
|
107
|
+
|
|
108
|
+
## API
|
|
109
|
+
|
|
110
|
+
- `ticker_classifier.classifier.TickerClassifier`
|
|
111
|
+
- `classify(symbols: List[str]) -> List[dict]` – synchronous classification.
|
|
112
|
+
- `classify_async(symbols: List[str]) -> List[dict]` – async classification.
|
|
113
|
+
- `ticker_classifier.apis.yahoo.YahooClient` – low-level Yahoo quote fetcher (sync + async helpers).
|
|
114
|
+
- `ticker_classifier.apis.coingecko.CoinGeckoClient` – crypto lookup + market cap helpers (sync + async).
|
|
115
|
+
- `ticker_classifier.db.cache.TickerCache` – tiny SQLite-backed cache used by `TickerClassifier`.
|
|
116
|
+
|
|
117
|
+
## Development
|
|
118
|
+
|
|
119
|
+
Run the formatting and linting tools you prefer (the project uses the `black` code style).
|
|
120
|
+
|
|
121
|
+
Run a quick smoke check by running the `classifier.py` module directly:
|
|
122
|
+
|
|
123
|
+
```powershell
|
|
124
|
+
& .venv\Scripts\python.exe ticker_classifier\classifier.py
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
If you add tests, run them with your chosen test runner (e.g. `pytest`).
|
|
128
|
+
|
|
129
|
+
## Release and Versioning
|
|
130
|
+
|
|
131
|
+
This package is published to PyPI through GitHub Actions:
|
|
132
|
+
|
|
133
|
+
- Workflow: `.github/workflows/publish.yml`
|
|
134
|
+
- Trigger: GitHub Release published
|
|
135
|
+
- Publisher: `pypa/gh-action-pypi-publish` using trusted publishing (OIDC)
|
|
136
|
+
|
|
137
|
+
Release flow:
|
|
138
|
+
|
|
139
|
+
1. Update version in `pyproject.toml`.
|
|
140
|
+
2. Update `ticker_classifier/__init__.py` `__version__` to match.
|
|
141
|
+
3. Commit and push.
|
|
142
|
+
4. Create a GitHub release with tag `vX.Y.Z` (or `X.Y.Z`).
|
|
143
|
+
|
|
144
|
+
The publish workflow validates that the release tag version matches `pyproject.toml` before uploading to PyPI.
|
|
145
|
+
|
|
146
|
+
## Citation ✍️
|
|
147
|
+
If you use this project in your research, please cite as follows (adjust
|
|
148
|
+
metadata accordingly):
|
|
149
|
+
|
|
150
|
+
```bibtex
|
|
151
|
+
@misc{ticker-classifier,
|
|
152
|
+
author = {Stephan Akkerman},
|
|
153
|
+
title = {ticker-classifier},
|
|
154
|
+
year = {2025},
|
|
155
|
+
publisher = {GitHub},
|
|
156
|
+
howpublished = {\url{https://github.com/StephanAkkerman/ticker-classifier}}
|
|
157
|
+
}
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## Contributing 🛠
|
|
161
|
+
|
|
162
|
+
Contributions are welcome. Suggested workflow:
|
|
163
|
+
|
|
164
|
+
1. Fork the repository and create a feature branch.
|
|
165
|
+
2. Run tests and format your changes with `black`.
|
|
166
|
+
3. Open a pull request with a clear description of the change.
|
|
167
|
+
|
|
168
|
+
Please open issues for feature requests or bugs and include a small
|
|
169
|
+
reproducible example when possible.
|
|
170
|
+
|
|
171
|
+

|
|
172
|
+
|
|
173
|
+
## License 📜
|
|
174
|
+
|
|
175
|
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
ticker_classifier/__init__.py,sha256=X35OW2LFeV0GXY-lkZKNVOpiIMHm2gEsXxs5txD17-A,95
|
|
2
|
+
ticker_classifier/classifier.py,sha256=QrZK1QZCSDWtZFeFzUNhuC3o-Pmc8rMOoyrcpcs0BlQ,8918
|
|
3
|
+
ticker_classifier/constants.py,sha256=MzTG-PeFm8EjtVjbPtBGKnyWaeR5Arh7rAkvRaUyyNU,2682
|
|
4
|
+
ticker_classifier/apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
ticker_classifier/apis/coingecko.py,sha256=a7hwT52-hGc-u6TSLQ2li06qqkVpRecuWrcmcJdpy7I,7603
|
|
6
|
+
ticker_classifier/apis/yahoo.py,sha256=4TmCfK9prMZ7VCc6ZoWqAgBCRTICOlK02ewNH-W7Xpo,7696
|
|
7
|
+
ticker_classifier/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
ticker_classifier/db/cache.py,sha256=oTd_aGJ-NU0Jyf1SnpuKeALKcNg3GW6W0RziwuzGMiM,3257
|
|
9
|
+
ticker_classifier-0.1.0.dist-info/licenses/LICENSE,sha256=M4a6e_RgNGKdxALQ8kkRG280OdA6w5Y3RLHwneYEbq4,1073
|
|
10
|
+
ticker_classifier-0.1.0.dist-info/METADATA,sha256=ZV9ecU1pLhO6UgK8nBwzWk848xXg8RVjsFahq7arMks,6358
|
|
11
|
+
ticker_classifier-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
12
|
+
ticker_classifier-0.1.0.dist-info/top_level.txt,sha256=aaVnIjqeRswEK3Ph2fWR_9GgI16UCON513u7YDX9mmQ,18
|
|
13
|
+
ticker_classifier-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Stephan Akkerman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ticker_classifier
|