xfinlink 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. xfinlink-0.1.1/CLAUDE.md +121 -0
  2. xfinlink-0.1.1/MANIFEST.in +7 -0
  3. xfinlink-0.1.1/PKG-INFO +171 -0
  4. xfinlink-0.1.1/README.md +131 -0
  5. xfinlink-0.1.1/pyproject.toml +69 -0
  6. xfinlink-0.1.1/setup.cfg +4 -0
  7. xfinlink-0.1.1/xfinlink/__init__.py +129 -0
  8. xfinlink-0.1.1/xfinlink/_db_check.py +23 -0
  9. xfinlink-0.1.1/xfinlink/alembic.ini +149 -0
  10. xfinlink-0.1.1/xfinlink/api/__init__.py +0 -0
  11. xfinlink-0.1.1/xfinlink/api/main.py +287 -0
  12. xfinlink-0.1.1/xfinlink/cli.py +443 -0
  13. xfinlink-0.1.1/xfinlink/db.py +35 -0
  14. xfinlink-0.1.1/xfinlink/ingestion/__init__.py +0 -0
  15. xfinlink-0.1.1/xfinlink/ingestion/corporate_events.py +403 -0
  16. xfinlink-0.1.1/xfinlink/ingestion/edgar_cik.py +529 -0
  17. xfinlink-0.1.1/xfinlink/ingestion/utils.py +70 -0
  18. xfinlink-0.1.1/xfinlink/ingestion/wrds_ccm.py +232 -0
  19. xfinlink-0.1.1/xfinlink/ingestion/wrds_compustat.py +489 -0
  20. xfinlink-0.1.1/xfinlink/ingestion/wrds_crsp.py +340 -0
  21. xfinlink-0.1.1/xfinlink/ingestion/wrds_ibes.py +209 -0
  22. xfinlink-0.1.1/xfinlink/license.py +36 -0
  23. xfinlink-0.1.1/xfinlink/mcp_server/__init__.py +0 -0
  24. xfinlink-0.1.1/xfinlink/mcp_server/server.py +328 -0
  25. xfinlink-0.1.1/xfinlink/migrations/README +1 -0
  26. xfinlink-0.1.1/xfinlink/migrations/env.py +75 -0
  27. xfinlink-0.1.1/xfinlink/migrations/script.py.mako +28 -0
  28. xfinlink-0.1.1/xfinlink/migrations/versions/39d93271e7d8_initial_schema.py +122 -0
  29. xfinlink-0.1.1/xfinlink/models.py +157 -0
  30. xfinlink-0.1.1/xfinlink/panel.py +264 -0
  31. xfinlink-0.1.1/xfinlink/resolver/__init__.py +0 -0
  32. xfinlink-0.1.1/xfinlink/resolver/core.py +405 -0
  33. xfinlink-0.1.1/xfinlink.egg-info/PKG-INFO +171 -0
  34. xfinlink-0.1.1/xfinlink.egg-info/SOURCES.txt +36 -0
  35. xfinlink-0.1.1/xfinlink.egg-info/dependency_links.txt +1 -0
  36. xfinlink-0.1.1/xfinlink.egg-info/entry_points.txt +2 -0
  37. xfinlink-0.1.1/xfinlink.egg-info/requires.txt +21 -0
  38. xfinlink-0.1.1/xfinlink.egg-info/top_level.txt +1 -0
@@ -0,0 +1,121 @@
1
+ # xfinlink — guidance for AI agents
2
+
3
+ xfinlink is a local entity-resolution tool for financial data. It answers one
4
+ question: **"which economic entity does this identifier refer to, and what
5
+ other identifiers does that same entity have?"** It ships five MCP tools
6
+ (`resolve_identifier`, `bulk_resolve`, `entity_lineage`, `search_entity`,
7
+ `validate_identifiers`), a Python API, a CLI, and a local HTTP server — all
8
+ backed by the same Postgres crosswalk database built from CRSP, Compustat,
9
+ IBES, and SEC EDGAR.
10
+
11
+ If you are reasoning about a dataset that mixes any of the following
12
+ identifiers, you should be using xfinlink:
13
+
14
+ - **PERMNO / PERMCO** — CRSP (market data: prices, returns, volume)
15
+ - **GVKEY** — Compustat (fundamentals: balance sheet, income statement)
16
+ - **CIK** — SEC EDGAR (filings: 10-K, 10-Q, 8-K, 13F)
17
+ - **CUSIP (8 or 9)** — bond/equity master files
18
+ - **Ticker** — market data vendors (Yahoo, Bloomberg, yfinance, FRED)
19
+ - **ISIN / IBES ticker / FIGI** — international, analyst estimates, OpenFIGI
20
+
21
+ ## Why this matters: the ticker-recycling problem
22
+
23
+ Tickers are not stable identifiers. They get retired, reissued, and reused
24
+ across companies that have nothing to do with each other. A naive
25
+ ticker-based merge across databases silently stitches unrelated entities into
26
+ one row-group and corrupts downstream analysis. **Nearly 10,000 tickers in
27
+ the CRSP universe map to more than one distinct entity over time.** A few
28
+ high-salience examples:
29
+
30
+ - **GM** — Before June 2009, "GM" was General Motors Corporation (PERMNO
31
+ 12079). The company filed Chapter 11 on 2009-06-01. "Old GM" was delisted
32
+ and eventually wound down. On 2010-11-18, an entirely new company, General
33
+ Motors Company (PERMNO 12369), IPO'd out of the bankruptcy and took the
34
+ "GM" ticker. They share the same GVKEY (005073) because Compustat models
35
+ the economic continuation, but they have different PERMNOs because CRSP
36
+ treats them as legally distinct securities. A panel keyed on ticker "GM"
37
+ across 2005-2024 is a silent 50/50 mixture of two different companies.
38
+ - **FB** — Belonged to Meta Platforms Inc until the 2022 rebrand to META.
39
+ Historically the "FB" ticker has also been used by PROSHARES, FBR Capital,
40
+ BankBoston, and Falcon Building Products. Resolving "FB" without a date
41
+ returns five matches.
42
+ - **META** — The ticker was used by another company before Meta adopted it
43
+ in 2022.
44
+ - **INGR** — Recycled multiple times across unrelated corporations over the
45
+ decades.
46
+
47
+ When you see `ambiguous=true` from `resolve_identifier`, treat it as a
48
+ disambiguation prompt — either pass `as_of_date`, or call `entity_lineage`
49
+ to understand the family tree.
50
+
51
+ ## Which tool to use when
52
+
53
+ - **You have an id_value and need the entity (and all its other ids):**
54
+ `resolve_identifier`. Pass `as_of_date` if the data is time-series.
55
+ - **You have many ids to resolve in one shot:** `bulk_resolve` (max 100).
56
+ - **You have a company name but no id:** `search_entity` → pick a result →
57
+ `resolve_identifier` on one of its PERMNO/GVKEY values for the full record.
58
+ - **You have a merged panel and want to audit it:** `validate_identifiers`.
59
+ Run it AFTER merging. Any `wrong_entity` flag means your panel has stitched
60
+ different companies under one ticker.
61
+ - **You need to understand a company's corporate history:** `entity_lineage`.
62
+ Walks the predecessor/successor graph across bankruptcies, mergers,
63
+ spinoffs, name changes, and ticker changes.
64
+
65
+ ## Canonical merge keys
66
+
67
+ Once you have a resolution, **prefer PERMNO or GVKEY as the merge key** in
68
+ downstream joins. Both are permanent identifiers issued by data vendors that
69
+ survive ticker changes, reorganizations, and most corporate events. Tickers
70
+ are display values, not keys. CIKs are keys for SEC filings but are not
71
+ always unique across subsidiaries and parents.
72
+
73
+ In order of preference for panel construction:
74
+
75
+ 1. **GVKEY** — most stable for economic-continuation analysis (survives
76
+ bankruptcies where the business continues).
77
+ 2. **PERMNO** — most precise for market-data analysis (distinguishes the
78
+ pre- and post-bankruptcy security, which GVKEY does not).
79
+ 3. **CIK** — use for SEC filings. Not recommended as the primary merge key
80
+ because a single parent filer can cover multiple economic entities.
81
+ 4. **CUSIP (9)** — useful for bond/equity-level joins but changes on some
82
+ corporate actions.
83
+ 5. **Ticker** — display only. Never the merge key.
84
+
85
+ ## Historical resolution
86
+
87
+ Almost every financial dataset is a panel. `as_of_date` is the most
88
+ important parameter in xfinlink. Use YYYY-MM-DD format. The resolver applies
89
+ the temporal filter `valid_from <= as_of_date <= valid_to` against each
90
+ identifier's validity window, with NULL treated as "open-ended."
91
+
92
+ If a user asks "who is AAPL?" without a date, that's probably fine (AAPL has
93
+ been Apple Inc since 1980). If they ask "what was FB's GVKEY in 2019?", use
94
+ `as_of_date="2019-01-01"` to avoid the 5-way ambiguity.
95
+
96
+ ## Validation workflow
97
+
98
+ The canonical ticker-recycling bug hides in silence: the merge succeeds, the
99
+ DataFrame has the right number of rows, the regression runs, and the results
100
+ are quietly wrong. The defense is to run `validate_identifiers` on every
101
+ panel before trusting it.
102
+
103
+ ```
104
+ validate_identifiers(identifiers='[{"id_type":"ticker","id_value":"GM","date":"2006-01-01"}, {"id_type":"ticker","id_value":"GM","date":"2022-01-01"}]')
105
+ ```
106
+
107
+ If `is_clean` is false and the issues include `wrong_entity`, re-key on
108
+ PERMNO or GVKEY and re-merge. The `coverage` dict shows you which canonical
109
+ entities the panel actually matched and their row counts, so a stitched GM
110
+ panel will read `{"General Motors Corporation (pre-2009 bankruptcy)": 4,
110
+ "General Motors Company": 6}` — a split across two distinct entities that's an unmistakable
112
+ signal of the problem.
113
+
114
+ ## Non-goals
115
+
116
+ - xfinlink does not serve market data, prices, fundamentals, or filings. It
117
+ is a crosswalk, not a data provider.
118
+ - xfinlink does not guess. If `not_found` is true, the identifier genuinely
119
+ is not in the crosswalk — do not fabricate a match.
120
+ - xfinlink does not resolve private companies, funds, or non-US entities
121
+ outside what CRSP / Compustat / EDGAR cover.
@@ -0,0 +1,7 @@
1
+ include README.md
2
+ include CLAUDE.md
3
+ recursive-exclude tests *
4
+ recursive-exclude data *
5
+ recursive-exclude scripts *
6
+ global-exclude __pycache__
7
+ global-exclude *.pyc
@@ -0,0 +1,171 @@
1
+ Metadata-Version: 2.4
2
+ Name: xfinlink
3
+ Version: 0.1.1
4
+ Summary: Financial entity identifier resolution — resolve, validate, and fix PERMNO/GVKEY/CIK/CUSIP/ticker/ISIN mappings across CRSP, Compustat, EDGAR, and more.
5
+ Author: xfinlink
6
+ License-Expression: LicenseRef-Proprietary
7
+ Project-URL: Homepage, https://xfinlink.io
8
+ Project-URL: Documentation, https://xfinlink.io/docs
9
+ Keywords: finance,CRSP,Compustat,EDGAR,PERMNO,GVKEY,CUSIP,ticker,identifier,resolver
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Financial and Insurance Industry
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Office/Business :: Financial
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: pandas
22
+ Requires-Dist: pyarrow
23
+ Requires-Dist: sqlalchemy>=2.0
24
+ Requires-Dist: psycopg2-binary
25
+ Requires-Dist: asyncpg
26
+ Requires-Dist: greenlet
27
+ Requires-Dist: fastapi
28
+ Requires-Dist: uvicorn
29
+ Requires-Dist: httpx
30
+ Requires-Dist: alembic
31
+ Requires-Dist: pydantic
32
+ Requires-Dist: python-dotenv
33
+ Requires-Dist: typer[all]
34
+ Requires-Dist: mcp
35
+ Provides-Extra: wrds
36
+ Requires-Dist: wrds; extra == "wrds"
37
+ Provides-Extra: dev
38
+ Requires-Dist: pytest; extra == "dev"
39
+ Requires-Dist: pytest-asyncio; extra == "dev"
40
+
41
+ # xfinlink
42
+
43
+ Financial entity identifier resolution. Resolve, validate, and fix identifier mappings across CRSP, Compustat, SEC EDGAR, IBES, and more.
44
+
45
+ ## The problem
46
+
47
+ Financial data is spread across databases that use different identifiers. CRSP uses PERMNOs. Compustat uses GVKEYs. SEC EDGAR uses CIKs. Markets use tickers. Tickers get recycled — GM before 2009 (General Motors Corporation) and GM after 2010 (General Motors Company) are legally distinct entities that happen to share a ticker. A naive merge silently produces wrong data.
48
+
49
+ Nearly 10,000 tickers in the CRSP universe map to more than one distinct entity over time. The bug hides in silence: the merge succeeds, the DataFrame has the right number of rows, the regression runs, and the results are quietly wrong.
50
+
51
+ ## What xfinlink does
52
+
53
+ - **Resolve** — any identifier → all other identifiers for the same entity, correctly matched at any point in time
54
+ - **Validate** — audit a merged panel and flag silent identifier errors (ticker recycling, outside-validity dates, ambiguous matches)
55
+ - **Fix** — return a corrected DataFrame with proper per-row entity, PERMNO, GVKEY, and CIK columns
56
+
57
+ ## Install
58
+
59
+ ```bash
60
+ pip install xfinlink
61
+ ```
62
+
63
+ WRDS ingestion is optional:
64
+
65
+ ```bash
66
+ pip install "xfinlink[wrds]"
67
+ ```
68
+
69
+ ## Quick start
70
+
71
+ ```bash
72
+ xfinlink init # start postgres + run migrations
73
+ xfinlink build # ingest EDGAR (public)
74
+ xfinlink build --wrds # optional: add CRSP/Compustat/IBES
75
+
76
+ xfinlink status # DB census + identifier coverage
77
+ xfinlink resolve ticker AAPL # → Apple Inc with all linked ids
78
+ xfinlink resolve ticker GM --date 2008-01-01
79
+ # → General Motors Corporation (pre-bankruptcy)
80
+ xfinlink resolve ticker GM --date 2020-01-01
81
+ # → General Motors Company (post-bankruptcy)
82
+
83
+ xfinlink validate panel.csv --id-column ticker --date-column date
84
+ # → flags stitched/ambiguous/not-found rows
85
+ xfinlink fix panel.csv --id-column ticker --date-column date
86
+ # → writes panel_fixed.csv with per-row PERMNO/GVKEY/CIK
87
+
88
+ xfinlink serve # local HTTP API on http://localhost:8000
89
+ xfinlink mcp # MCP server for AI agents
90
+ ```
91
+
92
+ Example — catching the GM stitching bug:
93
+
94
+ ```
95
+ $ xfinlink validate gm_panel.csv --id-column ticker --date-column date
96
+ 10 rows checked. 0 clean. 10 issues found.
97
+ ┏━ Coverage (entity → row count) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
98
+ ┃ canonical_name rows ┃
99
+ ┃ General Motors Corporation (pre-2009 bankruptcy) 5 ┃
100
+ ┃ General Motors Company 5 ┃
101
+ ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
102
+ Issues written to xfinlink_validation_report.csv
103
+ ```
104
+
105
+ ## Python API
106
+
107
+ ```python
108
+ from xfinlink import resolve, bulk_resolve, search_entities, get_lineage
109
+ from xfinlink import validate_dataframe, fix_dataframe
110
+
111
+ # Single resolve, with optional historical date
112
+ r = resolve("ticker", "AAPL")
113
+ r = resolve("ticker", "GM", as_of_date="2008-01-01")
114
+
115
+ # Batch resolve
116
+ results = bulk_resolve([
117
+ {"id_type": "ticker", "id_value": "AAPL"},
118
+ {"id_type": "gvkey", "id_value": "001690"},
119
+ {"id_type": "cik", "id_value": "0000320193"},
120
+ ])
121
+
122
+ # Panel validation — the headline feature
123
+ import pandas as pd
124
+ df = pd.read_csv("gm_panel.csv")
125
+ report = validate_dataframe(df, "ticker", date_column="date")
126
+ if not report.is_clean:
127
+ print(report.summary)
128
+ print(report.coverage) # {"General Motors Corporation (pre-2009 bankruptcy)": 5, ...}
129
+ fixed = fix_dataframe(df, "ticker", date_column="date")
130
+ # fixed["_permno"], fixed["_gvkey"], fixed["_cik"], fixed["_xfinlink_flag"]
131
+ ```
132
+
133
+ ## For AI agents
134
+
135
+ ```bash
136
+ xfinlink mcp # start the MCP server over stdio
137
+ ```
138
+
139
+ xfinlink exposes five MCP tools — `resolve_identifier`, `bulk_resolve`, `entity_lineage`, `search_entity`, `validate_identifiers` — connected directly to the local crosswalk database. See [CLAUDE.md](CLAUDE.md) for tool descriptions, the GM/FB/META disambiguation cases, and guidance on when to use each tool.
140
+
141
+ ## Local HTTP server
142
+
143
+ ```bash
144
+ xfinlink serve --port 8000
145
+ ```
146
+
147
+ Endpoints:
148
+
149
+ | Method | Path | Purpose |
150
+ |---|---|---|
151
+ | POST | /v1/resolve | single identifier resolution |
152
+ | POST | /v1/resolve/bulk | batch resolution (max 1000) |
153
+ | GET | /v1/lineage/{entity_id} | corporate event history |
154
+ | GET | /v1/search?q=...&limit=... | name search |
155
+ | POST | /v1/validate | panel validation |
156
+ | GET | /v1/health | DB stats |
157
+
158
+ Interactive docs at http://localhost:8000/docs.
159
+
160
+ ## Data sources
161
+
162
+ - **Without WRDS:** CIK, ticker, CUSIP (SEC EDGAR), ISIN, FIGI (OpenFIGI).
163
+ - **With WRDS:** adds PERMNO, GVKEY, IBES ticker, and the CCM linking table, plus corporate event detection (bankruptcy / relisting pairs).
164
+
165
+ ## Legal
166
+
167
+ xfinlink does not redistribute any proprietary data. When you run `xfinlink build --wrds`, it connects to **your** WRDS account and builds the crosswalk locally from data you are licensed to access. The pip package contains only source code — no CRSP, Compustat, IBES, or EDGAR data is shipped.
168
+
169
+ ## License
170
+
171
+ xfinlink is distributed under a commercial license with a trial mode: 500 resolutions per session, all features available. Get a license key at https://xfinlink.io and drop it in `~/.xfinlink/license.key` to remove the trial limit.
@@ -0,0 +1,131 @@
1
+ # xfinlink
2
+
3
+ Financial entity identifier resolution. Resolve, validate, and fix identifier mappings across CRSP, Compustat, SEC EDGAR, IBES, and more.
4
+
5
+ ## The problem
6
+
7
+ Financial data is spread across databases that use different identifiers. CRSP uses PERMNOs. Compustat uses GVKEYs. SEC EDGAR uses CIKs. Markets use tickers. Tickers get recycled — GM before 2009 (General Motors Corporation) and GM after 2010 (General Motors Company) are legally distinct entities that happen to share a ticker. A naive merge silently produces wrong data.
8
+
9
+ Nearly 10,000 tickers in the CRSP universe map to more than one distinct entity over time. The bug hides in silence: the merge succeeds, the DataFrame has the right number of rows, the regression runs, and the results are quietly wrong.
10
+
11
+ ## What xfinlink does
12
+
13
+ - **Resolve** — any identifier → all other identifiers for the same entity, correctly matched at any point in time
14
+ - **Validate** — audit a merged panel and flag silent identifier errors (ticker recycling, outside-validity dates, ambiguous matches)
15
+ - **Fix** — return a corrected DataFrame with proper per-row entity, PERMNO, GVKEY, and CIK columns
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ pip install xfinlink
21
+ ```
22
+
23
+ WRDS ingestion is optional:
24
+
25
+ ```bash
26
+ pip install "xfinlink[wrds]"
27
+ ```
28
+
29
+ ## Quick start
30
+
31
+ ```bash
32
+ xfinlink init # start postgres + run migrations
33
+ xfinlink build # ingest EDGAR (public)
34
+ xfinlink build --wrds # optional: add CRSP/Compustat/IBES
35
+
36
+ xfinlink status # DB census + identifier coverage
37
+ xfinlink resolve ticker AAPL # → Apple Inc with all linked ids
38
+ xfinlink resolve ticker GM --date 2008-01-01
39
+ # → General Motors Corporation (pre-bankruptcy)
40
+ xfinlink resolve ticker GM --date 2020-01-01
41
+ # → General Motors Company (post-bankruptcy)
42
+
43
+ xfinlink validate panel.csv --id-column ticker --date-column date
44
+ # → flags stitched/ambiguous/not-found rows
45
+ xfinlink fix panel.csv --id-column ticker --date-column date
46
+ # → writes panel_fixed.csv with per-row PERMNO/GVKEY/CIK
47
+
48
+ xfinlink serve # local HTTP API on http://localhost:8000
49
+ xfinlink mcp # MCP server for AI agents
50
+ ```
51
+
52
+ Example — catching the GM stitching bug:
53
+
54
+ ```
55
+ $ xfinlink validate gm_panel.csv --id-column ticker --date-column date
56
+ 10 rows checked. 0 clean. 10 issues found.
57
+ ┏━ Coverage (entity → row count) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
58
+ ┃ canonical_name rows ┃
59
+ ┃ General Motors Corporation (pre-2009 bankruptcy) 5 ┃
60
+ ┃ General Motors Company 5 ┃
61
+ ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
62
+ Issues written to xfinlink_validation_report.csv
63
+ ```
64
+
65
+ ## Python API
66
+
67
+ ```python
68
+ from xfinlink import resolve, bulk_resolve, search_entities, get_lineage
69
+ from xfinlink import validate_dataframe, fix_dataframe
70
+
71
+ # Single resolve, with optional historical date
72
+ r = resolve("ticker", "AAPL")
73
+ r = resolve("ticker", "GM", as_of_date="2008-01-01")
74
+
75
+ # Batch resolve
76
+ results = bulk_resolve([
77
+ {"id_type": "ticker", "id_value": "AAPL"},
78
+ {"id_type": "gvkey", "id_value": "001690"},
79
+ {"id_type": "cik", "id_value": "0000320193"},
80
+ ])
81
+
82
+ # Panel validation — the headline feature
83
+ import pandas as pd
84
+ df = pd.read_csv("gm_panel.csv")
85
+ report = validate_dataframe(df, "ticker", date_column="date")
86
+ if not report.is_clean:
87
+ print(report.summary)
88
+ print(report.coverage) # {"General Motors Corporation (pre-2009 bankruptcy)": 5, ...}
89
+ fixed = fix_dataframe(df, "ticker", date_column="date")
90
+ # fixed["_permno"], fixed["_gvkey"], fixed["_cik"], fixed["_xfinlink_flag"]
91
+ ```
92
+
93
+ ## For AI agents
94
+
95
+ ```bash
96
+ xfinlink mcp # start the MCP server over stdio
97
+ ```
98
+
99
+ xfinlink exposes five MCP tools — `resolve_identifier`, `bulk_resolve`, `entity_lineage`, `search_entity`, `validate_identifiers` — connected directly to the local crosswalk database. See [CLAUDE.md](CLAUDE.md) for tool descriptions, the GM/FB/META disambiguation cases, and guidance on when to use each tool.
100
+
101
+ ## Local HTTP server
102
+
103
+ ```bash
104
+ xfinlink serve --port 8000
105
+ ```
106
+
107
+ Endpoints:
108
+
109
+ | Method | Path | Purpose |
110
+ |---|---|---|
111
+ | POST | /v1/resolve | single identifier resolution |
112
+ | POST | /v1/resolve/bulk | batch resolution (max 1000) |
113
+ | GET | /v1/lineage/{entity_id} | corporate event history |
114
+ | GET | /v1/search?q=...&limit=... | name search |
115
+ | POST | /v1/validate | panel validation |
116
+ | GET | /v1/health | DB stats |
117
+
118
+ Interactive docs at http://localhost:8000/docs.
119
+
120
+ ## Data sources
121
+
122
+ - **Without WRDS:** CIK, ticker, CUSIP (SEC EDGAR), ISIN, FIGI (OpenFIGI).
123
+ - **With WRDS:** adds PERMNO, GVKEY, IBES ticker, and the CCM linking table, plus corporate event detection (bankruptcy / relisting pairs).
124
+
125
+ ## Legal
126
+
127
+ xfinlink does not redistribute any proprietary data. When you run `xfinlink build --wrds`, it connects to **your** WRDS account and builds the crosswalk locally from data you are licensed to access. The pip package contains only source code — no CRSP, Compustat, IBES, or EDGAR data is shipped.
128
+
129
+ ## License
130
+
131
+ xfinlink is distributed under a commercial license with a trial mode: 500 resolutions per session, all features available. Get a license key at https://xfinlink.io and drop it in `~/.xfinlink/license.key` to remove the trial limit.
@@ -0,0 +1,69 @@
1
+ [project]
2
+ name = "xfinlink"
3
+ version = "0.1.1"
4
+ description = "Financial entity identifier resolution — resolve, validate, and fix PERMNO/GVKEY/CIK/CUSIP/ticker/ISIN mappings across CRSP, Compustat, EDGAR, and more."
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ authors = [{name = "xfinlink"}]
8
+ license = "LicenseRef-Proprietary"
9
+ keywords = ["finance", "CRSP", "Compustat", "EDGAR", "PERMNO", "GVKEY", "CUSIP", "ticker", "identifier", "resolver"]
10
+ classifiers = [
11
+ "Development Status :: 4 - Beta",
12
+ "Intended Audience :: Financial and Insurance Industry",
13
+ "Intended Audience :: Science/Research",
14
+ "Operating System :: OS Independent",
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3.11",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Programming Language :: Python :: 3.13",
19
+ "Topic :: Office/Business :: Financial",
20
+ ]
21
+ dependencies = [
22
+ "pandas",
23
+ "pyarrow",
24
+ "sqlalchemy>=2.0",
25
+ "psycopg2-binary",
26
+ "asyncpg",
27
+ "greenlet",
28
+ "fastapi",
29
+ "uvicorn",
30
+ "httpx",
31
+ "alembic",
32
+ "pydantic",
33
+ "python-dotenv",
34
+ "typer[all]",
35
+ "mcp",
36
+ ]
37
+
38
+ [project.optional-dependencies]
39
+ wrds = ["wrds"]
40
+ dev = ["pytest", "pytest-asyncio"]
41
+
42
+ [project.urls]
43
+ Homepage = "https://xfinlink.io"
44
+ Documentation = "https://xfinlink.io/docs"
45
+
46
+ [build-system]
47
+ requires = ["setuptools>=68"]
48
+ build-backend = "setuptools.build_meta"
49
+
50
+ [project.scripts]
51
+ xfinlink = "xfinlink.cli:app"
52
+
53
+ [tool.setuptools.packages.find]
54
+ include = ["xfinlink*"]
55
+ exclude = ["tests*", "data*", "scripts*"]
56
+
57
+ [tool.setuptools.package-data]
58
+ xfinlink = [
59
+ "py.typed",
60
+ "alembic.ini",
61
+ "migrations/*.py",
62
+ "migrations/*.mako",
63
+ "migrations/README",
64
+ "migrations/versions/*.py",
65
+ ]
66
+
67
+ [tool.pytest.ini_options]
68
+ testpaths = ["tests"]
69
+ asyncio_mode = "auto"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,129 @@
1
+ """xfinlink — financial entity identifier resolution.
2
+
3
+ Public Python API. Synchronous wrappers around the async resolver so
4
+ callers do not have to manage event loops or database sessions.
5
+
6
+ from xfinlink import resolve, bulk_resolve, search_entities, get_lineage
7
+
8
+ r = resolve("ticker", "AAPL")
9
+ r = resolve("ticker", "GM", as_of_date="2008-01-01")
10
+ results = bulk_resolve([{"id_type": "ticker", "id_value": "AAPL"}, ...])
11
+ hits = search_entities("General Motors")
12
+ lineage = get_lineage("ticker", "GM")
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ from datetime import date, datetime
18
+ from typing import Any
19
+
20
+ from xfinlink._db_check import check_db_ready as _check_db_ready
21
+ from xfinlink.db import AsyncSessionLocal
22
+ from xfinlink.panel import ValidationReport, fix_dataframe, validate_dataframe
23
+ from xfinlink.resolver import core as _core
24
+ from xfinlink.resolver.core import (
25
+ EntityLineage,
26
+ LineageEvent,
27
+ ResolutionResult,
28
+ ResolvedEntity,
29
+ ResolvedIdentifier,
30
+ )
31
+
32
+ __all__ = [
33
+ "resolve",
34
+ "bulk_resolve",
35
+ "search_entities",
36
+ "get_lineage",
37
+ "validate_dataframe",
38
+ "fix_dataframe",
39
+ "ValidationReport",
40
+ "ResolutionResult",
41
+ "ResolvedEntity",
42
+ "ResolvedIdentifier",
43
+ "EntityLineage",
44
+ "LineageEvent",
45
+ ]
46
+
47
+
48
+ def _to_date(value: Any) -> date | None:
49
+ if value is None or isinstance(value, date) and not isinstance(value, datetime):
50
+ return value
51
+ if isinstance(value, datetime):
52
+ return value.date()
53
+ if isinstance(value, str):
54
+ return datetime.strptime(value, "%Y-%m-%d").date()
55
+ raise TypeError(f"unsupported date type: {type(value).__name__}")
56
+
57
+
58
+ def _run(coro):
59
+ return asyncio.run(coro)
60
+
61
+
62
def resolve(
    id_type: str,
    id_value: str,
    as_of_date: str | date | None = None,
) -> ResolutionResult:
    """Resolve one identifier to its entity, synchronously.

    Verifies the database is ready, normalises ``as_of_date`` with
    ``_to_date``, then opens an async session and delegates to the async
    resolver in ``xfinlink.resolver.core``.

    Args:
        id_type: Kind of identifier being looked up (e.g. ``"ticker"``).
        id_value: The identifier value.
        as_of_date: Optional point-in-time date, as a ``date`` or a
            ``YYYY-MM-DD`` string.
    """
    _check_db_ready()
    point_in_time = _to_date(as_of_date)

    async def _lookup() -> ResolutionResult:
        async with AsyncSessionLocal() as db:
            outcome = await _core.resolve(db, id_type, id_value, point_in_time)
        return outcome

    return _run(_lookup())
76
+
77
+
78
def bulk_resolve(requests: list[dict]) -> list[ResolutionResult]:
    """Resolve several identifier requests in a single session.

    Each request dict is shallow-copied (the caller's dicts are never
    mutated); when a request carries an ``"as_of_date"`` key its value is
    normalised with ``_to_date`` before the batch is handed to the async
    resolver.
    """
    _check_db_ready()

    def _normalize(raw: dict) -> dict:
        # Copy first so the conversion below never touches caller data.
        item = dict(raw)
        if "as_of_date" in item:
            item["as_of_date"] = _to_date(item["as_of_date"])
        return item

    prepared: list[dict] = [_normalize(raw) for raw in requests]

    async def _lookup_all() -> list[ResolutionResult]:
        async with AsyncSessionLocal() as db:
            outcomes = await _core.bulk_resolve(db, prepared)
        return outcomes

    return _run(_lookup_all())
93
+
94
+
95
def search_entities(query: str, limit: int = 10) -> list[ResolvedEntity]:
    """Search entities by canonical name (fuzzy match), synchronously.

    Args:
        query: Name text to search for.
        limit: Passed through to the async search as ``limit``.
    """
    _check_db_ready()

    async def _search() -> list[ResolvedEntity]:
        async with AsyncSessionLocal() as db:
            hits = await _core.search_entities(db, query, limit=limit)
        return hits

    return _run(_search())
104
+
105
+
106
def get_lineage(
    id_type: str,
    id_value: str,
    as_of_date: str | date | None = None,
) -> EntityLineage | None:
    """Return the corporate lineage of the entity an identifier points to.

    The identifier ``(id_type, id_value)`` is resolved first and the lineage
    is then fetched starting from the top-ranked match. The async traversal
    walks ticker-siblings, so both halves of a bankruptcy/relisting pair
    land in the same lineage whichever match seeds it.

    Returns:
        The lineage, or ``None`` when the identifier matched no entity.
    """
    _check_db_ready()
    point_in_time = _to_date(as_of_date)

    async def _trace() -> EntityLineage | None:
        async with AsyncSessionLocal() as db:
            resolution = await _core.resolve(db, id_type, id_value, point_in_time)
            if not resolution.not_found:
                seed = resolution.matches[0]
                return await _core.get_lineage(db, seed.entity_id)
            return None

    return _run(_trace())
@@ -0,0 +1,23 @@
1
+ """Shared DB-readiness check used by the public API and panel module."""
2
+ from __future__ import annotations
3
+
4
+ from sqlalchemy import func, select
5
+ from sqlalchemy.exc import OperationalError
6
+
7
+ from xfinlink.db import SessionLocal
8
+ from xfinlink.models import Entity
9
+
10
# Single user-facing message for every "database is not usable yet" case.
_DB_ERROR = (
    "Database not found or empty. "
    "Run `xfinlink init && xfinlink build` first."
)


def check_db_ready() -> None:
    """Raise ``RuntimeError`` unless the crosswalk database is usable.

    Counts the rows of the ``Entity`` table through a synchronous session.
    The database counts as not ready when the query fails at the database
    level or when the table holds no rows.

    Raises:
        RuntimeError: with the ``_DB_ERROR`` message; the underlying
            SQLAlchemy error, if any, is chained as ``__cause__``.
    """
    # Imported locally so this block does not depend on the module header.
    from sqlalchemy.exc import ProgrammingError

    try:
        with SessionLocal() as s:
            n = s.scalar(select(func.count()).select_from(Entity))
    # OperationalError covers an unreachable server or a missing database.
    # A reachable database whose migrations were never run fails with
    # ProgrammingError (undefined table) instead, so catch both and give
    # the same actionable message.
    except (OperationalError, ProgrammingError) as exc:
        raise RuntimeError(_DB_ERROR) from exc
    if not n:
        # Schema exists but nothing has been ingested yet.
        raise RuntimeError(_DB_ERROR)