typedframes 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. typedframes-0.2.0/README.md → typedframes-0.2.2/PKG-INFO +90 -37
  2. typedframes-0.2.0/PKG-INFO → typedframes-0.2.2/README.md +55 -56
  3. {typedframes-0.2.0 → typedframes-0.2.2}/pyproject.toml +43 -7
  4. {typedframes-0.2.0 → typedframes-0.2.2}/rust/Cargo.lock +15 -45
  5. {typedframes-0.2.0 → typedframes-0.2.2}/rust/Cargo.toml +2 -2
  6. {typedframes-0.2.0 → typedframes-0.2.2}/rust/src/lib.rs +221 -68
  7. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/__init__.py +1 -1
  8. {typedframes-0.2.0 → typedframes-0.2.2}/rust/README.md +0 -0
  9. {typedframes-0.2.0 → typedframes-0.2.2}/rust/benches/parser_bench.rs +0 -0
  10. {typedframes-0.2.0 → typedframes-0.2.2}/rust/src/main.rs +0 -0
  11. {typedframes-0.2.0 → typedframes-0.2.2}/rust/tests/integration_test.rs +0 -0
  12. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/base_schema.py +0 -0
  13. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/cli.py +0 -0
  14. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/column.py +0 -0
  15. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/column_group.py +0 -0
  16. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/column_group_error.py +0 -0
  17. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/column_set.py +0 -0
  18. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/missing_dependency_error.py +0 -0
  19. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/mypy.py +0 -0
  20. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/pandas.py +0 -0
  21. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/pandera.py +0 -0
  22. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/polars.py +0 -0
  23. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/py.typed +0 -0
  24. {typedframes-0.2.0 → typedframes-0.2.2}/src/typedframes/schema_algebra.py +0 -0
@@ -1,8 +1,48 @@
1
+ Metadata-Version: 2.4
2
+ Name: typedframes
3
+ Version: 0.2.2
4
+ Classifier: Development Status :: 3 - Alpha
5
+ Classifier: Intended Audience :: Developers
6
+ Classifier: License :: OSI Approved :: MIT License
7
+ Classifier: Operating System :: OS Independent
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Classifier: Programming Language :: Python :: 3.14
13
+ Classifier: Topic :: Software Development :: Quality Assurance
14
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
15
+ Requires-Dist: mypy ; extra == 'mypy'
16
+ Requires-Dist: pandas>=2.0 ; extra == 'pandas'
17
+ Requires-Dist: pandera>=0.20.0 ; extra == 'pandera'
18
+ Requires-Dist: polars>=1.0 ; extra == 'polars'
19
+ Provides-Extra: mypy
20
+ Provides-Extra: pandas
21
+ Provides-Extra: pandera
22
+ Provides-Extra: polars
23
+ Summary: Static analysis for pandas and polars DataFrames. Catch column errors at lint-time, not runtime.
24
+ Keywords: pandas,polars,type-checking,static-analysis,dataframe,linter,mypy-plugin
25
+ Home-Page: https://github.com/w-martin/typedframes
26
+ Author: William Martin
27
+ License-Expression: MIT
28
+ Requires-Python: >=3.11
29
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
30
+ Project-URL: Documentation, https://typedframes.readthedocs.io/en/latest/
31
+ Project-URL: Homepage, https://github.com/w-martin/typedframes
32
+ Project-URL: Issue Tracker, https://github.com/w-martin/typedframes/issues
33
+ Project-URL: Repository, https://github.com/w-martin/typedframes
34
+
1
35
  # typedframes
2
36
 
37
+ [![CI](https://github.com/w-martin/typedframes/actions/workflows/publish.yml/badge.svg)](https://github.com/w-martin/typedframes/actions/workflows/publish.yml)
38
+ [![PyPI version](https://img.shields.io/pypi/v/typedframes.svg)](https://pypi.org/project/typedframes/)
39
+ [![Python versions](https://img.shields.io/pypi/pyversions/typedframes.svg)](https://pypi.org/project/typedframes/)
40
+ [![Coverage](https://coveralls.io/repos/github/w-martin/typedframes/badge.svg?branch=main)](https://coveralls.io/github/w-martin/typedframes?branch=main)
41
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
42
+
3
43
  > ⚠️ **Project Status: Proof of Concept**
4
44
  >
5
- > `typedframes` (v0.2.0) is currently an experimental proof-of-concept. The core static analysis and mypy/Rust
45
+ > `typedframes` (v0.2.2) is currently an experimental proof-of-concept. The core static analysis and mypy/Rust
6
46
  > integrations work, but expect rough edges. The codebase prioritizes demonstrating the viability of static DataFrame
7
47
  > schema validation over production-grade stability.
8
48
  >
@@ -288,7 +328,7 @@ typedframes check src/
288
328
  - Catches column name errors
289
329
  - Validates schema mismatches between functions
290
330
  - Checks both pandas and polars code
291
- - 10-100x faster than mypy
331
+ - Significantly faster than mypy (see benchmarks below)
292
332
 
293
333
  **Use this for:**
294
334
  - Fast feedback during development
@@ -369,17 +409,15 @@ Fast feedback reduces development time. The typedframes Rust binary provides nea
369
409
 
370
410
  **Benchmark results** (10 runs, 3 warmup, caches cleared between runs):
371
411
 
372
- | Tool | Version | What it does | typedframes (13 files) | great_expectations (490 files) |
373
- |--------------------|---------|-------------------------------|------------------------|---------------------------------|
374
- | typedframes | 0.2.0 | DataFrame column checker | 9ms ±2ms | 930µs ±89µs |
375
- | ruff | 0.15.4 | Linter (no type checking) | 64ms ±16ms | 360ms ±18ms |
376
- | ty | 0.0.19 | Type checker | 115ms ±22ms | 1.65s ±26ms |
377
- | pyrefly | 0.54.0 | Type checker | 3.78s ±7.53s | 693ms ±33ms |
378
- | mypy | 1.19.1 | Type checker (no plugin) | 13.85s ±1.08s | 12.13s ±400ms |
379
- | mypy + typedframes | 1.19.1 | Type checker + column checker | 13.51s ±273ms | 13.89s ±491ms |
380
- | pyright | 1.1.408 | Type checker | 2.10s ±422ms | 8.37s ±253ms |
381
-
382
- *† great_expectations column from previous benchmark run.*
412
+ | Tool | Version | What it does | typedframes (13 files) | great_expectations (488 files) |
413
+ |------|---------|--------------|------------------------|--------------------------------|
414
+ | typedframes | 0.2.1 | DataFrame column checker | 43ms ±531µs | 285ms ±4ms |
415
+ | ruff | 0.15.4 | Linter (no type checking) | 27ms ±932µs | 251ms ±2ms |
416
+ | ty | 0.0.19 | Type checker | 67ms ±1ms | 793ms ±14ms |
417
+ | pyrefly | 0.54.0 | Type checker | 120ms ±2ms | 1.11s ±13ms |
418
+ | mypy | 1.19.1 | Type checker (no plugin) | 3.47s ±15ms | 4.43s ±66ms |
419
+ | mypy + typedframes | 1.19.1 | Type checker + column checker | 3.52s ±40ms | 4.79s ±113ms |
420
+ | pyright | 1.1.408 | Type checker | 822ms ±55ms | 3.43s ±54ms |
383
421
 
384
422
  *Run `uv run python benchmarks/benchmark_checkers.py` to reproduce.*
385
423
 
@@ -590,29 +628,29 @@ See [`examples/schema_algebra_example.py`](examples/schema_algebra_example.py) f
590
628
  Comprehensive comparison of pandas/DataFrame typing and validation tools. **typedframes focuses on static analysis**
591
629
  —catching errors at lint-time before your code runs.
592
630
 
593
- | Feature | typedframes | Pandera | Great Expectations | strictly_typed_pandas | pandas-stubs | dataenforce | pandas-type-checks | StaticFrame | narwhals |
594
- |---------------------------------|------------------------|-------------|--------------------|-----------------------|--------------|-------------|--------------------|------------------|----------|
595
- | **Version tested** | 0.2.0 | 0.29.0 | 1.4.3 | 0.3.6 | 3.0.0 | 0.1.2 | 1.1.3 | 3.7.0 | 2.16.0 |
631
+ | Feature | typedframes | Pandera | Great Expectations | strictly_typed_pandas | pandas-stubs | dataenforce | pandas-type-checks | StaticFrame | narwhals | dataframely | patito |
632
+ |---------------------------------|------------------------|-------------|--------------------|-----------------------|--------------|-------------|--------------------|------------------|----------|------------------|------------------|
633
+ | **Version tested** | 0.2.1 | 0.29.0 | 1.18.0 | 0.3.7 | 3.0.3 | 0.1.2 | 1.1.3 | 4.1.0 | 2.22.1 | 2.10.1 | 0.8.6 |
596
634
  | **Analysis Type** |
597
- | When errors are caught | **Static (lint-time)** | Runtime | Runtime | Static + Runtime | Static | Runtime | Runtime | Static + Runtime | Runtime |
635
+ | When errors are caught | **Static (lint-time)** | Runtime | Runtime | Runtime | Static | Runtime | Runtime | Runtime | Runtime | Runtime | Runtime |
598
636
  | **Static Analysis (our focus)** |
599
- | Mypy plugin | ✅ Yes | ⚠️ Limited | ❌ No | Yes | ✅ Yes | ❌ No | ❌ No | ⚠️ Basic | ❌ No |
600
- | Standalone checker | ✅ Rust (~1ms) | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
601
- | Column name checking | ✅ Yes | ⚠️ Limited | ❌ No | Yes | ❌ No | ❌ No | ❌ No | Yes | ❌ No |
602
- | Column type checking | ✅ Yes | ⚠️ Limited | ❌ No | Yes | ❌ No | ❌ No | ❌ No | Yes | ❌ No |
603
- | Typo suggestions | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
637
+ | Mypy plugin | ✅ Yes | ⚠️ Limited | ❌ No | ❌ No | ✅ Yes | ❌ No | ❌ No | ⚠️ Basic | ❌ No | ❌ No | ❌ No |
638
+ | Standalone checker | ✅ Rust (~1ms) | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
639
+ | Column name checking | ✅ Yes | ⚠️ Limited | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
640
+ | Column type checking | ✅ Yes | ⚠️ Limited | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
641
+ | Typo suggestions | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
604
642
  | **Runtime Validation** |
605
- | Data validation | ❌ No | ✅ Excellent | ✅ Excellent | ✅ typeguard | ❌ No | ✅ Yes | ✅ Yes | ✅ Yes | ❌ No |
606
- | Value constraints | ❌ No | ✅ Yes | ✅ Excellent | ❌ No | ❌ No | ❌ No | ❌ No | ✅ Yes | ❌ No |
643
+ | Data validation | ❌ No | ✅ Excellent | ✅ Excellent | ✅ typeguard | ❌ No | ✅ Yes | ✅ Yes | ✅ Yes | ❌ No | ✅ Yes | ✅ Yes |
644
+ | Value constraints | ❌ No | ✅ Yes | ✅ Excellent | ❌ No | ❌ No | ❌ No | ❌ No | ✅ Yes | ❌ No | ✅ Yes | ✅ Yes |
607
645
  | **Schema Features** |
608
- | Column grouping | ✅ ColumnGroup | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
609
- | Regex column matching | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
646
+ | Column grouping | ✅ ColumnGroup | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
647
+ | Regex column matching | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
610
648
  | **Backend Support** |
611
- | Pandas | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ❌ Own | ✅ Yes |
612
- | Polars | ✅ Yes | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ Own | ✅ Yes |
613
- | DuckDB, cuDF, etc. | ❌ No | ❌ No | ✅ Spark, SQL | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ✅ Yes |
649
+ | Pandas | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ❌ Own | ✅ Yes | ❌ No | ⚠️ Limited |
650
+ | Polars | ✅ Yes | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ Own | ✅ Yes | ✅ Yes (only) | ✅ Yes |
651
+ | DuckDB, cuDF, etc. | ❌ No | ❌ No | ✅ Spark, SQL | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ✅ Yes | ❌ No | ❌ No |
614
652
  | **Project Status (Feb 2026)** |
615
- | Active development | ✅ Yes | ✅ Yes | ✅ Yes | ⚠️ Low | ✅ Yes | ❌ Inactive | ⚠️ Low | ✅ Yes | ✅ Yes |
653
+ | Active development | ✅ Yes | ✅ Yes | ✅ Yes | ⚠️ Low | ✅ Yes | ❌ Inactive | ⚠️ Low | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes |
616
654
 
617
655
  **Legend:** ✅ Full support | ⚠️ Limited/Partial | ❌ Not supported
618
656
 
@@ -622,8 +660,9 @@ Comprehensive comparison of pandas/DataFrame typing and validation tools. **type
622
660
  but has limitations—column access via `df["column"]` is not validated, and schema mismatches between functions may not
623
661
  be caught.
624
662
 
625
- - **[strictly_typed_pandas](https://strictly-typed-pandas.readthedocs.io/)** (v0.3.6): Provides `DataSet[Schema]` type
626
- hints with mypy support. No standalone checker. No polars support. Runtime validation via typeguard.
663
+ - **[strictly_typed_pandas](https://strictly-typed-pandas.readthedocs.io/)** (v0.3.7): Provides `DataSet[Schema]` type
664
+ hints for runtime validation via typeguard. Despite documentation implying mypy support, there is no mypy plugin —
665
+ column access errors are not caught statically. No standalone checker. No polars support.
627
666
 
628
667
  - **[pandas-stubs](https://github.com/pandas-dev/pandas-stubs)** (v3.0.3): Official pandas type stubs. Provides
629
668
  API-level types but no column-level checking.
@@ -634,19 +673,32 @@ Comprehensive comparison of pandas/DataFrame typing and validation tools. **type
634
673
  - **[pandas-type-checks](https://pypi.org/project/pandas-type-checks/)** (v1.1.3): Runtime validation decorator. No
635
674
  static analysis.
636
675
 
637
- - **[StaticFrame](https://github.com/static-frame/static-frame)** (v3.7.0): Alternative immutable DataFrame library with
638
- built-in static typing. Not compatible with pandas/polars—requires using StaticFrame's own DataFrame implementation.
676
+ - **[StaticFrame](https://github.com/static-frame/static-frame)** (v4.1.0): Alternative immutable DataFrame library.
677
+ Not compatible with pandas/polars—requires a full rewrite to StaticFrame's own API. Column access is still
678
+ string-based; mypy does not catch column name typos. Type safety comes from immutability guarantees, not schema checking.
639
679
 
640
- - **[narwhals](https://narwhals-dev.github.io/narwhals/)** (v2.16.0): Compatibility layer that provides a unified API
680
+ - **[narwhals](https://narwhals-dev.github.io/narwhals/)** (v2.22.1): Compatibility layer that provides a unified API
641
681
  across pandas, polars, DuckDB, cuDF, and more. Solves a different problem—write-once-run-anywhere portability, not
642
682
  type safety. See [Why Abstraction Layers Don't Solve Type Safety](#why-abstraction-layers-dont-solve-type-safety)
643
683
  below.
644
684
 
645
- - **[Great Expectations](https://greatexpectations.io/)** (v1.4.3): Comprehensive data quality framework. Defines
685
+ - **[Great Expectations](https://greatexpectations.io/)** (v1.18.0): Comprehensive data quality framework. Defines
646
686
  "expectations" (assertions) about data values, distributions, and schema properties. Excellent for runtime
647
687
  validation, data documentation, and data quality monitoring. No static analysis or column-level type checking in
648
688
  code. Supports pandas, Spark, and SQL backends.
649
689
 
690
+ - **[dataframely](https://github.com/Quantco/dataframely)** (v2.10.1): Polars-only runtime validation library from Quantco.
691
+ Schemas are defined as classes inheriting `dy.Schema` with typed descriptor fields (`dy.String()`, `dy.Float64()`)
692
+ and `@dy.rule()` decorators for cross-column and group-level constraints. Returns `dy.DataFrame[Schema]` generic
693
+ types that give call-site narrowing to type checkers, but does not validate column subscript access inside function
694
+ bodies. No lint-time or static analysis capability. Supports nullability, string constraints, numeric bounds,
695
+ cross-column rules, soft validation, test data generation, and SQLAlchemy/PyArrow export.
696
+
697
+ - **[patito](https://github.com/JakobGM/patito)** (v0.8.6): Runtime validation library using a Pydantic-style `patito.Model`
698
+ class. Polars is the primary backend; pandas is supported but works by converting to Polars via PyArrow (an
699
+ undeclared dependency). DuckDB is not supported despite appearing in some documentation—validation crashes immediately
700
+ on DuckDB relations. No static analysis or standalone checker.
701
+
650
702
  ### Type Checkers (Not DataFrame-Specific)
651
703
 
652
704
  These are general Python type checkers. They don't validate DataFrame column names, but they can be used alongside
@@ -996,6 +1048,7 @@ Inspired by the needs of ML/data science teams working with complex data pipelin
996
1048
 
997
1049
  ---
998
1050
 
999
- **Questions? Issues? Ideas?** [Open an issue](https://github.com/yourusername/typedframes/issues)
1051
+ **Questions? Issues? Ideas?** [Open an issue](https://github.com/w-martin/typedframes/issues)
1000
1052
 
1001
1053
  **Ready to catch DataFrame bugs before runtime?** `pip install typedframes`
1054
+
@@ -1,26 +1,14 @@
1
- Metadata-Version: 2.4
2
- Name: typedframes
3
- Version: 0.2.0
4
- Requires-Dist: mypy ; extra == 'mypy'
5
- Requires-Dist: pandas>=2.3.3 ; extra == 'pandas'
6
- Requires-Dist: pandera>=0.29.0 ; extra == 'pandera'
7
- Requires-Dist: polars>=1.38.1 ; extra == 'polars'
8
- Provides-Extra: mypy
9
- Provides-Extra: pandas
10
- Provides-Extra: pandera
11
- Provides-Extra: polars
12
- Summary: Static analysis for pandas and polars DataFrames. Catch column errors at lint-time, not runtime.
13
- Home-Page: https://github.com/w-martin/typedframes
14
- Author: William Martin
15
- License: MIT
16
- Requires-Python: >=3.11
17
- Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
18
-
19
1
  # typedframes
20
2
 
3
+ [![CI](https://github.com/w-martin/typedframes/actions/workflows/publish.yml/badge.svg)](https://github.com/w-martin/typedframes/actions/workflows/publish.yml)
4
+ [![PyPI version](https://img.shields.io/pypi/v/typedframes.svg)](https://pypi.org/project/typedframes/)
5
+ [![Python versions](https://img.shields.io/pypi/pyversions/typedframes.svg)](https://pypi.org/project/typedframes/)
6
+ [![Coverage](https://coveralls.io/repos/github/w-martin/typedframes/badge.svg?branch=main)](https://coveralls.io/github/w-martin/typedframes?branch=main)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
+
21
9
  > ⚠️ **Project Status: Proof of Concept**
22
10
  >
23
- > `typedframes` (v0.2.0) is currently an experimental proof-of-concept. The core static analysis and mypy/Rust
11
+ > `typedframes` (v0.2.2) is currently an experimental proof-of-concept. The core static analysis and mypy/Rust
24
12
  > integrations work, but expect rough edges. The codebase prioritizes demonstrating the viability of static DataFrame
25
13
  > schema validation over production-grade stability.
26
14
  >
@@ -306,7 +294,7 @@ typedframes check src/
306
294
  - Catches column name errors
307
295
  - Validates schema mismatches between functions
308
296
  - Checks both pandas and polars code
309
- - 10-100x faster than mypy
297
+ - Significantly faster than mypy (see benchmarks below)
310
298
 
311
299
  **Use this for:**
312
300
  - Fast feedback during development
@@ -387,17 +375,15 @@ Fast feedback reduces development time. The typedframes Rust binary provides nea
387
375
 
388
376
  **Benchmark results** (10 runs, 3 warmup, caches cleared between runs):
389
377
 
390
- | Tool | Version | What it does | typedframes (13 files) | great_expectations (490 files) |
391
- |--------------------|---------|-------------------------------|------------------------|---------------------------------|
392
- | typedframes | 0.2.0 | DataFrame column checker | 9ms ±2ms | 930µs ±89µs |
393
- | ruff | 0.15.4 | Linter (no type checking) | 64ms ±16ms | 360ms ±18ms |
394
- | ty | 0.0.19 | Type checker | 115ms ±22ms | 1.65s ±26ms |
395
- | pyrefly | 0.54.0 | Type checker | 3.78s ±7.53s | 693ms ±33ms |
396
- | mypy | 1.19.1 | Type checker (no plugin) | 13.85s ±1.08s | 12.13s ±400ms |
397
- | mypy + typedframes | 1.19.1 | Type checker + column checker | 13.51s ±273ms | 13.89s ±491ms |
398
- | pyright | 1.1.408 | Type checker | 2.10s ±422ms | 8.37s ±253ms |
399
-
400
- *† great_expectations column from previous benchmark run.*
378
+ | Tool | Version | What it does | typedframes (13 files) | great_expectations (488 files) |
379
+ |------|---------|--------------|------------------------|--------------------------------|
380
+ | typedframes | 0.2.1 | DataFrame column checker | 43ms ±531µs | 285ms ±4ms |
381
+ | ruff | 0.15.4 | Linter (no type checking) | 27ms ±932µs | 251ms ±2ms |
382
+ | ty | 0.0.19 | Type checker | 67ms ±1ms | 793ms ±14ms |
383
+ | pyrefly | 0.54.0 | Type checker | 120ms ±2ms | 1.11s ±13ms |
384
+ | mypy | 1.19.1 | Type checker (no plugin) | 3.47s ±15ms | 4.43s ±66ms |
385
+ | mypy + typedframes | 1.19.1 | Type checker + column checker | 3.52s ±40ms | 4.79s ±113ms |
386
+ | pyright | 1.1.408 | Type checker | 822ms ±55ms | 3.43s ±54ms |
401
387
 
402
388
  *Run `uv run python benchmarks/benchmark_checkers.py` to reproduce.*
403
389
 
@@ -608,29 +594,29 @@ See [`examples/schema_algebra_example.py`](examples/schema_algebra_example.py) f
608
594
  Comprehensive comparison of pandas/DataFrame typing and validation tools. **typedframes focuses on static analysis**
609
595
  —catching errors at lint-time before your code runs.
610
596
 
611
- | Feature | typedframes | Pandera | Great Expectations | strictly_typed_pandas | pandas-stubs | dataenforce | pandas-type-checks | StaticFrame | narwhals |
612
- |---------------------------------|------------------------|-------------|--------------------|-----------------------|--------------|-------------|--------------------|------------------|----------|
613
- | **Version tested** | 0.2.0 | 0.29.0 | 1.4.3 | 0.3.6 | 3.0.0 | 0.1.2 | 1.1.3 | 3.7.0 | 2.16.0 |
597
+ | Feature | typedframes | Pandera | Great Expectations | strictly_typed_pandas | pandas-stubs | dataenforce | pandas-type-checks | StaticFrame | narwhals | dataframely | patito |
598
+ |---------------------------------|------------------------|-------------|--------------------|-----------------------|--------------|-------------|--------------------|------------------|----------|------------------|------------------|
599
+ | **Version tested** | 0.2.1 | 0.29.0 | 1.18.0 | 0.3.7 | 3.0.3 | 0.1.2 | 1.1.3 | 4.1.0 | 2.22.1 | 2.10.1 | 0.8.6 |
614
600
  | **Analysis Type** |
615
- | When errors are caught | **Static (lint-time)** | Runtime | Runtime | Static + Runtime | Static | Runtime | Runtime | Static + Runtime | Runtime |
601
+ | When errors are caught | **Static (lint-time)** | Runtime | Runtime | Runtime | Static | Runtime | Runtime | Runtime | Runtime | Runtime | Runtime |
616
602
  | **Static Analysis (our focus)** |
617
- | Mypy plugin | ✅ Yes | ⚠️ Limited | ❌ No | Yes | ✅ Yes | ❌ No | ❌ No | ⚠️ Basic | ❌ No |
618
- | Standalone checker | ✅ Rust (~1ms) | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
619
- | Column name checking | ✅ Yes | ⚠️ Limited | ❌ No | Yes | ❌ No | ❌ No | ❌ No | Yes | ❌ No |
620
- | Column type checking | ✅ Yes | ⚠️ Limited | ❌ No | Yes | ❌ No | ❌ No | ❌ No | Yes | ❌ No |
621
- | Typo suggestions | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
603
+ | Mypy plugin | ✅ Yes | ⚠️ Limited | ❌ No | ❌ No | ✅ Yes | ❌ No | ❌ No | ⚠️ Basic | ❌ No | ❌ No | ❌ No |
604
+ | Standalone checker | ✅ Rust (~1ms) | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
605
+ | Column name checking | ✅ Yes | ⚠️ Limited | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
606
+ | Column type checking | ✅ Yes | ⚠️ Limited | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
607
+ | Typo suggestions | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
622
608
  | **Runtime Validation** |
623
- | Data validation | ❌ No | ✅ Excellent | ✅ Excellent | ✅ typeguard | ❌ No | ✅ Yes | ✅ Yes | ✅ Yes | ❌ No |
624
- | Value constraints | ❌ No | ✅ Yes | ✅ Excellent | ❌ No | ❌ No | ❌ No | ❌ No | ✅ Yes | ❌ No |
609
+ | Data validation | ❌ No | ✅ Excellent | ✅ Excellent | ✅ typeguard | ❌ No | ✅ Yes | ✅ Yes | ✅ Yes | ❌ No | ✅ Yes | ✅ Yes |
610
+ | Value constraints | ❌ No | ✅ Yes | ✅ Excellent | ❌ No | ❌ No | ❌ No | ❌ No | ✅ Yes | ❌ No | ✅ Yes | ✅ Yes |
625
611
  | **Schema Features** |
626
- | Column grouping | ✅ ColumnGroup | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
627
- | Regex column matching | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
612
+ | Column grouping | ✅ ColumnGroup | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
613
+ | Regex column matching | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No |
628
614
  | **Backend Support** |
629
- | Pandas | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ❌ Own | ✅ Yes |
630
- | Polars | ✅ Yes | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ Own | ✅ Yes |
631
- | DuckDB, cuDF, etc. | ❌ No | ❌ No | ✅ Spark, SQL | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ✅ Yes |
615
+ | Pandas | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ❌ Own | ✅ Yes | ❌ No | ⚠️ Limited |
616
+ | Polars | ✅ Yes | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ Own | ✅ Yes | ✅ Yes (only) | ✅ Yes |
617
+ | DuckDB, cuDF, etc. | ❌ No | ❌ No | ✅ Spark, SQL | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ✅ Yes | ❌ No | ❌ No |
632
618
  | **Project Status (Feb 2026)** |
633
- | Active development | ✅ Yes | ✅ Yes | ✅ Yes | ⚠️ Low | ✅ Yes | ❌ Inactive | ⚠️ Low | ✅ Yes | ✅ Yes |
619
+ | Active development | ✅ Yes | ✅ Yes | ✅ Yes | ⚠️ Low | ✅ Yes | ❌ Inactive | ⚠️ Low | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes |
634
620
 
635
621
  **Legend:** ✅ Full support | ⚠️ Limited/Partial | ❌ Not supported
636
622
 
@@ -640,8 +626,9 @@ Comprehensive comparison of pandas/DataFrame typing and validation tools. **type
640
626
  but has limitations—column access via `df["column"]` is not validated, and schema mismatches between functions may not
641
627
  be caught.
642
628
 
643
- - **[strictly_typed_pandas](https://strictly-typed-pandas.readthedocs.io/)** (v0.3.6): Provides `DataSet[Schema]` type
644
- hints with mypy support. No standalone checker. No polars support. Runtime validation via typeguard.
629
+ - **[strictly_typed_pandas](https://strictly-typed-pandas.readthedocs.io/)** (v0.3.7): Provides `DataSet[Schema]` type
630
+ hints for runtime validation via typeguard. Despite documentation implying mypy support, there is no mypy plugin —
631
+ column access errors are not caught statically. No standalone checker. No polars support.
645
632
 
646
633
  - **[pandas-stubs](https://github.com/pandas-dev/pandas-stubs)** (v3.0.3): Official pandas type stubs. Provides
647
634
  API-level types but no column-level checking.
@@ -652,19 +639,32 @@ Comprehensive comparison of pandas/DataFrame typing and validation tools. **type
652
639
  - **[pandas-type-checks](https://pypi.org/project/pandas-type-checks/)** (v1.1.3): Runtime validation decorator. No
653
640
  static analysis.
654
641
 
655
- - **[StaticFrame](https://github.com/static-frame/static-frame)** (v3.7.0): Alternative immutable DataFrame library with
656
- built-in static typing. Not compatible with pandas/polars—requires using StaticFrame's own DataFrame implementation.
642
+ - **[StaticFrame](https://github.com/static-frame/static-frame)** (v4.1.0): Alternative immutable DataFrame library.
643
+ Not compatible with pandas/polars—requires a full rewrite to StaticFrame's own API. Column access is still
644
+ string-based; mypy does not catch column name typos. Type safety comes from immutability guarantees, not schema checking.
657
645
 
658
- - **[narwhals](https://narwhals-dev.github.io/narwhals/)** (v2.16.0): Compatibility layer that provides a unified API
646
+ - **[narwhals](https://narwhals-dev.github.io/narwhals/)** (v2.22.1): Compatibility layer that provides a unified API
659
647
  across pandas, polars, DuckDB, cuDF, and more. Solves a different problem—write-once-run-anywhere portability, not
660
648
  type safety. See [Why Abstraction Layers Don't Solve Type Safety](#why-abstraction-layers-dont-solve-type-safety)
661
649
  below.
662
650
 
663
- - **[Great Expectations](https://greatexpectations.io/)** (v1.4.3): Comprehensive data quality framework. Defines
651
+ - **[Great Expectations](https://greatexpectations.io/)** (v1.18.0): Comprehensive data quality framework. Defines
664
652
  "expectations" (assertions) about data values, distributions, and schema properties. Excellent for runtime
665
653
  validation, data documentation, and data quality monitoring. No static analysis or column-level type checking in
666
654
  code. Supports pandas, Spark, and SQL backends.
667
655
 
656
+ - **[dataframely](https://github.com/Quantco/dataframely)** (v2.10.1): Polars-only runtime validation library from Quantco.
657
+ Schemas are defined as classes inheriting `dy.Schema` with typed descriptor fields (`dy.String()`, `dy.Float64()`)
658
+ and `@dy.rule()` decorators for cross-column and group-level constraints. Returns `dy.DataFrame[Schema]` generic
659
+ types that give call-site narrowing to type checkers, but does not validate column subscript access inside function
660
+ bodies. No lint-time or static analysis capability. Supports nullability, string constraints, numeric bounds,
661
+ cross-column rules, soft validation, test data generation, and SQLAlchemy/PyArrow export.
662
+
663
+ - **[patito](https://github.com/JakobGM/patito)** (v0.8.6): Runtime validation library using a Pydantic-style `patito.Model`
664
+ class. Polars is the primary backend; pandas is supported but works by converting to Polars via PyArrow (an
665
+ undeclared dependency). DuckDB is not supported despite appearing in some documentation—validation crashes immediately
666
+ on DuckDB relations. No static analysis or standalone checker.
667
+
668
668
  ### Type Checkers (Not DataFrame-Specific)
669
669
 
670
670
  These are general Python type checkers. They don't validate DataFrame column names, but they can be used alongside
@@ -1014,7 +1014,6 @@ Inspired by the needs of ML/data science teams working with complex data pipelin
1014
1014
 
1015
1015
  ---
1016
1016
 
1017
- **Questions? Issues? Ideas?** [Open an issue](https://github.com/yourusername/typedframes/issues)
1017
+ **Questions? Issues? Ideas?** [Open an issue](https://github.com/w-martin/typedframes/issues)
1018
1018
 
1019
1019
  **Ready to catch DataFrame bugs before runtime?** `pip install typedframes`
1020
-
@@ -4,23 +4,43 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "typedframes"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "Static analysis for pandas and polars DataFrames. Catch column errors at lint-time, not runtime."
9
+ keywords = ["pandas", "polars", "type-checking", "static-analysis", "dataframe", "linter", "mypy-plugin"]
9
10
  readme = "README.md"
10
11
  authors = [
11
12
  { name = "William Martin" }
12
13
  ]
13
- license = { text = "MIT" }
14
+ license = "MIT"
14
15
  requires-python = ">=3.11"
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Operating System :: OS Independent",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Programming Language :: Python :: 3.13",
25
+ "Programming Language :: Python :: 3.14",
26
+ "Topic :: Software Development :: Quality Assurance",
27
+ "Topic :: Software Development :: Libraries :: Python Modules",
28
+ ]
15
29
  dependencies = []
16
30
 
31
+ [project.urls]
32
+ Homepage = "https://github.com/w-martin/typedframes"
33
+ Documentation = "https://typedframes.readthedocs.io/en/latest/"
34
+ Repository = "https://github.com/w-martin/typedframes"
35
+ "Issue Tracker" = "https://github.com/w-martin/typedframes/issues"
36
+
17
37
  [project.scripts]
18
38
  typedframes = "typedframes.cli:main"
19
39
 
20
40
  [project.optional-dependencies]
21
- pandas = ["pandas>=2.3.3"]
22
- pandera = ["pandera>=0.29.0"]
23
- polars = ["polars>=1.38.1"]
41
+ pandas = ["pandas>=2.0"]
42
+ pandera = ["pandera>=0.20.0"]
43
+ polars = ["polars>=1.0"]
24
44
  mypy = ["mypy"]
25
45
 
26
46
  [tool.maturin]
@@ -30,7 +50,7 @@ module-name = "typedframes._rust_checker"
30
50
  features = ["pyo3/extension-module"]
31
51
 
32
52
  [tool.pytest.ini_options]
33
- addopts = "--cov=. --cov-report=term-missing --cov-report=json --cov-branch -n auto"
53
+ addopts = "--cov=. --cov-report=term-missing --cov-report=json --cov-report=lcov --cov-branch -n auto"
34
54
  testpaths = ["tests"]
35
55
 
36
56
  [tool.coverage.run]
@@ -72,7 +92,7 @@ ignore = [
72
92
  [tool.ruff.lint.per-file-ignores]
73
93
  "tests/*" = ["S101", "SLF001", "S603", "S607"]
74
94
  "tests/fixtures/*" = ["T201"]
75
- "examples/*" = ["T201", "INP001", "D100", "D101", "D103"]
95
+ "examples/*" = ["T201", "INP001", "D100", "D101", "D103", "BLE001", "ERA001", "F821", "PLR2004"]
76
96
  "tasks.py" = ["T201"]
77
97
  "src/typedframes/cli.py" = ["T201"]
78
98
  "benchmarks/*" = ["T201", "S603", "S607", "PLR2004", "PLR0912", "PLR0913", "TRY300"]
@@ -91,6 +111,7 @@ docs = [
91
111
  dev = [
92
112
  "pandas>=3.0.1",
93
113
  "pandera>=0.29.0",
114
+ "pip-audit>=2.7",
94
115
  "polars>=1.38.1",
95
116
  "bandit>=1.9.3",
96
117
  "complexipy>=5.2.0",
@@ -162,6 +183,18 @@ invalid-type-form = "ignore"
162
183
  invalid-argument-type = "ignore"
163
184
  possibly-missing-attribute = "ignore"
164
185
 
186
+ [[tool.ty.overrides]]
187
+ include = ["examples/**/*.py"]
188
+
189
+ [tool.ty.overrides.rules]
190
+ unresolved-import = "ignore"
191
+ unresolved-attribute = "ignore"
192
+ unresolved-reference = "ignore"
193
+ invalid-assignment = "ignore"
194
+ invalid-return-type = "ignore"
195
+ invalid-argument-type = "ignore"
196
+ possibly-missing-attribute = "ignore"
197
+
165
198
  [[tool.ty.overrides]]
166
199
  include = ["benchmarks/**/*.py"]
167
200
 
@@ -176,3 +209,6 @@ warnings = true
176
209
  using = "PEP631"
177
210
  groups = ["dev"]
178
211
  ignore_packages = ["mypy-extensions", "typedframes", "pyrefly"]
212
+
213
+ [tool.uv.workspace]
214
+ exclude = ["examples/*"]
@@ -382,15 +382,6 @@ dependencies = [
382
382
  "hashbrown",
383
383
  ]
384
384
 
385
- [[package]]
386
- name = "indoc"
387
- version = "2.0.7"
388
- source = "registry+https://github.com/rust-lang/crates.io-index"
389
- checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
390
- dependencies = [
391
- "rustversion",
392
- ]
393
-
394
385
  [[package]]
395
386
  name = "interpolator"
396
387
  version = "0.5.0"
@@ -501,15 +492,6 @@ version = "2.7.6"
501
492
  source = "registry+https://github.com/rust-lang/crates.io-index"
502
493
  checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
503
494
 
504
- [[package]]
505
- name = "memoffset"
506
- version = "0.9.1"
507
- source = "registry+https://github.com/rust-lang/crates.io-index"
508
- checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
509
- dependencies = [
510
- "autocfg",
511
- ]
512
-
513
495
  [[package]]
514
496
  name = "num-traits"
515
497
  version = "0.2.19"
@@ -643,37 +625,32 @@ dependencies = [
643
625
 
644
626
  [[package]]
645
627
  name = "pyo3"
646
- version = "0.23.5"
628
+ version = "0.29.0"
647
629
  source = "registry+https://github.com/rust-lang/crates.io-index"
648
- checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872"
630
+ checksum = "cd274650b21d4bfc26a0a47587962c1edb425f69287324355cd040c3ea66071c"
649
631
  dependencies = [
650
- "cfg-if",
651
- "indoc",
652
632
  "libc",
653
- "memoffset",
654
633
  "once_cell",
655
634
  "portable-atomic",
656
635
  "pyo3-build-config",
657
636
  "pyo3-ffi",
658
637
  "pyo3-macros",
659
- "unindent",
660
638
  ]
661
639
 
662
640
  [[package]]
663
641
  name = "pyo3-build-config"
664
- version = "0.23.5"
642
+ version = "0.29.0"
665
643
  source = "registry+https://github.com/rust-lang/crates.io-index"
666
- checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb"
644
+ checksum = "c5e2a7d2f0d013342f295c048ad19237add5154a55b1c5a254c0ec93d4109078"
667
645
  dependencies = [
668
- "once_cell",
669
646
  "target-lexicon",
670
647
  ]
671
648
 
672
649
  [[package]]
673
650
  name = "pyo3-ffi"
674
- version = "0.23.5"
651
+ version = "0.29.0"
675
652
  source = "registry+https://github.com/rust-lang/crates.io-index"
676
- checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d"
653
+ checksum = "ca85c467da1bbc8d866eea5deff9cf29ea5f7785054a17da36e65bda9c05845b"
677
654
  dependencies = [
678
655
  "libc",
679
656
  "pyo3-build-config",
@@ -681,9 +658,9 @@ dependencies = [
681
658
 
682
659
  [[package]]
683
660
  name = "pyo3-macros"
684
- version = "0.23.5"
661
+ version = "0.29.0"
685
662
  source = "registry+https://github.com/rust-lang/crates.io-index"
686
- checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da"
663
+ checksum = "9ac53762fd065daa3194dd09337a38bd793a188100fd1a9304c4ab312d901771"
687
664
  dependencies = [
688
665
  "proc-macro2",
689
666
  "pyo3-macros-backend",
@@ -693,13 +670,12 @@ dependencies = [
693
670
 
694
671
  [[package]]
695
672
  name = "pyo3-macros-backend"
696
- version = "0.23.5"
673
+ version = "0.29.0"
697
674
  source = "registry+https://github.com/rust-lang/crates.io-index"
698
- checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028"
675
+ checksum = "4ca3a1557399783172dc5bf39cfca835157732532cba56b71d2292161e53b362"
699
676
  dependencies = [
700
677
  "heck",
701
678
  "proc-macro2",
702
- "pyo3-build-config",
703
679
  "quote",
704
680
  "syn",
705
681
  ]
@@ -743,9 +719,9 @@ checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
743
719
 
744
720
  [[package]]
745
721
  name = "rand"
746
- version = "0.8.5"
722
+ version = "0.8.6"
747
723
  source = "registry+https://github.com/rust-lang/crates.io-index"
748
- checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
724
+ checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
749
725
  dependencies = [
750
726
  "libc",
751
727
  "rand_chacha",
@@ -1028,9 +1004,9 @@ dependencies = [
1028
1004
 
1029
1005
  [[package]]
1030
1006
  name = "target-lexicon"
1031
- version = "0.12.16"
1007
+ version = "0.13.5"
1032
1008
  source = "registry+https://github.com/rust-lang/crates.io-index"
1033
- checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
1009
+ checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
1034
1010
 
1035
1011
  [[package]]
1036
1012
  name = "tempfile"
@@ -1133,7 +1109,7 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
1133
1109
 
1134
1110
  [[package]]
1135
1111
  name = "typedframes_checker"
1136
- version = "0.2.0"
1112
+ version = "0.2.2"
1137
1113
  dependencies = [
1138
1114
  "anyhow",
1139
1115
  "criterion",
@@ -1192,12 +1168,6 @@ dependencies = [
1192
1168
  "rand",
1193
1169
  ]
1194
1170
 
1195
- [[package]]
1196
- name = "unindent"
1197
- version = "0.2.4"
1198
- source = "registry+https://github.com/rust-lang/crates.io-index"
1199
- checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
1200
-
1201
1171
  [[package]]
1202
1172
  name = "walkdir"
1203
1173
  version = "2.5.0"