tab-cli 0.1.7__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. {tab_cli-0.1.7 → tab_cli-0.1.8}/.gitignore +0 -1
  2. {tab_cli-0.1.7 → tab_cli-0.1.8}/AGENTS.md +6 -2
  3. tab_cli-0.1.8/CHANGELOG.md +34 -0
  4. {tab_cli-0.1.7 → tab_cli-0.1.8}/Makefile +3 -3
  5. {tab_cli-0.1.7 → tab_cli-0.1.8}/PKG-INFO +1 -1
  6. {tab_cli-0.1.7 → tab_cli-0.1.8}/docs/cli-ref.md +2 -2
  7. {tab_cli-0.1.7 → tab_cli-0.1.8}/docs/configuration.md +8 -0
  8. {tab_cli-0.1.7 → tab_cli-0.1.8}/pyproject.toml +1 -1
  9. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/__init__.py +1 -1
  10. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/cli.py +156 -34
  11. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/config.py +24 -2
  12. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/avro.py +7 -2
  13. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/base.py +7 -2
  14. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/csv.py +7 -2
  15. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/jsonl.py +7 -2
  16. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/parquet.py +16 -7
  17. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/handlers/__init__.py +28 -4
  18. tab_cli-0.1.8/src/tab_cli/handlers/base.py +373 -0
  19. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/__init__.py +14 -4
  20. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/aws.py +14 -15
  21. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/az.py +11 -8
  22. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/base.py +4 -0
  23. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/fsspec.py +22 -9
  24. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/gcloud.py +17 -14
  25. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/local.py +12 -2
  26. tab_cli-0.1.8/tests/conftest.py +12 -0
  27. tab_cli-0.1.8/tests/test_cat.py +150 -0
  28. tab_cli-0.1.8/tests/test_config.py +181 -0
  29. tab_cli-0.1.8/tests/test_stdin.py +66 -0
  30. tab_cli-0.1.8/tests/test_storage.py +83 -0
  31. tab_cli-0.1.8/tests/test_summary.py +161 -0
  32. tab_cli-0.1.8/tests/test_view.py +192 -0
  33. tab_cli-0.1.8/uv.lock +2688 -0
  34. tab_cli-0.1.7/CHANGELOG.md +0 -20
  35. tab_cli-0.1.7/src/tab_cli/handlers/base.py +0 -259
  36. tab_cli-0.1.7/tests/test_cli.py +0 -357
  37. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/.gitignore +0 -0
  38. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0 -0
  39. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/282ad8cf3324b2679a7d460c0fc324adfa21dcfad2f197ac6991b98ec91f98495bb3ddb2cba36ce5dfa28a52063a373bb03f5d2e34f6e0c7b6b81b3046a4d7d0 +0 -0
  40. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/891d7ed26f62b0f8757b081e9d76840636877f1613ee17b7695fe2ee8640258c56e01291934e3797728bc2300a1b5f41e3f3ef81ab532c0e5ab475bb9b6f097a +0 -0
  41. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/8926397375b8328137652c18cd4371808b214cb26864d77dd33c8eea895e10e62c297064c3d4c703d58ee11dee81bbf9851525e3a0c33151f54c164b8a4343b0 +0 -0
  42. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/89d29d938e64b1295e138faedfa5df6f6729a67e5ba8e0c3fdd1c9266e0f59a4bc35b8a21e14225072317879af3323fa0457e01d9b08608770bc23957feec6c0 +0 -0
  43. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/95fc2854c0fa2528b03e2bcabc7612f56f0bcb3d6f54a06293e21d1887885a449907b5b320a1a94bac712bf56fdad0a4184b17047c1fc179dfa83acab3f70d21 +0 -0
  44. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/9a58ceae46de4649375b6b880b8500c85d34c8e9bc650dc4c993afb2fa8d45c4180331821c6226a0fb1475b04a0b9a849502647ef9c4dcac74769eb501c885fa +0 -0
  45. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/a19ae062208aa20b7310705af3d26ef53095a9dabfad080883cc7a32e98687063179db95cb2c71ef9064801c59fa46d261f172d83e83f96a39c274387b59dca4 +0 -0
  46. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/d1169a8d10067493e42752c2a7615ff27f55bd90c38b91feef918958e29a2239ee289a0cde8385441d1f0fe9af1fb634fa6d56438b9f33ec382e81fa59d70b54 +0 -0
  47. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/e171e5c6039c050c2368584a537d36df221b1f9f23d285c0399b95b14608d67006229823e6831bff7d8b0c2f9e86ebaec9c6811461119195f430ade055073fed +0 -0
  48. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/ee118dd19aac4e1cb354f83a37dadca70bc5f086cc8d36cd0059b222bf8c7250824401ff681518d0120f8db1f96f0025c464dcfc3b1ee28777e8c76325760134 +0 -0
  49. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0 -0
  50. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/282ad8cf3324b2679a7d460c0fc324adfa21dcfad2f197ac6991b98ec91f98495bb3ddb2cba36ce5dfa28a52063a373bb03f5d2e34f6e0c7b6b81b3046a4d7d0 +0 -0
  51. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/891d7ed26f62b0f8757b081e9d76840636877f1613ee17b7695fe2ee8640258c56e01291934e3797728bc2300a1b5f41e3f3ef81ab532c0e5ab475bb9b6f097a +0 -0
  52. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/8926397375b8328137652c18cd4371808b214cb26864d77dd33c8eea895e10e62c297064c3d4c703d58ee11dee81bbf9851525e3a0c33151f54c164b8a4343b0 +0 -0
  53. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/89d29d938e64b1295e138faedfa5df6f6729a67e5ba8e0c3fdd1c9266e0f59a4bc35b8a21e14225072317879af3323fa0457e01d9b08608770bc23957feec6c0 +0 -0
  54. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/95fc2854c0fa2528b03e2bcabc7612f56f0bcb3d6f54a06293e21d1887885a449907b5b320a1a94bac712bf56fdad0a4184b17047c1fc179dfa83acab3f70d21 +0 -0
  55. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/9a58ceae46de4649375b6b880b8500c85d34c8e9bc650dc4c993afb2fa8d45c4180331821c6226a0fb1475b04a0b9a849502647ef9c4dcac74769eb501c885fa +0 -0
  56. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/a19ae062208aa20b7310705af3d26ef53095a9dabfad080883cc7a32e98687063179db95cb2c71ef9064801c59fa46d261f172d83e83f96a39c274387b59dca4 +0 -0
  57. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/d1169a8d10067493e42752c2a7615ff27f55bd90c38b91feef918958e29a2239ee289a0cde8385441d1f0fe9af1fb634fa6d56438b9f33ec382e81fa59d70b54 +0 -0
  58. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/e171e5c6039c050c2368584a537d36df221b1f9f23d285c0399b95b14608d67006229823e6831bff7d8b0c2f9e86ebaec9c6811461119195f430ade055073fed +0 -0
  59. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/ee118dd19aac4e1cb354f83a37dadca70bc5f086cc8d36cd0059b222bf8c7250824401ff681518d0120f8db1f96f0025c464dcfc3b1ee28777e8c76325760134 +0 -0
  60. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/1029dd1c1f4430d8a667a0e48d0b817652c7ddca6f5ff56cac1755e5bb0c1cb7586935941c9b36f26cab05c0effe6154d073bd1dffbe37f22fed0a6e7d79201f +0 -0
  61. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/3228b9bef9d8374b5532b40df2da8be3bfc86de713bdad7fe620977ffa7c56db83928678caa792bf0d328db607028e045a9e41423ef7501e5b550651c3815ffe +0 -0
  62. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/424c9a53f5458b328d77ed6a943dc35662e949befa8725cfc7eead01a270417c8d07c1001b11623a088da8d2c9c34a41573314fa2394643147e4027e8a96a605 +0 -0
  63. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/44107e81054aa5544b36eab8d811908a559b4d9027dc3fa1762c44e39551652199ef2a31f9bbcda79773c906151c12578cb18c43e0045de8b20c357272e1c62a +0 -0
  64. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/52ea57bde33e8ef4718d833c2df3cf0a9e90fdcd5715c0caad50b4e37ea60aca2b8c71096de6920dc936c40b6291e896293f91c7cfd2aa96cf6c2aa49ef662c8 +0 -0
  65. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/5ea52f6d4771afda4747b9f44954102c02ae2d0686f8aa9eca36c29796bbba0d14e851c0dca0a6af17bfbbbac174e3be645ae708d958390168e85c64786fc9ef +0 -0
  66. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/86a1cfa113399acc7dd2dc90262a845558affb5e9373b6300dff68a485482c5e17ace9466bbc23b4301013ca1a27a577ca57ea838113f45bf321a64a242b1ad3 +0 -0
  67. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/8ef99a12ae0624db198d9ecd83014fb8353c4731e0ba9a472f1fc339784308e38f1287d0765a7b0444f1a89c218c76a820dc4c9a3a39c1cedcd7423a4f5f88dc +0 -0
  68. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/cb3e8da2bf2b7efae4a6e8fd0b8562dacd16ca0531b173a91480a9e60ee795ac5bec13fc6eb461e03edc9a26f5ff1d5ba53521a1c1a6c1ee1765b544b7d7bf73 +0 -0
  69. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/d27ad326963b75736b636adad9fb812eb3f2871e0efb4bc7db37d4b701a4282911eaaee91bed3a759e940769b667be1ed66f2d7f2f41ac3906b87ab7eec19c3a +0 -0
  70. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/type +0 -0
  71. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_heads/heads/891d7ed26f62b0f8757b081e9d76840636877f1613ee17b7695fe2ee8640258c56e01291934e3797728bc2300a1b5f41e3f3ef81ab532c0e5ab475bb9b6f097a +0 -0
  72. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_heads/type +0 -0
  73. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/282ad8cf3324b2679a7d460c0fc324adfa21dcfad2f197ac6991b98ec91f98495bb3ddb2cba36ce5dfa28a52063a373bb03f5d2e34f6e0c7b6b81b3046a4d7d0 +0 -0
  74. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/891d7ed26f62b0f8757b081e9d76840636877f1613ee17b7695fe2ee8640258c56e01291934e3797728bc2300a1b5f41e3f3ef81ab532c0e5ab475bb9b6f097a +0 -0
  75. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/8926397375b8328137652c18cd4371808b214cb26864d77dd33c8eea895e10e62c297064c3d4c703d58ee11dee81bbf9851525e3a0c33151f54c164b8a4343b0 +0 -0
  76. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/89d29d938e64b1295e138faedfa5df6f6729a67e5ba8e0c3fdd1c9266e0f59a4bc35b8a21e14225072317879af3323fa0457e01d9b08608770bc23957feec6c0 +0 -0
  77. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/95fc2854c0fa2528b03e2bcabc7612f56f0bcb3d6f54a06293e21d1887885a449907b5b320a1a94bac712bf56fdad0a4184b17047c1fc179dfa83acab3f70d21 +0 -0
  78. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/9a58ceae46de4649375b6b880b8500c85d34c8e9bc650dc4c993afb2fa8d45c4180331821c6226a0fb1475b04a0b9a849502647ef9c4dcac74769eb501c885fa +0 -0
  79. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/a19ae062208aa20b7310705af3d26ef53095a9dabfad080883cc7a32e98687063179db95cb2c71ef9064801c59fa46d261f172d83e83f96a39c274387b59dca4 +0 -0
  80. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/d1169a8d10067493e42752c2a7615ff27f55bd90c38b91feef918958e29a2239ee289a0cde8385441d1f0fe9af1fb634fa6d56438b9f33ec382e81fa59d70b54 +0 -0
  81. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/e171e5c6039c050c2368584a537d36df221b1f9f23d285c0399b95b14608d67006229823e6831bff7d8b0c2f9e86ebaec9c6811461119195f430ade055073fed +0 -0
  82. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/ee118dd19aac4e1cb354f83a37dadca70bc5f086cc8d36cd0059b222bf8c7250824401ff681518d0120f8db1f96f0025c464dcfc3b1ee28777e8c76325760134 +0 -0
  83. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/type +0 -0
  84. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/123b0e36150cf8e99d644d2dfd1a7b0c8d2f676a78248d6902516d9ae58903665c79ec3f5f6729b98a1237dcf2abc1d41e690ae0ad30888c84786bcc9de5e314 +0 -0
  85. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/2ac0b7d8b1fdfac82b3ff3926e0018f72ef2b48f85b41f5fa541271370e1197d41e37c2c6d62af0c6974658c4a9e5e945b8efcbcc7748bdd99bd9483f7e13e22 +0 -0
  86. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/67b8396b301935ff624ff98952c57d8ee021e7d885e1220053b993e7bc822a4cf061298e9643ded745c55f3ed8e923a6731524129993d2664f48b60660761145 +0 -0
  87. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/9c6fca696f77383cc87d068fe3f5912466b157dccc6465973e653dd4d2c02e2eca9c0725c071857bf3f4f0e263259eafc18f1739615a501e672bb2afd415316b +0 -0
  88. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/cae2e3e5952cb5ba93f27e3898d90dac6c41fc9e20c66ef0791e71b91dc103d924996c921c88179705264a224fb5d2bb29091fc8f18f0ea7a088aaabb859ea2a +0 -0
  89. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/cd8efb6da14127c81c37b56ead18a39b30f2cd154891185a6e906efb491dbf63f290eed3c31d4725f490a93208ffbe5cdc031d5b6de38fcb77cbb11f0357118f +0 -0
  90. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/e434359d6306a4f6997733b9b5308299984f05219c92e1b2a31f1203126be0fada5fc09a2ba86c98d1318981cb53997c7f8674ef25da6ca7bf9fd849598cd355 +0 -0
  91. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/ea75e7ccb42f52b013dc1b45bb4e6d692b37c5e38bd39356ba2806be83ca5ca03ce20481db2788428126f93663e6723f99f4d46c5c705f5b12b70e2127ab15ba +0 -0
  92. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/fa5dec7ee06fbc6cbb2798c8e98bab482a6750776de41406fb06893c83f71f5015ec7ee2873642714b7bbc1c496f880e3acf44ffdabf2f87a39e0fbd68a4cc46 +0 -0
  93. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/ffde172c6285c71851b22780e34962d8f9234067af59ff2dc38b74e905dc540fe35b944e8c0b2d4230dfcf778424a335494cd0aeea1bb4be2a11c4b5428ad465 +0 -0
  94. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/14e3f15273e204cadfe38dd2f38cebd1343a6bfbb91c6af0f5f9de6e9003d8ec8b0eb676975af9dca2d06ee5e6b3886c3c5d3755c6f149b6b09172cccc35adff +0 -0
  95. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/42e0f32dd10ba6ae9f2297ca8ad0bd16337f14545b29f956ce380a7ab92bab771cbc9e04755752a6fa13231286f724379d6662b0fb257ef2cc2c52fe680eb95d +0 -0
  96. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/466a441e56afdde383cef1d6127ca1d4c825157feb65a59f8f6ff5fa7a523bd683d0b7a6bb16c00afff53a890319a472d98da9af3dea5675168a4d424aa7af32 +0 -0
  97. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310 +0 -0
  98. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/5403fa06049419ddcf620ce0dc20911583e1b1062b42630ee325aa5ac2f918dd266dfed93ebc808a2aecb37ac3a33e251fe1396fd24b8ed566bfbd61a81ff959 +0 -0
  99. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/56119fa0480cb66159978b1a8c9b031f9e978b7a3172eb87407a701fea34fdd90db7771cd433a2cf7e8a84a7b49c924f973eccb0eb05685ec42f36f7ef61cc06 +0 -0
  100. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/90b1f4de4ba65e0652f1de45e4c84b623f99bd9d9453667e19c7040857bd397e59e7a84406f2ab6204d25789d995c47c350d8abc47a4bbb102059f3111f20028 +0 -0
  101. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/9215bd4ac28fdecba111c63bace46d0f1c253ad3af44e0e74bd43d30757bef2e655538c1172c8f25769bce0a3c669b713440773c1859f0a136d9ca42501f2470 +0 -0
  102. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/a79320f784ef97b3d6297e55a48b17a517a38d95d5c61ba8d01c59d68dcd2ccf3a96479f4fc3c4cafdcd56dc7bd58b1cb987e079764c1646533ab32418900727 +0 -0
  103. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/abe6b7a350e1604cb8f5a2cd10cef13a019bd797770e6dc37414d33d98dcf36d8ec80a2ae13bd7dd2d2076772c0ffadec0b53ba72f4669b461fff2da5d30f1ba +0 -0
  104. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/b7689d54193a3798edd58d758966ad65ad57297c5276eab3e4ef07380779363efe9e462a149f4d42f55bbe004eb5ba88bf35df4c78ac975275530382390159d6 +0 -0
  105. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/e75f5f3431d172d7e9434dfaef2be50812105b3ead73eeb10345c9b6892e9cbb5ee0602ebb0ceaf5ab87d22f45930dc30d136d0aac77310fb0261b3857ffde9b +0 -0
  106. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/heads/56119fa0480cb66159978b1a8c9b031f9e978b7a3172eb87407a701fea34fdd90db7771cd433a2cf7e8a84a7b49c924f973eccb0eb05685ec42f36f7ef61cc06 +0 -0
  107. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/git_target +0 -0
  108. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/type +0 -0
  109. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/submodule_store/type +0 -0
  110. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/workspace_store/index +0 -0
  111. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/working_copy/checkout +0 -0
  112. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/working_copy/tree_state +0 -0
  113. {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/working_copy/type +0 -0
  114. {tab_cli-0.1.7 → tab_cli-0.1.8}/LICENSE +0 -0
  115. {tab_cli-0.1.7 → tab_cli-0.1.8}/README.md +0 -0
  116. {tab_cli-0.1.7 → tab_cli-0.1.8}/docs/cloud.md +0 -0
  117. {tab_cli-0.1.7 → tab_cli-0.1.8}/docs/gen_assets.sh +0 -0
  118. {tab_cli-0.1.7 → tab_cli-0.1.8}/docs/index.md +0 -0
  119. {tab_cli-0.1.7 → tab_cli-0.1.8}/mkdocs.yml +0 -0
  120. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/__init__.py +0 -0
  121. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/handlers/cli_table.py +0 -0
  122. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/style.py +0 -0
  123. {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/url_parser.py +0 -0
  124. {tab_cli-0.1.7 → tab_cli-0.1.8}/tests/__init__.py +0 -0
  125. {tab_cli-0.1.7 → tab_cli-0.1.8}/tests/assets/test.csv +0 -0
@@ -29,7 +29,6 @@ ENV/
29
29
 
30
30
  # uv
31
31
  .uv/
32
- uv.lock
33
32
 
34
33
  # PyCharm
35
34
  .idea/
@@ -43,7 +43,7 @@ Use it as the default operating guide when changing code in this repo.
43
43
  - The CLI tests rely on `typer.testing.CliRunner`.
44
44
  - Test data is stored under `tests/assets`.
45
45
  - Existing tests emphasize user-visible CLI output, not internal implementation details.
46
- - When adding CLI behavior, extend `tests/test_cli.py` unless the change clearly deserves a new file.
46
+ - The CLI tests are split across focused files under `tests/`; extend the nearest existing file unless a new one is clearly warranted.
47
47
  - Assert both `exit_code` and key output fragments.
48
48
  - For stdin support, pass `"-"` as the path and provide `input=` to `runner.invoke(...)`.
49
49
 
@@ -89,6 +89,9 @@ Use it as the default operating guide when changing code in this repo.
89
89
 
90
90
  ## Types
91
91
 
92
+ - NEVER implicitly cast any variable to bool with `if var:` or `if not var:` unless the variable is already a bool. Do NOT rely on truthiness for control flow:
93
+ For example, testing if a list is empty with `if not my_list:` is not allowed. Instead, use explicit length checks like `if len(my_list) == 0:`.
94
+ Always write `if x is not None:` or `if x is None:` when checking for `None` values.
92
95
  - Type hints are used widely and should be preserved.
93
96
  - Prefer modern built-in generics like `list[str]` and `dict[str, Any]`.
94
97
  - Use `X | None` instead of `Optional[X]` in new code unless matching nearby style requires otherwise.
@@ -118,6 +121,7 @@ Use it as the default operating guide when changing code in this repo.
118
121
  ## Logging And Output
119
122
 
120
123
  - The CLI configures Loguru with `RichHandler` in the Typer callback.
124
+ - When writing Loguru messages, use f-strings instead of Loguru brace-style formatting.
121
125
  - User-facing table and summary output is rendered with Rich.
122
126
  - Streaming command output usually writes bytes to `sys.stdout.buffer`.
123
127
  - Keep stderr/stdout behavior consistent with the existing command design.
@@ -155,4 +159,4 @@ Use it as the default operating guide when changing code in this repo.
155
159
  - Run `uv run pytest` for broader validation before finalizing cross-cutting changes.
156
160
  - Mention pre-existing lint or type-check failures separately from regressions you introduce.
157
161
  - Update CHANGELOG.md when necessary.
158
- -
162
+ -
@@ -0,0 +1,34 @@
1
+ - 0.1.8:
2
+ - Improved `tab view` performance for partitioned directories by reading only as many early partitions as needed for an unfiltered preview.
3
+ - Added glob-pattern support for multi-file inputs such as `s3://.../date=*/*.parquet`.
4
+ - Sped up Parquet row counting in summaries by reading footer metadata instead of scanning file contents.
5
+ - Fixed S3 Polars `storage_options` to avoid nested `client_kwargs` values that could break native reads.
6
+ - Added `default_num_view_rows` config so the default `tab view` preview size can be customized.
7
+ - Added `log_level` config so the CLI log level can default from `~/.config/tab/config.json` when `--log-level` is omitted.
8
+ - Added `max_cell_length` config so `tab view` can default to truncating long cell values without passing `--max-cell-len` every time.
9
+ - Added `num_remote_workers` config to parallelize remote per-partition summary row counting.
10
+ - Validated config file value types instead of silently accepting invalid JSON types.
11
+ - Fixed the developer `Makefile` targets to point at `src/tab_cli`.
12
+ - Tightened multi-file summary validation to reject inconsistent schemas, not just mismatched column counts.
13
+ - `tab cat` now rejects mixed input formats with a clear error instead of reusing the first reader implicitly.
14
+ - Cleaned up package metadata and repository hygiene issues including version drift and `uv.lock` ignore rules.
15
+ - 0.1.7:
16
+ - Optional dependency groups are now named `tab-cli[s3|gs|az]`, in accordance with the protocol name.
17
+ - 0.1.6:
18
+ - Fixed bug in pyarrow loading of Parquet files.
19
+ - Added global config file support: settings can be persisted in `~/.config/tab/config.json`. Config file values serve as defaults that CLI flags override.
20
+ - 0.1.5:
21
+ - Added stdin support: use `-` as the file path to read from stdin (e.g. `cat data.csv | tab view -i csv -`). Requires `-i`/`--input-format` since format cannot be inferred. Works with `view`, `schema`, `summary`, `convert`, and `cat`.
22
+ - Added row-wise JMESPath queries via `--jmespath` / `--jp` on `view`, `convert`, and `cat`. Object results become columns; non-object results go into a `value` column. `--sql` and `--jp` are mutually exclusive.
23
+ - Implemented `--jp` with `LazyFrame.map_batches(...)` so row reshaping stays batch-oriented instead of materializing the full transformed dataset up front.
24
+ - 0.1.4:
25
+ - Removed `tab sql` subcommand; SQL is now a `--sql` option on `tab view`, `tab convert`, and `tab cat`.
26
+ - Automatic PyArrow fallback for Parquet files that fail to read with Polars' native reader.
27
+ - 0.1.3:
28
+ - Separate `tab view` from `tab cat`: `tab view` does not convert formats, `tab cat` does.
29
+ - Added `--max-cell-len` option to `tab view` to truncate long cell contents.
30
+ - 0.1.2:
31
+ - Bugfix on reading directories.
32
+ - 0.1.1:
33
+ - Better credential handling for Azure Blob Storage and Google Cloud Storage.
34
+ - 0.1.0: Initial release
@@ -13,13 +13,13 @@ clean:
13
13
  find . -type d -name __pycache__ -exec rm -rf {} +
14
14
 
15
15
  lint:
16
- uv run ruff check tab_cli/
16
+ uv run ruff check src/tab_cli tests
17
17
 
18
18
  format:
19
- uv run ruff format tab_cli/
19
+ uv run ruff format src/tab_cli tests
20
20
 
21
21
  typecheck:
22
- uv run ty check tab_cli/
22
+ uv run ty check src/tab_cli
23
23
 
24
24
  test:
25
25
  uv run pytest
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tab-cli
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: A CLI tool for tabular data
5
5
  Author-email: Tongfei Chen <tongfei@pm.me>
6
6
  License-File: LICENSE
@@ -17,7 +17,7 @@ Options:
17
17
  | `--jmespath` / `--jp` | JMESPath expression to apply to each row as JSON. Object outputs become columns; non-object outputs go to a `value` column. The result shape must stay consistent across rows. |
18
18
  | `--limit` | Maximum number of rows to display. |
19
19
  | `--skip` | Number of rows to skip from the beginning. |
20
- | `--max-cell-len` | Truncate cell contents longer than this. |
20
+ | `--max-cell-len` | Truncate cell contents longer than this. If omitted, `max_cell_length` from config is used when set. |
21
21
 
22
22
  ## `tab schema`
23
23
 
@@ -91,4 +91,4 @@ Options:
91
91
  | Option | Description |
92
92
  |-------------------------|------------------------------------------------------------------------------------------------------------------------------|
93
93
  | `--az-url-authority-is-account` | Interpret az:// URL authority as storage account name instead of container name. See [Azure](azure.md) for more information. |
94
- | `--log-level` | Log level from `{DEBUG, INFO, WARNING, ERROR, CRITICAL}`. |
94
+ | `--log-level` | Log level from `{DEBUG, INFO, WARNING, ERROR, CRITICAL}`. If omitted, uses `log_level` from config. |
@@ -11,6 +11,10 @@ mkdir -p ~/.config/tab
11
11
  cat > ~/.config/tab/config.json << 'EOF'
12
12
  {
13
13
  "az_url_authority_is_account": false,
14
+ "default_num_view_rows": 20,
15
+ "log_level": "INFO",
16
+ "max_cell_length": null,
17
+ "num_remote_workers": 8,
14
18
  "sampling_size_for_schema_inference": 32
15
19
  }
16
20
  EOF
@@ -21,6 +25,10 @@ EOF
21
25
  | Key | Type | Default | Description |
22
26
  |-----|------|---------|-------------|
23
27
  | `az_url_authority_is_account` | `bool` | `false` | Interpret `az://` URL authority as storage account name instead of container name. |
28
+ | `default_num_view_rows` | `int` | `20` | Default number of rows shown by `tab view` when `--limit` is omitted. |
29
+ | `log_level` | `str` | `"INFO"` | Default CLI log level when `--log-level` is omitted. |
30
+ | `max_cell_length` | `int \| null` | `null` | Default maximum cell length for `tab view`. The CLI `--max-cell-len` flag overrides it. |
31
+ | `num_remote_workers` | `int` | `8` | Maximum worker threads for remote per-partition summary work such as Parquet row counts. |
24
32
  | `sampling_size_for_schema_inference` | `int` | `32` | Number of rows sampled for schema inference (e.g. when using `--jp`). |
25
33
 
26
34
  ## Precedence
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "tab-cli"
3
- version = "0.1.7"
3
+ version = "0.1.8"
4
4
  description = "A CLI tool for tabular data"
5
5
  authors = [{name = "Tongfei Chen", email = "tongfei@pm.me"}]
6
6
  readme = "README.md"
@@ -1,3 +1,3 @@
1
1
  """Tab CLI - A CLI tool for tabular data."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.1.7"
@@ -79,6 +79,8 @@ app = typer.Typer(
79
79
  no_args_is_help=True,
80
80
  )
81
81
 
82
+ DEFAULT_VIEW_TRUNCATION_PROBE_ROWS = 1
83
+
82
84
 
83
85
  @app.callback()
84
86
  def main_callback(
@@ -90,13 +92,18 @@ def main_callback(
90
92
  ),
91
93
  ] = False,
92
94
  log_level: Annotated[
93
- str,
95
+ str | None,
94
96
  typer.Option(
95
- "--log-level", help="Log level from {DEBUG, INFO, WARNING, ERROR, CRITICAL}"
97
+ "--log-level",
98
+ help="Log level from {DEBUG, INFO, WARNING, ERROR, CRITICAL}; defaults to config when omitted",
96
99
  ),
97
- ] = "INFO",
100
+ ] = None,
98
101
  ) -> None:
99
102
  """Global options for tab_cli CLI."""
103
+ load_config_file()
104
+ effective_log_level = (
105
+ log_level.upper() if log_level is not None else config.config.log_level.upper()
106
+ )
100
107
  logger.remove()
101
108
  logger.add(
102
109
  RichHandler(
@@ -105,9 +112,8 @@ def main_callback(
105
112
  markup=True,
106
113
  ),
107
114
  format="{message}",
108
- level=log_level.upper(),
115
+ level=effective_log_level,
109
116
  )
110
- load_config_file()
111
117
  # CLI flags override config file values
112
118
  if az_url_authority_is_account:
113
119
  config.config.az_url_authority_is_account = az_url_authority_is_account
@@ -176,12 +182,20 @@ def _apply_jmespath(lf: pl.LazyFrame, expression: str) -> pl.LazyFrame:
176
182
 
177
183
  compiled = jmespath.compile(expression)
178
184
  sample_df = lf.slice(0, Config.sampling_size_for_schema_inference).collect()
185
+ logger.debug(
186
+ "Inferring JMESPath output schema from "
187
+ f"{Config.sampling_size_for_schema_inference} sampled row(s)"
188
+ )
179
189
  if sample_df.is_empty():
190
+ logger.debug("JMESPath schema inference sample was empty; returning empty LazyFrame")
180
191
  return pl.DataFrame().lazy()
181
192
 
182
193
  transformed_sample, result_mode = _transform_jmespath_batch(sample_df, compiled)
183
194
  output_schema = transformed_sample.schema
184
195
  expected_columns = tuple(transformed_sample.columns)
196
+ logger.debug(
197
+ f"Inferred JMESPath result mode '{result_mode}' with columns {expected_columns}"
198
+ )
185
199
 
186
200
  return lf.map_batches(
187
201
  lambda batch: _transform_jmespath_batch(
@@ -223,16 +237,119 @@ def _apply_limit(
223
237
  and returns whether the data was truncated.
224
238
  """
225
239
  if limit is None and default_limit is not None:
240
+ logger.debug(
241
+ f"Applying inferred default row limit {default_limit} with skip {skip}"
242
+ )
226
243
  lf = lf.slice(skip, length=default_limit + 1)
227
244
  df = lf.collect()
228
245
  truncated = len(df) > default_limit
229
246
  if truncated:
230
247
  df = df.head(default_limit)
248
+ logger.debug("Detected truncated preview after applying inferred default row limit")
231
249
  return df.lazy(), truncated
232
- else:
233
- if skip > 0 or limit is not None:
234
- lf = lf.slice(skip, length=limit)
235
- return lf, False
250
+ if skip > 0 or limit is not None:
251
+ lf = lf.slice(skip, length=limit)
252
+ return lf, False
253
+
254
+
255
+ def _read_source(path: str, input_format: str | None) -> tuple[pl.LazyFrame, str | None]:
256
+ """Read a source path and return its LazyFrame and inferred format."""
257
+ if is_stdin(path):
258
+ logger.debug(
259
+ "Using stdin source with explicit format "
260
+ f"'{input_format.lower() if input_format is not None else None}'"
261
+ )
262
+ return (
263
+ read_stdin(format=input_format),
264
+ input_format.lower() if input_format is not None else None,
265
+ )
266
+
267
+ reader = infer_reader(path, format=input_format)
268
+ logger.debug(
269
+ f"Read source '{path}' using inferred format '{reader.format.extension()}'"
270
+ )
271
+ return reader.read(path), reader.format.extension()
272
+
273
+
274
+ def _prepare_view_frame(
275
+ path: str,
276
+ input_format: str | None,
277
+ sql: str | None,
278
+ jmespath_expr: str | None,
279
+ limit: int | None,
280
+ skip: int,
281
+ ) -> tuple[pl.LazyFrame, bool]:
282
+ """Prepare the LazyFrame used by `tab view` and report truncation."""
283
+ default_view_rows = config.config.default_num_view_rows
284
+
285
+ if is_stdin(path):
286
+ logger.debug("Preparing view for stdin input")
287
+ lf = read_stdin(format=input_format)
288
+ lf = _apply_query(lf, sql=sql, jmespath_expr=jmespath_expr)
289
+ return _apply_limit(
290
+ lf,
291
+ limit=limit,
292
+ skip=skip,
293
+ default_limit=default_view_rows if limit is None else None,
294
+ )
295
+
296
+ reader = infer_reader(path, format=input_format)
297
+ if sql is None and jmespath_expr is None:
298
+ preview_limit = (
299
+ limit
300
+ if limit is not None
301
+ else default_view_rows + DEFAULT_VIEW_TRUNCATION_PROBE_ROWS
302
+ )
303
+ logger.debug(
304
+ f"Using preview read for '{path}' with inferred preview limit "
305
+ f"{preview_limit} and skip {skip}"
306
+ )
307
+ lf = reader.read_preview(path, limit=preview_limit, offset=skip)
308
+ if limit is not None:
309
+ return lf, False
310
+
311
+ df = lf.collect()
312
+ truncated = len(df) > default_view_rows
313
+ if truncated:
314
+ df = df.head(default_view_rows)
315
+ return df.lazy(), truncated
316
+
317
+ logger.debug(f"Using full read for '{path}' because a query transform was provided")
318
+ lf = reader.read(path)
319
+ lf = _apply_query(lf, sql=sql, jmespath_expr=jmespath_expr)
320
+ return _apply_limit(
321
+ lf,
322
+ limit=limit,
323
+ skip=skip,
324
+ default_limit=default_view_rows if limit is None else None,
325
+ )
326
+
327
+
328
+ def _resolve_cat_output_format(
329
+ paths: list[str],
330
+ input_format: str | None,
331
+ ) -> tuple[list[pl.LazyFrame], str | None]:
332
+ """Read all inputs for `tab cat` and validate format consistency."""
333
+ files: list[pl.LazyFrame] = []
334
+ resolved_format = input_format.lower() if input_format is not None else None
335
+ if resolved_format is not None:
336
+ logger.debug(f"Using explicit shared input format '{resolved_format}' for `tab cat`")
337
+
338
+ for path in paths:
339
+ lf, current_format = _read_source(path, input_format)
340
+ if current_format is not None:
341
+ if resolved_format is None:
342
+ resolved_format = current_format
343
+ logger.debug(
344
+ f"Inferred shared `tab cat` format '{resolved_format}' from '{path}'"
345
+ )
346
+ elif current_format != resolved_format:
347
+ raise ValueError(
348
+ "All inputs to `tab cat` must use the same format unless -i/--input-format is provided"
349
+ )
350
+ files.append(lf)
351
+
352
+ return files, resolved_format
236
353
 
237
354
 
238
355
  @app.command()
@@ -247,19 +364,25 @@ def view(
247
364
  table_svg: TableSvgOpt = False,
248
365
  ) -> None:
249
366
  """View tabular data as a formatted table."""
250
- if is_stdin(path):
251
- lf = read_stdin(format=input)
252
- else:
253
- reader = infer_reader(path, format=input)
254
- lf = reader.read(path)
255
- lf = _apply_query(lf, sql=sql, jmespath_expr=jmespath_expr)
256
- lf, truncated = _apply_limit(
257
- lf, limit=limit, skip=skip, default_limit=20 if limit is None else None
367
+ effective_max_cell_len = (
368
+ max_cell_len if max_cell_len is not None else config.config.max_cell_length
369
+ )
370
+ if max_cell_len is None and effective_max_cell_len is not None:
371
+ logger.debug(
372
+ f"Inferred max_cell_len={effective_max_cell_len} for `tab view` from config"
373
+ )
374
+ lf, truncated = _prepare_view_frame(
375
+ path,
376
+ input_format=input,
377
+ sql=sql,
378
+ jmespath_expr=jmespath_expr,
379
+ limit=limit,
380
+ skip=skip,
258
381
  )
259
382
  writer = infer_writer(
260
383
  "table-svg" if table_svg else None,
261
384
  truncated=truncated,
262
- max_cell_len=max_cell_len,
385
+ max_cell_len=effective_max_cell_len,
263
386
  )
264
387
  for chunk in writer.write(lf):
265
388
  sys.stdout.buffer.write(chunk)
@@ -278,7 +401,7 @@ def schema(
278
401
  else:
279
402
  reader = infer_reader(path, format=input)
280
403
  table_schema = reader.schema(path)
281
- console = Console(force_terminal=True)
404
+ console = Console()
282
405
  console.print(table_schema)
283
406
 
284
407
 
@@ -299,7 +422,7 @@ def summary(
299
422
  else:
300
423
  handler = infer_reader(path, format=input)
301
424
  table_summary = handler.summary(path)
302
- console = Console(force_terminal=True)
425
+ console = Console()
303
426
  console.print(table_summary)
304
427
 
305
428
 
@@ -320,6 +443,9 @@ def convert(
320
443
  if output is not None:
321
444
  writer = infer_writer(format=output)
322
445
  elif input is not None:
446
+ logger.debug(
447
+ f"Inferred convert output format '{input.lower()}' from stdin input format"
448
+ )
323
449
  writer = infer_writer(format=input)
324
450
  else:
325
451
  raise ValueError(
@@ -333,9 +459,15 @@ def convert(
333
459
  if output is not None:
334
460
  writer = infer_writer(format=output)
335
461
  elif input is not None:
462
+ logger.debug(
463
+ f"Inferred convert output format '{input.lower()}' from explicit input format override"
464
+ )
336
465
  writer = infer_writer(format=input)
337
466
  else:
338
467
  writer = reader
468
+ logger.debug(
469
+ f"Inferred convert output format '{reader.format.extension()}' from source '{src}'"
470
+ )
339
471
  assert isinstance(writer, TableWriter)
340
472
  lf = reader.read(src)
341
473
  lf = _apply_query(lf, sql=sql, jmespath_expr=jmespath_expr)
@@ -351,24 +483,14 @@ def cat(
351
483
  jmespath_expr: JmespathOpt = None,
352
484
  ) -> None:
353
485
  """Concatenate tabular data from multiple files, or just print a single file."""
354
- files: list[pl.LazyFrame] = []
355
- reader = None
356
- for path in paths:
357
- if is_stdin(path):
358
- files.append(read_stdin(format=input))
359
- else:
360
- if reader is None:
361
- reader = infer_reader(path, format=input)
362
- files.append(reader.read(path))
486
+ files, resolved_format = _resolve_cat_output_format(paths, input)
363
487
  lf = pl.concat(files, how="vertical")
364
488
  lf = _apply_query(lf, sql=sql, jmespath_expr=jmespath_expr)
365
489
  if output is not None:
366
490
  writer = infer_writer(format=output)
367
- elif reader is not None:
368
- writer = infer_writer(format=reader.format.extension())
369
- assert isinstance(writer, TableWriter)
370
- elif input is not None:
371
- writer = infer_writer(format=input)
491
+ elif resolved_format is not None:
492
+ logger.debug(f"Inferred `tab cat` output format '{resolved_format}' from input sources")
493
+ writer = infer_writer(format=resolved_format)
372
494
  assert isinstance(writer, TableWriter)
373
495
  else:
374
496
  raise ValueError(
@@ -3,6 +3,8 @@
3
3
  import json
4
4
  from dataclasses import dataclass, fields
5
5
  from pathlib import Path
6
+ from types import UnionType
7
+ from typing import Any, Union, get_args, get_origin
6
8
 
7
9
  from loguru import logger
8
10
 
@@ -15,6 +17,10 @@ class Config:
15
17
  """Global configuration settings."""
16
18
 
17
19
  az_url_authority_is_account: bool = False
20
+ default_num_view_rows: int = 20
21
+ log_level: str = "INFO"
22
+ max_cell_length: int | None = None
23
+ num_remote_workers: int = 8
18
24
  sampling_size_for_schema_inference: int = 32
19
25
 
20
26
 
@@ -22,6 +28,15 @@ class Config:
22
28
  config: Config = Config()
23
29
 
24
30
 
31
+ def _matches_type(value: Any, expected_type: Any) -> bool:
32
+ origin = get_origin(expected_type)
33
+ if origin in {UnionType, Union}:
34
+ return any(_matches_type(value, option) for option in get_args(expected_type))
35
+ if expected_type is type(None):
36
+ return value is None
37
+ return type(value) is expected_type
38
+
39
+
25
40
  def load_config_file(path: Path = CONFIG_FILE) -> None:
26
41
  """Load settings from a JSON config file into the global config.
27
42
 
@@ -29,6 +44,7 @@ def load_config_file(path: Path = CONFIG_FILE) -> None:
29
44
  If the file does not exist, this is a no-op.
30
45
  """
31
46
  if not path.is_file():
47
+ logger.debug(f"No config file found at {path}; using built-in defaults")
32
48
  return
33
49
 
34
50
  text = path.read_text(encoding="utf-8")
@@ -41,7 +57,13 @@ def load_config_file(path: Path = CONFIG_FILE) -> None:
41
57
  known = {f.name: f.type for f in fields(Config)}
42
58
  for key, value in data.items():
43
59
  if key not in known:
44
- logger.warning("Unknown config key '{}' in {}", key, path)
60
+ logger.warning(f"Unknown config key '{key}' in {path}")
45
61
  continue
62
+ expected_type = known[key]
63
+ expected_name = getattr(expected_type, "__name__", str(expected_type))
64
+ if _matches_type(value, expected_type) is False:
65
+ raise ValueError(
66
+ f"Config key '{key}' must be of type {expected_name}, got {type(value).__name__}"
67
+ )
46
68
  setattr(config, key, value)
47
- logger.debug("Loaded config from {}", path)
69
+ logger.debug(f"Loaded config from {path}")
@@ -2,7 +2,7 @@
2
2
 
3
3
  from collections.abc import Iterable
4
4
  from io import BytesIO
5
- from typing import BinaryIO
5
+ from typing import BinaryIO, Callable
6
6
 
7
7
  import polars as pl
8
8
  import polars_fastavro
@@ -32,7 +32,12 @@ class AvroFormat(FormatHandler):
32
32
  # polars_fastavro doesn't support storage_options
33
33
  return list(polars_fastavro.scan_avro(url).collect_schema().items())
34
34
 
35
- def count_rows(self, url: str, storage_options: dict[str, str] | None = None) -> int:
35
+ def count_rows(
36
+ self,
37
+ url: str,
38
+ storage_options: dict[str, str] | None = None,
39
+ opener: Callable[[str], BinaryIO] | None = None,
40
+ ) -> int:
36
41
  # polars_fastavro doesn't support storage_options
37
42
  return polars_fastavro.scan_avro(url).select(pl.len()).collect().item()
38
43
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  from abc import ABC, abstractmethod
4
4
  from collections.abc import Iterable
5
- from typing import BinaryIO
5
+ from typing import BinaryIO, Callable
6
6
 
7
7
  import polars as pl
8
8
 
@@ -44,7 +44,12 @@ class FormatHandler(ABC):
44
44
  pass
45
45
 
46
46
  @abstractmethod
47
- def count_rows(self, url: str, storage_options: dict[str, str] | None = None) -> int:
47
+ def count_rows(
48
+ self,
49
+ url: str,
50
+ storage_options: dict[str, str] | None = None,
51
+ opener: Callable[[str], BinaryIO] | None = None,
52
+ ) -> int:
48
53
  """Count rows in the file."""
49
54
  pass
50
55
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  from collections.abc import Iterable
4
4
  from io import BytesIO
5
- from typing import BinaryIO
5
+ from typing import BinaryIO, Callable
6
6
 
7
7
  import polars as pl
8
8
 
@@ -30,7 +30,12 @@ class CsvFormat(FormatHandler):
30
30
  def collect_schema(self, url: str, storage_options: dict[str, str] | None = None) -> list[tuple[str, pl.DataType]]:
31
31
  return list(pl.scan_csv(url, separator=self.separator, storage_options=storage_options).collect_schema().items())
32
32
 
33
- def count_rows(self, url: str, storage_options: dict[str, str] | None = None) -> int:
33
+ def count_rows(
34
+ self,
35
+ url: str,
36
+ storage_options: dict[str, str] | None = None,
37
+ opener: Callable[[str], BinaryIO] | None = None,
38
+ ) -> int:
34
39
  return pl.scan_csv(url, separator=self.separator, storage_options=storage_options).select(pl.len()).collect().item()
35
40
 
36
41
  def write(self, lf: pl.LazyFrame) -> Iterable[bytes]:
@@ -2,7 +2,7 @@
2
2
 
3
3
  import json
4
4
  from collections.abc import Iterable
5
- from typing import BinaryIO
5
+ from typing import BinaryIO, Callable
6
6
 
7
7
  import polars as pl
8
8
 
@@ -27,7 +27,12 @@ class JsonlFormat(FormatHandler):
27
27
  def collect_schema(self, url: str, storage_options: dict[str, str] | None = None) -> list[tuple[str, pl.DataType]]:
28
28
  return list(pl.scan_ndjson(url, storage_options=storage_options).collect_schema().items())
29
29
 
30
- def count_rows(self, url: str, storage_options: dict[str, str] | None = None) -> int:
30
+ def count_rows(
31
+ self,
32
+ url: str,
33
+ storage_options: dict[str, str] | None = None,
34
+ opener: Callable[[str], BinaryIO] | None = None,
35
+ ) -> int:
31
36
  return pl.scan_ndjson(url, storage_options=storage_options).select(pl.len()).collect().item()
32
37
 
33
38
  def write(self, lf: pl.LazyFrame) -> Iterable[bytes]:
@@ -2,10 +2,12 @@
2
2
 
3
3
  from collections.abc import Iterable
4
4
  from io import BytesIO
5
- from typing import BinaryIO
5
+ from typing import BinaryIO, Callable
6
6
 
7
7
  from loguru import logger
8
8
  import polars as pl
9
+ import pyarrow as pa
10
+ import pyarrow.parquet as pq
9
11
 
10
12
  from tab_cli.formats.base import FormatHandler
11
13
 
@@ -27,8 +29,7 @@ def _scan_parquet_with_pyarrow_fallback(
27
29
  return lf
28
30
  except Exception as e:
29
31
  logger.warning(
30
- "Polars native Parquet reader failed ({}), retrying with PyArrow backend",
31
- e,
32
+ f"Polars native Parquet reader failed ({e}), retrying with PyArrow backend"
32
33
  )
33
34
  return pl.read_parquet(url, storage_options=storage_options, use_pyarrow=True).lazy()
34
35
 
@@ -51,12 +52,20 @@ class ParquetFormat(FormatHandler):
51
52
  def collect_schema(self, url: str, storage_options: dict[str, str] | None = None) -> list[tuple[str, pl.DataType]]:
52
53
  return list(_scan_parquet_with_pyarrow_fallback(url, storage_options=storage_options).collect_schema().items())
53
54
 
54
- def count_rows(self, url: str, storage_options: dict[str, str] | None = None) -> int:
55
- return _scan_parquet_with_pyarrow_fallback(url, storage_options=storage_options).select(pl.len()).collect().item()
55
def count_rows(
    self,
    url: str,
    storage_options: dict[str, str] | None = None,
    opener: Callable[[str], BinaryIO] | None = None,
) -> int:
    """Count rows in the Parquet file at ``url``.

    Args:
        url: Location of the Parquet file.
        storage_options: Options forwarded to the polars scan. Only used
            when no ``opener`` is supplied.
        opener: Optional callable that opens ``url`` and returns a binary
            stream usable as a context manager. When given, the count is
            read from the Parquet metadata of that stream.

    Returns:
        The number of rows in the file.
    """
    if opener is None and storage_options:
        # Builtin open() cannot use storage options, so without an opener
        # they would be silently dropped; let the scan helper consume them.
        return (
            _scan_parquet_with_pyarrow_fallback(url, storage_options=storage_options)
            .select(pl.len())
            .collect()
            .item()
        )

    # With neither an opener nor storage options, `url` is opened with the
    # builtin open() in binary mode.
    with (opener(url) if opener is not None else open(url, "rb")) as stream:
        # Row count comes from the Parquet metadata.
        return pq.ParquetFile(pa.PythonFile(stream, mode="r")).metadata.num_rows
56
66
 
57
67
  def extra_summary(self, url: str) -> dict[str, str | int | float] | None:
58
- # TODO: Parquet metadata
59
- pass
68
+ return None
60
69
 
61
70
  def write(self, lf: pl.LazyFrame) -> Iterable[bytes]:
62
71
  output = BytesIO()