tab-cli 0.1.7__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.gitignore +0 -1
- {tab_cli-0.1.7 → tab_cli-0.1.8}/AGENTS.md +6 -2
- tab_cli-0.1.8/CHANGELOG.md +34 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/Makefile +3 -3
- {tab_cli-0.1.7 → tab_cli-0.1.8}/PKG-INFO +1 -1
- {tab_cli-0.1.7 → tab_cli-0.1.8}/docs/cli-ref.md +2 -2
- {tab_cli-0.1.7 → tab_cli-0.1.8}/docs/configuration.md +8 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/pyproject.toml +1 -1
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/__init__.py +1 -1
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/cli.py +156 -34
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/config.py +24 -2
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/avro.py +7 -2
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/base.py +7 -2
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/csv.py +7 -2
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/jsonl.py +7 -2
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/parquet.py +16 -7
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/handlers/__init__.py +28 -4
- tab_cli-0.1.8/src/tab_cli/handlers/base.py +373 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/__init__.py +14 -4
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/aws.py +14 -15
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/az.py +11 -8
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/base.py +4 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/fsspec.py +22 -9
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/gcloud.py +17 -14
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/storage/local.py +12 -2
- tab_cli-0.1.8/tests/conftest.py +12 -0
- tab_cli-0.1.8/tests/test_cat.py +150 -0
- tab_cli-0.1.8/tests/test_config.py +181 -0
- tab_cli-0.1.8/tests/test_stdin.py +66 -0
- tab_cli-0.1.8/tests/test_storage.py +83 -0
- tab_cli-0.1.8/tests/test_summary.py +161 -0
- tab_cli-0.1.8/tests/test_view.py +192 -0
- tab_cli-0.1.8/uv.lock +2688 -0
- tab_cli-0.1.7/CHANGELOG.md +0 -20
- tab_cli-0.1.7/src/tab_cli/handlers/base.py +0 -259
- tab_cli-0.1.7/tests/test_cli.py +0 -357
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/.gitignore +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/282ad8cf3324b2679a7d460c0fc324adfa21dcfad2f197ac6991b98ec91f98495bb3ddb2cba36ce5dfa28a52063a373bb03f5d2e34f6e0c7b6b81b3046a4d7d0 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/891d7ed26f62b0f8757b081e9d76840636877f1613ee17b7695fe2ee8640258c56e01291934e3797728bc2300a1b5f41e3f3ef81ab532c0e5ab475bb9b6f097a +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/8926397375b8328137652c18cd4371808b214cb26864d77dd33c8eea895e10e62c297064c3d4c703d58ee11dee81bbf9851525e3a0c33151f54c164b8a4343b0 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/89d29d938e64b1295e138faedfa5df6f6729a67e5ba8e0c3fdd1c9266e0f59a4bc35b8a21e14225072317879af3323fa0457e01d9b08608770bc23957feec6c0 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/95fc2854c0fa2528b03e2bcabc7612f56f0bcb3d6f54a06293e21d1887885a449907b5b320a1a94bac712bf56fdad0a4184b17047c1fc179dfa83acab3f70d21 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/9a58ceae46de4649375b6b880b8500c85d34c8e9bc650dc4c993afb2fa8d45c4180331821c6226a0fb1475b04a0b9a849502647ef9c4dcac74769eb501c885fa +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/a19ae062208aa20b7310705af3d26ef53095a9dabfad080883cc7a32e98687063179db95cb2c71ef9064801c59fa46d261f172d83e83f96a39c274387b59dca4 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/d1169a8d10067493e42752c2a7615ff27f55bd90c38b91feef918958e29a2239ee289a0cde8385441d1f0fe9af1fb634fa6d56438b9f33ec382e81fa59d70b54 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/e171e5c6039c050c2368584a537d36df221b1f9f23d285c0399b95b14608d67006229823e6831bff7d8b0c2f9e86ebaec9c6811461119195f430ade055073fed +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/op_links/ee118dd19aac4e1cb354f83a37dadca70bc5f086cc8d36cd0059b222bf8c7250824401ff681518d0120f8db1f96f0025c464dcfc3b1ee28777e8c76325760134 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/282ad8cf3324b2679a7d460c0fc324adfa21dcfad2f197ac6991b98ec91f98495bb3ddb2cba36ce5dfa28a52063a373bb03f5d2e34f6e0c7b6b81b3046a4d7d0 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/891d7ed26f62b0f8757b081e9d76840636877f1613ee17b7695fe2ee8640258c56e01291934e3797728bc2300a1b5f41e3f3ef81ab532c0e5ab475bb9b6f097a +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/8926397375b8328137652c18cd4371808b214cb26864d77dd33c8eea895e10e62c297064c3d4c703d58ee11dee81bbf9851525e3a0c33151f54c164b8a4343b0 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/89d29d938e64b1295e138faedfa5df6f6729a67e5ba8e0c3fdd1c9266e0f59a4bc35b8a21e14225072317879af3323fa0457e01d9b08608770bc23957feec6c0 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/95fc2854c0fa2528b03e2bcabc7612f56f0bcb3d6f54a06293e21d1887885a449907b5b320a1a94bac712bf56fdad0a4184b17047c1fc179dfa83acab3f70d21 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/9a58ceae46de4649375b6b880b8500c85d34c8e9bc650dc4c993afb2fa8d45c4180331821c6226a0fb1475b04a0b9a849502647ef9c4dcac74769eb501c885fa +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/a19ae062208aa20b7310705af3d26ef53095a9dabfad080883cc7a32e98687063179db95cb2c71ef9064801c59fa46d261f172d83e83f96a39c274387b59dca4 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/d1169a8d10067493e42752c2a7615ff27f55bd90c38b91feef918958e29a2239ee289a0cde8385441d1f0fe9af1fb634fa6d56438b9f33ec382e81fa59d70b54 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/e171e5c6039c050c2368584a537d36df221b1f9f23d285c0399b95b14608d67006229823e6831bff7d8b0c2f9e86ebaec9c6811461119195f430ade055073fed +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/operations/ee118dd19aac4e1cb354f83a37dadca70bc5f086cc8d36cd0059b222bf8c7250824401ff681518d0120f8db1f96f0025c464dcfc3b1ee28777e8c76325760134 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/1029dd1c1f4430d8a667a0e48d0b817652c7ddca6f5ff56cac1755e5bb0c1cb7586935941c9b36f26cab05c0effe6154d073bd1dffbe37f22fed0a6e7d79201f +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/3228b9bef9d8374b5532b40df2da8be3bfc86de713bdad7fe620977ffa7c56db83928678caa792bf0d328db607028e045a9e41423ef7501e5b550651c3815ffe +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/424c9a53f5458b328d77ed6a943dc35662e949befa8725cfc7eead01a270417c8d07c1001b11623a088da8d2c9c34a41573314fa2394643147e4027e8a96a605 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/44107e81054aa5544b36eab8d811908a559b4d9027dc3fa1762c44e39551652199ef2a31f9bbcda79773c906151c12578cb18c43e0045de8b20c357272e1c62a +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/52ea57bde33e8ef4718d833c2df3cf0a9e90fdcd5715c0caad50b4e37ea60aca2b8c71096de6920dc936c40b6291e896293f91c7cfd2aa96cf6c2aa49ef662c8 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/5ea52f6d4771afda4747b9f44954102c02ae2d0686f8aa9eca36c29796bbba0d14e851c0dca0a6af17bfbbbac174e3be645ae708d958390168e85c64786fc9ef +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/86a1cfa113399acc7dd2dc90262a845558affb5e9373b6300dff68a485482c5e17ace9466bbc23b4301013ca1a27a577ca57ea838113f45bf321a64a242b1ad3 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/8ef99a12ae0624db198d9ecd83014fb8353c4731e0ba9a472f1fc339784308e38f1287d0765a7b0444f1a89c218c76a820dc4c9a3a39c1cedcd7423a4f5f88dc +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/cb3e8da2bf2b7efae4a6e8fd0b8562dacd16ca0531b173a91480a9e60ee795ac5bec13fc6eb461e03edc9a26f5ff1d5ba53521a1c1a6c1ee1765b544b7d7bf73 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/segments/d27ad326963b75736b636adad9fb812eb3f2871e0efb4bc7db37d4b701a4282911eaaee91bed3a759e940769b667be1ed66f2d7f2f41ac3906b87ab7eec19c3a +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/index/type +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_heads/heads/891d7ed26f62b0f8757b081e9d76840636877f1613ee17b7695fe2ee8640258c56e01291934e3797728bc2300a1b5f41e3f3ef81ab532c0e5ab475bb9b6f097a +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_heads/type +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/282ad8cf3324b2679a7d460c0fc324adfa21dcfad2f197ac6991b98ec91f98495bb3ddb2cba36ce5dfa28a52063a373bb03f5d2e34f6e0c7b6b81b3046a4d7d0 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/891d7ed26f62b0f8757b081e9d76840636877f1613ee17b7695fe2ee8640258c56e01291934e3797728bc2300a1b5f41e3f3ef81ab532c0e5ab475bb9b6f097a +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/8926397375b8328137652c18cd4371808b214cb26864d77dd33c8eea895e10e62c297064c3d4c703d58ee11dee81bbf9851525e3a0c33151f54c164b8a4343b0 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/89d29d938e64b1295e138faedfa5df6f6729a67e5ba8e0c3fdd1c9266e0f59a4bc35b8a21e14225072317879af3323fa0457e01d9b08608770bc23957feec6c0 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/95fc2854c0fa2528b03e2bcabc7612f56f0bcb3d6f54a06293e21d1887885a449907b5b320a1a94bac712bf56fdad0a4184b17047c1fc179dfa83acab3f70d21 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/9a58ceae46de4649375b6b880b8500c85d34c8e9bc650dc4c993afb2fa8d45c4180331821c6226a0fb1475b04a0b9a849502647ef9c4dcac74769eb501c885fa +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/a19ae062208aa20b7310705af3d26ef53095a9dabfad080883cc7a32e98687063179db95cb2c71ef9064801c59fa46d261f172d83e83f96a39c274387b59dca4 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/d1169a8d10067493e42752c2a7615ff27f55bd90c38b91feef918958e29a2239ee289a0cde8385441d1f0fe9af1fb634fa6d56438b9f33ec382e81fa59d70b54 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/e171e5c6039c050c2368584a537d36df221b1f9f23d285c0399b95b14608d67006229823e6831bff7d8b0c2f9e86ebaec9c6811461119195f430ade055073fed +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/operations/ee118dd19aac4e1cb354f83a37dadca70bc5f086cc8d36cd0059b222bf8c7250824401ff681518d0120f8db1f96f0025c464dcfc3b1ee28777e8c76325760134 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/type +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/123b0e36150cf8e99d644d2dfd1a7b0c8d2f676a78248d6902516d9ae58903665c79ec3f5f6729b98a1237dcf2abc1d41e690ae0ad30888c84786bcc9de5e314 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/2ac0b7d8b1fdfac82b3ff3926e0018f72ef2b48f85b41f5fa541271370e1197d41e37c2c6d62af0c6974658c4a9e5e945b8efcbcc7748bdd99bd9483f7e13e22 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/67b8396b301935ff624ff98952c57d8ee021e7d885e1220053b993e7bc822a4cf061298e9643ded745c55f3ed8e923a6731524129993d2664f48b60660761145 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/9c6fca696f77383cc87d068fe3f5912466b157dccc6465973e653dd4d2c02e2eca9c0725c071857bf3f4f0e263259eafc18f1739615a501e672bb2afd415316b +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/cae2e3e5952cb5ba93f27e3898d90dac6c41fc9e20c66ef0791e71b91dc103d924996c921c88179705264a224fb5d2bb29091fc8f18f0ea7a088aaabb859ea2a +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/cd8efb6da14127c81c37b56ead18a39b30f2cd154891185a6e906efb491dbf63f290eed3c31d4725f490a93208ffbe5cdc031d5b6de38fcb77cbb11f0357118f +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/e434359d6306a4f6997733b9b5308299984f05219c92e1b2a31f1203126be0fada5fc09a2ba86c98d1318981cb53997c7f8674ef25da6ca7bf9fd849598cd355 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/ea75e7ccb42f52b013dc1b45bb4e6d692b37c5e38bd39356ba2806be83ca5ca03ce20481db2788428126f93663e6723f99f4d46c5c705f5b12b70e2127ab15ba +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/fa5dec7ee06fbc6cbb2798c8e98bab482a6750776de41406fb06893c83f71f5015ec7ee2873642714b7bbc1c496f880e3acf44ffdabf2f87a39e0fbd68a4cc46 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/op_store/views/ffde172c6285c71851b22780e34962d8f9234067af59ff2dc38b74e905dc540fe35b944e8c0b2d4230dfcf778424a335494cd0aeea1bb4be2a11c4b5428ad465 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/14e3f15273e204cadfe38dd2f38cebd1343a6bfbb91c6af0f5f9de6e9003d8ec8b0eb676975af9dca2d06ee5e6b3886c3c5d3755c6f149b6b09172cccc35adff +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/42e0f32dd10ba6ae9f2297ca8ad0bd16337f14545b29f956ce380a7ab92bab771cbc9e04755752a6fa13231286f724379d6662b0fb257ef2cc2c52fe680eb95d +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/466a441e56afdde383cef1d6127ca1d4c825157feb65a59f8f6ff5fa7a523bd683d0b7a6bb16c00afff53a890319a472d98da9af3dea5675168a4d424aa7af32 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/5403fa06049419ddcf620ce0dc20911583e1b1062b42630ee325aa5ac2f918dd266dfed93ebc808a2aecb37ac3a33e251fe1396fd24b8ed566bfbd61a81ff959 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/56119fa0480cb66159978b1a8c9b031f9e978b7a3172eb87407a701fea34fdd90db7771cd433a2cf7e8a84a7b49c924f973eccb0eb05685ec42f36f7ef61cc06 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/90b1f4de4ba65e0652f1de45e4c84b623f99bd9d9453667e19c7040857bd397e59e7a84406f2ab6204d25789d995c47c350d8abc47a4bbb102059f3111f20028 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/9215bd4ac28fdecba111c63bace46d0f1c253ad3af44e0e74bd43d30757bef2e655538c1172c8f25769bce0a3c669b713440773c1859f0a136d9ca42501f2470 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/a79320f784ef97b3d6297e55a48b17a517a38d95d5c61ba8d01c59d68dcd2ccf3a96479f4fc3c4cafdcd56dc7bd58b1cb987e079764c1646533ab32418900727 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/abe6b7a350e1604cb8f5a2cd10cef13a019bd797770e6dc37414d33d98dcf36d8ec80a2ae13bd7dd2d2076772c0ffadec0b53ba72f4669b461fff2da5d30f1ba +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/b7689d54193a3798edd58d758966ad65ad57297c5276eab3e4ef07380779363efe9e462a149f4d42f55bbe004eb5ba88bf35df4c78ac975275530382390159d6 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/e75f5f3431d172d7e9434dfaef2be50812105b3ead73eeb10345c9b6892e9cbb5ee0602ebb0ceaf5ab87d22f45930dc30d136d0aac77310fb0261b3857ffde9b +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/extra/heads/56119fa0480cb66159978b1a8c9b031f9e978b7a3172eb87407a701fea34fdd90db7771cd433a2cf7e8a84a7b49c924f973eccb0eb05685ec42f36f7ef61cc06 +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/git_target +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/store/type +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/submodule_store/type +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/repo/workspace_store/index +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/working_copy/checkout +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/working_copy/tree_state +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/.jj/working_copy/type +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/LICENSE +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/README.md +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/docs/cloud.md +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/docs/gen_assets.sh +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/docs/index.md +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/mkdocs.yml +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/formats/__init__.py +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/handlers/cli_table.py +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/style.py +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/src/tab_cli/url_parser.py +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/tests/__init__.py +0 -0
- {tab_cli-0.1.7 → tab_cli-0.1.8}/tests/assets/test.csv +0 -0
|
@@ -43,7 +43,7 @@ Use it as the default operating guide when changing code in this repo.
|
|
|
43
43
|
- The CLI tests rely on `typer.testing.CliRunner`.
|
|
44
44
|
- Test data is stored under `tests/assets`.
|
|
45
45
|
- Existing tests emphasize user-visible CLI output, not internal implementation details.
|
|
46
|
-
-
|
|
46
|
+
- The CLI tests are split across focused files under `tests/`; extend the nearest existing file unless a new one is clearly warranted.
|
|
47
47
|
- Assert both `exit_code` and key output fragments.
|
|
48
48
|
- For stdin support, pass `"-"` as the path and provide `input=` to `runner.invoke(...)`.
|
|
49
49
|
|
|
@@ -89,6 +89,9 @@ Use it as the default operating guide when changing code in this repo.
|
|
|
89
89
|
|
|
90
90
|
## Types
|
|
91
91
|
|
|
92
|
+
- NEVER implicitly cast any variable to bool with `if var:` or `if not var:` unless the variable is already a bool. Do NOT rely on truthiness for control flow:
|
|
93
|
+
for example, testing if a list is empty with `if not my_list:` is not allowed. Instead, use explicit length checks like `if len(my_list) == 0:`.
|
|
94
|
+
Always write `if x is not None:` or `if x is None:` when checking for `None` values.
|
|
92
95
|
- Type hints are used widely and should be preserved.
|
|
93
96
|
- Prefer modern built-in generics like `list[str]` and `dict[str, Any]`.
|
|
94
97
|
- Use `X | None` instead of `Optional[X]` in new code unless matching nearby style requires otherwise.
|
|
@@ -118,6 +121,7 @@ Use it as the default operating guide when changing code in this repo.
|
|
|
118
121
|
## Logging And Output
|
|
119
122
|
|
|
120
123
|
- The CLI configures Loguru with `RichHandler` in the Typer callback.
|
|
124
|
+
- When writing Loguru messages, use f-strings instead of Loguru brace-style formatting.
|
|
121
125
|
- User-facing table and summary output is rendered with Rich.
|
|
122
126
|
- Streaming command output usually writes bytes to `sys.stdout.buffer`.
|
|
123
127
|
- Keep stderr/stdout behavior consistent with the existing command design.
|
|
@@ -155,4 +159,4 @@ Use it as the default operating guide when changing code in this repo.
|
|
|
155
159
|
- Run `uv run pytest` for broader validation before finalizing cross-cutting changes.
|
|
156
160
|
- Mention pre-existing lint or type-check failures separately from regressions you introduce.
|
|
157
161
|
- Update CHANGELOG.md when necessary.
|
|
158
|
-
-
|
|
162
|
+
-
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
- 0.1.8:
|
|
2
|
+
- Improved `tab view` performance for partitioned directories by reading only as many early partitions as needed for an unfiltered preview.
|
|
3
|
+
- Added glob-pattern support for multi-file inputs such as `s3://.../date=*/*.parquet`.
|
|
4
|
+
- Sped up Parquet row counting in summaries by reading footer metadata instead of scanning file contents.
|
|
5
|
+
- Fixed S3 Polars `storage_options` to avoid nested `client_kwargs` values that could break native reads.
|
|
6
|
+
- Added `default_num_view_rows` config so the default `tab view` preview size can be customized.
|
|
7
|
+
- Added `log_level` config so the CLI log level can default from `~/.config/tab/config.json` when `--log-level` is omitted.
|
|
8
|
+
- Added `max_cell_length` config so `tab view` can default to truncating long cell values without passing `--max-cell-len` every time.
|
|
9
|
+
- Added `num_remote_workers` config to parallelize remote per-partition summary row counting.
|
|
10
|
+
- Validated config file value types instead of silently accepting invalid JSON types.
|
|
11
|
+
- Fixed the developer `Makefile` targets to point at `src/tab_cli`.
|
|
12
|
+
- Tightened multi-file summary validation to reject inconsistent schemas, not just mismatched column counts.
|
|
13
|
+
- `tab cat` now rejects mixed input formats with a clear error instead of reusing the first reader implicitly.
|
|
14
|
+
- Cleaned up package metadata and repository hygiene issues including version drift and `uv.lock` ignore rules.
|
|
15
|
+
- 0.1.7:
|
|
16
|
+
- Optional dependency groups are now named `tab-cli[s3|gs|az]`, in accordance with the protocol name.
|
|
17
|
+
- 0.1.6:
|
|
18
|
+
- Fixed bug in pyarrow loading of Parquet files.
|
|
19
|
+
- Added global config file support: settings can be persisted in `~/.config/tab/config.json`. Config file values serve as defaults that CLI flags override.
|
|
20
|
+
- 0.1.5:
|
|
21
|
+
- Added stdin support: use `-` as the file path to read from stdin (e.g. `cat data.csv | tab view -i csv -`). Requires `-i`/`--input-format` since format cannot be inferred. Works with `view`, `schema`, `summary`, `convert`, and `cat`.
|
|
22
|
+
- Added row-wise JMESPath queries via `--jmespath` / `--jp` on `view`, `convert`, and `cat`. Object results become columns; non-object results go into a `value` column. `--sql` and `--jp` are mutually exclusive.
|
|
23
|
+
- Implemented `--jp` with `LazyFrame.map_batches(...)` so row reshaping stays batch-oriented instead of materializing the full transformed dataset up front.
|
|
24
|
+
- 0.1.4:
|
|
25
|
+
- Removed `tab sql` subcommand; SQL is now a `--sql` option on `tab view`, `tab convert`, and `tab cat`.
|
|
26
|
+
- Automatic PyArrow fallback for Parquet files that fail to read with Polars' native reader.
|
|
27
|
+
- 0.1.3:
|
|
28
|
+
- Separate `tab view` from `tab cat`: `tab view` does not convert formats, `tab cat` does.
|
|
29
|
+
- Added `--max-cell-len` option to `tab view` to truncate long cell contents.
|
|
30
|
+
- 0.1.2:
|
|
31
|
+
- Bugfix on reading directories.
|
|
32
|
+
- 0.1.1:
|
|
33
|
+
- Better credential handling for Azure Blob Storage and Google Cloud Storage.
|
|
34
|
+
- 0.1.0: Initial release
|
|
@@ -13,13 +13,13 @@ clean:
|
|
|
13
13
|
find . -type d -name __pycache__ -exec rm -rf {} +
|
|
14
14
|
|
|
15
15
|
lint:
|
|
16
|
-
uv run ruff check tab_cli
|
|
16
|
+
uv run ruff check src/tab_cli tests
|
|
17
17
|
|
|
18
18
|
format:
|
|
19
|
-
uv run ruff format tab_cli
|
|
19
|
+
uv run ruff format src/tab_cli tests
|
|
20
20
|
|
|
21
21
|
typecheck:
|
|
22
|
-
uv run ty check tab_cli
|
|
22
|
+
uv run ty check src/tab_cli
|
|
23
23
|
|
|
24
24
|
test:
|
|
25
25
|
uv run pytest
|
|
@@ -17,7 +17,7 @@ Options:
|
|
|
17
17
|
| `--jmespath` / `--jp` | JMESPath expression to apply to each row as JSON. Object outputs become columns; non-object outputs go to a `value` column. The result shape must stay consistent across rows. |
|
|
18
18
|
| `--limit` | Maximum number of rows to display. |
|
|
19
19
|
| `--skip` | Number of rows to skip from the beginning. |
|
|
20
|
-
| `--max-cell-len` | Truncate cell contents longer than this.
|
|
20
|
+
| `--max-cell-len` | Truncate cell contents longer than this. If omitted, `max_cell_length` from config is used when set. |
|
|
21
21
|
|
|
22
22
|
## `tab schema`
|
|
23
23
|
|
|
@@ -91,4 +91,4 @@ Options:
|
|
|
91
91
|
| Option | Description |
|
|
92
92
|
|-------------------------|------------------------------------------------------------------------------------------------------------------------------|
|
|
93
93
|
| `--az-url-authority-is-account` | Interpret az:// URL authority as storage account name instead of container name. See [Azure](azure.md) for more information. |
|
|
94
|
-
| `--log-level` | Log level from `{DEBUG, INFO, WARNING, ERROR, CRITICAL}`.
|
|
94
|
+
| `--log-level` | Log level from `{DEBUG, INFO, WARNING, ERROR, CRITICAL}`. If omitted, uses `log_level` from config. |
|
|
@@ -11,6 +11,10 @@ mkdir -p ~/.config/tab
|
|
|
11
11
|
cat > ~/.config/tab/config.json << 'EOF'
|
|
12
12
|
{
|
|
13
13
|
"az_url_authority_is_account": false,
|
|
14
|
+
"default_num_view_rows": 20,
|
|
15
|
+
"log_level": "INFO",
|
|
16
|
+
"max_cell_length": null,
|
|
17
|
+
"num_remote_workers": 8,
|
|
14
18
|
"sampling_size_for_schema_inference": 32
|
|
15
19
|
}
|
|
16
20
|
EOF
|
|
@@ -21,6 +25,10 @@ EOF
|
|
|
21
25
|
| Key | Type | Default | Description |
|
|
22
26
|
|-----|------|---------|-------------|
|
|
23
27
|
| `az_url_authority_is_account` | `bool` | `false` | Interpret `az://` URL authority as storage account name instead of container name. |
|
|
28
|
+
| `default_num_view_rows` | `int` | `20` | Default number of rows shown by `tab view` when `--limit` is omitted. |
|
|
29
|
+
| `log_level` | `str` | `"INFO"` | Default CLI log level when `--log-level` is omitted. |
|
|
30
|
+
| `max_cell_length` | `int \| null` | `null` | Default maximum cell length for `tab view`. The CLI `--max-cell-len` flag overrides it. |
|
|
31
|
+
| `num_remote_workers` | `int` | `8` | Maximum worker threads for remote per-partition summary work such as Parquet row counts. |
|
|
24
32
|
| `sampling_size_for_schema_inference` | `int` | `32` | Number of rows sampled for schema inference (e.g. when using `--jp`). |
|
|
25
33
|
|
|
26
34
|
## Precedence
|
|
@@ -79,6 +79,8 @@ app = typer.Typer(
|
|
|
79
79
|
no_args_is_help=True,
|
|
80
80
|
)
|
|
81
81
|
|
|
82
|
+
DEFAULT_VIEW_TRUNCATION_PROBE_ROWS = 1
|
|
83
|
+
|
|
82
84
|
|
|
83
85
|
@app.callback()
|
|
84
86
|
def main_callback(
|
|
@@ -90,13 +92,18 @@ def main_callback(
|
|
|
90
92
|
),
|
|
91
93
|
] = False,
|
|
92
94
|
log_level: Annotated[
|
|
93
|
-
str,
|
|
95
|
+
str | None,
|
|
94
96
|
typer.Option(
|
|
95
|
-
"--log-level",
|
|
97
|
+
"--log-level",
|
|
98
|
+
help="Log level from {DEBUG, INFO, WARNING, ERROR, CRITICAL}; defaults to config when omitted",
|
|
96
99
|
),
|
|
97
|
-
] =
|
|
100
|
+
] = None,
|
|
98
101
|
) -> None:
|
|
99
102
|
"""Global options for tab_cli CLI."""
|
|
103
|
+
load_config_file()
|
|
104
|
+
effective_log_level = (
|
|
105
|
+
log_level.upper() if log_level is not None else config.config.log_level.upper()
|
|
106
|
+
)
|
|
100
107
|
logger.remove()
|
|
101
108
|
logger.add(
|
|
102
109
|
RichHandler(
|
|
@@ -105,9 +112,8 @@ def main_callback(
|
|
|
105
112
|
markup=True,
|
|
106
113
|
),
|
|
107
114
|
format="{message}",
|
|
108
|
-
level=
|
|
115
|
+
level=effective_log_level,
|
|
109
116
|
)
|
|
110
|
-
load_config_file()
|
|
111
117
|
# CLI flags override config file values
|
|
112
118
|
if az_url_authority_is_account:
|
|
113
119
|
config.config.az_url_authority_is_account = az_url_authority_is_account
|
|
@@ -176,12 +182,20 @@ def _apply_jmespath(lf: pl.LazyFrame, expression: str) -> pl.LazyFrame:
|
|
|
176
182
|
|
|
177
183
|
compiled = jmespath.compile(expression)
|
|
178
184
|
sample_df = lf.slice(0, Config.sampling_size_for_schema_inference).collect()
|
|
185
|
+
logger.debug(
|
|
186
|
+
"Inferring JMESPath output schema from "
|
|
187
|
+
f"{Config.sampling_size_for_schema_inference} sampled row(s)"
|
|
188
|
+
)
|
|
179
189
|
if sample_df.is_empty():
|
|
190
|
+
logger.debug("JMESPath schema inference sample was empty; returning empty LazyFrame")
|
|
180
191
|
return pl.DataFrame().lazy()
|
|
181
192
|
|
|
182
193
|
transformed_sample, result_mode = _transform_jmespath_batch(sample_df, compiled)
|
|
183
194
|
output_schema = transformed_sample.schema
|
|
184
195
|
expected_columns = tuple(transformed_sample.columns)
|
|
196
|
+
logger.debug(
|
|
197
|
+
f"Inferred JMESPath result mode '{result_mode}' with columns {expected_columns}"
|
|
198
|
+
)
|
|
185
199
|
|
|
186
200
|
return lf.map_batches(
|
|
187
201
|
lambda batch: _transform_jmespath_batch(
|
|
@@ -223,16 +237,119 @@ def _apply_limit(
|
|
|
223
237
|
and returns whether the data was truncated.
|
|
224
238
|
"""
|
|
225
239
|
if limit is None and default_limit is not None:
|
|
240
|
+
logger.debug(
|
|
241
|
+
f"Applying inferred default row limit {default_limit} with skip {skip}"
|
|
242
|
+
)
|
|
226
243
|
lf = lf.slice(skip, length=default_limit + 1)
|
|
227
244
|
df = lf.collect()
|
|
228
245
|
truncated = len(df) > default_limit
|
|
229
246
|
if truncated:
|
|
230
247
|
df = df.head(default_limit)
|
|
248
|
+
logger.debug("Detected truncated preview after applying inferred default row limit")
|
|
231
249
|
return df.lazy(), truncated
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
250
|
+
if skip > 0 or limit is not None:
|
|
251
|
+
lf = lf.slice(skip, length=limit)
|
|
252
|
+
return lf, False
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _read_source(path: str, input_format: str | None) -> tuple[pl.LazyFrame, str | None]:
|
|
256
|
+
"""Read a source path and return its LazyFrame and inferred format."""
|
|
257
|
+
if is_stdin(path):
|
|
258
|
+
logger.debug(
|
|
259
|
+
"Using stdin source with explicit format "
|
|
260
|
+
f"'{input_format.lower() if input_format is not None else None}'"
|
|
261
|
+
)
|
|
262
|
+
return (
|
|
263
|
+
read_stdin(format=input_format),
|
|
264
|
+
input_format.lower() if input_format is not None else None,
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
reader = infer_reader(path, format=input_format)
|
|
268
|
+
logger.debug(
|
|
269
|
+
f"Read source '{path}' using inferred format '{reader.format.extension()}'"
|
|
270
|
+
)
|
|
271
|
+
return reader.read(path), reader.format.extension()
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _prepare_view_frame(
|
|
275
|
+
path: str,
|
|
276
|
+
input_format: str | None,
|
|
277
|
+
sql: str | None,
|
|
278
|
+
jmespath_expr: str | None,
|
|
279
|
+
limit: int | None,
|
|
280
|
+
skip: int,
|
|
281
|
+
) -> tuple[pl.LazyFrame, bool]:
|
|
282
|
+
"""Prepare the LazyFrame used by `tab view` and report truncation."""
|
|
283
|
+
default_view_rows = config.config.default_num_view_rows
|
|
284
|
+
|
|
285
|
+
if is_stdin(path):
|
|
286
|
+
logger.debug("Preparing view for stdin input")
|
|
287
|
+
lf = read_stdin(format=input_format)
|
|
288
|
+
lf = _apply_query(lf, sql=sql, jmespath_expr=jmespath_expr)
|
|
289
|
+
return _apply_limit(
|
|
290
|
+
lf,
|
|
291
|
+
limit=limit,
|
|
292
|
+
skip=skip,
|
|
293
|
+
default_limit=default_view_rows if limit is None else None,
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
reader = infer_reader(path, format=input_format)
|
|
297
|
+
if sql is None and jmespath_expr is None:
|
|
298
|
+
preview_limit = (
|
|
299
|
+
limit
|
|
300
|
+
if limit is not None
|
|
301
|
+
else default_view_rows + DEFAULT_VIEW_TRUNCATION_PROBE_ROWS
|
|
302
|
+
)
|
|
303
|
+
logger.debug(
|
|
304
|
+
f"Using preview read for '{path}' with inferred preview limit "
|
|
305
|
+
f"{preview_limit} and skip {skip}"
|
|
306
|
+
)
|
|
307
|
+
lf = reader.read_preview(path, limit=preview_limit, offset=skip)
|
|
308
|
+
if limit is not None:
|
|
309
|
+
return lf, False
|
|
310
|
+
|
|
311
|
+
df = lf.collect()
|
|
312
|
+
truncated = len(df) > default_view_rows
|
|
313
|
+
if truncated:
|
|
314
|
+
df = df.head(default_view_rows)
|
|
315
|
+
return df.lazy(), truncated
|
|
316
|
+
|
|
317
|
+
logger.debug(f"Using full read for '{path}' because a query transform was provided")
|
|
318
|
+
lf = reader.read(path)
|
|
319
|
+
lf = _apply_query(lf, sql=sql, jmespath_expr=jmespath_expr)
|
|
320
|
+
return _apply_limit(
|
|
321
|
+
lf,
|
|
322
|
+
limit=limit,
|
|
323
|
+
skip=skip,
|
|
324
|
+
default_limit=default_view_rows if limit is None else None,
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _resolve_cat_output_format(
|
|
329
|
+
paths: list[str],
|
|
330
|
+
input_format: str | None,
|
|
331
|
+
) -> tuple[list[pl.LazyFrame], str | None]:
|
|
332
|
+
"""Read all inputs for `tab cat` and validate format consistency."""
|
|
333
|
+
files: list[pl.LazyFrame] = []
|
|
334
|
+
resolved_format = input_format.lower() if input_format is not None else None
|
|
335
|
+
if resolved_format is not None:
|
|
336
|
+
logger.debug(f"Using explicit shared input format '{resolved_format}' for `tab cat`")
|
|
337
|
+
|
|
338
|
+
for path in paths:
|
|
339
|
+
lf, current_format = _read_source(path, input_format)
|
|
340
|
+
if current_format is not None:
|
|
341
|
+
if resolved_format is None:
|
|
342
|
+
resolved_format = current_format
|
|
343
|
+
logger.debug(
|
|
344
|
+
f"Inferred shared `tab cat` format '{resolved_format}' from '{path}'"
|
|
345
|
+
)
|
|
346
|
+
elif current_format != resolved_format:
|
|
347
|
+
raise ValueError(
|
|
348
|
+
"All inputs to `tab cat` must use the same format unless -i/--input-format is provided"
|
|
349
|
+
)
|
|
350
|
+
files.append(lf)
|
|
351
|
+
|
|
352
|
+
return files, resolved_format
|
|
236
353
|
|
|
237
354
|
|
|
238
355
|
@app.command()
|
|
@@ -247,19 +364,25 @@ def view(
|
|
|
247
364
|
table_svg: TableSvgOpt = False,
|
|
248
365
|
) -> None:
|
|
249
366
|
"""View tabular data as a formatted table."""
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
367
|
+
effective_max_cell_len = (
|
|
368
|
+
max_cell_len if max_cell_len is not None else config.config.max_cell_length
|
|
369
|
+
)
|
|
370
|
+
if max_cell_len is None and effective_max_cell_len is not None:
|
|
371
|
+
logger.debug(
|
|
372
|
+
f"Inferred max_cell_len={effective_max_cell_len} for `tab view` from config"
|
|
373
|
+
)
|
|
374
|
+
lf, truncated = _prepare_view_frame(
|
|
375
|
+
path,
|
|
376
|
+
input_format=input,
|
|
377
|
+
sql=sql,
|
|
378
|
+
jmespath_expr=jmespath_expr,
|
|
379
|
+
limit=limit,
|
|
380
|
+
skip=skip,
|
|
258
381
|
)
|
|
259
382
|
writer = infer_writer(
|
|
260
383
|
"table-svg" if table_svg else None,
|
|
261
384
|
truncated=truncated,
|
|
262
|
-
max_cell_len=
|
|
385
|
+
max_cell_len=effective_max_cell_len,
|
|
263
386
|
)
|
|
264
387
|
for chunk in writer.write(lf):
|
|
265
388
|
sys.stdout.buffer.write(chunk)
|
|
@@ -278,7 +401,7 @@ def schema(
|
|
|
278
401
|
else:
|
|
279
402
|
reader = infer_reader(path, format=input)
|
|
280
403
|
table_schema = reader.schema(path)
|
|
281
|
-
console = Console(
|
|
404
|
+
console = Console()
|
|
282
405
|
console.print(table_schema)
|
|
283
406
|
|
|
284
407
|
|
|
@@ -299,7 +422,7 @@ def summary(
|
|
|
299
422
|
else:
|
|
300
423
|
handler = infer_reader(path, format=input)
|
|
301
424
|
table_summary = handler.summary(path)
|
|
302
|
-
console = Console(
|
|
425
|
+
console = Console()
|
|
303
426
|
console.print(table_summary)
|
|
304
427
|
|
|
305
428
|
|
|
@@ -320,6 +443,9 @@ def convert(
|
|
|
320
443
|
if output is not None:
|
|
321
444
|
writer = infer_writer(format=output)
|
|
322
445
|
elif input is not None:
|
|
446
|
+
logger.debug(
|
|
447
|
+
f"Inferred convert output format '{input.lower()}' from stdin input format"
|
|
448
|
+
)
|
|
323
449
|
writer = infer_writer(format=input)
|
|
324
450
|
else:
|
|
325
451
|
raise ValueError(
|
|
@@ -333,9 +459,15 @@ def convert(
|
|
|
333
459
|
if output is not None:
|
|
334
460
|
writer = infer_writer(format=output)
|
|
335
461
|
elif input is not None:
|
|
462
|
+
logger.debug(
|
|
463
|
+
f"Inferred convert output format '{input.lower()}' from explicit input format override"
|
|
464
|
+
)
|
|
336
465
|
writer = infer_writer(format=input)
|
|
337
466
|
else:
|
|
338
467
|
writer = reader
|
|
468
|
+
logger.debug(
|
|
469
|
+
f"Inferred convert output format '{reader.format.extension()}' from source '{src}'"
|
|
470
|
+
)
|
|
339
471
|
assert isinstance(writer, TableWriter)
|
|
340
472
|
lf = reader.read(src)
|
|
341
473
|
lf = _apply_query(lf, sql=sql, jmespath_expr=jmespath_expr)
|
|
@@ -351,24 +483,14 @@ def cat(
|
|
|
351
483
|
jmespath_expr: JmespathOpt = None,
|
|
352
484
|
) -> None:
|
|
353
485
|
"""Concatenate tabular data from multiple files, or just print a single file."""
|
|
354
|
-
files
|
|
355
|
-
reader = None
|
|
356
|
-
for path in paths:
|
|
357
|
-
if is_stdin(path):
|
|
358
|
-
files.append(read_stdin(format=input))
|
|
359
|
-
else:
|
|
360
|
-
if reader is None:
|
|
361
|
-
reader = infer_reader(path, format=input)
|
|
362
|
-
files.append(reader.read(path))
|
|
486
|
+
files, resolved_format = _resolve_cat_output_format(paths, input)
|
|
363
487
|
lf = pl.concat(files, how="vertical")
|
|
364
488
|
lf = _apply_query(lf, sql=sql, jmespath_expr=jmespath_expr)
|
|
365
489
|
if output is not None:
|
|
366
490
|
writer = infer_writer(format=output)
|
|
367
|
-
elif
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
elif input is not None:
|
|
371
|
-
writer = infer_writer(format=input)
|
|
491
|
+
elif resolved_format is not None:
|
|
492
|
+
logger.debug(f"Inferred `tab cat` output format '{resolved_format}' from input sources")
|
|
493
|
+
writer = infer_writer(format=resolved_format)
|
|
372
494
|
assert isinstance(writer, TableWriter)
|
|
373
495
|
else:
|
|
374
496
|
raise ValueError(
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
import json
|
|
4
4
|
from dataclasses import dataclass, fields
|
|
5
5
|
from pathlib import Path
|
|
6
|
+
from types import UnionType
|
|
7
|
+
from typing import Any, Union, get_args, get_origin
|
|
6
8
|
|
|
7
9
|
from loguru import logger
|
|
8
10
|
|
|
@@ -15,6 +17,10 @@ class Config:
|
|
|
15
17
|
"""Global configuration settings."""
|
|
16
18
|
|
|
17
19
|
az_url_authority_is_account: bool = False
|
|
20
|
+
default_num_view_rows: int = 20
|
|
21
|
+
log_level: str = "INFO"
|
|
22
|
+
max_cell_length: int | None = None
|
|
23
|
+
num_remote_workers: int = 8
|
|
18
24
|
sampling_size_for_schema_inference: int = 32
|
|
19
25
|
|
|
20
26
|
|
|
@@ -22,6 +28,15 @@ class Config:
|
|
|
22
28
|
config: Config = Config()
|
|
23
29
|
|
|
24
30
|
|
|
31
|
+
def _matches_type(value: Any, expected_type: Any) -> bool:
|
|
32
|
+
origin = get_origin(expected_type)
|
|
33
|
+
if origin in {UnionType, Union}:
|
|
34
|
+
return any(_matches_type(value, option) for option in get_args(expected_type))
|
|
35
|
+
if expected_type is type(None):
|
|
36
|
+
return value is None
|
|
37
|
+
return type(value) is expected_type
|
|
38
|
+
|
|
39
|
+
|
|
25
40
|
def load_config_file(path: Path = CONFIG_FILE) -> None:
|
|
26
41
|
"""Load settings from a JSON config file into the global config.
|
|
27
42
|
|
|
@@ -29,6 +44,7 @@ def load_config_file(path: Path = CONFIG_FILE) -> None:
|
|
|
29
44
|
If the file does not exist, this is a no-op.
|
|
30
45
|
"""
|
|
31
46
|
if not path.is_file():
|
|
47
|
+
logger.debug(f"No config file found at {path}; using built-in defaults")
|
|
32
48
|
return
|
|
33
49
|
|
|
34
50
|
text = path.read_text(encoding="utf-8")
|
|
@@ -41,7 +57,13 @@ def load_config_file(path: Path = CONFIG_FILE) -> None:
|
|
|
41
57
|
known = {f.name: f.type for f in fields(Config)}
|
|
42
58
|
for key, value in data.items():
|
|
43
59
|
if key not in known:
|
|
44
|
-
logger.warning("Unknown config key '{}' in {}"
|
|
60
|
+
logger.warning(f"Unknown config key '{key}' in {path}")
|
|
45
61
|
continue
|
|
62
|
+
expected_type = known[key]
|
|
63
|
+
expected_name = getattr(expected_type, "__name__", str(expected_type))
|
|
64
|
+
if _matches_type(value, expected_type) is False:
|
|
65
|
+
raise ValueError(
|
|
66
|
+
f"Config key '{key}' must be of type {expected_name}, got {type(value).__name__}"
|
|
67
|
+
)
|
|
46
68
|
setattr(config, key, value)
|
|
47
|
-
logger.debug("Loaded config from {}"
|
|
69
|
+
logger.debug(f"Loaded config from {path}")
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from collections.abc import Iterable
|
|
4
4
|
from io import BytesIO
|
|
5
|
-
from typing import BinaryIO
|
|
5
|
+
from typing import BinaryIO, Callable
|
|
6
6
|
|
|
7
7
|
import polars as pl
|
|
8
8
|
import polars_fastavro
|
|
@@ -32,7 +32,12 @@ class AvroFormat(FormatHandler):
|
|
|
32
32
|
# polars_fastavro doesn't support storage_options
|
|
33
33
|
return list(polars_fastavro.scan_avro(url).collect_schema().items())
|
|
34
34
|
|
|
35
|
-
def count_rows(
|
|
35
|
+
def count_rows(
|
|
36
|
+
self,
|
|
37
|
+
url: str,
|
|
38
|
+
storage_options: dict[str, str] | None = None,
|
|
39
|
+
opener: Callable[[str], BinaryIO] | None = None,
|
|
40
|
+
) -> int:
|
|
36
41
|
# polars_fastavro doesn't support storage_options
|
|
37
42
|
return polars_fastavro.scan_avro(url).select(pl.len()).collect().item()
|
|
38
43
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
4
|
from collections.abc import Iterable
|
|
5
|
-
from typing import BinaryIO
|
|
5
|
+
from typing import BinaryIO, Callable
|
|
6
6
|
|
|
7
7
|
import polars as pl
|
|
8
8
|
|
|
@@ -44,7 +44,12 @@ class FormatHandler(ABC):
|
|
|
44
44
|
pass
|
|
45
45
|
|
|
46
46
|
@abstractmethod
|
|
47
|
-
def count_rows(
|
|
47
|
+
def count_rows(
|
|
48
|
+
self,
|
|
49
|
+
url: str,
|
|
50
|
+
storage_options: dict[str, str] | None = None,
|
|
51
|
+
opener: Callable[[str], BinaryIO] | None = None,
|
|
52
|
+
) -> int:
|
|
48
53
|
"""Count rows in the file."""
|
|
49
54
|
pass
|
|
50
55
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from collections.abc import Iterable
|
|
4
4
|
from io import BytesIO
|
|
5
|
-
from typing import BinaryIO
|
|
5
|
+
from typing import BinaryIO, Callable
|
|
6
6
|
|
|
7
7
|
import polars as pl
|
|
8
8
|
|
|
@@ -30,7 +30,12 @@ class CsvFormat(FormatHandler):
|
|
|
30
30
|
def collect_schema(self, url: str, storage_options: dict[str, str] | None = None) -> list[tuple[str, pl.DataType]]:
|
|
31
31
|
return list(pl.scan_csv(url, separator=self.separator, storage_options=storage_options).collect_schema().items())
|
|
32
32
|
|
|
33
|
-
def count_rows(
|
|
33
|
+
def count_rows(
|
|
34
|
+
self,
|
|
35
|
+
url: str,
|
|
36
|
+
storage_options: dict[str, str] | None = None,
|
|
37
|
+
opener: Callable[[str], BinaryIO] | None = None,
|
|
38
|
+
) -> int:
|
|
34
39
|
return pl.scan_csv(url, separator=self.separator, storage_options=storage_options).select(pl.len()).collect().item()
|
|
35
40
|
|
|
36
41
|
def write(self, lf: pl.LazyFrame) -> Iterable[bytes]:
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
from collections.abc import Iterable
|
|
5
|
-
from typing import BinaryIO
|
|
5
|
+
from typing import BinaryIO, Callable
|
|
6
6
|
|
|
7
7
|
import polars as pl
|
|
8
8
|
|
|
@@ -27,7 +27,12 @@ class JsonlFormat(FormatHandler):
|
|
|
27
27
|
def collect_schema(self, url: str, storage_options: dict[str, str] | None = None) -> list[tuple[str, pl.DataType]]:
|
|
28
28
|
return list(pl.scan_ndjson(url, storage_options=storage_options).collect_schema().items())
|
|
29
29
|
|
|
30
|
-
def count_rows(
|
|
30
|
+
def count_rows(
|
|
31
|
+
self,
|
|
32
|
+
url: str,
|
|
33
|
+
storage_options: dict[str, str] | None = None,
|
|
34
|
+
opener: Callable[[str], BinaryIO] | None = None,
|
|
35
|
+
) -> int:
|
|
31
36
|
return pl.scan_ndjson(url, storage_options=storage_options).select(pl.len()).collect().item()
|
|
32
37
|
|
|
33
38
|
def write(self, lf: pl.LazyFrame) -> Iterable[bytes]:
|
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
from collections.abc import Iterable
|
|
4
4
|
from io import BytesIO
|
|
5
|
-
from typing import BinaryIO
|
|
5
|
+
from typing import BinaryIO, Callable
|
|
6
6
|
|
|
7
7
|
from loguru import logger
|
|
8
8
|
import polars as pl
|
|
9
|
+
import pyarrow as pa
|
|
10
|
+
import pyarrow.parquet as pq
|
|
9
11
|
|
|
10
12
|
from tab_cli.formats.base import FormatHandler
|
|
11
13
|
|
|
@@ -27,8 +29,7 @@ def _scan_parquet_with_pyarrow_fallback(
|
|
|
27
29
|
return lf
|
|
28
30
|
except Exception as e:
|
|
29
31
|
logger.warning(
|
|
30
|
-
"Polars native Parquet reader failed ({}), retrying with PyArrow backend"
|
|
31
|
-
e,
|
|
32
|
+
f"Polars native Parquet reader failed ({e}), retrying with PyArrow backend"
|
|
32
33
|
)
|
|
33
34
|
return pl.read_parquet(url, storage_options=storage_options, use_pyarrow=True).lazy()
|
|
34
35
|
|
|
@@ -51,12 +52,20 @@ class ParquetFormat(FormatHandler):
|
|
|
51
52
|
def collect_schema(self, url: str, storage_options: dict[str, str] | None = None) -> list[tuple[str, pl.DataType]]:
|
|
52
53
|
return list(_scan_parquet_with_pyarrow_fallback(url, storage_options=storage_options).collect_schema().items())
|
|
53
54
|
|
|
54
|
-
def count_rows(
|
|
55
|
-
|
|
55
|
+
def count_rows(
|
|
56
|
+
self,
|
|
57
|
+
url: str,
|
|
58
|
+
storage_options: dict[str, str] | None = None,
|
|
59
|
+
opener: Callable[[str], BinaryIO] | None = None,
|
|
60
|
+
) -> int:
|
|
61
|
+
if opener is not None:
|
|
62
|
+
with opener(url) as stream:
|
|
63
|
+
return pq.ParquetFile(pa.PythonFile(stream, mode="r")).metadata.num_rows
|
|
64
|
+
with open(url, "rb") as stream:
|
|
65
|
+
return pq.ParquetFile(pa.PythonFile(stream, mode="r")).metadata.num_rows
|
|
56
66
|
|
|
57
67
|
def extra_summary(self, url: str) -> dict[str, str | int | float] | None:
|
|
58
|
-
|
|
59
|
-
pass
|
|
68
|
+
return None
|
|
60
69
|
|
|
61
70
|
def write(self, lf: pl.LazyFrame) -> Iterable[bytes]:
|
|
62
71
|
output = BytesIO()
|