tinybird-toolset 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2019, Raul Ochoa
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,2 @@
1
+ global-exclude *.h
2
+ global-exclude *.cpp
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.1
2
+ Name: tinybird-toolset
3
+ Version: 1.0.0
4
+ Summary: UNKNOWN
5
+ Home-page: https://gitlab.com/tinybird/clickhouse-toolset
6
+ Author: Tinybird.co
7
+ Author-email: support@tinybird.co
8
+ License: UNKNOWN
9
+ Description: UNKNOWN
10
+ Platform: UNKNOWN
11
+ Requires-Python: >=3.8, <3.14
12
+ Provides-Extra: test
@@ -0,0 +1,188 @@
1
+ # ClickHouse query tools
2
+
3
+ Exposes ClickHouse internals to parse and manipulate ClickHouse queries.
4
+
5
+ Currently made of 1 module, clickhouse-toolset, which includes functionality both for the server and the CLI.
6
+
7
+ ## Installing prebuilts
8
+
9
+ The module is available in pypi:
10
+
11
+ ```bash
12
+ pip install tinybird-toolset
13
+ ```
14
+ You need access to the API token, which may be shared via LastPass; if you don't have access, request it.
15
+
16
+ If we don't have prebuilts for your platform the installation will fail.
17
+
18
+ ## No prebuilts available
19
+
20
+ To simplify things, the main module source distribution includes only the python code so that installing it is possible,
21
+ but it will throw when trying to use it:
22
+
23
+ ```python
24
+ >>> from chtoolset import query as chquery
25
+ Traceback (most recent call last):
26
+ File "<stdin>", line 1, in <module>
27
+ File "/home/raul/.local/lib/python3.9/site-packages/chtoolset/__init__.py", line 1, in <module>
28
+ from . import query
29
+ File "/home/raul/.local/lib/python3.9/site-packages/chtoolset/query.py", line 1, in <module>
30
+ from chtoolset._query import replace_tables, format, tables, table_if_is_simple_query
31
+ ModuleNotFoundError: No module named 'chtoolset._query'
32
+ ```
33
+
34
+ If you see this in the analytics server that means that your platform isn't supported and needs a prebuilt. If you see
35
+ this in the CLI that means that we are not handling the exception as it should (using a remote server).
36
+
37
+ ## Development
38
+
39
+ ### Install pre-requisites (Ubuntu)
40
+
41
+ ```bash
42
+ sudo apt-get update
43
+ sudo apt-get install git cmake ccache python3 python3-pip ninja-build nasm yasm gawk lsb-release wget software-properties-common gnupg
44
+
45
+ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
46
+
47
+ sudo add-apt-repository ppa:deadsnakes/ppa
48
+ sudo apt update
49
+ sudo apt install python3.8-dev python3.9-dev python3.10-dev python3.11-dev python3.12-dev python3.13-dev
50
+
51
+ pip3 install virtualenv
52
+ ```
53
+
54
+ ### Run tests
55
+
56
+ First, you need to clone the repo and **its submodules**.
57
+
58
+ ```bash
59
+ git clone --recursive git@gitlab.com:tinybird/clickhouse-toolset.git
60
+ ```
61
+
62
+ Then, you will compile the dependencies and the module itself. You need a modern compiler (Clang 17) to build it, both under Linux and MacOS (AppleClang is not supported).
63
+
64
+ The best option is to use the Makefile targets which will use virtualenv to install dependencies, build the packages, install them too and run tests:
65
+
66
+ ```bash
67
+ make test-3.9
68
+ ```
69
+
70
+ ### Generate pre-built packages
71
+
72
+ You need to install all the necessary python releases so they are available via virtualenv.
73
+
74
+ #### If you are on MacOS, prepare your environment:
75
+
76
+ You need to be able to compile ClickHouse for MacOS so we follow [their guide](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/development/build-osx.md) to install the necessary packages:
77
+
78
+ * Install Homebrew
79
+ * Install Xcode and Command Line Tools
80
+ * Install the necessary tools (cmake ninja libtool gettext llvm@16 gcc ccache findutils grep)
81
+ * Make sure your local clang is pointing to the llvm installation and not to the default from OSX / Xcode. You can check it by running these commands. You should see a similar output:
82
+ ```bash
83
+ ➜ clickhouse-toolset git:(master) ✗ clang --version
84
+ Homebrew clang version 16.0.6
85
+ Target: x86_64-apple-darwin22.5.0
86
+ Thread model: posix
87
+ InstalledDir: /usr/local/opt/llvm/bin
88
+ ➜ clickhouse-toolset git:(master) ✗ llvm-as --version
89
+ Homebrew LLVM version 16.0.6
90
+ Optimized build.
91
+ ```
92
+ * Follow the normal build (`make build`)
93
+
94
+ #### Clean environment if you already did some compilation before
95
+ To clean your environment you need to make sure that the repository is clean and updated with the expected values. To do that you can use:
96
+
97
+ ```bash
98
+ make distclean
99
+ git clean -fdx
100
+ ```
101
+
102
+ #### Make sure you have the expected version of the submodules
103
+ ```bash
104
+ git submodule sync && git submodule update --init --recursive
105
+
106
+ # If it fails because of changes inside the ClickHouse folder related to the patches we apply, just reset all the changes in that directory so these patches can be reapplied in the next step.
107
+ cd clickhouse
108
+ git reset --hard
109
+ ```
110
+
111
+ #### Compile and generate the new .whl files
112
+ Use:
113
+
114
+ ```bash
115
+ make build
116
+ ```
117
+
118
+ ```
119
+ Disclaimer: The MacOS package for 3.8 will be generated but it will not work and tests will fail. We have not been using that version since 0.27.dev0 and no one has requested it, so we'll deprecate the generation for this version soon. You can continue with the process, just don't push that version to PyPI.
120
+ ```
121
+
122
+ Note that to reduce the version of the dependencies in the binary wheel, it is better if generated on an old Linux distribution, and it's best to use the CI.
123
+
124
+ #### Tip: re-compiling and debugging
125
+
126
+ A couple of environment variables can be defined with a non-empty value to help retrying the compilation and debugging:
127
+
128
+ * `OMIT_PATCHES` prevents the application of the ClickHouse patches: this is useful if you've already applied them to avoid having to restore the original source code, since some patches may not re-apply cleanly.
129
+ * `DEBUG_SYMBOLS` triggers the generation of debug symbols for the python extension code (query.cpp and the C++ functions, but not for the ClickHouse code).
130
+
131
+ #### Finish preparation of Linux packages
132
+
133
+ In order to improve compatibility for Linux packages you need to use auditwheel to "repair" them before the upload to pypi:
134
+
135
+ ```bash
136
+ for i in $(ls /tmp/artifacts/*whl); do auditwheel repair --plat manylinux2014_x86_64 $i; done
137
+ ```
138
+
139
+ This will check and rename them to `manylinux2014` or `manylinux_2_17` (provided they have been compiled correctly). If auditwheel fails, or the result is still `linux_x86_64`, then **don't upload them** as they won't be compatible with older Linux releases.
140
+
141
+ #### Finish preparation of MacOS packages
142
+
143
+ In the case of MacOS we need to check the dependencies of the generated wheel using [delocate](https://github.com/matthew-brett/delocate). Use `delocate-listdeps` to check that there aren't any external dependencies and `delocate-wheel` if there are.
144
+
145
+ You can use it by running:
146
+ ```bash
147
+ # Install it
148
+ pip install delocate
149
+
150
+ # Execute it for each .whl generated
151
+ delocate-listdeps ./dist/clickhouse_toolset-0.27.dev0-cp311-cp311-macosx_12_0_x86_64.whl
152
+ ```
153
+
154
+ In addition, to increase compatibility of the generated packages we need to rename them to the oldest release with binary compatibility (based on python tags), which we decided on 11.0:
155
+
156
+ If you're executing it from Linux, run:
157
+ ```bash
158
+ find . -type f -name \*macosx_*_*_x86\* | perl -pe 'print $_; s/macosx_.._.+_x86/macosx_11_0_x86/' | xargs -d "\n" -n2 mv
159
+ find . -type f -name \*macosx_*_*_arm64\* | perl -pe 'print $_; s/macosx_.._.+_arm64/macosx_11_0_arm64/' | xargs -d "\n" -n2 mv
160
+ ```
161
+
162
+ If you're executing it from MacOS, run:
163
+ ```bash
164
+ find -E . -type f -regex '.*macosx_.._._x86.*' | perl -pe 'print $_; s/macosx_.._.+_x86/macosx_11_0_x86/' | xargs -n2 mv
165
+ find -E . -type f -regex '.*macosx_.._._arm64.*' | perl -pe 'print $_; s/macosx_.._.+_arm64/macosx_11_0_arm64/' | xargs -n2 mv
166
+ ```
167
+
168
+ ## Examples
169
+
170
+ Check tests directory
171
+
172
+ ## Publish
173
+
174
+ 1. Update VERSION in `setup.py`
175
+
176
+ 2. Publish the source package for the version you want to use to the **test repository** (if you don't have permissions to do that, upload the generated `.whl` packages to [the compiled package's GCP bucket](https://console.cloud.google.com/storage/browser/tinybird-bdist_wheels?pageState=(%22StorageObjectListTable%22:(%22f%22:%22%255B%255D%22))&prefix=&) and ask for help):
177
+
178
+ ```
179
+ twine upload --repository-url https://test.pypi.org/legacy/ dist/tinybird-toolset-1.0.0.tar.gz
180
+ ```
181
+
182
+ 3. Publish the whl packages (wheelhouse/ is generated by auditwheel):
183
+
184
+ ```
185
+ twine upload --repository-url https://test.pypi.org/legacy/ wheelhouse/*
186
+ ```
187
+
188
+ 4. Once tested, repeat for the production repository (no repository url)
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,60 @@
1
+ from setuptools import setup, Extension
2
+
3
+ NAME = 'tinybird-toolset'
4
+ VERSION = '1.0.0'
5
+
6
+ try:
7
+ from conf import *
8
+ chquery = Extension(
9
+ 'chtoolset._query',
10
+ sources=['src/query.cpp',],
11
+ depends=['conf.py',
12
+ 'functions/AccessControl.h',
13
+ 'functions/Aggregation.h',
14
+ 'functions/CheckCompatibleTypes.h',
15
+ 'functions/CheckValidWriteQuery.h',
16
+ 'functions/ReplaceTables.h',
17
+ 'functions/Tables.h',
18
+ 'functions/TBQueryParser.h',
19
+ 'functions/Validation.h',
20
+ 'functions/simdjsonHelpers.h',
21
+ 'functions/JSONPathQuery.h',
22
+ 'functions/JSONPathTree.h',
23
+ 'functions/DateTimeParser.h',
24
+ 'functions/RowBinaryEncoder.h',
25
+ 'src/PythonThreadHandler.h',
26
+ 'ts_build/libCHToolset.a'],
27
+ )
28
+ setup(
29
+ name=NAME,
30
+ version=VERSION,
31
+ url='https://gitlab.com/tinybird/clickhouse-toolset',
32
+ author='Tinybird.co',
33
+ author_email='support@tinybird.co',
34
+ packages=['chtoolset'],
35
+ package_dir={'': 'src'},
36
+ python_requires='>=3.8, <3.14',
37
+ install_requires=[],
38
+ extras_require={
39
+ 'test': requirements_from_file('requirements-test.txt')
40
+ },
41
+ cmdclass={
42
+ 'clickhouse': ClickHouseBuildExt,
43
+ 'toolset': ToolsetBuildWithFromCH,
44
+ 'build_ext': CustomBuildWithFromCH,
45
+ },
46
+ ext_modules=[chquery]
47
+ )
48
+
49
+ except ModuleNotFoundError:
50
+ setup(
51
+ name=NAME,
52
+ version=VERSION,
53
+ url='https://gitlab.com/tinybird/clickhouse-toolset',
54
+ author='Tinybird.co',
55
+ author_email='support@tinybird.co',
56
+ packages=['chtoolset'],
57
+ package_dir={'': 'src'},
58
+ python_requires='>=3.8, <3.14',
59
+ install_requires=[],
60
+ )
@@ -0,0 +1 @@
1
+ from . import query
@@ -0,0 +1,59 @@
1
+ from chtoolset._query import replace_tables, \
2
+ format, \
3
+ tables, \
4
+ table_if_is_simple_query, \
5
+ query_get_type, \
6
+ check_compatible_types, \
7
+ check_valid_write_query, \
8
+ get_left_table, \
9
+ rewrite_aggregation_states, \
10
+ parser_cache_info, \
11
+ parser_cache_reset, \
12
+ explain_ast, \
13
+ create_row_binary_encoder, \
14
+ apply_row_binary_encoder, \
15
+ delete_row_binary_encoder
16
+
17
+
18
+ class RowBinaryEncoderError(Exception):
19
+ """Custom exception for RowBinaryEncoder errors"""
20
+ pass
21
+
22
+
23
+ class RowBinaryEncoder():
24
+ def __init__(self, schema: str, legacy_conversion_mode: bool = True):
25
+ if not isinstance(schema, str):
26
+ raise TypeError("Schema must be a string")
27
+
28
+ try:
29
+ self._encoder_ptr = create_row_binary_encoder(schema, legacy_conversion_mode)
30
+ if not self._encoder_ptr:
31
+ raise RowBinaryEncoderError("Failed to create encoder")
32
+ except Exception as e:
33
+ raise RowBinaryEncoderError(f"Error initializing encoder: {str(e)}") from e
34
+
35
+ def __enter__(self):
36
+ if not self._encoder_ptr:
37
+ raise RowBinaryEncoderError("Encoder was already closed")
38
+ return self
39
+
40
+ def __exit__(self, exception_type, exception_value, exception_traceback):
41
+ self.close()
42
+
43
+ def encode(self, block: str) -> bytes:
44
+ if not isinstance(block, str):
45
+ raise TypeError("Block must be a string")
46
+
47
+ if not self._encoder_ptr:
48
+ raise RowBinaryEncoderError("Encoder was already closed")
49
+
50
+ try:
51
+ result = apply_row_binary_encoder(self._encoder_ptr, block)
52
+ return result
53
+ except Exception as e:
54
+ raise RowBinaryEncoderError(f"Error encoding block: {str(e)}") from e
55
+
56
+ def close(self):
57
+ if self._encoder_ptr:
58
+ delete_row_binary_encoder(self._encoder_ptr)
59
+ self._encoder_ptr = None
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.1
2
+ Name: tinybird-toolset
3
+ Version: 1.0.0
4
+ Summary: UNKNOWN
5
+ Home-page: https://gitlab.com/tinybird/clickhouse-toolset
6
+ Author: Tinybird.co
7
+ Author-email: support@tinybird.co
8
+ License: UNKNOWN
9
+ Description: UNKNOWN
10
+ Platform: UNKNOWN
11
+ Requires-Python: >=3.8, <3.14
12
+ Provides-Extra: test
@@ -0,0 +1,11 @@
1
+ LICENSE
2
+ MANIFEST.in
3
+ README.md
4
+ setup.py
5
+ src/chtoolset/__init__.py
6
+ src/chtoolset/query.py
7
+ src/tinybird_toolset.egg-info/PKG-INFO
8
+ src/tinybird_toolset.egg-info/SOURCES.txt
9
+ src/tinybird_toolset.egg-info/dependency_links.txt
10
+ src/tinybird_toolset.egg-info/requires.txt
11
+ src/tinybird_toolset.egg-info/top_level.txt