termflow-ir 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. termflow_ir-0.1.2/.gitignore +11 -0
  2. termflow_ir-0.1.2/CMakeLists.txt +178 -0
  3. termflow_ir-0.1.2/PKG-INFO +174 -0
  4. termflow_ir-0.1.2/README.md +151 -0
  5. termflow_ir-0.1.2/bindings/python/module.cpp +296 -0
  6. termflow_ir-0.1.2/cmake/termflowConfig.cmake.in +6 -0
  7. termflow_ir-0.1.2/docker/Dockerfile +30 -0
  8. termflow_ir-0.1.2/docker-compose.yml +9 -0
  9. termflow_ir-0.1.2/docs/customization.md +228 -0
  10. termflow_ir-0.1.2/docs/installation-roadmap.md +118 -0
  11. termflow_ir-0.1.2/docs/installation.md +194 -0
  12. termflow_ir-0.1.2/docs/usage.md +237 -0
  13. termflow_ir-0.1.2/examples/analyze_query.cpp +94 -0
  14. termflow_ir-0.1.2/examples/analyze_text.cpp +39 -0
  15. termflow_ir-0.1.2/examples/custom_analyzer.cpp +66 -0
  16. termflow_ir-0.1.2/examples/extract_terms.cpp +44 -0
  17. termflow_ir-0.1.2/examples/find_package_consumer/CMakeLists.txt +9 -0
  18. termflow_ir-0.1.2/examples/find_package_consumer/main.cpp +15 -0
  19. termflow_ir-0.1.2/include/termflow/analysis/analyzer.hpp +53 -0
  20. termflow_ir-0.1.2/include/termflow/analysis/ascii_folding_filter.hpp +12 -0
  21. termflow_ir-0.1.2/include/termflow/analysis/char_filter.hpp +21 -0
  22. termflow_ir-0.1.2/include/termflow/analysis/english_analyzer.hpp +90 -0
  23. termflow_ir-0.1.2/include/termflow/analysis/english_possessive_filter.hpp +12 -0
  24. termflow_ir-0.1.2/include/termflow/analysis/lower_case_filter.hpp +12 -0
  25. termflow_ir-0.1.2/include/termflow/analysis/porter_stem_filter.hpp +12 -0
  26. termflow_ir-0.1.2/include/termflow/analysis/standard_tokenizer.hpp +34 -0
  27. termflow_ir-0.1.2/include/termflow/analysis/stop_filter.hpp +22 -0
  28. termflow_ir-0.1.2/include/termflow/analysis/term_extractor.hpp +55 -0
  29. termflow_ir-0.1.2/include/termflow/analysis/token.hpp +27 -0
  30. termflow_ir-0.1.2/include/termflow/analysis/token_filter.hpp +22 -0
  31. termflow_ir-0.1.2/include/termflow/analysis/tokenizer.hpp +24 -0
  32. termflow_ir-0.1.2/include/termflow/analysis/unicode_normalize_filter.hpp +12 -0
  33. termflow_ir-0.1.2/include/termflow/query/query_analyzer.hpp +53 -0
  34. termflow_ir-0.1.2/include/termflow/query/query_parser.hpp +30 -0
  35. termflow_ir-0.1.2/include/termflow/query/query_types.hpp +96 -0
  36. termflow_ir-0.1.2/include/termflow/query/rewrite_loader.hpp +33 -0
  37. termflow_ir-0.1.2/include/termflow/query/rewrite_validator.hpp +78 -0
  38. termflow_ir-0.1.2/pyproject.toml +89 -0
  39. termflow_ir-0.1.2/python/termflow/__init__.py +32 -0
  40. termflow_ir-0.1.2/python/termflow/cli.py +258 -0
  41. termflow_ir-0.1.2/src/analysis/analyzer.cpp +71 -0
  42. termflow_ir-0.1.2/src/analysis/ascii_folding_filter.cpp +13 -0
  43. termflow_ir-0.1.2/src/analysis/english_analyzer.cpp +105 -0
  44. termflow_ir-0.1.2/src/analysis/english_possessive_filter.cpp +13 -0
  45. termflow_ir-0.1.2/src/analysis/lower_case_filter.cpp +13 -0
  46. termflow_ir-0.1.2/src/analysis/porter_stem_filter.cpp +15 -0
  47. termflow_ir-0.1.2/src/analysis/standard_tokenizer.cpp +199 -0
  48. termflow_ir-0.1.2/src/analysis/stop_filter.cpp +39 -0
  49. termflow_ir-0.1.2/src/analysis/term_extractor.cpp +149 -0
  50. termflow_ir-0.1.2/src/analysis/unicode_normalize_filter.cpp +13 -0
  51. termflow_ir-0.1.2/src/query/query_analyzer.cpp +105 -0
  52. termflow_ir-0.1.2/src/query/query_parser.cpp +70 -0
  53. termflow_ir-0.1.2/src/query/rewrite_internal.hpp +50 -0
  54. termflow_ir-0.1.2/src/query/rewrite_loader.cpp +215 -0
  55. termflow_ir-0.1.2/src/query/rewrite_validator.cpp +292 -0
  56. termflow_ir-0.1.2/src/util/porter_stemmer.cpp +531 -0
  57. termflow_ir-0.1.2/src/util/porter_stemmer.hpp +10 -0
  58. termflow_ir-0.1.2/src/util/unicode.cpp +190 -0
  59. termflow_ir-0.1.2/src/util/unicode.hpp +17 -0
  60. termflow_ir-0.1.2/tests/english_analyzer_tests.cpp +241 -0
  61. termflow_ir-0.1.2/tests/filter_tests.cpp +109 -0
  62. termflow_ir-0.1.2/tests/python_smoke_test.py +147 -0
  63. termflow_ir-0.1.2/tests/query_tests.cpp +336 -0
  64. termflow_ir-0.1.2/tests/standard_tokenizer_tests.cpp +82 -0
  65. termflow_ir-0.1.2/tests/term_extractor_tests.cpp +42 -0
  66. termflow_ir-0.1.2/tests/test_framework.cpp +22 -0
  67. termflow_ir-0.1.2/tests/test_framework.hpp +80 -0
  68. termflow_ir-0.1.2/tests/test_helpers.hpp +38 -0
  69. termflow_ir-0.1.2/tests/test_main.cpp +23 -0
  70. termflow_ir-0.1.2/tools/build_python_dist.sh +5 -0
  71. termflow_ir-0.1.2/tools/publish_python_dist.sh +9 -0
  72. termflow_ir-0.1.2/tools/test_full_build.sh +31 -0
  73. termflow_ir-0.1.2/tools/test_in_docker.sh +4 -0
  74. termflow_ir-0.1.2/tools/test_installed_consumer.sh +47 -0
  75. termflow_ir-0.1.2/tools/test_python_wheel_install.sh +35 -0
@@ -0,0 +1,11 @@
1
+ /.DS_Store
2
+ /__pycache__/
3
+ *.pyc
4
+ *.egg-info/
5
+ /.venv/
6
+ /build/
7
+ /dist/
8
+ /wheelhouse/
9
+ /.local/
10
+ /.tmp_bench_corpus/
11
+ /.idea/
@@ -0,0 +1,178 @@
cmake_minimum_required(VERSION 3.24)

project(
  termflow
  VERSION 0.1.2
  DESCRIPTION "English text analysis library for C++"
  LANGUAGES CXX)

include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

# Build switches.
#
# Tests and examples are developer conveniences, so they default to ON only
# when termflow is the top-level project. A parent project that embeds termflow
# through add_subdirectory() or FetchContent no longer gets termflow's tests and
# example binaries added to its own build unless it explicitly asks for them.
# Values already present in the cache, or passed with -D, are still honoured.
option(TERMFLOW_BUILD_TESTS "Build the termflow test suite" ${PROJECT_IS_TOP_LEVEL})
option(TERMFLOW_BUILD_EXAMPLES "Build the termflow example programs" ${PROJECT_IS_TOP_LEVEL})
option(TERMFLOW_BUILD_TOOLS "Build the termflow command-line tools" ON)
option(TERMFLOW_BUILD_PYTHON "Build the termflow Python bindings" OFF)
option(TERMFLOW_INSTALL_CPP_ARTIFACTS "Install C++ library and development artifacts" ON)

# Project-wide language defaults: strict ISO C++20, no compiler extensions.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# ICU is a hard dependency of the core library (linked as ICU::i18n / ICU::uc).
find_package(ICU REQUIRED COMPONENTS i18n uc)

# Python and pybind11 are only looked up when the bindings are requested.
if(TERMFLOW_BUILD_PYTHON)
  find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module)
  find_package(pybind11 CONFIG REQUIRED)
endif()

# Core library target. No STATIC/SHARED keyword is given, so the library type
# is chosen by BUILD_SHARED_LIBS.
add_library(termflow)
add_library(termflow::termflow ALIAS termflow)

target_sources(termflow
  PRIVATE
    src/analysis/analyzer.cpp
    src/analysis/ascii_folding_filter.cpp
    src/analysis/english_analyzer.cpp
    src/analysis/english_possessive_filter.cpp
    src/analysis/lower_case_filter.cpp
    src/analysis/porter_stem_filter.cpp
    src/analysis/standard_tokenizer.cpp
    src/analysis/stop_filter.cpp
    src/analysis/term_extractor.cpp
    src/analysis/unicode_normalize_filter.cpp
    src/query/query_analyzer.cpp
    src/query/query_parser.cpp
    src/query/rewrite_loader.cpp
    src/query/rewrite_validator.cpp
    src/util/porter_stemmer.cpp
    src/util/unicode.cpp)

set_target_properties(termflow PROPERTIES
  EXPORT_NAME termflow
  POSITION_INDEPENDENT_CODE ON
  VERSION ${PROJECT_VERSION}
  SOVERSION 0)

# Usage requirements: consumers inherit C++20, the ICU link dependencies and
# the public include directory; src/ is visible only while building termflow.
target_compile_features(termflow PUBLIC cxx_std_20)
target_link_libraries(termflow PUBLIC ICU::i18n ICU::uc)
target_include_directories(termflow
  PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src)

# Warning flags are PRIVATE so they never leak into consumer builds.
if(NOT MSVC)
  target_compile_options(termflow PRIVATE -Wall -Wextra -Wpedantic)
else()
  target_compile_options(termflow PRIVATE /W4 /permissive-)
endif()

# Example programs: one executable per source file under examples/, each
# linked against the core library through its namespaced alias.
if(TERMFLOW_BUILD_EXAMPLES)
  add_executable(termflow_analyze examples/analyze_text.cpp)
  add_executable(termflow_extract_terms examples/extract_terms.cpp)
  add_executable(termflow_custom_analyzer examples/custom_analyzer.cpp)
  add_executable(termflow_analyze_query examples/analyze_query.cpp)

  foreach(example_target IN ITEMS
      termflow_analyze
      termflow_extract_terms
      termflow_custom_analyzer
      termflow_analyze_query)
    target_link_libraries(${example_target} PRIVATE termflow::termflow)
  endforeach()
endif()

# Placeholder: TERMFLOW_BUILD_TOOLS currently guards no targets.
if(TERMFLOW_BUILD_TOOLS)
endif()

# Python bindings: stage an importable `termflow` package inside the build
# tree (pure-Python sources plus the compiled `_termflow` extension) and
# install the same layout under `termflow/`.
if(TERMFLOW_BUILD_PYTHON)
  set(TERMFLOW_PYTHON_PACKAGE_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/termflow")
  set(TERMFLOW_PYTHON_PACKAGE_FILES
    __init__.py
    cli.py)

  # Copy the pure-Python sources next to where the extension module is built,
  # so that <build>/python can be put on PYTHONPATH directly.
  file(MAKE_DIRECTORY "${TERMFLOW_PYTHON_PACKAGE_DIR}")
  foreach(package_file IN LISTS TERMFLOW_PYTHON_PACKAGE_FILES)
    configure_file(
      "${CMAKE_CURRENT_SOURCE_DIR}/python/termflow/${package_file}"
      "${TERMFLOW_PYTHON_PACKAGE_DIR}/${package_file}"
      COPYONLY)
  endforeach()

  pybind11_add_module(termflow_python MODULE bindings/python/module.cpp)
  target_link_libraries(termflow_python PRIVATE termflow::termflow)

  # The $<1:...> wrapper is intentional: multi-config generators (Xcode,
  # Ninja Multi-Config, Visual Studio) append a per-configuration subdirectory
  # to output directories unless the value contains a generator expression.
  # Without it the extension would be written to python/termflow/<Config>/,
  # away from the staged __init__.py, and the package would not import.
  set_target_properties(termflow_python PROPERTIES
    OUTPUT_NAME _termflow
    LIBRARY_OUTPUT_DIRECTORY "$<1:${TERMFLOW_PYTHON_PACKAGE_DIR}>"
    RUNTIME_OUTPUT_DIRECTORY "$<1:${TERMFLOW_PYTHON_PACKAGE_DIR}>")

  install(
    TARGETS termflow_python
    LIBRARY DESTINATION termflow
    RUNTIME DESTINATION termflow)

  # Install exactly the files staged above; deriving the list from
  # TERMFLOW_PYTHON_PACKAGE_FILES keeps the build-tree and installed packages
  # in sync when a module is added.
  list(TRANSFORM TERMFLOW_PYTHON_PACKAGE_FILES
    PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/python/termflow/"
    OUTPUT_VARIABLE termflow_python_package_sources)
  install(
    FILES ${termflow_python_package_sources}
    DESTINATION termflow)
endif()

# Test suite: a single C++ test executable registered with CTest, plus a
# Python smoke test when the bindings are built.
if(TERMFLOW_BUILD_TESTS)
  enable_testing()

  add_executable(termflow_tests)
  target_sources(termflow_tests
    PRIVATE
      tests/english_analyzer_tests.cpp
      tests/filter_tests.cpp
      tests/query_tests.cpp
      tests/standard_tokenizer_tests.cpp
      tests/term_extractor_tests.cpp
      tests/test_framework.cpp
      tests/test_main.cpp
      tests/test_helpers.hpp)
  target_link_libraries(termflow_tests PRIVATE termflow::termflow)

  add_test(NAME termflow_tests COMMAND termflow_tests)

  if(TERMFLOW_BUILD_PYTHON)
    # Runs the smoke script with PYTHONPATH pointing at <build>/python.
    add_test(
      NAME termflow_python_smoke
      COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/tests/python_smoke_test.py)
    set_property(
      TEST termflow_python_smoke
      PROPERTY ENVIRONMENT "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}/python")
  endif()
endif()
# Installation of the C++ library, its public headers and the CMake package
# files (termflowTargets / termflowConfig / termflowConfigVersion).
if(TERMFLOW_INSTALL_CPP_ARTIFACTS)
  # Every CMake package file is installed into this one directory.
  set(termflow_cmake_package_dir "${CMAKE_INSTALL_LIBDIR}/cmake/termflow")
  set(termflow_config_file "${CMAKE_CURRENT_BINARY_DIR}/termflowConfig.cmake")
  set(termflow_config_version_file "${CMAKE_CURRENT_BINARY_DIR}/termflowConfigVersion.cmake")

  # Generate the package config and version files in the build tree.
  write_basic_package_version_file(
    "${termflow_config_version_file}"
    VERSION ${PROJECT_VERSION}
    COMPATIBILITY SameMajorVersion)

  configure_package_config_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/termflowConfig.cmake.in"
    "${termflow_config_file}"
    INSTALL_DESTINATION "${termflow_cmake_package_dir}")

  # Library binaries, recorded in the termflowTargets export set.
  install(
    TARGETS termflow
    EXPORT termflowTargets
    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")

  # Public headers: the contents of include/ are copied into the include dir.
  install(
    DIRECTORY include/
    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")

  # Exported targets are installed under the termflow:: namespace.
  install(
    EXPORT termflowTargets
    FILE termflowTargets.cmake
    NAMESPACE termflow::
    DESTINATION "${termflow_cmake_package_dir}")

  install(
    FILES
      "${termflow_config_file}"
      "${termflow_config_version_file}"
    DESTINATION "${termflow_cmake_package_dir}")
endif()
@@ -0,0 +1,174 @@
1
+ Metadata-Version: 2.2
2
+ Name: termflow-ir
3
+ Version: 0.1.2
4
+ Summary: English text analysis for information retrieval
5
+ Keywords: text-analysis,information-retrieval,tokenization,stemming,search
6
+ Author: Mustafa Abualsaud
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3 :: Only
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: C++
15
+ Classifier: Topic :: Text Processing :: Linguistic
16
+ Project-URL: Homepage, https://github.com/gathera/termflow
17
+ Project-URL: Documentation, https://github.com/gathera/termflow/blob/main/docs/installation.md
18
+ Project-URL: Repository, https://github.com/gathera/termflow
19
+ Project-URL: Issues, https://github.com/gathera/termflow/issues
20
+ Project-URL: Releases, https://github.com/gathera/termflow/releases
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+
24
+ # termflow
25
+
26
+ English text analysis for information retrieval workloads in C++ and Python.
27
+
28
+ [![PyPI](https://img.shields.io/pypi/v/termflow-ir.svg)](https://pypi.org/project/termflow-ir/)
29
+ [![Python Versions](https://img.shields.io/pypi/pyversions/termflow-ir.svg)](https://pypi.org/project/termflow-ir/)
30
+ [![Python Package](https://github.com/gathera/termflow/actions/workflows/python-package.yml/badge.svg)](https://github.com/gathera/termflow/actions/workflows/python-package.yml)
31
+
32
+ `termflow` is a library-first analysis stack for search, indexing, tagging, and query normalization. It provides a built-in English analyzer, term extraction helpers, and a lightweight query rewrite layer without trying to be a full search engine.
33
+
34
+ ## Why termflow
35
+
36
+ - C++20 core library with optional Python bindings
37
+ - English analyzer with configurable stemming, stop words, possessive handling, and ASCII folding
38
+ - Term extraction API for finalized search/index terms
39
+ - Query parser and rewrite support for canonicalization, equivalents, and expansions
40
+ - Installable Python wheels for Linux and macOS
41
+ - CMake install flow for downstream C++ consumers
42
+
43
+ ## Install
44
+
45
+ Python package:
46
+
47
+ ```bash
48
+ pip install termflow-ir
49
+ ```
50
+
51
+ Python import:
52
+
53
+ ```python
54
+ import termflow
55
+ ```
56
+
57
+ CLI quick check:
58
+
59
+ ```bash
60
+ termflow analyze "The Running Cars"
61
+ ```
62
+
63
+ For C++ installation and `find_package(termflow)` usage, see [docs/installation.md](docs/installation.md).
64
+
65
+ ## Quick Start
66
+
67
+ Python:
68
+
69
+ ```python
70
+ import termflow
71
+
72
+ analyzer = termflow.EnglishAnalyzer()
73
+ terms = analyzer.analyze_terms("The Running Cars")
74
+ normalized = analyzer.normalize("Running Café")
75
+
76
+ print(terms) # ['run', 'car']
77
+ print(normalized) # 'running café'
78
+ ```
79
+
80
+ C++:
81
+
82
+ ```cpp
83
+ #include <iostream>
84
+ #include "termflow/analysis/english_analyzer.hpp"
85
+
86
+ int main() {
87
+ termflow::EnglishAnalyzer analyzer;
88
+ const auto terms = analyzer.analyze_terms("The Running Cars");
89
+
90
+ for (const auto& term : terms) {
91
+ std::cout << term << "\n";
92
+ }
93
+ }
94
+ ```
95
+
96
+ ## Features
97
+
98
+ | Area | What it includes |
99
+ | --- | --- |
100
+ | Analysis | `EnglishAnalyzer`, token analysis, normalization, stemming, stop words, ASCII folding |
101
+ | Term extraction | `TermExtractor` with length, numeric, and character-policy filtering |
102
+ | Query processing | clause parsing, analyzed query terms, rewrite loading, validation, and alternatives |
103
+ | Python bindings | built-in analyzer, term extractor, and query module under `termflow.query` |
104
+ | CLI | `termflow analyze`, `termflow extract`, and `termflow analyze-query` for quick validation |
105
+ | C++ consumption | installable CMake package and external `find_package` example |
106
+
107
+ ## Documentation
108
+
109
+ - [docs/usage.md](docs/usage.md) for day-to-day analyzer, term extraction, query, and Python usage
110
+ - [docs/customization.md](docs/customization.md) for pipeline tuning, query rewrites, and custom analyzers in C++
111
+ - [docs/installation.md](docs/installation.md) for Python and C++ installation paths
112
+ - [docs/installation-roadmap.md](docs/installation-roadmap.md) for packaging and distribution priorities
113
+
114
+ Runnable examples:
115
+
116
+ - [examples/analyze_text.cpp](examples/analyze_text.cpp)
117
+ - [examples/extract_terms.cpp](examples/extract_terms.cpp)
118
+ - [examples/custom_analyzer.cpp](examples/custom_analyzer.cpp)
119
+ - [examples/analyze_query.cpp](examples/analyze_query.cpp)
120
+ - [examples/find_package_consumer/CMakeLists.txt](examples/find_package_consumer/CMakeLists.txt)
121
+
122
+ ## Scope
123
+
124
+ `termflow` currently focuses on:
125
+
126
+ - English text analysis
127
+ - Batch-oriented APIs
128
+ - Query parsing and rewrite preparation
129
+ - Reusable components for embedding in larger applications
130
+
131
+ `termflow` does not currently provide:
132
+
133
+ - indexing or retrieval
134
+ - ranking or scoring
135
+ - token graphs
136
+ - phrase execution logic
137
+ - multilingual analyzers
138
+
139
+ ## Build From Source
140
+
141
+ Local build (requires CMake 3.24 or newer, a C++20 compiler, Ninja, and the ICU development libraries):
142
+
143
+ ```bash
144
+ cmake -S . -B build -G Ninja
145
+ cmake --build build
146
+ ctest --test-dir build --output-on-failure
147
+ ```
148
+
149
+ Build Python bindings from source (additionally requires pybind11 and the Python 3 development headers):
150
+
151
+ ```bash
152
+ cmake -S . -B build -G Ninja -DTERMFLOW_BUILD_PYTHON=ON
153
+ cmake --build build
154
+ PYTHONPATH=build/python python3 -c 'import termflow; print(termflow.EnglishAnalyzer().analyze_terms("Running Cars"))'
155
+ ```
156
+
157
+ Build Python distributions:
158
+
159
+ ```bash
160
+ python3 -m build --sdist --wheel
161
+ python3 -m twine check dist/*
162
+ ```
163
+
164
+ ## Project Status
165
+
166
+ `termflow` is early-stage and intentionally narrow in scope. The current focus is making the built-in English analysis and packaging story solid before expanding into more languages or broader IR features.
167
+
168
+ ## Contributing
169
+
170
+ Issues and pull requests are welcome. If you want to make a larger API or packaging change, open an issue first so the direction is clear before implementation work starts.
171
+
172
+ ## License
173
+
174
+ This repository does not yet include a `LICENSE` file. Until that is added, do not assume open source usage terms.
@@ -0,0 +1,151 @@
1
+ # termflow
2
+
3
+ English text analysis for information retrieval workloads in C++ and Python.
4
+
5
+ [![PyPI](https://img.shields.io/pypi/v/termflow-ir.svg)](https://pypi.org/project/termflow-ir/)
6
+ [![Python Versions](https://img.shields.io/pypi/pyversions/termflow-ir.svg)](https://pypi.org/project/termflow-ir/)
7
+ [![Python Package](https://github.com/gathera/termflow/actions/workflows/python-package.yml/badge.svg)](https://github.com/gathera/termflow/actions/workflows/python-package.yml)
8
+
9
+ `termflow` is a library-first analysis stack for search, indexing, tagging, and query normalization. It provides a built-in English analyzer, term extraction helpers, and a lightweight query rewrite layer without trying to be a full search engine.
10
+
11
+ ## Why termflow
12
+
13
+ - C++20 core library with optional Python bindings
14
+ - English analyzer with configurable stemming, stop words, possessive handling, and ASCII folding
15
+ - Term extraction API for finalized search/index terms
16
+ - Query parser and rewrite support for canonicalization, equivalents, and expansions
17
+ - Installable Python wheels for Linux and macOS
18
+ - CMake install flow for downstream C++ consumers
19
+
20
+ ## Install
21
+
22
+ Python package:
23
+
24
+ ```bash
25
+ pip install termflow-ir
26
+ ```
27
+
28
+ Python import:
29
+
30
+ ```python
31
+ import termflow
32
+ ```
33
+
34
+ CLI quick check:
35
+
36
+ ```bash
37
+ termflow analyze "The Running Cars"
38
+ ```
39
+
40
+ For C++ installation and `find_package(termflow)` usage, see [docs/installation.md](docs/installation.md).
41
+
42
+ ## Quick Start
43
+
44
+ Python:
45
+
46
+ ```python
47
+ import termflow
48
+
49
+ analyzer = termflow.EnglishAnalyzer()
50
+ terms = analyzer.analyze_terms("The Running Cars")
51
+ normalized = analyzer.normalize("Running Café")
52
+
53
+ print(terms) # ['run', 'car']
54
+ print(normalized) # 'running café'
55
+ ```
56
+
57
+ C++:
58
+
59
+ ```cpp
60
+ #include <iostream>
61
+ #include "termflow/analysis/english_analyzer.hpp"
62
+
63
+ int main() {
64
+ termflow::EnglishAnalyzer analyzer;
65
+ const auto terms = analyzer.analyze_terms("The Running Cars");
66
+
67
+ for (const auto& term : terms) {
68
+ std::cout << term << "\n";
69
+ }
70
+ }
71
+ ```
72
+
73
+ ## Features
74
+
75
+ | Area | What it includes |
76
+ | --- | --- |
77
+ | Analysis | `EnglishAnalyzer`, token analysis, normalization, stemming, stop words, ASCII folding |
78
+ | Term extraction | `TermExtractor` with length, numeric, and character-policy filtering |
79
+ | Query processing | clause parsing, analyzed query terms, rewrite loading, validation, and alternatives |
80
+ | Python bindings | built-in analyzer, term extractor, and query module under `termflow.query` |
81
+ | CLI | `termflow analyze`, `termflow extract`, and `termflow analyze-query` for quick validation |
82
+ | C++ consumption | installable CMake package and external `find_package` example |
83
+
84
+ ## Documentation
85
+
86
+ - [docs/usage.md](docs/usage.md) for day-to-day analyzer, term extraction, query, and Python usage
87
+ - [docs/customization.md](docs/customization.md) for pipeline tuning, query rewrites, and custom analyzers in C++
88
+ - [docs/installation.md](docs/installation.md) for Python and C++ installation paths
89
+ - [docs/installation-roadmap.md](docs/installation-roadmap.md) for packaging and distribution priorities
90
+
91
+ Runnable examples:
92
+
93
+ - [examples/analyze_text.cpp](examples/analyze_text.cpp)
94
+ - [examples/extract_terms.cpp](examples/extract_terms.cpp)
95
+ - [examples/custom_analyzer.cpp](examples/custom_analyzer.cpp)
96
+ - [examples/analyze_query.cpp](examples/analyze_query.cpp)
97
+ - [examples/find_package_consumer/CMakeLists.txt](examples/find_package_consumer/CMakeLists.txt)
98
+
99
+ ## Scope
100
+
101
+ `termflow` currently focuses on:
102
+
103
+ - English text analysis
104
+ - Batch-oriented APIs
105
+ - Query parsing and rewrite preparation
106
+ - Reusable components for embedding in larger applications
107
+
108
+ `termflow` does not currently provide:
109
+
110
+ - indexing or retrieval
111
+ - ranking or scoring
112
+ - token graphs
113
+ - phrase execution logic
114
+ - multilingual analyzers
115
+
116
+ ## Build From Source
117
+
118
+ Local build (requires CMake 3.24 or newer, a C++20 compiler, Ninja, and the ICU development libraries):
119
+
120
+ ```bash
121
+ cmake -S . -B build -G Ninja
122
+ cmake --build build
123
+ ctest --test-dir build --output-on-failure
124
+ ```
125
+
126
+ Build Python bindings from source (additionally requires pybind11 and the Python 3 development headers):
127
+
128
+ ```bash
129
+ cmake -S . -B build -G Ninja -DTERMFLOW_BUILD_PYTHON=ON
130
+ cmake --build build
131
+ PYTHONPATH=build/python python3 -c 'import termflow; print(termflow.EnglishAnalyzer().analyze_terms("Running Cars"))'
132
+ ```
133
+
134
+ Build Python distributions:
135
+
136
+ ```bash
137
+ python3 -m build --sdist --wheel
138
+ python3 -m twine check dist/*
139
+ ```
140
+
141
+ ## Project Status
142
+
143
+ `termflow` is early-stage and intentionally narrow in scope. The current focus is making the built-in English analysis and packaging story solid before expanding into more languages or broader IR features.
144
+
145
+ ## Contributing
146
+
147
+ Issues and pull requests are welcome. If you want to make a larger API or packaging change, open an issue first so the direction is clear before implementation work starts.
148
+
149
+ ## License
150
+
151
+ This repository does not yet include a `LICENSE` file. Until that is added, do not assume open source usage terms.