turingbot 3.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- turingbot-3.2.0/LICENSE +21 -0
- turingbot-3.2.0/PKG-INFO +191 -0
- turingbot-3.2.0/README.md +167 -0
- turingbot-3.2.0/pyproject.toml +41 -0
- turingbot-3.2.0/setup.cfg +4 -0
- turingbot-3.2.0/turingbot/__init__.py +4 -0
- turingbot-3.2.0/turingbot/turingbot.py +394 -0
- turingbot-3.2.0/turingbot.egg-info/PKG-INFO +191 -0
- turingbot-3.2.0/turingbot.egg-info/SOURCES.txt +9 -0
- turingbot-3.2.0/turingbot.egg-info/dependency_links.txt +1 -0
- turingbot-3.2.0/turingbot.egg-info/top_level.txt +1 -0
turingbot-3.2.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 TuringBot Software
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
turingbot-3.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: turingbot
|
|
3
|
+
Version: 3.2.0
|
|
4
|
+
Summary: Python interface for TuringBot, a symbolic regression software that discovers mathematical formulas from data
|
|
5
|
+
Author: TuringBot Software
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://turingbotsoftware.com
|
|
8
|
+
Project-URL: Documentation, https://turingbotsoftware.com/documentation.html
|
|
9
|
+
Project-URL: Repository, https://github.com/turingbotsoftware/turingbot-python
|
|
10
|
+
Project-URL: Download, https://turingbotsoftware.com/download.html
|
|
11
|
+
Keywords: symbolic regression,symbolic regression software,machine learning,formula discovery,scientific computing,curve fitting,data science,mathematical modeling,simulated annealing
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Intended Audience :: Education
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
20
|
+
Requires-Python: >=3.7
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# TuringBot Python Library
|
|
26
|
+
|
|
27
|
+
Python interface for [TuringBot](https://turingbotsoftware.com), a symbolic regression software that discovers mathematical formulas from data.
|
|
28
|
+
|
|
29
|
+
TuringBot uses simulated annealing to search the space of possible mathematical expressions, finding accurate and compact formulas that describe your data. It supports 15 search metrics, train/test splits, and a wide range of mathematical functions.
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install turingbot
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
You also need the TuringBot desktop application installed. Download it from [turingbotsoftware.com/download.html](https://turingbotsoftware.com/download.html).
|
|
38
|
+
|
|
39
|
+
## Quick start
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import time
|
|
43
|
+
from turingbot import simulation
|
|
44
|
+
|
|
45
|
+
sim = simulation()
|
|
46
|
+
|
|
47
|
+
# Start a symbolic regression search
|
|
48
|
+
# TuringBot is detected automatically if installed in the default location
|
|
49
|
+
sim.start_process(
|
|
50
|
+
input_file="your_data.csv", # CSV with input columns + target column
|
|
51
|
+
search_metric=4, # RMS error
|
|
52
|
+
threads=4,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Let TuringBot search for formulas
|
|
56
|
+
time.sleep(60)
|
|
57
|
+
|
|
58
|
+
# Read the best formulas found so far
|
|
59
|
+
sim.refresh_functions()
|
|
60
|
+
for f in sim.functions:
|
|
61
|
+
print(f)
|
|
62
|
+
|
|
63
|
+
# Stop the search
|
|
64
|
+
sim.terminate_process()
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Or using the context manager:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
import time
|
|
71
|
+
from turingbot import simulation
|
|
72
|
+
|
|
73
|
+
with simulation() as sim:
|
|
74
|
+
sim.start_process(
|
|
75
|
+
input_file="your_data.csv",
|
|
76
|
+
search_metric=4,
|
|
77
|
+
threads=4,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
time.sleep(60)
|
|
81
|
+
sim.refresh_functions()
|
|
82
|
+
|
|
83
|
+
for f in sim.functions:
|
|
84
|
+
print(f)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
If TuringBot is installed in a non-default location, pass the path explicitly:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
sim.start_process(
|
|
91
|
+
input_file="your_data.csv",
|
|
92
|
+
path="/custom/path/to/TuringBot",
|
|
93
|
+
)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
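Default paths checked automatically (the table lists the primary location per OS; `C:\Program Files\TuringBot\TuringBot.exe` on Windows and `/usr/local/lib/turingbot/TuringBot` on Linux are also tried, and if none exists `turingbot` / `TuringBot` is looked up on `PATH`):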
|
|
97
|
+
|
|
98
|
+
| OS | Path |
|
|
99
|
+
|----|------|
|
|
100
|
+
| Windows | `C:\Program Files (x86)\TuringBot\TuringBot.exe` |
|
|
101
|
+
| macOS | `/Applications/TuringBot.app/Contents/MacOS/TuringBot` |
|
|
102
|
+
| Linux | `/usr/lib/turingbot/TuringBot` |
|
|
103
|
+
|
|
104
|
+
## Using numpy arrays and pandas DataFrames
|
|
105
|
+
|
|
106
|
+
You can pass numpy arrays or pandas DataFrames directly instead of file paths:
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
import numpy as np
|
|
110
|
+
from turingbot import simulation
|
|
111
|
+
|
|
112
|
+
# Generate some data: y = 2*x1 + x2^2 (the := expressions below need Python 3.8+)
|
|
113
|
+
data = np.column_stack([
|
|
114
|
+
x1 := np.random.rand(100),
|
|
115
|
+
x2 := np.random.rand(100),
|
|
116
|
+
2 * x1 + x2 ** 2,
|
|
117
|
+
])
|
|
118
|
+
|
|
119
|
+
with simulation() as sim:
|
|
120
|
+
sim.start_process(
|
|
121
|
+
input_file=data,
|
|
122
|
+
column_names=["x1", "x2", "y"],
|
|
123
|
+
)
|
|
124
|
+
# ...
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
If `column_names` is not provided, TuringBot will use its default names (`col1`, `col2`, ...).
|
|
128
|
+
|
|
129
|
+
Pandas DataFrames work the same way, and column names are preserved automatically:
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
import pandas as pd
|
|
133
|
+
from turingbot import simulation
|
|
134
|
+
|
|
135
|
+
df = pd.read_csv("your_data.csv")
|
|
136
|
+
|
|
137
|
+
with simulation() as sim:
|
|
138
|
+
sim.start_process(input_file=df)
|
|
139
|
+
# ...
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Input format
|
|
143
|
+
|
|
144
|
+
The input file should be a CSV or TXT file where the last column is the target variable. Example:
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
x1,x2,y
|
|
148
|
+
0.1,0.2,0.24
|
|
149
|
+
0.3,0.4,0.55
|
|
150
|
+
0.5,0.6,0.81
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Search metrics
|
|
154
|
+
|
|
155
|
+
| ID | Metric |
|
|
156
|
+
|----|--------|
|
|
157
|
+
| 1 | Mean relative error |
|
|
158
|
+
| 2 | Classification accuracy |
|
|
159
|
+
| 3 | Mean error |
|
|
160
|
+
| 4 | RMS error (default) |
|
|
161
|
+
| 5 | F-score |
|
|
162
|
+
| 6 | Correlation coefficient |
|
|
163
|
+
| 7 | Hybrid (CC + RMS) |
|
|
164
|
+
| 8 | Maximum error |
|
|
165
|
+
| 9 | Maximum relative error |
|
|
166
|
+
| 10 | Nash-Sutcliffe efficiency |
|
|
167
|
+
| 11 | Binary cross-entropy |
|
|
168
|
+
| 12 | Matthews correlation coefficient |
|
|
169
|
+
| 13 | Residual sum of squares |
|
|
170
|
+
| 14 | Root mean squared log error |
|
|
171
|
+
| 15 | Percentile error |
|
|
172
|
+
|
|
173
|
+
## Parameters
|
|
174
|
+
|
|
175
|
+
See the full list of parameters in the `start_process` [docstring](https://turingbotsoftware.com/documentation.html), including:
|
|
176
|
+
|
|
177
|
+
- `threads` -- Number of threads
|
|
178
|
+
- `train_test_split` -- Train/test split ratio
|
|
179
|
+
- `maximum_formula_complexity` -- Max complexity of formulas
|
|
180
|
+
- `integer_constants` -- Restrict to integer constants
|
|
181
|
+
- `normalize_dataset` -- Normalize input data
|
|
182
|
+
- `allowed_functions` -- Restrict which math functions to use
|
|
183
|
+
- `custom_formula` -- Provide a formula template
|
|
184
|
+
|
|
185
|
+
## Documentation
|
|
186
|
+
|
|
187
|
+
Full documentation: [turingbotsoftware.com/documentation.html](https://turingbotsoftware.com/documentation.html)
|
|
188
|
+
|
|
189
|
+
## License
|
|
190
|
+
|
|
191
|
+
MIT
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# TuringBot Python Library
|
|
2
|
+
|
|
3
|
+
Python interface for [TuringBot](https://turingbotsoftware.com), a symbolic regression software that discovers mathematical formulas from data.
|
|
4
|
+
|
|
5
|
+
TuringBot uses simulated annealing to search the space of possible mathematical expressions, finding accurate and compact formulas that describe your data. It supports 15 search metrics, train/test splits, and a wide range of mathematical functions.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install turingbot
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
You also need the TuringBot desktop application installed. Download it from [turingbotsoftware.com/download.html](https://turingbotsoftware.com/download.html).
|
|
14
|
+
|
|
15
|
+
## Quick start
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
import time
|
|
19
|
+
from turingbot import simulation
|
|
20
|
+
|
|
21
|
+
sim = simulation()
|
|
22
|
+
|
|
23
|
+
# Start a symbolic regression search
|
|
24
|
+
# TuringBot is detected automatically if installed in the default location
|
|
25
|
+
sim.start_process(
|
|
26
|
+
input_file="your_data.csv", # CSV with input columns + target column
|
|
27
|
+
search_metric=4, # RMS error
|
|
28
|
+
threads=4,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Let TuringBot search for formulas
|
|
32
|
+
time.sleep(60)
|
|
33
|
+
|
|
34
|
+
# Read the best formulas found so far
|
|
35
|
+
sim.refresh_functions()
|
|
36
|
+
for f in sim.functions:
|
|
37
|
+
print(f)
|
|
38
|
+
|
|
39
|
+
# Stop the search
|
|
40
|
+
sim.terminate_process()
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Or using the context manager:
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
import time
|
|
47
|
+
from turingbot import simulation
|
|
48
|
+
|
|
49
|
+
with simulation() as sim:
|
|
50
|
+
sim.start_process(
|
|
51
|
+
input_file="your_data.csv",
|
|
52
|
+
search_metric=4,
|
|
53
|
+
threads=4,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
time.sleep(60)
|
|
57
|
+
sim.refresh_functions()
|
|
58
|
+
|
|
59
|
+
for f in sim.functions:
|
|
60
|
+
print(f)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
If TuringBot is installed in a non-default location, pass the path explicitly:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
sim.start_process(
|
|
67
|
+
input_file="your_data.csv",
|
|
68
|
+
path="/custom/path/to/TuringBot",
|
|
69
|
+
)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
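Default paths checked automatically (the table lists the primary location per OS; `C:\Program Files\TuringBot\TuringBot.exe` on Windows and `/usr/local/lib/turingbot/TuringBot` on Linux are also tried, and if none exists `turingbot` / `TuringBot` is looked up on `PATH`):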
|
|
73
|
+
|
|
74
|
+
| OS | Path |
|
|
75
|
+
|----|------|
|
|
76
|
+
| Windows | `C:\Program Files (x86)\TuringBot\TuringBot.exe` |
|
|
77
|
+
| macOS | `/Applications/TuringBot.app/Contents/MacOS/TuringBot` |
|
|
78
|
+
| Linux | `/usr/lib/turingbot/TuringBot` |
|
|
79
|
+
|
|
80
|
+
## Using numpy arrays and pandas DataFrames
|
|
81
|
+
|
|
82
|
+
You can pass numpy arrays or pandas DataFrames directly instead of file paths:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
import numpy as np
|
|
86
|
+
from turingbot import simulation
|
|
87
|
+
|
|
88
|
+
# Generate some data: y = 2*x1 + x2^2 (the := expressions below need Python 3.8+)
|
|
89
|
+
data = np.column_stack([
|
|
90
|
+
x1 := np.random.rand(100),
|
|
91
|
+
x2 := np.random.rand(100),
|
|
92
|
+
2 * x1 + x2 ** 2,
|
|
93
|
+
])
|
|
94
|
+
|
|
95
|
+
with simulation() as sim:
|
|
96
|
+
sim.start_process(
|
|
97
|
+
input_file=data,
|
|
98
|
+
column_names=["x1", "x2", "y"],
|
|
99
|
+
)
|
|
100
|
+
# ...
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
If `column_names` is not provided, TuringBot will use its default names (`col1`, `col2`, ...).
|
|
104
|
+
|
|
105
|
+
Pandas DataFrames work the same way, and column names are preserved automatically:
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
import pandas as pd
|
|
109
|
+
from turingbot import simulation
|
|
110
|
+
|
|
111
|
+
df = pd.read_csv("your_data.csv")
|
|
112
|
+
|
|
113
|
+
with simulation() as sim:
|
|
114
|
+
sim.start_process(input_file=df)
|
|
115
|
+
# ...
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Input format
|
|
119
|
+
|
|
120
|
+
The input file should be a CSV or TXT file where the last column is the target variable. Example:
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
x1,x2,y
|
|
124
|
+
0.1,0.2,0.24
|
|
125
|
+
0.3,0.4,0.55
|
|
126
|
+
0.5,0.6,0.81
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Search metrics
|
|
130
|
+
|
|
131
|
+
| ID | Metric |
|
|
132
|
+
|----|--------|
|
|
133
|
+
| 1 | Mean relative error |
|
|
134
|
+
| 2 | Classification accuracy |
|
|
135
|
+
| 3 | Mean error |
|
|
136
|
+
| 4 | RMS error (default) |
|
|
137
|
+
| 5 | F-score |
|
|
138
|
+
| 6 | Correlation coefficient |
|
|
139
|
+
| 7 | Hybrid (CC + RMS) |
|
|
140
|
+
| 8 | Maximum error |
|
|
141
|
+
| 9 | Maximum relative error |
|
|
142
|
+
| 10 | Nash-Sutcliffe efficiency |
|
|
143
|
+
| 11 | Binary cross-entropy |
|
|
144
|
+
| 12 | Matthews correlation coefficient |
|
|
145
|
+
| 13 | Residual sum of squares |
|
|
146
|
+
| 14 | Root mean squared log error |
|
|
147
|
+
| 15 | Percentile error |
|
|
148
|
+
|
|
149
|
+
## Parameters
|
|
150
|
+
|
|
151
|
+
See the full list of parameters in the `start_process` [docstring](https://turingbotsoftware.com/documentation.html), including:
|
|
152
|
+
|
|
153
|
+
- `threads` -- Number of threads
|
|
154
|
+
- `train_test_split` -- Train/test split ratio
|
|
155
|
+
- `maximum_formula_complexity` -- Max complexity of formulas
|
|
156
|
+
- `integer_constants` -- Restrict to integer constants
|
|
157
|
+
- `normalize_dataset` -- Normalize input data
|
|
158
|
+
- `allowed_functions` -- Restrict which math functions to use
|
|
159
|
+
- `custom_formula` -- Provide a formula template
|
|
160
|
+
|
|
161
|
+
## Documentation
|
|
162
|
+
|
|
163
|
+
Full documentation: [turingbotsoftware.com/documentation.html](https://turingbotsoftware.com/documentation.html)
|
|
164
|
+
|
|
165
|
+
## License
|
|
166
|
+
|
|
167
|
+
MIT
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "turingbot"
|
|
7
|
+
version = "3.2.0"
|
|
8
|
+
description = "Python interface for TuringBot, a symbolic regression software that discovers mathematical formulas from data"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.7"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "TuringBot Software"},
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"symbolic regression",
|
|
17
|
+
"symbolic regression software",
|
|
18
|
+
"machine learning",
|
|
19
|
+
"formula discovery",
|
|
20
|
+
"scientific computing",
|
|
21
|
+
"curve fitting",
|
|
22
|
+
"data science",
|
|
23
|
+
"mathematical modeling",
|
|
24
|
+
"simulated annealing",
|
|
25
|
+
]
|
|
26
|
+
classifiers = [
|
|
27
|
+
"Development Status :: 5 - Production/Stable",
|
|
28
|
+
"Intended Audience :: Science/Research",
|
|
29
|
+
"Intended Audience :: Education",
|
|
30
|
+
"Operating System :: OS Independent",
|
|
31
|
+
"Programming Language :: Python :: 3",
|
|
32
|
+
"Topic :: Scientific/Engineering",
|
|
33
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
34
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Homepage = "https://turingbotsoftware.com"
|
|
39
|
+
Documentation = "https://turingbotsoftware.com/documentation.html"
|
|
40
|
+
Repository = "https://github.com/turingbotsoftware/turingbot-python"
|
|
41
|
+
Download = "https://turingbotsoftware.com/download.html"
|
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import platform
|
|
3
|
+
import random
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess
|
|
6
|
+
import tempfile
|
|
7
|
+
|
|
8
|
+
if os.name != "nt":
|
|
9
|
+
import signal
|
|
10
|
+
|
|
11
|
+
# Candidate install locations of the TuringBot executable, keyed by the
# value returned by platform.system(). Entries are probed in list order.
_DEFAULT_PATHS = {
    # 32-bit install directory first, then the 64-bit one.
    "Windows": [
        r"C:\Program Files (x86)\TuringBot\TuringBot.exe",
        r"C:\Program Files\TuringBot\TuringBot.exe",
    ],
    # Binary inside the application bundle.
    "Darwin": ["/Applications/TuringBot.app/Contents/MacOS/TuringBot"],
    # System-wide location first, then /usr/local.
    "Linux": [
        "/usr/lib/turingbot/TuringBot",
        "/usr/local/lib/turingbot/TuringBot",
    ],
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def find_executable():
    """Locate the TuringBot executable without user input.

    The default installation paths registered for the current operating
    system (as reported by ``platform.system()``) are probed in order and
    the first one that is an existing file wins.  When none of them exists,
    the names ``turingbot`` and then ``TuringBot`` are searched on ``PATH``.

    Returns
    -------
    str or None
        Path of the executable, or ``None`` when nothing was found.
    """
    candidates = _DEFAULT_PATHS.get(platform.system(), [])
    installed = next(
        (candidate for candidate in candidates if os.path.isfile(candidate)),
        None,
    )
    if installed is not None:
        return installed

    # No default install location matched: fall back to a PATH lookup.
    on_path = shutil.which("turingbot") or shutil.which("TuringBot")
    return on_path or None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _save_array_to_csv(data, column_names=None):
    """Write a numpy array or pandas DataFrame to a temporary CSV file.

    Parameters
    ----------
    data : object
        Anything exposing ``to_csv`` (e.g. a pandas DataFrame) is written
        through that method without the index.  Everything else is
        converted with ``numpy.asarray`` and must be two-dimensional.
    column_names : sequence, optional
        Header names for array input, one per column.  Ignored when
        ``data`` provides ``to_csv``.

    Returns
    -------
    str
        Path of the temporary file.  The caller is responsible for
        deleting it.

    Raises
    ------
    ValueError
        If the array is not 2D or ``column_names`` does not have exactly
        one entry per column.
    """
    fd, path = tempfile.mkstemp(suffix=".csv", prefix="turingbot_input_")
    # Only the reserved file name is needed; the writers below reopen the
    # file by path.  Closing the descriptor right away guarantees it is
    # never leaked on an error path and lets the cleanup below delete the
    # file on Windows, where an open file cannot be removed.
    os.close(fd)
    try:
        # pandas DataFrame (or any object able to serialise itself)
        if hasattr(data, "to_csv"):
            data.to_csv(path, index=False)
            return path

        # numpy array; imported lazily so numpy stays an optional dependency
        import numpy as np

        data = np.asarray(data)
        if data.ndim != 2:
            raise ValueError(
                f"Expected a 2D array, got {data.ndim}D. "
                "Reshape your data so that rows are samples and columns are variables."
            )

        header = ""
        if column_names is not None:
            if len(column_names) != data.shape[1]:
                raise ValueError(
                    f"column_names has {len(column_names)} names but "
                    f"data has {data.shape[1]} columns."
                )
            header = ",".join(str(c) for c in column_names)

        # comments="" keeps numpy from prefixing the header line with "# ".
        np.savetxt(path, data, delimiter=",", header=header, comments="")
        return path
    except Exception:
        # Do not leave a half-written temporary file behind.
        if os.path.exists(path):
            os.remove(path)
        raise
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class simulation:
|
|
89
|
+
def __init__(self):
|
|
90
|
+
self.functions = []
|
|
91
|
+
self.process = None
|
|
92
|
+
self._outfile = None
|
|
93
|
+
self._outfile_is_temp = False
|
|
94
|
+
self._infile = None
|
|
95
|
+
self._infile_is_temp = False
|
|
96
|
+
self.info = None
|
|
97
|
+
|
|
98
|
+
def parse_lines(self, lines):
|
|
99
|
+
functions = []
|
|
100
|
+
self.info = lines[0].strip()
|
|
101
|
+
|
|
102
|
+
for i in range(2, len(lines)):
|
|
103
|
+
functions.append(lines[i].strip().split())
|
|
104
|
+
functions[-1][0] = int(functions[-1][0])
|
|
105
|
+
functions[-1][1] = float(functions[-1][1])
|
|
106
|
+
if len(functions[-1]) == 4:
|
|
107
|
+
functions[-1][2] = float(functions[-1][2])
|
|
108
|
+
|
|
109
|
+
return functions
|
|
110
|
+
|
|
111
|
+
def start_process(
|
|
112
|
+
self,
|
|
113
|
+
path: str = None,
|
|
114
|
+
input_file=None,
|
|
115
|
+
config: str = None,
|
|
116
|
+
column_names: list = None,
|
|
117
|
+
threads: int = None,
|
|
118
|
+
outfile: str = None,
|
|
119
|
+
predictions_file: str = None,
|
|
120
|
+
formulas_file: str = None,
|
|
121
|
+
search_metric: int = None,
|
|
122
|
+
train_test_split: int = None,
|
|
123
|
+
test_sample: int = None,
|
|
124
|
+
train_test_seed: int = None,
|
|
125
|
+
bound_search_mode: int = None,
|
|
126
|
+
maximum_formula_complexity: int = None,
|
|
127
|
+
history_size: int = None,
|
|
128
|
+
max_occurrences_per_variable: int = None,
|
|
129
|
+
distinct_variables_min: int = None,
|
|
130
|
+
distinct_variables_max: int = None,
|
|
131
|
+
constants_min: int = None,
|
|
132
|
+
constants_max: int = None,
|
|
133
|
+
fscore_beta: float = None,
|
|
134
|
+
percentile: float = None,
|
|
135
|
+
integer_constants: bool = False,
|
|
136
|
+
normalize_dataset: bool = False,
|
|
137
|
+
allow_target_delay: bool = False,
|
|
138
|
+
force_all_variables: bool = False,
|
|
139
|
+
custom_formula: str = None,
|
|
140
|
+
allowed_functions: str = None,
|
|
141
|
+
):
|
|
142
|
+
"""
|
|
143
|
+
Start the process with the specified configuration and dataset.
|
|
144
|
+
|
|
145
|
+
Parameters:
|
|
146
|
+
-----------
|
|
147
|
+
path : str, optional
|
|
148
|
+
Path to the TuringBot executable. If not provided, the default
|
|
149
|
+
installation path for your OS will be used automatically.
|
|
150
|
+
input_file : str, numpy.ndarray, or pandas.DataFrame
|
|
151
|
+
The input data. Can be a file path (str), a 2D numpy array,
|
|
152
|
+
or a pandas DataFrame. When an array or DataFrame is passed,
|
|
153
|
+
it is saved to a temporary CSV file automatically.
|
|
154
|
+
config : str, optional
|
|
155
|
+
Path to the configuration file.
|
|
156
|
+
column_names : list of str, optional
|
|
157
|
+
Column names for the data. Only used when input_file is a
|
|
158
|
+
numpy array. Ignored for DataFrames (which have their own
|
|
159
|
+
column names) and file paths.
|
|
160
|
+
threads : int, optional
|
|
161
|
+
Number of threads to use.
|
|
162
|
+
outfile : str, optional
|
|
163
|
+
Output file path.
|
|
164
|
+
predictions_file : str, optional
|
|
165
|
+
File to store predictions.
|
|
166
|
+
formulas_file : str, optional
|
|
167
|
+
File to store generated formulas.
|
|
168
|
+
search_metric : int, optional
|
|
169
|
+
Search metric to use. Default is 4 (RMS error).
|
|
170
|
+
Options:
|
|
171
|
+
1: Mean relative error
|
|
172
|
+
2: Classification accuracy
|
|
173
|
+
3: Mean error
|
|
174
|
+
4: RMS error (default)
|
|
175
|
+
5: F-score
|
|
176
|
+
6: Correlation coefficient
|
|
177
|
+
7: Hybrid (CC + RMS)
|
|
178
|
+
8: Maximum error
|
|
179
|
+
9: Maximum relative error
|
|
180
|
+
10: Nash-Sutcliffe efficiency
|
|
181
|
+
11: Binary cross-entropy
|
|
182
|
+
12: Matthews correlation coefficient (MCC)
|
|
183
|
+
13: Residual sum of squares (RSS)
|
|
184
|
+
14: Root mean squared log error (RMSLE)
|
|
185
|
+
15: Percentile error
|
|
186
|
+
train_test_split : int, optional
|
|
187
|
+
Train/test split. Default is -1 (no test sample).
|
|
188
|
+
Options:
|
|
189
|
+
-1: No test sample (default)
|
|
190
|
+
50, 60, 70, 75, 80: Percentage split for training data
|
|
191
|
+
100, 1000, 10000: Predefined row counts for training
|
|
192
|
+
Negative values (e.g., -200): Use 200 rows for training
|
|
193
|
+
test_sample : int, optional
|
|
194
|
+
How to select test samples. Default is 1 (random).
|
|
195
|
+
Options:
|
|
196
|
+
1: Chosen randomly (default)
|
|
197
|
+
2: The last points
|
|
198
|
+
train_test_seed : int, optional
|
|
199
|
+
Random seed for train/test split generation. Default is -1 (no specific seed).
|
|
200
|
+
bound_search_mode : int, optional
|
|
201
|
+
Whether to use bound search mode. Default is 0 (deactivated).
|
|
202
|
+
Options:
|
|
203
|
+
0: Deactivated (default)
|
|
204
|
+
1: Lower bound search
|
|
205
|
+
2: Upper bound search
|
|
206
|
+
maximum_formula_complexity : int, optional
|
|
207
|
+
Maximum formula complexity. Default is 60.
|
|
208
|
+
history_size : int, optional
|
|
209
|
+
History size for the optimization process. Default is 20.
|
|
210
|
+
max_occurrences_per_variable : int, optional
|
|
211
|
+
Maximum occurrences per variable. Default is -1 (no limit).
|
|
212
|
+
distinct_variables_min : int, optional
|
|
213
|
+
Minimum number of distinct variables. Default is -1 (no limit).
|
|
214
|
+
distinct_variables_max : int, optional
|
|
215
|
+
Maximum number of distinct variables. Default is -1 (no limit).
|
|
216
|
+
constants_min : int, optional
|
|
217
|
+
Minimum number of constants. Default is -1 (no limit).
|
|
218
|
+
constants_max : int, optional
|
|
219
|
+
Maximum number of constants. Default is -1 (no limit).
|
|
220
|
+
fscore_beta : float, optional
|
|
221
|
+
Beta parameter for F-score. Default is 1.
|
|
222
|
+
percentile : float, optional
|
|
223
|
+
Percentile for the Percentile error metric. Default is 0.5.
|
|
224
|
+
integer_constants : bool, optional
|
|
225
|
+
Whether to use integer constants only. Default is False (disabled).
|
|
226
|
+
normalize_dataset : bool, optional
|
|
227
|
+
Whether to normalize the dataset before optimization. Default is False (no normalization).
|
|
228
|
+
allow_target_delay : bool, optional
|
|
229
|
+
Whether to allow the target variable in lag functions. Default is False (not allowed).
|
|
230
|
+
force_all_variables : bool, optional
|
|
231
|
+
Whether to force the solution to include all input variables. Default is False (not forced).
|
|
232
|
+
custom_formula : str, optional
|
|
233
|
+
Custom formula for the search. If not provided, the last column will be treated as the target variable.
|
|
234
|
+
allowed_functions : str, optional
|
|
235
|
+
Allowed functions for the formula search. Default: "+ * / pow fmod sin cos tan asin acos atan exp log log2 log10 sqrt sinh cosh tanh asinh acosh atanh abs floor ceil round sign tgamma lgamma erf"
|
|
236
|
+
"""
|
|
237
|
+
|
|
238
|
+
self.functions = []
|
|
239
|
+
|
|
240
|
+
if input_file is None:
|
|
241
|
+
raise ValueError("input_file is required.")
|
|
242
|
+
|
|
243
|
+
# Convert numpy arrays and pandas DataFrames to a temp CSV
|
|
244
|
+
if not isinstance(input_file, str):
|
|
245
|
+
input_file = _save_array_to_csv(input_file, column_names)
|
|
246
|
+
self._infile = input_file
|
|
247
|
+
self._infile_is_temp = True
|
|
248
|
+
else:
|
|
249
|
+
self._infile = input_file
|
|
250
|
+
self._infile_is_temp = False
|
|
251
|
+
|
|
252
|
+
if path is None:
|
|
253
|
+
path = find_executable()
|
|
254
|
+
if path is None:
|
|
255
|
+
raise FileNotFoundError(
|
|
256
|
+
"TuringBot executable not found. Install TuringBot from "
|
|
257
|
+
"https://turingbotsoftware.com/download.html or pass the "
|
|
258
|
+
"path= argument explicitly."
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
if not os.path.isfile(input_file):
|
|
262
|
+
raise FileNotFoundError(f"Input file does not exist: {input_file}")
|
|
263
|
+
|
|
264
|
+
if not os.path.isfile(path):
|
|
265
|
+
raise FileNotFoundError(f"Executable path is incorrect: {path}")
|
|
266
|
+
|
|
267
|
+
if config is not None and not os.path.isfile(config):
|
|
268
|
+
raise FileNotFoundError(f"Configuration file does not exist: {config}")
|
|
269
|
+
|
|
270
|
+
path = f'"{path}"'
|
|
271
|
+
input_file = f'"{input_file}"'
|
|
272
|
+
if outfile is None:
|
|
273
|
+
if os.name == "nt":
|
|
274
|
+
self._outfile = tempfile.gettempdir() + r"\turingbot_%d.txt" % random.randint(1000000000, 9999999999)
|
|
275
|
+
else:
|
|
276
|
+
self._outfile = tempfile.gettempdir() + "/turingbot_%d.txt" % random.randint(1000000000, 9999999999)
|
|
277
|
+
self._outfile_is_temp = True
|
|
278
|
+
else:
|
|
279
|
+
self._outfile = outfile
|
|
280
|
+
self._outfile_is_temp = False
|
|
281
|
+
|
|
282
|
+
if self.process is not None:
|
|
283
|
+
self.process.kill()
|
|
284
|
+
|
|
285
|
+
if os.name == "nt":
|
|
286
|
+
cmd = f"{path} {input_file} --outfile {self._outfile}"
|
|
287
|
+
elif os.name == "posix":
|
|
288
|
+
cmd = f"exec {path} {input_file} --outfile {self._outfile} 1>/dev/null 2>/dev/null"
|
|
289
|
+
else:
|
|
290
|
+
cmd = f"exec stdbuf -oL {path} {input_file} --outfile {self._outfile} 1>/dev/null 2>/dev/null"
|
|
291
|
+
|
|
292
|
+
if config is not None:
|
|
293
|
+
cmd = f"{cmd} {config}"
|
|
294
|
+
else:
|
|
295
|
+
if search_metric is not None:
|
|
296
|
+
cmd = f"{cmd} --search-metric {search_metric}"
|
|
297
|
+
if train_test_split is not None:
|
|
298
|
+
cmd = f"{cmd} --train-test-split {train_test_split}"
|
|
299
|
+
if test_sample is not None:
|
|
300
|
+
cmd = f"{cmd} --test-sample {test_sample}"
|
|
301
|
+
if train_test_seed is not None:
|
|
302
|
+
cmd = f"{cmd} --train-test-seed {train_test_seed}"
|
|
303
|
+
if bound_search_mode is not None:
|
|
304
|
+
cmd = f"{cmd} --bound-search-mode {bound_search_mode}"
|
|
305
|
+
if maximum_formula_complexity is not None:
|
|
306
|
+
cmd = f"{cmd} --maximum-formula-complexity {maximum_formula_complexity}"
|
|
307
|
+
if history_size is not None:
|
|
308
|
+
cmd = f"{cmd} --history-size {history_size}"
|
|
309
|
+
if max_occurrences_per_variable is not None:
|
|
310
|
+
cmd = f"{cmd} --max-occurrences-per-variable {max_occurrences_per_variable}"
|
|
311
|
+
if distinct_variables_min is not None:
|
|
312
|
+
cmd = f"{cmd} --distinct-variables-min {distinct_variables_min}"
|
|
313
|
+
if distinct_variables_max is not None:
|
|
314
|
+
cmd = f"{cmd} --distinct-variables-max {distinct_variables_max}"
|
|
315
|
+
if constants_min is not None:
|
|
316
|
+
cmd = f"{cmd} --constants-min {constants_min}"
|
|
317
|
+
if constants_max is not None:
|
|
318
|
+
cmd = f"{cmd} --constants-max {constants_max}"
|
|
319
|
+
if fscore_beta is not None:
|
|
320
|
+
cmd = f"{cmd} --fscore-beta {fscore_beta}"
|
|
321
|
+
if percentile is not None:
|
|
322
|
+
cmd = f"{cmd} --percentile {percentile}"
|
|
323
|
+
if integer_constants:
|
|
324
|
+
cmd = f"{cmd} --integer-constants"
|
|
325
|
+
if normalize_dataset:
|
|
326
|
+
cmd = f"{cmd} --normalize-dataset"
|
|
327
|
+
if allow_target_delay:
|
|
328
|
+
cmd = f"{cmd} --allow-target-delay"
|
|
329
|
+
if force_all_variables:
|
|
330
|
+
cmd = f"{cmd} --force-all-variables"
|
|
331
|
+
if custom_formula is not None:
|
|
332
|
+
cmd = f"{cmd} --custom-formula \"{custom_formula}\""
|
|
333
|
+
if allowed_functions is not None:
|
|
334
|
+
cmd = f"{cmd} --allowed-functions \"{allowed_functions}\""
|
|
335
|
+
|
|
336
|
+
if threads is not None:
|
|
337
|
+
cmd = f"{cmd} --threads {int(threads)}"
|
|
338
|
+
|
|
339
|
+
if predictions_file is not None:
|
|
340
|
+
cmd = f"{cmd} --predictions-file {predictions_file}"
|
|
341
|
+
|
|
342
|
+
if formulas_file is not None:
|
|
343
|
+
cmd = f"{cmd} --formulas-file {formulas_file}"
|
|
344
|
+
|
|
345
|
+
cmd = f"{cmd} --library-parent-id {str(os.getpid())}"
|
|
346
|
+
|
|
347
|
+
if os.name == "nt":
|
|
348
|
+
self.process = subprocess.Popen(
|
|
349
|
+
cmd,
|
|
350
|
+
stdout=subprocess.DEVNULL,
|
|
351
|
+
stderr=subprocess.DEVNULL
|
|
352
|
+
)
|
|
353
|
+
else:
|
|
354
|
+
self.process = subprocess.Popen(
|
|
355
|
+
cmd,
|
|
356
|
+
shell=True,
|
|
357
|
+
encoding="utf-8",
|
|
358
|
+
preexec_fn=os.setsid
|
|
359
|
+
)
|
|
360
|
+
|
|
361
|
+
def refresh_functions(self):
    """Reload ``self.functions`` from the current output file.

    Nothing happens when no output file is set or when the path does not
    point to an existing file. When the file exists but holds no lines,
    ``self.functions`` keeps its previous value; otherwise it is replaced
    by the result of ``self.parse_lines`` applied to the lines read.
    """
    outfile = self._outfile
    if not outfile or not os.path.isfile(outfile):
        return

    with open(outfile, "r") as handle:
        rows = handle.readlines()

    if rows:
        self.functions = self.parse_lines(rows)
|
|
368
|
+
|
|
369
|
+
def terminate_process(self):
    """Stop the TuringBot process, if running, and delete temporary files.

    On Windows the child is stopped with ``Popen.terminate()``. On other
    systems ``SIGTERM`` is sent to the child's whole process group, so
    that processes spawned by the child receive the signal as well. The
    child is then waited on for a short grace period so that it is reaped
    and has released its files before they are removed.

    Afterwards the output file and the input file are each removed when
    they are flagged as temporary and still exist; the corresponding flag
    is cleared once the file has been removed.

    Cleanup is best-effort: errors are printed instead of raised, and a
    failure in one step does not prevent the remaining steps from running.
    Attributes that were never set are treated as "nothing to clean up",
    because this method is also invoked from ``__del__``.
    """
    # Seconds granted to the child to exit after it has been signalled.
    grace_period = 5

    process = getattr(self, "process", None)
    try:
        if process and process.poll() is None:
            if os.name == "nt":
                process.terminate()
            else:
                # Signal the whole group, not only the direct child.
                os.killpg(os.getpgid(process.pid), signal.SIGTERM)
            # Reap the child; a timeout is reported by the handler below
            # and cleanup continues regardless.
            process.wait(timeout=grace_period)
    except Exception as error:
        print(error)

    # Each file is handled in its own try block so that a failure to stop
    # the process, or to delete one file, never leaks the other file.
    for path_attr, flag_attr in (
        ("_outfile", "_outfile_is_temp"),
        ("_infile", "_infile_is_temp"),
    ):
        try:
            path = getattr(self, path_attr, None)
            if getattr(self, flag_attr, False) and path and os.path.exists(path):
                os.remove(path)
                setattr(self, flag_attr, False)
        except Exception as error:
            print(error)
|
|
386
|
+
|
|
387
|
+
def __enter__(self):
    """Enter the ``with`` block; the object itself is the context value."""
    return self
|
|
389
|
+
|
|
390
|
+
def __exit__(self, exc_type, exc_value, traceback):
    """Leave the ``with`` block by calling ``terminate_process``.

    The exception details are ignored and ``None`` is returned, so an
    exception raised inside the block keeps propagating.
    """
    self.terminate_process()
|
|
392
|
+
|
|
393
|
+
def __del__(self):
    """Call ``terminate_process`` when the object is being finalized."""
    self.terminate_process()
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: turingbot
|
|
3
|
+
Version: 3.2.0
|
|
4
|
+
Summary: Python interface for TuringBot, a symbolic regression software that discovers mathematical formulas from data
|
|
5
|
+
Author: TuringBot Software
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://turingbotsoftware.com
|
|
8
|
+
Project-URL: Documentation, https://turingbotsoftware.com/documentation.html
|
|
9
|
+
Project-URL: Repository, https://github.com/turingbotsoftware/turingbot-python
|
|
10
|
+
Project-URL: Download, https://turingbotsoftware.com/download.html
|
|
11
|
+
Keywords: symbolic regression,symbolic regression software,machine learning,formula discovery,scientific computing,curve fitting,data science,mathematical modeling,simulated annealing
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Intended Audience :: Education
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
20
|
+
Requires-Python: >=3.7
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# TuringBot Python Library
|
|
26
|
+
|
|
27
|
+
Python interface for [TuringBot](https://turingbotsoftware.com), a symbolic regression program that discovers mathematical formulas from data.
|
|
28
|
+
|
|
29
|
+
TuringBot uses simulated annealing to search the space of possible mathematical expressions, finding accurate and compact formulas that describe your data. It supports 15 search metrics, train/test splits, and a wide range of mathematical functions.
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install turingbot
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
You also need the TuringBot desktop application installed. Download it from [turingbotsoftware.com/download.html](https://turingbotsoftware.com/download.html).
|
|
38
|
+
|
|
39
|
+
## Quick start
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import time
|
|
43
|
+
from turingbot import simulation
|
|
44
|
+
|
|
45
|
+
sim = simulation()
|
|
46
|
+
|
|
47
|
+
# Start a symbolic regression search
|
|
48
|
+
# TuringBot is detected automatically if installed in the default location
|
|
49
|
+
sim.start_process(
|
|
50
|
+
input_file="your_data.csv", # CSV with input columns + target column
|
|
51
|
+
search_metric=4, # RMS error
|
|
52
|
+
threads=4,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Let TuringBot search for formulas
|
|
56
|
+
time.sleep(60)
|
|
57
|
+
|
|
58
|
+
# Read the best formulas found so far
|
|
59
|
+
sim.refresh_functions()
|
|
60
|
+
for f in sim.functions:
|
|
61
|
+
print(f)
|
|
62
|
+
|
|
63
|
+
# Stop the search
|
|
64
|
+
sim.terminate_process()
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Or using the context manager:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
import time
|
|
71
|
+
from turingbot import simulation
|
|
72
|
+
|
|
73
|
+
with simulation() as sim:
|
|
74
|
+
sim.start_process(
|
|
75
|
+
input_file="your_data.csv",
|
|
76
|
+
search_metric=4,
|
|
77
|
+
threads=4,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
time.sleep(60)
|
|
81
|
+
sim.refresh_functions()
|
|
82
|
+
|
|
83
|
+
for f in sim.functions:
|
|
84
|
+
print(f)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
If TuringBot is installed in a non-default location, pass the path explicitly:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
sim.start_process(
|
|
91
|
+
input_file="your_data.csv",
|
|
92
|
+
path="/custom/path/to/TuringBot",
|
|
93
|
+
)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Default paths checked automatically:
|
|
97
|
+
|
|
98
|
+
| OS | Path |
|
|
99
|
+
|----|------|
|
|
100
|
+
| Windows | `C:\Program Files (x86)\TuringBot\TuringBot.exe` |
|
|
101
|
+
| macOS | `/Applications/TuringBot.app/Contents/MacOS/TuringBot` |
|
|
102
|
+
| Linux | `/usr/lib/turingbot/TuringBot` |
|
|
103
|
+
|
|
104
|
+
## Using numpy arrays and pandas DataFrames
|
|
105
|
+
|
|
106
|
+
You can pass numpy arrays or pandas DataFrames directly instead of file paths:
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
import numpy as np
|
|
110
|
+
from turingbot import simulation
|
|
111
|
+
|
|
112
|
+
# Generate some data: y = 2*x1 + x2^2
|
|
113
|
+
data = np.column_stack([
|
|
114
|
+
x1 := np.random.rand(100),
|
|
115
|
+
x2 := np.random.rand(100),
|
|
116
|
+
2 * x1 + x2 ** 2,
|
|
117
|
+
])
|
|
118
|
+
|
|
119
|
+
with simulation() as sim:
|
|
120
|
+
sim.start_process(
|
|
121
|
+
input_file=data,
|
|
122
|
+
column_names=["x1", "x2", "y"],
|
|
123
|
+
)
|
|
124
|
+
# ...
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
If `column_names` is not provided, TuringBot will use its default names (`col1`, `col2`, ...).
|
|
128
|
+
|
|
129
|
+
Pandas DataFrames work the same way, and column names are preserved automatically:
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
import pandas as pd
|
|
133
|
+
from turingbot import simulation
|
|
134
|
+
|
|
135
|
+
df = pd.read_csv("your_data.csv")
|
|
136
|
+
|
|
137
|
+
with simulation() as sim:
|
|
138
|
+
sim.start_process(input_file=df)
|
|
139
|
+
# ...
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Input format
|
|
143
|
+
|
|
144
|
+
The input file should be a CSV or TXT file where the last column is the target variable. Example:
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
x1,x2,y
|
|
148
|
+
0.1,0.2,0.24
|
|
149
|
+
0.3,0.4,0.55
|
|
150
|
+
0.5,0.6,0.81
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Search metrics
|
|
154
|
+
|
|
155
|
+
| ID | Metric |
|
|
156
|
+
|----|--------|
|
|
157
|
+
| 1 | Mean relative error |
|
|
158
|
+
| 2 | Classification accuracy |
|
|
159
|
+
| 3 | Mean error |
|
|
160
|
+
| 4 | RMS error (default) |
|
|
161
|
+
| 5 | F-score |
|
|
162
|
+
| 6 | Correlation coefficient |
|
|
163
|
+
| 7 | Hybrid (CC + RMS) |
|
|
164
|
+
| 8 | Maximum error |
|
|
165
|
+
| 9 | Maximum relative error |
|
|
166
|
+
| 10 | Nash-Sutcliffe efficiency |
|
|
167
|
+
| 11 | Binary cross-entropy |
|
|
168
|
+
| 12 | Matthews correlation coefficient |
|
|
169
|
+
| 13 | Residual sum of squares |
|
|
170
|
+
| 14 | Root mean squared log error |
|
|
171
|
+
| 15 | Percentile error |
|
|
172
|
+
|
|
173
|
+
## Parameters
|
|
174
|
+
|
|
175
|
+
For the full list of parameters, see the `start_process` docstring or the [online documentation](https://turingbotsoftware.com/documentation.html). They include:
|
|
176
|
+
|
|
177
|
+
- `threads` -- Number of threads
|
|
178
|
+
- `train_test_split` -- Train/test split ratio
|
|
179
|
+
- `maximum_formula_complexity` -- Max complexity of formulas
|
|
180
|
+
- `integer_constants` -- Restrict to integer constants
|
|
181
|
+
- `normalize_dataset` -- Normalize input data
|
|
182
|
+
- `allowed_functions` -- Restrict which math functions to use
|
|
183
|
+
- `custom_formula` -- Provide a formula template
|
|
184
|
+
|
|
185
|
+
## Documentation
|
|
186
|
+
|
|
187
|
+
Full documentation: [turingbotsoftware.com/documentation.html](https://turingbotsoftware.com/documentation.html)
|
|
188
|
+
|
|
189
|
+
## License
|
|
190
|
+
|
|
191
|
+
MIT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
turingbot
|