teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registries, and is provided for informational purposes only.
Potentially problematic release: this version of teradataml might be problematic.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +196 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +79 -4
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +1 -0
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/automl/data_preparation.py +3 -2
- teradataml/automl/feature_engineering.py +15 -7
- teradataml/automl/model_training.py +39 -33
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +35 -0
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +8 -2
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +25 -3
- teradataml/common/utils.py +134 -9
- teradataml/context/context.py +20 -10
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/dataframe.py +543 -175
- teradataml/dataframe/functions.py +553 -25
- teradataml/dataframe/sql.py +184 -15
- teradataml/dbutils/dbutils.py +556 -18
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
- teradataml/options/__init__.py +7 -23
- teradataml/options/configure.py +29 -3
- teradataml/scriptmgmt/UserEnv.py +3 -3
- teradataml/scriptmgmt/lls_utils.py +74 -21
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +33 -1
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
teradataml/data/docs/sqle/docs_17_20/TextParser.py

@@ -1,4 +1,4 @@
-def TextParser(data=None, object=None, text_column=None, covert_to_lowercase=True, stem_tokens=False,
+def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=True, stem_tokens=False,
                remove_stopwords=False, accumulate=None, delimiter=" \t\n\f\r",
                punctuation="!#$%&()*+,-./:;?@\^_`{|}~", token_col_name=None, **generic_arguments):
     """
@@ -38,7 +38,7 @@ def TextParser(data=None, object=None, text_column=None, covert_to_lowercase=True,
             Specifies the name of the input data column whose contents are to be tokenized.
             Types: str

-        covert_to_lowercase:
+        convert_to_lowercase:
             Optional Argument.
             Specifies whether to convert the text in "text_column" to lowercase.
             Default Value: True
@@ -165,7 +165,7 @@ def TextParser(data=None, object=None, text_column=None, covert_to_lowercase=True,
         # Example 2 : Convert words in "text_data" column into their root forms.
         TextParser_out = TextParser(data=complaints,
                                     text_column="text_data",
-                                    covert_to_lowercase=True,
+                                    convert_to_lowercase=True,
                                     stem_tokens=True)

         # Print the result DataFrame.
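The keyword rename above (covert_to_lowercase → convert_to_lowercase) breaks callers that passed the old spelling. A minimal sketch of the corrected call, assuming an active create_context() session and the "complaints" example table that the TextParser docstring itself loads:

    # Sketch only: table and loader names follow the TextParser docstring example.
    from teradataml import DataFrame, TextParser, load_example_data

    load_example_data("textparser", "complaints")
    complaints = DataFrame("complaints")

    # 20.0.0.2 spelling; no longer a named parameter in 20.0.0.3:
    # TextParser(data=complaints, text_column="text_data", covert_to_lowercase=True)

    # 20.0.0.3 spelling:
    out = TextParser(data=complaints,
                     text_column="text_data",
                     convert_to_lowercase=True,
                     stem_tokens=True)
    print(out.result)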
teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py

@@ -0,0 +1,118 @@
+def Image2Matrix(data=None,
+                 output='gray',
+                 **generic_arguments):
+    """
+    DESCRIPTION:
+        Image2Matrix() function converts an image to a matrix.
+        It converts JPEG or PNG images to matrixes with payload values being the pixel values.
+        Note:
+            * The image size cannot be greater than 16 MB.
+            * The image should not exceed 4,000,000 pixels.
+
+    PARAMETERS:
+        data:
+            Required Argument.
+            Specifies the teradataml DataFrame which has image details.
+            Types: Teradataml DataFrame
+
+        output:
+            Optional Argument.
+            Specifies the type of output matrix.
+            Default: 'gray'
+            Permitted Values:
+                'gray': Converts the image to a grayscale matrix.
+                'rgb': Converts the image to a RGB matrix.
+            Types: str
+
+        **generic_arguments:
+            Specifies the generic keyword arguments SQLE functions accept.
+            Below are the generic keyword arguments:
+                persist:
+                    Optional Argument.
+                    Specifies whether to persist the results of the function in table or not.
+                    When set to True, results are persisted in table; otherwise, results
+                    are garbage collected at the end of the session.
+                    Default Value: False
+                    Types: boolean
+
+                volatile:
+                    Optional Argument.
+                    Specifies whether to put the results of the function in volatile table or not.
+                    When set to True, results are stored in volatile table, otherwise not.
+                    Default Value: False
+                    Types: boolean
+
+            Function allows the user to partition, hash, order or local order the input
+            data. These generic arguments are available for each argument that accepts
+            teradataml DataFrame as input and can be accessed as:
+                * "<input_data_arg_name>_partition_column" accepts str or list of str (Strings)
+                * "<input_data_arg_name>_hash_column" accepts str or list of str (Strings)
+                * "<input_data_arg_name>_order_column" accepts str or list of str (Strings)
+                * "local_order_<input_data_arg_name>" accepts boolean
+            Note:
+                These generic arguments are supported by teradataml if the underlying Analytic Database
+                function supports, else an exception is raised.
+
+    RETURNS:
+        Instance of Image2Matrix.
+        Output teradataml DataFrames can be accessed using attribute
+        references, such as Image2Matrix.<attribute_name>.
+        Output teradataml DataFrame attribute name is:
+            result
+
+    RAISES:
+        TeradataMlException, TypeError, ValueError
+
+    EXAMPLES:
+        # Notes:
+        #     1. Get the connection to Vantage, before importing the
+        #        function in user space.
+        #     2. User can import the function, if it is available on
+        #        Vantage user is connected to.
+        #     3. To check the list of UAF analytic functions available
+        #        on Vantage user connected to, use
+        #        "display_analytic_functions()".
+
+        # Check the list of available analytic functions.
+        display_analytic_functions()
+
+        # Import function Image2Matrix.
+        from teradataml import Image2Matrix
+        import teradataml
+
+        # Drop the image table if it is present.
+        try:
+            db_drop_table('imageTable')
+        except:
+            pass
+
+        # Create a table to store the image data.
+        execute_sql('CREATE TABLE imageTable(id INTEGER, image BLOB);')
+
+        # Load the image data into the fileContent variable.
+        file_dir = os.path.join(os.path.dirname(teradataml.__file__), "data")
+        with open(os.path.join(file_dir,'peppers.png'), mode='rb') as file:
+            fileContent = file.read()
+
+        # Insert the image data into the table.
+        sql = 'INSERT INTO imageTable VALUES(?, ?);'
+        parameters = (1, fileContent)
+        execute_sql(sql, parameters)
+
+        # Create a DataFrame for the image table.
+        imageTable = DataFrame('imageTable')
+
+        # Example 1: Convert the image to matrix with gray values.
+        image2matrix = Image2Matrix(data=imageTable.select(['id', 'image']),
+                                    output='gray')
+
+        # Print the result DataFrame.
+        print(image2matrix.result)
+
+        # Example 2: Convert the image to matrix with rgb values.
+        image2matrix2 = Image2Matrix(data=imageTable.select(['id', 'image']),
+                                     output='rgb')
+
+        # Print the result DataFrame.
+        print(image2matrix2.result)
+    """
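The docstring example above relies on names (os, db_drop_table, execute_sql, DataFrame) imported elsewhere. A self-contained sketch of the same flow, assuming an active create_context() connection and that peppers.png ships in the package's data directory, as the example implies:

    # Consolidated from the Image2Matrix docstring example; all imports are the
    # standard teradataml helpers the example assumes are already in scope.
    import os
    import teradataml
    from teradataml import DataFrame, Image2Matrix, db_drop_table, execute_sql

    try:
        db_drop_table('imageTable')          # drop a stale copy, if any
    except Exception:
        pass

    execute_sql('CREATE TABLE imageTable(id INTEGER, image BLOB);')

    # Read the sample image bundled with the package and insert it as a BLOB.
    file_dir = os.path.join(os.path.dirname(teradataml.__file__), "data")
    with open(os.path.join(file_dir, 'peppers.png'), mode='rb') as f:
        execute_sql('INSERT INTO imageTable VALUES(?, ?);', (1, f.read()))

    imageTable = DataFrame('imageTable')

    # Grayscale and RGB conversions; each result is a teradataml DataFrame.
    gray = Image2Matrix(data=imageTable.select(['id', 'image']), output='gray')
    rgb = Image2Matrix(data=imageTable.select(['id', 'image']), output='rgb')
    print(gray.result)
    print(rgb.result)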
teradataml/data/docs/uaf/docs_17_20/CopyArt.py

@@ -0,0 +1,145 @@
+def CopyArt(data=None, database_name = None,
+            table_name = None, map_name = None,
+            **generic_arguments):
+    """
+    DESCRIPTION:
+        CopyArt() function creates a copy of an existing analytics result table (ART).
+
+    PARAMETERS:
+        data:
+            Required Argument.
+            Specifies the ART data to be copied.
+            Types: DataFrame
+
+        database_name:
+            Required Argument.
+            Specifies the name of the destination database for copied ART.
+            Types: str
+
+        table_name:
+            Required Argument.
+            Specifies the name of the destination table for copied ART.
+            Types: str
+
+        map_name:
+            Optional Argument.
+            Specifies the name of the map for the destination ART.
+            By default, it refers to the map of the 'data'.
+            Types: str
+
+        **generic_arguments:
+            Specifies the generic keyword arguments of UAF functions.
+            Below are the generic keyword arguments:
+                persist:
+                    Optional Argument.
+                    Specifies whether to persist the results of the
+                    function in a table or not. When set to True,
+                    results are persisted in a table; otherwise,
+                    results are garbage collected at the end of the
+                    session.
+                    Note that, when UAF function is executed, an
+                    analytic result table (ART) is created.
+                    Default Value: False
+                    Types: bool
+
+                volatile:
+                    Optional Argument.
+                    Specifies whether to put the results of the
+                    function in a volatile ART or not. When set to
+                    True, results are stored in a volatile ART,
+                    otherwise not.
+                    Default Value: False
+                    Types: bool
+
+                output_table_name:
+                    Optional Argument.
+                    Specifies the name of the table to store results.
+                    If not specified, a unique table name is internally
+                    generated.
+                    Types: str
+
+                output_db_name:
+                    Optional Argument.
+                    Specifies the name of the database to create output
+                    table into. If not specified, table is created into
+                    database specified by the user at the time of context
+                    creation or configuration parameter. Argument is ignored,
+                    if "output_table_name" is not specified.
+                    Types: str
+
+    RETURNS:
+        Instance of CopyArt.
+        Output teradataml DataFrames can be accessed using attribute
+        references, such as obj.<attribute_name>.
+        Output teradataml DataFrame attribute name is:
+            1. result
+
+    RAISES:
+        TeradataMlException, TypeError, ValueError
+
+    EXAMPLES:
+        # Notes:
+        #     1. Get the connection to Vantage, before importing the
+        #        function in user space.
+        #     2. User can import the function, if it is available on
+        #        Vantage user is connected to.
+        #     3. To check the list of UAF analytic functions available
+        #        on Vantage user connected to, use
+        #        "display_analytic_functions()".
+
+        # Check the list of available UAF analytic functions.
+        display_analytic_functions(type="UAF")
+
+        # Import function CopyArt.
+        from teradataml import CopyArt, AutoArima
+
+        # Load the example data.
+        load_example_data("uaf", ["blood2ageandweight"])
+
+        # Create teradataml DataFrame object.
+        data = DataFrame.from_table("blood2ageandweight")
+
+        # Create teradataml TDSeries object.
+        data_series_df = TDSeries(data=data,
+                                  id="PatientID",
+                                  row_index="SeqNo",
+                                  row_index_style="SEQUENCE",
+                                  payload_field="BloodFat",
+                                  payload_content="REAL")
+
+        # Execute AutoArima function to create ART.
+        uaf_out = AutoArima(data=data_series_df,
+                            start_pq_nonseasonal=[1, 1],
+                            seasonal=False,
+                            constant=True,
+                            algorithm="MLE",
+                            fit_percentage=80,
+                            stepwise=True,
+                            nmodels=7,
+                            fit_metrics=True,
+                            residuals=True)
+
+        # Example 1: Execute CopyArt function to copy ART to a destination table name
+        #            with persist option.
+        res = CopyArt(data=uaf_out.result,
+                      database_name="alice",
+                      table_name="copied_table",
+                      persist=True)
+        print(res.result)
+
+        # Example 2: Execute CopyArt function to copy ART to a destination table name.
+        res = CopyArt(data=uaf_out.result,
+                      database_name="alice",
+                      table_name="copied_table2")
+
+        # Print the result DataFrame.
+        print(res.result)
+
+        # Example 3: Copy ART to a destination table name using uaf object.
+        res = uaf_out.copy(database_name="alice",
+                           table_name="copied_table3")
+
+        # Print the result DataFrame.
+        print(res.result)
+
+    """
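After the copy, the destination is an ordinary ART in the target database. A short sketch of verifying the copy, assuming a connected session and the "copied_table" produced in Example 1 above (db_list_tables and in_schema are existing teradataml helpers):

    from teradataml import DataFrame, db_list_tables, in_schema

    # Confirm the copied ART landed in the "alice" database ...
    print(db_list_tables(schema_name="alice", object_name="copied_table%"))

    # ... and read it back like any other table.
    copied = DataFrame(in_schema("alice", "copied_table"))
    print(copied.head())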
teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py

@@ -1,5 +1,5 @@
 def DickeyFuller(data=None, data_filter_expr=None, algorithm=None,
-                 max_lags=
+                 max_lags=0,
                  **generic_arguments):
     """
     DESCRIPTION:
@@ -10,18 +10,23 @@ def DickeyFuller(data=None, data_filter_expr=None, algorithm=None,
         other factors.

         The following procedure is an example of how to use DickeyFuller() function:
-            * Run
-            *
-
-            *
-              SeasonalNormalize()
+            * Run DickeyFuller() on the time series being modeled.
+            * Retrieve the results of the DickeyFuller() test to determine if the
+              time series contains any unit roots.
+            * If unit roots are present, use a technique such as differencing such as Diff()
+              or seasonal normalization, such as SeasonalNormalize(), to create a new series,
+              then rerun the DickeyFuller() test to verify that the differenced or
+              seasonally-normalized series unit root are removed.
+            * If the result shows unit roots, use Diff() and SeasonalNormalize()
+              to remove unit roots.


     PARAMETERS:
         data:
             Required Argument.
-
-
+            Specifies a single logical-runtime series as an input or TDAnalyticResult which
+            contains ARTFITRESIDUALS layer.
+            Types: TDSeries, TDAnalyticResult

         data_filter_expr:
             Optional Argument.
@@ -34,26 +39,18 @@ def DickeyFuller(data=None, data_filter_expr=None, algorithm=None,
             Permitted Values:
                 * NONE: Random walk
                 * DRIFT: Random walk with drift
-                * TREND: Random walk with linear trend
                 * DRIFTNTREND: Random walk with drift and trend
-                *
-
+                * SQUARED: Random walk with drift, trend, and
+                  quadratic trend.
             Types: str

         max_lags:
             Optional Argument.
             Specifies the maximum number of lags to use with the regression
-            equation.
+            equation. Range is [0, 100]
+            DefaultValue: 0
             Types: int

-        drift_trend_formula:
-            Optional Argument.
-            Specifies the formula used to represent the drift and trend portions
-            of the regression.
-            Note:
-                * Valid only when "algorithm" is set to 'formula'.
-            Types: str
-
         **generic_arguments:
             Specifies the generic keyword arguments of UAF functions.
             Below are the generic keyword arguments:
@@ -136,7 +133,7 @@ def DickeyFuller(data=None, data_filter_expr=None, algorithm=None,
         # for the presence of the unit roots using random walk with
        # linear trend for regression.
         uaf_out = DickeyFuller(data=data_series_df,
-                               algorithm='TREND')
+                               algorithm='DRIFT')

         # Print the result DataFrame.
         print(uaf_out.result)
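The documented procedure (test, difference or normalize, re-test) translates directly to code. A sketch reusing data_series_df from the docstring example above; Diff() keyword names come from its own teradataml docs and should be verified with help(Diff):

    from teradataml import DickeyFuller, Diff

    # 1. Test the original series for unit roots ("max_lags" now defaults to 0).
    test1 = DickeyFuller(data=data_series_df, algorithm='DRIFT')
    print(test1.result)

    # 2. If unit roots are present, difference the series.
    diffed = Diff(data=data_series_df, lag=1, differences=1, seasonal_multiplier=0)

    # 3. Rebuild a TDSeries over diffed.result (elided here) and rerun
    #    DickeyFuller to verify the unit roots have been removed.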
teradataml/data/jsons/sqle/20.00/TD_KMeans.json

@@ -0,0 +1,250 @@
+{
+    "json_schema_major_version": "1",
+    "json_schema_minor_version": "1",
+    "json_content_version": "1",
+    "function_name": "TD_KMeans",
+    "function_version": "1.0",
+    "function_type": "fastpath",
+    "function_category": "Model Training",
+    "function_alias_name": "TD_KMeans",
+    "function_r_name": "aa.td_kmeans",
+    "short_description": "fastpath function to generate clustering model using KMeans algorithm.",
+    "long_description": "fastpath function to generate clustering model containing cluster centroids using KMeans algorithm.",
+    "input_tables": [
+        {
+            "requiredInputKind": [
+                "PartitionByAny"
+            ],
+            "isOrdered": false,
+            "partitionByOne": false,
+            "name": "InputTable",
+            "alternateNames": [],
+            "isRequired": true,
+            "rDescription": "The relation that contains input data.",
+            "description": "The relation that contains input data.",
+            "datatype": "TABLE_ALIAS",
+            "allowsLists": false,
+            "rName": "data",
+            "useInR": true,
+            "rOrderNum": 1
+        },
+        {
+            "requiredInputKind": [
+                "Dimension"
+            ],
+            "isOrdered": false,
+            "partitionByOne": false,
+            "name": "InitialCentroidsTable",
+            "alternateNames": [],
+            "isRequired": false,
+            "rDescription": "The relation that contains set of initial centroids.",
+            "description": "The relation that contains set of initial centroids.",
+            "datatype": "TABLE_ALIAS",
+            "allowsLists": false,
+            "rName": "centroids.table",
+            "useInR": true,
+            "rOrderNum": 2
+        }
+    ],
+    "output_tables": [
+        {
+            "isOutputTable": true,
+            "omitPossible": true,
+            "name": "ModelTable",
+            "alternateNames": [],
+            "isRequired": false,
+            "rDescription": "Specifies the name of the table in which the generated KMeans model can be stored.",
+            "description": "Specifies the name of the table in which the generated KMeans model can be stored.",
+            "datatype": "TABLE_NAME",
+            "allowsLists": false,
+            "rName": "model.table",
+            "useInR": true,
+            "rOrderNum": 3
+        }
+    ],
+    "argument_clauses": [
+        {
+            "targetTable": [
+                "InputTable"
+            ],
+            "checkDuplicate": true,
+            "allowedTypes": [],
+            "allowedTypeGroups": [
+                "ALL"
+            ],
+            "matchLengthOfArgument": "",
+            "allowPadding": false,
+            "name": "IdColumn",
+            "alternateNames": [],
+            "isRequired": true,
+            "rDescription": "Specifies the column which is unique identifier of input row.",
+            "description": "Specifies the column which is unique identifier of input row.",
+            "datatype": "COLUMNS",
+            "allowsLists": false,
+            "rName": "id.column",
+            "useInR": true,
+            "rOrderNum": 4
+        },
+        {
+            "targetTable": [
+                "InputTable"
+            ],
+            "checkDuplicate": true,
+            "allowedTypes": [],
+            "allowedTypeGroups": [
+                "NUMERIC","AIVECTOR","BYTE","VARBYTE"
+            ],
+            "matchLengthOfArgument": "",
+            "allowPadding": false,
+            "name": "TargetColumns",
+            "alternateNames": [],
+            "isRequired": true,
+            "rDescription": "Specifies the columns/features to be used to cluster the data.",
+            "description": "Specifies the columns/features to be used to cluster the data.",
+            "datatype": "COLUMNS",
+            "allowsLists": true,
+            "rName": "target.columns",
+            "useInR": true,
+            "rOrderNum": 5
+        },
+        {
+            "lowerBound": 1,
+            "upperBound": 2147483647,
+            "lowerBoundType": "EXCLUSIVE",
+            "upperBoundType": "INCLUSIVE",
+            "allowNaN": false,
+            "name": "NumClusters",
+            "alternateNames": [],
+            "isRequired": false,
+            "rDescription": "Specifies the number of clusters to be produced. This argument is not allowed with InitialCentroidsTable provided.",
+            "description": "Specifies the number of clusters to be produced. This argument is not allowed with InitialCentroidsTable provided.",
+            "datatype": "INTEGER",
+            "allowsLists": false,
+            "rName": "num.clusters",
+            "useInR": true,
+            "rOrderNum": 6
+        },
+        {
+            "lowerBound": 0,
+            "upperBound": 2147483647,
+            "lowerBoundType": "INCLUSIVE",
+            "upperBoundType": "INCLUSIVE",
+            "allowNaN": false,
+            "name": "Seed",
+            "alternateNames": [],
+            "isRequired": false,
+            "rDescription": "Specify the random seed the algorithm uses for repeatable results. The algorithm uses the seed to randomly sample the input table rows as initial clusters.",
+            "description": "Specify the random seed the algorithm uses for repeatable results. The algorithm uses the seed to randomly sample the input table rows as initial clusters.",
+            "datatype": "INTEGER",
+            "allowsLists": false,
+            "rName": "seed",
+            "useInR": true,
+            "rOrderNum": 7
+        },
+        {
+            "defaultValue": 0.0395,
+            "lowerBound": 0,
+            "upperBound": 1.797e+308,
+            "lowerBoundType": "INCLUSIVE",
+            "upperBoundType": "INCLUSIVE",
+            "allowNaN": false,
+            "name": "StopThreshold",
+            "alternateNames": [],
+            "isRequired": false,
+            "rDescription": "Specify the convergence threshold. When the centroids move by less than this amount, the algorithm has converged.",
+            "description": "Specify the convergence threshold. When the centroids move by less than this amount, the algorithm has converged.",
+            "datatype": "DOUBLE",
+            "allowsLists": false,
+            "rName": "threshold",
+            "useInR": true,
+            "rOrderNum": 8
+        },
+        {
+            "defaultValue": 10,
+            "lowerBound": 1,
+            "upperBound": 2147483647,
+            "lowerBoundType": "INCLUSIVE",
+            "upperBoundType": "INCLUSIVE",
+            "allowNaN": false,
+            "name": "MaxIterNum",
+            "alternateNames": [],
+            "isRequired": false,
+            "rDescription": "Specify the maximum number of iterations that the algorithm runs before quitting if the convergence threshold has not been met.",
+            "description": "Specify the maximum number of iterations that the algorithm runs before quitting if the convergence threshold has not been met.",
+            "datatype": "INTEGER",
+            "allowsLists": false,
+            "rName": "iter.max",
+            "useInR": true,
+            "rOrderNum": 9
+        },
+        {
+            "defaultValue": 1,
+            "lowerBound": 1,
+            "upperBound": 2147483647,
+            "lowerBoundType": "INCLUSIVE",
+            "upperBoundType": "INCLUSIVE",
+            "allowNaN": false,
+            "name": "NumInit",
+            "alternateNames": [],
+            "isRequired": false,
+            "rDescription": "The number of times, the k-means algorithm will be run with different initial centroid seeds. The function will emit out the model having the least value of Total Within Cluster Squared Sum.",
+            "description": "The number of times, the k-means algorithm will be run with different initial centroid seeds. The function will emit out the model having the least value of Total Within Cluster Squared Sum.",
+            "datatype": "INTEGER",
+            "allowsLists": false,
+            "rName": "num.init",
+            "useInR": true,
+            "rOrderNum": 10
+        },
+        {
+            "defaultValue": false,
+            "name": "OutputClusterAssignment",
+            "alternateNames": [],
+            "isRequired": false,
+            "rDescription": "Specifies whether to output Cluster Assignment.",
+            "description": "Specifies whether to output Cluster Assignment.",
+            "datatype": "BOOLEAN",
+            "allowsLists": false,
+            "rName": "output.cluster.assignment",
+            "useInR": true,
+            "rOrderNum": 11
+        },
+        {
+            "permittedValues": [
+                "RANDOM",
+                "KMEANS++"
+            ],
+            "defaultValue": "RANDOM",
+            "isOutputColumn": false,
+            "matchLengthOfArgument": "",
+            "allowPadding": false,
+            "name": "InitialCentroidsMethod",
+            "alternateNames": [],
+            "isRequired": false,
+            "rDescription": "Specifies the initialization method to be used for selecting initial set of centroids.",
+            "description": "Specifies the initialization method to be used for selecting initial set of centroids.",
+            "datatype": "STRING",
+            "allowsLists": false,
+            "rName": "initialcentroids.method",
+            "useInR": true,
+            "rOrderNum": 12
+        },
+        {
+            "defaultValue": 1,
+            "lowerBound": 1,
+            "upperBound": 4096,
+            "lowerBoundType": "INCLUSIVE",
+            "upperBoundType": "INCLUSIVE",
+            "allowNaN": false,
+            "name": "EmbeddingSize",
+            "alternateNames": [],
+            "isRequired": false,
+            "rDescription": "Specify the embedding size of the vectors.",
+            "description": "Specify the embedding size of the vectors.",
+            "datatype": "INTEGER",
+            "allowsLists": false,
+            "rName": "embedding.size",
+            "useInR": true,
+            "rOrderNum": 13
+        }
+    ]
+}
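Client-side, teradataml generates its KMeans wrapper from JSONs like this one, mapping each SQL clause to a snake_case keyword roughly following the rName fields (IdColumn → id_column, StopThreshold → threshold). A hedged sketch of a call exercising the 20.00 arguments; the example table and exact keyword names are assumptions based on the existing 17.20 KMeans docs and should be checked against help(KMeans):

    # Sketch only: keyword names follow the JSON's rName entries (dots -> underscores).
    from teradataml import DataFrame, KMeans, load_example_data

    load_example_data("kmeans", "computers_train1")      # example data from KMeans docs
    computers = DataFrame("computers_train1")

    kmeans_out = KMeans(id_column="id",                      # IdColumn
                        target_columns=["price", "speed"],   # TargetColumns
                        data=computers,
                        num_clusters=2,                      # NumClusters
                        seed=0,                              # Seed
                        threshold=0.0395,                    # StopThreshold (JSON default)
                        iter_max=10,                         # MaxIterNum (JSON default)
                        num_init=1,                          # NumInit (new in 20.00)
                        output_cluster_assignment=True)      # OutputClusterAssignment
    print(kmeans_out.result)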