teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +182 -13
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +8 -13
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +16 -1
- teradataml/analytics/utils.py +60 -1
- teradataml/automl/__init__.py +290 -106
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +29 -10
- teradataml/automl/data_transformation.py +11 -0
- teradataml/automl/feature_engineering.py +64 -4
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +1 -1
- teradataml/clients/auth_client.py +12 -8
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/common/constants.py +71 -26
- teradataml/common/exceptions.py +32 -0
- teradataml/common/messagecodes.py +28 -0
- teradataml/common/messages.py +13 -4
- teradataml/common/sqlbundle.py +3 -2
- teradataml/common/utils.py +345 -45
- teradataml/context/context.py +259 -93
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +1 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -1
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/url_data.csv +10 -9
- teradataml/dataframe/copy_to.py +38 -27
- teradataml/dataframe/data_transfer.py +61 -45
- teradataml/dataframe/dataframe.py +1110 -132
- teradataml/dataframe/dataframe_utils.py +73 -27
- teradataml/dataframe/functions.py +1070 -9
- teradataml/dataframe/sql.py +750 -959
- teradataml/dbutils/dbutils.py +33 -13
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/utils.py +4 -2
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/_base.py +12 -157
- teradataml/options/configure.py +24 -9
- teradataml/scriptmgmt/UserEnv.py +317 -39
- teradataml/scriptmgmt/lls_utils.py +456 -135
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +897 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +406 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/store/__init__.py +1 -1
- teradataml/table_operators/Apply.py +16 -1
- teradataml/table_operators/Script.py +20 -1
- teradataml/table_operators/query_generator.py +4 -21
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/internal_buffer.py +22 -2
- teradataml/utils/utils.py +0 -1
- teradataml/utils/validators.py +318 -58
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/METADATA +188 -14
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/RECORD +131 -84
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "1",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_Apriori",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_alias_name": "TD_Apriori",
|
|
8
|
+
"function_type": "fastpath",
|
|
9
|
+
"function_category": "Association Analysis",
|
|
10
|
+
"function_r_name": "aa.td.apriori",
|
|
11
|
+
"short_description": "This function finds patterns and calculates different statistical metrics to understand the influence of the occurrence of one set of items on another.",
|
|
12
|
+
"long_description": "This function finds patterns and calculates different statistical metrics to understand the influence of the occurrence of one set of items on another.",
|
|
13
|
+
"input_tables": [
|
|
14
|
+
{
|
|
15
|
+
"requiredInputKind": [
|
|
16
|
+
"PartitionByAny"
|
|
17
|
+
],
|
|
18
|
+
"isOrdered": false,
|
|
19
|
+
"partitionByOne": false,
|
|
20
|
+
"name": "InputTable",
|
|
21
|
+
"alternateNames": [],
|
|
22
|
+
"isRequired": true,
|
|
23
|
+
"rDescription": "Specifies the table containing the input data.",
|
|
24
|
+
"description": "Specifies the table containing the input data.",
|
|
25
|
+
"datatype": "TABLE_ALIAS",
|
|
26
|
+
"allowsLists": false,
|
|
27
|
+
"rName": "data",
|
|
28
|
+
"useInR": true,
|
|
29
|
+
"rOrderNum": 1
|
|
30
|
+
}
|
|
31
|
+
],
|
|
32
|
+
"argument_clauses": [
|
|
33
|
+
{
|
|
34
|
+
"targetTable": [
|
|
35
|
+
"InputTable"
|
|
36
|
+
],
|
|
37
|
+
"checkDuplicate": true,
|
|
38
|
+
"allowedTypes": [],
|
|
39
|
+
"allowedTypeGroups": [
|
|
40
|
+
"STRING"
|
|
41
|
+
],
|
|
42
|
+
"matchLengthOfArgument": "",
|
|
43
|
+
"allowPadding": false,
|
|
44
|
+
"name": "TargetColumn",
|
|
45
|
+
"alternateNames": [],
|
|
46
|
+
"isRequired": true,
|
|
47
|
+
"rDescription": "Specify the column from the input table which contains the data.",
|
|
48
|
+
"description": "Specify the column from the input table which contains the data.",
|
|
49
|
+
"datatype": "COLUMNS",
|
|
50
|
+
"allowsLists": false,
|
|
51
|
+
"rName": "target.column",
|
|
52
|
+
"useInR": true,
|
|
53
|
+
"rOrderNum": 2
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
"targetTable": [
|
|
57
|
+
"InputTable"
|
|
58
|
+
],
|
|
59
|
+
"checkDuplicate": true,
|
|
60
|
+
"allowedTypes": [],
|
|
61
|
+
"allowedTypeGroups": [
|
|
62
|
+
"ALL"
|
|
63
|
+
],
|
|
64
|
+
"matchLengthOfArgument": "",
|
|
65
|
+
"allowPadding": false,
|
|
66
|
+
"name": "IDColumn",
|
|
67
|
+
"alternateNames": [],
|
|
68
|
+
"isRequired": false,
|
|
69
|
+
"rDescription": "Specifies the ID column that identifies a data point (the set of rows belonging to the same data) in a sparse input. Applicable only when isDenseInput is False.",
|
|
70
|
+
"description": "Specifies the ID column that identifies a data point (the set of rows belonging to the same data) in a sparse input. Applicable only when isDenseInput is False.",
|
|
71
|
+
"datatype": "COLUMNS",
|
|
72
|
+
"allowsLists": true,
|
|
73
|
+
"rName": "id.column",
|
|
74
|
+
"useInR": true,
|
|
75
|
+
"rOrderNum": 3
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"targetTable": [
|
|
79
|
+
"InputTable"
|
|
80
|
+
],
|
|
81
|
+
"checkDuplicate": true,
|
|
82
|
+
"allowedTypes": [],
|
|
83
|
+
"allowedTypeGroups": [
|
|
84
|
+
"ALL"
|
|
85
|
+
],
|
|
86
|
+
"matchLengthOfArgument": "",
|
|
87
|
+
"allowPadding": false,
|
|
88
|
+
"name": "PartitionColumns",
|
|
89
|
+
"alternateNames": [],
|
|
90
|
+
"isRequired": false,
|
|
91
|
+
"rDescription": "Specify the name of the input table columns on which to partition the input.",
|
|
92
|
+
"description": "Specify the name of the input table columns on which to partition the input.",
|
|
93
|
+
"datatype": "COLUMNS",
|
|
94
|
+
"allowsLists": true,
|
|
95
|
+
"rName": "partition.columns",
|
|
96
|
+
"useInR": true,
|
|
97
|
+
"rOrderNum": 4
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
"defaultValue": 2,
|
|
101
|
+
"lowerBound": 1,
|
|
102
|
+
"upperBound": 20,
|
|
103
|
+
"lowerBoundType": "INCLUSIVE",
|
|
104
|
+
"upperBoundType": "INCLUSIVE",
|
|
105
|
+
"allowNaN": false,
|
|
106
|
+
"name": "MaxLen",
|
|
107
|
+
"alternateNames": [],
|
|
108
|
+
"isRequired": false,
|
|
109
|
+
"rDescription": "Specifies the number of items in item set. The default value is 2.",
|
|
110
|
+
"description": "Specifies the number of items in item set. The default value is 2.",
|
|
111
|
+
"datatype": "INTEGER",
|
|
112
|
+
"allowsLists": false,
|
|
113
|
+
"rName": "max.len",
|
|
114
|
+
"useInR": true,
|
|
115
|
+
"rOrderNum": 5
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
"permittedValues": [],
|
|
119
|
+
"defaultValue": ",",
|
|
120
|
+
"isOutputColumn": false,
|
|
121
|
+
"name": "Delimiter",
|
|
122
|
+
"alternateNames": [],
|
|
123
|
+
"isRequired": false,
|
|
124
|
+
"rDescription": "A character or string that separates words in the input text. The default value is a comma. Applicable only when isDenseInput is True.",
|
|
125
|
+
"description": "A character or string that separates words in the input text. The default value is a comma. Applicable only when isDenseInput is True.",
|
|
126
|
+
"datatype": "STRING",
|
|
127
|
+
"allowsLists": false,
|
|
128
|
+
"rName": "delimiter",
|
|
129
|
+
"useInR": true,
|
|
130
|
+
"rOrderNum": 6
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
"defaultValue": false,
|
|
134
|
+
"name": "isDenseInput",
|
|
135
|
+
"alternateNames": [],
|
|
136
|
+
"isRequired": false,
|
|
137
|
+
"rDescription": "A Boolean value that specifies whether the input to the function is in dense format. Default is false (sparse format).",
|
|
138
|
+
"description": "A Boolean value that specifies whether the input to the function is in dense format. Default is false (sparse format).",
|
|
139
|
+
"datatype": "BOOLEAN",
|
|
140
|
+
"allowsLists": false,
|
|
141
|
+
"rName": "is.dense.input",
|
|
142
|
+
"useInR": true,
|
|
143
|
+
"rOrderNum": 7
|
|
144
|
+
},
|
|
145
|
+
{
|
|
146
|
+
"permittedValues": [
|
|
147
|
+
"PATTERNS",
|
|
148
|
+
"RULES"
|
|
149
|
+
],
|
|
150
|
+
"isOutputColumn": false,
|
|
151
|
+
"name": "PatternsOrRules",
|
|
152
|
+
"alternateNames": [],
|
|
153
|
+
"isRequired": false,
|
|
154
|
+
"rDescription": "Specify whether to emit PATTERNS or RULES as output.",
|
|
155
|
+
"description": "Specify whether to emit PATTERNS or RULES as output.",
|
|
156
|
+
"datatype": "STRING",
|
|
157
|
+
"allowsLists": false,
|
|
158
|
+
"rName": "patterns.or.rules",
|
|
159
|
+
"useInR": true,
|
|
160
|
+
"rOrderNum": 8
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
"defaultValue": 0.01,
|
|
164
|
+
"lowerBound": 0,
|
|
165
|
+
"upperBound": 1,
|
|
166
|
+
"lowerBoundType": "EXCLUSIVE",
|
|
167
|
+
"upperBoundType": "INCLUSIVE",
|
|
168
|
+
"allowNaN": false,
|
|
169
|
+
"name": "Support",
|
|
170
|
+
"alternateNames": [],
|
|
171
|
+
"isRequired": false,
|
|
172
|
+
"rDescription": "Specify the support value (minimum occurrence threshold) of an itemset.",
|
|
173
|
+
"description": "Specify the support value (minimum occurrence threshold) of an itemset.",
|
|
174
|
+
"datatype": "DOUBLE",
|
|
175
|
+
"allowsLists": false,
|
|
176
|
+
"rName": "support",
|
|
177
|
+
"useInR": true,
|
|
178
|
+
"rOrderNum": 9
|
|
179
|
+
}
|
|
180
|
+
]
|
|
181
|
+
}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "1",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_NERExtractor",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_type": "fastpath",
|
|
8
|
+
"function_category": "Text Analysis",
|
|
9
|
+
"function_alias_name": "TD_NERExtractor",
|
|
10
|
+
"function_r_name": "aa.td_nerextractor",
|
|
11
|
+
"ref_function_r_name": "aa.td_nerextractor",
|
|
12
|
+
"short_description": "Perform Named Entity Recognition (NER) on input text according to user-defined dictionary words or regular expression (regex) patterns.",
|
|
13
|
+
"long_description": "Perform Named Entity Recognition (NER) on input text according to user-defined dictionary words or regular expression (regex) patterns.",
|
|
14
|
+
"input_tables": [
|
|
15
|
+
{
|
|
16
|
+
"requiredInputKind": [
|
|
17
|
+
"PartitionByAny"
|
|
18
|
+
],
|
|
19
|
+
"isOrdered": false,
|
|
20
|
+
"partitionByOne": false,
|
|
21
|
+
"name": "inputtable",
|
|
22
|
+
"alternateNames": [],
|
|
23
|
+
"isRequired": true,
|
|
24
|
+
"rDescription": "Specifies the table containing the input text data.",
|
|
25
|
+
"description": "Specifies the table containing the input text data.",
|
|
26
|
+
"datatype": "TABLE_ALIAS",
|
|
27
|
+
"allowsLists": false,
|
|
28
|
+
"rName": "data",
|
|
29
|
+
"useInR": true,
|
|
30
|
+
"rOrderNum": 1
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"requiredInputKind": [
|
|
34
|
+
"Dimension"
|
|
35
|
+
],
|
|
36
|
+
"isOrdered": false,
|
|
37
|
+
"partitionByOne": false,
|
|
38
|
+
"name": "dict",
|
|
39
|
+
"alternateNames": [],
|
|
40
|
+
"isRequired": true,
|
|
41
|
+
"rDescription": "Specifies the table containing user-defined dictionary words and their entity label.",
|
|
42
|
+
"description": "Specifies the table containing user-defined dictionary words and their entity label.",
|
|
43
|
+
"datatype": "TABLE_ALIAS",
|
|
44
|
+
"allowsLists": false,
|
|
45
|
+
"rName": "user.defined.data",
|
|
46
|
+
"useInR": true,
|
|
47
|
+
"rOrderNum": 2
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"requiredInputKind": [
|
|
51
|
+
"Dimension"
|
|
52
|
+
],
|
|
53
|
+
"isOrdered": false,
|
|
54
|
+
"partitionByOne": false,
|
|
55
|
+
"name": "rules",
|
|
56
|
+
"alternateNames": [],
|
|
57
|
+
"isRequired": true,
|
|
58
|
+
"rDescription": "Specifies the table containing user-defined regex patterns and their entity label.",
|
|
59
|
+
"description": "Specifies the table containing user-defined regex patterns and their entity label.",
|
|
60
|
+
"datatype": "TABLE_ALIAS",
|
|
61
|
+
"allowsLists": false,
|
|
62
|
+
"rName": "rules.data",
|
|
63
|
+
"useInR": true,
|
|
64
|
+
"rOrderNum": 3
|
|
65
|
+
}
|
|
66
|
+
],
|
|
67
|
+
"argument_clauses": [
|
|
68
|
+
{
|
|
69
|
+
"targetTable": [
|
|
70
|
+
"inputtable"
|
|
71
|
+
],
|
|
72
|
+
"matchLengthOfArgument": "",
|
|
73
|
+
"allowPadding": false,
|
|
74
|
+
"name": "TextColumn",
|
|
75
|
+
"alternateNames": [],
|
|
76
|
+
"isRequired": true,
|
|
77
|
+
"rDescription": "Specify the name of the input table column that will be used for NER search.",
|
|
78
|
+
"description": "Specify the name of the input table column that will be used for NER search.",
|
|
79
|
+
"datatype": "COLUMNS",
|
|
80
|
+
"allowsLists": true,
|
|
81
|
+
"rName": "text.column",
|
|
82
|
+
"useInR": true,
|
|
83
|
+
"rFormulaUsage": true,
|
|
84
|
+
"rOrderNum": 4
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
"defaultValue": "EN",
|
|
88
|
+
"permittedValues": [
|
|
89
|
+
"EN"
|
|
90
|
+
],
|
|
91
|
+
"isOutputColumn": false,
|
|
92
|
+
"name": "InputLanguage",
|
|
93
|
+
"alternateNames": [],
|
|
94
|
+
"isRequired": false,
|
|
95
|
+
"rDescription": "Specify input language. Acceptable values are EN (English).",
|
|
96
|
+
"description": "Specify input language. Acceptable values are EN (English).",
|
|
97
|
+
"datatype": "STRING",
|
|
98
|
+
"allowsLists": false,
|
|
99
|
+
"rName": "input.language",
|
|
100
|
+
"useInR": true,
|
|
101
|
+
"rOrderNum": 5
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"defaultValue": 0,
|
|
105
|
+
"lowerBound": 1,
|
|
106
|
+
"upperBound": 10,
|
|
107
|
+
"lowerBoundType": "INCLUSIVE",
|
|
108
|
+
"upperBoundType": "INCLUSIVE",
|
|
109
|
+
"allowNaN": false,
|
|
110
|
+
"name": "ShowContext",
|
|
111
|
+
"alternateNames": [],
|
|
112
|
+
"isRequired": false,
|
|
113
|
+
"rDescription": "Specify the number of words before and after the matched entity. If there are fewer leading or trailing words than ShowContext, an ellipsis (...) is added. Must be a positive value less than 10.",
|
|
114
|
+
"description": "Specify the number of words before and after the matched entity. If there are fewer leading or trailing words than ShowContext, an ellipsis (...) is added. Must be a positive value less than 10.",
|
|
115
|
+
"datatype": "INTEGER",
|
|
116
|
+
"allowsLists": false,
|
|
117
|
+
"rName": "show.context",
|
|
118
|
+
"useInR": true,
|
|
119
|
+
"rOrderNum": 6
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"targetTable": [
|
|
123
|
+
"inputtable"
|
|
124
|
+
],
|
|
125
|
+
"checkDuplicate": false,
|
|
126
|
+
"allowedTypes": [],
|
|
127
|
+
"allowedTypeGroups": [
|
|
128
|
+
"ALL"
|
|
129
|
+
],
|
|
130
|
+
"matchLengthOfArgument": "",
|
|
131
|
+
"allowPadding": false,
|
|
132
|
+
"name": "Accumulate",
|
|
133
|
+
"alternateNames": [],
|
|
134
|
+
"isRequired": false,
|
|
135
|
+
"rDescription": "Specify the names of the input table columns that need to be copied from the input test table to output.",
|
|
136
|
+
"description": "Specify the names of the input table columns that need to be copied from the input test table to output.",
|
|
137
|
+
"datatype": "COLUMNS",
|
|
138
|
+
"allowsLists": true,
|
|
139
|
+
"rName": "accumulate",
|
|
140
|
+
"useInR": true,
|
|
141
|
+
"rOrderNum": 7
|
|
142
|
+
}
|
|
143
|
+
]
|
|
144
|
+
}
|
|
145
|
+
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "2",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_SMOTE",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_type": "fastpath",
|
|
8
|
+
"function_category": "Feature Engineering Transform",
|
|
9
|
+
"function_alias_name": "TD_SMOTE",
|
|
10
|
+
"function_r_name": "aa.td_smote",
|
|
11
|
+
"ref_function_r_name": "aa.td_smote",
|
|
12
|
+
"short_description": "This function generates data by oversampling a minority class.",
|
|
13
|
+
"long_description": "This function generates data by oversampling a minority class using smote, adasyn, borderline-2 or smote-nc algorithms.",
|
|
14
|
+
"input_tables": [
|
|
15
|
+
{
|
|
16
|
+
"requiredInputKind": [
|
|
17
|
+
"PartitionByAny"
|
|
18
|
+
],
|
|
19
|
+
"isOrdered": false,
|
|
20
|
+
"partitionByOne": false,
|
|
21
|
+
"name": "InputTable",
|
|
22
|
+
"alternateNames": [],
|
|
23
|
+
"isRequired": true,
|
|
24
|
+
"rDescription": "Specifies the table containing the input data.",
|
|
25
|
+
"description": "Specifies the table containing the input data.",
|
|
26
|
+
"datatype": "TABLE_ALIAS",
|
|
27
|
+
"allowsLists": false,
|
|
28
|
+
"rName": "data",
|
|
29
|
+
"useInR": true,
|
|
30
|
+
"rOrderNum": 1
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"requiredInputKind": [
|
|
34
|
+
"Dimension"
|
|
35
|
+
],
|
|
36
|
+
|
|
37
|
+
"isOrdered": false,
|
|
38
|
+
"partitionByOne": false,
|
|
39
|
+
"name": "EncodingsTable",
|
|
40
|
+
"alternateNames": [],
|
|
41
|
+
"isRequired": false,
|
|
42
|
+
"rDescription": "Specifies the table created with TD_OrdinalEncodingFit output, using the categorical input columns as TargetColumns. Used and required only with smotenc.",
|
|
43
|
+
"description": "Specifies the table created with TD_OrdinalEncodingFit output, using the categorical input columns as TargetColumns. Used and required only with smotenc.",
|
|
44
|
+
"datatype": "TABLE_ALIAS",
|
|
45
|
+
"allowsLists": false,
|
|
46
|
+
"rName": "encoding.data",
|
|
47
|
+
"useInR": true,
|
|
48
|
+
"rOrderNum": 2
|
|
49
|
+
}
|
|
50
|
+
],
|
|
51
|
+
"argument_clauses": [
|
|
52
|
+
{
|
|
53
|
+
"targetTable": [
|
|
54
|
+
"InputTable"
|
|
55
|
+
],
|
|
56
|
+
|
|
57
|
+
"checkDuplicate": true,
|
|
58
|
+
"allowedTypes": [],
|
|
59
|
+
"allowedTypeGroups": [
|
|
60
|
+
"INTEGER"
|
|
61
|
+
],
|
|
62
|
+
|
|
63
|
+
"matchLengthOfArgument": "",
|
|
64
|
+
"allowPadding": false,
|
|
65
|
+
"name": "IDColumn",
|
|
66
|
+
"alternateNames": [],
|
|
67
|
+
"isRequired": true,
|
|
68
|
+
"rDescription": "specifies the name of the column that uniquely identifies a data sample in the input table.",
|
|
69
|
+
"description": "specifies the name of the column that uniquely identifies a data sample in the input table.",
|
|
70
|
+
"datatype": "COLUMNS",
|
|
71
|
+
"allowsLists": false,
|
|
72
|
+
"rName": "id.column",
|
|
73
|
+
"useInR": true,
|
|
74
|
+
"rOrderNum": 3
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
"targetTable": [
|
|
78
|
+
"InputTable"
|
|
79
|
+
],
|
|
80
|
+
"checkDuplicate": true,
|
|
81
|
+
"allowedTypes": [],
|
|
82
|
+
"allowedTypeGroups": [
|
|
83
|
+
"NUMERIC"
|
|
84
|
+
],
|
|
85
|
+
|
|
86
|
+
"matchLengthOfArgument": "",
|
|
87
|
+
"allowPadding": false,
|
|
88
|
+
"name": "ResponseColumn",
|
|
89
|
+
"alternateNames": [],
|
|
90
|
+
"isRequired": false,
|
|
91
|
+
"rDescription": "specifies the name of the input table column that contains the numeric value to be used as the response value for a sample.",
|
|
92
|
+
"description": "specifies the name of the input table column that contains the numeric value to be used as the response value for a sample.",
|
|
93
|
+
"datatype": "COLUMNS",
|
|
94
|
+
"allowsLists": false,
|
|
95
|
+
"rName": "response.column",
|
|
96
|
+
"useInR": true,
|
|
97
|
+
"rOrderNum": 4
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
"targetTable": [
|
|
101
|
+
"InputTable"
|
|
102
|
+
],
|
|
103
|
+
"checkDuplicate": true,
|
|
104
|
+
"allowedTypes": [],
|
|
105
|
+
"allowedTypeGroups": [
|
|
106
|
+
"NUMERIC"
|
|
107
|
+
],
|
|
108
|
+
"matchLengthOfArgument": "",
|
|
109
|
+
"allowPadding": false,
|
|
110
|
+
"name": "InputColumns",
|
|
111
|
+
"alternateNames": [],
|
|
112
|
+
"isRequired": true,
|
|
113
|
+
"rDescription": "specifies the name of the input table columns that the function uses for oversampling.",
|
|
114
|
+
"description": "specifies the name of the input table columns that the function uses for oversampling.",
|
|
115
|
+
"datatype": "COLUMNS",
|
|
116
|
+
"allowsLists": true,
|
|
117
|
+
"rName": "input.columns",
|
|
118
|
+
"useInR": true,
|
|
119
|
+
"rOrderNum": 5
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"targetTable": [
|
|
123
|
+
"InputTable"
|
|
124
|
+
],
|
|
125
|
+
"checkDuplicate": true,
|
|
126
|
+
"allowedTypes": [],
|
|
127
|
+
"allowedTypeGroups": [
|
|
128
|
+
"STRING"
|
|
129
|
+
],
|
|
130
|
+
"matchLengthOfArgument": "",
|
|
131
|
+
"allowPadding": false,
|
|
132
|
+
"name": "CategoricalInputColumns",
|
|
133
|
+
"alternateNames": [],
|
|
134
|
+
"isRequired": false,
|
|
135
|
+
"rDescription": "specifies the name of the categorical columns in the input table that the function uses for oversampling with smotenc.",
|
|
136
|
+
"description": "specifies the name of the categorical columns in the input table that the function uses for oversampling with smotenc.",
|
|
137
|
+
"datatype": "COLUMNS",
|
|
138
|
+
"allowsLists": true,
|
|
139
|
+
"rName": "categorical.columns",
|
|
140
|
+
"useInR": true,
|
|
141
|
+
"rOrderNum": 6
|
|
142
|
+
},
|
|
143
|
+
{
|
|
144
|
+
"isOutputColumn": false,
|
|
145
|
+
"name": "MedianStandardDeviation",
|
|
146
|
+
"alternateNames": [],
|
|
147
|
+
"isRequired": false,
|
|
148
|
+
"rDescription": "specifies the median of the standard deviations computed over the numerical input columns. Required only with smotenc.",
|
|
149
|
+
"description": "specifies the median of the standard deviations computed over the numerical input columns. Required only with smotenc.",
|
|
150
|
+
"datatype": "DOUBLE",
|
|
151
|
+
"allowsLists": false,
|
|
152
|
+
"rName": "median.standard.deviation",
|
|
153
|
+
"useInR": true,
|
|
154
|
+
"rOrderNum": 7
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
"isOutputColumn": false,
|
|
158
|
+
"name": "MinorityClass",
|
|
159
|
+
"alternateNames": [],
|
|
160
|
+
"isRequired": true,
|
|
161
|
+
"rDescription": "specifies the minority class for which synthetic samples need to be generated. The label for minority class under response column must be numeric integer.",
|
|
162
|
+
"description": "specifies the minority class for which synthetic samples need to be generated. The label for minority class under response column must be numeric integer.",
|
|
163
|
+
"datatype": "STRING",
|
|
164
|
+
"allowsLists": false,
|
|
165
|
+
"rName": "minority.class",
|
|
166
|
+
"useInR": true,
|
|
167
|
+
"rOrderNum": 8
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
"isOutputColumn": false,
|
|
171
|
+
"defaultValue":5,
|
|
172
|
+
"allowNaN": false,
|
|
173
|
+
"name": "OversamplingFactor",
|
|
174
|
+
"alternateNames": [],
|
|
175
|
+
"isRequired": true,
|
|
176
|
+
"rDescription": "specifies the factor for oversampling the minority class.",
|
|
177
|
+
"description": "specifies the factor for oversampling the minority class.",
|
|
178
|
+
"datatype": "DOUBLE",
|
|
179
|
+
"allowsLists": false,
|
|
180
|
+
"rName": "oversampling.factor",
|
|
181
|
+
"useInR": true,
|
|
182
|
+
"rOrderNum": 9
|
|
183
|
+
},
|
|
184
|
+
{
|
|
185
|
+
"permittedValues": ["smote", "adasyn", "borderline","smotenc"],
|
|
186
|
+
"isOutputColumn": false,
|
|
187
|
+
"defaultValue": "smote",
|
|
188
|
+
"name": "SamplingStrategy",
|
|
189
|
+
"alternateNames": [],
|
|
190
|
+
"isRequired": false,
|
|
191
|
+
"rDescription": "specifies the oversampling algorithm to be used to create synthetic samples.",
|
|
192
|
+
"description": "specifies the oversampling algorithm to be used to create synthetic samples.",
|
|
193
|
+
"datatype": "STRING",
|
|
194
|
+
"allowsLists": false,
|
|
195
|
+
"rName": "sampling.strategy",
|
|
196
|
+
"useInR": true,
|
|
197
|
+
"rOrderNum": 10
|
|
198
|
+
},
|
|
199
|
+
{
|
|
200
|
+
"defaultValue": true,
|
|
201
|
+
"isOutputColumn": false,
|
|
202
|
+
"name": "FillSampleID",
|
|
203
|
+
"isRequired": false,
|
|
204
|
+
"rDescription": "A boolean flag which specifies whether the function should write out the id of the observation used to generate the corresponding new synthetic observations.",
|
|
205
|
+
"description": "A boolean flag which specifies whether the function should write out the id of the observation used to generate the corresponding new synthetic observations.",
|
|
206
|
+
"datatype": "BOOLEAN",
|
|
207
|
+
"allowsLists": false,
|
|
208
|
+
"rName": "fill.sampleid",
|
|
209
|
+
"useInR": true,
|
|
210
|
+
"rOrderNum": 11
|
|
211
|
+
},
|
|
212
|
+
{
|
|
213
|
+
"permittedValues": ["sample", "neighbor", "null"],
|
|
214
|
+
"isOutputColumn": false,
|
|
215
|
+
"defaultValue": "sample",
|
|
216
|
+
"name": "ValueForNonInputColumns",
|
|
217
|
+
"alternateNames": [],
|
|
218
|
+
"isRequired": false,
|
|
219
|
+
"rDescription": "Specifies the value to put in a sample column for columns not specified as input columns.",
|
|
220
|
+
"description": "Specifies the value to put in a sample column for columns not specified as input columns.",
|
|
221
|
+
"datatype": "STRING",
|
|
222
|
+
"allowsLists": false,
|
|
223
|
+
"rName": "noninput.columns.value",
|
|
224
|
+
"useInR": true,
|
|
225
|
+
"rOrderNum": 12
|
|
226
|
+
},
|
|
227
|
+
{
|
|
228
|
+
"defaultValue": 5,
|
|
229
|
+
"lowerBound": 2,
|
|
230
|
+
"upperBound": 100,
|
|
231
|
+
"lowerBoundType": "INCLUSIVE",
|
|
232
|
+
"upperBoundType": "INCLUSIVE",
|
|
233
|
+
"allowNaN": false,
|
|
234
|
+
"isOutputColumn": false,
|
|
235
|
+
"matchLengthOfArgument": "",
|
|
236
|
+
"allowPadding": false,
|
|
237
|
+
"name": "NumberOfNeighbors",
|
|
238
|
+
"alternateNames": [],
|
|
239
|
+
"isRequired": false,
|
|
240
|
+
"rDescription": "Specifies the number of nearest neighbors for choosing the sample to be used in oversampling.",
|
|
241
|
+
"description": "Specifies the number of nearest neighbors for choosing the sample to be used in oversampling.",
|
|
242
|
+
"datatype": "INTEGER",
|
|
243
|
+
"allowsLists": false,
|
|
244
|
+
"rName": "n.neighbors",
|
|
245
|
+
"useInR": true,
|
|
246
|
+
"rOrderNum": 13
|
|
247
|
+
},
|
|
248
|
+
{
|
|
249
|
+
"lowerBound": 0,
|
|
250
|
+
"upperBound": 186006,
|
|
251
|
+
"lowerBoundType": "INCLUSIVE",
|
|
252
|
+
"upperBoundType": "INCLUSIVE",
|
|
253
|
+
"allowNaN": false,
|
|
254
|
+
"name": "Seed",
|
|
255
|
+
"alternateNames": [],
|
|
256
|
+
"isRequired": false,
|
|
257
|
+
"rDescription": "Specify the random seed the algorithm uses for repeatable results. The function uses the seed for random interpolation and to generate the synthetic sample.",
|
|
258
|
+
"description": "Specify the random seed the algorithm uses for repeatable results. The function uses the seed for random interpolation and to generate the synthetic sample.",
|
|
259
|
+
"datatype": "INTEGER",
|
|
260
|
+
"allowsLists": false,
|
|
261
|
+
"rName": "seed",
|
|
262
|
+
"useInR": true,
|
|
263
|
+
"rOrderNum": 14
|
|
264
|
+
}
|
|
265
|
+
]
|
|
266
|
+
}
|
|
267
|
+
|