teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +112 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +224 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +6 -4
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +245 -306
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +313 -82
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +109 -146
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/constants.py +37 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +151 -120
- teradataml/common/messagecodes.py +4 -1
- teradataml/common/messages.py +2 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +97 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +72 -2
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_fit.py +17 -10
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +2 -2
- teradataml/data/scripts/sklearn/sklearn_function.template +30 -7
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +55 -4
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +474 -41
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +658 -20
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +322 -16
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +154 -69
- teradataml/options/__init__.py +3 -1
- teradataml/options/configure.py +14 -2
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +10 -6
- teradataml/scriptmgmt/lls_utils.py +3 -2
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +1 -1
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +115 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +200 -140
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
|
@@ -87,13 +87,19 @@ def argument_deprecation(tdml_version, deprecated_arguments, behaviour=False, al
|
|
|
87
87
|
|
|
88
88
|
def decorator(func):
|
|
89
89
|
def wrapper(*args, **kwargs):
|
|
90
|
-
|
|
91
|
-
if
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
if
|
|
95
|
-
|
|
96
|
-
|
|
90
|
+
# Check if deprecated arguments is a list, if not convert it to a list
|
|
91
|
+
deprecated_args_list = deprecated_arguments if isinstance(deprecated_arguments, list) \
|
|
92
|
+
else [deprecated_arguments]
|
|
93
|
+
# Check list of deprecated arguments are used in the function call
|
|
94
|
+
deprecated_arguments_used = [arg for arg in deprecated_args_list if arg in kwargs]
|
|
95
|
+
if deprecated_arguments_used:
|
|
96
|
+
msg = "\nThe argument(s) \"{}\" will be deprecated in {}."
|
|
97
|
+
if behaviour:
|
|
98
|
+
msg = "\nBehaviour of the argument(s) \"{}\" will change in {}."
|
|
99
|
+
msg = msg.format(deprecated_arguments_used, tdml_version)
|
|
100
|
+
if alternatives is not None:
|
|
101
|
+
msg += "\nUse argument(s) \"{}\" instead.".format(alternatives)
|
|
102
|
+
warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
|
|
97
103
|
return func(*args, **kwargs)
|
|
98
104
|
|
|
99
105
|
return wraps(func)(wrapper)
|
|
@@ -22,6 +22,7 @@ from teradataml.common.messages import Messages
|
|
|
22
22
|
from teradataml.common.messagecodes import MessageCodes
|
|
23
23
|
from teradataml.common.constants import TeradataConstants
|
|
24
24
|
from teradataml.options.configure import configure
|
|
25
|
+
from teradataml.utils.internal_buffer import _InternalBuffer
|
|
25
26
|
from teradatasql import OperationalError
|
|
26
27
|
import psutil
|
|
27
28
|
import getpass
|
|
@@ -36,15 +37,22 @@ class GarbageCollector():
|
|
|
36
37
|
garbage collection, so that they can be dropped when connection is disconnected/lost.
|
|
37
38
|
Writes to a output file where the database name & table/view/script names are persisted.
|
|
38
39
|
"""
|
|
39
|
-
|
|
40
|
+
# Adding old garbage collector file name to support backward compatibility.
|
|
41
|
+
__old_garbage_persistent_file_name = getpass.getuser() + "_garbagecollect.info"
|
|
40
42
|
__garbagecollector_folder_name = '.teradataml'
|
|
41
43
|
__contentseperator = ","
|
|
44
|
+
__filenameseperator = "_"
|
|
42
45
|
__version = "ver1.0"
|
|
43
46
|
__gc_tables = []
|
|
44
47
|
__gc_views = []
|
|
45
48
|
__gc_scripts = []
|
|
46
49
|
__gc_container = []
|
|
47
50
|
__gc_apply = []
|
|
51
|
+
# Function to get the garbage collector file name specific to host and process.
|
|
52
|
+
_get_gc_file_name = lambda: "{}_{}_{}_garbagecollect.info".format(
|
|
53
|
+
getpass.getuser(),
|
|
54
|
+
tdmlctx.context._get_host_ip(),
|
|
55
|
+
str(os.getpid()))
|
|
48
56
|
|
|
49
57
|
@staticmethod
|
|
50
58
|
def _get_temp_dir_name():
|
|
@@ -93,7 +101,7 @@ class GarbageCollector():
|
|
|
93
101
|
"""
|
|
94
102
|
tempdir = GarbageCollector._get_temp_dir_name()
|
|
95
103
|
os.makedirs(tempdir, exist_ok=True)
|
|
96
|
-
tempfile = os.path.join(os.path.sep, tempdir, GarbageCollector.
|
|
104
|
+
tempfile = os.path.join(os.path.sep, tempdir, GarbageCollector._get_gc_file_name())
|
|
97
105
|
return tempfile
|
|
98
106
|
|
|
99
107
|
@staticmethod
|
|
@@ -205,29 +213,31 @@ class GarbageCollector():
|
|
|
205
213
|
EXAMPLES:
|
|
206
214
|
GarbageCollector._add_to_garbagecollector(object_name = "temp"."temp_table1")
|
|
207
215
|
"""
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
216
|
+
# Use global lock while writing to the garbage collector file.
|
|
217
|
+
with _InternalBuffer.get("global_lock"):
|
|
218
|
+
if object_name and object_type:
|
|
219
|
+
try:
|
|
220
|
+
tempfilename = GarbageCollector.__make_temp_file_name()
|
|
221
|
+
writecontent = str(GarbageCollector.__version) + "," + str(os.getpid())
|
|
222
|
+
writecontent += "," + str(object_type.value)
|
|
223
|
+
writecontent += "," + object_name + "\n"
|
|
224
|
+
with open(tempfilename, 'a+') as fgc:
|
|
225
|
+
fgc.write(writecontent)
|
|
226
|
+
if configure._validate_gc:
|
|
227
|
+
GarbageCollector.__validate_gc_add_object(object_name, object_type)
|
|
228
|
+
except TeradataMlException:
|
|
229
|
+
raise
|
|
230
|
+
except Exception as err:
|
|
231
|
+
logger.error(Messages.get_message(MessageCodes.TDMLDF_CREATE_GARBAGE_COLLECTOR) + str(err))
|
|
232
|
+
raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_GARBAGE_COLLECTOR),
|
|
233
|
+
MessageCodes.TDMLDF_CREATE_GARBAGE_COLLECTOR) from err
|
|
234
|
+
finally:
|
|
235
|
+
if fgc is not None:
|
|
236
|
+
fgc.close()
|
|
227
237
|
return True
|
|
228
238
|
|
|
229
239
|
@staticmethod
|
|
230
|
-
def __deleterow(content_row):
|
|
240
|
+
def __deleterow(content_row, file_name):
|
|
231
241
|
"""
|
|
232
242
|
DESCRIPTION:
|
|
233
243
|
Deletes an entry from persisted file.
|
|
@@ -237,6 +247,11 @@ class GarbageCollector():
|
|
|
237
247
|
Required Argument.
|
|
238
248
|
Specifies the text of row to delete from the persisted file.
|
|
239
249
|
Types: str
|
|
250
|
+
|
|
251
|
+
file_name:
|
|
252
|
+
Required Argument.
|
|
253
|
+
Specifies the name of the file to delete the row.
|
|
254
|
+
Types: str
|
|
240
255
|
|
|
241
256
|
RETURNS:
|
|
242
257
|
None.
|
|
@@ -248,10 +263,9 @@ class GarbageCollector():
|
|
|
248
263
|
GarbageCollector._deleterow(content_row = 'ver1.0,72136,3,"alice"."temp_table_gbview1"')
|
|
249
264
|
"""
|
|
250
265
|
try:
|
|
251
|
-
|
|
252
|
-
if not os.path.isfile(tempfilename):
|
|
266
|
+
if not os.path.isfile(file_name):
|
|
253
267
|
return True
|
|
254
|
-
with open(
|
|
268
|
+
with open(file_name, 'r+') as fgc:
|
|
255
269
|
output = fgc.readlines()
|
|
256
270
|
fgc.seek(0)
|
|
257
271
|
for dbtablename in output:
|
|
@@ -491,104 +505,121 @@ class GarbageCollector():
|
|
|
491
505
|
"""
|
|
492
506
|
try:
|
|
493
507
|
td_connection = tdmlctx.context.get_connection()
|
|
494
|
-
|
|
495
|
-
|
|
508
|
+
# Get the temp directory where garbage collector file is persisted.
|
|
509
|
+
tempdir = GarbageCollector._get_temp_dir_name()
|
|
510
|
+
# Garbage collect file that is created by the current host and current process.
|
|
511
|
+
# Also check if file is not of current process and associated process is
|
|
512
|
+
# currently running in the system or not.
|
|
513
|
+
# Walk through the temp directory and filter garbage collector files.
|
|
514
|
+
tempfiles = []
|
|
515
|
+
for root, _, files in os.walk(tempdir):
|
|
516
|
+
for file in files:
|
|
517
|
+
if file.endswith('_garbagecollect.info'):
|
|
518
|
+
try:
|
|
519
|
+
filepath = os.path.join(root, file)
|
|
520
|
+
fileparts = file.split(GarbageCollector.__filenameseperator)
|
|
521
|
+
hostname = fileparts[1]
|
|
522
|
+
filepid = int(fileparts[2])
|
|
523
|
+
if hostname == tdmlctx.context._get_host_ip():
|
|
524
|
+
if filepid == os.getpid() or not psutil.pid_exists(filepid):
|
|
525
|
+
tempfiles.append(filepath)
|
|
526
|
+
except (IndexError, ValueError):
|
|
527
|
+
# Handle the case where the filename format is not as expected
|
|
528
|
+
# check if old garbage collector file is present.
|
|
529
|
+
if file == GarbageCollector.__old_garbage_persistent_file_name:
|
|
530
|
+
tempfiles.append(filepath)
|
|
531
|
+
|
|
532
|
+
# Process each garbage collector file.
|
|
533
|
+
if len(tempfiles) == 0:
|
|
496
534
|
return True
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
contentpid = int(recordparts[1].strip())
|
|
508
|
-
# Check and garbage collect even currrent running process at exit.
|
|
509
|
-
# Check if contentpid is not of current process as well as any
|
|
510
|
-
# currently running process in the system
|
|
511
|
-
proceed_to_cleanup = False
|
|
512
|
-
if contentpid != int(os.getpid()):
|
|
513
|
-
if not psutil.pid_exists(contentpid):
|
|
514
|
-
proceed_to_cleanup = True
|
|
515
|
-
else:
|
|
516
|
-
proceed_to_cleanup = True
|
|
517
|
-
if proceed_to_cleanup == True:
|
|
518
|
-
object_type = int(recordparts[2].strip())
|
|
519
|
-
database_object = recordparts[3].strip()
|
|
520
|
-
|
|
521
|
-
# Create the TeradataConstant to use with __delete_object_from_gc_list().
|
|
522
|
-
object_type_enum = TeradataConstants(object_type)
|
|
523
|
-
|
|
535
|
+
else:
|
|
536
|
+
for tempfilename in tempfiles:
|
|
537
|
+
if not os.path.isfile(tempfilename):
|
|
538
|
+
return True
|
|
539
|
+
with open(tempfilename, 'r+') as fgc:
|
|
540
|
+
content = fgc.readlines()
|
|
541
|
+
|
|
542
|
+
for contentrecord in content:
|
|
543
|
+
contentrecord = contentrecord.strip()
|
|
544
|
+
if (td_connection is not None) and (len(contentrecord) > 0):
|
|
524
545
|
try:
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
"[Teradata Database] [Error 3807] Object" in str(operr) or \
|
|
568
|
-
"[Teradata Database] [Error 9852] The file" in str(operr):
|
|
569
|
-
GarbageCollector.__deleterow(contentrecord)
|
|
570
|
-
# Delete entry from gc lists of required.
|
|
571
|
-
GarbageCollector.__delete_object_from_gc_list(database_object,
|
|
572
|
-
object_type_enum)
|
|
573
|
-
except (TeradataMlException, RuntimeError) as err:
|
|
574
|
-
if "Failed to execute get_env" in str(err) or \
|
|
575
|
-
"Failed to execute remove_file" in str(err):
|
|
576
|
-
# For removing files in OpenAF environment.
|
|
577
|
-
GarbageCollector.__deleterow(contentrecord)
|
|
578
|
-
# Delete entry from gc lists of required.
|
|
546
|
+
recordparts = contentrecord.split(GarbageCollector.__contentseperator)
|
|
547
|
+
object_type = int(recordparts[2].strip())
|
|
548
|
+
database_object = recordparts[3].strip()
|
|
549
|
+
|
|
550
|
+
# Create the TeradataConstant to use with __delete_object_from_gc_list().
|
|
551
|
+
object_type_enum = TeradataConstants(object_type)
|
|
552
|
+
|
|
553
|
+
try:
|
|
554
|
+
# Drop the table/view/script/container based on database object type retrieved from the collector file.
|
|
555
|
+
# # Drop table.
|
|
556
|
+
if TeradataConstants.TERADATA_TABLE.value == object_type:
|
|
557
|
+
tdmlutil.utils.UtilFuncs._drop_table(database_object,
|
|
558
|
+
check_table_exist=False)
|
|
559
|
+
|
|
560
|
+
# # Drop view.
|
|
561
|
+
elif TeradataConstants.TERADATA_VIEW.value == object_type:
|
|
562
|
+
tdmlutil.utils.UtilFuncs._drop_view(database_object,
|
|
563
|
+
check_view_exist=False)
|
|
564
|
+
|
|
565
|
+
elif object_type in [TeradataConstants.TERADATA_LOCAL_SCRIPT.value,
|
|
566
|
+
TeradataConstants.TERADATA_TEXT_FILE.value]:
|
|
567
|
+
GarbageCollector.__delete_gc_tempdir_local_file(database_object, object_type)
|
|
568
|
+
|
|
569
|
+
# # Drop Apply script.
|
|
570
|
+
elif TeradataConstants.TERADATA_APPLY.value == object_type:
|
|
571
|
+
tdmlutil.utils.UtilFuncs._delete_script(database_object,
|
|
572
|
+
file_type=object_type_enum)
|
|
573
|
+
# Delete the script locally
|
|
574
|
+
GarbageCollector.__delete_gc_tempdir_local_file(database_object, object_type)
|
|
575
|
+
|
|
576
|
+
# # Drop STO script.
|
|
577
|
+
else:
|
|
578
|
+
tdmlutil.utils.UtilFuncs._delete_script(database_object,
|
|
579
|
+
file_type=object_type_enum,
|
|
580
|
+
check_script_exist=False)
|
|
581
|
+
# Delete the script locally
|
|
582
|
+
GarbageCollector.__delete_gc_tempdir_local_file(database_object, object_type)
|
|
583
|
+
|
|
584
|
+
# Remove the entry for a table/view from GC, after it has been dropped.
|
|
585
|
+
GarbageCollector.__deleterow(contentrecord, tempfilename)
|
|
586
|
+
|
|
587
|
+
# Finally, delete the entry from gc lists if required.
|
|
579
588
|
GarbageCollector.__delete_object_from_gc_list(database_object,
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
589
|
+
object_type_enum)
|
|
590
|
+
except OperationalError as operr:
|
|
591
|
+
# Remove the entry for a table/view/script even after drop has failed,
|
|
592
|
+
# if that object does not exist.
|
|
593
|
+
# Also added additional check for error when the database containing
|
|
594
|
+
# the object doesn't exist anymore.
|
|
595
|
+
if "[Teradata Database] [Error 3802] Database" in str(operr) or \
|
|
596
|
+
"[Teradata Database] [Error 3807] Object" in str(operr) or \
|
|
597
|
+
"[Teradata Database] [Error 9852] The file" in str(operr):
|
|
598
|
+
GarbageCollector.__deleterow(contentrecord, tempfilename)
|
|
599
|
+
# Delete entry from gc lists of required.
|
|
600
|
+
GarbageCollector.__delete_object_from_gc_list(database_object,
|
|
601
|
+
object_type_enum)
|
|
602
|
+
except (TeradataMlException, RuntimeError) as err:
|
|
603
|
+
if "Failed to execute get_env" in str(err) or \
|
|
604
|
+
"Failed to execute remove_file" in str(err):
|
|
605
|
+
# For removing files in OpenAF environment.
|
|
606
|
+
GarbageCollector.__deleterow(contentrecord, tempfilename)
|
|
607
|
+
# Delete entry from gc lists of required.
|
|
608
|
+
GarbageCollector.__delete_object_from_gc_list(database_object,
|
|
609
|
+
object_type_enum)
|
|
610
|
+
except FileNotFoundError:
|
|
611
|
+
# This will occur only when the item being deleted is a file,
|
|
612
|
+
# and it's local copy is not found.
|
|
613
|
+
GarbageCollector.__deleterow(contentrecord, tempfilename)
|
|
614
|
+
if object_type == TeradataConstants.TERADATA_APPLY:
|
|
615
|
+
GarbageCollector.__gc_apply.remove(database_object)
|
|
616
|
+
elif object_type == TeradataConstants.TERADATA_SCRIPT:
|
|
617
|
+
GarbageCollector.__gc_scripts.remove(database_object)
|
|
618
|
+
except Exception as err:
|
|
619
|
+
pass
|
|
620
|
+
# delete empty file itself after deleting the entry from the file
|
|
621
|
+
if os.path.getsize(tempfilename) == 0:
|
|
622
|
+
GarbageCollector._delete_local_file(tempfilename)
|
|
592
623
|
except Exception as e:
|
|
593
624
|
logger.error(Messages.get_message(MessageCodes.TDMLDF_DELETE_GARBAGE_COLLECTOR) + str(e))
|
|
594
625
|
finally:
|
|
@@ -219,6 +219,7 @@ class ErrorInfoCodes(Enum):
|
|
|
219
219
|
PARTITION_VALUES_NOT_MATCHING = 'TDML_2538'
|
|
220
220
|
PARTITION_IN_BOTH_FIT_AND_PREDICT = 'TDML_2539'
|
|
221
221
|
INVALID_PARTITIONING_COLS = 'TDML_2540'
|
|
222
|
+
TARGET_COL_NOT_FOUND_FOR_EVALUATE = 'TDML_2541'
|
|
222
223
|
|
|
223
224
|
class MessageCodes(Enum):
|
|
224
225
|
"""
|
|
@@ -421,4 +422,6 @@ class MessageCodes(Enum):
|
|
|
421
422
|
PARTITION_VALUES_NOT_MATCHING = "Values in training and test data partition columns should be same."
|
|
422
423
|
PARTITION_IN_BOTH_FIT_AND_PREDICT = "Use \"partition_columns\" only if model is fitted with partition_column(s)."
|
|
423
424
|
INVALID_PARTITIONING_COLS = "Provided partition_column(s) '{}' is/are not present in parent of '{}' DataFrame(s)."
|
|
424
|
-
PATH_NOT_FOUND = "Specified local path '{}' not found. Please check the path."
|
|
425
|
+
PATH_NOT_FOUND = "Specified local path '{}' not found. Please check the path."
|
|
426
|
+
TARGET_COL_NOT_FOUND_FOR_EVALUATE = "Target column '{}' not found in the passed dataFrame. "\
|
|
427
|
+
"evaluate() requires target column to be present in the dataFrame."
|
teradataml/common/messages.py
CHANGED
|
@@ -189,7 +189,8 @@ class Messages():
|
|
|
189
189
|
[ErrorInfoCodes.PARTITION_VALUES_NOT_MATCHING, MessageCodes.PARTITION_VALUES_NOT_MATCHING],
|
|
190
190
|
[ErrorInfoCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT, MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT],
|
|
191
191
|
[ErrorInfoCodes.INVALID_PARTITIONING_COLS, MessageCodes.INVALID_PARTITIONING_COLS],
|
|
192
|
-
[ErrorInfoCodes.PATH_NOT_FOUND, MessageCodes.PATH_NOT_FOUND]
|
|
192
|
+
[ErrorInfoCodes.PATH_NOT_FOUND, MessageCodes.PATH_NOT_FOUND],
|
|
193
|
+
[ErrorInfoCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE, MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE]
|
|
193
194
|
]
|
|
194
195
|
|
|
195
196
|
@staticmethod
|
teradataml/common/sqlbundle.py
CHANGED
|
@@ -47,7 +47,7 @@ class SQLBundle:
|
|
|
47
47
|
[SQLConstants.SQL_HELP_COLUMNS, "help column {0}.*"],
|
|
48
48
|
[SQLConstants.SQL_DROP_TABLE, "DROP TABLE {0}"],
|
|
49
49
|
[SQLConstants.SQL_DROP_VIEW, "DROP VIEW {0}"],
|
|
50
|
-
[SQLConstants.SQL_NROWS_FROM_QUERY, "SELECT COUNT(*) FROM {0}"],
|
|
50
|
+
[SQLConstants.SQL_NROWS_FROM_QUERY, "SELECT CAST(COUNT(*) AS BIGINT) FROM {0}"],
|
|
51
51
|
[SQLConstants.SQL_TOP_NROWS_FROM_TABLEORVIEW, "select top {0} * from {1}"],
|
|
52
52
|
[SQLConstants.SQL_INSERT_INTO_TABLE_VALUES, "insert into {0} values({1})"],
|
|
53
53
|
[SQLConstants.SQL_SELECT_COLUMNNAMES_FROM, "sel {0} from ({1}) as {2}"],
|
teradataml/common/utils.py
CHANGED
|
@@ -13,6 +13,7 @@ by other classes which can be reused according to the need.
|
|
|
13
13
|
Add all the common functions in this class like creating temporary table names, getting
|
|
14
14
|
the datatypes etc.
|
|
15
15
|
"""
|
|
16
|
+
import json
|
|
16
17
|
import uuid
|
|
17
18
|
from math import floor
|
|
18
19
|
import os, itertools
|
|
@@ -22,6 +23,7 @@ import sqlalchemy
|
|
|
22
23
|
from pathlib import Path
|
|
23
24
|
from numpy import number
|
|
24
25
|
from sqlalchemy import Column, MetaData, Table
|
|
26
|
+
|
|
25
27
|
from teradataml.context.context import get_connection
|
|
26
28
|
|
|
27
29
|
from teradataml import _version
|
|
@@ -49,7 +51,7 @@ from teradatasqlalchemy.types import (BYTE, VARBYTE, BLOB)
|
|
|
49
51
|
from teradatasqlalchemy.types import (CHAR, VARCHAR, CLOB)
|
|
50
52
|
from functools import reduce
|
|
51
53
|
import warnings
|
|
52
|
-
from
|
|
54
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
53
55
|
from teradataml.utils.utils import execute_sql
|
|
54
56
|
from teradataml.utils.validators import _Validators
|
|
55
57
|
from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
|
|
@@ -1857,7 +1859,7 @@ class UtilFuncs():
|
|
|
1857
1859
|
con = tdmlctx.get_connection()
|
|
1858
1860
|
|
|
1859
1861
|
if check_table_exists:
|
|
1860
|
-
table_exists = con.dialect.has_table(con, table_name, schema_name)
|
|
1862
|
+
table_exists = con.dialect.has_table(con, table_name, schema_name, table_only=True)
|
|
1861
1863
|
|
|
1862
1864
|
if not table_exists:
|
|
1863
1865
|
raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_DOES_NOT_EXIST, table_name),
|
|
@@ -2377,14 +2379,11 @@ class UtilFuncs():
|
|
|
2377
2379
|
EXAMPLES:
|
|
2378
2380
|
>>> self._get_python_execution_path()
|
|
2379
2381
|
"""
|
|
2382
|
+
# 'indb_install_location' expects python installation directory path.
|
|
2383
|
+
# Hence, postfixing python binary path.
|
|
2384
|
+
return "python" if UtilFuncs._is_lake() else \
|
|
2385
|
+
'{}/bin/python3'.format(configure.indb_install_location)
|
|
2380
2386
|
|
|
2381
|
-
if UtilFuncs._is_lake():
|
|
2382
|
-
return "python"
|
|
2383
|
-
else:
|
|
2384
|
-
if configure.indb_install_location == "/var/opt/teradata/languages/sles12sp3/Python/":
|
|
2385
|
-
return '{}bin/python3'.format(configure.indb_install_location)
|
|
2386
|
-
else:
|
|
2387
|
-
return configure.indb_install_location
|
|
2388
2387
|
|
|
2389
2388
|
def _is_view(tablename):
|
|
2390
2389
|
"""
|
|
@@ -2411,14 +2410,101 @@ class UtilFuncs():
|
|
|
2411
2410
|
return True
|
|
2412
2411
|
else:
|
|
2413
2412
|
return False
|
|
2413
|
+
|
|
2414
2414
|
@staticmethod
|
|
2415
2415
|
def _set_queryband():
|
|
2416
|
+
from teradataml import session_queryband
|
|
2416
2417
|
try:
|
|
2417
|
-
qb_query =
|
|
2418
|
+
qb_query = session_queryband.generate_set_queryband_query()
|
|
2418
2419
|
execute_sql(qb_query)
|
|
2419
|
-
except Exception:
|
|
2420
|
+
except Exception as _set_queryband_err:
|
|
2420
2421
|
pass
|
|
2421
2422
|
|
|
2423
|
+
def _create_or_get_env(template):
|
|
2424
|
+
"""
|
|
2425
|
+
DESCRIPTION:
|
|
2426
|
+
Internal function to return the environment if already exists else
|
|
2427
|
+
creates the environment using template file and return the environment.
|
|
2428
|
+
|
|
2429
|
+
PARAMETERS:
|
|
2430
|
+
template:
|
|
2431
|
+
Required Argument.
|
|
2432
|
+
Template json file name containing details of environment(s) to be created.
|
|
2433
|
+
Types: str
|
|
2434
|
+
|
|
2435
|
+
RAISES:
|
|
2436
|
+
TeradataMLException
|
|
2437
|
+
|
|
2438
|
+
RETURNS:
|
|
2439
|
+
An object of class UserEnv representing the user environment.
|
|
2440
|
+
|
|
2441
|
+
EXAMPLES:
|
|
2442
|
+
>>> self._create_or_get_env("open_source_ml.json")
|
|
2443
|
+
"""
|
|
2444
|
+
# Get the template file path.
|
|
2445
|
+
from teradataml import _TDML_DIRECTORY
|
|
2446
|
+
from teradataml.scriptmgmt.lls_utils import create_env, get_env
|
|
2447
|
+
template_dir_path = os.path.join(_TDML_DIRECTORY, "data", "templates", template)
|
|
2448
|
+
|
|
2449
|
+
# Read template file.
|
|
2450
|
+
with open(template_dir_path, "r") as r_file:
|
|
2451
|
+
data = json.load(r_file)
|
|
2452
|
+
|
|
2453
|
+
# Get env_name.
|
|
2454
|
+
_env_name = data["env_specs"][0]["env_name"]
|
|
2455
|
+
|
|
2456
|
+
try:
|
|
2457
|
+
# Call function to get env.
|
|
2458
|
+
return get_env(_env_name)
|
|
2459
|
+
except TeradataMlException as tdml_e:
|
|
2460
|
+
# We will get here when error says, env does not exist otherwise raise the exception as is.
|
|
2461
|
+
# Env does not exist so create one.
|
|
2462
|
+
|
|
2463
|
+
exc_msg = "Failed to execute get_env(). User environment '{}' not " \
|
|
2464
|
+
"found.".format(_env_name)
|
|
2465
|
+
if exc_msg in tdml_e.args[0]:
|
|
2466
|
+
print(f"No OpenAF environment with name '{_env_name}' found. Creating one with "\
|
|
2467
|
+
"latest supported python and required packages.")
|
|
2468
|
+
return create_env(template=template_dir_path)
|
|
2469
|
+
else:
|
|
2470
|
+
raise tdml_e
|
|
2471
|
+
except Exception as exc:
|
|
2472
|
+
raise exc
|
|
2473
|
+
|
|
2474
|
+
def _get_env_name(col):
|
|
2475
|
+
"""
|
|
2476
|
+
DESCRIPTION:
|
|
2477
|
+
Internal function to get the env name if passed with ColumnExpression
|
|
2478
|
+
else the default "openml_env".
|
|
2479
|
+
|
|
2480
|
+
PARAMETERS:
|
|
2481
|
+
col:
|
|
2482
|
+
Required Argument.
|
|
2483
|
+
Specifies teradataml DataFrame ColumnExpression.
|
|
2484
|
+
Types: teradataml DataFrame ColumnExpression
|
|
2485
|
+
|
|
2486
|
+
RAISES:
|
|
2487
|
+
None.
|
|
2488
|
+
|
|
2489
|
+
RETURNS:
|
|
2490
|
+
string
|
|
2491
|
+
|
|
2492
|
+
EXAMPLES:
|
|
2493
|
+
>>> self._get_env_name(col)
|
|
2494
|
+
"""
|
|
2495
|
+
|
|
2496
|
+
# If env_name is passed with ColumnExpression fetch the env name,
|
|
2497
|
+
# else check if default "openml_user_env" env is configured or not,
|
|
2498
|
+
# else get the default "openml_env" env if exists or create new deafult env.
|
|
2499
|
+
if col._env_name is not None:
|
|
2500
|
+
from teradataml.scriptmgmt.UserEnv import UserEnv
|
|
2501
|
+
env = col._env_name
|
|
2502
|
+
env_name = env.env_name if isinstance(col._env_name, UserEnv) else env
|
|
2503
|
+
elif configure.openml_user_env is not None:
|
|
2504
|
+
env_name = configure.openml_user_env.env_name
|
|
2505
|
+
else:
|
|
2506
|
+
env_name = UtilFuncs._create_or_get_env("open_source_ml.json").env_name
|
|
2507
|
+
return env_name
|
|
2422
2508
|
|
|
2423
2509
|
from teradataml.common.aed_utils import AedUtils
|
|
2424
2510
|
from teradataml.dbutils.filemgr import remove_file
|
|
@@ -317,7 +317,7 @@ class AnalyticsWrapperUtils:
|
|
|
317
317
|
#TODO: Add support for nested level query as in R.
|
|
318
318
|
return table_ref
|
|
319
319
|
|
|
320
|
-
def _validate_input_table_datatype(self, data, arg_name, reference_function_name
|
|
320
|
+
def _validate_input_table_datatype(self, data, arg_name, reference_function_name=None):
|
|
321
321
|
"""
|
|
322
322
|
Method to verify that the input table parameters of type DataFrame.
|
|
323
323
|
|