teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. See the registry's advisory page for more details.

Files changed (200)
  1. teradataml/LICENSE.pdf +0 -0
  2. teradataml/README.md +112 -0
  3. teradataml/__init__.py +6 -3
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/__init__.py +3 -2
  6. teradataml/analytics/analytic_function_executor.py +224 -16
  7. teradataml/analytics/analytic_query_generator.py +92 -0
  8. teradataml/analytics/byom/__init__.py +3 -2
  9. teradataml/analytics/json_parser/metadata.py +1 -0
  10. teradataml/analytics/json_parser/utils.py +6 -4
  11. teradataml/analytics/meta_class.py +40 -1
  12. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  13. teradataml/analytics/sqle/__init__.py +10 -2
  14. teradataml/analytics/table_operator/__init__.py +3 -2
  15. teradataml/analytics/uaf/__init__.py +21 -2
  16. teradataml/analytics/utils.py +62 -1
  17. teradataml/analytics/valib.py +1 -1
  18. teradataml/automl/__init__.py +1502 -323
  19. teradataml/automl/custom_json_utils.py +139 -61
  20. teradataml/automl/data_preparation.py +245 -306
  21. teradataml/automl/data_transformation.py +32 -12
  22. teradataml/automl/feature_engineering.py +313 -82
  23. teradataml/automl/model_evaluation.py +44 -35
  24. teradataml/automl/model_training.py +109 -146
  25. teradataml/catalog/byom.py +8 -8
  26. teradataml/clients/pkce_client.py +1 -1
  27. teradataml/common/constants.py +37 -0
  28. teradataml/common/deprecations.py +13 -7
  29. teradataml/common/garbagecollector.py +151 -120
  30. teradataml/common/messagecodes.py +4 -1
  31. teradataml/common/messages.py +2 -1
  32. teradataml/common/sqlbundle.py +1 -1
  33. teradataml/common/utils.py +97 -11
  34. teradataml/common/wrapper_utils.py +1 -1
  35. teradataml/context/context.py +72 -2
  36. teradataml/data/complaints_test_tokenized.csv +353 -0
  37. teradataml/data/complaints_tokens_model.csv +348 -0
  38. teradataml/data/covid_confirm_sd.csv +83 -0
  39. teradataml/data/dataframe_example.json +10 -0
  40. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  41. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  42. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  43. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  44. teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
  45. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  46. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  47. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  48. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  49. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  51. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  52. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  53. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  54. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  55. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  56. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  57. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  58. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  59. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  60. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  61. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  62. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  63. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  64. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  65. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  66. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  67. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  68. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  69. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  70. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  71. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  72. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  74. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  75. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  76. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  77. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  78. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  79. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  80. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  81. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  82. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  83. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  84. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  85. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  86. teradataml/data/dwt2d_dataTable.csv +65 -0
  87. teradataml/data/dwt_dataTable.csv +8 -0
  88. teradataml/data/dwt_filterTable.csv +3 -0
  89. teradataml/data/finance_data4.csv +13 -0
  90. teradataml/data/grocery_transaction.csv +19 -0
  91. teradataml/data/idwt2d_dataTable.csv +5 -0
  92. teradataml/data/idwt_dataTable.csv +8 -0
  93. teradataml/data/idwt_filterTable.csv +3 -0
  94. teradataml/data/interval_data.csv +5 -0
  95. teradataml/data/jsons/paired_functions.json +14 -0
  96. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  97. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  98. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  99. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  100. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  101. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  102. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  103. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  104. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  105. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  106. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  107. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  108. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  109. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  110. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  111. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  112. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  113. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  114. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  115. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  116. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  117. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  118. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  119. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  120. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  121. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  122. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  123. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  124. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  125. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  126. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  127. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  128. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  129. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  130. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  131. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  132. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  133. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  134. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  135. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  136. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  137. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  138. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  139. teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
  140. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  141. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  142. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  143. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  144. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  145. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
  146. teradataml/data/load_example_data.py +8 -2
  147. teradataml/data/naivebayestextclassifier_example.json +1 -1
  148. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  149. teradataml/data/peppers.png +0 -0
  150. teradataml/data/real_values.csv +14 -0
  151. teradataml/data/sax_example.json +8 -0
  152. teradataml/data/scripts/deploy_script.py +1 -1
  153. teradataml/data/scripts/sklearn/sklearn_fit.py +17 -10
  154. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +2 -2
  155. teradataml/data/scripts/sklearn/sklearn_function.template +30 -7
  156. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  157. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  158. teradataml/data/scripts/sklearn/sklearn_transform.py +55 -4
  159. teradataml/data/star_pivot.csv +8 -0
  160. teradataml/data/templates/open_source_ml.json +2 -1
  161. teradataml/data/teradataml_example.json +20 -1
  162. teradataml/data/timestamp_data.csv +4 -0
  163. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  164. teradataml/data/uaf_example.json +55 -1
  165. teradataml/data/unpivot_example.json +15 -0
  166. teradataml/data/url_data.csv +9 -0
  167. teradataml/data/windowdfft.csv +16 -0
  168. teradataml/dataframe/copy_to.py +1 -1
  169. teradataml/dataframe/data_transfer.py +5 -3
  170. teradataml/dataframe/dataframe.py +474 -41
  171. teradataml/dataframe/fastload.py +3 -3
  172. teradataml/dataframe/functions.py +339 -0
  173. teradataml/dataframe/row.py +160 -0
  174. teradataml/dataframe/setop.py +2 -2
  175. teradataml/dataframe/sql.py +658 -20
  176. teradataml/dataframe/window.py +1 -1
  177. teradataml/dbutils/dbutils.py +322 -16
  178. teradataml/geospatial/geodataframe.py +1 -1
  179. teradataml/geospatial/geodataframecolumn.py +1 -1
  180. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  181. teradataml/lib/aed_0_1.dll +0 -0
  182. teradataml/opensource/sklearn/_sklearn_wrapper.py +154 -69
  183. teradataml/options/__init__.py +3 -1
  184. teradataml/options/configure.py +14 -2
  185. teradataml/options/display.py +2 -2
  186. teradataml/plot/axis.py +4 -4
  187. teradataml/scriptmgmt/UserEnv.py +10 -6
  188. teradataml/scriptmgmt/lls_utils.py +3 -2
  189. teradataml/table_operators/Script.py +2 -2
  190. teradataml/table_operators/TableOperator.py +106 -20
  191. teradataml/table_operators/table_operator_util.py +88 -41
  192. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  193. teradataml/telemetry_utils/__init__.py +0 -0
  194. teradataml/telemetry_utils/queryband.py +52 -0
  195. teradataml/utils/validators.py +1 -1
  196. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +115 -2
  197. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +200 -140
  198. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
  199. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
  200. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
@@ -87,13 +87,19 @@ def argument_deprecation(tdml_version, deprecated_arguments, behaviour=False, al
87
87
 
88
88
  def decorator(func):
89
89
  def wrapper(*args, **kwargs):
90
- msg = "\nThe argument(s) \"{}\" will be deprecated in {}."
91
- if behaviour:
92
- msg = "\nBehaviour of the argument(s) \"{}\" will change in {}."
93
- msg = msg.format(deprecated_arguments, tdml_version)
94
- if alternatives is not None:
95
- msg += "\nUse argument(s) \"{}\" instead.".format(alternatives)
96
- warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
90
+ # Check if deprecated arguments is a list, if not convert it to a list
91
+ deprecated_args_list = deprecated_arguments if isinstance(deprecated_arguments, list) \
92
+ else [deprecated_arguments]
93
+ # Check list of deprecated arguments are used in the function call
94
+ deprecated_arguments_used = [arg for arg in deprecated_args_list if arg in kwargs]
95
+ if deprecated_arguments_used:
96
+ msg = "\nThe argument(s) \"{}\" will be deprecated in {}."
97
+ if behaviour:
98
+ msg = "\nBehaviour of the argument(s) \"{}\" will change in {}."
99
+ msg = msg.format(deprecated_arguments_used, tdml_version)
100
+ if alternatives is not None:
101
+ msg += "\nUse argument(s) \"{}\" instead.".format(alternatives)
102
+ warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
97
103
  return func(*args, **kwargs)
98
104
 
99
105
  return wraps(func)(wrapper)
@@ -22,6 +22,7 @@ from teradataml.common.messages import Messages
22
22
  from teradataml.common.messagecodes import MessageCodes
23
23
  from teradataml.common.constants import TeradataConstants
24
24
  from teradataml.options.configure import configure
25
+ from teradataml.utils.internal_buffer import _InternalBuffer
25
26
  from teradatasql import OperationalError
26
27
  import psutil
27
28
  import getpass
@@ -36,15 +37,22 @@ class GarbageCollector():
36
37
  garbage collection, so that they can be dropped when connection is disconnected/lost.
37
38
  Writes to a output file where the database name & table/view/script names are persisted.
38
39
  """
39
- __garbage_persistent_file_name = getpass.getuser() + "_garbagecollect.info"
40
+ # Adding old garbage collector file name to support backward compatibility.
41
+ __old_garbage_persistent_file_name = getpass.getuser() + "_garbagecollect.info"
40
42
  __garbagecollector_folder_name = '.teradataml'
41
43
  __contentseperator = ","
44
+ __filenameseperator = "_"
42
45
  __version = "ver1.0"
43
46
  __gc_tables = []
44
47
  __gc_views = []
45
48
  __gc_scripts = []
46
49
  __gc_container = []
47
50
  __gc_apply = []
51
+ # Function to get the garbage collector file name specific to host and process.
52
+ _get_gc_file_name = lambda: "{}_{}_{}_garbagecollect.info".format(
53
+ getpass.getuser(),
54
+ tdmlctx.context._get_host_ip(),
55
+ str(os.getpid()))
48
56
 
49
57
  @staticmethod
50
58
  def _get_temp_dir_name():
@@ -93,7 +101,7 @@ class GarbageCollector():
93
101
  """
94
102
  tempdir = GarbageCollector._get_temp_dir_name()
95
103
  os.makedirs(tempdir, exist_ok=True)
96
- tempfile = os.path.join(os.path.sep, tempdir, GarbageCollector.__garbage_persistent_file_name)
104
+ tempfile = os.path.join(os.path.sep, tempdir, GarbageCollector._get_gc_file_name())
97
105
  return tempfile
98
106
 
99
107
  @staticmethod
@@ -205,29 +213,31 @@ class GarbageCollector():
205
213
  EXAMPLES:
206
214
  GarbageCollector._add_to_garbagecollector(object_name = "temp"."temp_table1")
207
215
  """
208
- if object_name and object_type:
209
- try:
210
- tempfilename = GarbageCollector.__make_temp_file_name()
211
- writecontent = str(GarbageCollector.__version) + "," + str(os.getpid())
212
- writecontent += "," + str(object_type.value)
213
- writecontent += "," + object_name + "\n"
214
- with open(tempfilename, 'a+') as fgc:
215
- fgc.write(writecontent)
216
- if configure._validate_gc:
217
- GarbageCollector.__validate_gc_add_object(object_name, object_type)
218
- except TeradataMlException:
219
- raise
220
- except Exception as err:
221
- logger.error(Messages.get_message(MessageCodes.TDMLDF_CREATE_GARBAGE_COLLECTOR) + str(err))
222
- raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_GARBAGE_COLLECTOR),
223
- MessageCodes.TDMLDF_CREATE_GARBAGE_COLLECTOR) from err
224
- finally:
225
- if fgc is not None:
226
- fgc.close()
216
+ # Use global lock while writing to the garbage collector file.
217
+ with _InternalBuffer.get("global_lock"):
218
+ if object_name and object_type:
219
+ try:
220
+ tempfilename = GarbageCollector.__make_temp_file_name()
221
+ writecontent = str(GarbageCollector.__version) + "," + str(os.getpid())
222
+ writecontent += "," + str(object_type.value)
223
+ writecontent += "," + object_name + "\n"
224
+ with open(tempfilename, 'a+') as fgc:
225
+ fgc.write(writecontent)
226
+ if configure._validate_gc:
227
+ GarbageCollector.__validate_gc_add_object(object_name, object_type)
228
+ except TeradataMlException:
229
+ raise
230
+ except Exception as err:
231
+ logger.error(Messages.get_message(MessageCodes.TDMLDF_CREATE_GARBAGE_COLLECTOR) + str(err))
232
+ raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_GARBAGE_COLLECTOR),
233
+ MessageCodes.TDMLDF_CREATE_GARBAGE_COLLECTOR) from err
234
+ finally:
235
+ if fgc is not None:
236
+ fgc.close()
227
237
  return True
228
238
 
229
239
  @staticmethod
230
- def __deleterow(content_row):
240
+ def __deleterow(content_row, file_name):
231
241
  """
232
242
  DESCRIPTION:
233
243
  Deletes an entry from persisted file.
@@ -237,6 +247,11 @@ class GarbageCollector():
237
247
  Required Argument.
238
248
  Specifies the text of row to delete from the persisted file.
239
249
  Types: str
250
+
251
+ file_name:
252
+ Required Argument.
253
+ Specifies the name of the file to delete the row.
254
+ Types: str
240
255
 
241
256
  RETURNS:
242
257
  None.
@@ -248,10 +263,9 @@ class GarbageCollector():
248
263
  GarbageCollector._deleterow(content_row = 'ver1.0,72136,3,"alice"."temp_table_gbview1"')
249
264
  """
250
265
  try:
251
- tempfilename = GarbageCollector.__make_temp_file_name()
252
- if not os.path.isfile(tempfilename):
266
+ if not os.path.isfile(file_name):
253
267
  return True
254
- with open(tempfilename, 'r+') as fgc:
268
+ with open(file_name, 'r+') as fgc:
255
269
  output = fgc.readlines()
256
270
  fgc.seek(0)
257
271
  for dbtablename in output:
@@ -491,104 +505,121 @@ class GarbageCollector():
491
505
  """
492
506
  try:
493
507
  td_connection = tdmlctx.context.get_connection()
494
- tempfilename = GarbageCollector.__make_temp_file_name()
495
- if not os.path.isfile(tempfilename):
508
+ # Get the temp directory where garbage collector file is persisted.
509
+ tempdir = GarbageCollector._get_temp_dir_name()
510
+ # Garbage collect file that is created by the current host and current process.
511
+ # Also check if file is not of current process and associated process is
512
+ # currently running in the system or not.
513
+ # Walk through the temp directory and filter garbage collector files.
514
+ tempfiles = []
515
+ for root, _, files in os.walk(tempdir):
516
+ for file in files:
517
+ if file.endswith('_garbagecollect.info'):
518
+ try:
519
+ filepath = os.path.join(root, file)
520
+ fileparts = file.split(GarbageCollector.__filenameseperator)
521
+ hostname = fileparts[1]
522
+ filepid = int(fileparts[2])
523
+ if hostname == tdmlctx.context._get_host_ip():
524
+ if filepid == os.getpid() or not psutil.pid_exists(filepid):
525
+ tempfiles.append(filepath)
526
+ except (IndexError, ValueError):
527
+ # Handle the case where the filename format is not as expected
528
+ # check if old garbage collector file is present.
529
+ if file == GarbageCollector.__old_garbage_persistent_file_name:
530
+ tempfiles.append(filepath)
531
+
532
+ # Process each garbage collector file.
533
+ if len(tempfiles) == 0:
496
534
  return True
497
- with open(tempfilename, 'r+') as fgc:
498
- content = fgc.readlines()
499
-
500
- for contentrecord in content:
501
- contentrecord = contentrecord.strip()
502
-
503
- if (td_connection is not None) and (len(contentrecord) > 0):
504
- try:
505
- recordparts = contentrecord.split(GarbageCollector.__contentseperator)
506
- version = recordparts[0]
507
- contentpid = int(recordparts[1].strip())
508
- # Check and garbage collect even currrent running process at exit.
509
- # Check if contentpid is not of current process as well as any
510
- # currently running process in the system
511
- proceed_to_cleanup = False
512
- if contentpid != int(os.getpid()):
513
- if not psutil.pid_exists(contentpid):
514
- proceed_to_cleanup = True
515
- else:
516
- proceed_to_cleanup = True
517
- if proceed_to_cleanup == True:
518
- object_type = int(recordparts[2].strip())
519
- database_object = recordparts[3].strip()
520
-
521
- # Create the TeradataConstant to use with __delete_object_from_gc_list().
522
- object_type_enum = TeradataConstants(object_type)
523
-
535
+ else:
536
+ for tempfilename in tempfiles:
537
+ if not os.path.isfile(tempfilename):
538
+ return True
539
+ with open(tempfilename, 'r+') as fgc:
540
+ content = fgc.readlines()
541
+
542
+ for contentrecord in content:
543
+ contentrecord = contentrecord.strip()
544
+ if (td_connection is not None) and (len(contentrecord) > 0):
524
545
  try:
525
- # Drop the table/view/script/container based on database object type retrieved from the collector file.
526
- # # Drop table.
527
- if TeradataConstants.TERADATA_TABLE.value == object_type:
528
- tdmlutil.utils.UtilFuncs._drop_table(database_object,
529
- check_table_exist=False)
530
-
531
- # # Drop view.
532
- elif TeradataConstants.TERADATA_VIEW.value == object_type:
533
- tdmlutil.utils.UtilFuncs._drop_view(database_object,
534
- check_view_exist=False)
535
-
536
- elif object_type in [TeradataConstants.TERADATA_LOCAL_SCRIPT.value,
537
- TeradataConstants.TERADATA_TEXT_FILE.value]:
538
- GarbageCollector.__delete_gc_tempdir_local_file(database_object, object_type)
539
-
540
- # # Drop Apply script.
541
- elif TeradataConstants.TERADATA_APPLY.value == object_type:
542
- tdmlutil.utils.UtilFuncs._delete_script(database_object,
543
- file_type=object_type_enum)
544
- # Delete the script locally
545
- GarbageCollector.__delete_gc_tempdir_local_file(database_object, object_type)
546
-
547
- # # Drop STO script.
548
- else:
549
- tdmlutil.utils.UtilFuncs._delete_script(database_object,
550
- file_type=object_type_enum,
551
- check_script_exist=False)
552
- # Delete the script locally
553
- GarbageCollector.__delete_gc_tempdir_local_file(database_object, object_type)
554
-
555
- # Finally, delete the entry from gc lists if required.
556
- GarbageCollector.__delete_object_from_gc_list(database_object,
557
- object_type_enum)
558
-
559
- # Remove the entry for a table/view from GC, after it has been dropped.
560
- GarbageCollector.__deleterow(contentrecord)
561
- except OperationalError as operr:
562
- # Remove the entry for a table/view/script even after drop has failed,
563
- # if that object does not exist.
564
- # Also added additional check for error when the database containing
565
- # the object doesn't exist anymore.
566
- if "[Teradata Database] [Error 3802] Database" in str(operr) or \
567
- "[Teradata Database] [Error 3807] Object" in str(operr) or \
568
- "[Teradata Database] [Error 9852] The file" in str(operr):
569
- GarbageCollector.__deleterow(contentrecord)
570
- # Delete entry from gc lists of required.
571
- GarbageCollector.__delete_object_from_gc_list(database_object,
572
- object_type_enum)
573
- except (TeradataMlException, RuntimeError) as err:
574
- if "Failed to execute get_env" in str(err) or \
575
- "Failed to execute remove_file" in str(err):
576
- # For removing files in OpenAF environment.
577
- GarbageCollector.__deleterow(contentrecord)
578
- # Delete entry from gc lists of required.
546
+ recordparts = contentrecord.split(GarbageCollector.__contentseperator)
547
+ object_type = int(recordparts[2].strip())
548
+ database_object = recordparts[3].strip()
549
+
550
+ # Create the TeradataConstant to use with __delete_object_from_gc_list().
551
+ object_type_enum = TeradataConstants(object_type)
552
+
553
+ try:
554
+ # Drop the table/view/script/container based on database object type retrieved from the collector file.
555
+ # # Drop table.
556
+ if TeradataConstants.TERADATA_TABLE.value == object_type:
557
+ tdmlutil.utils.UtilFuncs._drop_table(database_object,
558
+ check_table_exist=False)
559
+
560
+ # # Drop view.
561
+ elif TeradataConstants.TERADATA_VIEW.value == object_type:
562
+ tdmlutil.utils.UtilFuncs._drop_view(database_object,
563
+ check_view_exist=False)
564
+
565
+ elif object_type in [TeradataConstants.TERADATA_LOCAL_SCRIPT.value,
566
+ TeradataConstants.TERADATA_TEXT_FILE.value]:
567
+ GarbageCollector.__delete_gc_tempdir_local_file(database_object, object_type)
568
+
569
+ # # Drop Apply script.
570
+ elif TeradataConstants.TERADATA_APPLY.value == object_type:
571
+ tdmlutil.utils.UtilFuncs._delete_script(database_object,
572
+ file_type=object_type_enum)
573
+ # Delete the script locally
574
+ GarbageCollector.__delete_gc_tempdir_local_file(database_object, object_type)
575
+
576
+ # # Drop STO script.
577
+ else:
578
+ tdmlutil.utils.UtilFuncs._delete_script(database_object,
579
+ file_type=object_type_enum,
580
+ check_script_exist=False)
581
+ # Delete the script locally
582
+ GarbageCollector.__delete_gc_tempdir_local_file(database_object, object_type)
583
+
584
+ # Remove the entry for a table/view from GC, after it has been dropped.
585
+ GarbageCollector.__deleterow(contentrecord, tempfilename)
586
+
587
+ # Finally, delete the entry from gc lists if required.
579
588
  GarbageCollector.__delete_object_from_gc_list(database_object,
580
- object_type_enum)
581
- except FileNotFoundError:
582
- # This will occur only when the item being deleted is a file,
583
- # and it's local copy is not found.
584
- GarbageCollector.__deleterow(contentrecord)
585
- if object_type == TeradataConstants.TERADATA_APPLY:
586
- GarbageCollector.__gc_apply.remove(database_object)
587
- elif object_type == TeradataConstants.TERADATA_SCRIPT:
588
- GarbageCollector.__gc_scripts.remove(database_object)
589
- except Exception as err:
590
- pass
591
- # logger.error(Messages.get_message(MessageCodes.TDMLDF_DELETE_GARBAGE_COLLECTOR) + str(err))
589
+ object_type_enum)
590
+ except OperationalError as operr:
591
+ # Remove the entry for a table/view/script even after drop has failed,
592
+ # if that object does not exist.
593
+ # Also added additional check for error when the database containing
594
+ # the object doesn't exist anymore.
595
+ if "[Teradata Database] [Error 3802] Database" in str(operr) or \
596
+ "[Teradata Database] [Error 3807] Object" in str(operr) or \
597
+ "[Teradata Database] [Error 9852] The file" in str(operr):
598
+ GarbageCollector.__deleterow(contentrecord, tempfilename)
599
+ # Delete entry from gc lists of required.
600
+ GarbageCollector.__delete_object_from_gc_list(database_object,
601
+ object_type_enum)
602
+ except (TeradataMlException, RuntimeError) as err:
603
+ if "Failed to execute get_env" in str(err) or \
604
+ "Failed to execute remove_file" in str(err):
605
+ # For removing files in OpenAF environment.
606
+ GarbageCollector.__deleterow(contentrecord, tempfilename)
607
+ # Delete entry from gc lists of required.
608
+ GarbageCollector.__delete_object_from_gc_list(database_object,
609
+ object_type_enum)
610
+ except FileNotFoundError:
611
+ # This will occur only when the item being deleted is a file,
612
+ # and it's local copy is not found.
613
+ GarbageCollector.__deleterow(contentrecord, tempfilename)
614
+ if object_type == TeradataConstants.TERADATA_APPLY:
615
+ GarbageCollector.__gc_apply.remove(database_object)
616
+ elif object_type == TeradataConstants.TERADATA_SCRIPT:
617
+ GarbageCollector.__gc_scripts.remove(database_object)
618
+ except Exception as err:
619
+ pass
620
+ # delete empty file itself after deleting the entry from the file
621
+ if os.path.getsize(tempfilename) == 0:
622
+ GarbageCollector._delete_local_file(tempfilename)
592
623
  except Exception as e:
593
624
  logger.error(Messages.get_message(MessageCodes.TDMLDF_DELETE_GARBAGE_COLLECTOR) + str(e))
594
625
  finally:
@@ -219,6 +219,7 @@ class ErrorInfoCodes(Enum):
219
219
  PARTITION_VALUES_NOT_MATCHING = 'TDML_2538'
220
220
  PARTITION_IN_BOTH_FIT_AND_PREDICT = 'TDML_2539'
221
221
  INVALID_PARTITIONING_COLS = 'TDML_2540'
222
+ TARGET_COL_NOT_FOUND_FOR_EVALUATE = 'TDML_2541'
222
223
 
223
224
  class MessageCodes(Enum):
224
225
  """
@@ -421,4 +422,6 @@ class MessageCodes(Enum):
421
422
  PARTITION_VALUES_NOT_MATCHING = "Values in training and test data partition columns should be same."
422
423
  PARTITION_IN_BOTH_FIT_AND_PREDICT = "Use \"partition_columns\" only if model is fitted with partition_column(s)."
423
424
  INVALID_PARTITIONING_COLS = "Provided partition_column(s) '{}' is/are not present in parent of '{}' DataFrame(s)."
424
- PATH_NOT_FOUND = "Specified local path '{}' not found. Please check the path."
425
+ PATH_NOT_FOUND = "Specified local path '{}' not found. Please check the path."
426
+ TARGET_COL_NOT_FOUND_FOR_EVALUATE = "Target column '{}' not found in the passed dataFrame. "\
427
+ "evaluate() requires target column to be present in the dataFrame."
@@ -189,7 +189,8 @@ class Messages():
189
189
  [ErrorInfoCodes.PARTITION_VALUES_NOT_MATCHING, MessageCodes.PARTITION_VALUES_NOT_MATCHING],
190
190
  [ErrorInfoCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT, MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT],
191
191
  [ErrorInfoCodes.INVALID_PARTITIONING_COLS, MessageCodes.INVALID_PARTITIONING_COLS],
192
- [ErrorInfoCodes.PATH_NOT_FOUND, MessageCodes.PATH_NOT_FOUND]
192
+ [ErrorInfoCodes.PATH_NOT_FOUND, MessageCodes.PATH_NOT_FOUND],
193
+ [ErrorInfoCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE, MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE]
193
194
  ]
194
195
 
195
196
  @staticmethod
@@ -47,7 +47,7 @@ class SQLBundle:
47
47
  [SQLConstants.SQL_HELP_COLUMNS, "help column {0}.*"],
48
48
  [SQLConstants.SQL_DROP_TABLE, "DROP TABLE {0}"],
49
49
  [SQLConstants.SQL_DROP_VIEW, "DROP VIEW {0}"],
50
- [SQLConstants.SQL_NROWS_FROM_QUERY, "SELECT COUNT(*) FROM {0}"],
50
+ [SQLConstants.SQL_NROWS_FROM_QUERY, "SELECT CAST(COUNT(*) AS BIGINT) FROM {0}"],
51
51
  [SQLConstants.SQL_TOP_NROWS_FROM_TABLEORVIEW, "select top {0} * from {1}"],
52
52
  [SQLConstants.SQL_INSERT_INTO_TABLE_VALUES, "insert into {0} values({1})"],
53
53
  [SQLConstants.SQL_SELECT_COLUMNNAMES_FROM, "sel {0} from ({1}) as {2}"],
@@ -13,6 +13,7 @@ by other classes which can be reused according to the need.
13
13
  Add all the common functions in this class like creating temporary table names, getting
14
14
  the datatypes etc.
15
15
  """
16
+ import json
16
17
  import uuid
17
18
  from math import floor
18
19
  import os, itertools
@@ -22,6 +23,7 @@ import sqlalchemy
22
23
  from pathlib import Path
23
24
  from numpy import number
24
25
  from sqlalchemy import Column, MetaData, Table
26
+
25
27
  from teradataml.context.context import get_connection
26
28
 
27
29
  from teradataml import _version
@@ -49,7 +51,7 @@ from teradatasqlalchemy.types import (BYTE, VARBYTE, BLOB)
49
51
  from teradatasqlalchemy.types import (CHAR, VARCHAR, CLOB)
50
52
  from functools import reduce
51
53
  import warnings
52
- from teradatasqlalchemy.telemetry.queryband import set_queryband, collect_queryband, get_qb_query
54
+ from teradataml.telemetry_utils.queryband import collect_queryband
53
55
  from teradataml.utils.utils import execute_sql
54
56
  from teradataml.utils.validators import _Validators
55
57
  from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
@@ -1857,7 +1859,7 @@ class UtilFuncs():
1857
1859
  con = tdmlctx.get_connection()
1858
1860
 
1859
1861
  if check_table_exists:
1860
- table_exists = con.dialect.has_table(con, table_name, schema_name)
1862
+ table_exists = con.dialect.has_table(con, table_name, schema_name, table_only=True)
1861
1863
 
1862
1864
  if not table_exists:
1863
1865
  raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_DOES_NOT_EXIST, table_name),
@@ -2377,14 +2379,11 @@ class UtilFuncs():
2377
2379
  EXAMPLES:
2378
2380
  >>> self._get_python_execution_path()
2379
2381
  """
2382
+ # 'indb_install_location' expects python installation directory path.
2383
+ # Hence, postfixing python binary path.
2384
+ return "python" if UtilFuncs._is_lake() else \
2385
+ '{}/bin/python3'.format(configure.indb_install_location)
2380
2386
 
2381
- if UtilFuncs._is_lake():
2382
- return "python"
2383
- else:
2384
- if configure.indb_install_location == "/var/opt/teradata/languages/sles12sp3/Python/":
2385
- return '{}bin/python3'.format(configure.indb_install_location)
2386
- else:
2387
- return configure.indb_install_location
2388
2387
 
2389
2388
  def _is_view(tablename):
2390
2389
  """
@@ -2411,14 +2410,101 @@ class UtilFuncs():
2411
2410
  return True
2412
2411
  else:
2413
2412
  return False
2413
+
2414
2414
  @staticmethod
2415
2415
  def _set_queryband():
2416
+ from teradataml import session_queryband
2416
2417
  try:
2417
- qb_query = get_qb_query()
2418
+ qb_query = session_queryband.generate_set_queryband_query()
2418
2419
  execute_sql(qb_query)
2419
- except Exception:
2420
+ except Exception as _set_queryband_err:
2420
2421
  pass
2421
2422
 
2423
+ def _create_or_get_env(template):
2424
+ """
2425
+ DESCRIPTION:
2426
+ Internal function to return the environment if already exists else
2427
+ creates the environment using template file and return the environment.
2428
+
2429
+ PARAMETERS:
2430
+ template:
2431
+ Required Argument.
2432
+ Template json file name containing details of environment(s) to be created.
2433
+ Types: str
2434
+
2435
+ RAISES:
2436
+ TeradataMLException
2437
+
2438
+ RETURNS:
2439
+ An object of class UserEnv representing the user environment.
2440
+
2441
+ EXAMPLES:
2442
+ >>> self._create_or_get_env("open_source_ml.json")
2443
+ """
2444
+ # Get the template file path.
2445
+ from teradataml import _TDML_DIRECTORY
2446
+ from teradataml.scriptmgmt.lls_utils import create_env, get_env
2447
+ template_dir_path = os.path.join(_TDML_DIRECTORY, "data", "templates", template)
2448
+
2449
+ # Read template file.
2450
+ with open(template_dir_path, "r") as r_file:
2451
+ data = json.load(r_file)
2452
+
2453
+ # Get env_name.
2454
+ _env_name = data["env_specs"][0]["env_name"]
2455
+
2456
+ try:
2457
+ # Call function to get env.
2458
+ return get_env(_env_name)
2459
+ except TeradataMlException as tdml_e:
2460
+ # We will get here when error says, env does not exist otherwise raise the exception as is.
2461
+ # Env does not exist so create one.
2462
+
2463
+ exc_msg = "Failed to execute get_env(). User environment '{}' not " \
2464
+ "found.".format(_env_name)
2465
+ if exc_msg in tdml_e.args[0]:
2466
+ print(f"No OpenAF environment with name '{_env_name}' found. Creating one with "\
2467
+ "latest supported python and required packages.")
2468
+ return create_env(template=template_dir_path)
2469
+ else:
2470
+ raise tdml_e
2471
+ except Exception as exc:
2472
+ raise exc
2473
+
2474
+ def _get_env_name(col):
2475
+ """
2476
+ DESCRIPTION:
2477
+ Internal function to get the env name if passed with ColumnExpression
2478
+ else the default "openml_env".
2479
+
2480
+ PARAMETERS:
2481
+ col:
2482
+ Required Argument.
2483
+ Specifies teradataml DataFrame ColumnExpression.
2484
+ Types: teradataml DataFrame ColumnExpression
2485
+
2486
+ RAISES:
2487
+ None.
2488
+
2489
+ RETURNS:
2490
+ string
2491
+
2492
+ EXAMPLES:
2493
+ >>> self._get_env_name(col)
2494
+ """
2495
+
2496
+ # If env_name is passed with ColumnExpression fetch the env name,
2497
+ # else check if default "openml_user_env" env is configured or not,
2498
+ # else get the default "openml_env" env if exists or create new deafult env.
2499
+ if col._env_name is not None:
2500
+ from teradataml.scriptmgmt.UserEnv import UserEnv
2501
+ env = col._env_name
2502
+ env_name = env.env_name if isinstance(col._env_name, UserEnv) else env
2503
+ elif configure.openml_user_env is not None:
2504
+ env_name = configure.openml_user_env.env_name
2505
+ else:
2506
+ env_name = UtilFuncs._create_or_get_env("open_source_ml.json").env_name
2507
+ return env_name
2422
2508
 
2423
2509
  from teradataml.common.aed_utils import AedUtils
2424
2510
  from teradataml.dbutils.filemgr import remove_file
@@ -317,7 +317,7 @@ class AnalyticsWrapperUtils:
317
317
  #TODO: Add support for nested level query as in R.
318
318
  return table_ref
319
319
 
320
- def _validate_input_table_datatype(self, data, arg_name, reference_function_name = None):
320
+ def _validate_input_table_datatype(self, data, arg_name, reference_function_name=None):
321
321
  """
322
322
  Method to verify that the input table parameters of type DataFrame.
323
323