teradataml 20.0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1208)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +2762 -0
  4. teradataml/__init__.py +78 -0
  5. teradataml/_version.py +11 -0
  6. teradataml/analytics/Transformations.py +2996 -0
  7. teradataml/analytics/__init__.py +82 -0
  8. teradataml/analytics/analytic_function_executor.py +2416 -0
  9. teradataml/analytics/analytic_query_generator.py +1050 -0
  10. teradataml/analytics/byom/H2OPredict.py +514 -0
  11. teradataml/analytics/byom/PMMLPredict.py +437 -0
  12. teradataml/analytics/byom/__init__.py +16 -0
  13. teradataml/analytics/json_parser/__init__.py +133 -0
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
  15. teradataml/analytics/json_parser/json_store.py +191 -0
  16. teradataml/analytics/json_parser/metadata.py +1666 -0
  17. teradataml/analytics/json_parser/utils.py +805 -0
  18. teradataml/analytics/meta_class.py +236 -0
  19. teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
  21. teradataml/analytics/sqle/__init__.py +128 -0
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
  24. teradataml/analytics/table_operator/__init__.py +11 -0
  25. teradataml/analytics/uaf/__init__.py +82 -0
  26. teradataml/analytics/utils.py +828 -0
  27. teradataml/analytics/valib.py +1617 -0
  28. teradataml/automl/__init__.py +5835 -0
  29. teradataml/automl/autodataprep/__init__.py +493 -0
  30. teradataml/automl/custom_json_utils.py +1625 -0
  31. teradataml/automl/data_preparation.py +1384 -0
  32. teradataml/automl/data_transformation.py +1254 -0
  33. teradataml/automl/feature_engineering.py +2273 -0
  34. teradataml/automl/feature_exploration.py +1873 -0
  35. teradataml/automl/model_evaluation.py +488 -0
  36. teradataml/automl/model_training.py +1407 -0
  37. teradataml/catalog/__init__.py +2 -0
  38. teradataml/catalog/byom.py +1759 -0
  39. teradataml/catalog/function_argument_mapper.py +859 -0
  40. teradataml/catalog/model_cataloging_utils.py +491 -0
  41. teradataml/clients/__init__.py +0 -0
  42. teradataml/clients/auth_client.py +137 -0
  43. teradataml/clients/keycloak_client.py +165 -0
  44. teradataml/clients/pkce_client.py +481 -0
  45. teradataml/common/__init__.py +1 -0
  46. teradataml/common/aed_utils.py +2078 -0
  47. teradataml/common/bulk_exposed_utils.py +113 -0
  48. teradataml/common/constants.py +1669 -0
  49. teradataml/common/deprecations.py +166 -0
  50. teradataml/common/exceptions.py +147 -0
  51. teradataml/common/formula.py +743 -0
  52. teradataml/common/garbagecollector.py +666 -0
  53. teradataml/common/logger.py +1261 -0
  54. teradataml/common/messagecodes.py +518 -0
  55. teradataml/common/messages.py +262 -0
  56. teradataml/common/pylogger.py +67 -0
  57. teradataml/common/sqlbundle.py +764 -0
  58. teradataml/common/td_coltype_code_to_tdtype.py +48 -0
  59. teradataml/common/utils.py +3166 -0
  60. teradataml/common/warnings.py +36 -0
  61. teradataml/common/wrapper_utils.py +625 -0
  62. teradataml/config/__init__.py +0 -0
  63. teradataml/config/dummy_file1.cfg +5 -0
  64. teradataml/config/dummy_file2.cfg +3 -0
  65. teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
  66. teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
  67. teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
  68. teradataml/context/__init__.py +0 -0
  69. teradataml/context/aed_context.py +223 -0
  70. teradataml/context/context.py +1462 -0
  71. teradataml/data/A_loan.csv +19 -0
  72. teradataml/data/BINARY_REALS_LEFT.csv +11 -0
  73. teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
  74. teradataml/data/B_loan.csv +49 -0
  75. teradataml/data/BuoyData2.csv +17 -0
  76. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
  77. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
  78. teradataml/data/Convolve2RealsLeft.csv +5 -0
  79. teradataml/data/Convolve2RealsRight.csv +5 -0
  80. teradataml/data/Convolve2ValidLeft.csv +11 -0
  81. teradataml/data/Convolve2ValidRight.csv +11 -0
  82. teradataml/data/DFFTConv_Real_8_8.csv +65 -0
  83. teradataml/data/Employee.csv +5 -0
  84. teradataml/data/Employee_Address.csv +4 -0
  85. teradataml/data/Employee_roles.csv +5 -0
  86. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  87. teradataml/data/Mall_customer_data.csv +201 -0
  88. teradataml/data/Orders1_12mf.csv +25 -0
  89. teradataml/data/Pi_loan.csv +7 -0
  90. teradataml/data/SMOOTHED_DATA.csv +7 -0
  91. teradataml/data/TestDFFT8.csv +9 -0
  92. teradataml/data/TestRiver.csv +109 -0
  93. teradataml/data/Traindata.csv +28 -0
  94. teradataml/data/__init__.py +0 -0
  95. teradataml/data/acf.csv +17 -0
  96. teradataml/data/adaboost_example.json +34 -0
  97. teradataml/data/adaboostpredict_example.json +24 -0
  98. teradataml/data/additional_table.csv +11 -0
  99. teradataml/data/admissions_test.csv +21 -0
  100. teradataml/data/admissions_train.csv +41 -0
  101. teradataml/data/admissions_train_nulls.csv +41 -0
  102. teradataml/data/advertising.csv +201 -0
  103. teradataml/data/ageandheight.csv +13 -0
  104. teradataml/data/ageandpressure.csv +31 -0
  105. teradataml/data/amazon_reviews_25.csv +26 -0
  106. teradataml/data/antiselect_example.json +36 -0
  107. teradataml/data/antiselect_input.csv +8 -0
  108. teradataml/data/antiselect_input_mixed_case.csv +8 -0
  109. teradataml/data/applicant_external.csv +7 -0
  110. teradataml/data/applicant_reference.csv +7 -0
  111. teradataml/data/apriori_example.json +22 -0
  112. teradataml/data/arima_example.json +9 -0
  113. teradataml/data/assortedtext_input.csv +8 -0
  114. teradataml/data/attribution_example.json +34 -0
  115. teradataml/data/attribution_sample_table.csv +27 -0
  116. teradataml/data/attribution_sample_table1.csv +6 -0
  117. teradataml/data/attribution_sample_table2.csv +11 -0
  118. teradataml/data/bank_churn.csv +10001 -0
  119. teradataml/data/bank_marketing.csv +11163 -0
  120. teradataml/data/bank_web_clicks1.csv +43 -0
  121. teradataml/data/bank_web_clicks2.csv +91 -0
  122. teradataml/data/bank_web_url.csv +85 -0
  123. teradataml/data/barrier.csv +2 -0
  124. teradataml/data/barrier_new.csv +3 -0
  125. teradataml/data/betweenness_example.json +14 -0
  126. teradataml/data/bike_sharing.csv +732 -0
  127. teradataml/data/bin_breaks.csv +8 -0
  128. teradataml/data/bin_fit_ip.csv +4 -0
  129. teradataml/data/binary_complex_left.csv +11 -0
  130. teradataml/data/binary_complex_right.csv +11 -0
  131. teradataml/data/binary_matrix_complex_left.csv +21 -0
  132. teradataml/data/binary_matrix_complex_right.csv +21 -0
  133. teradataml/data/binary_matrix_real_left.csv +21 -0
  134. teradataml/data/binary_matrix_real_right.csv +21 -0
  135. teradataml/data/blood2ageandweight.csv +26 -0
  136. teradataml/data/bmi.csv +501 -0
  137. teradataml/data/boston.csv +507 -0
  138. teradataml/data/boston2cols.csv +721 -0
  139. teradataml/data/breast_cancer.csv +570 -0
  140. teradataml/data/buoydata_mix.csv +11 -0
  141. teradataml/data/burst_data.csv +5 -0
  142. teradataml/data/burst_example.json +21 -0
  143. teradataml/data/byom_example.json +34 -0
  144. teradataml/data/bytes_table.csv +4 -0
  145. teradataml/data/cal_housing_ex_raw.csv +70 -0
  146. teradataml/data/callers.csv +7 -0
  147. teradataml/data/calls.csv +10 -0
  148. teradataml/data/cars_hist.csv +33 -0
  149. teradataml/data/cat_table.csv +25 -0
  150. teradataml/data/ccm_example.json +32 -0
  151. teradataml/data/ccm_input.csv +91 -0
  152. teradataml/data/ccm_input2.csv +13 -0
  153. teradataml/data/ccmexample.csv +101 -0
  154. teradataml/data/ccmprepare_example.json +9 -0
  155. teradataml/data/ccmprepare_input.csv +91 -0
  156. teradataml/data/cfilter_example.json +12 -0
  157. teradataml/data/changepointdetection_example.json +18 -0
  158. teradataml/data/changepointdetectionrt_example.json +8 -0
  159. teradataml/data/chi_sq.csv +3 -0
  160. teradataml/data/churn_data.csv +14 -0
  161. teradataml/data/churn_emission.csv +35 -0
  162. teradataml/data/churn_initial.csv +3 -0
  163. teradataml/data/churn_state_transition.csv +5 -0
  164. teradataml/data/citedges_2.csv +745 -0
  165. teradataml/data/citvertices_2.csv +1210 -0
  166. teradataml/data/clicks2.csv +16 -0
  167. teradataml/data/clickstream.csv +13 -0
  168. teradataml/data/clickstream1.csv +11 -0
  169. teradataml/data/closeness_example.json +16 -0
  170. teradataml/data/complaints.csv +21 -0
  171. teradataml/data/complaints_mini.csv +3 -0
  172. teradataml/data/complaints_test_tokenized.csv +353 -0
  173. teradataml/data/complaints_testtoken.csv +224 -0
  174. teradataml/data/complaints_tokens_model.csv +348 -0
  175. teradataml/data/complaints_tokens_test.csv +353 -0
  176. teradataml/data/complaints_traintoken.csv +472 -0
  177. teradataml/data/computers_category.csv +1001 -0
  178. teradataml/data/computers_test1.csv +1252 -0
  179. teradataml/data/computers_train1.csv +5009 -0
  180. teradataml/data/computers_train1_clustered.csv +5009 -0
  181. teradataml/data/confusionmatrix_example.json +9 -0
  182. teradataml/data/conversion_event_table.csv +3 -0
  183. teradataml/data/corr_input.csv +17 -0
  184. teradataml/data/correlation_example.json +11 -0
  185. teradataml/data/covid_confirm_sd.csv +83 -0
  186. teradataml/data/coxhazardratio_example.json +39 -0
  187. teradataml/data/coxph_example.json +15 -0
  188. teradataml/data/coxsurvival_example.json +28 -0
  189. teradataml/data/cpt.csv +41 -0
  190. teradataml/data/credit_ex_merged.csv +45 -0
  191. teradataml/data/creditcard_data.csv +1001 -0
  192. teradataml/data/customer_loyalty.csv +301 -0
  193. teradataml/data/customer_loyalty_newseq.csv +31 -0
  194. teradataml/data/customer_segmentation_test.csv +2628 -0
  195. teradataml/data/customer_segmentation_train.csv +8069 -0
  196. teradataml/data/dataframe_example.json +173 -0
  197. teradataml/data/decisionforest_example.json +37 -0
  198. teradataml/data/decisionforestpredict_example.json +38 -0
  199. teradataml/data/decisiontree_example.json +21 -0
  200. teradataml/data/decisiontreepredict_example.json +45 -0
  201. teradataml/data/dfft2_size4_real.csv +17 -0
  202. teradataml/data/dfft2_test_matrix16.csv +17 -0
  203. teradataml/data/dfft2conv_real_4_4.csv +65 -0
  204. teradataml/data/diabetes.csv +443 -0
  205. teradataml/data/diabetes_test.csv +89 -0
  206. teradataml/data/dict_table.csv +5 -0
  207. teradataml/data/docperterm_table.csv +4 -0
  208. teradataml/data/docs/__init__.py +1 -0
  209. teradataml/data/docs/byom/__init__.py +0 -0
  210. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
  211. teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
  212. teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
  213. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  214. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
  215. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  216. teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
  217. teradataml/data/docs/byom/docs/__init__.py +0 -0
  218. teradataml/data/docs/sqle/__init__.py +0 -0
  219. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
  220. teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
  221. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
  222. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
  223. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
  224. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
  225. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
  226. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
  227. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
  228. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
  229. teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
  230. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
  231. teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
  232. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
  233. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
  234. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
  235. teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
  236. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
  237. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
  238. teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
  239. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
  240. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
  241. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
  242. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
  243. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
  244. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
  245. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
  246. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
  247. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
  248. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
  249. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
  250. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
  251. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
  252. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
  253. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
  254. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
  255. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
  256. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
  257. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
  258. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
  259. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
  260. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
  261. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
  262. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
  263. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
  264. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
  265. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
  266. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
  267. teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
  268. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
  269. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
  270. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  271. teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
  272. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
  273. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
  274. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  275. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
  276. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
  277. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
  278. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
  279. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
  280. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
  281. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
  282. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
  283. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
  284. teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
  285. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
  286. teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
  287. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
  288. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
  289. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
  290. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
  291. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
  292. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
  293. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
  294. teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
  295. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
  296. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
  297. teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
  298. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
  299. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  300. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
  301. teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
  302. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  303. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
  304. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
  305. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
  306. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
  307. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
  308. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
  309. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
  310. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
  311. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
  312. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
  313. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
  314. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
  315. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
  316. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
  317. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
  318. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  319. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
  320. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
  321. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
  322. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
  323. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
  324. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
  325. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
  326. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
  327. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
  328. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
  329. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
  330. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  331. teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
  332. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
  333. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
  334. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
  335. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
  336. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
  337. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
  338. teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
  339. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
  340. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
  341. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
  342. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
  343. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
  344. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
  345. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
  346. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  347. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  348. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
  349. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
  350. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  351. teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
  352. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
  353. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
  354. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
  355. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
  356. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  357. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
  358. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
  359. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
  360. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
  361. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
  362. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
  363. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
  364. teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
  365. teradataml/data/docs/tableoperator/__init__.py +0 -0
  366. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
  367. teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
  368. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
  369. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
  370. teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
  371. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
  372. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
  373. teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
  374. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  375. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
  376. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
  377. teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
  378. teradataml/data/docs/uaf/__init__.py +0 -0
  379. teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
  380. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
  381. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
  382. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
  383. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  384. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  385. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
  386. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
  387. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
  388. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
  389. teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
  390. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
  391. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  392. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
  393. teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
  394. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
  395. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
  396. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
  397. teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
  398. teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
  399. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  400. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
  401. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
  402. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
  403. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
  404. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  405. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
  406. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
  407. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
  408. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
  409. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
  410. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
  411. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
  412. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  413. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  414. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  415. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
  416. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
  417. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
  418. teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
  419. teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
  420. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  421. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
  422. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
  423. teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
  424. teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
  425. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
  426. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
  427. teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
  428. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  429. teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
  430. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
  431. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
  432. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
  433. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
  434. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
  435. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
  436. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
  437. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
  438. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
  439. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
  440. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  441. teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
  442. teradataml/data/dtw_example.json +18 -0
  443. teradataml/data/dtw_t1.csv +11 -0
  444. teradataml/data/dtw_t2.csv +4 -0
  445. teradataml/data/dwt2d_dataTable.csv +65 -0
  446. teradataml/data/dwt2d_example.json +16 -0
  447. teradataml/data/dwt_dataTable.csv +8 -0
  448. teradataml/data/dwt_example.json +15 -0
  449. teradataml/data/dwt_filterTable.csv +3 -0
  450. teradataml/data/dwt_filter_dim.csv +5 -0
  451. teradataml/data/emission.csv +9 -0
  452. teradataml/data/emp_table_by_dept.csv +19 -0
  453. teradataml/data/employee_info.csv +4 -0
  454. teradataml/data/employee_table.csv +6 -0
  455. teradataml/data/excluding_event_table.csv +2 -0
  456. teradataml/data/finance_data.csv +6 -0
  457. teradataml/data/finance_data2.csv +61 -0
  458. teradataml/data/finance_data3.csv +93 -0
  459. teradataml/data/finance_data4.csv +13 -0
  460. teradataml/data/fish.csv +160 -0
  461. teradataml/data/fm_blood2ageandweight.csv +26 -0
  462. teradataml/data/fmeasure_example.json +12 -0
  463. teradataml/data/followers_leaders.csv +10 -0
  464. teradataml/data/fpgrowth_example.json +12 -0
  465. teradataml/data/frequentpaths_example.json +29 -0
  466. teradataml/data/friends.csv +9 -0
  467. teradataml/data/fs_input.csv +33 -0
  468. teradataml/data/fs_input1.csv +33 -0
  469. teradataml/data/genData.csv +513 -0
  470. teradataml/data/geodataframe_example.json +40 -0
  471. teradataml/data/glass_types.csv +215 -0
  472. teradataml/data/glm_admissions_model.csv +12 -0
  473. teradataml/data/glm_example.json +56 -0
  474. teradataml/data/glml1l2_example.json +28 -0
  475. teradataml/data/glml1l2predict_example.json +54 -0
  476. teradataml/data/glmpredict_example.json +54 -0
  477. teradataml/data/gq_t1.csv +21 -0
  478. teradataml/data/grocery_transaction.csv +19 -0
  479. teradataml/data/hconvolve_complex_right.csv +5 -0
  480. teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
  481. teradataml/data/histogram_example.json +12 -0
  482. teradataml/data/hmmdecoder_example.json +79 -0
  483. teradataml/data/hmmevaluator_example.json +25 -0
  484. teradataml/data/hmmsupervised_example.json +10 -0
  485. teradataml/data/hmmunsupervised_example.json +8 -0
  486. teradataml/data/hnsw_alter_data.csv +5 -0
  487. teradataml/data/hnsw_data.csv +10 -0
  488. teradataml/data/house_values.csv +12 -0
  489. teradataml/data/house_values2.csv +13 -0
  490. teradataml/data/housing_cat.csv +7 -0
  491. teradataml/data/housing_data.csv +9 -0
  492. teradataml/data/housing_test.csv +47 -0
  493. teradataml/data/housing_test_binary.csv +47 -0
  494. teradataml/data/housing_train.csv +493 -0
  495. teradataml/data/housing_train_attribute.csv +5 -0
  496. teradataml/data/housing_train_binary.csv +437 -0
  497. teradataml/data/housing_train_parameter.csv +2 -0
  498. teradataml/data/housing_train_response.csv +493 -0
  499. teradataml/data/housing_train_segment.csv +201 -0
  500. teradataml/data/ibm_stock.csv +370 -0
  501. teradataml/data/ibm_stock1.csv +370 -0
  502. teradataml/data/identitymatch_example.json +22 -0
  503. teradataml/data/idf_table.csv +4 -0
  504. teradataml/data/idwt2d_dataTable.csv +5 -0
  505. teradataml/data/idwt_dataTable.csv +8 -0
  506. teradataml/data/idwt_filterTable.csv +3 -0
  507. teradataml/data/impressions.csv +101 -0
  508. teradataml/data/inflation.csv +21 -0
  509. teradataml/data/initial.csv +3 -0
  510. teradataml/data/insect2Cols.csv +61 -0
  511. teradataml/data/insect_sprays.csv +13 -0
  512. teradataml/data/insurance.csv +1339 -0
  513. teradataml/data/interpolator_example.json +13 -0
  514. teradataml/data/interval_data.csv +5 -0
  515. teradataml/data/iris_altinput.csv +481 -0
  516. teradataml/data/iris_attribute_output.csv +8 -0
  517. teradataml/data/iris_attribute_test.csv +121 -0
  518. teradataml/data/iris_attribute_train.csv +481 -0
  519. teradataml/data/iris_category_expect_predict.csv +31 -0
  520. teradataml/data/iris_data.csv +151 -0
  521. teradataml/data/iris_input.csv +151 -0
  522. teradataml/data/iris_response_train.csv +121 -0
  523. teradataml/data/iris_test.csv +31 -0
  524. teradataml/data/iris_train.csv +121 -0
  525. teradataml/data/join_table1.csv +4 -0
  526. teradataml/data/join_table2.csv +4 -0
  527. teradataml/data/jsons/anly_function_name.json +7 -0
  528. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  529. teradataml/data/jsons/byom/dataikupredict.json +148 -0
  530. teradataml/data/jsons/byom/datarobotpredict.json +147 -0
  531. teradataml/data/jsons/byom/h2opredict.json +195 -0
  532. teradataml/data/jsons/byom/onnxembeddings.json +267 -0
  533. teradataml/data/jsons/byom/onnxpredict.json +187 -0
  534. teradataml/data/jsons/byom/pmmlpredict.json +147 -0
  535. teradataml/data/jsons/paired_functions.json +450 -0
  536. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
  537. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
  538. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
  539. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
  540. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
  541. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
  542. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
  543. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
  544. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
  545. teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
  546. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
  547. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
  548. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
  549. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
  550. teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
  551. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
  552. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
  553. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
  554. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
  555. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
  556. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
  557. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
  558. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
  559. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
  560. teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
  561. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
  562. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
  563. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
  564. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
  565. teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
  566. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
  567. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
  568. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
  569. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
  570. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
  571. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
  572. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
  573. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
  574. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
  575. teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
  576. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
  577. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
  578. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
  579. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
  580. teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
  581. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
  582. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
  583. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
  584. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
  585. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
  586. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
  587. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
  588. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
  589. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
  590. teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
  591. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
  592. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
  593. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
  594. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
  595. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
  596. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
  597. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
  598. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
  599. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
  600. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
  601. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
  602. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
  603. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
  604. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
  605. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
  606. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
  607. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
  608. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
  609. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
  610. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
  611. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
  612. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
  613. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
  614. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
  615. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
  616. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
  617. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
  618. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
  619. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
  620. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
  621. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
  622. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
  623. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
  624. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
  625. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
  626. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
  627. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
  628. teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
  629. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
  630. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
  631. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
  632. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
  633. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
  634. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
  635. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
  636. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
  637. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
  638. teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
  639. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
  640. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
  641. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
  642. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
  643. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  644. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
  645. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
  646. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  647. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
  648. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
  649. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
  650. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
  651. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
  652. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
  653. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
  654. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
  655. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
  656. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
  657. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
  658. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
  659. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
  660. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
  661. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
  662. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
  663. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
  664. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
  665. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
  666. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
  667. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
  668. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
  669. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
  670. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  671. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  672. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  673. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
  674. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
  675. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
  676. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
  677. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
  678. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
  679. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
  680. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
  681. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
  682. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
  683. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
  684. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
  685. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  686. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
  687. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
  688. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
  689. teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
  690. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
  691. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
  692. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
  693. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
  694. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
  695. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
  696. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
  697. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  698. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
  699. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
  700. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
  701. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
  702. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
  703. teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
  704. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
  705. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
  706. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
  707. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
  708. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  709. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
  710. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
  711. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  712. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
  713. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
  714. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
  715. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  716. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
  717. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
  718. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
  719. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
  720. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
  721. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
  722. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
  723. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
  724. teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
  725. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
  726. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
  727. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
  728. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
  729. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
  730. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
  731. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
  732. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
  733. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
  734. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
  735. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
  736. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  737. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  738. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  739. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  740. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  741. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  742. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  743. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  744. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  745. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  746. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  747. teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
  748. teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
  749. teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
  750. teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
  751. teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
  752. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  753. teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
  754. teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
  755. teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
  756. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
  757. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
  758. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
  759. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  760. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  761. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
  762. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
  763. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
  764. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
  765. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
  766. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
  767. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
  768. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
  769. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
  770. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
  771. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
  772. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
  773. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
  774. teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
  775. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
  776. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  777. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  778. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
  779. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
  780. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
  781. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
  782. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
  783. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
  784. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
  785. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
  786. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  787. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  788. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
  789. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  790. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
  791. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
  792. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
  793. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  794. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
  795. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
  796. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
  797. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
  798. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
  799. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
  800. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
  801. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
  802. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  803. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
  804. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
  805. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
  806. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
  807. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
  808. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
  809. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
  810. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
  811. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
  812. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
  813. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
  814. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  815. teradataml/data/kmeans_example.json +23 -0
  816. teradataml/data/kmeans_table.csv +10 -0
  817. teradataml/data/kmeans_us_arrests_data.csv +51 -0
  818. teradataml/data/knn_example.json +19 -0
  819. teradataml/data/knnrecommender_example.json +7 -0
  820. teradataml/data/knnrecommenderpredict_example.json +12 -0
  821. teradataml/data/lar_example.json +17 -0
  822. teradataml/data/larpredict_example.json +30 -0
  823. teradataml/data/lc_new_predictors.csv +5 -0
  824. teradataml/data/lc_new_reference.csv +9 -0
  825. teradataml/data/lda_example.json +9 -0
  826. teradataml/data/ldainference_example.json +15 -0
  827. teradataml/data/ldatopicsummary_example.json +9 -0
  828. teradataml/data/levendist_input.csv +13 -0
  829. teradataml/data/levenshteindistance_example.json +10 -0
  830. teradataml/data/linreg_example.json +10 -0
  831. teradataml/data/load_example_data.py +350 -0
  832. teradataml/data/loan_prediction.csv +295 -0
  833. teradataml/data/lungcancer.csv +138 -0
  834. teradataml/data/mappingdata.csv +12 -0
  835. teradataml/data/medical_readings.csv +101 -0
  836. teradataml/data/milk_timeseries.csv +157 -0
  837. teradataml/data/min_max_titanic.csv +4 -0
  838. teradataml/data/minhash_example.json +6 -0
  839. teradataml/data/ml_ratings.csv +7547 -0
  840. teradataml/data/ml_ratings_10.csv +2445 -0
  841. teradataml/data/mobile_data.csv +13 -0
  842. teradataml/data/model1_table.csv +5 -0
  843. teradataml/data/model2_table.csv +5 -0
  844. teradataml/data/models/License_file.txt +1 -0
  845. teradataml/data/models/License_file_empty.txt +0 -0
  846. teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
  847. teradataml/data/models/dr_iris_rf +0 -0
  848. teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
  849. teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
  850. teradataml/data/models/iris_db_glm_model.pmml +57 -0
  851. teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
  852. teradataml/data/models/iris_kmeans_model +0 -0
  853. teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
  854. teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
  855. teradataml/data/modularity_example.json +12 -0
  856. teradataml/data/movavg_example.json +8 -0
  857. teradataml/data/mtx1.csv +7 -0
  858. teradataml/data/mtx2.csv +13 -0
  859. teradataml/data/multi_model_classification.csv +401 -0
  860. teradataml/data/multi_model_regression.csv +401 -0
  861. teradataml/data/mvdfft8.csv +9 -0
  862. teradataml/data/naivebayes_example.json +10 -0
  863. teradataml/data/naivebayespredict_example.json +19 -0
  864. teradataml/data/naivebayestextclassifier2_example.json +7 -0
  865. teradataml/data/naivebayestextclassifier_example.json +8 -0
  866. teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
  867. teradataml/data/name_Find_configure.csv +10 -0
  868. teradataml/data/namedentityfinder_example.json +14 -0
  869. teradataml/data/namedentityfinderevaluator_example.json +10 -0
  870. teradataml/data/namedentityfindertrainer_example.json +6 -0
  871. teradataml/data/nb_iris_input_test.csv +31 -0
  872. teradataml/data/nb_iris_input_train.csv +121 -0
  873. teradataml/data/nbp_iris_model.csv +13 -0
  874. teradataml/data/ner_dict.csv +8 -0
  875. teradataml/data/ner_extractor_text.csv +2 -0
  876. teradataml/data/ner_input_eng.csv +7 -0
  877. teradataml/data/ner_rule.csv +5 -0
  878. teradataml/data/ner_sports_test2.csv +29 -0
  879. teradataml/data/ner_sports_train.csv +501 -0
  880. teradataml/data/nerevaluator_example.json +6 -0
  881. teradataml/data/nerextractor_example.json +18 -0
  882. teradataml/data/nermem_sports_test.csv +18 -0
  883. teradataml/data/nermem_sports_train.csv +51 -0
  884. teradataml/data/nertrainer_example.json +7 -0
  885. teradataml/data/ngrams_example.json +7 -0
  886. teradataml/data/notebooks/__init__.py +0 -0
  887. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
  888. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
  889. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
  890. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
  891. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
  892. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
  893. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
  894. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
  895. teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
  896. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
  897. teradataml/data/npath_example.json +23 -0
  898. teradataml/data/ntree_example.json +14 -0
  899. teradataml/data/numeric_strings.csv +5 -0
  900. teradataml/data/numerics.csv +4 -0
  901. teradataml/data/ocean_buoy.csv +17 -0
  902. teradataml/data/ocean_buoy2.csv +17 -0
  903. teradataml/data/ocean_buoys.csv +28 -0
  904. teradataml/data/ocean_buoys2.csv +10 -0
  905. teradataml/data/ocean_buoys_nonpti.csv +28 -0
  906. teradataml/data/ocean_buoys_seq.csv +29 -0
  907. teradataml/data/onehot_encoder_train.csv +4 -0
  908. teradataml/data/openml_example.json +92 -0
  909. teradataml/data/optional_event_table.csv +4 -0
  910. teradataml/data/orders1.csv +11 -0
  911. teradataml/data/orders1_12.csv +13 -0
  912. teradataml/data/orders_ex.csv +4 -0
  913. teradataml/data/pack_example.json +9 -0
  914. teradataml/data/package_tracking.csv +19 -0
  915. teradataml/data/package_tracking_pti.csv +19 -0
  916. teradataml/data/pagerank_example.json +13 -0
  917. teradataml/data/paragraphs_input.csv +6 -0
  918. teradataml/data/pathanalyzer_example.json +8 -0
  919. teradataml/data/pathgenerator_example.json +8 -0
  920. teradataml/data/patient_profile.csv +101 -0
  921. teradataml/data/pattern_matching_data.csv +11 -0
  922. teradataml/data/payment_fraud_dataset.csv +10001 -0
  923. teradataml/data/peppers.png +0 -0
  924. teradataml/data/phrases.csv +7 -0
  925. teradataml/data/pivot_example.json +9 -0
  926. teradataml/data/pivot_input.csv +22 -0
  927. teradataml/data/playerRating.csv +31 -0
  928. teradataml/data/pos_input.csv +40 -0
  929. teradataml/data/postagger_example.json +7 -0
  930. teradataml/data/posttagger_output.csv +44 -0
  931. teradataml/data/production_data.csv +17 -0
  932. teradataml/data/production_data2.csv +7 -0
  933. teradataml/data/randomsample_example.json +32 -0
  934. teradataml/data/randomwalksample_example.json +9 -0
  935. teradataml/data/rank_table.csv +6 -0
  936. teradataml/data/real_values.csv +14 -0
  937. teradataml/data/ref_mobile_data.csv +4 -0
  938. teradataml/data/ref_mobile_data_dense.csv +2 -0
  939. teradataml/data/ref_url.csv +17 -0
  940. teradataml/data/restaurant_reviews.csv +7 -0
  941. teradataml/data/retail_churn_table.csv +27772 -0
  942. teradataml/data/river_data.csv +145 -0
  943. teradataml/data/roc_example.json +8 -0
  944. teradataml/data/roc_input.csv +101 -0
  945. teradataml/data/rule_inputs.csv +6 -0
  946. teradataml/data/rule_table.csv +2 -0
  947. teradataml/data/sales.csv +7 -0
  948. teradataml/data/sales_transaction.csv +501 -0
  949. teradataml/data/salesdata.csv +342 -0
  950. teradataml/data/sample_cities.csv +3 -0
  951. teradataml/data/sample_shapes.csv +11 -0
  952. teradataml/data/sample_streets.csv +3 -0
  953. teradataml/data/sampling_example.json +16 -0
  954. teradataml/data/sax_example.json +17 -0
  955. teradataml/data/scale_attributes.csv +3 -0
  956. teradataml/data/scale_example.json +74 -0
  957. teradataml/data/scale_housing.csv +11 -0
  958. teradataml/data/scale_housing_test.csv +6 -0
  959. teradataml/data/scale_input_part_sparse.csv +31 -0
  960. teradataml/data/scale_input_partitioned.csv +16 -0
  961. teradataml/data/scale_input_sparse.csv +11 -0
  962. teradataml/data/scale_parameters.csv +3 -0
  963. teradataml/data/scale_stat.csv +11 -0
  964. teradataml/data/scalebypartition_example.json +13 -0
  965. teradataml/data/scalemap_example.json +13 -0
  966. teradataml/data/scalesummary_example.json +12 -0
  967. teradataml/data/score_category.csv +101 -0
  968. teradataml/data/score_summary.csv +4 -0
  969. teradataml/data/script_example.json +10 -0
  970. teradataml/data/scripts/deploy_script.py +84 -0
  971. teradataml/data/scripts/lightgbm/dataset.template +175 -0
  972. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
  973. teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
  974. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
  975. teradataml/data/scripts/mapper.R +20 -0
  976. teradataml/data/scripts/mapper.py +16 -0
  977. teradataml/data/scripts/mapper_replace.py +16 -0
  978. teradataml/data/scripts/sklearn/__init__.py +0 -0
  979. teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
  980. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
  981. teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
  982. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
  983. teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
  984. teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
  985. teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
  986. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  987. teradataml/data/seeds.csv +10 -0
  988. teradataml/data/sentenceextractor_example.json +7 -0
  989. teradataml/data/sentiment_extract_input.csv +11 -0
  990. teradataml/data/sentiment_train.csv +16 -0
  991. teradataml/data/sentiment_word.csv +20 -0
  992. teradataml/data/sentiment_word_input.csv +20 -0
  993. teradataml/data/sentimentextractor_example.json +24 -0
  994. teradataml/data/sentimenttrainer_example.json +8 -0
  995. teradataml/data/sequence_table.csv +10 -0
  996. teradataml/data/seriessplitter_example.json +8 -0
  997. teradataml/data/sessionize_example.json +17 -0
  998. teradataml/data/sessionize_table.csv +116 -0
  999. teradataml/data/setop_test1.csv +24 -0
  1000. teradataml/data/setop_test2.csv +22 -0
  1001. teradataml/data/soc_nw_edges.csv +11 -0
  1002. teradataml/data/soc_nw_vertices.csv +8 -0
  1003. teradataml/data/souvenir_timeseries.csv +168 -0
  1004. teradataml/data/sparse_iris_attribute.csv +5 -0
  1005. teradataml/data/sparse_iris_test.csv +121 -0
  1006. teradataml/data/sparse_iris_train.csv +601 -0
  1007. teradataml/data/star1.csv +6 -0
  1008. teradataml/data/star_pivot.csv +8 -0
  1009. teradataml/data/state_transition.csv +5 -0
  1010. teradataml/data/stock_data.csv +53 -0
  1011. teradataml/data/stock_movement.csv +11 -0
  1012. teradataml/data/stock_vol.csv +76 -0
  1013. teradataml/data/stop_words.csv +8 -0
  1014. teradataml/data/store_sales.csv +37 -0
  1015. teradataml/data/stringsimilarity_example.json +8 -0
  1016. teradataml/data/strsimilarity_input.csv +13 -0
  1017. teradataml/data/students.csv +101 -0
  1018. teradataml/data/svm_iris_input_test.csv +121 -0
  1019. teradataml/data/svm_iris_input_train.csv +481 -0
  1020. teradataml/data/svm_iris_model.csv +7 -0
  1021. teradataml/data/svmdense_example.json +10 -0
  1022. teradataml/data/svmdensepredict_example.json +19 -0
  1023. teradataml/data/svmsparse_example.json +8 -0
  1024. teradataml/data/svmsparsepredict_example.json +14 -0
  1025. teradataml/data/svmsparsesummary_example.json +8 -0
  1026. teradataml/data/target_mobile_data.csv +13 -0
  1027. teradataml/data/target_mobile_data_dense.csv +5 -0
  1028. teradataml/data/target_udt_data.csv +8 -0
  1029. teradataml/data/tdnerextractor_example.json +14 -0
  1030. teradataml/data/templatedata.csv +1201 -0
  1031. teradataml/data/templates/open_source_ml.json +11 -0
  1032. teradataml/data/teradata_icon.ico +0 -0
  1033. teradataml/data/teradataml_example.json +1473 -0
  1034. teradataml/data/test_classification.csv +101 -0
  1035. teradataml/data/test_loan_prediction.csv +53 -0
  1036. teradataml/data/test_pacf_12.csv +37 -0
  1037. teradataml/data/test_prediction.csv +101 -0
  1038. teradataml/data/test_regression.csv +101 -0
  1039. teradataml/data/test_river2.csv +109 -0
  1040. teradataml/data/text_inputs.csv +6 -0
  1041. teradataml/data/textchunker_example.json +8 -0
  1042. teradataml/data/textclassifier_example.json +7 -0
  1043. teradataml/data/textclassifier_input.csv +7 -0
  1044. teradataml/data/textclassifiertrainer_example.json +7 -0
  1045. teradataml/data/textmorph_example.json +11 -0
  1046. teradataml/data/textparser_example.json +15 -0
  1047. teradataml/data/texttagger_example.json +12 -0
  1048. teradataml/data/texttokenizer_example.json +7 -0
  1049. teradataml/data/texttrainer_input.csv +11 -0
  1050. teradataml/data/tf_example.json +7 -0
  1051. teradataml/data/tfidf_example.json +14 -0
  1052. teradataml/data/tfidf_input1.csv +201 -0
  1053. teradataml/data/tfidf_train.csv +6 -0
  1054. teradataml/data/time_table1.csv +535 -0
  1055. teradataml/data/time_table2.csv +14 -0
  1056. teradataml/data/timeseriesdata.csv +1601 -0
  1057. teradataml/data/timeseriesdatasetsd4.csv +105 -0
  1058. teradataml/data/timestamp_data.csv +4 -0
  1059. teradataml/data/titanic.csv +892 -0
  1060. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  1061. teradataml/data/to_num_data.csv +4 -0
  1062. teradataml/data/tochar_data.csv +5 -0
  1063. teradataml/data/token_table.csv +696 -0
  1064. teradataml/data/train_multiclass.csv +101 -0
  1065. teradataml/data/train_regression.csv +101 -0
  1066. teradataml/data/train_regression_multiple_labels.csv +101 -0
  1067. teradataml/data/train_tracking.csv +28 -0
  1068. teradataml/data/trans_dense.csv +16 -0
  1069. teradataml/data/trans_sparse.csv +55 -0
  1070. teradataml/data/transformation_table.csv +6 -0
  1071. teradataml/data/transformation_table_new.csv +2 -0
  1072. teradataml/data/tv_spots.csv +16 -0
  1073. teradataml/data/twod_climate_data.csv +117 -0
  1074. teradataml/data/uaf_example.json +529 -0
  1075. teradataml/data/univariatestatistics_example.json +9 -0
  1076. teradataml/data/unpack_example.json +10 -0
  1077. teradataml/data/unpivot_example.json +25 -0
  1078. teradataml/data/unpivot_input.csv +8 -0
  1079. teradataml/data/url_data.csv +10 -0
  1080. teradataml/data/us_air_pass.csv +37 -0
  1081. teradataml/data/us_population.csv +624 -0
  1082. teradataml/data/us_states_shapes.csv +52 -0
  1083. teradataml/data/varmax_example.json +18 -0
  1084. teradataml/data/vectordistance_example.json +30 -0
  1085. teradataml/data/ville_climatedata.csv +121 -0
  1086. teradataml/data/ville_tempdata.csv +12 -0
  1087. teradataml/data/ville_tempdata1.csv +12 -0
  1088. teradataml/data/ville_temperature.csv +11 -0
  1089. teradataml/data/waveletTable.csv +1605 -0
  1090. teradataml/data/waveletTable2.csv +1605 -0
  1091. teradataml/data/weightedmovavg_example.json +9 -0
  1092. teradataml/data/wft_testing.csv +5 -0
  1093. teradataml/data/windowdfft.csv +16 -0
  1094. teradataml/data/wine_data.csv +1600 -0
  1095. teradataml/data/word_embed_input_table1.csv +6 -0
  1096. teradataml/data/word_embed_input_table2.csv +5 -0
  1097. teradataml/data/word_embed_model.csv +23 -0
  1098. teradataml/data/words_input.csv +13 -0
  1099. teradataml/data/xconvolve_complex_left.csv +6 -0
  1100. teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
  1101. teradataml/data/xgboost_example.json +36 -0
  1102. teradataml/data/xgboostpredict_example.json +32 -0
  1103. teradataml/data/ztest_example.json +16 -0
  1104. teradataml/dataframe/__init__.py +0 -0
  1105. teradataml/dataframe/copy_to.py +2446 -0
  1106. teradataml/dataframe/data_transfer.py +2840 -0
  1107. teradataml/dataframe/dataframe.py +20908 -0
  1108. teradataml/dataframe/dataframe_utils.py +2114 -0
  1109. teradataml/dataframe/fastload.py +794 -0
  1110. teradataml/dataframe/functions.py +2110 -0
  1111. teradataml/dataframe/indexer.py +424 -0
  1112. teradataml/dataframe/row.py +160 -0
  1113. teradataml/dataframe/setop.py +1171 -0
  1114. teradataml/dataframe/sql.py +10904 -0
  1115. teradataml/dataframe/sql_function_parameters.py +440 -0
  1116. teradataml/dataframe/sql_functions.py +652 -0
  1117. teradataml/dataframe/sql_interfaces.py +220 -0
  1118. teradataml/dataframe/vantage_function_types.py +675 -0
  1119. teradataml/dataframe/window.py +694 -0
  1120. teradataml/dbutils/__init__.py +3 -0
  1121. teradataml/dbutils/dbutils.py +2871 -0
  1122. teradataml/dbutils/filemgr.py +318 -0
  1123. teradataml/gen_ai/__init__.py +2 -0
  1124. teradataml/gen_ai/convAI.py +473 -0
  1125. teradataml/geospatial/__init__.py +4 -0
  1126. teradataml/geospatial/geodataframe.py +1105 -0
  1127. teradataml/geospatial/geodataframecolumn.py +392 -0
  1128. teradataml/geospatial/geometry_types.py +926 -0
  1129. teradataml/hyperparameter_tuner/__init__.py +1 -0
  1130. teradataml/hyperparameter_tuner/optimizer.py +4115 -0
  1131. teradataml/hyperparameter_tuner/utils.py +303 -0
  1132. teradataml/lib/__init__.py +0 -0
  1133. teradataml/lib/aed_0_1.dll +0 -0
  1134. teradataml/lib/libaed_0_1.dylib +0 -0
  1135. teradataml/lib/libaed_0_1.so +0 -0
  1136. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  1137. teradataml/lib/libaed_0_1_ppc64le.so +0 -0
  1138. teradataml/opensource/__init__.py +1 -0
  1139. teradataml/opensource/_base.py +1321 -0
  1140. teradataml/opensource/_class.py +464 -0
  1141. teradataml/opensource/_constants.py +61 -0
  1142. teradataml/opensource/_lightgbm.py +949 -0
  1143. teradataml/opensource/_sklearn.py +1008 -0
  1144. teradataml/opensource/_wrapper_utils.py +267 -0
  1145. teradataml/options/__init__.py +148 -0
  1146. teradataml/options/configure.py +489 -0
  1147. teradataml/options/display.py +187 -0
  1148. teradataml/plot/__init__.py +3 -0
  1149. teradataml/plot/axis.py +1427 -0
  1150. teradataml/plot/constants.py +15 -0
  1151. teradataml/plot/figure.py +431 -0
  1152. teradataml/plot/plot.py +810 -0
  1153. teradataml/plot/query_generator.py +83 -0
  1154. teradataml/plot/subplot.py +216 -0
  1155. teradataml/scriptmgmt/UserEnv.py +4273 -0
  1156. teradataml/scriptmgmt/__init__.py +3 -0
  1157. teradataml/scriptmgmt/lls_utils.py +2157 -0
  1158. teradataml/sdk/README.md +79 -0
  1159. teradataml/sdk/__init__.py +4 -0
  1160. teradataml/sdk/_auth_modes.py +422 -0
  1161. teradataml/sdk/_func_params.py +487 -0
  1162. teradataml/sdk/_json_parser.py +453 -0
  1163. teradataml/sdk/_openapi_spec_constants.py +249 -0
  1164. teradataml/sdk/_utils.py +236 -0
  1165. teradataml/sdk/api_client.py +900 -0
  1166. teradataml/sdk/constants.py +62 -0
  1167. teradataml/sdk/modelops/__init__.py +98 -0
  1168. teradataml/sdk/modelops/_client.py +409 -0
  1169. teradataml/sdk/modelops/_constants.py +304 -0
  1170. teradataml/sdk/modelops/models.py +2308 -0
  1171. teradataml/sdk/spinner.py +107 -0
  1172. teradataml/series/__init__.py +0 -0
  1173. teradataml/series/series.py +537 -0
  1174. teradataml/series/series_utils.py +71 -0
  1175. teradataml/store/__init__.py +12 -0
  1176. teradataml/store/feature_store/__init__.py +0 -0
  1177. teradataml/store/feature_store/constants.py +658 -0
  1178. teradataml/store/feature_store/feature_store.py +4814 -0
  1179. teradataml/store/feature_store/mind_map.py +639 -0
  1180. teradataml/store/feature_store/models.py +7330 -0
  1181. teradataml/store/feature_store/utils.py +390 -0
  1182. teradataml/table_operators/Apply.py +979 -0
  1183. teradataml/table_operators/Script.py +1739 -0
  1184. teradataml/table_operators/TableOperator.py +1343 -0
  1185. teradataml/table_operators/__init__.py +2 -0
  1186. teradataml/table_operators/apply_query_generator.py +262 -0
  1187. teradataml/table_operators/query_generator.py +493 -0
  1188. teradataml/table_operators/table_operator_query_generator.py +462 -0
  1189. teradataml/table_operators/table_operator_util.py +726 -0
  1190. teradataml/table_operators/templates/dataframe_apply.template +184 -0
  1191. teradataml/table_operators/templates/dataframe_map.template +176 -0
  1192. teradataml/table_operators/templates/dataframe_register.template +73 -0
  1193. teradataml/table_operators/templates/dataframe_udf.template +67 -0
  1194. teradataml/table_operators/templates/script_executor.template +170 -0
  1195. teradataml/telemetry_utils/__init__.py +0 -0
  1196. teradataml/telemetry_utils/queryband.py +53 -0
  1197. teradataml/utils/__init__.py +0 -0
  1198. teradataml/utils/docstring.py +527 -0
  1199. teradataml/utils/dtypes.py +943 -0
  1200. teradataml/utils/internal_buffer.py +122 -0
  1201. teradataml/utils/print_versions.py +206 -0
  1202. teradataml/utils/utils.py +451 -0
  1203. teradataml/utils/validators.py +3305 -0
  1204. teradataml-20.0.0.8.dist-info/METADATA +2804 -0
  1205. teradataml-20.0.0.8.dist-info/RECORD +1208 -0
  1206. teradataml-20.0.0.8.dist-info/WHEEL +5 -0
  1207. teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
  1208. teradataml-20.0.0.8.dist-info/zip-safe +1 -0
@@ -0,0 +1,2911 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "### Disclaimer\n",
8
+ "Please note, the Vantage Functions via SQLAlchemy feature is a preview/beta code release with limited functionality (the “Code”). As such, you acknowledge that the Code is experimental in nature and that the Code is provided “AS IS” and may not be functional on any machine or in any environment. TERADATA DISCLAIMS ALL WARRANTIES RELATING TO THE CODE, EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES AGAINST INFRINGEMENT OF THIRD-PARTY RIGHTS, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.\n",
9
+ "\n",
10
+ "TERADATA SHALL NOT BE RESPONSIBLE OR LIABLE WITH RESPECT TO ANY SUBJECT MATTER OF THE CODE UNDER ANY CONTRACT, NEGLIGENCE, STRICT LIABILITY OR OTHER THEORY \n",
11
+ " (A) FOR LOSS OR INACCURACY OF DATA OR COST OF PROCUREMENT OF SUBSTITUTE GOODS, SERVICES OR TECHNOLOGY, OR \n",
12
+ " (B) FOR ANY INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO LOSS OF REVENUES AND LOSS OF PROFITS. TERADATA SHALL NOT BE RESPONSIBLE FOR ANY MATTER BEYOND ITS REASONABLE CONTROL.\n",
13
+ "\n",
14
+ "Notwithstanding anything to the contrary: \n",
15
+ " (a) Teradata will have no obligation of any kind with respect to any Code-related comments, suggestions, design changes or improvements that you elect to provide to Teradata in either verbal or written form (collectively, “Feedback”), and \n",
16
+ " (b) Teradata and its affiliates are hereby free to use any ideas, concepts, know-how or techniques, in whole or in part, contained in Feedback: \n",
17
+ " (i) for any purpose whatsoever, including developing, manufacturing, and/or marketing products and/or services incorporating Feedback in whole or in part, and \n",
18
+ " (ii) without any restrictions or limitations, including requiring the payment of any license fees, royalties, or other consideration. "
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 1,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "# In this notebook we shall cover examples for following Regular Expression Functions:\n",
28
+ "# SQL Documentation: https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/c2fX4dzxCcDJFKqXbyQtTA\n",
29
+ "\n",
30
+ "# TD Supported Window Aggregate fucntions:\n",
31
+ "# 1. avg\n",
32
+ "# 2. count\n",
33
+ "# 3. corr\n",
34
+ "# 4. max\n",
35
+ "# 5. min\n",
36
+ "# 6. stddev_pop\n",
37
+ "# 7. stddev_samp\n",
38
+ "# 8. sum\n",
39
+ "# 9. var_pop\n",
40
+ "# 10. var_samp\n",
41
+ "# 11. REGR_AVGX\n",
42
+ "# 12. REGR_AVGY\n",
43
+ "# 13. REGR_INTERCEPT\n",
44
+ "# 14. REGR_R2\n",
45
+ "# 15. REGR_SLOPE\n",
46
+ "# 16. REGR_SXX\n",
47
+ "# 17. REGR_SXY\n",
48
+ "# 18. REGR_SYY\n",
49
+ "# 19. cume_dist\n",
50
+ "# 20. dense_rank\n",
51
+ "# 21. first_value\n",
52
+ "# 22. last_value\n",
53
+ "# 23. lag (Without Ignore NULLs)\n",
54
+ "# 24. lead (Without Ignore NULLs)\n",
55
+ "# 25. mdiff \n",
56
+ "# 26. CSUM\n",
57
+ "# 27. MAVG\n",
58
+ "# 28. median --> with drop_columns=True\n",
59
+ "# 29. mlinreg\n",
60
+ "# 30. MSUM\n",
61
+ "# 31. percent_rank\n",
62
+ "# 32. percentile_cont --> with drop_columns=True\n",
63
+ "# 33. percentile_disc --> with drop_columns=True\n",
64
+ "# 34. Quantile\n",
65
+ "# 35. RANK --> Without \"with ties\" and \"RESET WHEN\"\n",
66
+ "# 36. RANK(Teradata)\n",
67
+ "# 37. row_number() --> Without \"RESET WHEN\"\n",
68
+ "# 38. covar_samp\n",
69
+ "# 39. covar_pop"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": 2,
75
+ "metadata": {},
76
+ "outputs": [
77
+ {
78
+ "name": "stdout",
79
+ "output_type": "stream",
80
+ "text": [
81
+ "Hostname: ········\n",
82
+ "Username: ········\n",
83
+ "Passowrd: ········\n",
84
+ "WARNING: Skipped loading table admissions_train since it already exists in the database.\n"
85
+ ]
86
+ }
87
+ ],
88
+ "source": [
89
+ "# Get the connection to the Vantage using create_context()\n",
90
+ "import getpass\n",
91
+ "from teradataml import *\n",
92
+ "td_context = create_context(host=getpass.getpass(\"Hostname: \"), username=getpass.getpass(\"Username: \"), password=getpass.getpass(\"Passowrd: \"))\n",
93
+ "# Load the example dataset.\n",
94
+ "load_example_data(\"GLM\", [\"admissions_train\"])"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": 3,
100
+ "metadata": {},
101
+ "outputs": [
102
+ {
103
+ "data": {
104
+ "text/plain": [
105
+ " masters gpa stats programming admitted\n",
106
+ "id \n",
107
+ "5 no 3.44 Novice Novice 0\n",
108
+ "34 yes 3.85 Advanced Beginner 0\n",
109
+ "13 no 4.00 Advanced Novice 1\n",
110
+ "40 yes 3.95 Novice Beginner 0\n",
111
+ "22 yes 3.46 Novice Beginner 0\n",
112
+ "19 yes 1.98 Advanced Advanced 0\n",
113
+ "36 no 3.00 Advanced Novice 0\n",
114
+ "15 yes 4.00 Advanced Advanced 1\n",
115
+ "7 yes 2.33 Novice Novice 1\n",
116
+ "17 no 3.83 Advanced Advanced 1"
117
+ ]
118
+ },
119
+ "execution_count": 3,
120
+ "metadata": {},
121
+ "output_type": "execute_result"
122
+ }
123
+ ],
124
+ "source": [
125
+ "# Create the DataFrame on 'admissions_train' table\n",
126
+ "admissions_train = DataFrame(\"admissions_train\")\n",
127
+ "admissions_train"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 4,
133
+ "metadata": {},
134
+ "outputs": [],
135
+ "source": [
136
+ "def print_variables(df, columns=None):\n",
137
+ " print(\"Equivalent SQL: {}\".format(df.show_query()))\n",
138
+ " print(\"\\n\")\n",
139
+ " print(\" ************************* DataFrame ********************* \")\n",
140
+ " print(df)\n",
141
+ " print(\"\\n\\n\")\n",
142
+ " print(\" ************************* DataFrame.dtypes ********************* \")\n",
143
+ " print(df.dtypes)\n",
144
+ " print(\"\\n\\n\")\n",
145
+ " if columns is not None:\n",
146
+ " if isinstance(columns, str):\n",
147
+ " columns = [columns]\n",
148
+ " for col in columns:\n",
149
+ " coltype = df.__getattr__(col).type\n",
150
+ " if isinstance(coltype, sqlalchemy.sql.sqltypes.NullType):\n",
151
+ " coltype = \"NullType\"\n",
152
+ " print(\" '{}' Column Type: {}\".format(col, coltype))"
153
+ ]
154
+ },
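A quick usage sketch of the helper just defined (the doubled-gpa column is purely illustrative; assumes the admissions_train DataFrame created above):

    # Build a trivial derived SQLAlchemy expression and inspect it with the helper.
    doubled = admissions_train.gpa.expression * 2
    print_variables(admissions_train.assign(double_gpa=doubled), "double_gpa")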
155
+ {
156
+ "cell_type": "markdown",
157
+ "metadata": {},
158
+ "source": [
159
+ "# Using SQLAlchemy ClasueElements for WindowAggregates in teradataml DataFrame.assign()"
160
+ ]
161
+ },
162
+ {
163
+ "cell_type": "code",
164
+ "execution_count": 5,
165
+ "metadata": {},
166
+ "outputs": [],
167
+ "source": [
168
+ "# Before we move on with examples, one should read below just to understand how teradataml DataFrame and \n",
169
+ "# it's columns are used to create a SQLAlchemy ClauseElement/Expression.\n",
170
+ "\n",
171
+ "# Often in below examples one would see something like this: 'admissions_train.admitted.expression'\n",
172
+ "# Here in the above expression,\n",
173
+ "# 'admissions_train' is 'teradataml DataFrame'\n",
174
+ "# 'admitted' is 'column name' in teradataml DataFrame 'admissions_train'\n",
175
+ "# Thus, \n",
176
+ "# 'admissions_train.admitted' together forms a ColumnExpression.\n",
177
+ "# expression allows us to use teradata ColumnExpression to be treated as SQLAlchemy Expression.\n",
178
+ "# Thus,\n",
179
+ "# 'admissions_train.admitted.expression' gives us an expression that can be used with SQLAlchemy clauseElements."
180
+ ]
181
+ },
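A short sketch illustrating the point above (assumes the connected session and the admissions_train DataFrame created earlier in this notebook):

    # 'admissions_train.admitted' is a teradataml ColumnExpression;
    # '.expression' exposes the underlying SQLAlchemy column expression,
    # which is what SQLAlchemy ClauseElements operate on.
    col = admissions_train.admitted
    expr = admissions_train.admitted.expression
    print(type(col))   # teradataml ColumnExpression
    print(type(expr))  # a SQLAlchemy column expression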
182
+ {
183
+ "cell_type": "markdown",
184
+ "metadata": {},
185
+ "source": [
186
+ "## Using SQLAlchemy within_group expression"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "markdown",
191
+ "metadata": {},
192
+ "source": [
193
+ "### percentile_disc and percentile_cont function"
194
+ ]
195
+ },
196
+ {
197
+ "cell_type": "code",
198
+ "execution_count": 6,
199
+ "metadata": {},
200
+ "outputs": [],
201
+ "source": [
202
+ "# Returns an interpolated value that falls within its value_expression with respect to its sort specification.\n",
203
+ "# SQL Syntax:\n",
204
+ "# function_name(value_expression) WITHIN GROUP(ORDER BY order_by_expression)\n",
205
+ "# where,\n",
206
+ "# value_expression - a column expression\n",
207
+ "# order_by_expression - a numeric column expression for sorting\n",
208
+ "# ordering can be done in Ascending or Descending order with Nulls First or Nulls Last\n",
209
+ "#\n",
210
+ "# SQLAlchmey Syntax:\n",
211
+ "# func.function_name(value_expression).within_group(order_by_expression)\n",
212
+ "# where,\n",
213
+ "# value_expression - a SQLAlchemy column expression\n",
214
+ "# order_by_expression - a SQLAlchemy column expression of numeric type for sorting\n",
215
+ "# ordering can be done in Ascending or Descending order with Nulls First or Nulls Last"
216
+ ]
217
+ },
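A compact sketch of the pattern described in the cell above (the 0.25 quantile and the gpa column are illustrative choices; assumes the admissions_train DataFrame and an active connection):

    from sqlalchemy import func

    # func.<name>(value_expression).within_group(order_by_expression) renders as:
    #   <name>(value_expression) WITHIN GROUP (ORDER BY ...)
    q1 = func.percentile_cont(0.25).within_group(admissions_train.gpa.expression.asc())
    q1_df = admissions_train.assign(True, q1_gpa=q1)
    print(q1_df.show_query())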
218
+ {
219
+ "cell_type": "code",
220
+ "execution_count": 7,
221
+ "metadata": {},
222
+ "outputs": [],
223
+ "source": [
224
+ "# Import required modules\n",
225
+ "from sqlalchemy import func"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "execution_count": 8,
231
+ "metadata": {},
232
+ "outputs": [
233
+ {
234
+ "data": {
235
+ "text/plain": [
236
+ "sqlalchemy.sql.elements.WithinGroup"
237
+ ]
238
+ },
239
+ "execution_count": 8,
240
+ "metadata": {},
241
+ "output_type": "execute_result"
242
+ }
243
+ ],
244
+ "source": [
245
+ "# Execute percentile_cont function using WITHIN GROUP with desceniding odering done on gpa column \n",
246
+ "pc = func.percentile_cont(0.5).within_group(\n",
247
+ " admissions_train.gpa.expression.desc()\n",
248
+ " )\n",
249
+ "type(pc)"
250
+ ]
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "execution_count": 9,
255
+ "metadata": {},
256
+ "outputs": [
257
+ {
258
+ "name": "stdout",
259
+ "output_type": "stream",
260
+ "text": [
261
+ "Equivalent SQL: select percentile_cont(0.5) WITHIN GROUP (ORDER BY gpa DESC) AS interpolated_value from \"admissions_train\"\n",
262
+ "\n",
263
+ "\n",
264
+ " ************************* DataFrame ********************* \n",
265
+ " interpolated_value\n",
266
+ "0 3.69\n",
267
+ "\n",
268
+ "\n",
269
+ "\n",
270
+ " ************************* DataFrame.dtypes ********************* \n",
271
+ "interpolated_value float\n",
272
+ "\n",
273
+ "\n",
274
+ "\n",
275
+ " 'interpolated_value' Column Type: FLOAT\n"
276
+ ]
277
+ }
278
+ ],
279
+ "source": [
280
+ "pc_df = admissions_train.assign(True, interpolated_value=pc)\n",
281
+ "print_variables(pc_df, \"interpolated_value\")"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": 10,
287
+ "metadata": {},
288
+ "outputs": [
289
+ {
290
+ "name": "stdout",
291
+ "output_type": "stream",
292
+ "text": [
293
+ "Equivalent SQL: select percentile_cont(1) WITHIN GROUP (ORDER BY gpa ASC NULLS FIRST) AS interpolated_value from \"admissions_train\"\n",
294
+ "\n",
295
+ "\n",
296
+ " ************************* DataFrame ********************* \n",
297
+ " interpolated_value\n",
298
+ "0 4.0\n",
299
+ "\n",
300
+ "\n",
301
+ "\n",
302
+ " ************************* DataFrame.dtypes ********************* \n",
303
+ "interpolated_value float\n",
304
+ "\n",
305
+ "\n",
306
+ "\n",
307
+ " 'interpolated_value' Column Type: FLOAT\n"
308
+ ]
309
+ }
310
+ ],
311
+ "source": [
312
+ "# Execute percentile_cont function using WITHIN GROUP with ascending odering done on gpa column and with NULL FIRST\n",
313
+ "df = admissions_train.assign(True, interpolated_value=func.percentile_cont(1).within_group(\n",
314
+ " admissions_train.gpa.expression.asc().nullsfirst()))\n",
315
+ "print_variables(df, \"interpolated_value\")"
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "code",
320
+ "execution_count": 11,
321
+ "metadata": {},
322
+ "outputs": [
323
+ {
324
+ "name": "stdout",
325
+ "output_type": "stream",
326
+ "text": [
327
+ "Equivalent SQL: select percentile_disc(1) WITHIN GROUP (ORDER BY gpa NULLS LAST) AS interpolated_value from \"admissions_train\"\n",
328
+ "\n",
329
+ "\n",
330
+ " ************************* DataFrame ********************* \n",
331
+ " interpolated_value\n",
332
+ "0 4.0\n",
333
+ "\n",
334
+ "\n",
335
+ "\n",
336
+ " ************************* DataFrame.dtypes ********************* \n",
337
+ "interpolated_value float\n",
338
+ "\n",
339
+ "\n",
340
+ "\n",
341
+ " 'interpolated_value' Column Type: FLOAT\n"
342
+ ]
343
+ }
344
+ ],
345
+ "source": [
346
+ "# Execute percentile_disc function using WITHIN GROUP with odering done on gpa column and with NULLS LAST\n",
347
+ "df = admissions_train.assign(True, interpolated_value=func.percentile_disc(1).within_group(\n",
348
+ " admissions_train.gpa.expression.nullslast()))\n",
349
+ "print_variables(df, \"interpolated_value\")"
350
+ ]
351
+ },
352
+ {
353
+ "cell_type": "markdown",
354
+ "metadata": {},
355
+ "source": [
356
+ "## Windowed Aggreates using OVER "
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": 12,
362
+ "metadata": {},
363
+ "outputs": [],
364
+ "source": [
365
+ "# Windowed aggregation offered by Vantage supportes various clauses and can be used \n",
366
+ "# in teradataml with the help of SQLAlchemy.\n",
367
+ "# Let's take a look at few examples on how can we perform windowed aggregation with \n",
368
+ "# the help of SQLAlchemy and teradataml's DataFrame.assign() API.\n",
369
+ "\n",
370
+ "\n",
371
+ "# Column ordering in SQLAlchemy:\n",
372
+ "# To specify ascending or descending ordering, one can use asc() or desc() methods on SQLAlchemy column.\n",
373
+ "# One can specify nulls first or nulls last as well for odering. \n",
374
+ "# Here is how you do it:\n",
375
+ "# Descending oder: admissions_train.id.expression.desc() \n",
376
+ "# Ascending oder: admissions_train.id.expression.asc() \n",
377
+ "# With NULLS FIRST - df.column_name.expression.nullsfirst()\n",
378
+ "# With NULLS LAST - df.column_name.expression.nullslast()\n",
379
+ "# Ascending Order with NULLS LAST - df.column_name.expression.asc().nullslast()\n",
380
+ "\n",
381
+ "\n",
382
+ "# SQLAlchemy offers a method over(...), that can be operated on a SQLAlchemy column.\n",
383
+ "# Syntax:\n",
384
+ "# over(partition_by = partition_expression, order_by = order_expression, rows = (p, f))\n",
385
+ "# where,\n",
386
+ "# partition_expression - column expression to partition the data on. \n",
387
+ "# For example,\n",
388
+ "# df.column_name.expression\n",
389
+ "# order_expression - column expression to sort the data. \n",
390
+ "# Sorting can be done either in Ascending or Descending order with NULLS FIRST or NULLS LAST.\n",
391
+ "# For example,\n",
392
+ "# Default sorting - df.column_name.expression\n",
393
+ "# Ascending Order - df.column_name.expression.asc()\n",
394
+ "# Descending Order - df.column_name.expression.desc()\n",
395
+ "# With NULLS FIRST - df.column_name.expression.nullsfirst()\n",
396
+ "# With NULLS LAST - df.column_name.expression.nullslast()\n",
397
+ "# Ascending Order with NULLS LAST - df.column_name.expression.asc().nullslast()\n",
398
+ "# rows - Generates the syntax for 'ROWS BETWEEN'\n",
399
+ "# To perform windowed aggregate function over a window using ROWS and ROWS BETWEEN one must use \n",
400
+ "# argument \"rows\" in over which accepts a tuple (p, f).\n",
401
+ "# Where,\n",
402
+ "# p and f can accept Negative Integer Value, Positive Integer Value, 0 or None.\n",
403
+ "# Each value passed to p and f have different meaning or results in different syntax.\n",
404
+ "# SQL syntax will be generated based on these values:\n",
405
+ "# 1. Negative Value --> Indicates a preceding value\n",
406
+ "# 2. Positive Value --> Indiacates a following value\n",
407
+ "# 3. 0 --> For Current row\n",
408
+ "# 4. None --> Unbounded value.\n",
409
+ "#"
410
+ ]
411
+ },
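As referenced in the cell above, a minimal sketch of how the rows=(p, f) tuple maps onto the generated ROWS BETWEEN clause (the sum over gpa is an arbitrary choice; assumes the admissions_train DataFrame):

    from sqlalchemy import func

    # rows=(None, 0) --> ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    running_sum = func.sum(admissions_train.gpa.expression).over(
        order_by=admissions_train.id.expression, rows=(None, 0))

    # rows=(-2, 0)   --> ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    trailing_sum = func.sum(admissions_train.gpa.expression).over(
        order_by=admissions_train.id.expression, rows=(-2, 0))

    print(admissions_train.assign(running=running_sum, trailing=trailing_sum).show_query())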
412
+ {
413
+ "cell_type": "markdown",
414
+ "metadata": {},
415
+ "source": [
416
+ "### CSUM Function"
417
+ ]
418
+ },
419
+ {
420
+ "cell_type": "code",
421
+ "execution_count": 13,
422
+ "metadata": {},
423
+ "outputs": [],
424
+ "source": [
425
+ "# Returns the cumulative (or running) sum of a value expression for each row in a partition, \n",
426
+ "# assuming the rows in the partition are sorted by the sort_expression list.\n",
427
+ "# Syntax:\n",
428
+ "# CSUM(value_expression, sort_expression ASC/DESC)\n",
429
+ "# where,\n",
430
+ "# value_expression - a SQLAlchemy column expression\n",
431
+ "# order_by_expression - a SQLAlchemy column expression for sorting\n",
432
+ "# ordering can be done in Ascending or Descending order\n",
433
+ "\n",
434
+ "# To specify ascending or descending ordering, one can use asc() or desc() methods on SQLAlchemy column.\n",
435
+ "# For example,\n",
436
+ "# Descending oder: admissions_train.id.expression.desc() \n",
437
+ "# Ascending oder: admissions_train.id.expression.asc() "
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "code",
442
+ "execution_count": 14,
443
+ "metadata": {},
444
+ "outputs": [
445
+ {
446
+ "data": {
447
+ "text/plain": [
448
+ "sqlalchemy.sql.functions.Function"
449
+ ]
450
+ },
451
+ "execution_count": 14,
452
+ "metadata": {},
453
+ "output_type": "execute_result"
454
+ }
455
+ ],
456
+ "source": [
457
+ "# Calculate running total of gpa.\n",
458
+ "csum_ = func.csum(admissions_train.gpa.expression, admissions_train.id.expression.desc())\n",
459
+ "type(csum_)"
460
+ ]
461
+ },
462
+ {
463
+ "cell_type": "code",
464
+ "execution_count": 15,
465
+ "metadata": {},
466
+ "outputs": [
467
+ {
468
+ "name": "stdout",
469
+ "output_type": "stream",
470
+ "text": [
471
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, csum(gpa, id DESC) AS csum_gpa_ from \"admissions_train\"\n",
472
+ "\n",
473
+ "\n",
474
+ " ************************* DataFrame ********************* \n",
475
+ " masters gpa stats programming admitted csum_gpa_\n",
476
+ "id \n",
477
+ "38 yes 2.65 Advanced Beginner 1 10.35\n",
478
+ "36 no 3.00 Advanced Novice 0 16.87\n",
479
+ "35 no 3.68 Novice Beginner 1 20.55\n",
480
+ "34 yes 3.85 Advanced Beginner 0 24.40\n",
481
+ "32 yes 3.46 Advanced Beginner 0 31.41\n",
482
+ "31 yes 3.50 Advanced Beginner 1 34.91\n",
483
+ "33 no 3.55 Novice Novice 1 27.95\n",
484
+ "37 no 3.52 Novice Novice 1 13.87\n",
485
+ "39 yes 3.75 Advanced Beginner 0 7.70\n",
486
+ "40 yes 3.95 Novice Beginner 0 3.95\n",
487
+ "\n",
488
+ "\n",
489
+ "\n",
490
+ " ************************* DataFrame.dtypes ********************* \n",
491
+ "id int\n",
492
+ "masters str\n",
493
+ "gpa float\n",
494
+ "stats str\n",
495
+ "programming str\n",
496
+ "admitted int\n",
497
+ "csum_gpa_ float\n",
498
+ "\n",
499
+ "\n",
500
+ "\n",
501
+ " 'csum_gpa_' Column Type: FLOAT\n"
502
+ ]
503
+ }
504
+ ],
505
+ "source": [
506
+ "csum_gpa_df = admissions_train.assign(csum_gpa_=csum_)\n",
507
+ "print_variables(csum_gpa_df, \"csum_gpa_\")"
508
+ ]
509
+ },
510
+ {
511
+ "cell_type": "markdown",
512
+ "metadata": {},
513
+ "source": [
514
+ "### CUME_DIST function"
515
+ ]
516
+ },
517
+ {
518
+ "cell_type": "code",
519
+ "execution_count": 16,
520
+ "metadata": {},
521
+ "outputs": [],
522
+ "source": [
523
+ "# Calculates the cumulative distribution of a value in a group of values.\n",
524
+ "# SQL Syntax:\n",
525
+ "# CUME_DIST() OVER (partition_by_expression, sort_expression)\n",
526
+ "# SQLAlchemy Syntax:\n",
527
+ "# func.CUME_DIST().over(partition_by = partition_by_expression, order_by = sort_expression)\n",
528
+ "# where,\n",
529
+ "# partition_by_expression - a SQLAlchemy column expression\n",
530
+ "# order_by_expression - a SQLAlchemy column expression for sorting\n",
531
+ "# ordering can be done in Ascending or Descending order with NULLS FIRST or NULLS LAST"
532
+ ]
533
+ },
534
+ {
535
+ "cell_type": "code",
536
+ "execution_count": 17,
537
+ "metadata": {},
538
+ "outputs": [
539
+ {
540
+ "data": {
541
+ "text/plain": [
542
+ "sqlalchemy.sql.elements.Over"
543
+ ]
544
+ },
545
+ "execution_count": 17,
546
+ "metadata": {},
547
+ "output_type": "execute_result"
548
+ }
549
+ ],
550
+ "source": [
551
+ "# Calculate cumulative distribution by stats.\n",
552
+ "cum_dist_ = func.Cume_dist().over(order_by=admissions_train.stats.expression.desc())\n",
553
+ "type(cum_dist_)"
554
+ ]
555
+ },
556
+ {
557
+ "cell_type": "code",
558
+ "execution_count": 18,
559
+ "metadata": {},
560
+ "outputs": [
561
+ {
562
+ "name": "stdout",
563
+ "output_type": "stream",
564
+ "text": [
565
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, cume_dist() OVER (ORDER BY stats DESC) AS cum_dist_by_stats from \"admissions_train\"\n",
566
+ "\n",
567
+ "\n",
568
+ " ************************* DataFrame ********************* \n",
569
+ " masters gpa stats programming admitted cum_dist_by_stats\n",
570
+ "id \n",
571
+ "29 yes 4.00 Novice Beginner 0 0.275\n",
572
+ "35 no 3.68 Novice Beginner 1 0.275\n",
573
+ "37 no 3.52 Novice Novice 1 0.275\n",
574
+ "40 yes 3.95 Novice Beginner 0 0.275\n",
575
+ "12 no 3.65 Novice Novice 1 0.275\n",
576
+ "22 yes 3.46 Novice Beginner 0 0.275\n",
577
+ "7 yes 2.33 Novice Novice 1 0.275\n",
578
+ "3 no 3.70 Novice Beginner 1 0.275\n",
579
+ "33 no 3.55 Novice Novice 1 0.275\n",
580
+ "5 no 3.44 Novice Novice 0 0.275\n",
581
+ "\n",
582
+ "\n",
583
+ "\n",
584
+ " ************************* DataFrame.dtypes ********************* \n",
585
+ "id int\n",
586
+ "masters str\n",
587
+ "gpa float\n",
588
+ "stats str\n",
589
+ "programming str\n",
590
+ "admitted int\n",
591
+ "cum_dist_by_stats float\n",
592
+ "\n",
593
+ "\n",
594
+ "\n",
595
+ " 'cum_dist_by_stats' Column Type: FLOAT\n"
596
+ ]
597
+ }
598
+ ],
599
+ "source": [
600
+ "cum_dist_by_stats_df = admissions_train.assign(cum_dist_by_stats=cum_dist_)\n",
601
+ "print_variables(cum_dist_by_stats_df, \"cum_dist_by_stats\")"
602
+ ]
603
+ },
604
+ {
605
+ "cell_type": "code",
606
+ "execution_count": 19,
607
+ "metadata": {},
608
+ "outputs": [
609
+ {
610
+ "data": {
611
+ "text/plain": [
612
+ "sqlalchemy.sql.elements.Over"
613
+ ]
614
+ },
615
+ "execution_count": 19,
616
+ "metadata": {},
617
+ "output_type": "execute_result"
618
+ }
619
+ ],
620
+ "source": [
621
+ "# Calculate cumulative distribution by id, partitioned over stats\n",
622
+ "cum_dist_ = func.Cume_dist().over(partition_by=admissions_train.stats.expression, order_by=admissions_train.id.expression.desc())\n",
623
+ "type(cum_dist_)"
624
+ ]
625
+ },
626
+ {
627
+ "cell_type": "code",
628
+ "execution_count": 20,
629
+ "metadata": {},
630
+ "outputs": [
631
+ {
632
+ "name": "stdout",
633
+ "output_type": "stream",
634
+ "text": [
635
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, cume_dist() OVER (PARTITION BY stats ORDER BY id DESC) AS cum_dist_by_stats from \"admissions_train\"\n",
636
+ "\n",
637
+ "\n",
638
+ " ************************* DataFrame ********************* \n",
639
+ " masters gpa stats programming admitted cum_dist_by_stats\n",
640
+ "id \n",
641
+ "4 yes 3.50 Beginner Novice 1 0.600000\n",
642
+ "1 yes 3.95 Beginner Beginner 0 1.000000\n",
643
+ "39 yes 3.75 Advanced Beginner 0 0.041667\n",
644
+ "38 yes 2.65 Advanced Beginner 1 0.083333\n",
645
+ "34 yes 3.85 Advanced Beginner 0 0.166667\n",
646
+ "32 yes 3.46 Advanced Beginner 0 0.208333\n",
647
+ "31 yes 3.50 Advanced Beginner 1 0.250000\n",
648
+ "30 yes 3.79 Advanced Novice 0 0.291667\n",
649
+ "28 no 3.93 Advanced Advanced 1 0.333333\n",
650
+ "27 yes 3.96 Advanced Advanced 0 0.375000\n",
651
+ "\n",
652
+ "\n",
653
+ "\n",
654
+ " ************************* DataFrame.dtypes ********************* \n",
655
+ "id int\n",
656
+ "masters str\n",
657
+ "gpa float\n",
658
+ "stats str\n",
659
+ "programming str\n",
660
+ "admitted int\n",
661
+ "cum_dist_by_stats float\n",
662
+ "\n",
663
+ "\n",
664
+ "\n",
665
+ " 'cum_dist_by_stats' Column Type: FLOAT\n"
666
+ ]
667
+ }
668
+ ],
669
+ "source": [
670
+ "cum_dist_by_stats_df = admissions_train.assign(cum_dist_by_stats=cum_dist_)\n",
671
+ "print_variables(cum_dist_by_stats_df, \"cum_dist_by_stats\")"
672
+ ]
673
+ },
674
+ {
675
+ "cell_type": "markdown",
676
+ "metadata": {},
677
+ "source": [
678
+ "### DENSE_RANK fucntion"
679
+ ]
680
+ },
681
+ {
682
+ "cell_type": "code",
683
+ "execution_count": 21,
684
+ "metadata": {},
685
+ "outputs": [],
686
+ "source": [
687
+ "# Returns an ordered ranking of rows based on the value_expression in the ORDER BY clause.\n",
688
+ "# SQL Syntax:\n",
689
+ "# DENSE_RANK() OVER (PARTITION BY partition_by_expression, ORDER BY sort_expression)\n",
690
+ "#\n",
691
+ "# SQLAlchemy Syntax:\n",
692
+ "# func.DENSE_RANK().over(partition_by = partition_by_expression, order_by = sort_expression)\n",
693
+ "# where,\n",
694
+ "# partition_by_expression - a SQLAlchemy column expression\n",
695
+ "# order_by_expression - a SQLAlchemy column expression for sorting\n",
696
+ "# ordering can be done in Ascending or Descending order with NULLS FIRST or NULLS LAST"
697
+ ]
698
+ },
699
+ {
700
+ "cell_type": "code",
701
+ "execution_count": 22,
702
+ "metadata": {},
703
+ "outputs": [
704
+ {
705
+ "data": {
706
+ "text/plain": [
707
+ "sqlalchemy.sql.elements.Over"
708
+ ]
709
+ },
710
+ "execution_count": 22,
711
+ "metadata": {},
712
+ "output_type": "execute_result"
713
+ }
714
+ ],
715
+ "source": [
716
+ "# Get the ordered ranking based on stats.\n",
717
+ "dense_rank_ = func.DENSE_RANK().over(order_by=admissions_train.stats.expression.desc())\n",
718
+ "type(dense_rank_)"
719
+ ]
720
+ },
721
+ {
722
+ "cell_type": "code",
723
+ "execution_count": 23,
724
+ "metadata": {},
725
+ "outputs": [
726
+ {
727
+ "name": "stdout",
728
+ "output_type": "stream",
729
+ "text": [
730
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, dense_rank() OVER (ORDER BY stats DESC) AS dense_rank_by_stats from \"admissions_train\"\n",
731
+ "\n",
732
+ "\n",
733
+ " ************************* DataFrame ********************* \n",
734
+ " masters gpa stats programming admitted dense_rank_by_stats\n",
735
+ "id \n",
736
+ "33 no 3.55 Novice Novice 1 1\n",
737
+ "12 no 3.65 Novice Novice 1 1\n",
738
+ "5 no 3.44 Novice Novice 0 1\n",
739
+ "40 yes 3.95 Novice Beginner 0 1\n",
740
+ "3 no 3.70 Novice Beginner 1 1\n",
741
+ "21 no 3.87 Novice Beginner 1 1\n",
742
+ "29 yes 4.00 Novice Beginner 0 1\n",
743
+ "22 yes 3.46 Novice Beginner 0 1\n",
744
+ "35 no 3.68 Novice Beginner 1 1\n",
745
+ "7 yes 2.33 Novice Novice 1 1\n",
746
+ "\n",
747
+ "\n",
748
+ "\n",
749
+ " ************************* DataFrame.dtypes ********************* \n",
750
+ "id int\n",
751
+ "masters str\n",
752
+ "gpa float\n",
753
+ "stats str\n",
754
+ "programming str\n",
755
+ "admitted int\n",
756
+ "dense_rank_by_stats int\n",
757
+ "\n",
758
+ "\n",
759
+ "\n",
760
+ " 'dense_rank_by_stats' Column Type: INTEGER\n"
761
+ ]
762
+ }
763
+ ],
764
+ "source": [
765
+ "dense_rank_by_stats_df = admissions_train.assign(dense_rank_by_stats=dense_rank_)\n",
766
+ "print_variables(dense_rank_by_stats_df, \"dense_rank_by_stats\")"
767
+ ]
768
+ },
769
+ {
770
+ "cell_type": "code",
771
+ "execution_count": 24,
772
+ "metadata": {},
773
+ "outputs": [
774
+ {
775
+ "data": {
776
+ "text/plain": [
777
+ "sqlalchemy.sql.elements.Over"
778
+ ]
779
+ },
780
+ "execution_count": 24,
781
+ "metadata": {},
782
+ "output_type": "execute_result"
783
+ }
784
+ ],
785
+ "source": [
786
+ "# Calculate cumulative distribution by id, partitioned over stats\n",
787
+ "dense_rank_ = func.dense_rank().over(partition_by=admissions_train.stats.expression, order_by=admissions_train.id.expression.desc())\n",
788
+ "type(dense_rank_)"
789
+ ]
790
+ },
791
+ {
792
+ "cell_type": "code",
793
+ "execution_count": 25,
794
+ "metadata": {},
795
+ "outputs": [
796
+ {
797
+ "name": "stdout",
798
+ "output_type": "stream",
799
+ "text": [
800
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, dense_rank() OVER (PARTITION BY stats ORDER BY id DESC) AS dense_rank_by_stats from \"admissions_train\"\n",
801
+ "\n",
802
+ "\n",
803
+ " ************************* DataFrame ********************* \n",
804
+ " masters gpa stats programming admitted dense_rank_by_stats\n",
805
+ "id \n",
806
+ "4 yes 3.50 Beginner Novice 1 3\n",
807
+ "1 yes 3.95 Beginner Beginner 0 5\n",
808
+ "39 yes 3.75 Advanced Beginner 0 1\n",
809
+ "38 yes 2.65 Advanced Beginner 1 2\n",
810
+ "34 yes 3.85 Advanced Beginner 0 4\n",
811
+ "32 yes 3.46 Advanced Beginner 0 5\n",
812
+ "31 yes 3.50 Advanced Beginner 1 6\n",
813
+ "30 yes 3.79 Advanced Novice 0 7\n",
814
+ "28 no 3.93 Advanced Advanced 1 8\n",
815
+ "27 yes 3.96 Advanced Advanced 0 9\n",
816
+ "\n",
817
+ "\n",
818
+ "\n",
819
+ " ************************* DataFrame.dtypes ********************* \n",
820
+ "id int\n",
821
+ "masters str\n",
822
+ "gpa float\n",
823
+ "stats str\n",
824
+ "programming str\n",
825
+ "admitted int\n",
826
+ "dense_rank_by_stats int\n",
827
+ "\n",
828
+ "\n",
829
+ "\n",
830
+ " 'dense_rank_by_stats' Column Type: INTEGER\n"
831
+ ]
832
+ }
833
+ ],
834
+ "source": [
835
+ "dense_rank_by_stats_df = admissions_train.assign(dense_rank_by_stats=dense_rank_)\n",
836
+ "print_variables(dense_rank_by_stats_df, \"dense_rank_by_stats\")"
837
+ ]
838
+ },
839
+ {
840
+ "cell_type": "markdown",
841
+ "metadata": {},
842
+ "source": [
843
+ "### First Value Function"
844
+ ]
845
+ },
846
+ {
847
+ "cell_type": "code",
848
+ "execution_count": 26,
849
+ "metadata": {},
850
+ "outputs": [],
851
+ "source": [
852
+ "# Returns the first value in an ordered set of values.\n",
853
+ "# SQL Syntax:\n",
854
+ "# FIRST_VALUE(value_expression) OVER (PARTITION BY partition_by_expression, ORDER BY sort_expression, ROWS BETWEEN ... )\n",
855
+ "#\n",
856
+ "# SQLAlchemy Syntax:\n",
857
+ "# func.FIRST_VALUE(value_expression).over(partition_by = partition_by_expression, order_by = sort_expression, rows=(p, f))\n",
858
+ "# where,\n",
859
+ "# value_expression - a SQLAlchemy column expression on which function to be executed.\n",
860
+ "# partition_by_expression - a SQLAlchemy column expression\n",
861
+ "# order_by_expression - a SQLAlchemy column expression for sorting\n",
862
+ "# ordering can be done in Ascending or Descending order with NULLS FIRST or NULLS LAST\n",
863
+ "# rows_ = Define a window"
864
+ ]
865
+ },
866
+ {
867
+ "cell_type": "code",
868
+ "execution_count": 27,
869
+ "metadata": {},
870
+ "outputs": [
871
+ {
872
+ "data": {
873
+ "text/plain": [
874
+ "sqlalchemy.sql.elements.Over"
875
+ ]
876
+ },
877
+ "execution_count": 27,
878
+ "metadata": {},
879
+ "output_type": "execute_result"
880
+ }
881
+ ],
882
+ "source": [
883
+ "# Example returns by id first gpa in the moving average group.\n",
884
+ "FIRST_VALUE_ = func.FIRST_VALUE(admissions_train.gpa.expression).over(order_by=admissions_train.id.expression, rows=(-3, 1))\n",
885
+ "type(FIRST_VALUE_)"
886
+ ]
887
+ },
888
+ {
889
+ "cell_type": "code",
890
+ "execution_count": 28,
891
+ "metadata": {},
892
+ "outputs": [
893
+ {
894
+ "name": "stdout",
895
+ "output_type": "stream",
896
+ "text": [
897
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, FIRST_VALUE(gpa) OVER (ORDER BY id ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING) AS \"FIRST_VALUE_gpa\" from \"admissions_train\"\n",
898
+ "\n",
899
+ "\n",
900
+ " ************************* DataFrame ********************* \n",
901
+ " masters gpa stats programming admitted FIRST_VALUE_gpa\n",
902
+ "id \n",
903
+ "3 no 3.70 Novice Beginner 1 3.95\n",
904
+ "5 no 3.44 Novice Novice 0 3.76\n",
905
+ "6 yes 3.50 Beginner Advanced 1 3.70\n",
906
+ "7 yes 2.33 Novice Novice 1 3.50\n",
907
+ "9 no 3.82 Advanced Advanced 1 3.50\n",
908
+ "10 no 3.71 Advanced Advanced 1 2.33\n",
909
+ "8 no 3.60 Beginner Advanced 1 3.44\n",
910
+ "4 yes 3.50 Beginner Novice 1 3.95\n",
911
+ "2 yes 3.76 Beginner Beginner 0 3.95\n",
912
+ "1 yes 3.95 Beginner Beginner 0 3.95\n",
913
+ "\n",
914
+ "\n",
915
+ "\n",
916
+ " ************************* DataFrame.dtypes ********************* \n",
917
+ "id int\n",
918
+ "masters str\n",
919
+ "gpa float\n",
920
+ "stats str\n",
921
+ "programming str\n",
922
+ "admitted int\n",
923
+ "FIRST_VALUE_gpa float\n",
924
+ "\n",
925
+ "\n",
926
+ "\n",
927
+ " 'FIRST_VALUE_gpa' Column Type: FLOAT\n"
928
+ ]
929
+ }
930
+ ],
931
+ "source": [
932
+ "fv_gpa_df = admissions_train.assign(FIRST_VALUE_gpa=FIRST_VALUE_)\n",
933
+ "print_variables(fv_gpa_df, \"FIRST_VALUE_gpa\")"
934
+ ]
935
+ },
936
+ {
937
+ "cell_type": "markdown",
938
+ "metadata": {},
939
+ "source": [
940
+ "### Last Value Function"
941
+ ]
942
+ },
943
+ {
944
+ "cell_type": "code",
945
+ "execution_count": 29,
946
+ "metadata": {},
947
+ "outputs": [],
948
+ "source": [
949
+ "# Returns the last value in an ordered set of values.\n",
950
+ "# SQL Syntax:\n",
951
+ "# LAST_VALUE(value_expression) OVER (PARTITION BY partition_by_expression, ORDER BY sort_expression, ROWS BETWEEN ... )\n",
952
+ "#\n",
953
+ "# SQLAlchemy Syntax:\n",
954
+ "# func.LAST_VALUE(value_expression).over(partition_by = partition_by_expression, order_by = sort_expression, rows=(p, f))\n",
955
+ "# where,\n",
956
+ "# value_expression - a SQLAlchemy column expression on which function to be executed.\n",
957
+ "# partition_by_expression - a SQLAlchemy column expression\n",
958
+ "# order_by_expression - a SQLAlchemy column expression for sorting\n",
959
+ "# ordering can be done in Ascending or Descending order with NULLS FIRST or NULLS LAST\n",
960
+ "# rows_ = Define a window"
961
+ ]
962
+ },
963
+ {
964
+ "cell_type": "code",
965
+ "execution_count": 30,
966
+ "metadata": {},
967
+ "outputs": [
968
+ {
969
+ "data": {
970
+ "text/plain": [
971
+ "sqlalchemy.sql.elements.Over"
972
+ ]
973
+ },
974
+ "execution_count": 30,
975
+ "metadata": {},
976
+ "output_type": "execute_result"
977
+ }
978
+ ],
979
+ "source": [
980
+ "# Example returns by id last gpa in the moving average group.\n",
981
+ "last_value_ = func.last_value(admissions_train.gpa.expression).over(order_by=admissions_train.id.expression, rows=(-3, 1))\n",
982
+ "type(last_value_)"
983
+ ]
984
+ },
985
+ {
986
+ "cell_type": "code",
987
+ "execution_count": 31,
988
+ "metadata": {},
989
+ "outputs": [
990
+ {
991
+ "name": "stdout",
992
+ "output_type": "stream",
993
+ "text": [
994
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, last_value(gpa) OVER (ORDER BY id ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING) AS last_value_gpa from \"admissions_train\"\n",
995
+ "\n",
996
+ "\n",
997
+ " ************************* DataFrame ********************* \n",
998
+ " masters gpa stats programming admitted last_value_gpa\n",
999
+ "id \n",
1000
+ "3 no 3.70 Novice Beginner 1 3.50\n",
1001
+ "5 no 3.44 Novice Novice 0 3.50\n",
1002
+ "6 yes 3.50 Beginner Advanced 1 2.33\n",
1003
+ "7 yes 2.33 Novice Novice 1 3.60\n",
1004
+ "9 no 3.82 Advanced Advanced 1 3.71\n",
1005
+ "10 no 3.71 Advanced Advanced 1 3.13\n",
1006
+ "8 no 3.60 Beginner Advanced 1 3.82\n",
1007
+ "4 yes 3.50 Beginner Novice 1 3.44\n",
1008
+ "2 yes 3.76 Beginner Beginner 0 3.70\n",
1009
+ "1 yes 3.95 Beginner Beginner 0 3.76\n",
1010
+ "\n",
1011
+ "\n",
1012
+ "\n",
1013
+ " ************************* DataFrame.dtypes ********************* \n",
1014
+ "id int\n",
1015
+ "masters str\n",
1016
+ "gpa float\n",
1017
+ "stats str\n",
1018
+ "programming str\n",
1019
+ "admitted int\n",
1020
+ "last_value_gpa float\n",
1021
+ "\n",
1022
+ "\n",
1023
+ "\n",
1024
+ " 'last_value_gpa' Column Type: FLOAT\n"
1025
+ ]
1026
+ }
1027
+ ],
1028
+ "source": [
1029
+ "lv_gpa_df = admissions_train.assign(last_value_gpa=last_value_)\n",
1030
+ "print_variables(lv_gpa_df, \"last_value_gpa\")"
1031
+ ]
1032
+ },
1033
+ {
1034
+ "cell_type": "markdown",
1035
+ "metadata": {},
1036
+ "source": [
1037
+ "### LEAD function"
1038
+ ]
1039
+ },
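This notebook has no syntax cell for LEAD (or LAG below), so here is a short sketch of the pattern the next cells use: func.lead(value_expression, offset, default_expression).over(order_by=sort_expression); LAG is identical, with lag in place of lead. A simpler two-argument form under the same assumptions:

    from sqlalchemy import func

    # lead(gpa, 1) OVER (ORDER BY id): the next row's gpa; NULL for the
    # last row because no default expression is supplied.
    next_gpa = func.lead(admissions_train.gpa.expression, 1).over(
        order_by=admissions_train.id.expression)
    print(admissions_train.assign(next_gpa=next_gpa).show_query())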
1040
+ {
1041
+ "cell_type": "code",
1042
+ "execution_count": 32,
1043
+ "metadata": {},
1044
+ "outputs": [
1045
+ {
1046
+ "data": {
1047
+ "text/plain": [
1048
+ "sqlalchemy.sql.elements.Over"
1049
+ ]
1050
+ },
1051
+ "execution_count": 32,
1052
+ "metadata": {},
1053
+ "output_type": "execute_result"
1054
+ }
1055
+ ],
1056
+ "source": [
1057
+ "# Example returns by id last gpa in the moving average group.\n",
1058
+ "lead_value_ = func.lead(admissions_train.gpa.expression, 3, \n",
1059
+ " admissions_train.admitted.expression).over(order_by=admissions_train.id.expression)\n",
1060
+ "type(lead_value_)"
1061
+ ]
1062
+ },
1063
+ {
1064
+ "cell_type": "code",
1065
+ "execution_count": 33,
1066
+ "metadata": {},
1067
+ "outputs": [
1068
+ {
1069
+ "name": "stdout",
1070
+ "output_type": "stream",
1071
+ "text": [
1072
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, lead(gpa, 3, admitted) OVER (ORDER BY id) AS lead_value_col from \"admissions_train\"\n",
1073
+ "\n",
1074
+ "\n",
1075
+ " ************************* DataFrame ********************* \n",
1076
+ " masters gpa stats programming admitted lead_value_col\n",
1077
+ "id \n",
1078
+ "3 no 3.70 Novice Beginner 1 3.50\n",
1079
+ "5 no 3.44 Novice Novice 0 3.60\n",
1080
+ "6 yes 3.50 Beginner Advanced 1 3.82\n",
1081
+ "7 yes 2.33 Novice Novice 1 3.71\n",
1082
+ "9 no 3.82 Advanced Advanced 1 3.65\n",
1083
+ "10 no 3.71 Advanced Advanced 1 4.00\n",
1084
+ "8 no 3.60 Beginner Advanced 1 3.13\n",
1085
+ "4 yes 3.50 Beginner Novice 1 2.33\n",
1086
+ "2 yes 3.76 Beginner Beginner 0 3.44\n",
1087
+ "1 yes 3.95 Beginner Beginner 0 3.50\n",
1088
+ "\n",
1089
+ "\n",
1090
+ "\n",
1091
+ " ************************* DataFrame.dtypes ********************* \n",
1092
+ "id int\n",
1093
+ "masters str\n",
1094
+ "gpa float\n",
1095
+ "stats str\n",
1096
+ "programming str\n",
1097
+ "admitted int\n",
1098
+ "lead_value_col float\n",
1099
+ "\n",
1100
+ "\n",
1101
+ "\n",
1102
+ " 'lead_value_col' Column Type: FLOAT\n"
1103
+ ]
1104
+ }
1105
+ ],
1106
+ "source": [
1107
+ "lead_gpa_df = admissions_train.assign(lead_value_col=lead_value_)\n",
1108
+ "print_variables(lead_gpa_df, \"lead_value_col\")"
1109
+ ]
1110
+ },
1111
+ {
1112
+ "cell_type": "markdown",
1113
+ "metadata": {},
1114
+ "source": [
1115
+ "### LAG function"
1116
+ ]
1117
+ },
1118
+ {
1119
+ "cell_type": "code",
1120
+ "execution_count": 34,
1121
+ "metadata": {},
1122
+ "outputs": [
1123
+ {
1124
+ "data": {
1125
+ "text/plain": [
1126
+ "sqlalchemy.sql.elements.Over"
1127
+ ]
1128
+ },
1129
+ "execution_count": 34,
1130
+ "metadata": {},
1131
+ "output_type": "execute_result"
1132
+ }
1133
+ ],
1134
+ "source": [
1135
+ "# Example returns by id last gpa in the moving average group.\n",
1136
+ "lag_value_ = func.LAG(admissions_train.gpa.expression, 3, \n",
1137
+ " admissions_train.admitted.expression).over(order_by=admissions_train.id.expression)\n",
1138
+ "type(lag_value_)"
1139
+ ]
1140
+ },
1141
+ {
1142
+ "cell_type": "code",
1143
+ "execution_count": 35,
1144
+ "metadata": {},
1145
+ "outputs": [
1146
+ {
1147
+ "name": "stdout",
1148
+ "output_type": "stream",
1149
+ "text": [
1150
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, LAG(gpa, 3, admitted) OVER (ORDER BY id) AS lag_value_col from \"admissions_train\"\n",
1151
+ "\n",
1152
+ "\n",
1153
+ " ************************* DataFrame ********************* \n",
1154
+ " masters gpa stats programming admitted lag_value_col\n",
1155
+ "id \n",
1156
+ "3 no 3.70 Novice Beginner 1 1.00\n",
1157
+ "5 no 3.44 Novice Novice 0 3.76\n",
1158
+ "6 yes 3.50 Beginner Advanced 1 3.70\n",
1159
+ "7 yes 2.33 Novice Novice 1 3.50\n",
1160
+ "9 no 3.82 Advanced Advanced 1 3.50\n",
1161
+ "10 no 3.71 Advanced Advanced 1 2.33\n",
1162
+ "8 no 3.60 Beginner Advanced 1 3.44\n",
1163
+ "4 yes 3.50 Beginner Novice 1 3.95\n",
1164
+ "2 yes 3.76 Beginner Beginner 0 0.00\n",
1165
+ "1 yes 3.95 Beginner Beginner 0 0.00\n",
1166
+ "\n",
1167
+ "\n",
1168
+ "\n",
1169
+ " ************************* DataFrame.dtypes ********************* \n",
1170
+ "id int\n",
1171
+ "masters str\n",
1172
+ "gpa float\n",
1173
+ "stats str\n",
1174
+ "programming str\n",
1175
+ "admitted int\n",
1176
+ "lag_value_col float\n",
1177
+ "\n",
1178
+ "\n",
1179
+ "\n",
1180
+ " 'lag_value_col' Column Type: FLOAT\n"
1181
+ ]
1182
+ }
1183
+ ],
1184
+ "source": [
1185
+ "lag_gpa_df = admissions_train.assign(lag_value_col=lag_value_)\n",
1186
+ "print_variables(lag_gpa_df, \"lag_value_col\")"
1187
+ ]
1188
+ },
1189
+ {
1190
+ "cell_type": "markdown",
1191
+ "metadata": {},
1192
+ "source": [
1193
+ "### MAVG Function"
1194
+ ]
1195
+ },
1196
+ {
1197
+ "cell_type": "code",
1198
+ "execution_count": 36,
1199
+ "metadata": {},
1200
+ "outputs": [],
1201
+ "source": [
1202
+ "# Computes the moving average of a value expression for each row in a partition using the specified value expression \n",
1203
+ "# for the current row and the preceding width-1 rows."
1204
+ ]
1205
+ },
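As noted above, a sketch of the SQL shape these Teradata-native functions generate: unlike the OVER-based functions, MAVG (and MDIFF below) take the width and the sort expression as function arguments (assumes the admissions_train DataFrame):

    from sqlalchemy import func

    # Renders as: mavg(gpa, 3, id)  -- average of the current row and 2 preceding, sorted by id.
    mavg_sketch = func.mavg(admissions_train.gpa.expression, 3,
                            admissions_train.id.expression)
    # Renders as: mdiff(gpa, 3, id) -- difference between the current row and the row 3 back.
    mdiff_sketch = func.mdiff(admissions_train.gpa.expression, 3,
                              admissions_train.id.expression)
    print(admissions_train.assign(mavg_col=mavg_sketch, mdiff_col=mdiff_sketch).show_query())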
1206
+ {
1207
+ "cell_type": "code",
1208
+ "execution_count": 37,
1209
+ "metadata": {},
1210
+ "outputs": [
1211
+ {
1212
+ "data": {
1213
+ "text/plain": [
1214
+ "sqlalchemy.sql.functions.Function"
1215
+ ]
1216
+ },
1217
+ "execution_count": 37,
1218
+ "metadata": {},
1219
+ "output_type": "execute_result"
1220
+ }
1221
+ ],
1222
+ "source": [
1223
+ "mavg_ = func.mavg(admissions_train.gpa.expression, 3, admissions_train.id.expression)\n",
1224
+ "type(mavg_)"
1225
+ ]
1226
+ },
1227
+ {
1228
+ "cell_type": "code",
1229
+ "execution_count": 38,
1230
+ "metadata": {},
1231
+ "outputs": [
1232
+ {
1233
+ "name": "stdout",
1234
+ "output_type": "stream",
1235
+ "text": [
1236
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, mavg(gpa, 3, id) AS mavg_col from \"admissions_train\"\n",
1237
+ "\n",
1238
+ "\n",
1239
+ " ************************* DataFrame ********************* \n",
1240
+ " masters gpa stats programming admitted mavg_col\n",
1241
+ "id \n",
1242
+ "3 no 3.70 Novice Beginner 1 3.803333\n",
1243
+ "5 no 3.44 Novice Novice 0 3.546667\n",
1244
+ "6 yes 3.50 Beginner Advanced 1 3.480000\n",
1245
+ "7 yes 2.33 Novice Novice 1 3.090000\n",
1246
+ "9 no 3.82 Advanced Advanced 1 3.250000\n",
1247
+ "10 no 3.71 Advanced Advanced 1 3.710000\n",
1248
+ "8 no 3.60 Beginner Advanced 1 3.143333\n",
1249
+ "4 yes 3.50 Beginner Novice 1 3.653333\n",
1250
+ "2 yes 3.76 Beginner Beginner 0 3.855000\n",
1251
+ "1 yes 3.95 Beginner Beginner 0 3.950000\n",
1252
+ "\n",
1253
+ "\n",
1254
+ "\n",
1255
+ " ************************* DataFrame.dtypes ********************* \n",
1256
+ "id int\n",
1257
+ "masters str\n",
1258
+ "gpa float\n",
1259
+ "stats str\n",
1260
+ "programming str\n",
1261
+ "admitted int\n",
1262
+ "mavg_col float\n",
1263
+ "\n",
1264
+ "\n",
1265
+ "\n",
1266
+ " 'mavg_col' Column Type: FLOAT\n"
1267
+ ]
1268
+ }
1269
+ ],
1270
+ "source": [
1271
+ "mavg_df = admissions_train.assign(mavg_col=mavg_)\n",
1272
+ "print_variables(mavg_df, \"mavg_col\")"
1273
+ ]
1274
+ },
1275
+ {
1276
+ "cell_type": "markdown",
1277
+ "metadata": {},
1278
+ "source": [
1279
+ "### MDIFF Function"
1280
+ ]
1281
+ },
1282
+ {
1283
+ "cell_type": "code",
1284
+ "execution_count": 39,
1285
+ "metadata": {},
1286
+ "outputs": [],
1287
+ "source": [
1288
+ "# Returns the moving difference between the specified value expression for the current row and the preceding width rows \n",
1289
+ "# for each row in the partition."
1290
+ ]
1291
+ },
1292
+ {
1293
+ "cell_type": "code",
1294
+ "execution_count": 40,
1295
+ "metadata": {},
1296
+ "outputs": [
1297
+ {
1298
+ "data": {
1299
+ "text/plain": [
1300
+ "sqlalchemy.sql.functions.Function"
1301
+ ]
1302
+ },
1303
+ "execution_count": 40,
1304
+ "metadata": {},
1305
+ "output_type": "execute_result"
1306
+ }
1307
+ ],
1308
+ "source": [
1309
+ "mdiff_ = func.mdiff(admissions_train.gpa.expression, 3, admissions_train.id.expression)\n",
1310
+ "type(mdiff_)"
1311
+ ]
1312
+ },
1313
+ {
1314
+ "cell_type": "code",
1315
+ "execution_count": 41,
1316
+ "metadata": {},
1317
+ "outputs": [
1318
+ {
1319
+ "name": "stdout",
1320
+ "output_type": "stream",
1321
+ "text": [
1322
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, mdiff(gpa, 3, id) AS mdiff_col from \"admissions_train\"\n",
1323
+ "\n",
1324
+ "\n",
1325
+ " ************************* DataFrame ********************* \n",
1326
+ " masters gpa stats programming admitted mdiff_col\n",
1327
+ "id \n",
1328
+ "3 no 3.70 Novice Beginner 1 NaN\n",
1329
+ "5 no 3.44 Novice Novice 0 -0.32\n",
1330
+ "6 yes 3.50 Beginner Advanced 1 -0.20\n",
1331
+ "7 yes 2.33 Novice Novice 1 -1.17\n",
1332
+ "9 no 3.82 Advanced Advanced 1 0.32\n",
1333
+ "10 no 3.71 Advanced Advanced 1 1.38\n",
1334
+ "8 no 3.60 Beginner Advanced 1 0.16\n",
1335
+ "4 yes 3.50 Beginner Novice 1 -0.45\n",
1336
+ "2 yes 3.76 Beginner Beginner 0 NaN\n",
1337
+ "1 yes 3.95 Beginner Beginner 0 NaN\n",
1338
+ "\n",
1339
+ "\n",
1340
+ "\n",
1341
+ " ************************* DataFrame.dtypes ********************* \n",
1342
+ "id int\n",
1343
+ "masters str\n",
1344
+ "gpa float\n",
1345
+ "stats str\n",
1346
+ "programming str\n",
1347
+ "admitted int\n",
1348
+ "mdiff_col float\n",
1349
+ "\n",
1350
+ "\n",
1351
+ "\n",
1352
+ " 'mdiff_col' Column Type: FLOAT\n"
1353
+ ]
1354
+ }
1355
+ ],
1356
+ "source": [
1357
+ "mdiff_df = admissions_train.assign(mdiff_col=mdiff_)\n",
1358
+ "print_variables(mdiff_df, \"mdiff_col\")"
1359
+ ]
1360
+ },
1361
+ {
1362
+ "cell_type": "markdown",
1363
+ "metadata": {},
1364
+ "source": [
1365
+ "### Median function"
1366
+ ]
1367
+ },
1368
+ {
1369
+ "cell_type": "code",
1370
+ "execution_count": 42,
1371
+ "metadata": {},
1372
+ "outputs": [
1373
+ {
1374
+ "data": {
1375
+ "text/plain": [
1376
+ "sqlalchemy.sql.functions.Function"
1377
+ ]
1378
+ },
1379
+ "execution_count": 42,
1380
+ "metadata": {},
1381
+ "output_type": "execute_result"
1382
+ }
1383
+ ],
1384
+ "source": [
1385
+ "median_ = func.median(admissions_train.gpa.expression)\n",
1386
+ "type(median_)"
1387
+ ]
1388
+ },
1389
+ {
1390
+ "cell_type": "code",
1391
+ "execution_count": 43,
1392
+ "metadata": {},
1393
+ "outputs": [
1394
+ {
1395
+ "name": "stdout",
1396
+ "output_type": "stream",
1397
+ "text": [
1398
+ "Equivalent SQL: select median(gpa) AS median_col from \"admissions_train\"\n",
1399
+ "\n",
1400
+ "\n",
1401
+ " ************************* DataFrame ********************* \n",
1402
+ " median_col\n",
1403
+ "0 3.69\n",
1404
+ "\n",
1405
+ "\n",
1406
+ "\n",
1407
+ " ************************* DataFrame.dtypes ********************* \n",
1408
+ "median_col float\n",
1409
+ "\n",
1410
+ "\n",
1411
+ "\n",
1412
+ " 'median_col' Column Type: FLOAT\n"
1413
+ ]
1414
+ }
1415
+ ],
1416
+ "source": [
1417
+ "median_df = admissions_train.assign(True, median_col=median_)\n",
1418
+ "print_variables(median_df, \"median_col\")"
1419
+ ]
1420
+ },
1421
+ {
1422
+ "cell_type": "markdown",
1423
+ "metadata": {},
1424
+ "source": [
1425
+ "### MLINREG function"
1426
+ ]
1427
+ },
1428
+ {
1429
+ "cell_type": "code",
1430
+ "execution_count": 44,
1431
+ "metadata": {},
1432
+ "outputs": [],
1433
+ "source": [
1434
+ "# Returns a predicted value for an expression based on a least squares moving linear regression of the previous \n",
1435
+ "# width -1 (based on sort_expression) column values."
1436
+ ]
1437
+ },
1438
+ {
1439
+ "cell_type": "code",
1440
+ "execution_count": 45,
1441
+ "metadata": {},
1442
+ "outputs": [
1443
+ {
1444
+ "data": {
1445
+ "text/plain": [
1446
+ "sqlalchemy.sql.functions.Function"
1447
+ ]
1448
+ },
1449
+ "execution_count": 45,
1450
+ "metadata": {},
1451
+ "output_type": "execute_result"
1452
+ }
1453
+ ],
1454
+ "source": [
1455
+ "MLINREG_ = func.MLINREG(admissions_train.gpa.expression, 3, admissions_train.id.expression)\n",
1456
+ "type(MLINREG_)"
1457
+ ]
1458
+ },
1459
+ {
1460
+ "cell_type": "code",
1461
+ "execution_count": 46,
1462
+ "metadata": {},
1463
+ "outputs": [
1464
+ {
1465
+ "name": "stdout",
1466
+ "output_type": "stream",
1467
+ "text": [
1468
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, MLINREG(gpa, 3, id) AS \"MLINREG_col\" from \"admissions_train\"\n",
1469
+ "\n",
1470
+ "\n",
1471
+ " ************************* DataFrame ********************* \n",
1472
+ " masters gpa stats programming admitted MLINREG_col\n",
1473
+ "id \n",
1474
+ "3 no 3.70 Novice Beginner 1 3.57\n",
1475
+ "5 no 3.44 Novice Novice 0 3.30\n",
1476
+ "6 yes 3.50 Beginner Advanced 1 3.38\n",
1477
+ "7 yes 2.33 Novice Novice 1 3.56\n",
1478
+ "9 no 3.82 Advanced Advanced 1 4.87\n",
1479
+ "10 no 3.71 Advanced Advanced 1 4.04\n",
1480
+ "8 no 3.60 Beginner Advanced 1 1.16\n",
1481
+ "4 yes 3.50 Beginner Novice 1 3.64\n",
1482
+ "2 yes 3.76 Beginner Beginner 0 NaN\n",
1483
+ "1 yes 3.95 Beginner Beginner 0 NaN\n",
1484
+ "\n",
1485
+ "\n",
1486
+ "\n",
1487
+ " ************************* DataFrame.dtypes ********************* \n",
1488
+ "id int\n",
1489
+ "masters str\n",
1490
+ "gpa float\n",
1491
+ "stats str\n",
1492
+ "programming str\n",
1493
+ "admitted int\n",
1494
+ "MLINREG_col float\n",
1495
+ "\n",
1496
+ "\n",
1497
+ "\n",
1498
+ " 'MLINREG_col' Column Type: FLOAT\n"
1499
+ ]
1500
+ }
1501
+ ],
1502
+ "source": [
1503
+ "MLINREG_df = admissions_train.assign(MLINREG_col=MLINREG_)\n",
1504
+ "print_variables(MLINREG_df, \"MLINREG_col\")"
1505
+ ]
1506
+ },
1507
+ {
1508
+ "cell_type": "markdown",
1509
+ "metadata": {},
1510
+ "source": [
1511
+ "### MSUM function"
1512
+ ]
1513
+ },
1514
+ {
1515
+ "cell_type": "code",
1516
+ "execution_count": 47,
1517
+ "metadata": {},
1518
+ "outputs": [],
1519
+ "source": [
1520
+ "# Computes the moving sum specified by a value expression for the current row and the preceding n-1 rows. \n",
1521
+ "# This function is very similar to the MAVG function."
1522
+ ]
1523
+ },
1524
+ {
1525
+ "cell_type": "code",
1526
+ "execution_count": 48,
1527
+ "metadata": {},
1528
+ "outputs": [
1529
+ {
1530
+ "data": {
1531
+ "text/plain": [
1532
+ "sqlalchemy.sql.functions.Function"
1533
+ ]
1534
+ },
1535
+ "execution_count": 48,
1536
+ "metadata": {},
1537
+ "output_type": "execute_result"
1538
+ }
1539
+ ],
1540
+ "source": [
1541
+ "msum_ = func.msum(admissions_train.gpa.expression, 3, admissions_train.id.expression)\n",
1542
+ "type(msum_)"
1543
+ ]
1544
+ },
1545
+ {
1546
+ "cell_type": "code",
1547
+ "execution_count": 49,
1548
+ "metadata": {},
1549
+ "outputs": [
1550
+ {
1551
+ "name": "stdout",
1552
+ "output_type": "stream",
1553
+ "text": [
1554
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, msum(gpa, 3, id) AS msum_col from \"admissions_train\"\n",
1555
+ "\n",
1556
+ "\n",
1557
+ " ************************* DataFrame ********************* \n",
1558
+ " masters gpa stats programming admitted msum_col\n",
1559
+ "id \n",
1560
+ "3 no 3.70 Novice Beginner 1 11.41\n",
1561
+ "5 no 3.44 Novice Novice 0 10.64\n",
1562
+ "6 yes 3.50 Beginner Advanced 1 10.44\n",
1563
+ "7 yes 2.33 Novice Novice 1 9.27\n",
1564
+ "9 no 3.82 Advanced Advanced 1 9.75\n",
1565
+ "10 no 3.71 Advanced Advanced 1 11.13\n",
1566
+ "8 no 3.60 Beginner Advanced 1 9.43\n",
1567
+ "4 yes 3.50 Beginner Novice 1 10.96\n",
1568
+ "2 yes 3.76 Beginner Beginner 0 7.71\n",
1569
+ "1 yes 3.95 Beginner Beginner 0 3.95\n",
1570
+ "\n",
1571
+ "\n",
1572
+ "\n",
1573
+ " ************************* DataFrame.dtypes ********************* \n",
1574
+ "id int\n",
1575
+ "masters str\n",
1576
+ "gpa float\n",
1577
+ "stats str\n",
1578
+ "programming str\n",
1579
+ "admitted int\n",
1580
+ "msum_col float\n",
1581
+ "\n",
1582
+ "\n",
1583
+ "\n",
1584
+ " 'msum_col' Column Type: FLOAT\n"
1585
+ ]
1586
+ }
1587
+ ],
1588
+ "source": [
1589
+ "msum_df = admissions_train.assign(msum_col=msum_)\n",
1590
+ "print_variables(msum_df, \"msum_col\")"
1591
+ ]
1592
+ },
1593
+ {
1594
+ "cell_type": "markdown",
1595
+ "metadata": {},
1596
+ "source": [
1597
+ "### PERCENT_RANK function"
1598
+ ]
1599
+ },
1600
+ {
1601
+ "cell_type": "code",
1602
+ "execution_count": 50,
1603
+ "metadata": {},
1604
+ "outputs": [],
1605
+ "source": [
1606
+ "# Returns the relative rank of rows for a value_expression."
1607
+ ]
1608
+ },
1609
+ {
1610
+ "cell_type": "code",
1611
+ "execution_count": 51,
1612
+ "metadata": {},
1613
+ "outputs": [
1614
+ {
1615
+ "data": {
1616
+ "text/plain": [
1617
+ "sqlalchemy.sql.elements.Over"
1618
+ ]
1619
+ },
1620
+ "execution_count": 51,
1621
+ "metadata": {},
1622
+ "output_type": "execute_result"
1623
+ }
1624
+ ],
1625
+ "source": [
1626
+ "# Get the ordered ranking based on stats.\n",
1627
+ "percent_rank_ = func.percent_rank().over(order_by=admissions_train.id.expression.desc())\n",
1628
+ "type(percent_rank_)"
1629
+ ]
1630
+ },
1631
+ {
1632
+ "cell_type": "code",
1633
+ "execution_count": 52,
1634
+ "metadata": {},
1635
+ "outputs": [
1636
+ {
1637
+ "name": "stdout",
1638
+ "output_type": "stream",
1639
+ "text": [
1640
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, percent_rank() OVER (ORDER BY id DESC) AS percent_rank_col from \"admissions_train\"\n",
1641
+ "\n",
1642
+ "\n",
1643
+ " ************************* DataFrame ********************* \n",
1644
+ " masters gpa stats programming admitted percent_rank_col\n",
1645
+ "id \n",
1646
+ "38 yes 2.65 Advanced Beginner 1 0.051282\n",
1647
+ "36 no 3.00 Advanced Novice 0 0.102564\n",
1648
+ "35 no 3.68 Novice Beginner 1 0.128205\n",
1649
+ "34 yes 3.85 Advanced Beginner 0 0.153846\n",
1650
+ "32 yes 3.46 Advanced Beginner 0 0.205128\n",
1651
+ "31 yes 3.50 Advanced Beginner 1 0.230769\n",
1652
+ "33 no 3.55 Novice Novice 1 0.179487\n",
1653
+ "37 no 3.52 Novice Novice 1 0.076923\n",
1654
+ "39 yes 3.75 Advanced Beginner 0 0.025641\n",
1655
+ "40 yes 3.95 Novice Beginner 0 0.000000\n",
1656
+ "\n",
1657
+ "\n",
1658
+ "\n",
1659
+ " ************************* DataFrame.dtypes ********************* \n",
1660
+ "id int\n",
1661
+ "masters str\n",
1662
+ "gpa float\n",
1663
+ "stats str\n",
1664
+ "programming str\n",
1665
+ "admitted int\n",
1666
+ "percent_rank_col float\n",
1667
+ "\n",
1668
+ "\n",
1669
+ "\n",
1670
+ " 'percent_rank_col' Column Type: FLOAT\n"
1671
+ ]
1672
+ }
1673
+ ],
1674
+ "source": [
1675
+ "percent_rank_df = admissions_train.assign(percent_rank_col=percent_rank_)\n",
1676
+ "print_variables(percent_rank_df, \"percent_rank_col\")"
1677
+ ]
1678
+ },
1679
+ {
1680
+ "cell_type": "code",
1681
+ "execution_count": 53,
1682
+ "metadata": {},
1683
+ "outputs": [
1684
+ {
1685
+ "data": {
1686
+ "text/plain": [
1687
+ "sqlalchemy.sql.elements.Over"
1688
+ ]
1689
+ },
1690
+ "execution_count": 53,
1691
+ "metadata": {},
1692
+ "output_type": "execute_result"
1693
+ }
1694
+ ],
1695
+ "source": [
1696
+ "# Calculate cumulative distribution by id, partitioned over stats\n",
1697
+ "percent_rank_ = func.percent_rank().over(partition_by=admissions_train.stats.expression, order_by=admissions_train.id.expression.desc())\n",
1698
+ "type(percent_rank_)"
1699
+ ]
1700
+ },
1701
+ {
1702
+ "cell_type": "code",
1703
+ "execution_count": 54,
1704
+ "metadata": {},
1705
+ "outputs": [
1706
+ {
1707
+ "name": "stdout",
1708
+ "output_type": "stream",
1709
+ "text": [
1710
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, percent_rank() OVER (PARTITION BY stats ORDER BY id DESC) AS percent_rank_col from \"admissions_train\"\n",
1711
+ "\n",
1712
+ "\n",
1713
+ " ************************* DataFrame ********************* \n",
1714
+ " masters gpa stats programming admitted percent_rank_col\n",
1715
+ "id \n",
1716
+ "4 yes 3.50 Beginner Novice 1 0.500000\n",
1717
+ "1 yes 3.95 Beginner Beginner 0 1.000000\n",
1718
+ "39 yes 3.75 Advanced Beginner 0 0.000000\n",
1719
+ "38 yes 2.65 Advanced Beginner 1 0.043478\n",
1720
+ "34 yes 3.85 Advanced Beginner 0 0.130435\n",
1721
+ "32 yes 3.46 Advanced Beginner 0 0.173913\n",
1722
+ "31 yes 3.50 Advanced Beginner 1 0.217391\n",
1723
+ "30 yes 3.79 Advanced Novice 0 0.260870\n",
1724
+ "28 no 3.93 Advanced Advanced 1 0.304348\n",
1725
+ "27 yes 3.96 Advanced Advanced 0 0.347826\n",
1726
+ "\n",
1727
+ "\n",
1728
+ "\n",
1729
+ " ************************* DataFrame.dtypes ********************* \n",
1730
+ "id int\n",
1731
+ "masters str\n",
1732
+ "gpa float\n",
1733
+ "stats str\n",
1734
+ "programming str\n",
1735
+ "admitted int\n",
1736
+ "percent_rank_col float\n",
1737
+ "\n",
1738
+ "\n",
1739
+ "\n",
1740
+ " 'percent_rank_col' Column Type: FLOAT\n"
1741
+ ]
1742
+ }
1743
+ ],
1744
+ "source": [
1745
+ "percent_rank_df = admissions_train.assign(percent_rank_col=percent_rank_)\n",
1746
+ "print_variables(percent_rank_df, \"percent_rank_col\")"
1747
+ ]
1748
+ },
1749
+ {
1750
+ "cell_type": "markdown",
1751
+ "metadata": {},
1752
+ "source": [
1753
+ "### Quantile Function"
1754
+ ]
1755
+ },
1756
+ {
1757
+ "cell_type": "code",
1758
+ "execution_count": 55,
1759
+ "metadata": {},
1760
+ "outputs": [
1761
+ {
1762
+ "data": {
1763
+ "text/plain": [
1764
+ "sqlalchemy.sql.functions.Function"
1765
+ ]
1766
+ },
1767
+ "execution_count": 55,
1768
+ "metadata": {},
1769
+ "output_type": "execute_result"
1770
+ }
1771
+ ],
1772
+ "source": [
1773
+ "quantile_ = func.quantile(10, admissions_train.gpa.expression)\n",
1774
+ "type(quantile_)"
1775
+ ]
1776
+ },
1777
+ {
1778
+ "cell_type": "code",
1779
+ "execution_count": 56,
1780
+ "metadata": {},
1781
+ "outputs": [
1782
+ {
1783
+ "name": "stdout",
1784
+ "output_type": "stream",
1785
+ "text": [
1786
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, quantile(10, gpa) AS quantile_col from \"admissions_train\"\n",
1787
+ "\n",
1788
+ "\n",
1789
+ " ************************* DataFrame ********************* \n",
1790
+ " masters gpa stats programming admitted quantile_col\n",
1791
+ "id \n",
1792
+ "7 yes 2.33 Novice Novice 1 0\n",
1793
+ "36 no 3.00 Advanced Novice 0 1\n",
1794
+ "11 no 3.13 Advanced Advanced 1 1\n",
1795
+ "5 no 3.44 Novice Novice 0 1\n",
1796
+ "32 yes 3.46 Advanced Beginner 0 2\n",
1797
+ "22 yes 3.46 Novice Beginner 0 2\n",
1798
+ "14 yes 3.45 Advanced Advanced 0 1\n",
1799
+ "38 yes 2.65 Advanced Beginner 1 0\n",
1800
+ "19 yes 1.98 Advanced Advanced 0 0\n",
1801
+ "24 no 1.87 Advanced Novice 1 0\n",
1802
+ "\n",
1803
+ "\n",
1804
+ "\n",
1805
+ " ************************* DataFrame.dtypes ********************* \n",
1806
+ "id int\n",
1807
+ "masters str\n",
1808
+ "gpa float\n",
1809
+ "stats str\n",
1810
+ "programming str\n",
1811
+ "admitted int\n",
1812
+ "quantile_col int\n",
1813
+ "\n",
1814
+ "\n",
1815
+ "\n",
1816
+ " 'quantile_col' Column Type: INTEGER\n"
1817
+ ]
1818
+ }
1819
+ ],
1820
+ "source": [
1821
+ "quantile_df = admissions_train.assign(quantile_col=quantile_)\n",
1822
+ "print_variables(quantile_df, \"quantile_col\")"
1823
+ ]
1824
+ },
1825
+ {
1826
+ "cell_type": "markdown",
1827
+ "metadata": {},
1828
+ "source": [
1829
+ "### RANK function"
1830
+ ]
1831
+ },
1832
+ {
1833
+ "cell_type": "code",
1834
+ "execution_count": 57,
1835
+ "metadata": {},
1836
+ "outputs": [
1837
+ {
1838
+ "data": {
1839
+ "text/plain": [
1840
+ "sqlalchemy.sql.elements.Over"
1841
+ ]
1842
+ },
1843
+ "execution_count": 57,
1844
+ "metadata": {},
1845
+ "output_type": "execute_result"
1846
+ }
1847
+ ],
1848
+ "source": [
1849
+ "# Get the ordered ranking based on stats.\n",
1850
+ "rank_ = func.rank().over(order_by=admissions_train.id.expression.desc())\n",
1851
+ "type(rank_)"
1852
+ ]
1853
+ },
1854
+ {
1855
+ "cell_type": "code",
1856
+ "execution_count": 58,
1857
+ "metadata": {},
1858
+ "outputs": [
1859
+ {
1860
+ "name": "stdout",
1861
+ "output_type": "stream",
1862
+ "text": [
1863
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, rank() OVER (ORDER BY id DESC) AS rank_col from \"admissions_train\"\n",
1864
+ "\n",
1865
+ "\n",
1866
+ " ************************* DataFrame ********************* \n",
1867
+ " masters gpa stats programming admitted rank_col\n",
1868
+ "id \n",
1869
+ "38 yes 2.65 Advanced Beginner 1 3\n",
1870
+ "36 no 3.00 Advanced Novice 0 5\n",
1871
+ "35 no 3.68 Novice Beginner 1 6\n",
1872
+ "34 yes 3.85 Advanced Beginner 0 7\n",
1873
+ "32 yes 3.46 Advanced Beginner 0 9\n",
1874
+ "31 yes 3.50 Advanced Beginner 1 10\n",
1875
+ "33 no 3.55 Novice Novice 1 8\n",
1876
+ "37 no 3.52 Novice Novice 1 4\n",
1877
+ "39 yes 3.75 Advanced Beginner 0 2\n",
1878
+ "40 yes 3.95 Novice Beginner 0 1\n",
1879
+ "\n",
1880
+ "\n",
1881
+ "\n",
1882
+ " ************************* DataFrame.dtypes ********************* \n",
1883
+ "id int\n",
1884
+ "masters str\n",
1885
+ "gpa float\n",
1886
+ "stats str\n",
1887
+ "programming str\n",
1888
+ "admitted int\n",
1889
+ "rank_col int\n",
1890
+ "\n",
1891
+ "\n",
1892
+ "\n",
1893
+ " 'rank_col' Column Type: INTEGER\n"
1894
+ ]
1895
+ }
1896
+ ],
1897
+ "source": [
1898
+ "rank_df = admissions_train.assign(rank_col=rank_)\n",
1899
+ "print_variables(rank_df, \"rank_col\")"
1900
+ ]
1901
+ },
1902
+ {
1903
+ "cell_type": "code",
1904
+ "execution_count": 59,
1905
+ "metadata": {},
1906
+ "outputs": [
1907
+ {
1908
+ "data": {
1909
+ "text/plain": [
1910
+ "sqlalchemy.sql.elements.Over"
1911
+ ]
1912
+ },
1913
+ "execution_count": 59,
1914
+ "metadata": {},
1915
+ "output_type": "execute_result"
1916
+ }
1917
+ ],
1918
+ "source": [
1919
+ "# Calculate cumulative distribution by id, partitioned over stats\n",
1920
+ "rank_ = func.rank().over(partition_by=admissions_train.stats.expression, order_by=admissions_train.id.expression.desc())\n",
1921
+ "type(rank_)"
1922
+ ]
1923
+ },
1924
+ {
1925
+ "cell_type": "code",
1926
+ "execution_count": 60,
1927
+ "metadata": {},
1928
+ "outputs": [
1929
+ {
1930
+ "name": "stdout",
1931
+ "output_type": "stream",
1932
+ "text": [
1933
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, rank() OVER (PARTITION BY stats ORDER BY id DESC) AS rank_col from \"admissions_train\"\n",
1934
+ "\n",
1935
+ "\n",
1936
+ " ************************* DataFrame ********************* \n",
1937
+ " masters gpa stats programming admitted rank_col\n",
1938
+ "id \n",
1939
+ "36 no 3.00 Advanced Novice 0 3\n",
1940
+ "32 yes 3.46 Advanced Beginner 0 5\n",
1941
+ "31 yes 3.50 Advanced Beginner 1 6\n",
1942
+ "30 yes 3.79 Advanced Novice 0 7\n",
1943
+ "27 yes 3.96 Advanced Advanced 0 9\n",
1944
+ "26 yes 3.57 Advanced Advanced 1 10\n",
1945
+ "8 no 3.60 Beginner Advanced 1 1\n",
1946
+ "6 yes 3.50 Beginner Advanced 1 2\n",
1947
+ "4 yes 3.50 Beginner Novice 1 3\n",
1948
+ "2 yes 3.76 Beginner Beginner 0 4\n",
1949
+ "\n",
1950
+ "\n",
1951
+ "\n",
1952
+ " ************************* DataFrame.dtypes ********************* \n",
1953
+ "id int\n",
1954
+ "masters str\n",
1955
+ "gpa float\n",
1956
+ "stats str\n",
1957
+ "programming str\n",
1958
+ "admitted int\n",
1959
+ "rank_col int\n",
1960
+ "\n",
1961
+ "\n",
1962
+ "\n",
1963
+ " 'rank_col' Column Type: INTEGER\n"
1964
+ ]
1965
+ }
1966
+ ],
1967
+ "source": [
1968
+ "rank_df = admissions_train.assign(rank_col=rank_)\n",
1969
+ "print_variables(rank_df, \"rank_col\")"
1970
+ ]
1971
+ },
1972
+ {
1973
+ "cell_type": "markdown",
1974
+ "metadata": {},
1975
+ "source": [
1976
+ "### ROW_NUMBER function"
1977
+ ]
1978
+ },
1979
+ {
1980
+ "cell_type": "code",
1981
+ "execution_count": 61,
1982
+ "metadata": {},
1983
+ "outputs": [
1984
+ {
1985
+ "data": {
1986
+ "text/plain": [
1987
+ "sqlalchemy.sql.elements.Over"
1988
+ ]
1989
+ },
1990
+ "execution_count": 61,
1991
+ "metadata": {},
1992
+ "output_type": "execute_result"
1993
+ }
1994
+ ],
1995
+ "source": [
1996
+ "# Get the ordered ranking based on stats.\n",
1997
+ "row_num_ = func.row_number().over(order_by=admissions_train.id.expression.desc())\n",
1998
+ "type(row_num_)"
1999
+ ]
2000
+ },
2001
+ {
2002
+ "cell_type": "code",
2003
+ "execution_count": 62,
2004
+ "metadata": {},
2005
+ "outputs": [
2006
+ {
2007
+ "name": "stdout",
2008
+ "output_type": "stream",
2009
+ "text": [
2010
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, row_number() OVER (ORDER BY id DESC) AS row_num_col from \"admissions_train\"\n",
2011
+ "\n",
2012
+ "\n",
2013
+ " ************************* DataFrame ********************* \n",
2014
+ " masters gpa stats programming admitted row_num_col\n",
2015
+ "id \n",
2016
+ "38 yes 2.65 Advanced Beginner 1 3\n",
2017
+ "36 no 3.00 Advanced Novice 0 5\n",
2018
+ "35 no 3.68 Novice Beginner 1 6\n",
2019
+ "34 yes 3.85 Advanced Beginner 0 7\n",
2020
+ "32 yes 3.46 Advanced Beginner 0 9\n",
2021
+ "31 yes 3.50 Advanced Beginner 1 10\n",
2022
+ "33 no 3.55 Novice Novice 1 8\n",
2023
+ "37 no 3.52 Novice Novice 1 4\n",
2024
+ "39 yes 3.75 Advanced Beginner 0 2\n",
2025
+ "40 yes 3.95 Novice Beginner 0 1\n",
2026
+ "\n",
2027
+ "\n",
2028
+ "\n",
2029
+ " ************************* DataFrame.dtypes ********************* \n",
2030
+ "id int\n",
2031
+ "masters str\n",
2032
+ "gpa float\n",
2033
+ "stats str\n",
2034
+ "programming str\n",
2035
+ "admitted int\n",
2036
+ "row_num_col int\n",
2037
+ "\n",
2038
+ "\n",
2039
+ "\n",
2040
+ " 'row_num_col' Column Type: INTEGER\n"
2041
+ ]
2042
+ }
2043
+ ],
2044
+ "source": [
2045
+ "row_num_df = admissions_train.assign(row_num_col=row_num_)\n",
2046
+ "print_variables(row_num_df, \"row_num_col\")"
2047
+ ]
2048
+ },
2049
+ {
2050
+ "cell_type": "code",
2051
+ "execution_count": 63,
2052
+ "metadata": {},
2053
+ "outputs": [
2054
+ {
2055
+ "data": {
2056
+ "text/plain": [
2057
+ "sqlalchemy.sql.elements.Over"
2058
+ ]
2059
+ },
2060
+ "execution_count": 63,
2061
+ "metadata": {},
2062
+ "output_type": "execute_result"
2063
+ }
2064
+ ],
2065
+ "source": [
2066
+ "# Calculate cumulative distribution by id, partitioned over stats\n",
2067
+ "row_num_ = func.row_number().over(partition_by=admissions_train.stats.expression, order_by=admissions_train.id.expression.desc())\n",
2068
+ "type(row_num_)"
2069
+ ]
2070
+ },
2071
+ {
2072
+ "cell_type": "code",
2073
+ "execution_count": 64,
2074
+ "metadata": {},
2075
+ "outputs": [
2076
+ {
2077
+ "name": "stdout",
2078
+ "output_type": "stream",
2079
+ "text": [
2080
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, row_number() OVER (PARTITION BY stats ORDER BY id DESC) AS row_num_col from \"admissions_train\"\n",
2081
+ "\n",
2082
+ "\n",
2083
+ " ************************* DataFrame ********************* \n",
2084
+ " masters gpa stats programming admitted row_num_col\n",
2085
+ "id \n",
2086
+ "36 no 3.00 Advanced Novice 0 3\n",
2087
+ "32 yes 3.46 Advanced Beginner 0 5\n",
2088
+ "31 yes 3.50 Advanced Beginner 1 6\n",
2089
+ "30 yes 3.79 Advanced Novice 0 7\n",
2090
+ "27 yes 3.96 Advanced Advanced 0 9\n",
2091
+ "26 yes 3.57 Advanced Advanced 1 10\n",
2092
+ "8 no 3.60 Beginner Advanced 1 1\n",
2093
+ "6 yes 3.50 Beginner Advanced 1 2\n",
2094
+ "4 yes 3.50 Beginner Novice 1 3\n",
2095
+ "2 yes 3.76 Beginner Beginner 0 4\n",
2096
+ "\n",
2097
+ "\n",
2098
+ "\n",
2099
+ " ************************* DataFrame.dtypes ********************* \n",
2100
+ "id int\n",
2101
+ "masters str\n",
2102
+ "gpa float\n",
2103
+ "stats str\n",
2104
+ "programming str\n",
2105
+ "admitted int\n",
2106
+ "row_num_col int\n",
2107
+ "\n",
2108
+ "\n",
2109
+ "\n",
2110
+ " 'row_num_col' Column Type: INTEGER\n"
2111
+ ]
2112
+ }
2113
+ ],
2114
+ "source": [
2115
+ "row_num_df = admissions_train.assign(row_num_col=row_num_)\n",
2116
+ "print_variables(row_num_df, \"row_num_col\")"
2117
+ ]
2118
+ },
2119
+ {
2120
+ "cell_type": "markdown",
2121
+ "metadata": {},
2122
+ "source": [
2123
+ "### AVG function"
2124
+ ]
2125
+ },
2126
+ {
2127
+ "cell_type": "code",
2128
+ "execution_count": 65,
2129
+ "metadata": {},
2130
+ "outputs": [
2131
+ {
2132
+ "data": {
2133
+ "text/plain": [
2134
+ "sqlalchemy.sql.elements.Over"
2135
+ ]
2136
+ },
2137
+ "execution_count": 65,
2138
+ "metadata": {},
2139
+ "output_type": "execute_result"
2140
+ }
2141
+ ],
2142
+ "source": [
2143
+ "### Let's get the mean gpa withing the window partitioned over stats and odered by id.\n",
2144
+ "### SQL Clause Equivalent: \"AVG(gpa) OVER(PARTION BY stats ORDER BY id)\"\n",
2145
+ "avggpa_part_ord = func.avg(admissions_train.gpa.expression).over(partition_by=admissions_train.stats.expression, \n",
2146
+ " order_by=admissions_train.id.expression)\n",
2147
+ "type(avggpa_part_ord)"
2148
+ ]
2149
+ },
2150
+ {
2151
+ "cell_type": "code",
2152
+ "execution_count": 66,
2153
+ "metadata": {},
2154
+ "outputs": [
2155
+ {
2156
+ "name": "stdout",
2157
+ "output_type": "stream",
2158
+ "text": [
2159
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, avg(gpa) OVER (PARTITION BY stats ORDER BY id) AS win_avg_gpa from \"admissions_train\"\n",
2160
+ "\n",
2161
+ "\n",
2162
+ " ************************* DataFrame ********************* \n",
2163
+ " masters gpa stats programming admitted win_avg_gpa\n",
2164
+ "id \n",
2165
+ "11 no 3.13 Advanced Advanced 1 3.50875\n",
2166
+ "14 yes 3.45 Advanced Advanced 0 3.50875\n",
2167
+ "15 yes 4.00 Advanced Advanced 1 3.50875\n",
2168
+ "16 no 3.70 Advanced Advanced 1 3.50875\n",
2169
+ "18 yes 3.81 Advanced Advanced 1 3.50875\n",
2170
+ "19 yes 1.98 Advanced Advanced 0 3.50875\n",
2171
+ "1 yes 3.95 Beginner Beginner 0 3.66200\n",
2172
+ "2 yes 3.76 Beginner Beginner 0 3.66200\n",
2173
+ "4 yes 3.50 Beginner Novice 1 3.66200\n",
2174
+ "6 yes 3.50 Beginner Advanced 1 3.66200\n",
2175
+ "\n",
2176
+ "\n",
2177
+ "\n",
2178
+ " ************************* DataFrame.dtypes ********************* \n",
2179
+ "id int\n",
2180
+ "masters str\n",
2181
+ "gpa float\n",
2182
+ "stats str\n",
2183
+ "programming str\n",
2184
+ "admitted int\n",
2185
+ "win_avg_gpa float\n",
2186
+ "\n",
2187
+ "\n",
2188
+ "\n",
2189
+ " 'win_avg_gpa' Column Type: FLOAT\n"
2190
+ ]
2191
+ }
2192
+ ],
2193
+ "source": [
2194
+ "avg_df = admissions_train.assign(win_avg_gpa=avggpa_part_ord)\n",
2195
+ "print_variables(avg_df, \"win_avg_gpa\")"
2196
+ ]
2197
+ },
2198
+ {
2199
+ "cell_type": "markdown",
2200
+ "metadata": {},
2201
+ "source": [
2202
+ "## Use cases for creating a window using ROWS BETWEEN syntax "
2203
+ ]
2204
+ },
2205
+ {
2206
+ "cell_type": "code",
2207
+ "execution_count": 67,
2208
+ "metadata": {},
2209
+ "outputs": [],
2210
+ "source": [
2211
+ "# Teradata offers various types of computations on windowed aggregate fucntions. Each computation \n",
2212
+ "# 1. Cumulative\n",
2213
+ "# 2. Group\n",
2214
+ "# 3. Moving\n",
2215
+ "# 4. Remaining\n",
2216
+ "# Each computation above can be performed using ROWS or ROWS BETWEEN syntax. Let's take a look at \n",
2217
+ "# examples of each type of computation.\n",
2218
+ "\n",
2219
+ "# To perform windowed aggregate function over a window using ROWS and ROWS BETWEEN one must use \n",
2220
+ "# argument \"rows\" in over which accepts a tuple (p, f).\n",
2221
+ "# Where,\n",
2222
+ "# p and f can accept Negative Integer Value, Positive Integer Value, 0 or None.\n",
2223
+ "# Each value passed to p and f have different meaning or results in different syntax.\n",
2224
+ "# In general, this what these values mean, i.e., SQL syntax will be generated based on these values.\n",
2225
+ "# 1. Negative Value --> Indicates a preceding value\n",
2226
+ "# 2. Positive Value --> Indiacates a following value\n",
2227
+ "# 3. 0 --> For Current row\n",
2228
+ "# 4. None --> Unbounded value.\n",
2229
+ "#"
2230
+ ]
2231
+ },
2232
+ {
2233
+ "cell_type": "markdown",
2234
+ "metadata": {},
2235
+ "source": [
2236
+ "### Windowed aggregation using 'Cumulative' Computation"
2237
+ ]
2238
+ },
2239
+ {
2240
+ "cell_type": "code",
2241
+ "execution_count": 68,
2242
+ "metadata": {},
2243
+ "outputs": [],
2244
+ "source": [
2245
+ "## To perform \"Cumulative\" computation for a windowed aggregate fucntion, \n",
2246
+ "## one should use following SQL Syntax for such cases is:\n",
2247
+ "\n",
2248
+ "# 1. ROWS UNBOUNDED PRECEDING ------------> No support available for this in SQLAlchemy. Needs more investigation.\n",
2249
+ "# 2. ROWS BETWEEN UNBOUNDED PRECEDING AND value PRECEDING ---> rows=(None, negative value)\n",
2250
+ "# 3. ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ---> rows=(None, 0)\n",
2251
+ "# 4. ROWS BETWEEN UNBOUNDED PRECEDING AND value FOLLOWING ---> rows=(None, positive value)"
2252
+ ]
2253
+ },
2254
+ {
2255
+ "cell_type": "code",
2256
+ "execution_count": 69,
2257
+ "metadata": {},
2258
+ "outputs": [
2259
+ {
2260
+ "name": "stdout",
2261
+ "output_type": "stream",
2262
+ "text": [
2263
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, sum(gpa) OVER (ORDER BY stats ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS cumsum_gpa from \"admissions_train\"\n",
2264
+ "\n",
2265
+ "\n",
2266
+ " ************************* DataFrame ********************* \n",
2267
+ " masters gpa stats programming admitted cumsum_gpa\n",
2268
+ "id \n",
2269
+ "15 yes 4.00 Advanced Advanced 1 6.48\n",
2270
+ "24 no 1.87 Advanced Novice 1 14.38\n",
2271
+ "25 no 3.96 Advanced Advanced 1 16.25\n",
2272
+ "16 no 3.70 Advanced Advanced 1 20.21\n",
2273
+ "34 yes 3.85 Advanced Beginner 0 25.89\n",
2274
+ "14 yes 3.45 Advanced Advanced 0 29.74\n",
2275
+ "19 yes 1.98 Advanced Advanced 0 23.91\n",
2276
+ "20 yes 3.90 Advanced Advanced 1 10.48\n",
2277
+ "38 yes 2.65 Advanced Beginner 1 3.83\n",
2278
+ "17 no 3.83 Advanced Advanced 1 NaN\n",
2279
+ "\n",
2280
+ "\n",
2281
+ "\n",
2282
+ " ************************* DataFrame.dtypes ********************* \n",
2283
+ "id int\n",
2284
+ "masters str\n",
2285
+ "gpa float\n",
2286
+ "stats str\n",
2287
+ "programming str\n",
2288
+ "admitted int\n",
2289
+ "cumsum_gpa float\n",
2290
+ "\n",
2291
+ "\n",
2292
+ "\n"
2293
+ ]
2294
+ }
2295
+ ],
2296
+ "source": [
2297
+ "# Example to return Cumulative sum of gpa over a window of ROWS BETWEEN UNBOUNDED PRECEDING AND value PRECEDING\n",
2298
+ "# ROWS BETWEEN UNBOUNDED PRECEDING AND value PRECEDING ---> rows=(None, negative value)\n",
2299
+ "sum_gpa_rbupavp = func.sum(admissions_train.gpa.expression).over(order_by=admissions_train.stats.expression, rows=(None, -1))\n",
2300
+ "df = admissions_train.assign(cumsum_gpa=sum_gpa_rbupavp)\n",
2301
+ "print_variables(df)"
2302
+ ]
2303
+ },
2304
+ {
2305
+ "cell_type": "code",
2306
+ "execution_count": 70,
2307
+ "metadata": {},
2308
+ "outputs": [
2309
+ {
2310
+ "name": "stdout",
2311
+ "output_type": "stream",
2312
+ "text": [
2313
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, sum(gpa) OVER (ORDER BY stats ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS sqlalchemy_gpa from \"admissions_train\"\n",
2314
+ "\n",
2315
+ "\n",
2316
+ " ************************* DataFrame ********************* \n",
2317
+ " masters gpa stats programming admitted sqlalchemy_gpa\n",
2318
+ "id \n",
2319
+ "13 no 4.00 Advanced Novice 1 9.98\n",
2320
+ "23 yes 3.59 Advanced Novice 1 17.02\n",
2321
+ "38 yes 2.65 Advanced Beginner 1 19.67\n",
2322
+ "36 no 3.00 Advanced Novice 0 22.67\n",
2323
+ "30 yes 3.79 Advanced Novice 0 30.29\n",
2324
+ "18 yes 3.81 Advanced Advanced 1 34.10\n",
2325
+ "17 no 3.83 Advanced Advanced 1 26.50\n",
2326
+ "14 yes 3.45 Advanced Advanced 0 13.43\n",
2327
+ "19 yes 1.98 Advanced Advanced 0 5.98\n",
2328
+ "15 yes 4.00 Advanced Advanced 1 4.00\n",
2329
+ "\n",
2330
+ "\n",
2331
+ "\n",
2332
+ " ************************* DataFrame.dtypes ********************* \n",
2333
+ "id int\n",
2334
+ "masters str\n",
2335
+ "gpa float\n",
2336
+ "stats str\n",
2337
+ "programming str\n",
2338
+ "admitted int\n",
2339
+ "sqlalchemy_gpa float\n",
2340
+ "\n",
2341
+ "\n",
2342
+ "\n"
2343
+ ]
2344
+ }
2345
+ ],
2346
+ "source": [
2347
+ "# Example to return Cumulative sum of gpa over a window of Rows between ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\n",
2348
+ "# ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ---> rows=(None, 0)\n",
2349
+ "sum_gpa_rbupavp = func.sum(admissions_train.gpa.expression).over(order_by=admissions_train.stats.expression, rows=(None, 0))\n",
2350
+ "df = admissions_train.assign(sqlalchemy_gpa=sum_gpa_rbupavp)\n",
2351
+ "print_variables(df)"
2352
+ ]
2353
+ },
2354
+ {
2355
+ "cell_type": "code",
2356
+ "execution_count": 71,
2357
+ "metadata": {},
2358
+ "outputs": [
2359
+ {
2360
+ "name": "stdout",
2361
+ "output_type": "stream",
2362
+ "text": [
2363
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, sum(gpa) OVER (ORDER BY stats ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) AS sqlalchemy_gpa from \"admissions_train\"\n",
2364
+ "\n",
2365
+ "\n",
2366
+ " ************************* DataFrame ********************* \n",
2367
+ " masters gpa stats programming admitted sqlalchemy_gpa\n",
2368
+ "id \n",
2369
+ "15 yes 4.00 Advanced Advanced 1 16.25\n",
2370
+ "24 no 1.87 Advanced Novice 1 23.91\n",
2371
+ "25 no 3.96 Advanced Advanced 1 25.89\n",
2372
+ "16 no 3.70 Advanced Advanced 1 29.74\n",
2373
+ "34 yes 3.85 Advanced Beginner 0 36.65\n",
2374
+ "14 yes 3.45 Advanced Advanced 0 40.47\n",
2375
+ "19 yes 1.98 Advanced Advanced 0 33.19\n",
2376
+ "20 yes 3.90 Advanced Advanced 1 20.21\n",
2377
+ "38 yes 2.65 Advanced Beginner 1 14.38\n",
2378
+ "17 no 3.83 Advanced Advanced 1 10.48\n",
2379
+ "\n",
2380
+ "\n",
2381
+ "\n",
2382
+ " ************************* DataFrame.dtypes ********************* \n",
2383
+ "id int\n",
2384
+ "masters str\n",
2385
+ "gpa float\n",
2386
+ "stats str\n",
2387
+ "programming str\n",
2388
+ "admitted int\n",
2389
+ "sqlalchemy_gpa float\n",
2390
+ "\n",
2391
+ "\n",
2392
+ "\n"
2393
+ ]
2394
+ }
2395
+ ],
2396
+ "source": [
2397
+ "### ROWS BETWEEN UNBOUNDED PRECEDING AND value FOLLOWING ---> rows=(None, positive value)\n",
2398
+ "sum_gpa_rbupavf = func.sum(admissions_train.gpa.expression).over(order_by=admissions_train.stats.expression, rows=(None, 2))\n",
2399
+ "df = admissions_train.assign(sqlalchemy_gpa=sum_gpa_rbupavf)\n",
2400
+ "print_variables(df)"
2401
+ ]
2402
+ },
2403
+ {
2404
+ "cell_type": "markdown",
2405
+ "metadata": {},
2406
+ "source": [
2407
+ "### Windowed aggregation using 'Group' Computation"
2408
+ ]
2409
+ },
2410
+ {
2411
+ "cell_type": "code",
2412
+ "execution_count": 72,
2413
+ "metadata": {},
2414
+ "outputs": [],
2415
+ "source": [
2416
+ "## Using \"GROUP\" computation.\n",
2417
+ "## SQL Syntax for such cases is:\n",
2418
+ "### ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ---> rows=(None, None)\n",
2419
+ "### OR using no \"rows\" argument at all."
2420
+ ]
2421
+ },
2422
+ {
2423
+ "cell_type": "code",
2424
+ "execution_count": 73,
2425
+ "metadata": {},
2426
+ "outputs": [
2427
+ {
2428
+ "name": "stdout",
2429
+ "output_type": "stream",
2430
+ "text": [
2431
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, sum(gpa) OVER (ORDER BY stats ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS sqlalchemy_gpa from \"admissions_train\"\n",
2432
+ "\n",
2433
+ "\n",
2434
+ " ************************* DataFrame ********************* \n",
2435
+ " masters gpa stats programming admitted sqlalchemy_gpa\n",
2436
+ "id \n",
2437
+ "13 no 4.00 Advanced Novice 1 141.67\n",
2438
+ "23 yes 3.59 Advanced Novice 1 141.67\n",
2439
+ "38 yes 2.65 Advanced Beginner 1 141.67\n",
2440
+ "36 no 3.00 Advanced Novice 0 141.67\n",
2441
+ "30 yes 3.79 Advanced Novice 0 141.67\n",
2442
+ "18 yes 3.81 Advanced Advanced 1 141.67\n",
2443
+ "17 no 3.83 Advanced Advanced 1 141.67\n",
2444
+ "14 yes 3.45 Advanced Advanced 0 141.67\n",
2445
+ "19 yes 1.98 Advanced Advanced 0 141.67\n",
2446
+ "15 yes 4.00 Advanced Advanced 1 141.67\n",
2447
+ "\n",
2448
+ "\n",
2449
+ "\n",
2450
+ " ************************* DataFrame.dtypes ********************* \n",
2451
+ "id int\n",
2452
+ "masters str\n",
2453
+ "gpa float\n",
2454
+ "stats str\n",
2455
+ "programming str\n",
2456
+ "admitted int\n",
2457
+ "sqlalchemy_gpa float\n",
2458
+ "\n",
2459
+ "\n",
2460
+ "\n"
2461
+ ]
2462
+ }
2463
+ ],
2464
+ "source": [
2465
+ "### ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ---> rows=(None, None)\n",
2466
+ "sum_gpa_rbupauf = func.sum(admissions_train.gpa.expression).over(order_by=admissions_train.stats.expression, rows=(None, None))\n",
2467
+ "df = admissions_train.assign(sqlalchemy_gpa=sum_gpa_rbupauf)\n",
2468
+ "print_variables(df)"
2469
+ ]
2470
+ },
2471
+ {
2472
+ "cell_type": "markdown",
2473
+ "metadata": {},
2474
+ "source": [
2475
+ "### Windowed aggregation using 'Moving' Computation"
2476
+ ]
2477
+ },
2478
+ {
2479
+ "cell_type": "code",
2480
+ "execution_count": 74,
2481
+ "metadata": {},
2482
+ "outputs": [],
2483
+ "source": [
2484
+ "## Using \"Moving\" computation.\n",
2485
+ "## SQL Syntax for such cases is:\n",
2486
+ "### ROWS value PRECEDING ------------> No support available for this in SQLAlchemy.\n",
2487
+ "### ROWS CURRENT ROW ------------> No support available for this in SQLAlchemy.\n",
2488
+ "\n",
2489
+ "#\n",
2490
+ "# Values accepted by rows argument:\n",
2491
+ "# 1. Negative Value --> Indicates a preceding value\n",
2492
+ "# 2. Positive Value --> Indiacates a following value\n",
2493
+ "# 3. 0 --> For Current row\n",
2494
+ "# 4. None --> Unbounded value.\n",
2495
+ "\n",
2496
+ "\n",
2497
+ "### ROWS BETWEEN value PRECEDING AND value PRECEDING ---> rows=(Negative Value, Negative Value)\n",
2498
+ "### ROWS BETWEEN value PRECEDING AND CURRENT ROW ---> rows=(Negative Value, 0)\n",
2499
+ "### ROWS BETWEEN value PRECEDING AND value FOLLOWING ---> rows=(Negative Value, Positive Value)\n",
2500
+ "### ROWS BETWEEN CURRENT ROW AND CURRENT ROW ---> rows=(0, 0)\n",
2501
+ "### ROWS BETWEEN CURRENT ROW AND value FOLLOWING ---> rows=(0, Positive Value)\n",
2502
+ "### ROWS BETWEEN value FOLLOWING AND value FOLLOWING ---> rows=(Positive Value, Positive Value)"
2503
+ ]
2504
+ },
2505
+ {
2506
+ "cell_type": "code",
2507
+ "execution_count": 75,
2508
+ "metadata": {},
2509
+ "outputs": [
2510
+ {
2511
+ "name": "stdout",
2512
+ "output_type": "stream",
2513
+ "text": [
2514
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, sum(gpa) OVER (ORDER BY stats ROWS BETWEEN 3 PRECEDING AND 1 PRECEDING) AS sqlalchemy_gpa from \"admissions_train\"\n",
2515
+ "\n",
2516
+ "\n",
2517
+ " ************************* DataFrame ********************* \n",
2518
+ " masters gpa stats programming admitted sqlalchemy_gpa\n",
2519
+ "id \n",
2520
+ "15 yes 4.00 Advanced Advanced 1 6.48\n",
2521
+ "24 no 1.87 Advanced Novice 1 10.55\n",
2522
+ "25 no 3.96 Advanced Advanced 1 9.77\n",
2523
+ "16 no 3.70 Advanced Advanced 1 9.73\n",
2524
+ "34 yes 3.85 Advanced Beginner 0 9.64\n",
2525
+ "14 yes 3.45 Advanced Advanced 0 9.53\n",
2526
+ "19 yes 1.98 Advanced Advanced 0 9.53\n",
2527
+ "20 yes 3.90 Advanced Advanced 1 10.48\n",
2528
+ "38 yes 2.65 Advanced Beginner 1 3.83\n",
2529
+ "17 no 3.83 Advanced Advanced 1 NaN\n",
2530
+ "\n",
2531
+ "\n",
2532
+ "\n",
2533
+ " ************************* DataFrame.dtypes ********************* \n",
2534
+ "id int\n",
2535
+ "masters str\n",
2536
+ "gpa float\n",
2537
+ "stats str\n",
2538
+ "programming str\n",
2539
+ "admitted int\n",
2540
+ "sqlalchemy_gpa float\n",
2541
+ "\n",
2542
+ "\n",
2543
+ "\n"
2544
+ ]
2545
+ }
2546
+ ],
2547
+ "source": [
2548
+ "### ROWS BETWEEN value PRECEDING AND value PRECEDING ---> rows=(-3, -1)\n",
2549
+ "sum_gpa_rbupauf = func.sum(admissions_train.gpa.expression).over(order_by=admissions_train.stats.expression, rows=(-3, -1))\n",
2550
+ "df = admissions_train.assign(sqlalchemy_gpa=sum_gpa_rbupauf)\n",
2551
+ "print_variables(df)"
2552
+ ]
2553
+ },
2554
+ {
2555
+ "cell_type": "code",
2556
+ "execution_count": 76,
2557
+ "metadata": {},
2558
+ "outputs": [
2559
+ {
2560
+ "name": "stdout",
2561
+ "output_type": "stream",
2562
+ "text": [
2563
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, sum(gpa) OVER (ORDER BY stats ROWS BETWEEN CURRENT ROW AND CURRENT ROW) AS sqlalchemy_gpa from \"admissions_train\"\n",
2564
+ "\n",
2565
+ "\n",
2566
+ " ************************* DataFrame ********************* \n",
2567
+ " masters gpa stats programming admitted sqlalchemy_gpa\n",
2568
+ "id \n",
2569
+ "13 no 4.00 Advanced Novice 1 4.00\n",
2570
+ "23 yes 3.59 Advanced Novice 1 3.59\n",
2571
+ "38 yes 2.65 Advanced Beginner 1 2.65\n",
2572
+ "36 no 3.00 Advanced Novice 0 3.00\n",
2573
+ "30 yes 3.79 Advanced Novice 0 3.79\n",
2574
+ "18 yes 3.81 Advanced Advanced 1 3.81\n",
2575
+ "17 no 3.83 Advanced Advanced 1 3.83\n",
2576
+ "14 yes 3.45 Advanced Advanced 0 3.45\n",
2577
+ "19 yes 1.98 Advanced Advanced 0 1.98\n",
2578
+ "15 yes 4.00 Advanced Advanced 1 4.00\n",
2579
+ "\n",
2580
+ "\n",
2581
+ "\n",
2582
+ " ************************* DataFrame.dtypes ********************* \n",
2583
+ "id int\n",
2584
+ "masters str\n",
2585
+ "gpa float\n",
2586
+ "stats str\n",
2587
+ "programming str\n",
2588
+ "admitted int\n",
2589
+ "sqlalchemy_gpa float\n",
2590
+ "\n",
2591
+ "\n",
2592
+ "\n"
2593
+ ]
2594
+ }
2595
+ ],
2596
+ "source": [
2597
+ "### ROWS BETWEEN CURRENT ROW AND CURRENT ROW ---> rows=(0, 0)\n",
2598
+ "sum_gpa_rbcr = func.sum(admissions_train.gpa.expression).over(order_by=admissions_train.stats.expression, rows=(0, 0))\n",
2599
+ "df = admissions_train.assign(sqlalchemy_gpa=sum_gpa_rbcr)\n",
2600
+ "print_variables(df)"
2601
+ ]
2602
+ },
2603
+ {
2604
+ "cell_type": "code",
2605
+ "execution_count": 77,
2606
+ "metadata": {},
2607
+ "outputs": [
2608
+ {
2609
+ "name": "stdout",
2610
+ "output_type": "stream",
2611
+ "text": [
2612
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, sum(gpa) OVER (ORDER BY stats ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) AS sqlalchemy_gpa from \"admissions_train\"\n",
2613
+ "\n",
2614
+ "\n",
2615
+ " ************************* DataFrame ********************* \n",
2616
+ " masters gpa stats programming admitted sqlalchemy_gpa\n",
2617
+ "id \n",
2618
+ "15 yes 4.00 Advanced Advanced 1 13.73\n",
2619
+ "24 no 1.87 Advanced Novice 1 11.51\n",
2620
+ "25 no 3.96 Advanced Advanced 1 13.49\n",
2621
+ "16 no 3.70 Advanced Advanced 1 12.98\n",
2622
+ "34 yes 3.85 Advanced Beginner 0 14.58\n",
2623
+ "14 yes 3.45 Advanced Advanced 0 14.23\n",
2624
+ "19 yes 1.98 Advanced Advanced 0 12.74\n",
2625
+ "20 yes 3.90 Advanced Advanced 1 13.43\n",
2626
+ "38 yes 2.65 Advanced Beginner 1 12.42\n",
2627
+ "17 no 3.83 Advanced Advanced 1 14.38\n",
2628
+ "\n",
2629
+ "\n",
2630
+ "\n",
2631
+ " ************************* DataFrame.dtypes ********************* \n",
2632
+ "id int\n",
2633
+ "masters str\n",
2634
+ "gpa float\n",
2635
+ "stats str\n",
2636
+ "programming str\n",
2637
+ "admitted int\n",
2638
+ "sqlalchemy_gpa float\n",
2639
+ "\n",
2640
+ "\n",
2641
+ "\n"
2642
+ ]
2643
+ }
2644
+ ],
2645
+ "source": [
2646
+ "### ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING ---> rows=(0, 3)\n",
2647
+ "sum_gpa_rbcr3f = func.sum(admissions_train.gpa.expression).over(order_by=admissions_train.stats.expression, rows=(0, 3))\n",
2648
+ "df = admissions_train.assign(sqlalchemy_gpa=sum_gpa_rbcr3f)\n",
2649
+ "print_variables(df)"
2650
+ ]
2651
+ },
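+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A minimal sketch (not executed in this notebook) of the 'Moving' variant listed above but not demonstrated:\n",
+ "# ROWS BETWEEN value PRECEDING AND value FOLLOWING ---> rows=(-2, 2)\n",
+ "# It assumes the same admissions_train DataFrame and print_variables helper used throughout.\n",
+ "sum_gpa_rb2p2f = func.sum(admissions_train.gpa.expression).over(order_by=admissions_train.stats.expression, rows=(-2, 2))\n",
+ "df = admissions_train.assign(sqlalchemy_gpa=sum_gpa_rb2p2f)\n",
+ "print_variables(df)"
+ ]
+ },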
2652
+ {
2653
+ "cell_type": "markdown",
2654
+ "metadata": {},
2655
+ "source": [
2656
+ "### Windowed aggregation using 'Remaining' Computation"
2657
+ ]
2658
+ },
2659
+ {
2660
+ "cell_type": "code",
2661
+ "execution_count": 78,
2662
+ "metadata": {},
2663
+ "outputs": [],
2664
+ "source": [
2665
+ "## Using \"REMAINING\" computation.\n",
2666
+ "## SQL Syntax for such cases is:\n",
2667
+ "\n",
2668
+ "# Values accepted by rows argument:\n",
2669
+ "# 1. Negative Value --> Indicates a preceding value\n",
2670
+ "# 2. Positive Value --> Indiacates a following value\n",
2671
+ "# 3. 0 --> For Current row\n",
2672
+ "# 4. None --> Unbounded value.\n",
2673
+ "\n",
2674
+ "### ROWS BETWEEN value PRECEDING AND UNBOUNDED FOLLOWING ---> rows=(Negative Value, None)\n",
2675
+ "### ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING ---> rows=(0, None)\n",
2676
+ "### ROWS BETWEEN value FOLLOWING AND UNBOUNDED FOLLOWING ---> rows=(Positive Value, None)"
2677
+ ]
2678
+ },
2679
+ {
2680
+ "cell_type": "code",
2681
+ "execution_count": 79,
2682
+ "metadata": {},
2683
+ "outputs": [
2684
+ {
2685
+ "name": "stdout",
2686
+ "output_type": "stream",
2687
+ "text": [
2688
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, sum(gpa) OVER (ORDER BY stats ROWS BETWEEN 5 PRECEDING AND UNBOUNDED FOLLOWING) AS sqlalchemy_gpa from \"admissions_train\"\n",
2689
+ "\n",
2690
+ "\n",
2691
+ " ************************* DataFrame ********************* \n",
2692
+ " masters gpa stats programming admitted sqlalchemy_gpa\n",
2693
+ "id \n",
2694
+ "3 no 3.70 Novice Beginner 1 27.68\n",
2695
+ "21 no 3.87 Novice Beginner 1 35.20\n",
2696
+ "35 no 3.68 Novice Beginner 1 39.15\n",
2697
+ "33 no 3.55 Novice Novice 1 43.10\n",
2698
+ "37 no 3.52 Novice Novice 1 50.46\n",
2699
+ "29 yes 4.00 Novice Beginner 0 53.96\n",
2700
+ "22 yes 3.46 Novice Beginner 0 46.70\n",
2701
+ "12 no 3.65 Novice Novice 1 31.20\n",
2702
+ "7 yes 2.33 Novice Novice 1 24.22\n",
2703
+ "5 no 3.44 Novice Novice 0 20.67\n",
2704
+ "\n",
2705
+ "\n",
2706
+ "\n",
2707
+ " ************************* DataFrame.dtypes ********************* \n",
2708
+ "id int\n",
2709
+ "masters str\n",
2710
+ "gpa float\n",
2711
+ "stats str\n",
2712
+ "programming str\n",
2713
+ "admitted int\n",
2714
+ "sqlalchemy_gpa float\n",
2715
+ "\n",
2716
+ "\n",
2717
+ "\n"
2718
+ ]
2719
+ }
2720
+ ],
2721
+ "source": [
2722
+ "### ROWS BETWEEN 5 PRECEDING AND UNBOUNDED FOLLOWING ---> rows=(-5, None)\n",
2723
+ "sum_gpa_rb5puf = func.sum(admissions_train.gpa.expression).over(order_by=admissions_train.stats.expression, rows=(-5, None))\n",
2724
+ "df = admissions_train.assign(sqlalchemy_gpa=sum_gpa_rb5puf)\n",
2725
+ "print_variables(df)"
2726
+ ]
2727
+ },
2728
+ {
2729
+ "cell_type": "code",
2730
+ "execution_count": 80,
2731
+ "metadata": {},
2732
+ "outputs": [
2733
+ {
2734
+ "name": "stdout",
2735
+ "output_type": "stream",
2736
+ "text": [
2737
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, sum(gpa) OVER (ORDER BY stats ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS sqlalchemy_gpa from \"admissions_train\"\n",
2738
+ "\n",
2739
+ "\n",
2740
+ " ************************* DataFrame ********************* \n",
2741
+ " masters gpa stats programming admitted sqlalchemy_gpa\n",
2742
+ "id \n",
2743
+ "29 yes 4.00 Novice Beginner 0 10.99\n",
2744
+ "35 no 3.68 Novice Beginner 1 18.37\n",
2745
+ "37 no 3.52 Novice Novice 1 21.89\n",
2746
+ "40 yes 3.95 Novice Beginner 0 25.84\n",
2747
+ "12 no 3.65 Novice Novice 1 31.82\n",
2748
+ "22 yes 3.46 Novice Beginner 0 35.28\n",
2749
+ "7 yes 2.33 Novice Novice 1 28.17\n",
2750
+ "3 no 3.70 Novice Beginner 1 14.69\n",
2751
+ "33 no 3.55 Novice Novice 1 6.99\n",
2752
+ "5 no 3.44 Novice Novice 0 3.44\n",
2753
+ "\n",
2754
+ "\n",
2755
+ "\n",
2756
+ " ************************* DataFrame.dtypes ********************* \n",
2757
+ "id int\n",
2758
+ "masters str\n",
2759
+ "gpa float\n",
2760
+ "stats str\n",
2761
+ "programming str\n",
2762
+ "admitted int\n",
2763
+ "sqlalchemy_gpa float\n",
2764
+ "\n",
2765
+ "\n",
2766
+ "\n"
2767
+ ]
2768
+ }
2769
+ ],
2770
+ "source": [
2771
+ "### ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING ---> rows=(0, None)\n",
2772
+ "sum_gpa_rbcruf = func.sum(admissions_train.gpa.expression).over(order_by=admissions_train.stats.expression, rows=(0, None))\n",
2773
+ "df = admissions_train.assign(sqlalchemy_gpa=sum_gpa_rbcruf)\n",
2774
+ "print_variables(df)"
2775
+ ]
2776
+ },
2777
+ {
2778
+ "cell_type": "code",
2779
+ "execution_count": 81,
2780
+ "metadata": {},
2781
+ "outputs": [
2782
+ {
2783
+ "name": "stdout",
2784
+ "output_type": "stream",
2785
+ "text": [
2786
+ "Equivalent SQL: select id AS id, masters AS masters, gpa AS gpa, stats AS stats, programming AS programming, admitted AS admitted, AVG(admitted) OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS sqlalchemy_gpa from \"admissions_train\"\n",
2787
+ "\n",
2788
+ "\n",
2789
+ " ************************* DataFrame ********************* \n",
2790
+ " masters gpa stats programming admitted sqlalchemy_gpa\n",
2791
+ "id \n",
2792
+ "38 yes 2.65 Advanced Beginner 1 0.333333\n",
2793
+ "36 no 3.00 Advanced Novice 0 0.400000\n",
2794
+ "35 no 3.68 Novice Beginner 1 0.500000\n",
2795
+ "34 yes 3.85 Advanced Beginner 0 0.428571\n",
2796
+ "32 yes 3.46 Advanced Beginner 0 0.444444\n",
2797
+ "31 yes 3.50 Advanced Beginner 1 0.500000\n",
2798
+ "33 no 3.55 Novice Novice 1 0.500000\n",
2799
+ "37 no 3.52 Novice Novice 1 0.500000\n",
2800
+ "39 yes 3.75 Advanced Beginner 0 0.000000\n",
2801
+ "40 yes 3.95 Novice Beginner 0 0.000000\n",
2802
+ "\n",
2803
+ "\n",
2804
+ "\n",
2805
+ " ************************* DataFrame.dtypes ********************* \n",
2806
+ "id int\n",
2807
+ "masters str\n",
2808
+ "gpa float\n",
2809
+ "stats str\n",
2810
+ "programming str\n",
2811
+ "admitted int\n",
2812
+ "sqlalchemy_gpa float\n",
2813
+ "\n",
2814
+ "\n",
2815
+ "\n"
2816
+ ]
2817
+ }
2818
+ ],
2819
+ "source": [
2820
+ "### ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING ---> rows=(0, None)\n",
2821
+ "sum_gpa_rbcruf = func.AVG(admissions_train.admitted.expression).over(order_by=admissions_train.id.expression, rows=(0, None))\n",
2822
+ "df = admissions_train.assign(sqlalchemy_gpa=sum_gpa_rbcruf)\n",
2823
+ "print_variables(df)"
2824
+ ]
2825
+ },
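+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A minimal sketch (not executed in this notebook) of the 'Remaining' variant listed above but not demonstrated:\n",
+ "# ROWS BETWEEN value FOLLOWING AND UNBOUNDED FOLLOWING ---> rows=(2, None)\n",
+ "# It assumes the same admissions_train DataFrame and print_variables helper used throughout.\n",
+ "sum_gpa_rb2fuf = func.sum(admissions_train.gpa.expression).over(order_by=admissions_train.stats.expression, rows=(2, None))\n",
+ "df = admissions_train.assign(sqlalchemy_gpa=sum_gpa_rb2fuf)\n",
+ "print_variables(df)"
+ ]
+ },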
2826
+ {
2827
+ "cell_type": "code",
2828
+ "execution_count": 82,
2829
+ "metadata": {},
2830
+ "outputs": [],
2831
+ "source": [
2832
+ "### Following Teradatda SQL syntaxes are not supported by SQLAlchemy.\n",
2833
+ "# 1. RESET WHEN clause.\n",
2834
+ "# 2. ROWS UNBOUNDED PRECEDING, ROWS value PRECEDING, ROWS CURRENT ROW\n",
2835
+ "\n",
2836
+ "### SQLAlchemy offers range over in Window aggregate, Teradata does not support the same. \n",
2837
+ "# For example, \"RANGE BETWEEN 5 PRECEDING AND 10 FOLLOWING\" is not supported"
2838
+ ]
2839
+ },
2840
+ {
2841
+ "cell_type": "code",
2842
+ "execution_count": 83,
2843
+ "metadata": {},
2844
+ "outputs": [
2845
+ {
2846
+ "data": {
2847
+ "text/plain": [
2848
+ "True"
2849
+ ]
2850
+ },
2851
+ "execution_count": 83,
2852
+ "metadata": {},
2853
+ "output_type": "execute_result"
2854
+ }
2855
+ ],
2856
+ "source": [
2857
+ "# One must run remove_context() to close the connection and garbage collect internally generated objects.\n",
2858
+ "remove_context()"
2859
+ ]
2860
+ }
2889
+ ],
2890
+ "metadata": {
2891
+ "kernelspec": {
2892
+ "display_name": "Python 3",
2893
+ "language": "python",
2894
+ "name": "python3"
2895
+ },
2896
+ "language_info": {
2897
+ "codemirror_mode": {
2898
+ "name": "ipython",
2899
+ "version": 3
2900
+ },
2901
+ "file_extension": ".py",
2902
+ "mimetype": "text/x-python",
2903
+ "name": "python",
2904
+ "nbconvert_exporter": "python",
2905
+ "pygments_lexer": "ipython3",
2906
+ "version": "3.7.1"
2907
+ }
2908
+ },
2909
+ "nbformat": 4,
2910
+ "nbformat_minor": 2
2911
+ }