unstructured-ingest 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of unstructured-ingest might be problematic.
- test/integration/connectors/conftest.py +13 -0
- test/integration/connectors/databricks_tests/test_volumes_native.py +8 -4
- test/integration/connectors/sql/test_postgres.py +6 -10
- test/integration/connectors/sql/test_singlestore.py +156 -0
- test/integration/connectors/sql/test_snowflake.py +205 -0
- test/integration/connectors/sql/test_sqlite.py +6 -10
- test/integration/connectors/test_delta_table.py +138 -0
- test/integration/connectors/test_s3.py +1 -1
- test/integration/connectors/utils/docker.py +78 -0
- test/integration/connectors/utils/docker_compose.py +23 -8
- test/integration/connectors/utils/validation.py +93 -2
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/cli/utils/click.py +32 -1
- unstructured_ingest/v2/cli/utils/model_conversion.py +10 -3
- unstructured_ingest/v2/interfaces/file_data.py +1 -0
- unstructured_ingest/v2/interfaces/indexer.py +4 -1
- unstructured_ingest/v2/pipeline/pipeline.py +10 -2
- unstructured_ingest/v2/pipeline/steps/index.py +18 -1
- unstructured_ingest/v2/processes/connectors/__init__.py +13 -6
- unstructured_ingest/v2/processes/connectors/astradb.py +278 -55
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +3 -1
- unstructured_ingest/v2/processes/connectors/delta_table.py +185 -0
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +1 -0
- unstructured_ingest/v2/processes/connectors/slack.py +248 -0
- unstructured_ingest/v2/processes/connectors/sql/__init__.py +15 -2
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +33 -56
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +168 -0
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +162 -0
- unstructured_ingest/v2/processes/connectors/sql/sql.py +51 -12
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +31 -32
- unstructured_ingest/v2/unstructured_api.py +1 -1
- {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/METADATA +19 -17
- {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/RECORD +37 -31
- unstructured_ingest/v2/processes/connectors/databricks_volumes.py +0 -250
- unstructured_ingest/v2/processes/connectors/singlestore.py +0 -156
- {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/top_level.txt +0 -0
{unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/RECORD
RENAMED
@@ -4,17 +4,21 @@ test/integration/utils.py,sha256=CWqzEGw6TA_ZoP9hRUkW64TWYssooBbufcTRmbJvod8,401
 test/integration/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/integration/chunkers/test_chunkers.py,sha256=pqn1Rqh36jZTJL4qpU0iuOMFAEQ-LrKAPOgWtQMAt_I,1482
 test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-test/integration/connectors/conftest.py,sha256=
-test/integration/connectors/
+test/integration/connectors/conftest.py,sha256=6dVNMBrL6WIO4KXA-0nf2tNrPYk_tsor8uomi6fbi3Q,727
+test/integration/connectors/test_delta_table.py,sha256=4_KPyQJpd6DmyIjjtXWPMw6NNf7xULRkxmqfbvmZ80g,5018
+test/integration/connectors/test_s3.py,sha256=1ErPRpNmbg-88ig80SfIyxujF7xnAWtI42WSue4sgKU,5850
 test/integration/connectors/databricks_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-test/integration/connectors/databricks_tests/test_volumes_native.py,sha256=
+test/integration/connectors/databricks_tests/test_volumes_native.py,sha256=k4lALbwNtlyuI3wd3OHoBULI21E3Ck2Fo8EJXaVfwgw,5812
 test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-test/integration/connectors/sql/test_postgres.py,sha256=
-test/integration/connectors/sql/
+test/integration/connectors/sql/test_postgres.py,sha256=gDBuNyvWmpVPmDrSSYC99z3t17B_a196P1MwIAOp5Dk,6584
+test/integration/connectors/sql/test_singlestore.py,sha256=wGI3-lc6qh0qN4-WD9VtiXBB9MlekeqK402_9EXQyX0,5876
+test/integration/connectors/sql/test_snowflake.py,sha256=XXU2-2z_k8jHWP684v2IuaGOlV3cmPpg3RxkwMp08v8,6998
+test/integration/connectors/sql/test_sqlite.py,sha256=51QrFufAq-XxNjHAkmPWxdJUkGdIRRIGKeRT09A5pkA,5704
 test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/integration/connectors/utils/constants.py,sha256=0zSPnsZVqJuNhXduXvdXFQLZTRIQa5Fo_1qjBYVCfb8,209
-test/integration/connectors/utils/
-test/integration/connectors/utils/
+test/integration/connectors/utils/docker.py,sha256=-wknXRVlzr3BVPdEhCyJgsdNjO9aSb2xjb-mQ306j7Q,2256
+test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQQx_cBfz13ZTs1HpbaYipNU,2223
+test/integration/connectors/utils/validation.py,sha256=gnflehoYbFkSBJdXQV-7HwcrlL_Cuqni2ri1YmArjT0,12019
 test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
 test/integration/embedders/test_bedrock.py,sha256=0oBRNS_DtFDGQ22Z1T3t6VOJ31PrItgvnJpqcLe9Fg4,1903
@@ -43,7 +47,7 @@ test/unit/embed/test_openai.py,sha256=0O1yshDcE0BMKv1yJqrNuiNLSdPhLpKqJ-D_wmnids
 test/unit/embed/test_vertexai.py,sha256=Pl7COc9E3tf_yGidkTEmTizNGyZF1F5zuL2TgPTMnfI,1048
 test/unit/embed/test_voyageai.py,sha256=DviCOJFhe5H4e26-kNyX3JNe8h3qB5Yl0KOe8rQEMrc,981
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=
+unstructured_ingest/__version__.py,sha256=Hmm5OuicK0ynl_R5DSnpRYWJpEXwe7guJdsAMHH7K60,42
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/interfaces.py,sha256=m03BgenxSA34HbW157L7V9TGxK_dTG7N2AnAhF31W-U,31364
 unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -310,7 +314,7 @@ unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LG
 unstructured_ingest/v2/logger.py,sha256=wcln4s5Nyp2fjjJux9iM3d6t9aQFNJ2H1IAZXmIknjI,4323
 unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
 unstructured_ingest/v2/otel.py,sha256=2fGj1c7cVcC3J8NwL6MNYhyPEAXiB33DsilvRDkrdLo,4130
-unstructured_ingest/v2/unstructured_api.py,sha256=
+unstructured_ingest/v2/unstructured_api.py,sha256=HqOaQ80YTdAnFj_2Ce108g7Pp3-F9Qg329Uw2OXtRmA,3375
 unstructured_ingest/v2/utils.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
 unstructured_ingest/v2/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfiMULE,648
@@ -321,13 +325,13 @@ unstructured_ingest/v2/cli/base/dest.py,sha256=zDjqek7anr0JQ2ptEl8KIAsUXuCuHRnBQ
 unstructured_ingest/v2/cli/base/importer.py,sha256=nRt0QQ3qpi264-n_mR0l55C2ddM8nowTNzT1jsWaam8,1128
 unstructured_ingest/v2/cli/base/src.py,sha256=cpQ43qQju4e5s_YSaPxUtA70BaisRkTBdjtlPhqn5Mg,2872
 unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-unstructured_ingest/v2/cli/utils/click.py,sha256=
-unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=
+unstructured_ingest/v2/cli/utils/click.py,sha256=1_eJgrwS2DFBl1jZPLsj1vgVgR7agFBIEBe4A_n7mH4,7827
+unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1KRxlwITNW1xl1YxMAG8BcTk0,7604
 unstructured_ingest/v2/interfaces/__init__.py,sha256=Rfa8crx6De7WNOK-EjsWWwFVpsUfCc6gY8B8tQ3ae9I,899
 unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
 unstructured_ingest/v2/interfaces/downloader.py,sha256=Lj3nTY1hPA71GfNeedFVCdHdZsHLle8qrx5RtXAy9GY,2940
-unstructured_ingest/v2/interfaces/file_data.py,sha256=
-unstructured_ingest/v2/interfaces/indexer.py,sha256=
+unstructured_ingest/v2/interfaces/file_data.py,sha256=D71bXImJ7Pyjtl3I3pa2O2B2iBqIaY-mC-hdoEF3RmI,1983
+unstructured_ingest/v2/interfaces/indexer.py,sha256=gsa1MLhFa82BzD2h4Yb7ons0VxRwKINZOrzvHAahwVU,846
 unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
 unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
 unstructured_ingest/v2/interfaces/upload_stager.py,sha256=ZFkDxcwKn-6EPrTbdBEgOkz1kGAq4gUtze98KP48KG4,1146
@@ -335,13 +339,13 @@ unstructured_ingest/v2/interfaces/uploader.py,sha256=JmZDl1blJa5rS61YHCae3Hfet84
 unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
 unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
-unstructured_ingest/v2/pipeline/pipeline.py,sha256=
+unstructured_ingest/v2/pipeline/pipeline.py,sha256=7Yg8_xwlSX6lA-oPGlTcn6KXZ9kc51zsoJxME5TiUlw,15956
 unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=rYVcHSXeQSzWszg6VmtYlNc66Gsx-22Ti0BxPyQaJak,3135
 unstructured_ingest/v2/pipeline/steps/download.py,sha256=lzvOl5SoUK6OCCVVeG4CzdPIGj6eKKCGdciNo_0RMNk,8173
 unstructured_ingest/v2/pipeline/steps/embed.py,sha256=-YFvmchdsonWiSXxaD7PJfuUUtMLklaQM_8kZCQxCdM,3113
 unstructured_ingest/v2/pipeline/steps/filter.py,sha256=q7bNieaFMprqoF8Mx7w-ZN6jyA5peiGeTGyPtvcV-uw,1199
-unstructured_ingest/v2/pipeline/steps/index.py,sha256=
+unstructured_ingest/v2/pipeline/steps/index.py,sha256=YUUf1sYZRZSrRgapca3Sfzk1sNPJ05yyTQ5wKlyDjEo,3543
 unstructured_ingest/v2/pipeline/steps/partition.py,sha256=9MQViptxK3ALKco8uE4gK9PpEoGq5JjzyU14C_18blU,3193
 unstructured_ingest/v2/pipeline/steps/stage.py,sha256=cphKgHScLz2rNLZRI5Olsb6dAH-MKGu3p6MYS1BEzkA,2246
 unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=CFSy4tGp6BAvF0oIwWFN8v4zFzh5pRDeESjEn5iP9hE,1756
@@ -353,13 +357,13 @@ unstructured_ingest/v2/processes/embedder.py,sha256=PQn0IO8xbGRQHpcT2VVl-J8gTJ5H
 unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
 unstructured_ingest/v2/processes/partitioner.py,sha256=2Lhztd730soVC2TOqrn_ba7CGZna8AHHpqJY2ZUYVxE,7776
 unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
-unstructured_ingest/v2/processes/connectors/__init__.py,sha256=
+unstructured_ingest/v2/processes/connectors/__init__.py,sha256=zMO50wOGWOJrCTdh19Najj-i5tfMUyf977TKz4yN04A,5249
 unstructured_ingest/v2/processes/connectors/airtable.py,sha256=Yi7PEv_FejZ9_y3BPY3gu5YGVfeLh-9YX-qLyQHjJsY,8921
-unstructured_ingest/v2/processes/connectors/astradb.py,sha256=
+unstructured_ingest/v2/processes/connectors/astradb.py,sha256=k6zaxm05-ESpRV6w1jgrtfE10-I2Z50kafURxxJVzdk,14043
 unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=S55v7TXu30rEdgythMBB_2VcuomyMPmcPtLYykbhw_E,8466
 unstructured_ingest/v2/processes/connectors/chroma.py,sha256=skrxRPHZ8y3JxNa0dt5SVitHiDQ5WVxLvY_kh2-QUrQ,8029
 unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=SONLywyEfoAlLc-HPabXeGzoiwKnekMHIbRMXd4CGXs,12146
-unstructured_ingest/v2/processes/connectors/
+unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=ZZfdNTw1W0ISQGWCtM1JuIME26FYzuPBOqRKql0wlLg,7013
 unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=ojxMUHkLa6ZG50aTGn2YWhDHZ1n38uFRn5p8_ghAIvM,16762
 unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=7xOQthcqBd9auJxB0nxZlhh1vdjXpMX_CtQZa6YfZz0,13088
 unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=8bGHbZctJ_Tl1AUSMnI7CCZ7CgEtTRVcRuvlB1HPlqQ,5907
@@ -372,11 +376,11 @@ unstructured_ingest/v2/processes/connectors/outlook.py,sha256=NK67Pd8Nk5oUIXTK-s
 unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=k_GH55S_OQ6-wCLC6gkhRrNpXIFECYZ_2Gjz_XRtY6Y,7561
 unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
 unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=hOaV5gBcHFc6N5Rbu3MgM-5Aol1ht-QkNIN4PqjvfxE,19665
-unstructured_ingest/v2/processes/connectors/
+unstructured_ingest/v2/processes/connectors/slack.py,sha256=b9IanzUApUexiJzuNg7PR3tujOoeG8dhM0L0v4MDuPw,9256
 unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
 unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=Ss0YyD5T6k-00eJ6dr5lSo2H0LcOjVTMmozehyTvnAo,8866
 unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=jO71UTC7bLA_N12CrLWJzh_yZML5gfT7VohxzCpUGWg,1848
-unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=
+unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=8FasrRcoqa9zrhmnbfYN_rBBTH6xBXM50TzGsUMEm98,6581
 unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=I1MJwe5LOxoPLjwo00H0XbXO6u_SJHWYgsj4s6ePoyI,2754
 unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=P4rfcE3td7WyuuguRgUnGQytCMDpfeYrrpshBZuVynY,3539
 unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=UUotY_-HpgSEJkvdQfZTlbxY7CRLZ4ctL8TlryeFvxk,2790
@@ -385,18 +389,20 @@ unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Yp
 unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=Y01BuVRql0Kvzc_cdaZE9dDGYjJzrwJu-etfUrEGcUU,7061
 unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=Cjk0LUxqOCDbme0GmnD_5_b1hfStjI23cKw6BquKNrg,5488
 unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=NNAxIRdOQxUncfwhu7J7SnQRM6BSStNOyQZi-4E51iY,5816
-unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=
+unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=5uZ_nGBXNQgwvfjNcor6mwzbYOHeja4-EV3nNCXvxaQ,11512
 unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=-_pYHbsBG9FyRyNIaf_xyFbPiiR7pnWEEg_8mp0rIZ8,7053
 unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=je1BDqFWlyMfPa4oAMMNFQLLQtCY9quuqx3xjTwF8OQ,6251
 unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=dwpyqDq0qceCBWX3zM1hiUlgXB4hzX6ObOr-sh-5CJs,6926
 unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
-unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=
-unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=
-unstructured_ingest/v2/processes/connectors/sql/
-unstructured_ingest/v2/processes/connectors/sql/
-unstructured_ingest
-unstructured_ingest
-unstructured_ingest-0.
-unstructured_ingest-0.
-unstructured_ingest-0.
-unstructured_ingest-0.
+unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=D43wrV2ADvQsToIYwbEWnZ7mhzlsYcZMFCqf6jIC7dQ,1333
+unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=__Wf5lkCQGhbtEH_2DxfNmQyWP-UKC9o_KEawG81jY0,4905
+unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=YrmhAL1RQ1c5-2fnR3UAyj_4KfvjYTQ2cWzpvsdJOnU,5535
+unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=0s0oBfMttPg5JL6jn8SsoCeTSRoXXdVy2bJAZv_hiSk,5576
+unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=rWDkefUnYkzJT0mhIcHxieECdaIWLTvbDcOcZgLA4FQ,11636
+unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=9605K36nQ5-gBxzt1daYKYotON1SE85RETusqCJrbdk,5230
+unstructured_ingest-0.2.1.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.2.1.dist-info/METADATA,sha256=NBV3OAonxt8Y0Tra7LWqQBoLSROwA106sf8vDCsXu2k,7271
+unstructured_ingest-0.2.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+unstructured_ingest-0.2.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.2.1.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+unstructured_ingest-0.2.1.dist-info/RECORD,,
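Each RECORD row above has the form path,sha256=<digest>,<size>: the digest is the URL-safe base64 encoding of the file's SHA-256 hash with trailing = padding stripped, as the wheel format specifies. A minimal sketch for recomputing a row; the record_entry helper is our own name, not part of this package:

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # Build a RECORD-style row: path,sha256=<digest>,<size>
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"

# Run against an unpacked 0.2.1 wheel, this should reproduce, e.g., the row
# unstructured_ingest/__version__.py,sha256=Hmm5OuicK0ynl_R5DSnpRYWJpEXwe7guJdsAMHH7K60,42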
unstructured_ingest/v2/processes/connectors/databricks_volumes.py
DELETED
@@ -1,250 +0,0 @@
-import os
-from dataclasses import dataclass
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generator, Optional
-
-from pydantic import Field, Secret
-
-from unstructured_ingest.error import (
-    DestinationConnectionError,
-    SourceConnectionError,
-    SourceConnectionNetworkError,
-)
-from unstructured_ingest.utils.dep_check import requires_dependencies
-from unstructured_ingest.v2.interfaces import (
-    AccessConfig,
-    ConnectionConfig,
-    Downloader,
-    DownloaderConfig,
-    DownloadResponse,
-    FileData,
-    FileDataSourceMetadata,
-    Indexer,
-    IndexerConfig,
-    SourceIdentifiers,
-    Uploader,
-    UploaderConfig,
-)
-from unstructured_ingest.v2.logger import logger
-from unstructured_ingest.v2.processes.connector_registry import (
-    DestinationRegistryEntry,
-    SourceRegistryEntry,
-)
-
-if TYPE_CHECKING:
-    from databricks.sdk import WorkspaceClient
-
-CONNECTOR_TYPE = "databricks_volumes"
-
-
-class DatabricksVolumesAccessConfig(AccessConfig):
-    account_id: Optional[str] = Field(
-        default=None,
-        description="The Databricks account ID for the Databricks "
-        "accounts endpoint. Only has effect when Host is "
-        "either https://accounts.cloud.databricks.com/ (AWS), "
-        "https://accounts.azuredatabricks.net/ (Azure), "
-        "or https://accounts.gcp.databricks.com/ (GCP).",
-    )
-    client_id: Optional[str] = Field(default=None, description="Client ID of the OAuth app.")
-    client_secret: Optional[str] = Field(
-        default=None, description="Client Secret of the OAuth app."
-    )
-    token: Optional[str] = Field(
-        default=None,
-        description="The Databricks personal access token (PAT) (AWS, Azure, and GCP) or "
-        "Azure Active Directory (Azure AD) token (Azure).",
-    )
-    profile: Optional[str] = None
-    azure_workspace_resource_id: Optional[str] = Field(
-        default=None,
-        description="The Azure Resource Manager ID for the Azure Databricks workspace, "
-        "which is exchanged for a Databricks host URL.",
-    )
-    azure_client_secret: Optional[str] = Field(
-        default=None, description="The Azure AD service principal’s client secret."
-    )
-    azure_client_id: Optional[str] = Field(
-        default=None, description="The Azure AD service principal’s application ID."
-    )
-    azure_tenant_id: Optional[str] = Field(
-        default=None, description="The Azure AD service principal’s tenant ID."
-    )
-    azure_environment: Optional[str] = Field(
-        default=None,
-        description="The Azure environment type for a " "specific set of API endpoints",
-        examples=["Public", "UsGov", "China", "Germany"],
-    )
-    auth_type: Optional[str] = Field(
-        default=None,
-        description="When multiple auth attributes are available in the "
-        "environment, use the auth type specified by this "
-        "argument. This argument also holds the currently "
-        "selected auth.",
-    )
-    google_credentials: Optional[str] = None
-    google_service_account: Optional[str] = None
-
-
-class DatabricksVolumesConnectionConfig(ConnectionConfig):
-    access_config: Secret[DatabricksVolumesAccessConfig] = Field(
-        default=DatabricksVolumesAccessConfig(), validate_default=True
-    )
-    host: Optional[str] = Field(
-        default=None,
-        description="The Databricks host URL for either the "
-        "Databricks workspace endpoint or the "
-        "Databricks accounts endpoint.",
-    )
-    volume: str = Field(description="Name of volume in the Unity Catalog")
-    catalog: str = Field(description="Name of the catalog in the Databricks Unity Catalog service")
-    volume_path: Optional[str] = Field(
-        default=None, description="Optional path within the volume to write to"
-    )
-    databricks_schema: str = Field(
-        default="default",
-        alias="schema",
-        description="Schema associated with the volume to write to in the Unity Catalog service",
-    )
-
-    @property
-    def path(self) -> str:
-        path = f"/Volumes/{self.catalog}/{self.databricks_schema}/{self.volume}"
-        if self.volume_path:
-            path = f"{path}/{self.volume_path}"
-        return path
-
-    @requires_dependencies(dependencies=["databricks.sdk"], extras="databricks-volumes")
-    def get_client(self) -> "WorkspaceClient":
-        from databricks.sdk import WorkspaceClient
-
-        return WorkspaceClient(
-            host=self.host,
-            **self.access_config.get_secret_value().model_dump(),
-        )
-
-
-@dataclass
-class DatabricksVolumesIndexerConfig(IndexerConfig):
-    recursive: bool = False
-
-
-@dataclass
-class DatabricksVolumesIndexer(Indexer):
-    index_config: DatabricksVolumesIndexerConfig
-    connection_config: DatabricksVolumesConnectionConfig
-    connector_type: str = CONNECTOR_TYPE
-
-    def precheck(self) -> None:
-        try:
-            self.connection_config.get_client()
-        except Exception as e:
-            logger.error(f"failed to validate connection: {e}", exc_info=True)
-            raise SourceConnectionError(f"failed to validate connection: {e}")
-
-    def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
-        for file_info in self.connection_config.get_client().dbfs.list(
-            path=self.connection_config.path, recursive=self.index_config.recursive
-        ):
-            if file_info.is_dir:
-                continue
-            rel_path = file_info.path.replace(self.connection_config.path, "")
-            if rel_path.startswith("/"):
-                rel_path = rel_path[1:]
-            filename = Path(file_info.path).name
-            yield FileData(
-                identifier=file_info.path,
-                connector_type=CONNECTOR_TYPE,
-                source_identifiers=SourceIdentifiers(
-                    filename=filename,
-                    rel_path=rel_path,
-                    fullpath=file_info.path,
-                ),
-                additional_metadata={
-                    "catalog": self.connection_config.catalog,
-                },
-                metadata=FileDataSourceMetadata(
-                    url=file_info.path, date_modified=str(file_info.modification_time)
-                ),
-            )
-
-
-@dataclass
-class DatabricksVolumesDownloaderConfig(DownloaderConfig):
-    pass
-
-
-@dataclass
-class DatabricksVolumesDownloader(Downloader):
-    download_config: DatabricksVolumesDownloaderConfig
-    connection_config: DatabricksVolumesConnectionConfig
-    connector_type: str = CONNECTOR_TYPE
-
-    def precheck(self) -> None:
-        try:
-            self.connection_config.get_client()
-        except Exception as e:
-            logger.error(f"failed to validate connection: {e}", exc_info=True)
-            raise SourceConnectionError(f"failed to validate connection: {e}")
-
-    def get_download_path(self, file_data: FileData) -> Path:
-        return self.download_config.download_dir / Path(file_data.source_identifiers.relative_path)
-
-    def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
-        download_path = self.get_download_path(file_data=file_data)
-        download_path.parent.mkdir(parents=True, exist_ok=True)
-        logger.info(f"Writing {file_data.identifier} to {download_path}")
-        try:
-            with self.connection_config.get_client().dbfs.download(path=file_data.identifier) as c:
-                read_content = c._read_handle.read()
-            with open(download_path, "wb") as f:
-                f.write(read_content)
-        except Exception as e:
-            logger.error(f"failed to download file {file_data.identifier}: {e}", exc_info=True)
-            raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}")
-
-        return self.generate_download_response(file_data=file_data, download_path=download_path)
-
-
-class DatabricksVolumesUploaderConfig(UploaderConfig):
-    overwrite: bool = Field(
-        default=False, description="If true, an existing file will be overwritten."
-    )
-
-
-@dataclass
-class DatabricksVolumesUploader(Uploader):
-    upload_config: DatabricksVolumesUploaderConfig
-    connection_config: DatabricksVolumesConnectionConfig
-    connector_type: str = CONNECTOR_TYPE
-
-    def precheck(self) -> None:
-        try:
-            assert self.connection_config.get_client().current_user.me().active
-        except Exception as e:
-            logger.error(f"failed to validate connection: {e}", exc_info=True)
-            raise DestinationConnectionError(f"failed to validate connection: {e}")
-
-    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        output_path = os.path.join(self.connection_config.path, path.name)
-        with open(path, "rb") as elements_file:
-            self.connection_config.get_client().files.upload(
-                file_path=output_path,
-                contents=elements_file,
-                overwrite=self.upload_config.overwrite,
-            )
-
-
-databricks_volumes_destination_entry = DestinationRegistryEntry(
-    connection_config=DatabricksVolumesConnectionConfig,
-    uploader=DatabricksVolumesUploader,
-    uploader_config=DatabricksVolumesUploaderConfig,
-)
-
-databricks_volumes_source_entry = SourceRegistryEntry(
-    connection_config=DatabricksVolumesConnectionConfig,
-    indexer=DatabricksVolumesIndexer,
-    indexer_config=DatabricksVolumesIndexerConfig,
-    downloader=DatabricksVolumesDownloader,
-    downloader_config=DatabricksVolumesDownloaderConfig,
-)
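The databricks_volumes module deleted above is superseded in 0.2.1 by the databricks/ connector package (databricks/volumes.py plus the per-cloud volumes_aws.py, volumes_azure.py, and volumes_gcp.py entries in the RECORD). For reference, a minimal sketch of how the removed connection config resolved its Unity Catalog path; this only runs against 0.1.1, and all values are illustrative:

from unstructured_ingest.v2.processes.connectors.databricks_volumes import (
    DatabricksVolumesConnectionConfig,  # module removed in 0.2.1
)

config = DatabricksVolumesConnectionConfig(
    host="https://adb-1234567890123456.7.azuredatabricks.net",  # illustrative host
    catalog="main",
    schema="default",  # fills databricks_schema via its "schema" alias
    volume="ingest",
    volume_path="output",
)
# path joins /Volumes/<catalog>/<schema>/<volume> with the optional volume_path
assert config.path == "/Volumes/main/default/ingest/output"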
unstructured_ingest/v2/processes/connectors/singlestore.py
DELETED
@@ -1,156 +0,0 @@
-import json
-from dataclasses import dataclass
-from datetime import date, datetime
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, Optional
-
-import numpy as np
-import pandas as pd
-from dateutil import parser
-from pydantic import Field, Secret
-
-from unstructured_ingest.utils.data_prep import batch_generator
-from unstructured_ingest.utils.dep_check import requires_dependencies
-from unstructured_ingest.utils.table import convert_to_pandas_dataframe
-from unstructured_ingest.v2.interfaces import (
-    AccessConfig,
-    ConnectionConfig,
-    FileData,
-    Uploader,
-    UploaderConfig,
-    UploadStager,
-    UploadStagerConfig,
-)
-from unstructured_ingest.v2.logger import logger
-from unstructured_ingest.v2.processes.connector_registry import (
-    DestinationRegistryEntry,
-)
-
-if TYPE_CHECKING:
-    from singlestoredb.connection import Connection
-
-CONNECTOR_TYPE = "singlestore"
-
-
-class SingleStoreAccessConfig(AccessConfig):
-    password: Optional[str] = Field(default=None, description="SingleStore password")
-
-
-class SingleStoreConnectionConfig(ConnectionConfig):
-    host: Optional[str] = Field(default=None, description="SingleStore host")
-    port: Optional[int] = Field(default=None, description="SingleStore port")
-    user: Optional[str] = Field(default=None, description="SingleStore user")
-    database: Optional[str] = Field(default=None, description="SingleStore database")
-    access_config: Secret[SingleStoreAccessConfig]
-
-    @requires_dependencies(["singlestoredb"], extras="singlestore")
-    def get_connection(self) -> "Connection":
-        import singlestoredb as s2
-
-        conn = s2.connect(
-            host=self.host,
-            port=self.port,
-            database=self.database,
-            user=self.user,
-            password=self.access_config.get_secret_value().password,
-        )
-        return conn
-
-
-class SingleStoreUploadStagerConfig(UploadStagerConfig):
-    drop_empty_cols: bool = Field(default=False, description="Drop any columns that have no data")
-
-
-@dataclass
-class SingleStoreUploadStager(UploadStager):
-    upload_stager_config: SingleStoreUploadStagerConfig
-
-    @staticmethod
-    def parse_date_string(date_string: str) -> date:
-        try:
-            timestamp = float(date_string)
-            return datetime.fromtimestamp(timestamp)
-        except Exception as e:
-            logger.debug(f"date {date_string} string not a timestamp: {e}")
-        return parser.parse(date_string)
-
-    def run(
-        self,
-        elements_filepath: Path,
-        file_data: FileData,
-        output_dir: Path,
-        output_filename: str,
-        **kwargs: Any,
-    ) -> Path:
-        with open(elements_filepath) as elements_file:
-            elements_contents = json.load(elements_file)
-        output_path = Path(output_dir) / Path(f"{output_filename}.csv")
-        output_path.parent.mkdir(parents=True, exist_ok=True)
-
-        df = convert_to_pandas_dataframe(
-            elements_dict=elements_contents,
-            drop_empty_cols=self.upload_stager_config.drop_empty_cols,
-        )
-        datetime_columns = [
-            "data_source_date_created",
-            "data_source_date_modified",
-            "data_source_date_processed",
-        ]
-        for column in filter(lambda x: x in df.columns, datetime_columns):
-            df[column] = df[column].apply(self.parse_date_string)
-        if "data_source_record_locator" in df.columns:
-            df["data_source_record_locator"] = df["data_source_record_locator"].apply(
-                lambda x: json.dumps(x) if x else None
-            )
-
-        with output_path.open("w") as output_file:
-            df.to_csv(output_file, index=False)
-        return output_path
-
-
-class SingleStoreUploaderConfig(UploaderConfig):
-    table_name: str = Field(description="SingleStore table to write contents to")
-    batch_size: int = Field(default=100, description="Batch size when writing to SingleStore")
-
-
-@dataclass
-class SingleStoreUploader(Uploader):
-    connection_config: SingleStoreConnectionConfig
-    upload_config: SingleStoreUploaderConfig
-    connector_type: str = CONNECTOR_TYPE
-
-    def upload_csv(self, csv_path: Path) -> None:
-        df = pd.read_csv(csv_path)
-        logger.debug(
-            f"uploading {len(df)} entries to {self.connection_config.database} "
-            f"db in table {self.upload_config.table_name}"
-        )
-        stmt = "INSERT INTO {} ({}) VALUES ({})".format(
-            self.upload_config.table_name,
-            ", ".join(df.columns),
-            ", ".join(["%s"] * len(df.columns)),
-        )
-        logger.debug(f"sql statement: {stmt}")
-        df.replace({np.nan: None}, inplace=True)
-        data_as_tuples = list(df.itertuples(index=False, name=None))
-        with self.connection_config.get_connection() as conn:
-            with conn.cursor() as cur:
-                for chunk in batch_generator(
-                    data_as_tuples, batch_size=self.upload_config.batch_size
-                ):
-                    cur.executemany(stmt, chunk)
-                conn.commit()
-
-    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        if path.suffix != ".csv":
-            raise ValueError(f"Only .csv files are supported: {path}")
-        self.upload_csv(csv_path=path)
-
-
-singlestore_destination_entry = DestinationRegistryEntry(
-    connection_config=SingleStoreConnectionConfig,
-    uploader=SingleStoreUploader,
-    uploader_config=SingleStoreUploaderConfig,
-    upload_stager=SingleStoreUploadStager,
-    upload_stager_config=SingleStoreUploadStagerConfig,
-)
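The standalone singlestore module deleted above moves under the SQL connector family as sql/singlestore.py (see the RECORD). Its write path staged elements to CSV and then inserted rows in fixed-size batches via executemany; a condensed sketch of that batching pattern, assuming an open singlestoredb connection (table and file names are illustrative):

import numpy as np
import pandas as pd

def upload_csv(conn, table_name: str, csv_path: str, batch_size: int = 100) -> None:
    # Mirrors the removed SingleStoreUploader.upload_csv: NaN becomes NULL,
    # one parameterized INSERT statement, executed in fixed-size batches.
    df = pd.read_csv(csv_path).replace({np.nan: None})
    stmt = "INSERT INTO {} ({}) VALUES ({})".format(
        table_name, ", ".join(df.columns), ", ".join(["%s"] * len(df.columns))
    )
    rows = list(df.itertuples(index=False, name=None))
    with conn.cursor() as cur:
        for start in range(0, len(rows), batch_size):
            cur.executemany(stmt, rows[start : start + batch_size])
    conn.commit()

# import singlestoredb as s2
# conn = s2.connect(host=..., port=..., database=..., user=..., password=...)
# upload_csv(conn, table_name="elements", csv_path="staged/output.csv")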
{unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/LICENSE.md
RENAMED
File without changes
{unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/WHEEL
RENAMED
File without changes
{unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/entry_points.txt
RENAMED
File without changes
{unstructured_ingest-0.1.1.dist-info → unstructured_ingest-0.2.1.dist-info}/top_level.txt
RENAMED
File without changes