unstructured-ingest 0.0.2.dev0__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/cli.py +6 -1
- unstructured_ingest/cli/cmds/__init__.py +4 -4
- unstructured_ingest/cli/cmds/{astra.py → astradb.py} +9 -9
- unstructured_ingest/cli/interfaces.py +13 -6
- unstructured_ingest/connector/{astra.py → astradb.py} +29 -29
- unstructured_ingest/connector/biomed.py +12 -5
- unstructured_ingest/connector/confluence.py +3 -3
- unstructured_ingest/connector/github.py +3 -2
- unstructured_ingest/connector/google_drive.py +1 -2
- unstructured_ingest/connector/mongodb.py +1 -2
- unstructured_ingest/connector/notion/client.py +31 -16
- unstructured_ingest/connector/notion/connector.py +3 -2
- unstructured_ingest/connector/registry.py +2 -2
- unstructured_ingest/connector/vectara.py +7 -2
- unstructured_ingest/interfaces.py +13 -9
- unstructured_ingest/pipeline/interfaces.py +8 -3
- unstructured_ingest/pipeline/reformat/chunking.py +13 -9
- unstructured_ingest/pipeline/reformat/embedding.py +3 -3
- unstructured_ingest/runner/__init__.py +2 -2
- unstructured_ingest/runner/{astra.py → astradb.py} +7 -7
- unstructured_ingest/runner/writers/__init__.py +2 -2
- unstructured_ingest/runner/writers/{astra.py → astradb.py} +7 -7
- unstructured_ingest/utils/chunking.py +45 -0
- unstructured_ingest/utils/dep_check.py +1 -1
- unstructured_ingest/utils/google_filetype.py +9 -0
- unstructured_ingest/v2/cli/base/cmd.py +66 -12
- unstructured_ingest/v2/cli/base/dest.py +21 -12
- unstructured_ingest/v2/cli/base/src.py +35 -21
- unstructured_ingest/v2/cli/cmds.py +14 -0
- unstructured_ingest/v2/cli/{utils.py → utils/click.py} +36 -89
- unstructured_ingest/v2/cli/utils/model_conversion.py +199 -0
- unstructured_ingest/v2/interfaces/__init__.py +2 -1
- unstructured_ingest/v2/interfaces/connector.py +5 -7
- unstructured_ingest/v2/interfaces/downloader.py +17 -8
- unstructured_ingest/v2/interfaces/file_data.py +13 -2
- unstructured_ingest/v2/interfaces/indexer.py +3 -4
- unstructured_ingest/v2/interfaces/process.py +3 -4
- unstructured_ingest/v2/interfaces/processor.py +10 -10
- unstructured_ingest/v2/interfaces/upload_stager.py +3 -3
- unstructured_ingest/v2/interfaces/uploader.py +3 -3
- unstructured_ingest/v2/pipeline/interfaces.py +3 -5
- unstructured_ingest/v2/pipeline/pipeline.py +73 -7
- unstructured_ingest/v2/pipeline/steps/chunk.py +5 -11
- unstructured_ingest/v2/pipeline/steps/download.py +90 -24
- unstructured_ingest/v2/pipeline/steps/embed.py +5 -11
- unstructured_ingest/v2/pipeline/steps/filter.py +35 -0
- unstructured_ingest/v2/pipeline/steps/index.py +14 -10
- unstructured_ingest/v2/pipeline/steps/partition.py +5 -5
- unstructured_ingest/v2/pipeline/steps/stage.py +4 -7
- unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -6
- unstructured_ingest/v2/pipeline/steps/upload.py +2 -9
- unstructured_ingest/v2/processes/__init__.py +18 -0
- unstructured_ingest/v2/processes/chunker.py +74 -28
- unstructured_ingest/v2/processes/connector_registry.py +8 -2
- unstructured_ingest/v2/processes/connectors/__init__.py +13 -3
- unstructured_ingest/v2/processes/connectors/{astra.py → astradb.py} +53 -35
- unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +38 -27
- unstructured_ingest/v2/processes/connectors/chroma.py +38 -27
- unstructured_ingest/v2/processes/connectors/couchbase.py +151 -0
- unstructured_ingest/v2/processes/connectors/databricks_volumes.py +95 -31
- unstructured_ingest/v2/processes/connectors/elasticsearch.py +92 -53
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +47 -16
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +23 -13
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +18 -11
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +49 -61
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +46 -13
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +50 -20
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +27 -28
- unstructured_ingest/v2/processes/connectors/google_drive.py +52 -42
- unstructured_ingest/v2/processes/connectors/local.py +36 -28
- unstructured_ingest/v2/processes/connectors/milvus.py +22 -18
- unstructured_ingest/v2/processes/connectors/mongodb.py +32 -22
- unstructured_ingest/v2/processes/connectors/onedrive.py +31 -16
- unstructured_ingest/v2/processes/connectors/opensearch.py +81 -43
- unstructured_ingest/v2/processes/connectors/pinecone.py +29 -23
- unstructured_ingest/v2/processes/connectors/salesforce.py +36 -26
- unstructured_ingest/v2/processes/connectors/sharepoint.py +64 -33
- unstructured_ingest/v2/processes/connectors/singlestore.py +11 -15
- unstructured_ingest/v2/processes/connectors/sql.py +52 -39
- unstructured_ingest/v2/processes/connectors/weaviate.py +35 -18
- unstructured_ingest/v2/processes/embedder.py +106 -47
- unstructured_ingest/v2/processes/filter.py +60 -0
- unstructured_ingest/v2/processes/partitioner.py +79 -33
- unstructured_ingest/v2/processes/uncompress.py +3 -3
- unstructured_ingest/v2/utils.py +45 -0
- unstructured_ingest-0.0.4.dist-info/METADATA +571 -0
- {unstructured_ingest-0.0.2.dev0.dist-info → unstructured_ingest-0.0.4.dist-info}/RECORD +92 -116
- {unstructured_ingest-0.0.2.dev0.dist-info → unstructured_ingest-0.0.4.dist-info}/WHEEL +1 -1
- unstructured_ingest/v2/cli/cmds/__init__.py +0 -89
- unstructured_ingest/v2/cli/cmds/astra.py +0 -85
- unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py +0 -72
- unstructured_ingest/v2/cli/cmds/chroma.py +0 -108
- unstructured_ingest/v2/cli/cmds/databricks_volumes.py +0 -161
- unstructured_ingest/v2/cli/cmds/elasticsearch.py +0 -159
- unstructured_ingest/v2/cli/cmds/fsspec/azure.py +0 -84
- unstructured_ingest/v2/cli/cmds/fsspec/box.py +0 -58
- unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py +0 -58
- unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +0 -77
- unstructured_ingest/v2/cli/cmds/fsspec/gcs.py +0 -81
- unstructured_ingest/v2/cli/cmds/fsspec/s3.py +0 -84
- unstructured_ingest/v2/cli/cmds/fsspec/sftp.py +0 -80
- unstructured_ingest/v2/cli/cmds/google_drive.py +0 -74
- unstructured_ingest/v2/cli/cmds/local.py +0 -60
- unstructured_ingest/v2/cli/cmds/milvus.py +0 -72
- unstructured_ingest/v2/cli/cmds/mongodb.py +0 -62
- unstructured_ingest/v2/cli/cmds/onedrive.py +0 -91
- unstructured_ingest/v2/cli/cmds/opensearch.py +0 -93
- unstructured_ingest/v2/cli/cmds/pinecone.py +0 -62
- unstructured_ingest/v2/cli/cmds/salesforce.py +0 -79
- unstructured_ingest/v2/cli/cmds/sharepoint.py +0 -112
- unstructured_ingest/v2/cli/cmds/singlestore.py +0 -96
- unstructured_ingest/v2/cli/cmds/sql.py +0 -84
- unstructured_ingest/v2/cli/cmds/weaviate.py +0 -100
- unstructured_ingest/v2/cli/configs/__init__.py +0 -6
- unstructured_ingest/v2/cli/configs/chunk.py +0 -89
- unstructured_ingest/v2/cli/configs/embed.py +0 -74
- unstructured_ingest/v2/cli/configs/partition.py +0 -99
- unstructured_ingest/v2/cli/configs/processor.py +0 -88
- unstructured_ingest/v2/cli/interfaces.py +0 -27
- unstructured_ingest/v2/pipeline/utils.py +0 -15
- unstructured_ingest-0.0.2.dev0.dist-info/METADATA +0 -321
- /unstructured_ingest/v2/cli/{cmds/fsspec → utils}/__init__.py +0 -0
- {unstructured_ingest-0.0.2.dev0.dist-info → unstructured_ingest-0.0.4.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.0.2.dev0.dist-info → unstructured_ingest-0.0.4.dist-info}/top_level.txt +0 -0
|
@@ -1,24 +1,24 @@
|
|
|
1
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
2
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=v-TfwgSI0ccjnDoj1c-7HQ-MXHHS7M2fy6BXegIjaRY,42
|
|
3
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
4
4
|
unstructured_ingest/evaluate.py,sha256=R-mKLFXbVX1xQ1tjGsLHjdP-TbSSV-925IHzggW_bIg,9793
|
|
5
|
-
unstructured_ingest/interfaces.py,sha256=
|
|
5
|
+
unstructured_ingest/interfaces.py,sha256=AeEywcSKCMA5AiEdENLpu_yPcXp_c6wpvESePfC00yo,31214
|
|
6
6
|
unstructured_ingest/logger.py,sha256=TrhyH7VbCWO5VVuhvL0yUyXxuem3b4pzbqj2uQHUwZk,4480
|
|
7
7
|
unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
|
|
8
8
|
unstructured_ingest/processor.py,sha256=XKKrvbxsb--5cDzz4hB3-GfWZYyIjJ2ah8FpzQKF_DM,2760
|
|
9
9
|
unstructured_ingest/cli/__init__.py,sha256=9kNcBOHuXON5lB1MJU9QewEhwPmId56vXqB29-kqEAA,302
|
|
10
|
-
unstructured_ingest/cli/cli.py,sha256=
|
|
10
|
+
unstructured_ingest/cli/cli.py,sha256=LutBTBYMqboKw8cputHVszpenyfnySzcUC15ifwuYyg,1049
|
|
11
11
|
unstructured_ingest/cli/cmd_factory.py,sha256=UdHm1KacTombpF6DxyTSwTCuApsKHUYw_kVu5Nhcy3Y,364
|
|
12
12
|
unstructured_ingest/cli/common.py,sha256=I0El08FHz5kxw7iz0VWOWPrvcJD1rBgXJSwVIpVmmwU,204
|
|
13
|
-
unstructured_ingest/cli/interfaces.py,sha256=
|
|
13
|
+
unstructured_ingest/cli/interfaces.py,sha256=1Nw9siznXr3vtr5nh71amjRz1itLYroqqnBQ4-lAG5Q,24130
|
|
14
14
|
unstructured_ingest/cli/utils.py,sha256=l7dmDf_KUO3SP4dcVDHjxYAU2b28yR-n-a8xoYVPmw4,7981
|
|
15
15
|
unstructured_ingest/cli/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
unstructured_ingest/cli/base/cmd.py,sha256=BbfjA2v203Jh-7DL6bzxQ7fOeNixd5BsBMuzXz6h5IQ,583
|
|
17
17
|
unstructured_ingest/cli/base/dest.py,sha256=uN44l7kPErm_BQqKFUgaiz_Xu6UKk-mnB1B8c0cb4lQ,3416
|
|
18
18
|
unstructured_ingest/cli/base/src.py,sha256=gDLZlBuOCEGMAAFCLkoURFQKmrmE34WQ5DbT0w1ssy4,2179
|
|
19
|
-
unstructured_ingest/cli/cmds/__init__.py,sha256=
|
|
19
|
+
unstructured_ingest/cli/cmds/__init__.py,sha256=TtVhAdlPv7e1DKYZ_D2q0wauE-1IjF-AtHp0RIskoD8,5932
|
|
20
20
|
unstructured_ingest/cli/cmds/airtable.py,sha256=SgdUztUCFbabWP3K937TwdYlhrdY2PUtE4TXUHfBGtw,2629
|
|
21
|
-
unstructured_ingest/cli/cmds/
|
|
21
|
+
unstructured_ingest/cli/cmds/astradb.py,sha256=T5Ccz29BDNewuHlL1PN5MDnBggQ93o2lGZgxDgTNtys,2931
|
|
22
22
|
unstructured_ingest/cli/cmds/azure_cognitive_search.py,sha256=PcM55PtpRMHbP69trW0JCTz-gx6tKMLUaMU4GGXv0g8,1927
|
|
23
23
|
unstructured_ingest/cli/cmds/biomed.py,sha256=M2jc7_-EvbAeDtDwtZNrPI48QJ1Tm401LcSUD0Ayd20,1442
|
|
24
24
|
unstructured_ingest/cli/cmds/chroma.py,sha256=zO17L7LgUaDfKutfQjSE-QjZJcREyeSpWZetja243lE,3267
|
|
@@ -60,37 +60,37 @@ unstructured_ingest/cli/cmds/fsspec/s3.py,sha256=v-24oFxhabdShryK2dhP4cDBvVyoQ-8
|
|
|
60
60
|
unstructured_ingest/cli/cmds/fsspec/sftp.py,sha256=TCB7sf_GYoifryQbbttknYSt9Q1kRCPtu8B8QgXl3lw,1537
|
|
61
61
|
unstructured_ingest/connector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
62
|
unstructured_ingest/connector/airtable.py,sha256=KcLt-FEabO9D5ev5E4xUf06VYHpYpypP-adTVyhGcb8,10585
|
|
63
|
-
unstructured_ingest/connector/
|
|
63
|
+
unstructured_ingest/connector/astradb.py,sha256=T4jo80foCRQ_QwTClf9abFWWQHCUTlmSOdYK9X8r7aQ,8635
|
|
64
64
|
unstructured_ingest/connector/azure_cognitive_search.py,sha256=cqQdAaEzt4coU7sxnl4GY8Em4a6azFLyAKM6enkmjBA,5850
|
|
65
|
-
unstructured_ingest/connector/biomed.py,sha256=
|
|
65
|
+
unstructured_ingest/connector/biomed.py,sha256=dVVyPhlYkJUzbQNg6qwgfqlmCScA4Ttac9ErS9yX4MM,10847
|
|
66
66
|
unstructured_ingest/connector/chroma.py,sha256=DgAe5xxDiV1BzAbKusL7JmkCGyEJ9lWTdoyy0txXoTw,5713
|
|
67
67
|
unstructured_ingest/connector/clarifai.py,sha256=kAtPGrjOps_aYdlhHkTtQc46Rfc0woNor6VY1UGEKZI,4211
|
|
68
|
-
unstructured_ingest/connector/confluence.py,sha256=
|
|
68
|
+
unstructured_ingest/connector/confluence.py,sha256=VbeGGcoMJQ3WIriHSGz1o5L1u24EeVrYQRjR6t1fcr0,10151
|
|
69
69
|
unstructured_ingest/connector/databricks_volumes.py,sha256=zEJZFO2Uq-eQ5k62_SmNsKC4NZ-ykmtQY5XacLWrr0I,4948
|
|
70
70
|
unstructured_ingest/connector/delta_table.py,sha256=fHUkZ8v3a20k_r64j-i6ulm4_Zi6eYGdiGKWj4q3BAs,7191
|
|
71
71
|
unstructured_ingest/connector/discord.py,sha256=SelvVGEF2SThdf8FSSVXGrIBgQoOcNgdKFUfEvpIcg0,6153
|
|
72
72
|
unstructured_ingest/connector/elasticsearch.py,sha256=UIqTQbXVhHprApfBCXBVBBmPMnWccjoaFgV6shrKG-U,14157
|
|
73
73
|
unstructured_ingest/connector/git.py,sha256=Hjf22SrJ_oFn4llxTa_54zW3jnZ6JVYB9tYWhCsrr1o,3817
|
|
74
|
-
unstructured_ingest/connector/github.py,sha256=
|
|
74
|
+
unstructured_ingest/connector/github.py,sha256=STgcJMcc4RSfOw-N-_Cb97LkHmk1nSI-ivdco7p-7y4,6578
|
|
75
75
|
unstructured_ingest/connector/gitlab.py,sha256=OEilnSFabWT3XY0riNxVTXc9tS3f1lMyHI6oZzb3Cw0,4926
|
|
76
|
-
unstructured_ingest/connector/google_drive.py,sha256=
|
|
76
|
+
unstructured_ingest/connector/google_drive.py,sha256=BgwYmQAL8C91HqxD0pOM2YX3iT406pHOoK8RxzqGRsk,13054
|
|
77
77
|
unstructured_ingest/connector/hubspot.py,sha256=teIvVsX1sSf2vILX9sri8ohpC0SA3yHA5sc-moKqls0,9271
|
|
78
78
|
unstructured_ingest/connector/jira.py,sha256=kxjGhbVSH8FJNPMGJbnpZEV5zZRfGFckVJFiOzExphQ,15690
|
|
79
79
|
unstructured_ingest/connector/kafka.py,sha256=cVEX_yn_9Vdvz6lujf1GdThsJfxJnVsdjfTnqhx7p1A,10053
|
|
80
80
|
unstructured_ingest/connector/local.py,sha256=OyGedubpn39bLs5Z4EeZqsb1Q-M-cJkWcrUV8eQ9yec,4479
|
|
81
|
-
unstructured_ingest/connector/mongodb.py,sha256=
|
|
81
|
+
unstructured_ingest/connector/mongodb.py,sha256=UD8T1V435YvGY68dpL-fyFesD7bcLckptgXzzfgnILI,9771
|
|
82
82
|
unstructured_ingest/connector/onedrive.py,sha256=hvVuC-Kup88ZMbJpXG8AxRiuQyscZw6nOVLOjlF_pK4,8911
|
|
83
83
|
unstructured_ingest/connector/opensearch.py,sha256=kvzqEqanP6nGHjxCJ2e2CAz9iK8na3yYBX1l4ZuVq0A,7937
|
|
84
84
|
unstructured_ingest/connector/outlook.py,sha256=Qbxrt_2ZSz329MxK5hb1_MYndPvPSXxCSfD0dMCy0Gs,10443
|
|
85
85
|
unstructured_ingest/connector/pinecone.py,sha256=hh4hbW7P8ebXf9n4S7ilvcL3Qzt9XEeZwA6_BkPnFEY,4796
|
|
86
86
|
unstructured_ingest/connector/qdrant.py,sha256=Y1PAW6ueAzkTxoeViZ7JjkErFJNJlSYvzaRU1c-hcJA,4964
|
|
87
87
|
unstructured_ingest/connector/reddit.py,sha256=8pyVSXXKGS9vOlNBeXw1ev5oqu-uWka5hzgUI8CFRos,5457
|
|
88
|
-
unstructured_ingest/connector/registry.py,sha256=
|
|
88
|
+
unstructured_ingest/connector/registry.py,sha256=SxXKzOGimHGYOPDSCsYm_xhbwNb-DIcv6XqxoPRIaIY,4846
|
|
89
89
|
unstructured_ingest/connector/salesforce.py,sha256=FrzevH1xB9deXdgt1ph7xa8BRFI8qC2sxGR4KsUHWSY,10941
|
|
90
90
|
unstructured_ingest/connector/sharepoint.py,sha256=4Ex4_rCOvA_7g2YmtsZd_mISjfCD_jRFtk_-JmC4lUc,22159
|
|
91
91
|
unstructured_ingest/connector/slack.py,sha256=1CJ19N2yWrAF1viUrqa4Yb-BUbCrUHmGMkUHhFEe6m4,7617
|
|
92
92
|
unstructured_ingest/connector/sql.py,sha256=YWJIuNtXkhwW_h7nlxkmzZhzMcICkZc1ezZ1CTzcf54,7625
|
|
93
|
-
unstructured_ingest/connector/vectara.py,sha256=
|
|
93
|
+
unstructured_ingest/connector/vectara.py,sha256=r7nE2kC9-2mfpMSNmVH1F_OkLetSapJ2Xj4mFAJJJ88,9539
|
|
94
94
|
unstructured_ingest/connector/weaviate.py,sha256=Pi0bqyTJhXk_1zdbmJCYvW1inHNTBa0i3cYKRRPcXO0,7291
|
|
95
95
|
unstructured_ingest/connector/wikipedia.py,sha256=lGccBwl2JlFJNIWqKj3SmUyTrC4xpmeFliCfahFrXRs,5992
|
|
96
96
|
unstructured_ingest/connector/fsspec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -102,8 +102,8 @@ unstructured_ingest/connector/fsspec/gcs.py,sha256=2PH5jBn09d3ZoM2j0RR-HSOjM0n1H
|
|
|
102
102
|
unstructured_ingest/connector/fsspec/s3.py,sha256=iMsdTzy2KYqkxQJ57UyuZAahtvE21iMT5SsgD4DC7RU,1723
|
|
103
103
|
unstructured_ingest/connector/fsspec/sftp.py,sha256=x2w8JGM81S_HXww7Aa-bTY1LjZSis56aOpCinga_bok,2653
|
|
104
104
|
unstructured_ingest/connector/notion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
105
|
-
unstructured_ingest/connector/notion/client.py,sha256=
|
|
106
|
-
unstructured_ingest/connector/notion/connector.py,sha256=
|
|
105
|
+
unstructured_ingest/connector/notion/client.py,sha256=j5_CEFzI3Bx7oxTGOPoT5sazrzIPTX2467dmCLyziNg,9169
|
|
106
|
+
unstructured_ingest/connector/notion/connector.py,sha256=8A9d-Pej-uXzjEy85zUloxIPK-EbpI5heVR6AVqOvU8,17538
|
|
107
107
|
unstructured_ingest/connector/notion/helpers.py,sha256=jqg5-cPcrjm5G7dBF4jOjHxdJN1CI7yBeTefiX4hBoM,20702
|
|
108
108
|
unstructured_ingest/connector/notion/interfaces.py,sha256=SrTT-9c0nvk0fMqVgudYF647r04AdMKi6wkIkMy7Szw,563
|
|
109
109
|
unstructured_ingest/connector/notion/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -177,7 +177,7 @@ unstructured_ingest/ingest_backoff/_wrapper.py,sha256=tukxuAYn-FbKTofluy9W16ah_6
|
|
|
177
177
|
unstructured_ingest/pipeline/__init__.py,sha256=5kFH21WHi6i1JZri5miY5tB5c9R8sGMBeweYiWH2fqw,537
|
|
178
178
|
unstructured_ingest/pipeline/copy.py,sha256=NwJGLrpP8r6WbWxp3epMYHbQycJUo81r6FjUOjrAlm0,768
|
|
179
179
|
unstructured_ingest/pipeline/doc_factory.py,sha256=Y66k-CoIpwWAD3vWwBeHzI2YESlIsPUhL2OQ8i9RRWE,360
|
|
180
|
-
unstructured_ingest/pipeline/interfaces.py,sha256=
|
|
180
|
+
unstructured_ingest/pipeline/interfaces.py,sha256=YTlJnMQTi4x3jyeU8o-zo4QbHW25f7mFO0jx0IgI84o,8136
|
|
181
181
|
unstructured_ingest/pipeline/partition.py,sha256=xp1Oj_oHZjukGBWrgW-ElJlQMNWASqjqqNSfbi3tFQE,2779
|
|
182
182
|
unstructured_ingest/pipeline/permissions.py,sha256=jTqiFYrOTPHEP79EmrgyzTi0SseqRCwYkcepH4HctLI,365
|
|
183
183
|
unstructured_ingest/pipeline/pipeline.py,sha256=JHsXPGLY129woBcvXMV7wbcstHu_OLB5LR0jIxreNKg,4806
|
|
@@ -185,11 +185,11 @@ unstructured_ingest/pipeline/source.py,sha256=YMRZkcdCwRWCiwhnDfTSYxdl9Vv5JH5ut3
|
|
|
185
185
|
unstructured_ingest/pipeline/utils.py,sha256=RNx4bv2FhKOhaK_YTiRubta7n9wmJwqzznFNlY25Dtw,168
|
|
186
186
|
unstructured_ingest/pipeline/write.py,sha256=xmDjmbieGRrcI342he7PkgxWaMoSJ5nWPmP5AM2xloU,669
|
|
187
187
|
unstructured_ingest/pipeline/reformat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
188
|
-
unstructured_ingest/pipeline/reformat/chunking.py,sha256=
|
|
189
|
-
unstructured_ingest/pipeline/reformat/embedding.py,sha256=
|
|
190
|
-
unstructured_ingest/runner/__init__.py,sha256=
|
|
188
|
+
unstructured_ingest/pipeline/reformat/chunking.py,sha256=10LOAU6b2b-S6mzks8VpI1bP2pY1viNDM8dQSPJ8F_s,6035
|
|
189
|
+
unstructured_ingest/pipeline/reformat/embedding.py,sha256=PLjVYHgEMnrHAnBW34uYPyhOzvVMhefIRpRTPd4Bnl8,2644
|
|
190
|
+
unstructured_ingest/runner/__init__.py,sha256=FO0X_jBIMilXdyjBajyFmzHoC3eVypNMGlhdOW4mcCM,2859
|
|
191
191
|
unstructured_ingest/runner/airtable.py,sha256=1ndJ6PKT63E0gZN3KYFBj4Yo94zQYsIvSjC6ro2nIPE,1115
|
|
192
|
-
unstructured_ingest/runner/
|
|
192
|
+
unstructured_ingest/runner/astradb.py,sha256=FSBtQrsdC9E3eHUcAuQ0apcCnWolz-9tkvy-Uf7QeKg,1102
|
|
193
193
|
unstructured_ingest/runner/base_runner.py,sha256=DRiIRjHwZd0s7DIMZl_4vcDTrTEI-e_295B3UzTJe9M,3223
|
|
194
194
|
unstructured_ingest/runner/biomed.py,sha256=NaWTJmChYfTKkDHY_MVbDazX_KdP6GrmwJqc82WCuWI,1483
|
|
195
195
|
unstructured_ingest/runner/confluence.py,sha256=RlrupdeXvMf3c6XO0S43LQL9gW202knN0vZFwBjN0PM,1099
|
|
@@ -222,8 +222,8 @@ unstructured_ingest/runner/fsspec/fsspec.py,sha256=83LpsJAgPDJ3HzCKeaWXh7alO8duL
|
|
|
222
222
|
unstructured_ingest/runner/fsspec/gcs.py,sha256=HZyQBoHdnLRA9pULopY7k3b9xLEviENwuDmDGpUoZmU,949
|
|
223
223
|
unstructured_ingest/runner/fsspec/s3.py,sha256=LPsm-Kz1XmrVHM5nj6OcADnI-K6rVbtGXGfSicN_g-A,941
|
|
224
224
|
unstructured_ingest/runner/fsspec/sftp.py,sha256=6vD_CVnxcdpHt4wSEQJ-tQvKL6BQJYxlw2g6OHzlTWw,957
|
|
225
|
-
unstructured_ingest/runner/writers/__init__.py,sha256=
|
|
226
|
-
unstructured_ingest/runner/writers/
|
|
225
|
+
unstructured_ingest/runner/writers/__init__.py,sha256=0WPUxIIj4eKvFA5Uli8A06LU-YOoKx6Yh1BEE5PZdRw,1528
|
|
226
|
+
unstructured_ingest/runner/writers/astradb.py,sha256=-WDJtRgdBho1S7ju52HEXcrAAiuqrfH1t2dpFcoAGzg,759
|
|
227
227
|
unstructured_ingest/runner/writers/azure_cognitive_search.py,sha256=_5nG3C4DMIwZ_y6I03DrvzhZ6HoeaNVzFWRzjRR6xAQ,813
|
|
228
228
|
unstructured_ingest/runner/writers/base_writer.py,sha256=S16pacw1HbAj9D5L8tWJbVjVJzv1Xp5RYTj3J9rtrHo,669
|
|
229
229
|
unstructured_ingest/runner/writers/chroma.py,sha256=VDeaZPkJjBl55l1ztMK1cW-72N8j5F4Ro5Oh8stYKPo,750
|
|
@@ -246,113 +246,89 @@ unstructured_ingest/runner/writers/fsspec/dropbox.py,sha256=y0kmx5Xjc9Ypfg6t6N_x
|
|
|
246
246
|
unstructured_ingest/runner/writers/fsspec/gcs.py,sha256=ia-gconOz1kWI1jmYeB9NY6cwjWfofoZAydKfZsaFs0,606
|
|
247
247
|
unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdywczOCr2VI5e_bVms-Vw,622
|
|
248
248
|
unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
249
|
+
unstructured_ingest/utils/chunking.py,sha256=efWEfMcCukG5zASZrXhkNgAX8AzHa6t3rClMzm2TwFE,1521
|
|
249
250
|
unstructured_ingest/utils/compression.py,sha256=ZzJQeNn1a6oseKo-oDwaLggBzi32oqPL5Z9I_Y-YYy8,4340
|
|
250
251
|
unstructured_ingest/utils/data_prep.py,sha256=oizt8xZ15hExZ_8xacMG0j1LSH4ILreQpXJUYm2Bb_M,3917
|
|
251
|
-
unstructured_ingest/utils/dep_check.py,sha256=
|
|
252
|
+
unstructured_ingest/utils/dep_check.py,sha256=cVEqZtMwji8BIt7pjtUOMtEmN7KaNXRXwelEKFpOdW8,1914
|
|
253
|
+
unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
|
|
252
254
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=hnGglD8Z626vLhH_UV4QybF_P62vwWRcA8CLk2x-s40,1377
|
|
253
255
|
unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
|
|
254
256
|
unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
255
257
|
unstructured_ingest/v2/example.py,sha256=qkwmpMxUlaJXdDNKQ4LlUt3XGxgTUU3CXGGO57eW5Gs,1644
|
|
256
258
|
unstructured_ingest/v2/logger.py,sha256=akcghdHwpKM3CfoeFzir0zmc7R9Hk7zjquU-X-gwUIw,4324
|
|
257
259
|
unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
|
|
260
|
+
unstructured_ingest/v2/utils.py,sha256=ykmyvmRMHGahkpKbkFbJfEHwNjZccKqbYsixUtUtrFw,1478
|
|
258
261
|
unstructured_ingest/v2/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
259
262
|
unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfiMULE,648
|
|
260
|
-
unstructured_ingest/v2/cli/
|
|
261
|
-
unstructured_ingest/v2/cli/utils.py,sha256=QK-ee6FzxPf-IbaNXXWlH-GhvqeNnjK2m8ljBD1SusU,9075
|
|
263
|
+
unstructured_ingest/v2/cli/cmds.py,sha256=wWUTbvvxEqKAy6bNE6XhPnj0ELMeSbb9_r1NZl58xMM,489
|
|
262
264
|
unstructured_ingest/v2/cli/base/__init__.py,sha256=zXCa7F4FMqItmzxfUIVmyI-CeGh8X85yF8lRxwX_OYQ,83
|
|
263
|
-
unstructured_ingest/v2/cli/base/cmd.py,sha256=
|
|
264
|
-
unstructured_ingest/v2/cli/base/dest.py,sha256=
|
|
265
|
+
unstructured_ingest/v2/cli/base/cmd.py,sha256=JJ4ON8IrtfK1ub38er81EPOo3urZDdGL829k-JHcZ7A,11481
|
|
266
|
+
unstructured_ingest/v2/cli/base/dest.py,sha256=_m5rUTHusHkXxzKUfcMtX9_xitbqyxajvIxuyev25vg,3197
|
|
265
267
|
unstructured_ingest/v2/cli/base/importer.py,sha256=nRt0QQ3qpi264-n_mR0l55C2ddM8nowTNzT1jsWaam8,1128
|
|
266
|
-
unstructured_ingest/v2/cli/base/src.py,sha256=
|
|
267
|
-
unstructured_ingest/v2/cli/
|
|
268
|
-
unstructured_ingest/v2/cli/
|
|
269
|
-
unstructured_ingest/v2/cli/
|
|
270
|
-
unstructured_ingest/v2/
|
|
271
|
-
unstructured_ingest/v2/
|
|
272
|
-
unstructured_ingest/v2/
|
|
273
|
-
unstructured_ingest/v2/
|
|
274
|
-
unstructured_ingest/v2/
|
|
275
|
-
unstructured_ingest/v2/
|
|
276
|
-
unstructured_ingest/v2/
|
|
277
|
-
unstructured_ingest/v2/
|
|
278
|
-
unstructured_ingest/v2/
|
|
279
|
-
unstructured_ingest/v2/cli/cmds/pinecone.py,sha256=DFJ7vh5-BZ6ll4TKTDCWp9GuiOvVDlSs7OJtiJ5DRI8,1720
|
|
280
|
-
unstructured_ingest/v2/cli/cmds/salesforce.py,sha256=ejyYPOuh3APNUDC0vYynJQoUFTk7792B0eAP0TcVkkQ,2431
|
|
281
|
-
unstructured_ingest/v2/cli/cmds/sharepoint.py,sha256=EK1RVs8cNNIA60JrDvr7SciMeXpSluMzBiQod9hK-UU,3722
|
|
282
|
-
unstructured_ingest/v2/cli/cmds/singlestore.py,sha256=awyP4FlP20bBcPmEOntkJBk18UAY7iqwUmhaxelkiGQ,2667
|
|
283
|
-
unstructured_ingest/v2/cli/cmds/sql.py,sha256=gvxBlVCsrIF4_NHWABMtR4R6PqbgYeIPA1kDL4dt-yg,2228
|
|
284
|
-
unstructured_ingest/v2/cli/cmds/weaviate.py,sha256=3Ra6MFdVGV3iRXq3B1I1D50GMpKe9HOq93NA0aYCKpo,2906
|
|
285
|
-
unstructured_ingest/v2/cli/cmds/fsspec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
286
|
-
unstructured_ingest/v2/cli/cmds/fsspec/azure.py,sha256=ZHfchzSpGkZ99Fq1050JvHP0-aG1pZsBZxxozcFfxwI,2784
|
|
287
|
-
unstructured_ingest/v2/cli/cmds/fsspec/box.py,sha256=kslkI-0-GyGSJOU7bKgrZeQRXh8HFexDq87ew8kT8kE,1338
|
|
288
|
-
unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py,sha256=LtcR3rCQPgzJNbV3S90HlL0LPPbW9lYEfE8BG4F-dSI,1349
|
|
289
|
-
unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py,sha256=Mgr_nto1FWxnGmbGdVlIfm-xuBGL0HEi8k3FEmQnZng,2414
|
|
290
|
-
unstructured_ingest/v2/cli/cmds/fsspec/gcs.py,sha256=3-0LYnDs0fgNrDqnHpNZKj_6rwNj9wQVaV0lGOhVFPE,2737
|
|
291
|
-
unstructured_ingest/v2/cli/cmds/fsspec/s3.py,sha256=EXQzYkDtkFli2sfcj4cRDRPFac7b7z1DfQqYlGQcE6o,2279
|
|
292
|
-
unstructured_ingest/v2/cli/cmds/fsspec/sftp.py,sha256=YY2xKguawMyLdcG0qDYKUgk7DT0KgyZJlV17MfwIhpo,2036
|
|
293
|
-
unstructured_ingest/v2/cli/configs/__init__.py,sha256=5NMXm872QQZTvUFZFS06c8c1b6K940K5gxs9lbp8W6M,258
|
|
294
|
-
unstructured_ingest/v2/cli/configs/chunk.py,sha256=KvIhmIRIZxazCumMztAKdWs-4MK7qzOb5h6Ned_2bdU,3547
|
|
295
|
-
unstructured_ingest/v2/cli/configs/embed.py,sha256=q_TwnkxKTKOsMgVYfW6xxbD8FWjU_Uh_X2BQ5-_VLGM,2725
|
|
296
|
-
unstructured_ingest/v2/cli/configs/partition.py,sha256=7wdI18V6c4kaXuf50Lh66n9LbtrYHYd8ffEgDQLqvSk,3931
|
|
297
|
-
unstructured_ingest/v2/cli/configs/processor.py,sha256=ZHu2DBIuE8VgL3mEt73yYimw2k_PaOEtdxxFqzHfk84,3350
|
|
298
|
-
unstructured_ingest/v2/interfaces/__init__.py,sha256=-CHWUlT4rISd-gSfcGKGYFqqSFhMY9lKsT5wxwmOThM,845
|
|
299
|
-
unstructured_ingest/v2/interfaces/connector.py,sha256=u4hE1DpTPDC04-n_IzYyn9w1gNCiPT81anrUoEh30Z8,855
|
|
300
|
-
unstructured_ingest/v2/interfaces/downloader.py,sha256=aWlacZZrI6SGw6retnRJtZbqT5voOYq_fb326ynNOhI,2506
|
|
301
|
-
unstructured_ingest/v2/interfaces/file_data.py,sha256=5TCMkblUW-Jvy-rS5FqRT22VzDmJqAiQRIWYarpAi64,1543
|
|
302
|
-
unstructured_ingest/v2/interfaces/indexer.py,sha256=pMw0abNHk_tEuA4BkXX1BdAfIwHdytxj7s6tGxMvYRE,821
|
|
303
|
-
unstructured_ingest/v2/interfaces/process.py,sha256=0ecz7mAjlY_DUi9-HhPc9zXphmGclispYwv37O8gvJ0,466
|
|
304
|
-
unstructured_ingest/v2/interfaces/processor.py,sha256=uHVHeKo5Gt_zFkaEXw7xgaCBDTEl2-Amh-ByA07258o,1620
|
|
305
|
-
unstructured_ingest/v2/interfaces/upload_stager.py,sha256=SylhDl9pK6qa7hvfrhpabCkjwE03yIlI6oM-mQnqtho,1220
|
|
306
|
-
unstructured_ingest/v2/interfaces/uploader.py,sha256=bzfx3Ei4poXKu-hsgjAB4sj4jKij9CoaRSadUM5LtGk,1083
|
|
268
|
+
unstructured_ingest/v2/cli/base/src.py,sha256=kcBmARpZmH6HFL2GOCAnABkLaRwc85DLa4oiKwfQlpw,2832
|
|
269
|
+
unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
270
|
+
unstructured_ingest/v2/cli/utils/click.py,sha256=SmUAiKiFXVCZ4_bhjrFKvYoLhcVEm5z7zJQw_M0Ad2w,6340
|
|
271
|
+
unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=fg2Gu20aovYmyHIp9Pz8mRLn8gNhxMcPitzKfqsS_hQ,7224
|
|
272
|
+
unstructured_ingest/v2/interfaces/__init__.py,sha256=Rfa8crx6De7WNOK-EjsWWwFVpsUfCc6gY8B8tQ3ae9I,899
|
|
273
|
+
unstructured_ingest/v2/interfaces/connector.py,sha256=Lm7wJTlKUfVKJjk-BchS0RtZ9_Lo9tzZ62rPtF3HOGY,759
|
|
274
|
+
unstructured_ingest/v2/interfaces/downloader.py,sha256=PKT1kr79Mz1urW_8xCyq9sBuK93gDvyTXg5e4ma4htU,2871
|
|
275
|
+
unstructured_ingest/v2/interfaces/file_data.py,sha256=w6sBMCDH1va6XbvVaZyb7EJendXRCa_mo1Qv8uTWzCU,1898
|
|
276
|
+
unstructured_ingest/v2/interfaces/indexer.py,sha256=Bd1S-gTLsxhJBLEh1lYm_gXqwQLaEZMoqPq9yGxtN_E,713
|
|
277
|
+
unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
|
|
278
|
+
unstructured_ingest/v2/interfaces/processor.py,sha256=t1LIrkubfbqt7RMZ9bABrxd0Z9TJxG6zqozBC5Pi4Yc,1615
|
|
279
|
+
unstructured_ingest/v2/interfaces/upload_stager.py,sha256=ZFkDxcwKn-6EPrTbdBEgOkz1kGAq4gUtze98KP48KG4,1146
|
|
280
|
+
unstructured_ingest/v2/interfaces/uploader.py,sha256=ymEC-0JFTvjuAFsz9QLRF_6rFNFlDURkAgZ1tBUb2ec,1009
|
|
307
281
|
unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
308
|
-
unstructured_ingest/v2/pipeline/interfaces.py,sha256=
|
|
309
|
-
unstructured_ingest/v2/pipeline/pipeline.py,sha256=
|
|
310
|
-
unstructured_ingest/v2/pipeline/utils.py,sha256=oPAitfdnITqh2O8Z0uf6VOHg9BTJhitRzNmKXqTwPxg,422
|
|
282
|
+
unstructured_ingest/v2/pipeline/interfaces.py,sha256=Z50-6XFZNajfmJbLKunLxw3RuYMzCYiUp6F0jhQwERE,6441
|
|
283
|
+
unstructured_ingest/v2/pipeline/pipeline.py,sha256=mtCuhku4LoKbdSMmhuyefGiQqMS9G9QsOA_C_lLJE3g,14144
|
|
311
284
|
unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
312
|
-
unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=
|
|
313
|
-
unstructured_ingest/v2/pipeline/steps/download.py,sha256=
|
|
314
|
-
unstructured_ingest/v2/pipeline/steps/embed.py,sha256=
|
|
315
|
-
unstructured_ingest/v2/pipeline/steps/
|
|
316
|
-
unstructured_ingest/v2/pipeline/steps/
|
|
317
|
-
unstructured_ingest/v2/pipeline/steps/
|
|
318
|
-
unstructured_ingest/v2/pipeline/steps/
|
|
319
|
-
unstructured_ingest/v2/pipeline/steps/
|
|
320
|
-
unstructured_ingest/v2/
|
|
321
|
-
unstructured_ingest/v2/processes/
|
|
322
|
-
unstructured_ingest/v2/processes/
|
|
323
|
-
unstructured_ingest/v2/processes/
|
|
324
|
-
unstructured_ingest/v2/processes/
|
|
325
|
-
unstructured_ingest/v2/processes/
|
|
326
|
-
unstructured_ingest/v2/processes/
|
|
327
|
-
unstructured_ingest/v2/processes/
|
|
328
|
-
unstructured_ingest/v2/processes/connectors/
|
|
329
|
-
unstructured_ingest/v2/processes/connectors/
|
|
330
|
-
unstructured_ingest/v2/processes/connectors/
|
|
331
|
-
unstructured_ingest/v2/processes/connectors/
|
|
332
|
-
unstructured_ingest/v2/processes/connectors/
|
|
333
|
-
unstructured_ingest/v2/processes/connectors/
|
|
334
|
-
unstructured_ingest/v2/processes/connectors/
|
|
335
|
-
unstructured_ingest/v2/processes/connectors/
|
|
336
|
-
unstructured_ingest/v2/processes/connectors/
|
|
337
|
-
unstructured_ingest/v2/processes/connectors/
|
|
338
|
-
unstructured_ingest/v2/processes/connectors/
|
|
339
|
-
unstructured_ingest/v2/processes/connectors/
|
|
340
|
-
unstructured_ingest/v2/processes/connectors/
|
|
341
|
-
unstructured_ingest/v2/processes/connectors/
|
|
342
|
-
unstructured_ingest/v2/processes/connectors/
|
|
285
|
+
unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=I5JQn9rVJu9zEnsAZsJzDnxuASp2hdkF8ZRW4dOtgb0,3124
|
|
286
|
+
unstructured_ingest/v2/pipeline/steps/download.py,sha256=IwysS1_YZRuraIifBW94LWhPNDYU-oaeFkEbgPNpBag,7690
|
|
287
|
+
unstructured_ingest/v2/pipeline/steps/embed.py,sha256=5wONbMvT_hZRZtHPgquok1ryC66dajCU5iifVfIaP9Y,3102
|
|
288
|
+
unstructured_ingest/v2/pipeline/steps/filter.py,sha256=1HM6aBZ5YI0wHQjMXx4KISsiueRlLXVn0mYyiXLMgy4,1188
|
|
289
|
+
unstructured_ingest/v2/pipeline/steps/index.py,sha256=oyYFtDgWg-CJoBM75YTln1t6DjGoLooz2stEeiaB08Q,2537
|
|
290
|
+
unstructured_ingest/v2/pipeline/steps/partition.py,sha256=2NuXpDQ9brf7D4vPhbalCGpjw80XRGYZAAO-Ist1yKs,3182
|
|
291
|
+
unstructured_ingest/v2/pipeline/steps/stage.py,sha256=6gAPzp46DrsOtL914hqgATRDCMvBRI7VtvlsFuMWc4I,2211
|
|
292
|
+
unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=fEjHPdXnMKpmN5vhqbrNOrVsQGKcoKxRSKE5PrViE9I,2389
|
|
293
|
+
unstructured_ingest/v2/pipeline/steps/upload.py,sha256=5Y6oZV5IyfOvXr8RPYjEfy8_yp4XGatoY0tsznlH_wA,2278
|
|
294
|
+
unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
|
|
295
|
+
unstructured_ingest/v2/processes/chunker.py,sha256=4fAMoFEC4kFkdLsJrpkdtQpexX3WlC1BDR7PQ9eICo0,6524
|
|
296
|
+
unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
|
|
297
|
+
unstructured_ingest/v2/processes/embedder.py,sha256=1AYF0o41tYtQv-ArGCc1PKGnlmNFDiFmhhpgEuG2d4I,5939
|
|
298
|
+
unstructured_ingest/v2/processes/filter.py,sha256=eiAxdYiX8wd4vmD4J40x5t5wwJNmoGa5z33Z9Q-knK8,2145
|
|
299
|
+
unstructured_ingest/v2/processes/partitioner.py,sha256=4lOwHiuCs2ZdQ0a3Eu7ila-UeDYmaQooryM3CBCOnJg,9575
|
|
300
|
+
unstructured_ingest/v2/processes/uncompress.py,sha256=LL68WLq4EfZGORvv5vaQx03EXfiA7k62sbzElPEN8AM,1557
|
|
301
|
+
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=WabtoGs3D8q1Uie7GpYrMycz-mN3hZ0XgADnGTUzg1g,4498
|
|
302
|
+
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=QdcMAGSnpoJI5rk3btOkcSyldtb16_985bm7QiQSr5I,5930
|
|
303
|
+
unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=Tx2ux-w7CgEZ4bn0r0dtAUzAy4HB9PvpQRoh52oU0ac,8603
|
|
304
|
+
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=a5Ebd5Rj7OF0bTpj88aIRh6HsKJJYFDhVe-7RQEuzvs,7916
|
|
305
|
+
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=1GzddUJlXOimu-hCXYpG3buABA5QOE1l8Lv8DCiIfM8,5372
|
|
306
|
+
unstructured_ingest/v2/processes/connectors/databricks_volumes.py,sha256=kn5xBdcB2UfnuYxNj0URvBD4WUNIUn0Roi_9mpYOMII,6120
|
|
307
|
+
unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=9ZNNiX69-5beVIQrrYB5uU8x9F4KAogZsPiRshhEuso,16898
|
|
308
|
+
unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=zRhhBCDFF4QzBpT2Ij1uXd5jdKTc_JR9WwfSLV9ynQc,12890
|
|
309
|
+
unstructured_ingest/v2/processes/connectors/local.py,sha256=6aa0azh0L6Xvq9D0UCvVjtcodYy4fqO7tnuvNTIILfs,6974
|
|
310
|
+
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=J3Zjg7dqhqQt35AYWiUt812bzfwHa9hVapWiWxswhHQ,7265
|
|
311
|
+
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=3FavmgKjtQADSyuH3EMIkfUgmRjIQfc0wVDlvLpd7Hs,5098
|
|
312
|
+
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=-B8ELr0rnspzrTy6HBvgbvuiF1eEKRQyCT1ocwmET5Q,9145
|
|
313
|
+
unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=dEj4yYCgdhCD6376kuhKH7NVO5v-vsrN8dbULjEXfss,6811
|
|
314
|
+
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=HpryZEv-lokJrvkH8HnLfu8stnKsobbgXalH6y7mrqg,6257
|
|
315
|
+
unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
|
|
316
|
+
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=_ZTjtFNcKcJ0z4cvEZml18TdOMm-Kbwlz8nxTTjp9nc,19500
|
|
317
|
+
unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=Pkax_54XGhbp-BeC_jsLxRHEJlJeYMYsH5r0H1tVi8w,5526
|
|
318
|
+
unstructured_ingest/v2/processes/connectors/sql.py,sha256=UU4EoDMy--QyHnY-Bw96QXcGFmnQAyHO_QhkdjYZi30,9386
|
|
343
319
|
unstructured_ingest/v2/processes/connectors/utils.py,sha256=nmpZZCeX0O7rGrwHSWM_heBgpZK9tKT6EV1Moer-z40,576
|
|
344
|
-
unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=
|
|
320
|
+
unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=5tQbo0jfqhl61Uc6DWgLo4sb5tFL_4BO00H0tq0WoqU,9207
|
|
345
321
|
unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
|
|
346
|
-
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=
|
|
347
|
-
unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=
|
|
348
|
-
unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=
|
|
349
|
-
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=
|
|
350
|
-
unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=
|
|
351
|
-
unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=
|
|
352
|
-
unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=
|
|
322
|
+
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=ovlU5s_s5vfjRqECXOlOTNONlaZ-hWqcjWJ2eZv8iIY,6214
|
|
323
|
+
unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=MerISu_HxJ3n4-4VVnz9tBAbkCbhQmt0k_iHSSZvZYI,4435
|
|
324
|
+
unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=_I_GXnjrzrfJO0zoXgaLQX3foIvXYl_-Cm3yfHOCkUs,4775
|
|
325
|
+
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=Nn9rOkQKH9aq575R-h-l9xZCFsLfrYiry8L5xbHP_DQ,12556
|
|
326
|
+
unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=HoExrmUHigyWRBgEMneKigjcROc-GH_Jp2aUyWkHBH0,6006
|
|
327
|
+
unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=tKxH6h80i7BDzEoMsWZZBLwJ1wpJ-WPxUwT2ezWRVLI,6394
|
|
328
|
+
unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=dpLFv5vGtw8aiLDgrf6E7elg8Pm2dKF0MQnnl84rIqM,6056
|
|
353
329
|
unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
|
|
354
|
-
unstructured_ingest-0.0.
|
|
355
|
-
unstructured_ingest-0.0.
|
|
356
|
-
unstructured_ingest-0.0.
|
|
357
|
-
unstructured_ingest-0.0.
|
|
358
|
-
unstructured_ingest-0.0.
|
|
330
|
+
unstructured_ingest-0.0.4.dist-info/METADATA,sha256=1t52Hehl3ERL62zPh-vHdTlOOnm158Hav3kn7kMhs5s,24632
|
|
331
|
+
unstructured_ingest-0.0.4.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
332
|
+
unstructured_ingest-0.0.4.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
333
|
+
unstructured_ingest-0.0.4.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
|
|
334
|
+
unstructured_ingest-0.0.4.dist-info/RECORD,,
|
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
from collections import Counter
|
|
2
|
-
|
|
3
|
-
import click
|
|
4
|
-
|
|
5
|
-
from .astra import astra_dest_cmd
|
|
6
|
-
from .azure_cognitive_search import azure_cognitive_search_dest_cmd
|
|
7
|
-
from .chroma import chroma_dest_cmd
|
|
8
|
-
from .databricks_volumes import databricks_volumes_dest_cmd
|
|
9
|
-
from .elasticsearch import elasticsearch_dest_cmd, elasticsearch_src_cmd
|
|
10
|
-
from .fsspec.azure import azure_dest_cmd, azure_src_cmd
|
|
11
|
-
from .fsspec.box import box_dest_cmd, box_src_cmd
|
|
12
|
-
from .fsspec.dropbox import dropbox_dest_cmd, dropbox_src_cmd
|
|
13
|
-
from .fsspec.gcs import gcs_dest_cmd, gcs_src_cmd
|
|
14
|
-
from .fsspec.s3 import s3_dest_cmd, s3_src_cmd
|
|
15
|
-
from .fsspec.sftp import sftp_dest_cmd, sftp_src_cmd
|
|
16
|
-
from .google_drive import google_drive_src_cmd
|
|
17
|
-
from .local import local_dest_cmd, local_src_cmd
|
|
18
|
-
from .milvus import milvus_dest_cmd
|
|
19
|
-
from .mongodb import mongodb_dest_cmd
|
|
20
|
-
from .onedrive import onedrive_drive_src_cmd
|
|
21
|
-
from .opensearch import opensearch_dest_cmd, opensearch_src_cmd
|
|
22
|
-
from .pinecone import pinecone_dest_cmd
|
|
23
|
-
from .salesforce import salesforce_src_cmd
|
|
24
|
-
from .sharepoint import sharepoint_drive_src_cmd
|
|
25
|
-
from .singlestore import singlestore_dest_cmd
|
|
26
|
-
from .sql import sql_dest_cmd
|
|
27
|
-
from .weaviate import weaviate_dest_cmd
|
|
28
|
-
|
|
29
|
-
src_cmds = [
|
|
30
|
-
azure_src_cmd,
|
|
31
|
-
box_src_cmd,
|
|
32
|
-
dropbox_src_cmd,
|
|
33
|
-
elasticsearch_src_cmd,
|
|
34
|
-
gcs_src_cmd,
|
|
35
|
-
google_drive_src_cmd,
|
|
36
|
-
local_src_cmd,
|
|
37
|
-
onedrive_drive_src_cmd,
|
|
38
|
-
opensearch_src_cmd,
|
|
39
|
-
s3_src_cmd,
|
|
40
|
-
salesforce_src_cmd,
|
|
41
|
-
sharepoint_drive_src_cmd,
|
|
42
|
-
sftp_src_cmd,
|
|
43
|
-
]
|
|
44
|
-
duplicate_src_names = [
|
|
45
|
-
name for name, count in Counter([s.cmd_name for s in src_cmds]).items() if count > 1
|
|
46
|
-
]
|
|
47
|
-
if duplicate_src_names:
|
|
48
|
-
raise ValueError(
|
|
49
|
-
"the following source cmd names were reused, all must be unique: {}".format(
|
|
50
|
-
", ".join(duplicate_src_names)
|
|
51
|
-
)
|
|
52
|
-
)
|
|
53
|
-
|
|
54
|
-
dest_cmds = [
|
|
55
|
-
astra_dest_cmd,
|
|
56
|
-
azure_cognitive_search_dest_cmd,
|
|
57
|
-
azure_dest_cmd,
|
|
58
|
-
box_dest_cmd,
|
|
59
|
-
chroma_dest_cmd,
|
|
60
|
-
dropbox_dest_cmd,
|
|
61
|
-
elasticsearch_dest_cmd,
|
|
62
|
-
gcs_dest_cmd,
|
|
63
|
-
local_dest_cmd,
|
|
64
|
-
milvus_dest_cmd,
|
|
65
|
-
opensearch_dest_cmd,
|
|
66
|
-
pinecone_dest_cmd,
|
|
67
|
-
s3_dest_cmd,
|
|
68
|
-
sftp_dest_cmd,
|
|
69
|
-
singlestore_dest_cmd,
|
|
70
|
-
weaviate_dest_cmd,
|
|
71
|
-
mongodb_dest_cmd,
|
|
72
|
-
databricks_volumes_dest_cmd,
|
|
73
|
-
sql_dest_cmd,
|
|
74
|
-
]
|
|
75
|
-
|
|
76
|
-
duplicate_dest_names = [
|
|
77
|
-
name for name, count in Counter([d.cmd_name for d in dest_cmds]).items() if count > 1
|
|
78
|
-
]
|
|
79
|
-
if duplicate_dest_names:
|
|
80
|
-
raise ValueError(
|
|
81
|
-
"the following dest cmd names were reused, all must be unique: {}".format(
|
|
82
|
-
", ".join(duplicate_dest_names)
|
|
83
|
-
)
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
src: list[click.Group] = [v.get_cmd() for v in src_cmds]
|
|
88
|
-
|
|
89
|
-
dest: list[click.Command] = [v.get_cmd() for v in dest_cmds]
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
|
|
3
|
-
import click
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.v2.cli.base import DestCmd
|
|
6
|
-
from unstructured_ingest.v2.cli.interfaces import CliConfig
|
|
7
|
-
from unstructured_ingest.v2.cli.utils import Dict
|
|
8
|
-
from unstructured_ingest.v2.processes.connectors.astra import CONNECTOR_TYPE
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@dataclass
|
|
12
|
-
class AstraCliConnectionConfig(CliConfig):
|
|
13
|
-
@staticmethod
|
|
14
|
-
def get_cli_options() -> list[click.Option]:
|
|
15
|
-
options = [
|
|
16
|
-
click.Option(
|
|
17
|
-
["--token"],
|
|
18
|
-
required=True,
|
|
19
|
-
type=str,
|
|
20
|
-
help="Astra DB Token with access to the database.",
|
|
21
|
-
envvar="ASTRA_DB_TOKEN",
|
|
22
|
-
show_envvar=True,
|
|
23
|
-
),
|
|
24
|
-
click.Option(
|
|
25
|
-
["--api-endpoint"],
|
|
26
|
-
required=True,
|
|
27
|
-
type=str,
|
|
28
|
-
help="The API endpoint for the Astra DB.",
|
|
29
|
-
envvar="ASTRA_DB_ENDPOINT",
|
|
30
|
-
show_envvar=True,
|
|
31
|
-
),
|
|
32
|
-
]
|
|
33
|
-
return options
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
@dataclass
|
|
37
|
-
class AstraCliUploaderConfig(CliConfig):
|
|
38
|
-
@staticmethod
|
|
39
|
-
def get_cli_options() -> list[click.Option]:
|
|
40
|
-
options = [
|
|
41
|
-
click.Option(
|
|
42
|
-
["--collection-name"],
|
|
43
|
-
required=False,
|
|
44
|
-
type=str,
|
|
45
|
-
help="The name of the Astra DB collection. "
|
|
46
|
-
"Note that the collection name must only include letters, "
|
|
47
|
-
"numbers, and underscores.",
|
|
48
|
-
),
|
|
49
|
-
click.Option(
|
|
50
|
-
["--embedding-dimension"],
|
|
51
|
-
required=True,
|
|
52
|
-
default=384,
|
|
53
|
-
type=int,
|
|
54
|
-
help="The dimensionality of the embeddings",
|
|
55
|
-
),
|
|
56
|
-
click.Option(
|
|
57
|
-
["--namespace"],
|
|
58
|
-
required=False,
|
|
59
|
-
default=None,
|
|
60
|
-
type=str,
|
|
61
|
-
help="The Astra DB connection namespace.",
|
|
62
|
-
),
|
|
63
|
-
click.Option(
|
|
64
|
-
["--requested-indexing-policy"],
|
|
65
|
-
required=False,
|
|
66
|
-
default=None,
|
|
67
|
-
type=Dict(),
|
|
68
|
-
help="The indexing policy to use for the collection."
|
|
69
|
-
'example: \'{"deny": ["metadata"]}\' ',
|
|
70
|
-
),
|
|
71
|
-
click.Option(
|
|
72
|
-
["--batch-size"],
|
|
73
|
-
default=20,
|
|
74
|
-
type=int,
|
|
75
|
-
help="Number of records per batch",
|
|
76
|
-
),
|
|
77
|
-
]
|
|
78
|
-
return options
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
astra_dest_cmd = DestCmd(
|
|
82
|
-
cmd_name=CONNECTOR_TYPE,
|
|
83
|
-
connection_config=AstraCliConnectionConfig,
|
|
84
|
-
uploader_config=AstraCliUploaderConfig,
|
|
85
|
-
)
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
|
|
3
|
-
import click
|
|
4
|
-
|
|
5
|
-
from unstructured_ingest.v2.cli.base import DestCmd
|
|
6
|
-
from unstructured_ingest.v2.cli.interfaces import CliConfig
|
|
7
|
-
from unstructured_ingest.v2.processes.connectors.azure_cognitive_search import CONNECTOR_TYPE
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
@dataclass
|
|
11
|
-
class AzureCognitiveSearchCliConnectionConfig(CliConfig):
|
|
12
|
-
@staticmethod
|
|
13
|
-
def get_cli_options() -> list[click.Option]:
|
|
14
|
-
options = [
|
|
15
|
-
click.Option(
|
|
16
|
-
["--index"],
|
|
17
|
-
required=True,
|
|
18
|
-
type=str,
|
|
19
|
-
help="The name of the Azure AI (Cognitive) Search index to connect to.",
|
|
20
|
-
envvar="AZURE_SEARCH_INDEX",
|
|
21
|
-
show_envvar=True,
|
|
22
|
-
),
|
|
23
|
-
click.Option(
|
|
24
|
-
["--endpoint"],
|
|
25
|
-
required=True,
|
|
26
|
-
type=str,
|
|
27
|
-
help="The URL endpoint of an Azure AI (Cognitive) search service."
|
|
28
|
-
"In the form of https://{{service_name}}.search.windows.net",
|
|
29
|
-
envvar="AZURE_SEARCH_ENDPOINT",
|
|
30
|
-
show_envvar=True,
|
|
31
|
-
),
|
|
32
|
-
click.Option(
|
|
33
|
-
["--key"],
|
|
34
|
-
required=True,
|
|
35
|
-
type=str,
|
|
36
|
-
help="Credential that is used for authenticating to an Azure service."
|
|
37
|
-
"(is an AzureKeyCredential)",
|
|
38
|
-
envvar="AZURE_SEARCH_API_KEY",
|
|
39
|
-
show_envvar=True,
|
|
40
|
-
),
|
|
41
|
-
]
|
|
42
|
-
return options
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
@dataclass
|
|
46
|
-
class AzureCognitiveSearchCliUploaderConfig(CliConfig):
|
|
47
|
-
@staticmethod
|
|
48
|
-
def get_cli_options() -> list[click.Option]:
|
|
49
|
-
options = [
|
|
50
|
-
click.Option(
|
|
51
|
-
["--batch-size"],
|
|
52
|
-
default=100,
|
|
53
|
-
type=int,
|
|
54
|
-
help="Number of records per batch",
|
|
55
|
-
),
|
|
56
|
-
]
|
|
57
|
-
return options
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
@dataclass
|
|
61
|
-
class AzureCognitiveSearchCliUploadStagerConfig(CliConfig):
|
|
62
|
-
@staticmethod
|
|
63
|
-
def get_cli_options() -> list[click.Option]:
|
|
64
|
-
return []
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
azure_cognitive_search_dest_cmd = DestCmd(
|
|
68
|
-
cmd_name=CONNECTOR_TYPE,
|
|
69
|
-
connection_config=AzureCognitiveSearchCliConnectionConfig,
|
|
70
|
-
uploader_config=AzureCognitiveSearchCliUploaderConfig,
|
|
71
|
-
upload_stager_config=AzureCognitiveSearchCliUploadStagerConfig,
|
|
72
|
-
)
|