vastdb 0.0.5.3__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vast_flatbuf/tabular/GetTableStatsResponse.py +45 -1
- vast_flatbuf/tabular/VipRange.py +56 -0
- vastdb/__init__.py +7 -0
- vastdb/bench/test_perf.py +29 -0
- vastdb/bucket.py +85 -0
- vastdb/{tests/conftest.py → conftest.py} +29 -14
- vastdb/errors.py +175 -0
- vastdb/{api.py → internal_commands.py} +373 -875
- vastdb/schema.py +85 -0
- vastdb/session.py +47 -0
- vastdb/table.py +483 -0
- vastdb/tests/test_imports.py +123 -0
- vastdb/tests/test_nested.py +28 -0
- vastdb/tests/test_projections.py +42 -0
- vastdb/tests/test_sanity.py +34 -15
- vastdb/tests/test_schemas.py +30 -6
- vastdb/tests/test_tables.py +628 -13
- vastdb/tests/util.py +18 -0
- vastdb/transaction.py +54 -0
- vastdb/util.py +11 -10
- vastdb-0.1.1.dist-info/METADATA +38 -0
- {vastdb-0.0.5.3.dist-info → vastdb-0.1.1.dist-info}/RECORD +26 -31
- vast_protobuf/substrait/__init__.py +0 -0
- vast_protobuf/substrait/algebra_pb2.py +0 -1344
- vast_protobuf/substrait/capabilities_pb2.py +0 -46
- vast_protobuf/substrait/ddl_pb2.py +0 -57
- vast_protobuf/substrait/extended_expression_pb2.py +0 -49
- vast_protobuf/substrait/extensions/__init__.py +0 -0
- vast_protobuf/substrait/extensions/extensions_pb2.py +0 -89
- vast_protobuf/substrait/function_pb2.py +0 -168
- vast_protobuf/substrait/parameterized_types_pb2.py +0 -181
- vast_protobuf/substrait/plan_pb2.py +0 -67
- vast_protobuf/substrait/type_expressions_pb2.py +0 -198
- vast_protobuf/substrait/type_pb2.py +0 -350
- vast_protobuf/tabular/__init__.py +0 -0
- vast_protobuf/tabular/rpc_pb2.py +0 -344
- vastdb/bench_scan.py +0 -45
- vastdb/tests/test_create_table_from_parquets.py +0 -50
- vastdb/v2.py +0 -360
- vastdb-0.0.5.3.dist-info/METADATA +0 -47
- {vast_protobuf → vastdb/bench}/__init__.py +0 -0
- {vastdb-0.0.5.3.dist-info → vastdb-0.1.1.dist-info}/LICENSE +0 -0
- {vastdb-0.0.5.3.dist-info → vastdb-0.1.1.dist-info}/WHEEL +0 -0
- {vastdb-0.0.5.3.dist-info → vastdb-0.1.1.dist-info}/top_level.txt +0 -0
vast_protobuf/tabular/rpc_pb2.py
DELETED
|
@@ -1,344 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
-
# source: vast_protobuf/tabular/rpc.proto
|
|
4
|
-
"""Generated protocol buffer code."""
|
|
5
|
-
from google.protobuf import descriptor as _descriptor
|
|
6
|
-
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
7
|
-
from google.protobuf import message as _message
|
|
8
|
-
from google.protobuf import reflection as _reflection
|
|
9
|
-
from google.protobuf import symbol_database as _symbol_database
|
|
10
|
-
# @@protoc_insertion_point(imports)
|
|
11
|
-
|
|
12
|
-
_sym_db = _symbol_database.Default()
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
from vast_protobuf.substrait import algebra_pb2 as vast__protobuf_dot_substrait_dot_algebra__pb2
|
|
16
|
-
from vast_protobuf.substrait import type_pb2 as vast__protobuf_dot_substrait_dot_type__pb2
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1fvast_protobuf/tabular/rpc.proto\x12\tProtoVast\x1a%vast_protobuf/substrait/algebra.proto\x1a\"vast_protobuf/substrait/type.proto\"+\n\x0b\x43ontentInfo\x12\x0e\n\x06offset\x18\x01 \x01(\x04\x12\x0c\n\x04size\x18\x02 \x01(\x04\"x\n\x08SubSplit\x12\n\n\x02id\x18\x01 \x01(\r\x12-\n\x05state\x18\x02 \x01(\x0b\x32\x19.ProtoVast.SubSplit.StateH\x00\x88\x01\x01\x1a\'\n\x05State\x12\x13\n\x06\x62uffer\x18\x01 \x01(\x0cH\x00\x88\x01\x01\x42\t\n\x07_bufferB\x08\n\x06_state\"s\n\x05Split\x12\n\n\x02id\x18\x01 \x01(\r\x12\'\n\x06\x63onfig\x18\x02 \x01(\x0b\x32\x17.ProtoVast.Split.Config\x1a\x35\n\x06\x43onfig\x12\r\n\x05total\x18\x01 \x01(\r\x12\x1c\n\x14row_groups_per_split\x18\x02 \x01(\r\"?\n\x12SubSplitCollection\x12)\n\x06states\x18\x01 \x03(\x0b\x32\x19.ProtoVast.SubSplit.State\".\n\x06RowIds\x12$\n\x04info\x18\x01 \x01(\x0b\x32\x16.ProtoVast.ContentInfo\"8\n\x06Status\x12\x0c\n\x04\x63ode\x18\x01 \x01(\x05\x12\x14\n\x07message\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\n\n\x08_message\"\xef\x01\n\x13SelectRowIdsRequest\x12\x17\n\nlimit_rows\x18\x01 \x01(\rH\x01\x88\x01\x01\x12\x1f\n\x05split\x18\x02 \x01(\x0b\x32\x10.ProtoVast.Split\x12\x30\n\tsubsplits\x18\x03 \x01(\x0b\x32\x1d.ProtoVast.SubSplitCollection\x12\"\n\x08relation\x18\x04 \x01(\x0b\x32\x0e.substrait.RelH\x00\x12\x31\n\x0flegacy_relation\x18\x05 \x01(\x0b\x32\x16.ProtoVast.ContentInfoH\x00\x42\x06\n\x04typeB\r\n\x0b_limit_rows\"\xaa\x06\n\x1aSelectRowIdsResponsePacket\x12\x46\n\x04\x62ody\x18\x01 \x01(\x0b\x32\x36.ProtoVast.SelectRowIdsResponsePacket.SubsplitResponseH\x00\x12\x42\n\x08trailing\x18\x02 \x01(\x0b\x32..ProtoVast.SelectRowIdsResponsePacket.TrailingH\x00\x1ag\n\x10SortedProjection\x12\x19\n\x11projected_columns\x18\x01 \x03(\r\x12\"\n\x07row_ids\x18\x02 \x01(\x0b\x32\x11.ProtoVast.RowIds\x12\x14\n\x0ctable_handle\x18\x03 \x01(\x04\x1a\xc8\x01\n\x08Trailing\x12!\n\x06status\x18\x01 
\x01(\x0b\x32\x11.ProtoVast.Status\x12\x1b\n\x13\x66inished_pagination\x18\x02 \x01(\x08\x12L\n\x07metrics\x18\x04 \x03(\x0b\x32;.ProtoVast.SelectRowIdsResponsePacket.Trailing.MetricsEntry\x1a.\n\x0cMetricsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x03:\x02\x38\x01\x1a\xc3\x02\n\x10SubsplitResponse\x12%\n\x08subsplit\x18\x01 \x01(\x0b\x32\x13.ProtoVast.SubSplit\x12\'\n\x07row_ids\x18\x02 \x01(\x0b\x32\x11.ProtoVast.RowIdsH\x00\x88\x01\x01\x12M\n\rprojected_ids\x18\x03 \x03(\x0b\x32\x36.ProtoVast.SelectRowIdsResponsePacket.SortedProjection\x12T\n\x07metrics\x18\x04 \x03(\x0b\x32\x43.ProtoVast.SelectRowIdsResponsePacket.SubsplitResponse.MetricsEntry\x1a.\n\x0cMetricsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x03:\x02\x38\x01\x42\n\n\x08_row_idsB\x06\n\x04type\"\x9a\x02\n\x12ReadColumnsRequest\x12\x15\n\rnum_subsplits\x18\x01 \x01(\r\x12-\n\rcolumn_schema\x18\x02 \x01(\x0b\x32\x16.substrait.NamedStruct\x12;\n\x0erow_ids_blocks\x18\x03 \x03(\x0b\x32#.ProtoVast.ReadColumnsRequest.Block\x12 \n\x18projection_table_handles\x18\x04 \x03(\x04\x1a_\n\x05\x42lock\x12\"\n\x07row_ids\x18\x01 \x01(\x0b\x32\x11.ProtoVast.RowIds\x12\x1d\n\x10projection_index\x18\x02 \x01(\rH\x00\x88\x01\x01\x42\x13\n\x11_projection_index\"\xeb\x04\n\x19ReadColumnsResponsePacket\x12\x45\n\x04\x62ody\x18\x01 \x01(\x0b\x32\x35.ProtoVast.ReadColumnsResponsePacket.SubsplitResponseH\x00\x12\x41\n\x08trailing\x18\x02 \x01(\x0b\x32-.ProtoVast.ReadColumnsResponsePacket.TrailingH\x00\x1a\xaa\x01\n\x08Trailing\x12!\n\x06status\x18\x01 \x01(\x0b\x32\x11.ProtoVast.Status\x12K\n\x07metrics\x18\x02 \x03(\x0b\x32:.ProtoVast.ReadColumnsResponsePacket.Trailing.MetricsEntry\x1a.\n\x0cMetricsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x03:\x02\x38\x01\x1a\x8e\x02\n\x10SubsplitResponse\x12\x13\n\x0bsubsplit_id\x18\x01 \x01(\r\x12\x18\n\x10start_row_offset\x18\x02 \x01(\x04\x12\x33\n\x0e\x61rrow_ipc_info\x18\x03 
\x01(\x0b\x32\x16.ProtoVast.ContentInfoH\x00\x88\x01\x01\x12S\n\x07metrics\x18\x04 \x03(\x0b\x32\x42.ProtoVast.ReadColumnsResponsePacket.SubsplitResponse.MetricsEntry\x1a.\n\x0cMetricsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x03:\x02\x38\x01\x42\x11\n\x0f_arrow_ipc_infoB\x06\n\x04type\"\xec\x01\n\x10\x43ountRowsRequest\x12\x17\n\nlimit_rows\x18\x01 \x01(\rH\x01\x88\x01\x01\x12\x1f\n\x05split\x18\x02 \x01(\x0b\x32\x10.ProtoVast.Split\x12\x30\n\tsubsplits\x18\x03 \x01(\x0b\x32\x1d.ProtoVast.SubSplitCollection\x12\"\n\x08relation\x18\x04 \x01(\x0b\x32\x0e.substrait.RelH\x00\x12\x31\n\x0flegacy_relation\x18\x05 \x01(\x0b\x32\x16.ProtoVast.ContentInfoH\x00\x42\x06\n\x04typeB\r\n\x0b_limit_rows\"\xc6\x04\n\x17\x43ountRowsResponsePacket\x12\x43\n\x04\x62ody\x18\x01 \x01(\x0b\x32\x33.ProtoVast.CountRowsResponsePacket.SubsplitResponseH\x00\x12?\n\x08trailing\x18\x02 \x01(\x0b\x32+.ProtoVast.CountRowsResponsePacket.TrailingH\x00\x1a\xc5\x01\n\x08Trailing\x12!\n\x06status\x18\x01 \x01(\x0b\x32\x11.ProtoVast.Status\x12\x1b\n\x13\x66inished_pagination\x18\x02 \x01(\x08\x12I\n\x07metrics\x18\x03 \x03(\x0b\x32\x38.ProtoVast.CountRowsResponsePacket.Trailing.MetricsEntry\x1a.\n\x0cMetricsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x03:\x02\x38\x01\x1a\xd4\x01\n\x10SubsplitResponse\x12%\n\x08subsplit\x18\x01 \x01(\x0b\x32\x13.ProtoVast.SubSplit\x12\x16\n\x0e\x61mount_of_rows\x18\x02 \x01(\x04\x12Q\n\x07metrics\x18\x03 \x03(\x0b\x32@.ProtoVast.CountRowsResponsePacket.SubsplitResponse.MetricsEntry\x1a.\n\x0cMetricsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x03:\x02\x38\x01\x42\x06\n\x04type\"\xcf\x03\n\x03Rpc\x12\x14\n\x0c\x63ontent_size\x18\x01 \x01(\x04\x12@\n\x16select_row_ids_request\x18\x02 \x01(\x0b\x32\x1e.ProtoVast.SelectRowIdsRequestH\x00\x12O\n\x1eselect_row_ids_response_packet\x18\x03 \x01(\x0b\x32%.ProtoVast.SelectRowIdsResponsePacketH\x00\x12=\n\x14read_columns_request\x18\x04 
\x01(\x0b\x32\x1d.ProtoVast.ReadColumnsRequestH\x00\x12L\n\x1cread_columns_response_packet\x18\x05 \x01(\x0b\x32$.ProtoVast.ReadColumnsResponsePacketH\x00\x12\x39\n\x12\x63ount_rows_request\x18\x06 \x01(\x0b\x32\x1b.ProtoVast.CountRowsRequestH\x00\x12H\n\x1a\x63ount_rows_response_packet\x18\x07 \x01(\x0b\x32\".ProtoVast.CountRowsResponsePacketH\x00\x42\r\n\x0brpc_messageb\x06proto3')
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
_CONTENTINFO = DESCRIPTOR.message_types_by_name['ContentInfo']
|
|
24
|
-
_SUBSPLIT = DESCRIPTOR.message_types_by_name['SubSplit']
|
|
25
|
-
_SUBSPLIT_STATE = _SUBSPLIT.nested_types_by_name['State']
|
|
26
|
-
_SPLIT = DESCRIPTOR.message_types_by_name['Split']
|
|
27
|
-
_SPLIT_CONFIG = _SPLIT.nested_types_by_name['Config']
|
|
28
|
-
_SUBSPLITCOLLECTION = DESCRIPTOR.message_types_by_name['SubSplitCollection']
|
|
29
|
-
_ROWIDS = DESCRIPTOR.message_types_by_name['RowIds']
|
|
30
|
-
_STATUS = DESCRIPTOR.message_types_by_name['Status']
|
|
31
|
-
_SELECTROWIDSREQUEST = DESCRIPTOR.message_types_by_name['SelectRowIdsRequest']
|
|
32
|
-
_SELECTROWIDSRESPONSEPACKET = DESCRIPTOR.message_types_by_name['SelectRowIdsResponsePacket']
|
|
33
|
-
_SELECTROWIDSRESPONSEPACKET_SORTEDPROJECTION = _SELECTROWIDSRESPONSEPACKET.nested_types_by_name['SortedProjection']
|
|
34
|
-
_SELECTROWIDSRESPONSEPACKET_TRAILING = _SELECTROWIDSRESPONSEPACKET.nested_types_by_name['Trailing']
|
|
35
|
-
_SELECTROWIDSRESPONSEPACKET_TRAILING_METRICSENTRY = _SELECTROWIDSRESPONSEPACKET_TRAILING.nested_types_by_name['MetricsEntry']
|
|
36
|
-
_SELECTROWIDSRESPONSEPACKET_SUBSPLITRESPONSE = _SELECTROWIDSRESPONSEPACKET.nested_types_by_name['SubsplitResponse']
|
|
37
|
-
_SELECTROWIDSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY = _SELECTROWIDSRESPONSEPACKET_SUBSPLITRESPONSE.nested_types_by_name['MetricsEntry']
|
|
38
|
-
_READCOLUMNSREQUEST = DESCRIPTOR.message_types_by_name['ReadColumnsRequest']
|
|
39
|
-
_READCOLUMNSREQUEST_BLOCK = _READCOLUMNSREQUEST.nested_types_by_name['Block']
|
|
40
|
-
_READCOLUMNSRESPONSEPACKET = DESCRIPTOR.message_types_by_name['ReadColumnsResponsePacket']
|
|
41
|
-
_READCOLUMNSRESPONSEPACKET_TRAILING = _READCOLUMNSRESPONSEPACKET.nested_types_by_name['Trailing']
|
|
42
|
-
_READCOLUMNSRESPONSEPACKET_TRAILING_METRICSENTRY = _READCOLUMNSRESPONSEPACKET_TRAILING.nested_types_by_name['MetricsEntry']
|
|
43
|
-
_READCOLUMNSRESPONSEPACKET_SUBSPLITRESPONSE = _READCOLUMNSRESPONSEPACKET.nested_types_by_name['SubsplitResponse']
|
|
44
|
-
_READCOLUMNSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY = _READCOLUMNSRESPONSEPACKET_SUBSPLITRESPONSE.nested_types_by_name['MetricsEntry']
|
|
45
|
-
_COUNTROWSREQUEST = DESCRIPTOR.message_types_by_name['CountRowsRequest']
|
|
46
|
-
_COUNTROWSRESPONSEPACKET = DESCRIPTOR.message_types_by_name['CountRowsResponsePacket']
|
|
47
|
-
_COUNTROWSRESPONSEPACKET_TRAILING = _COUNTROWSRESPONSEPACKET.nested_types_by_name['Trailing']
|
|
48
|
-
_COUNTROWSRESPONSEPACKET_TRAILING_METRICSENTRY = _COUNTROWSRESPONSEPACKET_TRAILING.nested_types_by_name['MetricsEntry']
|
|
49
|
-
_COUNTROWSRESPONSEPACKET_SUBSPLITRESPONSE = _COUNTROWSRESPONSEPACKET.nested_types_by_name['SubsplitResponse']
|
|
50
|
-
_COUNTROWSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY = _COUNTROWSRESPONSEPACKET_SUBSPLITRESPONSE.nested_types_by_name['MetricsEntry']
|
|
51
|
-
_RPC = DESCRIPTOR.message_types_by_name['Rpc']
|
|
52
|
-
ContentInfo = _reflection.GeneratedProtocolMessageType('ContentInfo', (_message.Message,), {
|
|
53
|
-
'DESCRIPTOR' : _CONTENTINFO,
|
|
54
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
55
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.ContentInfo)
|
|
56
|
-
})
|
|
57
|
-
_sym_db.RegisterMessage(ContentInfo)
|
|
58
|
-
|
|
59
|
-
SubSplit = _reflection.GeneratedProtocolMessageType('SubSplit', (_message.Message,), {
|
|
60
|
-
|
|
61
|
-
'State' : _reflection.GeneratedProtocolMessageType('State', (_message.Message,), {
|
|
62
|
-
'DESCRIPTOR' : _SUBSPLIT_STATE,
|
|
63
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
64
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.SubSplit.State)
|
|
65
|
-
})
|
|
66
|
-
,
|
|
67
|
-
'DESCRIPTOR' : _SUBSPLIT,
|
|
68
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
69
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.SubSplit)
|
|
70
|
-
})
|
|
71
|
-
_sym_db.RegisterMessage(SubSplit)
|
|
72
|
-
_sym_db.RegisterMessage(SubSplit.State)
|
|
73
|
-
|
|
74
|
-
Split = _reflection.GeneratedProtocolMessageType('Split', (_message.Message,), {
|
|
75
|
-
|
|
76
|
-
'Config' : _reflection.GeneratedProtocolMessageType('Config', (_message.Message,), {
|
|
77
|
-
'DESCRIPTOR' : _SPLIT_CONFIG,
|
|
78
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
79
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.Split.Config)
|
|
80
|
-
})
|
|
81
|
-
,
|
|
82
|
-
'DESCRIPTOR' : _SPLIT,
|
|
83
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
84
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.Split)
|
|
85
|
-
})
|
|
86
|
-
_sym_db.RegisterMessage(Split)
|
|
87
|
-
_sym_db.RegisterMessage(Split.Config)
|
|
88
|
-
|
|
89
|
-
SubSplitCollection = _reflection.GeneratedProtocolMessageType('SubSplitCollection', (_message.Message,), {
|
|
90
|
-
'DESCRIPTOR' : _SUBSPLITCOLLECTION,
|
|
91
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
92
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.SubSplitCollection)
|
|
93
|
-
})
|
|
94
|
-
_sym_db.RegisterMessage(SubSplitCollection)
|
|
95
|
-
|
|
96
|
-
RowIds = _reflection.GeneratedProtocolMessageType('RowIds', (_message.Message,), {
|
|
97
|
-
'DESCRIPTOR' : _ROWIDS,
|
|
98
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
99
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.RowIds)
|
|
100
|
-
})
|
|
101
|
-
_sym_db.RegisterMessage(RowIds)
|
|
102
|
-
|
|
103
|
-
Status = _reflection.GeneratedProtocolMessageType('Status', (_message.Message,), {
|
|
104
|
-
'DESCRIPTOR' : _STATUS,
|
|
105
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
106
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.Status)
|
|
107
|
-
})
|
|
108
|
-
_sym_db.RegisterMessage(Status)
|
|
109
|
-
|
|
110
|
-
SelectRowIdsRequest = _reflection.GeneratedProtocolMessageType('SelectRowIdsRequest', (_message.Message,), {
|
|
111
|
-
'DESCRIPTOR' : _SELECTROWIDSREQUEST,
|
|
112
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
113
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.SelectRowIdsRequest)
|
|
114
|
-
})
|
|
115
|
-
_sym_db.RegisterMessage(SelectRowIdsRequest)
|
|
116
|
-
|
|
117
|
-
SelectRowIdsResponsePacket = _reflection.GeneratedProtocolMessageType('SelectRowIdsResponsePacket', (_message.Message,), {
|
|
118
|
-
|
|
119
|
-
'SortedProjection' : _reflection.GeneratedProtocolMessageType('SortedProjection', (_message.Message,), {
|
|
120
|
-
'DESCRIPTOR' : _SELECTROWIDSRESPONSEPACKET_SORTEDPROJECTION,
|
|
121
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
122
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.SelectRowIdsResponsePacket.SortedProjection)
|
|
123
|
-
})
|
|
124
|
-
,
|
|
125
|
-
|
|
126
|
-
'Trailing' : _reflection.GeneratedProtocolMessageType('Trailing', (_message.Message,), {
|
|
127
|
-
|
|
128
|
-
'MetricsEntry' : _reflection.GeneratedProtocolMessageType('MetricsEntry', (_message.Message,), {
|
|
129
|
-
'DESCRIPTOR' : _SELECTROWIDSRESPONSEPACKET_TRAILING_METRICSENTRY,
|
|
130
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
131
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.SelectRowIdsResponsePacket.Trailing.MetricsEntry)
|
|
132
|
-
})
|
|
133
|
-
,
|
|
134
|
-
'DESCRIPTOR' : _SELECTROWIDSRESPONSEPACKET_TRAILING,
|
|
135
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
136
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.SelectRowIdsResponsePacket.Trailing)
|
|
137
|
-
})
|
|
138
|
-
,
|
|
139
|
-
|
|
140
|
-
'SubsplitResponse' : _reflection.GeneratedProtocolMessageType('SubsplitResponse', (_message.Message,), {
|
|
141
|
-
|
|
142
|
-
'MetricsEntry' : _reflection.GeneratedProtocolMessageType('MetricsEntry', (_message.Message,), {
|
|
143
|
-
'DESCRIPTOR' : _SELECTROWIDSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY,
|
|
144
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
145
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.SelectRowIdsResponsePacket.SubsplitResponse.MetricsEntry)
|
|
146
|
-
})
|
|
147
|
-
,
|
|
148
|
-
'DESCRIPTOR' : _SELECTROWIDSRESPONSEPACKET_SUBSPLITRESPONSE,
|
|
149
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
150
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.SelectRowIdsResponsePacket.SubsplitResponse)
|
|
151
|
-
})
|
|
152
|
-
,
|
|
153
|
-
'DESCRIPTOR' : _SELECTROWIDSRESPONSEPACKET,
|
|
154
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
155
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.SelectRowIdsResponsePacket)
|
|
156
|
-
})
|
|
157
|
-
_sym_db.RegisterMessage(SelectRowIdsResponsePacket)
|
|
158
|
-
_sym_db.RegisterMessage(SelectRowIdsResponsePacket.SortedProjection)
|
|
159
|
-
_sym_db.RegisterMessage(SelectRowIdsResponsePacket.Trailing)
|
|
160
|
-
_sym_db.RegisterMessage(SelectRowIdsResponsePacket.Trailing.MetricsEntry)
|
|
161
|
-
_sym_db.RegisterMessage(SelectRowIdsResponsePacket.SubsplitResponse)
|
|
162
|
-
_sym_db.RegisterMessage(SelectRowIdsResponsePacket.SubsplitResponse.MetricsEntry)
|
|
163
|
-
|
|
164
|
-
ReadColumnsRequest = _reflection.GeneratedProtocolMessageType('ReadColumnsRequest', (_message.Message,), {
|
|
165
|
-
|
|
166
|
-
'Block' : _reflection.GeneratedProtocolMessageType('Block', (_message.Message,), {
|
|
167
|
-
'DESCRIPTOR' : _READCOLUMNSREQUEST_BLOCK,
|
|
168
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
169
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.ReadColumnsRequest.Block)
|
|
170
|
-
})
|
|
171
|
-
,
|
|
172
|
-
'DESCRIPTOR' : _READCOLUMNSREQUEST,
|
|
173
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
174
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.ReadColumnsRequest)
|
|
175
|
-
})
|
|
176
|
-
_sym_db.RegisterMessage(ReadColumnsRequest)
|
|
177
|
-
_sym_db.RegisterMessage(ReadColumnsRequest.Block)
|
|
178
|
-
|
|
179
|
-
ReadColumnsResponsePacket = _reflection.GeneratedProtocolMessageType('ReadColumnsResponsePacket', (_message.Message,), {
|
|
180
|
-
|
|
181
|
-
'Trailing' : _reflection.GeneratedProtocolMessageType('Trailing', (_message.Message,), {
|
|
182
|
-
|
|
183
|
-
'MetricsEntry' : _reflection.GeneratedProtocolMessageType('MetricsEntry', (_message.Message,), {
|
|
184
|
-
'DESCRIPTOR' : _READCOLUMNSRESPONSEPACKET_TRAILING_METRICSENTRY,
|
|
185
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
186
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.ReadColumnsResponsePacket.Trailing.MetricsEntry)
|
|
187
|
-
})
|
|
188
|
-
,
|
|
189
|
-
'DESCRIPTOR' : _READCOLUMNSRESPONSEPACKET_TRAILING,
|
|
190
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
191
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.ReadColumnsResponsePacket.Trailing)
|
|
192
|
-
})
|
|
193
|
-
,
|
|
194
|
-
|
|
195
|
-
'SubsplitResponse' : _reflection.GeneratedProtocolMessageType('SubsplitResponse', (_message.Message,), {
|
|
196
|
-
|
|
197
|
-
'MetricsEntry' : _reflection.GeneratedProtocolMessageType('MetricsEntry', (_message.Message,), {
|
|
198
|
-
'DESCRIPTOR' : _READCOLUMNSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY,
|
|
199
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
200
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.ReadColumnsResponsePacket.SubsplitResponse.MetricsEntry)
|
|
201
|
-
})
|
|
202
|
-
,
|
|
203
|
-
'DESCRIPTOR' : _READCOLUMNSRESPONSEPACKET_SUBSPLITRESPONSE,
|
|
204
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
205
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.ReadColumnsResponsePacket.SubsplitResponse)
|
|
206
|
-
})
|
|
207
|
-
,
|
|
208
|
-
'DESCRIPTOR' : _READCOLUMNSRESPONSEPACKET,
|
|
209
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
210
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.ReadColumnsResponsePacket)
|
|
211
|
-
})
|
|
212
|
-
_sym_db.RegisterMessage(ReadColumnsResponsePacket)
|
|
213
|
-
_sym_db.RegisterMessage(ReadColumnsResponsePacket.Trailing)
|
|
214
|
-
_sym_db.RegisterMessage(ReadColumnsResponsePacket.Trailing.MetricsEntry)
|
|
215
|
-
_sym_db.RegisterMessage(ReadColumnsResponsePacket.SubsplitResponse)
|
|
216
|
-
_sym_db.RegisterMessage(ReadColumnsResponsePacket.SubsplitResponse.MetricsEntry)
|
|
217
|
-
|
|
218
|
-
CountRowsRequest = _reflection.GeneratedProtocolMessageType('CountRowsRequest', (_message.Message,), {
|
|
219
|
-
'DESCRIPTOR' : _COUNTROWSREQUEST,
|
|
220
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
221
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.CountRowsRequest)
|
|
222
|
-
})
|
|
223
|
-
_sym_db.RegisterMessage(CountRowsRequest)
|
|
224
|
-
|
|
225
|
-
CountRowsResponsePacket = _reflection.GeneratedProtocolMessageType('CountRowsResponsePacket', (_message.Message,), {
|
|
226
|
-
|
|
227
|
-
'Trailing' : _reflection.GeneratedProtocolMessageType('Trailing', (_message.Message,), {
|
|
228
|
-
|
|
229
|
-
'MetricsEntry' : _reflection.GeneratedProtocolMessageType('MetricsEntry', (_message.Message,), {
|
|
230
|
-
'DESCRIPTOR' : _COUNTROWSRESPONSEPACKET_TRAILING_METRICSENTRY,
|
|
231
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
232
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.CountRowsResponsePacket.Trailing.MetricsEntry)
|
|
233
|
-
})
|
|
234
|
-
,
|
|
235
|
-
'DESCRIPTOR' : _COUNTROWSRESPONSEPACKET_TRAILING,
|
|
236
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
237
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.CountRowsResponsePacket.Trailing)
|
|
238
|
-
})
|
|
239
|
-
,
|
|
240
|
-
|
|
241
|
-
'SubsplitResponse' : _reflection.GeneratedProtocolMessageType('SubsplitResponse', (_message.Message,), {
|
|
242
|
-
|
|
243
|
-
'MetricsEntry' : _reflection.GeneratedProtocolMessageType('MetricsEntry', (_message.Message,), {
|
|
244
|
-
'DESCRIPTOR' : _COUNTROWSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY,
|
|
245
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
246
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.CountRowsResponsePacket.SubsplitResponse.MetricsEntry)
|
|
247
|
-
})
|
|
248
|
-
,
|
|
249
|
-
'DESCRIPTOR' : _COUNTROWSRESPONSEPACKET_SUBSPLITRESPONSE,
|
|
250
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
251
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.CountRowsResponsePacket.SubsplitResponse)
|
|
252
|
-
})
|
|
253
|
-
,
|
|
254
|
-
'DESCRIPTOR' : _COUNTROWSRESPONSEPACKET,
|
|
255
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
256
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.CountRowsResponsePacket)
|
|
257
|
-
})
|
|
258
|
-
_sym_db.RegisterMessage(CountRowsResponsePacket)
|
|
259
|
-
_sym_db.RegisterMessage(CountRowsResponsePacket.Trailing)
|
|
260
|
-
_sym_db.RegisterMessage(CountRowsResponsePacket.Trailing.MetricsEntry)
|
|
261
|
-
_sym_db.RegisterMessage(CountRowsResponsePacket.SubsplitResponse)
|
|
262
|
-
_sym_db.RegisterMessage(CountRowsResponsePacket.SubsplitResponse.MetricsEntry)
|
|
263
|
-
|
|
264
|
-
Rpc = _reflection.GeneratedProtocolMessageType('Rpc', (_message.Message,), {
|
|
265
|
-
'DESCRIPTOR' : _RPC,
|
|
266
|
-
'__module__' : 'vast_protobuf.tabular.rpc_pb2'
|
|
267
|
-
# @@protoc_insertion_point(class_scope:ProtoVast.Rpc)
|
|
268
|
-
})
|
|
269
|
-
_sym_db.RegisterMessage(Rpc)
|
|
270
|
-
|
|
271
|
-
if _descriptor._USE_C_DESCRIPTORS == False:
|
|
272
|
-
|
|
273
|
-
DESCRIPTOR._options = None
|
|
274
|
-
_SELECTROWIDSRESPONSEPACKET_TRAILING_METRICSENTRY._options = None
|
|
275
|
-
_SELECTROWIDSRESPONSEPACKET_TRAILING_METRICSENTRY._serialized_options = b'8\001'
|
|
276
|
-
_SELECTROWIDSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._options = None
|
|
277
|
-
_SELECTROWIDSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._serialized_options = b'8\001'
|
|
278
|
-
_READCOLUMNSRESPONSEPACKET_TRAILING_METRICSENTRY._options = None
|
|
279
|
-
_READCOLUMNSRESPONSEPACKET_TRAILING_METRICSENTRY._serialized_options = b'8\001'
|
|
280
|
-
_READCOLUMNSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._options = None
|
|
281
|
-
_READCOLUMNSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._serialized_options = b'8\001'
|
|
282
|
-
_COUNTROWSRESPONSEPACKET_TRAILING_METRICSENTRY._options = None
|
|
283
|
-
_COUNTROWSRESPONSEPACKET_TRAILING_METRICSENTRY._serialized_options = b'8\001'
|
|
284
|
-
_COUNTROWSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._options = None
|
|
285
|
-
_COUNTROWSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._serialized_options = b'8\001'
|
|
286
|
-
_CONTENTINFO._serialized_start=121
|
|
287
|
-
_CONTENTINFO._serialized_end=164
|
|
288
|
-
_SUBSPLIT._serialized_start=166
|
|
289
|
-
_SUBSPLIT._serialized_end=286
|
|
290
|
-
_SUBSPLIT_STATE._serialized_start=237
|
|
291
|
-
_SUBSPLIT_STATE._serialized_end=276
|
|
292
|
-
_SPLIT._serialized_start=288
|
|
293
|
-
_SPLIT._serialized_end=403
|
|
294
|
-
_SPLIT_CONFIG._serialized_start=350
|
|
295
|
-
_SPLIT_CONFIG._serialized_end=403
|
|
296
|
-
_SUBSPLITCOLLECTION._serialized_start=405
|
|
297
|
-
_SUBSPLITCOLLECTION._serialized_end=468
|
|
298
|
-
_ROWIDS._serialized_start=470
|
|
299
|
-
_ROWIDS._serialized_end=516
|
|
300
|
-
_STATUS._serialized_start=518
|
|
301
|
-
_STATUS._serialized_end=574
|
|
302
|
-
_SELECTROWIDSREQUEST._serialized_start=577
|
|
303
|
-
_SELECTROWIDSREQUEST._serialized_end=816
|
|
304
|
-
_SELECTROWIDSRESPONSEPACKET._serialized_start=819
|
|
305
|
-
_SELECTROWIDSRESPONSEPACKET._serialized_end=1629
|
|
306
|
-
_SELECTROWIDSRESPONSEPACKET_SORTEDPROJECTION._serialized_start=989
|
|
307
|
-
_SELECTROWIDSRESPONSEPACKET_SORTEDPROJECTION._serialized_end=1092
|
|
308
|
-
_SELECTROWIDSRESPONSEPACKET_TRAILING._serialized_start=1095
|
|
309
|
-
_SELECTROWIDSRESPONSEPACKET_TRAILING._serialized_end=1295
|
|
310
|
-
_SELECTROWIDSRESPONSEPACKET_TRAILING_METRICSENTRY._serialized_start=1249
|
|
311
|
-
_SELECTROWIDSRESPONSEPACKET_TRAILING_METRICSENTRY._serialized_end=1295
|
|
312
|
-
_SELECTROWIDSRESPONSEPACKET_SUBSPLITRESPONSE._serialized_start=1298
|
|
313
|
-
_SELECTROWIDSRESPONSEPACKET_SUBSPLITRESPONSE._serialized_end=1621
|
|
314
|
-
_SELECTROWIDSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._serialized_start=1249
|
|
315
|
-
_SELECTROWIDSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._serialized_end=1295
|
|
316
|
-
_READCOLUMNSREQUEST._serialized_start=1632
|
|
317
|
-
_READCOLUMNSREQUEST._serialized_end=1914
|
|
318
|
-
_READCOLUMNSREQUEST_BLOCK._serialized_start=1819
|
|
319
|
-
_READCOLUMNSREQUEST_BLOCK._serialized_end=1914
|
|
320
|
-
_READCOLUMNSRESPONSEPACKET._serialized_start=1917
|
|
321
|
-
_READCOLUMNSRESPONSEPACKET._serialized_end=2536
|
|
322
|
-
_READCOLUMNSRESPONSEPACKET_TRAILING._serialized_start=2085
|
|
323
|
-
_READCOLUMNSRESPONSEPACKET_TRAILING._serialized_end=2255
|
|
324
|
-
_READCOLUMNSRESPONSEPACKET_TRAILING_METRICSENTRY._serialized_start=1249
|
|
325
|
-
_READCOLUMNSRESPONSEPACKET_TRAILING_METRICSENTRY._serialized_end=1295
|
|
326
|
-
_READCOLUMNSRESPONSEPACKET_SUBSPLITRESPONSE._serialized_start=2258
|
|
327
|
-
_READCOLUMNSRESPONSEPACKET_SUBSPLITRESPONSE._serialized_end=2528
|
|
328
|
-
_READCOLUMNSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._serialized_start=1249
|
|
329
|
-
_READCOLUMNSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._serialized_end=1295
|
|
330
|
-
_COUNTROWSREQUEST._serialized_start=2539
|
|
331
|
-
_COUNTROWSREQUEST._serialized_end=2775
|
|
332
|
-
_COUNTROWSRESPONSEPACKET._serialized_start=2778
|
|
333
|
-
_COUNTROWSRESPONSEPACKET._serialized_end=3360
|
|
334
|
-
_COUNTROWSRESPONSEPACKET_TRAILING._serialized_start=2940
|
|
335
|
-
_COUNTROWSRESPONSEPACKET_TRAILING._serialized_end=3137
|
|
336
|
-
_COUNTROWSRESPONSEPACKET_TRAILING_METRICSENTRY._serialized_start=1249
|
|
337
|
-
_COUNTROWSRESPONSEPACKET_TRAILING_METRICSENTRY._serialized_end=1295
|
|
338
|
-
_COUNTROWSRESPONSEPACKET_SUBSPLITRESPONSE._serialized_start=3140
|
|
339
|
-
_COUNTROWSRESPONSEPACKET_SUBSPLITRESPONSE._serialized_end=3352
|
|
340
|
-
_COUNTROWSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._serialized_start=1249
|
|
341
|
-
_COUNTROWSRESPONSEPACKET_SUBSPLITRESPONSE_METRICSENTRY._serialized_end=1295
|
|
342
|
-
_RPC._serialized_start=3363
|
|
343
|
-
_RPC._serialized_end=3826
|
|
344
|
-
# @@protoc_insertion_point(module_scope)
|
vastdb/bench_scan.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
from vastdb import api
|
|
2
|
-
|
|
3
|
-
from logbook import Logger, StreamHandler
|
|
4
|
-
import sys
|
|
5
|
-
import time
|
|
6
|
-
import pprint
|
|
7
|
-
|
|
8
|
-
StreamHandler(sys.stdout).push_application()
|
|
9
|
-
log = Logger('Logbook')
|
|
10
|
-
|
|
11
|
-
# access_key_id=F3YUMQZDQB60ZZJ1PBAZ
|
|
12
|
-
# secret_access_key=9a9Q3if6IC5LjUexly/nXFv1UCANBnhGxi++Sw6p
|
|
13
|
-
|
|
14
|
-
a = api.VastdbApi(
|
|
15
|
-
access_key='F3YUMQZDQB60ZZJ1PBAZ',
|
|
16
|
-
secret_key='9a9Q3if6IC5LjUexly/nXFv1UCANBnhGxi++Sw6p',
|
|
17
|
-
host='172.19.111.1:172.19.111.16')
|
|
18
|
-
|
|
19
|
-
kwargs = dict(
|
|
20
|
-
bucket='tabular-slothful-jocular-jack',
|
|
21
|
-
schema='tpcds_schema_create_as_select',
|
|
22
|
-
table='store_sales',
|
|
23
|
-
field_names=['ss_sold_date_sk', 'ss_sold_time_sk', 'ss_item_sk'],
|
|
24
|
-
filters={'ss_item_sk': ['le 1']},
|
|
25
|
-
num_sub_splits=8)
|
|
26
|
-
|
|
27
|
-
pprint.pprint(kwargs)
|
|
28
|
-
|
|
29
|
-
res = a.query_iterator(**kwargs)
|
|
30
|
-
|
|
31
|
-
total_bytes = 0
|
|
32
|
-
total_rows = 0
|
|
33
|
-
start = time.time()
|
|
34
|
-
last_log = None
|
|
35
|
-
|
|
36
|
-
for b in res:
|
|
37
|
-
total_bytes += b.get_total_buffer_size()
|
|
38
|
-
total_rows += len(b)
|
|
39
|
-
dt = time.time() - start
|
|
40
|
-
if last_log != int(dt):
|
|
41
|
-
log.info("{:.3f} Mrow/s, {:.3f} MB/s", (total_rows/dt) / 1e6, (total_bytes/dt) / 1e6)
|
|
42
|
-
last_log = int(dt)
|
|
43
|
-
|
|
44
|
-
dt = time.time() - start
|
|
45
|
-
log.info("Done after {:.3f} seconds, {:.3f} Mrows, {:.3f} MB", dt, total_rows / 1e6, total_bytes / 1e6)
|
vastdb/tests/test_create_table_from_parquets.py
DELETED
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
import os
|
|
3
|
-
|
|
4
|
-
import pyarrow as pa
|
|
5
|
-
import pyarrow.parquet as pq
|
|
6
|
-
|
|
7
|
-
from vastdb.v2 import InvalidArgumentError
|
|
8
|
-
from vastdb import util
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def test_create_table_from_files(rpc, clean_bucket_name, s3):
|
|
12
|
-
datasets = [
|
|
13
|
-
{'num': [0],
|
|
14
|
-
'varch': ['z']},
|
|
15
|
-
{'num': [1, 2, 3, 4, 5],
|
|
16
|
-
'varch': ['a', 'b', 'c', 'd', 'e']},
|
|
17
|
-
{'num': [1, 2, 3, 4, 5],
|
|
18
|
-
'bool': [True, False, None, None, False],
|
|
19
|
-
'varch': ['a', 'b', 'c', 'd', 'e']},
|
|
20
|
-
{'num': [1, 2],
|
|
21
|
-
'bool': [True, True]},
|
|
22
|
-
{'varch': ['a', 'b', 'c'],
|
|
23
|
-
'mismatch': [1, 2, 3]}
|
|
24
|
-
]
|
|
25
|
-
for i, ds in enumerate(datasets):
|
|
26
|
-
table = pa.Table.from_pydict(ds)
|
|
27
|
-
pq.write_table(table, f'prq{i}')
|
|
28
|
-
with open(f'prq{i}', 'rb') as f:
|
|
29
|
-
s3.put_object(Bucket=clean_bucket_name, Key=f'prq{i}', Body=f)
|
|
30
|
-
os.remove(f'prq{i}')
|
|
31
|
-
|
|
32
|
-
same_schema_files = [f'/{clean_bucket_name}/prq{i}' for i in range(2)]
|
|
33
|
-
contained_schema_files = [f'/{clean_bucket_name}/prq{i}' for i in range(4)]
|
|
34
|
-
different_schema_files = [f'/{clean_bucket_name}/prq{i}' for i in range(5)]
|
|
35
|
-
|
|
36
|
-
with rpc.transaction() as tx:
|
|
37
|
-
b = tx.bucket(clean_bucket_name)
|
|
38
|
-
s = b.create_schema('s1')
|
|
39
|
-
t = util.create_table_from_files(s, 't1', contained_schema_files)
|
|
40
|
-
assert len(t.arrow_schema) == 3
|
|
41
|
-
assert t.arrow_schema == pa.schema([('num', pa.int64()), ('bool', pa.bool_()), ('varch', pa.string())])
|
|
42
|
-
|
|
43
|
-
with pytest.raises(InvalidArgumentError):
|
|
44
|
-
util.create_table_from_files(s, 't2', different_schema_files)
|
|
45
|
-
|
|
46
|
-
with pytest.raises(InvalidArgumentError):
|
|
47
|
-
util.create_table_from_files(s, 't2', contained_schema_files, schema_merge_func=util.strict_schema_merge)
|
|
48
|
-
|
|
49
|
-
util.create_table_from_files(s, 't2', different_schema_files, schema_merge_func=util.union_schema_merge)
|
|
50
|
-
util.create_table_from_files(s, 't3', same_schema_files, schema_merge_func=util.strict_schema_merge)
|