stream2pg 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stream2pg
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: Kafka to PostgreSQL sink using Spark Structured Streaming
5
5
  Author-email: Sahand Akramipour <sahandap@gmail.com>
6
6
  License-Expression: MIT
@@ -39,13 +39,13 @@ Dynamic: license-file
39
39
  Create tables. Evolve schemas. Load data.
40
40
 
41
41
  <a href="https://pypi.org/project/stream2pg/">
42
- <img src="https://img.shields.io/pypi/v/stream2pg.svg" alt="PyPI">
42
+ <img src="https://img.shields.io/pypi/v/stream2pg.svg?cacheSeconds=60" alt="PyPI">
43
43
  </a>
44
44
  <a href="https://pypi.org/project/stream2pg/">
45
- <img src="https://img.shields.io/pypi/pyversions/stream2pg.svg" alt="Python">
45
+ <img src="https://img.shields.io/pypi/pyversions/stream2pg.svg?cacheSeconds=60" alt="Python">
46
46
  </a>
47
47
  <a href="LICENSE">
48
- <img src="https://img.shields.io/github/license/YOUR_USERNAME/stream2pg">
48
+ <img src="https://img.shields.io/github/license/shndap/stream2pg" alt="License">
49
49
  </a>
50
50
 
51
51
  </div>
@@ -118,7 +118,7 @@ config = {
118
118
  },
119
119
  "kafka": {
120
120
  "bootstrap_servers": "localhost:9092",
121
- "subscribe_pattern": "mobility-.*",
121
+ "topic_prefix": "mobility-",
122
122
  "starting_offsets": "earliest",
123
123
  "fail_on_data_loss": "false",
124
124
  },
@@ -190,12 +190,21 @@ run(config)
190
190
  ```python
191
191
  {
192
192
  "bootstrap_servers": "localhost:9092",
193
+ "topic_prefix": "mobility-",
193
194
  "subscribe_pattern": "mobility-.*",
194
195
  "starting_offsets": "earliest",
195
196
  "fail_on_data_loss": "false",
196
197
  }
197
198
  ```
198
199
 
200
+ | Parameter | Description |
201
+ |--------------------|----------------------------------------------------------|
202
+ | `bootstrap_servers`| Kafka broker addresses (default: `localhost:9092`) |
203
+ | `topic_prefix` | Prefix stripped from topic names to derive table names (default: `""`, topic name used as-is) |
204
+ | `subscribe_pattern`| Kafka topic subscription pattern, a regex (default: `{topic_prefix}.*`) |
205
+ | `starting_offsets` | Where to start reading (default: `earliest`) |
206
+ | `fail_on_data_loss`| Fail on data loss (default: `false`) |
207
+
199
208
  ### Processing
200
209
 
201
210
  ```python
@@ -7,13 +7,13 @@
7
7
  Create tables. Evolve schemas. Load data.
8
8
 
9
9
  <a href="https://pypi.org/project/stream2pg/">
10
- <img src="https://img.shields.io/pypi/v/stream2pg.svg" alt="PyPI">
10
+ <img src="https://img.shields.io/pypi/v/stream2pg.svg?cacheSeconds=60" alt="PyPI">
11
11
  </a>
12
12
  <a href="https://pypi.org/project/stream2pg/">
13
- <img src="https://img.shields.io/pypi/pyversions/stream2pg.svg" alt="Python">
13
+ <img src="https://img.shields.io/pypi/pyversions/stream2pg.svg?cacheSeconds=60" alt="Python">
14
14
  </a>
15
15
  <a href="LICENSE">
16
- <img src="https://img.shields.io/github/license/YOUR_USERNAME/stream2pg">
16
+ <img src="https://img.shields.io/github/license/shndap/stream2pg" alt="License">
17
17
  </a>
18
18
 
19
19
  </div>
@@ -86,7 +86,7 @@ config = {
86
86
  },
87
87
  "kafka": {
88
88
  "bootstrap_servers": "localhost:9092",
89
- "subscribe_pattern": "mobility-.*",
89
+ "topic_prefix": "mobility-",
90
90
  "starting_offsets": "earliest",
91
91
  "fail_on_data_loss": "false",
92
92
  },
@@ -158,12 +158,21 @@ run(config)
158
158
  ```python
159
159
  {
160
160
  "bootstrap_servers": "localhost:9092",
161
+ "topic_prefix": "mobility-",
161
162
  "subscribe_pattern": "mobility-.*",
162
163
  "starting_offsets": "earliest",
163
164
  "fail_on_data_loss": "false",
164
165
  }
165
166
  ```
166
167
 
168
+ | Parameter | Description |
169
+ |--------------------|----------------------------------------------------------|
170
+ | `bootstrap_servers`| Kafka broker addresses (default: `localhost:9092`) |
171
+ | `topic_prefix` | Prefix stripped from topic names to derive table names (default: `""`, topic name used as-is) |
172
+ | `subscribe_pattern`| Kafka topic subscription pattern, a regex (default: `{topic_prefix}.*`) |
173
+ | `starting_offsets` | Where to start reading (default: `earliest`) |
174
+ | `fail_on_data_loss`| Fail on data loss (default: `false`) |
175
+
167
176
  ### Processing
168
177
 
169
178
  ```python
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "stream2pg"
7
- version = "0.1.0"
7
+ version = "0.1.2"
8
8
  description = "Kafka to PostgreSQL sink using Spark Structured Streaming"
9
9
  readme = {file = "README.md", content-type = "text/markdown"}
10
10
  license = "MIT"
@@ -27,6 +27,7 @@ def process_batch(
27
27
  batch_id: int,
28
28
  pg_config: dict[str, Any],
29
29
  error_strategy: ErrorStrategy,
30
+ topic_prefix: str = "",
30
31
  on_metrics: Optional[Callable[..., None]] = None,
31
32
  ) -> None:
32
33
  start_time = time.time()
@@ -52,7 +53,11 @@ def process_batch(
52
53
  try:
53
54
  known_columns: dict[str, frozenset] = {}
54
55
  for row in batch_df.toLocalIterator():
55
- table_name = row.topic.replace("mobility-", "")
56
+ table_name = (
57
+ row.topic[len(topic_prefix) :]
58
+ if row.topic.startswith(topic_prefix)
59
+ else row.topic
60
+ )
56
61
  try:
57
62
  record = json.loads(row.value)
58
63
  except json.JSONDecodeError:
@@ -93,15 +98,16 @@ def create_kafka_stream(
93
98
  kafka_config: dict[str, Any],
94
99
  schema: Optional[StructType] = None,
95
100
  ):
101
+ topic_prefix = kafka_config.get("topic_prefix", "")
102
+ subscribe_pattern = kafka_config.get("subscribe_pattern", f"{topic_prefix}.*")
103
+
96
104
  reader = (
97
105
  spark.readStream.format("kafka")
98
106
  .option(
99
107
  "kafka.bootstrap.servers",
100
108
  kafka_config.get("bootstrap_servers", "localhost:9092"),
101
109
  )
102
- .option(
103
- "subscribePattern", kafka_config.get("subscribe_pattern", "mobility-.*")
104
- )
110
+ .option("subscribePattern", subscribe_pattern)
105
111
  .option("startingOffsets", kafka_config.get("starting_offsets", "earliest"))
106
112
  .option(
107
113
  "failOnDataLoss",
@@ -33,12 +33,15 @@ class Stream2Pg:
33
33
 
34
34
  df = create_kafka_stream(spark, kafka_cfg)
35
35
 
36
+ topic_prefix = kafka_cfg.get("topic_prefix", "")
37
+
36
38
  def foreach_batch_fn(batch_df, batch_id):
37
39
  process_batch(
38
40
  batch_df,
39
41
  batch_id,
40
42
  postgres_cfg,
41
43
  error_strategy,
44
+ topic_prefix,
42
45
  on_metrics=self.on_metrics,
43
46
  )
44
47
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stream2pg
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: Kafka to PostgreSQL sink using Spark Structured Streaming
5
5
  Author-email: Sahand Akramipour <sahandap@gmail.com>
6
6
  License-Expression: MIT
@@ -39,13 +39,13 @@ Dynamic: license-file
39
39
  Create tables. Evolve schemas. Load data.
40
40
 
41
41
  <a href="https://pypi.org/project/stream2pg/">
42
- <img src="https://img.shields.io/pypi/v/stream2pg.svg" alt="PyPI">
42
+ <img src="https://img.shields.io/pypi/v/stream2pg.svg?cacheSeconds=60" alt="PyPI">
43
43
  </a>
44
44
  <a href="https://pypi.org/project/stream2pg/">
45
- <img src="https://img.shields.io/pypi/pyversions/stream2pg.svg" alt="Python">
45
+ <img src="https://img.shields.io/pypi/pyversions/stream2pg.svg?cacheSeconds=60" alt="Python">
46
46
  </a>
47
47
  <a href="LICENSE">
48
- <img src="https://img.shields.io/github/license/YOUR_USERNAME/stream2pg">
48
+ <img src="https://img.shields.io/github/license/shndap/stream2pg" alt="License">
49
49
  </a>
50
50
 
51
51
  </div>
@@ -118,7 +118,7 @@ config = {
118
118
  },
119
119
  "kafka": {
120
120
  "bootstrap_servers": "localhost:9092",
121
- "subscribe_pattern": "mobility-.*",
121
+ "topic_prefix": "mobility-",
122
122
  "starting_offsets": "earliest",
123
123
  "fail_on_data_loss": "false",
124
124
  },
@@ -190,12 +190,21 @@ run(config)
190
190
  ```python
191
191
  {
192
192
  "bootstrap_servers": "localhost:9092",
193
+ "topic_prefix": "mobility-",
193
194
  "subscribe_pattern": "mobility-.*",
194
195
  "starting_offsets": "earliest",
195
196
  "fail_on_data_loss": "false",
196
197
  }
197
198
  ```
198
199
 
200
+ | Parameter | Description |
201
+ |--------------------|----------------------------------------------------------|
202
+ | `bootstrap_servers`| Kafka broker addresses (default: `localhost:9092`) |
203
+ | `topic_prefix` | Prefix stripped from topic names to derive table names (default: `""`, topic name used as-is) |
204
+ | `subscribe_pattern`| Kafka topic subscription pattern, a regex (default: `{topic_prefix}.*`) |
205
+ | `starting_offsets` | Where to start reading (default: `earliest`) |
206
+ | `fail_on_data_loss`| Fail on data loss (default: `false`) |
207
+
199
208
  ### Processing
200
209
 
201
210
  ```python
File without changes
File without changes
File without changes
File without changes