waterfall-0.2.0-py3-none-any.whl → waterfall-0.2.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- waterfall/WatermarkerBase.py +13 -7
- waterfall/watermark.py +0 -2
- {waterfall-0.2.0.dist-info → waterfall-0.2.2.dist-info}/METADATA +1 -1
- {waterfall-0.2.0.dist-info → waterfall-0.2.2.dist-info}/RECORD +7 -7
- {waterfall-0.2.0.dist-info → waterfall-0.2.2.dist-info}/WHEEL +0 -0
- {waterfall-0.2.0.dist-info → waterfall-0.2.2.dist-info}/entry_points.txt +0 -0
- {waterfall-0.2.0.dist-info → waterfall-0.2.2.dist-info}/licenses/LICENSE +0 -0
waterfall/WatermarkerBase.py
CHANGED
|
@@ -22,6 +22,8 @@ from waterfall.permute import Permute
|
|
|
22
22
|
from waterfall.WatermarkingFn import WatermarkingFn
|
|
23
23
|
from waterfall.WatermarkingFnFourier import WatermarkingFnFourier
|
|
24
24
|
|
|
25
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
26
|
+
|
|
25
27
|
class PerturbationProcessor(LogitsProcessor):
|
|
26
28
|
def __init__(self,
|
|
27
29
|
N : int = 32000, # Vocab size
|
|
@@ -90,10 +92,9 @@ class Watermarker:
|
|
|
90
92
|
n_gram : int = 2,
|
|
91
93
|
watermarkingFnClass = WatermarkingFnFourier,
|
|
92
94
|
device = None,
|
|
95
|
+
dtype = torch.bfloat16,
|
|
93
96
|
) -> None:
|
|
94
97
|
assert kappa >= 0, f"kappa must be >= 0, value provided is {kappa}"
|
|
95
|
-
|
|
96
|
-
self.id = id
|
|
97
98
|
self.k_p = k_p
|
|
98
99
|
self.n_gram = n_gram
|
|
99
100
|
self.kappa = kappa
|
|
@@ -114,21 +115,26 @@ class Watermarker:
|
|
|
114
115
|
|
|
115
116
|
self.N = self.tokenizer.vocab_size
|
|
116
117
|
|
|
117
|
-
self.logits_processor = PerturbationProcessor(N = self.N, id = self.id)
|
|
118
|
-
|
|
119
118
|
if isinstance(model, str):
|
|
120
|
-
self.load_model(model, device_map=device)
|
|
119
|
+
self.load_model(model, device_map=device, dtype=dtype)
|
|
121
120
|
else:
|
|
122
121
|
self.model = model
|
|
123
122
|
|
|
124
123
|
assert (self.model is None) or isinstance(self.model, PreTrainedModel), f"model must be a transformers model, value provided is {type(self.model)}" # argument order for tokenizer and model were swapped since the original code
|
|
125
124
|
|
|
126
|
-
self.
|
|
125
|
+
self.watermarkingFnClass = watermarkingFnClass
|
|
126
|
+
self.set_id(id)
|
|
127
|
+
|
|
128
|
+
def set_id(self, id : int):
|
|
129
|
+
self.id = id
|
|
130
|
+
self.logits_processor = PerturbationProcessor(N = self.N, id = self.id)
|
|
131
|
+
self.compute_phi(self.watermarkingFnClass)
|
|
127
132
|
|
|
128
|
-
def load_model(self, model_name_or_path : str, device_map : str = "auto"):
|
|
133
|
+
def load_model(self, model_name_or_path : str, device_map : str = "auto", dtype = torch.bfloat16):
|
|
129
134
|
self.model = AutoModelForCausalLM.from_pretrained(
|
|
130
135
|
model_name_or_path,
|
|
131
136
|
device_map=device_map,
|
|
137
|
+
dtype=dtype,
|
|
132
138
|
)
|
|
133
139
|
|
|
134
140
|
def compute_phi(self, watermarkingFnClass = WatermarkingFnFourier) -> None:
|
waterfall/watermark.py
CHANGED
|
@@ -15,8 +15,6 @@ from waterfall.WatermarkingFnFourier import WatermarkingFnFourier
|
|
|
15
15
|
from waterfall.WatermarkingFnSquare import WatermarkingFnSquare
|
|
16
16
|
from waterfall.WatermarkerBase import Watermarker
|
|
17
17
|
|
|
18
|
-
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
19
|
-
|
|
20
18
|
PROMPT = (
|
|
21
19
|
"Paraphrase the user provided text while preserving semantic similarity. "
|
|
22
20
|
"Do not include any other sentences in the response, such as explanations of the paraphrasing. "
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: waterfall
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Scalable Framework for Robust Text Watermarking and Provenance for LLMs
|
|
5
5
|
Project-URL: Homepage, https://github.com/aoi3142/Waterfall
|
|
6
6
|
Project-URL: Issues, https://github.com/aoi3142/Waterfall/issues
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
waterfall/WatermarkerBase.py,sha256=
|
|
1
|
+
waterfall/WatermarkerBase.py,sha256=cJN1gKY8aFrFf3YVg4H6ukxnc4qLPAtFKyjkSsJjsdQ,21040
|
|
2
2
|
waterfall/WatermarkingFn.py,sha256=-b-kGRdL0a7eKRqJmcHPAR_rCjxQYnsg1Ne6bTwBc1I,1931
|
|
3
3
|
waterfall/WatermarkingFnFourier.py,sha256=QYayAQYwi1dQkDIyqmvhU568VhrVYTVy47HkI8F8SZs,1358
|
|
4
4
|
waterfall/WatermarkingFnSquare.py,sha256=2PAO05DdKT02npo7GDf_82D520nP7kGAWK6H4E4JMt4,1638
|
|
5
5
|
waterfall/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
waterfall/permute.py,sha256=uYKdmn4pGvjB6hInInLGxFIF6vt507lqJ_qe-ST1PFE,2783
|
|
7
|
-
waterfall/watermark.py,sha256=
|
|
8
|
-
waterfall-0.2.
|
|
9
|
-
waterfall-0.2.
|
|
10
|
-
waterfall-0.2.
|
|
11
|
-
waterfall-0.2.
|
|
12
|
-
waterfall-0.2.
|
|
7
|
+
waterfall/watermark.py,sha256=IbH5r3oqjtKztDVryfDTr_NDn-CLZHow0S8nAEtZmdc,14420
|
|
8
|
+
waterfall-0.2.2.dist-info/METADATA,sha256=Np7WY47ZGHDGsquIQIxEwWeoOMuDjPuIE8ZBlNCvUQI,8722
|
|
9
|
+
waterfall-0.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
+
waterfall-0.2.2.dist-info/entry_points.txt,sha256=XXnUzuWXu2nc9j4WAll9tq6HyodN_8WJLjeG0O4Y2Gw,60
|
|
11
|
+
waterfall-0.2.2.dist-info/licenses/LICENSE,sha256=zAtaO-k41Q-Q4Etl4bzuh7pgNJsPH-dYfzvznRa0OvM,11341
|
|
12
|
+
waterfall-0.2.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|