PyPI - tf-models-nightly - Versions diffs - 2.17.0.dev20240327__py2.py3-none-any.whl → 2.17.0.dev20240329__py2.py3-none-any.whl - Mend

tf-models-nightly 2.17.0.dev20240327__py2.py3-none-any.whl → 2.17.0.dev20240329__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

official/nlp/modeling/ops/beam_search.py CHANGED Viewed

@@ -69,6 +69,9 @@ class _StateKeys(object):
   # At the beginning, all of the sequences in FINISHED_SEQ are filler values.
   # True -> finished sequence, False -> filler. Shape [batch_size, beam_size]
   FINISHED_FLAGS = "FINISHED_FLAGS"
+  # For the prefix-matching hack. Beam search will only constrain the next
+  # token to positions where the mask is 1.
+  CONSTRAINT_MASK = "CONSTRAINT_MASK"
 def _expand_to_same_rank(tensor, target):
@@ -150,7 +153,7 @@ class SequenceBeamSearch(tf.Module):
     self.decoding_name = decoding_name
     self.noise_multiplier = noise_multiplier
-  def search(self, initial_ids, initial_cache):
+  def search(self, initial_ids, initial_cache, constraint_mask=None):
     """Beam search for sequences with highest scores.
     Args:
@@ -158,6 +161,9 @@ class SequenceBeamSearch(tf.Module):
         with shape [batch_size, 1]
       initial_cache: dictionary storing values to be passed into the
         symbols_to_logits_fn.
+      constraint_mask: a [vocab_size] tensor in which 1 marks an allowed
+        prefix token. During autoregressive decoding, the first token must
+        be one for which constraint_mask is 1.
     Returns:
       finished_seq and finished_scores.
@@ -165,8 +171,9 @@ class SequenceBeamSearch(tf.Module):
     batch_size = (
         initial_ids.shape.as_list()[0]
         if self.padded_decode else tf.shape(initial_ids)[0])
-    state, state_shapes = self._create_initial_state(initial_ids, initial_cache,
-                                                     batch_size)
+    state, state_shapes = self._create_initial_state(
+        initial_ids, initial_cache, batch_size, constraint_mask=constraint_mask
+    )
     def _grow_alive_seq(state):
       """Grow alive sequences by one token, collect top 2*beam_size sequences.
@@ -204,6 +211,21 @@ class SequenceBeamSearch(tf.Module):
       flat_logits, flat_cache = self.symbols_to_logits_fn(
           flat_ids, i, flat_cache)
+      if _StateKeys.CONSTRAINT_MASK in state:
+        constraint_mask = state[_StateKeys.CONSTRAINT_MASK]
+        constraint_mask = tf.cond(
+            tf.equal(i, 0),
+            lambda: constraint_mask,
+            lambda: tf.ones_like(constraint_mask),
+        )
+        penalty = tf.cast(
+            tf.cast(constraint_mask != 1, tf.int32) * 999_999_999,
+            flat_logits.dtype,
+        )
+        flat_logits = flat_logits - penalty[tf.newaxis, :]
+      else:
+        constraint_mask = None
       if self.noise_multiplier > 0:
         noise = tf.random.uniform(flat_logits.shape, dtype=flat_logits.dtype)
         # Generates standard Gumbel(0, 1) noise, GSE Tensors
@@ -250,7 +272,7 @@ class SequenceBeamSearch(tf.Module):
       else:
         topk_seq = tf.concat(
             [topk_seq, tf.expand_dims(topk_ids, axis=2)], axis=2)
-      return topk_seq, topk_log_probs, topk_ids, new_cache
+      return topk_seq, topk_log_probs, topk_ids, new_cache, constraint_mask
     def _get_new_alive_state(new_seq, new_log_probs, new_finished_flags,
                              new_cache):
@@ -363,7 +385,9 @@ class SequenceBeamSearch(tf.Module):
         new state dictionary.
       """
       # Grow alive sequences by one token.
-      new_seq, new_log_probs, topk_ids, new_cache = _grow_alive_seq(state)
+      new_seq, new_log_probs, topk_ids, new_cache, constraint_mask = (
+          _grow_alive_seq(state)
+      )
       new_finished_flags = tf.equal(topk_ids, self.eos_id[0])
       for eos_id in self.eos_id[1:]:
         one_finished_flags = tf.equal(topk_ids, eos_id)
@@ -383,6 +407,8 @@ class SequenceBeamSearch(tf.Module):
       new_state = {_StateKeys.CUR_INDEX: state[_StateKeys.CUR_INDEX] + 1}
       new_state.update(alive_state)
       new_state.update(finished_state)
+      if constraint_mask is not None:
+        new_state[_StateKeys.CONSTRAINT_MASK] = constraint_mask
       return [new_state]
     finished_state = tf.nest.map_structure(
@@ -415,7 +441,9 @@ class SequenceBeamSearch(tf.Module):
     finished_scores = tf.where(score_cond, finished_scores, alive_log_probs)
     return finished_seq, finished_scores
-  def _create_initial_state(self, initial_ids, initial_cache, batch_size):
+  def _create_initial_state(
+      self, initial_ids, initial_cache, batch_size, constraint_mask=None
+  ):
     """Return initial state dictionary and its shape invariants."""
     for key, value in initial_cache.items():
       for inner_value in tf.nest.flatten(value):
@@ -466,6 +494,8 @@ class SequenceBeamSearch(tf.Module):
         _StateKeys.FINISHED_SCORES: finished_scores,
         _StateKeys.FINISHED_FLAGS: finished_flags
     }
+    if constraint_mask is not None:
+      state[_StateKeys.CONSTRAINT_MASK] = constraint_mask
     # Create state invariants for each value in the state dictionary. Each
     # dimension must be a constant or None. A None dimension means either:
@@ -509,6 +539,10 @@ class SequenceBeamSearch(tf.Module):
           _StateKeys.FINISHED_FLAGS:
               tf.TensorShape([None, self.beam_size])
       }
+    if constraint_mask is not None:
+      state_shape_invariants[_StateKeys.CONSTRAINT_MASK] = tf.TensorShape(
+          [self.vocab_size]
+      )
     return state, state_shape_invariants
@@ -614,6 +648,7 @@ def sequence_beam_search(
     dtype="float32",
     noise_multiplier: float = 0.0,
     decoding_name=None,
+    constraint_mask=None,
 ):
   """Search for sequence of subtoken ids with the largest probability.
@@ -641,6 +676,8 @@ def sequence_beam_search(
       tf.float32.
     noise_multiplier: The amount of noise.
     decoding_name: an optional name for the decoding loop tensors.
+    constraint_mask: Beam search will only constrain the next token to
+      positions where the mask is 1.
   Returns:
     Top decoded sequences [batch_size, beam_size, max_decode_length]
@@ -658,7 +695,7 @@ def sequence_beam_search(
       noise_multiplier,
       decoding_name,
   )
-  return sbs.search(initial_ids, initial_cache)
+  return sbs.search(initial_ids, initial_cache, constraint_mask=constraint_mask)
 def _log_prob_from_logits(logits):

official/nlp/modeling/ops/beam_search_test.py CHANGED Viewed

@@ -150,6 +150,54 @@ class BeamSearchTests(tf.test.TestCase, parameterized.TestCase):
     else:
       self.assertAllEqual([[[0, 1, 0, 1], [0, 1, 1, 2]]], predictions)
+  @parameterized.named_parameters([
+      ('padded_decode_true_with_name', True, 0.0, 'decoding'),
+      ('padded_decode_false_with_name', False, 0.0, 'decoding'),
+      ('padded_decode_true_without_name', True, 0.0, None),
+      ('padded_decode_false_without_name', False, 0.0, None),
+      ('padded_decode_false_with_noise', False, 0.5, 'decoding'),
+  ])
+  def test_sequence_beam_search_with_prefix_constraint(
+      self, padded_decode, noise_multiplier, name
+  ):
+    # batch_size*beam_size, max_decode_length, vocab_size
+    probabilities = tf.constant([
+        [[0.2, 0.7, 0.1], [0.5, 0.3, 0.2], [0.1, 0.8, 0.1]],
+        [[0.1, 0.8, 0.1], [0.3, 0.4, 0.3], [0.2, 0.1, 0.7]],
+    ])
+    # batch_size, max_decode_length, num_heads, embed_size per head
+    x = tf.zeros([1, 3, 2, 32], dtype=tf.float32)
+    cache = {'layer_%d' % layer: {'k': x, 'v': x} for layer in range(2)}
+    def _get_test_symbols_to_logits_fn():
+      """Test function that returns logits for next token."""
+      def symbols_to_logits_fn(_, i, cache):
+        logits = tf.cast(probabilities[:, i, :], tf.float32)
+        return logits, cache
+      return symbols_to_logits_fn
+    predictions, _ = beam_search.sequence_beam_search(
+        symbols_to_logits_fn=_get_test_symbols_to_logits_fn(),
+        initial_ids=tf.zeros([1], dtype=tf.int32),
+        initial_cache=cache,
+        vocab_size=3,
+        beam_size=2,
+        alpha=0.6,
+        max_decode_length=3,
+        eos_id=[9, 10],
+        padded_decode=padded_decode,
+        dtype=tf.float32,
+        noise_multiplier=noise_multiplier,
+        decoding_name=name,
+        constraint_mask=tf.constant([1, 0, 0]),
+    )
+    if noise_multiplier > 0:
+      self.assertAllEqual([[[0, 0, 0, 1], [0, 0, 0, 2]]], predictions)
+    else:
+      self.assertAllEqual([[[0, 0, 0, 1], [0, 0, 1, 2]]], predictions)
 if __name__ == '__main__':
   tf.test.main()

{tf_models_nightly-2.17.0.dev20240327.dist-info → tf_models_nightly-2.17.0.dev20240329.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tf-models-nightly
-Version: 2.17.0.dev20240327
+Version: 2.17.0.dev20240329
 Summary: TensorFlow Official Models
 Home-page: https://github.com/tensorflow/models
 Author: Google Inc.

{tf_models_nightly-2.17.0.dev20240327.dist-info → tf_models_nightly-2.17.0.dev20240329.dist-info}/RECORD RENAMED Viewed

@@ -414,8 +414,8 @@ official/nlp/modeling/networks/sparse_mixer_test.py,sha256=9AY4gelHc-rrtUexr33-j
 official/nlp/modeling/networks/xlnet_base.py,sha256=ditE18dFpJQ87U1-vC3VzgFpx0aK2Hyy6b4HgOO8De4,25867
 official/nlp/modeling/networks/xlnet_base_test.py,sha256=zt8hLCpKy5wKWsbCizyq8mLGJc32OXIqbhWp0ysQGKc,14788
 official/nlp/modeling/ops/__init__.py,sha256=VnA497WiK08ukev1d5Tjqc283YGQx6MnGyPAPk_jW7s,1011
-official/nlp/modeling/ops/beam_search.py,sha256=rfQluf94mAEUuwCGkKeTUTZhpllYIPysDzuJiHe4OnU,29830
-official/nlp/modeling/ops/beam_search_test.py,sha256=BEWDGlOIxJQaocSCZ58LHvI_OFH69SXhsEJCLZMoNuY,5749
+official/nlp/modeling/ops/beam_search.py,sha256=1kwoD3SF1BiWbxeN4u77CjJXJ2hCEzOWJP295_BeWuU,31255
+official/nlp/modeling/ops/beam_search_test.py,sha256=Sz1sirBnYktqQ82NbyLefVpkmLVr7BPVApVxW8DRuoI,7589
 official/nlp/modeling/ops/decoding_module.py,sha256=-Aw_A2dUbRu7jd-DY4a7iWme-yNSvfng9g_XWdCGwXI,11279
 official/nlp/modeling/ops/decoding_module_test.py,sha256=VTYYaZxihkDz1FkkwUIyc3EuCqGIW9fJS-3mYw3c4-8,2623
 official/nlp/modeling/ops/sampling_module.py,sha256=gyUoOnNdh6TJGebce5BMUxTrhk79HzPM3whuEu5BP9A,19250
@@ -1203,9 +1203,9 @@ tensorflow_models/__init__.py,sha256=etxw45SHxuwFCRX5qGxGMP83II0JfJulzNl5GSNJvhw
 tensorflow_models/tensorflow_models_test.py,sha256=AxUYUdiQn416UR7jg0h6rmv688esvlKDfpyDCIQkF18,1395
 tensorflow_models/nlp/__init__.py,sha256=4tA5Pf4qaFwT-fIFOpX7x7FHJpnyJT-5UgOeFYTyMlc,807
 tensorflow_models/vision/__init__.py,sha256=zBorY_v5xva1uI-qxhZO3Qh-Dii-Suq6wEYh6hKHDfc,833
-tf_models_nightly-2.17.0.dev20240327.dist-info/AUTHORS,sha256=1dG3fXVu9jlo7bul8xuix5F5vOnczMk7_yWn4y70uw0,337
-tf_models_nightly-2.17.0.dev20240327.dist-info/LICENSE,sha256=WxeBS_DejPZQabxtfMOM_xn8qoZNJDQjrT7z2wG1I4U,11512
-tf_models_nightly-2.17.0.dev20240327.dist-info/METADATA,sha256=fIhpn38AKOT9brIxR3wssnpAjVfCTpFQe-BMXLhRxh8,1432
-tf_models_nightly-2.17.0.dev20240327.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
-tf_models_nightly-2.17.0.dev20240327.dist-info/top_level.txt,sha256=gum2FfO5R4cvjl2-QtP-S1aNmsvIZaFFT6VFzU0f4-g,33
-tf_models_nightly-2.17.0.dev20240327.dist-info/RECORD,,
+tf_models_nightly-2.17.0.dev20240329.dist-info/AUTHORS,sha256=1dG3fXVu9jlo7bul8xuix5F5vOnczMk7_yWn4y70uw0,337
+tf_models_nightly-2.17.0.dev20240329.dist-info/LICENSE,sha256=WxeBS_DejPZQabxtfMOM_xn8qoZNJDQjrT7z2wG1I4U,11512
+tf_models_nightly-2.17.0.dev20240329.dist-info/METADATA,sha256=MJ2HcBGugMGyza9VmjXOGjsHsThDRt01z6v5PQmxTOE,1432
+tf_models_nightly-2.17.0.dev20240329.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
+tf_models_nightly-2.17.0.dev20240329.dist-info/top_level.txt,sha256=gum2FfO5R4cvjl2-QtP-S1aNmsvIZaFFT6VFzU0f4-g,33
+tf_models_nightly-2.17.0.dev20240329.dist-info/RECORD,,

{tf_models_nightly-2.17.0.dev20240327.dist-info → tf_models_nightly-2.17.0.dev20240329.dist-info}/AUTHORS RENAMED Viewed

File without changes

{tf_models_nightly-2.17.0.dev20240327.dist-info → tf_models_nightly-2.17.0.dev20240329.dist-info}/LICENSE RENAMED Viewed

File without changes

{tf_models_nightly-2.17.0.dev20240327.dist-info → tf_models_nightly-2.17.0.dev20240329.dist-info}/WHEEL RENAMED Viewed

File without changes

{tf_models_nightly-2.17.0.dev20240327.dist-info → tf_models_nightly-2.17.0.dev20240329.dist-info}/top_level.txt RENAMED Viewed

File without changes

tf-models-nightly 2.17.0.dev20240327__py2.py3-none-any.whl → 2.17.0.dev20240329__py2.py3-none-any.whl

tf-models-nightly 2.17.0.dev20240327__py2.py3-none-any.whl → 2.17.0.dev20240329__py2.py3-none-any.whl