SynthID: better example (#34372)

* better example

* Update src/transformers/generation/configuration_utils.py

* Update src/transformers/generation/logits_process.py

* nits
This commit is contained in:
Joao Gante
2024-10-25 11:46:46 +01:00
committed by GitHub
parent 223855314f
commit 8814043c8c
3 changed files with 11 additions and 13 deletions

View File

@@ -428,13 +428,11 @@ A [`Constraint`] can be used to force the generation to include specific tokens
- __call__
[[autodoc]] BayesianDetectorConfig
- __call__
[[autodoc]] BayesianDetectorModel
- __call__
- forward
[[autodoc]] SynthIDTextWatermarkingConfig
- __call__
[[autodoc]] SynthIDTextWatermarkDetector
- __call__

View File

@@ -1471,8 +1471,8 @@ class SynthIDTextWatermarkingConfig(BaseWatermarkingConfig):
```python
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, SynthIDTextWatermarkingConfig
- >>> tokenizer = AutoTokenizer.from_pretrained('google/gemma-2-2b-it')
- >>> model = AutoModelForCausalLM.from_pretrained('google/gemma-2-2b-it')
+ >>> tokenizer = AutoTokenizer.from_pretrained('google/gemma-2-2b', padding_side="left")
+ >>> model = AutoModelForCausalLM.from_pretrained('google/gemma-2-2b')
>>> # SynthID Text configuration
>>> watermarking_config = SynthIDTextWatermarkingConfig(
@@ -1481,11 +1481,11 @@ class SynthIDTextWatermarkingConfig(BaseWatermarkingConfig):
... )
>>> # Generation with watermarking
- >>> tokenized_prompts = tokenizer(["your prompts here"])
+ >>> tokenized_prompts = tokenizer(["Once upon a time, "], return_tensors="pt", padding=True)
>>> output_sequences = model.generate(
- ... **tokenized_prompts, watermarking_config=watermarking_config, do_sample=True,
+ ... **tokenized_prompts, watermarking_config=watermarking_config, do_sample=True, max_new_tokens=10
... )
- >>> watermarked_text = tokenizer.batch_decode(output_sequences)
+ >>> watermarked_text = tokenizer.batch_decode(output_sequences, skip_special_tokens=True)
```
"""

View File

@@ -2565,8 +2565,8 @@ class SynthIDTextWatermarkLogitsProcessor(LogitsProcessor):
```python
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, SynthIDTextWatermarkingConfig
- >>> tokenizer = AutoTokenizer.from_pretrained('google/gemma-2-2b-it')
- >>> model = AutoModelForCausalLM.from_pretrained('google/gemma-2-2b-it')
+ >>> tokenizer = AutoTokenizer.from_pretrained('google/gemma-2-2b', padding_side="left")
+ >>> model = AutoModelForCausalLM.from_pretrained('google/gemma-2-2b')
>>> # SynthID Text configuration
>>> watermarking_config = SynthIDTextWatermarkingConfig(
@@ -2575,11 +2575,11 @@ class SynthIDTextWatermarkLogitsProcessor(LogitsProcessor):
... )
>>> # Generation with watermarking
- >>> tokenized_prompts = tokenizer(["your prompts here"])
+ >>> tokenized_prompts = tokenizer(["Once upon a time, "], return_tensors="pt", padding=True)
>>> output_sequences = model.generate(
- ... **tokenized_prompts, watermarking_config=watermarking_config, do_sample=True,
+ ... **tokenized_prompts, watermarking_config=watermarking_config, do_sample=True, max_new_tokens=10
... )
- >>> watermarked_text = tokenizer.batch_decode(output_sequences)
+ >>> watermarked_text = tokenizer.batch_decode(output_sequences, skip_special_tokens=True)
```
"""