From 00934026a4832f030bb37131968668c7db3ca7cf Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Fri, 17 Mar 2023 08:55:15 -0400
Subject: [PATCH] LLaMA house-keeping (#22216)

* LLaMA house-keeping

* Doc links
---
 docs/source/en/model_doc/llama.mdx                   | 6 ++++--
 src/transformers/__init__.py                         | 2 +-
 src/transformers/models/llama/configuration_llama.py | 4 ++--
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/docs/source/en/model_doc/llama.mdx b/docs/source/en/model_doc/llama.mdx
index 28e39b11ce..27eee085e6 100644
--- a/docs/source/en/model_doc/llama.mdx
+++ b/docs/source/en/model_doc/llama.mdx
@@ -33,8 +33,10 @@ python src/transformers/models/llama/convert_llama_weights_to_hf.py \
 - After conversion, the model and tokenizer can be loaded via:
 
 ```python
-tokenizer = transformers.LlamaTokenizer.from_pretrained("/output/path/tokenizer/")
-model = transformers.LlamaForCausalLM.from_pretrained("/output/path/llama-7b/")
+from transformers import LlamaForCausalLM, LlamaTokenizer
+
+tokenizer = LlamaTokenizer.from_pretrained("/output/path/tokenizer/")
+model = LlamaForCausalLM.from_pretrained("/output/path/llama-7b/")
 ```
 
 - The LLaMA tokenizer is based on [sentencepiece](https://github.com/google/sentencepiece). One quirk of sentencepiece is that when decoding a sequence, if the first token is the start of the word (e.g. "Banana"), the tokenizer does not prepend the prefix space to the string. To have the tokenizer output the prefix space, set `decode_with_prefix_space=True` in the `LlamaTokenizer` object or in the tokenizer configuration.
diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index c64febc41d..0ae679b5ce 100644
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -4486,9 +4486,9 @@ if TYPE_CHECKING:
             TypicalLogitsWarper,
             top_k_top_p_filtering,
         )
+        from .modeling_utils import PreTrainedModel
 
         # PyTorch model imports
-        from .modeling_utils import PreTrainedModel
         from .models.albert import (
             ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
             AlbertForMaskedLM,
diff --git a/src/transformers/models/llama/configuration_llama.py b/src/transformers/models/llama/configuration_llama.py
index 5421d429b1..36b8ab72ab 100644
--- a/src/transformers/models/llama/configuration_llama.py
+++ b/src/transformers/models/llama/configuration_llama.py
@@ -30,7 +30,7 @@ LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
 
 class LlamaConfig(PretrainedConfig):
     r"""
-    This is the configuration class to store the configuration of a [`~LlamaModel`]. It is used to instantiate an LLaMA
+    This is the configuration class to store the configuration of a [`LlamaModel`]. It is used to instantiate an LLaMA
     model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
     defaults will yield a similar configuration to that of the LLaMA-7B.
 
@@ -41,7 +41,7 @@ class LlamaConfig(PretrainedConfig):
     Args:
         vocab_size (`int`, *optional*, defaults to 32000):
             Vocabulary size of the LLaMA model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`~LlamaModel`]
+            `input_ids` passed when calling [`LlamaModel`].
         hidden_size (`int`, *optional*, defaults to 4096):
             Dimension of the hidden representations.
         intermediate_size (`int`, *optional*, defaults to 11008):