1. Test using the trained weights
from transformers import AutoModelForCausalLM,AutoTokenizer,TextGenerationPipeline
import torch
tokenizer = AutoTokenizer.from_pretrained(r"E:\大模型AI开发\AI大模型\projects\gpt2\model\models--uer--gpt2-chinese-cluecorpussmall\snapshots\c2c0249d8a2731f269414cc3b22dff021f8e07a3")
model = AutoModelForCausalLM.from_pretrained(r"E:\大模型AI开发\AI大模型\projects\gpt2\model\models--uer--gpt2-chinese-cluecorpussmall\snapshots\c2c0249d8a2731f269414cc3b22dff021f8e07a3")
# Load our own trained weights (Chinese poetry)
model.load_state_dict(torch.load("net.pt"))
# Use the built-in pipeline tool to generate content
pipeline = TextGenerationPipeline(model, tokenizer, device=0)
print(pipeline("天高", max_length=24))
The generated results are actually not very good.
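One thing worth trying before any custom post-processing is to forward sampling arguments through the pipeline; depending on the checkpoint's generation config, the pipeline may otherwise decode greedily. A minimal sketch reusing the pipeline object defined above (the do_sample/top_k/repetition_penalty values are illustrative, not tuned):
# Forward sampling arguments to generate(); the values here are illustrative, not tuned
print(pipeline("天高", max_length=24, do_sample=True, top_k=50, repetition_penalty=1.2))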

2. Post-process the AI-generated results
# Customized content generation
from transformers import AutoModelForCausalLM,AutoTokenizer,TextGenerationPipeline
import torch
tokenizer = AutoTokenizer.from_pretrained(r"E:\大模型AI开发\AI大模型\projects\gpt2\model\models--uer--gpt2-chinese-cluecorpussmall\snapshots\c2c0249d8a2731f269414cc3b22dff021f8e07a3")
model = AutoModelForCausalLM.from_pretrained(r"E:\大模型AI开发\AI大模型\projects\gpt2\model\models--uer--gpt2-chinese-cluecorpussmall\snapshots\c2c0249d8a2731f269414cc3b22dff021f8e07a3")
# Load our own trained weights (Chinese poetry)
model.load_state_dict(torch.load("net.pt"))
# Define a function to generate 5-character quatrains. text is the prompt, row is the number of lines to generate, col is the number of characters per line.
def generate(text, row, col):
    # Define an internal recursive function to generate text
    def generate_loop(data):
        # Disable gradient calculation
        with torch.no_grad():
            # Use the data in the dictionary as input to the model and get the output
            out = model(**data)
        # Get the logits (unnormalized probability output)
        out = out["logits"]
        # Select the last logits of each sequence, corresponding to the prediction of the next word
        out = out[:, -1]
        # Find the top 50 values to serve as a boundary; everything below the 50th largest value will be discarded
        topk_value = torch.topk(out, 50).values
        # Take the 50th largest logit of each sequence as the threshold (unsqueeze to keep the original dimension, because indexing reduces dimensions)
        topk_value = topk_value[:, -1].unsqueeze(dim=1)
        # Set all logits smaller than the threshold to negative infinity, so low-probability words cannot be selected
        out = out.masked_fill(out < topk_value, -float("inf"))
        # Mask [UNK]
        out[:, tokenizer.get_vocab()["[UNK]"]] = -float("inf")
        # Set the logits of special symbols to negative infinity to prevent the model from generating these symbols
        for i in ",.()《》[]「」{}":
            out[:, tokenizer.get_vocab()[i]] = -float("inf")
        # Convert the logits to probabilities
        out = out.softmax(dim=1)
        # Randomly sample the next word's ID from the probability distribution
        out = out.multinomial(num_samples=1)

        # Forcibly add punctuation
        # Calculate the ratio of the current sequence length (including [CLS]) to the line length plus punctuation
        c = data["input_ids"].shape[1] / (col + 1)
        # If the current length is an integer multiple of col + 1, replace the sampled token with punctuation
        if c % 1 == 0:
            if c % 2 == 0:
                # Add a period at even positions (end of a couplet)
                out[:, 0] = tokenizer.get_vocab()["."]
            else:
                # Add a comma at odd positions (middle of a couplet)
                out[:, 0] = tokenizer.get_vocab()[","]

        # Append the newly generated word ID to the end of the input sequence
        data["input_ids"] = torch.cat([data["input_ids"], out], dim=1)
        # Update the attention mask, marking all positions as valid
        data["attention_mask"] = torch.ones_like(data["input_ids"])
        # Update the token type IDs (mainly used by BERT-style models, not by GPT)
        data["token_type_ids"] = torch.ones_like(data["input_ids"])
        # Update labels by copying the input IDs; language models are trained to predict the next word
        data["labels"] = data["input_ids"].clone()

        # Check whether the generated text has reached the required length: row lines of col characters, plus row punctuation marks and the leading [CLS] token
        if data["input_ids"].shape[1] >= row * col + row + 1:
            # If the length requirement is met, return the final data dictionary
            return data
        # Otherwise, recursively call generate_loop to continue generating text
        return generate_loop(data)

    # Generate 3 poems
    # Use the tokenizer to encode the input text, repeated 3 times to generate 3 samples
    data = tokenizer.batch_encode_plus([text] * 3, return_tensors="pt")
    # Remove the last token ([SEP], the end symbol) from the encoded sequence
    data["input_ids"] = data["input_ids"][:, :-1]
    # Create a tensor of ones with the same shape as input_ids for the attention mask
    data["attention_mask"] = torch.ones_like(data["input_ids"])
    # Create a tensor of ones with the same shape as input_ids for the token type IDs
    data["token_type_ids"] = torch.ones_like(data["input_ids"])
    # Copy the input IDs to labels as the model targets
    data["labels"] = data["input_ids"].clone()

    # Call generate_loop to start generating text
    data = generate_loop(data)

    # Iterate through the 3 generated samples
    for i in range(3):
        print(i, tokenizer.decode(data["input_ids"][i]))

if __name__ == '__main__':
    generate("白", row=4, col=5)

3. Download the Llama 3 model from ModelScope
Install modelscope
pip install modelscope
from modelscope import snapshot_download
model_dir = snapshot_download(model_id='LLM-Research/Llama-3.2-1B-Instruct',cache_dir=r'E:\大模型AI开发\AI大模型\projects\ollama3\model')
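snapshot_download returns the local directory of the downloaded snapshot, so a quick sanity check is simply to list it (a minimal sketch using the model_dir variable above):
import os
print(model_dir)              # local snapshot directory returned by snapshot_download
print(os.listdir(model_dir))  # should contain config.json, the tokenizer files, and the weight files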
4. Use transformers to call Llama 3
# Use transformers to load the Llama 3 model
from transformers import AutoModelForCausalLM,AutoTokenizer
DEVICE = "cuda"
# Local model path: the root directory that contains the model's configuration files
model_dir = r"E:\大模型AI开发\AI大模型\projects\ollama3\model\LLM-Research\Llama-3___2-1B-Instruct"
# Load the model; supported torch_dtype values include float32, float16, and "auto"
model = AutoModelForCausalLM.from_pretrained(model_dir,torch_dtype="auto",device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_dir)
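A quick, optional check that the weights landed on the expected device and precision (uses the model object loaded above; the exact values depend on your GPU and the checkpoint):
print(model.device)  # e.g. cuda:0 when device_map="auto" places the model on the GPU
print(model.dtype)   # the dtype chosen by torch_dtype="auto", typically a 16-bit float for this checkpoint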
# Call the model
# Define the prompt
prompt = "你好,请介绍下你自己。"
# Wrap the prompt into a message
message = [{"role":"system","content":"You are a helpful assistant system"},{"role":"user","content":prompt}]
# Use the tokenizer's apply_chat_template() method to convert the above-defined message list; tokenize=False means no tokenization at this time
text = tokenizer.apply_chat_template(message,tokenize=False,add_generation_prompt=True)
# Tokenize the processed text and convert it to the model's input tensor
model_inputs = tokenizer([text],return_tensors="pt").to(DEVICE)
# Input the model to get output
generated = model.generate(model_inputs.input_ids,max_new_tokens=512)
print(generated)
# Decode the output content
response = tokenizer.batch_decode(generated, skip_special_tokens=True)
print(response)
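batch_decode above returns the whole sequence, prompt included. A small follow-up sketch, using the variable names from the code above, that decodes only the newly generated tokens so just the model's reply is printed:
# Slice off the prompt tokens so only the newly generated reply is decoded
new_tokens = generated[:, model_inputs.input_ids.shape[1]:]
reply = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0]
print(reply)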