# ChatGLM2-6B/ptuning/finetune-use.py
#
# 76 lines
# 3.0 KiB
# Python

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoModel, AutoModelForCausalLM
from transformers import pipeline
# Model checkpoint used by every active case in this script.
# Only one value can be in effect at a time; the alternatives that were
# tried before are kept here for reference (swap one in to test it):
#   "bigscience/mt0-large"
#   "/Users/hhwang/models/gpt2"
#   "/Users/hhwang/models/opt-125m"
checkpoint = "/Users/hhwang/models/opt-350m"
# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
# inputs = tokenizer.encode("Write a short story", return_tensors="pt")
# outputs = model.generate(inputs)
# print(tokenizer.decode(outputs[0]))
# case 1
# pipe = pipeline(task='text-generation', model=checkpoint)
# print(pipe)
# result = pipe("tell me a joke")
# print('result: ',result)
# case 2
# from transformers import GPT2Tokenizer, GPT2LMHeadModel
# tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)
# model = GPT2LMHeadModel.from_pretrained(checkpoint)
# text = "Replace me by any text you'd like."
# encoded_input = tokenizer.encode(text, return_tensors='pt')
# outputs = model.generate(encoded_input, max_length=50, num_return_sequences=1)
# generated_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
# for i, generated_text in enumerate(generated_texts):
#     print(f"Generated text {i + 1}: {generated_text}")
# case 3
# from transformers import GPT2Tokenizer, GPT2Model
# tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)
# model = GPT2Model.from_pretrained(checkpoint)
# text = "Replace me by any text you'd like."
# encoded_input = tokenizer(text, return_tensors='pt')
# outputs = model(**encoded_input)
# print(outputs)
# last_hidden_states = outputs.last_hidden_state
# print(last_hidden_states)
# case 4
# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# model = AutoModel.from_pretrained(checkpoint)
# inputs = tokenizer.encode("Write a short story", return_tensors="pt")
# model = model.eval()
# print(inputs)
# outputs = model(inputs)
# print(outputs)
# Case 5: load the checkpoint through the generic Auto* classes and
# generate a continuation for a fixed prompt. No generation options are
# passed, so whatever defaults the library/checkpoint define are used.
print('********* case 5 ***********')
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

# encode() is asked for a "pt" tensor of token ids; that tensor is the
# only thing handed to generate().
inputs = tokenizer.encode(text="Write a short story", return_tensors="pt")
outputs = model.generate(inputs)

# Decode every returned sequence with the tokenizer's default decode options.
print('result: ', tokenizer.batch_decode(outputs))
# Case 6: same checkpoint, but loaded through the concrete OPT model class
# and the GPT-2 tokenizer class, then sampled (do_sample=True) instead of
# using the default decoding. Output differs from run to run because no
# seed is set in this script.
print('********* case 6 ***********')
from transformers import GPT2Tokenizer, OPTForCausalLM

prompt = "Anti Vaccine Movemenet"

# Note: these rebind the module-level `model` / `tokenizer` from case 5.
model = OPTForCausalLM.from_pretrained(checkpoint)
tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)

# Keep only the token ids from the tokenizer output; nothing else is
# forwarded to generate().
inputs = tokenizer(prompt, return_tensors="pt")["input_ids"]
gen_tokens = model.generate(
    inputs,
    do_sample=True,
    temperature=0.9,
    max_length=100,
)

# batch_decode yields one string per generated sequence; take the first.
gen_text = tokenizer.batch_decode(gen_tokens)[0]
print('gen_text', gen_text)
# generate_ids = model.generate(inputs,max_length=2000,early_stopping= True,do_sample=True,min_length=2000,top_k=125,top_p=0.92,temperature= 0.85,repetition_penalty=1.5,num_return_sequences=3)
# for i, sample_output in enumerate(generate_ids):
#     result = tokenizer.decode(sample_output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
#     print(result)