diff --git a/ptuning/web_demo.py b/ptuning/web_demo.py
new file mode 100644
index 0000000..43d0c82
--- /dev/null
+++ b/ptuning/web_demo.py
@@ -0,0 +1,166 @@
+import os, sys
+
+import gradio as gr
+import mdtex2html
+
+import torch
+import transformers
+from transformers import (
+ AutoConfig,
+ AutoModel,
+ AutoTokenizer,
+ AutoTokenizer,
+ DataCollatorForSeq2Seq,
+ HfArgumentParser,
+ Seq2SeqTrainingArguments,
+ set_seed,
+)
+
+from arguments import ModelArguments, DataTrainingArguments
+
+
+# Module-level handles shared with predict(); both stay None until main()
+# loads the tokenizer and the model.
+model = tokenizer = None
+
+"""Override Chatbot.postprocess"""
+
+
+def postprocess(self, y):
+    """Render every (message, response) pair of the chat log via mdtex2html.
+
+    ``y`` is updated in place and returned. A ``None`` log yields an empty
+    list, and ``None`` entries inside a pair are passed through untouched.
+    """
+    if y is None:
+        return []
+
+    def _render(text):
+        # Leave missing entries as None instead of converting them.
+        return None if text is None else mdtex2html.convert(text)
+
+    for idx, pair in enumerate(y):
+        user_msg, bot_msg = pair
+        y[idx] = (_render(user_msg), _render(bot_msg))
+    return y
+
+
+# Replace the stock Chatbot post-processing with the renderer defined above.
+gr.Chatbot.postprocess = postprocess
+
+
+# Replacements applied to every line inside a fenced code block so the chat
+# widget shows the characters literally instead of interpreting them as
+# Markdown or HTML. None of the replacement strings contains a character that
+# is itself a key, so a single translate() pass is equivalent to applying the
+# replacements one after another.
+_CODE_ESCAPES = str.maketrans({
+    "`": "\\`",
+    "<": "&lt;",
+    ">": "&gt;",
+    " ": "&nbsp;",
+    "*": "&ast;",
+    "_": "&lowbar;",
+    "-": "&#45;",
+    ".": "&#46;",
+    "!": "&#33;",
+    "(": "&#40;",
+    ")": "&#41;",
+    "$": "&#36;",
+})
+
+
+def parse_text(text):
+    """Convert chat text with ``` fences into HTML for the chatbot widget.
+
+    copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/
+
+    Empty lines are dropped. A line containing ``` toggles code mode: an
+    opening fence becomes ``<pre><code class="language-X">`` where X is the
+    text after the last backtick, and a closing fence becomes
+    ``<br></code></pre>``. Every other line except the very first one is
+    prefixed with ``<br>``; while inside a code block its special characters
+    are replaced according to ``_CODE_ESCAPES``. The first line, when it is
+    not a fence, is emitted unchanged.
+
+    Returns the resulting lines joined without any separator.
+    """
+    lines = [line for line in text.split("\n") if line != ""]
+    fence_count = 0
+    for i, line in enumerate(lines):
+        if "```" in line:
+            fence_count += 1
+            if fence_count % 2 == 1:
+                # Odd fence: opens a block; the language tag follows the backticks.
+                language = line.split("`")[-1]
+                lines[i] = f'<pre><code class="language-{language}">'
+            else:
+                lines[i] = "<br></code></pre>"
+        elif i > 0:
+            if fence_count % 2 == 1:
+                line = line.translate(_CODE_ESCAPES)
+            lines[i] = "<br>" + line
+    return "".join(lines)
+
+
+def predict(input, chatbot, max_length, top_p, temperature, history):
+    """Stream the model's reply to ``input`` into the chat log.
+
+    Appends a new (prompt, "") row to ``chatbot``, then iterates over
+    ``model.stream_chat`` and overwrites that last row with each partial
+    response. This is a generator: after every chunk it yields the updated
+    ``chatbot`` list together with the history returned by ``stream_chat``.
+
+    ``max_length``, ``top_p`` and ``temperature`` are forwarded unchanged to
+    ``stream_chat`` as keyword arguments.
+    """
+    # The rendered prompt does not change while streaming, so format it once
+    # instead of re-parsing it for every chunk.
+    rendered_input = parse_text(input)
+    chatbot.append((rendered_input, ""))
+    for response, history in model.stream_chat(
+            tokenizer, input, history,
+            max_length=max_length, top_p=top_p, temperature=temperature):
+        chatbot[-1] = (rendered_input, parse_text(response))
+
+        yield chatbot, history
+
+
+def reset_user_input():
+    """Return a Gradio update that sets the target component's value to ''."""
+    cleared = gr.update(value='')
+    return cleared
+
+
+def reset_state():
+    """Return two fresh empty lists: the new chat log and the new history."""
+    chat_rows = []
+    past_turns = []
+    return chat_rows, past_turns
+
+
+# Build the web UI. Components are created in layout order inside the nested
+# context managers; the event handlers are wired up once all of them exist.
+with gr.Blocks() as demo:
+    # Page title.
+    gr.HTML("""<h1 align="center">ChatGLM</h1>""")
+
+    chatbot = gr.Chatbot()
+    with gr.Row():
+        # Left side: prompt box and submit button.
+        with gr.Column(scale=4):
+            with gr.Column(scale=12):
+                user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(
+                    container=False)
+            with gr.Column(min_width=32, scale=1):
+                submitBtn = gr.Button("Submit", variant="primary")
+        # Right side: clear button and the generation parameters passed to predict().
+        with gr.Column(scale=1):
+            emptyBtn = gr.Button("Clear History")
+            max_length = gr.Slider(0, 4096, value=2048, step=1.0, label="Maximum length", interactive=True)
+            top_p = gr.Slider(0, 1, value=0.7, step=0.01, label="Top P", interactive=True)
+            temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)
+
+    # Conversation history, fed into predict() and replaced by what it yields.
+    history = gr.State([])
+
+    # Submit triggers two handlers: one streams the reply, the other clears the prompt box.
+    submitBtn.click(predict, [user_input, chatbot, max_length, top_p, temperature, history], [chatbot, history],
+                    show_progress=True)
+    submitBtn.click(reset_user_input, [], [user_input])
+
+    emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)
+
+
+
+def _load_prefix_encoder(target_model, checkpoint_dir):
+    """Load the prefix-encoder weights stored in ``checkpoint_dir`` into ``target_model``.
+
+    Reads ``pytorch_model.bin`` from the directory, keeps only the entries
+    whose key starts with ``transformer.prefix_encoder.``, strips that prefix
+    and loads the result into ``target_model.transformer.prefix_encoder``.
+    """
+    key_prefix = "transformer.prefix_encoder."
+    # NOTE(review): torch.load unpickles the file, so only point this at
+    # checkpoints from a trusted source.
+    checkpoint = torch.load(os.path.join(checkpoint_dir, "pytorch_model.bin"))
+    prefix_state_dict = {
+        key[len(key_prefix):]: value
+        for key, value in checkpoint.items()
+        if key.startswith(key_prefix)
+    }
+    target_model.transformer.prefix_encoder.load_state_dict(prefix_state_dict)
+
+
+def main():
+    """Parse the model arguments, load tokenizer and model, and launch the demo.
+
+    Arguments come either from a single ``.json`` file given as the only
+    command-line argument or from regular command-line flags. The loaded
+    tokenizer and model are stored in the module-level ``tokenizer`` and
+    ``model`` globals that ``predict`` reads.
+    """
+    global model, tokenizer
+
+    parser = HfArgumentParser(ModelArguments)
+    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
+        # If we pass only one argument to the script and it's the path to a json file,
+        # let's parse it to get our arguments.
+        model_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))[0]
+    else:
+        model_args = parser.parse_args_into_dataclasses()[0]
+
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_args.model_name_or_path, trust_remote_code=True)
+    config = AutoConfig.from_pretrained(
+        model_args.model_name_or_path, trust_remote_code=True)
+
+    # Copy the prefix-tuning settings from the arguments onto the model config.
+    config.pre_seq_len = model_args.pre_seq_len
+    config.prefix_projection = model_args.prefix_projection
+
+    # The base model is loaded the same way with or without a checkpoint.
+    model = AutoModel.from_pretrained(model_args.model_name_or_path, config=config, trust_remote_code=True)
+    if model_args.ptuning_checkpoint is not None:
+        print(f"Loading prefix_encoder weight from {model_args.ptuning_checkpoint}")
+        _load_prefix_encoder(model, model_args.ptuning_checkpoint)
+
+    if model_args.quantization_bit is not None:
+        print(f"Quantized to {model_args.quantization_bit} bit")
+        model = model.quantize(model_args.quantization_bit)
+
+    if model_args.pre_seq_len is not None:
+        # P-tuning v2: run the model in half precision on the GPU while the
+        # prefix encoder is converted back to float32.
+        model = model.half().cuda()
+        model.transformer.prefix_encoder.float().cuda()
+    # NOTE(review): when pre_seq_len is None the model is never moved to the
+    # GPU or cast to half precision here - confirm that this is intended.
+
+    model = model.eval()
+    demo.queue().launch(share=False, inbrowser=True)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/ptuning/web_demo.sh b/ptuning/web_demo.sh
new file mode 100644
index 0000000..87bf9e9
--- /dev/null
+++ b/ptuning/web_demo.sh
@@ -0,0 +1,7 @@
+# Value forwarded to web_demo.py as --pre_seq_len.
+PRE_SEQ_LEN=128
+
+# Start the web demo with CUDA_VISIBLE_DEVICES set to 0, using the base model
+# plus the P-tuning checkpoint below.
+CUDA_VISIBLE_DEVICES=0 python3 web_demo.py \
+    --model_name_or_path THUDM/chatglm-6b \
+    --ptuning_checkpoint output/adgen-chatglm-6b-pt-128-2e-2/checkpoint-3000 \
+    --pre_seq_len "${PRE_SEQ_LEN}"
+