python -m pip install --upgrade pip
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
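To confirm the mirror took effect, you can list pip's effective configuration:
pip config list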
# Download the model
from modelscope import snapshot_download
model_dir = snapshot_download('ZhipuAI/chatglm3-6b', cache_dir=r'D:\Transformers')
from modelscope import AutoTokenizer, AutoModel, snapshot_download
model_dir = snapshot_download('ZhipuAI/chatglm3-6b', cache_dir=r'D:\Transformers')
# model_dir = r'D:\Transformers\ZhipuAI\chatglm3-6b'
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).half().cpu()  # convert weights and computation from 32-bit floats to 16-bit
model = model.eval()
response, history = model.chat(tokenizer, '你好', history=[])
print(response)
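Note that model.chat returns the updated history along with the reply, so feeding it back in gives you multi-turn dialogue. A quick sketch (the follow-up prompt is just an example):
# Pass the returned history back so the model sees the earlier exchange
response, history = model.chat(tokenizer, '请用一句话介绍你自己', history=history)
print(response)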
conda install pytorch torchvision torchaudio cpuonly -c pytorch
pip install transformers==4.40.0
pip install sentencepiece
pip install uvicorn
pip install fastapi
import uvicorn
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from typing import Dict

app = FastAPI()

from modelscope import AutoTokenizer, AutoModel, snapshot_download
model_dir = snapshot_download('ZhipuAI/chatglm3-6b', cache_dir=r'D:\Transformers')
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).half().cpu()  # convert weights and computation from 32-bit floats to 16-bit

@app.post("/chat")
def chat(data: Dict):
    query = data['query']
    history = data['history']
    if history == "":
        history = []
    response, history = model.chat(tokenizer, query, history=history, top_p=0.95, temperature=0.95)
    response = {'response': response, 'history': history}
    return JSONResponse(content=response)

if __name__ == '__main__':
    uvicorn.run(app, host="127.0.0.1", port=7866)
Click Debug to run it, and an API service starts on port 7866.
(Figure 5)
Let's test it with a client tool such as Postman:
(Figure 6)
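If you'd rather test from code than Postman, here is a minimal sketch of the same request against the endpoint we just defined:
import requests

# Same JSON body Postman would send; the server treats "" as an empty history
payload = {'query': '你好', 'history': ''}
resp = requests.post('http://127.0.0.1:7866/chat', json=payload)
print(resp.json()['response'])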
pip install langchain
pip install langchain-community
import requests
import logging
from typing import Optional, List, Dict, Mapping, Any
import langchain
from langchain.llms.base import LLM
from langchain.cache import InMemoryCache

logging.basicConfig(level=logging.INFO)
langchain.llm_cache = InMemoryCache()

class ChatLLM(LLM):
    # Declared as pydantic fields so they can be set on the instance
    url: str = "http://127.0.0.1:7866/chat"
    history: List = []

    @property
    def _llm_type(self) -> str:
        return "chatglm"

    def _construct_query(self, prompt: str) -> Dict:
        query = {
            "history": self.history,
            "query": prompt
        }
        return query

    def _post(self, url: str, query: Dict) -> Any:
        # Send the body as JSON so the FastAPI endpoint can parse it into a Dict
        response = requests.post(url, json=query).json()
        return response

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        query = self._construct_query(prompt=prompt)
        response = self._post(url=self.url, query=query)
        response_chat = response['response']
        self.history = response['history']  # keep conversation state for the next turn
        return response_chat

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        _param_dict = {
            "url": self.url
        }
        return _param_dict

if __name__ == "__main__":
    llm = ChatLLM()
    while True:
        user_input = input("Me: ")
        response = llm(user_input)
        print(f"ChatGLM: {response}")
pip install gradio
# If installing gradio leaves you with "ImportError: DLL load failed while importing _multiarray_umath", run:
pip install numpy==1.25.2
import gradio as gr
from client import ChatLLM  # the ChatLLM class we defined in client.py

llm = ChatLLM()

# Streamed output: accumulate chunks so each yield extends the displayed text
def stream_translate(text):
    response = llm(text)
    partial = ""
    for chunk in response.split():
        partial += chunk + " "
        yield partial

demo = gr.Interface(fn=stream_translate, inputs="text", outputs="text", title="ChatGLM",
                    description="A chatbot powered by ChatGLM.")
demo.launch()
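launch() defaults to port 7860; the explicit equivalent below uses standard Gradio parameters, handy if that port is already taken:
# demo.launch(server_name="127.0.0.1", server_port=7860)  # share=True would create a temporary public link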
Run it:
(Figure 8)
Open http://127.0.0.1:7860 in your browser to see our client. Now ask the model a few questions (friendly reminder again: the video was edited; that later question took over 1500 seconds!):
(Figure 9)
That wraps up the getting-started walkthrough. If enough people read this, I'll follow up with model fine-tuning and training. Hope you like it!
After being put through the wringer countless times, I built a small app, 【i歌词】, and I'm providing the full source code for free with no strings attached. It is based on the chatglm4-9b model, covering everything from deployment to training, with general chat built in; its core feature looks up lyrics by song title and writes new ones. I entered it in the Gitee AI innovation application contest, where you can try it online; if it's convenient, cast a small vote for it: https://ai.gitee.com/events/iluvatar-ai-app-contest/detail?app=36
(Figure 10)