langchain调用多模态大模型
【代码】langchain调用多模态大模型。
·
下面是使用 LangChain 调用多模态大模型的示例。
数据处理
import os
import base64
from mimetypes import guess_type
def local_image_to_data_url(image_path: str) -> str:
    """Encode a local file as a base64 ``data:`` URL.

    Args:
        image_path: Path of the file to read.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``. The MIME type
        is guessed from the file extension; when it cannot be guessed,
        ``application/octet-stream`` is used.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Guess the MIME type from the file extension only (content is not sniffed).
    mime_type, _ = guess_type(image_path)
    if mime_type is None:
        mime_type = 'application/octet-stream'  # Default MIME type if none is found

    # Read the raw bytes and base64-encode them into ASCII-safe text.
    with open(image_path, "rb") as image_file:
        base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')

    return f"data:{mime_type};base64,{base64_encoded_data}"
def get_image_message(img_path):
    """Build ``image_url`` message parts for one image file or a folder of images.

    Args:
        img_path: Path to a single file, or to a directory. For a directory,
            every regular file directly inside it whose name ends in
            ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) is included,
            in sorted name order. Sub-directories are not searched.

    Returns:
        A list of dicts shaped
        ``{"type": "image_url", "image_url": {"url": <data URL>}}``.
        The list is empty when the path does not exist, is neither a file
        nor a directory, or the directory holds no matching image.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    def _get_template(image_path):
        # f-string (not ``+``) so path-like objects are printed as well.
        print(f'文件路径为:{image_path}')
        url = local_image_to_data_url(image_path)
        return {
            "type": "image_url",
            "image_url": {
                "url": url
            },
        }

    if not os.path.exists(img_path):
        print(f"Error: Path {img_path} does not exist.")
        # Return an empty list (not None) so callers can always concatenate
        # the result with other message parts.
        return []

    image_message = []
    # Decide whether we were given a single file or a folder.
    if os.path.isfile(img_path):
        image_message.append(_get_template(img_path))
    elif os.path.isdir(img_path):
        # Sort the listing: os.listdir order is arbitrary, and the image
        # order is part of the prompt.
        for name in sorted(os.listdir(img_path)):
            if not name.lower().endswith(image_extensions):
                continue
            full_path = os.path.join(img_path, name)
            # Skip directories (or other non-files) that merely look like images.
            if os.path.isfile(full_path):
                image_message.append(_get_template(full_path))
    else:
        print("错误!请检测传入的图片文件夹目录结构。")
    return image_message
if __name__ == '__main__':
    # Manual smoke test: build the image message parts for a sample folder.
    # get_image_message prints each file path it encodes.
    img_path = "img/temp/"
    user_image_message = get_image_message(img_path)
主程序
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from langchain.schema.runnable import RunnablePassthrough
from pydantic import BaseModel, Field, validator
from langchain_openai import AzureChatOpenAI
from ulits.data_processing import *
class Result(BaseModel):
    """Structured answer expected from the model for the supplied image(s).

    The ``description`` strings are runtime text, not comments: this class is
    handed to ``PydanticOutputParser``, whose format instructions are injected
    into the prompt (presumably including these descriptions via the schema —
    confirm against the LangChain version in use).
    """

    # Free-text description of what the model sees in the image.
    Observation: str = Field(description="仔细观察你收的图像, 详细描述图像中的内容,用中文回答")
    # Reasoning that relates the observation to the user's search query.
    Thought: str = Field(description="根据你的描述,结合用户的视频搜索提示词,思考当前画面是否是用户想要寻找的画面")
    # Final verdict: whether the frame matches the query; defaults to False.
    Result: bool = Field(description="给出结论,根据你的思考判断当前画面是否是用户想要搜索的画面", default=False)
# Output parser: validates the model reply against the Result schema.
parser = PydanticOutputParser(pydantic_object=Result)

# The system prompt is kept in a separate text file.
system_prompt_path = "prompt/prompt1.txt"
with open(system_prompt_path, 'r', encoding='utf-8') as f:
    system_prompt = f.read()

# Text part of the user message; {query} is filled in when the chain runs.
user_text_message = [
    {
        "type": "text",
        "text": "用户输入的视频搜索提示词:\n{query}\n"
    }]

img_path = "img/"
# Treat a missing/None result as "no images" so the list concatenation
# below cannot raise TypeError.
user_image_message = get_image_message(img_path) or []
user_message = user_text_message + user_image_message

# Prompt template
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("user", user_message),
    ]
).partial(format_instructions=parser.get_format_instructions())

# Model: Azure endpoint, deployment and API version come from the environment.
model = AzureChatOpenAI(azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
                        azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
                        openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
                        temperature=0,
                        model_kwargs={"seed": 42})

# LCEL expression: the raw input string becomes the "query" prompt variable.
runnable = (
    {"query": RunnablePassthrough()} | prompt | model | parser
)

# Run
print(runnable.invoke("搜索一个拿伞的女人"))
print("end")
更多推荐

所有评论(0)