1. 环境准备

安装必要的第三方库:

pip install openai pillow -i https://pypi.tuna.tsinghua.edu.cn/simple

2. 调用纯语言模型

from openai import OpenAI

# Client pointed at the SiliconFlow endpoint through the OpenAI SDK.
client = OpenAI(
    api_key="sk-key",  # replace with your own API key
    base_url="https://api.siliconflow.cn/v1/",
)

# Request a streamed completion so the text can be shown as it is generated.
response = client.chat.completions.create(
    model='deepseek-ai/DeepSeek-V3',  # see the official site for other language models
    messages=[
        {
            'role': 'user',
            'content': "中国大模型行业2025年将会迎来哪些机遇和挑战"
        }
    ],
    stream=True
)

# Print each text fragment as it arrives.
for chunk in response:
    # A streamed chunk may carry no choices at all; skip it instead of
    # failing on choices[0].
    if not chunk.choices:
        continue
    piece = chunk.choices[0].delta.content
    # delta.content can be None (or empty); printing it unconditionally
    # would write the literal text "None" into the output.
    if piece:
        print(piece, end='', flush=True)
# Terminate the output line once the stream is exhausted.
print()

3. 调用图文多模态模型

import base64
import io
from typing import Optional

from openai import OpenAI
from PIL import Image
 
# Initialise the OpenAI client (replace "sk-key" with a real API key).
client = OpenAI(base_url="https://api.siliconflow.cn/v1/", api_key="sk-key")
 
def convert_image_to_webp_base64(input_image_path: str) -> Optional[str]:
    """Encode a local image as the Base64 text of its WebP representation.

    Args:
        input_image_path: Path of the image file to open.

    Returns:
        The Base64 string (decoded as UTF-8) of the image re-saved as WebP,
        or ``None`` if opening or encoding the image fails. In the failure
        case the error is printed rather than raised.
    """
    try:
        with Image.open(input_image_path) as img:
            buffer = io.BytesIO()
            # quality=85 trades encoded size against sharpness.
            img.save(buffer, format='WEBP', quality=85)
            return base64.b64encode(buffer.getvalue()).decode('utf-8')
    except Exception as e:
        # Best-effort by design: the caller checks for a falsy result, so
        # report the problem and signal failure with None.
        print(f"图片转换错误: {e}")
        return None
 
# 1. Convert the local image
input_image_path = "3.jpg"  # replace with the real image path
base64_image = convert_image_to_webp_base64(input_image_path)

if not base64_image:
    # Abort with a non-zero exit status and the message on stderr.
    # SystemExit is always available, whereas the exit() helper is injected
    # by the site module and exits with status 0 when called without args.
    raise SystemExit("图片转换失败,请检查路径和格式")
 
# 2. Create the streaming request
# Image part: the Base64 payload is embedded as a data URL declaring WebP.
image_part = {
    "type": "image_url",
    "image_url": {
        "url": f"data:image/webp;base64,{base64_image}",
        # NOTE(review): the original note said this balances speed and
        # accuracy; confirm what "high" means for the chosen model.
        "detail": "high"
    }
}
# Text part: the instruction sent along with the image.
prompt_part = {
    "type": "text",
    "text": "使用ocr识别图片内容并输出"  # replace with your own prompt
}
response = client.chat.completions.create(
    model="zai-org/GLM-4.5V",
    # Alternative model kept from the original: "Qwen/Qwen2.5-VL-72B-Instruct"
    messages=[{"role": "user", "content": [image_part, prompt_part]}],
    stream=True,
    max_tokens=1000  # limits the response length
)
 
# 3. Consume the streamed response
print("模型响应:")
pieces = []
for chunk in response:
    # A streamed chunk may carry no choices; skip it instead of raising
    # IndexError on choices[0].
    if not chunk.choices:
        continue
    text_chunk = chunk.choices[0].delta.content
    if text_chunk:
        # Flush so each fragment is visible immediately.
        print(text_chunk, end='', flush=True)
        pieces.append(text_chunk)
# Assemble the full text once instead of growing a string in the loop.
full_response = "".join(pieces)
# End the streamed line so the completion marker gets a line of its own.
print()
print("完成")
Logo

Agent 垂直技术社区,欢迎活跃、内容共建。

更多推荐