让大模型调用外部函数与API

前言

Function Calling(函数调用)是让LLM能够调用外部函数、API和工具的核心能力。通过Function Calling,LLM可以突破纯文本生成的限制,实现数据查询、系统操作、第三方服务集成等功能。本文详细介绍Function Calling的原理与实践。


Function Calling概述

什么是Function Calling

Function Calling允许LLM根据用户意图,决定调用哪个函数以及传递什么参数。

| 特性 | 说明 |
| --- | --- |
| 意图理解 | LLM理解用户需求,匹配合适的函数 |
| 参数提取 | 从自然语言中提取结构化参数 |
| 结构化输出 | 返回标准化的函数调用指令 |
| 可扩展性 | 轻松添加新函数扩展能力 |

工作流程

用户输入: "北京明天天气怎么样?"
    ↓
LLM分析: 需要调用天气查询函数
    ↓
Function Call: get_weather(city="北京", date="明天")
    ↓
执行函数: 调用天气API获取数据
    ↓
LLM生成: 基于返回数据生成回答
    ↓
最终回答: "北京明天晴,温度5-12°C..."

OpenAI Function Calling

基础用法

from openai import OpenAI
import json

client = OpenAI()

# Function schemas advertised to the model via the `tools=` argument.
# Each entry names one callable, describes when to use it, and gives a
# JSON Schema for its parameters.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "获取指定城市的天气信息",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "城市名称,如:北京、上海"
                    },
                    "date": {
                        "type": "string",
                        "description": "日期,如:今天、明天、2024-01-15",
                        # NOTE(review): presumably advisory only; the Python
                        # get_weather default is what applies when the model
                        # omits the argument — confirm.
                        "default": "今天"
                    }
                },
                # Only `city` is mandatory; `date` may be omitted.
                "required": ["city"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "search_restaurants",
            "description": "搜索附近的餐厅",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "位置或地址"
                    },
                    "cuisine": {
                        "type": "string",
                        "description": "菜系类型",
                        # Closed set of allowed cuisine values.
                        "enum": ["中餐", "日料", "西餐", "韩餐", "火锅", "烧烤"]
                    },
                    "price_range": {
                        "type": "string",
                        "description": "价格区间",
                        # Closed set of allowed price tiers.
                        "enum": ["便宜", "中等", "高档"]
                    }
                },
                "required": ["location"]
            }
        }
    }
]

# Call the API, offering the tool schemas to the model
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": "北京今天天气怎么样?"}
    ],
    tools=tools,
    tool_choice="auto"  # one of: "auto", "none", "required", or a specific function
)

# Check whether the model asked for any function calls
message = response.choices[0].message

if message.tool_calls:
    for tool_call in message.tool_calls:
        function_name = tool_call.function.name
        # The arguments are decoded from a JSON string before use
        function_args = json.loads(tool_call.function.arguments)
        print(f"调用函数: {function_name}")
        print(f"参数: {function_args}")

完整调用流程

def get_weather(city: str, date: str = "今天") -> dict:
    """Return mock weather data for a city (stand-in for a real weather API).

    Args:
        city: City name; only "北京" and "上海" have canned data.
        date: Free-form date label, echoed back unchanged in the result.

    Returns:
        A dict with the keys ``city``, ``date``, ``temp``, ``condition`` and
        ``humidity``. Unknown cities get "未知" for every weather field so the
        result always has the same shape.
    """
    weather_data = {
        "北京": {"temp": "5-12°C", "condition": "晴", "humidity": "30%"},
        "上海": {"temp": "8-15°C", "condition": "多云", "humidity": "65%"},
    }
    # Same keys as the known-city records, so callers never hit a missing key.
    unknown = {"temp": "未知", "condition": "未知", "humidity": "未知"}
    data = weather_data.get(city, unknown)
    return {"city": city, "date": date, **data}

def search_restaurants(location: str, cuisine: str = None, price_range: str = None) -> list:
    """Mock restaurant search.

    All three arguments are accepted but not used: the same two canned
    restaurants are returned for every query.
    """
    noodle_house = {"name": "老北京炸酱面", "rating": 4.5, "price": "¥30/人"}
    roast_duck = {"name": "全聚德烤鸭", "rating": 4.8, "price": "¥200/人"}
    return [noodle_house, roast_duck]

# Dispatch table: tool name reported by the model -> Python callable.
available_functions = dict(
    get_weather=get_weather,
    search_restaurants=search_restaurants,
)

def _execute_tool_call(tool_call) -> dict:
    """Run one tool call and wrap its outcome as a ``tool`` message.

    Failures (malformed JSON arguments, unknown function, exception raised by
    the function) are reported as an ``{"error": ...}`` payload instead of
    being raised, so each tool call still gets a reply message.
    """
    function_name = tool_call.function.name
    try:
        function_args = json.loads(tool_call.function.arguments)
    except json.JSONDecodeError as exc:
        function_result = {"error": f"参数解析失败: {exc}"}
    else:
        handler = available_functions.get(function_name)
        if handler is None:
            function_result = {"error": f"函数 {function_name} 不存在"}
        else:
            try:
                function_result = handler(**function_args)
            except Exception as exc:  # boundary: surface the failure to the model
                function_result = {"error": f"函数 {function_name} 执行失败: {exc}"}

    return {
        "role": "tool",
        "tool_call_id": tool_call.id,
        "content": json.dumps(function_result, ensure_ascii=False),
    }


def chat_with_functions(user_message: str, max_rounds: int = 5) -> str:
    """Answer a user message, letting the model call local functions.

    The model is queried in a loop: while it keeps requesting tool calls,
    they are executed and their results appended to the conversation. The
    common case (one batch of tool calls, then an answer) still costs two
    requests.

    Args:
        user_message: The user's question.
        max_rounds: Maximum number of requests in which the model may call
            tools. Once exhausted, a final request is sent with
            ``tool_choice="none"`` to ask for a text answer.

    Returns:
        The assistant's text reply, or ``""`` when the reply has no content.
    """
    messages = [{"role": "user", "content": user_message}]

    for _ in range(max_rounds):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )
        assistant_message = response.choices[0].message

        # No tool calls: this message is the answer.
        if not assistant_message.tool_calls:
            return assistant_message.content or ""

        # Record the assistant turn, then one tool message per requested call.
        messages.append(assistant_message)
        messages.extend(
            _execute_tool_call(tool_call)
            for tool_call in assistant_message.tool_calls
        )

    # Round budget exhausted: request an answer with tool calling disabled.
    final_response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        tools=tools,
        tool_choice="none",
    )
    return final_response.choices[0].message.content or ""

# Usage example: one weather question and one restaurant search
print(chat_with_functions("北京今天天气怎么样?"))
print(chat_with_functions("帮我在王府井附近找一家评价好的中餐厅"))

并行函数调用

多函数同时调用

# The model can return several function calls in a single response
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": "北京和上海今天的天气分别怎么样?"}
    ],
    tools=tools,
    tool_choice="auto"
)

message = response.choices[0].message

# May contain more than one entry in tool_calls
if message.tool_calls:
    print(f"需要调用 {len(message.tool_calls)} 个函数")
    for tool_call in message.tool_calls:
        # `arguments` is printed as the raw string here; it is not decoded
        print(f"- {tool_call.function.name}: {tool_call.function.arguments}")

并行执行优化

import asyncio
from concurrent.futures import ThreadPoolExecutor

async def execute_functions_parallel(tool_calls: list) -> list:
    """Execute several tool calls concurrently.

    Each synchronous function from ``available_functions`` runs on the event
    loop's default thread-pool executor, so a new pool is not created and
    torn down for every call.

    Args:
        tool_calls: Tool-call objects exposing ``id``, ``function.name`` and
            ``function.arguments`` (a JSON string).

    Returns:
        A list of ``tool`` messages in the same order as ``tool_calls``.

    Raises:
        Propagates the first failure: ``json.JSONDecodeError`` for malformed
        arguments, ``KeyError`` for an unknown function name, or whatever
        the called function raises.
    """
    # Inside a coroutine the running loop is the one to use.
    loop = asyncio.get_running_loop()

    async def execute_one(tool_call):
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)

        # Passing None selects the loop's shared default executor.
        result = await loop.run_in_executor(
            None,
            lambda: available_functions[function_name](**function_args),
        )

        return {
            "tool_call_id": tool_call.id,
            "role": "tool",
            "content": json.dumps(result, ensure_ascii=False),
        }

    # gather() preserves the input order in its result list.
    return await asyncio.gather(*(execute_one(tc) for tc in tool_calls))

函数Schema设计

JSON Schema详解

# Annotated example of a complete tool schema, including a nested object
# parameter and numeric / pattern constraints.
function_schema = {
    "type": "function",
    "function": {
        "name": "create_order",  # function name: short and unambiguous
        "description": """
创建订单。在用户明确表示要下单购买商品时调用此函数。
注意:
- 需要确认商品和数量
- 需要用户提供收货地址
- 如果信息不完整,应先询问用户
""",  # detailed description: helps the LLM decide when to call the function
        "parameters": {
            "type": "object",
            "properties": {
                "product_id": {
                    "type": "string",
                    "description": "商品ID"
                },
                "quantity": {
                    "type": "integer",
                    "description": "购买数量",
                    # Declared range for the quantity: 1 to 100 inclusive
                    "minimum": 1,
                    "maximum": 100
                },
                "address": {
                    # Nested object with its own properties and required list
                    "type": "object",
                    "description": "收货地址",
                    "properties": {
                        "province": {"type": "string"},
                        "city": {"type": "string"},
                        "district": {"type": "string"},
                        "street": {"type": "string"},
                        # Pattern: 11 digits, starting with 1, second digit 3-9
                        "phone": {"type": "string", "pattern": "^1[3-9]\\d{9}$"}
                    },
                    # `district` is the only optional address field
                    "required": ["province", "city", "street", "phone"]
                },
                "payment_method": {
                    "type": "string",
                    "enum": ["alipay", "wechat", "credit_card"],
                    "description": "支付方式"
                },
                "notes": {
                    "type": "string",
                    "description": "订单备注(可选)"
                }
            },
            # payment_method and notes are optional
            "required": ["product_id", "quantity", "address"]
        }
    }
}

复杂类型示例

# Array type: a list of 1-10 objects, each with a name and a numeric value
array_param = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "value": {"type": "number"}
        }
    },
    "minItems": 1,
    "maxItems": 10,
    "description": "数据点列表"
}

# Union type: the value may be either a string or a number
# NOTE(review): support for `oneOf` may vary by provider/mode — confirm
# against the target API before relying on it.
union_param = {
    "oneOf": [
        {"type": "string"},
        {"type": "number"}
    ],
    "description": "可以是字符串或数字"
}

# Parameter with a declared default value
# NOTE(review): presumably the called function must still apply this default
# itself when the argument is omitted — confirm.
default_param = {
    "type": "string",
    "default": "default_value",
    "description": "带默认值的参数"
}

函数描述最佳实践

# ❌ Poor description
bad_function = {
    "name": "search",
    "description": "搜索",  # too terse to tell the model when to call it
    # `{...}` is a placeholder here (literally a set holding Ellipsis),
    # not a usable parameter schema.
    "parameters": {...}
}

# ✅ Good description: states purpose, usage scenarios, caveats and the
# shape of the result
good_function = {
    "name": "search_products",
    "description": """
在商品数据库中搜索商品。

使用场景:
- 用户想要查找特定商品
- 用户询问商品价格、库存等信息
- 用户要浏览某类商品

注意事项:
- 关键词应该是用户想查找的商品名称或特征
- 如果用户没有明确指定分类,category参数应留空
- 价格范围应该是合理的数值

返回:匹配的商品列表,包含名称、价格、库存等信息
""",
    "parameters": {
        "type": "object",
        "properties": {
            "keyword": {
                "type": "string",
                "description": "搜索关键词,如:iPhone、笔记本电脑"
            },
            "category": {
                "type": "string",
                "enum": ["电子产品", "服装", "食品", "家居"],
                "description": "商品分类(可选)"
            },
            "min_price": {
                "type": "number",
                "description": "最低价格(元)",
                "minimum": 0
            },
            "max_price": {
                "type": "number",
                "description": "最高价格(元)"
            },
            "sort_by": {
                "type": "string",
                "enum": ["price_asc", "price_desc", "sales", "rating"],
                "default": "sales",
                "description": "排序方式"
            }
        },
        # Only the keyword is mandatory; every filter is optional
        "required": ["keyword"]
    }
}

强制函数调用

tool_choice选项

# Automatic selection (default)
response = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=tools,
    tool_choice="auto"  # the LLM decides on its own whether to call a function
)

# Disable function calling
response = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=tools,
    tool_choice="none"  # never call any function
)

# Force a function call, without pinning which function
response = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=tools,
    tool_choice="required"  # the model must call a function; it picks which
)

# Force one specific function by name
response = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=tools,
    tool_choice={
        "type": "function",
        "function": {"name": "get_weather"}
    }
)

结构化输出场景

# Using function calling to obtain structured output: the "function" is never
# executed; its argument schema defines the JSON shape to extract.
extract_schema = {
    "type": "function",
    "function": {
        "name": "extract_entities",
        "description": "从文本中提取实体信息",
        "parameters": {
            "type": "object",
            "properties": {
                "persons": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "人名列表"
                },
                "organizations": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "组织机构列表"
                },
                "locations": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "地点列表"
                },
                "dates": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "日期列表"
                }
            },
            # All four lists are listed as required
            "required": ["persons", "organizations", "locations", "dates"]
        }
    }
}

def extract_entities(text: str) -> dict:
    """Extract named entities from ``text`` via a forced function call.

    The request pins ``tool_choice`` to ``extract_entities``; the arguments
    of the first returned tool call are decoded from JSON and returned.
    """
    forced_tool = {"type": "function", "function": {"name": "extract_entities"}}
    conversation = [
        {"role": "system", "content": "提取文本中的实体信息"},
        {"role": "user", "content": text},
    ]

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        tools=[extract_schema],
        tool_choice=forced_tool,
    )

    first_call = completion.choices[0].message.tool_calls[0]
    return json.loads(first_call.function.arguments)

# Usage
result = extract_entities("2024年1月,张三在北京的阿里巴巴公司参加了技术峰会")
print(result)
# Example output:
# {"persons": ["张三"], "organizations": ["阿里巴巴"], "locations": ["北京"], "dates": ["2024年1月"]}

错误处理

函数执行错误

def safe_function_call(function_name: str, function_args: dict) -> dict:
    """Call a registered function without letting exceptions escape.

    Returns ``{"success": True, "data": ...}`` on success. Otherwise returns
    a dict with ``error`` set to True plus ``message`` and ``suggestion``
    for an unknown function, a ``TypeError``, or any other exception.
    """

    def failure(message, suggestion):
        # Shared shape for every error outcome.
        return {"error": True, "message": message, "suggestion": suggestion}

    try:
        if function_name in available_functions:
            outcome = available_functions[function_name](**function_args)
            return {"success": True, "data": outcome}

        known = list(available_functions.keys())
        return failure(f"函数 {function_name} 不存在", f"可用函数: {known}")
    except TypeError as exc:
        return failure(f"参数错误: {exc}", "请检查参数类型和必填项")
    except Exception as exc:
        return failure(f"执行错误: {exc}", "请稍后重试或联系管理员")

参数验证

from pydantic import BaseModel, Field, validator
from typing import Optional, List

class WeatherRequest(BaseModel):
    """Validated arguments for ``get_weather``."""

    # Required; length must be between 1 and 50 characters.
    city: str = Field(..., min_length=1, max_length=50)
    # Optional; defaults to "今天".
    date: str = Field(default="今天")
    
    @validator('city')
    def validate_city(cls, v):
        """Reject any city outside the hard-coded supported list."""
        valid_cities = ["北京", "上海", "广州", "深圳"]
        if v not in valid_cities:
            raise ValueError(f"不支持的城市: {v}")
        return v

class RestaurantSearchRequest(BaseModel):
    """Validated arguments for a restaurant search."""

    # Required.
    location: str
    # Optional filters; None when not supplied.
    cuisine: Optional[str] = None
    price_range: Optional[str] = None
    # Result cap between 1 and 50, default 10.
    # NOTE(review): search_restaurants as defined in this file has no `limit`
    # parameter, so this field cannot be forwarded to it unchanged.
    limit: int = Field(default=10, ge=1, le=50)

def validate_and_call(function_name: str, args: dict) -> dict:
    """Validate tool arguments with pydantic, then call the function.

    Args:
        function_name: Key into ``available_functions``.
        args: Raw keyword arguments produced by the model.

    Returns:
        The function's result, or ``{"error": ...}`` when the function is
        unknown, validation fails, or a validated call raises. Functions
        with no registered validator are called with ``args`` as-is and may
        raise.
    """
    import inspect

    validators = {
        "get_weather": WeatherRequest,
        "search_restaurants": RestaurantSearchRequest
    }

    handler = available_functions.get(function_name)
    if handler is None:
        return {"error": "未知函数"}

    model = validators.get(function_name)
    if model is None:
        return handler(**args)

    try:
        validated = model(**args)
        # Prefer pydantic v2's model_dump(); fall back to v1's dict().
        dump = getattr(validated, "model_dump", None) or validated.dict
        call_args = dump()

        # A model may carry fields the function does not accept (e.g.
        # RestaurantSearchRequest.limit); forward only accepted ones unless
        # the function takes **kwargs.
        params = inspect.signature(handler).parameters
        takes_any_keyword = any(
            p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()
        )
        if not takes_any_keyword:
            call_args = {k: v for k, v in call_args.items() if k in params}

        return handler(**call_args)
    except Exception as e:  # boundary: report validation/call failures
        return {"error": str(e)}

重试机制

import time
from functools import wraps

def retry_on_error(max_retries: int = 3, delay: float = 1.0):
    """Decorator factory that retries a function when it raises.

    Args:
        max_retries: Total number of attempts; must be at least 1.
        delay: Base pause in seconds. The pause grows linearly
            (``delay * attempt_number``) and is skipped after the last try.

    Returns:
        A decorator. The wrapped function returns the original result on
        success; if every attempt raises, it returns
        ``{"error": <last error text>, "attempts": max_retries}``.

    Raises:
        ValueError: If ``max_retries`` is less than 1. Otherwise the wrapped
            function would never run and would report ``"None"`` as error.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_error = None
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:  # retry boundary: any failure counts
                    last_error = e
                    if attempt < max_retries:
                        time.sleep(delay * attempt)
            return {"error": str(last_error), "attempts": max_retries}
        return wrapper
    return decorator

@retry_on_error(max_retries=3)
def call_external_api(endpoint: str, params: dict) -> dict:
    """Call an external API, wrapped by ``retry_on_error`` (3 attempts).

    Placeholder: the body is not implemented yet, so it returns ``None``
    despite the ``dict`` annotation.
    """
    # TODO: the actual API call logic goes here
    pass

实战:智能客服系统

from openai import OpenAI
from datetime import datetime
import json

client = OpenAI()

# Tool schemas for the customer-service bot: order lookup, ticket creation,
# FAQ search and hand-off to a human agent.
customer_service_tools = [
    {
        "type": "function",
        "function": {
            "name": "query_order",
            "description": "查询订单信息。当用户询问订单状态、物流信息时调用。",
            "parameters": {
                "type": "object",
                "properties": {
                    "order_id": {
                        "type": "string",
                        "description": "订单号"
                    },
                    "phone": {
                        "type": "string",
                        "description": "下单手机号(可选,用于验证)"
                    }
                },
                "required": ["order_id"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "create_ticket",
            "description": "创建客服工单。当用户需要投诉、退款、换货等需要人工处理时调用。",
            "parameters": {
                "type": "object",
                "properties": {
                    # The property is itself named "type" (the ticket type);
                    # it is distinct from the JSON Schema "type" keyword.
                    "type": {
                        "type": "string",
                        "enum": ["complaint", "refund", "exchange", "other"],
                        "description": "工单类型"
                    },
                    "order_id": {
                        "type": "string",
                        "description": "相关订单号"
                    },
                    "description": {
                        "type": "string",
                        "description": "问题描述"
                    },
                    "priority": {
                        "type": "string",
                        "enum": ["low", "medium", "high"],
                        "default": "medium"
                    }
                },
                # order_id and priority are optional
                "required": ["type", "description"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "search_faq",
            "description": "搜索常见问题。当用户询问一般性问题时调用。",
            "parameters": {
                "type": "object",
                "properties": {
                    "question": {
                        "type": "string",
                        "description": "用户问题"
                    },
                    "category": {
                        "type": "string",
                        "enum": ["shipping", "payment", "return", "account", "product"],
                        "description": "问题分类"
                    }
                },
                # category is optional, so the model may omit it
                "required": ["question"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "transfer_to_human",
            "description": "转接人工客服。当问题复杂无法解决,或用户明确要求人工服务时调用。",
            "parameters": {
                "type": "object",
                "properties": {
                    "reason": {
                        "type": "string",
                        "description": "转接原因"
                    },
                    "context": {
                        "type": "string",
                        "description": "对话上下文摘要"
                    }
                },
                "required": ["reason"]
            }
        }
    }
]

# Mock implementations of the customer-service tools
def query_order(order_id: str, phone: str = None) -> dict:
    """Look up an order in a small in-memory table.

    ``phone`` is accepted but not used. Unknown order ids yield
    ``{"error": "订单不存在"}``.
    """
    known_orders = {
        "ORD123456": {
            "status": "已发货",
            "logistics": "顺丰快递 SF1234567890",
            "estimated_arrival": "2024-01-20",
            "items": ["iPhone 15 Pro x1"],
        },
    }
    if order_id in known_orders:
        return known_orders[order_id]
    return {"error": "订单不存在"}

def create_ticket(type: str, description: str, order_id: str = None, priority: str = "medium") -> dict:
    """Create a mock support ticket.

    The ticket id is "TK" followed by the current local time as
    YYYYMMDDHHMMSS. None of the arguments affect the returned record.
    """
    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
    ticket_id = f"TK{timestamp}"
    confirmation = f"工单已创建,编号{ticket_id},预计24小时内处理"
    return dict(ticket_id=ticket_id, status="已创建", message=confirmation)

def search_faq(question: str, category: str = None) -> dict:
    """Search the mock FAQ table.

    Args:
        question: The user's question. Used for ranking only when no
            category is given.
        category: Optional FAQ category. When given, that category's entries
            are returned in stored order; a category with no entries gives
            an empty list.

    Returns:
        ``{"results": [...]}`` with at most three ``{"q": ..., "a": ...}``
        entries. Without a category, all entries are searched and ordered by
        how many characters their question shares with ``question``.
    """
    faqs = {
        "shipping": [
            {"q": "多久能收到货?", "a": "一般3-5个工作日"},
            {"q": "支持哪些快递?", "a": "支持顺丰、京东、菜鸟等主流快递"}
        ],
        "return": [
            {"q": "如何退货?", "a": "下单7天内可申请无理由退货,请在订单详情页点击退货申请"},
            {"q": "退款多久到账?", "a": "退货收到后3-5个工作日内原路退回"}
        ]
    }

    if category is not None:
        candidates = faqs.get(category, [])
    else:
        # The schema makes category optional, so an omitted category must
        # not mean "no results": search every category instead.
        every_entry = [entry for entries in faqs.values() for entry in entries]
        asked = set(question)
        # sorted() is stable, so ties keep their stored order.
        candidates = sorted(
            every_entry,
            key=lambda entry: len(asked & set(entry["q"])),
            reverse=True,
        )

    return {"results": candidates[:3]}

def transfer_to_human(reason: str, context: str = None) -> dict:
    """Return a fixed mock hand-off response; both arguments are unused."""
    return dict(
        status="success",
        queue_position=3,
        estimated_wait="约5分钟",
        message="正在为您转接人工客服,请稍候...",
    )

# Dispatch table for the customer-service bot: tool name -> implementation.
cs_functions = dict(
    query_order=query_order,
    create_ticket=create_ticket,
    search_faq=search_faq,
    transfer_to_human=transfer_to_human,
)

class CustomerServiceBot:
    """Multi-turn e-commerce support bot built on function calling.

    Only the user/assistant text of each turn is kept in
    ``conversation_history``; tool calls and their results live in the
    per-turn message list and are dropped once the turn ends.
    """

    # Maximum number of requests per user message in which the model may
    # call tools; after that a text-only answer is requested.
    MAX_TOOL_ROUNDS = 5

    def __init__(self):
        self.conversation_history = []
        self.system_prompt = """
你是一个专业的电商客服助手。请遵循以下原则:

1. 礼貌友好:使用亲切的语气与用户交流
2. 高效解决:尽快理解并解决用户问题
3. 合理使用工具:根据用户需求调用合适的函数
4. 适时转人工:复杂问题或用户要求时转接人工

注意:
- 查询订单前应确认用户提供了订单号
- 创建工单时要完整记录用户问题
- 不确定的问题先搜索FAQ
"""

    def _run_tool(self, tool_call) -> dict:
        """Execute one tool call and return the matching ``tool`` message.

        Malformed JSON arguments, an unknown function name, or an exception
        raised by the function become an ``{"error": ...}`` payload instead
        of being raised, so each tool call still gets a reply message.
        """
        func_name = tool_call.function.name
        try:
            func_args = json.loads(tool_call.function.arguments)
        except json.JSONDecodeError as exc:
            result = {"error": f"参数解析失败: {exc}"}
        else:
            print(f"[调用函数] {func_name}: {func_args}")
            handler = cs_functions.get(func_name)
            if handler is None:
                result = {"error": f"函数 {func_name} 不存在"}
            else:
                try:
                    result = handler(**func_args)
                except Exception as exc:  # boundary: report the failure to the model
                    result = {"error": f"函数 {func_name} 执行失败: {exc}"}

        return {
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result, ensure_ascii=False),
        }

    def chat(self, user_message: str) -> str:
        """Handle one user message and return the assistant's reply.

        The user message and the final reply are appended to
        ``conversation_history``. A reply without content is stored and
        returned as ``""``.
        """
        self.conversation_history.append({
            "role": "user",
            "content": user_message
        })

        messages = [
            {"role": "system", "content": self.system_prompt},
            *self.conversation_history
        ]

        for _ in range(self.MAX_TOOL_ROUNDS):
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=customer_service_tools,
                tool_choice="auto"
            )
            assistant_message = response.choices[0].message

            # No tool calls: this message is the reply.
            if not assistant_message.tool_calls:
                break

            messages.append(assistant_message)
            messages.extend(
                self._run_tool(tool_call)
                for tool_call in assistant_message.tool_calls
            )
        else:
            # Loop ended without a break: the model was still calling tools,
            # so request a reply with tool calling disabled.
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=customer_service_tools,
                tool_choice="none"
            )
            assistant_message = response.choices[0].message

        reply = assistant_message.content or ""
        self.conversation_history.append({
            "role": "assistant",
            "content": reply
        })

        return reply

# Usage example
bot = CustomerServiceBot()

# Scripted conversation: the same bot instance handles every message, so
# later messages see the history of earlier ones
conversations = [
    "你好,我想查一下我的订单",
    "订单号是ORD123456",
    "我想退货,商品有质量问题",
    "转人工"
]

for msg in conversations:
    print(f"用户: {msg}")
    response = bot.chat(msg)
    print(f"客服: {response}\n")

Anthropic Function Calling

import anthropic

# NOTE: this rebinds the names `client` and `tools` that the OpenAI snippets
# earlier in this file also use; run it as a separate script.
client = anthropic.Anthropic()

# Anthropic's tool definition format: flat entries with `input_schema`
tools = [
    {
        "name": "get_weather",
        "description": "获取指定城市的天气信息",
        "input_schema": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "城市名称"
                }
            },
            "required": ["city"]
        }
    }
]

message = client.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=1024,
    tools=tools,
    messages=[
        {"role": "user", "content": "北京天气怎么样?"}
    ]
)

# Check whether the model wants to use a tool
if message.stop_reason == "tool_use":
    # Only the content blocks of type "tool_use" are printed
    for block in message.content:
        if block.type == "tool_use":
            print(f"工具: {block.name}")
            print(f"输入: {block.input}")
            print(f"ID: {block.id}")

工业级进阶技巧

1. 并行函数调用 (Parallel Tool Calling)

现代模型(如 GPT-4o)支持在单次响应中返回多个函数调用指令。这对于需要同时获取多个信息的场景(如“查一下北京和上海的天气”)非常有用。

# Handle a response that contains several tool calls
if assistant_message.tool_calls:
    messages.append(assistant_message)

    # The calls run one after another here; asyncio or a thread pool could
    # be used to run them concurrently.
    for tool_call in assistant_message.tool_calls:
        func_name = tool_call.function.name
        func_args = json.loads(tool_call.function.arguments)

        # Execute the function and collect its result
        # NOTE(review): `execute_function` is not defined in this file;
        # presumably a dispatcher such as safe_function_call — confirm.
        result = execute_function(func_name, func_args)

        # Append one tool message per call, keyed by its tool_call_id.
        # ensure_ascii=False keeps non-ASCII text readable, matching the
        # other json.dumps calls in this file.
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result, ensure_ascii=False)
        })

2. Few-shot 增强函数调用精度

如果模型在选择函数或提取参数时不够准确,可以通过 Few-shot 示例来引导。

# Few-shot prompt: one worked tool-call exchange placed before the real
# request.
# NOTE(review): the `save_user` tool is not defined in this file; its schema
# presumably has to be passed in `tools` with the request — confirm.
messages = [
    {"role": "system", "content": "你是一个数据提取助手。"},
    # Example 1: user input, the assistant's tool call, and the tool's result
    {"role": "user", "content": "提取:张三,25岁"},
    {
        "role": "assistant", 
        "tool_calls": [{
            "id": "call_1",
            "type": "function",
            "function": {"name": "save_user", "arguments": '{"name": "张三", "age": 25}'}
        }]
    },
    # tool_call_id matches the id of the tool call above
    {"role": "tool", "tool_call_id": "call_1", "content": '{"status": "success"}'},
    # The real request
    {"role": "user", "content": "提取:李四,30岁"}
]

总结

Function Calling是连接LLM与外部世界的桥梁:

| 要点 | 说明 |
| --- | --- |
| Schema设计 | 清晰的描述和参数定义 |
| 错误处理 | 完善的异常捕获和重试 |
| 安全验证 | 参数校验和权限控制 |
| 性能优化 | 并行调用和缓存 |

Function Calling使LLM从“只会说话”进化为“能做事”的智能助手。

参考资源

版权声明: 如无特别声明,本文版权归 sshipanoo 所有,转载请注明本文链接。

(采用 CC BY-NC-SA 4.0 许可协议进行授权)

本文标题:《 LLM应用开发——Function Calling 》

本文链接:http://localhost:3015/ai/Function-Calling.html

本文最后一次更新距今已有一段时间,文章中的某些内容可能已过时!