# [Feat] Add reasoning parser for OpenAI API #1154
**Changes from all commits:** `04a7714`, `190e1de`, `6ec4ef3`, `957875e`, `1216489`, `e6dc21a`, `e91f99c`, `a0bc750`
```diff
@@ -73,7 +73,7 @@ class CompletionRequest(BaseModel):
     # prompt: string or tokens
     prompt: Union[str, List[str], List[int], List[List[int]]]
     suffix: Optional[str] = None
-    max_tokens: Optional[int] = 16
+    max_tokens: Optional[int] = 8192
     temperature: Optional[float] = 1.0
     top_p: Optional[float] = 1.0
     n: Optional[int] = 1
```
```diff
@@ -145,7 +145,7 @@ class ChatCompletionRequest(BaseModel):
     stream: Optional[bool] = False
     stream_options: Optional[StreamOptions] = None
     stop: Optional[Union[str, List[str]]] = None
-    max_tokens: Optional[int] = 16
+    max_tokens: Optional[int] = 8192
     presence_penalty: Optional[float] = 0.0
     frequency_penalty: Optional[float] = 0.0
     logit_bias: Optional[Dict[str, float]] = None
```
```diff
@@ -166,14 +166,18 @@ class ChatCompletionRequest(BaseModel):
     ) # noqa
     parallel_tool_calls: Optional[bool] = True

+    # OpenAI parameters for reasoning and others
+    chat_template_kwargs: Optional[Dict] = None
+    separate_reasoning: Optional[bool] = True
+    stream_reasoning: Optional[bool] = False
+
     # Additional parameters supported by LightLLM
     do_sample: Optional[bool] = True
     top_k: Optional[int] = -1
     repetition_penalty: Optional[float] = 1.0
     ignore_eos: Optional[bool] = False
     role_settings: Optional[Dict[str, str]] = None
     character_settings: Optional[List[Dict[str, str]]] = None
-    chat_template_kwargs: Optional[Dict[str, bool]] = None

     # Class variables to store loaded default values
     _loaded_defaults: ClassVar[Dict[str, Any]] = {}
```
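The three new fields work together: `chat_template_kwargs` is forwarded to the chat template, `separate_reasoning` controls whether reasoning is split out of `content`, and `stream_reasoning` controls incremental delivery. A minimal sketch of a request exercising them; the server URL and model name are placeholders, not part of this PR:

```python
# Hypothetical client call; assumes a LightLLM server on localhost:8000
# serving a reasoning-capable model named "deepseek-r1".
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "deepseek-r1",
        "messages": [{"role": "user", "content": "What is 17 * 24?"}],
        "chat_template_kwargs": {"enable_thinking": True},
        "separate_reasoning": True,   # split reasoning out of message.content
        "stream_reasoning": False,    # only meaningful when stream=True
    },
)
message = resp.json()["choices"][0]["message"]
print(message.get("reasoning_content"))  # the model's chain of thought, if any
print(message.get("content"))            # the final answer
```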
```diff
@@ -255,8 +259,9 @@ class UsageInfo(BaseModel):


 class ChatMessage(BaseModel):
-    role: str
-    content: str
+    role: Optional[str] = None
+    content: Optional[str] = None
+    reasoning_content: Optional[str] = None
     tool_calls: Optional[List[ToolCall]] = Field(default=None, examples=[None])
```

> **Contributor** (on lines +262 to +263): Making the `role` and `content` fields optional …
```diff
@@ -283,6 +288,7 @@ class DeltaMessage(BaseModel):
     role: Optional[str] = None
     content: Optional[str] = None
     tool_calls: Optional[List[ToolCall]] = Field(default=None, examples=[None])
+    reasoning_content: Optional[str] = None


 class ChatCompletionStreamResponseChoice(BaseModel):
```
```diff
@@ -9,6 +9,8 @@
 import pickle
 import uuid

+from lightllm.server.reasoning_parser import ReasoningParser
+
 from .function_call_parser import TOOLS_TAG_LIST, FunctionCallParser, ToolCallItem
 from .build_prompt import build_prompt, init_tokenizer
```
```diff
@@ -17,7 +19,7 @@
 from http import HTTPStatus
 from PIL import Image
 import multiprocessing as mp
-from typing import AsyncGenerator, Union, List, Dict
+from typing import Any, AsyncGenerator, Optional, Union, List, Dict
 from typing import Callable
 from lightllm.server import TokenLoad
 from fastapi import BackgroundTasks, FastAPI, Request, WebSocket, WebSocketDisconnect
```
```diff
@@ -109,6 +111,38 @@ def _get_history_tool_calls_cnt(request: ChatCompletionRequest) -> int:
     return idx


+def _get_reasoning_from_request(request: ChatCompletionRequest) -> bool:
+    """Judge whether the request needs reasoning"""
+    reasoning_parser = get_env_start_args().reasoning_parser
+    if not reasoning_parser:
+        return False
+    if reasoning_parser in ["deepseek-v3"]:
+        return request.chat_template_kwargs is not None and request.chat_template_kwargs.get("thinking") is True
+    if reasoning_parser in ["qwen3", "glm45", "nano_v3", "interns1"]:
+        # qwen3, glm45, nano_v3, and interns1 are reasoning by default
+        return not request.chat_template_kwargs or request.chat_template_kwargs.get("enable_thinking", True) is True
+    return True  # default
+
+
+def _process_reasoning_stream(
+    index: int,
+    delta: str,
+    reasoning_parser_dict: Dict[int, ReasoningParser],
+    content: Dict[str, Any],
+    request: ChatCompletionRequest,
+) -> tuple[Optional[str], str]:
+    """Process reasoning content in streaming response"""
+    if index not in reasoning_parser_dict:
+        request_enable_reasoning = _get_reasoning_from_request(request)
+        reasoning_parser_dict[index] = ReasoningParser(
+            get_env_start_args().reasoning_parser,
+            request.stream_reasoning,
+            request_enable_reasoning,
+        )
+    reasoning_parser = reasoning_parser_dict[index]
+    return reasoning_parser.parse_stream_chunk(delta)
+
+
 async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Request) -> Response:
     from .api_http import g_objs
```

> **Contributor** (on lines +119 to +123): The logic in `_get_reasoning_from_request` …
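The per-parser defaults above are easy to misread, so here is a standalone restatement of the same decision table with the cases spelled out. `reasoning_enabled` is a hypothetical helper written for illustration only:

```python
from typing import Dict, Optional

def reasoning_enabled(reasoning_parser: Optional[str], chat_template_kwargs: Optional[Dict]) -> bool:
    """Mirror of _get_reasoning_from_request's rules, without the request/env plumbing."""
    if not reasoning_parser:
        return False  # no --reasoning_parser configured: never split reasoning
    if reasoning_parser == "deepseek-v3":
        # deepseek-v3 reasons only when explicitly asked for
        return chat_template_kwargs is not None and chat_template_kwargs.get("thinking") is True
    if reasoning_parser in ("qwen3", "glm45", "nano_v3", "interns1"):
        # these models reason unless explicitly disabled
        return not chat_template_kwargs or chat_template_kwargs.get("enable_thinking", True) is True
    return True  # every other parser: reasoning on by default

assert reasoning_enabled(None, None) is False
assert reasoning_enabled("deepseek-v3", None) is False
assert reasoning_enabled("deepseek-v3", {"thinking": True}) is True
assert reasoning_enabled("qwen3", None) is True
assert reasoning_enabled("qwen3", {"enable_thinking": False}) is False
assert reasoning_enabled("deepseek-r1", None) is True
```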
```diff
@@ -226,10 +260,30 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Request) -> Response:
         finish_reason = finish_reason_dict[sub_req_id]
         text = "".join(final_output_dict[sub_req_id])
+
+        # Handle reasoning content
+        reasoning_text = None
+        reasoning_parser = get_env_start_args().reasoning_parser
+        if reasoning_parser and request.separate_reasoning:
+            request_enable_reasoning = _get_reasoning_from_request(request)
+            try:
+                parser = ReasoningParser(
+                    model_type=reasoning_parser,
+                    stream_reasoning=False,
+                    force_reasoning=request_enable_reasoning,
+                )
+                reasoning_text, text = parser.parse_non_stream(text)
+            except Exception as e:
+                logger.error(f"Reasoning parsing error: {e}")
+                return create_error_response(
+                    HTTPStatus.BAD_REQUEST,
+                    "Failed to parse fc related info to json format!",
+                )
+
         # Handle tool_calls parsing
         tool_calls = None
         tool_choice = request.tool_choice
         tools = request.tools
         if tool_choice != "none" and any([i in text for i in TOOLS_TAG_LIST]):
             if finish_reason == "stop":
                 finish_reason = "tool_calls"
```

> **Contributor**: The error message returned in this `except` block still refers to function-call parsing. Suggested change: `"Failed to parse reasoning content!",`
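For reference, `parse_non_stream` as used above returns a `(reasoning_text, remaining_text)` pair. A minimal sketch of the split, assuming a model whose chain of thought is wrapped in `<think>` tags (that tag format is an assumption, not something this diff specifies):

```python
from lightllm.server.reasoning_parser import ReasoningParser  # import path as in the diff

parser = ReasoningParser(
    model_type="deepseek-r1",  # must match the server's --reasoning_parser
    stream_reasoning=False,
    force_reasoning=True,
)
reasoning, answer = parser.parse_non_stream("<think>17 * 24 = 408</think>The answer is 408.")
# expected under the assumed tag format:
#   reasoning == "17 * 24 = 408", answer == "The answer is 408."
```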
```diff
@@ -257,7 +311,12 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Request) -> Response:
             )
         if finish_reason == "tool_calls":
             text = ""
-        chat_message = ChatMessage(role="assistant", content=text, tool_calls=tool_calls)
+        chat_message = ChatMessage(
+            role="assistant",
+            content=text if text else "",
+            tool_calls=tool_calls,
+            reasoning_content=reasoning_text if reasoning_text else "",
+        )
         choice = ChatCompletionResponseChoice(
             index=i,
             message=chat_message,
```
```diff
@@ -273,6 +332,7 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Request) -> Response:
         return create_error_response(HTTPStatus.BAD_REQUEST, "stream api only support n = 1")

     parser_dict = {}
+    reasoning_parser_dict = {}

     # Streaming case
     async def stream_results() -> AsyncGenerator[bytes, None]:
```
```diff
@@ -284,12 +344,31 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
         async for sub_req_id, request_output, metadata, finish_status in results_generator:
             prompt_tokens = metadata["prompt_tokens"]
             completion_tokens += 1
-            if request.tool_choice != "none" and request.tools:
-                delta = request_output
-                group_request_id = convert_sub_id_to_group_id(sub_req_id)
-                index = sub_req_id
-                finish_reason = finish_status.get_finish_reason()
+            group_request_id = convert_sub_id_to_group_id(sub_req_id)
+            index = sub_req_id
+            delta = request_output
+            finish_reason = finish_status.get_finish_reason()
+
+            # Handle reasoning content
+            if get_env_start_args().reasoning_parser and request.separate_reasoning:
+                reasoning_text, delta = _process_reasoning_stream(
+                    index, delta, reasoning_parser_dict, request_output, request
+                )
+                if reasoning_text:
+                    choice_data = ChatCompletionStreamResponseChoice(
+                        index=0,
+                        delta=DeltaMessage(reasoning_content=reasoning_text),
+                        finish_reason=None,
+                    )
+                    chunk = ChatCompletionStreamResponse(
+                        id=group_request_id,
+                        created=created_time,
+                        choices=[choice_data],
+                        model=request.model,
+                    )
+                    yield f"data: {chunk.model_dump_json()}\n\n"
+
+            if request.tool_choice != "none" and request.tools:
                 if index not in parser_dict:
                     # provide a default value for tool_call_parser
                     tool_parser = getattr(g_objs.args, "tool_call_parser", None) or "llama3"
```
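With `stream=True`, reasoning deltas arrive as separate SSE chunks whose `delta` carries `reasoning_content` rather than `content`, so a client can route the two streams independently. A sketch of such a client; the URL, model name, and `[DONE]` sentinel follow common OpenAI-compatible conventions and are assumptions here:

```python
# Hypothetical streaming client separating reasoning deltas from answer deltas.
import json
import requests

with requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "deepseek-r1",
        "messages": [{"role": "user", "content": "Why is the sky blue?"}],
        "stream": True,
        "separate_reasoning": True,
        "stream_reasoning": True,  # deliver reasoning incrementally
    },
    stream=True,
) as resp:
    for line in resp.iter_lines():
        if not line or not line.startswith(b"data: "):
            continue
        payload = line[len(b"data: "):]
        if payload == b"[DONE]":
            break
        delta = json.loads(payload)["choices"][0]["delta"]
        if delta.get("reasoning_content"):
            print("[reasoning]", delta["reasoning_content"])
        if delta.get("content"):
            print("[answer]", delta["content"])
```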
```diff
@@ -368,7 +447,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
             else:
                 group_request_id = convert_sub_id_to_group_id(sub_req_id)

-                delta_message = DeltaMessage(role="assistant", content=request_output)
+                delta_message = DeltaMessage(role="assistant", content=delta)
                 if finish_status.is_finished():
                     finish_reason = finish_status.get_finish_reason()
                     stream_choice = ChatCompletionStreamResponseChoice(
```
```diff
@@ -33,6 +33,26 @@ class StartArgs:
     tool_call_parser: Optional[str] = field(
         default=None, metadata={"choices": ["llama3", "qwen25", "mistral", "deepseekv3", "kimi_k2", "qwen"]}
     )
+    reasoning_parser: Optional[str] = field(
+        default=None,
+        metadata={
+            "choices": [
+                "deepseek-r1",
+                "deepseek-v3",
+                "glm45",
+                "gpt-oss",
+                "kimi",
+                "kimi_k2",
+                "qwen3",
+                "qwen3-thinking",
+                "minimax",
+                "minimax-append-think",
+                "step3",
+                "nano_v3",
+                "interns1",
+            ]
+        },
+    )
     chat_template: Optional[str] = field(default=None)
     running_max_req_size: int = field(default=1000)
     tp: int = field(default=1)
```
> **Contributor** (on lines +39 to +53): The list of choices for `reasoning_parser` is hardcoded here. This same list is also hardcoded in `lightllm/server/core/objs/start_args_type.py`. This duplication makes maintenance difficult and error-prone. Consider defining this list as a constant in a shared location (e.g., `start_args_type.py`) and importing it here to avoid inconsistencies.
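A sketch of the deduplication the reviewer suggests; the constant name and its home are assumptions:

```python
from dataclasses import dataclass, field
from typing import Optional

# Hypothetical shared constant, defined once (e.g. in
# lightllm/server/core/objs/start_args_type.py) and imported by every use site.
REASONING_PARSER_CHOICES = [
    "deepseek-r1", "deepseek-v3", "glm45", "gpt-oss", "kimi", "kimi_k2",
    "qwen3", "qwen3-thinking", "minimax", "minimax-append-think",
    "step3", "nano_v3", "interns1",
]

@dataclass
class StartArgs:
    # Both copies of the field now share one source of truth for the choices.
    reasoning_parser: Optional[str] = field(
        default=None, metadata={"choices": REASONING_PARSER_CHOICES}
    )
```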