21 changes: 21 additions & 0 deletions lightllm/server/api_cli.py
@@ -132,6 +132,27 @@ def make_argument_parser() -> argparse.ArgumentParser:
default=None,
help="tool call parser type",
)
parser.add_argument(
"--reasoning_parser",
type=str,
choices=[
"deepseek-r1",
"deepseek-v3",
"glm45",
"gpt-oss",
"kimi",
"kimi_k2",
"qwen3",
"qwen3-thinking",
"minimax",
"minimax-append-think",
"step3",
"nano_v3",
"interns1",
],
Comment on lines +138 to +152
Contributor (severity: medium):
The list of choices for reasoning_parser is hardcoded here. This same list is also hardcoded in lightllm/server/core/objs/start_args_type.py. This duplication makes maintenance difficult and error-prone. Consider defining this list as a constant in a shared location (e.g., start_args_type.py) and importing it here to avoid inconsistencies.
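A minimal sketch of the deduplicated api_cli.py side, assuming a shared constant (the name REASONING_PARSER_CHOICES is hypothetical) exported from start_args_type.py:

from lightllm.server.core.objs.start_args_type import REASONING_PARSER_CHOICES

parser.add_argument(
    "--reasoning_parser",
    type=str,
    choices=REASONING_PARSER_CHOICES,  # single shared source of truth for both call sites
    default=None,
    help="reasoning parser type",
)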

default=None,
help="reasoning parser type",
)
parser.add_argument(
"--chat_template",
type=str,
16 changes: 11 additions & 5 deletions lightllm/server/api_models.py
@@ -73,7 +73,7 @@ class CompletionRequest(BaseModel):
# prompt: string or tokens
prompt: Union[str, List[str], List[int], List[List[int]]]
suffix: Optional[str] = None
max_tokens: Optional[int] = 16
max_tokens: Optional[int] = 8192
temperature: Optional[float] = 1.0
top_p: Optional[float] = 1.0
n: Optional[int] = 1
@@ -145,7 +145,7 @@ class ChatCompletionRequest(BaseModel):
stream: Optional[bool] = False
stream_options: Optional[StreamOptions] = None
stop: Optional[Union[str, List[str]]] = None
max_tokens: Optional[int] = 16
max_tokens: Optional[int] = 8192
presence_penalty: Optional[float] = 0.0
frequency_penalty: Optional[float] = 0.0
logit_bias: Optional[Dict[str, float]] = None
@@ -166,14 +166,18 @@ class ChatCompletionRequest(BaseModel):
) # noqa
parallel_tool_calls: Optional[bool] = True

# OpenAI parameters for reasoning and others
chat_template_kwargs: Optional[Dict] = None
separate_reasoning: Optional[bool] = True
stream_reasoning: Optional[bool] = False

# Additional parameters supported by LightLLM
do_sample: Optional[bool] = True
top_k: Optional[int] = -1
repetition_penalty: Optional[float] = 1.0
ignore_eos: Optional[bool] = False
role_settings: Optional[Dict[str, str]] = None
character_settings: Optional[List[Dict[str, str]]] = None
chat_template_kwargs: Optional[Dict[str, bool]] = None

# Class variables to store loaded default values
_loaded_defaults: ClassVar[Dict[str, Any]] = {}
@@ -255,8 +259,9 @@ class UsageInfo(BaseModel):


class ChatMessage(BaseModel):
role: str
content: str
role: Optional[str] = None
content: Optional[str] = None
Comment on lines +262 to +263
Contributor (severity: high):
Making the role attribute optional in ChatMessage is a significant deviation from the standard OpenAI API, where every message in a conversation history must have a role. While content can be None (e.g., for a tool call response), the role is fundamental. This change could lead to unexpected behavior or compatibility issues. It's recommended to keep role as a required field (role: str).

Suggested change
role: Optional[str] = None
content: Optional[str] = None
role: str
content: Optional[str] = None

reasoning_content: Optional[str] = None
tool_calls: Optional[List[ToolCall]] = Field(default=None, examples=[None])


@@ -283,6 +288,7 @@ class DeltaMessage(BaseModel):
role: Optional[str] = None
content: Optional[str] = None
tool_calls: Optional[List[ToolCall]] = Field(default=None, examples=[None])
reasoning_content: Optional[str] = None


class ChatCompletionStreamResponseChoice(BaseModel):
97 changes: 88 additions & 9 deletions lightllm/server/api_openai.py
@@ -9,6 +9,8 @@
import pickle
import uuid

from lightllm.server.reasoning_parser import ReasoningParser

from .function_call_parser import TOOLS_TAG_LIST, FunctionCallParser, ToolCallItem
from .build_prompt import build_prompt, init_tokenizer

@@ -17,7 +19,7 @@
from http import HTTPStatus
from PIL import Image
import multiprocessing as mp
from typing import AsyncGenerator, Union, List, Dict
from typing import Any, AsyncGenerator, Optional, Union, List, Dict
from typing import Callable
from lightllm.server import TokenLoad
from fastapi import BackgroundTasks, FastAPI, Request, WebSocket, WebSocketDisconnect
@@ -109,6 +111,38 @@ def _get_history_tool_calls_cnt(request: ChatCompletionRequest) -> int:
return idx


def _get_reasoning_from_request(request: ChatCompletionRequest) -> bool:
"""Judge whether the request needs reasoning"""
reasoning_parser = get_env_start_args().reasoning_parser
if not reasoning_parser:
return False
if reasoning_parser in ["deepseek-v3"]:
return request.chat_template_kwargs is not None and request.chat_template_kwargs.get("thinking") is True
if reasoning_parser in ["qwen3", "glm45", "nano_v3", "interns1"]:
        # qwen3, glm45, nano_v3, and interns1 enable reasoning by default
return not request.chat_template_kwargs or request.chat_template_kwargs.get("enable_thinking", True) is True
Comment on lines +119 to +123
Contributor (severity: medium):
The logic in _get_reasoning_from_request relies on hardcoded lists of model names (e.g., ['deepseek-v3'], ['qwen3', 'glm45', ...]) to determine the reasoning behavior. This approach can be brittle and hard to maintain as new models are added. Consider moving this behavior-specific logic closer to the ReasoningParser detectors themselves. For example, each detector could have a static method or attribute indicating its default reasoning behavior, which would make this function more extensible and less dependent on hardcoded strings.
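One possible shape for that refactor, sketched under the assumption that each detector class known to ReasoningParser can expose a class attribute; BaseReasoningDetector, reasoning_on_by_default, and the two-argument helper below are illustrative names, not existing code:

class BaseReasoningDetector:
    # True means the model emits reasoning unless the request explicitly disables it.
    reasoning_on_by_default: bool = True

class DeepSeekV3Detector(BaseReasoningDetector):
    # deepseek-v3 only reasons when the chat template is asked to think.
    reasoning_on_by_default = False

def _get_reasoning_from_request(request, detector_cls) -> bool:
    kwargs = request.chat_template_kwargs or {}
    if detector_cls.reasoning_on_by_default:
        return kwargs.get("enable_thinking", True) is True
    return kwargs.get("thinking") is True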

return True # default


def _process_reasoning_stream(
index: int,
delta: str,
reasoning_parser_dict: Dict[int, ReasoningParser],
content: Dict[str, Any],
request: ChatCompletionRequest,
) -> tuple[Optional[str], str]:
"""Process reasoning content in streaming response"""
if index not in reasoning_parser_dict:
request_enable_reasoning = _get_reasoning_from_request(request)
reasoning_parser_dict[index] = ReasoningParser(
get_env_start_args().reasoning_parser,
request.stream_reasoning,
request_enable_reasoning,
)
reasoning_parser = reasoning_parser_dict[index]
return reasoning_parser.parse_stream_chunk(delta)


async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Request) -> Response:
from .api_http import g_objs

@@ -226,10 +260,30 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Req

finish_reason = finish_reason_dict[sub_req_id]
text = "".join(final_output_dict[sub_req_id])

# Handle reasoning content
reasoning_text = None
reasoning_parser = get_env_start_args().reasoning_parser
if reasoning_parser and request.separate_reasoning:
request_enable_reasoning = _get_reasoning_from_request(request)
try:
parser = ReasoningParser(
model_type=reasoning_parser,
stream_reasoning=False,
force_reasoning=request_enable_reasoning,
)
reasoning_text, text = parser.parse_non_stream(text)
except Exception as e:
logger.error(f"Reasoning parsing error: {e}")
return create_error_response(
HTTPStatus.BAD_REQUEST,
"Failed to parse fc related info to json format!",
Contributor (severity: medium):
The error message returned in this except block, 'Failed to parse fc related info to json format!', seems to be copied from the function call (fc) parsing logic. However, this block handles exceptions from reasoning parsing. This misleading message can make debugging difficult. Please update it to accurately reflect that the error is related to reasoning parsing.

                        "Failed to parse reasoning content!",

)

# Handle tool_calls parsing
tool_calls = None
tool_choice = request.tool_choice
tools = request.tools

if tool_choice != "none" and any([i in text for i in TOOLS_TAG_LIST]):
if finish_reason == "stop":
finish_reason = "tool_calls"
@@ -257,7 +311,12 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Req
)
if finish_reason == "tool_calls":
text = ""
chat_message = ChatMessage(role="assistant", content=text, tool_calls=tool_calls)
chat_message = ChatMessage(
role="assistant",
content=text if text else "",
tool_calls=tool_calls,
reasoning_content=reasoning_text if reasoning_text else "",
)
choice = ChatCompletionResponseChoice(
index=i,
message=chat_message,
@@ -273,6 +332,7 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Req
return create_error_response(HTTPStatus.BAD_REQUEST, "stream api only support n = 1")

parser_dict = {}
reasoning_parser_dict = {}

# Streaming case
async def stream_results() -> AsyncGenerator[bytes, None]:
@@ -284,12 +344,31 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
async for sub_req_id, request_output, metadata, finish_status in results_generator:
prompt_tokens = metadata["prompt_tokens"]
completion_tokens += 1
if request.tool_choice != "none" and request.tools:
delta = request_output
group_request_id = convert_sub_id_to_group_id(sub_req_id)
index = sub_req_id
finish_reason = finish_status.get_finish_reason()
group_request_id = convert_sub_id_to_group_id(sub_req_id)
index = sub_req_id
delta = request_output
finish_reason = finish_status.get_finish_reason()

# Handle reasoning content
if get_env_start_args().reasoning_parser and request.separate_reasoning:
reasoning_text, delta = _process_reasoning_stream(
index, delta, reasoning_parser_dict, request_output, request
)
if reasoning_text:
choice_data = ChatCompletionStreamResponseChoice(
index=0,
delta=DeltaMessage(reasoning_content=reasoning_text),
finish_reason=None,
)
chunk = ChatCompletionStreamResponse(
id=group_request_id,
created=created_time,
choices=[choice_data],
model=request.model,
)
yield f"data: {chunk.model_dump_json()}\n\n"

if request.tool_choice != "none" and request.tools:
if index not in parser_dict:
# Provide a default value for tool_call_parser
tool_parser = getattr(g_objs.args, "tool_call_parser", None) or "llama3"
@@ -368,7 +447,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
else:
group_request_id = convert_sub_id_to_group_id(sub_req_id)

delta_message = DeltaMessage(role="assistant", content=request_output)
delta_message = DeltaMessage(role="assistant", content=delta)
if finish_status.is_finished():
finish_reason = finish_status.get_finish_reason()
stream_choice = ChatCompletionStreamResponseChoice(
20 changes: 20 additions & 0 deletions lightllm/server/core/objs/start_args_type.py
@@ -33,6 +33,26 @@ class StartArgs:
tool_call_parser: Optional[str] = field(
default=None, metadata={"choices": ["llama3", "qwen25", "mistral", "deepseekv3", "kimi_k2", "qwen"]}
)
reasoning_parser: Optional[str] = field(
default=None,
metadata={
"choices": [
"deepseek-r1",
"deepseek-v3",
"glm45",
"gpt-oss",
"kimi",
"kimi_k2",
"qwen3",
"qwen3-thinking",
"minimax",
"minimax-append-think",
"step3",
"nano_v3",
"interns1",
]
Comment on lines +39 to +53
Contributor (severity: medium):
The list of choices for reasoning_parser is hardcoded here. This same list is also hardcoded in lightllm/server/api_cli.py. To improve maintainability and prevent inconsistencies, it's recommended to define this list as a constant in a single location (perhaps in this file) and import it where needed.
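A sketch of the single-definition side in this file; the constant name REASONING_PARSER_CHOICES is hypothetical and not part of this PR, and api_cli.py would import it as shown in the earlier comment:

REASONING_PARSER_CHOICES = [
    "deepseek-r1", "deepseek-v3", "glm45", "gpt-oss", "kimi", "kimi_k2",
    "qwen3", "qwen3-thinking", "minimax", "minimax-append-think",
    "step3", "nano_v3", "interns1",
]

reasoning_parser: Optional[str] = field(
    default=None, metadata={"choices": REASONING_PARSER_CHOICES}
)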

},
)
chat_template: Optional[str] = field(default=None)
running_max_req_size: int = field(default=1000)
tp: int = field(default=1)