From d8fd2b8ec3034c17712adb66118a34a97f8b1462 Mon Sep 17 00:00:00 2001 From: KiyotakaMatsushita Date: Sun, 14 Dec 2025 10:07:27 +0900 Subject: [PATCH 1/7] Fix handle_redact_content to properly handle redactUserContentMessage Previously, handle_redact_content only checked for redactAssistantContentMessage and completely ignored redactUserContentMessage, causing incorrect block messages to be displayed when input guardrails were triggered. This commit fixes the issue by: - Checking redactUserContentMessage first (input guardrail) - Falling back to redactAssistantContentMessage if input message is not present - Updating test expectations to match the corrected behavior Fixes the issue where input guardrail blocks incorrectly showed output guardrail messages, improving user experience and message accuracy. Related to: strands-agents/sdk-python#1324 --- src/strands/event_loop/streaming.py | 6 +++++- tests/strands/event_loop/test_streaming.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/strands/event_loop/streaming.py b/src/strands/event_loop/streaming.py index 43836fe34..956449960 100644 --- a/src/strands/event_loop/streaming.py +++ b/src/strands/event_loop/streaming.py @@ -336,7 +336,11 @@ def handle_redact_content(event: RedactContentEvent, state: dict[str, Any]) -> N event: Redact Content Event. state: The current state of message processing. """ - if event.get("redactAssistantContentMessage") is not None: + # Check for input redaction first + if event.get("redactUserContentMessage") is not None: + state["message"]["content"] = [{"text": event["redactUserContentMessage"]}] + # Check for output redaction + elif event.get("redactAssistantContentMessage") is not None: state["message"]["content"] = [{"text": event["redactAssistantContentMessage"]}] diff --git a/tests/strands/event_loop/test_streaming.py b/tests/strands/event_loop/test_streaming.py index 02be400b1..8893ebe9a 100644 --- a/tests/strands/event_loop/test_streaming.py +++ b/tests/strands/event_loop/test_streaming.py @@ -676,7 +676,7 @@ async def test_process_stream(response, exp_events, agenerator, alist): { "stop": ( "guardrail_intervened", - {"role": "assistant", "content": [{"text": "REDACTED."}]}, + {"role": "assistant", "content": [{"text": "REDACTED"}]}, {"inputTokens": 1, "outputTokens": 1, "totalTokens": 1}, {"latencyMs": 1}, ) From 742142c1e16b53098e644d4776eab20a6df5ed4a Mon Sep 17 00:00:00 2001 From: KiyotakaMatsushita Date: Sun, 14 Dec 2025 10:14:13 +0900 Subject: [PATCH 2/7] Add comprehensive test cases for redactContent handling Added test cases to cover all scenarios: 1. Both redactUserContentMessage and redactAssistantContentMessage present - Verifies that redactUserContentMessage takes priority 2. Only redactUserContentMessage present (input guardrail) - Verifies input-only blocking works correctly 3. Only redactAssistantContentMessage present (output guardrail) - Verifies output-only blocking works correctly This ensures the fix properly handles all possible guardrail configurations. --- tests/strands/event_loop/test_streaming.py | 134 ++++++++++++++++++++- 1 file changed, 133 insertions(+), 1 deletion(-) diff --git a/tests/strands/event_loop/test_streaming.py b/tests/strands/event_loop/test_streaming.py index 8893ebe9a..803c5824c 100644 --- a/tests/strands/event_loop/test_streaming.py +++ b/tests/strands/event_loop/test_streaming.py @@ -615,7 +615,7 @@ async def test_process_stream(response, exp_events, agenerator, alist): @pytest.mark.parametrize( ("response", "exp_events"), [ - # Redacted Message + # Redacted Message - Both input and output messages present (input takes priority) ( [ {"messageStart": {"role": "assistant"}}, @@ -683,6 +683,138 @@ async def test_process_stream(response, exp_events, agenerator, alist): }, ], ), + # Redacted Message - Input only (redactUserContentMessage) + ( + [ + {"messageStart": {"role": "assistant"}}, + { + "contentBlockStart": {"start": {}}, + }, + { + "contentBlockDelta": {"delta": {"text": "Hello!"}}, + }, + {"contentBlockStop": {}}, + { + "messageStop": {"stopReason": "guardrail_intervened"}, + }, + { + "redactContent": { + "redactUserContentMessage": "INPUT_BLOCKED", + } + }, + { + "metadata": { + "usage": { + "inputTokens": 1, + "outputTokens": 1, + "totalTokens": 1, + }, + "metrics": {"latencyMs": 1}, + } + }, + ], + [ + {"event": {"messageStart": {"role": "assistant"}}}, + {"event": {"contentBlockStart": {"start": {}}}}, + {"event": {"contentBlockDelta": {"delta": {"text": "Hello!"}}}}, + {"data": "Hello!", "delta": {"text": "Hello!"}}, + {"event": {"contentBlockStop": {}}}, + {"event": {"messageStop": {"stopReason": "guardrail_intervened"}}}, + { + "event": { + "redactContent": { + "redactUserContentMessage": "INPUT_BLOCKED", + } + } + }, + { + "event": { + "metadata": { + "usage": { + "inputTokens": 1, + "outputTokens": 1, + "totalTokens": 1, + }, + "metrics": {"latencyMs": 1}, + } + } + }, + { + "stop": ( + "guardrail_intervened", + {"role": "assistant", "content": [{"text": "INPUT_BLOCKED"}]}, + {"inputTokens": 1, "outputTokens": 1, "totalTokens": 1}, + {"latencyMs": 1}, + ) + }, + ], + ), + # Redacted Message - Output only (redactAssistantContentMessage) + ( + [ + {"messageStart": {"role": "assistant"}}, + { + "contentBlockStart": {"start": {}}, + }, + { + "contentBlockDelta": {"delta": {"text": "Hello!"}}, + }, + {"contentBlockStop": {}}, + { + "messageStop": {"stopReason": "guardrail_intervened"}, + }, + { + "redactContent": { + "redactAssistantContentMessage": "OUTPUT_BLOCKED", + } + }, + { + "metadata": { + "usage": { + "inputTokens": 1, + "outputTokens": 1, + "totalTokens": 1, + }, + "metrics": {"latencyMs": 1}, + } + }, + ], + [ + {"event": {"messageStart": {"role": "assistant"}}}, + {"event": {"contentBlockStart": {"start": {}}}}, + {"event": {"contentBlockDelta": {"delta": {"text": "Hello!"}}}}, + {"data": "Hello!", "delta": {"text": "Hello!"}}, + {"event": {"contentBlockStop": {}}}, + {"event": {"messageStop": {"stopReason": "guardrail_intervened"}}}, + { + "event": { + "redactContent": { + "redactAssistantContentMessage": "OUTPUT_BLOCKED", + } + } + }, + { + "event": { + "metadata": { + "usage": { + "inputTokens": 1, + "outputTokens": 1, + "totalTokens": 1, + }, + "metrics": {"latencyMs": 1}, + } + } + }, + { + "stop": ( + "guardrail_intervened", + {"role": "assistant", "content": [{"text": "OUTPUT_BLOCKED"}]}, + {"inputTokens": 1, "outputTokens": 1, "totalTokens": 1}, + {"latencyMs": 1}, + ) + }, + ], + ), ( [ {"messageStart": {"role": "assistant"}}, From 10a5ae69d0c23ccc4861778efb0f8d5ee7346863 Mon Sep 17 00:00:00 2001 From: KiyotakaMatsushita Date: Sun, 14 Dec 2025 10:26:45 +0900 Subject: [PATCH 3/7] Fix redactContent to handle only first event AWS Bedrock sends multiple redactContent events in sequence: 1. First event with redactUserContentMessage 2. Second event with redactAssistantContentMessage Previous implementation processed both events, causing the second one to override the first, leading to incorrect messages being displayed. This fix adds a 'redacted' flag to state to ensure only the first redactContent event is processed, maintaining the correct priority: - redactUserContentMessage (input guardrail) takes precedence - redactAssistantContentMessage (output guardrail) is used if no input message Related to: strands-agents/sdk-python#1324 --- src/strands/event_loop/streaming.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/strands/event_loop/streaming.py b/src/strands/event_loop/streaming.py index 956449960..e73edc425 100644 --- a/src/strands/event_loop/streaming.py +++ b/src/strands/event_loop/streaming.py @@ -336,12 +336,18 @@ def handle_redact_content(event: RedactContentEvent, state: dict[str, Any]) -> N event: Redact Content Event. state: The current state of message processing. """ + # Skip if already redacted (handle only the first redactContent event) + if state.get("redacted"): + return + # Check for input redaction first if event.get("redactUserContentMessage") is not None: state["message"]["content"] = [{"text": event["redactUserContentMessage"]}] + state["redacted"] = True # Check for output redaction elif event.get("redactAssistantContentMessage") is not None: state["message"]["content"] = [{"text": event["redactAssistantContentMessage"]}] + state["redacted"] = True def extract_usage_metrics(event: MetadataEvent, time_to_first_byte_ms: int | None = None) -> tuple[Usage, Metrics]: From 4503fad1cb2d652251c7f6f8a2c51ffb4835a327 Mon Sep 17 00:00:00 2001 From: KiyotakaMatsushita Date: Sun, 14 Dec 2025 10:59:34 +0900 Subject: [PATCH 4/7] Fix: Use trace metadata to determine correct redact message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AWS Bedrock always sends both redactUserContentMessage and redactAssistantContentMessage regardless of which guardrail was triggered. The trace metadata contains the actual trigger information. Changes: - Store both redact messages in state instead of choosing immediately - Add finalize_redact_message() to analyze trace and select correct message - Add _check_if_blocked() helper to check if any policy was blocked - Call finalize_redact_message() when metadata event is received This ensures: - Input guardrail → shows redactUserContentMessage - Output guardrail → shows redactAssistantContentMessage --- src/strands/event_loop/streaming.py | 101 +++++++++++++++++++++++++--- 1 file changed, 91 insertions(+), 10 deletions(-) diff --git a/src/strands/event_loop/streaming.py b/src/strands/event_loop/streaming.py index e73edc425..24ea7addb 100644 --- a/src/strands/event_loop/streaming.py +++ b/src/strands/event_loop/streaming.py @@ -336,18 +336,97 @@ def handle_redact_content(event: RedactContentEvent, state: dict[str, Any]) -> N event: Redact Content Event. state: The current state of message processing. """ - # Skip if already redacted (handle only the first redactContent event) - if state.get("redacted"): + # Store both messages for later decision based on trace + # AWS Bedrock sends both messages regardless of which guardrail was triggered + if event.get("redactUserContentMessage") is not None: + state["redactUserContentMessage"] = event["redactUserContentMessage"] + if event.get("redactAssistantContentMessage") is not None: + state["redactAssistantContentMessage"] = event["redactAssistantContentMessage"] + + +def _check_if_blocked(assessment: dict[str, Any]) -> bool: + """Check if any policy in the assessment has BLOCKED action. + + Args: + assessment: Guardrail assessment data + + Returns: + True if any policy has BLOCKED action + """ + # Check word policy + word_policy = assessment.get("wordPolicy", {}) + custom_words = word_policy.get("customWords", []) + for word in custom_words: + if word.get("action") == "BLOCKED" and word.get("detected"): + return True + + # Check content policy + content_policy = assessment.get("contentPolicy", {}) + filters = content_policy.get("filters", []) + for filter_item in filters: + if filter_item.get("action") == "BLOCKED": + return True + + # Check sensitive information policy + pii_entities = assessment.get("sensitiveInformationPolicy", {}).get("piiEntities", []) + for entity in pii_entities: + if entity.get("action") == "BLOCKED": + return True + + return False + + +def finalize_redact_message(event: MetadataEvent, state: dict[str, Any]) -> None: + """Finalize the redacted message based on trace information. + + AWS Bedrock sends both redactUserContentMessage and redactAssistantContentMessage + regardless of which guardrail was triggered. We need to check the trace to determine + which one to use. + + Args: + event: Metadata event containing trace information + state: The current state of message processing + """ + # Check if we have redact messages stored + if "redactUserContentMessage" not in state and "redactAssistantContentMessage" not in state: return - # Check for input redaction first - if event.get("redactUserContentMessage") is not None: - state["message"]["content"] = [{"text": event["redactUserContentMessage"]}] - state["redacted"] = True - # Check for output redaction - elif event.get("redactAssistantContentMessage") is not None: - state["message"]["content"] = [{"text": event["redactAssistantContentMessage"]}] - state["redacted"] = True + # Get trace information + trace = event.get("trace", {}) + guardrail = trace.get("guardrail", {}) + + # Check input assessment + input_blocked = False + input_assessment = guardrail.get("inputAssessment", {}) + for guardrail_id, assessment in input_assessment.items(): + if _check_if_blocked(assessment): + input_blocked = True + break + + # Check output assessments + output_blocked = False + output_assessments = guardrail.get("outputAssessments", []) + for output_assessment_dict in output_assessments: + for guardrail_id, assessments in output_assessment_dict.items(): + for assessment in assessments: + if _check_if_blocked(assessment): + output_blocked = True + break + if output_blocked: + break + if output_blocked: + break + + # Select the appropriate message based on trace + if output_blocked and "redactAssistantContentMessage" in state: + state["message"]["content"] = [{"text": state["redactAssistantContentMessage"]}] + elif input_blocked and "redactUserContentMessage" in state: + state["message"]["content"] = [{"text": state["redactUserContentMessage"]}] + # Fallback: use input message if trace is unclear but we have redact messages + elif "redactUserContentMessage" in state: + state["message"]["content"] = [{"text": state["redactUserContentMessage"]}] + elif "redactAssistantContentMessage" in state: + state["message"]["content"] = [{"text": state["redactAssistantContentMessage"]}] def extract_usage_metrics(event: MetadataEvent, time_to_first_byte_ms: int | None = None) -> tuple[Usage, Metrics]: @@ -420,6 +499,8 @@ async def process_stream( int(1000 * (first_byte_time - start_time)) if (start_time and first_byte_time) else None ) usage, metrics = extract_usage_metrics(chunk["metadata"], time_to_first_byte_ms) + # Finalize redacted message based on trace information + finalize_redact_message(chunk["metadata"], state) elif "redactContent" in chunk: handle_redact_content(chunk["redactContent"], state) From 8630d9ec4aaf2ff04a4a35cc370d2c74f8cdf92f Mon Sep 17 00:00:00 2001 From: KiyotakaMatsushita Date: Sun, 14 Dec 2025 11:03:20 +0900 Subject: [PATCH 5/7] Fix: Correct outputAssessments structure parsing The outputAssessments field is a dict (not a list) with guardrail IDs as keys and assessment lists as values. Before: outputAssessments = [] After: outputAssessments = { "guardrail_id": [...] } --- src/strands/event_loop/streaming.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/strands/event_loop/streaming.py b/src/strands/event_loop/streaming.py index 24ea7addb..c8fbdc09b 100644 --- a/src/strands/event_loop/streaming.py +++ b/src/strands/event_loop/streaming.py @@ -405,15 +405,14 @@ def finalize_redact_message(event: MetadataEvent, state: dict[str, Any]) -> None # Check output assessments output_blocked = False - output_assessments = guardrail.get("outputAssessments", []) - for output_assessment_dict in output_assessments: - for guardrail_id, assessments in output_assessment_dict.items(): + output_assessments = guardrail.get("outputAssessments", {}) + # outputAssessments is a dict with guardrail IDs as keys + for guardrail_id, assessments in output_assessments.items(): + if isinstance(assessments, list): for assessment in assessments: if _check_if_blocked(assessment): output_blocked = True break - if output_blocked: - break if output_blocked: break From 83fc7b3bdd8abcda018dab7cb50434c15409fc9c Mon Sep 17 00:00:00 2001 From: KiyotakaMatsushita Date: Mon, 15 Dec 2025 14:21:39 +0900 Subject: [PATCH 6/7] fix: prevent redactContent chunks from being yielded to stream AWS Bedrock sends redactContent events with both input and output guardrail messages regardless of which guardrail was actually triggered. Previously, these chunks were yielded directly to the stream via ModelStreamChunkEvent, causing the input guardrail message to appear in the output even when the output guardrail was triggered. This fix prevents redactContent chunks from being yielded to the stream. The messages are still processed by handle_redact_content and stored in state, then finalize_redact_message uses the trace information to select the correct message based on which guardrail actually blocked. --- src/strands/event_loop/streaming.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/strands/event_loop/streaming.py b/src/strands/event_loop/streaming.py index c8fbdc09b..1670a4fd3 100644 --- a/src/strands/event_loop/streaming.py +++ b/src/strands/event_loop/streaming.py @@ -480,7 +480,10 @@ async def process_stream( # Track first byte time when we get first content if first_byte_time is None and ("contentBlockDelta" in chunk or "contentBlockStart" in chunk): first_byte_time = time.time() - yield ModelStreamChunkEvent(chunk=chunk) + # Don't yield redactContent chunks to stream - they will be processed by + # handle_redact_content and finalize_redact_message to select the correct message + if "redactContent" not in chunk: + yield ModelStreamChunkEvent(chunk=chunk) if "messageStart" in chunk: state["message"] = handle_message_start(chunk["messageStart"], state["message"]) From abb4404555aeb6cd2f113baec7ddb7252992f950 Mon Sep 17 00:00:00 2001 From: KiyotakaMatsushita Date: Mon, 15 Dec 2025 14:28:57 +0900 Subject: [PATCH 7/7] fix: generate correct redaction message based on guardrail trace Previously _generate_redaction_events() would generate both input and output redaction messages based only on config flags, regardless of which guardrail actually triggered the block. This fix: 1. Adds guardrail_data parameter to _generate_redaction_events() 2. Uses trace data (inputAssessment/outputAssessments) to determine which guardrail was actually blocked 3. Generates only the appropriate redaction message: - Output guardrail block -> redactAssistantContentMessage - Input guardrail block -> redactUserContentMessage 4. Falls back to legacy behavior if guardrail_data is not provided This ensures that when an output guardrail blocks AI-generated content, the output guardrail message is displayed instead of the input guardrail message. --- src/strands/models/bedrock.py | 103 +++++++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 25 deletions(-) diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py index 4a7c81672..b2eac6db7 100644 --- a/src/strands/models/bedrock.py +++ b/src/strands/models/bedrock.py @@ -555,39 +555,91 @@ def _has_blocked_guardrail(self, guardrail_data: dict[str, Any]) -> bool: return False - def _generate_redaction_events(self) -> list[StreamEvent]: - """Generate redaction events based on configuration. + def _generate_redaction_events(self, guardrail_data: dict[str, Any] | None = None) -> list[StreamEvent]: + """Generate redaction events based on configuration and which guardrail was triggered. + + Args: + guardrail_data: Guardrail trace data to determine which guardrail (input/output) was blocked. + If None, falls back to legacy behavior using config flags only. Returns: List of redaction events to yield. """ events: list[StreamEvent] = [] - if self.config.get("guardrail_redact_input", True): - logger.debug("Redacting user input due to guardrail.") - events.append( - { - "redactContent": { - "redactUserContentMessage": self.config.get( - "guardrail_redact_input_message", "[User input redacted.]" - ) - } - } + # Determine which guardrail was blocked from trace data + input_blocked = False + output_blocked = False + + if guardrail_data: + input_assessment = guardrail_data.get("inputAssessment", {}) + output_assessments = guardrail_data.get("outputAssessments", {}) + + # Check if input guardrail blocked + input_blocked = any( + self._find_detected_and_blocked_policy(assessment) + for assessment in input_assessment.values() ) - if self.config.get("guardrail_redact_output", False): - logger.debug("Redacting assistant output due to guardrail.") - events.append( - { - "redactContent": { - "redactAssistantContentMessage": self.config.get( - "guardrail_redact_output_message", - "[Assistant output redacted.]", - ) - } - } + # Check if output guardrail blocked + output_blocked = any( + self._find_detected_and_blocked_policy(assessment) + for assessment in output_assessments.values() ) + # Generate appropriate redaction event based on which guardrail was triggered + if guardrail_data: + # Use trace data to determine which message to send + if output_blocked and self.config.get("guardrail_redact_output", False): + logger.debug("Redacting assistant output due to output guardrail.") + events.append( + { + "redactContent": { + "redactAssistantContentMessage": self.config.get( + "guardrail_redact_output_message", + "[Assistant output redacted.]", + ) + } + } + ) + elif input_blocked and self.config.get("guardrail_redact_input", True): + logger.debug("Redacting user input due to input guardrail.") + events.append( + { + "redactContent": { + "redactUserContentMessage": self.config.get( + "guardrail_redact_input_message", "[User input redacted.]" + ) + } + } + ) + else: + # Legacy fallback: use config flags only (original behavior) + if self.config.get("guardrail_redact_input", True): + logger.debug("Redacting user input due to guardrail.") + events.append( + { + "redactContent": { + "redactUserContentMessage": self.config.get( + "guardrail_redact_input_message", "[User input redacted.]" + ) + } + } + ) + + if self.config.get("guardrail_redact_output", False): + logger.debug("Redacting assistant output due to guardrail.") + events.append( + { + "redactContent": { + "redactAssistantContentMessage": self.config.get( + "guardrail_redact_output_message", + "[Assistant output redacted.]", + ) + } + } + ) + return events @override @@ -691,7 +743,7 @@ def _stream( ): guardrail_data = chunk["metadata"]["trace"]["guardrail"] if self._has_blocked_guardrail(guardrail_data): - for event in self._generate_redaction_events(): + for event in self._generate_redaction_events(guardrail_data): callback(event) # Track if we see tool use events @@ -723,7 +775,8 @@ def _stream( and "guardrail" in response["trace"] and self._has_blocked_guardrail(response["trace"]["guardrail"]) ): - for event in self._generate_redaction_events(): + guardrail_data = response["trace"]["guardrail"] + for event in self._generate_redaction_events(guardrail_data): callback(event) except ClientError as e: