diff --git a/HISTORY.rst b/HISTORY.rst index 4dada63..6b3360d 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -9,14 +9,26 @@ Release History **Updates** - Fix Pypi Workflow. +- [`PR #48 `_] Add support for common CSS properties on text for: ,

and | `Lynuxen `_ **Fixes** -- None +- Fixes `#46 `_: background-color style property highlights the whole paragraph instead of a single word +- Fixes `#47 `_: text-decoration style property for underline is not applied **New Features** - Add support for HTML Comments. | [Dfop02](https://github.com/dfop02) +- Add support for text-align, line-height, margin-left, margin-right, text-indent for paragraphs +- Add support for the following text properties (applies to \, \ and \ tags): + - font-weight: ('bold', 'bolder', '700', '800', '900', 'normal', 'lighter', '400', '300', '100') + - font-style: ('italic', 'oblique', 'normal') + - text-decoration: ('underline', 'line-through') ('solid', 'double', 'dotted', 'dashed', 'wavy'), and the longhand properties (text-decoration-*) + - text-transform: ('uppercase', 'lowercase', 'capitalize') + - font-size + - font-family + - color + - background-color: Paragraph and run highlight colors can now differ. Partial support on what can be used as a color. 1.1.0 (2025-11-01) diff --git a/README.md b/README.md index 39f2d23..a58cc1f 100644 --- a/README.md +++ b/README.md @@ -185,6 +185,7 @@ My goal in forking and fixing/updating this package was to complete my current t - Fix Ordered and Unordered Lists | [TaylorN15](https://github.com/TaylorN15) from [PR](https://github.com/dfop02/html4docx/pull/16) - Fixed styles was only being applied to span tag. | [Dfop02](https://github.com/dfop02) from [PR](https://github.com/dfop02/html4docx/issues/40) - Fixed bug on styles parsing when style contains multiple colon. 
| [Dfop02](https://github.com/dfop02) +- Fixed highlighting a single word | [Lynuxen](https://github.com/Lynuxen) **New Features** - Add Witdh/Height style to images | [maifeeulasad](https://github.com/maifeeulasad) from [PR](https://github.com/pqzx/html2docx/pull/29) @@ -202,6 +203,7 @@ My goal in forking and fixing/updating this package was to complete my current t - Add support to table cells style (border, background-color, width, height, margin) | [Dfop02](https://github.com/dfop02) - Being able to use inline images on same paragraph. | [Dfop02](https://github.com/dfop02) - Refactory Tests to be more consistent and less 'human validation' | [Dfop02](https://github.com/dfop02) +- Support for common CSS properties for text | [Lynuxen](https://github.com/Lynuxen) ## Known Issues diff --git a/html4docx/constants.py b/html4docx/constants.py index 825fbe8..5fbf868 100644 --- a/html4docx/constants.py +++ b/html4docx/constants.py @@ -1,7 +1,9 @@ # values in inches +from docx.enum.text import WD_UNDERLINE + INDENT = 0.25 LIST_INDENT = 0.5 -MAX_INDENT = 5.5 # To stop indents going off the page +MAX_INDENT = 5.5 # To stop indents going off the page # Style to use with tables. By default no style is used. 
DEFAULT_TABLE_STYLE = None @@ -48,6 +50,19 @@ 'xx-large': '32px' } +FONT_UNDERLINE = { + 'underline', + 'line-through', +} + +FONT_UNDERLINE_STYLES = { + 'solid': WD_UNDERLINE.SINGLE, + 'dashed': WD_UNDERLINE.DASH, + 'dotted': WD_UNDERLINE.DOTTED, + 'wavy': WD_UNDERLINE.WAVY, + 'double': WD_UNDERLINE.DOUBLE, +} + STYLES = { 'ul': 'List Bullet', 'ul2': 'List Bullet 2', @@ -79,6 +94,50 @@ '0': '0px', } +GENERIC_FONT_STYLES = { + 'serif': 'Times New Roman', + 'sans-serif': 'Arial', + 'monospace': 'Courier New' +} + +# Paragraph-level styles (ParagraphFormat) +PARAGRAPH_FORMAT_STYLES = { + 'text-align': '_apply_alignment_paragraph', + 'line-height': '_apply_line_height_paragraph', + 'margin-left': '_apply_margins_paragraph', + 'margin-right': '_apply_margins_paragraph', + 'text-indent': '_apply_text_indent_paragraph', +} + +# Run-level styles (affect text formatting within runs) +PARAGRAPH_RUN_STYLES = { + 'font-weight': '_apply_font_weight_paragraph', + 'font-style': '_apply_font_style_paragraph', + 'text-decoration': '_apply_text_decoration_paragraph', + 'text-decoration-line': '_apply_text_decoration_paragraph', + 'text-decoration-style': '_apply_text_decoration_paragraph', + 'text-decoration-color': '_apply_text_decoration_paragraph', + 'text-transform': '_apply_text_transform_paragraph', + 'font-size': '_apply_font_size_paragraph', + 'font-family': '_apply_font_family_paragraph', + 'color': '_apply_color_paragraph', + 'background-color': '_apply_background_color_paragraph' +} + +RUN_STYLES = { + 'font-weight': '_apply_font_weight_to_run', + 'font-style': '_apply_font_style_to_run', + 'text-decoration': '_apply_text_decoration_to_run', + 'text-decoration-line': '_apply_text_decoration_line_to_run', + 'text-decoration-style': '_apply_text_decoration_style_to_run', + 'text-decoration-color': '_apply_text_decoration_color_to_run', + 'text-transform': '_apply_text_transform_to_run', + 'font-size': '_apply_font_size_to_run', + 'font-family': '_apply_font_family_to_run', 
+ 'color': '_apply_color_to_run', + 'background-color': '_apply_background_color_to_run' +} + def default_borders(): return { "top": {"size": DEFAULT_BORDER_SIZE, "color": DEFAULT_BORDER_COLOR, "style": DEFAULT_BORDER_STYLE}, diff --git a/html4docx/h4d.py b/html4docx/h4d.py index b8af805..505b688 100644 --- a/html4docx/h4d.py +++ b/html4docx/h4d.py @@ -1,4 +1,5 @@ import argparse +import logging import os import re from io import BytesIO @@ -29,6 +30,7 @@ def __init__(self): self.options = dict(constants.DEFAULT_OPTIONS) self.table_row_selectors = constants.DEFAULT_TABLE_ROW_SELECTORS self.table_style = constants.DEFAULT_TABLE_STYLE + self.paragraph_span_styles = {} # paragraph_id -> set(run_indices) def set_initial_attrs(self, document = None): self.tags = { @@ -38,6 +40,7 @@ def set_initial_attrs(self, document = None): self.doc = document if document else Document() self.bs = self.options['fix-html'] # whether or not to clean with BeautifulSoup self.paragraph = None + self.run = None self.skip = False self.skip_tag = None self.instances_to_skip = 0 @@ -289,6 +292,571 @@ def add_bookmark(self, bookmark_name): self.paragraph._element.append(bookmark_end) self.bookmark_id += 1 + def apply_styles_to_run(self, run, style): + if not style or not hasattr(run, 'font'): + return + + # Find current paragraph and run position + if not hasattr(self, 'paragraph') or self.paragraph is None: + return + + paragraph_id = id(self.paragraph) + if paragraph_id not in self.paragraph_span_styles: + self.paragraph_span_styles[paragraph_id] = {} + + # The current run is the last one in the paragraph + run_index = len(self.paragraph.runs) - 1 + + if run_index not in self.paragraph_span_styles[paragraph_id]: + self.paragraph_span_styles[paragraph_id][run_index] = set() + + for style_name, style_value in style.items(): + if style_name in constants.RUN_STYLES: + if style_name.startswith('background-color') and style_value in ('inherit', 'initial'): + continue + + 
self.paragraph_span_styles[paragraph_id][run_index].add(style_name) + + if style_name == 'text-decoration': + # If span sets text-decoration shorthand, it conflicts with all text-decoration-* properties + self.paragraph_span_styles[paragraph_id][run_index].add('text-decoration-line') + self.paragraph_span_styles[paragraph_id][run_index].add('text-decoration-style') + self.paragraph_span_styles[paragraph_id][run_index].add('text-decoration-color') + elif style_name.startswith('text-decoration-'): + pass + + for style_name, style_value in style.items(): + if style_name in constants.PARAGRAPH_FORMAT_STYLES: + continue + elif style_name in constants.RUN_STYLES: + handler = getattr(self, constants.RUN_STYLES[style_name]) + param_name = style_name.replace('-', '_') + handler(run=run, **{param_name: style_value}) + else: + logging.warning(f"Warning: Unrecognized style '{style_name}', will be skipped.") + + def apply_styles_to_paragraph(self, paragraph, style): + if not style or not hasattr(paragraph, 'paragraph_format'): + return + + for style_name, style_value in style.items(): + if style_name in constants.PARAGRAPH_FORMAT_STYLES: + handler = getattr(self, constants.PARAGRAPH_FORMAT_STYLES[style_name]) + elif style_name in constants.PARAGRAPH_RUN_STYLES: + handler = getattr(self, constants.PARAGRAPH_RUN_STYLES[style_name]) + else: + logging.warning(f"Warning: Unrecognized paragraph style '{style_name}', will be skipped.") + continue + + handler( + paragraph=paragraph, + style_name=style_name, + value=style_value, + all_styles=style + ) + + def _apply_alignment_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + value = kwargs['value'] + + align = utils.remove_important_from_style(value) + + if 'center' in align: + paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER + elif 'left' in align: + paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT + elif 'right' in align: + paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT + elif 'justify' in align: + paragraph.alignment = 
WD_ALIGN_PARAGRAPH.JUSTIFY + + def _apply_line_height_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + value = kwargs['value'] + + line_height = utils.remove_important_from_style(value) + + if line_height in ('normal', 'inherit'): + paragraph.paragraph_format.line_spacing = None + else: + try: + if line_height.replace('.', '').replace('%', '').isdigit(): + multiplier = float(line_height[:-1]) / 100.0 if line_height.endswith('%') else float(line_height) + paragraph.paragraph_format.line_spacing = multiplier + else: + converted = utils.unit_converter(line_height, target_unit="pt") + if converted is not None: + paragraph.paragraph_format.line_spacing = converted + except (ValueError, TypeError): + paragraph.paragraph_format.line_spacing = None + + def _apply_margins_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + style_name = kwargs['style_name'] + all_styles = kwargs['all_styles'] + + margin_left = all_styles.get('margin-left') + margin_right = all_styles.get('margin-right') + + if margin_left and margin_right: + if 'auto' in margin_left and 'auto' in margin_right: + paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER + return + + if style_name == 'margin-left' and margin_left and 'auto' not in margin_left: + paragraph.paragraph_format.left_indent = utils.unit_converter(margin_left) + + if style_name == 'margin-right' and margin_right and 'auto' not in margin_right: + paragraph.paragraph_format.right_indent = utils.unit_converter(margin_right) + + def _apply_text_indent_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + value = kwargs['value'] + + indent_value = utils.remove_important_from_style(value) + + paragraph.paragraph_format.first_line_indent = utils.unit_converter(indent_value, target_unit="pt") + + def _apply_font_weight_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + value = kwargs['value'] + + font_weight = utils.remove_important_from_style(value).lower() + + paragraph_id = id(paragraph) 
+ paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {}) + + for i, run in enumerate(paragraph.runs): + if i in paragraph_spans and 'font-weight' in paragraph_spans[i]: + continue + + self._apply_font_weight_to_run( + run=run, + font_weight=font_weight, + ) + + def _apply_font_weight_to_run(self, **kwargs): + font_weight = kwargs['font_weight'] + run = kwargs['run'] + if font_weight in ('bold', 'bolder', '700', '800', '900'): + run.font.bold = True + elif font_weight in ('normal', 'lighter', '400', '300', '100'): + run.font.bold = False + # Note: Decide what to do for values between 400-700 + elif font_weight.isdigit(): + weight = int(font_weight) + run.font.bold = weight >= 700 + + def _apply_font_style_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + value = kwargs['value'] + + font_style = utils.remove_important_from_style(value).lower() + + paragraph_id = id(paragraph) + paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {}) + + for i, run in enumerate(paragraph.runs): + if i in paragraph_spans and 'font-style' in paragraph_spans[i]: + continue + + self._apply_font_style_to_run( + run=run, + font_style=font_style, + ) + + def _apply_font_style_to_run(self, **kwargs): + font_style = kwargs['font_style'] + run = kwargs['run'] + + if font_style in ('italic', 'oblique'): + run.font.italic = True + elif font_style == 'normal': + run.font.italic = False + + def _apply_font_size_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + value = kwargs['value'] + + font_size = utils.remove_important_from_style(value).lower() + + if font_size in constants.FONT_SIZES_NAMED: + font_size = constants.FONT_SIZES_NAMED[font_size] + + paragraph_id = id(paragraph) + paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {}) + + for i, run in enumerate(paragraph.runs): + if i in paragraph_spans and 'font-size' in paragraph_spans[i]: + continue + + self._apply_font_size_to_run( + run=run, + font_size=font_size, + ) + + def 
_apply_font_size_to_run(self, **kwargs): + run = kwargs['run'] + font_size = kwargs['font_size'] + + font_size = utils.remove_important_from_style(font_size).lower() + font_size = utils.adapt_font_size(font_size) + + try: + if font_size in ('normal', 'initial', 'inherit'): + run.font.size = None + else: + converted_size = utils.unit_converter(font_size, target_unit="pt") + if converted_size is not None: + run.font.size = converted_size + + except (ValueError, TypeError) as e: + logging.warning(f"Warning: Could not parse font-size '{font_size}': {e}") + + def _apply_font_family_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + value = kwargs['value'] + + font_family = utils.remove_important_from_style(value).strip() + + paragraph_id = id(paragraph) + paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {}) + + for i, run in enumerate(paragraph.runs): + if i in paragraph_spans and 'font-family' in paragraph_spans[i]: + continue + + self._apply_font_family_to_run( + run=run, + font_family=font_family, + ) + + def _apply_font_family_to_run(self, **kwargs): + run = kwargs['run'] + font_family = kwargs['font_family'] + + if not font_family or font_family in ('inherit', 'initial', 'unset'): + return + + try: + font_families = [f.strip().strip('"\'') for f in font_family.split(',')] + + for font_name in font_families: + if font_name and font_name not in ('inherit', 'initial', 'unset', 'serif', 'sans-serif', 'monospace', + 'cursive', 'fantasy', 'system-ui'): + run.font.name = font_name + break + elif font_name in ('serif', 'sans-serif', 'monospace'): + run.font.name = constants.GENERIC_FONT_STYLES[font_name] + break + + except (AttributeError, Exception) as e: + logging.warning(f"Warning: Could not apply font-family '{font_family}': {e}") + + def _apply_color_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + all_styles = kwargs['all_styles'] + color_value = utils.remove_important_from_style(all_styles.get('color', '')).lower().strip() + 
if color_value in ('inherit', 'initial', 'transparent', 'currentcolor'): + return + + paragraph_id = id(paragraph) + paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {}) + + for i, run in enumerate(paragraph.runs): + if i in paragraph_spans and 'color' in paragraph_spans[i]: + continue + self._apply_color_to_run( + run=run, + color=color_value, + ) + + def _apply_color_to_run(self, **kwargs): + run = kwargs['run'] + color_value = kwargs['color'] + try: + colors = utils.parse_color(color_value) + run.font.color.rgb = RGBColor(*colors) + except (ValueError, AttributeError) as e: + logging.warning(f"Could not apply color '{color_value}': {e}") + + def _apply_text_transform_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + value = kwargs['value'] + + text_transform = utils.remove_important_from_style(value).lower() + + paragraph_id = id(paragraph) + paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {}) + + for i, run in enumerate(paragraph.runs): + if i in paragraph_spans and 'text-transform' in paragraph_spans[i]: + continue + + self._apply_text_transform_to_run( + run=run, + text_transform=text_transform, + ) + + def _apply_text_transform_to_run(self, **kwargs): + run = kwargs['run'] + text_transform = kwargs['text_transform'] + + if not run.text: + return + + try: + if text_transform == 'uppercase': + run.text = run.text.upper() + elif text_transform == 'lowercase': + run.text = run.text.lower() + elif text_transform == 'capitalize': + run.text = run.text.title() + elif text_transform in ('none', 'initial', 'inherit'): + # No transformation needed + pass + elif text_transform in ('full-width', 'math-auto', 'full-size-kana'): + logging.warning(f"Warning: Unsupported text transform '{text_transform}'") + + except (AttributeError, Exception) as e: + logging.warning(f"Warning: Could not apply text-transform '{text_transform}': {e}") + + def _parse_text_decoration(self, text_decoration): + """Parse text-decoration using regex to 
preserve color values.""" + # Pattern to match color values (rgb, hex, named colors) or other tokens + pattern = r'rgb\(\s*\d+\s*,\s*\d+\s*,\s*\d+\s*\)|#[\da-fA-F]+|[\w-]+' + + tokens = re.findall(pattern, text_decoration) + + result = { + 'line_type': None, + 'line_style': 'solid', + 'color': None + } + + for token in tokens: + if token in constants.FONT_UNDERLINE: + result['line_type'] = token + elif token == 'none': + result['line_type'] = 'none' + elif token in constants.FONT_UNDERLINE_STYLES: + result['line_style'] = token + elif utils.is_color(token): + result['color'] = token + elif token in ('blink', 'overline'): + result['line_style'] = None + result['line_style'] = None + logging.warning( + f"Blink or overline not supported.") + + + if result['line_type'] == 'line-through' and result['color'] is not None: + logging.warning( + f"Word does not support colored strike-through. Color '{result['color']}' will be ignored for line-through.") + return result + + def _apply_text_decoration_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + all_styles = kwargs['all_styles'] + + # Initialize decorations + decorations = { + 'line_type': None, + 'line_style': None, + 'color': None + } + + if 'text-decoration' in all_styles: + text_decoration_value = utils.remove_important_from_style(all_styles['text-decoration']).lower() + decorations = self._parse_text_decoration(text_decoration_value) + + if 'text-decoration-line' in all_styles: + line_value = utils.remove_important_from_style(all_styles['text-decoration-line']).lower() + decorations['line_type'] = line_value + + if 'text-decoration-style' in all_styles: + style_value = utils.remove_important_from_style(all_styles['text-decoration-style']).lower() + decorations['line_style'] = style_value + + if 'text-decoration-color' in all_styles: + color_value = utils.remove_important_from_style(all_styles['text-decoration-color']).lower() + decorations['color'] = color_value + + paragraph_id = id(paragraph) + 
paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {}) + + for i, run in enumerate(paragraph.runs): + span_styles = paragraph_spans.get(i, set()) + + # If span has text-decoration shorthand, skip entirely + if 'text-decoration' in span_styles: + continue + + if decorations['line_type'] and 'text-decoration-line' not in span_styles: + self._apply_text_decoration_line_to_run( + run=run, + text_decoration_line=decorations['line_type'], + ) + + if decorations['line_style'] and 'text-decoration-style' not in span_styles: + self._apply_text_decoration_style_to_run( + run=run, + text_decoration_style=decorations['line_style'], + ) + + if decorations['color'] and 'text-decoration-color' not in span_styles: + self._apply_text_decoration_color_to_run( + run=run, + text_decoration_color=decorations['color'], + ) + + def _apply_text_decoration_to_run(self, **kwargs): + run = kwargs['run'] + text_decoration = kwargs['text_decoration'] + + if not text_decoration: + return + + decorations = self._parse_text_decoration(text_decoration) + if decorations['line_type']: + self._apply_text_decoration_line_to_run( + run=run, + text_decoration_line=decorations['line_type'], + ) + + if decorations['line_style']: + self._apply_text_decoration_style_to_run( + run=run, + text_decoration_style=decorations['line_style'], + ) + + if decorations['color']: + self._apply_text_decoration_color_to_run( + run=run, + text_decoration_color=decorations['color'], + ) + + def _apply_text_decoration_line_to_run(self, **kwargs): + run = kwargs['run'] + text_decoration_line = kwargs['text_decoration_line'] + + if text_decoration_line in constants.FONT_UNDERLINE: + if text_decoration_line == 'underline': + run.font.underline = True + run.font.strike = False + elif text_decoration_line == 'line-through': + run.font.strike = True + run.font.underline = False + elif text_decoration_line == 'none': + run.font.underline = False + run.font.strike = False + else: + logging.warning(f"Warning: Unsupported 
text decoration '{text_decoration_line}'") + + def _apply_text_decoration_style_to_run(self, **kwargs): + run = kwargs['run'] + text_decoration_style = kwargs['text_decoration_style'] + if not text_decoration_style or run.font.underline is False: + return False + + should_apply = False + if run.font.underline: + should_apply = True + elif hasattr(self.paragraph, '_pending_styles'): + for pending_style in self.paragraph._pending_styles: + if 'text-decoration' in pending_style or 'text-decoration-line' in pending_style: + should_apply = True + break + + if not should_apply: + return False + try: + run.font.underline = constants.FONT_UNDERLINE_STYLES[text_decoration_style] + except KeyError: + logging.warning(f"Warning: Style not recognized'{text_decoration_style}', defaulting to single line.") + + # Mark that we applied a text-decoration style by adding text-decoration-line to span_styles + paragraph_id = id(self.paragraph) + run_index = len(self.paragraph.runs) - 1 + if paragraph_id in self.paragraph_span_styles and run_index in self.paragraph_span_styles[paragraph_id]: + self.paragraph_span_styles[paragraph_id][run_index].add('text-decoration-line') + return True + + def _apply_text_decoration_color_to_run(self, **kwargs): + run = kwargs['run'] + text_decoration_color = kwargs['text_decoration_color'] + + if not text_decoration_color or not utils.is_color(text_decoration_color): + return + + color_hex = utils.parse_color(text_decoration_color, return_hex=True) + rPr = run._r.get_or_add_rPr() + u = rPr.find(qn('w:u')) + if u is not None: + u.set(qn('w:color'), color_hex.upper()) + + def _apply_background_color_paragraph(self, **kwargs): + paragraph = kwargs['paragraph'] + value = kwargs['value'] + + background_color = utils.remove_important_from_style(value).lower().strip() + + if background_color in ('inherit', 'initial'): + return + elif background_color in ('transparent', 'none'): + logging.warning(f"Warning: Unsupported background color '{background_color}'") + 
return + + try: + color_hex = utils.parse_color(background_color, return_hex=True) + if not color_hex: + return + + paragraph_id = id(paragraph) + paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {}) + + for i, run in enumerate(paragraph.runs): + if i in paragraph_spans and 'background-color' in paragraph_spans[i]: + continue + self._apply_background_color_to_run( + run=run, + background_color=background_color, + ) + + except Exception as e: + logging.warning(f"Could not apply background-color to paragraph: {e}") + + def _apply_background_color_to_run(self, **kwargs): + run = kwargs['run'] + background_color = kwargs['background_color'] + try: + if background_color in ('inherit', 'initial'): + return + elif background_color in ('transparent', 'none'): + logging.warning(f"Warning: Unsupported background color '{background_color}'") + return + + color_hex = utils.parse_color(background_color, return_hex=True) + if not color_hex: + return + + shd = OxmlElement('w:shd') + shd.set(qn('w:val'), 'clear') + shd.set(qn('w:color'), 'auto') + shd.set(qn('w:fill'), color_hex.lstrip('#')) + + r_pr = run._element.get_or_add_rPr() + + # Remove existing shading + existing_shd = r_pr.find(qn('w:shd')) + if existing_shd is not None: + r_pr.remove(existing_shd) + + r_pr.append(shd) + + except Exception as e: + logging.warning(f"Could not apply background-color to run: {e}") + def add_text_align_or_margin_to(self, obj, style): """Styles that can be applied on multiple objects""" if 'text-align' in style: @@ -363,11 +931,12 @@ def add_styles_to_run(self, style): self.run.font.color.rgb = RGBColor(*colors) if 'background-color' in style: - color = utils.parse_color(style['background-color'], return_hex=True) - + # This should stay here for div. 
# Little trick to apply background-color to paragraph # because `self.run.font.highlight_color` # has a very limited amount of colors + color = utils.parse_color(style['background-color'], return_hex=True) + shd = OxmlElement('w:shd') shd.set(qn('w:val'), 'clear') shd.set(qn('w:color'), 'auto') @@ -747,7 +1316,12 @@ def handle_starttag(self, tag, attrs): if not self.include_styles: return - if 'style' in current_attrs and self.paragraph: + if 'style' in current_attrs and self.paragraph and (tag in ['p'] or re.match(r'h[1-9]', tag)): + if not hasattr(self.paragraph, '_pending_styles'): + self.paragraph._pending_styles = [] + style = utils.parse_dict_string(current_attrs['style']) + self.paragraph._pending_styles.append(style) + elif 'style' in current_attrs and self.paragraph: style = utils.parse_dict_string(current_attrs['style']) self.add_text_align_or_margin_to(self.paragraph.paragraph_format, style) @@ -782,6 +1356,14 @@ def handle_endtag(self, tag): elif tag == 'li': self.in_li = False + if tag in ['p', 'pre'] or re.match(r'h[1-9]', tag): + if hasattr(self.paragraph, '_pending_styles'): + for style in self.paragraph._pending_styles: + self.apply_styles_to_paragraph(self.paragraph, style) + # Clear the pending styles + del self.paragraph._pending_styles + self.paragraph_span_styles.clear() + if tag in self.tags: self.tags.pop(tag) # maybe set relevant reference to None? 
@@ -814,10 +1396,19 @@ def handle_data(self, data): for span in self.tags['span']: if 'style' in span: - style = utils.parse_dict_string(span['style']) - self.add_styles_to_run(style) + span_style = utils.parse_dict_string(span['style']) + self.apply_styles_to_run(self.run, span_style) + + for tag, attrs in self.tags.items(): + if tag == 'div' and 'style' in attrs: + div_style = utils.parse_dict_string(attrs['style']) + + for span_style_name in span_style.keys(): + if span_style_name in div_style: + del div_style[span_style_name] + + self.tags[tag]['style'] = utils.dict_to_style_string(div_style) - # add font style and name for tag, attrs in self.tags.items(): if tag in constants.FONT_STYLES: font_style = constants.FONT_STYLES[tag] @@ -827,7 +1418,7 @@ def handle_data(self, data): font_name = constants.FONT_NAMES[tag] self.run.font.name = font_name - if 'style' in attrs and (tag in ['div', 'li', 'p', 'pre'] or re.match(r'h[1-9]', tag)): + if 'style' in attrs and (tag in ['div', 'li', 'pre']): style = utils.parse_dict_string(attrs['style']) self.add_styles_to_run(style) diff --git a/html4docx/utils.py b/html4docx/utils.py index 8c35d31..3da6b1b 100644 --- a/html4docx/utils.py +++ b/html4docx/utils.py @@ -20,6 +20,10 @@ class ImageAlignment(Enum): def get_filename_from_url(url: str): return os.path.basename(urlparse(url).path) +def dict_to_style_string(style_dict): + """Convert style dictionary back to CSS string""" + return '; '.join([f'{k}: {v}' for k, v in style_dict.items()]) + def is_url(url: str): """ Not to be used for actually validating a url, but in our use case we only diff --git a/tests/assets/htmls/css_properties.html b/tests/assets/htmls/css_properties.html new file mode 100644 index 0000000..fd9328c --- /dev/null +++ b/tests/assets/htmls/css_properties.html @@ -0,0 +1,196 @@ +

+ Paragraph with all styles + + Span overriding all styles + + Text inheriting paragraph styles again +

+ +

+ Paragraph with individual text-decoration properties + + Span removing underline but keeping other styles + + Text continuing with paragraph styles +

+ +

+ Paragraph with numeric and complex values + Span adding oblique and changing underline style only + Normal text continuing + + Span with only background color (should inherit other styles) + +

+ +

+ Paragraph with font fallback and text case + + Span only bolding (should inherit font, color, transform) + + + Span changing color and removing transform only + + Final text with original styles +

+ +

+ Paragraph with multiple decorations and named size + + Span simplifying decorations and changing size + + + Plain span inheriting everything + + + Span with weight and normal style only + +

+ +

+ Reset paragraph + + Span with completely different styles + + Back to reset styles +

+ +

+ Testing text-decoration shorthand vs longhand + + Span using longhand to override shorthand + + + Span using different shorthand + +

+ +

+ Small gray text + + Span with RGB color + + + Span with RGBA background + +

+ +

+ Paragraph with less common values + + Span with numeric weight and normal style + + Continuing with original styles +

+ +

+ Paragraph with advanced formatting: margins, justification, line height, and text indent. + Bold span inheriting paragraph formatting + Continuing text that should maintain all paragraph styles including justified alignment. +

+ +

+ Centered paragraph with auto margins + Span trying to override alignment (should not work) + Still centered text +

+ +

+ text decorations with double style + Span changing only the style + Span changing only the color +

+ +

+ Testing unit conversions and line height + Span with relative font size + Span with percentage font size + Span with pixel font size + Text with original 12pt size +

+ +

+ Testing font fallback chain + Span switching to generic sans-serif + Back to original font stack +

+ +

+ Testing HSL colors + Span with HSLA color + Span with red HSL background +

+ +

+ Testing blink and overline (unsupported features) + Span with supported underline only + Text with original (partially supported) styles +

+ +

+ Testing CSS keywords + Span with inherit color + Span with initial background + Span with unset font size +

+ +

+ Testing !important declarations + Span with important normal weight + Span with important no decoration + Text that should respect important styles +

+ +

+ Mixed valid and invalid properties + Span with invalid decoration style + Span with invalid color +

+ +

+ Testing various units and normal values + Span with named font size + Span with percentage line height +

+ +

+ Testing unsupported CSS3 properties + Span with box-shadow (unsupported) + Span with opacity (unsupported) +

+ +

+ Plain paragraph with no styles + + Span with all run styles applied individually + + Back to plain text +

+ +

+ Testing minimum values + + Span with maximum values + + Back to minimum values +

+ +

+ Testing text-decoration inheritance + + Plain span inheriting decoration + + + Span explicitly inheriting decoration line + + + Span explicitly inheriting decoration color + +

+ +

+ Testing edge case values + Span with very large font and visible color + Span inheriting edge values +

+ diff --git a/tests/assets/htmls/header.html b/tests/assets/htmls/header.html new file mode 100644 index 0000000..bebb1bb --- /dev/null +++ b/tests/assets/htmls/header.html @@ -0,0 +1,71 @@ +

+ Main Heading H1 - Large and Centered +

+ +

+ Secondary Heading H2 - Underlined with Background +

+ +

+ Tertiary Heading H3 - Italic and Right Aligned +

+ +

+ quaternary heading H4 - normal weight and capitalized +

+ +

+ Strikethrough H1 with Complex Decoration Underlined Span in H1 +

+ +

+ Light Weight H3 with Text Indent Bold Span in Light Heading +

+ +

+ H3 FORCED TO LOWERCASE WITH TEXT-TRANSFORM span forced to uppercase +

+ +

+ H4 with Serif Font +

+ +

+ Centered H1 with Auto Margins and Background +

+ +

+ H2 with Lighter Weight and Dotted Underline Bolder Span with Solid Underline +

+ +

+ H3 with RGB and Colors Span with Different RGB Colors +

+ +

+ H4 with Strike-through and Light Weight Important Note Without Strike-through +

+ +

+ H3 with Unsupported Text Transform Supported transform in span +

+ +

+ Plain H4 with Reset Styles Styled Span in Plain Heading +

+ +

+ H1 with Text Color Visible Span in Transparent Heading +

+ +

+ H3 with All Three Decorations Span with Single Decoration +

+ +

+ H2 with Middle Weight and Justified Text Darker Span in Middle Weight Heading +

+ +

+ H4 with Style Valid Style Span +

\ No newline at end of file diff --git a/tests/assets/htmls/paragraph_first_line_indent.html b/tests/assets/htmls/paragraph_first_line_indent.html new file mode 100644 index 0000000..4d5dcc7 --- /dev/null +++ b/tests/assets/htmls/paragraph_first_line_indent.html @@ -0,0 +1,45 @@ +

text-indent: 3cm: Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium + doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae + vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia + consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum + quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et + dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis + suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea + voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

+

text-indent: 20pt: Sed ut perspiciatis unde omnis iste natus error sit voluptatem + accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi + architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut + fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui + dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam + corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui + in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

+

text-indent: 40px: Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium + doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae + vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia + consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum + quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et + dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis + suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea + voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

+

text-indent: 35mm: Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium + doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae + vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia + consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum + quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et + dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis + suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea + voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

+

text-indent: 35mm: Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium + doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae + vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia + consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum + quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et + dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis + suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea + voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

\ No newline at end of file diff --git a/tests/assets/htmls/paragraph_line_height.html b/tests/assets/htmls/paragraph_line_height.html new file mode 100644 index 0000000..c79851d --- /dev/null +++ b/tests/assets/htmls/paragraph_line_height.html @@ -0,0 +1,117 @@ +

line height 1: Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium + doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae + vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia + consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum + quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et + dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis + suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea + voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

" +

line height 1.15: Sed ut perspiciatis unde omnis iste natus error sit voluptatem + accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi + architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut + fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui + dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam + corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui + in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

" +

line height 1.5: Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium + doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae + vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia + consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum + quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et + dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis + suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea + voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

" +

line height 2: Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium + doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae + vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia + consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum + quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et + dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis + suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea + voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

" +

line height 20px: Sed ut perspiciatis unde omnis iste natus error sit voluptatem + accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi + architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut + fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui + dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam + corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui + in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

" +

line height 1.2em: Sed ut perspiciatis unde omnis iste natus error sit voluptatem + accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi + architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut + fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui + dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam + corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui + in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

" +

line height 1.5em: Sed ut perspiciatis unde omnis iste natus error sit voluptatem + accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi + architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut + fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui + dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam + corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui + in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

" +

line height 2em: Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium + doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae + vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia + consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum + quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et + dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis + suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea + voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

+

line height 1.2rem: Sed ut perspiciatis unde omnis iste natus error sit voluptatem + accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi + architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut + fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui + dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam + corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui + in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

+

line height 1.5rem: Sed ut perspiciatis unde omnis iste natus error sit voluptatem + accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi + architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut + fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui + dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam + corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui + in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

+

line height 2rem: Sed ut perspiciatis unde omnis iste natus error sit voluptatem + accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi + architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut + fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui + dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam + corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui + in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

+

line height 150%: Sed ut perspiciatis unde omnis iste natus error sit voluptatem + accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi + architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut + fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui + dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam + corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui + in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

+

line height 200%: Sed ut perspiciatis unde omnis iste natus error sit voluptatem + accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi + architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut + fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui + dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam + corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui + in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla + pariatur?

\ No newline at end of file diff --git a/tests/assets/htmls/text_decoration.html b/tests/assets/htmls/text_decoration.html new file mode 100644 index 0000000..40cb7ed --- /dev/null +++ b/tests/assets/htmls/text_decoration.html @@ -0,0 +1,60 @@ +underlined span (red) +no decoration span (rgb(0, 0, 0)) +strikethrough span (gray) +underline+line-through span (orange) +wavy underlined span (blue) +dotted underlined span (purple) +dashed underlined span (green) +double underlined span (brown) +

Normal text wavy underlined span (blue)continues +

+

Normal text dotted underlined span (purple)continues +

" +

Normal text strikethrough span (red) continues +

+"

Normal text overline span (orange) continues +

+ +

Start underlined + strikethrough + dashed underline + wavy overline + end +

+ +

Underlined paragraph with + normal span inside

" + +

Strikethrough paragraph with + underlined red span inside

+ +

Outer blue underline with + red strikethrough inside + continues blue underline normal text

+ +

Dotted green + underlined paragraph

+

Wavy purple + strikethrough paragraph

+

Double teal + overline paragraph

" + +

Dark blue underlined paragraph with + wavy coral strikethrough span + + and gold overline+underline span +

+ +

Base text just line + just color + just style + line and color + line and style + color and style + all properties +

\ No newline at end of file diff --git a/tests/test_h4d.py b/tests/test_h4d.py index 78f0ba9..003f845 100644 --- a/tests/test_h4d.py +++ b/tests/test_h4d.py @@ -4,7 +4,7 @@ import unittest from docx import Document from docx.oxml.ns import qn -from docx.enum.text import WD_ALIGN_PARAGRAPH +from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_UNDERLINE from docx.shared import RGBColor from html4docx import HtmlToDocx @@ -31,6 +31,11 @@ def setUpClass(cls): cls.clean_up_docx() cls.document = Document() cls.text1 = cls.get_html_from_file('text1.html') + cls.paragraph_line_height = cls.get_html_from_file('paragraph_line_height.html') + cls.paragraph_first_line_indent = cls.get_html_from_file('paragraph_first_line_indent.html') + cls.text_decoration = cls.get_html_from_file('text_decoration.html') + cls.css_properties = cls.get_html_from_file('css_properties.html') + cls.css_properties_header = cls.get_html_from_file('header.html') cls.table_html = cls.get_html_from_file('tables1.html') cls.table2_html = cls.get_html_from_file('tables2.html') cls.table3_html = cls.get_html_from_file('tables3.html') @@ -469,6 +474,709 @@ def test_font_size(self): font_sizes = [str(p.runs[1].font.size) for p in document.paragraphs] assert ['76200', '355600', '914400', '431800', 'None', '762000', '177800', '203200', '69850', '120650'] == font_sizes + def test_font_size_paragraph(self): + font_size_html_example = ( + "

paragraph 8px

" + "

paragraph 1cm

" + "

paragraph 6em

" + "

paragraph 12cm

" + "

paragraph 12vh not supported

" + "

paragraph 5pc

" + "

paragraph 14pt

" + "

paragraph 16pt

" + "

paragraph 2mm

" + "

paragraph small

" + ) + + self.document.add_heading( + 'Test: Font-Size on

', + level=1 + ) + self.parser.add_html_to_document(font_size_html_example, self.document) + + document = self.parser.parse_html_string(font_size_html_example) + font_sizes = [str(p.runs[0].font.size) for p in document.paragraphs] + assert ['76200', '355600', '914400', '431800', 'None', '762000', '177800', '203200', '69850', '120650'] == font_sizes + + def test_font_weight_paragraph(self): + self.document.add_heading('Test: font weight on

', level=1) + font_weight_html_example = ( + "

bold text

" + "

bolder text

" + "

700 weight

" + "

900 weight

" + "

normal text

" + "

lighter text

" + "

400 weight

" + "

100 weight

" + ) + + self.parser.add_html_to_document(font_weight_html_example, self.document) + + document = self.parser.parse_html_string(font_weight_html_example) + + font_weights = [p.runs[0].font.bold for p in document.paragraphs] + + expected_weights = [ + True, # bold + True, # bolder + True, # 700 + True, # 900 + False, # normal + False, # lighter + False, # 400 + False, # 100 + ] + + self.assertEqual(font_weights, expected_weights) + + def test_font_style_paragraph(self): + self.document.add_heading('Test: font style on

', level=1) + font_style_html_example = ( + "

italic text

" + "

oblique text

" + "

normal text

" + ) + + self.parser.add_html_to_document(font_style_html_example, self.document) + + document = self.parser.parse_html_string(font_style_html_example) + + font_styles = [p.runs[0].font.italic for p in document.paragraphs] + + expected_styles = [ + True, # italic + True, # oblique (should be treated as italic) + False, # normal + ] + + self.assertEqual(font_styles, expected_styles) + + def test_font_family_paragraph(self): + self.document.add_heading('Test: font family on

', level=1) + font_family_html_example = ( + "

Arial font text

" + "

Helvetica font text

" + "

Noto Sans font text

" + "

Times New Roman font text

" + "

Generic serif font text

" + "

Generic sans-serif font text

" + "

Generic monospace font text

" + "

Courier New font text

" + "

Inherit font text

" + ) + + self.parser.add_html_to_document(font_family_html_example, self.document) + + def test_text_transform_paragraph(self): + self.document.add_heading('Test: text-transform on

', level=1) + text_transform_html_example = ( + "

uppercase text

" + "

LOWERCASE TEXT

" + "

capitalize each word

" + "

normal text

" + "

default text

" + ) + + self.parser.add_html_to_document(text_transform_html_example, self.document) + + def test_text_decoration_span(self): + self.document.add_heading('Test: text-decoration on ', level=1) + text_decoration_html_example = ( + # Standalone spans + "underlined span (red)" + "no decoration span (rgb(0, 0, 0))" + "strikethrough span (gray) (not supported)" + "underline+line-through span (orange)\ + (should be strike)" + + # Spans inside paragraphs + "

Normal text wavy underlined span (blue) continues

" + "

Normal text dotted underlined span (purple) continues

" + "

Normal text strikethrough span (red) continues

" + + # Multiple spans with different decorations in same paragraph + "

Start underlined " + "strikethrough " + "dashed underline end

" + + # Span with no decoration inside decorated paragraph + "

Underlined paragraph with " + "normal span inside

" + + # Span with decoration inside decorated paragraph (should override) + "

Strikethrough paragraph with " + "underlined red span inside

" + + # Override behavior with individual properties + "

Blue underlined paragraph with " + "strikethrough span inside

" + + # Check if equal - shorthand vs individual properties + "

Blue underlined paragraph

" + "

Blue underlined paragraph

" + ) + + self.parser.add_html_to_document(text_decoration_html_example, self.document) + + document = self.parser.parse_html_string(text_decoration_html_example) + + standalone_para = document.paragraphs[0] + assert len(standalone_para.runs) == 4 + + span1 = standalone_para.runs[0] + assert span1.text == "underlined span (red)" + assert span1.font.underline is True + + span2 = standalone_para.runs[1] + assert span2.text == "no decoration span (rgb(0, 0, 0))" + assert span2.font.underline is False + assert span2.font.strike is False + + span3 = standalone_para.runs[2] + assert span3.text == "strikethrough span (gray) (not supported)" + assert span3.font.strike is True + + span4 = standalone_para.runs[3] + assert span4.text == "underline+line-through span (orange) (should be strike)" + assert span4.font.strike is True + + # Test equivalence: shorthand vs individual properties + p8_individual = document.paragraphs[8] # Individual properties + p9_shorthand = document.paragraphs[9] # Shorthand + + # Both should have the same text decoration applied + assert p8_individual.runs[0].font.underline == WD_UNDERLINE.WAVY + assert p9_shorthand.runs[0].font.underline == WD_UNDERLINE.WAVY + + # Both paragraphs should have the same text + assert p8_individual.text == "Blue underlined paragraph" + assert p9_shorthand.text == "Blue underlined paragraph" + + # Both should have the same styling result + assert p8_individual.runs[0].font.underline == p9_shorthand.runs[0].font.underline + + def test_text_decoration_paragraph(self): + self.document.add_heading('Test: text-decoration on

', level=1) + text_decoration_html_example = ( + "

underlined text (red)

" + "

no decoration text (rgb(0, 0, 0))

" + "

strikethrough text (gray) (color not supported)

" + "

underline+line-through (orange)\ + (should be strike)

" + "

wavy underline (blue)

" + "

dotted underline (rgb(0, 128, 0))

" + "

dotted underline (rgb(0, 255, 0))

" + "

dashed underline (purple)

" + "

double underline (rgb(255, 69, 0))

" + "

overline text (hotpink) (not supported)

" + "

blink text (hotpink) (not supported)

" + ) + + self.parser.add_html_to_document(text_decoration_html_example, self.document) + + document = self.parser.parse_html_string(text_decoration_html_example) + + underline_states = [] + strike_states = [] + + for p in document.paragraphs: + run = p.runs[0] + + # Check underline + underline = run.font.underline + if underline is None: + underline_states.append(None) + elif underline is True: + underline_states.append(True) + elif underline is False: + underline_states.append(False) + else: + underline_states.append(underline) + + # Check strike-through + strike = run.font.strike + if strike is None: + strike_states.append(None) + elif strike is True: + strike_states.append(True) + elif strike is False: + strike_states.append(False) + else: + strike_states.append(strike) + + expected_underline_states = [ + True, # underline (default single) - explicitly True + False, # none - explicitly False for both underline and strike + False, # line-through - explicitly False for underline when strike is True + False, # underline + line-through - line-through wins, underline explicitly False + WD_UNDERLINE.WAVY, # wavy underline - explicitly set to wavy + WD_UNDERLINE.DOTTED, # dotted underline - explicitly set to dotted + WD_UNDERLINE.DOTTED, # dotted underline - explicitly set to dotted + WD_UNDERLINE.DASH, # dashed underline - explicitly set to dash + WD_UNDERLINE.DOUBLE, # double underline - explicitly set to double + None, # overline (not supported) - remains None/unchanged + None, # blink (not supported) - remains None/unchanged + ] + + expected_strike_states = [ + False, # underline only - explicitly False for strike when underline is True + False, # none - explicitly False for both underline and strike + True, # line-through - explicitly True + True, # underline + line-through - line-through wins, strike explicitly True + False, # wavy underline only - explicitly False for strike when underline is set + False, # dotted underline only - explicitly False for strike 
when underline is set + False, # dotted underline only - explicitly False for strike when underline is set + False, # dashed underline only - explicitly False for strike when underline is set + False, # double underline only - explicitly False for strike when underline is set + None, # overline (not supported) - remains None/unchanged + None, # blink (not supported) - remains None/unchanged + ] + + self.assertEqual(underline_states, expected_underline_states) + self.assertEqual(strike_states, expected_strike_states) + + def test_first_line_paragraph(self): + self.document.add_heading('Test text-indent on

tags', level=1) + self.parser.add_html_to_document(self.paragraph_first_line_indent, self.document) + document = self.parser.parse_html_string(self.paragraph_first_line_indent) + + indent_values = [] + + for p in document.paragraphs: + indent_pt = p.paragraph_format.first_line_indent + if indent_pt is not None: + indent_values.append(indent_pt) + + expected_values = [ + 1080000, # 3cm + 254000, # 20pt + 381000, # 40px + 1260000, # 35mm + None, # Word does not support negative values here + ] + + for actual, expected in zip(indent_values, expected_values): + self.assertAlmostEqual(actual, expected, delta=634) + + def test_color_paragraph(self): + self.document.add_heading('Test: color on p tags', level=1) + color_html_example = ( + "

red text

" + "

green hex text

" + "

blue rgb text

" + "

inherit color text

" + "

transparent color text

" + "

current color text

" + "

red with other styles

" + "

default text

" + ) + + self.parser.add_html_to_document(color_html_example, self.document) + + document = self.parser.parse_html_string(color_html_example) + + color_states = [] + for p in document.paragraphs: + if p.runs and p.runs[0].font.color: + color_rgb = p.runs[0].font.color.rgb + if color_rgb: + color_states.append((color_rgb[0], color_rgb[1], color_rgb[2])) + else: + color_states.append(None) + else: + color_states.append(None) + + expected_colors = [ + (255, 0, 0), # red + (0, 255, 0), # #00ff00 (green) + (0, 0, 255), # rgb(0, 0, 255) (blue) + None, # inherit (should not apply color) + None, # transparent (should not apply color) + None, # currentcolor (should not apply color) + (255, 0, 0), # #ff0000 (red) with other styles + None, # default text + ] + + self.assertEqual(color_states, expected_colors) + + def test_line_height_paragraph(self): + self.document.add_heading('Test: line-height on

', level=1) + self.parser.add_html_to_document(self.paragraph_line_height, self.document) + document = self.parser.parse_html_string(self.paragraph_line_height) + + line_heights = [] + line_rules = [] + + for p in document.paragraphs: + line_spacing = p.paragraph_format.line_spacing + line_rule = p.paragraph_format.line_spacing_rule + line_heights.append(str(line_spacing) if line_spacing is not None else 'None') + line_rules.append(str(line_rule) if line_rule is not None else 'None') + + expected_line_heights = [ + '1.0', + '1.15', + '1.5', + '2.0', + '190500', # line-height: 20px + '182880', # line-height: 1.2em + '228600', # line-height: 1.5em + '304800', # line-height: 2em + '182880', # line-height: 1.2rem + '228600', # line-height: 1.5rem + '304800', # line-height: 2rem + '1.5', # line-height: 150% + '2.0', # line-height: 200% + ] + + self.assertEqual(line_heights, expected_line_heights, + f"Line heights don't match expected values. Got {line_heights}, expected {expected_line_heights}") + + def test_margins_paragraph(self): + margins_html_example = ( + "

centered paragraph

" + "

left margin 20px

" + "

right margin 1.5cm

" + "

left margin 1cm

" + "

both margins set

" + "

only left auto

" + "

only right auto

" + "

zero margins

" + "

left margin 2in

" + ) + + self.document.add_heading('Test margins on

', level=1) + self.parser.add_html_to_document(margins_html_example, self.document) + document = self.parser.parse_html_string(margins_html_example) + + expected_margins = [ + # Paragraph 1: "centered paragraph" - auto margins (None values) + {'left': None, 'right': None}, + # Paragraph 2: "left margin 20px" - 20px = 20 * 9525 = 190500 EMU + {'left': 190500, 'right': None}, + # Paragraph 3: "right margin 1.5cm" - 1.5cm = 1.5 * 360000 = 540000 EMU + {'left': None, 'right': 540000}, + # Paragraph 4: "left margin 1cm" - 1cm = 360000 EMU + {'left': 360000, 'right': None}, + # Paragraph 5: "both margins set" - 10px=95250 EMU, 15px=142875 EMU + {'left': 95250, 'right': 142875}, + # Paragraph 6: "only left auto" - auto margin + {'left': None, 'right': None}, + # Paragraph 7: "only right auto" - auto margin + {'left': None, 'right': None}, + # Paragraph 8: "zero margins" - 0px = 0 EMU + {'left': 0, 'right': 0}, + # Paragraph 9: "left margin 2in" - 2in = 2 * 914400 = 1828800 EMU + {'left': 1828800, 'right': None}, + ] + + self.assertEqual(len(document.paragraphs), len(expected_margins)) + + for i, paragraph in enumerate(document.paragraphs): + expected = expected_margins[i] + actual_left = paragraph.paragraph_format.left_indent + actual_right = paragraph.paragraph_format.right_indent + + # Check left margin + if expected['left'] is None: + self.assertIsNone(actual_left, f"Paragraph {i} left margin should be None") + else: + self.assertIsNotNone(actual_left, f"Paragraph {i} left margin should not be None") + self.assertTrue(abs(actual_left - expected['left']) <= 634, + f"Paragraph {i} left margin: expected {expected['left']} EMU, got {actual_left} EMU") + + # Check right margin + if expected['right'] is None: + self.assertIsNone(actual_right, f"Paragraph {i} right margin should be None") + else: + self.assertIsNotNone(actual_right, f"Paragraph {i} right margin should not be None") + self.assertTrue(abs(actual_right - expected['right']) <= 634, + f"Paragraph {i} right margin: 
expected {expected['right']} EMU, got {actual_right} EMU") + + def test_background_color_styles(self): + self.document.add_heading('Test background color on

, multiple cases', level=1) + html_example2 = """ +

+ Start of paragraph + First yellow span + middle text + Red span with white text + end of paragraph + + Purple span with + nested orange + inside + +

+ """ + self.parser.add_html_to_document(html_example2, self.document) + + html_example3 = """ +

+ Base paragraph background + Bold pink span + regular text + + Green span with + italic blue nested + and more green + + + Yellow span with + pink nested + and + cyan underlined + +

+ """ + self.parser.add_html_to_document(html_example3, self.document) + + html_example4 = """ +

+ White paragraph + Yellow span + Transparent span +

+ """ + self.parser.add_html_to_document(html_example4, self.document) + + html_example5 = """ +

+ RGB color background + Hex red + Inherit background + Initial background +

+ """ + self.parser.add_html_to_document(html_example5, self.document) + + html_example6 = """ +

+ Level 0 + + Level 1 + + Level 2 + + Level 3 + + Level 4 + + Level 5 + + + + + + Back to level 0 +

+ """ + self.parser.add_html_to_document(html_example6, self.document) + + html_example7 = """ +

+ Paragraph with padding + Styled span + Underlined green + Italic Arial pink +

+ """ + self.parser.add_html_to_document(html_example7, self.document) + + html_example8 = """ +

+ Normal paragraph + Highlighted text + normal text + Green highlight + more normal text +

+ """ + self.parser.add_html_to_document(html_example8, self.document) + + html_example9 = """ +

+ Light yellow background entire paragraph +

+

+ No background + Light red span only +

+

+ Light blue background + Light green span + Light orange span +

+ """ + self.parser.add_html_to_document(html_example9, self.document) + + def test_headers_with_css(self): + self.document.add_heading('Test: headers with css', level=1) + self.parser.add_html_to_document(self.css_properties_header, self.document) + + document = self.parser.parse_html_string(self.css_properties_header) + + # Test H1 - Large and Centered + h1 = document.paragraphs[0] + assert h1.style.name.startswith('Heading 1') + assert str(h1.runs[0].font.color.rgb) == '2C3E50' + assert h1.runs[0].font.bold is True + assert h1.runs[0].font.size == 342900 + assert h1.alignment == WD_ALIGN_PARAGRAPH.CENTER + assert h1.runs[0].text == 'MAIN HEADING H1 - LARGE AND CENTERED' # uppercase due to text-transform + + # Test H2 - Underlined with Background (no span in this one) + h2 = document.paragraphs[1] + assert h2.style.name.startswith('Heading 2') + assert str(h2.runs[0].font.color.rgb) == '34495E' + assert h2.runs[0].font.underline is True + assert h2.runs[0].font.name == 'Arial' + assert h2.runs[0].font.size == 266700 + + # Test H3 - Italic and Right Aligned + h3 = document.paragraphs[2] + assert h3.style.name.startswith('Heading 3') + assert str(h3.runs[0].font.color.rgb) == '7F8C8D' + assert h3.runs[0].font.italic is True + assert h3.runs[0].font.size == 209550 + assert h3.alignment == WD_ALIGN_PARAGRAPH.RIGHT + + # Test H4 - Normal Weight and Capitalized + h4 = document.paragraphs[3] + assert h4.style.name.startswith('Heading 4') + assert str(h4.runs[0].font.color.rgb) == '95A5A6' + assert h4.runs[0].font.bold is False # font-weight: normal + assert h4.runs[0].font.name == 'Georgia' + assert h4.runs[0].font.size == 171450 + assert h4.runs[0].text == 'Quaternary Heading H4 - Normal Weight And Capitalized' # capitalized + + # Test H1 with Complex Text Decoration and Span + h1_complex = document.paragraphs[4] + assert h1_complex.runs[0].font.strike is True # line-through + assert str(h1_complex.runs[0].font.color.rgb) == '8E44AD' + assert 
h1_complex.runs[0].font.size == 381000 + + # Test span in complex H1 + assert len(h1_complex.runs) >= 2 + span_in_h1 = h1_complex.runs[1] + assert span_in_h1.font.underline is True # underline in span + assert str(span_in_h1.font.color.rgb) == '2980B9' + + # Test H3 with Light Weight and Span + h3_light = document.paragraphs[5] + assert h3_light.runs[0].font.bold is False # font-weight: 100 + assert str(h3_light.runs[0].font.color.rgb) == 'D35400' + assert h3_light.runs[0].font.size == 190500 + + # Test bold span in light H3 + assert len(h3_light.runs) >= 2 + bold_span = h3_light.runs[1] + assert bold_span.font.bold is True # font-weight: 900 + + # Test H3 with Text Transform + h3_transform = document.paragraphs[6] + assert h3_transform.runs[0].text == 'h3 forced to lowercase with text-transform ' + assert len(h3_transform.runs) >= 2 + uppercase_span = h3_transform.runs[1] + assert uppercase_span.text == 'SPAN FORCED TO UPPERCASE' + + # Test H4 with Serif Font + h4_serif = document.paragraphs[7] + assert h4_serif.runs[0].font.name == 'Times New Roman' + assert str(h4_serif.runs[0].font.color.rgb) == '7D3C98' + assert h4_serif.alignment == WD_ALIGN_PARAGRAPH.CENTER + + # Test H1 with Auto Margins and Background + h1_centered = document.paragraphs[8] + assert h1_centered.alignment == WD_ALIGN_PARAGRAPH.CENTER + assert str(h1_centered.runs[0].font.color.rgb) == 'FFFFFF' + + # Test H2 with Lighter Weight and Span + h2_lighter = document.paragraphs[9] + assert h2_lighter.runs[0].font.bold is False # lighter weight + assert h2_lighter.runs[0].font.underline == WD_UNDERLINE.DOTTED + assert h2_lighter.runs[0].font.size == 228600 + + # Test bolder span + assert len(h2_lighter.runs) >= 2 + bolder_span = h2_lighter.runs[1] + assert bolder_span.font.bold is True # bolder + + # Test H3 with RGB Colors and Span + h3_rgb = document.paragraphs[10] + assert str(h3_rgb.runs[0].font.color.rgb) == '3498DB' # rgb(52, 152, 219) + assert h3_rgb.runs[0].font.size == 177800 + + # Test RGB 
span + assert len(h3_rgb.runs) >= 2 + rgb_span = h3_rgb.runs[1] + assert str(rgb_span.font.color.rgb) == 'E74C3C' # rgb(231, 76, 60) + + # Test H4 with Strike-through and Span + h4_strike = document.paragraphs[11] + assert h4_strike.runs[0].font.strike is True + assert h4_strike.runs[0].font.bold is False # font-weight: 300 + + # Test span without strike-through + assert len(h4_strike.runs) >= 2 + no_strike_span = h4_strike.runs[1] + assert no_strike_span.font.strike is False + assert str(no_strike_span.font.color.rgb) == 'E74C3C' + + # Test H3 with Unsupported Transform and Span + h3_unsupported = document.paragraphs[12] + assert str(h3_unsupported.runs[0].font.color.rgb) == 'F39C12' + assert h3_unsupported.runs[0].font.size == 196850 + + # Test supported transform in span + assert len(h3_unsupported.runs) >= 2 + supported_span = h3_unsupported.runs[1] + assert supported_span.text == 'Supported Transform In Span' # capitalize + + # Test H4 with Reset Styles and Span + h4_reset = document.paragraphs[13] + assert h4_reset.runs[0].font.bold is True # font-weight: 700 + assert h4_reset.runs[0].font.italic is False # font-style: normal + assert h4_reset.runs[0].font.underline is False # text-decoration: none + + # Test styled span + assert len(h4_reset.runs) >= 2 + styled_span = h4_reset.runs[1] + assert styled_span.font.bold is False # font-weight: 400 + assert styled_span.font.italic is True + assert styled_span.font.underline is True + + # Test H1 with Text Color and Span + h1_transparent = document.paragraphs[14] + assert h1_transparent.runs[0].font.size == 361950 + visible_span = h1_transparent.runs[1] + assert str(visible_span.font.color.rgb) == 'ECF0F1' + + # Test H3 with All Three Decorations and Span + h3_all_decorations = document.paragraphs[15] + assert h3_all_decorations.runs[0].font.strike is True + assert h3_all_decorations.runs[0].font.underline is False + + # Test span with single decoration + assert len(h3_all_decorations.runs) >= 2 + 
single_decoration_span = h3_all_decorations.runs[1] + assert single_decoration_span.font.underline is True + + # Test H2 with Middle Weight and Span + h2_middle = document.paragraphs[16] + assert h2_middle.runs[0].font.bold is False + + # Test darker span + assert len(h2_middle.runs) >= 2 + darker_span = h2_middle.runs[1] + assert darker_span.font.bold is False + + # Test H4 with Style and Span + h4_style = document.paragraphs[17] + assert h4_style.runs[0].font.underline is WD_UNDERLINE.WAVY + assert h4_style.runs[1].font.underline is WD_UNDERLINE.DOUBLE + def test_color_by_name(self): color_html_example = ( "

paragraph red

"