python-hwpx/src/hwpx/document.py at main · airmang/python-hwpx · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# SPDX-License-Identifier: Apache-2.0
"""High-level representation of an HWPX document."""

from __future__ import annotations

import xml.etree.ElementTree as ET
import io
import os
import re
import tempfile
import warnings
from datetime import datetime
import logging
import uuid

from os import PathLike
from pathlib import Path, PurePosixPath
from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload

from lxml import etree

from .oxml import (
    Bullet,
    GenericElement,
    HwpxOxmlDocument,
    HwpxOxmlHeader,
    HwpxOxmlHistory,
    HwpxOxmlInlineObject,
    HwpxOxmlMasterPage,
    HwpxOxmlMemo,
    HwpxOxmlNote,
    HwpxOxmlParagraph,
    HwpxOxmlRun,
    HwpxOxmlSection,
    HwpxOxmlSectionHeaderFooter,
    HwpxOxmlShape,
    HwpxOxmlTable,
    HwpxOxmlVersion,
    MemoShape,
    ParagraphProperty,
    RunStyle,
    Style,
    TrackChange,
    TrackChangeAuthor,
)
from .opc.package import (
    HwpxPackage,
    _UNCHECKED_SAVE_TOKEN,
)
from .oxml.namespaces import HC, HH, HH_NS, HP, HP_NS, register_owpml_namespaces
from .templates import blank_document_bytes

# Hand ElementTree's namespace registrar to the OWPML helper at import time
# (presumably so serialized XML keeps the conventional prefixes - confirm in
# ``.oxml.namespaces``).
register_owpml_namespaces(ET.register_namespace)

# Module-private aliases for the namespace constants imported from
# ``.oxml.namespaces``.
_HP_NS = HP_NS
_HP = HP
_HC = HC
_HH_NS = HH_NS
_HH = HH
# Conversion factors into HWP units: 7200 units per 25.4 mm, and 100 units per
# typographic point.
_HWP_UNITS_PER_MM = 7200 / 25.4
_HWP_UNITS_PER_PT = 100

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    from .tools.table_navigation import TableFillResult, TableLabelSearchResult, TableMapResult


def _append_element(
    parent: Any,
    tag: str,
    attributes: dict[str, str] | None = None,
) -> Any:
    """Create and append a child element that matches *parent*'s element type."""

    child = parent.makeelement(tag, attributes or {})
    parent.append(child)
    return child


def _mm_to_hwp_units(value: float) -> int:
    """Convert *value* millimetres to HWP units, rounded with ``round()``."""
    return round(value * _HWP_UNITS_PER_MM)


def _pt_to_hwp_units(value: float) -> int:
    """Convert *value* points to HWP units, rounded with ``round()``."""
    return round(value * _HWP_UNITS_PER_PT)


# Named paper sizes as two edge lengths in millimetres, shorter edge first
# (presumably (width, height) in portrait orientation - confirm at call sites).
_PAPER_SIZES_MM: dict[str, tuple[float, float]] = {
    "A3": (297.0, 420.0),
    "A4": (210.0, 297.0),
    "A5": (148.0, 210.0),
    "B4": (257.0, 364.0),
    "B5": (182.0, 257.0),
    "LETTER": (215.9, 279.4),
    "LEGAL": (215.9, 355.6),
}

# Upper-cased field type tokens. ``_is_form_field_begin`` rejects a field whose
# tokens hit the excluded set before it checks the accepted set.
_FORM_FIELD_EXCLUDED_TYPES = {"HYPERLINK", "MEMO"}
_FORM_FIELD_TYPES = {"FORM", "CLICKHERE", "CLICK_HERE", "CLICK-HERE", "NURUMTUL", "누름틀"}
# Attribute names, in priority order; presumably probed for a form field's name
# and prompt text respectively (their consumers are outside this view).
_FORM_FIELD_NAME_ATTRS = ("fieldName", "fieldname", "name", "title", "id", "fieldid")
_FORM_FIELD_PROMPT_ATTRS = ("prompt", "instruction", "description", "desc", "help", "memo")
# Lower-case parameter names; presumably the field parameters treated as
# name/prompt metadata (consumer not visible here).
_FORM_FIELD_PARAM_NAMES = {
    "fieldname",
    "field_name",
    "name",
    "title",
    "prompt",
    "instruction",
    "description",
    "desc",
    "help",
    "memo",
    "guide",
}
# Matches C0 control characters other than tab (\x09), line feed (\x0a) and
# carriage return (\x0d), plus the code points U+FFFE and U+FFFF.
_TEXT_ILLEGAL = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\ufffe\uffff]")


def _local_name(node_or_tag: Any) -> str:
    """Return a tag name with any ``{namespace}`` prefix stripped.

    *node_or_tag* may be an element-like object (its ``tag`` attribute is
    used) or the tag value itself. A tag that is not a string yields ``""``.
    """
    raw_tag = getattr(node_or_tag, "tag", node_or_tag)
    if isinstance(raw_tag, str):
        # rpartition hands back the whole string as its last item when no
        # "}" is present, so one expression covers both cases.
        return raw_tag.rpartition("}")[2]
    return ""


def _sanitize_field_text(value: str) -> str:
    """Return *value* with every character matched by ``_TEXT_ILLEGAL`` removed."""
    return _TEXT_ILLEGAL.sub("", value)


def _field_type_tokens(*values: str | None) -> set[str]:
    tokens: set[str] = set()
    for value in values:
        if not value:
            continue
        raw = str(value).strip()
        if not raw:
            continue
        tokens.add(raw.upper())
        tokens.add(raw.replace("_", "").replace("-", "").upper())
    return tokens


def _is_form_field_begin(ctrl: Any, field_begin: Any) -> bool:
    """Decide whether *field_begin* (inside *ctrl*) starts a form field.

    Tokens are gathered from the control's ``type`` and from the field's
    ``type``, ``name``, ``fieldName`` and ``fieldname`` attributes. A hit in
    ``_FORM_FIELD_EXCLUDED_TYPES`` rejects the field; otherwise a hit in
    ``_FORM_FIELD_TYPES`` accepts it. With no hit either way, the control's
    own ``type`` is compared against ``"FORM"``.
    """
    candidates = _field_type_tokens(
        ctrl.get("type"),
        field_begin.get("type"),
        field_begin.get("name"),
        field_begin.get("fieldName"),
        field_begin.get("fieldname"),
    )
    # Exclusions are checked first, so they win over accepted types.
    if not candidates.isdisjoint(_FORM_FIELD_EXCLUDED_TYPES):
        return False
    if not candidates.isdisjoint(_FORM_FIELD_TYPES):
        return True
    ctrl_type = ctrl.get("type") or ""
    return ctrl_type.strip().upper() == "FORM"


def _field_identifier(field_begin: Any) -> str:
    """Return the first non-blank identifying attribute of *field_begin*.

    Attributes are tried in the order ``id``, ``fieldid``, ``name``,
    ``fieldName``, ``fieldname``; values are stripped, and ``""`` is returned
    when none of them carries text.
    """
    stripped = (
        (field_begin.get(key) or "").strip()
        for key in ("id", "fieldid", "name", "fieldName", "fieldname")
    )
    return next((text for text in stripped if text), "")


def _field_end_matches(field_begin: Any, field_end: Any) -> bool:
    """Tell whether *field_end* closes the field opened by *field_begin*.

    The begin side is identified by its truthy ``id`` / ``fieldid`` / ``name``
    values, the end side by its truthy ``beginIDRef`` / ``fieldid`` / ``id``
    values. A begin element without any identifier matches every end element;
    a begin element with identifiers only matches an end element that shares
    at least one of them.
    """
    begin_ids = {
        found for found in map(field_begin.get, ("id", "fieldid", "name")) if found
    }
    end_ids = {
        found for found in map(field_end.get, ("beginIDRef", "fieldid", "id")) if found
    }
    if not begin_ids:
        return True
    if not end_ids:
        return False
    return not begin_ids.isdisjoint(end_ids)


def _field_parameters(field_begin: Any) -> list[dict[str, str]]:
    """List the parameter nodes found anywhere under *field_begin*.

    Every node yielded by ``field_begin.iter()`` whose local tag name ends in
    ``Param`` becomes a ``{"name": ..., "value": ...}`` entry, where the name
    is the stripped ``name`` attribute and the value is the node's stripped
    concatenated text. Nodes with neither a name nor a value are left out.
    """
    collected: list[dict[str, str]] = []
    param_nodes = (
        node for node in field_begin.iter() if _local_name(node).endswith("Param")
    )
    for param in param_nodes:
        label = (param.get("name") or "").strip()
        content = "".join(param.itertext()).strip()
        if label or content:
            collected.append({"name": label, "value": content})
    return collected


def _first_attr(element: Any, names: Sequence[str]) -> str:
    """Return the first non-blank attribute of *element* among *names*.

    Attribute values are stripped before the check; ``""`` is returned when
    every candidate is missing or blank.
    """
    cleaned = ((element.get(attr) or "").strip() for attr in names)
    return next(filter(None, cleaned), "")


def _field_parameter_value(parameters: Sequence[dict[str, str]], *names: str) -> str:
    """Return the value of the first parameter whose name is one of *names*.

    Names are compared case-insensitively via ``str.casefold``. Entries whose
    stripped value is empty are skipped, and ``""`` is returned when nothing
    qualifies.
    """
    targets = frozenset(label.casefold() for label in names)
    pairs = (
        (entry.get("name", "").casefold(), entry.get("value", "").strip())
        for entry in parameters
    )
    return next((text for key, text in pairs if text and key in targets), "")


def _clear_form_field_layout_cache(paragraph: Any) -> int:
    """Remove the direct ``linesegarray`` children of *paragraph*.

    The local tag name is compared case-insensitively. Only direct children
    are examined.

    Returns:
        The number of children removed.
    """
    # Collect first so the paragraph is not mutated while being iterated.
    stale = [node for node in paragraph if _local_name(node).lower() == "linesegarray"]
    for node in stale:
        paragraph.remove(node)
    return len(stale)


def _normalize_page_orientation(value: str | None) -> str | None:
    if value is None:
        return None
    normalized = value.strip().upper()
    aliases = {
        "PORTRAIT": "PORTRAIT",
        "NARROW": "PORTRAIT",
        "NARROWLY": "PORTRAIT",
        "LANDSCAPE": "WIDELY",
        "WIDE": "WIDELY",
        "WIDELY": "WIDELY",
    }
    orientation = aliases.get(normalized)
    if orientation is None:
        raise ValueError(f"unsupported page orientation: {value}")
    return orientation


def _png_dimensions(image_data: bytes) -> tuple[int, int] | None:
    if len(image_data) < 24 or not image_data.startswith(b"\x89PNG\r\n\x1a\n"):
        return None
    width = int.from_bytes(image_data[16:20], "big")
    height = int.from_bytes(image_data[20:24], "big")
    if width <= 0 or height <= 0:
        return None
    return width, height


def _bin_data_stem(value: Any) -> str | None:
    if value is None:
        return None
    raw = str(value).strip()
    if not raw:
        return None
    stem = PurePosixPath(raw).stem
    return stem or None


def _write_bytes_atomically(path: str | PathLike[str], data: bytes) -> None:
    """Write *data* to *path* via a temporary file in the same directory.

    The bytes are first written to a ``mkstemp`` file created next to the
    target and then moved over the target with ``os.replace``, so the target
    is only swapped once the full payload has been written.

    Any exception (including ``BaseException`` subclasses such as
    ``KeyboardInterrupt``) triggers a best-effort removal of the temporary
    file before the exception is re-raised.
    """
    target = Path(path)
    # Create the temp file beside the target so the final replace stays within
    # one directory.
    fd, tmp_path = tempfile.mkstemp(dir=str(target.parent), suffix=".hwpx.tmp")
    try:
        # os.fdopen takes ownership of fd; the with-block closes it.
        with os.fdopen(fd, "wb") as tmp_fh:
            tmp_fh.write(data)
        os.replace(tmp_path, str(target))
    except BaseException:
        try:
            os.unlink(tmp_path)
        except OSError:
            # The temp file may already be gone (e.g. replace succeeded).
            pass
        raise


def _capture_stream_checkpoint(stream: BinaryIO) -> tuple[int, bytes] | None:
    """Snapshot what is needed to undo a later write to *stream*.

    Returns:
        ``(position, tail)`` where *position* is the current offset and
        *tail* holds the bytes from that offset to the end of the stream, or
        ``None`` when the stream cannot be checkpointed (``tell``/``seek``
        unavailable or failing, or the tail cannot be read while the position
        is not at the end of the stream).

    The stream position is restored to *position* before a tuple is returned.
    """
    try:
        position = stream.tell()
    except (AttributeError, OSError):
        return None
    try:
        tail = stream.read()
    except (AttributeError, OSError):
        # The stream could not be read. A checkpoint is still possible when
        # the position is already at the end, because then there is no tail
        # to preserve.
        try:
            end_position = stream.seek(0, os.SEEK_END)
        except (AttributeError, OSError):
            return None
        try:
            stream.seek(position)
        except (AttributeError, OSError):
            return None
        if end_position == position:
            return position, b""
        return None
    # Reading moved the position; rewind to where the caller left it.
    try:
        stream.seek(position)
    except (AttributeError, OSError):
        return None
    return position, tail


def _rollback_stream(stream: BinaryIO, checkpoint: tuple[int, bytes] | None) -> None:
    if checkpoint is None:
        return
    position, tail = checkpoint
    try:
        stream.seek(position)
        if tail:
            stream.write(tail)
            stream.truncate(position + len(tail))
        else:
            stream.truncate(position)
        stream.seek(position)
    except (AttributeError, OSError):
        return


def _write_stream_or_rollback(stream: BinaryIO, data: bytes) -> None:
    """Write *data* to *stream*, undoing the write if anything goes wrong.

    A checkpoint is captured first. If the write raises, or reports fewer
    bytes written than ``len(data)``, the stream is rolled back to the
    checkpoint and the exception propagates.

    Raises:
        OSError: If the stream cannot be checkpointed, or on a short write.
    """
    checkpoint = _capture_stream_checkpoint(stream)
    if checkpoint is None:
        raise OSError(
            "HWPX stream save requires a checkpointable stream; "
            "use save_to_path() for non-seekable outputs"
        )
    try:
        written = stream.write(data)
        # Some stream implementations return None from write(); only a
        # reported count that differs from the payload size is an error.
        if written is not None and written != len(data):
            raise OSError(
                "short write while saving HWPX stream: "
                f"wrote {written} of {len(data)} bytes"
            )
    except BaseException:
        _rollback_stream(stream, checkpoint)
        raise


def _summarize_validation_issues(issues: Sequence[Any], *, limit: int = 5) -> str:
    """Render up to *limit* issues as one ``"; "``-separated line.

    Each issue is converted with ``str()``. When issues are left over, the
    text ``" ... and N more"`` is appended, with ``N`` the number not shown.
    """
    shown = list(map(str, issues[:limit]))
    hidden = len(issues) - len(shown)
    text = "; ".join(shown)
    return f"{text} ... and {hidden} more" if hidden > 0 else text


class HwpxDocument:
    """Provides a user-friendly API for editing HWPX documents."""

    def __init__(
        self,
        package: HwpxPackage,
        root: HwpxOxmlDocument,
        *,
        managed_resources: tuple[Any, ...] = (),
        validate_on_save: bool = False,
    ):
        """Bind the document to *package* and its parsed *root*.

        Args:
            package: Package object backing the document.
            root: Low-level document tree that the accessors delegate to.
            managed_resources: Extra objects to flush and close in
                :meth:`close`; copied into a private list.
            validate_on_save: Stored as a public attribute; its consumer is
                not part of this section of the file.
        """
        self._package = package
        self._root = root
        self._managed_resources = list(managed_resources)
        # Guards close() against running its cleanup twice.
        self._closed = False
        self.validate_on_save = validate_on_save

    def __repr__(self) -> str:
        """Return a compact and safe summary of the document state."""

        return (
            f"{self.__class__.__name__}("
            f"sections={len(self.sections)}, "
            f"paragraphs={len(self.paragraphs)}, "
            f"headers={len(self.headers)}, "
            f"master_pages={len(self.master_pages)}, "
            f"histories={len(self.histories)}, "
            f"closed={self._closed}"
            ")"
        )

    # ------------------------------------------------------------------
    # construction helpers
    @classmethod
    def open(
        cls,
        source: str | PathLike[str] | bytes | BinaryIO,
    ) -> "HwpxDocument":
        """Load an HWPX document from *source*.

        Raw ``bytes`` are wrapped in an in-memory ``io.BytesIO`` stream that
        the returned document tracks as a managed resource. Any other source
        is handed to ``HwpxPackage.open`` unchanged.

        Raises:
            HwpxStructureError: Raised when opening an HWPX whose required
                files or structure are invalid.
            HwpxPackageError: Propagated when a general I/O or format error
                occurs while opening the package.
        """
        owned_streams: list[Any] = []
        package_source: Any
        if isinstance(source, bytes):
            package_source = io.BytesIO(source)
            owned_streams.append(package_source)
        else:
            package_source = source
        opened = HwpxPackage.open(package_source)
        document_root = HwpxOxmlDocument.from_package(opened)
        return cls(opened, document_root, managed_resources=tuple(owned_streams))

    @classmethod
    def new(cls) -> "HwpxDocument":
        """Return a new blank document based on the default skeleton template.

        The template bytes come from ``blank_document_bytes()`` and are loaded
        through :meth:`open`.
        """

        return cls.open(blank_document_bytes())

    @classmethod
    def from_package(cls, package: HwpxPackage) -> "HwpxDocument":
        """Create a document backed by an existing :class:`HwpxPackage`.

        The document tree is built with ``HwpxOxmlDocument.from_package``; no
        extra managed resources are registered.

        Args:
            package: A :class:`hwpx.opc.package.HwpxPackage` instance.
        """
        root = HwpxOxmlDocument.from_package(package)
        return cls(package, root)

    def __enter__(self) -> "HwpxDocument":
        """Return this document instance on entering the context manager."""

        return self

    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
        """Clean up internal resources whether or not an exception occurred.

        Returns ``False`` so an exception raised inside the ``with`` block is
        not suppressed.
        """

        self.close()
        return False

    def close(self) -> None:
        """문서가 관리하는 내부 패키지/스트림 자원을 정리합니다.

        정리 정책:
        - ``flush()`` 가능한 자원은 먼저 flush를 시도합니다.
        - ``close()`` 가능한 자원은 flush 이후 close를 시도합니다.
        - flush/close 중 발생한 예외는 로깅하고 무시하여 정리 루틴을 계속 진행합니다.
        - 같은 문서에서 ``close()``를 여러 번 호출해도 안전합니다.
        """

        if self._closed:
            return

        self._flush_resource(self._package)
        for resource in self._managed_resources:
            self._flush_resource(resource)

        self._close_resource(self._package)
        for resource in self._managed_resources:
            self._close_resource(resource)

        self._managed_resources.clear()
        self._closed = True

    @staticmethod
    def _flush_resource(resource: Any) -> None:
        flush = getattr(resource, "flush", None)
        if not callable(flush):
            return
        try:
            flush()
        except Exception:
            logger.debug("자원 flush 중 예외를 무시합니다: resource=%r", resource, exc_info=True)

    @staticmethod
    def _close_resource(resource: Any) -> None:
        close = getattr(resource, "close", None)
        if not callable(close):
            return
        try:
            close()
        except Exception:
            logger.debug("자원 close 중 예외를 무시합니다: resource=%r", resource, exc_info=True)

    # ------------------------------------------------------------------
    # properties exposing document content
    @property
    def package(self) -> HwpxPackage:
        """Return the :class:`HwpxPackage` backing this document.

        This is the package object the document was constructed with.
        """
        return self._package

    @property
    def oxml(self) -> HwpxOxmlDocument:
        """Return the low-level XML object tree representing the document.

        This is the root object the document was constructed with; the other
        content accessors on this class delegate to it.
        """
        return self._root

    @property
    def sections(self) -> list[HwpxOxmlSection]:
        """Return the sections contained in the document.

        Delegates to ``sections`` on the underlying document tree.
        """
        return self._root.sections

    @property
    def headers(self) -> list[HwpxOxmlHeader]:
        """Return the header parts referenced by the document.

        Delegates to ``headers`` on the underlying document tree.
        """
        return self._root.headers

    @property
    def master_pages(self) -> list[HwpxOxmlMasterPage]:
        """Return the master-page parts declared in the manifest.

        Delegates to ``master_pages`` on the underlying document tree.
        """
        return self._root.master_pages

    @property
    def histories(self) -> list[HwpxOxmlHistory]:
        """Return document history parts referenced by the manifest.

        Delegates to ``histories`` on the underlying document tree.
        """
        return self._root.histories

    @property
    def version(self) -> HwpxOxmlVersion | None:
        """Return the version metadata part, or ``None`` if it is absent.

        Delegates to ``version`` on the underlying document tree.
        """
        return self._root.version

    @property
    def border_fills(self) -> dict[str, GenericElement]:
        """Return border fill definitions declared in the headers.

        Delegates to ``border_fills`` on the underlying document tree; the
        mapping uses string keys (presumably the definition ids).
        """

        return self._root.border_fills

    def border_fill(self, border_fill_id_ref: int | str | None) -> GenericElement | None:
        """Return the border fill definition referenced by *border_fill_id_ref*.

        The lookup is performed by the underlying document tree; per the
        return annotation the result may be ``None``.
        """

        return self._root.border_fill(border_fill_id_ref)

    @property
    def memo_shapes(self) -> dict[str, MemoShape]:
        """Return memo shapes available in the header reference lists.

        Delegates to ``memo_shapes`` on the underlying document tree; the
        mapping uses string keys (presumably the shape ids).
        """

        return self._root.memo_shapes

    def memo_shape(self, memo_shape_id_ref: int | str | None) -> MemoShape | None:
        """Return the memo shape definition referenced by *memo_shape_id_ref*.

        The lookup is performed by the underlying document tree; per the
        return annotation the result may be ``None``.
        """

        return self._root.memo_shape(memo_shape_id_ref)

    @property
    def bullets(self) -> dict[str, Bullet]:
        """Return bullet definitions declared in header reference lists.

        Delegates to ``bullets`` on the underlying document tree; the mapping
        uses string keys (presumably the bullet ids).
        """

        return self._root.bullets

    def bullet(self, bullet_id_ref: int | str | None) -> Bullet | None:
        """Return the bullet definition referenced by *bullet_id_ref*.

        The lookup is performed by the underlying document tree; per the
        return annotation the result may be ``None``.
        """

        return self._root.bullet(bullet_id_ref)

    @property
    def paragraph_properties(self) -> dict[str, ParagraphProperty]:
        """Return paragraph property definitions declared in headers.

        Delegates to ``paragraph_properties`` on the underlying document
        tree; the mapping uses string keys (presumably the property ids).
        """

        return self._root.paragraph_properties

    def paragraph_property(
        self, para_pr_id_ref: int | str | None
    ) -> ParagraphProperty | None:
        """Return the paragraph property referenced by *para_pr_id_ref*.

        The lookup is performed by the underlying document tree; per the
        return annotation the result may be ``None``.
        """

        return self._root.paragraph_property(para_pr_id_ref)

    def ensure_numbering(
        self,
        *,
        kind: str,
        levels: Sequence[dict[str, str]] | None = None,
    ) -> list[str]:
        """Return paragraph property ids for bullet or numbered-list levels.

        Both keyword arguments are forwarded unchanged to
        ``ensure_numbering`` on the underlying document tree, which defines
        the accepted *kind* values and the shape of *levels*.
        """

        return self._root.ensure_numbering(kind=kind, levels=levels)

    @property
    def styles(self) -> dict[str, Style]:
        """Return style definitions available in the document.

        Delegates to ``styles`` on the underlying document tree; the mapping
        uses string keys (presumably the style ids).
        """

        return self._root.styles

    def style(self, style_id_ref: int | str | None) -> Style | None:
        """Return the style definition referenced by *style_id_ref*.

        The lookup is performed by the underlying document tree; per the
        return annotation the result may be ``None``.
        """

        return self._root.style(style_id_ref)

    @property
    def track_changes(self) -> dict[str, TrackChange]:
        """Return tracked change metadata declared in the headers.

        Delegates to ``track_changes`` on the underlying document tree; the
        mapping uses string keys (presumably the change ids).
        """

        return self._root.track_changes

    def track_change(self, change_id_ref: int | str | None) -> TrackChange | None:
        """Return tracked change metadata referenced by *change_id_ref*.

        The lookup is performed by the underlying document tree; per the
        return annotation the result may be ``None``.
        """

        return self._root.track_change(change_id_ref)

    @property
    def track_change_authors(self) -> dict[str, TrackChangeAuthor]:
        """Return tracked change author metadata declared in the headers.

        Delegates to ``track_change_authors`` on the underlying document
        tree; the mapping uses string keys (presumably the author ids).
        """

        return self._root.track_change_authors

    def track_change_author(
        self, author_id_ref: int | str | None
    ) -> TrackChangeAuthor | None:
        """Return tracked change author details referenced by *author_id_ref*.

        The lookup is performed by the underlying document tree; per the
        return annotation the result may be ``None``.
        """

        return self._root.track_change_author(author_id_ref)

    @property
    def memos(self) -> list[HwpxOxmlMemo]:
        """Return all memo entries declared in every section."""

        memos: list[HwpxOxmlMemo] = []
        for section in self._root.sections:
            memos.extend(section.memos)
        return memos

    def add_memo(
        self,
        text: str = "",
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        memo_shape_id_ref: str | int | None = None,
        memo_id: str | None = None,
        char_pr_id_ref: str | int | None = None,
        attributes: dict[str, str] | None = None,
    ) -> HwpxOxmlMemo:
        """Create a memo entry inside *section* (or the last section by default)."""

        if section is None and section_index is not None:
            section = self._root.sections[section_index]
        if section is None:
            if not self._root.sections:
                raise ValueError("document does not contain any sections")
            section = self._root.sections[-1]
        return section.add_memo(
            text,
            memo_shape_id_ref=memo_shape_id_ref,
            memo_id=memo_id,
            char_pr_id_ref=char_pr_id_ref,
            attributes=attributes,
        )

    def remove_memo(self, memo: HwpxOxmlMemo) -> None:
        """Remove *memo* from the section it belongs to.

        The removal itself is carried out by ``memo.remove()``.
        """

        memo.remove()

    def attach_memo_field(
        self,
        paragraph: HwpxOxmlParagraph,
        memo: HwpxOxmlMemo,
        *,
        field_id: str | None = None,
        author: str | None = None,
        created: datetime | str | None = None,
        number: int = 1,
        char_pr_id_ref: str | int | None = None,
    ) -> str:
        """Attach a MEMO field control to *paragraph* so Hangul shows *memo*.

        A run holding a ``fieldBegin`` control is inserted as the first child
        of the paragraph element and a run holding the matching ``fieldEnd``
        is appended as its last child; the paragraph's section is then marked
        dirty.

        Args:
            paragraph: Paragraph that anchors the memo; must have a section.
            memo: Memo to reference; its group must have a section.
            field_id: Identifier for the field; a random UUID hex string is
                generated when omitted or empty.
            author: Author name; falls back to the memo's ``author``
                attribute, then to an empty string.
            created: Creation time; falls back to the memo's
                ``createDateTime`` attribute, then to the current local time.
                ``datetime`` values are formatted as ``%Y-%m-%d %H:%M:%S``.
            number: Memo number; values below 1 are written as 1.
            char_pr_id_ref: Character property id for the generated runs;
                falls back to the paragraph's id, then to one inferred by the
                memo, then to ``"0"``.

        Returns:
            The field identifier that was written.

        Raises:
            ValueError: If *paragraph* or *memo* is not attached to a section.
        """

        if paragraph.section is None:
            raise ValueError("paragraph must belong to a section before anchoring a memo")
        if memo.group.section is None:
            raise ValueError("memo is not attached to a section")

        field_value = field_id or uuid.uuid4().hex
        author_value = author or memo.attributes.get("author") or ""

        # Normalize the creation time to a string, whatever form it came in.
        created_value = created if created is not None else memo.attributes.get("createDateTime")
        if isinstance(created_value, datetime):
            created_value = created_value.strftime("%Y-%m-%d %H:%M:%S")
        elif created_value is None:
            created_value = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        else:
            created_value = str(created_value)

        memo_shape_id = memo.memo_shape_id_ref or ""

        # Resolve the character property id through the fallback chain
        # described in the docstring.
        char_ref = char_pr_id_ref
        if char_ref is None:
            char_ref = paragraph.char_pr_id_ref
        if char_ref is None:
            char_ref = memo._infer_char_pr_id_ref()
        if char_ref is None:
            char_ref = "0"
        char_ref = str(char_ref)

        # Opening run: <run><ctrl><fieldBegin type="MEMO" .../></ctrl></run>.
        paragraph_element = paragraph.element
        run_begin = paragraph_element.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
        ctrl_begin = _append_element(run_begin, f"{_HP}ctrl")
        field_begin = _append_element(
            ctrl_begin,
            f"{_HP}fieldBegin",
            {
                "id": field_value,
                "type": "MEMO",
                "editable": "true",
                "dirty": "false",
                "fieldid": field_value,
            },
        )

        # The declared count ("5") must match the number of parameter children
        # appended below.
        parameters = _append_element(field_begin, f"{_HP}parameters", {"count": "5", "name": ""})
        _append_element(parameters, f"{_HP}stringParam", {"name": "ID"}).text = memo.id or ""
        _append_element(parameters, f"{_HP}integerParam", {"name": "Number"}).text = str(max(1, number))
        _append_element(parameters, f"{_HP}stringParam", {"name": "CreateDateTime"}).text = created_value
        _append_element(parameters, f"{_HP}stringParam", {"name": "Author"}).text = author_value
        _append_element(parameters, f"{_HP}stringParam", {"name": "MemoShapeID"}).text = memo_shape_id

        # Nested sub-list with a single paragraph whose text is the memo id
        # (or the field id when the memo has none).
        sub_list = _append_element(
            field_begin,
            f"{_HP}subList",
            {
                "id": f"memo-field-{memo.id or field_value}",
                "textDirection": "HORIZONTAL",
                "lineWrap": "BREAK",
                "vertAlign": "TOP",
            },
        )
        sub_para = _append_element(
            sub_list,
            f"{_HP}p",
            {
                "id": f"memo-field-{(memo.id or field_value)}-p",
                "paraPrIDRef": "0",
                "styleIDRef": "0",
                "pageBreak": "0",
                "columnBreak": "0",
                "merged": "0",
            },
        )
        sub_run = _append_element(sub_para, f"{_HP}run", {"charPrIDRef": char_ref})
        _append_element(sub_run, f"{_HP}t").text = memo.id or field_value

        # Closing run: <run><ctrl><fieldEnd beginIDRef=.../></ctrl></run>.
        run_end = paragraph_element.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
        ctrl_end = _append_element(run_end, f"{_HP}ctrl")
        _append_element(ctrl_end, f"{_HP}fieldEnd", {"beginIDRef": field_value, "fieldid": field_value})

        # Wrap the paragraph's existing children between the two runs.
        paragraph.element.insert(0, run_begin)
        paragraph.element.append(run_end)
        paragraph.section.mark_dirty()

        return field_value

    def add_memo_with_anchor(
        self,
        text: str = "",
        *,
        paragraph: HwpxOxmlParagraph | None = None,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        paragraph_text: str | None = None,
        memo_shape_id_ref: str | int | None = None,
        memo_id: str | None = None,
        char_pr_id_ref: str | int | None = None,
        attributes: dict[str, str] | None = None,
        field_id: str | None = None,
        author: str | None = None,
        created: datetime | str | None = None,
        number: int = 1,
        anchor_char_pr_id_ref: str | int | None = None,
    ) -> tuple[HwpxOxmlMemo, HwpxOxmlParagraph, str]:
        """Create a memo and ensure it is visible by anchoring a MEMO field."""

        memo = self.add_memo(
            text,
            section=section,
            section_index=section_index,
            memo_shape_id_ref=memo_shape_id_ref,
            memo_id=memo_id,
            char_pr_id_ref=char_pr_id_ref,
            attributes=attributes,
        )

        target_paragraph = paragraph
        if target_paragraph is None:
            memo_section = memo.group.section
            if memo_section is None:
                raise ValueError("memo must belong to a section")
            paragraph_value = "" if paragraph_text is None else paragraph_text
            anchor_char = anchor_char_pr_id_ref or char_pr_id_ref
            target_paragraph = self.add_paragraph(
                paragraph_value,
                section=memo_section,
                char_pr_id_ref=anchor_char,
            )
        elif paragraph_text is not None:
            target_paragraph.text = paragraph_text

        field_value = self.attach_memo_field(
            target_paragraph,
            memo,
            field_id=field_id,
            author=author,
            created=created,
            number=number,
            char_pr_id_ref=anchor_char_pr_id_ref,
        )

        return memo, target_paragraph, field_value

    def remove_paragraph(
        self,
        paragraph: HwpxOxmlParagraph | int,
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
    ) -> None:
        """Remove a paragraph from the document.

        *paragraph* may be a :class:`HwpxOxmlParagraph` instance or an
        integer index into the paragraphs of the specified (or last)
        section.

        All arguments are forwarded unchanged to ``remove_paragraph`` on the
        underlying document tree, which performs the removal.

        Raises ``ValueError`` if the target section would become empty.
        """
        self._root.remove_paragraph(
            paragraph,
            section=section,
            section_index=section_index,
        )

    def add_section(self, *, after: int | None = None) -> HwpxOxmlSection:
        """Append a new empty section to the document.

        If *after* is given, the section is inserted after the section at
        that index.  Returns the newly created section.

        The work is delegated to ``add_section`` on the underlying document
        tree.
        """
        return self._root.add_section(after=after)

    def remove_section(
        self, section: HwpxOxmlSection | int,
    ) -> None:
        """Remove a section from the document.

        *section* may be a section object or, per the annotation, an integer
        (presumably an index into :attr:`sections`). The work is delegated to
        ``remove_section`` on the underlying document tree.

        Raises ``ValueError`` if the document would have no sections left.
        """
        self._root.remove_section(section)

    @property
    def paragraphs(self) -> list[HwpxOxmlParagraph]:
        """Return all paragraphs across every section.

        Delegates to ``paragraphs`` on the underlying document tree.
        """
        return self._root.paragraphs

    @property
    def char_properties(self) -> dict[str, RunStyle]:
        """Return the resolved character style definitions available to the document.

        Delegates to ``char_properties`` on the underlying document tree; the
        mapping uses string keys (presumably the ``charPr`` ids).
        """

        return self._root.char_properties

    def char_property(self, char_pr_id_ref: int | str | None) -> RunStyle | None:
        """Return the style referenced by *char_pr_id_ref* if known.

        The lookup is performed by the underlying document tree; per the
        return annotation the result may be ``None``.
        """

        return self._root.char_property(char_pr_id_ref)

    def ensure_run_style(
        self,
        *,
        bold: bool = False,
        italic: bool = False,
        underline: bool = False,
        color: str | None = None,
        font: str | None = None,
        size: int | float | None = None,
        highlight: str | None = None,
        strike: bool | None = None,
        base_char_pr_id: str | int | None = None,
    ) -> str:
        """Return a ``charPr`` identifier matching the requested flags.

        Every keyword argument is forwarded unchanged to ``ensure_run_style``
        on the underlying document tree, which defines how a matching
        definition is found (presumably creating one when none exists -
        confirm there).
        """

        return self._root.ensure_run_style(
            bold=bold,
            italic=italic,
            underline=underline,
            color=color,
            font=font,
            size=size,
            highlight=highlight,
            strike=strike,
            base_char_pr_id=base_char_pr_id,
        )

    def iter_runs(self) -> Iterator[HwpxOxmlRun]:
        """Yield every run element contained in the document."""

        for paragraph in self.paragraphs:
            for run in paragraph.runs:
                yield run

    def find_runs_by_style(
        self,
        *,
        text_color: str | None = None,
        underline_type: str | None = None,
        underline_color: str | None = None,
        char_pr_id_ref: str | int | None = None,
    ) -> list[HwpxOxmlRun]:
        """Return runs matching the requested style criteria."""

        matches: list[HwpxOxmlRun] = []
        target_char = str(char_pr_id_ref).strip() if char_pr_id_ref is not None else None

        for run in self.iter_runs():
            if target_char is not None:
                run_char = (run.char_pr_id_ref or "").strip()
                if run_char != target_char:
                    continue
            style = run.style
            if text_color is not None:
                if style is None or style.text_color() != text_color:
                    continue
            if underline_type is not None:
                if style is None or style.underline_type() != underline_type:
                    continue
            if underline_color is not None:
                if style is None or style.underline_color() != underline_color:
                    continue
            matches.append(run)
        return matches

    def replace_text_in_runs(
        self,
        search: str,
        replacement: str,
        *,
        text_color: str | None = None,
        underline_type: str | None = None,
        underline_color: str | None = None,
        char_pr_id_ref: str | int | None = None,
        limit: int | None = None,
    ) -> int:
        """Replace occurrences of *search* in runs matching the provided style filters."""

        if not search:
            raise ValueError("search must be a non-empty string")

        replacements = 0
        runs = self.find_runs_by_style(
            text_color=text_color,
            underline_type=underline_type,
            underline_color=underline_color,
            char_pr_id_ref=char_pr_id_ref,
        )

        for run in runs:
            remaining = None
            if limit is not None:
                remaining = limit - replacements
                if remaining <= 0:
                    break
            original_char_pr = run.char_pr_id_ref
            replaced_here = run.replace_text(
                search,
                replacement,
                count=remaining,
            )
            if replaced_here and original_char_pr is not None:
                # Ensure the run retains its original formatting reference even
                # if XML nodes were rewritten during substitution.
                run.char_pr_id_ref = original_char_pr
            replacements += replaced_here
            if limit is not None and replacements >= limit:
                break
        return replacements

    # ------------------------------------------------------------------
    # editing helpers
    def add_paragraph(
        self,
        text: str = "",
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        para_pr_id_ref: str | int | None = None,
        style_id_ref: str | int | None = None,
        char_pr_id_ref: str | int | None = None,
        run_attributes: dict[str, str] | None = None,
        include_run: bool = True,
        inherit_style: bool = True,
        **extra_attrs: str,
    ) -> HwpxOxmlParagraph:
        """Append a paragraph to the document and return it.

        When *inherit_style* is ``True`` (the default) and no explicit
        style references are given, the new paragraph inherits
        ``paraPrIDRef``, ``styleIDRef`` and ``charPrIDRef`` from the
        last paragraph in the target section so that consecutive
        paragraphs share the same formatting.

        Formatting references may be overridden via ``para_pr_id_ref``,
        ``style_id_ref`` and ``char_pr_id_ref``. Any additional keyword
        arguments are added as raw paragraph attributes.
        """
        return self._root.add_paragraph(
            text,
            section=section,
            section_index=section_index,
            para_pr_id_ref=para_pr_id_ref,
            style_id_ref=style_id_ref,
            char_pr_id_ref=char_pr_id_ref,