-
Notifications
You must be signed in to change notification settings - Fork 31
Expand file tree
/
Copy pathdocument.py
More file actions
2636 lines (2294 loc) · 92.6 KB
/
Copy pathdocument.py
File metadata and controls
2636 lines (2294 loc) · 92.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# SPDX-License-Identifier: Apache-2.0
"""High-level representation of an HWPX document."""
from __future__ import annotations
import xml.etree.ElementTree as ET
import io
import os
import re
import tempfile
import warnings
from datetime import datetime
import logging
import uuid
from os import PathLike
from pathlib import Path, PurePosixPath
from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload
from lxml import etree
from .oxml import (
Bullet,
GenericElement,
HwpxOxmlDocument,
HwpxOxmlHeader,
HwpxOxmlHistory,
HwpxOxmlInlineObject,
HwpxOxmlMasterPage,
HwpxOxmlMemo,
HwpxOxmlNote,
HwpxOxmlParagraph,
HwpxOxmlRun,
HwpxOxmlSection,
HwpxOxmlSectionHeaderFooter,
HwpxOxmlShape,
HwpxOxmlTable,
HwpxOxmlVersion,
MemoShape,
ParagraphProperty,
RunStyle,
Style,
TrackChange,
TrackChangeAuthor,
)
from .opc.package import (
HwpxPackage,
_UNCHECKED_SAVE_TOKEN,
)
from .oxml.namespaces import HC, HH, HH_NS, HP, HP_NS, register_owpml_namespaces
from .templates import blank_document_bytes
# Hand ElementTree's namespace registrar to the project helper at import time.
register_owpml_namespaces(ET.register_namespace)
# Module-private aliases for the namespace constants imported from
# ``.oxml.namespaces``.
_HP_NS = HP_NS
_HP = HP
_HC = HC
_HH_NS = HH_NS
_HH = HH
# Conversion factor used by _mm_to_hwp_units: 7200 units per 25.4 mm.
_HWP_UNITS_PER_MM = 7200 / 25.4
# Conversion factor used by _pt_to_hwp_units: 100 units per point.
_HWP_UNITS_PER_PT = 100
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from .tools.table_navigation import TableFillResult, TableLabelSearchResult, TableMapResult
def _append_element(
    parent: Any,
    tag: str,
    attributes: dict[str, str] | None = None,
) -> Any:
    """Build a new child of *parent* via ``parent.makeelement`` and attach it.

    Args:
        parent: Element that creates and receives the new child.
        tag: Tag of the new child.
        attributes: Optional attribute mapping; an empty mapping is used when
            omitted or empty.

    Returns:
        The newly appended child element.
    """
    attrs = attributes if attributes else {}
    node = parent.makeelement(tag, attrs)
    parent.append(node)
    return node
def _mm_to_hwp_units(value: float) -> int:
    """Convert millimetres to HWP units, rounded to the nearest integer."""
    scaled = value * _HWP_UNITS_PER_MM
    return round(scaled)
def _pt_to_hwp_units(value: float) -> int:
    """Convert points to HWP units, rounded to the nearest integer."""
    scaled = value * _HWP_UNITS_PER_PT
    return round(scaled)
# Named paper sizes with their two dimensions in millimetres
# (presumably (width, height) in portrait orientation -- confirm at call sites).
_PAPER_SIZES_MM: dict[str, tuple[float, float]] = {
"A3": (297.0, 420.0),
"A4": (210.0, 297.0),
"A5": (148.0, 210.0),
"B4": (257.0, 364.0),
"B5": (182.0, 257.0),
"LETTER": (215.9, 279.4),
"LEGAL": (215.9, 355.6),
}
# Type tokens that make _is_form_field_begin reject a field outright.
_FORM_FIELD_EXCLUDED_TYPES = {"HYPERLINK", "MEMO"}
# Type tokens that make _is_form_field_begin accept a field as a form field.
_FORM_FIELD_TYPES = {"FORM", "CLICKHERE", "CLICK_HERE", "CLICK-HERE", "NURUMTUL", "누름틀"}
# Candidate attribute names, in priority order (usage is outside this view;
# presumably read to find a field's name -- confirm).
_FORM_FIELD_NAME_ATTRS = ("fieldName", "fieldname", "name", "title", "id", "fieldid")
# Candidate attribute names, in priority order (usage is outside this view;
# presumably read to find a field's prompt text -- confirm).
_FORM_FIELD_PROMPT_ATTRS = ("prompt", "instruction", "description", "desc", "help", "memo")
# Lower-case parameter names (usage is outside this view; presumably the
# parameters treated as form-field metadata -- confirm).
_FORM_FIELD_PARAM_NAMES = {
"fieldname",
"field_name",
"name",
"title",
"prompt",
"instruction",
"description",
"desc",
"help",
"memo",
"guide",
}
# Matches C0 control characters other than tab, LF and CR, plus U+FFFE and
# U+FFFF; _sanitize_field_text deletes every match.
_TEXT_ILLEGAL = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\ufffe\uffff]")
def _local_name(node_or_tag: Any) -> str:
    """Return the tag name without its ``{namespace}`` prefix.

    Accepts either an object exposing a ``tag`` attribute or the tag itself.
    Non-string tags yield an empty string.
    """
    tag = getattr(node_or_tag, "tag", node_or_tag)
    if isinstance(tag, str):
        # rpartition returns the whole string as the last item when there is
        # no "}" at all, so both the qualified and the plain case are covered.
        return tag.rpartition("}")[2]
    return ""
def _sanitize_field_text(value: str) -> str:
    """Return *value* with every character matched by ``_TEXT_ILLEGAL`` removed."""
    cleaned = _TEXT_ILLEGAL.sub("", value)
    return cleaned
def _field_type_tokens(*values: str | None) -> set[str]:
    """Collect upper-cased comparison tokens from the given raw values.

    Each non-blank value contributes two tokens: its stripped upper-case form
    and the same form with ``_`` and ``-`` removed. Falsy or whitespace-only
    values are ignored.
    """
    found: set[str] = set()
    for candidate in values:
        text = str(candidate).strip() if candidate else ""
        if text:
            compact = text.replace("_", "").replace("-", "")
            found.update((text.upper(), compact.upper()))
    return found
def _is_form_field_begin(ctrl: Any, field_begin: Any) -> bool:
    """Return ``True`` when *field_begin* describes a form field.

    Tokens are gathered from the control's ``type`` and from the field's
    ``type``, ``name``, ``fieldName`` and ``fieldname`` attributes. Any token
    in ``_FORM_FIELD_EXCLUDED_TYPES`` rejects the field; otherwise the field is
    accepted when a token is in ``_FORM_FIELD_TYPES``.
    """
    tokens = _field_type_tokens(
        ctrl.get("type"),
        field_begin.get("type"),
        field_begin.get("name"),
        field_begin.get("fieldName"),
        field_begin.get("fieldname"),
    )
    if tokens & _FORM_FIELD_EXCLUDED_TYPES:
        return False
    # The control's own type is already one of the tokens and "FORM" is a
    # member of _FORM_FIELD_TYPES, so no separate ``== "FORM"`` fallback is
    # needed: it could never be true once this check has failed.
    return bool(tokens & _FORM_FIELD_TYPES)
def _field_identifier(field_begin: Any) -> str:
    """Return the first non-blank identifier-like attribute of *field_begin*.

    Attributes are tried in the order ``id``, ``fieldid``, ``name``,
    ``fieldName``, ``fieldname``; the stripped value is returned, or an empty
    string when none is set.
    """
    lookup_order = ("id", "fieldid", "name", "fieldName", "fieldname")
    stripped = ((field_begin.get(attr) or "").strip() for attr in lookup_order)
    return next((text for text in stripped if text), "")
def _field_end_matches(field_begin: Any, field_end: Any) -> bool:
    """Decide whether *field_end* closes *field_begin*.

    The begin keys are the truthy ``id``/``fieldid``/``name`` values; the end
    keys are the truthy ``beginIDRef``/``fieldid``/``id`` values. A begin
    element with no keys matches any end. Otherwise the two key sets must
    share at least one value.
    """
    begin_keys = set(
        filter(None, (field_begin.get(attr) for attr in ("id", "fieldid", "name")))
    )
    end_keys = set(
        filter(None, (field_end.get(attr) for attr in ("beginIDRef", "fieldid", "id")))
    )
    if not begin_keys:
        return True
    if not end_keys:
        return False
    return not begin_keys.isdisjoint(end_keys)
def _field_parameters(field_begin: Any) -> list[dict[str, str]]:
    """List the parameter entries found under *field_begin*.

    Every node yielded by ``field_begin.iter()`` whose local tag name ends in
    ``Param`` produces a ``{"name": ..., "value": ...}`` dict (stripped
    ``name`` attribute and stripped concatenated text), unless both are empty.
    """
    collected: list[dict[str, str]] = []
    for node in field_begin.iter():
        if _local_name(node).endswith("Param"):
            param_name = (node.get("name") or "").strip()
            param_value = "".join(node.itertext()).strip()
            if param_name or param_value:
                collected.append({"name": param_name, "value": param_value})
    return collected
def _first_attr(element: Any, names: Sequence[str]) -> str:
    """Return the first non-blank, stripped attribute among *names*, else ``""``."""
    stripped = ((element.get(attr) or "").strip() for attr in names)
    return next((text for text in stripped if text), "")
def _field_parameter_value(parameters: Sequence[dict[str, str]], *names: str) -> str:
    """Return the first non-empty value whose parameter name is in *names*.

    Names are compared case-insensitively (via ``casefold``); values are
    stripped. Returns an empty string when nothing matches.
    """
    targets = frozenset(name.casefold() for name in names)
    for entry in parameters:
        key = entry.get("name", "").casefold()
        text = entry.get("value", "").strip()
        if text and key in targets:
            return text
    return ""
def _clear_form_field_layout_cache(paragraph: Any) -> int:
    """Remove direct children of *paragraph* named ``linesegarray`` (any case).

    Returns:
        The number of children removed.
    """
    stale = [
        child for child in paragraph if _local_name(child).lower() == "linesegarray"
    ]
    for child in stale:
        paragraph.remove(child)
    return len(stale)
def _normalize_page_orientation(value: str | None) -> str | None:
    """Map an orientation alias to ``"PORTRAIT"`` or ``"WIDELY"``.

    Matching ignores surrounding whitespace and case. ``None`` passes through
    unchanged.

    Raises:
        ValueError: If *value* is not a recognised alias.
    """
    if value is None:
        return None
    key = value.strip().upper()
    if key in ("PORTRAIT", "NARROW", "NARROWLY"):
        return "PORTRAIT"
    if key in ("LANDSCAPE", "WIDE", "WIDELY"):
        return "WIDELY"
    raise ValueError(f"unsupported page orientation: {value}")
def _png_dimensions(image_data: bytes) -> tuple[int, int] | None:
    """Return ``(width, height)`` from a PNG header, or ``None``.

    ``None`` is returned when the data is shorter than 24 bytes, does not
    start with the PNG signature, does not begin with an ``IHDR`` chunk, or
    declares a zero width or height.
    """
    signature = b"\x89PNG\r\n\x1a\n"
    if len(image_data) < 24 or not image_data.startswith(signature):
        return None
    # Bytes 12-15 hold the first chunk's type. Width and height are only
    # meaningful at offsets 16-23 when that chunk is IHDR; anything else
    # (e.g. a vendor chunk placed first) would otherwise be read as garbage
    # dimensions.
    if image_data[12:16] != b"IHDR":
        return None
    width = int.from_bytes(image_data[16:20], "big")
    height = int.from_bytes(image_data[20:24], "big")
    if width <= 0 or height <= 0:
        return None
    return width, height
def _bin_data_stem(value: Any) -> str | None:
    """Return the POSIX-path stem of ``str(value)``, or ``None`` when unusable.

    ``None``, blank strings and values whose stem is empty all yield ``None``.
    """
    text = "" if value is None else str(value).strip()
    if not text:
        return None
    stem = PurePosixPath(text).stem
    return stem if stem else None
def _write_bytes_atomically(path: str | PathLike[str], data: bytes) -> None:
    """Write *data* to *path* through a temporary file in the same directory.

    The bytes go to a ``mkstemp`` file next to the target, which is then moved
    over the target with ``os.replace``. On any failure the temporary file is
    deleted (best effort) and the original exception is re-raised.
    """
    destination = Path(path)
    handle, scratch = tempfile.mkstemp(
        dir=str(destination.parent), suffix=".hwpx.tmp"
    )
    try:
        with os.fdopen(handle, "wb") as scratch_file:
            scratch_file.write(data)
        os.replace(scratch, str(destination))
    except BaseException:
        # Best-effort cleanup; the temp file may already be gone.
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise
def _capture_stream_checkpoint(stream: BinaryIO) -> tuple[int, bytes] | None:
    """Snapshot *stream*'s position and the bytes after it for later rollback.

    Returns:
        ``(position, tail)`` with the stream restored to ``position``, or
        ``None`` when the stream cannot be checkpointed. If reading fails, a
        checkpoint with an empty tail is still produced when the position is
        already at the end of the stream.
    """
    recoverable = (AttributeError, OSError)
    try:
        start = stream.tell()
    except recoverable:
        return None

    try:
        remainder = stream.read()
    except recoverable:
        # Unreadable stream: only safe if nothing lies beyond the position.
        try:
            end = stream.seek(0, os.SEEK_END)
            stream.seek(start)
        except recoverable:
            return None
        return (start, b"") if end == start else None
    else:
        try:
            stream.seek(start)
        except recoverable:
            return None
        return start, remainder
def _rollback_stream(stream: BinaryIO, checkpoint: tuple[int, bytes] | None) -> None:
    """Restore *stream* to the state recorded in *checkpoint* (best effort).

    The saved tail is rewritten at the saved position, the stream is truncated
    right after it, and the position is reset. A ``None`` checkpoint is a
    no-op; ``AttributeError``/``OSError`` during restoration are swallowed.
    """
    if checkpoint is None:
        return
    position, tail = checkpoint
    try:
        stream.seek(position)
        if tail:
            stream.write(tail)
        end = position + len(tail) if tail else position
        stream.truncate(end)
        stream.seek(position)
    except (AttributeError, OSError):
        return
def _write_stream_or_rollback(stream: BinaryIO, data: bytes) -> None:
    """Write *data* to *stream*, restoring the stream if anything goes wrong.

    Raises:
        OSError: If the stream cannot be checkpointed, or if ``write`` reports
            fewer bytes than requested. Any exception raised while writing
            triggers a rollback before it propagates.
    """
    checkpoint = _capture_stream_checkpoint(stream)
    if checkpoint is None:
        raise OSError(
            "HWPX stream save requires a checkpointable stream; "
            "use save_to_path() for non-seekable outputs"
        )
    expected = len(data)
    try:
        count = stream.write(data)
        # Some writers return None; only a reported mismatch is an error.
        if count is not None and count != expected:
            raise OSError(
                "short write while saving HWPX stream: "
                f"wrote {count} of {expected} bytes"
            )
    except BaseException:
        _rollback_stream(stream, checkpoint)
        raise
def _summarize_validation_issues(issues: Sequence[Any], *, limit: int = 5) -> str:
    """Join the first *limit* issues with ``"; "`` and note how many were left out."""
    shown = list(map(str, issues[:limit]))
    hidden = len(issues) - len(shown)
    text = "; ".join(shown)
    return f"{text} ... and {hidden} more" if hidden > 0 else text
class HwpxDocument:
"""Provides a user-friendly API for editing HWPX documents."""
def __init__(
self,
package: HwpxPackage,
root: HwpxOxmlDocument,
*,
managed_resources: tuple[Any, ...] = (),
validate_on_save: bool = False,
):
self._package = package
self._root = root
self._managed_resources = list(managed_resources)
self._closed = False
self.validate_on_save = validate_on_save
def __repr__(self) -> str:
"""Return a compact and safe summary of the document state."""
return (
f"{self.__class__.__name__}("
f"sections={len(self.sections)}, "
f"paragraphs={len(self.paragraphs)}, "
f"headers={len(self.headers)}, "
f"master_pages={len(self.master_pages)}, "
f"histories={len(self.histories)}, "
f"closed={self._closed}"
")"
)
# ------------------------------------------------------------------
# construction helpers
@classmethod
def open(
    cls,
    source: str | PathLike[str] | bytes | BinaryIO,
) -> "HwpxDocument":
    """Open *source* and return a :class:`HwpxDocument` instance.

    Raw ``bytes`` are wrapped in an in-memory stream that the document then
    owns and releases in :meth:`close`.

    Raises:
        HwpxStructureError: When the HWPX being opened lacks required files
            or has an invalid structure.
        HwpxPackageError: Propagated when a general I/O or format error
            occurs while opening the package.
    """
    owned: list[Any] = []
    if isinstance(source, bytes):
        buffer = io.BytesIO(source)
        owned.append(buffer)
        package = HwpxPackage.open(buffer)
    else:
        package = HwpxPackage.open(source)
    document_root = HwpxOxmlDocument.from_package(package)
    return cls(package, document_root, managed_resources=tuple(owned))
@classmethod
def new(cls) -> "HwpxDocument":
    """Create a blank document from the bytes of the default skeleton template."""
    template = blank_document_bytes()
    return cls.open(template)
@classmethod
def from_package(cls, package: HwpxPackage) -> "HwpxDocument":
    """Wrap an already opened :class:`HwpxPackage` in a document.

    Args:
        package: A :class:`hwpx.opc.package.HwpxPackage` instance.
    """
    document_root = HwpxOxmlDocument.from_package(package)
    return cls(package, document_root)
def __enter__(self) -> "HwpxDocument":
"""컨텍스트 매니저 진입 시 현재 문서 인스턴스를 반환합니다."""
return self
def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
"""예외 발생 여부와 무관하게 내부 자원을 안전하게 정리합니다."""
self.close()
return False
def close(self) -> None:
"""문서가 관리하는 내부 패키지/스트림 자원을 정리합니다.
정리 정책:
- ``flush()`` 가능한 자원은 먼저 flush를 시도합니다.
- ``close()`` 가능한 자원은 flush 이후 close를 시도합니다.
- flush/close 중 발생한 예외는 로깅하고 무시하여 정리 루틴을 계속 진행합니다.
- 같은 문서에서 ``close()``를 여러 번 호출해도 안전합니다.
"""
if self._closed:
return
self._flush_resource(self._package)
for resource in self._managed_resources:
self._flush_resource(resource)
self._close_resource(self._package)
for resource in self._managed_resources:
self._close_resource(resource)
self._managed_resources.clear()
self._closed = True
@staticmethod
def _flush_resource(resource: Any) -> None:
flush = getattr(resource, "flush", None)
if not callable(flush):
return
try:
flush()
except Exception:
logger.debug("자원 flush 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
@staticmethod
def _close_resource(resource: Any) -> None:
close = getattr(resource, "close", None)
if not callable(close):
return
try:
close()
except Exception:
logger.debug("자원 close 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
# ------------------------------------------------------------------
# properties exposing document content
@property
def package(self) -> HwpxPackage:
"""Return the :class:`HwpxPackage` backing this document."""
return self._package
@property
def oxml(self) -> HwpxOxmlDocument:
"""Return the low-level XML object tree representing the document."""
return self._root
@property
def sections(self) -> list[HwpxOxmlSection]:
"""Return the sections contained in the document."""
return self._root.sections
@property
def headers(self) -> list[HwpxOxmlHeader]:
"""Return the header parts referenced by the document."""
return self._root.headers
@property
def master_pages(self) -> list[HwpxOxmlMasterPage]:
"""Return the master-page parts declared in the manifest."""
return self._root.master_pages
@property
def histories(self) -> list[HwpxOxmlHistory]:
"""Return document history parts referenced by the manifest."""
return self._root.histories
@property
def version(self) -> HwpxOxmlVersion | None:
"""Return the version metadata part if present."""
return self._root.version
@property
def border_fills(self) -> dict[str, GenericElement]:
"""Return border fill definitions declared in the headers."""
return self._root.border_fills
def border_fill(self, border_fill_id_ref: int | str | None) -> GenericElement | None:
"""Return the border fill definition referenced by *border_fill_id_ref*."""
return self._root.border_fill(border_fill_id_ref)
@property
def memo_shapes(self) -> dict[str, MemoShape]:
"""Return memo shapes available in the header reference lists."""
return self._root.memo_shapes
def memo_shape(self, memo_shape_id_ref: int | str | None) -> MemoShape | None:
"""Return the memo shape definition referenced by *memo_shape_id_ref*."""
return self._root.memo_shape(memo_shape_id_ref)
@property
def bullets(self) -> dict[str, Bullet]:
"""Return bullet definitions declared in header reference lists."""
return self._root.bullets
def bullet(self, bullet_id_ref: int | str | None) -> Bullet | None:
"""Return the bullet definition referenced by *bullet_id_ref*."""
return self._root.bullet(bullet_id_ref)
@property
def paragraph_properties(self) -> dict[str, ParagraphProperty]:
"""Return paragraph property definitions declared in headers."""
return self._root.paragraph_properties
def paragraph_property(
self, para_pr_id_ref: int | str | None
) -> ParagraphProperty | None:
"""Return the paragraph property referenced by *para_pr_id_ref*."""
return self._root.paragraph_property(para_pr_id_ref)
def ensure_numbering(
self,
*,
kind: str,
levels: Sequence[dict[str, str]] | None = None,
) -> list[str]:
"""Return paragraph property ids for bullet or numbered-list levels."""
return self._root.ensure_numbering(kind=kind, levels=levels)
@property
def styles(self) -> dict[str, Style]:
"""Return style definitions available in the document."""
return self._root.styles
def style(self, style_id_ref: int | str | None) -> Style | None:
"""Return the style definition referenced by *style_id_ref*."""
return self._root.style(style_id_ref)
@property
def track_changes(self) -> dict[str, TrackChange]:
"""Return tracked change metadata declared in the headers."""
return self._root.track_changes
def track_change(self, change_id_ref: int | str | None) -> TrackChange | None:
"""Return tracked change metadata referenced by *change_id_ref*."""
return self._root.track_change(change_id_ref)
@property
def track_change_authors(self) -> dict[str, TrackChangeAuthor]:
"""Return tracked change author metadata declared in the headers."""
return self._root.track_change_authors
def track_change_author(
self, author_id_ref: int | str | None
) -> TrackChangeAuthor | None:
"""Return tracked change author details referenced by *author_id_ref*."""
return self._root.track_change_author(author_id_ref)
@property
def memos(self) -> list[HwpxOxmlMemo]:
"""Return all memo entries declared in every section."""
memos: list[HwpxOxmlMemo] = []
for section in self._root.sections:
memos.extend(section.memos)
return memos
def add_memo(
self,
text: str = "",
*,
section: HwpxOxmlSection | None = None,
section_index: int | None = None,
memo_shape_id_ref: str | int | None = None,
memo_id: str | None = None,
char_pr_id_ref: str | int | None = None,
attributes: dict[str, str] | None = None,
) -> HwpxOxmlMemo:
"""Create a memo entry inside *section* (or the last section by default)."""
if section is None and section_index is not None:
section = self._root.sections[section_index]
if section is None:
if not self._root.sections:
raise ValueError("document does not contain any sections")
section = self._root.sections[-1]
return section.add_memo(
text,
memo_shape_id_ref=memo_shape_id_ref,
memo_id=memo_id,
char_pr_id_ref=char_pr_id_ref,
attributes=attributes,
)
def remove_memo(self, memo: HwpxOxmlMemo) -> None:
"""Remove *memo* from the section it belongs to."""
memo.remove()
def attach_memo_field(
    self,
    paragraph: HwpxOxmlParagraph,
    memo: HwpxOxmlMemo,
    *,
    field_id: str | None = None,
    author: str | None = None,
    created: datetime | str | None = None,
    number: int = 1,
    char_pr_id_ref: str | int | None = None,
) -> str:
    """Attach a MEMO field control to *paragraph* so Hangul shows *memo*.

    A run holding the ``fieldBegin`` control is inserted as the paragraph's
    first child and a run holding the matching ``fieldEnd`` is appended as its
    last child. The paragraph's section is then marked dirty.

    Args:
        paragraph: Paragraph that receives the field; must have a section.
        memo: Memo to anchor; its group must have a section.
        field_id: Field identifier; a random UUID hex string when falsy.
        author: Author text; falls back to the memo's ``author`` attribute,
            then to an empty string.
        created: Creation time; falls back to the memo's ``createDateTime``
            attribute, then to the current local time. ``datetime`` values are
            formatted as ``%Y-%m-%d %H:%M:%S``.
        number: Memo number; values below 1 are written as 1.
        char_pr_id_ref: Character property for the inserted runs; falls back
            to the paragraph's, then the memo's inferred one, then ``"0"``.

    Returns:
        The field identifier that was used.

    Raises:
        ValueError: If the paragraph or the memo is not attached to a section.
    """
    if paragraph.section is None:
        raise ValueError("paragraph must belong to a section before anchoring a memo")
    if memo.group.section is None:
        raise ValueError("memo is not attached to a section")

    field_value = field_id or uuid.uuid4().hex
    author_value = author or memo.attributes.get("author") or ""

    timestamp_format = "%Y-%m-%d %H:%M:%S"
    stamp = memo.attributes.get("createDateTime") if created is None else created
    if stamp is None:
        created_value = datetime.now().strftime(timestamp_format)
    elif isinstance(stamp, datetime):
        created_value = stamp.strftime(timestamp_format)
    else:
        created_value = str(stamp)

    memo_shape_id = memo.memo_shape_id_ref or ""

    # Resolve the character property: explicit > paragraph > memo > "0".
    char_ref = char_pr_id_ref
    if char_ref is None:
        char_ref = paragraph.char_pr_id_ref
    if char_ref is None:
        char_ref = memo._infer_char_pr_id_ref()
    char_ref = "0" if char_ref is None else str(char_ref)

    host = paragraph.element
    memo_key = memo.id or field_value

    # --- opening run: ctrl > fieldBegin > (parameters, subList) ---
    run_begin = host.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
    ctrl_begin = _append_element(run_begin, f"{_HP}ctrl")
    field_begin = _append_element(
        ctrl_begin,
        f"{_HP}fieldBegin",
        {
            "id": field_value,
            "type": "MEMO",
            "editable": "true",
            "dirty": "false",
            "fieldid": field_value,
        },
    )

    parameters = _append_element(
        field_begin, f"{_HP}parameters", {"count": "5", "name": ""}
    )
    parameter_rows = (
        ("stringParam", "ID", memo.id or ""),
        ("integerParam", "Number", str(max(1, number))),
        ("stringParam", "CreateDateTime", created_value),
        ("stringParam", "Author", author_value),
        ("stringParam", "MemoShapeID", memo_shape_id),
    )
    for param_tag, param_name, param_text in parameter_rows:
        entry = _append_element(parameters, f"{_HP}{param_tag}", {"name": param_name})
        entry.text = param_text

    sub_list = _append_element(
        field_begin,
        f"{_HP}subList",
        {
            "id": f"memo-field-{memo_key}",
            "textDirection": "HORIZONTAL",
            "lineWrap": "BREAK",
            "vertAlign": "TOP",
        },
    )
    sub_para = _append_element(
        sub_list,
        f"{_HP}p",
        {
            "id": f"memo-field-{memo_key}-p",
            "paraPrIDRef": "0",
            "styleIDRef": "0",
            "pageBreak": "0",
            "columnBreak": "0",
            "merged": "0",
        },
    )
    sub_run = _append_element(sub_para, f"{_HP}run", {"charPrIDRef": char_ref})
    sub_text = _append_element(sub_run, f"{_HP}t")
    sub_text.text = memo_key

    # --- closing run: ctrl > fieldEnd ---
    run_end = host.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
    ctrl_end = _append_element(run_end, f"{_HP}ctrl")
    _append_element(
        ctrl_end,
        f"{_HP}fieldEnd",
        {"beginIDRef": field_value, "fieldid": field_value},
    )

    # Wrap the paragraph's existing content between the two runs.
    host.insert(0, run_begin)
    host.append(run_end)
    paragraph.section.mark_dirty()
    return field_value
def add_memo_with_anchor(
self,
text: str = "",
*,
paragraph: HwpxOxmlParagraph | None = None,
section: HwpxOxmlSection | None = None,
section_index: int | None = None,
paragraph_text: str | None = None,
memo_shape_id_ref: str | int | None = None,
memo_id: str | None = None,
char_pr_id_ref: str | int | None = None,
attributes: dict[str, str] | None = None,
field_id: str | None = None,
author: str | None = None,
created: datetime | str | None = None,
number: int = 1,
anchor_char_pr_id_ref: str | int | None = None,
) -> tuple[HwpxOxmlMemo, HwpxOxmlParagraph, str]:
"""Create a memo and ensure it is visible by anchoring a MEMO field."""
memo = self.add_memo(
text,
section=section,
section_index=section_index,
memo_shape_id_ref=memo_shape_id_ref,
memo_id=memo_id,
char_pr_id_ref=char_pr_id_ref,
attributes=attributes,
)
target_paragraph = paragraph
if target_paragraph is None:
memo_section = memo.group.section
if memo_section is None:
raise ValueError("memo must belong to a section")
paragraph_value = "" if paragraph_text is None else paragraph_text
anchor_char = anchor_char_pr_id_ref or char_pr_id_ref
target_paragraph = self.add_paragraph(
paragraph_value,
section=memo_section,
char_pr_id_ref=anchor_char,
)
elif paragraph_text is not None:
target_paragraph.text = paragraph_text
field_value = self.attach_memo_field(
target_paragraph,
memo,
field_id=field_id,
author=author,
created=created,
number=number,
char_pr_id_ref=anchor_char_pr_id_ref,
)
return memo, target_paragraph, field_value
def remove_paragraph(
self,
paragraph: HwpxOxmlParagraph | int,
*,
section: HwpxOxmlSection | None = None,
section_index: int | None = None,
) -> None:
"""Remove a paragraph from the document.
*paragraph* may be a :class:`HwpxOxmlParagraph` instance or an
integer index into the paragraphs of the specified (or last)
section.
Raises ``ValueError`` if the target section would become empty.
"""
self._root.remove_paragraph(
paragraph,
section=section,
section_index=section_index,
)
def add_section(self, *, after: int | None = None) -> HwpxOxmlSection:
"""Append a new empty section to the document.
If *after* is given, the section is inserted after the section at
that index. Returns the newly created section.
"""
return self._root.add_section(after=after)
def remove_section(
self, section: HwpxOxmlSection | int,
) -> None:
"""Remove a section from the document.
Raises ``ValueError`` if the document would have no sections left.
"""
self._root.remove_section(section)
@property
def paragraphs(self) -> list[HwpxOxmlParagraph]:
"""Return all paragraphs across every section."""
return self._root.paragraphs
@property
def char_properties(self) -> dict[str, RunStyle]:
"""Return the resolved character style definitions available to the document."""
return self._root.char_properties
def char_property(self, char_pr_id_ref: int | str | None) -> RunStyle | None:
"""Return the style referenced by *char_pr_id_ref* if known."""
return self._root.char_property(char_pr_id_ref)
def ensure_run_style(
self,
*,
bold: bool = False,
italic: bool = False,
underline: bool = False,
color: str | None = None,
font: str | None = None,
size: int | float | None = None,
highlight: str | None = None,
strike: bool | None = None,
base_char_pr_id: str | int | None = None,
) -> str:
"""Return a ``charPr`` identifier matching the requested flags."""
return self._root.ensure_run_style(
bold=bold,
italic=italic,
underline=underline,
color=color,
font=font,
size=size,
highlight=highlight,
strike=strike,
base_char_pr_id=base_char_pr_id,
)
def iter_runs(self) -> Iterator[HwpxOxmlRun]:
"""Yield every run element contained in the document."""
for paragraph in self.paragraphs:
for run in paragraph.runs:
yield run
def find_runs_by_style(
self,
*,
text_color: str | None = None,
underline_type: str | None = None,
underline_color: str | None = None,
char_pr_id_ref: str | int | None = None,
) -> list[HwpxOxmlRun]:
"""Return runs matching the requested style criteria."""
matches: list[HwpxOxmlRun] = []
target_char = str(char_pr_id_ref).strip() if char_pr_id_ref is not None else None
for run in self.iter_runs():
if target_char is not None:
run_char = (run.char_pr_id_ref or "").strip()
if run_char != target_char:
continue
style = run.style
if text_color is not None:
if style is None or style.text_color() != text_color:
continue
if underline_type is not None:
if style is None or style.underline_type() != underline_type:
continue
if underline_color is not None:
if style is None or style.underline_color() != underline_color:
continue
matches.append(run)
return matches
def replace_text_in_runs(
self,
search: str,
replacement: str,
*,
text_color: str | None = None,
underline_type: str | None = None,
underline_color: str | None = None,
char_pr_id_ref: str | int | None = None,
limit: int | None = None,
) -> int:
"""Replace occurrences of *search* in runs matching the provided style filters."""
if not search:
raise ValueError("search must be a non-empty string")
replacements = 0
runs = self.find_runs_by_style(
text_color=text_color,
underline_type=underline_type,
underline_color=underline_color,
char_pr_id_ref=char_pr_id_ref,
)
for run in runs:
remaining = None
if limit is not None:
remaining = limit - replacements
if remaining <= 0:
break
original_char_pr = run.char_pr_id_ref
replaced_here = run.replace_text(
search,
replacement,
count=remaining,
)
if replaced_here and original_char_pr is not None:
# Ensure the run retains its original formatting reference even
# if XML nodes were rewritten during substitution.
run.char_pr_id_ref = original_char_pr
replacements += replaced_here
if limit is not None and replacements >= limit:
break
return replacements
# ------------------------------------------------------------------
# editing helpers
def add_paragraph(
self,
text: str = "",
*,
section: HwpxOxmlSection | None = None,
section_index: int | None = None,
para_pr_id_ref: str | int | None = None,
style_id_ref: str | int | None = None,
char_pr_id_ref: str | int | None = None,
run_attributes: dict[str, str] | None = None,
include_run: bool = True,
inherit_style: bool = True,
**extra_attrs: str,
) -> HwpxOxmlParagraph:
"""Append a paragraph to the document and return it.
When *inherit_style* is ``True`` (the default) and no explicit
style references are given, the new paragraph inherits
``paraPrIDRef``, ``styleIDRef`` and ``charPrIDRef`` from the
last paragraph in the target section so that consecutive
paragraphs share the same formatting.
Formatting references may be overridden via ``para_pr_id_ref``,
``style_id_ref`` and ``char_pr_id_ref``. Any additional keyword
arguments are added as raw paragraph attributes.
"""
return self._root.add_paragraph(
text,
section=section,
section_index=section_index,
para_pr_id_ref=para_pr_id_ref,
style_id_ref=style_id_ref,
char_pr_id_ref=char_pr_id_ref,