|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | | -import copy |
| 3 | +import pickle |
4 | 4 | from contextlib import contextmanager |
5 | 5 | from pathlib import Path |
6 | 6 | from typing import Any, Dict, List, Optional, Tuple, Union |
@@ -165,7 +165,13 @@ def apply_ti( |
165 | 165 | new_tokens_added = None |
166 | 166 |
|
167 | 167 | try: |
168 | | - ti_tokenizer = copy.deepcopy(tokenizer) |
| 168 | + # HACK: The CLIPTokenizer API does not include a way to remove tokens after calling add_tokens(...). As a |
| 169 | + # workaround, we create a full copy of `tokenizer` so that its original behavior can be restored after |
| 170 | + # exiting this `apply_ti(...)` context manager. |
| 171 | + # |
| 172 | + # In a previous implementation, the deep copy was obtained with `ti_tokenizer = copy.deepcopy(tokenizer)`, |
| 173 | + # but a pickle roundtrip was found to be much faster (0.05 secs vs. 1 sec for the deep copy). |
| 174 | + ti_tokenizer = pickle.loads(pickle.dumps(tokenizer)) |
169 | 175 | ti_manager = TextualInversionManager(ti_tokenizer) |
170 | 176 | init_tokens_count = text_encoder.resize_token_embeddings(None).num_embeddings |
171 | 177 |
|
@@ -439,7 +445,13 @@ def apply_ti( |
439 | 445 | orig_embeddings = None |
440 | 446 |
|
441 | 447 | try: |
442 | | - ti_tokenizer = copy.deepcopy(tokenizer) |
| 448 | + # HACK: The CLIPTokenizer API does not include a way to remove tokens after calling add_tokens(...). As a |
| 449 | + # workaround, we create a full copy of `tokenizer` so that its original behavior can be restored after |
| 450 | + # exiting this `apply_ti(...)` context manager. |
| 451 | + # |
| 452 | + # In a previous implementation, the deep copy was obtained with `ti_tokenizer = copy.deepcopy(tokenizer)`, |
| 453 | + # but a pickle roundtrip was found to be much faster (0.05 secs vs. 1 sec for the deep copy). |
| 454 | + ti_tokenizer = pickle.loads(pickle.dumps(tokenizer)) |
443 | 455 | ti_manager = TextualInversionManager(ti_tokenizer) |
444 | 456 |
|
445 | 457 | def _get_trigger(ti_name, index): |
|
0 commit comments