Skip to content

Commit 5f6f380

Browse files
author
Lincoln Stein
committed
merge with main
2 parents 1a7fb60 + 25b8dd3 commit 5f6f380

132 files changed

Lines changed: 4274 additions & 680 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

invokeai/app/invocations/compel.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from invokeai.app.invocations.util.choose_model import choose_model
55
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig
6+
from ...backend.prompting.conditioning import try_parse_legacy_blend
67

78
from ...backend.util.devices import choose_torch_device, torch_dtype
89
from ...backend.stable_diffusion.diffusion import InvokeAIDiffuserComponent
@@ -13,7 +14,7 @@
1314
Blend,
1415
CrossAttentionControlSubstitute,
1516
FlattenedPrompt,
16-
Fragment,
17+
Fragment, Conjunction,
1718
)
1819

1920

@@ -93,25 +94,22 @@ def load_huggingface_concepts(concepts: list[str]):
9394
text_encoder=text_encoder,
9495
textual_inversion_manager=pipeline.textual_inversion_manager,
9596
dtype_for_device_getter=torch_dtype,
96-
truncate_long_prompts=True, # TODO:
97+
truncate_long_prompts=False,
9798
)
9899

99-
# TODO: support legacy blend?
100-
101-
conjunction = Compel.parse_prompt_string(prompt_str)
102-
prompt: Union[FlattenedPrompt, Blend] = conjunction.prompts[0]
100+
legacy_blend = try_parse_legacy_blend(prompt_str, skip_normalize=False)
101+
if legacy_blend is not None:
102+
conjunction = legacy_blend
103+
else:
104+
conjunction = Compel.parse_prompt_string(prompt_str)
103105

104106
if context.services.configuration.log_tokenization:
105-
log_tokenization_for_prompt_object(prompt, tokenizer)
106-
107-
c, options = compel.build_conditioning_tensor_for_prompt_object(prompt)
107+
log_tokenization_for_conjunction(conjunction, tokenizer)
108108

109-
# TODO: long prompt support
110-
#if not self.truncate_long_prompts:
111-
# [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc])
109+
c, options = compel.build_conditioning_tensor_for_conjunction(conjunction)
112110

113111
ec = InvokeAIDiffuserComponent.ExtraConditioningInfo(
114-
tokens_count_including_eos_bos=get_max_token_count(tokenizer, prompt),
112+
tokens_count_including_eos_bos=get_max_token_count(tokenizer, conjunction),
115113
cross_attention_control_args=options.get("cross_attention_control", None),
116114
)
117115

@@ -128,14 +126,22 @@ def load_huggingface_concepts(concepts: list[str]):
128126

129127

130128
def get_max_token_count(
131-
tokenizer, prompt: Union[FlattenedPrompt, Blend], truncate_if_too_long=False
129+
tokenizer, prompt: Union[FlattenedPrompt, Blend, Conjunction], truncate_if_too_long=False
132130
) -> int:
133131
if type(prompt) is Blend:
134132
blend: Blend = prompt
135133
return max(
136134
[
137-
get_max_token_count(tokenizer, c, truncate_if_too_long)
138-
for c in blend.prompts
135+
get_max_token_count(tokenizer, p, truncate_if_too_long)
136+
for p in blend.prompts
137+
]
138+
)
139+
elif type(prompt) is Conjunction:
140+
conjunction: Conjunction = prompt
141+
return sum(
142+
[
143+
get_max_token_count(tokenizer, p, truncate_if_too_long)
144+
for p in conjunction.prompts
139145
]
140146
)
141147
else:
@@ -170,6 +176,22 @@ def get_tokens_for_prompt_object(
170176
return tokens
171177

172178

179+
def log_tokenization_for_conjunction(
    c: Conjunction, tokenizer, display_label_prefix=None
):
    """Log the tokenization of every prompt contained in a conjunction.

    Each prompt in ``c.prompts`` is forwarded to
    ``log_tokenization_for_prompt_object`` together with ``tokenizer``.

    :param c: the conjunction whose ``prompts`` (and matching ``weights``)
        are logged.
    :param tokenizer: passed through unchanged to the per-prompt logger.
    :param display_label_prefix: optional text prepended to each label; a
        falsy value (including ``None``) is treated as the empty string.
    """
    base_prefix = display_label_prefix if display_label_prefix else ""

    for part_number, part in enumerate(c.prompts, start=1):
        # A lone prompt keeps the plain prefix; only a multi-part conjunction
        # gets a per-part label carrying its 1-based position and weight.
        label = base_prefix
        if len(c.prompts) > 1:
            weight = c.weights[part_number - 1]
            label = f"{base_prefix}(conjunction part {part_number}, weight={weight})"

        log_tokenization_for_prompt_object(
            part, tokenizer, display_label_prefix=label
        )
193+
194+
173195
def log_tokenization_for_prompt_object(
174196
p: Union[Blend, FlattenedPrompt], tokenizer, display_label_prefix=None
175197
):

invokeai/app/invocations/controlnet_image_processors.py

Lines changed: 45 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -94,13 +94,13 @@
9494
CONTROLNET_NAME_VALUES = Literal[tuple(CONTROLNET_DEFAULT_MODELS)]
9595

9696
class ControlField(BaseModel):
97-
image: ImageField = Field(default=None, description="processed image")
98-
control_model: Optional[str] = Field(default=None, description="control model used")
99-
control_weight: Optional[float] = Field(default=1, description="weight given to controlnet")
97+
image: ImageField = Field(default=None, description="The control image")
98+
control_model: Optional[str] = Field(default=None, description="The ControlNet model to use")
99+
control_weight: Optional[float] = Field(default=1, description="The weight given to the ControlNet")
100100
begin_step_percent: float = Field(default=0, ge=0, le=1,
101-
description="% of total steps at which controlnet is first applied")
101+
description="When the ControlNet is first applied (% of total steps)")
102102
end_step_percent: float = Field(default=1, ge=0, le=1,
103-
description="% of total steps at which controlnet is last applied")
103+
description="When the ControlNet is last applied (% of total steps)")
104104

105105
class Config:
106106
schema_extra = {
@@ -112,7 +112,7 @@ class ControlOutput(BaseInvocationOutput):
112112
"""node output for ControlNet info"""
113113
# fmt: off
114114
type: Literal["control_output"] = "control_output"
115-
control: ControlField = Field(default=None, description="The control info dict")
115+
control: ControlField = Field(default=None, description="The output control image")
116116
# fmt: on
117117

118118

@@ -121,15 +121,15 @@ class ControlNetInvocation(BaseInvocation):
121121
# fmt: off
122122
type: Literal["controlnet"] = "controlnet"
123123
# Inputs
124-
image: ImageField = Field(default=None, description="image to process")
124+
image: ImageField = Field(default=None, description="The control image")
125125
control_model: CONTROLNET_NAME_VALUES = Field(default="lllyasviel/sd-controlnet-canny",
126-
description="control model used")
127-
control_weight: float = Field(default=1.0, ge=0, le=1, description="weight given to controlnet")
126+
description="The ControlNet model to use")
127+
control_weight: float = Field(default=1.0, ge=0, le=1, description="The weight given to the ControlNet")
128128
# TODO: add support in backend core for begin_step_percent, end_step_percent, guess_mode
129129
begin_step_percent: float = Field(default=0, ge=0, le=1,
130-
description="% of total steps at which controlnet is first applied")
130+
description="When the ControlNet is first applied (% of total steps)")
131131
end_step_percent: float = Field(default=1, ge=0, le=1,
132-
description="% of total steps at which controlnet is last applied")
132+
description="When the ControlNet is last applied (% of total steps)")
133133
# fmt: on
134134

135135

@@ -152,7 +152,7 @@ class ImageProcessorInvocation(BaseInvocation, PILInvocationConfig):
152152
# fmt: off
153153
type: Literal["image_processor"] = "image_processor"
154154
# Inputs
155-
image: ImageField = Field(default=None, description="image to process")
155+
image: ImageField = Field(default=None, description="The image to process")
156156
# fmt: on
157157

158158

@@ -204,8 +204,8 @@ class CannyImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfi
204204
# fmt: off
205205
type: Literal["canny_image_processor"] = "canny_image_processor"
206206
# Input
207-
low_threshold: float = Field(default=100, ge=0, description="low threshold of Canny pixel gradient")
208-
high_threshold: float = Field(default=200, ge=0, description="high threshold of Canny pixel gradient")
207+
low_threshold: int = Field(default=100, ge=0, le=255, description="The low threshold of the Canny pixel gradient (0-255)")
208+
high_threshold: int = Field(default=200, ge=0, le=255, description="The high threshold of the Canny pixel gradient (0-255)")
209209
# fmt: on
210210

211211
def run_processor(self, image):
@@ -214,16 +214,16 @@ def run_processor(self, image):
214214
return processed_image
215215

216216

217-
class HedImageprocessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
217+
class HedImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
218218
"""Applies HED edge detection to image"""
219219
# fmt: off
220220
type: Literal["hed_image_processor"] = "hed_image_processor"
221221
# Inputs
222-
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
223-
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
222+
detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
223+
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
224224
# safe not supported in controlnet_aux v0.0.3
225225
# safe: bool = Field(default=False, description="whether to use safe mode")
226-
scribble: bool = Field(default=False, description="whether to use scribble mode")
226+
scribble: bool = Field(default=False, description="Whether to use scribble mode")
227227
# fmt: on
228228

229229
def run_processor(self, image):
@@ -243,9 +243,9 @@ class LineartImageProcessorInvocation(ImageProcessorInvocation, PILInvocationCon
243243
# fmt: off
244244
type: Literal["lineart_image_processor"] = "lineart_image_processor"
245245
# Inputs
246-
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
247-
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
248-
coarse: bool = Field(default=False, description="whether to use coarse mode")
246+
detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
247+
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
248+
coarse: bool = Field(default=False, description="Whether to use coarse mode")
249249
# fmt: on
250250

251251
def run_processor(self, image):
@@ -262,8 +262,8 @@ class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation, PILInvocati
262262
# fmt: off
263263
type: Literal["lineart_anime_image_processor"] = "lineart_anime_image_processor"
264264
# Inputs
265-
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
266-
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
265+
detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
266+
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
267267
# fmt: on
268268

269269
def run_processor(self, image):
@@ -280,9 +280,9 @@ class OpenposeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationCo
280280
# fmt: off
281281
type: Literal["openpose_image_processor"] = "openpose_image_processor"
282282
# Inputs
283-
hand_and_face: bool = Field(default=False, description="whether to use hands and face mode")
284-
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
285-
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
283+
hand_and_face: bool = Field(default=False, description="Whether to use hands and face mode")
284+
detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
285+
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
286286
# fmt: on
287287

288288
def run_processor(self, image):
@@ -300,8 +300,8 @@ class MidasDepthImageProcessorInvocation(ImageProcessorInvocation, PILInvocation
300300
# fmt: off
301301
type: Literal["midas_depth_image_processor"] = "midas_depth_image_processor"
302302
# Inputs
303-
a_mult: float = Field(default=2.0, ge=0, description="Midas parameter a = amult * PI")
304-
bg_th: float = Field(default=0.1, ge=0, description="Midas parameter bg_th")
303+
a_mult: float = Field(default=2.0, ge=0, description="Midas parameter `a_mult` (a = a_mult * PI)")
304+
bg_th: float = Field(default=0.1, ge=0, description="Midas parameter `bg_th`")
305305
# depth_and_normal not supported in controlnet_aux v0.0.3
306306
# depth_and_normal: bool = Field(default=False, description="whether to use depth and normal mode")
307307
# fmt: on
@@ -322,8 +322,8 @@ class NormalbaeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationC
322322
# fmt: off
323323
type: Literal["normalbae_image_processor"] = "normalbae_image_processor"
324324
# Inputs
325-
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
326-
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
325+
detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
326+
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
327327
# fmt: on
328328

329329
def run_processor(self, image):
@@ -339,10 +339,10 @@ class MlsdImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig
339339
# fmt: off
340340
type: Literal["mlsd_image_processor"] = "mlsd_image_processor"
341341
# Inputs
342-
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
343-
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
344-
thr_v: float = Field(default=0.1, ge=0, description="MLSD parameter thr_v")
345-
thr_d: float = Field(default=0.1, ge=0, description="MLSD parameter thr_d")
342+
detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
343+
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
344+
thr_v: float = Field(default=0.1, ge=0, description="MLSD parameter `thr_v`")
345+
thr_d: float = Field(default=0.1, ge=0, description="MLSD parameter `thr_d`")
346346
# fmt: on
347347

348348
def run_processor(self, image):
@@ -360,10 +360,10 @@ class PidiImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig
360360
# fmt: off
361361
type: Literal["pidi_image_processor"] = "pidi_image_processor"
362362
# Inputs
363-
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
364-
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
365-
safe: bool = Field(default=False, description="whether to use safe mode")
366-
scribble: bool = Field(default=False, description="whether to use scribble mode")
363+
detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
364+
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
365+
safe: bool = Field(default=False, description="Whether to use safe mode")
366+
scribble: bool = Field(default=False, description="Whether to use scribble mode")
367367
# fmt: on
368368

369369
def run_processor(self, image):
@@ -381,11 +381,11 @@ class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation, PILInvoca
381381
# fmt: off
382382
type: Literal["content_shuffle_image_processor"] = "content_shuffle_image_processor"
383383
# Inputs
384-
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
385-
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
386-
h: Union[int | None] = Field(default=512, ge=0, description="content shuffle h parameter")
387-
w: Union[int | None] = Field(default=512, ge=0, description="content shuffle w parameter")
388-
f: Union[int | None] = Field(default=256, ge=0, description="cont")
384+
detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
385+
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
386+
h: Union[int, None] = Field(default=512, ge=0, description="Content shuffle `h` parameter")
387+
w: Union[int, None] = Field(default=512, ge=0, description="Content shuffle `w` parameter")
388+
f: Union[int, None] = Field(default=256, ge=0, description="Content shuffle `f` parameter")
389389
# fmt: on
390390

391391
def run_processor(self, image):
@@ -418,8 +418,8 @@ class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationCo
418418
# fmt: off
419419
type: Literal["mediapipe_face_processor"] = "mediapipe_face_processor"
420420
# Inputs
421-
max_faces: int = Field(default=1, ge=1, description="maximum number of faces to detect")
422-
min_confidence: float = Field(default=0.5, ge=0, le=1, description="minimum confidence for face detection")
421+
max_faces: int = Field(default=1, ge=1, description="Maximum number of faces to detect")
422+
min_confidence: float = Field(default=0.5, ge=0, le=1, description="Minimum confidence for face detection")
423423
# fmt: on
424424

425425
def run_processor(self, image):

invokeai/app/invocations/latent.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import einops
55
from typing import Literal, Optional, Union, List
66

7+
from compel import Compel
78
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel
89

910
from pydantic import BaseModel, Field, validator
@@ -233,6 +234,15 @@ def get_conditioning_data(self, context: InvocationContext, model: StableDiffusi
233234
c, extra_conditioning_info = context.services.latents.get(self.positive_conditioning.conditioning_name)
234235
uc, _ = context.services.latents.get(self.negative_conditioning.conditioning_name)
235236

237+
compel = Compel(
238+
tokenizer=model.tokenizer,
239+
text_encoder=model.text_encoder,
240+
textual_inversion_manager=model.textual_inversion_manager,
241+
dtype_for_device_getter=torch_dtype,
242+
truncate_long_prompts=False,
243+
)
244+
[c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc])
245+
236246
conditioning_data = ConditioningData(
237247
uc,
238248
c,

invokeai/backend/prompting/conditioning.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def get_uc_and_c_and_ec(prompt_string,
3838
dtype_for_device_getter=torch_dtype,
3939
truncate_long_prompts=False,
4040
)
41-
41+
4242
config = get_invokeai_config()
4343

4444
# get rid of any newline characters
@@ -282,6 +282,8 @@ def split_weighted_subprompts(text, skip_normalize=False) -> list:
282282
(match.group("prompt").replace("\\:", ":"), float(match.group("weight") or 1))
283283
for match in re.finditer(prompt_parser, text)
284284
]
285+
if len(parsed_prompts) == 0:
286+
return []
285287
if skip_normalize:
286288
return parsed_prompts
287289
weight_sum = sum(map(lambda x: x[1], parsed_prompts))

invokeai/frontend/web/docs/API_CLIENT.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ We need to start the nodes web server, which serves the OpenAPI schema to the ge
2626

2727
```bash
2828
# from the repo root
29-
python scripts/invoke-new.py --web
29+
python scripts/invokeai-web.py
3030
```
3131

32-
2. Generate the API client.
32+
2. Generate the API client.
3333

3434
```bash
3535
# from invokeai/frontend/web/

0 commit comments

Comments
 (0)