hitomodev
diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py
Lines changed: 38 additions & 16 deletions
diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py
Lines changed: 45 additions & 45 deletions
diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
Lines changed: 10 additions & 0 deletions
diff --git a/invokeai/backend/prompting/conditioning.py b/invokeai/backend/prompting/conditioning.py
Lines changed: 3 additions & 1 deletion
diff --git a/invokeai/frontend/web/docs/API_CLIENT.md b/invokeai/frontend/web/docs/API_CLIENT.md
Lines changed: 2 additions & 2 deletions
@@ -3,6 +3,7 @@
 
 from invokeai.app.invocations.util.choose_model import choose_model
 from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig
+from ...backend.prompting.conditioning import try_parse_legacy_blend
 
 from ...backend.util.devices import choose_torch_device, torch_dtype
 from ...backend.stable_diffusion.diffusion import InvokeAIDiffuserComponent
@@ -13,7 +14,7 @@
     Blend,
     CrossAttentionControlSubstitute,
     FlattenedPrompt,
-    Fragment,
+    Fragment, Conjunction,
 )
 
 
@@ -93,25 +94,22 @@ def load_huggingface_concepts(concepts: list[str]):
             text_encoder=text_encoder,
             textual_inversion_manager=pipeline.textual_inversion_manager,
             dtype_for_device_getter=torch_dtype,
-            truncate_long_prompts=True, # TODO:
+            truncate_long_prompts=False,
         )
 
-        # TODO: support legacy blend?
-
-        conjunction = Compel.parse_prompt_string(prompt_str)
-        prompt: Union[FlattenedPrompt, Blend] = conjunction.prompts[0]
+        legacy_blend = try_parse_legacy_blend(prompt_str, skip_normalize=False)
+        if legacy_blend is not None:
+            conjunction = legacy_blend
+        else:
+            conjunction = Compel.parse_prompt_string(prompt_str)
 
         if context.services.configuration.log_tokenization:
-            log_tokenization_for_prompt_object(prompt, tokenizer)
-
-        c, options = compel.build_conditioning_tensor_for_prompt_object(prompt)
+            log_tokenization_for_conjunction(conjunction, tokenizer)
 
-        # TODO: long prompt support
-        #if not self.truncate_long_prompts:
-        #    [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc])
+        c, options = compel.build_conditioning_tensor_for_conjunction(conjunction)
 
         ec = InvokeAIDiffuserComponent.ExtraConditioningInfo(
-            tokens_count_including_eos_bos=get_max_token_count(tokenizer, prompt),
+            tokens_count_including_eos_bos=get_max_token_count(tokenizer, conjunction),
             cross_attention_control_args=options.get("cross_attention_control", None),
         )
 
@@ -128,14 +126,22 @@ def load_huggingface_concepts(concepts: list[str]):
 
 
 def get_max_token_count(
-    tokenizer, prompt: Union[FlattenedPrompt, Blend], truncate_if_too_long=False
+    tokenizer, prompt: Union[FlattenedPrompt, Blend, Conjunction], truncate_if_too_long=False
 ) -> int:
     if type(prompt) is Blend:
         blend: Blend = prompt
         return max(
             [
-                get_max_token_count(tokenizer, c, truncate_if_too_long)
-                for c in blend.prompts
+                get_max_token_count(tokenizer, p, truncate_if_too_long)
+                for p in blend.prompts
+            ]
+        )
+    elif type(prompt) is Conjunction:
+        conjunction: Conjunction = prompt
+        return sum(
+            [
+                get_max_token_count(tokenizer, p, truncate_if_too_long)
+                for p in conjunction.prompts
             ]
         )
     else:
@@ -170,6 +176,22 @@ def get_tokens_for_prompt_object(
     return tokens
 
 
+def log_tokenization_for_conjunction(  # log the tokenization of every prompt in conjunction `c`
+    c: Conjunction, tokenizer, display_label_prefix=None
+):
+    display_label_prefix = display_label_prefix or ""  # a missing (None/empty) prefix becomes ""
+    for i, p in enumerate(c.prompts):
+        if len(c.prompts)>1:  # label each part (1-based index and its weight) only when there are several prompts
+            this_display_label_prefix = f"{display_label_prefix}(conjunction part {i + 1}, weight={c.weights[i]})"
+        else:  # single prompt: pass the caller's prefix through unchanged
+            this_display_label_prefix = display_label_prefix
+        log_tokenization_for_prompt_object(  # delegate the actual logging for this prompt
+            p,
+            tokenizer,
+            display_label_prefix=this_display_label_prefix
+        )
+
+
 def log_tokenization_for_prompt_object(
     p: Union[Blend, FlattenedPrompt], tokenizer, display_label_prefix=None
 ):
 
@@ -94,13 +94,13 @@
 CONTROLNET_NAME_VALUES = Literal[tuple(CONTROLNET_DEFAULT_MODELS)]
 
 class ControlField(BaseModel):
-    image: ImageField = Field(default=None, description="processed image")
-    control_model: Optional[str] = Field(default=None, description="control model used")
-    control_weight: Optional[float] = Field(default=1, description="weight given to controlnet")
+    image: ImageField = Field(default=None, description="The control image")
+    control_model: Optional[str] = Field(default=None, description="The ControlNet model to use")
+    control_weight: Optional[float] = Field(default=1, description="The weight given to the ControlNet")
     begin_step_percent: float = Field(default=0, ge=0, le=1,
-                                                description="% of total steps at which controlnet is first applied")
+                                                description="When the ControlNet is first applied (% of total steps)")
     end_step_percent: float = Field(default=1, ge=0, le=1,
-                                    description="% of total steps at which controlnet is last applied")
+                                    description="When the ControlNet is last applied (% of total steps)")
 
     class Config:
         schema_extra = {
@@ -112,7 +112,7 @@ class ControlOutput(BaseInvocationOutput):
     """node output for ControlNet info"""
     # fmt: off
     type: Literal["control_output"] = "control_output"
-    control: ControlField = Field(default=None, description="The control info dict")
+    control: ControlField = Field(default=None, description="The output control image")
     # fmt: on
 
 
@@ -121,15 +121,15 @@ class ControlNetInvocation(BaseInvocation):
     # fmt: off
     type: Literal["controlnet"] = "controlnet"
     # Inputs
-    image: ImageField = Field(default=None, description="image to process")
+    image: ImageField = Field(default=None, description="The control image")
     control_model: CONTROLNET_NAME_VALUES = Field(default="lllyasviel/sd-controlnet-canny",
-                                                  description="control model used")
-    control_weight: float = Field(default=1.0, ge=0, le=1, description="weight given to controlnet")
+                                                  description="The ControlNet model to use")
+    control_weight: float = Field(default=1.0, ge=0, le=1, description="The weight given to the ControlNet")
     # TODO: add support in backend core for begin_step_percent, end_step_percent, guess_mode
     begin_step_percent: float = Field(default=0, ge=0, le=1,
-                                        description="% of total steps at which controlnet is first applied")
+                                        description="When the ControlNet is first applied (% of total steps)")
     end_step_percent: float = Field(default=1, ge=0, le=1,
-                                      description="% of total steps at which controlnet is last applied")
+                                      description="When the ControlNet is last applied (% of total steps)")
     # fmt: on
 
 
@@ -152,7 +152,7 @@ class ImageProcessorInvocation(BaseInvocation, PILInvocationConfig):
     # fmt: off
     type: Literal["image_processor"] = "image_processor"
     # Inputs
-    image: ImageField = Field(default=None, description="image to process")
+    image: ImageField = Field(default=None, description="The image to process")
     # fmt: on
 
 
@@ -204,8 +204,8 @@ class CannyImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfi
     # fmt: off
     type: Literal["canny_image_processor"] = "canny_image_processor"
     # Input
-    low_threshold: float = Field(default=100, ge=0, description="low threshold of Canny pixel gradient")
-    high_threshold: float = Field(default=200, ge=0, description="high threshold of Canny pixel gradient")
+    low_threshold: int = Field(default=100, ge=0, le=255, description="The low threshold of the Canny pixel gradient (0-255)")
+    high_threshold: int = Field(default=200, ge=0, le=255, description="The high threshold of the Canny pixel gradient (0-255)")
     # fmt: on
 
     def run_processor(self, image):
@@ -214,16 +214,16 @@ def run_processor(self, image):
         return processed_image
 
 
-class HedImageprocessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
+class HedImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
     """Applies HED edge detection to image"""
     # fmt: off
     type: Literal["hed_image_processor"] = "hed_image_processor"
     # Inputs
-    detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
-    image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
+    detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
+    image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
     # safe not supported in controlnet_aux v0.0.3
     # safe: bool = Field(default=False, description="whether to use safe mode")
-    scribble: bool = Field(default=False, description="whether to use scribble mode")
+    scribble: bool = Field(default=False, description="Whether to use scribble mode")
     # fmt: on
 
     def run_processor(self, image):
@@ -243,9 +243,9 @@ class LineartImageProcessorInvocation(ImageProcessorInvocation, PILInvocationCon
     # fmt: off
     type: Literal["lineart_image_processor"] = "lineart_image_processor"
     # Inputs
-    detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
-    image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
-    coarse: bool = Field(default=False, description="whether to use coarse mode")
+    detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
+    image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
+    coarse: bool = Field(default=False, description="Whether to use coarse mode")
     # fmt: on
 
     def run_processor(self, image):
@@ -262,8 +262,8 @@ class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation, PILInvocati
     # fmt: off
     type: Literal["lineart_anime_image_processor"] = "lineart_anime_image_processor"
     # Inputs
-    detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
-    image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
+    detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
+    image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
     # fmt: on
 
     def run_processor(self, image):
@@ -280,9 +280,9 @@ class OpenposeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationCo
     # fmt: off
     type: Literal["openpose_image_processor"] = "openpose_image_processor"
     # Inputs
-    hand_and_face: bool = Field(default=False, description="whether to use hands and face mode")
-    detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
-    image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
+    hand_and_face: bool = Field(default=False, description="Whether to use hands and face mode")
+    detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
+    image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
     # fmt: on
 
     def run_processor(self, image):
@@ -300,8 +300,8 @@ class MidasDepthImageProcessorInvocation(ImageProcessorInvocation, PILInvocation
     # fmt: off
     type: Literal["midas_depth_image_processor"] = "midas_depth_image_processor"
     # Inputs
-    a_mult: float = Field(default=2.0, ge=0, description="Midas parameter a = amult * PI")
-    bg_th: float = Field(default=0.1, ge=0, description="Midas parameter bg_th")
+    a_mult: float = Field(default=2.0, ge=0, description="Midas parameter `a_mult` (a = a_mult * PI)")
+    bg_th: float = Field(default=0.1, ge=0, description="Midas parameter `bg_th`")
     # depth_and_normal not supported in controlnet_aux v0.0.3
     # depth_and_normal: bool = Field(default=False, description="whether to use depth and normal mode")
     # fmt: on
@@ -322,8 +322,8 @@ class NormalbaeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationC
     # fmt: off
     type: Literal["normalbae_image_processor"] = "normalbae_image_processor"
     # Inputs
-    detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
-    image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
+    detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
+    image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
     # fmt: on
 
     def run_processor(self, image):
@@ -339,10 +339,10 @@ class MlsdImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig
     # fmt: off
     type: Literal["mlsd_image_processor"] = "mlsd_image_processor"
     # Inputs
-    detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
-    image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
-    thr_v: float = Field(default=0.1, ge=0, description="MLSD parameter thr_v")
-    thr_d: float = Field(default=0.1, ge=0, description="MLSD parameter thr_d")
+    detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
+    image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
+    thr_v: float = Field(default=0.1, ge=0, description="MLSD parameter `thr_v`")
+    thr_d: float = Field(default=0.1, ge=0, description="MLSD parameter `thr_d`")
     # fmt: on
 
     def run_processor(self, image):
@@ -360,10 +360,10 @@ class PidiImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig
     # fmt: off
     type: Literal["pidi_image_processor"] = "pidi_image_processor"
     # Inputs
-    detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
-    image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
-    safe: bool = Field(default=False, description="whether to use safe mode")
-    scribble: bool = Field(default=False, description="whether to use scribble mode")
+    detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
+    image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
+    safe: bool = Field(default=False, description="Whether to use safe mode")
+    scribble: bool = Field(default=False, description="Whether to use scribble mode")
     # fmt: on
 
     def run_processor(self, image):
@@ -381,11 +381,11 @@ class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation, PILInvoca
     # fmt: off
     type: Literal["content_shuffle_image_processor"] = "content_shuffle_image_processor"
     # Inputs
-    detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
-    image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
-    h: Union[int | None] = Field(default=512, ge=0, description="content shuffle h parameter")
-    w: Union[int | None] = Field(default=512, ge=0, description="content shuffle w parameter")
-    f: Union[int | None] = Field(default=256, ge=0, description="cont")
+    detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection")
+    image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
+    h: Union[int, None] = Field(default=512, ge=0, description="Content shuffle `h` parameter")
+    w: Union[int, None] = Field(default=512, ge=0, description="Content shuffle `w` parameter")
+    f: Union[int, None] = Field(default=256, ge=0, description="Content shuffle `f` parameter")
     # fmt: on
 
     def run_processor(self, image):
@@ -418,8 +418,8 @@ class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationCo
     # fmt: off
     type: Literal["mediapipe_face_processor"] = "mediapipe_face_processor"
     # Inputs
-    max_faces: int = Field(default=1, ge=1, description="maximum number of faces to detect")
-    min_confidence: float = Field(default=0.5, ge=0, le=1, description="minimum confidence for face detection")
+    max_faces: int = Field(default=1, ge=1, description="Maximum number of faces to detect")
+    min_confidence: float = Field(default=0.5, ge=0, le=1, description="Minimum confidence for face detection")
     # fmt: on
 
     def run_processor(self, image):
 
@@ -4,6 +4,7 @@
 import einops
 from typing import Literal, Optional, Union, List
 
+from compel import Compel
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel
 
 from pydantic import BaseModel, Field, validator
@@ -233,6 +234,15 @@ def get_conditioning_data(self, context: InvocationContext, model: StableDiffusi
         c, extra_conditioning_info = context.services.latents.get(self.positive_conditioning.conditioning_name)
         uc, _ = context.services.latents.get(self.negative_conditioning.conditioning_name)
 
+        compel = Compel(
+            tokenizer=model.tokenizer,
+            text_encoder=model.text_encoder,
+            textual_inversion_manager=model.textual_inversion_manager,
+            dtype_for_device_getter=torch_dtype,
+            truncate_long_prompts=False,
+        )
+        [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc])
+
         conditioning_data = ConditioningData(
             uc,
             c,
 
@@ -38,7 +38,7 @@ def get_uc_and_c_and_ec(prompt_string,
                     dtype_for_device_getter=torch_dtype,
                     truncate_long_prompts=False,
                     )
-    
+
     config = get_invokeai_config()
 
     # get rid of any newline characters
@@ -282,6 +282,8 @@ def split_weighted_subprompts(text, skip_normalize=False) -> list:
         (match.group("prompt").replace("\\:", ":"), float(match.group("weight") or 1))
         for match in re.finditer(prompt_parser, text)
     ]
+    if len(parsed_prompts) == 0:
+        return []
     if skip_normalize:
         return parsed_prompts
     weight_sum = sum(map(lambda x: x[1], parsed_prompts))
 
@@ -26,10 +26,10 @@ We need to start the nodes web server, which serves the OpenAPI schema to the ge
 
 ```bash
 # from the repo root
-python scripts/invoke-new.py --web
+python scripts/invokeai-web.py
 ```
 
-2. Generate the API client.
+2. Generate the API client. 
 
 ```bash
 # from invokeai/frontend/web/