Skip to content

Commit 0eaa558

Browse files
Segment union (mcmonkeyprojects#894)
* Add syntax for merging multiple segment masks into a single segment: `<segment:face || hair || yolo-skin_yolov8m-seg_60.pt> prompt for face and hair and skin`. Also supports a threshold for each part of the mask, by appending a semicolon and the threshold: `<segment:face;0.5 || hair, 0.4, 0.6>` will apply 0.5 threshold to the face mask and the overall segment threshold (0.6) to the hair mask.
* Update UI to support multiple segment masks
* Update Prompt Syntax with the new segment syntax
* Revert segment modal window changes to keep it simple for beginners.
* Update SwarmClipSeg node to emit mask with 3 dimensions.
* Update SwarmYolo node to emit masks with 3 dimensions (instead of sometimes 2)
* simplify the PR to just the focal point
* single-pipe

---------

Co-authored-by: Alex "mcmonkey" Goodwin <[email protected]>
1 parent 7686ddb commit 0eaa558

File tree

5 files changed

+68
-35
lines changed

5 files changed

+68
-35
lines changed

docs/Features/Prompt Syntax.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,11 @@
150150
- To control the creativity/threshold with a yolo model just append `,<creativity>,<threshold>`, for example `<segment:yolo-face_yolov8m-seg_60.pt-1,0.8,0.25>` sets a `0.8` creativity and `0.25` threshold.
151151
- Note that the default "confidence threshold" for Yolo models is `0.25`, which differs from the value often used with ClipSeg, and that Yolo does not have a "max threshold" like ClipSeg does.
152152
- If you have a yolo model with multiple supported classes, you can filter specific classes by appending `:<classes>:` to the model name where `<classes>` is a comma-separated list of class IDs or names, e.g., `<segment:yolo-modelnamehere:0,apple,2:,0.8,0.25>`
153+
- You can also combine multiple areas into a single segment to refine them as a single group.
154+
- Separate the areas with `|` in `texthere`
155+
- For example, `<segment:face|hair>` will find all the faces and hair in the image and refine them as a single group.
156+
- This works with YOLOv8 models as well.
157+
- `<segment:yolo-face_yolov8m-seg_60.pt | yolo-hair_yolov8m-seg_60.pt | fingers>` will refine the group of faces and hair (found by YOLO) and fingers (found by CLIPSeg) as a single group.
153158
- There's an advanced parameter under `Segment Refining` named `Segment Model` to customize the base model used for segment processing
154159
- There's also a parameter named `Save Segment Mask` to save a preview copy of the generated mask
155160

src/BuiltinExtensions/ComfyUIBackend/ExtraNodes/SwarmComfyCommon/SwarmClipSeg.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def seg(self, images, match_text, threshold):
6363
mask /= max
6464
while mask.ndim < 4:
6565
mask = mask.unsqueeze(0)
66-
mask = torch.nn.functional.interpolate(mask, size=(images.shape[1], images.shape[2]), mode="bilinear").squeeze(0).squeeze(0)
66+
mask = torch.nn.functional.interpolate(mask, size=(images.shape[1], images.shape[2]), mode="bilinear").squeeze(0)
6767
return (mask,)
6868

6969
NODE_CLASS_MAPPINGS = {

src/BuiltinExtensions/ComfyUIBackend/ExtraNodes/SwarmComfyExtra/SwarmYolo.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,9 @@ def seg(self, image, model_name, index, class_filter=None, sort_order="left-righ
8383
result = masks[0]
8484
for i in range(1, len(masks)):
8585
result = torch.max(result, masks[i])
86-
return (result, )
86+
return (result.unsqueeze(0), )
8787
elif index > len(masks):
88-
return (torch.zeros_like(masks[0]), )
88+
return (torch.zeros_like(masks[0]).unsqueeze(0), )
8989
else:
9090
sortedindices = []
9191
for mask in masks:

src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorSteps.cs

Lines changed: 59 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,45 +1342,73 @@ JArray doMaskShrinkApply(WorkflowGenerator g, JArray imgIn)
13421342
for (int i = 0; i < parts.Length; i++)
13431343
{
13441344
PromptRegion.Part part = parts[i];
1345-
string segmentNode;
1346-
if (part.DataText.StartsWith("yolo-"))
1347-
{
1348-
string fullname = part.DataText.After("yolo-");
1349-
string[] modelParts = fullname.Split(':');
1350-
fullname = modelParts[0];
1351-
string classFilter = modelParts.Length > 1 ? modelParts[1] : "";
1352-
(string mname, string indexText) = fullname.BeforeAndAfterLast('-');
1353-
if (!string.IsNullOrWhiteSpace(indexText) && int.TryParse(indexText, out int index))
1354-
{
1355-
fullname = mname;
1345+
string[] segmentSections = part.DataText.Split('|', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
1346+
string segmentNode = null;
1347+
foreach (string dataText in segmentSections)
1348+
{
1349+
string newSegmentNode = null;
1350+
if (dataText.StartsWith("yolo-"))
1351+
{
1352+
string fullname = dataText.After("yolo-");
1353+
string[] modelParts = fullname.Split(':');
1354+
fullname = modelParts[0];
1355+
string classFilter = modelParts.Length > 1 ? modelParts[1] : "";
1356+
(string mname, string indexText) = fullname.BeforeAndAfterLast('-');
1357+
if (!string.IsNullOrWhiteSpace(indexText) && int.TryParse(indexText, out int index))
1358+
{
1359+
fullname = mname;
1360+
}
1361+
else
1362+
{
1363+
index = 0;
1364+
}
1365+
if (part.Strength > 0.999)
1366+
{
1367+
Logs.Warning($"Yolo confidence threshold is set to 1. This was recommended syntax before yolo thresholds were supported, but is no longer valid. Swarm will automatically reset the value to default (0.25) instead.");
1368+
part.Strength = 0.25;
1369+
}
1370+
newSegmentNode = g.CreateNode("SwarmYoloDetection", new JObject()
1371+
{
1372+
["image"] = g.FinalImageOut,
1373+
["model_name"] = fullname,
1374+
["index"] = index,
1375+
["class_filter"] = classFilter,
1376+
["sort_order"] = g.UserInput.Get(T2IParamTypes.SegmentSortOrder, "left-right"),
1377+
["threshold"] = Math.Abs(part.Strength)
1378+
});
13561379
}
13571380
else
13581381
{
1359-
index = 0;
1382+
newSegmentNode = g.CreateNode("SwarmClipSeg", new JObject()
1383+
{
1384+
["images"] = g.FinalImageOut,
1385+
["match_text"] = dataText,
1386+
["threshold"] = Math.Abs(part.Strength)
1387+
});
13601388
}
1361-
if (part.Strength > 0.999)
1389+
if (segmentSections.Length > 1 && g.UserInput.Get(T2IParamTypes.SaveSegmentMask, false))
13621390
{
1363-
Logs.Warning($"Yolo confidence threshold is set to 1. This was recommended syntax before yolo thresholds were supported, but is no longer valid. Swarm will automatically reset the value to default (0.25) instead.");
1364-
part.Strength = 0.25;
1391+
string imageNode = g.CreateNode("MaskToImage", new JObject()
1392+
{
1393+
["mask"] = new JArray() { newSegmentNode, 0 }
1394+
});
1395+
g.CreateImageSaveNode([imageNode, 0], g.GetStableDynamicID(50000, 0));
13651396
}
1366-
segmentNode = g.CreateNode("SwarmYoloDetection", new JObject()
1397+
if (segmentNode is null)
13671398
{
1368-
["image"] = g.FinalImageOut,
1369-
["model_name"] = fullname,
1370-
["index"] = index,
1371-
["class_filter"] = classFilter,
1372-
["sort_order"] = g.UserInput.Get(T2IParamTypes.SegmentSortOrder, "left-right"),
1373-
["threshold"] = Math.Abs(part.Strength)
1374-
});
1375-
}
1376-
else
1377-
{
1378-
segmentNode = g.CreateNode("SwarmClipSeg", new JObject()
1399+
segmentNode = newSegmentNode;
1400+
}
1401+
else
13791402
{
1380-
["images"] = g.FinalImageOut,
1381-
["match_text"] = part.DataText,
1382-
["threshold"] = Math.Abs(part.Strength)
1383-
});
1403+
segmentNode = g.CreateNode("MaskComposite", new JObject()
1404+
{
1405+
["destination"] = new JArray() { segmentNode, 0 },
1406+
["source"] = new JArray() { newSegmentNode, 0 },
1407+
["operation"] = "add",
1408+
["x"] = 0,
1409+
["y"] = 0
1410+
});
1411+
}
13841412
}
13851413
if (part.Strength < 0)
13861414
{

src/wwwroot/js/genpage/gentab/prompttools.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class PromptTabCompleteClass {
7676
return this.getOrderedMatches(yolomodels.map(m => `yolo-${m}`), prefixLow);
7777
}
7878
}
79-
return ['\nSpecify before the ">" some text to match against in the image, like "<segment:face>".', '\nCan also do "<segment:text,creativity,threshold>" eg "face,0.6,0.5" where creativity is InitImageCreativity, and threshold is mask matching threshold for CLIP-Seg.', '\nYou can use a negative threshold value like "<segment:face,0.6,-0.5>" to invert the mask.', '\nYou may use the "yolo-" prefix to use a YOLOv8 seg model,', '\nor format "yolo-<model>-1" to get specifically the first result from a YOLOv8 match list.', '\n Additionally, you can apply a class filter by appending "yolo-<model>:<class_ids>:" where <class_ids> is a comma-separated list of class IDs or names to filter the detection results.'];
79+
return ['\nSpecify before the ">" some text to match against in the image, like "<segment:face>".', '\nCan also do "<segment:text,creativity,threshold>" eg "face,0.6,0.5" where creativity is InitImageCreativity, and threshold is mask matching threshold for CLIP-Seg.', '\nYou can use a negative threshold value like "<segment:face,0.6,-0.5>" to invert the mask.', '\nYou may use the "yolo-" prefix to use a YOLOv8 seg model,', '\nFor more advanced usages and a link to relevant docs, click the "+" button next to the prompt box, then "Auto Segment Refinement".'];
8080
});
8181
this.registerPrefix('setvar[var_name]', 'Store text for reference later in the prompt', (prefix) => {
8282
return ['\nSave the content of the tag into the named variable. eg "<setvar[colors]: red and blue>", then use like "<var:colors>"', '\nVariables can include the results of other tags. eg "<setvar[expression]: <random: smiling|frowning|crying>>"', '\nReference stored values later in the prompt with the <var:> tag', '\nThe setvar tag emits a copy the variable value in place. You can not do this with eg "<setvar[colors,false]: red and blue>"'];

0 commit comments

Comments
 (0)