Merge pull request #46 from meshroomHub/dev/addPromptInMetadata

demoulinv · web-flow · commit 4fbed5858171 · 2026-03-11T11:20:14.000+01:00
Add the text prompt to the segmentation output metadata (Meshroom:mrSegmentation:Prompt)
diff --git a/meshroom/imageSegmentation/ImageSegmentationSam3.py b/meshroom/imageSegmentation/ImageSegmentationSam3.py
@@ -302,12 +302,13 @@ def processChunk(self, chunk):
             posBboxDictFromShape = self.getBboxDictWithViewIdAsKeyFromShape(chunk.node.positiveBoxes)
             negBboxDictFromShape = self.getBboxDictWithViewIdAsKeyFromShape(chunk.node.negativeBoxes)
 
+            textPrompts = re.split(r'[\n]+', chunk.node.prompt.value)
+            textPrompts = [textPrompt for textPrompt in textPrompts if textPrompt]
+
             metadata_deep_model = {}
             metadata_deep_model["Meshroom:mrSegmentation:DeepModelName"] = "SegmentAnything"
             metadata_deep_model["Meshroom:mrSegmentation:DeepModelVersion"] = "sam3"
-
-            textPrompts = re.split(r'[\n]+', chunk.node.prompt.value)
-            textPrompts = [textPrompt for textPrompt in textPrompts if textPrompt]
+            metadata_deep_model["Meshroom:mrSegmentation:Prompt"] = ";".join(textPrompts)
 
             detectedShapeBboxes = []
 
diff --git a/meshroom/imageSegmentation/VideoSegmentationSam3.py b/meshroom/imageSegmentation/VideoSegmentationSam3.py
@@ -314,9 +314,12 @@ def processChunk(self, chunk):
             negClickDictFromShape = self.getClickDictWithViewIdAsKeyFromShape(chunk.node.negativeClicks)
             posBboxDictFromShape = self.getBboxDictWithViewIdAsKeyFromShape(chunk.node.boxPrompt)
 
+            prompt = chunk.node.prompt.value.splitlines()[0]
+
             metadata_deep_model = {}
             metadata_deep_model["Meshroom:mrSegmentation:DeepModelName"] = "SegmentAnything"
             metadata_deep_model["Meshroom:mrSegmentation:DeepModelVersion"] = "sam3-Video"
+            metadata_deep_model["Meshroom:mrSegmentation:Prompt"] = prompt
 
             pil_images = []
             clicks = {}
@@ -414,7 +417,7 @@ def processChunk(self, chunk):
                         type="add_prompt",
                         session_id=session_id,
                         frame_index=fIdx,
-                        text=chunk.node.prompt.value,
+                        text=prompt,
                     )
                 )
                 outputs_per_frame_curr_fwd = self.propagate_in_video(video_predictor, session_id, fIdx, max_frame_num_to_track_fwd, "forward")
@@ -432,7 +435,7 @@ def processChunk(self, chunk):
                             type="add_prompt",
                             session_id=session_id,
                             frame_index=fIdx,
-                            text=chunk.node.prompt.value,
+                            text=prompt,
                         )
                     )
                     outputs_per_frame_curr_bwd = self.propagate_in_video(video_predictor, session_id, fIdx, max_frame_num_to_track_bwd, "backward")
@@ -455,7 +458,7 @@ def processChunk(self, chunk):
 
                 if len(masks.keys()) > 0:
                     colorPalette.generate_palette(max(masks.keys()) + 1)
-                cryptoName = "object" if chunk.node.prompt.value == "" else chunk.node.prompt.value
+                cryptoName = "object" if prompt == "" else prompt
                 for key, mask in masks.items():
                     maskImage[mask] = [255, 255, 255]
                     color = colorPalette.at(int(key)) if colorPalette.at(int(key)) is not None else [255, 255, 255]
diff --git a/meshroom/imageSegmentation/VideoSegmentationSam3Text.py b/meshroom/imageSegmentation/VideoSegmentationSam3Text.py
@@ -206,7 +206,7 @@ def processChunk(self, chunk):
 
             metadata_deep_model = {}
             metadata_deep_model["Meshroom:mrSegmentation:DeepModelName"] = "SegmentAnything"
-            metadata_deep_model["Meshroom:mrSegmentation:DeepModelVersion"] = "sam3-Video"
+            metadata_deep_model["Meshroom:mrSegmentation:DeepModelVersion"] = "sam3-Video-TextPrompt"
 
             pil_images = []
             mask_images = []
@@ -257,6 +257,7 @@ def processChunk(self, chunk):
                 logger.info(f"textPrompt: {textPrompt}")
                 boxes[textPrompt] = {"forward": {}, "backward": {}}
                 cryptoName = "object" if textPrompt == "" else textPrompt
+                metadata_deep_model["Meshroom:mrSegmentation:Prompt"] = textPrompt
 
                 video_predictor.handle_request(request=dict(type="reset_session", session_id=session_id))
 
@@ -431,25 +432,28 @@ def processChunk(self, chunk):
 
                                 image.writeCryptomatte(cryptomattePath, cryptoName, img.shape[1], img.shape[0], manifest_bwd, crypto_id_bwd, crypto_cov_bwd)
 
-                for frameId in range(frameNumber):
-                    if chunk.node.maskInvert.value:
-                        mask = (mask_images[frameId][:,:,0:1] == 0).astype('float32')
-                    else:
-                        mask = (mask_images[frameId][:,:,0:1] > 0).astype('float32')
-                    logger.info("frameId: {} - {}".format(frameId, chunk_image_paths[frameId][0]))
+            prompts = [textPrompt.strip() for textPrompt in self.textPrompts if textPrompt.strip()]
+            metadata_deep_model["Meshroom:mrSegmentation:Prompt"] = ";".join(prompts)
 
-                    if chunk.node.keepFilename.value:
-                        outputFileMask = os.path.join(chunk.node.output.value, Path(chunk_image_paths[frameId][0]).stem + "." + chunk.node.extensionOut.value)
-                    else:
-                        outputFileMask = os.path.join(chunk.node.output.value, str(chunk_image_paths[frameId][1]) + "." + chunk.node.extensionOut.value)
+            for frameId in range(frameNumber):
+                if chunk.node.maskInvert.value:
+                    mask = (mask_images[frameId][:,:,0:1] == 0).astype('float32')
+                else:
+                    mask = (mask_images[frameId][:,:,0:1] > 0).astype('float32')
+                logger.info("frameId: {} - {}".format(frameId, chunk_image_paths[frameId][0]))
+
+                if chunk.node.keepFilename.value:
+                    outputFileMask = os.path.join(chunk.node.output.value, Path(chunk_image_paths[frameId][0]).stem + "." + chunk.node.extensionOut.value)
+                else:
+                    outputFileMask = os.path.join(chunk.node.output.value, str(chunk_image_paths[frameId][1]) + "." + chunk.node.extensionOut.value)
 
-                    optWrite = avimg.ImageWriteOptions()
-                    optWrite.toColorSpace(avimg.EImageColorSpace_NO_CONVERSION)
-                    if Path(outputFileMask).suffix.lower() == ".exr":
-                        optWrite.exrCompressionMethod(avimg.EImageExrCompression_stringToEnum("DWAA"))
-                        optWrite.exrCompressionLevel(300)
+                optWrite = avimg.ImageWriteOptions()
+                optWrite.toColorSpace(avimg.EImageColorSpace_NO_CONVERSION)
+                if Path(outputFileMask).suffix.lower() == ".exr":
+                    optWrite.exrCompressionMethod(avimg.EImageExrCompression_stringToEnum("DWAA"))
+                    optWrite.exrCompressionLevel(300)
 
-                    image.writeImage(outputFileMask, mask, sourceInfo["h_ori"], sourceInfo["w_ori"], sourceInfo["orientation"], sourceInfo["PAR"], metadata_deep_model, optWrite)
+                image.writeImage(outputFileMask, mask, sourceInfo["h_ori"], sourceInfo["w_ori"], sourceInfo["orientation"], sourceInfo["PAR"], metadata_deep_model, optWrite)
 
             jsonFilename = chunk.node.output.value + "/bboxes.json"
             with open(jsonFilename, "w", encoding="utf_8") as f: