Skip to content

Commit 4fbed58

Browse files
authored
Merge pull request #46 from meshroomHub/dev/addPromptInMetadata
Add prompt in metadata
2 parents fe7d98d + 26aa525 commit 4fbed58

3 files changed

Lines changed: 31 additions & 23 deletions

File tree

meshroom/imageSegmentation/ImageSegmentationSam3.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -302,12 +302,13 @@ def processChunk(self, chunk):
302302
posBboxDictFromShape = self.getBboxDictWithViewIdAsKeyFromShape(chunk.node.positiveBoxes)
303303
negBboxDictFromShape = self.getBboxDictWithViewIdAsKeyFromShape(chunk.node.negativeBoxes)
304304

305+
textPrompts = re.split(r'[\n]+', chunk.node.prompt.value)
306+
textPrompts = [textPrompt for textPrompt in textPrompts if textPrompt]
307+
305308
metadata_deep_model = {}
306309
metadata_deep_model["Meshroom:mrSegmentation:DeepModelName"] = "SegmentAnything"
307310
metadata_deep_model["Meshroom:mrSegmentation:DeepModelVersion"] = "sam3"
308-
309-
textPrompts = re.split(r'[\n]+', chunk.node.prompt.value)
310-
textPrompts = [textPrompt for textPrompt in textPrompts if textPrompt]
311+
metadata_deep_model["Meshroom:mrSegmentation:Prompt"] = ";".join(textPrompts)
311312

312313
detectedShapeBboxes = []
313314

meshroom/imageSegmentation/VideoSegmentationSam3.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -314,9 +314,12 @@ def processChunk(self, chunk):
314314
negClickDictFromShape = self.getClickDictWithViewIdAsKeyFromShape(chunk.node.negativeClicks)
315315
posBboxDictFromShape = self.getBboxDictWithViewIdAsKeyFromShape(chunk.node.boxPrompt)
316316

317+
prompt = chunk.node.prompt.value.splitlines()[0]
318+
317319
metadata_deep_model = {}
318320
metadata_deep_model["Meshroom:mrSegmentation:DeepModelName"] = "SegmentAnything"
319321
metadata_deep_model["Meshroom:mrSegmentation:DeepModelVersion"] = "sam3-Video"
322+
metadata_deep_model["Meshroom:mrSegmentation:Prompt"] = prompt
320323

321324
pil_images = []
322325
clicks = {}
@@ -414,7 +417,7 @@ def processChunk(self, chunk):
414417
type="add_prompt",
415418
session_id=session_id,
416419
frame_index=fIdx,
417-
text=chunk.node.prompt.value,
420+
text=prompt,
418421
)
419422
)
420423
outputs_per_frame_curr_fwd = self.propagate_in_video(video_predictor, session_id, fIdx, max_frame_num_to_track_fwd, "forward")
@@ -432,7 +435,7 @@ def processChunk(self, chunk):
432435
type="add_prompt",
433436
session_id=session_id,
434437
frame_index=fIdx,
435-
text=chunk.node.prompt.value,
438+
text=prompt,
436439
)
437440
)
438441
outputs_per_frame_curr_bwd = self.propagate_in_video(video_predictor, session_id, fIdx, max_frame_num_to_track_bwd, "backward")
@@ -455,7 +458,7 @@ def processChunk(self, chunk):
455458

456459
if len(masks.keys()) > 0:
457460
colorPalette.generate_palette(max(masks.keys()) + 1)
458-
cryptoName = "object" if chunk.node.prompt.value == "" else chunk.node.prompt.value
461+
cryptoName = "object" if prompt == "" else prompt
459462
for key, mask in masks.items():
460463
maskImage[mask] = [255, 255, 255]
461464
color = colorPalette.at(int(key)) if colorPalette.at(int(key)) is not None else [255, 255, 255]

meshroom/imageSegmentation/VideoSegmentationSam3Text.py

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ def processChunk(self, chunk):
206206

207207
metadata_deep_model = {}
208208
metadata_deep_model["Meshroom:mrSegmentation:DeepModelName"] = "SegmentAnything"
209-
metadata_deep_model["Meshroom:mrSegmentation:DeepModelVersion"] = "sam3-Video"
209+
metadata_deep_model["Meshroom:mrSegmentation:DeepModelVersion"] = "sam3-Video-TextPrompt"
210210

211211
pil_images = []
212212
mask_images = []
@@ -257,6 +257,7 @@ def processChunk(self, chunk):
257257
logger.info(f"textPrompt: {textPrompt}")
258258
boxes[textPrompt] = {"forward": {}, "backward": {}}
259259
cryptoName = "object" if textPrompt == "" else textPrompt
260+
metadata_deep_model["Meshroom:mrSegmentation:Prompt"] = textPrompt
260261

261262
video_predictor.handle_request(request=dict(type="reset_session", session_id=session_id))
262263

@@ -431,25 +432,28 @@ def processChunk(self, chunk):
431432

432433
image.writeCryptomatte(cryptomattePath, cryptoName, img.shape[1], img.shape[0], manifest_bwd, crypto_id_bwd, crypto_cov_bwd)
433434

434-
for frameId in range(frameNumber):
435-
if chunk.node.maskInvert.value:
436-
mask = (mask_images[frameId][:,:,0:1] == 0).astype('float32')
437-
else:
438-
mask = (mask_images[frameId][:,:,0:1] > 0).astype('float32')
439-
logger.info("frameId: {} - {}".format(frameId, chunk_image_paths[frameId][0]))
435+
prompts = [textPrompt.strip() for textPrompt in self.textPrompts if textPrompt.strip()]
436+
metadata_deep_model["Meshroom:mrSegmentation:Prompt"] = ";".join(prompts)
440437

441-
if chunk.node.keepFilename.value:
442-
outputFileMask = os.path.join(chunk.node.output.value, Path(chunk_image_paths[frameId][0]).stem + "." + chunk.node.extensionOut.value)
443-
else:
444-
outputFileMask = os.path.join(chunk.node.output.value, str(chunk_image_paths[frameId][1]) + "." + chunk.node.extensionOut.value)
438+
for frameId in range(frameNumber):
439+
if chunk.node.maskInvert.value:
440+
mask = (mask_images[frameId][:,:,0:1] == 0).astype('float32')
441+
else:
442+
mask = (mask_images[frameId][:,:,0:1] > 0).astype('float32')
443+
logger.info("frameId: {} - {}".format(frameId, chunk_image_paths[frameId][0]))
444+
445+
if chunk.node.keepFilename.value:
446+
outputFileMask = os.path.join(chunk.node.output.value, Path(chunk_image_paths[frameId][0]).stem + "." + chunk.node.extensionOut.value)
447+
else:
448+
outputFileMask = os.path.join(chunk.node.output.value, str(chunk_image_paths[frameId][1]) + "." + chunk.node.extensionOut.value)
445449

446-
optWrite = avimg.ImageWriteOptions()
447-
optWrite.toColorSpace(avimg.EImageColorSpace_NO_CONVERSION)
448-
if Path(outputFileMask).suffix.lower() == ".exr":
449-
optWrite.exrCompressionMethod(avimg.EImageExrCompression_stringToEnum("DWAA"))
450-
optWrite.exrCompressionLevel(300)
450+
optWrite = avimg.ImageWriteOptions()
451+
optWrite.toColorSpace(avimg.EImageColorSpace_NO_CONVERSION)
452+
if Path(outputFileMask).suffix.lower() == ".exr":
453+
optWrite.exrCompressionMethod(avimg.EImageExrCompression_stringToEnum("DWAA"))
454+
optWrite.exrCompressionLevel(300)
451455

452-
image.writeImage(outputFileMask, mask, sourceInfo["h_ori"], sourceInfo["w_ori"], sourceInfo["orientation"], sourceInfo["PAR"], metadata_deep_model, optWrite)
456+
image.writeImage(outputFileMask, mask, sourceInfo["h_ori"], sourceInfo["w_ori"], sourceInfo["orientation"], sourceInfo["PAR"], metadata_deep_model, optWrite)
453457

454458
jsonFilename = chunk.node.output.value + "/bboxes.json"
455459
with open(jsonFilename, "w", encoding="utf_8") as f:

0 commit comments

Comments (0)