A class for performing image segmentation using a specified model.
Parameters:
| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `device` | `str` | The device to run the model on (e.g., 'cpu', 'cuda'). | `'cpu'` |
| `model_name` | `str` | The name of the model to be loaded. Defaults to `'facebook/sam-vit-huge'`. | `'facebook/sam-vit-huge'` |
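A minimal construction sketch (the import path is assumed from the `maestro/markers/sam.py` source path shown below; the CUDA check assumes PyTorch is installed):

```python
import torch

# Import path assumed from the source file path shown below.
from maestro.markers.sam import SegmentAnythingMarkGenerator

# Use CUDA when available; __init__ moves both the SAM model and the
# mask-generation pipeline to this device.
device = "cuda" if torch.cuda.is_available() else "cpu"
generator = SegmentAnythingMarkGenerator(device=device)
```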
Source code in maestro/markers/sam.py
```python
class SegmentAnythingMarkGenerator:
    """A class for performing image segmentation using a specified model.

    Parameters:
        device (str): The device to run the model on (e.g., 'cpu', 'cuda').
        model_name (str): The name of the model to be loaded. Defaults to
            'facebook/sam-vit-huge'.
    """

    def __init__(self, device: str = "cpu", model_name: str = "facebook/sam-vit-huge") -> None:
        self.model = SamModel.from_pretrained(model_name).to(device)
        self.processor = SamProcessor.from_pretrained(model_name)
        self.image_processor = SamImageProcessor.from_pretrained(model_name)
        self.device = device
        self.pipeline = pipeline(
            task="mask-generation", model=self.model, image_processor=self.image_processor, device=self.device
        )

    def generate(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> sv.Detections:
        """Generate image segmentation marks.

        Parameters:
            image (np.ndarray): The image to be marked in BGR format.
            mask (Optional[np.ndarray]): The mask to be used as a guide for
                segmentation.

        Returns:
            sv.Detections: An object containing the segmentation masks and their
                corresponding bounding box coordinates.
        """
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if mask is None:
            # Automatic mode: segment the whole image with the mask-generation pipeline.
            outputs = self.pipeline(image, points_per_batch=64)
            masks = np.array(outputs["masks"])
            return masks_to_marks(masks=masks)
        else:
            # Guided mode: compute image embeddings once, then prompt the model
            # with points sampled from each polygon of the guide mask.
            inputs = self.processor(image, return_tensors="pt").to(self.device)
            image_embeddings = self.model.get_image_embeddings(inputs.pixel_values)
            masks = []
            for polygon in sv.mask_to_polygons(mask.astype(bool)):
                indexes = np.random.default_rng().choice(a=polygon.shape[0], size=5, replace=True)
                input_points = polygon[indexes]
                inputs = self.processor(images=image, input_points=[[input_points]], return_tensors="pt").to(
                    self.device
                )
                # The precomputed embeddings replace the raw pixel values.
                del inputs["pixel_values"]
                outputs = self.model(image_embeddings=image_embeddings, **inputs)
                mask = self.processor.image_processor.post_process_masks(
                    masks=outputs.pred_masks.cpu().detach(),
                    original_sizes=inputs["original_sizes"].cpu().detach(),
                    reshaped_input_sizes=inputs["reshaped_input_sizes"].cpu().detach(),
                )[0][0][0].numpy()
                masks.append(mask)
            masks = np.array(masks)
            return masks_to_marks(masks=masks)
```
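As a usage sketch for automatic mode (the image path is hypothetical; assumes OpenCV and supervision are installed, and `generator` from the sketch above):

```python
import cv2

# generate() expects a BGR image, which is what cv2.imread returns.
image = cv2.imread("image.jpg")  # hypothetical path

# With no mask, the whole image is segmented by the mask-generation pipeline.
marks = generator.generate(image=image)
print(len(marks))        # number of segmented regions
print(marks.xyxy.shape)  # (N, 4) bounding boxes derived from the masks
```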
`generate(image, mask=None)`

Generate image segmentation marks.
Parameters:
| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `image` | `np.ndarray` | The image to be marked in BGR format. | *required* |
| `mask` | `Optional[np.ndarray]` | The mask to be used as a guide for segmentation. | `None` |
Returns:
| Type | Description |
| --- | --- |
| `sv.Detections` | An object containing the segmentation masks and their corresponding bounding box coordinates. |
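In guided mode, the mask is converted to polygons and five points are sampled from each polygon's contour as prompts, yielding one refined mask per region. A minimal sketch, assuming `image` and `generator` from the earlier sketches and a hypothetical rectangular region of interest:

```python
import numpy as np

# Binary guide mask with the same height/width as the image; SAM point
# prompts are sampled from the boundary polygon of each region.
guide = np.zeros(image.shape[:2], dtype=np.uint8)
guide[100:300, 200:400] = 1  # hypothetical region of interest

marks = generator.generate(image=image, mask=guide)
print(marks.mask.shape)  # (num_regions, H, W) boolean masks
```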