A class for performing image segmentation using a specified model.
Parameters:
Name |
Type |
Description |
Default |
device |
str
|
The device to run the model on (e.g., 'cpu', 'cuda').
|
'cpu'
|
model_name |
str
|
The name of the model to be loaded. Defaults to
'facebook/sam-vit-huge'.
|
'facebook/sam-vit-huge'
|
Source code in maestro/markers/sam.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
class SegmentAnythingMarkGenerator:
    """
    A class for performing image segmentation using a specified model.

    Parameters:
        device (str): The device to run the model on (e.g., 'cpu', 'cuda').
        model_name (str): The name of the model to be loaded. Defaults to
            'facebook/sam-vit-huge'.
    """
    def __init__(self, device: str = 'cpu', model_name: str = "facebook/sam-vit-huge"):
        self.model = SamModel.from_pretrained(model_name).to(device)
        self.processor = SamProcessor.from_pretrained(model_name)
        self.image_processor = SamImageProcessor.from_pretrained(model_name)
        self.device = device
        # Used by `generate` when no guide mask is supplied.
        self.pipeline = pipeline(
            task="mask-generation",
            model=self.model,
            image_processor=self.image_processor,
            device=self.device)

    def generate(
        self,
        image: np.ndarray,
        mask: Optional[np.ndarray] = None
    ) -> sv.Detections:
        """
        Generate image segmentation marks.

        Without a guide mask, the whole image is segmented by the
        mask-generation pipeline. With a guide mask, each polygon extracted
        from the mask is segmented separately, prompted with points sampled
        from that polygon.

        Parameters:
            image (np.ndarray): The image to be marked in BGR format.
            mask (Optional[np.ndarray]): The mask to be used as a guide for
                segmentation.

        Returns:
            sv.Detections: An object containing the segmentation masks and their
                corresponding bounding box coordinates.
        """
        # Function-scope import so this block does not depend on a
        # module-level `import torch` being present in the file.
        import torch

        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        if mask is None:
            outputs = self.pipeline(image, points_per_batch=64)
            masks = np.array(outputs['masks'])
            return masks_to_marks(masks=masks)

        guided_masks = []
        # Inference only: the outputs are detached and converted to NumPy, so
        # building an autograd graph would only waste memory.
        with torch.no_grad():
            # Encode the image once and reuse the embeddings for every polygon.
            inputs = self.processor(image, return_tensors="pt").to(self.device)
            image_embeddings = self.model.get_image_embeddings(inputs.pixel_values)

            for polygon in sv.mask_to_polygons(mask.astype(bool)):
                # Sample 5 polygon points as prompts; sampling with
                # replacement keeps this valid for polygons with fewer than
                # 5 points.
                indexes = np.random.choice(a=polygon.shape[0], size=5, replace=True)
                input_points = polygon[indexes]
                inputs = self.processor(
                    images=image,
                    input_points=[[input_points]],
                    return_tensors="pt"
                ).to(self.device)
                # The image is already encoded; pass the cached embeddings
                # instead of the pixel values.
                del inputs["pixel_values"]
                outputs = self.model(image_embeddings=image_embeddings, **inputs)
                # NOTE(review): [0][0][0] presumably selects the first
                # predicted mask for the single image / single prompt -
                # confirm against the post_process_masks output layout.
                # Stored under its own name so the `mask` argument is not
                # shadowed by a per-polygon result.
                polygon_mask = self.processor.image_processor.post_process_masks(
                    masks=outputs.pred_masks.cpu().detach(),
                    original_sizes=inputs["original_sizes"].cpu().detach(),
                    reshaped_input_sizes=inputs["reshaped_input_sizes"].cpu().detach()
                )[0][0][0].numpy()
                guided_masks.append(polygon_mask)

        masks = np.array(guided_masks)
        return masks_to_marks(masks=masks)
|
generate(image, mask=None)
Generate image segmentation marks.
Parameters:
Name |
Type |
Description |
Default |
image |
ndarray
|
The image to be marked in BGR format.
|
required
|
mask |
Optional[ndarray]
|
The mask to be used as a guide for
segmentation.
|
None
|
Returns:
Type |
Description |
Detections
|
An object containing the segmentation masks and their
corresponding bounding box coordinates.
|
Source code in maestro/markers/sam.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def generate(
    self,
    image: np.ndarray,
    mask: Optional[np.ndarray] = None
) -> sv.Detections:
    """
    Generate image segmentation marks.

    Without a guide mask, the whole image is segmented by the
    mask-generation pipeline. With a guide mask, each polygon extracted from
    the mask is segmented separately, prompted with points sampled from that
    polygon.

    Parameters:
        image (np.ndarray): The image to be marked in BGR format.
        mask (Optional[np.ndarray]): The mask to be used as a guide for
            segmentation.

    Returns:
        sv.Detections: An object containing the segmentation masks and their
            corresponding bounding box coordinates.
    """
    # Function-scope import so this block does not depend on a module-level
    # `import torch` being present in the file.
    import torch

    image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    if mask is None:
        outputs = self.pipeline(image, points_per_batch=64)
        masks = np.array(outputs['masks'])
        return masks_to_marks(masks=masks)

    guided_masks = []
    # Inference only: the outputs are detached and converted to NumPy, so
    # building an autograd graph would only waste memory.
    with torch.no_grad():
        # Encode the image once and reuse the embeddings for every polygon.
        inputs = self.processor(image, return_tensors="pt").to(self.device)
        image_embeddings = self.model.get_image_embeddings(inputs.pixel_values)

        for polygon in sv.mask_to_polygons(mask.astype(bool)):
            # Sample 5 polygon points as prompts; sampling with replacement
            # keeps this valid for polygons with fewer than 5 points.
            indexes = np.random.choice(a=polygon.shape[0], size=5, replace=True)
            input_points = polygon[indexes]
            inputs = self.processor(
                images=image,
                input_points=[[input_points]],
                return_tensors="pt"
            ).to(self.device)
            # The image is already encoded; pass the cached embeddings
            # instead of the pixel values.
            del inputs["pixel_values"]
            outputs = self.model(image_embeddings=image_embeddings, **inputs)
            # NOTE(review): [0][0][0] presumably selects the first predicted
            # mask for the single image / single prompt - confirm against
            # the post_process_masks output layout.
            # Stored under its own name so the `mask` argument is not
            # shadowed by a per-polygon result.
            polygon_mask = self.processor.image_processor.post_process_masks(
                masks=outputs.pred_masks.cpu().detach(),
                original_sizes=inputs["original_sizes"].cpu().detach(),
                reshaped_input_sizes=inputs["reshaped_input_sizes"].cpu().detach()
            )[0][0][0].numpy()
            guided_masks.append(polygon_mask)

    masks = np.array(guided_masks)
    return masks_to_marks(masks=masks)
|