Skip to content

Markers

Segment Anything

A class for performing image segmentation using a specified model.

Parameters:

Name Type Description Default
device str

The device to run the model on (e.g., 'cpu', 'cuda').

'cpu'
model_name str

The name of the model to be loaded. Defaults to 'facebook/sam-vit-huge'.

'facebook/sam-vit-huge'
Source code in maestro/markers/sam.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
class SegmentAnythingMarkGenerator:
    """
    A class for performing image segmentation using a specified model.

    Parameters:
        device (str): The device to run the model on (e.g., 'cpu', 'cuda').
        model_name (str): The name of the model to be loaded. Defaults to
            'facebook/sam-vit-huge'.
    """
    def __init__(self, device: str = 'cpu', model_name: str = "facebook/sam-vit-huge"):
        self.model = SamModel.from_pretrained(model_name).to(device)
        self.processor = SamProcessor.from_pretrained(model_name)
        self.image_processor = SamImageProcessor.from_pretrained(model_name)
        self.device = device
        # Used by `generate` when no guide mask is supplied.
        self.pipeline = pipeline(
            task="mask-generation",
            model=self.model,
            image_processor=self.image_processor,
            device=self.device)

    def generate(
        self,
        image: np.ndarray,
        mask: Optional[np.ndarray] = None
    ) -> sv.Detections:
        """
        Generate image segmentation marks.

        Without a guide mask the whole image is segmented by the
        mask-generation pipeline. With a guide mask, the mask is split into
        polygons and the model is prompted once per polygon with 5 points
        sampled at random (with replacement) from that polygon.

        Parameters:
            image (np.ndarray): The image to be marked in BGR format.
            mask (Optional[np.ndarray]): The mask to be used as a guide for
                segmentation. It is cast to bool before polygons are
                extracted. Defaults to None.

        Returns:
            sv.Detections: An object containing the segmentation masks and their
                corresponding bounding box coordinates. A guide mask that yields
                no polygons produces an empty `(0, H, W)` mask stack.
        """
        # Function-scope import: torch is only needed to switch off autograd.
        import torch

        height, width = image.shape[:2]
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if mask is None:
            outputs = self.pipeline(image, points_per_batch=64)
            masks = np.array(outputs['masks'])
            return masks_to_marks(masks=masks)

        polygons = sv.mask_to_polygons(mask.astype(bool))
        if len(polygons) == 0:
            # Nothing to prompt with: skip the encoder and keep the result a
            # 3-D stack instead of the shape-(0,) array np.array([]) would give.
            return masks_to_marks(masks=np.zeros((0, height, width), dtype=bool))

        predicted_masks = []
        # Inference only: avoid building an autograd graph for every prompt.
        with torch.no_grad():
            inputs = self.processor(image, return_tensors="pt").to(self.device)
            # The image is encoded once and reused for every polygon prompt.
            image_embeddings = self.model.get_image_embeddings(inputs.pixel_values)
            for polygon in polygons:
                indexes = np.random.choice(a=polygon.shape[0], size=5, replace=True)
                input_points = polygon[indexes]
                prompt_inputs = self.processor(
                    images=image,
                    input_points=[[input_points]],
                    return_tensors="pt"
                ).to(self.device)
                # Pixel values are replaced by the precomputed embeddings.
                del prompt_inputs["pixel_values"]
                outputs = self.model(
                    image_embeddings=image_embeddings, **prompt_inputs)
                # [0][0][0]: first image, first prompt, first candidate mask
                # (presumed output layout - TODO confirm against transformers).
                predicted_mask = self.processor.image_processor.post_process_masks(
                    masks=outputs.pred_masks.cpu().detach(),
                    original_sizes=prompt_inputs["original_sizes"].cpu().detach(),
                    reshaped_input_sizes=prompt_inputs["reshaped_input_sizes"].cpu().detach()
                )[0][0][0].numpy()
                predicted_masks.append(predicted_mask)
        return masks_to_marks(masks=np.array(predicted_masks))

generate(image, mask=None)

Generate image segmentation marks.

Parameters:

Name Type Description Default
image ndarray

The image to be marked in BGR format.

required
mask Optional[ndarray]

The mask to be used as a guide for segmentation.

None

Returns:

Type Description
Detections

An object containing the segmentation masks and their corresponding bounding box coordinates.

Source code in maestro/markers/sam.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def generate(
    self,
    image: np.ndarray,
    mask: Optional[np.ndarray] = None
) -> sv.Detections:
    """
    Generate image segmentation marks.

    Without a guide mask the whole image is segmented by the
    mask-generation pipeline. With a guide mask, the mask is split into
    polygons and the model is prompted once per polygon with 5 points
    sampled at random (with replacement) from that polygon.

    Parameters:
        image (np.ndarray): The image to be marked in BGR format.
        mask (Optional[np.ndarray]): The mask to be used as a guide for
            segmentation. It is cast to bool before polygons are
            extracted. Defaults to None.

    Returns:
        sv.Detections: An object containing the segmentation masks and their
            corresponding bounding box coordinates. A guide mask that yields
            no polygons produces an empty `(0, H, W)` mask stack.
    """
    # Function-scope import: torch is only needed to switch off autograd.
    import torch

    height, width = image.shape[:2]
    image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    if mask is None:
        outputs = self.pipeline(image, points_per_batch=64)
        masks = np.array(outputs['masks'])
        return masks_to_marks(masks=masks)

    polygons = sv.mask_to_polygons(mask.astype(bool))
    if len(polygons) == 0:
        # Nothing to prompt with: skip the encoder and keep the result a
        # 3-D stack instead of the shape-(0,) array np.array([]) would give.
        return masks_to_marks(masks=np.zeros((0, height, width), dtype=bool))

    predicted_masks = []
    # Inference only: avoid building an autograd graph for every prompt.
    with torch.no_grad():
        inputs = self.processor(image, return_tensors="pt").to(self.device)
        # The image is encoded once and reused for every polygon prompt.
        image_embeddings = self.model.get_image_embeddings(inputs.pixel_values)
        for polygon in polygons:
            indexes = np.random.choice(a=polygon.shape[0], size=5, replace=True)
            input_points = polygon[indexes]
            prompt_inputs = self.processor(
                images=image,
                input_points=[[input_points]],
                return_tensors="pt"
            ).to(self.device)
            # Pixel values are replaced by the precomputed embeddings.
            del prompt_inputs["pixel_values"]
            outputs = self.model(
                image_embeddings=image_embeddings, **prompt_inputs)
            # [0][0][0]: first image, first prompt, first candidate mask
            # (presumed output layout - TODO confirm against transformers).
            predicted_mask = self.processor.image_processor.post_process_masks(
                masks=outputs.pred_masks.cpu().detach(),
                original_sizes=prompt_inputs["original_sizes"].cpu().detach(),
                reshaped_input_sizes=prompt_inputs["reshaped_input_sizes"].cpu().detach()
            )[0][0][0].numpy()
            predicted_masks.append(predicted_mask)
    return masks_to_marks(masks=np.array(predicted_masks))