Skip to content

Markers

Segment Anything

A class for performing image segmentation using a specified model.

Parameters:

Name Type Description Default
device str

The device to run the model on (e.g., 'cpu', 'cuda').

'cpu'
model_name str

The name of the model to be loaded. Defaults to 'facebook/sam-vit-huge'.

'facebook/sam-vit-huge'
Source code in maestro/markers/sam.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
class SegmentAnythingMarkGenerator:
    """A class for performing image segmentation using a specified model.

    The model, its processor and its image processor are all loaded from
    `model_name`, and a "mask-generation" pipeline is built on top of the
    loaded model.

    Parameters:
        device (str): The device to run the model on (e.g., 'cpu', 'cuda').
        model_name (str): The name of the model to be loaded. Defaults to
            'facebook/sam-vit-huge'.
    """

    def __init__(self, device: str = "cpu", model_name: str = "facebook/sam-vit-huge") -> None:
        self.model = SamModel.from_pretrained(model_name).to(device)
        # Used by `generate` to build prompted inputs and post-process predictions.
        self.processor = SamProcessor.from_pretrained(model_name)
        self.image_processor = SamImageProcessor.from_pretrained(model_name)
        self.device = device
        # Used by `generate` when no guiding mask is supplied.
        self.pipeline = pipeline(
            task="mask-generation", model=self.model, image_processor=self.image_processor, device=self.device
        )

    def generate(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> sv.Detections:
        """Generate image segmentation marks.

        When `mask` is None the image is passed to the mask-generation
        pipeline. Otherwise `mask` is converted to polygons and, for every
        polygon, the model is prompted with five points sampled (with
        replacement) from that polygon; one predicted mask is kept per polygon.

        Parameters:
            image (np.ndarray): The image to be marked in BGR format.
            mask (Optional[np.ndarray]): The mask to be used as a guide for
                segmentation.

        Returns:
            sv.Detections: An object containing the segmentation masks and their
                corresponding bounding box coordinates.
        """
        rgb_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        if mask is None:
            outputs = self.pipeline(rgb_image, points_per_batch=64)
            return masks_to_marks(masks=np.array(outputs["masks"]))

        # Compute the image embeddings once; they are reused for every polygon prompt.
        image_inputs = self.processor(rgb_image, return_tensors="pt").to(self.device)
        image_embeddings = self.model.get_image_embeddings(image_inputs.pixel_values)

        # One generator for the whole call instead of a new one per polygon.
        rng = np.random.default_rng()
        polygon_masks = []
        for polygon in sv.mask_to_polygons(mask.astype(bool)):
            indexes = rng.choice(a=polygon.shape[0], size=5, replace=True)
            input_points = polygon[indexes]
            prompt_inputs = self.processor(
                images=rgb_image, input_points=[[input_points]], return_tensors="pt"
            ).to(self.device)
            # The precomputed embeddings are passed instead of the raw pixels.
            del prompt_inputs["pixel_values"]
            outputs = self.model(image_embeddings=image_embeddings, **prompt_inputs)
            # NOTE(review): [0][0][0] keeps the first entry along each leading axis,
            # presumably image / prompt / candidate mask - confirm against the
            # transformers documentation.
            polygon_mask = self.processor.image_processor.post_process_masks(
                masks=outputs.pred_masks.cpu().detach(),
                original_sizes=prompt_inputs["original_sizes"].cpu().detach(),
                reshaped_input_sizes=prompt_inputs["reshaped_input_sizes"].cpu().detach(),
            )[0][0][0].numpy()
            polygon_masks.append(polygon_mask)

        return masks_to_marks(masks=np.array(polygon_masks))

generate(image, mask=None)

Generate image segmentation marks.

Parameters:

Name Type Description Default
image ndarray

The image to be marked in BGR format.

required
mask Optional[ndarray]

The mask to be used as a guide for segmentation.

None

Returns:

Type Description
Detections

An object containing the segmentation masks and their corresponding bounding box coordinates.

Source code in maestro/markers/sam.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def generate(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> sv.Detections:
    """Generate image segmentation marks.

    When `mask` is None the image is passed to the mask-generation
    pipeline. Otherwise `mask` is converted to polygons and, for every
    polygon, the model is prompted with five points sampled (with
    replacement) from that polygon; one predicted mask is kept per polygon.

    Parameters:
        image (np.ndarray): The image to be marked in BGR format.
        mask (Optional[np.ndarray]): The mask to be used as a guide for
            segmentation.

    Returns:
        sv.Detections: An object containing the segmentation masks and their
            corresponding bounding box coordinates.
    """
    rgb_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    if mask is None:
        outputs = self.pipeline(rgb_image, points_per_batch=64)
        return masks_to_marks(masks=np.array(outputs["masks"]))

    # Compute the image embeddings once; they are reused for every polygon prompt.
    image_inputs = self.processor(rgb_image, return_tensors="pt").to(self.device)
    image_embeddings = self.model.get_image_embeddings(image_inputs.pixel_values)

    # One generator for the whole call instead of a new one per polygon.
    rng = np.random.default_rng()
    polygon_masks = []
    for polygon in sv.mask_to_polygons(mask.astype(bool)):
        indexes = rng.choice(a=polygon.shape[0], size=5, replace=True)
        input_points = polygon[indexes]
        prompt_inputs = self.processor(
            images=rgb_image, input_points=[[input_points]], return_tensors="pt"
        ).to(self.device)
        # The precomputed embeddings are passed instead of the raw pixels.
        del prompt_inputs["pixel_values"]
        outputs = self.model(image_embeddings=image_embeddings, **prompt_inputs)
        # NOTE(review): [0][0][0] keeps the first entry along each leading axis,
        # presumably image / prompt / candidate mask - confirm against the
        # transformers documentation.
        polygon_mask = self.processor.image_processor.post_process_masks(
            masks=outputs.pred_masks.cpu().detach(),
            original_sizes=prompt_inputs["original_sizes"].cpu().detach(),
            reshaped_input_sizes=prompt_inputs["reshaped_input_sizes"].cpu().detach(),
        )[0][0][0].numpy()
        polygon_masks.append(polygon_mask)

    return masks_to_marks(masks=np.array(polygon_masks))