o
    Vh0                     @   s<  d dl mZ d dlZd dlmZ ddlmZ ddlmZm	Z	m
Z
mZmZmZ ddlmZmZ ddlmZmZ dd	lmZ dd
lmZ d+ddZd+ddZd+ddZd+ddZd+ddZd+ddZd+ddZd+ddZ		d,ddZ dg dd g d!g d"d#d#gg d$dfd%d&Z!eeeeeeeeeeeed'Z"d-d)d*Z#dS ).    )partialN)attempt_download_asset   )MaskDecoder)FpnNeckHieraImageEncoderImageEncoderViTMemoryEncoderPromptEncoder)MemoryAttentionMemoryAttentionLayer)	SAM2ModelSAMModel)TinyViT)TwoWayTransformerc                 C      t dddg d| dS )zaBuilds and returns a Segment Anything Model (SAM) h-size model with specified encoder parameters.                      encoder_embed_dimencoder_depthencoder_num_headsencoder_global_attn_indexes
checkpoint
_build_samr     r$   P/var/www/vscode/kcb/lib/python3.10/site-packages/ultralytics/models/sam/build.pybuild_sam_vit_h      r&   c                 C   r   )zaBuilds and returns a Segment Anything Model (SAM) l-size model with specified encoder parameters.      r   )         r   r   r!   r#   r$   r$   r%   build_sam_vit_l"   r'   r-   c                 C   s   t dddg d| dS )zgConstructs and returns a Segment Anything Model (SAM) with b-size architecture and optional checkpoint.      )   r*      r+   r   r!   r#   r$   r$   r%   build_sam_vit_b-   r'   r2   c                 C   s    t g dg dg ddd| dS )zaBuilds and returns a Mobile Segment Anything Model (Mobile-SAM) for efficient image segmentation.)@         i@  )r0   r0      r0   )r0      r*   
   NT)r   r   r   r   
mobile_samr    r!   r#   r$   r$   r%   build_mobile_sam8   s   r:   c              	   C   &   t dg ddg dg dg d| dS )zlBuilds and returns a Segment Anything Model 2 (SAM2) tiny-size model with specified architecture parameters.`   )r   r0   r   r0   r   )r*   r   	   r1   r7      r   r.   i     r<   r   encoder_stagesr   encoder_global_att_blocksencoder_window_specencoder_backbone_channel_listr    _build_sam2r#   r$   r$   r%   build_sam2_tD      rI   c              	   C   r;   )zeBuilds and returns a small-size Segment Anything Model (SAM2) with specified architecture parameters.r<   )r   r0   r+   r0   r   )r   r8      r>   r@   rB   rG   r#   r$   r$   r%   build_sam2_sQ   rJ   rL   c              
   C   s,   t dg ddg dg dddgg d| dS )	zQBuilds and returns a SAM2 base-size model with specified architecture parameters.p   )r0      r   rN   r0   )r/   r      r>   r?   )i  i     rM   )r   rC   r   rD   rE   encoder_window_spatial_sizerF   r    rG   r#   r$   r$   r%   build_sam2_b^   s   rR   c              	   C   r;   )zeBuilds and returns a large-size Segment Anything Model (SAM2) with specified architecture parameters.   r0   r6   $   r7   r0   )r   !   +   r1   r7   r   r1   i  i@  i   rS   rB   rG   r#   r$   r$   r%   build_sam2_ll   rJ   rZ   Fc                 C   s  d}d}d}|| }	|r t ddd| ||g ddddd	ddd
dnt|| |dttjjdd||dd|d|d}
t|
t||	|	f||fddtdt	d|ddd|dddg dg dd}|durt
|}t|d}t|}W d   n1 sww   Y  || |  |S )aA  
    Builds a Segment Anything Model (SAM) with specified encoder parameters.

    Args:
        encoder_embed_dim (int | List[int]): Embedding dimension for the encoder.
        encoder_depth (int | List[int]): Depth of the encoder.
        encoder_num_heads (int | List[int]): Number of attention heads in the encoder.
        encoder_global_attn_indexes (List[int] | None): Indexes for global attention in the encoder.
        checkpoint (str | None): Path to the model checkpoint file.
        mobile_sam (bool): Whether to build a Mobile-SAM model.

    Returns:
        (SAMModel): A Segment Anything Model instance with the specified architecture.

    Examples:
        >>> sam = _build_sam(768, 12, 12, [2, 5, 8, 11])
        >>> sam = _build_sam([64, 128, 160, 320], [2, 2, 6, 2], [2, 4, 5, 10], None, mobile_sam=True)
       r(   r   rN   i  )r   r   r?   r   g      @g        Fg?)img_sizein_chansnum_classes
embed_dimsdepths	num_headswindow_sizes	mlp_ratio	drop_ratedrop_path_rateuse_checkpointmbconv_expand_ratiolocal_conv_sizelayer_lr_decayr7   gư>)epsTr?   )depth	embed_dimr\   rc   
norm_layerra   
patch_sizeqkv_biasuse_rel_posglobal_attn_indexeswindow_size	out_chans)rl   image_embedding_sizeinput_image_sizemask_in_chansr0   i   r1   )rk   embedding_dimmlp_dimra   )num_multimask_outputstransformertransformer_dimiou_head_depthiou_head_hidden_dim)g33333^@gR]@gRY@)g(\2M@g(\L@g     L@)image_encoderprompt_encodermask_decoder
pixel_mean	pixel_stdNrb)r   r	   r   torchnn	LayerNormr   r   r   r   r   openloadload_state_dicteval)r   r   r   r   r    r9   prompt_embed_dim
image_sizevit_patch_sizert   r~   samf
state_dictr$   r$   r%   r"   y   s   !
r"   r   rT   r0   r   rY   r   rX   c              	   C   st  t t| |||||dtd|ddgdddd}tdd	d
t d}	tdd}
|duo,d|v }td7i d|d|	d|
dddddddddd	dd	dd	dd	dd	d d	d!d	d"d	d#d	d$d	d%d	d&d	d'd	d(d)d*dd+d	d,d-d.|d/|d0|d1td	d2d3d4}|durt|}t	|d5}t
|d6 }W d   n1 sw   Y  || |  |S )8a  
    Builds and returns a Segment Anything Model 2 (SAM2) with specified architecture parameters.

    Args:
        encoder_embed_dim (int): Embedding dimension for the encoder.
        encoder_stages (List[int]): Number of blocks in each stage of the encoder.
        encoder_num_heads (int): Number of attention heads in the encoder.
        encoder_global_att_blocks (List[int]): Indices of global attention blocks in the encoder.
        encoder_backbone_channel_list (List[int]): Channel dimensions for each level of the encoder backbone.
        encoder_window_spatial_size (List[int]): Spatial size of the window for position embeddings.
        encoder_window_spec (List[int]): Window specifications for each stage of the encoder.
        checkpoint (str | None): Path to the checkpoint file for loading pre-trained weights.

    Returns:
        (SAM2Model): A configured and initialized SAM2 model.

    Examples:
        >>> sam2_model = _build_sam2(encoder_embed_dim=96, encoder_stages=[1, 2, 7, 2])
        >>> sam2_model.eval()
    )rl   ra   stagesglobal_att_blocks!window_pos_embed_bkg_spatial_sizewindow_specr[   r0   rN   nearest)d_modelbackbone_channel_listfpn_top_down_levelsfpn_interp_modelr   )trunkneckscalpTr7   )r   pos_enc_at_input
num_layerslayerr3   )out_dimNzsam2.1r~   memory_attentionmemory_encodernum_maskmemr   r   r(   sigmoid_scale_for_mem_encg      4@sigmoid_bias_for_mem_encg      $$use_mask_input_as_output_without_samdirectly_add_no_mem_embeduse_high_res_features_in_sammultimask_output_in_samiou_prediction_use_sigmoiduse_obj_ptrs_in_encoderadd_tpos_enc_to_obj_ptrs"only_obj_ptrs_in_the_past_for_evalpred_obj_scorespred_obj_scores_mlpfixed_no_obj_ptrmultimask_output_for_trackinguse_multimask_token_for_obj_ptrmultimask_min_pt_numr   multimask_max_pt_numuse_mlp_for_obj_ptr_projcompile_image_encoderFno_obj_embed_spatialproj_tpos_enc_in_obj_ptrsuse_signed_tpos_enc_to_obj_ptrssam_mask_decoder_extra_argsg?g\(\?)dynamic_multimask_via_stability!dynamic_multimask_stability_delta"dynamic_multimask_stability_threshr   modelr$   )r   r   r   r   r   r
   r   dictr   r   r   r   r   r   )r   rC   r   rD   rF   rQ   rE   r    r~   r   r   	is_sam2_1sam2r   r   r$   r$   r%   rH      s   
	
#
rH   )zsam_h.ptzsam_l.ptsam_b.ptzmobile_sam.ptz	sam2_t.ptz	sam2_s.ptz	sam2_b.ptz	sam2_l.ptzsam2.1_t.ptzsam2.1_s.ptzsam2.1_b.ptzsam2.1_l.ptr   c                 C   sP   d}t | } t D ]}| |rt|}q
|s$t|  dt  || S )a  
    Builds and returns a Segment Anything Model (SAM) based on the provided checkpoint.

    Args:
        ckpt (str | Path): Path to the checkpoint file or name of a pre-defined SAM model.

    Returns:
        (SAMModel | SAM2Model): A configured and initialized SAM or SAM2 model instance.

    Raises:
        FileNotFoundError: If the provided checkpoint is not a supported SAM model.

    Examples:
        >>> sam_model = build_sam("sam_b.pt")
        >>> sam_model = build_sam("path/to/custom_checkpoint.pt")

    Notes:
        Supported pre-defined models include:
        - SAM: 'sam_h.pt', 'sam_l.pt', 'sam_b.pt', 'mobile_sam.pt'
        - SAM2: 'sam2_t.pt', 'sam2_s.pt', 'sam2_b.pt', 'sam2_l.pt'
    Nz7 is not a supported SAM model. Available models are: 
 )strsam_model_mapkeysendswithgetFileNotFoundError)ckptmodel_builderkr$   r$   r%   	build_samG  s   

r   )N)NF)r   )$	functoolsr   r   ultralytics.utils.downloadsr   modules.decodersr   modules.encodersr   r   r   r	   r
   r   modules.memory_attentionr   r   modules.samr   r   modules.tiny_encoderr   modules.transformerr   r&   r-   r2   r:   rI   rL   rR   rZ   r"   rH   r   r   r$   r$   r$   r%   <module>   sV    








`
`