o
    Vñhã0  ã                   @   s<  d dl mZ d dlZd dlmZ ddlmZ ddlmZm	Z	m
Z
mZmZmZ ddlmZmZ ddlmZmZ dd	lmZ dd
lmZ d+dd„Zd+dd„Zd+dd„Zd+dd„Zd+dd„Zd+dd„Zd+dd„Zd+dd„Z		d,dd„Z dg d¢d g d!¢g d"¢d#d#gg d$¢dfd%d&„Z!eeeeeeeeeeeed'œZ"d-d)d*„Z#dS ).é    )ÚpartialN)Úattempt_download_asseté   )ÚMaskDecoder)ÚFpnNeckÚHieraÚImageEncoderÚImageEncoderViTÚMemoryEncoderÚPromptEncoder)ÚMemoryAttentionÚMemoryAttentionLayer)Ú	SAM2ModelÚSAMModel)ÚTinyViT)ÚTwoWayTransformerc                 C   ó   t dddg d¢| dS )zaBuilds and returns a Segment Anything Model (SAM) h-size model with specified encoder parameters.é   é    é   ©é   é   é   é   ©Úencoder_embed_dimÚencoder_depthÚencoder_num_headsÚencoder_global_attn_indexesÚ
checkpoint©Ú
_build_sam©r    © r$   úP/var/www/vscode/kcb/lib/python3.10/site-packages/ultralytics/models/sam/build.pyÚbuild_sam_vit_h   ó   ûr&   c                 C   r   )zaBuilds and returns a Segment Anything Model (SAM) l-size model with specified encoder parameters.é   é   r   )é   é   é   r   r   r!   r#   r$   r$   r%   Úbuild_sam_vit_l"   r'   r-   c                 C   s   t dddg d¢| dS )zgConstructs and returns a Segment Anything Model (SAM) with b-size architecture and optional checkpoint.é   é   )é   r*   é   r+   r   r!   r#   r$   r$   r%   Úbuild_sam_vit_b-   r'   r2   c                 C   s    t g d¢g d¢g d¢dd| dS )zaBuilds and returns a Mobile Segment Anything Model (Mobile-SAM) for efficient image segmentation.)é@   é€   é    i@  )r0   r0   é   r0   )r0   é   r*   é
   NT)r   r   r   r   Ú
mobile_samr    r!   r#   r$   r$   r%   Úbuild_mobile_sam8   s   úr:   c              	   C   ó&   t dg d¢dg d¢g d¢g d¢| dS )zlBuilds and returns a Segment Anything Model 2 (SAM2) tiny-size model with specified architecture parameters.é`   )r   r0   r   r0   r   )r*   r   é	   ©r1   r7   é   r   ©r.   i€  éÀ   r<   ©r   Úencoder_stagesr   Úencoder_global_att_blocksÚencoder_window_specÚencoder_backbone_channel_listr    ©Ú_build_sam2r#   r$   r$   r%   Úbuild_sam2_tD   ó   ùrI   c              	   C   r;   )zeBuilds and returns a small-size Segment Anything Model (SAM2) with specified architecture parameters.r<   )r   r0   r+   r0   r   )r   r8   é   r>   r@   rB   rG   r#   r$   r$   r%   Úbuild_sam2_sQ   rJ   rL   c              
   C   s,   t dg d¢dg d¢g d¢ddgg d¢| dS )	zQBuilds and returns a SAM2 base-size model with specified architecture parameters.ép   )r0   é   r   rN   r0   )r/   r   é   r>   r?   )i€  iÀ  éà   rM   )r   rC   r   rD   rE   Úencoder_window_spatial_sizerF   r    rG   r#   r$   r$   r%   Úbuild_sam2_b^   s   ørR   c              	   C   r;   )zeBuilds and returns a large-size Segment Anything Model (SAM2) with specified architecture parameters.é   ©r0   r6   é$   r7   r0   )r   é!   é+   ©r1   r7   r   r1   ©i€  i@  i   rS   rB   rG   r#   r$   r$   r%   Úbuild_sam2_ll   rJ   rZ   Fc                 C   s  d}d}d}|| }	|r t ddd| ||g d¢dddd	ddd
dnt|| |dttjjdd||dd|d|d}
t|
t||	|	f||fddtdt	d|ddd|dddg d¢g d¢d}|durt
|ƒ}t|dƒ}t |¡}W d  ƒ n1 sww   Y  | |¡ | ¡  |S )aA  
    Builds a Segment Anything Model (SAM) with specified encoder parameters.

    Args:
        encoder_embed_dim (int | List[int]): Embedding dimension for the encoder.
        encoder_depth (int | List[int]): Depth of the encoder.
        encoder_num_heads (int | List[int]): Number of attention heads in the encoder.
        encoder_global_attn_indexes (List[int] | None): Indexes for global attention in the encoder.
        checkpoint (str | None): Path to the model checkpoint file.
        mobile_sam (bool): Whether to build a Mobile-SAM model.

    Returns:
        (SAMModel): A Segment Anything Model instance with the specified architecture.

    Examples:
        >>> sam = _build_sam(768, 12, 12, [2, 5, 8, 11])
        >>> sam = _build_sam([64, 128, 160, 320], [2, 2, 6, 2], [2, 4, 5, 10], None, mobile_sam=True)
    é   r(   r   rN   iè  )r   r   r?   r   g      @g        Fgš™™™™™é?)Úimg_sizeÚin_chansÚnum_classesÚ
embed_dimsÚdepthsÚ	num_headsÚwindow_sizesÚ	mlp_ratioÚ	drop_rateÚdrop_path_rateÚuse_checkpointÚmbconv_expand_ratioÚlocal_conv_sizeÚlayer_lr_decayr7   gíµ ÷Æ°>)ÚepsTr?   )ÚdepthÚ	embed_dimr\   rc   Ú
norm_layerra   Ú
patch_sizeÚqkv_biasÚuse_rel_posÚglobal_attn_indexesÚwindow_sizeÚ	out_chans)rl   Úimage_embedding_sizeÚinput_image_sizeÚmask_in_chansr0   i   r1   )rk   Úembedding_dimÚmlp_dimra   )Únum_multimask_outputsÚtransformerÚtransformer_dimÚiou_head_depthÚiou_head_hidden_dim)g33333ë^@gR¸…ë]@gR¸…ëáY@)gÃõ(\2M@gÂõ(\L@g     °L@)Úimage_encoderÚprompt_encoderÚmask_decoderÚ
pixel_meanÚ	pixel_stdNÚrb)r   r	   r   ÚtorchÚnnÚ	LayerNormr   r   r   r   r   ÚopenÚloadÚload_state_dictÚeval)r   r   r   r   r    r9   Úprompt_embed_dimÚ
image_sizeÚvit_patch_sizert   r~   ÚsamÚfÚ
state_dictr$   r$   r%   r"   y   s†   ðòôî!üüöëÿ
r"   r   rT   r0   r   rY   r   rX   c              	   C   st  t t| |||||dtd|ddgdddd}tdd	d
tƒ d}	tdd}
|duo,d|v }td7i d|“d|	“d|
“dd“dd“dd“dd“dd	“dd	“dd	“dd	“dd	“d d	“d!d	“d"d	“d#d	“d$d	“d%d	“d&d	“d'd	“d(d)“d*d“d+d	“d,d-“d.|“d/|“d0|“d1td	d2d3d4“Ž}|dur´t|ƒ}t	|d5ƒ}t
 |¡d6 }W d  ƒ n1 sªw   Y  | |¡ | ¡  |S )8a  
    Builds and returns a Segment Anything Model 2 (SAM2) with specified architecture parameters.

    Args:
        encoder_embed_dim (int): Embedding dimension for the encoder.
        encoder_stages (List[int]): Number of blocks in each stage of the encoder.
        encoder_num_heads (int): Number of attention heads in the encoder.
        encoder_global_att_blocks (List[int]): Indices of global attention blocks in the encoder.
        encoder_backbone_channel_list (List[int]): Channel dimensions for each level of the encoder backbone.
        encoder_window_spatial_size (List[int]): Spatial size of the window for position embeddings.
        encoder_window_spec (List[int]): Window specifications for each stage of the encoder.
        checkpoint (str | None): Path to the checkpoint file for loading pre-trained weights.

    Returns:
        (SAM2Model): A configured and initialized SAM2 model.

    Examples:
        >>> sam2_model = _build_sam2(encoder_embed_dim=96, encoder_stages=[1, 2, 7, 2])
        >>> sam2_model.eval()
    )rl   ra   ÚstagesÚglobal_att_blocksÚ!window_pos_embed_bkg_spatial_sizeÚwindow_specr[   r0   rN   Únearest)Úd_modelÚbackbone_channel_listÚfpn_top_down_levelsÚfpn_interp_modelr   )ÚtrunkÚneckÚscalpTr7   )r–   Úpos_enc_at_inputÚ
num_layersÚlayerr3   )Úout_dimNzsam2.1r~   Úmemory_attentionÚmemory_encoderÚnum_maskmemr   rŒ   r(   Úsigmoid_scale_for_mem_encg      4@Úsigmoid_bias_for_mem_encg      $ÀÚ$use_mask_input_as_output_without_samÚdirectly_add_no_mem_embedÚuse_high_res_features_in_samÚmultimask_output_in_samÚiou_prediction_use_sigmoidÚuse_obj_ptrs_in_encoderÚadd_tpos_enc_to_obj_ptrsÚ"only_obj_ptrs_in_the_past_for_evalÚpred_obj_scoresÚpred_obj_scores_mlpÚfixed_no_obj_ptrÚmultimask_output_for_trackingÚuse_multimask_token_for_obj_ptrÚmultimask_min_pt_numr   Úmultimask_max_pt_numÚuse_mlp_for_obj_ptr_projÚcompile_image_encoderFÚno_obj_embed_spatialÚproj_tpos_enc_in_obj_ptrsÚuse_signed_tpos_enc_to_obj_ptrsÚsam_mask_decoder_extra_argsgš™™™™™©?g\Âõ(\ï?)Údynamic_multimask_via_stabilityÚ!dynamic_multimask_stability_deltaÚ"dynamic_multimask_stability_threshrƒ   Úmodelr$   )r   r   r   r   r   r
   r   Údictr   r‡   r„   rˆ   r‰   rŠ   )r   rC   r   rD   rF   rQ   rE   r    r~   r¡   r¢   Ú	is_sam2_1Úsam2r   r   r$   r$   r%   rH   Ø   s²   úüñ
ÿþýüûúùø	÷
öõôóòñðïîíìëêéèçæåýä#ÿ
rH   )zsam_h.ptzsam_l.ptúsam_b.ptzmobile_sam.ptz	sam2_t.ptz	sam2_s.ptz	sam2_b.ptz	sam2_l.ptzsam2.1_t.ptzsam2.1_s.ptzsam2.1_b.ptzsam2.1_l.ptrÂ   c                 C   sP   d}t | ƒ} t ¡ D ]}|  |¡rt |¡}q
|s$t| › dt ¡ › ƒ‚|| ƒS )aÊ  
    Builds and returns a Segment Anything Model (SAM) based on the provided checkpoint.

    Args:
        ckpt (str | Path): Path to the checkpoint file or name of a pre-defined SAM model.

    Returns:
        (SAMModel | SAM2Model): A configured and initialized SAM or SAM2 model instance.

    Raises:
        FileNotFoundError: If the provided checkpoint is not a supported SAM model.

    Examples:
        >>> sam_model = build_sam("sam_b.pt")
        >>> sam_model = build_sam("path/to/custom_checkpoint.pt")

    Notes:
        Supported pre-defined models include:
        - SAM: 'sam_h.pt', 'sam_l.pt', 'sam_b.pt', 'mobile_sam.pt'
        - SAM2: 'sam2_t.pt', 'sam2_s.pt', 'sam2_b.pt', 'sam2_l.pt'
    Nz7 is not a supported SAM model. Available models are: 
 )ÚstrÚsam_model_mapÚkeysÚendswithÚgetÚFileNotFoundError)ÚckptÚmodel_builderÚkr$   r$   r%   Ú	build_samG  s   

€rÌ   )N)NF)rÂ   )$Ú	functoolsr   r„   Úultralytics.utils.downloadsr   Úmodules.decodersr   Úmodules.encodersr   r   r   r	   r
   r   Úmodules.memory_attentionr   r   Úmodules.samr   r   Úmodules.tiny_encoderr   Úmodules.transformerr   r&   r-   r2   r:   rI   rL   rR   rZ   r"   rH   rÄ   rÌ   r$   r$   r$   r%   Ú<module>   sV    








ú`
ø`ô