o
    Ih+                     @  s  d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZmZ d dlmZ d dlmZ e	r>d dlmZ ejed	Zejed
Zeee
eejf   Zeee gef ZejddG dd dZ ejddG dd dZ!ejddG dd dZ"d`ddZ#daddZ$dbddZ%dcd!d"Z&ddd%d&Z'ded+d,Z(dfd2d3Z)dgd5d6Z*dhd9d:Z+dhd;d<Z,did?d@Z-ejG dAdB dBZ.djdJdKZ/dkdMdNZ0G dOdP dPeZ1dldUdVZ2dmd\d]Z3ejddG d^d_ d_Z4dS )n    )annotationsN)Enum)AnyCallableOptionalTYPE_CHECKINGUnion)countersget_metrics_context)	InputType)
OrderedSet)Sequence
perf_hintscudagraph_static_inputsT)frozenc                   @  s   e Zd ZU dZded< dS )
FunctionIDz9Unique counter of a function wrapped in cudagraphify_implintidN__name__
__module____qualname____doc____annotations__ r   r   S/var/www/vscode/kcb/lib/python3.10/site-packages/torch/_inductor/cudagraph_utils.pyr      s   
 r   c                   @  s2   e Zd ZU dZded< ded< ded< ded< d	S )
PlaceholderInfoz
    A serializable version of torch.fx.Node that contains information
    pertinent to placeholder stack traces. We use these in logging and error messages
    related to cudagraphs, and will cache these results.
    strnameOptional[str]stack_tracelist[PlaceholderInfo]usersmutating_use_stack_traceNr   r   r   r   r   r   #   s   
 r   c                   @  sB   e Zd ZU dZded< ded< ded< ded	< d
ed< ded< dS )WrappedFunctionz
    Represents a function that you want to record for CUDA graph replay,
    with a little more metadata so we can identify if we have an applicable
    CUDA graph in our CUDA graph tree for it.
    zCallable[..., Any]modelSequence[int]static_input_idxsr   r   ztuple[torch.Tensor, ...]	constantsSequence[PlaceholderInfo]placeholdersmutated_input_idxsNr   r   r   r   r   r$   2   s   
 r$   placeholder_nodetorch.fx.Nodereturnr   c                 C  sb   t | jdkrtt| jjdd S | jD ]}|jtjj	j
jkr.|jdd  }r.|  S qd S )N   r    )lenr"   nextitermetagettargettorchopsatencopy_default)r,   user    r   r   r   &get_mutating_use_stack_trace_from_nodeB   s   
r<   placeholder_infoc                 C  s   | j S N)r#   )r=   r   r   r   get_mutating_use_stack_traceQ   s   r?   c                 C  sL   | j }| jdd }g }d }| jdkrdd | jD }t| }t||||S )Nr    placeholderc                 S  s   g | ]}t |qS r   )to_placeholder_info.0ir   r   r   
<listcomp>\       z'to_placeholder_info.<locals>.<listcomp>)r   r3   r4   opr"   r<   r   )r,   r   r    r"   r#   r   r   r   rA   U   s   
rA   graphtorch.fx.Graphr!   c                 C  s   dd | j D S )Nc                 S  s   g | ]}|j d krt|qS )r@   )rG   rA   )rC   noder   r   r   rE   e   s    z(get_placeholder_info.<locals>.<listcomp>)nodes)rH   r   r   r   get_placeholder_infod   s   rL   reasonr   c                 C  s
   d|  S )Nzskipping cudagraphs due to r   )rM   r   r   r   format_default_skip_messagej   s   
rN   r*   r)   mutation_indicesr&   c                 C  sP   d}|D ]}| | }t | }r nqtdt| d}|r&| d| S |S )N zmutated inputs (z instances). Found from : 
 )r?   rN   r0   )r*   rO   r    idxr@   msgr   r   r   get_mutation_stack_tracen   s   rT   funcinputslist[InputType]is_cuda_graph_recorded_tensorCallable[[torch.Tensor], bool]c                   sZ   t jjjjr fdd jD }n j}td j td| |r+t	 j
|S d S )Nc                   s&   g | ]}| j v s| s|qS r   )r'   rC   rR   rU   rV   rX   r   r   rE      s    

z&check_for_mutation.<locals>.<listcomp>z'check mutation static input indices: %sz#check mutation mutation indices: %s)r6   	_inductorconfigtritoncudagraph_treesr+   static_inputs_logdebugr'   rT   r*   )rU   rV   rX   rO   r   r[   r   check_for_mutation   s   	rb   rJ   c                 C  s*   | j D ]}|jdd  }r|  S qd S )Nr    )r"   r3   r4   )rJ   r;   r    r   r   r   _get_use_stack_trace   s
   
rc   device_node_mapping!dict[torch.device, torch.fx.Node]c                 C  s   |  td }r$d|j d}t| }r t| d| S t|S t| dkr7tt| 	 j
dkr7d S dd | 	 D }td	d
| S )Ncpuzcpu device ()rQ   r/   cudac                 s  s    | ]}t |V  qd S r>   )repr)rC   keyr   r   r   	<genexpr>   s    z:check_multiple_devices_or_any_cpu_nodes.<locals>.<genexpr>zmultiple devices: z, )r4   r6   devicer   rc   rN   r0   r1   r2   keystypejoin)rd   cpu_noderS   r    	keys_reprr   r   r   'check_multiple_devices_or_any_cpu_nodes   s   rr   c                 C  s   t | S r>   )rr   )rd   r   r   r    check_lowering_disable_cudagraph   s   rs   rS   Nonec                 C  sD   t |  td d  d7  < t }| r |jd| dd d S d S )Ninductorcudagraph_skipsr/   cudagraph_skip_reasonT)	overwrite)perf_hint_logwarningr	   r
   in_progressset)rS   metrics_contextr   r   r   #log_cudagraph_skip_and_bump_counter   s   
r~   c                   @  s    e Zd ZU ded< d	ddZdS )
BoxedDeviceIndexOptional[int]value
device_idxr.   rt   c                 C  s    |d u st |tsJ || _d S r>   )
isinstancer   r   )selfr   r   r   r   r|      s   
zBoxedDeviceIndex.setN)r   r   r.   rt   )r   r   r   r   r|   r   r   r   r   r      s   
 r   gmtorch.fx.GraphModulemutated_inputsOrderedSet[str]r+   OrderedSet[int]r'   c                   sn   t d}tjjjjr+t|  fdd|D }t|dk}|s!d S t| j	}t
||S t|dk}|s5d S |S )Nzmutated inputsc                   s   g | ]}| vr|qS r   r   rZ   unique_idxsr   r   rE      s    zGcheck_for_mutation_ignore_cuda_graph_managed_tensor.<locals>.<listcomp>r   )rN   r6   r\   r]   r^   r_   r   r0   rL   rH   rT   )r   r   r+   r'   default_msgrO   has_mutationr*   r   r   r   3check_for_mutation_ignore_cuda_graph_managed_tensor   s   

r   r@   c                 C  s,   | j r| j S | jD ]
}|j r|j   S q	dS )zM
    Gets the first non-empty stack trace of a placeholder or its users.
    N)r    r"   )r@   userr   r   r   get_placeholder_stack_trace   s   

r   c                   @  s&   e Zd ZdZdZdZdZd
ddZd	S )CheckInvariantStatusr/            r.   r   c                 C  s<   | j dkrdS | j dkrdS | j dkrdS | j  d| j S )NCudagraphManagedIdxMismatchz-cudagraph managed tensor data pointer changedStaticInputIdxMismatchz!static input data pointer changed&ExpectedDeadIndicesBeforeGraphMismatchz+expected dead indices before graph are livez: )r   r   )r   r   r   r   __str__  s   


zCheckInvariantStatus.__str__Nr.   r   )r   r   r   SUCCESSr   r   r   r   r   r   r   r   r      s    r   recorded_data_ptrSequence[Optional[int]]target_idxsmismatchc                   s   t  t krt  t | ksJ d fdd|D }fdd|D }| d}tt||D ]2\}\}	}
t|	tjs@J || }|	 |
krd| | }| d|j d|
 d|	  d	t| d

}q2|S )z}
    Logs the mismatch between input data pointers and recorded data pointers.
    This checks only idxs in target_idxs.
    zClength mismatch between inputs, recorded_data_ptr, and placeholdersc                      g | ]} | qS r   r   rB   )rV   r   r   rE     rF   z)log_data_ptr_mismatch.<locals>.<listcomp>c                   r   r   r   rB   )r   r   r   rE      rF   z.
zinput name: z. data pointer changed from z to z. input stack trace: 
)	r0   	enumeratezipr   r6   Tensordata_ptrr   r   )r*   rV   r   r   r   	t_tensorst_data_ptrs	error_msgrD   tensorr   indexr@   r   )rV   r   r   log_data_ptr_mismatch  s*   "
r   fn_cache)dict[tuple[int, ...], Callable[..., Any]]new_int_keyr   boolc                   sN   t |  d  d fdd}tjjjjr% tjjjjkr%t|  dS dS )	Nr/   r.   r   c                     s   d  dS )NzCUDAGraph supports dynamic shapes by recording a new graph for each distinct input size. Recording too many CUDAGraphs may lead to extra overhead. We have observed a0   distinct sizes. Please consider the following options for better performance: a) padding inputs to a few fixed number of shapes; or b) set torch._inductor.config.triton.cudagraph_skip_dynamic_graphs=True. Set torch._inductor.config.triton.cudagraph_dynamic_shape_warn_limit=None to silence this warning.r   r   num_cudagraphsr   r   warn_msg5  s   z4maybe_warning_due_to_dynamic_shape.<locals>.warn_msgTFr   )	r0   rm   r6   r\   r]   r^   "cudagraph_dynamic_shape_warn_limitry   rz   )r   r   r   r   r   r   "maybe_warning_due_to_dynamic_shape/  s   

r   c                   @  s*   e Zd ZU dZded< ded< ded< dS )	CudagraphCachedInfoz'
    Info needed to realign inputs
    r)   r*   zlist[Optional[str]]stack_tracesz	list[str]cudagraph_fail_reasonsNr   r   r   r   r   r   L  s
   
 r   )r,   r-   r.   r   )r=   r   r.   r   )r,   r-   r.   r   )rH   rI   r.   r!   )rM   r   r.   r   )r*   r)   rO   r&   r.   r   )rU   r$   rV   rW   rX   rY   r.   r   )rJ   r-   r.   r   )rd   re   r.   r   )rS   r   r.   rt   )
r   r   r   r   r+   r   r'   r&   r.   r   )r@   r   r.   r   )r*   r)   rV   rW   r   r   r   r&   r   r   r.   r   )r   r   r   r   r.   r   )5
__future__r   dataclassesenumr   typingr   r   r   r   r   r6   torch._dynamo.utilsr	   r
   torch._inductor.utilsr   torch.utils._ordered_setr   collections.abcr   _logginggetArtifactLoggerr   ry   r`   listr   r   
OutputType	ModelType	dataclassr   r   r$   r<   r?   rA   rL   rN   rT   rb   rc   rr   rs   r~   r   r   r   r   r   r   r   r   r   r   r   <module>   sT   


















