o
    Ih8                     @   s  d dl mZ d dlZd dlm  mZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZ d d
lmZmZ dd Zd ddZdejfddZdejfddZdd Zdd Z dd Z!dd Z"e! Z#e" Z$dd Z%e% Z&dd Z'dS )!    )OptionalN)_prims)DispatchKey)autograd_not_implemented)HigherOrderOperator)CUDARngStateHelpermake_contiguous_strides_for)FakeTensorMode)disable_proxy_modes_tracingProxyTorchDispatchModetrack_tensor_tree)_device_dtypec                 C   s"   t d| j d| j d| j d)Nz"You are trying to functionalize a z RNG operator but zE does not use Philox/counter-based RNG. Therefore, functionalizing a zo RNG operator is not supported. We are discussing the possibility of a Philox-based RNG implementation for CPU.)RuntimeErrortype)device r   J/var/www/vscode/kcb/lib/python3.10/site-packages/torch/_prims/rng_prims.pythrow_on_non_cuda   s
   r   c           
      C   s|   t jjd|  |d|d}|| tt jjj| }|j}|r!||_	||fD ]}	||	_
t jjj|	_| | |	_||	_||	_q%d S )Nz
rngprims::r   )mutates_argsschema)torchlibrary	custom_opregister_fakegetattr_opsopsrngprimsdefault_tags__doc___prims_commonRETURN_TYPENEWreturn_typer   	impl_atenprim_meta_impl)
namer   r&   	impl_metadoctagsrngprim_defprim_packetprimpr   r   r   register_rng_prim   s   

r0   shapec                 C   s   t tjdtjdS )Nr   dtype)r   
TensorLiker   tensorint64)r1   r   r   r   philox_rand_offset_meta2   s   r7   c                 C   s   d}| D ]}||9 }qt j|t jd}d}d}d}t jt j }|j| }|| d | }	t|	|j| }	|d ||	 |  d | }
|
S )N   r2         )	r   scalar_tensorr6   cudaget_device_propertiescurrent_devicemax_threads_per_multi_processorminmulti_processor_count)r1   numel_scalardim_sizenumel
block_sizeunrollcurand4_engine_callsdevice_propertyblocks_per_sm	grid_sizeoffsetr   r   r   philox_rand_offset8   s   

rL   c                  C   s   d} d}dt jdt jdt jdtttdf  dtd	tfd
d}dt jdt jdt jdtttdf  dtd	tfdd}t| |||dt j	j
fd d S )Nphilox_randz{(SymInt[] size, Tensor seed, Tensor offset, int[]? stride, Device? device=None, ScalarType? dtype=None) -> (Tensor, Tensor)r1   seedrK   stride.r   r3   c                 S   s6   |d u sJ t | }tj| |||d}t| }||fS )N)r1   stridesr3   r   )r   r   
TensorMetar7   )r1   rN   rK   rO   r   r3   random_valuesr   r   r   _philox_rand_metaT   s   	z/register_philox_rand.<locals>._philox_rand_metac                 S   s   |d u sJ |j dkrg }n|g}|j dkrt|tj| t|| tj| ||d}W d    n1 s9w   Y  |t| fS )Ncpur<   )r   r3   )	r   r   r   randomfork_rngr   set_torch_state_tensorrandrL   )r1   rN   rK   rO   r   r3   devicesrR   r   r   r   _philox_rande   s   	

z*register_philox_rand.<locals>._philox_randz$Philox based stateless rand operator)r(   r   r&   r)   r*   r+   )r   SizeTensorr   tupleintr   r   r0   Tagnondeterministic_seeded)r(   r   rS   rZ   r   r   r   register_philox_randP   sH   


ra   c                 C   s   | dr| d}t|trt|}|jS dd | D }tdd |D r)dS tdd |D r4dS td	d |D r?d
S tdd |D rJdS d S )Nr   c                 S   s    h | ]}t |tjr|jjqS r   )
isinstancer   r\   r   r   ).0argr   r   r   	<setcomp>   s     zget_device.<locals>.<setcomp>c                 s       | ]}|d kV  qdS )r<   Nr   rc   devr   r   r   	<genexpr>       zget_device.<locals>.<genexpr>r<   c                 s   rf   )xpuNr   rg   r   r   r   ri      rj   rk   c                 s   rf   )hpuNr   rg   r   r   r   ri      rj   rl   c                 s   rf   )rT   Nr   rg   r   r   r   ri      rj   rT   )getrb   strr   r   r   any)argskwargsr   rY   r   r   r   
get_device   s   



rr   c                     s   G dd dt } |  tjtdd tjdd tjdd tjd	d
 tjdd tj	fdd t
 fdd}t fdd}S )Nc                       (   e Zd Z fddZ fddZ  ZS )z>register_run_and_save_rng_state_op.<locals>.RunAndSaveRngStatec                       t  d d S )Nrun_and_save_rng_statesuper__init__self	__class__r   r   rx         zGregister_run_and_save_rng_state_op.<locals>.RunAndSaveRngState.__init__c                    s   t  j|g|R i |S Nrw   __call__)rz   oprp   rq   r{   r   r   r      s   zGregister_run_and_save_rng_state_op.<locals>.RunAndSaveRngState.__call____name__
__module____qualname__rx   r   __classcell__r   r   r{   r   RunAndSaveRngState       r   Tdeferred_errorc                 _      t j | |i |fS r~   )r   r<   get_rng_stater   rp   rq   r   r   r   	impl_cuda      z5register_run_and_save_rng_state_op.<locals>.impl_cudac                 _   s   t  | |i |fS r~   )r   r   r   r   r   r   impl_cpu   s   z4register_run_and_save_rng_state_op.<locals>.impl_cpuc                 _   s*   t tdrtj | |i |fS tdNrl   z2functionalize a hpu RNG operator is not supported.)hasattrr   rl   r   r   r   r   r   r   impl_hpu   s   
z4register_run_and_save_rng_state_op.<locals>.impl_hpuc                 _   r   r~   )r   rk   r   r   r   r   r   impl_xpu   r   z4register_run_and_save_rng_state_op.<locals>.impl_xpuc                    sL    d}t ||}||v sJ d| || }|| g|R i |S N)r<   rT   rl   rk   zBackend not supported for rr   )r   rp   rq   impl_mapr   implr   r   r   r   r   r   impl_backend_select   s   
z?register_run_and_save_rng_state_op.<locals>.impl_backend_selectc                    s>   |   |g|R i |W  d    S 1 sw   Y  d S r~   r   )moder   rp   rq   )r   r   r   impl_fake_tensor_mode   s   $zAregister_run_and_save_rng_state_op.<locals>.impl_fake_tensor_modec                    sb    |g|R i |}t | jj|g|R }t | jj|}| jd||}t||d | jdS Ncall_functionconstanttracer)pytreetree_mapr   unwrap_proxycreate_proxyr   )r   r   rp   rq   out
proxy_argsproxy_kwargs	out_proxy)r   ru   r   r   impl_proxy_dispatch_mode   s   zDregister_run_and_save_rng_state_op.<locals>.impl_proxy_dispatch_mode)r   py_implr   Autogradr   CUDACPUHPUXPUBackendSelectr	   r   )r   r   r   r   )r   r   r   r   r   ru   r   "register_run_and_save_rng_state_op   s(   










	r   c                     s   G dd dt } |  tjtdd tjdd tjdd  tjd	d
 tjdd t	fdd}tj
 fdd}tdd }jfdd}S )Nc                       rs   )z7register_run_with_rng_state_op.<locals>.RunWithRngStatec                    rt   )Nrun_with_rng_staterv   ry   r{   r   r   rx      r}   z@register_run_with_rng_state_op.<locals>.RunWithRngState.__init__c                    s   t  j||g|R i |S r~   r   )rz   	rng_stater   rp   rq   r{   r   r   r      s   z@register_run_with_rng_state_op.<locals>.RunWithRngState.__call__r   r   r   r{   r   RunWithRngState   r   r   Tr   c                 _   s8   t j }t j|   ||i |}t j| |S r~   )r   r<   r   set_rng_staterT   r   r   rp   rq   current_stater   r   r   r   r      s
   
z1register_run_with_rng_state_op.<locals>.impl_cudac                 _   s.   t  }t |  ||i |}t | |S r~   )r   r   r   r   r   r   r   r      s
   

z0register_run_with_rng_state_op.<locals>.impl_cpuc                 _   sF   t tdrtj }tj|  ||i |}tj| |S tdr   )r   r   rl   r   r   r   r   r   r   r   r      s   

z0register_run_with_rng_state_op.<locals>.impl_hpuc                 _   s4   t j }t j|  ||i |}t j| |S r~   )r   rk   r   r   r   r   r   r   r     s
   
z0register_run_with_rng_state_op.<locals>.impl_xpuc           	         s   t    ||g|R i |}W d    n1 sw   Y  t| jj||g|R }t| jj|}| jd ||}t||d | jdS r   r
   r   r   r   r   r   r   )	r   r   r   rp   rq   r   r   r   r   r   r   r   r   
  s   z@register_run_with_rng_state_op.<locals>.impl_proxy_dispatch_modec                    sN    d}t ||}||v sJ d| || }|| |g|R i |S r   r   )r   r   rp   rq   r   r   r   r   r   r   r     s   
z;register_run_with_rng_state_op.<locals>.impl_backend_selectc                 _   s6   |  ||i |W  d    S 1 sw   Y  d S r~   r   )r   r   r   rp   rq   r   r   r   r   $  s   $z=register_run_with_rng_state_op.<locals>.impl_fake_tensor_modec           	         sl   |  |}|  |}|  |}|    ||g|R i |}| |W  d    S 1 s/w   Y  d S r~   unwrap_tensorsredispatch_to_nextwrap_tensors)	ctxr   r   rp   rq   unwrapped_rng_stateunwrapped_argsunwrapped_kwargsr   r   r   r   impl_functional+  s   



$z7register_run_with_rng_state_op.<locals>.impl_functional)r   r   r   r   r   r   r   r   r   r   r   r	   py_functionalize_impl)r   r   r   r   r   r   )r   r   r   r   r   r   register_run_with_rng_state_op   s,   








	


r   c                     s   G dd dt } |    tjt dd  tjd ddd tjd dfdd	
} td dd
d} td d fdd
} j	d d fdd
} S )Nc                       s.   e Zd Z fddZdd fdd
Z  ZS )zJregister_graphsafe_run_with_rng_state_op.<locals>.GraphSafeRunWithRngStatec                    rt   )Ngraphsafe_run_with_rng_staterv   ry   r{   r   r   rx   @  r}   zSregister_graphsafe_run_with_rng_state_op.<locals>.GraphSafeRunWithRngState.__init__Nr   c                   s   t  j|g|R d|i|S Nr   r   )rz   r   r   rp   rq   r{   r   r   r   C  s   zSregister_graphsafe_run_with_rng_state_op.<locals>.GraphSafeRunWithRngState.__call__r   r   r   r{   r   GraphSafeRunWithRngState?  s    r   Tr   r   c                _   sB   |j j}tjj| }| }|| | |i |}|| |S r~   )r   indexr   r<   default_generatorsgraphsafe_get_stategraphsafe_set_state)r   r   rp   rq   
device_idx	generatorr   r   r   r   r   r   L  s   

z;register_graphsafe_run_with_rng_state_op.<locals>.impl_cudac                   s:   t ||}|dksJ d|  | g|R d|i|S )Nr<   z6GraphSafe RNG operations only supported for CUDA, got r   r   )r   r   rp   rq   r   )r   r   r   r   V  s
   

zEregister_graphsafe_run_with_rng_state_op.<locals>.impl_backend_selectc                _   s6   |  ||i |W  d    S 1 sw   Y  d S r~   r   )r   r   r   rp   rq   r   r   r   r   ^  s   $zGregister_graphsafe_run_with_rng_state_op.<locals>.impl_fake_tensor_modec          	         s   t    |g|R d|i|}W d    n1 sw   Y  t| jj|g|R }t| jjd|i|}| jd ||}t||d | jdS )Nr   r   r   r   )	r   r   r   rp   rq   r   r   r   r   r   r   r   r   c  s   zJregister_graphsafe_run_with_rng_state_op.<locals>.impl_proxy_dispatch_modec          	         sz   |d ur	|  |nd }|  |}|  |}|    |g|R d|i|}| |W  d    S 1 s6w   Y  d S r   r   )	r   r   r   rp   rq   r   r   r   r   r   r   r   r   p  s    


$zAregister_graphsafe_run_with_rng_state_op.<locals>.impl_functional)
r   r   r   r   r   r   r   r	   r   r   )r   r   r   r   r   r   )r   r   r   (register_graphsafe_run_with_rng_state_op>  s    



	r   c                   C   s
   t   d S r~   )ra   r   r   r   r   register_rng_prims  s   
r   r~   )(typingr   r   torch.utils._pytreeutils_pytreer   r   torch._Cr   torch._higher_order_ops.utilsr   
torch._opsr   torch._prims_commonr   r   torch._subclasses.fake_tensorr	   "torch.fx.experimental.proxy_tensorr
   r   r   torch.typesr   r   r   r0   r[   r7   rL   ra   rr   r   r   ru   r   r   r   r   r   r   r   r   <module>   s8   


7@`C