o
    0h|\                    @   s&  U d dl Z d dlZd dlmZ d dlmZ d dlmZmZ d dl	m
Z
mZ d dl	Z	d dlZd dlm  mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& d dl'm(Z( d dl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7 d dl8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z? d dl@mAZA d dlBZBe jjCe jDjjEjCe jDjjFjCe jDjjGjCe jDjjGjCgZHeIeA eJd< e jjKe jjLe jjMe jDjjGjNe jDjjGjNhZOePeQ eJd< g e
Rdd eHD ZSeIeQ eJd< dd eSD ZSi ZTeUeQeVf eJd< eHD ](ZWeWjXD ]!ZYeZeWeYZ[eWj\]dd]ddZ^e[eTvr2e^ deY eTe[< qqeddZ_G dd deZCdd Z`G d d! d!ZaG d"d# d#ZbG d$d% d%eZcG d&d' d'ZdG d(d) d)Zed*d+ Zfd,d- Zgd.d/ Zhd0d1 Zid2d3 Zjd4d5 Zkd6d7 Zld8d9 Zmd:d; Znd<d= Zod>d? Zpd@dA ZqdBdC ZrdDdE ZsdFdG ZtdHdI ZudJdK ZvdLdM ZwdNdO ZxdPdQ ZydRdS ZzdTdU Z{dVdW Z|dXdY Z}dZd[ Z~d\d] Zd^d_ Zd`da Zdbdc Zddde Zdfdg Zdhdi Zdjdk Zdldm Zdndo Zdpdq Zdrds Zdtdu Zdvdw Zdxdy Zdzd{ Zd|d} Zd~d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZddÄ Zddń ZddǄ ZddɄ Zdd˄ Zdd̈́ Zddτ Zddф Zddӄ ZddՄ Zddׄ Zddل ZdUdd܄Zddބ Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Ze(ejddeoe= dd e(ejddeo*e= dd e(ejddeo9e= dd e(ejddeoGe=e jfdde(e&dgdddde(e&dgdd	ddfZѐd
d ZҐdd ZӐdd ZԐdd ZՐdd Z֐dd Ze jj٠ڡ oe jj٠ېdd Zg eee jjeve(e%fdeee jje<ewe(e%e(ejddeBߐddfdeee jje<exe(eddde(e%fdeee jjeydeee jje<ezdeee jje<e{e(eddde(e%fdeee jjesdeee jjete(ejddeBߐddd e(e;de jgdd fdeee jjeue<e(eddde(e%fd!eee jjd"e|e(ejϐd#d$eBߐdde(ejϐd#d%eBߐddfd&eee jjd"e}e(eddeBߐdde(ejϐd#d$eBߐdde(ejϐd#d%eBߐddfd&eee jjd"e~e(e%e(ejϐd#d$eBߐdde(ejϐd#d%eBߐddfd&eee jjee(ejdd'de jgd(fdeee jjeed)dڐd*e<d"e(e!d+d,dde(e"dde jgd-e(e;ddde jgd(e(e;ddde jgd(fe(e#e jd.iddfd/eee jjeed0dڐd*e<d"e(e!d+d,dde(e"dde jgd-e(ejddde jgd(e(ejddde je jgd(e(e;ddde jgd(e(e;ddde jgd(fe(e#e jd.iddfd/eee jjeed1dڐd*e<d"e(e!d2d,dde(e"dde jgd-e(e%dde(ejddfe(e#e jd.iddfd/eee jjeed)dڐd"d3e<d"ee je(e!d+d,dde(e"dde jgd-e(ejdd4e jfdd e(e;ddde jgd(e(e;ddde jgd(fe(e#e jd.idde(e#e jd5iddfd6eee jjeed0dڐd"d3e<d"ee je(e!d+d,dde(e"dde jgd-e(ejdde je je jfd-e(ejddde je jgd(e(ejddde je jgd(e(ejdd4e jfdd e(e;ddde jgd(e(e;ddde jgd(fe(e#e jd.idde(e#e jd5iddfd6eee jjeed1dڐd"d3ee je<d"e(e!d2d,dde(e"dde jgd-e(e%e(ejdde(ejddde je jge=d7e(ejdd4e jfdd fe(e#e jd.idde(e#e jd.idd4e(e#e jd5iddfd8eee jjee(edddfdeee jjee(ejdd'de jgd(fdeee jjee<e(e%e(edddfd!eee jjee<e(e%e(edddfd!eee jjee(edddfdeee jjee(eddde(e:dde jgdd fdeee jjeed)d"d*e<d"e(e!d+d,dde(e"dde jgd-e(e$e(e;ddde jgd(e(e;ddde jgd(fe(e#e jd.iddfd/eee jj eed0d"d*e<d"e(e!d+d,dde(e"dde jgd-e(e$e(ejddde jgd(e(ejddde je jgd(e(e;ddde jgd(e(e;ddde jgd(fe(e#e jd.iddfd/eee jjeed1d"d*e<d"e(e!d2d,dde(e"dde jgd-e(e$e(e%e(ejddfe(e#e jd.iddfd/eee jjeed)d"d"d3e<d"e(e!d+d,dde(e"dde jgd-e(e$e(e;ddde jgd(e(e;ddde jgd(fe(e#e jd.iddfd/eee jjeed0d"d"d3e<d"e(e!d+d,dde(e"dde jgd-e(e$e(ejddde jgd(e(ejddde je jgd(e(e;ddde jgd(e(e;ddde jgd(fe(e#e jd.iddfd/eee jjeed1d"d"d3e<d"e(e!d2d,dde(e"dde jgd-e(e$e(e%e(ejddfe(e#e jd.iddfd/eee jjefe(edddfdeee jjege(ee je d.d.d9e je d.d.d9idd:d;dge(eddde(ejdd:de jgd(fd<eee jjee(eddde(edddfdeee jjee(eddde(eddde(ejddeBߐddd fdeee jj	ee(eddde(eddde(eddde(e:ddfdeee jj
edeee jjNedeee jjee<e(e:ddfd!eee jjehe(eddde(eddd4e(eddde(edddfdeee jjele(eddde(e;ddde jgd(e(ejdd:de jgd(fdeee jjee(edddfdeee jjee(eddde(e:ddde(edddfdeee jjee(eddde(e:ddde(edddfdeee jjee(eddde(ejdd:de jgd(fdeee jjee(edddfdeee jjeie(eddde(ejdd:de jgd(fdeee jjeje(edddfdeee jjeke(edddfdeee jjee(edddfdeee jjee(eddde(ejdd:de jgd(fdeee jjee(eddde(e:de jgdd fdeee jjee(eddde(e:de jgdd fdeee jjeed"dڐd=e(ejdde(ee je d>d?d9idd:e jgd;d e(ejdd4e jgdd fd@eee jjee(eddde(e:ddde(eddde(eddde(edddfdeee jjee(ejdd:de jgd(fdeee jjedeee jjeed"d"dAe(eddd4e(ee je d.d.d9iddd;de(eddddde(eddddde(eddde=dd fdBeee jjee(ejdd:dde(ejddCdde(ejdddde(ejdddde(ejddDddfdeee jj eeܐr;dne(ejddeBߐddd fdڐdEeee jj!edeee jj"eed)dFd"e(edddfdGeee jj#eed0dFd"e(edddfdGeee jj$eed1dFd"e(edHde(edHde(edHd:e(edHdIe(edHdDe(edddfdGeee jj%ee(edde(edde(edd:e(eddCe(eddIe(eddDfdeee jj&ee(edddfdeee jj'edeee jj(d"ee(edddd;dge(eddde(ejddJfdKeee jj)d"ee(ee je d.d.d9iddd;e>dLe(ee je d.dMd9idd:dde(edddd;dge(edddfdKeee jj*ee(edddd;dge(edddfd<eee jj+ed"e(edddd;dge(edddfdNeee jj,d"ee(edddfd&eee jj-ee(ee je d.d.d9iddddge(edddfd<eee jj.eeܐr;dne(ejddeBߐddd fdeee jj/edeee jj0ee(e%fdeee jj1ee(e%fdeee jj2eed"dOeҐdPeee jj3eeҐdPeee jj4eeӐdPeee jj5eeܐrdne(ejddeBߐddd fdeee jj6ee(ejdd:de jgd(fdeee jj7edeee jj8edeee jj9ee(eddde(ejdd:de jgd(fdeee jj:ee(eddde(ejdd:de jgd(fdeee jj;ee(edddfdeee jj<ee(e%fdeee jj=ee(e%fdeee jj>edeee jj?eeܐrvdne(ejddeBߐddd fdeee jj@eeܐrdne(ejddeBߐddd fdeee jjAee(e%fdeee jjBee(e%fdeee jjCd"eed"dOeeѐdQeee jjDd"eedڐdOeeѐdQeee jjEd"eee(e%feѐdReee jjFedeee jjGee<e(eddddde(edddddfd!eee jjHee<e(eddddde(edddddfd!eee jjIeÐdeee jjJee<e(eddddde(edddddfd!eee jjKee<e(eddddde(edddddfd!eee jjLee(e%fdeee jjMeƐdeee jjNee(ejddddfdeee jjOee(ejddddfdeee jjPeeՐdPeee jjQee֐dPeee jjReee(ejddfdSeee jjSeɐdeee jjTee(ejddddfdeee jjUee(ejddddfdZVeIee eJdT< dS (V      N)deepcopy)Enum)wrapspartial)chainproduct)pack_padded_sequence)make_tensor)
TEST_CUDNN)floating_typesfloating_and_complex_types_andget_all_fp_dtypes)_TestParametrizer_update_param_kwargsexpectedFailureMPStoleranceOverridetolskipCUDAIfCudnnVersionLessThanskipCUDAIfRocmprecisionOverrideskipMetaskipMPSskipCUDAVersionIn)DecorateInfo)cosineembeddingloss_referencecross_entropy_loss_referencectcloss_referencehingeembeddingloss_referencehuberloss_referencekldivloss_referencemarginrankingloss_referencemultimarginloss_referencemultilabelmarginloss_referencenllloss_referencenlllossNd_referencesmoothl1loss_referencesoftmarginloss_referenceget_reduction)freeze_rng_state	skipIfMPSskipIfMPSOnMacOS13GRADCHECK_NONDET_TOLTEST_WITH_ROCM
IS_WINDOWSskipIfTorchDynamo)
ModuleTypeMODULE_NAMESPACESMODULES_TO_SKIPc                    s    g | ]  fd d j D qS )c                    s   g | ]}t  |qS  )getattr).0module_name	namespacer2   Z/var/www/vscode/kcb/lib/python3.10/site-packages/torch/testing/_internal/common_modules.py
<listcomp>5       z<listcomp>.<listcomp>)__all__)r4   r2   r6   r8   r9   4   s    r9   MODULE_CLASSESc                 C   s   g | ]}|t vr|qS r2   )r1   )r4   clsr2   r2   r8   r9   7       MODULE_CLASS_NAMESztorch. z.modules.TrainEvalMode)
train_only	eval_onlytrain_and_evalc                   @   s2   e Zd ZdZdejdfddZdd Zdd	 ZdS )
moduleszQ PROTOTYPE: Decorator for specifying a list of modules over which to run a test. NTc                 C   s0   t || _|d urt|nd | _|| _|| _d S N)listmodule_info_listsetallowed_dtypestrain_eval_modeskip_if_dynamo)selfmodule_info_iterablerK   rL   rM   r2   r2   r8   __init__M   s   

zmodules.__init__c                 C   s^   g }| j tjks| j tjkr|d | j tjks| j tjkr$|d |js-|d d }|S )NTF   )rL   rB   rC   rE   appendrD   train_and_eval_differ)rN   module_infotraining_flagsr2   r2   r8   _get_training_flagsT   s   

zmodules._get_training_flagsc                 #   s>   |d u r	t d| jD ]}t||j}| jd ur!|| j}| |}t||D ]p\}}|j	}	t
|dkrC|	d|r>dnd 7 }	d|i}
t|
d| t|
d| z.t  fd	d
}| jrmtjjjjsmtd|}t|j|j j|j|}||	|
|fV  W q+ ty } ztd|	 d|j d |d }~ww qd S )NzThe @modules decorator is only intended to be used in a device-specific context; use it with instantiate_device_type_tests() instead of instantiate_parametrized_tests()rQ   _
train_mode	eval_moderT   dtypetrainingc                     s    | i |S rG   r2   argskwargstestr2   r8   test_wrapper~      z/modules._parametrize_test.<locals>.test_wrapperz/Policy: we don't run ModuleInfo tests w/ DynamozFailed to instantiate z for module !)RuntimeErrorrI   rJ   supported_dtypesdevice_typerK   intersectionrV   r   formatted_namelenr   r   rM   torchtesting	_internalcommon_utilsTEST_WITH_TORCHINDUCTORr.   r   get_decorators__name__	Exceptionprintname)rN   r`   generic_cls
device_clsrT   dtypesrU   r[   rZ   	test_nameparam_kwargsra   decorator_fnexr2   r_   r8   _parametrize_testd   s>   




zmodules._parametrize_test)	rp   
__module____qualname____doc__rB   rE   rP   rV   r{   r2   r2   r2   r8   rF   J   s    
rF   c                 C   s   | t v rt |  S | jS rG   )r?   rp   )
module_clsr2   r2   r8   get_module_common_name   s   r   c                   @   s    e Zd ZdZddgZdd ZdS )FunctionInputz: Contains args and kwargs to pass as input to a function. r]   r^   c                 O   s   || _ || _d S rG   r\   )rN   r]   r^   r2   r2   r8   rP      s   
zFunctionInput.__init__Nrp   r|   r}   r~   	__slots__rP   r2   r2   r2   r8   r      s    r   c                   @   s"   e Zd ZdZg dZdddZdS )ModuleInputzA Contains args / kwargs for module instantiation + forward pass. constructor_inputforward_inputdescreference_fnNr@   c                    sB   || _ || _|| _ | _ d urt  fdd}|| _d S d S )Nc                    s2   t |t |}} | t|  g|R i |S rG   )r   rH   
parameters)mr]   r^   r   r2   r8   copy_reference_fn   s    z/ModuleInput.__init__.<locals>.copy_reference_fn)r   r   r   r   r   )rN   r   r   r   r   r   r2   r   r8   rP      s   
zModuleInput.__init__)Nr@   Nr   r2   r2   r2   r8   r      s    r   c                   @   s   e Zd ZdZdZdZdS )ModuleErrorEnumz7 Enumerates when error is raised when testing modules. r   rQ   N)rp   r|   r}   r~   CONSTRUCTION_ERRORFORWARD_ERRORr2   r2   r2   r8   r      s    r   c                   @   s*   e Zd ZdZg dZejedddZdS )ErrorModuleInputzw
    A ModuleInput that will cause the operation to throw an error plus information
    about the resulting error.
    module_error_inputerror_on
error_typeerror_regex)r   r   c                C   s   || _ || _|| _|| _d S rG   r   )rN   r   r   r   r   r2   r2   r8   rP      s   
zErrorModuleInput.__init__N)	rp   r|   r}   r~   r   r   r   rd   rP   r2   r2   r2   r8   r      s    r   c                   @   sl   e Zd ZdZdde ejejfejejfddddddddd	Z	d
d Z
dd Zedd Zedd ZdS )
ModuleInfoz+ Module information to be used in testing. r2   NT        F)skips
decoratorsrv   dtypesIfMPSdtypesIfHpusupports_gradgradgradcheck_nondet_tolmodule_memformat_affects_outrS   module_error_inputs_funcgradcheck_fast_modec                C   sz   || _ || _g |r|ng |r|ng R | _|| _|| _|| _|| _|	| _|
| _|| _	|| _
|| _t|tjjjj| _d S rG   )r   module_inputs_funcr   rv   r   r   r   r   r   rS   r   r   
issubclassrj   nnrF   lazyLazyModuleMixinis_lazy)rN   r   r   r   r   rv   r   r   r   r   r   rS   r   r   r2   r2   r8   rP      s    zModuleInfo.__init__c                 C   sH   g }| j D ]}t|tr||||||r||j  q|| q|S rG   )r   
isinstancer   	is_activeextendrR   )rN   
test_classrw   devicerZ   rx   result	decoratorr2   r2   r8   ro      s   

zModuleInfo.get_decoratorsc                 C   s"   |dkr| j S |dkr| jS | jS )Nmpshpu)r   r   rv   )rN   rf   r2   r2   r8   re      s
   zModuleInfo.supported_dtypesc                 C   s
   t | jS rG   )r   r   rN   r2   r2   r8   rs     s   
zModuleInfo.namec                 C   s   | j ddS )NrA   rW   )rs   replacer   r2   r2   r8   rh   	  rb   zModuleInfo.formatted_name)rp   r|   r}   r~   r   rj   float16float32bfloat16rP   ro   re   propertyrs   rh   r2   r2   r2   r8   r      s(    

!

r   c                 K   s|   t t|||d}ttddt|dddd dttddd	d
t|dddd dttddt|dddd dg}|S )Nr   rZ   requires_grad
         r   )inputc                 S   s,   t ||d  |d dddd S )Nr   rQ   r   r   )rj   mmtviewexpand)r   pr   r2   r2   r8   <lambda>  s   , z/module_inputs_torch_nn_Linear.<locals>.<lambda>r   r   r   Fbiasno_biasc                 S   s   t ||d  S )Nr   )rj   r   r   r   r   ir2   r2   r8   r         r         no_batch_dimc                 S   s*   t |dd|d  d|d  S )NrQ   r   r   )rj   r   r   r   r   r2   r2   r8   r        * r   r	   r   r   rT   r   rZ   r   r[   r^   
make_inputmodule_inputsr2   r2   r8   module_inputs_torch_nn_Linear  s"   



r   c              	      s   t t|||d}ddd ttdddt|d|d	 d
ttdddddt|d|d	d fdddttdddt|d|dd fdddg}|S )Nr   Tc                 S   sl   t d||d |}|r4|jd dkr|d|d  }|S ||d dd|jd |d jd  }|S )Nzbn,anm,bm->bar   rQ   r   )rj   einsumshaper   r   )r   r   x1x2r   r   r2   r2   r8   bilinear_reference_fn&  s   ,z>module_inputs_torch_nn_Bilinear.<locals>.bilinear_reference_fn   r   r   )r   r   )r   r   r   Fr   r   c                    s    | |||ddS )NFr   r2   r   r   r   r   r   r2   r8   r   6  s    z1module_inputs_torch_nn_Bilinear.<locals>.<lambda>r   r   c                    s    | || dd| ddS )NrQ   r   )r   r   r   r2   r8   r   :      )Tr   r   r2   r   r8   module_inputs_torch_nn_Bilinear#  s$   



r   c              	   K   s  t t|||d}di fdddifdddifddd	ifd
d
difg}g }|D ]^\}	}
|
fdd}|d }|d
dr?|dn|d }|ttdi |
t|||	|d |d }|d
drh|dn|d }|ttdi |
t||d|	 |d q%|S )Nr   r@   reduction_sum	reductionsumreduction_batchmean	batchmeanreduction_nonenone
log_targetTc                 S      t ||fi |S rG   )r   r   r   r   r   constructor_kwargsr2   r2   r8   r   M     z6module_inputs_torch_nn_KLDivLoss.<locals>.reference_fnr   r   Fr   r2   scalar_)r   r	   loggetrR   r   r   )rT   r   rZ   r   r[   r^   r   casesr   r   r   r   r   targetscalar_inputscalar_targetr2   r2   r8    module_inputs_torch_nn_KLDivLoss@  s<   



 r   c                 K   s  |||fdd}t t||dd}di fdddifd	dd
ifdddifdd|d ifd|d ddfd|d ddfg}g }	|D ]\}
}|fdd}|	ttd#i |t|dtjd|d 	d
  |
|d |fdd}|	ttd#i |t|dtjddd|d 	d
  d|
 |d |	ttd#i |t|dtjddddd|d 	d
  d |
 |d |	ttd#i |t|d!tjdd|d 	d
  d"|
 |d qE|	S )$Nc                 S   s   t | ||ddjdd|S )NFr   rQ   dim)r	   log_softmaxrequires_grad_)r   r   rZ   r   r2   r2   r8   r   i  s   z2module_inputs_torch_nn_NLLLoss.<locals>.make_inputFr   r@   r   r   r   r   r   ignore_indexr   weightsweightr   weights_ignore_index)r   r   weights_ignore_index_negr   c                 S   r   rG   )r#   r   r2   r2   r8   r     r   z4module_inputs_torch_nn_NLLLoss.<locals>.reference_fn)   r   r   r   r   c                 S   r   rG   )r$   r   r2   r2   r8   nd_reference_fn  r   z7module_inputs_torch_nn_NLLLoss.<locals>.nd_reference_fn)r   r   r   r   r   nd_)r   r   r   r   r   r   higher_dim_)r   r   r   3d_r2   )r   r	   absrR   r   r   rj   emptyuniform_mulfloorlong)rT   r   rZ   r   r[   r^   r   make_weightr   r   r   r   r   r   r2   r2   r8   module_inputs_torch_nn_NLLLossh  sl   


"	&	 	r
  c              
   K   s   t t|||d}t t||dd}di fdddifdddifd	dd
ifg}g }	|D ] \}
}|	ttdi |t|d|d|d |
td q(|	S )Nr   Fr@   r   r   r   reduction_meanmeanr   r   r   rQ   r   r2   )r   r	   rR   r   r   r  no_batch_dim_reference_fn)rT   r   rZ   r   r[   r^   r   make_targetr   r   r   r   r2   r2   r8   &module_inputs_torch_nn_GaussianNLLLoss  s(   



	r  c              
      s   t t|||d}t t||dd}di fdddifdddifd	dd
ifdddifdddifddddfg}ddd g }	|D ]:\}
}|f fdd	}|dd}|rU|dn|d d}|	ttdi |t||d 	 |
|d q=|	S )Nr   Fr@   r   r   r   r  r  r   r   fullTno_log_input	log_inputfull_no_log_input)r  r  :0yE>c                 S   s   |r|   ||  }n| || |   }|r5||| | ddtj |    |dkd7 }|dkr;|S |dkrG| |   S | S )N      ?       @rQ   r   r   r  )expr  r   mathpimasked_fillr   numel)r   r   r  r  r   epsr   r2   r2   r8   poissonnllloss_reference_fn  s   8zJmodule_inputs_torch_nn_PoissonNLLLoss.<locals>.poissonnllloss_reference_fnc                    s    ||fi |S rG   r2   r   r  r2   r8   r     r   z;module_inputs_torch_nn_PoissonNLLLoss.<locals>.reference_fnr   r   r   r   MbP?r   )TFr  r  r2   )
r   r	   r   r  addrR   r   r   floor_abs_)rT   r   rZ   r   r[   r^   r   r  r   r   r   r   r   r  r   r2   r  r8   %module_inputs_torch_nn_PoissonNLLLoss  s4   






r$  c                 K   s   t t|||d}t t||dd}di fdddifdddifd	dd
ifg}ddd}	g }
|D ]A\}}|
ttdi |t|d|d|t |	fi |d |
ttdi |t|d|d| dt |	fi |d q-|
S )Nr   Fr@   r   r   r   r  r  r   r   c                 S   sJ   |dkr||  dS |dkr||  d |  S ||  d S )Nr   r   r  )powr   r  )r   r   r   r   r   r2   r2   r8   mse_loss_reference_fn  s
   z=module_inputs_torch_nn_MSELoss.<locals>.mse_loss_reference_fnr  r   r2   _scalar)r  r   r	   rR   r   r   )rT   r   rZ   r   r[   r^   r   r  r   r&  r   r   r   r2   r2   r8   module_inputs_torch_nn_MSELoss  s:   



r)  c                    s   fdd}|ddrdnd |dd}|d	d
}|dur@t |ts$J  D ]\}}||v r?|dur?|| }	||	|< q( fdd|D }
t  | |
i  }W d   n1 saw   Y  |rut| }|dkru|dS |S )a  Reference function for modules supporting no batch dimensions.

    Unbatched inputs are unsqueezed to form a
    single batch input before passing them to the module.
    The output is squeezed to compare with the
    output of unbatched input to the module.

    Currently it only supports modules which return a single Tensor as output.
    You can bind the following kwargs.
    Kwargs:
        batch_first[bool] : If True, all the Tensors in `args` while be unsqueezed at dim `0` .
                        and output will be squeezed at dim `0` else dim `1` for both.
        kwargs_to_batchify[dict] : Dictionary specifying the name of the argument and dimension to unsqueeze.
                               Useful if there are few arguments whose batch dimension are different
                               from the ones selected by `batch_first`.
        is_criterion[bool] : Specify if the module is a criterion and handle the reduction for output accordingly.
    c                    s"     | |}|  v r |  |S rG   )r   pop)keydefaultv)r^   r2   r8   get_and_pop-  s   
z.no_batch_dim_reference_fn.<locals>.get_and_popbatch_firstTr   rQ   kwargs_to_batchifyNis_criterionFc                       g | ]}|  qS r2   	unsqueezer4   r   	batch_dimr2   r8   r9   >  r:   z-no_batch_dim_reference_fn.<locals>.<listcomp>r   )r   dictitemsr4  r(   squeezer'   )r   r   r]   r^   r.  r0  r1  kr-  bdimsingle_batch_input_argsoutputr   r2   )r7  r^   r8   r    s(   


r  c                    s   | ddrdnd d|v r|d d|v r&|d dur&|d d|d<  fdd|D }t  | |i |}|d  |d dfW  d   S 1 sRw   Y  dS )	a  Reference function for MultiheadAttention supporting no batch dimensions.

    Unbatched inputs are unsqueezed to form a
    single batch input before passing them to the module.
    The output is squeezed to compare with the
    output of unbatched input to the module.
    r/  Tr   rQ   key_padding_maskNc                    r2  r2   r3  r5  r6  r2   r8   r9   V  r:   z.no_batch_dim_reference_mha.<locals>.<listcomp>)r   r*  r4  r(   r:  )r   r   r]   r^   r=  r>  r2   r6  r8   no_batch_dim_reference_mhaI  s   
$r@  c           	      O   s   t |dkr|\}d}nt |dkr|\}}|d}|d r!dnd}|d ||}||f}t  | |i |}|d ||d dfW  d   S 1 sTw   Y  dS )a  Reference function for RNN and GRU supporting no batch dimensions.

    Unbatched inputs are unsqueezed to form a
    single batch input before passing them to the module.
    The output is squeezed to compare with the
    output of unbatched input to the module.
    rQ   Nr   r/  r   ri   r4  r*  r(   r:  	r   r   r]   r^   inphr7  r=  r>  r2   r2   r8   no_batch_dim_reference_rnn_gru\  s   


$rE  c           	      O   s   t |dkr|\}d}nt |dkr$|\}}|d d|d df}|d r*dnd}|d ||}||f}t ( | |i |}|d ||d d d|d d dffW  d   S 1 shw   Y  dS )a  Reference function for LSTM supporting no batch dimensions.

    Unbatched inputs are unsqueezed to form a
    single batch input before passing them to the module.
    The output is squeezed to compare with the
    output of unbatched input to the module.
    rQ   Nr   r   r/  rA  rB  r2   r2   r8   no_batch_dim_reference_lstmt  s   

0$rF  c           	      O   s~   |\}\}}| d| d| dff}t  | |i |}|d d|d dfW  d   S 1 s8w   Y  dS )zReference function for LSTMCell supporting no batch dimensions.

    The module is passed the input and target in batched form with a single item.
    The output is squeezed to compare with the no-batch input.
    r   rQ   N)r4  r(   r:  )	r   r   r]   r^   rC  rD  cr=  r>  r2   r2   r8   no_batch_dim_reference_lstmcell  s   $rH  c                    s    fdddD S )Nc              	      s>   g | ]}t t|d t d dttddd| dqS ))r   r   r   Tr1  no_batch_dim_r   r   r   r   )r   r   r   r  )r4   r   r   r2   r8   r9     s    
z8generate_regression_criterion_inputs.<locals>.<listcomp>)r   r  r   r2   rM  r2   rM  r8   $generate_regression_criterion_inputs  s   
rN  c                 K   s|   t t|||d}ttddt|ddtdttdt|ddttd	d	t|dd
dttdddt|dddgS )Nr   r   )kernel_size)r      r   r   )r   r   rP  r   r   r   strider   r   r   rQ   
stride_padr   r	   r   r   r  rT   r   rZ   r   r[   r^   r   r2   r2   r8    module_inputs_torch_nn_AvgPool1d  s$   





rX  c                 K   s   t t|||d}ttdt|ddtdttdt|ddttddt|ddd	ttddd
t|ddd	ttdddt|ddd	ttddddt|ddd	ttddd
ddt|ddd	gS )Nr   r   r   r   rP  rP  r   r   r   r   rP  rP  rQ  rS  rT  rQ   rQ   rU  rQ   divisor_overridedivisordivisor_stridedivisor_stride_padrV  rW  r2   r2   r8    module_inputs_torch_nn_AvgPool2d  s<   







rb  c                 K   s  t t|||d}ttdt|ddtdttdt|ddttddt|d	d
dttdddt|d	ddttdddt|d	ddttdddt|dddttdddt|dddttdddt|dddttdddt|dddttddddt|d	ddttdddddt|d	ddttdddddt|d	ddttdddddt|dddttdddddt|dd dttdddddt|dd!dgS )"Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rQ  r   r   r   r   r   r   rS  rT  rQ   rQ   rQ   rU  r   )rQ   r   rQ   stride_pad_gpu_fixedkw_output)r   r   r   rQ   )rQ   rQ   r   )r   r   r   r   r   stride_pad_gpu_general_outputr   r   stride1_pad0_gpu_inputstride_pad_gpu_input_nooverlapr]  r_  r`  ra  %divisor_stride_pad_gpu_fixedkw_output%divisor_stride_pad_gpu_general_outputdivisor_stride1_pad0_gpu_input&divisor_stride_pad_gpu_input_nooverlaprV  rW  r2   r2   r8    module_inputs_torch_nn_AvgPool3d  s|   















rp  c                 K   s^   t t|||d}ttdt|dddttdt|dtddttd	t|dd
dgS )Nr   r   rQ   r   r   singlerT  r   r   r   rL  rQ   
one_outputrV  rW  r2   r2   r8   (module_inputs_torch_nn_AdaptiveAvgPool1d  s   


ru  c              	   K   s   t t|||d}ttdt|dddttdt|dtddttd	t|dd
dttdt|dddttdt|dddgS )Nr   r   rQ   r   r   rP  rr  rT  r   r   rP  r   rL  rQ   single_1x1outputr   r   tupler   N
tuple_nonerV  rW  r2   r2   r8   (module_inputs_torch_nn_AdaptiveAvgPool2d  .   




r}  c              	   K   s   t t|||d}ttdt|dddttdt|dtddttd	t|d
ddttdt|d
ddttdt|dddgS )Nr   r   )r   r   r   r      rr  rT  )r   r   r   r  r   rL  r   r   r   )r   r   r   r   r  rz  )Nr   r   r|  )r   r   r   )rQ   rQ   r   r   rP  last_dimrV  rW  r2   r2   r8   (module_inputs_torch_nn_AdaptiveAvgPool3d*  r~  r  c                 K   sF   t t|||d}ttdt|dddttdt|dtddgS )	Nr   r   rq  rr  rT  rs  r   rL  rV  rW  r2   r2   r8   (module_inputs_torch_nn_AdaptiveMaxPool1d@  s   

r  c                 K   sv   t t|||d}ttdt|dddttdt|dtddttd	t|dd
dttdt|dddgS )Nr   r   rv  rr  rT  rw  r   rL  ry  rz  r{  r|  rV  rW  r2   r2   r8   (module_inputs_torch_nn_AdaptiveMaxPool2dM  s&   



r  c              
   K   s   t t|||d}ttdt|dddttdt|dtddttd	t|dd
dttdt|dddttdt|dddttd	t|dddgS )Nr   r   )r   r   r   rP  r  rr  rT  )r   r   rP  r  r   rL  r  rz  )r   Nr   r|  )r   r      	   r   single_nonatomic)r   r   rP  r   r   tuple_nonatomicrV  rW  r2   r2   r8   (module_inputs_torch_nn_AdaptiveMaxPool3d`  s6   





r  c                 K   s   t t|||d}ttdt|dddttdt|dddttdd	d t|dd
dttdd	ddt|dddttdd	dddt|dddttdd	ddt|dddttdd	ddt|dddgS )Nr   r   r   affinerT  r   )r   r   r   3d_inputr   affine_simple_average333333?F
not_affineTnot_tracking_stats3d_input_not_affine)r   r   r  
zero_batchr   rW  r2   r2   r8   "module_inputs_torch_nn_BatchNorm1dy  s<   






r  c                 K      t t|||d}ttdt|ddttddd t|dddttdddt|dd	dttdddd
t|dddttddddd
t|dddttdddd
t|dddgS )Nr   r   r[  rQ  r   2d_simple_averagerT  g?momentumFr  Tr  r   r  )r   r   r   r   r  r   rW  r2   r2   r8   "module_inputs_torch_nn_BatchNorm2d  2   





r  c                 K   r  )Nr   r   re  rQ  r   3d_simple_averagerT  ffffff?r  Fr  Tr  r   r  )r   r   r   r   r   r  r   rW  r2   r2   r8   "module_inputs_torch_nn_BatchNorm3d  r  r  c           	         s   |d }| dd| dd}tt|||d|ri gni ddig}d\  ftd	d
 t|D  d  fddtddg|D S )NNr   F
transposedr   paddingsamer  c                 s   s    | ]}|d  V  qdS r{  r2   r4   r   r2   r2   r8   	<genexpr>  s    z0module_inputs_torch_nn_ConvNd.<locals>.<genexpr>rR  c                    sf   g | ]/\}}t rtfi |n	t fi |t|r!n|r(d nd|r-dntdqS )r@   r   Nr   )r   r   r  )r4   
with_batchconv_kwargsC_inC_outinput_batch_shapeinput_no_batch_shaperO  r   r   r2   r8   r9     s    


z1module_inputs_torch_nn_ConvNd.<locals>.<listcomp>T)r   r   r	   rz  range	itertoolsr   )	rT   r   rZ   r   r[   r^   r  r  conv_kwargs_listr2   r  r8   module_inputs_torch_nn_ConvNd  s   
r  c              
   K   s   t t|||d}t t||dd}di fdddifdddifd	dd
ifdddifg}g }	|D ]&\}
}|fdd}|	ttdi |t|d|d|d |
|d q-|	S )Nr   Fr@   r   r   r   r  r  r   r   marginr  c                 S      t |||fi |S rG   )r   r   r   i1i2r   r   r2   r2   r8   r        z@module_inputs_torch_nn_CosineEmbeddingLoss.<locals>.reference_fnr   r   )r   r   r2   r   r	   rR   r   r   signrT   r   rZ   r   r[   r^   r   r  r   r   r   r   r   r2   r2   r8   *module_inputs_torch_nn_CosineEmbeddingLoss  s*   




r  c                 K   s~   t t|||d}ttddt|ddd dttddt|dd	d
tt t|ddtdttddt|ddd
gS )Nr   r  alphar   r   r   c                 S   s   t |dk|d| d  S )Nr   r   rQ   rj   wherer  r   r2   r2   r8   r     r   z,module_inputs_torch_nn_ELU.<locals>.<lambda>r   r2   scalarrT  r   r   r   r   r   r   r   4d_inputrV  rW  r2   r2   r8   module_inputs_torch_nn_ELU  s&   






r  c                 K   sn   t t|||d}ttddt|ddd dttddt|dd	d d
dttddt|ddtdgS )Nr   r  r  r  c                 S   "   t |dk|dd|  d  S Nr   r  r  rQ   r  r   r2   r2   r8   r        " z-module_inputs_torch_nn_CELU.<locals>.<lambda>r   r2   c                 S   r  r  r  r   r2   r2   r8   r     r  r  rL  r  r   r   rV  rW  r2   r2   r8   module_inputs_torch_nn_CELU  s    





r  c                 K   sX   t t|||d}tt t|ddttdt|dddtt t|dd	td
gS )Nr   )r   rP  rQ  rQ   r   rP  r  r   rT  rI  r   r   rV  rW  r2   r2   r8   module_inputs_torch_nn_GLU  s   


r  c                 K   sf   t t|||d}ttdt|ddd ddttdt|dd	d d
tt t|ddtdgS )Nr   r   r2   c                 W       |d dt |td   S Nr        ?r  rj   erfr  sqrtr   r   xrW   r2   r2   r8   r   +       z-module_inputs_torch_nn_GELU.<locals>.<lambda>r  rL  r  c                 W   r  r  r  r  r2   r2   r8   r   /  r  r   r  r   r   rV  rW  r2   r2   r8   module_inputs_torch_nn_GELU%  s    


r  c                 K   n   t t|||d}tt t|dddtt t|dtddtt t|dd	dtt t|d
ddgS Nr   r2   r  rT  r   r   rL  r  channels_last_mem_format)r   r   r   r   r   channels_last_3d_mem_formatrV  rW  r2   r2   r8   module_inputs_torch_nn_ReLU6  &   



r  c                 K   r  r  rV  rW  r2   r2   r8   module_inputs_torch_nn_ReLU6I  r  r  c              	   K   s   t t|||d}tt t|ddtt t|dtddttdt|ddd	ttd
t|ddd	ttdt|ddd	gS )Nr   r  rQ  r   r   rL  r  with_negvalrT  r   r   with_zero_negvalr2   with_negval_scalarrV  rW  r2   r2   r8    module_inputs_torch_nn_LeakyReLU\  s,   




r  c                 K   s   t t|||d}tt t|dddtt t|dtddtt t|dd	d
 ddttdt|ddd
 ddtt t|ddd
 ddttdt|ddd
 ddtt t|ddd
 ddttdt|ddd
 ddgS )Nr   r2   r  rT  r   r   rL  r   r   r   c                 S   (   t j|ddt j|dd|d d   S Nr   min)maxrj   clampr   r2   r2   r8   r   ~     ( z.module_inputs_torch_nn_PReLU.<locals>.<lambda>1dr   c                 S   r  r  r  r   r2   r2   r8   r     r  1d_multiparamr  c                 S   r  r  r  r   r2   r2   r8   r     r  2dc                 S   r  r  r  r   r2   r2   r8   r     r  2d_multiparam)r   r   r   r   rP  c                 S   r  r  r  r   r2   r2   r8   r     r  3dc                 S   r  r  r  r   r2   r2   r8   r     r  3d_multiparamrV  rW  r2   r2   r8   module_inputs_torch_nn_PReLUq  sR   







r  c                 K   sV   t t|||d}tt t|ddtt t|dtddtt t|ddd	gS )
Nr   r  rQ  r   r   rL  r2   r  rT  rV  rW  r2   r2   r8   module_inputs_torch_nn_SELU  s   


r  c                 K   sb   t t|||d}tt t|ddd ddtt t|dtddtt t|d	d
d dgS )Nr   r2   c                 W      |t | S rG   rj   sigmoidr  r2   r2   r8   r         z-module_inputs_torch_nn_SiLU.<locals>.<lambda>r  rL  r   r   r  c                 W   r  rG   r  r  r2   r2   r8   r     r  r   rV  rW  r2   r2   r8   module_inputs_torch_nn_SiLU  s    


r  c                 K   sh   t t|||d}ttdt|ddd dttdt|dd	d d
dttdt|dtddgS )Nr   rQ   r      c                 S   s&   t |t |ddddS NrQ   Tr   r  )rj   r  divr   r   r   r2   r2   r8   r     s   & z0module_inputs_torch_nn_Softmax.<locals>.<lambda>r   r   r2   c                 S      t |t |ddS )Nr   Trj   r  r  r   r   r2   r2   r8   r     r   r  rL  r   r   r   r   rV  rW  r2   r2   r8   module_inputs_torch_nn_Softmax  s    


r  c                 K   sF   t t|||d}tt t|ddd dtt t|dtddgS )	Nr   rQ   r   r   r  c                 S   r  NrQ   Fr  r   r2   r2   r8   r     r   z2module_inputs_torch_nn_Softmax2d.<locals>.<lambda>r   r  r   rL  rV  rW  r2   r2   r8    module_inputs_torch_nn_Softmax2d  s   

r  c              	   K   s   t t|||d}ttdt|ddd dttdt|ddd d	d
ttdt|ddd dd
ttdt|dtdd
gS )Nr   rQ   r  c                 S   s*   t |t |dddd S r  )rj   r  div_r   r   log_r   r2   r2   r8   r     r   z3module_inputs_torch_nn_LogSoftmax.<locals>.<lambda>r   r  c                 S   "   t |t |dd S r  rj   r  r  r   r  r   r2   r2   r8   r     r  
multiparamrL  r   r2   c                 S   r  )Nr   Fr   r   r2   r2   r8   r     r  multiparam_scalarr   r  r   rV  rW  r2   r2   r8   !module_inputs_torch_nn_LogSoftmax  s*   



r  c              	   K   st   t t|||d}ttdt|ddttdt|dddttdt|d	d
dttdt|dtddgS )Nr   rQ   r  rQ  )r   r   r   r   multidimrT  r   r2   r  r   )r   r   r   r   rL  rV  rW  r2   r2   r8   module_inputs_torch_nn_Softmin  s$   



r  c              
   K   s   t t|||d}tt t|ddd dttdt|ddd dd	ttdd
t|ddd dd	ttdd
t|ddd dd	tt t|dtdd	gS )Nr   r  c                 S   s   t t |S rG   rj   log1pr  r   r2   r2   r8   r     s    z1module_inputs_torch_nn_Softplus.<locals>.<lambda>r   r   c                 S   s   dt t d|  S )Nr  r   r  r   r2   r2   r8   r     r>   betarL  c                 S   D   |d dk || |d dk |d d ttd|   S Nr   r	  r  r  type_asrj   r  r  r   r2   r2   r8   r   	      ,beta_thresholdr2   c                 S   r
  r  r  r   r2   r2   r8   r     r  beta_threshold_scalarr   r   rV  rW  r2   r2   r8   module_inputs_torch_nn_Softplus  s4   






r  c              	   K   sp   t t|||d}tt t|ddttdt|dddttdt|dddtt t|d	td
dgS )Nr   r  rQ  rQ   lambdarT  r2   lambda_scalarr   r   rL  rV  rW  r2   r2   r8   !module_inputs_torch_nn_Softshrink  s$   



r  c                 K   b   t t|||d}tt t|ddd dtt t|ddd dd	tt t|d
tdd	gS )Nr   r  c                 S      | dt| S NrQ   r  rj   r  r   r2   r2   r8   r   0  r   z1module_inputs_torch_nn_Softsign.<locals>.<lambda>r   r2   c                 S   r  r  r  r   r2   r2   r8   r   3  r   r  rL  r   r   rV  rW  r2   r2   r8   module_inputs_torch_nn_Softsign*      


r  c                 K   V   t t|||d}tt t|ddtt t|dddtt t|dtdd	gS 
Nr   r  rQ  r2   r  rT  r   r   rL  rV  rW  r2   r2   r8   module_inputs_torch_nn_Tanh;     


r  c                 K   r  r  rV  rW  r2   r2   r8   !module_inputs_torch_nn_TanhshrinkK  r  r  c              	   K   s~   t t|||d}ttddt|dddttddt|dddttddt|d	d
dttddt|dtddgS )Nr   r  r  r  threshold_valuerT  g      $@large_valuer2   threshold_value_scalarr   r   rL  rV  rW  r2   r2   r8    module_inputs_torch_nn_ThresholdZ  s&   







r#  c                 K   r  )Nr   r  c                 S      |t t| S rG   rj   tanhFsoftplusr   r2   r2   r8   r   s  r   z-module_inputs_torch_nn_Mish.<locals>.<lambda>r   r2   c                 S   r$  rG   r%  r   r2   r2   r8   r   v  r   r  rL  r   r   rV  rW  r2   r2   r8   module_inputs_torch_nn_Mishm  r  r)  c                 K   s^   t t|||d}tt t|d|ddd dtt t|d|ddd dd	gt| S )
Nr   r  c                 S   s$   d|   tdd t||D  S )Nr  c                 s   s$    | ]\}}||    V  qd S rG   )r  r   )r4   abr2   r2   r8   r    s    zBmodule_inputs_torch_nn_L1Loss.<locals>.<lambda>.<locals>.<genexpr>)r  r   zipr   r   r   r   r2   r2   r8   r     s    
z/module_inputs_torch_nn_L1Loss.<locals>.<lambda>r   r2   c                 S   s   d|   ||    S )Nr  )r  r  r   r-  r2   r2   r8   r     s    r  rL  )r   r	   r   r   rN  rW  r2   r2   r8   module_inputs_torch_nn_L1Loss~  s   	r.  c              	   K   s   t t|||d}di fdddifdddifddd	ifg}g }|D ];\}	}
|
fd
d}|ttdi |
t|d|d|	|d |ttdi |
t|d|dd|	 |d q |S )Nr   r@   r   r   r   r  r  r   r   c                 S   r   rG   )r%   r   r2   r2   r8   r     r   z9module_inputs_torch_nn_SmoothL1Loss.<locals>.reference_fnr   r   r   r2   r   r(  rT   r   rZ   r   r[   r^   r   r   r   r   r   r   r2   r2   r8   #module_inputs_torch_nn_SmoothL1Loss  s8   


r1  c                 K   s  t t|||d}t t||dd}t t||dd}di fdddifdddifd	dd
ifdd|difg}	ddd}
g }|	D ]*\}}|ttdi |t|dddd|dd||t |
fi |d q<|d}|tt|dt|dddd|dd|dt |
|dd |S )Nr   Fr@   r   r   r   r  r  r   r   r   r   r   c                 S   s^   ||   d| d|      }|d ur|| }|dkr|S |dkr+| |  S | S )NrQ   r   r  )r   r   r  r   r   r   r   r   r   r   r2   r2   r8   bce_loss_reference_fn  s   "z=module_inputs_torch_nn_BCELoss.<locals>.bce_loss_reference_fnr  {Gz?Gz?lowhighr   r   r2   )r   scalar_weightr  Nr   r	   rR   r   r   gtto)rT   r   rZ   r   r[   r^   r   r  r	  r   r4  r   r   r   r:  r2   r2   r8   module_inputs_torch_nn_BCELoss  s@   





r?  c                 K   s   t t|||d}t t||dd}t t||dd}di fdddifdddifd	dd
ifdd|difdd|difg}	ddd}
g }|	D ]*\}}|ttdi |t|dddd|dd||t |
fi |d qC|S )Nr   Fr@   r   r   r   r  r  r   r   r   r   r2  scalar_weightsr2   c                 S   s~   | j dd}d| |||  | |   }|d ur)|| }|dkr/|S |dkr;| |  S | S )Nr   r  rQ   r   r  )r  mul_add_exp_r  r   r  )r   r   r   r   r   r   max_valr   r2   r2   r8   bce_withlogitsloss_reference_fn  s   4zQmodule_inputs_torch_nn_BCEWithLogitsLoss.<locals>.bce_withlogitsloss_reference_fnr  r5  r6  r7  r   r   r;  r<  )rT   r   rZ   r   r[   r^   r   r  r	  r   rE  r   r   r   r2   r2   r8   (module_inputs_torch_nn_BCEWithLogitsLoss  s.   



	rF  c                 K   s(  t t|||d}t t|tjdd}t t||dd}g d}	di fdd|difddd	ifd
d
difdd	ddfg}
g }t|	|
D ]P\}\}}||fdd}|ttd*d|i|t|d|ddddd| d| |d |ttd*d|i|t|d|ddddd| d| |d |ttd*d|i|t|d|ddddd| d| |d |ttd*d|i|t|d|d dddd!| d| |d |dd d u r|ttd*d|i|t|d"|d"j	d	d#d$| d| |d |ttd*d|i|t|d%|d%j	d	d#d&| d| |d |ttd*d|i|t|d'|d'j	d	d#d(| d| |d |ttd*d|i|t|d|dj	d	d#d)| d| |d |ttd*d|i|t|d|d*dddd+| d| t t
d,d-d q@|S ).Nr   F)r  r   r   r@   r   r   r  r   rQ   label_smoothingg333333?ignore_index_label_smoothing)r   rG  c                 S   s   t ||fd|i|S )Nr   )r   )r   r   r   r   r   r   r2   r2   r8   r        z=module_inputs_torch_nn_CrossEntropyLoss.<locals>.reference_fnr   )r   r   r   r   )r   r   r   r   r   r7  4d_rW   r   )r   r   r   )r   r   r  r   r   r   2d_)r   r   r   r   r   r   )r   r   r   r   r   r  )r   r   r   r   r   4d_prob_target_)r   r   r   3d_prob_target_)r   r   2d_prob_target_higher_dim_prob_target_r2   rK  TrJ  )r   r	   rj   r  r   rR   r   r   r   softmaxr  )rT   r   rZ   r   r[   r^   r   r  r	  
reductionsr   r   r   r   r   r   r2   r2   r8   'module_inputs_torch_nn_CrossEntropyLoss
  s   


rS  c                 K   s  t t|||d}t t|dd}di fdddifddd	ifd
ddifdddifg}tjtjg}	g }
t|	|D ]\}\}}|fdd}|dd}|dkrMdnd}|dkrUdnd}|
tt	d i |t	|d
d|d|||ddd| d|d |
tt	d i |t	|d
d|d|||dtjd|dtjd|d| d|d |
tt	d i |t	|d
d|d|||ddd| d|d |
tt	d i |t	|d
d|d|||dtjd|dtjd|d| d|d q5|
S )!Nr   F)r   r   r@   r   r   r   r  r  r   r   blank   c                 S   s   t ||||fi |S rG   )r   )r   r   r   r   iltlr   r2   r2   r8   r   r  rI  z4module_inputs_torch_nn_CTCLoss.<locals>.reference_fnr   rQ   r   )2   r   r   r   )r      )rZ   r8  r9  )rX  rX  rX  )rY     r  _lengths_intlistsr   r   _lengths_tensors)K   _1d_target_lengths_intlists_1d_target_lengths_tensorsr2   )r   r	   rj   intr  r   r   rR   r   r   r   tensor)rT   r   rZ   r   r[   r^   r   r  r   target_dtypesr   target_dtyper   r   r   rT  r8  r9  r2   r2   r8   module_inputs_torch_nn_CTCLossc  s|   



	
	rd  c                 K   s   t t|||d}ttdddt|dddttdddt|d	d
dttdddt|dddttddddt|dddttddddt|dddttdddt|dddttddddt|dddttddddt|dddgS )Nr   r   rP  r   )r   rP  r   	1d_affinerT  r  )r   r  1d_affine_GNrQ   )   rP  1d_affine_large_batchr   Fr   r   r   1d_no_affine_INr   r   1d_no_affine_LN)r   rP  r   r   	2d_affine)r   r   r   r   2d_no_affine_IN2d_no_affine_LNr   rW  r2   r2   r8    module_inputs_torch_nn_GroupNorm  sT   











ro  c                 K   sZ   t t|||d}ttdt|ddttdt|dddtt t|dtd	d
gS )Nr   r  )r   r   r   r   rQ  r2   r  rT  r   r   rL  rV  rW  r2   r2   r8   !module_inputs_torch_nn_Hardshrink  s"   


rp  c                 K   sB   t t|||d}tt t|dtddtt t|dddgS )Nr   r   r   rL  r  r  rT  rV  rW  r2   r2   r8    module_inputs_torch_nn_Hardswish  s   

rq  c                 K   r  )Nr   r  c                 S      | ddS Nr   rQ   r  r   r2   r2   r8   r         z1module_inputs_torch_nn_Hardtanh.<locals>.<lambda>r   r2   c                 S   rr  rs  rt  r   r2   r2   r8   r     ru  r  rL  r   r   rV  rW  r2   r2   r8   module_inputs_torch_nn_Hardtanh  s&   


rv  c              
   K   s  t t|||d}t t||dd}di fdddifdddifd	dd
ifdddifg}g }	|D ]S\}
}|fdd}|	ttdi |t|d|dd|dd|
|d |	ttdi |t|d|dd|ddd|
 |d q-|	S )Nr   Fr@   r   r   r   r  r  r   r   r  r  c                 S   r   rG   )r   r   r2   r2   r8   r     r   z?module_inputs_torch_nn_HingeEmbeddingLoss.<locals>.reference_fnr2  r   r   rQ   r   r2   r   )	r   r	   rR   r   r   r=  r>  rA  sub_r  r2   r2   r8   )module_inputs_torch_nn_HingeEmbeddingLoss	  s<   



rx  c              	   K   s   t t|||d}di fdddifdddifddd	ifg}g }|D ]!\}	}
|
fd
d}|ttdi |
t|d|d|	|d q |S )Nr   r@   r   r   r   r  r  r   r   c                 S   r   rG   )r   r   r2   r2   r8   r   8  r   z6module_inputs_torch_nn_HuberLoss.<locals>.reference_fnr/  r   r2   r(  r0  r2   r2   r8    module_inputs_torch_nn_HuberLoss,  s&   


ry  c              
   K   s  t t|||d}|dd}|d }d\}	}
}}}dddd	}|| }d
| }t|r/t|
|nt|	|
|t||dt|rEt|
|||nt|	|
|||t||ddt|r\t|
|nt|	|
|t||tddt|rtt|
|||nt|	|
|||t||tddgS )Nr   r   Fr  )r   r   r  FT)r   r   rZ  rd  rQ   r   r   rI  rQ  tracking_statsrT  tracking_stats_no_batch_dimrL  r   )r   r	   r   r   r   r  )rT   r   rZ   r   r[   r^   r   r   r  num_featuresr  r  r  track_running_statsinput_no_batch_shape_dictr  r  r2   r2   r8   %module_inputs_torch_nn_InstanceNormNdF  s>   



r  c                 K   s   t t|||d}ttdgdt|dddttdgdt|dddttdgdd	t|dd
dttg ddt|dddttg ddd	t|dddttdgdt|dddttg dddd	dt|dddgS )Nr   r   r   ri  1d_elementwise_affinerT     r   r   !1d_elementwise_affine_large_batchF1d_no_elementwise_affiner   r   r   r   r   r   r   3d_elementwise_affine3d_no_elementwise_affiner   r   1d_empty_elementwise_affineT)elementwise_affiner   3d_elementwise_affine_no_biasr   rW  r2   r2   r8    module_inputs_torch_nn_LayerNormm  sJ   









r  c                 K   s   t t|||d}dd }ttdgdt|dd|dttdgdt|d	d
|dttdgddt|dd|dttg ddt|dd|dttg dddt|dd|dttdgdt|dd|dgS )Nr   c           	         s   | j }|d u rt|jj }|j | j}| j} fddtt|D }|	 }|t
|dj|dd| j   }|d urB||9 }||S )Nc                    s   g | ]} | d  qS )rQ   r2   r  ndimr2   r8   r9     r>   zQmodule_inputs_torch_nn_RMSNorm.<locals>.rms_norm_reference_fn.<locals>.<listcomp>r   T)r   keepdim)r  rj   finforZ   r  normalized_shaper   r  ri   floatrsqrtr%  r  r  )	r   r   r   r  r  r   dims
upcasted_ir   r2   r  r8   rms_norm_reference_fn  s   $
z=module_inputs_torch_nn_RMSNorm.<locals>.rms_norm_reference_fnr   r   ri  r  r   r  r  Fr  r  r  r  r  r  r  r   )rT   r   rZ   r   r[   r^   r   r  r2   r2   r8   module_inputs_torch_nn_RMSNorm  sN   








r  c                 K   sb   t t|||d}ttdt|dddttdt|dddttd	d
ddt|dddgS )Nr   r   )rQ   r   r  r  rT  r   )rQ   r   r  r  2d_uneven_padrQ   r  r  r  )rQ   r   r  r  r  3d_custom_paramsr   rW  r2   r2   r8   (module_inputs_torch_nn_LocalResponseNorm  s"   


r  c                 K   sf   t t|||d}ttddt|dddttdddt|ddttdddt|d	td
dgS )Nr         ?r   )rQ   r   r  normrT  r   rQ  )r   r  r   rL  rV  rW  r2   r2   r8   module_inputs_torch_nn_LPPool1d  s"   




r  c                 K   f   t t|||d}ttdddt|ddttdddt|dtddttddt|dd	d
gS )Nr   r   rQ   r   r  r  rQ  r   r  r  r   rL  r  r  rT  rV  rW  r2   r2   r8   module_inputs_torch_nn_LPPool2d  "   




r  c                 K   r  )Nr   r   )rQ   r   r  r  r  rQ  )r   r  r  r  r   rL  r  r  rT  rV  rW  r2   r2   r8   module_inputs_torch_nn_LPPool3d  r  r  c                 K   sb   t t|||d}ttdt|dddttddt|dddttdddt|dd	dgS )
Nr   r   )r   r   r   r  rT  rS  Treturn_indicesr  r   rW  r2   r2   r8    module_inputs_torch_nn_MaxPool1d  s"   



r  c              	   K   sl   t t|||d}ttdddt|dddttdddt|dd	dttdddd
dt|dddgS )Nr   r   r   rY  r\  r  r  rT  r  r  Tr  r  r   rW  r2   r2   r8    module_inputs_torch_nn_MaxPool2d"  s"   




r  c              
   K   s   t t|||d}ttdt|ddttddt|dddttdddt|dd	dttdddd
dt|dddgS )Nr   rc  rf  rQ  r   rS  rT  rg  stride_paddingTr  r  r   rW  r2   r2   r8    module_inputs_torch_nn_MaxPool3d4  s*   




r  c              
      s   t t ||d} fdd}ttdd| dt|ddd	ttd
d| dt|ddd	ttdd| ddt|ddd	ttdd| dt|dtddttd
d| dt|dtddgS )Nr   c                         t jdt j d S )N)rQ   r   r   rZ   r   rj   r  doubler  r2   r   r2   r8   make_random_samplesM  rI  zGmodule_inputs_torch_nn_FractionalMaxPool2d.<locals>.make_random_samplesr   r  output_ratio_random_samples)rQ   r   r   r  ratiorT  rK  )r   r   output_sizer  )rQ   r   r  rP  sizeTr  r  r  ratio_return_indices)r   r   r  ratio_no_batch_dimrL  )r   r  rP  size_no_batch_dimrV  rT   r   rZ   r   r[   r^   r   r  r2   r   r8   *module_inputs_torch_nn_FractionalMaxPool2dJ  s@   





r  c                    s   t t ||d} fdd}ttdd| dt|ddd	ttd
d| dt|ddd	ttdd| dt|ddd	ttdd| ddt|ddd	ttdd| dt|dtddttd
d| dt|dtddgS )Nr   c                      r  )N)r   r   r   r  r  r2   r   r2   r8   r  o  rI  zGmodule_inputs_torch_nn_FractionalMaxPool3d.<locals>.make_random_samplesr   r  r  )r   r   r   r   r   r  rT  rc  )r   r   r   r  )r   r   r  r  r  r  )r   r   r   )r   r   r   )r   r      r  r   asymsizeTr  r  )r   r   r   r   r  rL  )r   r  r  r  r  rV  r  r2   r   r8   *module_inputs_torch_nn_FractionalMaxPool3dl  sJ   






r  c                 K   r  r  rV  rW  r2   r2   r8   module_inputs_torch_nn_Sigmoid  s.   



r  c                 K   sb   t t|||d}tt t|ddd ddtt t|ddd d	tt t|d
tddgS )Nr   r2   c                 S      |   S rG   r  r   r   r2   r2   r8   r     ru  z3module_inputs_torch_nn_LogSigmoid.<locals>.<lambda>r  rL  r  c                 S   r  rG   r  r   r2   r2   r8   r     ru  r   r   r   rV  rW  r2   r2   r8   !module_inputs_torch_nn_LogSigmoid  s&   


r  c              
   K   s   t t|||d}t t|tjdd}di fdddifdddifd	dd
ifdddifg}g }	|D ]&\}
}|fdd}|	ttdi |t|d|d|d |
|d q.|	S )Nr   Fr@   r   r   r   r  r  r   r   r  r  c                 S   r  rG   )r    r  r2   r2   r8   r     r  z>module_inputs_torch_nn_MarginRankingLoss.<locals>.reference_fn)rX  r   r2   )r   r	   rj   r  rR   r   r   r  r  r2   r2   r8   (module_inputs_torch_nn_MarginRankingLoss  s*   




r  c                 K   s   t t|||d}t t|tjdd}di fdddifdddifd	dd
ifg}g }	|D ]A\}
}|fdd}|	ttdi |t|d|ddddd|
 |d |	ttdi |t|d|dddd|
|d q)|	S )Nr   Fr@   r   r   r   r  r  r   r   c                 S   r   rG   )r"   r   r2   r2   r8   r     r   zAmodule_inputs_torch_nn_MultiLabelMarginLoss.<locals>.reference_fnr2  r   r   r7  1d_r   r/  r2   r   r	   rj   r  rR   r   r   r  r2   r2   r8   +module_inputs_torch_nn_MultiLabelMarginLoss  s:   


r  c                 K   s   t t|||d}t t|tjdd}t t||dd}di fdddifdddifd	dd
ifdddifdddifdd|difg}	g }
|	D ]$\}}|fdd}|
ttdi |t|d|dddd||d qB|
S )Nr   Fr@   r   r   r   r  r  r   r   r   r   r  r  r   r   r   c                 S   r   rG   )r!   r   r2   r2   r8   r   	  r   z<module_inputs_torch_nn_MultiMarginLoss.<locals>.reference_fnr/  r   r   r7  r   r2   r  )rT   r   rZ   r   r[   r^   r   r  r	  r   r   r   r   r   r2   r2   r8   &module_inputs_torch_nn_MultiMarginLoss	  s0   





r  c                 K   s   t t|||d}t t|tjdd}t t||dd}di fdddifdddifd	dd
ifdd|difg}	ddd}
g }|	D ]$\}}|ttdi |t|d|dddd|t |
fi |d q=|S )Nr   Fr@   r   r   r   r  r  r   r   r   r   c                 S   sx   ||    d| |      }|d ur||9 }| | d |d }|dkr0|S |dkr8| S | S )NrQ   r   r   r  )r  r   r   r   r  r  r3  r2   r2   r8   &multilabelsoftmargin_loss_reference_fn1	  s   &z_module_inputs_torch_nn_MultiLabelSoftMarginLoss.<locals>.multilabelsoftmargin_loss_reference_fnr/  r   r   r7  r   r;  r2   r  )rT   r   rZ   r   r[   r^   r   r  r	  r   r  r   r   r   r2   r2   r8   /module_inputs_torch_nn_MultiLabelSoftMarginLoss$	  s,   



r  c              	   K   s   t t|||d}t t||dd}di fdddifdddifd	dd
ifg}g }	|D ]#\}
}|fdd}|	ttdi |t|d|d |
|d q(|	S )Nr   Fr@   r   r   r   r  r  r   r   c                 S   r   rG   )r&   r   r2   r2   r8   r   X	  r   z;module_inputs_torch_nn_SoftMarginLoss.<locals>.reference_fn)r   r   r   r2   r  r  r2   r2   r8   %module_inputs_torch_nn_SoftMarginLossK	  s(   



r  c                 K   s   g }t d ||||D ]@}|jj|jj}}	||	d< ||	d< tjj|i |	}
d}|j}d|jv r<|jd |jd< |jd= |t	t
|
|||jd q
|S )Nr   rZ   r   src_maskmaskrT  ).module_inputs_torch_nn_TransformerEncoderLayerr   r]   r^   rj   r   TransformerEncoderLayerr   rR   r   r   r   )rT   r   rZ   r   r[   r^   sampleslayer_module_inputl_argsl_kwargsencoder_layer
num_layersr   r2   r2   r8   )module_inputs_torch_nn_TransformerEncoderf	  s*   


r  c                 K   s  t t|||d}ttddddt|dddttddd	dtjt|dd
dttddd	dddt|dddg}d tjg d|tjdf}d tjg d|tjd	df}	t
|	|dddD ],\}
}}}}|ttddd	d|||dt|d|
|dt t|ddidd| d qadd }|rt
ddD ]&\}}|ttddd	dd||dt|d|r|nd d| d| d q|S ) Nr   r   r   r  r   r  relu_activationrT  r   gelu_activationFr   r   FFTr   rZ   r  TFd_modelnheaddim_feedforwarddropoutr/  
norm_firstr   ry  )r  src_key_padding_maskr  r   r/  r0  no_batch_dim_batch_first_rL  c                 _   sX   | j sJ | d t  | |i |}W d    n1 s w   Y  | d |S )NFT)r[   trainrj   no_grad)moduler   r]   r^   r>  r2   r2   r8   fast_path_reference_fn	  s   



zNmodule_inputs_torch_nn_TransformerEncoderLayer.<locals>.fast_path_reference_fnT)r  r/  r  r   	fastpath__norm_first_)r   r	   r   r   r'  gelurj   ra  boolr   r  r   rR   r  )rT   r   rZ   r   r[   r^   r   r  key_padding_masks
attn_masksr  r  r  r/  r   r  r2   r2   r8   r  	  st    

r  c                 K   s  t t|||d}ttddddt|d|dddttddd	dtjt|d|dd
dttddd	dddt|d|dddg}d tjg d|tjdf}d tjg d|tjd	df}	t
|	|dddD ]\}
}}}}|
}|}|ttddd	d|||dt|d|d|
|||dt t|ddddd| d |d|d}}|s|dd|dd}}|d ur|	ddfd \}}|ttddd	d|||dt|||
|||dd| d| d| d qj|S )Nr   r   r   r  r   r  r  rT  r   r  Fr   r   r  r  r  r  r  ry  )tgt_maskmemory_masktgt_key_padding_maskmemory_key_padding_maskr   )r  r  r  r  rL  rQ   r   norm_first__batch_first__bias_)r   r	   r   r   r'  r  rj   ra  r  r   r  r   rR   r  	transpose)rT   r   rZ   r   r[   r^   r   r  r  r  r  r  r  r   r/  r  r  srctgtr2   r2   r8   .module_inputs_torch_nn_TransformerDecoderLayer	  s    r  c                 K   sn  t t|||d}g }d tjg d|tjdf}d tjg d|tjddf}	t|	|dddD ]\}
}}}}|
fd \}}|fd \}}|t	t
dddd	d	d
|||d	t
|d|d||||dt t|ddddd| d |d|d}}|s|dd	}|dd	}|d ur|ddfd \}}|t	t
dddd	d	d
|||d	t
||||||dd q0|S )Nr   r  r  r  r  r   r   r   rQ   r   )	r  r  r  num_encoder_layersnum_decoder_layersr  r/  r  r   ry  )r  r  r  r  r   )r  r  r  r  rL  r  r   rQ  )r   r	   rj   ra  r  r   r  r   rR   r   r   r  r  )rT   r   rZ   r   r[   r^   r   r  r  r  r  r?  r  r   r/  r  r  r  r  r  r  r2   r2   r8   "module_inputs_torch_nn_Transformer
  sX    
r  c                 K   sf   t tj|tjdd}ttdddt|ddddttdddt|dd	dd
d	ddgS )NFr   r   r   )num_embeddingsembedding_dimr   rQ  rQ   i   r  discontiguousrT  )r   rj   r  r  r   r   random_r   )rT   r   rZ   r   r[   r^   
make_emptyr2   r2   r8    module_inputs_torch_nn_Embedding:
  s   

r  c                 K   s   t t|||d}g }d}d tjg d|tjdf}	d tjg d|tjddf}
t||||	|
}|D ]I\}}}}}|t	t
ddd|||dt
|d	|d	|d	||d
td |t	t
ddd|||dt
|d	|d	|d	||d
t tddd q4|S )Nr   r  r  r  )r   r   r   r   T)	embed_dim	num_headsr/  r   add_bias_kvadd_zero_attnr  )r?  	attn_maskr   Fr/  )r   r	   rj   ra  r  r   r  r   rR   r   r   r@  )rT   r   rZ   r   r[   r^   r   r  	bool_valsr  r  productsr   r  r  r?  r	  r2   r2   r8   )module_inputs_torch_nn_MultiheadAttentionI
  s<    	

r  c           	   	   K   s   t t|||d}ttddt|d|dtdttddddt|d|dtdg}|dd}|rK|ttdddd	d
t|d|dtd |S )Nr   r   r   r   Tr   is_rnnFrelu)r   nonlinearity)r   r	   r   r   r  r   rR   )	rT   r   rZ   r   r[   r^   r   r  r  r2   r2   r8   #module_inputs_torch_nn_RNN_GRU_Cellh
  s,   r  c                 K   sl   t t|||d}ttddt|d|d|dftdttddddt|d|d|dftdf}|S )Nr   r   r   r   Tr   )r   r	   r   r   rH  rT   r   rZ   r   r[   r^   r   r  r2   r2   r8   module_inputs_torch_nn_LSTMCell
  s   r  c                 C   s*   | j }| d t| |}|j| |S )NF)r   r   r   data)rC  batch_sizesrequired_gradseqr2   r2   r8   make_packed_sequence
  s
   

r  Fc                 K   s  t t|||d}|d }d}	d}
d}d}g }|r t|	|
||}nt|
||}|D ]}|r3|\}}}}n|\}}}ddd|||d}ddd|||d}|rT||d< ||d< |ttdi |t|d	t t|d
d |ttdi |t|d	||r}dnddft t|d
d |r|ttdi |tt|dt	ddgt t|d
d |ttdi |tt|dt	g dt t|d
d q(|S )Nr   r  )r  r&  FTr   )
input_sizehidden_sizer  r/  r   bidirectionalr   r  r   r   r
  r   r   )r   r   r   r   )r   r   r   )r   r   r   r   r   r2   )
r   r	   r   rR   r   r   rE  r  rj   ra  )rT   r   rZ   r   r[   with_packed_sequencer^   r   r  r  r   r/  r  r  prod_genr]   nlr+  b_fbidir	cons_argscons_args_hiddenr2   r2   r8   module_inputs_torch_nn_RNN_GRU
  sl   





r%  c              
   K   s  t t|||d}d}d}d}	d}
g }t|||	|
}|D ]i}|\}}}}d}d|d||||d}d|d||||d}|ttdi |t|dt t|dd	 |d
krV|n|}||r]dnd|f||rfdnd|ff}|ttdi |t|d|t t|dd	 q|S )Nr   r  )r   r   r   r   )r  r  r  	proj_sizer/  r   r  rY  r
  r   r   r   r  r2   )r   r	   r   rR   r   r   rF  )rT   r   rZ   r   r[   r^   r   r   r/  r  
proj_sizesr  r  r]   r+  r!  r"  r&  r  r#  r$  h_outhxr2   r2   r8   module_inputs_torch_nn_LSTM
  sB   

(
	r*  c                 K   B   t t|||d}ttdt|dtdttdt|ddgS )Nr   rQ   rK  r   rQ   r   r  rQ  rV  rW  r2   r2   r8   &module_inputs_torch_nn_ReflectionPad1d	     

r-  c                 K   r+  Nr   rQ   r  r   rQ   r   r   r   r   r   r   rP  rQ  rV  rW  r2   r2   r8   &module_inputs_torch_nn_ReflectionPad2d  r.  r2  c                 K   r+  )Nr   rQ   r  r   rQ   r   rQ   r   rQ   r   )r   r   r   r   r   rQ  rV  rW  r2   r2   r8   &module_inputs_torch_nn_ReflectionPad3d'  r.  r4  c                 K   r+  Nr   rQ   ry  r   r,  r  rQ  rV  rW  r2   r2   r8   'module_inputs_torch_nn_ReplicationPad1d6  r.  r6  c                 K   r+  r/  rV  rW  r2   r2   r8   'module_inputs_torch_nn_ReplicationPad2dE  r.  r7  c                 K   r+  )Nr   rQ   r1  r   rQ   r   r   r   r   rP  )r   r   r   rP  r  rQ  rV  rW  r2   r2   r8   'module_inputs_torch_nn_ReplicationPad3dT  r.  r9  c                 K   r+  r5  rV  rW  r2   r2   r8    module_inputs_torch_nn_ZeroPad1dc  r.  r:  c                 K   sB   t t|||d}ttdt|dtdttdt|ddgS )Nr   rQ   rz  r   r0  rQ  rV  rW  r2   r2   r8    module_inputs_torch_nn_ZeroPad2dr  r.  r;  c                 K   r+  )Nr   rQ   r1  r   r8  )rQ   r   r   r   r   rQ  rV  rW  r2   r2   r8    module_inputs_torch_nn_ZeroPad3d  r.  r<  c                 K   F   t t|||d}ttddt|dtdttddt|dd	gS )
Nr   rQ   r   ry  r   r,  r   r  rQ  rV  rW  r2   r2   r8   $module_inputs_torch_nn_ConstantPad1d     

r>  c                 K   sF   t t|||d}ttddt|dtdttddt|ddgS )	Nr   rQ   r   r  r   r0  r   rQ  rV  rW  r2   r2   r8   $module_inputs_torch_nn_ConstantPad2d  r?  r@  c                 K   r=  )
Nr   rQ   r   r1  r   r8  r  )rQ   r   rQ   r   rQ   rQ  rV  rW  r2   r2   r8   $module_inputs_torch_nn_ConstantPad3d  r?  rA  c              	         t t|||d}dd  ttdt|dtdttdt|d fd	d
dttdt|d fdd
dttdt|d fdd
dgS )Nr   c              	   S   sJ   t j| dddd|d  df | | ddddd|d f gddS )z input:
                [[[0., 1., 2.],
                  [3., 4., 5.]]]
                pad: (1, 2)
                output:
                    [[[2., 0., 1., 2., 0., 1.],
                      [5., 3., 4., 5., 3., 4.]]]
            Nr   rQ   r   r   rj   catrC  padr2   r2   r8   padding1d_circular_ref  s   J	zDmodule_inputs_torch_nn_CircularPad1d.<locals>.padding1d_circular_refrQ   ry  r   r,  rz  c                        || j S rG   r  r   rG  r2   r8   r     ru  z6module_inputs_torch_nn_CircularPad1d.<locals>.<lambda>)r   rQ   c                    rH  rG   rI  r   rJ  r2   r8   r     ru  r  c                    rH  rG   rI  r   rJ  r2   r8   r     ru  rV  rW  r2   rJ  r8   $module_inputs_torch_nn_CircularPad1d  s.   






rK  c              	      s   t t|||d}dd  ttdt|dtdttdt|d fd	d
dttdt|d fdd
dttdt|d fdd
dgS )Nr   c              
   S   s   t j| dddd|d  df | | ddddd|d f gdd} t j| dddddd|d  df | | ddddddd|d f gddS )aS  input:
                [[[[0., 1., 2],
                   [3., 4., 5.]]]]
                pad: (1, 2, 2, 1)
        output:
            [[[[2., 0., 1., 2., 0., 1.],
               [5., 3., 4., 5., 3., 4.],
               [2., 0., 1., 2., 0., 1.],
               [5., 3., 4., 5., 3., 4.],
               [2., 0., 1., 2., 0., 1.]]]]
        Nr   r   r   r   rQ   rC  rE  r2   r2   r8   padding2d_circular_ref  s   JVzDmodule_inputs_torch_nn_CircularPad2d.<locals>.padding2d_circular_refrQ   r  r   )rQ   r   r   rQ   )rQ   rQ   r   r   c                    rH  rG   rI  r   rL  r2   r8   r     ru  z6module_inputs_torch_nn_CircularPad2d.<locals>.<lambda>)r   r   r   r   c                    rH  rG   rI  r   rM  r2   r8   r     ru  )r   r   r   rQ   )rQ   rQ   r   r   c                    rH  rG   rI  r   rM  r2   r8   r     ru  rV  rW  r2   rM  r8   $module_inputs_torch_nn_CircularPad2d  s.   






rN  c              	      rB  )Nr   c                 S   s  t j| dddd|d  df | | ddddd|d f gdd} t j| dddddd|d  df | | ddddddd|d f gdd} t j| dddddddd|d  df | | ddddddddd|d f gddS )	aN  input:
                [[[[[ 0.,  1.,  2.],
                    [ 3.,  4.,  5.]],
                   [[ 6.,  7.,  8.],
                    [ 9., 10., 11.]]]]]
            pad: (1, 2, 2, 1, 1, 2)
            output: [[[[[ 8.,  6.,  7.,  8.,  6.,  7.],
                        [11.,  9., 10., 11.,  9., 10.],
                        [ 8.,  6.,  7.,  8.,  6.,  7.],
                        [11.,  9., 10., 11.,  9., 10.],
                        [ 8.,  6.,  7.,  8.,  6.,  7.]],

                       [[ 2.,  0.,  1.,  2.,  0.,  1.],
                        [ 5.,  3.,  4.,  5.,  3.,  4.],
                        [ 2.,  0.,  1.,  2.,  0.,  1.],
                        [ 5.,  3.,  4.,  5.,  3.,  4.],
                        [ 2.,  0.,  1.,  2.,  0.,  1.]],

                       [[ 8.,  6.,  7.,  8.,  6.,  7.],
                        [11.,  9., 10., 11.,  9., 10.],
                        [ 8.,  6.,  7.,  8.,  6.,  7.],
                        [11.,  9., 10., 11.,  9., 10.],
                        [ 8.,  6.,  7.,  8.,  6.,  7.]],

                       [[ 2.,  0.,  1.,  2.,  0.,  1.],
                        [ 5.,  3.,  4.,  5.,  3.,  4.],
                        [ 2.,  0.,  1.,  2.,  0.,  1.],
                        [ 5.,  3.,  4.,  5.,  3.,  4.],
                        [ 2.,  0.,  1.,  2.,  0.,  1.]],

                       [[ 8.,  6.,  7.,  8.,  6.,  7.],
                        [11.,  9., 10., 11.,  9., 10.],
                        [ 8.,  6.,  7.,  8.,  6.,  7.],
                        [11.,  9., 10., 11.,  9., 10.],
                        [ 8.,  6.,  7.,  8.,  6.,  7.]]]]]
        Nr   r   r   r   r   r   rQ   rC  rE  r2   r2   r8   padding3d_circular_ref  s   J%VbzDmodule_inputs_torch_nn_CircularPad3d.<locals>.padding3d_circular_refrQ   r1  r   r3  )rQ   rQ   r   r   r   c                    rH  rG   rI  r   rO  r2   r8   r   A  ru  z6module_inputs_torch_nn_CircularPad3d.<locals>.<lambda>)r   r   r   rQ   rQ   r   c                    rH  rG   rI  r   rP  r2   r8   r   F  ru  )r   r   r   rQ   r   r   c                    rH  rG   rI  r   rP  r2   r8   r   K  ru  rV  rW  r2   rP  r8   $module_inputs_torch_nn_CircularPad3d  s.   *






rQ  
TestModule	test_gradcuda)	active_ifrf   test_gradgradtest_non_contiguous_tensors)rU  rv   rf   )   r  TestExpandedWeightModuletest_module)rf   
TestDecomptest_rnn_decomp_modulec                 K   s@  t t|||d}tttddt|dd|dddtjtddtttddt|dd|dd	dtjtd
dtttddt|dd|dddtjtddtttddt|dd|dddddtjtddtttdddt|dd|dd	dtjtd
dtttdddt|dd|dd	dtjtd
dg}|S )Nr   r   r  r   rX  rQ  5input has inconsistent input_size: got 11 expected 10r   r   r      9hidden0 has inconsistent hidden_size: got 21, expected 20r   5Input batch size 3 doesn't match hidden0 batch size 5rQ   z.Expected hidden to be 1D or 2D, got 4D insteadr  r&  	r   r	   r   r   r   r   r   rd   
ValueErrorr  r2   r2   r8   )module_error_inputs_torch_nn_RNN_GRU_Cellt  sr   				
	
8rd  c                 K   s  t t|||d}tttddt|dd|dd|ddfdtjtddtttddt|dd|dd	|dd	fdtjtd
dtttddt|dd|dd|ddfdtjtddtttddt|dd|dddd|ddddfdtjtddg}|S )Nr   r   r  r   rX  rQ  r]  r^  r_  r`  r   ra  rQ   z/Expected hx\[0\] to be 1D or 2D, got 4D insteadrb  r  r2   r2   r8   %module_error_inputs_torch_nn_LSTMCell  sN   			&&re  c                 K   sD   t ttddddtjtddt ttddddtjtddg}|S )Nr   r   rQ   )r   z%hidden_size must be greater than zeror^  z$num_layers must be greater than zero)r   r   r   r   r   rc  )rT   r   rZ   r   r[   r^   r  r2   r2   r8   $module_error_inputs_torch_nn_RNN_GRU  s   rf  c                 K   R   t t|||d}|dd}tt|rtddntdt|ddtjtdd	gS )
Nr   is_constantFrQ   r   r  rQ  z(expected 2D or 3D input \(got 4D input\)r^  	r   r	   r   r   r   r   r   r   rc  rT   r   rZ   r   r[   r^   r   rh  r2   r2   r8   "module_error_inputs_torch_nn_Pad1d     
rk  c                 K   rg  )
Nr   rh  FrQ   r   rK  rQ  z(expected 3D or 4D input \(got 2D input\)r^  ri  rj  r2   r2   r8   "module_error_inputs_torch_nn_Pad2d  rl  rm  c                 K   rg  )
Nr   rh  FrQ   r   rK  rQ  z(expected 4D or 5D input \(got 2D input\)r^  ri  rj  r2   r2   r8   "module_error_inputs_torch_nn_Pad3d  rl  rn  r   )r   r   test_memory_formatr[   )rU  )r   r   r   zSkipped!)r   )r   r   r   )rv   rf   )r   r   r   TTestEagerFusionModuleInfo,test_aot_autograd_symbolic_module_exhaustive#test_aot_autograd_module_exhaustive)rS   r   r   test_check_inplace)rf   rv   rQ   )r  r   i  )version)rv   g-C6?)r   r   r   r   r   r   r   iE  )r  r   r  test_cpu_gpu_parityg{Gzt?)r   r   r   rv   r   r   )rf   rv   rU  )r   rv   r   r   r   r   )atolrtoltest_forwardcpu)r   r   r   )include_halfinclude_bfloat16gQ?r   )r   rv   r   )r{  rz  )r   rv   r   #test_if_train_and_eval_modes_differtest_save_load)r   r   r   )r  )r   rS   r   TestModuleMPStest_non_contiguoustest_factory_kwargs)rS   r   r   r   )rf   rU  gMb`?)r   r   r   r   )r  )r   r   )rS   r   r   r   )rS   r   r   r   r   )r   r   r   	module_db)F(W  rj   unittestcopyr   enumr   	functoolsr   r   r  r   r   r  torch.nn.functionalr   
functionalr'  torch.nn.utils.rnnr   torch.testingr	   #torch.testing._internal.common_cudar
   $torch.testing._internal.common_dtyper   r   r   *torch.testing._internal.common_device_typer   r   r   r   r   r   r   r   r   r   r   2torch.testing._internal.common_methods_invocationsr   !torch.testing._internal.common_nnr   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   $torch.testing._internal.common_utilsr(   r)   r*   r+   r,   r-   r.   typesr/   operatorrF   aoqatquantizable	quantizedr0   rH   __annotations__Module	Container	NLLLoss2d	MaxPool2dr1   rJ   typefrom_iterabler<   r?   r8  strr7   r;   r5   r3   r   rp   r   namespace_namerB   r   r   r   r   r   r   r   r   r   r
  r  r$  r)  r  r@  rE  rF  rH  rN  rX  rb  rp  ru  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#  r)  r.  r1  r?  rF  rS  rd  ro  rp  rq  rv  rx  ry  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r%  r*  r-  r2  r4  r6  r7  r9  r:  r;  r<  r>  r@  rA  rK  rN  rQ  expectedFailurer  #rnn_gru_lstm_module_info_decoratorsrd  re  rf  rk  rm  rn  backendsr   is_availableis_macos_or_newer_macos15_or_newerAdaptiveAvgPool1dAdaptiveAvgPool2d
itemgetterAdaptiveAvgPool3dskipAdaptiveMaxPool1dAdaptiveMaxPool2dAdaptiveMaxPool3d	AvgPool1d	AvgPool2dr   	AvgPool3dBatchNorm1dBatchNorm2dBatchNorm3dCELUConv1dr   Conv2dfloat64Conv3dConvTranspose1dchalfConvTranspose2d	complex32	complex64
complex128ConvTranspose3dCosineEmbeddingLossELUFractionalMaxPool2dFractionalMaxPool3dL1LossSmoothL1Loss
LazyConv1d
LazyConv2d
LazyConv3dLazyConvTranspose1dLazyConvTranspose2dLazyConvTranspose3dLinearBilinearLPPool1dLPPool2dLPPool3d	MaxPool1d	MaxPool3d	KLDivLossMSELossMarginRankingLossMultiLabelMarginLossMultiMarginLossSoftMarginLossMultiLabelSoftMarginLossNLLLossGaussianNLLLossPoissonNLLLossHingeEmbeddingLoss	HuberLossBCELossBCEWithLogitsLossCrossEntropyLossCTCLossGELUGLU	GroupNorm
Hardshrink	HardswishHardtanhInstanceNorm1dInstanceNorm2dInstanceNorm3dLocalResponseNorm	LayerNormRMSNormTransformerEncoderr  TransformerDecoderLayerTransformerMultiheadAttention	EmbeddingReLU	LeakyReLUReLU6PReLURNNCellGRUCellLSTMCellSigmoid
LogSigmoidSiLUSoftmax	Softmax2d
LogSoftmaxSoftminSoftplus
SoftshrinkSoftsignTanh
Tanhshrink	ThresholdMishRNNGRULSTMReflectionPad1dReflectionPad2dReflectionPad3dReplicationPad1dReplicationPad2dReplicationPad3dSELU	ZeroPad1d	ZeroPad2d	ZeroPad3dCircularPad1dCircularPad2dCircularPad3dConstantPad1dConstantPad2dConstantPad3dr  r2   r2   r2   r8   <module>   s  
4@$






	




E	@(C0'.
4%#0*Y@'#'"3"&#'JC.>*%)I


#<+ 
 $,/
?H

Z

n

 
  




  



  7  I



  `




           $
    +    3    ;    A

    K



    
a



    
~     




      
*



      
G      
\      
b
	
      
r      
x
       
        
                
        
        
'


        
3        
9        
C        
M
        
V        
\
        
e        
j        
o        
u
        
~         
          
          
          
'
          
.          
1
          
?          
I


          
V          
Y          
`          
g          
t          
           
            

            
            
+            
6            
D            
K            
T

            
a            
d            
k            
r            
v            
z            
~

             
              
              
              
              
               
(              
.              
5              
;              
>

              
K

              
X              
_              
e              
k              
p              
x              
{               
                
                
                
                
#                
*                
-                
3                
9                
=                
A                
H                
K                
Q        