o
    IhX                     @   s  d dl Z d dlmZmZ d dlmZmZmZmZ d dl	Z	d dl
mZ d dl	mZ d dlmZ defddZd	eeeef eeeef  f d
eeeef eeeef  f deeee f fddZdejdedeeeef  f deegef deeedf eedf eeee f f fddZdejdeeedf eedf eeee f f fddZdejdeeedf eedf eeee f f fddZ	dFdejdee dee deddf
ddZdejdeeee f dee dee fdd Z	dFdejdee d!ee deddf
d"d#Z	$	$dGd%ejd&ee d'ee d!ee d(ee dejfd)d*Zd%ejfd+d,Zd%ejfd-d.ZG d/d0 d0ejZ G d1d2 d2ejZ!	dFd%ejd3edee!eedf f fd4d5Z"	dFd%ejd3edee eedf eedf f fd6d7Z#d8eeedf df deedf fd9d:Z$d;eej dee eedf eedf f fd<d=Z%	$	>dHd?e&ej d@eed$ ee' f dAe	j(j)fdBdCZ*	$	>dHd?e&ej d@eed$ ee' f dAe	j(j)fdDdEZ+dS )I    N)IterableSequence)AnyCallableNoReturnUnion)Tensor)NamedMemberAccessorreturnc                   C   s   t d)Na$  make_functional(module): we don't yet support models that do parameter tying (also sometimes known as weight sharing). Please try to rewrite your model by replacing all instances of the tied parameter with another and/or comment your support in https://github.com/pytorch/functorch/issues/446)RuntimeError r   r   T/var/www/vscode/kcb/lib/python3.10/site-packages/torch/_functorch/make_functional.pyraise_parameter_tying_error   s   r   named_paramstied_named_paramsc                 C   s   t | } t |}t|  }t| }||sJ i }|  D ]
\}}|g f||< q!| D ]\}}||v s:J || d | q0t | S )a[  
    named_params is a dictionary of tensors: {'A': A, 'B': B}
    tied_named_params is another dictionary of tensors {'A': A, 'B': B, 'B_tied': B}
    with potentially tied (or 'duplicated') tensors

    This function creates a mapping from the names in named_params to the
    names in tied_named_params: {'A': ['A'], 'B': ['B', 'B_tied']}.
       )dictsetkeysissubsetitemsappendvalues)r   r   tensors_dict_keystied_tensors_dict_keystensor_to_mappingkeytensorr   r   r   create_names_map!   s   r   modnamed_members.subclassc                 C   s   t |dd}t |dd}t||}i }t| }|D ]\}}	|	|vr.|tj|	dd||	< ||	 }
|||
 qt|dkrDd\}}nt| \}}|||fS )NF)remove_duplicateTmeta)devicer   r   r   )tupler   r	   torch
empty_like
set_tensorlenzip)r   r    r!   all_named_membersunique_named_members	names_mapmemoaccessornamepreplacementnamesparamsr   r   r   _extract_members=   s   


r6   c                 C   s   t | | jtjS )aZ  
    This function removes all the Parameters from the model and
    return them as a tuple as well as their original attribute names.
    The weights must be re-loaded with `load_weights` before the model
    can be used again.
    Note that this function modifies the model in place and after this
    call, mod.parameters() will be empty.
    )r6   named_parametersnn	Parameterr   r   r   r   extract_weightsV   s   r;   c                 C   s   t | | jdd S )Nc                 S   s   | S Nr   )xr   r   r   <lambda>g   s    z!extract_buffers.<locals>.<lambda>)r6   named_buffersr:   r   r   r   extract_buffersd   s   r@   Fr4   r5   	as_paramsc                 C   s*   t | }|rdd |D }||| dS )a	  
    Reload a set of weights so that `mod` can be used again to perform a forward pass.
    Note that the `params` are regular Tensors (that can have history) and so are left
    as Tensors. This means that mod.parameters() will still be empty after this call.
    c                 S   s   g | ]}t |qS r   )r8   r9   ).0r2   r   r   r   
<listcomp>w   s    z load_weights.<locals>.<listcomp>Nr	   set_tensors)r   r4   r5   rA   r0   r   r   r   load_weightsj   s   rF   r.   elemsc           
      C   sf   g }t | }t| |D ]#\\}}}t|D ]\}}	|dkr)|||	| q||	| qq|S )Nr   )r	   r+   r   	enumerater   swap_tensorr)   )
r   r.   rG   resultr0   _
attr_nameselemi	attr_namer   r   r   _swap_state{   s   rP   buffersc                 C   s   t | }||| d S r<   rD   )r   r4   rQ   rA   r0   r   r   r   load_buffers   s   rR   r   modelweightsweight_namesbuffer_namesc                 C   sP   t |t |ks
J t| || t |dkr&t |t |ks J t| || | S )zload_state(model, weights, weight_names, buffers=(), buffer_names=()) -> model

    load_state takes `weights` and `buffers` and assigns them to the model.
    This is the inverse operation of `make_functional_deprecated_v1`.
    r   )r*   rF   rR   )rS   rT   rU   rQ   rV   r   r   r   
load_state   s   rW   c                    sF   t  }t|dkrtdt\} } fdd}|| fS )a  make_functional_deprecated_v1(model) -> weights, func, weight_names

    Given an nn.Module, make_functional_deprecated_v1 extracts the state (weights)
    and returns a functional version of the model, `func`. This makes
    it so that it is possible use transforms over the parameters of
    `model`.

    `func` can be invoked as follows:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, func, _ = make_functional_deprecated_v1(model)
    func(weights, (x,))
    ```

    And here is an example of applying the grad transform:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, _, func = make_functional_deprecated_v1(model)
    grad_weights = grad(func)(weights, (x,))
    ```

    To put the state back into a model, use `load_state`.
    r   zmake_functional_deprecated_v1(model): `model` has buffers. Please use make_functional_with_buffers_deprecated_v1(model) instead.c                    s   t }t| |  || S r<   )copydeepcopyrF   )rT   datamutable_modeldescriptorsrS   r   r   fun   s   
z*make_functional_deprecated_v1.<locals>.fun)listrQ   r*   r   r;   )rS   rQ   rT   rK   r^   r   r\   r   make_functional_deprecated_v1   s   
r`   c                    s:   t \}}t\} } fdd}||| fS )a`  make_functional_with_buffers_deprecated_v1(model) -> weights, buffers, func, weight_names, buffer_names

    Given an nn.Module, make_functional_with_buffers_deprecated_v1 extracts the state (weights and buffers)
    and returns a functional version of the model, `func`.

    `func` can be invoked as follows:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, buffers, func, _, _ = make_functional_with_buffers_deprecated_v1(model)
    func(weights, buffers, (x,))
    ```

    And here is an example of applying the grad transform:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, buffers, func, _, _ = make_functional_with_buffers_deprecated_v1(model)
    func(weights, buffers, (x,))
    grad_weights = grad(func)(weights, buffers, (x,))
    ```

    To put the state back into a model, use `load_state`.
    c                    s*   t }t||  t| | || S r<   )rX   rY   rF   rR   )rT   rQ   rZ   r[   buf_descriptorsrS   weight_descriptorsr   r   r^      s   
z7make_functional_with_buffers_deprecated_v1.<locals>.fun)r;   r@   )rS   rT   rK   rQ   r^   r   ra   r   *make_functional_with_buffers_deprecated_v1   s   rd   c                       s   e Zd ZdZdejdeedf deedf deee	e f deee	e f dd	f fd
dZ
e	ddejdeded eedf eedf f fddZdee dee defddZ  ZS )FunctionalModuleWithBufferszW
    This is the callable object returned by :func:`make_functional_with_buffers`.
    stateless_modelparam_names.rV   param_names_mapbuffer_names_mapr
   Nc                    s6   t    || _|| _|| _t|| _| j| d S r<   )super__init__rf   rg   rV   r   all_names_mapupdate)selfrf   rg   rV   rh   ri   	__class__r   r   rk      s   

z$FunctionalModuleWithBuffers.__init__FrS   disable_autograd_trackingc           
      C   sT   t | }t|\}}}t|\}}}|r|D ]}	|	d qt|||||||fS NF)rX   rY   r;   r@   requires_grad_re   )
rS   rq   
model_copyr5   rg   rh   rQ   rV   ri   paramr   r   r   _create_from  s   

z(FunctionalModuleWithBuffers._create_fromr5   rQ   c              
   O   sR   t | j| jt|t| }z| j|i |W t | j| j| S t | j| j| w r<   )rP   rf   rl   r&   )rn   r5   rQ   argskwargs	old_stater   r   r   forward  s   $z#FunctionalModuleWithBuffers.forwardF__name__
__module____qualname____doc__r8   Moduler&   strr   r_   rk   staticmethodboolr   rv   r   r   rz   __classcell__r   r   ro   r   re      s>    

re   c                       s   e Zd ZdZdejdeedf deee	e f ddf fdd	Z
e	
ddejdeded eedf f fddZdee defddZ  ZS )FunctionalModulezJ
    This is the callable object returned by :func:`make_functional`.
    rf   rg   .r.   r
   Nc                    s    t    || _|| _|| _d S r<   )rj   rk   rf   rg   r.   )rn   rf   rg   r.   ro   r   r   rk   3  s   

zFunctionalModule.__init__FrS   rq   c                 C   s@   t | }t|\}}}|r|D ]}|d qt||||fS rr   )rX   rY   r;   rs   r   )rS   rq   rt   r5   rg   r.   ru   r   r   r   rv   >  s   
zFunctionalModule._create_fromr5   c              
   O   sF   t | j| j|}z| j|i |W t | j| j| S t | j| j| w r<   )rP   rf   r.   )rn   r5   rw   rx   ry   r   r   r   rz   J  s   $zFunctionalModule.forwardr{   r|   r   r   ro   r   r   .  s*    
r   rq   c                 C   s.   t |  }t|dkrtdtj| |dS )a  make_functional(model, disable_autograd_tracking=False) -> func, params

    Given a ``torch.nn.Module``, :func:`make_functional` extracts the state
    (params) and returns a functional version of the model, ``func``. This
    makes it so that it is possible use transforms over the parameters of
    ``model``.

    ``func`` can be invoked as follows:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional

        x = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params = make_functional(model)
        func(params, x)

    And here is an example of applying the grad transform over the parameters
    of a model.

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional, grad

        x = torch.randn(4, 3)
        t = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params = make_functional(model)

        def compute_loss(params, x, t):
            y = func(params, x)
            return nn.functional.mse_loss(y, t)

        grad_weights = grad(compute_loss)(params, x, t)

    If the model has any buffers, please use :func:`make_functional_with_buffers` instead.

    Args:
        model (torch.nn.Module): Input model.
        disable_autograd_tracking (bool): Flag to disable gradients tracking for output parameters.
            The returned params are unrelated to the set of params from the original model. If False (default),
            the params will have ``requires_grad=True`` on them (aka they will be trackable with regular
            PyTorch autograd), matching the requires_grad-ness of the params from the original model.
            Otherwise, the returned params will have ``requires_grad=False``. Default, False.
            If you plan on using regular PyTorch autograd (e.g., if you want to call ``.backward()`` or
            ``torch.autograd.grad()``, then set ``disable_autograd_tracking=False``.
            Otherwise, if you're only planning on using functorch's gradient transforms,
            then please set ``disable_autograd_tracking=True`` to avoid unnecessarily tracking
            history with PyTorch autograd.

    r   zdmake_functional(model): `model` has buffers. Please use make_functional_with_buffers(model) instead.rq   )r_   rQ   r*   r   r   rv   )rS   rq   rQ   r   r   r   make_functionalT  s   ;r   c                 C   s   t j| |dS )a  make_functional_with_buffers(model, disable_autograd_tracking=False) -> func, params, buffers

    Given a ``torch.nn.Module``, make_functional_with_buffers extracts the
    state (params and buffers) and returns a functional version of the model
    ``func`` that can be invoked like a function.

    ``func`` can be invoked as follows:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional_with_buffers

        x = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params, buffers = make_functional_with_buffers(model)
        func(params, buffers, x)

    And here is an example of applying the grad transform over the parameters
    of a model:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional_with_buffers, grad

        x = torch.randn(4, 3)
        t = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params, buffers = make_functional_with_buffers(model)

        def compute_loss(params, buffers, x, t):
            y = func(params, buffers, x)
            return nn.functional.mse_loss(y, t)

        grad_weights = grad(compute_loss)(params, buffers, x, t)

    Args:
        model (torch.nn.Module): Input model.
        disable_autograd_tracking (bool): Flag to disable gradients tracking for output parameters.
            The returned params are unrelated to the set of params from the original model. If False (default),
            the params will have ``requires_grad=True`` on them (aka they will be trackable with regular
            PyTorch autograd), matching the requires_grad-ness of the params from the original model.
            Otherwise, the returned params will have ``requires_grad=False``. Default, False.
            If you plan on using regular PyTorch autograd (e.g., if you want to call ``.backward()`` or
            ``torch.autograd.grad()``, then set ``disable_autograd_tracking=False``.
            Otherwise, if you're only planning on using functorch's gradient transforms,
            then please set ``disable_autograd_tracking=True`` to avoid unnecessarily tracking
            history with PyTorch autograd.

    r   )re   rv   )rS   rq   r   r   r   make_functional_with_buffers  s   8r   tuple_of_tuple_of_tensorsc                 C   s"   t t|  } t dd | D }|S )Nc                 s       | ]
}t | V  qd S r<   r'   stackdetachrB   shardsr   r   r   	<genexpr>  s    
z"transpose_stack.<locals>.<genexpr>)r&   r+   )r   resultsr   r   r   transpose_stack  s
   r   modelsc                    s   t | dkr
tdtdd | D s tdd | D s tdt| d  t fdd| D s5tdtd	d
 | D  \}}}t|}t|}|d ||fS )a(  combine_state_for_ensemble(models) -> func, params, buffers

    Prepares a list of torch.nn.Modules for ensembling with :func:`vmap`.

    Given a list of ``M`` ``nn.Modules`` of the same class, stacks all of their
    parameters and buffers together to make ``params`` and ``buffers``.
    Each parameter and buffer in the result will have an additional dimension
    of size ``M``.

    :func:`combine_state_for_ensemble` also returns ``func``, a functional
    version of one of the models in :attr:`models`. One cannot directly run
    ``func(params, buffers, *args, **kwargs)`` directly, you probably want to
    use ``vmap(func, ...)(params, buffers, *args, **kwargs)``

    Here's an example of how to ensemble over a very simple model:

    .. code-block:: python

        num_models = 5
        batch_size = 64
        in_features, out_features = 3, 3
        models = [torch.nn.Linear(in_features, out_features) for i in range(num_models)]
        data = torch.randn(batch_size, 3)

        fmodel, params, buffers = combine_state_for_ensemble(models)
        output = vmap(fmodel, (0, 0, None))(params, buffers, data)

        assert output.shape == (num_models, batch_size, out_features)

    .. warning::
        All of the modules being stacked together must be the same (except for
        the values of their parameters/buffers). For example, they should be in the
        same mode (training vs eval).

        This API is subject to change -- we're investigating better ways to
        create ensembles and would love your feedback how to improve this.
    r   z?combine_state_for_ensemble: Expected at least one model, got 0.c                 s   s    | ]}|j V  qd S r<   trainingrB   mr   r   r   r     s    z-combine_state_for_ensemble.<locals>.<genexpr>c                 s   s    | ]}|j  V  qd S r<   r   r   r   r   r   r     s    zTcombine_state_for_ensemble: Expected all models to have the same training/eval mode.c                 3   s    | ]	}t | kV  qd S r<   )typer   
model0_typr   r   r         zHcombine_state_for_ensemble: Expected all models to be of the same class.c                 S   s   g | ]}t |qS r   )r   rB   rS   r   r   r   rC     s    z.combine_state_for_ensemble.<locals>.<listcomp>)r*   r   allr   r+   r   )r   funcsr5   rQ   r   r   r   combine_state_for_ensemble  s&   ($
r   cpumodel_classensemble_shaper$   c                        fdd}|S )Nc            	         s   t dkr
tdt dkr i }t|S d }|dkr.td| dt fddt|D }t i \}}}tdd |D }tt| }td	d |D }|||fS )
N   ,NYI: ensemble_shape with more than 1 elementr   num_models  should be > 0c                 3   $    | ]} i  V  qd S r<   torB   rK   rw   r$   rx   r   r   r   r   .      
z3functional_init.<locals>.wrapped.<locals>.<genexpr>c                 s   s    | ]	}t |d  V  qdS )r   N)r`   r   r   r   r   r   2  r   c                 s   r   r<   r   r   r   r   r   r   4      )r*   
ValueErrorr   r`   r&   ranger+   )	rw   rx   rS   
num_modelsr   rK   fnr4   rT   r$   r   r   rw   rx   r   wrapped$  s    
z functional_init.<locals>.wrappedr   r   r   r$   r   r   r   r   functional_init  s   r   c                    r   )Nc                     s   t dkr
tdt dkr i }t|S d }|dkr.td| dt fddt|D }t i \}}}}}ttdd |D  \}	}
tt|	 }	td	d |	D }	tt|
 }
td
d |
D }
|	|
|||fS )Nr   r   r   r   r   c                 3   r   r<   r   r   r   r   r   r   I  r   z@functional_init_with_buffers.<locals>.wrapped.<locals>.<genexpr>c                 s   s     | ]}t |d d V  qd S )Nr   )rd   r   r   r   r   r   T  s
    
c                 s   r   r<   r   r   r   r   r   r   Z  r   c                 s   r   r<   r   r   r   r   r   r   \  r   )r*   r   r   r`   r&   r   rd   r+   )rw   rx   rS   r   r   rK   r   rU   rV   rT   rQ   r   r   r   r   ?  s8   	z-functional_init_with_buffers.<locals>.wrappedr   r   r   r   r   functional_init_with_buffers:  s    r   r{   r%   )r   r   ),rX   collections.abcr   r   typingr   r   r   r   r'   torch.nnr8   r   %torch.nn.utils._named_member_accessorr	   r   r   r   r&   r_   r   r   r6   r;   r@   r   rF   rP   rR   rW   r`   rd   re   r   r   r   r   r   r   inttypesDevicer   r   r   r   r   r   <module>   s  

(
(
(





*%8'
G
=



@
