o
    h0m                     @  s  d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dlm
Z
 d dlmZ d d	lmZ d dlZd d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ ededZerd dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dlm&Z& d dl'm(Z( d dl'm)Z) d dl'm*Z* ede#dZ+ej,ej-ej.hZ/dZ0e1e0ej2Z3dZ4e1e4ej2Z5d Z6e1e6ej2Z7d!Z8e1e8ej2Z9dyd(d)Z:dzd1d2Z;d{d5d6Z<d|d9d:Z=ej>d;d<d}dAdBZ?d~dDdEZ@ddIdJZAddKdLZBddOdPZCej>d;d<ddRdSZDddWdXZEddZd[ZFdd]d^ZGddcddZHddedfZIddjdkZJddqdrZKddudvZLG dwdx dxed#ef ZMdS )    )annotationsN)suppress)TYPE_CHECKING)Any)Callable)Sequence)Sized)TypeVar)EagerSeriesNamespace)_DelayedCategories)ColumnNotFoundError)DuplicateError)
ShapeError)Implementation)Version)import_dtypes_module)isinstance_or_issubclassT)bound)DtypePandasLikeDataFrame)PandasLikeExprPandasLikeSeries)DType)DTypeBackend)TimeUnit)_1DArrayExprTa  ^
    datetime64\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
        (?:,                                      # Begin non-capturing group for optional timezone
            \s*                                   # Optional whitespace after comma
            (?P<time_zone>                        # Start named group for timezone
                [a-zA-Z\/]+                       # Match timezone name, e.g., UTC, America/New_York
                (?:[+-]\d{2}:\d{2})?              # Optional offset in format +HH:MM or -HH:MM
                |                                 # OR
                pytz\.FixedOffset\(\d+\)          # Match pytz.FixedOffset with integer offset in parentheses
            )                                     # End time_zone group
        )?                                        # End optional timezone group
    \]                                            # Closing bracket for datetime64
$a  ^
    timestamp\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
        (?:,                                      # Begin non-capturing group for optional timezone
            \s?tz=                                # Match "tz=" prefix
            (?P<time_zone>                        # Start named group for timezone
                [a-zA-Z\/]*                       # Match timezone name (e.g., UTC, America/New_York)
                (?:                               # Begin optional non-capturing group for offset
                    [+-]\d{2}:\d{2}               # Match offset in format +HH:MM or -HH:MM
                )?                                # End optional offset group
            )                                     # End time_zone group
        )?                                        # End optional timezone group
    \]                                            # Closing bracket for timestamp
    \[pyarrow\]                                   # Literal string "[pyarrow]"
$z^
    timedelta64\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
    \]                                            # Closing bracket for timedelta64
$a  ^
    duration\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
    \]                                            # Closing bracket for duration
    \[pyarrow\]                                   # Literal string "[pyarrow]"
$lhsr   rhsPandasLikeSeries | objectreturn7tuple[pd.Series[Any] | object, pd.Series[Any] | object]c                 C  s   ddl m} ddlm} | jj}t||rtS | jr+t||r+|js+| jj	d |jfS t||rV|jr<| j|jj	d fS |jj|urP| jt
|j||j|jdfS | j|jfS t|trad}t|| j|fS )zValidate RHS of binary operation.

    If the comparison isn't supported, return `NotImplemented` so that the
    "right-hand-side" operation (e.g. `__radd__`) can be tried.
    r   r   r   implementationbackend_versionz$Expected Series or scalar, got list.)narwhals._pandas_like.dataframer   narwhals._pandas_like.seriesr   nativeindex
isinstanceNotImplemented
_broadcastiloc	set_index_implementation_backend_versionlist	TypeError)r    r!   r   r   	lhs_indexmsg r7   O/var/www/vscode/kcb/lib/python3.10/site-packages/narwhals/_pandas_like/utils.pyalign_and_extract_native]   s0   

	

r9   objr+   r   r&   r   r'   tuple[int, ...]c                C  s   t || jr t| }t|  }kr d| d| }t||tju r0| jdd} || _| S |tj	u r>|dk r>ddi}ni }|tj	u rVd|  krOdk rVn nd|d	< n	 | j
|fd
di|S )z}Wrapper around pandas' set_axis to set object index.

    We can set `copy` / `inplace` based on implementation/version.
    zExpected object of length z, got length: Fdeep   inplacer?         copyaxisr   )r,   to_native_namespaceIndexlenr   r   CUDFrE   r+   PANDASset_axis)r:   r+   r&   r'   expected_len
actual_lenr6   kwargsr7   r7   r8   r0      s&   






r0   columns	list[str]c                C  s   |t ju r| jdd} || _| S |t ju r|dk rddi}ni }|t ju r6d|  kr/dk r6n nd|d< n	 | j|fdd	i|S )
zWrapper around pandas' set_axis to set object columns.

    We can set `copy` / `inplace` based on implementation/version.
    Fr<   r>   r@   rA   rC   rE   rF   r?   )r   rJ   rE   rP   rK   rL   )r:   rP   r&   r'   rO   r7   r7   r8   set_columns   s   




rR   argsrO   c                O  s:   |t ju r|dkr| j|i |S | j|i |ddiS )zXWrapper around pandas' rename so that we can set `copy` based on implementation/version.rC   rE   F)r   rK   rename)r:   r&   r'   rS   rO   r7   r7   r8   rT      s   
rT      )maxsizenative_dtypeversionr   r   c                   s  t  }t|}|dv r| S |dv r| S |dv r | S |dv r(| S |dv r0| S |dv r8| S |dv r@| S |dv rH|	 S |d	v rP|
 S |d
v rX| S |dv r`| S |dv rh| S |drq| S |dkrt | fddS t| }st| }r|d}|d}|||S t| }st| }r|d}||S |dkr| S |dr|dr| S |dr|dr| S |dr|dr| S | S )N>   int64[pyarrow]Int64int64Int64[pyarrow]>   int32[pyarrow]Int32int32Int32[pyarrow]>   int16[pyarrow]Int16int16Int16[pyarrow]>   int8[pyarrow]Int8int8Int8[pyarrow]>   uint64[pyarrow]UInt64uint64UInt64[pyarrow]>   uint32[pyarrow]UInt32uint32UInt32[pyarrow]>   uint16[pyarrow]UInt16uint16UInt16[pyarrow]>   uint8[pyarrow]UInt8uint8UInt8[pyarrow]>   double[pyarrow]float64[pyarrow]Float64float64Float64[pyarrow]>   float[pyarrow]float32[pyarrow]Float32float32Float32[pyarrow]>   string[python]large_string[pyarrow]stringstring[pyarrow]>   bool[pyarrow]boolbooleanboolean[pyarrow]zdictionary<categoryc                     s
   t  jS N)tuple
categoriesr7   rW   r7   r8   <lambda>  s   
 z5non_object_native_to_narwhals_dtype.<locals>.<lambda>	time_unit	time_zonezdate32[day][pyarrow]decimalz	[pyarrow]timebinary)strr   rZ   r^   rb   rf   rj   rn   rr   rv   r{   r   StringBoolean
startswithCategorical$native_categorical_to_narwhals_dtypePATTERN_PD_DATETIMEmatchPATTERN_PA_DATETIMEgroupDatetimePATTERN_PD_DURATIONPATTERN_PA_DURATIONDurationDateendswithDecimalTimeBinaryUnknown)rW   rX   dtypedtypesmatch_dt_time_unitdt_time_zonedu_time_unitr7   r   r8   #non_object_native_to_narwhals_dtype   sh   




r   seriesc                 C  st   t |}|tju r| S tjjj| ddd}|dkr!| S |dkr.|t	j
ur.| S |dkr6| S | S )Nd   T)skipnar   empty)r   r   rJ   r   pdapitypesinfer_dtypeheadr   V1Object)r   rX   r&   r   inferred_dtyper7   r7   r8   object_native_to_narwhals_dtype  s   
r   pd.CategoricalDtypeget_categoriesCallable[[], tuple[str, ...]]c                 C  s6   t |}|tju r| S | jr|t|S | S r   )r   r   r   r   orderedEnumr   )rW   rX   r   r   r7   r7   r8   r   1  s   
r   c                   s   t  }|dr!ddlm} t dr|  |S | j|S |dkr3| r3t | fddS |dkr<t	 |S |t
ju rIt|}| S d	}t|)
N)
large_listr3   structfixed_size_listr   )native_to_narwhals_dtypeto_arrowr   c                     s   t  j S r   )r   r   	to_pandasr7   r   r7   r8   r   Q  s    z*native_to_narwhals_dtype.<locals>.<lambda>objectz;Unreachable code, object dtype should be handled separately)r   r   narwhals._arrow.utilsr   hasattrr   pyarrow_dtypeis_cudfr   r   r   DASKr   r   AssertionError)rW   rX   r&   	str_dtypearrow_native_to_narwhals_dtyper   r6   r7   r   r8   r   >  s$   



r   r   r   c                 C  s   |t ju rdS ttdrt| tjrdS tt- t }t| tj	j
jr5t| d|du r=	 W d   dS W d   dS W d   dS 1 sHw   Y  dS )zjGet dtype backend for pandas type.

    Matches pandas' `dtype_backend` argument in `convert_dtypes`.
    N
ArrowDtypepyarrowbasenumpy_nullable)r   rJ   r   r   r,   r   r   AttributeErrorr   r   
extensionsExtensionDtypegetattr)r   r&   sentinelr7   r7   r8   get_dtype_backend`  s&   



r   r   c                 C  s   t | |dkS )Nr   )r   )r   r&   r7   r7   r8   is_pyarrow_dtype_backends  s   r   DType | type[DType]dtype_backendstr | PandasDtypec              
   C  s~  |d ur|dvrd| d}t |t|}t| |jr"d}t|t| |jr6|dkr.dS |dkr4dS d	S t| |jrJ|dkrBd
S |dkrHdS dS t| |jr^|dkrVdS |dkr\dS dS t| |jrr|dkrjdS |dkrpdS dS t| |j	r|dkr~dS |dkrdS dS t| |j
r|dkrdS |dkrdS dS t| |jr|dkrdS |dkrdS dS t| |jr|dkrdS |dkrdS dS t| |jr|dkrdS |dkrd S d!S t| |jr|dkrd"S |dkrd#S d$S t| |jr|dkrd%S |dkrd&S tS t| |jr|dkrd'S |dkrd(S d)S t| |jrd*S t| |jre|tju r3|d+k r3d,}n| j}|dkrP| j }rFd-| nd.}	d/| |	 d0S | j }r[d1| nd.}	d2| |	 d3S t| |jr|tju rz|d+k rzd,}
n| j}
|dkrd4|
 d0S d5|
 d3S t| |jrzd6d l}W d8S  ty   d7}Y d8S w t| |jr|tju rd9}t|t| |jr|  }|j!| j"d:d;S d<}t |t| |j#|j$|j%|j&|j'fr6|tju r&|d=kr&z
d6d l(}d6d l}W n t)y } zd>|  d?|j* }t)||d }~ww d6d@l+m,} |-|| |dAS dB|  dC| dD| dE}t|dF|  }t.|)GN>   r   r   z;Expected one of {None, 'pyarrow', 'numpy_nullable'}, got: ''z(Casting to Decimal is not supported yet.r   r}   r   r{   r|   r   r   r   r\   rZ   r[   r`   r^   r_   rd   rb   rc   rh   rf   rg   rl   rj   rk   rp   rn   ro   rt   rr   rs   rx   rv   rw   r   r   r   r   r   r   )   nsz, tz= z
timestamp[z
][pyarrow]z, zdatetime64[]z	duration[ztimedelta64[r   z-PyArrow>=11.0.0 is required for `Date` dtype.zdate32[pyarrow]z9Converting to Enum is not supported in narwhals.stable.v1T)r   z9Can not cast / initialize Enum without categories present)r   r   zUnable to convert to z  to to the following exception: )narwhals_to_native_dtype)rX   zConverting to z+ dtype is not supported for implementation z and version .zUnknown dtype: )/
ValueErrorr   r   r   NotImplementedErrorr{   r   rZ   r^   rb   rf   rj   rn   rr   rv   r   r   r   r   r   r   rK   r   r   r   r   r   ModuleNotFoundErrorr   r   r   r,   rG   CategoricalDtyper   StructArrayListr   r   pandasImportErrorr6   r   r   r   r   )r   r   r&   r'   rX   r6   r   r   tztz_partr   par   r   excarrow_narwhals_to_native_dtyper7   r7   r8   r   x  s  








r   list[PandasLikeSeries]c                  G  s   | d   }dd | D }t|}| || jj}g }| D ]<}|jr=|||j|jjd g| ||j	|jj
d q|jj|urU||t|j||j|jd q|| q|S )Nr   c                 S  s   g | ]}t |qS r7   )rI   ).0sr7   r7   r8   
<listcomp>!  s    z/align_series_full_broadcast.<locals>.<listcomp>)r+   namer   r%   )__native_namespace__maxr+   r*   r.   append_with_nativeSeriesr/   r   r   r0   r1   r2   )r   native_namespacelengths
max_lengthidx	reindexedr   r7   r7   r8   align_series_full_broadcast  s<   r  r   c                 C  s,   dt | v rdS t |  t | krdS dS )Nr   r\   rZ   r[   )r   lower)r   r7   r7   r8   int_dtype_mapperC  s
   r  r   pd.Series[int]original_time_unitr   c                 C  s   |dkr|dkr| }|S |dkr| d }|S | d }|S |dkr8|dkr*| d }|S |dkr2| }|S | d }|S |dkrT|dkrF| d }|S |dkrP| d }|S | }|S |dkrr|dkrb| d }|S |dkrl| d }|S | d }|S d| d	}t |)
Nr   us  @B msr    ʚ;zunexpected time unit zA, please report a bug at https://github.com/narwhals-dev/narwhals)r   )r   r  r   resultr6   r7   r7   r8   calculate_timestamp_datetimeK  sL   r  c                 C  s<   | d } |dkr| d }|S |dkr| d }|S | d }|S )NiQ r   r  r	  r  r
  r7   )r   r   r  r7   r7   r8   calculate_timestamp_datep  s   r  dfcolumn_nameslist[str] | _1DArrayc              
     s   t || jd krt|| jkr| S | jjjdks"|tju rA|dk rA| j   fdd|D }|r8t	
| | jdd|f S z| | W S  tyg } z| j   fdd|D }t	
| |d}~ww )zsSelect columns by name.

    Prefer this over `df.loc[:, column_names]` as it's
    generally more performant.
    r?   brA   c                      g | ]}| vr|qS r7   r7   r   xavailable_columnsr7   r8   r         z*select_columns_by_name.<locals>.<listcomp>Nc                   r  r7   r7   r  r  r7   r8   r     r  )rI   shapeallrP   r   kindr   rK   tolistr   'from_missing_and_available_column_nameslocKeyError)r  r  r'   r&   missing_columnser7   r  r8   select_columns_by_name{  s.    


r$  r   valuesSequence[str]aggregate_function
str | Nonec                   s   t | j | jtju r5t fdd| jg ||| j D r(d}t	|| j
j||||dd}|S | j
j||||ddd}|S )Nc                 3  s    | ]}| j kV  qd S r   )r   r  r   r7   r8   	<genexpr>  s
    
zpivot_table.<locals>.<genexpr>z=`pivot` with Categoricals is not implemented for cuDF backendF)r%  r+   rP   aggfuncmarginsT)r%  r+   rP   r+  r,  observed)r   _versionr1   r   rJ   anysimple_selectschemar%  r   _native_framepivot_table)r  r%  r+   rP   r'  r6   r  r7   r)  r8   r3    s0   
r3  pd.Index[str]Nonec                 C  s   zt |  }W n ty   d|  }t|d w t | |krMddlm} || }d}| D ]\}}|dkrC|d| d| d7 }q0d	| }t|d S )
Nz7Expected hashable (e.g. str or int) column names, got: r   )Counterr   r?   z
- 'z' z timesz"Expected unique column names, got:)rI   drop_duplicates	Exceptionr   collectionsr6  itemsr   )rP   len_unique_columnsr6   r6  counterkeyvaluer7   r7   r8   check_column_names_are_unique  s"   


r?  c                   @  s6   e Zd ZedddZedddZedd	d
ZdS )PandasLikeSeriesNamespacer#   r   c                 C     | j jS r   )	compliantr1   selfr7   r7   r8   r&        z(PandasLikeSeriesNamespace.implementationr;   c                 C  rA  r   )rB  r2   rC  r7   r7   r8   r'     rE  z)PandasLikeSeriesNamespace.backend_versionr   c                 C  rA  r   )rB  r.  rC  r7   r7   r8   rX     rE  z!PandasLikeSeriesNamespace.versionN)r#   r   )r#   r;   )r#   r   )__name__
__module____qualname__propertyr&   r'   rX   r7   r7   r7   r8   r@    s    r@  )r    r   r!   r"   r#   r$   )
r:   r   r+   r   r&   r   r'   r;   r#   r   )
r:   r   rP   rQ   r&   r   r'   r;   r#   r   )r:   r   rS   r   r&   r   r'   r;   rO   r   r#   r   )rW   r   rX   r   r#   r   )r   r   rX   r   r&   r   r#   r   )rW   r   rX   r   r   r   r#   r   )rW   r   rX   r   r&   r   r#   r   )r   r   r&   r   r#   r   )r   r   r&   r   r#   r   )r   r   r   r   r&   r   r'   r;   rX   r   r#   r   )r   r   r#   r   )r   r   r#   r   )r   r  r  r   r   r   r#   r  )r   r  r   r   r#   r  )
r  r   r  r  r'   r;   r&   r   r#   r   )r  r   r%  r&  r+   r&  rP   r&  r'  r(  r#   r   )rP   r4  r#   r5  )N
__future__r   	functoolsre
contextlibr   typingr   r   r   r   r   r	   r   r   narwhals._compliant.seriesr
   narwhals.dtypesr   narwhals.exceptionsr   r   r   narwhals.utilsr   r   r   r   r   pandas._typingr   PandasDtyper(   r   narwhals._pandas_like.exprr   r)   r   r   narwhals.typingr   r   r   r   rK   rJ   MODINPANDAS_LIKE_IMPLEMENTATIONPD_DATETIME_RGXcompileVERBOSEr   PA_DATETIME_RGXr   PD_DURATION_RGXr   PA_DURATION_RGXr   r9   r0   rR   rT   	lru_cacher   r   r   r   r   r   r   r  r  r  r  r$  r3  r?  r@  r7   r7   r7   r8   <module>   s    

)
#


D


"

 
 
,

%

#
#