o
    hYN                     @  s<  U d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dlm
Z
 d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm Z  d dlm!Z! d dlm"Z" d dlm#Z# erd dl$m%Z% d dl&Z'd dl(m)Z) d dl*m+Z+ d dl,m-Z- d d l.m/Z/ d d!l.m0Z0 d d"l.m1Z1 d d#l2m3Z3 d d$l4m5Z5 d d%l6m7Z7 d d&l8m9Z9 d d'lm:Z: d d(lm;Z; d d)lm<Z< d d*lm=Z= e+e	e	e	e	e	f Z>e	Z?d+e@d,< 	 G d-d. d.ed/ ZAdS )0    )annotationsN)reduce)and_)TYPE_CHECKING)Any)Iterator)Mapping)Sequenceis_native_spark_like)evaluate_exprs)import_functions)import_native_dtypes)import_window)native_to_narwhals_dtype)InvalidOperationError)CompliantDataFrame)CompliantLazyFrame)Implementation)check_column_exists)find_stacklevel)generate_temporary_column_name)import_dtypes_module)not_implemented)parse_columns_to_drop)parse_version)validate_backend_version)
ModuleType)Column)BaseDataFrameWindow)Self)	TypeAlias)TypeIs)SparkLikeExprSparkLikeLazyGroupBySparkLikeNamespace)DType)JoinStrategy)LazyUniqueKeepStrategy)Version)_FullContextr#   
Incompletec                   @  s  e Zd ZdddZedd Zedd ZedddZedddZ	e
dddZdd d!Zdd#d$Zdd%d&Zdd'd(Zdd*d+Zdd-d.Zdd0d1Zedd3d4Zdd:d;Zdd>d?ZddBdCZddDdEZddFdGZddIdJZeddLdMZddNdOZddTdUZddXdYZdd]d^ZddcddZddgdhZ ddkdlZ!ddodpZ"ddwdxZ#ddydzZ$dddZ%e&'dZ(e& Z)e&'dZ*e& Z+dS )SparkLikeLazyFramenative_dataframeSQLFrameDataFramebackend_versiontuple[int, ...]versionr-   implementationr   returnNonec                C  s6   || _ || _|| _|| _d | _d | _t| j| j d S N)_native_frame_backend_version_implementation_version_cached_schema_cached_columnsr   )selfr1   r3   r5   r6    rA   R/var/www/vscode/kcb/lib/python3.10/site-packages/narwhals/_spark_like/dataframe.py__init__:   s   zSparkLikeLazyFrame.__init__c                 C     t r
ddlm} |S t| jS )Nr   )	functions)r   sqlframe.baserE   r   r<   )r@   rE   rA   rA   rB   _FJ      
zSparkLikeLazyFrame._Fc                 C  rD   )Nr   )types)r   rF   rI   r   r<   )r@   rI   rA   rA   rB   _native_dtypesS   rH   z!SparkLikeLazyFrame._native_dtypestype[Window]c                 C  rD   )Nr   r    )r   sqlframe.base.windowr!   r   r<   )r@   r!   rA   rA   rB   _Window\   rH   zSparkLikeLazyFrame._WindowobjSQLFrameDataFrame | AnyTypeIs[SQLFrameDataFrame]c                 C  s   t | S r9   r
   )rN   rA   rA   rB   
_is_nativee   s   zSparkLikeLazyFrame._is_nativedatacontextr.   r"   c               C  s   | ||j |j|jdS Nr3   r5   r6   )r;   r=   r<   )clsrR   rS   rA   rA   rB   from_nativei   s   zSparkLikeLazyFrame.from_nativer   c                 C  s
   | j  S r9   )r<   to_native_namespacer@   rA   rA   rB   __native_namespace__r   s   
z'SparkLikeLazyFrame.__native_namespace__r)   c                 C  s    ddl m} || j| j| jdS )Nr   r(   rU   )narwhals._spark_like.namespacer)   r;   r=   r<   )r@   r)   rA   rA   rB   __narwhals_namespace__u   s   z)SparkLikeLazyFrame.__narwhals_namespace__c                 C  s   | S r9   rA   rY   rA   rA   rB   __narwhals_lazyframe__~   s   z)SparkLikeLazyFrame.__narwhals_lazyframe__c                 C  s   | j | j| j|| jdS rT   )	__class__nativer;   r<   )r@   r5   rA   rA   rB   _with_version   s   z SparkLikeLazyFrame._with_versiondfc                 C  s   | j || j| j| jdS rT   )r^   r;   r=   r<   )r@   ra   rA   rA   rB   _with_native   s   zSparkLikeLazyFrame._with_nativepa.Tablec                 C  sN  | j tju r| jdk rdd l}z
|j| j W S  t	y } z|dt
|v rddlm} i }g }|  }| D ]Q\}}g ||< z||| j}	W n9 ty } z-| jj| j}
| jj}t|
|sotjd|
 d|t d ||| f W Y d }~q9d }~ww |||	f q9|jj|||dW  Y d }~S  d }~ww | j S )	N)   r   zat least one RecordBatch)narwhals_to_native_dtypezCould not convert dtype z to PyArrow dtype, )
stacklevelschema)r<   r   PYSPARKr;   pyarrowTablefrom_batchesr_   _collect_as_arrow
ValueErrorstrnarwhals._arrow.utilsre   collect_schemaitemsr=   	Exceptionrh   dataTyperJ   NullType
isinstancewarningswarnr   appendnullfrom_pydicttoArrow)r@   paexcre   rR   rh   current_schemakeyvaluenative_dtypenative_spark_dtype	null_typerA   rA   rB   _collect_to_arrow   s>   
"
z$SparkLikeLazyFrame._collect_to_arrowIterator[Column]c                 c  s     | j D ]	}| j|V  qd S r9   )columnsrG   col)r@   r   rA   rA   rB   _iter_columns   s   
z SparkLikeLazyFrame._iter_columns	list[str]c                 C  s.   | j d u r| jd urt| jn| jj| _ | j S r9   )r?   r>   listrh   r_   r   rY   rA   rA   rB   r      s   


zSparkLikeLazyFrame.columnsbackend(ModuleType | Implementation | str | Nonekwargsr   !CompliantDataFrame[Any, Any, Any]c           
      K  s   |t ju rdd l}ddlm} || j t jt|| jddS |d u s(|t j	u r?dd l
}ddlm} ||  t|| jddS |t ju radd l}dd l
}ddlm} |||  t|| jdS d	| }	t|	)
Nr   )PandasLikeDataFrameT)r6   r3   r5   validate_column_names)ArrowDataFrame)r3   r5   r   )PolarsDataFrame)r3   r5   zUnsupported `backend` value: )r   PANDASpandasnarwhals._pandas_like.dataframer   r_   toPandasr   r=   PYARROWrj   narwhals._arrow.dataframer   r   POLARSpolarsnarwhals._polars.dataframer   
from_arrowrn   )
r@   r   r   pdr   r}   r   plr   msgrA   rA   rB   collect   s<   


zSparkLikeLazyFrame.collectcolumn_namesro   c                 G  s   |  | jj| S r9   )rb   r_   select)r@   r   rA   rA   rB   simple_select   s   z SparkLikeLazyFrame.simple_selectexprsr%   c                 G  0   t | g|R  }dd |D }| | jj| S )Nc                 S     g | ]	\}}| |qS rA   alias.0col_namer   rA   rA   rB   
<listcomp>       z0SparkLikeLazyFrame.aggregate.<locals>.<listcomp>)r   rb   r_   aggr@   r   new_columnsnew_columns_listrA   rA   rB   	aggregate   s   zSparkLikeLazyFrame.aggregatec                 G  r   )Nc                 S  r   rA   r   r   rA   rA   rB   r     r   z-SparkLikeLazyFrame.select.<locals>.<listcomp>)r   rb   r_   r   r   rA   rA   rB   r      s   zSparkLikeLazyFrame.selectc                 G  s&   t | g|R  }| | jt|S r9   )r   rb   r_   withColumnsdict)r@   r   r   rA   rA   rB   with_columns  s   zSparkLikeLazyFrame.with_columns	predicatec                 C  s$   | | d }| j|}| |S Nr   )_callr_   whererb   )r@   r   	conditionspark_dfrA   rA   rB   filter  s   
zSparkLikeLazyFrame.filterdict[str, DType]c                   s(    j d u r fdd jjD  _  j S )Nc                   s$   i | ]}|j t|j j jd qS ))dtyper5   spark_types)namer   rt   r=   rJ   )r   fieldrY   rA   rB   
<dictcomp>  s    z-SparkLikeLazyFrame.schema.<locals>.<dictcomp>)r>   r_   rh   rY   rA   rY   rB   rh     s
   

zSparkLikeLazyFrame.schemac                 C  s   | j S r9   rg   rY   rA   rA   rB   rq     s   z!SparkLikeLazyFrame.collect_schemar   Sequence[str]strictboolc                C  s    t | ||d}| | jj| S )N)compliant_framer   r   )r   rb   r_   drop)r@   r   r   columns_to_droprA   rA   rB   r   "  s   zSparkLikeLazyFrame.dropnintc                 C  s   |  | jj|dS )N)num)rb   r_   limit)r@   r   rA   rA   rB   head(  s   zSparkLikeLazyFrame.headkeysdrop_null_keysr'   c                G  s   ddl m} || ||dS )Nr   r&   )r   )narwhals._spark_like.group_byr'   )r@   r   r   r'   rA   rA   rB   group_by+  s   zSparkLikeLazyFrame.group_byby
descendingbool | Sequence[bool]
nulls_lastc                  sh   t |tr|gt| }|r fdd|D }n	 fdd|D }dd t||D }  jj| S )Nc                 3  $    | ]}|r
 j jn j jV  qd S r9   )rG   desc_nulls_lastasc_nulls_lastr   drY   rA   rB   	<genexpr>:  
    
z*SparkLikeLazyFrame.sort.<locals>.<genexpr>c                 3  r   r9   )rG   desc_nulls_firstasc_nulls_firstr   rY   rA   rB   r   ?  r   c                 S  s   g | ]\}}||qS rA   rA   )r   r   sort_frA   rA   rB   r   D      z+SparkLikeLazyFrame.sort.<locals>.<listcomp>)rv   r   lenziprb   r_   sort)r@   r   r   r   
sort_funcs	sort_colsrA   rY   rB   r   0  s   


zSparkLikeLazyFrame.sortsubsetSequence[str] | Nonec                 C  s$   |rt |nd }| | jj|dS )Nr   )r   rb   r_   dropna)r@   r   rA   rA   rB   
drop_nullsG  s   zSparkLikeLazyFrame.drop_nullsmappingMapping[str, str]c                   s8    fddj D }jfdd| D S )Nc                   s   i | ]	}|  ||qS rA   )getr   colname)r   rA   rB   r   L  s    z-SparkLikeLazyFrame.rename.<locals>.<dictcomp>c                   "   g | ]\}} j ||qS rA   rG   r   r   r   oldnewrY   rA   rB   r   Q     " z-SparkLikeLazyFrame.rename.<locals>.<listcomp>)r   rb   r_   r   rr   )r@   r   rename_mappingrA   )r   r@   rB   renameK  s   
zSparkLikeLazyFrame.renamekeepr,   c                C  s   t | j| |rt|nd }|dkrAtd| j}|  |p | j}| j|| j	d
|| j|dk|}| |S | | jj|dS )Nnone   *   r   )r   r   r   r   rM   partitionByr_   
withColumnrG   countoverr   r   r   rb   dropDuplicates)r@   r   r   tmpwindowra   rA   rA   rB   uniqueU  s   
zSparkLikeLazyFrame.uniqueotherhowr+   left_onright_onsuffixc                   sP  j  |j }|d urt|ng |d urt|ng }|dkr'fdd|D n|}i tt| fdd|D |jfdd D   }	|dv ra|	fdd	|D  n|dkrl|		  fd
dD }
|dkrt
tfdd	t||
D n|dkrd n|}|dkrdn|}jj||d|	S )Nfullc                   s   g | ]}| vr|qS rA   rA   r   c)	right_on_rA   rB   r   w  r   z+SparkLikeLazyFrame.join.<locals>.<listcomp>c                   s&   i | ]}|| v r|  n|qS rA   rA   r   )left_columnsr
  rA   rB   r   ~  s    z+SparkLikeLazyFrame.join.<locals>.<dictcomp>c                   r   rA   r   r   rY   rA   rB   r     r   >   leftcrossinnerc                 3  s     | ]}|vr | V  qd S r9   rA   r   )r   r  rA   rB   r     s    z*SparkLikeLazyFrame.join.<locals>.<genexpr>c                   s   g | ]} | qS rA   rA   r  )r   rA   rB   r     s    c                 3  s*    | ]\}}t j|t  |kV  qd S r9   )getattrr_   )r   left_key	right_key)other_nativer@   rA   rB   r     s
    
r  
full_outer)onr  )r   r   r   r   r_   r   rr   copyextendvaluesr   r   rb   join)r@   r  r  r  r	  r
  right_columnsleft_on_right_cols_to_rename	col_orderright_on_remappedon_
how_nativerA   )r  r  r   r  r@   r
  rB   r  e  sR   
		zSparkLikeLazyFrame.joinc           	   	     s   t j} } D ]}|| }||jkr d| d}t|qj}t dkr0d}t|j	 rF
jj fdd|D  S j rydfdd}
jj fdd|D  j| d j fdd|D  S d}t|)Nz-`explode` operation not supported for dtype `z`, expected List typer   zExploding on multiple columns is not supported with SparkLike backend since we cannot guarantee that the exploded columns have matching element counts.c                   <   g | ]}| d  krj ||nj ||qS r   )rG   r   r   explode_outerr   r   r   r@   rA   rB   r         z.SparkLikeLazyFrame.explode.<locals>.<listcomp>r   ro   r7   r   c                   s    j |  j | dkB S r   )rG   isnull
array_size)r   rY   rA   rB   null_condition  s   z2SparkLikeLazyFrame.explode.<locals>.null_conditionc                   r$  r%  )rG   r   r   exploder'  r(  rA   rB   r     r)  r   c                   s<   g | ]}| d  krj ||nj d|qS )r   N)rG   r   r   litr'  r(  rA   rB   r     r)  z[Unreachable code, please report an issue at https://github.com/narwhals-dev/narwhals/issues)r   ro   r7   r   )r   r=   rq   Listr   r   r   NotImplementedErrorr<   
is_pysparkrb   r_   r   is_sqlframeunionr   AssertionError)	r@   r   dtypesrh   col_to_exploder   r   r   r,  rA   r(  rB   r-    sP   





zSparkLikeLazyFrame.exploder  indexvariable_name
value_namec           	      C  s   | j  r|dkrd}t||dkrd}t|n	 |r!t|nd}|d u r3tt| jt|nt|}| jj||||d}|d u rJ|j	| }| 
|S )N z<`variable_name` cannot be empty string for sqlframe backend.z9`value_name` cannot be empty string for sqlframe backend.rA   )idsr  variableColumnNamevalueColumnName)r<   r2  r0  tuplesetr   
differencer_   unpivotr   rb   )	r@   r  r7  r8  r9  r   r;  r  unpivoted_native_framerA   rA   rB   rA    s*   
&

zSparkLikeLazyFrame.unpivotzO`LazyFrame.gather_every` is deprecated and will be removed in a future version.zG`LazyFrame.tail` is deprecated and will be removed in a future version.N)
r1   r2   r3   r4   r5   r-   r6   r   r7   r8   )r7   rK   )rN   rO   r7   rP   )rR   r2   rS   r.   r7   r"   )r7   r   )r7   r)   )r7   r"   )r5   r-   r7   r"   )ra   r2   r7   r"   )r7   rc   )r7   r   )r7   r   )r   r   r   r   r7   r   )r   ro   r7   r"   )r   r%   r7   r"   )r   r%   r7   r"   )r7   r   )r   r   r   r   r7   r"   )r   r   r7   r"   )r   ro   r   r   r7   r'   )r   ro   r   r   r   r   r7   r"   )r   r   r7   r"   )r   r   r7   r"   )r   r   r   r,   r7   r"   )r  r"   r  r+   r  r   r	  r   r
  ro   r7   r"   )r   r   r7   r"   )
r  r   r7  r   r8  ro   r9  ro   r7   r"   ),__name__
__module____qualname__rC   propertyrG   rJ   rM   staticmethodrQ   classmethodrW   rZ   r\   r]   r`   rb   r   r   r   r   r   r   r   r   r   rh   rq   r   r   r   r   r   r   r  r  r-  rA  r   
deprecatedgather_every	join_asoftailwith_row_indexrA   rA   rA   rB   r0   9   s`    





	



(
	
-

	












C
C 
r0   )r%   r2   )B
__future__r   rw   	functoolsr   operatorr   typingr   r   r   r   r	   narwhals._namespacer   narwhals._spark_like.utilsr   r   r   r   r   narwhals.exceptionsr   narwhals.typingr   r   narwhals.utilsr   r   r   r   r   r   r   r   r   rI   r   rj   r}   sqlframe.base.columnr   sqlframe.base.dataframer   rL   r!   typing_extensionsr"   r#   r$   narwhals._spark_like.exprr%   r   r'   r[   r)   narwhals.dtypesr*   r+   r,   r-   r.   r2   r/   __annotations__r0   rA   rA   rA   rB   <module>   s`    