o
    h`t                     @   s  d dl Z zd dlZW n ey   dZY nw d dlZd dlmZ e jj	Z
dddZzd dlm	Z W n ey<   dZY nw dddZG dd	 d	eZe jd
ddd Ze jd
ddd Ze jd
ddddZe jd
ddddZe jd
ddd Ze jd
ddd Ze jd
ddd Ze jd
ddd Ze jd
ddd Ze jd
ddd Ze jd
dd d! Ze jd
dd"d# Ze jd
dd$d% Ze jd
dd&d' Ze jd
dd(d) Z d*dd+d,d-Z!d.d/ Z"d0d1 Z#d2d3 Z$d4d5 Z%d6d7 Z&d8d9 Z'd:d; Z(d<d= Z)d>d? Z*d@dA Z+dBdC Z,dDdE Z-dFdG Z.dHdI Z/dJdK Z0dLdM Z1dNdO Z2dPdQ Z3dRdS Z4dTdU Z5dVdW Z6dXdY Z7dZd[ Z8d\d] Z9d^d_ Z:d`da Z;dbdc Z<ddde Z=dfdg Z>dhdi Z?e jjdjdk Z@e jjdldm ZAdndo ZBdpdq ZCe jjdrds ZDe jjdtdu ZEe jjdvdw ZFe jjdxdy ZGdzd{ ZHd|d} ZIe jjd~d ZJe jjdd ZKe jjLdd ZMe jjLdd ZNe jjLdd ZOdS )    N)compute summarydescription
   c                 C   s   ddl m} |t | S )Nr   )_get_udf_context)pyarrow._computer   padefault_memory_pool)batch_lengthr    r   J/var/www/vscode/kcb/lib/python3.10/site-packages/pyarrow/tests/test_udf.pymock_udf_context)   s   r   c                   @   s   e Zd ZdS )MyErrorN)__name__
__module____qualname__r   r   r   r   r   .   s    r   session)scopec                  C   6   dd } d}t }t| ||dt it  | |fS )z4
    Register a unary aggregate function (mean)
    c                 W   s   t t|S N)r
   scalarnpnansum)ctxxargsr   r   r   func7      z"sum_agg_func_fixture.<locals>.funcsum_udfr   )empty_udf_docpcregister_aggregate_functionr
   float64r   	func_namefunc_docr   r   r   sum_agg_func_fixture2   s   r(   c                  C   r   )Nc                 S      t d)NOops)RuntimeErrorr
   r   lenr   r   r   r   r   r   J      z(exception_agg_func_fixture.<locals>.funcy=exception_len(x)r   r!   r"   r#   r
   int64r%   r   r   r   exception_agg_func_fixtureH   s   r2   c                 C   6   dd }d}t }t|||dt it  ||fS )Nc                 S   s   t t|t  S r   )r
   r   r,   int32r-   r   r   r   r   ^   s   z1wrong_output_dtype_agg_func_fixture.<locals>.funcy=wrong_output_dtype(x)r   r0   r   r   r&   r'   r   r   r   #wrong_output_dtype_agg_func_fixture\      r7   c                 C   r3   )Nc                 S   s   t |S r   )r,   r-   r   r   r   r   q   r.   z0wrong_output_type_agg_func_fixture.<locals>.funcy=wrong_output_type(x)r   r0   r6   r   r   r   "wrong_output_type_agg_func_fixtureo   r8   r:   c                  C   B   dd } d}ddd}t | ||t t dt  | |fS )z,
    Register a binary scalar function.
    c                 S   s   t jd||g| jdS )Nmultiplymemory_poolr"   call_functionr>   )r   mr   r   r   r   binary_function   s   z,binary_func_fixture.<locals>.binary_functionzy=mxzfind y from y = mxr   )rA   r   r"   register_scalar_functionr
   r1   )rB   r&   
binary_docr   r   r   binary_func_fixture   s   rF   c               	   C   sH   dd } ddd}d}t | ||t t t dt  | |fS )z-
    Register a ternary scalar function.
    c                 S   s,   t jd||g| jd}t jd||g| jdS )Nr<   r=   addr?   )r   rA   r   cmxr   r   r   ternary_function   s   z.ternary_func_fixture.<locals>.ternary_functionzy=mx+czfind y from y = mx + cr   )array1array2array3rC   )rJ   ternary_docr&   r   r   r   ternary_func_fixture   s   	rO   c                  C   r;   )zI
    Register a varargs scalar function with at least two arguments.
    c                 W   s(   |}|D ]}t jd||g| jd}q|S )NrG   r=   r?   )r   firstvaluesaccvalr   r   r   varargs_function   s   z.varargs_func_fixture.<locals>.varargs_functionz	z=ax+by+czfind z from z = ax + by + cr   )rK   rL   rC   )rT   r&   varargs_docr   r   r   varargs_func_fixture   s   rV   c                  C   s4   dd } ddd}d}t | ||i t  | |fS )z-
    Register a nullary scalar function.
    c                 S      t jdg| j t  | jdS N*   typer>   r
   arrayr   r1   r>   contextr   r   r   nullary_func      z*nullary_func_fixture.<locals>.nullary_funcrandom functiongenerates a random valuer   test_nullary_funcrC   r`   r'   r&   r   r   r   nullary_func_fixture   s   rf   c                  C   s0   dd } ddd}d}t | ||i t  |S )z
    Register a nullary scalar function with an ephemeral Python function.
    This stresses that the Python function object is properly kept alive by the
    registered function.
    c                 S   rW   rX   r\   r^   r   r   r   r`      ra   z4ephemeral_nullary_func_fixture.<locals>.nullary_funcrb   rc   r   test_ephemeral_nullary_funcrC   re   r   r   r   ephemeral_nullary_func_fixture   s   rh   c                  C   <   dd } d}i }t  }ddd}t| |||| | |fS )zi
    Register a scalar function which returns something that is neither
    a Arrow scalar or array.
    c                 S   s   dS )NrY   r   r   r   r   r   wrong_output_type      z9wrong_output_type_func_fixture.<locals>.wrong_output_typetest_wrong_output_typezreturn wrong output typer   r   r
   r1   r"   rD   )rk   r&   in_typesout_typedocr   r   r   wrong_output_type_func_fixture      
rr   c                  C   sD   dd } d}dt  i}t  }ddd}t| |||| | |fS )zq
    Register a scalar function whose actual output DataType doesn't
    match the declared output DataType.
    c                 S      t d|dgS NrG      r"   r@   )r   r]   r   r   r   wrong_output_datatype  r   zAwrong_output_datatype_func_fixture.<locals>.wrong_output_datatypetest_wrong_output_datatyper]   zreturn wrong output datatyper   r   )r
   r1   int16r"   rD   )rx   r&   ro   rp   rq   r   r   r   "wrong_output_datatype_func_fixture  s   
r{   c                  C   ri   )z>
    Register a scalar function with the wrong signature.
    c                   S   s   t jdt  dS )Nrv   r[   )r
   r   r1   r   r   r   r   wrong_signature*  s   z5wrong_signature_func_fixture.<locals>.wrong_signaturetest_wrong_signaturezUDF with wrong signaturer   r   rn   )r}   r&   ro   rp   rq   r   r   r   wrong_signature_func_fixture$  rs   r   c                  C   s4   dd } d}ddd}t | ||i t  | |fS )zE
    Register a scalar function which raises a custom exception.
    c                 S   r)   )Nerror raised by scalar UDF)r   rj   r   r   r   raising_func>  r.   z*raising_func_fixture.<locals>.raising_func
test_raisezraising functionr   r   rC   )r   r&   rq   r   r   r   raising_func_fixture9  s   
r   c                  C   r   )z$
    Register a vector function
    c                 S   s   t |  jddS )NT)pct)r
   r]   	to_pandascopyrankr-   r   r   r   pct_rankO  s   z+unary_vector_func_fixture.<locals>.pct_ranky=pct_rank(x)r   )r!   r"   register_vector_functionr
   r$   )r   r&   rq   r   r   r   unary_vector_func_fixtureJ  s   
r   c                  C   sb   dd } d}t }t| ||t t t dtdt fdt fdt fg | |fS )z@
    Register a vector function that returns a struct array
    c                 S   sB   t jj|||gg dd }|jdddd }t j| S )NkvrH   namesrH   r   r   )columnsrQ   index)r
   RecordBatchfrom_arraysr   pivotreset_indexfrom_pandasto_struct_array)r   r   r   rH   dfdf_pivotr   r   r   r   `  s   z)struct_vector_func_fixture.<locals>.pivot
y=pivot(x)r   r   v1v2)r!   r"   r   r
   r1   r$   utf8struct)r   r&   rq   r   r   r   struct_vector_func_fixture[  s   &r   Trun_in_datasetr   c                C   s   | \}}|d u rd}|D ]}t |tjrd}t|}q|rd}t|}|j|ks+J tj|||d}	|t|g|R  }
|	|
ksCJ |rzdd |D }tj	
||}t|}dd |D }|jdtd	||id
}|djd |
ks|J d S d S )NTFrv   lengthc                 S   s   g | ]	\}}d | qS )fieldr   ).0r   in_arrr   r   r   
<listcomp>  s    z)check_scalar_function.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   )dsr   )r   
field_namer   r   r   r     s    resultr   )r   r   )
isinstancer
   Arrayr,   r"   get_functionnamer@   r   Tabler   r   datasetto_tabler   _callcolumnchunks)func_fixtureinputsr   r   functionr   
all_scalarargr   r   expected_outputfield_namestabler   	func_argsresult_tabler   r   r   check_scalar_functionp  s4   

r   c                 C   s    t | tddgt g d S )Nr      r   r
   r]   r1   )unary_func_fixturer   r   r   test_udf_array_unary  s   r   c                 C   s2   t | tddgt tddgt g d S )Nr   r         r   )rF   r   r   r   test_udf_array_binary  s
   r   c              	   C   sD   t | tddgt tddgt tddgt g d S )Nr   r   r   r      r   )rO   r   r   r   test_udf_array_ternary  s   r   c                 C   sh   t | tddgt tddgt tddgt tddgt tddgt g d S )Nr      r   r         r   r   )rV   r   r   r   test_udf_array_varargs  s   r   c                  C   s  ddd} dt  i}t  }dd }tt t|d | || W d    n1 s,w   Y  tjtdd td d	| || W d    n1 sLw   Y  d
}tjt|d t|d| |d  W d    n1 snw   Y  d}tjt|d t|d| d | W d    n1 sw   Y  t|d| i | d}tjt|d t|d| i | W d    d S 1 sw   Y  d S )Nztest udf inputzparameters are validatedr   r   c                 S   s   t dgS )Nr   )r
   r]   r^   r   r   r   test_reg_function     z3test_registration_errors.<locals>.test_reg_functionzfunc must be a callablematchtest_none_function)DataType expected, got <class 'NoneType'>test_output_functionz)in_types must be a dictionary of DataTypetest_input_functionr   z?Already have a function registered with name: test_reg_function)r
   r1   pytestraises	TypeErrorr"   rD   KeyError)rq   ro   rp   r   expected_exprr   r   r   test_registration_errors  sT   "r   c                 C   sN   | \}}d}t jt|d t|dg W d    d S 1 s w   Y  d S )Nz9VarArgs function 'z=ax\+by\+c' needs at least 2 argumentsr   rY   )r   r   
ValueErrorr"   r@   )rV   _r&   	error_msgr   r   r    test_varargs_function_validation  s
   "r   c                  C   s   dt  i} t  }ddi}dd }tjtdd t|d|| | W d    n1 s-w   Y  d	d
i}tjtdd t|d|| | W d    d S 1 sRw   Y  d S )Nr   r   descc                 S   rt   ru   rw   )r   r   r   r   r   	add_const  r   z/test_function_doc_validation.<locals>.add_constz#Function doc must contain a summaryr   test_no_summaryr   ztest summaryz'Function doc must contain a descriptiontest_no_desc)r
   r1   r   r   r   r"   rD   )ro   rp   r'   r   r   r   r   test_function_doc_validation  s.   "r   c                 C   s   t | g ddd d S )NFrv   r   )r   )rf   r   r   r   test_nullary_function  s   
r   c                 C   s*   | }t j|g dd}| dgksJ d S )Nrv   r   rY   )r"   r@   	to_pylist)rh   r   r   r   r   r   test_ephemeral_function  s   r   c                 C   L   | \}}t jtdd tj|g dd W d    d S 1 sw   Y  d S )NzUnexpected output type: intr   rv   r   r   r   r   r"   r@   )rr   r   r&   r   r   r   rm     s   "rm   c                 C   sX   | \}}d}t jt|d t|tddgg W d    d S 1 s%w   Y  d S )NzDExpected output datatype int16, but function returned datatype int64r   r   r   )r   r   r   r"   r@   r
   r]   )r{   r   r&   r   r   r   r   ry   &  s
   "ry   c                 C   sP   | \}}d}t jt|d tj|g dd W d    d S 1 s!w   Y  d S )Nz@wrong_signature\(\) takes 0 positional arguments but 1 was givenr   rv   r   r   )r   r   r&   r   r   r   r   r~   0  s
   "r~   c                  C   sl   dd } d}dt  i}i }ddd}tjtdd	 t| |||| W d    d S 1 s/w   Y  d S )
Nc                 S      |S r   r   r   rS   r   r   r   identity;  rl   z1test_wrong_datatype_declaration.<locals>.identitytest_wrong_datatype_declarationr]   ztest output valueztest outputr   z%DataType expected, got <class 'dict'>r   r
   r1   r   r   r   r"   rD   r   r&   ro   rp   rq   r   r   r   r   :  s   "r   c                  C   sl   dd } d}dd i}t  }ddd}tjtdd	 t| |||| W d    d S 1 s/w   Y  d S )
Nc                 S   r   r   r   r   r   r   r   r   L  rl   z3test_wrong_input_type_declaration.<locals>.identity!test_wrong_input_type_declarationr]   ztest invalid input typezinvalid input functionr   r   r   r   r   r   r   r   r   K  s   
"r   c                 C   s   t t  }| \}}tj|t jdgd t  dg|d}|t jdgd t  dks.J | dks6J d }| dks@J d S )Nrv     r|   r=   r   @  r   )r
   proxy_memory_poolr   r"   r@   r]   r1   bytes_allocated)r   
proxy_poolr   r&   resr   r   r   test_scalar_udf_context\  s    r   c                 C   r   )Nr   r   rv   r   )r   r   r   r"   r@   )r   r   r&   r   r   r   test_raising_funck  s   "r   c                 C   s2   | \}}t |tdg}|tdksJ d S )Nr      )r"   r@   r
   r   )r   r   r&   r   r   r   r   test_scalar_inputq  s   r   c                 C   s   | \}}t t  }| dksJ t jdgd t  |d}| dks(J t||g | dks7J d }| dksAJ d S )Nr   rv   r   rZ   r   )r
   r   r   r   r]   r1   r"   r@   )r   r   r&   r   r   r   r   r   test_input_lifetimew  s   r   c                    s&    fddt |D }tjj| dS )Nc                    s(   g | ]\}}t jt| | jd qS r|   )r
   r]   listr[   )r   ir   schemar   r   r     s    z,_record_batch_from_iters.<locals>.<listcomp>arraysr   )	enumerater
   r   r   )r   itersr  r   r   r   _record_batch_from_iters  s   
r  c                 C   s$   t | t||d t|d |d S )Nr   rv   r   )r  range)r   nr   r   r   _record_batch_for_range  s   r  c                    s    fdd}|S )Nc                    s   G  fddd}| S )Nc                       s"   e Zd Zdd Z fddZdS )z,make_udt_func.<locals>.udf_func.<locals>.UDTc                 S   s
   d | _ d S r   )callerselfr   r   r   __init__     
z5make_udt_func.<locals>.udf_func.<locals>.UDT.__init__c                    sh   z| j d u r |jd | _ }|  |}W | S  ty3   dd D }tjj|d}Y | S w )Nc                 S   s   g | ]
}t jg |jd qS r   )r
   r]   r[   )r   r   r   r   r   r     s    zImake_udt_func.<locals>.udf_func.<locals>.UDT.__call__.<locals>.<listcomp>r  )r	  sendStopIterationr
   r   r   r   )r  r   batchr  	batch_genr   r   r   __call__  s   
z5make_udt_func.<locals>.udf_func.<locals>.UDT.__call__Nr   r   r   r  r  r   r  r   r   UDT  s    r  r   )r   r  r  r   r   udf_func  s   zmake_udt_func.<locals>.udf_funcr   )r   r  r  r   r  r   make_udt_func  s   r  c                      s$   t  G fddd  fddS )zA short datasetc                       s    e Zd Zdd Z fddZdS )z%datasource1_direct.<locals>.Generatorc                 S   s
   d| _ d S )Nr   )r  r
  r   r   r   r    r  z.datasource1_direct.<locals>.Generator.__init__c                    s@   | j dkrt g g }| S |  j d8  _ t | j }| S )Nr   rv   )r  r  r  r   )r  r   r  r   r   r   r    s   
z.datasource1_direct.<locals>.Generator.__call__Nr  r   r   r   r   	Generator  s    r  c                    s     S r   r   rj   )r  r   r   <lambda>  s    z$datasource1_direct.<locals>.<lambda>)datasource1_schemar   r   )r  r   r   datasource1_direct  s   r  c                        t    fdd} t | S )Nc                 3   s(    t dddD ]
}t |d V  qd S )Nr   r   rv   )r  r  r   r  r   r   r   r    s   z(datasource1_generator.<locals>.batch_genr  r  r  r   r   r   datasource1_generator  s   
r!  c                     r  )Nc                 3   s,    t dddD ]
}t |d V  qtd)Nr   r   r  rv   datasource1_exception)r  r  r+   r  r   r   r   r    s   z(datasource1_exception.<locals>.batch_genr  r   r   r   r   r"    s   
r"  c                   C   s   t dt  fdt  fgS )Nr   )r
   r   r4   r   r   r   r   r    s   r  c                 C   s@   | ddd}i }t dt  fdt  fg}| ||||fS )Nz UDTztest {func_name} UDTr   r   )r
   r   r4   )r   r&   r'   ro   rp   r   r   r   datasource1_args  s   r#  c                 C   sX   t  }|  }| j}t||}tj|  d}t|D ]}|d8 }|t||ks)J qd S )Nr   rv   )r  r   r#  r"   register_tabular_functioncall_tabular_functionr  )
func_makerr   r   r&   r   r  itemr   r   r   _test_datasource1_udt  s   

r(  c                   C      t t d S r   )r(  r  r   r   r   r   test_udt_datasource1_direct  r   r*  c                   C   r)  r   )r(  r!  r   r   r   r   test_udt_datasource1_generator  r   r+  c                   C   s<   t jtdd tt W d    d S 1 sw   Y  d S )Nr"  r   )r   r   r+   r(  r"  r   r   r   r   test_udt_datasource1_exception  s   
"r,  c                 C   s<   t g dt  }td|g}t d}||ksJ d S )Ng      $@g      4@      >@g      D@g      I@mean_udfr.  )r
   r]   r$   r"   r@   r   )unary_agg_func_fixturearrr   expectedr   r   r   test_scalar_agg_basic  s   
r3  c                 C   sT   t g t  }tjt jdd td|g W d    d S 1 s#w   Y  d S )Nzempty inputsr   r/  )r
   r]   r$   r   r   ArrowInvalidr"   r@   )r0  emptyr   r   r   test_scalar_agg_empty  s   "r6  c                 C   X   t g dt  }tjt jdd td|g W d    d S 1 s%w   Y  d S )Nr   r   r   (   2   output datatyper   r5   r
   r]   r1   r   r   ArrowTypeErrorr"   r@   )r7   r1  r   r   r   "test_scalar_agg_wrong_output_dtype     "r>  c                 C   r7  )Nr8  output typer   r9   r<  )r:   r1  r   r   r   !test_scalar_agg_wrong_output_type	  r?  rA  c                 C   sR   t g dt  }t g dt  }td||g}t d}||ks'J d S )Nr8  )      ?       @      @      @g      @sum_meang     @@)r
   r]   r1   r$   r"   r@   r   )varargs_agg_func_fixturearr1arr2r   r2  r   r   r   test_scalar_agg_varargs  s   
rJ  c                 C   sV   t g dt  }tjtdd td|g W d    d S 1 s$w   Y  d S )N)r   r   r   r9  r:  <   r*   r   r/   )r
   r]   r1   r   r   r+   r"   r@   )r2   r1  r   r   r   test_scalar_agg_exception  s   "rL  c           
      C   s   t g dt  }t g dt  }t g dt  }t g dt  }t j||gddgd}t j||gddgd}t ||g}|ddg}|dd	gdd
g}	|	d|		dkshJ d S )Nr-  r   r   rv   r   rv   )g      N@g     Q@g      T@g     V@g      Y@)r   rv   rv   r   rv   idvaluer   rO  r/  )rO  meanvalue_mean_udf)
r
   r]   r$   r4   r   concat_tablesgroup_by	aggregaterename_columnssort_by)
r0  rH  rI  arr3arr4table1table2r   r   r2  r   r   r   test_hash_agg_basic#  s   

r\  c                 C   s   t g t  }t g t  }t j||gddgd}|ddg}t jt g t  t g t  gddgd}||ksAJ d S )NrN  rO  r   rP  rR  )r
   r]   r$   r4   r   rT  rU  )r0  rH  rI  r   r   r2  r   r   r   test_hash_agg_empty6  s   r]  c                 C      t g dt  }t g dt  }t j||gddgd}tjt jdd |d	dg W d    d S 1 s<w   Y  d S )	Nr8  rM  rN  rO  r   r;  r   )rO  r5   
r
   r]   r1   r4   r   r   r   r=  rT  rU  )r7   rH  rI  r   r   r   r    test_hash_agg_wrong_output_dtypeC  s   "r`  c                 C   r^  )	Nr8  rM  rN  rO  r   r@  r   )rO  r9   r_  )r:   rH  rI  r   r   r   r   test_hash_agg_wrong_output_typeL  s   "ra  c                 C   s   t g dt  }t g dt  }t j||gddgd}tjtdd |d	dg W d    d S 1 s;w   Y  d S )	Nr8  rM  rN  rO  r   r*   r   )rO  r/   )
r
   r]   r1   r4   r   r   r   r+   rT  rU  )r2   rH  rI  r   r   r   r   test_hash_agg_exceptionU  s   "rb  c                 C   s   d}d}t td|t  }t tj||t  }t j||gddgd}|	d
dg}|	d
dgdd	g}|d|dksKJ d
S )z2Test hash aggregate udf with randomly sampled datai@B r   rv   rN  rO  r   )rO  r    )rO  sumvalue_sum_udfN)r
   r]   r   repeatr$   randomchoicer4   r   rT  rU  rV  rW  )r(   	value_num	group_numrH  rI  r   r   r2  r   r   r   test_hash_agg_random_  s   

rj  c                 C   s@   t g dt  }td|g}| d d |}||ksJ d S )Nr-  r   r   r
   r]   r$   r"   r@   r   r1  r   r2  r   r   r   test_vector_basicr  s   rm  c                 C   s>   t dgt  }td|g}| d d |}||ksJ d S )Nrv   r   r   rk  rl  r   r   r   test_vector_emptyz  s   rn  c                 C   sj   t g dt  }t g dt  }t g d}td|||g}| d d |||}||ks3J d S )N)rv   rv   r   r   )rB  rC  rD  rE  )r   r   r   r   r   r   )r
   r]   r1   r$   r"   r@   )r   r   r   rH   r   r2  r   r   r   test_vector_struct  s   ro  )r   )r   )Pr   numpyr   ImportErrorpyarrowr
   r   r"   markr   
pytestmarkr!   pyarrow.datasetr   r   r+   r   fixturer(   r2   r7   r:   rF   rO   rV   rf   rh   rr   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rm   ry   r~   r   r   r   r   r   r   r  r  r  r  r!  r"  r  r#  r(  r*  r+  r,  r3  r6  r>  rA  rJ  rL  r\  r]  r`  ra  rb  rj  pandasrm  rn  ro  r   r   r   r   <module>   s   





























 	
2	








		
	


