o
    h;                  
   @   s  d dl mZmZ d dlmZmZ zd dlmZmZm	Z	m
Z
mZmZmZmZmZ W n ey? Z zedee dddZ[ww zd dlmZ d dlmZ W n ey`   G dd	 d	ZeZY nw dddZddd
defddZd
efddZdd ZefddZdddZdS )    )TableRecordBatch)
Expressionfield)	DeclarationExecNodeOptionsTableSourceNodeOptionsFilterNodeOptionsProjectNodeOptionsAggregateNodeOptionsOrderByNodeOptionsHashJoinNodeOptionsAsofJoinNodeOptionsz@The pyarrow installation is not built with support for 'acero' ()N)ScanNodeOptionsc                   @   s(   e Zd ZG dd dZG dd dZdS )DatasetModuleStubc                   @      e Zd ZdS )zDatasetModuleStub.DatasetN__name__
__module____qualname__ r   r   A/var/www/vscode/kcb/lib/python3.10/site-packages/pyarrow/acero.pyDataset3       r   c                   @   r   )z!DatasetModuleStub.InMemoryDatasetNr   r   r   r   r   InMemoryDataset6   r   r   N)r   r   r   r   r   r   r   r   r   r   2   s    r   TFc                 C   sn   t dt| ||d}dd | jjD }t |t dt|g}| jd}|d ur5t |t dt|g}|S )Nscanuse_threadsrequire_sequenced_outputc                 S   s   g | ]}t |qS r   )r   ).0fr   r   r   
<listcomp>B   s    z$_dataset_to_decl.<locals>.<listcomp>projectfilter)	r   r   schemanamesfrom_sequencer
   _scan_optionsgetr	   )datasetr   r   declprojectionsfilter_exprr   r   r   _dataset_to_decl;   s   r.   c
                    s~  t |ttjfstdt| t |ttjfs"tdt| i  t |ttfs.|g}t|D ]\}
}|
 |< q2i t |ttfsG|g}t|D ]\}
}|
|< qK|j	j
}|j	j
}| dksd| dkrgg }n*| dkso| dkrrg }n| dksz| dkrfdd	|D }n| d
kr fdd	|D }i }t|D ]\}
}||v r|
||< qi }t|D ]\}
}||v r|
||< qt |tjrt||d}ntdt|}t |tjrt||d}ntdt|}|rt| |||||pd|pdd}nt| |||pd|pdd}td|||gd}|r| dkrt|}t|}t|}g }g }t|| D ]n\}
}|
t|k rW||v rW|| || |   }|tdt|
t|| g q%|
|krc||v rcq%|rt|
|k rt||v rt||7 }|r|
|kr||v r||7 }|| |t|
 q%tdt||}t||g}|j|d}|	tkr|S |	tjkrt|S td)a\  
    Perform join of two tables or datasets.

    The result will be an output table with the result of the join operation

    Parameters
    ----------
    join_type : str
        One of supported join types.
    left_operand : Table or Dataset
        The left operand for the join operation.
    left_keys : str or list[str]
        The left key (or keys) on which the join operation should be performed.
    right_operand : Table or Dataset
        The right operand for the join operation.
    right_keys : str or list[str]
        The right key (or keys) on which the join operation should be performed.
    left_suffix : str, default None
        Which suffix to add to left column names. This prevents confusion
        when the columns in left and right operands have colliding names.
    right_suffix : str, default None
        Which suffix to add to the right column names. This prevents confusion
        when the columns in left and right operands have colliding names.
    use_threads : bool, default True
        Whether to use multithreading or not.
    coalesce_keys : bool, default False
        If the duplicated keys should be omitted from one of the sides
        in the join result.
    output_type: Table or InMemoryDataset
        The output type for the exec plan result.

    Returns
    -------
    result_table : Table or InMemoryDataset
    Expected Table or Dataset, got z	left semiz	left antiz
right semiz
right antiinnerz
left outerc                       g | ]}| vr|qS r   r   r    col)right_keys_orderr   r   r"          z!_perform_join.<locals>.<listcomp>zright outerc                    r1   r   r   r2   )left_keys_orderr   r   r"      r5   r   table_source )output_suffix_for_leftoutput_suffix_for_righthashjoinoptionsinputsz
full outercoalescer#   Unsupported output type)
isinstancer   dsr   	TypeErrortypetuplelist	enumerater%   r&   r.   r   r   r   setlenappendr   _call_fieldr
   r'   to_tabler   )	join_typeleft_operand	left_keysright_operand
right_keysleft_suffixright_suffixr   coalesce_keysoutput_typeidxkeyleft_columnsright_columnsleft_column_keys_indicescolnameright_column_keys_indicesleft_sourceright_source	join_optsr+   left_columns_setright_columns_setright_operand_indexprojected_col_namesr,   r3   right_key_index
projectionresult_tabler   )r6   r4   r   _perform_joinR   s   (















ri   c	                    sT  t | ttjfstdt|  t |ttjfs"tdt| t |ttfs,|g}t  ttfs6 g  fdd|jj	D }	t
| jj	t
|	@ }
|
rUtd|
t | tjrct| |dd}ntdt| }t |tjrxt||dd}ntdt|}t|| |}td|||gd	}|j|d
}|tkr|S |tjkrt|S td)a-  
    Perform asof join of two tables or datasets.

    The result will be an output table with the result of the join operation

    Parameters
    ----------
    left_operand : Table or Dataset
        The left operand for the join operation.
    left_on : str
        The left key (or keys) on which the join operation should be performed.
    left_by: str or list[str]
        The left key (or keys) on which the join operation should be performed.
    right_operand : Table or Dataset
        The right operand for the join operation.
    right_on : str or list[str]
        The right key (or keys) on which the join operation should be performed.
    right_by: str or list[str]
        The right key (or keys) on which the join operation should be performed.
    tolerance : int
        The tolerance to use for the asof join. The tolerance is interpreted in
        the same units as the "on" key.
    output_type: Table or InMemoryDataset
        The output type for the exec plan result.

    Returns
    -------
    result_table : Table or InMemoryDataset
    r/   c                    s   g | ]}|g  vr|qS r   r   r2   right_byright_onr   r   r"   /  s
    z&_perform_join_asof.<locals>.<listcomp>zOColumns {} present in both tables. AsofJoin does not support column collisions.Tr   r8   asofjoinr=   r7   rA   )rB   r   rC   r   rD   rE   rF   rG   r%   r&   rI   
ValueErrorformatr.   r   r   r   rN   r   )rP   left_onleft_byrR   rl   rk   	tolerancer   rW   r[   columns_collisionsr_   r`   ra   r+   rh   r   rj   r   _perform_join_asof  s\   !



rt   c                 C   sh   d}t | trt| g} d}ttdt| dtdt|dg}|jdd}|r2|	 
 d }|S )a}  Filter rows of a table based on the provided expression.

    The result will be an output table with only the rows matching
    the provided expression.

    Parameters
    ----------
    table : Table or RecordBatch
        Table that should be filtered.
    expression : Expression
        The expression on which rows should be filtered.

    Returns
    -------
    Table
    FTr8   )r>   r$   r7   r   )rB   r   r   from_batchesr   r'   r   r	   rN   combine_chunks
to_batches)table
expressionis_batchr+   resultr   r   r   _filter_table^  s   
r|   c                 K   s   t | tjrt| dd}ntdt| }tdt|fi |}t||g}|jdd}|t	kr2|S |tj
kr<t
|S td)NTr7   r8   order_byrA   )rB   rC   r   r.   r   r   r   r'   rN   r   r   rD   )table_or_dataset	sort_keysrW   kwargsdata_sourcer}   r+   rh   r   r   r   _sort_source~  s   

r   c              	   C   s2   t t dt| t dt||dg}|j|dS )Nr8   	aggregate)keysr7   )r   r'   r   r   rN   )rx   
aggregatesr   r   r+   r   r   r   	_group_by  s
   r   )TF)T)pyarrow.libr   r   pyarrow.computer   r   pyarrow._aceror   r   r   r	   r
   r   r   r   r   ImportErrorexcstrpyarrow.datasetr*   rC   pyarrow._datasetr   r   r.   ri   rt   r|   r   r   r   r   r   r   <module>   s>   0


 4
[ 