o
    h?                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ zd dlZW n eyI   dZY nw d dlZd dlZd dlmZ d dlZd dlZd dlmZ d dlZd dlmZ d dlm Z m!Z!m"Z"m#Z#m$Z$ zd dl%Z&W n ey   dZ&Y nw zd dl'm(Z) W n ey   dZ)Y nw zd dl*m+Z, W n ey   dZ,Y nw ej-j(Z.G dd dZ/dd	 Z0d
d Z1dd Z2ej3dd Z4ej3dd Z5ej3dddd Z6ej3dd Z(ej3ddgddgddd Z7ej-j+dd  Z8d!d" Z9ej-j+d#d$ Z:ej-j+d%d& Z;ej-j+d'd( Z<ej-j+d)d* Z=ej-j+d+d, Z>ej-j+d-d. Z?ej-j+d/d0 Z@d1d2 ZAd3d4 ZBd5d6 ZCej-Dd7g d8d9eEd:eFd;eFfd<d=ZGd>d? ZHd@dA ZIej-j+dBdC ZJej-j+dDdE ZKej-j+dFdG ZLdHdI ZMdJdK ZNej-DdLejOdMddNdOdPggej-DdQddgej-j+dRdS ZPej-j+dTdU ZQej-j+ej-jRdVdW ZSdXdY ZTdZd[ ZUej-j+d\d] ZVej-j+dd^d_ZWej-j+d`da ZXej-j%ej-j+dbdc ZYej-j+ddde ZZej-j+dfdg Z[ej-j+dhdi Z\ej-j%ej-j+djdk Z]ej-j+dldm Z^ej-j+dndo Z_ddpdqZ`ej-j%ej-j+drds Zaej-j+dtdu Zbej-j%ej-j+dvdw Zcej-j+dxdy Zdej-j+dzd{ Zeej-j+d|d} Zfej-j+d~d Zgej-j+dd Zhej-j+dd Ziej-j%ej-j+dd Zjej-j+ej-Dddd dd gdd Zkej-j+ej-Ddddgej-Dddd dd gdd Zlej-Dddd dd gdd Zmej-Dddd dd gdd Zndd Zodd Zpej-j+ej-j%dd Zqdd Zrdd Zsdd Ztdd Zudd Zvdd Zwdd Zxej-j+dd Zydd ZzdddZ{dd Z|dd Z}dd Z~ej-j+dd Zej-j+dd Zej-j+dd Zej-j+dd Zej-j+dd Zej-j+ddĄ Zej-j+ddƄ Zej-j+ddȄ Zej-j+ddʄ Zej-j+dd̄ Zdd΄ ZddЄ Zdd҄ ZddԄ Zej-Ddddgddׄ Zddل Zej-j+ddۄ Zej-j+dd݄ Zej-j+dd߄ Zej-j+dd Zdd Zdd Zej-j+ej-Ddddgej-Ddddgej-Ddddgej-Ddg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfgdd Zej-j%dd Zej3dd Zej-j+ej-jRdd Zej-j+ej-jRdd Zej-j+ej-jRdd Zej-j+ej-jRdd  Zej-j+dd Zej-j+dd Zej-j%dd Zej-j+dd Zej-j+d	d
 Zej-j+dd Zdd Zdd Zdd Zdd Zej-j+dd Zej-j+dd Zdd Zej-jdd Zej-jdd Zdd  Zej-jd!d" Zej-j%d#d$ Zej-j%ej-Dd%g d&d'd( Zd)d* Zd+d, Zd-d. Zej-j%d/d0 Zej-j%d1d2 Zej-j%d3d4 Zd5d6 Zd7d8 Zd9d: Zej-j%ej-Dd%g d;d<d= Zd>d? Zej-j+ej-j%d@dA Zej-j+ej-j%ej-jejdBkdCdDdEdF Zej-j+ej-j%dGdH Zej-j+dIdJ Zej-j+ej-j%dKdL ZdMdN ZdOdP Zej-j+ej-j%dQdR Zej-j+ej-j%dSdT Zej-j+ej-j%dUdV Zej-j+ej-j%dWdX Zej-j+dYdZ Zej-j+ej-j%d[d\ Zej-j+ej-j%d]d^ Zʐd_d` Zej-j%ej-j+dadb Zej-j+ej-j%dcdd Z͐dedf Z	dڐdgdhZej-j+didj Zej-j+ej-j%dkdl Zѐdmdn ZҐdodp ZӐdqdr Zej-j+ej-j	dsdt ZՐdudv Zej-j%dwdx Zאdܐd|d}Zؐd~d Zِdd Zej-j+dd Zej-j+dd Zej-j+dd Zej-j+dd Zej-j+ej-j%dd Zej-j+ej-j%dd Zdd Zdd Zdd Zdd Zdd Zej-jej-j+dd Zdd Zej-j+dd Zej-j+dd Zej-j+ej-j%dd Zdd Zej-j+dd Zej-j+ej-jRdd ZdZej-j+ej-jRdd Zej-j+dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-Ddddgdd Zej-Ddddgdd ZddÄ ZdĐdń Zej-DdƐdǡdȐdɄ Zdʐd˄ Zej-j+d̐d̈́ Zdΐdτ ZdАdф Z ej-DdddgdҐdӄ ZdԐdՄ Zd֐dׄ Zdؐdل ZdS (      N)copytreequote)is_threading_enabled)FSProtocolClassProxyHandler_configure_s3_limited_user_filesystem_uri
change_cwdc                   @   s   e Zd Zdd ZdddZdS )TableStreamWrapperc                 C   s
   || _ d S Ntable)selfr    r   N/var/www/vscode/kcb/lib/python3.10/site-packages/pyarrow/tests/test_dataset.py__init__F      
zTableStreamWrapper.__init__Nc                 C   s   | j |S r   )r   __arrow_c_stream__)r   requested_schemar   r   r   r   I      z%TableStreamWrapper.__arrow_c_stream__r   )__name__
__module____qualname__r   r   r   r   r   r   r   E   s    r   c                 C   s~   dd l }dd l}| ddd}|jdd}|g d}g }t| D ]}|||t|t|f ||7 }q"tj	|g ddS )	Nr   i        )days)greenblueyellowredorange)dateindexvaluecolorcolumns)
datetime	itertools	timedeltacyclerangeappendfloatnextpd	DataFrame)nr(   r)   dayintervalcolorsdatair   r   r   _generate_dataM   s   
r8   c              
   C   s\   t t dt  t dt  t dt  t dt  g}t jj| |dd}|	 S )Nr"   r#   r$   r%   F)schemapreserve_index)
par9   fielddate32int64float64stringTablefrom_pandasreplace_schema_metadata)dfr9   r   r   r   r   _table_from_pandas]   s   rE   c              	   C   sx   |   D ]5}| '}t|tjsJ |jrJ | sJ | s$J | r*J W d    n1 s4w   Y  qd S r   )	get_fragmentsopen
isinstancer;   
NativeFileclosedseekablereadablewritable)datasetfragmentnfr   r   r   +assert_dataset_fragment_convenience_methodsh   s   

rQ   c            
      C   s   t  } ddg}t|D ]\}}d||}| | | |e}ttdttt	tdttt
td|gd dd tdD g}tdt fdt fd	t fd
t fdtt t dfg}tj||d}tj|g}	t|	| W d    n1 sw   Y  q| S )Nzsubdir/1/xxxzsubdir/2/yyyz{}/file{}.parquetr   c                 S   "   g | ]}|d  t |d  dqS    abstr).0jr   r   r   
<listcomp>      " zmockfs.<locals>.<listcomp>i64f64rY   conststructrU   r9   )fs_MockFileSystem	enumerateformat
create_diropen_output_streamlistr,   mapr.   rY   r;   r9   r>   r?   r@   ra   record_batchrA   from_batchespqwrite_table)
mockfsdirectoriesr7   	directorypathoutr6   r9   batchr   r   r   r   ro   s   s6   





ro   c                    sx   ddl m}m} ddlm} |   fddt  fdd}| |d	| || }tjfd
d}||fS )Nr   )LocalFileSystemPyFileSystemr   )r   c                    s    fdd| D S )Nc                    s   h | ]	}  t|qS r   )normalize_pathrY   rZ   plocalfsr   r   	<setcomp>       z6open_logging_fs.<locals>.normalized.<locals>.<setcomp>r   )pathsrz   r   r   
normalized   s   z#open_logging_fs.<locals>.normalizedc                    s$     t|}| | j|S r   )rw   rY   add_fsopen_input_file)r   rr   )r{   openedr   r   r      s   
z(open_logging_fs.<locals>.open_input_filer   c              	   3   sB       zd V  W   | ksJ d S   | ks J w r   )clear)expected_opened)r   r   r   r   assert_opens   s
   .z%open_logging_fs.<locals>.assert_opens)	
pyarrow.fsru   rv   test_fsr   setsetattr
contextlibcontextmanager)monkeypatchru   rv   r   r   rc   r   r   )r{   r   r   r   open_logging_fs   s   r   module)scopec              	      sd  | j jd | j jd td t }t  fddtdd D \}}}|d tt	fd	dtdd
 D D ]&\}}d
|}||}tt|| W d    n1 sjw   Y  qI|d ||jjj|jgD ]0\}	}dj
|	 }
d
|
}||
 ||}tt|| W d    n1 sw   Y  q|d ||jjj|jjjgD ]0\}	}dj
|	 }
d
|
}||
 ||}tt|| W d    n1 sw   Y  q|d |dD ]1\}	}d
|	}
d
|
}||
 ||}tt|| W d    n	1 s*w   Y  q|S )Npandasparquet  c                    "   g | ]} j ||d    qS )   ilocrZ   r7   )rD   r2   r   r   r\      r]   z!multisourcefs.<locals>.<listcomp>r   r   plainc                    r   
   r   r   )df_ar2   r   r   r\      r]   r   zplain/chunk-{}.parquetr9   zschema/{}/{}z{}/chunk.parquethivezhive/year={}/month={}
hive_colorr%   zhive_color/color={})configpyarrowrequiresr8   rc   rd   lenr,   rg   re   rf   rh   rm   rn   rE   groupbyr"   dt	dayofweekr%   yearmonth)requestro   df_bdf_cdf_dr7   chunkrr   rs   partfolderr   )rD   r   r2   r   multisourcefs   sT   (
,





"






r   c              
   C   sf   t  }tjddd}t d}t ttdt	 tdt
 g|_t | |||}| S )NsubdirT	recursivegroupkey)dsParquetFileFormatrc   FileSelectorFileSystemFactoryOptionsDirectoryPartitioningr;   r9   r<   int32r@   partitioningFileSystemDatasetFactoryfinish)ro   rf   selectoroptionsfactoryr   r   r   rN      s   
rN   TFthreadedserial)paramsidsc                    s   | j  G  fddd}| S )z]
    Fixture which allows dataset scanning operations to be
    run with/without threads
    c                       sT   e Zd Z fddZ fddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )zdataset_reader.<locals>.readerc                    s
    | _ d S r   use_threads)r   r   r   r   r     r   z'dataset_reader.<locals>.reader.__init__c                    s   d|v rt d |d< d S )Nr   z9Invalid use of dataset_reader, do not specify use_threads)	Exception)r   kwargsr   r   r   _patch_kwargs
  s
   z,dataset_reader.<locals>.reader._patch_kwargsc                 [      |  | |jdi |S Nr   )r   to_tabler   rN   r   r   r   r   r        
z'dataset_reader.<locals>.reader.to_tablec                 [   r   r   )r   
to_batchesr   r   r   r   r     r   z)dataset_reader.<locals>.reader.to_batchesc                 [   r   r   )r   scannerr   r   r   r   r     r   z&dataset_reader.<locals>.reader.scannerc                 [      |  | |j|fi |S r   )r   head)r   rN   num_rowsr   r   r   r   r        
z#dataset_reader.<locals>.reader.headc                 [   r   r   )r   take)r   rN   indicesr   r   r   r   r   !  r   z#dataset_reader.<locals>.reader.takec                 [   r   r   )r   
count_rowsr   r   r   r   r   %  r   z)dataset_reader.<locals>.reader.count_rowsN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   reader  s    r   )param)r   r   r   r   r   dataset_reader   s   	$r   c                    sX  t t dt  g}t  ddg}dd tddD } fddt||D }td	td
k}tj	|| |d}tj	j
|| ||d}||fD ]}t|tj	sYJ t|jtjsbJ |j|sjJ t|jt|ksuJ t| }t|||D ]q\}	}
}|	j|
sJ |	j|ksJ t|	jtjsJ t|	tjsJ |	jdgksJ |	jdksJ t|	 }|	jt|  krdksJ  J t|d tjsJ |d j|ksJ |d jdgksJ |d jdksJ qt|jtddkd}t|dks	J qOtj	|| d}|jtdsJ tj	j
|| d}|jtds5J | D ]}	|	jtdsGJ q9tjtdd t	| | W d    n	1 scw   Y  tjtdd tj	|| dd W d    n	1 sw   Y  tjtdd tj	j
| d W d    d S 1 sw   Y  d S )Nr`   subdir/1/xxx/file0.parquetsubdir/2/yyy/file1.parquetc                 S   s   g | ]	}t d |kqS )r   r   r<   rZ   xr   r   r   r\   3  r}   z+test_filesystem_dataset.<locals>.<listcomp>r   rT   c                    s   g | ]\}}  ||qS r   make_fragment)rZ   rr   r   file_formatro   r   r   r\   4  s    leveli9  )r9   rf   
filesystemroot_partition)r9   rf   r   
partitionsr   r   filter   r9   rf   r   Tzincorrect typematch)r9   rf   r   rf   )r;   r9   r<   r>   r   r   r,   zipscalarFileSystemDataset
from_pathsrH   rf   partition_expressionequalsr   filesri   rF   rr   ParquetFileFragment
row_groupsnum_row_groupssplit_by_row_groupr   pytestraises	TypeError)ro   r9   r~   r   	fragmentsr   dataset_from_fragmentsdataset_from_pathsrN   rO   	partitionrr   row_group_fragmentsr   r   r   test_filesystem_dataset,  sx   "$r  c                 C   s   t t dt  g}t }dg}tjj|||t	 d}|
  tt | | W d    d S 1 s9w   Y  d S )Nf1znonexistingfile.arrowr   )r;   r9   r<   r>   r   IpcFileFormatr   r   rc   ru   rF   r   r   FileNotFoundErrorr   )r   r9   r   r~   rN   r   r   r   1test_filesystem_dataset_no_filesystem_interactiont  s   "r  c           	      C   s  t | tjsJ t | jtjsJ tjg dt d}tjg dt d}|	| D ]}t |tj
s6J |d|s@J |d|sJJ q,||  D ]}t |tjs\J t |jtjseJ qR|| }t |tjssJ t|dks{J tddk}| jd|d}|d	 }|d ddgksJ |d
 ddgksJ t|d	 ddgksJ t|d ddgksJ tddk}| jd|d}|d	 }|d g dksJ |d
 g dksJ |d	 g dksJ |d g dksJ tdtd
tddkd}| jd|d}|d }t|g dks#J |d g dks.J |d
 g dks9J |d g dksDJ t|  d S )Nr   r   r   rT   r   typer   r   r   r^   T)r   r   r   r_         ?r   r   xxxyyy)ra   rW   1)r   r   r   r   )r
        @r
  r  )r   r   r   r   )r  r  r  r  )r^   r_   new)r   r'   )
r   r   r   r   r   r   rT   rT   r   r   )
        r  r
  r
         @r        @r  r  r  r  )
FFTTFFFFTT)rH   r   Datasetr9   r;   Schemaarrayr>   r?   r   RecordBatchcolumnr   r   scan_batchesTaggedRecordBatchrO   Fragmentr   rA   r   r<   sort_by	to_pydictsortedri   rQ   )	rN   r   expected_i64expected_f64rt   r   	conditionresult
projectionr   r   r   test_dataset  sN   
r#  c                 C   s(   | j ddd}t|}|jdksJ d S )N      )fragment_readaheadbatch_readahead   )r   r/   num_columns)rN   r   rt   r   r   r   test_scanner_options  s   r*  c           	      C   sV  |j | t d}t|tjsJ ttj |j | dgd W d    n1 s*w   Y  |j | dgt d}|j	| j
ksBJ |jt
dt fgksQJ t|tjsYJ | }| D ]}|j
|jkskJ |jdksrJ qa||  ks}J |j
|jksJ t|jD ]}t|g}||||ksJ qttj |t|jg W d    n1 sw   Y  |j| ksJ |j | g dt d}| }g d}|j|ksJ |d}|d	  d
gd dgd  ksJ |d  dgd dgd  ksJ |d  dgd ksJ |d  dgd ks)J d S )N)memory_poolunknownr&   r^   )r'   r+  r   )
__filename__fragment_index__batch_index__last_in_fragmentr.  r-  r   r   r   r   r/  r   r0  T)r   r;   default_memory_poolrH   r   Scannerr   r   ArrowInvaliddataset_schemar9   projected_schemar>   r   r   r)  	to_readerread_allr,   r   r  r   ArrowIndexErrorr   column_namesr  	to_pylist)	rN   r   r   r   rt   r7   r   expected_namessorted_tabler   r   r   test_scanner  sR   


& r=  c              	   C   sd   t  }t  }t | z| }tj| }| }| |ks$J W t | d S t | w r   )	r;   r1  system_memory_poolset_memory_poolbytes_allocatedr   r2  from_datasetr   )rN   old_poolpoolallocated_beforer   _r   r   r   test_scanner_memory_pool  s   
rF  c                 C   s  | | d}|tjjg | jdksJ |j | ddgd }|ddgiks'J |j | ddgtddkd }|dddgiksBJ |j | d	dgd }|dtt	d
d iks[J t
|  }|j ddgd }|ddgikstJ |j d	dgd }|dtt	d
iksJ d S )Nr   rb   r   r^   r&   r   r'   r   rT      r   )r   r;   rA   rl   r9   r  r   r<   ri   r,   r/   rF   )rN   r   r!  rO   r   r   r   	test_head  s"   rI  c                 C   s
  t |  }ddgtddgfD ]}|||}||||ks%J qtt ||tdg W d    n1 s@w   Y  ddgtddgfD ]}|| ||| |ksbJ qPtt || tdg W d    d S 1 s~w   Y  d S )Nr   rT   r   r(  r   )	r/   rF   r;   r  r   r   r   r   
IndexError)rN   r   rO   r   expectedr   r   r   	test_take(  s    
"rL  c                 C   s   t |  }||dksJ |j|tddkddksJ || dks(J |j| tddkddks8J |j| tddkddksHJ |j| tdd	k dd	ksXJ d S )
Nr   r^   r   r   r   r   r   rT   r   )r/   rF   r   r   r<   )rN   r   rO   r   r   r   test_count_rows8  s    $rM  c               	   C   sN   t jt jt jg} | D ]}tt |  W d    n1 sw   Y  q
d S r   )r   
FileFormatr2  Partitioningr   r   r   )classesklassr   r   r   test_abstract_classesH  s   rR  c                  C   sD  t t dt  t dt  g} tjtjtjfD ]}|| }t	|tj
s)J ||| ks1J |dks7J qt t dt  t dt  g} t| }t|jdksYJ tdd |jD seJ |d	}t	|tjsrJ tdd
ktddk@ }||sJ tt j |d W d    n1 sw   Y  |d}tdd
k}||sJ |tj| ddksJ t t dt  t dt  g} tj| dd}t|jdksJ tdd |jD sJ |d}tdtdktdtd
k@ }||sJ |d}td tdtd
k@ }||s.J dD ]}tt j || W d    n	1 sIw   Y  q0|tj| ddks\J t t dt  t dt  g} t| }t|jdks~J tdd |jD sJ |d}t	|tjsJ tdd
ktddk@ }||sJ tt j |d W d    n	1 sw   Y  |tj| ddksJ t t dt  t dt t  t  g} tj| dt g did}|jd d u sJ |jd  g dksJ |tj| d dks"J tjt t dt  t dt t  t  gdt g did}|jd d u sQJ |jd  g dks_J t jt td t d!d td D t d"gd# d$gd#  gg d%d&}t d't  fg}tjtjtjfD ]9}t )}||}tj||d(|d) tj |d(|d)}	|	! }
|
|sJ W d    n	1 sw   Y  qt B}t|}tj||d(|d) d }	tjt"d*d+ tj |d(t#dd)}	W d    n	1 sw   Y  |	d u sJ W d    d S 1 sw   Y  d S ),Nr^   r_   zother objectr   r   r   c                 s       | ]}|d u V  qd S r   r   r   r   r   r   	<genexpr>e      z$test_partitioning.<locals>.<genexpr>z/3/3.14/rT   gQ	@z/prefix/3/aaaz/3/nonesegment_encodingalphabetaxyz)null_fallbackc                 s   rS  r   r   r   r   r   r   rT  {  rU  z/alpha=0/beta=3/r   z/alpha=xyz/beta=3/)z/alpha=one/beta=2/z/alpha=one/z
/beta=two/otherc                 s   rS  r   r   r   r   r   r   rT    rU  z3_3.14_prefix_3_aaa_)firstsecondthirddictionariesr      c                 s       | ]}t   V  qd S r   randomrZ   rE  r   r   r   rT    rU  rV   r   rW   r  f2r   namesr   ipcrf   r   z,Expected Partitioning or PartitioningFactoryr   )$r;   r9   r<   r>   r?   r   r   HivePartitioningFilenamePartitioningrH   rO  r   rc  allparse
Expressionr   r   r   r3  r   is_null
dictionaryint8r@   r  r:  r   r,   tempfileTemporaryDirectorywrite_datasetrN   r   
ValueErrorint)r9   rQ  r   exprrK  
shouldfailr   partitioning_schematempdir	load_backload_back_tabler   r   r   test_partitioningS  s   




 

"



$r  c              
   C   s   t t dt  t dt  g}t|t|t|tj|ddtj|ddtj|dddg}|D ]}| 	| 
||ksDJ q6d S )Nr^   r_   rV  rW  r[  )rX  r\  )r;   r9   r<   r>   r?   r   r   ro  rp  loadsdumps)pickle_moduler9   partsr   r   r   r   test_partitioning_pickling  s   	r  z@flavor, expected_defined_partition, expected_undefined_partition))ro  )zfoo=A/bar=ant%20bee r  r  )r   )z	A/ant beer  r  )rp  )r  z
A_ant bee_)r  rE  flavorexpected_defined_partitionexpected_undefined_partitionc                 C   s  t dt  fdt  fg}tt| |d}|tddktddk@ |ks,J |d	|
tddktddk@ sEJ |tddktddk@ tddktddk@ @ |kshJ |tddktddk@ tddktddk@ B |ksJ | dkrtjt jdd	 |tddk W d    d S 1 sw   Y  d S |tddkd
ksJ d S )Nfoobarrb   zant beeA/ro  zDNo partition key for foo but a key was provided subsequently for barr   )zbar=ant%20beer  )r;   r9   r@   getattrr   rf   pcr<   rr  joinr   r   r   r3  )r  r  r  r~  r   r   r   r    test_dataset_partitioning_format  s<    
" 
r  c                  C   s   t tg dg dd} t d}t d}| j|d || |d |d| d	d
}tg dg dg dg dd	}||sHJ d S )Nr   r   rT   )r   r   r   rU   rV   rW   r   r   r?   )za+1zb-aza*2za/br&   r   rT   r   )r   r   )r   r      )      ?r
  g      ?)r   rN   r;   r   r<   r   castr   )rN   rV   rW   r!  rK  r   r   r   $test_expression_arithmetic_operators,  s   


r  c                  C   s   dd dD \} }}t | ddiksJ t | t | ks!J t | |@ |@ dd dD ks3J t ddk}t |i ksCJ t | |@ ddiksPJ t d }t |dd iksbJ d S )	Nc                 S   s   g | ]	}t ||kqS r   r   rZ   fr   r   r   r\   >  r}   z'test_partition_keys.<locals>.<listcomp>abcrV   c                 S   s   i | ]}||qS r   r   r  r   r   r   
<dictcomp>A  s    z'test_partition_keys.<locals>.<dictcomp>drT   )r   get_partition_keys_get_partition_keysr<   rt  )rV   rW   cnopenullr   r   r   test_partition_keys=  s   $r  c                  C   s   t  } t jddgd}t jdd}| jt ksJ |jddhks#J | jdks*J |jdks1J | | ks7J | |ks=J | |ksCJ d S )NrV   rW   dictionary_columnsmscoerce_int96_timestamp_unitns)r   ParquetReadOptionsr  r   r  )opts1opts2opts3r   r   r   test_parquet_read_optionsK  s   r  c                  C   sf   t  } t jdhd}t jdd}| jt  ksJ |jt jdgdks&J |jt jddks1J d S )NrV   r  sr  )r   r   read_optionsr  )pff1pff2pff3r   r   r   %test_parquet_file_format_read_options]  s   r  c                  C   s  t  } t jdd}t jddd}t jddd}t jdd	d
}t jdd}tjdddd}t jd|d}| jdu s;J | jdksBJ t rL| jdu sLJ | jdksSJ | j	dksZJ | j
du saJ |jdu shJ |jdksoJ t ry|jdu syJ |jdu sJ |jdksJ t r|jdu sJ |jdu sJ |jdksJ t r|jdu sJ |jdksJ |j	d	ksJ |j
du sJ t r|jdu sJ |j|ksJ |j| jksJ | | ksJ | |ksJ ||ksJ ||ksJ || ksJ || ksJ || ksJ d S )N   buffer_sizei    T)r  use_buffered_streamF)r  
pre_bufferi@ i )thrift_string_size_limitthrift_container_size_limitpage_checksum_verificationrH  )hole_size_limitrange_size_limitlazy)r  cache_optionsi @B )r   ParquetFragmentScanOptionsr;   CacheOptionsr  r  r   r  r  r  r  r  )r  r  r  opts4opts5opts6
cache_optsopts7r   r   r   test_parquet_scan_optionsi  sd   r  c                 C   s  t  t  t tjjdddt jtjjddgddt jtjjddd	dt  t jtjjdd
ddt jtjjddddg}z	|	t 
  W n	 tyT   Y nw td urt|t  t jdhdt jddt jdddddg |D ]}| | ||ksJ qvd S )N	T)	delimiterignore_empty_linesrT   r  )	skip_rowsr9  r  i   )r  
block_sizeignorenewlines_in_valuesunexpected_field_behavior)parse_optionsF   r   r  rV   r  )r  r  {   i  )r  r  r  r  )r   r  CsvFileFormatr;   csvParseOptionsReadOptionsJsonFileFormatjsonr-   OrcFileFormatImportErrorrm   extendr   r  r  )r  formatsr   r   r   r   test_file_format_pickling  sR   



r  c              
   C   s   t  t jtjjdddt jtjjdddt  t tjjddd	t jtjjdd
ddg}t	d urD|
t jddt jddg |D ]}| | ||ksTJ qFd S )NT)strings_can_be_nullconvert_options   r  r  Ferrorr  i   r  r  r  )r  )r   CsvFragmentScanOptionsr;   r  ConvertOptionsr  JsonFragmentScanOptionsr  r  rm   r  r  r  r  )r  r   optionr   r   r   #test_fragment_scan_options_pickling  s2   

r  paths_or_selectorr   r   r   r   r  c                 C   sj  t jt jdhd|d}t d}t ttdt tdt	 g|_
|jdks/J |jddgks8J |jd	u s?J t | |||}| }| jttd
t tdt tdtt t	 tdt tdtt t	 dtdt tdt	 gd	dsJ t| tsJ t||t jsJ |jt dsJ | }t|t jsJ | }tjg dt d}	tjg dt d}
tjtjg dt dtjd  t	 d}tdd t!dD }|" }t#|ddgddgD ]\\}}}}tj|gd t d}tj|gd t	 d}tj|d gd t d}|j$d usEJ |j%dksMJ |d |	sWJ |d |
saJ |d |skJ |d |suJ |d |sJ |d |sJ |d |sJ q|& }t|tj'sJ t(|d ksJ |j%dksJ d S )!NrY   r  )r  r  r   r   r   .rE  Fr^   r_   r`   ra   rU   check_metadataTr  r  z	0 1 2 3 4c                 S   rR   rS   rX   r   r   r   r   r\     s    z+test_filesystem_factory.<locals>.<listcomp>r   r   r   r  r  r(  r   rT   r   r  r   ))r   r   r  r   r   r;   r9   r<   r   r@   r   partition_base_dirselector_ignore_prefixesexclude_invalid_filesr   inspectr   r>   r?   ru  ra   rH   inspect_schemasri   r   r   r   r   r   r  DictionaryArrayfrom_arrayssplitr,   r  r   r   r)  r   rA   r   )ro   r  r  rf   r   r   inspected_schemarN   r   r  r  expected_strexpected_structiteratorrt   rO   r   r   expected_groupexpected_keyexpected_constr   r   r   r   test_filesystem_factory  s   

	


"r  c                 C   s   t  }t jd| |d}|jD ]A}||| }|jdgksJ |j|| dgd}||fD ]}t|t js6J |j|ks=J t|j	t
| sGJ q,|jdgksPJ qd S )N/plainr   rf   r   r   )r   r   rN   r   r   r   rH   r   rr   r   r	  )r   parquet_formatrN   rr   rO   row_group_fragmentr  r   r   r   test_make_fragment4  s    
r  c                    s  | \}}}}}}}t  |g}fdd|D }	t j|	|jd   }
|
|s0J  fdd jD }fddt||D }t j||jd}  }
|
|s\J dd |D }fddt||D }t j||jd}tj	t
jjdd	 | }W d
   n1 sw   Y  dd |D }fddt||D }t j||jd}tj	tdd	 | }W d
   d
S 1 sw   Y  d
S )z
    Test passing file_size to make_fragment. Not all FS implementations make use
    of the file size (by implementing an OpenInputFile that takes a FileInfo), but
    s3 does, which is why it's used here.
    c                    s   g | ]}  |qS r   r   rZ   rr   r   rc   r   r   r\   T  s    z0test_make_fragment_with_size.<locals>.<listcomp>)rf   r9   r   c                    s   g | ]	} j |jqS r   )r   get_file_infosizer   rN   r   r   r\   ^  r}   c                        g | ]\}} j ||d qS )	file_sizer   rZ   rr   r  r  r   r   r\   _      c                 S      g | ]}d qS )r   r   r  r   r   r   r\   h      c                    r  r  r   r  r  r   r   r\   i  r  zParquet file size is 1 bytesr   Nc                 S   r  )r  r   r  r   r   r   r\   t  r  c                    r  r  r   r  r  r   r   r\   u  r  zHTTP status 416)r   r   r   r9   r   r   r   r   r   r   r   libr3  OSError)s3_example_simpler   rr   urihostport
access_key
secret_keyr~   r   tbl
sizes_truefragments_with_sizedataset_with_sizesizes_toosmallsizes_toolarger   )rN   r   rc   r   test_make_fragment_with_sizeG  sP   





"r&  c                 C   s   t d}t|d}t }||}t|	 tj
s J tjg dg dg dgg dd}| ||s<J |||}| || sPJ d S )NzT
        alpha,num,animal
        a,12,dog
        b,11,cat
        c,10,rabbit
    utf-8rV   rW   r        r   dogcatrabbitrY  numanimalrk  )textwrapdedentr;   	py_bufferencoder   r  r   rH   rG   BufferReaderr   r   r   r  r  )r   r  contentbuffer
csv_formatrO   rK  pickledr   r   r   "test_make_csv_fragment_from_buffer  s   


r<  c                 C   s   d}t |d}t }||}t| t jsJ t j	g dg dg dgg dd}| 
||s9J |||}| 
||
 sMJ d S )Nz{"alpha" : "a", "num": 12, "animal" : "dog"}
{"alpha" : "b", "num": 11, "animal" : "cat"}
{"alpha" : "c", "num": 10, "animal" : "rabbit"}
r'  r(  r)  r,  r0  rk  )r;   r5  r6  r   r  r   rH   rG   r7  r   r   r   r  r  )r   r  r8  r9  json_formatrO   rK  r;  r   r   r   #test_make_json_fragment_from_buffer  s   

r>  c                 C   s   t g dt g dt g dg}|d  |d |d  g}tjtjddgd	d
dd}|t f||fg}|D ]<\}}t j|g dd}t  }t	|| |
 }	||	}
| |
|sgJ |||
}| ||syJ q=d S )Nr(  r)  r,  r   r   r   rY  r2  r  Tr  )r  r  r  r0  rk  )r;   r  dictionary_encoder   r   r  r   BufferOutputStreamrm   rn   getvaluer   r   r   r  r  )r   r  arraysdictionary_arraysdictionary_formatcasesformat_r   rs   r9  rO   r;  r   r   r   &test_make_parquet_fragment_from_buffer  s8   


	
rG  c                 C   sl   t jtddgd dgd dgd  gg dd}t| d }tj||d	g|d
 tj|dd|d}||fS )Nr%  r   rV   r   rW   ri  rk  test_parquet_datasetr   )partition_cols
chunk_sizer   r   )rf   r   r   )r;   r   r,   rY   rm   write_to_datasetr   rN   )r  rJ  r   r   rr   rN   r   r   r   _create_dataset_for_fragments  s   "rL  c                 C   s2  t | \}}t| }t|dksJ |d }ddg}|jj|ks$J |j|j|j	|jks2J |j
tddks?J ||}|j|ksKJ ||dddsYJ |j||jd}|jg d	ksjJ ||ddsuJ |j|jdksJ |j||jtddk d
}|jg d	ksJ d S )Nr   r   r  rj  r   rV   r   rb   ri  )r9   r   )rL  ri   rF   r   physical_schemarl  rf   r  rr   r   r   r   r   r<   r   r9  remove_columnslicer9   remove)r  r   r   rN   r   r  physical_namesr!  r   r   r   test_fragments  s&   
rR  c                 C   s   t jtddgd dgd  gddgd}t| d }tj||dgd	 tjt d
gdd}tj	|d|d}|j
tddkd}tt|dksLJ d S )Nr%  r   r   r   colr   rk  rH  rI  )r   rv  r   r  r   rn  r   )r;   r   r,   rY   rm   rK  r   r   r9   rN   rF   r<   r   ri   )r  r   rr   r   rN   r   r   r   r   test_fragments_implicit_cast	  s   *rV  c           
         s  t | \ }	 d fdd	}t| d }|j}|||}||||ks-J |j|j|j	|j
d}||||sEJ ||d |j|j|j	|j
d}||dtddk d	 |j|j|j	|j
d}||ddgtdd
k d |j|j|j	|j
d}||dtddkd	 d|jddd }	tjt|	d  |j|j|j	|j
d}|j|tddkd	 W d    d S 1 sw   Y  d S )Nc                    sP   | j  j||d}|r|n j}|j|ksJ  j| |}||s&J d S )Nr9   r'   r   )r   r9   r9  rO  selectr   )rO   	row_slicer'   r   actualr9  rK  r   r   r   assert_yields_projected  s   z;test_fragments_reconstruct.<locals>.assert_yields_projectedr   )r   )r   r   )r   r   r  r   r   r  rG  r   rV   z&No match for FieldRef.Name\(part\) in Fr   NN)rL  ri   rF   rf   r  r  r   r   rr   r   r   r   r   r<   rM  	to_stringr   r   rz  )
r  r   r  rN   r[  rO   r	  pickled_fragmentnew_fragmentpatternr   r   r   test_fragments_reconstruct  s`   


"ra  c                 C   s^  t | dd\}}t| d }t| }t||j  kr$dks'J  J |j|d |jd}|jg dks:J t|dksBJ |	|
ddsMJ |d jd usVJ |d jdks_J |d jd jdddddddkstJ t|jtd	dk d
d }t|td	dk }t|dksJ |j|d td	dk d
}t|dksJ d S )Nr   rJ  r   rb   ri  r   minmaxr  rj  r  r   )rL  ri   rF   r   r   r   r   r9   r9  r   rO  r   
statisticsr   r<   )r  r   r   rN   rO   r  r!  r   r   r   !test_fragments_parquet_row_groupsT  s.   "
rh  c                 C   s   t dtdi}tj|| d dd tj| d dd}t| d }|j	j
|j|jd	d
gd}|jdks8J |  |jdksCJ t|jdksLJ d S )NrV   r%  test.parquetr   row_group_sizer   r   r   r   rT   r  )r;   r   r,   rm   rn   r   rN   ri   rF   rf   r   rr   r   r   ensure_complete_metadatar   r   )r  r   rN   original_fragmentrO   r   r   r   %test_fragments_parquet_num_row_groupsr  s   rn  c                 C   s   t tddgddgd}|d d|d< tt|| d  d	d lm	} |	| d }|j
||ddkd
}|jd	 | k  sIJ d S )NrV   rW   r   r   )col1col2ro  categoryztest_filter_dictionary.parquetr   r   )r0   r1   dictastyperm   rn   r;   r   pyarrow.datasetrN   r   r<   r   	to_pandasrq  )r  r   rD   r   rN   r!  r   r   r   ,test_fragments_parquet_row_groups_dictionary  s   "rv  c                 C   s  |\}}t | d|d\}}t| d }||jg |  W d    n1 s*w   Y  |jddgks8J |g  |  W d    n1 sKw   Y  t|jtj	sYJ |j
j|j|jddgd}|j|jksnJ |  |jd }	|	jdks~J |	jdksJ |	jd usJ |||}
||jg% |
jddgksJ |
jd }	|	jdksJ |	jd usJ W d    d S 1 sw   Y  d S )Nr   rJ  r   r   r   r  )rL  ri   rF   rr   rl  r   rH   metadatarm   FileMetaDatarf   r   r   idr   rg  r  r  )r  r   r  rc   r   rE  rN   rO   r_  	row_groupr^  r   r   r   &test_fragments_parquet_ensure_metadata  s:   





"r|  c           
      C   s   |\}}t | |d\}}t| d }|g  |||}W d    n1 s+w   Y  ||jg |j}	W d    n1 sDw   Y  |	dgksPJ d S )Nr   r   r   )rL  ri   rF   r  r  rr   r   )
r  r   r  rc   r   rE  rN   rO   r^  r   r   r   r   )test_fragments_parquet_pickle_no_metadata  s   
r~  c                 C   s  t jt g dt  t g dt  t g dt  t g dt  t g dt  t g dt  t g dt 	 t g dt 
 t g dt  t g dt  t g dt  t g dt  t g dt  t g dt dt g dt dt g dt dt g dt  t g dt  t g dt dt g dt dgg d	d
}t| d }tj|||d |tj|dddfS )N)TNF)r   r   *   )r
  g      $@      E@)rV   Nzr  r  us)r   r   l    jt )booleanrv  uint8int16uint16r   uint32r>   uint64r.   doubleutf8binaryts[s]ts[ms]ts[us]r=   date64time32time64rk  test_parquet_dataset_all_typesrb  r   r   rn  )r;   r   r  bool_rv  r  r  r  r   r  r>   r  float32r?   r  r  	timestampr=   r  r  r  rY   rm   rK  r   rN   )r  rJ  r   rr   r   r   r   _create_dataset_all_types  s6   /r  c              
      s  t | \}}t| d }dd l  fdd} fdd} fdd} j} j}t| }	|	d jd us9J |	d jd }
|
jdksGJ |
j	d	ksNJ |
j
i d
ddddddddddddddddddddddddddddddddddddddddddddddddd d!dd"|d|ddd#|d|ddd$|d|ddd%|d&dd'|d&d'd(d|d&dd|d&d'd)d|ddd|dddd|dddd|dddddd*ksJ d S )+Nr   c                    s     ddddd| S N  r   r   r(   r   r  r   r   dt_s      z.test_parquet_fragment_statistics.<locals>.dt_sc              
      s     dddddd| d S )Nr  r   r   r   r  r  r  r   r   dt_ms  r}   z/test_parquet_fragment_statistics.<locals>.dt_msc              	      s     dddddd| S r  r  r  r  r   r   dt_us      z/test_parquet_fragment_statistics.<locals>.dt_usrT   r   r  FTrc  rv  r   r  r  r  r  r   r  r>   r  r.   r
  r  r  r  rV   r  r     a   zr  r  r  r=   r  r   r*     )r  r  r  )r  ri   rF   r(   r"   timer   r   r   total_byte_sizerg  )r  r   rN   rO   r  r  r  r"   r  r  r{  r   r  r    test_parquet_fragment_statistics  sh   








	




r  c                 C   sn   t g dg dd}tj|| d dd tj| d dd}t| d	  }|d
 j	d	 j
i ks5J d S )N)r   r   NN)rV   rW   NNrU   ri  r   rj  r   r   r   r   )r;   r   rm   rn   r   rN   ri   rF   r   r   rg  )r  r   rN   r   r   r   r   &test_parquet_fragment_statistics_nulls2  s
   r  c                 C   st   t g dg ddd d }|j| d dd tj| d dd	}t| d  }|d jd j	i ks8J d S )
N)rV   rW   rW   r   r   r  rU   r   ri  r   enginer   r   )
r0   r1   
to_parquetr   rN   ri   rF   r   r   rg  )r  rD   rN   r   r   r   r   'test_parquet_empty_row_group_statistics=  s
    r  c                 C   s   t | dd\}}t| d }|jtddksJ t|jtddk|jd}t	|dks4J t|jtddk|jd}t	|dksKJ d S )Nr   rb  r   r   rV   r   r9   rW   )
rL  ri   rF   r   r   r   r<   r   r9   r   )r  r   rN   rO   r  r   r   r   +test_fragments_parquet_row_groups_predicateI  s   r  c                 C   sL  t | dd\}}t| d }|j}t| }|||}||||ks-J |j|j	|j
|jdgd}	||	}
|
||d sKJ |j|j	|j
|jdhd}	|j|	|jddgtddk d	}
|
jddgksrJ t|
dkszJ |j|j	|j
|jdhd}	tjtd
d ||	 W d    d S 1 sw   Y  d S )Nr   rb  r   )r   r   r   r  r   rT   rW  zreferences row group 2r   )rL  ri   rF   rf   r   r  r  r   r   rr   r   r   r   r9   r   r<   r9  r   r   r   rJ  )r  r   r  r   rN   rO   r	  r  r^  r_  r!  r   r   r   -test_fragments_parquet_row_groups_reconstruct`  sH   
"r  c           
      C   s  |\}}t | d|d\}}t| d }|jddgd}|g " |jdks)J |jddgks2J |jd jd us<J W d    n1 sFw   Y  ||}	|	 ddgddgdks_J |jg d}|jdkslJ |jg kssJ |j||j	d}	|	j
dksJ |	|d d sJ d S )	Nr   rw  r   rT   row_group_idsr   rf  rb   )rL  ri   rF   subsetr   r   rg  r   r  r9   r   r   
r  r   r   rc   r   r   rN   rO   subfragr!  r   r   r   !test_fragments_parquet_subset_ids  s&   


r  c           
      C   sR  |\}}t | d|d\}}t| d }|tddk}|g " |jdks+J t|jdks4J |jd j	d us>J W d    n1 sHw   Y  |
|}	|	 g dg ddksaJ |tdd	k}|jdksrJ |jg ksyJ |j
||jd
}	|	jdksJ |	|d d sJ |jtddk|jd
}|jdksJ d S )Nr   rw  r   r  rT   r  )r   r   r   rf  r   rb   r   rV   r   )rL  ri   rF   r  r   r<   r   r   r   rg  r   r  r9   r   r   r  r   r   r   $test_fragments_parquet_subset_filter  s*   


r  c                 C   s   t | dd\}}t| d }tt |jtddkddgd W d    n1 s.w   Y  tt |  W d    d S 1 sHw   Y  d S )Nr   rb  r   r  r   r  )	rL  ri   rF   r   r   rz  r  r   r<   )r  rE  rN   rO   r   r   r   %test_fragments_parquet_subset_invalid  s   
"r  c           
      C   s  t g d}t g d}t g d}t jj||gddgd}t jj||gddgd}t d	|i}tj|| d
 dd tj| d
 dd}t	|
 d }|jdksVJ |td	ddk}	|	jdkshJ |td	ddk}	|	jdkszJ |td	dddk}	|	jdksJ |td	dddk}	|	jdksJ tjt jdd |td	ddk W d    n1 sw   Y  tjtdd |td	ddk W d    d S 1 sw   Y  d S )N)r   r   r   rT   )皙?皙?333333?皙?r   r   rT   r   f21f22rk  r  rj  rS  zdata_struct.parquetr   rj  r   r   r   r   r   zNo match for FieldRef.Nestedr   f3z)Function 'greater' has no kernel matching)r;   r  StructArrayr  r   rm   rn   r   rN   ri   rF   r   r  r<   r   r   r3  NotImplementedError)
r  r  r  r  rj  
struct_colr   rN   rO   r  r   r   r   0test_fragments_parquet_subset_with_nested_fields  s4   "r  c                 C   s   t | d }t|dkst|dksJ t| \}}tj|dd}t | d }t|d|jt	|ks<J | d }t
j|| tj|dd}t | d }t|d	|jt	|ksgJ d S )
Nr   zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[key=xxx, group=1]>zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[group=1, key=xxx]>r   r   z-<pyarrow.dataset.ParquetFileFragment path={}>data.featherfeatherz/<pyarrow.dataset.FileFragment type=ipc path={}>)ri   rF   repr_create_single_filer   rN   rf   r   rw   rY   r;   r  write_feather)r  rN   rO   r   rr   r   r   r   test_fragments_repr  s0   r  r;  c                 C      | S r   r   r   mr   r   r   <lambda>      r  c                 C      | || S r   r  r  r  r   r   r   r    r  c                 C   s   t jddd}t }td}tjddg}|||}t|tjs%J ||_	t
| |||}| }tdt fdt fdt fd	t fd
tt t dfdt fdt fg}	||	skJ tj }
t|
tjsxJ d S )Nr   Tr   r   r   r^   r_   rY   r`   ra   rU   )rc   r   r   r   r   r   discoverrH   PartitioningFactorypartitioning_factoryr   r  r;   r9   r>   r?   r@   ra   r   r   ro  )ro   r;  r  r  rf   r   r  r   r  expected_schemahive_partitioning_factoryr   r   r   test_partitioning_factory  s.   







	
r  infer_dictionaryc                 C   r  r   r   r  r   r   r   r  @  r  c                 C   r  r   r  r  r   r   r   r  @  r  c                 C   s4  t jddd}t }td}tjjddg|d}||||_t| |||}|	 }	|rt
t
 t
 }
|	dj|
ksBJ |   }|dd}t
dgd	 d
gd	   }||shJ | jtddkd}|dd}|dd	}||sJ d S |	djt
 ksJ d S )Nr   Tr   r   r   r  r   r  r   r  r   )rc   r   r   r   r   r   r  r  r   r  r;   ru  r   r@   r<   r	  r   r   combine_chunksr  r   r  r?  r   rO  )ro   r  r;  r  r  rf   r   r  r   inferred_schemaexpected_typer   rZ  rK  r   r   r   $test_partitioning_factory_dictionary=  s.   
r  c                 C   r  r   r   r  r   r   r   r  b  r  c                 C   r  r   r  r  r   r   r   r  b  r  c              
   C   s  t  }t }tdt fg}tjtt	dg|d}tdt
dfdt fg}tdt fdt fg}tt|t| }dD ]>}	||	 ||	d (}
tj|
|}|| |  W d    n1 suw   Y  W d    n1 sw   Y  qKt jd	d
d}td	}tjj|d}| |||_t||||}| }||ksJ | jdtdt id}|d d  dksJ tjjddgdd}| |||_t||||}t|  }|d j !tddktddk@ s
J tj|dd}| |||_"t||||}t|  }|d j !tddktddk@ s>J tjj|dd}| |||_t||||}t#j$tj%dd | }W d    n	1 slw   Y  t jdd
d}td}tj&j|d}| |||_t||||}| }||ksJ | jdtdt id}|d d  dksJ tj&jdd}| |||_t||||}t|  }|d j !tddktddk@ sJ tj&|dd|_"t||||}t|  }|d j !tddktddk@ sJ tj&j|dd}| |||_t||||}t#j$tj%dd | }W d    d S 1 sNw   Y  d S )Nr^   r   rb   r"   r  r@   )z%directory/2021-05-04 00%3A00%3A00/%24z,hive/date=2021-05-04 00%3A00%3A00/string=%24
/0.featherrq   Tr   date_intr&   r   逎`rV  rW  2021-05-04 00%3A00%3A00%24r9   rX  +Could not cast segments for partition fieldr   r   )'rc   rd   r   r  r;   r9   r>   r   r  r,   r  r@   ri   rg   rh   rm  new_filern   closer   r   r   r  r  r   r  r   r   r<   r  as_pyrF   r   r   r   r   r   r3  ro  )r;  r  ro   rf   r9   r   partition_schemastring_partition_schemafull_schemarq   sinkwriterr   r   r  r   r  rZ  r   r   r   r   r   *test_partitioning_factory_segment_encodinga  s   














$r  c                 C   r  r   r   r  r   r   r   r    r  c                 C   r  r   r  r  r   r   r   r    r  c              	   C   s  t  }t }tdt fg}tjtt	dg|d}tdt
dfdt fg}tdt fdt fg}tt|t| }tdt
dfdt fg}	tdt fdt fg}
d	}|| ||d
 (}tj||}|| |  W d    n1 sw   Y  W d    n1 sw   Y  t jddd}td}tjj|d}| |||_t||||}| }||ksJ | jdtdt id}|d d  dksJ tjjdd}| |||_t||||}t|  }|d j !tddktddk@ s#J tj|dd}| |||_"t||||}t|  }|d j !tddktddk@ sWJ tjjdd}| |||_t||||}t|  }|d j !tddktddk@ sJ tj|
dd}| |||_"t||||}t|  }|d j !tddktddk@ sJ tjj|	dd}| |||_t||||}t#j$tj%dd | }W d    d S 1 sw   Y  d S )Nr^   r   rb   ztest'; dater  ztest';[ string'ztest%27%3B%20dateztest%27%3B%5B%20string%27zLhive/test%27%3B%20date=2021-05-04 00%3A00%3A00/test%27%3B%5B%20string%27=%24r  r   Tr   r  r&   r   r  r  rW  z2021-05-04 00:00:00$rV  r  r  r  r  r   )&rc   rd   r   r  r;   r9   r>   r   r  r,   r  r@   ri   rg   rh   rm  r  rn   r  r   r   ro  r  r  r   r  r   r   r<   r  r  rF   r   r   r   r   r   r3  )r;  r  ro   rf   r9   r   r  r  r  partition_schema_enstring_partition_schema_enrq   r  r  r   r   r  r   r  rZ  r   r   r   r   r   ;test_partitioning_factory_hive_segment_encoding_key_encoded  s   















$r  c              
   C   s   t g dg dd}tt t dt  t dt  g}tt j	 tj
|| d|d W d    d S 1 s=w   Y  d S )Nr   yNr   r  r  rU   rV   rW   rm  rn  )r;   r   r   r   r9   r<   r@   r   r   r3  ry  r  r   r   r   r   r   /test_dictionary_partitioning_outer_nulls_raises  s   $"r  c                 C   sV   t g dg dd}tt t|| d W d    d S 1 s$w   Y  d S )Nr  r  rU   zbasename-{i}.arrow)r;   r   r   r   r   r   ry  )r  r   r   r   r   test_positional_keywords_raises&  s   "r  c                 C   s   d}t t d|d t|d d}tj|d | | d dgd tj|d |d  | d dgd tj| d dgd	}|d jdksHJ tj| d dd
gd	}|d jdks\J tj| d dgd	}|d jdksoJ d S )Ni   r   r   )r   r$   oner   rT  twor&   r$   r   )	r;   r   repeatnparangerm   rK  
read_table
num_chunks)r  
BATCH_SIZEr   r   r   r   test_read_partition_keys_only,  s&   


r  c                    s    t  }t fdd|D S )Nc                    s"   g | ]}t jt j |qS r   )osrr   isdirr  )rZ   elbasedirr   r   r\   I  r]   z _has_subdirs.<locals>.<listcomp>)r  listdirany)r  elementsr   r  r   _has_subdirsG  s   
r  c                 C   sZ   t | D ]%}t j| |}t j|r*t||}t|r%t||| q|| qd S r   )	r  r  rr   r  r   	posixpathr  _do_list_all_dirsr-   )r  path_so_farr!  r  true_nestednorm_nestedr   r   r   r	  L  s   
r	  c                 C   s   g }t | d| |S )Nr  )r	  )r  r!  r   r   r   _list_all_dirsW  s   r  c                 C   s    t t| }|t |ksJ d S r   )r   r  )r  expected_directoriesactual_directoriesr   r   r   _check_dataset_directories]  s   r  c              
   C   sh   t g dg dd}tt t dt  t dt  g}tj|| d|d t| g d d S )	Nr  r  rU   rV   rW   rm  rn  )zx/xzy/yr  )	r;   r   r   r   r9   r<   r@   ry  r  r  r   r   r   (test_dictionary_partitioning_inner_nullsb  s   $r  c              
   C   sl   t g dg dd}tt t dt  t dt  gd d}tj|| d|d t| g d	 d S )
N)r   Nr  r  rU   rV   rW   r[  rm  rn  )za=x/b=xz	a=xyz/b=yz	a=z/b=xyz)	r;   r   r   ro  r9   r<   r@   ry  r  r  r   r   r   test_hive_partitioning_nullsj  s   r  c                  C   s0  t dt  fdt  fg} ddg}t| }t|tjs J tj| dd}t|tjs/J tj|d}t|tjs=J t	
t t  W d    n1 sQw   Y  t	j
tdd tj| d W d    n1 snw   Y  t	j
tdd tj| | d W d    n1 sw   Y  tj| d	d
}t|tjsJ tj| dd	d}t|tjsJ tjd	d
}t|tjsJ t	
t tj|d	d
 W d    n1 sw   Y  t	j
tdd tj|d	d W d    n1 sw   Y  t	
t tj| dd
 W d    d S 1 sw   Y  d S )Nr   r   inferrb  )field_nameszExpected listr   zCannot specify bothr   rU  )rc  r  zCannot specify 'field_names')r  r  unsupported)r;   r9   r  rv  r   r   rH   r   r  r   r   rz  ro  )r9   rl  r   r   r   r   test_partitioning_functionr  s@   

$r  c                 C   s   t t dt t  t  t dt t  t  g}tjj	|d}tj
dd| |d}|jj|ks7J | }|dj|jd sIJ |d dgd	 d
gd	  ks\J |dj|jd sjJ |d dgd	 dgd	  ks}J d S )Nr   r   rb   r   r   rf   r   r   r   r   r   r   r  r  )r;   r9   r<   ru  rv  r   r@   r   r   r  rN   r   r   r  r	  r   typesr:  )ro   r9   r   rN   r   r   r   r   *test_directory_partitioning_dictionary_key  s   &*r  c           	      C   s.  t t dt t  t  t dt t  t  g}tjj|d}tj	dd| |d}|j
j|ks7J | }ttdd}ttd	d
}|dj|jd sWJ |djD ]}|j }|  ||ksnJ q]|dj|jd	 s}J |djD ]}|j }|  ||ksJ qd S )Nr   r   rb   r   r   r  i  i  r      r   )r;   r9   r<   ru  rv  r  r   ro  r  rN   r   r   ri   r,   r  r	  r   r  chunksr:  sort)	r   r9   r   rN   r   year_dictionarymonth_dictionaryr   rZ  r   r   r   %test_hive_partitioning_dictionary_key  s.   

r  c                 C   sL   |d u rt tddgd dgd  d}| d }tj|||d ||fS )	N	   r  r   r
  r   rU   ri  rj  r;   r   r,   rm   rn   )base_dirr   rk  rr   r   r   r   r    s
   $r  c                 C   s   t tddgd dgd  d}| d }t|| t tdddgd dgd  d}| d	 }t|| ||f||ffS )
Nr   r  r   r
  r   rU   ztest1.parquetr  ztest2.parquetr!  )r"  table1path1table2path2r   r   r   _create_directory_of_files  s   $&r'  c                 C   sD   | | || fD ]}| j|jsJ || |sJ q
d S r   )r  r  r9   r   r   )rN   r   r   picklerr  r   r   r   _check_dataset  s   r)  c                 K   s   t | tjsJ | t| | gt| gfD ]}tj| fi |}t |tjs'J t|||| qt| j	" tj| j
fi |}t |tjsGJ t|||| W d    d S 1 sYw   Y  d S r   )rH   pathlibPathrY   r   rN   r   r)  r
   parentname)rr   r   r   r(  r   ry   rN   r   r   r   _check_dataset_from_path  s   "r.  c                 C   s   t | \}}t|||| d S r   r  r.  r  r   r  r   rr   r   r   r   test_open_dataset_single_file  s   r1  c                 C   s"   t | dd\}}t|||| d S )Nr   rj  r/  r0  r   r   r   test_deterministic_row_order  s   r2  c                 C   s(   t | \}}t|}t| ||| d S r   )r'  r;   concat_tablesr.  )r  r   r  tablesrE  r   r   r   r   test_open_dataset_directory	  s   
r5  c           
         s   t | \}\}}t|}t||gtt|t|gg}| fdd|D 7 }|D ]}|j|js7J ||}	|	|sCJ q,d S )Nc                    s   g | ]
}   |qS r   r  )rZ   r  r  r   r   r\   	  s    z3test_open_dataset_list_of_files.<locals>.<listcomp>)	r'  r;   r3  r   rN   rY   r9   r   r   )
r  r   r  r4  r$  r&  r   datasetsrN   r!  r   r6  r   test_open_dataset_list_of_files	  s   

r8  c                 C   s   t | \}}t|}t|}|j|jsJ tj|t d}|j|js*J t	t
 tj|t d W d    d S 1 sDw   Y  d S )Nr}  )r  r   r   rN   r9   r   rc   ru   r   r   r   rd   )r  r   rr   fspathdataset1dataset2r   r   r   #test_open_dataset_filesystem_fspath	  s   
"r<  c                 C   s   | d }|   t|\}}||}t|}tj|t d}tjt|t|d}	|	|
|}
||||  krP||	  krP||
ksSJ  J d S )Nsingle-filer}  )mkdirr  relative_tor   rN   rc   ru   rY   r	   r  r  r   )r  r   r  rq   r   rr   relative_pathd1d2d3d4r   r   r   test_construct_from_single_file/	  s   


rE  c                 C   s   | d }|   t|\}}t|}tj|t d}tj|jt| d}||}	||}
||}|	|
  kr@|ksCJ  J |||fD ]}|	|
|}|||	ks[J qHd S )Nsingle-directoryr}  )r>  r'  r   rN   rc   ru   r-  r	   r   r  r  )r  r   r  rq   r4  r~   rA  rB  rC  t1t2t3r  restoredr   r   r   $test_construct_from_single_directoryC	  s   



rK  c                    s    d }|   t|\}} fdd|D }t  t|}||}t|ttt|ks3J W d    n1 s=w   Y  tj|t	 d}||}	t|}
||
}tj|t
 d}||}||	  krx|  krx|ks{J  J d S )Nzlist-of-filesc                    s   g | ]}|  qS r   )r?  rx   r  r   r   r\   ^	  r  z5test_construct_from_list_of_files.<locals>.<listcomp>r}  )r>  r'  r
   r   rN   r   r   sumrj   r	   rc   ru   )r  r   rq   r4  r~   relative_pathsrA  rG  rB  rH  rC  rI  rD  t4r   rL  r   !test_construct_from_list_of_filesW	  s    






*rP  c                 C   sJ   ddg}t jtdd tj|| d W d    d S 1 sw   Y  d S )Nr   z!subdir/1/xxx/doesnt-exist.parquetzdoesnt-existr   r}  )r   r   r  r   rN   )ro   r   r   r   r   -test_construct_from_list_of_mixed_paths_failsn	  s   "rQ  c                 C   s   t jddg| d}t jd| d}t ||g}t|t jsJ tt| dks+J | }t|dks7J |jdks>J t|j	dksGJ |j	D ]}|j
ddgksUJ qJd S )	Nr   r   r}  r   r   rd  r   r   )r   rN   rH   UnionDatasetr   ri   rF   r   r)  childrenr   )ro   rV   rW   rN   r   childr   r   r   (test_construct_from_mixed_child_datasetsy	  s$   
rU  c                  C   s6   t jg dd} |  }|jdksJ |jdksJ d S )Nrm  r   r   )r   rN   r   r   r)  )emptyr   r   r   r   test_construct_empty_dataset	  s   rW  c               	   C   sf   t jg dtdt fdt fgd} tjtdd | 	  W d    d S 1 s,w   Y  d S )Nrm  rV   rf   r9   zMultiple matches for .*a.* in r   )
r   rN   r;   r9   r>   r@   r   r   rz  r   )rV  r   r   r   *test_construct_dataset_with_invalid_schema	  s   



"rY  c                    s|  t j| tdt  d}t j| tdt  d}tjjtt	dgdgd tjjtt	dgdgd}t
jtdd	 t ||g W d    n1 sQw   Y  d
}t
jt|d	 t g d W d    n1 sqw   Y  d}t
jt|d	 t d  W d    n1 sw   Y  d}t
jt|d	 t  fddt	dD  W d    n1 sw   Y  d}t
jt|d	 t g  W d    n1 sw   Y  d}t
jt|d	 t  |g W d    n1 sw   Y  d}t
jt|d	 t  dg W d    n	1 sw   Y  d}t
jt|d	 t  dg W d    d S 1 s7w   Y  d S )Nr  r   /schemar   rV   rk  rW   z"Expected.*FileSystemDatasetFactoryr   zExpected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types: intr  zbExpected a path-like, list of path-likes or a list of Datasets instead of the given type: NoneTypezcExpected a path-like, list of path-likes or a list of Datasets instead of the given type: generatorc                 3       | ]} V  qd S r   r   rh  batch1r   r   rT  	      z<test_construct_from_invalid_sources_raise.<locals>.<genexpr>rT   zEMust provide schema to construct in-memory dataset from an empty listzFItem has schema
b: int64
which does not match expected schema
a: int64z}Expected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types:r   zCExpected a list of tables or batches. The given list contains a int)r   r   rc   r   r   r;   r  r  r  r,   r   r   r   rN   rz  InMemoryDataset)r   child1child2batch2rK  r   r\  r   )test_construct_from_invalid_sources_raise	  sd   $rc  c                 C   s   t jjt tdgdgd}t j|g}tjg dt 	g d
 }|t g ks,J |||g|gfD ]6}t|}| 
||ksDJ tt| dksPJ t| 
 |ks\J t jt| |ksjJ q4d S )Nr   rV   rk  rm  rX  r   )r;   r  r  r  r,   rA   rl   r   rN   r9   r   r   r   ri   rF   r/   r   )r   rt   r   dataset_tablesourcerN   r   r   r   test_construct_in_memory	  s   
rf  r   c              	      s   t jjt tdgdgd t j gd} fddd ffddd f fdd jffD ]2\}}tj	j| || d	}|
 ksFJ tjt j|d
 |
  W d    n1 s]w   Y  q0d S )Nr   rV   rk  z#OneShotFragment was already scannedc                      s   t j j gS r   )r;   RecordBatchReaderrl   r9   r   rt   r   r   r  	  s    z$test_scan_iterator.<locals>.<lambda>c                      s   t  S r   )r   r   r   r   r   r  	  s    c                      s    fddt dD S )Nc                 3   r[  r   r   rh  rh  r   r   rT  	  r^  z7test_scan_iterator.<locals>.<lambda>.<locals>.<genexpr>r   r,   r   rh  r   r   r  	  r  r9   r   r   )r;   r  r  r  r,   rA   rl   r9   r   r2  r   r   r   r3  )r   r   r   r9   r   r   )rt   r   r   test_scan_iterator	  s$   

rk  c                 C   s   t tddgd dgd  d}| d }|  tdD ]}|d	| }|  t|d| d|d
  q|dt j	dgd dgd  dgd  t 
 d}||fS )Nr   r  r   r
  r   rU   zdataset-partitionedrT   zpart={}ri  r   r   r   r   r  )r;   r   r,   r>  rf   rm   rn   rO  append_columnr  r   )r  r   rr   r7   r   
full_tabler   r   r   _create_partitioned_dataset
  s   $,rn  c           
      C   sj  t | \}}|ddg}t|||| tjt|tjddd}|j|js*J t	|  tjdtjddd}|j|jsCJ W d    n1 sMw   Y  tjt|dd}|j|jsdJ tjt|tjt
dt
 fgddd}|jt
dt
 }|j|sJ | }|dt
jdgd	 d
gd	  dgd	  t
 d}	||	sJ d S )NrV   rW   r   rU  r   zdataset-partitioned/r   r   rT   r   r   r  )rn  rX  r.  r   rN   rY   r   r9   r   r
   r;   rv  r-   r<   r   rl  r  )
r  r   r  rm  rr   r   rN   r  r!  rK  r   r   r   'test_open_dataset_partitioned_directory
  s8   

,rp  c                 C   s   t | \}}tt|}|j|jsJ tjt|t d}|j|js*J t|  tjdt d}W d    n1 sBw   Y  |j|jsPJ t	
t tjt|t d W d    d S 1 slw   Y  d S )Nr}  ri  )r  r   rN   rY   r9   r   rc   ru   r
   r   r   r  rd   )r  r   rr   r:  r;  dataset3r   r   r   test_open_dataset_filesystem;
  s   
"rr  c                 C   sP   t | \}}tjtdd tj|gdd W d    d S 1 s!w   Y  d S )Nz format 'blabla' is not supportedr   blablar   )r  r   r   rz  r   rN   )r  rE  rr   r   r   r   $test_open_dataset_unsupported_formatR
  s   "rt  c                 C   s`   t | \}}t|}t||g}t|tjsJ |||}||||ks.J d S r   )r  r   rN   rH   rR  r  r  r   )r  r   r  rE  rr   rN   unionr;  r   r   r   test_open_union_datasetY
  s   
rv  c                 C   sT   t jd| dd}tjtdd t j|gdd W d    d S 1 s#w   Y  d S )Nr  r   r  zcannot pass any additionalr   r   )r   rN   r   r   rz  )r   rT  r   r   r   .test_open_union_dataset_with_additional_kwargse
  s   "rw  c                   C   s|   t t tjddd W d    n1 sw   Y  t jtjdd tjddd W d    d S 1 s7w   Y  d S )Nzi-am-not-existing.arrowrm  r   zcannot be relativer   zfile:i-am-not-existing.arrow)r   r   r  r   rN   r;   r3  r   r   r   r   #test_open_dataset_non_existing_filek
  s   "rx  r   rq   r   r\  r[  partition_keysr  BCr  )DEFr  )r   NrT   )r  Nr|  )Nr   rT   c                    sl  t tddgd dgd  d}d |d v pd |d v }|d	kr&|r&d S |d	kr9tjjd
dg d}d}d }n|rDtjj |d}ntjj d}d}|rR|}nd}| d }	|	  |\}
}|
D ]!}|D ]}|	||pn||pq| }|jdd t	
||d  qfqbtjt|	|d} fdd}|jt d
||
d t d||d }|j|sJ d S )Nr   r  r   r
  r   rU   r   r   rq   part1part2r  z{0}/{1})r  r\  zpart1={0}/part2={1}__HIVE_DEFAULT_PARTITION__rN   T)parentsri  ro  c                    sH    rt | trt nt }tt |S t | tr t S t S r   )rH   rY   r;   r@   r   ru  )r   
value_typer  r   r   r  
  s   z/test_partition_discovery.<locals>.expected_type)r;   r   r,   r   r   r  ro  r>  rf   rm   rn   rN   rY   r9   r-   r<   r   )r  r   r\  r  ry  r   has_nullfmt
null_valuebasepath
part_keys1
part_keys2r  r  rr   rN   r  r  r   r  r   test_partition_discoveryu
  sT   $r  c           	      C   sV  t tddgdtdd}tj|dgjdd}tj	|| |d	d
 tj
| d	tjjddd}t |d |d  d}| |sIJ t| d }|j|jd|d d saJ |j}|||}| |suJ |||}|j|jd|d d sJ |j|jd |d d  sJ |j|sJ d S )Nr  r{  r   r   r   rS  r   r   rU  r  r   rf   Tr  rn  rS  )rS  r   r   rb   )r;   r   r  r  r,   r   r   rX  r9   ry  rN   ro  r  r?  r   r   ri   rF   r   r  r  ru  )	r  r  r   r   rN   rK  rO   	part_exprrJ  r   r   r   4test_dataset_partitioned_dictionary_type_reconstruct
  s,      r  c                 C   s   ddl m} | d \}}}}d||||}||\}}|d tdg di}	|d}
t	|	|
 W d    n1 sAw   Y  |	|||||||fS )	Nr   
FileSystem
connectionz_s3://{}:{}@mybucket/data.parquet?scheme=http&endpoint_override={}:{}&allow_bucket_creation=TruemybucketrV   r  zmybucket/data.parquet)
r   r  rf   from_urirg   r;   r   rh   rm   rn   )	s3_serverr  r  r  r  r  r  rc   rr   r   rs   r   r   r   r  
  s   
r  c                 C   s^   | \}}}}}}}}t j|dd}|||sJ t j|d|d}|||s-J d S )Nr   r   rf   r   )r   rN   r   r   )r  r   r   rr   rc   r  rE  rN   r   r   r   test_open_dataset_from_uri_s3
  s
   r  c           
      C   sP   | \}}}}}}}}t d}||}tj|d|d}	||	|s&J d S )Nr  r   r  )rc   r   r  r   rN   r   r   )
r  r   r   rr   r   r  rE  r   finfosrN   r   r   r    test_open_dataset_from_fileinfos
  s
   

r  c                 C   s   | \}}}}}}}}t d}ddlm}	m}
 |j||dd||id}tj|d|d}|	 
|s6J |
|	|}tj|d|d}|	 
|sMJ d S )	Ns3fsr   )FSSpecHandlerrv   endpoint_urlzhttp://{}:{})r   secretclient_kwargsr   r  )r   importorskipr   r  rv   S3FileSystemrf   r   rN   r   r   )r  r   rr   rE  r  r  r  r  r  r  rv   rc   rN   r   r   r   $test_open_dataset_from_uri_s3_fsspec  s   
	r  c                 C   s.  ddl m} | d \}}}}d}d}d||||||}||\}	}|dks)J |	| tdg d	i}
|	|}t	|
| W d    n1 sMw   Y  t
j|d
d}| |
sbJ d||||}g d}|D ]\}}||}t
j||d
d}| |
sJ qptjtjdd |d}t
jd|d W d    n1 sw   Y  d}d}||}tt}t
jd|d W d    n1 sw   Y  t|j|d||ksJ d}||}tt}t
jd|d W d    n	1 sw   Y  t|j|d||ksJ d S )Nr   r  r  theirbucketnested/folder/data.parquetzOs3://{}:{}@{}/{}?scheme=http&endpoint_override={}:{}&allow_bucket_creation=truez&theirbucket/nested/folder/data.parquetrV   r  r   r   3s3://{}:{}@{{}}?scheme=http&endpoint_override={}:{}))ztheirbucket/nested/folder/z/data.parquet)ztheirbucket/nested/folderdata.parquet)ztheirbucket/nested/folder/data.parquet)ztheirbucket/nestedr  )r  z/nested/folder/data.parquet)r  r  r  zMissing bucket namer   r  z'/theirbucket/nested/folder/data.parquetr}  zThe path component of the filesystem URI must point to a directory but it has a type: `{}`. The path component is `{}` and the given filesystem URI is `{}`ztheirbucket/doesnt/existr  NotFoundFile)r   r  rf   r  rg   r;   r   rh   rm   rn   r   rN   r   r   r   r   r3  rz  rY   r$   )r  r  r  r  r  r  bucketrr   r  rc   r   rs   rN   templaterE  prefixr  excr   r   r   -test_open_dataset_from_s3_with_filesystem_uri"  sZ   




"r  c                 C   sD   t | \}}td}|d}tj||d}|j|js J d S )Nfsspecfiler}  )r  r   r  r   r   rN   r9   r   )r  r   rr   r  r{   rN   r   r   r   test_open_dataset_from_fsspecf  s
   

r  c           	      C   s   t d}tdg di}| d }t|| |d}|| d ds)J t	
 }tt|}|||}||jsCJ |||}|j|jsRJ d S )Nr  rV   r  r  r  r   )r   r  r;   r   rm   rn   r   lsendswithr   r   rc   rv   r  r  r   r9   r   rM  )	r  r  r   rr   	fsspec_fsrf   r   r9   rO   r   r   r   test_file_format_inspect_fsspecq  s   

r  c                 C   s  | d }t ddgd tdd}tj|dgjdd	}tj|||d
d tjt dt dfgdd	}tj	|d
|d}t
dtdk}|j||d}|d g dks]J dd l}t
d|dddk}|j||d}|d g dksJ d S )Ntest_partition_timestamps
2012-01-01z
2012-01-02r   r   )datesrz  r  r   rU  r  r  r  rn  r   rz  )r   rT   r   r(  r   r   i  r   )r;   r   r,   r   r   rX  r9   ry  r  rN   r<   r0   	Timestampr   r  r:  r(   )r  r   rr   r   r   rN   r   r(   r   r   r   test_filter_timestamp  s$   
r  c                 C   sh   t dt jg dt  di}t| |\}}tt|}tddk}t	|j
||ddks2J d S )NrV   )r   r   r   rT   r   r   r  r   r   rT   )r;   r   r  rv  r  r   rN   rY   r<   r   r   )r  r   r   rE  rr   rN   filter_r   r   r   test_filter_implicit_cast  s
    r  c                 C   s^   t dg di}t| |\}}tt|}|j|tdtd kd}|j	dks-J d S )Nr  )rV   rW   Nr   r   )
r;   r   r  r   rN   rY   r   r<   r   r   )r  r   r   rE  rr   rN   r   r   r   test_filter_equal_null  s   r  c           	      C   s   t g ddd tdD dd tddD d}t| |\}}tt|}tt	d	t 
d
dg}|j||djdksBJ tt	ddk}|j||djdksXJ tt	dt	d}|j|d|id}|d  g dksyJ d S )N)rV   rW   NrV   r  c                 S   s   g | ]
}t  d dd|qS i  r   r  r   r   r   r   r\         z2test_filter_compute_expression.<locals>.<listcomp>r   c                 S   s   g | ]	}t  d d|qS r  r  r   r   r   r   r\     r}   r   r  rz  r  rV   rW   r   rT   r{  r   r|  r   r&   r  )r;   r   r,   r  r   rN   rY   r  is_inr<   r  r   r   hourdays_betweenr:  )	r  r   r   rE  rr   rN   r  r   r!  r   r   r   test_filter_compute_expression  s   r  c                 C   s   t j| tdt  d}t |g}t| dksJ tdd | D s*J | d 	|
 s7J |
 	|
 sBJ t| t jsLJ d S )Nr  r   r   c                 s   s    | ]	}t |tjV  qd S r   )rH   r;   r  )rZ   r  r   r   r   rT    s    z%test_dataset_union.<locals>.<genexpr>r   )r   r   rc   r   r   UnionDatasetFactoryr   r  rq  r   r  rH   r   r  )r   rT  r   r   r   r   test_dataset_union  s   
r  c                 C   s  t jd|dd}t jd|dddgd}t jd|dd	d}|j|j  kr*|jks-J  J t |||g}t|t js=J d
}tjt|d t j||g|d W d    n1 sZw   Y  tdt	 fdt
 fdt fdt fdt fdt fdt fg}|j|sJ | j|sJ t ||g}tdt	 fdt
 fdt fdt fdt fdt fg}|j|sJ | j|sJ tdt fdt fdt	 fg}t j||g|d}| j|sJ tdt fdt fdt fg}t j||g|d}| j|s$J tjtddgd dgd  dgg dd}t| |d\}	}
t |
}tjtjdd t ||g W d    d S 1 scw   Y  d S )Nr  r   r  rZ  weekr%   r   rf   r   /hiver   z$cannot pass any additional argumentsr   r}  r"   r#   r$   r   r   rb   r,  r   r  r   r
  r   	abcdefghj)r"   r$   r#   rk  r   zUnable to merge)r   rN   r9   rH   rR  r   r   rz  r;   r=   r>   r?   r@   r   r   r   r   r,   r  ArrowTypeError)r  r   r`  ra  child3	assembledmsgr  r   rE  rr   child4r   r   r   &test_union_dataset_from_other_datasets  st   

"






	











 
$r  c                 C   sJ   d}t jt|d tjg d| d W d    d S 1 sw   Y  d S )Nz8points to a directory, but only file paths are supportedr   )r  rZ  r  r}  )r   r   IsADirectoryErrorr   rN   )r   r  r   r   r   4test_dataset_from_a_list_of_local_directories_raises%  s   "r  c              
   C   s   t t jd| dt jd| dt jd| dg}tdt fdt fdt fdt fg}|j|s8J t t jd| dt jd| dt jd| d	d
g}tdt fdt fdt fdt fdt	 fdt	 fg}|j|s{J d S )Nr  r}  rZ  r  r"   r#   r$   r%   r   )r   r   r   r   )
r   rN   r;   r9   r=   r>   r?   r@   r   r   )r   rN   r  r   r   r   &test_union_dataset_filesystem_datasets+  s4   









r  c                    s  t g dg dd}t|d  d fdd	}d }|}||||jd |j}|}||| t dd	g}t jg dg dgd
dgd}||| t d	g}t jg dgdgd}||| t d	dg}t jg dt jg dddgddgd}||| t ddg}tjtd |d}t j|d 	d|d
 gdd
gd}||| t dt 
t  fdg}tjtd |d}|j|sJ tjtdd  | W d    d S 1 sw   Y  d S )Nr  r  r  r  rU   r  c                    s\   t jtd | d}|d ur|j|sJ n|j| s J  |}||s,J d S )Nr  rb   )r   rN   rY   r9   r   r   )r9   rK  r  rN   r!  r   r  r   r   r)  P  s   
z-test_specified_schema.<locals>._check_dataset)r  )rW   r?   )rV   r>   rW   rV   rk  )r  r   NNNr   r  r  )rV   r   rb   z#Unsupported cast from int64 to listr   r   )r;   r   rm   rn   r9   r  r   rN   rY   r  list_r   r   r   r   r  r   )r  r   r   r)  r9   rK  rN   r   r  r   test_specified_schemaK  sL   






"r  c                 C   s   | d }t dg di}t|| t dt  fg}tjt|gd |d}|j	|s1J |
|}tjtdd | }|  W d    d S 1 sQw   Y  d S )Nr  rV   r  d   rb   z#Unsupported cast from int64 to nullr   )r;   r   rm   rn   r9   r  r   rN   rY   r   r   r   r   r  r6  r7  )r  r   fnr   r9   rN   r   r   r   r   r   test_incompatible_schema_hang  s   

"r  c           	      C   s   t t jg dddt jg dddd}t| d }t |}t ||j}|| d  |	  W d    n1 s@w   Y  t
j|t
 d	}||}||sZJ t| d
D ]}t
j||d	}||}||suJ q`d S )Nr  rv  r  r  r?   rU   z
test.arrowr   r   )rm  arrow)r;   r   r  rY   output_streamRecordBatchFileWriterr9   write_batchr   r  r   rN   r  r   r   rQ   )	r  r   r   rr   r  r  rN   r!  
format_strr   r   r   test_ipc_format  s$   


r  c              	   C   s  ddl m} ttjg dddtjg dddd}t| d	 }||| tj|t	 d
}t
| }t|d tjsAJ ||}|jdd ||sSJ t| tj|dd
}||}|jdd ||spJ |j|dgd}|jdd ||dgsJ |j|dtdd id}|jdd |tdtjg dddisJ ||dksJ |j|tddkddksJ d S )Nr   orcr  rv  r  r  r?   rU   test.orcr   T)fullr  rW   r&   b2r   )r  r  g333333?rT   rV   r   r   )r   r  r;   r   r  rY   rn   r   rN   r  ri   rF   rH   FileFragmentr   validater   rQ   rX  r<   r   )r  r   r  r   rr   rN   r   r!  r   r   r   test_orc_format  s:   

$r  c                 C   s   ddl m} ttjg dddtjg dddd}t| d	 }||| tj|d
d}t	|
|}t|dks>J |d jdksGJ |d |
 d sTJ d S )Nr   r  r  rv  r  r  r?   rU   r  r  r   r   rT   )r   r  r;   r   r  rY   rn   r   rN   ri   r   r   r   r   )r  r   r  r   rr   rN   r!  r   r   r   test_orc_scan_options  s   r  c                  C   sh   z	ddl m}  W d S  ty3   tjtdd tjddd W d    Y d S 1 s+w   Y  Y d S w )Nr   r  z'not built with support for the ORC filer   r  r  r   )rt  r  r  r   r   rz  r   rN   r  r   r   r   test_orc_format_not_supported  s   &r  c                  C   s   t jtdd tjtdtdiddd W d    n1 s!w   Y  t } t jtdd | 	  W d    d S 1 sAw   Y  d S )Nz9Writing datasets not yet implemented for this file formatr   rV   r   r  z/tmp)rf   r"  )
r   r   r  r   ry  r;   r   r,   r  make_write_options)ofr   r   r   +test_orc_writer_not_implemented_for_dataset  s   
"r  c                 C   s   t t jg dddt jg dddd}t| d }| j|dd	 tj|t d
}|	|}|
|s:J t| tj|dd
}|	|}|
|sQJ d S )Nr  r>   r  r  r?   rU   test.csvFr#   r   r  )r;   r   r  rY   ru  to_csvr   rN   r  r   r   rQ   )r  r   r   rr   rN   r!  r   r   r   test_csv_format  s   

r  compression)bz2gziplz4zstdc                 C   s   t j|std| ttjg dddtjg dddd}t	
 }|dkr.|nd	}t| d
|  }|j||d}| jdd}||d W d    n1 s[w   Y  tj|t d}	||	}
|
|suJ d S )Nz{} support is not builtr  r>   r  r  r?   rU   r  gzz	test.csv.r  Fr  r'  r   )r   Codecis_availabler   skiprf   r;   r   r  rc   ru   rY   rh   ru  r  writer6  r   rN   r  r   r   )r  r  r   r   r   suffixrr   r  csv_strrN   r!  r   r   r   test_csv_format_compressed  s   
r  c              	   C   s  t | d }t|d}|d W d    n1 sw   Y  tj|dd}||}|tdt	g dis=J tj|tj
tjjdd	d
d}||}|tdt	ddgiscJ tj|tj
tjjdgdd
d}||}|tdt	g disJ d S )Nr  wzskipped
col0
foo
bar
r  r   skipped)col0r  r  r   )r  r  r  r  r  r9  )r  r  r  r  )rY   rG   r  r   rN   r   r   r;   r   r  r  r  r  )r  r   rr   r  rN   r!  r   r   r   test_csv_format_options6  s*   



"


r  c              
   C   s   t | d }t|d}|d W d    n1 sw   Y  tj|tjtjjdddd}|	|}g d}|j
|ks@J |ttd	gtd
gtdgtd	gds_J d S )Nr  r   z1,a,true,1
T)autogenerate_column_namesr  r   )f0r  rj  r  r   rV   )rY   rG   r  r   rN   r  r;   r  r  r   r9  r   r   r  )r  r   rr   r  rN   r!  expected_column_namesr   r   r   (test_csv_format_options_generate_columnsK  s   





r  c           	   	   C   s*  t | d }t|d}|d W d    n1 sw   Y  tj|dd}tjjdgdd}tj|t	jj
d	d
d}|j||d}|t	dt	g disTJ tj|d}tj||d}||}|t	dt	g diswJ t }|j||d}|t	dt	g disJ d S )Nr  r   zcol0
foo
spam
MYNULL
r  r   MYNULLT)null_valuesr  r  r  )r  r  )fragment_scan_optionsr  )r  spamNr  )r  r  r	  )rY   rG   r  r   rN   r   r  r  r  r;   r  r   r   r   r  r  )	r  r   rr   r  rN   r  r   r!  r:  r   r   r   test_csv_fragment_options[  s.   
"
"r  c                 C   s   t t jg dddt jg dddd}t| d }| jdd	d
d dd}t|d}|| W d    n1 sAw   Y  t	j
|t	 d}||}||s[J t| t	j
|dd}||}||srJ d S )Nr  r>   r  r  r?   rU   	test.jsonrecordsorientr   r  },{}
{r   r   r  )r;   r   r  rY   ru  to_jsonreplacerG   r  r   rN   r  r   r   rQ   r  r   r   rr   rs   r  rN   r!  r   r   r   test_json_formats  s    

r  c                 C   s  t t jg dddt jg dddd}t| d }| jdd	d
d dd}t|d}|| W d    n1 sAw   Y  t	j
tdd tj|tjt jjdddd}W d    n1 shw   Y  tj|tjt jjdddd}||}||sJ d S Nr  r>   r  r  r?   rU   r  r  r  r   r  r  r  r   ztry to increase block sizer   r   r  r  r   @   )r;   r   r  rY   ru  r  r  rG   r  r   r   rz  r   rN   r  r  r  r   r   r  r   r   r   test_json_format_options  s(    



r  c           	      C   s*  t t jg dddt jg dddd}t| d }| jdd	d
d dd}t|d}|| W d    n1 sAw   Y  t	j
tdd tjt jjddd}tj|t|d}W d    n1 smw   Y  tjt jjddd}tj|t|d}||}||sJ d S r  )r;   r   r  rY   ru  r  r  rG   r  r   r   rz  r   r  r  r  rN   r  r   r   )	r  r   r   rr   rs   r  r   rN   r!  r   r   r   test_json_fragment_options  s,    
r  c              	   C   s   t | d }dD ]^\}}t|d}|| W d    n1 s!w   Y  tdt fdt fg}tjdgdgd|d	}tjj|d
}t	j
|d}	t	j||	d}
|
j|s]J |
 |sfJ qd S )Nr  ))latin-1s   a,b
un,lphant)utf16s    a , b 
 u n ,  l  p h a n t wbrV   rW   un
   éléphantrU   rb   encodingr  r   )rY   rG   r  r;   r9   r@   r   r  r  r   r  rN   r   r   )r  r   rr   r"  
input_rowsr  r  expected_tabler  r   dataset_transcodedr   r   r   test_encoding  s"   r&  c           
      C   s  t | d }t|d}|d W d    n1 sw   Y  tdt fdt fg}tjdgdgd|d	}tj|d
|d}t	j
tjjdd || W d    n1 s\w   Y  tjjdd}tj|d}tj||d}	|	j|s}J |	 |sJ d S )Nr  r  s   ,b
un,lphant   érW   r  r   )r'  rW   rb   r  rX  zinvalid UTF8r   r  r!  r  r   )rY   rG   r  r;   r9   r@   r   r   rN   r   r   r   r  r3  r   r  r  r  r   )
r  r   rr   r  r  r$  rN   r  r   r%  r   r   r   test_column_names_encoding  s&   r(  c                 C   sT  ddl m} ttjg dddtjg dddd}| d	 }|  ||t|d
  tj|t	 d}|
|}||sBJ t| tj|dd}|
|}||sYJ |j
|ddgd}|jddgkskJ |j
|ddgd}|jddgks}J ||t|d dd tt |
tj|dd W d    d S 1 sw   Y  d S )Nr   )r  r  rv  r  r  r?   rU   feather_datasetr  r   r  rW   rV   r&   zdata1.featherr   version)pyarrow.featherr  r;   r   r  r>  rY   r   rN   r  r   r   rQ   r9  r   r   rz  )r  r   r  r   r  rN   r!  r   r   r   test_feather_format  s,   

"r-  )r  r  brotlic                 C   s  t t jdgd ddt jg dd ddd}t j|s#t  | d	 }|  t	 }| d
 }|  tj
|t|d ||jd dd |dkrtjtdd |j|d}W d    n1 sdw   Y  tjtdd t |}|j|d}W d    d S 1 sw   Y  d S |j|d}tj
|t|d ||d tj|t	 d}	||	}
|
|sJ |d d }| j}|d d }| j}||k sJ d S )Nr   ,  rv  r  r  r  r?   rU   feather_dataset_compressedfeather_dataset_uncompressedz
data.arrowr  rf   file_optionsr.  zCompression typer   r   part-0.arrow)r;   r   r  r  r  r   r  r>  r   r  ry  rY   r  r   rz  rN   r   r   statst_size)r  r  r   r   r  r   uncompressed_basedirwrite_optionscodecrN   r!  compressed_filecompressed_sizeuncompressed_fileuncompressed_sizer   r   r   test_feather_format_compressed  sX   







r>  c                 C   sp   g }t dD ]}t|gd dd t dD d}tj|t| |d qt| d }tj|j||d ||fS )zO
    Creates a simple (flat files, no nested partitioning) Parquet dataset
    r   r   c                 S   s   g | ]}t   qS r   rf  rh  r   r   r   r\   B  r  z2_create_parquet_dataset_simple.<locals>.<listcomp>rf  metadata_collector	_metadata)r,   r;   r   rm   rK  rY   write_metadatar9   )	root_pathr@  r7   r   metadata_pathr   r   r   _create_parquet_dataset_simple:  s   $
rE  c                 C   s\   | d }t |\}}t|}|j|jsJ t|jdks!J | }|jdks,J d S )NrH  r   (   )	rE  r   parquet_datasetr9   r   r   r   r   r   )r  rC  rD  r   rN   r!  r   r   r   test_parquet_dataset_factoryP  s   
rH  win32z'Results in FileNotFoundError on Windows)reasonc           	      C   s   t d}| d }t|\}}|d}tt|}tj||d}|j	
|j	s,J t|jdks5J | }|jdks@J d S )Nr  rH  r  r}  r   rF  )r   r  rE  r   rc   rv   r  r   rG  r9   r   r   r   r   r   )	r  r  rC  rD  r   r  r   rN   r!  r   r   r   #test_parquet_dataset_factory_fsspec\  s   

rK  c                 C   s   | d }t dgd tjdd}g }tj|t||d t|d }tj|j	||d t
|}|j	|j	s<J | }|jdksGJ d S )NrH  r   r   rf  r?  rA  )r;   r   r  rg  randnrm   rK  rY   rB  r9   r   rG  r   r   r   )r  rC  r   r@  rD  rN   r!  r   r   r   &test_parquet_dataset_factory_roundtript  s   	

rM  c           	   	   C   s   g }t dD ]-}tdtt |d |d d i}| | d }tj|||d |d | d qt| d }t|j	|| t
|}| }|d }|tt dd	ks]J d S )
Nr   r  r   z.parquetr?  r  rA  r   r  )r,   r;   r   ri   rm   rn   set_file_pathrY   rB  r9   r   rG  r   r  r:  )	r  	metadatasr7   r   
table_pathrD  rN   scanned_tablescanned_colr   r   r   "test_parquet_dataset_factory_order  s   
rS  c                 C   s   | d }t |\}}t|dd   t|}|j|js#J t|j	dks,J t
t |  W d    d S 1 sAw   Y  d S )Ntest_parquet_dataset_invalid	*.parquetr   r   )rE  ri   globunlinkr   rG  r9   r   r   r   r   r   r  r   )r  rC  rD  r   rN   r   r   r   $test_parquet_dataset_factory_invalid  s   

"rX  c                 C   sz   t t| d}t|d j }g }|D ]}t|j}|t	|
|  || q| d }tj|||d |S )NrU  r   rA  r?  )ri   r  rglobrm   ParquetFiler9   to_arrow_schemarx  rN  rY   r?  r-   rB  )rC  parquet_pathsr9   r@  rr   rx  rD  r   r   r   _create_metadata_file  s   r]  c              	   C   sr   t jt tdt tjdt tddgdgg dd}|ddi}t	j
|t| d	gd
 t| |fS )Nrd  rV   rW   r   ri  rk  r   r$   r   rT  )r;   r   r  r,   r  rg  rL  r  rC   rm   rK  rY   r]  )rC  r   r   r   r   #_create_parquet_dataset_partitioned  s   r^  c                 C   s   | d }t |\}}tjdd}tj||d}|j|js J t|jdks)J | }|j	dks4J |
 djdd	}|
 }tj|| d S )
N(test_parquet_dataset_factory_partitionedr   rU  ro  r   rd  r  Tdrop)r^  r   r   rG  r9   r   r   r   r   r   ru  sort_valuesreset_indexr0   testingassert_frame_equal)r  rC  rD  r   r   rN   r!  rK  r   r   r   r_    s   r_  c                 C   sh   | d }t |\}}tj|dd}|j|jsJ d|jjv s"J t| }d|d jjv s2J d S )N%test_parquet_dataset_factory_metadatar   ro     keyr   )	r^  r   rG  r9   r   rx  ri   rF   rM  )r  rC  rD  r   rN   r   r   r   r   rf    s   rf  c           
      C   sX  |\}}| d }t |\}}||g tj|tjdd|d}W d    n1 s*w   Y  |g  t| }W d    n1 sDw   Y  |g  t|tddk W d    n1 sdw   Y  |g  |d tddk W d    n1 sw   Y  |g  |d  }	|	d   W d    d S 1 sw   Y  d S )N#test_parquet_dataset_lazy_filteringr   rU  )r   r   r     r   )	rE  r   rG  r   ri   rF   r<   r   rl  )
r  r   rc   r   rC  rD  rE  rN   r   rg_fragmentsr   r   r   rh    s.   




"rh  c                 C   sp   t dg di}| d }|| t|}||j}|j|dgdj}d|jv s-J |j|dds6J d S )NrV   r  ri  r&   s   pandasTr  )	r0   r1   r  r   rN   r   r9   rx  r   )r  r   rD   rr   rN   r9   r5  r   r   r   test_dataset_schema_metadata  s   

rk  c                 C   s   t dt jg dddi}t|t| d  t dt  fg}tj	| d d|d}|j
|tddkd	}|d |d d
dsIJ t| d }|j
|tddk|d}|d |d d
dsoJ d S )NrS  r  r   r  r  r   rX  r   r   r>   r   r  )r;   r   r  rm   rn   rY   r9   r>   r   rN   r   r<   r   r  rO  ri   rF   )r  r   r   r9   rN   filteredrO   r   r   r   test_filter_mismatching_schema.  s   
"&rm  c                 C   s   t d ttdd}t| d }tj||dgd tj	|dd}|
|}|j
|dgd	}|d|ds>J d S )
Nza a b br   r  r#  r   rT  r   ro  r&   )r;   r   r  ri   r,   rY   rm   rK  r   rN   r   r  r   )r  r   r   rr   rN   all_cols	part_onlyr   r   r   +test_dataset_project_only_partition_columnsD  s   
rp  c                 C   s   t dtjg dddi}| d }|j|dd tj|dtdt	 fgd	}t
dtg dt	 i}|||sBJ d S )
NrS  r  objectdtypez(test_dataset_project_null_column.parquetr   r  r   rX  )r0   r1   r  r  r  r   rN   r;   r9   r>   r   r   r   )r  r   rD   r  rN   rK  r   r   r    test_dataset_project_null_columnT  s   rt  c                 C   s   ddl m} tg dg dg dd}||| d  tj| d dd	}|j|td
tdj	dddtddkdd}tg dtj
g dddg dd}||s\J tjtdd |j|d
d
id W d    d S 1 sxw   Y  d S )Nr   r  r  )r
  r  r  r(  rz  r  r  r   r  r{  r   Fsafer|  rV   )	A_renamedB_as_intC_is_ar&   r  )TFFzExpected an Expressionr   )r   r  r;   r   r  r   rN   r   r<   r  r  r   r   r   r   )r  r   r  r   rN   r!  rK  r   r   r   test_dataset_project_columnsb  s$   
"r{  c           	      C   sr  t | \}}t|}t|jtjsJ t| \}}t|}t|jtjs(J tj|dd}|j}|d us8J t|tjs@J |jt	dt	
 fgksOJ t|jdksXJ |jd t	g dt	
 ksiJ tjt	dt	
 fgdd}t|tjsJ t|jdksJ tdd	 |jD sJ tj||d}|j}t|tjsJ |jt	dt	
 fgksJ t|jdksJ td
d	 |jD sJ tj|dd}tjt| |j|j|jd}|jd u sJ | d }t|\}}tj|dd}|j}|d usJ t|tjsJ |jt	dt	 fgksJ t|jdks'J t|jd  ddhks7J d S )Nr   ro  r   r   r   )r   r   r   rU  c                 s   rS  r   r   r   r   r   r   rT    rU  z6test_dataset_preserved_partitioning.<locals>.<genexpr>c                 s   rS  r   r   r   r   r   r   rT    rU  r   zdata-partitioned-metadatarV   rW   )r  r   rN   rH   r   r   rn  ro  r9   r;   r   r   rc  r  rq  r   ri   rF   rf   r   r^  rG  r@   r   r:  )	r  rE  rr   rN   rm  r   r;  rC  rD  r   r   r   #test_dataset_preserved_partitioningz  sL   

" $r|  c                 C   s   t t dt  t dt t  t  g}t jg dtt	dd|d}t
| d }tj||dgd t| d }|d |d ksOJ |d|ds\J d S )	NrS  r   )NNrV   rV   r   r  rb   r#  rT  )r;   r9   r<   r>   ru  r   r@   r   ri   r,   rY   rm   rK  r  r  r:  r   )r  r9   r   rr   actual_tabler   r   r   +test_write_to_dataset_given_null_just_works  s    

r~  c                 C   s2   dd l m} |j| ||dfgd}|| |S )Nr   	ascending)r   )pyarrow.computecomputesort_indicesSortOptionsr   )tabsort_colr  sorted_indicesr   r   r   _sort_table  s
   r  c                 C   st   |p|}t j| |d|dd t|d}t|t|ksJ t j|d|d}t| |t|  |s8J d S )Nr  Frf   r   r   *rn  )	r   ry  ri   rY  r   rN   r  r   r   )rN   r"  expected_filesr  base_dir_pathr   
file_pathsr;  r   r   r   _check_dataset_roundtrip  s   
r  c                 C   s   | d }|   t|}t|}| d }|d g}t|t||d| | d }|d g}t|||d| | d }|   t|}t|}| d }|d g}t|t||d| d S )Nr=  zsingle-file-targetr4  rV   zsingle-file-target2rF  zsingle-directory-target)r>  r  r   rN   r  rY   r'  )r  rq   rE  rN   targetr  r   r   r   test_write_dataset  s"   





r  c                 C   s   | d }t |}tjdd}tj||d}| d }|d |d d |d |d d g}tjtd	t fgdd}t|t||d
||d | d }|d |d d |d |d d g}ttd	t fg}t|t||d
||d d S )Npartitionedr   rU  ro  zpartitioned-hive-targetpart=ar4  part=br   r  partitioned-dir-targetrV   rW   )	r^  r   r   rN   r;   r9   r@   r  rY   )r  rq   rE  r   rN   r  expected_pathsr~  r   r   r   test_write_dataset_partitioned  s4   
r  c                    s   t g dg dd}tj| ddgd tj ddgd}|j} fdd|D }|h d	ks3J | }||s>J d S )
Nr  r  rU   rm  rW   rn  c                    "   h | ]}t t| jqS r   rY   r*  r+  r?  r,  r  rL  r   r   r|   5      z6test_write_dataset_with_field_names.<locals>.<setcomp>>   r   r  r  r;   r   r   ry  rN   r   r   r   r  r   r  r   partitioning_dirsr  r   rL  r   #test_write_dataset_with_field_names-  s   

r  c                    s   t g dg dd}tj| ddgdd tj ddd}|j} fd	d
|D }|h dks3J | }||s>J d S )Nr  r  rU   rm  rW   r   )rf   r   partitioning_flavorrn  c                    r  r   r  r  rL  r   r   r|   F  r  z;test_write_dataset_with_field_names_hive.<locals>.<setcomp>>   b=xb=yb=zr  r  r   rL  r   (test_write_dataset_with_field_names_hive>  s   

r  c                 C   s   t g dg dg dd}tj|| ddgd tj| ddgd}t 5}tj|jddgd	|ddgd tj|ddgd}| }t	|
 |d

 ksSJ W d    d S 1 s^w   Y  d S )Nr  r  r  r(  rm  rW   rn  r  r&   rV   )r;   r   r   ry  rN   rw  rx  r   r   rr  r  drop_columnsr  r   rN   tempdir2r  r  r   r   r   test_write_dataset_with_scannerO  s"   



"r  c           	         sH  t  G fdddt}t|t ttdt	 g}tj
tttdg|d dd}dd	 fd
d}tjj| |d	dt jfddd}|  z;t fdd}d}d}| dk r|kr~|kr|d	}n}td | dk sq|sJ W d  |  d S d  |  w )Nc                       s   e Zd Z fddZdS )z6test_write_dataset_with_backpressure.<locals>.GatingFsc                    s       | jj||dS )Nrx  )waitr   rh   )r   rr   rx  consumer_gater   r   rh   k  s   zItest_write_dataset_with_backpressure.<locals>.GatingFs.open_output_streamN)r   r   r   rh   r   r  r   r   GatingFsj  s    r  r6   r  rb   r          Tc                   3   s:    k rs	d S t d d7  V  k sd S d S )Ng{Gz?r   )r  sleepr   )rt   batches_readend
keep_goingr   r   counting_generatorz  s   
z@test_write_dataset_with_backpressure.<locals>.counting_generatorrj  c                      s   t jtd dS )Nr   r  )r   ry  rY   r   )	gating_fsr   r  r   r   r    s    z6test_write_dataset_with_backpressure.<locals>.<lambda>)r  c                      s   t     S r   )r  r   )startr   r   duration  r   z6test_write_dataset_with_backpressure.<locals>.durationFr   r  )	threadingEventr   rc   rv   ru   r;   r9   r<   r   rk   r  ri   r,   r   r2  rl   Threadr  r  r  r   r  )	r  r  r9   min_backpressurer  write_threadr  
last_valuebackpressure_probably_hitr   )	rt   r  r  r  r  r  r   r  r  r   $test_write_dataset_with_backpressureb  sJ   	




r  c                 C   s   t g dg dd}tj|| ddgd tj| ddgd}t ,}tj||ddgd tj|ddgd}| }t|	 |	 ksGJ W d    d S 1 sRw   Y  d S )Nr  r  rW   r  rm  rW   rn  )
r;   r   r   ry  rN   rw  rx  r   rr  r  r  r   r   r   test_write_dataset_with_dataset  s   

"r  c           	      C   s  | d }t g dg dd}tjt t dt  gdd}dd	 }tj|||d
d t g dg dd}t	t j
 tj|||d
d W d    n1 sTw   Y  t ddgi}|d d }tj|| tj|||d
dd t g dg dd}tj| d
|d }||| | sJ tj|||d
dd t g dg dd}tj| d
|d }||| | rJ d S )Nr   r  r  r  r  r   )r9   r  c                 S   s>   |   djdd}|  djdd}||sJ d S )NrW   Tr`  )ru  rb  rc  r   )rG  rH  df1df2r   r   r   compare_tables_ignoring_order  s   zGtest_write_dataset_existing_data.<locals>.compare_tables_ignoring_orderrm  r  r(  r  rW   ezc=2z	foo.arrowoverwrite_or_ignore)r   rf   existing_data_behavior)r  r   rV   rW   r  )r   r   r   rT   r   rn  delete_matching)r   rV   rW   r  r  )r;   r   r   r   r9   r<   r>   ry  r   r   r3  r   r  r  rN   r   exists)	r  rq   r   r   r  extra_table
extra_fileoverwrittenreadbackr   r   r    test_write_dataset_existing_data  sV   



r  r   r   r   c                    s    fddt | D S )Nc                    s   g | ]}t  qS r   )rg  randintrh  re  rd  r   r   r\         z._generate_random_int_array.<locals>.<listcomp>ri  r  rd  re  r   r  r   _generate_random_int_array  s   r  c                 C   sN   g }g }t | D ]}|t|d|d |dt|  qtj||d}|S )Nr   r  r  r6   rl  )r,   r-   r  rY   r;   rk   )num_of_columnsnum_of_recordsr6   r9  r7   rk   r   r   r   _generate_data_and_columns  s   r  c                 C   s   t tt| d| S )Nz**/*.)r   ri   r*  r+  rV  base_directoryr   r   r   r   _get_num_of_files_generated  s   r  c                    s   | d }d d}d}d}t ||}tj||d |d t|}|  d }t||ks.J g }t|D ]\}	}
|t|
 }tj|dd}|	|
 jd	  q4|t|ksXJ |t|ks`J t fd
d|D smJ d S )Nr   r   r   #   r   )rf   max_rows_per_filemax_rows_per_groupr   r   r   c                 3   s    | ]}| kV  qd S r   r   )rZ   file_rowcountr  r   r   rT  &  s    z7test_write_dataset_max_rows_per_file.<locals>.<genexpr>)r  r   ry  r  r  r   re   rY   rN   r-   r   shaperM  rq  )r  rq   r  r  r  rk   files_in_direxpected_partitionsresult_row_combinationrE  f_filef_pathrN   r   r  r   $test_write_dataset_max_rows_per_file  s2   

r  c                    s   | d }d}d}d g d} fdd|D }|d }t j||||d	d
 t|}t|D ]>\}}	|t|	 }
t j|
d	d}| }| }t|D ] \}}|j	}|t
|d k re||krb||ksdJ qK||kskJ qKq.d S )Nr   r  r%  r   )
r   r   r   r   r   r   r   r   r   r   c                    s   g | ]}t  |qS r   )r  )rZ   r  r  r   r   r\   3  s
    z9test_write_dataset_min_rows_per_group.<locals>.<listcomp>min_rows_groupr   )r6   r"  min_rows_per_groupr  rf   r   r   )r   ry  r  r  re   rY   rN   r   r   r   r   )r  rq   r  r  record_sizesrecord_batchesdata_sourcer  rE  r  r  rN   r   batchesrz  rt   rows_per_batchr   r  r   %test_write_dataset_min_rows_per_group*  s8   

r  c                 C   s   | d }d}d}d}t ||}|d }tj|||dd t|}g }|D ]"}	|t|	 }
tj|
dd}| }| }|D ]}|	|j
 q>q%|dd	gksPJ d S )
Nr   r  r      max_rows_groupr   )r6   r"  r  rf   r   r*  )r  r   ry  r  r  rY   rN   r   r   r-   r   )r  rq   r  r  r  rk   r  r  batched_datar  r  rN   r   r  rt   r   r   r   %test_write_dataset_max_rows_per_groupN  s.   
r  c                 C   s:  | d }d}d}ddg}t jg dg dg|d}t jg d	g d
g|d}t jg dg dg|d}t jg dg dg|d}t j||||g}	tjt || t  fgdd}
|d }tj|	||
|d dd }|||||\}}||ks{J |d }d}tj|	||
||dd |||||\}}||ksJ d S )Nr   r   r   c1c2)r   r   rT   r   r   r   )rV   rW   r  r  r  rV   r  )r   r  r(  r%  r   r   )rV   rW   r  r  r  r  )r   r   r+  r*  r   r   )rV   rW   r  r  r  r  )r  r  ri  r$  r   r   )rV   rW   r  r  r  rW   r   rU  default)r6   r"  r   rf   c                 S   s(   t | |d}ttj|| }||fS )Nr  )r  r   r;   r  unique)r  rk   r   col_idnum_of_files_generatednumber_of_partitionsr   r   r   _get_compare_pair  s
   z<test_write_dataset_max_open_files.<locals>._get_compare_pairmax_1rT   F)r6   r"  r   rf   max_open_filesr   )	r;   rk   rA   rl   r   r   r9   r@   ry  )r  rq   r   partition_column_idr9  record_batch_1record_batch_2record_batch_3record_batch_4r   r   data_source_1r  r  r  data_source_2r  r   r   r   !test_write_dataset_max_open_filesk  sh   




r  c                 C   s   | d }t |}tj|tjjddd}| d }|d |d d |d |d d g}tjt|jd	gd	t	ddgid
}t
|t||d||d d S )Nr  Tr  ro  r  rV   r4  rW   r   rb  r  )r^  r   rN   ro  r  r   r;   r9   r<   r  r  rY   )r  rq   rE  rN   r  r  r   r   r   r   #test_write_dataset_partitioned_dict  s&   

r  c                    s   | d }t |}tj|dd}tjtdt fgdd}| d }g   fdd}tj||d	|d
|d |d d |d d h}tt	t
j }||ksOJ | d }	tj||	d	|dd tj|d	|d}
tj|	d	|d}|
 | sxJ d S )Nr  r   ro  r   rU  partitioned1c                    s     | j d S r   )r-   rr   written_filepaths_writtenr   r   file_visitor  s   z4test_write_dataset_use_threads.<locals>.file_visitorr  Trf   r   r   r	  r  part-0.featherr  partitioned2Fr  rn  )r^  r   rN   r   r;   r9   r@   ry  r   rj   r*  r+  r   r   )r  rq   rE  rN   r   target1r	  r  paths_written_settarget2result1result2r   r  r   test_write_dataset_use_threads  s4   

r  c           
         s  t jt tdt dd tdD t dgd dgd  gg dd}| d	 }tj||d
dd t|d}|d g}t|t|ksIJ tj	|dd
 }||sYJ | d }|d |d d |d |d d g}g  g  fdd}tjt dt  fgdd}tj||dd
||d t|d}t|t|ksJ dd  D }|ksJ tj	|d|d}|
 |sJ t dksJ  D ]}	t|	|v sJ qd S )Nrd  c                 s   re  r   rf  rh  r   r   r   rT    rU  z#test_write_table.<locals>.<genexpr>rV   r   rW   ri  rk  singledat_{i}.arrowr  basename_templaterf   r  zdat_0.arrowrm  r   r  r  r  c                    s     | j  | j d S r   )r-   rr   r  r  visited_pathsvisited_sizesr   r   r	    s   z&test_write_table.<locals>.file_visitorr   r   rU  )rf   r  r   r	  c                 S   s   g | ]}t j|qS r   )r  rr   getsizer  r   r   r   r\     r  z$test_write_table.<locals>.<listcomp>rn  r   )r;   r   r  r,   r   ry  ri   rY  r   rN   r   r   r   r9   r@   r   r*  r+  )
r  r   r"  r  r  r!  r	  r   actual_sizesvisited_pathr   r  r   test_write_table  sN   "

r  c                 C   s  t jt tdt dd tdD t dgd dgd  gg dd}t |gd	 }| d
 }tj||dd t|dt|d gksJJ tj	|dd
 |sXJ | d }tj|g|dd t|dt|d gksuJ tj	|dd
 |sJ | d }tj| |dd t|dt|d gksJ tj	|dd
 |sJ | d }tj||g|dd t|dt|d gksJ tj	|dd
 t |gd	 sJ d S )Nr   c                 s   re  r   rf  rh  r   r   r   rT  #  rU  z6test_write_table_multiple_fragments.<locals>.<genexpr>rV   r   rW   ri  rk  r   r  r  r   r  r  rm  zsingle-listmultiplezmultiple-table)r;   r   r  r,   r3  r   ry  r   rY  rN   r   r   r   )r  r   r"  r   r   r   #test_write_table_multiple_fragments!  s:   "  

r  c                 C   s,  t jt tdt dd tdD t dgd dgd  gg dd}| d	 }tjd
d | D ||jddd tj|dd	 }|
|sLJ | d }t j|j| }tj||ddd tj|dd	 }|
|ssJ | d }t|}tj||ddd tj|dd	 }|
|sJ d S )Nrd  c                 s   re  r   rf  rh  r   r   r   rT  G  rU  z&test_write_iterable.<locals>.<genexpr>rV   r   rW   ri  rk  inmemory_iterablec                 s   s    | ]}|V  qd S r   r   )rZ   rt   r   r   r   rT  L  r^  r  rm  )r9   r  rf   r   inmemory_readerr  inmemory_pycapsule)r;   r   r  r,   r   ry  r   r9   rN   r   r   rg  rl   r   )r  r   r"  r!  r   streamr   r   r   test_write_iterableE  s2   "
r$  c                 C   s2  t jt tdt dd tdD t dgd dgd  gg dd}t|}| d	 }tj|||d
d |tj|dd}|	|sKJ | d }tj|j|dgd|d
d |tj|dd}|	|
dgsrJ tjtdd tj||||jd
d W d    d S 1 sw   Y  d S )Nrd  c                 s   re  r   rf  rh  r   r   r   rT  b  rU  z%test_write_scanner.<locals>.<genexpr>rV   r   rW   ri  rk  dataset_from_scannerr  r   rm  dataset_from_scanner2r  r&   zCannot specify a schemar   )r9   rf   )r;   r   r  r,   r   rN   ry  r   r   r   rX  r   r   rz  r9   )r  r   r   rN   r"  r!  r   r   r   test_write_scanner`  s4   "
"r'  c                 C   s   t jt tdt dgd dgd   gddgd}t|dgj}| d }tj	||d	|d
 tj
jdgdd}tj|d|d
 }||sNJ d S )Nrd  rV   r   rW   rS  r   rk  rN   r  rn  Tr  rm  )r;   r   r  r,   r?  r   r   rX  r9   ry  r   r  rN   r   r   )r  r   r   r"  partitioning_readr!  r   r   r   !test_write_table_partitioned_dictz  s(   r)  c              	   C   s  t jt jtdddt tjdddddt tdd	gd
gg dd}| d }tj	||dd t
|d}|d g}t|t|ksJJ tj|dd }||sZJ dD ]w}t }|j|d}dt|v spJ | d| }tj	||||d t|d }	|dkrdnd}
|	j|
ksJ tj|dd }|j}|dkr|d|dt  }|dv r|d|dt d}||}||sJ q\d S )Nrd  r  r  r  zdatetime64[D]rr  zdatetime64[ns]rV   rW   r   ri  rk  rG  r   r   r  part-0.parquet)1.02.42.6r*  z(<pyarrow.dataset.ParquetFileWriteOptionszparquet_dataset_version{0}r2  r+  r-  r   )r+  r,  r   r  )r;   r   r  r,   r  r  rs  r  r   ry  ri   rY  r   rN   r   r   r   r  r  rf   rm   read_metadataformat_versionr9   r<   	with_typer>   r  r  )r  r   r"  r  r  r!  r+  rf   optsmetaexpected_versionr9   rK  r   r   r   test_write_dataset_parquet  sD   	

r4  c                 C   s  t jt tdt dd tdD t dgd dgd  gg dd}| d	 }tj||d
d t|d}|d g}t|t|ksHJ tj	|d
d
 }||sXJ tjtjj|jjdd}|jdd}| d }tj||||d tj	||d
 }||sJ d S )Nrd  c                 s   re  r   rf  rh  r   r   r   rT    rU  z)test_write_dataset_csv.<locals>.<genexpr>rV   r   rW   )r  rj  chr1rk  csv_datasetr  r   r  z
part-0.csvr  r  F)include_headercsv_dataset_noheaderr2  )r;   r   r  r,   r   ry  ri   rY  r   rN   r   r   r  r   r  r  r9   rl  r  )r  r   r"  r  r  r!  rf   r1  r   r   r   test_write_dataset_csv  s*   "


r9  c                    s   t jt tdt dd tdD t dgd dgd  gg dd}d	  fd
d}| d }tj||d|d  s?J d S )Nrd  c                 s   re  r   rf  rh  r   r   r   rT    rU  z:test_write_dataset_parquet_file_visitor.<locals>.<genexpr>rV   r   rW   ri  rk  Fc                    s&   | j d ur| j jdkrd d S d S d S )NrT   T)rx  r)  r  visitor_calledr   r   r	    s
   
z=test_write_dataset_parquet_file_visitor.<locals>.file_visitorrG  r   )rf   r	  )r;   r   r  r,   r   ry  )r  r   r	  r"  r   r:  r   'test_write_dataset_parquet_file_visitor  s   "
r<  c           	         s   dd t dD }dd t dD }t||dgd dgd  d}| d	 }tjtd
t fgdd}g  d  fdd}tj||d|d|d |d d |d d h}tt	t
j }||ksfJ d uslJ jdkssJ d S )Nc                 S   s    g | ]}|gd  D ]}|q	qS r   r   rZ   r   itemr   r   r   r\     s     z?test_partition_dataset_parquet_file_visitor.<locals>.<listcomp>r   c                 S   s$   g | ]}|gd  D ]}|d  q	qS r   r   r=  r   r   r   r\     s   $ rV   rd  rW   ri  r  r   r   rU  c                    s   | j r| j  | j d S r   )rx  r-   rr   r  r  sample_metadatar   r   r	    s   zAtest_partition_dataset_parquet_file_visitor.<locals>.file_visitorr   Tr
  r  r*  r  r   )r,   r;   r   r   r   r9   r@   ry  r   rj   r*  r+  r)  )	r  f1_valsf2_valsr   rC  r   r	  r  r  r   r?  r   +test_partition_dataset_parquet_file_visitor  s.   

rC  c                 C   sd   t dtjdddgi}|d jjdksJ tj|| dd t	| d }|d jjdks0J d S )NrV   r  zEurope/Brussels)tzr   r   r*  )
r;   r   r0   r  r	  rD  r   ry  rm   r  )r  r   r!  r   r   r   (test_write_dataset_arrow_schema_metadata  s
   rE  c                 C   sb   ddl m} tdg di}|ddi}tj|| dd || d	 j}|j	ddiks/J d S )
Nr   ru  rV   r  rg     valuer  r   r  )
r   r  r;   r   rC   r   ry  r  r9   rx  )r  r  r   r9   r   r   r   "test_write_dataset_schema_metadata   s   rG  c                 C   sV   t dg di}|ddi}tj|| dd t| d j}|jddiks)J d S )NrV   r  rg  rF  r   r   r*  )	r;   r   rC   r   ry  rm   r  r9   rx  )r  r   r9   r   r   r   *test_write_dataset_schema_metadata_parquet,  s
   rH  c                 C   sL  | \}}}}}}}}d ||||}tjttdtdd tdD tdgd dgd  gg dd	}tjtd
t fgdd}	tj	|d|d|	d tj
d|ddd }
|
|scJ | d}tj	||d|	d tj
d|ddd }
|
|sJ | d}tj	|d|d|	d tj
d|ddd }
|
|sJ d S )Nr  rd  c                 s   re  r   rf  rh  r   r   r   rT  B  rU  z(test_write_dataset_s3.<locals>.<genexpr>rV   r   rW   ri  rk  r   r   rU  zmybucket/datasetr  r  rm  zmybucket/dataset2rn  r  rq  zmybucket/dataset3)rf   r;   r   r  r,   r   r   r9   r@   ry  rN   r   r   )r  rE  rc   r  r  r  r  uri_templater   r   r!  r  r   r   r   test_write_dataset_s37  sP   "


rJ  aC  {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:PutObject",
                "s3:ListBucket",
                "s3:GetObjectVersion"
            ],
            "Resource": [
                "arn:aws:s3:::*"
            ]
        }
    ]
}c           	   	   C   s  ddl m} | d \}}}}t| tdd |ddd||dd}tjttd	td
d td	D tdgd dgd  gg dd}t	j
tdt fgdd}t	j|d|dd|dd t	jd|ddd }||stJ t	j|d|dd|dd t	jd|ddd }||sJ tjtdd t	j|d|dddd W d    n1 sw   Y  |d dd||ddd!}tjtd"d t	j|d|dddd W d    d S 1 sw   Y  d S )#Nr   )r  r  test_dataset_limited_user
limited123z{}:{}http)r  r  endpoint_overrideschemerd  c                 s   re  r   rf  rh  r   r   r   rT    rU  z1test_write_dataset_s3_put_only.<locals>.<genexpr>rV   r   rW   ri  rk  r   r   rU  zexisting-bucketr  Fr  )r   rf   rg   r   r  rm  r  Tz&Bucket 'non-existing-bucket' not foundr   znon-existing-bucket)r   rf   rg   r  limited)r  r  rN  rO  allow_bucket_creationz(Access Denied|ACCESS_DENIED))r   r  r   _minio_put_only_policyrf   r;   r   r  r,   r   r   r9   r@   ry  rN   r   r   r   r   r  )	r  r  r  r  rE  rc   r   r   r!  r   r   r   test_write_dataset_s3_put_onlyz  s~   
"	
"rS  c              
   C   s   t dd d gi}t|| d  t t dt t  t  g}t	j
j| d g|t	 t d}||}|j|ks@J d S )NrV   ri  )r~   r9   rf   r   )r;   r   rm   rn   r9   r<   ru  r   r@   r   r   r   r   rc   ru   r   )r  r   r   r9   fsdsr   r   r   $test_dataset_null_to_dictionary_cast  s   
rU  c                 C   s   t g dg dd}tj|| d dd tj| d dd}t g dg dd	}tj|| d
 dd tj| d
 dd}||dd}| t g dg dg ddksZJ |j|dddd}| dt g dg dg ddks{J d S )Nr   r   r  rV   rW   r  colArp  rG  rm  r   c   r   r   Zr{  r  )colBcol3rH  rY  r^  r  r{  NrY  rp  r_  
full outer)	join_typer   r   r  r[  rV   rW   r  Nr  r{  Nr]  r;   r   r   ry  rN   r  r   r  r  rG  ds1rH  ds2r!  r   r   r   test_dataset_join  s0   
rk  c                 C   s   t g dg dd}tj|| d dd tj| d dd}t g dg dd	}tj|| d
 dd tj| d
 dd}||d}| t g dg dg ddksYJ |j|dddd}| dt g dg dg ddkszJ d S )NrV  rW  rX  rG  rm  r   rZ  r\  )rY  r_  rH  rY  r`  ra  rb  _rrc  right_suffixrd  re  rf  rg  rh  r   r   r   test_dataset_join_unique_key  s0   
ro  c                 C   s   t g dg dg dd}tj|| d dd tj| d dd}t g dg d	g d
d}tj|| d dd tj| d dd}|j|dddd}| dt jg dg dg dg dg dgg ddksnJ d S )NrV  r   rd  <   rW  )rY  r^  colValsrG  rm  r   rZ  r[  rd  r   r\  rH  rY  rb  rl  rm  rd  )r   rd  rq  Nre  )r   rd  Nr[  rf  )rY  r^  rr  colB_r	colVals_rrk  rg  rh  r   r   r   test_dataset_join_collisions  s0   rv  c                 C   s   t jg dg dd}tj|| d dd tj| d dd}t jg dg dg d	d
}tj|| d dd tj| d dd}|j|dddddd}| dt 	g dg dg ddksfJ d S )N)r   r   r   r  r(  )rV   rW   rV   rW   r  rX  rG  rm  r   )r   r   ri  )rV   rW   g)r
  r  g      @)r^  r_  colCrH  rY  rp  r   r^  r_  onby	toleranceright_onright_by)r
  NNNN)rY  rp  rx  )
r;   rA   from_pydictr   ry  rN   	join_asofr   r  r   rh  r   r   r   test_dataset_join_asof5  s,   r  c                 C   s   t g dg dg dd}tj|| d dd tj| d dd}t g dg d	g d
g dd}tj|| d dd tj| d dd}|j|dddgdd}| dt g dg dg dg ddksmJ d S )NrV  rp  r  )rY  r^  rz  rG  rm  r   rs  r\  rZ  r  )r^  rr  rY  rz  rH  rz  rY  r^  r   rz  r{  r|  )Nr{  NrY  r^  rz  rr  )r;   r   r   ry  rN   r  r   r  rh  r   r   r   "test_dataset_join_asof_multiple_byQ  s0   r  c                 C   s   t dg di}tj|| d dd tj| d dd}t g dg dd}tj|| d	 dd tj| d	 dd}|j|dg d
d}| t g dg ddksVJ d S )Nrz  r  rG  rm  r   r\  r  )rr  rz  rH  r   r  )r]  r]  r{  )rz  rr  )r;   r   r   ry  rN   r  r   rh  r   r   r   test_dataset_join_asof_empty_byo  s$   
r  c              	   C   s   t g dg dg dg dd}tj|| d dd tj| d dd}t g d	g d
g dg dg dd}tj|| d dd tj| d dd}d}tjt|d |j|dddgddddgd W d    d S 1 sqw   Y  d S )NrV  rp  r  rW  r  rG  rm  r   rs  r\  )r  r  r/  rZ  r  )r^  rr  colUniqrY  rz  rH  zXColumns {'colVals'} present in both tables. AsofJoin does not support column collisions.r   rz  rY  r^  r   ry  )	r;   r   r   ry  rN   r   r   rz  r  )r  rG  ri  rH  rj  r  r   r   r   !test_dataset_join_asof_collisions  s2   "r  dstyperc   memc                 C   s  t g dg dd}|dkr$tj|| d dd tj| d dd}n|dkr.t|}nt|td	d
k tddk}|dkrItj	ntj
}t||sSJ | t dgdgdkscJ |dt dgdgdkstJ |td	dk td	dkjtd	dkd}| t dgdgdksJ tj|| d dd tj| d dd}| t dgdgdksJ |jtt ddgddgdddd}| dt dd gddgddgdksJ tt |d  W d    n	1 sw   Y  tt |  W d    n	1 sw   Y  |jd}	|td	d
k |	}
|
 t d	ddgiksGJ tt j ||	  W d    d S 1 saw   Y  d S )Nr   r   r  r%  rV   rW   r  rw  rX  rc   rG  rm  r   r  rY  rT   rp  rV   r   r   r%  r  r   r   rW   rl  r   rd  r^  rp  zright outerkeysrc  r^  )rY  r^  rp  )r;   r   r   ry  rN   r  r   r  r<   r   r_  rH   r   r   r   r  r  r   r   r   rz  rF   r9   rP  replace_schemar3  )r  r  rG  ri  r!  rK  r2rl  joinedschema_without_col2	newschemar   r   r   test_dataset_filter  s   $




$r  c           
      C   s  t g dg dd}t g dg dd}|dkrCtj|| d dd	 tj| d dd	}tj|| d
 dd	 tj| d
 dd	}n|dkrRt|}t|}ntt||ftddk tddkB }|	 t g dg ddks|J |j
tt ddgddgdddd}|	 dt g dg dg ddksJ |tddk }|tddk }	tjtdd t||	f W d    d S 1 sw   Y  d S )Nr  r  rX  )r   r   r+  )hr7   lrc   rG  rm  r   rH  r  rY  rT   r   )r   r   r   )rV   rW   r  r   rd  rV   rW   r  rp  z
left outerr  )r   rd  N)rY  rp  r^  zcurrently not supportedr   )r;   r   r   ry  rN   r  r   r  r<   r   r  r  r   r   rz  )
r  r  rG  rH  ri  rj  filtered_union_dsr  filtered_ds1filtered_ds2r   r   r   test_union_dataset_filter  sP   

"r  c                 C   s   | d }t |\}}t|}| }|jdksJ |tddk }| jdks-J t	t
 |  W d    d S 1 sBw   Y  d S )Ntest_parquet_dataset_filterrF  r  r   rd  )rE  r   rG  r   r   r   r  r<   r   r   rz  rF   )r  rC  rD  rE  rN   r!  filtered_dsr   r   r   r  (  s   

"r  c                 C   s   t jt tdgdgd}t|}dtdi}|j|d}tj|| dgdd t	j
tdd	 tj|| dgdd W d
   d
S 1 sGw   Y  d
S )z
    Ensure the projected schema is used to validate partitions for scanner

    https://issues.apache.org/jira/browse/ARROW-17228
    rd  original_columnrk  renamed_columnr&   rm  r  z0'Column original_column does not exist in schemar   N)r;   r   r  r,   r   rN   r<   r   ry  r   r   KeyError)r  r   table_datasetr'   r   r   r   r   4test_write_dataset_with_scanner_use_projected_schema7  s    



"r  rf   )rm  r   c              
   C   s   |dkr	t d tddgddgd dddgdd	id gd
ddg dddigd
gd}tj|| d |d tj| d |d}|jg dd}| dd ddgd d	dd gddddg ddd dgddgkskJ d S )Nr   zpyarrow.parquetabc123qrs456r   r   buttonr  r  )r	  elementvaluesstructsscrollwindow)NrT   r   fizzbuzz)user_ida.dotted.fieldinteractionr   r   )r  zinteraction.typezinteraction.valueszinteraction.structsr  r&   )r  r  )r  r	  r  r  r  )	r   r  r;   r   r   ry  rN   r   r:  )r  rf   r   ri  r   r   r   test_read_table_nested_columnsP  s2   



r  c                 C   s   ddl m} | d }tjtg dt tg dt gddg}|j||ddgd	d
 |j|dd	t	t
dt t
dt gd  }||dksWJ |d }tt|}dd |D }tt|}||ksxJ d S )Nr   r  zslash-writer-xr   r   rT   r   r   )experiment/A/f.csvzexperiment/B/f.csvr  zexperiment/C/k.csvzexperiment/M/i.csvexp_idexp_metarm  r   )r6   r"  rf   r   r  )re  rf   r   r9   r   c                 S   s   g | ]
}d t |dd qS )z	exp_meta=r  rv  r   r  r   r   r   r\     r  z5test_dataset_partition_with_slash.<locals>.<listcomp>)r   rN   r;   rA   r  r  r   r  ry  r9   r<   r   r  r  r  r:  r  r   r  r  )tmpdirr   rr   dt_tabler  r  encoded_pathsr  r   r   r   !test_dataset_partition_with_slashl  sB   
r  c                 C   s   t t jdt  ddt jdt  ddg}g dg dg}t jj||d}t|| d	  tj	| d	 d
d}|
 j|sBJ tj|| d d
d tj	| d d
d}|
 j|s_J tj||g| d d
d tj	| d d
d}|
 j|s~J d S )Nr   F)nullabler  Tr  Nr   Nrb   	nulltest1r   r   	nulltest2	nulltest3)r;   r9   r<   r>   rA   r  rm   rK  r   rN   r   r   ry  )r  schema_nullablerB  r   rN   r   r   r   'test_write_dataset_preserve_nullability  s   r  c                 C   sP  t t jdt  ddidt dt  g}t t dt  t dt  g}g dg dg}t jj||d}t jj||d}tj||g| d	 d
d tj| d	 d
d}|	 jj
|ddscJ tj||g| d d
d tj| d d
d}|	 jj
|ddsJ tj||g| d d
|d tj| d d
d}|	 jj
|ddsJ d S )Nr   s   foos   barr  r  r  r  rb   test1r   r   Tr  test2test3rX  )r;   r9   r<   r>   rA   r  r   ry  rN   r   r   )r  schema_metadataschema_no_metarB  r   table_no_metarN   r   r   r   *test_write_dataset_preserve_field_metadata  s,   r  c              
   C   s   dD ]n}dD ]i}t t dt  t dt  g}g dg dg}t jj||d}t }| d|  }tj||d|j	||d	d
d tj
|dd}|jD ]}	t|	}
|
dd}|j|u seJ |j||@ u snJ qOqqd S )N)TFr   r  r  r  rb   write_page_index_r   )write_statisticswrite_page_indexr  )rf   r3  r  r   r   )r;   r9   r<   r>   rA   r  r   r   ry  r  rN   r   rm   r.  r{  r  has_offset_indexhas_column_index)r  r  r  r9   rB  r   r   r"  ri  r  rx  ccr   r   r   #test_write_dataset_write_page_index  s:   


r  c                 C   s  t jt g dt g dgddgd}|dkr-tj|| d dd	 tj| d dd	}n|d
kr7t|}nt|d 	 g dg ddksMJ |dg 	 g dg ddksbJ |
tddk d 	 g dg ddks~J t jjt jg dt  dt g dgddgd}t|}|dg}| 	 }|d g dksJ |d g dksJ |dg}| 	 }|d g dksJ |d g dksJ d S )N)rT   r   r   r   r   )rW   rV   rW   rV   r  r  r  rk  rc   rG  rm  r   r  )rV   rV   rW   rW   r  r  )r  r  )r  
descending)r  rW   rW   rV   rV   )r   r   rT   r   r   r   )rV   rV   rW   r  )r   r(  r(  r  r  )r  carr  foobarrV   rW   )rV   r  )r  r(  r(  r   )r  r  r  r  )rV   r  )r;   r   r  r   ry  rN   r  r  r   r  r   r  r<   rA   r  r>   )r  r  r   r   
sorted_tabsorted_tab_dictr   r   r   test_dataset_sort_by  sV   
r  c                 C   s  t dg di}t j }|jdd}| d }tj||||d tjdd}t jj|d}tj||d	 }||ks=J | d
 }t	|| t
| }	t|	dksTJ |	d }
t|
 }|d |d kshJ |d |d |d< |d< |
| tjdd}t jj|d}tj||d	 }||ksJ |t dg diksJ tjtdd tj||d	 }W d   dS 1 sw   Y  dS )zwCheck that checksum verification works for datasets created with
    ds.write_dataset and read with ds.dataset.to_tablerV   r  T)write_page_checksumcorrect_dir)r6   r"  rf   r3  r  )default_fragment_scan_optionsr   corrupted_dirr   r      $   F)r   rT   r   r   zCRC checksum verificationr   N)r;   r   rN   r   r  r   ry  r  r   r   ri   iterdirr   	bytearray
read_byteswrite_bytesr   r   r  )r  
table_origpq_write_formatr8  original_dir_pathpq_scan_opts_crcpq_read_format_crctable_checkcorrupted_dir_pathcorrupted_file_path_listcorrupted_file_pathbin_datapq_scan_opts_no_crcpq_read_format_no_crctable_corruptrE  r   r   r   1test_checksum_write_dataset_read_dataset_to_table  sn   


"r  c                  C   s   d} d}t t}tjjd W d    n1 sw   Y  | t|jv s0|t|jv s0J tj }d}t jt|d |d W d    d S 1 sOw   Y  d S )NzImake_write_options() should be called on an instance of ParquetFileFormatzqdescriptor 'make_write_options' for 'pyarrow._dataset_parquet.ParquetFileFormat' objects doesn't apply to a 'int'+   z;make_write_options\(\) takes exactly 0 positional argumentsr   )	r   r   r   r;   rN   r   r  rY   r$   )msg_1msg_2excinfopformatr  r   r   r   test_make_write_options_errorb  s    
"r  c                 C   st   zdd l m} W n ty   td Y nw d}d}| j|j||j|d }|	 dddgiks8J d S )Nr   zsubstrait NOT enableds   
SOhttps://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml	
u64
	u32

str"i
i64
f64
str
const
struct
a
b
group
key7
:
Z
b
:

:
b
*
bs3  
/functions_comparison.yaml
SOhttps://github.com/apache/arrow/blob/main/format/substrait/extension_types.yamlequal:any1_any1	
u64
	u32

"
 "
("i
i64
f64
str
const
struct
a
b
group
key7
:
Z
b
:

:
b
*
brG  rY   4)
pyarrow.substrait	substraitr  r   r  r   BoundExpressionsfrom_substraitr   r  )rN   psr"  	filteringr!  r   r   r   test_scanner_from_substraitu  s   

r  r\  r   )r   r   r   (  r   r(   r  r*  r  rg  sysrw  r3  r  r  shutilr   urllib.parser   numpyr  r  r   r   r;   r  r  r  pyarrow.csvr,  r   rc   pyarrow.jsonpyarrow.libr   pyarrow.tests.utilr   r   r   r	   r
   r   r0   rt  rN   r   pyarrow.parquetr   rm   mark
pytestmarkr   r8   rE   rQ   fixturero   r   r   r   r  r  r#  r*  r=  rF  rI  rL  rM  rR  r  r  parametrizerY   tupler  r  r  r  r  r  r  r  r   r  r  s3r&  r<  r>  rG  rL  rR  rV  ra  rh  rn  rv  r|  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r  r  r  r  r'  r)  r.  r1  r2  r5  r8  r<  rE  rK  rP  rQ  rU  rW  rY  rc  rf  rk  rn  rp  rr  rt  rv  rw  rx  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r&  r(  r-  r>  rE  rH  skipifplatformrK  rM  rS  rX  r]  r^  r_  rf  rh  rk  rm  rp  rt  r{  r|  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r$  r'  r)  r4  r9  r<  rC  rE  rG  rH  rJ  rR  rS  rU  rk  ro  rv  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   <module>   s  
"

 
3

.
G
;

0



 ;


9'H
8
%
 
<


(
8*




)



## 
d
U%












	B

$



	9

B

B 9& /
'=#K0$#D&/$+#
/L

N-%
 0I