o
    [h?                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z" d dl!m#Z# d dl!m$Z$ d dl%m&Z& dZ'z
d dl(m)Z) dZ*W n e+y   dZ*Y nw zd dl,Z-dZ.W n e+y   dZ.Y nw e/dZ0dZ1e2dZ3dZ4h dZ5de6fd e6fd!e6fd"e7fd#e8fd$e7fd%Z9d&d'd(d)d*Z:e$d+Z;e$d,Z<G d-d. d.Z=G d/d0 d0e>Z?G d1d2 d2e@ZAG d3d4 d4e>ZBG d5d6 d6e>ZCG d7d8 d8e>ZDd9d: ZEe	dzd;d<ZFd=d> ZGd?d@ ZHdAdB ZIdCdD ZJe	dEdF ZKd{dGdHZL	I	 	 d|dJdKZMd}d~dPdQZN		 	 	dddYdZZO	I		I	 	 	dd[d\ZPd]d^ ZQd_d` ZRdadb ZSeGddcddZTeGdedf ZUddId e=jVd fdgdhZW		I	 	i	 ddjdkZX		I	 	 ddldmZYddId e=jVd fdndoZZdzdpdqZ[ddId e=jVd dfdrdsZ\dtdId e=jVd fdudvZ]dwdx Z^e_dykre`e^ dS )    )annotationsN)contextmanager)
QUOTE_NONE)ENOENT)wraps)iglob)BytesIO)environ)extsep)linesep)remove)normcase)normpath)realpath)NamedTemporaryFile)sleep)InvalidVersion)parse)Version)Image	tesseract)ndarrayTFpytesseractzutf-8z^[a-z0-9_]+$RGB>
   BMPGIFPBMPGMPNGPPMJPEGTIFFWEBPJPEG2000page_numorientationrotateorientation_confscriptscript_conf)zPage numberzOrientation in degreesRotatezOrientation confidenceScriptzScript confidencez.tessedit_create_boxfile=1 batch.nochop makeboxztessedit_create_alto=1ztessedit_create_hocr=1ztessedit_create_tsv=1)boxxmlhocrtsvz3.05z4.1.0c                   @  s   e Zd ZdZdZdZdZdS )Outputbytesz
data.framedictstringN)__name__
__module____qualname__BYTES	DATAFRAMEDICTSTRING r;   r;   L/var/www/vscode/pina/lib/python3.10/site-packages/pytesseract/pytesseract.pyr0   V   s
    r0   c                         e Zd Z fddZ  ZS )PandasNotSupportedc                      t  d d S )NzMissing pandas packagesuper__init__self	__class__r;   r<   rB   ^   s   zPandasNotSupported.__init__r4   r5   r6   rB   __classcell__r;   r;   rE   r<   r>   ]       r>   c                   @  s   e Zd Zdd ZdS )TesseractErrorc                 C  s   || _ || _||f| _d S N)statusmessageargs)rD   rL   rM   r;   r;   r<   rB   c   s   zTesseractError.__init__N)r4   r5   r6   rB   r;   r;   r;   r<   rJ   b   s    rJ   c                      r=   )TesseractNotFoundErrorc                   s   t  t d d S )NzQ is not installed or it's not in your PATH. See README file for more information.)rA   rB   tesseract_cmdrC   rE   r;   r<   rB   j   s   zTesseractNotFoundError.__init__rG   r;   r;   rE   r<   rO   i   rI   rO   c                      r=   )TSVNotSupportedc                   r?   )Nz4TSV output not supported. Tesseract >= 3.05 requiredr@   rC   rE   r;   r<   rB   r      zTSVNotSupported.__init__rG   r;   r;   rE   r<   rQ   q   rI   rQ   c                      r=   )ALTONotSupportedc                   r?   )Nz6ALTO output not supported. Tesseract >= 4.1.0 requiredr@   rC   rE   r;   r<   rB   y   rR   zALTONotSupported.__init__rG   r;   r;   rE   r<   rS   x   rI   rS   c                 C  s   |    z;z| d W n ty   td Y n ty!   Y nw W |   || _d S W |   || _d S W |   || _d S |   || _w )N   )	terminatewait	TypeErrorr   	Exceptionkill
returncode)processcoder;   r;   r<   rY      s&   


rY   c                 c  s    zL|s|   d V  W | j  | j  | j  d S z| j |d\}}|V  W n tjy;   t| d tdw W | j  | j  | j  d S | j  | j  | j  w )NrT   )timeoutzTesseract process timeout)	communicatestdinclosestdoutstderr
subprocessTimeoutExpiredrY   RuntimeError)procseconds_error_stringr;   r;   r<   timeout_manager   s,   
	






rk   c                   s    t   fdd_S )Nc                    s,   | ddrju r | i |_jS )NcachedF)pop_result)rN   kwargsfuncwrapperr;   r<   rr      s   zrun_once.<locals>.wrapper)r   rn   )rq   r;   rp   r<   run_once   s   rs   c                 C  s"   d dd | t D  S )N c                 s  s    | ]}|V  qd S rK   r;   .0liner;   r;   r<   	<genexpr>   s    
zget_errors.<locals>.<genexpr>)joindecodeDEFAULT_ENCODING
splitlinesstrip)rj   r;   r;   r<   
get_errors   s
   
r~   c                 C  s\   t | r|  dn| D ] }zt| W q ty+ } z|jtkr! W Y d}~qd}~ww dS )z5Tries to remove temp files by filename wildcard path.*N)r   r   OSErrorerrnor   )	temp_namefilenameer;   r;   r<   cleanup   s   
r   c                 C  s   t rt| trt| } t| tjstd| jsdn| j}|tvr&tdd|  v r@t	t
| jd}|| d| d |} || _| |fS )NzUnsupported image objectr   zUnsupported image format/typeA)   r   r   )r   r   )numpy_installed
isinstancer   r   	fromarrayrW   formatSUPPORTED_FORMATSgetbandsnewRGB_MODEsizepaste
getchannel)image	extension
backgroundr;   r;   r<   prepare   s   
r   c                 c  s    zdt dddF}t| tr)|jttt| fV  	 W d    W t|j d S t| \} }|j dt	 | }| j
|| jd |j|fV  W d    n1 sQw   Y  W t|j d S W t|j d S t|j w )Ntess_F)prefixdelete_input)r   )r   r   strnamer   r   r   r   r   r
   saver   )r   fr   input_file_namer;   r;   r<   r      s    
			r   c                 C  sh   t jt jd td}tt dr$t  |d< |d  jt jO  _t j|d _| r-t j|d< |S t j	|d< |S )N)r`   rc   startupinfoenvSTARTUPINFOr   rb   )
rd   PIPEr	   hasattrr   dwFlagsSTARTF_USESHOWWINDOWSW_HIDEwShowWindowDEVNULL)include_stdoutro   r;   r;   r<   subprocess_args   s   


r    c              
   C  s  g }t jdk }|r|dkr|ddt|f7 }|t| |f7 }|d ur(|d|f7 }|r3|tj||d7 }| D ]}	|	dvrB||	 q7td| zt	j
|fi t }
W n tyj } z	|jtkrc t d }~ww t|
|}|
jr|t|
jt|W d    d S 1 sw   Y  d S )	Nwin32r   nicez-n-l)posix>   r,   osdr/   r-   z%r)sysplatformr   rP   shlexsplitappendLOGGERdebugrd   Popenr   r   r   r   rO   rk   rZ   rJ   r~   )input_filenameoutput_filename_baser   langconfigr   r]   cmd_argsnot_windows
_extensionrg   r   rj   r;   r;   r<   run_tesseract   s6   	

"r   r   r   return_bytesboolc                 C  sV   t | d}|r| W  d    S | tW  d    S 1 s$w   Y  d S )Nrb)openreadrz   r{   )r   r   output_filer;   r;   r<   _read_output  s   $r   
extensions	list[str]r   
str | Noner   intr]   c           	   	     s   d dd |D  }|rd| }nd}t| (\}}||d |||||d td	i    fdd|D W  d    S 1 sEw   Y  d S )
Nrt   c                 s  s    | ]	}t |d V  qdS )r   N)EXTENTION_TO_CONFIGgetrv   r   r;   r;   r<   rx   .  s    
z.run_and_get_multiple_output.<locals>.<genexpr>z-c r   r   r   r   r   r   r   r]   c                   s2   g | ]}t  d   t | |dv rdnqS )r      pdfr.   T)r   r
   r   ro   r   r;   r<   
<listcomp>C  s    z/run_and_get_multiple_output.<locals>.<listcomp>r;   )ry   r}   r   r   )	r   r   r   r   r]   r   r   r   r   r;   r   r<   run_and_get_multiple_output&  s,   

$r   c           
   	   C  sl   t | (\}}|||||||d}	tdi |	 t|	d  t | |W  d    S 1 s/w   Y  d S )Nr   r   r;   )r   r   r   r
   )
r   r   r   r   r   r]   r   r   r   ro   r;   r;   r<   run_and_get_outputL  s   	
$r   c              
     s   i } fdd|   dD }t|dk r|S |d}t|}t|d |k r0|d d |dk r8||7 }t|D ]<\}}t ||< |D ]0}	t|	|krPqG||krlz
tt|	| }
W n t	yk   |	| }
Y nw |	| }
|| |
 qGq<|S )Nc                   s   g | ]}|  qS r;   r   )rv   rowcell_delimiterr;   r<   r   i  s    z file_to_dict.<locals>.<listcomp>
   r   r^   r   )
r}   r   lenrm   r   	enumeratelistr   float
ValueError)r/   r   str_col_idxresultrowsheaderlengthiheadr   valr;   r   r<   file_to_dictg  s2   

r   c                 C  s@   |t u r|  S |tu rzt|  W dS  ty   Y dS w dS )NTF)r   isdigitr   r   )r   _typer;   r;   r<   is_valid  s   r   c                 C  s   dd dd |  dD D S )Nc                 S  sX   i | ](}t |d krt|d t|d  d rt|d  d t|d  d |d qS )r   rT   r   )r   r   OSD_KEYS)rv   kvr;   r;   r<   
<dictcomp>  s
    &$zosd_to_dict.<locals>.<dictcomp>c                 s  s    | ]}| d V  qdS ): Nr   ru   r;   r;   r<   rx     s    zosd_to_dict.<locals>.<genexpr>r   r   )r   r;   r;   r<   osd_to_dict  s   r   c                 C  s   t dg}| r|t| 7 }ztj|tjtjd}W n
 ty#   t w |j	dvr,t g }|j
rK|j
ttD ]}| }t|rJ|| q:|S )Nz--list-langs)rb   rc   )r   rT   )rP   r   r   rd   runr   STDOUTr   rO   rZ   rb   rz   r{   r   r}   LANG_PATTERNmatchr   )r   r   r   	languagesrw   r   r;   r;   r<   get_languages  s,   



r   c               	   C  s   zt jtdgt jtt jd} W n
 ty   t w | t	}|
tjdd d^}}|d^}}zt|}|tks@J W |S  ttfyS   td| dw )	z9
    Returns Version object of the Tesseract version
    z	--version)rc   r   r`   
   Nrt   -zInvalid tesseract version: "")rd   check_outputrP   r   r	   r   r   rO   rz   r{   lstripr3   	printable	partitionr   TESSERACT_MIN_VERSIONAssertionErrorr   
SystemExit)outputraw_versionstr_versionri   versionr;   r;   r<   get_tesseract_version  s(   

r  c                   sD   | d||||g t j fddt j fddt j fddi|  S )zS
    Returns the result of a Tesseract OCR run on the provided image to string
    txtc                        t  dg  S NTr   r;   rN   r;   r<   <lambda>      z!image_to_string.<locals>.<lambda>c                     s   dt   iS )Ntextr  r;   r  r;   r<   r        c                        t   S rK   r  r;   r  r;   r<   r        )r0   r7   r9   r:   r   r   r   r   output_typer]   r;   r  r<   image_to_string  s   r  r   c                 C  sF   |dvrt d| |dkrd|  }| |||||dg}t| S )zU
    Returns the result of a Tesseract OCR run on the provided image to pdf/hocr
    r   zUnsupported extension: r.   z-c tessedit_create_hocr=1 T)r   r}   r   )r   r   r   r   r   r]   rN   r;   r;   r<   image_to_pdf_or_hocr  s   r  c                 C  s<   t ddtk r
t d|  }| d||||dg}t| S )zU
    Returns the result of a Tesseract OCR run on the provided image to ALTO XML
    Trl   z-c tessedit_create_alto=1 r-   )r  TESSERACT_ALTO_VERSIONrS   r}   r   )r   r   r   r   r]   rN   r;   r;   r<   image_to_alto_xml  s
   r  c                   sR   |   d}| d||||g tj fddtj fddtj fddi|  S )zR
    Returns string containing recognized characters and their box boundaries
    z2 -c tessedit_create_boxfile=1 batch.nochop makeboxr,   c                     r	  r
  r  r;   r  r;   r<   r  )  r  z image_to_boxes.<locals>.<lambda>c                     s   t dt   ddS )Nz char left bottom right top page
rt   r   r   r   r;   r  r;   r<   r  *  s
    c                     r  rK   r  r;   r  r;   r<   r  /  r  r}   r0   r7   r9   r:   r  r;   r  r<   image_to_boxes  s   r  c              	   C  sT   t st tdd}z|| W n ttfy   Y nw tjtt	|  fi |S )N	)quotingsep)
pandas_installedr>   r   updaterW   r   pdread_csvr   r   )rN   r   ro   r;   r;   r<   get_pandas_output3  s   
r$  c              
     sv   t ddtk r
t d|  }| d||||g tj fddtj fddtj fddtj fd	di|  S )
zt
    Returns string containing box boundaries, confidences,
    and other information. Requires Tesseract 3.05+
    Tr  z-c tessedit_create_tsv=1 r/   c                     r	  r
  r  r;   r  r;   r<   r  U  r  zimage_to_data.<locals>.<lambda>c                     s   t  dg S r
  )r$  r;   rN   pandas_configr;   r<   r  V  s    c                     s   t t  ddS )Nr  r^   r  r;   r  r;   r<   r  Z  s    c                     r  rK   r  r;   r  r;   r<   r  [  r  )	r  r   rQ   r}   r0   r7   r8   r9   r:   )r   r   r   r   r  r]   r&  r;   r%  r<   image_to_data@  s   r'  r   c                   sR   d|   }| d||||g tj fddtj fddtj fddi|  S )zN
    Returns string containing the orientation and script detection (OSD)
    z--psm 0 r   c                     r	  r
  r  r;   r  r;   r<   r  n  r  zimage_to_osd.<locals>.<lambda>c                     s   t t  S rK   )r   r   r;   r  r;   r<   r  o  r  c                     r  rK   r  r;   r  r;   r<   r  p  r  r  r  r;   r  r<   image_to_osd_  s   r(  c               
   C  s,  t tjdkrtjd d } }n#t tjdkr*tjd dkr*tjd tjd } }n	tdtjd dS z"t| }tt||d W d    W d S 1 sNw   Y  W d S  tyt } ztt	| d	tjd W Y d }~dS d }~w t
y } ztt|j d
| tjd W Y d }~dS d }~ww )Nr   rT      r      z(Usage: pytesseract [-l lang] input_file
)file)r   r   r   )r   r   argvprintrc   r   r   r  rO   r   r   typer4   )r   r   imgr   r;   r;   r<   maint  s&   &r0  __main__rK   )T)r   r   r   )F)r   r   r   r   )Nr   r   F)
r   r   r   r   r   r   r]   r   r   r   )r   Nr   r   r   F)r   )Nr   r   r   r   )Nr   r   r   )a
__future__r   loggingrer   r3   rd   r   
contextlibr   csvr   r   r   	functoolsr   globr   ior   osr	   r
   r   r   os.pathr   r   r   tempfiler   timer   packaging.versionr   r   r   PILr   rP   numpyr   r   ModuleNotFoundErrorpandasr"  r   	getLoggerr   r{   compiler   r   r   r   r   r   r   r   r   r  r0   EnvironmentErrorr>   rf   rJ   rO   rQ   rS   rY   rk   rs   r~   r   r   r   r   r   r   r   r   r   r   r   r   r  r:   r  r  r  r  r$  r'  r(  r0  r4   r  r;   r;   r;   r<   <module>   s  






)
(
#






!


