
    k^iyC                        d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
mZmZ ddlmZmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZmZ ddlmZmZmZm Z m!Z! dd	l"m#Z#m$Z$  ejJ                  e&      Z' G d
 de      Z( G d d      Z) G d de)      Z* G d de)      Z+ G d de+      Z, G d de)      Z- G d de-      Z. G d de*      Z/ G d de-      Z0 G d de*      Z1 G d de-      Z2 G d  d!      Z3 G d" d#e e         Z4y)$zAdobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on:

  https://github.com/adobe-type-tools/cmap-resources

    N)IterableIteratorMutableMapping)AnyBinaryIOClassVarTextIOUnioncast)name2unicode)PDFExceptionPDFTypeError)PSEOFPSSyntaxError)KWD	PSKeyword	PSLiteralPSStackParserliteral_name)choplistnunpackc                       e Zd Zy)	CMapErrorN__name__
__module____qualname__     M/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/pdfminer/cmapdb.pyr   r   '   s    r   r   c                       e Zd ZdZdeddfdZdefdZdededdfd	Z	d
ede
ddfdZde
d
eez  e
z  ddfdZddZd
edee
   fdZy)CMapBaser   kwargsreturnNc                 .    |j                         | _        y N)copyattrsselfr#   s     r    __init__zCMapBase.__init__.   s    28++-
r   c                 @    | j                   j                  dd      dk7  S )NWModer   )r(   getr*   s    r    is_verticalzCMapBase.is_vertical1   s    zz~~gq)Q..r   kvc                 "    || j                   |<   y r&   )r(   )r*   r1   r2   s      r    set_attrzCMapBase.set_attr4   s    

1r   codecidc                      y r&   r   )r*   r5   r6   s      r    add_code2cidzCMapBase.add_code2cid7       r   c                      y r&   r   )r*   r6   r5   s      r    add_cid2unichrzCMapBase.add_cid2unichr:   r9   r   c                      y r&   r   )r*   cmaps     r    use_cmapzCMapBase.use_cmap=   r9   r   c                     t         r&   )NotImplementedError)r*   r5   s     r    decodezCMapBase.decode@   s    !!r   )r=   r"   r$   N)r   r   r   debugobjectr+   boolr0   strr4   intr8   r   bytesr;   r>   r   rA   r   r   r    r"   r"   +   s    E@ @D @/T /# & T  3 4 # Y->-D  "5 "Xc] "r   r"   c            	           e Zd Zdeez  ddfdZdefdZdeddfdZde	de
e   fd	Zej                  dd
fdedeeef   dz  deedf   ddfdZy)CMapr#   r$   Nc                 >    t        j                  | fi | i | _        y r&   )r"   r+   code2cidr)   s     r    r+   zCMap.__init__E   s    $)&)+-r   c                 V    dj                  | j                  j                  d            S )Nz
<CMap: {}>CMapNameformatr(   r.   r/   s    r    __repr__zCMap.__repr__I   s     ""4::>>*#=>>r   r=   c                     t        |t              sJ t        t        |                   dt        t
        t        f   dt        t
        t        f   dd ffd | j                  |j                         y )Ndstsrcr$   c                     |j                         D ]+  \  }}t        |t              ri }|| |<    ||       '|| |<   - y r&   )items
isinstancedict)rR   rS   r1   r2   dr'   s        r    r'   zCMap.use_cmap.<locals>.copyO   sF    		 1a&+-ACFAJCFr   )rV   rI   rE   typerW   rF   rC   rK   )r*   r=   r'   s     @r    r>   zCMap.use_cmapL   s`    $%6s4:6%	d3;' 	d3;.? 	D 	 	T]]DMM*r   r5   c              #     K   t         j                  d| |       | j                  }t        |      D ]V  }||v rD||   }t	        |t
              r| | j                  }-t        t        t
        t        f   |      }K| j                  }X y w)Nzdecode: %r, %r)	logrB   rK   iterrV   rF   r   rW   rC   )r*   r5   rX   ixs        r    rA   zCMap.decodeZ   sy     		"D$/MMd 		"AAvaDa%GAT#v+.2AMM		"s   B	Br   outrK   .c           	      $   || j                   }d}t        |j                               D ]d  \  }}g ||}t        |t              r|j                  d|d| d       6| j                  |t        t        t        t        f   |      |       f y )Nr   zcode z = cid 
)r_   rK   r5   )
rK   sortedrU   rV   rF   writedumpr   rW   rC   )r*   r_   rK   r5   r1   r2   cs          r    rd   z	CMap.dumph   s     }}HD8>>+, 	PDAq$

A!S!		E!gaS34		cDc6k1BA,FQ	O	Pr   )r   r   r   rE   rF   r+   rP   r"   r>   rG   r   rA   sysstdoutr	   rW   rC   tuplerd   r   r   r    rI   rI   D   s    .s .t .?# ?+X +$ +"5 "Xc] "  jj-1 "	PP sF{#d*P CHo	P
 
Pr   rI   c                   &    e Zd Zdedeedf   fdZy)IdentityCMapr5   r$   .c                 d    t        |      dz  }|r t        j                  d| d|d |dz         S y)N   >Hr   lenstructunpackr*   r5   ns      r    rA   zIdentityCMap.decodez   s8    IN==1QCq4!a%=99r   Nr   r   r   rG   rh   rF   rA   r   r   r    rj   rj   y       5 U38_ r   rj   c                   &    e Zd Zdedeedf   fdZy)IdentityCMapByter5   r$   .c                 X    t        |      }|rt        j                  d| d|d |       S y)Nrm   Br   ro   rs   s      r    rA   zIdentityCMapByte.decode   s0    I==1QCq4844r   Nru   r   r   r    rx   rx      rv   r   rx   c                   f    e Zd Zdeez  ddfdZdefdZdedefdZej                  fde
ddfd	Zy)

UnicodeMapr#   r$   Nc                 >    t        j                  | fi | i | _        y r&   )r"   r+   
cid2unichrr)   s     r    r+   zUnicodeMap.__init__   s    $)&)*,r   c                 V    dj                  | j                  j                  d            S )Nz<UnicodeMap: {}>rM   rN   r/   s    r    rP   zUnicodeMap.__repr__   s     !((
)CDDr   r6   c                 N    t         j                  d| |       | j                  |   S )Nget_unichr: %r, %r)r[   rB   r~   r*   r6   s     r    
get_unichrzUnicodeMap.get_unichr   s"    		&c2s##r   r_   c                     t        | j                  j                               D ]  \  }}|j                  d| d|d        y )Nzcid z = unicode ra   )rb   r~   rU   rc   )r*   r_   r1   r2   s       r    rd   zUnicodeMap.dump   sB    4??0023 	4DAqIIQC{1%r23	4r   )r   r   r   rE   rF   r+   rP   r   rf   rg   r	   rd   r   r   r    r|   r|      sV    -s -t -E# E$c $c $ "% 4 4 4r   r|   c                       e Zd ZdedefdZy)IdentityUnicodeMapr6   r$   c                 F    t         j                  d| |       t        |      S )z+Interpret character id as unicode codepointr   )r[   rB   chrr   s     r    r   zIdentityUnicodeMap.get_unichr   s    		&c23xr   N)r   r   r   rF   rE   r   r   r   r    r   r      s    c c r   r   c                        e Zd ZdededdfdZy)FileCMapr5   r6   r$   Nc                 P   t        |t              rt        |t              s$J t        t        |      t        |      f             | j                  }|d d D ];  }t        |      }||v r!t        t        t        t        f   ||         }3i }|||<   |}= t        |d         }|||<   y )N)	rV   rE   rF   rY   rK   ordr   rW   rC   )r*   r5   r6   rX   re   cits          r    r8   zFileCMap.add_code2cid   s    $$C)= 	
s$Zc#@
 	
= MMcr 	AQBQwc6k*AbE2')"	 b]"r   )r   r   r   rE   rF   r8   r   r   r    r   r      s     3 4 r   r   c                   ,    e Zd Zdedeez  ez  ddfdZy)FileUnicodeMapr6   r5   r$   Nc                    t        |t              sJ t        t        |                   t        |t              r2t        |j
                  t              sJ t        |j
                        }nJt        |t              r|j                  dd      }n't        |t              rt        |      }nt        |      |dk(  r| j                  j                  |      dk(  ry || j                  |<   y )NzUTF-16BEignore     )rV   rF   rE   rY   r   namer   rG   rA   r   r   r~   r.   )r*   r6   r5   unichrs       r    r;   zFileUnicodeMap.add_cid2unichr   s    #s#3Sc^3#dI&dii---!$)),Fe$[[X6Fc"YFt$$ X$//"5"5c":c"A%r   )r   r   r   rF   r   rG   r;   r   r   r    r   r      s&    &# &Y->-D & &r   r   c                   ,     e Zd Zdededdf fdZ xZS )PyCMapr   moduler$   Nc                     t         |   |       |j                  | _        |j                  rd| j
                  d<   y y N)rM      r-   )superr+   CODE2CIDrK   IS_VERTICALr(   )r*   r   r   	__class__s      r    r+   zPyCMap.__init__   s:    $'"#DJJw r   )r   r   r   rE   r   r+   __classcell__r   s   @r    r   r      s"    $S $# $$ $ $r   r   c                   0     e Zd Zdedededdf fdZ xZS )PyUnicodeMapr   r   verticalr$   Nc                     t         |   |       |r!|j                  | _        d| j                  d<   y |j
                  | _        y r   )r   r+   CID2UNICHR_Vr~   r(   CID2UNICHR_H)r*   r   r   r   r   s       r    r+   zPyUnicodeMap.__init__   s>    $'$11DO"#DJJw$11DOr   )r   r   r   rE   r   rD   r+   r   r   s   @r    r   r      s)    2S 2# 2 2$ 2 2r   r   c                      e Zd ZU i Zeeeef      ed<   i Z	eeee
e   f      ed<    G d de      Zedeeeef   ef   deeeef   ef   fd       Zededee   fd	       Zededefd
       Zeddededefd       Zy)CMapDB_cmap_cache_umap_cachec                       e Zd Zy)CMapDB.CMapNotFoundNr   r   r   r    CMapNotFoundr      s    r   r   rX   r$   c                     t        | t              s| S i }| j                         D ]?  \  }}	 t        |      }t        |t              rt        j                  |      ||<   ;|||<   A |S # t        t
        f$ r |}Y Ew xY w)zERecursively convert string keys to integers in CODE2CID dictionaries.)rV   rW   rU   rF   
ValueError	TypeErrorr   _convert_code2cid_keys)rX   resultr1   r2   new_keys        r    r   zCMapDB._convert_code2cid_keys   s    
 !T"H$&GGI 
	$DAqa& !T""("?"?"Bw"#w
	$  	* s   A))A=<A=r   c           	         |j                  dd      }t        j                  d|       t        j                  j                  dd      t        j                  j                  t        j                  j                  t              d      f}|D ]  }| d}t        j                  j                  ||      }t        j                  j                  |      }t        j                  j                  |      }|j                  |t        j                  z         st        j                  j                  |      st        j                  d|       t        j                  |d	d
      5 }t!        j"                  |      }	d|	v r3|	d   j%                         D 
ci c]  \  }
}t'        |
      | c}}
|	d<   d|	v r3|	d   j%                         D 
ci c]  \  }
}t'        |
      | c}}
|	d<   |	j                  d      r| j)                  |	d         |	d<   t+        t-        |      d|	      cd d d        c S  t.        j1                  |      c c}}
w c c}}
w # 1 sw Y   xY w)N  zloading: %r	CMAP_PATHz/usr/share/pdfminer/r=   z.json.gzzloading JSON: %rrtzutf-8)encodingr   r   r   r   )replacer[   rB   osenvironr.   pathjoindirname__file__realpath
startswithsepexistsgzipopenjsonloadrU   rF   r   rY   rE   r   r   )clsr   
cmap_paths	directoryjson_filename	json_pathresolved_json_pathresolved_directorygzfiledatar1   r2   s               r    
_load_datazCMapDB._load_data   s   ||D"%		-&JJNN;(>?GGLL2F;


 $ 	5I#fH-MY>I!#!1!1)!<!#!1!1)!< ",,"RVV+''..!34		,i8YY14'J 5f+/99V+<D%-26~2F2L2L2N0*.!QCFAI0^, &-26~2F2L2L2N0*.!QCFAI0^, xx
++.+E+Ed:FV+WZ(D	2t45 5	58 !!$''005 5s+   $0II
*!II
!AIII	c                 $   |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S 	 | j                  |   S # t        $ r Y nw xY w| j	                  |      }t        ||      x| j                  |<   }|S )Nz
Identity-Hr   )r-   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)rj   rx   r   KeyErrorr   r   )r   r   r   r=   s       r    get_cmapzCMapDB.get_cmap  s    <a((\!a((''#!,,''#!,,	??4(( 		~~d#'-dD'99s   A 	A! A!r   c                     	 | j                   |   |   S # t        $ r Y nw xY w| j                  d|       }dD cg c]  }t        |||       nc c}w c}| j                   |<   | j                   |   |   S )Nzto-unicode-)FT)r   r   r   r   )r   r   r   r   r2   s        r    get_unicode_mapzCMapDB.get_unicode_map1  s{    	??4(22 		~~D623FS TdD!!< T Tt$X..s    	  AN)F)r   r   r   r   r   rW   rE   r   __annotations__r   listr   r   r   staticmethodr
   rC   rF   r   classmethodrY   r   r   r"   r   rD   r|   r   r   r   r    r   r      s    /1K$sF{+,1;=K$sD$6678=y  c6k"C'(	tCK #%	& ( $(c $(d3i $( $(L C H  " /3 /$ /: / /r   r   c                   L   e Zd ZdededdfdZddZ ed      Z ed      Z	 ed	      Z
 ed
      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      ZdededdfdZdeddfdZy)
CMapParserr=   fpr$   Nc                 j    t        j                  | |       || _        d| _        t	               | _        y )NT)r   r+   r=   _in_cmapset	_warnings)r*   r=   r   s      r    r+   zCMapParser.__init__=  s)    tR(	#&5r   c                     t        j                  t              5  | j                          d d d        y # 1 sw Y   y xY wr&   )
contextlibsuppressr   
nextobjectr/   s    r    runzCMapParser.runD  s/      ' 	OO	 	 	s   4=s	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangepostokenc                 :   || j                   u rd| _        | j                          y|| j                  u rd| _        y| j                  sy|| j                  u rA	 | j                  d      \  \  }}\  }}| j                  j                  t        |      |       y|| j                  u rO	 | j                  d      \  \  }}| j                  j                  t        j                  t        |                   y|| j                  u r| j                          y|| j                   u r| j                          y|| j"                  u r| j                          y|| j$                  u rj| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]7  \  }
}}t)        |
t*              s| j-                  d       *t)        |t*              s| j-                  d       Lt)        |t.              s| j-                  d	       nt1        |
      t1        |      k7  r| j-                  d
       |
dd }|dd }||k7  r| j-                  d       |
dd }|dd }t3        |      }t3        |      }t1        |      }t5        ||z
  dz         D ]A  }|t7        j8                  d||z         | d z   }| j                  j;                  ||z   |       C : y|| j<                  u r| j                          y|| j>                  u ru| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]C  \  }}t)        |t*              st)        |t.              s(| j                  j;                  ||       E y|| j@                  u r| j                          y|| jB                  u r| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]  \  }
}}t)        |
t*              s| j-                  d       *t)        |t*              s| j-                  d       Lt1        |
      t1        |      k7  r| j-                  d       ut3        |
      }t3        |      }t)        |tD              rft1        |      ||z
  dz   k7  r| j-                  d       tG        t5        ||dz         |d      D ]!  \  }}| j                  j;                  ||       # t)        |t*              sJ |dd }t3        |      }|dd }t1        |      }t5        ||z
  dz         D ]A  }|t7        j8                  d||z         | d z   }| j                  j;                  ||z   |       C  y|| jH                  u r| j                          y|| jJ                  u r~| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]L  \  }}t)        |t*              st)        |t*              s(| j                  j;                  t3        |      |       N y|| jL                  u r| j                          y|| jN                  u r| j                          y| jQ                  ||f       y# t        $ r Y yw xY w# t        $ r Y yt        j                  $ r Y yw xY wc c}}w c c}}w c c}}w c c}}w )z[ToUnicode CMaps

        See Section 5.9.2 - ToUnicode CMaps of the PDF Reference.
        TNFrl   r      z0The start object of begincidrange is not a byte.z.The end object of begincidrange is not a byte.z.The cid object of begincidrange is not a byte.z?The start and end byte of begincidrange have different lengths.zGThe prefix of the start and end byte of begincidrange are not the same.z>LzThe start object is not a byte.zThe end object is not a byte.z.The start and end byte have different lengths.zPThe difference between the start and end offsets does not match the code length.)strict))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpopr=   r4   r   r   KEYWORD_USECMAPr>   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rV   rG   
_warn_oncerF   rp   r   rangerq   packr;   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGEr   zipKEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r*   r   r   _r1   r2   cmapname__objobjs
start_byteend_byter6   start_prefix
end_prefixsvarevarstartendvlenr]   r^   r5   unicode_valuevarbaseprefixs                              r    
do_keywordzCMapParser.do_keywordY  s7   
 D*** DMKKMd***!DM}}D$$$#'88A; !Q!Q		""<?A6 D(((#'88A; !X		""6??<3I#JK
 D444KKMD222KKMD...KKMD,,,)-7IRC7D7-5a-> 9)
Hc!*e4OO$VW!(E2OO$TU!#s+OO$TUz?c(m3OO- )#2%cr]
:-OO: !"#}dm4ysU{Q/ 9A$v{{4'CTEF'KKAII,,S1Wa89;9@ D---KKMD+++)-7IRC7D7%a. 8	TdE*z#s/CII,,S$78 D---KKMD+++)-7IRC7D7.6q$.? ?*
Hd!*e4OO$EF!(E2OO$CDz?c(m3OO$TU
+h'dD)4yC%K!O3F /2eS1W-tE/ E*] 		00mDE
 &dE222rs)C"3<D!#2YFs8D"3;?3 ?"V[[tax%@$%HH		00A>?9?> D,,,KKMD***)-7IRC7D7%a. A	Tc5)ju.EII,,WS\4@A D111KKMD///KKM		3,E !  !   &&   8P 8 8N 8sC   ?Y (AY" 5ZZ/ZZ	YY"	Z-ZZmsgc                     || j                   vr6| j                   j                  |       d}t        j                  ||z          yy)z!Warn once for each unique messagezIgnoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. N)r   addr[   warning)r*   r  base_msgs      r    r   zCMapParser._warn_once  sA    dnn$NNs#/ 
 KK3' %r   )r$   N)r   r   r   r"   r   r+   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  rF   r   r  rE   r   r   r   r    r   r   <  s   )X )8 ) ) L)*oO*oOf+K"%&<"= #$8 9 01n-/]+/]+n-L)"#67 !23W c W ) W  W r	(c 	(d 	(r   r   )5__doc__r   r   r   loggingr   os.pathrq   rf   collections.abcr   r   r   typingr   r   r   r	   r
   r   pdfminer.encodingdbr   pdfminer.pdfexceptionsr   r   pdfminer.psexceptionsr   r   pdfminer.psparserr   r   r   r   r   pdfminer.utilsr   r   	getLoggerr   r[   r   r"   rI   rj   rx   r|   r   r   r   r   r   r   r   r   r   r    <module>r-     s   	     	   
 > >  - = 6 T T ,g!	 	" "22P8 2Pj8 | 4 4" t $&Z &*$T $2: 2]/ ]/@(y) (r   