
    3fi                        d dl mZ d dlZd dlmZmZmZmZmZm	Z	 d dl
mZ d dlmZ d dlmZ erd dlmZ  ej$                  e      Z G d d	e      Zy)
    )annotationsN)TYPE_CHECKINGAnyIteratorListOptionalUnion)Document)BaseBlobParser)Blob)TokenCredentialc                  h    e Zd ZdZ	 	 	 	 	 	 d		 	 	 	 	 	 	 	 	 	 	 	 	 d
dZddZddZddZddZddZ	y)!AzureAIDocumentIntelligenceParserzMLoads a PDF with Azure Document Intelligence
    (formerly Forms Recognizer).Nc                >   ddl m} ddlm}	 ddlm}
 i }|r|t        d      |}n|	 |
|      }nt        d      |||d<    |d||dd	id
|| _        || _        || _	        d | _
        ||D cg c]
  } |	|       c}| _
        | j                  dv sJ y c c}w )Nr   )DocumentIntelligenceClient)DocumentAnalysisFeature)AzureKeyCredentialz;Only one of api_key or azure_credential should be provided.z4Either api_key or azure_credential must be provided.api_versionzx-ms-useragentzlangchain-parser/1.0.0)endpoint
credentialheaders)singlepagemarkdown )azure.ai.documentintelligencer   $azure.ai.documentintelligence.modelsr   azure.core.credentialsr   
ValueErrorclient	api_modelmodefeatures)selfapi_endpointapi_keyr   r!   r"   analysis_featuresazure_credentialr   r   r   kwargsr   features                 {/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_community/document_loaders/parsers/doc_intelligence.py__init__z*AzureAIDocumentIntelligenceParser.__init__   s     	MP= " Q  *J +G4JSTT"$/F=!0 
!!%'?@
 	
 #	AE(@Q5<'0DM yy::::s   2Bc              #     K   |j                   D ]R  }dj                  |j                  D cg c]  }|j                   c}      }t	        |d|j
                  i      }| T y c c}w w)N r   page_contentmetadata)pagesjoinlinescontentr
   page_number)r$   resultpliner5   ds         r+   _generate_docs_pagez5AzureAIDocumentIntelligenceParser._generate_docs_pageC   s_      		AhhAABG$AMMA G		As   )A*A%
,A*c              #  Z   K   t        |j                  |j                                y w)Nr/   )r
   r5   as_dict)r$   r7   s     r+   _generate_docs_singlez7AzureAIDocumentIntelligenceParser._generate_docs_singleO   s     FNNV^^=MNNs   )+c              #    K   |j                         5 }| j                  j                  | j                  |d| j                  dk(  rdnd| j
                        }|j                         }| j                  dv r| j                  |      E d{    n@| j                  dv r| j                  |      E d{    nt        d| j                         ddd       y7 N7 (# 1 sw Y   yxY ww)	zLazily parse the blob.zapplication/octet-streamr   text)bodycontent_typeoutput_content_formatr#   r   r   Nr   Invalid mode: )
as_bytes_ior    begin_analyze_documentr!   r"   r#   r7   r>   r;   r   )r$   blobfile_objpollerr7   s        r+   
lazy_parsez,AzureAIDocumentIntelligenceParser.lazy_parseR   s       	?8[[77748II4KjQW 8 F ]]_Fyy2255f===h&33F;;; >$))!=>>	? 	? >;	? 	?sA   C*A8CC'C3C4C	C*CCC'#C*c              #    K   ddl m} | j                  j                  | j                   ||      | j
                  dk(  rdnd| j                        }|j                         }| j
                  dv r| j                  |      E d {    y | j
                  dv r| j                  |      E d {    y t        d	| j
                         7 E7 w)
Nr   AnalyzeDocumentRequest)
url_sourcer   r@   rA   rC   r#   rD   rE   rF   r   rO   r    rH   r!   r"   r#   r7   r>   r;   r   )r$   urlrO   rK   r7   s        r+   	parse_urlz+AzureAIDocumentIntelligenceParser.parse_urlf   s     O33NN'3704		Z0G*V]]	 4 
 99..11&999YY("//777~dii[9::	 :7$   BCC'C.C/CCc              #    K   ddl m} | j                  j                  | j                   ||      | j
                  dk(  rdnd| j                        }|j                         }| j
                  dv r| j                  |      E d {    y | j
                  dv r| j                  |      E d {    y t        d	| j
                         7 E7 w)
Nr   rN   )bytes_sourcer   r@   rQ   rD   rE   rF   rR   )r$   rW   rO   rK   r7   s        r+   parse_bytesz-AzureAIDocumentIntelligenceParser.parse_bytesx   s     O33NN'\B04		Z0G*V]]	 4 
 99..11&999YY("//777~dii[9::	 :7rU   )NNzprebuilt-layoutr   NN)r%   strr&   Optional[str]r   rZ   r!   rY   r"   rY   r'   zOptional[List[str]]r(   zOptional['TokenCredential'])r7   r   returnIterator[Document])rI   r   r[   r\   )rS   rY   r[   r\   )rW   bytesr[   r\   )
__name__
__module____qualname____doc__r,   r;   r>   rL   rT   rX   r       r+   r   r      s    $ "&%)*158<,;,; ,; #	,;
 ,; ,; /,; 6,;\
O?(;$;rb   r   )
__future__r   loggingtypingr   r   r   r   r   r	   langchain_core.documentsr
   )langchain_community.document_loaders.baser   1langchain_community.document_loaders.blob_loadersr   r   r   	getLoggerr^   loggerr   r   rb   r+   <module>rk      s@    "  F F - D B6			8	$w; w;rb   