
    3fi                     J    d dl mZ d dlmZ d dlmZ d dlmZ  G d de      Zy)    )Iterator)Document)BaseBlobParser)Blobc                   &    e Zd ZdZdedee   fdZy)MsWordParserz/Parse the Microsoft Word documents from a blob.blobreturnc           	   #     K   	 ddl m} ddlm} ||d}|j
                  dvrt        d      |j                         5 } ||j
                     |      }d	j                  |D cg c]  }t        |       c}      }	d
|j                  i}
t        |	|
       ddd       y# t        $ r}t	        d      |d}~ww xY wc c}w # 1 sw Y   yxY ww)zParse a Microsoft Word document into the Document iterator.

        Args:
            blob: The blob to parse.

        Returns: An iterator of Documents.

        r   )partition_doc)partition_docxzNCould not import unstructured, please install with `pip install unstructured`.N)zapplication/mswordzGapplication/vnd.openxmlformats-officedocument.wordprocessingml.documentz0This blob type is not supported for this parser.)filez

source)page_contentmetadata)unstructured.partition.docr   unstructured.partition.docxr   ImportErrormimetype
ValueErroras_bytes_iojoinstrr   r   )selfr	   r   r   emime_type_parserword_documentelementseltextr   s              q/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_community/document_loaders/parsers/msword.py
lazy_parsezMsWordParser.lazy_parse   s     	@B #0
 == !
 
 OPP 	A=6'6MJH;;(;BB;<D $++.Hx@@		A 	A#  	! 	&  <	A 	AsP   CB# .C&C$C 
6$C	C#	B=,B88B==C CC
CN)__name__
__module____qualname____doc__r   r   r   r"        r!   r   r   	   s     9!At !A(: !Ar(   r   N)	typingr   langchain_core.documentsr   )langchain_community.document_loaders.baser   1langchain_community.document_loaders.blob_loadersr   r   r'   r(   r!   <module>r-      s     - D B$A> $Ar(   