
    f3fi[+                        d Z ddlmZ ddlZddlZddlmZmZ ddlm	Z	m
Z
 ddlmZmZmZmZ ddlmZmZmZ ddlmZ erdd	lmZ ee
z  Z G d
 de      Z G d de      Z G d de      Zy)aD  Base classes for media and documents.

This module contains core abstractions for **data retrieval and processing workflows**:

- `BaseMedia`: Base class providing `id` and `metadata` fields
- `Blob`: Raw data loading (files, binary data) - used by document loaders
- `Document`: Text content for retrieval (RAG, vector stores, semantic search)

!!! note "Not for LLM chat messages"
    These classes are for data processing pipelines, not LLM I/O. For multimodal
    content in chat messages (images, audio in conversations), see
    `langchain.messages` content blocks instead.
    )annotationsN)BufferedReaderBytesIO)PathPurePath)TYPE_CHECKINGAnyLiteralcast)
ConfigDictFieldmodel_validator)Serializable)	Generatorc                  N    e Zd ZU dZ edd      Zded<   	  ee      Zded	<   y)
	BaseMediaaA  Base class for content used in retrieval and data processing workflows.

    Provides common fields for content that needs to be stored, indexed, or searched.

    !!! note
        For multimodal content in **chat messages** (images, audio sent to/from LLMs),
        use `langchain.messages` content blocks instead.
    NT)defaultcoerce_numbers_to_str
str | Noneid)default_factorydictmetadata)	__name__
__module____qualname____doc__r   r   __annotations__r   r        [/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_core/documents/base.pyr   r   !   s3     4tDB
D 40Hd09r    r   c                  H   e Zd ZU dZdZded<   	 dZded<   	 dZded	<   	 dZd
ed<   	  e	dd      Z
edd       Z ed      edd              ZddZddZej&                  dd       Zeddddd	 	 	 	 	 	 	 	 	 	 	 dd       Zeddddd	 	 	 	 	 	 	 	 	 	 	 d d       ZddZy)!Bloba  Raw data abstraction for document loading and file processing.

    Represents raw bytes or text, either in-memory or by file reference. Used
    primarily by document loaders to decouple data loading from parsing.

    Inspired by [Mozilla's `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)

    ???+ example "Initialize a blob from in-memory data"

        ```python
        from langchain_core.documents import Blob

        blob = Blob.from_data("Hello, world!")

        # Read the blob as a string
        print(blob.as_string())

        # Read the blob as bytes
        print(blob.as_bytes())

        # Read the blob as a byte stream
        with blob.as_bytes_io() as f:
            print(f.read())
        ```

    ??? example "Load from memory and specify MIME type and metadata"

        ```python
        from langchain_core.documents import Blob

        blob = Blob.from_data(
            data="Hello, world!",
            mime_type="text/plain",
            metadata={"source": "https://example.com"},
        )
        ```

    ??? example "Load the blob from a file"

        ```python
        from langchain_core.documents import Blob

        blob = Blob.from_path("path/to/file.txt")

        # Read the blob as a string
        print(blob.as_string())

        # Read the blob as bytes
        print(blob.as_bytes())

        # Read the blob as a byte stream
        with blob.as_bytes_io() as f:
            print(f.read())
        ```
    Nzbytes | str | Nonedatar   mimetypezutf-8strencodingzPathLike | NonepathT)arbitrary_types_allowedfrozenc                    | j                   r'd| j                   v rt        d| j                   d         S | j                  rt        | j                        S dS )a!  The source location of the blob as string if known otherwise none.

        If a path is associated with the `Blob`, it will default to the path location.

        Unless explicitly set via a metadata field called `'source'`, in which
        case that value will be used instead.
        sourcer   N)r   r   r(   r&   selfs    r!   r,   zBlob.source   sE     ==X6dmmH&=>>!%s499~44r    before)modec                0    d|vrd|vrd}t        |      |S )z,Verify that either data or path is provided.r$   r(   z$Either data or path must be provided)
ValueError)clsvaluesmsgs      r!   check_blob_is_validzBlob.check_blob_is_valid   s'     F&$88CS/!r    c                z   | j                   ;| j                  r/t        | j                        j                  | j                        S t        | j                   t              r%| j                   j                  | j                        S t        | j                   t              r| j                   S d|  }t        |      )zRead data as a string.

        Raises:
            ValueError: If the blob cannot be represented as a string.

        Returns:
            The data as a string.
        )r'   zUnable to get string for blob )
r$   r(   r   	read_textr'   
isinstancebytesdecoder&   r2   r.   r5   s     r!   	as_stringzBlob.as_string   s     99		?,,dmm,DDdii'99##DMM22dii%99.tf5or    c                b   t        | j                  t              r| j                  S t        | j                  t              r%| j                  j	                  | j
                        S | j                  /| j                  r#t        | j                        j                         S d|  }t        |      )zRead data as bytes.

        Raises:
            ValueError: If the blob cannot be represented as bytes.

        Returns:
            The data as bytes.
        zUnable to get bytes for blob )
r9   r$   r:   r&   encoder'   r(   r   
read_bytesr2   r<   s     r!   as_byteszBlob.as_bytes   s     dii'99dii%99##DMM2299		?--//-dV4or    c              #  :  K   t        | j                  t              rt        | j                         y| j                  >| j                  r2t        | j                        j                  d      5 }| ddd       yd|  }t        |      # 1 sw Y   yxY ww)zRead data as a byte stream.

        Raises:
            NotImplementedError: If the blob cannot be represented as a byte stream.

        Yields:
            The data as a byte stream.
        NrbzUnable to convert blob )r9   r$   r:   r   r(   r   openNotImplementedError)r.   fr5   s      r!   as_bytes_iozBlob.as_bytes_io   s      dii'$))$$YY499dii%%d+ q  ,D62C%c**	 s   A/B1B6BBB)r'   	mime_type
guess_typer   c               t    ||r|rt        j                  |      d   nd}n|} | d|||||      S i       S )a  Load the blob from a path like object.

        Args:
            path: Path-like object to file to be read
            encoding: Encoding to use if decoding the bytes into a string
            mime_type: If provided, will be set as the MIME type of the data
            guess_type: If `True`, the MIME type will be guessed from the file
                extension, if a MIME type was not provided
            metadata: Metadata to associate with the `Blob`

        Returns:
            `Blob` instance
        Nr   r$   r%   r'   r(   r   )	mimetypesrI   )r3   r(   r'   rH   rI   r   r%   s          r!   	from_pathzBlob.from_path   s\    . 8By++D1!4H H !)!5X
 	

 <>
 	
r    )r'   rH   r(   r   c               .     | ||||||      S i       S )a  Initialize the `Blob` from in-memory data.

        Args:
            data: The in-memory data associated with the `Blob`
            encoding: Encoding to use if decoding the bytes into a string
            mime_type: If provided, will be set as the MIME type of the data
            path: If provided, will be set as the source from which the data came
            metadata: Metadata to associate with the `Blob`

        Returns:
            `Blob` instance
        rK   r   )r3   r$   r'   rH   r(   r   s         r!   	from_datazBlob.from_data   s4    , !)!5X
 	

 <>
 	
r    c                ^    dt        |        }| j                  r|d| j                   z  }|S )zReturn the blob representation.zBlob  )r   r,   )r.   str_reprs     r!   __repr__zBlob.__repr__  s3    2d8*%;;!DKK=))Hr    )returnr   )r4   zdict[str, Any]rT   r	   rT   r&   )rT   r:   )rT   z/Generator[BytesIO | BufferedReader, None, None])r(   PathLiker'   r&   rH   r   rI   boolr   dict | NonerT   r#   )r$   zstr | bytesr'   r&   rH   r   r(   r   r   rX   rT   r#   )r   r   r   r   r$   r   r%   r'   r(   r   model_configpropertyr,   r   classmethodr6   r=   rA   
contextlibcontextmanagerrG   rM   rO   rS   r   r    r!   r#   r#   9   sj   6p  $D
#.Hj>Hc !D/ 8 $L
 
5 
5 (#  $$$ + +$ 
   $ $"
"
 	"

 "
 "
 "
 
"
 "
H 
   $ $

 	

 
 
 
 

 
:r    r#   c                  l     e Zd ZU dZded<   	 d Zded<   d
 fdZedd       Zedd       Z	dd	Z
 xZS )Documenta  Class for storing a piece of text and associated metadata.

    !!! note
        `Document` is for **retrieval workflows**, not chat I/O. For sending text
        to an LLM in a conversation, use message types from `langchain.messages`.

    Example:
        ```python
        from langchain_core.documents import Document

        document = Document(
            page_content="Hello, world!", metadata={"source": "https://example.com"}
        )
        ```
    r&   page_contentzLiteral['Document']typec                (    t        |   dd|i| y)z0Pass page_content in as positional or named arg.r`   Nr   )super__init__)r.   r`   kwargs	__class__s      r!   rd   zDocument.__init__0  s     	=l=f=r    c                     y)z,Return `True` as this class is serializable.Tr   r3   s    r!   is_lc_serializablezDocument.is_lc_serializable6  s     r    c                
    g dS )zuGet the namespace of the LangChain object.

        Returns:
            ["langchain", "schema", "document"]
        )	langchainschemadocumentr   rh   s    r!   get_lc_namespacezDocument.get_lc_namespace;  s
     32r    c                r    | j                   rd| j                   d| j                    S d| j                   dS )zOverride `__str__` to restrict it to page_content and metadata.

        Returns:
            A string representation of the `Document`.
        zpage_content='z' metadata=')r   r`   r-   s    r!   __str__zDocument.__str__D  s?     ==#D$5$5#6k$--QQ 1 12!44r    )r`   r&   re   r	   rT   None)rT   rW   )rT   z	list[str]rU   )r   r   r   r   r   ra   rd   r[   ri   rn   rq   __classcell__)rf   s   @r!   r_   r_     sP       *D
*>   3 35r    r_   )r   
__future__r   r\   rL   ior   r   pathlibr   r   typingr   r	   r
   r   pydanticr   r   r    langchain_core.load.serializabler   collections.abcr   r&   rV   r   r#   r_   r   r    r!   <module>r{      s`    #   & " 4 4 7 7 9)>: :0_9 _D95y 95r    