
    f3fiW                    4   d Z ddlmZ ddlZddlZddlmZmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ erdd
lmZ ddlmZ  G d de      Z G d de
      Z G d de      Z G d de
      Z G d de
d      Z ed       G d de             Zy)zBase classes for indexing.    )annotationsN)ABCabstractmethod)TYPE_CHECKINGAny	TypedDict)override)beta)BaseRetriever)run_in_executor)Sequence)Documentc                  `   e Zd ZdZ	 	 	 	 ddZedd       Zedd       Zedd       Zedd       Z	eddd	 	 	 	 	 	 	 dd	       Z
eddd	 	 	 	 	 	 	 dd
       Zedd       Zedd       Zeddddd	 	 	 	 	 	 	 	 	 dd       Zeddddd	 	 	 	 	 	 	 	 	 dd       Zedd       Zedd       Zy)RecordManagera  Abstract base class representing the interface for a record manager.

    The record manager abstraction is used by the langchain indexing API.

    The record manager keeps track of which documents have been
    written into a `VectorStore` and when they were written.

    The indexing API computes hashes for each document and stores the hash
    together with the write time and the source id in the record manager.

    On subsequent indexing runs, the indexing API can check the record manager
    to determine which documents have already been indexed and which have not.

    This allows the indexing API to avoid re-indexing documents that have
    already been indexed, and to only index new documents.

    The main benefit of this abstraction is that it works across many vectorstores.
    To be supported, a `VectorStore` needs to only support the ability to add and
    delete documents by ID. Using the record manager, the indexing API will
    be able to delete outdated documents and avoid redundant indexing of documents
    that have already been indexed.

    The main constraints of this abstraction are:

    1. It relies on the time-stamps to determine which documents have been
        indexed and which have not. This means that the time-stamps must be
        monotonically increasing. The timestamp should be the timestamp
        as measured by the server to minimize issues.
    2. The record manager is currently implemented separately from the
        vectorstore, which means that the overall system becomes distributed
        and may create issues with consistency. For example, writing to
        record manager succeeds, but corresponding writing to `VectorStore` fails.
    c                    || _         y)zsInitialize the record manager.

        Args:
            namespace: The namespace for the record manager.
        N)	namespace)selfr   s     Z/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_core/indexing/base.py__init__zRecordManager.__init__9   s     #    c                     y)z2Create the database schema for the record manager.N r   s    r   create_schemazRecordManager.create_schemaD       r   c                   K   yw)zAAsynchronously create the database schema for the record manager.Nr   r   s    r   acreate_schemazRecordManager.acreate_schemaH           c                     y)a*  Get the current server time as a high resolution timestamp!

        It's important to get this from the server to ensure a monotonic clock,
        otherwise there may be data loss when cleaning up old documents!

        Returns:
            The current server time as a float timestamp.
        Nr   r   s    r   get_timezRecordManager.get_timeL   r   r   c                   K   yw)a9  Asynchronously get the current server time as a high resolution timestamp.

        It's important to get this from the server to ensure a monotonic clock,
        otherwise there may be data loss when cleaning up old documents!

        Returns:
            The current server time as a float timestamp.
        Nr   r   s    r   	aget_timezRecordManager.aget_timeW   r   r   N	group_idstime_at_leastc                    y)a  Upsert records into the database.

        Args:
            keys: A list of record keys to upsert.
            group_ids: A list of group IDs corresponding to the keys.
            time_at_least: Optional timestamp. Implementation can use this
                to optionally verify that the timestamp IS at least this time
                in the system that stores the data.

                e.g., use to validate that the time in the postgres database
                is equal to or larger than the given timestamp, if not
                raise an error.

                This is meant to help prevent time-drift issues since
                time may not be monotonically increasing!

        Raises:
            ValueError: If the length of keys doesn't match the length of group_ids.
        Nr   r   keysr%   r&   s       r   updatezRecordManager.updateb   r   r   c                  K   yw)a*  Asynchronously upsert records into the database.

        Args:
            keys: A list of record keys to upsert.
            group_ids: A list of group IDs corresponding to the keys.
            time_at_least: Optional timestamp. Implementation can use this
                to optionally verify that the timestamp IS at least this time
                in the system that stores the data.

                e.g., use to validate that the time in the postgres database
                is equal to or larger than the given timestamp, if not
                raise an error.

                This is meant to help prevent time-drift issues since
                time may not be monotonically increasing!

        Raises:
            ValueError: If the length of keys doesn't match the length of group_ids.
        Nr   r(   s       r   aupdatezRecordManager.aupdate~   r   r   c                     y)Check if the provided keys exist in the database.

        Args:
            keys: A list of keys to check.

        Returns:
            A list of boolean values indicating the existence of each key.
        Nr   r   r)   s     r   existszRecordManager.exists   r   r   c                   K   yw)zAsynchronously check if the provided keys exist in the database.

        Args:
            keys: A list of keys to check.

        Returns:
            A list of boolean values indicating the existence of each key.
        Nr   r/   s     r   aexistszRecordManager.aexists   r   r   beforeafterr%   limitc                    y)a  List records in the database based on the provided filters.

        Args:
            before: Filter to list records updated before this time.
            after: Filter to list records updated after this time.
            group_ids: Filter to list records with specific group IDs.
            limit: optional limit on the number of records to return.

        Returns:
            A list of keys for the matching records.
        Nr   r   r4   r5   r%   r6   s        r   	list_keyszRecordManager.list_keys   r   r   c                  K   yw)a  Asynchronously list records in the database based on the provided filters.

        Args:
            before: Filter to list records updated before this time.
            after: Filter to list records updated after this time.
            group_ids: Filter to list records with specific group IDs.
            limit: optional limit on the number of records to return.

        Returns:
            A list of keys for the matching records.
        Nr   r8   s        r   
alist_keyszRecordManager.alist_keys   r   r   c                     yzoDelete specified records from the database.

        Args:
            keys: A list of keys to delete.
        Nr   r/   s     r   delete_keyszRecordManager.delete_keys   r   r   c                   K   yw)z~Asynchronously delete specified records from the database.

        Args:
            keys: A list of keys to delete.
        Nr   r/   s     r   adelete_keyszRecordManager.adelete_keys   r   r   r   strreturnNonerC   rD   rC   floatr)   Sequence[str]r%   zSequence[str | None] | Noner&   float | NonerC   rD   r)   rI   rC   z
list[bool]
r4   rJ   r5   rJ   r%   zSequence[str] | Noner6   z
int | NonerC   	list[str]r)   rI   rC   rD   )__name__
__module____qualname____doc__r   r   r   r   r!   r#   r*   r,   r0   r2   r9   r;   r>   r@   r   r   r   r   r      s    D	#	# 
	# A A P P     
 26&* /	
 $ 
 6 
 26&* /	
 $ 
 6       $"*.   	
 (  
 (   $"*.   	
 (  
 (    r   r   c                  "    e Zd ZU ded<   ded<   y)_Recordz
str | Nonegroup_idrG   
updated_atN)rO   rP   rQ   __annotations__r   r   r   rT   rT      s    r   rT   c                       e Zd ZdZd fdZddZddZedd       Zedd       Z	ddd	 	 	 	 	 	 	 dd	Z
ddd	 	 	 	 	 	 	 dd
ZddZddZddddd	 	 	 	 	 	 	 	 	 ddZddddd	 	 	 	 	 	 	 	 	 ddZddZddZ xZS )InMemoryRecordManagerz1An in-memory record manager for testing purposes.c                @    t         |   |       i | _        || _        y)z}Initialize the in-memory record manager.

        Args:
            namespace: The namespace for the record manager.
        N)superr   recordsr   )r   r   	__class__s     r   r   zInMemoryRecordManager.__init__   s"     	# ,."r   c                     yzJIn-memory schema creation is simply ensuring the structure is initialized.Nr   r   s    r   r   z#InMemoryRecordManager.create_schema   r   r   c                   K   ywr_   r   r   s    r   r   z$InMemoryRecordManager.acreate_schema  r   r   c                *    t        j                          S N)timer   s    r   r!   zInMemoryRecordManager.get_time  s    yy{r   c                *   K   | j                         S wrb   )r!   r   s    r   r#   zInMemoryRecordManager.aget_time	  s     }}s   Nr$   c                  |r$t        |      t        |      k7  rd}t        |      t        |      D ]P  \  }}|r||   nd}|r || j                         kD  rd}t        |      || j                         d| j                  |<   R y)a\  Upsert records into the database.

        Args:
            keys: A list of record keys to upsert.
            group_ids: A list of group IDs corresponding to the keys.

            time_at_least: Optional timestamp. Implementation can use this
                to optionally verify that the timestamp IS at least this time
                in the system that stores.
                E.g., use to validate that the time in the postgres database
                is equal to or larger than the given timestamp, if not
                raise an error.
                This is meant to help prevent time-drift issues since
                time may not be monotonically increasing!

        Raises:
            ValueError: If the length of keys doesn't match the length of group
                ids.
            ValueError: If time_at_least is in the future.
        z-Length of keys must match length of group_idsNz!time_at_least must be in the past)rU   rV   )len
ValueError	enumerater!   r\   )r   r)   r%   r&   msgindexkeyrU   s           r   r*   zInMemoryRecordManager.update  s    6 Tc)n4ACS/!#D/ 	VJE3+4y'$H!@9 o%-5T]]_ UDLL	Vr   c               4   K   | j                  |||       yw)a  Async upsert records into the database.

        Args:
            keys: A list of record keys to upsert.
            group_ids: A list of group IDs corresponding to the keys.

            time_at_least: Optional timestamp. Implementation can use this
                to optionally verify that the timestamp IS at least this time
                in the system that stores.
                E.g., use to validate that the time in the postgres database
                is equal to or larger than the given timestamp, if not
                raise an error.
                This is meant to help prevent time-drift issues since
                time may not be monotonically increasing!
        r$   N)r*   r(   s       r   r,   zInMemoryRecordManager.aupdate2  s     , 	DI]K   c                D    |D cg c]  }|| j                   v  c}S c c}w )r.   r\   r   r)   rk   s      r   r0   zInMemoryRecordManager.existsJ  s!     044t||#444s   c                ,   K   | j                  |      S w)zAsync check if the provided keys exist in the database.

        Args:
            keys: A list of keys to check.

        Returns:
            A list of boolean values indicating the existence of each key.
        )r0   r/   s     r   r2   zInMemoryRecordManager.aexistsU  s      {{4  s   r3   c                   g }| j                   j                         D ]6  \  }}|r	|d   |k\  r|r	|d   |k  r|r|d   |vr&|j                  |       8 |r|d| S |S )a  List records in the database based on the provided filters.

        Args:
            before: Filter to list records updated before this time.

            after: Filter to list records updated after this time.

            group_ids: Filter to list records with specific group IDs.

            limit: optional limit on the number of records to return.


        Returns:
            A list of keys for the matching records.
        rV   rU   N)r\   itemsappend)r   r4   r5   r%   r6   resultrk   datas           r   r9   zInMemoryRecordManager.list_keys`  s    . ++- 	IC$|,6l+u4T*-Y>MM#	 &5>!r   c               4   K   | j                  ||||      S w)a  Async list records in the database based on the provided filters.

        Args:
            before: Filter to list records updated before this time.

            after: Filter to list records updated after this time.

            group_ids: Filter to list records with specific group IDs.

            limit: optional limit on the number of records to return.


        Returns:
            A list of keys for the matching records.
        r3   )r9   r8   s        r   r;   z InMemoryRecordManager.alist_keys  s'     . ~~)5  
 	
rm   c                J    |D ]  }|| j                   v s| j                   |=   yr=   ro   rp   s      r   r>   z!InMemoryRecordManager.delete_keys  s+      	&Cdll"LL%	&r   c                .   K   | j                  |       yw)zuAsync delete specified records from the database.

        Args:
            keys: A list of keys to delete.
        N)r>   r/   s     r   r@   z"InMemoryRecordManager.adelete_keys  s      	s   rA   rE   rF   rH   rK   rL   rN   )rO   rP   rQ   rR   r   r   r   r	   r!   r#   r*   r,   r0   r2   r9   r;   r>   r@   __classcell__)r]   s   @r   rY   rY      sO   ;
#YY     26&*#V#V /	#V
 $#V 
#VR 26&*LL /	L
 $L 
L0	5	!  $"*. " " 	"
 (" " 
"N  $"*. 
 
 	

 (
 
 

6&r   rY   c                  (    e Zd ZU dZded<   	 ded<   y)UpsertResponsea  A generic response for upsert operations.

    The upsert response will be used by abstractions that implement an upsert
    operation for content that can be upserted by ID.

    Upsert APIs that accept inputs with IDs and generate IDs internally
    will return a response that includes the IDs that succeeded and the IDs
    that failed.

    If there are no failures, the failed list will be empty, and the order
    of the IDs in the succeeded list will match the order of the input documents.

    If there are failures, the response becomes ill defined, and a user of the API
    cannot determine which generated ID corresponds to which input document.

    It is recommended for users explicitly attach the IDs to the items being
    indexed to avoid this issue.
    rM   	succeededfailedNrO   rP   rQ   rR   rW   r   r   r   r|   r|     s    & 1'r   r|   c                  @    e Zd ZU dZded<   	 ded<   	 ded<   	 ded<   y)	DeleteResponsezA generic response for delete operation.

    The fields in this response are optional and whether the `VectorStore`
    returns them or not is up to the implementation.
    intnum_deletedrI   r}   r~   
num_failedNr   r   r   r   r   r     s8        O8r   r   F)totalz6Added in 0.2.29. The abstraction is subject to change.)messagec                      e Zd ZdZej
                  d	d       Z	 	 	 	 	 	 d	dZej
                  d
dd       Z	 d
	 	 	 	 	 ddZ	ej
                  	 	 	 	 	 	 dd       Z
	 	 	 	 	 	 ddZy)DocumentIndexa  A document retriever that supports indexing operations.

    This indexing interface is designed to be a generic abstraction for storing and
    querying documents that has an ID and metadata associated with it.

    The interface is designed to be agnostic to the underlying implementation of the
    indexing system.

    The interface is designed to support the following operations:

    1. Storing document in the index.
    2. Fetching document by ID.
    3. Searching for document using a query.
    c                    y)a\  Upsert documents into the index.

        The upsert functionality should utilize the ID field of the content object
        if it is provided. If the ID is not provided, the upsert method is free
        to generate an ID for the content.

        When an ID is specified and the content already exists in the `VectorStore`,
        the upsert method should update the content with the new data. If the content
        does not exist, the upsert method should add the item to the `VectorStore`.

        Args:
            items: Sequence of documents to add to the `VectorStore`.
            **kwargs: Additional keyword arguments.

        Returns:
            A response object that contains the list of IDs that were
            successfully added or updated in the `VectorStore` and the list of IDs that
            failed to be added or updated.
        Nr   r   rs   kwargss      r   upsertzDocumentIndex.upsert  r   r   c               N   K   t        d| j                  |fi | d{   S 7 w)an  Add or update documents in the `VectorStore`. Async version of `upsert`.

        The upsert functionality should utilize the ID field of the item
        if it is provided. If the ID is not provided, the upsert method is free
        to generate an ID for the item.

        When an ID is specified and the item already exists in the `VectorStore`,
        the upsert method should update the item with the new data. If the item
        does not exist, the upsert method should add the item to the `VectorStore`.

        Args:
            items: Sequence of documents to add to the `VectorStore`.
            **kwargs: Additional keyword arguments.

        Returns:
            A response object that contains the list of IDs that were
            successfully added or updated in the `VectorStore` and the list of IDs that
            failed to be added or updated.
        N)r   r   r   s      r   aupsertzDocumentIndex.aupsert  s6     , %KK
 	
 
 	
 
   %#%Nc                     y)a6  Delete by IDs or other criteria.

        Calling delete without any input parameters should raise a ValueError!

        Args:
            ids: List of IDs to delete.
            **kwargs: Additional keyword arguments. This is up to the implementation.
                For example, can include an option to delete the entire index,
                or else issue a non-blocking delete etc.

        Returns:
            A response object that contains the list of IDs that were
            successfully deleted and the list of IDs that failed to be deleted.
        Nr   r   idsr   s      r   deletezDocumentIndex.delete4  r   r   c                N   K   t        d| j                  |fi | d{   S 7 w)a  Delete by IDs or other criteria. Async variant.

        Calling adelete without any input parameters should raise a ValueError!

        Args:
            ids: List of IDs to delete.
            **kwargs: Additional keyword arguments. This is up to the implementation.
                For example, can include an option to delete the entire index.

        Returns:
            A response object that contains the list of IDs that were
            successfully deleted and the list of IDs that failed to be deleted.
        N)r   r   r   s      r   adeletezDocumentIndex.adeleteE  s6       %KK
 	
 
 	
 
r   c                    ya  Get documents by id.

        Fewer documents may be returned than requested if some IDs are not found or
        if there are duplicated IDs.

        Users should not assume that the order of the returned documents matches
        the order of the input IDs. Instead, users should rely on the ID field of the
        returned documents.

        This method should **NOT** raise exceptions if no documents are found for
        some IDs.

        Args:
            ids: List of IDs to get.
            **kwargs: Additional keyword arguments. These are up to the implementation.

        Returns:
            List of documents that were found.
        Nr   r   s      r   getzDocumentIndex.get\  r   r   c               N   K   t        d| j                  |fi | d{   S 7 wr   )r   r   r   s      r   agetzDocumentIndex.agetw  s6     2 %HH
 	
 
 	
 
r   )rs   zSequence[Document]r   r   rC   r|   rb   )r   zlist[str] | Noner   r   rC   r   )r   rI   r   r   rC   zlist[Document])rO   rP   rQ   rR   abcr   r   r   r   r   r   r   r   r   r   r   r     s     	 *
'
69
	
: 	 " '+
#
69
	
. 	 	
 
 4

 	

 

r   r   )rR   
__future__r   r   rc   r   r   typingr   r   r   typing_extensionsr	   langchain_core._apir
   langchain_core.retrieversr   langchain_core.runnablesr   collections.abcr   langchain_core.documentsr   r   rT   rY   r|   r   r   r   r   r   <module>r      s      " 
  # 0 0 & $ 3 4(1RC Rji 
M D(Y (4!9Ye !9H FGd
M d
 Hd
r   