
    f3fi                       U d Z ddlmZ ddlZddlZddlZddlZddlmZ ddl	m
Z
mZmZmZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ e
rddlmZmZmZmZm Z m!Z!  ejD                  d      Z# ed      Z$d+dZ%da&de'd<   d,dZ(	 	 	 	 	 	 d-dZ)	 	 	 	 	 	 d.dZ*d/dZ+d0dZ,	 	 	 	 d1dZ-	 	 	 	 d2dZ. G d de      Z/	 	 	 	 	 	 d3dZ0	 	 	 	 	 	 d4dZ1 G d d      Z2	 	 	 	 	 	 d5d Z3 G d! d"e      Z4d#ddd$dd%dd&	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d6d'Z5d7d(Z6	 	 	 	 	 	 d5d)Z7d#ddd$dd%dd&	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d8d*Z8y)9z@Module contains logic for indexing documents into vector stores.    )annotationsN)islice)TYPE_CHECKINGAnyLiteral	TypedDictTypeVarcast)
BaseLoader)Document)LangChainException)DocumentIndexRecordManager)VectorStore)AsyncIterableAsyncIteratorCallableIterableIteratorSequencei  )intTc                    t        j                  | j                  d      d      j                         }t	        t        j                  t        |            S )z3Hashes a string and returns the corresponding UUID.utf-8Fusedforsecurity)hashlibsha1encode	hexdigeststruuiduuid5NAMESPACE_UUID)input_string
hash_values     Y/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_core/indexing/api.py_hash_string_to_uuidr(   ,   sB    G$eik  tzz.*566    Fbool_WARNED_ABOUT_SHA1c                 N    t         st        j                  dt        d       da yy)z9Emit a one-time warning about SHA-1 collision weaknesses.aU  Using SHA-1 for document hashing. SHA-1 is *not* collision-resistant; a motivated attacker can construct distinct inputs that map to the same fingerprint. If this matters in your threat model, switch to a stronger algorithm such as 'blake2b', 'sha256', or 'sha512' by specifying  `key_encoder` parameter in the `index` or `aindex` function.    )category
stacklevelTN)r+   warningswarnUserWarning r)   r'   _warn_about_sha1r4   7   s.     M !		
 " r)   c               l    |dk(  r
t                t        | |      }t        j                  t        |      S )zKHash *input_string* to a deterministic UUID using the configured algorithm.r   )r4   _calculate_hashr"   r#   r$   )r%   	algorithmr&   s      r'   _hash_stringr8   I   s/     F y9J::nj11r)   c               J    t        j                  | d      }t        ||      S )zBHash a nested dictionary to a UUID using the configured algorithm.T	sort_keysr7   )jsondumpsr8   )datar7   serialized_datas      r'   _hash_nested_dictrA   S   s!     jj6O9==r)   c              #  \   K   t        |      }	 t        t        ||             }|sy| wzUtility batching function.N)iterlistr   )sizeiterableitchunks       r'   _batchrJ   [   s3     	hB
VB%&	 s   *,c                  K   g }|2 3 d{   }t        |      | k  r|j                  |       t        |      | k\  s7| g }?7 :6 |r| yywrC   )lenappend)rF   rG   batchelements       r'   _abatchrP   e   s_     E!  gu:LL!u:KE  s*   AAAA.AAA
Ac                      d S t         t              r fdS t               r S d  dt                d}t	        |      )z$Get the source id from the document.c                     y Nr3   )_docs    r'   <lambda>z)_get_source_id_assigner.<locals>.<lambda>y   s    r)   c                "    | j                      S rS   )metadata)docsource_id_keys    r'   rU   z)_get_source_id_assigner.<locals>.<lambda>{   s    3<<6 r)   zAsource_id_key should be either None, a string or a callable. Got z	 of type .)
isinstancer!   callabletype
ValueError)rY   msgs   ` r'   _get_source_id_assignerr`   t   s]       -%66oYtM':&;1	>  S/r)   c              #     K   t               }| D ]:  }|j                  |vs|j                  t        d|j                               | < yw)z>Deduplicate a list of hashed documents while preserving order.r!   N)setidaddr
   )hashed_documentsseen
hashed_docs      r'   _deduplicate_in_orderrh      sI      UD& 
==$ HHT%/0s
   A,Ac                      e Zd ZdZy)IndexingExceptionz(Raised when an indexing operation fails.N)__name__
__module____qualname____doc__r3   r)   r'   rj   rj      s    2r)   rj   c                $   |dk(  rWt        j                  | j                  d      d      j                         }t	        t        j                  t        |            S |dk(  r2t        j                  | j                  d            j                         S |dk(  r2t        j                  | j                  d            j                         S |dk(  r2t        j                  | j                  d            j                         S d| }t        |      )	z8Return a hexadecimal digest of *text* using *algorithm*.r   r   Fr   blake2bsha256sha512zUnsupported hashing algorithm: )r   r   r   r    r!   r"   r#   r$   rp   rq   rr   r^   )textr7   digestr_   s       r'   r6   r6      s     Fdkk'2EJTTV4::nf566It{{734>>@@H~~dkk'23==??H~~dkk'23==??+I;
7C
S/r)   c               ~   t        | j                  xs i       }t        |      r	 ||       }nLt        | j                  |      }	 t        j                  |d      }t        ||      }t        ||z   |      }t        || j                  | j                        S # t        $ r}d| d}t        |      |d}~ww xY w)a
  Calculate a hash of the document, and assign it to the uid.

    When using one of the predefined hashing algorithms, the hash is calculated
    by hashing the content and the metadata of the document.

    Args:
        document: Document to hash.
        key_encoder: Hashing algorithm to use for hashing the document.
            If not provided, a default encoder using SHA-1 will be used.
            SHA-1 is not collision-resistant, and a motivated attacker
            could craft two different texts that hash to the
            same cache key.

            New applications should use one of the alternative encoders
            or provide a custom and strong key encoder function to avoid this risk.

            When changing the key encoder, you must change the
            index as well to avoid duplicated documents in the cache.

    Raises:
        ValueError: If the metadata cannot be serialized using json.

    Returns:
        Document with a unique identifier based on the hash of the content and metadata.
    r<   Tr:   zFailed to hash metadata: z6. Please use a dict that can be serialized using json.N)rc   page_contentrW   )
dictrW   r\   r6   rv   r=   r>   	Exceptionr^   r   )	documentkey_encoderrW   hash_content_hashserialized_metaer_   metadata_hashs	            r'   _get_document_with_hashr      s    >  $H$5$5$;<HH% 'x'<'<T	)"jjTBO (;O} <T**""	   	)+A3 /G H  S/q(	)s   B 	B<%B77B<c                      e Zd ZddZy)_HashedDocumentc                    d}t        |      )z-Raise an error if this class is instantiated.z_HashedDocument is an internal abstraction that was deprecated in  langchain-core 0.3.63. This abstraction is marked as private and  should not have been used directly. If you are seeing this error, please  update your code appropriately.)NotImplementedError)selfargskwargsr_   s       r'   __init__z_HashedDocument.__init__   s    / 	 "#&&r)   N)r   r   r   r   returnNone)rk   rl   rm   r   r3   r)   r'   r   r      s    'r)   r   c                   t        | t              r&| j                  |      }||du rd}t        |      yyt        | t              r,| j                  |      }d|v r|d   dkD  rd}t        |      yydt        |        d}t        |      )	ax  Delete documents from a vector store or document index by their IDs.

    Args:
        vector_store: The vector store or document index to delete from.
        ids: List of document IDs to delete.

    Raises:
        IndexingException: If the delete operation fails.
        TypeError: If the `vector_store` is neither a `VectorStore` nor a
            `DocumentIndex`.
    NF+The delete operation to VectorStore failed.
num_failedr   -The delete operation to DocumentIndex failed.CVectorstore should be either a VectorStore or a DocumentIndex. Got rZ   )r[   r   deleterj   r   r]   	TypeErrorvector_storeids	delete_okr_   delete_responses        r'   _deleter      s     ,, '',	 Y%%7?C#C(( &8  
L-	0&--c2?*|/Lq/PAC#C(( 0Q*
%&a) 	 nr)   c                  @    e Zd ZU dZded<   	 ded<   	 ded<   	 ded<   y)IndexingResultzFReturn a detailed a breakdown of the result of the indexing operation.r   	num_addednum_updatednum_deletednum_skippedN)rk   rl   rm   rn   __annotations__r3   r)   r'   r   r     s(    PN$G&Kr)   r   d   i  r   )
batch_sizecleanuprY   cleanup_batch_sizeforce_updaterz   upsert_kwargsc               	   |dk(  r
t                |dvrd| d}
t        |
      |dv r|d}
t        |
      |}t        |t              r]dd	g}|D ]!  }t	        ||      rd
| d| }
t        |
       t        |      j                  t        j                  k(  r8d}
t        |
      t        |t              rndt        |       d}
t        |
      t        | t              r	 | j                         }nt        |       }t        |      }|j                         }d}d}d}d}t!               }t#        ||      D ]  }t%        |      }t'        t)        |D cg c]  }t+        ||       c}            }||t%        |      z
  z  }|D cg c]
  } ||       }}|dv rYt-        ||d      D ]<  \  }}|d|j.                  dd  d}
t        |
      |dk(  s,|j1                  |       > t3        d|      }|j5                  t3        d|D cg c]  }|j6                   c}            }g }g }g } t!               }!t-        ||d      D ]e  \  }}"t3        d|j6                        }#|"r&|r|!j1                  |#       n| j9                  |#       D|j9                  |#       |j9                  |       g | r!|j;                  | |       |t%        |       z  }|rzt        |t              r |j<                  |f||d|	xs i  n't        |t              r |j>                  |fi |	xs i  |t%        |      t%        |!      z
  z  }|t%        |!      z  }|j;                  t3        d|D cg c]  }|j6                   c}      ||       |dk(  sT|D ]  }|d}
tA        |
       t3        d|      }$|jC                  |$||      x}%stE        ||%       |jG                  |%       |t%        |%      z  }|jC                  |$||      x}%rB |dk(  s|dk(  rl|rjd}&|dk(  rt'        |      }&|jC                  |&||      x}%rBtE        ||%       |jG                  |%       |t%        |%      z  }|jC                  |&||      x}%rB||||dS # t        $ r t        | j                               }Y w xY wc c}w c c}w c c}w c c}w ) aj  Index data from the loader into the vector store.

    Indexing functionality uses a manager to keep track of which documents
    are in the vector store.

    This allows us to keep track of which documents were updated, and which
    documents were deleted, which documents should be skipped.

    For the time being, documents are indexed using their hashes, and users
    are not able to specify the uid of the document.

    !!! warning "Behavior changed in `langchain-core` 0.3.25"

        Added `scoped_full` cleanup mode.

    !!! warning

        * In full mode, the loader should be returning
            the entire dataset, and not just a subset of the dataset.
            Otherwise, the auto_cleanup will remove documents that it is not
            supposed to.
        * In incremental mode, if documents associated with a particular
            source id appear across different batches, the indexing API
            will do some redundant work. This will still result in the
            correct end state of the index, but will unfortunately not be
            100% efficient. For example, if a given document is split into 15
            chunks, and we index them using a batch size of 5, we'll have 3 batches
            all with the same source id. In general, to avoid doing too much
            redundant work select as big a batch size as possible.
        * The `scoped_full` mode is suitable if determining an appropriate batch size
            is challenging or if your data loader cannot return the entire dataset at
            once. This mode keeps track of source IDs in memory, which should be fine
            for most use cases. If your dataset is large (10M+ docs), you will likely
            need to parallelize the indexing process regardless.

    Args:
        docs_source: Data loader or iterable of documents to index.
        record_manager: Timestamped set to keep track of which documents were
            updated.
        vector_store: `VectorStore` or DocumentIndex to index the documents into.
        batch_size: Batch size to use when indexing.
        cleanup: How to handle clean up of documents.

            - incremental: Cleans up all documents that haven't been updated AND
                that are associated with source IDs that were seen during indexing.
                Clean up is done continuously during indexing helping to minimize the
                probability of users seeing duplicated content.
            - full: Delete all documents that have not been returned by the loader
                during this run of indexing.
                Clean up runs after all documents have been indexed.
                This means that users may see duplicated content during indexing.
            - scoped_full: Similar to Full, but only deletes all documents
                that haven't been updated AND that are associated with
                source IDs that were seen during indexing.
            - None: Do not delete any documents.
        source_id_key: Optional key that helps identify the original source
            of the document.
        cleanup_batch_size: Batch size to use when cleaning up documents.
        force_update: Force update documents even if they are present in the
            record manager. Useful if you are re-indexing with updated embeddings.
        key_encoder: Hashing algorithm to use for hashing the document content and
            metadata. Options include "blake2b", "sha256", and "sha512".

            !!! version-added "Added in `langchain-core` 0.3.66"

        key_encoder: Hashing algorithm to use for hashing the document.
            If not provided, a default encoder using SHA-1 will be used.
            SHA-1 is not collision-resistant, and a motivated attacker
            could craft two different texts that hash to the
            same cache key.

            New applications should use one of the alternative encoders
            or provide a custom and strong key encoder function to avoid this risk.

            When changing the key encoder, you must change the
            index as well to avoid duplicated documents in the cache.
        upsert_kwargs: Additional keyword arguments to pass to the add_documents
            method of the `VectorStore` or the upsert method of the DocumentIndex.
            For example, you can use this to specify a custom vector_field:
            upsert_kwargs={"vector_field": "embedding"}
            !!! version-added "Added in `langchain-core` 0.3.10"

    Returns:
        Indexing result which contains information about how many documents
        were added, updated, deleted, or skipped.

    Raises:
        ValueError: If cleanup mode is not one of 'incremental', 'full' or None
        ValueError: If cleanup mode is incremental and source_id_key is None.
        ValueError: If `VectorStore` does not have
            "delete" and "add_documents" required methods.
        ValueError: If source_id_key is not None, but is not a string or callable.
        TypeError: If `vectorstore` is not a `VectorStore` or a DocumentIndex.
        AssertionError: If `source_id` is None when cleanup mode is incremental.
            (should be unreachable code).
    r   >   Nfullincrementalscoped_fullKcleanup should be one of 'incremental', 'full', 'scoped_full' or None. Got rZ   >   r   r   NJSource id key is required when cleanup mode is incremental or scoped_full.r   add_documentsVectorstore  does not have required method z1Vectorstore has not implemented the delete methodr   r   rz   FstrictlSource IDs are required when cleanup mode is incremental or scoped_full. Document that starts with content: r    was not assigned as source id.r   Sequence[str]r!   time_at_leastr   r   	group_idsr   r   Asource_id cannot be None at this point. Reached unreachable code.r   beforelimitr   r   r   r   r   )$r4   r^   r[   r   hasattrr]   r   r   r   r   	lazy_loadr   rD   loadr`   get_timerb   rJ   rL   rE   rh   r   ziprv   rd   r
   existsrc   rM   updater   upsertAssertionError	list_keysr   delete_keys)'docs_sourcerecord_managerr   r   r   rY   r   r   rz   r   r_   destinationmethodsmethoddoc_iteratorsource_id_assignerindex_start_dtr   r   r   r   scoped_full_cleanup_source_ids	doc_batchoriginal_batch_sizerX   hashed_docsrg   
source_ids	source_idexists_batchuidsdocs_to_indexuids_to_refresh	seen_docs
doc_exists	hashed_idsource_ids_uids_to_deletedelete_group_idss'                                          r'   indexr   "  s   ` fBB)1 	 o11}7LX 	 oK +{+_- 	&F;/";-/NvhW  !o%	& ##{'9'99 FCS/!	K	/$%Q( 	 n+z*	4&002L K(0? $,,.NIKKK/2u"J5 o3	!)n!  ) ,C[I
 	*S-=== >I,
/9z*,

 ,
 44),ZU)S B%	:$$ %/$;$;DS$A#B C9:  %S/)m+266yAB oz:J%,,["Ac366"AB

 !e	&)+|E&R 		-"J
UJMM2IMM),#**95KK	"  ,		- !!/!P3//K +{3)))!) %*	 K7"""!$*
 ]+c)n<<I3y>)K
 	["Ac366"AB ( 	 	
 m# ( .	$4  )--. 
;K$2$<$<%nDV %= % .  ^4**>:s>22 %3$<$<%nDV %= % . Qo3b &= %C15m##$BC . 8 8&~EW !9 !
 
n 
 K0&&~63~..K !/ 8 8&~EW !9 !
 
n 
 """	 _ # 	4 0 0 23L	4*,
, #B^ #Bs*   #R R1	R6R;$S "R.-R.c               $   K   | D ]  }| 	 yw)z)Convert an iterable to an async iterator.Nr3   )iteratoritems     r'   _to_async_iteratorr   Y  s      
s   c                J  K   t        | t              r.| j                  |       d {   }||du rd}t        |      y y t        | t              r4| j                  |       d {   }d|v r|d   dkD  rd}t        |      y y dt        |        d}t        |      7 w7 ;w)NFr   r   r   r   r   rZ   )r[   r   adeleterj   r   r]   r   r   s        r'   _adeleter   _  s      ,,&..s33	 Y%%7?C#C(( &8  
L-	0 , 4 4S 99?*|/Lq/PAC#C(( 0Q*
%&a) 	 n 4
 :s!   %B#B=B#%B!&:B#!B#c               &  K   |dk(  r
t                |dvrd| d}
t        |
      |dv r|d}
t        |
      |}t        |t              rdd	g}|D ]!  }t	        ||      rd
| d| }
t        |
       t        |      j                  t        j                  k(  r^t        |      j                  t        j                  k(  r8d}
t        |
      t        |t              rndt        |       d}
t        |
      t        | t              r	 | j                         }nt	        | d      r| }nt        |       }t        |      }|j!                          d{   }d}d}d}d}t#               }t%        ||      2 3 d{   }t'        |      }t)        t+        |D cg c]  }t-        ||       c}            }||t'        |      z
  z  }|D cg c]
  } ||       }}|dv rYt/        ||d      D ]<  \  }}|d|j0                  dd  d}
t        |
      |dk(  s,|j3                  |       > t5        d|      }|j7                  t5        d|D cg c]  }|j8                   c}             d{   }g }g }g } t#               }!t/        ||d      D ]e  \  }}"t5        d|j8                        }#|"r&|r|!j3                  |#       n| j;                  |#       D|j;                  |#       |j;                  |       g | r)|j=                  | |       d{    |t'        |       z  }|rt        |t              r# |j>                  |f||d|	xs i  d{    n/t        |t              r |j@                  |fi |	xs i  d{    |t'        |      t'        |!      z
  z  }|t'        |!      z  }|j=                  t5        d|D cg c]  }|j8                   c}      ||       d{    |dk(  s|D ]  }|d}
tC        |
       t5        d|      }$|jE                  |$||       d{   x}%stG        ||%       d{    |jI                  |%       d{    |t'        |%      z  }|jE                  |$||       d{   x}%rZ# t        $ r t        | j                               }Y w xY w7 k7 Gc c}w c c}w c c}w 7 c7 7 7 Wc c}w 7 7 7 7 7 `6 |dk(  s|dk(  r|rd}&|dk(  rt)        |      }&|jE                  |&||       d{  7  x}%r]tG        ||%       d{  7   |jI                  |%       d{  7   |t'        |%      z  }|jE                  |&||       d{  7  x}%r]||||d S w)!a  Async index data from the loader into the vector store.

    Indexing functionality uses a manager to keep track of which documents
    are in the vector store.

    This allows us to keep track of which documents were updated, and which
    documents were deleted, which documents should be skipped.

    For the time being, documents are indexed using their hashes, and users
    are not able to specify the uid of the document.

    !!! warning "Behavior changed in `langchain-core` 0.3.25"

        Added `scoped_full` cleanup mode.

    !!! warning

        * In full mode, the loader should be returning
            the entire dataset, and not just a subset of the dataset.
            Otherwise, the auto_cleanup will remove documents that it is not
            supposed to.
        * In incremental mode, if documents associated with a particular
            source id appear across different batches, the indexing API
            will do some redundant work. This will still result in the
            correct end state of the index, but will unfortunately not be
            100% efficient. For example, if a given document is split into 15
            chunks, and we index them using a batch size of 5, we'll have 3 batches
            all with the same source id. In general, to avoid doing too much
            redundant work select as big a batch size as possible.
        * The `scoped_full` mode is suitable if determining an appropriate batch size
            is challenging or if your data loader cannot return the entire dataset at
            once. This mode keeps track of source IDs in memory, which should be fine
            for most use cases. If your dataset is large (10M+ docs), you will likely
            need to parallelize the indexing process regardless.

    Args:
        docs_source: Data loader or iterable of documents to index.
        record_manager: Timestamped set to keep track of which documents were
            updated.
        vector_store: `VectorStore` or DocumentIndex to index the documents into.
        batch_size: Batch size to use when indexing.
        cleanup: How to handle clean up of documents.

            - incremental: Cleans up all documents that haven't been updated AND
                that are associated with source IDs that were seen during indexing.
                Clean up is done continuously during indexing helping to minimize the
                probability of users seeing duplicated content.
            - full: Delete all documents that have not been returned by the loader
                during this run of indexing.
                Clean up runs after all documents have been indexed.
                This means that users may see duplicated content during indexing.
            - scoped_full: Similar to Full, but only deletes all documents
                that haven't been updated AND that are associated with
                source IDs that were seen during indexing.
            - None: Do not delete any documents.
        source_id_key: Optional key that helps identify the original source
            of the document.
        cleanup_batch_size: Batch size to use when cleaning up documents.
        force_update: Force update documents even if they are present in the
            record manager. Useful if you are re-indexing with updated embeddings.
        key_encoder: Hashing algorithm to use for hashing the document content and
            metadata. Options include "blake2b", "sha256", and "sha512".

            !!! version-added "Added in `langchain-core` 0.3.66"

        key_encoder: Hashing algorithm to use for hashing the document.
            If not provided, a default encoder using SHA-1 will be used.
            SHA-1 is not collision-resistant, and a motivated attacker
            could craft two different texts that hash to the
            same cache key.

            New applications should use one of the alternative encoders
            or provide a custom and strong key encoder function to avoid this risk.

            When changing the key encoder, you must change the
            index as well to avoid duplicated documents in the cache.
        upsert_kwargs: Additional keyword arguments to pass to the add_documents
            method of the `VectorStore` or the upsert method of the DocumentIndex.
            For example, you can use this to specify a custom vector_field:
            upsert_kwargs={"vector_field": "embedding"}
            !!! version-added "Added in `langchain-core` 0.3.10"

    Returns:
        Indexing result which contains information about how many documents
        were added, updated, deleted, or skipped.

    Raises:
        ValueError: If cleanup mode is not one of 'incremental', 'full' or None
        ValueError: If cleanup mode is incremental and source_id_key is None.
        ValueError: If `VectorStore` does not have
            "adelete" and "aadd_documents" required methods.
        ValueError: If source_id_key is not None, but is not a string or callable.
        TypeError: If `vector_store` is not a `VectorStore` or DocumentIndex.
        AssertionError: If `source_id_key` is None when cleanup mode is
            incremental or `scoped_full` (should be unreachable).
    r   >   Nr   r   r   r   rZ   >   r   r   Nr   r   aadd_documentsr   r   z<Vectorstore has not implemented the adelete or delete methodr   	__aiter__r   r   Fr   r   r   r   r   r   r!   r   r   r   r   r   r   r   r   )%r4   r^   r[   r   r   r]   r   r   r   r   r   
alazy_loadr   r   r   r`   	aget_timerb   rP   rL   rE   rh   r   r   rv   rd   r
   aexistsrc   rM   aupdater   aupsertr   
alist_keysr   adelete_keys)'r   r   r   r   r   rY   r   r   rz   r   r_   r   r   r   async_doc_iteratorr   r   r   r   r   r   r   r   r   rX   r   r   r   rg   r   r   r   r   r   r   r   r   r   r   s'                                          r'   aindexr   u  s    ` fBB)1 	 o11}7LX 	 oK +{+ ./ 	&F;/";-/NvhW  !o%	& %%)<)<<[!((K,>,>> QCS/!	K	/$%Q( 	 n+z*	H!,!7!7!9 
k	*(/<0? *3355NIKKK/2u"":/AB p3 p3i!)n!  ) ,C[I
 	*S-=== 0;,
(+s#,

 ,
 44),ZU)S B%	:$$ %/$;$;DS$A#B C9:  %S/)m+266yAB oz:J+33["Ac366"AB
 

 (*!e	&)+|E&R 		-"J
UJMM2IMM),#**95KK	"  ,		-  (((WWW3//K +{30k00!) %*	   K7)k))!$*   ]+c)n<<I3y>)K
 $$["Ac366"AB ( % 
 	
 	
 m#
 ( .	$4  )--. 
;K*8*C*C%nDV +D + % .  {N;;;$11.AAAs>22 +9*C*C%nDV +D + % .  # 	H "4K4D4D4F!G	H 6p3,
, #B
, X #B	
.% <A%S Cd &= %C15m##$BC&4&?&?&~EW '@ '
 !
 !
 
n 
 ;777 --n===3~..K '5&?&?&~EW '@ '
 !
 !
 
n 
 """	 s  AV"B(VR 9VR?"V7S/;S<S/?VS- VS
?V4VS#V1S2B"VSAVS/VSAVS V,S%-	V8V 1V1S'2V:V	S)
V"S+#)VS-VV"R<8V;R<<VS/VVVV V'V)V+V-V/6V%T(&V<T?=VU*VVV
V)r%   r!   r   r!   )r   r   )r%   r!   r7   .Literal['sha1', 'sha256', 'sha512', 'blake2b']r   	uuid.UUID)r?   zdict[Any, Any]r7   r   r   r   )rF   r   rG   Iterable[T]r   zIterator[list[T]])rF   r   rG   zAsyncIterable[T]r   zAsyncIterator[list[T]])rY   &str | Callable[[Document], str] | Noner   z Callable[[Document], str | None])re   zIterable[Document]r   zIterator[Document])rs   r!   r7   r   r   r!   )ry   r   rz   zJCallable[[Document], str] | Literal['sha1', 'sha256', 'sha512', 'blake2b']r   r   )r   VectorStore | DocumentIndexr   z	list[str]r   r   )r   zBaseLoader | Iterable[Document]r   r   r   r   r   r   r   4Literal['incremental', 'full', 'scoped_full'] | NonerY   r   r   r   r   r*   rz   JLiteral['sha1', 'sha256', 'sha512', 'blake2b'] | Callable[[Document], str]r   dict[str, Any] | Noner   r   )r   r   r   zAsyncIterator[T])r   z9BaseLoader | Iterable[Document] | AsyncIterator[Document]r   r   r   r   r   r   r   r   rY   r   r   r   r   r*   rz   r   r   r   r   r   )9rn   
__future__r   r   r=   r"   r0   	itertoolsr   typingr   r   r   r   r	   r
   $langchain_core.document_loaders.baser   langchain_core.documentsr   langchain_core.exceptionsr   langchain_core.indexing.baser   r   langchain_core.vectorstoresr   collections.abcr   r   r   r   r   r   UUIDr$   r   r(   r+   r   r4   r8   rA   rJ   rP   r`   rh   rj   r6   r   r   r   r   r   r   r   r   r3   r)   r'   <module>r     s   F "       < - 8 E 3  t$ CL7 ! D  "$22%S22>
>(V>>9%"(3* 3
H$7757
 7x	' 	'-	 
H
LY 
L$ DH<@#"(+/s0s!s .s
 s Bs :s s s s )s sn	-	 
6 DH<@#"(+/J! .
  B :    ) r)   