
    3fiA                         d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ erd d	lmZ  G d
 de      Z G d de      Zy)    N)Path)TYPE_CHECKINGAnyDictIterableListOptionalTupleUnion)Document)
Embeddings)VectorStore)
ConfigDict)Self	neural_dbc                   *   e Zd ZU dZd)dZdZded<   	  ed      Ze	d*d	e
e   ddfd
       Ze	 d*d	e
e   dedefd       Ze	 d*deeef   d	e
e   defd       Ze	 d*dee   dede
ee      dedd f
d       Z	 d*dee   de
ee      dedee   fdZ	 	 d+deeedf      dedededee   f
dZdeeedf      ded   fdZdedeeef   ddfdZdee eef      ddfdZ!d ed!eddfd"Z"d#ee eef      ddfd$Z#	 d,ded%ededee$   fd&Z%d'eddfd(Z&y)-NeuralDBVectorStorea  Vectorstore that uses ThirdAI's NeuralDB.

    To use, you should have the ``thirdai[neural_db]`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import NeuralDBVectorStore
            from thirdai import neural_db as ndb

            db = ndb.NeuralDB()
            vectorstore = NeuralDBVectorStore(db=db)
    dbndb.NeuralDBreturnNc                     || _         y Nr   selfr   s     o/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_community/vectorstores/thirdai_neuraldb.py__init__zNeuralDBVectorStore.__init__    	        forbidextrathirdai_keyc                     	 ddl m} t        j                  j	                  d       |j                  | xs t        j                  d             y # t        $ r t        d      w xY w)Nr   )	licensingzthirdai.neural_dbTHIRDAI_KEYz{Could not import thirdai python package and neuraldb dependencies. Please install it with `pip install thirdai[neural_db]`.)	thirdair&   	importlibutil	find_specactivateosgetenvImportError)r$   r&   s     r   _verify_thirdai_libraryz+NeuralDBVectorStore._verify_thirdai_library*   s[    
	)NN$$%89{Fbii.FG 	K 	s   AA A%model_kwargsc                 j    t         j                  |       ddlm}  |  |j                  di |      S )a  
        Create a NeuralDBVectorStore from scratch.

        To use, set the ``THIRDAI_KEY`` environment variable with your ThirdAI
        API key, or pass ``thirdai_key`` as a named parameter.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import NeuralDBVectorStore

                vectorstore = NeuralDBVectorStore.from_scratch(
                    thirdai_key="your-thirdai-key",
                )

                vectorstore.insert([
                    "/path/to/doc.pdf",
                    "/path/to/doc.docx",
                    "/path/to/doc.csv",
                ])

                documents = vectorstore.similarity_search("AI-driven music therapy")
        r   r   r    )r   r0   r(   r   NeuralDB)clsr$   r1   ndbs       r   from_scratchz NeuralDBVectorStore.from_scratch8   s.    : 	33K@,lcll2\233r    
checkpointc                 |    t         j                  |       ddlm}  | |j                  j                  |            S )a:  
        Create a NeuralDBVectorStore with a base model from a saved checkpoint

        To use, set the ``THIRDAI_KEY`` environment variable with your ThirdAI
        API key, or pass ``thirdai_key`` as a named parameter.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import NeuralDBVectorStore

                vectorstore = NeuralDBVectorStore.from_checkpoint(
                    checkpoint="/path/to/checkpoint.ndb",
                    thirdai_key="your-thirdai-key",
                )

                vectorstore.insert([
                    "/path/to/doc.pdf",
                    "/path/to/doc.docx",
                    "/path/to/doc.csv",
                ])

                documents = vectorstore.similarity_search("AI-driven music therapy")
        r   r   r   )r   r0   r(   r   r4   from_checkpoint)r5   r8   r$   r6   s       r   r:   z#NeuralDBVectorStore.from_checkpointZ   s/    < 	33K@,cll22:>??r    texts	embedding	metadataskwargsc                 t    i }d|v r|d   |d<   |d=  | j                   di |} |j                  ||fi | |S )z9Return VectorStore initialized from texts and embeddings.r$   r3   )r7   	add_texts)r5   r;   r<   r=   r>   r1   vectorstores          r   
from_textszNeuralDBVectorStore.from_texts}   sZ     F"*0*?L'}%&c&&66eY9&9r    c                    ddl }ddlm} |j                  d|i      }|r.|j	                  ||j                  j                  |      gd      }t        j                  ddd	      }|j                  |        | j                   |j                  |j                        gfi |d   }| j                  j                  j                  j                  |      d   }	t!        t#        |            D 
cg c]  }
t%        |	|
z          c}
S c c}
w )
ar  Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            kwargs: vectorstore specific parameters

        Returns:
            List of ids from adding the texts into the vectorstore.
        r   Nr   r;      )axiswF)deletedelete_on_close)pandasr(   r   	DataFrameconcatfrom_recordstempfileNamedTemporaryFileto_csvinsertCSVnamer   _savable_state	documentsget_source_by_idrangelenstr)r   r;   r=   r>   pdr6   dftemp	source_idoffsetis              r   r@   zNeuralDBVectorStore.add_texts   s      	,\\7E*+B 9 9) DEANB**3ueT
		$DKK!3 4??B	''11BB9MaP).s5z):;AFQJ;;;s   0Dsourceszndb.Documenttrain	fast_modec                 d    | j                  |      } | j                  j                  d|||d|S )au  Inserts files / document sources into the vectorstore.

        Args:
            train: When True this means that the underlying model in the
            NeuralDB will undergo unsupervised pretraining on the inserted files.
            Defaults to True.
            fast_mode: Much faster insertion with a slight drop in performance.
            Defaults to True.
        )r_   r`   fast_approximationr3   )_preprocess_sourcesr   rP   )r   r_   r`   ra   r>   s        r   rP   zNeuralDBVectorStore.insert   sC      **73tww~~ 
(
 	
 	
r    c                    ddl m} |s|S g }|D ]  }t        |t              s|j	                  |       %|j                         j                  d      r"|j	                   |j                  |             f|j                         j                  d      r"|j	                   |j                  |             |j                         j                  d      r"|j	                   |j                  |             t        d| d       |S )zChecks if the provided sources are string paths. If they are, convert
        to NeuralDB document objects.

        Args:
            sources: list of either string paths to PDF, DOCX or CSV files, or
            NeuralDB document objects.
        r   r   z.pdfz.docxz.csvzCould not automatically load z. Only files with .pdf, .docx, or .csv extensions can be loaded automatically. For other formats, please use the appropriate document object from the ThirdAI library.)r(   r   
isinstancerX   appendlowerendswithPDFDOCXrQ   RuntimeError)r   r_   r6   preprocessed_sourcesdocs        r   rd   z'NeuralDBVectorStore._preprocess_sources   s     	-N! 	Cc3'$++C099;''/(//=YY[))'2(//>YY[))&1(//=&7u =P P 	" $#r    querydocument_idc                 N    | j                   j                  |t        |             y)a%  The vectorstore upweights the score of a document for a specific query.
        This is useful for fine-tuning the vectorstore to user behavior.

        Args:
            query: text to associate with `document_id`
            document_id: id of the document to associate query with.
        N)r   text_to_resultint)r   ro   rp   s      r   upvotezNeuralDBVectorStore.upvote   s     	uc+&67r    query_id_pairsc           
          | j                   j                  |D cg c]  \  }}|t        |      f c}}       yc c}}w )a  Given a batch of (query, document id) pairs, the vectorstore upweights
        the scores of the document for the corresponding queries.
        This is useful for fine-tuning the vectorstore to user behavior.

        Args:
            query_id_pairs: list of (query, document id) pairs. For each pair in
            this list, the model will upweight the document id for the query.
        N)r   text_to_result_batchrs   )r   ru   ro   doc_ids       r   upvote_batchz NeuralDBVectorStore.upvote_batch   s4     	$$7EFmeVeS[!F	
Fs   ;
sourcetargetc                 <    | j                   j                  ||       y)aA  The vectorstore associates a source phrase with a target phrase.
        When the vectorstore sees the source phrase, it will also consider results
        that are relevant to the target phrase.

        Args:
            source: text to associate to `target`.
            target: text to associate `source` to.
        N)r   	associate)r   rz   r{   s      r   r}   zNeuralDBVectorStore.associate   s     	&&)r    
text_pairsc                 :    | j                   j                  |       y)a0  Given a batch of (source, target) pairs, the vectorstore associates
        each source phrase with the corresponding target phrase.

        Args:
            text_pairs: list of (source, target) text pairs. For each pair in
            this list, the source will be associated with the target.
        N)r   associate_batch)r   r~   s     r   r   z#NeuralDBVectorStore.associate_batch  s     	
+r    kc                 f   	  | j                   j                  d||d|}|D cg c]a  }t        |j                  |j                  |j
                  |j                  |j                  |j                  |j                  d      d      c c}S c c}w # t        $ r}t        d|       |d}~ww xY w)Retrieve {k} contexts with for a given query

        Args:
            query: Query to submit to the model
            k: The max number of context results to retrieve. Defaults to 10.
        ro   top_krD   )id
upvote_idsrz   metadatascorecontextpage_contentr   "Error while retrieving documents: Nr3   )r   searchr   textr   r   rz   r   r   r   	Exception
ValueErrorr   ro   r   r>   
referencesrefes          r   similarity_searchz%NeuralDBVectorStore.similarity_search  s    	N'Ge1GGJ &  !$!ff&)nn"%**$'LL!$#&;;q>
    	NA!EFAM	Ns)   #B A&BB B 	B0B++B0pathc                 :    | j                   j                  |       y)zSaves a NeuralDB instance to disk. Can be loaded into memory by
        calling NeuralDB.from_checkpoint(path)

        Args:
            path: path on disk to save the NeuralDB instance to.
        N)r   save)r   r   s     r   r   zNeuralDBVectorStore.save,  s     	Tr    )r   r   r   Nr   )TT
   )'__name__
__module____qualname____doc__r   r   __annotations__r   model_configstaticmethodr	   rX   r0   classmethodr   r   r7   r   r   r:   r   r   dictrB   r   r@   listboolrP   rd   rs   rt   r
   ry   r}   r   r   r   r   r3   r    r   r   r      s    BL Xc] d    &*4c]4 4 
	4 4B  &* @#t)$ @ c] @ 
	 @  @D 
 +/	Cy  DJ'	
  
 & +/<}< DJ'< 	<
 
c<> 	
eC/01
 
 	

 
 
c
0 $E#~"567 $	n	 $D8C 8eCHo 8$ 8
4c3h+@ 
T 
	* 	*S 	*T 	*,$uS#X*? ,D , $&NN N14N	hN8  r    r   c            
           e Zd ZU dZddZdZded<   	  ed      Z	 dd	e	d
e
dedee   fdZdeee	ef      defdZdee	   ddfdZy)NeuralDBClientVectorStoreas  Vectorstore that uses ThirdAI's NeuralDB Enterprise Python Client for NeuralDBs.

    To use, you should have the ``thirdai[neural_db]`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import NeuralDBClientVectorStore
            from thirdai.neural_db import ModelBazaar, NeuralDBClient

            bazaar = ModelBazaar(base_url="http://{NEURAL_DB_ENTERPRISE_IP}/api/")
            bazaar.log_in(email="user@thirdai.com", password="1234")

            ndb_client = NeuralDBClient(
                deployment_identifier="user/model-0:user/deployment-0",
                base_url="http://{NEURAL_DB_ENTERPRISE_IP}/api/",
                bazaar=bazaar
            )
            vectorstore = NeuralDBClientVectorStore(db=ndb_client)
            retriever = vectorstore.as_retriever(search_kwargs={'k':5})

    r   ndb.NeuralDBClientr   Nc                     || _         y r   r   r   s     r   r   z"NeuralDBClientVectorStore.__init__N  r   r    r!   r"   ro   r   r>   c                     	  | j                   j                  d||d|d   }|D cg c]'  }t        |d   |d   |d   |d   |d   |d   d	      ) c}S c c}w # t        $ r}t	        d
|       |d}~ww xY w)r   r   r   r   r   rz   r   r   )r   rz   r   r   r   r   r   Nr3   )r   r   r   r   r   r   s          r   r   z+NeuralDBClientVectorStore.similarity_searchX  s    	N'Ge1GGUJ &  !$V!$i"%h-$'
O!$X#&y>	    	NA!EFAM	Ns(   &A ,AA A 	A9%A44A9rT   c                 8    | j                   j                  |      S )a  
        Inserts documents into the VectorStore and return the corresponding Sources.

        Args:
            documents (List[Dict[str, Any]]): A list of dictionaries that
            represent documents to be inserted to the VectorStores.
            The document dictionaries must be in the following format:
            {"document_type": "DOCUMENT_TYPE", **kwargs} where "DOCUMENT_TYPE"
            is one of the following:
            "PDF", "CSV", "DOCX", "URL", "SentenceLevelPDF", "SentenceLevelDOCX",
            "Unstructured", "InMemoryText".
            The kwargs for each document type are shown below:

            class PDF(Document):
                document_type: Literal["PDF"]
                path: str
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False
                version: str = "v1"
                chunk_size: int = 100
                stride: int = 40
                emphasize_first_words: int = 0
                ignore_header_footer: bool = True
                ignore_nonstandard_orientation: bool = True

            class CSV(Document):
                document_type: Literal["CSV"]
                path: str
                id_column: Optional[str] = None
                strong_columns: Optional[List[str]] = None
                weak_columns: Optional[List[str]] = None
                reference_columns: Optional[List[str]] = None
                save_extra_info: bool = True
                metadata: Optional[dict[str, Any]] = None
                has_offset: bool = False
                on_disk: bool = False

            class DOCX(Document):
                document_type: Literal["DOCX"]
                path: str
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            class URL(Document):
                document_type: Literal["URL"]
                url: str
                save_extra_info: bool = True
                title_is_strong: bool = False
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            class SentenceLevelPDF(Document):
                document_type: Literal["SentenceLevelPDF"]
                path: str
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            class SentenceLevelDOCX(Document):
                document_type: Literal["SentenceLevelDOCX"]
                path: str
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            class Unstructured(Document):
                document_type: Literal["Unstructured"]
                path: str
                save_extra_info: bool = True
                metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            class InMemoryText(Document):
                document_type: Literal["InMemoryText"]
                name: str
                texts: list[str]
                metadatas: Optional[list[dict[str, Any]]] = None
                global_metadata: Optional[dict[str, Any]] = None
                on_disk: bool = False

            For Document types with the arg "path", ensure that
            the path exists on your local machine.
        )r   rP   )r   rT   s     r   rP   z NeuralDBClientVectorStore.inserts  s    d ww~~i((r    
source_idsc                 :    | j                   j                  |       y)z
        Deletes documents from the VectorStore using source ids.

        Args:
            files (List[str]): A list of source ids to delete from the VectorStore.
        N)r   rG   )r   r   s     r   remove_documentsz*NeuralDBClientVectorStore.remove_documents  s     	z"r    )r   r   r   Nr   )r   r   r   r   r   r   r   r   r   rX   rs   r   r   r   r   r   rP   r   r   r3   r    r   r   r   6  s    .  $B#"L
 $&NN N14N	hN6R)T#s(^ 4 R) R)h#49 # #r    r   )r)   r-   rM   pathlibr   typingr   r   r   r   r   r	   r
   r   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.vectorstoresr   pydanticr   typing_extensionsr   r(   r   r6   r   r   r3   r    r   <module>r      sL     	   S S S - 0 3  "(b+ bJ	Y# Y#r    