
    3fiT                    *   d dl mZ d dlZd dlZd dlZd dlZd dlmZmZm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ er d d	lmZ d d
lmZmZ d dlmZ d dlm Z  d dl!m"Z"  ejF                  e$      Z% eddd       G d de             Z&y)    )annotationsN)TYPE_CHECKINGAnyIterableListOptionalTupleType)
deprecated)Document)
Embeddings)VectorStore)get_client_infostorage)MatchingEngineIndexMatchingEngineIndexEndpoint)	Namespace)CredentialsTensorflowHubEmbeddingsz0.0.12z1.0z1langchain_google_vertexai.VectorSearchVectorStore)sinceremovalalternative_importc                  H    e Zd ZdZ	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZedd       ZddZ	 d	 	 	 	 	 	 	 ddZddZ		 	 d	 	 	 	 	 	 	 dd	Z
	 	 d	 	 	 	 	 	 	 d d
Z	 	 d	 	 	 	 	 	 	 	 	 d!dZ	 	 d	 	 	 	 	 	 	 	 	 d"dZd#dZd$dZe	 d	 	 	 	 	 	 	 	 	 	 	 d%d       Ze	 	 d&	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'd       Zed(d       Ze	 	 	 	 d)d       Ze	 	 	 	 	 	 	 	 	 	 d*d       Ze	 	 	 	 	 	 	 	 	 	 d+d       Ze	 	 	 	 	 	 d,d       Ze	 	 	 	 	 	 	 	 	 	 d-d       Zed.d       Z xZS )/MatchingEnginea@  `Google Vertex AI Vector Search` (previously Matching Engine) vector store.

    While the embeddings are stored in the Matching Engine, the embedded
    documents will be stored in GCS.

    An existing Index and corresponding Endpoint are preconditions for
    using this module.

    See usage in docs/integrations/vectorstores/google_vertex_ai_vector_search.ipynb

    Note that this implementation is mostly meant for reading if you are
    planning to do a real time implementation. While reading is a real time
    operation, updating the index takes close to one hour.N)document_id_keyc                   t         	|           | j                          || _        || _        || _        || _        || _        || _        || _	        || _
        y)a  Google Vertex AI Vector Search (previously Matching Engine)
         implementation of the vector store.

        While the embeddings are stored in the Matching Engine, the embedded
        documents will be stored in GCS.

        An existing Index and corresponding Endpoint are preconditions for
        using this module.

        See usage in
        docs/integrations/vectorstores/google_vertex_ai_vector_search.ipynb.

        Note that this implementation is mostly meant for reading if you are
        planning to do a real time implementation. While reading is a real time
        operation, updating the index takes close to one hour.

        Attributes:
            project_id: The GCS project id.
            index: The created index class. See
                ~:func:`MatchingEngine.from_components`.
            endpoint: The created endpoint class. See
                ~:func:`MatchingEngine.from_components`.
            embedding: A :class:`Embeddings` that will be used for
                embedding the text sent. If none is sent, then the
                multilingual Tensorflow Universal Sentence Encoder will be used.
            gcs_client: The GCS client.
            gcs_bucket_name: The GCS bucket name.
            credentials (Optional): Created GCP credentials.
            document_id_key (Optional): Key for storing document ID in document
                metadata. If None, document ID will not be returned in document
                metadata.
        N)super__init__'_validate_google_libraries_installation
project_idindexendpoint	embedding
gcs_clientcredentialsgcs_bucket_namer   )
selfr"   r#   r$   r%   r&   r(   r'   r   	__class__s
            n/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_community/vectorstores/matching_engine.pyr    zMatchingEngine.__init__1   sX    X 	446$
 "$&..    c                    | j                   S N)r%   )r)   s    r+   
embeddingszMatchingEngine.embeddingsi   s    ~~r,   c                R    	 ddl m}m} ddlm} y# t
        $ r t        d      w xY w)z>Validates that Google libraries that are needed are installed.r   )
aiplatformr   service_accountzwYou must run `pip install --upgrade google-cloud-aiplatform google-cloud-storage`to use the MatchingEngine Vectorstore.N)google.cloudr1   r   google.oauth2r3   ImportError)r)   r1   r   r3   s       r+   r!   z6MatchingEngine._validate_google_libraries_installationm   s.    	85 	9 	s    &c                   t        |      }|;t        |      t        |      k7  r$t        dt        |       dt        |       d      t        j	                  d       | j
                  j                  |      }g }g }t        t        ||            D ]k  \  }\  }}	t        t        j                               }
|j                  |
       |
|d}|||   |d<   |j                  |       | j                  |	d|
        m t        j	                  dt        |       d	       d
j                  |D cg c]  }t        j                   |       c}      }dt        j                          }| dt#        j"                          d}| j                  ||       t        j	                  d| j$                   d| d       | j&                  j)                  d| j$                   d| d      | _        t        j	                  d       |S c c}w )as  Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            kwargs: vectorstore specific parameters.

        Returns:
            List of ids from adding the texts into the vectorstore.
        z:texts and metadatas do not have the same length. Received z texts and z metadatas.zEmbedding documents.)idr%   metadata
documents/z	Uploaded z documents to GCS.
zindexes//z.jsonz)Uploaded updated json with embeddings to .gs://)contents_delta_uriz%Updated index with new configuration.)listlen
ValueErrorloggerdebugr%   embed_documents	enumeratezipstruuiduuid4append_upload_to_gcsjoinjsondumpstimer(   r#   update_embeddings)r)   texts	metadataskwargsr/   jsonsidsidxr%   textr8   json_x
result_strfilename_prefixfilenames                   r+   	add_textszMatchingEngine.add_textsy   s     U SZ3y>%ALu:,k#i.)9F  	+,^^33E:
&/J0F&G 	9"C")TTZZ\"BJJrN!#)<E$$-cNj!LL
2$&78	9 	yS
*<=> YYu=!

1=>
$TZZ\N3%&a		}E:J17##$AhZq2	

 ZZ11!&t';';&<Ao=NaP 2 

 	<=
!  >s   1Hc                    | j                   j                  | j                        }|j                  |      }|j	                  |       y)zUploads data to gcs_location.

        Args:
            data: The data that will be stored.
            gcs_location: The location where the data will be stored.
        N)r&   
get_bucketr(   blobupload_from_string)r)   datagcs_locationbucketra   s        r+   rL   zMatchingEngine._upload_to_gcs   s:     ++D,@,@A{{<(%r,   c                    t         j                  d| d       | j                  j                  |      }| j	                  |||      S )a  Return docs most similar to query and their cosine distance from the query.

        Args:
            query: String query look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: Optional. A list of Namespaces for filtering
                the matching results.
                For example:
                [Namespace("color", ["red"], []), Namespace("shape", [], ["squared"])]
                will match datapoints that satisfy "red color" but not include
                datapoints with "squared shape". Please refer to
                https://cloud.google.com/vertex-ai/docs/matching-engine/filtering#json
                for more detail.

        Returns:
            List[Tuple[Document, float]]: List of documents most similar to
            the query text and cosine distance in float for each.
            Lower score represents more similarity.
        zEmbedding query r=   kfilter)rC   rD   r%   embed_query&similarity_search_by_vector_with_score)r)   queryrh   ri   embedding_querys        r+   similarity_search_with_scorez+MatchingEngine.similarity_search_with_score   sM    2 	'wa01..44U;::q ; 
 	
r,   c                   |xs g }t        | j                  d      rE| j                  j                  r/| j                  j                  | j	                         |g||      }n.| j                  j                  | j	                         |g||      }t        j                  dt        |       d       t        |      dk(  rg S g }|d   D ]q  }| j                  d|j                         }i }| j                  |j                  || j                  <   t        ||      }	|j                  |	|j                  f       s t        j                  d       |S )	a  Return docs most similar to the embedding and their cosine distance.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: Optional. A list of Namespaces for filtering
                the matching results.
                For example:
                [Namespace("color", ["red"], []), Namespace("shape", [], ["squared"])]
                will match datapoints that satisfy "red color" but not include
                datapoints with "squared shape". Please refer to
                https://cloud.google.com/vertex-ai/docs/matching-engine/filtering#json
                for more detail.

        Returns:
            List[Tuple[Document, float]]: List of documents most similar to
            the query text and cosine distance in float for each.
            Lower score represents more similarity.

        _public_match_client)deployed_index_idqueriesnum_neighborsri   zFound z	 matches.r   r:   )page_contentr9   zDownloaded documents for query.)hasattrr$   rp   find_neighbors_get_index_idmatchrC   rD   rA   _download_from_gcsr8   r   r   rK   distance)
r)   r%   rh   ri   responsedocsresultrt   r9   documents
             r+   rk   z5MatchingEngine.similarity_search_by_vector_with_score   sN   4 2 4=="89MM..}}33"&"4"4"6"	 4 H }}**"&"4"4"6"	 + H 	vc(m_I67x=AI-/ qk 
	5F22Z		{3KLLH##/17--.)!H KK6??34
	5 	67r,   c                d     | j                   |f||d|}|D cg c]  \  }}|	 c}}S c c}}w )a  Return docs most similar to query.

        Args:
            query: The string that will be used to search for similar documents.
            k: The amount of neighbors that will be retrieved.
            filter: Optional. A list of Namespaces for filtering the matching results.
                For example:
                [Namespace("color", ["red"], []), Namespace("shape", [], ["squared"])]
                will match datapoints that satisfy "red color" but not include
                datapoints with "squared shape". Please refer to
                https://cloud.google.com/vertex-ai/docs/matching-engine/filtering#json
                 for more detail.

        Returns:
            A list of k matching documents.
        rg   )rn   )r)   rl   rh   ri   rT   docs_and_scoresdoc_s           r+   similarity_searchz MatchingEngine.similarity_search%  sE    . <$;;
v
)/
 #22Q222   ,c                d     | j                   |f||d|}|D cg c]  \  }}|	 c}}S c c}}w )a  Return docs most similar to the embedding.

        Args:
            embedding: Embedding to look up documents similar to.
            k: The amount of neighbors that will be retrieved.
            filter: Optional. A list of Namespaces for filtering the matching results.
                For example:
                [Namespace("color", ["red"], []), Namespace("shape", [], ["squared"])]
                will match datapoints that satisfy "red color" but not include
                datapoints with "squared shape". Please refer to
                https://cloud.google.com/vertex-ai/docs/matching-engine/filtering#json
                 for more detail.

        Returns:
            A list of k matching documents.
        rg   )rk   )r)   r%   rh   ri   rT   r   r   r   s           r+   similarity_search_by_vectorz*MatchingEngine.similarity_search_by_vectorB  sF    . F$EE
6
-3
 #22Q222r   c                   | j                   j                  D ]3  }|j                  | j                  j                  k(  s'|j                  c S  t        d| j                  j                   d| j                   j                   d      )zGets the correct index id for the endpoint.

        Returns:
            The index id if found (which should be found) or throws
            ValueError otherwise.
        zNo index with id z deployed on endpoint r=   )r$   deployed_indexesr#   resource_namer8   rB   display_name)r)   r#   s     r+   rw   zMatchingEngine._get_index_id_  sy     ]]33 	 E{{djj666xx	  

 8 89 :$}}))*!-
 	
r,   c                    | j                   j                  | j                        }|j                  |      }|j	                         S )zDownloads from GCS in text format.

        Args:
            gcs_location: The location where the file is located.

        Returns:
            The string contents of the file.
        )r&   r`   r(   ra   download_as_string)r)   rd   re   ra   s       r+   ry   z!MatchingEngine._download_from_gcsp  s;     ++D,@,@A{{<(&&((r,   c                    t        d      )zUse from components instead.zThis method is not implemented. Instead, you should initialize the class with `MatchingEngine.from_components(...)` and then call `add_texts`)NotImplementedError)clsrR   r%   rS   rT   s        r+   
from_textszMatchingEngine.from_texts}  s     "
 	
r,   c                &   | j                  |      }| j                  |      }	| j                  ||||	      }
| j                  ||||	      }| j	                  |	|      }| j                  ||||	        | d||
||xs | j                         ||	|d|S )a|  Takes the object creation out of the constructor.

        Args:
            project_id: The GCP project id.
            region: The default location making the API calls. It must have
            the same location as the GCS bucket and must be regional.
            gcs_bucket_name: The location where the vectors will be stored in
            order for the index to be created.
            index_id: The id of the created index.
            endpoint_id: The id of the created endpoint.
            credentials_path: (Optional) The path of the Google credentials on
            the local file system.
            embedding: The :class:`Embeddings` that will be used for
            embedding the texts.
            kwargs: Additional keyword arguments to pass to MatchingEngine.__init__().

        Returns:
            A configured MatchingEngine with the texts added to the index.
        )r"   r#   r$   r%   r&   r'   r(    )_validate_gcs_bucket_create_credentials_from_file_create_index_by_id_create_endpoint_by_id_get_gcs_client_init_aiplatform_get_default_embeddings)r   r"   regionr(   index_idendpoint_idcredentials_pathr%   rT   r'   r#   r$   r&   s                r+   from_componentszMatchingEngine.from_components  s    > 22?C778HI''*fkR--	
 ((jA
Z+N 	
!@3#>#>#@!#+	
 	
 		
r,   c                N    |j                  dd      }d|v rt        d|       |S )zValidates the gcs_bucket_name as a bucket name.

        Args:
              gcs_bucket_name: The received bucket uri.

        Returns:
              A valid gcs_bucket_name or throws ValueError if full path is
              provided.
        r>    r<   zFThe argument gcs_bucket_name should only be the bucket name. Received )replacerB   )r   r(   s     r+   r   z#MatchingEngine._validate_gcs_bucket  sC     *11'2>/!--<,=?  r,   c                P    ddl m} d}||j                  j                  |      }|S )a  Creates credentials for GCP.

        Args:
             json_credentials_path: The path on the file system where the
             credentials are stored.

         Returns:
             An optional of Credentials or None, in which case the default
             will be used.
        r   r2   N)r5   r3   r   from_service_account_file)r   json_credentials_pathr3   r'   s       r+   r   z,MatchingEngine._create_credentials_from_file  s3     	2 ,)55OO%K r,   c                j    ddl m} t        j                  d| d       |j	                  ||||      S )aJ  Creates a MatchingEngineIndex object by id.

        Args:
            index_id: The created index id.
            project_id: The project to retrieve index from.
            region: Location to retrieve index from.
            credentials: GCS credentials.

        Returns:
            A configured MatchingEngineIndex.
        r   r1   z'Creating matching engine index with id r=   )
index_nameprojectlocationr'   )r4   r1   rC   rD   r   )r   r   r"   r   r'   r1   s         r+   r   z"MatchingEngine._create_index_by_id  sA      	,>xjJK--#	 . 
 	
r,   c                j    ddl m} t        j                  d| d       |j	                  ||||      S )a`  Creates a MatchingEngineIndexEndpoint object by id.

        Args:
            endpoint_id: The created endpoint id.
            project_id: The project to retrieve index from.
            region: Location to retrieve index from.
            credentials: GCS credentials.

        Returns:
            A configured MatchingEngineIndexEndpoint.
        r   r   zCreating endpoint with id r=   )index_endpoint_namer   r   r'   )r4   r1   rC   rD   r   )r   r   r"   r   r'   r1   s         r+   r   z%MatchingEngine._create_endpoint_by_id	  sA      	,1+a@A55 +#	 6 
 	
r,   c                L    ddl m}  |j                  ||t        d            S )z\Lazily creates a GCS client.

        Returns:
            A configured GCS client.
        r   r   zvertex-ai-matching-engine)module)r'   r   client_info)r4   r   Clientr   )r   r'   r"   r   s       r+   r   zMatchingEngine._get_gcs_client#  s)     	)w~~#'/JK
 	
r,   c           	     x    ddl m} t        j                  d| d| d| d       |j	                  ||||       y)	a`  Configures the aiplatform library.

        Args:
            project_id: The GCP project id.
            region: The default location making the API calls. It must have
            the same location as the GCS bucket and must be regional.
            gcs_bucket_name: GCS staging location.
            credentials: The GCS Credentials object.
        r   r   z%Initializing AI Platform for project z on z	 and for r=   )r   r   staging_bucketr'   N)r4   r1   rC   rD   init)r   r"   r   r(   r'   r1   s         r+   r   zMatchingEngine._init_aiplatform5  sN    $ 	,3J<thi03	
 	*#	 	 	
r,   c                    ddl m}  |       S )z{This function returns the default embedding.

        Returns:
            Default TensorflowHubEmbeddings to use.
        r   r   )langchain_community.embeddingsr   )r   r   s     r+   r   z&MatchingEngine._get_default_embeddingsT  s     	K&((r,   r.   )r"   rH   r#   r   r$   r   r%   r   r&   zstorage.Clientr(   rH   r'   Optional[Credentials]r   Optional[str])returnr   )r   None)rR   zIterable[str]rS   Optional[List[dict]]rT   r   r   	List[str])rc   rH   rd   rH   r   r   )   N)rl   rH   rh   intri   Optional[List[Namespace]]r   List[Tuple[Document, float]])r%   List[float]rh   r   ri   r   r   r   )
rl   rH   rh   r   ri   r   rT   r   r   List[Document])
r%   r   rh   r   ri   r   rT   r   r   r   )r   rH   )rd   rH   r   rH   )r   Type['MatchingEngine']rR   r   r%   r   rS   r   rT   r   r   'MatchingEngine')NN)r   r   r"   rH   r   rH   r(   rH   r   rH   r   rH   r   r   r%   zOptional[Embeddings]rT   r   r   r   )r(   rH   r   rH   )r   r   r   r   )
r   rH   r"   rH   r   rH   r'   'Credentials'r   r   )
r   rH   r"   rH   r   rH   r'   r   r   r   )r'   r   r"   rH   r   z'storage.Client')
r"   rH   r   rH   r(   rH   r'   r   r   r   )r   z'TensorflowHubEmbeddings')__name__
__module____qualname____doc__r    propertyr/   r!   r^   rL   rn   rk   r   r   rw   ry   classmethodr   r   r   r   r   r   r   r   r   __classcell__)r*   s   @r+   r   r      s   >, .26/ *.6/6/ #6/ .	6/
 6/ #6/ 6/ +6/ '6/p  
 +/77 (7 	7
 
7r	& ,0	

 
 *	

 
&
D ,0	GG G *	G
 
&GX ,0	33 3 *	3
 3 
3@ ,0	33 3 *	3
 3 
3:
") 
 +/	
#

 
 (	

 
 

 
  +/*.4
#4
4
 4
 	4

 4
 4
 (4
 (4
 4
 
4
 4
l  $ $1	 0 

(+
58
GT
	
 
2 

+.
8;
JW
	$
 
2 
'
58
	
 
" 

 
 	

 #
 

 
< 	) 	)r,   r   )'
__future__r   rN   loggingrP   rI   typingr   r   r   r   r   r	   r
   langchain_core._api.deprecationr   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.vectorstoresr   &langchain_community.utilities.vertexair   r4   r   google.cloud.aiplatformr   r   Fgoogle.cloud.aiplatform.matching_engine.matching_engine_index_endpointr   google.oauth2.service_accountr   r   r   	getLoggerr   rC   r   r   r,   r+   <module>r      s    "     L L L 6 - 0 3 B$X :F			8	$ 
J
|)[ |)
|)r,   