
    3fiP                        d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZ defdZ ej>                  e       Z! ejD                  d      de#fd       Z$defdZ% G d de&e      Z' G d de      Z( G d de(e      Z) G d de(e      Z*y)z@A chain for comparing the output of two models using embeddings.    N)Enum)util)Any)	Callbacks)AsyncCallbackManagerForChainRunCallbackManagerForChainRun)
Embeddings)pre_init)
ConfigDictField)override)Chain)PairwiseStringEvaluatorStringEvaluatorRUN_KEYreturnc                  N    	 dd l } | S # t        $ r}d}t        |      |d }~ww xY w)Nr   z@Could not import numpy, please install with `pip install numpy`.)numpyImportError)npemsgs      r/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_classic/evaluation/embedding_distance/base.py_import_numpyr      s5    & I  &P#A%&s    	$$   )maxsizec                  l    t        t        j                  d            ryt        j	                  d       y)Nr   Ta  NumPy not found in the current Python environment. langchain will use a pure Python implementation for embedding distance operations, which may significantly impact performance, especially for large datasets. For optimal speed and efficiency, consider installing NumPy: pip install numpyF)boolr   	find_specloggerwarning     r   _check_numpyr%   $   s,    DNN7#$
NN	 r$   c                      	 ddl m}   |        S # t        $ r0 	 ddlm}  n# t        $ r}d}t        |      |d}~ww xY wY  |        S w xY w)zZCreate an `Embeddings` object.

    Returns:
        The created `Embeddings` object.
    r   OpenAIEmbeddingstCould not import OpenAIEmbeddings. Please install the OpenAIEmbeddings package using `pip install langchain-openai`.N)langchain_openair(   r   %langchain_community.embeddings.openai)r(   r   r   s      r   _embedding_factoryr,   2   sl    *5   
*		*  	*Q  c")	* 
*s)    	A A	<7<AAc                   $    e Zd ZdZdZdZdZdZdZy)EmbeddingDistancea  Embedding Distance Metric.

    Attributes:
        COSINE: Cosine distance metric.
        EUCLIDEAN: Euclidean distance metric.
        MANHATTAN: Manhattan distance metric.
        CHEBYSHEV: Chebyshev distance metric.
        HAMMING: Hamming distance metric.
    cosine	euclidean	manhattan	chebyshevhammingN)	__name__
__module____qualname____doc__COSINE	EUCLIDEAN	MANHATTAN	CHEBYSHEVHAMMINGr#   r$   r   r.   r.   K   s"     FIIIGr$   r.   c                      e Zd ZU dZ ee      Zeed<    ee	j                        Ze	ed<   edeeef   deeef   fd       Z ed	
      Zedee   fd       ZdedefdZde	defdZedededefd       Zedededefd       Zedededefd       Zedededefd       Zedededefd       ZdedefdZy)_EmbeddingDistanceChainMixinzShared functionality for embedding distance evaluators.

    Attributes:
        embeddings: The embedding objects to vectorize the outputs.
        distance_metric: The distance metric to use for comparing the embeddings.
    )default_factory
embeddings)defaultdistance_metricvaluesr   c                 \   |j                  d      }g }	 ddlm} |j                  |       	 ddlm} |j                  |       |sd}t	        |      t        |t        |            r	 ddl}|S |S # t        $ r Y Pw xY w# t        $ r Y Gw xY w# t        $ r}d}t	        |      |d}~ww xY w)zValidate that the TikTok library is installed.

        Args:
            values: The values to validate.

        Returns:
            The validated values.
        r@   r   r'   r)   NzThe tiktoken library is required to use the default OpenAI embeddings with embedding distance evaluators. Please either manually select a different Embeddings object or install tiktoken using `pip install tiktoken`.)	getr*   r(   appendr   r+   
isinstancetupletiktoken)clsrC   r@   types_r(   r   rI   r   s           r   _validate_tiktoken_installedz9_EmbeddingDistanceChainMixin._validate_tiktoken_installedh   s     ZZ-
	9MM*+	 MM*+ Q  c""j%-0	. v=  		  		  .I  "#&A-.s:   A1 B  )B 1	A=<A= 	BB	B+B&&B+T)arbitrary_types_allowedc                     dgS )z\Return the output keys of the chain.

        Returns:
            The output keys.
        scorer#   selfs    r   output_keysz(_EmbeddingDistanceChainMixin.output_keys   s     yr$   resultc                 D    d|d   i}t         |v r|t            |t         <   |S )NrO   r   )rQ   rS   parseds      r   _prepare_outputz,_EmbeddingDistanceChainMixin._prepare_output   s*    6'?+f$WoF7Or$   metricc           
      <   t         j                  | j                  t         j                  | j                  t         j
                  | j                  t         j                  | j                  t         j                  | j                  i}||v r||   S d| }t        |      )zGet the metric function for the given metric name.

        Args:
            metric: The metric name.

        Returns:
            The metric function.
        zInvalid metric: )r.   r8   _cosine_distancer9   _euclidean_distancer:   _manhattan_distancer;   _chebyshev_distancer<   _hamming_distance
ValueError)rQ   rW   metricsr   s       r   _get_metricz(_EmbeddingDistanceChainMixin._get_metric   s     $$d&;&;'')A)A'')A)A'')A)A%%t'='=
 W6?" )or$   abc           
      h   	 ddl m} d || |      z
  S # t        $ r 	 ddlm}  || j                         |j                               cY S # t        $ rW t               rt               }| j                         }|j                         }|j                  ||      }|j                  j                  |      }|j                  j                  |      }	|dk(  s|	dk(  rY Y yd|||	z  z  z
  cY cY S t        | d      r| n| g}t        |d      r|n|g}t        | d      r| j                         }t        |d      r|j                         }t        d t        ||d	
      D              }t        d |D              dz  }t        d |D              dz  }	|dk(  s|	dk(  rY Y yd|||	z  z  z
  cY cY S w xY ww xY w)zCompute the cosine distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.ndarray: The cosine distance.
        r   )_cosine_similarityg      ?)r/   g        __len__flattenc              3   ,   K   | ]  \  }}||z    y wNr#   .0xys      r   	<genexpr>z@_EmbeddingDistanceChainMixin._cosine_distance.<locals>.<genexpr>   s     !VDAq!a%!Vs   Fstrictc              3   &   K   | ]	  }||z    y wrh   r#   rj   rk   s     r   rm   z@_EmbeddingDistanceChainMixin._cosine_distance.<locals>.<genexpr>        3qQU3         ?c              3   &   K   | ]	  }||z    y wrh   r#   rq   s     r   rm   z@_EmbeddingDistanceChainMixin._cosine_distance.<locals>.<genexpr>   rr   rs   )!langchain_core.vectorstores.utilsrd   r   scipy.spatial.distancer/   rf   r%   r   dotlinalgnormhasattrsumzip)
ra   rb   rd   r/   r   a_flatb_flatdot_productnorm_anorm_bs
             r   rY   z-_EmbeddingDistanceChainMixin._cosine_distance   s   #	?L+Aq111 	??9aiik199;77 ?>&BYY[FYY[F"$&&"8KYY^^F3FYY^^F3F{fk"+&"ABB%a3!%a3!1i(YY[F1i(YY[F!!VCu4U!VV3F33s:3F33s:Q;&A+kVf_=>>3?	?sR    
F1*A	F1BF-F1F-*F1.B,F-F1F-(F1,F--F1c           	         	 ddl m}  || j                         |j                               S # t        $ rR t	               r$ddl}|j                  j                  | |z
        cY S t        d t        | |d      D              dz  cY S w xY w)zCompute the Euclidean distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Euclidean distance.
        r   )r0   Nc              3   8   K   | ]  \  }}||z
  ||z
  z    y wrh   r#   ri   s      r   rm   zC_EmbeddingDistanceChainMixin._euclidean_distance.<locals>.<genexpr>  s!     MTQA!a%(Ms   Frn   rt   )
rw   r0   rf   r   r%   r   ry   rz   r|   r}   )ra   rb   r0   r   s       r   rZ   z0_EmbeddingDistanceChainMixin._euclidean_distance   st    
	U8QYY[!))+66 	U~"yy~~a!e,,MSAe5LMMQTTT	Us   *- 5B$!BBc           	      &   	 ddl m}  || j                         |j                               S # t        $ rZ t	               r/t               }|j                  |j                  | |z
              cY S t        d t        | |d      D              cY S w xY w)zCompute the Manhattan distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Manhattan distance.
        r   )	cityblockc              3   >   K   | ]  \  }}t        ||z
          y wrh   absri   s      r   rm   zC_EmbeddingDistanceChainMixin._manhattan_distance.<locals>.<genexpr>       Fdas1q5zF   Frn   )	rw   r   rf   r   r%   r   r|   r   r}   )ra   rb   r   r   s       r   r[   z0_EmbeddingDistanceChainMixin._manhattan_distance  v    		G8QYY[!))+66 	G~"_vvbffQUm,,Fc!Qu.EFFF	G   *- A B/BBc           	      &   	 ddl m}  || j                         |j                               S # t        $ rZ t	               r/t               }|j                  |j                  | |z
              cY S t        d t        | |d      D              cY S w xY w)zCompute the Chebyshev distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Chebyshev distance.
        r   )r2   c              3   >   K   | ]  \  }}t        ||z
          y wrh   r   ri   s      r   rm   zC_EmbeddingDistanceChainMixin._chebyshev_distance.<locals>.<genexpr>1  r   r   Frn   )	rw   r2   rf   r   r%   r   maxr   r}   )ra   rb   r2   r   s       r   r\   z0_EmbeddingDistanceChainMixin._chebyshev_distance  r   r   c           	          	 ddl m}  || j                         |j                               S # t        $ rW t	               r t               }|j                  | |k7        cY S t        d t        | |d      D              t        |       z  cY S w xY w)zCompute the Hamming distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Hamming distance.
        r   )r3   c              3   2   K   | ]  \  }}||k7  sd   yw)r   Nr#   ri   s      r   rm   zA_EmbeddingDistanceChainMixin._hamming_distance.<locals>.<genexpr>G  s     GTQQqGs   Frn   )
rw   r3   rf   r   r%   r   meanr|   r}   len)ra   rb   r3   r   s       r   r]   z._EmbeddingDistanceChainMixin._hamming_distance3  sv    		Q6199;		44 	Q~"_wwqAv&GSAe%<GG#a&PP	Qs   *- 1B *BBvectorsc                 L   | j                  | j                        }t               rft        |t	               j
                        rH ||d   j                  dd      |d   j                  dd            j                         }t        |      S  ||d   |d         }t        |      S )zCompute the score based on the distance metric.

        Args:
            vectors (np.ndarray): The input vectors.

        Returns:
            The computed score.
        r   r   )	r`   rB   r%   rG   r   ndarrayreshapeitemfloat)rQ   r   rW   rO   s       r   _compute_scorez+_EmbeddingDistanceChainMixin._compute_scoreI  s     !!$"6"67>j-/2I2IJ71:--a4gaj6H6HB6OPUUWE U| 71:wqz2EU|r$   N) r4   r5   r6   r7   r   r,   r@   r	   __annotations__r.   r8   rB   r
   dictstrr   rL   r   model_configpropertylistrR   rV   r`   staticmethodrY   rZ   r[   r\   r]   r   r   r#   r$   r   r>   r>   ]   s    #3EFJ
F).7H7O7O)PO&P-$sCx. -T#s(^ - -^  $L T#Y  d t "3  * -?C -?C -?C -? -?^ Us Us Us U U, Gs Gs Gs G G* Gs Gs Gs G G* QS QS QS Q Q*c e r$   r>   c                      e Zd ZdZedefd       Zeedefd              Z	ede
e   fd       Ze	 ddeeef   dedz  deeef   fd	       Ze	 ddeeef   dedz  deeef   fd
       Zedddddddededz  dede
e   dz  deeef   dz  dededefd       Zedddddddededz  dede
e   dz  deeef   dz  dededefd       Zy)EmbeddingDistanceEvalChainaL  Embedding distance evaluation chain.

    Use embedding distances to score semantic difference between
    a prediction and reference.

    Examples:
        >>> chain = EmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_strings(prediction="Hello", reference="Hi")
        >>> print(result)
        {'score': 0.5}
    r   c                      y)zReturn whether the chain requires a reference.

        Returns:
            True if a reference is required, `False` otherwise.
        Tr#   rP   s    r   requires_referencez-EmbeddingDistanceEvalChain.requires_referenceg  s     r$   c                 6    d| j                   j                   dS )N
embedding_	_distancerB   valuerP   s    r   evaluation_namez*EmbeddingDistanceEvalChain.evaluation_namep  s      D00667yAAr$   c                 
    ddgS )ZReturn the input keys of the chain.

        Returns:
            The input keys.
        
prediction	referencer#   rP   s    r   
input_keysz%EmbeddingDistanceEvalChain.input_keysu  s     k**r$   Ninputsrun_managerc                     | j                   j                  |d   |d   g      }t               rt               }|j	                  |      }| j                  |      }d|iS )zCompute the score for a prediction and reference.

        Args:
            inputs: The input data.
            run_manager: The callback manager.

        Returns:
            The computed score.
        r   r   rO   r@   embed_documentsr%   r   arrayr   rQ   r   r   r   r   rO   s         r   _callz EmbeddingDistanceEvalChain._call~  sa     //11L!6+#67
 >Bhhw'G##G,r$   c                    K   | j                   j                  |d   |d   g       d{   }t               rt               }|j	                  |      }| j                  |      }d|iS 7 >w)zAsynchronously compute the score for a prediction and reference.

        Args:
            inputs: The input data.
            run_manager: The callback manager.

        Returns:
            The computed score.
        r   r   NrO   r@   aembed_documentsr%   r   r   r   r   s         r   _acallz!EmbeddingDistanceEvalChain._acall  sr      88|${#
 
 >Bhhw'G##G,
   'A*A(?A*F)r   	callbackstagsmetadatainclude_run_infor   r   r   r   r   r   kwargsc                D     | ||d||||      }| j                  |      S )Z  Evaluate the embedding distance between a prediction and reference.

        Args:
            prediction: The output string from the first model.
            reference: The output string from the second model.
            callbacks: The callbacks to use.
            tags: The tags to apply.
            metadata: The metadata to use.
            include_run_info: Whether to include run information in the output.
            **kwargs: Additional keyword arguments.

        Returns:
            `dict` containing:
                - score: The embedding distance between the two predictions.
        r   r   r   r   r   r   r   rV   	rQ   r   r   r   r   r   r   r   rS   s	            r   _evaluate_stringsz,EmbeddingDistanceEvalChain._evaluate_strings  s5    6 ",9E-
 ##F++r$   c                r   K   | j                  ||d||||       d{   }| j                  |      S 7 w)r   r   r   NacallrV   r   s	            r   _aevaluate_stringsz-EmbeddingDistanceEvalChain._aevaluate_strings  sL     6 zz",9E- " 
 
 ##F++
   757rh   )r4   r5   r6   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r#   r$   r   r   r   Z  s   
 D   B B  B +DI + +  :> S#X  0$6  
c3h	   .  ?C S#X  5t;  
c3h	   4 
 !%#!%*.!&!, !, :	!,
 !, 3i$!, sCx.4'!, !, !, 
!, !,F 
 !%#!%*.!&!, !, :	!,
 !, 3i$!, sCx.4'!, !, !, 
!, !,r$   r   c                   n   e Zd ZdZedee   fd       Zedefd       Ze		 dde
eef   dedz  de
eef   fd       Ze		 dde
eef   dedz  de
eef   fd	       Ze	dddd
ddedededee   dz  de
eef   dz  dedede
fd       Ze	dddd
ddedededee   dz  de
eef   dz  dedede
fd       Zy)"PairwiseEmbeddingDistanceEvalChaina  Use embedding distances to score semantic difference between two predictions.

    Examples:
    >>> chain = PairwiseEmbeddingDistanceEvalChain()
    >>> result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
    >>> print(result)
    {'score': 0.5}
    r   c                 
    ddgS )r   r   prediction_br#   rP   s    r   r   z-PairwiseEmbeddingDistanceEvalChain.input_keys  s     n--r$   c                 6    d| j                   j                   dS )zReturn the evaluation name.pairwise_embedding_r   r   rP   s    r   r   z2PairwiseEmbeddingDistanceEvalChain.evaluation_name  s      %T%9%9%?%?$@	JJr$   Nr   r   c                     | j                   j                  |d   |d   g      }t               rt               }|j	                  |      }| j                  |      }d|iS )zCompute the score for two predictions.

        Args:
            inputs: The input data.
            run_manager: The callback manager.

        Returns:
            The computed score.
        r   r   rO   r   r   s         r   r   z(PairwiseEmbeddingDistanceEvalChain._call  sd     //11|$~&
 >Bhhw'G##G,r$   c                    K   | j                   j                  |d   |d   g       d{   }t               rt               }|j	                  |      }| j                  |      }d|iS 7 >w)zAsynchronously compute the score for two predictions.

        Args:
            inputs: The input data.
            run_manager: The callback manager.

        Returns:
            The computed score.
        r   r   NrO   r   r   s         r   r   z)PairwiseEmbeddingDistanceEvalChain._acall0  sr      88|$~&
 
 >Bhhw'G##G,
r   F)r   r   r   r   r   r   r   r   r   r   r   c                D     | ||d||||      }| j                  |      S )aR  Evaluate the embedding distance between two predictions.

        Args:
            prediction: The output string from the first model.
            prediction_b: The output string from the second model.
            callbacks: The callbacks to use.
            tags: The tags to apply.
            metadata: The metadata to use.
            include_run_info: Whether to include run information in the output.
            **kwargs: Additional keyword arguments.

        Returns:
            `dict` containing:
                - score: The embedding distance between the two predictions.
        r   r   r   r   	rQ   r   r   r   r   r   r   r   rS   s	            r   _evaluate_string_pairsz9PairwiseEmbeddingDistanceEvalChain._evaluate_string_pairsK  s5    6 ",lK-
 ##F++r$   c                r   K   | j                  ||d||||       d{   }| j                  |      S 7 w)aa  Asynchronously evaluate the embedding distance between two predictions.

        Args:
            prediction: The output string from the first model.
            prediction_b: The output string from the second model.
            callbacks: The callbacks to use.
            tags: The tags to apply.
            metadata: The metadata to use.
            include_run_info: Whether to include run information in the output.
            **kwargs: Additional keyword arguments.

        Returns:
            `dict` containing:
                - score: The embedding distance between the two predictions.
        r   r   Nr   r   s	            r   _aevaluate_string_pairsz:PairwiseEmbeddingDistanceEvalChain._aevaluate_string_pairso  sL     6 zz",lK- " 
 
 ##F++
r   rh   )r4   r5   r6   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r#   r$   r   r   r     s    .DI . . K K K  :> S#X  0$6  
c3h	   4  ?C S#X  5t;  
c3h	   4   $!%*.!&!, !, 	!,
 !, 3i$!, sCx.4'!, !, !, 
!, !,F   $!%*.!&!, !, 	!,
 !, 3i$!, sCx.4'!, !, !, 
!, !,r$   r   )+r7   	functoolsloggingenumr   	importlibr   typingr   langchain_core.callbacksr    langchain_core.callbacks.managerr   r   langchain_core.embeddingsr	   langchain_core.utilsr
   pydanticr   r   typing_extensionsr   langchain_classic.chains.baser   #langchain_classic.evaluation.schemar   r   langchain_classic.schemar   r   	getLoggerr4   r!   	lru_cacher   r%   r,   r   r.   r>   r   r   r#   r$   r   <module>r      s    F      . 1 ) & & / X ,s  
		8	$ Q
d 
  
J 2T $z5 zz],!= ],@W, W,r$   