
    3fiS*                       d Z ddlmZ ddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZmZ ddlmZ ddZddZ G d deee      Z  G d deee      Z! G d de!      Z"y)z-LLM Chains for evaluating question answering.    )annotationsN)Sequence)Any)	Callbacks)BaseLanguageModel)PromptTemplate)
ConfigDict)override)LLMChain)CONTEXT_PROMPT
COT_PROMPTPROMPT)LLMEvalChainStringEvaluator)RUN_KEYc                   t        j                  d| j                         t         j                        }|rF|j	                  d      j                         dk(  ry|j	                  d      j                         dk(  ry	 | j                         j                         d   j                  t        j                  ddt        j                              }|j                         dk(  ry|j                         dk(  ry| j                         j                         d	   j                  t        j                  ddt        j                              }|j                         dk(  ry|j                         dk(  ry	 y # t        $ r Y y w xY w)
Nzgrade:\s*(correct|incorrect)   CORRECT)r   r   	INCORRECT)r   r   r    )researchstrip
IGNORECASEgroupuppersplit	translatestr	maketransstringpunctuation
IndexError)textmatch
first_word	last_words       h/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_classic/evaluation/qa/eval_chain.py
_get_scorer*      sC   II5tzz|R]]SE;;q>!Y.;;q>![0!JJL #--cmmBFDVDV.WX 	 *,!JJLUWRYs}}RV-?-?@A 	
 ??	)??+! ,   s&   =A&E5 $E5 8A&E5 E5 5	F Fc                ^    | j                         }t        |      }|d\  }}n|\  }}|||dS )zwParse the output text.

    Args:
        text: The output text to parse.

    Returns:
        The parsed output.
    )NN)	reasoningvaluescore)r   r*   )r%   r,   parsed_scoresr-   r.   s        r)   _parse_string_eval_outputr0   7   sD     

Iy)M!u$u     c                  n   e Zd ZU dZdZded<    ed      Zee	dd              Z
ee	dd              Zee	dd	              Zee	dd
              Ze	 d	 	 	 	 	 	 	 dd       Z	 	 	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZe	ddddd	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Ze	ddddd	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zy)QAEvalChainz,LLM Chain for evaluating question answering.resultsr    
output_keyignoreextrac                     yNF clss    r)   is_lc_serializablezQAEvalChain.is_lc_serializableV        r1   c                     y)Ncorrectnessr;   selfs    r)   evaluation_namezQAEvalChain.evaluation_name[   s     r1   c                     yNTr;   rB   s    r)   requires_referencezQAEvalChain.requires_reference`        r1   c                     yrF   r;   rB   s    r)   requires_inputzQAEvalChain.requires_inpute   rH   r1   Nc                    |xs t         }h d}|t        |j                        k7  rd| d|j                   }t        |       | d||d|S )a  Load QA Eval Chain from LLM.

        Args:
            llm: The base language model to use.
            prompt: A prompt template containing the input_variables:
                `'input'`, `'answer'` and `'result'` that will be used as the prompt
                for evaluation.

                Defaults to `PROMPT`.
            **kwargs: Additional keyword arguments.

        Returns:
            The loaded QA eval chain.
        >   queryanswerresultInput variables should be 
, but got llmpromptr;   )r   setinput_variables
ValueError)r=   rR   rS   kwargsexpected_input_varsmsgs         r)   from_llmzQAEvalChain.from_llmj   sm    * !6;#f&<&<"==,-@,A B!1124  S/!4s64V44r1   	callbacksc                   t        |      D cg c]  \  }}||   ||   ||   |   d }	}}| j                  |	|      S c c}}w )5Evaluate question answering examples and predictions.rL   rM   rN   r[   	enumerateapply)
rC   examplespredictionsquestion_key
answer_keyprediction_keyr\   iexampleinputss
             r)   evaluatezQAEvalChain.evaluate   sd    " (1
 7	 !.!*-%a.8
 
 zz&Iz66
   Ac                f    t        || j                           }t        |v r|t           |t        <   |S Nr0   r5   r   rC   rN   parsed_results      r)   _prepare_outputzQAEvalChain._prepare_output   1    1&2IJf%+G_M'"r1   F	referenceinputr\   include_run_infoc               B     | |||d||      }| j                  |      S )aH  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction: The LLM or chain prediction to evaluate.
            reference: The reference label to evaluate against.
            input: The input to consider during evaluation
            callbacks: The callbacks to use for tracing.
            include_run_info: Whether to include run info in the returned results.
            **kwargs: Additional keyword arguments, including callbacks, tags, etc.

        Returns:
            The evaluation results containing the score or value.
        r_   r\   rw   rr   rC   
predictionru   rv   r\   rw   rW   rN   s           r)   _evaluate_stringszQAEvalChain._evaluate_strings   s7    0 #$
  -
 ##F++r1   c               p   K   | j                  |||d||       d {   }| j                  |      S 7 w)Nr_   rj   r\   rw   acallrr   r{   s           r)   _aevaluate_stringszQAEvalChain._aevaluate_strings   sH      zz"i:N- " 
 

 ##F++
   646returnboolr   r    rn   )rR   r   rS   PromptTemplate | NonerW   r   r   r3   r_   )rc   Sequence[dict]rd   r   re   r    rf   r    rg   r    r\   r   r   
list[dict]rN   dictr   r   r|   r    ru   
str | Nonerv   r   r\   r   rw   r   rW   r   r   r   )__name__
__module____qualname____doc__r5   __annotations__r	   model_configclassmethodr
   r>   propertyrD   rG   rJ   rZ   rk   rr   r}   r   r;   r1   r)   r3   r3   M   s    6JL              )-55 &5 	5
 
5 5D $"&7  $7 7 $7 	7
 7 7 7 
7, 
 !% #!& ,  , 	 ,
  ,  ,  ,  , 
 ,  ,D 
 !% #!&, , 	,
 , , , , 
, ,r1   r3   c                  \   e Zd ZdZeedd              Zedd       Zedd       Z	 e
d      Zedd       Zeedd              Ze	 d	 	 	 	 	 	 	 dd
       Z	 	 	 dd	d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZed	d	d	dd	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zed	d	d	dd	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zy	)ContextQAEvalChainz4LLM Chain for evaluating QA w/o GT based on context.c                     yr:   r;   r<   s    r)   r>   z%ContextQAEvalChain.is_lc_serializable   r?   r1   c                     y)z.Whether the chain requires a reference string.Tr;   rB   s    r)   rG   z%ContextQAEvalChain.requires_reference   rH   r1   c                     y)z+Whether the chain requires an input string.Tr;   rB   s    r)   rJ   z!ContextQAEvalChain.requires_input   rH   r1   r6   r7   c                v    h d}|t        |j                        k7  rd| d|j                   }t        |      y )N>   rL   rN   contextrO   rP   )rT   rU   rV   )r=   rS   rX   rY   s       r)   _validate_input_varsz'ContextQAEvalChain._validate_input_vars   sQ    <#f&<&<"==,-@,A B!1124  S/! >r1   c                     y)NzContextual Accuracyr;   rB   s    r)   rD   z"ContextQAEvalChain.evaluation_name   s     %r1   Nc                N    |xs t         }| j                  |        | d||d|S )a  Load QA Eval Chain from LLM.

        Args:
            llm: The base language model to use.
            prompt: A prompt template containing the `input_variables`:
                `'query'`, `'context'` and `'result'` that will be used as the prompt
                for evaluation.

                Defaults to `PROMPT`.
            **kwargs: Additional keyword arguments.

        Returns:
            The loaded QA eval chain.
        rQ   r;   )r   r   r=   rR   rS   rW   s       r)   rZ   zContextQAEvalChain.from_llm   s1    * )>  (4s64V44r1   r[   c                   t        |      D cg c]  \  }}||   ||   ||   |   d }	}}| j                  |	|      S c c}}w )r^   rL   r   rN   r[   r`   )
rC   rc   rd   re   context_keyrg   r\   rh   ri   rj   s
             r)   rk   zContextQAEvalChain.evaluate  sd    " (1
 7	 !.";/%a.8
 
 zz&Iz66
rl   c                f    t        || j                           }t        |v r|t           |t        <   |S rn   ro   rp   s      r)   rr   z"ContextQAEvalChain._prepare_output/  rs   r1   Frt   c               B     | |||d||      }| j                  |      S )Nr   ry   rz   r{   s           r)   r}   z$ContextQAEvalChain._evaluate_strings5  s7     $$
  -
 ##F++r1   c               p   K   | j                  |||d||       d {   }| j                  |      S 7 w)Nr   r   r   r{   s           r)   r   z%ContextQAEvalChain._aevaluate_stringsK  sH      zz"yJO- " 
 

 ##F++
r   r   )rS   r   r   Noner   rn   )rR   r   rS   r   rW   r   r   r   r   )rc   r   rd   r   re   r    r   r    rg   r    r\   r   r   r   r   r   )r   r   r   r   r   r
   r>   r   rG   rJ   r	   r   r   rD   rZ   rk   rr   r}   r   r;   r1   r)   r   r      s   >       L " " %  %  )-55 &5 	5
 
5 58 $$&7  $77  7 	7
 7 7 7 
7, 
 !% #!&, , 	,
 , , , , 
, ,* 
 !% #!&, , 	,
 , , , , 
, ,r1   r   c                  l    e Zd ZdZeedd              Zeedd              Ze	 d	 	 	 	 	 	 	 d	d       Z	y)
CotQAEvalChainz=LLM Chain for evaluating QA using chain of thought reasoning.c                     yr:   r;   r<   s    r)   r>   z!CotQAEvalChain.is_lc_serializablea  r?   r1   c                     y)NzCOT Contextual Accuracyr;   rB   s    r)   rD   zCotQAEvalChain.evaluation_namef  s     )r1   Nc                N    |xs t         }| j                  |        | d||d|S )zLoad QA Eval Chain from LLM.rQ   r;   )r   r   r   s       r)   rZ   zCotQAEvalChain.from_llmk  s1     %:  (4s64V44r1   r   r   rn   )rR   r   rS   r   rW   r   r   r   )
r   r   r   r   r   r
   r>   r   rD   rZ   r;   r1   r)   r   r   ^  sz    G   )  )  )-	5	5 &	5 		5
 
	5 	5r1   r   )r%   r    r   ztuple[str, int] | None)r%   r    r   r   )#r   
__future__r   r   r"   collections.abcr   typingr   langchain_core.callbacksr   langchain_core.language_modelsr   langchain_core.promptsr   pydanticr	   typing_extensionsr
   langchain_classic.chains.llmr   +langchain_classic.evaluation.qa.eval_promptr   r   r   #langchain_classic.evaluation.schemar   r   langchain_classic.schemar   r*   r0   r3   r   r   r;   r1   r)   <module>r      sy    3 " 	  $  . < 1  & 1 
 N ,:,K,(O\ K,\@,?L @,F5' 5r1   