
    3fi                     .   d dl mZ d dlmZmZ d dlmZmZmZm	Z	 d dl
mZ ddlmZ de	eeef      ded	e	eeef      fd
Z	 	 	 dde	e   de	e   dedededed	e	eeef      fdZ	 	 dde	e   de	e   dededed	e	eeef      fdZe G d de             Zy)    )ABC)	dataclassfield)AnyCallableOptionalSequence)
RowMapping   )DistanceStrategyresultsis_distance_metricreturnc           	      r   | sg S | D cg c]'  }t        t        |j                               d         ) }}t        |      t	        |      }}||z
  }|dk(  r| D ]  }d|d<   	 t        |       S | D ]6  }t        |j                               d   }||z
  |z  }|r	d|z
  |d<   2||d<   8 t        |       S c c}w )z2Normalizes scores to a 0-1 scale, where 1 is best.r         ?normalized_score)floatlistvaluesminmax)	r   r   itemscores	min_score	max_scorescore_rangescore
normalizeds	            h/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_postgres/v2/hybrid_search_config.py_normalize_scoresr!   
   s     	 :AAeD'+,AFAv;FyIi'Ka 	+D'*D#$	+G} 	2T[[]#B'i';6
'*Z'7D#$ (2D#$	2 =+ Bs   ,B4primary_search_resultssecondary_search_resultsprimary_results_weightsecondary_results_weightfetch_top_kkwargsc                    |j                  dt        j                        }|t        j                  k7  }t	        | D cg c]  }t        |       c}|      }	t	        |D cg c]  }t        |       c}d      }
i }|	D ]7  }t        t        |j                               d         }|d   |z  |d<   |||<   9 |
D ]N  }t        t        |j                               d         }|d   |z  }||v r||   dxx   |z  cc<   E||d<   |||<   P t        |j                         d d	      }|D ]  }|j                  dd
        |d
| S c c}w c c}w )a  
    Ranks documents using a weighted sum of scores from two sources.

    Args:
        primary_search_results: A list of (document, distance) tuples from
            the primary search.
        secondary_search_results: A list of (document, distance) tuples from
            the secondary search.
        primary_results_weight: The weight for the primary source's scores.
            Defaults to 0.5.
        secondary_results_weight: The weight for the secondary source's scores.
            Defaults to 0.5.
        fetch_top_k: The number of documents to fetch after merging the results.
            Defaults to 4.

    Returns:
        A list of (document, distance) tuples, sorted by weighted_score in
        descending order.
    distance_strategy)r   Fr   r   distancec                     | d   S Nr*    r   s    r    <lambda>z&weighted_sum_ranking.<locals>.<lambda>n   s    4
3C     TkeyreverseN)getr   COSINE_DISTANCEINNER_PRODUCTr!   dictstrr   r   sortedpop)r"   r#   r$   r%   r&   r'   r)   is_primary_distancerownormalized_primarynormalized_secondaryweighted_scoresr   doc_idsecondary_weighted_scoreranked_resultsresults                    r    weighted_sum_rankingrD   *   s   8 

-== ,/?/M/MM +45sc5. -67sc7E
 24O # 'T$++-(+, 236LLZ"&	' % 
+T$++-(+,#'(:#;>V#V _$F#J/3KK/  8D&*OF#
+  &CTN ! -

%t,- ,;''O 	6 	8s   E!Errf_kc                    |j                  dt        j                        }i }|t        j                  k(  }t	        | d |      }t        |      D ]Z  \  }	}
t        t        |
j                               d         }||vrt        |
      ||<   d||   d<   ||   dxx   d|	|z   z  z  cc<   \ t	        |d d	      }t        |      D ]Z  \  }	}
t        t        |
j                               d         }||vrt        |
      ||<   d||   d<   ||   dxx   d|	|z   z  z  cc<   \ t	        |j                         d
 d	      }|d| S )a[  
    Ranks documents using Reciprocal Rank Fusion (RRF) of scores from two sources.

    Args:
        primary_search_results: A list of (document, distance) tuples from
            the primary search.
        secondary_search_results: A list of (document, distance) tuples from
            the secondary search.
        rrf_k: The RRF parameter k.
            Defaults to 60.
        fetch_top_k: The number of documents to fetch after merging the results.
            Defaults to 4.

    Returns:
        A list of (document_id, rrf_score) tuples, sorted by rrf_score
        in descending order.
    r)   c                     | d   S r,   r-   r.   s    r    r/   z(reciprocal_rank_fusion.<locals>.<lambda>       j) r0   r1   r   g        r*   r   c                     | d   S r,   r-   r.   s    r    r/   z(reciprocal_rank_fusion.<locals>.<lambda>   rH   r0   Tc                     | d   S r,   r-   r.   s    r    r/   z(reciprocal_rank_fusion.<locals>.<lambda>   s    d:.> r0   N)
r4   r   r5   r6   r9   	enumerater8   r   r   r7   )r"   r#   rE   r&   r'   r)   
rrf_scoresis_similarity_metricsorted_primaryrankr<   r@   sorted_secondaryrB   s                 r    reciprocal_rank_fusionrQ   w   s~   0 

-== -/J -0@0N0NN)$N ~. ?	cT#**,'*+#!%cJv-0Jvz*6:&#*>>&?  ) /0 ?	cT#**,'*+#!%cJv-0Jvz*6:&#*>>&? !>N ,;''r0   c                       e Zd ZU dZdZee   ed<   dZee   ed<   dZ	ee   ed<   e
Zeee   ee   egee   f   ed<    ee      Zeeef   ed	<   d
Zeed<   d
Zeed<   dZeed<   dZeed<   y)HybridSearchConfigz
    AlloyDB Vector Store Hybrid Search Config.

    Queries might be slow if the hybrid search column does not exist.
    For best hybrid search performance, consider creating a TSV column
    and adding GIN index.
     
tsv_columnzpg_catalog.englishtsv_lang	fts_queryfusion_function)default_factoryfusion_function_parameters   primary_top_ksecondary_top_klangchain_tsv_index
index_nameGIN
index_typeN)__name__
__module____qualname____doc__rU   r   r8   __annotations__rV   rW   rD   rX   r   r	   r
   r   r   r7   rZ   r\   intr]   r_   ra   r-   r0   r    rS   rS      s     !#J"2Hhsm2!Ix}! 	 X	*	x
3S98C=H  27t1LS#XLM3OS+J+Jr0   rS   N)      ?rh   r[   )<   r[   )abcr   dataclassesr   r   typingr   r   r   r	   
sqlalchemyr
   indexesr   r7   r8   boolr!   r   rg   rD   rQ   rS   r-   r0   r    <module>rp      s@    ( 4 4 ! %d38n%;?d38nF %(&)J($Z0J(&z2J( "J( $	J(
 J( J( d38nJ(` 	F($Z0F(&z2F( F( 	F(
 F( d38nF(R   r0   