
    3fihv                    &   d Z ddlmZ ddlZddlZddlmZmZmZm	Z	m
Z
 ddlmZmZmZ ddlZddlZddlmZ ddlmZ ddlmZmZmZ dd	lmZmZmZmZmZ dd
lm Z  ddl!m"Z"  ejF                  e$      Z%dZ&	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ' G d dee      Z(y)z#Base classes for OpenAI embeddings.    )annotationsN)	AwaitableCallableIterableMappingSequence)AnyLiteralcast)
Embeddings)run_in_executor)from_envget_pydantic_field_namessecret_from_env)	BaseModel
ConfigDictField	SecretStrmodel_validator)Self) _resolve_sync_and_async_api_keysi c                   t        |       D cg c]  }g  }}t        |       D cg c]  }g  }}t        t        |            D ]S  }|rt        ||         dk(  r|||      j                  ||          |||      j                  t        ||                U g }	t        |       D ]  }||   }
t        |
      dk(  r|	j                  d        (t        |
      dk(  r|	j                  |
d          Kt        ||         }t	        |
ddiD cg c]&  }t        d t	        |||   d      D              |z  ( }}t        d |D              dz  }|	j                  |D cg c]  }||z  	 c}        |	S c c}w c c}w c c}w c c}w )	N   r   strictFc              3  ,   K   | ]  \  }}||z    y wN ).0valweights      ^/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_openai/embeddings/base.py	<genexpr>z6_process_batched_chunked_embeddings.<locals>.<genexpr>F   s       C fs   )r   c              3  &   K   | ]	  }|d z    yw)   Nr   )r   r   s     r!   r"   z6_process_batched_chunked_embeddings.<locals>.<genexpr>P   s     23Q2   g      ?)rangelenappendsumzip)	num_textstokensbatched_embeddingsindices
skip_empty_resultsnum_tokens_in_batchi
embeddings_resulttotal_weight	embeddingaverage	magnituder   s                   r!   #_process_batched_chunked_embeddingsr:      s    5:)4D'Eq'EG'E
 9>i8H+I1B+I+I3w<  ?#0349
""#5a#89GAJ'..s6!9~>	? ,.J9 @%,QZw<1 d#w<1gaj)
 .q12 !'8%8
   #&y2Ea2HQV#W  	
 
 2'22c9	g>s3?>??@B a (F
 ,J:
 ?s   	E6	E;+F F
c                     e Zd ZU dZ edd      Zded<    edd      Zded<   dZd	ed
<   dZ	ded<   	 eZ
ded<    e edd      d      Zded<   	  ed edd            Zded<   	  e edd            Zded<    e edd            Zded<   dZded<   	  ed  ed!d            Zd"ed#<   	  ed$ ed%d&gd            Zded'<   	 dZd(ed)<   dZd*ed+<   d,Zded-<   	 d.Zded/<   	  edd01      Zd2ed3<   	 dZded4<   dZd5ed6<   	 dZded7<   	 d8Zd5ed9<   	  ee      Zd:ed;<   	 d8Zd5ed<<   	 dZ d=ed><   dZ!d?ed@<   dAZ"dedB<   	 dCZ#dedD<   	 dZ$dEedF<   	 dZ%dEedG<   	 dZ&d5edH<   	  e'dIddJK      Z( e)dLM      e*d[dN              Z+ e)dOM      d\dP       Z,e-d]dQ       Z.d^dRZ/	 	 	 	 	 	 d_dSZ0ddT	 	 	 	 	 	 	 	 	 d`dUZ1ddT	 	 	 	 	 	 	 	 	 d`dVZ2	 da	 	 	 	 	 	 	 dbdWZ3	 da	 	 	 	 	 	 	 dbdXZ4dcdYZ5dcdZZ6y)dOpenAIEmbeddingsu  OpenAI embedding model integration.

    Setup:
        Install `langchain_openai` and set environment variable `OPENAI_API_KEY`.

        ```bash
        pip install -U langchain_openai
        export OPENAI_API_KEY="your-api-key"
        ```

    Key init args — embedding params:
        model:
            Name of OpenAI model to use.
        dimensions:
            The number of dimensions the resulting output embeddings should have.
            Only supported in `'text-embedding-3'` and later models.

    Key init args — client params:
        api_key:
            OpenAI API key.
        organization:
            OpenAI organization ID. If not passed in will be read
            from env var `OPENAI_ORG_ID`.
        max_retries:
            Maximum number of retries to make when generating.
        request_timeout:
            Timeout for requests to OpenAI completion API

    See full list of supported init args and their descriptions in the params section.

    Instantiate:
        ```python
        from langchain_openai import OpenAIEmbeddings

        embed = OpenAIEmbeddings(
            model="text-embedding-3-large"
            # With the `text-embedding-3` class
            # of models, you can specify the size
            # of the embeddings you want returned.
            # dimensions=1024
        )
        ```

    Embed single text:
        ```python
        input_text = "The meaning of life is 42"
        vector = embeddings.embed_query("hello")
        print(vector[:3])
        ```
        ```python
        [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915]
        ```

    Embed multiple texts:
        ```python
        vectors = embeddings.embed_documents(["hello", "goodbye"])
        # Showing only the first 3 coordinates
        print(len(vectors))
        print(vectors[0][:3])
        ```
        ```python
        2
        [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915]
        ```

    Async:
        ```python
        await embed.aembed_query(input_text)
        print(vector[:3])

        # multiple:
        # await embed.aembed_documents(input_texts)
        ```
        ```python
        [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188]
        ```
    NT)defaultexcluder	   clientasync_clientztext-embedding-ada-002strmodel
int | None
dimensionsz
str | None
deploymentOPENAI_API_VERSION)r=   api_version)default_factoryaliasopenai_api_versionbase_urlOPENAI_API_BASE)rI   rH   openai_api_baseOPENAI_API_TYPE)rH   openai_api_typeOPENAI_PROXYopenai_proxyi  intembedding_ctx_lengthapi_keyOPENAI_API_KEYzCSecretStr | None | Callable[[], str] | Callable[[], Awaitable[str]]openai_api_keyorganizationOPENAI_ORG_IDOPENAI_ORGANIZATIONopenai_organizationz Literal['all'] | set[str] | Noneallowed_specialz0Literal['all'] | set[str] | Sequence[str] | Nonedisallowed_speciali  
chunk_sizer$   max_retriestimeout)r=   rI   z(float | tuple[float, float] | Any | Nonerequest_timeoutheadersbooltiktoken_enabledtiktoken_model_nameFshow_progress_bardict[str, Any]model_kwargsr/   zMapping[str, str] | Nonedefault_headerszMapping[str, object] | Nonedefault_query   retry_min_seconds   retry_max_secondsz
Any | Nonehttp_clienthttp_async_clientcheck_embedding_ctx_lengthforbidr   )extrapopulate_by_nameprotected_namespacesbefore)modec           
     h   t        |       }|j                  di       }t        |      D ]O  }||v rd| d}t        |      ||vst	        j
                  d| d| d| d       |j                  |      ||<   Q |j                  |j                               }|rd| d	}t        |      ||d<   |S )
z>Build extra kwargs from additional params that were passed in.rg   zFound z supplied twice.z	WARNING! z/ is not default parameter.
                    zJ was transferred to model_kwargs.
                    Please confirm that z is what you intended.zParameters za should be specified explicitly. Instead they were passed in as part of `model_kwargs` parameter.)	r   getlist
ValueErrorwarningswarnpopintersectionkeys)clsvaluesall_required_field_namesrr   
field_namemsginvalid_model_kwargss          r!   build_extrazOpenAIEmbeddings.build_extra.  s     $<C#@ 

>2.v, 
	;JU"zl*:; o%!99!* .L !))34JN
 %+JJz$:j!
	;  8DDUZZ\R23 4S T  S/!!&~    afterc                ^   | j                   dv rd}t        |      d}d}| j                  t        | j                        \  }}| j                  | j
                  | j                  | j                  | j                  | j                  d}| j                  rR| j                  s| j                  r:| j                  }| j                  }| j                  }d|d|d|}t        |      | j                  s}|d| _        ns| j                  r2| j                  s&	 ddl}|j!                  | j                  
      | _        | j                  |d}
t#        j$                  di ||
j&                  | _        | j(                  ss| j                  r2| j                  s&	 ddl}|j+                  | j                  
      | _        | j                  |d}t#        j,                  di ||j&                  | _        | S # t        $ r}	d	}t        |      |	d}	~	ww xY w# t        $ r}	d	}t        |      |	d}	~	ww xY w)z?Validate that api key and python package exists in environment.)azureazure_adazureadzEIf you are using Azure, please use the `AzureOpenAIEmbeddings` class.N)rW   rK   r_   r^   rh   ri   zwCannot specify 'openai_proxy' if one of 'http_client'/'http_async_client' is already specified. Received:
openai_proxy=z
http_client=z
http_async_client=r   zRCould not import httpx python package. Please install it with `pip install httpx`.)proxy)rn   rT   r   )rO   rz   rV   r   rZ   rM   r`   r^   rh   ri   rQ   rn   ro   r?   httpxImportErrorClientopenaiOpenAIr4   r@   AsyncClientAsyncOpenAI)selfr   sync_api_key_valueasync_api_key_valueclient_paramsrQ   rn   ro   r   esync_specificasync_specifics               r!   validate_environmentz%OpenAIEmbeddings.validate_environmentK  sh    #CCW  S/! >BIM* 7W##73 3
 !44,,++++#33!//
 $"2"2d6L6L,,L**K $ 6 6!/K>1F4E3GI 
 S/!{{!) #$$T-=-=6$ (-||$:K:K|'LD$#'#3#31! %mmMmM}MXX    )?)?2  */):):ARAR):)S&#55.N !' 2 2 !! ! j  ? ' 6J  *#.A56  # 2F  &c*12s0   G1 H 1	H:HH	H,H''H,c                p    d| j                   i| j                  }| j                  | j                  |d<   |S )NrB   rD   )rB   rg   rD   )r   paramss     r!   _invocation_paramsz#OpenAIEmbeddings._invocation_params  s8    At/@/@A??&#'??F< r   c                6    | j                   d}t        |      y)z8Check that sync client is available, raise error if not.NzSync client is not available. This happens when an async callable was provided for the API key. Use async methods (ainvoke, astream) instead, or provide a string or sync callable for the API key.)r?   rz   )r   r   s     r!   _ensure_sync_client_availablez.OpenAIEmbeddings._ensure_sync_client_available  s(    ;;Q 
 S/! r   c           
        g }g }g }| j                   xs | j                  }| j                  s	 ddlm} |j                  |      }	t        |      D ]  \  }
}|	j                  |d      }t        dt        |      | j                        D ]a  }|||| j                  z    }|	j                  |      }|j                  |       |j                  |
       |j                  t        |             c  n:	 t        j                   |      }| j&                  | j(                  dj+                         D ci c]
  \  }}||| }}}t        |      D ]  \  }
}| j                  j-                  d	      r|j/                  d
d      }|r |j                  |fi |}n|j1                  |      }t        dt        |      | j                        D ]^  }|j                  |||| j                  z           |j                  |
       |j                  t        |||| j                  z                 `  | j2                  r$	 ddlm}  |t        dt        |      |            }nt        dt        |      |      }||||fS # t
        $ r d}t        |      w xY w# t"        $ r t        j$                  d      }Y w xY wc c}}w # t
        $ r t        dt        |      |      }Y lw xY w)a  Tokenize and batch input texts.

        Splits texts based on `embedding_ctx_length` and groups them into batches
        of size `chunk_size`.

        Args:
            texts: The list of texts to tokenize.
            chunk_size: The maximum number of texts to include in a single batch.

        Returns:
            A tuple containing:
                1. An iterable of starting indices in the token list for each batch.
                2. A list of tokenized texts (token arrays for tiktoken, strings for
                    HuggingFace).
                3. An iterable mapping each token array to the index of the original
                    text. Same length as the token list.
                4. A list of token counts for each tokenized text.
        r   )AutoTokenizerzCould not import transformers python package. This is needed for OpenAIEmbeddings to work without `tiktoken`. Please install it with `pip install transformers`. )pretrained_model_name_or_pathF)add_special_tokenscl100k_base)r[   r\   001
 )tqdm)rd   rB   rc   transformersr   r   rz   from_pretrained	enumerateencoder&   r'   rS   decoder(   tiktokenencoding_for_modelKeyErrorget_encodingr[   r\   itemsendswithreplaceencode_ordinaryre   	tqdm.autor   )r   textsr]   r,   r.   token_counts
model_namer   r   	tokenizerr3   text	tokenizedjtoken_chunk
chunk_textencodingkvencoder_kwargstokenr   _iters                          r!   	_tokenizezOpenAIEmbeddings._tokenize  s   * )+"$--;
 $$&6 &55.8 6 I %U+ :4'0'7'7QV'7'W	 q#i.$2K2KL 	:A-6A 9 99.K
 '0&6&6{&CJMM*-NN1% ''K(89	:: @#66zB (,';';*.*A*A %'.Aq = 1.N . %U+ W4::&&u-  <<c2D!+HOODCNCE$44T:E q#e*d.G.GH WAMM%A0I0I,I"JKNN1% ''E!a$:S:S6S,T(UVWW$ !!:*"&uQFZ'H"I !S[*5Efg|33E  &V 
 !o%&:  @#00?@.>  :aVj9:s5   I8 3J 2J79"J= 8JJ43J4=KK)r]   c          	         |xs  j                   }i  j                  | j                  ||      \  }}}}	g }
d}|t        |      k  rd}|}t	        |t        ||z   t        |                  D ])  }|	|   }||z   t        kD  r||k(  r|dz   } n||z  }|dz   }+ ||| }  j                  j                  dd|i}t        |t              s|j                         }|
j                  d |d   D               |}|t        |      k  rt        t        |      ||
| j                        }dd	 fd}|D cg c]  }||n |        c}S c c}w )
a2  Generate length-safe embeddings for a list of texts.

        This method handles tokenization and embedding generation, respecting the
        `embedding_ctx_length` and `chunk_size`. Supports both `tiktoken` and
        HuggingFace `transformers` based on the `tiktoken_enabled` flag.

        Args:
            texts: The list of texts to embed.
            engine: The engine or model to use for embeddings.
            chunk_size: The size of chunks for processing embeddings.

        Returns:
            A list of embeddings for each input text.
        r   r   inputc              3  &   K   | ]	  }|d      ywr7   Nr   r   rs     r!   r"   z<OpenAIEmbeddings._get_len_safe_embeddings.<locals>.<genexpr>C       %Oan%Or%   dataNc                     I j                   j                  dddi} t        | t              s| j	                         } | d   d   d   S Nr    r   r   r7   r   )r?   create
isinstancedict
model_dumpaverage_embedded_cached_empty_embeddingclient_kwargsr   s    r!   empty_embeddingzBOpenAIEmbeddings._get_len_safe_embeddings.<locals>.empty_embeddingL  s]    &.#54;;#5#5#PB#P-#P !"2D9'7'B'B'D$*:6*B1*Ek*R'**r   r   returnlist[float])r]   r   r   r'   r&   minMAX_TOKENS_PER_REQUESTr?   r   r   r   r   extendr:   r/   r   r   enginer]   kwargs_chunk_sizer   r,   r.   r   r-   r3   batch_token_count	batch_endr   chunk_tokensbatch_tokensresponser4   r   r   r   r   s   `                    @@r!   _get_len_safe_embeddingsz)OpenAIEmbeddings._get_len_safe_embeddings  s   , !3DOO=422=f=/3~~e[/Q,vw02 #f+o !I1c!k/3v;?@ 	"+A$|36LL A~$%E	!\1!E		" "!I.L)t{{))NNNHh-#..0%%%Ohv>N%OOA/ #f+o2 9J 2GT__

 7;	+ DNNaQ](99NNNs   >Ec          	        K   |xs  j                   }i  j                  |t        d j                  ||       d{   \  }}}}	g }
d}|t	        |      k  rd}|}t        |t        ||z   t	        |                  D ])  }|	|   }||z   t        kD  r||k(  r|dz   } n||z  }|dz   }+ ||| }  j                  j                  dd|i d{   }t        |t              s|j                         }|
j                  d |d   D               |}|t	        |      k  rt        t	        |      ||
| j                        }dd	 fd}|D cg c]  }||n |        d{    c}S 7 27 7 c c}w w)
aA  Asynchronously generate length-safe embeddings for a list of texts.

        This method handles tokenization and embedding generation, respecting the
        `embedding_ctx_length` and `chunk_size`. Supports both `tiktoken` and
        HuggingFace `transformers` based on the `tiktoken_enabled` flag.

        Args:
            texts: The list of texts to embed.
            engine: The engine or model to use for embeddings.
            chunk_size: The size of chunks for processing embeddings.

        Returns:
            A list of embeddings for each input text.
        Nr   r   r   c              3  &   K   | ]	  }|d      ywr   r   r   s     r!   r"   z=OpenAIEmbeddings._aget_len_safe_embeddings.<locals>.<genexpr>  r   r%   r   c                    K   Q j                   j                  dddi d {   } t        | t              s| j	                         } | d   d   d   S 7 1wr   )r@   r   r   r   r   r   s    r!   r   zCOpenAIEmbeddings._aget_len_safe_embeddings.<locals>.empty_embedding  sw     &.)A):):)A)A ** -* $  ""2D9'7'B'B'D$*:6*B1*Ek*R'**$s   $AA2Ar   r   )r]   r   r   r   r'   r&   r   r   r@   r   r   r   r   r   r:   r/   r   s   `                    @@r!   _aget_len_safe_embeddingsz*OpenAIEmbeddings._aget_len_safe_embeddingsY  s    , !3DOO=422=f=5D$..%6
 0
,vw 13 #f+o !I1c!k/3v;?@ 	"+A$|36LL A~$%E	!\1!E		" "!I.L5T..55 "&3 H h-#..0%%%Ohv>N%OOA3 #f+o6 9J 2GT__

 7;		+ JTTAQ]o.?(??TTe0
22 )@TsO   <E?E3BE?E6AE?&0E?E:)E8
*E:0E?6E?8E::E?c           	        | j                          |xs | j                  }i | j                  |}| j                  s~g }t	        dt        |      |      D ]a  } | j                  j                  dd||||z    i|}t        |t              s|j                         }|j                  d |d   D               c |S t        t        | j                        }	 | j                  |f|	|d|S )a  Call OpenAI's embedding endpoint to embed search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings.

                If `None`, will use the chunk size specified by the class.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            List of embeddings, one for each text.
        r   r   c              3  &   K   | ]	  }|d      ywr   r   r   s     r!   r"   z3OpenAIEmbeddings.embed_documents.<locals>.<genexpr>       !KQ!K.!Kr%   r   r   r]   r   )r   r]   r   rp   r&   r'   r?   r   r   r   r   r   r   rA   rE   r   
r   r   r]   r   chunk_size_r   r4   r3   r   r   s
             r!   embed_documentsz OpenAIEmbeddings.embed_documents  s    	**, 3DOO=422=f=..,.J1c%j+6 L-4;;-- AO48E "(D1'224H!!!K(6:J!KKL  c4??+,t,,
 Z
;A
 	
r   c           	       K   |xs | j                   }i | j                  |}| j                  sg }t        dt	        |      |      D ]i  } | j
                  j                  dd||||z    i| d{   }t        |t              s|j                         }|j                  d |d   D               k |S t        t        | j                        }	 | j                  |f|	|d| d{   S 7 {7 w)a  Asynchronously call OpenAI's embedding endpoint to embed search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings.

                If `None`, will use the chunk size specified by the class.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            List of embeddings, one for each text.
        r   r   Nc              3  &   K   | ]	  }|d      ywr   r   r   s     r!   r"   z4OpenAIEmbeddings.aembed_documents.<locals>.<genexpr>  r   r%   r   r   r   )r]   r   rp   r&   r'   r@   r   r   r   r   r   r   rA   rE   r   r   s
             r!   aembed_documentsz!OpenAIEmbeddings.aembed_documents  s     !3DOO=422=f=..,.J1c%j+6 L!9!2!2!9!9 "AO4"8E"  "(D1'224H!!!K(6:J!KKL  c4??+3T33
 Z
;A
 
 	

s%   A/C11C-2A6C1(C/)C1/C1c                P    | j                           | j                  |gfi |d   S )a  Call out to OpenAI's embedding endpoint for embedding query text.

        Args:
            text: The text to embed.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            Embedding for the text.
        r   )r   r   )r   r   r   s      r!   embed_queryzOpenAIEmbeddings.embed_query  s/     	**,#t##TF5f5a88r   c                P   K    | j                   |gfi | d{   }|d   S 7 	w)a	  Call out to OpenAI's embedding endpoint async for embedding query text.

        Args:
            text: The text to embed.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            Embedding for the text.
        Nr   )r   )r   r   r   r4   s       r!   aembed_queryzOpenAIEmbeddings.aembed_query  s4      1400$B6BB
!} Cs   &$
&)r   rf   r   r	   )r   r   )r   rf   )r   None)r   	list[str]r]   rR   r   zAtuple[Iterable[int], list[list[int] | str], list[int], list[int]])
r   r   r   rA   r]   rC   r   r	   r   list[list[float]]r   )r   r   r]   rC   r   r	   r   r  )r   rA   r   r	   r   r   )7__name__
__module____qualname____doc__r   r?   __annotations__r@   rB   rD   rE   r   rJ   rM   rO   rQ   rS   r   rV   rZ   r[   r\   r]   r^   r`   ra   rc   rd   re   r   rg   r/   rh   ri   rk   rm   rn   ro   rp   r   model_configr   classmethodr   r   propertyr   r   r   r   r   r   r   r   r   r   r   r!   r<   r<   V   s   L\ d3FC3dD9L#9)E3)!J
! #J
" &+ !5tD&
  T #((3Dd*S#OZ  #( !2DA#OZ 
  % > L*  !%#$8 	9ISW)X	 K
 P&+ 34d
'  O8<O5<KOHOJ8K<@EIAO=  GS!d!G '+* $t#8#(#>L.>VJI04O-415M.5
 s7s7"K" %)z( (,+- BL (#  $6 '"N #N`  "`4`4,/`4	J`4R "&DODO 	DO
 DO DO 
DOZ "&JUJU 	JU
 JU JU 
JUZ :>"
"
,6"
IL"
	"
J :>!
!
,6!
IL!
	!
F9r   r<   )r+   rR   r,   zlist[list[int] | str]r-   r  r.   z	list[int]r/   rb   r   zlist[list[float] | None]))r  
__future__r   loggingr{   collections.abcr   r   r   r   r   typingr	   r
   r   r   r   langchain_core.embeddingsr   langchain_core.runnables.configr   langchain_core.utilsr   r   r   pydanticr   r   r   r   r   typing_extensionsr   *langchain_openai.chat_models._client_utilsr   	getLoggerr  loggerr   r:   r<   r   r   r!   <module>r     s    ) "   L L % %   0 ; T T M M " W			8	$  199!9 *9 	9
 9 9xn
y* n
r   