
    3fi                        d dl Zd dlZd dlmZmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ dZdZd	Z ej(                  e      Zdd
dedededeee      dedefdZeed dfdedededee   def
dZ G d de      Zy)    N)AnyCallableListMappingOptional)CallbackManagerForLLMRun)
ConfigDict)SelfHostedPipeline)enforce_stop_tokensgpt2text-generation)text2text-generationr   summarization)stoppipelinepromptargsr   kwargsreturnc                "    | |g|i |}| j                   dk(  r|d   d   t        |      d }nP| j                   dk(  r	|d   d   }n8| j                   dk(  r	|d   d   }n t        d| j                    d	t         d
      |t	        ||      }|S )zInference function to send to the remote hardware.

    Accepts a Hugging Face pipeline (or more likely,
    a key pointing to such a pipeline on the cluster's object store)
    and returns generated text.
    r   r   generated_textNr   r   summary_textGot invalid task , currently only  are supported)tasklen
ValueErrorVALID_TASKSr   )r   r   r   r   r   responsetexts          o/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_community/llms/self_hosted_hugging_face.py_generate_textr#      s     000H}})){+,S[];	0	0{+,	/	){>* /)].:
 	
 "4.K    model_idr   devicemodel_kwargsc                    ddl m}m}m} ddl m} |xs i } |j
                  | fi |}		 |dk(  r |j
                  | fi |}
n.|dv r |j
                  | fi |}
nt        d| dt         d      t        j                  j                  d      Zddl}|j                  j                         }|dk  s||k\  rt        d| d| d      |dk  r|dkD  rt        j!                  d|        |||
|	||      }|j"                  t        vr t        d|j"                   dt         d      |S # t        $ r}t        d	| d
      |d}~ww xY w)zInference function to send to the remote hardware.

    Accepts a huggingface model_id and returns a pipeline for the task.
    r   )AutoModelForCausalLMAutoModelForSeq2SeqLMAutoTokenizer)r   r   )r   r   r   r   r   zCould not load the z# model due to missing dependencies.NtorchzGot device==z', device is required to be within [-1, )zDevice has %d GPUs available. Provide device={deviceId} to `from_model_id` to use availableGPUs for execution. deviceId is -1 for CPU and can be a positive integer associated with CUDA device id.)r   model	tokenizerr&   r'   )transformersr)   r*   r+   r   from_pretrainedr   r   ImportError	importlibutil	find_specr,   cudadevice_countloggerwarningr   )r%   r   r&   r'   r)   r*   r+   hf_pipeline_model_kwargsr0   r/   er,   cuda_device_countr   s                  r"   _load_transformerr?   1   s    XW4 &BM---hH-HI$$8(88S]SE>>9)99(TmTE#D6):;-~V  ~~(4!JJ335B;6%66vh '88I7J!M  A:+a/NNL " "H }}K' /)].:
 	
 OG  !$'JK
	s   AD, ,	E
5EE
c                   6    e Zd ZU dZeZeed<   	 eZ	eed<   	 dZ
eed<   	 dZee   ed<   	 dZeed<   	 g d	Zee   ed
<   	 eZeed<   	 eZeed<   	  ed      Zdef fdZedeeef   fd       Zedefd       Z	 	 ddedeee      dee   dedef
dZ  xZ!S )SelfHostedHuggingFaceLLMay  HuggingFace Pipeline API to run on self-hosted remote hardware.

    Supported hardware includes auto-launched instances on AWS, GCP, Azure,
    and Lambda, as well as servers specified
    by IP address and SSH credentials (such as on-prem, or another cloud
    like Paperspace, Coreweave, etc.).

    To use, you should have the ``runhouse`` python package installed.

    Only supports `text-generation`, `text2text-generation` and `summarization` for now.

    Example using from_model_id:
        .. code-block:: python

            from langchain_community.llms import SelfHostedHuggingFaceLLM
            import runhouse as rh
            gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
            hf = SelfHostedHuggingFaceLLM(
                model_id="google/flan-t5-large", task="text2text-generation",
                hardware=gpu
            )
    Example passing fn that generates a pipeline (bc the pipeline is not serializable):
        .. code-block:: python

            from langchain_community.llms import SelfHostedHuggingFaceLLM
            from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
            import runhouse as rh

            def get_pipeline():
                model_id = "gpt2"
                tokenizer = AutoTokenizer.from_pretrained(model_id)
                model = AutoModelForCausalLM.from_pretrained(model_id)
                pipe = pipeline(
                    "text-generation", model=model, tokenizer=tokenizer
                )
                return pipe
            hf = SelfHostedHuggingFaceLLM(
                model_load_fn=get_pipeline, model_id="gpt2", hardware=gpu)
    r%   r   r   r&   Nr'   hardware)z./r1   r,   
model_reqsmodel_load_fninference_fnforbid)extrar   c                     |j                  dt              |j                  dt              |j                  dd      |j                  dd      d}t        |   d	d|i| y)
a	  Construct the pipeline remotely using an auxiliary function.

        The load function needs to be importable to be imported
        and run on the server, i.e. in a module and not a REPL or closure.
        Then, initialize the remote inference function.
        r%   r   r&   r   r'   N)r%   r   r&   r'   load_fn_kwargs )getDEFAULT_MODEL_IDDEFAULT_TASKsuper__init__)selfr   rI   	__class__s      r"   rO   z!SelfHostedHuggingFaceLLM.__init__   s\     

:/?@JJv|4jj1-"JJ~t<	
 	AA&Ar$   r   c                 >    i d| j                   id| j                  iS )zGet the identifying parameters.r%   r'   )r%   r'   rP   s    r"   _identifying_paramsz,SelfHostedHuggingFaceLLM._identifying_params   s0    
4==)
t001
 	
r$   c                      y)Nselfhosted_huggingface_pipelinerJ   rS   s    r"   	_llm_typez"SelfHostedHuggingFaceLLM._llm_type   s    0r$   r   r   run_managerc                 B     | j                   d| j                  ||d|S )N)r   r   r   rJ   )clientpipeline_ref)rP   r   r   rX   r   s        r"   _callzSelfHostedHuggingFaceLLM._call   s1     t{{ 
&&vD
DJ
 	
r$   )NN)"__name__
__module____qualname____doc__rL   r%   str__annotations__rM   r   r&   intr'   r   dictrB   r   rC   r   r?   rD   r   r#   rE   r	   model_configrO   propertyr   rT   rW   r   r\   __classcell__)rQ   s   @r"   rA   rA   p   s"   &P %Hc$2D#FCOT#'L(4.'1Hc<;JS	;E/M8/<+L(+<LB B 
WS#X%6 
 
 13 1 1 %):>		
	
 tCy!	
 67		

 	
 
	
r$   rA   )importlib.utilr4   loggingtypingr   r   r   r   r   langchain_core.callbacksr   pydanticr	   $langchain_community.llms.self_hostedr
   langchain_community.llms.utilsr   rL   rM   r   	getLoggerr]   r9   ra   r#   rc   rd   r?   rA   rJ   r$   r"   <module>rp      s      9 9 =  C >  J			8	$ !%	  49
	
  	@ %#'	<<
< < 4.	<
 	<~c
1 c
r$   