
    3fi                    z    d dl mZ d dlmZmZmZ d dlmZ d dlm	Z	 d dl
mZ erd dlmZ d dlmZ  G d d	e      Zy
)    )annotations)TYPE_CHECKINGListOptional)
BaseLoader)Document)get_client_info)RecognitionConfig)	FieldMaskc                  N    e Zd ZdZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZd	dZy)
SpeechToTextLoadera7  Loader for Google Cloud Speech-to-Text audio transcripts.

    Inherits from [`BaseLoader`][langchain_core.document_loaders.BaseLoader].

    Transcribes audio files using Google Cloud Speech-to-Text API and loads
    transcribed text into documents. Supports both GCS URIs and local file paths.

    See [Speech-to-Text documentation](https://cloud.google.com/speech-to-text)
    for detailed information.

    !!! note "Installation"

        Requires additional dependencies:

        ```bash
        pip install langchain-google-community[speech]
        ```
    Nc                   	 ddl m} ddlm}	m}
m}m} || _        || _	        || _
        || _        |xs  |
 |	       dgd |d	      
      | _        || _         |t        d      |dk7  r || d      nd      | _        | j                  j!                  |||      | _        || _        y# t        $ r}t        d      |d}~ww xY w)a  Initialize the Speech-to-Text loader.

        Args:
            project_id: Google Cloud Project ID.
            file_path: Google Cloud Storage URI or local file path.
            location: Speech-to-Text recognizer location.
            recognizer_id: Speech-to-Text recognizer ID.
            config: Recognition options and features. See
                [`RecognitionConfig`](https://cloud.google.com/python/docs/reference/speech/latest/google.cloud.speech_v2.types.RecognitionConfig).
            config_mask: Fields in config that override `default_recognition_config`
                of the recognizer. See [`RecognizeRequest`](https://cloud.google.com/python/docs/reference/speech/latest/google.cloud.speech_v2.types.RecognizeRequest).
            is_long: Use async Cloud Speech recognition for long audio files. See
                [batch recognize](https://cloud.google.com/speech-to-text/v2/docs/batch-recognize).
        r   )ClientOptions)AutoDetectDecodingConfigr
   RecognitionFeaturesSpeechClientCould not import google-cloud-speech python package. Please, install speech dependency group: `pip install langchain-google-community[speech]`Nzen-USchirpT)enable_automatic_punctuation)auto_decoding_configlanguage_codesmodelfeatureszspeech-to-text)moduleglobalz-speech.googleapis.com)api_endpoint)client_infoclient_options)google.api_core.client_optionsr   google.cloud.speech_v2r   r
   r   r   ImportError
project_id	file_pathlocationrecognizer_idconfigconfig_maskr	   _clientrecognizer_path_recognizer_path_is_long)selfr"   r#   r$   r%   r&   r'   is_longr   r   r
   r   r   excs                 n/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_google_community/google_speech_to_text.py__init__zSpeechToTextLoader.__init__#   s    0	D  %" * 
 1!9!;#9(-1	!
 '#'/?@ x' hZ7M+NO
 !% < <-!
  E  	C 		s   B- -	C6CCc           	        | j                   rt        | j                               gS 	 ddlm}  || j                  | j                  | j                        }d| j                  v r| j                  |_
        n4t        | j                  d      5 }|j                         |_        ddd       | j                  j                  |	      }|j                   D cg c]=  }t        |j"                  d   j$                  |j&                  |j(                  d
      ? c}S # t
        $ r}t        d      |d}~ww xY w# 1 sw Y   xY wc c}w )zTranscribes the audio file and loads the transcript into documents.

        It uses the Google Cloud Speech-to-Text API to transcribe the audio file
        and blocks until the transcription is finished.
        )page_contentr   )RecognizeRequestr   N)
recognizerr&   r'   zgs://rbrequest)language_coderesult_end_offset)r2   metadata)r+   r   
_load_longr    r3   r!   r*   r&   r'   r#   uriopenreadcontentr(   	recognizeresultsalternatives
transcriptr8   r9   )r,   r3   r.   r7   fresponseresults          r/   loadzSpeechToTextLoader.loadg   s4    ==$//*;<==	? #,,;;((
 dnn$..GKdnnd+ +q"#&&(+ <<))'): #**	
  #003>>%+%9%9)/)A)A	
 		
+  	C 		 + +
	
s*   D D8AE	D5$D00D58Ec           
        ddl m}m}m}m}  || j
                  | j                  | j                   || j                        g | |                   }| j                  j                  |      }|j                  d      }d	j                  |j                  | j                     j                  j                  D cg c]'  }|j                  r|j                  d   j                  ) c}      S c c}w )
Nr   )BatchRecognizeFileMetadataBatchRecognizeRequestInlineOutputConfigRecognitionOutputConfig)r<   )inline_response_config)r4   r&   r'   filesrecognition_output_configr6   x   )timeout )r    rI   rJ   rK   rL   r*   r&   r'   r#   r(   batch_recognizerF   joinrA   rC   rB   )	r,   rI   rJ   rK   rL   r7   	operationrE   rs	            r/   r;   zSpeechToTextLoader._load_long   s    	
 	
 (,,;;((-$..AB&='9';'
 LL000A	##C#0ww "))$..9DDLL>> q!,,
 	
s   >,C1)zus-central1_NNF)r"   strr#   rX   r$   rX   r%   rX   r&   zOptional[RecognitionConfig]r'   zOptional[FieldMask]r-   bool)returnzList[Document])rZ   rX   )__name__
__module____qualname____doc__r0   rG   r;        r/   r   r      su    . & .2+/B B  B  	B 
 B  ,B  )B  B H(
T
r`   r   N)
__future__r   typingr   r   r   langchain_core.document_loadersr   langchain_core.documentsr   !langchain_google_community._utilsr	   r    r
   google.protobuf.field_mask_pb2r   r   r_   r`   r/   <module>rg      s.    " 0 0 6 - =88[
 [
r`   