
    3fi                     V    d dl Z d dlmZmZmZmZmZ d dlmZ d dl	m
Z
  G d de
      Zy)    N)IteratorMappingOptionalSequenceUnion)Document)
BaseLoaderc                       e Zd ZdZ	 	 	 	 	 	 	 	 	 ddededee   dee   deeeee   eeeeee   f   f   f      dee   d	ee	   d
e	deee	ef      dee
   fdZdee   fdZdeeef   defdZy)HuggingFaceDatasetLoaderz&Load from `Hugging Face Hub` datasets.Npathpage_content_columnnamedata_dir
data_files	cache_dirkeep_in_memory
save_infosuse_auth_tokennum_procc                     || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        y)a  Initialize the HuggingFaceDatasetLoader.

        Args:
            path: Path or name of the dataset.
            page_content_column: Page content column name. Default is "text".
            name: Name of the dataset configuration.
            data_dir: Data directory of the dataset configuration.
            data_files: Path(s) to source data file(s).
            cache_dir: Directory to read/write data.
            keep_in_memory: Whether to copy the dataset in-memory.
            save_infos: Save the dataset information (checksums/size/splits/...).
              Default is False.
            use_auth_token: Bearer token for remote files on the Dataset Hub.
            num_proc: Number of processes.
        N)
r   r   r   r   r   r   r   r   r   r   )selfr   r   r   r   r   r   r   r   r   r   s              w/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_community/document_loaders/hugging_face_dataset.py__init__z!HuggingFaceDatasetLoader.__init__   sM    > 	#6 	 $",$,     returnc              #   l   K   	 ddl m}  | j                   j                   j
                   j                   j                   j                   j                   j                   j                  	       fdj                         D        E d{    y# t        $ r t        d      w xY w7 w)zLoad documents lazily.r   )load_datasetzXCould not import datasets python package. Please install it with `pip install datasets`.)	r   r   r   r   r   r   r   r   r   c           	   3      K   | ]C  }|   D ]9  }t        j                  |j                  j                              |        ; E yw))page_contentmetadataN)r   	parse_objpopr   ).0keyrowdatasetr   s      r   	<genexpr>z5HuggingFaceDatasetLoader.lazy_load.<locals>.<genexpr>N   s^      

 s|
  !^^CGGD4L4L,MN 

s   A	AN)datasetsr   ImportErrorr   r   r   r   r   r   r   r   r   keys)r   r   r&   s   ` @r   	lazy_loadz"HuggingFaceDatasetLoader.lazy_load6   s     	- ]]nn....]]



 ||~
 	
 	
%  	A 	$	
s(   B4B BB4B2B4B//B4r   c                 P    t        |t              rt        j                  |      S |S )N)
isinstanceobjectjsondumps)r   r   s     r   r!   z"HuggingFaceDatasetLoader.parse_objW   s!    lF+::l++r   )	textNNNNNFNN)__name__
__module____qualname____doc__strr   r   r   r   boolintr   r   r   r+   r.   r!    r   r   r   r   	   s   0
 $*""& #')- 59"&(!(! !(! sm	(!
 3-(! #x}gc5hsm9K3L.L&MMN
(! C=(! !(! (! !tSy!12(! 3-(!T
	(	
BeCK&8 S r   r   )r/   typingr   r   r   r   r   langchain_core.documentsr   )langchain_community.document_loaders.baser	   r   r9   r   r   <module>r=      s"     ? ? - @Qz Qr   