
    3fi                     j    d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	 d dl
mZ d dlmZ  G d de      Zy)    N)Path)DictIteratorListOptionalUnion)Document)
BaseLoaderc                       e Zd ZdZddeeef   dee   fdZe	dede
eef   fd       Zdee   fdZd	ej                   d
edee   fdZdededefdZdededefdZdedededefdZy)SlackDirectoryLoaderz#Load from a `Slack` directory dump.Nzip_pathworkspace_urlc                 r    t        |      | _        || _        | j                  | j                        | _        y)a)  Initialize the SlackDirectoryLoader.

        Args:
            zip_path (str): The path to the Slack directory dump zip file.
            workspace_url (Optional[str]): The Slack workspace URL.
              Including the URL will turn
              sources into links. Defaults to None.
        N)r   r   r   _get_channel_id_mapchannel_id_map)selfr   r   s      r/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_community/document_loaders/slack_directory.py__init__zSlackDirectoryLoader.__init__   s.     X*"66t}}E    returnc                 @   t        j                  | d      5 }	 |j                  dd      5 }t        j                  |      }ddd       D ci c]  }|d   |d    c}cddd       S # 1 sw Y   )xY wc c}w # t
        $ r i cY cddd       S w xY w# 1 sw Y   yxY w)z?Get a dictionary mapping channel names to their respective IDs.rzchannels.jsonNnameid)zipfileZipFileopenjsonloadKeyError)r   zip_filefchannelschannels        r   r   z(SlackDirectoryLoader._get_channel_id_map   s     __Xs+ 	x]]?C8 ,A#yy|H,FNO76O		 	, ,O 		 	
	 	sK   BA:A)A:A5A:)A2	.A::BBBBBc              #   d  K   t        j                  | j                  d      5 }|j                         D ]c  }t	        |      j
                  j                  }|s%|j                  d      s7| j                  ||      }|D ]  }| j                  ||        e 	 ddd       y# 1 sw Y   yxY ww)z8Load and return documents from the Slack directory dump.r   z.jsonN)
r   r   r   namelistr   parentr   endswith
_read_json_convert_message_to_document)r   r!   channel_pathchannel_namemessagesmessages         r   	lazy_loadzSlackDirectoryLoader.lazy_load&   s     __T]]C0 	WH ( 1 1 3 W#L188==#((1#xFH#+ W"??VVWW	W 	W 	Ws#   !B0AB$+/B$	B0$B-)B0r!   	file_pathc                     |j                  |d      5 }t        j                  |      }ddd       |S # 1 sw Y   S xY w)z"Read JSON data from a zip subfile.r   N)r   r   r   )r   r!   r0   r"   datas        r   r)   zSlackDirectoryLoader._read_json2   s9    ]]9c* 	 a99Q<D	 	 s   3=r.   r,   c                 d    |j                  dd      }| j                  ||      }t        ||      S )a/  
        Convert a message to a Document object.

        Args:
            message (dict): A message in the form of a dictionary.
            channel_name (str): The name of the channel the message belongs to.

        Returns:
            Document: A Document object representing the message.
        text )page_contentmetadata)get_get_message_metadatar	   )r   r.   r,   r4   r7   s        r   r*   z1SlackDirectoryLoader._convert_message_to_document8   s9     {{62&--g|D
 	
r   c                 ~    |j                  dd      }|j                  dd      }| j                  |||      }||||dS )z;Create and return metadata for a given message and channel.tsr5   user)sourcer$   	timestampr<   )r8   _get_message_source)r   r.   r,   r>   r<   r=   s         r   r9   z*SlackDirectoryLoader._get_message_metadataL   sL    KKb)	{{62&)),iH#"	
 	
r   r<   r>   c                     | j                   rC| j                  j                  |d      }| j                    d| d|j                  dd       z   S | d| d| S )a@  
        Get the message source as a string.

        Args:
            channel_name (str): The name of the channel the message belongs to.
            user (str): The user ID who sent the message.
            timestamp (str): The timestamp of the message.

        Returns:
            str: The message source.
        r5   z
/archives/z/p.z - )r   r   r8   replace)r   r,   r<   r>   
channel_ids        r   r?   z(SlackDirectoryLoader._get_message_sourceX   su     ,,00rBJ%%&j=y((b1234
 #^3tfC	{;;r   )N)__name__
__module____qualname____doc__r   strr   r   r   staticmethodr   r   r   r	   r/   r   r   r   dictr)   r*   r9   r?    r   r   r   r      s    -FsDy!1 F(3- F d tCH~  
W8H- 
W7?? s tDz 

+.
	
(

T 

 

 

< <3 <3 <SV <r   r   )r   r   pathlibr   typingr   r   r   r   r   langchain_core.documentsr	   )langchain_community.document_loaders.baser
   r   rK   r   r   <module>rP      s(       8 8 - @`<: `<r   