
    3fi`                         d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZmZmZ  G d dee      Zy)	    N)Path)AnyClassVarDictListOptionalSequenceTupleUnion)
BaseLoader)Document)	BaseModelfield_validatormodel_validatorc                      e Zd ZU dZdZeeedf      ed<    e	j                         dz  dz  Ze	ed<   	  e	j                         dz  dz  Ze	ed	<   	  e	j                         dz  d
z  Ze	ed<   	 dZeed<   	 dZee   ed<   	 dZeee      ed<   	 dZeee      ed<   	 dZeed<   	 dZeee      ed<   	 dZeed<   	 dZeed<   	 i Zedef   ed<   	 dZeed<   	 dZeed<   	 dgZee   ed<   	 dedefdZ dedefdZ!dedefd Z"dedee   fd!Z# e$d"#      e%d$eeef   defd%              Z& e'd	      d&ed'edefd(       Z( e'd      d&ee   dee   fd)       Z)defd*Z*dedee+   fd+Z,dede+fd,Z-dd-dedeee      dee+   fd.Z.d/ededeeee/eee   f   f      fd0Z0dee+   fd1Z1dedee+   fd2Z2dee+   fd3Z3dee+   fd4Z4y)5GoogleDriveLoaderai  Load documents from Google Drive.

    Inherits from [`BaseLoader`][langchain_core.document_loaders.BaseLoader].

    Supports loading from folders, specific documents, or file IDs with authentication.

    !!! note "Installation"

        Requires additional dependencies:

        ```bash
        pip install langchain-google-community[drive]
        ```
    )*https://www.googleapis.com/auth/drive.filez.https://www.googleapis.com/auth/drive.readonlyz3https://www.googleapis.com/auth/drive.meet.readonlyz7https://www.googleapis.com/auth/drive.metadata.readonlyz.https://www.googleapis.com/auth/drive.metadata.VALID_SCOPESz.credentialsz	keys.jsonservice_account_keyzcredentials.jsoncredentials_pathz
token.json
token_pathNcredentials	folder_iddocument_idsfile_idsF	recursive
file_typesload_trashed_filesfile_loader_clsstrfile_loader_kwargs	load_authload_extended_metadatar   scopesidreturnc                    	 ddl }ddlm} | j	                         } |dd|      }	 |j                         j                  |d	      j                         }|d   S # t        $ r}t        d      |d}~ww xY w# |j                  j                  $ r t        d
|        Y yt        $ r&}t        d|        t        d|        Y d}~yd}~ww xY w)zFetch the size of the file.r   Nbuild_You must run `pip install --upgrade google-api-python-client` to load authorization identities.drivev3r   sizefileIdfieldszinsufficientFilePermissions: The user does not have sufficient                 permissions to retrieve size for the file with fileId: unknownzAError occurred while fetching the size for the file with fileId: Error: googleapiclient.errorsgoogleapiclient.discoveryr)   ImportError_load_credentialsfilesgetexecuteerrors	HttpErrorprint	Exceptionselfr%   googleapiclientr)   exccredsservicefiles           ^/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_google_community/drive.py_get_file_size_from_idz(GoogleDriveLoader._get_file_size_from_id]   s    		)7 &&(59	==?&&b&@HHJD<  	4
 	 %%// 	HHJtM  	STVSWX GC5/"	s4   
A 3A9 	A6%A11A69'C"C*CCc                    	 ddl }ddlm} | j	                         } |dd|      }	 |j                         j                  |d	      j                         }|d   d   j                  d
      S # t        $ r}t        d      |d}~ww xY w# |j                  j                  $ r t        d|        Y yt        $ r}t        d| d|        Y d}~yd}~ww xY w)zFetch the owner of the file.r   Nr(   r*   r+   r,   r-   ownersr/   emailAddresszinsufficientFilePermissions: The user does not have sufficient                 permissions to retrieve owner for the file with fileId: r2   zRError occurred while fetching the owner for the file with fileId:                 z with error: r4   r@   s           rG   _get_owner_metadata_from_idz-GoogleDriveLoader._get_owner_metadata_from_id|   s    		)7 &&(59	==?&&b&BJJLD>!$((88  	4
 	 %%// 	IIKN  	M#( 	s5   
A. AB .	B7BB'C4C<CCc                    	 ddl }ddlm} | j	                         } |dd|      }g }|}	 	 |j                         j                  |d	      j                         }	|j                  |	d
          d|	v r	|	d   d   }nn	 T|j                          dj                  |      S # t        $ r}t        d      |d}~ww xY w# |j                  j                  $ r t        d|        Y gw xY w)z7Fetch the full path of the file starting from the root.r   Nr(   r*   r+   r,   r-   zname, parentsr/   nameparentszinsufficientFilePermissions: The user does not have sufficient                    permissions to retrieve path for the file with fileId: /)r5   r6   r)   r7   r8   r9   r:   r;   appendr<   r=   r>   reversejoin)
rA   r%   rB   r)   rC   rD   rE   path
current_idrF   s
             rG   _get_file_path_from_idz(GoogleDriveLoader._get_file_path_from_id   s   		)7 &&(59
MMOS
?SCWY 
 DL)$!%i!3J  $ 	xx~?  	4
 	0 #))33 LLN4Q s)   
B! AB> !	B;*B66B;>'C('C(c                    	 ddl }ddlm} g }| j	                         } |dd|      }	 |j                         j                  |      j                         }|j                  di       D ]e  }	|j                         j                  ||	j                  dd      d      j                         j                  d      }
|
sU|j                  |
       g |S # t        $ r}t        d      |d}~ww xY w# |j                  j                  $ r t        d	|        |cY S t        $ r(}t        d
|        t        d|        |cY d}~S d}~ww xY w)z2Fetch the list of people having access to ID file.r   Nr(   r*   r+   r,   r-   r0   zinsufficientFilePermissions: The user does not have sufficient                 permissions to retrieve permission for the file with fileId: zXError occurred while fetching the permissions for the file with                 fileId: r3   permissionsr%    rK   )r0   permissionIdr1   )r5   r6   r)   r7   r8   rY   listr;   r<   r=   r>   r?   r:   rQ   )rA   r%   rB   r)   rC   authorized_identitiesrD   rE   rY   permemail_ids              rG   _get_identity_metadata_from_idz0GoogleDriveLoader._get_identity_metadata_from_id   sl   		)7 ')&&(59	)!--/44B4?GGIK  OOM26 	7D##%BTXXdB-?W^$	  %,,X6	7 %$K  	4
 	 %%// 	)NNPTS )( 	) GC5/"((	)s:   
C .C1 	C.C))C.1)EE$EEEbefore)modevaluesc           	      l  	 |j                  d      r-|j                  d      s|j                  d      rt        d      |j                  d      s-|j                  d      s|j                  d      st        d      |j                  d      }|r|j                  d      s|j                  d      rt        d      dd	d
dd	t        	j                               t        	j	                               z   }dj                  	j                         D cg c]  }d| d
 c}      }dj                  	j	                         D cg c]  }d| d
 c}      }|D ]  }||vst        d| d| d|        dt        dt        f	fd}|D cg c]
  } ||       c}|d<   |S c c}w c c}w c c}w )zDValidate that either folder_id or document_ids is set, but not both.r   r   r   zICannot specify both folder_id and document_ids nor folder_id and file_idsz8Must specify either folder_id, document_ids, or file_idsr   zdfile_types can only be given when folder_id is given, (not when document_ids or file_ids are given).$application/vnd.google-apps.document'application/vnd.google-apps.spreadsheetapplication/pdf(application/vnd.google-apps.presentation)documentsheetpdfpresentation, 'zGiven file type z) is not supported. Supported values are: z; and their full-form names: xr&   c                     | v r|    S | S )N )ro   type_mappings    rG   	full_formz4GoogleDriveLoader.validate_inputs.<locals>.full_form  s    *+|*;|ABB    )r:   
ValueErrorr\   keysrc   rS   r    )
clsrc   r   allowed_typesro   short_names
full_names	file_typers   rr   s
            @rG   validate_inputsz!GoogleDriveLoader.validate_inputs   s    ::k"JJ~&&**Z*@) 
 

;'JJ~.JJz*WXXZZ-
zz.)VZZ
-C F 
 CB( J	L !!2!2!45\=P=P=R8SSM))|7H7H7J$K!q1X$KLKl6I6I6K#Las!H#LMJ' 	M1$*9+ 611< >22<? CS CS C KU#UYIi$8#UF<  %L#L $Vs   F'F,F1vkwargsc                 D    |j                         st        d| d      |S )z&Validate that credentials_path exists.zcredentials_path z does not exist)existsru   )rw   r}   r~   s      rG   validate_credentials_pathz+GoogleDriveLoader.validate_credentials_path  s'     xxz0?CDDrt   c                     |st        d      |D cg c]  }|| j                  vs| }}|r9t        ddj                  |       ddj                  | j                               |S c c}w )z^Validate that the provided scopes are not empty and
        are valid Google Drive API scopes.z#At least one scope must be providedz#Invalid Google Drive API scope(s): rm   z. Valid scopes are: )ru   r   rS   )rw   r}   scopeinvalid_scopess       rG   validate_scopesz!GoogleDriveLoader.validate_scopes%  s     BCC-.PE%s?O?O2O%PP5dii6O5P Q%%)YYs/?/?%@$AC 
  Qs
   A)A)c                    	 ddl m} ddlm} ddlm} ddlm} ddlm	} d}| j                  j                         r:|j                  j                  t        | j                        | j                  	      S | j                   j                         r/|j#                  t        | j                         | j                        }| j$                  r| j$                  }|S |r|j&                  s|r/|j(                  r#|j*                  r|j-                   |              n|d
t.        j0                  vr) |       \  }}|j3                  | j                        }|rC|S |j5                  t        | j6                        | j                        }|j9                  d      }t;        | j                   d      5 }	|	j=                  |j?                                ddd       |S |S # t        $ r t        d      w xY w# 1 sw Y   |S xY w)zLoad credentials.r   )default)Request)service_account)Credentials)InstalledAppFlowzxCould execute GoogleDriveLoader. Please, install drive dependency group: `pip install langchain-google-community[drive]`N)r$   GOOGLE_APPLICATION_CREDENTIALS)portw) google.authr   google.auth.transport.requestsr   google.oauth2r   google.oauth2.credentialsr   google_auth_oauthlib.flowr   r7   r   r   from_service_account_filer    r$   r   from_authorized_user_filer   validexpiredrefresh_tokenrefreshosenvironwith_scopesfrom_client_secrets_filer   run_local_serveropenwriteto_json)
rA   r   r   r   r   r   rD   projectflowtokens
             rG   r8   z#GoogleDriveLoader._load_credentials5  s   	+>5= ##**,"..HHD,,-dkk I   ??!!#99DOO$dkkE $$ELEKK5+>+>gi(1C!(w))$++6 L'@@--. --1-5doos+ -uEMMO,- uQ  	B 	J- s   G * G.G+.G8c           	      (   ddl m} | j                         } |dd|      }|j                         j	                  |      j                         }|j	                  dg       }| j                  r| j                  |      }| j                  r3| j                  |      }| j                  |      }	| j                  |      }
g }|D ]L  }|d   d   }|j                         j                         j	                  ||	      j                         }|j	                  d
g       }|s^|d   }t        |dd d      D ]  \  }}d| d|d   d    |d   d    d| |d}| j                  r|d<   | j                  r|d<   	|d<   
|d<   g }t        |      D ]L  \  }}t        |      |kD  r||   j                         nd}|j!                  | d|j                                 N dj#                  |      }|j!                  t%        ||              O |S )z%Load a sheet and all tabs from an ID.r   r(   sheetsv4r-   )spreadsheetId
propertiestitle)r   rangerc      N)startz'https://docs.google.com/spreadsheets/d/z
/edit?gid=sheetIdz - )sourcer   rowr]   ownerr.   	full_pathrZ   z: 
page_contentmetadata)r6   r)   r8   spreadsheetsr:   r;   r"   r`   r#   rL   rH   rV   rc   	enumeratelenstriprQ   rS   r   )rA   r%   r)   rD   sheets_servicespreadsheetr   r]   r   r.   r   	documentsrj   
sheet_nameresultrc   headerir   r   contentjr}   r   r   s                            rG   _load_sheet_from_idz%GoogleDriveLoader._load_sheet_from_idj  sG    	4&&(x5A$11377b7IQQS2.>>$($G$G$K!&&44R8E..r2D33B7I	 "	YE|,W5J++-2Z8	  ZZ"-FAYF#F12Ja8 Y3 B" F$$),$7	$B#CE !,L 9' BC3zlS >>8MH45..(-HW%'+HV$,5H[)%cN <DAq14VqF1IOO-bENNeWBqwwyk#:;<  $yy1  |h!WX+Y"	YH rt   c                    ddl m} ddlm} ddlm} ddlm} | j                         } |dd|      }| j                  r| j                  |      }| j                  r3| j                  |      }	| j                  |      }
| j                  |      }|j                         j!                  |d	d
      j#                         }|j                         j%                  |d      } |       } |||      }d}	 |du r|j'                         \  }}|du r|j1                         j3                  d      }|j!                  d       |j!                  d       |j!                  d       d}| j                  r|d<   | j                  r	|d<   
|d<   |d<   t5        ||      S # |$ rX}|j(                  j*                  dk(  rt-        dj/                  |             nt-        dj/                  |             Y d}~d}~ww xY w)zLoad a document from an ID.r   BytesIOr(   )r=   MediaIoBaseDownloadr+   r,   r-   TzmodifiedTime,name,webViewLink)r0   supportsAllDrivesr1   z
text/plain)r0   mimeTypeFi  zFile not found: {}zAn error occurred: {}Nzutf-8webViewLinkrN   modifiedTime)r   r   whenr]   r   r.   r   r   )ior   r6   r)   r5   r=   googleapiclient.httpr   r8   r"   r`   r#   rL   rH   rV   r9   r:   r;   export_media
next_chunkrespstatusr>   formatgetvaluedecoder   )rA   r%   r   r)   r=   r   rD   rE   r]   r   r.   r   rF   requestfh
downloaderdoner   etextr   s                        rG   _load_document_from_idz(GoogleDriveLoader._load_document_from_id  s   34<&&(59>>$($G$G$K!&&44R8E..r2D33B7I MMOS"&6  
 WY 	 --/..b<.PY(W5
	9%-)446 %- {{}##G,-01()xx/0

 >>0EH,-&& %HW#HV$-H[!TH==%  	9vv}}#*11"56-44Q78		9s   4F G;#AG66G;r   c                   ddl m} | j                         } |dd|      }| j                  ||      }|r|D cg c]  }|d   |v s| }}n|}g }	|D ]  }
|
d   r| j                  s|
d   dv r$|	j                  | j                  |
d	                @|
d   d
k(  r$|	j                  | j                  |
d	                l|
d   dk(  s| j                  $|	j                  | j                  |
d	                 |	S c c}w )zLoad documents from a folder.r   r(   r+   r,   r-   r   trashed)re   rh   r%   rf   rg   )r6   r)   r8   _fetch_files_recursiver   rQ   r   extendr   r   _load_file_from_id)rA   r   r   r)   rD   rE   r9   f_filesreturnsrF   s              rG   _load_documents_from_folderz-GoogleDriveLoader._load_documents_from_folder  s    	4&&(59++GY?!&FA!J-:*EaFFFF 	DIt'>'>j! &  t::4:FGj!%NNt77T
CDZ $55''3t66tDzBC!	" - Gs   C<C<rE   c                 >   |j                         j                  d| ddddd      j                         }|j                  dg       }g }|D ]M  }|d   d	k(  r2| j                  s|j                  | j                  ||d
                =|j                  |       O |S )z+Fetch all files and subfolders recursively.rn   z' in parentsi  Tz:nextPageToken, files(id, name, mimeType, parents, trashed))qpageSizeincludeItemsFromAllDrivesr   r1   r9   r   z"application/vnd.google-apps.folderr%   )r9   r\   r;   r:   r   r   r   rQ   )rA   rE   r   resultsr9   r   rF   s          rG   r   z(GoogleDriveLoader._fetch_files_recursive  s    
 MMOTi[-*."&S   WY 	 GR( 	%DJ#GG>>NN4#>#>wT
#STt$	% rt   c                     | j                   st        d      | j                   D cg c]  }| j                  |       c}S c c}w )z"Load documents from a list of IDs.zdocument_ids must be set)r   ru   r   )rA   doc_ids     rG   _load_documents_from_idsz*GoogleDriveLoader._load_documents_from_ids  s<      788BFBSBST++F3TTTs   Ac                 J   ddl m} ddlm} ddlm} | j                         } |dd|      }| j                  r| j                  |      }| j                  r3| j                  |      }| j                  |      }	| j                  |      }
|j                         j                  |d	      j                         }|j                         j!                  |
      } |       } |||      }d}|du r|j#                         \  }}|du r| j$                  |j'                  d        | j$                  dd|i| j(                  }|j+                         }|D ]  }d| d|j,                  d<   d|j,                  vr|j                  d       |j,                  d<   | j                  r|j,                  d<   | j                  sk|j,                  d<   	|j,                  d<   
|j,                  d<    |S ddlm} |j3                         } | ||            }g }t5        |j6                        D ]u  \  }}d| d|j                  d       |d}| j                  r|d<   | j                  r|d<   	|d<   
|d<   |j9                  t;        |j=                         |             w |S )zLoad a file from an ID.r   r   r(   r   r+   r,   r-   T)r0   r   rX   FrF   z https://drive.google.com/file/d/z/viewr   r   rN   r]   r   r.   r   )	PdfReader)r   r   pager   rq   )r   r   r6   r)   r   r   r8   r"   r`   r#   rL   rH   rV   r9   r:   r;   	get_mediar   r   seekr!   loadr   PyPDF2r   r   r   pagesrQ   r   extract_text)rA   r%   r   r)   r   rD   rE   r]   r   r.   r   rF   r   r   r   r   r   loaderdocsdocr   r   
pdf_readerr   r   r   s                             rG   r   z$GoogleDriveLoader._load_file_from_id  s   3<&&(59>>$($G$G$K!&&44R8E..r2D33B7I}}""""EMMO--/++2+6Y(W5
em%002LFD em +GGAJ)T))MrMT5L5LMF;;=D 	:+KB4u)UX&#,,./3xx/?.@CLL)>><QCLL!89..,1CLL)+/CLL(09CLL-	: K )kkmG"77#34JD$Z%5%56 4 @EJ $ 01
 >>8MH45..(-HW%'+HV$,5H[)%)%6%6%8!)$ Krt   c                     | j                   st        d      g }| j                   D ]"  }|j                  | j                  |             $ |S )zLoad files from a list of IDs.zfile_ids must be set)r   ru   r   r   )rA   r   file_ids      rG   _load_file_from_idsz%GoogleDriveLoader._load_file_from_ids_  sI    }}344}} 	:GKK//89	:rt   c                     | j                   r'| j                  | j                   | j                        S | j                  r| j	                         S | j                         S )zLoad documents.r   )r   r   r   r   r   r   )rA   s    rG   r   zGoogleDriveLoader.loadh  sW    >>334?? 4   0022++--rt   )5__name__
__module____qualname____doc__r   r   r
   r    __annotations__r   homer   r   r   r   r   r   r   r   r   r   r   boolr   r	   r   r   r!   r   r"   r#   r$   rH   rL   rV   r`   r   classmethodr|   r   r   r   r8   r   r   r   r   r   r   r   r   r   r   rq   rt   rG   r   r      s/   "/L(5c?+  !*		n <{ JJ/&TYY[>9<NNdN' tyy{^3lBJB!KD#Ix}#%(,L(49%,($(HhtCy!($ItN*.J#'.I$$P  OS'+-UCZ(-(It3#(D(,EFFDIF  >c c >$ $ $L*% *%c *%X (#-T#s(^ - -  $-^ '(#    ) XS	 d3i  33 3j5c 5d8n 5n4> 4> 4>n HL!!-5hsm-D!	h!F'*	d3c49n--.	/4U$x. UBS BT(^ BHT(^ 	.d8n 	.rt   r   )r   pathlibr   typingr   r   r   r   r   r	   r
   r   langchain_core.document_loadersr   langchain_core.documentsr   pydanticr   r   r   r   rq   rt   rG   <module>r     s2    
  N N N 6 - @ @^	.
I ^	.rt   