
    g3fiz-                       U d Z ddlmZ ddlZddlZddlZddlZddlmZm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ ed	   Z	  G d
 de      Z G d de      Zeegee   f   Z	 d&dZd&dZd&dZd&dZd&dZeeeeedZded<   	 dZ dZ!d'dZ"d(dZ#dZ$dZ%d(dZ&d(dZ'	 	 	 	 	 	 	 	 d)dZ(d*dZ) ed        G d! d"             Z* ed        G d# d$             Z+g d%Z,y)+z5Shared redaction utilities for middleware components.    )annotationsN)CallableSequence)	dataclass)Literal)urlparse)	TypedDict)blockredactmaskhashc                  :    e Zd ZU dZded<   ded<   ded<   ded<   y)	PIIMatchz1Represents an individual match of sensitive data.strtypevalueintstartendN)__name__
__module____qualname____doc____annotations__     d/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain/agents/middleware/_redaction.pyr   r      s    ;
IJJ	Hr   r   c                  $     e Zd ZdZd fdZ xZS )PIIDetectionErrorz=Raised when configured to block on detected sensitive values.c                z    || _         t        |      | _        t        |      }d| d| d}t        |   |       y)zInitialize the exception with match context.

        Args:
            pii_type: Name of the detected sensitive type.
            matches: All matches that were detected for that type.
        z	Detected z instance(s) of z in text contentN)pii_typelistmatcheslensuper__init__)selfr!   r#   countmsg	__class__s        r   r&   zPIIDetectionError.__init__    sC     !G}G% 0
:JKr   )r!   r   r#   zSequence[PIIMatch]returnNone)r   r   r   r   r&   __classcell__)r*   s   @r   r   r      s    G r   r   c           
         d}t        j                  ||       D cg c];  }t        d|j                         |j	                         |j                               = c}S c c}w )z"Detect email addresses in content.z3\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\bemailr   r   r   r   refinditerr   groupr   r   contentpatternmatchs      r   detect_emailr9   2   sY    DG [['2  	++-++-				
     A Ac           
         d}g }t        j                  ||       D ]X  }|j                         }t        |      s|j	                  t        d||j                         |j                                      Z |S )z<Detect credit card numbers in content using Luhn validation.z*\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\bcredit_cardr0   )r2   r3   r4   _passes_luhnappendr   r   r   )r6   r7   r#   r8   card_numbers        r   detect_credit_cardr@   @   sl    ;GGWg. 
kkm$NN&%++-			
 Nr   c           
     "   g }d}t        j                  ||       D ]b  }|j                         }	 t        j                  |       |j                  t        d||j                         |j                                      d |S # t
        $ r Y rw xY w)z)Detect IPv4 or IPv6 addresses in content.z!\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\bipr0   )
r2   r3   r4   	ipaddress
ip_address
ValueErrorr>   r   r   r   )r6   r#   ipv4_patternr8   ip_candidates        r   	detect_iprH   T   s     G7L\73 
{{}	  . 	"kkmIIK		

 N  		s   B	BBc           
         d}t        j                  ||       D cg c];  }t        d|j                         |j	                         |j                               = c}S c c}w )z Detect MAC addresses in content.z)\b([0-9A-Fa-f]{2}[:-]){5}[0-9A-Fa-f]{2}\bmac_addressr0   r1   r5   s      r   detect_mac_addressrK   k   sY    :G [['2  	++-++-				
  r:   c           
       	 g }d}t        j                  ||       D ]s  }|j                         }t        |      }|j                  dv s-|j
                  s:|j                  t        d||j                         |j                                      u d}t        j                  ||       D ]  }|j                         |j                         c	t        	fd|D              r:|j                         }d|v s|j                  d      s`d	| }t        |      }|j
                  s}d
|j
                  v s|j                  t        d|	              |S )z9Detect URLs in content using regex and stdlib validation.zhttps?://[^\s<>\"{}|\\^`\[\]]+>   httphttpsurlr0   zy\b(?:www\.)?[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?:/[^\s]*)?c              3  z   K   | ]2  }|d    cxk  xr |d   k  nc xs |d    cxk  xr |d   k  nc  4 yw)r   r   Nr   ).0mr   r   s     r   	<genexpr>zdetect_url.<locals>.<genexpr>   s>     _RSqzU-QuX-M7c1MQuX1MM_s   8;/zwww.zhttp://.)r2   r3   r4   r   schemenetlocr>   r   r   r   any
startswith)
r6   r#   scheme_patternr8   rO   resultbare_patterntest_urlr   r   s
           @@r   
detect_urlr^   y   s9    G 7N^W5 kkm#==--&--NN++-				 	L 
 \73 [[]EIIK
s_W^__kkm #:/ Hh'F}}!5"!#	. Nr   )r/   r<   rB   rJ   rO   zdict[str, Detector]BUILTIN_DETECTORS      c                $   | D cg c]  }|j                         st        |        }}t        t        |      cxk  r	t        k  sy yd}t        t        |            D ]#  \  }}|}|dz  dk(  r|dz  }|dkD  r|dz  }||z  }% |dz  dk(  S c c}w )z4Validate credit card number using the Luhn checksum.Fr         	   
   )isdigitr   _CARD_NUMBER_MIN_DIGITSr$   _CARD_NUMBER_MAX_DIGITS	enumeratereversed)r?   ddigitschecksumindexdigitr   s          r   r=   r=      s    )9QYY[c!f9F9"c&kL5LL MH!(6"23 u19>QJEqy
E b=A :s
   BBc                    | }t        |t        j                  d      d      D ]-  }d|d   j                          d}|d |d    |z   ||d   d  z   }/ |S )Nr   Tkeyreversez
[REDACTED_r   ]r   )sortedoperator
itemgetterupper)r6   r#   r[   r8   replacements        r   _apply_redact_strategyr{      su    FX%8%8%A4P Q"5=#6#6#8"9;(%.)K7&u:PPQ Mr      c                   | }t        |t        j                  d      d      D ]Q  }|d   }|d   }|dk(  rZ|j                  d      }t	        |      dk(  r8|d	   j                  d
      }t	        |      d	kD  r|d    d|d    n|d    d}nd}n|dk(  rLdj                  d |D              }	d|v rdnd|v rdnd}
|
rd|
 d|
 d|
 |	t         d   }nd|	t         d   }n|dk(  r.|j                  d
      }t	        |      t        k(  rd|d    nd}nM|dk(  rd|v rdnd}
d|
 d|
 d|
 d|
 d|
 |dd   }n)|dk(  rd}n!t	        |      t        kD  rd|t         d   nd}|d |d    |z   ||d   d  z   }T |S )Nr   Trr   r   r   r/   @rc   rd   rU   r   z@****.z@****z****r<    c              3  B   K   | ]  }|j                         s|  y w)N)rg   )rQ   cs     r   rS   z'_apply_mask_strategy.<locals>.<genexpr>   s     !Baiik!!Bs   - z************rB   z*.*.*.rJ   :z**rO   z[MASKED_URL]r   )rv   rw   rx   splitr$   join_UNMASKED_CHAR_NUMBER_IPV4_PARTS_NUMBER)r6   r#   r[   r8   r   r!   partsdomain_partsmaskeddigits_only	separatoroctetss               r   _apply_mask_strategyr      s"   FX%8%8%A4P (Lg=wKK$E5zQ$Qx~~c2 <(1, Qxj|B'7&89!!H:U+   &''!BU!BBK"e|u"I9+T)D"$9#9#:;<> 
 (5J4J4K(L'MN[[%F.1&k=O.OvfRj\*U[F&"e|IYKr)BykI;bTYZ\Z]T^S_`  #F u: 55 u334567 
 (%.)F2VE%LN5KKQ(LR Mr   c                   | }t        |t        j                  d      d      D ]Y  }t        j                  |d   j                               j                         d d }d|d    d| d	}|d |d    |z   ||d
   d  z   }[ |S )Nr   Trr   r      <r   z_hash:>r   )rv   rw   rx   hashlibsha256encode	hexdigest)r6   r#   r[   r8   digestrz   s         r   _apply_hash_strategyr     s    FX%8%8%A4P Qg 5 5 78BBDRaH%-vha8(%.)K7&u:PPQ Mr   c                    |s| S |dk(  rt        | |      S |dk(  rt        | |      S |dk(  rt        | |      S |dk(  rt        |d   d   |      d| }t	        |      )z8Apply the configured strategy to matches within content.r   r   r   r
   r   r   zUnknown redaction strategy: )r{   r   r   r   rE   )r6   r#   strategyr)   s       r   apply_strategyr     s     8%gw776#GW556#GW557
6 2G<<(

3C
S/r   c                     |@ t         vr/d  dt        t         j                                d}t        |      t             S t	        |t
              rt        j                  |      d fd}|S |S )z7Return a callable detector for the given configuration.zUnknown PII type: z. Must be one of z or provide a custom detector.c           
         j                  |       D cg c];  }t        |j                         |j                         |j	                               = c}S c c}w )Nr0   )r3   r   r4   r   r   )r6   r8   r7   r!   s     r   regex_detectorz(resolve_detector.<locals>.regex_detector0  sU     %--g6  !++-++-			  s   A Ar6   r   r+   list[PIIMatch])r_   r"   keysrE   
isinstancer   r2   compile)r!   detectorr)   r   r7   s   `   @r   resolve_detectorr   #  s    ,,$XJ /""&'8'='='?"@!AA_a  S/! **(C **X&		 Or   T)frozenc                  @    e Zd ZU dZded<   dZded<   dZded	<   dd
Zy)RedactionRulez-Configuration for handling a single PII type.r   r!   r   RedactionStrategyr   NDetector | str | Noner   c                    t        | j                  | j                        }t        | j                  | j                  |      S )z6Resolve runtime detector and return an immutable rule.)r!   r   r   )r   r!   r   ResolvedRedactionRuler   )r'   resolved_detectors     r   resolvezRedactionRule.resolveG  s5    ,T]]DMMJ$]]]]&
 	
r   )r+   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   ?  s%    7M"*H*&*H#*
r   r   c                  8    e Zd ZU dZded<   ded<   ded<   d
dZy	)r   z,Resolved redaction rule ready for execution.r   r!   r   r   Detectorr   c                f    | j                  |      }|s|g fS t        ||| j                        }||fS )z>Apply this rule to content, returning new content and matches.)r   r   r   )r'   r6   r#   updateds       r   applyzResolvedRedactionRule.applyY  s:    --(B; '4==Ar   N)r6   r   r+   ztuple[str, list[PIIMatch]])r   r   r   r   r   r   r   r   r   r   r   Q  s    6M r   r   )
r   r   r   r   r   r@   r9   rH   rK   r^   r   )r?   r   r+   bool)r6   r   r#   r   r+   r   )r6   r   r#   r   r   r   r+   r   )r!   r   r   r   r+   r   )-r   
__future__r   r   rC   rw   r2   collections.abcr   r   dataclassesr   typingr   urllib.parser   typing_extensionsr	   r   r   	Exceptionr   r   r"   r   r9   r@   rH   rK   r^   r_   r   rh   ri   r=   r{   r   r   r   r   r   r   r   r   __all__r   r   r   <module>r      sG   ; "    	 . !  ! '=>  By 	 " SE4>)* D(.2l %
%* &  9  "   +\   		(8 $
 
 
" $      r   