
    3fi                     B    d dl Z d dlmZmZmZ d dlmZ  G d de      Zy)    N)CallableListPattern)CodeSegmenterc                   Z    e Zd ZU dZ ej
                  dej                        Zee	d<    ej
                  dej                        Z
ee	d<    ej
                  dej                        Zee	d<   def fd	Zd
efdZdeded
efdZded
efdZded
ee   fdZd
ee   fdZd
efdZ xZS )CobolSegmenterzCode segmenter for `COBOL`.z^[A-Z0-9\-]+(\s+.*)?\.$PARAGRAPH_PATTERNz=^\s*(IDENTIFICATION|DATA|PROCEDURE|ENVIRONMENT)\s+DIVISION.*$DIVISION_PATTERNz^\s*[A-Z0-9\-]+\s+SECTION.$SECTION_PATTERNcodec                 b    t         |   |       | j                  j                         | _        y N)super__init__r   
splitlinessource_lines)selfr   	__class__s     y/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/langchain_community/document_loaders/parsers/language/cobol.pyr   zCobolSegmenter.__init__   s%    '+yy';';'=    returnc                 @     t         fd j                  D              S )Nc              3   T   K   | ]  }j                   j                  |       ! y wr   )r
   match).0liner   s     r   	<genexpr>z*CobolSegmenter.is_valid.<locals>.<genexpr>   s"     S4((..t4Ss   %()anyr   )r   s   `r   is_validzCobolSegmenter.is_valid   s    SARARSSSr   	start_idxend_idxc                 \    dj                  | j                  ||       j                  d      S )N
)joinr   rstrip)r   r    r!   s      r   _extract_codezCobolSegmenter._extract_code   s)    yy**9W=>EEdKKr   r   c                 *    d|j                         v ryy)zHCheck if a line is part of the procedure division or a relevant section.PROCEDURE DIVISIONTF)upper)r   r   s     r   _is_relevant_codez CobolSegmenter._is_relevant_code   s    4::</r   funcc                    g }d}d}t        | j                        D ]  \  }}| j                  |      rd}|s| j                  j	                  |j                         j                  d      d         s*| j                  j	                  |j                               s|
 ||||       |} | |||t        | j                               |S )zAA generic function to process COBOL lines based on provided func.NFT r   )		enumerater   r*   r	   r   stripsplitr   len)r   r+   elementsr    inside_relevant_sectionir   s          r   _process_lineszCobolSegmenter._process_lines$   s     	"' !2!23 
	GAt%%d+*.'&&&,,TZZ\-?-?-DQ-GH''--djjl;(9a0	
	  9c$*;*;&<=r   c                 h     dt         t           dt        dt        dd f fd} j                  |      S )Nr2   r    r!   r   c                 H    | j                  j                  ||             y r   )appendr&   )r2   r    r!   r   s      r   extract_funcz>CobolSegmenter.extract_functions_classes.<locals>.extract_func=   s    OOD..y'BCr   )r   strintr5   )r   r9   s   ` r   extract_functions_classesz(CobolSegmenter.extract_functions_classes<   s@    	D49 	D 	Ds 	Dt 	D ""<00r   c                    g }d}d}| j                   D ]  }d|v xsr d|v xsl d|v xsf | j                  j                  |j                         j	                  d      d         xs) | j
                  j                  |j                               }|rd}d}|s|r|j                  |       |r|j                  d       d} d	j                  |      S )
NFr(   zDATA DIVISIONzIDENTIFICATION DIVISIONr-   r   Tz* OMITTED CODE *r#   )r   r	   r   r/   r0   r   r8   r$   )r   simplified_linesr3   omitted_code_addedr   	is_headers         r   simplify_codezCobolSegmenter.simplify_codeB   s    &("' 	 %% 	.D$, <"d*<,4< ))//

0B0B30G0JK< ''--djjl;  *.' &+"&$++D1+ %++,>?)-&/	.2 yy)**r   )__name__
__module____qualname____doc__recompile
IGNORECASEr	   r   __annotations__r
   r   r:   r   boolr   r;   r&   r*   r   r   r5   r<   rA   __classcell__)r   s   @r   r   r   	   s    %!+,F!VwV *

H"--!g   *rzz*H"--XOWX>S >T$ TLs LS LS Lc d 8 S	 0149 1 +s  +r   r   )rF   typingr   r   r   Dlangchain_community.document_loaders.parsers.language.code_segmenterr   r    r   r   <module>rO      s!    	 * *
Y+] Y+r   