
    h                    >    d dl mZ d dlmZ d dlmZ  G d de      Zy)    )annotations)Any)TextSplitterc                  J     e Zd ZdZ	 	 ddd	 	 	 	 	 	 	 	 	 d fdZddZ xZS )	NLTKTextSplitterz"Splitting text using NLTK package.F)use_span_tokenizec                  t        |   di | || _        || _        || _        | j                  r| j                  dk7  rd}t        |      	 ddl}| j                  r+|j                  j                  | j                        | _	        y|j                  j                  | _	        y# t        $ r}d}t        |      |d}~ww xY w)zInitialize the NLTK splitter. z6When use_span_tokenize is True, separator should be ''r   NzANLTK is not installed, please install it with `pip install nltk`. )super__init__
_separator	_language_use_span_tokenize
ValueErrornltktokenize_get_punkt_tokenizer
_tokenizersent_tokenizeImportError)	self	separatorlanguager   kwargsmsgr   err	__class__s	           [/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_text_splitters/nltk.pyr   zNLTKTextSplitter.__init__   s     	"6"#!"3""t"'<JCS/!		,&&"&--"D"DT^^"T"&--"="= 	,UCc"+	,s   :B& 
B& &	C/B==Cc                j   | j                   rot        | j                  j                  |            }g }t	        |      D ]:  \  }\  }}|dkD  r||dz
     d   }||| ||| z   }n||| }|j                  |       < n| j                  || j                        }| j                  || j                        S )z&Split incoming text and return chunks.r      )r   )	r   listr   span_tokenize	enumerateappendr   _merge_splitsr   )	r   textspanssplitsistartendprev_endsentences	            r   
split_textzNLTKTextSplitter.split_text&   s     ""66t<=EF#,U#3 (<E3q5$QU|AH#HU3d5oEH#E#Hh'( __TDNN_CF!!&$//::    )z

english)
r   strr   r2   r   boolr   r   returnNone)r'   r2   r4   z	list[str])__name__
__module____qualname____doc__r   r/   __classcell__)r   s   @r   r   r      sR    ,  !,
 #(,, ,
  , , 
,6;r0   r   N)
__future__r   typingr   langchain_text_splitters.baser   r   r   r0   r   <module>r>      s    "  6-;| -;r0   