o
    Mf                     @   s`   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ G dd de j	Z
G dd	 d	e j	ZdS )
    N)closing)data)PorterStemmer)SnowballStemmerc                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )SnowballTestc                 C   sR  t dd}|ddksJ |ddksJ |ddks J |dd	ks)J |d
d	ks2J |dd	ks;J |dd	ksDJ |ddksMJ |ddksVJ t dd}|ddksdJ |dd	ksmJ |ddksvJ t d}|ddksJ |ddksJ |ddksJ |dd	ksJ |ddksJ dS )z
        this unit testing for test the snowball arabic light stemmer
        this stemmer deals with prefixes and suffixes
        arabicTu&   الْعَرَبِــــــيَّةu   عربu   العربيةu   فقالواu   قالu   الطالباتu   طالبu   فالطالباتu   والطالباتu   الطالبونu   اللذانu   منFu   اللذu   الكلماتu   كلمNr   stem)self
ar_stemmer r   Z/var/www/html/analyze/labelStudio/lib/python3.10/site-packages/nltk/test/unit/test_stem.pytest_arabic
   s(   

zSnowballTest.test_arabicc                 C      t d}|ddksJ d S )Nrussianu   авантненькаяu   авантненькr   )r
   stemmer_russianr   r   r   test_russian'      zSnowballTest.test_russianc                 C   s`   t d}t ddd}|ddksJ |ddksJ |ddks%J |ddks.J d S )NgermanT)ignore_stopwordsu	   Schränkeschrankkeinenkeinr   )r
   stemmer_germanstemmer_german2r   r   r   test_german+   s   zSnowballTest.test_germanc                 C   s0   t d}|ddksJ |ddksJ d S )Nspanish	Visionadovisionalguealgur   r
   stemmerr   r   r   test_spanish5   s   zSnowballTest.test_spanishc                 C   r   )Nenglishzy'syr   r!   r   r   r   test_short_strings_bug=   r   z#SnowballTest.test_short_strings_bugN)__name__
__module____qualname__r   r   r   r#   r&   r   r   r   r   r   	   s    
r   c                   @   sD   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dS )
PorterTestc                 C   sF   t tdjdd}|  W  d    S 1 sw   Y  d S )Nz*stemmers/porter_test/porter_vocabulary.txtutf-8encoding)r   r   findopenread
splitlinesr
   fpr   r   r   _vocabularyC   s   

$zPorterTest._vocabularyc                 C   sL   t |d}t|  |D ]\}}||}||ks#J d||||qd S )N)modez*{} should stem to {} in {} mode but got {})r   zipr4   r	   format)r
   stemmer_modeexpected_stemsr"   word	true_stemour_stemr   r   r   _test_against_expected_outputK   s   


z(PorterTest._test_against_expected_outputc                 C   sR   t tdjdd}| tj|   W d   dS 1 s"w   Y  dS )az  Tests all words from the test vocabulary provided by M Porter

        The sample vocabulary and output were sourced from
        https://tartarus.org/martin/PorterStemmer/voc.txt and
        https://tartarus.org/martin/PorterStemmer/output.txt
        and are linked to from the Porter Stemmer algorithm's homepage
        at https://tartarus.org/martin/PorterStemmer/
        z-stemmers/porter_test/porter_martin_output.txtr+   r,   N)	r   r   r.   r/   r=   r   MARTIN_EXTENSIONSr0   r1   r2   r   r   r   test_vocabulary_martin_modeX   s   	
"z&PorterTest.test_vocabulary_martin_modec                 C   sR   t tdjdd}| tj|   W d    d S 1 s"w   Y  d S )Nz+stemmers/porter_test/porter_nltk_output.txtr+   r,   )	r   r   r.   r/   r=   r   NLTK_EXTENSIONSr0   r1   r2   r   r   r   test_vocabulary_nltk_modej   s   
"z$PorterTest.test_vocabulary_nltk_modec                 C   st   t tdjdd}| tj|   W d    n1 s!w   Y  | tjtdjdd   d S )Nz/stemmers/porter_test/porter_original_output.txtr+   r,   )	r   r   r.   r/   r=   r   ORIGINAL_ALGORITHMr0   r1   r2   r   r   r   test_vocabulary_original_modet   s$   
	z(PorterTest.test_vocabulary_original_modec                 C   s   t  ddks
J dS )zTest for bug https://github.com/nltk/nltk/issues/1581

        Ensures that 'oed' can be stemmed without throwing an error.
        oedoNr   r	   )r
   r   r   r   test_oed_bug   s   zPorterTest.test_oed_bugc                 C   sl   t  }|ddksJ |ddksJ |jddddks J |ddks)J |jddddks4J d	S )
zTest for improvement on https://github.com/nltk/nltk/issues/2507

        Ensures that stems are lowercased when `to_lowercase=True`
        OnonIiF)to_lowercaseGithubgithubNrF   )r
   porterr   r   r   test_lowercase_option   s   z PorterTest.test_lowercase_optionN)
r'   r(   r)   r4   r=   r?   rA   rC   rG   rP   r   r   r   r   r*   B   s    
r*   )unittest
contextlibr   nltkr   nltk.stem.porterr   nltk.stem.snowballr   TestCaser   r*   r   r   r   r   <module>   s    9