§
    ïžîd@<  ã                  óÖ   — d dl mZ d dlmZmZmZ d dlmZ d dlm	Z	 d dl
mZmZ d$d„Zd$d„Zd%d„Zdddddœd&d„Zdddddœd&d„Zdddddœd'd„Zdddddœd'd„Zdddœd(d!„Zdddœd)d#„ZdS )*é    )Úannotations)ÚCallableÚHashableÚSequence)Úis_none)ÚIndel)ÚEditopsÚOpcodesÚs1úSequence[Hashable]Ús2Úweightsútuple[int, int, int]ÚreturnÚintc                óâ   — t          | ¦  «        }t          |¦  «        }|\  }}}||z  ||z  z   }||k    rt          |||z  ||z
  |z  z   ¦  «        }nt          |||z  ||z
  |z  z   ¦  «        }|S )N)ÚlenÚmin)	r   r   r   Úlen1Úlen2ÚinsertÚdeleteÚreplaceÚmax_dists	            úe/home/feoh/.local/pipx/venvs/poetry/lib/python3.11/site-packages/rapidfuzz/distance/Levenshtein_py.pyÚ_levenshtein_maximumr      sŠ   € õ ˆr‰7Œ7€DÝˆr‰7Œ7€DØ%Ñ€FˆFGàf‰}˜t f™}Ñ,€Hàˆt‚|€|Ýx ¨¡°4¸$±;À&Ñ2HÑ!HÑIÔIˆˆåx ¨¡°4¸$±;À&Ñ2HÑ!HÑIÔIˆà€Oó    c                óv  — t          | ¦  «        }|\  }}}t          t          d|dz   |z  |¦  «        ¦  «        }|D ]v}|d         }	|dxx         |z  cc<   t          |¦  «        D ]L}
|	}| |
         |k    r)t          ||
         |z   ||
dz            |z   |	|z   ¦  «        }||
dz            }	|||
dz   <   ŒMŒw|d         S )Nr   é   éÿÿÿÿ)r   ÚlistÚranger   )r   r   r   r   r   r   r   ÚcacheÚch2ÚtempÚiÚxs               r   Ú_uniform_genericr(      sê   € õ ˆr‰7Œ7€DØ%Ñ€FˆFGÝ•q˜4 !™8 vÑ-¨vÑ6Ô6Ñ7Ô7€Eàð ð ˆØQŒxˆØˆaˆˆŒFÑˆˆ‰Ýt‘”ð 	ð 	ˆAØˆAØ!Œu˜Š|ˆ|Ý˜˜aœ 6Ñ)¨5°°Q±¬<¸&Ñ+@À$ÈÁ.ÑQÔQØ˜˜Q™”<ˆDØˆE!a‘%‰LˆLð	ð Œ9Ðr   c                ó¶  — | st          |¦  «        S dt          | ¦  «        z  dz
  }d}t          | ¦  «        }dt          | ¦  «        dz
  z  }i }|j        }d}| D ]}	 ||	d¦  «        |z  ||	<   |dz  }Œ|D ]b}
 ||
d¦  «        }|}||z  |z   |z  |z  |z  }|||z   z  }||z  }|||z  dk    z  }|||z  dk    z  }|dz  dz  }|dz  }|||z   z  }||z  }Œc|S )Nr   r   )r   Úget)r   r   ÚVPÚVNÚcurrDistÚmaskÚblockÚ	block_getr'   Úch1r$   ÚPM_jÚXÚD0ÚHPÚHNs                   r   Ú_uniform_distancer7   2   sF  € Øð Ý2‰wŒwˆà
s2‰wŒw‰,˜!Ñ	€BØ	
€BÝ2‰wŒw€HØ•R‘”˜1‘Ñ€Dà!#€EØ”	€IØ	€AØð ð ˆØY˜s AÑ&Ô&¨Ñ*ˆˆc‰
Ø	ˆa‰ˆˆàð ð ˆàˆy˜˜aÑ Ô ˆØˆØB‘˜"‰} Ñ" aÑ'¨"Ñ,ˆàB˜‘G*‰_ˆØ"‰WˆàR˜$‘Y 1Ò$Ñ$ˆØR˜$‘Y 1Ò$Ñ$ˆàA‰g˜‰]ˆØ1‰WˆØB˜‘G*‰_ˆØ"‰Wˆˆà€Or   ©r   r   r   N)r   Ú	processorÚscore_cutoffÚ
score_hintútuple[int, int, int] | Noner9   ú(Callable[..., Sequence[Hashable]] | Noner:   ú
int | Noner;   c               óÜ   — | || ¦  «        }  ||¦  «        }||dk    rt          | |¦  «        }n-|dk    rt          j        | |¦  «        }nt          | ||¦  «        }|||k    r|n|dz   S )a´  
    Calculates the minimum number of insertions, deletions, and substitutions
    required to change one sequence into the other according to Levenshtein with custom
    costs for insertion, deletion and substitution

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the distance is bigger than score_cutoff,
        score_cutoff + 1 is returned instead. Default is None, which deactivates
        this behaviour.
    score_hint : int, optional
        Expected distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    distance : int
        distance between s1 and s2

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown

    Examples
    --------
    Find the Levenshtein distance between two strings:

    >>> from rapidfuzz.distance import Levenshtein
    >>> Levenshtein.distance("lewenstein", "levenshtein")
    2

    Setting a maximum distance allows the implementation to select
    a more efficient implementation:

    >>> Levenshtein.distance("lewenstein", "levenshtein", score_cutoff=1)
    2

    It is possible to select different weights by passing a `weight`
    tuple.

    >>> Levenshtein.distance("lewenstein", "levenshtein", weights=(1,1,2))
    3
    Nr8   )r   r   é   r   )r7   r   Údistancer(   )r   r   r   r9   r:   r;   Údists          r   rA   rA   V   s“   € ðD ÐØˆYr‰]Œ]ˆØˆYr‰]Œ]ˆà€˜' YÒ.Ð.Ý   RÑ(Ô(ˆˆØ	IÒ	Ð	ÝŒ~˜b "Ñ%Ô%ˆˆå  B¨Ñ0Ô0ˆà Ð(¨D°LÒ,@Ð,@ˆ4ˆ4À|ÐVWÑGWÐWr   c               ó¢   — | || ¦  «        }  ||¦  «        }|pd}t          | ||¦  «        }t          | ||¬¦  «        }||z
  }|||k    r|ndS )a×  
    Calculates the levenshtein similarity in the range [max, 0] using custom
    costs for insertion, deletion and substitution.

    This is calculated as ``max - distance``, where max is the maximal possible
    Levenshtein distance given the lengths of the sequences s1/s2 and the weights.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the similarity is smaller than score_cutoff,
        0 is returned instead. Default is None, which deactivates
        this behaviour.
    score_hint : int, optional
        Expected similarity between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    similarity : int
        similarity between s1 and s2

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown
    Nr8   ©r   r   )r   rA   )	r   r   r   r9   r:   r;   ÚmaximumrB   Úsims	            r   Ú
similarityrG   ¦   sv   € ð` ÐØˆYr‰]Œ]ˆØˆYr‰]Œ]ˆàÐ"˜€GÝ" 2 r¨7Ñ3Ô3€GÝB˜ GÐ,Ñ,Ô,€DØ
D‰.€CØÐ'¨3°,Ò+>Ð+>ˆ3ˆ3ÀQÐFr   úfloat | NoneÚfloatc               óê   — t          | ¦  «        st          |¦  «        rdS | || ¦  «        }  ||¦  «        }|pd}t          | ||¦  «        }t          | ||¬¦  «        }|r||z  nd}|||k    r|ndS )aû  
    Calculates a normalized levenshtein distance in the range [1, 0] using custom
    costs for insertion, deletion and substitution.

    This is calculated as ``distance / max``, where max is the maximal possible
    Levenshtein distance given the lengths of the sequences s1/s2 and the weights.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_dist > score_cutoff 1.0 is returned instead. Default is None,
        which deactivates this behaviour.
    score_hint : float, optional
        Expected normalized distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    norm_dist : float
        normalized distance between s1 and s2 as a float between 1.0 and 0.0

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown
    ç      ð?Nr8   rD   r   r   )r   r   rA   )	r   r   r   r9   r:   r;   rE   rB   Ú	norm_dists	            r   Únormalized_distancerM   á   s    € õ^ ˆr{„{ð •g˜b‘k”kð ØˆsàÐØˆYr‰]Œ]ˆØˆYr‰]Œ]ˆàÐ"˜€GÝ" 2 r¨7Ñ3Ô3€GÝB˜ GÐ,Ñ,Ô,€DØ")Ð0w‘¨q€IØ%Ð-°¸lÒ1JÐ1Jˆ9ˆ9ÐQRÐRr   c               óÀ   — t          | ¦  «        st          |¦  «        rdS | || ¦  «        }  ||¦  «        }|pd}t          | ||¬¦  «        }d|z
  }|||k    r|ndS )aÉ  
    Calculates a normalized levenshtein similarity in the range [0, 1] using custom
    costs for insertion, deletion and substitution.

    This is calculated as ``1 - normalized_distance``

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_sim < score_cutoff 0 is returned instead. Default is None,
        which deactivates this behaviour.
    score_hint : int, optional
        Expected normalized similarity between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    norm_sim : float
        normalized similarity between s1 and s2 as a float between 0 and 1.0

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown

    Examples
    --------
    Find the normalized Levenshtein similarity between two strings:

    >>> from rapidfuzz.distance import Levenshtein
    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein")
    0.81818181818181

    Setting a score_cutoff allows the implementation to select
    a more efficient implementation:

    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein", score_cutoff=0.85)
    0.0

    It is possible to select different weights by passing a `weight`
    tuple.

    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein", weights=(1,1,2))
    0.85714285714285

    When a different processor is used s1 and s2 do not have to be strings

    >>> Levenshtein.normalized_similarity(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
    0.81818181818181
    g        Nr8   rD   rK   r   )r   rM   )r   r   r   r9   r:   r;   rL   Únorm_sims           r   Únormalized_similarityrP     s…   € õN ˆr{„{ð •g˜b‘k”kð ØˆsàÐØˆYr‰]Œ]ˆØˆYr‰]Œ]ˆàÐ"˜€GÝ# B¨°GÐ<Ñ<Ô<€IØY‰€HØ$Ð,°¸LÒ0HÐ0Hˆ8ˆ8ÈqÐPr   ©r9   r;   r	   c               ó@   — | || ¦  «        }  ||¦  «        }t           ‚)u  
    Return Editops describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_hint : int, optional
        Expected distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    editops : Editops
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki HyyrÃ¶, which is
    described [8]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [8] HyyrÃ¶, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import Levenshtein
    >>> for tag, src_pos, dest_pos in Levenshtein.editops("qabxcd", "abycdf"):
    ...    print(("%7s s1[%d] s2[%d]" % (tag, src_pos, dest_pos)))
     delete s1[1] s2[0]
    replace s1[3] s2[2]
     insert s1[6] s2[5]
    )ÚNotImplementedError©r   r   r9   r;   s       r   ÚeditopsrU   r  s+   € ð^ ÐØˆYr‰]Œ]ˆØˆYr‰]Œ]ˆå
Ðr   r
   c               óL   — t          | |||¬¦  «                             ¦   «         S )uÊ  
    Return Opcodes describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_hint : int, optional
        Expected distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    opcodes : Opcodes
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki HyyrÃ¶, which is
    described [9]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [9] HyyrÃ¶, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import Levenshtein

    >>> a = "qabxcd"
    >>> b = "abycdf"
    >>> for tag, i1, i2, j1, j2 in Levenshtein.opcodes("qabxcd", "abycdf"):
    ...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
    ...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
     delete a[0:1] (q) b[0:0] ()
      equal a[1:3] (ab) b[0:2] (ab)
    replace a[3:4] (x) b[2:3] (y)
      equal a[4:6] (cd) b[3:5] (cd)
     insert a[6:6] () b[5:6] (f)
    rQ   )rU   Ú
as_opcodesrT   s       r   ÚopcodesrX   ¨  s(   € õj 2r Y¸:ÐFÑFÔF×QÒQÑSÔSÐSr   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r<   r9   r=   r:   r>   r;   r>   r   r   )r   r   r   r   r   r<   r9   r=   r:   rH   r;   rH   r   rI   )
r   r   r   r   r9   r=   r;   r>   r   r	   )
r   r   r   r   r9   r=   r;   r>   r   r
   )Ú
__future__r   Útypingr   r   r   Úrapidfuzz._utilsr   Úrapidfuzz.distancer   Úrapidfuzz.distance._initializer	   r
   r   r(   r7   rA   rG   rM   rP   rU   rX   © r   r   ú<module>r_      sç  ðð #Ð "Ð "Ð "Ð "Ð "à /Ð /Ð /Ð /Ð /Ð /Ð /Ð /Ð /Ð /à $Ð $Ð $Ð $Ð $Ð $Ø $Ð $Ð $Ð $Ð $Ð $Ø ;Ð ;Ð ;Ð ;Ð ;Ð ;Ð ;Ð ;ðð ð ð ð"ð ð ð ð(!ð !ð !ð !ðP ,5Ø:>Ø#Ø!ðMXð MXð MXð MXð MXð MXðh ,5Ø:>Ø#Ø!ð8Gð 8Gð 8Gð 8Gð 8Gð 8Gð~ ,5Ø:>Ø!%Ø#ð:Sð :Sð :Sð :Sð :Sð :SðB ,5Ø:>Ø!%Ø#ðQQð QQð QQð QQð QQð QQðp ;?Ø!ð3ð 3ð 3ð 3ð 3ð 3ðt ;?Ø!ð5Tð 5Tð 5Tð 5Tð 5Tð 5Tð 5Tð 5Tr   