
    dy'                        d dl mZ d dlmZmZmZ d dlmZ d dlm	Z	m
Z
 dddddZ	 dd dZdddddZdddd!dZdddd!dZddd"dZddd#dZdS )$    )annotations)CallableHashableSequence)is_none)EditopsOpcodesN)	processorscore_cutoffs1Sequence[Hashable]s2r
   (Callable[..., Sequence[Hashable]] | Noner   
int | Nonereturnintc               |   | ||           }  ||          }| sdS dt          |           z  dz
  }i }|j        }d}| D ]} ||d          |z  ||<   |dz  }|D ]}	 ||	d          }
||
z  }||z   ||z
  z  }t          |          t          |            d                             d          }|||k    r|ndS )a  
    Calculates the length of the longest common subsequence

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the similarity is smaller than score_cutoff,
        0 is returned instead. Default is None, which deactivates
        this behaviour.

    Returns
    -------
    similarity : int
        similarity between s1 and s2
    Nr      0lengetbincount)r   r   r
   r   Sblock	block_getxch1ch2Matchesuress                `/home/feoh/.local/pipx/venvs/poetry/lib/python3.11/site-packages/rapidfuzz/distance/LCSseq_py.py
similarityr%      s   < Yr]]Yr]] q	
c"ggA!#E	I	A  YsA&&*c
	a  )C##KUq1u a&&#b''

"
"3
'
'C'3,+>+>33QF    r   dict[Hashable, int]c                   |sdS dt          |          z  dz
  }| j        }|D ]} ||d          }||z  }||z   ||z
  z  }t          |          t          |           d                              d          }	||	|k    r|	ndS )Nr   r   r   r   )
r   r   r   r   r   r   r    r!   r"   r#   s
             r$   _block_similarityr)   C   s      q	
c"ggA	I  )C##KUq1u a&&#b''

"
"3
'
'C'3,+>+>33QFr&   c                   | ||           }  ||          }t          t          |           t          |                    }t          | |          }||z
  }|||k    r|n|dz   S )a  
    Calculates the LCS distance in the range [0, max].

    This is calculated as ``max(len1, len2) - similarity``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the distance is bigger than score_cutoff,
        score_cutoff + 1 is returned instead. Default is None, which deactivates
        this behaviour.

    Returns
    -------
    distance : int
        distance between s1 and s2

    Examples
    --------
    Find the LCS distance between two strings:

    >>> from rapidfuzz.distance import LCSseq
    >>> LCSseq.distance("lewenstein", "levenshtein")
    2

    Setting a maximum distance allows the implementation to select
    a more efficient implementation:

    >>> LCSseq.distance("lewenstein", "levenshtein", score_cutoff=1)
    2

    Nr   )maxr   r%   )r   r   r
   r   maximumsimdists          r$   distancer/   Y   sw    ^ Yr]]Yr]]#b''3r77##G
R

CS=D (DL,@,@44|VWGWWr&   float | Nonefloatc                  t          |           st          |          rdS | ||           }  ||          }| r|sdS t          t          |           t          |                    }t          | |          |z  }|||k    r|ndS )a2  
    Calculates a normalized LCS similarity in the range [1, 0].

    This is calculated as ``distance / max(len1, len2)``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_dist > score_cutoff 1.0 is returned instead. Default is 1.0,
        which deactivates this behaviour.

    Returns
    -------
    norm_dist : float
        normalized distance between s1 and s2 as a float between 0 and 1.0
          ?Nr   r   )r   r+   r   r/   )r   r   r
   r   r,   norm_sims         r$   normalized_distancer5      s    > r{{ gbkk sYr]]Yr]] R q#b''3r77##GB')H$,L0H0H88qPr&   c                   t          |           st          |          rdS | ||           }  ||          }dt          | |          z
  }|||k    r|ndS )a  
    Calculates a normalized LCS similarity in the range [0, 1].

    This is calculated as ``1 - normalized_distance``

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_sim < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    norm_sim : float
        normalized similarity between s1 and s2 as a float between 0 and 1.0

    Examples
    --------
    Find the normalized LCS similarity between two strings:

    >>> from rapidfuzz.distance import LCSseq
    >>> LCSseq.normalized_similarity("lewenstein", "levenshtein")
    0.8181818181818181

    Setting a score_cutoff allows the implementation to select
    a more efficient implementation:

    >>> LCSseq.normalized_similarity("lewenstein", "levenshtein", score_cutoff=0.9)
    0.0

    When a different processor is used s1 and s2 do not have to be strings

    >>> LCSseq.normalized_similarity(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
    0.81818181818181
    g        Nr3   r   )r   r5   )r   r   r
   r   r4   s        r$   normalized_similarityr7      ss    d r{{ gbkk sYr]]Yr]](R000H$,L0H0H88qPr&   r
   r   c                   t           )uc  
    Return Editops describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.

    Returns
    -------
    editops : Editops
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [6]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [6] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import LCSseq
    >>> for tag, src_pos, dest_pos in LCSseq.editops("qabxcd", "abycdf"):
    ...    print(("%7s s1[%d] s2[%d]" % (tag, src_pos, dest_pos)))
     delete s1[0] s2[0]
     delete s1[3] s2[2]
     insert s1[4] s2[2]
     insert s1[6] s2[5]
    )NotImplementedErrorr   r   r
   s      r$   editopsr<      s    X r&   r	   c               J    t          | ||                                          S )u  
    Return Opcodes describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.

    Returns
    -------
    opcodes : Opcodes
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [7]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [7] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import LCSseq

    >>> a = "qabxcd"
    >>> b = "abycdf"
    >>> for tag, i1, i2, j1, j2 in LCSseq.opcodes(a, b):
    ...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
    ...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
     delete a[0:1] (q) b[0:0] ()
      equal a[1:3] (ab) b[0:2] (ab)
     delete a[3:4] (x) b[2:2] ()
     insert a[4:4] () b[2:3] (y)
      equal a[4:6] (cd) b[3:5] (cd)
     insert a[6:6] () b[5:6] (f)
    r8   )r<   
as_opcodesr;   s      r$   opcodesr?   ,  s&    d 2rY///::<<<r&   )
r   r   r   r   r
   r   r   r   r   r   )N)
r   r'   r   r   r   r   r   r   r   r   )
r   r   r   r   r
   r   r   r0   r   r1   )r   r   r   r   r
   r   r   r   )r   r   r   r   r
   r   r   r	   )
__future__r   typingr   r   r   rapidfuzz._utilsr   rapidfuzz.distance._initializer   r	   r%   r)   r/   r5   r7   r<   r?    r&   r$   <module>rE      s   # " " " " " / / / / / / / / / / $ $ $ $ $ $ ; ; ; ; ; ; ; ; ;?#4G 4G 4G 4G 4G 4Gv  $	G G G G G4 ;?#6X 6X 6X 6X 6X 6Xz ;?!%+Q +Q +Q +Q +Q +Qd ;?!%:Q :Q :Q :Q :Q :QB ;?	, , , , , ,f ;?	2= 2= 2= 2= 2= 2= 2= 2=r&   