
    NiF4                       d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	 ddl
m
Z ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZ erdd	lmZ d
Z G d de          Z ed          d.d            Z ed          d/d            Z  ed          d/d            Z! ed          d/d            Z" ed          d/d            Z# G d de	          Z$ ed          d0d            Z%d1d"Z&	 	 d2d3d)Z'd4d+Z(d5d,Z)	 	 d2d3d-Z*d#S )6z
Grapheme cluster segmentation following Unicode Standard Annex #29.

This module provides pure-Python implementation of the grapheme cluster boundary algorithm as
defined in UAX #29: Unicode Text Segmentation.

https://www.unicode.org/reports/tr29/
    )annotations)IntEnum)	lru_cache)TYPE_CHECKING
NamedTuple   )bisearch)
GRAPHEME_L
GRAPHEME_T
GRAPHEME_VGRAPHEME_LVINCB_EXTENDINCB_LINKERGRAPHEME_LVTINCB_CONSONANTGRAPHEME_EXTENDGRAPHEME_CONTROLGRAPHEME_PREPENDGRAPHEME_SPACINGMARKEXTENDED_PICTOGRAPHICGRAPHEME_REGIONAL_INDICATOR)Iterator    c                  J    e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdS )GCBz'Grapheme Cluster Break property values.r   r                        	   
            N)__name__
__module____qualname____doc__OTHERCRLFCONTROLEXTENDZWJREGIONAL_INDICATORPREPENDSPACING_MARKLVTLVLVT     k/var/www/html/web/mlink/mlink_AI_Server/mlink-backend/venv/lib/python3.11/site-packages/wcwidth/grapheme.pyr   r   ,   s[        11E	
B	
BGF
CGL	A
A
A	B
CCCr;   r   i   )maxsizeucsintreturnc                   | dk    rt           j        S | dk    rt           j        S | dk    rt           j        S t	          | t
                    rt           j        S t	          | t                    rt           j        S t	          | t                    rt           j
        S t	          | t                    rt           j        S t	          | t                    rt           j        S t	          | t                    rt           j        S t	          | t"                    rt           j        S t	          | t&                    rt           j        S t	          | t*                    rt           j        S t	          | t.                    rt           j        S t           j        S )z;Return the Grapheme_Cluster_Break property for a codepoint.r'   r$   i   )r   r-   r.   r1   	_bisearchr   r/   r   r0   r   r2   r   r3   r   r4   r
   r5   r   r6   r   r7   r   r8   r   r9   r,   r>   s    r<   _grapheme_cluster_breakrD   B   sA   
 f}}v
f}}v
f}}w&'' {o&& z122 &%%&'' {*++  j!! uj!! uj!! uk"" vl## w9r;   boolc                F    t          t          | t                              S )z6Check if codepoint has Extended_Pictographic property.)rE   rB   r   rC   s    r<   _is_extended_pictographicrG   e   s     	#455666r;   c                F    t          t          | t                              S )z,Check if codepoint has InCB=Linker property.)rE   rB   r   rC   s    r<   _is_incb_linkerrI   k        	#{++,,,r;   c                F    t          t          | t                              S )z/Check if codepoint has InCB=Consonant property.)rE   rB   r   rC   s    r<   _is_incb_consonantrL   q   s     	#~..///r;   c                F    t          t          | t                              S )z,Check if codepoint has InCB=Extend property.)rE   rB   r   rC   s    r<   _is_incb_extendrN   w   rJ   r;   c                  (    e Zd ZU dZded<   ded<   dS )BreakResultz*Result of grapheme cluster break decision.rE   should_breakr?   ri_countN)r(   r)   r*   r+   __annotations__r:   r;   r<   rP   rP   }   s+         44MMMMMr;   rP   prev_gcbcurr_gcbBreakResult | Nonec                   | t           j        k    r!|t           j        k    rt          dd          S | t           j        t           j        t           j        fv rt          dd          S |t           j        t           j        t           j        fv rt          dd          S | t           j        k    rA|t           j        t           j        t           j        t           j        fv rt          dd          S | t           j        t           j        fv r+|t           j        t           j	        fv rt          dd          S | t           j        t           j	        fv r!|t           j	        k    rt          dd          S |t           j
        k    rt          dd          S |t           j        k    rt          dd          S | t           j        k    rt          dd          S dS )z
    Check simple GCB-pair-based break rules (cacheable).

    Returns BreakResult for rules that can be determined from GCB properties alone, or None if
    complex lookback rules (GB9c, GB11) need to be checked.
    Fr   rQ   rR   TN)r   r-   r.   rP   r/   r5   r6   r8   r9   r7   r0   r4   r3   )rT   rU   s     r<   _simple_break_checkrY      s    36h#&00:::: CK000q9999 CK000q9999 35X#%)HHH:::: CFCE?""xCE35>'A'A:::: CGSU###CE(9(9:::: 3::::: 3###:::: 3;:::: 4r;   textstrcurr_idxrR   c                   t          | |          }||S |t          j        k    rt          dd          S t	          ||                   }t          |          rxd}|dz
  }|dk    rkt	          ||                   }	t          |	          rd}|dz  }n9t          |	          r|dz  }n$t          |	          r|rt          dd          S nn|dk    k| t          j        k    r{t          |          rl|dz
  }|dk    rat	          ||                   }	t          |	          }
|
t          j
        k    r|dz  }n!t          |	          rt          dd          S n|dk    a| t          j        k    r>|t          j        k    r.|dz  dk    rt          d|dz             S t          dd          S |t          j        k    rdnd}t          d|          S )z
    Determine if there should be a grapheme cluster break between prev and curr.

    Implements UAX #29 grapheme cluster boundary rules.
    NFr   rX   r   Tr   )rY   r   r1   rP   ordrL   rI   rN   rG   rD   r0   r2   )rT   rU   rZ   r\   rR   resultcurr_ucs
has_linkeriprev_ucs	prev_props              r<   _should_breakre      s    !844F 37::::
 4>""H(## 
qL1ff47||Hx(( 
!
Q ** Q#H--  G&EAFFFF 1ff 378BBqL1ff47||H/99ICJ&&Q*844 "BBBB 1ff 3)))h#:P.P.Pa<1EHqLIIIIq9999  666qqAHD8<<<<r;   Nunistrstartend
int | NoneIterator[str]c              #    K   | sdS t          |           }||}||k    s||k    rdS t          ||          }|}d}t          t          | |                             }|t          j        k    rd}t          |dz   |          D ]U}t          t          | |                             }t          ||| ||          }	|	j        }|	j	        r| ||         V  |}|}V| ||         V  dS )aP  
    Iterate over grapheme clusters in a Unicode string.

    Grapheme clusters are "user-perceived characters" - what a user would
    consider a single character, which may consist of multiple Unicode
    codepoints (e.g., a base character with combining marks, emoji sequences).

    :param unistr: The Unicode string to segment.
    :param start: Starting index (default 0).
    :param end: Ending index (default len(unistr)).
    :yields: Grapheme cluster substrings.

    Example::

        >>> list(iter_graphemes('cafe\u0301'))
        ['c', 'a', 'f', 'e\u0301']
        >>> list(iter_graphemes('\U0001F468\u200D\U0001F469\u200D\U0001F467'))
        ['o', 'k', '\U0001F468\u200D\U0001F469\u200D\U0001F467']
        >>> list(iter_graphemes('\U0001F1FA\U0001F1F8'))
        ['o', 'k', '\U0001F1FA\U0001F1F8']

    .. versionadded:: 0.3.0
    Nr   r   )
lenminrD   r^   r   r2   rangere   rR   rQ   )
rf   rg   rh   lengthcluster_startrR   rT   idxrU   r_   s
             r<   iter_graphemesrr      s(     8  [[F
{||u
c6

C MH 's6%='9'9::H 3)))UQY$$ 
 
*3vc{+;+;<<x63II? 	 s*++++M s"
######r;   posc                V   t          | |dz
                     }|dk    r|dk    r| |dz
           dk    r|dz
  S |dk     r_|dk    rT|dk    rNt          | |dz
                     }|dk    r0t          |          t          j        k    rt	          | |dz
            S |dz
  S |dz
  }|dk    rk||z
  t
          k     r]t          | |                   }d|cxk    rdk     rn nn7t          |          t          j        k    rn|dz  }|dk    r||z
  t
          k     ]|}t          t          | |                             }|t          j        k    rdnd}t          |dz   |          D ]I}	t          t          | |	                             }
t          ||
| |	|          }|j
        }|j        r|	}|
}J|S )a  
    Find the start of the grapheme cluster containing the character before pos.

    Scans backwards from pos to find a safe starting point, then iterates forward using standard
    break rules to find the actual cluster boundary.

    :param text: The Unicode string.
    :param pos: Position to search before (exclusive).
    :returns: Start position of the grapheme cluster.
    r   r$   r      r   r   )r^   rD   r   r3   _find_cluster_startMAX_GRAPHEME_SCANr/   r2   rn   re   rR   rQ   )rZ   rs   	target_cpprev_cp
safe_startcprp   left_gcbrR   rb   	right_gcbr_   s               r<   rw   rw   <  s    DqM""I DSAXX$sQw-4*?*?Qw 4!88	T))$sQw-((G$#:7#C#Cs{#R#R*4q999Qw qJ
q..cJ.2CCCj!""2"2&&#+55a
 q..cJ.2CCC M&s4
+;'<'<==H 666qqAH:>3''  +CQLL99	xD!XFF? 	Mr;   c           	     h    |dk    rdS t          | t          |t          |                               S )a  
    Find the grapheme cluster boundary immediately before a position.

    :param unistr: The Unicode string to search.
    :param pos: Position in the string (0 < pos <= len(unistr)).
    :returns: Start index of the grapheme cluster containing the character at pos-1.

    Example::

        >>> grapheme_boundary_before('Hello \U0001F44B\U0001F3FB', 8)
        6
        >>> grapheme_boundary_before('a\r\nb', 3)
        1

    .. versionadded:: 0.3.6
    r   )rw   rm   rl   )rf   rs   s     r<   grapheme_boundary_beforer   p  s2    " axxqvs3F'<'<===r;   c              #     K   | sdS t          |           }||nt          ||          }t          |d          }||k    s||k    rdS |}||k    r.t          | |          }||k     rdS | ||         V  |}||k    ,dS dS )a  
    Iterate over grapheme clusters in reverse order (last to first).

    :param unistr: The Unicode string to segment.
    :param start: Starting index (default 0).
    :param end: Ending index (default len(unistr)).
    :yields: Grapheme cluster substrings in reverse order.

    Example::

        >>> list(iter_graphemes_reverse('cafe\u0301'))
        ['e\u0301', 'f', 'a', 'c']

    .. versionadded:: 0.3.6
    Nr   )rl   rm   maxrw   )rf   rg   rh   ro   rs   rp   s         r<   iter_graphemes_reverser     s      (  [[FK&&Sf%5%5CqMME||u
C
+++FC885  E]3&'''' ++++++r;   )r>   r?   r@   r   )r>   r?   r@   rE   )rT   r   rU   r   r@   rV   )rT   r   rU   r   rZ   r[   r\   r?   rR   r?   r@   rP   )r   N)rf   r[   rg   r?   rh   ri   r@   rj   )rZ   r[   rs   r?   r@   r?   )rf   r[   rs   r?   r@   r?   )+r+   
__future__r   enumr   	functoolsr   typingr   r   r	   rB   table_graphemer
   r   r   r   r   r   r   r   r   r   r   r   r   r   collections.abcr   rx   r   rD   rG   rI   rL   rN   rP   rY   re   rr   rw   r   r   r:   r;   r<   <module>r      s7    # " " " " "             , , , , , , , , , + + + + +: : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : :  )((((((      '   , 4   D 47 7 7 7
 4- - - -
 40 0 0 0
 4- - - -
    *    4- - - -`@= @= @= @=J A$ A$ A$ A$ A$H1 1 1 1h> > > >0 & & & & & & &r;   