
    &PhB                       d dl mZ d dlZd dlZd dlmZmZmZ d dlZd dl	Z
d dlmZmZ d dlmZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z'm(Z( d dl)m*Z* es
d dl+Z,d dl-m.Z/ erd dl0m1Z1 d dl2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8 ee9ejt                  f   Z;ddZ<d Z= G d de*ee'      Z> G d de>      Z?y)    )annotationsN)TYPE_CHECKINGCallableUnion)libmissing)pa_version_under10p1pa_version_under13p0pa_version_under16p0)find_stack_level)	is_scalarpandas_dtype)isna)ArrowStringArrayMixin)ArrowExtensionArray)BooleanDtype)Float64Dtype)
Int64Dtype)NumericDtype)BaseStringArrayStringDtype)ObjectStringArrayMixin)Sequence)	ArrayLikeDtypeSelfnpt)Seriesc                 *    t         rd} t        |       y )NzCpyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray.)r	   ImportError)msgs    jC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\pandas/core/arrays/string_arrow.py_chk_pyarrow_availabler#   @   s    S#     c                R    t          xr t        j                  j                  |       S N)r   patypesis_string_view)typs    r"   _is_string_viewr+   F   s    ##D(?(?(DDr$   c                      e Zd ZU dZded<   dZej                  Zded<   d' fdZ	e
d(d) fd	       Ze
	 d*	 	 	 	 	 d+ fd       Zd,dZe
dd
dd-d       Ze
	 d*	 	 	 d-d       Zed.d       Zd/ fdZej(                  dfdZ fdZd0dZd1d2 fdZed        Zej6                  Zej8                  Zej:                  Zej<                  Zej>                  Zej@                  Z ejB                  Z!ejD                  Z"ejF                  Z#e$jJ                  Z%ejL                  Z&ejN                  Z'ejP                  Z(ejR                  Z)ejT                  Z*ejV                  Z+ejX                  Z,ejZ                  Z-ej\                  Z.ej^                  Z/ej`                  Z0ejb                  Z1ejd                  Z2ejf                  Z3ejh                  Z4ejj                  Z5ejl                  Z6ejn                  Z7ddej(                  df	 	 	 	 	 d3 fdZ8	 	 	 	 d4	 	 	 	 	 	 	 	 	 	 	 d5 fdZ9d6 fdZ:d7 fdZ;d8d9 fdZ<d:d; fdZ=d<d=dZ>d  Z?d! Z@dd
d"	 	 	 	 	 d>d#ZAd1d? fd$ZB fd%ZCd@d&ZD xZES )AArrowStringArraya  
    Extension array for string data in a ``pyarrow.ChunkedArray``.

    .. warning::

       ArrowStringArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : pyarrow.Array or pyarrow.ChunkedArray
        The array of data.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    :func:`pandas.array`
        The recommended function for creating a ArrowStringArray.
    Series.str
        The string methods are available on Series backed by
        a ArrowStringArray.

    Notes
    -----
    ArrowStringArray returns a BooleanArray for comparison methods.

    Examples
    --------
    >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string[pyarrow]")
    <ArrowStringArray>
    ['This is', 'some text', <NA>, 'data.']
    Length: 4, dtype: string
    r   _dtypepyarrowzlibmissing.NAType | float	_na_valuec                ~   t                t        |t        j                  t        j                  f      rt        j
                  j                  |j                        st        |j                        st        j
                  j                  |j                        rt        j
                  j                  |j                  j                        sRt        j
                  j                  |j                  j                        st        |j                  j                        r(t        j                  |t        j                               }t        | A  |       t#        | j$                  | j&                        | _        t        j
                  j                  | j*                  j                        st-        d      y )N)storagena_valuezHArrowStringArray requires a PyArrow (chunked) array of large_string type)r#   
isinstancer'   ArrayChunkedArrayr(   	is_stringtyper+   is_dictionary
value_typeis_large_stringpccastlarge_stringsuper__init__r   _storager0   r.   	_pa_array
ValueError)selfvalues	__class__s     r"   r@   zArrowStringArray.__init__   s    frxx9:HHv{{+v{{+&&v{{3HH&&v{{'='=>xx//0F0FG&v{{'='=> WWVR__%67F !$--$..Qxx''(;(;<$  =r$   Nc                    t         |   ||      }t        j                  j	                  |j
                        r*|(t        j                  |t        j                               }|S r&   )	r?   _box_pa_scalarr'   r(   r7   r8   r<   r=   r>   )clsvaluepa_type	pa_scalarrF   s       r"   rH   zArrowStringArray._box_pa_scalar   sK    G*5':	88inn-'/	2??+<=Ir$   Fc                    t         |   ||      }t        j                  j	                  |j
                        r*|(t        j                  |t        j                               }|S r&   )	r?   _box_pa_arrayr'   r(   r7   r8   r<   r=   r>   )rI   rJ   rK   copypa_arrayrF   s        r"   rN   zArrowStringArray._box_pa_array   sL     7(888hmm,wwx):;Hr$   c                ,    t        | j                        S )z]
        Length of this array.

        Returns
        -------
        length : int
        )lenrB   rD   s    r"   __len__zArrowStringArray.__len__   s     4>>""r$   dtyperO   c                  ddl m} t                |rAt        |t              r|dk(  s,t        |      }t        |t              r|j                  dk(  sJ t        ||      r`|j                  }|j                  }t        j                  ||d      } | t        j                  ||t        j                                     S t        |t        j                  t        j                   f      r. | t#        j$                  |t        j                                     S t        j                  ||      } | t        j                  |t        j                         d	
            S )Nr   )BaseMaskedArraystringr/   F)rO   convert_na_value)maskr8   rO   T)r8   from_pandas)pandas.core.arrays.maskedrX   r#   r4   strr   r   r2   _mask_datar   ensure_string_arrayr'   arrayr>   r5   r6   r<   r=   )rI   scalarsrV   rO   rX   	na_valuesresults          r"   _from_sequencezArrowStringArray._from_sequence   s    = *UC0Uh5F 'Ee[1emmy6PPPg/  I]]F,,V$QVWFrxxYR__=NOPP"((BOO!<=rwww(9:;; ((t<288F):MNNr$   c                *    | j                  |||      S )NrU   )rg   )rI   stringsrV   rO   s       r"   _from_sequence_of_stringsz*ArrowStringArray._from_sequence_of_strings   s     !!'T!BBr$   c                    | j                   S )z3
        An instance of 'string[pyarrow]'.
        )r.   rS   s    r"   rV   zArrowStringArray.dtype   s    
 {{r$   c                B   | j                   j                  t        j                  u r"|t        j                  u rt        j
                  }t        |t              s7|t        j
                  ur%t        d| dt        |      j                   d      t        | 1  ||      S )NInvalid value 'C' for dtype 'str'. Value should be a string or missing value, got '
' instead.)rV   r3   npnan
libmissingNAr4   r_   	TypeErrorr8   __name__r?   insert)rD   locitemrF   s      r"   rv   zArrowStringArray.insert   s    ::"&&(TRVV^==D$$Z]])B!$ (115d1D1D0EZQ  w~c4((r$   c                Z   |t         j                  urNt        |      sCt        |t              s3t        j                  d| dt        t                      t	        |      }| j                  j                  t        j                  u r_|t         j                  u st        |      r!|j                  d      }|j                         S |j                  |      }|j                         S |t         j                  urt        |      s|j                  |      }t               j!                  |      S )Nz$Allowing a non-bool 'na' in obj.str.z2 is deprecated and will raise in a future version.
stacklevelF)r   
no_defaultr   r4   boolwarningswarnFutureWarningr   rV   r3   rp   rq   	fill_nullto_numpyr   __from_arrow__)rD   rE   namethod_names       r"   _convert_bool_resultz%ArrowStringArray._convert_bool_result   s    S^^#DHZD=QMM6{m D6 6+-	 bB::"&&(S^^#tBx))%0 ??$$  ))"-??$$'1  ))"-~,,V44r$   c                \   t        |      rCt        |      rd}nt        |t              stt	        d| dt        |      j                   d      t        j                  |t        d      }d|t        |      <   |D ]   }|t        |t              rt	        d       t        | -  |      S )z-Maybe convert value to be pyarrow compatible.Nrm   rn   ro   TrU   z]Invalid value for dtype 'str'. Value should be a string or missing value (or array of those).)r   r   r4   r_   rt   r8   ru   rp   rc   objectr?   _maybe_convert_setitem_value)rD   rJ   vrF   s      r"   r   z-ArrowStringArray._maybe_convert_setitem_value   s    UE{s+%eW -559%[5I5I4J*V 
 HHU&t<E!%E$u+	Z3%7#G   w3E::r$   c                `   |D cg c]  }t        j                  |d       c}D cg c]Y  }|j                  t        j                         t        j                         t        j
                         fv r|j                         [ }}t        |      s$t        j                  t        |       t              S t        j                  | j                  t        j                  || j                  j                              }t        j                  |t        j                        S c c}w c c}w )NT)r]   )rV   )r8   )	value_set)r'   scalarr8   rY   nullr>   as_pyrR   rp   zerosr}   r<   is_inrB   rc   bool_)rD   rE   rJ   rL   r   rf   s         r"   isinzArrowStringArray.isin  s     OUUfUbii4@fU
U	~~"))+rwwy"//:K!LL OOU 	 
 9~88CIT22NNbhhyt~~?R?R&S

 xxbhh// V
s   D&AD+Tc                   t        |      }|| j                  k(  r|r| j                         S | S t        |t              rI| j
                  j                  t        j                  |j                              }|j                  |      S t        |t        j                        rEt        j                  |t        j                        r!| j                  |t        j                        S t         | E  ||      S )N)rV   r3   r\   )r   rV   rO   r4   r   rB   r=   r'   from_numpy_dtypenumpy_dtyper   rp   
issubdtypefloatingr   rq   r?   astype)rD   rV   rO   datarF   s       r"   r   zArrowStringArray.astype!  s    U#DJJyy{"K|,>>&&r':':5;L;L'MND''--rxx(R]]5"++-N==urvv=>>w~e$~//r$   c                    t        j                  t        |       j                   dt        t                      | j                  S )NzV._data is a deprecated and will be removed in a future version, use ._pa_array insteadrz   )r~   r   r8   ru   r   r   rB   rS   s    r"   ra   zArrowStringArray._data0  s?     	Dz""# $: :')		
 ~~r$   r   c                b    |rt         |   |||||      S t        j                  | |||||      S r&   )r?   _str_containsr   )rD   patcaseflagsr   regexrF   s         r"   r   zArrowStringArray._str_contains\  s;     7(dE2uEE$224dE2uUUr$   c           	         t        |t        j                        st        |      s|r|rt        |   ||||||      S t        j
                  | ||||||      S r&   )r4   rePatterncallabler?   _str_replacer   )rD   r   replnr   r   r   rF   s          r"   r   zArrowStringArray._str_replacei  sY     c2::&(4.7'T1dE5II$11#tQeU
 	
r$   c                p    t        |t              st        |   |      S t	        j                  | |      S )N)repeats)r4   intr?   _str_repeatr   )rD   r   rF   s     r"   r   zArrowStringArray._str_repeaty  s0    '3'7&w//&224IIr$   c                Z    t         st        j                  | |      S t        | 	  |      S r&   )r
   r   _str_removeprefixr?   )rD   prefixrF   s     r"   r   z"ArrowStringArray._str_removeprefix  s)    #(::4HHw(00r$   c                    |rt         |   ||      S t        j                  | j                  |      }| j                  |      S r&   )r?   
_str_countr<   count_substring_regexrB   _convert_int_result)rD   r   r   rf   rF   s       r"   r   zArrowStringArray._str_count  s?    7%c511))$..#>''//r$   c                ~    t         r|dk7  r||dk(  r|t        | 	  |||      S t        j                  | |||      S )Nr   )r
   r?   	_str_findr   )rD   substartendrF   s       r"   r   zArrowStringArray._str_find  sI     aZCOaZCK 7$S%55$..tS%EEr$   c                F   t        | j                        j                  |      \  }}t        |      dk(  r't	        j
                  dt        j                        |fS t	        j                  |j                               }|j                  t        j                  d      |fS )Nr   )r   r   )shaperV   Fr\   )
r   rB   _str_get_dummiesrR   rp   emptyint64vstackr   r   )rD   sep
dummies_palabelsdummiess        r"   r   z!ArrowStringArray._str_get_dummies  sz    0@QQRUV
Fv;!88&96AA))J//12~~bhhU~3V;;r$   c                v   | j                   j                  t        j                  u r{t	        |t
        j                        r|j                  d      }n|j                         }|j                   t        j                  k(  r|j                  t        j                        }|S t               j                  |      S )NFzero_copy_only)rV   r3   rp   rq   r4   r'   r5   r   int32r   r   r   r   rD   rf   s     r"   r   z$ArrowStringArray._convert_int_result  s|    ::"&&(&"((+>*||rxx'rxx0M|**622r$   c                    | j                   j                  t        j                  u rPt	        |t
        j                        r|j                  d      }n|j                         }|j                  dd      S t               j                  |      S )NFr   float64r\   )rV   r3   rp   rq   r4   r'   r5   r   r   r   r   r   s     r"   _convert_rank_resultz%ArrowStringArray._convert_rank_result  sh    ::"&&(&"((+>*===77~,,V44r$   skipnakeepdimsc                  | j                   j                  t        j                  u r|dv r|sTt	        j
                  | j                        }t	        j                  |t	        j                  | j                  d            }n t	        j                  | j                  d      } t        |      j                  |f||d|}|r|j                  t        j                        S |S |dv r | j                  |f||d|}nt        d| d      |dv r+t        |t         j"                        r| j%                  |      S t        |t         j"                        r t'        |       |      S |S )N)anyall r   )minmaxsumargminargmaxzCannot perform reduction 'z' with string dtype)r   r   )rV   r3   rp   rq   r<   is_nullrB   	or_kleene	not_equalr   _reducer   r   _reduce_calcrt   r4   r'   r5   r   r8   )rD   namer   r   kwargsnasarrrf   s           r"   r   zArrowStringArray._reduce  s<    ::"&&(T^-Cjj0ll3T^^R(HIll4>>265(-55#h:@F }}RXX..M<<&T&&tXFXXQWXF8>QRSS''Jvrxx,H++F33)4:f%%Mr$   c                    t         |   |      }| j                  j                  t        j
                  u rC|j                  j                         }|j                  ||j                  |j                  d      S |S )N)dropnaF)indexr   rO   )r?   value_countsrV   r3   rp   rq   _valuesr   _constructorr   r   )rD   r   rf   
res_valuesrF   s       r"   r   zArrowStringArray.value_counts  sm    %V%4::"&&(002J&&&,,V[[u '   r$   c                   t         |   ||      }| j                  j                  t        j
                  u rU|t        j                  k(  r!|j                  t        j                  d      S |j                  t        j                  d      S |S )NT)r3   F)
r?   _cmp_methodrV   r3   rp   rq   operatorner   r   )rD   otheroprf   rF   s       r"   r   zArrowStringArray._cmp_method  si    $UB/::"&&(X[[ rxx$??rxx%@@r$   c                4    t        d| j                   d      )Nzbad operand type for unary +: '')rt   rV   rS   s    r"   __pos__zArrowStringArray.__pos__  s    9$**QGHHr$   returnNoner&   )rK   pa.DataType | Noner   z	pa.Scalar)NF)rK   r   rO   r}   r   zpa.Array | pa.ChunkedArray)r   r   )rV   zDtype | NonerO   r}   )r   r   )rw   r   r   r-   )rE   r   r   znpt.NDArray[np.bool_])T)rO   r}   )r   r}   r   r   r   r}   )Tr   T)r   zstr | re.Patternr   zstr | Callabler   r   r   r}   r   r   r   r}   )r   zint | Sequence[int])r   r_   )r   )r   r_   r   r   )r   N)r   r_   r   r   r   z
int | None)|)r   r_   )r   r_   r   r}   r   r}   )r   r}   r   r   )r   r   )Fru   
__module____qualname____doc____annotations__rA   rr   rs   r0   r@   classmethodrH   rN   rT   rg   rj   propertyrV   rv   r   r|   r   r   r   r   ra   r   _str_isalnum_str_isalpha_str_isdecimal_str_isdigit_str_islower_str_isnumeric_str_isspace_str_istitle_str_isupperr   _str_map_str_startswith_str_endswith_str_pad
_str_match_str_fullmatch
_str_lower
_str_upper
_str_strip_str_lstrip_str_rstrip_str_removesuffix_str_get_str_capitalize
_str_title_str_swapcase_str_slice_replace_str_len
_str_slicer   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__)rF   s   @r"   r-   r-   O   s   'V H+5==I(82   EJ/>B	# # >BQV O O. ?DC)C8<C C
  ) /2nn$ 52;*0$0   )55L(55L*99N(55L(55L*99N(55L(55L(55L''H+;;O)77M$--H&11J*99N&11J&11J&11J'33K'33K-??$--H+;;O&11J)77M.AA$--H&11J
 >>V V 	V V" 

 
 	

 
 
 
 J1
0F<
35 ,0%$(;?:Ir$   r-   c                  $    e Zd Zej                  Zy)ArrowStringArrayNumpySemanticsN)ru   r   r   rp   rq   r0    r$   r"   r  r    s    Ir$   r  r   )@
__future__r   r   r   typingr   r   r   r~   numpyrp   pandas._libsr   r   rr   pandas.compatr	   r
   r   pandas.util._exceptionsr   pandas.core.dtypes.commonr   r   pandas.core.dtypes.missingr   'pandas.core.arrays._arrow_string_mixinsr   pandas.core.arrays.arrowr   pandas.core.arrays.booleanr   pandas.core.arrays.floatingr   pandas.core.arrays.integerr   pandas.core.arrays.numericr   pandas.core.arrays.string_r   r    pandas.core.strings.object_arrayr   r/   r'   pyarrow.computecomputer<   collections.abcr   pandas._typingr   r   r   r   pandasr   r_   NATypeArrowStringScalarOrNATr#   r+   r-   r  r  r$   r"   <module>r3     s    "  	 
   
 5 , I 8 3 4 1 3 D  (   sJ$5$556 ERI-/BO RIj%5 r$   