
    ti0                        d dl mZ d dlmZmZmZ d dlmZmZm	Z	 d dl
mZ d dlmZmZ d dlmZ g dZ e	dd	
      ZdedefdZ ed       G d dee                Z ed       G d de             Z ed       G d dee                Zy)    )defaultdict)CallableIteratorSized)AnyNoReturnTypeVar)functional_datapipe)	DataChunkIterDataPipe)_check_unpickable_fn)BatcherIterDataPipeGrouperIterDataPipeUnBatcherIterDataPipe_T_coT)	covariantnamereturnc                 ,    t        dt         d|        )Nzmodule z has no attribute )AttributeError__name__)r   s    n/home/ubuntu/crypto_trading_bot/.venv/lib/python3.12/site-packages/torch/utils/data/datapipes/iter/grouping.py__getattr__r      s    
78*,>tfE
FF    batchc                        e Zd ZU dZeed<   eed<   eed<   defdededede	e   ddf
 fd	Z
dee   fd
ZdefdZ xZS )r   a2  
    Creates mini-batches of data (functional name: ``batch``).

    An outer dimension will be added as ``batch_size`` if ``drop_last`` is set to ``True``, or ``length % batch_size`` for the
    last batch if ``drop_last`` is set to ``False``.

    Args:
        datapipe: Iterable DataPipe being batched
        batch_size: The size of each batch
        drop_last: Option to drop the last batch if it's not full
        wrapper_class: wrapper to apply onto each batch (type ``List``) before yielding,
            defaults to ``DataChunk``

    Example:
        >>> # xdoctest: +SKIP
        >>> from torchdata.datapipes.iter import IterableWrapper
        >>> dp = IterableWrapper(range(10))
        >>> dp = dp.batch(batch_size=3, drop_last=True)
        >>> list(dp)
        [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    datapipe
batch_size	drop_lastFwrapper_classr   Nc                 z    |dk  rt        d      t        | 	          || _        || _        || _        || _        y )Nr   z+Batch size is required to be larger than 0!)AssertionErrorsuper__init__r   r   r   r    )selfr   r   r   r    	__class__s        r   r$   zBatcherIterDataPipe.__init__5   s@     ? !NOO $"*r   c              #     K   g }| j                   D ]A  }|j                  |       t        |      | j                  k(  s-| j	                  |       g }C t        |      dkD  r!| j
                  s| j	                  |       y y y wNr   )r   appendlenr   r    r   )r%   r   xs      r   __iter__zBatcherIterDataPipe.__iter__D   s~      	ALLO5zT__,((//		
 u:>>>((// " s   ;BABc                 8   t        | j                  t              r`| j                  r"t	        | j                        | j
                  z  S t	        | j                        | j
                  z   dz
  | j
                  z  S t        t        |       j                   d      )N   z# instance doesn't have valid length)	
isinstancer   r   r   r*   r   	TypeErrortyper   r%   s    r   __len__zBatcherIterDataPipe.__len__O   ss    dmmU+~~4==)T__<<DMM*T__<q@T__TTtDz2233VWXXr   )r   
__module____qualname____doc__r   __annotations__intboolr   r1   r$   r   r,   r3   __classcell__)r&   s   @r   r   r      sz    , OO  )2++ + 	+
 I+ 
+	0(9- 	0Y Yr   r   unbatchc                   2    e Zd ZdZd	dededdfdZd Zd Zy)
r   a   
    Undos batching of data (functional name: ``unbatch``).

    In other words, it flattens the data up to the specified level within a batched DataPipe.

    Args:
        datapipe: Iterable DataPipe being un-batched
        unbatch_level: Defaults to ``1`` (only flattening the top level). If set to ``2``,
            it will flatten the top two levels, and ``-1`` will flatten the entire DataPipe.

    Example:
        >>> # xdoctest: +SKIP
        >>> from torchdata.datapipes.iter import IterableWrapper
        >>> source_dp = IterableWrapper([[[0, 1], [2]], [[3, 4], [5]], [[6]]])
        >>> dp1 = source_dp.unbatch()
        >>> list(dp1)
        [[0, 1], [2], [3, 4], [5], [6]]
        >>> dp2 = source_dp.unbatch(unbatch_level=2)
        >>> list(dp2)
        [0, 1, 2, 3, 4, 5, 6]
    r   unbatch_levelr   Nc                      || _         || _        y N)r   r=   )r%   r   r=   s      r   r$   zUnBatcherIterDataPipe.__init__q   s     *r   c              #   |   K   | j                   D ]'  }| j                  || j                        E d {    ) y 7 w)Nr=   )r   _diver=   )r%   elements     r   r,   zUnBatcherIterDataPipe.__iter__u   s:     }} 	MGzz'9K9KzLLL	MLs   0<:<c              #   v  K   |dk  rt        d      |dk(  r>t        |t        t        f      r#|D ]  }| j	                  |d      E d {     y | y |dk(  r| y t        |t        t        f      r&|D ]   }| j	                  ||dz
        E d {    " y t        d| j                   d      7 k7 "w)Nz unbatch_level must be -1 or >= 0rA   r   r.   zunbatch_level z" exceeds the depth of the DataPipe)
ValueErrorr/   listr   rB   
IndexErrorr=   )r%   rC   r=   items       r   rB   zUnBatcherIterDataPipe._divey   s     2?@@B'D)#45# BD#zz$bzAAAB aM'D)#45# QD#zz$ma>OzPPPQ !$T%7%7$88Z[  B Qs%   AB9	B5
A
B9B7!B97B9)r.   )	r   r4   r5   r6   r   r8   r$   r,   rB    r   r   r   r   Y   s,    ,+ +c +$ +Mr   r   groupbyc                       e Zd ZdZdddddddee   deegef   ded	e	d
e	dz  de	dz  deddfdZ
d Zd ZddZd Zd ZddZy)r   a
  
    Groups data from IterDataPipe by keys from ``group_key_fn``, yielding a ``DataChunk`` with batch size up to ``group_size``.

    (functional name: ``groupby``).

    The samples are read sequentially from the source ``datapipe``, and a batch of samples belonging to the same group
    will be yielded as soon as the size of the batch reaches ``group_size``. When the buffer is full,
    the DataPipe will yield the largest batch with the same key, provided that its size is larger
    than ``guaranteed_group_size``. If its size is smaller, it will be dropped if ``drop_remaining=True``.

    After iterating through the entirety of source ``datapipe``, everything not dropped due to the buffer capacity
    will be yielded from the buffer, even if the group sizes are smaller than ``guaranteed_group_size``.

    Args:
        datapipe: Iterable datapipe to be grouped
        group_key_fn: Function used to generate group key from the data of the source datapipe
        keep_key: Option to yield the matching key along with the items in a tuple,
            resulting in `(key, [items])` otherwise returning [items]
        buffer_size: The size of buffer for ungrouped data
        group_size: The max size of each group, a batch is yielded as soon as it reaches this size
        guaranteed_group_size: The guaranteed minimum group size to be yielded in case the buffer is full
        drop_remaining: Specifies if the group smaller than ``guaranteed_group_size`` will be dropped from buffer
            when the buffer is full

    Example:
        >>> import os
        >>> # xdoctest: +SKIP
        >>> from torchdata.datapipes.iter import IterableWrapper
        >>> def group_fn(file):
        ...     return os.path.basename(file).split(".")[0]
        >>> source_dp = IterableWrapper(
        ...     ["a.png", "b.png", "a.json", "b.json", "a.jpg", "c.json"]
        ... )
        >>> dp0 = source_dp.groupby(group_key_fn=group_fn)
        >>> list(dp0)
        [['a.png', 'a.json', 'a.jpg'], ['b.png', 'b.json'], ['c.json']]
        >>> # A group is yielded as soon as its size equals to `group_size`
        >>> dp1 = source_dp.groupby(group_key_fn=group_fn, group_size=2)
        >>> list(dp1)
        [['a.png', 'a.json'], ['b.png', 'b.json'], ['a.jpg'], ['c.json']]
        >>> # Scenario where `buffer` is full, and group 'a' needs to be yielded since its size > `guaranteed_group_size`
        >>> dp2 = source_dp.groupby(
        ...     group_key_fn=group_fn,
        ...     buffer_size=3,
        ...     group_size=3,
        ...     guaranteed_group_size=2,
        ... )
        >>> list(dp2)
        [['a.png', 'a.json'], ['b.png', 'b.json'], ['a.jpg'], ['c.json']]
    Fi'  N)keep_keybuffer_size
group_sizeguaranteed_group_sizedrop_remainingr   group_key_fnrM   rN   rO   rP   rQ   r   c                |   t        |       || _        || _        || _        || _        t        t              | _        d| _        || _	        d | _
        |+|)d|cxk  r|k  st        d       t        d      || _
        |+|d|cxk  r|k  st        d       t        d      || _
        || _        t        | _        y )Nr   z)group_size must be > 0 and <= buffer_sizezNguaranteed_group_size must be > 0 and <= group_size and group_size must be set)r   r   rR   rM   max_buffer_sizer   rG   buffer_elementscurr_buffer_sizerO   rP   r"   rQ   r   r    )r%   r   rR   rM   rN   rO   rP   rQ   s           r   r$   zGrouperIterDataPipe.__init__   s     	\* ( *7B47H !$%)"!k&=
1k1$%PQQ 2$%PQQ)3D& ,!!.C*Qz*Q$d  +R$d  *?D&,&r   c                    d }d}d }| j                   D ]8  }t        | j                   |         |kD  st        | j                   |         }|}: | j                  =|| j                  k  r.| j                  s"t	        dt        | j                   |               | j                  || j                  k\  r| j                   |   }| xj                  |z  c_        | j                   |= |S )Nr   zFailed to group items)rU   r*   rP   rQ   RuntimeErrorstrrV   )r%   biggest_keybiggest_sizeresult_to_yieldfindkeys        r   _remove_biggest_keyz'GrouperIterDataPipe._remove_biggest_key   s    ++ 	&G4''01L@"4#7#7#@A%	& &&2t999'''T-A-A+-N)O 
 &&.t999"22;?O-  -r   c              #     K   | j                   D ]5  }| j                  |      }| j                  |   j                  |       | xj                  dz  c_        | j
                  | j
                  t        | j                  |         k(  rj| j                  | j                  |         }| j                  r||fn| | xj                  t        | j                  |         z  c_        | j                  |= | j                  | j                  k(  s| j                         }|| j                  |      }| j                  r||fn| 8 t        | j                  j                               D ]^  }| j                  | j                  j                  |            }| xj                  t        |      z  c_        | j                  r||fn| ` y w)Nr.   )r   rR   rU   r)   rV   rO   r*   r    rM   rT   r^   tuplekeyspop)r%   r+   keyresultr\   s        r   r,   zGrouperIterDataPipe.__iter__  s     	EA##A&C  %,,Q/!!Q&!*t#$$S)C 0 *.););D<P<PQT<U)V'+}}sFm&@%%T-A-A#-F)GG%((-$$(<(<<"&":":"<".!//@F+/==3-fD%	E( --2245 	=C''(<(<(@(@(EFF!!S[0!#'==3-f<	=s   D	GG B-Gc                 :    d| _         t        t              | _        y r(   )rV   r   rG   rU   r2   s    r   resetzGrouperIterDataPipe.reset   s     !*40r   c           
      0   | j                   | j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  f
}t        j                  t        j                  |      S |S r?   )r   rR   rM   rT   rO   rP   rQ   r    _valid_iterator_id_number_of_samples_yieldedr   getstate_hookr%   states     r   __getstate__z GrouperIterDataPipe.__getstate__$  s    MMMM  OO&&##++
 %%1--e44r   c                     |\
  | _         | _        | _        | _        | _        | _        | _        | _        | _        | _	        d| _
        t        t              | _        y r(   )r   rR   rM   rT   rO   rP   rQ   r    rh   ri   rV   r   rG   rU   rk   s     r   __setstate__z GrouperIterDataPipe.__setstate__5  sZ     	
MM O&#+ !*40r   c                 8    | j                   j                          y r?   )rU   clearr2   s    r   __del__zGrouperIterDataPipe.__del__E  s    ""$r   )r   N)r   r4   r5   r6   r   r   r   r   r9   r8   r$   r^   r,   rf   rm   ro   rr   rJ   r   r   r   r      s    1p  !%,0$$'u%$' w|,$'
 $' $' $J$'  #Tz$' $' 
$'L:=41"1 %r   r   N)collectionsr   collections.abcr   r   r   typingr   r   r	   %torch.utils.data.datapipes._decoratorr
   #torch.utils.data.datapipes.datapiper   r   'torch.utils.data.datapipes.utils.commonr   __all__r   rY   r   r   r   r   rJ   r   r   <module>rz      s    # 5 5 ) ) E G H 	4(Gc Gh G W<Y,y1 <Y <Y~ Y1L 1  1h Yw%,y1 w%  w%r   