
    i*                        d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlZddlZddlm
Z ddlmZ dd	l m!Z! d
dl"m#Z# d
dl$m%Z%m&Z&m'Z' d
dl(m)Z)m*Z* d
dl+m,Z,m-Z- d
dl.m/Z0 erddl1m2Z3 neZ3 ejh                  d      Z5e5jm                  dd      Z7de8de8deeeejr                  ejr                  f   ee:e:f   f   ddf   fdZ;defdZ<defdZ=deddfdZ>e7j~                  deejr                  ejr                  f   fd       Z@e7j~                  deejr                  ejr                  f   fd       ZAe7j~                  deejr                  ejr                  f   fd       ZBe7j~                  deejr                  ejr                  f   fd       ZCe7j~                  dee3ejr                  f   fd        ZDe7j~                  d!eEdee!j                  ejr                  ejr                  e!j                  ejr                  ejr                  e!j                  ejr                  ejr                  f	   fd"       ZG	 dWd#d$d%d&e8de8d'e8d(eHd)eHd*e8deeejr                     eejr                     eejr                     f   fd+ZIee!j                  ej                  ej                     ej                  ej                     f   ZLe	 G d, d-             ZM G d. d/e      ZN G d0 d1      ZOd2ej                  ej                     deej                  ej                  ej                  f   fd3ZP	 dXd4e!j                  d5ej                  ej                     d6ej                  ej                     d7eQdej                  ej                     f
d8ZSd9ee!j                  ej                  ej                     ej                  ej                     f   d:ej                  ej                     deMfd;ZTd<eNdeeMeeM   f   fd=ZUd4e!j                  d5ej                  ej                     d6ej                  ej                     d>ej                  ej                     d?ej                  ej                     dee!j                  ej                  ej                     ej                  ej                     ej                  ej                     f   fd@ZWdAedBee&   dCeEddfdDZXe7j~                  de8de8dEeQdFeHdeee!j                     ejr                  f   f
dG       ZYdHe8dIe8deeE   fdJZZdKdLd#d$ej                  dMdNde8de8dOe8dPeHdEeQdQeQdReHd*e8dSej                  j                  dCeEdee,ejr                  f   fdTZ\ G dU dVe%      Z]y)YzUtilities for data generation.    N)ThreadPoolExecutor)	dataclass)TYPE_CHECKINGAnyCallableDict	GeneratorList
NamedTupleOptionalSequenceSetTupleTypeUnion)request)typing)r	   )sparse   )concat)DataIterDMatrixQuantileDMatrix)is_pd_cat_dtypepandas_pyarrow_mapper)	ArrayLike	XGBRanker)train)	DataFramejoblibz
./cachedir)verbose	n_samples
n_featuresreturnc              #     K   t        j                  d      }t        j                  j	                  d      }|j                  dd| |z        j                  | |      }t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                   t        j"                  t        j$                  t        j&                  t        j(                  t        j*                  t        j,                  t        j.                  t        j0                  t        j2                  t        j4                  g}|D ]A  }t        j6                  ||      }||f |j9                         |j9                         f C |D ]A  }t        j6                  ||      }|j;                  |      }|j;                  |      }	||	f C |j=                  dd| |z  	      j                  | |      }t        j>                  t@        fD ]  }
t        j6                  ||
      }||f ! t        j>                  t@        fD ]A  }t        j6                  ||      }|j;                  |      }|j;                  |      }	||	f C y
w)z*Enumerate all supported dtypes from numpy.pandas  r      lowhighsizedtype   g      ?r,   N)!pytestimportorskipnprandomRandomStaterandintreshapeint32int64byteshortintcint_longlonguint32uint64ubyteushortuintcuint	ulonglongfloat16float32float64halfsingledoublearraytolistr   binomialbool_bool)r"   r#   pdrngorigdtypesr.   Xdf_origdfdtype1dtype2s               Z/home/ubuntu/crypto_trading_bot/.venv/lib/python3.12/site-packages/xgboost/testing/data.py	np_dtypesr[   1   s     
		X	&B
))


%C;;13Y-C;DLL:D 	






		
		

		













		
		)F,  (HHT'AgkkmQXXZ''(
  HHT',,t$\\!_rk	 <<3Y%;<<DD:D 88T" HHT(Ag 88T" HHT(,,t$\\!_rk	s   KKc            	   #   8  K   t        j                  d      } | j                         | j                         | j	                         | j                         | j                         | j                         | j                         | j                         g}t        j                  }| j                  dd|dgdd|dgdt        j                        }t        j                  d| j                  fD ]-  }|D ]&  }| j                  dd|dgdd|dgd|      }||f ( / t        j                  }| j                         | j!                         g}| j                  d	d
|dgdd
|d	gdt        j                        }t        j                  d| j                  fD ]m  }|D ]f  }| j                  d	d
|dgdd
|d	gd|      }||f |d   }|d   }t#        || j$                        sJ t#        || j$                        sJ ||f h o |j'                  d      }|j(                  D ]'  }||   j*                  j-                  t.              ||<   ) t        j                  d| j                  fD ]4  }| j                  dd|dgdd|dgd| j1                               }||f 6 d| j                  fD ]i  }dd|dgdd|dgd}	| j                  |	|t        j2                  n| j5                               }| j                  |	| j5                               }||f k yw)z/Enumerate all supported pandas extension types.r&   r/   r         f0f1r-   N      ?g       @g      @r`   categoryTF)r1   r2   
UInt8DtypeUInt16DtypeUInt32DtypeUInt64Dtype	Int8Dtype
Int16Dtype
Int32Dtype
Int64Dtyper3   nanr   rG   NAFloat32DtypeFloat64Dtype
isinstanceSeriesastypecolumnscatrename_categoriesintCategoricalDtyperO   BooleanDtype)
rQ   rT   NullrS   r.   rW   ser_origsercdatas
             rZ   	pd_dtypesr~   l   s*    			X	&B 	






	F %'FFD<<1dAq!T1o6bjj  D ruu%  	E1dAq!T1o>e  B (N		 66Doo!23F<<S$$S#tS,AB"**  D ruu% 
  		 ES$,S#tS4IJRW  B (NDzHT(Cc299---h		222C-		 
  ;;z"D\\ 5q'++//4Q5ruu% \\q$?1aq/:%%'  
 Bh ruu UD$/tT47PQ||DDLbooFW|X\\$boo&7\8Bhs   LLc            	   #     K   t        j                  d      } t        j                  d      }t        }d| j                  dfD ]  }|D ]  }|j	                  d      s|j	                  d      r&| j                  |      s|dk(  r|nt        j                  }| j                  dd|d	gd
d	|dgdt        j                        }| j                  dd|d	gd
d	|dgd|      }||f   | j                  dfD ]o  }| j                  dd|dgdd|dgd| j                               }| j                  dd|dgdd|dgd| j                  |j                                     }||f q yw)z*Pandas DataFrame with pyarrow backed type.r&   pyarrowNr   rF   rP   r/   r   r]   r^   r_   r-   FT)r1   r2   r   rm   
startswithisnar3   rl   r   rG   rx   
ArrowDtyperO   )rQ   parT   ry   r.   	orig_nullrS   rW   s           rZ   pd_arrow_dtypesr      s    			X	&B			Y	'B #F. ruua   	E	*e.>.>v.F$&GGDMdaiRVVI<<1i+Aq)Q3GHjj   D
 1dAq!T1o>e  B (N	"  	||%t,UD$4MN//#  
 \\%t,UD$4MN--
+  
 Bh	s   E(E*rR   c                    | j                  d      j                  dd      }| j                  d      }t        j                  |d<   t	        j
                  t        d      5  t        ||       ddd       t	        j
                  t        d      5  t        ||       ddd       y# 1 sw Y   :xY w# 1 sw Y   yxY w)	zValidate there's no inf in X.    r0      r^   )   r   zInput data contains `inf`matchN)	r4   r7   r3   infr1   raises
ValueErrorr   r   )rR   rU   ys      rZ   	check_infr      s    


##Aq)A


AffAdG	z)D	E 1 
z)D	E 1   s   #B)B5)B25B>c                     dt         j                  j                  d      t        j                  d      } dt
        t           dt
        t           dt
        t           dt         j                  ffd}d	t        d
t        dt         j                  ffd}| j                   |ddgddgddg       |ddgddgddg       |dd       |dd       |dd       |dd        |d!d"       |d#d$       |d%d&      d'	      }||j                  j                  d(g         j                         }|d(   j                         }||fS ))zSynthesize a dataset similar to the sklearn California housing dataset.

    The real one can be obtained via:

    .. code-block::

        import sklearn.datasets

        X, y = sklearn.datasets.fetch_california_housing(return_X_y=True)

    iP  i  r&   meanssigmasweightsr$   c                     j                  t        |d   z        | d   |d         }j                  |j                  d   z
  | d   |d         }t        j                  ||gd      S )Nr   )r,   locscaler/   axis)normalrv   shaper3   concatenate)r   r   r   l0l1r"   rR   s        rZ   mixture_2compz-get_california_housing.<locals>.mixture_2comp  sv     ZZi'!*,-E!HF1I  
 ZZi"((1+5E!HFSTIZV~~r2hQ//    meanstdc                 .    j                  | |f      S )Nr   r   r,   )r   )r   r   r"   rR   s     rZ   normz$get_california_housing.<locals>.norm  s    zzd#YLzAAr   g5ŀ]g~(Fv^gr-|E?g3mE^1?gDi-T?gÅv-W?gXcB@g&	@@g6?g](?g8W nx?gd?g|["@g2{e?)r   r   gVb<@g>+)@gZK@g@g)P=?g˧^T?g/E@g@gI@gtbO$@gg9h @gk}v?)		LongitudeLatitudeMedIncHouseAgeAveRooms	AveBedrms
PopulationAveOccupMedHouseValr   )r3   r4   default_rngr1   r2   r
   floatndarrayr   rs   
differenceto_numpy)rQ   r   r   rW   rU   r   r"   rR   s         @@rZ   get_california_housingr      sv    I
))


%C			X	&B0E{0$(K0:>u+0	0B5 Bu B B 
&}-#%78Z(
 &k*#%78Z(
  28JK"4:LM"39JK#4:MN$6<MN"4:LM%6<NO#	

B* 	2::  -12;;=A
=""$Aa4Kr   c                  |    t        j                  d      } | j                         }|j                  |j                  fS )z&Fetch the digits dataset from sklearn.sklearn.datasets)r1   r2   load_digitsr}   target)datasetsr}   s     rZ   
get_digitsr   ,  s6     ""#56H!D99dkk!!r   c                  P    t        j                  d      } | j                  d      S )z-Fetch the breast cancer dataset from sklearn.r   T)
return_X_y)r1   r2   load_breast_cancer)r   s    rZ   
get_cancerr   4  s)     ""#56H&&$&77r   c                     t        j                  d      } t        j                  j	                  d      }d}d}| j                  ||      \  }}|j                  d||j                        }t        |j                  d         D ]<  }t        |j                  d         D ]  }|||f   st        j                  |||f<   ! > ||fS )zGenerate a sparse dataset.r      i  g      ?)random_stater/   r   )
r1   r2   r3   r4   r5   make_regressionrN   r   rangerl   )	r   rR   nsparsityrU   r   flagijs	            rZ   
get_sparser   ;  s     ""#56H
))


$CAH##AC#8DAq<<8QWW-D1771: !qwwqz" 	!AAqDz&&!Q$	!! a4Kr   c                     t         rddlnt        j                  d      t        j
                  j                  d      dj                         } dt        t        t        t        f   t        f   dt        dj                  ffd	} |d
dddddd      | d<    |ddddd      | d<    |dddddd      | d<    |ddd d!d"d#d$d%d      | d&<    |d'd(d)d!d*d+      | d,<    |d-d(d.d/d0d"d1d2d3d      | d4<    |d5d6d7d8d9d:d;      | d<<    |d=d>d?d@d$dAd      | dB<    |dCdDdd"dEd      | dF<    |d@dGdGdHdI      | dJ<   dKt        dLt        dt        dj                  ffdM} |dNdOd      | dP<    |dQdRd      | dS<    |dTdUd      | dV<    |dWdXd      | dY<    |dZd[d      | d\<    |d]d^d      | d_<    |d`dad      | db<    |dcddd      | de<    |dfdgd      | dh<    |didjd      | dk<   t        | j                        }j                  |       | |   } t	        j                   fl      }| j                  D ]q  }t#        | |   j$                  j&                        r:|| |   j(                  j*                  j-                  t        j.                        z  }`|| |   j0                  z  }s |dm|j3                         z  z  }|dn|j5                         z
  z  }| |fS )oam  Get a synthetic version of the amse housing dataset.

    The real one can be obtained via:

    .. code-block::

        from sklearn import datasets

        datasets.fetch_openml(data_id=42165, as_frame=True, return_X_y=True)

    Number of samples: 1460
    Number of features: 20
    Number of categorical features: 10
    Number of numerical features: 10
    r   Nr&   r'   i  
name_probadensityr$   c           	         t        	d|z
  z        }t        j                  d|z
        dkD  xr |dkD  }|rd|z
  }|| t        j                  <   t	        | j                               }t	        | j                               }|dxx   dt        j                  |      z
  z  cc<   j                  |	|      }
j                  |
j                  t        d |                  }|S )	Nr/   rb   ư>r   )r,   pc                 "    t        | t              S N)rp   str)xs    rZ   <lambda>z5get_ames_housing.<locals>.synth_cat.<locals>.<lambda>x  s    As!3 r   r-   )rv   r3   absrl   listkeysvaluessumchoicerq   rw   filter)r   r   n_nullshas_nanr   r   r   r   seriesr"   rQ   rR   s            rZ   	synth_catz#get_ames_housing.<locals>.synth_catf  s     i1w;/0&&w'$.>7Q;W}H!)JrvvJOO%&""$%	"rvvay JJt)qJ1%%3T:  
 r   gqu ]?gqh.?gsmB<?g5C(?goEb?)1Fam2fmConDuplexTwnhsTwnhsErb   BldgTypegwD?g. ҥ?g)$;?)UnfRFnFing_9?GarageFinishgW歺?gbFx{?gbFx{?gQfL2rf?)CornerCulDSacFR2FR3	LotConfigg?g/ؗ?gf׽?g$A
?g5e?g() l?g[iF?)TypMin2Min1ModMaj1Maj2Sev
Functionalg M?g?gMq?)NoneBrkFaceStoneBrkCmng3f?
MasVnrTypeg3f?gI/j ?g,	PS˦?ge@?gQ~?gZ	%qv?)1Story2Storyz1.5FinSLvlSFoyerz1.5Unfz2.5Unfz2.5Fin
HouseStyleg$	P?gHp?gK$?gՐ?g4*p?)GdTAFaExPogE`o?FireplaceQugș&l??皙?g5e?gunڌ`?)r  r  r  r  r  	ExterCondgn0a?g{gUId?)r  r  r  r  	ExterQualg8 nV?)r  r  r  g(xߢs?PoolQCr   r   c                    j                  | |      }t        d|z
  z        }t        j                  d|z
        dkD  r,|dkD  r'j	                  |d      }t        j
                  ||<   j                  |t        j                        S )	Nr   r/   rb   r   r   Fr,   replacer-   )r   rv   r3   r   r   rl   rq   rH   )	r   r   r   r   r   null_idxr"   rQ   rR   s	         rZ   	synth_numz#get_ames_housing.<locals>.synth_num  s}    JJ3c	J:i1w;/066#- 4'GaKzz)'5zIH&&AhKyy"**y--r   gmtF@gOfK<Q=@	3SsnPorchgݹsΝ?g2Tf?
FireplacesgR u?gP$[r?BsmtHalfBathgvS?g_-?HalfBathgbĈ#F?g+?
GarageCarsg$[Q<@g"$#e?TotRmsAbvGrdg$[Q<{@g%Ǒ|@
BsmtFinSF1ge0OFG@g*Ӛ{7*d@
BsmtFinSF2gNڭ@gCk@	GrLivAreagg6.@gK@ScreenPorchr   g(e@g.A)r   r&   r1   r2   r3   r4   r   r   r   r   r   r   rq   r   rs   shufflezerosrp   r.   rw   rt   codesrr   rH   r   r   r   )	rW   r   r  rs   r   r|   r"   rQ   rR   s	         @@@rZ   get_ames_housingr   L  s   "   *
))


%CI	BsEz*E12=B	. 	
 		BzN #(;WB~  		
 	B{O !	
 	B| !		
 	B| !		
 	B| "	
 		B}  	
 		B{O  		
 	B{O 	

 	BxL.u .5 .5 .RYY .   24EsKB{O !24FLB|"#79LcRB~24FLBzN !35GMB|"#46H#NB~ !24EsKB| !24FLB| 13DcJB{O!"46H#NB}2::GKK	GB 		|$AZZ bekk2#6#67A''

33AAA	 	QUUW	$$A	affh	&&Aq5Lr   dpathc           	      b   t        j                  d      }d}t        j                  j	                  | d      }t        j                  j                  |      st        j                  ||       t        j                  |d      5 }|j                  |        ddd       |j                  t        j                  j	                  | d      t        j                  j	                  | d	      t        j                  j	                  | d
      fdd      \	  }}}}}	}
}}}|||||	|
|||f	S # 1 sw Y   xY w)zFetch the mq2008 dataset.r   z>https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zipz
MQ2008.zip)urlfilenamer)pathNzMQ2008/Fold1/train.txtzMQ2008/Fold1/test.txtzMQ2008/Fold1/vali.txtTF)query_id
zero_based)r1   r2   osr&  joinexistsr   urlretrievezipfileZipFile
extractallload_svmlight_files)r!  r   srcr   fx_trainy_train	qid_trainx_testy_testqid_testx_validy_valid	qid_valids                 rZ   
get_mq2008r<    s#    ""#56H
JCWW\\%.F77>>&!f5		% !	% ! 	$$GGLL 89GGLL 78GGLL 78	

  	% 	
 	
 
/! !s   D%%D.Fr'   )	vary_sizer   n_samples_per_batch	n_batchesuse_cupyr=  r   c                   g }g }g }|r3ddl }	|	j                  j                  t        j                  |            }
nt        j                  j                  |      }
t        |      D ]x  }|r| |dz  z   n| }|
j                  ||      }|
j                  |      }|
j                  dd|      }|j                  |       |j                  |       |j                  |       z |||fS )zMake batches of dense data.r   N
   r/   r)   )	cupyr4   r5   r3   r@   r   randnuniformappend)r>  r#   r?  r@  r=  r   rU   r   wrC  rR   r   r"   _X_y_ws                   rZ   make_batchesrK  <  s     	A
A
Akk%%bii&=>ii##L19 4='!b&0CV	YYy*-YYy![[QQY[7			 a7Nr   c                   H   e Zd ZU dZej
                  ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   y)		ClickFoldzCA structure containing information about generated user-click data.rU   r   qidscoreclickposN)__name__
__module____qualname____doc__r   
csr_matrix__annotations__nptNDArrayr3   r8   rG   r9    r   rZ   rM  rM  ]  sp    M
{{288	RXX	;;rzz"";;rxx  	RXX	r   rM  c                   <    e Zd ZU dZeed<   eed<   eed<   defdZy)	RelDataCVzPSimple data struct for holding a train-test split of a learning to rank dataset.r   testmax_relr$   c                      | j                   dk(  S )z6Whether the label consists of binary relevance degree.r/   )r^  selfs    rZ   	is_binaryzRelDataCV.is_binaryp  s    ||q  r   N)	rR  rS  rT  rU  RelDatarW  rv   rP   rb  rZ  r   rZ   r\  r\  i  s     ZN
ML!4 !r   r\  c                       e Zd ZdZdeddfdZdej                  ej                     dej                  ej                     dej                  ej                     fdZy)	PBMa  Simulate click data with position bias model. There are other models available in
    `ULTRA <https://github.com/ULTR-Community/ULTRA.git>`_ like the cascading model.

    References
    ----------
    Unbiased LambdaMART: An Unbiased Pairwise Learning-to-Rank Algorithm

    etar$   Nc                     t        j                  g d      | _        t        j                  g d      }t        j                  ||      | _        y )N)r	  g{Gz?Q?gp=
ף?rb   )
g(\?gQ?gQ?g(\?rh  皙?g)\(?r	  g{Gz?gQ?)r3   rL   
click_probpower	exam_prob)ra  rf  rl  s      rZ   __init__zPBM.__init__  s8    ((#?@HHH
	 )S1r   labelspositionc                    t        j                  |d      }t        j                  |j                        }d||dk  <   d||t	        | j
                        k\  <   | j
                  |   }t        j                  |j                        }|j                  |j                  k(  sJ t        j                  |d      }d||| j                  j                  k\  <   | j                  |   }t         j                  j                  d      }|j                  |j                  d   t         j                        }t        j                  |j                  t         j                        }d||||z  k  <   |S )	zSample clicks for one query based on input relevance degree and position.

        Parameters
        ----------

        labels :
            relevance_degree

        T)copyr   r   r'   )r,   r.   r-   r/   )r3   rL   r  r   lenrj  r,   rl  r4   r   rG   r8   )	ra  rn  ro  rj  rl  ranksrR   probclickss	            rZ   sample_clicks_for_queryzPBM.sample_clicks_for_query  s    &t,XXfll+
vz13vT__--.__V,
HHV\\*	}}+++-.0et~~***+NN5)	ii##D)zzv||AbjjzA(*RXX(N01ti*,,-r   )rR  rS  rT  rU  r   rm  rX  rY  r3   r8   r9   rv  rZ  r   rZ   re  re  u  s^    2E 2d 2!kk"((+!7:{{2887L!	RXX	!r   re  r   c           
         t        j                  |       } | j                  }t         j                  dt        j                  t        j
                  | dd | dd d             dz   f   }t        j                  t         j                  ||f         }| |   }t        j                  |t        j                  | j                  g            }|||fS )zzRun length encoding using numpy, modified from:
    https://gist.github.com/nvictus/66627b580c13068589957d6ab0919e66

    r   r/   Nr   T)	equal_nan)	r3   asarrayr,   r_flatnonzeroisclosediffrF  rL   )r   r   startslengthsr   indptrs         rZ   rlencoder    s    
 	

1A	AUU1bnnbjj12#2$&O%OPSTTTUFggbeeFAI&'GvYFYYvrxx12F7F""r   rU   r   rN  sample_ratec                    t         j                  j                  d      }t        | j                  d   |z        }t        j
                  d| j                  d   t         j                        }|j                  |       |d| }| |   }||   }||   }	t        j                  |	      }
||
   }||
   }|	|
   }	t        dd      }|j                  |||	       |j                  |       }|S )	zWe use XGBoost to generate the initial score instead of SVMRank for
    simplicity. Sample rate is set to 0.1 by default so that we can test with small
    datasets.

    r'   r   r-   Nz	rank:ndcghist)	objectivetree_method)rN  )r3   r4   r   rv   r   aranger@   r  argsortr   fitpredict)rU   r   rN  r  rR   r"   indexX_trainr4  r5  
sorted_idxltrscoress                rZ   init_rank_scorer    s     ))


%CAGGAJ,-I1aggaj		BEKK*9EhGhGE
I I&Jj!Gj!G*%I
kv
>CGGGW)G, [[^FMr   foldscores_foldc                 4   | \  }}}|j                   t        j                  k(  sJ t        j                  |      }t        j                  |j
                  ft        j                        }t        j                  |j
                  ft        j                        }t        d      }|D ]f  }	|	|k(  }
|
j                  |
j                  d         }
||
   }t        j                  |      ddd   }|||
<   ||
   }|j                  ||      }|||
<   h |j                  d   |j                  d   k(  sJ |j                  |j                  f       |j                  d   |j                  d   k(  sJ |j                  |j                  f       t        ||||||      S )zSimulate clicks for one fold.r-   rb   )rf  r   Nr   )r.   r3   r8   uniqueemptyr,   r9   re  r7   r   r  rv  rM  )r  r  X_foldy_foldqid_foldqidsro  ru  pbmqqid_maskquery_scoresquery_positionrelevance_degreesquery_clickss                  rZ   simulate_one_foldr    so   
  $FFH>>RXX%%%99XDxxbhh7HXXv{{nBHH5F
#,C  
(=##HNN1$56"8,L1$B$7+"8,223DnU'x
( <<?hnnQ//O&,,1OO/<<?fll1o-Kfll/KK-VVX{FHMMr   cv_datac           	      d   t        t        | j                  | j                              \  }}}t	        j
                  dg|D cg c]  }|j                  d    c}z         }t	        j                  |      }t        |      dk(  sJ t        j                  |      }t	        j                  |      }t	        j                  |      }t        |||      }	t        d|j                        D 
cg c]  }
|	||
dz
     ||
     }}
g g g g g g f\  t        |j                  dz
        D ]  }
t        ||
   ||
   ||
   f||
         }j!                  |j"                         j!                  |j$                         j!                  |j&                         j!                  |j(                         j!                  |j*                         j!                  |j,                          t        |j                  dz
        D 
cg c]  }
|
   	 }}
t        d      D ]  }
||
   ||
   k(  j/                         rJ  t              dk(  r(t1        d   d   d   d   d   d         }d}||fS fdt        t                    D        \  }}||fS c c}w c c}
w c c}
w )z6Simulate click data using position biased model (PBM).r   r]   r/   r   Nc           
   3   b   K   | ]&  }t        |   |   |   |   |   |          ( y wr   )rM  ).0r   X_lstc_lstp_lstq_lsts_lsty_lsts     rZ   	<genexpr>z"simulate_clicks.<locals>.<genexpr>&  s@      
 eAha%(E!HeAhaQ
s   ,/)r   zipr   r]  r3   rL   r   cumsumrr  r   vstackr   r  r   r,   r  rF  rU   r   rN  rO  rP  rQ  allrM  )r  rU   r   rN  vr  X_fully_fullqid_fullscores_fullr   r  r  scores_check_1r   r]  r  r  r  r  r  r  s                   @@@@@@rZ   simulate_clicksr    sx   S56IAq# XXqc3AQWWQZ334FYYvFv;%]]1F^^AF~~c"H "&&(;K>CAv{{>STk&Q-&)4TFT/12r2r2/E,E5%u6;;?#  !A$!c!f!5vayATVVTVVTXXTZZ TZZ TXX ).fkkAo(>?1eAh?N?1X 6q!VAY.335556 5zQ%(E!HeAha%(ERSHU $;	
 
3u:&
t $;G 4 U @s   	J#
1J(J-ru  rQ  c           
         t        j                  |      }| |   } ||   }||   }||   }t        |      \  }}}t        d|j                        D ]  }||dz
     }	||   }
|	|
k  s	J |	|
f       t        j
                  ||	|
       j                  dk(  s	J |	|
f       ||	|
 }|j                         dk(  sJ |j                                |j                         |j                  dz
  k\  s9J |j                         |j                  |t        j
                  ||	|
       f       t        j                  |      }| |	|
 |   | |	|
 ||	|
 |   ||	|
 ||	|
 |   ||	|
 ||	|
 |   ||	|
  | |||f}|S )z,Sort data based on query index and position.r/   r   )r3   r  r  r   r,   r  minmax)rU   r   rN  ru  rQ  r  r  _r   begend	query_posr}   s                rZ   sort_ltr_samplesr  -  s    CJ	*AJF
j/C
j/CC=LFAq1fkk" 0QUmQiSy$3*$yyyS&++q0<3*<0CL	}}!#4Y]]_4#}})..1"44 	
MMONNIIc#cl#	7
 	
4 ZZ	*
s3Z
+#c
 S/*5s3s3Z
+#c
3s|J/C+0. faDKr   DTypeDMatrixTdevicec                    t         j                  j                         } | |j                  ddd      j	                  t         j
                        j                  dd            }t        |d      r|j                  dddf   }n	|dddf   }|} ||||	      }t        j                  t        d
      5  t        d|d|       ddd       t        |d      s | |j                         j                  dd            }||k(  j                         sJ |j                  j                   j"                  du sJ |j                  j                   j$                  du sJ |j'                  |j                  	        | |j                         j                  dd            }||j                  k(  j                         sJ |}|j)                  |       |j                         }	|j)                  |j                  d|j*                               |j                         }
|
|	k(  j                         sJ |j	                  t         j,                        }|j)                  |       |j                         }||	k(  j                         sJ |j                  dddd      }t        j                  t        d
      5  |j)                  |       ddd       yy# 1 sw Y   xY w# 1 sw Y   yxY w)zRun tests for base margin.r   rb   d   r0   2   r   ilocN)base_marginz.*base_margin.*r   r  )r  r  FTr/   r   )r3   r4   r   r   rr   rG   r7   hasattrr  r1   r   r   train_fnget_base_marginr  Tflagsc_contiguousf_contiguousset_infoset_base_marginr,   rH   )r  r  r  rR   rU   r   r  Xygotbm_colbm_rowbm_f64s               rZ   run_base_margin_infor  ^  s   
))


!CcjjCcj*11"**=EEb!LMAq&FF1a4LadGK	!QK	0B	z);	< @6:B?@ 1fB&&(00Q78{"'')))}}""//5888}}""//4777
.B&&(00B78{}}$))+++ 
;'##%
;..q+2B2BCD##%& %%''' "((4
;'##%& %%''' ii1a+]]:-?@ 	,{+	, 	,7 @ @<	, 	,s   7K7K K K)r   as_densec                     t        t        j                  d      sTt        j                  j                  d      }t	        j                   dz
  |d      }|j                  dd       }||fS t        t        j                               dt        d	t        j                  f fd
}g }t              5 }	t              D ]#  }
|j                  |	j                  ||
             % 	 ddd       g }g }|D ]7  }|j                         \  }}|j                  |       |j                  |       9 t!        |      k(  sJ t	        j"                  |d      }t        j$                  |      }|j'                  |j(                  d   |j(                  d   f      j*                  }t        j,                  |d      }|j(                  d    k(  sJ |j(                  d   k(  sJ |j(                  d    k(  sJ |rR|j/                         }|j(                  d    k(  sJ |j(                  d   k(  sJ t        j0                  ||dk(  <   ||fS ||fS # 1 sw Y   hxY w)zMake sparse matrix.

    Parameters
    ----------

    as_dense:

      Return the matrix as np.ndarray with missing values filled by NaN

    r   r'   rb   csr)mr   r   r   format        r   t_idr$   c                    t         j                  j                  d| z        }
z  }| 
dz
  k(  r	| |z  z
  }n|}t        j                  	|dz
  |      j	                         }t        j
                  	df      }t        |j                  d         D ][  }|j                  |dz      |j                  |   z
  }|dk7  s+||d d |f   j                         |j                  	df      z  dz  z  }] ||fS )Nr'   r/   rb   )r  r   r   r   r   ri  )
r3   r4   r   r   tocscr  r   r   r  toarray)r  rR   thread_sizen_features_tlocrU   r   r   r,   r#   r"   	n_threadsr   s           rZ   
random_cscz*make_sparse_regression.<locals>.random_csc  s   ii##D4K0 I-9q= (4++==O)OMM(N	

 %' 	
 HHi^$qwwqz" 	JA88AE?QXXa[0DqyQq!tW__&YN)CCcII	J
 !tr   )max_workersN)r  r   r/   r   )r  r3   r4   r5   r   r   r  multiprocessing	cpu_countrv   
csc_matrixr   r   rF  submitresultrr  hstackry  r7   r   r  r   r  rl   )r"   r#   r   r  rR   rU   r   r  futuresexecutorr   	X_results	y_resultsr2  r  arrr  s   ```             @rZ   make_sparse_regressionr    s=    299m,ii##D)MM(N
 JJ3c	J:!t O--/<I !2!2  . G			2 ;hy! 	;ANN8??:q9:	;; II xxz1
 y>Y&&&#]]9UCC


9A			1771:qwwqz*+--A
qqA99Q<9$$$99Q<:%%%771:"""kkmyy|y(((yy|z)))C1HAv6M;; ;s   >2II!	n_stringsseedc                 >   d}t               }t        j                  j                  |      }t	        |      | k  rZdj                  |j                  t        t        j                        |d            }|j                  |       t	        |      | k  rZt        |      S )zGenerate n unique strings.r    Tr  )setr3   r4   r   rr  r*  r   r   stringascii_lettersadd)r  r  name_lenunique_stringsrR   
random_strs         rZ   unique_random_stringsr    s    H"uN
))


%C
n
	
)WWJJtF001$JO

 	:&	 n
	
) r   r  rb   cpu)r   	cat_ratior  r   	cat_dtyper  n_categoriesonehotr  r  r   c          	         t        j                  d      }
t        j                  j	                  |      }t        j                  j	                  |dz         }|
j                         }t        |      D ]1  }|j                  d|d      d   }|dk(  rt        j                  |t        j                        r4t        j                  t        ||            }|j                  || d      }n*t        j                  d|      }|j                  d||       }|
j                  |d	      |t!        |      <   |t!        |         j"                  j%                  |      |t!        |      <   |j                  d||       }|
j                  ||j&                  	      |t!        |      <   4 t        j(                  | f
      }|j*                  D ]J  }t-        ||   j&                  |
j.                        r|||   j"                  j0                  z  }C|||   z  }L |dz  }|dkD  rt        |      D ]  }|j                  d| dz
  t3        | |z              }t        j4                  |j6                  ||f<   t9        |j:                  j6                  |         sh|t        j<                  |j:                  j6                  |   j>                        j@                  k(  rJ  |jB                  d   |k(  sJ |r|
jE                  |      }|r+tG        |j*                        }|jI                  |       ||   }|	dk7  r0|	dv sJ ddl%}ddl&}|jO                  |      }|j                  |      }||fS )a/  Generate categorical features for test.

    Parameters
    ----------
    n_categories:
        Number of categories for categorical features.
    onehot:
        Should we apply one-hot encoding to the data?
    sparsity:
        The ratio of the amount of missing values over the number of all entries.
    cat_ratio:
        The ratio of features that are categorical.
    shuffle:
        Whether we should shuffle the columns.
    cat_dtype :
        The dtype for categorical features, might be string or numeric.

    Returns
    -------
    X, y
    r&   r/   r0   r   Tr  r)   rc   r-   r  r  r  )cudagpuN)(r1   r2   r3   r4   r5   r   r   rN   
issubdtypestr_rL   r  r   r  r6   rq   r   rt   set_categoriesr.   r  rs   rp   rw   r  rv   rl   r  r   rT   r  
categoriesr,   r   get_dummiesr   r  cudfrC  from_pandas)r"   r#   r  r  r   r  r  r   r   r  rQ   rR   row_rngrW   r   r   r	  r|   numlabelcolr  rs   r  rC  s                            rZ   make_categoricalr    s   D 
		X	&B ))


-Cii##L1$45G	B: 9a3A6Q;}}Y0  XX&;L!&LM
NN:ItNLYYq,7
OO9OM1J7Bs1vJCF66zBBs1vJ//al/KC3cii8Bs1vJ!9$ HHI<(Ezz bgmmR%8%89RW[[&&&ERWE	
 
QJE#~z" 	TAOOIMI4H0I $ E !#BGGE1Hryy~~a01#ryy1B1M1M'N'S'SSSS	T 88A;*$$$^^Brzz" [(((b!

5!u9r   c                        e Zd ZdZddddededee   dee   d	ed
ee   ddf fdZ	de
defdZddZdeeej                   ej$                  f   eee   f   fdZ xZS )IteratorForTestzCIterator for testing streaming DMatrix. (external memory, quantile)FN)on_hostmin_cache_page_bytesrU   r   rG  cacher  r  r$   c                    t        |      t        |      k(  sJ || _        || _        || _        d| _        t
        |   |||       y )Nr   )cache_prefixr  r  )rr  rU   r   rG  itsuperrm  )ra  rU   r   rG  r  r  r  	__class__s          rZ   rm  zIteratorForTest.__init__V  sR     1vQ!5 	 	
r   
input_datac                 |   | j                   t        | j                        k(  ryt        j                  t
        d      5   || j                  | j                      | j                  | j                      d        d d d         || j                  | j                      j                         | j                  | j                      j                         | j                  r'| j                  | j                      j                         nd        t        j                          | xj                   dz  c_         y# 1 sw Y   xY w)NFzKeyword argumentr   )r}   r  weightr/   T)r  rr  rU   r1   r   	TypeErrorr   rq  rG  gccollect)ra  r  s     rZ   nextzIteratorForTest.nextk  s    77c$&&k!]]9,>? 	?tvvdggtww>	? 	%%'&&/&&(-1VV466$''?'')	

 	

1	? 	?s   9D22D;c                     d| _         y )Nr   )r  r`  s    rZ   resetzIteratorForTest.reset|  s	    r   c                     t        | j                        }t        | j                        }| j                  r"t	        j
                  | j                  d      }nd}|||fS )zReturn concatenated arrays.r   r   N)r   rU   r   rG  r3   r   )ra  rU   r   rG  s       rZ   	as_arrayszIteratorForTest.as_arrays  sI     466N466N66tvvA.AA!Qwr   )r$   N)rR  rS  rT  rU  r   r   r   rP   rv   rm  r   r"  r$  r   r   r3   r   r   rV  r   r&  __classcell__)r  s   @rZ   r  r  S  s    M .2

 
 H	
 }
 
 'sm
 

*x D "
	uRZZ!2!223Y@SS	T
r   r  )F)r	  )^rU  r   r  r)  r  r-  concurrent.futuresr   dataclassesr   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   urllibr   numpyr3   r1   rX  numpy.randomRNGscipyr   compatr   corer   r   r   r}   r   r   sklearnr   r   trainingr   r  r&   r   
DataFrameTr2   r    Memorymemoryrv   r   r   r[   r~   r   r   r  r   r   r   r   r   r   rV  r<  rP   rK  rY  r8   rc  rM  r\  re  r  r   rG   r  r  r  r9   r  r  r  r  	DTypeLiker  r  rZ  r   rZ   <module>r7     s   $ 	  	   1 !        )   5 5 9 * (.J			X	&	|Q	/88 #8uU2::rzz12E$*4EEFdRS8v>9 >B7 7t
3 
4 
 4bjj"**&< = 4 4n "E"**bjj01 " " 8E"**bjj01 8 8 E"**bjj01    s%
BJJ 67 s sl 55

JJJJ
JJJJ
JJJJ
5 5x 	   	   4

T"**-tBJJ/??@< !!3;;rxx#8#++bhh:OO
P   	!
 	!4 4n#BHH% #%S[[#++0U*V #& 	!!
{{288! 
RXX	! 	!
 	[[!HN
!!3;;rxx#8#++bhh:OO
PNRZZ(N NF(Y (5HY<O1O+P (V..
{{288. 
RXX	. KK!	.
 
RXX	. 
KKKKKK.b+, +,DM +,3 +,SW +,^ RR #R/4R@DR
5""#RZZ/0R Rj S    S	  , %'XX]]] ]
 ] ] ] ] ] yy""] ] 9bjj !]@6h 6r   