
    DiH                    H   d Z ddlmZ ddlZddlmZ ddlmZ ddlZddl	m
Z
 ddlmZ ddlmZ erdd	lmZ ddlZddlZndd
lmZ  ed      Z ed      Z ee      ZddZ G d dej0                  j2                        Z G d d      Z	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZy)a  Notations in this Gaussian process implementation

X_train: Observed parameter values with the shape of (len(trials), len(params)).
y_train: Observed objective values with the shape of (len(trials), ).
x: (Possibly batched) parameter value(s) to evaluate with the shape of (..., len(params)).
cov_fX_fX: Kernel matrix X = V[f(X)] with the shape of (len(trials), len(trials)).
cov_fx_fX: Kernel matrix Cov[f(x), f(X)] with the shape of (..., len(trials)).
cov_fx_fx: Kernel scalar value x = V[f(x)]. This value is constant for the Matern 5/2 kernel.
cov_Y_Y_inv:
    The inverse of the covariance matrix (V[f(X) + noise_var])^-1 with the shape of
    (len(trials), len(trials)).
cov_Y_Y_inv_Y: `cov_Y_Y_inv @ y` with the shape of (len(trials), ).
max_Y: The maximum of Y (Note that we transform the objective values such that it is maximized.)
sqd: The squared differences of each dimension between two points.
is_categorical:
    A boolean array with the shape of (len(params), ). If is_categorical[i] is True, the i-th
    parameter is categorical.
    )annotationsN)Any)TYPE_CHECKING)*single_blas_thread_if_scipy_v1_15_or_newer)optuna_warn)
get_logger)Callable)_LazyImportscipytorchc                   t        j                  |       }t        j                  |      r| S t        d       t        j                  |d      }t        j
                  | t        j                  |t        j                  t        j                  || t         j                        d      d      t        j                  |t        j                  t        j                  || t         j                         d      d            S )NzDClip non-finite values to the min/max finite values for GP fittings.r   )axis        )
npisfiniteallr   anyclipwheremininfmax)valuesis_values_finiteis_any_finites      S/home/ubuntu/crypto_trading_bot/.venv/lib/python3.12/site-packages/optuna/_gp/gp.pywarn_and_convert_infr   /   s    {{6*	vvVWFF+!4M 77
rxx0@&"&&'QXY Z\_`
rxx0@&266''RYZ []`a     c                  0    e Zd Zedd       Zedd       Zy)Matern52Kernelc                    t        j                  d|z        }t        j                  |       }|d|z  |z   dz   z  }d|dz   z  |z  }| j                  |       |S )a  
        This method calculates `exp(-sqrt5d) * (1/3 * sqrt5d ** 2 + sqrt5d + 1)` where
        `sqrt5d = sqrt(5 * squared_distance)`.

        Please note that automatic differentiation by PyTorch does not work well at
        `squared_distance = 0` due to zero division, so we manually save the derivative, i.e.,
        `-5/6 * (1 + sqrt5d) * exp(-sqrt5d)`, for the exact derivative calculation.

        Notice that the derivative of this function is taken w.r.t. d**2, but not w.r.t. d.
           g?   g)r   sqrtexpsave_for_backward)ctxsquared_distancesqrt5dexp_partvalderivs         r   forwardzMatern52Kernel.forward@   sh     A 00199fW%5$44v=ABFQJ'(2e$
r   c                (    | j                   \  }||z  S )z
        Let x be squared_distance, f(x) be forward(ctx, x), and g(f) be a provided function, then
        deriv := df/dx, grad := dg/df, and deriv * grad = df/dx * dg/df = dg/dx.
        )saved_tensors)r'   gradr,   s      r   backwardzMatern52Kernel.backwardS   s     $$t|r   N)r'   r   r(   torch.Tensorreturnr2   )r'   r   r0   r2   r3   r2   )__name__
__module____qualname__staticmethodr-   r1    r   r   r    r    ?   s(     $  r   r    c                      e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 d
dZedd       ZddZddZ	 d	 	 	 	 	 ddZdddZ	ddZ
	 	 	 	 	 	 	 	 	 	 dd	Zy)GPRegressorc                   || _         || _        || _        || _        || _        |j                  d      |j                  d      z
  j                         | _        | j                   j                         rT| j                  d| j                   f   dkD  j                  t        j                        | j                  d| j                   f<   d | _        d | _        || _        || _        || _        y )N.r   )_is_categorical_X_train_y_train_X_all_y_all	unsqueezesquare__squared_X_diffr   typer   float64_cov_Y_Y_chol_cov_Y_Y_inv_Yinverse_squared_lengthscaleskernel_scale	noise_var)selfis_categoricalX_trainy_trainrJ   rK   rL   s          r   __init__zGPRegressor.__init__^   s      . ' 1 1" 58I8I"8M MVVX##%$$S$*>*>%>?#Ed5==!   d&:&:!:; 3737,H)("r   c                    dt        j                  | j                  j                         j	                         j                               z  S )Ng      ?)r   r$   rJ   detachcpunumpy)rM   s    r   length_scaleszGPRegressor.length_scalesx   s7    RWWT>>EEGKKMSSUVVVr   c                R   | j                   | j                  J d       t        j                         5  | j	                         j                         j                         j                         }d d d        t        j                  | j                  j                  d         xx   | j                  j                         z  cc<   t        j                  j                  |      }t         j                  j#                  |j$                  t         j                  j#                  || j&                  j                         j                         d      d      }t        j(                  |      | _         t        j(                  |      | _        | j*                  j                         | _        d | j*                  _        | j.                  j                         | _        d | j.                  _        | j                  j                         | _        d | j                  _        y # 1 sw Y   xY w)Nz(Cannot call cache_matrix more than once.r   TlowerF)rH   rI   r   no_gradkernelrS   rT   rU   r   diag_indicesr?   shaperL   itemlinalgcholeskyr   solve_triangularTr@   
from_numpyrJ   r0   rK   )rM   cov_Y_Ycov_Y_Y_cholcov_Y_Y_inv_Ys       r   _cache_matrixzGPRegressor._cache_matrix|   s   !!)d.A.A.I 	
6	
I ]]_ 	;kkm**,00288:G	; 	 3 3A 678DNN<O<O<QQ8yy))'2 55NNLL)),8I8I8K8Q8Q8S[_)` 6 
 #--l;#..}=,0,M,M,T,T,V)15)). --446!%..0"+	; 	;s   ;HH&c                ,   | j                   | j                  J d       | j                  j                  d   }|j                  d   }||z   }t	        j
                  ||ft        j                        }| j                   j                         |d |d |f<   t        j                         5  | j                  |      j                         j                         j                         }| j                  ||      j                         j                         j                         }|t	        j                  |      xx   | j                  j                         z  cc<   d d d        t         j"                  j%                  | j                   j                         j                         j&                  d      j&                  ||d d |f<   t        j"                  j)                  ||d d |f   ||d d |f   j&                  z  z
        ||d |d f<   t        j*                  | j,                  |gd      | _        t         j"                  j%                  |j&                  t         j"                  j%                  || j.                  j                         j                         d      d      }	t        j0                  |      | _         t        j0                  |	      | _        t        j*                  | j                  |gd      | _        y # 1 sw Y   xY w)Nz-Call _cache_matrix before append_running_datar   dtypeTrX   )dimF)rH   rI   r?   r]   r   zerosrG   rU   r   rZ   r[   rS   rT   r\   rL   r^   r   r_   ra   rb   r`   catr@   rB   rc   rA   )
rM   	X_running	y_runningn_train	n_runningn_totalre   kernel_running_trainkernel_running_runningrf   s
             r   append_running_datazGPRegressor.append_running_data   s   !!-$2E2E2Q 	
;	
Q --%%a(OOA&	I%xx' 2"**E+/+=+=+C+C+EXgXxx'(]]_ 	X#';;y#9#@#@#B#F#F#H#N#N#P %)[[I%F%M%M%O%S%S%U%[%[%]""2??9#=>$..BUBUBWW>	X
 ,1<<+H+H""$**,.B.D.DD ,I ,

! 	WXxx'( ,.99+=+="78XgX-.gh>P1Q1S1SST,
WXwx'( ii	 :B55NNLL)),8I8O8O8QY])^ 6 
 #--l;#..}=ii	 :B-	X 	Xs   +B0L		LNc                   ||J | j                   }n|| j                  }|j                  dk(  r||z
  n"|j                  d      |j                  d      z
  j	                         }| j
                  j                         r@|d| j
                  f   dkD  j                  t        j                        |d| j
                  f<   |j                  | j                        }t        j                  |      | j                  z  S )am  
        Return the kernel matrix with the shape of (..., n_A, n_B) given X1 and X2 each with the
        shapes of (..., n_A, len(params)) and (..., n_B, len(params)).

        If x1 and x2 have the shape of (len(params), ), kernel(x1, x2) is computed as:
            kernel_scale * Matern52Kernel.apply(
                sqd(x1, x2) @ inverse_squared_lengthscales
            )
        where if x1[i] is continuous, sqd(x1, x2)[i] = (x1[i] - x2[i]) ** 2 and if x1[i] is
        categorical, sqd(x1, x2)[i] = int(x1[i] != x2[i]).
        Note that the distance for categorical parameters is the Hamming distance.
        r#   r<   r=   .r   )rE   r?   ndimrC   rD   r>   r   rF   r   rG   matmulrJ   r    applyrK   )rM   X1X2sqdsqdists        r   r[   zGPRegressor.kernel   s     :::&&Cz]] ggl27R0@2<<PRCS0S\\^C##'')25c4;O;O6O2PSV2V1\1\MM2C---. D==>##F+d.?.???r   c           	     p   | j                   | j                  J d       |j                  dk(  }|s|n|j                  d      }t        j
                  j                  | j                  || j                        x}| j                        }t        j
                  j                  | j                   t        j
                  j                  | j                   j                  |dd      dd      }|rb|rJ d       | j                  ||      }||j                  |j                  dd	            z
  }	|	j                  d	d
      j                  d       n@| j                  }|t        j
                  j                  ||      z
  }	|	j                  d       |r"|j!                  d      |	j!                  d      fS ||	fS )a)  
        This method computes the posterior mean and variance given the points `x` where both mean
        and variance tensors will have the shape of x.shape[:-1].
        If ``joint=True``, the joint posterior will be computed.

        The posterior mean and variance are computed as:
            mean = cov_fx_fX @ inv(cov_fX_fX + noise_var * I) @ y, and
            var = cov_fx_fx - cov_fx_fX @ inv(cov_fX_fX + noise_var * I) @ cov_fx_fX.T.

        Please note that we clamp the variance to avoid negative values due to numerical errors.
        z+Call cache_matrix before calling posterior.r#   r   TF)upperleftz3Call posterior with joint=False for a single point.r<   )dim1dim2r   )rH   rI   rw   rC   r   r_   vecdotr[   rA   ra   rb   rx   	transposediagonal
clamp_min_rK   squeeze)
rM   xjointis_single_pointx_	cov_fx_fXmeanV	cov_fx_fxvar_s
             r   	posteriorzGPRegressor.posterior   s    !!-$2E2E2Q 	
9	
Q &&A+%Q1;;q>||""B0L#L9dNaNabLL))LL))$*<*<*>*>	QU\a)b	 * 
 &](]]&B+Iqxx	(;(;B(CDDDMMrM+66s;))Iu||229a@@DOOC 5DQa1V4QU,Vr   c                >   | j                   j                  d   }d|z  t        j                  dt        j                  z        z  }| j                         | j                  t        j                  |t        j                        z  z   }t        j                  j                  |      }|j                         j                         j                          }t        j                  j                  || j                  dddf   d      dddf   }d||z  z  }||z   |z   S )a  
        This method computes the marginal log-likelihood of the kernel hyperparameters given the
        training dataset (X, y).
        Assume that N = len(X) in this method.

        Mathematically, the closed form is given as:
            -0.5 * log((2*pi)**N * det(C)) - 0.5 * y.T @ inv(C) @ y
            = -0.5 * log(det(C)) - 0.5 * y.T @ inv(C) @ y + const,
        where C = cov_Y_Y = cov_fX_fX + noise_var * I and inv(...) is the inverse operator.

        We exploit the full advantages of the Cholesky decomposition (C = L @ L.T) in this method:
            1. The determinant of a lower triangular matrix is the diagonal product, which can be
               computed with N flops where log(det(C)) = log(det(L.T @ L)) = 2 * log(det(L)).
            2. Solving linear system L @ u = y, which yields u = inv(L) @ y, costs N**2 flops.
        Note that given `u = inv(L) @ y` and `inv(C) = inv(L @ L.T) = inv(L).T @ inv(L)`,
        y.T @ inv(C) @ y is calculated as (inv(L) @ y) @ (inv(L) @ y).

        In principle, we could invert the matrix C first, but in this case, it costs:
            1. 1/3*N**3 flops for the determinant of inv(C).
            2. 2*N**2-N flops to solve C @ alpha = y, which is alpha = inv(C) @ y.

        Since the Cholesky decomposition costs 1/3*N**3 flops and the matrix inversion costs
        2/3*N**3 flops, the overall cost for the former is 1/3*N**3+N**2+N flops and that for the
        latter is N**3+2*N**2-N flops.
        r   g         ri   NF)r   )r?   r]   mathlogpir[   rL   r   eyerG   r_   r`   r   sumra   r@   )rM   n_pointsconstrd   Llogdet_partinv_L_y	quad_parts           r   marginal_log_likelihoodz#GPRegressor.marginal_log_likelihood   s    4 ==&&q)x$((1tww;"77++-$..599XU]]3["[[LL!!'*zz|'')--//,,//4==D3IQV/WXY[\X\]Gg-.	U"Y..r   c           	        	  j                   j                  d   	t        j                  t        j                   j
                  j                         j                         j                               t        j                   j                  j                               t        j                   j                  j                         dz  z
        gg      }d
	 fd}t               5  t        j                  j                  ||ddd|i      }d d d        j                   st#        d|j$                         t'        j(                  |j*                        }t'        j,                  |d 	        _        t'        j,                  |	          _	        r%t'        j.                  t&        j0                  	      nt'        j,                  |	dz            z    _         j3                           S # 1 sw Y   xY w)Nr#   gGz?c                   t        j                  |       j                  d      }t        j                         5  t        j                  |d        _        t        j                  |         _        r%t        j                  t         j                        nt        j                  |dz            z   _	        j                                 z
  }|j                          |j                  dz      }r|dk(  sJ d d d        j                         |j                  j                         j                         j!                         fS # 1 sw Y   OxY w)NTri   r#   r   )r   rc   requires_grad_enable_gradr%   rJ   rK   tensorrG   rL   r   r1   r0   r^   rS   rT   rU   )	
raw_paramsraw_params_tensorlossraw_noise_var_graddeterministic_objective	log_priorminimum_noisen_paramsrM   s	       r   	loss_funcz1GPRegressor._fit_kernel_params.<locals>.loss_func8  s1    % 0 0 < K KD Q""$ N49II>OPYQY>Z4[1$)II.?.I$J! / LLemmD#4X\#BCmS 
 44664H%6%;%;HqL%I"26HA6MMMN 99; 1 6 6 = = ? C C E K K MMMN Ns   C
EETzl-bfgs-bgtol)jacmethodoptionszOptimization failed: ri   )r   
np.ndarrayr3   ztuple[float, np.ndarray])r?   r]   r   concatenater   rJ   rS   rT   rU   rK   r^   rL   r   r   optimizeminimizesuccessRuntimeErrormessager   rc   r   r%   r   rG   rg   )
rM   r   r   r   r   initial_raw_paramsr   resraw_params_opt_tensorr   s
   ````     @r   _fit_kernel_paramszGPRegressor._fit_kernel_params  s    ==&&q)  ^^t88??AEEGMMOPFF4,,1134FF4>>..04-3GGH	
	N 	N" 89 		..))"! * C		 {{!6s{{mDEE % 0 0 7,1II6KIX6V,W)!II&;H&EF ' LLemm<+@A+N!OO 	
 	-		 		s   /'G77H )rN   r2   rO   r2   rP   r2   rJ   r2   rK   r2   rL   r2   r3   None)r3   r   )r3   r   )rn   r2   ro   r2   r3   r   )NN)rz   torch.Tensor | Noner{   r   r3   r2   )F)r   r2   r   boolr3   z!tuple[torch.Tensor, torch.Tensor])r3   r2   )
r   %Callable[[GPRegressor], torch.Tensor]r   floatr   r   r   r   r3   r:   )r4   r5   r6   rQ   propertyrV   rg   ru   r[   r   r   r   r8   r   r   r:   r:   ]   s    #$# # 	#
 '3# ##  # 
#4 W W#6 CF IM@%@2E@	@<#WJ!/F@8@ @ "&	@
 @ 
@r   r:   c           	     J    t        j                   j                  d   dz   t         j                        d	 fd} |       }	| |       }d }
||	fD ]  }	 t	        t        j
                        t        j
                         t        j
                        |j                  |j                  |j                        j                  ||||      c S  t        j                  d|
 d        |       }|j                          |S # t        $ r}|}
Y d }~d }~ww xY w)
Nr#   r   ri   c            	         t        t        j                        t        j                         t        j                        d d j                         d   j                         d   j                               S )Nr<   r   rN   rO   rP   rJ   rK   rL   )r:   r   rc   clone)XYdefault_kernel_paramsrN   s   r   _default_gprz'fit_kernel_params.<locals>._default_gpro  so     ++N;$$Q'$$Q')>s)C)I)I)K.r288:+B/557
 	
r   r   )r   r   r   r   z/The optimization of kernel parameters failed: 
z<
The default initial kernel parameters will be used instead.)r3   r:   )r   onesr]   rG   r:   rc   rJ   rK   rL   r   r   loggerwarningrg   )r   r   rN   r   r   r   	gpr_cacher   r   default_gpr_cacheerrorgpr_cache_to_useedefault_gprr   s   ```           @r   fit_kernel_paramsr   b  s-    "JJqwwqzA~U]]K
 
 % N	E '(9: 	$//?((+((+-=-Z-Z-::*44 ! #+(?	 ! $ NN
:5' BF 	F .K  	E	s   A:D	D"DD")r   r   r3   r   )Ng{Gz?)r   r   r   r   rN   r   r   r   r   r   r   r   r   zGPRegressor | Noner   r   r3   r:   )__doc__
__future__r   r   typingr   r   rU   r   "optuna._gp.scipy_blas_thread_patchr   optuna._warningsr   optuna.loggingr   collections.abcr	   r   r   optuna._importsr
   r4   r   r   autogradFunctionr    r:   r   r8   r   r   <module>r      s   & #      Y ( % (+ E E	H	 U^^,, <B BX %)777 7 5	7
 7 "7 "7 7 7r   