
    Z|i                         d Z ddlmZmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ d	Zd
eeej(                  f   deeej(                  f   fdZ G d dee	      Zy)zOutlier/anomaly detection utilities for MySQL Connector/Python.

Provides a scikit-learn compatible wrapper using HeatWave to score anomalies.
    )OptionalUnionN)OutlierMixin)MyBaseMLModel)ML_TASK)	copy_dict)MySQLConnectionAbstractgh㈵>probreturnc                 |    t        j                  | t        dt        z
        }t        j                  |d|z
  z        S )z
    Compute logit (logodds) for a probability, clipping to avoid numerical overflow.

    Args:
        prob: Scalar or array of probability values in (0,1).

    Returns:
        logit-transformed probabilities.
       )npclipEPSlog)r
   results     M/var/www/html/python/venv/lib/python3.12/site-packages/mysql/ai/ml/outlier.py_get_logitsr   0   s2     WWT3C(F66&AJ'((    c            
       D   e Zd ZdZ	 	 	 ddedee   dee   dee   fdZde	e
j                  ej                  f   d	ej                  fd
Zde	e
j                  ej                  f   d	ej                  fdZde	e
j                  ej                  f   d	ej                  fdZy)MyAnomalyDetectora  
    MySQL HeatWave scikit-learn compatible anomaly/outlier detector.

    Flags samples as outliers when the probability of being an anomaly
    exceeds a user-tunable threshold.
    Includes helpers to obtain decision scores and anomaly probabilities
    for ranking.

    Args:
        db_connection (MySQLConnectionAbstract): Active MySQL DB connection.
        model_name (str, optional): Custom model name in the database.
        fit_extra_options (dict, optional): Extra options for fitting.
        score_extra_options (dict, optional): Extra options for scoring/prediction.

    Attributes:
        boundary: Decision threshold boundary in logit space. Derived from
            trained model's catalog info

    Methods:
        predict(X): Predict outlier/inlier labels.
        score_samples(X): Compute anomaly (normal class) logit scores.
        decision_function(X): Compute signed score above/below threshold for ranking.
    Ndb_connection
model_namefit_extra_optionsscore_extra_optionsc                     t        j                  | |t        j                  ||       t	        |      | _        d| _        y)a2  
        Initialize an anomaly detector instance with threshold and extra options.

        Args:
            db_connection: Active MySQL DB connection.
            model_name: Optional model name in DB.
            fit_extra_options: Optional extra fit options.
            score_extra_options: Optional extra scoring options.

        Raises:
            ValueError: If outlier_threshold is not in (0,1).
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.
        )r   r   N)r   __init__r   ANOMALY_DETECTIONr   r   boundary)selfr   r   r   r   s        r   r   zMyAnomalyDetector.__init__W   s>    , 	%%!/	
 $--@#A )-r   Xr   c                 T    t        j                  | j                  |      dk  dd      S )a  
        Predict outlier/inlier binary labels for input samples.

        Args:
            X: Samples to predict on.

        Returns:
            ndarray: Values are -1 for outliers, +1 for inliers, as per scikit-learn convention.

        Raises:
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
        g        r   )r   wheredecision_function)r    r!   s     r   predictzMyAnomalyDetector.predictw   s'    6 xx..q1C7Q??r   c                    | j                  |      }| j                  U| j                         }|t        d      |d   d   j	                  dd      }|t        d      t        d|z
        | _        || j                  z
  S )a  
        Compute signed distance to the outlier threshold.

        Args:
            X: Samples to predict on.

        Returns:
            ndarray: Score > 0 means inlier, < 0 means outlier; |value| gives margin.

        Raises:
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
            ValueError:
                If the provided model info does not provide threshold
        Nz Model does not exist in catalog.model_metadatatraining_paramsanomaly_detection_thresholdzzTrained model is outdated and does not support threshold. Try retraining or using an existing, trained model with MyModel.g      ?)score_samplesr   get_model_info
ValueErrorgetr   )r    r!   sample_scores
model_info	thresholds        r   r%   z#MyAnomalyDetector.decision_function   s    . **1-== ,,.J! !CDD"#345FGKK-tI   W  (i8DMt}},,r   c                     | j                   j                  || j                        }t        |d   j	                  d       j                               S )aJ  
        Compute normal probability logit score for each sample.
        Used for ranking, thresholding.

        Args:
            X: Samples to score.

        Returns:
            ndarray: Logit scores based on "normal" class probability.

        Raises:
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
        )options
ml_resultsc                     | d   d   S )Nprobabilitiesnormal )xs    r   <lambda>z1MyAnomalyDetector.score_samples.<locals>.<lambda>   s    Q/9 r   )_modelr&   r   r   applyto_numpy)r    r!   r   s      r   r+   zMyAnomalyDetector.score_samples   sJ    , $$Q0H0H$I< U9:XZ
 	
r   )NNN)__name__
__module____qualname____doc__r	   r   strdictr   r   pd	DataFramer   ndarrayr&   r%   r+   r8   r   r   r   r   >   s    6 %),0.2... SM. $D>	.
 &d^.@@rzz)*@ 
@:+-rzz)*+- 
+-Z
rzz)*
 

r   r   )rA   typingr   r   numpyr   pandasrD   sklearn.baser   mysql.ai.ml.baser   mysql.ai.ml.modelr   mysql.ai.utilsr   mysql.connector.abstractsr	   r   floatrF   r   r   r8   r   r   <module>rP      sh   : #   % * % $ =
)eE2::-. )5

9J3K )_
| _
r   