@dataclass
class PyODPolicyConfig:
    """
    Settings bundle consumed by PyODPolicy.

    Parameters
    ----------
    name : str, optional
        Identifier of the policy class. Defaults to "pyod".
    method : str, optional
        PyOD detector to use, written as "<module>.<ClassName>".
        Defaults to "deepsvdd.DeepSVDD".
    feature_type : str, optional
        Which feature representation is fed to the detector.
        Defaults to "hidden".
    pyod_config : dict, optional
        Extra keyword arguments for the PyOD model. Defaults to None.
    load_path : str, optional
        Location of a checkpoint to load. Defaults to None.

    Examples
    --------
    >>> config = PyODPolicyConfig(method="deepsvdd.DeepSVDD", feature_type="hidden")
    """

    # Field order is part of the generated ``__init__`` signature; keep it.
    name: str = "pyod"
    method: str = "deepsvdd.DeepSVDD"
    feature_type: str = "hidden"
    pyod_config: Optional[Dict[str, Any]] = None
    load_path: Optional[str] = None
class PyODPolicy(Policy):
    """
    Policy that uses a PyOD outlier detector for action selection based on OOD scores.

    The detector is fitted on features extracted from observations
    (see ``_make_input``). At action time each sample's decision score is
    compared against ``threshold`` to choose between ``EXPERT`` and
    ``1 - EXPERT``.

    Examples
    --------
    >>> policy = PyODPolicy(PyODPolicyConfig(), env)
    >>> obs = ...
    >>> action = policy.act(obs)
    """

    config_cls = PyODPolicyConfig

    def __init__(self, config: PyODPolicyConfig, env: "gym.Env") -> None:
        """
        Initialize the PyODPolicy.

        Parameters
        ----------
        config : PyODPolicyConfig
            Configuration object for the policy. ``config.pyod_config`` may be
            None; it is treated as an empty dict and is never modified.
        env : gym.Env
            The environment instance, used to determine expert index.

        Returns
        -------
        None

        Examples
        --------
        >>> policy = PyODPolicy(PyODPolicyConfig(), env)
        """
        self.config = config
        self.threshold = None
        self.device = get_global_variable("device")

        # Work on a copy: the config default is None (which cannot be indexed),
        # and injecting runtime values into the caller's dict would leak
        # device/seed state into the shared config object.
        pyod_kwargs = dict(config.pyod_config or {})
        pyod_kwargs["device"] = self.device
        pyod_kwargs["random_state"] = get_global_variable("seed")

        self.clf = self._get_pyod_class(config)(**pyod_kwargs)

        # Some detectors expose a torch network as ``model_``; move it to the
        # configured device when it is present.
        model = self._torch_model()
        if model is not None:
            model.to(self.device)

        self.feature_type = config.feature_type
        self.EXPERT = env.EXPERT

    def _torch_model(self) -> Optional[nn.Module]:
        """
        Return the detector's ``model_`` attribute if it is a torch module.

        Returns
        -------
        torch.nn.Module or None
            ``self.clf.model_`` when it exists and is an ``nn.Module``,
            otherwise None.
        """
        model = getattr(self.clf, "model_", None)
        return model if isinstance(model, nn.Module) else None

    @staticmethod
    def _to_numpy(value: Any) -> np.ndarray:
        """
        Convert a feature block to a NumPy array.

        Torch tensors are detached and moved to host memory first, so tensors
        that live on an accelerator or track gradients are accepted too.

        Parameters
        ----------
        value : Any
            A torch tensor or anything ``np.asarray`` accepts.

        Returns
        -------
        np.ndarray
            The converted array.
        """
        if isinstance(value, torch.Tensor):
            return value.detach().cpu().numpy()
        return np.asarray(value)

    def _get_pyod_class(self, config: PyODPolicyConfig) -> type:
        """
        Dynamically import and return the PyOD class specified in the config.

        ``config.method`` is split at its last dot: everything before it is
        the module path below ``lib.pyod.pyod.models``, and the remainder is
        the class name.

        Parameters
        ----------
        config : PyODPolicyConfig
            Configuration object for the policy.

        Returns
        -------
        type
            The PyOD class to instantiate.

        Raises
        ------
        ImportError
            If the specified class cannot be imported (malformed method
            string, missing module or missing attribute). The original
            error is attached as ``__cause__``.

        Examples
        --------
        >>> cls = policy._get_pyod_class(config)
        """
        try:
            # rsplit keeps nested module paths ("pkg.mod.Class") working;
            # a string without any dot fails the unpacking below.
            module_name, cls_name = config.method.rsplit(".", 1)
            module = importlib.import_module(f"lib.pyod.pyod.models.{module_name}")
            return getattr(module, cls_name)
        except Exception as e:
            # Callers are promised ImportError for every failure mode; keep
            # the underlying error chained for debugging.
            raise ImportError(f"Could not import {config.method} from PyOD: {e}") from e

    def reset(self, done: "numpy.ndarray") -> None:
        """
        Reset the policy state at episode boundaries.

        This policy keeps no per-episode state, so the call is a no-op.

        Parameters
        ----------
        done : numpy.ndarray
            Boolean array indicating which episodes in a batch require a reset.

        Returns
        -------
        None

        Examples
        --------
        >>> policy.reset(done)
        """
        pass

    def _make_input(self, obs: Dict[str, Any]) -> np.ndarray:
        """
        Construct the input feature array for the PyOD model from the observation.

        Feature groups are selected by substring match on ``self.feature_type``:
        "obs" uses ``obs["base_obs"]`` (flattened to 2-D), "hidden" uses
        ``obs["novice_hidden"]`` and "dist" uses the softmax of
        ``obs["novice_logit"]``. Selected groups are concatenated along axis 1.

        Parameters
        ----------
        obs : dict
            Observation dictionary containing required features.

        Returns
        -------
        np.ndarray
            Concatenated feature array for the PyOD model.

        Raises
        ------
        AssertionError
            If no features are selected for PyOD input.

        Examples
        --------
        >>> inp = policy._make_input(obs)
        """
        features = []

        if "obs" in self.feature_type:
            base_obs = self._to_numpy(obs["base_obs"])
            if base_obs.ndim > 2:
                # Collapse every non-batch dimension so the detector sees a
                # (batch, features) matrix.
                base_obs = base_obs.reshape(base_obs.shape[0], -1)
            features.append(base_obs)

        if "hidden" in self.feature_type:
            features.append(self._to_numpy(obs["novice_hidden"]))

        if "dist" in self.feature_type:
            # as_tensor leaves an existing tensor untouched and lets array
            # inputs use the same softmax code path.
            logits = torch.as_tensor(obs["novice_logit"])
            features.append(self._to_numpy(logits.softmax(dim=-1)))

        # Raised explicitly (not via ``assert``) so the check survives ``-O``.
        if not features:
            raise AssertionError("No features selected for PyOD input")

        return np.concatenate(features, axis=1)

    def fit(self, data: Dict[str, Any]) -> None:
        """
        Fit the PyOD model using the provided data.

        Parameters
        ----------
        data : dict
            Data dictionary containing features for fitting the model.

        Returns
        -------
        None

        Examples
        --------
        >>> policy.fit(data)
        """
        X = self._make_input(data)
        self.clf.fit(X)

    def get_train_scores(self) -> np.ndarray:
        """
        Get the OOD decision scores from the PyOD model after fitting.

        Returns
        -------
        np.ndarray
            Array of decision scores for the training data.

        Examples
        --------
        >>> scores = policy.get_train_scores()
        """
        return self.clf.decision_scores_

    def act(self, obs: Dict[str, Any], temperature: Optional[float] = None) -> torch.Tensor:
        """
        Select actions based on OOD scores from the PyOD model.

        Parameters
        ----------
        obs : dict
            Observation dictionary containing required features.
        temperature : float, optional
            Unused. Included for API compatibility.

        Returns
        -------
        torch.Tensor
            Tensor of selected actions (expert or not) for the batch.

        Raises
        ------
        RuntimeError
            If no threshold has been set yet (see ``set_params``).

        Examples
        --------
        >>> action = policy.act(obs)
        """
        if self.threshold is None:
            # Fail with a clear message instead of an opaque tensor-vs-None
            # comparison error further down.
            raise RuntimeError(
                "PyODPolicy.threshold is not set; "
                "call set_params({'threshold': ...}) before act()"
            )

        inp = self._make_input(obs)
        score = self.clf.decision_function(inp)
        score = torch.from_numpy(score).float().to(get_global_variable("device"))

        # NOTE(review): PyOD documents larger scores as more anomalous, yet
        # EXPERT is chosen when the score is *below* the threshold - confirm
        # this direction is intended.
        return torch.where(score < self.threshold, self.EXPERT, 1 - self.EXPERT)

    def set_params(self, params: Dict[str, Any]) -> None:
        """
        Set the parameters of the policy.

        Only the keys "threshold" and "clf" are read; other keys are ignored.

        Parameters
        ----------
        params : dict
            Dictionary of policy parameters to set.

        Returns
        -------
        None

        Examples
        --------
        >>> policy.set_params({'threshold': 0.5, 'clf': clf})
        """
        if "threshold" in params:
            self.threshold = params["threshold"]
        if "clf" in params:
            self.clf = params["clf"]

    def get_params(self) -> Dict[str, Any]:
        """
        Get the current parameters of the policy.

        Returns
        -------
        dict
            Dictionary of policy parameters, with keys "threshold" and "clf".

        Examples
        --------
        >>> params = policy.get_params()
        """
        return {"threshold": self.threshold, "clf": self.clf}

    def train(self) -> None:
        """
        Set the PyOD model to training mode if applicable.

        Does nothing when the detector has no torch ``model_``.

        Returns
        -------
        None

        Examples
        --------
        >>> policy.train()
        """
        model = self._torch_model()
        if model is not None:
            model.train()

    def eval(self) -> None:
        """
        Set the PyOD model to evaluation mode if applicable.

        Does nothing when the detector has no torch ``model_``.

        Returns
        -------
        None

        Examples
        --------
        >>> policy.eval()
        """
        model = self._torch_model()
        if model is not None:
            model.eval()