---
title: "Exploring The Variables Importance"
author: "Gabriele Pittarello"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Exploring The Variables Importance}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r instancepackage, include=FALSE}
library(ReSurv)
```

# Introduction

Machine learning models capture interactions between covariates. They are often black boxes, but they can be interpreted with SHAP values. We generate two data sets, one from scenario Alpha and one from scenario Delta, to illustrate the plotting functionalities of the ReSurv package.

```{r eval=FALSE, include=TRUE}
# Input data scenario Alpha
input_data_0 <- data_generator(
  random_seed = 1,
  scenario = 0,
  time_unit = 1 / 360,
  years = 4,
  yearly_exposure = 200
)

# Pre-process the simulated claims: daily input, quarterly output.
individual_data_0 <- IndividualDataPP(
  data = input_data_0,
  id = NULL,
  categorical_features = "claim_type",
  continuous_features = "AP",
  accident_period = "AP",
  calendar_period = "RP",
  input_time_granularity = "days",
  output_time_granularity = "quarters",
  years = 4
)
```

```{r eval=FALSE, include=TRUE}
# Input data scenario Delta
input_data_3 <- data_generator(
  random_seed = 1,
  scenario = 3,
  time_unit = 1 / 360,
  years = 4,
  yearly_exposure = 200
)

# Same pre-processing settings as for scenario Alpha.
individual_data_3 <- IndividualDataPP(
  data = input_data_3,
  id = NULL,
  categorical_features = "claim_type",
  continuous_features = "AP",
  accident_period = "AP",
  calendar_period = "RP",
  input_time_granularity = "days",
  output_time_granularity = "quarters",
  years = 4
)
```

Here we fit Neural Networks and XGB. In order to simplify this vignette, we provide in advance the optimal hyperparameters.

```{r eval=FALSE, include=TRUE}
# Pre-tuned XGB hyperparameters, scenario Alpha.
hp_scenario_alpha_xgb <- list(
  params = list(
    booster = "gbtree",
    eta = 0.9887265,
    subsample = 0.7924135,
    alpha = 10.85342,
    lambda = 6.213317,
    min_child_weight = 3.042204,
    max_depth = 1
  ),
  print_every_n = 0,
  nrounds = 3000,
  verbose = FALSE,
  early_stopping_rounds = 500
)

# Pre-tuned NN hyperparameters, scenario Alpha.
# Each name appears exactly once: a duplicated list name is silently
# shadowed by `$`/`[[` and is rejected when a list is splatted into a call.
hp_scenario_alpha_nn <- list(
  batch_size = 5000L,
  epochs = 5500L,
  num_workers = 0,
  tie = "Efron",
  num_layers = 2,
  num_nodes = 10,
  optim = "SGD",
  lr = 0.3023043,
  xi = 0.426443,
  eps = 0,
  activation = "SELU",
  early_stopping = TRUE,
  patience = 350,
  verbose = FALSE,
  network_structure = NULL
)

# Pre-tuned XGB hyperparameters, scenario Delta.
hp_scenario_delta_xgb <- list(
  params = list(
    booster = "gbtree",
    eta = 0.2717736,
    subsample = 0.9043068,
    alpha = 7.789214,
    lambda = 12.09398,
    min_child_weight = 22.4837,
    max_depth = 4
  ),
  print_every_n = 0,
  nrounds = 3000,
  verbose = FALSE,
  early_stopping_rounds = 500
)

# Pre-tuned NN hyperparameters, scenario Delta (names unique, as above).
hp_scenario_delta_nn <- list(
  batch_size = 5000L,
  epochs = 5500L,
  num_workers = 0,
  tie = "Efron",
  num_layers = 2,
  num_nodes = 2,
  optim = "Adam",
  lr = 0.3542422,
  xi = 0.1803953,
  eps = 0,
  activation = "LeakyReLU",
  early_stopping = TRUE,
  patience = 350,
  verbose = FALSE,
  network_structure = NULL
)
```

```{r eval=FALSE, include=TRUE}
# Scenario Alpha: XGB and NN hazard models.
resurv_model_xgb_A <- ReSurv(
  individual_data_0,
  hazard_model = "XGB",
  hparameters = hp_scenario_alpha_xgb
)

resurv_model_nn_A <- ReSurv(
  individual_data_0,
  hazard_model = "NN",
  hparameters = hp_scenario_alpha_nn
)

# Scenario Delta: XGB and NN hazard models.
resurv_model_xgb_D <- ReSurv(
  individual_data_3,
  hazard_model = "XGB",
  hparameters = hp_scenario_delta_xgb
)

resurv_model_nn_D <- ReSurv(
  individual_data_3,
  hazard_model = "NN",
  hparameters = hp_scenario_delta_nn
)
```

## SHAP values (XGB)

```{r eval=FALSE, include=TRUE}
plot(resurv_model_xgb_A)
```

```{r eval=FALSE, include=TRUE}
plot(resurv_model_xgb_D)
```

## SHAP values (NN)

```{r eval=FALSE, include=TRUE}
plot(resurv_model_nn_A, nsamples = 10000)
```

```{r eval=FALSE, include=TRUE}
plot(resurv_model_nn_D, nsamples = 10000)
```