Skip to content

Commit

Permalink
version 1.8.2
Browse files Browse the repository at this point in the history
  • Loading branch information
david-cortes authored and cran-robot committed Nov 22, 2023
1 parent 111fef2 commit 194efc2
Show file tree
Hide file tree
Showing 20 changed files with 559 additions and 602 deletions.
12 changes: 6 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: outliertree
Type: Package
Title: Explainable Outlier Detection Through Decision Tree Conditioning
Version: 1.8.1-1
Version: 1.8.2
Author: David Cortes
Maintainer: David Cortes <[email protected]>
URL: https://github.com/david-cortes/outliertree
Expand All @@ -12,14 +12,14 @@ Description: Outlier detection method that flags suspicious values within observ
Full procedure is described in Cortes (2020) <arXiv:2001.00636>.
Loosely based on the 'GritBot' <https://www.rulequest.com/gritbot-info.html> software.
License: GPL (>= 3)
Imports: Rcpp (>= 1.0.1)
Depends: R (>= 3.5.0)
Imports: Rcpp (>= 1.0.1), methods
Depends: R (>= 4.3.0)
Suggests: knitr, rmarkdown, kableExtra, data.table
LinkingTo: Rcpp, Rcereal
LazyData: true
VignetteBuilder: knitr
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
NeedsCompilation: yes
Packaged: 2022-07-27 18:40:52 UTC; david
Packaged: 2023-11-21 21:14:45 UTC; david
Repository: CRAN
Date/Publication: 2022-08-06 20:00:02 UTC
Date/Publication: 2023-11-21 21:50:03 UTC
37 changes: 18 additions & 19 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,44 +1,43 @@
0bb6d82dfe028de1d1c9a8955257361e *DESCRIPTION
f2f84a6270f5aef385a2a886f1246758 *NAMESPACE
a3549c32ad34b6c7bd1a2024982a62ba *R/RcppExports.R
a8b53087fcf6e29fdf5e8519ba1442fd *R/helpers.R
0191b4ce8f38c3e76d42ee8c94c531cc *DESCRIPTION
f7ddfbf401ff4029ee143ac87b34bf18 *NAMESPACE
146ce017ca8e585317f46365f599d82c *R/RcppExports.R
8f9d710b5dc39d73a8055c336f9f6d95 *R/helpers.R
3479eaf06504f010287a711cd5aeed33 *R/hypothyroid.R
f5fb84a0c34408e213347e6d0e3b2fcf *R/obj_methods.R
e37dfd4db46c0a06daa5c7bd9184c2cd *R/outliertree.R
b000c22a8e5f588636e309fac6d2b800 *R/obj_methods.R
867d3986ac0d780a4f2a31ff53899075 *R/outliertree.R
6e9aeeba689a25e3a5714c376f892c9d *R/titanic.R
710acbc770c9ab441ea689536bd6e539 *build/vignette.rds
2e174a8feb7b3f62911d9b56c4e70fbe *build/vignette.rds
c18cb6d69093cb91329c328ff4a117a0 *configure
46f6ca6bf4d93b32c26870fe7d3009e1 *configure.ac
d41d8cd98f00b204e9800998ecf8427e *configure.win
68bd4e6455985c40493fc54c36e99ec0 *data/hypothyroid.rda
88bb214afac793d369d89a07007f007b *data/titanic.rda
707cb95bdf71178625753b2ec698c97e *inst/doc/Explainable_Outlier_Detection_in_Titanic_dataset.R
cf5ae8500528392a73f9c329806776ea *inst/doc/Explainable_Outlier_Detection_in_Titanic_dataset.Rmd
dc21f304cce8ab5ac20213243137bd9a *inst/doc/Explainable_Outlier_Detection_in_Titanic_dataset.html
6f1202eec1dd71943944b016461bfb84 *inst/doc/Explainable_Outlier_Detection_in_Titanic_dataset.html
c7d31c586d00d153e28c3ffc0e42e9ca *inst/doc/Introducing_OutlierTree.R
520ac4aa8c838850aaf36d4e11426308 *inst/doc/Introducing_OutlierTree.Rmd
e7ffd977382d9b679d30c5f4b7278926 *inst/doc/Introducing_OutlierTree.html
486241bd107ceced52b2db1d3c4d6fab *inst/doc/Introducing_OutlierTree.html
9e980d45ebf1314f6a7ab4d0735c34f9 *man/as.list.outlieroutputs.Rd
c1db44adae56d90f7e038ce359f3a1d2 *man/check.outlierness.bounds.Rd
289001788632a6cb1e1fa4faaa5f6502 *man/extract.training.outliers.Rd
894e7d20bd0bee99b5154b6fcdc3c25c *man/hypothyroid.Rd
569135efd7b6147a88c2ff25ec617c7a *man/outlier.tree.Rd
a3af014eecf636332bb89a3dc30b4431 *man/predict.outliertree.Rd
007649f14261ce69628d323c54c11831 *man/print.outlieroutputs.Rd
4a7c7e409ee44b5f970203634f0b4604 *man/outlier.tree.Rd
50de24911cc616b2ae4e1ddce5e5f798 *man/predict.outliertree.Rd
1f927ee92106a94883ca322aecc8b048 *man/print.outlieroutputs.Rd
b7678d0ade3f8be54a2e3952e9b4a0b3 *man/print.outliertree.Rd
78650c26bdca4bcc0233fec124b45605 *man/sub-.outlieroutputs.Rd
506e9f197f434716d44c4d944858b87d *man/summary.outlieroutputs.Rd
e4f9598822d1683a4810f8b70b3f719d *man/summary.outliertree.Rd
7e6cc5ce3242ff2eda4d9487d18babc1 *man/titanic.Rd
9031913794b4e414be12a8bc60cc571d *man/unpack.outlier.tree.Rd
bfcd1b4e4980ae851a22408af8aaeb79 *src/Makevars.in
6d09028baad6a6136c1c02345ea0cd17 *src/Makevars.win
9feb25e069cc93179718532dad3ba95c *src/RcppExports.cpp
ee55e310e831eaf1d5548c9e679bfa38 *src/Rwrapper.cpp
c751a87f5827956018d5ce3aa35194c0 *src/Makevars.in
72f4c11f78af661d8cbaf40c179a8318 *src/Makevars.win
0b663d0a726e67eac4918a72c1edd747 *src/RcppExports.cpp
a52f73c46b570268b0af8aa1981a4459 *src/Rwrapper.cpp
c7a837ecba6b9af232973a449a1e3a93 *src/cat_outlier.cpp
ca45be6248ced1c119cbe38b5d5da6e3 *src/clusters.cpp
2dce205f669042b7b1b466e7afd88a45 *src/fit_model.cpp
4987b3276c61ebbf65a920d66c84e781 *src/misc.cpp
f3456306c44f650494f9f33e9a3ab6da *src/fit_model.cpp
06347e747355f1f9f95b8e2a0f748224 *src/misc.cpp
f67bff9e697a7cee33db78443f870edf *src/outlier_tree.h
9773402db2b299cbaf88feb87a83deac *src/outliertree-win.def
08abb4798dd6572955e7ec90c1d660fe *src/predict.cpp
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ S3method(summary,outliertree)
export(check.outlierness.bounds)
export(extract.training.outliers)
export(outlier.tree)
export(unpack.outlier.tree)
importFrom(Rcpp,evalCpp)
importFrom(methods,new)
importFrom(parallel,detectCores)
importFrom(stats,predict)
importFrom(utils,head)
Expand Down
8 changes: 0 additions & 8 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

# Auto-generated wrapper: forwards `src` and `ptr_obj` unchanged to the
# registered native routine `_outliertree_deserialize_OutlierTree` through
# .Call and returns whatever that routine returns.
# NOTE(review): presumably `src` holds the serialized model bytes and `ptr_obj`
# is the external pointer to repopulate -- confirm against the C++ source.
deserialize_OutlierTree <- function(src, ptr_obj) {
.Call(`_outliertree_deserialize_OutlierTree`, src, ptr_obj)
}

# Auto-generated wrapper: forwards `ptr_model` unchanged to the registered
# native routine `_outliertree_check_null_ptr_model` through .Call and returns
# its result.
# NOTE(review): the name suggests the result is a logical flag that is TRUE
# when the pointer is null -- confirm against the C++ source.
check_null_ptr_model <- function(ptr_model) {
.Call(`_outliertree_check_null_ptr_model`, ptr_model)
}

# Auto-generated wrapper: passes every argument, in the same order and without
# any validation or conversion, to the registered native routine
# `_outliertree_fit_OutlierTree` through .Call and returns its result.
# All input checking is therefore expected to happen in the R caller.
# NOTE(review): argument meanings (data arrays, column counts, level lists,
# hyperparameters) are inferred from names only -- confirm against the C++ source.
fit_OutlierTree <- function(arr_num, ncols_numeric, arr_cat, ncols_categ, ncat, arr_ord, ncols_ord, ncat_ord, nrows, cols_ignore_r, nthreads, categ_as_bin, ord_as_bin, cat_bruteforce_subset, categ_from_maj, take_mid, max_depth, max_perc_outliers, min_size_numeric, min_size_categ, min_gain, follow_all, gain_as_pct, z_norm, z_outlier, return_outliers, cat_levels, ord_levels, colnames_num, colnames_cat, colnames_ord, min_date, min_ts) {
.Call(`_outliertree_fit_OutlierTree`, arr_num, ncols_numeric, arr_cat, ncols_categ, ncat, arr_ord, ncols_ord, ncat_ord, nrows, cols_ignore_r, nthreads, categ_as_bin, ord_as_bin, cat_bruteforce_subset, categ_from_maj, take_mid, max_depth, max_perc_outliers, min_size_numeric, min_size_categ, min_gain, follow_all, gain_as_pct, z_norm, z_outlier, return_outliers, cat_levels, ord_levels, colnames_num, colnames_cat, colnames_ord, min_date, min_ts)
}
Expand Down
10 changes: 5 additions & 5 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -261,17 +261,15 @@ check.nthreads <- function(nthreads) {
nthreads <- 1L
} else if (is.na(nthreads)) {
nthreads <- 1L
} else if (nthreads == "auto") {
nthreads <- parallel::detectCores()
} else if (nthreads < 1L) {
nthreads <- parallel::detectCores()
nthreads <- 1L
}
nthreads <- as.integer(nthreads)
if (nthreads > 1L && !R_has_openmp()) {
msg <- paste0("Attempting to use more than 1 thread, but ",
"package was compiled without OpenMP support.")
if (tolower(Sys.info()[["sysname"]]) == "darwin")
msg <- paste0(msg, " See https://mac.r-project.org/openmp/")
msg <- paste0(msg, " See https://github.com/david-cortes/installing-optimized-libraries#4-macos-install-and-enable-openmp")
warning(msg)
}
return(nthreads)
Expand All @@ -292,7 +290,9 @@ check.is.model.obj <- function(model_obj) {
if (!("outliertree" %in% class(model_obj))) {
stop("Must pass an Outlier Tree model object as generated by function 'outlier.tree'.")
}
if (is.null(model_obj$obj_from_cpp)) {
if (is.null(model_obj$obj_from_cpp$ptr_model$ptr) ||
identical(model_obj$obj_from_cpp$ptr_model$ptr, methods::new("externalptr"))
) {
stop("Outlier Tree model object has been corrupted.")
}
}
Expand Down
1 change: 0 additions & 1 deletion R/obj_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#' @return The same input `x` that was passed (as `invisible`).
#' @export
print.outliertree <- function(x, ...) {
unpack.outlier.tree(x)
cat("Outlier Tree model\n")
if (NROW(x$cols_num)) cat(sprintf("\tNumeric variables: %d\n", NROW(x$cols_num)))
if (NROW(x$cols_date)) cat(sprintf("\tDate variables: %d\n", NROW(x$cols_date)))
Expand Down
70 changes: 11 additions & 59 deletions R/outliertree.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' @importFrom parallel detectCores
#' @importFrom stats predict
#' @importFrom utils head
#' @importFrom methods new
#' @importFrom Rcpp evalCpp
#' @useDynLib outliertree, .registration=TRUE
NULL
Expand Down Expand Up @@ -113,7 +114,7 @@ NULL
#' \item GritBot software: \url{https://www.rulequest.com/gritbot-info.html}
#' \item Cortes, David. "Explainable outlier detection through decision tree conditioning." arXiv preprint arXiv:2001.00636 (2020).
#' }
#' @seealso \link{predict.outliertree} \link{extract.training.outliers} \link{hypothyroid} \link{unpack.outlier.tree}
#' @seealso \link{predict.outliertree} \link{extract.training.outliers} \link{hypothyroid}
#' @examples
#' library(outliertree)
#'
Expand All @@ -139,7 +140,7 @@ NULL
#' ### use custom row names
#' df.w.names <- hypothyroid
#' row.names(df.w.names) <- paste0("rownum", 1:nrow(hypothyroid))
#' outliers.w.names <- predict(model, df.w.names, return_outliers=TRUE)
#' outliers.w.names <- predict(model, df.w.names, return_outliers=TRUE, nthreads=1)
#' outliers.w.names[["rownum745"]]
#' @export
outlier.tree <- function(df, max_depth = 4L, min_gain = 1e-2, z_norm = 2.67, z_outlier = 8.0,
Expand Down Expand Up @@ -212,8 +213,6 @@ outlier.tree <- function(df, max_depth = 4L, min_gain = 1e-2, z_norm = 2.67, z_o
as.character(model_data$cols_ord),
model_data$date_min,
model_data$ts_min)
if (!NROW(model_data$obj_from_cpp$serialized_obj))
stop("Model object is too big. Try smaller inputs and/or changing hyperparameters.")
names(model_data$obj_from_cpp$bounds) <- get.cols.ordered(model_data)
model_data$obj_from_cpp$bounds <- model_data$obj_from_cpp$bounds[names(df)]

Expand Down Expand Up @@ -252,6 +251,7 @@ outlier.tree <- function(df, max_depth = 4L, min_gain = 1e-2, z_norm = 2.67, z_o
#' outliers. The number of decimals will be dynamically increased according to the relative magnitudes of the
#' values being reported. Ignored when passing `outliers_print=0` or `outliers_print=FALSE`.
#' @param return_outliers Whether to return the outliers in an R object (otherwise will just print them).
#' @param nthreads Number of parallel threads to use. Parallelization is done by rows.
#' @param ... Not used.
#' @return If passing `return_outliers` = `TRUE`, will return a list of lists with the outliers and their
#' information (each row is an entry in the first list, with the same names as the rows in the input data
Expand All @@ -264,7 +264,7 @@ outlier.tree <- function(df, max_depth = 4L, min_gain = 1e-2, z_norm = 2.67, z_o
#' @details Note that after loading a serialized object from `outlier.tree` through `readRDS` or `load`,
#' it will only de-serialize the underlying C object upon running `predict` or `print`, so the first run will
#' be slower, while subsequent runs will be faster as the C object will already be in-memory.
#' @seealso \link{outlier.tree} \link{print.outlieroutputs} \link{unpack.outlier.tree}
#' @seealso \link{outlier.tree} \link{print.outlieroutputs}
#' @examples
#' library(outliertree)
#' ### random data frame with an obvious outlier
Expand All @@ -291,7 +291,8 @@ outlier.tree <- function(df, max_depth = 4L, min_gain = 1e-2, z_norm = 2.67, z_o
#'
#' ### find the test outlier
#' test_outliers = predict(outliers_model, df_test,
#' outliers_print = 1, return_outliers = TRUE)
#' outliers_print = 1, return_outliers = TRUE,
#' nthreads = 1)
#'
#' ### retrieve the outlier info (for row 1) as an R list
#' test_outliers[[1]]
Expand All @@ -300,8 +301,7 @@ outlier.tree <- function(df, max_depth = 4L, min_gain = 1e-2, z_norm = 2.67, z_o
#' # dt = t(data.table::as.data.table(test_outliers))
#' @export
predict.outliertree <- function(object, newdata, outliers_print = 15L, min_decimals = 2L,
return_outliers = TRUE, ...) {
unpack.outlier.tree(object)
return_outliers = TRUE, nthreads = object$nthreads, ...) {
outliers_print <- check.outliers.print(outliers_print)
return_outliers <- as.logical(return_outliers)
if (NROW(newdata) == 0) {
Expand All @@ -314,7 +314,7 @@ predict.outliertree <- function(object, newdata, outliers_print = 15L, min_decim
}

c_arr_data <- split.types.new(newdata, object)
outliers_info <- predict_OutlierTree(object$obj_from_cpp$ptr_model, NROW(newdata), object$nthreads,
outliers_info <- predict_OutlierTree(object$obj_from_cpp$ptr_model$ptr, NROW(newdata), check.nthreads(nthreads),
c_arr_data$arr_num, c_arr_data$arr_cat, c_arr_data$arr_ord,
object$cat_levels,
object$ord_levels,
Expand Down Expand Up @@ -365,7 +365,8 @@ predict.outliertree <- function(object, newdata, outliers_print = 15L, min_decim
#' pred <- predict(otree,
#' hypothyroid,
#' outliers_print=0,
#' return_outliers=TRUE)
#' return_outliers=TRUE,
#' nthreads=1)
#'
#' ### Print stored predictions
#' ### Row 531 is an outlier, but 532 is not
Expand Down Expand Up @@ -442,52 +443,3 @@ check.outlierness.bounds <- function(outlier_tree_model) {
check.is.model.obj(outlier_tree_model)
return(outlier_tree_model$obj_from_cpp$bounds)
}

#' @title Unpack Outlier Tree model after de-serializing
#' @description After persisting an outlier tree model object through `saveRDS`, `save`,
#' or restarting a session, the underlying C objects that constitute the outlier tree
#' model, which live only in C heap memory, are not saved along with it, and are thus
#' not restored after loading a saved model through `readRDS` or `load`.
#'
#' The model object, however, keeps serialized versions of the C objects as raw bytes,
#' from which the C objects can be reconstructed. This reconstruction happens
#' automatically when calling `predict`, `print`, or `summary` on an object freshly
#' loaded through `readRDS` or `load`.
#'
#' This function allows de-serializing the object bytes without invoking any extra
#' side effects or computations, akin to XGBoost's `xgb.Booster.complete` or
#' CatBoost's `catboost.restore_handle`.
#' @details If the model is going to be used in a production system, it's possible
#' after de-serialization to delete the raw bytes in order to save memory (e.g.
#' `otree$obj_from_cpp$serialized_obj <- NULL`). The memory will however not be
#' freed immediately, as it is only reclaimed when R's garbage collector runs.
#' @param model An Outlier Tree object as returned by `outlier.tree`, which has
#' been just loaded from a disk file through `readRDS`, `load`, or a session restart.
#' @return No return value. Object is modified in-place.
#' @examples
#' ### Warning: this example will generate a temporary .Rds
#' ### file in your temp folder, and will then delete it
#' library(outliertree)
#' set.seed(1)
#' df <- as.data.frame(matrix(rnorm(1000), nrow = 250))
#' otree <- outlier.tree(df, outliers_print=0, nthreads=1)
#' temp_file <- file.path(tempdir(), "otree.Rds")
#' saveRDS(otree, temp_file)
#' otree2 <- readRDS(temp_file)
#' file.remove(temp_file)
#'
#' cat("Pointer after loading model is this: \n")
#' print(otree2$obj_from_cpp$ptr_model)
#'
#' ### now unpack the raw bytes
#' unpack.outlier.tree(otree2)
#' cat("Pointer after unpacking is this: \n")
#' print(otree2$obj_from_cpp$ptr_model)
#' @export
unpack.outlier.tree <- function(model) {
    # Reject anything that is not a usable Outlier Tree model object.
    check.is.model.obj(model)

    # Both the pointer and the raw bytes live under the same sub-list.
    cpp_obj <- model$obj_from_cpp

    # Rebuild the C++ object from the stored bytes only when the pointer is
    # reported as null; the pointer is passed along so it can be filled in.
    if (check_null_ptr_model(cpp_obj$ptr_model)) {
        deserialize_OutlierTree(cpp_obj$serialized_obj, cpp_obj$ptr_model)
    }

    # Called for its side effect only: nothing is returned visibly.
    invisible(NULL)
}
Binary file modified build/vignette.rds
Binary file not shown.
231 changes: 113 additions & 118 deletions inst/doc/Explainable_Outlier_Detection_in_Titanic_dataset.html

Large diffs are not rendered by default.

Loading

0 comments on commit 194efc2

Please sign in to comment.