Skip to content

Commit

Permalink
version 1.7.1
Browse files Browse the repository at this point in the history
  • Loading branch information
david-cortes authored and cran-robot committed Apr 16, 2021
1 parent fa8daef commit 46e302c
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 46 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: outliertree
Type: Package
Title: Explainable Outlier Detection Through Decision Tree Conditioning
Version: 1.6.0
Date: 2021-03-14
Version: 1.7.1
Date: 2021-04-15
Author: David Cortes
Maintainer: David Cortes <[email protected]>
URL: https://github.com/david-cortes/outliertree
Expand All @@ -22,6 +22,6 @@ LinkingTo: Rcpp, Rcereal
LazyData: true
RoxygenNote: 7.1.1
NeedsCompilation: yes
Packaged: 2021-03-14 17:56:12 UTC; david
Packaged: 2021-04-15 19:45:05 UTC; david
Repository: CRAN
Date/Publication: 2021-03-15 00:40:02 UTC
Date/Publication: 2021-04-16 17:40:03 UTC
6 changes: 3 additions & 3 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
e7f918051f1ea35b8be12e16d9597b23 *DESCRIPTION
09c3b26be5c45faed8edfbd8864518d0 *DESCRIPTION
f2f84a6270f5aef385a2a886f1246758 *NAMESPACE
cd011562a496abe949f9c67b0e70c7e8 *R/RcppExports.R
e89f8e9179527df974b7fdb0b0cbddfb *R/helpers.R
Expand All @@ -20,10 +20,10 @@ e4f9598822d1683a4810f8b70b3f719d *man/summary.outliertree.Rd
78d2bc9006273f4c601fc7bbfde77904 *man/unpack.outlier.tree.Rd
de413d15ec2e4e74faf23a7afb8e603f *src/Makevars
79eea95bea118e04f0c9368fde6bef85 *src/RcppExports.cpp
f07862e4cc5545366ad95cf829879237 *src/Rwrapper.cpp
20a3fa9b22500e886af3b615e73795bd *src/Rwrapper.cpp
353cd9789f3b65cb350a64a2d450784c *src/cat_outlier.cpp
60581e03bd69467d5ec13e66076f0b2d *src/clusters.cpp
aeca765e3f86a8ab5922822504bc29aa *src/fit_model.cpp
7cdf5ce484dc130951ab4948c761cd8d *src/fit_model.cpp
d2d863f2743427121c5924bb9f70d217 *src/misc.cpp
2f164850f14fd869f199159613b48255 *src/outlier_tree.h
77d9d57e9cacb096c19fec48c63df299 *src/predict.cpp
Expand Down
107 changes: 71 additions & 36 deletions src/Rwrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,22 +36,20 @@ Rcpp::RawVector serialize_OutlierTree(ModelOutputs *model_outputs)
Rcpp::Rcerr << "Error: model is too big to serialize, resulting object will not be usable.\n" << std::endl;
return Rcpp::RawVector();
}
// Rcpp::RawVector retval((size_t)vec_size);
Rcpp::RawVector retval;
size_t vec_size_ = (size_t)vec_size;
retval = Rcpp::unwindProtect(alloc_RawVec, (void*)&vec_size_);
Rcpp::RawVector retval = Rcpp::unwindProtect(alloc_RawVec, (void*)&vec_size_);
if (!retval.size())
return retval;
ss.seekg(0, ss.beg);
ss.read(reinterpret_cast<char*>(&retval[0]), retval.size());
ss.read(reinterpret_cast<char*>(RAW(retval)), retval.size());
return retval;
}

// [[Rcpp::export(rng = false)]]
SEXP deserialize_OutlierTree(Rcpp::RawVector src)
{
std::stringstream ss;
ss.write(reinterpret_cast<char*>(&src[0]), src.size());
ss.write(reinterpret_cast<char*>(RAW(src)), src.size());
ss.seekg(0, ss.beg);
std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
{
Expand Down Expand Up @@ -80,9 +78,9 @@ double* set_R_nan_as_C_nan(double *restrict x_R, std::vector<double> &x_C, size_

/* for predicting outliers */
Rcpp::List describe_outliers(ModelOutputs &model_outputs,
double *arr_num,
int *arr_cat,
int *arr_ord,
double *restrict arr_num,
int *restrict arr_cat,
int *restrict arr_ord,
Rcpp::ListOf<Rcpp::StringVector> cat_levels,
Rcpp::ListOf<Rcpp::StringVector> ord_levels,
Rcpp::StringVector colnames_num,
Expand Down Expand Up @@ -1056,6 +1054,37 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
return outp;
}

/* Bundles every argument of 'describe_outliers' into one object, so that the
   whole set can travel through the single 'void*' parameter accepted by
   'describe_outliers_wrapper'.
   The callers in this file fill it through positional aggregate
   initialization, so the order of the members below must not be changed.
   All members are non-owning: the struct only borrows what the caller holds. */
struct args_describe_outliers {
/* model object, forwarded dereferenced (as a reference) */
ModelOutputs *model_outputs;
/* raw data arrays, forwarded as-is (as pointers) */
double *arr_num;
int *arr_cat;
int *arr_ord;
/* the remaining members point to the caller's Rcpp objects and are
   dereferenced by the wrapper before being handed to 'describe_outliers' */
Rcpp::ListOf<Rcpp::StringVector> *cat_levels;
Rcpp::ListOf<Rcpp::StringVector> *ord_levels;
Rcpp::StringVector *colnames_num;
Rcpp::StringVector *colnames_cat;
Rcpp::StringVector *colnames_ord;
Rcpp::NumericVector *min_date;
Rcpp::NumericVector *min_ts;
};

/* Adapter that gives 'describe_outliers' the 'SEXP (void*)' signature under
   which it is handed to 'Rcpp::unwindProtect' elsewhere in this file.
   'args_' must point to an 'args_describe_outliers' whose pointer members
   are all valid; each one is unpacked and forwarded in declaration order.
   The list produced by 'describe_outliers' is returned as a SEXP. */
SEXP describe_outliers_wrapper(void *args_)
{
    const args_describe_outliers &packed = *static_cast<args_describe_outliers*>(args_);
    return describe_outliers(*packed.model_outputs,
                             packed.arr_num,
                             packed.arr_cat,
                             packed.arr_ord,
                             *packed.cat_levels,
                             *packed.ord_levels,
                             *packed.colnames_num,
                             *packed.colnames_cat,
                             *packed.colnames_ord,
                             *packed.min_date,
                             *packed.min_ts);
}


/* for extracting info about flaggable outliers */
Rcpp::List extract_outl_bounds(ModelOutputs &model_outputs,
Rcpp::ListOf<Rcpp::StringVector> cat_levels,
Expand Down Expand Up @@ -1150,13 +1179,13 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
cols_ignore_ptr = &cols_ignore[0];
}
std::vector<double> Xcpp;
double *arr_num_C = set_R_nan_as_C_nan(&arr_num[0], Xcpp, arr_num.size(), nthreads);
double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);

std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
found_outliers = fit_outliers_models(*model_outputs,
arr_num_C, ncols_numeric,
&arr_cat[0], ncols_categ, &ncat[0],
&arr_ord[0], ncols_ord, &ncat_ord[0],
INTEGER(arr_cat), ncols_categ, INTEGER(ncat),
INTEGER(arr_ord), ncols_ord, INTEGER(ncat_ord),
nrows, cols_ignore_ptr, nthreads,
categ_as_bin, ord_as_bin, cat_bruteforce_subset, categ_from_maj, take_mid,
max_depth, max_perc_outliers, min_size_numeric, min_size_categ,
Expand All @@ -1172,17 +1201,20 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
if (!Rf_xlength(outp["serialized_obj"]))
return outp;
if (return_outliers) {
outp["outliers_info"] = describe_outliers(*model_outputs,
arr_num_C,
&arr_cat[0],
&arr_ord[0],
cat_levels,
ord_levels,
colnames_num,
colnames_cat,
colnames_ord,
min_date,
min_ts);
args_describe_outliers temp = {
model_outputs.get(),
arr_num_C,
INTEGER(arr_cat),
INTEGER(arr_ord),
&cat_levels,
&ord_levels,
&colnames_num,
&colnames_cat,
&colnames_ord,
&min_date,
&min_ts
};
outp["outliers_info"] = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
}
/* add number of trees and clusters */
size_t ntrees = 0, nclust = 0;
Expand Down Expand Up @@ -1211,22 +1243,25 @@ Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads,
Rcpp::NumericVector min_ts)
{
std::vector<double> Xcpp;
double *arr_num_C = set_R_nan_as_C_nan(&arr_num[0], Xcpp, arr_num.size(), nthreads);
double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);

ModelOutputs *model_outputs = static_cast<ModelOutputs*>(R_ExternalPtrAddr(ptr_model));
bool found_outliers = find_new_outliers(&arr_num[0], &arr_cat[0], &arr_ord[0],
bool found_outliers = find_new_outliers(REAL(arr_num), INTEGER(arr_cat), INTEGER(arr_ord),
nrows, nthreads, *model_outputs);
Rcpp::List outp = describe_outliers(*model_outputs,
arr_num_C,
&arr_cat[0],
&arr_ord[0],
cat_levels,
ord_levels,
colnames_num,
colnames_cat,
colnames_ord,
min_date,
min_ts);
args_describe_outliers temp = {
model_outputs,
arr_num_C,
INTEGER(arr_cat),
INTEGER(arr_ord),
&cat_levels,
&ord_levels,
&colnames_num,
&colnames_cat,
&colnames_ord,
&min_date,
&min_ts
};
Rcpp::List outp = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
outp["found_outliers"] = Rcpp::LogicalVector(found_outliers);
forget_row_outputs(*model_outputs);
return outp;
Expand All @@ -1236,7 +1271,7 @@ Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads,
Rcpp::LogicalVector check_few_values(Rcpp::NumericVector arr_num, size_t nrows, size_t ncols, int nthreads)
{
std::vector<char> too_few_vals(ncols, 0);
check_more_two_values(&arr_num[0], nrows, ncols, nthreads, too_few_vals.data());
check_more_two_values(REAL(arr_num), nrows, ncols, nthreads, too_few_vals.data());
Rcpp::LogicalVector outp(ncols);
for (size_t col = 0; col < ncols; col++) {
outp[col] = (bool) too_few_vals[col];
Expand Down
6 changes: 3 additions & 3 deletions src/fit_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,9 @@ bool fit_outliers_models(ModelOutputs &model_outputs,
model_outputs.start_ix_cat_counts[0] = 0;
if (tot_cols > ncols_numeric) {
input_data.max_categ = calculate_category_indices(&model_outputs.start_ix_cat_counts[0], input_data.ncat, input_data.ncols_categ,
(bool*) &input_data.skip_col[ncols_numeric]);
input_data.max_categ = calculate_category_indices(&model_outputs.start_ix_cat_counts[input_data.ncols_categ], input_data.ncat_ord, input_data.ncols_ord,
(bool*) &input_data.skip_col[input_data.ncols_numeric + input_data.ncols_categ], input_data.max_categ);
(bool*) input_data.skip_col.data() + ncols_numeric);
input_data.max_categ = calculate_category_indices(model_outputs.start_ix_cat_counts.data() + input_data.ncols_categ, input_data.ncat_ord, input_data.ncols_ord,
(bool*) input_data.skip_col.data() + input_data.ncols_numeric + input_data.ncols_categ, input_data.max_categ);
} else {
input_data.max_categ = 0;
}
Expand Down

0 comments on commit 46e302c

Please sign in to comment.