Skip to content

Commit

Permalink
Forder (Rdatatable#3124)
Browse files Browse the repository at this point in the history
  • Loading branch information
mattdowle authored Oct 24, 2018
1 parent 88439d9 commit e59ba14
Show file tree
Hide file tree
Showing 10 changed files with 886 additions and 1,385 deletions.
1 change: 1 addition & 0 deletions CRAN_Release.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ cd R-devel-strict # important to change directory name before building not af
make
alias Rdevel-strict='~/build/R-devel-strict/bin/R --vanilla'
cd ~/GitHub/data.table
## edit ~/.R/Makevars and activate "CFLAGS=-O0 -g" there to trace
Rdevel-strict CMD INSTALL data.table_1.11.8.tar.gz
# Check UBSAN and ASAN flags appear in compiler output above. Rdevel was compiled with them so should be passed through to here
Rdevel-strict
Expand Down
4 changes: 2 additions & 2 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1859,10 +1859,10 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
if (verbose) {last.started.at=proc.time();cat("setkey() afterwards for keyby=.EACHI ... ");flush.console()}
setkeyv(ans,names(ans)[seq_along(byval)])
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
} else if (!missing(keyby) || (haskey(x) && bysameorder)) {
} else if (!missing(keyby) || (haskey(x) && bysameorder && (byjoin || (length(allbyvars) && identical(allbyvars,head(key(x),length(allbyvars))))))) {
setattr(ans,"sorted",names(ans)[seq_along(grpcols)])
}
alloc.col(ans) # TO DO: overallocate in dogroups in the first place and remove this line
alloc.col(ans) # TODO: overallocate in dogroups in the first place and remove this line
}

.optmean <- function(expr) { # called by optimization of j inside [.data.table only. Outside for a small speed advantage.
Expand Down
10 changes: 5 additions & 5 deletions R/test.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ test <- function(num,x,y=TRUE,error=NULL,warning=NULL,output=NULL,message=NULL)
}
if (!fail && !length(error) && (!length(output) || !missing(y))) { # TODO test y when output=, too
y = try(y,TRUE)
if (identical(x,y)) return(invisible())
if (identical(x,y)) return(invisible(TRUE))
all.equal.result = TRUE
if (is.data.table(x) && is.data.table(y)) {
if (!selfrefok(x) || !selfrefok(y)) {
Expand All @@ -230,12 +230,12 @@ test <- function(num,x,y=TRUE,error=NULL,warning=NULL,output=NULL,message=NULL)
setattr(yc,"row.names",NULL)
setattr(xc,"index",NULL) # too onerous to create test RHS with the correct index as well, just check result
setattr(yc,"index",NULL)
if (identical(xc,yc) && identical(key(x),key(y))) return(invisible()) # check key on original x and y because := above might have cleared it on xc or yc
if (identical(xc,yc) && identical(key(x),key(y))) return(invisible(TRUE)) # check key on original x and y because := above might have cleared it on xc or yc
if (isTRUE(all.equal.result<-all.equal(xc,yc)) && identical(key(x),key(y)) &&
identical(vapply_1c(xc,typeof), vapply_1c(yc,typeof))) return(invisible())
identical(vapply_1c(xc,typeof), vapply_1c(yc,typeof))) return(invisible(TRUE))
}
}
if (is.atomic(x) && is.atomic(y) && isTRUE(all.equal.result<-all.equal(x,y,check.names=!isTRUE(y))) && typeof(x)==typeof(y)) return(invisible())
if (is.atomic(x) && is.atomic(y) && isTRUE(all.equal.result<-all.equal(x,y,check.names=!isTRUE(y))) && typeof(x)==typeof(y)) return(invisible(TRUE))
# For test 617 on r-prerel-solaris-sparc on 7 Mar 2013
# nocov start
if (!fail) {
Expand All @@ -255,6 +255,6 @@ test <- function(num,x,y=TRUE,error=NULL,warning=NULL,output=NULL,message=NULL)
assign("whichfail", c(whichfail, num), parent.frame(), inherits=TRUE)
# nocov end
}
invisible()
invisible(!fail)
}

105 changes: 56 additions & 49 deletions inst/tests/tests.Rraw

Large diffs are not rendered by default.

27 changes: 17 additions & 10 deletions src/bmerge.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,14 +209,20 @@ static union {
static int mid, tmplow, tmpupp; // global to save them being added to recursive stack. Maybe optimizer would do this anyway.
static SEXP ic, xc;

// Reads element i of the buffer p as a uint64_t and flips its most significant bit
// (XOR with 0x8000000000000000), returning the result.
// bmerge_r selects this instead of dtwiddle for REALSXP columns that inherit from integer64.
// NOTE(review): presumably p holds two's-complement int64 values, so that flipping the sign bit
// makes unsigned comparison of the results agree with signed order -- confirm against callers.
static uint64_t i64twiddle(void *p, int i)
{
return ((uint64_t *)p)[i] ^ 0x8000000000000000;
// Always ascending and NA first (0) when used by bmerge
}

void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisgrp, int lowmax, int uppmax)
// col is >0 and <=ncol-1 if this range of [xlow,xupp] and [ilow,iupp] match up to but not including that column
// lowmax=1 if xlowIn is the lower bound of this group (needed for roll)
// uppmax=1 if xuppIn is the upper bound of this group (needed for roll)
// new: col starts with -1 for non-equi joins, which gathers rows from nested id group counter 'thisgrp'
{
int xlow=xlowIn, xupp=xuppIn, ilow=ilowIn, iupp=iuppIn, j, k, ir, lir, tmp;
Rboolean isInt64=FALSE;
bool isInt64=false;
ir = lir = ilow + (iupp-ilow)/2; // lir = logical i row.
if (o) ir = o[lir]-1; // ir = the actual i row if i were ordered
if (col>-1) {
Expand Down Expand Up @@ -329,13 +335,14 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg
if (xval.s == ival.s) tmpupp=mid; else ilow=mid; // see above re ==
}
break;
case REALSXP :
case REALSXP : {
isInt64 = INHERITS(xc, char_integer64);
twiddle = isInt64 ? &i64twiddle : &dtwiddle;
ival.ull = twiddle(DATAPTR(ic), ir, 1);
uint64_t (*twiddle)(void *, int) = isInt64 ? &i64twiddle : &dtwiddle;
// TODO: remove this last remaining use of i64twiddle. remove DATAPTR too.
ival.ull = twiddle(DATAPTR(ic), ir);
while(xlow < xupp-1) {
mid = xlow + (xupp-xlow)/2;
xval.ull = twiddle(DATAPTR(xc), XIND(mid), 1);
xval.ull = twiddle(DATAPTR(xc), XIND(mid));
if (xval.ull<ival.ull) {
xlow=mid;
} else if (xval.ull>ival.ull) {
Expand All @@ -345,12 +352,12 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg
tmpupp = mid;
while(tmplow<xupp-1) {
mid = tmplow + (xupp-tmplow)/2;
xval.ull = twiddle(DATAPTR(xc), XIND(mid), 1);
xval.ull = twiddle(DATAPTR(xc), XIND(mid));
if (xval.ull == ival.ull) tmplow=mid; else xupp=mid;
}
while(xlow<tmpupp-1) {
mid = xlow + (tmpupp-xlow)/2;
xval.ull = twiddle(DATAPTR(xc), XIND(mid), 1);
xval.ull = twiddle(DATAPTR(xc), XIND(mid));
if (xval.ull == ival.ull) tmpupp=mid; else xlow=mid;
}
break;
Expand Down Expand Up @@ -380,17 +387,17 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg
if (col>-1) {
while(tmplow<iupp-1) {
mid = tmplow + (iupp-tmplow)/2;
xval.ull = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid, 1 );
xval.ull = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid);
if (xval.ull == ival.ull) tmplow=mid; else iupp=mid;
}
while(ilow<tmpupp-1) {
mid = ilow + (tmpupp-ilow)/2;
xval.ull = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid, 1 );
xval.ull = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid);
if (xval.ull == ival.ull) tmpupp=mid; else ilow=mid;
}
}
// ilow and iupp now surround the group in ic, too
break;
} break;
default:
error("Type '%s' not supported as key column", type2char(TYPEOF(xc)));
}
Expand Down
6 changes: 2 additions & 4 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ SEXP sym_index;
SEXP sym_BY;
SEXP sym_starts, char_starts;
SEXP sym_maxgrpn;
Rboolean INHERITS(SEXP x, SEXP char_);
bool INHERITS(SEXP x, SEXP char_);
long long DtoLL(double x);
double LLtoD(long long x);
double NA_INT64_D;
Expand All @@ -90,9 +90,7 @@ Rboolean isDatatable(SEXP x);

// forder.c
int StrCmp(SEXP x, SEXP y);
unsigned long long dtwiddle(void *p, int i, int order);
unsigned long long i64twiddle(void *p, int i, int order);
unsigned long long (*twiddle)(void *, int, int);
uint64_t dtwiddle(void *p, int i);
SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP naArg);
bool need2utf8(SEXP x, int n);
SEXP isReallyReal(SEXP);
Expand Down
Loading

0 comments on commit e59ba14

Please sign in to comment.