Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

util/ranger: fix incorrect behavior about index who has prefix column(#8851) #8878

Merged
merged 3 commits into from
Dec 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 41 additions & 7 deletions util/ranger/ranger.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,12 @@ func buildCNFIndexRange(sc *stmtctx.StatementContext, cols []*expression.Column,

// Take prefix index into consideration.
if hasPrefix(lengths) {
fixPrefixColRange(ranges, lengths, newTp)
if fixPrefixColRange(ranges, lengths, newTp) {
ranges, err = unionRanges(sc, ranges)
if err != nil {
return nil, errors.Trace(err)
}
}
}

return ranges, nil
Expand Down Expand Up @@ -397,20 +402,46 @@ func hasPrefix(lengths []int) bool {
return false
}

func fixPrefixColRange(ranges []*Range, lengths []int, tp []*types.FieldType) {
// fixPrefixColRange checks whether the range of one column exceeds the length and needs to be cut.
// It specially handles the last column of each range point. If the last one need to be cut, it will
// change the exclude status of that point and return `true` to tell
// that we need do a range merging since that interval may have intersection.
// e.g. if the interval is (-inf -inf, a xxxxx), (a xxxxx, +inf +inf) and the length of the last column is 3,
// then we'll change it to (-inf -inf, a xxx], [a xxx, +inf +inf). You can see that this two interval intersect,
// so we need a merge operation.
// Q: only checking the last column to decide whether the endpoint's exclude status needs to be reset is enough?
// A: Yes, suppose that the interval is (-inf -inf, a xxxxx b) and only the second column needs to be cut.
// The result would be (-inf -inf, a xxx b) if the length of it is 3. Obviously we only need to care about the data
// whose the first two key is `a` and `xxx`. It read all data whose index value begins with `a` and `xxx` and the third
// value less than `b`, covering the values begin with `a` and `xxxxx` and the third value less than `b` perfectly.
// So in this case we don't need to reset its exclude status. The right endpoint case can be proved in the same way.
func fixPrefixColRange(ranges []*Range, lengths []int, tp []*types.FieldType) bool {
hasCut := false
for _, ran := range ranges {
for i := 0; i < len(ran.LowVal); i++ {
lowTail := len(ran.LowVal) - 1
for i := 0; i < lowTail; i++ {
fixRangeDatum(&ran.LowVal[i], lengths[i], tp[i])
}
ran.LowExclude = false
for i := 0; i < len(ran.HighVal); i++ {
lowCut := false
lowCut = fixRangeDatum(&ran.LowVal[lowTail], lengths[lowTail], tp[lowTail])
if lowCut {
ran.LowExclude = false
}
highTail := len(ran.HighVal) - 1
for i := 0; i < highTail; i++ {
fixRangeDatum(&ran.HighVal[i], lengths[i], tp[i])
}
ran.HighExclude = false
highCut := false
highCut = fixRangeDatum(&ran.HighVal[highTail], lengths[highTail], tp[highTail])
if highCut {
ran.HighExclude = false
}
hasCut = lowCut || highCut
}
return hasCut
}

func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) {
func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) bool {
// If this column is prefix and the prefix length is smaller than the range, cut it.
// In case of UTF8, prefix should be cut by characters rather than bytes
if v.Kind() == types.KindString || v.Kind() == types.KindBytes {
Expand All @@ -423,12 +454,15 @@ func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) {
truncateStr := string(rs[:length])
// truncate value and limit its length
v.SetString(truncateStr)
return true
}
} else if length != types.UnspecifiedLength && len(colValue) > length {
// truncate value and limit its length
v.SetBytes(colValue[:length])
return true
}
}
return false
}

// We cannot use the FieldType of column directly. e.g. the column a is int32 and we have a > 1111111111111111111.
Expand Down
28 changes: 28 additions & 0 deletions util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,34 @@ func (s *testRangerSuite) TestIndexRange(c *C) {
filterConds: "[eq(test.t.e, 你好啊)]",
resultStr: "[[\"[228 189]\",\"[228 189]\"]]",
},
{
indexPos: 2,
exprStr: `d in ("你好啊")`,
accessConds: "[in(test.t.d, 你好啊)]",
filterConds: "[in(test.t.d, 你好啊)]",
resultStr: "[[\"你好\",\"你好\"]]",
},
{
indexPos: 2,
exprStr: `d not in ("你好啊")`,
accessConds: "[not(in(test.t.d, 你好啊))]",
filterConds: "[not(in(test.t.d, 你好啊))]",
resultStr: "[(NULL,+inf]]",
},
{
indexPos: 2,
exprStr: `d < "你好" || d > "你好"`,
accessConds: "[or(lt(test.t.d, 你好), gt(test.t.d, 你好))]",
filterConds: "[or(lt(test.t.d, 你好), gt(test.t.d, 你好))]",
resultStr: "[[-inf,\"你好\") (\"你好\",+inf]]",
},
{
indexPos: 2,
exprStr: `not(d < "你好" || d > "你好")`,
accessConds: "[and(ge(test.t.d, 你好), le(test.t.d, 你好))]",
filterConds: "[and(ge(test.t.d, 你好), le(test.t.d, 你好))]",
resultStr: "[[\"你好\",\"你好\"]]",
},
}

for _, tt := range tests {
Expand Down