Skip to content

Commit

Permalink
Merge pull request litedb-org#2415 from anatawa12/validate-bson-string
Browse files Browse the repository at this point in the history
chore: throw an exception when encountering an unpaired surrogate instead of replacing it with U+FFFD
  • Loading branch information
mbdavid authored Feb 13, 2024
2 parents 4e856d8 289e9b1 commit 31e0ff7
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 14 deletions.
4 changes: 2 additions & 2 deletions LiteDB/Document/BsonValue.cs
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 648,7 @@ internal virtual int GetBytesCount(bool recalc)
case BsonType.Double: return 8;
case BsonType.Decimal: return 16;

case BsonType.String: return Encoding.UTF8.GetByteCount(this.AsString);
case BsonType.String: return StringEncoding.UTF8.GetByteCount(this.AsString);

case BsonType.Binary: return this.AsBinary.Length;
case BsonType.ObjectId: return 12;
Expand All @@ -674,7 674,7 @@ protected int GetBytesCountElement(string key, BsonValue value)

return
1 // element type
Encoding.UTF8.GetByteCount(key) // CString
StringEncoding.UTF8.GetByteCount(key) // CString
1 // CString \0
value.GetBytesCount(true)
(variant ? 5 : 0); // bytes.Length 0x??
Expand Down
8 changes: 4 additions & 4 deletions LiteDB/Engine/Disk/Serializer/BufferReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 154,7 @@ public string ReadString(int count)
// if fits in current segment, use inner array - otherwise copy from multiples segments
if (_currentPosition count <= _current.Count)
{
value = Encoding.UTF8.GetString(_current.Array, _current.Offset _currentPosition, count);
value = StringEncoding.UTF8.GetString(_current.Array, _current.Offset _currentPosition, count);

this.MoveForward(count);
}
Expand All @@ -165,7 165,7 @@ public string ReadString(int count)

this.Read(buffer, 0, count);

value = Encoding.UTF8.GetString(buffer, 0, count);
value = StringEncoding.UTF8.GetString(buffer, 0, count);

BufferPool.Return(buffer);
}
Expand Down Expand Up @@ -204,7 204,7 @@ public string ReadCString()

this.MoveForward(1); // 1 to '\0'

return Encoding.UTF8.GetString(mem.ToArray());
return StringEncoding.UTF8.GetString(mem.ToArray());
}
}
}
Expand All @@ -220,7 220,7 @@ private bool TryReadCStringCurrentSegment(out string value)
{
if (_current[pos] == 0x00)
{
value = Encoding.UTF8.GetString(_current.Array, _current.Offset _currentPosition, count);
value = StringEncoding.UTF8.GetString(_current.Array, _current.Offset _currentPosition, count);
this.MoveForward(count 1); // 1 means '\0'
return true;
}
Expand Down
12 changes: 6 additions & 6 deletions LiteDB/Engine/Disk/Serializer/BufferWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 152,13 @@ public void WriteCString(string value)
{
if (value.IndexOf('\0') > -1) throw LiteException.InvalidNullCharInString();

var bytesCount = Encoding.UTF8.GetByteCount(value);
var bytesCount = StringEncoding.UTF8.GetByteCount(value);
var available = _current.Count - _currentPosition; // avaiable in current segment

// can write direct in current segment (use < because need 1 \0)
if (bytesCount < available)
{
Encoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset _currentPosition);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset _currentPosition);

_current[_currentPosition bytesCount] = 0x00;

Expand All @@ -168,7 168,7 @@ public void WriteCString(string value)
{
var buffer = BufferPool.Rent(bytesCount);

Encoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);

this.Write(buffer, 0, bytesCount);

Expand All @@ -186,7 186,7 @@ public void WriteCString(string value)
/// </summary>
public void WriteString(string value, bool specs)
{
var count = Encoding.UTF8.GetByteCount(value);
var count = StringEncoding.UTF8.GetByteCount(value);

if (specs)
{
Expand All @@ -195,7 195,7 @@ public void WriteString(string value, bool specs)

if (count <= _current.Count - _currentPosition)
{
Encoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset _currentPosition);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset _currentPosition);

this.MoveForward(count);
}
Expand All @@ -204,7 204,7 @@ public void WriteString(string value, bool specs)
// rent a buffer to be re-usable
var buffer = BufferPool.Rent(count);

Encoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);

this.Write(buffer, 0, count);

Expand Down
4 changes: 2 additions & 2 deletions LiteDB/Engine/Structures/CollectionIndex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 121,8 @@ public static int GetLength(string name, string expr)
return
1 // Slot
1 // IndexType
Encoding.UTF8.GetByteCount(name) 1 // Name \0
Encoding.UTF8.GetByteCount(expr) 1 // Expression \0
StringEncoding.UTF8.GetByteCount(name) 1 // Name \0
StringEncoding.UTF8.GetByteCount(expr) 1 // Expression \0
1 // Unique
PageAddress.SIZE // Head
PageAddress.SIZE // Tail
Expand Down
12 changes: 12 additions & 0 deletions LiteDB/Utils/Encoding.cs
Original file line number Diff line number Diff line change
@@ -0,0 1,12 @@
using System.Text;

namespace LiteDB
{
    /// <summary>
    /// Holds the shared text encoding used to convert strings to and from UTF-8 bytes.
    /// </summary>
    internal class StringEncoding
    {
        /// <summary>
        /// Strict UTF-8 encoding: emits no byte-order mark and throws on invalid input.
        /// </summary>
        /// <remarks>
        /// The framework's default <see cref="Encoding.UTF8"/> replaces an unpaired surrogate
        /// with U+FFFD, which silently alters data and is not suitable for a database. This
        /// instance is created with <c>throwOnInvalidBytes: true</c> so that an unpaired
        /// surrogate (when encoding) or an invalid byte sequence (when decoding) raises an
        /// exception instead of being replaced.
        /// The field is <c>readonly</c> so the strict encoder cannot be swapped out at runtime.
        /// </remarks>
        public static readonly Encoding UTF8 = new UTF8Encoding(
            encoderShouldEmitUTF8Identifier: false,
            throwOnInvalidBytes: true);
    }
}

0 comments on commit 31e0ff7

Please sign in to comment.