forked from samkenxstream/SAMkenxsimdjson
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
561 additions
and
184 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#pragma once | ||
|
||
#ifdef SIMDJSON_COMPETITION_SAJSON | ||
|
||
#include "distinct_user_id.h" | ||
|
||
namespace distinct_user_id { | ||
|
||
struct sajson { | ||
size_t ast_buffer_size{0}; | ||
size_t *ast_buffer{nullptr}; | ||
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) { | ||
auto val = obj.get_value_of_key({key.data(), key.length()}); | ||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field is not a string"; } | ||
return { val.as_cstring(), val.get_string_length() }; | ||
} | ||
simdjson_really_inline uint64_t get_str_uint64(const ::sajson::value &obj, std::string_view key) { | ||
// Since sajson only supports 53-bit numbers, and IDs in twitter.json can be > 53 bits, we read the corresponding id_str and parse that. | ||
auto val = obj.get_value_of_key({key.data(), key.length()}); | ||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; } | ||
auto str = val.as_cstring(); | ||
char *endptr; | ||
uint64_t result = strtoull(str, &endptr, 10); | ||
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; } | ||
return result; | ||
} | ||
|
||
bool run(simdjson::padded_string &json, std::vector<uint64_t> &result) { | ||
using namespace sajson; | ||
if (!ast_buffer) { | ||
ast_buffer_size = json.size(); | ||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t)); | ||
} | ||
auto doc = parse( | ||
bounded_allocation(ast_buffer, ast_buffer_size), | ||
mutable_string_view(json.size(), json.data()) | ||
); | ||
if (!doc.is_valid()) { return false; } | ||
|
||
auto root = doc.get_root(); | ||
if (root.get_type() != TYPE_OBJECT) { return false; } | ||
auto statuses = root.get_value_of_key({"statuses", strlen("statuses")}); | ||
if (statuses.get_type() != TYPE_ARRAY) { return false; } | ||
|
||
for (size_t i=0; i<statuses.get_length(); i ) { | ||
auto tweet = statuses.get_array_element(i); | ||
|
||
// get tweet.user.id | ||
if (tweet.get_type() != TYPE_OBJECT) { return false; } | ||
auto user = tweet.get_value_of_key({"user", strlen("user")}); | ||
if (user.get_type() != TYPE_OBJECT) { return false; } | ||
result.push_back(get_str_uint64(user, "id_str")); | ||
|
||
// get tweet.retweeted_status.user.id | ||
auto retweet = tweet.get_value_of_key({"retweeted_status", strlen("retweeted_status")}); | ||
switch (retweet.get_type()) { | ||
case TYPE_OBJECT: { | ||
auto retweet_user = retweet.get_value_of_key({"user", strlen("user")}); | ||
if (retweet_user.get_type() != TYPE_OBJECT) { return false; } | ||
result.push_back(get_str_uint64(retweet_user, "id_str")); | ||
break; | ||
} | ||
// TODO distinguish null and missing. null is bad. missing is fine. | ||
case TYPE_NULL: | ||
break; | ||
default: | ||
return false; | ||
} | ||
} | ||
|
||
return true; | ||
} | ||
}; | ||
|
||
BENCHMARK_TEMPLATE(distinct_user_id, sajson)->UseManualTime(); | ||
|
||
} // namespace distinct_user_id | ||
|
||
#endif // SIMDJSON_COMPETITION_SAJSON | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#pragma once | ||
|
||
#ifdef SIMDJSON_COMPETITION_SAJSON | ||
|
||
#include "find_tweet.h" | ||
|
||
namespace find_tweet { | ||
|
||
struct sajson { | ||
size_t ast_buffer_size{0}; | ||
size_t *ast_buffer{nullptr}; | ||
simdjson_really_inline std::string_view get_string_view(const ::sajson::value &obj, std::string_view key) { | ||
auto val = obj.get_value_of_key({key.data(), key.length()}); | ||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field is not a string"; } | ||
return { val.as_cstring(), val.get_string_length() }; | ||
} | ||
simdjson_really_inline uint64_t get_str_uint64(const ::sajson::value &obj, std::string_view key) { | ||
// Since sajson only supports 53-bit numbers, and IDs in twitter.json can be > 53 bits, we read the corresponding id_str and parse that. | ||
auto val = obj.get_value_of_key({key.data(), key.length()}); | ||
if (val.get_type() != ::sajson::TYPE_STRING) { throw "field not a string"; } | ||
auto str = val.as_cstring(); | ||
char *endptr; | ||
uint64_t result = strtoull(str, &endptr, 10); | ||
if (endptr != &str[val.get_string_length()]) { throw "field is a string, but not an integer string"; } | ||
return result; | ||
} | ||
|
||
bool run(simdjson::padded_string &json, uint64_t find_id, std::string_view &result) { | ||
if (!ast_buffer) { | ||
ast_buffer_size = json.size(); | ||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t)); | ||
} | ||
auto doc = ::sajson::parse( | ||
::sajson::bounded_allocation(ast_buffer, ast_buffer_size), | ||
::sajson::mutable_string_view(json.size(), json.data()) | ||
); | ||
if (!doc.is_valid()) { return false; } | ||
|
||
auto root = doc.get_root(); | ||
if (root.get_type() != ::sajson::TYPE_OBJECT) { printf("a\n"); return false; } | ||
auto statuses = root.get_value_of_key({"statuses", strlen("statuses")}); | ||
if (statuses.get_type() != ::sajson::TYPE_ARRAY) { return false; } | ||
|
||
for (size_t i=0; i<statuses.get_length(); i ) { | ||
auto tweet = statuses.get_array_element(i); | ||
if (tweet.get_type() != ::sajson::TYPE_OBJECT) { printf("b\n"); return false; } | ||
// TODO if there is a way to get the raw string, it might be faster to iota find_id and then | ||
// compare it to each id_str, instead of parsing each int and comparing to find_id. | ||
if (get_str_uint64(tweet, "id_str") == find_id) { | ||
result = get_string_view(tweet, "text"); | ||
return true; | ||
} | ||
} | ||
|
||
return false; | ||
} | ||
}; | ||
|
||
BENCHMARK_TEMPLATE(find_tweet, sajson)->UseManualTime(); | ||
|
||
} // namespace find_tweet | ||
|
||
#endif // SIMDJSON_COMPETITION_SAJSON | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#pragma once | ||
|
||
#ifdef SIMDJSON_COMPETITION_SAJSON | ||
|
||
#include "kostya.h" | ||
|
||
namespace kostya { | ||
|
||
struct sajson { | ||
size_t ast_buffer_size{0}; | ||
size_t *ast_buffer{nullptr}; | ||
|
||
simdjson_really_inline double get_double(const ::sajson::value &obj, std::string_view key) { | ||
using namespace sajson; | ||
|
||
auto val = obj.get_value_of_key({key.data(), key.length()}); | ||
switch (val.get_type()) { | ||
case TYPE_INTEGER: | ||
case TYPE_DOUBLE: | ||
return val.get_number_value(); | ||
default: | ||
throw "field not double"; | ||
} | ||
} | ||
|
||
bool run(simdjson::padded_string &json, std::vector<point> &result) { | ||
using namespace sajson; | ||
|
||
if (!ast_buffer) { | ||
ast_buffer_size = json.size(); | ||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t)); | ||
} | ||
auto doc = parse( | ||
bounded_allocation(ast_buffer, ast_buffer_size), | ||
mutable_string_view(json.size(), json.data()) | ||
); | ||
if (!doc.is_valid()) { return false; } | ||
|
||
auto root = doc.get_root(); | ||
if (root.get_type() != TYPE_OBJECT) { return false; } | ||
auto points = root.get_value_of_key({"coordinates", strlen("coordinates")}); | ||
if (points.get_type() != TYPE_ARRAY) { return false; } | ||
|
||
for (size_t i=0; i<points.get_length(); i ) { | ||
auto point = points.get_array_element(i); | ||
if (point.get_type() != TYPE_OBJECT) { return false; } | ||
result.emplace_back(kostya::point{ | ||
get_double(point, "x"), | ||
get_double(point, "y"), | ||
get_double(point, "z") | ||
}); | ||
} | ||
|
||
return true; | ||
} | ||
}; | ||
|
||
BENCHMARK_TEMPLATE(kostya, sajson)->UseManualTime(); | ||
|
||
} // namespace kostya | ||
|
||
#endif // SIMDJSON_COMPETITION_SAJSON | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#pragma once | ||
|
||
#ifdef SIMDJSON_COMPETITION_SAJSON | ||
|
||
#include "large_random.h" | ||
|
||
namespace large_random { | ||
|
||
struct sajson { | ||
size_t ast_buffer_size{0}; | ||
size_t *ast_buffer{nullptr}; | ||
|
||
simdjson_really_inline double get_double(const ::sajson::value &obj, std::string_view key) { | ||
using namespace sajson; | ||
|
||
auto val = obj.get_value_of_key({key.data(), key.length()}); | ||
switch (val.get_type()) { | ||
case TYPE_INTEGER: | ||
case TYPE_DOUBLE: | ||
return val.get_number_value(); | ||
default: | ||
throw "field not double"; | ||
} | ||
} | ||
|
||
bool run(simdjson::padded_string &json, std::vector<point> &result) { | ||
using namespace sajson; | ||
|
||
if (!ast_buffer) { | ||
ast_buffer_size = json.size(); | ||
ast_buffer = (size_t *)std::malloc(ast_buffer_size * sizeof(size_t)); | ||
} | ||
auto doc = parse( | ||
bounded_allocation(ast_buffer, ast_buffer_size), | ||
mutable_string_view(json.size(), json.data()) | ||
); | ||
if (!doc.is_valid()) { return false; } | ||
|
||
auto points = doc.get_root(); | ||
if (points.get_type() != TYPE_ARRAY) { return false; } | ||
|
||
for (size_t i=0; i<points.get_length(); i ) { | ||
auto point = points.get_array_element(i); | ||
if (point.get_type() != TYPE_OBJECT) { return false; } | ||
result.emplace_back(large_random::point{ | ||
get_double(point, "x"), | ||
get_double(point, "y"), | ||
get_double(point, "z") | ||
}); | ||
} | ||
|
||
return true; | ||
} | ||
}; | ||
|
||
BENCHMARK_TEMPLATE(large_random, sajson)->UseManualTime(); | ||
|
||
} // namespace large_random | ||
|
||
#endif // SIMDJSON_COMPETITION_SAJSON | ||
|
Oops, something went wrong.