Bug #62156
rgw crashes seen for s3select json query with "where" clause
0%
Description
[cephuser@ceph-pri-hmaheswa-automation-0jcitv-node6 ~]$ venv/bin/aws s3api select-object-content --endpoint-url http://10.0.98.34:80 --bucket bkt1 --key small_json --expression-type 'SQL' --input-serialization '{"JSON": {"Type": "DOCUMENT"}, "CompressionType": "NONE"}' --output-serialization '{"JSON": {}}' --expression "select * from S3Object where employee.name=raju;" /dev/stdout
Could not connect to the endpoint URL: "http://10.0.98.34:80/bkt1/small_json?select&select-type=2"
[cephuser@ceph-pri-hmaheswa-automation-0jcitv-node6 ~]$
[cephuser@ceph-pri-hmaheswa-automation-0jcitv-node6 ~]$ cat small_json
{
"employee": {
"name": "raju",
"salary": 56000,
"married": true
}
}
[cephuser@ceph-pri-hmaheswa-automation-0jcitv-node6 ~]$ venv/bin/aws s3api select-object-content --endpoint-url http://10.0.98.34:80 --bucket bkt1 --key 200_mb_json --expression-type 'SQL' --input-serialization '{"JSON": {"Type": "DOCUMENT"}, "CompressionType": "NONE"}' --output-serialization '{"JSON": {}}' --expression "select * from S3Object[*] where tags=ring;" /dev/stdout
Could not connect to the endpoint URL: "http://10.0.98.34:80/bkt1/200_mb_json?select&select-type=2"
The JSON file was downloaded from: https://www.kaggle.com/datasets/kristoft/pitt-quantum-repository-106066-molecules
rgw crash info:
[root@ceph-pri-hmaheswa-automation-0jcitv-node5 crash]# cat 2023-07-25T08:50:29.789218Z_82a457cb-e975-4636-8804-7bfe1bbd0e08/meta
{
"crash_id": "2023-07-25T08:50:29.789218Z_82a457cb-e975-4636-8804-7bfe1bbd0e08",
"timestamp": "2023-07-25T08:50:29.789218Z",
"process_name": "radosgw",
"entity_name": "client.rgw.shared.pri.ceph-pri-hmaheswa-automation-0jcitv-node5.vnvrex",
"ceph_version": "18.0.0-5070-g01bc98b4",
"utsname_hostname": "ceph-pri-hmaheswa-automation-0jcitv-node5",
"utsname_sysname": "Linux",
"utsname_release": "5.14.0-284.18.1.el9_2.x86_64",
"utsname_version": "#1 SMP PREEMPT_DYNAMIC Wed May 31 10:39:18 EDT 2023",
"utsname_machine": "x86_64",
"os_name": "CentOS Stream",
"os_id": "centos",
"os_version_id": "8",
"os_version": "8",
"backtrace": [
"/lib64/libpthread.so.0(+0x12cf0) [0x7fb199b20cf0]",
"gsignal()",
"abort()",
"/usr/bin/radosgw(+0x673c58) [0x563908f9cc58]",
"(s3selectEngine::json_object::init_json_processor(s3selectEngine::s3select*)+0x78f) [0x5639093b4a8f]",
"(RGWSelectObj_ObjStore_S3::run_s3select_on_json(char const*, char const*, unsigned long)+0x364) [0x56390938ba64]",
"(RGWSelectObj_ObjStore_S3::json_processing(ceph::buffer::v15_2_0::list&, long, long)+0x6a5) [0x5639093903a5]",
"(RGWRados::get_obj_iterate_cb(DoutPrefixProvider const*, rgw_raw_obj const&, long, long, long, bool, RGWObjState*, void*)+0x131) [0x5639094f1d01]",
"/usr/bin/radosgw(+0xba8ed6) [0x5639094d1ed6]",
"(RGWRados::iterate_obj(DoutPrefixProvider const*, RGWObjectCtx&, RGWBucketInfo&, rgw_obj const&, long, long, unsigned long, int (*)(DoutPrefixProvider const*, rgw_raw_obj const&, long, long, long, bool, RGWObjState*, void*), void*, optional_yield)+0x3b6) [0x563909514a36]",
"(RGWRados::Object::Read::iterate(DoutPrefixProvider const*, long, long, RGWGetDataCB*, optional_yield)+0x138) [0x563909515298]",
"(RGWGetObj::execute(optional_yield)+0x1122) [0x5639092b8582]",
"(RGWSelectObj_ObjStore_S3::execute(optional_yield)+0xc1) [0x56390938e131]",
"(rgw_process_authenticated(RGWHandler_REST*, RGWOp*&, RGWRequest*, req_state*, optional_yield, rgw::sal::Driver*, bool)+0xd91) [0x5639090a5171]",
"(process_request(RGWProcessEnv const&, RGWRequest*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWRestfulIO*, optional_yield, rgw::dmclock::Scheduler*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> >, int*)+0x2b5c) [0x5639090a875c]",
"/usr/bin/radosgw(+0x6b347e) [0x563908fdc47e]",
"/usr/bin/radosgw(+0x6b4147) [0x563908fdd147]",
"make_fcontext()"
]
}
ceph version: 18.0.0-5070-g01bc98b4 (01bc98b489ef938d10e187313be218ecd8a7ef33) reef (dev)
Updated by Gal Salomon 9 months ago
The input-serialization setting (passed via the AWS CLI) marks the s3select request as a JSON query,
while
the statement itself does not follow the JSON query syntax (specifically, the FROM clause).
This creates a conflict that is not handled correctly, and that leads to the crash.
Updated by Daniel Gryniewicz 9 months ago
- Status changed from New to Fix Under Review
Updated by Gal Salomon 9 months ago
- Status changed from Fix Under Review to Pending Backport
- Backport set to reef
Updated by Backport Bot 9 months ago
- Copied to Backport #62466: reef: rgw crashes seen for s3select json query with "where" clause added
Updated by Backport Bot 9 months ago
- Tags changed from s3select to s3select backport_processed