Project

General

Profile

Backport #48543

nautilus: rgw_file: common_prefixes returned out of lexical order

Added by Backport Bot 3 months ago. Updated about 1 month ago.

Status:
Resolved
Priority:
Normal
Assignee:
Target version:
Release:
nautilus
Crash signature:


Related issues

Copied from rgw - Bug #48410: rgw_file: common_prefixes returned out of lexical order Resolved

History

#1 Updated by Backport Bot 3 months ago

  • Copied from Bug #48410: rgw_file: common_prefixes returned out of lexical order added

#2 Updated by Nathan Cutler about 2 months ago

  • Status changed from New to Need More Info
  • Assignee set to Matt Benjamin

non-trivial conflicts

#3 Updated by Matt Benjamin about 2 months ago

commit c587ceb1a5408db9a428818bd029ac5bdf88b183
Author: Matt Benjamin <>
Date: Tue Dec 1 07:53:56 2020 -0500

rgw_file: return common_prefixes in lexical order
Since inception RGWReaddirRequest has sent all leaf objects first
(in lexical order), then common_prefixes (in lexical order). In
hindsight, an overall listing could trivially be returned out of
lexical order, which can cause continued listing of large, mixed
directories to fail.
RCA by Dan Gryniewicz.
Fixes: https://tracker.ceph.com/issues/48410
Resolves: rhbz#1878250
Signed-off-by: Matt Benjamin &lt;&gt;
(cherry picked from commit e561e98e5cca2678854e01c990f95e474022b7ed)

diff --git a/src/rgw/rgw_file.h b/src/rgw/rgw_file.h
index 7cdd7e1ff24..8987aeb6ab1 100644
--- a/src/rgw/rgw_file.h
+++ b/src/rgw/rgw_file.h
@ -1559,94 +1559,191 @ public:

void send_response() override {
struct req_state* s = get_state();
- for (const auto& iter : objs) {
+ auto cnow = real_clock::now();

- boost::string_ref sref {iter.key.name};
+ /* enumerate objs and common_prefixes in parallel,
+ * avoiding increment on and end iterator, which is
+ * undefined */

- lsubdout(cct, rgw, 15) << "readdir objects prefix: " << prefix
- << " obj: " << sref << dendl;
+ class DirIterator
+ {
+ vector<rgw_bucket_dir_entry>& objs;
+ vector<rgw_bucket_dir_entry>::iterator obj_iter;

- size_t last_del = sref.find_last_of('/');
- if (last_del != string::npos)
- sref.remove_prefix(last_del+1);
+ map<string, bool>& common_prefixes;
+ map<string, bool>::iterator cp_iter;

- /* leaf directory? */
- if (sref.empty())
- continue;
+ boost::optional<boost::string_ref> obj_sref;
+ boost::optional<boost::string_ref> cp_sref;
+ bool _skip_cp;

- lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
- << func << " "
- << "list uri=" << s->relative_uri << " "
- << " prefix=" << prefix << " "
- << " obj path=" << iter.key.name
- << " (" << sref << ")" << ""
- << " mtime="
- << real_clock::to_time_t(iter.meta.mtime)
- << " size=" << iter.meta.accounted_size
- << dendl;
+ public:

- if (! this->operator()(sref, next_marker, iter.meta.mtime,
- iter.meta.accounted_size, RGW_FS_TYPE_FILE)) {
- /* caller cannot accept more */
- lsubdout(cct, rgw, 5) << "readdir rcb failed"
- << " dirent=" << sref.data()
- << " call count=" << ix
- << dendl;
- rcb_eof = true;
- return;
+ DirIterator(vector<rgw_bucket_dir_entry>& objs,
+ map<string, bool>& common_prefixes)
+ : objs(objs), common_prefixes(common_prefixes), _skip_cp(false)
+ {
+ obj_iter = objs.begin();
+ parse_obj();
+ cp_iter = common_prefixes.begin();
+ parse_cp();
+ }

bool is_obj() {
+ return (obj_iter != objs.end());
}
- ++ix;
- }

- auto cnow = real_clock::now();
- for (auto& iter : common_prefixes) {
+ bool is_cp(){
+ return (cp_iter != common_prefixes.end());
+ }

- lsubdout(cct, rgw, 15) << "readdir common prefixes prefix: " << prefix
- << " iter first: " << iter.first
- << " iter second: " << iter.second
- << dendl;
+ bool eof() {
+ return ((!is_obj()) && (!is_cp()));
+ }

- /* XXX aieee--I have seen this case! /
- if (iter.first == "/")
- continue;
+ void parse_obj() {
+ if (is_obj()) {
+ boost::string_ref sref{obj_iter->key.name};
+ size_t last_del = sref.find_last_of('/');
+ if (last_del != string::npos)
+ sref.remove_prefix(last_del+1);
+ obj_sref = sref;
+ }
+ } /
parse_obj */

- /* it's safest to modify the element in place--a suffix-modifying
- * string_ref operation is problematic since ULP rgw_file callers
- * will ultimately need a c-string */
- if (iter.first.back() '/')
- const_cast&lt;std::string&&gt;(iter.first).pop_back();
+ void next_obj() {
+ +obj_iter;
parse_obj();
+ }

- boost::string_ref sref{iter.first};
+ void parse_cp() {
+ if (is_cp()) {
+ /* leading-/ skip case /
+ if (cp_iter->first "/") {
+ _skip_cp = true;
+ return;
+ } else
+ _skip_cp = false;

/
it's safest to modify the element in place--a suffix-modifying
+ * string_ref operation is problematic since ULP rgw_file callers
+ * will ultimately need a c-string /
+ if (cp_iter->first.back() == '/')
+ const_cast<std::string&>(cp_iter->first).pop_back();

boost::string_ref sref{cp_iter->first};
+ size_t last_del = sref.find_last_of('/');
+ if (last_del != string::npos)
+ sref.remove_prefix(last_del+1);
+ cp_sref = sref;
+ } /
is_cp /
+ } /
parse_cp */

void next_cp() {
+ +cp_iter;
parse_cp();
+ }

- size_t last_del = sref.find_last_of('/');
- if (last_del != string::npos)
- sref.remove_prefix(last_del+1);
+ bool skip_cp() {
+ return _skip_cp;
+ }

- lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
- << func << " "
- << "list uri=" << s->relative_uri << " "
- << " prefix=" << prefix << " "
- << " cpref=" << sref
- << dendl;
+ bool entry_is_obj() {
+ return (is_obj() &&
+ ((! is_cp()) ||
+ (obj_sref.get() < cp_sref.get())));
+ }

- if (sref.empty()) {
- /* null path segment--could be created in S3 but has no NFS
- * interpretation */
- return;
+ boost::string_ref get_obj_sref() {
+ return obj_sref.get();
}

- if (! this->operator()(sref, next_marker, cnow, 0,
- RGW_FS_TYPE_DIRECTORY)) {
- /* caller cannot accept more /
- lsubdout(cct, rgw, 5) << "readdir rcb failed"
- << " dirent=" << sref.data()
- << " call count=" << ix
- << dendl;
- rcb_eof = true;
- return;
+ boost::string_ref get_cp_sref() {
+ return cp_sref.get();
}
- +ix;
- }

+ vector<rgw_bucket_dir_entry>::iterator& get_obj_iter() {
+ return obj_iter;
+ }

map<string, bool>::iterator& get_cp_iter() {
+ return cp_iter;
+ }

}; /
DirIterator /

DirIterator di{objs, common_prefixes};

for (;;) {

if (di.eof()) {
+ break; // done
+ }

/
assert: one of is_obj() || is_cp() holds /
+ if (di.entry_is_obj()) {
+ auto sref = di.get_obj_sref();
+ if (sref.empty()) {
+ /
recursive list of a leaf dir (iirc), do nothing /
+ } else {
+ /
send a file entry /
+ auto obj_entry = *(di.get_obj_iter());

lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
+ << func << " "
+ << "list uri=" << s->relative_uri << " "
+ << " prefix=" << prefix << " "
+ << " obj path=" << obj_entry.key.name
+ << " (" << sref << ")" << ""
+ << " mtime="
+ << real_clock::to_time_t(obj_entry.meta.mtime)
+ << " size=" << obj_entry.meta.accounted_size
+ << dendl;

if (! this->operator()(sref, next_marker, obj_entry.meta.mtime,
+ obj_entry.meta.accounted_size,
+ RGW_FS_TYPE_FILE)) {
+ /
caller cannot accept more /
+ lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop"
+ << " dirent=" << sref.data()
+ << " call count=" << ix
+ << dendl;
+ rcb_eof = true;
+ return;
+ }
+ }
+ di.next_obj(); // and advance object
+ } else {
+ /
send a dir entry /
+ if (! di.skip_cp()) {
+ auto sref = di.get_cp_sref();

lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
+ << func << " "
+ << "list uri=" << s->relative_uri << " "
+ << " prefix=" << prefix << " "
+ << " cpref=" << sref
+ << dendl;

if (sref.empty()) {
+ /
null path segment--could be created in S3 but has no NFS
+ * interpretation /
+ } else {
+ if (! this->operator()(sref, next_marker, cnow, 0,
+ RGW_FS_TYPE_DIRECTORY)) {
+ /
caller cannot accept more /
+ lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop"
+ << " dirent=" << sref.data()
+ << " call count=" << ix
+ << dendl;
+ rcb_eof = true;
+ return;
+ }
+ }
+ }
+ di.next_cp(); // and advance common_prefixes
+ } /
! di.entry_is_obj() /
+ } /
for (;;) */
}

virtual void send_versioned_response() {

#4 Updated by Matt Benjamin about 2 months ago

"""
commit c587ceb1a5408db9a428818bd029ac5bdf88b183
Author: Matt Benjamin <>
Date: Tue Dec 1 07:53:56 2020 -0500

rgw_file: return common_prefixes in lexical order
Since inception RGWReaddirRequest has sent all leaf objects first
(in lexical order), then common_prefixes (in lexical order). In
hindsight, an overall listing could trivially be returned out of
lexical order, which can cause continued listing of large, mixed
directories to fail.
RCA by Dan Gryniewicz.
Fixes: https://tracker.ceph.com/issues/48410
Resolves: rhbz#1878250
Signed-off-by: Matt Benjamin &lt;&gt;
(cherry picked from commit e561e98e5cca2678854e01c990f95e474022b7ed)

diff --git a/src/rgw/rgw_file.h b/src/rgw/rgw_file.h
index 7cdd7e1ff24..8987aeb6ab1 100644
--- a/src/rgw/rgw_file.h
+++ b/src/rgw/rgw_file.h
@ -1559,94 +1559,191 @ public:

void send_response() override {
struct req_state* s = get_state();
- for (const auto& iter : objs) {
+ auto cnow = real_clock::now();

- boost::string_ref sref {iter.key.name};
+ /* enumerate objs and common_prefixes in parallel,
+ * avoiding increment on and end iterator, which is
+ * undefined */

- lsubdout(cct, rgw, 15) << "readdir objects prefix: " << prefix
- << " obj: " << sref << dendl;
+ class DirIterator
+ {
+ vector<rgw_bucket_dir_entry>& objs;
+ vector<rgw_bucket_dir_entry>::iterator obj_iter;

- size_t last_del = sref.find_last_of('/');
- if (last_del != string::npos)
- sref.remove_prefix(last_del+1);
+ map<string, bool>& common_prefixes;
+ map<string, bool>::iterator cp_iter;

- /* leaf directory? */
- if (sref.empty())
- continue;
+ boost::optional<boost::string_ref> obj_sref;
+ boost::optional<boost::string_ref> cp_sref;
+ bool _skip_cp;

- lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
- << func << " "
- << "list uri=" << s->relative_uri << " "
- << " prefix=" << prefix << " "
- << " obj path=" << iter.key.name
- << " (" << sref << ")" << ""
- << " mtime="
- << real_clock::to_time_t(iter.meta.mtime)
- << " size=" << iter.meta.accounted_size
- << dendl;
+ public:

- if (! this->operator()(sref, next_marker, iter.meta.mtime,
- iter.meta.accounted_size, RGW_FS_TYPE_FILE)) {
- /* caller cannot accept more */
- lsubdout(cct, rgw, 5) << "readdir rcb failed"
- << " dirent=" << sref.data()
- << " call count=" << ix
- << dendl;
- rcb_eof = true;
- return;
+ DirIterator(vector<rgw_bucket_dir_entry>& objs,
+ map<string, bool>& common_prefixes)
+ : objs(objs), common_prefixes(common_prefixes), _skip_cp(false)
+ {
+ obj_iter = objs.begin();
+ parse_obj();
+ cp_iter = common_prefixes.begin();
+ parse_cp();
+ }

bool is_obj() {
+ return (obj_iter != objs.end());
}
- ++ix;
- }

- auto cnow = real_clock::now();
- for (auto& iter : common_prefixes) {
+ bool is_cp(){
+ return (cp_iter != common_prefixes.end());
+ }

- lsubdout(cct, rgw, 15) << "readdir common prefixes prefix: " << prefix
- << " iter first: " << iter.first
- << " iter second: " << iter.second
- << dendl;
+ bool eof() {
+ return ((!is_obj()) && (!is_cp()));
+ }

- /* XXX aieee--I have seen this case! /
- if (iter.first == "/")
- continue;
+ void parse_obj() {
+ if (is_obj()) {
+ boost::string_ref sref{obj_iter->key.name};
+ size_t last_del = sref.find_last_of('/');
+ if (last_del != string::npos)
+ sref.remove_prefix(last_del+1);
+ obj_sref = sref;
+ }
+ } /
parse_obj */

- /* it's safest to modify the element in place--a suffix-modifying
- * string_ref operation is problematic since ULP rgw_file callers
- * will ultimately need a c-string */
- if (iter.first.back() '/')
- const_cast&lt;std::string&&gt;(iter.first).pop_back();
+ void next_obj() {
+ +obj_iter;
parse_obj();
+ }

- boost::string_ref sref{iter.first};
+ void parse_cp() {
+ if (is_cp()) {
+ /* leading-/ skip case /
+ if (cp_iter->first "/") {
+ _skip_cp = true;
+ return;
+ } else
+ _skip_cp = false;

/
it's safest to modify the element in place--a suffix-modifying
+ * string_ref operation is problematic since ULP rgw_file callers
+ * will ultimately need a c-string /
+ if (cp_iter->first.back() == '/')
+ const_cast<std::string&>(cp_iter->first).pop_back();

boost::string_ref sref{cp_iter->first};
+ size_t last_del = sref.find_last_of('/');
+ if (last_del != string::npos)
+ sref.remove_prefix(last_del+1);
+ cp_sref = sref;
+ } /
is_cp /
+ } /
parse_cp */

void next_cp() {
+ +cp_iter;
parse_cp();
+ }

- size_t last_del = sref.find_last_of('/');
- if (last_del != string::npos)
- sref.remove_prefix(last_del+1);
+ bool skip_cp() {
+ return _skip_cp;
+ }

- lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
- << func << " "
- << "list uri=" << s->relative_uri << " "
- << " prefix=" << prefix << " "
- << " cpref=" << sref
- << dendl;
+ bool entry_is_obj() {
+ return (is_obj() &&
+ ((! is_cp()) ||
+ (obj_sref.get() < cp_sref.get())));
+ }

- if (sref.empty()) {
- /* null path segment--could be created in S3 but has no NFS
- * interpretation */
- return;
+ boost::string_ref get_obj_sref() {
+ return obj_sref.get();
}

- if (! this->operator()(sref, next_marker, cnow, 0,
- RGW_FS_TYPE_DIRECTORY)) {
- /* caller cannot accept more /
- lsubdout(cct, rgw, 5) << "readdir rcb failed"
- << " dirent=" << sref.data()
- << " call count=" << ix
- << dendl;
- rcb_eof = true;
- return;
+ boost::string_ref get_cp_sref() {
+ return cp_sref.get();
}
- +ix;
- }

+ vector<rgw_bucket_dir_entry>::iterator& get_obj_iter() {
+ return obj_iter;
+ }

map<string, bool>::iterator& get_cp_iter() {
+ return cp_iter;
+ }

}; /
DirIterator /

DirIterator di{objs, common_prefixes};

for (;;) {

if (di.eof()) {
+ break; // done
+ }

/
assert: one of is_obj() || is_cp() holds /
+ if (di.entry_is_obj()) {
+ auto sref = di.get_obj_sref();
+ if (sref.empty()) {
+ /
recursive list of a leaf dir (iirc), do nothing /
+ } else {
+ /
send a file entry /
+ auto obj_entry = *(di.get_obj_iter());

lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
+ << func << " "
+ << "list uri=" << s->relative_uri << " "
+ << " prefix=" << prefix << " "
+ << " obj path=" << obj_entry.key.name
+ << " (" << sref << ")" << ""
+ << " mtime="
+ << real_clock::to_time_t(obj_entry.meta.mtime)
+ << " size=" << obj_entry.meta.accounted_size
+ << dendl;

if (! this->operator()(sref, next_marker, obj_entry.meta.mtime,
+ obj_entry.meta.accounted_size,
+ RGW_FS_TYPE_FILE)) {
+ /
caller cannot accept more /
+ lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop"
+ << " dirent=" << sref.data()
+ << " call count=" << ix
+ << dendl;
+ rcb_eof = true;
+ return;
+ }
+ }
+ di.next_obj(); // and advance object
+ } else {
+ /
send a dir entry /
+ if (! di.skip_cp()) {
+ auto sref = di.get_cp_sref();

lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
+ << func << " "
+ << "list uri=" << s->relative_uri << " "
+ << " prefix=" << prefix << " "
+ << " cpref=" << sref
+ << dendl;

if (sref.empty()) {
+ /
null path segment--could be created in S3 but has no NFS
+ * interpretation /
+ } else {
+ if (! this->operator()(sref, next_marker, cnow, 0,
+ RGW_FS_TYPE_DIRECTORY)) {
+ /
caller cannot accept more /
+ lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop"
+ << " dirent=" << sref.data()
+ << " call count=" << ix
+ << dendl;
+ rcb_eof = true;
+ return;
+ }
+ }
+ }
+ di.next_cp(); // and advance common_prefixes
+ } /
! di.entry_is_obj() /
+ } /
for (;;) */
}

virtual void send_versioned_response() {
"""

#5 Updated by Nathan Cutler about 2 months ago

  • Status changed from Need More Info to New
  • Assignee changed from Matt Benjamin to Nathan Cutler

#6 Updated by Nathan Cutler about 2 months ago

  • Description updated (diff)
  • Status changed from New to In Progress

#7 Updated by Yuri Weinstein about 1 month ago

Backport Bot wrote:

https://github.com/ceph/ceph/pull/38828

merged

#8 Updated by Nathan Cutler about 1 month ago

  • Status changed from In Progress to Resolved
  • Target version set to v14.2.17

This update was made using the script "backport-resolve-issue".
backport PR https://github.com/ceph/ceph/pull/38828
merge commit 749e381c9ec6234b1b237f2f8b7513be0dc67ebd (v14.2.16-191-g749e381c9ec)

Also available in: Atom PDF