Project

General

Profile

Actions

Bug #63394

open

rgw: link only radosgw with ALLOC_LIBS

Added by Matt Benjamin 6 months ago. Updated 5 months ago.

Status:
Pending Backport
Priority:
High
Assignee:
Target version:
-
% Done:

0%

Source:
Development
Tags:
backport_processed
Backport:
reef,quincy
Regression:
No
Severity:
2 - major
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):

Description

In particular, do not link intermediate dependencies or librgw.so.2
with a custom allocator (normally tcmalloc).

This prevents illegal behavior due to mismatched allocators when librgw
is loaded by nfs-ganesha or other consumers.

Example gdb session:

"""
(gdb) down
#10 0x00007ffff5dc7850 in Option::Option (this=0x4786d0) at /home/mbenjamin/dev/ceph-cp/src/common/options.h:14
14 struct Option {
(gdb) w
Missing arguments.
(gdb) where
#0 int_malloc (av=av@entry=0x7ffff7ac7c80 <main_arena>, bytes=44) at malloc.c:4026
#1 0x00007ffff798fd72 in GI_libc_malloc (bytes=<optimized out>) at malloc.c:3297
#2 0x00007ffff46b570c in operator new (sz=44) at ../../../../libstdc++-v3/libsupc++/new_op.cc:50
#3 0x00007ffff5bcabf5 in std::__new_allocator<char>::allocate (this=0x4786f8, __n=44)
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/new_allocator.h:147
#4 0x00007ffff5bcab71 in std::allocator<char>::allocate (this=0x4786f8, __n=44)
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/allocator.h:198
#5 std::allocator_traits<std::allocator<char> >::allocate (__a=..., __n=44)
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/alloc_traits.h:482
#6 std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_S_allocate (__a=...,
__n=44) at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/basic_string.h:126
#7 0x00007ffff5bca998 in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_create (this=0x4786f8, __capacity=@0x7ffffffe59f8: 43, __old_capacity=0)
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/basic_string.tcc:155
#8 0x00007ffff5bcce42 in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_construct<char*> (this=0x4786f8, __beg=0x45c7e0 "time in seconds for detecting a hung thread", __end=0x45c80b "")
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/basic_string.tcc:225
#9 0x00007ffff5bccd81 in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string (this=0x4786f8, __str="time in seconds for detecting a hung thread")
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/basic_string.h:541
#10 0x00007ffff5dc7850 in Option::Option (this=0x4786d0) at /home/mbenjamin/dev/ceph-cp/src/common/options.h:14
#11 0x00007ffff5dc77bd in std::_Construct<Option, Option const&> (__p=0x4786d0, __args=...)
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/stl_construct.h:119
#12 0x00007ffff5dc7707 in std::__do_uninit_copy<Option const*, Option*> (__first=0x7fffffff2b58,
__last=0x7fffffffc848, __result=0x478010)
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/stl_uninitialized.h:120
#13 0x00007ffff5dc76c5 in std::__uninitialized_copy<false>::__uninit_copy<Option const*, Option*> (
__first=0x7fffffff2498, __last=0x7fffffffc848, __result=0x478010)
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/stl_uninitialized.h:137
#14 0x00007ffff5dc768d in std::uninitialized_copy<Option const*, Option*> (
__first=0x7fffffff2498,
__last=0x7fffffffc848, __result=0x478010)
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/stl_uninitialized.h:184
#15 0x00007ffff5dc75e9 in std::__uninitialized_copy_a<Option const*, Option*, Option> (__first=0x7fffffff2498,
__last=0x7fffffffc848, __result=0x478010)
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/stl_uninitialized.h:373
#16 0x00007ffff5e9b90e in std::vector<Option, std::allocator<Option> >::_M_range_initialize<Option const*> (
this=0x7fffffffd638, __first=0x7fffffff2498, __last=0x7fffffffc848)
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/stl_vector.h:1692
--Type <RET> for more, q to quit, c to continue without paging--
#17 0x00007ffff5e9a953 in std::vector<Option, std::allocator<Option> >::vector (this=0x7fffffffd638,
__l=std::initializer_list of length 97 = {...}, __a=...)
at /usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../include/c++/13/bits/stl_vector.h:679
#18 0x00007ffff5edc0c4 in get_rbd_options ()
at /home/mbenjamin/dev/ceph-cp/build/src/common/options/rbd_options.cc:12
#19 0x00007ffff5e4fd32 in build_options ()
at /home/mbenjamin/dev/ceph-cp/src/common/options/build_options.cc:44
#20 0x00007ffff5bb6100 in __cxx_global_var_init.55(void) ()
at /home/mbenjamin/dev/ceph-cp/src/common/options.cc:340
#21 0x00007ffff5bb6147 in _GLOBAL__sub_I_options.cc ()
from /home/mbenjamin/dev/ceph-cp/build/lib/libceph-common.so.2
#22 0x00007ffff7fcef77 in call_init (env=0x7fffffffe090, argv=0x7fffffffe058, argc=6, l=<optimized out>)
at dl-init.c:90
#23 call_init (l=<optimized out>, argc=6, argv=0x7fffffffe058, env=0x7fffffffe090) at dl-init.c:27
#24 0x00007ffff7fcf06d in dl_init (main_map=0x407850, argc=6, argv=0x7fffffffe058, env=0x7fffffffe090)
at dl-init.c:137
#25 0x00007ffff7fcb5c2 in __GI__dl_catch_exception (exception=exception@entry=0x0,
operate=operate@entry=0x7ffff7fd5c30 <call_dl_init>, args=args@entry=0x7fffffffd8e0) at dl-catch.c:211
#26 0x00007ffff7fd5bcc in dl_open_worker (a=a@entry=0x7fffffffda90) at dl-open.c:808
#27 0x00007ffff7fcb523 in __GI__dl_catch_exception (exception=exception@entry=0x7fffffffda70,
operate=operate@entry=0x7ffff7fd5b30 <dl_open_worker>, args=args@entry=0x7fffffffda90) at dl-catch.c:237
#28 0x00007ffff7fd5f44 in dl_open (file=0x7ffff7dd42ec "libganesha_rados_urls.so", mode=<optimized out>,
caller_dlopen=0x7ffff7c86d3a <load_rados_config+24>, nsid=<optimized out>, argc=6, argv=0x7fffffffe058,
env=0x7fffffffe090) at dl-open.c:884
#29 0x00007ffff797b714 in dlopen_doit (a=a@entry=0x7fffffffdd40) at dlopen.c:56
#30 0x00007ffff7fcb523 in __GI__dl_catch_exception (exception=exception@entry=0x7fffffffdc80,
operate=0x7ffff797b6b0 <dlopen_doit>, args=0x7fffffffdd40) at dl-catch.c:237
#31 0x00007ffff7fcb679 in dl_catch_error (objname=0x7fffffffdce8, errstring=0x7fffffffdcf0,
mallocedp=0x7fffffffdce7, operate=<optimized out>, args=<optimized out>) at dl-catch.c:256
#32 0x00007ffff797b1f3 in _dlerror_run (operate=operate@entry=0x7ffff797b6b0 <dlopen_doit>,
args=args@entry=0x7fffffffdd40) at dlerror.c:138
#33 0x00007ffff797b7cf in dlopen_implementation (dl_caller=<optimized out>, mode=<optimized out>,
file=<optimized out>) at dlopen.c:71
#34 ___dlopen (file=<optimized out>, mode=<optimized out>) at dlopen.c:81
#35 0x00007ffff7c86d3a in load_rados_config ()
at /home/mbenjamin/dev/nfs-ganesha/src/config_parsing/conf_url.c:85
#36 0x00007ffff7c86ff8 in config_url_init ()
at /home/mbenjamin/dev/nfs-ganesha/src/config_parsing/conf_url.c:123
--Type <RET> for more, q to quit, c to continue without paging--
Thread 1 "ganesha.nfsd" hit Breakpoint 10, tc_free (ptr=0xee2420) at src/tcmalloc.cc:1936
1936 void tc_free(void* ptr) PERFTOOLS_NOTHROW {
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 10, tc_free (ptr=0xee2400) at src/tcmalloc.cc:1936
1936 void tc_free(void* ptr) PERFTOOLS_NOTHROW {
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 10, tc_free (ptr=0xef9080) at src/tcmalloc.cc:1936
1936 void tc_free(void* ptr) PERFTOOLS_NOTHROW {
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 10, tc_free (ptr=0xede010) at src/tcmalloc.cc:1936
1936 void tc_free(void* ptr) PERFTOOLS_NOTHROW {
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 10, tc_free (ptr=0xee0020) at src/tcmalloc.cc:1936
1936 void tc_free(void* ptr) PERFTOOLS_NOTHROW {
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 1, librgw_create (rgw=0x7ffff76615f0 <RGWFSM+816>, argc=2,
argv=0x7fffffffd440) at /home/mbenjamin/dev/ceph-cp/src/rgw/librgw.cc:72
72 rc = rgwlib.init(args);
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 10, tc_free (ptr=0xee2560) at src/tcmalloc.cc:1936
1936 void tc_free(void* ptr) PERFTOOLS_NOTHROW {
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 10, tc_free (ptr=0xee2540) at src/tcmalloc.cc:1936
1936 void tc_free(void* ptr) PERFTOOLS_NOTHROW {
(gdb)
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 10, tc_free (ptr=0xee2520) at src/tcmalloc.cc:1936
1936 void tc_free(void* ptr) PERFTOOLS_NOTHROW {
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 10, tc_free (ptr=0xef9080) at src/tcmalloc.cc:1936
1936 void tc_free(void* ptr) PERFTOOLS_NOTHROW {
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 10, tc_free (ptr=0xee2440) at src/tcmalloc.cc:1936
1936 void tc_free(void* ptr) PERFTOOLS_NOTHROW {
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 6, rgw_global_init (defaults=0x7fffffffd298,
args=std::vector of length 3, capacity 4 = {...}, module_type=8, code_env=CODE_ENVIRONMENT_DAEMON, flags=1)
at /home/mbenjamin/dev/ceph-cp/src/rgw/rgw_common.cc:3051
3051 global_pre_init(defaults, args, module_type, code_env, flags);
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 7, ceph_argparse_early_args (
args=std::vector of length 3, capacity 4 = {...}, module_type=8, cluster=0x7fffffffcda0,
conf_file_list=0x7fffffffcdc0) at /home/mbenjamin/dev/ceph-cp/src/common/ceph_argparse.cc:496
496 auto orig_args = args;
(gdb)
[New Thread 0x7fffeb39e6c0 (LWP 1228515)]

Thread 1 "ganesha.nfsd" hit Breakpoint 2, global_pre_init (defaults=0x7fffffffd298,
args=std::vector of length 0, capacity 4, module_type=8, code_env=CODE_ENVIRONMENT_DAEMON, flags=1)
at /home/mbenjamin/dev/ceph-cp/src/global/global_init.cc:174
174 conf.do_argv_commands();
(gdb) c
Continuing.

Thread 1 "ganesha.nfsd" hit Breakpoint 10, tc_free (ptr=0x446e70) at src/tcmalloc.cc:1936
1936 void tc_free(void* ptr) PERFTOOLS_NOTHROW {
(gdb) c
Continuing.
src/tcmalloc.cc:333] Attempt to free invalid pointer 0x446e70

Thread 1 "ganesha.nfsd" received signal SIGABRT, Aborted.
__pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0)
at pthread_kill.c:44
44 return INTERNAL_SYSCALL_ERROR_P (ret) ? INTERNAL_SYSCALL_ERRNO (ret) : 0;
(gdb)
"""


Related issues 2 (2 open, 0 closed)

Copied to rgw - Backport #63622: quincy: rgw: link only radosgw with ALLOC_LIBS | New | Matt Benjamin | Actions
Copied to rgw - Backport #63623: reef: rgw: link only radosgw with ALLOC_LIBS | New | Matt Benjamin | Actions
Actions #1

Updated by Matt Benjamin 6 months ago

  • Status changed from In Progress to Fix Under Review
  • Pull request ID set to 54297
Actions #2

Updated by Casey Bodley 5 months ago

  • Status changed from Fix Under Review to Pending Backport
Actions #3

Updated by Backport Bot 5 months ago

  • Copied to Backport #63622: quincy: rgw: link only radosgw with ALLOC_LIBS added
Actions #4

Updated by Backport Bot 5 months ago

  • Copied to Backport #63623: reef: rgw: link only radosgw with ALLOC_LIBS added
Actions #5

Updated by Backport Bot 5 months ago

  • Tags set to backport_processed
Actions

Also available in: Atom PDF