-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathlibraryPatcher_linux.cpp
More file actions
714 lines (661 loc) · 31.6 KB
/
Copy pathlibraryPatcher_linux.cpp
File metadata and controls
714 lines (661 loc) · 31.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
/*
* Copyright 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/
#include "libraryPatcher.h"
#ifdef __linux__
#include "counters.h"
#include "guards.h"
#include "nativeSocketSampler.h"
#include "profiler.h"
#include <cassert>
#include <dlfcn.h>
#include <mutex>
#include <limits.h>
#include <setjmp.h>
#include <string.h>
#include <stdlib.h>
// Signature of a pthread start routine (the third argument of pthread_create).
typedef void* (*func_start_routine)(void*);
// Lock taken by the patch/unpatch entry points in this file to protect the
// patch tables and their sizes below.
SpinLock LibraryPatcher::_lock;
// realpath() of the shared object containing initialize()'s caller; used to
// avoid patching ourselves. Stays nullptr until initialize() succeeds.
const char* LibraryPatcher::_profiler_name = nullptr;
// pthread_create import slots that were redirected, with their original targets.
PatchEntry LibraryPatcher::_patched_entries[MAX_NATIVE_LIBS];
// Number of valid entries in _patched_entries.
int LibraryPatcher::_size = 0;
// sigaction import slots that were redirected, with their original targets.
PatchEntry LibraryPatcher::_sigaction_entries[MAX_NATIVE_LIBS];
// Number of valid entries in _sigaction_entries.
int LibraryPatcher::_sigaction_size = 0;
// Socket import slots that were redirected. Up to four functions
// (send/recv/write/read) are hooked per library, hence 4 * MAX_NATIVE_LIBS.
PatchEntry LibraryPatcher::_socket_entries[4 * MAX_NATIVE_LIBS];
// Number of valid entries in _socket_entries.
int LibraryPatcher::_socket_size = 0;
// Set to true at the end of patch_socket_functions() and cleared by
// unpatch_socket_functions().
std::atomic<bool> LibraryPatcher::_socket_active{false};
// Records the resolved filesystem path of the shared object that contains the
// caller of initialize() in _profiler_name, so that the patching routines can
// recognise (and skip) the profiler's own library.
//
// Idempotent: does nothing once _profiler_name is set. If the lookup fails,
// _profiler_name stays nullptr and a later call will retry.
void LibraryPatcher::initialize() {
    if (_profiler_name != nullptr) {
        return;  // Already initialized
    }

    Dl_info info = {};
    void* caller_address = __builtin_return_address(0);  // Return address of our caller
    const bool found = dladdr(caller_address, &info) != 0;
    assert(found);
    // assert() compiles away under NDEBUG; never hand an unset dli_fname to
    // realpath() in release builds.
    if (!found || info.dli_fname == nullptr) {
        return;
    }

    // realpath(..., nullptr) returns a malloc'ed buffer; it is not freed in
    // this file and is kept for as long as _profiler_name is used.
    _profiler_name = realpath(info.dli_fname, nullptr);
    _size = 0;
}
// Immutable pair of (start routine, argument) handed from a pthread_create
// hook to the thread wrapper running on the newly created thread.
class RoutineInfo {
public:
    RoutineInfo(func_start_routine routine, void* args)
        : _routine(routine), _args(args) {}

    // The start routine the application originally passed to pthread_create.
    func_start_routine routine() const { return _routine; }

    // The opaque argument to forward to routine().
    void* args() const { return _args; }

private:
    func_start_routine _routine;
    void* _args;
};
// Unregister the current thread from the profiler and release its TLS under a
// single SignalBlocker to close the race window between unregisterThread()
// returning and release() acquiring its internal guard (PROF-14603). Without
// this, a SIGVTALRM delivered in that window could call currentSignalSafe()
// and dereference a now-freed ProfiledThread. Kept noinline so the
// SignalBlocker's sigset_t does not appear in the caller's stack frame on
// musl/aarch64 where the deopt blob may corrupt the wrapper's stack guard.
__attribute__((noinline))
static void unregister_and_release(int tid) {
    // A single blocker covers both calls so that no profiling signal can be
    // delivered between unregistering the thread and releasing its TLS.
    SignalBlocker signals_masked;
    Profiler::unregisterThread(tid);
    ProfiledThread::release();
}
// pthread_cleanup_push callback for thread wrappers.
// Fires when the wrapped routine calls pthread_exit() or the thread is
// canceled. Kept noinline so its stack frame (which may hold a SignalBlocker
// via unregister_and_release) lives outside the DEOPT-corruption zone of the
// caller on musl/aarch64, and so that the SignalBlocker's sigset_t does not
// appear in the caller's frame on platforms with stack-protector canaries.
__attribute__((noinline))
static void cleanup_unregister(void*) {
    // The callback argument is unused: the tid is taken from the current
    // thread's ProfiledThread state instead.
    const int tid = ProfiledThread::currentTid();
    unregister_and_release(tid);
}
// Thread-cleanup wrapper that avoids the static-libgcc / forced-unwind crash.
//
// The crash: on glibc, pthread_cleanup_push in C++ mode expands to
// __pthread_cleanup_class (RAII), which adds a cleanup entry to the LSDA of
// this frame. When libjavaProfiler.so is built with -static-libgcc, the
// embedded __gxx_personality_v0 is called by the dynamic libgcc_s.so.1's
// _Unwind_ForcedUnwind. The two libgcc versions have incompatible
// _Unwind_Context layouts; calling _Unwind_SetGR (which happens when the
// personality finds a cleanup action) with a cross-version context triggers
// the cold/error path, which calls abort().
//
// The fix: use __pthread_register_cancel / __pthread_unregister_cancel
// directly — the same thing the C macro form of pthread_cleanup_push does.
// This registers cleanup via a setjmp buffer in a runtime linked-list, NOT
// via an LSDA destructor. _Unwind_ForcedUnwind's stop function
// (__pthread_unwind_stop) handles the cleanup without ever calling
// __gxx_personality_v0 for this frame, so _Unwind_SetGR is never called and
// the cross-version incompatibility is never triggered.
//
// On musl: pthread_cleanup_push already uses the C/setjmp form (no RAII),
// and pthread_exit does not use _Unwind_ForcedUnwind, so there is no issue.
// The __GLIBC__ guard keeps the musl path unchanged.
#ifdef __GLIBC__
// On glibc, <pthread.h> declares __pthread_register_cancel etc. only inside
// the C (non-C++) conditional, so they're invisible in C++ code. Redeclare
// them with extern "C" so we can call them directly without the header guard.
extern "C" {
// Registers a cleanup buffer for the calling thread (used by run_with_cleanup
// before invoking the wrapped routine).
extern void __pthread_register_cancel(__pthread_unwind_buf_t*);
// Removes a buffer previously registered with __pthread_register_cancel.
extern void __pthread_unregister_cancel(__pthread_unwind_buf_t*);
// Continues an in-progress forced unwind after our cleanup has run; declared
// noreturn here.
[[noreturn]] extern void __pthread_unwind_next(__pthread_unwind_buf_t*);
}
#endif
// Runs routine(params) and then cleanup_fn(cleanup_arg).
//
// cleanup_fn runs once on either exit path: after routine returns normally,
// or when the thread leaves routine via pthread_exit()/cancellation (in which
// case this function does not return to its caller). See the caveats in the
// body about C++ exceptions and already-pending cancellation.
//
// routine / params:          wrapped start routine and its argument; the
//                            routine's return value is discarded.
// cleanup_fn / cleanup_arg:  callback and argument to invoke on exit.
__attribute__((visibility("hidden"), noinline, no_stack_protector))
void run_with_cleanup(func_start_routine routine, void* params,
void (*cleanup_fn)(void*), void* cleanup_arg) {
#ifdef __GLIBC__
__pthread_unwind_buf_t cancel_buf = {};
// With savemask=0, __sigsetjmp only writes __jmp_buf + int __mask_was_saved;
// it never touches __saved_mask. The inner struct of __pthread_unwind_buf_t
// must cover exactly that writable prefix of struct __jmp_buf_tag.
static_assert(offsetof(__pthread_unwind_buf_t, __cancel_jmp_buf) == 0 &&
sizeof(cancel_buf.__cancel_jmp_buf[0]) == offsetof(struct __jmp_buf_tag, __saved_mask),
"glibc __pthread_unwind_buf_t inner layout incompatible with struct __jmp_buf_tag");
// __sigsetjmp/longjmp only intercepts _Unwind_ForcedUnwind (pthread_exit /
// cancellation). routine(params) must NOT throw a regular C++ exception
// across this boundary: an escaping exception would skip both
// __pthread_unregister_cancel and cleanup_fn below, leaking the thread
// registration and leaving cancel_buf linked against this (unwound) frame.
// We cannot defend with a try/catch here — a handler frame adds an LSDA
// action, which is exactly what triggers the static-libgcc abort this
// function exists to avoid. Production routines are JVM/native start
// routines that handle their own exceptions and do not throw across here.
if (__builtin_expect(
// __sigsetjmp's second parameter (savemask) is 0: the signal mask is NOT
// saved/restored, which is correct because the cancel mechanism does not
// depend on signal mask state.
__sigsetjmp((struct __jmp_buf_tag*)(void*)cancel_buf.__cancel_jmp_buf, 0), 0)) {
// Reached via longjmp from glibc's stop function when pthread_exit
// (or cancellation) fires. Run cleanup and continue unwinding.
cleanup_fn(cleanup_arg);
__pthread_unwind_next(&cancel_buf);
// __pthread_unwind_next is declared [[noreturn]]; the marker below tells the
// compiler control never falls through into __pthread_register_cancel on a
// torn-down frame.
// NOTE(review): __builtin_unreachable() is an optimizer hint, not a runtime
// check — if a variant glibc ever did return here the behaviour is undefined
// rather than a loud failure. Consider __builtin_trap() if a hard stop is
// wanted.
__builtin_unreachable();
}
// Callers must not have a pending pthread_cancel when they enter
// run_with_cleanup: a cancellation arriving between __sigsetjmp returning
// and __pthread_register_cancel below would unwind this frame before
// cancel_buf is registered, silently skipping cleanup_fn. All current
// callers are JVM/native start routines with no pending cancellation.
__pthread_register_cancel(&cancel_buf);
routine(params);
// Normal return: unlink the buffer first, then run the cleanup ourselves.
__pthread_unregister_cancel(&cancel_buf);
cleanup_fn(cleanup_arg);
#else
// musl / non-glibc: pthread_cleanup_push uses the C/setjmp form, no RAII.
pthread_cleanup_push(cleanup_fn, cleanup_arg);
routine(params);
// Non-zero argument: pop the handler and execute it.
pthread_cleanup_pop(1);
#endif
}
#ifdef UNIT_TEST
// Integration test entry point: exercises the full start_routine_wrapper →
// run_with_cleanup chain without calling Profiler::registerThread or
// Profiler::unregisterThread, which dereference _cpu_engine/_wall_engine and
// crash when the profiler is not started (as in gtest).
//
// The caller supplies cleanup_fn/cleanup_arg so the test can verify cleanup
// fires and observe ProfiledThread::release() without coupling to Profiler state.
//
// Thread lifecycle:
// pthread_create_wrapped_for_test → start_routine_for_test
// → ProfiledThread::initCurrentThread()
// → run_with_cleanup(routine, params, cleanup_fn, cleanup_arg)
// → pthread_exit(nullptr)
// Heap-allocated parameter bundle passed from pthread_create_wrapped_for_test
// to start_routine_for_test. Must remain an aggregate with this field order:
// it is brace-initialised positionally.
struct WrapperTestCtx {
    func_start_routine routine;   // routine to run on the new thread
    void* params;                 // argument forwarded to routine
    void (*cleanup_fn)(void*);    // cleanup callback handed to run_with_cleanup
    void* cleanup_arg;            // argument forwarded to cleanup_fn
};
__attribute__((visibility("hidden"), noinline, no_stack_protector))
static void* start_routine_for_test(void* raw) {
auto* ctx = static_cast<WrapperTestCtx*>(raw);
func_start_routine routine = ctx->routine;
void* params = ctx->params;
void (*cleanup_fn)(void*) = ctx->cleanup_fn;
void* cleanup_arg = ctx->cleanup_arg;
{
SignalBlocker blocker;
delete ctx;
ProfiledThread::initCurrentThread();
}
run_with_cleanup(routine, params, cleanup_fn, cleanup_arg);
pthread_exit(nullptr);
__builtin_unreachable();
}
int pthread_create_wrapped_for_test(pthread_t* thread,
func_start_routine routine, void* params,
void (*cleanup_fn)(void*), void* cleanup_arg) {
WrapperTestCtx* ctx;
{
SignalBlocker blocker;
ctx = new WrapperTestCtx{routine, params, cleanup_fn, cleanup_arg};
}
int ret = pthread_create(thread, nullptr, start_routine_for_test, ctx);
if (ret != 0) {
SignalBlocker blocker;
delete ctx;
}
return ret;
}
// Variant that passes the production cleanup_unregister as the cleanup function.
// Exercises the full chain: start_routine_for_test → run_with_cleanup →
// cleanup_unregister → Profiler::unregisterThread + ProfiledThread::release.
// Profiler::unregisterThread is null-safe under UNIT_TEST (see profiler.cpp).
int pthread_create_with_cleanup_unregister_for_test(pthread_t* thread,
                                                    func_start_routine routine,
                                                    void* params) {
    // Same as pthread_create_wrapped_for_test, but wired to the production
    // cleanup callback (which ignores its argument).
    void (*const production_cleanup)(void*) = cleanup_unregister;
    return pthread_create_wrapped_for_test(thread, routine, params,
                                           production_cleanup, nullptr);
}
#endif // UNIT_TEST
#ifdef __aarch64__
// Delete RoutineInfo with profiling signals blocked to prevent ASAN
// allocator lock reentrancy. Kept noinline so SignalBlocker's sigset_t
// does not trigger stack-protector canary in the caller on aarch64.
__attribute__((noinline))
static void delete_routine_info(RoutineInfo* thr) {
    // The blocker lives only in this helper's frame and is released on return.
    SignalBlocker signals_masked;
    delete thr;
}
// Initialize the current thread's TLS, open the init window (PROF-13072), and
// register the thread with the profiler — all under a single SignalBlocker so
// profiling signals cannot fire in the gap between initCurrentThread() and
// startInitWindow(). Kept noinline for the same stack-protector reason as
// delete_routine_info: SignalBlocker's sigset_t must not appear in
// start_routine_wrapper_spec's own stack frame on musl/aarch64.
__attribute__((noinline))
static void init_tls_and_register() {
SignalBlocker blocker;
ProfiledThread::initCurrentThread();
if (ProfiledThread *pt = ProfiledThread::currentSignalSafe()) {
pt->startInitWindow();
}
Profiler::registerThread(ProfiledThread::currentTid());
}
// Wrapper around the real start routine.
// The wrapper:
// 1. Registers the newly created thread with the profiler.
// 2. Calls the real start routine.
// 3. Unregisters the thread from the profiler once the routine has completed.
// This version works around stack corruption observed on musl/aarch64/JDK11:
//
// Empirical observation (hs_err analysis): after DEOPT PACKING fires on a
// thread running compiled lambda$measureContention$0 at sp=0x...49d0, this
// wrapper's frame (sp=0x...5020, ~144 bytes below thread stack top) shows a
// corrupted FP (odd address 0x...5001) and a corrupted stack canary. The
// corruption is confined to the top ~224 bytes of the stack (the region between
// DEOPT PACKING sp and the thread stack top).
//
// The source of the corruption is the interpreter-frame rebuild sequence in
// HotSpot's deoptimization blob (generate_deopt_blob in
// sharedRuntime_aarch64.cpp, openjdk/jdk11u). After popping the compiled
// frame the blob executes "sub sp, sp, caller_adjustment" followed by a loop
// of enter() calls (each doing "stp rfp, lr, [sp, #-16]!") to lay down
// replacement interpreter frames. When musl's small thread stack places this
// wrapper immediately above the compiled frame, the enter() writes can reach
// into this wrapper's frame, corrupting the saved FP and stack canary.
// The mechanism is the same "precarious stack guard corruption" the noinline
// helpers above already defend against for SignalBlocker's sigset_t.
//
// Two symptoms arise from this corruption:
//
// (a) Stack-canary crash: -fstack-protector-strong inserts a canary whenever
// the frame has a non-trivially destructed local (e.g. a Cleanup struct).
// That canary lands in the corruption zone; the epilogue fires
// __stack_chk_fail. no_stack_protector removes the canary.
//
// (b) Corrupted-LR crash: even without a canary, `return` loads the saved LR
// from the corrupted frame and jumps to a garbage address. pthread_exit()
// terminates the thread without using LR. HotSpot on musl returns normally
// from java_start (no forced-unwind), so no exception-based cleanup path
// is needed.
//
// Cleanup reads tid from TLS (via ProfiledThread::currentTid()) rather than
// from a stack variable, so it is correct even after the frame is corrupted.
// pthread_cleanup_push/pop ensures unregister_and_release() also runs when the
// wrapped routine calls pthread_exit() or the thread is canceled.
// Thread entry point installed by pthread_create_hook_spec. Never returns:
// the thread always ends through pthread_exit().
// args: heap-allocated RoutineInfo created by pthread_create_hook_spec;
//       ownership transfers to this function.
__attribute__((visibility("hidden"), no_stack_protector))
static void* start_routine_wrapper_spec(void* args) {
RoutineInfo* thr = (RoutineInfo*)args;
// Copy the routine and its argument out before the RoutineInfo is freed.
func_start_routine routine = thr->routine();
void* params = thr->args();
// Both helpers are noinline and own their SignalBlocker, keeping it out of
// this frame.
delete_routine_info(thr);
init_tls_and_register();
// cleanup_unregister fires on pthread_exit() or cancellation from within
// routine(params). The push/pop pair lives inside run_with_cleanup so
// that __pthread_unwind_buf_t (glibc) / struct __ptcb (musl) does not land
// in this frame's DEOPT-corruption zone.
run_with_cleanup(routine, params, cleanup_unregister, nullptr);
// pthread_exit instead of 'return': the saved LR in this frame is corrupted
// by DEOPT PACKING; returning would jump to a garbage address.
// cleanup_unregister has already run via run_with_cleanup's normal return
// path, so there is no registered cancel handler left. The forced unwind
// raised by pthread_exit walks this frame, but it is safe because no
// destructor-bearing local (and hence no LSDA cleanup/handler action) is
// live at this call site: __gxx_personality_v0 returns continue-unwind
// without ever calling _Unwind_SetGR, avoiding the static-libgcc abort.
// WARNING: adding any RAII local with a destructor between run_with_cleanup
// and pthread_exit would reintroduce that crash.
pthread_exit(nullptr);
// Marks the end of the function as unreachable so no return path is needed.
__builtin_unreachable();
}
static int pthread_create_hook_spec(pthread_t* thread,
const pthread_attr_t* attr,
func_start_routine start_routine,
void* args) {
RoutineInfo* thr;
{
SignalBlocker blocker;
thr = new RoutineInfo(start_routine, args);
}
int ret = pthread_create(thread, attr, start_routine_wrapper_spec, (void*)thr);
if (ret != 0) {
SignalBlocker blocker;
delete thr;
}
return ret;
}
#endif // __aarch64__
// Wrapper around the real start routine.
// See comments for start_routine_wrapper_spec() for details
__attribute__((visibility("hidden"), no_stack_protector))
static void* start_routine_wrapper(void* args) {
RoutineInfo* thr = (RoutineInfo*)args;
func_start_routine routine;
void* params;
{
// Block profiling signals while accessing and freeing RoutineInfo
// and during TLS initialization. Under ASAN, new/delete/
// pthread_setspecific are intercepted and acquire ASAN's internal
// allocator lock. A profiling signal during any of these calls
// runs ASAN-instrumented code that tries to acquire the same
// lock, causing deadlock.
// registerThread is also kept inside the blocker so that the CPU
// timer is armed while SIGPROF/SIGVTALRM are masked. Any pending
// signal fires only after signals are re-enabled (when the blocker
// scope exits), at which point JVMThread::current() is still null
// and the guard in CTimer::signalHandler discards the sample safely.
SignalBlocker blocker;
routine = thr->routine();
params = thr->args();
delete thr;
ProfiledThread::initCurrentThread();
ProfiledThread::currentSignalSafe()->startInitWindow();
Profiler::registerThread(ProfiledThread::currentTid());
}
// Use POSIX cleanup instead of C++ RAII to handle pthread_exit(): see run_with_cleanup.
// cleanup_unregister has already run on run_with_cleanup's normal return path.
// The pthread_exit forced unwind is safe here for the same reason as in
// start_routine_wrapper_spec: no destructor-bearing local is live at this
// call site, so __gxx_personality_v0 never calls _Unwind_SetGR.
run_with_cleanup(routine, params, cleanup_unregister, nullptr);
pthread_exit(nullptr);
__builtin_unreachable();
}
static int pthread_create_hook(pthread_t* thread,
const pthread_attr_t* attr,
func_start_routine start_routine,
void* args) {
RoutineInfo* thr;
{
SignalBlocker blocker;
thr = new RoutineInfo(start_routine, args);
}
int ret = pthread_create(thread, attr, start_routine_wrapper, (void*)thr);
if (ret != 0) {
SignalBlocker blocker;
delete thr;
}
return ret;
}
// Patches pthread_create in all known native libraries. Does nothing when the
// patcher has not been initialized yet (only happens in Gtest).
void LibraryPatcher::patch_libraries() {
    const bool initialized = (_profiler_name != nullptr);
    if (initialized) {
        TEST_LOG("Patching libraries");
        patch_pthread_create();
        TEST_LOG("%d libraries patched", _size);
    }
}
// Redirects lib's pthread_create import slot to our hook and records the
// original target in _patched_entries so unpatch_libraries() can restore it.
//
// The caller is expected to hold _lock (patch_pthread_create() does).
// The library is skipped when: it has no name, the patcher is not
// initialized, it is the profiler itself, it is a sanitizer runtime, it does
// not import pthread_create, it is already patched, or the table is full.
void LibraryPatcher::patch_library_unlocked(CodeCache* lib) {
    if (lib->name() == nullptr) return;
    // Not initialized yet: without our own path we cannot tell which library
    // is ours, and strcmp() against nullptr below would be undefined.
    if (_profiler_name == nullptr) return;

    char path[PATH_MAX];
    char* resolved_path = realpath(lib->name(), path);
    if (resolved_path != nullptr &&                       // filter out virtual file, e.g. [vdso], etc.
        strcmp(resolved_path, _profiler_name) == 0) {     // Don't patch self
        return;
    }

    // Don't patch sanitizer runtime libraries — intercepting their internal
    // pthread_create calls causes reentrancy and heap corruption under ASAN.
    const char* base = strrchr(lib->name(), '/');
    base = (base != nullptr) ? base + 1 : lib->name();
    if (strncmp(base, "libasan", 7) == 0 ||
        strncmp(base, "libtsan", 7) == 0 ||
        strncmp(base, "libubsan", 8) == 0) {
        return;
    }

    void** pthread_create_location = (void**)lib->findImport(im_pthread_create);
    if (pthread_create_location == nullptr) {
        return;
    }

    for (int index = 0; index < _size; index++) {
        // Already patched
        if (_patched_entries[index]._lib == lib) {
            return;
        }
    }

    // Never write past the end of _patched_entries (same guard as
    // patch_sigaction_in_library uses for its table).
    if (_size >= MAX_NATIVE_LIBS) {
        TEST_LOG("Patch table full, skipping: %s", lib->name());
        return;
    }

    TEST_LOG("Patching: %s", lib->name());
    void* func = (void*)pthread_create_hook;
#ifdef __aarch64__
    // Workaround stack guard corruption in Linux/aarch64/musl/jdk11
    if (VM::isHotspot() && OS::isMusl() && VM::java_version() == 11) {
        func = (void*)pthread_create_hook_spec;
    }
#endif

    // Record the original target before overwriting the slot.
    _patched_entries[_size]._lib = lib;
    _patched_entries[_size]._location = pthread_create_location;
    _patched_entries[_size]._func = (void*)__atomic_load_n(pthread_create_location, __ATOMIC_RELAXED);
    __atomic_store_n(pthread_create_location, func, __ATOMIC_RELAXED);
    _size++;
}
void LibraryPatcher::unpatch_libraries() {
TEST_LOG("Restore libraries");
ExclusiveLockGuard locker(&_lock);
for (int index = 0; index < _size; index++) {
__atomic_store_n(_patched_entries[index]._location, _patched_entries[index]._func, __ATOMIC_RELAXED);
}
_size = 0;
}
void LibraryPatcher::patch_pthread_create() {
const CodeCacheArray& native_libs = Libraries::instance()->native_libs();
int num_of_libs = native_libs.count();
ExclusiveLockGuard locker(&_lock);
for (int index = 0; index < num_of_libs; index++) {
CodeCache* lib = native_libs.at(index);
if (lib != nullptr) {
patch_library_unlocked(lib);
}
}
}
// Patch sigaction in all libraries to prevent any library from overwriting
// our SIGSEGV/SIGBUS handlers. This protects against misbehaving libraries
// (like wasmtime) that install broken signal handlers calling malloc().
void LibraryPatcher::patch_sigaction_in_library(CodeCache* lib) {
    const char* const lib_name = lib->name();
    // Nothing to do for anonymous libraries or before initialize() has run.
    if (lib_name == nullptr || _profiler_name == nullptr) {
        return;
    }

    // Skip the profiler's own library.
    char resolved_buf[PATH_MAX];
    const char* const resolved = realpath(lib_name, resolved_buf);
    const bool is_profiler = (resolved != nullptr) && (strcmp(resolved, _profiler_name) == 0);
    if (is_profiler) {
        return;
    }

    // Note: sanitizer libraries (libasan, libtsan, libubsan) are patched here
    // on purpose. This keeps our handler on top for recoverable SIGSEGVs
    // (e.g., safefetch) while still chaining to the sanitizer's handler for
    // unexpected crashes.
    void** slot = (void**)lib->findImport(im_sigaction);
    if (slot == nullptr) {
        return;
    }

    // Give up when the table is full.
    if (_sigaction_size >= MAX_NATIVE_LIBS) {
        return;
    }

    // Give up when this library has already been patched.
    for (int i = 0; i < _sigaction_size; ++i) {
        if (_sigaction_entries[i]._lib == lib) {
            return;
        }
    }

    void* hook = OS::getSigactionHook();
    PatchEntry& entry = _sigaction_entries[_sigaction_size];
    entry._lib = lib;
    entry._location = slot;
    // Remember the original target before overwriting the slot.
    entry._func = (void*)__atomic_load_n(slot, __ATOMIC_RELAXED);
    __atomic_store_n(slot, hook, __ATOMIC_RELAXED);
    _sigaction_size++;
    Counters::increment(SIGACTION_PATCHED_LIBS);
}
void LibraryPatcher::patch_sigaction() {
const CodeCacheArray& native_libs = Libraries::instance()->native_libs();
int num_of_libs = native_libs.count();
ExclusiveLockGuard locker(&_lock);
for (int index = 0; index < num_of_libs; index++) {
CodeCache* lib = native_libs.at(index);
if (lib != nullptr) {
patch_sigaction_in_library(lib);
}
}
}
// Installs the NativeSocketSampler hooks for send/recv/write/read in the
// import tables of the known native libraries (except the profiler itself).
//
// Returns false when the real libc functions could not be resolved, or when
// hooks were already installed but have been deactivated concurrently;
// returns true otherwise (and marks the socket hooks as active).
bool LibraryPatcher::patch_socket_functions() {
    // Resolve the real libc symbols ONCE at first call and cache them. On a
    // restart cycle (stop()→start()) we MUST NOT re-resolve via RTLD_NEXT: if
    // any GOT slot in another DSO was missed during unpatch (e.g. its CodeCache
    // disappeared), dlsym(RTLD_NEXT) could now resolve to the still-installed
    // hook in that other DSO's GOT — the assignment to _orig_* would become
    // self-referential and the next hook call would infinite-loop.
    //
    // RTLD_NEXT finds the first definition after this DSO in load order,
    // bypassing unresolved lazy-binding stubs that would otherwise trigger
    // _dl_runtime_resolve and silently overwrite the hook in the GOT.
    // May resolve to an LD_PRELOAD interposer (e.g. libasan) — intentional.
    // On musl, RTLD_NEXT returns NULL when libc is loaded before this DSO in the
    // link map; fall back to RTLD_DEFAULT which finds symbols globally.
    // The four statics are written once and then read-only. They are
    // resolved outside the ExclusiveLockGuard intentionally (dlsym must not
    // be called while holding _lock because dlsym may acquire the linker
    // lock, which is also acquired during dlopen — inverting the order would
    // deadlock). Guard the one-time init with a dedicated once_flag so that
    // concurrent callers serialise on the dlsym block rather than racing to
    // write the statics.
    static NativeSocketSampler::send_fn  cached_send  = nullptr;
    static NativeSocketSampler::recv_fn  cached_recv  = nullptr;
    static NativeSocketSampler::write_fn cached_write = nullptr;
    static NativeSocketSampler::read_fn  cached_read  = nullptr;
    static std::once_flag dlsym_once;
    std::call_once(dlsym_once, [&]() {
        cached_send  = (NativeSocketSampler::send_fn)  dlsym(RTLD_NEXT, "send");
        if (!cached_send)  cached_send  = (NativeSocketSampler::send_fn)  dlsym(RTLD_DEFAULT, "send");
        cached_recv  = (NativeSocketSampler::recv_fn)  dlsym(RTLD_NEXT, "recv");
        if (!cached_recv)  cached_recv  = (NativeSocketSampler::recv_fn)  dlsym(RTLD_DEFAULT, "recv");
        cached_write = (NativeSocketSampler::write_fn) dlsym(RTLD_NEXT, "write");
        if (!cached_write) cached_write = (NativeSocketSampler::write_fn) dlsym(RTLD_DEFAULT, "write");
        cached_read  = (NativeSocketSampler::read_fn)  dlsym(RTLD_NEXT, "read");
        if (!cached_read)  cached_read  = (NativeSocketSampler::read_fn)  dlsym(RTLD_DEFAULT, "read");
        // If dlsym resolves to one of our own hooks the linker is already serving
        // the patched copy. Null the pointers so the early-return below fires.
        if (cached_send  == &NativeSocketSampler::send_hook  ||
            cached_recv  == &NativeSocketSampler::recv_hook  ||
            cached_write == &NativeSocketSampler::write_hook ||
            cached_read  == &NativeSocketSampler::read_hook) {
            TEST_LOG("patch_socket_functions dlsym returned hook address; refusing to self-reference");
            cached_send = nullptr; cached_recv = nullptr;
            cached_write = nullptr; cached_read = nullptr;
        }
    });

    auto pre_send  = cached_send;
    auto pre_recv  = cached_recv;
    auto pre_write = cached_write;
    auto pre_read  = cached_read;
    TEST_LOG("patch_socket_functions dlsym send=%p recv=%p write=%p read=%p",
             (void*)pre_send, (void*)pre_recv, (void*)pre_write, (void*)pre_read);
    if (!pre_send || !pre_recv || !pre_write || !pre_read) {
        TEST_LOG("patch_socket_functions EARLY RETURN: at least one dlsym returned NULL");
        return false;
    }

    const CodeCacheArray& native_libs = Libraries::instance()->native_libs();
    int num_of_libs = native_libs.count();

    // Pre-resolve all library paths before acquiring the lock: realpath() may
    // block on I/O and must not be called while holding _lock.
    // We only need the is-self flag per library, so avoid a huge stack allocation.
    static_assert(MAX_NATIVE_LIBS > 0, "MAX_NATIVE_LIBS must be positive");
    bool is_self[MAX_NATIVE_LIBS];
    int capped = (num_of_libs <= MAX_NATIVE_LIBS) ? num_of_libs : MAX_NATIVE_LIBS;
    // _profiler_name is nullptr until initialize() has succeeded. In that
    // case no library can be identified as "self", and passing nullptr to
    // strcmp() would be undefined behaviour — so skip the comparison entirely.
    const char* const self_path = _profiler_name;
    for (int index = 0; index < capped; index++) {
        CodeCache* lib = native_libs.at(index);
        is_self[index] = false;
        if (self_path == nullptr) continue;
        if (lib == nullptr || lib->name() == nullptr) continue;
        char path[PATH_MAX];
        char* rp = realpath(lib->name(), path);
        is_self[index] = (rp != nullptr && strcmp(rp, self_path) == 0);
    }

    ExclusiveLockGuard locker(&_lock);

    // Re-check under the lock only on re-entry (when hooks are already installed):
    // a concurrent unpatch_socket_functions() may have cleared _socket_active
    // between the acquire-load in install_socket_hooks() and this lock acquisition.
    // The initial call from NativeSocketSampler::start() always has _socket_size == 0
    // and must proceed regardless of _socket_active.
    if (_socket_size > 0 && !_socket_active.load(std::memory_order_relaxed)) {
        return false;
    }

    // Only assign orig pointers on the first call (no hooks installed yet).
    // On re-entry via dlopen, RTLD_NEXT would resolve to the hook itself.
    if (_socket_size == 0) {
        NativeSocketSampler::setOriginalFunctions(pre_send, pre_recv, pre_write, pre_read);
    }

    // TODO: hook table (name + hook fn) should be owned by NativeSocketSampler;
    // LibraryPatcher should iterate an externally-provided table rather than
    // hardcoding the four socket hooks here.
    //
    // Patches one import slot unless it is absent, already recorded, or the
    // table is full; records the original target so it can be restored.
    auto try_patch_slot = [&](void** location, void* hook_fn, const char* fn_name, CodeCache* lib) {
        if (location == nullptr) return;
        for (int i = 0; i < _socket_size; i++) {
            if (_socket_entries[i]._location == location) return;
        }
        if (_socket_size < 4 * MAX_NATIVE_LIBS) {
            void* orig = (void*)__atomic_load_n(location, __ATOMIC_ACQUIRE);
            _socket_entries[_socket_size]._lib      = lib;
            _socket_entries[_socket_size]._location = location;
            _socket_entries[_socket_size]._func     = orig;
            __atomic_store_n(location, hook_fn, __ATOMIC_RELEASE);
            _socket_size++;
        } else {
            Log::warn("socket patch table full (%d slots), skipping %s in %s", 4 * MAX_NATIVE_LIBS, fn_name, lib ? lib->name() : "?");
        }
    };

    for (int index = 0; index < capped; index++) {
        CodeCache* lib = native_libs.at(index);
        if (lib == nullptr) continue;
        if (lib->name() == nullptr) continue;
        if (is_self[index]) {
            continue;
        }

        void** send_location  = (void**)lib->findImport(im_send);
        void** recv_location  = (void**)lib->findImport(im_recv);
        void** write_location = (void**)lib->findImport(im_write);
        void** read_location  = (void**)lib->findImport(im_read);
        if (send_location == nullptr && recv_location == nullptr
            && write_location == nullptr && read_location == nullptr) continue;

        TEST_LOG("patch_socket_functions PATCH %s send=%p recv=%p write=%p read=%p",
                 lib->name(), (void*)send_location, (void*)recv_location,
                 (void*)write_location, (void*)read_location);

        // The _lock is held during patching to protect _socket_entries and _socket_size.
        // Concurrent dlopen_hook calls serialize via the same lock in install_socket_hooks(),
        // ensuring the already-patched scan and the table update in try_patch_slot
        // are atomic with respect to each other.
        try_patch_slot(send_location,  (void*)NativeSocketSampler::send_hook,  "send",  lib);
        try_patch_slot(recv_location,  (void*)NativeSocketSampler::recv_hook,  "recv",  lib);
        try_patch_slot(write_location, (void*)NativeSocketSampler::write_hook, "write", lib);
        try_patch_slot(read_location,  (void*)NativeSocketSampler::read_hook,  "read",  lib);
    }

    TEST_LOG("patch_socket_functions DONE total_slots=%d num_libs_scanned=%d",
             _socket_size, capped);
    _socket_active.store(true, std::memory_order_release);
    return true;
}
void LibraryPatcher::unpatch_socket_functions() {
ExclusiveLockGuard locker(&_lock);
// Clear _socket_active FIRST so that any concurrent install_socket_hooks()
// thread that already passed the acquire-load on _socket_active (before we
// acquired the lock) will see false when it checks again after acquiring the
// lock — preventing it from re-patching slots we are about to restore.
// Hooks that already entered the hook body before this store are benign: they
// hold no lock and will complete normally using the still-valid orig pointers.
//
// ASSUMPTION (dlclose UAF): we write through _socket_entries[i]._location
// without checking that the owning library is still mapped. If a patched
// DSO were actually unmapped between patch and unpatch, this store would
// corrupt freed memory or SEGV. In practice this is benign because (a) the
// host JVM does not dlclose libc-importing DSOs, (b) glibc's dlclose
// refcounts and only unmaps when the final reference is dropped, and
// (c) the same risk is already accepted by unpatch_libraries() and
// unpatch_socket_functions has the same trust model. If a host that
// routinely unmaps libc-importing libraries is ever supported, gate each
// store on a /proc/self/maps lookup or hold a dlopen handle on each lib
// for the patch lifetime.
_socket_active.store(false, std::memory_order_release);
TEST_LOG("unpatch_socket_functions restoring %d slot(s)", _socket_size);
for (int index = 0; index < _socket_size; index++) {
__atomic_store_n(_socket_entries[index]._location, _socket_entries[index]._func, __ATOMIC_RELEASE);
}
_socket_size = 0;
// _orig_send/_orig_recv/_orig_write/_orig_read are intentionally NOT nulled.
// In-flight hook invocations that entered before PLT entries were restored
// above may still be executing and will dereference these pointers.
// They remain valid (pointing to the real libc functions) until the next
// patch_socket_functions() call.
}
#endif // __linux__