Adding direct reclaim state monitoring am: 9e136285a6 am: 696456889a

Original change: https://android-review.googlesource.com/c/platform/system/memory/lmkd/+/3001286

Change-Id: I9213f66c6289856f4f0da5b0971813b0680979d1
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
This commit is contained in:
Carlos Galo 2024-03-21 06:35:38 +00:00 committed by Automerger Merge Worker
commit 02f2386694
2 changed files with 137 additions and 12 deletions

View File

@ -19,19 +19,19 @@ lmkd_hooks_cc_defaults {
soong_config_variables: { soong_config_variables: {
use_hooks: { use_hooks: {
cflags: [ cflags: [
"-DLMKD_USE_HOOKS" "-DLMKD_USE_HOOKS",
], ],
static_libs: [ static_libs: [
"liblmkdhooks" "liblmkdhooks",
] ],
} },
} },
} }
cc_defaults { cc_defaults {
name: "stats_defaults", name: "stats_defaults",
cflags: [ cflags: [
"-DLMKD_LOG_STATS" "-DLMKD_LOG_STATS",
], ],
} }
@ -44,8 +44,10 @@ cc_binary {
"watchdog.cpp", "watchdog.cpp",
], ],
shared_libs: [ shared_libs: [
"libbase",
"libcutils", "libcutils",
"liblog", "liblog",
"libmemevents",
"libprocessgroup", "libprocessgroup",
"libpsi", "libpsi",
], ],
@ -54,17 +56,20 @@ cc_binary {
"liblmkd_utils", "liblmkd_utils",
], ],
header_libs: [ header_libs: [
"bpf_syscall_wrappers", "bpf_headers",
], ],
local_include_dirs: ["include"], local_include_dirs: ["include"],
cflags: [ cflags: [
"-Wall", "-Wall",
"-Werror", "-Werror",
"-Wextra", "-Wextra",
"-DLMKD_TRACE_KILLS" "-DLMKD_TRACE_KILLS",
], ],
init_rc: ["lmkd.rc"], init_rc: ["lmkd.rc"],
defaults: ["stats_defaults", "lmkd_hooks_defaults"], defaults: [
"stats_defaults",
"lmkd_hooks_defaults",
],
logtags: ["event.logtags"], logtags: ["event.logtags"],
afdo: true, afdo: true,
} }
@ -98,5 +103,5 @@ cc_library_static {
"-g", "-g",
"-Wall", "-Wall",
"-Werror", "-Werror",
] ],
} }

124
lmkd.cpp
View File

@ -36,8 +36,12 @@
#include <algorithm> #include <algorithm>
#include <array> #include <array>
#include <memory>
#include <shared_mutex> #include <shared_mutex>
#include <vector>
#include <bpf/KernelUtils.h>
#include <bpf/WaitForProgsLoaded.h>
#include <cutils/properties.h> #include <cutils/properties.h>
#include <cutils/sockets.h> #include <cutils/sockets.h>
#include <liblmkd_utils.h> #include <liblmkd_utils.h>
@ -46,6 +50,7 @@
#include <log/log.h> #include <log/log.h>
#include <log/log_event_list.h> #include <log/log_event_list.h>
#include <log/log_time.h> #include <log/log_time.h>
#include <memevents/memevents.h>
#include <private/android_filesystem_config.h> #include <private/android_filesystem_config.h>
#include <processgroup/processgroup.h> #include <processgroup/processgroup.h>
#include <psi/psi.h> #include <psi/psi.h>
@ -189,6 +194,10 @@ struct psi_threshold {
int threshold_ms; int threshold_ms;
}; };
/*
 * Listener for direct reclaim state changes.
 * Null while memevents-based monitoring is not active; in that case mp_event_psi
 * detects direct reclaim from changes in the vmstat pgscan_direct counter instead.
 */
static std::unique_ptr<android::bpf::memevents::MemEventListener> memevent_listener(nullptr);
/*
 * CLOCK_MONOTONIC_COARSE time at which the latest direct reclaim begin event was
 * processed. Both fields are zero while no direct reclaim is in progress; a non-zero
 * value is treated as "currently in direct reclaim".
 */
static struct timespec direct_reclaim_start_tm;
static int level_oomadj[VMPRESS_LEVEL_COUNT]; static int level_oomadj[VMPRESS_LEVEL_COUNT];
static int mpevfd[VMPRESS_LEVEL_COUNT] = { -1, -1, -1 }; static int mpevfd[VMPRESS_LEVEL_COUNT] = { -1, -1, -1 };
static bool pidfd_supported; static bool pidfd_supported;
@ -278,8 +287,9 @@ static struct event_handler_info vmpressure_hinfo[VMPRESS_LEVEL_COUNT];
/* /*
* 1 ctrl listen socket, 3 ctrl data socket, 3 memory pressure levels, * 1 ctrl listen socket, 3 ctrl data socket, 3 memory pressure levels,
* 1 lmk events + 1 fd to wait for process death + 1 fd to receive kill failure notifications * 1 lmk events + 1 fd to wait for process death + 1 fd to receive kill failure notifications
* + 1 fd to receive direct reclaim state change notifications
*/ */
#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1 + 1) #define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1 + 1 + 1)
static int epollfd; static int epollfd;
static int maxevents; static int maxevents;
@ -2620,6 +2630,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
long since_thrashing_reset_ms; long since_thrashing_reset_ms;
int64_t workingset_refault_file; int64_t workingset_refault_file;
bool critical_stall = false; bool critical_stall = false;
bool in_direct_reclaim;
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) { if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
ALOGE("Failed to get current time"); ALOGE("Failed to get current time");
@ -2672,8 +2683,12 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
swap_low_threshold = 0; swap_low_threshold = 0;
} }
in_direct_reclaim = memevent_listener ? (direct_reclaim_start_tm.tv_sec != 0 ||
direct_reclaim_start_tm.tv_nsec != 0)
: (vs.field.pgscan_direct != init_pgscan_direct);
/* Identify reclaim state */ /* Identify reclaim state */
if (vs.field.pgscan_direct != init_pgscan_direct) { if (in_direct_reclaim) {
init_pgscan_direct = vs.field.pgscan_direct; init_pgscan_direct = vs.field.pgscan_direct;
init_pgscan_kswapd = vs.field.pgscan_kswapd; init_pgscan_kswapd = vs.field.pgscan_kswapd;
init_pgrefill = vs.field.pgrefill; init_pgrefill = vs.field.pgrefill;
@ -3232,6 +3247,103 @@ static MemcgVersion memcg_version() {
return version; return version;
} }
/*
 * Event handler for the memevents ring buffer fd: fetches the pending memory events
 * and updates direct_reclaim_start_tm to reflect the latest direct reclaim state.
 *
 * direct_reclaim_start_tm is set to the current CLOCK_MONOTONIC_COARSE time when the
 * newest direct reclaim event is a "begin" and is zeroed when it is an "end". It is
 * also zeroed when the current time or the events cannot be obtained, so a failure
 * never leaves a stale "in direct reclaim" state behind.
 *
 * All parameters are unused; they exist only to match the event handler signature.
 */
static void direct_reclaim_state_change(int data __unused, uint32_t events __unused,
                                        struct polling_params* poll_params __unused) {
    struct timespec curr_tm;
    std::vector<mem_event_t> mem_events;

    if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
        direct_reclaim_start_tm.tv_sec = 0;
        direct_reclaim_start_tm.tv_nsec = 0;
        ALOGE("Failed to get current time for direct reclaim state change.");
        return;
    }

    if (!memevent_listener->getMemEvents(mem_events)) {
        direct_reclaim_start_tm.tv_sec = 0;
        direct_reclaim_start_tm.tv_nsec = 0;
        ALOGE("Failed fetching direct reclaim events.");
        return;
    }

    /*
     * `mem_events` is ordered from oldest to newest, therefore we use
     * the last/latest direct reclaim event as the current direct reclaim
     * state. Events are bound by const reference to avoid copying each one.
     */
    for (const mem_event_t& mem_event : mem_events) {
        if (mem_event.type == MEM_EVENT_DIRECT_RECLAIM_BEGIN) {
            direct_reclaim_start_tm = curr_tm;
        } else if (mem_event.type == MEM_EVENT_DIRECT_RECLAIM_END) {
            direct_reclaim_start_tm.tv_sec = 0;
            direct_reclaim_start_tm.tv_nsec = 0;
        }
    }
}
/*
 * Sets up memevents-based direct reclaim monitoring.
 *
 * Creates the memevents listener when none exists yet, registers for the direct
 * reclaim begin/end events and adds the listener's ring buffer fd to the epoll set
 * with direct_reclaim_state_change as its handler.
 *
 * Returns true on success, after clearing direct_reclaim_start_tm and incrementing
 * maxevents. Returns false on any failure; the listener is released in that case,
 * leaving memevent_listener null.
 */
static bool init_direct_reclaim_monitoring() {
    static struct event_handler_info direct_reclaim_poll_hinfo = {0, direct_reclaim_state_change};

    if (!memevent_listener) {
        // Wait until the bpf programs are loaded before creating the listener
        android::bpf::waitForProgsLoaded();
        memevent_listener = std::make_unique<android::bpf::memevents::MemEventListener>(
                android::bpf::memevents::MemEventClient::LMKD);
    }

    if (!memevent_listener->ok()) {
        ALOGE("Failed to initialize memevents listener");
        memevent_listener.reset();
        return false;
    }

    // The second registration is attempted only if the first one succeeded
    const bool events_registered =
            memevent_listener->registerEvent(MEM_EVENT_DIRECT_RECLAIM_BEGIN) &&
            memevent_listener->registerEvent(MEM_EVENT_DIRECT_RECLAIM_END);
    if (!events_registered) {
        ALOGE("Failed to register direct reclaim memevents");
        memevent_listener.reset();
        return false;
    }

    const int ring_buffer_fd = memevent_listener->getRingBufferFd();
    if (ring_buffer_fd < 0) {
        memevent_listener.reset();
        ALOGE("Invalid memevent_listener fd: %d", ring_buffer_fd);
        return false;
    }

    struct epoll_event epev;
    epev.data.ptr = (void*)&direct_reclaim_poll_hinfo;
    epev.events = EPOLLIN;
    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ring_buffer_fd, &epev) < 0) {
        ALOGE("Failed registering direct reclaim fd: %d; errno=%d", ring_buffer_fd, errno);
        /*
         * Release the listener so that `destroy_direct_reclaim_monitoring` sees no
         * active monitoring and does not try to remove an fd that was never added
         * to the epoll set.
         */
        memevent_listener.reset();
        return false;
    }

    // Monitoring is active; start out in the "not in direct reclaim" state
    direct_reclaim_start_tm.tv_nsec = 0;
    direct_reclaim_start_tm.tv_sec = 0;
    maxevents++;

    return true;
}
/*
 * Tears down memevents-based direct reclaim monitoring.
 *
 * Does nothing when no listener exists. Otherwise removes the listener's ring buffer
 * fd from the epoll set (a failure is only logged), decrements maxevents, releases
 * the listener and clears direct_reclaim_start_tm.
 */
static void destroy_direct_reclaim_monitoring() {
    if (memevent_listener) {
        const int ring_buffer_fd = memevent_listener->getRingBufferFd();
        if (epoll_ctl(epollfd, EPOLL_CTL_DEL, ring_buffer_fd, NULL) < 0) {
            ALOGE("Failed to unregister direct reclaim monitoring; errno=%d", errno);
        }

        maxevents--;
        memevent_listener.reset();

        // Back to the "not in direct reclaim" state
        direct_reclaim_start_tm.tv_nsec = 0;
        direct_reclaim_start_tm.tv_sec = 0;
    }
}
static bool init_psi_monitors() { static bool init_psi_monitors() {
/* /*
* When PSI is used on low-ram devices or on high-end devices without memfree levels * When PSI is used on low-ram devices or on high-end devices without memfree levels
@ -3382,6 +3494,13 @@ static bool init_monitors() {
} else { } else {
ALOGI("Using vmpressure for memory pressure detection"); ALOGI("Using vmpressure for memory pressure detection");
} }
if (init_direct_reclaim_monitoring()) {
ALOGI("Using memevents for direct reclaim detection");
} else {
ALOGI("Using vmstats for direct reclaim detection");
}
monitors_initialized = true; monitors_initialized = true;
return true; return true;
} }
@ -3396,6 +3515,7 @@ static void destroy_monitors() {
destroy_mp_common(VMPRESS_LEVEL_MEDIUM); destroy_mp_common(VMPRESS_LEVEL_MEDIUM);
destroy_mp_common(VMPRESS_LEVEL_LOW); destroy_mp_common(VMPRESS_LEVEL_LOW);
} }
destroy_direct_reclaim_monitoring();
} }
static void drop_reaper_comm() { static void drop_reaper_comm() {