lmkd: Use process_mrelease to reap the target process from a thread

The process_mrelease syscall can be used to expedite memory release of
a process after it has been killed. This allows memory to be released
without the target process being scheduled, and therefore does not depend
on the target's priority or the CPU it is running on.
However, the process_mrelease syscall can take considerable time. Blocking
the lmkd main thread during that time can cause memory pressure events
to be missed while lmkd is busy reaping the previous target's memory.
For this reason reaping should be done in a separate thread. This way
the lmkd main thread can keep monitoring memory pressure while memory is
being released.
Introduce a Reaper class which maintains a pool of threads to perform
process killing and reaping. The main thread submits a request to the
Reaper to kill and reap the process without blocking. If all the threads
in the pool are busy at the time the next kill is needed, the kill is
performed by the main thread without reaping.

Bug: 130172058
Bug: 189803002
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: If7b10fdd1838bdfeea3fed3031565feffe0b52be
This commit is contained in:
Suren Baghdasaryan 2021-06-11 12:12:56 -07:00
parent 53df06cdf1
commit 7c3addb2a1
4 changed files with 398 additions and 59 deletions

View File

@ -12,7 +12,10 @@ cc_defaults {
cc_binary {
name: "lmkd",
srcs: ["lmkd.cpp"],
srcs: [
"lmkd.cpp",
"reaper.cpp",
],
shared_libs: [
"libcutils",
"liblog",

145
lmkd.cpp
View File

@ -16,12 +16,10 @@
#define LOG_TAG "lowmemorykiller"
#include <dirent.h>
#include <errno.h>
#include <inttypes.h>
#include <pwd.h>
#include <sched.h>
#include <signal.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
@ -30,12 +28,9 @@
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <sys/pidfd.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
@ -47,10 +42,9 @@
#include <log/log_event_list.h>
#include <log/log_time.h>
#include <private/android_filesystem_config.h>
#include <processgroup/processgroup.h>
#include <psi/psi.h>
#include <system/thread_defs.h>
#include "reaper.h"
#include "statslog.h"
#define BPF_FD_JUST_USE_INT
@ -228,6 +222,8 @@ static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = {
};
static android_log_context ctx;
static Reaper reaper;
static int reaper_comm_fd[2];
enum polling_update {
POLLING_DO_NOT_CHANGE,
@ -277,9 +273,9 @@ static struct event_handler_info vmpressure_hinfo[VMPRESS_LEVEL_COUNT];
/*
* 1 ctrl listen socket, 3 ctrl data socket, 3 memory pressure levels,
* 1 lmk events + 1 fd to wait for process death
* 1 lmk events + 1 fd to wait for process death + 1 fd to receive kill failure notifications
*/
#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1)
#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1 + 1)
static int epollfd;
static int maxevents;
@ -2008,42 +2004,6 @@ static struct proc *proc_get_heaviest(int oomadj) {
return maxprocp;
}
/*
 * Apply a priority and a set of task profiles to every thread of a process.
 *
 * Enumerates /proc/<pid>/task and, for each thread id found there, calls
 * setpriority() with prio and SetTaskProfiles() with profiles. Failures are
 * logged and the walk moves on to the next thread; nothing is returned.
 */
static void set_process_group_and_prio(int pid, const std::vector<std::string>& profiles,
                                       int prio) {
    char task_dir[PATH_MAX];

    snprintf(task_dir, sizeof(task_dir), "/proc/%d/task", pid);

    DIR* dir = opendir(task_dir);
    if (dir == NULL) {
        ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", task_dir, errno,
              pid);
        return;
    }

    for (struct dirent* entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
        // Entries starting with a dot ("." and "..") are not thread ids
        if (entry->d_name[0] == '.') {
            continue;
        }

        const int tid = atoi(entry->d_name);
        if (tid == 0) {
            ALOGW("Failed to get t_pid for '%s' of pid(%d)", entry->d_name, pid);
            continue;
        }

        // ESRCH is deliberately not reported
        if (setpriority(PRIO_PROCESS, tid, prio) != 0 && errno != ESRCH) {
            ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", tid, errno);
        }

        if (!SetTaskProfiles(tid, profiles, true)) {
            ALOGW("Failed to set task_profiles on pid(%d) t_pid(%d)", pid, tid);
        }
    }

    closedir(dir);
}
static bool is_kill_pending(void) {
char buf[24];
@ -2114,6 +2074,19 @@ static void kill_done_handler(int data __unused, uint32_t events __unused,
poll_params->update = POLLING_RESUME;
}
/*
 * Epoll handler for the read side of the reaper communication pipe.
 *
 * Consumes one pid from the pipe, stops waiting for the pending kill and
 * requests that polling be resumed.
 */
static void kill_fail_handler(int data __unused, uint32_t events __unused,
                              struct polling_params *poll_params) {
    int failed_pid;

    // Extract pid from the communication pipe. Clearing the pipe this way allows further
    // epoll_wait calls to sleep until the next event.
    ssize_t bytes_read =
            TEMP_FAILURE_RETRY(read(reaper_comm_fd[0], &failed_pid, sizeof(failed_pid)));
    if (bytes_read != sizeof(failed_pid)) {
        ALOGE("thread communication read failed: %s", strerror(errno));
    }

    // The wait is torn down and polling resumed even if the read above failed
    stop_wait_for_proc_kill(false);
    poll_params->update = POLLING_RESUME;
}
static void start_wait_for_proc_kill(int pid_or_fd) {
static struct event_handler_info kill_done_hinfo = { 0, kill_done_handler };
struct epoll_event epev;
@ -2149,7 +2122,7 @@ static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_i
int pidfd = procp->pidfd;
uid_t uid = procp->uid;
char *taskname;
int r;
int kill_result;
int result = -1;
struct memory_stat *mem_st;
struct kill_stat kill_st;
@ -2188,29 +2161,21 @@ static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_i
snprintf(desc, sizeof(desc), "lmk,%d,%d,%d,%d,%d", pid, ki ? (int)ki->kill_reason : -1,
procp->oomadj, min_oom_score, ki ? ki->max_thrashing : -1);
trace_kill_start(pid, desc);
/* CAP_KILL required */
if (pidfd < 0) {
start_wait_for_proc_kill(pid);
r = kill(pid, SIGKILL);
} else {
start_wait_for_proc_kill(pidfd);
r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
}
start_wait_for_proc_kill(pidfd < 0 ? pid : pidfd);
kill_result = reaper.kill({ pidfd, pid });
trace_kill_end();
if (r) {
if (kill_result) {
stop_wait_for_proc_kill(false);
ALOGE("kill(%d): errno=%d", pid, errno);
/* Delete process record even when we fail to kill so that we don't get stuck on it */
goto out;
}
set_process_group_and_prio(pid, {"CPUSET_SP_FOREGROUND", "SCHED_SP_FOREGROUND"},
ANDROID_PRIORITY_HIGHEST);
last_kill_tm = *tm;
inc_killcnt(procp->oomadj);
@ -3158,6 +3123,63 @@ static void destroy_monitors() {
}
}
/* Close both ends of the reaper communication pipe (read end first). */
static void drop_reaper_comm() {
    for (int fd : reaper_comm_fd) {
        close(fd);
    }
}
/*
 * Create the pipe used by reaper threads to report kill failures and make
 * its read end non-blocking.
 *
 * Returns true on success. On failure the error is logged, any descriptors
 * that were opened are closed again and false is returned.
 */
static bool setup_reaper_comm() {
    if (pipe(reaper_comm_fd)) {
        ALOGE("pipe failed: %s", strerror(errno));
        return false;
    }

    // Ensure main thread never blocks on read. F_GETFL must be checked separately:
    // a -1 result OR-ed with O_NONBLOCK would hand an all-ones flag set to F_SETFL.
    int flags = fcntl(reaper_comm_fd[0], F_GETFL);
    if (flags == -1 || fcntl(reaper_comm_fd[0], F_SETFL, flags | O_NONBLOCK)) {
        ALOGE("fcntl failed: %s", strerror(errno));
        drop_reaper_comm();
        return false;
    }

    return true;
}
/*
 * Bring up the process reaper.
 *
 * Checks that reaping is supported, creates the communication pipe, registers
 * its read end with epoll (handled by kill_fail_handler) and initializes the
 * reaper object with the write end. Every step that fails undoes the steps
 * completed before it. On success maxevents is incremented to account for the
 * added epoll fd.
 *
 * Returns true if the reaper is ready for use, false otherwise.
 */
static bool init_reaper() {
    static struct event_handler_info kill_failed_hinfo = { 0, kill_fail_handler };

    if (!reaper.is_reaping_supported()) {
        ALOGI("Process reaping is not supported");
        return false;
    }

    if (!setup_reaper_comm()) {
        ALOGE("Failed to create thread communication channel");
        return false;
    }

    // Setup epoll handler
    struct epoll_event epev;
    epev.events = EPOLLIN;
    epev.data.ptr = (void *)&kill_failed_hinfo;
    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, reaper_comm_fd[0], &epev) != 0) {
        ALOGE("epoll_ctl failed: %s", strerror(errno));
        drop_reaper_comm();
        return false;
    }

    if (reaper.init(reaper_comm_fd[1])) {
        maxevents++;
        return true;
    }

    // Reaper failed to start: unregister the pipe from epoll and close it
    ALOGE("Failed to initialize reaper object");
    if (epoll_ctl(epollfd, EPOLL_CTL_DEL, reaper_comm_fd[0], &epev) != 0) {
        ALOGE("epoll_ctl failed: %s", strerror(errno));
    }
    drop_reaper_comm();
    return false;
}
static int init(void) {
static struct event_handler_info kernel_poll_hinfo = { 0, kernel_event_handler };
struct reread_data file_data = {
@ -3480,6 +3502,8 @@ static void update_props() {
thrashing_limit_pct * 2));
swap_util_max = clamp(0, 100, GET_LMK_PROPERTY(int32, "swap_util_max", 100));
filecache_min_kb = GET_LMK_PROPERTY(int64, "filecache_min_kb", 0);
reaper.enable_debug(debug_process_killing);
}
int main(int argc, char **argv) {
@ -3521,6 +3545,11 @@ int main(int argc, char **argv) {
}
}
if (init_reaper()) {
ALOGI("Process reaper initialized with %d threads in the pool",
reaper.thread_cnt());
}
mainloop();
}

248
reaper.cpp Normal file
View File

@ -0,0 +1,248 @@
/*
* Copyright 2021 Google, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "lowmemorykiller"
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <log/log.h>
#include <signal.h>
#include <string.h>
#include <stdlib.h>
#include <sys/epoll.h>
#include <sys/pidfd.h>
#include <sys/resource.h>
#include <sys/sysinfo.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <processgroup/processgroup.h>
#include <system/thread_defs.h>
#include "reaper.h"
#define NS_PER_MS (NS_PER_SEC / MS_PER_SEC)
#define THREAD_POOL_SIZE 2
#ifndef __NR_process_mrelease
#define __NR_process_mrelease 448
#endif
/*
 * Invoke the process_mrelease syscall by number for the process referred to
 * by pidfd. Returns the syscall() result narrowed to int.
 */
static int process_mrelease(int pidfd, unsigned int flags) {
    const long ret = syscall(__NR_process_mrelease, pidfd, flags);
    return static_cast<int>(ret);
}
/*
 * Return the time elapsed between *from and *to in milliseconds.
 * The nanosecond difference is divided with integer division, so any
 * sub-millisecond remainder is discarded.
 */
static inline long get_time_diff_ms(struct timespec *from,
                                    struct timespec *to) {
    const auto whole_sec_ms = (to->tv_sec - from->tv_sec) * (long)MS_PER_SEC;
    const auto frac_sec_ms = (to->tv_nsec - from->tv_nsec) / (long)NS_PER_MS;

    return whole_sec_ms + frac_sec_ms;
}
/*
 * Entry point of a reaper pool thread.
 *
 * param is the owning Reaper object. The thread loops forever: it takes a
 * request from the Reaper, sends SIGKILL to the target through its pidfd,
 * calls process_mrelease on it, then closes the pidfd and marks the request
 * complete. A failed kill is reported to the main thread through
 * Reaper::notify_kill_failure(); a failed process_mrelease is only logged.
 */
static void* reaper_main(void* param) {
    Reaper *reaper = static_cast<Reaper*>(param);
    struct timespec start_tm, end_tm;
    struct Reaper::target_proc target;
    pid_t tid = gettid();

    // Ensure the thread does not use little cores
    if (!SetTaskProfiles(tid, {"CPUSET_SP_FOREGROUND"}, true)) {
        ALOGE("Failed to assign cpuset to the reaper thread");
    }

    for (;;) {
        target = reaper->dequeue_request();

        // Sample the debug flag once per request. Reading it again after the reap could
        // observe a change made in between and report a duration computed from an
        // uninitialized start_tm.
        const bool debug = reaper->debug_enabled();
        if (debug) {
            clock_gettime(CLOCK_MONOTONIC_COARSE, &start_tm);
        }

        if (pidfd_send_signal(target.pidfd, SIGKILL, NULL, 0)) {
            // Inform the main thread about failure to kill
            reaper->notify_kill_failure(target.pid);
        } else if (process_mrelease(target.pidfd, 0)) {
            // Log the pid: target.pidfd is a private duplicate whose number is meaningless
            // to whoever reads the log
            ALOGE("process_mrelease %d failed: %s", target.pid, strerror(errno));
        } else if (debug) {
            clock_gettime(CLOCK_MONOTONIC_COARSE, &end_tm);
            ALOGI("Process %d was reaped in %ldms", target.pid,
                  get_time_diff_ms(&start_tm, &end_tm));
        }

        // The request owns target.pidfd; release it and free the slot on every path
        close(target.pidfd);
        reaper->request_complete();
    }

    return NULL;
}
/*
 * Report whether process_mrelease is available.
 *
 * The first call probes the syscall with an invalid pidfd (-1); only an
 * ENOSYS failure is treated as "unsupported". The outcome is cached in a
 * function-local static and reused by later calls.
 */
bool Reaper::is_reaping_supported() {
    enum ReapSupport { UNKNOWN, SUPPORTED, UNSUPPORTED };
    static ReapSupport reap_support = UNKNOWN;

    if (reap_support != UNKNOWN) {
        return reap_support == SUPPORTED;
    }

    const bool syscall_missing = process_mrelease(-1, 0) && errno == ENOSYS;
    reap_support = syscall_missing ? UNSUPPORTED : SUPPORTED;

    return reap_support == SUPPORTED;
}
bool Reaper::init(int comm_fd) {
char name[16];
if (thread_cnt_ > 0) {
// init should not be called multiple times
return false;
}
thread_pool_ = new pthread_t[THREAD_POOL_SIZE];
for (int i = 0; i < THREAD_POOL_SIZE; i++) {
if (pthread_create(&thread_pool_[thread_cnt_], NULL, reaper_main, this)) {
ALOGE("pthread_create failed: %s", strerror(errno));
continue;
}
snprintf(name, sizeof(name), "lmkd_reaper%d", thread_cnt_);
if (pthread_setname_np(thread_pool_[thread_cnt_], name)) {
ALOGW("pthread_setname_np failed: %s", strerror(errno));
}
thread_cnt_++;
}
if (!thread_cnt_) {
delete[] thread_pool_;
return false;
}
queue_.reserve(thread_cnt_);
comm_fd_ = comm_fd;
return true;
}
/*
 * Set the priority and task profiles of all threads belonging to pid.
 *
 * Thread ids are taken from the entries of /proc/<pid>/task. Each thread gets
 * setpriority(prio) and SetTaskProfiles(profiles); problems are logged as
 * warnings and do not stop processing of the remaining threads.
 */
static void set_process_group_and_prio(int pid, const std::vector<std::string>& profiles,
                                       int prio) {
    char path[PATH_MAX];
    snprintf(path, sizeof(path), "/proc/%d/task", pid);

    DIR* task_dir = opendir(path);
    if (!task_dir) {
        ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", path, errno,
              pid);
        return;
    }

    struct dirent* ent;
    while ((ent = readdir(task_dir)) != NULL) {
        const char* entry_name = ent->d_name;

        // Ignore dot-prefixed entries such as "." and ".."
        if (entry_name[0] == '.') {
            continue;
        }

        int thread_id = atoi(entry_name);
        if (thread_id == 0) {
            ALOGW("Failed to get t_pid for '%s' of pid(%d)", entry_name, pid);
            continue;
        }

        // A failure with ESRCH is not logged
        const bool prio_failed = setpriority(PRIO_PROCESS, thread_id, prio) != 0;
        if (prio_failed && errno != ESRCH) {
            ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", thread_id, errno);
        }

        if (!SetTaskProfiles(thread_id, profiles, true)) {
            ALOGW("Failed to set task_profiles on pid(%d) t_pid(%d)", pid, thread_id);
        }
    }

    closedir(task_dir);
}
bool Reaper::async_kill(const struct target_proc& target) {
if (target.pidfd == -1) {
return false;
}
if (!thread_cnt_) {
return false;
}
mutex_.lock();
if (active_requests_ >= thread_cnt_) {
mutex_.unlock();
return false;
}
active_requests_++;
// Duplicate pidfd instead of reusing the original one to avoid synchronization and refcounting
// when both reaper and main threads are using or closing the pidfd
queue_.push_back({ dup(target.pidfd), target.pid });
// Wake up a reaper thread
cond_.notify_one();
mutex_.unlock();
set_process_group_and_prio(target.pid, {"CPUSET_SP_FOREGROUND", "SCHED_SP_FOREGROUND"},
ANDROID_PRIORITY_HIGHEST);
return true;
}
/*
 * Kill the target process.
 *
 * Without a pidfd the process is signalled by pid. With a pidfd the kill is
 * first offered to the reaper threads; if that is not possible SIGKILL is
 * sent through the pidfd from the calling thread.
 *
 * Returns 0 on success (or when the kill was queued), otherwise the result
 * of the failing syscall.
 */
int Reaper::kill(const struct target_proc& target) {
    /* CAP_KILL required */
    if (target.pidfd >= 0) {
        // A queued kill is assumed to succeed; a failure is reported asynchronously
        return async_kill(target) ? 0 : pidfd_send_signal(target.pidfd, SIGKILL, NULL, 0);
    }

    return ::kill(target.pid, SIGKILL);
}
/*
 * Block until a request is available and return it.
 * The most recently queued request is taken (from the back of queue_).
 */
Reaper::target_proc Reaper::dequeue_request() {
    std::unique_lock<std::mutex> guard(mutex_);

    // Sleeps on cond_ until queue_ is non-empty
    cond_.wait(guard, [this] { return !queue_.empty(); });

    struct target_proc next = queue_.back();
    queue_.pop_back();

    return next;
}
/* Mark one request as finished by decrementing active_requests_ under mutex_. */
void Reaper::request_complete() {
    std::scoped_lock<std::mutex> guard(mutex_);

    --active_requests_;
}
/*
 * Report a failed kill of pid: log it and write the pid to comm_fd_.
 * mutex_ is held for the duration of the write.
 */
void Reaper::notify_kill_failure(int pid) {
    std::scoped_lock<std::mutex> guard(mutex_);

    ALOGE("Failed to kill process %d", pid);

    ssize_t written = TEMP_FAILURE_RETRY(write(comm_fd_, &pid, sizeof(pid)));
    if (written != sizeof(pid)) {
        ALOGE("thread communication write failed: %s", strerror(errno));
    }
}

59
reaper.h Normal file
View File

@ -0,0 +1,59 @@
/*
* Copyright 2021 Google, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <condition_variable>
#include <mutex>
#include <vector>
/*
 * Reaper maintains a pool of threads that kill a target process and reap its
 * memory. Requests are submitted through kill(); when no pool thread can take
 * the request the kill is performed by the calling thread instead.
 */
class Reaper {
public:
    // A kill target: pidfd may be negative when only the pid is known.
    struct target_proc {
        int pidfd;
        int pid;
    };
private:
    // mutex_ and cond_ are used to wakeup the reaper thread.
    std::mutex mutex_;
    std::condition_variable cond_;
    // mutex_ protects queue_ and active_requests_ access.
    std::vector<struct target_proc> queue_;
    int active_requests_;
    // write side of the pipe to communicate kill failures with the main thread
    int comm_fd_;
    // number of successfully started threads in thread_pool_
    int thread_cnt_;
    pthread_t* thread_pool_;
    bool debug_enabled_;

    // Queue target for a pool thread; returns false if the request was not queued.
    bool async_kill(const struct target_proc& target);
public:
    // Every member gets a defined value so that an object that was never init()-ed
    // holds no indeterminate descriptor or pointer.
    Reaper()
        : active_requests_(0),
          comm_fd_(-1),
          thread_cnt_(0),
          thread_pool_(nullptr),
          debug_enabled_(false) {}

    // Returns true unless the process_mrelease syscall is missing.
    static bool is_reaping_supported();

    // Start the thread pool; comm_fd is the write side of the failure-notification pipe.
    bool init(int comm_fd);
    int thread_cnt() const { return thread_cnt_; }
    void enable_debug(bool enable) { debug_enabled_ = enable; }
    bool debug_enabled() const { return debug_enabled_; }

    // return 0 on success or error code returned by the syscall
    int kill(const struct target_proc& target);
    // below members are used only by reaper_main
    target_proc dequeue_request();
    void request_complete();
    void notify_kill_failure(int pid);
};