diff --git a/Android.bp b/Android.bp
index 4f53ded..032541f 100644
--- a/Android.bp
+++ b/Android.bp
@@ -12,7 +12,10 @@ cc_defaults {
 
 cc_binary {
     name: "lmkd",
-    srcs: ["lmkd.cpp"],
+    srcs: [
+        "lmkd.cpp",
+        "reaper.cpp",
+    ],
     shared_libs: [
         "libcutils",
         "liblog",
diff --git a/lmkd.cpp b/lmkd.cpp
index fe1d32c..b4143cd 100644
--- a/lmkd.cpp
+++ b/lmkd.cpp
@@ -16,12 +16,10 @@
 
 #define LOG_TAG "lowmemorykiller"
 
-#include
 #include
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -30,12 +28,9 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
-#include
-#include
 #include
 #include
 
@@ -47,10 +42,9 @@
 #include
 #include
 #include
-#include
 #include
-#include
 
+#include "reaper.h"
 #include "statslog.h"
 
 #define BPF_FD_JUST_USE_INT
@@ -228,6 +222,8 @@ static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = {
 };
 
 static android_log_context ctx;
+static Reaper reaper;
+static int reaper_comm_fd[2];
 
 enum polling_update {
     POLLING_DO_NOT_CHANGE,
@@ -277,9 +273,9 @@ static struct event_handler_info vmpressure_hinfo[VMPRESS_LEVEL_COUNT];
 
 /*
  * 1 ctrl listen socket, 3 ctrl data socket, 3 memory pressure levels,
- * 1 lmk events + 1 fd to wait for process death
+ * 1 lmk events + 1 fd to wait for process death + 1 fd to receive kill failure notifications
  */
-#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1)
+#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1 + 1)
 
 static int epollfd;
 static int maxevents;
@@ -2008,42 +2004,6 @@ static struct proc *proc_get_heaviest(int oomadj) {
     return maxprocp;
 }
 
-static void set_process_group_and_prio(int pid, const std::vector<std::string>& profiles,
-                                       int prio) {
-    DIR* d;
-    char proc_path[PATH_MAX];
-    struct dirent* de;
-
-    snprintf(proc_path, sizeof(proc_path), "/proc/%d/task", pid);
-    if (!(d = opendir(proc_path))) {
-        ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", proc_path, errno,
-              pid);
-        return;
-    }
-
-    while ((de = readdir(d))) {
-        int t_pid;
-
-        if (de->d_name[0] == '.') continue;
-        t_pid = atoi(de->d_name);
-
-        if (!t_pid) {
-            ALOGW("Failed to get t_pid for '%s' of pid(%d)", de->d_name, pid);
-            continue;
-        }
-
-        if (setpriority(PRIO_PROCESS, t_pid, prio) && errno != ESRCH) {
-            ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", t_pid, errno);
-        }
-
-        if (!SetTaskProfiles(t_pid, profiles, true)) {
-            ALOGW("Failed to set task_profiles on pid(%d) t_pid(%d)", pid, t_pid);
-            continue;
-        }
-    }
-    closedir(d);
-}
-
 static bool is_kill_pending(void) {
     char buf[24];
 
@@ -2114,6 +2074,19 @@ static void kill_done_handler(int data __unused, uint32_t events __unused,
     poll_params->update = POLLING_RESUME;
 }
 
+static void kill_fail_handler(int data __unused, uint32_t events __unused,
+                              struct polling_params *poll_params) {
+    int pid;  // pid written to the pipe by Reaper::notify_kill_failure
+
+    // Extract pid from the communication pipe. Clearing the pipe this way allows further
+    // epoll_wait calls to sleep until the next event.
+    if (TEMP_FAILURE_RETRY(read(reaper_comm_fd[0], &pid, sizeof(pid))) != sizeof(pid)) {
+        ALOGE("thread communication read failed: %s", strerror(errno));
+    }
+    stop_wait_for_proc_kill(false);
+    poll_params->update = POLLING_RESUME;
+}
+
 static void start_wait_for_proc_kill(int pid_or_fd) {
     static struct event_handler_info kill_done_hinfo = { 0, kill_done_handler };
     struct epoll_event epev;
@@ -2149,7 +2122,7 @@ static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_i
     int pidfd = procp->pidfd;
     uid_t uid = procp->uid;
     char *taskname;
-    int r;
+    int kill_result;
     int result = -1;
     struct memory_stat *mem_st;
     struct kill_stat kill_st;
@@ -2188,29 +2161,21 @@ static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_i
 
     snprintf(desc, sizeof(desc), "lmk,%d,%d,%d,%d,%d", pid, ki ? (int)ki->kill_reason : -1,
              procp->oomadj, min_oom_score, ki ? ki->max_thrashing : -1);
+
     trace_kill_start(pid, desc);
 
-    /* CAP_KILL required */
-    if (pidfd < 0) {
-        start_wait_for_proc_kill(pid);
-        r = kill(pid, SIGKILL);
-    } else {
-        start_wait_for_proc_kill(pidfd);
-        r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
-    }
+    start_wait_for_proc_kill(pidfd < 0 ? pid : pidfd);
+    kill_result = reaper.kill({ pidfd, pid });
 
     trace_kill_end();
 
-    if (r) {
+    if (kill_result) {
         stop_wait_for_proc_kill(false);
         ALOGE("kill(%d): errno=%d", pid, errno);
         /* Delete process record even when we fail to kill so that we don't get stuck on it */
         goto out;
     }
 
-    set_process_group_and_prio(pid, {"CPUSET_SP_FOREGROUND", "SCHED_SP_FOREGROUND"},
-                               ANDROID_PRIORITY_HIGHEST);
-
     last_kill_tm = *tm;
 
     inc_killcnt(procp->oomadj);
@@ -3158,6 +3123,63 @@ static void destroy_monitors() {
     }
 }
 
+static void drop_reaper_comm() {  // closes both ends of the reaper notification pipe
+    close(reaper_comm_fd[0]);
+    close(reaper_comm_fd[1]);
+}
+
+static bool setup_reaper_comm() {  // creates the notification pipe; returns false on failure
+    if (pipe(reaper_comm_fd)) {
+        ALOGE("pipe failed: %s", strerror(errno));
+        return false;
+    }
+
+    // Ensure main thread never blocks on read; a failed F_GETFL (-1) must not reach F_SETFL
+    int flags = fcntl(reaper_comm_fd[0], F_GETFL);
+    if (flags == -1 || fcntl(reaper_comm_fd[0], F_SETFL, flags | O_NONBLOCK)) {
+        ALOGE("fcntl failed: %s", strerror(errno));
+        drop_reaper_comm();
+        return false;
+    }
+
+    return true;
+}
+
+static bool init_reaper() {  // true once the reaper pool runs and its pipe is in the epoll set
+    if (!reaper.is_reaping_supported()) {
+        ALOGI("Process reaping is not supported");
+        return false;
+    }
+
+    if (!setup_reaper_comm()) {
+        ALOGE("Failed to create thread communication channel");
+        return false;
+    }
+
+    // Setup epoll handler
+    struct epoll_event epev;
+    static struct event_handler_info kill_failed_hinfo = { 0, kill_fail_handler };
+    epev.events = EPOLLIN;
+    epev.data.ptr = (void *)&kill_failed_hinfo;
+    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, reaper_comm_fd[0], &epev)) {
+        ALOGE("epoll_ctl failed: %s", strerror(errno));
+        drop_reaper_comm();
+        return false;
+    }
+
+    if (!reaper.init(reaper_comm_fd[1])) {
+        ALOGE("Failed to initialize reaper object");
+        if (epoll_ctl(epollfd, EPOLL_CTL_DEL, reaper_comm_fd[0], &epev)) {
+            ALOGE("epoll_ctl failed: %s", strerror(errno));
+        }
+        drop_reaper_comm();
+        return false;
+    }
+    maxevents++;  // account for the reaper pipe fd registered above
+
+    return true;
+}
+
 static int init(void) {
     static struct event_handler_info kernel_poll_hinfo = { 0, kernel_event_handler };
     struct reread_data file_data = {
@@ -3480,6 +3502,8 @@ static void update_props() {
         thrashing_limit_pct * 2));
     swap_util_max = clamp(0, 100, GET_LMK_PROPERTY(int32, "swap_util_max", 100));
     filecache_min_kb = GET_LMK_PROPERTY(int64, "filecache_min_kb", 0);
+
+    reaper.enable_debug(debug_process_killing);
 }
 
 int main(int argc, char **argv) {
@@ -3521,6 +3545,11 @@ int main(int argc, char **argv) {
             }
         }
 
+        if (init_reaper()) {
+            ALOGI("Process reaper initialized with %d threads in the pool",
+                    reaper.thread_cnt());
+        }
+
         mainloop();
     }
 
diff --git a/reaper.cpp b/reaper.cpp
new file mode 100644
index 0000000..7c0e9ef
--- /dev/null
+++ b/reaper.cpp
@@ -0,0 +1,248 @@
+/*
+ * Copyright 2021 Google, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "lowmemorykiller"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "reaper.h"
+
+#define NS_PER_MS (NS_PER_SEC / MS_PER_SEC)
+#define THREAD_POOL_SIZE 2
+
+#ifndef __NR_process_mrelease
+#define __NR_process_mrelease 448
+#endif
+
+static int process_mrelease(int pidfd, unsigned int flags) {  // raw syscall wrapper
+    return syscall(__NR_process_mrelease, pidfd, flags);
+}
+
+static inline long get_time_diff_ms(struct timespec *from,
+                                    struct timespec *to) {
+    return (to->tv_sec - from->tv_sec) * (long)MS_PER_SEC +
+           (to->tv_nsec - from->tv_nsec) / (long)NS_PER_MS;
+}
+
+static void* reaper_main(void* param) {  // thread entry point; param is the owning Reaper
+    Reaper *reaper = static_cast<Reaper*>(param);
+    struct timespec start_tm, end_tm;
+    struct Reaper::target_proc target;
+    pid_t tid = gettid();
+
+    // Ensure the thread does not use little cores
+    if (!SetTaskProfiles(tid, {"CPUSET_SP_FOREGROUND"}, true)) {
+        ALOGE("Failed to assign cpuset to the reaper thread");
+    }
+
+    for (;;) {
+        target = reaper->dequeue_request();
+
+        if (reaper->debug_enabled()) {
+            clock_gettime(CLOCK_MONOTONIC_COARSE, &start_tm);
+        }
+
+        if (pidfd_send_signal(target.pidfd, SIGKILL, NULL, 0)) {
+            // Inform the main thread about failure to kill
+            reaper->notify_kill_failure(target.pid);
+            goto done;
+        }
+        if (process_mrelease(target.pidfd, 0)) {
+            ALOGE("process_mrelease %d failed: %s", target.pid, strerror(errno));
+            goto done;
+        }
+        if (reaper->debug_enabled()) {
+            clock_gettime(CLOCK_MONOTONIC_COARSE, &end_tm);
+            ALOGI("Process %d was reaped in %ldms", target.pid,
+                  get_time_diff_ms(&start_tm, &end_tm));
+        }
+done:
+        close(target.pidfd);  // this is the duplicate created by async_kill
+        reaper->request_complete();
+    }
+
+    return NULL;
+}
+
+bool Reaper::is_reaping_supported() {  // probes process_mrelease once and caches the answer
+    static enum {
+        UNKNOWN,
+        SUPPORTED,
+        UNSUPPORTED
+    } reap_support = UNKNOWN;
+
+    if (reap_support == UNKNOWN) {
+        if (process_mrelease(-1, 0) && errno == ENOSYS) {
+            reap_support = UNSUPPORTED;
+        } else {
+            reap_support = SUPPORTED;
+        }
+    }
+    return reap_support == SUPPORTED;
+}
+
+bool Reaper::init(int comm_fd) {  // starts the thread pool; comm_fd is the pipe's write end
+    char name[16];
+    int err;  // pthread_* calls return the error number and do not set errno
+
+    if (thread_cnt_ > 0) {
+        return false;  // init should not be called multiple times
+    }
+
+    thread_pool_ = new pthread_t[THREAD_POOL_SIZE];
+    for (int i = 0; i < THREAD_POOL_SIZE; i++) {
+        if ((err = pthread_create(&thread_pool_[thread_cnt_], NULL, reaper_main, this)) != 0) {
+            ALOGE("pthread_create failed: %s", strerror(err));
+            continue;
+        }
+        snprintf(name, sizeof(name), "lmkd_reaper%d", thread_cnt_);
+        if ((err = pthread_setname_np(thread_pool_[thread_cnt_], name)) != 0) {
+            ALOGW("pthread_setname_np failed: %s", strerror(err));
+        }
+        thread_cnt_++;
+    }
+
+    if (!thread_cnt_) {
+        delete[] thread_pool_;
+        return false;
+    }
+
+    queue_.reserve(thread_cnt_);
+    comm_fd_ = comm_fd;
+    return true;
+}
+
+static void set_process_group_and_prio(int pid, const std::vector<std::string>& profiles,
+                                       int prio) {
+    DIR* d;
+    char proc_path[PATH_MAX];
+    struct dirent* de;
+
+    snprintf(proc_path, sizeof(proc_path), "/proc/%d/task", pid);
+    if (!(d = opendir(proc_path))) {
+        ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", proc_path, errno,
+              pid);
+        return;
+    }
+
+    while ((de = readdir(d))) {
+        int t_pid;
+
+        if (de->d_name[0] == '.') continue;
+        t_pid = atoi(de->d_name);
+
+        if (!t_pid) {
+            ALOGW("Failed to get t_pid for '%s' of pid(%d)", de->d_name, pid);
+            continue;
+        }
+
+        if (setpriority(PRIO_PROCESS, t_pid, prio) && errno != ESRCH) {
+            ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", t_pid, errno);
+        }
+
+        if (!SetTaskProfiles(t_pid, profiles, true)) {
+            ALOGW("Failed to set task_profiles on pid(%d) t_pid(%d)", pid, t_pid);
+            continue;
+        }
+    }
+    closedir(d);
+}
+
+bool Reaper::async_kill(const struct target_proc& target) {  // false: caller kills synchronously
+    if (target.pidfd == -1 || !thread_cnt_) {
+        return false;
+    }
+
+    {
+        std::scoped_lock lock(mutex_);
+        if (active_requests_ >= thread_cnt_) {
+            return false;
+        }
+        // Duplicate pidfd instead of reusing the original one to avoid synchronization and
+        // refcounting when both reaper and main threads are using or closing the pidfd
+        int pidfd = dup(target.pidfd);
+        if (pidfd < 0) {
+            // Do not queue an invalid fd; Reaper::kill falls back to a synchronous kill
+            return false;
+        }
+        active_requests_++;
+        queue_.push_back({ pidfd, target.pid });
+        // Wake up a reaper thread
+        cond_.notify_one();
+    }
+
+    set_process_group_and_prio(target.pid, {"CPUSET_SP_FOREGROUND", "SCHED_SP_FOREGROUND"},
+                               ANDROID_PRIORITY_HIGHEST);
+
+    return true;
+}
+
+int Reaper::kill(const struct target_proc& target) {
+    /* CAP_KILL required */
+    if (target.pidfd < 0) {
+        return ::kill(target.pid, SIGKILL);
+    }
+
+    if (async_kill(target)) {
+        // we assume the kill will be successful and if it fails we will be notified
+        return 0;
+    }
+
+    return pidfd_send_signal(target.pidfd, SIGKILL, NULL, 0);
+}
+
+Reaper::target_proc Reaper::dequeue_request() {  // blocks until a request is queued
+    struct target_proc target;
+    std::unique_lock lock(mutex_);
+
+    while (queue_.empty()) {
+        cond_.wait(lock);
+    }
+    target = queue_.back();
+    queue_.pop_back();
+
+    return target;
+}
+
+void Reaper::request_complete() {  // releases the slot taken in async_kill
+    std::scoped_lock lock(mutex_);
+    active_requests_--;
+}
+
+void Reaper::notify_kill_failure(int pid) {  // sends pid to the main thread over the pipe
+    std::scoped_lock lock(mutex_);
+
+    ALOGE("Failed to kill process %d", pid);
+    if (TEMP_FAILURE_RETRY(write(comm_fd_, &pid, sizeof(pid))) != sizeof(pid)) {
+        ALOGE("thread communication write failed: %s", strerror(errno));
+    }
+}
diff --git a/reaper.h b/reaper.h
new file mode 100644
index 0000000..ce77d29
--- /dev/null
+++ b/reaper.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2021 Google, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+#include <vector>
+
+class Reaper {
+public:
+    struct target_proc {
+        int pidfd;
+        int pid;
+    };
+private:
+    // mutex_ and cond_ are used to wakeup the reaper thread.
+    std::mutex mutex_;
+    std::condition_variable cond_;
+    // mutex_ protects queue_ and active_requests_ access.
+    std::vector<struct target_proc> queue_;
+    int active_requests_;
+    // write side of the pipe to communicate kill failures with the main thread; -1 until init()
+    int comm_fd_ = -1;
+    int thread_cnt_;
+    pthread_t* thread_pool_ = nullptr;  // allocated by init()
+    bool debug_enabled_;  // NOTE(review): written by main thread, read by reapers without a lock
+
+    bool async_kill(const struct target_proc& target);
+public:
+    Reaper() : active_requests_(0), thread_cnt_(0), debug_enabled_(false) {}
+
+    static bool is_reaping_supported();
+
+    bool init(int comm_fd);
+    int thread_cnt() const { return thread_cnt_; }
+    void enable_debug(bool enable) { debug_enabled_ = enable; }
+    bool debug_enabled() const { return debug_enabled_; }
+
+    // returns 0 if the kill was sent or queued for a reaper thread, else the failed syscall result
+    int kill(const struct target_proc& target);
+    // below members are used only by reaper_main
+    target_proc dequeue_request();
+    void request_complete();
+    void notify_kill_failure(int pid);
+};