From 3a3d11f70a3c2ef4dff17b4d2e34911797d45cae Mon Sep 17 00:00:00 2001 From: Martin Liu Date: Sun, 19 Jan 2025 16:27:16 +0000 Subject: [PATCH] lmkd: Introduce vendor lmk kill memevent This change allows vendors to trigger LMKD kill events with custom reasons and minimum score adjustments. This enables experimentation with custom heuristics and data collection for evaluation before upstreaming changes to AOSP. Bug: 385050909 Test: build and check the vendor kill event Change-Id: If9b51ed9603f0e10e6fc4671fb6da26548f41aaf Merged-In: If9b51ed9603f0e10e6fc4671fb6da26548f41aaf Signed-off-by: Martin Liu --- Android.bp | 4 +++ lmkd.cpp | 86 +++++++++++++++++++++++++++++++++++++++++------------- statslog.h | 4 +++ 3 files changed, 74 insertions(+), 20 deletions(-) diff --git a/Android.bp b/Android.bp index 9d70642..ec0be49 100644 --- a/Android.bp +++ b/Android.bp @@ -88,6 +88,10 @@ cc_library_static { "liblog", "libprocessgroup", ], + header_libs: [ + "libmemevents_headers", + "libbase_headers", + ], } cc_library_static { diff --git a/lmkd.cpp b/lmkd.cpp index 4ddef57..16ee231 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -2684,7 +2684,18 @@ static int calc_swap_utilization(union meminfo *mi) { return total_swappable > 0 ? (swap_used * 100) / total_swappable : 0; } -static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_params) { +enum event_source { + PSI, + VENDOR, +}; + +union psi_event_data { + enum vmpressure_level level; + mem_event_t vendor_event; +}; + +static void __mp_event_psi(enum event_source source, union psi_event_data data, + uint32_t events, struct polling_params *poll_params) { enum reclaim_state { NO_RECLAIM = 0, KSWAPD_RECLAIM, @@ -2712,7 +2723,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ struct timespec curr_tm; int64_t thrashing = 0; bool swap_is_low = false; - enum vmpressure_level level = (enum vmpressure_level)data; + enum vmpressure_level level = (source == PSI) ? data.level: (enum vmpressure_level)0; enum kill_reasons kill_reason = NONE; bool cycle_after_kill = false; enum reclaim_state reclaim = NO_RECLAIM; @@ -2731,8 +2742,11 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ mp_event_count++; if (debug_process_killing) { - ALOGI("%s memory pressure event #%" PRIu64 " is triggered", - level_name[level], mp_event_count); + if (source == PSI) + ALOGI("%s memory pressure event #%" PRIu64 " is triggered", + level_name[level], mp_event_count); + else + ALOGI("vendor kill event #%" PRIu64 " is triggered", mp_event_count); } if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) { @@ -2740,21 +2754,23 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ return; } - if (events > 0 ) { - /* Ignore a lower event within the first polling window. */ - if (level < prev_level) { - if (debug_process_killing) - ALOGI("Ignoring %s pressure event; occurred too soon.", - level_name[level]); - return; + if (source == PSI) { + if (events > 0 ) { + /* Ignore a lower event within the first polling window. */ + if (level < prev_level) { + if (debug_process_killing) + ALOGI("Ignoring %s pressure event; occurred too soon.", + level_name[level]); + return; + } + prev_level = level; + } else { + /* Reset event level after the first polling window. */ + prev_level = VMPRESS_LEVEL_LOW; } - prev_level = level; - } else { - /* Reset event level after the first polling window. */ - prev_level = VMPRESS_LEVEL_LOW; - } - record_wakeup_time(&curr_tm, events ? Event : Polling, &wi); + record_wakeup_time(&curr_tm, events ? Event : Polling, &wi); + } bool kill_pending = is_kill_pending(); if (kill_pending && (kill_timeout_ms == 0 || @@ -2821,7 +2837,8 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ init_pgscan_kswapd = vs.field.pgscan_kswapd; init_pgrefill = vs.field.pgrefill; reclaim = KSWAPD_RECLAIM; - } else if (workingset_refault_file == prev_workingset_refault) { + } else if ((workingset_refault_file == prev_workingset_refault) && + (source == PSI)) { /* * Device is not thrashing and not reclaiming, bail out early until we see these stats * changing @@ -2901,7 +2918,23 @@ update_watermarks: * TODO: move this logic into a separate function * Decide if killing a process is necessary and record the reason */ - if (cycle_after_kill && wmark < WMARK_LOW) { + if (source == VENDOR) { + int vendor_kill_reason = data.vendor_event.event_data.vendor_kill.reason; + short vendor_kill_min_oom_score_adj = + data.vendor_event.event_data.vendor_kill.min_oom_score_adj; + if (vendor_kill_reason < 0 || + vendor_kill_reason > VENDOR_KILL_REASON_END || + vendor_kill_min_oom_score_adj < 0) { + ALOGE("Invalid vendor kill reason %d, min_oom_score_adj %d", + vendor_kill_reason, vendor_kill_min_oom_score_adj); + return; + } + + kill_reason = (enum kill_reasons)(vendor_kill_reason + VENDOR_KILL_REASON_BASE); + min_score_adj = vendor_kill_min_oom_score_adj; + snprintf(kill_desc, sizeof(kill_desc), + "vendor kill with the reason %d, min_score_adj %d", kill_reason, min_score_adj); + } else if (cycle_after_kill && wmark < WMARK_LOW) { /* * Prevent kills not freeing enough memory which might lead to OOM kill. * This might happen when a process is consuming memory faster than reclaim can @@ -3067,6 +3100,11 @@ no_kill: } } +static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_params) { + union psi_event_data event_data = {.level = (enum vmpressure_level)data}; + __mp_event_psi(PSI, event_data, events, poll_params); +} + static std::string GetCgroupAttributePath(const char* attr) { std::string path; if (!CgroupGetAttributePath(attr, &path)) { @@ -3391,7 +3429,7 @@ static MemcgVersion memcg_version() { } static void memevent_listener_notification(int data __unused, uint32_t events __unused, - struct polling_params* poll_params __unused) { + struct polling_params* poll_params) { struct timespec curr_tm; std::vector mem_events; @@ -3428,6 +3466,10 @@ static void memevent_listener_notification(int data __unused, uint32_t events __ kswapd_start_tm.tv_sec = 0; kswapd_start_tm.tv_nsec = 0; break; + case MEM_EVENT_VENDOR_LMK_KILL: + union psi_event_data event_data = {.vendor_event = mem_event}; + __mp_event_psi(VENDOR, event_data, 0, poll_params); + break; } } } @@ -3462,6 +3504,10 @@ static bool init_memevent_listener_monitoring() { return false; } + if (!memevent_listener->registerEvent(MEM_EVENT_VENDOR_LMK_KILL)) { + ALOGI("Failed to register android_vendor_kill memevents"); + } + int memevent_listener_fd = memevent_listener->getRingBufferFd(); if (memevent_listener_fd < 0) { memevent_listener.reset(); diff --git a/statslog.h b/statslog.h index 60c7016..4c51f70 100644 --- a/statslog.h +++ b/statslog.h @@ -26,6 +26,7 @@ #include #include +#include __BEGIN_DECLS @@ -66,6 +67,9 @@ enum kill_reasons { LOW_FILECACHE_AFTER_THRASHING, LOW_MEM, DIRECT_RECL_STUCK, + /* reserve aosp kill 0 ~ 999 */ + VENDOR_KILL_REASON_BASE = 1000, + VENDOR_KILL_REASON_END = VENDOR_KILL_REASON_BASE + NUM_VENDOR_LMK_KILL_REASON - 1, KILL_REASON_COUNT };