From d58acd70e543d16ffc9f00390de678fe1d27fc69 Mon Sep 17 00:00:00 2001 From: Yoshihiro OKUMURA Date: Mon, 20 Apr 2026 13:05:21 +0900 Subject: [PATCH] fix(token): use dedicated .lock file to guard entire refresh critical section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous implementation had two correctness issues: 1. flock on .tmp was ineffective for cross-process exclusion. After fs::rename(), the .tmp inode disappears. A second process opening .tmp gets a brand-new inode, so both processes hold flocks on different inodes simultaneously — no mutual exclusion occurs. 2. The critical section was too narrow. The in-process tokio::Mutex only serializes tasks within the same process. Two separate mdrs processes could both read the cache, both decide a refresh was needed, and both call the token-refresh endpoint before either had written the new token back — risking double-refresh and potential failures on servers that use refresh-token rotation. Fix: introduce a dedicated `cache/{remote}.lock` file as the cross- process advisory lock target. The lock file is never renamed, so its inode remains stable for the entire critical section. The flock now wraps the complete read-check-refresh-write cycle in load_cache_with_token_refresh(), and the redundant flock on .tmp in refresh_and_persist() is removed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/commands/shared.rs | 63 ++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/src/commands/shared.rs b/src/commands/shared.rs index a3579fb..89465dd 100644 --- a/src/commands/shared.rs +++ b/src/commands/shared.rs @@ -232,32 +232,56 @@ pub fn load_cache(remote: &str) -> Result> { /// Locking strategy: /// - Per-remote `tokio::sync::Mutex` serializes concurrent async tasks within /// the same process. -/// - `flock(LOCK_EX)` on the cache file serializes across separate processes -/// on the same host. +/// - `flock(LOCK_EX)` on a dedicated `cache/{remote}.lock` file serializes +/// the entire read-check-refresh-write cycle across separate processes on +/// the same host. The lock file is stable (never renamed), so the flock +/// reliably protects the same inode for the lifetime of the critical section. pub async fn load_cache_with_token_refresh( remote: &str, ) -> Result> { - // Acquire the in-process async mutex for this remote + // Acquire the in-process async mutex for this remote first. let lock = get_remote_lock(remote); let _guard = lock.lock().await; - // Re-read the cache inside the lock (another task may have already refreshed) - let mut cache = load_cache(remote)?; + // Acquire an exclusive cross-process file lock. This ensures that no + // other process can race through the read-check-refresh-write cycle at + // the same time as us. The lock file is separate from the cache file so + // that it never disappears (unlike a .tmp file that gets renamed away). + let lock_path = cache_file_path(remote).with_extension("lock"); + use fs2::FileExt; + let lock_file = fs::OpenOptions::new() + .write(true) + .create(true) + .open(&lock_path)?; + lock_file.lock_exclusive()?; - if crate::token::is_expired(&cache.token.refresh) { - return Err(format!( - "Session for `{}` has expired. Please run `mdrs login {}` again.", - remote, remote - ) - .into()); + // Re-read the cache inside the lock: another process may have already + // refreshed the token since we last checked. + let result: Result> = async { + let mut cache = load_cache(remote)?; + + if crate::token::is_expired(&cache.token.refresh) { + return Err(format!( + "Session for `{}` has expired. Please run `mdrs login {}` again.", + remote, remote + ) + .into()); + } + + if crate::token::is_refresh_required(&cache.token.access, &cache.token.refresh) { + let new_access = refresh_and_persist(remote, &cache).await?; + cache.token.access = new_access; + } + + Ok(cache) } + .await; - if crate::token::is_refresh_required(&cache.token.access, &cache.token.refresh) { - let new_access = refresh_and_persist(remote, &cache).await?; - cache.token.access = new_access; - } + // Release the cross-process file lock. The OS will also release it when + // lock_file is dropped, but we unlock explicitly for clarity. + lock_file.unlock()?; - Ok(cache) + result } /// Call the token-refresh endpoint and write the new access token back to the @@ -293,21 +317,18 @@ async fn refresh_and_persist( obj["token"]["access"] = serde_json::Value::String(new_access.clone()); obj["digest"] = serde_json::Value::String(new_digest); - // Write atomically: write to .tmp then rename, while holding an exclusive - // flock on the .tmp file for cross-process safety. + // Write atomically: write to .tmp then rename. The caller already holds + // the exclusive .lock file, so no additional flock is needed here. let tmp_path = cache_path.with_extension("tmp"); { - use fs2::FileExt; use std::io::Write; let mut tmp_file = fs::OpenOptions::new() .write(true) .create(true) .truncate(true) .open(&tmp_path)?; - tmp_file.lock_exclusive()?; tmp_file.write_all(serde_json::to_string(&obj)?.as_bytes())?; tmp_file.flush()?; - tmp_file.unlock()?; } #[cfg(unix)] {