Skip to content

fix(pageserver): frozen->L0 flush failure causes data loss #12043

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 75 additions & 2 deletions pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5751,6 +5751,7 @@ pub(crate) mod harness {
pub conf: &'static PageServerConf,
pub tenant_conf: pageserver_api::models::TenantConfig,
pub tenant_shard_id: TenantShardId,
pub shard_identity: ShardIdentity,
pub generation: Generation,
pub shard: ShardIndex,
pub remote_storage: GenericRemoteStorage,
Expand Down Expand Up @@ -5818,6 +5819,7 @@ pub(crate) mod harness {
conf,
tenant_conf,
tenant_shard_id,
shard_identity,
generation,
shard,
remote_storage,
Expand Down Expand Up @@ -5879,8 +5881,7 @@ pub(crate) mod harness {
&ShardParameters::default(),
))
.unwrap(),
// This is a legacy/test code path: sharding isn't supported here.
ShardIdentity::unsharded(),
self.shard_identity,
Some(walredo_mgr),
self.tenant_shard_id,
self.remote_storage.clone(),
Expand Down Expand Up @@ -6002,6 +6003,7 @@ mod tests {
use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn};
use timeline::{CompactOptions, DeltaLayerTestDesc, VersionedKeySpaceQuery};
use utils::id::TenantId;
use utils::shard::{ShardCount, ShardNumber};

use super::*;
use crate::DEFAULT_PG_VERSION;
Expand Down Expand Up @@ -9321,6 +9323,77 @@ mod tests {
Ok(())
}

#[tokio::test]
async fn test_failed_flush_should_not_update_disk_consistent_lsn() -> anyhow::Result<()> {
//
// Setup
//
let harness = TenantHarness::create_custom(
"test_failed_flush_should_not_upload_disk_consistent_lsn",
pageserver_api::models::TenantConfig::default(),
TenantId::generate(),
ShardIdentity::new(ShardNumber(0), ShardCount(4), ShardStripeSize(128)).unwrap(),
Generation::new(1),
)
.await?;
let (tenant, ctx) = harness.load().await;

let timeline = tenant
.create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
.await?;
assert_eq!(timeline.get_shard_identity().count, ShardCount(4));
let mut writer = timeline.writer().await;
writer
.put(
*TEST_KEY,
Lsn(0x20),
&Value::Image(test_img("foo at 0x20")),
&ctx,
)
.await?;
writer.finish_write(Lsn(0x20));
drop(writer);
timeline.freeze_and_flush().await.unwrap();

timeline.remote_client.wait_completion().await.unwrap();
let disk_consistent_lsn = timeline.get_disk_consistent_lsn();
let remote_consistent_lsn = timeline.get_remote_consistent_lsn_projected();
assert_eq!(Some(disk_consistent_lsn), remote_consistent_lsn);

//
// Test
//

let mut writer = timeline.writer().await;
writer
.put(
*TEST_KEY,
Lsn(0x30),
&Value::Image(test_img("foo at 0x30")),
&ctx,
)
.await?;
writer.finish_write(Lsn(0x30));
drop(writer);

fail::cfg(
"flush-layer-before-update-remote-consistent-lsn",
"return()",
)
.unwrap();

let flush_res = timeline.freeze_and_flush().await;
// if flush failed, the disk/remote consistent LSN should not be updated
assert!(flush_res.is_err());
assert_eq!(disk_consistent_lsn, timeline.get_disk_consistent_lsn());
assert_eq!(
remote_consistent_lsn,
timeline.get_remote_consistent_lsn_projected()
);

Ok(())
}

#[cfg(feature = "testing")]
#[tokio::test]
async fn test_simple_bottom_most_compaction_deltas_1() -> anyhow::Result<()> {
Expand Down
9 changes: 8 additions & 1 deletion pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4761,7 +4761,10 @@ impl Timeline {
|| !flushed_to_lsn.is_valid()
);

if flushed_to_lsn < frozen_to_lsn && self.shard_identity.count.count() > 1 {
if flushed_to_lsn < frozen_to_lsn
&& self.shard_identity.count.count() > 1
&& result.is_ok()
{
// If our layer flushes didn't carry disk_consistent_lsn up to the `to_lsn` advertised
// to us via layer_flush_start_rx, then advance it here.
//
Expand Down Expand Up @@ -4939,6 +4942,10 @@ impl Timeline {
return Err(FlushLayerError::Cancelled);
}

fail_point!("flush-layer-before-update-remote-consistent-lsn", |_| {
Err(FlushLayerError::Other(anyhow!("failpoint").into()))
});

let disk_consistent_lsn = Lsn(lsn_range.end.0 - 1);

// The new on-disk layers are now in the layer map. We can remove the
Expand Down
Loading