Skip to content

Commit 3dac966

Browse files
committed
DDT: Add locking for table ZAP destruction
Similar to BRT, a DDT ZAP can be destroyed by sync context when it becomes empty. As was done for BRT, introduce an RW-lock to protect open-context methods from that destruction. Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com>
1 parent 2301755 commit 3dac966

File tree

2 files changed

+74
-17
lines changed

2 files changed

+74
-17
lines changed

include/sys/ddt.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,9 @@ typedef struct {
284284
avl_tree_t ddt_tree; /* "live" (changed) entries this txg */
285285
avl_tree_t ddt_repair_tree; /* entries being repaired */
286286

287+
/* Protects ddt_object[] and ddt_object_dnode[]. */
288+
krwlock_t ddt_objects_lock ____cacheline_aligned;
289+
287290
/*
288291
* Log trees are stable during I/O, and only modified during sync
289292
* with exclusive access.

module/zfs/ddt.c

Lines changed: 71 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -440,14 +440,18 @@ ddt_object_destroy(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
440440
VERIFY0(count);
441441
VERIFY0(zap_remove(os, ddt->ddt_dir_object, name, tx));
442442
VERIFY0(zap_remove(os, spa->spa_ddt_stat_object, name, tx));
443-
if (ddt->ddt_object_dnode[type][class] != NULL) {
444-
dnode_rele(ddt->ddt_object_dnode[type][class], ddt);
445-
ddt->ddt_object_dnode[type][class] = NULL;
446-
}
447-
VERIFY0(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx));
448-
memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t));
449443

444+
uint64_t object = *objectp;
445+
dnode_t *dn = ddt->ddt_object_dnode[type][class];
446+
rw_enter(&ddt->ddt_objects_lock, RW_WRITER);
450447
*objectp = 0;
448+
ddt->ddt_object_dnode[type][class] = NULL;
449+
rw_exit(&ddt->ddt_objects_lock);
450+
451+
if (dn != NULL)
452+
dnode_rele(dn, ddt);
453+
VERIFY0(ddt_ops[type]->ddt_op_destroy(os, object, tx));
454+
memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t));
451455
}
452456

453457
static int
@@ -553,6 +557,20 @@ ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
553557
dde->dde_phys, DDT_PHYS_SIZE(ddt)));
554558
}
555559

560+
/*
561+
* Like ddt_object_lookup(), but for open context where we need protection
562+
* against concurrent object destruction by sync context.
563+
*/
564+
static int
565+
ddt_object_lookup_open(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
566+
ddt_entry_t *dde)
567+
{
568+
rw_enter(&ddt->ddt_objects_lock, RW_READER);
569+
int error = ddt_object_lookup(ddt, type, class, dde);
570+
rw_exit(&ddt->ddt_objects_lock);
571+
return (error);
572+
}
573+
556574
static int
557575
ddt_object_contains(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
558576
const ddt_key_t *ddk)
@@ -568,21 +586,33 @@ static void
568586
ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
569587
const ddt_key_t *ddk)
570588
{
589+
/*
590+
* Called from open context, so protect against concurrent
591+
* object destruction by sync context.
592+
*/
593+
rw_enter(&ddt->ddt_objects_lock, RW_READER);
594+
571595
dnode_t *dn = ddt->ddt_object_dnode[type][class];
572-
if (dn == NULL)
573-
return;
596+
if (dn != NULL)
597+
ddt_ops[type]->ddt_op_prefetch(dn, ddk);
574598

575-
ddt_ops[type]->ddt_op_prefetch(dn, ddk);
599+
rw_exit(&ddt->ddt_objects_lock);
576600
}
577601

578602
static void
579603
ddt_object_prefetch_all(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
580604
{
605+
/*
606+
* Called from open context, so protect against concurrent
607+
* object destruction by sync context.
608+
*/
609+
rw_enter(&ddt->ddt_objects_lock, RW_READER);
610+
581611
dnode_t *dn = ddt->ddt_object_dnode[type][class];
582-
if (dn == NULL)
583-
return;
612+
if (dn != NULL)
613+
ddt_ops[type]->ddt_op_prefetch_all(dn);
584614

585-
ddt_ops[type]->ddt_op_prefetch_all(dn);
615+
rw_exit(&ddt->ddt_objects_lock);
586616
}
587617

588618
static int
@@ -610,27 +640,49 @@ int
610640
ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
611641
uint64_t *walk, ddt_lightweight_entry_t *ddlwe)
612642
{
643+
/*
644+
* Can be called from open context, so protect against concurrent
645+
* object destruction by sync context.
646+
*/
647+
rw_enter(&ddt->ddt_objects_lock, RW_READER);
648+
613649
dnode_t *dn = ddt->ddt_object_dnode[type][class];
614-
ASSERT(dn != NULL);
650+
if (dn == NULL) {
651+
rw_exit(&ddt->ddt_objects_lock);
652+
return (SET_ERROR(ENOENT));
653+
}
615654

616655
int error = ddt_ops[type]->ddt_op_walk(dn, walk, &ddlwe->ddlwe_key,
617656
&ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt));
618657
if (error == 0) {
619658
ddlwe->ddlwe_type = type;
620659
ddlwe->ddlwe_class = class;
621-
return (0);
622660
}
661+
662+
rw_exit(&ddt->ddt_objects_lock);
623663
return (error);
624664
}
625665

626666
int
627667
ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
628668
uint64_t *count)
629669
{
670+
/*
671+
* Can be called from open context, so protect against concurrent
672+
* object destruction by sync context.
673+
*/
674+
rw_enter(&ddt->ddt_objects_lock, RW_READER);
675+
630676
dnode_t *dn = ddt->ddt_object_dnode[type][class];
631-
ASSERT(dn != NULL);
677+
if (dn == NULL) {
678+
rw_exit(&ddt->ddt_objects_lock);
679+
return (SET_ERROR(ENOENT));
680+
}
632681

633-
return (ddt_ops[type]->ddt_op_count(dn, count));
682+
int error = ddt_ops[type]->ddt_op_count(dn, count);
683+
684+
rw_exit(&ddt->ddt_objects_lock);
685+
return (error);
634686
}
635687

636688
int
@@ -1698,6 +1750,7 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c)
16981750
sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node));
16991751
avl_create(&ddt->ddt_repair_tree, ddt_key_compare,
17001752
sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node));
1753+
rw_init(&ddt->ddt_objects_lock, NULL, RW_DEFAULT, NULL);
17011754

17021755
ddt->ddt_checksum = c;
17031756
ddt->ddt_spa = spa;
@@ -1744,6 +1797,7 @@ ddt_table_free(ddt_t *ddt)
17441797
}
17451798
}
17461799
}
1800+
rw_destroy(&ddt->ddt_objects_lock);
17471801
ASSERT0(avl_numnodes(&ddt->ddt_tree));
17481802
ASSERT0(avl_numnodes(&ddt->ddt_repair_tree));
17491803
avl_destroy(&ddt->ddt_tree);
@@ -1876,7 +1930,7 @@ ddt_repair_start(ddt_t *ddt, const blkptr_t *bp)
18761930
* there's definitely only one copy, so don't even try.
18771931
*/
18781932
if (class != DDT_CLASS_UNIQUE &&
1879-
ddt_object_lookup(ddt, type, class, dde) == 0)
1933+
ddt_object_lookup_open(ddt, type, class, dde) == 0)
18801934
return (dde);
18811935
}
18821936
}

0 commit comments

Comments
 (0)