Skip to content

Commit 2c9fec3

Browse files
amotintonyhutter
authored andcommitted
DDT: Add locking for table ZAP destruction
Similar to BRT, DDT ZAP can be destroyed by sync context when it becomes empty. Respectively similar to BRT introduce RW-lock to protect open context methods from the destruction. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com> Closes #18115
1 parent 6f7f718 commit 2c9fec3

File tree

2 files changed

+76
-20
lines changed

2 files changed

+76
-20
lines changed

include/sys/ddt.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,9 @@ typedef struct {
284284
avl_tree_t ddt_tree; /* "live" (changed) entries this txg */
285285
avl_tree_t ddt_repair_tree; /* entries being repaired */
286286

287+
/* Protects ddt_object[] and ddt_object_dnode[]. */
288+
krwlock_t ddt_objects_lock ____cacheline_aligned;
289+
287290
/*
288291
* Log trees are stable during I/O, and only modified during sync
289292
* with exclusive access.

module/zfs/ddt.c

Lines changed: 73 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -426,28 +426,31 @@ ddt_object_destroy(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
426426
{
427427
spa_t *spa = ddt->ddt_spa;
428428
objset_t *os = ddt->ddt_os;
429-
uint64_t *objectp = &ddt->ddt_object[type][class];
430429
uint64_t count;
431430
char name[DDT_NAMELEN];
432431

433432
ASSERT3U(ddt->ddt_dir_object, >, 0);
434433

435434
ddt_object_name(ddt, type, class, name);
436435

437-
ASSERT3U(*objectp, !=, 0);
436+
ASSERT(ddt->ddt_object[type][class] != 0);
438437
ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class]));
439438
VERIFY0(ddt_object_count(ddt, type, class, &count));
440439
VERIFY0(count);
441440
VERIFY0(zap_remove(os, ddt->ddt_dir_object, name, tx));
442441
VERIFY0(zap_remove(os, spa->spa_ddt_stat_object, name, tx));
443-
if (ddt->ddt_object_dnode[type][class] != NULL) {
444-
dnode_rele(ddt->ddt_object_dnode[type][class], ddt);
445-
ddt->ddt_object_dnode[type][class] = NULL;
446-
}
447-
VERIFY0(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx));
448-
memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t));
449442

450-
*objectp = 0;
443+
uint64_t object = ddt->ddt_object[type][class];
444+
dnode_t *dn = ddt->ddt_object_dnode[type][class];
445+
rw_enter(&ddt->ddt_objects_lock, RW_WRITER);
446+
ddt->ddt_object[type][class] = 0;
447+
ddt->ddt_object_dnode[type][class] = NULL;
448+
rw_exit(&ddt->ddt_objects_lock);
449+
450+
if (dn != NULL)
451+
dnode_rele(dn, ddt);
452+
VERIFY0(ddt_ops[type]->ddt_op_destroy(os, object, tx));
453+
memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t));
451454
}
452455

453456
static int
@@ -553,6 +556,20 @@ ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
553556
dde->dde_phys, DDT_PHYS_SIZE(ddt)));
554557
}
555558

559+
/*
560+
* Like ddt_object_lookup(), but for open context where we need protection
561+
* against concurrent object destruction by sync context.
562+
*/
563+
static int
564+
ddt_object_lookup_open(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
565+
ddt_entry_t *dde)
566+
{
567+
rw_enter(&ddt->ddt_objects_lock, RW_READER);
568+
int error = ddt_object_lookup(ddt, type, class, dde);
569+
rw_exit(&ddt->ddt_objects_lock);
570+
return (error);
571+
}
572+
556573
static int
557574
ddt_object_contains(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
558575
const ddt_key_t *ddk)
@@ -568,21 +585,33 @@ static void
568585
ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
569586
const ddt_key_t *ddk)
570587
{
588+
/*
589+
* Called from open context, so protect against concurrent
590+
* object destruction by sync context.
591+
*/
592+
rw_enter(&ddt->ddt_objects_lock, RW_READER);
593+
571594
dnode_t *dn = ddt->ddt_object_dnode[type][class];
572-
if (dn == NULL)
573-
return;
595+
if (dn != NULL)
596+
ddt_ops[type]->ddt_op_prefetch(dn, ddk);
574597

575-
ddt_ops[type]->ddt_op_prefetch(dn, ddk);
598+
rw_exit(&ddt->ddt_objects_lock);
576599
}
577600

578601
static void
579602
ddt_object_prefetch_all(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
580603
{
604+
/*
605+
* Called from open context, so protect against concurrent
606+
* object destruction by sync context.
607+
*/
608+
rw_enter(&ddt->ddt_objects_lock, RW_READER);
609+
581610
dnode_t *dn = ddt->ddt_object_dnode[type][class];
582-
if (dn == NULL)
583-
return;
611+
if (dn != NULL)
612+
ddt_ops[type]->ddt_op_prefetch_all(dn);
584613

585-
ddt_ops[type]->ddt_op_prefetch_all(dn);
614+
rw_exit(&ddt->ddt_objects_lock);
586615
}
587616

588617
static int
@@ -610,27 +639,49 @@ int
610639
ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
611640
uint64_t *walk, ddt_lightweight_entry_t *ddlwe)
612641
{
642+
/*
643+
* Can be called from open context, so protect against concurrent
644+
* object destruction by sync context.
645+
*/
646+
rw_enter(&ddt->ddt_objects_lock, RW_READER);
647+
613648
dnode_t *dn = ddt->ddt_object_dnode[type][class];
614-
ASSERT(dn != NULL);
649+
if (dn == NULL) {
650+
rw_exit(&ddt->ddt_objects_lock);
651+
return (SET_ERROR(ENOENT));
652+
}
615653

616654
int error = ddt_ops[type]->ddt_op_walk(dn, walk, &ddlwe->ddlwe_key,
617655
&ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt));
618656
if (error == 0) {
619657
ddlwe->ddlwe_type = type;
620658
ddlwe->ddlwe_class = class;
621-
return (0);
622659
}
660+
661+
rw_exit(&ddt->ddt_objects_lock);
623662
return (error);
624663
}
625664

626665
int
627666
ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
628667
uint64_t *count)
629668
{
669+
/*
670+
* Can be called from open context, so protect against concurrent
671+
* object destruction by sync context.
672+
*/
673+
rw_enter(&ddt->ddt_objects_lock, RW_READER);
674+
630675
dnode_t *dn = ddt->ddt_object_dnode[type][class];
631-
ASSERT(dn != NULL);
676+
if (dn == NULL) {
677+
rw_exit(&ddt->ddt_objects_lock);
678+
return (SET_ERROR(ENOENT));
679+
}
632680

633-
return (ddt_ops[type]->ddt_op_count(dn, count));
681+
int error = ddt_ops[type]->ddt_op_count(dn, count);
682+
683+
rw_exit(&ddt->ddt_objects_lock);
684+
return (error);
634685
}
635686

636687
int
@@ -1698,6 +1749,7 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c)
16981749
sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node));
16991750
avl_create(&ddt->ddt_repair_tree, ddt_key_compare,
17001751
sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node));
1752+
rw_init(&ddt->ddt_objects_lock, NULL, RW_DEFAULT, NULL);
17011753

17021754
ddt->ddt_checksum = c;
17031755
ddt->ddt_spa = spa;
@@ -1744,6 +1796,7 @@ ddt_table_free(ddt_t *ddt)
17441796
}
17451797
}
17461798
}
1799+
rw_destroy(&ddt->ddt_objects_lock);
17471800
ASSERT0(avl_numnodes(&ddt->ddt_tree));
17481801
ASSERT0(avl_numnodes(&ddt->ddt_repair_tree));
17491802
avl_destroy(&ddt->ddt_tree);
@@ -1876,7 +1929,7 @@ ddt_repair_start(ddt_t *ddt, const blkptr_t *bp)
18761929
* there's definitely only one copy, so don't even try.
18771930
*/
18781931
if (class != DDT_CLASS_UNIQUE &&
1879-
ddt_object_lookup(ddt, type, class, dde) == 0)
1932+
ddt_object_lookup_open(ddt, type, class, dde) == 0)
18801933
return (dde);
18811934
}
18821935
}

0 commit comments

Comments
 (0)