diff --git a/client/templates/metered.tmpl b/client/templates/metered.tmpl
index 9b57880b077..8cd386292bd 100644
--- a/client/templates/metered.tmpl
+++ b/client/templates/metered.tmpl
@@ -1,6 +1,7 @@
 import (
 	"context"
 	"strings"
+	"time"

 	"go.uber.org/yarpc"
 	"github.com/uber/cadence/common/constants"
@@ -44,9 +45,11 @@ func (c *{{$decorator}}) {{$method.Declaration}} {
 		c.emitForwardedFromStats(scope, {{(index $method.Params 1).Name}})
 	{{ end }}

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	{{$method.ResultsNames}} = c.client.{{$method.Call}}
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
diff --git a/client/wrappers/metered/admin_generated.go b/client/wrappers/metered/admin_generated.go
index 10e9d28e873..b80eefebc5c 100644
--- a/client/wrappers/metered/admin_generated.go
+++ b/client/wrappers/metered/admin_generated.go
@@ -6,6 +6,7 @@ package metered

 import (
 	"context"
+	"time"

 	"go.uber.org/yarpc"

@@ -40,9 +41,11 @@ func (c *adminClient) AddSearchAttribute(ctx context.Context, ap1 *types.AddSear

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.AddSearchAttribute(ctx, ap1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -62,9 +65,11 @@ func (c *adminClient) CloseShard(ctx context.Context, cp1 *types.CloseShardReque

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.CloseShard(ctx, cp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -84,9 +89,11 @@ func (c *adminClient) CountDLQMessages(ctx context.Context, cp1 *types.CountDLQM

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	cp2, err = c.client.CountDLQMessages(ctx, cp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -106,9 +113,11 @@ func (c *adminClient) DeleteWorkflow(ctx context.Context, ap1 *types.AdminDelete

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	ap2, err = c.client.DeleteWorkflow(ctx, ap1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -128,9 +137,11 @@ func (c *adminClient) DescribeCluster(ctx context.Context, p1 ...yarpc.CallOptio

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp1, err = c.client.DescribeCluster(ctx, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -150,9 +161,11 @@ func (c *adminClient) DescribeHistoryHost(ctx context.Context, dp1 *types.Descri

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DescribeHistoryHost(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -172,9 +185,11 @@ func (c *adminClient) DescribeQueue(ctx context.Context, dp1 *types.DescribeQueu

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DescribeQueue(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -194,9 +209,11 @@ func (c *adminClient) DescribeShardDistribution(ctx context.Context, dp1 *types.

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DescribeShardDistribution(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -216,9 +233,11 @@ func (c *adminClient) DescribeWorkflowExecution(ctx context.Context, ap1 *types.

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	ap2, err = c.client.DescribeWorkflowExecution(ctx, ap1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -238,9 +257,11 @@ func (c *adminClient) GetDLQReplicationMessages(ctx context.Context, gp1 *types.

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetDLQReplicationMessages(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -260,9 +281,11 @@ func (c *adminClient) GetDomainAsyncWorkflowConfiguraton(ctx context.Context, re

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp1, err = c.client.GetDomainAsyncWorkflowConfiguraton(ctx, request, opts...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -282,9 +305,11 @@ func (c *adminClient) GetDomainIsolationGroups(ctx context.Context, request *typ

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp1, err = c.client.GetDomainIsolationGroups(ctx, request, opts...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -304,9 +329,11 @@ func (c *adminClient) GetDomainReplicationMessages(ctx context.Context, gp1 *typ

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetDomainReplicationMessages(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -326,9 +353,11 @@ func (c *adminClient) GetDynamicConfig(ctx context.Context, gp1 *types.GetDynami

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetDynamicConfig(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -348,9 +377,11 @@ func (c *adminClient) GetGlobalIsolationGroups(ctx context.Context, request *typ

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp1, err = c.client.GetGlobalIsolationGroups(ctx, request, opts...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -370,9 +401,11 @@ func (c *adminClient) GetReplicationMessages(ctx context.Context, gp1 *types.Get

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetReplicationMessages(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -392,9 +425,11 @@ func (c *adminClient) GetWorkflowExecutionRawHistoryV2(ctx context.Context, gp1

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetWorkflowExecutionRawHistoryV2(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -414,9 +449,11 @@ func (c *adminClient) ListDynamicConfig(ctx context.Context, lp1 *types.ListDyna

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	lp2, err = c.client.ListDynamicConfig(ctx, lp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -436,9 +473,11 @@ func (c *adminClient) MaintainCorruptWorkflow(ctx context.Context, ap1 *types.Ad

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	ap2, err = c.client.MaintainCorruptWorkflow(ctx, ap1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -458,9 +497,11 @@ func (c *adminClient) MergeDLQMessages(ctx context.Context, mp1 *types.MergeDLQM

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	mp2, err = c.client.MergeDLQMessages(ctx, mp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -480,9 +521,11 @@ func (c *adminClient) PurgeDLQMessages(ctx context.Context, pp1 *types.PurgeDLQM

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.PurgeDLQMessages(ctx, pp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -502,9 +545,11 @@ func (c *adminClient) ReadDLQMessages(ctx context.Context, rp1 *types.ReadDLQMes

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp2, err = c.client.ReadDLQMessages(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -524,9 +569,11 @@ func (c *adminClient) ReapplyEvents(ctx context.Context, rp1 *types.ReapplyEvent

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.ReapplyEvents(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -546,9 +593,11 @@ func (c *adminClient) RefreshWorkflowTasks(ctx context.Context, rp1 *types.Refre

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RefreshWorkflowTasks(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -568,9 +617,11 @@ func (c *adminClient) RemoveTask(ctx context.Context, rp1 *types.RemoveTaskReque

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RemoveTask(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -590,9 +641,11 @@ func (c *adminClient) ResendReplicationTasks(ctx context.Context, rp1 *types.Res

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.ResendReplicationTasks(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -612,9 +665,11 @@ func (c *adminClient) ResetQueue(ctx context.Context, rp1 *types.ResetQueueReque

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.ResetQueue(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -634,9 +689,11 @@ func (c *adminClient) RestoreDynamicConfig(ctx context.Context, rp1 *types.Resto

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RestoreDynamicConfig(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -656,9 +713,11 @@ func (c *adminClient) UpdateDomainAsyncWorkflowConfiguraton(ctx context.Context,

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	up1, err = c.client.UpdateDomainAsyncWorkflowConfiguraton(ctx, request, opts...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -678,9 +737,11 @@ func (c *adminClient) UpdateDomainIsolationGroups(ctx context.Context, request *

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	up1, err = c.client.UpdateDomainIsolationGroups(ctx, request, opts...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -700,9 +761,11 @@ func (c *adminClient) UpdateDynamicConfig(ctx context.Context, up1 *types.Update

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.UpdateDynamicConfig(ctx, up1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -722,9 +785,11 @@ func (c *adminClient) UpdateGlobalIsolationGroups(ctx context.Context, request *

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	up1, err = c.client.UpdateGlobalIsolationGroups(ctx, request, opts...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -744,9 +809,11 @@ func (c *adminClient) UpdateTaskListPartitionConfig(ctx context.Context, request

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	up1, err = c.client.UpdateTaskListPartitionConfig(ctx, request, opts...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
diff --git a/client/wrappers/metered/frontend_generated.go b/client/wrappers/metered/frontend_generated.go
index f5062763d6f..ad39b122fb2 100644
--- a/client/wrappers/metered/frontend_generated.go
+++ b/client/wrappers/metered/frontend_generated.go
@@ -6,6 +6,7 @@ package metered

 import (
 	"context"
+	"time"

 	"go.uber.org/yarpc"

@@ -40,9 +41,11 @@ func (c *frontendClient) BackfillSchedule(ctx context.Context, bp1 *types.Backfi

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	bp2, err = c.client.BackfillSchedule(ctx, bp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -62,9 +65,11 @@ func (c *frontendClient) CountWorkflowExecutions(ctx context.Context, cp1 *types

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	cp2, err = c.client.CountWorkflowExecutions(ctx, cp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -84,9 +89,11 @@ func (c *frontendClient) CreateSchedule(ctx context.Context, cp1 *types.CreateSc

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	cp2, err = c.client.CreateSchedule(ctx, cp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -106,9 +113,11 @@ func (c *frontendClient) DeleteDomain(ctx context.Context, dp1 *types.DeleteDoma

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.DeleteDomain(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -128,9 +137,11 @@ func (c *frontendClient) DeleteSchedule(ctx context.Context, dp1 *types.DeleteSc

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DeleteSchedule(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -150,9 +161,11 @@ func (c *frontendClient) DeprecateDomain(ctx context.Context, dp1 *types.Depreca

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.DeprecateDomain(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -172,9 +185,11 @@ func (c *frontendClient) DescribeDomain(ctx context.Context, dp1 *types.Describe

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DescribeDomain(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -194,9 +209,11 @@ func (c *frontendClient) DescribeSchedule(ctx context.Context, dp1 *types.Descri

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DescribeSchedule(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -216,9 +233,11 @@ func (c *frontendClient) DescribeTaskList(ctx context.Context, dp1 *types.Descri

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DescribeTaskList(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -238,9 +257,11 @@ func (c *frontendClient) DescribeWorkflowExecution(ctx context.Context, dp1 *typ

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DescribeWorkflowExecution(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -260,9 +281,11 @@ func (c *frontendClient) DiagnoseWorkflowExecution(ctx context.Context, dp1 *typ

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DiagnoseWorkflowExecution(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -282,9 +305,11 @@ func (c *frontendClient) FailoverDomain(ctx context.Context, fp1 *types.Failover

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	fp2, err = c.client.FailoverDomain(ctx, fp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -304,9 +329,11 @@ func (c *frontendClient) GetClusterInfo(ctx context.Context, p1 ...yarpc.CallOpt

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	cp1, err = c.client.GetClusterInfo(ctx, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -326,9 +353,11 @@ func (c *frontendClient) GetSearchAttributes(ctx context.Context, p1 ...yarpc.Ca

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp1, err = c.client.GetSearchAttributes(ctx, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -348,9 +377,11 @@ func (c *frontendClient) GetTaskListsByDomain(ctx context.Context, gp1 *types.Ge

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetTaskListsByDomain(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -370,9 +401,11 @@ func (c *frontendClient) GetWorkflowExecutionHistory(ctx context.Context, gp1 *t

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetWorkflowExecutionHistory(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -392,9 +425,11 @@ func (c *frontendClient) ListArchivedWorkflowExecutions(ctx context.Context, lp1

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	lp2, err = c.client.ListArchivedWorkflowExecutions(ctx, lp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -414,9 +449,11 @@ func (c *frontendClient) ListClosedWorkflowExecutions(ctx context.Context, lp1 *

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	lp2, err = c.client.ListClosedWorkflowExecutions(ctx, lp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -436,9 +473,11 @@ func (c *frontendClient) ListDomains(ctx context.Context, lp1 *types.ListDomains

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	lp2, err = c.client.ListDomains(ctx, lp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -458,9 +497,11 @@ func (c *frontendClient) ListFailoverHistory(ctx context.Context, lp1 *types.Lis

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	lp2, err = c.client.ListFailoverHistory(ctx, lp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -480,9 +521,11 @@ func (c *frontendClient) ListOpenWorkflowExecutions(ctx context.Context, lp1 *ty

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	lp2, err = c.client.ListOpenWorkflowExecutions(ctx, lp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -502,9 +545,11 @@ func (c *frontendClient) ListSchedules(ctx context.Context, lp1 *types.ListSched

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	lp2, err = c.client.ListSchedules(ctx, lp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -524,9 +569,11 @@ func (c *frontendClient) ListTaskListPartitions(ctx context.Context, lp1 *types.

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	lp2, err = c.client.ListTaskListPartitions(ctx, lp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -546,9 +593,11 @@ func (c *frontendClient) ListWorkflowExecutions(ctx context.Context, lp1 *types.

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	lp2, err = c.client.ListWorkflowExecutions(ctx, lp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -568,9 +617,11 @@ func (c *frontendClient) PauseSchedule(ctx context.Context, pp1 *types.PauseSche

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	pp2, err = c.client.PauseSchedule(ctx, pp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -590,9 +641,11 @@ func (c *frontendClient) PollForActivityTask(ctx context.Context, pp1 *types.Pol

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	pp2, err = c.client.PollForActivityTask(ctx, pp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -612,9 +665,11 @@ func (c *frontendClient) PollForDecisionTask(ctx context.Context, pp1 *types.Pol

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	pp2, err = c.client.PollForDecisionTask(ctx, pp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -634,9 +689,11 @@ func (c *frontendClient) QueryWorkflow(ctx context.Context, qp1 *types.QueryWork

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	qp2, err = c.client.QueryWorkflow(ctx, qp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -656,9 +713,11 @@ func (c *frontendClient) RecordActivityTaskHeartbeat(ctx context.Context, rp1 *t

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp2, err = c.client.RecordActivityTaskHeartbeat(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -678,9 +737,11 @@ func (c *frontendClient) RecordActivityTaskHeartbeatByID(ctx context.Context, rp

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp2, err = c.client.RecordActivityTaskHeartbeatByID(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -700,9 +761,11 @@ func (c *frontendClient) RefreshWorkflowTasks(ctx context.Context, rp1 *types.Re

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RefreshWorkflowTasks(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -722,9 +785,11 @@ func (c *frontendClient) RegisterDomain(ctx context.Context, rp1 *types.Register

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RegisterDomain(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -744,9 +809,11 @@ func (c *frontendClient) RequestCancelWorkflowExecution(ctx context.Context, rp1

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RequestCancelWorkflowExecution(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -766,9 +833,11 @@ func (c *frontendClient) ResetStickyTaskList(ctx context.Context, rp1 *types.Res

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp2, err = c.client.ResetStickyTaskList(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -788,9 +857,11 @@ func (c *frontendClient) ResetWorkflowExecution(ctx context.Context, rp1 *types.

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp2, err = c.client.ResetWorkflowExecution(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -810,9 +881,11 @@ func (c *frontendClient) RespondActivityTaskCanceled(ctx context.Context, rp1 *t

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RespondActivityTaskCanceled(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -832,9 +905,11 @@ func (c *frontendClient) RespondActivityTaskCanceledByID(ctx context.Context, rp

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RespondActivityTaskCanceledByID(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -854,9 +929,11 @@ func (c *frontendClient) RespondActivityTaskCompleted(ctx context.Context, rp1 *

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RespondActivityTaskCompleted(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -876,9 +953,11 @@ func (c *frontendClient) RespondActivityTaskCompletedByID(ctx context.Context, r

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RespondActivityTaskCompletedByID(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -898,9 +977,11 @@ func (c *frontendClient) RespondActivityTaskFailed(ctx context.Context, rp1 *typ

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RespondActivityTaskFailed(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -920,9 +1001,11 @@ func (c *frontendClient) RespondActivityTaskFailedByID(ctx context.Context, rp1

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RespondActivityTaskFailedByID(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -942,9 +1025,11 @@ func (c *frontendClient) RespondDecisionTaskCompleted(ctx context.Context, rp1 *

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp2, err = c.client.RespondDecisionTaskCompleted(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -964,9 +1049,11 @@ func (c *frontendClient) RespondDecisionTaskFailed(ctx context.Context, rp1 *typ

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RespondDecisionTaskFailed(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -986,9 +1073,11 @@ func (c *frontendClient) RespondQueryTaskCompleted(ctx context.Context, rp1 *typ

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RespondQueryTaskCompleted(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -1008,9 +1097,11 @@ func (c *frontendClient) RestartWorkflowExecution(ctx context.Context, rp1 *type

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp2, err = c.client.RestartWorkflowExecution(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -1030,9 +1121,11 @@ func (c *frontendClient) ScanWorkflowExecutions(ctx context.Context, lp1 *types.

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	lp2, err = c.client.ScanWorkflowExecutions(ctx, lp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -1052,9 +1145,11 @@ func (c *frontendClient) SignalWithStartWorkflowExecution(ctx context.Context, s

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	sp2, err = c.client.SignalWithStartWorkflowExecution(ctx, sp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -1074,9 +1169,11 @@ func (c *frontendClient) SignalWithStartWorkflowExecutionAsync(ctx context.Conte

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	sp2, err = c.client.SignalWithStartWorkflowExecutionAsync(ctx, sp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -1096,9 +1193,11 @@ func (c *frontendClient) SignalWorkflowExecution(ctx context.Context, sp1 *types

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.SignalWorkflowExecution(ctx, sp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -1118,9 +1217,11 @@ func (c *frontendClient) StartWorkflowExecution(ctx context.Context, sp1 *types.

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	sp2, err = c.client.StartWorkflowExecution(ctx, sp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -1140,9 +1241,11 @@ func (c *frontendClient) StartWorkflowExecutionAsync(ctx context.Context, sp1 *t

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	sp2, err = c.client.StartWorkflowExecutionAsync(ctx, sp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -1162,9 +1265,11 @@ func (c *frontendClient) TerminateWorkflowExecution(ctx context.Context, tp1 *ty

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.TerminateWorkflowExecution(ctx, tp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -1184,9 +1289,11 @@ func (c *frontendClient) UnpauseSchedule(ctx context.Context, up1 *types.Unpause

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	up2, err = c.client.UnpauseSchedule(ctx, up1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -1206,9 +1313,11 @@ func (c *frontendClient) UpdateDomain(ctx context.Context, up1 *types.UpdateDoma

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	up2, err = c.client.UpdateDomain(ctx, up1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -1228,9 +1337,11 @@ func (c *frontendClient) UpdateSchedule(ctx context.Context, up1 *types.UpdateSc

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	up2, err = c.client.UpdateSchedule(ctx, up1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
diff --git a/client/wrappers/metered/history_generated.go b/client/wrappers/metered/history_generated.go
index ee392ce92e8..a1ca3060a8e 100644
--- a/client/wrappers/metered/history_generated.go
+++ b/client/wrappers/metered/history_generated.go
@@ -6,6 +6,7 @@ package metered

 import (
 	"context"
+	"time"

 	"go.uber.org/yarpc"

@@ -40,9 +41,11 @@ func (c *historyClient) CloseShard(ctx context.Context, cp1 *types.CloseShardReq

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.CloseShard(ctx, cp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -62,9 +65,11 @@ func (c *historyClient) CountDLQMessages(ctx context.Context, cp1 *types.CountDL

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	hp1, err = c.client.CountDLQMessages(ctx, cp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -84,9 +89,11 @@ func (c *historyClient) DescribeHistoryHost(ctx context.Context, dp1 *types.Desc

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DescribeHistoryHost(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -106,9 +113,11 @@ func (c *historyClient) DescribeMutableState(ctx context.Context, dp1 *types.Des

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DescribeMutableState(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -128,9 +137,11 @@ func (c *historyClient) DescribeQueue(ctx context.Context, dp1 *types.DescribeQu

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp2, err = c.client.DescribeQueue(ctx, dp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -150,9 +161,11 @@ func (c *historyClient) DescribeWorkflowExecution(ctx context.Context, hp1 *type

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	dp1, err = c.client.DescribeWorkflowExecution(ctx, hp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -172,9 +185,11 @@ func (c *historyClient) GetCrossClusterTasks(ctx context.Context, gp1 *types.Get

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetCrossClusterTasks(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -194,9 +209,11 @@ func (c *historyClient) GetDLQReplicationMessages(ctx context.Context, gp1 *type

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetDLQReplicationMessages(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -216,9 +233,11 @@ func (c *historyClient) GetFailoverInfo(ctx context.Context, gp1 *types.GetFailo

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetFailoverInfo(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -238,9 +257,11 @@ func (c *historyClient) GetMutableState(ctx context.Context, gp1 *types.GetMutab

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetMutableState(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -260,9 +281,11 @@ func (c *historyClient) GetReplicationMessages(ctx context.Context, gp1 *types.G

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	gp2, err = c.client.GetReplicationMessages(ctx, gp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -282,9 +305,11 @@ func (c *historyClient) MergeDLQMessages(ctx context.Context, mp1 *types.MergeDL

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	mp2, err = c.client.MergeDLQMessages(ctx, mp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -304,9 +329,11 @@ func (c *historyClient) NotifyFailoverMarkers(ctx context.Context, np1 *types.No

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.NotifyFailoverMarkers(ctx, np1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -326,9 +353,11 @@ func (c *historyClient) PollMutableState(ctx context.Context, pp1 *types.PollMut

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	pp2, err = c.client.PollMutableState(ctx, pp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -348,9 +377,11 @@ func (c *historyClient) PurgeDLQMessages(ctx context.Context, pp1 *types.PurgeDL

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.PurgeDLQMessages(ctx, pp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -370,9 +401,11 @@ func (c *historyClient) QueryWorkflow(ctx context.Context, hp1 *types.HistoryQue

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	hp2, err = c.client.QueryWorkflow(ctx, hp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -392,9 +425,11 @@ func (c *historyClient) RatelimitUpdate(ctx context.Context, request *types.Rate

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp1, err = c.client.RatelimitUpdate(ctx, request, opts...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -414,9 +449,11 @@ func (c *historyClient) ReadDLQMessages(ctx context.Context, rp1 *types.ReadDLQM

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp2, err = c.client.ReadDLQMessages(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -436,9 +473,11 @@ func (c *historyClient) ReapplyEvents(ctx context.Context, hp1 *types.HistoryRea

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.ReapplyEvents(ctx, hp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -458,9 +497,11 @@ func (c *historyClient) RecordActivityTaskHeartbeat(ctx context.Context, hp1 *ty

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp1, err = c.client.RecordActivityTaskHeartbeat(ctx, hp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -480,9 +521,11 @@ func (c *historyClient) RecordActivityTaskStarted(ctx context.Context, rp1 *type

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp2, err = c.client.RecordActivityTaskStarted(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -502,9 +545,11 @@ func (c *historyClient) RecordChildExecutionCompleted(ctx context.Context, rp1 *

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RecordChildExecutionCompleted(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -524,9 +569,11 @@ func (c *historyClient) RecordDecisionTaskStarted(ctx context.Context, rp1 *type

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp2, err = c.client.RecordDecisionTaskStarted(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -546,9 +593,11 @@ func (c *historyClient) RefreshWorkflowTasks(ctx context.Context, hp1 *types.His

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RefreshWorkflowTasks(ctx, hp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -568,9 +617,11 @@ func (c *historyClient) RemoveSignalMutableState(ctx context.Context, rp1 *types

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RemoveSignalMutableState(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -590,9 +641,11 @@ func (c *historyClient) RemoveTask(ctx context.Context, rp1 *types.RemoveTaskReq

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RemoveTask(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -612,9 +665,11 @@ func (c *historyClient) ReplicateEventsV2(ctx context.Context, rp1 *types.Replic

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.ReplicateEventsV2(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -634,9 +689,11 @@ func (c *historyClient) RequestCancelWorkflowExecution(ctx context.Context, hp1

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RequestCancelWorkflowExecution(ctx, hp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -656,9 +713,11 @@ func (c *historyClient) ResetQueue(ctx context.Context, rp1 *types.ResetQueueReq

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.ResetQueue(ctx, rp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -678,9 +737,11 @@ func (c *historyClient) ResetStickyTaskList(ctx context.Context, hp1 *types.Hist

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	hp2, err = c.client.ResetStickyTaskList(ctx, hp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -700,9 +761,11 @@ func (c *historyClient) ResetWorkflowExecution(ctx context.Context, hp1 *types.H

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	rp1, err = c.client.ResetWorkflowExecution(ctx, hp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -722,9 +785,11 @@ func (c *historyClient) RespondActivityTaskCanceled(ctx context.Context, hp1 *ty

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RespondActivityTaskCanceled(ctx, hp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -744,9 +809,11 @@ func (c *historyClient) RespondActivityTaskCompleted(ctx context.Context, hp1 *t

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RespondActivityTaskCompleted(ctx, hp1, p1...)
 	sw.Stop()
+	scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart))

 	if err != nil {
 		scope.IncCounter(metrics.CadenceClientFailures)
@@ -766,9 +833,11 @@ func (c *historyClient) RespondActivityTaskFailed(ctx context.Context, hp1 *type

 	scope.IncCounter(metrics.CadenceClientRequests)

+	clientLatencyStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceClientLatency)
 	err = c.client.RespondActivityTaskFailed(ctx, hp1, p1...)
sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -788,9 +857,11 @@ func (c *historyClient) RespondCrossClusterTasksCompleted(ctx context.Context, r scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) rp2, err = c.client.RespondCrossClusterTasksCompleted(ctx, rp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -810,9 +881,11 @@ func (c *historyClient) RespondDecisionTaskCompleted(ctx context.Context, hp1 *t scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) hp2, err = c.client.RespondDecisionTaskCompleted(ctx, hp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -832,9 +905,11 @@ func (c *historyClient) RespondDecisionTaskFailed(ctx context.Context, hp1 *type scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) err = c.client.RespondDecisionTaskFailed(ctx, hp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -854,9 +929,11 @@ func (c *historyClient) ScheduleDecisionTask(ctx context.Context, sp1 *types.Sch scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) err = c.client.ScheduleDecisionTask(ctx, sp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -876,9 +953,11 @@ func (c *historyClient) SignalWithStartWorkflowExecution(ctx context.Context, hp scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) sp1, err = c.client.SignalWithStartWorkflowExecution(ctx, hp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -898,9 +977,11 @@ func (c *historyClient) SignalWorkflowExecution(ctx context.Context, hp1 *types. scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) err = c.client.SignalWorkflowExecution(ctx, hp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -920,9 +1001,11 @@ func (c *historyClient) StartWorkflowExecution(ctx context.Context, hp1 *types.H scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) sp1, err = c.client.StartWorkflowExecution(ctx, hp1, p1...) 
sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -942,9 +1025,11 @@ func (c *historyClient) SyncActivity(ctx context.Context, sp1 *types.SyncActivit scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) err = c.client.SyncActivity(ctx, sp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -964,9 +1049,11 @@ func (c *historyClient) SyncShardStatus(ctx context.Context, sp1 *types.SyncShar scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) err = c.client.SyncShardStatus(ctx, sp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -986,9 +1073,11 @@ func (c *historyClient) TerminateWorkflowExecution(ctx context.Context, hp1 *typ scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) err = c.client.TerminateWorkflowExecution(ctx, hp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) diff --git a/client/wrappers/metered/matching_generated.go b/client/wrappers/metered/matching_generated.go index 76d0efeb6cc..a26519ccb85 100644 --- a/client/wrappers/metered/matching_generated.go +++ b/client/wrappers/metered/matching_generated.go @@ -7,6 +7,7 @@ package metered import ( "context" "strings" + "time" "go.uber.org/yarpc" @@ -43,9 +44,11 @@ func (c *matchingClient) AddActivityTask(ctx context.Context, ap1 *types.AddActi scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, ap1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) ap2, err = c.client.AddActivityTask(ctx, ap1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -66,9 +69,11 @@ func (c *matchingClient) AddDecisionTask(ctx context.Context, ap1 *types.AddDeci scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, ap1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) ap2, err = c.client.AddDecisionTask(ctx, ap1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -89,9 +94,11 @@ func (c *matchingClient) CancelOutstandingPoll(ctx context.Context, cp1 *types.C scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, cp1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) err = c.client.CancelOutstandingPoll(ctx, cp1, p1...) 
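// Because the timer and the histogram are recorded through the same scope
// value, both series carry identical tags (operation, caller, tasklist, and
// so on), which should let dashboards be ported one-for-one. A sketch of that
// equivalence at the tally level (hypothetical tag names; baseScope, buckets,
// and elapsed are assumed to exist for illustration):
//
//	tagged := baseScope.Tagged(map[string]string{"operation": "AddActivityTask"})
//	tagged.Timer("cadence_client_latency").Record(elapsed)
//	tagged.Histogram("cadence_client_latency_ns", buckets).RecordDuration(elapsed)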
sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -112,9 +119,11 @@ func (c *matchingClient) DescribeTaskList(ctx context.Context, mp1 *types.Matchi scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, mp1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) dp1, err = c.client.DescribeTaskList(ctx, mp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -135,9 +144,11 @@ func (c *matchingClient) GetTaskListsByDomain(ctx context.Context, gp1 *types.Ge scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, gp1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) gp2, err = c.client.GetTaskListsByDomain(ctx, gp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -158,9 +169,11 @@ func (c *matchingClient) ListTaskListPartitions(ctx context.Context, mp1 *types. scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, mp1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) lp1, err = c.client.ListTaskListPartitions(ctx, mp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -181,9 +194,11 @@ func (c *matchingClient) PollForActivityTask(ctx context.Context, mp1 *types.Mat scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, mp1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) mp2, err = c.client.PollForActivityTask(ctx, mp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -204,9 +219,11 @@ func (c *matchingClient) PollForDecisionTask(ctx context.Context, mp1 *types.Mat scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, mp1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) mp2, err = c.client.PollForDecisionTask(ctx, mp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -227,9 +244,11 @@ func (c *matchingClient) QueryWorkflow(ctx context.Context, mp1 *types.MatchingQ scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, mp1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) mp2, err = c.client.QueryWorkflow(ctx, mp1, p1...) 
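// One way to verify a wrapped call emits both series during the migration is
// tally's in-memory TestScope. A sketch, assuming snapshot keys follow tally's
// "name+tags" format (empty tags here) and eliding how the metered client is
// constructed on top of ts:
//
//	ts := tally.NewTestScope("", nil)
//	// ... issue one call through a metered client whose metrics scope wraps ts ...
//	snap := ts.Snapshot()
//	_, timerEmitted := snap.Timers()["cadence_client_latency+"]
//	_, histEmitted := snap.Histograms()["cadence_client_latency_ns+"]
//	// both timerEmitted and histEmitted should be true while double-emitting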
sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -250,9 +269,11 @@ func (c *matchingClient) RefreshTaskListPartitionConfig(ctx context.Context, mp1 scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, mp1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) mp2, err = c.client.RefreshTaskListPartitionConfig(ctx, mp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -273,9 +294,11 @@ func (c *matchingClient) RespondQueryTaskCompleted(ctx context.Context, mp1 *typ scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, mp1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) err = c.client.RespondQueryTaskCompleted(ctx, mp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -296,9 +319,11 @@ func (c *matchingClient) UpdateTaskListPartitionConfig(ctx context.Context, mp1 scope.IncCounter(metrics.CadenceClientRequests) c.emitForwardedFromStats(scope, mp1) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) mp2, err = c.client.UpdateTaskListPartitionConfig(ctx, mp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) diff --git a/client/wrappers/metered/sharddistributor_generated.go b/client/wrappers/metered/sharddistributor_generated.go index be0475d9f4a..d8c8028392b 100644 --- a/client/wrappers/metered/sharddistributor_generated.go +++ b/client/wrappers/metered/sharddistributor_generated.go @@ -6,6 +6,7 @@ package metered import ( "context" + "time" "go.uber.org/yarpc" @@ -40,9 +41,11 @@ func (c *sharddistributorClient) GetShardOwner(ctx context.Context, gp1 *types.G scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) gp2, err = c.client.GetShardOwner(ctx, gp1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) @@ -62,9 +65,11 @@ func (c *sharddistributorClient) WatchNamespaceState(ctx context.Context, wp1 *t scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) w1, err = c.client.WatchNamespaceState(ctx, wp1, p1...) 
sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) diff --git a/client/wrappers/metered/sharddistributorexecutor_generated.go b/client/wrappers/metered/sharddistributorexecutor_generated.go index 316cd0bf821..f90394dfc7e 100644 --- a/client/wrappers/metered/sharddistributorexecutor_generated.go +++ b/client/wrappers/metered/sharddistributorexecutor_generated.go @@ -6,6 +6,7 @@ package metered import ( "context" + "time" "go.uber.org/yarpc" @@ -40,9 +41,11 @@ func (c *sharddistributorexecutorClient) Heartbeat(ctx context.Context, ep1 *typ scope.IncCounter(metrics.CadenceClientRequests) + clientLatencyStart := time.Now() sw := scope.StartTimer(metrics.CadenceClientLatency) ep2, err = c.client.Heartbeat(ctx, ep1, p1...) sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(clientLatencyStart)) if err != nil { scope.IncCounter(metrics.CadenceClientFailures) diff --git a/common/asyncworkflow/queue/consumer/default_consumer.go b/common/asyncworkflow/queue/consumer/default_consumer.go index 63a4b22c26d..58a615263b1 100644 --- a/common/asyncworkflow/queue/consumer/default_consumer.go +++ b/common/asyncworkflow/queue/consumer/default_consumer.go @@ -156,8 +156,12 @@ func (c *DefaultConsumer) processMessage(msg messaging.Message) { logger := c.logger.WithTags(tag.Dynamic("partition", msg.Partition()), tag.Dynamic("offset", msg.Offset())) logger.Debug("Received message") + asyncProcessStart := time.Now() sw := c.scope.StartTimer(metrics.AsyncWorkflowProcessMsgLatency) - defer sw.Stop() + defer func() { + sw.Stop() + c.scope.RecordHistogramDuration(metrics.AsyncWorkflowProcessMsgLatencyHistogram, time.Since(asyncProcessStart)) + }() var request sqlblobs.AsyncRequestMessage if err := c.msgDecoder.Decode(msg.Value(), &request); err != nil { diff --git a/common/metrics/config.go b/common/metrics/config.go index 70ce0cc27cd..267a7363c83 100644 --- a/common/metrics/config.go +++ b/common/metrics/config.go @@ -265,6 +265,89 @@ var HistogramMigrationMetrics = map[string]struct{}{ "direct_query_dispatch_non_sticky_latency_ns": {}, "direct_query_dispatch_clear_stickiness_latency": {}, "direct_query_dispatch_clear_stickiness_latency_ns": {}, + "cadence_authorization_latency": {}, + "cadence_authorization_latency_ns": {}, + + "pinot_latency": {}, + "pinot_latency_ns": {}, + "pinot_latency_per_domain": {}, + "pinot_latency_per_domain_ns": {}, + + "sequentialtask_submit_latency": {}, + "sequentialtask_submit_latency_ns": {}, + "sequentialtask_queue_size": {}, + "sequentialtask_queue_size_counts": {}, + "sequentialtask_queue_processing_latency": {}, + "sequentialtask_queue_processing_latency_ns": {}, + "sequentialtask_task_processing_latency": {}, + "sequentialtask_task_processing_latency_ns": {}, + + "prioritytask_submit_latency": {}, + "prioritytask_submit_latency_ns": {}, + + "graceful_failover_latency": {}, + "graceful_failover_latency_ns": {}, + + "async_request_payload_size_per_domain": {}, + "async_request_payload_size_per_domain_counts": {}, + + "task_redispatch_queue_pending_tasks": {}, + "task_redispatch_queue_pending_tasks_counts": {}, + + "workflow_context_lock_latency": {}, + "workflow_context_lock_latency_ns": {}, + + "get_replication_messages_for_shard": {}, + "get_replication_messages_for_shard_ns": {}, + "get_dlq_replication_messages": {}, + "get_dlq_replication_messages_ns": {}, + + "decision_task_query_latency": {}, + 
"decision_task_query_latency_ns": {}, + + "syncmatch_latency_per_tl": {}, + "syncmatch_latency_per_tl_ns": {}, + "asyncmatch_latency_per_tl": {}, + "asyncmatch_latency_per_tl_ns": {}, + + "asyncmatch_local_poll_attempt_per_tl": {}, + "asyncmatch_local_poll_attempt_per_tl_counts": {}, + "asyncmatch_forward_poll_attempt_per_tl": {}, + "asyncmatch_forward_poll_attempt_per_tl_counts": {}, + "asyncmatch_local_poll_after_forward_failed_attempt_per_tl": {}, + "asyncmatch_local_poll_after_forward_failed_attempt_per_tl_counts": {}, + + "poll_local_match_latency_per_tl": {}, + "poll_local_match_latency_per_tl_ns": {}, + "poll_forward_match_latency_per_tl": {}, + "poll_forward_match_latency_per_tl_ns": {}, + "poll_local_match_after_forward_failed_latency_per_tl": {}, + "poll_local_match_after_forward_failed_latency_per_tl_ns": {}, + + "es_processor_process_msg_latency": {}, + "es_processor_process_msg_latency_ns": {}, + "index_processor_process_msg_latency": {}, + "index_processor_process_msg_latency_ns": {}, + + "async_workflow_process_msg_latency": {}, + "async_workflow_process_msg_latency_ns": {}, + "diagnostics_workflow_execution_latency": {}, + "diagnostics_workflow_execution_latency_ns": {}, + + "shard_distributor_latency": {}, + "shard_distributor_latency_ns": {}, + + "global_ratelimiter_update_latency": {}, + "global_ratelimiter_update_latency_ns": {}, + + "cadence_latency": {}, + "cadence_latency_ns": {}, + "cadence_client_latency": {}, + "cadence_client_latency_ns": {}, + "cadence_client_latency_redirection": {}, + "cadence_client_latency_redirection_ns": {}, + "cadence_latency_per_tl": {}, + "cadence_latency_per_tl_ns": {}, } func (h HistogramMigration) EmitTimer(name string) bool { diff --git a/common/metrics/defs.go b/common/metrics/defs.go index 1269e7e6719..a5d48882a50 100644 --- a/common/metrics/defs.go +++ b/common/metrics/defs.go @@ -2282,6 +2282,7 @@ const ( CadenceRequests MetricIdx = iota CadenceFailures CadenceLatency + CadenceLatencyHistogram CadenceErrBadRequestCounter CadenceErrDomainNotActiveCounter CadenceErrServiceBusyCounter @@ -2366,14 +2367,17 @@ const ( CadenceClientRequests CadenceClientFailures CadenceClientLatency + CadenceClientLatencyHistogram CadenceTasklistRequests CadenceDcRedirectionClientRequests CadenceDcRedirectionClientFailures CadenceDcRedirectionClientLatency + CadenceDcRedirectionClientLatencyHistogram CadenceAuthorizationLatency + CadenceAuthorizationLatencyHistogram DomainCachePrepareCallbacksLatency DomainCachePrepareCallbacksLatencyHistogram @@ -2412,11 +2416,13 @@ const ( PinotRequests PinotFailures PinotLatency + PinotLatencyHistogram PinotErrBadRequestCounter PinotErrBusyCounter PinotRequestsPerDomain PinotFailuresPerDomain PinotLatencyPerDomain + PinotLatencyPerDomainHistogram PinotErrBadRequestCounterPerDomain PinotErrBusyCounterPerDomain @@ -2424,9 +2430,13 @@ const ( SequentialTaskSubmitRequestTaskQueueExist SequentialTaskSubmitRequestTaskQueueMissing SequentialTaskSubmitLatency + SequentialTaskSubmitLatencyHistogram SequentialTaskQueueSize + SequentialTaskQueueSizeHistogram SequentialTaskQueueProcessingLatency + SequentialTaskQueueProcessingLatencyHistogram SequentialTaskTaskProcessingLatency + SequentialTaskTaskProcessingLatencyHistogram ParallelTaskSubmitRequest ParallelTaskSubmitLatency @@ -2436,6 +2446,7 @@ const ( PriorityTaskSubmitRequest PriorityTaskSubmitLatency + PriorityTaskSubmitLatencyHistogram KafkaConsumerMessageIn KafkaConsumerMessageAck @@ -2447,6 +2458,7 @@ const ( DescribeWorkflowStatusError GracefulFailoverLatency + 
GracefulFailoverLatencyHistogram GracefulFailoverFailure HistoryArchiverArchiveNonRetryableErrorCount @@ -2481,6 +2493,7 @@ const ( CadenceRequestsPerTaskListWithoutRollup CadenceFailuresPerTaskList CadenceLatencyPerTaskList + CadenceLatencyPerTaskListHistogram CadenceErrBadRequestPerTaskListCounter CadenceErrDomainNotActivePerTaskListCounter CadenceErrServiceBusyPerTaskListCounter @@ -2518,15 +2531,17 @@ const ( HashringViewIdentifier AsyncRequestPayloadSize + AsyncRequestPayloadSizeHistogram // limiter-side metrics GlobalRatelimiterStartupUsageHistogram GlobalRatelimiterFailingUsageHistogram GlobalRatelimiterGlobalUsageHistogram - GlobalRatelimiterUpdateLatency // time spent performing all Update requests, per batch attempt. ideally well below update interval. - GlobalRatelimiterAllowedRequestsCount // per key/type usage - GlobalRatelimiterRejectedRequestsCount // per key/type usage - GlobalRatelimiterQuota // per-global-key quota information, emitted when a key is in us + GlobalRatelimiterUpdateLatency // time spent performing all Update requests, per batch attempt. ideally well below update interval. + GlobalRatelimiterUpdateLatencyHistogram // histogram version + GlobalRatelimiterAllowedRequestsCount // per key/type usage + GlobalRatelimiterRejectedRequestsCount // per key/type usage + GlobalRatelimiterQuota // per-global-key quota information, emitted when a key is in us // aggregator-side metrics GlobalRatelimiterInitialized @@ -2640,6 +2655,7 @@ const ( TaskSchedulerThrottledCounterPerDomain TaskRedispatchQueuePendingTasksTimer + TaskRedispatchQueuePendingTasksHistogram TransferTaskThrottledCounter TimerTaskThrottledCounter @@ -2786,6 +2802,7 @@ const ( AcquireLockFailedCounter WorkflowContextCleared WorkflowContextLockLatency + WorkflowContextLockLatencyHistogram MutableStateSize MutableStateSizeHistogram ExecutionInfoSize @@ -2902,7 +2919,9 @@ const ( ReplicationDLQValidationFailed ReplicationMessageTooLargePerShard GetReplicationMessagesForShardLatency + GetReplicationMessagesForShardLatencyHistogram GetDLQReplicationMessagesLatency + GetDLQReplicationMessagesLatencyHistogram EventReapplySkippedCount DirectQueryDispatchLatency DirectQueryDispatchLatencyHistogram @@ -2917,6 +2936,7 @@ const ( DirectQueryDispatchClearStickinessSuccessCount DirectQueryDispatchTimeoutBeforeNonStickyCount DecisionTaskQueryLatency + DecisionTaskQueryLatencyHistogram ConsistentQueryPerShard ConsistentQueryTimeoutCount QueryBeforeFirstDecisionCount @@ -2941,7 +2961,6 @@ const ( WorkflowRepairTimeout WorkflowRepairDuration FailoverMarkerCount - FailoverMarkerReplicationLatency FailoverMarkerInsertFailure FailoverMarkerNotificationFailure FailoverMarkerUpdateShardFailure @@ -2990,8 +3009,9 @@ const ( BufferIsolationGroupRedirectFailureCounter BufferIsolationGroupMisconfiguredCounter SyncMatchLatencyPerTaskList + SyncMatchLatencyPerTaskListHistogram AsyncMatchLatencyPerTaskList - AsyncMatchDispatchLatencyPerTaskList + AsyncMatchLatencyPerTaskListHistogram AsyncMatchDispatchTimeoutCounterPerTaskList ExpiredTasksPerTaskListCounter ForwardedPerTaskListCounter @@ -3024,16 +3044,22 @@ const ( SyncMatchForwardPollLatencyPerTaskList AsyncMatchLocalPollCounterPerTaskList AsyncMatchLocalPollAttemptPerTaskList + AsyncMatchLocalPollAttemptPerTaskListHistogram AsyncMatchLocalPollLatencyPerTaskList AsyncMatchForwardPollCounterPerTaskList AsyncMatchForwardPollAttemptPerTaskList + AsyncMatchForwardPollAttemptPerTaskListHistogram AsyncMatchForwardPollLatencyPerTaskList 
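// On the DefaultConsumer change earlier in this diff: the move from
// "defer sw.Stop()" to a deferred closure matters because defer evaluates its
// argument expressions immediately. A sketch of the pitfall and the fix, using
// the metric names from the change:
//
//	asyncProcessStart := time.Now()
//	sw := c.scope.StartTimer(metrics.AsyncWorkflowProcessMsgLatency)
//
//	// wrong: time.Since(asyncProcessStart) would be evaluated at the defer
//	// statement itself, recording ~0 instead of the processing time:
//	// defer c.scope.RecordHistogramDuration(metrics.AsyncWorkflowProcessMsgLatencyHistogram, time.Since(asyncProcessStart))
//
//	// right: the closure body runs when processMessage returns:
//	defer func() {
//		sw.Stop()
//		c.scope.RecordHistogramDuration(metrics.AsyncWorkflowProcessMsgLatencyHistogram, time.Since(asyncProcessStart))
//	}()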
AsyncMatchLocalPollAfterForwardFailedCounterPerTaskList AsyncMatchLocalPollAfterForwardFailedAttemptPerTaskList + AsyncMatchLocalPollAfterForwardFailedAttemptPerTaskListHistogram AsyncMatchLocalPollAfterForwardFailedLatencyPerTaskList PollLocalMatchLatencyPerTaskList + PollLocalMatchLatencyPerTaskListHistogram PollForwardMatchLatencyPerTaskList + PollForwardMatchLatencyPerTaskListHistogram PollLocalMatchAfterForwardFailedLatencyPerTaskList + PollLocalMatchAfterForwardFailedLatencyPerTaskListHistogram PollDecisionTaskAlreadyStartedCounterPerTaskList PollActivityTaskAlreadyStartedCounterPerTaskList TaskListReadWritePartitionMismatchGauge @@ -3070,8 +3096,10 @@ const ( ESProcessorFailures ESProcessorCorruptedData ESProcessorProcessMsgLatency + ESProcessorProcessMsgLatencyHistogram IndexProcessorCorruptedData IndexProcessorProcessMsgLatency + IndexProcessorProcessMsgLatencyHistogram ArchiverNonRetryableErrorCount ArchiverStartedCount ArchiverStoppedCount @@ -3138,12 +3166,14 @@ const ( ESAnalyzerNumLongRunningWorkflows AsyncWorkflowConsumerCount AsyncWorkflowProcessMsgLatency + AsyncWorkflowProcessMsgLatencyHistogram AsyncWorkflowFailureCorruptMsgCount AsyncWorkflowFailureByFrontendCount AsyncWorkflowSuccessCount DiagnosticsWorkflowStartedCount DiagnosticsWorkflowSuccess DiagnosticsWorkflowExecutionLatency + DiagnosticsWorkflowExecutionLatencyHistogram // Scheduler worker metrics // SchedulerWorkerActiveGauge is the number of per-domain workers running on this host (host-level) @@ -3187,6 +3217,7 @@ const ( ShardDistributorRequests = iota + NumWorkerMetrics ShardDistributorFailures ShardDistributorLatency + ShardDistributorLatencyHistogram ShardDistributorErrContextTimeoutCounter ShardDistributorErrNamespaceNotFound ShardDistributorErrShardNotFound @@ -3237,6 +3268,7 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ CadenceRequests: {metricName: "cadence_requests", metricType: Counter}, CadenceFailures: {metricName: "cadence_errors", metricType: Counter}, CadenceLatency: {metricName: "cadence_latency", metricType: Timer}, + CadenceLatencyHistogram: {metricName: "cadence_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, CadenceErrBadRequestCounter: {metricName: "cadence_errors_bad_request", metricType: Counter}, CadenceErrDomainNotActiveCounter: {metricName: "cadence_errors_domain_not_active", metricType: Counter}, CadenceErrServiceBusyCounter: {metricName: "cadence_errors_service_busy", metricType: Counter}, @@ -3317,11 +3349,14 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ CadenceClientRequests: {metricName: "cadence_client_requests", metricType: Counter}, CadenceClientFailures: {metricName: "cadence_client_errors", metricType: Counter}, CadenceClientLatency: {metricName: "cadence_client_latency", metricType: Timer}, + CadenceClientLatencyHistogram: {metricName: "cadence_client_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, CadenceTasklistRequests: {metricName: "cadence_tasklist_request", metricType: Counter}, CadenceDcRedirectionClientRequests: {metricName: "cadence_client_requests_redirection", metricType: Counter}, CadenceDcRedirectionClientFailures: {metricName: "cadence_client_errors_redirection", metricType: Counter}, CadenceDcRedirectionClientLatency: {metricName: "cadence_client_latency_redirection", metricType: Timer}, + CadenceDcRedirectionClientLatencyHistogram: {metricName: "cadence_client_latency_redirection_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, CadenceAuthorizationLatency: 
{metricName: "cadence_authorization_latency", metricType: Timer}, + CadenceAuthorizationLatencyHistogram: {metricName: "cadence_authorization_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, DomainCachePrepareCallbacksLatency: {metricName: "domain_cache_prepare_callbacks_latency", metricType: Timer}, DomainCachePrepareCallbacksLatencyHistogram: {metricName: "domain_cache_prepare_callbacks_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, DomainCacheCallbacksLatency: {metricName: "domain_cache_callbacks_latency", metricType: Timer}, @@ -3353,20 +3388,26 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ PinotRequests: {metricName: "pinot_requests", metricType: Counter}, PinotFailures: {metricName: "pinot_errors", metricType: Counter}, PinotLatency: {metricName: "pinot_latency", metricType: Timer}, + PinotLatencyHistogram: {metricName: "pinot_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, PinotErrBadRequestCounter: {metricName: "pinot_errors_bad_request", metricType: Counter}, PinotErrBusyCounter: {metricName: "pinot_errors_busy", metricType: Counter}, PinotRequestsPerDomain: {metricName: "pinot_requests_per_domain", metricRollupName: "pinot_requests", metricType: Counter}, PinotFailuresPerDomain: {metricName: "pinot_errors_per_domain", metricRollupName: "pinot_errors", metricType: Counter}, PinotLatencyPerDomain: {metricName: "pinot_latency_per_domain", metricRollupName: "pinot_latency", metricType: Timer}, + PinotLatencyPerDomainHistogram: {metricName: "pinot_latency_per_domain_ns", metricRollupName: "pinot_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, PinotErrBadRequestCounterPerDomain: {metricName: "pinot_errors_bad_request_per_domain", metricRollupName: "pinot_errors_bad_request", metricType: Counter}, PinotErrBusyCounterPerDomain: {metricName: "pinot_errors_busy_per_domain", metricRollupName: "pinot_errors_busy", metricType: Counter}, SequentialTaskSubmitRequest: {metricName: "sequentialtask_submit_request", metricType: Counter}, SequentialTaskSubmitRequestTaskQueueExist: {metricName: "sequentialtask_submit_request_taskqueue_exist", metricType: Counter}, SequentialTaskSubmitRequestTaskQueueMissing: {metricName: "sequentialtask_submit_request_taskqueue_missing", metricType: Counter}, SequentialTaskSubmitLatency: {metricName: "sequentialtask_submit_latency", metricType: Timer}, + SequentialTaskSubmitLatencyHistogram: {metricName: "sequentialtask_submit_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, SequentialTaskQueueSize: {metricName: "sequentialtask_queue_size", metricType: Timer}, + SequentialTaskQueueSizeHistogram: {metricName: "sequentialtask_queue_size_counts", metricType: Histogram, intExponentialBuckets: Mid1To16k}, SequentialTaskQueueProcessingLatency: {metricName: "sequentialtask_queue_processing_latency", metricType: Timer}, + SequentialTaskQueueProcessingLatencyHistogram: {metricName: "sequentialtask_queue_processing_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, SequentialTaskTaskProcessingLatency: {metricName: "sequentialtask_task_processing_latency", metricType: Timer}, + SequentialTaskTaskProcessingLatencyHistogram: {metricName: "sequentialtask_task_processing_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, ParallelTaskSubmitRequest: {metricName: "paralleltask_submit_request", metricType: Counter}, ParallelTaskSubmitLatency: {metricName: "paralleltask_submit_latency", metricType: Timer}, 
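// The bucket families referenced throughout these definitions (Low1ms100s,
// Mid1ms24h, Mid1To16k, Mid8B16MB) are declared elsewhere in this package; the
// exact boundaries below are an assumption, shown only to illustrate the kind
// of exponential ladder such names suggest, built with tally's bucket helpers:
//
//	low1ms100s := tally.MustMakeExponentialDurationBuckets(time.Millisecond, 2, 18) // 1ms, 2ms, ..., ~131s
//	mid1To16k := tally.MustMakeExponentialValueBuckets(1, 2, 15)                    // 1, 2, ..., 16384
//	mid8B16MB := tally.MustMakeExponentialValueBuckets(8, 2, 22)                    // 8B, 16B, ..., 16MiB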
ParallelTaskSubmitLatencyHistogram: {metricName: "paralleltask_submit_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, @@ -3374,12 +3415,14 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ ParallelTaskTaskProcessingLatencyHistogram: {metricName: "paralleltask_task_processing_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, PriorityTaskSubmitRequest: {metricName: "prioritytask_submit_request", metricType: Counter}, PriorityTaskSubmitLatency: {metricName: "prioritytask_submit_latency", metricType: Timer}, + PriorityTaskSubmitLatencyHistogram: {metricName: "prioritytask_submit_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, KafkaConsumerMessageIn: {metricName: "kafka_consumer_message_in", metricType: Counter}, KafkaConsumerMessageAck: {metricName: "kafka_consumer_message_ack", metricType: Counter}, KafkaConsumerMessageNack: {metricName: "kafka_consumer_message_nack", metricType: Counter}, KafkaConsumerMessageNackDlqErr: {metricName: "kafka_consumer_message_nack_dlq_err", metricType: Counter}, KafkaConsumerSessionStart: {metricName: "kafka_consumer_session_start", metricType: Counter}, GracefulFailoverLatency: {metricName: "graceful_failover_latency", metricType: Timer}, + GracefulFailoverLatencyHistogram: {metricName: "graceful_failover_latency_ns", metricType: Histogram, exponentialBuckets: Mid1ms24h}, GracefulFailoverFailure: {metricName: "graceful_failover_failures", metricType: Counter}, HistoryArchiverArchiveNonRetryableErrorCount: {metricName: "history_archiver_archive_non_retryable_error", metricType: Counter}, @@ -3417,6 +3460,9 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ CadenceLatencyPerTaskList: { metricName: "cadence_latency_per_tl", metricRollupName: "cadence_latency", metricType: Timer, }, + CadenceLatencyPerTaskListHistogram: { + metricName: "cadence_latency_per_tl_ns", metricRollupName: "cadence_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s, + }, CadenceErrBadRequestPerTaskListCounter: { metricName: "cadence_errors_bad_request_per_tl", metricRollupName: "cadence_errors_bad_request", metricType: Counter, }, @@ -3493,15 +3539,17 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ DescribeWorkflowStatusError: {metricName: "describe_wf_error", metricType: Counter}, DescribeWorkflowStatusCount: {metricName: "describe_wf_status", metricType: Counter}, - AsyncRequestPayloadSize: {metricName: "async_request_payload_size_per_domain", metricRollupName: "async_request_payload_size", metricType: Timer}, + AsyncRequestPayloadSize: {metricName: "async_request_payload_size_per_domain", metricRollupName: "async_request_payload_size", metricType: Timer}, + AsyncRequestPayloadSizeHistogram: {metricName: "async_request_payload_size_per_domain_counts", metricRollupName: "async_request_payload_size_counts", metricType: Histogram, intExponentialBuckets: Mid8B16MB}, - GlobalRatelimiterStartupUsageHistogram: {metricName: "global_ratelimiter_startup_usage_histogram", metricType: Histogram, buckets: GlobalRatelimiterUsageHistogram}, - GlobalRatelimiterFailingUsageHistogram: {metricName: "global_ratelimiter_failing_usage_histogram", metricType: Histogram, buckets: GlobalRatelimiterUsageHistogram}, - GlobalRatelimiterGlobalUsageHistogram: {metricName: "global_ratelimiter_global_usage_histogram", metricType: Histogram, buckets: GlobalRatelimiterUsageHistogram}, - GlobalRatelimiterUpdateLatency: {metricName: "global_ratelimiter_update_latency", metricType: Timer}, - 
GlobalRatelimiterAllowedRequestsCount: {metricName: "global_ratelimiter_allowed_requests", metricType: Counter}, - GlobalRatelimiterRejectedRequestsCount: {metricName: "global_ratelimiter_rejected_requests", metricType: Counter}, - GlobalRatelimiterQuota: {metricName: "global_ratelimiter_quota", metricType: Gauge}, + GlobalRatelimiterStartupUsageHistogram: {metricName: "global_ratelimiter_startup_usage_histogram", metricType: Histogram, buckets: GlobalRatelimiterUsageHistogram}, + GlobalRatelimiterFailingUsageHistogram: {metricName: "global_ratelimiter_failing_usage_histogram", metricType: Histogram, buckets: GlobalRatelimiterUsageHistogram}, + GlobalRatelimiterGlobalUsageHistogram: {metricName: "global_ratelimiter_global_usage_histogram", metricType: Histogram, buckets: GlobalRatelimiterUsageHistogram}, + GlobalRatelimiterUpdateLatency: {metricName: "global_ratelimiter_update_latency", metricType: Timer}, + GlobalRatelimiterUpdateLatencyHistogram: {metricName: "global_ratelimiter_update_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, + GlobalRatelimiterAllowedRequestsCount: {metricName: "global_ratelimiter_allowed_requests", metricType: Counter}, + GlobalRatelimiterRejectedRequestsCount: {metricName: "global_ratelimiter_rejected_requests", metricType: Counter}, + GlobalRatelimiterQuota: {metricName: "global_ratelimiter_quota", metricType: Gauge}, GlobalRatelimiterInitialized: {metricName: "global_ratelimiter_initialized", metricType: Histogram, buckets: GlobalRatelimiterUsageHistogram}, GlobalRatelimiterReinitialized: {metricName: "global_ratelimiter_reinitialized", metricType: Histogram, buckets: GlobalRatelimiterUsageHistogram}, @@ -3614,6 +3662,7 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ TaskBatchCompleteCounter: {metricName: "task_batch_complete_counter", metricType: Counter}, TaskBatchCompleteFailure: {metricName: "task_batch_complete_error", metricType: Counter}, TaskRedispatchQueuePendingTasksTimer: {metricName: "task_redispatch_queue_pending_tasks", metricType: Timer}, + TaskRedispatchQueuePendingTasksHistogram: {metricName: "task_redispatch_queue_pending_tasks_counts", metricType: Histogram, intExponentialBuckets: Mid1To16k}, TransferTaskThrottledCounter: {metricName: "transfer_task_throttled_counter", metricType: Counter}, TimerTaskThrottledCounter: {metricName: "timer_task_throttled_counter", metricType: Counter}, CrossClusterTaskThrottledCounter: {metricName: "cross_cluster_task_throttled_counter", metricType: Counter}, @@ -3754,6 +3803,7 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ AcquireLockFailedCounter: {metricName: "acquire_lock_failed", metricType: Counter}, WorkflowContextCleared: {metricName: "workflow_context_cleared", metricType: Counter}, WorkflowContextLockLatency: {metricName: "workflow_context_lock_latency", metricType: Timer}, + WorkflowContextLockLatencyHistogram: {metricName: "workflow_context_lock_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, MutableStateSize: {metricName: "mutable_state_size", metricType: Timer}, MutableStateSizeHistogram: {metricName: "mutable_state_size_counts", metricType: Histogram, intExponentialBuckets: Mid8B16MB}, ExecutionInfoSize: {metricName: "execution_info_size", metricType: Timer}, @@ -3860,7 +3910,9 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ ReplicationDLQValidationFailed: {metricName: "replication_dlq_validation_failed", metricType: Counter}, ReplicationMessageTooLargePerShard: {metricName: 
"replication_message_too_large_per_shard", metricType: Counter}, GetReplicationMessagesForShardLatency: {metricName: "get_replication_messages_for_shard", metricType: Timer}, + GetReplicationMessagesForShardLatencyHistogram: {metricName: "get_replication_messages_for_shard_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, GetDLQReplicationMessagesLatency: {metricName: "get_dlq_replication_messages", metricType: Timer}, + GetDLQReplicationMessagesLatencyHistogram: {metricName: "get_dlq_replication_messages_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, EventReapplySkippedCount: {metricName: "event_reapply_skipped_count", metricType: Counter}, DirectQueryDispatchLatency: {metricName: "direct_query_dispatch_latency", metricType: Timer}, DirectQueryDispatchLatencyHistogram: {metricName: "direct_query_dispatch_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, @@ -3875,6 +3927,7 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ DirectQueryDispatchClearStickinessSuccessCount: {metricName: "direct_query_dispatch_clear_stickiness_success", metricType: Counter}, DirectQueryDispatchTimeoutBeforeNonStickyCount: {metricName: "direct_query_dispatch_timeout_before_non_sticky", metricType: Counter}, DecisionTaskQueryLatency: {metricName: "decision_task_query_latency", metricType: Timer}, + DecisionTaskQueryLatencyHistogram: {metricName: "decision_task_query_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, ConsistentQueryPerShard: {metricName: "consistent_query_per_shard", metricType: Counter}, ConsistentQueryTimeoutCount: {metricName: "consistent_query_timeout", metricType: Counter}, QueryBeforeFirstDecisionCount: {metricName: "query_before_first_decision", metricType: Counter}, @@ -3899,7 +3952,6 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ WorkflowRepairTimeout: {metricName: "workflow_repair_timeout", metricType: Counter}, WorkflowRepairDuration: {metricName: "workflow_repair_duration_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, FailoverMarkerCount: {metricName: "failover_marker_count", metricType: Counter}, - FailoverMarkerReplicationLatency: {metricName: "failover_marker_replication_latency", metricType: Timer}, FailoverMarkerInsertFailure: {metricName: "failover_marker_insert_failures", metricType: Counter}, FailoverMarkerNotificationFailure: {metricName: "failover_marker_notification_failures", metricType: Counter}, FailoverMarkerUpdateShardFailure: {metricName: "failover_marker_update_shard_failures", metricType: Counter}, @@ -3930,84 +3982,91 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ VirtualQueueRunningGauge: {metricName: "virtual_queue_running", metricType: Gauge}, }, Matching: { - PollSuccessPerTaskListCounter: {metricName: "poll_success_per_tl", metricRollupName: "poll_success"}, - PollTimeoutPerTaskListCounter: {metricName: "poll_timeouts_per_tl", metricRollupName: "poll_timeouts"}, - PollSuccessWithSyncPerTaskListCounter: {metricName: "poll_success_sync_per_tl", metricRollupName: "poll_success_sync"}, - LeaseRequestPerTaskListCounter: {metricName: "lease_requests_per_tl", metricRollupName: "lease_requests"}, - LeaseFailurePerTaskListCounter: {metricName: "lease_failures_per_tl", metricRollupName: "lease_failures"}, - ConditionFailedErrorPerTaskListCounter: {metricName: "condition_failed_errors_per_tl", metricRollupName: "condition_failed_errors"}, - RespondQueryTaskFailedPerTaskListCounter: {metricName: "respond_query_failed_per_tl", metricRollupName: 
"respond_query_failed"}, - SyncThrottlePerTaskListCounter: {metricName: "sync_throttle_count_per_tl", metricRollupName: "sync_throttle_count"}, - BufferThrottlePerTaskListCounter: {metricName: "buffer_throttle_count_per_tl", metricRollupName: "buffer_throttle_count"}, - BufferUnknownTaskDispatchError: {metricName: "buffer_unknown_task_dispatch_error_per_tl", metricRollupName: "buffer_unknown_task_dispatch_error"}, - BufferIsolationGroupRedirectCounter: {metricName: "buffer_isolation_group_redirected_per_tl", metricRollupName: "buffer_isolation_group_redirected"}, - BufferIsolationGroupRedirectFailureCounter: {metricName: "buffer_isolation_group_redirect_failure_per_tl", metricRollupName: "buffer_isolation_group_redirect_failure"}, - BufferIsolationGroupMisconfiguredCounter: {metricName: "buffer_isolation_group_misconfigured_failure_per_tl", metricRollupName: "buffer_isolation_group_misconfigured_failure"}, - ExpiredTasksPerTaskListCounter: {metricName: "tasks_expired_per_tl", metricRollupName: "tasks_expired"}, - ForwardedPerTaskListCounter: {metricName: "forwarded_per_tl", metricRollupName: "forwarded"}, - ForwardTaskCallsPerTaskList: {metricName: "forward_task_calls_per_tl", metricRollupName: "forward_task_calls"}, - ForwardTaskErrorsPerTaskList: {metricName: "forward_task_errors_per_tl", metricRollupName: "forward_task_errors"}, - SyncMatchForwardTaskThrottleErrorPerTasklist: {metricName: "sync_forward_task_throttle_errors_per_tl", metricRollupName: "sync_forward_task_throttle_errors"}, - AsyncMatchForwardTaskThrottleErrorPerTasklist: {metricName: "async_forward_task_throttle_errors_per_tl", metricRollupName: "async_forward_task_throttle_errors"}, - ForwardQueryCallsPerTaskList: {metricName: "forward_query_calls_per_tl", metricRollupName: "forward_query_calls"}, - ForwardQueryErrorsPerTaskList: {metricName: "forward_query_errors_per_tl", metricRollupName: "forward_query_errors"}, - ForwardPollCallsPerTaskList: {metricName: "forward_poll_calls_per_tl", metricRollupName: "forward_poll_calls"}, - ForwardPollErrorsPerTaskList: {metricName: "forward_poll_errors_per_tl", metricRollupName: "forward_poll_errors"}, - SyncMatchLatencyPerTaskList: {metricName: "syncmatch_latency_per_tl", metricRollupName: "syncmatch_latency", metricType: Timer}, - AsyncMatchLatencyPerTaskList: {metricName: "asyncmatch_latency_per_tl", metricRollupName: "asyncmatch_latency", metricType: Timer}, - AsyncMatchDispatchLatencyPerTaskList: {metricName: "asyncmatch_dispatch_latency_per_tl", metricRollupName: "asyncmatch_dispatch_latency", metricType: Timer}, - AsyncMatchDispatchTimeoutCounterPerTaskList: {metricName: "asyncmatch_dispatch_timeouts_per_tl", metricRollupName: "asyncmatch_dispatch_timeouts"}, - ForwardTaskLatencyPerTaskList: {metricName: "forward_task_latency_per_tl", metricRollupName: "forward_task_latency"}, - ForwardQueryLatencyPerTaskList: {metricName: "forward_query_latency_per_tl", metricRollupName: "forward_query_latency"}, - ForwardPollLatencyPerTaskList: {metricName: "forward_poll_latency_per_tl", metricRollupName: "forward_poll_latency"}, - LocalToLocalMatchPerTaskListCounter: {metricName: "local_to_local_matches_per_tl", metricRollupName: "local_to_local_matches"}, - LocalToRemoteMatchPerTaskListCounter: {metricName: "local_to_remote_matches_per_tl", metricRollupName: "local_to_remote_matches"}, - RemoteToLocalMatchPerTaskListCounter: {metricName: "remote_to_local_matches_per_tl", metricRollupName: "remote_to_local_matches"}, - RemoteToRemoteMatchPerTaskListCounter: {metricName: 
"remote_to_remote_matches_per_tl", metricRollupName: "remote_to_remote_matches"}, - IsolationTaskMatchPerTaskListCounter: {metricName: "isolation_task_matches_per_tl", metricType: Counter}, - IsolationSuccessPerTaskListCounter: {metricName: "isolation_success_per_tl", metricRollupName: "isolation_success"}, - PollerPerTaskListCounter: {metricName: "poller_count_per_tl", metricRollupName: "poller_count"}, - PollerInvalidIsolationGroupCounter: {metricName: "poller_invalid_isolation_group_per_tl", metricType: Counter}, - TaskListPartitionUpdateFailedCounter: {metricName: "tasklist_partition_update_failed_per_tl", metricType: Counter}, - TaskListManagersGauge: {metricName: "tasklist_managers", metricType: Gauge}, - TaskLagPerTaskListGauge: {metricName: "task_lag_per_tl", metricType: Gauge}, - TaskBacklogPerTaskListGauge: {metricName: "task_backlog_per_tl", metricType: Gauge}, - TaskCountPerTaskListGauge: {metricName: "task_count_per_tl", metricType: Gauge}, - RateLimitPerTaskListGauge: {metricName: "rate_limit_per_tl", metricType: Gauge}, - SyncMatchLocalPollLatencyPerTaskList: {metricName: "syncmatch_local_poll_latency_per_tl", metricRollupName: "syncmatch_local_poll_latency"}, - SyncMatchForwardPollLatencyPerTaskList: {metricName: "syncmatch_forward_poll_latency_per_tl", metricRollupName: "syncmatch_forward_poll_latency"}, - AsyncMatchLocalPollCounterPerTaskList: {metricName: "asyncmatch_local_poll_per_tl", metricRollupName: "asyncmatch_local_poll"}, - AsyncMatchLocalPollAttemptPerTaskList: {metricName: "asyncmatch_local_poll_attempt_per_tl", metricRollupName: "asyncmatch_local_poll_attempt", metricType: Timer}, - AsyncMatchLocalPollLatencyPerTaskList: {metricName: "asyncmatch_local_poll_latency_per_tl", metricRollupName: "asyncmatch_local_poll_latency"}, - AsyncMatchForwardPollCounterPerTaskList: {metricName: "asyncmatch_forward_poll_per_tl", metricRollupName: "asyncmatch_forward_poll"}, - AsyncMatchForwardPollAttemptPerTaskList: {metricName: "asyncmatch_forward_poll_attempt_per_tl", metricRollupName: "asyncmatch_forward_poll_attempt", metricType: Timer}, - AsyncMatchForwardPollLatencyPerTaskList: {metricName: "asyncmatch_forward_poll_latency_per_tl", metricRollupName: "asyncmatch_forward_poll_latency"}, - AsyncMatchLocalPollAfterForwardFailedCounterPerTaskList: {metricName: "asyncmatch_local_poll_after_forward_failed_per_tl", metricRollupName: "asyncmatch_local_poll_after_forward_failed"}, - AsyncMatchLocalPollAfterForwardFailedAttemptPerTaskList: {metricName: "asyncmatch_local_poll_after_forward_failed_attempt_per_tl", metricRollupName: "asyncmatch_local_poll_after_forward_failed_attempt", metricType: Timer}, - AsyncMatchLocalPollAfterForwardFailedLatencyPerTaskList: {metricName: "asyncmatch_local_poll_after_forward_failed_latency_per_tl", metricRollupName: "asyncmatch_local_poll_after_forward_failed_latency"}, - PollLocalMatchLatencyPerTaskList: {metricName: "poll_local_match_latency_per_tl", metricRollupName: "poll_local_match_latency", metricType: Timer}, - PollForwardMatchLatencyPerTaskList: {metricName: "poll_forward_match_latency_per_tl", metricRollupName: "poll_forward_match_latency", metricType: Timer}, - PollLocalMatchAfterForwardFailedLatencyPerTaskList: {metricName: "poll_local_match_after_forward_failed_latency_per_tl", metricRollupName: "poll_local_match_after_forward_failed_latency", metricType: Timer}, - PollDecisionTaskAlreadyStartedCounterPerTaskList: {metricName: "poll_decision_task_already_started_per_tl", metricType: Counter}, - 
PollActivityTaskAlreadyStartedCounterPerTaskList: {metricName: "poll_activity_task_already_started_per_tl", metricType: Counter}, - TaskListReadWritePartitionMismatchGauge: {metricName: "tasklist_read_write_partition_mismatch", metricType: Gauge}, - TaskListPollerPartitionMismatchGauge: {metricName: "tasklist_poller_partition_mismatch", metricType: Gauge}, - EstimatedAddTaskQPSGauge: {metricName: "estimated_add_task_qps_per_tl", metricType: Gauge}, - TaskListPartitionUpscaleThresholdGauge: {metricName: "tasklist_partition_upscale_threshold", metricType: Gauge}, - TaskListPartitionDownscaleThresholdGauge: {metricName: "tasklist_partition_downscale_threshold", metricType: Gauge}, - StandbyClusterTasksCompletedCounterPerTaskList: {metricName: "standby_cluster_tasks_completed_per_tl", metricType: Counter}, - StandbyClusterTasksNotStartedCounterPerTaskList: {metricName: "standby_cluster_tasks_not_started_per_tl", metricType: Counter}, - StandbyClusterTasksCompletionFailurePerTaskList: {metricName: "standby_cluster_tasks_completion_failure_per_tl", metricType: Counter}, - TaskIsolationLeakPerTaskList: {metricName: "task_isolation_leak_per_tl", metricRollupName: "task_isolation_leak"}, - PartitionUpscale: {metricName: "partition_upscale_per_tl", metricRollupName: "partition_upscale"}, - PartitionDownscale: {metricName: "partition_downscale_per_tl", metricRollupName: "partition_downscale"}, - PartitionDrained: {metricName: "partition_drained_per_tl", metricRollupName: "partition_drained"}, - IsolationRebalance: {metricName: "isolation_rebalance_per_tl", metricRollupName: "isolation_rebalance"}, - IsolationGroupStartedPolling: {metricName: "ig_started_polling_per_tl", metricRollupName: "ig_started_polling"}, - IsolationGroupStoppedPolling: {metricName: "ig_stopped_polling_per_tl", metricRollupName: "ig_stopped_polling"}, - IsolationGroupUpscale: {metricName: "ig_upscale_per_tl", metricRollupName: "ig_upscale"}, - IsolationGroupDownscale: {metricName: "ig_downscale_per_tl", metricRollupName: "ig_downscale"}, - IsolationGroupPartitionsGauge: {metricName: "ig_partitions_per_tl", metricType: Gauge}, + PollSuccessPerTaskListCounter: {metricName: "poll_success_per_tl", metricRollupName: "poll_success"}, + PollTimeoutPerTaskListCounter: {metricName: "poll_timeouts_per_tl", metricRollupName: "poll_timeouts"}, + PollSuccessWithSyncPerTaskListCounter: {metricName: "poll_success_sync_per_tl", metricRollupName: "poll_success_sync"}, + LeaseRequestPerTaskListCounter: {metricName: "lease_requests_per_tl", metricRollupName: "lease_requests"}, + LeaseFailurePerTaskListCounter: {metricName: "lease_failures_per_tl", metricRollupName: "lease_failures"}, + ConditionFailedErrorPerTaskListCounter: {metricName: "condition_failed_errors_per_tl", metricRollupName: "condition_failed_errors"}, + RespondQueryTaskFailedPerTaskListCounter: {metricName: "respond_query_failed_per_tl", metricRollupName: "respond_query_failed"}, + SyncThrottlePerTaskListCounter: {metricName: "sync_throttle_count_per_tl", metricRollupName: "sync_throttle_count"}, + BufferThrottlePerTaskListCounter: {metricName: "buffer_throttle_count_per_tl", metricRollupName: "buffer_throttle_count"}, + BufferUnknownTaskDispatchError: {metricName: "buffer_unknown_task_dispatch_error_per_tl", metricRollupName: "buffer_unknown_task_dispatch_error"}, + BufferIsolationGroupRedirectCounter: {metricName: "buffer_isolation_group_redirected_per_tl", metricRollupName: "buffer_isolation_group_redirected"}, + BufferIsolationGroupRedirectFailureCounter: {metricName: 
"buffer_isolation_group_redirect_failure_per_tl", metricRollupName: "buffer_isolation_group_redirect_failure"}, + BufferIsolationGroupMisconfiguredCounter: {metricName: "buffer_isolation_group_misconfigured_failure_per_tl", metricRollupName: "buffer_isolation_group_misconfigured_failure"}, + ExpiredTasksPerTaskListCounter: {metricName: "tasks_expired_per_tl", metricRollupName: "tasks_expired"}, + ForwardedPerTaskListCounter: {metricName: "forwarded_per_tl", metricRollupName: "forwarded"}, + ForwardTaskCallsPerTaskList: {metricName: "forward_task_calls_per_tl", metricRollupName: "forward_task_calls"}, + ForwardTaskErrorsPerTaskList: {metricName: "forward_task_errors_per_tl", metricRollupName: "forward_task_errors"}, + SyncMatchForwardTaskThrottleErrorPerTasklist: {metricName: "sync_forward_task_throttle_errors_per_tl", metricRollupName: "sync_forward_task_throttle_errors"}, + AsyncMatchForwardTaskThrottleErrorPerTasklist: {metricName: "async_forward_task_throttle_errors_per_tl", metricRollupName: "async_forward_task_throttle_errors"}, + ForwardQueryCallsPerTaskList: {metricName: "forward_query_calls_per_tl", metricRollupName: "forward_query_calls"}, + ForwardQueryErrorsPerTaskList: {metricName: "forward_query_errors_per_tl", metricRollupName: "forward_query_errors"}, + ForwardPollCallsPerTaskList: {metricName: "forward_poll_calls_per_tl", metricRollupName: "forward_poll_calls"}, + ForwardPollErrorsPerTaskList: {metricName: "forward_poll_errors_per_tl", metricRollupName: "forward_poll_errors"}, + SyncMatchLatencyPerTaskList: {metricName: "syncmatch_latency_per_tl", metricRollupName: "syncmatch_latency", metricType: Timer}, + SyncMatchLatencyPerTaskListHistogram: {metricName: "syncmatch_latency_per_tl_ns", metricRollupName: "syncmatch_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, + AsyncMatchLatencyPerTaskList: {metricName: "asyncmatch_latency_per_tl", metricRollupName: "asyncmatch_latency", metricType: Timer}, + AsyncMatchLatencyPerTaskListHistogram: {metricName: "asyncmatch_latency_per_tl_ns", metricRollupName: "asyncmatch_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, + AsyncMatchDispatchTimeoutCounterPerTaskList: {metricName: "asyncmatch_dispatch_timeouts_per_tl", metricRollupName: "asyncmatch_dispatch_timeouts"}, + ForwardTaskLatencyPerTaskList: {metricName: "forward_task_latency_per_tl", metricRollupName: "forward_task_latency"}, + ForwardQueryLatencyPerTaskList: {metricName: "forward_query_latency_per_tl", metricRollupName: "forward_query_latency"}, + ForwardPollLatencyPerTaskList: {metricName: "forward_poll_latency_per_tl", metricRollupName: "forward_poll_latency"}, + LocalToLocalMatchPerTaskListCounter: {metricName: "local_to_local_matches_per_tl", metricRollupName: "local_to_local_matches"}, + LocalToRemoteMatchPerTaskListCounter: {metricName: "local_to_remote_matches_per_tl", metricRollupName: "local_to_remote_matches"}, + RemoteToLocalMatchPerTaskListCounter: {metricName: "remote_to_local_matches_per_tl", metricRollupName: "remote_to_local_matches"}, + RemoteToRemoteMatchPerTaskListCounter: {metricName: "remote_to_remote_matches_per_tl", metricRollupName: "remote_to_remote_matches"}, + IsolationTaskMatchPerTaskListCounter: {metricName: "isolation_task_matches_per_tl", metricType: Counter}, + IsolationSuccessPerTaskListCounter: {metricName: "isolation_success_per_tl", metricRollupName: "isolation_success"}, + PollerPerTaskListCounter: {metricName: "poller_count_per_tl", metricRollupName: "poller_count"}, + 
PollerInvalidIsolationGroupCounter: {metricName: "poller_invalid_isolation_group_per_tl", metricType: Counter}, + TaskListPartitionUpdateFailedCounter: {metricName: "tasklist_partition_update_failed_per_tl", metricType: Counter}, + TaskListManagersGauge: {metricName: "tasklist_managers", metricType: Gauge}, + TaskLagPerTaskListGauge: {metricName: "task_lag_per_tl", metricType: Gauge}, + TaskBacklogPerTaskListGauge: {metricName: "task_backlog_per_tl", metricType: Gauge}, + TaskCountPerTaskListGauge: {metricName: "task_count_per_tl", metricType: Gauge}, + RateLimitPerTaskListGauge: {metricName: "rate_limit_per_tl", metricType: Gauge}, + SyncMatchLocalPollLatencyPerTaskList: {metricName: "syncmatch_local_poll_latency_per_tl", metricRollupName: "syncmatch_local_poll_latency"}, + SyncMatchForwardPollLatencyPerTaskList: {metricName: "syncmatch_forward_poll_latency_per_tl", metricRollupName: "syncmatch_forward_poll_latency"}, + AsyncMatchLocalPollCounterPerTaskList: {metricName: "asyncmatch_local_poll_per_tl", metricRollupName: "asyncmatch_local_poll"}, + AsyncMatchLocalPollAttemptPerTaskList: {metricName: "asyncmatch_local_poll_attempt_per_tl", metricRollupName: "asyncmatch_local_poll_attempt", metricType: Timer}, + AsyncMatchLocalPollAttemptPerTaskListHistogram: {metricName: "asyncmatch_local_poll_attempt_per_tl_counts", metricRollupName: "asyncmatch_local_poll_attempt_counts", metricType: Histogram, intExponentialBuckets: Mid1To16k}, + AsyncMatchLocalPollLatencyPerTaskList: {metricName: "asyncmatch_local_poll_latency_per_tl", metricRollupName: "asyncmatch_local_poll_latency"}, + AsyncMatchForwardPollCounterPerTaskList: {metricName: "asyncmatch_forward_poll_per_tl", metricRollupName: "asyncmatch_forward_poll"}, + AsyncMatchForwardPollAttemptPerTaskList: {metricName: "asyncmatch_forward_poll_attempt_per_tl", metricRollupName: "asyncmatch_forward_poll_attempt", metricType: Timer}, + AsyncMatchForwardPollAttemptPerTaskListHistogram: {metricName: "asyncmatch_forward_poll_attempt_per_tl_counts", metricRollupName: "asyncmatch_forward_poll_attempt_counts", metricType: Histogram, intExponentialBuckets: Mid1To16k}, + AsyncMatchForwardPollLatencyPerTaskList: {metricName: "asyncmatch_forward_poll_latency_per_tl", metricRollupName: "asyncmatch_forward_poll_latency"}, + AsyncMatchLocalPollAfterForwardFailedCounterPerTaskList: {metricName: "asyncmatch_local_poll_after_forward_failed_per_tl", metricRollupName: "asyncmatch_local_poll_after_forward_failed"}, + AsyncMatchLocalPollAfterForwardFailedAttemptPerTaskList: {metricName: "asyncmatch_local_poll_after_forward_failed_attempt_per_tl", metricRollupName: "asyncmatch_local_poll_after_forward_failed_attempt", metricType: Timer}, + AsyncMatchLocalPollAfterForwardFailedAttemptPerTaskListHistogram: {metricName: "asyncmatch_local_poll_after_forward_failed_attempt_per_tl_counts", metricRollupName: "asyncmatch_local_poll_after_forward_failed_attempt_counts", metricType: Histogram, intExponentialBuckets: Mid1To16k}, + AsyncMatchLocalPollAfterForwardFailedLatencyPerTaskList: {metricName: "asyncmatch_local_poll_after_forward_failed_latency_per_tl", metricRollupName: "asyncmatch_local_poll_after_forward_failed_latency"}, + PollLocalMatchLatencyPerTaskList: {metricName: "poll_local_match_latency_per_tl", metricRollupName: "poll_local_match_latency", metricType: Timer}, + PollLocalMatchLatencyPerTaskListHistogram: {metricName: "poll_local_match_latency_per_tl_ns", metricRollupName: "poll_local_match_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, + 
PollForwardMatchLatencyPerTaskList: {metricName: "poll_forward_match_latency_per_tl", metricRollupName: "poll_forward_match_latency", metricType: Timer}, + PollForwardMatchLatencyPerTaskListHistogram: {metricName: "poll_forward_match_latency_per_tl_ns", metricRollupName: "poll_forward_match_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, + PollLocalMatchAfterForwardFailedLatencyPerTaskList: {metricName: "poll_local_match_after_forward_failed_latency_per_tl", metricRollupName: "poll_local_match_after_forward_failed_latency", metricType: Timer}, + PollLocalMatchAfterForwardFailedLatencyPerTaskListHistogram: {metricName: "poll_local_match_after_forward_failed_latency_per_tl_ns", metricRollupName: "poll_local_match_after_forward_failed_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, + PollDecisionTaskAlreadyStartedCounterPerTaskList: {metricName: "poll_decision_task_already_started_per_tl", metricType: Counter}, + PollActivityTaskAlreadyStartedCounterPerTaskList: {metricName: "poll_activity_task_already_started_per_tl", metricType: Counter}, + TaskListReadWritePartitionMismatchGauge: {metricName: "tasklist_read_write_partition_mismatch", metricType: Gauge}, + TaskListPollerPartitionMismatchGauge: {metricName: "tasklist_poller_partition_mismatch", metricType: Gauge}, + EstimatedAddTaskQPSGauge: {metricName: "estimated_add_task_qps_per_tl", metricType: Gauge}, + TaskListPartitionUpscaleThresholdGauge: {metricName: "tasklist_partition_upscale_threshold", metricType: Gauge}, + TaskListPartitionDownscaleThresholdGauge: {metricName: "tasklist_partition_downscale_threshold", metricType: Gauge}, + StandbyClusterTasksCompletedCounterPerTaskList: {metricName: "standby_cluster_tasks_completed_per_tl", metricType: Counter}, + StandbyClusterTasksNotStartedCounterPerTaskList: {metricName: "standby_cluster_tasks_not_started_per_tl", metricType: Counter}, + StandbyClusterTasksCompletionFailurePerTaskList: {metricName: "standby_cluster_tasks_completion_failure_per_tl", metricType: Counter}, + TaskIsolationLeakPerTaskList: {metricName: "task_isolation_leak_per_tl", metricRollupName: "task_isolation_leak"}, + PartitionUpscale: {metricName: "partition_upscale_per_tl", metricRollupName: "partition_upscale"}, + PartitionDownscale: {metricName: "partition_downscale_per_tl", metricRollupName: "partition_downscale"}, + PartitionDrained: {metricName: "partition_drained_per_tl", metricRollupName: "partition_drained"}, + IsolationRebalance: {metricName: "isolation_rebalance_per_tl", metricRollupName: "isolation_rebalance"}, + IsolationGroupStartedPolling: {metricName: "ig_started_polling_per_tl", metricRollupName: "ig_started_polling"}, + IsolationGroupStoppedPolling: {metricName: "ig_stopped_polling_per_tl", metricRollupName: "ig_stopped_polling"}, + IsolationGroupUpscale: {metricName: "ig_upscale_per_tl", metricRollupName: "ig_upscale"}, + IsolationGroupDownscale: {metricName: "ig_downscale_per_tl", metricRollupName: "ig_downscale"}, + IsolationGroupPartitionsGauge: {metricName: "ig_partitions_per_tl", metricType: Gauge}, }, Worker: { ReplicatorMessages: {metricName: "replicator_messages"}, @@ -4020,8 +4079,10 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ ESProcessorFailures: {metricName: "es_processor_errors"}, ESProcessorCorruptedData: {metricName: "es_processor_corrupted_data"}, ESProcessorProcessMsgLatency: {metricName: "es_processor_process_msg_latency", metricType: Timer}, + ESProcessorProcessMsgLatencyHistogram: {metricName: 
"es_processor_process_msg_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, IndexProcessorCorruptedData: {metricName: "index_processor_corrupted_data"}, IndexProcessorProcessMsgLatency: {metricName: "index_processor_process_msg_latency", metricType: Timer}, + IndexProcessorProcessMsgLatencyHistogram: {metricName: "index_processor_process_msg_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, ArchiverNonRetryableErrorCount: {metricName: "archiver_non_retryable_error"}, ArchiverStartedCount: {metricName: "archiver_started"}, ArchiverStoppedCount: {metricName: "archiver_stopped"}, @@ -4088,12 +4149,14 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ ESAnalyzerNumLongRunningWorkflows: {metricName: "es_analyzer_num_long_running_workflows", metricType: Counter}, AsyncWorkflowConsumerCount: {metricName: "async_workflow_consumer_count", metricType: Gauge}, AsyncWorkflowProcessMsgLatency: {metricName: "async_workflow_process_msg_latency", metricType: Timer}, + AsyncWorkflowProcessMsgLatencyHistogram: {metricName: "async_workflow_process_msg_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, AsyncWorkflowFailureCorruptMsgCount: {metricName: "async_workflow_failure_corrupt_msg", metricType: Counter}, AsyncWorkflowFailureByFrontendCount: {metricName: "async_workflow_failure_by_frontend", metricType: Counter}, AsyncWorkflowSuccessCount: {metricName: "async_workflow_success", metricType: Counter}, DiagnosticsWorkflowStartedCount: {metricName: "diagnostics_workflow_count", metricType: Counter}, DiagnosticsWorkflowSuccess: {metricName: "diagnostics_workflow_success", metricType: Counter}, DiagnosticsWorkflowExecutionLatency: {metricName: "diagnostics_workflow_execution_latency", metricType: Timer}, + DiagnosticsWorkflowExecutionLatencyHistogram: {metricName: "diagnostics_workflow_execution_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, SchedulerWorkerActiveGauge: {metricName: "scheduler_worker_active_gauge", metricType: Gauge}, SchedulerWorkerStartedCount: {metricName: "scheduler_worker_started_count", metricType: Counter}, SchedulerWorkerStoppedCount: {metricName: "scheduler_worker_stopped_count", metricType: Counter}, @@ -4115,6 +4178,7 @@ var MetricDefs = map[ServiceIdx]map[MetricIdx]metricDefinition{ ShardDistributorErrContextTimeoutCounter: {metricName: "shard_distributor_err_context_timeout", metricType: Counter}, ShardDistributorFailures: {metricName: "shard_distributor_failures", metricType: Counter}, ShardDistributorLatency: {metricName: "shard_distributor_latency", metricType: Timer}, + ShardDistributorLatencyHistogram: {metricName: "shard_distributor_latency_ns", metricType: Histogram, exponentialBuckets: Low1ms100s}, ShardDistributorErrNamespaceNotFound: {metricName: "shard_distributor_err_namespace_not_found", metricType: Counter}, ShardDistributorErrShardNotFound: {metricName: "shard_distributor_err_shard_not_found", metricType: Counter}, ShardDistributorAssignLoopShardRebalanceLatency: {metricName: "shard_distrubutor_shard_assign_latency", metricType: Histogram}, diff --git a/common/metrics/scope.go b/common/metrics/scope.go index 43965b6910e..9f14d9fb8c4 100644 --- a/common/metrics/scope.go +++ b/common/metrics/scope.go @@ -123,10 +123,15 @@ func (m *metricsScope) RecordHistogramDuration(id MetricIdx, value time.Duration if m.migrationConfig.Histogram.EmitHistogram(def.metricName.String()) { m.scope.Histogram(def.metricName.String(), m.getBuckets(id)).RecordDuration(value) } - if 
diff --git a/common/metrics/scope.go b/common/metrics/scope.go
index 43965b6910e..9f14d9fb8c4 100644
--- a/common/metrics/scope.go
+++ b/common/metrics/scope.go
@@ -123,10 +123,15 @@ func (m *metricsScope) RecordHistogramDuration(id MetricIdx, value time.Duration
  if m.migrationConfig.Histogram.EmitHistogram(def.metricName.String()) {
  	m.scope.Histogram(def.metricName.String(), m.getBuckets(id)).RecordDuration(value)
  }
- if !def.metricRollupName.Empty() {
+ switch {
+ case !def.metricRollupName.Empty():
  	if m.migrationConfig.Histogram.EmitHistogram(def.metricRollupName.String()) {
  		m.rootScope.Histogram(def.metricRollupName.String(), m.getBuckets(id)).RecordDuration(value)
  	}
+ case m.isDomainTagged:
+ 	if m.migrationConfig.Histogram.EmitHistogram(def.metricName.String()) {
+ 		m.scope.Tagged(map[string]string{domain: allValue}).Histogram(def.metricName.String(), m.getBuckets(id)).RecordDuration(value)
+ 	}
  }
 }
diff --git a/common/metrics/scope_test.go b/common/metrics/scope_test.go
index c9927f6b941..c37eee29b2c 100644
--- a/common/metrics/scope_test.go
+++ b/common/metrics/scope_test.go
@@ -426,3 +426,38 @@ func TestGaugeRollupUsesRootScope(t *testing.T) {
  }
  assert.True(t, foundInRoot, "rollup gauge should be emitted on root scope")
 }
+
+func TestRecordHistogramDurationDomainTaggedDualEmit(t *testing.T) {
+	rootScope := tally.NewTestScope("", nil)
+	childScope := tally.NewTestScope("", map[string]string{domain: "test-domain"})
+
+	defs := map[MetricIdx]metricDefinition{
+		CadenceLatencyHistogram: {
+			metricName: "cadence_latency_ns",
+			metricType: Histogram,
+		},
+	}
+
+	scope := newMetricsScope(rootScope, childScope, defs, true, MigrationConfig{
+		Histogram: HistogramMigration{Default: "histogram"},
+	})
+
+	scope.RecordHistogramDuration(CadenceLatencyHistogram, 5*time.Millisecond)
+
+	// per-domain series emitted on child scope
+	domainFound := false
+	allFound := false
+	for _, h := range childScope.Snapshot().Histograms() {
+		if h.Name() == "cadence_latency_ns" {
+			tags := h.Tags()
+			if tags[domain] == "test-domain" {
+				domainFound = true
+			}
+			if tags[domain] == allValue {
+				allFound = true
+			}
+		}
+	}
+	assert.True(t, domainFound, "per-domain histogram series should be emitted")
+	assert.True(t, allFound, "aggregate domain=all histogram series should be emitted")
+}
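Reviewer note on the scope.go change above: the new switch keeps the existing rollup path for metrics that define a `metricRollupName`, and otherwise, on domain-tagged scopes, re-emits the same histogram with the domain tag forced to the `all` value, so dashboards get a pre-aggregated series without having to sum buckets across every domain. A minimal tally sketch of that dual emission, using literal `"domain"`/`"all"` strings in place of the package constants:

```go
package main

import (
	"fmt"
	"time"

	"github.com/uber-go/tally"
)

func main() {
	scope := tally.NewTestScope("", map[string]string{"domain": "test-domain"})
	buckets := tally.MustMakeExponentialDurationBuckets(time.Millisecond, 2, 18)

	// Per-domain series, tagged domain=test-domain.
	scope.Histogram("cadence_latency_ns", buckets).RecordDuration(5 * time.Millisecond)
	// Aggregate series: same name, domain tag overridden to "all".
	scope.Tagged(map[string]string{"domain": "all"}).
		Histogram("cadence_latency_ns", buckets).RecordDuration(5 * time.Millisecond)

	for _, h := range scope.Snapshot().Histograms() {
		fmt.Println(h.Name(), "domain="+h.Tags()["domain"])
	}
}
```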
diff --git a/common/persistence/pinot/pinot_visibility_metric_clients.go b/common/persistence/pinot/pinot_visibility_metric_clients.go
index 98011ea737c..a3dbfe827fd 100644
--- a/common/persistence/pinot/pinot_visibility_metric_clients.go
+++ b/common/persistence/pinot/pinot_visibility_metric_clients.go
@@ -22,6 +22,7 @@ package pinotvisibility
 import (
 	"context"
+	"time"
 	"github.com/uber/cadence/common/log"
 	"github.com/uber/cadence/common/log/tag"
@@ -59,8 +60,12 @@ func (p *pinotVisibilityMetricsClient) RecordWorkflowExecutionStarted(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotRecordWorkflowExecutionStartedScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	err := p.persistence.RecordWorkflowExecutionStarted(ctx, request)
 	if err != nil {
@@ -78,8 +83,12 @@ func (p *pinotVisibilityMetricsClient) RecordWorkflowExecutionClosed(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotRecordWorkflowExecutionClosedScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	err := p.persistence.RecordWorkflowExecutionClosed(ctx, request)
 	if err != nil {
@@ -97,8 +106,12 @@ func (p *pinotVisibilityMetricsClient) RecordWorkflowExecutionUninitialized(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotRecordWorkflowExecutionUninitializedScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	err := p.persistence.RecordWorkflowExecutionUninitialized(ctx, request)
 	if err != nil {
@@ -116,8 +129,12 @@ func (p *pinotVisibilityMetricsClient) UpsertWorkflowExecution(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotUpsertWorkflowExecutionScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	err := p.persistence.UpsertWorkflowExecution(ctx, request)
 	if err != nil {
@@ -135,8 +152,12 @@ func (p *pinotVisibilityMetricsClient) ListOpenWorkflowExecutions(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotListOpenWorkflowExecutionsScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	response, err := p.persistence.ListOpenWorkflowExecutions(ctx, request)
 	if err != nil {
@@ -154,8 +175,12 @@ func (p *pinotVisibilityMetricsClient) ListClosedWorkflowExecutions(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotListClosedWorkflowExecutionsScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	response, err := p.persistence.ListClosedWorkflowExecutions(ctx, request)
 	if err != nil {
@@ -173,8 +198,12 @@ func (p *pinotVisibilityMetricsClient) ListOpenWorkflowExecutionsByType(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotListOpenWorkflowExecutionsByTypeScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	response, err := p.persistence.ListOpenWorkflowExecutionsByType(ctx, request)
 	if err != nil {
@@ -191,8 +220,12 @@ func (p *pinotVisibilityMetricsClient) ListClosedWorkflowExecutionsByType(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotListClosedWorkflowExecutionsByTypeScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	response, err := p.persistence.ListClosedWorkflowExecutionsByType(ctx, request)
 	if err != nil {
@@ -209,8 +242,12 @@ func (p *pinotVisibilityMetricsClient) ListOpenWorkflowExecutionsByWorkflowID(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotListOpenWorkflowExecutionsByWorkflowIDScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	response, err := p.persistence.ListOpenWorkflowExecutionsByWorkflowID(ctx, request)
 	if err != nil {
@@ -227,8 +264,12 @@ func (p *pinotVisibilityMetricsClient) ListClosedWorkflowExecutionsByWorkflowID(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotListClosedWorkflowExecutionsByWorkflowIDScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	response, err := p.persistence.ListClosedWorkflowExecutionsByWorkflowID(ctx, request)
 	if err != nil {
@@ -245,8 +286,12 @@ func (p *pinotVisibilityMetricsClient) ListClosedWorkflowExecutionsByStatus(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotListClosedWorkflowExecutionsByStatusScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	response, err := p.persistence.ListClosedWorkflowExecutionsByStatus(ctx, request)
 	if err != nil {
@@ -263,8 +308,12 @@ func (p *pinotVisibilityMetricsClient) GetClosedWorkflowExecution(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotGetClosedWorkflowExecutionScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	response, err := p.persistence.GetClosedWorkflowExecution(ctx, request)
 	if err != nil {
@@ -281,8 +330,12 @@ func (p *pinotVisibilityMetricsClient) ListWorkflowExecutions(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotListWorkflowExecutionsScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	response, err := p.persistence.ListWorkflowExecutions(ctx, request)
 	if err != nil {
@@ -299,8 +352,12 @@ func (p *pinotVisibilityMetricsClient) ScanWorkflowExecutions(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotScanWorkflowExecutionsScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	response, err := p.persistence.ScanWorkflowExecutions(ctx, request)
 	if err != nil {
@@ -317,8 +374,12 @@ func (p *pinotVisibilityMetricsClient) CountWorkflowExecutions(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotCountWorkflowExecutionsScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	response, err := p.persistence.CountWorkflowExecutions(ctx, request)
 	if err != nil {
@@ -335,8 +396,12 @@ func (p *pinotVisibilityMetricsClient) DeleteWorkflowExecution(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotDeleteWorkflowExecutionsScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	err := p.persistence.DeleteWorkflowExecution(ctx, request)
 	if err != nil {
@@ -353,8 +418,12 @@ func (p *pinotVisibilityMetricsClient) DeleteUninitializedWorkflowExecution(
 	scopeWithDomainTag := p.metricClient.Scope(metrics.PinotDeleteWorkflowExecutionsScope, metrics.DomainTag(request.Domain))
 	scopeWithDomainTag.IncCounter(metrics.PinotRequestsPerDomain)
+	pinotStart := time.Now()
 	sw := scopeWithDomainTag.StartTimer(metrics.PinotLatencyPerDomain)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scopeWithDomainTag.RecordHistogramDuration(metrics.PinotLatencyPerDomainHistogram, time.Since(pinotStart))
+	}()
 	err := p.persistence.DeleteWorkflowExecution(ctx, request)
 	if err != nil {
diff --git a/common/persistence/pinot/pinot_visibility_metric_clients_test.go b/common/persistence/pinot/pinot_visibility_metric_clients_test.go
index b79fa67a417..50e601372ea 100644
--- a/common/persistence/pinot/pinot_visibility_metric_clients_test.go
+++ b/common/persistence/pinot/pinot_visibility_metric_clients_test.go
@@ -128,6 +128,7 @@ func TestMetricClientRecordWorkflowExecutionStarted(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.producerMockAffordance(mockProducer)
 	test.scopeMockAffordance(mockScope)
@@ -214,6 +215,7 @@ func TestMetricClientRecordWorkflowExecutionClosed(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.producerMockAffordance(mockProducer)
 	test.scopeMockAffordance(mockScope)
@@ -290,6 +292,7 @@ func TestMetricClientRecordWorkflowExecutionUninitialized(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.producerMockAffordance(mockProducer)
 	test.scopeMockAffordance(mockScope)
@@ -366,6 +369,7 @@ func TestMetricClientUpsertWorkflowExecution(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.producerMockAffordance(mockProducer)
 	test.scopeMockAffordance(mockScope)
@@ -440,6 +444,7 @@ func TestMetricClientListOpenWorkflowExecutions(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.pinotClientMockAffordance(mockPinotClient)
 	test.scopeMockAffordance(mockScope)
@@ -514,6 +519,7 @@ func TestMetricClientListClosedWorkflowExecutions(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.pinotClientMockAffordance(mockPinotClient)
 	test.scopeMockAffordance(mockScope)
@@ -589,6 +595,7 @@ func TestMetricClientListOpenWorkflowExecutionsByType(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.pinotClientMockAffordance(mockPinotClient)
 	test.scopeMockAffordance(mockScope)
@@ -664,6 +671,7 @@ func TestMetricClientListClosedWorkflowExecutionsByType(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.pinotClientMockAffordance(mockPinotClient)
 	test.scopeMockAffordance(mockScope)
@@ -738,6 +746,7 @@ func TestMetricClientListOpenWorkflowExecutionsByWorkflowID(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.pinotClientMockAffordance(mockPinotClient)
 	test.scopeMockAffordance(mockScope)
@@ -812,6 +821,7 @@ func TestMetricClientListClosedWorkflowExecutionsByWorkflowID(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.pinotClientMockAffordance(mockPinotClient)
 	test.scopeMockAffordance(mockScope)
@@ -890,6 +900,7 @@ func TestMetricClientListClosedWorkflowExecutionsByStatus(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.pinotClientMockAffordance(mockPinotClient)
 	test.scopeMockAffordance(mockScope)
@@ -966,6 +977,7 @@ func TestMetricClientGetClosedWorkflowExecution(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.pinotClientMockAffordance(mockPinotClient)
 	test.scopeMockAffordance(mockScope)
@@ -1038,6 +1050,7 @@ func TestMetricClientListWorkflowExecutions(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.pinotClientMockAffordance(mockPinotClient)
 	test.scopeMockAffordance(mockScope)
@@ -1106,6 +1119,7 @@ func TestMetricClientScanWorkflowExecutions(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.pinotClientMockAffordance(mockPinotClient)
 	test.scopeMockAffordance(mockScope)
@@ -1174,6 +1188,7 @@ func TestMetricClientCountWorkflowExecutions(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.pinotClientMockAffordance(mockPinotClient)
 	test.scopeMockAffordance(mockScope)
@@ -1250,6 +1265,7 @@ func TestMetricClientDeleteWorkflowExecution(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.producerMockAffordance(mockProducer)
 	test.scopeMockAffordance(mockScope)
@@ -1322,6 +1338,7 @@ func TestMetricClientDeleteUninitializedWorkflowExecution(t *testing.T) {
 	// mock behaviors
 	mockMetricClient.On("Scope", mock.Anything, mock.Anything).Return(mockScope).Once()
 	mockScope.On("StartTimer", mock.Anything, mock.Anything).Return(testStopwatch).Once()
+	mockScope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()
 	test.producerMockAffordance(mockProducer)
 	test.scopeMockAffordance(mockScope)
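Reviewer note on the test updates above: every metered-client test registers exactly one expectation for the new histogram call, matching any metric id and any `time.Duration`. A self-contained testify sketch of that expectation style; `scopeMock` and `noopT` here are stand-ins for the generated mocks, not the project's types:

```go
package main

import (
	"fmt"
	"time"

	"github.com/stretchr/testify/mock"
)

// scopeMock models only the method under discussion.
type scopeMock struct{ mock.Mock }

func (m *scopeMock) RecordHistogramDuration(id int, d time.Duration) {
	m.Called(id, d)
}

// noopT satisfies mock.TestingT so AssertExpectations can run outside a test.
type noopT struct{}

func (noopT) Logf(string, ...interface{})   {}
func (noopT) Errorf(string, ...interface{}) {}
func (noopT) FailNow()                      {}

func main() {
	s := &scopeMock{}
	// Same shape as the expectations added in this PR: any id, any duration,
	// exactly once.
	s.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Once()

	s.RecordHistogramDuration(42, 5*time.Millisecond)
	fmt.Println("expectations met:", s.AssertExpectations(noopT{}))
}
```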
diff --git a/common/quotas/global/collection/collection.go b/common/quotas/global/collection/collection.go
index d09a13bbc9f..764c6283236 100644
--- a/common/quotas/global/collection/collection.go
+++ b/common/quotas/global/collection/collection.go
@@ -407,9 +407,11 @@ func (c *Collection) backgroundUpdateLoop() {
 	c.scope.RecordHistogramValue(metrics.GlobalRatelimiterGlobalUsageHistogram, float64(globals))
 	if len(usage) > 0 {
+		ratelimiterUpdateStart := time.Now()
 		sw := c.scope.StartTimer(metrics.GlobalRatelimiterUpdateLatency)
 		c.doUpdate(now.Sub(lastGatherTime), usage)
 		sw.Stop()
+		c.scope.RecordHistogramDuration(metrics.GlobalRatelimiterUpdateLatencyHistogram, time.Since(ratelimiterUpdateStart))
 	}
 	<-localMetricsDone // should be much faster than doUpdate, unless it's no-opped
diff --git a/common/task/hierarchical_weighted_round_robin_task_scheduler.go b/common/task/hierarchical_weighted_round_robin_task_scheduler.go
index 3aba59f3f23..54e4316972e 100644
--- a/common/task/hierarchical_weighted_round_robin_task_scheduler.go
+++ b/common/task/hierarchical_weighted_round_robin_task_scheduler.go
@@ -116,8 +116,12 @@ func (w *hierarchicalWeightedRoundRobinTaskSchedulerImpl[K, T]) Stop() {
 func (w *hierarchicalWeightedRoundRobinTaskSchedulerImpl[K, T]) Submit(task T) error {
 	w.metricsScope.IncCounter(metrics.PriorityTaskSubmitRequest)
+	submitStart := time.Now()
 	sw := w.metricsScope.StartTimer(metrics.PriorityTaskSubmitLatency)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		w.metricsScope.RecordHistogramDuration(metrics.PriorityTaskSubmitLatencyHistogram, time.Since(submitStart))
+	}()
 	w.RLock()
 	defer w.RUnlock()
@@ -137,8 +141,12 @@ func (w *hierarchicalWeightedRoundRobinTaskSchedulerImpl[K, T]) TrySubmit(
 	task T,
 ) (bool, error) {
 	w.metricsScope.IncCounter(metrics.PriorityTaskSubmitRequest)
+	submitStart := time.Now()
 	sw := w.metricsScope.StartTimer(metrics.PriorityTaskSubmitLatency)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		w.metricsScope.RecordHistogramDuration(metrics.PriorityTaskSubmitLatencyHistogram, time.Since(submitStart))
+	}()
 	w.RLock()
 	defer w.RUnlock()
diff --git a/common/task/sequential_task_processor.go b/common/task/sequential_task_processor.go
index ff9a2fa5126..f3cb389161a 100644
--- a/common/task/sequential_task_processor.go
+++ b/common/task/sequential_task_processor.go
@@ -96,8 +96,12 @@ func (t *sequentialTaskProcessorImpl) Stop() {
 func (t *sequentialTaskProcessorImpl) Submit(task Task) error {
 	t.metricsScope.IncCounter(metrics.SequentialTaskSubmitRequest)
+	submitStart := time.Now()
 	metricsTimer := t.metricsScope.StartTimer(metrics.SequentialTaskSubmitLatency)
-	defer metricsTimer.Stop()
+	defer func() {
+		metricsTimer.Stop()
+		t.metricsScope.RecordHistogramDuration(metrics.SequentialTaskSubmitLatencyHistogram, time.Since(submitStart))
+	}()
 	taskqueue := t.taskQueueFactory(task)
 	taskqueue.Add(task)
@@ -139,9 +143,11 @@ func (t *sequentialTaskProcessorImpl) pollAndProcessTaskQueue() {
 		case <-t.shutdownChan:
 			return
 		case taskqueue := <-t.taskqueueChan:
+			queueProcessingStart := time.Now()
 			metricsTimer := t.metricsScope.StartTimer(metrics.SequentialTaskQueueProcessingLatency)
 			t.processTaskQueue(taskqueue)
 			metricsTimer.Stop()
+			t.metricsScope.RecordHistogramDuration(metrics.SequentialTaskQueueProcessingLatencyHistogram, time.Since(queueProcessingStart))
 		}
 	}
 }
@@ -154,6 +160,7 @@ func (t *sequentialTaskProcessorImpl) processTaskQueue(taskqueue SequentialTaskQ
 		default:
 			queueSize := taskqueue.Len()
 			t.metricsScope.RecordTimer(metrics.SequentialTaskQueueSize, time.Duration(queueSize))
+			t.metricsScope.IntExponentialHistogram(metrics.SequentialTaskQueueSizeHistogram, queueSize)
 			if queueSize > 0 {
 				t.processTaskOnce(taskqueue)
 			}
@@ -173,8 +180,12 @@ func (t *sequentialTaskProcessorImpl) processTaskQueue(taskqueue SequentialTaskQ
 }
 func (t *sequentialTaskProcessorImpl) processTaskOnce(taskqueue SequentialTaskQueue) {
+	taskProcessingStart := time.Now()
 	metricsTimer := t.metricsScope.StartTimer(metrics.SequentialTaskTaskProcessingLatency)
-	defer metricsTimer.Stop()
+	defer func() {
+		metricsTimer.Stop()
+		t.metricsScope.RecordHistogramDuration(metrics.SequentialTaskTaskProcessingLatencyHistogram, time.Since(taskProcessingStart))
+	}()
 	task := taskqueue.Remove()
 	err := task.Execute()
diff --git a/common/task/weighted_round_robin_task_scheduler.go b/common/task/weighted_round_robin_task_scheduler.go
index b3a753f8cd4..7ea2f8dd7a4 100644
--- a/common/task/weighted_round_robin_task_scheduler.go
+++ b/common/task/weighted_round_robin_task_scheduler.go
@@ -125,8 +125,12 @@ func (w *weightedRoundRobinTaskSchedulerImpl[K, T]) Stop() {
 func (w *weightedRoundRobinTaskSchedulerImpl[K, T]) Submit(task T) error {
 	w.metricsScope.IncCounter(metrics.PriorityTaskSubmitRequest)
+	submitStart := time.Now()
 	sw := w.metricsScope.StartTimer(metrics.PriorityTaskSubmitLatency)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		w.metricsScope.RecordHistogramDuration(metrics.PriorityTaskSubmitLatencyHistogram, time.Since(submitStart))
+	}()
 	if w.isStopped() {
 		return ErrTaskSchedulerClosed
diff --git a/service/frontend/api/handler.go b/service/frontend/api/handler.go
index 581badf7669..e618dc15f79 100644
--- a/service/frontend/api/handler.go
+++ b/service/frontend/api/handler.go
@@ -1622,6 +1622,7 @@ func (wh *WorkflowHandler) StartWorkflowExecutionAsync(
 		return nil, fmt.Errorf("failed to encode StartWorkflowExecutionAsyncRequest: %v", err)
 	}
 	scope.RecordTimer(metrics.AsyncRequestPayloadSize, time.Duration(len(payload)))
+	scope.IntExponentialHistogram(metrics.AsyncRequestPayloadSizeHistogram, len(payload))
 	// propagate the headers from the context to the message
 	header := &shared.Header{
@@ -2247,6 +2248,7 @@ func (wh *WorkflowHandler) SignalWithStartWorkflowExecutionAsync(
 		return nil, fmt.Errorf("failed to encode SignalWithStartWorkflowExecutionAsyncRequest: %v", err)
 	}
 	scope.RecordTimer(metrics.AsyncRequestPayloadSize, time.Duration(len(payload)))
+	scope.IntExponentialHistogram(metrics.AsyncRequestPayloadSizeHistogram, len(payload))
 	// propagate the headers from the context to the message
 	header := &shared.Header{
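Reviewer note: queue sizes and async payload sizes are unitless integers, so the pre-existing `RecordTimer(..., time.Duration(n))` calls press nanosecond units into service for a plain count; the added `IntExponentialHistogram` calls record the raw integer into exponential buckets instead. A tally sketch of recording an integer size that way; the bucket layout is an assumption standing in for `Mid1To16k`:

```go
package main

import (
	"fmt"

	"github.com/uber-go/tally"
)

func main() {
	scope := tally.NewTestScope("", nil)
	// Exponential value buckets 1..16384, a plausible stand-in for the
	// Mid1To16k layout used by the size histograms in this PR.
	buckets := tally.MustMakeExponentialValueBuckets(1, 2, 15)

	// Record a payload size of 1234 bytes as a value, not a duration.
	scope.Histogram("async_request_payload_size", buckets).RecordValue(1234)

	for _, h := range scope.Snapshot().Histograms() {
		fmt.Println(h.Name(), h.Values())
	}
}
```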
diff --git a/service/frontend/templates/metered.tmpl b/service/frontend/templates/metered.tmpl
index da3c23f64e4..91d411f3836 100644
--- a/service/frontend/templates/metered.tmpl
+++ b/service/frontend/templates/metered.tmpl
@@ -1,5 +1,6 @@
 import (
 	"context"
+	"time"
 	"github.com/uber/cadence/common/log"
 	"github.com/uber/cadence/common/log/tag"
@@ -85,17 +86,20 @@ func (h *{{$decorator}}) {{$method.Declaration}} {
 	{{- if has $method.Name $taskListAPIs}}
 	scope := common.NewPerTaskListScope({{(index $method.Params 1).Name}}.Domain, {{(index $method.Params 1).Name}}.TaskList.GetName(), {{(index $method.Params 1).Name}}.TaskList.GetKind(), h.metricsClient, {{$scope}}).Tagged(metrics.GetContextTags(ctx)...)
 	scope.IncCounter(metrics.CadenceRequestsPerTaskListWithoutRollup)
+	swPerTLStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatencyPerTaskList)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swPerTLStart)) }()
 	scopePerDomain := h.metricsClient.Scope({{$scope}}).Tagged(append(metrics.GetContextTags(ctx), {{$domainMetricTag}})...)
 	scopePerDomain.IncCounter(metrics.CadenceRequests)
+	swPerDomainStart := time.Now()
 	swPerDomain := scopePerDomain.StartTimer(metrics.CadenceLatency)
-	defer swPerDomain.Stop()
+	defer func() { swPerDomain.Stop(); scopePerDomain.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swPerDomainStart)) }()
 	{{- else}}
 	scope := h.metricsClient.Scope({{$scope}}).Tagged(append(metrics.GetContextTags(ctx), {{$domainMetricTag}})...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	{{- end}}
 	logger := h.logger.WithTags(tags...)
diff --git a/service/frontend/wrappers/accesscontrolled/access_controlled.go b/service/frontend/wrappers/accesscontrolled/access_controlled.go
index 56d68d7734d..44af8761a6d 100644
--- a/service/frontend/wrappers/accesscontrolled/access_controlled.go
+++ b/service/frontend/wrappers/accesscontrolled/access_controlled.go
@@ -24,6 +24,7 @@ package accesscontrolled
 import (
 	"context"
+	"time"
 	"github.com/uber/cadence/common/authorization"
 	"github.com/uber/cadence/common/metrics"
@@ -46,8 +47,12 @@ func (a *apiHandler) isAuthorized(
 	attr *authorization.Attributes,
 	scope metrics.Scope,
 ) (bool, error) {
+	authStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceAuthorizationLatency)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scope.RecordHistogramDuration(metrics.CadenceAuthorizationLatencyHistogram, time.Since(authStart))
+	}()
 	result, err := a.authorizer.Authorize(ctx, attr)
 	if err != nil {
diff --git a/service/frontend/wrappers/accesscontrolled/access_controlled_test.go b/service/frontend/wrappers/accesscontrolled/access_controlled_test.go
index b25f32a7450..2b97f19fa0d 100644
--- a/service/frontend/wrappers/accesscontrolled/access_controlled_test.go
+++ b/service/frontend/wrappers/accesscontrolled/access_controlled_test.go
@@ -28,6 +28,7 @@ import (
 	"testing"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/mock"
 	"go.uber.org/mock/gomock"
 	"github.com/uber/cadence/common/authorization"
@@ -49,6 +50,7 @@ func TestIsAuthorized(t *testing.T) {
 			mockSetup: func(authorizer *authorization.MockAuthorizer, scope *mocks.Scope) {
 				authorizer.EXPECT().Authorize(gomock.Any(), gomock.Any()).Return(authorization.Result{Decision: authorization.DecisionAllow}, nil)
 				scope.On("StartTimer", metrics.CadenceAuthorizationLatency).Return(metrics.NewTestStopwatch()).Once()
+				scope.On("RecordHistogramDuration", metrics.CadenceAuthorizationLatencyHistogram, mock.AnythingOfType("time.Duration")).Return().Once()
 			},
 			isAuthorized: true,
 			wantErr: false,
@@ -58,6 +60,7 @@ func TestIsAuthorized(t *testing.T) {
 			mockSetup: func(authorizer *authorization.MockAuthorizer, scope *mocks.Scope) {
 				authorizer.EXPECT().Authorize(gomock.Any(), gomock.Any()).Return(authorization.Result{Decision: authorization.DecisionDeny}, nil)
 				scope.On("StartTimer", metrics.CadenceAuthorizationLatency).Return(metrics.NewTestStopwatch()).Once()
+				scope.On("RecordHistogramDuration", metrics.CadenceAuthorizationLatencyHistogram, mock.AnythingOfType("time.Duration")).Return().Once()
 				scope.On("IncCounter", metrics.CadenceErrUnauthorizedCounter).Return().Once()
 			},
 			isAuthorized: false,
@@ -68,6 +71,7 @@ func TestIsAuthorized(t *testing.T) {
 			mockSetup: func(authorizer *authorization.MockAuthorizer, scope *mocks.Scope) {
 				authorizer.EXPECT().Authorize(gomock.Any(), gomock.Any()).Return(authorization.Result{}, errors.New("some random error"))
 				scope.On("StartTimer", metrics.CadenceAuthorizationLatency).Return(metrics.NewTestStopwatch()).Once()
+				scope.On("RecordHistogramDuration", metrics.CadenceAuthorizationLatencyHistogram, mock.AnythingOfType("time.Duration")).Return().Once()
 				scope.On("IncCounter", metrics.CadenceErrAuthorizeFailedCounter).Return().Once()
 			},
 			isAuthorized: false,
diff --git a/service/frontend/wrappers/clusterredirection/callwrappers.go b/service/frontend/wrappers/clusterredirection/callwrappers.go
index d5dda1f8edf..a31a38e78b5 100644
--- a/service/frontend/wrappers/clusterredirection/callwrappers.go
+++ b/service/frontend/wrappers/clusterredirection/callwrappers.go
@@ -60,7 +60,9 @@ func (handler *clusterRedirectionHandler) afterCall(
 	scope = scope.Tagged(metrics.TargetClusterTag(cluster))
 	scope.IncCounter(metrics.CadenceDcRedirectionClientRequests)
-	scope.RecordTimer(metrics.CadenceDcRedirectionClientLatency, handler.GetTimeSource().Now().Sub(startTime))
+	elapsed := handler.GetTimeSource().Now().Sub(startTime)
+	scope.RecordTimer(metrics.CadenceDcRedirectionClientLatency, elapsed)
+	scope.RecordHistogramDuration(metrics.CadenceDcRedirectionClientLatencyHistogram, elapsed)
 	if *retError != nil {
 		scope.IncCounter(metrics.CadenceDcRedirectionClientFailures)
 	}
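Reviewer note on the `afterCall` change above: the elapsed duration is computed once and fed to both the Timer and the Histogram, so the two series cannot drift apart the way two independent clock reads could. A runnable sketch of that measure-once pattern with a stand-in recorder in place of `metrics.Scope`:

```go
package main

import (
	"fmt"
	"time"
)

// recorder abstracts the two emit paths; a stand-in for metrics.Scope.
type recorder interface {
	RecordTimer(name string, d time.Duration)
	RecordHistogramDuration(name string, d time.Duration)
}

type printRecorder struct{}

func (printRecorder) RecordTimer(name string, d time.Duration) {
	fmt.Println("timer:", name, d)
}

func (printRecorder) RecordHistogramDuration(name string, d time.Duration) {
	fmt.Println("histogram:", name, d)
}

// emitLatency mirrors afterCall: one measurement, two emissions.
func emitLatency(r recorder, start time.Time) {
	elapsed := time.Since(start)
	r.RecordTimer("dc_redirection_latency", elapsed)
	r.RecordHistogramDuration("dc_redirection_latency_ns", elapsed)
}

func main() {
	start := time.Now()
	time.Sleep(10 * time.Millisecond)
	emitLatency(printRecorder{}, start)
}
```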
diff --git a/service/frontend/wrappers/metered/api_generated.go b/service/frontend/wrappers/metered/api_generated.go
index 1ee81bd4ae9..f559d739e72 100644
--- a/service/frontend/wrappers/metered/api_generated.go
+++ b/service/frontend/wrappers/metered/api_generated.go
@@ -6,6 +6,7 @@ package metered
 import (
 	"context"
+	"time"
 	"github.com/uber/cadence/common"
 	"github.com/uber/cadence/common/cache"
@@ -45,8 +46,9 @@ func (h *apiHandler) BackfillSchedule(ctx context.Context, bp1 *types.BackfillSc
 	tags = append(tags, toBackfillScheduleRequestTags(bp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendBackfillScheduleScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(bp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	bp2, err = h.handler.BackfillSchedule(ctx, bp1)
@@ -61,8 +63,9 @@ func (h *apiHandler) CountWorkflowExecutions(ctx context.Context, cp1 *types.Cou
 	tags = append(tags, toCountWorkflowExecutionsRequestTags(cp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendCountWorkflowExecutionsScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(cp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	cp2, err = h.handler.CountWorkflowExecutions(ctx, cp1)
@@ -77,8 +80,9 @@ func (h *apiHandler) CreateSchedule(ctx context.Context, cp1 *types.CreateSchedu
 	tags = append(tags, toCreateScheduleRequestTags(cp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendCreateScheduleScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(cp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	cp2, err = h.handler.CreateSchedule(ctx, cp1)
@@ -92,8 +96,9 @@ func (h *apiHandler) DeleteDomain(ctx context.Context, dp1 *types.DeleteDomainRe
 	tags := []tag.Tag{tag.WorkflowHandlerName("DeleteDomain")}
 	scope := h.metricsClient.Scope(metrics.FrontendDeleteDomainScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainUnknownTag())...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	err = h.handler.DeleteDomain(ctx, dp1)
@@ -108,8 +113,9 @@ func (h *apiHandler) DeleteSchedule(ctx context.Context, dp1 *types.DeleteSchedu
 	tags = append(tags, toDeleteScheduleRequestTags(dp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendDeleteScheduleScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(dp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	dp2, err = h.handler.DeleteSchedule(ctx, dp1)
@@ -123,8 +129,9 @@ func (h *apiHandler) DeprecateDomain(ctx context.Context, dp1 *types.DeprecateDo
 	tags := []tag.Tag{tag.WorkflowHandlerName("DeprecateDomain")}
 	scope := h.metricsClient.Scope(metrics.FrontendDeprecateDomainScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainUnknownTag())...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	err = h.handler.DeprecateDomain(ctx, dp1)
@@ -138,8 +145,9 @@ func (h *apiHandler) DescribeDomain(ctx context.Context, dp1 *types.DescribeDoma
 	tags := []tag.Tag{tag.WorkflowHandlerName("DescribeDomain")}
 	scope := h.metricsClient.Scope(metrics.FrontendDescribeDomainScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainUnknownTag())...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	dp2, err = h.handler.DescribeDomain(ctx, dp1)
@@ -154,8 +162,9 @@ func (h *apiHandler) DescribeSchedule(ctx context.Context, dp1 *types.DescribeSc
 	tags = append(tags, toDescribeScheduleRequestTags(dp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendDescribeScheduleScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(dp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	dp2, err = h.handler.DescribeSchedule(ctx, dp1)
@@ -170,12 +179,20 @@ func (h *apiHandler) DescribeTaskList(ctx context.Context, dp1 *types.DescribeTa
 	tags = append(tags, toDescribeTaskListRequestTags(dp1)...)
 	scope := common.NewPerTaskListScope(dp1.Domain, dp1.TaskList.GetName(), dp1.TaskList.GetKind(), h.metricsClient, metrics.FrontendDescribeTaskListScope).Tagged(metrics.GetContextTags(ctx)...)
 	scope.IncCounter(metrics.CadenceRequestsPerTaskListWithoutRollup)
+	swPerTLStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatencyPerTaskList)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swPerTLStart))
+	}()
 	scopePerDomain := h.metricsClient.Scope(metrics.FrontendDescribeTaskListScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(dp1.GetDomain()))...)
 	scopePerDomain.IncCounter(metrics.CadenceRequests)
+	swPerDomainStart := time.Now()
 	swPerDomain := scopePerDomain.StartTimer(metrics.CadenceLatency)
-	defer swPerDomain.Stop()
+	defer func() {
+		swPerDomain.Stop()
+		scopePerDomain.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swPerDomainStart))
+	}()
 	logger := h.logger.WithTags(tags...)
 	dp2, err = h.handler.DescribeTaskList(ctx, dp1)
@@ -190,8 +207,9 @@ func (h *apiHandler) DescribeWorkflowExecution(ctx context.Context, dp1 *types.D
 	tags = append(tags, toDescribeWorkflowExecutionRequestTags(dp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendDescribeWorkflowExecutionScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(dp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	dp2, err = h.handler.DescribeWorkflowExecution(ctx, dp1)
@@ -206,8 +224,9 @@ func (h *apiHandler) DiagnoseWorkflowExecution(ctx context.Context, dp1 *types.D
 	tags = append(tags, toDiagnoseWorkflowExecutionRequestTags(dp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendDiagnoseWorkflowExecutionScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(dp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	dp2, err = h.handler.DiagnoseWorkflowExecution(ctx, dp1)
@@ -222,8 +241,9 @@ func (h *apiHandler) FailoverDomain(ctx context.Context, fp1 *types.FailoverDoma
 	tags = append(tags, toFailoverDomainRequestTags(fp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendFailoverDomainScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(fp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	fp2, err = h.handler.FailoverDomain(ctx, fp1)
@@ -237,8 +257,9 @@ func (h *apiHandler) GetClusterInfo(ctx context.Context) (cp1 *types.ClusterInfo
 	tags := []tag.Tag{tag.WorkflowHandlerName("GetClusterInfo")}
 	scope := h.metricsClient.Scope(metrics.FrontendGetClusterInfoScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainUnknownTag())...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	cp1, err = h.handler.GetClusterInfo(ctx)
@@ -252,8 +273,9 @@ func (h *apiHandler) GetSearchAttributes(ctx context.Context) (gp1 *types.GetSea
 	tags := []tag.Tag{tag.WorkflowHandlerName("GetSearchAttributes")}
 	scope := h.metricsClient.Scope(metrics.FrontendGetSearchAttributesScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainUnknownTag())...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	gp1, err = h.handler.GetSearchAttributes(ctx)
@@ -268,8 +290,9 @@ func (h *apiHandler) GetTaskListsByDomain(ctx context.Context, gp1 *types.GetTas
 	tags = append(tags, toGetTaskListsByDomainRequestTags(gp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendGetTaskListsByDomainScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(gp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	gp2, err = h.handler.GetTaskListsByDomain(ctx, gp1)
@@ -284,8 +307,9 @@ func (h *apiHandler) GetWorkflowExecutionHistory(ctx context.Context, gp1 *types
 	tags = append(tags, toGetWorkflowExecutionHistoryRequestTags(gp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendGetWorkflowExecutionHistoryScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(gp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	gp2, err = h.handler.GetWorkflowExecutionHistory(ctx, gp1)
@@ -303,8 +327,9 @@ func (h *apiHandler) ListArchivedWorkflowExecutions(ctx context.Context, lp1 *ty
 	tags = append(tags, toListArchivedWorkflowExecutionsRequestTags(lp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendListArchivedWorkflowExecutionsScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(lp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	lp2, err = h.handler.ListArchivedWorkflowExecutions(ctx, lp1)
@@ -319,8 +344,9 @@ func (h *apiHandler) ListClosedWorkflowExecutions(ctx context.Context, lp1 *type
 	tags = append(tags, toListClosedWorkflowExecutionsRequestTags(lp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendListClosedWorkflowExecutionsScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(lp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	lp2, err = h.handler.ListClosedWorkflowExecutions(ctx, lp1)
@@ -334,8 +360,9 @@ func (h *apiHandler) ListDomains(ctx context.Context, lp1 *types.ListDomainsRequ
 	tags := []tag.Tag{tag.WorkflowHandlerName("ListDomains")}
 	scope := h.metricsClient.Scope(metrics.FrontendListDomainsScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainUnknownTag())...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	lp2, err = h.handler.ListDomains(ctx, lp1)
@@ -349,8 +376,9 @@ func (h *apiHandler) ListFailoverHistory(ctx context.Context, lp1 *types.ListFai
 	tags := []tag.Tag{tag.WorkflowHandlerName("ListFailoverHistory")}
 	scope := h.metricsClient.Scope(metrics.FrontendListFailoverHistoryScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainUnknownTag())...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	lp2, err = h.handler.ListFailoverHistory(ctx, lp1)
@@ -365,8 +393,9 @@ func (h *apiHandler) ListOpenWorkflowExecutions(ctx context.Context, lp1 *types.
 	tags = append(tags, toListOpenWorkflowExecutionsRequestTags(lp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendListOpenWorkflowExecutionsScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(lp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	lp2, err = h.handler.ListOpenWorkflowExecutions(ctx, lp1)
@@ -381,8 +410,9 @@ func (h *apiHandler) ListSchedules(ctx context.Context, lp1 *types.ListSchedules
 	tags = append(tags, toListSchedulesRequestTags(lp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendListSchedulesScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(lp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	lp2, err = h.handler.ListSchedules(ctx, lp1)
@@ -397,12 +427,20 @@ func (h *apiHandler) ListTaskListPartitions(ctx context.Context, lp1 *types.List
 	tags = append(tags, toListTaskListPartitionsRequestTags(lp1)...)
 	scope := common.NewPerTaskListScope(lp1.Domain, lp1.TaskList.GetName(), lp1.TaskList.GetKind(), h.metricsClient, metrics.FrontendListTaskListPartitionsScope).Tagged(metrics.GetContextTags(ctx)...)
 	scope.IncCounter(metrics.CadenceRequestsPerTaskListWithoutRollup)
+	swPerTLStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatencyPerTaskList)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swPerTLStart))
+	}()
 	scopePerDomain := h.metricsClient.Scope(metrics.FrontendListTaskListPartitionsScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(lp1.GetDomain()))...)
 	scopePerDomain.IncCounter(metrics.CadenceRequests)
+	swPerDomainStart := time.Now()
 	swPerDomain := scopePerDomain.StartTimer(metrics.CadenceLatency)
-	defer swPerDomain.Stop()
+	defer func() {
+		swPerDomain.Stop()
+		scopePerDomain.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swPerDomainStart))
+	}()
 	logger := h.logger.WithTags(tags...)
 	lp2, err = h.handler.ListTaskListPartitions(ctx, lp1)
@@ -417,8 +455,9 @@ func (h *apiHandler) ListWorkflowExecutions(ctx context.Context, lp1 *types.List
 	tags = append(tags, toListWorkflowExecutionsRequestTags(lp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendListWorkflowExecutionsScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(lp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	lp2, err = h.handler.ListWorkflowExecutions(ctx, lp1)
@@ -433,8 +472,9 @@ func (h *apiHandler) PauseSchedule(ctx context.Context, pp1 *types.PauseSchedule
 	tags = append(tags, toPauseScheduleRequestTags(pp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendPauseScheduleScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(pp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	pp2, err = h.handler.PauseSchedule(ctx, pp1)
@@ -449,12 +489,20 @@ func (h *apiHandler) PollForActivityTask(ctx context.Context, pp1 *types.PollFor
 	tags = append(tags, toPollForActivityTaskRequestTags(pp1)...)
 	scope := common.NewPerTaskListScope(pp1.Domain, pp1.TaskList.GetName(), pp1.TaskList.GetKind(), h.metricsClient, metrics.FrontendPollForActivityTaskScope).Tagged(metrics.GetContextTags(ctx)...)
 	scope.IncCounter(metrics.CadenceRequestsPerTaskListWithoutRollup)
+	swPerTLStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatencyPerTaskList)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swPerTLStart))
+	}()
 	scopePerDomain := h.metricsClient.Scope(metrics.FrontendPollForActivityTaskScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(pp1.GetDomain()))...)
 	scopePerDomain.IncCounter(metrics.CadenceRequests)
+	swPerDomainStart := time.Now()
 	swPerDomain := scopePerDomain.StartTimer(metrics.CadenceLatency)
-	defer swPerDomain.Stop()
+	defer func() {
+		swPerDomain.Stop()
+		scopePerDomain.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swPerDomainStart))
+	}()
 	logger := h.logger.WithTags(tags...)
 	pp2, err = h.handler.PollForActivityTask(ctx, pp1)
@@ -469,12 +517,20 @@ func (h *apiHandler) PollForDecisionTask(ctx context.Context, pp1 *types.PollFor
 	tags = append(tags, toPollForDecisionTaskRequestTags(pp1)...)
 	scope := common.NewPerTaskListScope(pp1.Domain, pp1.TaskList.GetName(), pp1.TaskList.GetKind(), h.metricsClient, metrics.FrontendPollForDecisionTaskScope).Tagged(metrics.GetContextTags(ctx)...)
 	scope.IncCounter(metrics.CadenceRequestsPerTaskListWithoutRollup)
+	swPerTLStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatencyPerTaskList)
-	defer sw.Stop()
+	defer func() {
+		sw.Stop()
+		scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swPerTLStart))
+	}()
 	scopePerDomain := h.metricsClient.Scope(metrics.FrontendPollForDecisionTaskScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(pp1.GetDomain()))...)
 	scopePerDomain.IncCounter(metrics.CadenceRequests)
+	swPerDomainStart := time.Now()
 	swPerDomain := scopePerDomain.StartTimer(metrics.CadenceLatency)
-	defer swPerDomain.Stop()
+	defer func() {
+		swPerDomain.Stop()
+		scopePerDomain.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swPerDomainStart))
+	}()
 	logger := h.logger.WithTags(tags...)
 	pp2, err = h.handler.PollForDecisionTask(ctx, pp1)
@@ -489,8 +545,9 @@ func (h *apiHandler) QueryWorkflow(ctx context.Context, qp1 *types.QueryWorkflow
 	tags = append(tags, toQueryWorkflowRequestTags(qp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendQueryWorkflowScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(qp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	qp2, err = h.handler.QueryWorkflow(ctx, qp1)
@@ -513,8 +570,9 @@ func (h *apiHandler) RecordActivityTaskHeartbeat(ctx context.Context, rp1 *types
 	tags = append(tags, tag.WorkflowDomainName(domainName), tag.WorkflowID(token.WorkflowID), tag.WorkflowRunID(token.RunID))
 	scope := h.metricsClient.Scope(metrics.FrontendRecordActivityTaskHeartbeatScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(domainName))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	rp2, err = h.handler.RecordActivityTaskHeartbeat(ctx, rp1)
@@ -529,8 +587,9 @@ func (h *apiHandler) RecordActivityTaskHeartbeatByID(ctx context.Context, rp1 *t
 	tags = append(tags, toRecordActivityTaskHeartbeatByIDRequestTags(rp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendRecordActivityTaskHeartbeatByIDScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(rp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	rp2, err = h.handler.RecordActivityTaskHeartbeatByID(ctx, rp1)
@@ -545,8 +604,9 @@ func (h *apiHandler) RefreshWorkflowTasks(ctx context.Context, rp1 *types.Refres
 	tags = append(tags, toRefreshWorkflowTasksRequestTags(rp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendRefreshWorkflowTasksScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(rp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	err = h.handler.RefreshWorkflowTasks(ctx, rp1)
@@ -560,8 +620,9 @@ func (h *apiHandler) RegisterDomain(ctx context.Context, rp1 *types.RegisterDoma
 	tags := []tag.Tag{tag.WorkflowHandlerName("RegisterDomain")}
 	scope := h.metricsClient.Scope(metrics.FrontendRegisterDomainScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainUnknownTag())...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	err = h.handler.RegisterDomain(ctx, rp1)
@@ -576,8 +637,9 @@ func (h *apiHandler) RequestCancelWorkflowExecution(ctx context.Context, rp1 *ty
 	tags = append(tags, toRequestCancelWorkflowExecutionRequestTags(rp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendRequestCancelWorkflowExecutionScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(rp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	err = h.handler.RequestCancelWorkflowExecution(ctx, rp1)
@@ -592,8 +654,9 @@ func (h *apiHandler) ResetStickyTaskList(ctx context.Context, rp1 *types.ResetSt
 	tags = append(tags, toResetStickyTaskListRequestTags(rp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendResetStickyTaskListScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(rp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	rp2, err = h.handler.ResetStickyTaskList(ctx, rp1)
@@ -608,8 +671,9 @@ func (h *apiHandler) ResetWorkflowExecution(ctx context.Context, rp1 *types.Rese
 	tags = append(tags, toResetWorkflowExecutionRequestTags(rp1)...)
 	scope := h.metricsClient.Scope(metrics.FrontendResetWorkflowExecutionScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(rp1.GetDomain()))...)
 	scope.IncCounter(metrics.CadenceRequests)
+	swStart := time.Now()
 	sw := scope.StartTimer(metrics.CadenceLatency)
-	defer sw.Stop()
+	defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
 	logger := h.logger.WithTags(tags...)
 	rp2, err = h.handler.ResetWorkflowExecution(ctx, rp1)
@@ -632,8 +696,9 @@ func (h *apiHandler) RespondActivityTaskCanceled(ctx context.Context, rp1 *types
 	tags = append(tags, tag.WorkflowDomainName(domainName), tag.WorkflowID(token.WorkflowID), tag.WorkflowRunID(token.RunID))
 	scope := h.metricsClient.Scope(metrics.FrontendRespondActivityTaskCanceledScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(domainName))...)
scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) err = h.handler.RespondActivityTaskCanceled(ctx, rp1) @@ -648,8 +713,9 @@ func (h *apiHandler) RespondActivityTaskCanceledByID(ctx context.Context, rp1 *t tags = append(tags, toRespondActivityTaskCanceledByIDRequestTags(rp1)...) scope := h.metricsClient.Scope(metrics.FrontendRespondActivityTaskCanceledByIDScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(rp1.GetDomain()))...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) err = h.handler.RespondActivityTaskCanceledByID(ctx, rp1) @@ -672,8 +738,9 @@ func (h *apiHandler) RespondActivityTaskCompleted(ctx context.Context, rp1 *type tags = append(tags, tag.WorkflowDomainName(domainName), tag.WorkflowID(token.WorkflowID), tag.WorkflowRunID(token.RunID)) scope := h.metricsClient.Scope(metrics.FrontendRespondActivityTaskCompletedScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(domainName))...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) err = h.handler.RespondActivityTaskCompleted(ctx, rp1) @@ -688,8 +755,9 @@ func (h *apiHandler) RespondActivityTaskCompletedByID(ctx context.Context, rp1 * tags = append(tags, toRespondActivityTaskCompletedByIDRequestTags(rp1)...) scope := h.metricsClient.Scope(metrics.FrontendRespondActivityTaskCompletedByIDScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(rp1.GetDomain()))...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) err = h.handler.RespondActivityTaskCompletedByID(ctx, rp1) @@ -712,8 +780,9 @@ func (h *apiHandler) RespondActivityTaskFailed(ctx context.Context, rp1 *types.R tags = append(tags, tag.WorkflowDomainName(domainName), tag.WorkflowID(token.WorkflowID), tag.WorkflowRunID(token.RunID)) scope := h.metricsClient.Scope(metrics.FrontendRespondActivityTaskFailedScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(domainName))...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) err = h.handler.RespondActivityTaskFailed(ctx, rp1) @@ -728,8 +797,9 @@ func (h *apiHandler) RespondActivityTaskFailedByID(ctx context.Context, rp1 *typ tags = append(tags, toRespondActivityTaskFailedByIDRequestTags(rp1)...) scope := h.metricsClient.Scope(metrics.FrontendRespondActivityTaskFailedByIDScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(rp1.GetDomain()))...) 
scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) err = h.handler.RespondActivityTaskFailedByID(ctx, rp1) @@ -752,8 +822,9 @@ func (h *apiHandler) RespondDecisionTaskCompleted(ctx context.Context, rp1 *type tags = append(tags, tag.WorkflowDomainName(domainName), tag.WorkflowID(token.WorkflowID), tag.WorkflowRunID(token.RunID)) scope := h.metricsClient.Scope(metrics.FrontendRespondDecisionTaskCompletedScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(domainName))...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) rp2, err = h.handler.RespondDecisionTaskCompleted(ctx, rp1) @@ -776,8 +847,9 @@ func (h *apiHandler) RespondDecisionTaskFailed(ctx context.Context, rp1 *types.R tags = append(tags, tag.WorkflowDomainName(domainName), tag.WorkflowID(token.WorkflowID), tag.WorkflowRunID(token.RunID)) scope := h.metricsClient.Scope(metrics.FrontendRespondDecisionTaskFailedScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(domainName))...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) err = h.handler.RespondDecisionTaskFailed(ctx, rp1) @@ -800,8 +872,9 @@ func (h *apiHandler) RespondQueryTaskCompleted(ctx context.Context, rp1 *types.R tags = append(tags, tag.WorkflowDomainName(domainName)) scope := h.metricsClient.Scope(metrics.FrontendRespondQueryTaskCompletedScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(domainName))...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) err = h.handler.RespondQueryTaskCompleted(ctx, rp1) @@ -816,8 +889,9 @@ func (h *apiHandler) RestartWorkflowExecution(ctx context.Context, rp1 *types.Re tags = append(tags, toRestartWorkflowExecutionRequestTags(rp1)...) scope := h.metricsClient.Scope(metrics.FrontendRestartWorkflowExecutionScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(rp1.GetDomain()))...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) rp2, err = h.handler.RestartWorkflowExecution(ctx, rp1) @@ -832,8 +906,9 @@ func (h *apiHandler) ScanWorkflowExecutions(ctx context.Context, lp1 *types.List tags = append(tags, toScanWorkflowExecutionsRequestTags(lp1)...) scope := h.metricsClient.Scope(metrics.FrontendScanWorkflowExecutionsScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(lp1.GetDomain()))...) 
scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) lp2, err = h.handler.ScanWorkflowExecutions(ctx, lp1) @@ -848,12 +923,20 @@ func (h *apiHandler) SignalWithStartWorkflowExecution(ctx context.Context, sp1 * tags = append(tags, toSignalWithStartWorkflowExecutionRequestTags(sp1)...) scope := common.NewPerTaskListScope(sp1.Domain, sp1.TaskList.GetName(), sp1.TaskList.GetKind(), h.metricsClient, metrics.FrontendSignalWithStartWorkflowExecutionScope).Tagged(metrics.GetContextTags(ctx)...) scope.IncCounter(metrics.CadenceRequestsPerTaskListWithoutRollup) + swPerTLStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatencyPerTaskList) - defer sw.Stop() + defer func() { + sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swPerTLStart)) + }() scopePerDomain := h.metricsClient.Scope(metrics.FrontendSignalWithStartWorkflowExecutionScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(sp1.GetDomain()))...) scopePerDomain.IncCounter(metrics.CadenceRequests) + swPerDomainStart := time.Now() swPerDomain := scopePerDomain.StartTimer(metrics.CadenceLatency) - defer swPerDomain.Stop() + defer func() { + swPerDomain.Stop() + scopePerDomain.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swPerDomainStart)) + }() logger := h.logger.WithTags(tags...) sp2, err = h.handler.SignalWithStartWorkflowExecution(ctx, sp1) @@ -868,12 +951,20 @@ func (h *apiHandler) SignalWithStartWorkflowExecutionAsync(ctx context.Context, tags = append(tags, toSignalWithStartWorkflowExecutionAsyncRequestTags(sp1)...) scope := common.NewPerTaskListScope(sp1.Domain, sp1.TaskList.GetName(), sp1.TaskList.GetKind(), h.metricsClient, metrics.FrontendSignalWithStartWorkflowExecutionAsyncScope).Tagged(metrics.GetContextTags(ctx)...) scope.IncCounter(metrics.CadenceRequestsPerTaskListWithoutRollup) + swPerTLStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatencyPerTaskList) - defer sw.Stop() + defer func() { + sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swPerTLStart)) + }() scopePerDomain := h.metricsClient.Scope(metrics.FrontendSignalWithStartWorkflowExecutionAsyncScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(sp1.GetDomain()))...) scopePerDomain.IncCounter(metrics.CadenceRequests) + swPerDomainStart := time.Now() swPerDomain := scopePerDomain.StartTimer(metrics.CadenceLatency) - defer swPerDomain.Stop() + defer func() { + swPerDomain.Stop() + scopePerDomain.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swPerDomainStart)) + }() logger := h.logger.WithTags(tags...) sp2, err = h.handler.SignalWithStartWorkflowExecutionAsync(ctx, sp1) @@ -889,8 +980,9 @@ func (h *apiHandler) SignalWorkflowExecution(ctx context.Context, sp1 *types.Sig tags = append(tags, toSignalWorkflowExecutionRequestTags(sp1)...) scope := h.metricsClient.Scope(metrics.FrontendSignalWorkflowExecutionScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(sp1.GetDomain()))...) 
scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) err = h.handler.SignalWorkflowExecution(ctx, sp1) @@ -905,12 +997,20 @@ func (h *apiHandler) StartWorkflowExecution(ctx context.Context, sp1 *types.Star tags = append(tags, toStartWorkflowExecutionRequestTags(sp1)...) scope := common.NewPerTaskListScope(sp1.Domain, sp1.TaskList.GetName(), sp1.TaskList.GetKind(), h.metricsClient, metrics.FrontendStartWorkflowExecutionScope).Tagged(metrics.GetContextTags(ctx)...) scope.IncCounter(metrics.CadenceRequestsPerTaskListWithoutRollup) + swPerTLStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatencyPerTaskList) - defer sw.Stop() + defer func() { + sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swPerTLStart)) + }() scopePerDomain := h.metricsClient.Scope(metrics.FrontendStartWorkflowExecutionScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(sp1.GetDomain()))...) scopePerDomain.IncCounter(metrics.CadenceRequests) + swPerDomainStart := time.Now() swPerDomain := scopePerDomain.StartTimer(metrics.CadenceLatency) - defer swPerDomain.Stop() + defer func() { + swPerDomain.Stop() + scopePerDomain.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swPerDomainStart)) + }() logger := h.logger.WithTags(tags...) sp2, err = h.handler.StartWorkflowExecution(ctx, sp1) @@ -925,12 +1025,20 @@ func (h *apiHandler) StartWorkflowExecutionAsync(ctx context.Context, sp1 *types tags = append(tags, toStartWorkflowExecutionAsyncRequestTags(sp1)...) scope := common.NewPerTaskListScope(sp1.Domain, sp1.TaskList.GetName(), sp1.TaskList.GetKind(), h.metricsClient, metrics.FrontendStartWorkflowExecutionAsyncScope).Tagged(metrics.GetContextTags(ctx)...) scope.IncCounter(metrics.CadenceRequestsPerTaskListWithoutRollup) + swPerTLStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatencyPerTaskList) - defer sw.Stop() + defer func() { + sw.Stop() + scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swPerTLStart)) + }() scopePerDomain := h.metricsClient.Scope(metrics.FrontendStartWorkflowExecutionAsyncScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(sp1.GetDomain()))...) scopePerDomain.IncCounter(metrics.CadenceRequests) + swPerDomainStart := time.Now() swPerDomain := scopePerDomain.StartTimer(metrics.CadenceLatency) - defer swPerDomain.Stop() + defer func() { + swPerDomain.Stop() + scopePerDomain.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swPerDomainStart)) + }() logger := h.logger.WithTags(tags...) sp2, err = h.handler.StartWorkflowExecutionAsync(ctx, sp1) @@ -945,8 +1053,9 @@ func (h *apiHandler) TerminateWorkflowExecution(ctx context.Context, tp1 *types. tags = append(tags, toTerminateWorkflowExecutionRequestTags(tp1)...) scope := h.metricsClient.Scope(metrics.FrontendTerminateWorkflowExecutionScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(tp1.GetDomain()))...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) 
err = h.handler.TerminateWorkflowExecution(ctx, tp1) @@ -961,8 +1070,9 @@ func (h *apiHandler) UnpauseSchedule(ctx context.Context, up1 *types.UnpauseSche tags = append(tags, toUnpauseScheduleRequestTags(up1)...) scope := h.metricsClient.Scope(metrics.FrontendUnpauseScheduleScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(up1.GetDomain()))...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) up2, err = h.handler.UnpauseSchedule(ctx, up1) @@ -976,8 +1086,9 @@ func (h *apiHandler) UpdateDomain(ctx context.Context, up1 *types.UpdateDomainRe tags := []tag.Tag{tag.WorkflowHandlerName("UpdateDomain")} scope := h.metricsClient.Scope(metrics.FrontendUpdateDomainScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainUnknownTag())...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) up2, err = h.handler.UpdateDomain(ctx, up1) @@ -992,8 +1103,9 @@ func (h *apiHandler) UpdateSchedule(ctx context.Context, up1 *types.UpdateSchedu tags = append(tags, toUpdateScheduleRequestTags(up1)...) scope := h.metricsClient.Scope(metrics.FrontendUpdateScheduleScope).Tagged(append(metrics.GetContextTags(ctx), metrics.DomainTag(up1.GetDomain()))...) scope.IncCounter(metrics.CadenceRequests) + swStart := time.Now() sw := scope.StartTimer(metrics.CadenceLatency) - defer sw.Stop() + defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }() logger := h.logger.WithTags(tags...) 
up2, err = h.handler.UpdateSchedule(ctx, up1) diff --git a/service/history/engine/engineimpl/get_replication_messages.go b/service/history/engine/engineimpl/get_replication_messages.go index fda5362982f..63d9ee48c19 100644 --- a/service/history/engine/engineimpl/get_replication_messages.go +++ b/service/history/engine/engineimpl/get_replication_messages.go @@ -25,6 +25,7 @@ import ( "context" "encoding/json" "fmt" + "time" "github.com/uber/cadence/common" "github.com/uber/cadence/common/log/tag" @@ -40,8 +41,12 @@ func (e *historyEngineImpl) GetReplicationMessages( ) (*types.ReplicationMessages, error) { scope := metrics.HistoryGetReplicationMessagesScope + replMsgStart := time.Now() sw := e.metricsClient.StartTimer(scope, metrics.GetReplicationMessagesForShardLatency) - defer sw.Stop() + defer func() { + sw.Stop() + e.metricsClient.Scope(scope).RecordHistogramDuration(metrics.GetReplicationMessagesForShardLatencyHistogram, time.Since(replMsgStart)) + }() replicationMessages, err := e.replicationAckManager.GetTasks( ctx, @@ -78,8 +83,12 @@ func (e *historyEngineImpl) GetDLQReplicationMessages( ) ([]*types.ReplicationTask, error) { scope := metrics.HistoryGetDLQReplicationMessagesScope + dlqStart := time.Now() sw := e.metricsClient.StartTimer(scope, metrics.GetDLQReplicationMessagesLatency) - defer sw.Stop() + defer func() { + sw.Stop() + e.metricsClient.Scope(scope).RecordHistogramDuration(metrics.GetDLQReplicationMessagesLatencyHistogram, time.Since(dlqStart)) + }() tasks := make([]*types.ReplicationTask, 0, len(taskInfos)) for _, taskInfo := range taskInfos { diff --git a/service/history/engine/engineimpl/query_workflow.go b/service/history/engine/engineimpl/query_workflow.go index f03c4932e32..24e2894c5db 100644 --- a/service/history/engine/engineimpl/query_workflow.go +++ b/service/history/engine/engineimpl/query_workflow.go @@ -158,8 +158,12 @@ func (e *historyEngineImpl) QueryWorkflow( // If we get here it means query could not be dispatched through matching directly, so it must block // until either an result has been obtained on a decision task response or until it is safe to dispatch directly through matching. + decisionQueryStart := time.Now() sw := scope.StartTimer(metrics.DecisionTaskQueryLatency) - defer sw.Stop() + defer func() { + sw.Stop() + scope.RecordHistogramDuration(metrics.DecisionTaskQueryLatencyHistogram, time.Since(decisionQueryStart)) + }() queryReg := mutableState.GetQueryRegistry() if len(queryReg.GetBufferedIDs()) >= e.config.MaxBufferedQueryCount() { scope.IncCounter(metrics.QueryBufferExceededCount) diff --git a/service/history/execution/context.go b/service/history/execution/context.go index 667f988081d..d5ec67d7b40 100644 --- a/service/history/execution/context.go +++ b/service/history/execution/context.go @@ -295,6 +295,7 @@ func (c *contextImpl) Unlock() { } elapsed := time.Since(c.lockTime) c.metricsClient.RecordTimer(metrics.WorkflowContextScope, metrics.WorkflowContextLockLatency, elapsed) + c.metricsClient.Scope(metrics.WorkflowContextScope).RecordHistogramDuration(metrics.WorkflowContextLockLatencyHistogram, elapsed) if elapsed > c.maxLockDuration { c.maxLockDuration = elapsed c.logger.Info("workflow context lock is released. 
this is logged only when it's longer than maxLockDuration", tag.WorkflowContextLockLatency(elapsed)) diff --git a/service/history/failover/coordinator.go b/service/history/failover/coordinator.go index 2a15015927b..589146c72b2 100644 --- a/service/history/failover/coordinator.go +++ b/service/history/failover/coordinator.go @@ -327,6 +327,12 @@ func (c *coordinatorImpl) handleFailoverMarkers( metrics.GracefulFailoverLatency, now.Sub(time.Unix(0, marker.GetCreationTime())), ) + c.scope.Tagged( + metrics.DomainTag(domainName), + ).RecordHistogramDuration( + metrics.GracefulFailoverLatencyHistogram, + now.Sub(time.Unix(0, marker.GetCreationTime())), + ) c.logger.Info("Updated domain from pending-active to active", tag.WorkflowDomainName(domainName), tag.FailoverVersion(marker.FailoverVersion), diff --git a/service/history/replication/task_executor.go b/service/history/replication/task_executor.go index a668901b20b..b355dd21e22 100644 --- a/service/history/replication/task_executor.go +++ b/service/history/replication/task_executor.go @@ -22,6 +22,7 @@ package replication import ( "context" + "time" "github.com/uber/cadence/common" "github.com/uber/cadence/common/cache" @@ -130,8 +131,12 @@ func (e *taskExecutorImpl) handleActivityTask( return err } + replicationLatencyStart := time.Now() replicationStopWatch := e.metricsClient.StartTimer(metrics.SyncActivityTaskScope, metrics.CadenceLatency) - defer replicationStopWatch.Stop() + defer func() { + replicationStopWatch.Stop() + e.metricsClient.Scope(metrics.SyncActivityTaskScope).RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(replicationLatencyStart)) + }() request := &types.SyncActivityRequest{ DomainID: attr.DomainID, WorkflowID: attr.WorkflowID, @@ -171,8 +176,12 @@ func (e *taskExecutorImpl) handleActivityTask( } // Handle resend error e.metricsClient.IncCounter(metrics.HistoryRereplicationByActivityReplicationScope, metrics.CadenceClientRequests) + activityResendLatencyStart := time.Now() stopwatch := e.metricsClient.StartTimer(metrics.HistoryRereplicationByActivityReplicationScope, metrics.CadenceClientLatency) - defer stopwatch.Stop() + defer func() { + stopwatch.Stop() + e.metricsClient.Scope(metrics.HistoryRereplicationByActivityReplicationScope).RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(activityResendLatencyStart)) + }() resendErr := e.historyResender.SendSingleWorkflowHistory( e.sourceCluster, @@ -234,8 +243,12 @@ func (e *taskExecutorImpl) handleHistoryReplicationTaskV2( return err } + replicationV2LatencyStart := time.Now() replicationStopWatch := e.metricsClient.StartTimer(metrics.HistoryReplicationV2TaskScope, metrics.CadenceLatency) - defer replicationStopWatch.Stop() + defer func() { + replicationStopWatch.Stop() + e.metricsClient.Scope(metrics.HistoryReplicationV2TaskScope).RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(replicationV2LatencyStart)) + }() request := &types.ReplicateEventsV2Request{ DomainUUID: attr.DomainID, WorkflowExecution: &types.WorkflowExecution{ @@ -268,8 +281,12 @@ func (e *taskExecutorImpl) handleHistoryReplicationTaskV2( return err } e.metricsClient.IncCounter(metrics.HistoryRereplicationByHistoryReplicationScope, metrics.CadenceClientRequests) + historyResendLatencyStart := time.Now() resendStopWatch := e.metricsClient.StartTimer(metrics.HistoryRereplicationByHistoryReplicationScope, metrics.CadenceClientLatency) - defer resendStopWatch.Stop() + defer func() { + resendStopWatch.Stop() + 
e.metricsClient.Scope(metrics.HistoryRereplicationByHistoryReplicationScope).RecordHistogramDuration(metrics.CadenceClientLatencyHistogram, time.Since(historyResendLatencyStart)) + }() resendErr := e.historyResender.SendSingleWorkflowHistory( e.sourceCluster, diff --git a/service/history/task/redispatcher.go b/service/history/task/redispatcher.go index 344d9d39922..0cfc0e6c2ff 100644 --- a/service/history/task/redispatcher.go +++ b/service/history/task/redispatcher.go @@ -213,6 +213,7 @@ func (r *redispatcherImpl) redispatchTasks(notification redispatchNotification) queueSize := r.sizeLocked() r.metricsScope.RecordTimer(metrics.TaskRedispatchQueuePendingTasksTimer, time.Duration(queueSize)) + r.metricsScope.IntExponentialHistogram(metrics.TaskRedispatchQueuePendingTasksHistogram, queueSize) // add some buffer here as new tasks may be added targetRedispatched := queueSize + defaultBufferSize - notification.targetSize diff --git a/service/matching/handler/context.go b/service/matching/handler/context.go index 7fad39797b3..615c26026db 100644 --- a/service/matching/handler/context.go +++ b/service/matching/handler/context.go @@ -24,6 +24,7 @@ import ( "context" "errors" "sync" + "time" "github.com/uber/cadence/common" cadence_errors "github.com/uber/cadence/common/errors" @@ -55,11 +56,12 @@ func newHandlerContext( } // startProfiling initiates recording of request metrics -func (reqCtx *handlerContext) startProfiling(wg *sync.WaitGroup) metrics.Stopwatch { +func (reqCtx *handlerContext) startProfiling(wg *sync.WaitGroup) (metrics.Stopwatch, time.Time) { wg.Wait() + start := time.Now() sw := reqCtx.scope.StartTimer(metrics.CadenceLatencyPerTaskList) reqCtx.scope.IncCounter(metrics.CadenceRequestsPerTaskList) - return sw + return sw, start } func (reqCtx *handlerContext) handleErr(err error) error { diff --git a/service/matching/handler/engine.go b/service/matching/handler/engine.go index 17806eb5863..6ad3b582075 100644 --- a/service/matching/handler/engine.go +++ b/service/matching/handler/engine.go @@ -452,6 +452,7 @@ func (e *matchingEngineImpl) AddDecisionTask( } if syncMatched { hCtx.scope.RecordTimer(metrics.SyncMatchLatencyPerTaskList, time.Since(startT)) + hCtx.scope.RecordHistogramDuration(metrics.SyncMatchLatencyPerTaskListHistogram, time.Since(startT)) } return &types.AddDecisionTaskResponse{ PartitionConfig: tlMgr.TaskListPartitionConfig(), @@ -528,6 +529,7 @@ func (e *matchingEngineImpl) AddActivityTask( } if syncMatched { hCtx.scope.RecordTimer(metrics.SyncMatchLatencyPerTaskList, time.Since(startT)) + hCtx.scope.RecordHistogramDuration(metrics.SyncMatchLatencyPerTaskListHistogram, time.Since(startT)) } return &types.AddActivityTaskResponse{ PartitionConfig: tlMgr.TaskListPartitionConfig(), @@ -1296,6 +1298,7 @@ func (e *matchingEngineImpl) createPollForDecisionTaskResponse( token, _ = e.tokenSerializer.Serialize(taskToken) if task.ResponseC == nil { scope.RecordTimer(metrics.AsyncMatchLatencyPerTaskList, time.Since(task.Event.CreatedTime)) + scope.RecordHistogramDuration(metrics.AsyncMatchLatencyPerTaskListHistogram, time.Since(task.Event.CreatedTime)) } } @@ -1329,6 +1332,7 @@ func (e *matchingEngineImpl) createPollForActivityTaskResponse( } if task.ResponseC == nil { scope.RecordTimer(metrics.AsyncMatchLatencyPerTaskList, time.Since(task.Event.CreatedTime)) + scope.RecordHistogramDuration(metrics.AsyncMatchLatencyPerTaskListHistogram, time.Since(task.Event.CreatedTime)) } response := &types.MatchingPollForActivityTaskResponse{} diff --git a/service/matching/handler/handler.go 
b/service/matching/handler/handler.go index ce174392462..a6969806275 100644 --- a/service/matching/handler/handler.go +++ b/service/matching/handler/handler.go @@ -23,6 +23,7 @@ package handler import ( "context" "sync" + "time" "github.com/uber/cadence/common" "github.com/uber/cadence/common/cache" @@ -139,8 +140,11 @@ func (h *handlerImpl) AddActivityTask( metrics.MatchingAddActivityTaskScope, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() if request.GetForwardedFrom() != "" { hCtx.scope.IncCounter(metrics.ForwardedPerTaskListCounter) @@ -169,8 +173,11 @@ func (h *handlerImpl) AddDecisionTask( metrics.MatchingAddDecisionTaskScope, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() if request.GetForwardedFrom() != "" { hCtx.scope.IncCounter(metrics.ForwardedPerTaskListCounter) @@ -199,8 +206,11 @@ func (h *handlerImpl) PollForActivityTask( metrics.MatchingPollForActivityTaskScope, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() if request.GetForwardedFrom() != "" { hCtx.scope.IncCounter(metrics.ForwardedPerTaskListCounter) @@ -236,8 +246,11 @@ func (h *handlerImpl) PollForDecisionTask( metrics.MatchingPollForDecisionTaskScope, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() if request.GetForwardedFrom() != "" { hCtx.scope.IncCounter(metrics.ForwardedPerTaskListCounter) @@ -274,8 +287,11 @@ func (h *handlerImpl) QueryWorkflow( metrics.MatchingQueryWorkflowScope, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() if request.GetForwardedFrom() != "" { hCtx.scope.IncCounter(metrics.ForwardedPerTaskListCounter) @@ -304,8 +320,11 @@ func (h *handlerImpl) RespondQueryTaskCompleted( metrics.MatchingRespondQueryTaskCompletedScope, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() // Count the request in the RPS, but we still accept it even if RPS is exceeded h.workerRateLimiter.Allow(quotas.Info{Domain: domainName}) @@ -327,8 +346,11 @@ func (h *handlerImpl) CancelOutstandingPoll(ctx context.Context, metrics.MatchingCancelOutstandingPollScope, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() // Count the request in the RPS, but we still accept it even if RPS is exceeded h.workerRateLimiter.Allow(quotas.Info{Domain: domainName}) @@ -354,8 +376,11 
@@ func (h *handlerImpl) DescribeTaskList( metrics.MatchingDescribeTaskListScope, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() if ok := h.userRateLimiter.Allow(quotas.Info{Domain: domainName}); !ok { return nil, hCtx.handleErr(errMatchingHostThrottle) @@ -381,8 +406,11 @@ func (h *handlerImpl) ListTaskListPartitions( h.logger, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() if ok := h.userRateLimiter.Allow(quotas.Info{Domain: request.GetDomain()}); !ok { return nil, hCtx.handleErr(errMatchingHostThrottle) @@ -408,8 +436,11 @@ func (h *handlerImpl) GetTaskListsByDomain( h.logger, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() if ok := h.userRateLimiter.Allow(quotas.Info{Domain: request.GetDomain()}); !ok { return nil, hCtx.handleErr(errMatchingHostThrottle) @@ -433,8 +464,11 @@ func (h *handlerImpl) UpdateTaskListPartitionConfig( metrics.MatchingUpdateTaskListPartitionConfigScope, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() if ok := h.userRateLimiter.Allow(quotas.Info{Domain: domainName}); !ok { return nil, hCtx.handleErr(errMatchingHostThrottle) @@ -458,8 +492,11 @@ func (h *handlerImpl) RefreshTaskListPartitionConfig( metrics.MatchingRefreshTaskListPartitionConfigScope, ) - sw := hCtx.startProfiling(&h.startWG) - defer sw.Stop() + sw, swStart := hCtx.startProfiling(&h.startWG) + defer func() { + sw.Stop() + hCtx.scope.RecordHistogramDuration(metrics.CadenceLatencyPerTaskListHistogram, time.Since(swStart)) + }() // Count the request in the RPS, but we still accept it even if RPS is exceeded h.userRateLimiter.Allow(quotas.Info{Domain: domainName}) diff --git a/service/matching/tasklist/matcher.go b/service/matching/tasklist/matcher.go index 2e4542400c0..e6ccff609fb 100644 --- a/service/matching/tasklist/matcher.go +++ b/service/matching/tasklist/matcher.go @@ -346,6 +346,7 @@ forLoop: event.Log(e) tm.scope.IncCounter(metrics.AsyncMatchLocalPollCounterPerTaskList) tm.scope.RecordTimer(metrics.AsyncMatchLocalPollAttemptPerTaskList, time.Duration(attempt)) + tm.scope.IntExponentialHistogram(metrics.AsyncMatchLocalPollAttemptPerTaskListHistogram, attempt) tm.scope.RecordTimer(metrics.AsyncMatchLocalPollLatencyPerTaskList, time.Since(startT)) return nil case token := <-tm.fwdrAddReqTokenC(): @@ -377,6 +378,7 @@ forLoop: cancel() tm.scope.IncCounter(metrics.AsyncMatchLocalPollAfterForwardFailedCounterPerTaskList) tm.scope.RecordTimer(metrics.AsyncMatchLocalPollAfterForwardFailedAttemptPerTaskList, time.Duration(attempt)) + tm.scope.IntExponentialHistogram(metrics.AsyncMatchLocalPollAfterForwardFailedAttemptPerTaskListHistogram, attempt) tm.scope.RecordTimer(metrics.AsyncMatchLocalPollAfterForwardFailedLatencyPerTaskList, time.Since(startT)) return nil case <-childCtx.Done(): @@ -391,6 +393,7 @@ forLoop: 
event.Log(e) tm.scope.IncCounter(metrics.AsyncMatchForwardPollCounterPerTaskList) tm.scope.RecordTimer(metrics.AsyncMatchForwardPollAttemptPerTaskList, time.Duration(attempt)) + tm.scope.IntExponentialHistogram(metrics.AsyncMatchForwardPollAttemptPerTaskListHistogram, attempt) tm.scope.RecordTimer(metrics.AsyncMatchForwardPollLatencyPerTaskList, time.Since(startT)) // at this point, we forwarded the task to a parent partition which @@ -438,6 +441,7 @@ func (tm *taskMatcherImpl) Poll(ctx context.Context, isolationGroup string) (*In // try local match first without blocking until context timeout if task, err = tm.pollNonBlocking(ctxWithCancelPropagation, isolatedTaskC, tm.taskC, tm.queryTaskC); err == nil { tm.scope.RecordTimer(metrics.PollLocalMatchLatencyPerTaskList, time.Since(startT)) + tm.scope.RecordHistogramDuration(metrics.PollLocalMatchLatencyPerTaskListHistogram, time.Since(startT)) return task, nil } // there is no local poller available to pickup this task. Now block waiting @@ -466,6 +470,7 @@ func (tm *taskMatcherImpl) PollForQuery(ctx context.Context) (*InternalTask, err // try local match first without blocking until context timeout if task, err := tm.pollNonBlocking(ctx, nil, nil, tm.queryTaskC); err == nil { tm.scope.RecordTimer(metrics.PollLocalMatchLatencyPerTaskList, time.Since(startT)) + tm.scope.RecordHistogramDuration(metrics.PollLocalMatchLatencyPerTaskListHistogram, time.Since(startT)) return task, nil } @@ -498,6 +503,7 @@ func (tm *taskMatcherImpl) pollOrForward( tm.scope.IncCounter(metrics.PollSuccessWithSyncPerTaskListCounter) } tm.scope.RecordTimer(metrics.PollLocalMatchLatencyPerTaskList, time.Since(startT)) + tm.scope.RecordHistogramDuration(metrics.PollLocalMatchLatencyPerTaskListHistogram, time.Since(startT)) tm.scope.IncCounter(metrics.PollSuccessPerTaskListCounter) event.Log(event.E{ TaskListName: tm.tasklist.GetName(), @@ -518,6 +524,7 @@ func (tm *taskMatcherImpl) pollOrForward( tm.scope.IncCounter(metrics.PollSuccessWithSyncPerTaskListCounter) } tm.scope.RecordTimer(metrics.PollLocalMatchLatencyPerTaskList, time.Since(startT)) + tm.scope.RecordHistogramDuration(metrics.PollLocalMatchLatencyPerTaskListHistogram, time.Since(startT)) tm.scope.IncCounter(metrics.PollSuccessPerTaskListCounter) event.Log(event.E{ TaskListName: tm.tasklist.GetName(), @@ -559,6 +566,7 @@ func (tm *taskMatcherImpl) pollOrForward( if task, err := tm.fwdr.ForwardPoll(ctx); err == nil { token.release() tm.scope.RecordTimer(metrics.PollForwardMatchLatencyPerTaskList, time.Since(startT)) + tm.scope.RecordHistogramDuration(metrics.PollForwardMatchLatencyPerTaskListHistogram, time.Since(startT)) event.Log(event.E{ TaskListName: tm.tasklist.GetName(), TaskListType: tm.tasklist.GetType(), @@ -585,6 +593,7 @@ func (tm *taskMatcherImpl) poll( tm.scope.IncCounter(metrics.PollSuccessWithSyncPerTaskListCounter) } tm.scope.RecordTimer(metrics.PollLocalMatchAfterForwardFailedLatencyPerTaskList, time.Since(startT)) + tm.scope.RecordHistogramDuration(metrics.PollLocalMatchAfterForwardFailedLatencyPerTaskListHistogram, time.Since(startT)) tm.scope.IncCounter(metrics.PollSuccessPerTaskListCounter) event.Log(event.E{ TaskListName: tm.tasklist.GetName(), @@ -605,6 +614,7 @@ func (tm *taskMatcherImpl) poll( tm.scope.IncCounter(metrics.PollSuccessWithSyncPerTaskListCounter) } tm.scope.RecordTimer(metrics.PollLocalMatchAfterForwardFailedLatencyPerTaskList, time.Since(startT)) + tm.scope.RecordHistogramDuration(metrics.PollLocalMatchAfterForwardFailedLatencyPerTaskListHistogram, time.Since(startT)) 
tm.scope.IncCounter(metrics.PollSuccessPerTaskListCounter) event.Log(event.E{ TaskListName: tm.tasklist.GetName(), diff --git a/service/matching/tasklist/matcher_test.go b/service/matching/tasklist/matcher_test.go index bd7323f0580..a2d332c388e 100644 --- a/service/matching/tasklist/matcher_test.go +++ b/service/matching/tasklist/matcher_test.go @@ -483,6 +483,8 @@ func (t *MatcherTestSuite) TestMustOfferRemoteRateLimit() { scope := mocks.Scope{} scope.On("IncCounter", metrics.AsyncMatchForwardTaskThrottleErrorPerTasklist) scope.On("RecordTimer", mock.Anything, mock.Anything) + scope.On("IntExponentialHistogram", mock.Anything, mock.AnythingOfType("int")).Return().Maybe() + scope.On("RecordHistogramDuration", mock.Anything, mock.AnythingOfType("time.Duration")).Return().Maybe() t.matcher.scope = &scope completionFunc := func(*persistence.TaskInfo, error) {} for i := 0; i < 5; i++ { diff --git a/service/matching/tasklist/task_reader.go b/service/matching/tasklist/task_reader.go index b5b7fb99ac7..8f81f91a3dc 100644 --- a/service/matching/tasklist/task_reader.go +++ b/service/matching/tasklist/task_reader.go @@ -479,9 +479,11 @@ func (tr *taskReader) dispatchSingleTaskFromBuffer(taskInfo *persistence.TaskInf } task := newInternalTask(taskInfo, tr.completeTask, types.TaskSourceDbBacklog, "", false, nil, isolationGroup) dispatchCtx, cancel := tr.newDispatchContext(isolationGroup, isolationDuration) + asyncMatchStart := time.Now() timerScope := tr.scope.StartTimer(metrics.AsyncMatchLatencyPerTaskList) err := tr.dispatchTask(dispatchCtx, task) timerScope.Stop() + tr.scope.RecordHistogramDuration(metrics.AsyncMatchLatencyPerTaskListHistogram, time.Since(asyncMatchStart)) cancel() if err == nil { diff --git a/service/worker/diagnostics/workflow.go b/service/worker/diagnostics/workflow.go index b38f90eaa17..5857412a6cb 100644 --- a/service/worker/diagnostics/workflow.go +++ b/service/worker/diagnostics/workflow.go @@ -110,8 +110,12 @@ type retryIssuesResult struct { func (w *dw) DiagnosticsWorkflow(ctx workflow.Context, params DiagnosticsWorkflowInput) (*DiagnosticsWorkflowResult, error) { scope := w.metricsClient.Scope(metrics.DiagnosticsWorkflowScope, metrics.DomainTag(params.Domain)) scope.IncCounter(metrics.DiagnosticsWorkflowStartedCount) + diagStart := workflow.Now(ctx) sw := scope.StartTimer(metrics.DiagnosticsWorkflowExecutionLatency) - defer sw.Stop() + defer func() { + sw.Stop() + scope.RecordHistogramDuration(metrics.DiagnosticsWorkflowExecutionLatencyHistogram, workflow.Now(ctx).Sub(diagStart)) + }() var timeoutsResult *timeoutDiagnostics var failureResult *failureDiagnostics diff --git a/service/worker/indexer/esProcessor.go b/service/worker/indexer/esProcessor.go index 172ba2096a5..1e86092edb9 100644 --- a/service/worker/indexer/esProcessor.go +++ b/service/worker/indexer/esProcessor.go @@ -60,6 +60,8 @@ type ( kafkaMessageWithMetrics struct { // value of ESProcessorImpl.mapToKafkaMsg message messaging.Message swFromAddToAck *metrics.Stopwatch // metric from message add to process, to message ack/nack + processStart time.Time + scope metrics.Scope } ) @@ -113,8 +115,9 @@ func (p *ESProcessorImpl) Add(request *bulk.GenericBulkableAddRequest, key strin actionWhenFoundDuplicates := func(key interface{}, value interface{}) error { return kafkaMsg.Ack() } + esProcessStart := time.Now() sw := p.scope.StartTimer(metrics.ESProcessorProcessMsgLatency) - mapVal := newKafkaMessageWithMetrics(kafkaMsg, &sw) + mapVal := newKafkaMessageWithMetrics(kafkaMsg, &sw, esProcessStart, p.scope) _, isDup, _ 
:= p.mapToKafkaMsg.PutOrDo(key, mapVal, actionWhenFoundDuplicates) if isDup { return @@ -360,10 +363,12 @@ func getErrorMsgFromESResp(resp *bulk.GenericBulkResponseItem) string { return errMsg } -func newKafkaMessageWithMetrics(kafkaMsg messaging.Message, stopwatch *metrics.Stopwatch) *kafkaMessageWithMetrics { +func newKafkaMessageWithMetrics(kafkaMsg messaging.Message, stopwatch *metrics.Stopwatch, processStart time.Time, scope metrics.Scope) *kafkaMessageWithMetrics { return &kafkaMessageWithMetrics{ message: kafkaMsg, swFromAddToAck: stopwatch, + processStart: processStart, + scope: scope, } } @@ -371,6 +376,7 @@ func (km *kafkaMessageWithMetrics) Ack() { km.message.Ack() // nolint:errcheck if km.swFromAddToAck != nil { km.swFromAddToAck.Stop() + km.scope.RecordHistogramDuration(metrics.ESProcessorProcessMsgLatencyHistogram, time.Since(km.processStart)) } } @@ -378,5 +384,6 @@ func (km *kafkaMessageWithMetrics) Nack() { km.message.Nack() //nolint:errcheck if km.swFromAddToAck != nil { km.swFromAddToAck.Stop() + km.scope.RecordHistogramDuration(metrics.ESProcessorProcessMsgLatencyHistogram, time.Since(km.processStart)) } } diff --git a/service/worker/indexer/esProcessor_test.go b/service/worker/indexer/esProcessor_test.go index e05db5eeb9a..c26f1425697 100644 --- a/service/worker/indexer/esProcessor_test.go +++ b/service/worker/indexer/esProcessor_test.go @@ -196,9 +196,10 @@ func (s *esProcessorSuite) TestBulkAfterActionX() { } mockKafkaMsg := &msgMocks.Message{} - mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch) + mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch, time.Now(), s.esProcessor.scope) s.esProcessor.mapToKafkaMsg.Put(testKey, mapVal) mockKafkaMsg.On("Ack").Return(nil).Once() + s.mockScope.On("RecordHistogramDuration", metrics.ESProcessorProcessMsgLatencyHistogram, mock.AnythingOfType("time.Duration")).Once() s.esProcessor.bulkAfterAction(0, requests, response, nil) mockKafkaMsg.AssertExpectations(s.T()) } @@ -232,10 +233,11 @@ func (s *esProcessorSuite) TestBulkAfterAction_Nack() { payload := s.getEncodedMsg(wid, rid, domainID) mockKafkaMsg := &msgMocks.Message{} - mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch) + mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch, time.Now(), s.esProcessor.scope) s.esProcessor.mapToKafkaMsg.Put(testKey, mapVal) mockKafkaMsg.On("Nack").Return(nil).Once() mockKafkaMsg.On("Value").Return(payload).Once() + s.mockScope.On("RecordHistogramDuration", metrics.ESProcessorProcessMsgLatencyHistogram, mock.AnythingOfType("time.Duration")).Once() // s.mockBulkProcessor.On("RetrieveKafkaKey", request, mock.Anything, mock.Anything).Return(testKey) s.esProcessor.bulkAfterAction(0, requests, response, nil) mockKafkaMsg.AssertExpectations(s.T()) @@ -270,11 +272,12 @@ func (s *esProcessorSuite) TestBulkAfterAction_Error() { payload := s.getEncodedMsg(wid, rid, domainID) mockKafkaMsg := &msgMocks.Message{} - mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch) + mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch, time.Now(), s.esProcessor.scope) s.esProcessor.mapToKafkaMsg.Put(testKey, mapVal) mockKafkaMsg.On("Nack").Return(nil).Once() mockKafkaMsg.On("Value").Return(payload).Once() s.mockScope.On("IncCounter", metrics.ESProcessorFailures).Once() + s.mockScope.On("RecordHistogramDuration", metrics.ESProcessorProcessMsgLatencyHistogram, mock.AnythingOfType("time.Duration")).Once() s.esProcessor.bulkAfterAction(0, requests, response, &bulk.GenericError{Details: 
fmt.Errorf("some error")}) } @@ -307,12 +310,13 @@ func (s *esProcessorSuite) TestBulkAfterAction_Error_Nack() { payload := s.getEncodedMsg(wid, rid, domainID) mockKafkaMsg := &msgMocks.Message{} - mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch) + mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch, time.Now(), s.esProcessor.scope) s.esProcessor.mapToKafkaMsg.Put(testKey, mapVal) mockKafkaMsg.On("Nack").Return(nil).Once() mockKafkaMsg.On("Ack").Return(nil).Once() // Expect Ack to be called mockKafkaMsg.On("Value").Return(payload).Once() s.mockScope.On("IncCounter", metrics.ESProcessorFailures).Once() + s.mockScope.On("RecordHistogramDuration", metrics.ESProcessorProcessMsgLatencyHistogram, mock.AnythingOfType("time.Duration")).Once() s.esProcessor.bulkAfterAction(0, requests, response, &bulk.GenericError{Status: 404, Details: fmt.Errorf("some error")}) } @@ -329,6 +333,7 @@ func (s *esProcessorSuite) TestAckKafkaMsg() { s.Equal(1, s.esProcessor.mapToKafkaMsg.Len()) mockKafkaMsg.On("Ack").Return(nil).Once() + s.mockScope.On("RecordHistogramDuration", metrics.ESProcessorProcessMsgLatencyHistogram, mock.AnythingOfType("time.Duration")).Once() s.esProcessor.ackKafkaMsg(key) mockKafkaMsg.AssertExpectations(s.T()) s.Equal(0, s.esProcessor.mapToKafkaMsg.Len()) @@ -347,6 +352,7 @@ func (s *esProcessorSuite) TestNackKafkaMsg() { s.Equal(1, s.esProcessor.mapToKafkaMsg.Len()) mockKafkaMsg.On("Nack").Return(nil).Once() + s.mockScope.On("RecordHistogramDuration", metrics.ESProcessorProcessMsgLatencyHistogram, mock.AnythingOfType("time.Duration")).Once() s.esProcessor.nackKafkaMsg(key) mockKafkaMsg.AssertExpectations(s.T()) s.Equal(0, s.esProcessor.mapToKafkaMsg.Len()) @@ -377,7 +383,7 @@ func (s *esProcessorSuite) TestGetMsgWithInfo() { mockKafkaMsg := &msgMocks.Message{} mockKafkaMsg.On("Value").Return(payload).Once() - mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch) + mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch, time.Now(), s.esProcessor.scope) s.esProcessor.mapToKafkaMsg.Put(testKey, mapVal) wid, rid, domainID := s.esProcessor.getMsgWithInfo(testKey) s.Equal(testWid, wid) @@ -389,7 +395,7 @@ func (s *esProcessorSuite) TestGetMsgInfo_Error() { testKey := "test-key" mockKafkaMsg := &msgMocks.Message{} mockKafkaMsg.On("Value").Return([]byte{}).Once() - mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch) + mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch, time.Now(), s.esProcessor.scope) s.esProcessor.mapToKafkaMsg.Put(testKey, mapVal) wid, rid, domainID := s.esProcessor.getMsgWithInfo(testKey) s.Equal("", wid) @@ -523,13 +529,14 @@ func (s *esProcessorSuite) TestBulkAfterAction_Nack_Shadow_WithError() { payload := s.getEncodedMsg(wid, rid, domainID) mockKafkaMsg := &msgMocks.Message{} - mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch) + mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch, time.Now(), s.esProcessor.scope) s.esProcessor.mapToKafkaMsg.Put(testKey, mapVal) // Mock Kafka message Nack and Value mockKafkaMsg.On("Nack").Return(nil).Once() mockKafkaMsg.On("Value").Return(payload).Once() s.mockScope.On("IncCounter", mock.AnythingOfType("metrics.MetricIdx")).Return() + s.mockScope.On("RecordHistogramDuration", metrics.ESProcessorProcessMsgLatencyHistogram, mock.AnythingOfType("time.Duration")).Once() // Execute bulkAfterAction for primary processor with error s.esProcessor.bulkAfterAction(0, requests, response, mockErr) } @@ -563,13 +570,14 @@ func (s 
*esProcessorSuite) TestBulkAfterAction_Shadow_Fail_WithoutError() { payload := s.getEncodedMsg(wid, rid, domainID) mockKafkaMsg := &msgMocks.Message{} - mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch) + mapVal := newKafkaMessageWithMetrics(mockKafkaMsg, &testStopWatch, time.Now(), s.esProcessor.scope) s.esProcessor.mapToKafkaMsg.Put(testKey, mapVal) // Mock Kafka message Nack and Value mockKafkaMsg.On("Nack").Return(nil).Once() mockKafkaMsg.On("Value").Return(payload).Once() s.mockScope.On("IncCounter", mock.AnythingOfType("int")).Return() + s.mockScope.On("RecordHistogramDuration", metrics.ESProcessorProcessMsgLatencyHistogram, mock.AnythingOfType("time.Duration")).Once() // Execute bulkAfterAction for primary processor with error s.esProcessor.bulkAfterAction(0, requests, response, nil) } diff --git a/service/worker/indexer/indexer.go b/service/worker/indexer/indexer.go index 72b1f37fe2d..153980327fc 100644 --- a/service/worker/indexer/indexer.go +++ b/service/worker/indexer/indexer.go @@ -195,9 +195,11 @@ func (i *Indexer) messageProcessLoop(workerWG *sync.WaitGroup) { defer workerWG.Done() for msg := range i.consumer.Messages() { + indexProcessStart := time.Now() sw := i.scope.StartTimer(metrics.IndexProcessorProcessMsgLatency) err := i.process(msg) sw.Stop() + i.scope.RecordHistogramDuration(metrics.IndexProcessorProcessMsgLatencyHistogram, time.Since(indexProcessStart)) if err != nil { msg.Nack() //nolint:errcheck }
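
Every hunk in this change applies one mechanical pattern: capture time.Now() alongside the existing StartTimer call, then wrap the old `defer sw.Stop()` in a closure that also feeds the same interval to RecordHistogramDuration under a parallel *Histogram metric name; integer-valued metrics (queue depth, match attempts) go through IntExponentialHistogram instead. The sketch below is a minimal, self-contained illustration of that pattern, not code from this change: Scope and Stopwatch are reduced stand-ins for the real metrics interfaces, and the recordLatency helper is hypothetical — the diff inlines the closure at every call site, since many of these files are code-generated wrappers.

package main

import (
	"fmt"
	"time"
)

// Stopwatch mirrors the Stop-only surface the wrappers use.
type Stopwatch interface{ Stop() }

// Scope is a reduced stand-in for the subset of the metrics scope used here.
type Scope interface {
	StartTimer(metric string) Stopwatch
	RecordHistogramDuration(metric string, d time.Duration)
}

// printScope is a toy Scope that prints what it would emit.
type printScope struct{}

type printStopwatch struct {
	metric string
	start  time.Time
}

func (s printStopwatch) Stop() {
	fmt.Printf("timer     %s = %v\n", s.metric, time.Since(s.start))
}

func (printScope) StartTimer(metric string) Stopwatch {
	return printStopwatch{metric: metric, start: time.Now()}
}

func (printScope) RecordHistogramDuration(metric string, d time.Duration) {
	fmt.Printf("histogram %s = %v\n", metric, d)
}

// recordLatency is a hypothetical helper capturing the repeated pattern:
// start the legacy timer, and return a func (meant for defer) that stops it
// and records the identical interval into the histogram counterpart.
func recordLatency(scope Scope, timerMetric, histogramMetric string) func() {
	start := time.Now()
	sw := scope.StartTimer(timerMetric)
	return func() {
		sw.Stop()
		scope.RecordHistogramDuration(histogramMetric, time.Since(start))
	}
}

func handleRequest(scope Scope) {
	// Equivalent to the inlined form used throughout the diff:
	//   swStart := time.Now()
	//   sw := scope.StartTimer(metrics.CadenceLatency)
	//   defer func() { sw.Stop(); scope.RecordHistogramDuration(metrics.CadenceLatencyHistogram, time.Since(swStart)) }()
	defer recordLatency(scope, "cadence_latency", "cadence_latency_histogram")()

	time.Sleep(5 * time.Millisecond) // stand-in for the wrapped handler call
}

func main() { handleRequest(printScope{}) }

Note the trailing () in the defer statement: recordLatency runs immediately at function entry, starting both the stopwatch and the histogram clock, and only the returned closure is deferred. Dropping those parentheses would defer the start itself and record a near-zero latency.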