Skip to content

Commit fb54ac3

Browse files
committed
feat: add pipeline retry dashboard for failed sectors
Add a new 'Failed Sectors' page to the Curio web UI that shows sectors which have failed during the sealing pipeline, with retry details.

Backend (web/api/webrpc/pipeline_failed.go):
- New FailedSectorDetail struct with pipeline stage booleans and failure info
- New PipelineFailedSectors RPC method querying sectors_sdr_pipeline WHERE failed = true, ordered by failed_at DESC, limited to 100

Frontend (web/static/pages/pipeline_failed/):
- index.html: page shell using curio-ux wrapper
- pipeline-failed.mjs: Lit component with 10s auto-refresh showing:
  - Miner ID, sector number, failure timestamp, last completed stage
  - Failure reason and details (truncated with tooltip)
  - Sector age since creation
  - Color-coded rows: red for <1h, orange for <24h failures
  - Green success message when no sectors have failed

Navigation (web/static/ux/curio-ux.mjs):
- Added 'Failed Sectors' nav item with warning triangle icon after PoRep
1 parent a7dae22 commit fb54ac3

File tree

4 files changed

+280
-0
lines changed

4 files changed

+280
-0
lines changed

web/api/webrpc/pipeline_failed.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package webrpc
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"golang.org/x/xerrors"
8+
)
9+
10+
// FailedSectorDetail is the row type that PipelineFailedSectors scans its
// sectors_sdr_pipeline query into and returns to the caller.
//
// Each `db` tag names a column selected by that query; each `json` tag is the
// key the web UI component reads (for example sector.SpID, sector.FailedAt).
type FailedSectorDetail struct {
11+
SpID int64 `db:"sp_id" json:"SpID"`
12+
SectorNumber int64 `db:"sector_number" json:"SectorNumber"`
13+
CreateTime time.Time `db:"create_time" json:"CreateTime"`
14+
// NOTE(review): NullTime is declared elsewhere in this package. The query
// sorts failed_at with NULLS LAST, so the column is presumably nullable;
// confirm how NullTime is marshalled to JSON, because the UI passes this
// value straight to `new Date(...)`.
FailedAt NullTime `db:"failed_at" json:"FailedAt"`
15+
FailedReason string `db:"failed_reason" json:"FailedReason"`
16+
FailedReasonMsg string `db:"failed_reason_msg" json:"FailedReasonMsg"`
17+
18+
// Pipeline stage info - which stage was reached
// The UI walks these flags from the last one declared here (AfterCommitMsgSuccess)
// back to the first (AfterSDR) and shows the first one that is true as the
// sector's last completed stage, falling back to "New" when none is set.
19+
AfterSDR bool `db:"after_sdr" json:"AfterSDR"`
20+
AfterTreeD bool `db:"after_tree_d" json:"AfterTreeD"`
21+
AfterTreeC bool `db:"after_tree_c" json:"AfterTreeC"`
22+
AfterTreeR bool `db:"after_tree_r" json:"AfterTreeR"`
23+
AfterPrecommitMsg bool `db:"after_precommit_msg" json:"AfterPrecommitMsg"`
24+
AfterPrecommitMsgSuccess bool `db:"after_precommit_msg_success" json:"AfterPrecommitMsgSuccess"`
25+
AfterPorep bool `db:"after_porep" json:"AfterPorep"`
26+
AfterFinalize bool `db:"after_finalize" json:"AfterFinalize"`
27+
AfterMoveStorage bool `db:"after_move_storage" json:"AfterMoveStorage"`
28+
AfterCommitMsg bool `db:"after_commit_msg" json:"AfterCommitMsg"`
29+
AfterCommitMsgSuccess bool `db:"after_commit_msg_success" json:"AfterCommitMsgSuccess"`
30+
}
31+
32+
func (a *WebRPC) PipelineFailedSectors(ctx context.Context) ([]FailedSectorDetail, error) {
33+
var result []FailedSectorDetail
34+
35+
err := a.deps.DB.Select(ctx, &result, `
36+
SELECT sp_id, sector_number, create_time, failed_at, failed_reason, failed_reason_msg,
37+
after_sdr, after_tree_d, after_tree_c, after_tree_r,
38+
after_precommit_msg, after_precommit_msg_success,
39+
after_porep, after_finalize, after_move_storage,
40+
after_commit_msg, after_commit_msg_success
41+
FROM sectors_sdr_pipeline
42+
WHERE failed = true
43+
ORDER BY failed_at DESC NULLS LAST
44+
LIMIT 100`)
45+
if err != nil {
46+
return nil, xerrors.Errorf("failed to fetch failed sectors: %w", err)
47+
}
48+
49+
return result, nil
50+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<title>Curio Failed Sectors</title>
5+
<script src="https://unpkg.com/htmx.org@1.9.5" integrity="sha384-xcuj3WpfgjlKF+FXhSQFQ0ZNr39ln+hwjN3npfM9VBnUskLolQAcN80McRIVOPuO" crossorigin="anonymous"></script>
6+
<script type="module" src="/ux/curio-ux.mjs"></script>
7+
<script type="module" src="/ux/components/Drawer.mjs"></script>
8+
<script type="module" src="/chain-connectivity.mjs"></script>
9+
<script type="module" src="pipeline-failed.mjs"></script>
10+
<link rel="stylesheet" href="/ux/main.css">
11+
</head>
12+
<body style="visibility: hidden">
13+
<curio-ux>
14+
<div class="page" style="margin-left: 20px; margin-right: 10px">
15+
<div class="app-head">
16+
<div class="head-left">
17+
<h1>Failed Sectors</h1>
18+
</div>
19+
<hr/>
20+
</div>
21+
<div class="row">
22+
<div class="row-md-auto" style="width: 100%">
23+
<div class="info-block">
24+
<pipeline-failed-sectors></pipeline-failed-sectors>
25+
</div>
26+
</div>
27+
</div>
28+
</div>
29+
</curio-ux>
30+
</body>
31+
</html>
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
import { LitElement, html, css } from 'https://cdn.jsdelivr.net/gh/lit/dist@3/all/lit-all.min.js';
2+
import RPCCall from '/lib/jsonrpc.mjs';
3+
4+
/**
 * <pipeline-failed-sectors>: table of sealing-pipeline sectors reported by the
 * `PipelineFailedSectors` RPC.
 *
 * The list is re-fetched every REFRESH_INTERVAL_MS while the element is
 * attached to the document. Rows whose failure is less than one hour old are
 * tinted red, and rows less than 24 hours old are tinted orange.
 *
 * Reactive properties:
 *  - data:    array of sector records as returned by the RPC.
 *  - loading: true until the first request has settled.
 *  - error:   message of the most recent failed request, '' when the last
 *             request succeeded.
 */
class PipelineFailedSectors extends LitElement {
    static properties = {
        data: { type: Array },
        loading: { type: Boolean },
        error: { type: String },
    };

    /** Delay between two polls of the RPC, in milliseconds. */
    static REFRESH_INTERVAL_MS = 10000;

    /**
     * Pipeline stage flags paired with their display label, latest stage
     * first, so the first flag that is set names the last completed stage.
     */
    static STAGES = [
        ['AfterCommitMsgSuccess', 'CommitMsgSuccess'],
        ['AfterCommitMsg', 'CommitMsg'],
        ['AfterMoveStorage', 'MoveStorage'],
        ['AfterFinalize', 'Finalize'],
        ['AfterPorep', 'PoRep'],
        ['AfterPrecommitMsgSuccess', 'PrecommitMsgSuccess'],
        ['AfterPrecommitMsg', 'PrecommitMsg'],
        ['AfterTreeR', 'TreeR'],
        ['AfterTreeC', 'TreeC'],
        ['AfterTreeD', 'TreeD'],
        ['AfterSDR', 'SDR'],
    ];

    static styles = css`
        :host {
            color: #d0d0d0;
        }
        .count-header {
            font-size: 1.2em;
            margin-bottom: 1em;
        }
        .count-header .count {
            font-weight: bold;
            color: #ff6b6b;
        }
        .success-message {
            background: rgba(75, 181, 67, 0.15);
            border: 1px solid rgba(75, 181, 67, 0.4);
            border-radius: 8px;
            padding: 2em;
            text-align: center;
            font-size: 1.3em;
            color: #4BB543;
        }
        .error-message {
            background: rgba(180, 40, 40, 0.15);
            border: 1px solid rgba(180, 40, 40, 0.5);
            border-radius: 8px;
            padding: 1em;
            margin-bottom: 1em;
            color: #ff6b6b;
        }
        .row-recent {
            background-color: rgba(180, 40, 40, 0.3) !important;
        }
        .row-day {
            background-color: rgba(180, 120, 40, 0.2) !important;
        }
        .stage-badge {
            display: inline-block;
            padding: 2px 8px;
            border-radius: 4px;
            font-size: 0.85em;
            font-weight: 600;
            background: #444;
            color: #eee;
        }
        .details-cell {
            max-width: 200px;
            overflow: hidden;
            text-overflow: ellipsis;
            white-space: nowrap;
            cursor: help;
        }
        .reason-cell {
            font-family: monospace;
            font-size: 0.9em;
        }
    `;

    // Handle of the pending refresh timer, or null when none is scheduled.
    #refreshTimer = null;
    // True while the element is attached; gates rescheduling of the poll.
    #polling = false;

    constructor() {
        super();
        this.data = [];
        this.loading = true;
        this.error = '';
    }

    /** Starts polling when the element is attached to the document. */
    connectedCallback() {
        super.connectedCallback();
        this.#polling = true;
        this.loadData();
    }

    /** Stops polling so a detached element does not keep issuing requests. */
    disconnectedCallback() {
        super.disconnectedCallback();
        this.#polling = false;
        clearTimeout(this.#refreshTimer);
        this.#refreshTimer = null;
    }

    /**
     * Fetches the failed-sector list once and, while the element is attached,
     * schedules the next fetch.
     *
     * A failed request is logged and recorded in `error`; the previously
     * loaded rows are kept, so a transient RPC failure is not presented as
     * "no failed sectors".
     */
    async loadData() {
        try {
            const result = await RPCCall('PipelineFailedSectors');
            this.data = result || [];
            this.error = '';
        } catch (e) {
            console.error('Failed to load failed sectors:', e);
            this.error = e?.message || String(e);
        }
        this.loading = false;

        if (!this.#polling) return;
        // Replace any timer set by an overlapping call (e.g. the element was
        // detached and re-attached while a request was in flight) so that
        // only one refresh loop survives.
        clearTimeout(this.#refreshTimer);
        this.#refreshTimer = setTimeout(
            () => this.loadData(),
            PipelineFailedSectors.REFRESH_INTERVAL_MS,
        );
    }

    /**
     * Converts a timestamp received from the RPC into a Date.
     *
     * NOTE(review): `FailedAt` is a Go `NullTime`, declared outside this file.
     * If it is serialised as a `{Time, Valid}` object rather than a bare
     * timestamp string, it is unwrapped here; confirm the wire format.
     *
     * @param {string|{Time?: string, Valid?: boolean}|null|undefined} value
     * @returns {Date|null} null for missing, unparsable or pre-2000 values
     *   (the latter is how an unset timestamp is recognised).
     */
    parseTime(value) {
        let raw = value;
        if (raw !== null && typeof raw === 'object') {
            raw = raw.Valid === false ? null : raw.Time;
        }
        if (!raw) return null;

        const date = new Date(raw);
        if (Number.isNaN(date.getTime()) || date.getFullYear() < 2000) return null;
        return date;
    }

    /**
     * @param {object} sector record with the boolean After* stage flags.
     * @returns {string} label of the last completed stage, 'New' if none.
     */
    computeStage(sector) {
        const reached = PipelineFailedSectors.STAGES.find(([flag]) => sector[flag]);
        return reached ? reached[1] : 'New';
    }

    /**
     * @param {*} dateStr timestamp accepted by parseTime.
     * @returns {string} coarse relative age such as '5m ago', '3h ago',
     *   '2d ago' or '1mo ago'; 'just now' below one minute or for timestamps
     *   in the future; '--' when the timestamp is missing or invalid.
     */
    formatAge(dateStr) {
        const date = this.parseTime(dateStr);
        if (!date) return '--';

        const diff = Date.now() - date.getTime();
        if (diff < 60000) return 'just now';

        const minutes = Math.floor(diff / 60000);
        if (minutes < 60) return `${minutes}m ago`;

        const hours = Math.floor(diff / 3600000);
        if (hours < 24) return `${hours}h ago`;

        const days = Math.floor(diff / 86400000);
        if (days < 30) return `${days}d ago`;

        return `${Math.floor(days / 30)}mo ago`;
    }

    /**
     * @param {*} dateStr timestamp accepted by parseTime.
     * @returns {string} locale-formatted date and time, or '--'.
     */
    formatTimestamp(dateStr) {
        const date = this.parseTime(dateStr);
        return date ? date.toLocaleString() : '--';
    }

    /**
     * @param {object} sector record carrying `FailedAt`.
     * @returns {string} 'row-recent' when the failure is under one hour old,
     *   'row-day' when under 24 hours, otherwise ''.
     */
    getRowClass(sector) {
        const failedAt = this.parseTime(sector.FailedAt);
        if (!failedAt) return '';

        const diff = Date.now() - failedAt.getTime();
        if (diff < 3600000) return 'row-recent'; // <1h
        if (diff < 86400000) return 'row-day'; // <24h
        return '';
    }

    /**
     * @param {string} str text to shorten.
     * @param {number} len maximum number of characters kept.
     * @returns {string} `str` unchanged when short enough, otherwise its first
     *   `len` characters followed by an ellipsis; '' for empty input.
     */
    truncate(str, len) {
        if (!str) return '';
        if (str.length <= len) return str;
        return str.substring(0, len) + '…';
    }

    render() {
        // The stylesheets are emitted in every state. The onload handler of
        // main.css makes the page body visible, so it must not depend on the
        // first RPC having completed.
        return html`
            <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
            <link rel="stylesheet" href="/ux/main.css" onload="document.body.style.visibility = 'initial'">
            ${this.renderContent()}
        `;
    }

    /** Renders the loading, error, empty or table view for the current state. */
    renderContent() {
        if (this.loading) {
            return html`<div>Loading...</div>`;
        }

        const sectors = this.data ?? [];
        const errorBanner = this.error
            ? html`<div class="error-message">Failed to load failed sectors: ${this.error}</div>`
            : '';

        if (sectors.length === 0) {
            // Only claim success when the last request actually succeeded.
            return this.error
                ? errorBanner
                : html`<div class="success-message">No failed sectors 🎉</div>`;
        }

        return html`
            ${errorBanner}
            <div class="count-header">
                <span class="count">${sectors.length}</span> Failed Sector${sectors.length !== 1 ? 's' : ''}
            </div>

            <table class="table table-dark table-hover">
                <thead>
                    <tr>
                        <th>Miner</th>
                        <th>Sector #</th>
                        <th>Failed At</th>
                        <th>Stage</th>
                        <th>Reason</th>
                        <th>Details</th>
                        <th>Age</th>
                    </tr>
                </thead>
                <tbody>
                    ${sectors.map((sector) => this.renderRow(sector))}
                </tbody>
            </table>
        `;
    }

    /** Renders one table row; the full failure message is kept in the tooltip. */
    renderRow(sector) {
        return html`
            <tr class="${this.getRowClass(sector)}">
                <td>f0${sector.SpID}</td>
                <td>${sector.SectorNumber}</td>
                <td style="white-space: nowrap">${this.formatTimestamp(sector.FailedAt)}</td>
                <td><span class="stage-badge">${this.computeStage(sector)}</span></td>
                <td class="reason-cell">${sector.FailedReason || '--'}</td>
                <td class="details-cell" title="${sector.FailedReasonMsg || ''}">${this.truncate(sector.FailedReasonMsg, 100) || '--'}</td>
                <td style="white-space: nowrap">${this.formatAge(sector.CreateTime)}</td>
            </tr>
        `;
    }
}
190+
191+
// Register the component under the tag name that index.html places in the page
// (<pipeline-failed-sectors>).
customElements.define('pipeline-failed-sectors', PipelineFailedSectors);

web/static/ux/curio-ux.mjs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,14 @@ class CurioUX extends LitElement {
243243
<span>PoRep</span>
244244
</a>
245245
</li>
246+
<li>
247+
<a href="/pages/pipeline_failed/" class="nav-link text-white ${active=='/pages/pipeline_failed/'? 'active':''}">
248+
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi me-2" viewBox="0 0 16 16">
249+
<path d="M8.982 1.566a1.13 1.13 0 0 0-1.96 0L.165 13.233c-.457.778.091 1.767.98 1.767h13.713c.889 0 1.438-.99.98-1.767zM8 5c.535 0 .954.462.9.995l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995A.905.905 0 0 1 8 5m.002 6a1 1 0 1 1 0 2 1 1 0 0 1 0-2"/>
250+
</svg>
251+
<span>Failed Sectors</span>
252+
</a>
253+
</li>
246254
<li>
247255
<a href="/snap/" class="nav-link text-white ${active=='/snap/'? 'active':''}">
248256
<svg class="bi me-2 bi-backpack" xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" viewBox="0 0 16 16">

0 commit comments

Comments
 (0)