-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpca.html
More file actions
411 lines (370 loc) · 24.1 KB
/
pca.html
File metadata and controls
411 lines (370 loc) · 24.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
<!DOCTYPE html>
<html lang="zh-TW">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>scRNA-seq中的PCA分析互動指南</title>
<script src="https://cdn.tailwindcss.com"></script>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script>
<style>
/* Sizing box for the elbow-plot canvas. The chart is created with
   maintainAspectRatio: false, so it takes its height from this container. */
.chart-container {
  position: relative;
  width: 100%;
  max-width: 800px;
  margin-left: auto;
  margin-right: auto;
  height: 450px;
  max-height: 500px;
}

/* Shorter chart on narrow screens. */
@media (max-width: 768px) {
  .chart-container { height: 350px; }
}

/* Entrance animation applied to the pipeline detail card each time it is re-rendered. */
.fade-in { animation: fadeIn 0.5s ease-in-out; }
@keyframes fadeIn {
  from { opacity: 0; transform: translateY(10px); }
  to { opacity: 1; transform: translateY(0); }
}

/* Smooth scrolling for the in-page nav anchors. This must be set on the root
   element: scroll-behavior declared on <body> is not propagated to the
   viewport, so the previous `body { ... }` rule had no effect. */
html { scroll-behavior: smooth; }

/* Respect the user's OS-level "reduce motion" preference. */
@media (prefers-reduced-motion: reduce) {
  html { scroll-behavior: auto; }
  .fade-in { animation: none; }
}
</style>
<!-- Chosen Palette: Warm Neutrals (Stone/Slate) with Ocean Blue and Teal Accents -->
<!-- Application Structure Plan: A single-page scrolling dashboard. The structure moves logically from "The Problem" (high dimensions), to "The Concept" (interactive 3D to 2D visualization), into the "Standard Pipeline" (interactive tabs), followed by "Parameter Selection" (interactive Elbow plot), and finally "Downstream & Limits" (cards). This flow builds understanding step-by-step. -->
<!-- Visualization & Content Choices:
1. Problem -> HTML Cards with Unicode icons -> Interactive hover to reveal details -> Justification: Easy to read, cleanly separates the three main issues. Library: Tailwind/HTML.
2. Principle -> 3D Scatter Plot -> Interactive rotation and "Project to 2D" button -> Justification: PCA is a spatial transformation; 3D to 2D is the best human-comprehensible proxy for 20,000D to 50D. Library: Plotly.js.
3. Pipeline -> Horizontal/Vertical Interactive Tabs -> Click to show step details -> Justification: Avoids wall-of-text, forces user to process one step at a time. Library: Vanilla JS + Tailwind.
4. PC Selection -> Line Chart (Elbow Plot) -> Hover for variance -> Justification: The Elbow plot is the literal industry standard for this task. Library: Chart.js.
5. Downstream & Limits -> Grid layout text blocks -> Justification: Clear categorization of what comes next vs what PCA cannot do. Library: Tailwind/HTML.
Confirming NO SVG/Mermaid. -->
<!-- CONFIRMATION: NO SVG graphics used. NO Mermaid JS used. -->
</head>
<body class="bg-stone-50 text-stone-800 font-sans antialiased leading-relaxed">
<nav class="sticky top-0 z-50 bg-white/90 backdrop-blur border-b border-stone-200 shadow-sm">
<div class="max-w-6xl mx-auto px-4 sm:px-6 lg:px-8 h-16 flex items-center justify-between">
<div class="font-bold text-xl tracking-tight text-sky-800">scRNA-seq <span class="text-teal-600">PCA</span></div>
<div class="hidden md:flex space-x-8 text-sm font-medium text-stone-600">
<a href="#why" class="hover:text-sky-700 transition">為何需要 PCA?</a>
<a href="#principle" class="hover:text-sky-700 transition">核心原理</a>
<a href="#pipeline" class="hover:text-sky-700 transition">分析流程</a>
<a href="#elbow" class="hover:text-sky-700 transition">挑選 PCs</a>
<a href="#downstream" class="hover:text-sky-700 transition">後續與限制</a>
</div>
</div>
</nav>
<header class="pt-24 pb-16 px-4 sm:px-6 lg:px-8 max-w-4xl mx-auto text-center">
<div class="text-6xl mb-6">🧬 ➔ 📉</div>
<h1 class="text-4xl md:text-5xl font-extrabold tracking-tight text-stone-900 mb-6">單細胞 RNA 定序中的主成分分析 (PCA)</h1>
<p class="text-lg md:text-xl text-stone-600 mb-8 max-w-3xl mx-auto">
在處理擁有兩萬多個基因維度的單細胞數據時,PCA 是不可或缺的<strong class="text-sky-700">「資訊壓縮機」</strong>與<strong class="text-teal-600">「雜訊過濾器」</strong>。它幫助我們從混沌中提取真實的生物學訊號。
</p>
</header>
<main class="max-w-6xl mx-auto px-4 sm:px-6 lg:px-8 pb-24 space-y-32">
<section id="why" class="scroll-mt-24">
<div class="mb-12">
<h2 class="text-3xl font-bold text-stone-800 mb-4 border-b-4 border-sky-200 inline-block pb-2">1. 為什麼在 scRNA-seq 中需要 PCA?</h2>
<p class="text-stone-600 text-lg">人類或小鼠細胞具有超過 20,000 個基因,形成極度高維的數據空間。直接分析這些數據會面臨三大挑戰:</p>
</div>
<div class="grid md:grid-cols-3 gap-6">
<div class="bg-white p-8 rounded-2xl shadow-md border border-stone-100 hover:-translate-y-1 hover:shadow-lg transition duration-300">
<div class="text-4xl mb-4">🌌</div>
<h3 class="text-xl font-bold text-sky-800 mb-3">維度災難 (Curse of Dimensionality)</h3>
<p class="text-stone-600">在高維度空間中,計算細胞與細胞之間的距離(相似度)會變得非常困難且失去數學與生物學意義。</p>
</div>
<div class="bg-white p-8 rounded-2xl shadow-md border border-stone-100 hover:-translate-y-1 hover:shadow-lg transition duration-300">
<div class="text-4xl mb-4">💻</div>
<h3 class="text-xl font-bold text-sky-800 mb-3">計算資源消耗</h3>
<p class="text-stone-600">直接對兩萬多個基因進行複雜的聚類或非線性降維 (如 UMAP),需要極其龐大的記憶體和計算時間。</p>
</div>
<div class="bg-white p-8 rounded-2xl shadow-md border border-stone-100 hover:-translate-y-1 hover:shadow-lg transition duration-300">
<div class="text-4xl mb-4">🧹</div>
<h3 class="text-xl font-bold text-sky-800 mb-3">去除雜訊 (Noise Reduction)</h3>
<p class="text-stone-600">scRNA-seq 充滿技術雜訊。PCA 能提取代表「真實生物學訊號」的主要特徵,將隨機雜訊遺棄在後面的維度中。</p>
</div>
</div>
</section>
<section id="principle" class="scroll-mt-24 bg-stone-100 p-8 md:p-12 rounded-3xl">
<div class="mb-8">
<h2 class="text-3xl font-bold text-stone-800 mb-4 border-b-4 border-teal-200 inline-block pb-2">2. PCA 的基本概念與原理</h2>
<p class="text-stone-600 text-lg">PCA 是一種<strong>線性降維</strong>數學方法。核心思想:尋找能夠<strong>最大化數據變異(Variance)</strong>的方向。透過 PCA,原本 20,000 維的數據,可濃縮到前 10~50 個主成分(PCs)中。</p>
</div>
<div class="grid md:grid-cols-3 gap-8 items-center mb-8">
<div class="md:col-span-1 space-y-4">
  <!-- Card titles are h3: they sit directly under this section's h2, so h4 would skip a heading level. -->
  <div class="bg-white p-4 rounded-xl shadow-sm border-l-4 border-sky-500">
    <h3 class="font-bold text-sky-900">PC1 (第一主成分)</h3>
    <p class="text-sm text-stone-600">捕捉數據中<strong>最大</strong>的變異來源(例如:免疫與上皮細胞的根本差異)。</p>
  </div>
  <div class="bg-white p-4 rounded-xl shadow-sm border-l-4 border-teal-500">
    <h3 class="font-bold text-teal-900">PC2 (第二主成分)</h3>
    <p class="text-sm text-stone-600">與 PC1 垂直(正交),捕捉剩餘數據中最大的變異。</p>
  </div>
  <div class="bg-white p-4 rounded-xl shadow-sm border-l-4 border-stone-400">
    <h3 class="font-bold text-stone-700">後續 PCs...</h3>
    <p class="text-sm text-stone-600">捕捉變異量遞減,越後面的 PC 越可能只包含技術雜訊。</p>
  </div>
</div>
<div class="md:col-span-2">
<div class="bg-white rounded-2xl p-4 shadow-inner">
<div class="flex justify-between items-center mb-2 px-4">
<span class="text-sm font-semibold text-stone-500">互動式概念演示:高維轉低維</span>
<button id="togglePcaBtn" class="bg-sky-600 hover:bg-sky-700 text-white px-4 py-2 rounded-lg text-sm font-medium transition shadow">模擬執行 PCA 降維</button>
</div>
<div id="plotly-container" class="w-full h-[400px]"></div>
<p class="text-xs text-center text-stone-400 mt-2">提示:您可以用滑鼠旋轉或縮放上方的散佈圖。</p>
</div>
</div>
</div>
</section>
<section id="pipeline" class="scroll-mt-24">
<div class="mb-12 text-center max-w-3xl mx-auto">
<h2 class="text-3xl font-bold text-stone-800 mb-4 border-b-4 border-sky-200 inline-block pb-2">3. PCA 在標準分析流程中的位置</h2>
<p class="text-stone-600 text-lg">在 Seurat 或 Scanpy 等軟體中,PCA 發生在資料前處理之後、細胞分群之前。點擊下方步驟查看詳情:</p>
</div>
<div class="flex flex-col md:flex-row gap-6">
<div class="md:w-1/3 flex flex-col space-y-3" id="pipeline-buttons">
</div>
<div class="md:w-2/3 bg-white p-8 rounded-2xl shadow-lg border border-sky-100 flex items-center min-h-[250px]" id="pipeline-content">
</div>
</div>
</section>
<section id="elbow" class="scroll-mt-24 bg-sky-50 p-8 md:p-12 rounded-3xl">
<div class="mb-10 text-center">
<h2 class="text-3xl font-bold text-stone-800 mb-4 border-b-4 border-teal-200 inline-block pb-2">4. 如何決定要使用多少個 PCs?</h2>
<p class="text-stone-600 text-lg max-w-3xl mx-auto">保留太少會遺失罕見亞群特徵;保留太多則引入雜訊。我們通常使用 <strong>碎石圖 (Elbow Plot)</strong> 或 <strong>JackStraw 分析</strong> 來做決定。</p>
</div>
<div class="bg-white rounded-2xl p-6 shadow-md border border-stone-200">
<h3 class="text-xl font-bold text-center text-stone-700 mb-2">碎石圖 / 肘部法則 (Elbow Plot) 範例</h3>
<p class="text-sm text-center text-stone-500 mb-6">觀察曲線急劇下降後變得平緩的「轉折點」,即為建議保留的 PC 數量(通常落在 10-20 之間)。</p>
<div class="chart-container">
  <!-- Canvas pixels are opaque to assistive technology: expose the chart as a
       labelled image and provide fallback content for browsers without canvas. -->
  <canvas id="elbowChart" role="img" aria-label="碎石圖範例:PC1 至 PC20 的解釋變異比例,由 PC1 的 22.5% 急劇下降,約在 PC8 之後趨於平緩。">
    <p>您的瀏覽器不支援 canvas,無法顯示碎石圖。</p>
  </canvas>
</div>
</div>
</section>
<section id="downstream" class="scroll-mt-24 mb-12">
<div class="mb-10">
<h2 class="text-3xl font-bold text-stone-800 mb-4 border-b-4 border-stone-300 inline-block pb-2">5. 後續應用 & 6. 限制</h2>
<p class="text-stone-600 text-lg">確定保留的 PCs(例如前 20 個)後,這 20 維的矩陣將成為所有高階分析的基礎。</p>
</div>
<div class="grid md:grid-cols-2 gap-8">
<div class="space-y-6">
<h3 class="text-2xl font-bold text-sky-800 flex items-center">➔ PCA 的後續應用</h3>
<div class="bg-white p-6 rounded-xl border-l-4 border-teal-500 shadow-sm">
<h4 class="font-bold text-lg mb-2">細胞聚類 (Clustering)</h4>
<p class="text-stone-600">如 Louvain 或 Leiden 演算法,會基於這幾十個 PCs 來構建 K-最近鄰圖 (KNN graph),進而將細胞分群。</p>
</div>
<div class="bg-white p-6 rounded-xl border-l-4 border-teal-500 shadow-sm">
<h4 class="font-bold text-lg mb-2">非線性降維與視覺化 (UMAP / t-SNE)</h4>
<p class="text-stone-600">UMAP/t-SNE 適合 2D 視覺化。計算起點<strong>不是</strong>原始矩陣,而是 PCA 降維後的結果。這能大幅加速運算並獲得更好視覺效果。</p>
</div>
<div class="bg-white p-6 rounded-xl border-l-4 border-teal-500 shadow-sm">
<h4 class="font-bold text-lg mb-2">軌跡推斷 (Trajectory Inference)</h4>
<p class="text-stone-600">許多發育軌跡分析工具(如 Slingshot 等)也是基於 PCA 降維後的空間來推算擬時序 (Pseudotime),藉此描述細胞狀態的轉換。</p>
</div>
</div>
<div class="space-y-6">
<h3 class="text-2xl font-bold text-orange-700 flex items-center">⚠️ PCA 的先天限制</h3>
<div class="bg-orange-50 p-8 rounded-xl border border-orange-200 h-full flex flex-col justify-center">
<div class="text-4xl text-center mb-4 text-orange-400">📈 ➔ 🧬</div>
<h4 class="font-bold text-xl mb-4 text-orange-900 text-center">線性假設的侷限</h4>
<p class="text-orange-800 leading-relaxed text-lg">
PCA 只能捕捉變數之間的<strong>線性關係</strong>。然而,生物系統(如基因調控網路)往往是高度非線性的。
</p>
<p class="text-orange-800 leading-relaxed text-lg mt-4">
這就是為什麼我們不用 PCA 來做最終的 2D 視覺化(它無法很好地展開複雜的細胞分化流形空間),而是仰賴後續的 UMAP 或 t-SNE 進行非線性映射。
</p>
</div>
</div>
</div>
</section>
</main>
<footer class="bg-stone-900 text-stone-400 py-8 text-center">
<p>互動式 scRNA-seq 分析教學文件 • 基於標準資料科學與生物資訊學流程</p>
</footer>
<script>
document.addEventListener('DOMContentLoaded', () => {
// Content for the pipeline step selector, listed in display order.
// Each entry provides:
//   id    - short key for the step (not read by any code visible in this file)
//   title - plain-text label; used for the step button and the detail heading
//   icon  - emoji rendered above the heading in the detail card
//   desc  - HTML fragment (contains <strong>/<br>); it is inserted with innerHTML
//           by renderPipelineContent, so it must remain trusted, author-written markup
const pipelineSteps = [
{
id: 'qc',
title: '1. 品質控制與標準化',
icon: '🔬',
desc: '<strong>(QC & Normalization)</strong><br><br>過濾掉死細胞或品質不佳的細胞(如粒線體基因比例過高),並標準化數據以消除不同細胞測序深度的影響。'
},
{
id: 'hvg',
title: '2. 尋找高變異基因',
icon: '🎯',
desc: '<strong>(Find Highly Variable Genes, HVGs)</strong><br><br>我們通常不會用全部兩萬個基因跑 PCA,而是挑選出在細胞群體間表現量差異最大的 2,000~3,000 個基因(HVGs)。這些基因包含了主要訊號。'
},
{
id: 'scale',
title: '3. 數據縮放',
icon: '⚖️',
desc: '<strong>(Scaling)</strong><br><br>將基因表現量標準化(通常是轉換為 Z-score),讓每個基因在矩陣中的權重平等。這可以避免高表現量的管家基因 (Housekeeping genes) 主導了 PCA 的結果。'
},
{
id: 'pca',
title: '4. 👉 執行 PCA',
icon: '📉',
desc: '<strong>(Run PCA)</strong><br><br>對這 2,000 個高變異基因的縮放矩陣執行線性代數運算(奇異值分解 SVD 或特徵值分解),正式計算出主成分 (PCs)。'
}
];
// --- Pipeline step selector ---------------------------------------------------
// #pipeline-buttons receives one <button> per entry in pipelineSteps;
// #pipeline-content shows the detail card for the currently selected step.
const pipelineBtnContainer = document.getElementById('pipeline-buttons');
const pipelineContent = document.getElementById('pipeline-content');

// Tailwind class lists for the two visual states of a step button.
const PIPELINE_BTN_ACTIVE = 'text-left px-6 py-4 rounded-xl font-bold bg-sky-600 text-white shadow-md transition-all duration-200 transform scale-105';
const PIPELINE_BTN_IDLE = 'text-left px-6 py-4 rounded-xl font-medium bg-stone-100 text-stone-600 hover:bg-stone-200 transition-all duration-200';

/**
 * Select the pipeline step at `index`: restyle every step button and replace
 * the detail card with that step's icon, title and description.
 *
 * @param {number} index Zero-based position in `pipelineSteps`. An index with
 *                       no matching step is ignored instead of throwing.
 */
function renderPipelineContent(index) {
    const step = pipelineSteps[index];
    if (!step) {
        return;
    }

    Array.from(pipelineBtnContainer.children).forEach((btn, i) => {
        const isActive = i === index;
        btn.className = isActive ? PIPELINE_BTN_ACTIVE : PIPELINE_BTN_IDLE;
        // Expose the selection to assistive technology; colour alone is not enough.
        btn.setAttribute('aria-pressed', String(isActive));
    });

    // step.desc is an author-written HTML fragment, hence innerHTML here.
    // The emoji is decorative, so it is hidden from screen readers.
    pipelineContent.innerHTML = `
        <div class="fade-in w-full">
            <div class="text-5xl mb-6 text-sky-700" aria-hidden="true">${step.icon}</div>
            <h3 class="text-2xl font-bold text-stone-800 mb-4">${step.title}</h3>
            <p class="text-lg text-stone-600 leading-relaxed">${step.desc}</p>
        </div>
    `;
}

// Build one button per step. The visual classes are applied by the first
// renderPipelineContent() call below.
pipelineSteps.forEach((step, index) => {
    const btn = document.createElement('button');
    btn.type = 'button';            // never act as an implicit submit button
    btn.textContent = step.title;   // title is plain text; no HTML parsing needed
    btn.addEventListener('click', () => renderPipelineContent(index));
    pipelineBtnContainer.appendChild(btn);
});

// Show the first step on load.
renderPipelineContent(0);
// --- Elbow (scree) plot -------------------------------------------------------
const elbowCtx = document.getElementById('elbowChart').getContext('2d');

// Hard-coded example values: percentage of variance explained by PC1..PC20.
const varianceData = [22.5, 14.2, 9.5, 6.8, 4.2, 3.5, 2.8, 2.1, 1.8, 1.5, 1.3, 1.1, 1.0, 0.9, 0.8, 0.7, 0.65, 0.6, 0.55, 0.5];
// One label per data point, so labels and data cannot drift apart.
const labels = varianceData.map((_, i) => `PC${i + 1}`);

// The point to call out as the "elbow" (PC8, value 2.1).
// The previous `plugins.annotation` block never rendered because
// chartjs-plugin-annotation is not loaded on this page. The elbow is now
// highlighted with Chart.js' built-in per-point (indexable) dataset options
// and an extra tooltip line, which need no plugin.
const ELBOW_INDEX = 7;
const ELBOW_COLOR = '#e11d48';
const POINT_COLOR = '#0f766e';
const perPoint = (elbowValue, defaultValue) =>
    varianceData.map((_, i) => (i === ELBOW_INDEX ? elbowValue : defaultValue));

new Chart(elbowCtx, {
    type: 'line',
    data: {
        labels: labels,
        datasets: [{
            label: '解釋變異比例 (%)',
            data: varianceData,
            borderColor: '#0284c7',
            backgroundColor: 'rgba(2, 132, 199, 0.1)',
            pointBackgroundColor: perPoint(ELBOW_COLOR, POINT_COLOR),
            pointBorderColor: '#fff',
            pointHoverBackgroundColor: '#fff',
            pointHoverBorderColor: perPoint(ELBOW_COLOR, POINT_COLOR),
            borderWidth: 3,
            pointRadius: perPoint(8, 5),
            pointHoverRadius: perPoint(10, 8),
            fill: true,
            tension: 0.3
        }]
    },
    options: {
        responsive: true,
        // Height comes from the .chart-container wrapper, not from an aspect ratio.
        maintainAspectRatio: false,
        plugins: {
            legend: { display: false },
            tooltip: {
                backgroundColor: 'rgba(28, 25, 23, 0.9)',
                titleFont: { size: 14 },
                bodyFont: { size: 14 },
                padding: 12,
                displayColors: false,
                callbacks: {
                    label: function(context) {
                        return `變異比例: ${context.parsed.y}%`;
                    },
                    // Extra line shown only on the highlighted elbow point.
                    afterLabel: function(context) {
                        return context.dataIndex === ELBOW_INDEX ? '轉折點 (Elbow)' : '';
                    }
                }
            }
        },
        scales: {
            y: {
                beginAtZero: true,
                title: { display: true, text: '解釋變異比例 (%)', font: { size: 14, weight: 'bold' } },
                grid: { color: '#f5f5f4' }
            },
            x: {
                grid: { display: false }
            }
        }
    }
});
/**
 * Build a random 3-D point cloud scattered around a centre.
 *
 * Every coordinate is its centre value plus an independent uniform offset of
 * (Math.random() - 0.5) * spread, drawn in x, y, z order for each point.
 *
 * @param {number} centerX Centre of the cloud on the x axis.
 * @param {number} centerY Centre of the cloud on the y axis.
 * @param {number} centerZ Centre of the cloud on the z axis.
 * @param {number} count   Number of points to generate.
 * @param {number} spread  Full width of the random offset range.
 * @returns {{x: number[], y: number[], z: number[]}} Parallel coordinate arrays.
 */
function generateCluster(centerX, centerY, centerZ, count, spread) {
    const cloud = { x: [], y: [], z: [] };
    const offset = () => (Math.random() - 0.5) * spread;

    for (let n = 0; n < count; n += 1) {
        cloud.x.push(centerX + offset());
        cloud.y.push(centerY + offset());
        cloud.z.push(centerZ + offset());
    }

    return cloud;
}
// --- 3-D concept demo: initial "high-dimensional" view -------------------------
// Which view is on screen; flipped by the #togglePcaBtn click handler.
let isPCA = false;

// One entry per simulated cell population, generated in this order.
const POINTS_PER_CLUSTER = 60;
const clusterSpecs = [
    { centre: [2, 2, 2],   spread: 2,   colour: '#0ea5e9', name: '細胞群 A' },
    { centre: [-2, -2, 4], spread: 2.5, colour: '#14b8a6', name: '細胞群 B' },
    { centre: [3, -3, -2], spread: 1.5, colour: '#f59e0b', name: '細胞群 C' }
];

// Plotly traces for the raw 3-D cloud: one scatter3d marker trace per cluster.
const dataHighD = clusterSpecs.map(({ centre, spread, colour, name }) => {
    const pts = generateCluster(centre[0], centre[1], centre[2], POINTS_PER_CLUSTER, spread);
    return {
        mode: 'markers',
        type: 'scatter3d',
        x: pts.x,
        y: pts.y,
        z: pts.z,
        marker: { size: 5, opacity: 0.8, color: colour },
        name: name
    };
});

// Layout for the raw view: axes labelled as three gene-expression dimensions,
// transparent backgrounds so the surrounding card shows through, no legend.
const layoutHighD = {
    margin: { l: 0, r: 0, b: 0, t: 0 },
    scene: {
        xaxis: { title: '基因 X 表現量' },
        yaxis: { title: '基因 Y 表現量' },
        zaxis: { title: '基因 Z 表現量' }
    },
    paper_bgcolor: 'rgba(0,0,0,0)',
    plot_bgcolor: 'rgba(0,0,0,0)',
    showlegend: false
};

// First render, with Plotly's mode bar hidden.
Plotly.newPlot('plotly-container', dataHighD, layoutHighD, { displayModeBar: false });
// Toggle the demo between the raw 3-D cloud and a flattened "after PCA" view.
// The flattened view keeps each point's x/y and sets every z to 0, with the
// axes relabelled as PC1 / PC2 / discarded dimension.
document.getElementById('togglePcaBtn').addEventListener('click', (evt) => {
    const toggleBtn = evt.currentTarget;

    // Already flattened: restore the original cloud and the button's initial look.
    if (isPCA) {
        Plotly.react('plotly-container', dataHighD, layoutHighD);
        toggleBtn.innerText = "模擬執行 PCA 降維";
        toggleBtn.classList.replace('bg-stone-600', 'bg-sky-600');
        toggleBtn.classList.replace('hover:bg-stone-700', 'hover:bg-sky-700');
        isPCA = false;
        return;
    }

    // Copy every trace with a fresh all-zero z array (the originals stay intact
    // so the reset branch can reuse them).
    const flattenedTraces = dataHighD.map((trace) =>
        Object.assign({}, trace, { z: Array.from(trace.z, () => 0) })
    );

    // Same layout as the raw view, with a replacement scene: PC axis titles and
    // a fixed, unlabelled z range.
    const flattenedLayout = Object.assign({}, layoutHighD, {
        scene: {
            xaxis: { title: 'PC1 (主成分 1)' },
            yaxis: { title: 'PC2 (主成分 2)' },
            zaxis: { title: '丟棄的雜訊維度', range: [-5, 5], showticklabels: false }
        }
    });

    Plotly.react('plotly-container', flattenedTraces, flattenedLayout);
    toggleBtn.innerText = "重置為高維度資料";
    toggleBtn.classList.replace('bg-sky-600', 'bg-stone-600');
    toggleBtn.classList.replace('hover:bg-sky-700', 'hover:bg-stone-700');
    isPCA = true;
});
});
</script>
</body>
</html>