|
5 | 5 | <head> |
6 | 6 | <meta charset="utf-8" /> |
7 | 7 | <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
8 | | - <title>pythainlp.benchmarks.metrics — PyThaiNLP 9dfae5a documentation</title> |
| 8 | + <title>pythainlp.benchmarks.metrics — PyThaiNLP 0207d40 documentation</title> |
9 | 9 | <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=03e43079" /> |
10 | 10 | <link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=9edc463e" /> |
11 | 11 | <link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" /> |
|
14 | 14 |
|
15 | 15 | <script src="../../../_static/jquery.js?v=5d32c60e"></script> |
16 | 16 | <script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script> |
17 | | - <script src="../../../_static/documentation_options.js?v=8559df46"></script> |
| 17 | + <script src="../../../_static/documentation_options.js?v=ec1a47bb"></script> |
18 | 18 | <script src="../../../_static/doctools.js?v=fd6eb6e6"></script> |
19 | 19 | <script src="../../../_static/sphinx_highlight.js?v=6ffebe34"></script> |
20 | 20 | <script src="../../../_static/clipboard.min.js?v=a7894cd8"></script> |
@@ -246,24 +246,22 @@ <h1>Source code for pythainlp.benchmarks.metrics</h1><div class="highlight"><pre |
246 | 246 | <span class="sd"> :rtype: BleuScore</span> |
247 | 247 |
|
248 | 248 | <span class="sd"> :Example:</span> |
249 | | -<span class="sd"> ::</span> |
250 | 249 |
|
251 | | -<span class="sd"> from pythainlp.benchmarks import bleu_score</span> |
252 | | - |
253 | | -<span class="sd"> references = ["สวัสดีครับ วันนี้อากาศดีมาก"]</span> |
254 | | -<span class="sd"> hypotheses = ["สวัสดีค่ะ วันนี้อากาศดี"]</span> |
255 | | -<span class="sd"> score = bleu_score(references, hypotheses)</span> |
256 | | -<span class="sd"> print(f"BLEU score: {score['bleu']:.2f}")</span> |
257 | | - |
258 | | -<span class="sd"> ::</span> |
259 | | - |
260 | | -<span class="sd"> # Multiple references per hypothesis</span> |
261 | | -<span class="sd"> references = [</span> |
262 | | -<span class="sd"> ["สวัสดีครับ", "สวัสดีค่ะ"], # two refs for first hypothesis</span> |
263 | | -<span class="sd"> ["ลาก่อนครับ", "ลาก่อนค่ะ"], # two refs for second hypothesis</span> |
264 | | -<span class="sd"> ]</span> |
265 | | -<span class="sd"> hypotheses = ["สวัสดี", "ลาก่อน"]</span> |
266 | | -<span class="sd"> score = bleu_score(references, hypotheses)</span> |
| 250 | +<span class="sd"> >>> from pythainlp.benchmarks import bleu_score</span> |
| 251 | + |
| 252 | +<span class="sd"> >>> references = ["สวัสดีครับ วันนี้อากาศดีมาก"]</span> |
| 253 | +<span class="sd"> >>> hypotheses = ["สวัสดีค่ะ วันนี้อากาศดี"]</span> |
| 254 | +<span class="sd"> >>> score = bleu_score(references, hypotheses)</span> |
| 255 | +<span class="sd"> >>> print(f"BLEU score: {score['bleu']:.2f}")</span> |
| 256 | +<span class="sd"> BLEU score: 28.12</span> |
| 257 | + |
| 258 | +<span class="sd"> >>> # Multiple references per hypothesis</span> |
| 259 | +<span class="sd"> >>> references = [</span> |
| 260 | +<span class="sd"> ... ["สวัสดีครับ", "สวัสดีค่ะ"], # two refs for first hypothesis</span> |
| 261 | +<span class="sd"> ... ["ลาก่อนครับ", "ลาก่อนค่ะ"], # two refs for second hypothesis</span> |
| 262 | +<span class="sd"> ... ]</span> |
| 263 | +<span class="sd"> >>> hypotheses = ["สวัสดี", "ลาก่อน"]</span> |
| 264 | +<span class="sd"> >>> score = bleu_score(references, hypotheses)</span> |
267 | 265 | <span class="sd"> """</span> |
268 | 266 | <span class="kn">from</span><span class="w"> </span><span class="nn">pythainlp.tokenize</span><span class="w"> </span><span class="kn">import</span> <span class="n">word_tokenize</span> |
269 | 267 |
|
@@ -400,16 +398,18 @@ <h1>Source code for pythainlp.benchmarks.metrics</h1><div class="highlight"><pre |
400 | 398 | <span class="sd"> :rtype: dict[str, RougeScore]</span> |
401 | 399 |
|
402 | 400 | <span class="sd"> :Example:</span> |
403 | | -<span class="sd"> ::</span> |
404 | | - |
405 | | -<span class="sd"> from pythainlp.benchmarks import rouge_score</span> |
406 | 401 |
|
407 | | -<span class="sd"> reference = "สวัสดีครับ วันนี้อากาศดีมาก"</span> |
408 | | -<span class="sd"> hypothesis = "สวัสดีค่ะ วันนี้อากาศดี"</span> |
409 | | -<span class="sd"> scores = rouge_score(reference, hypothesis)</span> |
410 | | -<span class="sd"> print(f"ROUGE-1 F-measure: {scores['rouge1']['fmeasure']:.4f}")</span> |
411 | | -<span class="sd"> print(f"ROUGE-2 F-measure: {scores['rouge2']['fmeasure']:.4f}")</span> |
412 | | -<span class="sd"> print(f"ROUGE-L F-measure: {scores['rougeL']['fmeasure']:.4f}")</span> |
| 402 | +<span class="sd"> >>> from pythainlp.benchmarks import rouge_score</span> |
| 403 | + |
| 404 | +<span class="sd"> >>> reference = "สวัสดีครับ วันนี้อากาศดีมาก"</span> |
| 405 | +<span class="sd"> >>> hypothesis = "สวัสดีค่ะ วันนี้อากาศดี"</span> |
| 406 | +<span class="sd"> >>> scores = rouge_score(reference, hypothesis)</span> |
| 407 | +<span class="sd"> >>> print(f"ROUGE-1 F-measure: {scores['rouge1']['fmeasure']:.4f}")</span> |
| 408 | +<span class="sd"> ROUGE-1 F-measure: 0.6000</span> |
| 409 | +<span class="sd"> >>> print(f"ROUGE-2 F-measure: {scores['rouge2']['fmeasure']:.4f}")</span> |
| 410 | +<span class="sd"> ROUGE-2 F-measure: 0.2500</span> |
| 411 | +<span class="sd"> >>> print(f"ROUGE-L F-measure: {scores['rougeL']['fmeasure']:.4f}")</span> |
| 412 | +<span class="sd"> ROUGE-L F-measure: 0.6000</span> |
413 | 413 | <span class="sd"> """</span> |
414 | 414 | <span class="kn">from</span><span class="w"> </span><span class="nn">pythainlp.tokenize</span><span class="w"> </span><span class="kn">import</span> <span class="n">word_tokenize</span> |
415 | 415 |
|
@@ -512,14 +512,14 @@ <h1>Source code for pythainlp.benchmarks.metrics</h1><div class="highlight"><pre |
512 | 512 | <span class="sd"> :rtype: float</span> |
513 | 513 |
|
514 | 514 | <span class="sd"> :Example:</span> |
515 | | -<span class="sd"> ::</span> |
516 | 515 |
|
517 | | -<span class="sd"> from pythainlp.benchmarks import word_error_rate</span> |
| 516 | +<span class="sd"> >>> from pythainlp.benchmarks import word_error_rate</span> |
518 | 517 |
|
519 | | -<span class="sd"> reference = "สวัสดีครับ วันนี้อากาศดีมาก"</span> |
520 | | -<span class="sd"> hypothesis = "สวัสดีค่ะ วันนี้อากาศดี"</span> |
521 | | -<span class="sd"> wer = word_error_rate(reference, hypothesis)</span> |
522 | | -<span class="sd"> print(f"WER: {wer:.4f}")</span> |
| 518 | +<span class="sd"> >>> reference = "สวัสดีครับ วันนี้อากาศดีมาก"</span> |
| 519 | +<span class="sd"> >>> hypothesis = "สวัสดีค่ะ วันนี้อากาศดี"</span> |
| 520 | +<span class="sd"> >>> wer = word_error_rate(reference, hypothesis)</span> |
| 521 | +<span class="sd"> >>> print(f"WER: {wer:.4f}")</span> |
| 522 | +<span class="sd"> WER: 0.4000</span> |
523 | 523 | <span class="sd"> """</span> |
524 | 524 | <span class="kn">from</span><span class="w"> </span><span class="nn">pythainlp.tokenize</span><span class="w"> </span><span class="kn">import</span> <span class="n">word_tokenize</span> |
525 | 525 |
|
@@ -593,14 +593,14 @@ <h1>Source code for pythainlp.benchmarks.metrics</h1><div class="highlight"><pre |
593 | 593 | <span class="sd"> :rtype: float</span> |
594 | 594 |
|
595 | 595 | <span class="sd"> :Example:</span> |
596 | | -<span class="sd"> ::</span> |
597 | 596 |
|
598 | | -<span class="sd"> from pythainlp.benchmarks import character_error_rate</span> |
| 597 | +<span class="sd"> >>> from pythainlp.benchmarks import character_error_rate</span> |
599 | 598 |
|
600 | | -<span class="sd"> reference = "สวัสดีครับ"</span> |
601 | | -<span class="sd"> hypothesis = "สวัสดีค่ะ"</span> |
602 | | -<span class="sd"> cer = character_error_rate(reference, hypothesis)</span> |
603 | | -<span class="sd"> print(f"CER: {cer:.4f}")</span> |
| 599 | +<span class="sd"> >>> reference = "สวัสดีครับ"</span> |
| 600 | +<span class="sd"> >>> hypothesis = "สวัสดีค่ะ"</span> |
| 601 | +<span class="sd"> >>> cer = character_error_rate(reference, hypothesis)</span> |
| 602 | +<span class="sd"> >>> print(f"CER: {cer:.4f}")</span> |
| 603 | +<span class="sd"> CER: 0.3000</span> |
604 | 604 | <span class="sd"> """</span> |
605 | 605 | <span class="c1"># Work with characters directly (no tokenization needed)</span> |
606 | 606 | <span class="n">ref_chars</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">reference</span><span class="p">)</span> |
|
0 commit comments