|
4 | 4 | "metadata": {
|
5 | 5 | "category_depth": 0,
|
6 | 6 | "filename": "example.pdf",
|
| 7 | + "filetype": "text/html", |
| 8 | + "languages": [ |
| 9 | + "eng" |
| 10 | + ], |
7 | 11 | "page_number": 1,
|
8 | 12 | "parent_id": "897a8a47377c4ad6aab839a929879537",
|
9 | 13 | "text_as_html": "<div class=\"Page\" data-page-number=\"1\" id=\"3a6b156a81764e17be128264241f8136\" />"
|
|
16 | 20 | "metadata": {
|
17 | 21 | "category_depth": 1,
|
18 | 22 | "filename": "example.pdf",
|
| 23 | + "filetype": "text/html", |
| 24 | + "languages": [ |
| 25 | + "eng" |
| 26 | + ], |
19 | 27 | "page_number": 1,
|
20 | 28 | "parent_id": "3a6b156a81764e17be128264241f8136",
|
21 | 29 | "text_as_html": "<header class=\"Header\" id=\"45b3d0053468484ba1c7b53998115412\" />"
|
|
28 | 36 | "metadata": {
|
29 | 37 | "category_depth": 2,
|
30 | 38 | "filename": "example.pdf",
|
| 39 | + "filetype": "text/html", |
| 40 | + "languages": [ |
| 41 | + "eng" |
| 42 | + ], |
31 | 43 | "page_number": 1,
|
32 | 44 | "parent_id": "45b3d0053468484ba1c7b53998115412",
|
33 |
| - "text_as_html": "<h1 class=\"Title\" id=\"c95473e8a3704fc2b418697f9fddb27b\">Header </h1>" |
| 45 | + "text_as_html": "<h1 class=\"Title\" id=\"c95473e8a3704fc2b418697f9fddb27b\">Header</h1>" |
34 | 46 | },
|
35 | 47 | "text": "Header",
|
36 | 48 | "type": "Title"
|
|
40 | 52 | "metadata": {
|
41 | 53 | "category_depth": 2,
|
42 | 54 | "filename": "example.pdf",
|
| 55 | + "filetype": "text/html", |
| 56 | + "languages": [ |
| 57 | + "eng" |
| 58 | + ], |
43 | 59 | "page_number": 1,
|
44 | 60 | "parent_id": "45b3d0053468484ba1c7b53998115412",
|
45 |
| - "text_as_html": "<time class=\"CalendarDate\" id=\"379cbfdc16d44bd6a59e6cfabe6438d5\">Date: October 30, 2023 </time>" |
| 61 | + "text_as_html": "<time class=\"CalendarDate\" id=\"379cbfdc16d44bd6a59e6cfabe6438d5\">Date: October 30, 2023</time>" |
46 | 62 | },
|
47 | 63 | "text": "Date: October 30, 2023",
|
48 | 64 | "type": "UncategorizedText"
|
|
52 | 68 | "metadata": {
|
53 | 69 | "category_depth": 1,
|
54 | 70 | "filename": "example.pdf",
|
| 71 | + "filetype": "text/html", |
| 72 | + "languages": [ |
| 73 | + "eng" |
| 74 | + ], |
55 | 75 | "page_number": 1,
|
56 | 76 | "parent_id": "3a6b156a81764e17be128264241f8136",
|
57 |
| - "text_as_html": "<form class=\"Form\" id=\"637c2f6935fb4353a5f73025ce04619d\"> <label class=\"FormField\" for=\"company-name\" id=\"50027cccbe1948c9853ce0de37b635c2\">From field name </label><input class=\"FormFieldValue\" id=\"0032242af75c4b37984ea7fea9aac74c\" value=\"Example value\" /></form>" |
| 77 | + "text_as_html": "<form class=\"Form\" id=\"637c2f6935fb4353a5f73025ce04619d\"><label class=\"FormField\" for=\"company-name\" id=\"50027cccbe1948c9853ce0de37b635c2\">From field name</label><input class=\"FormFieldValue\" id=\"0032242af75c4b37984ea7fea9aac74c\" value=\"Example value\" /></form>" |
58 | 78 | },
|
59 | 79 | "text": "From field name Example value",
|
60 | 80 | "type": "UncategorizedText"
|
|
64 | 84 | "metadata": {
|
65 | 85 | "category_depth": 1,
|
66 | 86 | "filename": "example.pdf",
|
| 87 | + "filetype": "text/html", |
| 88 | + "languages": [ |
| 89 | + "eng" |
| 90 | + ], |
67 | 91 | "page_number": 1,
|
68 | 92 | "parent_id": "3a6b156a81764e17be128264241f8136",
|
69 | 93 | "text_as_html": "<section class=\"Section\" id=\"592422373ed741b68a077e2003f8ed81\" />"
|
|
76 | 100 | "metadata": {
|
77 | 101 | "category_depth": 2,
|
78 | 102 | "filename": "example.pdf",
|
| 103 | + "filetype": "text/html", |
| 104 | + "languages": [ |
| 105 | + "eng" |
| 106 | + ], |
79 | 107 | "page_number": 1,
|
80 | 108 | "parent_id": "592422373ed741b68a077e2003f8ed81",
|
81 |
| - "text_as_html": "<table class=\"Table\" id=\"dc3792d4422e444f90876b56d0cfb20d\"> <thead> <tr> <th>Description</th><th>Row header</th></tr></thead><tbody> <tr> <td>Value description</td><td>50 $ (1.32 %)</td></tr></tbody></table>" |
| 109 | + "text_as_html": "<table class=\"Table\" id=\"dc3792d4422e444f90876b56d0cfb20d\"><thead><tr><th>Description</th><th>Row header</th></tr></thead><tbody><tr><td>Value description</td><td><span>50 $</span><span>(1.32 %)</span></td></tr></tbody></table>" |
82 | 110 | },
|
83 | 111 | "text": "Description Row header Value description 50 $ (1.32 %)",
|
84 | 112 | "type": "Table"
|
|
88 | 116 | "metadata": {
|
89 | 117 | "category_depth": 1,
|
90 | 118 | "filename": "example.pdf",
|
| 119 | + "filetype": "text/html", |
| 120 | + "languages": [ |
| 121 | + "eng" |
| 122 | + ], |
91 | 123 | "page_number": 1,
|
92 | 124 | "parent_id": "3a6b156a81764e17be128264241f8136",
|
93 | 125 | "text_as_html": "<section class=\"Section\" id=\"1032242af75c4b37984ea7fea9aac74c\" />"
|
|
100 | 132 | "metadata": {
|
101 | 133 | "category_depth": 2,
|
102 | 134 | "filename": "example.pdf",
|
| 135 | + "filetype": "text/html", |
| 136 | + "languages": [ |
| 137 | + "eng" |
| 138 | + ], |
103 | 139 | "page_number": 1,
|
104 | 140 | "parent_id": "1032242af75c4b37984ea7fea9aac74c",
|
105 |
| - "text_as_html": "<h2 class=\"Subtitle\" id=\"2a4e2c4a689f4f9a8c180b6b521e45c3\">2. Subtitle </h2>" |
| 141 | + "text_as_html": "<h2 class=\"Subtitle\" id=\"2a4e2c4a689f4f9a8c180b6b521e45c3\">2. Subtitle</h2>" |
106 | 142 | },
|
107 | 143 | "text": "2. Subtitle",
|
108 | 144 | "type": "Title"
|
|
112 | 148 | "metadata": {
|
113 | 149 | "category_depth": 2,
|
114 | 150 | "filename": "example.pdf",
|
| 151 | + "filetype": "text/html", |
| 152 | + "languages": [ |
| 153 | + "eng" |
| 154 | + ], |
115 | 155 | "page_number": 1,
|
116 | 156 | "parent_id": "1032242af75c4b37984ea7fea9aac74c",
|
117 |
| - "text_as_html": "<p class=\"NarrativeText\" id=\"5591f7a4df01447e82515ce45f686fbe\">Paragraph text </p>" |
| 157 | + "text_as_html": "<p class=\"NarrativeText\" id=\"5591f7a4df01447e82515ce45f686fbe\">Paragraph text</p>" |
118 | 158 | },
|
119 | 159 | "text": "Paragraph text",
|
120 | 160 | "type": "NarrativeText"
|
|
0 commit comments