Skip to content

Commit f3ca590

Browse files
committed
setup ci
1 parent 7d7a157 commit f3ca590

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1189
-671
lines changed

.github/workflows/ci.yml

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Continuous integration: tests, formatting, lints, dependency audit and a
# plain `cargo check`, run for pushes and pull requests on main/master.
name: CI

on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

env:
  CARGO_TERM_COLOR: always

jobs:
  # Runs the test suite once per toolchain channel listed in the matrix.
  test:
    name: Test
    runs-on: ubuntu-latest
    strategy:
      matrix:
        rust:
          - stable
          - beta
          - nightly
    steps:
      - uses: actions/checkout@v4
      # With the @master ref the channel is taken from the `toolchain` input.
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ matrix.rust }}
      - uses: Swatinem/rust-cache@v2
      - name: Run tests
        run: cargo test --all-features

  # Fails when the sources are not formatted according to rustfmt.
  fmt:
    name: Rustfmt
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt
      - name: Enforce formatting
        run: cargo fmt --check

  # Lints every target; `-D warnings` turns any clippy warning into an error.
  clippy:
    name: Clippy
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: clippy
      - uses: Swatinem/rust-cache@v2
      - name: Linting
        run: cargo clippy --all-features --all-targets -- -D warnings

  # Audits dependencies with the RustSec audit-check action.
  security_audit:
    name: Security audit
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # NOTE(review): the action reference was mangled by e-mail obfuscation
      # ("rustsec/[email protected]"); v1.4.1 is a presumed reconstruction,
      # confirm the intended version against the repository.
      - uses: rustsec/audit-check@v1.4.1
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

  # Type-checks the crate with all features enabled.
  check:
    name: Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - name: Check
        run: cargo check --all-features

.github/workflows/docs.yml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Builds the API documentation for pushes and pull requests on main/master
# and fails on any rustdoc warning.
name: Documentation

on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

env:
  CARGO_TERM_COLOR: always

jobs:
  docs:
    name: Documentation
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      # A single build is sufficient: the previous layout ran the identical
      # `cargo doc` command twice, and only the second run (with
      # RUSTDOCFLAGS set) could fail on warnings such as broken doc links.
      - name: Build documentation and check links
        run: cargo doc --no-deps --all-features
        env:
          RUSTDOCFLAGS: "-D warnings"

.github/workflows/release.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Release pipeline, triggered by pushing a version tag (vX.Y.Z):
# test -> create the GitHub release -> publish the crate to crates.io.
name: Release

on:
  push:
    tags:
      - 'v*.*.*'

env:
  CARGO_TERM_COLOR: always

jobs:
  create_release:
    name: Create Release
    runs-on: ubuntu-latest
    # Do not create a public release for a tag whose checks fail.
    needs: test
    # Creating a release needs write access to repository contents; grant it
    # explicitly in case the repository's default GITHUB_TOKEN is read-only.
    permissions:
      contents: write
    outputs:
      upload_url: ${{ steps.create_release.outputs.upload_url }}
    steps:
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ github.ref }}
          release_name: Release ${{ github.ref }}
          draft: false
          prerelease: false

  # Gate for the release: tests, formatting and lints must all pass.
  test:
    name: Test before release
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # rustfmt and clippy are requested explicitly because the steps below
      # invoke `cargo fmt` and `cargo clippy`.
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt, clippy
      - uses: Swatinem/rust-cache@v2
      - name: Run tests
        run: cargo test --all-features
      - name: Check formatting
        run: cargo fmt --check
      - name: Run clippy
        run: cargo clippy --all-features --all-targets -- -D warnings

  publish:
    name: Publish to crates.io
    runs-on: ubuntu-latest
    needs: [create_release, test]
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      # The token is passed through the environment instead of being
      # interpolated into a `cargo login <token>` command line.
      - name: Publish to crates.io
        run: cargo publish --all-features
        env:
          CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}

Cargo.toml

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,20 @@ name = "hwpers"
33
version = "0.1.0"
44
edition = "2021"
55
authors = ["HWP Parser Contributors"]
6-
description = "A Rust library for parsing Hangul Word Processor (HWP) files"
6+
description = "A Rust library for parsing Korean Hangul Word Processor (HWP) files with full layout rendering support"
77
license = "MIT OR Apache-2.0"
88
repository = "https://github.com/yourusername/hwpers"
9-
keywords = ["hwp", "parser", "hangul", "document"]
10-
categories = ["parser-implementations", "text-processing"]
9+
documentation = "https://docs.rs/hwpers"
10+
homepage = "https://github.com/yourusername/hwpers"
11+
readme = "README.md"
12+
keywords = ["hwp", "parser", "hangul", "document", "korean"]
13+
categories = ["parser-implementations", "text-processing", "rendering"]
14+
include = [
15+
"src/**/*",
16+
"Cargo.toml",
17+
"README.md",
18+
"LICENSE-*",
19+
]
1120

1221
[dependencies]
1322
cfb = "0.9"

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2024 HWP Parser Contributors
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
# hwpers
2+
3+
[![Crates.io](https://img.shields.io/crates/v/hwpers.svg)](https://crates.io/crates/hwpers)
4+
[![Documentation](https://docs.rs/hwpers/badge.svg)](https://docs.rs/hwpers)
5+
[![CI](https://github.com/yourusername/hwpers/workflows/CI/badge.svg)](https://github.com/yourusername/hwpers/actions)
6+
[![License](https://img.shields.io/badge/license-MIT%2FApache--2.0-blue.svg)](LICENSE-MIT)
7+
8+
A Rust library for parsing Korean Hangul Word Processor (HWP) files with full layout rendering support.
9+
10+
## Features
11+
12+
- **Complete HWP 5.0 Format Support**: Parse all document components including text, formatting, tables, and embedded objects
13+
- **Visual Layout Rendering**: Reconstruct documents with pixel-perfect accuracy when layout data is available
14+
- **Font and Style Preservation**: Extract and apply original fonts, sizes, colors, and text formatting
15+
- **Advanced Layout Engine**: Support for multi-column layouts, line-by-line positioning, and character-level formatting
16+
- **SVG Export**: Render documents to scalable vector graphics
17+
- **Zero-copy Parsing**: Efficient parsing with minimal memory allocation
18+
- **Safe Rust**: Memory-safe implementation with comprehensive error handling
19+
20+
## Quick Start
21+
22+
Add this to your `Cargo.toml`:
23+
24+
```toml
25+
[dependencies]
26+
hwpers = "0.1"
27+
```
28+
29+
### Basic Usage
30+
31+
```rust
32+
use hwpers::HwpReader;
33+
34+
// Parse an HWP file
35+
let document = HwpReader::from_file("document.hwp")?;
36+
37+
// Extract text content
38+
let text = document.extract_text();
39+
println!("{}", text);
40+
41+
// Access document properties
42+
if let Some(props) = document.get_properties() {
43+
println!("Pages: {}", props.total_page_count);
44+
}
45+
46+
// Iterate through sections and paragraphs
47+
for (i, section) in document.sections().enumerate() {
48+
println!("Section {}: {} paragraphs", i, section.paragraphs.len());
49+
50+
for paragraph in &section.paragraphs {
51+
if let Some(text) = &paragraph.text {
52+
println!(" {}", text.content);
53+
}
54+
}
55+
}
56+
```
57+
58+
### Visual Layout Rendering
59+
60+
```rust
61+
use hwpers::{HwpReader, render::{HwpRenderer, RenderOptions}};
62+
63+
let document = HwpReader::from_file("document.hwp")?;
64+
65+
// Create renderer with custom options
66+
let options = RenderOptions {
67+
dpi: 96,
68+
scale: 1.0,
69+
show_margins: false,
70+
show_baselines: false,
71+
};
72+
73+
let renderer = HwpRenderer::new(&document, options);
74+
let result = renderer.render();
75+
76+
// Export first page to SVG
77+
if let Some(svg) = result.to_svg(0) {
78+
std::fs::write("page1.svg", svg)?;
79+
}
80+
81+
println!("Rendered {} pages", result.pages.len());
82+
```
83+
84+
### Advanced Formatting Access
85+
86+
```rust
87+
// Access character and paragraph formatting
88+
for section in document.sections() {
89+
for paragraph in &section.paragraphs {
90+
// Get paragraph formatting
91+
if let Some(para_shape) = document.get_para_shape(paragraph.para_shape_id as usize) {
92+
println!("Indent: {}, Alignment: {}",
93+
para_shape.indent,
94+
para_shape.get_alignment()
95+
);
96+
}
97+
98+
// Get character formatting runs
99+
if let Some(char_shapes) = &paragraph.char_shapes {
100+
for pos_shape in &char_shapes.char_positions {
101+
if let Some(char_shape) = document.get_char_shape(pos_shape.char_shape_id as usize) {
102+
println!("Position {}: Size {}, Bold: {}",
103+
pos_shape.position,
104+
char_shape.base_size / 100,
105+
char_shape.is_bold()
106+
);
107+
}
108+
}
109+
}
110+
}
111+
}
112+
```
113+
114+
## Supported Features
115+
116+
### Document Structure
117+
- ✅ File header and version detection
118+
- ✅ Document properties and metadata
119+
- ✅ Section definitions and page layout
120+
- ✅ Paragraph and character formatting
121+
- ✅ Font definitions (FaceName)
122+
- ✅ Styles and templates
123+
124+
### Content Types
125+
- ✅ Text content with full Unicode support
126+
- ✅ Tables and structured data
127+
- ✅ Control objects (images, OLE objects)
128+
- ✅ Numbering and bullet lists
129+
- ✅ Tab stops and alignment
130+
131+
### Layout and Rendering
132+
- ✅ Page dimensions and margins
133+
- ✅ Multi-column layouts
134+
- ✅ Line-by-line positioning (when available)
135+
- ✅ Character-level positioning (when available)
136+
- ✅ Borders and fill patterns
137+
- ✅ SVG export with accurate positioning
138+
139+
### Advanced Features
140+
- ✅ Compressed document support
141+
- ✅ CFB (Compound File Binary) format handling
142+
- ✅ Text encoding support (UTF-16LE)
143+
- ✅ Error recovery and partial parsing
144+
145+
## Command Line Tool
146+
147+
The library includes a command-line tool for inspecting HWP files:
148+
149+
```bash
150+
# Install the tool
151+
cargo install hwpers
152+
153+
# Inspect an HWP file
154+
hwp_info document.hwp
155+
```
156+
157+
## Format Support
158+
159+
This library supports HWP 5.0 format files. For older HWP formats, consider using format conversion tools first.
160+
161+
## Contributing
162+
163+
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
164+
165+
## License
166+
167+
This project is licensed under either of
168+
169+
- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or <https://www.apache.org/licenses/LICENSE-2.0>)
170+
- MIT license ([LICENSE-MIT](LICENSE-MIT) or <https://opensource.org/licenses/MIT>)
171+
172+
at your option.
173+
174+
## Acknowledgments
175+
176+
- HWP file format specification by Hancom Inc.
177+
- Korean text processing community
178+
- Rust parsing and document processing ecosystem

0 commit comments

Comments
 (0)