Skip to content

Commit 583928e

Browse files
authored
Allow unknown identifiers (#85)
1 parent c100f3e commit 583928e

File tree

4 files changed

+129
-7
lines changed

4 files changed

+129
-7
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
## [Unreleased] - ReleaseDate
1111
### Added
1212
- [PR#84](https://github.com/EmbarkStudios/spdx/pull/84) resolved [#67](https://github.com/EmbarkStudios/spdx/issues/67) by inlining the `askalono` crate to allow detection of license texts or headers from arbitrary text data. There are multiple feature flags associated with this new feature.
13+
- [PR#85](https://github.com/EmbarkStudios/spdx/pull/85) resolved [#82](https://github.com/EmbarkStudios/spdx/issues/82) by optionally allowing the parsing of unknown identifiers via `ParseMode::allow_unknown`. Unknown identifiers are treated as either `LicenseRef-<unknown identifier>` or `AdditionRef-<unknown identifier>` depending on their position. Unknown identifiers in positions that are invalid for either licenses or exceptions are still considered parse errors.
1314

1415
## [0.12.0] - 2025-08-19
1516
### Added

src/expression/parser.rs

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ impl Expression {
124124
can.push_str("AdditionRef-");
125125
can.push_str(add_ref);
126126
}
127+
Token::Unknown(_u) => unreachable!(),
127128
}
128129
}
129130

@@ -188,7 +189,7 @@ impl Expression {
188189
None | Some(Token::And | Token::Or | Token::OpenParen) => &["<license>", "("],
189190
Some(Token::CloseParen) => &["AND", "OR"],
190191
Some(Token::Exception(_) | Token::AdditionRef { .. }) => &["AND", "OR", ")"],
191-
Some(Token::Spdx(_)) => &["AND", "OR", "WITH", ")", "+"],
192+
Some(Token::Spdx(_) | Token::Unknown(_)) => &["AND", "OR", "WITH", ")", "+"],
192193
Some(Token::LicenseRef { .. } | Token::Plus) => &["AND", "OR", "WITH", ")"],
193194
Some(Token::With) => &["<addition>"],
194195
};
@@ -282,7 +283,9 @@ impl Expression {
282283
_ => return make_err_for_token(last_token, lt.span),
283284
},
284285
Token::With => match last_token {
285-
Some(Token::Spdx(_) | Token::LicenseRef { .. } | Token::Plus) => {}
286+
Some(
287+
Token::Spdx(_) | Token::LicenseRef { .. } | Token::Plus | Token::Unknown(_),
288+
) => {}
286289
_ => return make_err_for_token(last_token, lt.span),
287290
},
288291
Token::Or | Token::And => match last_token {
@@ -292,7 +295,8 @@ impl Expression {
292295
| Token::CloseParen
293296
| Token::Exception(_)
294297
| Token::AdditionRef { .. }
295-
| Token::Plus,
298+
| Token::Plus
299+
| Token::Unknown(_),
296300
) => {
297301
let new_op = match lt.token {
298302
Token::Or => Op::Or,
@@ -342,7 +346,8 @@ impl Expression {
342346
| Token::Plus
343347
| Token::Exception(_)
344348
| Token::AdditionRef { .. }
345-
| Token::CloseParen,
349+
| Token::CloseParen
350+
| Token::Unknown(_),
346351
) => {
347352
while let Some(top) = op_stack.pop() {
348353
match top.op {
@@ -387,6 +392,35 @@ impl Expression {
387392
},
388393
_ => return make_err_for_token(last_token, lt.span),
389394
},
395+
Token::Unknown(unknown) => {
396+
match last_token {
397+
None | Some(Token::And | Token::Or | Token::OpenParen) => {
398+
// This is the same position as a valid SPDX license id,
399+
// so assume that is what the user was attempting
400+
expr_queue.push(ExprNode::Req(ExpressionReq {
401+
req: LicenseReq {
402+
license: LicenseItem::Other(Box::new(LicenseRef {
403+
doc_ref: None,
404+
lic_ref: (*unknown).to_owned(),
405+
})),
406+
addition: None,
407+
},
408+
span: lt.span.start as u32..lt.span.end as u32,
409+
}));
410+
}
411+
Some(Token::With) => {
412+
let Some(ExprNode::Req(lic)) = expr_queue.last_mut() else {
413+
return make_err_for_token(last_token, lt.span);
414+
};
415+
416+
lic.req.addition = Some(AdditionItem::Other(Box::new(AdditionRef {
417+
doc_ref: None,
418+
add_ref: (*unknown).to_owned(),
419+
})));
420+
}
421+
_ => return make_err_for_token(last_token, lt.span),
422+
}
423+
}
390424
}
391425

392426
last_token = Some(lt.token);
@@ -400,7 +434,8 @@ impl Expression {
400434
| Token::Exception(_)
401435
| Token::AdditionRef { .. }
402436
| Token::CloseParen
403-
| Token::Plus,
437+
| Token::Plus
438+
| Token::Unknown(_),
404439
) => {}
405440
// We have to have at least one valid license requirement
406441
None => {

src/lexer.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@ pub struct ParseMode {
2626
/// This option just allows GPL licenses to be treated similarly to all of
2727
/// the other SPDX licenses.
2828
pub allow_postfix_plus_on_gpl: bool,
29-
/// How deprecated license identifiers are treated
29+
/// Whether deprecated license or exception identifiers are allowed
3030
pub allow_deprecated: bool,
31+
/// Whether unknown license or exception identifiers are allowed
32+
pub allow_unknown: bool,
3133
}
3234

3335
impl ParseMode {
@@ -38,11 +40,13 @@ impl ParseMode {
3840
/// case-sensitive.
3941
/// 1. `WITH`, `AND`, and `OR`, case-insensitive, are the only valid operators
4042
/// 1. Deprecated licenses are not allowed
43+
/// 1. Unknown licenses or exceptions are not allowed
4144
pub const STRICT: Self = Self {
4245
allow_slash_as_or_operator: false,
4346
allow_imprecise_license_names: false,
4447
allow_postfix_plus_on_gpl: false,
4548
allow_deprecated: false,
49+
allow_unknown: false,
4650
};
4751

4852
/// Allow non-conforming syntax for crates-io compatibility
@@ -55,11 +59,13 @@ impl ParseMode {
5559
/// 1. `/` can be used as a synonym for `OR`, and doesn't need to be
5660
/// separated by whitespace from the terms it combines
5761
/// 1. Deprecated license identifiers are allowed
62+
/// 1. Unknown licenses or exceptions are not allowed
5863
pub const LAX: Self = Self {
5964
allow_slash_as_or_operator: true,
6065
allow_imprecise_license_names: true,
6166
allow_postfix_plus_on_gpl: true,
6267
allow_deprecated: true,
68+
allow_unknown: false,
6369
};
6470
}
6571

@@ -84,6 +90,8 @@ pub enum Token<'a> {
8490
/// The name of the addition reference
8591
add_ref: &'a str,
8692
},
93+
/// An unknown license term was encountered
94+
Unknown(&'a str),
8795
/// A postfix `+` indicating "or later" for a particular SPDX license id
8896
Plus,
8997
/// A `(` for starting a group
@@ -127,6 +135,7 @@ impl Token<'_> {
127135
}) + "AdditionRef-".len()
128136
+ add_ref.len()
129137
}
138+
Token::Unknown(u) => u.len(),
130139
}
131140
}
132141
}
@@ -322,6 +331,8 @@ impl<'a> Iterator for Lexer<'a> {
322331
}
323332
{
324333
Some(Ok((Token::Spdx(lic_id), token_len)))
334+
} else if self.mode.allow_unknown {
335+
ok_token(Token::Unknown(m))
325336
} else {
326337
Some(Err(ParseError {
327338
original: self.original.to_owned(),

tests/check.rs

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#![allow(clippy::nonminimal_bool, clippy::eq_op, clippy::cognitive_complexity)]
22

3-
use spdx::LicenseItem;
3+
use spdx::{Expression, LicenseItem};
44

55
macro_rules! exact {
66
($req:expr, $e:expr) => {
@@ -559,3 +559,78 @@ fn too_many_to_minimize() {
559559
spdx::expression::MinimizeError::TooManyRequirements(65)
560560
);
561561
}
562+
563+
/// Test that we handle unknown licenses and exceptions
564+
#[test]
565+
fn handles_unknown() {
566+
const UNKNOWN: spdx::ParseMode = spdx::ParseMode {
567+
allow_deprecated: false,
568+
allow_imprecise_license_names: false,
569+
allow_postfix_plus_on_gpl: false,
570+
allow_slash_as_or_operator: false,
571+
allow_unknown: true,
572+
};
573+
574+
let single = spdx::Expression::parse_mode("sigh", UNKNOWN).unwrap();
575+
576+
fn get_reqs(e: &Expression) -> Vec<spdx::LicenseReq> {
577+
e.requirements().map(|er| er.req.clone()).collect()
578+
}
579+
580+
fn bad(s: &str) -> spdx::LicenseReq {
581+
spdx::LicenseReq {
582+
license: LicenseItem::Other(Box::new(spdx::LicenseRef {
583+
lic_ref: s.into(),
584+
doc_ref: None,
585+
})),
586+
addition: None,
587+
}
588+
}
589+
590+
assert_eq!(get_reqs(&single), vec![bad("sigh")]);
591+
592+
let compound = spdx::Expression::parse_mode("bad or MIT", UNKNOWN).unwrap();
593+
594+
assert_eq!(
595+
get_reqs(&compound),
596+
vec![
597+
bad("bad"),
598+
spdx::LicenseReq::from(spdx::license_id("MIT").unwrap())
599+
]
600+
);
601+
602+
let parens = spdx::Expression::parse_mode("(bad and Apache-2.0) or superbad", UNKNOWN).unwrap();
603+
604+
assert_eq!(
605+
get_reqs(&parens),
606+
vec![
607+
bad("bad"),
608+
spdx::LicenseReq::from(spdx::license_id("Apache-2.0").unwrap()),
609+
bad("superbad")
610+
]
611+
);
612+
613+
let exc = spdx::Expression::parse_mode(
614+
"terrible and (Apache-2.0 with even-worse or superbad)",
615+
UNKNOWN,
616+
)
617+
.unwrap();
618+
619+
assert_eq!(
620+
get_reqs(&exc),
621+
vec![
622+
bad("terrible"),
623+
spdx::LicenseReq {
624+
license: spdx::LicenseItem::Spdx {
625+
id: spdx::license_id("Apache-2.0").unwrap(),
626+
or_later: false
627+
},
628+
addition: Some(spdx::AdditionItem::Other(Box::new(spdx::AdditionRef {
629+
add_ref: "even-worse".into(),
630+
doc_ref: None,
631+
}))),
632+
},
633+
bad("superbad")
634+
]
635+
);
636+
}

0 commit comments

Comments
 (0)