From fcc5bee7154d7f5bda832f0fd1091a4b1247a679 Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Mon, 14 Apr 2025 22:08:22 +0900 Subject: [PATCH 01/12] feat: bump uris , bitflags to the newest --- Cargo.toml | 8 ++++---- src/lib.rs | 1 + src/uri.rs | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ae08a87..ef5d58c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,11 +15,11 @@ keywords = ["language", "server", "lsp", "vscode", "lsif"] license = "MIT" [dependencies] -bitflags = "1.0.1" -serde = { version = "1.0.34", features = ["derive"] } -serde_json = "1.0.50" +bitflags = "2.9.0" +serde = { version = "1.0.219", features = ["derive"] } +serde_json = "1.0.140" serde_repr = "0.1" -fluent-uri = "0.1.4" +fluent-uri = "0.3.2" [features] default = [] diff --git a/src/lib.rs b/src/lib.rs index 6ffeab5..1250036 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2386,6 +2386,7 @@ pub struct RelativePattern { pub type Pattern = String; bitflags! { +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] pub struct WatchKind: u8 { /// Interested in create events. const Create = 1; diff --git a/src/uri.rs b/src/uri.rs index 6f82d58..35d3597 100644 --- a/src/uri.rs +++ b/src/uri.rs @@ -21,9 +21,9 @@ impl<'de> Deserialize<'de> for Uri { D: serde::Deserializer<'de>, { let string = String::deserialize(deserializer)?; - fluent_uri::Uri::::parse_from(string) + fluent_uri::Uri::::parse(string) .map(Uri) - .map_err(|(_, error)| Error::custom(error.to_string())) + .map_err(|err| Error::custom(err.to_string())) } } @@ -40,7 +40,7 @@ impl PartialOrd for Uri { } impl FromStr for Uri { - type Err = fluent_uri::ParseError; + type Err = fluent_uri::error::ParseError; fn from_str(s: &str) -> Result { // TOUCH-UP: From 4d9a9d8dcb83cd6b5302e84228d67b6256c85798 Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Mon, 14 Apr 2025 22:11:41 +0900 Subject: [PATCH 02/12] chore: to 2024 --- Cargo.toml | 2 +- src/inline_value.rs | 4 ++-- src/lib.rs | 2 +- src/notification.rs | 2 +- src/request.rs | 2 +- src/uri.rs | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ef5d58c..ac1f455 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "lsp-types" version = "0.97.0" authors = ["Markus Westerlind ", "Bruno Medeiros "] -edition = "2018" +edition = "2024" description = "Types for interaction with a language server, using VSCode's Language Server Protocol" repository = "https://github.com/gluon-lang/lsp-types" diff --git a/src/inline_value.rs b/src/inline_value.rs index dd29fbb..ae137e9 100644 --- a/src/inline_value.rs +++ b/src/inline_value.rs @@ -132,7 +132,7 @@ pub struct InlineValueEvaluatableExpression { /// - directly as a text value (class InlineValueText). /// - as a name to use for a variable lookup (class InlineValueVariableLookup) /// - as an evaluatable expression (class InlineValueEvaluatableExpression) -/// The InlineValue types combines all inline value types into one type. +/// The InlineValue types combines all inline value types into one type. /// /// @since 3.17.0 #[derive(Debug, Eq, PartialEq, Clone, Deserialize, Serialize)] @@ -184,8 +184,8 @@ pub struct InlineValueWorkspaceClientCapabilities { #[cfg(test)] mod tests { use super::*; - use crate::tests::test_serialization; use crate::Position; + use crate::tests::test_serialization; #[test] fn inline_values() { diff --git a/src/lib.rs b/src/lib.rs index 1250036..cf4d388 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,7 @@ extern crate bitflags; use std::{collections::HashMap, fmt::Debug}; -use serde::{de, de::Error, Deserialize, Serialize}; +use serde::{Deserialize, Serialize, de, de::Error}; use serde_json::Value; pub use uri::Uri; diff --git a/src/notification.rs b/src/notification.rs index ccf712d..25e1e10 100644 --- a/src/notification.rs +++ b/src/notification.rs @@ -1,6 +1,6 @@ use super::*; -use serde::{de::DeserializeOwned, Serialize}; +use serde::{Serialize, de::DeserializeOwned}; pub trait Notification { type Params: DeserializeOwned + Serialize + Send + Sync + 'static; diff --git a/src/request.rs b/src/request.rs index 5464513..ad37256 100644 --- a/src/request.rs +++ b/src/request.rs @@ -1,6 +1,6 @@ use super::*; -use serde::{de::DeserializeOwned, Serialize}; +use serde::{Serialize, de::DeserializeOwned}; pub trait Request { type Params: DeserializeOwned + Serialize + Send + Sync + 'static; diff --git a/src/uri.rs b/src/uri.rs index 35d3597..e65f9d9 100644 --- a/src/uri.rs +++ b/src/uri.rs @@ -1,6 +1,6 @@ use std::{hash::Hash, ops::Deref, str::FromStr}; -use serde::{de::Error, Deserialize, Serialize}; +use serde::{Deserialize, Serialize, de::Error}; /// Newtype struct around `fluent_uri::Uri` with serialization implementations that use `as_str()` and 'from_str()' respectively. #[derive(Debug, Clone)] From 39d2bb0290afb6c5cff5dcfecac871b5c2ddc36d Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Mon, 14 Apr 2025 22:23:10 +0900 Subject: [PATCH 03/12] chore: add path operator --- src/uri.rs | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 137 insertions(+), 1 deletion(-) diff --git a/src/uri.rs b/src/uri.rs index e65f9d9..105ca11 100644 --- a/src/uri.rs +++ b/src/uri.rs @@ -1,6 +1,7 @@ -use std::{hash::Hash, ops::Deref, str::FromStr}; +use std::{borrow::Cow, hash::Hash, ops::Deref, str::FromStr}; use serde::{Deserialize, Serialize, de::Error}; +use std::path::{Path, PathBuf}; /// Newtype struct around `fluent_uri::Uri` with serialization implementations that use `as_str()` and 'from_str()' respectively. #[derive(Debug, Clone)] @@ -78,3 +79,138 @@ impl Hash for Uri { self.as_str().hash(state) } } + +#[cfg(not(windows))] +pub use std::fs::canonicalize as strict_canonicalize; + +/// On Windows, rewrites the wide path prefix `\\?\C:` to `C:` +/// Source: https://stackoverflow.com/a/70970317 +#[inline] +#[cfg(windows)] +fn strict_canonicalize>(path: P) -> std::io::Result { + use std::io; + + fn impl_(path: PathBuf) -> std::io::Result { + let head = path + .components() + .next() + .ok_or(io::Error::new(io::ErrorKind::Other, "empty path"))?; + let disk_; + let head = if let std::path::Component::Prefix(prefix) = head { + if let std::path::Prefix::VerbatimDisk(disk) = prefix.kind() { + disk_ = format!("{}:", disk as char); + Path::new(&disk_).components().next().ok_or(io::Error::new( + io::ErrorKind::Other, + "failed to parse disk component", + ))? + } else { + head + } + } else { + head + }; + Ok(std::iter::once(head) + .chain(path.components().skip(1)) + .collect()) + } + + let canon = std::fs::canonicalize(path)?; + impl_(canon) +} + +impl Uri { + pub fn to_file_path(&self) -> Option> { + let path = match self.path().decode().into_string_lossy() { + Cow::Borrowed(ref_) => Cow::Borrowed(Path::new(ref_)), + Cow::Owned(owned) => Cow::Owned(PathBuf::from(owned)), + }; + + if cfg!(windows) { + let authority = self.authority().expect("url has no authority component"); + let host = authority.host(); + if host.is_empty() { + // very high chance this is a `file:///` uri + // in which case the path will include a leading slash we need to remove + let host = path.to_string_lossy(); + let host = &host[1..]; + return Some(Cow::Owned(PathBuf::from(host))); + } + + let host = format!("{host}:"); + Some(Cow::Owned( + Path::new(&host) + .components() + .chain(path.components()) + .collect(), + )) + } else { + Some(path) + } + } + + pub fn from_file_path>(path: A) -> Option { + let path = path.as_ref(); + + let fragment = if path.is_absolute() { + Cow::Borrowed(path) + } else { + match strict_canonicalize(path) { + Ok(path) => Cow::Owned(path), + Err(_) => return None, + } + }; + + let raw_uri = if cfg!(windows) { + // we want to parse a triple-slash path for Windows paths + // it's a shorthand for `file://localhost/C:/Windows` with the `localhost` omitted + format!("file:///{}", fragment.to_string_lossy().replace("\\", "/")) + } else { + format!("file://{}", fragment.to_string_lossy()) + }; + + Uri::from_str(&raw_uri).ok() + } +} + +#[cfg(test)] +mod tests { + use super::strict_canonicalize; + use super::Uri; + use std::path::Path; + + #[test] + #[cfg(windows)] + fn test_idempotent_canonicalization() { + let lhs = strict_canonicalize(Path::new(".")).unwrap(); + let rhs = strict_canonicalize(&lhs).unwrap(); + assert_eq!(lhs, rhs); + } + + #[test] + fn test_path_roundtrip_conversion() { + let src = strict_canonicalize(Path::new(".")).unwrap(); + let conv = Uri::from_file_path(&src).unwrap(); + let roundtrip = conv.to_file_path().unwrap(); + assert_eq!(src, roundtrip, "conv={conv:?}",); + } + + #[test] + #[cfg(windows)] + fn test_windows_uri_roundtrip_conversion() { + use std::str::FromStr; + + let uri = Uri::from_str("file:///C:/Windows").unwrap(); + let path = uri.to_file_path().unwrap(); + assert_eq!(&path, Path::new("C:/Windows"), "uri={uri:?}"); + + let conv = Uri::from_file_path(&path).unwrap(); + + assert_eq!( + uri, + conv, + "path={path:?} left={} right={}", + uri.as_str(), + conv.as_str() + ); + } +} From 82bb3be4f816a751e5c3707ddabe0d2a9f090207 Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Mon, 14 Apr 2025 22:33:47 +0900 Subject: [PATCH 04/12] chore: ci fix --- .github/workflows/rust.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 3b5207f..98b5021 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -13,7 +13,7 @@ jobs: build: strategy: matrix: - rust: [stable, nightly] + rust: [stable] features: ["", proposed] env: @@ -23,17 +23,21 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 + - uses: dtolnay/rust-toolchain@stable with: - profile: minimal - toolchain: ${{ matrix.rust }} - override: true + components: clippy - name: Build run: cargo build + - name: Fmt + run: cargo fmt --all -- --check + + - name: clippy + run: cargo clippy --all-targets -- -D warnings + - name: Run tests run: > FEATURES=${{ matrix.features }} From 147416043db65708f8d339f72c9d58f29d274326 Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Mon, 14 Apr 2025 22:35:29 +0900 Subject: [PATCH 05/12] chore: name changes --- Cargo.toml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ac1f455..efffea0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,15 @@ [package] -name = "lsp-types" +name = "lsp-types-f" version = "0.97.0" -authors = ["Markus Westerlind ", "Bruno Medeiros "] +authors = [ + "Markus Westerlind ", + "Bruno Medeiros ", + "Decodertalkers ", +] edition = "2024" description = "Types for interaction with a language server, using VSCode's Language Server Protocol" -repository = "https://github.com/gluon-lang/lsp-types" +repository = "https://github.com/neocmakelsp/lsp-types" documentation = "https://docs.rs/lsp-types" readme = "README.md" From 2a602a98746a745a4a86e84e71082d20b758d55d Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Mon, 14 Apr 2025 22:36:26 +0900 Subject: [PATCH 06/12] chore: fmt fix --- src/uri.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uri.rs b/src/uri.rs index 105ca11..abb7ea2 100644 --- a/src/uri.rs +++ b/src/uri.rs @@ -174,8 +174,8 @@ impl Uri { #[cfg(test)] mod tests { - use super::strict_canonicalize; use super::Uri; + use super::strict_canonicalize; use std::path::Path; #[test] From 1ddf5c063b006efc4ba4c475dc7fc1a09d565f82 Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Mon, 14 Apr 2025 23:09:15 +0900 Subject: [PATCH 07/12] chore: ci fix --- src/completion.rs | 1 + tests/lsif.rs | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/completion.rs b/src/completion.rs index bdf60fb..51b55e0 100644 --- a/src/completion.rs +++ b/src/completion.rs @@ -571,6 +571,7 @@ pub struct CompletionItemLabelDetails { pub description: Option, } +#[allow(clippy::field_reassign_with_default)] #[cfg(test)] mod tests { use super::*; diff --git a/tests/lsif.rs b/tests/lsif.rs index 3b90c8b..253679c 100644 --- a/tests/lsif.rs +++ b/tests/lsif.rs @@ -1,12 +1,13 @@ #[test] #[cfg(unix)] fn run() { - use lsp_types::lsif::Entry; + use lsp_types_f::lsif::Entry; let jsonl = include_str!("tsc-unix.lsif"); for json in jsonl.lines() { - let r = serde_json::from_str::(&json).expect(&format!("can not parse {}", json)); - let x = serde_json::to_string(&r).expect(&format!("can not serialize {}", json)); + let r = serde_json::from_str::(json) + .unwrap_or_else(|_| panic!("can not parse {}", json)); + let x = serde_json::to_string(&r).unwrap_or_else(|_| panic!("can not serialize {}", json)); assert_eq!( serde_json::from_str::(&x).unwrap(), serde_json::from_str::(json).unwrap(), From 8b16d4b9f7b18aba43c06fc2246c684abf6234c5 Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Mon, 14 Apr 2025 23:10:25 +0900 Subject: [PATCH 08/12] chore: udpate README --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b5641f5..78d8d2f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# lsp-types [![Build Status](https://travis-ci.org/gluon-lang/lsp-types.svg?branch=master)](https://travis-ci.org/gluon-lang/lsp-types) [![Documentation](https://docs.rs/lsp-types/badge.svg)](https://docs.rs/crate/lsp-types) +# lsp-types-f [![Build Status](https://travis-ci.org/neocmakelsp/lsp-types.svg?branch=master)](https://travis-ci.org/neocmakelsp/lsp-types) [![Documentation](https://docs.rs/lsp-types-f/badge.svg)](https://docs.rs/crate/lsp-types) Types useful for interacting with a [language server](https://code.visualstudio.com/blogs/2016/06/27/common-language-protocol). @@ -7,6 +7,9 @@ Supports Language Server Protocol (LSP) version 3.16.0. Proposed 3.17 features can be activated using the `proposed` feature flag. - **NOTE** that these are unstable and may change between releases. +## NOTE +Fork for neocmakelsp + ## Contributing If you are making a change which adds, removes or modifies the LSP API it is highly appreciated if you link to the spec where this change is described. This gives context to whether the change should be an experimental addition and lets the reviewer double check the changes easily against the spec. From c1ef279a21bfca96d02e9b4f7a5f609fb287442f Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Mon, 14 Apr 2025 23:11:28 +0900 Subject: [PATCH 09/12] chore: add more ci --- .github/dependabot.yml | 33 +++++++++++++++++++++++++++++++++ .github/workflows/typos.yml | 19 +++++++++++++++++++ _typos.toml | 2 ++ 3 files changed, 54 insertions(+) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/typos.yml create mode 100644 _typos.toml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..2193598 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,33 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +# docs +# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file +version: 2 +updates: + - package-ecosystem: "cargo" + directory: "/" + schedule: + interval: "weekly" + # We release on Tuesdays and open dependabot PRs will rebase after the + # version bump and thus consume unnecessary workers during release, thus + # let's open new ones on Wednesday + day: "wednesday" + ignore: + - dependency-name: "*" + update-types: ["version-update:semver-patch"] + groups: + # Only update polars as a whole as there are many subcrates that need to + # be updated at once. We explicitly depend on some of them, so batch their + # updates to not take up dependabot PR slots with dysfunctional PRs + polars: + patterns: + - "polars" + - "polars-*" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + day: "wednesday" diff --git a/.github/workflows/typos.yml b/.github/workflows/typos.yml new file mode 100644 index 0000000..ae63733 --- /dev/null +++ b/.github/workflows/typos.yml @@ -0,0 +1,19 @@ +--- +# yamllint disable rule:line-length +name: check_typos + +on: # yamllint disable-line rule:truthy + push: + pull_request: + branches: + - '**' + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: typos-action + uses: crate-ci/typos@v1.31.1 diff --git a/_typos.toml b/_typos.toml new file mode 100644 index 0000000..78e656c --- /dev/null +++ b/_typos.toml @@ -0,0 +1,2 @@ +[default.extend-words] +typ = "typ" From d6b0ae08976631a871479d3d57787541c93ddd06 Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Mon, 14 Apr 2025 23:12:53 +0900 Subject: [PATCH 10/12] chore: tag 0.97.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index efffea0..accd80c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lsp-types-f" -version = "0.97.0" +version = "0.97.1" authors = [ "Markus Westerlind ", "Bruno Medeiros ", From 6ca739ac1c9f20e11dfe6cab7e80dddb770daa9c Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Mon, 14 Apr 2025 23:33:55 +0900 Subject: [PATCH 11/12] chore: induce thiserror --- Cargo.toml | 3 ++- src/uri.rs | 21 ++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index accd80c..2ddd0e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lsp-types-f" -version = "0.97.1" +version = "0.98.0" authors = [ "Markus Westerlind ", "Bruno Medeiros ", @@ -24,6 +24,7 @@ serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.140" serde_repr = "0.1" fluent-uri = "0.3.2" +thiserror = "2.0.12" [features] default = [] diff --git a/src/uri.rs b/src/uri.rs index abb7ea2..348e6f2 100644 --- a/src/uri.rs +++ b/src/uri.rs @@ -118,33 +118,44 @@ fn strict_canonicalize>(path: P) -> std::io::Result { impl_(canon) } +#[derive(thiserror::Error, Debug)] +pub enum UriError { + #[error("scheme wrong")] + SchemeWrong, + #[error("url has no authority component")] + AuthorityWrong, +} + impl Uri { - pub fn to_file_path(&self) -> Option> { + pub fn to_file_path(&self) -> Result, UriError> { + if self.scheme().as_str().to_lowercase() != "file" { + return Err(UriError::SchemeWrong); + } let path = match self.path().decode().into_string_lossy() { Cow::Borrowed(ref_) => Cow::Borrowed(Path::new(ref_)), Cow::Owned(owned) => Cow::Owned(PathBuf::from(owned)), }; if cfg!(windows) { - let authority = self.authority().expect("url has no authority component"); + let authority = self.authority().ok_or(UriError::AuthorityWrong)?; let host = authority.host(); if host.is_empty() { // very high chance this is a `file:///` uri // in which case the path will include a leading slash we need to remove let host = path.to_string_lossy(); let host = &host[1..]; - return Some(Cow::Owned(PathBuf::from(host))); + return Ok(Cow::Owned(PathBuf::from(host))); } let host = format!("{host}:"); - Some(Cow::Owned( + Ok(Cow::Owned( Path::new(&host) .components() .chain(path.components()) .collect(), )) } else { - Some(path) + Ok(path) } } From b74f0d287c26e2fafc8e65e57a4c1e224587435d Mon Sep 17 00:00:00 2001 From: ShootingStarDragons Date: Wed, 16 Apr 2025 18:20:26 +0900 Subject: [PATCH 12/12] feat: better path parse --- Cargo.toml | 1 + src/uri.rs | 401 +++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 313 insertions(+), 89 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2ddd0e7..a79afeb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ serde_json = "1.0.140" serde_repr = "0.1" fluent-uri = "0.3.2" thiserror = "2.0.12" +percent-encoding = "2.3.1" [features] default = [] diff --git a/src/uri.rs b/src/uri.rs index 348e6f2..facc5d9 100644 --- a/src/uri.rs +++ b/src/uri.rs @@ -1,7 +1,45 @@ -use std::{borrow::Cow, hash::Hash, ops::Deref, str::FromStr}; +use std::{hash::Hash, ops::Deref, str::FromStr}; use serde::{Deserialize, Serialize, de::Error}; -use std::path::{Path, PathBuf}; +extern crate alloc; +use fluent_uri::encoding::Split; +use fluent_uri::encoding::encoder::Path as UriPath; +use fluent_uri::{component::Scheme, error::BuildError}; + +mod control_chars { + use percent_encoding::AsciiSet; + /// https://url.spec.whatwg.org/#fragment-percent-encode-set + const FRAGMENT: &AsciiSet = &percent_encoding::CONTROLS + .add(b' ') + .add(b'"') + .add(b'<') + .add(b'>') + .add(b'`'); + + /// https://url.spec.whatwg.org/#path-percent-encode-set + const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}'); + pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%'); + + pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\'); +} + +/// The error describe what happened during converting [std::path::Path] to [Uri] or converting +/// [Uri] to [std::path::Path] +#[derive(Debug, thiserror::Error)] +pub enum UriPathError { + #[error("No Segments")] + NoSegments, + #[error("HostError")] + HostError, + #[error("build error")] + BuildError(#[from] BuildError), + #[error("not absolute path")] + NotAbsolutePath, + #[error("Path illegal")] + IllegalPath, +} + +const SCHEME_FILE: &Scheme = Scheme::new_or_panic("file"); /// Newtype struct around `fluent_uri::Uri` with serialization implementations that use `as_str()` and 'from_str()' respectively. #[derive(Debug, Clone)] @@ -80,113 +118,296 @@ impl Hash for Uri { } } -#[cfg(not(windows))] -pub use std::fs::canonicalize as strict_canonicalize; - -/// On Windows, rewrites the wide path prefix `\\?\C:` to `C:` -/// Source: https://stackoverflow.com/a/70970317 -#[inline] -#[cfg(windows)] -fn strict_canonicalize>(path: P) -> std::io::Result { - use std::io; - - fn impl_(path: PathBuf) -> std::io::Result { - let head = path - .components() - .next() - .ok_or(io::Error::new(io::ErrorKind::Other, "empty path"))?; - let disk_; - let head = if let std::path::Component::Prefix(prefix) = head { - if let std::path::Prefix::VerbatimDisk(disk) = prefix.kind() { - disk_ = format!("{}:", disk as char); - Path::new(&disk_).components().next().ok_or(io::Error::new( - io::ErrorKind::Other, - "failed to parse disk component", - ))? - } else { - head - } - } else { - head +impl Uri { + /// Assuming the URL is in the `file` scheme or similar, + /// convert its path to an absolute `std::path::Path`. + /// + /// **Note:** This does not actually check the URL’s `scheme`, + /// and may give nonsensical results for other schemes. + /// It is the user’s responsibility to check the URL’s scheme before calling this. + /// + /// ``` + /// # use lsp_types_f::Uri; + /// # let url = Uri::parse("file:///etc/passwd").unwrap(); + /// let path = url.to_file_path(); + /// ``` + /// + /// # Errors + /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where + /// `file:` URLs may have a non-local host), + /// or if `Path::new_opt()` returns `None`. + /// (That is, if the percent-decoded path contains a NUL byte or, + /// for a Windows path, is not UTF-8.) + /// + /// This method is only available if the `std` Cargo feature is enabled. + /// + pub fn to_file_path(&self) -> Result { + let segments = self.path(); + let segments = segments + .segments_if_absolute() + .ok_or(UriPathError::NoSegments)?; + let host: Option<&str> = match self.authority().map(|authority| authority.host()) { + None | Some("localhost") => None, + Some(host_data) if self.scheme().as_str() == "file" => Some(host_data), + Some(_) => return Err(UriPathError::NoSegments), }; - Ok(std::iter::once(head) - .chain(path.components().skip(1)) - .collect()) + file_url_segments_to_pathbuf(host, segments) + } + + /// Convert a file name as `std::path::Path` into an URL in the `file` scheme. + /// + /// This returns `Err` if the given path is not absolute or, + /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`). + /// + /// # Examples + /// + /// On Unix-like platforms: + /// + /// ``` + /// # if cfg!(unix) { + /// # use lsp_types_f::Uri; + /// + /// let uri = Uri::from_file_path("/tmp/foo.txt").unwrap(); + /// assert_eq!(uri.as_str(), "file:///tmp/foo.txt"); + /// + /// let uri = Uri::from_file_path("../foo.txt"); + /// assert!(uri.is_err()); + /// + /// let uri = Uri::from_file_path("https://google.com/"); + /// assert!(uri.is_err()); + /// # } + /// ``` + /// + /// # Errors + /// + /// Will return error when the path is illegal + pub fn from_file_path>(path: P) -> Result { + use control_chars::*; + use fluent_uri::Uri as FUri; + use fluent_uri::component::Authority; + use fluent_uri::encoding::EStr; + use percent_encoding::percent_encode; + use std::os::unix::ffi::OsStrExt; + let path = path.as_ref(); + if !path.is_absolute() { + return Err(UriPathError::NotAbsolutePath); + } + let mut serialization = "".to_owned(); + let mut empty = true; + for component in path.components().skip(1) { + empty = false; + serialization.push('/'); + #[cfg(not(target_os = "wasi"))] + serialization.extend(percent_encode( + component.as_os_str().as_bytes(), + SPECIAL_PATH_SEGMENT, + )); + + #[cfg(target_os = "wasi")] + serialization.extend(percent_encode( + component.as_os_str().to_string_lossy().as_bytes(), + SPECIAL_PATH_SEGMENT, + )); + } + if empty { + serialization.push('/'); + } + let path = EStr::new(&serialization).ok_or(UriPathError::IllegalPath)?; + Ok(Self( + FUri::builder() + .scheme(SCHEME_FILE) + .authority(Authority::EMPTY) + .path(path) + .build()?, + )) + } + pub fn parse(path: &str) -> Result { + use fluent_uri::Uri as FUri; + let uri = FUri::parse(path)?; + Ok(Self(uri.into())) + } +} + +#[cfg(all(any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")))] +fn file_url_segments_to_pathbuf( + host: Option<&str>, + + segments: Split<'_, UriPath>, +) -> Result { + use alloc::vec::Vec; + + use percent_encoding::percent_decode; + + #[cfg(not(target_os = "wasi"))] + use std::ffi::OsStr; + + #[cfg(target_os = "hermit")] + use std::os::hermit::ffi::OsStrExt; + + #[cfg(any(unix, target_os = "redox"))] + use std::os::unix::prelude::OsStrExt; + + use std::path::PathBuf; + + if host.is_some_and(|host| !host.is_empty()) { + return Err(UriPathError::HostError); + } + + let mut bytes = if cfg!(target_os = "redox") { + b"file:".to_vec() + } else { + Vec::new() + }; + + for segment in segments { + bytes.push(b'/'); + + bytes.extend(percent_decode(segment.as_str().as_bytes())); + } + + // A windows drive letter must end with a slash. + + if bytes.len() > 2 + && bytes[bytes.len() - 2].is_ascii_alphabetic() + && matches!(bytes[bytes.len() - 1], b':' | b'|') + { + bytes.push(b'/'); } - let canon = std::fs::canonicalize(path)?; - impl_(canon) + #[cfg(not(target_os = "wasi"))] + let path = PathBuf::from(OsStr::from_bytes(&bytes)); + + #[cfg(target_os = "wasi")] + let path = String::from_utf8(bytes) + .map(|path| PathBuf::from(path)) + .map_err(|_| ())?; + + debug_assert!( + path.is_absolute(), + "to_file_path() failed to produce an absolute Path" + ); + + Ok(path) } -#[derive(thiserror::Error, Debug)] -pub enum UriError { - #[error("scheme wrong")] - SchemeWrong, - #[error("url has no authority component")] - AuthorityWrong, +#[cfg(windows)] +fn file_url_segments_to_pathbuf( + host: Option<&str>, + segments: Split<'_, Path>, +) -> Result { + file_url_segments_to_pathbuf_windows(host, segments) } -impl Uri { - pub fn to_file_path(&self) -> Result, UriError> { - if self.scheme().as_str().to_lowercase() != "file" { - return Err(UriError::SchemeWrong); - } - let path = match self.path().decode().into_string_lossy() { - Cow::Borrowed(ref_) => Cow::Borrowed(Path::new(ref_)), - Cow::Owned(owned) => Cow::Owned(PathBuf::from(owned)), - }; +/// https://url.spec.whatwg.org/#ascii-alpha +#[allow(unused)] +#[inline] +fn ascii_alpha(ch: char) -> bool { + ch.is_ascii_alphabetic() +} - if cfg!(windows) { - let authority = self.authority().ok_or(UriError::AuthorityWrong)?; - let host = authority.host(); - if host.is_empty() { - // very high chance this is a `file:///` uri - // in which case the path will include a leading slash we need to remove - let host = path.to_string_lossy(); - let host = &host[1..]; - return Ok(Cow::Owned(PathBuf::from(host))); +// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 +#[cfg_attr(not(windows), allow(dead_code))] +fn file_url_segments_to_pathbuf_windows( + host: Option<&str>, + mut segments: Split<'_, UriPath>, +) -> Result { + use percent_encoding::percent_decode; + use std::path::PathBuf; + + let mut string = if let Some(host) = host { + r"\\".to_owned() + host + } else { + let first = segments.next().ok_or(UriPathError::HostError)?.as_str(); + + match first.len() { + 2 => { + if !first.starts_with(ascii_alpha) || first.as_bytes()[1] != b':' { + return Err(UriPathError::HostError); + } + + first.to_owned() } - let host = format!("{host}:"); - Ok(Cow::Owned( - Path::new(&host) - .components() - .chain(path.components()) - .collect(), - )) - } else { - Ok(path) - } - } + 4 => { + if !first.starts_with(ascii_alpha) { + return Err(UriPathError::HostError); + } - pub fn from_file_path>(path: A) -> Option { - let path = path.as_ref(); + let bytes = first.as_bytes(); + + if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') { + return Err(UriPathError::HostError); + } - let fragment = if path.is_absolute() { - Cow::Borrowed(path) - } else { - match strict_canonicalize(path) { - Ok(path) => Cow::Owned(path), - Err(_) => return None, + first[0..1].to_owned() + ":" } - }; - let raw_uri = if cfg!(windows) { - // we want to parse a triple-slash path for Windows paths - // it's a shorthand for `file://localhost/C:/Windows` with the `localhost` omitted - format!("file:///{}", fragment.to_string_lossy().replace("\\", "/")) - } else { - format!("file://{}", fragment.to_string_lossy()) - }; + _ => return Err(UriPathError::HostError), + } + }; + + for segment in segments.map(|seg| seg.as_str()) { + string.push('\\'); + + // Currently non-unicode windows paths cannot be represented - Uri::from_str(&raw_uri).ok() + match String::from_utf8(percent_decode(segment.as_bytes()).collect()) { + Ok(s) => string.push_str(&s), + + Err(..) => return Err(UriPathError::HostError), + } } + + let path = PathBuf::from(string); + + debug_assert!( + path.is_absolute(), + "to_file_path() failed to produce an absolute Path" + ); + + Ok(path) } #[cfg(test)] mod tests { + #[cfg(not(windows))] + pub use std::fs::canonicalize as strict_canonicalize; + + /// On Windows, rewrites the wide path prefix `\\?\C:` to `C:` + /// Source: https://stackoverflow.com/a/70970317 + #[inline] + #[cfg(windows)] + fn strict_canonicalize>(path: P) -> std::io::Result { + use std::io; + + fn impl_(path: PathBuf) -> std::io::Result { + let head = path + .components() + .next() + .ok_or(io::Error::new(io::ErrorKind::Other, "empty path"))?; + let disk_; + let head = if let std::path::Component::Prefix(prefix) = head { + if let std::path::Prefix::VerbatimDisk(disk) = prefix.kind() { + disk_ = format!("{}:", disk as char); + Path::new(&disk_).components().next().ok_or(io::Error::new( + io::ErrorKind::Other, + "failed to parse disk component", + ))? + } else { + head + } + } else { + head + }; + Ok(std::iter::once(head) + .chain(path.components().skip(1)) + .collect()) + } + + let canon = std::fs::canonicalize(path)?; + impl_(canon) + } use super::Uri; - use super::strict_canonicalize; use std::path::Path; #[test] @@ -203,8 +424,10 @@ mod tests { let conv = Uri::from_file_path(&src).unwrap(); let roundtrip = conv.to_file_path().unwrap(); assert_eq!(src, roundtrip, "conv={conv:?}",); - } + let url = Uri::from_file_path("/tmp/foo.txt").unwrap(); + assert_eq!(url.as_str(), "file:///tmp/foo.txt"); + } #[test] #[cfg(windows)] fn test_windows_uri_roundtrip_conversion() {