Skip to content

Commit a573a69

Browse files
committed
feat(analyzer): add parse_url function handler
closes #765 Signed-off-by: azjezz <[email protected]>
1 parent af71e0f commit a573a69

File tree

6 files changed

+355
-0
lines changed

6 files changed

+355
-0
lines changed

crates/analyzer/src/plugin/libraries/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ pub fn register_library_providers(registry: &mut PluginRegistry) {
1919
registry.register_function_provider(stdlib::array::CompactProvider);
2020
registry.register_method_provider(stdlib::closure::ClosureGetCurrentProvider);
2121
registry.register_method_provider(stdlib::r#enum::EnumCasesProvider);
22+
registry.register_function_provider(stdlib::url::ParseUrlProvider);
2223

2324
registry.register_function_provider(psl::type_::ShapeProvider);
2425
registry.register_function_provider(psl::type_::OptionalProvider);

crates/analyzer/src/plugin/libraries/stdlib/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ pub mod json;
77
pub mod random;
88
pub mod spl;
99
pub mod string;
10+
pub mod url;
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
//! URL-related function providers.
2+
3+
mod parse_url;
4+
5+
pub use parse_url::ParseUrlProvider;
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
//! `parse_url()` return type provider.
2+
3+
use std::collections::BTreeMap;
4+
5+
use mago_atom::Atom;
6+
use mago_codex::ttype::atomic::TAtomic;
7+
use mago_codex::ttype::atomic::array::TArray;
8+
use mago_codex::ttype::atomic::array::key::ArrayKey;
9+
use mago_codex::ttype::atomic::array::keyed::TKeyedArray;
10+
use mago_codex::ttype::atomic::scalar::TScalar;
11+
use mago_codex::ttype::atomic::scalar::bool::TBool;
12+
use mago_codex::ttype::atomic::scalar::int::TInteger;
13+
use mago_codex::ttype::atomic::scalar::string::TString;
14+
use mago_codex::ttype::get_int_range;
15+
use mago_codex::ttype::get_non_empty_string;
16+
use mago_codex::ttype::get_string;
17+
use mago_codex::ttype::union::TUnion;
18+
19+
use crate::plugin::context::InvocationInfo;
20+
use crate::plugin::context::ProviderContext;
21+
use crate::plugin::provider::Provider;
22+
use crate::plugin::provider::ProviderMeta;
23+
use crate::plugin::provider::function::FunctionReturnTypeProvider;
24+
use crate::plugin::provider::function::FunctionTarget;
25+
26+
// Integer values used for the `$component` argument of `parse_url()`, named after
// PHP's `PHP_URL_*` constants. `get_component_return_type` matches on these.
const PHP_URL_SCHEME: i64 = 0;
27+
const PHP_URL_HOST: i64 = 1;
28+
const PHP_URL_PORT: i64 = 2;
29+
const PHP_URL_USER: i64 = 3;
30+
const PHP_URL_PASS: i64 = 4;
31+
const PHP_URL_PATH: i64 = 5;
32+
const PHP_URL_QUERY: i64 = 6;
33+
const PHP_URL_FRAGMENT: i64 = 7;
34+
35+
/// Static metadata handed out by `ParseUrlProvider::meta()`.
// NOTE(review): the three arguments look like (identifier, name, description) - confirm against `ProviderMeta::new`.
static META: ProviderMeta = ProviderMeta::new(
36+
"php::url::parse_url",
37+
"parse_url",
38+
"Returns typed array or component value based on component argument",
39+
);
40+
41+
/// Provider for the `parse_url()` function.
42+
///
43+
/// When called without a component argument, returns `false|array{...}` with URL parts.
44+
/// When called with a specific component constant, returns the appropriate narrowed type.
/// When a component is passed but cannot be resolved to a known set of values, returns the
/// union of every possible result.
45+
#[derive(Default)]
46+
pub struct ParseUrlProvider;
47+
48+
impl Provider for ParseUrlProvider {
49+
/// Returns the static `META` describing this provider.
fn meta() -> &'static ProviderMeta {
50+
&META
51+
}
52+
}
53+
54+
impl FunctionReturnTypeProvider for ParseUrlProvider {
55+
fn targets() -> FunctionTarget {
56+
FunctionTarget::Exact("parse_url")
57+
}
58+
59+
fn get_return_type(
60+
&self,
61+
context: &ProviderContext<'_, '_, '_>,
62+
invocation: &InvocationInfo<'_, '_, '_>,
63+
) -> Option<TUnion> {
64+
let component_arg = invocation.get_argument(1, &["component"]);
65+
66+
if let Some(arg) = component_arg {
67+
if let Some(component_type) = context.get_expression_type(arg) {
68+
let values = collect_component_values(component_type);
69+
70+
if let Some(values) = values {
71+
if values.is_empty() {
72+
// No valid component values - fall through to generic return
73+
} else {
74+
let mut result_types: Vec<TAtomic> = Vec::new();
75+
for value in values {
76+
let component_ret = get_component_return_type(value);
77+
for atomic in component_ret.types.iter() {
78+
if !result_types.contains(atomic) {
79+
result_types.push(atomic.clone());
80+
}
81+
}
82+
}
83+
84+
return Some(TUnion::from_vec(result_types));
85+
}
86+
}
87+
}
88+
89+
// Component provided but not resolvable - return generic union type
90+
return Some(get_all_components_return_type());
91+
}
92+
93+
// No component argument - return full array type
94+
Some(get_full_array_return_type())
95+
}
96+
}
97+
98+
/// Collects all possible component values from a type.
99+
/// Returns `None` if the type represents an unbounded set of integers.
100+
/// Returns `Some(vec![])` if the type is empty or has no valid integers.
101+
fn collect_component_values(component_type: &TUnion) -> Option<Vec<i64>> {
102+
let mut values = Vec::new();
103+
104+
for atomic in component_type.types.iter() {
105+
if let TAtomic::Scalar(TScalar::Integer(int_type)) = atomic {
106+
match *int_type {
107+
TInteger::Literal(v) => {
108+
if !values.contains(&v) {
109+
values.push(v);
110+
}
111+
}
112+
TInteger::Range(from, to) => {
113+
let effective_from = from.max(-1);
114+
let effective_to = to.min(7);
115+
116+
if effective_from <= effective_to {
117+
for v in effective_from..=effective_to {
118+
if !values.contains(&v) {
119+
values.push(v);
120+
}
121+
}
122+
}
123+
}
124+
TInteger::From(from) => {
125+
let effective_from = from.max(-1);
126+
if effective_from <= 7 {
127+
for v in effective_from..=7 {
128+
if !values.contains(&v) {
129+
values.push(v);
130+
}
131+
}
132+
}
133+
}
134+
TInteger::To(to) => {
135+
let effective_to = to.min(7);
136+
if -1 <= effective_to {
137+
for v in -1..=effective_to {
138+
if !values.contains(&v) {
139+
values.push(v);
140+
}
141+
}
142+
}
143+
}
144+
TInteger::Unspecified | TInteger::UnspecifiedLiteral => {
145+
return None;
146+
}
147+
}
148+
}
149+
}
150+
151+
Some(values)
152+
}
153+
154+
/// Returns the type for a specific URL component.
155+
fn get_component_return_type(component: i64) -> TUnion {
156+
match component {
157+
PHP_URL_SCHEME | PHP_URL_HOST | PHP_URL_USER | PHP_URL_PASS | PHP_URL_QUERY | PHP_URL_FRAGMENT => {
158+
// null|non-empty-string
159+
TUnion::from_vec(vec![TAtomic::Null, TAtomic::Scalar(TScalar::String(TString::non_empty()))])
160+
}
161+
PHP_URL_PORT => {
162+
// null|int<0, 65535>
163+
TUnion::from_vec(vec![TAtomic::Null, TAtomic::Scalar(TScalar::Integer(TInteger::Range(0, 65535)))])
164+
}
165+
PHP_URL_PATH => {
166+
// null|string (path can be empty string)
167+
TUnion::from_vec(vec![TAtomic::Null, TAtomic::Scalar(TScalar::String(TString::general()))])
168+
}
169+
-1 => {
170+
// -1 is equivalent to no component - return full array
171+
get_full_array_return_type()
172+
}
173+
_ => TUnion::from_vec(vec![TAtomic::Scalar(TScalar::Bool(TBool::r#false()))]),
174+
}
175+
}
176+
177+
/// Returns the union of all possible component return types.
178+
/// Used when component type is non-literal (e.g., `int`).
179+
/// `false|null|int<0, 65535>|string|array{...}`
180+
fn get_all_components_return_type() -> TUnion {
181+
let mut all_components_return_type = get_full_array_return_type();
182+
all_components_return_type.types.to_mut().push(TAtomic::Null);
183+
all_components_return_type.types.to_mut().push(TAtomic::Scalar(TScalar::Integer(TInteger::Range(0, 65535))));
184+
all_components_return_type.types.to_mut().push(TAtomic::Scalar(TScalar::String(TString::general())));
185+
186+
all_components_return_type
187+
}
188+
189+
/// Returns the full array type when no component is specified.
190+
fn get_full_array_return_type() -> TUnion {
191+
let mut known_items: BTreeMap<ArrayKey, (bool, TUnion)> = BTreeMap::new();
192+
193+
let optional_string_fields = ["scheme", "user", "pass", "host", "query", "fragment"];
194+
for field in optional_string_fields {
195+
known_items.insert(ArrayKey::String(Atom::from(field)), (true, get_non_empty_string()));
196+
}
197+
198+
known_items.insert(ArrayKey::String(Atom::from("port")), (true, get_int_range(Some(0), Some(65535))));
199+
known_items.insert(ArrayKey::String(Atom::from("path")), (false, get_string()));
200+
201+
let keyed_array = TKeyedArray::new().with_known_items(known_items);
202+
203+
TUnion::from_vec(vec![TAtomic::Scalar(TScalar::Bool(TBool::r#false())), TAtomic::Array(TArray::Keyed(keyed_array))])
204+
}
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/**
* Parses `$url` without a component argument and returns the resulting array,
* throwing when `parse_url()` returns false.
*
6+
* @param string $url
7+
*
8+
* @throws RuntimeException
9+
*
10+
* @return array{'fragment'?: non-empty-string, 'host'?: non-empty-string, 'pass'?: non-empty-string, 'path': string, 'port'?: int<0, 65535>, 'query'?: non-empty-string, 'scheme'?: non-empty-string, 'user'?: non-empty-string}
11+
*/
12+
function getFullResult(string $url): array
13+
{
14+
$result = parse_url($url);
15+
16+
if ($result === false) {
17+
throw new RuntimeException('Invalid URL');
18+
}
19+
20+
return $result;
21+
}
22+
23+
/**
* Returns the result of `parse_url()` for the `PHP_URL_SCHEME` component.
*
24+
* @param string $url
25+
*
26+
* @return non-empty-string|null
27+
*/
28+
function getScheme(string $url): null|string
29+
{
30+
return parse_url($url, PHP_URL_SCHEME);
31+
}
32+
33+
/**
* Returns the result of `parse_url()` for the `PHP_URL_HOST` component.
*
34+
* @param string $url
35+
*
36+
* @return non-empty-string|null
37+
*/
38+
function getHost(string $url): null|string
39+
{
40+
return parse_url($url, PHP_URL_HOST);
41+
}
42+
43+
/**
* Returns the result of `parse_url()` for the `PHP_URL_PORT` component.
*
44+
* @param string $url
45+
*
46+
* @return int<0, 65535>|null
47+
*/
48+
function getPort(string $url): null|int
49+
{
50+
return parse_url($url, PHP_URL_PORT);
51+
}
52+
53+
/**
* Returns the result of `parse_url()` for the `PHP_URL_USER` component.
*
54+
* @param string $url
55+
*
56+
* @return non-empty-string|null
57+
*/
58+
function getUser(string $url): null|string
59+
{
60+
return parse_url($url, PHP_URL_USER);
61+
}
62+
63+
/**
* Returns the result of `parse_url()` for the `PHP_URL_PASS` component.
*
64+
* @param string $url
65+
*
66+
* @return non-empty-string|null
67+
*/
68+
function getPass(string $url): null|string
69+
{
70+
return parse_url($url, PHP_URL_PASS);
71+
}
72+
73+
/**
* Returns the result of `parse_url()` for the `PHP_URL_PATH` component.
*
74+
* @param string $url
75+
*
76+
* @return string|null
77+
*/
78+
function getPath(string $url): null|string
79+
{
80+
return parse_url($url, PHP_URL_PATH);
81+
}
82+
83+
/**
* Returns the result of `parse_url()` for the `PHP_URL_QUERY` component.
*
84+
* @param string $url
85+
*
86+
* @return non-empty-string|null
87+
*/
88+
function getQuery(string $url): null|string
89+
{
90+
return parse_url($url, PHP_URL_QUERY);
91+
}
92+
93+
/**
* Returns the result of `parse_url()` for the `PHP_URL_FRAGMENT` component.
*
94+
* @param string $url
95+
*
96+
* @return non-empty-string|null
97+
*/
98+
function getFragment(string $url): null|string
99+
{
100+
return parse_url($url, PHP_URL_FRAGMENT);
101+
}
102+
103+
/**
* Parses `$url` with an explicit component of `-1` and returns the resulting array,
* throwing when `parse_url()` returns false.
*
104+
* @param string $url
105+
*
106+
* @throws RuntimeException
107+
*
108+
* @return array{'fragment'?: non-empty-string, 'host'?: non-empty-string, 'pass'?: non-empty-string, 'path': string, 'port'?: int<0, 65535>, 'query'?: non-empty-string, 'scheme'?: non-empty-string, 'user'?: non-empty-string}
109+
*/
110+
function getFullResultWithMinusOne(string $url): array
111+
{
112+
$result = parse_url($url, -1);
113+
114+
if ($result === false) {
115+
throw new RuntimeException('Invalid URL');
116+
}
117+
118+
return $result;
119+
}
120+
121+
/**
* Returns the result of `parse_url()` for a component given as the range `int<0, 1>`.
*
122+
* Both return null|non-empty-string
123+
*
124+
* @param string $url
125+
* @param int<0, 1> $component (PHP_URL_SCHEME or PHP_URL_HOST)
126+
*
127+
* @return non-empty-string|null
128+
*/
129+
function getSchemeOrHost(string $url, int $component): null|string
130+
{
131+
return parse_url($url, $component);
132+
}
133+
134+
/**
* Returns the result of `parse_url()` for a component given as the literal union `0|2`.
*
135+
* @param string $url
136+
* @param 0|2 $component (PHP_URL_SCHEME or PHP_URL_PORT)
137+
*
138+
* @return int<0, 65535>|non-empty-string|null
139+
*/
140+
function getSchemeOrPort(string $url, int $component): int|string|null
141+
{
142+
return parse_url($url, $component);
143+
}

crates/analyzer/tests/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,7 @@ test_case!(issue_754);
466466
test_case!(issue_755);
467467
test_case!(issue_756);
468468
test_case!(issue_764);
469+
test_case!(issue_765);
469470

470471
#[test]
471472
fn test_all_test_cases_are_ran() {

0 commit comments

Comments
 (0)