Skip to content

Implement a File Link Resolver #5981

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: krishna/refactor-main
Choose a base branch
from
Open
7 changes: 7 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@ jobs:
command: test
args: --package graph-tests --test runner_tests

- name: Run file link resolver test
id: file-link-resolver-test
uses: actions-rs/cargo@v1
with:
command: test
args: --package graph-tests --test file_link_resolver

integration-tests:
name: Run integration tests
runs-on: ubuntu-latest
Expand Down
188 changes: 188 additions & 0 deletions graph/src/components/link_resolver/file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
use std::path::{Path, PathBuf};
use std::time::Duration;

use anyhow::anyhow;
use async_trait::async_trait;
use slog::Logger;

use crate::data::subgraph::Link;
use crate::prelude::{Error, JsonValueStream, LinkResolver as LinkResolverTrait};

/// A link resolver that reads link targets from the local file system.
#[derive(Clone, Debug)]
pub struct FileLinkResolver {
    /// Directory that relative links are joined onto. When `None`, links are
    /// used as paths verbatim.
    base_dir: Option<PathBuf>,
    /// Timeout carried by this resolver; 30 seconds unless replaced.
    timeout: Duration,
}

impl Default for FileLinkResolver {
    /// Equivalent to [`FileLinkResolver::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl FileLinkResolver {
    /// Timeout assigned by both constructors.
    const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);

    /// Create a new FileLinkResolver without a base directory.
    ///
    /// Links are used as paths exactly as given; nothing is prepended to them.
    pub fn new() -> Self {
        Self {
            base_dir: None,
            timeout: Self::DEFAULT_TIMEOUT,
        }
    }

    /// Create a new FileLinkResolver with a base directory.
    ///
    /// All paths that are not absolute will be considered
    /// relative to this base directory.
    pub fn with_base_dir<P: AsRef<Path>>(base_dir: P) -> Self {
        Self {
            base_dir: Some(base_dir.as_ref().to_owned()),
            timeout: Self::DEFAULT_TIMEOUT,
        }
    }

    /// Turns `link` into the path that should be read.
    ///
    /// Without a base directory the link is returned unchanged. With one, the
    /// link is joined onto it; `Path::join` returns an absolute `link` as is,
    /// so absolute links bypass the base directory.
    fn resolve_path(&self, link: &str) -> PathBuf {
        match &self.base_dir {
            Some(base_dir) => base_dir.join(link),
            None => PathBuf::from(link),
        }
    }
}

/// Strips one leading `/ipfs/` from `link`, if present.
///
/// Links that do not start with `/ipfs/` are returned unchanged. Only the
/// first occurrence at the very start is removed; the remainder is untouched.
pub fn remove_prefix(link: &str) -> &str {
    const IPFS: &str = "/ipfs/";
    // `strip_prefix` does the check and the slicing in one step, avoiding
    // manual byte-offset indexing.
    link.strip_prefix(IPFS).unwrap_or(link)
}

#[async_trait]
impl LinkResolverTrait for FileLinkResolver {
fn with_timeout(&self, timeout: Duration) -> Box<dyn LinkResolverTrait> {
let mut resolver = self.clone();
resolver.timeout = timeout;
Box::new(resolver)
}

/// Returns a boxed clone of this resolver. No retry-related state is set
/// here; the clone is identical to `self`.
fn with_retries(&self) -> Box<dyn LinkResolverTrait> {
Box::new(self.clone())
}

/// Reads the file that `link` points to and returns its contents.
///
/// A leading `/ipfs/` is stripped from the link, and the remainder is
/// resolved to a path with `resolve_path`. The read is bounded by this
/// resolver's `timeout`, so a value set through `with_timeout` takes effect.
///
/// # Errors
///
/// Returns an error, after logging it, when the file cannot be read or when
/// the read does not finish within the timeout.
async fn cat(&self, logger: &Logger, link: &Link) -> Result<Vec<u8>, Error> {
    let link = remove_prefix(&link.link);
    let path = self.resolve_path(link);

    slog::debug!(logger, "File resolver: reading file";
        "path" => path.to_string_lossy().to_string());

    // Collapse both failure modes (I/O error, timeout) into one textual
    // reason so they share the logging and error construction below.
    let outcome = match tokio::time::timeout(self.timeout, tokio::fs::read(&path)).await {
        Ok(read_result) => read_result.map_err(|e| e.to_string()),
        Err(_elapsed) => Err(format!("timed out after {:?}", self.timeout)),
    };

    match outcome {
        Ok(data) => Ok(data),
        Err(reason) => {
            slog::error!(logger, "Failed to read file";
                "path" => path.to_string_lossy().to_string(),
                "error" => reason.clone());
            Err(anyhow!("Failed to read file {}: {}", path.display(), reason).into())
        }
    }
}

/// Not supported by the file resolver: both arguments are ignored and an
/// error is always returned.
async fn get_block(&self, _logger: &Logger, _link: &Link) -> Result<Vec<u8>, Error> {
Err(anyhow!("get_block is not implemented for FileLinkResolver").into())
}

/// Not supported by the file resolver: both arguments are ignored and an
/// error is always returned.
async fn json_stream(&self, _logger: &Logger, _link: &Link) -> Result<JsonValueStream, Error> {
Err(anyhow!("json_stream is not implemented for FileLinkResolver").into())
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making these always return errors is sorta ugly. It would be nicer to have a trait FileResolver that doesn't have these two and a trait LinkResolver that does and change the code to use the right one. That's going to be a bigger change, so fine to do it in a separate PR, but we shouldn't let this linger for too long.

The danger with leaving this too long is that the code becomes brittle since now every user of LinkResolver needs to make sure it gets the right kind as that's not guaranteed by the type alone anymore

}

#[cfg(test)]
mod tests {
    use super::*;
    use std::env;
    use std::fs;

    /// Exercises a resolver created without a base directory using absolute
    /// paths only.
    #[tokio::test]
    async fn test_file_resolver_absolute() {
        // Create a temporary directory for test files. Fail loudly if that
        // is not possible, otherwise later failures would be misleading.
        let temp_dir = env::temp_dir().join("file_resolver_test");
        fs::create_dir_all(&temp_dir).expect("failed to create temporary test directory");

        // Create a test file in the temp directory
        let test_file_path = temp_dir.join("test.txt");
        let test_content = b"Hello, world!";
        fs::write(&test_file_path, test_content).unwrap();

        // Create a resolver without a base directory
        let resolver = FileLinkResolver::new();
        let logger = slog::Logger::root(slog::Discard, slog::o!());

        // Test valid path resolution
        let link = Link {
            link: test_file_path.to_string_lossy().to_string(),
        };
        let result = resolver.cat(&logger, &link).await.unwrap();
        assert_eq!(result, test_content);

        // Test an absolute path to a file that was never created. It lives in
        // the test's own directory so the outcome does not depend on what
        // happens to exist at the file-system root of the host.
        let missing_path = temp_dir.join("missing.txt");
        let link = Link {
            link: missing_path.to_string_lossy().to_string(),
        };
        let result = resolver.cat(&logger, &link).await;
        assert!(
            result.is_err(),
            "Reading a file that was never created should fail"
        );

        // Clean up (best effort)
        let _ = fs::remove_file(test_file_path);
        let _ = fs::remove_dir(temp_dir);
    }

    /// Exercises a resolver created with a base directory using relative,
    /// absolute and missing paths.
    #[tokio::test]
    async fn test_file_resolver_with_base_dir() {
        // Create a temporary directory for test files
        let temp_dir = env::temp_dir().join("file_resolver_test_base_dir");
        fs::create_dir_all(&temp_dir).expect("failed to create temporary test directory");

        // Create a test file in the temp directory
        let test_file_path = temp_dir.join("test.txt");
        let test_content = b"Hello from base dir!";
        fs::write(&test_file_path, test_content).unwrap();

        // Create a resolver with a base directory
        let resolver = FileLinkResolver::with_base_dir(&temp_dir);
        let logger = slog::Logger::root(slog::Discard, slog::o!());

        // Test relative path (no leading slash)
        let link = Link {
            link: "test.txt".to_string(),
        };
        let result = resolver.cat(&logger, &link).await.unwrap();
        assert_eq!(result, test_content);

        // Test absolute path
        let link = Link {
            link: test_file_path.to_string_lossy().to_string(),
        };
        let result = resolver.cat(&logger, &link).await.unwrap();
        assert_eq!(result, test_content);

        // Test missing file
        let link = Link {
            link: "missing.txt".to_string(),
        };
        let result = resolver.cat(&logger, &link).await;
        assert!(result.is_err());

        // Clean up (best effort)
        let _ = fs::remove_file(test_file_path);
        let _ = fs::remove_dir(temp_dir);
    }
}
2 changes: 2 additions & 0 deletions graph/src/components/link_resolver/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ use crate::prelude::Error;
use std::fmt::Debug;

mod arweave;
mod file;
mod ipfs;

pub use arweave::*;
use async_trait::async_trait;
pub use file::*;
pub use ipfs::*;

/// Resolves links to subgraph manifests and resources referenced by them.
Expand Down
84 changes: 70 additions & 14 deletions graph/src/data/subgraph/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,20 +116,23 @@ impl DeploymentHash {
pub fn new(s: impl Into<String>) -> Result<Self, String> {
let s = s.into();

// Enforce length limit
if s.len() > 46 {
return Err(s);
}
// When the disable_deployment_hash_validation flag is set, we skip the validation
if !ENV_VARS.disable_deployment_hash_validation {
// Enforce length limit
if s.len() > 46 {
return Err(s);
}

// Check that the ID contains only allowed characters.
if !s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
return Err(s);
}
// Check that the ID contains only allowed characters.
if !s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
return Err(s);
}

// Allow only deployment id's for 'real' subgraphs, not the old
// metadata subgraph.
if s == "subgraphs" {
return Err(s);
// Allow only deployment id's for 'real' subgraphs, not the old
// metadata subgraph.
if s == "subgraphs" {
return Err(s);
}
}

Ok(DeploymentHash(s))
Expand Down Expand Up @@ -397,12 +400,65 @@ impl From<HashMap<Word, Value>> for DataSourceContext {
}

/// IPLD link.
#[derive(Clone, Debug, Default, Hash, Eq, PartialEq, Deserialize)]
#[derive(Clone, Debug, Default, Hash, Eq, PartialEq)]
pub struct Link {
#[serde(rename = "/")]
pub link: String,
}

/// Custom deserializer for Link
/// This handles both formats:
/// 1. Simple string: "schema.graphql" or "subgraph.yaml" which is used in [`FileLinkResolver`]
/// FileLinkResolver is used in local development environments
/// 2. IPLD format: { "/": "Qm..." } which is used in [`IpfsLinkResolver`]
impl<'de> de::Deserialize<'de> for Link {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
struct LinkVisitor;

impl<'de> de::Visitor<'de> for LinkVisitor {
type Value = Link;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("string or map with '/' key")
}

fn visit_str<E>(self, value: &str) -> Result<Link, E>
where
E: de::Error,
{
Ok(Link {
link: value.to_string(),
})
}

fn visit_map<A>(self, mut map: A) -> Result<Link, A::Error>
where
A: de::MapAccess<'de>,
{
let mut link = None;

while let Some(key) = map.next_key::<String>()? {
if key == "/" {
if link.is_some() {
return Err(de::Error::duplicate_field("/"));
}
link = Some(map.next_value()?);
} else {
return Err(de::Error::unknown_field(&key, &["/"]));
}
}

link.map(|l: String| Link { link: l })
.ok_or_else(|| de::Error::missing_field("/"))
}
}

deserializer.deserialize_any(LinkVisitor)
}
}

impl<S: ToString> From<S> for Link {
fn from(s: S) -> Self {
Self {
Expand Down
12 changes: 12 additions & 0 deletions graph/src/env/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,12 @@ pub struct EnvVars {
/// if no genesis hash can be retrieved from an adapter. If enabled, the adapter is
/// ignored if it is unable to produce a genesis hash or produces a different, unexpected hash.
pub genesis_validation_enabled: bool,
/// Whether to skip deployment hash validation.
/// When `true`, any string can be used as a deployment hash.
/// When `false`, deployment hashes must meet length and character constraints.
///
/// Set by the flag `GRAPH_NODE_DISABLE_DEPLOYMENT_HASH_VALIDATION`. Defaults to `false`, i.e. validation is performed.
pub disable_deployment_hash_validation: bool,
/// How long do we wait for a response from the provider before considering that it is unavailable.
/// Default is 30s.
pub genesis_validation_timeout: Duration,
Expand Down Expand Up @@ -332,6 +338,7 @@ impl EnvVars {
section_map: inner.section_map,
firehose_grpc_max_decode_size_mb: inner.firehose_grpc_max_decode_size_mb,
genesis_validation_enabled: inner.genesis_validation_enabled.0,
disable_deployment_hash_validation: inner.disable_deployment_hash_validation.0,
genesis_validation_timeout: Duration::from_secs(inner.genesis_validation_timeout),
graphman_server_auth_token: inner.graphman_server_auth_token,
firehose_disable_extended_blocks_for_chains:
Expand Down Expand Up @@ -528,6 +535,11 @@ struct Inner {
firehose_block_fetch_timeout: u64,
#[envconfig(from = "GRAPH_FIREHOSE_FETCH_BLOCK_BATCH_SIZE", default = "10")]
firehose_block_fetch_batch_size: usize,
#[envconfig(
from = "GRAPH_NODE_DISABLE_DEPLOYMENT_HASH_VALIDATION",
default = "false"
)]
disable_deployment_hash_validation: EnvVarBoolean,
}

#[derive(Clone, Debug)]
Expand Down
15 changes: 15 additions & 0 deletions tests/runner-tests/file-link-resolver/abis/Contract.abi
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[
{
"anonymous": false,
"inputs": [
{
"indexed": false,
"internalType": "string",
"name": "testCommand",
"type": "string"
}
],
"name": "TestEvent",
"type": "event"
}
]
13 changes: 13 additions & 0 deletions tests/runner-tests/file-link-resolver/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"name": "file-link-resolver",
"version": "0.1.0",
"scripts": {
"codegen": "graph codegen --skip-migrations",
"create:test": "graph create test/file-link-resolver --node $GRAPH_NODE_ADMIN_URI",
"deploy:test": "graph deploy test/file-link-resolver --version-label v0.0.1 --ipfs $IPFS_URI --node $GRAPH_NODE_ADMIN_URI"
},
"devDependencies": {
"@graphprotocol/graph-cli": "0.60.0",
"@graphprotocol/graph-ts": "0.31.0"
}
}
5 changes: 5 additions & 0 deletions tests/runner-tests/file-link-resolver/schema.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
type Block @entity {
id: ID!
number: BigInt!
hash: Bytes!
}
Loading