diff --git a/.env.example b/.env.example index 7202d7d..4de135b 100644 --- a/.env.example +++ b/.env.example @@ -9,3 +9,8 @@ GLIF_URL=https://api.node.glif.io/rpc/v1 PROXY_URL= PROXY_USER= PROXY_PASSWORD= + +# BMS Configuration +BMS_URL=https://bms.allocator.tech +BMS_WORKER_COUNT=10 +BMS_TEST_INTERVAL_DAYS=7 \ No newline at end of file diff --git a/.gitignore b/.gitignore index fc850a5..a1a2252 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ +.vscode target -z_notes -dump + .env \ No newline at end of file diff --git a/.sqlx/query-0735e4bd270243bb1fb6f73dbef302243f97f803e4631319f1900e6f281cdf7b.json b/.sqlx/query-0735e4bd270243bb1fb6f73dbef302243f97f803e4631319f1900e6f281cdf7b.json new file mode 100644 index 0000000..b0e93ec --- /dev/null +++ b/.sqlx/query-0735e4bd270243bb1fb6f73dbef302243f97f803e4631319f1900e6f281cdf7b.json @@ -0,0 +1,106 @@ +{ + "db_name": "PostgreSQL", + "query": "UPDATE\n storage_providers\n SET\n next_bms_test_at = NOW(),\n updated_at = NOW()\n WHERE\n provider_id = $1\n RETURNING\n id,\n provider_id AS \"provider_id: ProviderId\",\n next_url_discovery_at,\n url_discovery_status,\n url_discovery_pending_since,\n last_working_url,\n next_bms_test_at,\n bms_test_status,\n bms_routing_key,\n last_bms_region_discovery_at,\n is_consistent,\n is_reliable,\n url_metadata,\n created_at,\n updated_at\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id: ProviderId", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "next_url_discovery_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 3, + "name": "url_discovery_status", + "type_info": "Varchar" + }, + { + "ordinal": 4, + "name": "url_discovery_pending_since", + "type_info": "Timestamptz" + }, + { + "ordinal": 5, + "name": "last_working_url", + "type_info": "Text" + }, + { + "ordinal": 6, + "name": "next_bms_test_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 7, + "name": 
"bms_test_status", + "type_info": "Varchar" + }, + { + "ordinal": 8, + "name": "bms_routing_key", + "type_info": "Varchar" + }, + { + "ordinal": 9, + "name": "last_bms_region_discovery_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 10, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 11, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 12, + "name": "url_metadata", + "type_info": "Jsonb" + }, + { + "ordinal": 13, + "name": "created_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 14, + "name": "updated_at", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + false, + false, + true, + true, + true, + false, + true, + true, + true, + false, + false, + true, + false, + false + ] + }, + "hash": "0735e4bd270243bb1fb6f73dbef302243f97f803e4631319f1900e6f281cdf7b" +} diff --git a/.sqlx/query-0888cedbf0d7f3e8c83aede3466fccb00af870688c27550b194ea1d6a47e624e.json b/.sqlx/query-0888cedbf0d7f3e8c83aede3466fccb00af870688c27550b194ea1d6a47e624e.json new file mode 100644 index 0000000..69e69f7 --- /dev/null +++ b/.sqlx/query-0888cedbf0d7f3e8c83aede3466fccb00af870688c27550b194ea1d6a47e624e.json @@ -0,0 +1,108 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT DISTINCT ON (DATE(tested_at))\n DATE(tested_at) AS \"date!\",\n retrievability_percent::float8 AS \"retrievability_percent!\",\n sector_utilization_percent::float8 AS \"sector_utilization_percent\",\n is_consistent,\n is_reliable,\n working_url,\n result_code AS \"result_code: ResultCode\",\n error_code AS \"error_code: ErrorCode\",\n tested_at,\n url_metadata\n FROM\n url_results\n WHERE\n provider_id = $1\n AND result_type = 'Provider'\n AND tested_at >= $2::date\n AND tested_at < ($3::date + INTERVAL '1 day')\n ORDER BY\n DATE(tested_at),\n tested_at DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "date!", + "type_info": "Date" + }, + { + "ordinal": 1, + "name": 
"retrievability_percent!", + "type_info": "Float8" + }, + { + "ordinal": 2, + "name": "sector_utilization_percent", + "type_info": "Float8" + }, + { + "ordinal": 3, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 4, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 5, + "name": "working_url", + "type_info": "Text" + }, + { + "ordinal": 6, + "name": "result_code: ResultCode", + "type_info": { + "Custom": { + "name": "result_code", + "kind": { + "Enum": [ + "NoCidContactData", + "MissingAddrFromCidContact", + "MissingHttpAddrFromCidContact", + "FailedToGetWorkingUrl", + "NoDealsFound", + "TimedOut", + "Success", + "JobCreated", + "Error" + ] + } + } + } + }, + { + "ordinal": 7, + "name": "error_code: ErrorCode", + "type_info": { + "Custom": { + "name": "error_code", + "kind": { + "Enum": [ + "NoProviderOrClient", + "NoProvidersFound", + "FailedToRetrieveCidContactData", + "FailedToGetPeerId", + "FailedToGetDeals" + ] + } + } + } + }, + { + "ordinal": 8, + "name": "tested_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 9, + "name": "url_metadata", + "type_info": "Jsonb" + } + ], + "parameters": { + "Left": [ + "Text", + "Date", + "Date" + ] + }, + "nullable": [ + null, + null, + null, + true, + true, + true, + false, + true, + false, + true + ] + }, + "hash": "0888cedbf0d7f3e8c83aede3466fccb00af870688c27550b194ea1d6a47e624e" +} diff --git a/.sqlx/query-115e726bcb380b3521ffd3e41db323939295497fe867b5fe96fd661069ea4f9d.json b/.sqlx/query-115e726bcb380b3521ffd3e41db323939295497fe867b5fe96fd661069ea4f9d.json new file mode 100644 index 0000000..e49b6ac --- /dev/null +++ b/.sqlx/query-115e726bcb380b3521ffd3e41db323939295497fe867b5fe96fd661069ea4f9d.json @@ -0,0 +1,106 @@ +{ + "db_name": "PostgreSQL", + "query": "UPDATE\n storage_providers\n SET\n next_url_discovery_at = NOW(),\n next_bms_test_at = NOW(),\n updated_at = NOW()\n WHERE\n provider_id = $1\n RETURNING\n id,\n provider_id AS \"provider_id: ProviderId\",\n 
next_url_discovery_at,\n url_discovery_status,\n url_discovery_pending_since,\n last_working_url,\n next_bms_test_at,\n bms_test_status,\n bms_routing_key,\n last_bms_region_discovery_at,\n is_consistent,\n is_reliable,\n url_metadata,\n created_at,\n updated_at\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id: ProviderId", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "next_url_discovery_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 3, + "name": "url_discovery_status", + "type_info": "Varchar" + }, + { + "ordinal": 4, + "name": "url_discovery_pending_since", + "type_info": "Timestamptz" + }, + { + "ordinal": 5, + "name": "last_working_url", + "type_info": "Text" + }, + { + "ordinal": 6, + "name": "next_bms_test_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 7, + "name": "bms_test_status", + "type_info": "Varchar" + }, + { + "ordinal": 8, + "name": "bms_routing_key", + "type_info": "Varchar" + }, + { + "ordinal": 9, + "name": "last_bms_region_discovery_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 10, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 11, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 12, + "name": "url_metadata", + "type_info": "Jsonb" + }, + { + "ordinal": 13, + "name": "created_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 14, + "name": "updated_at", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + false, + false, + true, + true, + true, + false, + true, + true, + true, + false, + false, + true, + false, + false + ] + }, + "hash": "115e726bcb380b3521ffd3e41db323939295497fe867b5fe96fd661069ea4f9d" +} diff --git a/.sqlx/query-12d7f6ae8955ca4b916c0d4d311455c156a6d9e1b09fbac16f65f47beef4ab8b.json b/.sqlx/query-12d7f6ae8955ca4b916c0d4d311455c156a6d9e1b09fbac16f65f47beef4ab8b.json new file mode 100644 
index 0000000..27ccc87 --- /dev/null +++ b/.sqlx/query-12d7f6ae8955ca4b916c0d4d311455c156a6d9e1b09fbac16f65f47beef4ab8b.json @@ -0,0 +1,15 @@ +{ + "db_name": "PostgreSQL", + "query": "UPDATE\n storage_providers\n SET\n next_bms_test_at = NOW() + ($2 || ' days')::INTERVAL,\n updated_at = NOW()\n WHERE\n provider_id = $1\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Text", + "Text" + ] + }, + "nullable": [] + }, + "hash": "12d7f6ae8955ca4b916c0d4d311455c156a6d9e1b09fbac16f65f47beef4ab8b" +} diff --git a/.sqlx/query-14429b1538ea4ee9862eb2ca4eedec7453b70d1bfd25c0b23d7e1fc00d69ee29.json b/.sqlx/query-14429b1538ea4ee9862eb2ca4eedec7453b70d1bfd25c0b23d7e1fc00d69ee29.json new file mode 100644 index 0000000..7eb3f72 --- /dev/null +++ b/.sqlx/query-14429b1538ea4ee9862eb2ca4eedec7453b70d1bfd25c0b23d7e1fc00d69ee29.json @@ -0,0 +1,94 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT DISTINCT ON (provider_id)\n id,\n provider_id,\n bms_job_id,\n url_tested,\n routing_key,\n worker_count,\n status,\n ping_avg_ms,\n head_avg_ms,\n ttfb_ms,\n download_speed_mbps,\n created_at,\n completed_at\n FROM\n bms_bandwidth_results\n WHERE\n provider_id = ANY($1)\n AND status != 'Pending'\n ORDER BY\n provider_id,\n completed_at DESC NULLS LAST\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "bms_job_id", + "type_info": "Uuid" + }, + { + "ordinal": 3, + "name": "url_tested", + "type_info": "Text" + }, + { + "ordinal": 4, + "name": "routing_key", + "type_info": "Varchar" + }, + { + "ordinal": 5, + "name": "worker_count", + "type_info": "Int4" + }, + { + "ordinal": 6, + "name": "status", + "type_info": "Varchar" + }, + { + "ordinal": 7, + "name": "ping_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 8, + "name": "head_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 9, + "name": "ttfb_ms", + 
"type_info": "Numeric" + }, + { + "ordinal": 10, + "name": "download_speed_mbps", + "type_info": "Numeric" + }, + { + "ordinal": 11, + "name": "created_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 12, + "name": "completed_at", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "TextArray" + ] + }, + "nullable": [ + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + true, + false, + true + ] + }, + "hash": "14429b1538ea4ee9862eb2ca4eedec7453b70d1bfd25c0b23d7e1fc00d69ee29" +} diff --git a/.sqlx/query-b4fbaf70960f73b8699d52bb57cb383dac479ccee61c7cba2cacf8dc0c340d85.json b/.sqlx/query-1d19a32b718db6841bf9e043c74d5ee04121ae74f7484de022a12e2b1af3e3c9.json similarity index 70% rename from .sqlx/query-b4fbaf70960f73b8699d52bb57cb383dac479ccee61c7cba2cacf8dc0c340d85.json rename to .sqlx/query-1d19a32b718db6841bf9e043c74d5ee04121ae74f7484de022a12e2b1af3e3c9.json index 5c64cee..de09a86 100644 --- a/.sqlx/query-b4fbaf70960f73b8699d52bb57cb383dac479ccee61c7cba2cacf8dc0c340d85.json +++ b/.sqlx/query-1d19a32b718db6841bf9e043c74d5ee04121ae74f7484de022a12e2b1af3e3c9.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "SELECT\n id,\n provider_id AS \"provider_id: ProviderId\",\n client_id AS \"client_id: ClientId\",\n result_type AS \"result_type: DiscoveryType\",\n working_url,\n retrievability_percent::float8 AS \"retrievability_percent!\",\n result_code AS \"result_code: ResultCode\",\n error_code AS \"error_code: ErrorCode\",\n tested_at\n FROM\n url_results\n WHERE\n provider_id = $1\n AND result_type = 'Provider'\n ORDER BY\n tested_at DESC\n LIMIT 1\n ", + "query": "SELECT\n id,\n provider_id AS \"provider_id: ProviderId\",\n client_id AS \"client_id: ClientId\",\n result_type AS \"result_type: DiscoveryType\",\n working_url,\n retrievability_percent::float8 AS \"retrievability_percent!\",\n result_code AS \"result_code: ResultCode\",\n error_code AS \"error_code: ErrorCode\",\n tested_at,\n 
is_consistent,\n is_reliable,\n url_metadata,\n sector_utilization_percent::float8 AS \"sector_utilization_percent\"\n FROM\n url_results\n WHERE\n provider_id = $1\n AND client_id = $2\n AND result_type = 'ProviderClient'\n ORDER BY\n tested_at DESC\n LIMIT 1\n ", "describe": { "columns": [ { @@ -87,10 +87,31 @@ "ordinal": 8, "name": "tested_at", "type_info": "Timestamptz" + }, + { + "ordinal": 9, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 10, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 11, + "name": "url_metadata", + "type_info": "Jsonb" + }, + { + "ordinal": 12, + "name": "sector_utilization_percent", + "type_info": "Float8" } ], "parameters": { "Left": [ + "Text", "Text" ] }, @@ -103,8 +124,12 @@ null, false, true, - false + false, + true, + true, + true, + null ] }, - "hash": "b4fbaf70960f73b8699d52bb57cb383dac479ccee61c7cba2cacf8dc0c340d85" + "hash": "1d19a32b718db6841bf9e043c74d5ee04121ae74f7484de022a12e2b1af3e3c9" } diff --git a/.sqlx/query-808f33282188dbb6bc4ed48c3cbad3f9f793e9b51bbb50736d979733cd4455d5.json b/.sqlx/query-34ed43c0128f82fe08e55a85195c8bd5e5892e213efa669b7caa00ce2db24e6a.json similarity index 69% rename from .sqlx/query-808f33282188dbb6bc4ed48c3cbad3f9f793e9b51bbb50736d979733cd4455d5.json rename to .sqlx/query-34ed43c0128f82fe08e55a85195c8bd5e5892e213efa669b7caa00ce2db24e6a.json index ce055c1..04cfa1a 100644 --- a/.sqlx/query-808f33282188dbb6bc4ed48c3cbad3f9f793e9b51bbb50736d979733cd4455d5.json +++ b/.sqlx/query-34ed43c0128f82fe08e55a85195c8bd5e5892e213efa669b7caa00ce2db24e6a.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "SELECT\n id,\n provider_id AS \"provider_id: ProviderId\",\n next_url_discovery_at,\n url_discovery_status,\n url_discovery_pending_since,\n last_working_url,\n next_bms_test_at,\n bms_test_status,\n bms_routing_key,\n last_bms_region_discovery_at,\n created_at,\n updated_at\n FROM\n storage_providers\n WHERE\n (\n next_url_discovery_at <= NOW()\n AND 
url_discovery_status IS DISTINCT FROM 'pending'\n )\n OR\n (\n url_discovery_status = 'pending'\n AND url_discovery_pending_since < NOW() - INTERVAL '60 minutes'\n )\n ORDER BY\n next_url_discovery_at ASC\n LIMIT $1\n ", + "query": "SELECT\n id,\n provider_id AS \"provider_id: ProviderId\",\n next_url_discovery_at,\n url_discovery_status,\n url_discovery_pending_since,\n last_working_url,\n next_bms_test_at,\n bms_test_status,\n bms_routing_key,\n last_bms_region_discovery_at,\n is_consistent,\n is_reliable,\n url_metadata,\n created_at,\n updated_at\n FROM\n storage_providers\n WHERE\n last_working_url IS NOT NULL\n AND is_consistent = true\n AND next_bms_test_at <= NOW()\n ORDER BY\n next_bms_test_at ASC\n LIMIT $1\n ", "describe": { "columns": [ { @@ -55,11 +55,26 @@ }, { "ordinal": 10, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 11, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 12, + "name": "url_metadata", + "type_info": "Jsonb" + }, + { + "ordinal": 13, "name": "created_at", "type_info": "Timestamptz" }, { - "ordinal": 11, + "ordinal": 14, "name": "updated_at", "type_info": "Timestamptz" } @@ -81,8 +96,11 @@ true, true, false, + false, + true, + false, false ] }, - "hash": "808f33282188dbb6bc4ed48c3cbad3f9f793e9b51bbb50736d979733cd4455d5" + "hash": "34ed43c0128f82fe08e55a85195c8bd5e5892e213efa669b7caa00ce2db24e6a" } diff --git a/.sqlx/query-e489f8a98958aeab9a5d28a6bd0e3a1cef2fa9a813bdd85db0a3e7f9db335ab1.json b/.sqlx/query-4cb72d7b0fe0dd9c2dd7b912b99e8aef514f187fea51f660aa2a0fb22e58fb79.json similarity index 57% rename from .sqlx/query-e489f8a98958aeab9a5d28a6bd0e3a1cef2fa9a813bdd85db0a3e7f9db335ab1.json rename to .sqlx/query-4cb72d7b0fe0dd9c2dd7b912b99e8aef514f187fea51f660aa2a0fb22e58fb79.json index ccdabf1..c6afcd6 100644 --- a/.sqlx/query-e489f8a98958aeab9a5d28a6bd0e3a1cef2fa9a813bdd85db0a3e7f9db335ab1.json +++ b/.sqlx/query-4cb72d7b0fe0dd9c2dd7b912b99e8aef514f187fea51f660aa2a0fb22e58fb79.json @@ -1,15 
+1,18 @@ { "db_name": "PostgreSQL", - "query": "UPDATE\n storage_providers\n SET\n next_url_discovery_at = NOW() + INTERVAL '1 day',\n url_discovery_status = NULL,\n url_discovery_pending_since = NULL,\n last_working_url = $2,\n updated_at = NOW()\n WHERE\n provider_id = $1\n ", + "query": "UPDATE\n storage_providers\n SET\n next_url_discovery_at = NOW() + INTERVAL '1 day',\n url_discovery_status = NULL,\n url_discovery_pending_since = NULL,\n last_working_url = $2,\n is_consistent = $3,\n is_reliable = $4,\n url_metadata = $5,\n updated_at = NOW()\n WHERE\n provider_id = $1\n ", "describe": { "columns": [], "parameters": { "Left": [ "Text", - "Text" + "Text", + "Bool", + "Bool", + "Jsonb" ] }, "nullable": [] }, - "hash": "e489f8a98958aeab9a5d28a6bd0e3a1cef2fa9a813bdd85db0a3e7f9db335ab1" + "hash": "4cb72d7b0fe0dd9c2dd7b912b99e8aef514f187fea51f660aa2a0fb22e58fb79" } diff --git a/.sqlx/query-5baace3095ee5cd3774b7533c4c17244fa1be1730bc055fa49e167f597c619b0.json b/.sqlx/query-5baace3095ee5cd3774b7533c4c17244fa1be1730bc055fa49e167f597c619b0.json new file mode 100644 index 0000000..dcddc2c --- /dev/null +++ b/.sqlx/query-5baace3095ee5cd3774b7533c4c17244fa1be1730bc055fa49e167f597c619b0.json @@ -0,0 +1,137 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT DISTINCT ON (ur.provider_id)\n ur.id,\n ur.provider_id AS \"provider_id: ProviderId\",\n ur.client_id AS \"client_id: ClientId\",\n ur.result_type AS \"result_type: DiscoveryType\",\n ur.working_url,\n ur.retrievability_percent::float8 AS \"retrievability_percent!\",\n ur.result_code AS \"result_code: ResultCode\",\n ur.error_code AS \"error_code: ErrorCode\",\n ur.tested_at,\n ur.is_consistent,\n ur.is_reliable,\n ur.url_metadata,\n ur.sector_utilization_percent::float8 AS \"sector_utilization_percent\"\n FROM\n url_results ur\n JOIN\n storage_providers sp ON ur.provider_id = sp.provider_id\n WHERE\n ur.result_type = 'Provider'\n AND ($3::bool IS NULL OR (sp.last_working_url IS NOT NULL) = $3)\n AND ($4::bool IS NULL 
OR sp.is_consistent = $4)\n ORDER BY\n ur.provider_id,\n ur.tested_at DESC\n LIMIT $1\n OFFSET $2\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id: ProviderId", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "client_id: ClientId", + "type_info": "Varchar" + }, + { + "ordinal": 3, + "name": "result_type: DiscoveryType", + "type_info": { + "Custom": { + "name": "discovery_type", + "kind": { + "Enum": [ + "Provider", + "ProviderClient" + ] + } + } + } + }, + { + "ordinal": 4, + "name": "working_url", + "type_info": "Text" + }, + { + "ordinal": 5, + "name": "retrievability_percent!", + "type_info": "Float8" + }, + { + "ordinal": 6, + "name": "result_code: ResultCode", + "type_info": { + "Custom": { + "name": "result_code", + "kind": { + "Enum": [ + "NoCidContactData", + "MissingAddrFromCidContact", + "MissingHttpAddrFromCidContact", + "FailedToGetWorkingUrl", + "NoDealsFound", + "TimedOut", + "Success", + "JobCreated", + "Error" + ] + } + } + } + }, + { + "ordinal": 7, + "name": "error_code: ErrorCode", + "type_info": { + "Custom": { + "name": "error_code", + "kind": { + "Enum": [ + "NoProviderOrClient", + "NoProvidersFound", + "FailedToRetrieveCidContactData", + "FailedToGetPeerId", + "FailedToGetDeals" + ] + } + } + } + }, + { + "ordinal": 8, + "name": "tested_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 9, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 10, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 11, + "name": "url_metadata", + "type_info": "Jsonb" + }, + { + "ordinal": 12, + "name": "sector_utilization_percent", + "type_info": "Float8" + } + ], + "parameters": { + "Left": [ + "Int8", + "Int8", + "Bool", + "Bool" + ] + }, + "nullable": [ + false, + false, + true, + false, + true, + null, + false, + true, + false, + true, + true, + true, + null + ] + }, + "hash": 
"5baace3095ee5cd3774b7533c4c17244fa1be1730bc055fa49e167f597c619b0" +} diff --git a/.sqlx/query-5def243cfa2cd40cc4c0b6269ee4cc0d6d0e9b8346b86a3bcda0f5ba8a8a4212.json b/.sqlx/query-5def243cfa2cd40cc4c0b6269ee4cc0d6d0e9b8346b86a3bcda0f5ba8a8a4212.json new file mode 100644 index 0000000..a878a84 --- /dev/null +++ b/.sqlx/query-5def243cfa2cd40cc4c0b6269ee4cc0d6d0e9b8346b86a3bcda0f5ba8a8a4212.json @@ -0,0 +1,109 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT DISTINCT ON (DATE(tested_at))\n DATE(tested_at) AS \"date!\",\n retrievability_percent::float8 AS \"retrievability_percent!\",\n sector_utilization_percent::float8 AS \"sector_utilization_percent\",\n is_consistent,\n is_reliable,\n working_url,\n result_code AS \"result_code: ResultCode\",\n error_code AS \"error_code: ErrorCode\",\n tested_at,\n url_metadata\n FROM\n url_results\n WHERE\n provider_id = $1\n AND client_id = $2\n AND result_type = 'ProviderClient'\n AND tested_at >= $3::date\n AND tested_at < ($4::date + INTERVAL '1 day')\n ORDER BY\n DATE(tested_at),\n tested_at DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "date!", + "type_info": "Date" + }, + { + "ordinal": 1, + "name": "retrievability_percent!", + "type_info": "Float8" + }, + { + "ordinal": 2, + "name": "sector_utilization_percent", + "type_info": "Float8" + }, + { + "ordinal": 3, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 4, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 5, + "name": "working_url", + "type_info": "Text" + }, + { + "ordinal": 6, + "name": "result_code: ResultCode", + "type_info": { + "Custom": { + "name": "result_code", + "kind": { + "Enum": [ + "NoCidContactData", + "MissingAddrFromCidContact", + "MissingHttpAddrFromCidContact", + "FailedToGetWorkingUrl", + "NoDealsFound", + "TimedOut", + "Success", + "JobCreated", + "Error" + ] + } + } + } + }, + { + "ordinal": 7, + "name": "error_code: ErrorCode", + "type_info": { + "Custom": { + "name": 
"error_code", + "kind": { + "Enum": [ + "NoProviderOrClient", + "NoProvidersFound", + "FailedToRetrieveCidContactData", + "FailedToGetPeerId", + "FailedToGetDeals" + ] + } + } + } + }, + { + "ordinal": 8, + "name": "tested_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 9, + "name": "url_metadata", + "type_info": "Jsonb" + } + ], + "parameters": { + "Left": [ + "Text", + "Text", + "Date", + "Date" + ] + }, + "nullable": [ + null, + null, + null, + true, + true, + true, + false, + true, + false, + true + ] + }, + "hash": "5def243cfa2cd40cc4c0b6269ee4cc0d6d0e9b8346b86a3bcda0f5ba8a8a4212" +} diff --git a/.sqlx/query-5e3e3f9aa43234269d673cb4b321e1f09dc278938a3ea3955110ed60c42423f2.json b/.sqlx/query-5e3e3f9aa43234269d673cb4b321e1f09dc278938a3ea3955110ed60c42423f2.json new file mode 100644 index 0000000..3d4e1ea --- /dev/null +++ b/.sqlx/query-5e3e3f9aa43234269d673cb4b321e1f09dc278938a3ea3955110ed60c42423f2.json @@ -0,0 +1,104 @@ +{ + "db_name": "PostgreSQL", + "query": "INSERT INTO\n bms_bandwidth_results (\n provider_id,\n bms_job_id,\n url_tested,\n routing_key,\n worker_count,\n status,\n ping_avg_ms,\n head_avg_ms,\n ttfb_ms,\n download_speed_mbps,\n completed_at\n )\n VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)\n RETURNING\n id,\n provider_id,\n bms_job_id,\n url_tested,\n routing_key,\n worker_count,\n status,\n ping_avg_ms,\n head_avg_ms,\n ttfb_ms,\n download_speed_mbps,\n created_at,\n completed_at\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "bms_job_id", + "type_info": "Uuid" + }, + { + "ordinal": 3, + "name": "url_tested", + "type_info": "Text" + }, + { + "ordinal": 4, + "name": "routing_key", + "type_info": "Varchar" + }, + { + "ordinal": 5, + "name": "worker_count", + "type_info": "Int4" + }, + { + "ordinal": 6, + "name": "status", + "type_info": "Varchar" + }, + { + "ordinal": 7, 
+ "name": "ping_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 8, + "name": "head_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 9, + "name": "ttfb_ms", + "type_info": "Numeric" + }, + { + "ordinal": 10, + "name": "download_speed_mbps", + "type_info": "Numeric" + }, + { + "ordinal": 11, + "name": "created_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 12, + "name": "completed_at", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Varchar", + "Uuid", + "Text", + "Varchar", + "Int4", + "Varchar", + "Numeric", + "Numeric", + "Numeric", + "Numeric", + "Timestamptz" + ] + }, + "nullable": [ + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + true, + false, + true + ] + }, + "hash": "5e3e3f9aa43234269d673cb4b321e1f09dc278938a3ea3955110ed60c42423f2" +} diff --git a/.sqlx/query-5eff09a6443b679f3bc725a5d9117f117c721086e0e2040e4476461de67e4355.json b/.sqlx/query-5eff09a6443b679f3bc725a5d9117f117c721086e0e2040e4476461de67e4355.json new file mode 100644 index 0000000..0f7b6e3 --- /dev/null +++ b/.sqlx/query-5eff09a6443b679f3bc725a5d9117f117c721086e0e2040e4476461de67e4355.json @@ -0,0 +1,98 @@ +{ + "db_name": "PostgreSQL", + "query": "INSERT INTO\n bms_bandwidth_results (\n provider_id,\n bms_job_id,\n url_tested,\n routing_key,\n worker_count,\n status\n )\n VALUES ($1, $2, $3, $4, $5, 'Pending')\n RETURNING\n id,\n provider_id,\n bms_job_id,\n url_tested,\n routing_key,\n worker_count,\n status,\n ping_avg_ms,\n head_avg_ms,\n ttfb_ms,\n download_speed_mbps,\n created_at,\n completed_at\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "bms_job_id", + "type_info": "Uuid" + }, + { + "ordinal": 3, + "name": "url_tested", + "type_info": "Text" + }, + { + "ordinal": 4, + "name": "routing_key", + "type_info": "Varchar" + }, + { + "ordinal": 5, + 
"name": "worker_count", + "type_info": "Int4" + }, + { + "ordinal": 6, + "name": "status", + "type_info": "Varchar" + }, + { + "ordinal": 7, + "name": "ping_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 8, + "name": "head_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 9, + "name": "ttfb_ms", + "type_info": "Numeric" + }, + { + "ordinal": 10, + "name": "download_speed_mbps", + "type_info": "Numeric" + }, + { + "ordinal": 11, + "name": "created_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 12, + "name": "completed_at", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Varchar", + "Uuid", + "Text", + "Varchar", + "Int4" + ] + }, + "nullable": [ + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + true, + false, + true + ] + }, + "hash": "5eff09a6443b679f3bc725a5d9117f117c721086e0e2040e4476461de67e4355" +} diff --git a/.sqlx/query-65059fe3bf55781ccbf2fc119fe8288664209f3d8fdc038d1bbefa7d2029b9da.json b/.sqlx/query-65059fe3bf55781ccbf2fc119fe8288664209f3d8fdc038d1bbefa7d2029b9da.json deleted file mode 100644 index 6504c61..0000000 --- a/.sqlx/query-65059fe3bf55781ccbf2fc119fe8288664209f3d8fdc038d1bbefa7d2029b9da.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n SELECT\n id,\n \"dealId\" AS deal_id,\n \"claimId\" AS claim_id,\n \"clientId\" AS client_id,\n \"providerId\" AS provider_id,\n \"pieceCid\" AS piece_cid\n FROM unified_verified_deal\n WHERE \n \"providerId\" = $1\n AND \"clientId\" = $2\n ORDER BY random()\n LIMIT $3\n OFFSET $4\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "id", - "type_info": "Int4" - }, - { - "ordinal": 1, - "name": "deal_id", - "type_info": "Int4" - }, - { - "ordinal": 2, - "name": "claim_id", - "type_info": "Int4" - }, - { - "ordinal": 3, - "name": "client_id", - "type_info": "Varchar" - }, - { - "ordinal": 4, - "name": "provider_id", - "type_info": "Varchar" - }, - { - "ordinal": 5, - "name": "piece_cid", 
- "type_info": "Varchar" - } - ], - "parameters": { - "Left": [ - "Text", - "Text", - "Int8", - "Int8" - ] - }, - "nullable": [ - false, - false, - false, - true, - true, - true - ] - }, - "hash": "65059fe3bf55781ccbf2fc119fe8288664209f3d8fdc038d1bbefa7d2029b9da" -} diff --git a/.sqlx/query-696252c8021291171d7b230cbf41b22f0101acd09ee425525aca5a535c1f315a.json b/.sqlx/query-696252c8021291171d7b230cbf41b22f0101acd09ee425525aca5a535c1f315a.json new file mode 100644 index 0000000..e1374a4 --- /dev/null +++ b/.sqlx/query-696252c8021291171d7b230cbf41b22f0101acd09ee425525aca5a535c1f315a.json @@ -0,0 +1,94 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT\n id,\n provider_id,\n bms_job_id,\n url_tested,\n routing_key,\n worker_count,\n status,\n ping_avg_ms,\n head_avg_ms,\n ttfb_ms,\n download_speed_mbps,\n created_at,\n completed_at\n FROM\n bms_bandwidth_results\n WHERE\n provider_id = $1\n AND status != 'Pending'\n ORDER BY\n completed_at DESC NULLS LAST\n LIMIT 1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "bms_job_id", + "type_info": "Uuid" + }, + { + "ordinal": 3, + "name": "url_tested", + "type_info": "Text" + }, + { + "ordinal": 4, + "name": "routing_key", + "type_info": "Varchar" + }, + { + "ordinal": 5, + "name": "worker_count", + "type_info": "Int4" + }, + { + "ordinal": 6, + "name": "status", + "type_info": "Varchar" + }, + { + "ordinal": 7, + "name": "ping_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 8, + "name": "head_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 9, + "name": "ttfb_ms", + "type_info": "Numeric" + }, + { + "ordinal": 10, + "name": "download_speed_mbps", + "type_info": "Numeric" + }, + { + "ordinal": 11, + "name": "created_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 12, + "name": "completed_at", + "type_info": "Timestamptz" + } + ], + "parameters": { + 
"Left": [ + "Text" + ] + }, + "nullable": [ + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + true, + false, + true + ] + }, + "hash": "696252c8021291171d7b230cbf41b22f0101acd09ee425525aca5a535c1f315a" +} diff --git a/.sqlx/query-78da6ac872754bf3131bd38b056a9ce9762e16f635bb8955e56d3f52bb2582a1.json b/.sqlx/query-78da6ac872754bf3131bd38b056a9ce9762e16f635bb8955e56d3f52bb2582a1.json new file mode 100644 index 0000000..ae6a475 --- /dev/null +++ b/.sqlx/query-78da6ac872754bf3131bd38b056a9ce9762e16f635bb8955e56d3f52bb2582a1.json @@ -0,0 +1,106 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT\n id,\n provider_id AS \"provider_id: ProviderId\",\n next_url_discovery_at,\n url_discovery_status,\n url_discovery_pending_since,\n last_working_url,\n next_bms_test_at,\n bms_test_status,\n bms_routing_key,\n last_bms_region_discovery_at,\n is_consistent,\n is_reliable,\n url_metadata,\n created_at,\n updated_at\n FROM\n storage_providers\n WHERE\n (\n next_url_discovery_at <= NOW()\n AND url_discovery_status IS DISTINCT FROM 'pending'\n )\n OR\n (\n url_discovery_status = 'pending'\n AND url_discovery_pending_since < NOW() - INTERVAL '60 minutes'\n )\n ORDER BY\n next_url_discovery_at ASC\n LIMIT $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id: ProviderId", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "next_url_discovery_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 3, + "name": "url_discovery_status", + "type_info": "Varchar" + }, + { + "ordinal": 4, + "name": "url_discovery_pending_since", + "type_info": "Timestamptz" + }, + { + "ordinal": 5, + "name": "last_working_url", + "type_info": "Text" + }, + { + "ordinal": 6, + "name": "next_bms_test_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 7, + "name": "bms_test_status", + "type_info": "Varchar" + }, + { + "ordinal": 8, + "name": "bms_routing_key", + 
"type_info": "Varchar" + }, + { + "ordinal": 9, + "name": "last_bms_region_discovery_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 10, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 11, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 12, + "name": "url_metadata", + "type_info": "Jsonb" + }, + { + "ordinal": 13, + "name": "created_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 14, + "name": "updated_at", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Int8" + ] + }, + "nullable": [ + false, + false, + false, + true, + true, + true, + false, + true, + true, + true, + false, + false, + true, + false, + false + ] + }, + "hash": "78da6ac872754bf3131bd38b056a9ce9762e16f635bb8955e56d3f52bb2582a1" +} diff --git a/.sqlx/query-c3e0ac8f56140c94a5e0865750391db0aede162482c0b2a654927e0591619183.json b/.sqlx/query-9a96fa69afe9a8a45e5ecc6bfed52cc6f3c950c7d2bbd0f1fbb7b35556f193dc.json similarity index 70% rename from .sqlx/query-c3e0ac8f56140c94a5e0865750391db0aede162482c0b2a654927e0591619183.json rename to .sqlx/query-9a96fa69afe9a8a45e5ecc6bfed52cc6f3c950c7d2bbd0f1fbb7b35556f193dc.json index 3c4db76..2d26d6d 100644 --- a/.sqlx/query-c3e0ac8f56140c94a5e0865750391db0aede162482c0b2a654927e0591619183.json +++ b/.sqlx/query-9a96fa69afe9a8a45e5ecc6bfed52cc6f3c950c7d2bbd0f1fbb7b35556f193dc.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT\n id,\n \"dealId\" AS deal_id,\n \"claimId\" AS claim_id,\n \"clientId\" AS client_id,\n \"providerId\" AS provider_id,\n \"pieceCid\" AS piece_cid\n FROM unified_verified_deal\n WHERE\n \"providerId\" = $1\n AND \"clientId\" = $2\n ORDER BY random()\n LIMIT $3\n OFFSET $4\n ", + "query": "\n SELECT\n id,\n \"dealId\" AS deal_id,\n \"claimId\" AS claim_id,\n \"clientId\" AS client_id,\n \"providerId\" AS provider_id,\n \"pieceCid\" AS piece_cid,\n \"pieceSize\" AS piece_size\n FROM unified_verified_deal\n WHERE\n \"providerId\" = $1\n AND 
\"clientId\" = $2\n ORDER BY random()\n LIMIT $3\n OFFSET $4\n ", "describe": { "columns": [ { @@ -32,6 +32,11 @@ "ordinal": 5, "name": "piece_cid", "type_info": "Varchar" + }, + { + "ordinal": 6, + "name": "piece_size", + "type_info": "Numeric" } ], "parameters": { @@ -48,8 +53,9 @@ false, true, true, + true, true ] }, - "hash": "c3e0ac8f56140c94a5e0865750391db0aede162482c0b2a654927e0591619183" + "hash": "9a96fa69afe9a8a45e5ecc6bfed52cc6f3c950c7d2bbd0f1fbb7b35556f193dc" } diff --git a/.sqlx/query-a8996d2438aedd6547f0e2c5a3b33e5ee4084b0c41e90038ed490eab7127baf1.json b/.sqlx/query-a8996d2438aedd6547f0e2c5a3b33e5ee4084b0c41e90038ed490eab7127baf1.json new file mode 100644 index 0000000..bdd7d6a --- /dev/null +++ b/.sqlx/query-a8996d2438aedd6547f0e2c5a3b33e5ee4084b0c41e90038ed490eab7127baf1.json @@ -0,0 +1,106 @@ +{ + "db_name": "PostgreSQL", + "query": "UPDATE\n storage_providers\n SET\n next_url_discovery_at = NOW(),\n updated_at = NOW()\n WHERE\n provider_id = $1\n RETURNING\n id,\n provider_id AS \"provider_id: ProviderId\",\n next_url_discovery_at,\n url_discovery_status,\n url_discovery_pending_since,\n last_working_url,\n next_bms_test_at,\n bms_test_status,\n bms_routing_key,\n last_bms_region_discovery_at,\n is_consistent,\n is_reliable,\n url_metadata,\n created_at,\n updated_at\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id: ProviderId", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "next_url_discovery_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 3, + "name": "url_discovery_status", + "type_info": "Varchar" + }, + { + "ordinal": 4, + "name": "url_discovery_pending_since", + "type_info": "Timestamptz" + }, + { + "ordinal": 5, + "name": "last_working_url", + "type_info": "Text" + }, + { + "ordinal": 6, + "name": "next_bms_test_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 7, + "name": "bms_test_status", + "type_info": "Varchar" + }, 
+ { + "ordinal": 8, + "name": "bms_routing_key", + "type_info": "Varchar" + }, + { + "ordinal": 9, + "name": "last_bms_region_discovery_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 10, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 11, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 12, + "name": "url_metadata", + "type_info": "Jsonb" + }, + { + "ordinal": 13, + "name": "created_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 14, + "name": "updated_at", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + false, + false, + true, + true, + true, + false, + true, + true, + true, + false, + false, + true, + false, + false + ] + }, + "hash": "a8996d2438aedd6547f0e2c5a3b33e5ee4084b0c41e90038ed490eab7127baf1" +} diff --git a/.sqlx/query-ad6dbed94b19808e4cf4e1bb7d94579915d6583e434fc86b499806cda6043036.json b/.sqlx/query-ad6dbed94b19808e4cf4e1bb7d94579915d6583e434fc86b499806cda6043036.json deleted file mode 100644 index b3036ec..0000000 --- a/.sqlx/query-ad6dbed94b19808e4cf4e1bb7d94579915d6583e434fc86b499806cda6043036.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "INSERT INTO\n storage_providers (provider_id)\n VALUES\n ($1)\n ON CONFLICT DO NOTHING\n ", - "describe": { - "columns": [], - "parameters": { - "Left": [ - "Varchar" - ] - }, - "nullable": [] - }, - "hash": "ad6dbed94b19808e4cf4e1bb7d94579915d6583e434fc86b499806cda6043036" -} diff --git a/.sqlx/query-afa1154b98b7dd8903668b2e3a791d1ee1f17cdf6ade455fa7d52a6b74d8ac5f.json b/.sqlx/query-afa1154b98b7dd8903668b2e3a791d1ee1f17cdf6ade455fa7d52a6b74d8ac5f.json new file mode 100644 index 0000000..0a1c348 --- /dev/null +++ b/.sqlx/query-afa1154b98b7dd8903668b2e3a791d1ee1f17cdf6ade455fa7d52a6b74d8ac5f.json @@ -0,0 +1,23 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT\n COUNT(DISTINCT ur.provider_id) AS \"count!\"\n FROM\n url_results ur\n JOIN\n storage_providers sp 
ON ur.provider_id = sp.provider_id\n WHERE\n ur.result_type = 'Provider'\n AND ($1::bool IS NULL OR (sp.last_working_url IS NOT NULL) = $1)\n AND ($2::bool IS NULL OR sp.is_consistent = $2)\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "count!", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Bool", + "Bool" + ] + }, + "nullable": [ + null + ] + }, + "hash": "afa1154b98b7dd8903668b2e3a791d1ee1f17cdf6ade455fa7d52a6b74d8ac5f" +} diff --git a/.sqlx/query-567597c712512a134b21cf0d7dd4e1c75e9e3441b4914a085fda76dd6f3368fc.json b/.sqlx/query-b2dff2341802927f51749256a34eb4deb4f3b63ec3ca7c534ea17539f107e1b4.json similarity index 67% rename from .sqlx/query-567597c712512a134b21cf0d7dd4e1c75e9e3441b4914a085fda76dd6f3368fc.json rename to .sqlx/query-b2dff2341802927f51749256a34eb4deb4f3b63ec3ca7c534ea17539f107e1b4.json index f9889d8..f632910 100644 --- a/.sqlx/query-567597c712512a134b21cf0d7dd4e1c75e9e3441b4914a085fda76dd6f3368fc.json +++ b/.sqlx/query-b2dff2341802927f51749256a34eb4deb4f3b63ec3ca7c534ea17539f107e1b4.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "INSERT INTO\n url_results (id, provider_id, client_id, result_type, working_url, retrievability_percent, result_code, error_code, tested_at)\n SELECT\n a1, a2, a3, a4, a5, a6, a7, a8, a9\n FROM UNNEST(\n $1::uuid[],\n $2::text[],\n $3::text[],\n $4::discovery_type[],\n $5::text[],\n $6::double precision[],\n $7::result_code[],\n $8::error_code[],\n $9::timestamptz[]\n ) AS t(a1, a2, a3, a4, a5, a6, a7, a8, a9)\n ", + "query": "INSERT INTO\n url_results (id, provider_id, client_id, result_type, working_url, retrievability_percent, result_code, error_code, tested_at, is_consistent, is_reliable, url_metadata, sector_utilization_percent)\n SELECT\n a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13\n FROM UNNEST(\n $1::uuid[],\n $2::text[],\n $3::text[],\n $4::discovery_type[],\n $5::text[],\n $6::double precision[],\n $7::result_code[],\n $8::error_code[],\n 
$9::timestamptz[],\n $10::bool[],\n $11::bool[],\n $12::jsonb[],\n $13::double precision[]\n ) AS t(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13)\n ", "describe": { "columns": [], "parameters": { @@ -74,10 +74,14 @@ } } }, - "TimestamptzArray" + "TimestamptzArray", + "BoolArray", + "BoolArray", + "JsonbArray", + "Float8Array" ] }, "nullable": [] }, - "hash": "567597c712512a134b21cf0d7dd4e1c75e9e3441b4914a085fda76dd6f3368fc" + "hash": "b2dff2341802927f51749256a34eb4deb4f3b63ec3ca7c534ea17539f107e1b4" } diff --git a/.sqlx/query-c2c958b52086e4f9559db175d2ee01a125ddca116b5d02656825e4bef039eba9.json b/.sqlx/query-c2c958b52086e4f9559db175d2ee01a125ddca116b5d02656825e4bef039eba9.json deleted file mode 100644 index bc10509..0000000 --- a/.sqlx/query-c2c958b52086e4f9559db175d2ee01a125ddca116b5d02656825e4bef039eba9.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n SELECT\n id,\n \"dealId\" AS deal_id,\n \"claimId\" AS claim_id,\n \"clientId\" AS client_id,\n \"providerId\" AS provider_id,\n \"pieceCid\" AS piece_cid\n FROM unified_verified_deal\n WHERE \n \"providerId\" = $1\n ORDER BY random()\n LIMIT $2\n OFFSET $3\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "id", - "type_info": "Int4" - }, - { - "ordinal": 1, - "name": "deal_id", - "type_info": "Int4" - }, - { - "ordinal": 2, - "name": "claim_id", - "type_info": "Int4" - }, - { - "ordinal": 3, - "name": "client_id", - "type_info": "Varchar" - }, - { - "ordinal": 4, - "name": "provider_id", - "type_info": "Varchar" - }, - { - "ordinal": 5, - "name": "piece_cid", - "type_info": "Varchar" - } - ], - "parameters": { - "Left": [ - "Text", - "Int8", - "Int8" - ] - }, - "nullable": [ - false, - false, - false, - true, - true, - true - ] - }, - "hash": "c2c958b52086e4f9559db175d2ee01a125ddca116b5d02656825e4bef039eba9" -} diff --git a/.sqlx/query-c3f055777ef017282ee244247417aefb57add7148b2b65bf82bd92f9db8c514d.json 
b/.sqlx/query-c3f055777ef017282ee244247417aefb57add7148b2b65bf82bd92f9db8c514d.json deleted file mode 100644 index 22b88df..0000000 --- a/.sqlx/query-c3f055777ef017282ee244247417aefb57add7148b2b65bf82bd92f9db8c514d.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT \n id, \n provider_id, \n next_url_discovery_at, \n url_discovery_status, \n last_working_url,\n next_bms_test_at, \n bms_test_status, \n bms_routing_key, \n last_bms_region_discovery_at,\n created_at, \n updated_at\n FROM \n storage_providers \n WHERE \n provider_id = $1\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "id", - "type_info": "Uuid" - }, - { - "ordinal": 1, - "name": "provider_id", - "type_info": "Varchar" - }, - { - "ordinal": 2, - "name": "next_url_discovery_at", - "type_info": "Timestamptz" - }, - { - "ordinal": 3, - "name": "url_discovery_status", - "type_info": "Varchar" - }, - { - "ordinal": 4, - "name": "last_working_url", - "type_info": "Text" - }, - { - "ordinal": 5, - "name": "next_bms_test_at", - "type_info": "Timestamptz" - }, - { - "ordinal": 6, - "name": "bms_test_status", - "type_info": "Varchar" - }, - { - "ordinal": 7, - "name": "bms_routing_key", - "type_info": "Varchar" - }, - { - "ordinal": 8, - "name": "last_bms_region_discovery_at", - "type_info": "Timestamptz" - }, - { - "ordinal": 9, - "name": "created_at", - "type_info": "Timestamptz" - }, - { - "ordinal": 10, - "name": "updated_at", - "type_info": "Timestamptz" - } - ], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [ - false, - false, - false, - true, - true, - false, - true, - true, - true, - false, - false - ] - }, - "hash": "c3f055777ef017282ee244247417aefb57add7148b2b65bf82bd92f9db8c514d" -} diff --git a/.sqlx/query-6bf9bd322ce9b3c312d0e6d880368ea2997370858f1b5a184bb884af1765cf89.json b/.sqlx/query-cbd365519d041e7a696b755573dfdb38805490222afec89555bbc4caafd58c17.json similarity index 71% rename from 
.sqlx/query-6bf9bd322ce9b3c312d0e6d880368ea2997370858f1b5a184bb884af1765cf89.json rename to .sqlx/query-cbd365519d041e7a696b755573dfdb38805490222afec89555bbc4caafd58c17.json index db6ed6a..1dd9256 100644 --- a/.sqlx/query-6bf9bd322ce9b3c312d0e6d880368ea2997370858f1b5a184bb884af1765cf89.json +++ b/.sqlx/query-cbd365519d041e7a696b755573dfdb38805490222afec89555bbc4caafd58c17.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT\n id,\n \"dealId\" AS deal_id,\n \"claimId\" AS claim_id,\n \"clientId\" AS client_id,\n \"providerId\" AS provider_id,\n \"pieceCid\" AS piece_cid\n FROM unified_verified_deal\n WHERE\n \"providerId\" = $1\n ORDER BY random()\n LIMIT $2\n OFFSET $3\n ", + "query": "\n SELECT\n id,\n \"dealId\" AS deal_id,\n \"claimId\" AS claim_id,\n \"clientId\" AS client_id,\n \"providerId\" AS provider_id,\n \"pieceCid\" AS piece_cid,\n \"pieceSize\" AS piece_size\n FROM unified_verified_deal\n WHERE\n \"providerId\" = $1\n ORDER BY random()\n LIMIT $2\n OFFSET $3\n ", "describe": { "columns": [ { @@ -32,6 +32,11 @@ "ordinal": 5, "name": "piece_cid", "type_info": "Varchar" + }, + { + "ordinal": 6, + "name": "piece_size", + "type_info": "Numeric" } ], "parameters": { @@ -47,8 +52,9 @@ false, true, true, + true, true ] }, - "hash": "6bf9bd322ce9b3c312d0e6d880368ea2997370858f1b5a184bb884af1765cf89" + "hash": "cbd365519d041e7a696b755573dfdb38805490222afec89555bbc4caafd58c17" } diff --git a/.sqlx/query-cf8742633602da083aa2ee08fc825a985671f1667514fc8aef30f45c26cf0ba7.json b/.sqlx/query-cf8742633602da083aa2ee08fc825a985671f1667514fc8aef30f45c26cf0ba7.json new file mode 100644 index 0000000..38be02d --- /dev/null +++ b/.sqlx/query-cf8742633602da083aa2ee08fc825a985671f1667514fc8aef30f45c26cf0ba7.json @@ -0,0 +1,19 @@ +{ + "db_name": "PostgreSQL", + "query": "UPDATE\n bms_bandwidth_results\n SET\n status = $2,\n ping_avg_ms = $3,\n head_avg_ms = $4,\n ttfb_ms = $5,\n download_speed_mbps = $6,\n completed_at = NOW()\n WHERE\n bms_job_id = $1\n ", + 
"describe": { + "columns": [], + "parameters": { + "Left": [ + "Uuid", + "Varchar", + "Numeric", + "Numeric", + "Numeric", + "Numeric" + ] + }, + "nullable": [] + }, + "hash": "cf8742633602da083aa2ee08fc825a985671f1667514fc8aef30f45c26cf0ba7" +} diff --git a/.sqlx/query-dcfc858a070771ba6f49494456b3887c2dc96af4cfe60f2cf62308b1b72335ff.json b/.sqlx/query-dcfc858a070771ba6f49494456b3887c2dc96af4cfe60f2cf62308b1b72335ff.json new file mode 100644 index 0000000..1889469 --- /dev/null +++ b/.sqlx/query-dcfc858a070771ba6f49494456b3887c2dc96af4cfe60f2cf62308b1b72335ff.json @@ -0,0 +1,95 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT\n id,\n provider_id,\n bms_job_id,\n url_tested,\n routing_key,\n worker_count,\n status,\n ping_avg_ms,\n head_avg_ms,\n ttfb_ms,\n download_speed_mbps,\n created_at,\n completed_at\n FROM\n bms_bandwidth_results\n WHERE\n provider_id = $1\n ORDER BY\n created_at DESC\n LIMIT $2\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "bms_job_id", + "type_info": "Uuid" + }, + { + "ordinal": 3, + "name": "url_tested", + "type_info": "Text" + }, + { + "ordinal": 4, + "name": "routing_key", + "type_info": "Varchar" + }, + { + "ordinal": 5, + "name": "worker_count", + "type_info": "Int4" + }, + { + "ordinal": 6, + "name": "status", + "type_info": "Varchar" + }, + { + "ordinal": 7, + "name": "ping_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 8, + "name": "head_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 9, + "name": "ttfb_ms", + "type_info": "Numeric" + }, + { + "ordinal": 10, + "name": "download_speed_mbps", + "type_info": "Numeric" + }, + { + "ordinal": 11, + "name": "created_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 12, + "name": "completed_at", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text", + "Int8" + ] + }, + "nullable": [ + 
false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + true, + false, + true + ] + }, + "hash": "dcfc858a070771ba6f49494456b3887c2dc96af4cfe60f2cf62308b1b72335ff" +} diff --git a/.sqlx/query-2c8f0fca32d7d16d6956dbf7bad248067dbfe22dcd7e7d58dff1db2b261826cc.json b/.sqlx/query-e38878ea83d70c39c7b6076d4951fb2d2f959a01f1daa34162aeb5fa8c844b12.json similarity index 71% rename from .sqlx/query-2c8f0fca32d7d16d6956dbf7bad248067dbfe22dcd7e7d58dff1db2b261826cc.json rename to .sqlx/query-e38878ea83d70c39c7b6076d4951fb2d2f959a01f1daa34162aeb5fa8c844b12.json index 2da9745..472a36a 100644 --- a/.sqlx/query-2c8f0fca32d7d16d6956dbf7bad248067dbfe22dcd7e7d58dff1db2b261826cc.json +++ b/.sqlx/query-e38878ea83d70c39c7b6076d4951fb2d2f959a01f1daa34162aeb5fa8c844b12.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "SELECT\n id,\n provider_id AS \"provider_id: ProviderId\",\n client_id AS \"client_id: ClientId\",\n result_type AS \"result_type: DiscoveryType\",\n working_url,\n retrievability_percent::float8 AS \"retrievability_percent!\",\n result_code AS \"result_code: ResultCode\",\n error_code AS \"error_code: ErrorCode\",\n tested_at\n FROM\n url_results\n WHERE\n provider_id = $1\n AND client_id = $2\n AND result_type = 'ProviderClient'\n ORDER BY\n tested_at DESC\n LIMIT 1\n ", + "query": "SELECT\n id,\n provider_id AS \"provider_id: ProviderId\",\n client_id AS \"client_id: ClientId\",\n result_type AS \"result_type: DiscoveryType\",\n working_url,\n retrievability_percent::float8 AS \"retrievability_percent!\",\n result_code AS \"result_code: ResultCode\",\n error_code AS \"error_code: ErrorCode\",\n tested_at,\n is_consistent,\n is_reliable,\n url_metadata,\n sector_utilization_percent::float8 AS \"sector_utilization_percent\"\n FROM\n url_results\n WHERE\n provider_id = $1\n AND result_type = 'Provider'\n ORDER BY\n tested_at DESC\n LIMIT 1\n ", "describe": { "columns": [ { @@ -87,11 +87,30 @@ "ordinal": 8, "name": "tested_at", 
"type_info": "Timestamptz" + }, + { + "ordinal": 9, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 10, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 11, + "name": "url_metadata", + "type_info": "Jsonb" + }, + { + "ordinal": 12, + "name": "sector_utilization_percent", + "type_info": "Float8" } ], "parameters": { "Left": [ - "Text", "Text" ] }, @@ -104,8 +123,12 @@ null, false, true, - false + false, + true, + true, + true, + null ] }, - "hash": "2c8f0fca32d7d16d6956dbf7bad248067dbfe22dcd7e7d58dff1db2b261826cc" + "hash": "e38878ea83d70c39c7b6076d4951fb2d2f959a01f1daa34162aeb5fa8c844b12" } diff --git a/.sqlx/query-e53c092c11aa0c8e0ce7df337c1c6f4cc70d23b68d67fd0d45b365131ca016f8.json b/.sqlx/query-e53c092c11aa0c8e0ce7df337c1c6f4cc70d23b68d67fd0d45b365131ca016f8.json new file mode 100644 index 0000000..e85c296 --- /dev/null +++ b/.sqlx/query-e53c092c11aa0c8e0ce7df337c1c6f4cc70d23b68d67fd0d45b365131ca016f8.json @@ -0,0 +1,134 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT DISTINCT ON (provider_id)\n id,\n provider_id AS \"provider_id: ProviderId\",\n client_id AS \"client_id: ClientId\",\n result_type AS \"result_type: DiscoveryType\",\n working_url,\n retrievability_percent::float8 AS \"retrievability_percent!\",\n result_code AS \"result_code: ResultCode\",\n error_code AS \"error_code: ErrorCode\",\n tested_at,\n is_consistent,\n is_reliable,\n url_metadata,\n sector_utilization_percent::float8 AS \"sector_utilization_percent\"\n FROM\n url_results\n WHERE\n provider_id = ANY($1)\n AND result_type = 'Provider'\n ORDER BY\n provider_id,\n tested_at DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id: ProviderId", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "client_id: ClientId", + "type_info": "Varchar" + }, + { + "ordinal": 3, + "name": "result_type: DiscoveryType", + "type_info": { + "Custom": { + "name": 
"discovery_type", + "kind": { + "Enum": [ + "Provider", + "ProviderClient" + ] + } + } + } + }, + { + "ordinal": 4, + "name": "working_url", + "type_info": "Text" + }, + { + "ordinal": 5, + "name": "retrievability_percent!", + "type_info": "Float8" + }, + { + "ordinal": 6, + "name": "result_code: ResultCode", + "type_info": { + "Custom": { + "name": "result_code", + "kind": { + "Enum": [ + "NoCidContactData", + "MissingAddrFromCidContact", + "MissingHttpAddrFromCidContact", + "FailedToGetWorkingUrl", + "NoDealsFound", + "TimedOut", + "Success", + "JobCreated", + "Error" + ] + } + } + } + }, + { + "ordinal": 7, + "name": "error_code: ErrorCode", + "type_info": { + "Custom": { + "name": "error_code", + "kind": { + "Enum": [ + "NoProviderOrClient", + "NoProvidersFound", + "FailedToRetrieveCidContactData", + "FailedToGetPeerId", + "FailedToGetDeals" + ] + } + } + } + }, + { + "ordinal": 8, + "name": "tested_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 9, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 10, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 11, + "name": "url_metadata", + "type_info": "Jsonb" + }, + { + "ordinal": 12, + "name": "sector_utilization_percent", + "type_info": "Float8" + } + ], + "parameters": { + "Left": [ + "TextArray" + ] + }, + "nullable": [ + false, + false, + true, + false, + true, + null, + false, + true, + false, + true, + true, + true, + null + ] + }, + "hash": "e53c092c11aa0c8e0ce7df337c1c6f4cc70d23b68d67fd0d45b365131ca016f8" +} diff --git a/.sqlx/query-6d0246db36942f288a537ce27d69774377dbdca219880e5d54c21c817813d648.json b/.sqlx/query-e563df6b0dbf81185128a6325534037828e19d3176422c4d0d59d15f70d15394.json similarity index 74% rename from .sqlx/query-6d0246db36942f288a537ce27d69774377dbdca219880e5d54c21c817813d648.json rename to .sqlx/query-e563df6b0dbf81185128a6325534037828e19d3176422c4d0d59d15f70d15394.json index e793275..f1c2055 100644 --- 
a/.sqlx/query-6d0246db36942f288a537ce27d69774377dbdca219880e5d54c21c817813d648.json +++ b/.sqlx/query-e563df6b0dbf81185128a6325534037828e19d3176422c4d0d59d15f70d15394.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "SELECT\n id,\n provider_id AS \"provider_id: ProviderId\",\n next_url_discovery_at,\n url_discovery_status,\n url_discovery_pending_since,\n last_working_url,\n next_bms_test_at,\n bms_test_status,\n bms_routing_key,\n last_bms_region_discovery_at,\n created_at,\n updated_at\n FROM\n storage_providers\n WHERE\n provider_id = $1\n ", + "query": "SELECT\n id,\n provider_id AS \"provider_id: ProviderId\",\n next_url_discovery_at,\n url_discovery_status,\n url_discovery_pending_since,\n last_working_url,\n next_bms_test_at,\n bms_test_status,\n bms_routing_key,\n last_bms_region_discovery_at,\n is_consistent,\n is_reliable,\n url_metadata,\n created_at,\n updated_at\n FROM\n storage_providers\n WHERE\n provider_id = $1\n ", "describe": { "columns": [ { @@ -55,11 +55,26 @@ }, { "ordinal": 10, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 11, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 12, + "name": "url_metadata", + "type_info": "Jsonb" + }, + { + "ordinal": 13, "name": "created_at", "type_info": "Timestamptz" }, { - "ordinal": 11, + "ordinal": 14, "name": "updated_at", "type_info": "Timestamptz" } @@ -81,8 +96,11 @@ true, true, false, + false, + true, + false, false ] }, - "hash": "6d0246db36942f288a537ce27d69774377dbdca219880e5d54c21c817813d648" + "hash": "e563df6b0dbf81185128a6325534037828e19d3176422c4d0d59d15f70d15394" } diff --git a/.sqlx/query-ea912830651bdfaffa16afb83ca986e2b9b1b9de422d2070594974a8f0df9508.json b/.sqlx/query-ea912830651bdfaffa16afb83ca986e2b9b1b9de422d2070594974a8f0df9508.json new file mode 100644 index 0000000..0bdb76f --- /dev/null +++ b/.sqlx/query-ea912830651bdfaffa16afb83ca986e2b9b1b9de422d2070594974a8f0df9508.json @@ -0,0 +1,94 @@ +{ + "db_name": "PostgreSQL", + 
"query": "SELECT\n id,\n provider_id,\n bms_job_id,\n url_tested,\n routing_key,\n worker_count,\n status,\n ping_avg_ms,\n head_avg_ms,\n ttfb_ms,\n download_speed_mbps,\n created_at,\n completed_at\n FROM\n bms_bandwidth_results\n WHERE\n provider_id = $1\n ORDER BY\n created_at DESC\n LIMIT 1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "bms_job_id", + "type_info": "Uuid" + }, + { + "ordinal": 3, + "name": "url_tested", + "type_info": "Text" + }, + { + "ordinal": 4, + "name": "routing_key", + "type_info": "Varchar" + }, + { + "ordinal": 5, + "name": "worker_count", + "type_info": "Int4" + }, + { + "ordinal": 6, + "name": "status", + "type_info": "Varchar" + }, + { + "ordinal": 7, + "name": "ping_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 8, + "name": "head_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 9, + "name": "ttfb_ms", + "type_info": "Numeric" + }, + { + "ordinal": 10, + "name": "download_speed_mbps", + "type_info": "Numeric" + }, + { + "ordinal": 11, + "name": "created_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 12, + "name": "completed_at", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + true, + false, + true + ] + }, + "hash": "ea912830651bdfaffa16afb83ca986e2b9b1b9de422d2070594974a8f0df9508" +} diff --git a/.sqlx/query-054294f218252f0353178e41f058f2bafa6a923c085e92dadade8ab74c274051.json b/.sqlx/query-ecc7001ffd569699cb33ec926a8f569db47e42e6a5b19699d621f889f44894d6.json similarity index 72% rename from .sqlx/query-054294f218252f0353178e41f058f2bafa6a923c085e92dadade8ab74c274051.json rename to .sqlx/query-ecc7001ffd569699cb33ec926a8f569db47e42e6a5b19699d621f889f44894d6.json index 6eebe82..b20bde7 100644 --- 
a/.sqlx/query-054294f218252f0353178e41f058f2bafa6a923c085e92dadade8ab74c274051.json +++ b/.sqlx/query-ecc7001ffd569699cb33ec926a8f569db47e42e6a5b19699d621f889f44894d6.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "SELECT DISTINCT ON (provider_id)\n id,\n provider_id AS \"provider_id: ProviderId\",\n client_id AS \"client_id: ClientId\",\n result_type AS \"result_type: DiscoveryType\",\n working_url,\n retrievability_percent::float8 AS \"retrievability_percent!\",\n result_code AS \"result_code: ResultCode\",\n error_code AS \"error_code: ErrorCode\",\n tested_at\n FROM\n url_results\n WHERE\n client_id = $1\n AND result_type = 'ProviderClient'\n ORDER BY\n provider_id,\n tested_at DESC\n ", + "query": "SELECT DISTINCT ON (provider_id)\n id,\n provider_id AS \"provider_id: ProviderId\",\n client_id AS \"client_id: ClientId\",\n result_type AS \"result_type: DiscoveryType\",\n working_url,\n retrievability_percent::float8 AS \"retrievability_percent!\",\n result_code AS \"result_code: ResultCode\",\n error_code AS \"error_code: ErrorCode\",\n tested_at,\n is_consistent,\n is_reliable,\n url_metadata,\n sector_utilization_percent::float8 AS \"sector_utilization_percent\"\n FROM\n url_results\n WHERE\n client_id = $1\n AND result_type = 'ProviderClient'\n ORDER BY\n provider_id,\n tested_at DESC\n ", "describe": { "columns": [ { @@ -87,6 +87,26 @@ "ordinal": 8, "name": "tested_at", "type_info": "Timestamptz" + }, + { + "ordinal": 9, + "name": "is_consistent", + "type_info": "Bool" + }, + { + "ordinal": 10, + "name": "is_reliable", + "type_info": "Bool" + }, + { + "ordinal": 11, + "name": "url_metadata", + "type_info": "Jsonb" + }, + { + "ordinal": 12, + "name": "sector_utilization_percent", + "type_info": "Float8" } ], "parameters": { @@ -103,8 +123,12 @@ null, false, true, - false + false, + true, + true, + true, + null ] }, - "hash": "054294f218252f0353178e41f058f2bafa6a923c085e92dadade8ab74c274051" + "hash": 
"ecc7001ffd569699cb33ec926a8f569db47e42e6a5b19699d621f889f44894d6" } diff --git a/.sqlx/query-f03de544aa939f9096ea57c39db78ff3019ebfef7bfa65e7821b46b70b1cceca.json b/.sqlx/query-f03de544aa939f9096ea57c39db78ff3019ebfef7bfa65e7821b46b70b1cceca.json new file mode 100644 index 0000000..b5fae90 --- /dev/null +++ b/.sqlx/query-f03de544aa939f9096ea57c39db78ff3019ebfef7bfa65e7821b46b70b1cceca.json @@ -0,0 +1,92 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT\n id,\n provider_id,\n bms_job_id,\n url_tested,\n routing_key,\n worker_count,\n status,\n ping_avg_ms,\n head_avg_ms,\n ttfb_ms,\n download_speed_mbps,\n created_at,\n completed_at\n FROM\n bms_bandwidth_results\n WHERE\n status = 'Pending'\n ORDER BY\n created_at ASC,\n id ASC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Uuid" + }, + { + "ordinal": 1, + "name": "provider_id", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "bms_job_id", + "type_info": "Uuid" + }, + { + "ordinal": 3, + "name": "url_tested", + "type_info": "Text" + }, + { + "ordinal": 4, + "name": "routing_key", + "type_info": "Varchar" + }, + { + "ordinal": 5, + "name": "worker_count", + "type_info": "Int4" + }, + { + "ordinal": 6, + "name": "status", + "type_info": "Varchar" + }, + { + "ordinal": 7, + "name": "ping_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 8, + "name": "head_avg_ms", + "type_info": "Numeric" + }, + { + "ordinal": 9, + "name": "ttfb_ms", + "type_info": "Numeric" + }, + { + "ordinal": 10, + "name": "download_speed_mbps", + "type_info": "Numeric" + }, + { + "ordinal": 11, + "name": "created_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 12, + "name": "completed_at", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + true, + false, + true + ] + }, + "hash": "f03de544aa939f9096ea57c39db78ff3019ebfef7bfa65e7821b46b70b1cceca" +} 
diff --git a/.sqlx/query-fe83a06bb1c021f918755241bfd2c2933eca082a8e85abc1e5dd3de87a6eeca6.json b/.sqlx/query-fe83a06bb1c021f918755241bfd2c2933eca082a8e85abc1e5dd3de87a6eeca6.json deleted file mode 100644 index 82cefd0..0000000 --- a/.sqlx/query-fe83a06bb1c021f918755241bfd2c2933eca082a8e85abc1e5dd3de87a6eeca6.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT DISTINCT\n \"providerId\"\n FROM\n unified_verified_deal\n WHERE\n \"providerId\" IS NOT NULL\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "providerId", - "type_info": "Varchar" - } - ], - "parameters": { - "Left": [] - }, - "nullable": [ - true - ] - }, - "hash": "fe83a06bb1c021f918755241bfd2c2933eca082a8e85abc1e5dd3de87a6eeca6" -} diff --git a/Cargo.lock b/Cargo.lock index 381f312..3f8b819 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1366,6 +1366,33 @@ dependencies = [ "windows-link", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cmake" version = "0.1.54" @@ -2391,6 +2418,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = 
"hashbrown" version = "0.12.3" @@ -5881,6 +5919,7 @@ dependencies = [ "axum-extra", "axum-test", "chrono", + "ciborium", "color-eyre", "dotenvy", "futures", @@ -5903,11 +5942,13 @@ dependencies = [ "testcontainers-modules", "tokio", "tokio-test", + "tokio-util", "tower", "tower-http", "tower_governor", "tracing", "tracing-subscriber", + "unsigned-varint", "urlencoding", "utoipa", "utoipa-swagger-ui", diff --git a/migrations/20251127114453_create_bms_bandwidth_results.down.sql b/migrations/20251127114453_create_bms_bandwidth_results.down.sql new file mode 100644 index 0000000..ff65e84 --- /dev/null +++ b/migrations/20251127114453_create_bms_bandwidth_results.down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS bms_bandwidth_results; diff --git a/migrations/20251127114453_create_bms_bandwidth_results.up.sql b/migrations/20251127114453_create_bms_bandwidth_results.up.sql new file mode 100644 index 0000000..91577e3 --- /dev/null +++ b/migrations/20251127114453_create_bms_bandwidth_results.up.sql @@ -0,0 +1,18 @@ +CREATE TABLE bms_bandwidth_results ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + provider_id VARCHAR(255) NOT NULL, + bms_job_id UUID NOT NULL UNIQUE, + url_tested TEXT NOT NULL, + routing_key VARCHAR(50) NOT NULL, + worker_count INTEGER NOT NULL, + status VARCHAR(50) NOT NULL, + ping_avg_ms NUMERIC(10, 3), + head_avg_ms NUMERIC(10, 3), + ttfb_ms NUMERIC(10, 3), + download_speed_mbps NUMERIC(10, 2), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + completed_at TIMESTAMPTZ +); + +CREATE INDEX idx_bms_results_provider ON bms_bandwidth_results(provider_id, created_at DESC); +CREATE INDEX idx_bms_results_job_id ON bms_bandwidth_results(bms_job_id); diff --git a/migrations/20260102120000_add_url_validation.down.sql b/migrations/20260102120000_add_url_validation.down.sql new file mode 100644 index 0000000..6ee993b --- /dev/null +++ b/migrations/20260102120000_add_url_validation.down.sql @@ -0,0 +1,5 @@ +-- Remove URL validation columns from storage_providers +DROP 
INDEX IF EXISTS idx_sp_consistent_bms; +ALTER TABLE storage_providers + DROP COLUMN IF EXISTS is_consistent, + DROP COLUMN IF EXISTS url_metadata; diff --git a/migrations/20260102120000_add_url_validation.up.sql b/migrations/20260102120000_add_url_validation.up.sql new file mode 100644 index 0000000..f7eb51e --- /dev/null +++ b/migrations/20260102120000_add_url_validation.up.sql @@ -0,0 +1,14 @@ +-- Add URL validation columns to storage_providers +ALTER TABLE storage_providers + ADD COLUMN is_consistent BOOLEAN NOT NULL DEFAULT true, + ADD COLUMN url_metadata JSONB; + +-- Force re-validation for existing providers with URLs +-- These were discovered before Content-Length validation was added +UPDATE storage_providers +SET is_consistent = false +WHERE last_working_url IS NOT NULL; + +CREATE INDEX idx_sp_consistent_bms + ON storage_providers(next_bms_test_at) + WHERE is_consistent = true AND last_working_url IS NOT NULL; diff --git a/migrations/20260109125708_add_is_reliable.down.sql b/migrations/20260109125708_add_is_reliable.down.sql new file mode 100644 index 0000000..9edb0e5 --- /dev/null +++ b/migrations/20260109125708_add_is_reliable.down.sql @@ -0,0 +1,10 @@ +DROP INDEX IF EXISTS idx_sp_bms_eligible; + +CREATE INDEX idx_sp_consistent_bms ON storage_providers ( + next_bms_test_at, + bms_test_status +) WHERE + is_consistent = true + AND last_working_url IS NOT NULL; + +ALTER TABLE storage_providers DROP COLUMN is_reliable; diff --git a/migrations/20260109125708_add_is_reliable.up.sql b/migrations/20260109125708_add_is_reliable.up.sql new file mode 100644 index 0000000..b3a0718 --- /dev/null +++ b/migrations/20260109125708_add_is_reliable.up.sql @@ -0,0 +1,17 @@ +-- Add is_reliable column for tracking connection stability +ALTER TABLE storage_providers +ADD COLUMN is_reliable BOOLEAN NOT NULL DEFAULT true; + +-- Update BMS eligibility index to require both consistent AND reliable +DROP INDEX IF EXISTS idx_sp_consistent_bms; + +CREATE INDEX idx_sp_bms_eligible ON 
storage_providers ( + next_bms_test_at, + bms_test_status +) WHERE + is_consistent = true + AND is_reliable = true + AND last_working_url IS NOT NULL; + +COMMENT ON COLUMN storage_providers.is_reliable IS + 'False if timeout rate exceeds 30% during URL discovery'; diff --git a/migrations/20260109140000_add_url_results_history_columns.down.sql b/migrations/20260109140000_add_url_results_history_columns.down.sql new file mode 100644 index 0000000..b29f811 --- /dev/null +++ b/migrations/20260109140000_add_url_results_history_columns.down.sql @@ -0,0 +1,4 @@ +ALTER TABLE url_results + DROP COLUMN IF EXISTS is_consistent, + DROP COLUMN IF EXISTS is_reliable, + DROP COLUMN IF EXISTS url_metadata; diff --git a/migrations/20260109140000_add_url_results_history_columns.up.sql b/migrations/20260109140000_add_url_results_history_columns.up.sql new file mode 100644 index 0000000..923dd90 --- /dev/null +++ b/migrations/20260109140000_add_url_results_history_columns.up.sql @@ -0,0 +1,5 @@ +-- Add columns to support historical data retrieval with extended details +ALTER TABLE url_results + ADD COLUMN is_consistent BOOLEAN, + ADD COLUMN is_reliable BOOLEAN, + ADD COLUMN url_metadata JSONB; diff --git a/migrations/20260115120000_create_deal_labels.down.sql b/migrations/20260115120000_create_deal_labels.down.sql new file mode 100644 index 0000000..c67852c --- /dev/null +++ b/migrations/20260115120000_create_deal_labels.down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS deal_labels; diff --git a/migrations/20260115120000_create_deal_labels.up.sql b/migrations/20260115120000_create_deal_labels.up.sql new file mode 100644 index 0000000..17dc406 --- /dev/null +++ b/migrations/20260115120000_create_deal_labels.up.sql @@ -0,0 +1,17 @@ +-- Deal label cache for CAR header verification +-- Stores Label (payload CID) fetched from Lotus RPC +-- Data is immutable once deal is made, so cache forever + +CREATE TABLE deal_labels ( + deal_id INTEGER PRIMARY KEY, + piece_cid TEXT NOT NULL, + label_raw 
TEXT, + payload_cid TEXT, + fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE INDEX idx_deal_labels_piece_cid ON deal_labels(piece_cid); + +COMMENT ON TABLE deal_labels IS 'Cache of deal Labels fetched from Lotus RPC for CAR header verification'; +COMMENT ON COLUMN deal_labels.label_raw IS 'Raw Label value from DealProposal'; +COMMENT ON COLUMN deal_labels.payload_cid IS 'Parsed CID if label_raw is valid CID format (bafy/bafk/Qm prefix)'; diff --git a/migrations/20260120120000_add_sector_utilization.down.sql b/migrations/20260120120000_add_sector_utilization.down.sql new file mode 100644 index 0000000..963d8ef --- /dev/null +++ b/migrations/20260120120000_add_sector_utilization.down.sql @@ -0,0 +1,2 @@ +ALTER TABLE url_results + DROP COLUMN sector_utilization_percent; diff --git a/migrations/20260120120000_add_sector_utilization.up.sql b/migrations/20260120120000_add_sector_utilization.up.sql new file mode 100644 index 0000000..4554c53 --- /dev/null +++ b/migrations/20260120120000_add_sector_utilization.up.sql @@ -0,0 +1,3 @@ +-- Add sector utilization tracking to url_results +ALTER TABLE url_results + ADD COLUMN sector_utilization_percent NUMERIC(5, 2); diff --git a/scripts/sql/bms_test_providers.sql b/scripts/sql/bms_test_providers.sql new file mode 100644 index 0000000..914dbce --- /dev/null +++ b/scripts/sql/bms_test_providers.sql @@ -0,0 +1,26 @@ +-- BMS Test Providers Seed +-- Providers: f08403, f01518369 +-- Real deal data from sample_deals.sql and providers_with_clients.sql + +-- Insert sample deals for test providers +INSERT INTO unified_verified_deal (id, "dealId", "claimId", type, "clientId", "providerId", + "sectorId", "pieceCid", "pieceSize", "termMax", "termMin", "termStart", "slashedEpoch", "processedSlashedEpoch", removed, "createdAt", "updatedAt", "dcSource") +VALUES +-- f08403 deal +(151651301, 0, 118131274, 'claim', '3200311', '8403', '21097', 'baga6ea4seaqh32ogu5iav6gzkxvdsyustxpcsn3zygunp2ohxxpuce3dxiekcgy', 34359738368, 1785600, 
1526400, 5461183, 0, 0, false, '2025-11-02 22:29:01.859', '2025-11-02 22:29:01.859', NULL), +-- f01518369 deals +(61696683, 29547762, 0, 'deal', '1837711', '1518369', '643629', 'baga6ea4seaqp47n3cjm3y2sll3if7kusze4e7h2wuybpxzzv6wszbwdxftk76ki', 34359738368, 4218244, NULL, 2708343, 0, 0, false, '2024-10-23 16:21:16.512869', '2024-10-23 16:21:16.512869', NULL), +(52701111, 19334805, 0, 'deal', '1880196', '1518369', '0', 'baga6ea4seaqpkzvgyguwodcetsrh2uxak2efh62bf6vyxbdel23l73jmmjq5ypi', 34359738368, 3951512, NULL, 2438141, 0, 0, false, '2024-10-23 14:49:28.360997', '2024-10-23 14:49:28.360997', NULL), +(61696685, 29547763, 0, 'deal', '1901107', '1518369', '643628', 'baga6ea4seaqivqjo2bg2gmjqo2d6phwgyzil2hrwihhcn7kzm2gbl42ffa2nchy', 34359738368, 4220461, NULL, 2708334, 0, 0, false, '2024-10-23 16:21:16.512869', '2024-10-23 16:21:16.512869', NULL), +(110031315, 83028807, 0, 'deal', '1924648', '1518369', '722558', 'baga6ea4seaqjuhxvn33zkqvuzmjsggla35un3pohav2awpcerxefijlf4pazimi', 34359738368, 7626855, NULL, 3980991, 0, 0, false, '2024-10-23 19:58:37.739883', '2024-10-23 19:58:37.739883', NULL), +(109627694, 82129856, 0, 'deal', '1924649', '1518369', '711737', 'baga6ea4seaqmy5sfbbcye7jfchyrlkpztbfsuhhkcxir4niz33oklr2lbcfnkii', 34359738368, 7604226, NULL, 3951000, 0, 0, false, '2024-10-23 19:57:57.51002', '2024-10-23 19:57:57.51002', NULL) +ON CONFLICT (id) DO NOTHING; + +-- Setting next_bms_test_at in the past so they're immediately eligible +INSERT INTO storage_providers (provider_id, next_bms_test_at, next_url_discovery_at) +VALUES +('8403', NOW() - INTERVAL '1 day', NOW() - INTERVAL '1 day'), +('1518369', NOW() - INTERVAL '1 day', NOW() - INTERVAL '1 day') +ON CONFLICT (provider_id) DO UPDATE SET + next_bms_test_at = EXCLUDED.next_bms_test_at, + next_url_discovery_at = EXCLUDED.next_url_discovery_at; diff --git a/url_finder/Cargo.toml b/url_finder/Cargo.toml index 6b4bd49..d788f7f 100644 --- a/url_finder/Cargo.toml +++ b/url_finder/Cargo.toml @@ -56,7 +56,10 @@ alloy 
= { version = "1.0.41", default-features = false, features = ["sol-types", urlencoding = "2.1.3" multiaddr = "0.18.2" dotenvy = "0.15.7" +tokio-util = "0.7" rand = "0.9.2" +unsigned-varint = "0.8" +ciborium = "0.2" [dev-dependencies] wiremock = "0.6.5" diff --git a/url_finder/src/api/api_doc.rs b/url_finder/src/api/api_doc.rs index 3626655..064d92f 100644 --- a/url_finder/src/api/api_doc.rs +++ b/url_finder/src/api/api_doc.rs @@ -1,11 +1,11 @@ use crate::api_response::ErrorResponse; use utoipa::OpenApi; +use crate::api::providers::*; use crate::api::*; #[derive(OpenApi)] #[openapi( - // API Metadata info( title = "Url Finder", description = r#" @@ -13,56 +13,101 @@ This is the API documentation for the Url Finder micro-service. The Url Finder service is responsible for finding the URL of a miner given its address. -The service is using [CID Contact](https://cid.contact) as source of HTTP entry point for any given miner address. +## New Providers API + +The `/providers/*` and `/clients/*` endpoints serve pre-computed data from the database with combined URL, retrievability, and performance metrics. + +## Legacy URL API + +The `/url/*` endpoints remain fully backward compatible. 
### Result Codes - **NoCidContactData** - No entry in cid contact - **MissingAddrFromCidContact** - No entry point found in cid contact - - **MissingHttpAddrFromCidContact** - No HTTP entry point in cid contact (taken from ExtendedProviders) - - **FailedToGetWorkingUrl** - None of tested URLs is working and can be downloaded - - **NoDealsFound** - No deals found for given miner (should not happen, unless miner address is invalid) + - **MissingHttpAddrFromCidContact** - No HTTP entry point in cid contact + - **FailedToGetWorkingUrl** - None of the tested URLs are working + - **NoDealsFound** - No deals found for given miner - **Success** - Found working URL - **Error** - Provider not indexed yet or error occurred "#, version = "1.0.0" ), - // API Handler Functions paths( + // Legacy API handle_find_url_sp, handle_find_url_sp_client, handle_find_retri_by_client_and_sp, handle_find_retri_by_sp, handle_find_client, handle_healthcheck, + // New Providers API + handle_get_provider, + handle_get_provider_client, + handle_get_client_providers, + handle_list_providers, + handle_bulk_providers, + handle_reset_provider, + handle_history_retrievability, + handle_history_retrievability_client, ), components( schemas( - // URL + // Legacy URL FindUrlSpPath, FindUrlSpResponse, FindUrlSpClientPath, FindUrlSpClientResponse, - // Retri + // Legacy Retri FindRetriByClientAndSpPath, FindRetriByClientAndSpResponse, - // Client + // Legacy Client FindByClientPath, FindByClientResponse, - // misc + // New Providers API + GetProviderPath, + GetProviderClientPath, + GetClientProvidersPath, + ListProvidersQuery, + BulkProvidersRequest, + ProviderResponse, + ProviderClientResponse, + ClientProvidersResponse, + ProvidersListResponse, + BulkProvidersResponse, + PerformanceResponse, + BandwidthTestResponse, + GeolocationTestResponse, + ResetProviderPath, + ResetProviderQuery, + ScheduleType, + HistoryProviderPath, + HistoryProviderClientPath, + HistoryQuery, + RetrievabilityHistoryResponse, + 
RetrievabilityDataPoint, + // Extended response types + ExtendedQuery, + AnalysisResponse, + DiagnosticsResponse, + ScheduleStateResponse, + SchedulingResponse, + + // Misc HealthcheckResponse, - // common + // Common ErrorCode, ErrorResponse, ), - ), + ), tags( - // API Categories - (name = "URL", description = "Url Finder APIs"), - (name = "Healthcheck", description = "Url Finder Misc APIs"), + (name = "Providers", description = "New Providers API - pre-computed data with performance metrics"), + (name = "Clients", description = "Client endpoints - providers for a specific client"), + (name = "URL", description = "Legacy URL Finder APIs"), + (name = "Healthcheck", description = "Health check endpoints"), ) )] pub struct ApiDoc; diff --git a/url_finder/src/api/find_client.rs b/url_finder/src/api/find_client.rs index 20126bd..04ae889 100644 --- a/url_finder/src/api/find_client.rs +++ b/url_finder/src/api/find_client.rs @@ -13,7 +13,8 @@ use utoipa::{IntoParams, ToSchema}; use crate::{ AppState, - types::{ClientAddress, ClientId, ProviderAddress}, + services::provider_service::ProviderData, + types::{ClientAddress, ProviderAddress}, }; use super::ResultCode; @@ -31,6 +32,17 @@ pub struct ProviderResult { pub retrievability_percent: f64, } +impl From for ProviderResult { + fn from(data: ProviderData) -> Self { + Self { + provider: data.provider_id.into(), + result: data.result_code, + working_url: data.working_url, + retrievability_percent: data.retrievability_percent, + } + } +} + #[derive(Serialize, ToSchema)] pub struct FindByClientResponse { pub client: String, @@ -40,11 +52,10 @@ pub struct FindByClientResponse { pub message: Option, } -/// Find retrivabiliy of urls for a given SP and Client address #[utoipa::path( get, path = "/url/client/{client}", - params (FindByClientPath), + params(FindByClientPath), description = r#" **Find client SPs with working url and retrievabiliy of urls for for each found SP** "#, @@ -65,22 +76,21 @@ pub async fn 
handle_find_client( &path.client ); - // Parse and validate client address - let client_address = ClientAddress::new(path.client.clone()) + let client_address = ClientAddress::new(&path.client) .map_err(|e| bad_request(format!("Invalid client address: {e}")))?; - let client_id: ClientId = client_address.into(); + let client_id = client_address.into(); - let url_results = state - .url_repo - .get_latest_for_client_all_providers(&client_id) + let providers_data = state + .provider_service + .get_providers_for_client(&client_id) .await .map_err(|e| { - debug!("Failed to query url_results: {:?}", e); - internal_server_error("Failed to query url results") + debug!("Failed to query client providers: {:?}", e); + internal_server_error("Failed to query client providers") })?; - if url_results.is_empty() { + if providers_data.is_empty() { return Ok(ok_response(FindByClientResponse { result: ResultCode::Error, client: path.client.clone(), @@ -92,15 +102,7 @@ pub async fn handle_find_client( })); } - let providers: Vec = url_results - .into_iter() - .map(|r| ProviderResult { - provider: r.provider_id.into(), - result: r.result_code, - working_url: r.working_url, - retrievability_percent: r.retrievability_percent, - }) - .collect(); + let providers: Vec = providers_data.into_iter().map(Into::into).collect(); Ok(ok_response(FindByClientResponse { result: ResultCode::Success, diff --git a/url_finder/src/api/find_retri_sp.rs b/url_finder/src/api/find_retri_sp.rs index c691641..618842f 100644 --- a/url_finder/src/api/find_retri_sp.rs +++ b/url_finder/src/api/find_retri_sp.rs @@ -8,13 +8,10 @@ use axum::{ use axum_extra::extract::WithRejection; use color_eyre::Result; use serde::{Deserialize, Serialize}; -use tracing::debug; +use tracing::{debug, warn}; use utoipa::{IntoParams, ToSchema}; -use crate::{ - AppState, - types::{ProviderAddress, ProviderId}, -}; +use crate::{AppState, services::provider_service::ProviderData, types::ProviderAddress}; use super::ResultCode; @@ -31,11 
+28,30 @@ pub struct FindRetriBySpResponse { pub message: Option, } -/// Find retrivabiliy of urls for a given SP address +impl From for FindRetriBySpResponse { + fn from(data: ProviderData) -> Self { + Self { + result: data.result_code, + retrievability_percent: data.retrievability_percent, + message: None, + } + } +} + +impl FindRetriBySpResponse { + fn not_indexed() -> Self { + Self { + result: ResultCode::Error, + retrievability_percent: 0.0, + message: Some("Provider has not been indexed yet. Please try again later.".to_string()), + } + } +} + #[utoipa::path( get, path = "/url/retrievability/{provider}", - params (FindRetriBySpPath), + params(FindRetriBySpPath), description = r#" **Find retrievabiliy of urls for a given SP address** "#, @@ -56,31 +72,22 @@ pub async fn handle_find_retri_by_sp( ) -> Result, ApiResponse<()>> { debug!("find retri for input address: {:?}", &path.provider); - // Parse and validate provider address - let provider_address = ProviderAddress::new(path.provider) + let provider_address = ProviderAddress::new(&path.provider) .map_err(|e| bad_request(format!("Invalid provider address: {e}")))?; - let provider_id: ProviderId = provider_address.into(); + let provider_id = provider_address.into(); let result = state - .url_repo - .get_latest_for_provider(&provider_id) + .provider_service + .get_provider(&provider_id) .await .map_err(|e| { - debug!("Failed to query url_results: {:?}", e); - internal_server_error("Failed to query url results") + warn!("Failed to query provider: {:?}", e); + internal_server_error("Failed to query provider") })?; - match result { - Some(url_result) => Ok(ok_response(FindRetriBySpResponse { - result: url_result.result_code, - retrievability_percent: url_result.retrievability_percent, - message: None, - })), - None => Ok(ok_response(FindRetriBySpResponse { - result: ResultCode::Error, - retrievability_percent: 0.0, - message: Some("Provider has not been indexed yet. 
Please try again later.".to_string()), - })), - } + Ok(ok_response(match result { + Some(data) => data.into(), + None => FindRetriBySpResponse::not_indexed(), + })) } diff --git a/url_finder/src/api/find_retri_sp_client.rs b/url_finder/src/api/find_retri_sp_client.rs index d6f1436..d21ce7d 100644 --- a/url_finder/src/api/find_retri_sp_client.rs +++ b/url_finder/src/api/find_retri_sp_client.rs @@ -13,7 +13,8 @@ use utoipa::{IntoParams, ToSchema}; use crate::{ AppState, - types::{ClientAddress, ClientId, ProviderAddress, ProviderId}, + services::provider_service::ProviderData, + types::{ClientAddress, ProviderAddress}, }; use super::ResultCode; @@ -32,13 +33,35 @@ pub struct FindRetriByClientAndSpResponse { pub message: Option, } -/// Find retrievability of urls for a given SP and Client address +impl From for FindRetriByClientAndSpResponse { + fn from(data: ProviderData) -> Self { + Self { + result: data.result_code, + retrievability_percent: data.retrievability_percent, + message: None, + } + } +} + +impl FindRetriByClientAndSpResponse { + fn not_indexed() -> Self { + Self { + result: ResultCode::Error, + retrievability_percent: 0.0, + message: Some( + "Provider/client pair has not been indexed yet. Please try again later." 
+ .to_string(), + ), + } + } +} + #[utoipa::path( get, path = "/url/retrievability/{provider}/{client}", - params (FindRetriByClientAndSpPath), + params(FindRetriByClientAndSpPath), description = r#" -**Find retrievabiliy of urls for a given SP and Client address** +**Find retrievability of urls for a given SP and Client address** "#, responses( (status = 200, description = "Successful check", body = FindRetriByClientAndSpResponse), @@ -60,37 +83,25 @@ pub async fn handle_find_retri_by_client_and_sp( &path.provider, &path.client ); - // Parse and validate provider and client addresses - let provider_address = ProviderAddress::new(path.provider) + let provider_address = ProviderAddress::new(&path.provider) .map_err(|e| bad_request(format!("Invalid provider address: {e}")))?; - let client_address = ClientAddress::new(path.client) + let client_address = ClientAddress::new(&path.client) .map_err(|e| bad_request(format!("Invalid client address: {e}")))?; - let provider_id: ProviderId = provider_address.into(); - let client_id: ClientId = client_address.into(); + let provider_id = provider_address.into(); + let client_id = client_address.into(); let result = state - .url_repo - .get_latest_for_provider_client(&provider_id, &client_id) + .provider_service + .get_provider_client(&provider_id, &client_id) .await .map_err(|e| { - debug!("Failed to query url_results: {:?}", e); - internal_server_error("Failed to query url results") + debug!("Failed to query provider+client: {:?}", e); + internal_server_error("Failed to query provider data") })?; - match result { - Some(url_result) => Ok(ok_response(FindRetriByClientAndSpResponse { - result: url_result.result_code, - retrievability_percent: url_result.retrievability_percent, - message: None, - })), - None => Ok(ok_response(FindRetriByClientAndSpResponse { - result: ResultCode::Error, - retrievability_percent: 0.0, - message: Some( - "Provider/client pair has not been indexed yet. Please try again later." 
- .to_string(), - ), - })), - } + Ok(ok_response(match result { + Some(data) => data.into(), + None => FindRetriByClientAndSpResponse::not_indexed(), + })) } diff --git a/url_finder/src/api/find_url_sp.rs b/url_finder/src/api/find_url_sp.rs index 1100dd9..9e6a710 100644 --- a/url_finder/src/api/find_url_sp.rs +++ b/url_finder/src/api/find_url_sp.rs @@ -12,8 +12,7 @@ use tracing::debug; use utoipa::{IntoParams, ToSchema}; use crate::{ - AppState, ResultCode, - types::{ProviderAddress, ProviderId}, + AppState, ResultCode, services::provider_service::ProviderData, types::ProviderAddress, }; #[derive(Deserialize, ToSchema, IntoParams)] @@ -30,11 +29,30 @@ pub struct FindUrlSpResponse { pub message: Option, } -/// Find a working url for a given SP address +impl From for FindUrlSpResponse { + fn from(data: ProviderData) -> Self { + Self { + result: data.result_code.clone(), + url: data.working_url, + message: data.result_code.message().map(String::from), + } + } +} + +impl FindUrlSpResponse { + fn not_indexed() -> Self { + Self { + result: ResultCode::Error, + url: None, + message: Some("Provider has not been indexed yet. 
Please try again later.".to_string()), + } + } +} + #[utoipa::path( get, path = "/url/find/{provider}", - params (FindUrlSpPath), + params(FindUrlSpPath), description = r#" **Find a working url for a given SP address** "#, @@ -52,31 +70,22 @@ pub async fn handle_find_url_sp( ) -> Result, ApiResponse<()>> { debug!("find url input address: {:?}", &path.provider); - // Parse and validate provider address - let provider_address = ProviderAddress::new(path.provider) + let provider_address = ProviderAddress::new(&path.provider) .map_err(|e| bad_request(format!("Invalid provider address: {e}")))?; - let provider_id: ProviderId = provider_address.into(); + let provider_id = provider_address.into(); let result = state - .url_repo - .get_latest_for_provider(&provider_id) + .provider_service + .get_provider(&provider_id) .await .map_err(|e| { - debug!("Failed to query url_results: {:?}", e); - internal_server_error("Failed to query url results") + debug!("Failed to query provider: {:?}", e); + internal_server_error("Failed to query provider") })?; - match result { - Some(url_result) => Ok(ok_response(FindUrlSpResponse { - result: url_result.result_code.clone(), - url: url_result.working_url, - message: url_result.result_code.message().map(String::from), - })), - None => Ok(ok_response(FindUrlSpResponse { - result: ResultCode::Error, - url: None, - message: Some("Provider has not been indexed yet. 
Please try again later.".to_string()), - })), - } + Ok(ok_response(match result { + Some(data) => data.into(), + None => FindUrlSpResponse::not_indexed(), + })) } diff --git a/url_finder/src/api/find_url_sp_client.rs b/url_finder/src/api/find_url_sp_client.rs index c17144e..74396a6 100644 --- a/url_finder/src/api/find_url_sp_client.rs +++ b/url_finder/src/api/find_url_sp_client.rs @@ -13,7 +13,8 @@ use utoipa::{IntoParams, ToSchema}; use crate::{ AppState, ResultCode, - types::{ClientAddress, ClientId, ProviderAddress, ProviderId}, + services::provider_service::ProviderData, + types::{ClientAddress, ProviderAddress}, }; #[derive(Deserialize, ToSchema, IntoParams)] @@ -31,11 +32,33 @@ pub struct FindUrlSpClientResponse { pub message: Option, } -/// Find a working url for a given SP address +impl From for FindUrlSpClientResponse { + fn from(data: ProviderData) -> Self { + Self { + result: data.result_code, + url: data.working_url, + message: None, + } + } +} + +impl FindUrlSpClientResponse { + fn not_indexed() -> Self { + Self { + result: ResultCode::Error, + url: None, + message: Some( + "Provider/client pair has not been indexed yet. Please try again later." 
+ .to_string(), + ), + } + } +} + #[utoipa::path( get, path = "/url/find/{provider}/{client}", - params (FindUrlSpClientPath), + params(FindUrlSpClientPath), description = r#" **Find a working url for a given SP address** "#, @@ -54,39 +77,30 @@ pub async fn handle_find_url_sp_client( ApiResponse, >, ) -> Result, ApiResponse<()>> { - debug!("find url input address: {:?}", &path.provider); + debug!( + "find url input addresses - provider: {:?}, client: {:?}", + &path.provider, &path.client + ); - // Parse and validate provider and client addresses - let provider_address = ProviderAddress::new(path.provider) + let provider_address = ProviderAddress::new(&path.provider) .map_err(|e| bad_request(format!("Invalid provider address: {e}")))?; - let client_address = ClientAddress::new(path.client) + let client_address = ClientAddress::new(&path.client) .map_err(|e| bad_request(format!("Invalid client address: {e}")))?; - let provider_id: ProviderId = provider_address.into(); - let client_id: ClientId = client_address.into(); + let provider_id = provider_address.into(); + let client_id = client_address.into(); let result = state - .url_repo - .get_latest_for_provider_client(&provider_id, &client_id) + .provider_service + .get_provider_client(&provider_id, &client_id) .await .map_err(|e| { - debug!("Failed to query url_results: {:?}", e); + debug!("Failed to query provider+client: {:?}", e); internal_server_error("Failed to query url results") })?; - match result { - Some(url_result) => Ok(ok_response(FindUrlSpClientResponse { - result: url_result.result_code, - url: url_result.working_url, - message: None, - })), - None => Ok(ok_response(FindUrlSpClientResponse { - result: ResultCode::Error, - url: None, - message: Some( - "Provider/client pair has not been indexed yet. Please try again later." 
- .to_string(), - ), - })), - } + Ok(ok_response(match result { + Some(data) => data.into(), + None => FindUrlSpClientResponse::not_indexed(), + })) } diff --git a/url_finder/src/api/mod.rs b/url_finder/src/api/mod.rs index 2cf9a8d..8c25045 100644 --- a/url_finder/src/api/mod.rs +++ b/url_finder/src/api/mod.rs @@ -21,3 +21,5 @@ pub use find_retri_sp_client::*; mod responses; pub use responses::*; + +pub mod providers; diff --git a/url_finder/src/api/providers/bulk_providers.rs b/url_finder/src/api/providers/bulk_providers.rs new file mode 100644 index 0000000..29d7246 --- /dev/null +++ b/url_finder/src/api/providers/bulk_providers.rs @@ -0,0 +1,94 @@ +use std::sync::Arc; + +use axum::{ + Json, debug_handler, + extract::{Query, State}, +}; +use tracing::debug; + +use crate::{ + AppState, + api_response::{ + ApiResponse, ErrorCode, bad_request_with_code, internal_server_error_with_code, ok_response, + }, + types::{ProviderAddress, ProviderId}, +}; + +use super::ExtendedQuery; + +const MAX_PROVIDER_IDS: usize = 100; + +use super::types::{BulkProvidersRequest, BulkProvidersResponse, ProviderResponse}; + +#[utoipa::path( + post, + path = "/providers/bulk", + params(ExtendedQuery), + request_body = BulkProvidersRequest, + responses( + (status = 200, description = "Bulk providers result", body = BulkProvidersResponse), + (status = 400, description = "Bad Request", body = crate::api_response::ErrorResponse), + (status = 500, description = "Internal error", body = crate::api_response::ErrorResponse), + ), + tags = ["Providers"], +)] +#[debug_handler] +pub async fn handle_bulk_providers( + State(state): State>, + Query(query): Query, + Json(request): Json, +) -> Result, ApiResponse<()>> { + debug!( + "POST /providers/bulk?extended={} with {} ids", + query.extended, + request.provider_ids.len() + ); + + if request.provider_ids.len() > MAX_PROVIDER_IDS { + return Err(bad_request_with_code( + ErrorCode::InvalidRequest, + format!( + "Too many provider IDs: {} exceeds maximum of 
{MAX_PROVIDER_IDS}", + request.provider_ids.len() + ), + )); + } + + let mut valid_ids: Vec = Vec::new(); + let mut invalid_ids: Vec = Vec::new(); + + for id in &request.provider_ids { + match ProviderAddress::new(id) { + Ok(addr) => valid_ids.push(addr.into()), + Err(_) => invalid_ids.push(id.clone()), + } + } + + let result = state + .provider_service + .bulk_get_providers(&valid_ids) + .await + .map_err(|e| { + debug!("Failed to bulk query providers: {:?}", e); + internal_server_error_with_code(ErrorCode::InternalError, "Failed to query providers") + })?; + + let providers: Vec = result + .providers + .into_iter() + .map(|p| ProviderResponse::from_data(p, query.extended)) + .collect(); + + let mut not_found: Vec = result + .not_found + .into_iter() + .map(|id| ProviderAddress::from(id).to_string()) + .collect(); + + not_found.extend(invalid_ids); + + Ok(ok_response(BulkProvidersResponse { + providers, + not_found, + })) +} diff --git a/url_finder/src/api/providers/get_client_providers.rs b/url_finder/src/api/providers/get_client_providers.rs new file mode 100644 index 0000000..439079e --- /dev/null +++ b/url_finder/src/api/providers/get_client_providers.rs @@ -0,0 +1,95 @@ +use std::sync::Arc; + +use axum::{ + debug_handler, + extract::{Path, Query, State}, +}; +use axum_extra::extract::WithRejection; +use serde::Deserialize; +use tracing::{debug, error}; +use utoipa::{IntoParams, ToSchema}; + +use super::ExtendedQuery; + +use crate::{ + AppState, + api_response::{ + ApiResponse, ErrorCode, ErrorResponse, bad_request_with_code, + internal_server_error_with_code, not_found_with_code, ok_response, + }, + types::ClientAddress, +}; + +use super::types::{ClientProvidersResponse, ProviderResponse}; + +#[derive(Deserialize, ToSchema, IntoParams)] +pub struct GetClientProvidersPath { + pub id: String, +} + +#[utoipa::path( + get, + path = "/clients/{id}/providers", + params(GetClientProvidersPath, ExtendedQuery), + responses( + (status = 200, description = "Client 
providers found", body = ClientProvidersResponse), + (status = 400, description = "Invalid client address", body = ErrorResponse), + (status = 404, description = "Client not found", body = ErrorResponse), + (status = 500, description = "Internal error", body = ErrorResponse), + ), + tags = ["Clients"], +)] +#[debug_handler] +pub async fn handle_get_client_providers( + State(state): State>, + WithRejection(Path(path), _): WithRejection< + Path, + ApiResponse, + >, + Query(query): Query, +) -> Result, ApiResponse<()>> { + debug!( + "GET /clients/{}/providers?extended={}", + &path.id, query.extended + ); + + let client_address = ClientAddress::new(&path.id).map_err(|e| { + bad_request_with_code( + ErrorCode::InvalidAddress, + format!("Invalid client address: {e}"), + ) + })?; + + let client_id = client_address.clone().into(); + + let providers_data = state + .provider_service + .get_providers_for_client(&client_id) + .await + .map_err(|e| { + error!( + "Failed to query client providers for {}: {:?}", + client_id, e + ); + internal_server_error_with_code(ErrorCode::InternalError, "Failed to query providers") + })?; + + if providers_data.is_empty() { + return Err(not_found_with_code( + ErrorCode::NotFound, + format!("Client {} has no providers", &path.id), + )); + } + + let providers: Vec = providers_data + .into_iter() + .map(|p| ProviderResponse::from_data(p, query.extended)) + .collect(); + let total = providers.len() as i64; + + Ok(ok_response(ClientProvidersResponse { + client_id: client_address.to_string(), + providers, + total, + })) +} diff --git a/url_finder/src/api/providers/get_provider.rs b/url_finder/src/api/providers/get_provider.rs new file mode 100644 index 0000000..a88adb7 --- /dev/null +++ b/url_finder/src/api/providers/get_provider.rs @@ -0,0 +1,88 @@ +use std::sync::Arc; + +use axum::{ + debug_handler, + extract::{Path, Query, State}, +}; +use axum_extra::extract::WithRejection; +use serde::Deserialize; +use tracing::{debug, error}; +use 
utoipa::{IntoParams, ToSchema}; + +use super::ExtendedQuery; + +use crate::{ + AppState, + api_response::{ + ApiResponse, ErrorCode, ErrorResponse, bad_request_with_code, + internal_server_error_with_code, not_found_with_code, ok_response, + }, + types::ProviderAddress, +}; + +use super::types::ProviderResponse; + +#[derive(Deserialize, ToSchema, IntoParams)] +pub struct GetProviderPath { + pub id: String, +} + +#[utoipa::path( + get, + path = "/providers/{id}", + params(GetProviderPath, ExtendedQuery), + responses( + (status = 200, description = "Provider found", body = ProviderResponse), + (status = 400, description = "Invalid provider address", body = ErrorResponse), + (status = 404, description = "Provider not found", body = ErrorResponse), + (status = 500, description = "Internal error", body = ErrorResponse), + ), + tags = ["Providers"], +)] +#[debug_handler] +pub async fn handle_get_provider( + State(state): State>, + WithRejection(Path(path), _): WithRejection, ApiResponse>, + Query(query): Query, +) -> Result, ApiResponse<()>> { + debug!("GET /providers/{}?extended={}", &path.id, query.extended); + + let provider_address = ProviderAddress::new(&path.id).map_err(|e| { + error!("Invalid provider address '{}': {}", &path.id, e); + bad_request_with_code(ErrorCode::InvalidAddress, "Invalid provider address") + })?; + + let provider_id = provider_address.into(); + + let data = state + .provider_service + .get_provider(&provider_id) + .await + .map_err(|e| { + error!("Failed to query provider_id={}: {e:?}", provider_id); + internal_server_error_with_code(ErrorCode::InternalError, "Failed to query provider") + })? 
+ .ok_or_else(|| { + not_found_with_code( + ErrorCode::NotFound, + format!("Provider {} not found", &path.id), + ) + })?; + + let scheduling = if query.extended { + state + .provider_service + .get_scheduling_data(&provider_id) + .await + .ok() + .flatten() + } else { + None + }; + + Ok(ok_response(ProviderResponse::from_data_with_scheduling( + data, + scheduling, + query.extended, + ))) +} diff --git a/url_finder/src/api/providers/get_provider_client.rs b/url_finder/src/api/providers/get_provider_client.rs new file mode 100644 index 0000000..3717c98 --- /dev/null +++ b/url_finder/src/api/providers/get_provider_client.rs @@ -0,0 +1,108 @@ +use std::sync::Arc; + +use axum::{ + debug_handler, + extract::{Path, Query, State}, +}; +use axum_extra::extract::WithRejection; +use serde::Deserialize; +use tracing::debug; +use utoipa::{IntoParams, ToSchema}; + +use super::ExtendedQuery; + +use crate::{ + AppState, + api_response::{ + ApiResponse, ErrorCode, ErrorResponse, bad_request_with_code, + internal_server_error_with_code, not_found_with_code, ok_response, + }, + types::{ClientAddress, ProviderAddress}, +}; + +use super::types::ProviderClientResponse; + +#[derive(Deserialize, ToSchema, IntoParams)] +pub struct GetProviderClientPath { + pub id: String, + pub client_id: String, +} + +#[utoipa::path( + get, + path = "/providers/{id}/clients/{client_id}", + params(GetProviderClientPath, ExtendedQuery), + responses( + (status = 200, description = "Provider+client found", body = ProviderClientResponse), + (status = 400, description = "Invalid address", body = ErrorResponse), + (status = 404, description = "Provider+client not found", body = ErrorResponse), + (status = 500, description = "Internal error", body = ErrorResponse), + ), + tags = ["Providers"], +)] +#[debug_handler] +pub async fn handle_get_provider_client( + State(state): State>, + WithRejection(Path(path), _): WithRejection< + Path, + ApiResponse, + >, + Query(query): Query, +) -> Result, ApiResponse<()>> { + 
debug!( + "GET /providers/{}/clients/{}?extended={}", + &path.id, &path.client_id, query.extended + ); + + let provider_address = ProviderAddress::new(&path.id).map_err(|e| { + bad_request_with_code( + ErrorCode::InvalidAddress, + format!("Invalid provider address: {e}"), + ) + })?; + let client_address = ClientAddress::new(&path.client_id).map_err(|e| { + bad_request_with_code( + ErrorCode::InvalidAddress, + format!("Invalid client address: {e}"), + ) + })?; + + let provider_id = provider_address.into(); + let client_id = client_address.into(); + + let data = state + .provider_service + .get_provider_client(&provider_id, &client_id) + .await + .map_err(|e| { + debug!("Failed to query provider+client: {:?}", e); + internal_server_error_with_code( + ErrorCode::InternalError, + "Failed to query provider+client", + ) + })? + .ok_or_else(|| { + not_found_with_code( + ErrorCode::NotFound, + format!( + "Provider {} with client {} not found", + &path.id, &path.client_id + ), + ) + })?; + + let scheduling = if query.extended { + state + .provider_service + .get_scheduling_data(&provider_id) + .await + .ok() + .flatten() + } else { + None + }; + + Ok(ok_response( + ProviderClientResponse::from_data_with_scheduling(data, scheduling, query.extended), + )) +} diff --git a/url_finder/src/api/providers/history_retrievability.rs b/url_finder/src/api/providers/history_retrievability.rs new file mode 100644 index 0000000..653966d --- /dev/null +++ b/url_finder/src/api/providers/history_retrievability.rs @@ -0,0 +1,286 @@ +use std::sync::Arc; + +use axum::{ + debug_handler, + extract::{Path, Query, State}, +}; +use axum_extra::extract::WithRejection; +use chrono::{DateTime, NaiveDate, Utc}; +use serde::{Deserialize, Serialize}; +use tracing::debug; +use utoipa::{IntoParams, ToSchema}; + +use crate::{ + AppState, + api_response::{ + ApiResponse, ErrorCode, ErrorResponse, bad_request_with_code, + internal_server_error_with_code, ok_response, + }, + config::MAX_HISTORY_DAYS, + 
repository::HistoryRow, + types::{ClientAddress, ErrorCode as TypesErrorCode, ProviderAddress, ResultCode}, +}; + +#[derive(Deserialize, ToSchema, IntoParams)] +pub struct HistoryProviderPath { + pub id: String, +} + +#[derive(Deserialize, ToSchema, IntoParams)] +pub struct HistoryProviderClientPath { + pub id: String, + pub client_id: String, +} + +#[derive(Deserialize, ToSchema, IntoParams)] +pub struct HistoryQuery { + /// Start date (YYYY-MM-DD). Defaults to 30 days ago. + pub from: Option, + /// End date (YYYY-MM-DD). Defaults to today. + pub to: Option, + /// Include extended test details. Defaults to false. + #[serde(default)] + pub extended: bool, +} + +#[derive(Serialize, ToSchema)] +pub struct RetrievabilityHistoryResponse { + pub provider_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub client_id: Option, + pub from: NaiveDate, + pub to: NaiveDate, + pub data: Vec, +} + +#[derive(Serialize, ToSchema)] +pub struct RetrievabilityDataPoint { + pub date: NaiveDate, + pub retrievability_percent: f64, + #[serde(skip_serializing_if = "Option::is_none")] + pub sector_utilization_percent: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub is_consistent: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub is_reliable: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub working_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub result_code: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error_code: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub tested_at: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub url_metadata: Option, +} + +impl RetrievabilityDataPoint { + fn basic( + date: NaiveDate, + retrievability_percent: f64, + sector_utilization_percent: Option, + ) -> Self { + Self { + date, + retrievability_percent, + sector_utilization_percent, + is_consistent: None, + is_reliable: None, + working_url: None, + result_code: 
None, + error_code: None, + tested_at: None, + url_metadata: None, + } + } +} + +impl From for RetrievabilityDataPoint { + fn from(row: HistoryRow) -> Self { + Self { + date: row.date, + retrievability_percent: row.retrievability_percent, + sector_utilization_percent: row.sector_utilization_percent, + is_consistent: row.is_consistent, + is_reliable: row.is_reliable, + working_url: row.working_url, + result_code: Some(row.result_code), + error_code: row.error_code, + tested_at: Some(row.tested_at), + url_metadata: row.url_metadata, + } + } +} + +fn validate_date_range( + from: Option, + to: Option, +) -> Result<(NaiveDate, NaiveDate), ApiResponse<()>> { + let today = Utc::now().date_naive(); + let default_from = today - chrono::Duration::days(MAX_HISTORY_DAYS); + + let from_date = from.unwrap_or(default_from); + let to_date = to.unwrap_or(today); + + if from_date > to_date { + return Err(bad_request_with_code( + ErrorCode::InvalidDateRange, + "Parameter 'from' must be before or equal to 'to'", + )); + } + + let range_days = (to_date - from_date).num_days(); + if range_days > MAX_HISTORY_DAYS { + return Err(bad_request_with_code( + ErrorCode::DateRangeExceeded, + format!("Date range exceeds maximum of {MAX_HISTORY_DAYS} days"), + )); + } + + Ok((from_date, to_date)) +} + +#[utoipa::path( + get, + path = "/providers/{id}/history/retrievability", + params(HistoryProviderPath, HistoryQuery), + responses( + (status = 200, description = "Historical retrievability data", body = RetrievabilityHistoryResponse), + (status = 400, description = "Invalid request", body = ErrorResponse), + (status = 500, description = "Internal error", body = ErrorResponse), + ), + tags = ["Providers"], +)] +#[debug_handler] +pub async fn handle_history_retrievability( + State(state): State>, + WithRejection(Path(path), _): WithRejection< + Path, + ApiResponse, + >, + Query(query): Query, +) -> Result, ApiResponse<()>> { + debug!( + "GET 
/providers/{}/history/retrievability?from={:?}&to={:?}&extended={}", + &path.id, query.from, query.to, query.extended + ); + + let provider_address = ProviderAddress::new(&path.id).map_err(|e| { + bad_request_with_code( + ErrorCode::InvalidAddress, + format!("Invalid provider address: {e}"), + ) + })?; + let provider_id = provider_address.clone().into(); + + let (from_date, to_date) = validate_date_range(query.from, query.to)?; + + let rows = state + .url_repo + .get_history_for_provider(&provider_id, from_date, to_date) + .await + .map_err(|e| { + tracing::warn!("Failed to query history: {:?}", e); + internal_server_error_with_code(ErrorCode::InternalError, "Failed to query history") + })?; + + let data: Vec = rows + .into_iter() + .map(|row| { + if query.extended { + row.into() + } else { + RetrievabilityDataPoint::basic( + row.date, + row.retrievability_percent, + row.sector_utilization_percent, + ) + } + }) + .collect(); + + Ok(ok_response(RetrievabilityHistoryResponse { + provider_id: provider_address.to_string(), + client_id: None, + from: from_date, + to: to_date, + data, + })) +} + +#[utoipa::path( + get, + path = "/providers/{id}/clients/{client_id}/history/retrievability", + params(HistoryProviderClientPath, HistoryQuery), + responses( + (status = 200, description = "Historical retrievability data", body = RetrievabilityHistoryResponse), + (status = 400, description = "Invalid request", body = ErrorResponse), + (status = 500, description = "Internal error", body = ErrorResponse), + ), + tags = ["Providers"], +)] +#[debug_handler] +pub async fn handle_history_retrievability_client( + State(state): State>, + WithRejection(Path(path), _): WithRejection< + Path, + ApiResponse, + >, + Query(query): Query, +) -> Result, ApiResponse<()>> { + debug!( + "GET /providers/{}/clients/{}/history/retrievability?from={:?}&to={:?}&extended={}", + &path.id, &path.client_id, query.from, query.to, query.extended + ); + + let provider_address = 
ProviderAddress::new(&path.id).map_err(|e| { + bad_request_with_code( + ErrorCode::InvalidAddress, + format!("Invalid provider address: {e}"), + ) + })?; + let client_address = ClientAddress::new(&path.client_id).map_err(|e| { + bad_request_with_code( + ErrorCode::InvalidAddress, + format!("Invalid client address: {e}"), + ) + })?; + + let provider_id = provider_address.clone().into(); + let client_id = client_address.clone().into(); + + let (from_date, to_date) = validate_date_range(query.from, query.to)?; + + let rows = state + .url_repo + .get_history_for_provider_client(&provider_id, &client_id, from_date, to_date) + .await + .map_err(|e| { + tracing::warn!("Failed to query history: {:?}", e); + internal_server_error_with_code(ErrorCode::InternalError, "Failed to query history") + })?; + + let data: Vec = rows + .into_iter() + .map(|row| { + if query.extended { + row.into() + } else { + RetrievabilityDataPoint::basic( + row.date, + row.retrievability_percent, + row.sector_utilization_percent, + ) + } + }) + .collect(); + + Ok(ok_response(RetrievabilityHistoryResponse { + provider_id: provider_address.to_string(), + client_id: Some(client_address.to_string()), + from: from_date, + to: to_date, + data, + })) +} diff --git a/url_finder/src/api/providers/list_providers.rs b/url_finder/src/api/providers/list_providers.rs new file mode 100644 index 0000000..9202b4a --- /dev/null +++ b/url_finder/src/api/providers/list_providers.rs @@ -0,0 +1,92 @@ +use std::sync::Arc; + +use axum::{ + debug_handler, + extract::{Query, State}, +}; +use serde::Deserialize; +use tracing::{debug, warn}; +use utoipa::{IntoParams, ToSchema}; + +use crate::{ + AppState, + api_response::{ + ApiResponse, ErrorCode, ErrorResponse, internal_server_error_with_code, ok_response, + }, +}; + +use crate::repository::ProviderFilters; + +use super::types::{ProviderResponse, ProvidersListResponse}; + +#[derive(Deserialize, ToSchema, IntoParams)] +pub struct ListProvidersQuery { + /// Maximum number of 
providers to return (1-500) + #[serde(default = "default_limit")] + pub limit: i64, + /// Number of providers to skip + #[serde(default)] + pub offset: i64, + /// Filter by URL availability: true=has URL, false=no URL, omit=all + pub has_working_url: Option, + /// Filter by URL consistency: true=consistent, false=inconsistent, omit=all + pub is_consistent: Option, + /// Include diagnostic and scheduling details + #[serde(default)] + pub extended: bool, +} + +fn default_limit() -> i64 { + 100 +} + +#[utoipa::path( + get, + path = "/providers", + params(ListProvidersQuery), + responses( + (status = 200, description = "Providers list", body = ProvidersListResponse), + (status = 500, description = "Internal error", body = ErrorResponse), + ), + tags = ["Providers"], +)] +#[debug_handler] +pub async fn handle_list_providers( + State(state): State>, + Query(query): Query, +) -> Result, ApiResponse<()>> { + let limit = query.limit.clamp(1, 500); + let offset = query.offset.max(0); + + let filters = ProviderFilters { + has_working_url: query.has_working_url, + is_consistent: query.is_consistent, + }; + + debug!( + "GET /providers?limit={limit}&offset={offset}&has_working_url={:?}&is_consistent={:?}&extended={}", + filters.has_working_url, filters.is_consistent, query.extended + ); + + let paginated = state + .provider_service + .list_providers(&filters, limit, offset) + .await + .map_err(|e| { + warn!("Failed to list providers: {:?}", e); + internal_server_error_with_code(ErrorCode::InternalError, "Failed to query providers") + })?; + + let providers: Vec = paginated + .providers + .into_iter() + .map(|p| ProviderResponse::from_data(p, query.extended)) + .collect(); + + Ok(ok_response(ProvidersListResponse { + providers, + total: paginated.total, + limit: paginated.limit, + offset: paginated.offset, + })) +} diff --git a/url_finder/src/api/providers/mod.rs b/url_finder/src/api/providers/mod.rs new file mode 100644 index 0000000..076632f --- /dev/null +++ 
b/url_finder/src/api/providers/mod.rs @@ -0,0 +1,23 @@ +mod types; +pub use types::*; + +mod get_provider; +pub use get_provider::*; + +mod get_provider_client; +pub use get_provider_client::*; + +mod get_client_providers; +pub use get_client_providers::*; + +mod list_providers; +pub use list_providers::*; + +mod bulk_providers; +pub use bulk_providers::*; + +mod reset_provider; +pub use reset_provider::*; + +mod history_retrievability; +pub use history_retrievability::*; diff --git a/url_finder/src/api/providers/reset_provider.rs b/url_finder/src/api/providers/reset_provider.rs new file mode 100644 index 0000000..6b3f222 --- /dev/null +++ b/url_finder/src/api/providers/reset_provider.rs @@ -0,0 +1,113 @@ +use std::sync::Arc; + +use axum::{ + debug_handler, + extract::{Path, Query, State}, +}; +use axum_extra::extract::WithRejection; +use serde::Deserialize; +use tracing::{debug, error}; +use utoipa::{IntoParams, ToSchema}; + +use crate::{ + AppState, + api_response::{ + ApiResponse, ErrorCode, ErrorResponse, bad_request_with_code, + internal_server_error_with_code, not_found_with_code, ok_response, + }, + repository::StorageProvider, + types::ProviderAddress, +}; + +#[derive(Debug, Clone, Copy, Deserialize, ToSchema)] +#[serde(rename_all = "snake_case")] +pub enum ScheduleType { + UrlDiscovery, + BmsTest, + All, +} + +#[derive(Deserialize, ToSchema, IntoParams)] +pub struct ResetProviderPath { + pub id: String, +} + +#[derive(Deserialize, ToSchema, IntoParams)] +pub struct ResetProviderQuery { + pub schedule: ScheduleType, +} + +#[utoipa::path( + post, + path = "/providers/{id}/reset", + params(ResetProviderPath, ResetProviderQuery), + responses( + (status = 200, description = "Schedule reset successfully", body = StorageProvider), + (status = 400, description = "Invalid provider address or schedule parameter", body = ErrorResponse), + (status = 404, description = "Provider not found", body = ErrorResponse), + (status = 500, description = "Internal error", body = 
ErrorResponse), + ), + tags = ["Providers"], +)] +#[debug_handler] +pub async fn handle_reset_provider( + State(state): State>, + WithRejection(Path(path), _): WithRejection< + Path, + ApiResponse, + >, + WithRejection(Query(query), _): WithRejection< + Query, + ApiResponse, + >, +) -> Result, ApiResponse<()>> { + debug!( + "POST /providers/{}/reset?schedule={:?}", + &path.id, query.schedule + ); + + let provider_address = ProviderAddress::new(&path.id).map_err(|e| { + error!("Invalid provider address '{}': {}", &path.id, e); + bad_request_with_code(ErrorCode::InvalidAddress, "Invalid provider address") + })?; + + let provider_id = provider_address.into(); + + let result = match query.schedule { + ScheduleType::UrlDiscovery => { + state + .storage_provider_repo + .reset_url_discovery_schedule(&provider_id) + .await + } + ScheduleType::BmsTest => { + state + .storage_provider_repo + .reset_bms_test_schedule(&provider_id) + .await + } + ScheduleType::All => { + state + .storage_provider_repo + .reset_all_schedules(&provider_id) + .await + } + }; + + let provider = result + .map_err(|e| { + error!( + "Failed to reset schedule for provider_id={}: {e:?}", + provider_id + ); + internal_server_error_with_code(ErrorCode::InternalError, "Failed to reset schedule") + })? 
+ .ok_or_else(|| { + not_found_with_code( + ErrorCode::NotFound, + format!("Provider {} not found", &path.id), + ) + })?; + + Ok(ok_response(provider)) +} diff --git a/url_finder/src/api/providers/types.rs b/url_finder/src/api/providers/types.rs new file mode 100644 index 0000000..41ffa9c --- /dev/null +++ b/url_finder/src/api/providers/types.rs @@ -0,0 +1,401 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use utoipa::{IntoParams, ToSchema}; + +use crate::services::provider_service::{ + BandwidthResult, PerformanceData, ProviderData, SchedulingData, +}; +use crate::types::{ErrorCode, ProviderAddress, ResultCode}; + +/// Common query parameters for extended response +#[derive(Debug, Clone, Deserialize, ToSchema, IntoParams, Default)] +pub struct ExtendedQuery { + /// Include diagnostic and scheduling details + #[serde(default)] + pub extended: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct BandwidthTestResponse { + pub status: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub tested_at: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub ping_avg_ms: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub head_avg_ms: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub ttfb_ms: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub download_speed_mbps: Option, + // Extended fields (only populated when extended=true) + #[serde(skip_serializing_if = "Option::is_none")] + pub worker_count: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub routing_key: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub url_tested: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct GeolocationTestResponse { + pub status: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub tested_at: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub routing_key: Option, + 
#[serde(skip_serializing_if = "Option::is_none")] + pub region: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub country: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub city: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema, Default)] +pub struct PerformanceResponse { + #[serde(skip_serializing_if = "Option::is_none")] + pub bandwidth: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub geolocation: Option, +} + +/// Breakdown of inconsistent URL tests by cause (extended only) +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct InconsistentBreakdown { + /// (Small|Failed, Valid) - Second tap returned valid data after warm-up + pub warm_up: usize, + /// (Valid, Small|Failed) - First tap valid, second degraded + pub flaky: usize, + /// (Small, Small|Failed) or (Failed, Small) - Neither returned valid + pub small_responses: usize, + /// (Failed, Failed) - Both taps failed completely + pub both_failed: usize, + /// (Valid, Valid) but different Content-Length + pub size_mismatch: usize, +} + +/// Analysis metrics from URL testing (extended only) +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct AnalysisResponse { + /// Number of URLs tested + pub sample_count: usize, + /// Number of URLs that returned valid data + pub success_count: usize, + /// Number of URLs that timed out + pub timeout_count: usize, + /// Number of URLs with inconsistent double-tap results + pub inconsistent_count: usize, + /// Breakdown of inconsistency causes + #[serde(skip_serializing_if = "Option::is_none")] + pub inconsistent_breakdown: Option, + /// When this analysis was performed + pub validated_at: DateTime, +} + +/// Diagnostic information (extended only) +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct DiagnosticsResponse { + /// Result of the URL discovery + pub result_code: ResultCode, + /// Error details if discovery failed + #[serde(skip_serializing_if = 
"Option::is_none")] + pub error_code: Option, + /// Detailed analysis metrics + #[serde(skip_serializing_if = "Option::is_none")] + pub analysis: Option, +} + +/// State of a scheduled task (extended only) +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ScheduleStateResponse { + /// When the next run is scheduled + #[serde(skip_serializing_if = "Option::is_none")] + pub next_at: Option>, + /// Current status (null = ready, "pending" = in progress) + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, + /// When task entered pending state (URL discovery only) + #[serde(skip_serializing_if = "Option::is_none")] + pub pending_since: Option>, +} + +/// Scheduler state for all tasks (extended only) +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct SchedulingResponse { + /// URL discovery schedule + pub url_discovery: ScheduleStateResponse, + /// BMS bandwidth test schedule + pub bms_test: ScheduleStateResponse, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ProviderResponse { + pub provider_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub working_url: Option, + pub retrievability_percent: f64, + #[serde(skip_serializing_if = "Option::is_none")] + pub sector_utilization_percent: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub tested_at: Option>, + // New: always returned + #[serde(skip_serializing_if = "Option::is_none")] + pub is_consistent: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub is_reliable: Option, + #[serde(default)] + pub performance: PerformanceResponse, + // Extended only (omitted when extended=false) + #[serde(skip_serializing_if = "Option::is_none")] + pub diagnostics: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub scheduling: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ProviderClientResponse { + pub provider_id: String, + 
#[serde(skip_serializing_if = "Option::is_none")] + pub client_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub working_url: Option, + pub retrievability_percent: f64, + #[serde(skip_serializing_if = "Option::is_none")] + pub sector_utilization_percent: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub tested_at: Option>, + // New: always returned + #[serde(skip_serializing_if = "Option::is_none")] + pub is_consistent: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub is_reliable: Option, + #[serde(default)] + pub performance: PerformanceResponse, + // Extended only (omitted when extended=false) + #[serde(skip_serializing_if = "Option::is_none")] + pub diagnostics: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub scheduling: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ClientProvidersResponse { + pub client_id: String, + pub providers: Vec, + pub total: i64, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ProvidersListResponse { + pub providers: Vec, + pub total: i64, + pub limit: i64, + pub offset: i64, +} + +#[derive(Debug, Clone, Deserialize, ToSchema)] +pub struct BulkProvidersRequest { + pub provider_ids: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct BulkProvidersResponse { + pub providers: Vec, + pub not_found: Vec, +} + +// --- Impl blocks with conversion methods --- + +impl ProviderResponse { + pub fn from_data(data: ProviderData, extended: bool) -> Self { + Self::from_data_with_scheduling(data, None, extended) + } + + pub fn from_data_with_scheduling( + data: ProviderData, + scheduling: Option, + extended: bool, + ) -> Self { + let provider_address: ProviderAddress = data.provider_id.into(); + + let diagnostics = if extended { + Some(DiagnosticsResponse { + result_code: data.result_code, + error_code: data.error_code, + analysis: Self::parse_analysis(&data.url_metadata), + }) + } else { 
+ None + }; + + let scheduling_response = if extended { + scheduling.map(|s| SchedulingResponse { + url_discovery: ScheduleStateResponse { + next_at: s.url_discovery_next_at, + status: s.url_discovery_status, + pending_since: s.url_discovery_pending_since, + }, + bms_test: ScheduleStateResponse { + next_at: s.bms_test_next_at, + status: s.bms_test_status, + pending_since: None, + }, + }) + } else { + None + }; + + Self { + provider_id: provider_address.to_string(), + working_url: data.working_url, + retrievability_percent: data.retrievability_percent, + sector_utilization_percent: data.sector_utilization_percent, + tested_at: Some(data.tested_at), + is_consistent: data.is_consistent, + is_reliable: data.is_reliable, + performance: PerformanceResponse::from_data(data.performance, extended), + diagnostics, + scheduling: scheduling_response, + } + } + + fn parse_analysis(metadata: &Option) -> Option { + let meta = metadata.as_ref()?; + let analysis = meta.get("analysis")?; + + let breakdown = analysis.get("inconsistent_breakdown").and_then(|b| { + Some(InconsistentBreakdown { + warm_up: b.get("warm_up")?.as_u64()? as usize, + flaky: b.get("flaky")?.as_u64()? as usize, + small_responses: b.get("small_responses")?.as_u64()? as usize, + both_failed: b.get("both_failed")?.as_u64()? as usize, + size_mismatch: b.get("size_mismatch")?.as_u64()? as usize, + }) + }); + + Some(AnalysisResponse { + sample_count: analysis.get("sample_count")?.as_u64()? as usize, + success_count: analysis.get("success_count")?.as_u64()? as usize, + timeout_count: analysis.get("timeout_count")?.as_u64()? 
as usize, + inconsistent_count: analysis + .get("inconsistent_count") + .and_then(|v| v.as_u64()) + .unwrap_or(0) as usize, + inconsistent_breakdown: breakdown, + validated_at: { + let ts = meta.get("validated_at")?.as_str()?; + DateTime::parse_from_rfc3339(ts).ok()?.with_timezone(&Utc) + }, + }) + } +} + +impl From for ProviderResponse { + fn from(data: ProviderData) -> Self { + Self::from_data(data, false) + } +} + +impl ProviderClientResponse { + pub fn from_data(data: ProviderData, extended: bool) -> Self { + Self::from_data_with_scheduling(data, None, extended) + } + + pub fn from_data_with_scheduling( + data: ProviderData, + scheduling: Option, + extended: bool, + ) -> Self { + let provider_address: ProviderAddress = data.provider_id.into(); + let client_id = data + .client_id + .map(|c| crate::types::ClientAddress::from(c).to_string()); + + let diagnostics = if extended { + Some(DiagnosticsResponse { + result_code: data.result_code, + error_code: data.error_code, + analysis: ProviderResponse::parse_analysis(&data.url_metadata), + }) + } else { + None + }; + + let scheduling_response = if extended { + scheduling.map(|s| SchedulingResponse { + url_discovery: ScheduleStateResponse { + next_at: s.url_discovery_next_at, + status: s.url_discovery_status, + pending_since: s.url_discovery_pending_since, + }, + bms_test: ScheduleStateResponse { + next_at: s.bms_test_next_at, + status: s.bms_test_status, + pending_since: None, + }, + }) + } else { + None + }; + + Self { + provider_id: provider_address.to_string(), + client_id, + working_url: data.working_url, + retrievability_percent: data.retrievability_percent, + sector_utilization_percent: data.sector_utilization_percent, + tested_at: Some(data.tested_at), + is_consistent: data.is_consistent, + is_reliable: data.is_reliable, + performance: PerformanceResponse::from_data(data.performance, extended), + diagnostics, + scheduling: scheduling_response, + } + } +} + +impl From for ProviderClientResponse { + fn from(data: 
/// Machine-readable error identifiers returned in the `error_code` field of
/// API error responses. The wire form is the SCREAMING_SNAKE_CASE string
/// produced by [`ErrorCode::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorCode {
    InternalError,
    InvalidAddress,
    InvalidRequest,
    NotFound,
    InvalidDateRange,
    DateRangeExceeded,
    InvalidDateFormat,
}

impl ErrorCode {
    /// Stable string representation used on the wire.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::InternalError => "INTERNAL_ERROR",
            Self::InvalidAddress => "INVALID_ADDRESS",
            Self::InvalidRequest => "INVALID_REQUEST",
            Self::NotFound => "NOT_FOUND",
            Self::InvalidDateRange => "INVALID_DATE_RANGE",
            Self::DateRangeExceeded => "DATE_RANGE_EXCEEDED",
            Self::InvalidDateFormat => "INVALID_DATE_FORMAT",
        }
    }
}

// Backward-compatible generalization: make the code usable directly in
// `format!`/logging without allocating via `String::from`.
impl std::fmt::Display for ErrorCode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}

impl From<ErrorCode> for String {
    fn from(code: ErrorCode) -> String {
        code.as_str().to_string()
    }
}
Debug)] pub struct ErrorResponse { - error: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub error_code: Option, + pub error: String, } #[derive(Clone, Debug)] @@ -26,6 +59,7 @@ pub enum ApiResponse { impl From for ApiResponse { fn from(rejection: JsonRejection) -> ApiResponse { ApiResponse::BadRequest(Json(ErrorResponse { + error_code: None, error: rejection.body_text(), })) } @@ -34,6 +68,7 @@ impl From for ApiResponse { impl From for ApiResponse { fn from(rejection: QueryRejection) -> ApiResponse { ApiResponse::BadRequest(Json(ErrorResponse { + error_code: None, error: rejection.body_text(), })) } @@ -42,6 +77,7 @@ impl From for ApiResponse { impl From for ApiResponse { fn from(rejection: PathRejection) -> ApiResponse { ApiResponse::BadRequest(Json(ErrorResponse { + error_code: None, error: rejection.body_text(), })) } @@ -68,15 +104,48 @@ where } pub fn bad_request>(msg: T) -> ApiResponse<()> { - ApiResponse::BadRequest(Json(ErrorResponse { error: msg.into() })) + ApiResponse::BadRequest(Json(ErrorResponse { + error_code: None, + error: msg.into(), + })) +} + +pub fn bad_request_with_code, T: Into>(code: C, msg: T) -> ApiResponse<()> { + ApiResponse::BadRequest(Json(ErrorResponse { + error_code: Some(code.into()), + error: msg.into(), + })) } pub fn internal_server_error>(msg: T) -> ApiResponse<()> { - ApiResponse::InternalServerError(Json(ErrorResponse { error: msg.into() })) + ApiResponse::InternalServerError(Json(ErrorResponse { + error_code: None, + error: msg.into(), + })) +} + +pub fn internal_server_error_with_code, T: Into>( + code: C, + msg: T, +) -> ApiResponse<()> { + ApiResponse::InternalServerError(Json(ErrorResponse { + error_code: Some(code.into()), + error: msg.into(), + })) } pub fn not_found>(msg: T) -> ApiResponse<()> { - ApiResponse::NotFound(Json(ErrorResponse { error: msg.into() })) + ApiResponse::NotFound(Json(ErrorResponse { + error_code: None, + error: msg.into(), + })) +} + +pub fn not_found_with_code, T: Into>(code: 
C, msg: T) -> ApiResponse<()> { + ApiResponse::NotFound(Json(ErrorResponse { + error_code: Some(code.into()), + error: msg.into(), + })) } pub fn ok_response(data: T) -> ApiResponse { @@ -84,9 +153,15 @@ pub fn ok_response(data: T) -> ApiResponse { } pub fn unauthorized>(msg: T) -> ApiResponse<()> { - ApiResponse::Unauthorized(Json(ErrorResponse { error: msg.into() })) + ApiResponse::Unauthorized(Json(ErrorResponse { + error_code: None, + error: msg.into(), + })) } pub fn too_many_requests>(msg: T) -> ApiResponse<()> { - ApiResponse::TooManyRequests(Json(ErrorResponse { error: msg.into() })) + ApiResponse::TooManyRequests(Json(ErrorResponse { + error_code: None, + error: msg.into(), + })) } diff --git a/url_finder/src/background/bms_scheduler.rs b/url_finder/src/background/bms_scheduler.rs new file mode 100644 index 0000000..bc37de0 --- /dev/null +++ b/url_finder/src/background/bms_scheduler.rs @@ -0,0 +1,617 @@ +use crate::{ + bms_client::{BmsClient, BmsJobResponse}, + circuit_breaker::CircuitBreaker, + config::Config, + repository::{BmsBandwidthResult, BmsBandwidthResultRepository, StorageProviderRepository}, +}; +use chrono::Utc; +use color_eyre::Result; +use std::sync::Arc; +use std::time::Duration; +use tokio::time::sleep; +use tokio_util::sync::CancellationToken; +use tracing::{debug, error, info, warn}; + +const JOB_CREATOR_INTERVAL: Duration = Duration::from_secs(60); +const JOB_CREATOR_SLEEP_INTERVAL: Duration = Duration::from_secs(3600); +const RESULT_POLLER_INTERVAL: Duration = Duration::from_secs(30); +const BATCH_SIZE: i64 = 50; +const BMS_JOB_TIMEOUT_HOURS: i64 = 48; + +// Circuit breaker configuration +const BMS_CIRCUIT_BREAKER_THRESHOLD: usize = 5; +const BMS_CIRCUIT_BREAKER_COOLDOWN: Duration = Duration::from_secs(300); // 5 minutes + +/// Create a circuit breaker for BMS API calls. 
+pub fn create_bms_circuit_breaker() -> CircuitBreaker { + CircuitBreaker::new( + "BMS", + BMS_CIRCUIT_BREAKER_THRESHOLD, + BMS_CIRCUIT_BREAKER_COOLDOWN, + ) +} + +pub async fn run_bms_scheduler( + config: Arc, + bms_client: Arc, + circuit_breaker: Arc, + sp_repo: Arc, + result_repo: Arc, + shutdown: CancellationToken, +) { + info!("Starting BMS scheduler"); + tokio::join!( + run_job_creator( + config.clone(), + bms_client.clone(), + circuit_breaker.clone(), + sp_repo.clone(), + result_repo.clone(), + shutdown.clone() + ), + run_result_poller(config, bms_client, circuit_breaker, result_repo, shutdown), + ); + info!("BMS scheduler stopped"); +} + +async fn run_job_creator( + config: Arc, + bms_client: Arc, + circuit_breaker: Arc, + sp_repo: Arc, + result_repo: Arc, + shutdown: CancellationToken, +) { + info!("Starting BMS job creator loop"); + + loop { + let interval = match create_bms_jobs( + &config, + &bms_client, + &circuit_breaker, + &sp_repo, + &result_repo, + ) + .await + { + Ok(0) => { + debug!("No providers due for BMS test, sleeping..."); + JOB_CREATOR_SLEEP_INTERVAL + } + Ok(count) => { + info!("BMS job creation cycle completed: {count} jobs created"); + JOB_CREATOR_INTERVAL + } + Err(e) => { + error!("BMS job creator failed: {:?}", e); + JOB_CREATOR_SLEEP_INTERVAL + } + }; + + tokio::select! 
{ + _ = sleep(interval) => {} + _ = shutdown.cancelled() => { + info!("BMS job creator received shutdown signal"); + break; + } + } + } + + info!("BMS job creator stopped"); +} + +async fn create_bms_jobs( + config: &Config, + bms_client: &BmsClient, + circuit_breaker: &CircuitBreaker, + sp_repo: &StorageProviderRepository, + result_repo: &BmsBandwidthResultRepository, +) -> Result { + let providers = sp_repo.get_due_for_bms_test(BATCH_SIZE).await?; + + debug!("Found {} providers due for BMS test", providers.len()); + + let mut jobs_created = 0; + + for provider in providers { + // Check circuit breaker before each BMS API call + if let Err(e) = circuit_breaker.check_allowed() { + warn!( + "BMS circuit breaker open, skipping provider {}: {}", + provider.provider_id, e + ); + continue; + } + + let url = match &provider.last_working_url { + Some(url) => url, + None => { + warn!( + "Provider {} has no last_working_url, skipping BMS test", + provider.provider_id + ); + continue; + } + }; + + // Schedule next test FIRST to prevent duplicate jobs if later steps fail. + // This marks the provider as not-due before creating external resources. 
+ if let Err(e) = sp_repo + .schedule_next_bms_test(&provider.provider_id, config.bms_test_interval_days) + .await + { + error!( + "Failed to schedule next BMS test for provider {}: {:?}", + provider.provider_id, e + ); + continue; + } + + match bms_client + .create_job( + url.clone(), + config.bms_default_worker_count, + Some(format!("f0{}", provider.provider_id)), + ) + .await + { + Ok(job) => { + circuit_breaker.record_success(); + + if let Err(e) = result_repo + .insert_pending( + &provider.provider_id, + job.id, + &job.url, + &job.routing_key, + config.bms_default_worker_count as i32, + ) + .await + { + error!( + "Failed to insert pending result for provider {} (BMS job {} created but untracked): {:?}", + provider.provider_id, job.id, e + ); + continue; + } + + debug!( + "Created BMS job {} for provider {} (routing_key: {})", + job.id, provider.provider_id, job.routing_key + ); + jobs_created += 1; + } + Err(e) => { + circuit_breaker.record_failure(); + error!( + "Failed to create BMS job for provider {}: {} {:?}", + provider.provider_id, url, e + ); + } + } + } + + Ok(jobs_created) +} + +async fn run_result_poller( + config: Arc, + bms_client: Arc, + circuit_breaker: Arc, + result_repo: Arc, + shutdown: CancellationToken, +) { + info!("Starting BMS result poller loop"); + + loop { + if let Err(e) = poll_bms_results(&config, &bms_client, &circuit_breaker, &result_repo).await + { + error!("BMS result poller failed: {:?}", e); + } + + tokio::select! 
{ + _ = sleep(RESULT_POLLER_INTERVAL) => {} + _ = shutdown.cancelled() => { + info!("BMS result poller received shutdown signal"); + break; + } + } + } + + info!("BMS result poller stopped"); +} + +async fn poll_bms_results( + _config: &Config, + bms_client: &BmsClient, + circuit_breaker: &CircuitBreaker, + result_repo: &BmsBandwidthResultRepository, +) -> Result<()> { + let pending_results = result_repo.get_pending().await?; + + debug!("Polling {} pending BMS jobs", pending_results.len()); + + for result in pending_results { + if is_result_timed_out(&result) { + if let Err(e) = handle_timeout(&result, result_repo).await { + error!( + "Failed to handle timeout for BMS job {} (provider {}): {:?}", + result.bms_job_id, result.provider_id, e + ); + } + continue; + } + + if let Err(e) = poll_single_result(&result, bms_client, circuit_breaker, result_repo).await + { + error!( + "Failed to poll BMS job {} (provider {}): {:?}", + result.bms_job_id, result.provider_id, e + ); + } + } + + Ok(()) +} + +fn is_result_timed_out(result: &BmsBandwidthResult) -> bool { + (Utc::now() - result.created_at).num_hours() >= BMS_JOB_TIMEOUT_HOURS +} + +async fn handle_timeout( + result: &BmsBandwidthResult, + result_repo: &BmsBandwidthResultRepository, +) -> Result<()> { + let hours = (Utc::now() - result.created_at).num_hours(); + warn!( + "BMS job {} for provider {} timed out after {} hours", + result.bms_job_id, result.provider_id, hours + ); + + result_repo + .update_completed(result.bms_job_id, "Timeout", None, None, None, None) + .await +} + +async fn poll_single_result( + result: &BmsBandwidthResult, + bms_client: &BmsClient, + circuit_breaker: &CircuitBreaker, + result_repo: &BmsBandwidthResultRepository, +) -> Result<()> { + // Check circuit breaker before polling BMS API + if let Err(e) = circuit_breaker.check_allowed() { + debug!( + "BMS circuit breaker open, skipping poll for job {}: {}", + result.bms_job_id, e + ); + return Ok(()); + } + + match 
bms_client.get_job(result.bms_job_id).await { + Ok(job_response) => { + circuit_breaker.record_success(); + + if BmsClient::is_job_finished(&job_response.status) { + process_completed_job(result, &job_response, result_repo).await?; + } else { + debug!( + "BMS job {} for provider {} still in progress: {}", + result.bms_job_id, result.provider_id, job_response.status + ); + } + } + Err(e) => { + circuit_breaker.record_failure(); + warn!( + "Failed to fetch BMS job {} for provider {}: {:?}", + result.bms_job_id, result.provider_id, e + ); + } + } + + Ok(()) +} + +async fn process_completed_job( + result: &BmsBandwidthResult, + job_response: &BmsJobResponse, + result_repo: &BmsBandwidthResultRepository, +) -> Result<()> { + debug!( + "BMS job {} completed for provider {} with status: {}", + job_response.id, result.provider_id, job_response.status + ); + + let (ping_avg_ms, head_avg_ms, ttfb_ms, download_speed_mbps) = + extract_results_from_job(job_response); + + result_repo + .update_completed( + result.bms_job_id, + &job_response.status, + ping_avg_ms, + head_avg_ms, + ttfb_ms, + download_speed_mbps, + ) + .await?; + + Ok(()) +} + +fn extract_results_from_job( + job_response: &BmsJobResponse, +) -> (Option, Option, Option, Option) { + // Find the last completed subjob with worker data (typically the 100% test) + let worker_data = job_response + .sub_jobs + .as_ref() + .and_then(|subs| { + subs.iter() + .rev() + .find(|s| s.status == "Completed" && s.worker_data.is_some()) + }) + .and_then(|s| s.worker_data.as_ref()) + .and_then(|wd| wd.first()); + + match worker_data { + Some(data) => { + // Ping avg is in seconds from BMS, convert to ms + let ping_avg_ms = data.ping.as_ref().and_then(|p| p.avg).map(|v| v * 1000.0); + + // Head avg is already in ms from BMS + let head_avg_ms = data.head.as_ref().and_then(|h| h.avg); + + // TTFB is in ms from BMS + let ttfb_ms = data.download.as_ref().and_then(|d| d.time_to_first_byte_ms); + + // Download speed is in Mbps from BMS + 
let download_speed_mbps = data.download.as_ref().and_then(|d| d.download_speed); + + (ping_avg_ms, head_avg_ms, ttfb_ms, download_speed_mbps) + } + None => (None, None, None, None), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::bms_client::{ + BmsJobDetails, BmsJobResponse, DownloadResult, HeadResult, PingResult, SubJob, WorkerData, + }; + use chrono::{Duration, Utc}; + use uuid::Uuid; + + fn make_job_response(sub_jobs: Option>) -> BmsJobResponse { + BmsJobResponse { + id: Uuid::new_v4(), + status: "Completed".to_string(), + url: "http://example.com/file".to_string(), + routing_key: "us_east".to_string(), + details: Some(BmsJobDetails { + worker_count: Some(3), + size_mb: Some(100), + }), + sub_jobs, + } + } + + fn make_worker_data( + ping_avg: Option, + head_avg: Option, + ttfb: Option, + speed: Option, + ) -> WorkerData { + WorkerData { + ping: ping_avg.map(|avg| PingResult { + avg: Some(avg), + min: Some(avg * 0.9), + max: Some(avg * 1.1), + }), + head: head_avg.map(|avg| HeadResult { + avg: Some(avg), + min: Some(avg * 0.9), + max: Some(avg * 1.1), + }), + download: Some(DownloadResult { + download_speed: speed, + time_to_first_byte_ms: ttfb, + total_bytes: Some(100_000_000), + elapsed_secs: Some(10.0), + }), + } + } + + fn make_subjob(status: &str, worker_data: Option>) -> SubJob { + SubJob { + id: Uuid::new_v4(), + status: status.to_string(), + worker_data, + } + } + + // --- extract_results_from_job tests --- + + #[test] + fn test_extract_results_completed_job_with_all_metrics() { + // Ping is in seconds from BMS, should be converted to ms + let worker = make_worker_data(Some(0.025), Some(50.0), Some(100.0), Some(500.0)); + let subjob = make_subjob("Completed", Some(vec![worker])); + let job = make_job_response(Some(vec![subjob])); + + let (ping, head, ttfb, speed) = extract_results_from_job(&job); + + assert_eq!(ping, Some(25.0)); // 0.025s * 1000 = 25ms + assert_eq!(head, Some(50.0)); + assert_eq!(ttfb, Some(100.0)); + assert_eq!(speed, 
Some(500.0)); + } + + #[test] + fn test_extract_results_uses_last_completed_subjob() { + // First subjob (warmup) - lower values + let worker1 = make_worker_data(Some(0.010), Some(20.0), Some(50.0), Some(100.0)); + let subjob1 = make_subjob("Completed", Some(vec![worker1])); + + // Second subjob (80% workers) - medium values + let worker2 = make_worker_data(Some(0.020), Some(40.0), Some(80.0), Some(300.0)); + let subjob2 = make_subjob("Completed", Some(vec![worker2])); + + // Third subjob (100% workers) - highest values, this should be used + let worker3 = make_worker_data(Some(0.030), Some(60.0), Some(120.0), Some(500.0)); + let subjob3 = make_subjob("Completed", Some(vec![worker3])); + + let job = make_job_response(Some(vec![subjob1, subjob2, subjob3])); + + let (ping, head, ttfb, speed) = extract_results_from_job(&job); + + // Should get values from last completed subjob (subjob3) + assert_eq!(ping, Some(30.0)); // 0.030s * 1000 + assert_eq!(head, Some(60.0)); + assert_eq!(ttfb, Some(120.0)); + assert_eq!(speed, Some(500.0)); + } + + #[test] + fn test_extract_results_skips_failed_subjobs() { + // First subjob completed + let worker1 = make_worker_data(Some(0.010), Some(20.0), Some(50.0), Some(100.0)); + let subjob1 = make_subjob("Completed", Some(vec![worker1])); + + // Second subjob failed (should be skipped) + let subjob2 = make_subjob("Failed", None); + + // Third subjob cancelled (should be skipped) + let subjob3 = make_subjob("Cancelled", None); + + let job = make_job_response(Some(vec![subjob1, subjob2, subjob3])); + + let (ping, head, ttfb, speed) = extract_results_from_job(&job); + + // Should get values from first completed subjob (subjob1) + assert_eq!(ping, Some(10.0)); // 0.010s * 1000 + assert_eq!(head, Some(20.0)); + assert_eq!(ttfb, Some(50.0)); + assert_eq!(speed, Some(100.0)); + } + + #[test] + fn test_extract_results_no_subjobs() { + let job = make_job_response(None); + + let (ping, head, ttfb, speed) = extract_results_from_job(&job); + + 
assert_eq!(ping, None); + assert_eq!(head, None); + assert_eq!(ttfb, None); + assert_eq!(speed, None); + } + + #[test] + fn test_extract_results_empty_subjobs() { + let job = make_job_response(Some(vec![])); + + let (ping, head, ttfb, speed) = extract_results_from_job(&job); + + assert_eq!(ping, None); + assert_eq!(head, None); + assert_eq!(ttfb, None); + assert_eq!(speed, None); + } + + #[test] + fn test_extract_results_completed_but_no_worker_data() { + let subjob = make_subjob("Completed", None); + let job = make_job_response(Some(vec![subjob])); + + let (ping, head, ttfb, speed) = extract_results_from_job(&job); + + assert_eq!(ping, None); + assert_eq!(head, None); + assert_eq!(ttfb, None); + assert_eq!(speed, None); + } + + #[test] + fn test_extract_results_completed_empty_worker_data() { + let subjob = make_subjob("Completed", Some(vec![])); + let job = make_job_response(Some(vec![subjob])); + + let (ping, head, ttfb, speed) = extract_results_from_job(&job); + + assert_eq!(ping, None); + assert_eq!(head, None); + assert_eq!(ttfb, None); + assert_eq!(speed, None); + } + + #[test] + fn test_extract_results_partial_metrics() { + // Worker with only download data, no ping/head + let worker = WorkerData { + ping: None, + head: None, + download: Some(DownloadResult { + download_speed: Some(500.0), + time_to_first_byte_ms: Some(100.0), + total_bytes: None, + elapsed_secs: None, + }), + }; + let subjob = make_subjob("Completed", Some(vec![worker])); + let job = make_job_response(Some(vec![subjob])); + + let (ping, head, ttfb, speed) = extract_results_from_job(&job); + + assert_eq!(ping, None); + assert_eq!(head, None); + assert_eq!(ttfb, Some(100.0)); + assert_eq!(speed, Some(500.0)); + } + + // --- is_result_timed_out tests --- + + fn make_bms_result(created_at: chrono::DateTime) -> BmsBandwidthResult { + BmsBandwidthResult { + id: Uuid::new_v4(), + provider_id: "12345".to_string(), + bms_job_id: Uuid::new_v4(), + url_tested: "http://example.com".to_string(), + 
routing_key: "us_east".to_string(), + worker_count: 3, + status: "Pending".to_string(), + ping_avg_ms: None, + head_avg_ms: None, + ttfb_ms: None, + download_speed_mbps: None, + created_at, + completed_at: None, + } + } + + #[test] + fn test_is_result_timed_out_fresh_job() { + let result = make_bms_result(Utc::now()); + assert!(!is_result_timed_out(&result)); + } + + #[test] + fn test_is_result_timed_out_47_hours() { + let result = make_bms_result(Utc::now() - Duration::hours(47)); + assert!(!is_result_timed_out(&result)); + } + + #[test] + fn test_is_result_timed_out_exactly_48_hours() { + let result = make_bms_result(Utc::now() - Duration::hours(48)); + assert!(is_result_timed_out(&result)); + } + + #[test] + fn test_is_result_timed_out_72_hours() { + let result = make_bms_result(Utc::now() - Duration::hours(72)); + assert!(is_result_timed_out(&result)); + } +} diff --git a/url_finder/src/background/mod.rs b/url_finder/src/background/mod.rs index dbf5309..03b87cd 100644 --- a/url_finder/src/background/mod.rs +++ b/url_finder/src/background/mod.rs @@ -1,5 +1,7 @@ +mod bms_scheduler; mod provider_discovery; mod url_discovery_scheduler; +pub use bms_scheduler::*; pub use provider_discovery::*; pub use url_discovery_scheduler::*; diff --git a/url_finder/src/background/provider_discovery.rs b/url_finder/src/background/provider_discovery.rs index 5294bef..8b5378e 100644 --- a/url_finder/src/background/provider_discovery.rs +++ b/url_finder/src/background/provider_discovery.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use std::time::Duration; use tokio::time::sleep; +use tokio_util::sync::CancellationToken; use tracing::{debug, error, info}; use crate::repository::{DealRepository, StorageProviderRepository}; @@ -11,6 +12,7 @@ const DMOB_QUERY_TIMEOUT: Duration = Duration::from_secs(1200); // 20 minutes pub async fn run_provider_discovery( sp_repo: Arc, deal_repo: Arc, + shutdown: CancellationToken, ) { info!("Starting provider discovery loop"); @@ -20,8 +22,16 @@ pub async fn 
run_provider_discovery( Err(e) => error!("Provider discovery failed: {:?}", e), } - sleep(DISCOVERY_INTERVAL).await; + tokio::select! { + _ = sleep(DISCOVERY_INTERVAL) => {} + _ = shutdown.cancelled() => { + info!("Provider discovery received shutdown signal"); + break; + } + } } + + info!("Provider discovery stopped"); } async fn discover_and_sync_providers( diff --git a/url_finder/src/background/url_discovery_scheduler.rs b/url_finder/src/background/url_discovery_scheduler.rs index f718481..228454d 100644 --- a/url_finder/src/background/url_discovery_scheduler.rs +++ b/url_finder/src/background/url_discovery_scheduler.rs @@ -7,8 +7,9 @@ use crate::{ use color_eyre::Result; use futures::future::join_all; use std::sync::Arc; -use std::time::Duration; +use std::time::{Duration, Instant}; use tokio::{sync::Semaphore, time::sleep}; +use tokio_util::sync::CancellationToken; use tracing::{debug, error, info, warn}; const SCHEDULER_SLEEP_INTERVAL: Duration = Duration::from_secs(3600); @@ -16,33 +17,157 @@ const SCHEDULER_NEXT_INTERVAL: Duration = Duration::from_secs(60); const BATCH_SIZE: i64 = 100; const MAX_CONCURRENT_CLIENT_TESTS: usize = 5; +// --- Helper Structs --- + +struct DiscoveryBatchStats { + total: usize, + ok: usize, + failed: usize, + total_retrievability: f64, + consistent: usize, + started_at: Instant, +} + +impl DiscoveryBatchStats { + fn new() -> Self { + Self { + total: 0, + ok: 0, + failed: 0, + total_retrievability: 0.0, + consistent: 0, + started_at: Instant::now(), + } + } + + fn record(&mut self, outcome: &ProviderOutcome) { + self.total += 1; + if outcome.success { + self.ok += 1; + } else { + self.failed += 1; + } + self.total_retrievability += outcome.retrievability; + if outcome.consistent { + self.consistent += 1; + } + } + + fn avg_retrievability(&self) -> f64 { + if self.total > 0 { + self.total_retrievability / self.total as f64 + } else { + 0.0 + } + } + + fn elapsed(&self) -> Duration { + self.started_at.elapsed() + } + + fn 
is_empty(&self) -> bool { + self.total == 0 + } + + fn success_percent(&self) -> usize { + if self.total > 0 { + (self.ok * 100) / self.total + } else { + 0 + } + } +} + +struct ProviderOutcome { + success: bool, + retrievability: f64, + consistent: bool, +} + +struct ProgressReporter { + batch_size: usize, + last_checkpoint: usize, +} + +impl ProgressReporter { + fn new(batch_size: usize) -> Self { + Self { + batch_size, + last_checkpoint: 0, + } + } + + fn maybe_log(&mut self, stats: &DiscoveryBatchStats, current_provider_id: &ProviderId) { + if self.batch_size < 4 { + return; + } + + let current_percent = (stats.total * 100) / self.batch_size; + let checkpoint = current_percent / 25; + + if checkpoint > self.last_checkpoint && checkpoint < 4 { + info!( + "URL discovery: {}% ({}/{}) current: f0{} | {} ok {} fail", + checkpoint * 25, + stats.total, + self.batch_size, + current_provider_id, + stats.ok, + stats.failed + ); + self.last_checkpoint = checkpoint; + } + } +} + +// --- Main Scheduler --- + pub async fn run_url_discovery_scheduler( config: Arc, sp_repo: Arc, url_repo: Arc, deal_repo: Arc, + shutdown: CancellationToken, ) { info!("Starting URL discovery scheduler loop"); loop { - let interval = - match schedule_url_discoveries(&config, &sp_repo, &url_repo, &deal_repo).await { - Ok(0) => { - info!("No providers due for URL discovery, sleeping..."); - SCHEDULER_SLEEP_INTERVAL - } - Ok(count) => { - info!("URL discovery cycle completed: {} providers tested", count); - SCHEDULER_NEXT_INTERVAL - } - Err(e) => { - error!("URL discovery scheduler failed: {:?}", e); - SCHEDULER_SLEEP_INTERVAL - } - }; - - sleep(interval).await; + let interval = match schedule_url_discoveries(&config, &sp_repo, &url_repo, &deal_repo) + .await + { + Ok(stats) if stats.is_empty() => { + debug!("No providers due for URL discovery, sleeping..."); + SCHEDULER_SLEEP_INTERVAL + } + Ok(stats) => { + info!( + "URL discovery: done {}/{} ({}%) in {:.0}s | avg_retri: {:.1}% consistent: {}/{}", 
+ stats.ok, + stats.total, + stats.success_percent(), + stats.elapsed().as_secs_f64(), + stats.avg_retrievability(), + stats.consistent, + stats.total + ); + SCHEDULER_NEXT_INTERVAL + } + Err(e) => { + error!("URL discovery scheduler failed: {:?}", e); + SCHEDULER_SLEEP_INTERVAL + } + }; + + tokio::select! { + _ = sleep(interval) => {} + _ = shutdown.cancelled() => { + info!("URL discovery scheduler received shutdown signal"); + break; + } + } } + + info!("URL discovery scheduler stopped"); } async fn schedule_url_discoveries( @@ -50,12 +175,17 @@ async fn schedule_url_discoveries( sp_repo: &StorageProviderRepository, url_repo: &UrlResultRepository, deal_repo: &DealRepository, -) -> Result { +) -> Result { let providers = sp_repo.get_due_for_url_discovery(BATCH_SIZE).await?; debug!("Found {} providers due for URL discovery", providers.len()); - let mut total_tested = 0; + if !providers.is_empty() { + info!("URL discovery: starting {} providers", providers.len()); + } + + let mut stats = DiscoveryBatchStats::new(); + let mut progress = ProgressReporter::new(providers.len()); for provider in providers { if provider.url_discovery_status.as_deref() == Some("pending") { @@ -65,46 +195,107 @@ async fn schedule_url_discoveries( ); } - sp_repo - .set_url_discovery_pending(&provider.provider_id) - .await?; + let outcome = + process_single_provider(config, sp_repo, url_repo, deal_repo, &provider.provider_id) + .await?; + + stats.record(&outcome); + progress.maybe_log(&stats, &provider.provider_id); + } + + Ok(stats) +} + +async fn process_single_provider( + config: &Config, + sp_repo: &StorageProviderRepository, + url_repo: &UrlResultRepository, + deal_repo: &DealRepository, + provider_id: &ProviderId, +) -> Result { + sp_repo.set_url_discovery_pending(provider_id).await?; - let clients = deal_repo - .get_clients_for_provider(&provider.provider_id) - .await?; + let clients = deal_repo.get_clients_for_provider(provider_id).await?; - debug!( - "Provider {} has {} clients", - 
provider.provider_id, - clients.len() - ); + let client_ids_for_log: Vec = clients + .iter() + .map(|c| format!("f0{}", c.as_str())) + .collect(); - let results = - test_provider_with_clients(config, &provider.provider_id, clients, deal_repo).await; + debug!("Provider {} has {} clients", provider_id, clients.len()); - let url_results: Vec = results.into_iter().map(|r| r.into()).collect(); + let results = test_provider_with_clients(config, provider_id, clients, deal_repo).await; - let last_working_url = url_results - .iter() - .find(|r| r.client_id.is_none()) - .and_then(|r| r.working_url.clone()); + // Extract provider-only result for storage_providers update + // None case: provider-only discovery missing (panic, filtering, etc.) - default is_consistent + // to false since consistency was not verified + let provider_discovery = results.iter().find(|r| r.client_id.is_none()); - match url_repo.insert_batch(&url_results).await { - Ok(count) => debug!( - "Inserted {} URL results for provider {}", - count, provider.provider_id + let (last_working_url, is_consistent, is_reliable, url_metadata, outcome) = + match provider_discovery { + Some(r) => ( + r.working_url.clone(), + r.is_consistent, + r.is_reliable, + r.url_metadata.clone(), + ProviderOutcome { + success: r.working_url.is_some(), + retrievability: r.retrievability_percent, + consistent: r.is_consistent, + }, ), - Err(e) => error!("Failed to insert URL results: {:?}", e), - } + None => ( + None, + false, + false, + None, + ProviderOutcome { + success: false, + retrievability: 0.0, + consistent: false, + }, + ), + }; - sp_repo - .update_after_url_discovery(&provider.provider_id, last_working_url) - .await?; + let url_results: Vec = results.into_iter().map(|r| r.into()).collect(); - total_tested += 1; + match url_repo.insert_batch(&url_results).await { + Ok(count) => debug!( + "Inserted {} URL results for provider {}", + count, provider_id + ), + Err(e) => error!("Failed to insert URL results: {:?}", e), } - 
Ok(total_tested) + sp_repo + .update_after_url_discovery( + provider_id, + last_working_url, + is_consistent, + is_reliable, + url_metadata, + ) + .await?; + + // Debug per-provider result + let client_display = if client_ids_for_log.is_empty() { + "(0 clients)".to_string() + } else if client_ids_for_log.len() == 1 { + format!("(1 client) [{}]", client_ids_for_log.join(", ")) + } else { + format!( + "({} clients) [{}]", + client_ids_for_log.len(), + client_ids_for_log.join(", ") + ) + }; + let result_str = if outcome.success { "ok" } else { "failed" }; + debug!( + "f0{} {}: {} retri={:.1}% consistent={}", + provider_id, client_display, result_str, outcome.retrievability, outcome.consistent + ); + + Ok(outcome) } async fn test_provider_with_clients( diff --git a/url_finder/src/bms_client.rs b/url_finder/src/bms_client.rs new file mode 100644 index 0000000..59b1338 --- /dev/null +++ b/url_finder/src/bms_client.rs @@ -0,0 +1,171 @@ +use color_eyre::{Result, eyre::eyre}; +use reqwest_middleware::ClientWithMiddleware; +use serde::{Deserialize, Serialize}; +use tracing::{debug, warn}; +use uuid::Uuid; + +use crate::utils::build_reqwest_retry_client_with_config; + +const BMS_ROUTING_KEY: &str = "us_east"; + +// BMS client timeouts - aggressive to fail fast on gateway timeouts +const BMS_MIN_RETRY_INTERVAL_MS: u64 = 1_000; +const BMS_MAX_RETRY_INTERVAL_MS: u64 = 5_000; +const BMS_MAX_RETRIES: u32 = 1; // Reduced from 3 - 504s indicate BMS is struggling +const BMS_CONNECT_TIMEOUT_MS: u64 = 5_000; // 5s connect timeout +const BMS_REQUEST_TIMEOUT_MS: u64 = 30_000; // 30s request timeout (less than typical gateway 60s) + +#[derive(Debug, Clone, Serialize)] +pub struct CreateJobRequest { + pub url: String, + pub routing_key: String, + pub worker_count: i64, + #[serde(skip_serializing_if = "Option::is_none")] + pub entity: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct BmsJob { + pub id: Uuid, + pub status: String, + pub url: String, + pub routing_key: String, +} 
+ +#[derive(Debug, Clone, Deserialize)] +pub struct BmsJobDetails { + pub worker_count: Option, + pub size_mb: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct DownloadResult { + pub download_speed: Option, + pub time_to_first_byte_ms: Option, + pub total_bytes: Option, + pub elapsed_secs: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct PingResult { + pub avg: Option, + pub min: Option, + pub max: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct HeadResult { + pub avg: Option, + pub min: Option, + pub max: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct WorkerData { + pub download: Option, + pub ping: Option, + pub head: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct SubJob { + pub id: Uuid, + pub status: String, + pub worker_data: Option>, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct BmsJobResponse { + pub id: Uuid, + pub status: String, + pub url: String, + pub routing_key: String, + pub details: Option, + pub sub_jobs: Option>, +} + +#[derive(Clone)] +pub struct BmsClient { + client: ClientWithMiddleware, + base_url: String, +} + +impl BmsClient { + pub fn new(base_url: String) -> Self { + Self { + client: build_reqwest_retry_client_with_config( + BMS_MIN_RETRY_INTERVAL_MS, + BMS_MAX_RETRY_INTERVAL_MS, + BMS_MAX_RETRIES, + Some(BMS_CONNECT_TIMEOUT_MS), + Some(BMS_REQUEST_TIMEOUT_MS), + ), + base_url, + } + } + + pub async fn create_job( + &self, + url: String, + worker_count: i64, + entity: Option, + ) -> Result { + if worker_count < 0 { + return Err(eyre!( + "worker_count must be non-negative, got {worker_count}" + )); + } + + let request = CreateJobRequest { + url, + routing_key: BMS_ROUTING_KEY.to_string(), + worker_count, + entity, + }; + + debug!("Creating BMS job: {:?}", request); + + let response = self + .client + .post(format!("{}/jobs", self.base_url)) + .json(&request) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status(); + let 
body = response.text().await.unwrap_or_default(); + warn!("BMS create job failed: {status} - {body}"); + return Err(eyre!("BMS create job failed: {status} - {body}")); + } + + let result: BmsJob = response.json().await?; + debug!("BMS job created: {:?}", result); + Ok(result) + } + + pub async fn get_job(&self, job_id: Uuid) -> Result { + debug!("Fetching BMS job: {job_id}"); + + let response = self + .client + .get(format!("{}/jobs/{job_id}", self.base_url)) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + warn!("BMS get job failed: {status} - {body}"); + return Err(eyre!("BMS get job failed: {status} - {body}")); + } + + let result: BmsJobResponse = response.json().await?; + debug!("BMS job fetched: {} - status: {}", result.id, result.status); + Ok(result) + } + + pub fn is_job_finished(status: &str) -> bool { + matches!(status, "Completed" | "Failed" | "Cancelled") + } +} diff --git a/url_finder/src/car_header.rs b/url_finder/src/car_header.rs new file mode 100644 index 0000000..1dce4dc --- /dev/null +++ b/url_finder/src/car_header.rs @@ -0,0 +1,289 @@ +//! CAR (Content Addressable aRchive) header parsing. +//! +//! Parses CAR v1/v2 headers to extract root CID for verification against deal Labels. + +use ciborium::Value; +use tracing::trace; + +/// CARv2 pragma: fixed 11 bytes identifying CARv2 format +const CAR_V2_PRAGMA: &[u8] = &[ + 0x0a, 0xa1, 0x67, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x02, +]; + +/// Result of parsing CAR header +#[derive(Debug, Clone, Default)] +pub struct CarHeaderParseResult { + pub is_valid: bool, + pub version: Option, + pub root_cid: Option, + pub header_size: Option, +} + +impl CarHeaderParseResult { + fn invalid() -> Self { + Self::default() + } +} + +/// Parse CAR header from response bytes, extract root CID if valid. 
+pub fn parse_car_header(bytes: &[u8]) -> CarHeaderParseResult { + if bytes.is_empty() { + return CarHeaderParseResult::invalid(); + } + + // Check for CARv2 pragma + if bytes.len() >= CAR_V2_PRAGMA.len() && bytes.starts_with(CAR_V2_PRAGMA) { + return parse_car_v2_header(bytes); + } + + parse_car_v1_header(bytes) +} + +fn parse_car_v1_header(bytes: &[u8]) -> CarHeaderParseResult { + // 1. Read varint length prefix (LEB128 unsigned) + let (header_len, varint_size) = match read_varint(bytes) { + Some(v) => v, + None => return CarHeaderParseResult::invalid(), + }; + + // Sanity check: header shouldn't be too large + if header_len > 10_000 { + trace!("CAR header too large: {header_len}"); + return CarHeaderParseResult::invalid(); + } + + // 2. Bounds check + let header_end = varint_size + header_len; + if bytes.len() < header_end { + trace!( + "CAR header truncated: need {header_end}, have {}", + bytes.len() + ); + return CarHeaderParseResult::invalid(); + } + + // 3. Parse DAG-CBOR header + let header_bytes = &bytes[varint_size..header_end]; + let cbor: Value = match ciborium::from_reader(header_bytes) { + Ok(v) => v, + Err(e) => { + trace!("CAR CBOR parse failed: {e}"); + return CarHeaderParseResult::invalid(); + } + }; + + // 4. 
Extract version and roots from CBOR map + let map = match cbor.as_map() { + Some(m) => m, + None => { + trace!("CAR header not a CBOR map"); + return CarHeaderParseResult::invalid(); + } + }; + + let version = extract_version(map); + if version != Some(1) { + trace!("CAR version not 1: {version:?}"); + return CarHeaderParseResult::invalid(); + } + + let root_cid = extract_first_root_cid(map); + + CarHeaderParseResult { + is_valid: true, + version: Some(1), + root_cid, + header_size: Some(header_end), + } +} + +fn parse_car_v2_header(bytes: &[u8]) -> CarHeaderParseResult { + // CARv2 structure: + // [11-byte pragma][40-byte header][CARv1 payload at data_offset] + // + // 40-byte header: + // - 16 bytes: characteristics (bitfield) + // - 8 bytes: data_offset (u64 little-endian) + // - 8 bytes: data_size (u64 little-endian) + // - 8 bytes: index_offset (u64 little-endian) + + const HEADER_START: usize = 11; + const HEADER_SIZE: usize = 40; + const DATA_OFFSET_POS: usize = HEADER_START + 16; + + if bytes.len() < HEADER_START + HEADER_SIZE { + trace!("CARv2 header truncated"); + return CarHeaderParseResult::invalid(); + } + + // Read data_offset (little-endian u64) + let data_offset_bytes: [u8; 8] = bytes[DATA_OFFSET_POS..DATA_OFFSET_POS + 8] + .try_into() + .unwrap(); + let data_offset_u64 = u64::from_le_bytes(data_offset_bytes); + let data_offset = match usize::try_from(data_offset_u64) { + Ok(offset) => offset, + Err(_) => { + trace!("CARv2 data_offset too large for platform: {data_offset_u64}"); + return CarHeaderParseResult::invalid(); + } + }; + + // Parse inner CARv1 at data_offset + if bytes.len() <= data_offset { + trace!("CARv2 data_offset beyond bytes"); + return CarHeaderParseResult::invalid(); + } + + let mut inner_result = parse_car_v1_header(&bytes[data_offset..]); + if inner_result.is_valid { + inner_result.version = Some(2); + if let Some(size) = inner_result.header_size { + inner_result.header_size = Some(data_offset + size); + } + } + inner_result +} 
+ +/// Read unsigned LEB128 varint, return (value, bytes_consumed) +fn read_varint(bytes: &[u8]) -> Option<(usize, usize)> { + let mut result: usize = 0; + let mut shift = 0; + + for (i, &byte) in bytes.iter().enumerate() { + if i >= 10 { + // Varint too long (max 10 bytes for u64) + return None; + } + + result |= ((byte & 0x7F) as usize) << shift; + + if byte & 0x80 == 0 { + return Some((result, i + 1)); + } + + shift += 7; + } + + None // Incomplete varint +} + +fn extract_version(map: &[(Value, Value)]) -> Option { + map.iter() + .find(|(k, _)| matches!(k, Value::Text(s) if s == "version")) + .and_then(|(_, v)| match v { + Value::Integer(i) => u64::try_from(*i).ok(), + _ => None, + }) +} + +fn extract_first_root_cid(map: &[(Value, Value)]) -> Option { + let roots = map + .iter() + .find(|(k, _)| matches!(k, Value::Text(s) if s == "roots")) + .and_then(|(_, v)| v.as_array())?; + + let first_root = roots.first()?; + + // CID is stored as CBOR tag 42 with byte string + // Tag 42 value contains: [0x00 multibase prefix][CID bytes] + let cid_bytes = match first_root { + Value::Tag(42, inner) => inner.as_bytes()?, + _ => return None, + }; + + // Skip the 0x00 identity multibase prefix + if cid_bytes.is_empty() || cid_bytes[0] != 0x00 { + return None; + } + + let raw_cid = &cid_bytes[1..]; + Some(encode_cid_base32(raw_cid)) +} + +/// Encode raw CID bytes to base32lower (multibase 'b' prefix) +fn encode_cid_base32(bytes: &[u8]) -> String { + // RFC 4648 base32 lowercase alphabet + const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz234567"; + + let mut result = String::with_capacity(1 + (bytes.len() * 8).div_ceil(5)); + result.push('b'); // multibase base32lower prefix + + let mut buffer: u64 = 0; + let mut bits_in_buffer = 0; + + for &byte in bytes { + buffer = (buffer << 8) | (byte as u64); + bits_in_buffer += 8; + + while bits_in_buffer >= 5 { + bits_in_buffer -= 5; + let index = ((buffer >> bits_in_buffer) & 0x1F) as usize; + result.push(ALPHABET[index] as char); + } 
+ } + + // Handle remaining bits + if bits_in_buffer > 0 { + let index = ((buffer << (5 - bits_in_buffer)) & 0x1F) as usize; + result.push(ALPHABET[index] as char); + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_read_varint_single_byte() { + let bytes = [0x39]; // 57 + assert_eq!(read_varint(&bytes), Some((57, 1))); + } + + #[test] + fn test_read_varint_multi_byte() { + let bytes = [0x96, 0x01]; // 150 + assert_eq!(read_varint(&bytes), Some((150, 2))); + } + + #[test] + fn test_read_varint_incomplete() { + let bytes = [0x80]; // High bit set, no continuation + assert_eq!(read_varint(&bytes), None); + } + + #[test] + fn test_invalid_empty_bytes() { + let result = parse_car_header(&[]); + assert!(!result.is_valid); + } + + #[test] + fn test_invalid_garbage() { + let garbage = b"Not Found"; + let result = parse_car_header(garbage); + assert!(!result.is_valid); + } + + #[test] + fn test_carv2_pragma_detection() { + // Just the pragma, not enough data + let result = parse_car_header(CAR_V2_PRAGMA); + assert!(!result.is_valid); + } + + #[test] + fn test_encode_cid_base32() { + // CIDv1 raw bytes (simplified test) + let bytes = [0x01, 0x55, 0x12, 0x20]; // version, codec, hash fn, length prefix + let encoded = encode_cid_base32(&bytes); + assert!(encoded.starts_with('b')); // multibase prefix + assert!( + encoded + .chars() + .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit()) + ); + } +} diff --git a/url_finder/src/cid_contact.rs b/url_finder/src/cid_contact.rs index 3afc8ed..6256532 100644 --- a/url_finder/src/cid_contact.rs +++ b/url_finder/src/cid_contact.rs @@ -46,13 +46,13 @@ pub async fn get_contact( .await .map_err(|_| CidContactError::InvalidResponse)?; - debug!("cid contact status: {:?}", res.status()); + let status = res.status(); + debug!("cid contact status: {:?}", status); - if !res.status().is_success() { - debug!( - "cid contact returned non-success status: {:?}", - res.status() - ); + if !status.is_success() { + 
debug!("cid contact returned non-success status: {:?}", status); + // Drain body to allow connection reuse + let _ = res.text().await; return Err(CidContactError::NoData); } diff --git a/url_finder/src/circuit_breaker.rs b/url_finder/src/circuit_breaker.rs new file mode 100644 index 0000000..70854ff --- /dev/null +++ b/url_finder/src/circuit_breaker.rs @@ -0,0 +1,353 @@ +use std::sync::Mutex; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::time::Duration; + +use chrono::{DateTime, Utc}; +use tracing::{debug, info, warn}; + +/// Circuit breaker states for external service calls. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CircuitState { + /// Normal operation - all requests allowed + Closed, + /// Service failing - requests rejected to allow recovery + Open, + /// Testing recovery - single probe request allowed + HalfOpen, +} + +/// Thread-safe circuit breaker for protecting external service calls. +/// +/// When consecutive failures exceed the threshold, the circuit "opens" and +/// rejects all requests for a cooldown period. After cooldown, it enters +/// "half-open" state and allows a single probe request to test recovery. +pub struct CircuitBreaker { + /// Service name for logging + name: String, + /// Current consecutive failure count + failure_count: AtomicUsize, + /// Timestamp when circuit was opened (None if closed) + opened_at: Mutex>>, + /// Whether we're in half-open state (allowing one probe) + in_half_open: AtomicUsize, // 0 = not half-open, 1 = half-open waiting for probe result + /// Number of consecutive failures before opening circuit + failure_threshold: usize, + /// How long to wait before attempting probe request + cooldown: Duration, +} + +impl CircuitBreaker { + /// Create a new circuit breaker. 
+ /// + /// - `name`: Service name for logging (e.g., "BMS") + /// - `failure_threshold`: Open circuit after this many consecutive failures + /// - `cooldown`: Wait this long before allowing probe request + pub fn new(name: impl Into, failure_threshold: usize, cooldown: Duration) -> Self { + Self { + name: name.into(), + failure_count: AtomicUsize::new(0), + opened_at: Mutex::new(None), + in_half_open: AtomicUsize::new(0), + failure_threshold, + cooldown, + } + } + + /// Check if a request is allowed through the circuit breaker. + /// + /// Returns `Ok(())` if the request can proceed, or `Err(CircuitOpenError)` + /// if the circuit is open and the request should be rejected. + pub fn check_allowed(&self) -> Result<(), CircuitOpenError> { + let state = self.get_state(); + + match state { + CircuitState::Closed => Ok(()), + CircuitState::HalfOpen => { + // In half-open, only one probe request is allowed + // Use compare_exchange to atomically claim the probe slot + match self.in_half_open.compare_exchange( + 1, // Expected: half-open state + 2, // New: probe in progress + Ordering::SeqCst, + Ordering::SeqCst, + ) { + Ok(_) => { + debug!("{} circuit breaker allowing probe request", self.name); + Ok(()) + } + Err(_) => { + // Another request already claimed the probe slot + debug!( + "{} circuit breaker rejecting request - probe already in progress", + self.name + ); + Err(CircuitOpenError::ProbeInProgress) + } + } + } + CircuitState::Open => { + let opened = self.opened_at.lock().unwrap(); + let remaining = opened + .map(|t| { + let elapsed = Utc::now() - t; + self.cooldown + .checked_sub(elapsed.to_std().unwrap_or_default()) + .unwrap_or_default() + }) + .unwrap_or_default(); + + Err(CircuitOpenError::Open { + failures: self.failure_count.load(Ordering::SeqCst), + remaining_cooldown: remaining, + }) + } + } + } + + /// Record a successful request, resetting the failure count. 
+ pub fn record_success(&self) { + let prev_failures = self.failure_count.swap(0, Ordering::SeqCst); + let was_half_open = self.in_half_open.swap(0, Ordering::SeqCst) > 0; + + // Clear opened_at + *self.opened_at.lock().unwrap() = None; + + if was_half_open { + info!( + "{} circuit breaker closed - probe succeeded after {} failures", + self.name, prev_failures + ); + } else if prev_failures > 0 { + debug!( + "{} circuit breaker: success resets {} consecutive failures", + self.name, prev_failures + ); + } + } + + /// Record a failed request, potentially opening the circuit. + pub fn record_failure(&self) { + // Atomically check if we were the probe request (state 2 = probe in progress) + // Only treat as probe failure if we successfully consume the probe-in-progress state + if self + .in_half_open + .compare_exchange(2, 0, Ordering::SeqCst, Ordering::SeqCst) + .is_ok() + { + // We were the probe request and it failed - reopen the circuit + *self.opened_at.lock().unwrap() = Some(Utc::now()); + let current_failures = self.failure_count.load(Ordering::SeqCst); + warn!( + "{} circuit breaker reopened - probe failed (failures at threshold: {})", + self.name, current_failures + ); + return; + } + + let new_count = self.failure_count.fetch_add(1, Ordering::SeqCst) + 1; + + // Check if we've hit the threshold to open the circuit + if new_count >= self.failure_threshold { + let mut opened = self.opened_at.lock().unwrap(); + if opened.is_none() { + *opened = Some(Utc::now()); + warn!( + "{} circuit breaker opened after {} consecutive failures (cooldown: {:?})", + self.name, new_count, self.cooldown + ); + } + } else { + debug!( + "{} circuit breaker: failure {} of {} threshold", + self.name, new_count, self.failure_threshold + ); + } + } + + /// Get the current circuit state. 
+ pub fn get_state(&self) -> CircuitState { + let opened_at = *self.opened_at.lock().unwrap(); + + match opened_at { + None => CircuitState::Closed, + Some(opened) => { + let elapsed = Utc::now() - opened; + if elapsed.to_std().unwrap_or_default() >= self.cooldown { + // Cooldown expired, transition to half-open + // Use compare_exchange to ensure only one thread transitions + match self.in_half_open.compare_exchange( + 0, + 1, + Ordering::SeqCst, + Ordering::SeqCst, + ) { + Ok(_) => { + debug!( + "{} circuit breaker entering half-open state after {:?} cooldown", + self.name, self.cooldown + ); + } + Err(_) => { + // Already in half-open or probe in progress + } + } + CircuitState::HalfOpen + } else { + CircuitState::Open + } + } + } + } + + /// Get current failure count (for testing/monitoring). + pub fn failure_count(&self) -> usize { + self.failure_count.load(Ordering::SeqCst) + } +} + +/// Error returned when circuit breaker rejects a request. +#[derive(Debug, Clone)] +pub enum CircuitOpenError { + /// Circuit is open, request rejected + Open { + failures: usize, + remaining_cooldown: Duration, + }, + /// Circuit is half-open but probe already in progress + ProbeInProgress, +} + +impl std::fmt::Display for CircuitOpenError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + CircuitOpenError::Open { + failures, + remaining_cooldown, + } => write!( + f, + "circuit open after {} failures, {:.0}s remaining", + failures, + remaining_cooldown.as_secs_f64() + ), + CircuitOpenError::ProbeInProgress => write!(f, "probe request already in progress"), + } + } +} + +impl std::error::Error for CircuitOpenError {} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_circuit_breaker_starts_closed() { + let cb = CircuitBreaker::new("test", 3, Duration::from_secs(60)); + assert_eq!(cb.get_state(), CircuitState::Closed); + assert!(cb.check_allowed().is_ok()); + } + + #[test] + fn test_circuit_breaker_opens_after_threshold() { + 
let cb = CircuitBreaker::new("test", 3, Duration::from_secs(60)); + + // Record failures up to threshold + cb.record_failure(); + assert_eq!(cb.get_state(), CircuitState::Closed); + cb.record_failure(); + assert_eq!(cb.get_state(), CircuitState::Closed); + cb.record_failure(); + + // Now should be open + assert_eq!(cb.get_state(), CircuitState::Open); + assert!(cb.check_allowed().is_err()); + } + + #[test] + fn test_circuit_breaker_success_resets_failures() { + let cb = CircuitBreaker::new("test", 3, Duration::from_secs(60)); + + cb.record_failure(); + cb.record_failure(); + assert_eq!(cb.failure_count(), 2); + + cb.record_success(); + assert_eq!(cb.failure_count(), 0); + assert_eq!(cb.get_state(), CircuitState::Closed); + } + + #[test] + fn test_circuit_breaker_half_open_after_cooldown() { + let cb = CircuitBreaker::new("test", 3, Duration::from_millis(10)); + + // Open the circuit + cb.record_failure(); + cb.record_failure(); + cb.record_failure(); + assert_eq!(cb.get_state(), CircuitState::Open); + + // Wait for cooldown + std::thread::sleep(Duration::from_millis(20)); + + // Should be half-open now + assert_eq!(cb.get_state(), CircuitState::HalfOpen); + assert!(cb.check_allowed().is_ok()); // First probe allowed + } + + #[test] + fn test_circuit_breaker_probe_success_closes() { + let cb = CircuitBreaker::new("test", 3, Duration::from_millis(10)); + + // Open and wait for half-open + cb.record_failure(); + cb.record_failure(); + cb.record_failure(); + std::thread::sleep(Duration::from_millis(20)); + + // Claim probe slot + assert!(cb.check_allowed().is_ok()); + + // Success closes the circuit + cb.record_success(); + assert_eq!(cb.get_state(), CircuitState::Closed); + assert_eq!(cb.failure_count(), 0); + } + + #[test] + fn test_circuit_breaker_probe_failure_reopens() { + let cb = CircuitBreaker::new("test", 3, Duration::from_millis(10)); + + // Open and wait for half-open + cb.record_failure(); + cb.record_failure(); + cb.record_failure(); + 
assert_eq!(cb.failure_count(), 3); + std::thread::sleep(Duration::from_millis(20)); + + // Claim probe slot + assert!(cb.check_allowed().is_ok()); + + // Failure reopens the circuit but doesn't increment failure count + cb.record_failure(); + assert_eq!(cb.get_state(), CircuitState::Open); + assert_eq!(cb.failure_count(), 3); // Still at threshold, not incremented + } + + #[test] + fn test_circuit_breaker_only_one_probe_allowed() { + let cb = CircuitBreaker::new("test", 3, Duration::from_millis(10)); + + // Open and wait for half-open + cb.record_failure(); + cb.record_failure(); + cb.record_failure(); + std::thread::sleep(Duration::from_millis(20)); + + // First probe allowed + assert!(cb.check_allowed().is_ok()); + + // Second probe rejected + let result = cb.check_allowed(); + assert!(matches!(result, Err(CircuitOpenError::ProbeInProgress))); + } +} diff --git a/url_finder/src/config.rs b/url_finder/src/config.rs index cca5078..b725d34 100644 --- a/url_finder/src/config.rs +++ b/url_finder/src/config.rs @@ -1,9 +1,40 @@ use std::env; use color_eyre::Result; +use tracing::warn; use crate::types::DbConnectParams; +// Double-tap consistency testing settings +pub const DOUBLE_TAP_DELAY_MS: u64 = 500; +pub const RANGE_REQUEST_BYTES: u64 = 4096; +pub const MAX_CONCURRENT_URL_TESTS: usize = 20; + +// Thresholds +pub const RELIABILITY_TIMEOUT_THRESHOLD: f64 = 0.30; +pub const MIN_VALID_CONTENT_LENGTH: u64 = 8 * 1024 * 1024 * 1024; // 8GB + +// History endpoint settings +pub const MAX_HISTORY_DAYS: i64 = 30; + +fn parse_positive_i64_or_default(env_var: &str, default: i64) -> i64 { + assert!(default > 0, "default must be positive"); + match env::var(env_var) { + Ok(s) => match s.parse::() { + Ok(v) if v > 0 => v, + Ok(v) => { + warn!("{env_var}={v} is not positive, defaulting to {default}"); + default + } + Err(e) => { + warn!("{env_var}='{s}' is not a valid integer ({e}), defaulting to {default}"); + default + } + }, + Err(_) => default, + } +} + #[derive(Debug, Clone)] 
pub struct Config { pub db_url: String, @@ -16,6 +47,9 @@ pub struct Config { pub proxy_password: Option, pub proxy_ip_count: Option, pub proxy_default_port: Option, + pub bms_url: String, + pub bms_default_worker_count: i64, + pub bms_test_interval_days: i64, } impl Config { @@ -44,6 +78,9 @@ impl Config { .ok() .and_then(|s| s.parse().ok()), proxy_ip_count: env::var("PROXY_IP_COUNT").ok().and_then(|s| s.parse().ok()), + bms_url: env::var("BMS_URL").expect("BMS_URL must be set"), + bms_default_worker_count: parse_positive_i64_or_default("BMS_WORKER_COUNT", 10), + bms_test_interval_days: parse_positive_i64_or_default("BMS_TEST_INTERVAL_DAYS", 7), }) } @@ -60,6 +97,9 @@ impl Config { proxy_user: None, proxy_ip_count: None, proxy_default_port: None, + bms_url: "http://localhost:8080".to_string(), + bms_default_worker_count: 10, + bms_test_interval_days: 7, } } } diff --git a/url_finder/src/http_client.rs b/url_finder/src/http_client.rs index e210a7f..950350f 100644 --- a/url_finder/src/http_client.rs +++ b/url_finder/src/http_client.rs @@ -1,3 +1,4 @@ +use std::sync::Once; use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; @@ -9,6 +10,7 @@ use tracing::info; const RETRI_TIMEOUT_SEC: u64 = 15; static ATOMIC_PROXY_PORT: AtomicU32 = AtomicU32::new(8001); static ATOMIC_PROXY_LAST_CHANGE: AtomicU64 = AtomicU64::new(0); +static PROXY_LOG_ONCE: Once = Once::new(); fn get_sticky_port_atomic(config: &Config) -> u32 { let proxy_default_port = config.proxy_default_port.unwrap(); @@ -52,7 +54,7 @@ pub fn build_client(config: &Config) -> Result { Some(proxy_user), Some(proxy_password), Some(proxy_ip_count), - Some(proxy_default_port), + Some(_proxy_default_port), ) = ( &config.proxy_url, &config.proxy_user, @@ -60,15 +62,15 @@ pub fn build_client(config: &Config) -> Result { &config.proxy_ip_count, &config.proxy_default_port, ) { - info!( - "Configuring HTTP client with proxy: {} (user: {}, ip_count: {}, default_port: {})", 
- proxy_url, proxy_user, proxy_ip_count, proxy_default_port - ); - let port = get_sticky_port_atomic(config); let proxy_url_result = format!("{}:{}", proxy_url, port); - info!("Start using proxy: {}", proxy_url_result); + PROXY_LOG_ONCE.call_once(|| { + info!( + "HTTP client configured with proxy: {} (user: {}, ip_count: {}, port: {})", + proxy_url, proxy_user, proxy_ip_count, port + ); + }); let proxy = (Proxy::http(proxy_url_result))?.basic_auth(proxy_user, proxy_password); builder = builder diff --git a/url_finder/src/lib.rs b/url_finder/src/lib.rs index f503188..c24adf4 100644 --- a/url_finder/src/lib.rs +++ b/url_finder/src/lib.rs @@ -5,7 +5,10 @@ pub use std::sync::{Arc, atomic::AtomicUsize}; pub mod api; pub mod api_response; pub mod background; +pub mod bms_client; +pub mod car_header; mod cid_contact; +pub mod circuit_breaker; pub mod config; mod http_client; mod lotus_rpc; @@ -16,8 +19,8 @@ pub mod repository; pub mod routes; pub mod services; pub mod types; -mod url_tester; -mod utils; +pub mod url_tester; +pub mod utils; pub use types::{ErrorCode, ResultCode}; @@ -26,5 +29,7 @@ pub struct AppState { pub active_requests: Arc, pub storage_provider_repo: Arc, pub url_repo: Arc, + pub bms_repo: Arc, + pub provider_service: Arc, pub config: Arc, } diff --git a/url_finder/src/main.rs b/url_finder/src/main.rs index b0f7235..d39b6c0 100644 --- a/url_finder/src/main.rs +++ b/url_finder/src/main.rs @@ -4,6 +4,7 @@ use std::{ Arc, atomic::{AtomicUsize, Ordering}, }, + time::Duration, }; use axum::{ @@ -15,13 +16,17 @@ use color_eyre::Result; use tokio::{ net::TcpListener, signal::unix::{SignalKind, signal}, + task::JoinHandle, }; +use tokio_util::sync::CancellationToken; use tower_http::cors::{Any, CorsLayer}; -use tracing::{debug, info}; +use tracing::{debug, error, info, warn}; use tracing_subscriber::EnvFilter; use url_finder::{AppState, background, config::Config, repository::*, routes::create_routes}; +const SHUTDOWN_TIMEOUT: Duration = 
Duration::from_secs(30); + /// Active requests counter middleware. /// Keeps track of the number of active requests. /// The counter is used to gracefully shutdown the server. @@ -64,36 +69,76 @@ async fn main() -> Result<()> { info!("Database migrations applied successfully"); let active_requests = Arc::new(AtomicUsize::new(0)); + let shutdown_token = CancellationToken::new(); let sp_repo = Arc::new(StorageProviderRepository::new(pool.clone())); let deal_repo = Arc::new(DealRepository::new(dmob_pool.clone())); let url_repo = Arc::new(UrlResultRepository::new(pool.clone())); + let bms_result_repo = Arc::new(BmsBandwidthResultRepository::new(pool.clone())); + let bms_client = Arc::new(url_finder::bms_client::BmsClient::new( + config.bms_url.clone(), + )); + + let provider_service = Arc::new( + url_finder::services::provider_service::ProviderService::new( + url_repo.clone(), + bms_result_repo.clone(), + sp_repo.clone(), + ), + ); let app_state = Arc::new(AppState { deal_repo: deal_repo.clone(), active_requests: active_requests.clone(), storage_provider_repo: sp_repo.clone(), url_repo: url_repo.clone(), + bms_repo: bms_result_repo.clone(), + provider_service, config: config.clone(), }); // Start the provider discovery in the background - tokio::spawn({ + let provider_discovery_handle: JoinHandle<()> = tokio::spawn({ let sp_repo = sp_repo.clone(); let deal_repo = deal_repo.clone(); + let shutdown = shutdown_token.clone(); async move { - background::run_provider_discovery(sp_repo, deal_repo).await; + background::run_provider_discovery(sp_repo, deal_repo, shutdown).await; } }); // Start the URL discovery scheduler in the background - tokio::spawn({ + let url_discovery_handle: JoinHandle<()> = tokio::spawn({ let sp_repo = sp_repo.clone(); let url_repo = url_repo.clone(); let deal_repo = deal_repo.clone(); let config = config.clone(); + let shutdown = shutdown_token.clone(); async move { - background::run_url_discovery_scheduler(config, sp_repo, url_repo, deal_repo).await; 
+ background::run_url_discovery_scheduler(config, sp_repo, url_repo, deal_repo, shutdown) + .await; + } + }); + + // Start the BMS scheduler in the background + let bms_circuit_breaker = Arc::new(background::create_bms_circuit_breaker()); + let bms_scheduler_handle: JoinHandle<()> = tokio::spawn({ + let config = config.clone(); + let sp_repo = sp_repo.clone(); + let bms_result_repo = bms_result_repo.clone(); + let bms_client = bms_client.clone(); + let bms_circuit_breaker = bms_circuit_breaker.clone(); + let shutdown = shutdown_token.clone(); + async move { + background::run_bms_scheduler( + config, + bms_client, + bms_circuit_breaker, + sp_repo, + bms_result_repo, + shutdown, + ) + .await; } }); @@ -119,15 +164,34 @@ async fn main() -> Result<()> { listener, app.into_make_service_with_connect_info::(), ) - .with_graceful_shutdown(shutdown_signal(active_requests.clone())) + .with_graceful_shutdown(shutdown_signal( + active_requests.clone(), + shutdown_token.clone(), + )) .await?; + // Await background task completion with timeout + info!("Waiting for background tasks to complete..."); + let background_handles = vec![ + ("provider_discovery", provider_discovery_handle), + ("url_discovery", url_discovery_handle), + ("bms_scheduler", bms_scheduler_handle), + ]; + + for (name, handle) in background_handles { + match tokio::time::timeout(SHUTDOWN_TIMEOUT, handle).await { + Ok(Ok(())) => info!("Background task '{name}' completed successfully"), + Ok(Err(e)) => error!("Background task '{name}' panicked: {e:?}"), + Err(_) => warn!("Background task '{name}' did not complete within timeout"), + } + } + info!("UrlFinder shut down gracefully"); Ok(()) } -async fn shutdown_signal(active_requests: Arc) { +async fn shutdown_signal(active_requests: Arc, shutdown_token: CancellationToken) { let mut sigint = signal(SignalKind::interrupt()).expect("SIGINT signal handler failed"); let mut sigterm = signal(SignalKind::terminate()).expect("SIGTERM signal handler failed"); @@ -140,6 
+204,10 @@ async fn shutdown_signal(active_requests: Arc) { } } + // Signal background tasks to stop + info!("Signaling background tasks to stop..."); + shutdown_token.cancel(); + while active_requests.load(Ordering::SeqCst) > 0 { debug!( "Waiting for {} active requests to finish...", diff --git a/url_finder/src/provider_endpoints.rs b/url_finder/src/provider_endpoints.rs index 6ac4776..993923f 100644 --- a/url_finder/src/provider_endpoints.rs +++ b/url_finder/src/provider_endpoints.rs @@ -65,7 +65,7 @@ pub async fn valid_curio_provider( break; } Err(e) => { - info!("Attempt {attempt}/3 failed: {e} for address: {address}"); + debug!("Attempt {attempt}/3 failed: {e} for address: {address}"); sleep(Duration::from_secs(1)).await; } } @@ -123,12 +123,24 @@ pub async fn get_provider_endpoints( } // parse addresses to http endpoints - let endpoints = multiaddr_parser::parse(addrs); + let mut endpoints = multiaddr_parser::parse(addrs); if endpoints.is_empty() { debug!("Missing http addr from cid contact, No endpoints found"); return Ok((ResultCode::MissingHttpAddrFromCidContact, None)); } + // Deduplicate endpoints + let original_count = endpoints.len(); + endpoints.sort(); + endpoints.dedup(); + if endpoints.len() < original_count { + debug!( + "Deduplicated endpoints: {} -> {} unique", + original_count, + endpoints.len() + ); + } + Ok((ResultCode::Success, Some(endpoints))) } diff --git a/url_finder/src/repository/bms_result_repo.rs b/url_finder/src/repository/bms_result_repo.rs new file mode 100644 index 0000000..3ba8cdf --- /dev/null +++ b/url_finder/src/repository/bms_result_repo.rs @@ -0,0 +1,372 @@ +use chrono::{DateTime, Utc}; +use color_eyre::Result; +use sqlx::PgPool; +use sqlx::types::BigDecimal; +use std::str::FromStr; +use uuid::Uuid; + +use crate::types::ProviderId; + +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct BmsBandwidthResult { + pub id: Uuid, + pub provider_id: String, + pub bms_job_id: Uuid, + pub url_tested: String, + pub routing_key: String, 
+ pub worker_count: i32, + pub status: String, + pub ping_avg_ms: Option, + pub head_avg_ms: Option, + pub ttfb_ms: Option, + pub download_speed_mbps: Option, + pub created_at: DateTime, + pub completed_at: Option>, +} + +#[derive(Debug, Clone)] +pub struct NewBmsBandwidthResult { + pub provider_id: ProviderId, + pub bms_job_id: Uuid, + pub url_tested: String, + pub routing_key: String, + pub worker_count: i32, + pub status: String, + pub ping_avg_ms: Option, + pub head_avg_ms: Option, + pub ttfb_ms: Option, + pub download_speed_mbps: Option, + pub completed_at: Option>, +} + +fn f64_to_bigdecimal(val: Option) -> Option { + val.and_then(|v| BigDecimal::from_str(&v.to_string()).ok()) +} + +#[derive(Clone)] +pub struct BmsBandwidthResultRepository { + pool: PgPool, +} + +impl BmsBandwidthResultRepository { + pub fn new(pool: PgPool) -> Self { + Self { pool } + } + + pub async fn insert(&self, result: &NewBmsBandwidthResult) -> Result { + let ping_avg_ms = f64_to_bigdecimal(result.ping_avg_ms); + let head_avg_ms = f64_to_bigdecimal(result.head_avg_ms); + let ttfb_ms = f64_to_bigdecimal(result.ttfb_ms); + let download_speed_mbps = f64_to_bigdecimal(result.download_speed_mbps); + + Ok(sqlx::query_as!( + BmsBandwidthResult, + r#"INSERT INTO + bms_bandwidth_results ( + provider_id, + bms_job_id, + url_tested, + routing_key, + worker_count, + status, + ping_avg_ms, + head_avg_ms, + ttfb_ms, + download_speed_mbps, + completed_at + ) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) + RETURNING + id, + provider_id, + bms_job_id, + url_tested, + routing_key, + worker_count, + status, + ping_avg_ms, + head_avg_ms, + ttfb_ms, + download_speed_mbps, + created_at, + completed_at + "#, + result.provider_id.as_str(), + result.bms_job_id, + result.url_tested, + result.routing_key, + result.worker_count, + result.status, + ping_avg_ms, + head_avg_ms, + ttfb_ms, + download_speed_mbps, + result.completed_at + ) + .fetch_one(&self.pool) + .await?) 
+ } + + pub async fn insert_pending( + &self, + provider_id: &ProviderId, + job_id: Uuid, + url: &str, + routing_key: &str, + worker_count: i32, + ) -> Result { + Ok(sqlx::query_as!( + BmsBandwidthResult, + r#"INSERT INTO + bms_bandwidth_results ( + provider_id, + bms_job_id, + url_tested, + routing_key, + worker_count, + status + ) + VALUES ($1, $2, $3, $4, $5, 'Pending') + RETURNING + id, + provider_id, + bms_job_id, + url_tested, + routing_key, + worker_count, + status, + ping_avg_ms, + head_avg_ms, + ttfb_ms, + download_speed_mbps, + created_at, + completed_at + "#, + provider_id.as_str(), + job_id, + url, + routing_key, + worker_count + ) + .fetch_one(&self.pool) + .await?) + } + + pub async fn get_pending(&self) -> Result> { + Ok(sqlx::query_as!( + BmsBandwidthResult, + r#"SELECT + id, + provider_id, + bms_job_id, + url_tested, + routing_key, + worker_count, + status, + ping_avg_ms, + head_avg_ms, + ttfb_ms, + download_speed_mbps, + created_at, + completed_at + FROM + bms_bandwidth_results + WHERE + status = 'Pending' + ORDER BY + created_at ASC, + id ASC + "# + ) + .fetch_all(&self.pool) + .await?) 
+ } + + pub async fn update_completed( + &self, + job_id: Uuid, + status: &str, + ping_avg_ms: Option, + head_avg_ms: Option, + ttfb_ms: Option, + download_speed_mbps: Option, + ) -> Result<()> { + let ping = f64_to_bigdecimal(ping_avg_ms); + let head = f64_to_bigdecimal(head_avg_ms); + let ttfb = f64_to_bigdecimal(ttfb_ms); + let speed = f64_to_bigdecimal(download_speed_mbps); + + let result = sqlx::query!( + r#"UPDATE + bms_bandwidth_results + SET + status = $2, + ping_avg_ms = $3, + head_avg_ms = $4, + ttfb_ms = $5, + download_speed_mbps = $6, + completed_at = NOW() + WHERE + bms_job_id = $1 + "#, + job_id, + status, + ping, + head, + ttfb, + speed + ) + .execute(&self.pool) + .await?; + + if result.rows_affected() == 0 { + return Err(color_eyre::eyre::eyre!("BMS job not found: {job_id}")); + } + + Ok(()) + } + + pub async fn get_latest_for_provider( + &self, + provider_id: &ProviderId, + ) -> Result> { + Ok(sqlx::query_as!( + BmsBandwidthResult, + r#"SELECT + id, + provider_id, + bms_job_id, + url_tested, + routing_key, + worker_count, + status, + ping_avg_ms, + head_avg_ms, + ttfb_ms, + download_speed_mbps, + created_at, + completed_at + FROM + bms_bandwidth_results + WHERE + provider_id = $1 + ORDER BY + created_at DESC + LIMIT 1 + "#, + provider_id.as_str() + ) + .fetch_optional(&self.pool) + .await?) + } + + pub async fn get_latest_completed_for_provider( + &self, + provider_id: &ProviderId, + ) -> Result> { + Ok(sqlx::query_as!( + BmsBandwidthResult, + r#"SELECT + id, + provider_id, + bms_job_id, + url_tested, + routing_key, + worker_count, + status, + ping_avg_ms, + head_avg_ms, + ttfb_ms, + download_speed_mbps, + created_at, + completed_at + FROM + bms_bandwidth_results + WHERE + provider_id = $1 + AND status != 'Pending' + ORDER BY + completed_at DESC NULLS LAST + LIMIT 1 + "#, + provider_id.as_str() + ) + .fetch_optional(&self.pool) + .await?) 
+ } + + pub async fn get_latest_completed_for_providers( + &self, + provider_ids: &[String], + ) -> Result> { + if provider_ids.is_empty() { + return Ok(vec![]); + } + + Ok(sqlx::query_as!( + BmsBandwidthResult, + r#"SELECT DISTINCT ON (provider_id) + id, + provider_id, + bms_job_id, + url_tested, + routing_key, + worker_count, + status, + ping_avg_ms, + head_avg_ms, + ttfb_ms, + download_speed_mbps, + created_at, + completed_at + FROM + bms_bandwidth_results + WHERE + provider_id = ANY($1) + AND status != 'Pending' + ORDER BY + provider_id, + completed_at DESC NULLS LAST + "#, + provider_ids + ) + .fetch_all(&self.pool) + .await?) + } + + pub async fn get_history_for_provider( + &self, + provider_id: &ProviderId, + limit: i64, + ) -> Result> { + Ok(sqlx::query_as!( + BmsBandwidthResult, + r#"SELECT + id, + provider_id, + bms_job_id, + url_tested, + routing_key, + worker_count, + status, + ping_avg_ms, + head_avg_ms, + ttfb_ms, + download_speed_mbps, + created_at, + completed_at + FROM + bms_bandwidth_results + WHERE + provider_id = $1 + ORDER BY + created_at DESC + LIMIT $2 + "#, + provider_id.as_str(), + limit + ) + .fetch_all(&self.pool) + .await?) 
+ } +} diff --git a/url_finder/src/repository/deal_repo.rs b/url_finder/src/repository/deal_repo.rs index b288cf0..7a6c674 100644 --- a/url_finder/src/repository/deal_repo.rs +++ b/url_finder/src/repository/deal_repo.rs @@ -1,6 +1,7 @@ use color_eyre::Result; use serde::{Deserialize, Serialize}; use sqlx::PgPool; +use sqlx::types::BigDecimal; use crate::types::{ClientId, ProviderId}; @@ -17,6 +18,8 @@ pub struct UnifiedVerifiedDeal { pub client_id: Option, pub provider_id: Option, pub piece_cid: Option, + #[serde(skip)] + pub piece_size: Option, } #[derive(Debug, Serialize, Deserialize)] @@ -44,7 +47,8 @@ impl DealRepository { "claimId" AS claim_id, "clientId" AS client_id, "providerId" AS provider_id, - "pieceCid" AS piece_cid + "pieceCid" AS piece_cid, + "pieceSize" AS piece_size FROM unified_verified_deal WHERE "providerId" = $1 @@ -78,9 +82,10 @@ impl DealRepository { "claimId" AS claim_id, "clientId" AS client_id, "providerId" AS provider_id, - "pieceCid" AS piece_cid + "pieceCid" AS piece_cid, + "pieceSize" AS piece_size FROM unified_verified_deal - WHERE + WHERE "providerId" = $1 AND "clientId" = $2 ORDER BY random() @@ -114,7 +119,8 @@ impl DealRepository { "claimId" AS claim_id, "clientId" AS client_id, "providerId" AS provider_id, - "pieceCid" AS piece_cid + "pieceCid" AS piece_cid, + "pieceSize" AS piece_size FROM unified_verified_deal WHERE "providerId" = $1 @@ -149,7 +155,8 @@ impl DealRepository { "claimId" AS claim_id, "clientId" AS client_id, "providerId" AS provider_id, - "pieceCid" AS piece_cid + "pieceCid" AS piece_cid, + "pieceSize" AS piece_size FROM unified_verified_deal WHERE "providerId" = $1 diff --git a/url_finder/src/repository/mod.rs b/url_finder/src/repository/mod.rs index 18a9a7f..1572167 100644 --- a/url_finder/src/repository/mod.rs +++ b/url_finder/src/repository/mod.rs @@ -1,7 +1,9 @@ +mod bms_result_repo; mod deal_repo; mod storage_provider_repo; mod url_result_repo; +pub use bms_result_repo::*; pub use deal_repo::*; pub use 
storage_provider_repo::*; pub use url_result_repo::*; diff --git a/url_finder/src/repository/storage_provider_repo.rs b/url_finder/src/repository/storage_provider_repo.rs index 96df6d6..f4935fe 100644 --- a/url_finder/src/repository/storage_provider_repo.rs +++ b/url_finder/src/repository/storage_provider_repo.rs @@ -19,6 +19,9 @@ pub struct StorageProvider { pub bms_test_status: Option, pub bms_routing_key: Option, pub last_bms_region_discovery_at: Option>, + pub is_consistent: bool, + pub is_reliable: bool, + pub url_metadata: Option, pub created_at: DateTime, pub updated_at: DateTime, } @@ -75,11 +78,14 @@ impl StorageProviderRepository { bms_test_status, bms_routing_key, last_bms_region_discovery_at, + is_consistent, + is_reliable, + url_metadata, created_at, updated_at FROM storage_providers - WHERE + WHERE provider_id = $1 "#, provider_id as &ProviderId @@ -102,6 +108,9 @@ impl StorageProviderRepository { bms_test_status, bms_routing_key, last_bms_region_discovery_at, + is_consistent, + is_reliable, + url_metadata, created_at, updated_at FROM @@ -147,6 +156,9 @@ impl StorageProviderRepository { &self, provider_id: &ProviderId, last_working_url: Option, + is_consistent: bool, + is_reliable: bool, + url_metadata: Option, ) -> Result<()> { sqlx::query!( r#"UPDATE @@ -156,15 +168,187 @@ impl StorageProviderRepository { url_discovery_status = NULL, url_discovery_pending_since = NULL, last_working_url = $2, + is_consistent = $3, + is_reliable = $4, + url_metadata = $5, + updated_at = NOW() + WHERE + provider_id = $1 + "#, + provider_id as &ProviderId, + last_working_url, + is_consistent, + is_reliable, + url_metadata + ) + .execute(&self.pool) + .await?; + Ok(()) + } + + pub async fn get_due_for_bms_test(&self, limit: i64) -> Result> { + Ok(sqlx::query_as!( + StorageProvider, + r#"SELECT + id, + provider_id AS "provider_id: ProviderId", + next_url_discovery_at, + url_discovery_status, + url_discovery_pending_since, + last_working_url, + next_bms_test_at, + 
bms_test_status, + bms_routing_key, + last_bms_region_discovery_at, + is_consistent, + is_reliable, + url_metadata, + created_at, + updated_at + FROM + storage_providers + WHERE + last_working_url IS NOT NULL + AND is_consistent = true + AND next_bms_test_at <= NOW() + ORDER BY + next_bms_test_at ASC + LIMIT $1 + "#, + limit + ) + .fetch_all(&self.pool) + .await?) + } + + pub async fn schedule_next_bms_test( + &self, + provider_id: &ProviderId, + interval_days: i64, + ) -> Result<()> { + sqlx::query!( + r#"UPDATE + storage_providers + SET + next_bms_test_at = NOW() + ($2 || ' days')::INTERVAL, updated_at = NOW() WHERE provider_id = $1 "#, provider_id as &ProviderId, - last_working_url + interval_days.to_string() ) .execute(&self.pool) .await?; Ok(()) } + + pub async fn reset_url_discovery_schedule( + &self, + provider_id: &ProviderId, + ) -> Result> { + Ok(sqlx::query_as!( + StorageProvider, + r#"UPDATE + storage_providers + SET + next_url_discovery_at = NOW(), + updated_at = NOW() + WHERE + provider_id = $1 + RETURNING + id, + provider_id AS "provider_id: ProviderId", + next_url_discovery_at, + url_discovery_status, + url_discovery_pending_since, + last_working_url, + next_bms_test_at, + bms_test_status, + bms_routing_key, + last_bms_region_discovery_at, + is_consistent, + is_reliable, + url_metadata, + created_at, + updated_at + "#, + provider_id as &ProviderId + ) + .fetch_optional(&self.pool) + .await?) 
+ } + + pub async fn reset_bms_test_schedule( + &self, + provider_id: &ProviderId, + ) -> Result> { + Ok(sqlx::query_as!( + StorageProvider, + r#"UPDATE + storage_providers + SET + next_bms_test_at = NOW(), + updated_at = NOW() + WHERE + provider_id = $1 + RETURNING + id, + provider_id AS "provider_id: ProviderId", + next_url_discovery_at, + url_discovery_status, + url_discovery_pending_since, + last_working_url, + next_bms_test_at, + bms_test_status, + bms_routing_key, + last_bms_region_discovery_at, + is_consistent, + is_reliable, + url_metadata, + created_at, + updated_at + "#, + provider_id as &ProviderId + ) + .fetch_optional(&self.pool) + .await?) + } + + pub async fn reset_all_schedules( + &self, + provider_id: &ProviderId, + ) -> Result> { + Ok(sqlx::query_as!( + StorageProvider, + r#"UPDATE + storage_providers + SET + next_url_discovery_at = NOW(), + next_bms_test_at = NOW(), + updated_at = NOW() + WHERE + provider_id = $1 + RETURNING + id, + provider_id AS "provider_id: ProviderId", + next_url_discovery_at, + url_discovery_status, + url_discovery_pending_since, + last_working_url, + next_bms_test_at, + bms_test_status, + bms_routing_key, + last_bms_region_discovery_at, + is_consistent, + is_reliable, + url_metadata, + created_at, + updated_at + "#, + provider_id as &ProviderId + ) + .fetch_optional(&self.pool) + .await?) 
+ } } diff --git a/url_finder/src/repository/url_result_repo.rs b/url_finder/src/repository/url_result_repo.rs index 16f50b8..a00a92d 100644 --- a/url_finder/src/repository/url_result_repo.rs +++ b/url_finder/src/repository/url_result_repo.rs @@ -1,4 +1,4 @@ -use chrono::{DateTime, Utc}; +use chrono::{DateTime, NaiveDate, Utc}; use color_eyre::Result; use serde::{Deserialize, Serialize}; use sqlx::PgPool; @@ -8,6 +8,15 @@ use uuid::Uuid; use crate::services::url_discovery_service::UrlDiscoveryResult; use crate::types::{ClientId, DiscoveryType, ErrorCode, ProviderId, ResultCode}; +/// Filters for provider queries +#[derive(Debug, Clone, Default)] +pub struct ProviderFilters { + /// Filter by last_working_url IS [NOT] NULL in storage_providers + pub has_working_url: Option, + /// Filter by is_consistent in storage_providers + pub is_consistent: Option, +} + #[derive(Debug, Clone, Serialize, Deserialize, ToSchema, sqlx::FromRow)] pub struct UrlResult { pub id: Uuid, @@ -19,6 +28,10 @@ pub struct UrlResult { pub result_code: ResultCode, pub error_code: Option, pub tested_at: DateTime, + pub is_consistent: Option, + pub is_reliable: Option, + pub url_metadata: Option, + pub sector_utilization_percent: Option, } impl From for UrlResult { @@ -32,11 +45,29 @@ impl From for UrlResult { retrievability_percent: result.retrievability_percent, result_code: result.result_code, error_code: result.error_code, - tested_at: Utc::now(), + tested_at: result.tested_at, + is_consistent: Some(result.is_consistent), + is_reliable: Some(result.is_reliable), + url_metadata: result.url_metadata, + sector_utilization_percent: result.sector_utilization_percent, } } } +#[derive(Debug, sqlx::FromRow)] +pub struct HistoryRow { + pub date: NaiveDate, + pub retrievability_percent: f64, + pub sector_utilization_percent: Option, + pub is_consistent: Option, + pub is_reliable: Option, + pub working_url: Option, + pub result_code: ResultCode, + pub error_code: Option, + pub tested_at: DateTime, + pub 
url_metadata: Option, +} + #[derive(Clone)] pub struct UrlResultRepository { pool: PgPool, @@ -62,7 +93,11 @@ impl UrlResultRepository { retrievability_percent::float8 AS "retrievability_percent!", result_code AS "result_code: ResultCode", error_code AS "error_code: ErrorCode", - tested_at + tested_at, + is_consistent, + is_reliable, + url_metadata, + sector_utilization_percent::float8 AS "sector_utilization_percent" FROM url_results WHERE @@ -96,7 +131,11 @@ impl UrlResultRepository { retrievability_percent::float8 AS "retrievability_percent!", result_code AS "result_code: ResultCode", error_code AS "error_code: ErrorCode", - tested_at + tested_at, + is_consistent, + is_reliable, + url_metadata, + sector_utilization_percent::float8 AS "sector_utilization_percent" FROM url_results WHERE @@ -131,7 +170,11 @@ impl UrlResultRepository { retrievability_percent::float8 AS "retrievability_percent!", result_code AS "result_code: ResultCode", error_code AS "error_code: ErrorCode", - tested_at + tested_at, + is_consistent, + is_reliable, + url_metadata, + sector_utilization_percent::float8 AS "sector_utilization_percent" FROM url_results WHERE @@ -149,6 +192,116 @@ impl UrlResultRepository { Ok(results) } + pub async fn get_all_providers_paginated( + &self, + filters: &ProviderFilters, + limit: i64, + offset: i64, + ) -> Result> { + let results = sqlx::query_as!( + UrlResult, + r#"SELECT DISTINCT ON (ur.provider_id) + ur.id, + ur.provider_id AS "provider_id: ProviderId", + ur.client_id AS "client_id: ClientId", + ur.result_type AS "result_type: DiscoveryType", + ur.working_url, + ur.retrievability_percent::float8 AS "retrievability_percent!", + ur.result_code AS "result_code: ResultCode", + ur.error_code AS "error_code: ErrorCode", + ur.tested_at, + ur.is_consistent, + ur.is_reliable, + ur.url_metadata, + ur.sector_utilization_percent::float8 AS "sector_utilization_percent" + FROM + url_results ur + JOIN + storage_providers sp ON ur.provider_id = sp.provider_id + WHERE + 
ur.result_type = 'Provider' + AND ($3::bool IS NULL OR (sp.last_working_url IS NOT NULL) = $3) + AND ($4::bool IS NULL OR sp.is_consistent = $4) + ORDER BY + ur.provider_id, + ur.tested_at DESC + LIMIT $1 + OFFSET $2 + "#, + limit, + offset, + filters.has_working_url, + filters.is_consistent + ) + .fetch_all(&self.pool) + .await?; + + Ok(results) + } + + pub async fn count_all_providers(&self, filters: &ProviderFilters) -> Result { + let result = sqlx::query_scalar!( + r#"SELECT + COUNT(DISTINCT ur.provider_id) AS "count!" + FROM + url_results ur + JOIN + storage_providers sp ON ur.provider_id = sp.provider_id + WHERE + ur.result_type = 'Provider' + AND ($1::bool IS NULL OR (sp.last_working_url IS NOT NULL) = $1) + AND ($2::bool IS NULL OR sp.is_consistent = $2) + "#, + filters.has_working_url, + filters.is_consistent + ) + .fetch_one(&self.pool) + .await?; + + Ok(result) + } + + pub async fn get_latest_for_providers( + &self, + provider_ids: &[String], + ) -> Result> { + if provider_ids.is_empty() { + return Ok(vec![]); + } + + let results = sqlx::query_as!( + UrlResult, + r#"SELECT DISTINCT ON (provider_id) + id, + provider_id AS "provider_id: ProviderId", + client_id AS "client_id: ClientId", + result_type AS "result_type: DiscoveryType", + working_url, + retrievability_percent::float8 AS "retrievability_percent!", + result_code AS "result_code: ResultCode", + error_code AS "error_code: ErrorCode", + tested_at, + is_consistent, + is_reliable, + url_metadata, + sector_utilization_percent::float8 AS "sector_utilization_percent" + FROM + url_results + WHERE + provider_id = ANY($1) + AND result_type = 'Provider' + ORDER BY + provider_id, + tested_at DESC + "#, + provider_ids + ) + .fetch_all(&self.pool) + .await?; + + Ok(results) + } + pub async fn insert_batch(&self, results: &[UrlResult]) -> Result { if results.is_empty() { return Ok(0); @@ -164,6 +317,10 @@ impl UrlResultRepository { let mut result_codes: Vec = Vec::with_capacity(len); let mut error_codes: Vec> = 
Vec::with_capacity(len); let mut tested_ats: Vec> = Vec::with_capacity(len); + let mut is_consistents: Vec> = Vec::with_capacity(len); + let mut is_reliables: Vec> = Vec::with_capacity(len); + let mut url_metadatas: Vec> = Vec::with_capacity(len); + let mut sector_utilization_percents: Vec> = Vec::with_capacity(len); for result in results { ids.push(result.id); @@ -175,13 +332,17 @@ impl UrlResultRepository { result_codes.push(result.result_code.clone()); error_codes.push(result.error_code.clone()); tested_ats.push(result.tested_at); + is_consistents.push(result.is_consistent); + is_reliables.push(result.is_reliable); + url_metadatas.push(result.url_metadata.clone()); + sector_utilization_percents.push(result.sector_utilization_percent); } let result = sqlx::query!( r#"INSERT INTO - url_results (id, provider_id, client_id, result_type, working_url, retrievability_percent, result_code, error_code, tested_at) + url_results (id, provider_id, client_id, result_type, working_url, retrievability_percent, result_code, error_code, tested_at, is_consistent, is_reliable, url_metadata, sector_utilization_percent) SELECT - a1, a2, a3, a4, a5, a6, a7, a8, a9 + a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13 FROM UNNEST( $1::uuid[], $2::text[], @@ -191,8 +352,12 @@ impl UrlResultRepository { $6::double precision[], $7::result_code[], $8::error_code[], - $9::timestamptz[] - ) AS t(a1, a2, a3, a4, a5, a6, a7, a8, a9) + $9::timestamptz[], + $10::bool[], + $11::bool[], + $12::jsonb[], + $13::double precision[] + ) AS t(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13) "#, &ids as &[Uuid], &provider_ids as &[String], @@ -202,11 +367,98 @@ impl UrlResultRepository { &retrievability_percents as &[f64], &result_codes as &[ResultCode], &error_codes as &[Option], - &tested_ats as &[DateTime] + &tested_ats as &[DateTime], + &is_consistents as &[Option], + &is_reliables as &[Option], + &url_metadatas as &[Option], + §or_utilization_percents as &[Option] ) .execute(&self.pool) 
.await?; Ok(result.rows_affected().try_into()?) } + + pub async fn get_history_for_provider( + &self, + provider_id: &ProviderId, + from: NaiveDate, + to: NaiveDate, + ) -> Result> { + let results = sqlx::query_as!( + HistoryRow, + r#"SELECT DISTINCT ON (DATE(tested_at)) + DATE(tested_at) AS "date!", + retrievability_percent::float8 AS "retrievability_percent!", + sector_utilization_percent::float8 AS "sector_utilization_percent", + is_consistent, + is_reliable, + working_url, + result_code AS "result_code: ResultCode", + error_code AS "error_code: ErrorCode", + tested_at, + url_metadata + FROM + url_results + WHERE + provider_id = $1 + AND result_type = 'Provider' + AND tested_at >= $2::date + AND tested_at < ($3::date + INTERVAL '1 day') + ORDER BY + DATE(tested_at), + tested_at DESC + "#, + provider_id.as_str(), + from, + to + ) + .fetch_all(&self.pool) + .await?; + + Ok(results) + } + + pub async fn get_history_for_provider_client( + &self, + provider_id: &ProviderId, + client_id: &ClientId, + from: NaiveDate, + to: NaiveDate, + ) -> Result> { + let results = sqlx::query_as!( + HistoryRow, + r#"SELECT DISTINCT ON (DATE(tested_at)) + DATE(tested_at) AS "date!", + retrievability_percent::float8 AS "retrievability_percent!", + sector_utilization_percent::float8 AS "sector_utilization_percent", + is_consistent, + is_reliable, + working_url, + result_code AS "result_code: ResultCode", + error_code AS "error_code: ErrorCode", + tested_at, + url_metadata + FROM + url_results + WHERE + provider_id = $1 + AND client_id = $2 + AND result_type = 'ProviderClient' + AND tested_at >= $3::date + AND tested_at < ($4::date + INTERVAL '1 day') + ORDER BY + DATE(tested_at), + tested_at DESC + "#, + provider_id.as_str(), + client_id.as_str(), + from, + to + ) + .fetch_all(&self.pool) + .await?; + + Ok(results) + } } diff --git a/url_finder/src/routes.rs b/url_finder/src/routes.rs index 866ed9f..c078578 100644 --- a/url_finder/src/routes.rs +++ b/url_finder/src/routes.rs @@ -1,6 
+1,12 @@ use std::{sync::Arc, time::Duration}; -use axum::{Router, body::Body, http::Response, response::IntoResponse, routing::get}; +use axum::{ + Router, + body::Body, + http::Response, + response::IntoResponse, + routing::{get, post}, +}; use tower_governor::{ GovernorError, GovernorLayer, governor::GovernorConfigBuilder, key_extractor::SmartIpKeyExtractor, @@ -51,7 +57,7 @@ pub fn create_routes() -> Router> { let swagger_routes = SwaggerUi::new("/").url("/api-doc/openapi.json", ApiDoc::openapi()); - let api_routes = Router::new() + let legacy_api_routes = Router::new() .route("/url/find/{provider}", get(handle_find_url_sp)) .route( "/url/find/{provider}/{client}", @@ -71,6 +77,35 @@ pub fn create_routes() -> Router> { .error_handler(too_many_requests_error_handler), ); + let providers_api_routes = Router::new() + .route("/providers", get(providers::handle_list_providers)) + .route("/providers/bulk", post(providers::handle_bulk_providers)) + .route("/providers/{id}", get(providers::handle_get_provider)) + .route( + "/providers/{id}/reset", + post(providers::handle_reset_provider), + ) + .route( + "/providers/{id}/clients/{client_id}", + get(providers::handle_get_provider_client), + ) + .route( + "/providers/{id}/history/retrievability", + get(providers::handle_history_retrievability), + ) + .route( + "/providers/{id}/clients/{client_id}/history/retrievability", + get(providers::handle_history_retrievability_client), + ) + .route( + "/clients/{id}/providers", + get(providers::handle_get_client_providers), + ) + .layer( + GovernorLayer::new(governor_config.clone()) + .error_handler(too_many_requests_error_handler), + ); + let healthcheck_route = Router::new() .route("/healthcheck", get(handle_healthcheck)) .layer( @@ -80,6 +115,7 @@ pub fn create_routes() -> Router> { Router::new() .merge(swagger_routes) - .merge(api_routes) + .merge(legacy_api_routes) + .merge(providers_api_routes) .merge(healthcheck_route) } diff --git 
a/url_finder/src/services/consistency_analyzer.rs b/url_finder/src/services/consistency_analyzer.rs new file mode 100644 index 0000000..a59c4fb --- /dev/null +++ b/url_finder/src/services/consistency_analyzer.rs @@ -0,0 +1,187 @@ +use crate::config::RELIABILITY_TIMEOUT_THRESHOLD; +use crate::types::{InconsistencyType, ProviderAnalysis, UrlTestError, UrlTestResult}; + +pub fn analyze_results(results: &[UrlTestResult]) -> ProviderAnalysis { + if results.is_empty() { + return ProviderAnalysis::empty(); + } + + let total = results.len(); + let success_count = results.iter().filter(|r| r.success).count(); + let timeout_count = results + .iter() + .filter(|r| matches!(r.error, Some(UrlTestError::Timeout))) + .count(); + + // Count inconsistent results by type + let mut inconsistent_count = 0; + let mut warm_up = 0; + let mut flaky = 0; + let mut small_responses = 0; + let mut both_failed = 0; + let mut size_mismatch = 0; + + for r in results.iter().filter(|r| !r.consistent) { + inconsistent_count += 1; + match r.inconsistency_type { + Some(InconsistencyType::WarmUp) => warm_up += 1, + Some(InconsistencyType::Flaky) => flaky += 1, + Some(InconsistencyType::SmallResponses) => small_responses += 1, + Some(InconsistencyType::BothFailed) => both_failed += 1, + Some(InconsistencyType::SizeMismatch) => size_mismatch += 1, + None => {} // Shouldn't happen if !consistent, but handle gracefully + } + } + + // Total requests = 2 per URL (double-tap) + let total_requests = total * 2; + let timeout_rate = timeout_count as f64 / total_requests as f64; + + ProviderAnalysis { + retrievability_percent: (success_count as f64 / total as f64) * 100.0, + is_consistent: inconsistent_count == 0, + is_reliable: timeout_rate < RELIABILITY_TIMEOUT_THRESHOLD, + sample_count: total, + success_count, + timeout_count, + inconsistent_count, + inconsistent_warm_up: warm_up, + inconsistent_flaky: flaky, + inconsistent_small_responses: small_responses, + inconsistent_both_failed: both_failed, + 
inconsistent_size_mismatch: size_mismatch, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::InconsistencyType; + + fn make_result(success: bool, consistent: bool, error: Option) -> UrlTestResult { + UrlTestResult { + url: "http://test".to_string(), + success, + consistent, + inconsistency_type: if consistent { + None + } else { + Some(InconsistencyType::WarmUp) + }, + content_length: Some(16_000_000_000), + response_time_ms: 100, + error, + is_valid_car: false, + root_cid: None, + } + } + + fn make_inconsistent(inconsistency_type: InconsistencyType) -> UrlTestResult { + UrlTestResult { + url: "http://test".to_string(), + success: true, + consistent: false, + inconsistency_type: Some(inconsistency_type), + content_length: Some(16_000_000_000), + response_time_ms: 100, + error: None, + is_valid_car: false, + root_cid: None, + } + } + + #[test] + fn test_analyze_all_successful_consistent() { + let results = vec![ + make_result(true, true, None), + make_result(true, true, None), + make_result(true, true, None), + ]; + + let analysis = analyze_results(&results); + + assert_eq!(analysis.retrievability_percent, 100.0); + assert!(analysis.is_consistent); + assert!(analysis.is_reliable); + assert_eq!(analysis.sample_count, 3); + assert_eq!(analysis.success_count, 3); + } + + #[test] + fn test_analyze_one_inconsistent_fails_all() { + let results = vec![ + make_result(true, true, None), + make_result(true, false, None), // Inconsistent + make_result(true, true, None), + ]; + + let analysis = analyze_results(&results); + + assert!(!analysis.is_consistent); // One bad = all bad + } + + #[test] + fn test_analyze_high_timeout_rate() { + let results = vec![ + make_result(false, true, Some(UrlTestError::Timeout)), + make_result(false, true, Some(UrlTestError::Timeout)), + make_result(true, true, None), + ]; + + let analysis = analyze_results(&results); + + // 2 timeouts / 6 total requests = 33% > 30% threshold + assert!(!analysis.is_reliable); + } + + #[test] 
+ fn test_analyze_empty_results() { + let results: Vec = vec![]; + let analysis = analyze_results(&results); + + assert_eq!(analysis.retrievability_percent, 0.0); + // Empty results should NOT claim consistency or reliability + // since no verification was performed + assert!(!analysis.is_consistent); + assert!(!analysis.is_reliable); + } + + #[test] + fn test_analyze_inconsistent_breakdown() { + let results = vec![ + make_result(true, true, None), // consistent + make_inconsistent(InconsistencyType::WarmUp), + make_inconsistent(InconsistencyType::WarmUp), + make_inconsistent(InconsistencyType::Flaky), + make_inconsistent(InconsistencyType::SmallResponses), + make_inconsistent(InconsistencyType::BothFailed), + make_inconsistent(InconsistencyType::SizeMismatch), + ]; + + let analysis = analyze_results(&results); + + assert_eq!(analysis.sample_count, 7); + assert_eq!(analysis.inconsistent_count, 6); + assert_eq!(analysis.inconsistent_warm_up, 2); + assert_eq!(analysis.inconsistent_flaky, 1); + assert_eq!(analysis.inconsistent_small_responses, 1); + assert_eq!(analysis.inconsistent_both_failed, 1); + assert_eq!(analysis.inconsistent_size_mismatch, 1); + assert!(!analysis.is_consistent); + } + + #[test] + fn test_analyze_all_consistent_has_zero_breakdown() { + let results = vec![make_result(true, true, None), make_result(true, true, None)]; + + let analysis = analyze_results(&results); + + assert_eq!(analysis.inconsistent_count, 0); + assert_eq!(analysis.inconsistent_warm_up, 0); + assert_eq!(analysis.inconsistent_flaky, 0); + assert_eq!(analysis.inconsistent_small_responses, 0); + assert_eq!(analysis.inconsistent_both_failed, 0); + assert_eq!(analysis.inconsistent_size_mismatch, 0); + assert!(analysis.is_consistent); + } +} diff --git a/url_finder/src/services/deal_service.rs b/url_finder/src/services/deal_service.rs index cdb76da..2d22b2f 100644 --- a/url_finder/src/services/deal_service.rs +++ b/url_finder/src/services/deal_service.rs @@ -1,16 +1,26 @@ use 
color_eyre::Result; +use sqlx::types::BigDecimal; use crate::{ repository::DealRepository, types::{ClientAddress, ClientId, ProviderAddress, ProviderId}, }; -/// get deals and extract piece_ids -pub async fn get_piece_ids_by_provider( +/// Context for testing a piece URL with deal metadata +#[derive(Debug, Clone)] +pub struct PieceTestContext { + pub piece_cid: String, + pub deal_id: i32, + pub piece_size: Option, + pub url: String, +} + +/// Get deals and extract piece contexts (piece_cid + deal_id + piece_size) +pub async fn get_piece_contexts_by_provider( deal_repo: &DealRepository, provider_id: &ProviderId, client_id: Option<&ClientId>, -) -> Result> { +) -> Result)>> { let limit = 100; let offset = 0; @@ -28,12 +38,32 @@ pub async fn get_piece_ids_by_provider( return Ok(vec![]); } - let piece_ids: Vec = deals + let contexts: Vec<(String, i32, Option)> = deals .iter() - .filter_map(|deal| deal.piece_cid.clone()) + .filter_map(|deal| { + deal.piece_cid.clone().map(|cid| { + let piece_size = deal.piece_size.as_ref().and_then(bigdecimal_to_i64); + (cid, deal.deal_id, piece_size) + }) + }) .collect(); - Ok(piece_ids) + Ok(contexts) +} + +fn bigdecimal_to_i64(val: &BigDecimal) -> Option { + use std::str::FromStr; + i64::from_str(&val.to_string()).ok() +} + +/// Backward-compatible: get deals and extract piece_ids only +pub async fn get_piece_ids_by_provider( + deal_repo: &DealRepository, + provider_id: &ProviderId, + client_id: Option<&ClientId>, +) -> Result> { + let contexts = get_piece_contexts_by_provider(deal_repo, provider_id, client_id).await?; + Ok(contexts.into_iter().map(|(cid, _, _)| cid).collect()) } pub async fn get_distinct_providers_by_client( @@ -106,7 +136,28 @@ pub async fn get_random_piece_ids_by_provider( Ok(piece_ids) } -/// construct every piece_cid and endoint combination +/// Build test contexts: (piece_cid, deal_id, piece_size, url) for each endpoint × piece combination +pub fn build_piece_test_contexts( + endpoints: Vec, + piece_contexts: 
Vec<(String, i32, Option)>, +) -> Vec { + endpoints + .iter() + .flat_map(|endpoint| { + let endpoint = endpoint.trim_end_matches('/'); + piece_contexts + .iter() + .map(move |(piece_cid, deal_id, piece_size)| PieceTestContext { + piece_cid: piece_cid.clone(), + deal_id: *deal_id, + piece_size: *piece_size, + url: format!("{endpoint}/piece/{piece_cid}"), + }) + }) + .collect() +} + +/// Backward-compatible: construct every piece_cid and endpoint combination pub async fn get_piece_url(endpoints: Vec, piece_ids: Vec) -> Vec { endpoints .iter() diff --git a/url_finder/src/services/mod.rs b/url_finder/src/services/mod.rs index fa52f9d..d24d7bb 100644 --- a/url_finder/src/services/mod.rs +++ b/url_finder/src/services/mod.rs @@ -1,2 +1,4 @@ +pub mod consistency_analyzer; pub mod deal_service; +pub mod provider_service; pub mod url_discovery_service; diff --git a/url_finder/src/services/provider_service.rs b/url_finder/src/services/provider_service.rs new file mode 100644 index 0000000..c5e7036 --- /dev/null +++ b/url_finder/src/services/provider_service.rs @@ -0,0 +1,288 @@ +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +use chrono::{DateTime, Utc}; +use color_eyre::Result; +use sqlx::types::BigDecimal; + +use crate::repository::{ + BmsBandwidthResult, BmsBandwidthResultRepository, ProviderFilters, StorageProviderRepository, + UrlResult, UrlResultRepository, +}; +use crate::types::{ClientId, ErrorCode, ProviderId, ResultCode}; + +// --- Domain Types --- + +#[derive(Debug, Clone)] +pub struct ProviderData { + pub provider_id: ProviderId, + pub client_id: Option, + pub working_url: Option, + pub retrievability_percent: f64, + pub tested_at: DateTime, + pub result_code: ResultCode, + pub error_code: Option, + pub is_consistent: Option, + pub is_reliable: Option, + pub url_metadata: Option, + pub sector_utilization_percent: Option, + pub performance: PerformanceData, +} + +#[derive(Debug, Clone, Default)] +pub struct PerformanceData { + pub bandwidth: 
Option, + pub geolocation: Option, +} + +#[derive(Debug, Clone, Default)] +pub struct SchedulingData { + pub url_discovery_next_at: Option>, + pub url_discovery_status: Option, + pub url_discovery_pending_since: Option>, + pub bms_test_next_at: Option>, + pub bms_test_status: Option, +} + +#[derive(Debug, Clone)] +pub struct BandwidthResult { + pub status: String, + pub tested_at: Option>, + pub ping_avg_ms: Option, + pub head_avg_ms: Option, + pub ttfb_ms: Option, + pub download_speed_mbps: Option, + pub worker_count: Option, + pub routing_key: Option, + pub url_tested: Option, +} + +impl From for BandwidthResult { + fn from(b: BmsBandwidthResult) -> Self { + Self { + status: b.status, + tested_at: b.completed_at, + ping_avg_ms: b.ping_avg_ms.as_ref().and_then(bigdecimal_to_f64), + head_avg_ms: b.head_avg_ms.as_ref().and_then(bigdecimal_to_f64), + ttfb_ms: b.ttfb_ms.as_ref().and_then(bigdecimal_to_f64), + download_speed_mbps: b.download_speed_mbps.as_ref().and_then(bigdecimal_to_f64), + worker_count: Some(b.worker_count), + routing_key: Some(b.routing_key), + url_tested: Some(b.url_tested), + } + } +} + +#[derive(Debug, Clone)] +pub struct GeolocationResult { + pub status: String, + pub tested_at: Option>, + pub routing_key: Option, + pub region: Option, + pub country: Option, + pub city: Option, +} + +pub struct PaginatedProviders { + pub providers: Vec, + pub total: i64, + pub limit: i64, + pub offset: i64, +} + +pub struct BulkProviderResult { + pub providers: Vec, + pub not_found: Vec, +} + +// --- Service --- + +pub struct ProviderService { + url_repo: Arc, + bms_repo: Arc, + sp_repo: Arc, +} + +impl ProviderService { + pub fn new( + url_repo: Arc, + bms_repo: Arc, + sp_repo: Arc, + ) -> Self { + Self { + url_repo, + bms_repo, + sp_repo, + } + } + + pub async fn get_provider(&self, id: &ProviderId) -> Result> { + let url_result = self.url_repo.get_latest_for_provider(id).await?; + + let Some(url_result) = url_result else { + return Ok(None); + }; + + let 
bms_result = self.bms_repo.get_latest_completed_for_provider(id).await?; + + Ok(Some(self.enrich(url_result, bms_result))) + } + + pub async fn get_provider_client( + &self, + provider: &ProviderId, + client: &ClientId, + ) -> Result> { + let url_result = self + .url_repo + .get_latest_for_provider_client(provider, client) + .await?; + + let Some(url_result) = url_result else { + return Ok(None); + }; + + let bms_result = self + .bms_repo + .get_latest_completed_for_provider(provider) + .await?; + + Ok(Some(self.enrich(url_result, bms_result))) + } + + pub async fn get_providers_for_client(&self, client: &ClientId) -> Result> { + let url_results = self + .url_repo + .get_latest_for_client_all_providers(client) + .await?; + self.enrich_batch(url_results).await + } + + pub async fn list_providers( + &self, + filters: &ProviderFilters, + limit: i64, + offset: i64, + ) -> Result { + let total = self.url_repo.count_all_providers(filters).await?; + let url_results = self + .url_repo + .get_all_providers_paginated(filters, limit, offset) + .await?; + let providers = self.enrich_batch(url_results).await?; + + Ok(PaginatedProviders { + providers, + total, + limit, + offset, + }) + } + + pub async fn bulk_get_providers(&self, ids: &[ProviderId]) -> Result { + let id_strings: Vec = ids.iter().map(|id| id.as_str().to_string()).collect(); + + let url_results = self.url_repo.get_latest_for_providers(&id_strings).await?; + + let found_ids: HashSet = url_results + .iter() + .map(|r| r.provider_id.as_str().to_string()) + .collect(); + + let not_found: Vec = ids + .iter() + .filter(|id| !found_ids.contains(id.as_str())) + .cloned() + .collect(); + + let providers = self.enrich_batch(url_results).await?; + + Ok(BulkProviderResult { + providers, + not_found, + }) + } + + pub async fn get_scheduling_data( + &self, + provider_id: &ProviderId, + ) -> Result> { + let sp = self.sp_repo.get_by_provider_id(provider_id).await?; + Ok(sp.map(|sp| SchedulingData { + url_discovery_next_at: 
Some(sp.next_url_discovery_at), + url_discovery_status: sp.url_discovery_status, + url_discovery_pending_since: sp.url_discovery_pending_since, + bms_test_next_at: Some(sp.next_bms_test_at), + bms_test_status: sp.bms_test_status, + })) + } + + // --- Private helpers --- + + fn enrich( + &self, + url_result: UrlResult, + bms_result: Option, + ) -> ProviderData { + ProviderData { + provider_id: url_result.provider_id, + client_id: url_result.client_id, + working_url: url_result.working_url, + retrievability_percent: url_result.retrievability_percent, + tested_at: url_result.tested_at, + result_code: url_result.result_code, + error_code: url_result.error_code, + is_consistent: url_result.is_consistent, + is_reliable: url_result.is_reliable, + url_metadata: url_result.url_metadata, + sector_utilization_percent: url_result.sector_utilization_percent, + performance: Self::build_performance(bms_result), + } + } + + async fn enrich_batch(&self, url_results: Vec) -> Result> { + if url_results.is_empty() { + return Ok(vec![]); + } + + let provider_ids: Vec = url_results + .iter() + .map(|r| r.provider_id.as_str().to_string()) + .collect(); + + let bms_results = self + .bms_repo + .get_latest_completed_for_providers(&provider_ids) + .await?; + + let bms_map: HashMap = bms_results + .into_iter() + .map(|r| (r.provider_id.clone(), r)) + .collect(); + + let providers = url_results + .into_iter() + .map(|url_result| { + let bms = bms_map.get(url_result.provider_id.as_str()).cloned(); + self.enrich(url_result, bms) + }) + .collect(); + + Ok(providers) + } + + fn build_performance(bms: Option) -> PerformanceData { + match bms { + Some(b) => PerformanceData { + bandwidth: Some(b.into()), + geolocation: None, + }, + None => PerformanceData::default(), + } + } +} + +fn bigdecimal_to_f64(val: &BigDecimal) -> Option { + use std::str::FromStr; + f64::from_str(&val.to_string()).ok() +} diff --git a/url_finder/src/services/url_discovery_service.rs 
b/url_finder/src/services/url_discovery_service.rs index ccc6783..5c7cc63 100644 --- a/url_finder/src/services/url_discovery_service.rs +++ b/url_finder/src/services/url_discovery_service.rs @@ -1,14 +1,17 @@ +use chrono::{DateTime, Utc}; + use crate::{ - config::Config, + config::{Config, MIN_VALID_CONTENT_LENGTH}, + http_client::build_client, provider_endpoints, repository::DealRepository, - services::deal_service, + services::{consistency_analyzer::analyze_results, deal_service}, types::{ ClientAddress, ClientId, DiscoveryType, ErrorCode, ProviderAddress, ProviderId, ResultCode, }, - url_tester, + url_tester::test_url_double_tap, }; -use tracing::{debug, error}; +use tracing::{debug, error, trace}; use uuid::Uuid; #[derive(Debug, Clone)] @@ -21,6 +24,11 @@ pub struct UrlDiscoveryResult { pub retrievability_percent: f64, pub result_code: ResultCode, pub error_code: Option, + pub tested_at: DateTime, + pub is_consistent: bool, + pub is_reliable: bool, + pub url_metadata: Option, + pub sector_utilization_percent: Option, } impl UrlDiscoveryResult { @@ -34,6 +42,11 @@ impl UrlDiscoveryResult { retrievability_percent: 0.0, result_code: ResultCode::Error, error_code: None, + tested_at: Utc::now(), + is_consistent: false, // No verification performed yet + is_reliable: false, // No verification performed yet + url_metadata: None, + sector_utilization_percent: None, } } @@ -47,6 +60,11 @@ impl UrlDiscoveryResult { retrievability_percent: 0.0, result_code: ResultCode::Error, error_code: None, + tested_at: Utc::now(), + is_consistent: false, // No verification performed yet + is_reliable: false, // No verification performed yet + url_metadata: None, + sector_utilization_percent: None, } } } @@ -60,10 +78,9 @@ pub async fn discover_url( let provider_id: ProviderId = provider_address.clone().into(); let client_id: Option = client_address.clone().map(|c| c.into()); - tracing::info!( - "discover_url called for provider={}, client={:?}", - provider_address, - client_address + 
trace!( + "discover_url: provider={}, client={:?}", + provider_address, client_address ); let mut result = match &client_id { @@ -71,6 +88,7 @@ pub async fn discover_url( None => UrlDiscoveryResult::new_provider_only(provider_id.clone()), }; + // Get endpoints let (result_code, endpoints) = match provider_endpoints::get_provider_endpoints(config, provider_address).await { Ok((code, eps)) => (code, eps), @@ -90,43 +108,147 @@ pub async fn discover_url( return result; }; - let piece_ids = - match deal_service::get_piece_ids_by_provider(deal_repo, &provider_id, client_id.as_ref()) - .await - { - Ok(ids) => ids, - Err(e) => { - error!( - "Failed to get piece ids for {} {:?}: {:?}", - provider_id, client_id, e - ); - result.result_code = ResultCode::Error; - result.error_code = Some(ErrorCode::FailedToGetDeals); - return result; - } - }; + // Get piece contexts (piece_cid + deal_id) + let piece_contexts = match deal_service::get_piece_contexts_by_provider( + deal_repo, + &provider_id, + client_id.as_ref(), + ) + .await + { + Ok(ctx) => ctx, + Err(e) => { + error!( + "Failed to get piece contexts for {} {:?}: {:?}", + provider_id, client_id, e + ); + result.result_code = ResultCode::Error; + result.error_code = Some(ErrorCode::FailedToGetDeals); + return result; + } + }; - if piece_ids.is_empty() { + if piece_contexts.is_empty() { result.result_code = ResultCode::NoDealsFound; return result; } - let urls = deal_service::get_piece_url(endpoints.clone(), piece_ids).await; + // Build test contexts with deal_id preserved + let test_contexts = deal_service::build_piece_test_contexts(endpoints.clone(), piece_contexts); debug!( - "Built {} URLs to test from endpoints: {:?}", - urls.len(), + "Built {} test contexts from endpoints: {:?}", + test_contexts.len(), endpoints ); - debug!("Testing URLs: {:?}", urls); - let (working_url, retrievability_percent) = - url_tester::check_retrievability_with_get(config, urls, true).await; - debug!( - "URL test result - working_url: {:?}, 
retrievability: {:?}", - working_url, retrievability_percent - ); + + // Build HTTP client + let client = match build_client(config) { + Ok(c) => c, + Err(e) => { + error!("Failed to build HTTP client: {:?}", e); + result.result_code = ResultCode::Error; + return result; + } + }; + + // Double-tap test all URLs, collecting results with context + let mut test_results = Vec::with_capacity(test_contexts.len()); + for ctx in &test_contexts { + let url_result = test_url_double_tap(&client, &ctx.url).await; + test_results.push((ctx.clone(), url_result)); + } + debug!("Double-tap tested {} URLs", test_results.len()); + + // Extract just UrlTestResults for analysis + let url_results: Vec<_> = test_results.iter().map(|(_, r)| r.clone()).collect(); + let analysis = analyze_results(&url_results); + + // Select working URL (largest valid response) + let working_url_result = test_results + .iter() + .filter(|(_, r)| r.success) + .filter(|(_, r)| r.content_length.unwrap_or(0) >= MIN_VALID_CONTENT_LENGTH) + .max_by_key(|(_, r)| r.content_length); + + let working_url = working_url_result.map(|(_, r)| r.url.clone()); + + // CAR diagnostics (kept for diagnostic value - detects if response is actually a CAR file) + let valid_car_count = test_results.iter().filter(|(_, r)| r.is_valid_car).count(); + let small_car_count = test_results + .iter() + .filter(|(_, r)| r.is_valid_car && r.content_length.unwrap_or(0) < MIN_VALID_CONTENT_LENGTH) + .count(); + + // Extract CAR info for working URL (diagnostic only, no verification) + let working_url_car_info = working_url_result.map(|(_, r)| { + serde_json::json!({ + "is_valid_car": r.is_valid_car, + "root_cid": r.root_cid, + "content_length": r.content_length, + }) + }); + + // Calculate sector utilization (content_length / piece_size * 100) + let utilization_samples: Vec = test_results + .iter() + .filter(|(_, r)| r.success) + .filter_map(|(ctx, r)| { + let content_length = r.content_length? as f64; + let piece_size = ctx.piece_size? 
as f64; + if piece_size > 0.0 { + Some((content_length / piece_size) * 100.0) + } else { + None + } + }) + .collect(); + + let sector_utilization_percent = if utilization_samples.is_empty() { + None + } else { + let sum: f64 = utilization_samples.iter().sum(); + Some(sum / utilization_samples.len() as f64) + }; + + // Build metadata + let url_metadata = serde_json::json!({ + "analysis": { + "sample_count": analysis.sample_count, + "success_count": analysis.success_count, + "timeout_count": analysis.timeout_count, + "inconsistent_count": analysis.inconsistent_count, + "inconsistent_breakdown": { + "warm_up": analysis.inconsistent_warm_up, + "flaky": analysis.inconsistent_flaky, + "small_responses": analysis.inconsistent_small_responses, + "both_failed": analysis.inconsistent_both_failed, + "size_mismatch": analysis.inconsistent_size_mismatch, + }, + "retrievability_percent": analysis.retrievability_percent, + "is_consistent": analysis.is_consistent, + "is_reliable": analysis.is_reliable, + }, + "car_diagnostics": { + "responses_parsed": test_results.len(), + "valid_car_headers": valid_car_count, + "small_car_responses": small_car_count, + "working_url_car_info": working_url_car_info, + }, + "sector_utilization": { + "sample_count": utilization_samples.len(), + "min_percent": utilization_samples.iter().cloned().reduce(f64::min), + "max_percent": utilization_samples.iter().cloned().reduce(f64::max), + }, + "validated_at": Utc::now().to_rfc3339(), + }); result.working_url = working_url.clone(); - result.retrievability_percent = retrievability_percent.unwrap_or(0.0); + result.retrievability_percent = analysis.retrievability_percent; + result.is_consistent = analysis.is_consistent; + result.is_reliable = analysis.is_reliable; + result.url_metadata = Some(url_metadata); + result.sector_utilization_percent = sector_utilization_percent; + result.result_code = if working_url.is_some() { ResultCode::Success } else { diff --git a/url_finder/src/types.rs 
b/url_finder/src/types.rs index 475ced8..4453376 100644 --- a/url_finder/src/types.rs +++ b/url_finder/src/types.rs @@ -448,6 +448,105 @@ impl PgHasArrayType for ErrorCode { } } +/// Error types for URL testing operations +#[derive(Debug, Clone, PartialEq)] +pub enum UrlTestError { + Timeout, + ConnectionRefused, + ConnectionReset, + DnsFailure, + TlsError, + HttpError(u16), + Other(String), +} + +impl std::fmt::Display for UrlTestError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Timeout => write!(f, "timeout"), + Self::ConnectionRefused => write!(f, "connection_refused"), + Self::ConnectionReset => write!(f, "connection_reset"), + Self::DnsFailure => write!(f, "dns_failure"), + Self::TlsError => write!(f, "tls_error"), + Self::HttpError(code) => write!(f, "http_{code}"), + Self::Other(msg) => write!(f, "other: {msg}"), + } + } +} + +/// Classification of why a URL test was inconsistent +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum InconsistencyType { + /// (Small|Failed, Valid) - Second tap returned valid data after warm-up. + /// Provider CAN serve data, just needs initial request to "warm up". + /// This is the pattern double-tap was designed to detect and handle. + WarmUp, + /// (Valid, Small|Failed) - First tap valid, second degraded. + /// Provider is unreliable - served data once then stopped. + Flaky, + /// (Small, Small|Failed) or (Failed, Small) - Neither tap returned valid data. + /// Provider consistently returns small/garbage responses. + SmallResponses, + /// (Failed, Failed) - Both taps failed completely. + /// Provider unreachable or broken. + BothFailed, + /// (Valid, Valid) but different Content-Length. + /// Data integrity issue - file size changed between requests. 
+ SizeMismatch, +} + +/// Result of a double-tap URL test +#[derive(Debug, Clone)] +pub struct UrlTestResult { + pub url: String, + pub success: bool, + pub consistent: bool, + pub inconsistency_type: Option, + pub content_length: Option, + pub response_time_ms: u64, + pub error: Option, + // CAR header info + pub is_valid_car: bool, + pub root_cid: Option, +} + +/// Analysis of URL test results for a provider +#[derive(Debug, Clone)] +pub struct ProviderAnalysis { + pub retrievability_percent: f64, + pub is_consistent: bool, + pub is_reliable: bool, + pub sample_count: usize, + pub success_count: usize, + pub timeout_count: usize, + pub inconsistent_count: usize, + pub inconsistent_warm_up: usize, + pub inconsistent_flaky: usize, + pub inconsistent_small_responses: usize, + pub inconsistent_both_failed: usize, + pub inconsistent_size_mismatch: usize, +} + +impl ProviderAnalysis { + pub fn empty() -> Self { + Self { + retrievability_percent: 0.0, + is_consistent: false, + is_reliable: false, + sample_count: 0, + success_count: 0, + timeout_count: 0, + inconsistent_count: 0, + inconsistent_warm_up: 0, + inconsistent_flaky: 0, + inconsistent_small_responses: 0, + inconsistent_both_failed: 0, + inconsistent_size_mismatch: 0, + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/url_finder/src/url_tester.rs b/url_finder/src/url_tester.rs index 6f3dfb9..05b2d26 100644 --- a/url_finder/src/url_tester.rs +++ b/url_finder/src/url_tester.rs @@ -2,15 +2,353 @@ use std::sync::{ Arc, atomic::{AtomicUsize, Ordering}, }; +use std::time::Duration; use futures::{StreamExt, stream}; use reqwest::Client; -use tracing::{debug, info}; +use tokio::sync::Semaphore; +use tracing::debug; -use crate::{config::Config, http_client::build_client}; +use crate::car_header::{CarHeaderParseResult, parse_car_header}; +use crate::config::{ + Config, DOUBLE_TAP_DELAY_MS, MAX_CONCURRENT_URL_TESTS, MIN_VALID_CONTENT_LENGTH, + RANGE_REQUEST_BYTES, +}; +use crate::http_client::build_client; 
+use crate::types::{InconsistencyType, UrlTestError, UrlTestResult}; + +const FILTER_CONCURRENCY_LIMIT: usize = 5; +const RETRI_CONCURRENCY_LIMIT: usize = 20; + +/// Response from a range request, containing the total file size from Content-Range header +#[derive(Debug)] +#[allow(dead_code)] +struct RangeResponse { + content_length: Option, + response_time_ms: u64, + body_sample: Option>, +} + +/// Classification of a single tap result for consistency checking. +#[derive(Debug, Clone)] +enum TapResult { + /// HTTP success with Content-Length >= MIN_VALID_CONTENT_LENGTH (8GB) + Valid { + content_length: u64, + response_time_ms: u64, + car_header: Option, + }, + /// HTTP success but Content-Length < MIN_VALID_CONTENT_LENGTH (likely error page) + Small { + content_length: u64, + response_time_ms: u64, + car_header: Option, + }, + /// Request failed (timeout, connection error, HTTP error status) + Failed { error: UrlTestError }, +} + +impl TapResult { + /// Classify a range request result into Valid/Small/Failed + fn from_range_result(result: Result) -> Self { + let response = match result { + Ok(r) => r, + Err(e) => return TapResult::Failed { error: e }, + }; + + let content_length = response.content_length.unwrap_or(0); + let response_time_ms = response.response_time_ms; + + // Parse CAR header from body sample + let car_header = response + .body_sample + .as_ref() + .map(|bytes| parse_car_header(bytes)); + + if content_length >= MIN_VALID_CONTENT_LENGTH { + return TapResult::Valid { + content_length, + response_time_ms, + car_header, + }; + } + + TapResult::Small { + content_length, + response_time_ms, + car_header, + } + } + + fn is_valid(&self) -> bool { + matches!(self, TapResult::Valid { .. }) + } + + fn content_length(&self) -> Option { + match self { + TapResult::Valid { content_length, .. } => Some(*content_length), + TapResult::Small { content_length, .. } => Some(*content_length), + TapResult::Failed { .. 
} => None, + } + } + + fn response_time_ms(&self) -> u64 { + match self { + TapResult::Valid { + response_time_ms, .. + } => *response_time_ms, + TapResult::Small { + response_time_ms, .. + } => *response_time_ms, + TapResult::Failed { .. } => 0, + } + } + + fn error(&self) -> Option { + match self { + TapResult::Failed { error } => Some(error.clone()), + _ => None, + } + } + + fn car_header(&self) -> Option<&CarHeaderParseResult> { + match self { + TapResult::Valid { car_header, .. } => car_header.as_ref(), + TapResult::Small { car_header, .. } => car_header.as_ref(), + TapResult::Failed { .. } => None, + } + } + + #[allow(dead_code)] + fn root_cid(&self) -> Option { + self.car_header() + .filter(|h| h.is_valid) + .and_then(|h| h.root_cid.clone()) + } +} + +/// Determines if two tap results indicate a consistent provider. +/// Everything else (Small responses, failures, mismatched sizes) = inconsistent. +fn is_consistent_pair(tap1: &TapResult, tap2: &TapResult) -> bool { + match (tap1, tap2) { + ( + TapResult::Valid { + content_length: a, .. + }, + TapResult::Valid { + content_length: b, .. + }, + ) => a == b, + // All other combinations are inconsistent: + // - VALID + SMALL: real piece vs error page + // - SMALL + VALID: error page vs real piece + // - SMALL + SMALL: both returned garbage + // - VALID + FAILED: gaming pattern (timeout then respond) + // - FAILED + VALID: gaming pattern (timeout then respond) + // - SMALL + FAILED: got garbage + // - FAILED + SMALL: got garbage + // - FAILED + FAILED: cannot verify, assume bad + _ => false, + } +} + +/// Classifies WHY a pair of tap results is inconsistent. +fn classify_inconsistency(tap1: &TapResult, tap2: &TapResult) -> InconsistencyType { + use TapResult::*; + + // Size mismatch: both valid but different Content-Length + if let ( + Valid { + content_length: a, .. + }, + Valid { + content_length: b, .. 
+ }, + ) = (tap1, tap2) + && a != b + { + return InconsistencyType::SizeMismatch; + } + + // WarmUp: tap2 returned valid data (tap1 was not valid) + if matches!(tap2, Valid { .. }) && !matches!(tap1, Valid { .. }) { + return InconsistencyType::WarmUp; + } + + // Flaky: tap1 was valid but tap2 degraded + if matches!(tap1, Valid { .. }) && !matches!(tap2, Valid { .. }) { + return InconsistencyType::Flaky; + } + + // Both failed + if matches!((tap1, tap2), (Failed { .. }, Failed { .. })) { + return InconsistencyType::BothFailed; + } + + // Default: small/garbage responses + InconsistencyType::SmallResponses +} + +/// Makes a range request (bytes=0-4095) and extracts total file size from Content-Range header. +/// Used for double-tap consistency testing to verify Content-Length without downloading full file. +async fn range_request(client: &Client, url: &str) -> Result { + let start = std::time::Instant::now(); + + let resp = client + .get(url) + .header("Range", format!("bytes=0-{}", RANGE_REQUEST_BYTES - 1)) + .send() + .await + .map_err(|e| classify_request_error(&e))?; + + let status = resp.status(); + if !status.is_success() && status != reqwest::StatusCode::PARTIAL_CONTENT { + return Err(UrlTestError::HttpError(status.as_u16())); + } + + let content_length = extract_total_length(&resp); + let response_time_ms = start.elapsed().as_millis() as u64; + + // Always capture body sample for CAR header parsing + let body_sample = resp.bytes().await.ok().map(|b| b.to_vec()); + + Ok(RangeResponse { + content_length, + response_time_ms, + body_sample, + }) +} + +/// Extracts total file size from Content-Range header (e.g., "bytes 0-4095/19327352832" -> 19327352832) +/// Falls back to Content-Length header if Content-Range is not present. 
+#[allow(dead_code)] +fn extract_total_length(resp: &reqwest::Response) -> Option { + // Try Content-Range first: "bytes 0-4095/19327352832" + if let Some(range) = resp.headers().get("content-range") + && let Ok(s) = range.to_str() + && let Some(total) = s.split('/').nth(1) + && total != "*" + { + return total.parse().ok(); + } + // Fall back to Content-Length + resp.content_length() +} + +/// Classifies a reqwest error into a more specific UrlTestError type +#[allow(dead_code)] +fn classify_request_error(e: &reqwest::Error) -> UrlTestError { + if e.is_timeout() { + UrlTestError::Timeout + } else if e.is_connect() { + if e.to_string().contains("dns") { + UrlTestError::DnsFailure + } else { + UrlTestError::ConnectionRefused + } + } else if e.to_string().contains("reset") { + UrlTestError::ConnectionReset + } else if e.to_string().contains("tls") || e.to_string().contains("ssl") { + UrlTestError::TlsError + } else { + UrlTestError::Other(e.to_string()) + } +} + +/// Performs a double-tap URL test: two range requests with a delay between them. +/// +/// STRICT CONSISTENCY RULES +/// - success: true if either request succeeded (HTTP 2xx with valid Content-Length) +/// - consistent: true ONLY if both requests return VALID responses (>= 8GB) with identical Content-Length +/// - Everything else is inconsistent: failures, small responses (error pages), mismatched sizes +pub async fn test_url_double_tap(client: &Client, url: &str) -> UrlTestResult { + let r1 = range_request(client, url).await; + tokio::time::sleep(Duration::from_millis(DOUBLE_TAP_DELAY_MS)).await; + let r2 = range_request(client, url).await; + + let tap1 = TapResult::from_range_result(r1); + let tap2 = TapResult::from_range_result(r2); + + let success = tap1.is_valid() || tap2.is_valid(); + let consistent = is_consistent_pair(&tap1, &tap2); + + let best_content_length = match (&tap1, &tap2) { + ( + TapResult::Valid { + content_length: a, .. + }, + TapResult::Valid { + content_length: b, .. 
+ }, + ) => Some(std::cmp::max(*a, *b)), + (TapResult::Valid { content_length, .. }, _) => Some(*content_length), + (_, TapResult::Valid { content_length, .. }) => Some(*content_length), + _ => tap2.content_length().or(tap1.content_length()), + }; + + let best_response_time = match (&tap1, &tap2) { + ( + _, + TapResult::Valid { + response_time_ms, .. + }, + ) => *response_time_ms, + ( + TapResult::Valid { + response_time_ms, .. + }, + _, + ) => *response_time_ms, + _ if tap2.content_length().is_some() => tap2.response_time_ms(), + _ => tap1.response_time_ms(), + }; + + let error = tap2.error().or(tap1.error()); -const FILTER_CONCURENCY_LIMIT: usize = 5; -const RETRI_CONCURENCY_LIMIT: usize = 20; + let inconsistency_type = if consistent { + None + } else { + Some(classify_inconsistency(&tap1, &tap2)) + }; + + // CAR header info: prefer tap2, fall back to tap1 + let best_car = tap2.car_header().or(tap1.car_header()); + let is_valid_car = best_car.map(|h| h.is_valid).unwrap_or(false); + let root_cid = best_car.and_then(|h| h.root_cid.clone()); + + UrlTestResult { + url: url.to_string(), + success, + consistent, + inconsistency_type, + content_length: best_content_length, + response_time_ms: best_response_time, + error, + is_valid_car, + root_cid, + } +} + +/// Tests multiple URLs in parallel using double-tap consistency checks. +/// Limits concurrency to MAX_CONCURRENT_URL_TESTS to avoid overwhelming targets. 
+pub async fn test_urls_double_tap(client: &Client, urls: Vec) -> Vec { + let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_URL_TESTS)); + + let futures: Vec<_> = urls + .into_iter() + .map(|url| { + let client = client.clone(); + let permit = semaphore.clone(); + + async move { + let _permit = permit.acquire().await.unwrap(); + test_url_double_tap(&client, &url).await + } + }) + .collect(); + + futures::future::join_all(futures).await +} /// return first working url through head requests /// let's keep both head and get versions for now @@ -45,16 +383,16 @@ pub async fn filter_working_with_head(urls: Vec) -> Option { } } }) - .buffer_unordered(FILTER_CONCURENCY_LIMIT); + .buffer_unordered(FILTER_CONCURRENCY_LIMIT); while let Some(result) = stream.next().await { if let Some(url) = result { - tracing::info!("number of requests: {:?}", counter.load(Ordering::SeqCst)); + tracing::debug!("number of requests: {:?}", counter.load(Ordering::SeqCst)); return Some(url); } } - tracing::info!("number of requests: {:?}", counter.load(Ordering::SeqCst)); + tracing::debug!("number of requests: {:?}", counter.load(Ordering::SeqCst)); None } @@ -91,7 +429,7 @@ pub async fn get_retrivability_with_head( } } }) - .buffer_unordered(RETRI_CONCURENCY_LIMIT); + .buffer_unordered(RETRI_CONCURRENCY_LIMIT); let mut sample_url: Option = None; @@ -113,7 +451,7 @@ pub async fn get_retrivability_with_head( 0.0 }; - info!( + debug!( "Successfully retrieved URLs: {} out of {} ({:.2}%)", success, total, retri_percentage ); @@ -121,6 +459,7 @@ pub async fn get_retrivability_with_head( (sample_url, round_to_two_decimals(retri_percentage)) } +#[allow(dead_code)] pub async fn check_retrievability_with_get( config: &Config, urls: Vec, @@ -147,20 +486,24 @@ pub async fn check_retrievability_with_get( let content_type = resp .headers() .get("content-type") - .and_then(|v| v.to_str().ok()); - let etag = resp.headers().get("etag"); + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + let etag 
= resp.headers().get("etag").is_some(); debug!( "Response for {}: status={}, content_type={:?}, etag={:?}", url, status, content_type, etag ); + // Drain body to allow connection reuse + drain_response_body(resp).await; + if status.is_success() && matches!( - content_type, + content_type.as_deref(), Some("application/octet-stream") | Some("application/piece") ) - && etag.is_some() + && etag { success_clone.fetch_add(1, Ordering::SeqCst); Some(url) @@ -176,7 +519,7 @@ pub async fn check_retrievability_with_get( } } }) - .buffer_unordered(RETRI_CONCURENCY_LIMIT); + .buffer_unordered(RETRI_CONCURRENCY_LIMIT); let mut sample_url: Option = None; @@ -199,7 +542,7 @@ pub async fn check_retrievability_with_get( 0.0 }; - info!( + debug!( "Successfully retrieved URLs: {} out of {} ({:.2}%)", success, total, retri_percentage ); @@ -210,3 +553,607 @@ pub async fn check_retrievability_with_get( fn round_to_two_decimals(number: f64) -> f64 { (number * 100.0).round() / 100.0 } + +/// Maximum Content-Length we're willing to drain for connection reuse. +/// Error responses are typically small; large bodies aren't worth draining. +pub const MAX_DRAIN_CONTENT_LENGTH: u64 = 8192; + +/// Drains the response body to allow HTTP connection reuse. +/// Only drains if Content-Length is present and small (typical for error responses). +/// Skips draining for chunked/unknown transfer encodings to avoid reading large streams. 
+async fn drain_response_body(resp: reqwest::Response) { + let content_length = resp.content_length(); + + // Check transfer-encoding header - skip draining for chunked responses + let is_chunked = resp + .headers() + .get("transfer-encoding") + .and_then(|v| v.to_str().ok()) + .is_some_and(|te| te.eq_ignore_ascii_case("chunked")); + + // Only drain when we know the size is small (Content-Length present and <= threshold) + // Skip draining for: + // - Chunked responses (unknown size, potentially large) + // - Missing Content-Length (unknown size) + // - Large Content-Length (not worth the cost) + if !is_chunked && content_length.is_some_and(|len| len <= MAX_DRAIN_CONTENT_LENGTH) { + // Consume the body - ignore errors, we just want to drain it + let _ = resp.bytes().await; + } + // For unknown/large bodies, dropping resp closes the connection + // This is acceptable since large file responses are less frequent +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Instant; + use wiremock::{Mock, MockServer, ResponseTemplate, matchers::method}; + + #[tokio::test] + async fn test_drain_small_response_body() { + let mock_server = MockServer::start().await; + let small_body = vec![0u8; 100]; // Well under MAX_DRAIN_CONTENT_LENGTH + + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(200).set_body_raw(small_body, "text/plain")) + .mount(&mock_server) + .await; + + let client = Client::new(); + let resp = client.get(mock_server.uri()).send().await.unwrap(); + + // Should complete without error - body is drained + drain_response_body(resp).await; + } + + #[tokio::test] + async fn test_drain_large_response_skipped() { + let mock_server = MockServer::start().await; + // Body larger than MAX_DRAIN_CONTENT_LENGTH (8192) + let large_body = vec![0u8; 50_000]; + + Mock::given(method("GET")) + .respond_with( + ResponseTemplate::new(200).set_body_raw(large_body, "application/octet-stream"), + ) + .mount(&mock_server) + .await; + + let client = 
Client::new(); + let resp = client.get(mock_server.uri()).send().await.unwrap(); + + let start = Instant::now(); + drain_response_body(resp).await; + let elapsed = start.elapsed(); + + // Should return quickly since we skip draining large bodies + // Reading 50KB would take noticeable time; skipping is instant + assert!( + elapsed.as_millis() < 100, + "Large body should be skipped, not drained. Took {:?}", + elapsed + ); + } + + #[tokio::test] + async fn test_drain_empty_response_body() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(204)) // No content + .mount(&mock_server) + .await; + + let client = Client::new(); + let resp = client.get(mock_server.uri()).send().await.unwrap(); + + // Should handle empty body gracefully + drain_response_body(resp).await; + } + + #[tokio::test] + async fn test_drain_error_response() { + let mock_server = MockServer::start().await; + let error_body = b"Not Found".to_vec(); + + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(404).set_body_raw(error_body, "text/plain")) + .mount(&mock_server) + .await; + + let client = Client::new(); + let resp = client.get(mock_server.uri()).send().await.unwrap(); + + // Error responses are typically small and should be drained + drain_response_body(resp).await; + } + + #[tokio::test] + async fn test_drain_chunked_response_skipped() { + let mock_server = MockServer::start().await; + // Small body but with chunked transfer-encoding should NOT be drained + let body = vec![0u8; 100]; + + Mock::given(method("GET")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("transfer-encoding", "chunked") + .set_body_raw(body, "text/plain"), + ) + .mount(&mock_server) + .await; + + let client = Client::new(); + let resp = client.get(mock_server.uri()).send().await.unwrap(); + + let start = Instant::now(); + drain_response_body(resp).await; + let elapsed = start.elapsed(); + + // Should return quickly since 
chunked responses are skipped + assert!( + elapsed.as_millis() < 100, + "Chunked response should be skipped, not drained. Took {:?}", + elapsed + ); + } + + #[tokio::test] + async fn test_drain_missing_content_length_skipped() { + let mock_server = MockServer::start().await; + + // Response without Content-Length header (wiremock may add it, but let's test the logic) + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(200)) + .mount(&mock_server) + .await; + + let client = Client::new(); + let resp = client.get(mock_server.uri()).send().await.unwrap(); + + // Should complete without error - missing Content-Length means skip draining + drain_response_body(resp).await; + } + + #[tokio::test] + async fn test_range_request_extracts_content_length_from_content_range() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206) + .insert_header("Content-Range", "bytes 0-4095/19327352832") + .insert_header("Content-Length", "4096"), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = range_request(&client, &mock_server.uri()).await; + + assert!(result.is_ok()); + let response = result.unwrap(); + assert_eq!(response.content_length, Some(19327352832)); + } + + #[tokio::test] + async fn test_double_tap_both_success_consistent() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206) + .insert_header("Content-Range", "bytes 0-4095/16000000000"), + ) + .expect(2) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = test_url_double_tap(&client, 
&mock_server.uri()).await; + + assert!(result.success); + assert!(result.consistent); + assert_eq!(result.content_length, Some(16000000000)); + } + + /// SMALL + VALID = WarmUp pattern - THE KEY REAL-WORLD SCENARIO + /// First request returns small stub, second returns real data after warm-up. + /// This is exactly what double-tap was built to handle. + #[tokio::test] + async fn test_double_tap_warm_up_small_then_valid() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + // First request returns small stub/placeholder + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206).insert_header("Content-Range", "bytes 0-252/252"), + ) + .up_to_n_times(1) + .mount(&mock_server) + .await; + + // Second request returns valid large file - warm-up worked! + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206) + .insert_header("Content-Range", "bytes 0-4095/16000000000"), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = test_url_double_tap(&client, &mock_server.uri()).await; + + assert!(result.success); // tap2 was valid - we got real data! + assert!(!result.consistent); + assert_eq!( + result.inconsistency_type, + Some(crate::types::InconsistencyType::WarmUp) + ); + assert_eq!(result.content_length, Some(16000000000)); // We captured the valid size + } + + /// FAIL + VALID = WarmUp pattern - THIS IS WHY DOUBLE-TAP EXISTS + /// First request fails/times out, second succeeds with valid data. + /// We still get the data, provider just needs warm-up. 
+ #[tokio::test] + async fn test_double_tap_warm_up_fail_then_valid() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + // First request fails (simulates timeout/warm-up needed) + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with(ResponseTemplate::new(500)) + .up_to_n_times(1) + .mount(&mock_server) + .await; + + // Second request succeeds with valid large file + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206) + .insert_header("Content-Range", "bytes 0-4095/16000000000"), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = test_url_double_tap(&client, &mock_server.uri()).await; + + assert!(result.success); // tap2 succeeded - we got the data! + assert!(!result.consistent); + assert_eq!( + result.inconsistency_type, + Some(crate::types::InconsistencyType::WarmUp) + ); + assert_eq!(result.content_length, Some(16000000000)); + } + + /// VALID + FAIL = Flaky - provider served data once then stopped + #[tokio::test] + async fn test_double_tap_flaky_valid_then_fail() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + // First request succeeds + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206) + .insert_header("Content-Range", "bytes 0-4095/16000000000"), + ) + .up_to_n_times(1) + .mount(&mock_server) + .await; + + // Second request fails - provider degraded + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with(ResponseTemplate::new(500)) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = test_url_double_tap(&client, &mock_server.uri()).await; + + assert!(result.success); // tap1 
succeeded - we got the data + assert!(!result.consistent); + assert_eq!( + result.inconsistency_type, + Some(crate::types::InconsistencyType::Flaky) + ); + assert_eq!(result.content_length, Some(16000000000)); + } + + /// SMALL + SMALL = NOT successful - neither returned valid (>= 8GB) data + #[tokio::test] + async fn test_double_tap_both_small_responses() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + // Both requests return small responses - neither is valid + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206).insert_header("Content-Range", "bytes 0-500/500"), + ) + .expect(2) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = test_url_double_tap(&client, &mock_server.uri()).await; + + assert!(!result.success); // Neither tap was valid - NOT successful! + assert!(!result.consistent); + assert_eq!( + result.inconsistency_type, + Some(crate::types::InconsistencyType::SmallResponses) + ); + } + + /// FAIL + FAIL = inconsistent (cannot verify, assume bad) + #[tokio::test] + async fn test_double_tap_both_fail() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + // Both requests fail + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with(ResponseTemplate::new(500)) + .expect(2) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = test_url_double_tap(&client, &mock_server.uri()).await; + + assert!(!result.success); // Both failed + assert!(!result.consistent); // STRICT: FAIL + FAIL = inconsistent + } + + #[tokio::test] + async fn test_batch_url_testing() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + 
.and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206) + .insert_header("Content-Range", "bytes 0-4095/16000000000"), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let urls = vec![ + format!("{}/piece/a", mock_server.uri()), + format!("{}/piece/b", mock_server.uri()), + format!("{}/piece/c", mock_server.uri()), + ]; + + let results = test_urls_double_tap(&client, urls).await; + + assert_eq!(results.len(), 3); + assert!(results.iter().all(|r| r.success)); + assert!(results.iter().all(|r| r.consistent)); + } + + #[tokio::test] + async fn test_classify_flaky_valid_then_fail() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + // First request succeeds with valid size + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206) + .insert_header("Content-Range", "bytes 0-4095/16000000000"), + ) + .up_to_n_times(1) + .mount(&mock_server) + .await; + + // Second request fails - provider degraded + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with(ResponseTemplate::new(500)) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = test_url_double_tap(&client, &mock_server.uri()).await; + + assert!(result.success); // tap1 was valid + assert!(!result.consistent); + assert_eq!( + result.inconsistency_type, + Some(crate::types::InconsistencyType::Flaky) + ); + } + + #[tokio::test] + async fn test_classify_both_failed() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with(ResponseTemplate::new(500)) + .expect(2) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + 
.timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = test_url_double_tap(&client, &mock_server.uri()).await; + + assert!(!result.consistent); + assert_eq!( + result.inconsistency_type, + Some(crate::types::InconsistencyType::BothFailed) + ); + } + + #[tokio::test] + async fn test_classify_small_responses() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + // Both return small responses - neither is valid + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206).insert_header("Content-Range", "bytes 0-500/500"), + ) + .expect(2) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = test_url_double_tap(&client, &mock_server.uri()).await; + + assert!(!result.success); // Neither tap was valid + assert!(!result.consistent); + assert_eq!( + result.inconsistency_type, + Some(crate::types::InconsistencyType::SmallResponses) + ); + } + + #[tokio::test] + async fn test_classify_size_mismatch() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + // First returns one size + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206) + .insert_header("Content-Range", "bytes 0-4095/16000000000"), + ) + .up_to_n_times(1) + .mount(&mock_server) + .await; + + // Second returns different size + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206) + .insert_header("Content-Range", "bytes 0-4095/20000000000"), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = test_url_double_tap(&client, &mock_server.uri()).await; + + assert!(!result.consistent); + assert_eq!( + 
result.inconsistency_type, + Some(crate::types::InconsistencyType::SizeMismatch) + ); + } + + #[tokio::test] + async fn test_consistent_has_no_inconsistency_type() { + use wiremock::matchers::header; + + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206) + .insert_header("Content-Range", "bytes 0-4095/16000000000"), + ) + .expect(2) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let result = test_url_double_tap(&client, &mock_server.uri()).await; + + assert!(result.consistent); + assert_eq!(result.inconsistency_type, None); + } +} diff --git a/url_finder/src/utils/reqwest_retry.rs b/url_finder/src/utils/reqwest_retry.rs index a6248e3..4936189 100644 --- a/url_finder/src/utils/reqwest_retry.rs +++ b/url_finder/src/utils/reqwest_retry.rs @@ -39,6 +39,27 @@ impl Middleware for HttpRequestContextLogger { pub fn build_reqwest_retry_client( min_retry_interval_ms: u64, max_retry_interval_ms: u64, +) -> ClientWithMiddleware { + build_reqwest_retry_client_with_config( + min_retry_interval_ms, + max_retry_interval_ms, + 3, + None, + None, + ) +} + +/// Build an HTTP client with configurable retry policy and timeouts. 
+/// +/// - `max_retries`: Number of retry attempts (use 1 for services with long gateway timeouts) +/// - `connect_timeout_ms`: TCP connection timeout (None = no timeout) +/// - `request_timeout_ms`: Per-request timeout applied to each attempt (None = no timeout) +pub fn build_reqwest_retry_client_with_config( + min_retry_interval_ms: u64, + max_retry_interval_ms: u64, + max_retries: u32, + connect_timeout_ms: Option, + request_timeout_ms: Option, ) -> ClientWithMiddleware { let retry_policy = ExponentialBackoff::builder() .jitter(Jitter::None) @@ -47,10 +68,22 @@ pub fn build_reqwest_retry_client( Duration::from_millis(min_retry_interval_ms), Duration::from_millis(max_retry_interval_ms), ) - .build_with_max_retries(3); + .build_with_max_retries(max_retries); + + let mut client_builder = Client::builder(); + + if let Some(connect_ms) = connect_timeout_ms { + client_builder = client_builder.connect_timeout(Duration::from_millis(connect_ms)); + } + + if let Some(request_ms) = request_timeout_ms { + client_builder = client_builder.timeout(Duration::from_millis(request_ms)); + } + + let client = client_builder.build().expect("Failed to build HTTP client"); - ClientBuilder::new(Client::new()) - .with(HttpRequestContextLogger) // Add context before retry middleware + ClientBuilder::new(client) + .with(HttpRequestContextLogger) .with(RetryTransientMiddleware::new_with_policy(retry_policy)) .build() } diff --git a/url_finder/tests/common/db_setup.rs b/url_finder/tests/common/db_setup.rs index d02060e..c289f70 100644 --- a/url_finder/tests/common/db_setup.rs +++ b/url_finder/tests/common/db_setup.rs @@ -1,5 +1,6 @@ #![allow(dead_code)] +use chrono::{DateTime, Utc}; use sqlx::{PgPool, Postgres, migrate::MigrateDatabase}; pub use super::container::{POSTGRES_PASSWORD, POSTGRES_USER}; @@ -145,3 +146,124 @@ pub async fn seed_url_result( .await .expect("Failed to insert url_result"); } + +pub async fn seed_provider_with_url_status( + app_pool: &PgPool, + provider_id: &str, + 
last_working_url: Option<&str>, + is_consistent: bool, +) { + sqlx::query( + r#"INSERT INTO + storage_providers ( + provider_id, + last_working_url, + is_consistent + ) + VALUES + ($1, $2, $3) + ON CONFLICT (provider_id) DO UPDATE SET + last_working_url = $2, + is_consistent = $3"#, + ) + .bind(provider_id) + .bind(last_working_url) + .bind(is_consistent) + .execute(app_pool) + .await + .expect("Failed to insert provider with url status"); +} + +#[allow(clippy::too_many_arguments)] +pub async fn seed_bms_bandwidth_result( + app_pool: &PgPool, + provider_id: &str, + url_tested: &str, + status: &str, + ping_avg_ms: Option, + head_avg_ms: Option, + ttfb_ms: Option, + download_speed_mbps: Option, +) { + sqlx::query( + r#"INSERT INTO + bms_bandwidth_results ( + provider_id, + bms_job_id, + url_tested, + routing_key, + worker_count, + status, + ping_avg_ms, + head_avg_ms, + ttfb_ms, + download_speed_mbps, + completed_at + ) + VALUES + ($1, gen_random_uuid(), $2, 'test-region', 1, $3, $4, $5, $6, $7, NOW())"#, + ) + .bind(provider_id) + .bind(url_tested) + .bind(status) + .bind(ping_avg_ms) + .bind(head_avg_ms) + .bind(ttfb_ms) + .bind(download_speed_mbps) + .execute(app_pool) + .await + .expect("Failed to insert bms_bandwidth_result"); +} + +#[allow(clippy::too_many_arguments)] +pub async fn seed_url_result_at( + app_pool: &PgPool, + provider_id: &str, + client_id: Option<&str>, + working_url: Option<&str>, + retrievability: f64, + result_code: &str, + tested_at: DateTime, + is_consistent: Option, + is_reliable: Option, +) { + assert!( + (0.0..=100.0).contains(&retrievability), + "retrievability must be in range 0..=100, got {retrievability}" + ); + + let result_type = if client_id.is_some() { + "ProviderClient" + } else { + "Provider" + }; + + sqlx::query( + r#"INSERT INTO + url_results ( + provider_id, + client_id, + result_type, + working_url, + retrievability_percent, + result_code, + tested_at, + is_consistent, + is_reliable + ) + VALUES + ($1, $2, 
$3::discovery_type, $4, $5, $6::result_code, $7, $8, $9)"#, + ) + .bind(provider_id) + .bind(client_id) + .bind(result_type) + .bind(working_url) + .bind(retrievability) + .bind(result_code) + .bind(tested_at) + .bind(is_consistent) + .bind(is_reliable) + .execute(app_pool) + .await + .expect("Failed to insert url_result with timestamp"); +} diff --git a/url_finder/tests/common/mock_servers.rs b/url_finder/tests/common/mock_servers.rs index ee57700..879901c 100644 --- a/url_finder/tests/common/mock_servers.rs +++ b/url_finder/tests/common/mock_servers.rs @@ -86,26 +86,30 @@ impl MockExternalServices { pub async fn setup_piece_retrieval_mock(&self, piece_cid: &str, should_succeed: bool) { if should_succeed { - for http_method in ["GET", "HEAD"] { - Mock::given(method(http_method)) - .and(path(format!("/piece/{piece_cid}"))) - .respond_with( - ResponseTemplate::new(200) - .insert_header("content-type", "application/piece") - .insert_header("etag", "\"mock-etag-12345\"") - .set_body_raw(vec![0u8; 100], "application/piece"), - ) - .mount(&self.piece_server) - .await; - } + // Total file size must be >= 8GB (MIN_VALID_CONTENT_LENGTH) to pass URL validation + // Using 16GB to be safely above the threshold + let total_file_size: u64 = 16_000_000_000; + + // For GET requests WITH Range header (double-tap testing) + // Returns 206 Partial Content with Content-Range header indicating total file size + let range_body = vec![0u8; 4096]; // Just the requested range bytes + Mock::given(method("GET")) + .and(path(format!("/piece/{piece_cid}"))) + .and(wiremock::matchers::header("Range", "bytes=0-4095")) + .respond_with( + ResponseTemplate::new(206) // Partial Content + .insert_header("etag", "\"mock-etag-12345\"") + .insert_header("Content-Range", format!("bytes 0-4095/{total_file_size}")) + .set_body_raw(range_body, "application/piece"), + ) + .mount(&self.piece_server) + .await; } else { - for http_method in ["GET", "HEAD"] { - Mock::given(method(http_method)) - 
.and(path(format!("/piece/{piece_cid}"))) - .respond_with(ResponseTemplate::new(404)) - .mount(&self.piece_server) - .await; - } + Mock::given(method("GET")) + .and(path(format!("/piece/{piece_cid}"))) + .respond_with(ResponseTemplate::new(404)) + .mount(&self.piece_server) + .await; } } } diff --git a/url_finder/tests/common/test_app.rs b/url_finder/tests/common/test_app.rs index 7c5de8a..1afffca 100644 --- a/url_finder/tests/common/test_app.rs +++ b/url_finder/tests/common/test_app.rs @@ -10,7 +10,15 @@ use std::{ net::SocketAddr, sync::{Arc, atomic::AtomicUsize}, }; -use url_finder::{AppState, config::Config, repository::*}; +use url_finder::{ + AppState, + config::Config, + repository::{ + BmsBandwidthResultRepository, DealRepository, StorageProviderRepository, + UrlResultRepository, + }, + services::provider_service::ProviderService, +}; use super::{TestDatabases, mock_servers::MockExternalServices}; @@ -30,11 +38,22 @@ pub async fn create_test_app(dbs: &TestDatabases, mocks: &MockExternalServices) mocks.cid_contact_url(), )); + let url_repo = Arc::new(UrlResultRepository::new(dbs.app_pool.clone())); + let bms_repo = Arc::new(BmsBandwidthResultRepository::new(dbs.app_pool.clone())); + let storage_provider_repo = Arc::new(StorageProviderRepository::new(dbs.app_pool.clone())); + let provider_service = Arc::new(ProviderService::new( + url_repo.clone(), + bms_repo.clone(), + storage_provider_repo.clone(), + )); + let app_state = Arc::new(AppState { deal_repo: Arc::new(DealRepository::new(dbs.app_pool.clone())), active_requests, - storage_provider_repo: Arc::new(StorageProviderRepository::new(dbs.app_pool.clone())), - url_repo: Arc::new(UrlResultRepository::new(dbs.app_pool.clone())), + storage_provider_repo, + url_repo, + bms_repo, + provider_service, config, }); diff --git a/url_finder/tests/common/test_constants.rs b/url_finder/tests/common/test_constants.rs index 0438b09..262f1e4 100644 --- a/url_finder/tests/common/test_constants.rs +++ 
b/url_finder/tests/common/test_constants.rs @@ -56,3 +56,20 @@ pub fn multiaddrs_http_8080() -> Vec { pub fn multiaddrs_empty() -> Vec { vec![] } + +// Provider API test constants +pub const TEST_PROVIDER_3_DB: &str = "88883000"; +pub const TEST_PROVIDER_3_API: &str = "f088883000"; +pub const TEST_CLIENT_2_ID_DB: &str = "2000"; +pub const TEST_CLIENT_2_ID_API: &str = "f02000"; + +pub fn test_client_2_id() -> ClientId { + ClientId::new(TEST_CLIENT_2_ID_DB).unwrap() +} + +pub fn test_client_2_address() -> ClientAddress { + ClientAddress::new(TEST_CLIENT_2_ID_API).unwrap() +} + +pub const TEST_WORKING_URL: &str = "http://example.com/piece"; +pub const TEST_WORKING_URL_2: &str = "http://example2.com/piece"; diff --git a/url_finder/tests/common/test_schema.sql b/url_finder/tests/common/test_schema.sql index 95c99e0..736723d 100644 --- a/url_finder/tests/common/test_schema.sql +++ b/url_finder/tests/common/test_schema.sql @@ -6,5 +6,6 @@ CREATE TABLE IF NOT EXISTS unified_verified_deal ( "claimId" INTEGER NOT NULL DEFAULT 0, "clientId" TEXT, "providerId" TEXT, - "pieceCid" TEXT + "pieceCid" TEXT, + "pieceSize" NUMERIC ); diff --git a/url_finder/tests/integration_tests/bms_client.rs b/url_finder/tests/integration_tests/bms_client.rs new file mode 100644 index 0000000..3f51d4b --- /dev/null +++ b/url_finder/tests/integration_tests/bms_client.rs @@ -0,0 +1,299 @@ +use serde_json::json; +use url_finder::bms_client::BmsClient; +use uuid::Uuid; +use wiremock::matchers::{body_partial_json, method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +async fn create_bms_mock() -> (MockServer, BmsClient) { + let mock = MockServer::start().await; + let client = BmsClient::new(mock.uri()); + (mock, client) +} + +// --- create_job tests --- + +#[tokio::test] +async fn test_create_job_success() { + let (mock, client) = create_bms_mock().await; + + let job_id = Uuid::new_v4(); + Mock::given(method("POST")) + .and(path("/jobs")) + .and(body_partial_json(json!({ + "url": 
"http://example.com/file", + "routing_key": "us_east", + "worker_count": 3 + }))) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "id": job_id, + "status": "Pending", + "url": "http://example.com/file", + "routing_key": "us_east" + }))) + .mount(&mock) + .await; + + let result = client + .create_job( + "http://example.com/file".to_string(), + 3, + Some("f012345".to_string()), + ) + .await; + + assert!(result.is_ok()); + let job = result.unwrap(); + assert_eq!(job.id, job_id); + assert_eq!(job.status, "Pending"); + assert_eq!(job.url, "http://example.com/file"); + assert_eq!(job.routing_key, "us_east"); +} + +#[tokio::test] +async fn test_create_job_with_entity() { + let (mock, client) = create_bms_mock().await; + + let job_id = Uuid::new_v4(); + Mock::given(method("POST")) + .and(path("/jobs")) + .and(body_partial_json(json!({ + "url": "http://example.com/file", + "entity": "f012345" + }))) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "id": job_id, + "status": "Pending", + "url": "http://example.com/file", + "routing_key": "us_east" + }))) + .mount(&mock) + .await; + + let result = client + .create_job( + "http://example.com/file".to_string(), + 3, + Some("f012345".to_string()), + ) + .await; + + assert!(result.is_ok()); +} + +#[tokio::test] +async fn test_create_job_without_entity() { + let (mock, client) = create_bms_mock().await; + + let job_id = Uuid::new_v4(); + Mock::given(method("POST")) + .and(path("/jobs")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "id": job_id, + "status": "Pending", + "url": "http://example.com/file", + "routing_key": "us_east" + }))) + .mount(&mock) + .await; + + let result = client + .create_job("http://example.com/file".to_string(), 3, None) + .await; + + assert!(result.is_ok()); +} + +#[tokio::test] +async fn test_create_job_bad_request() { + let (mock, client) = create_bms_mock().await; + + Mock::given(method("POST")) + .and(path("/jobs")) + .respond_with( + 
ResponseTemplate::new(400).set_body_json(json!({"error": "Invalid URL format"})), + ) + .mount(&mock) + .await; + + let result = client + .create_job("not-a-valid-url".to_string(), 3, None) + .await; + + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("400"), "Error should mention 400 status"); +} + +#[tokio::test] +async fn test_create_job_server_error() { + let (mock, client) = create_bms_mock().await; + + Mock::given(method("POST")) + .and(path("/jobs")) + .respond_with(ResponseTemplate::new(500).set_body_string("Internal Server Error")) + .mount(&mock) + .await; + + let result = client + .create_job("http://example.com/file".to_string(), 3, None) + .await; + + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("500"), "Error should mention 500 status"); +} + +#[tokio::test] +async fn test_create_job_negative_worker_count() { + let (_mock, client) = create_bms_mock().await; + + let result = client + .create_job("http://example.com/file".to_string(), -1, None) + .await; + + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("non-negative"), + "Error should mention non-negative requirement" + ); +} + +// --- get_job tests --- + +#[tokio::test] +async fn test_get_job_success_pending() { + let (mock, client) = create_bms_mock().await; + + let job_id = Uuid::new_v4(); + Mock::given(method("GET")) + .and(path(format!("/jobs/{job_id}"))) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "id": job_id, + "status": "Pending", + "url": "http://example.com/file", + "routing_key": "us_east", + "details": { + "worker_count": 3, + "size_mb": 100 + } + }))) + .mount(&mock) + .await; + + let result = client.get_job(job_id).await; + + assert!(result.is_ok()); + let job = result.unwrap(); + assert_eq!(job.id, job_id); + assert_eq!(job.status, "Pending"); + assert!(!BmsClient::is_job_finished(&job.status)); +} + +#[tokio::test] 
+async fn test_get_job_success_completed_with_results() { + let (mock, client) = create_bms_mock().await; + + let job_id = Uuid::new_v4(); + let subjob_id = Uuid::new_v4(); + + Mock::given(method("GET")) + .and(path(format!("/jobs/{job_id}"))) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "id": job_id, + "status": "Completed", + "url": "http://example.com/file", + "routing_key": "us_east", + "details": { + "worker_count": 3, + "size_mb": 100 + }, + "sub_jobs": [ + { + "id": subjob_id, + "status": "Completed", + "worker_data": [ + { + "ping": {"avg": 0.025, "min": 0.020, "max": 0.030}, + "head": {"avg": 50.0, "min": 45.0, "max": 55.0}, + "download": { + "download_speed": 500.0, + "time_to_first_byte_ms": 100.0, + "total_bytes": 104857600, + "elapsed_secs": 10.0 + } + } + ] + } + ] + }))) + .mount(&mock) + .await; + + let result = client.get_job(job_id).await; + + assert!(result.is_ok()); + let job = result.unwrap(); + assert_eq!(job.id, job_id); + assert_eq!(job.status, "Completed"); + assert!(BmsClient::is_job_finished(&job.status)); + + let sub_jobs = job.sub_jobs.unwrap(); + assert_eq!(sub_jobs.len(), 1); + assert_eq!(sub_jobs[0].status, "Completed"); + + let worker_data = sub_jobs[0].worker_data.as_ref().unwrap(); + assert_eq!(worker_data.len(), 1); + assert_eq!(worker_data[0].ping.as_ref().unwrap().avg, Some(0.025)); + assert_eq!( + worker_data[0].download.as_ref().unwrap().download_speed, + Some(500.0) + ); +} + +#[tokio::test] +async fn test_get_job_not_found() { + let (mock, client) = create_bms_mock().await; + + let job_id = Uuid::new_v4(); + Mock::given(method("GET")) + .and(path(format!("/jobs/{job_id}"))) + .respond_with(ResponseTemplate::new(404).set_body_json(json!({"error": "Job not found"}))) + .mount(&mock) + .await; + + let result = client.get_job(job_id).await; + + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("404"), "Error should mention 404 status"); +} + +#[tokio::test] 
+async fn test_get_job_server_error() { + let (mock, client) = create_bms_mock().await; + + let job_id = Uuid::new_v4(); + Mock::given(method("GET")) + .and(path(format!("/jobs/{job_id}"))) + .respond_with(ResponseTemplate::new(500).set_body_string("Internal Server Error")) + .mount(&mock) + .await; + + let result = client.get_job(job_id).await; + + assert!(result.is_err()); +} + +// --- is_job_finished tests --- + +#[tokio::test] +async fn test_is_job_finished_statuses() { + assert!(BmsClient::is_job_finished("Completed")); + assert!(BmsClient::is_job_finished("Failed")); + assert!(BmsClient::is_job_finished("Cancelled")); + assert!(!BmsClient::is_job_finished("Pending")); + assert!(!BmsClient::is_job_finished("Running")); + assert!(!BmsClient::is_job_finished("InProgress")); + assert!(!BmsClient::is_job_finished("unknown")); +} diff --git a/url_finder/tests/integration_tests/bms_result_repo.rs b/url_finder/tests/integration_tests/bms_result_repo.rs new file mode 100644 index 0000000..2f4a2db --- /dev/null +++ b/url_finder/tests/integration_tests/bms_result_repo.rs @@ -0,0 +1,338 @@ +use sqlx::PgPool; +use url_finder::repository::BmsBandwidthResultRepository; +use url_finder::types::ProviderId; +use uuid::Uuid; + +use crate::common::*; + +async fn create_repo(pool: &PgPool) -> BmsBandwidthResultRepository { + BmsBandwidthResultRepository::new(pool.clone()) +} + +fn provider_id(id: &str) -> ProviderId { + ProviderId::new(id).unwrap() +} + +#[tokio::test] +async fn test_insert_pending_creates_record() { + let ctx = TestContext::new().await; + let repo = create_repo(&ctx.dbs.app_pool).await; + + let job_id = Uuid::new_v4(); + let provider = provider_id(TEST_PROVIDER_1_DB); + + let result = repo + .insert_pending(&provider, job_id, TEST_WORKING_URL, "us_east", 3) + .await + .expect("Failed to insert pending result"); + + assert_eq!(result.provider_id, TEST_PROVIDER_1_DB); + assert_eq!(result.bms_job_id, job_id); + assert_eq!(result.url_tested, TEST_WORKING_URL); + 
assert_eq!(result.routing_key, "us_east"); + assert_eq!(result.worker_count, 3); + assert_eq!(result.status, "Pending"); + assert!(result.ping_avg_ms.is_none()); + assert!(result.head_avg_ms.is_none()); + assert!(result.ttfb_ms.is_none()); + assert!(result.download_speed_mbps.is_none()); + assert!(result.completed_at.is_none()); +} + +#[tokio::test] +async fn test_insert_pending_duplicate_job_id_fails() { + let ctx = TestContext::new().await; + let repo = create_repo(&ctx.dbs.app_pool).await; + + let job_id = Uuid::new_v4(); + let provider = provider_id(TEST_PROVIDER_1_DB); + + // First insert succeeds + repo.insert_pending(&provider, job_id, TEST_WORKING_URL, "us_east", 3) + .await + .expect("First insert should succeed"); + + // Second insert with same job_id should fail (UNIQUE constraint) + let result = repo + .insert_pending(&provider, job_id, TEST_WORKING_URL, "us_east", 3) + .await; + + assert!(result.is_err(), "Duplicate job_id should fail"); +} + +#[tokio::test] +async fn test_get_pending_returns_only_pending() { + let ctx = TestContext::new().await; + let repo = create_repo(&ctx.dbs.app_pool).await; + + let provider = provider_id(TEST_PROVIDER_1_DB); + let job1 = Uuid::new_v4(); + let job2 = Uuid::new_v4(); + + // Insert two pending results + repo.insert_pending(&provider, job1, TEST_WORKING_URL, "us_east", 3) + .await + .unwrap(); + repo.insert_pending(&provider, job2, TEST_WORKING_URL_2, "eu_west", 5) + .await + .unwrap(); + + // Mark one as completed + repo.update_completed( + job1, + "Completed", + Some(10.0), + Some(20.0), + Some(30.0), + Some(100.0), + ) + .await + .unwrap(); + + // get_pending should only return job2 + let pending = repo.get_pending().await.unwrap(); + + assert_eq!(pending.len(), 1); + assert_eq!(pending[0].bms_job_id, job2); + assert_eq!(pending[0].status, "Pending"); +} + +#[tokio::test] +async fn test_update_completed_sets_all_fields() { + let ctx = TestContext::new().await; + let repo = create_repo(&ctx.dbs.app_pool).await; + + 
let provider = provider_id(TEST_PROVIDER_1_DB); + let job_id = Uuid::new_v4(); + + repo.insert_pending(&provider, job_id, TEST_WORKING_URL, "us_east", 3) + .await + .unwrap(); + + repo.update_completed( + job_id, + "Completed", + Some(15.5), + Some(25.0), + Some(50.0), + Some(500.0), + ) + .await + .unwrap(); + + // Verify by fetching latest + let result = repo + .get_latest_for_provider(&provider) + .await + .unwrap() + .expect("Should have result"); + + assert_eq!(result.status, "Completed"); + assert!(result.completed_at.is_some()); + + // Compare BigDecimal values as f64 (approximate) + let ping = result + .ping_avg_ms + .map(|v| v.to_string().parse::().unwrap()); + let head = result + .head_avg_ms + .map(|v| v.to_string().parse::().unwrap()); + let ttfb = result + .ttfb_ms + .map(|v| v.to_string().parse::().unwrap()); + let speed = result + .download_speed_mbps + .map(|v| v.to_string().parse::().unwrap()); + + assert_eq!(ping, Some(15.5)); + assert_eq!(head, Some(25.0)); + assert_eq!(ttfb, Some(50.0)); + assert_eq!(speed, Some(500.0)); +} + +#[tokio::test] +async fn test_update_completed_not_found_fails() { + let ctx = TestContext::new().await; + let repo = create_repo(&ctx.dbs.app_pool).await; + + let nonexistent_job = Uuid::new_v4(); + + let result = repo + .update_completed( + nonexistent_job, + "Completed", + Some(10.0), + Some(20.0), + Some(30.0), + Some(100.0), + ) + .await; + + assert!(result.is_err(), "Update for non-existent job should fail"); +} + +#[tokio::test] +async fn test_get_latest_completed_excludes_pending() { + let ctx = TestContext::new().await; + let repo = create_repo(&ctx.dbs.app_pool).await; + + let provider = provider_id(TEST_PROVIDER_1_DB); + + // Insert a completed result first + let job1 = Uuid::new_v4(); + repo.insert_pending(&provider, job1, TEST_WORKING_URL, "us_east", 3) + .await + .unwrap(); + repo.update_completed( + job1, + "Completed", + Some(10.0), + Some(20.0), + Some(30.0), + Some(100.0), + ) + .await + .unwrap(); + + // 
Insert a pending result (more recent) + let job2 = Uuid::new_v4(); + repo.insert_pending(&provider, job2, TEST_WORKING_URL_2, "eu_west", 5) + .await + .unwrap(); + + // get_latest_completed should return job1, not job2 + let result = repo + .get_latest_completed_for_provider(&provider) + .await + .unwrap() + .expect("Should have completed result"); + + assert_eq!(result.bms_job_id, job1); + assert_eq!(result.status, "Completed"); +} + +#[tokio::test] +async fn test_get_latest_completed_returns_none_when_only_pending() { + let ctx = TestContext::new().await; + let repo = create_repo(&ctx.dbs.app_pool).await; + + let provider = provider_id(TEST_PROVIDER_1_DB); + let job_id = Uuid::new_v4(); + + // Insert only pending + repo.insert_pending(&provider, job_id, TEST_WORKING_URL, "us_east", 3) + .await + .unwrap(); + + let result = repo + .get_latest_completed_for_provider(&provider) + .await + .unwrap(); + + assert!(result.is_none()); +} + +#[tokio::test] +async fn test_get_latest_completed_for_providers_batch() { + let ctx = TestContext::new().await; + let repo = create_repo(&ctx.dbs.app_pool).await; + + let provider1 = provider_id(TEST_PROVIDER_1_DB); + let provider2 = provider_id(TEST_PROVIDER_2_DB); + + // Provider 1: completed result + let job1 = Uuid::new_v4(); + repo.insert_pending(&provider1, job1, TEST_WORKING_URL, "us_east", 3) + .await + .unwrap(); + repo.update_completed( + job1, + "Completed", + Some(10.0), + Some(20.0), + Some(30.0), + Some(100.0), + ) + .await + .unwrap(); + + // Provider 2: completed result + let job2 = Uuid::new_v4(); + repo.insert_pending(&provider2, job2, TEST_WORKING_URL_2, "eu_west", 5) + .await + .unwrap(); + repo.update_completed( + job2, + "Completed", + Some(15.0), + Some(25.0), + Some(35.0), + Some(200.0), + ) + .await + .unwrap(); + + // Batch query + let ids = vec![ + TEST_PROVIDER_1_DB.to_string(), + TEST_PROVIDER_2_DB.to_string(), + ]; + let results = repo.get_latest_completed_for_providers(&ids).await.unwrap(); + + 
assert_eq!(results.len(), 2); + + let r1 = results.iter().find(|r| r.provider_id == TEST_PROVIDER_1_DB); + let r2 = results.iter().find(|r| r.provider_id == TEST_PROVIDER_2_DB); + + assert!(r1.is_some()); + assert!(r2.is_some()); + assert_eq!(r1.unwrap().bms_job_id, job1); + assert_eq!(r2.unwrap().bms_job_id, job2); +} + +#[tokio::test] +async fn test_get_latest_completed_for_providers_empty_input() { + let ctx = TestContext::new().await; + let repo = create_repo(&ctx.dbs.app_pool).await; + + let results = repo.get_latest_completed_for_providers(&[]).await.unwrap(); + + assert!(results.is_empty()); +} + +#[tokio::test] +async fn test_get_history_for_provider() { + let ctx = TestContext::new().await; + let repo = create_repo(&ctx.dbs.app_pool).await; + + let provider = provider_id(TEST_PROVIDER_1_DB); + + // Insert 3 results with small delays to ensure unique timestamps + for i in 0..3 { + let job_id = Uuid::new_v4(); + repo.insert_pending(&provider, job_id, TEST_WORKING_URL, "us_east", 3) + .await + .unwrap(); + repo.update_completed(job_id, "Completed", Some(10.0 + i as f64), None, None, None) + .await + .unwrap(); + // Sleep between iterations to ensure unique created_at timestamps + // so the ordering assertion is meaningfully validated + if i < 2 { + tokio::time::sleep(tokio::time::Duration::from_millis(2)).await; + } + } + + // Get history with limit 2 + let history = repo.get_history_for_provider(&provider, 2).await.unwrap(); + + assert_eq!(history.len(), 2); + // Verify ordered by created_at DESC (most recent first) + // Use strict inequality to confirm timestamps are actually different + assert!( + history[0].created_at > history[1].created_at, + "History should be ordered by created_at DESC with distinct timestamps: first={:?}, second={:?}", + history[0].created_at, + history[1].created_at + ); +} diff --git a/url_finder/tests/integration_tests/clients_providers.rs b/url_finder/tests/integration_tests/clients_providers.rs new file mode 100644 index 
0000000..6f48b1c --- /dev/null +++ b/url_finder/tests/integration_tests/clients_providers.rs @@ -0,0 +1,71 @@ +use assert_json_diff::assert_json_include; +use axum::http::StatusCode; +use serde_json::json; + +use crate::common::*; + +#[tokio::test] +async fn test_get_client_providers_success() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + Some(TEST_CLIENT_ID_DB), + Some(TEST_WORKING_URL), + 90.0, + "Success", + ) + .await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_2_DB).await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_2_DB, + Some(TEST_CLIENT_ID_DB), + Some(TEST_WORKING_URL_2), + 85.0, + "Success", + ) + .await; + + let response = ctx + .app + .get(&format!("/clients/{}/providers", TEST_CLIENT_ID_API)) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_json_include!( + actual: body, + expected: json!({ + "client_id": TEST_CLIENT_ID_API, + "total": 2 + }) + ); + assert_eq!(body["providers"].as_array().unwrap().len(), 2); +} + +#[tokio::test] +async fn test_get_client_providers_not_found() { + let ctx = TestContext::new().await; + + let response = ctx.app.get("/clients/f099999999/providers").await; + + assert_eq!(response.status_code(), StatusCode::NOT_FOUND); + let body: serde_json::Value = response.json(); + assert_json_include!(actual: body, expected: json!({"error_code": "NOT_FOUND"})); +} + +#[tokio::test] +async fn test_get_client_providers_invalid_address() { + let ctx = TestContext::new().await; + + let response = ctx.app.get("/clients/invalid/providers").await; + + assert_eq!(response.status_code(), StatusCode::BAD_REQUEST); + let body: serde_json::Value = response.json(); + assert_json_include!(actual: body, expected: json!({"error_code": "INVALID_ADDRESS"})); +} diff --git a/url_finder/tests/integration_tests/extended_response.rs 
b/url_finder/tests/integration_tests/extended_response.rs new file mode 100644 index 0000000..c05c304 --- /dev/null +++ b/url_finder/tests/integration_tests/extended_response.rs @@ -0,0 +1,676 @@ +//! Integration tests for extended provider response functionality. +//! +//! Tests verify that: +//! - Standard responses include is_consistent/is_reliable but NOT diagnostics/scheduling +//! - Extended responses (extended=true) include diagnostics and scheduling sections +//! - List and bulk endpoints respect the extended flag + +use assert_json_diff::assert_json_include; +use axum::http::StatusCode; +use chrono::Utc; +use serde_json::json; + +use crate::common::*; + +// ============================================================================= +// GET /providers/{id} tests +// ============================================================================= + +#[tokio::test] +async fn test_get_provider_standard_response_has_quality_metrics() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 85.0, + "Success", + Utc::now(), + Some(true), // is_consistent + Some(true), // is_reliable + ) + .await; + + let response = ctx + .app + .get(&format!("/providers/{TEST_PROVIDER_1_API}")) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + // Standard fields present + assert_json_include!( + actual: body, + expected: json!({ + "provider_id": TEST_PROVIDER_1_API, + "working_url": TEST_WORKING_URL, + "retrievability_percent": 85.0, + "is_consistent": true, + "is_reliable": true + }) + ); + + // Extended fields ABSENT in standard response + assert!( + body.get("diagnostics").is_none(), + "diagnostics should not be present in standard response" + ); + assert!( + body.get("scheduling").is_none(), + "scheduling should not be present in standard response" + ); +} + 
+#[tokio::test] +async fn test_get_provider_standard_response_with_null_quality_metrics() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 75.0, + "Success", + Utc::now(), + None, // is_consistent not set + None, // is_reliable not set + ) + .await; + + let response = ctx + .app + .get(&format!("/providers/{TEST_PROVIDER_1_API}")) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + // Fields should be absent (null -> omitted via skip_serializing_if) + assert!( + body.get("is_consistent").is_none(), + "is_consistent should not be present when null" + ); + assert!( + body.get("is_reliable").is_none(), + "is_reliable should not be present when null" + ); + + // Extended fields still absent + assert!(body.get("diagnostics").is_none()); + assert!(body.get("scheduling").is_none()); +} + +#[tokio::test] +async fn test_get_provider_extended_response_has_diagnostics() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 90.0, + "Success", + Utc::now(), + Some(true), + Some(true), + ) + .await; + + let response = ctx + .app + .get(&format!("/providers/{TEST_PROVIDER_1_API}?extended=true")) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + // Standard fields still present + assert_json_include!( + actual: body, + expected: json!({ + "provider_id": TEST_PROVIDER_1_API, + "is_consistent": true, + "is_reliable": true + }) + ); + + // Diagnostics present in extended response + let diagnostics = body + .get("diagnostics") + .expect("diagnostics should be present in extended response"); + assert_json_include!( + actual: diagnostics, + expected: 
json!({ + "result_code": "Success" + }) + ); + + // scheduling is present when there's a storage_provider record + // (it may be null if no SP record exists - depends on service implementation) +} + +#[tokio::test] +async fn test_get_provider_extended_response_has_scheduling() { + let ctx = TestContext::new().await; + + // Seed both provider and url_result + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 80.0, + "Success", + Utc::now(), + Some(true), + Some(false), + ) + .await; + + let response = ctx + .app + .get(&format!("/providers/{TEST_PROVIDER_1_API}?extended=true")) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + // Diagnostics always present in extended mode + assert!( + body.get("diagnostics").is_some(), + "diagnostics should be present in extended response" + ); + + // scheduling should be present when SP record exists + let scheduling = body + .get("scheduling") + .expect("scheduling should be present in extended response"); + + // Verify scheduling structure + assert!( + scheduling.get("url_discovery").is_some(), + "scheduling.url_discovery should exist" + ); + assert!( + scheduling.get("bms_test").is_some(), + "scheduling.bms_test should exist" + ); +} + +#[tokio::test] +async fn test_get_provider_extended_false_explicit() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 85.0, + "Success", + Utc::now(), + Some(true), + Some(true), + ) + .await; + + // Explicitly set extended=false + let response = ctx + .app + .get(&format!("/providers/{TEST_PROVIDER_1_API}?extended=false")) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + // Quality metrics 
present + assert!(body.get("is_consistent").is_some()); + assert!(body.get("is_reliable").is_some()); + + // Extended fields absent + assert!(body.get("diagnostics").is_none()); + assert!(body.get("scheduling").is_none()); +} + +// ============================================================================= +// GET /providers (list) tests +// ============================================================================= + +#[tokio::test] +async fn test_list_providers_standard_response() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 80.0, + "Success", + Utc::now(), + Some(true), + Some(false), + ) + .await; + + let response = ctx.app.get("/providers").await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + let providers = body["providers"].as_array().expect("providers array"); + assert_eq!(providers.len(), 1); + + let first = &providers[0]; + + // Quality metrics present + assert!( + first.get("is_consistent").is_some(), + "is_consistent should be present in list response" + ); + + // Extended fields absent + assert!( + first.get("diagnostics").is_none(), + "diagnostics should not be present in standard list response" + ); + assert!( + first.get("scheduling").is_none(), + "scheduling should not be present in standard list response" + ); +} + +#[tokio::test] +async fn test_list_providers_extended_response() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 85.0, + "Success", + Utc::now(), + Some(true), + Some(true), + ) + .await; + + let response = ctx.app.get("/providers?extended=true").await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + 
+ let providers = body["providers"].as_array().expect("providers array"); + assert_eq!(providers.len(), 1); + + let first = &providers[0]; + + // Quality metrics present + assert_json_include!( + actual: first, + expected: json!({ + "is_consistent": true, + "is_reliable": true + }) + ); + + // Diagnostics present in extended mode + assert!( + first.get("diagnostics").is_some(), + "diagnostics should be present in extended list response" + ); + + // NOTE: scheduling is intentionally NOT included in list responses + // to avoid N+1 queries. This is by design. + assert!( + first.get("scheduling").is_none(), + "scheduling should NOT be present in list response (N+1 avoidance)" + ); +} + +#[tokio::test] +async fn test_list_providers_extended_multiple() { + let ctx = TestContext::new().await; + + // Provider 1: consistent and reliable + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 90.0, + "Success", + Utc::now(), + Some(true), + Some(true), + ) + .await; + + // Provider 2: not consistent, not reliable + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_2_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_2_DB, + None, + Some(TEST_WORKING_URL_2), + 50.0, + "Success", + Utc::now(), + Some(false), + Some(false), + ) + .await; + + let response = ctx.app.get("/providers?extended=true&limit=10").await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + let providers = body["providers"].as_array().expect("providers array"); + assert_eq!(providers.len(), 2); + + // All providers should have diagnostics + for provider in providers { + assert!( + provider.get("diagnostics").is_some(), + "each provider should have diagnostics in extended mode" + ); + } +} + +// ============================================================================= +// POST /providers/bulk tests +// 
============================================================================= + +#[tokio::test] +async fn test_bulk_providers_standard_response() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 85.0, + "Success", + Utc::now(), + Some(true), + Some(true), + ) + .await; + + let response = ctx + .app + .post("/providers/bulk") + .json(&json!({ + "provider_ids": [TEST_PROVIDER_1_API] + })) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + let providers = body["providers"].as_array().expect("providers array"); + assert_eq!(providers.len(), 1); + + let first = &providers[0]; + + // Quality metrics present + assert!(first.get("is_consistent").is_some()); + assert!(first.get("is_reliable").is_some()); + + // Extended fields absent + assert!( + first.get("diagnostics").is_none(), + "diagnostics should not be present in standard bulk response" + ); +} + +#[tokio::test] +async fn test_bulk_providers_extended_response() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 90.0, + "Success", + Utc::now(), + Some(true), + Some(false), + ) + .await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_2_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_2_DB, + None, + Some(TEST_WORKING_URL_2), + 75.0, + "Success", + Utc::now(), + Some(false), + Some(true), + ) + .await; + + let response = ctx + .app + .post("/providers/bulk?extended=true") + .json(&json!({ + "provider_ids": [TEST_PROVIDER_1_API, TEST_PROVIDER_2_API] + })) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + let providers = 
body["providers"].as_array().expect("providers array"); + assert_eq!(providers.len(), 2); + + // All providers should have diagnostics in extended mode + for provider in providers { + assert!( + provider.get("diagnostics").is_some(), + "diagnostics should be present in extended bulk response" + ); + + let diagnostics = provider.get("diagnostics").unwrap(); + assert!( + diagnostics.get("result_code").is_some(), + "diagnostics should have result_code" + ); + } +} + +#[tokio::test] +async fn test_bulk_providers_extended_with_not_found() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 80.0, + "Success", + Utc::now(), + Some(true), + Some(true), + ) + .await; + + let response = ctx + .app + .post("/providers/bulk?extended=true") + .json(&json!({ + "provider_ids": [TEST_PROVIDER_1_API, "f099999999"] + })) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + // One found with diagnostics + let providers = body["providers"].as_array().expect("providers array"); + assert_eq!(providers.len(), 1); + assert!(providers[0].get("diagnostics").is_some()); + + // One not found + let not_found = body["not_found"].as_array().expect("not_found array"); + assert_eq!(not_found.len(), 1); + assert_eq!(not_found[0].as_str().unwrap(), "f099999999"); +} + +// ============================================================================= +// BMS extended fields tests +// ============================================================================= + +#[tokio::test] +async fn test_get_provider_bms_extended_fields() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 85.0, + "Success", + Utc::now(), + Some(true), + 
Some(true), + ) + .await; + + // Seed BMS result with extended fields + seed_bms_bandwidth_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + TEST_WORKING_URL, + "completed", + Some(10.0), // ping_avg_ms + Some(25.0), // head_avg_ms + Some(50.0), // ttfb_ms + Some(100.0), // download_speed_mbps + ) + .await; + + // Standard response - BMS extended fields should be absent + let response = ctx + .app + .get(&format!("/providers/{TEST_PROVIDER_1_API}")) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + let bandwidth = &body["performance"]["bandwidth"]; + assert!(bandwidth["status"].is_string()); + assert!(bandwidth["ping_avg_ms"].is_number()); + + // Extended BMS fields should be absent in standard response + assert!( + bandwidth.get("worker_count").is_none() || bandwidth["worker_count"].is_null(), + "worker_count should not be in standard response" + ); + assert!( + bandwidth.get("routing_key").is_none() || bandwidth["routing_key"].is_null(), + "routing_key should not be in standard response" + ); + assert!( + bandwidth.get("url_tested").is_none() || bandwidth["url_tested"].is_null(), + "url_tested should not be in standard response" + ); + + // Extended response - BMS extended fields should be present + let response = ctx + .app + .get(&format!("/providers/{TEST_PROVIDER_1_API}?extended=true")) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + let bandwidth = &body["performance"]["bandwidth"]; + + // Extended BMS fields should be present + assert!( + bandwidth.get("worker_count").is_some() && !bandwidth["worker_count"].is_null(), + "worker_count should be present in extended response" + ); + assert!( + bandwidth.get("routing_key").is_some() && !bandwidth["routing_key"].is_null(), + "routing_key should be present in extended response" + ); + assert!( + bandwidth.get("url_tested").is_some() && !bandwidth["url_tested"].is_null(), + 
"url_tested should be present in extended response" + ); +} + +// ============================================================================= +// Error code in diagnostics tests +// ============================================================================= + +#[tokio::test] +async fn test_get_provider_extended_with_error_code() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + // Seed with a failure result code + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + None, // No working URL + 0.0, + "NoDealsFound", + Utc::now(), + None, + None, + ) + .await; + + let response = ctx + .app + .get(&format!("/providers/{TEST_PROVIDER_1_API}?extended=true")) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + // Diagnostics present + let diagnostics = body + .get("diagnostics") + .expect("diagnostics should be present"); + + assert_json_include!( + actual: diagnostics, + expected: json!({ + "result_code": "NoDealsFound" + }) + ); +} diff --git a/url_finder/tests/integration_tests/history_retrievability.rs b/url_finder/tests/integration_tests/history_retrievability.rs new file mode 100644 index 0000000..3e1e8cd --- /dev/null +++ b/url_finder/tests/integration_tests/history_retrievability.rs @@ -0,0 +1,292 @@ +use assert_json_diff::assert_json_include; +use axum::http::StatusCode; +use chrono::{Duration, Utc}; +use serde_json::json; + +use crate::common::*; + +#[tokio::test] +async fn test_history_returns_latest_per_day() { + let ctx = TestContext::new().await; + let now = Utc::now(); + let yesterday = now - Duration::days(1); + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + + // Two results on same day - should return only latest (85.5) + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 75.0, + "Success", + yesterday - Duration::hours(2), + Some(true), + 
Some(true), + ) + .await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 85.5, + "Success", + yesterday, + Some(true), + Some(true), + ) + .await; + + let response = ctx + .app + .get(&format!( + "/providers/{TEST_PROVIDER_1_API}/history/retrievability" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_eq!(body["data"].as_array().unwrap().len(), 1); + assert_eq!(body["data"][0]["retrievability_percent"], 85.5); +} + +#[tokio::test] +async fn test_history_date_range_filtering() { + let ctx = TestContext::new().await; + let now = Utc::now(); + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + + // Result 5 days ago + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 80.0, + "Success", + now - Duration::days(5), + Some(true), + Some(true), + ) + .await; + + // Result 2 days ago + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 90.0, + "Success", + now - Duration::days(2), + Some(true), + Some(true), + ) + .await; + + let from = (now - Duration::days(3)).format("%Y-%m-%d"); + let to = (now - Duration::days(1)).format("%Y-%m-%d"); + + let response = ctx + .app + .get(&format!( + "/providers/{TEST_PROVIDER_1_API}/history/retrievability?from={from}&to={to}" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + // Should only include the 2-day-ago result + assert_eq!(body["data"].as_array().unwrap().len(), 1); + assert_eq!(body["data"][0]["retrievability_percent"], 90.0); +} + +#[tokio::test] +async fn test_history_extended_fields() { + let ctx = TestContext::new().await; + let now = Utc::now(); + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 
85.5, + "Success", + now - Duration::days(1), + Some(true), + Some(false), + ) + .await; + + let response = ctx + .app + .get(&format!( + "/providers/{TEST_PROVIDER_1_API}/history/retrievability?extended=true" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_json_include!( + actual: &body["data"][0], + expected: json!({ + "retrievability_percent": 85.5, + "is_consistent": true, + "is_reliable": false, + "working_url": TEST_WORKING_URL, + "result_code": "Success" + }) + ); + assert!(body["data"][0]["tested_at"].is_string()); +} + +#[tokio::test] +async fn test_history_minimal_omits_extended() { + let ctx = TestContext::new().await; + let now = Utc::now(); + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 85.5, + "Success", + now - Duration::days(1), + Some(true), + Some(true), + ) + .await; + + let response = ctx + .app + .get(&format!( + "/providers/{TEST_PROVIDER_1_API}/history/retrievability" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + // Minimal should NOT have extended fields + assert!(body["data"][0].get("is_consistent").is_none()); + assert!(body["data"][0].get("working_url").is_none()); + assert!(body["data"][0].get("tested_at").is_none()); + + // Should have minimal fields + assert!(body["data"][0]["date"].is_string()); + assert_eq!(body["data"][0]["retrievability_percent"], 85.5); +} + +#[tokio::test] +async fn test_history_empty_range_returns_empty_array() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + // No url_results seeded + + let response = ctx + .app + .get(&format!( + "/providers/{TEST_PROVIDER_1_API}/history/retrievability" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: 
serde_json::Value = response.json(); + + assert_eq!(body["data"].as_array().unwrap().len(), 0); +} + +#[tokio::test] +async fn test_history_invalid_date_range() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + + let response = ctx + .app + .get(&format!( + "/providers/{TEST_PROVIDER_1_API}/history/retrievability?from=2026-01-10&to=2026-01-01" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::BAD_REQUEST); + let body: serde_json::Value = response.json(); + assert_json_include!(actual: body, expected: json!({"error_code": "INVALID_DATE_RANGE"})); +} + +#[tokio::test] +async fn test_history_exceeds_max_days() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + + let response = ctx + .app + .get(&format!( + "/providers/{TEST_PROVIDER_1_API}/history/retrievability?from=2025-01-01&to=2026-01-09" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::BAD_REQUEST); + let body: serde_json::Value = response.json(); + assert_json_include!(actual: body, expected: json!({"error_code": "DATE_RANGE_EXCEEDED"})); +} + +#[tokio::test] +async fn test_history_invalid_provider() { + let ctx = TestContext::new().await; + + let response = ctx + .app + .get("/providers/invalid/history/retrievability") + .await; + + assert_eq!(response.status_code(), StatusCode::BAD_REQUEST); + let body: serde_json::Value = response.json(); + assert_json_include!(actual: body, expected: json!({"error_code": "INVALID_ADDRESS"})); +} + +#[tokio::test] +async fn test_history_provider_client() { + let ctx = TestContext::new().await; + let now = Utc::now(); + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result_at( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + Some(TEST_CLIENT_ID_DB), + Some(TEST_WORKING_URL), + 92.0, + "Success", + now - Duration::days(1), + Some(true), + Some(true), + ) + .await; + + let response = ctx + .app + .get(&format!( + 
"/providers/{TEST_PROVIDER_1_API}/clients/{TEST_CLIENT_ID_API}/history/retrievability" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_eq!(body["provider_id"], TEST_PROVIDER_1_API); + assert_eq!(body["client_id"], TEST_CLIENT_ID_API); + assert_eq!(body["data"][0]["retrievability_percent"], 92.0); +} diff --git a/url_finder/tests/integration_tests/mod.rs b/url_finder/tests/integration_tests/mod.rs index 8afa14c..9f62114 100644 --- a/url_finder/tests/integration_tests/mod.rs +++ b/url_finder/tests/integration_tests/mod.rs @@ -1,7 +1,18 @@ +pub mod bms_client; +pub mod bms_result_repo; +pub mod clients_providers; +pub mod extended_response; pub mod find_client; pub mod find_retri_sp; pub mod find_retri_sp_client; pub mod find_url_sp; pub mod find_url_sp_client; +pub mod history_retrievability; +pub mod providers_bulk; +pub mod providers_client; +pub mod providers_get; +pub mod providers_list; +pub mod providers_reset; pub mod rate_limiting; pub mod url_discovery_service; +pub mod url_validation; diff --git a/url_finder/tests/integration_tests/providers_bulk.rs b/url_finder/tests/integration_tests/providers_bulk.rs new file mode 100644 index 0000000..0408eb9 --- /dev/null +++ b/url_finder/tests/integration_tests/providers_bulk.rs @@ -0,0 +1,92 @@ +use assert_json_diff::assert_json_include; +use axum::http::StatusCode; +use serde_json::json; + +use crate::common::*; + +#[tokio::test] +async fn test_bulk_providers_mixed() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 90.0, + "Success", + ) + .await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_2_DB).await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_2_DB, + None, + Some(TEST_WORKING_URL_2), + 85.0, + "Success", + ) + .await; + + let response = ctx + .app + 
.post("/providers/bulk") + .json(&json!({ + "provider_ids": [TEST_PROVIDER_1_API, TEST_PROVIDER_2_API, "f099999999"] + })) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_eq!(body["providers"].as_array().unwrap().len(), 2); + assert_json_include!( + actual: body, + expected: json!({ + "not_found": ["f099999999"] + }) + ); +} + +#[tokio::test] +async fn test_bulk_providers_all_not_found() { + let ctx = TestContext::new().await; + + let response = ctx + .app + .post("/providers/bulk") + .json(&json!({ + "provider_ids": ["f099999991", "f099999992"] + })) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_json_include!( + actual: body, + expected: json!({ + "providers": [] + }) + ); + assert_eq!(body["not_found"].as_array().unwrap().len(), 2); +} + +#[tokio::test] +async fn test_bulk_providers_exceeds_limit() { + let ctx = TestContext::new().await; + + let provider_ids: Vec<String> = (0..101).map(|i| format!("f0{i}")).collect(); + + let response = ctx + .app + .post("/providers/bulk") + .json(&json!({ "provider_ids": provider_ids })) + .await; + + assert_eq!(response.status_code(), StatusCode::BAD_REQUEST); + let body: serde_json::Value = response.json(); + assert!(body["error"].as_str().unwrap().contains("exceeds maximum")); +} diff --git a/url_finder/tests/integration_tests/providers_client.rs b/url_finder/tests/integration_tests/providers_client.rs new file mode 100644 index 0000000..5badc98 --- /dev/null +++ b/url_finder/tests/integration_tests/providers_client.rs @@ -0,0 +1,102 @@ +use assert_json_diff::assert_json_include; +use axum::http::StatusCode; +use serde_json::json; + +use crate::common::*; + +#[tokio::test] +async fn test_get_provider_client_success() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result( + &ctx.dbs.app_pool,
TEST_PROVIDER_1_DB, + Some(TEST_CLIENT_ID_DB), + Some(TEST_WORKING_URL), + 92.5, + "Success", + ) + .await; + + let response = ctx + .app + .get(&format!( + "/providers/{}/clients/{}", + TEST_PROVIDER_1_API, TEST_CLIENT_ID_API + )) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_json_include!( + actual: body, + expected: json!({ + "provider_id": TEST_PROVIDER_1_API, + "client_id": TEST_CLIENT_ID_API, + "working_url": TEST_WORKING_URL, + "retrievability_percent": 92.5 + }) + ); +} + +#[tokio::test] +async fn test_get_provider_client_not_found() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 80.0, + "Success", + ) + .await; + + let response = ctx + .app + .get(&format!( + "/providers/{}/clients/{}", + TEST_PROVIDER_1_API, TEST_CLIENT_ID_API + )) + .await; + + assert_eq!(response.status_code(), StatusCode::NOT_FOUND); +} + +#[tokio::test] +async fn test_get_provider_client_invalid_provider() { + let ctx = TestContext::new().await; + + let response = ctx + .app + .get(&format!( + "/providers/invalid/clients/{}", + TEST_CLIENT_ID_API + )) + .await; + + assert_eq!(response.status_code(), StatusCode::BAD_REQUEST); + let body: serde_json::Value = response.json(); + assert_json_include!(actual: body, expected: json!({"error_code": "INVALID_ADDRESS"})); +} + +#[tokio::test] +async fn test_get_provider_client_invalid_client() { + let ctx = TestContext::new().await; + + let response = ctx + .app + .get(&format!( + "/providers/{}/clients/invalid", + TEST_PROVIDER_1_API + )) + .await; + + assert_eq!(response.status_code(), StatusCode::BAD_REQUEST); + let body: serde_json::Value = response.json(); + assert_json_include!(actual: body, expected: json!({"error_code": "INVALID_ADDRESS"})); +} diff --git 
a/url_finder/tests/integration_tests/providers_get.rs b/url_finder/tests/integration_tests/providers_get.rs new file mode 100644 index 0000000..7f0aead --- /dev/null +++ b/url_finder/tests/integration_tests/providers_get.rs @@ -0,0 +1,115 @@ +use assert_json_diff::assert_json_include; +use axum::http::StatusCode; +use serde_json::json; + +use crate::common::*; + +#[tokio::test] +async fn test_get_provider_success() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some(TEST_WORKING_URL), + 85.5, + "Success", + ) + .await; + seed_bms_bandwidth_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + TEST_WORKING_URL, + "completed", + Some(10.5), + Some(25.0), + Some(50.0), + Some(100.0), + ) + .await; + + let response = ctx + .app + .get(&format!("/providers/{TEST_PROVIDER_1_API}")) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_json_include!( + actual: body, + expected: json!({ + "provider_id": TEST_PROVIDER_1_API, + "working_url": TEST_WORKING_URL, + "retrievability_percent": 85.5, + "performance": { + "bandwidth": { + "status": "completed", + "ping_avg_ms": 10.5, + "head_avg_ms": 25.0, + "ttfb_ms": 50.0, + "download_speed_mbps": 100.0 + } + } + }) + ); + assert!(body["tested_at"].is_string()); +} + +#[tokio::test] +async fn test_get_provider_without_bms() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_2_DB).await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_2_DB, + None, + Some(TEST_WORKING_URL), + 75.0, + "Success", + ) + .await; + + let response = ctx + .app + .get(&format!("/providers/{TEST_PROVIDER_2_API}")) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_json_include!( + actual: body, + expected: json!({ + "provider_id": 
TEST_PROVIDER_2_API, + "working_url": TEST_WORKING_URL, + "retrievability_percent": 75.0 + }) + ); + assert!(body["performance"]["bandwidth"].is_null()); +} + +#[tokio::test] +async fn test_get_provider_invalid_address() { + let ctx = TestContext::new().await; + + let response = ctx.app.get("/providers/invalid").await; + + assert_eq!(response.status_code(), StatusCode::BAD_REQUEST); + let body: serde_json::Value = response.json(); + assert_json_include!(actual: body, expected: json!({"error_code": "INVALID_ADDRESS"})); +} + +#[tokio::test] +async fn test_get_provider_not_found() { + let ctx = TestContext::new().await; + + let response = ctx.app.get("/providers/f099999999").await; + + assert_eq!(response.status_code(), StatusCode::NOT_FOUND); + let body: serde_json::Value = response.json(); + assert_json_include!(actual: body, expected: json!({"error_code": "NOT_FOUND"})); +} diff --git a/url_finder/tests/integration_tests/providers_list.rs b/url_finder/tests/integration_tests/providers_list.rs new file mode 100644 index 0000000..0dc3b43 --- /dev/null +++ b/url_finder/tests/integration_tests/providers_list.rs @@ -0,0 +1,287 @@ +use assert_json_diff::assert_json_include; +use axum::http::StatusCode; +use serde_json::json; + +use crate::common::*; + +#[tokio::test] +async fn test_list_providers_success() { + let ctx = TestContext::new().await; + + for (db_id, url) in [ + (TEST_PROVIDER_1_DB, TEST_WORKING_URL), + (TEST_PROVIDER_2_DB, TEST_WORKING_URL_2), + (TEST_PROVIDER_3_DB, TEST_WORKING_URL), + ] { + seed_provider(&ctx.dbs.app_pool, db_id).await; + seed_url_result(&ctx.dbs.app_pool, db_id, None, Some(url), 80.0, "Success").await; + } + + let response = ctx.app.get("/providers?limit=10&offset=0").await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_json_include!( + actual: body, + expected: json!({ + "total": 3, + "limit": 10, + "offset": 0 + }) + ); + 
assert_eq!(body["providers"].as_array().unwrap().len(), 3); +} + +#[tokio::test] +async fn test_list_providers_pagination() { + let ctx = TestContext::new().await; + + for db_id in [TEST_PROVIDER_1_DB, TEST_PROVIDER_2_DB, TEST_PROVIDER_3_DB] { + seed_provider(&ctx.dbs.app_pool, db_id).await; + seed_url_result( + &ctx.dbs.app_pool, + db_id, + None, + Some(TEST_WORKING_URL), + 80.0, + "Success", + ) + .await; + } + + let response = ctx.app.get("/providers?limit=2&offset=1").await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_json_include!( + actual: body, + expected: json!({ + "total": 3, + "limit": 2, + "offset": 1 + }) + ); + assert_eq!(body["providers"].as_array().unwrap().len(), 2); +} + +#[tokio::test] +async fn test_list_providers_empty() { + let ctx = TestContext::new().await; + + let response = ctx.app.get("/providers").await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_json_include!( + actual: body, + expected: json!({ + "total": 0, + "providers": [] + }) + ); +} + +#[tokio::test] +async fn test_list_providers_filter_has_working_url_true() { + let ctx = TestContext::new().await; + + // Provider 1: has working URL, consistent + seed_provider_with_url_status( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + Some("http://example.com/piece/123"), + true, + ) + .await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some("http://example.com/piece/123"), + 80.0, + "Success", + ) + .await; + + // Provider 2: no working URL + seed_provider_with_url_status(&ctx.dbs.app_pool, TEST_PROVIDER_2_DB, None, true).await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_2_DB, + None, + None, + 0.0, + "NoDealsFound", + ) + .await; + + // Filter for providers WITH working URL + let response = ctx.app.get("/providers?has_working_url=true").await; + + assert_eq!(response.status_code(), StatusCode::OK); + let 
body: serde_json::Value = response.json(); + + assert_eq!(body["total"].as_i64().unwrap(), 1); + assert_eq!(body["providers"].as_array().unwrap().len(), 1); +} + +#[tokio::test] +async fn test_list_providers_filter_has_working_url_false() { + let ctx = TestContext::new().await; + + // Provider 1: has working URL + seed_provider_with_url_status( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + Some("http://example.com/piece/123"), + true, + ) + .await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some("http://example.com/piece/123"), + 80.0, + "Success", + ) + .await; + + // Provider 2: no working URL + seed_provider_with_url_status(&ctx.dbs.app_pool, TEST_PROVIDER_2_DB, None, true).await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_2_DB, + None, + None, + 0.0, + "NoDealsFound", + ) + .await; + + // Filter for providers WITHOUT working URL + let response = ctx.app.get("/providers?has_working_url=false").await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_eq!(body["total"].as_i64().unwrap(), 1); + assert_eq!(body["providers"].as_array().unwrap().len(), 1); +} + +#[tokio::test] +async fn test_list_providers_filter_is_consistent() { + let ctx = TestContext::new().await; + + // Provider 1: has working URL, consistent + seed_provider_with_url_status( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + Some("http://example.com/piece/123"), + true, + ) + .await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some("http://example.com/piece/123"), + 80.0, + "Success", + ) + .await; + + // Provider 2: has working URL, NOT consistent + seed_provider_with_url_status( + &ctx.dbs.app_pool, + TEST_PROVIDER_2_DB, + Some("http://example.com/piece/456"), + false, + ) + .await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_2_DB, + None, + Some("http://example.com/piece/456"), + 60.0, + "Success", + ) + .await; + + // Filter for consistent providers only 
+ let response = ctx.app.get("/providers?is_consistent=true").await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_eq!(body["total"].as_i64().unwrap(), 1); +} + +#[tokio::test] +async fn test_list_providers_filter_combined() { + let ctx = TestContext::new().await; + + // Provider 1: has URL, consistent (BMS ready) + seed_provider_with_url_status( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + Some("http://example.com/piece/123"), + true, + ) + .await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_1_DB, + None, + Some("http://example.com/piece/123"), + 80.0, + "Success", + ) + .await; + + // Provider 2: has URL, NOT consistent + seed_provider_with_url_status( + &ctx.dbs.app_pool, + TEST_PROVIDER_2_DB, + Some("http://example.com/piece/456"), + false, + ) + .await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_2_DB, + None, + Some("http://example.com/piece/456"), + 60.0, + "Success", + ) + .await; + + // Provider 3: no URL + seed_provider_with_url_status(&ctx.dbs.app_pool, TEST_PROVIDER_3_DB, None, true).await; + seed_url_result( + &ctx.dbs.app_pool, + TEST_PROVIDER_3_DB, + None, + None, + 0.0, + "NoDealsFound", + ) + .await; + + // Filter for BMS-ready providers (has_working_url=true AND is_consistent=true) + let response = ctx + .app + .get("/providers?has_working_url=true&is_consistent=true") + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + let body: serde_json::Value = response.json(); + + assert_eq!(body["total"].as_i64().unwrap(), 1); + assert_eq!(body["providers"].as_array().unwrap().len(), 1); +} diff --git a/url_finder/tests/integration_tests/providers_reset.rs b/url_finder/tests/integration_tests/providers_reset.rs new file mode 100644 index 0000000..ab3386f --- /dev/null +++ b/url_finder/tests/integration_tests/providers_reset.rs @@ -0,0 +1,188 @@ +use axum::http::StatusCode; +use chrono::{Duration, Utc}; + +use crate::common::*; + +#[tokio::test] 
+async fn test_reset_url_discovery_schedule() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + + // Set next_url_discovery_at to future (1 day from now) + sqlx::query( + r#"UPDATE storage_providers + SET next_url_discovery_at = NOW() + INTERVAL '1 day' + WHERE provider_id = $1"#, + ) + .bind(TEST_PROVIDER_1_DB) + .execute(&ctx.dbs.app_pool) + .await + .unwrap(); + + let response = ctx + .app + .post(&format!( + "/providers/{TEST_PROVIDER_1_API}/reset?schedule=url_discovery" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + + // Verify schedule was reset to approximately now + let row: (chrono::DateTime<Utc>,) = sqlx::query_as( + r#"SELECT next_url_discovery_at FROM storage_providers WHERE provider_id = $1"#, + ) + .bind(TEST_PROVIDER_1_DB) + .fetch_one(&ctx.dbs.app_pool) + .await + .unwrap(); + + let diff = Utc::now() - row.0; + assert!( + diff < Duration::seconds(5), + "next_url_discovery_at should be reset to now, diff was {diff:?}" + ); +} + +#[tokio::test] +async fn test_reset_bms_test_schedule() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + + // Set next_bms_test_at to future + sqlx::query( + r#"UPDATE storage_providers + SET next_bms_test_at = NOW() + INTERVAL '7 days' + WHERE provider_id = $1"#, + ) + .bind(TEST_PROVIDER_1_DB) + .execute(&ctx.dbs.app_pool) + .await + .unwrap(); + + let response = ctx + .app + .post(&format!( + "/providers/{TEST_PROVIDER_1_API}/reset?schedule=bms_test" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + + let row: (chrono::DateTime<Utc>,) = + sqlx::query_as(r#"SELECT next_bms_test_at FROM storage_providers WHERE provider_id = $1"#) + .bind(TEST_PROVIDER_1_DB) + .fetch_one(&ctx.dbs.app_pool) + .await + .unwrap(); + + let diff = Utc::now() - row.0; + assert!( + diff < Duration::seconds(5), + "next_bms_test_at should be reset to now, diff was {diff:?}" + ); +} + +#[tokio::test]
+async fn test_reset_all_schedules() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + + // Set both schedules to future + sqlx::query( + r#"UPDATE storage_providers + SET next_url_discovery_at = NOW() + INTERVAL '1 day', + next_bms_test_at = NOW() + INTERVAL '7 days' + WHERE provider_id = $1"#, + ) + .bind(TEST_PROVIDER_1_DB) + .execute(&ctx.dbs.app_pool) + .await + .unwrap(); + + let response = ctx + .app + .post(&format!( + "/providers/{TEST_PROVIDER_1_API}/reset?schedule=all" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::OK); + + let row: (chrono::DateTime<Utc>, chrono::DateTime<Utc>) = sqlx::query_as( + r#"SELECT next_url_discovery_at, next_bms_test_at + FROM storage_providers WHERE provider_id = $1"#, + ) + .bind(TEST_PROVIDER_1_DB) + .fetch_one(&ctx.dbs.app_pool) + .await + .unwrap(); + + let diff_url = Utc::now() - row.0; + let diff_bms = Utc::now() - row.1; + assert!( + diff_url < Duration::seconds(5), + "next_url_discovery_at should be reset" + ); + assert!( + diff_bms < Duration::seconds(5), + "next_bms_test_at should be reset" + ); +} + +#[tokio::test] +async fn test_reset_provider_not_found() { + let ctx = TestContext::new().await; + + let response = ctx + .app + .post("/providers/f099999999/reset?schedule=url_discovery") + .await; + + assert_eq!(response.status_code(), StatusCode::NOT_FOUND); +} + +#[tokio::test] +async fn test_reset_invalid_schedule_param() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + + let response = ctx + .app + .post(&format!( + "/providers/{TEST_PROVIDER_1_API}/reset?schedule=invalid" + )) + .await; + + assert_eq!(response.status_code(), StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_reset_missing_schedule_param() { + let ctx = TestContext::new().await; + + seed_provider(&ctx.dbs.app_pool, TEST_PROVIDER_1_DB).await; + + let response = ctx + .app
.post(&format!("/providers/{TEST_PROVIDER_1_API}/reset")) + .await; + + assert_eq!(response.status_code(), StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_reset_invalid_provider_address() { + let ctx = TestContext::new().await; + + let response = ctx + .app + .post("/providers/invalid/reset?schedule=url_discovery") + .await; + + assert_eq!(response.status_code(), StatusCode::BAD_REQUEST); +} diff --git a/url_finder/tests/integration_tests/url_validation.rs b/url_finder/tests/integration_tests/url_validation.rs new file mode 100644 index 0000000..7d5dde6 --- /dev/null +++ b/url_finder/tests/integration_tests/url_validation.rs @@ -0,0 +1,117 @@ +use std::time::{Duration, Instant}; +use url_finder::url_tester::MAX_DRAIN_CONTENT_LENGTH; +use wiremock::{ + Mock, MockServer, ResponseTemplate, + matchers::{method, path}, +}; + +/// Tests that connections are properly reused when response bodies are drained. +/// With a constrained pool (1 connection), sequential requests should complete +/// quickly if connections are reused. Without draining, each request would +/// need a new connection (slow) or time out. 
+#[tokio::test] +async fn test_connection_reuse_with_constrained_pool() { + use reqwest::Client; + + let mock_server = MockServer::start().await; + let small_body = vec![0u8; 500]; // Small response that should be drained + + // Mount a mock that counts requests + Mock::given(method("GET")) + .and(path("/test")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "application/octet-stream") + .insert_header("content-length", "500") + .set_body_raw(small_body.clone(), "application/octet-stream"), + ) + .expect(10) // Expect exactly 10 requests + .mount(&mock_server) + .await; + + // Create a client with a very constrained connection pool + let client = Client::builder() + .pool_max_idle_per_host(1) // Only 1 idle connection per host + .pool_idle_timeout(Duration::from_secs(30)) + .build() + .unwrap(); + + let url = format!("{}/test", mock_server.uri()); + let num_requests = 10; + let start = Instant::now(); + + // Make sequential requests - if connections are reused, this should be fast + for i in 0..num_requests { + let resp = client.get(&url).send().await.unwrap(); + assert!( + resp.status().is_success(), + "Request {i} failed: {:?}", + resp.status() + ); + + // Drain the body to allow connection reuse (mimics our fix) + let _ = resp.bytes().await; + } + + let elapsed = start.elapsed(); + + // With connection reuse, 10 requests to localhost should complete in < 1 second + // Without reuse (creating new connections each time), it would be slower + // and with pool_max_idle_per_host=1 + no draining, requests could hang + assert!( + elapsed < Duration::from_secs(5), + "10 sequential requests took {:?}, expected < 5s with connection reuse", + elapsed + ); + + // Verify all requests were received + // (wiremock's expect(10) will panic on drop if not all received) +} + +/// Tests that the drain function doesn't block on large responses. +/// This verifies we don't accidentally read huge file bodies. 
+#[tokio::test] +async fn test_large_response_does_not_block() { + use reqwest::Client; + + let mock_server = MockServer::start().await; + // Simulate a large file response (100KB) + let large_body = vec![0u8; 100_000]; + + Mock::given(method("GET")) + .and(path("/large")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "application/octet-stream") + .set_body_raw(large_body, "application/octet-stream"), + ) + .mount(&mock_server) + .await; + + let client = Client::new(); + let url = format!("{}/large", mock_server.uri()); + + let start = Instant::now(); + + let resp = client.get(&url).send().await.unwrap(); + let content_length = resp.content_length(); + + // The drain logic checks content_length and skips large bodies + // We're verifying the behavior by checking content_length is available + // before we would call drain (as our code does) + assert!(content_length.is_some()); + assert!(content_length.unwrap() > MAX_DRAIN_CONTENT_LENGTH); + + // Just drop the response without reading body + // This simulates what our drain_response_body does for large responses + drop(resp); + + let elapsed = start.elapsed(); + + // Dropping without reading should be nearly instant + assert!( + elapsed < Duration::from_millis(500), + "Dropping large response should be fast, took {:?}", + elapsed + ); +}