@@ -928,6 +928,115 @@ describe('Meta Tags', () => {
928928 } ) ;
929929 expect ( logStub . error ) . to . not . have . been . called ;
930930 } ) ;
931+
// A scrape object carrying statusCode 404 must be dropped: the call resolves to null
// and the skip is reported through log.info.
it('should skip page when statusCode is 404 (new scrape with HTTP status)', async () => {
  const pageUrl = 'http://example.com/missing';
  const objectKey = 'scrapes/site-id/missing/scrape.json';
  const notFoundMarkup =
    '<html><body><h1>Not Found</h1><p>Content here to exceed 300 chars.</p></body></html>';

  // Payload handed back by the stubbed S3 client for this page.
  const storedScrape = {
    finalUrl: pageUrl,
    statusCode: 404,
    scrapeResult: {
      tags: {
        title: 'Page Not Found',
        description: 'The page you requested was not found',
        h1: ['Not Found'],
      },
      // NOTE(review): the markup is repeated so the body is longer than 300 characters,
      // presumably so that only the status code can cause the skip — confirm against
      // the implementation of fetchAndProcessPageObject.
      rawBody: notFoundMarkup.repeat(5),
    },
  };

  s3ClientStub.send.resolves({
    Body: {
      // Serialized lazily, at the moment the code under test reads the body.
      transformToString() {
        return JSON.stringify(storedScrape);
      },
    },
    ContentType: 'application/json',
  });

  const outcome = await fetchAndProcessPageObject(
    s3ClientStub,
    'test-bucket',
    pageUrl,
    objectKey,
    logStub,
  );

  expect(outcome).to.be.null;
  expect(logStub.info).to.have.been.calledWith(
    '[metatags] Skipping page with HTTP 404 for http://example.com/missing',
  );
});
966+
// A scrape object carrying statusCode 403 must be skipped as well: the call resolves
// to null and the skip is reported through log.info.
it('should skip page when statusCode is 4xx (e.g. 403)', async () => {
  const pageUrl = 'http://example.com/forbidden';
  const objectKey = 'scrapes/site-id/forbidden/scrape.json';

  // Tags that the scrape captured from the error page.
  const forbiddenTags = {
    title: 'Access Denied',
    description: 'You do not have permission',
    h1: ['Forbidden'],
  };

  // Payload handed back by the stubbed S3 client for this page.
  const storedScrape = {
    finalUrl: pageUrl,
    statusCode: 403,
    scrapeResult: {
      tags: forbiddenTags,
      rawBody: 'A'.repeat(400),
    },
  };

  s3ClientStub.send.resolves({
    Body: {
      // Serialized lazily, at the moment the code under test reads the body.
      transformToString() {
        return JSON.stringify(storedScrape);
      },
    },
    ContentType: 'application/json',
  });

  const outcome = await fetchAndProcessPageObject(
    s3ClientStub,
    'test-bucket',
    pageUrl,
    objectKey,
    logStub,
  );

  expect(outcome).to.be.null;
  expect(logStub.info).to.have.been.calledWith(
    '[metatags] Skipping page with HTTP 403 for http://example.com/forbidden',
  );
});
1001+
// A scrape object carrying statusCode 200 is processed normally: the result maps the
// page path to the scraped tags plus the S3 key the scrape was read from.
it('should process page when statusCode is 200 (new scrape)', async () => {
  const pageUrl = 'http://example.com/ok';
  const objectKey = 'scrapes/site-id/ok/scrape.json';

  // Tags expected to come back unchanged in the processed result.
  const scrapedTags = {
    title: 'OK Page',
    description: 'A valid page',
    h1: ['OK'],
  };

  // Payload handed back by the stubbed S3 client for this page.
  const storedScrape = {
    finalUrl: pageUrl,
    statusCode: 200,
    scrapeResult: {
      tags: scrapedTags,
      rawBody: 'A'.repeat(300),
    },
  };

  s3ClientStub.send.resolves({
    Body: {
      // Serialized lazily, at the moment the code under test reads the body.
      transformToString() {
        return JSON.stringify(storedScrape);
      },
    },
    ContentType: 'application/json',
  });

  const outcome = await fetchAndProcessPageObject(
    s3ClientStub,
    'test-bucket',
    pageUrl,
    objectKey,
    logStub,
  );

  const expectedEntry = {
    title: 'OK Page',
    description: 'A valid page',
    h1: ['OK'],
    s3key: 'scrapes/site-id/ok/scrape.json',
  };
  expect(outcome).to.deep.equal({ '/ok': expectedEntry });
});
9311040 } ) ;
9321041
9331042 describe ( 'opportunities handler method' , ( ) => {
0 commit comments