Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 73 additions & 83 deletions test/scraping.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,65 +27,87 @@ describe( 'scraping', function () {
}

describe( 'parseAll function', () => {
// Calls meta() with a uri/headers options object and expects the promise to
// resolve to a truthy value.
it( 'should resolve promise from woorank with headers', () => {
	const request = {
		uri: 'https://www.woorank.com/en/blog/dublin-core-metadata-for-seo-and-usability',
		headers: { 'User-Agent': userAgent, Accept: acceptHeader }
	};

	const expectTruthy = ( metadata ) => {
		assert.ok( metadata, 'Expected result to be truthy' );
	};

	// Log the error for easier debugging, then rethrow so the test still fails.
	const reportFailure = ( err ) => {
		console.error( 'Error in woorank test:', err );
		throw err;
	};

	return meta( request ).then( expectTruthy ).catch( reportFailure );
} );

it( 'should resolve promise from blog.schema.org without headers', () => {
const url = 'http://blog.schema.org';
return meta( url )
.then( ( result ) => {
assert.ok( result, 'Expected result to be truthy' );
describe( 'Promise style', () => {
// Promise-style check: meta() is given a uri plus request headers and must
// resolve to something truthy.
it( 'should resolve promise from woorank with headers', () => {
	const headers = { 'User-Agent': userAgent, Accept: acceptHeader };
	const pending = meta( {
		uri: 'https://www.woorank.com/en/blog/dublin-core-metadata-for-seo-and-usability',
		headers
	} );

	return pending
		.then( ( scraped ) => assert.ok( scraped, 'Expected result to be truthy' ) )
		.catch( ( err ) => {
			// Surface the underlying error in the log before failing the test.
			console.error( 'Error in woorank test:', err );
			throw err;
		} );
} );

// Calls meta() with a bare URL string (no options object) and expects the
// promise to resolve to a truthy value.
it( 'should resolve promise from blog.schema.org without headers', () => {
	const expectTruthy = ( metadata ) => {
		assert.ok( metadata, 'Expected result to be truthy' );
	};

	// Log the error for easier debugging, then rethrow so the test still fails.
	const reportFailure = ( err ) => {
		console.error( 'Error in blog.schema.org test:', err );
		throw err;
	};

	return meta( 'http://blog.schema.org' ).then( expectTruthy ).catch( reportFailure );
} );

// Expects meta() called without any argument to reject with an Error whose
// message is 'No uri supplied in argument'.
it( 'should throw error if no uri supplied', () => meta()
	.then(
		// Two-argument then(): a failure raised here is NOT routed into the
		// rejection handler below, so an unexpected resolve is reported as such.
		() => {
			assert.fail( 'Should have rejected the promise' );
		},
		( e ) => {
			assert.ok( e instanceof Error, 'Error should be an Error object' );
			assert.strictEqual( e.message, 'No uri supplied in argument', 'Error message should match expected message' );
		}
	)
);

// Fetches the page body, loads it with cheerio, runs meta.parseAll() on it and
// checks that no key in any returned metadata group holds `undefined`.
it( 'should not have any undefined values', () => {
	const url = 'http://web.archive.org/web/20220127144804/https://www.cnet.com/special-reports/vr101/';

	return getWithHeaders( url )
		.then( ( body ) => meta.parseAll( cheerio.load( body ) ) )
		.then( ( results ) => {
			for ( const [ metadataType, group ] of Object.entries( results ) ) {
				for ( const [ key, value ] of Object.entries( group ) ) {
					assert.notStrictEqual( value, undefined, `${ metadataType }.${ key } should not be undefined` );
				}
			}
		} );
} );
} );

// Expects meta() called without any argument to reject with an Error whose
// message is 'No uri supplied in argument'.
it( 'should throw error if no uri supplied', () => meta()
	.then(
		// Two-argument then(): with a trailing .catch() the AssertionError from
		// assert.fail() would be caught and re-reported as a message mismatch,
		// hiding the fact that the promise resolved.
		() => {
			assert.fail( 'Should have rejected the promise' );
		},
		( e ) => {
			assert.ok( e instanceof Error, 'Error should be an Error object' );
			assert.strictEqual( e.message, 'No uri supplied in argument', 'Error message should match expected message' );
		}
	)
);

// async/await variant: awaits meta() called with a uri/headers options object
// and expects a truthy result.
it( 'should support await implementation with headers', async () => {
	const request = {
		uri: 'http://blog.schema.org',
		headers: { 'User-Agent': userAgent, Accept: acceptHeader }
	};
	const scraped = await meta( request );

	assert.ok( scraped, 'Expected result to be truthy' );
} );

// async/await variant: awaits meta() called with a bare URL string and expects
// a truthy result.
it( 'should support await implementation without headers', async () => {
	const scraped = await meta( 'http://blog.schema.org' );

	assert.ok( scraped, 'Expected result to be truthy' );
} );
describe( 'Await style', () => {

it( 'should throw error if no uri is supplied with async/await', async () => {
try {
await meta();
assert.fail( 'Should have thrown an error' );
} catch ( e ) {
assert.ok( e instanceof Error, 'Error should be an Error object' );
assert.strictEqual( e.message, 'No uri supplied in argument', 'Error message should match expected message' );
}
// Await-style check: meta() receives a uri plus request headers and the
// awaited value must be truthy.
it( 'should support await implementation with headers', async () => {
	const headers = { 'User-Agent': userAgent, Accept: acceptHeader };
	const scraped = await meta( { uri: 'http://blog.schema.org', headers } );

	assert.ok( scraped, 'Expected result to be truthy' );
} );

// Await-style check: meta() receives only a URL string and the awaited value
// must be truthy.
it( 'should support await implementation without headers', async () => {
	const target = 'http://blog.schema.org';
	const scraped = await meta( target );

	assert.ok( scraped, 'Expected result to be truthy' );
} );

// Expects an awaited meta() call without any argument to reject with an Error
// whose message is 'No uri supplied in argument'.
it( 'should throw error if no uri is supplied with async/await', async () => {
	let rejected = false;
	let rejection;

	try {
		await meta();
	} catch ( e ) {
		rejected = true;
		rejection = e;
	}

	// Checked outside the try block: an assert.fail() inside it would be caught
	// by the catch above and re-reported as a message mismatch, hiding the fact
	// that meta() resolved.
	if ( !rejected ) {
		assert.fail( 'Should have thrown an error' );
	}

	assert.ok( rejection instanceof Error, 'Error should be an Error object' );
	assert.strictEqual( rejection.message, 'No uri supplied in argument', 'Error message should match expected message' );
} );
} );

} );

describe( 'parseBEPress function', () => {
describe( 'Individual metadata functions', () => {
it( 'should get BE Press metadata tags', () => {
const url = 'http://biostats.bepress.com/harvardbiostat/paper154/';
return getWithHeaders( url ).then( ( body ) => {
Expand All @@ -106,9 +128,7 @@ describe( 'scraping', function () {
} );
} );
} );
} );

describe( 'parseCOinS function', () => {
it( 'should get COinS metadata', () => {
const url = 'https://en.wikipedia.org/wiki/Viral_phylodynamics';
return getWithHeaders( url ).then( ( body ) => {
Expand All @@ -121,9 +141,7 @@ describe( 'scraping', function () {
} );
} );
} );
} );

describe( 'parseEPrints function', () => {
it( 'should get EPrints metadata', () => {
const url = 'http://eprints.gla.ac.uk/113711/';
return getWithHeaders( url ).then( ( body ) => {
Expand All @@ -139,45 +157,17 @@ describe( 'scraping', function () {
} );
} );
} );
} );

describe( 'parseGeneral function', () => {
// Fetches the page body, loads it with cheerio and expects
// meta.parseGeneral() to report `lang` as 'fr'.
it( 'should get html lang parameter', () => {
	const expectedLang = 'fr';

	return getWithHeaders( 'http://www.lemonde.fr' )
		.then( ( body ) => meta.parseGeneral( cheerio.load( body ) ) )
		.then( ( general ) => {
			assert.strictEqual( general.lang, expectedLang );
		} );
} );

it( 'should get html dir parameter', () => {
const expected = 'rtl';
const url = 'https://www.iranrights.org/fa/';
// Fetches the page body, loads it with cheerio and expects
// meta.parseGeneral() to report the page title 'Example Domain'.
it( 'should get general metadata', () => {
	const expected = 'Example Domain';
	const url = 'http://example.com';
	return getWithHeaders( url ).then( ( body ) => {
		const chtml = cheerio.load( body );
		return meta.parseGeneral( chtml ).then( ( results ) => {
			// Only the title is compared: `expected` is a title string, so
			// comparing it against `results.dir` could never succeed.
			assert.strictEqual( results.title, expected );
		} );
	} );
} );
} );

// Runs meta.parseAll() on the cheerio-loaded page body and verifies that every
// key of every returned metadata group has a value other than `undefined`.
it( 'should not have any undefined values', () => {
	const url = 'http://web.archive.org/web/20220127144804/https://www.cnet.com/special-reports/vr101/';

	const assertNoUndefined = ( results ) => {
		for ( const metadataType of Object.keys( results ) ) {
			const group = results[ metadataType ];
			for ( const key of Object.keys( group ) ) {
				assert.notStrictEqual( group[ key ], undefined, `${ metadataType }.${ key } should not be undefined` );
			}
		}
	};

	return getWithHeaders( url )
		.then( ( body ) => meta.parseAll( cheerio.load( body ) ) )
		.then( assertNoUndefined );
} );

} );