img
如果只想获取 <img>
的图片,有两种方式:
直接获取所有 img
标签:
function getImgs (doc) { return Array.from(doc.getElementsByTagName('img')) .map(img => ({ src: img.currentSrc, // 用 img.src 如果要本来的 src width: img.naturalWidth, height: img.naturalHeight })) } getImgs(document)
还可以用 document.images:
function getImgs (doc) { return Array.from(doc.images) .map(img => ({ src: img.currentSrc, // img.src if you want the origin width: img.naturalWidth, height: img.naturalHeight })) } getImgs(document)
background-image
获得背景图片需要查看所有 DOM 节点的 background-image
属性:
function getBgImgs (doc) { const srcChecker = /url\(\s*?['"]?\s*?(\S+?)\s*?["']?\s*?\)/i return Array.from( Array.from(doc.querySelectorAll('*')) .reduce((collection, node) => { let prop = window.getComputedStyle(node, null) .getPropertyValue('background-image') // match `url(...)` let match = srcChecker.exec(prop) if (match) { collection.add(match[1]) } return collection }, new Set()) ) } getBgImgs(document)
背景图片不能直接得到尺寸信息,如果需要的话要加载一遍。因为搜集的图片很有可能已经在浏览器缓存里,所以加载过程应该很快。
function loadImg (src, timeout = 500) { var imgPromise = new Promise((resolve, reject) => { let img = new Image() img.onload = () => { resolve({ src: src, width: img.naturalWidth, height: img.naturalHeight }) } img.onerror = reject img.src = src }) var timer = new Promise((resolve, reject) => { setTimeout(reject, timeout) }) return Promise.race([imgPromise, timer]) } function loadImgAll (imgList, timeout = 500) { return new Promise((resolve, reject) => { Promise.all( imgList .map(src => loadImg(src, timeout)) .map(p => p.catch(e => false)) ).then(results => resolve(results.filter(r => r))) }) } loadImgAll(getBgImgs(document)).then(imgs => console.log(imgs))
iframe
只需要递归遍历 iframe 的 document
function searchIframes (doc) { var imgList = [] doc.querySelectorAll('iframe') .forEach(iframe => { try { iframeDoc = iframe.contentDocument || iframe.contentWindow.document imgList = imgList.concat(getImgs(iframeDoc) || []) // or getBgImgs(iframeDoc) imgList = imgList.concat(searchIframes(iframeDoc) || []) } catch (e) { // 直接忽略错误的 iframe (e.g. cross-origin) } }) return imgList } searchIframes(document)
整合一起
直接使用就行。
function getImgAll (doc) { return new Promise((resolve, reject) => { loadImgAll(Array.from(searchDOM(doc))) .then(resolve, reject) }) function searchDOM (doc) { const srcChecker = /url\(\s*?['"]?\s*?(\S+?)\s*?["']?\s*?\)/i return Array.from(doc.querySelectorAll('*')) .reduce((collection, node) => { // bg src let prop = window.getComputedStyle(node, null) .getPropertyValue('background-image') // match `url(...)` let match = srcChecker.exec(prop) if (match) { collection.add(match[1]) } if (/^img$/i.test(node.tagName)) { // src from img tag collection.add(node.src) } else if (/^frame$/i.test(node.tagName)) { // iframe try { searchDOM(node.contentDocument || node.contentWindow.document) .forEach(img => { if (img) { collection.add(img) } }) } catch (e) {} } return collection }, new Set()) } function loadImg (src, timeout = 500) { var imgPromise = new Promise((resolve, reject) => { let img = new Image() img.onload = () => { resolve({ src: src, width: img.naturalWidth, height: img.naturalHeight }) } img.onerror = reject img.src = src }) var timer = new Promise((resolve, reject) => { setTimeout(reject, timeout) }) return Promise.race([imgPromise, timer]) } function loadImgAll