nodejs + chrome headless + puppeteer로 캡쳐하는 앱을 테스트해봤다.
https://github.com/knight76/chrome-headless-puppeteer
chrome headless는 단독으로 사용하기 어렵고, nodejs의 puppeteer(https://github.com/GoogleChrome/puppeteer)를 같이 써야 한다.
server.js
const http = require('http')
const puppeteer = require('puppeteer')
const url = require('url')
// Viewport width passed to page.setViewport when the `w` query parameter is
// missing or does not parse to a positive integer.
const DEFAULT_WIDTH = 800
// Viewport height passed to page.setViewport when the `h` query parameter is
// missing or does not parse to a positive integer.
const DEFAULT_HEIGHT = 600
// TCP port the capture HTTP server listens on.
const port = 3000
/**
 * Parses a viewport dimension from a query-string value.
 *
 * @param {string|string[]|undefined} rawValue - Raw query parameter value.
 * @param {number} fallback - Value used when rawValue is missing or not a positive integer.
 * @returns {number} The parsed positive integer, or fallback.
 */
function parseDimension(rawValue, fallback) {
  // Radix 10 is explicit so inputs such as "0x10" are not read as hexadecimal.
  const parsed = parseInt(rawValue, 10)
  return parsed > 0 ? parsed : fallback
}

/**
 * Tells whether a user-supplied capture target is an http(s) URL.
 *
 * @param {*} target - Value of the `url` query parameter.
 * @returns {boolean} true only for strings whose protocol is http: or https:.
 */
function isHttpUrl(target) {
  if (typeof target !== 'string') {
    return false
  }
  try {
    const protocol = url.parse(target).protocol
    return protocol === 'http:' || protocol === 'https:'
  } catch (e) {
    return false
  }
}

/**
 * Screenshot capture service.
 *
 * Routes (all require a non-empty `url` query parameter; without it the server
 * answers 200 'Capture Service.'):
 *   /capture?url=<http(s) url>[&w=<width>][&h=<height>]  -> JPEG screenshot
 *   /check.html                                          -> empty 200 (health check)
 *   anything else                                        -> empty 500
 */
http.createServer(async (request, response) => {
  const urlParts = url.parse(request.url, true)
  const urlPathname = urlParts.pathname
  const urlParams = urlParts.query
  const requrl = urlParams['url']

  if (!requrl) {
    response.writeHead(200)
    response.end('Capture Service.')
    return
  }

  if (urlPathname === '/check.html') {
    response.writeHead(200)
    response.end()
    // Stop here: falling through would launch a browser and write to an
    // already-finished response.
    return
  }

  if (urlPathname !== '/capture') {
    response.writeHead(500)
    response.end()
    return
  }

  // The target comes straight from the client; only allow web URLs so schemes
  // such as file:// cannot be used to capture local resources.
  if (!isHttpUrl(requrl)) {
    response.writeHead(400, {
      'Content-Type': 'text/plain'
    })
    response.end('INVALID URL(400)!!')
    return
  }

  const browerWidth = parseDimension(urlParams['w'], DEFAULT_WIDTH)
  const browerHeight = parseDimension(urlParams['h'], DEFAULT_HEIGHT)

  let browser
  try {
    browser = await puppeteer.launch({
      args: ['--no-sandbox']
    })
    const page = await browser.newPage()
    await page.setViewport({
      width: browerWidth,
      height: browerHeight
    })
    await page.goto(requrl, {
      waitUntil: 'networkidle2'
    })
    const screenshot = await page.screenshot({
      type: 'jpeg',
      quality: 100
    })
    response.writeHead(200, {
      'content-type': 'image/jpeg',
      'cache-control': 'public,max-age=60,immutable'
    })
    response.end(screenshot, 'binary')
  } catch (e) {
    console.log(e)
    if (response.headersSent) {
      // Headers are already on the wire; writeHead would throw, so just
      // make sure the response is finished.
      response.end()
    } else {
      response.writeHead(500, {
        'Content-Type': 'text/plain'
      })
      response.end('UNKNOWN ERROR(500)!!')
    }
  } finally {
    // Closing the browser also closes its pages; without this every request
    // leaves a Chrome process running.
    if (browser) {
      try {
        await browser.close()
      } catch (closeError) {
        // A cleanup failure must not escape as an unhandled rejection.
        console.log(closeError)
      }
    }
  }
}).listen(port)
console.log(`Capture server running port : ${port}...`)
Dockerfile
# Container image for the capture server (server.js).
# Base image presumably bundles Chrome for puppeteer — confirm against the image docs.
FROM zenato/puppeteer
# Switch to root for the build steps below.
USER root
# All following relative paths resolve against the application directory.
WORKDIR /usr/src/app
# Copy the project sources into the application directory and install dependencies there.
COPY . .
RUN npm install
# Port declared for the container; matches the port server.js listens on.
EXPOSE 3000
CMD ["node", "server.js"]
참고로 파이썬 + cproto(https://pypi.org/project/cproto/)를 사용하려 했으나, 파이썬 쪽은 scraping은 어느 정도 되지만 화면 캡쳐용으로는 괜찮은 라이브러리가 없었다.
chrome headless는 nodejs와 잘 맞는 것 같다.