• 主页
  • AWS Lambda Python 3.7 Web抓取-“无法使用以下命令获取Chrome的版本: google-chrome --version”

AWS Lambda Python 3.7 Web抓取-“无法使用以下命令获取Chrome的版本: google-chrome --version”

我通过S3以ZIP包的形式上传了一个Lambda函数及其依赖项。该Lambda函数是一个网页抓取器,它使用以下初始代码来启动抓取器:

import json
import os
import pymysql
import boto3
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC

# Build the Chrome options used to launch the scraper's browser.
chrome_options = webdriver.ChromeOptions()
# Launch flags: no visible window, no sandbox, no GPU, fixed window size.
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--window-size=1280x1696')
# Every directory handed to Chrome below lives under /tmp
# (presumably because that is the writable location in the Lambda
# environment -- confirm against the deployment target).
chrome_options.add_argument('--user-data-dir=/tmp/user-data')
chrome_options.add_argument('--hide-scrollbars')
# Logging-related flags, set to their most verbose values here.
chrome_options.add_argument('--enable-logging')
chrome_options.add_argument('--log-level=0')
chrome_options.add_argument('--v=99')
chrome_options.add_argument('--single-process')
chrome_options.add_argument('--data-path=/tmp/data-path')
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_argument('--homedir=/tmp')
chrome_options.add_argument('--disk-cache-dir=/tmp/cache-dir')
# The browser binary is expected at bin/headless-chromium relative to the
# current working directory, i.e. it is shipped alongside the function code.
chrome_options.binary_location = os.getcwd() + "/bin/headless-chromium"
# NOTE: the driver path comes from ChromeDriverManager().install(). In the
# traceback reported for this script, that install() call is what raises
# ValueError: webdriver_manager runs `google-chrome --version` to pick a
# driver version and gets no usable output.
browser = webdriver.Chrome(executable_path=ChromeDriverManager().install(), options=chrome_options)

当我尝试测试lambda函数时,我在控制台中得到以下错误:

{
  "errorMessage": "Could not get version for Chrome with this command: google-chrome --version",
  "errorType": "ValueError",
  "stackTrace": [
    "  File \"/var/task/lambda_function.py\", line 67, in lambda_handler\n    browser = webdriver.Chrome(executable_path=ChromeDriverManager().install(), options=chrome_options)\n",
    "  File \"/var/task/webdriver_manager/chrome.py\", line 24, in install\n    driver_path = self.download_driver(self.driver)\n",
    "  File \"/var/task/webdriver_manager/manager.py\", line 32, in download_driver\n    driver_version, is_latest = self.__get_version_to_download(driver)\n",
    "  File \"/var/task/webdriver_manager/manager.py\", line 23, in __get_version_to_download\n    return self.__get_latest_driver_version(driver), True\n",
    "  File \"/var/task/webdriver_manager/manager.py\", line 17, in __get_latest_driver_version\n    return driver.get_latest_release_version()\n",
    "  File \"/var/task/webdriver_manager/driver.py\", line 54, in get_latest_release_version\n    self._latest_release_url + '_' + chrome_version())\n",
    "  File \"/var/task/webdriver_manager/utils.py\", line 98, in chrome_version\n    .format(cmd)\n"
  ]
}

作为回应,我尝试编辑webdriver_manager依赖文件夹中的utils.py文件,在'chrome_version()'函数定义中使用其他命令(如'chrome --version'和'chromium-browser --version')来代替'google-chrome --version',但得到了类似的错误,即无法通过新命令获取Chrome版本:

def chrome_version() -> str:
    """Return the installed Chrome version as reported by the OS.

    Runs a platform-specific shell command (selected by ``os_name()``) and
    extracts the first ``<digits>.<digits>.<digits>`` sequence from its
    standard output.

    Returns:
        The matched version text, e.g. three dot-separated numeric groups.

    Raises:
        ValueError: If the command's output contains no version-like text.
        KeyError: If ``os_name()`` returns a value missing from the mapping.
    """
    # Only the first three numeric components are captured; any further
    # component in the reported version is ignored.
    pattern = r'\d+\.\d+\.\d+'
    # One version-query command per supported operating system.
    cmd_mapping = {
        OSType.LINUX: 'google-chrome --version',
        OSType.MAC: r'/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --version',
        OSType.WIN: r'reg query "HKEY_CURRENT_USER\Software\Google\Chrome\BLBeacon" /v version'
    }

    cmd = cmd_mapping[os_name()]
    # os.popen captures only the command's stdout. If the executable is not
    # on PATH, the output is empty and the search below finds nothing.
    stdout = os.popen(cmd).read()
    version = re.search(pattern, stdout)
    if not version:
        # The message names the command that was tried, not why it failed.
        raise ValueError(
            'Could not get version for Chrome with this command: {}'
            .format(cmd)
        )
    return version.group(0)

谁能告诉我我应该用什么命令来代替'google-chrome --version'?

转载请注明出处:http://www.xjzlzx.com/article/20230513/2338943.html