selenium webdriver 执行原理
Posted 浮尘~若梦
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了selenium webdriver 执行原理相关的知识,希望对你有一定的参考价值。
selenium webdriver源码结构
Python版的源码结构,只看webdriver相关
selenium-master/py/selenium/webdriver/remote 目录下的文件:
|-- command.py 命令相关
|-- errorhandler.py 错误处理
|-- file_detector.py 文件标识
|-- mobile.py 手机相关
|-- remote_connection.py 远程连接driver 服务端
|-- switch_to.py 切换alert相关
|-- utils.py 公用类
|-- webdriver.py webdriver客户端
|-- webelement.py 解析dom元素
|-- __init__.py
启动流程:
1. 一般先初始化一个 webdriver 实例。以 Chrome driver 为例,看源码可知 Chrome 的 WebDriver 类继承自 RemoteWebDriver,如下代码所示:
class WebDriver(RemoteWebDriver):
    """
    Controls the ChromeDriver and allows you to drive the browser.

    You will need to download the ChromeDriver executable from
    http://chromedriver.storage.googleapis.com/index.html
    """

    def __init__(self, executable_path="chromedriver", port=DEFAULT_PORT,
                 options=None, service_args=None,
                 desired_capabilities=None,
                 service_log_path=DEFAULT_SERVICE_LOG_PATH,
                 chrome_options=None, service=None, keep_alive=True):
        """
        Creates a new instance of the chrome driver.

        Starts the service and then creates new instance of chrome driver.

        :Args:
         - executable_path - Deprecated: path to the executable. If the default is used it assumes the executable is in the $PATH
         - port - Deprecated: port you would like the service to run, if left as 0, a free port will be found.
         - options - this takes an instance of ChromeOptions
         - service_args - Deprecated: List of args to pass to the driver service
         - desired_capabilities - Deprecated: Dictionary object with non-browser specific
           capabilities only, such as "proxy" or "loggingPref".
         - service_log_path - Deprecated: Where to log information from the driver.
         - chrome_options - Deprecated: use ``options`` instead. When truthy it
           replaces ``options`` after a DeprecationWarning is issued.
         - service - A ready-made service object. When truthy it is used as-is
           instead of building a ``Service`` from the deprecated arguments.
         - keep_alive - Whether to configure ChromeRemoteConnection to use HTTP keep-alive.
        """
        # Check each argument: every deprecated argument that differs from
        # its default triggers a DeprecationWarning.
        if executable_path != 'chromedriver':
            warnings.warn('executable_path has been deprecated, please pass in a Service object',
                          DeprecationWarning, stacklevel=2)
        if desired_capabilities is not None:
            warnings.warn('desired_capabilities has been deprecated, please pass in a Service object',
                          DeprecationWarning, stacklevel=2)
        if port != DEFAULT_PORT:
            warnings.warn('port has been deprecated, please pass in a Service object',
                          DeprecationWarning, stacklevel=2)
        self.port = port
        if service_log_path != DEFAULT_SERVICE_LOG_PATH:
            warnings.warn('service_log_path has been deprecated, please pass in a Service object',
                          DeprecationWarning, stacklevel=2)

        if chrome_options:
            warnings.warn('use options instead of chrome_options',
                          DeprecationWarning, stacklevel=2)
            options = chrome_options

        if options is None:
            # desired_capabilities stays as passed in
            if desired_capabilities is None:
                desired_capabilities = self.create_options().to_capabilities()
        else:
            if desired_capabilities is None:
                desired_capabilities = options.to_capabilities()
            else:
                desired_capabilities.update(options.to_capabilities())

        if service:
            self.service = service
        else:
            # Start the driver service locally; per the port documentation
            # above, a free port is found when the port is left as 0.
            self.service = Service(
                executable_path,
                port=port,
                service_args=service_args,
                log_path=service_log_path)
        self.service.start()

        try:
            # Initialise the RemoteWebDriver base class, pointing it at the
            # URL of the service that was just started.
            RemoteWebDriver.__init__(
                self,
                command_executor=ChromeRemoteConnection(
                    remote_server_addr=self.service.service_url,
                    keep_alive=keep_alive),
                desired_capabilities=desired_capabilities)
        except Exception:
            # Clean up before propagating the error.
            self.quit()
            raise
        self._is_remote = False
接着看RemoteWebDriver 源码:
class WebDriver(object):
    """
    Controls a browser by sending commands to a remote server.
    This server is expected to be running the WebDriver wire protocol
    as defined at
    https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol

    :Attributes:
     - session_id - String ID of the browser session started and controlled by this WebDriver.
     - capabilities - Dictionary of effective capabilities of this browser session as returned
         by the remote server. See https://github.com/SeleniumHQ/selenium/wiki/DesiredCapabilities
     - command_executor - remote_connection.RemoteConnection object used to execute commands.
     - error_handler - errorhandler.ErrorHandler object used to handle errors.
    """

    _web_element_cls = WebElement

    def __init__(self, command_executor='http://127.0.0.1:4444/wd/hub',
                 desired_capabilities=None, browser_profile=None, proxy=None,
                 keep_alive=True, file_detector=None, options=None):
        """
        Create a new driver that will issue commands using the wire protocol.

        :Args:
         - command_executor - Either a string representing URL of the remote server or a custom
             remote_connection.RemoteConnection object. Defaults to 'http://127.0.0.1:4444/wd/hub'.
         - desired_capabilities - A dictionary of capabilities to request when
             starting the browser session. Required parameter.
         - browser_profile - A selenium.webdriver.firefox.firefox_profile.FirefoxProfile object.
             Only used if Firefox is requested. Optional.
         - proxy - A selenium.webdriver.common.proxy.Proxy object. The browser session will
             be started with given proxy settings, if possible. Optional.
         - keep_alive - Whether to configure remote_connection.RemoteConnection to use
             HTTP keep-alive. Defaults to True.
         - file_detector - Pass custom file detector object during instantiation. If None,
             then default LocalFileDetector() will be used.
         - options - instance of a driver options.Options class

        :Raises:
         - WebDriverException - if desired_capabilities is given but is not a dict.
        """
        # Start from the capabilities in ``options`` (if any), then let the
        # explicit ``desired_capabilities`` dictionary override them.
        capabilities = {}
        if options is not None:
            capabilities = options.to_capabilities()
        if desired_capabilities is not None:
            if not isinstance(desired_capabilities, dict):
                raise WebDriverException("Desired Capabilities must be a dictionary")
            else:
                capabilities.update(desired_capabilities)
        self.command_executor = command_executor
        if type(self.command_executor) is bytes or isinstance(self.command_executor, str):
            # A URL was given rather than a connection object: open a
            # connection to the webdriver server at that URL.
            self.command_executor = RemoteConnection(command_executor, keep_alive=keep_alive)
        self._is_remote = True
        self.session_id = None
        self.capabilities = {}
        self.error_handler = ErrorHandler()
        self.start_client()
        self.start_session(capabilities, browser_profile)
        self._switch_to = SwitchTo(self)
        self._mobile = Mobile(self)
        self.file_detector = file_detector or LocalFileDetector()

    # ... (the rest of the class is omitted in the article)
2. webdriver 实例化之后就可以调用它的方法,下面以 find_element_by_id 为例讲解:
def find_element_by_id(self, id_):
    r"""Finds an element by id.

    :Args:
     - id\_ - The id of the element to be found.

    :Returns:
     - WebElement - the element if it was found

    :Raises:
     - NoSuchElementException - if the element wasn't found

    :Usage:
        ::

            element = driver.find_element_by_id('foo')
    """
    return self.find_element(by=By.ID, value=id_)


# This is the method that does the real work.
def find_element(self, by=By.ID, value=None):
    # ... (leading part of the method omitted in the article)
    return self.execute(Command.FIND_ELEMENT, {
        'using': by,
        'value': value})['value']


# All the commands in command.py (referenced below as Command.<NAME>).
STATUS = "status"
NEW_SESSION = "newSession"
GET_ALL_SESSIONS = "getAllSessions"
DELETE_SESSION = "deleteSession"
NEW_WINDOW = "newWindow"
CLOSE = "close"
QUIT = "quit"
GET = "get"
GO_BACK = "goBack"
GO_FORWARD = "goForward"
REFRESH = "refresh"
ADD_COOKIE = "addCookie"
GET_COOKIE = "getCookie"
GET_ALL_COOKIES = "getCookies"
DELETE_COOKIE = "deleteCookie"
DELETE_ALL_COOKIES = "deleteAllCookies"
FIND_ELEMENT = "findElement"
FIND_ELEMENTS = "findElements"
FIND_CHILD_ELEMENT = "findChildElement"
FIND_CHILD_ELEMENTS = "findChildElements"
CLEAR_ELEMENT = "clearElement"
CLICK_ELEMENT = "clickElement"
SEND_KEYS_TO_ELEMENT = "sendKeysToElement"
SEND_KEYS_TO_ACTIVE_ELEMENT = "sendKeysToActiveElement"
SUBMIT_ELEMENT = "submitElement"
UPLOAD_FILE = "uploadFile"
GET_CURRENT_WINDOW_HANDLE = "getCurrentWindowHandle"
W3C_GET_CURRENT_WINDOW_HANDLE = "w3cGetCurrentWindowHandle"
GET_WINDOW_HANDLES = "getWindowHandles"
W3C_GET_WINDOW_HANDLES = "w3cGetWindowHandles"
GET_WINDOW_SIZE = "getWindowSize"
W3C_GET_WINDOW_SIZE = "w3cGetWindowSize"
# ... (remaining commands omitted in the article)


# The execute method on the driver: adds the session id to the parameters
# and delegates to self.command_executor.
def execute(self, driver_command, params=None):
    """
    Sends a command to be executed by a command.CommandExecutor.

    :Args:
     - driver_command: The name of the command to execute as a string.
     - params: A dictionary of named parameters to send with the command.

    :Returns:
      The command's JSON response loaded into a dictionary object.
    """
    if self.session_id is not None:
        if not params:
            params = {'sessionId': self.session_id}
        elif 'sessionId' not in params:
            params['sessionId'] = self.session_id

    params = self._wrap_value(params)
    # The actual execution happens in the command executor.
    response = self.command_executor.execute(driver_command, params)
    if response:
        self.error_handler.check_response(response)
        response['value'] = self._unwrap_value(
            response.get('value', None))
        return response
    # If the server doesn't send a response, assume the command was
    # a success
    return {'success': 0, 'value': None, 'sessionId': self.session_id}


# The execute method on the connection: it reads self._commands and
# self._url, both of which are set by the remote_connection __init__ below.
def execute(self, command, params):
    """
    Send a command to the remote server.

    Any path substitutions required for the URL mapped to the command should be
    included in the command parameters.

    :Args:
     - command - A string specifying the command to execute.
     - params - A dictionary of named parameters to send with the command as
       its JSON payload.
    """
    # command_info is an (HTTP method, URL path template) pair.
    command_info = self._commands[command]
    assert command_info is not None, 'Unrecognised command %s' % command
    # Fill the $sessionId / $id placeholders of the path from params.
    path = string.Template(command_info[1]).substitute(params)
    if hasattr(self, 'w3c') and self.w3c and isinstance(params, dict) and 'sessionId' in params:
        del params['sessionId']
    data = utils.dump_json(params)
    url = '%s%s' % (self._url, path)
    return self._request(command_info[0], url, body=data)


# _request
def _request(self, method, url, body=None):
    """
    Send an HTTP request to the remote server.

    :Args:
     - method - A string for the HTTP method to send the request with.
     - url - A string for the URL to send the request to.
     - body - A string for request body. Ignored unless method is POST or PUT.

    :Returns:
      A dictionary with the server's parsed JSON response.
    """
    LOGGER.debug('%s %s %s' % (method, url, body))

    parsed_url = parse.urlparse(url)
    headers = self.get_remote_connection_headers(parsed_url, self.keep_alive)
    resp = None
    if body and method != 'POST' and method != 'PUT':
        body = None

    if self.keep_alive:
        # self._conn is the urllib3.PoolManager created in __init__ when
        # keep_alive is set.
        resp = self._conn.request(method, url, body=body, headers=headers)

        statuscode = resp.status
    else:
        http = urllib3.PoolManager(timeout=self._timeout)
        resp = http.request(method, url, body=body, headers=headers)
        # ... (rest of the method omitted in the article)


# remote_connection
def __init__(self, remote_server_addr, keep_alive=False, resolve_ip=True):
    # Attempt to resolve the hostname and get an IP address.
    self.keep_alive = keep_alive
    parsed_url = parse.urlparse(remote_server_addr)
    if parsed_url.hostname and resolve_ip:
        port = parsed_url.port or None
        if parsed_url.scheme == "https":
            ip = parsed_url.hostname
        elif port and not common_utils.is_connectable(port, parsed_url.hostname):
            ip = None
            LOGGER.info('Could not connect to port {} on host '
                        '{}'.format(port, parsed_url.hostname))
        else:
            ip = common_utils.find_connectable_ip(parsed_url.hostname,
                                                  port=port)
        if ip:
            # Rebuild the URL with the resolved address, keeping the port
            # and any user:password credentials.
            netloc = ip
            if parsed_url.port:
                netloc = common_utils.join_host_port(netloc,
                                                     parsed_url.port)
            if parsed_url.username:
                auth = parsed_url.username
                if parsed_url.password:
                    auth += ':%s' % parsed_url.password
                netloc = '%s@%s' % (auth, netloc)
            remote_server_addr = parse.urlunparse(
                (parsed_url.scheme, netloc, parsed_url.path,
                 parsed_url.params, parsed_url.query, parsed_url.fragment))
        else:
            LOGGER.info('Could not get IP address for host: %s' %
                        parsed_url.hostname)

    # Address of the webdriver server.
    self._url = remote_server_addr
    if keep_alive:
        self._conn = urllib3.PoolManager(timeout=self._timeout)

    # The HTTP request (method, path template) that each command maps to.
    self._commands = {
        Command.STATUS: ('GET', '/status'),
        Command.NEW_SESSION: ('POST', '/session'),
        Command.GET_ALL_SESSIONS: ('GET', '/sessions'),
        Command.QUIT: ('DELETE', '/session/$sessionId'),
        Command.GET_CURRENT_WINDOW_HANDLE:
            ('GET', '/session/$sessionId/window_handle'),
        Command.W3C_GET_CURRENT_WINDOW_HANDLE:
            ('GET', '/session/$sessionId/window'),
        Command.GET_WINDOW_HANDLES:
            ('GET', '/session/$sessionId/window_handles'),
        Command.W3C_GET_WINDOW_HANDLES:
            ('GET', '/session/$sessionId/window/handles'),
        Command.GET: ('POST', '/session/$sessionId/url'),
        Command.GO_FORWARD: ('POST', '/session/$sessionId/forward'),
        Command.GO_BACK: ('POST', '/session/$sessionId/back'),
        Command.REFRESH: ('POST', '/session/$sessionId/refresh'),
        Command.EXECUTE_SCRIPT: ('POST', '/session/$sessionId/execute'),
        Command.W3C_EXECUTE_SCRIPT:
            ('POST', '/session/$sessionId/execute/sync'),
        Command.W3C_EXECUTE_SCRIPT_ASYNC:
            ('POST', '/session/$sessionId/execute/async'),
        Command.GET_CURRENT_URL: ('GET', '/session/$sessionId/url'),
        Command.GET_TITLE: ('GET', '/session/$sessionId/title'),
        Command.GET_PAGE_SOURCE: ('GET', '/session/$sessionId/source'),
        Command.SCREENSHOT: ('GET', '/session/$sessionId/screenshot'),
        Command.ELEMENT_SCREENSHOT:
            ('GET', '/session/$sessionId/element/$id/screenshot'),
        Command.FIND_ELEMENT: ('POST', '/session/$sessionId/element'),
        Command.FIND_ELEMENTS: ('POST', '/session/$sessionId/elements'),
        Command.W3C_GET_ACTIVE_ELEMENT:
            ('GET', '/session/$sessionId/element/active'),
        Command.GET_ACTIVE_ELEMENT:
            ('POST', '/session/$sessionId/element/active'),
        Command.FIND_CHILD_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/element'),
        Command.FIND_CHILD_ELEMENTS:
            ('POST', '/session/$sessionId/element/$id/elements'),
        Command.CLICK_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/click'),
        Command.CLEAR_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/clear'),
        Command.SUBMIT_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/submit'),
        Command.GET_ELEMENT_TEXT:
            ('GET', '/session/$sessionId/element/$id/text'),
        Command.SEND_KEYS_TO_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/value'),
        Command.SEND_KEYS_TO_ACTIVE_ELEMENT:
            ('POST', '/session/$sessionId/keys'),
        Command.UPLOAD_FILE: ('POST', "/session/$sessionId/file"),
        Command.GET_ELEMENT_VALUE:
            ('GET', '/session/$sessionId/element/$id/value'),
        Command.GET_ELEMENT_TAG_NAME:
            ('GET', '/session/$sessionId/element/$id/name'),
        Command.IS_ELEMENT_SELECTED:
            ('GET', '/session/$sessionId/element/$id/selected'),
        Command.SET_ELEMENT_SELECTED:
            ('POST', '/session/$sessionId/element/$id/selected'),
        Command.IS_ELEMENT_ENABLED:
            ('GET', '/session/$sessionId/element/$id/enabled'),
        Command.IS_ELEMENT_DISPLAYED:
            ('GET', '/session/$sessionId/element/$id/displayed'),
        # Command.GET_ELEMENT_LOCATION: ...
        # (remaining entries omitted in the article)
    }
由以上代码可知,当操作元素时,客户端其实是把该操作对应的 HTTP 请求发给 webdriver 服务端,然后由服务端操作浏览器,并把响应返回给客户端。
以上是关于selenium webdriver 执行原理的主要内容,如果未能解决你的问题,请参考以下文章
浅谈python中selenium库调动webdriver驱动浏览器的实现原理