FastCDP API

Source and API details
len(_cdp_domains), [d['domain'] for d in _cdp_domains[:5]]
(55, ['Accessibility', 'Animation', 'Audits', 'Autofill', 'BackgroundService'])

source

CDP


def CDP(
    port:int=9222, debug:bool=False
):

Chrome DevTools Protocol connection with event support

# await cdp.close()
cdp = await CDP.connect()

source

CDP.pages


async def pages(
    
):
ps = await cdp.pages
pg = next(p for p in ps if 'example.com' in p['url'])
pg['title']
'4. Example Domain'

source

CDPDomain


def CDPDomain(
    cdp, domain
):

Initialize self. See help(type(self)) for accurate signature.


source

CDPMethod


def CDPMethod(
    cdp, domain, method
):

Initialize self. See help(type(self)) for accurate signature.


source

CDP.eval


async def eval(
    expr, sid:NoneType=None
):

source

CDP.attach


async def attach(
    tid
):
tid = pg['targetId']
sid = await cdp.attach(tid)
await cdp.eval('document.title', sid)
'4. Example Domain'

source

CDP.wait_event


async def wait_event(
    event, timeout:int=10
):

source

CDP.on


def on(
    event
):

source

CDP.wait_for_selector


async def wait_for_selector(
    sel, sid:NoneType=None, timeout:int=10
):

Wait for CSS selector to match an element


source

CDP.wait_for


async def wait_for(
    expr, sid:NoneType=None, timeout:int=10
):

Wait for JS expression to be truthy, return its value


source

CDP.wait_load


async def wait_load(
    sid:NoneType=None, timeout:int=10
):
t = await cdp.target.createTarget(url='about:blank')
sid = await cdp.attach(t)
page = await cdp.page.enable(sid=sid)

await cdp.page.navigate(sid=sid, url='https://httpbin.org/forms/post')
await cdp.wait_for_selector('form', sid)
True
await cdp.target.closeTarget(targetId=t)
True

source

Page


def Page(
    cdp, t, sid
):

Initialize self. See help(type(self)) for accurate signature.


source

PageDomain


def PageDomain(
    sid, domain
):

Initialize self. See help(type(self)) for accurate signature.


source

CDP.new_page


async def new_page(
    
):

Create a new tab, return Page

page = await cdp.new_page()
await page.page.navigate(url='https://httpbin.org/forms/post')
await page.wait_for_selector('form')
True
await page.close()
True

source

cdp_yolo


def cdp_yolo(
    
):

Allow all CDP classes in safepyrun


source

CDP.event


def event(
    name
):

source

Event


def Event(
    cdp, name
):

Context manager for CDP event subscription


source

CDP.goto


async def goto(
    url, sid:NoneType=None, timeout:int=10
):
page = await cdp.new_page()
await page.goto('https://httpbin.org/forms/post')
await page.wait_for('document.title')
'6. httpbin.org/forms/post'

source

CDP.screenshot


async def screenshot(
    sid:NoneType=None
):
img = await page.screenshot()
# img
await page.accessibility.enable()
tree = await page.accessibility.getFullAXTree()
len(tree)
90
tree[0]
{'nodeId': '2',
 'ignored': False,
 'role': {'type': 'internalRole', 'value': 'RootWebArea'},
 'chromeRole': {'type': 'internalRole', 'value': 144},
 'name': {'type': 'computedString',
  'value': '6. httpbin.org/forms/post',
  'sources': [{'type': 'relatedElement', 'attribute': 'aria-labelledby'},
   {'type': 'attribute', 'attribute': 'aria-label'},
   {'type': 'attribute', 'attribute': 'aria-label', 'superseded': True},
   {'type': 'relatedElement',
    'value': {'type': 'computedString', 'value': '6. httpbin.org/forms/post'},
    'nativeSource': 'title'}]},
 'properties': [{'name': 'focusable',
   'value': {'type': 'booleanOrUndefined', 'value': True}},
  {'name': 'url',
   'value': {'type': 'string', 'value': 'https://httpbin.org/forms/post'}}],
 'childIds': ['19'],
 'backendDOMNodeId': 2,
 'frameId': 'DED6AD4E409638E475109A9463285F29'}
await page.close()
True
await cdp.close()

Approach to having an LLM interact with the screen:

  1. Accessibility.enable on the session
  2. Accessibility.getFullAXTree to get all nodes
  3. Filter to interactive/visible nodes, assign ref numbers
  4. Format the filtered nodes as a numbered list
  5. Give the LLM that list, optionally together with a screenshot