PlaywrightNB source

Helpers for using Playwright from notebooks and more

source

get_page


def get_page(
    args:VAR_POSITIONAL, stealth:bool=False, kwargs:VAR_KEYWORD
):
page = await get_page()
await page.goto('http://example.org')
<Response url='http://example.org/' request=<Request url='http://example.org/' method='GET'>>

source

page_ready


def page_ready(
    page, pause:int=50, timeout:int=5000
):

Wait until main content of page is ready


source

frames_ready


def frames_ready(
    page, pause:int=50, timeout:int=5000
):

Wait until all visible frames (if any) on page are ready


source

wait_page


def wait_page(
    page, pause:int=50, timeout:int=5000
):

Wait until page and visible frames (if any) on page are ready

sh_url = 'https://help.dyalog.com/19.0/#UserGuide/Installation%20and%20Configuration/Shell%20Scripts.htm'
await page.goto(sh_url)
await wait_page(page)

source

get_full_content


def get_full_content(
    page
):

Tuple of page content and dict of frames’ content

cts, iframes = await get_full_content(page)
await page.close()

source

read_page_async


def read_page_async(
    url, pause:int=50, timeout:int=5000, stealth:bool=False, page:NoneType=None
):

Return contents of url and its iframes using Playwright async

cts,iframes = await read_page_async(sh_url)

source

read_page


def read_page(
    url, pause:int=50, timeout:int=5000, stealth:bool=False, page:NoneType=None
):

Return contents of url and its iframes using Playwright

cts,iframes = read_page(sh_url)
iframes['topic'][:50]
'<!DOCTYPE html><html xmlns:madcap="http://www.madc'

source

h2md


def h2md(
    h
):

Convert HTML `h` to markdown using `HTML2Text`


source

url2md_async


def url2md_async(
    url, sel:NoneType=None, pause:int=50, timeout:int=5000, stealth:bool=False, page:NoneType=None
):

Read url with read_page, optionally selecting CSS selector sel


source

url2md


def url2md(
    url, sel:NoneType=None, pause:int=50, timeout:int=5000, stealth:bool=False, page:NoneType=None
):

Read url with read_page, optionally selecting CSS selector sel


source

get2md


def get2md(
    url, sel:NoneType=None, params:QueryParamTypes | None=None, headers:HeaderTypes | None=None,
    cookies:CookieTypes | None=None, auth:AuthTypes | None=None, proxy:ProxyTypes | None=None,
    follow_redirects:bool=False, verify:ssl.SSLContext | str | bool=True, timeout:TimeoutTypes=Timeout(timeout=5.0),
    trust_env:bool=True
):

Read url with httpx.get

url = 'https://docs.railway.app/guides/public-api'
md = get2md(url, ".docs-content")
print(md[:120])
None