source
get_page
def get_page(
args:VAR_POSITIONAL, stealth:bool=False, kwargs:VAR_KEYWORD
):
page = await get_page()
await page.goto('http://example.org')
<Response url='http://example.org/' request=<Request url='http://example.org/' method='GET'>>
source
page_ready
def page_ready(
page, pause:int=50, timeout:int=5000
):
Wait until main content of page is ready
source
frames_ready
def frames_ready(
page, pause:int=50, timeout:int=5000
):
Wait until all visible frames (if any) on page are ready
source
wait_page
def wait_page(
page, pause:int=50, timeout:int=5000
):
Wait until page and visible frames (if any) on page are ready
sh_url = 'https://help.dyalog.com/19.0/#UserGuide/Installation%20and%20Configuration/Shell%20Scripts.htm'
await page.goto(sh_url)
await wait_page(page)
source
get_full_content
def get_full_content(
page
):
Tuple of page content and dict of frames’ content
cts, iframes = await get_full_content(page)
source
read_page_async
def read_page_async(
url, pause:int=50, timeout:int=5000, stealth:bool=False, page:NoneType=None
):
Return contents of url and its iframes using Playwright async
cts,iframes = await read_page_async(sh_url)
source
read_page
def read_page(
url, pause:int=50, timeout:int=5000, stealth:bool=False, page:NoneType=None
):
Return contents of url and its iframes using Playwright
cts,iframes = read_page(sh_url)
'<!DOCTYPE html><html xmlns:madcap="http://www.madc'
source
h2md
Convert HTML `h` to markdown using `HTML2Text`
source
url2md_async
def url2md_async(
url, sel:NoneType=None, pause:int=50, timeout:int=5000, stealth:bool=False, page:NoneType=None
):
Read url with read_page, optionally selecting CSS selector sel
source
url2md
def url2md(
url, sel:NoneType=None, pause:int=50, timeout:int=5000, stealth:bool=False, page:NoneType=None
):
Read url with read_page
source
get2md
def get2md(
url, sel:NoneType=None, params:QueryParamTypes | None=None, headers:HeaderTypes | None=None,
cookies:CookieTypes | None=None, auth:AuthTypes | None=None, proxy:ProxyTypes | None=None,
follow_redirects:bool=False, verify:ssl.SSLContext | str | bool=True, timeout:TimeoutTypes=Timeout(timeout=5.0),
trust_env:bool=True
):
Read url with httpx.get
url = 'https://docs.railway.app/guides/public-api'
md = get2md(url, ".docs-content")
print(md[:120])