core

Using pyinstrument to profile FastHTML apps.

Sometimes when building FastHTML apps we run into performance bottlenecks. Figuring out what is slow can be challenging, especially when building apps with async components. That’s where profiling tools like pyinstrument can help. Profilers are tools that show exactly how long each component of a project takes to run. Identifying slow parts of an app is the first step in figuring out how to make things run faster.

from starlette.testclient import TestClient
from fastcore.all import *
from functools import partialmethod
from anyio import from_thread

get_trigger_name


def get_trigger_name(
    
):

Call self as a function.


ProfileMiddleware


def ProfileMiddleware(
    app, save_dir:NoneType=None
):

Add the power of pyinstrument to potentially every request.


profiling


def profiling(
    
):

Call self as a function.


instrument


def instrument(
    route_handler
):

Replaces the route handler’s output with pyinstrument results.

def slow(): time.sleep(0.01)

@instrument
def my_route(): slow()

res = await my_route()
assert 'slow' in res and 'pyinstrument' in res.lower()
res[:50]
'<!DOCTYPE html>\n            <html>\n            <he'

load_session


def load_session(
    path
):

Load a saved pyinstrument session and return a renderer-ready object.

app, rt = fast_app()
app.add_middleware(ProfileMiddleware, save_dir='/tmp/profiles')
client = TestClient(app)

@rt
def index(): return Titled('Hello, profiler')
'pyinstrumentHTMLRenderer' in client.get('/?profile=1').text
True

Session.flat


def flat(
    paths:NoneType=None, n:int=20
):

Aggregate self-time by function, optionally filtered by file path substrings.

flat walks the frame tree and sums total_self_time per unique (function, file, line) tuple. Filter with paths to focus on specific codebases. Returns a sorted list of ProfileEntry named tuples.

sess = load_session(sorted(Path('/tmp/profiles').glob('*.pkl'))[-1])
for e in sess.flat(n=5): print(f'{e.time*1000:7.1f}ms  {e.func}  {e.file}:{e.line}')
    1.7ms  run_sync_in_worker_thread  
/Users/jhoward/aai-ws/.venv/lib/python3.12/site-packages/anyio/_backends/_asyncio.py:2459
    1.7ms    :None
    1.0ms  getcoroutinestate  
/Users/jhoward/.local/share/uv/python/cpython-3.12.0-macos-aarch64-none/lib/python3.12/inspect.py:1919
    1.0ms    :None

Session.callers


def callers(
    func_name, paths:NoneType=None, n:int=10
):

Find which functions call func_name and how much time they contribute.

callers answers the question “who is calling this hot function?” For each occurrence of func_name with self-time, it attributes that time to the immediate parent frame.

for e in sess.callers('getattr', n=5): print(f'{e.time*1000:7.1f}ms  {e.func}  {e.file}:{e.line}')

Session.callees


def callees(
    func_name, paths:NoneType=None, n:int=10
):

Find what func_name spends its time calling.

callees answers the question “what does this function spend its time on?” It walks descendants of matching frames and aggregates their self-time.

for e in sess.callees('getattr', n=5): print(f'{e.time*1000:7.1f}ms  {e.func}  {e.file}:{e.line}')

Session.hot_paths


def hot_paths(
    paths:NoneType=None, n:int=10, depth:int=8
):

Top call stacks by cumulative time, filtered to frames matching paths.

hot_paths shows the most expensive call stacks, collapsed to only frames matching paths. The depth parameter limits stack depth to keep output readable.

ps = ['solveit/', 'fasthtml/', 'fastcore/']
for t,s in sess.hot_paths(paths=ps, n=5): print(f'{t*1000:7.1f}ms  {s}')
    3.4ms  _f core.py:669 → _wrap_call core.py:478 → _handle core.py:269

render_session


def render_session(
    sess, text:bool=True, show_all:bool=False, short_mode:bool=True
):

Render a saved session as text or HTML.

Tests

First, confirm that the view works normally:

assert 'Hello, profiler' in client.get('/').text

Now let’s profile it! Or rather, check that profiling works:

'pyinstrumentHTMLRenderer' in client.get('/?profile=1').text
True

Let’s print the profile to the terminal:

client.get('/?profile=1&term=1')
  _     ._   __/__   _ _  _  _ _/_   Recorded: 13:41:32  Samples:  1
 /_//_/// /_\ / //_// / //_'/ //     Duration: 0.001     CPU time: 0.001
/   _/                      v5.1.2

Profile at /var/folders/51/b2_szf2945n072c0vj2cyty40000gn/T/ipykernel_92920/363051617.py:18

0.001 Handle._run  asyncio/events.py:82
`- 0.001 coro  starlette/middleware/base.py:139
      [9 frames hidden]  starlette
         0.001 app  starlette/routing.py:60
         `- 0.001 _f  ../fasthtml/fasthtml/core.py:669
            `- 0.001 _wrap_call  ../fasthtml/fasthtml/core.py:478
               `- 0.001 _handle  ../fasthtml/fasthtml/core.py:269
                  `- 0.001 run_in_threadpool  starlette/concurrency.py:30
                     `- 0.001 run_sync  anyio/to_thread.py:25
                        `- 0.001 AsyncIOBackend.run_sync_in_worker_thread  anyio/_backends/_asyncio.py:2459
                           `- 0.001   anyio/_backends/_asyncio.py


<Response [200 OK]>
@rt
@instrument
def saxaphone(): return Titled('Play that sweet horn')
assert 'pyinstrumentHTMLRenderer' in client.get('/saxaphone').text
@rt
@instrument
async def trombone(): return Titled('Async horn')
assert 'pyinstrumentHTMLRenderer' in client.get('/trombone').text