openclaw - 💡(How to fix) Fix [Bug] High pre-processing latency (8-12s) on webchat after gateway restart [1 participants]

Official PRs (…)
ON THIS PAGE

Recommended Tools

×6

Utilities matched from this issue’s tags and category — try them while you read without losing context.

GitHub issue graph ai analysis

Paste a GitHub issue URL. We fetch that issue, discover linked issues from bodies/comments/timeline, collect linked pull requests, and produce a structured English report.

The report is written in English Markdown for sharing and archival.

Helpful · Quick feedback

Loading…
GitHub stats
openclaw/openclaw#60548 · Fetched 2026-04-08 02:49:39
View on GitHub
Comments
0
Participants
1
Timeline
4
Reactions
0
Author
Participants
Timeline (top)
cross-referenced ×2 · closed ×1 · locked ×1

Error Message

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import base64
import json
import logging
import os
import ssl
import time

import websocket

# Connection settings for the local OpenClaw gateway under test.
URL = "ws://127.0.0.1:18789/ws"
TOKEN = "123456"
SESSION_KEY = "agent:main:main"
# JSON file holding the device id and private key (passed to DeviceIdentity).
DEVICE_STORAGE = os.path.expanduser(
    "~/.openclaw/workspace/.xiaomeng_correct_device.json"
)

# Every log line is prefixed with an HH:MM:SS.mmm timestamp.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%H:%M:%S',
    format='%(asctime)s.%(msecs)03d %(message)s',
)
log = logging.info  # short alias used throughout the script

class DeviceIdentity:
    """Ed25519 device identity loaded from a JSON file on disk.

    The file must provide the keys ``deviceId`` and ``privateKeyPem``.  After
    construction the instance exposes ``device_id``, ``priv`` (the private
    key object) and ``pub_b64`` (the raw public key as unpadded base64url).
    """

    # Number of leading bytes dropped from the decoded PEM body before the
    # raw key bytes.  NOTE(review): presumably the fixed PKCS#8 header that
    # precedes the 32-byte Ed25519 seed -- confirm against the key format
    # that produced the device file.
    _KEY_PREFIX_LEN = 16

    def __init__(self, storage_path):
        """Remember *storage_path* and immediately load the identity from it."""
        self.storage_path = storage_path
        self.load()

    @staticmethod
    def _b64url(raw):
        """Return *raw* bytes as URL-safe base64 text with '=' padding removed."""
        return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

    def load(self):
        """Read the device file and populate ``priv``, ``device_id`` and ``pub_b64``.

        Logs an error and exits with status 1 when the file does not exist.
        A missing JSON key surfaces as ``KeyError``.
        """
        if not os.path.exists(self.storage_path):
            log(f"❌ Error: device file not found {self.storage_path}")
            # SystemExit (unlike os._exit) lets cleanup handlers run and
            # buffered output flush before the process ends.
            raise SystemExit(1)
        with open(self.storage_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # cryptography is imported here, not at module import time.
        from cryptography.hazmat.primitives import serialization
        from cryptography.hazmat.primitives.asymmetric import ed25519

        # Keep only the base64 body of the PEM, dropping the '-----' markers.
        pem_lines = (line.strip() for line in data['privateKeyPem'].strip().splitlines())
        b64 = ''.join(line for line in pem_lines if not line.startswith('---'))
        key_bytes = base64.b64decode(b64)[self._KEY_PREFIX_LEN:]
        self.priv = ed25519.Ed25519PrivateKey.from_private_bytes(key_bytes)
        self.device_id = data['deviceId']

        pub_bytes = self.priv.public_key().public_bytes(
            encoding=serialization.Encoding.Raw, format=serialization.PublicFormat.Raw
        )
        self.pub_b64 = self._b64url(pub_bytes)

    def sign(self, p):
        """Sign the UTF-8 encoding of string *p* and return the signature as unpadded base64url."""
        return self._b64url(self.priv.sign(p.encode('utf-8')))

class LatencyTester:
    """Send one webchat message over the gateway WebSocket and time the reply.

    Three instants are recorded:

    * T0 (``t0_send``)  - just before the ``chat.send`` request is written;
    * T1 (``t1_start``) - when an ``agent`` event with stream ``lifecycle``
      and phase ``start`` arrives;
    * T2 (``t2_first``) - when the first ``agent`` event with stream
      ``assistant`` carrying ``text`` or ``delta`` arrives.

    A value of 0 means "not recorded yet".  Only the differences between the
    instants are meaningful.
    """

    # Scopes requested on connect; the same sequence is signed and sent.
    _SCOPES = (
        "operator.admin",
        "operator.write",
        "operator.approvals",
        "operator.pairing",
        "operator.read",
    )

    def __init__(self):
        """Load the device identity and reset all timing state."""
        self.device = DeviceIdentity(DEVICE_STORAGE)
        self.t0_send = 0
        self.t1_start = 0
        self.t2_first = 0
        self.has_sent = False

    def start(self):
        """Open the WebSocket and block until the connection is closed."""
        log("🚀 Connecting to OpenClaw...")
        self.ws = websocket.WebSocketApp(
            URL,
            header={'Authorization': f'Bearer {TOKEN}'},
            on_open=self.on_open,
            on_message=self.on_message,
            on_error=self.on_error,
        )
        # NOTE(review): certificate verification is switched off here; that
        # is only acceptable for a local test endpoint.
        self.ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

    def on_open(self, ws):
        """Log that the socket is connected."""
        log("🔗 Connected")

    def on_error(self, ws, e):
        """Log a transport-level error reported by the WebSocket client."""
        log(f"❌ Error: {e}")

    def send_test_msg(self, ws):
        """Send the single ``chat.send`` test request and record T0.

        Does nothing when the message has already been sent.
        """
        if self.has_sent:
            return
        self.has_sent = True
        # One wall-clock stamp feeds both ids so they always match.
        stamp = int(time.time() * 1000)
        msg_id = f"m-{stamp}"
        idem_key = f"s-{stamp}"
        req = {
            "type": "req",
            "id": msg_id,
            "method": "chat.send",
            "params": {
                "sessionKey": SESSION_KEY,
                "message": "(延迟测试)",
                "deliver": True,
                "idempotencyKey": idem_key
            }
        }
        # perf_counter is meant for measuring short durations; time.time()
        # can jump when the system clock is adjusted.
        self.t0_send = time.perf_counter()
        log(f"📤 [T0] Sending message (ID: {msg_id})...")
        ws.send(json.dumps(req))

    def on_message(self, ws, message):
        """Dispatch one incoming JSON frame to the matching handler.

        Any exception raised while handling the frame is logged and
        swallowed so that the receive loop keeps running.
        """
        try:
            d = json.loads(message)
            ev = d.get('event')
            # "payload": null must behave like a missing payload.
            pl = d.get('payload') or {}

            if ev == 'connect.challenge':
                self._answer_challenge(ws, pl)
            elif d.get('id') == 'auth':
                self._handle_auth_result(ws, d)
            elif ev == 'agent':
                self._handle_agent_event(ws, pl)
        except Exception as e:
            log(f"Error processing message: {e}")

    def _answer_challenge(self, ws, pl):
        """Sign the challenge in *pl* and send the ``connect`` request."""
        ts = pl['ts']
        nonce = pl['nonce']
        scopes = list(self._SCOPES)
        auth_str = (
            f"v2|{self.device.device_id}|webchat|webchat|operator|"
            f"{','.join(scopes)}|{ts}|{TOKEN}|{nonce}"
        )
        sig = self.device.sign(auth_str)
        ws.send(json.dumps({
            "type": "req", "id": "auth", "method": "connect",
            "params": {
                "minProtocol": 3, "maxProtocol": 3, "role": "operator",
                "scopes": scopes,
                "auth": {"token": TOKEN},
                "device": {
                    "id": self.device.device_id,
                    "publicKey": self.device.pub_b64,
                    "signature": sig,
                    "signedAt": ts,
                    "nonce": nonce,
                },
                "client": {"id": "webchat", "version": "1.0", "platform": "linux", "mode": "webchat"}
            }
        }))

    def _handle_auth_result(self, ws, d):
        """Send the test message on success; close the socket on failure."""
        if d.get('ok'):
            log("✅ Authentication successful, sending test message...")
            self.send_test_msg(ws)
        else:
            log(f"❌ Authentication failed: {d.get('error')}")
            # Without this the run_forever() loop would never return.
            ws.close()

    def _handle_agent_event(self, ws, pl):
        """Record T1/T2 from an ``agent`` event; report and close at T2."""
        if not self.has_sent:
            # No T0 yet: an interval computed now would be meaningless.
            return
        stream = pl.get('stream')
        data = pl.get('data') or {}

        if stream == 'lifecycle' and data.get('phase') == 'start':
            self.t1_start = time.perf_counter()
            diff = int((self.t1_start - self.t0_send) * 1000)
            log(f"⏱️ [T1] Agent started (Phase: Start): +{diff} ms")
        elif stream == 'assistant' and ('text' in data or 'delta' in data):
            if not self.t2_first:
                self.t2_first = time.perf_counter()
                self.print_report()
                ws.close()

    def print_report(self):
        """Print the phase breakdown in milliseconds to stdout.

        When T1 was never recorded it is taken to equal T2, so the whole
        interval is attributed to phase 1.
        """
        t0 = self.t0_send
        t2 = self.t2_first
        t1 = self.t1_start or t2
        p1 = int((t1 - t0) * 1000)
        p2 = int((t2 - t1) * 1000)
        total = int((t2 - t0) * 1000)
        print("\n" + "="*40)
        print("📊 Latency Report")
        print("="*40)
        print(f"1️⃣ Phase 1 (Scheduling/RAG/History): {p1} ms")
        print(f"2️⃣ Phase 2 (LLM Generation):         {p2} ms")
        print("-" * 40)
        print(f"🏁 Total TTFT:                       {total} ms")
        print("="*40 + "\n")

# Script entry point: build the tester and run it until the socket closes.
if __name__ == "__main__":
    tester = LatencyTester()
    tester.start()

Fix Action

Fix / Workaround

2. Config patch attempt (failed)

Tried reducing contextTokens to 32k — no improvement.

Code Example

02:52:25.731+08:00 session-init store-load agent=main session=agent:main:main elapsedMs=2
02:52:25.764+08:00 [model-selection] stage=configured-allowlist-built elapsedMs=2
02:52:25.765+08:00 [model-selection] stage=auth-profile-store-loaded elapsedMs=3
(T1 fires at +572ms — fast first message)
...
02:52:39.113+08:00 session-init store-load agent=main session=agent:main:main elapsedMs=2
02:52:39.146+08:00 [model-selection] stage=configured-allowlist-built elapsedMs=1
(T1 fires at +11303ms — slow second message)

---

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os, time, json, ssl, base64
import websocket
import logging

# Connection settings for the local OpenClaw gateway under test.
URL = "ws://127.0.0.1:18789/ws"
TOKEN = "123456"
SESSION_KEY = "agent:main:main"
# JSON file holding the device id and private key (passed to DeviceIdentity).
DEVICE_STORAGE = os.path.expanduser(
    "~/.openclaw/workspace/.xiaomeng_correct_device.json"
)

# Every log line is prefixed with an HH:MM:SS.mmm timestamp.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%H:%M:%S',
    format='%(asctime)s.%(msecs)03d %(message)s',
)
log = logging.info  # short alias used throughout the script

class DeviceIdentity:
    """Ed25519 device identity loaded from a JSON file on disk.

    The file must provide the keys ``deviceId`` and ``privateKeyPem``.  After
    construction the instance exposes ``device_id``, ``priv`` (the private
    key object) and ``pub_b64`` (the raw public key as unpadded base64url).
    """

    # Number of leading bytes dropped from the decoded PEM body before the
    # raw key bytes.  NOTE(review): presumably the fixed PKCS#8 header that
    # precedes the 32-byte Ed25519 seed -- confirm against the key format
    # that produced the device file.
    _KEY_PREFIX_LEN = 16

    def __init__(self, storage_path):
        """Remember *storage_path* and immediately load the identity from it."""
        self.storage_path = storage_path
        self.load()

    @staticmethod
    def _b64url(raw):
        """Return *raw* bytes as URL-safe base64 text with '=' padding removed."""
        return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

    def load(self):
        """Read the device file and populate ``priv``, ``device_id`` and ``pub_b64``.

        Logs an error and exits with status 1 when the file does not exist.
        A missing JSON key surfaces as ``KeyError``.
        """
        if not os.path.exists(self.storage_path):
            log(f"❌ Error: device file not found {self.storage_path}")
            # SystemExit (unlike os._exit) lets cleanup handlers run and
            # buffered output flush before the process ends.
            raise SystemExit(1)
        with open(self.storage_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # cryptography is imported here, not at module import time.
        from cryptography.hazmat.primitives import serialization
        from cryptography.hazmat.primitives.asymmetric import ed25519

        # Keep only the base64 body of the PEM, dropping the '-----' markers.
        pem_lines = (line.strip() for line in data['privateKeyPem'].strip().splitlines())
        b64 = ''.join(line for line in pem_lines if not line.startswith('---'))
        key_bytes = base64.b64decode(b64)[self._KEY_PREFIX_LEN:]
        self.priv = ed25519.Ed25519PrivateKey.from_private_bytes(key_bytes)
        self.device_id = data['deviceId']

        pub_bytes = self.priv.public_key().public_bytes(
            encoding=serialization.Encoding.Raw, format=serialization.PublicFormat.Raw
        )
        self.pub_b64 = self._b64url(pub_bytes)

    def sign(self, p):
        """Sign the UTF-8 encoding of string *p* and return the signature as unpadded base64url."""
        return self._b64url(self.priv.sign(p.encode('utf-8')))

class LatencyTester:
    """Send one webchat message over the gateway WebSocket and time the reply.

    Three instants are recorded:

    * T0 (``t0_send``)  - just before the ``chat.send`` request is written;
    * T1 (``t1_start``) - when an ``agent`` event with stream ``lifecycle``
      and phase ``start`` arrives;
    * T2 (``t2_first``) - when the first ``agent`` event with stream
      ``assistant`` carrying ``text`` or ``delta`` arrives.

    A value of 0 means "not recorded yet".  Only the differences between the
    instants are meaningful.
    """

    # Scopes requested on connect; the same sequence is signed and sent.
    _SCOPES = (
        "operator.admin",
        "operator.write",
        "operator.approvals",
        "operator.pairing",
        "operator.read",
    )

    def __init__(self):
        """Load the device identity and reset all timing state."""
        self.device = DeviceIdentity(DEVICE_STORAGE)
        self.t0_send = 0
        self.t1_start = 0
        self.t2_first = 0
        self.has_sent = False

    def start(self):
        """Open the WebSocket and block until the connection is closed."""
        log("🚀 Connecting to OpenClaw...")
        self.ws = websocket.WebSocketApp(
            URL,
            header={'Authorization': f'Bearer {TOKEN}'},
            on_open=self.on_open,
            on_message=self.on_message,
            on_error=self.on_error,
        )
        # NOTE(review): certificate verification is switched off here; that
        # is only acceptable for a local test endpoint.
        self.ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

    def on_open(self, ws):
        """Log that the socket is connected."""
        log("🔗 Connected")

    def on_error(self, ws, e):
        """Log a transport-level error reported by the WebSocket client."""
        log(f"❌ Error: {e}")

    def send_test_msg(self, ws):
        """Send the single ``chat.send`` test request and record T0.

        Does nothing when the message has already been sent.
        """
        if self.has_sent:
            return
        self.has_sent = True
        # One wall-clock stamp feeds both ids so they always match.
        stamp = int(time.time() * 1000)
        msg_id = f"m-{stamp}"
        idem_key = f"s-{stamp}"
        req = {
            "type": "req",
            "id": msg_id,
            "method": "chat.send",
            "params": {
                "sessionKey": SESSION_KEY,
                "message": "(延迟测试)",
                "deliver": True,
                "idempotencyKey": idem_key
            }
        }
        # perf_counter is meant for measuring short durations; time.time()
        # can jump when the system clock is adjusted.
        self.t0_send = time.perf_counter()
        log(f"📤 [T0] Sending message (ID: {msg_id})...")
        ws.send(json.dumps(req))

    def on_message(self, ws, message):
        """Dispatch one incoming JSON frame to the matching handler.

        Any exception raised while handling the frame is logged and
        swallowed so that the receive loop keeps running.
        """
        try:
            d = json.loads(message)
            ev = d.get('event')
            # "payload": null must behave like a missing payload.
            pl = d.get('payload') or {}

            if ev == 'connect.challenge':
                self._answer_challenge(ws, pl)
            elif d.get('id') == 'auth':
                self._handle_auth_result(ws, d)
            elif ev == 'agent':
                self._handle_agent_event(ws, pl)
        except Exception as e:
            log(f"Error processing message: {e}")

    def _answer_challenge(self, ws, pl):
        """Sign the challenge in *pl* and send the ``connect`` request."""
        ts = pl['ts']
        nonce = pl['nonce']
        scopes = list(self._SCOPES)
        auth_str = (
            f"v2|{self.device.device_id}|webchat|webchat|operator|"
            f"{','.join(scopes)}|{ts}|{TOKEN}|{nonce}"
        )
        sig = self.device.sign(auth_str)
        ws.send(json.dumps({
            "type": "req", "id": "auth", "method": "connect",
            "params": {
                "minProtocol": 3, "maxProtocol": 3, "role": "operator",
                "scopes": scopes,
                "auth": {"token": TOKEN},
                "device": {
                    "id": self.device.device_id,
                    "publicKey": self.device.pub_b64,
                    "signature": sig,
                    "signedAt": ts,
                    "nonce": nonce,
                },
                "client": {"id": "webchat", "version": "1.0", "platform": "linux", "mode": "webchat"}
            }
        }))

    def _handle_auth_result(self, ws, d):
        """Send the test message on success; close the socket on failure."""
        if d.get('ok'):
            log("✅ Authentication successful, sending test message...")
            self.send_test_msg(ws)
        else:
            log(f"❌ Authentication failed: {d.get('error')}")
            # Without this the run_forever() loop would never return.
            ws.close()

    def _handle_agent_event(self, ws, pl):
        """Record T1/T2 from an ``agent`` event; report and close at T2."""
        if not self.has_sent:
            # No T0 yet: an interval computed now would be meaningless.
            return
        stream = pl.get('stream')
        data = pl.get('data') or {}

        if stream == 'lifecycle' and data.get('phase') == 'start':
            self.t1_start = time.perf_counter()
            diff = int((self.t1_start - self.t0_send) * 1000)
            log(f"⏱️ [T1] Agent started (Phase: Start): +{diff} ms")
        elif stream == 'assistant' and ('text' in data or 'delta' in data):
            if not self.t2_first:
                self.t2_first = time.perf_counter()
                self.print_report()
                ws.close()

    def print_report(self):
        """Print the phase breakdown in milliseconds to stdout.

        When T1 was never recorded it is taken to equal T2, so the whole
        interval is attributed to phase 1.
        """
        t0 = self.t0_send
        t2 = self.t2_first
        t1 = self.t1_start or t2
        p1 = int((t1 - t0) * 1000)
        p2 = int((t2 - t1) * 1000)
        total = int((t2 - t0) * 1000)
        print("\n" + "="*40)
        print("📊 Latency Report")
        print("="*40)
        print(f"1️⃣ Phase 1 (Scheduling/RAG/History): {p1} ms")
        print(f"2️⃣ Phase 2 (LLM Generation):         {p2} ms")
        print("-" * 40)
        print(f"🏁 Total TTFT:                       {total} ms")
        print("="*40 + "\n")

# Script entry point: build the tester and run it until the socket closes.
if __name__ == "__main__":
    tester = LatencyTester()
    tester.start()
RAW_BUFFER · Click to expand / collapse

Bug Description

After upgrading past v2026.2.26, webchat messages suffer from 5-12 seconds of pre-processing latency before the LLM starts generating tokens. The first message after gateway restart is fast (~500ms-1.5s), but subsequent messages consistently show 8-12 seconds of delay.

Steps to Reproduce

  1. Start/restart the OpenClaw gateway
  2. Send a message via webchat — note the low latency
  3. Send another message without restarting — observe 8-12s delay
  4. The delay accumulates with each message exchange

Latency Test Results

Run 1 — First message after restart:

  • Phase 1 (preprocessing/RAG/history): ~1.5s
  • Phase 2 (LLM generation): ~20s
  • Total TTFT: ~21s

Run 2 — Second message (no restart):

  • Phase 1: ~7.9s
  • Phase 2: ~14s
  • Total TTFT: ~22s

Run 3 — Third message:

  • Phase 1: ~11.4s
  • Phase 2: ~12s
  • Total TTFT: ~23s

The delay increases with each message. Session info at the time:

  • contextTokens: 200000 (max)
  • inputTokens: ~43000 per message
  • Session store file size: 158KB for agent:main:main

Environment

  • OpenClaw: v2026.4.2 (also reproduced on other versions released after v2026.2.26)
  • Platform: WSL2 (Windows), Node.js 22.22.0
  • Model: MiniMax-M2.7
  • Session key: agent:main:main
  • WebSocket connection to ws://127.0.0.1:18789/ws

Troubleshooting Attempted

1. Session store IO analysis

Timing logs (OPENCLAW_DEBUG_INGRESS_TIMING=1) showed session store load is fast (~2-8ms). The bottleneck is NOT in session file reading.

2. Config patch attempt (failed)

Tried reducing contextTokens to 32k — no improvement.

3. Code investigation

  • loadSessionStore() with skipCache: true reads and does structuredClone() on the entire session store on every message.
  • The delay appears to be in the preprocessing pipeline (pre-LLM), not in the model inference itself.

4. Issue is NOT in v2026.2.26

User confirmed that v2026.2.26 does not have this problem. The regression was introduced in a subsequent version update.

5. Not related to humanDelay

The humanDelay mechanism was confirmed to only affect outgoing message delivery, not the pre-processing phase.

Relevant Log Excerpt (debug timing enabled)

02:52:25.731+08:00 session-init store-load agent=main session=agent:main:main elapsedMs=2
02:52:25.764+08:00 [model-selection] stage=configured-allowlist-built elapsedMs=2
02:52:25.765+08:00 [model-selection] stage=auth-profile-store-loaded elapsedMs=3
(T1 fires at +572ms — fast first message)
...
02:52:39.113+08:00 session-init store-load agent=main session=agent:main:main elapsedMs=2
02:52:39.146+08:00 [model-selection] stage=configured-allowlist-built elapsedMs=1
(T1 fires at +11303ms — slow second message)

Test Script

latency_test.py — a Python WebSocket latency test tool using device identity authentication:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os, time, json, ssl, base64
import websocket
import logging

# Connection settings for the local OpenClaw gateway under test.
URL = "ws://127.0.0.1:18789/ws"
TOKEN = "123456"
SESSION_KEY = "agent:main:main"
# JSON file holding the device id and private key (passed to DeviceIdentity).
DEVICE_STORAGE = os.path.expanduser(
    "~/.openclaw/workspace/.xiaomeng_correct_device.json"
)

# Every log line is prefixed with an HH:MM:SS.mmm timestamp.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%H:%M:%S',
    format='%(asctime)s.%(msecs)03d %(message)s',
)
log = logging.info  # short alias used throughout the script

class DeviceIdentity:
    """Ed25519 device identity loaded from a JSON file on disk.

    The file must provide the keys ``deviceId`` and ``privateKeyPem``.  After
    construction the instance exposes ``device_id``, ``priv`` (the private
    key object) and ``pub_b64`` (the raw public key as unpadded base64url).
    """

    # Number of leading bytes dropped from the decoded PEM body before the
    # raw key bytes.  NOTE(review): presumably the fixed PKCS#8 header that
    # precedes the 32-byte Ed25519 seed -- confirm against the key format
    # that produced the device file.
    _KEY_PREFIX_LEN = 16

    def __init__(self, storage_path):
        """Remember *storage_path* and immediately load the identity from it."""
        self.storage_path = storage_path
        self.load()

    @staticmethod
    def _b64url(raw):
        """Return *raw* bytes as URL-safe base64 text with '=' padding removed."""
        return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

    def load(self):
        """Read the device file and populate ``priv``, ``device_id`` and ``pub_b64``.

        Logs an error and exits with status 1 when the file does not exist.
        A missing JSON key surfaces as ``KeyError``.
        """
        if not os.path.exists(self.storage_path):
            log(f"❌ Error: device file not found {self.storage_path}")
            # SystemExit (unlike os._exit) lets cleanup handlers run and
            # buffered output flush before the process ends.
            raise SystemExit(1)
        with open(self.storage_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # cryptography is imported here, not at module import time.
        from cryptography.hazmat.primitives import serialization
        from cryptography.hazmat.primitives.asymmetric import ed25519

        # Keep only the base64 body of the PEM, dropping the '-----' markers.
        pem_lines = (line.strip() for line in data['privateKeyPem'].strip().splitlines())
        b64 = ''.join(line for line in pem_lines if not line.startswith('---'))
        key_bytes = base64.b64decode(b64)[self._KEY_PREFIX_LEN:]
        self.priv = ed25519.Ed25519PrivateKey.from_private_bytes(key_bytes)
        self.device_id = data['deviceId']

        pub_bytes = self.priv.public_key().public_bytes(
            encoding=serialization.Encoding.Raw, format=serialization.PublicFormat.Raw
        )
        self.pub_b64 = self._b64url(pub_bytes)

    def sign(self, p):
        """Sign the UTF-8 encoding of string *p* and return the signature as unpadded base64url."""
        return self._b64url(self.priv.sign(p.encode('utf-8')))

class LatencyTester:
    """Send one webchat message over the gateway WebSocket and time the reply.

    Three instants are recorded:

    * T0 (``t0_send``)  - just before the ``chat.send`` request is written;
    * T1 (``t1_start``) - when an ``agent`` event with stream ``lifecycle``
      and phase ``start`` arrives;
    * T2 (``t2_first``) - when the first ``agent`` event with stream
      ``assistant`` carrying ``text`` or ``delta`` arrives.

    A value of 0 means "not recorded yet".  Only the differences between the
    instants are meaningful.
    """

    # Scopes requested on connect; the same sequence is signed and sent.
    _SCOPES = (
        "operator.admin",
        "operator.write",
        "operator.approvals",
        "operator.pairing",
        "operator.read",
    )

    def __init__(self):
        """Load the device identity and reset all timing state."""
        self.device = DeviceIdentity(DEVICE_STORAGE)
        self.t0_send = 0
        self.t1_start = 0
        self.t2_first = 0
        self.has_sent = False

    def start(self):
        """Open the WebSocket and block until the connection is closed."""
        log("🚀 Connecting to OpenClaw...")
        self.ws = websocket.WebSocketApp(
            URL,
            header={'Authorization': f'Bearer {TOKEN}'},
            on_open=self.on_open,
            on_message=self.on_message,
            on_error=self.on_error,
        )
        # NOTE(review): certificate verification is switched off here; that
        # is only acceptable for a local test endpoint.
        self.ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

    def on_open(self, ws):
        """Log that the socket is connected."""
        log("🔗 Connected")

    def on_error(self, ws, e):
        """Log a transport-level error reported by the WebSocket client."""
        log(f"❌ Error: {e}")

    def send_test_msg(self, ws):
        """Send the single ``chat.send`` test request and record T0.

        Does nothing when the message has already been sent.
        """
        if self.has_sent:
            return
        self.has_sent = True
        # One wall-clock stamp feeds both ids so they always match.
        stamp = int(time.time() * 1000)
        msg_id = f"m-{stamp}"
        idem_key = f"s-{stamp}"
        req = {
            "type": "req",
            "id": msg_id,
            "method": "chat.send",
            "params": {
                "sessionKey": SESSION_KEY,
                "message": "(延迟测试)",
                "deliver": True,
                "idempotencyKey": idem_key
            }
        }
        # perf_counter is meant for measuring short durations; time.time()
        # can jump when the system clock is adjusted.
        self.t0_send = time.perf_counter()
        log(f"📤 [T0] Sending message (ID: {msg_id})...")
        ws.send(json.dumps(req))

    def on_message(self, ws, message):
        """Dispatch one incoming JSON frame to the matching handler.

        Any exception raised while handling the frame is logged and
        swallowed so that the receive loop keeps running.
        """
        try:
            d = json.loads(message)
            ev = d.get('event')
            # "payload": null must behave like a missing payload.
            pl = d.get('payload') or {}

            if ev == 'connect.challenge':
                self._answer_challenge(ws, pl)
            elif d.get('id') == 'auth':
                self._handle_auth_result(ws, d)
            elif ev == 'agent':
                self._handle_agent_event(ws, pl)
        except Exception as e:
            log(f"Error processing message: {e}")

    def _answer_challenge(self, ws, pl):
        """Sign the challenge in *pl* and send the ``connect`` request."""
        ts = pl['ts']
        nonce = pl['nonce']
        scopes = list(self._SCOPES)
        auth_str = (
            f"v2|{self.device.device_id}|webchat|webchat|operator|"
            f"{','.join(scopes)}|{ts}|{TOKEN}|{nonce}"
        )
        sig = self.device.sign(auth_str)
        ws.send(json.dumps({
            "type": "req", "id": "auth", "method": "connect",
            "params": {
                "minProtocol": 3, "maxProtocol": 3, "role": "operator",
                "scopes": scopes,
                "auth": {"token": TOKEN},
                "device": {
                    "id": self.device.device_id,
                    "publicKey": self.device.pub_b64,
                    "signature": sig,
                    "signedAt": ts,
                    "nonce": nonce,
                },
                "client": {"id": "webchat", "version": "1.0", "platform": "linux", "mode": "webchat"}
            }
        }))

    def _handle_auth_result(self, ws, d):
        """Send the test message on success; close the socket on failure."""
        if d.get('ok'):
            log("✅ Authentication successful, sending test message...")
            self.send_test_msg(ws)
        else:
            log(f"❌ Authentication failed: {d.get('error')}")
            # Without this the run_forever() loop would never return.
            ws.close()

    def _handle_agent_event(self, ws, pl):
        """Record T1/T2 from an ``agent`` event; report and close at T2."""
        if not self.has_sent:
            # No T0 yet: an interval computed now would be meaningless.
            return
        stream = pl.get('stream')
        data = pl.get('data') or {}

        if stream == 'lifecycle' and data.get('phase') == 'start':
            self.t1_start = time.perf_counter()
            diff = int((self.t1_start - self.t0_send) * 1000)
            log(f"⏱️ [T1] Agent started (Phase: Start): +{diff} ms")
        elif stream == 'assistant' and ('text' in data or 'delta' in data):
            if not self.t2_first:
                self.t2_first = time.perf_counter()
                self.print_report()
                ws.close()

    def print_report(self):
        """Print the phase breakdown in milliseconds to stdout.

        When T1 was never recorded it is taken to equal T2, so the whole
        interval is attributed to phase 1.
        """
        t0 = self.t0_send
        t2 = self.t2_first
        t1 = self.t1_start or t2
        p1 = int((t1 - t0) * 1000)
        p2 = int((t2 - t1) * 1000)
        total = int((t2 - t0) * 1000)
        print("\n" + "="*40)
        print("📊 Latency Report")
        print("="*40)
        print(f"1️⃣ Phase 1 (Scheduling/RAG/History): {p1} ms")
        print(f"2️⃣ Phase 2 (LLM Generation):         {p2} ms")
        print("-" * 40)
        print(f"🏁 Total TTFT:                       {total} ms")
        print("="*40 + "\n")

# Script entry point: build the tester and run it until the socket closes.
if __name__ == "__main__":
    tester = LatencyTester()
    tester.start()

Questions for Maintainers

  1. What changed in the preprocessing pipeline between v2026.2.26 and later versions that could cause this cumulative delay?
  2. Is there a known memory/GC issue or session state growth problem that could explain the increasing delay pattern?
  3. Are there any session compaction or context pruning settings that could help reduce input token load per message?

extent analysis

TL;DR

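The most likely fix for the cumulative delay is to optimize per-message session store handling — in particular loadSessionStore() with skipCache: true, which re-reads the entire session store and runs structuredClone() on it for every message. Note that the timing logs show the store load itself taking only 2–8 ms, so this cause is unconfirmed and the rest of the preprocessing pipeline should be profiled as well.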

Guidance

  • Investigate the loadSessionStore() function to see if there are any optimization opportunities, such as caching or lazy loading, to reduce the overhead of loading the session store on every message.
  • Consider implementing a session compaction or context pruning mechanism to reduce the input token load per message, which could help alleviate the increasing delay pattern.
  • Review the changes made to the preprocessing pipeline between v2026.2.26 and later versions to identify any potential causes of the cumulative delay.
  • Monitor the memory usage and garbage collection patterns to see if there are any issues that could be contributing to the delay.

Example

# Example: a minimal in-memory cache in front of the session store loader.
# Entries are kept for the lifetime of the process and never invalidated.
session_store_cache = {}

def loadSessionStore(session_key):
    """Return the session store for *session_key*, loading it at most once.

    A cached store is returned as-is; otherwise the store is fetched with
    load_session_store_from_storage(), remembered, and returned.
    """
    try:
        return session_store_cache[session_key]
    except KeyError:
        pass
    # Cache miss: load from storage and remember the result.
    loaded = load_session_store_from_storage(session_key)
    session_store_cache[session_key] = loaded
    return loaded

Notes

  • The issue seems to be related to the preprocessing pipeline, specifically the loadSessionStore() function, which is causing the delay.
  • The cumulative delay pattern suggests that there may be a memory or garbage collection issue that is causing the delay to increase over time.
  • The latency_test.py script provides a useful tool for measuring the latency and identifying the source of the delay.

Recommendation

Apply a workaround by optimizing the loadSessionStore() function and implementing a caching mechanism to reduce the overhead of loading the session store on every message. This should help alleviate the cumulative delay issue until a more permanent fix can be implemented.

Vote matrix · Quick signals

Works
Did the solution work? Tap to confirm.
Easy Fix
Was it a quick fix?
Time Saver
Did it save you time?
Blocking
Was it severely blocking?
Common Issue
Are others likely hitting this too?
Flaky / Intermittent
Is it intermittent?
Verified / Reproducible
Can you reproduce it reliably?
Loading…

Still need to ship something?

×6

Another batch ranked right after the header list — different links, same matching logic.

Back to top recommendations

TRENDING