innFactory · pull · Jun 13, 2026 · Jun 13, 2026 · Jun 13, 2026 · Jun 13, 2026
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@librechat/agents",
-  "version": "3.2.34",
+  "version": "3.2.35",
   "main": "./dist/cjs/main.cjs",
   "module": "./dist/esm/main.mjs",
   "types": "./dist/types/index.d.ts",

diff --git a/src/agents/AgentContext.ts b/src/agents/AgentContext.ts
@@ -21,6 +21,7 @@ import {
   addCacheControlToStablePrefixMessages,
 } from '@/messages/cache';
 import { createSchemaOnlyTools } from '@/tools/schema';
+import { apportionTokenCounts } from '@/utils/tokens';
 import { DEFAULT_RESERVE_RATIO } from '@/messages';
 import { toJsonSchema } from '@/utils/schema';
 
@@ -191,6 +192,11 @@ export class AgentContext {
   dynamicInstructionTokens: number = 0;
   /** Token count for tool schemas only. */
   toolSchemaTokens: number = 0;
+  /** Per-tool schema token counts (post-multiplier), keyed by tool name.
+   *  `undefined` when not calculated (e.g. cached aggregate schema tokens). */
+  toolTokenCounts?: Record<string, number>;
+  /** Names of counted tools that are deferred (`defer_loading`) and discovered. */
+  deferredToolNames: string[] = [];
   /** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
   calibrationRatio: number = 1;
   /** Provider-observed instruction overhead from the pruner's best-variance turn. */
@@ -894,6 +900,8 @@ export class AgentContext {
     this.systemMessageTokens = 0;
     this.dynamicInstructionTokens = 0;
     this.toolSchemaTokens = 0;
+    this.toolTokenCounts = undefined;
+    this.deferredToolNames = [];
     this.cachedSystemRunnable = undefined;
     this.systemRunnableStale = true;
     this.lastToken = undefined;
@@ -1006,6 +1014,10 @@ export class AgentContext {
   ): Promise<void> {
     let toolTokens = 0;
     const countedToolNames = new Set<string>();
+    /** Prototype-free: external tool names like `toString` must not hit
+     *  inherited properties during accumulation */
+    const rawToolTokenCounts: Record<string, number> = Object.create(null);
+    const deferredCountedNames = new Set<string>();
 
     /**
      * Iterate both `tools` (user-provided instance tools) and `graphTools`
@@ -1040,11 +1052,14 @@ export class AgentContext {
             toolName,
             (genericTool.description as string | undefined) ?? ''
           );
-          toolTokens += tokenCounter(
+          const schemaTokens = tokenCounter(
             new SystemMessage(JSON.stringify(jsonSchema))
           );
+          toolTokens += schemaTokens;
           if (toolName) {
             countedToolNames.add(toolName);
+            rawToolTokenCounts[toolName] =
+              (rawToolTokenCounts[toolName] ?? 0) + schemaTokens;
           }
         }
       }
@@ -1062,7 +1077,16 @@ export class AgentContext {
           parameters: def.parameters ?? {},
         },
       };
-      toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
+      const schemaTokens = tokenCounter(
+        new SystemMessage(JSON.stringify(schema))
+      );
+      toolTokens += schemaTokens;
+      countedToolNames.add(def.name);
+      rawToolTokenCounts[def.name] =
+        (rawToolTokenCounts[def.name] ?? 0) + schemaTokens;
+      if (def.defer_loading === true) {
+        deferredCountedNames.add(def.name);
+      }
     }
 
     const isAnthropic =
@@ -1077,6 +1101,25 @@ export class AgentContext {
       ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
       : DEFAULT_TOOL_TOKEN_MULTIPLIER;
     this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);
+
+    /** Largest-remainder apportionment keeps the per-tool counts summing
+     *  exactly to the aggregate despite per-entry rounding */
+    const toolTokenCounts = apportionTokenCounts(
+      rawToolTokenCounts,
+      toolTokenMultiplier,
+      this.toolSchemaTokens
+    );
+    const deferredToolNames: string[] = [];
+    for (const name of Object.keys(rawToolTokenCounts)) {
+      if (
+        deferredCountedNames.has(name) ||
+        this.toolRegistry?.get(name)?.defer_loading === true
+      ) {
+        deferredToolNames.push(name);
+      }
+    }
+    this.toolTokenCounts = toolTokenCounts;
+    this.deferredToolNames = deferredToolNames;
   }
 
   /**
@@ -1212,9 +1255,8 @@ export class AgentContext {
    * Returns a structured breakdown of how the context token budget is consumed.
    * Useful for diagnostics when context overflow or pruning issues occur.
    *
-   * Note: `toolCount` reflects discoveries immediately, but `toolSchemaTokens`
-   * is a snapshot taken during `calculateInstructionTokens` and is not
-   * recomputed when `markToolsAsDiscovered` is called mid-run.
+   * Note: `markToolsAsDiscovered` re-runs `calculateInstructionTokens` when a
+   * `tokenCounter` is set; await `tokenCalculationPromise` for fresh counts.
    */
   getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
     const maxContextTokens = this.maxContextTokens ?? 0;
@@ -1238,7 +1280,14 @@ export class AgentContext {
       }
     }
 
-    const reserveTokens = Math.round(maxContextTokens * DEFAULT_RESERVE_RATIO);
+    /** Mirror the pruner's reserve math so availableForMessages agrees
+     *  with the contextBudget computed during pruning */
+    const reserveRatio =
+      this.summarizationConfig?.reserveRatio ?? DEFAULT_RESERVE_RATIO;
+    const reserveTokens =
+      reserveRatio > 0 && reserveRatio < 1
+        ? Math.round(maxContextTokens * reserveRatio)
+        : 0;
     const availableForMessages = Math.max(
       0,
       maxContextTokens - reserveTokens - this.instructionTokens
@@ -1255,6 +1304,12 @@ export class AgentContext {
       messageCount,
       messageTokens,
       availableForMessages,
+      toolTokenCounts:
+        this.toolTokenCounts != null ? { ...this.toolTokenCounts } : undefined,
+      deferredToolNames:
+        this.deferredToolNames.length > 0
+          ? [...this.deferredToolNames]
+          : undefined,
     };
   }
 
@@ -1324,6 +1379,14 @@ export class AgentContext {
     }
     if (hasNewDiscoveries) {
       this.systemRunnableStale = true;
+      /** Refresh schema token accounting so the next call's budget and
+       *  per-tool breakdown include the newly discovered tools; awaited
+       *  via tokenCalculationPromise before the next model call */
+      if (this.tokenCounter) {
+        this.tokenCalculationPromise = this.calculateInstructionTokens(
+          this.tokenCounter
+        );
+      }
     }
     return hasNewDiscoveries;
   }

diff --git a/src/agents/__tests__/AgentContext.test.ts b/src/agents/__tests__/AgentContext.test.ts
@@ -1414,7 +1414,7 @@ describe('AgentContext', () => {
       expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(2);
     });
 
-    it('toolSchemaTokens snapshot does not auto-update after markToolsAsDiscovered', async () => {
+    it('refreshes toolSchemaTokens and per-tool counts after markToolsAsDiscovered', async () => {
       const toolDefinitions: t.LCTool[] = [
         {
           name: 'deferred',
@@ -1431,9 +1431,13 @@ describe('AgentContext', () => {
 
       await ctx.tokenCalculationPromise;
       expect(ctx.toolSchemaTokens).toBe(0);
+      expect(ctx.toolTokenCounts).toEqual({});
 
       ctx.markToolsAsDiscovered(['deferred']);
-      expect(ctx.toolSchemaTokens).toBe(0);
+      await ctx.tokenCalculationPromise;
+      expect(ctx.toolSchemaTokens).toBeGreaterThan(0);
+      expect(ctx.toolTokenCounts?.deferred).toBeGreaterThan(0);
+      expect(ctx.deferredToolNames).toContain('deferred');
     });
   });
 

diff --git a/src/common/enum.ts b/src/common/enum.ts
@@ -31,6 +31,8 @@ export enum GraphEvents {
   ON_SUBAGENT_UPDATE = 'on_subagent_update',
   /** [Custom] Diagnostic logging event for context management observability */
   ON_AGENT_LOG = 'on_agent_log',
+  /** [Custom] Per-model-call context window usage snapshot (post-prune token budget) */
+  ON_CONTEXT_USAGE = 'on_context_usage',
 
   /* Official Events */
 
@@ -185,6 +187,17 @@ export enum Constants {
   /** Anthropic server tool ID prefix (web_search, code_execution, etc.) */
   ANTHROPIC_SERVER_TOOL_PREFIX = 'srvtoolu_',
   SKILL_TOOL = 'skill',
+  /**
+   * Callback-metadata keys stamped by `attemptInvoke` /
+   * `tryFallbackProviders` carrying the provider (SDK `Providers` enum
+   * value) and configured model that actually served a model invocation.
+   * Unlike `ls_provider` — which derived providers inherit from their base
+   * class (e.g. DeepSeek/OpenRouter report `'openai'`) — these reflect the
+   * SDK's own routing, including fallback-provider calls. Consumed by the
+   * subagent usage-capture handler to tag billing events.
+   */
+  INVOKED_PROVIDER = '__invoked_provider',
+  INVOKED_MODEL = '__invoked_model',
   READ_FILE = 'read_file',
   BASH_TOOL = 'bash_tool',
   BASH_PROGRAMMATIC_TOOL_CALLING = 'run_tools_with_bash',