Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@librechat/agents",
"version": "3.2.34",
"version": "3.2.35",
"main": "./dist/cjs/main.cjs",
"module": "./dist/esm/main.mjs",
"types": "./dist/types/index.d.ts",
Expand Down
75 changes: 69 additions & 6 deletions src/agents/AgentContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import {
addCacheControlToStablePrefixMessages,
} from '@/messages/cache';
import { createSchemaOnlyTools } from '@/tools/schema';
import { apportionTokenCounts } from '@/utils/tokens';
import { DEFAULT_RESERVE_RATIO } from '@/messages';
import { toJsonSchema } from '@/utils/schema';

Expand Down Expand Up @@ -191,6 +192,11 @@ export class AgentContext {
dynamicInstructionTokens: number = 0;
/** Token count for tool schemas only. */
toolSchemaTokens: number = 0;
/** Per-tool schema token counts (post-multiplier), keyed by tool name.
* `undefined` when not calculated (e.g. cached aggregate schema tokens). */
toolTokenCounts?: Record<string, number>;
/** Names of counted tools that are deferred (`defer_loading`) and discovered. */
deferredToolNames: string[] = [];
/** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
calibrationRatio: number = 1;
/** Provider-observed instruction overhead from the pruner's best-variance turn. */
Expand Down Expand Up @@ -894,6 +900,8 @@ export class AgentContext {
this.systemMessageTokens = 0;
this.dynamicInstructionTokens = 0;
this.toolSchemaTokens = 0;
this.toolTokenCounts = undefined;
this.deferredToolNames = [];
this.cachedSystemRunnable = undefined;
this.systemRunnableStale = true;
this.lastToken = undefined;
Expand Down Expand Up @@ -1006,6 +1014,10 @@ export class AgentContext {
): Promise<void> {
let toolTokens = 0;
const countedToolNames = new Set<string>();
/** Prototype-free: external tool names like `toString` must not hit
* inherited properties during accumulation */
const rawToolTokenCounts: Record<string, number> = Object.create(null);
const deferredCountedNames = new Set<string>();

/**
* Iterate both `tools` (user-provided instance tools) and `graphTools`
Expand Down Expand Up @@ -1040,11 +1052,14 @@ export class AgentContext {
toolName,
(genericTool.description as string | undefined) ?? ''
);
toolTokens += tokenCounter(
const schemaTokens = tokenCounter(
new SystemMessage(JSON.stringify(jsonSchema))
);
toolTokens += schemaTokens;
if (toolName) {
countedToolNames.add(toolName);
rawToolTokenCounts[toolName] =
(rawToolTokenCounts[toolName] ?? 0) + schemaTokens;
}
}
}
Expand All @@ -1062,7 +1077,16 @@ export class AgentContext {
parameters: def.parameters ?? {},
},
};
toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
const schemaTokens = tokenCounter(
new SystemMessage(JSON.stringify(schema))
);
toolTokens += schemaTokens;
countedToolNames.add(def.name);
rawToolTokenCounts[def.name] =
(rawToolTokenCounts[def.name] ?? 0) + schemaTokens;
if (def.defer_loading === true) {
deferredCountedNames.add(def.name);
}
}

const isAnthropic =
Expand All @@ -1077,6 +1101,25 @@ export class AgentContext {
? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
: DEFAULT_TOOL_TOKEN_MULTIPLIER;
this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);

/** Largest-remainder apportionment keeps the per-tool counts summing
* exactly to the aggregate despite per-entry rounding */
const toolTokenCounts = apportionTokenCounts(
rawToolTokenCounts,
toolTokenMultiplier,
this.toolSchemaTokens
);
const deferredToolNames: string[] = [];
for (const name of Object.keys(rawToolTokenCounts)) {
if (
deferredCountedNames.has(name) ||
this.toolRegistry?.get(name)?.defer_loading === true
) {
deferredToolNames.push(name);
}
}
this.toolTokenCounts = toolTokenCounts;
this.deferredToolNames = deferredToolNames;
}

/**
Expand Down Expand Up @@ -1212,9 +1255,8 @@ export class AgentContext {
* Returns a structured breakdown of how the context token budget is consumed.
* Useful for diagnostics when context overflow or pruning issues occur.
*
* Note: `toolCount` reflects discoveries immediately, but `toolSchemaTokens`
* is a snapshot taken during `calculateInstructionTokens` and is not
* recomputed when `markToolsAsDiscovered` is called mid-run.
* Note: `markToolsAsDiscovered` re-triggers `calculateInstructionTokens`,
* so `toolSchemaTokens`/`toolTokenCounts` refresh before the next call.
*/
getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
const maxContextTokens = this.maxContextTokens ?? 0;
Expand All @@ -1238,7 +1280,14 @@ export class AgentContext {
}
}

const reserveTokens = Math.round(maxContextTokens * DEFAULT_RESERVE_RATIO);
/** Mirror the pruner's reserve math so availableForMessages agrees
* with the contextBudget computed during pruning */
const reserveRatio =
this.summarizationConfig?.reserveRatio ?? DEFAULT_RESERVE_RATIO;
const reserveTokens =
reserveRatio > 0 && reserveRatio < 1
? Math.round(maxContextTokens * reserveRatio)
: 0;
const availableForMessages = Math.max(
0,
maxContextTokens - reserveTokens - this.instructionTokens
Expand All @@ -1255,6 +1304,12 @@ export class AgentContext {
messageCount,
messageTokens,
availableForMessages,
toolTokenCounts:
this.toolTokenCounts != null ? { ...this.toolTokenCounts } : undefined,
deferredToolNames:
this.deferredToolNames.length > 0
? [...this.deferredToolNames]
: undefined,
};
}

Expand Down Expand Up @@ -1324,6 +1379,14 @@ export class AgentContext {
}
if (hasNewDiscoveries) {
this.systemRunnableStale = true;
/** Refresh schema token accounting so the next call's budget and
* per-tool breakdown include the newly discovered tools; awaited
* via tokenCalculationPromise before the next model call */
if (this.tokenCounter) {
this.tokenCalculationPromise = this.calculateInstructionTokens(
this.tokenCounter
);
}
}
return hasNewDiscoveries;
}
Expand Down
8 changes: 6 additions & 2 deletions src/agents/__tests__/AgentContext.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1414,7 +1414,7 @@ describe('AgentContext', () => {
expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(2);
});

it('toolSchemaTokens snapshot does not auto-update after markToolsAsDiscovered', async () => {
it('refreshes toolSchemaTokens and per-tool counts after markToolsAsDiscovered', async () => {
const toolDefinitions: t.LCTool[] = [
{
name: 'deferred',
Expand All @@ -1431,9 +1431,13 @@ describe('AgentContext', () => {

await ctx.tokenCalculationPromise;
expect(ctx.toolSchemaTokens).toBe(0);
expect(ctx.toolTokenCounts).toEqual({});

ctx.markToolsAsDiscovered(['deferred']);
expect(ctx.toolSchemaTokens).toBe(0);
await ctx.tokenCalculationPromise;
expect(ctx.toolSchemaTokens).toBeGreaterThan(0);
expect(ctx.toolTokenCounts?.deferred).toBeGreaterThan(0);
expect(ctx.deferredToolNames).toContain('deferred');
});
});

Expand Down
13 changes: 13 additions & 0 deletions src/common/enum.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ export enum GraphEvents {
ON_SUBAGENT_UPDATE = 'on_subagent_update',
/** [Custom] Diagnostic logging event for context management observability */
ON_AGENT_LOG = 'on_agent_log',
/** [Custom] Per-model-call context window usage snapshot (post-prune token budget) */
ON_CONTEXT_USAGE = 'on_context_usage',

/* Official Events */

Expand Down Expand Up @@ -185,6 +187,17 @@ export enum Constants {
/** Anthropic server tool ID prefix (web_search, code_execution, etc.) */
ANTHROPIC_SERVER_TOOL_PREFIX = 'srvtoolu_',
SKILL_TOOL = 'skill',
/**
* Callback-metadata keys stamped by `attemptInvoke` /
* `tryFallbackProviders` carrying the provider (SDK `Providers` enum
* value) and configured model that actually served a model invocation.
* Unlike `ls_provider` — which derived providers inherit from their base
* class (e.g. DeepSeek/OpenRouter report `'openai'`) — these reflect the
* SDK's own routing, including fallback-provider calls. Consumed by the
* subagent usage-capture handler to tag billing events.
*/
INVOKED_PROVIDER = '__invoked_provider',
INVOKED_MODEL = '__invoked_model',
READ_FILE = 'read_file',
BASH_TOOL = 'bash_tool',
BASH_PROGRAMMATIC_TOOL_CALLING = 'run_tools_with_bash',
Expand Down
Loading
Loading