Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions .github/workflows/run-bot-aib-tournament.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,59 @@ jobs:

# NOTE: don't remove any of the open source models, since these are the best option for a long term baseline (other models get deprecated)

#################################### No-research one-shot bots ####################################

# Job: runs the METAC_GPT_5_5_NO_RESEARCH_ONE_SHOT bot through the reusable
# run-bot-launcher workflow; it is scheduled after the precache_asknews job.
# NOTE(review): this bot is labelled "no research" - confirm the AskNews
# precache dependency and cache_key are still needed by the launcher.
bot_gpt_5_5_no_research_one_shot:
needs: precache_asknews
uses: ./.github/workflows/run-bot-launcher.yaml
with:
bot_name: "METAC_GPT_5_5_NO_RESEARCH_ONE_SHOT"
metac_name: "metac-gpt-5-5-no-research-one-shot"
# Cache key is scoped to this workflow run via github.run_id.
cache_key: asknews-cache-${{ github.run_id }}
# Repository secrets forwarded to the launcher under INPUT_-prefixed names.
secrets:
INPUT_METACULUS_TOKENS: ${{ secrets.METACULUS_TOKENS }}
INPUT_METACULUS_API_BASE_URL: ${{ secrets.METACULUS_API_BASE_URL }}
INPUT_OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
INPUT_OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}

# Job: runs the METAC_GEMINI_3_1_PRO_NO_RESEARCH_ONE_SHOT bot through the
# reusable run-bot-launcher workflow; it is scheduled after precache_asknews.
# NOTE(review): this bot is labelled "no research" - confirm the AskNews
# precache dependency and cache_key are still needed by the launcher.
bot_gemini_3_1_pro_no_research_one_shot:
needs: precache_asknews
uses: ./.github/workflows/run-bot-launcher.yaml
with:
bot_name: "METAC_GEMINI_3_1_PRO_NO_RESEARCH_ONE_SHOT"
metac_name: "metac-gemini-3-1-pro-no-research-one-shot"
# Cache key is scoped to this workflow run via github.run_id.
cache_key: asknews-cache-${{ github.run_id }}
# Only the Metaculus and OpenRouter secrets are forwarded for this job.
secrets:
INPUT_METACULUS_TOKENS: ${{ secrets.METACULUS_TOKENS }}
INPUT_METACULUS_API_BASE_URL: ${{ secrets.METACULUS_API_BASE_URL }}
INPUT_OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}

# Job: runs the METAC_CLAUDE_FABLE_5_NO_RESEARCH_ONE_SHOT bot through the
# reusable run-bot-launcher workflow; it is scheduled after precache_asknews.
# NOTE(review): this bot is labelled "no research" - confirm the AskNews
# precache dependency and cache_key are still needed by the launcher.
bot_claude_fable_5_no_research_one_shot:
needs: precache_asknews
uses: ./.github/workflows/run-bot-launcher.yaml
with:
bot_name: "METAC_CLAUDE_FABLE_5_NO_RESEARCH_ONE_SHOT"
metac_name: "metac-claude-fable-5-no-research-one-shot"
# Cache key is scoped to this workflow run via github.run_id.
cache_key: asknews-cache-${{ github.run_id }}
# Repository secrets forwarded to the launcher under INPUT_-prefixed names.
secrets:
INPUT_METACULUS_TOKENS: ${{ secrets.METACULUS_TOKENS }}
INPUT_METACULUS_API_BASE_URL: ${{ secrets.METACULUS_API_BASE_URL }}
INPUT_ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
INPUT_OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}

# Job: runs the METAC_GROK_4_3_NO_RESEARCH_ONE_SHOT bot through the reusable
# run-bot-launcher workflow; it is scheduled after the precache_asknews job.
# NOTE(review): this bot is labelled "no research" - confirm the AskNews
# precache dependency and cache_key are still needed by the launcher.
bot_grok_4_3_no_research_one_shot:
needs: precache_asknews
uses: ./.github/workflows/run-bot-launcher.yaml
with:
bot_name: "METAC_GROK_4_3_NO_RESEARCH_ONE_SHOT"
metac_name: "metac-grok-4-3-no-research-one-shot"
# Cache key is scoped to this workflow run via github.run_id.
cache_key: asknews-cache-${{ github.run_id }}
# Repository secrets forwarded to the launcher under INPUT_-prefixed names.
secrets:
INPUT_METACULUS_TOKENS: ${{ secrets.METACULUS_TOKENS }}
INPUT_METACULUS_API_BASE_URL: ${{ secrets.METACULUS_API_BASE_URL }}
INPUT_XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
INPUT_OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}

#################################### June 2026 new bots ####################################

bot_claude_fable_5_high:
Expand Down
4 changes: 4 additions & 0 deletions forecasting_tools/forecast_bots/bot_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
from forecasting_tools.forecast_bots.official_bots.gpt_4_1_optimized_bot import (
GPT41OptimizedBot,
)
from forecasting_tools.forecast_bots.official_bots.no_research_one_shot_bot import (
NoResearchOneShotBot,
)
from forecasting_tools.forecast_bots.official_bots.q1_template_bot import (
Q1TemplateBot2025,
)
Expand Down Expand Up @@ -60,6 +63,7 @@ def get_all_important_bot_classes() -> list[type[ForecastBot]]:
SpringTemplateBot2026,
SummerTemplateBot2026,
GPT41OptimizedBot,
NoResearchOneShotBot,
]


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
"""A minimal single-shot forecasting bot with no research and no tools.

This bot asks a model to forecast a question directly, with a short
"helpful assistant" framing and a request for a JSON forecast. It performs no
research phase and (when configured with a single prediction per question)
makes exactly one model call per question.
"""

import logging
from datetime import datetime, timezone

from forecasting_tools.ai_models.general_llm import GeneralLlm
from forecasting_tools.data_models.forecast_report import ReasonedPrediction
from forecasting_tools.data_models.multiple_choice_report import PredictedOptionList
from forecasting_tools.data_models.numeric_report import NumericDistribution
from forecasting_tools.data_models.questions import (
BinaryQuestion,
DateQuestion,
MetaculusQuestion,
MultipleChoiceQuestion,
NumericQuestion,
)
from forecasting_tools.forecast_bots.official_bots.template_bot_2026_summer import (
SummerTemplateBot2026,
)

logger = logging.getLogger(__name__)

# Opening line placed at the very start of every prompt built in this module.
_SYSTEM_FRAMING = "You are a helpful assistant.\n\n"
# Shared instruction asking for brief reasoning plus a JSON code block answer.
_REASONING_INSTRUCTION = (
"Briefly explain your reasoning and provide your forecast as a JSON code block.\n\n"
)
# JSON keys used in the percentile example shown for numeric and date questions.
_PERCENTILE_KEYS = ["p05", "p25", "p50", "p75", "p95"]
# Fractions of the question range used to generate the example values; the
# positions line up one-to-one with _PERCENTILE_KEYS.
_EXAMPLE_FRACTIONS = [0.05, 0.25, 0.5, 0.75, 0.95]


class NoResearchOneShotBot(SummerTemplateBot2026):
    """Forecasts each question in a single model call with no research phase.

    The prompts are intentionally minimal: there is no professional-forecaster
    persona and no guided chain-of-thought sub-questions. The model is simply
    asked to reason briefly and return a JSON forecast, which is then handed to
    the parent class's ``*_prompt_to_forecast`` helpers.
    """

    @classmethod
    def _llm_config_defaults(cls) -> dict[str, str | GeneralLlm | None]:
        """Return the parent's LLM defaults with the research roles disabled.

        The ``"researcher"`` entry is removed when present, and an existing
        ``"summarizer"`` entry is set to ``None``.
        """
        config_dict = super()._llm_config_defaults()
        config_dict.pop("researcher", None)
        if "summarizer" in config_dict:
            config_dict["summarizer"] = None
        return config_dict

    async def run_research(self, question: MetaculusQuestion) -> str:
        """Skip the research phase: always return an empty research string."""
        return ""

    @staticmethod
    def _question_details(question: MetaculusQuestion) -> str:
        """Join the question's non-empty detail sections with blank lines.

        Background info is included as-is; resolution criteria and fine print
        are each prefixed with a label. Empty or missing sections are omitted.
        """
        parts: list[str] = []
        if question.background_info:
            parts.append(question.background_info)
        if question.resolution_criteria:
            parts.append(f"Resolution Criteria:\n{question.resolution_criteria}")
        if question.fine_print:
            parts.append(f"Fine Print:\n{question.fine_print}")
        return "\n\n".join(parts)

    @classmethod
    def _header(cls, question: MetaculusQuestion) -> str:
        """Build the shared prompt header for a question.

        The header contains the question text, today's date in UTC
        (``YYYY-MM-DD``), the open/close times, and the detail sections.
        """
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        return (
            f"**Question:** {question.question_text}\n\n"
            f"**Today's Date:** {today}\n\n"
            f"**Forecasting Window:** opens {question.open_time}, "
            f"closes {question.scheduled_close_time}\n\n"
            f"**Details:**\n{cls._question_details(question)}\n\n"
        )

    async def _run_forecast_on_binary(
        self, question: BinaryQuestion, research: str
    ) -> ReasonedPrediction[float]:
        """Build the binary prompt and delegate to the parent's forecaster.

        ``research`` is accepted for interface compatibility and is not used.
        """
        prompt = (
            _SYSTEM_FRAMING
            + self._header(question)
            + "Forecast the probability that this question resolves YES.\n\n"
            + _REASONING_INSTRUCTION
            + "The JSON must be in this exact format:\n"
            + '```json\n{"yes": 0.XXX}\n```\n'
            # The placeholder here must match the one shown in the example.
            + "where 0.XXX is a float between 0 and 1 representing P(yes)."
        )
        return await self._binary_prompt_to_forecast(question, prompt)

    async def _run_forecast_on_multiple_choice(
        self, question: MultipleChoiceQuestion, research: str
    ) -> ReasonedPrediction[PredictedOptionList]:
        """Build the multiple-choice prompt and delegate to the parent.

        ``research`` is accepted for interface compatibility and is not used.
        """
        outcomes_str = ", ".join(f'"{option}"' for option in question.options)
        # Join with ",\n" so the last entry carries no trailing comma and the
        # example is structurally valid JSON.
        example_body = ",\n".join(
            f' "{option}": 0.XXX' for option in question.options
        )
        prompt = (
            _SYSTEM_FRAMING
            + self._header(question)
            + f"Forecast the probability for each outcome. Outcomes: [{outcomes_str}]\n\n"
            + _REASONING_INSTRUCTION
            + "The JSON must map each outcome to its probability. "
            + "All values must be non-negative and sum to 1.0. Example format:\n"
            + "```json\n"
            + "{\n"
            + example_body
            + "\n}\n```"
        )
        return await self._multiple_choice_prompt_to_forecast(question, prompt)

    async def _run_forecast_on_numeric(
        self, question: NumericQuestion, research: str
    ) -> ReasonedPrediction[NumericDistribution]:
        """Build the numeric prompt and delegate to the parent's forecaster.

        The displayed range prefers the nominal bounds and falls back to the
        regular bounds when a nominal bound is ``None``. ``research`` is
        accepted for interface compatibility and is not used.
        """
        upper_bound_message, lower_bound_message = (
            self._create_upper_and_lower_bound_messages(question)
        )
        lo = (
            question.nominal_lower_bound
            if question.nominal_lower_bound is not None
            else question.lower_bound
        )
        hi = (
            question.nominal_upper_bound
            if question.nominal_upper_bound is not None
            else question.upper_bound
        )
        range_desc = f"{lo} to {hi}"
        if question.zero_point is not None:
            range_desc += " (logarithmic scale)"
        # Example values are spaced linearly across the range.
        examples = [f"{lo + f * (hi - lo):g}" for f in _EXAMPLE_FRACTIONS]
        prompt = (
            _SYSTEM_FRAMING
            + self._header(question)
            + f"Forecast this continuous question. The scale ranges from {range_desc}.\n"
            + f"{lower_bound_message} {upper_bound_message}\n\n"
            + _REASONING_INSTRUCTION
            + "Provide percentile estimates as numeric values on the question's scale. "
            + 'Use keys of the form "p<N>" where N is 1-99. '
            + "Values must be strictly increasing. "
            + "Set wide intervals - good forecasters account for unknown unknowns.\n"
            + "Example:\n"
            + "```json\n{\n"
            + f"{self._json_example_body(examples)}\n"
            + "}\n```"
        )
        return await self._numeric_prompt_to_forecast(question, prompt)

    async def _run_forecast_on_date(
        self, question: DateQuestion, research: str
    ) -> ReasonedPrediction[NumericDistribution]:
        """Build the date prompt and delegate to the parent's forecaster.

        Example dates are spaced across the span between the question's lower
        and upper bounds. ``research`` is accepted for interface compatibility
        and is not used.
        """
        upper_bound_message, lower_bound_message = (
            self._create_upper_and_lower_bound_messages(question)
        )
        lo_dt = question.lower_bound
        hi_dt = question.upper_bound
        range_desc = f"{lo_dt.date().isoformat()} to {hi_dt.date().isoformat()}"
        span = hi_dt - lo_dt
        # NOTE(review): sanity-check how these example dates render in the
        # final prompt.
        examples = [
            f'"{(lo_dt + span * f).date().isoformat()}"' for f in _EXAMPLE_FRACTIONS
        ]
        # NOTE(review): the prompt is assumed to stay as minimal as the prompts
        # for the other question types.
        prompt = (
            _SYSTEM_FRAMING
            + self._header(question)
            + f"Forecast this continuous question. The scale ranges from {range_desc}.\n"
            + f"{lower_bound_message} {upper_bound_message}\n\n"
            + _REASONING_INSTRUCTION
            + "Provide percentile estimates as ISO date strings "
            + '(e.g. "2025-06-15" or "2025-06-15T14:30:00" - time is optional). '
            + 'Use keys of the form "p<N>" where N is 1-99. '
            + "Dates must be in strictly chronological order. "
            + "Set wide intervals - good forecasters account for unknown unknowns.\n"
            + "Example:\n"
            + "```json\n{\n"
            + f"{self._json_example_body(examples)}\n"
            + "}\n```"
        )
        return await self._date_prompt_to_forecast(question, prompt)

    @staticmethod
    def _json_example_body(example_values: list[str]) -> str:
        """Render ``"pNN": value`` lines for the JSON example in a prompt.

        Values are paired positionally with ``_PERCENTILE_KEYS``; extra items
        on either side are ignored. Lines are joined with ``",\\n"`` so the
        final line never ends in a comma, whatever the number of values.
        """
        return ",\n".join(
            f' "{key}": {value}'
            for key, value in zip(_PERCENTILE_KEYS, example_values)
        )
71 changes: 70 additions & 1 deletion run_bots.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
from forecasting_tools.forecast_bots.official_bots.gpt_4_1_optimized_bot import (
GPT41OptimizedBot,
)
from forecasting_tools.forecast_bots.official_bots.no_research_one_shot_bot import (
NoResearchOneShotBot,
)
from forecasting_tools.forecast_bots.official_bots.research_only_bot_2025_fall import (
FallResearchOnlyBot2025,
)
Expand Down Expand Up @@ -357,10 +360,29 @@ def create_bot(
llm: GeneralLlm,
researcher: str | GeneralLlm = "asknews/news-summaries",
predictions_per_research_report: int | None = None,
bot_type: Literal["template", "gpt_4_1_optimized", "research_only"] = "template",
bot_type: Literal[
"template", "gpt_4_1_optimized", "research_only", "no_research_one_shot"
] = "template",
) -> ForecastBot:
default_summarizer = "openrouter/openai/gpt-4.1-mini"

if bot_type == "no_research_one_shot":
return NoResearchOneShotBot(
research_reports_per_question=1,
predictions_per_research_report=predictions_per_research_report or 1,

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we constrain this to 1? Raise an error if it's more than that?

use_research_summary_to_forecast=default_for_using_summary,
publish_reports_to_metaculus=default_for_publish_to_metaculus,
skip_previously_forecasted_questions=default_for_skipping_questions,
llms={
"default": llm,
"summarizer": None,
"researcher": "no_research",
"parser": structure_output_model,
},
enable_summarize_research=False,
extra_metadata_in_explanation=True,
)

if bot_type == "research_only":
return FallResearchOnlyBot2025(
research_reports_per_question=1,
Expand Down Expand Up @@ -579,6 +601,53 @@ def get_default_bot_dict() -> dict[str, RunBotConfig]: # NOSONAR
}

mode_base_bot_mapping = {
############################ No-research one-shot bots ############################
"METAC_GPT_5_5_NO_RESEARCH_ONE_SHOT": {
"estimated_cost_per_question": roughly_gpt_5_cost,
"bot": create_bot(
llm=GeneralLlm(
model="openai/gpt-5.5",
temperature=None,
timeout=gpt_5_timeout,
),
bot_type="no_research_one_shot",
),
"tournaments": TournConfig.aib_and_site,
},
"METAC_GEMINI_3_1_PRO_NO_RESEARCH_ONE_SHOT": {
"estimated_cost_per_question": roughly_gemini_2_5_pro_preview_cost,
"bot": create_bot(
llm=GeneralLlm(
model="openrouter/google/gemini-3.1-pro-preview",
temperature=default_temperature,
timeout=gemini_default_timeout,
),
bot_type="no_research_one_shot",
),
"tournaments": TournConfig.aib_and_site,
},
"METAC_CLAUDE_FABLE_5_NO_RESEARCH_ONE_SHOT": {
"estimated_cost_per_question": roughly_opus_4_5_cost * 2,
"bot": create_bot(
llm=GeneralLlm(
model="anthropic/claude-fable-5",
temperature=default_temperature,
),
bot_type="no_research_one_shot",
),
"tournaments": TournConfig.aib_and_site,
},
"METAC_GROK_4_3_NO_RESEARCH_ONE_SHOT": {
"estimated_cost_per_question": 5 * roughly_one_call_to_grok_4_llm,
"bot": create_bot(
llm=GeneralLlm(
model="openrouter/x-ai/grok-4.3",
temperature=default_temperature,
),
bot_type="no_research_one_shot",
),
"tournaments": TournConfig.aib_and_site,
},
############################ Bots started in June 2026 ############################
"METAC_CLAUDE_FABLE_5_HIGH": {
"estimated_cost_per_question": roughly_opus_4_5_cost * 2,
Expand Down
Loading