diff --git a/.github/workflows/run-bot-aib-tournament.yaml b/.github/workflows/run-bot-aib-tournament.yaml index 95a6fd5e..8eddc69d 100644 --- a/.github/workflows/run-bot-aib-tournament.yaml +++ b/.github/workflows/run-bot-aib-tournament.yaml @@ -86,6 +86,59 @@ jobs: # NOTE: don't remove any of the open source models, since these are the best option for a long term baseline (other models get deprecated) + #################################### No-research one-shot bots #################################### + + bot_gpt_5_5_no_research_one_shot: + needs: precache_asknews + uses: ./.github/workflows/run-bot-launcher.yaml + with: + bot_name: "METAC_GPT_5_5_NO_RESEARCH_ONE_SHOT" + metac_name: "metac-gpt-5-5-no-research-one-shot" + cache_key: asknews-cache-${{ github.run_id }} + secrets: + INPUT_METACULUS_TOKENS: ${{ secrets.METACULUS_TOKENS }} + INPUT_METACULUS_API_BASE_URL: ${{ secrets.METACULUS_API_BASE_URL }} + INPUT_OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + INPUT_OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + + bot_gemini_3_1_pro_no_research_one_shot: + needs: precache_asknews + uses: ./.github/workflows/run-bot-launcher.yaml + with: + bot_name: "METAC_GEMINI_3_1_PRO_NO_RESEARCH_ONE_SHOT" + metac_name: "metac-gemini-3-1-pro-no-research-one-shot" + cache_key: asknews-cache-${{ github.run_id }} + secrets: + INPUT_METACULUS_TOKENS: ${{ secrets.METACULUS_TOKENS }} + INPUT_METACULUS_API_BASE_URL: ${{ secrets.METACULUS_API_BASE_URL }} + INPUT_OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + + bot_claude_fable_5_no_research_one_shot: + needs: precache_asknews + uses: ./.github/workflows/run-bot-launcher.yaml + with: + bot_name: "METAC_CLAUDE_FABLE_5_NO_RESEARCH_ONE_SHOT" + metac_name: "metac-claude-fable-5-no-research-one-shot" + cache_key: asknews-cache-${{ github.run_id }} + secrets: + INPUT_METACULUS_TOKENS: ${{ secrets.METACULUS_TOKENS }} + INPUT_METACULUS_API_BASE_URL: ${{ secrets.METACULUS_API_BASE_URL }} + INPUT_ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + INPUT_OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + + bot_grok_4_3_no_research_one_shot: + needs: precache_asknews + uses: ./.github/workflows/run-bot-launcher.yaml + with: + bot_name: "METAC_GROK_4_3_NO_RESEARCH_ONE_SHOT" + metac_name: "metac-grok-4-3-no-research-one-shot" + cache_key: asknews-cache-${{ github.run_id }} + secrets: + INPUT_METACULUS_TOKENS: ${{ secrets.METACULUS_TOKENS }} + INPUT_METACULUS_API_BASE_URL: ${{ secrets.METACULUS_API_BASE_URL }} + INPUT_XAI_API_KEY: ${{ secrets.XAI_API_KEY }} + INPUT_OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + #################################### June 2026 new bots #################################### bot_claude_fable_5_high: diff --git a/forecasting_tools/forecast_bots/bot_lists.py b/forecasting_tools/forecast_bots/bot_lists.py index 6d6605f0..a598f2ed 100644 --- a/forecasting_tools/forecast_bots/bot_lists.py +++ b/forecasting_tools/forecast_bots/bot_lists.py @@ -11,6 +11,9 @@ from forecasting_tools.forecast_bots.official_bots.gpt_4_1_optimized_bot import ( GPT41OptimizedBot, ) +from forecasting_tools.forecast_bots.official_bots.no_research_one_shot_bot import ( + NoResearchOneShotBot, +) from forecasting_tools.forecast_bots.official_bots.q1_template_bot import ( Q1TemplateBot2025, ) @@ -60,6 +63,7 @@ def get_all_important_bot_classes() -> list[type[ForecastBot]]: SpringTemplateBot2026, SummerTemplateBot2026, GPT41OptimizedBot, + NoResearchOneShotBot, ] diff --git a/forecasting_tools/forecast_bots/official_bots/no_research_one_shot_bot.py b/forecasting_tools/forecast_bots/official_bots/no_research_one_shot_bot.py new file mode 100644 index 00000000..fc90c316 --- /dev/null +++ b/forecasting_tools/forecast_bots/official_bots/no_research_one_shot_bot.py @@ -0,0 +1,185 @@ +"""A minimal single-shot forecasting bot with no research and no tools. + +This bot asks a model to forecast a question directly, with a short +"helpful assistant" framing and a request for a JSON forecast. It performs no +research phase and (when configured with a single prediction per question) +makes exactly one model call per question. +""" + +import logging +from datetime import datetime, timezone + +from forecasting_tools.ai_models.general_llm import GeneralLlm +from forecasting_tools.data_models.forecast_report import ReasonedPrediction +from forecasting_tools.data_models.multiple_choice_report import PredictedOptionList +from forecasting_tools.data_models.numeric_report import NumericDistribution +from forecasting_tools.data_models.questions import ( + BinaryQuestion, + DateQuestion, + MetaculusQuestion, + MultipleChoiceQuestion, + NumericQuestion, +) +from forecasting_tools.forecast_bots.official_bots.template_bot_2026_summer import ( + SummerTemplateBot2026, +) + +logger = logging.getLogger(__name__) + +_SYSTEM_FRAMING = "You are a helpful assistant.\n\n" +_REASONING_INSTRUCTION = ( + "Briefly explain your reasoning and provide your forecast as a JSON code block.\n\n" +) +_PERCENTILE_KEYS = ["p05", "p25", "p50", "p75", "p95"] +_EXAMPLE_FRACTIONS = [0.05, 0.25, 0.5, 0.75, 0.95] + + +class NoResearchOneShotBot(SummerTemplateBot2026): + """Forecasts each question in a single model call with no research phase. + + The prompts are intentionally minimal: there is no professional-forecaster + persona and no guided chain-of-thought sub-questions. The model is simply + asked to reason briefly and return a JSON forecast, which is then parsed by + the configured parser model. + """ + + @classmethod + def _llm_config_defaults(cls) -> dict[str, str | GeneralLlm | None]: + config_dict = super()._llm_config_defaults() + if "researcher" in config_dict: + config_dict.pop("researcher") + if "summarizer" in config_dict: + config_dict["summarizer"] = None + return config_dict + + async def run_research(self, question: MetaculusQuestion) -> str: + return "" + + @staticmethod + def _question_details(question: MetaculusQuestion) -> str: + parts: list[str] = [] + if question.background_info: + parts.append(question.background_info) + if question.resolution_criteria: + parts.append(f"Resolution Criteria:\n{question.resolution_criteria}") + if question.fine_print: + parts.append(f"Fine Print:\n{question.fine_print}") + return "\n\n".join(parts) + + @classmethod + def _header(cls, question: MetaculusQuestion) -> str: + today = datetime.now(timezone.utc).strftime("%Y-%m-%d") + return ( + f"**Question:** {question.question_text}\n\n" + f"**Today's Date:** {today}\n\n" + f"**Forecasting Window:** opens {question.open_time}, " + f"closes {question.scheduled_close_time}\n\n" + f"**Details:**\n{cls._question_details(question)}\n\n" + ) + + async def _run_forecast_on_binary( + self, question: BinaryQuestion, research: str + ) -> ReasonedPrediction[float]: + prompt = ( + _SYSTEM_FRAMING + + self._header(question) + + "Forecast the probability that this question resolves YES.\n\n" + + _REASONING_INSTRUCTION + + "The JSON must be in this exact format:\n" + '```json\n{"yes": 0.XXX}\n```\n' + "where 0.XXXX is a float between 0 and 1 representing P(yes)." + ) + return await self._binary_prompt_to_forecast(question, prompt) + + async def _run_forecast_on_multiple_choice( + self, question: MultipleChoiceQuestion, research: str + ) -> ReasonedPrediction[PredictedOptionList]: + outcomes_str = ", ".join(f'"{option}"' for option in question.options) + prompt = ( + _SYSTEM_FRAMING + + self._header(question) + + f"Forecast the probability for each outcome. Outcomes: [{outcomes_str}]\n\n" + + _REASONING_INSTRUCTION + + "The JSON must map each outcome to its probability. " + "All values must be non-negative and sum to 1.0. Example format:\n" + "```json\n" + + "{\n" + + "".join(f' "{option}": 0.XXX,\n' for option in question.options) + + "}\n```" + ) + return await self._multiple_choice_prompt_to_forecast(question, prompt) + + async def _run_forecast_on_numeric( + self, question: NumericQuestion, research: str + ) -> ReasonedPrediction[NumericDistribution]: + upper_bound_message, lower_bound_message = ( + self._create_upper_and_lower_bound_messages(question) + ) + lo = ( + question.nominal_lower_bound + if question.nominal_lower_bound is not None + else question.lower_bound + ) + hi = ( + question.nominal_upper_bound + if question.nominal_upper_bound is not None + else question.upper_bound + ) + range_desc = f"{lo} to {hi}" + if question.zero_point is not None: + range_desc += " (logarithmic scale)" + examples = [f"{lo + f * (hi - lo):g}" for f in _EXAMPLE_FRACTIONS] + prompt = ( + _SYSTEM_FRAMING + + self._header(question) + + f"Forecast this continuous question. The scale ranges from {range_desc}.\n" + f"{lower_bound_message} {upper_bound_message}\n\n" + + _REASONING_INSTRUCTION + + "Provide percentile estimates as numeric values on the question's scale. " + 'Use keys of the form "p" where N is 1-99. ' + "Values must be strictly increasing. " + "Set wide intervals - good forecasters account for unknown unknowns.\n" + "Example:\n" + "```json\n{\n" + f"{self._json_example_body(examples)}\n" + "}\n```" + ) + return await self._numeric_prompt_to_forecast(question, prompt) + + async def _run_forecast_on_date( + self, question: DateQuestion, research: str + ) -> ReasonedPrediction[NumericDistribution]: + upper_bound_message, lower_bound_message = ( + self._create_upper_and_lower_bound_messages(question) + ) + lo_dt = question.lower_bound + hi_dt = question.upper_bound + range_desc = f"{lo_dt.date().isoformat()} to {hi_dt.date().isoformat()}" + span = hi_dt - lo_dt + examples = [ + f'"{(lo_dt + span * f).date().isoformat()}"' for f in _EXAMPLE_FRACTIONS + ] + prompt = ( + _SYSTEM_FRAMING + + self._header(question) + + f"Forecast this continuous question. The scale ranges from {range_desc}.\n" + f"{lower_bound_message} {upper_bound_message}\n\n" + + _REASONING_INSTRUCTION + + "Provide percentile estimates as ISO date strings " + '(e.g. "2025-06-15" or "2025-06-15T14:30:00" - time is optional). ' + 'Use keys of the form "p" where N is 1-99. ' + "Dates must be in strictly chronological order. " + "Set wide intervals - good forecasters account for unknown unknowns.\n" + "Example:\n" + "```json\n{\n" + f"{self._json_example_body(examples)}\n" + "}\n```" + ) + return await self._date_prompt_to_forecast(question, prompt) + + @staticmethod + def _json_example_body(example_values: list[str]) -> str: + return "\n".join( + f' "{key}": {value}{"," if index < len(_PERCENTILE_KEYS) - 1 else ""}' + for index, (key, value) in enumerate(zip(_PERCENTILE_KEYS, example_values)) + ) diff --git a/run_bots.py b/run_bots.py index 3ea6146e..176791eb 100644 --- a/run_bots.py +++ b/run_bots.py @@ -23,6 +23,9 @@ from forecasting_tools.forecast_bots.official_bots.gpt_4_1_optimized_bot import ( GPT41OptimizedBot, ) +from forecasting_tools.forecast_bots.official_bots.no_research_one_shot_bot import ( + NoResearchOneShotBot, +) from forecasting_tools.forecast_bots.official_bots.research_only_bot_2025_fall import ( FallResearchOnlyBot2025, ) @@ -357,10 +360,29 @@ def create_bot( llm: GeneralLlm, researcher: str | GeneralLlm = "asknews/news-summaries", predictions_per_research_report: int | None = None, - bot_type: Literal["template", "gpt_4_1_optimized", "research_only"] = "template", + bot_type: Literal[ + "template", "gpt_4_1_optimized", "research_only", "no_research_one_shot" + ] = "template", ) -> ForecastBot: default_summarizer = "openrouter/openai/gpt-4.1-mini" + if bot_type == "no_research_one_shot": + return NoResearchOneShotBot( + research_reports_per_question=1, + predictions_per_research_report=predictions_per_research_report or 1, + use_research_summary_to_forecast=default_for_using_summary, + publish_reports_to_metaculus=default_for_publish_to_metaculus, + skip_previously_forecasted_questions=default_for_skipping_questions, + llms={ + "default": llm, + "summarizer": None, + "researcher": "no_research", + "parser": structure_output_model, + }, + enable_summarize_research=False, + extra_metadata_in_explanation=True, + ) + if bot_type == "research_only": return FallResearchOnlyBot2025( research_reports_per_question=1, @@ -579,6 +601,53 @@ def get_default_bot_dict() -> dict[str, RunBotConfig]: # NOSONAR } mode_base_bot_mapping = { + ############################ No-research one-shot bots ############################ + "METAC_GPT_5_5_NO_RESEARCH_ONE_SHOT": { + "estimated_cost_per_question": roughly_gpt_5_cost, + "bot": create_bot( + llm=GeneralLlm( + model="openai/gpt-5.5", + temperature=None, + timeout=gpt_5_timeout, + ), + bot_type="no_research_one_shot", + ), + "tournaments": TournConfig.aib_and_site, + }, + "METAC_GEMINI_3_1_PRO_NO_RESEARCH_ONE_SHOT": { + "estimated_cost_per_question": roughly_gemini_2_5_pro_preview_cost, + "bot": create_bot( + llm=GeneralLlm( + model="openrouter/google/gemini-3.1-pro-preview", + temperature=default_temperature, + timeout=gemini_default_timeout, + ), + bot_type="no_research_one_shot", + ), + "tournaments": TournConfig.aib_and_site, + }, + "METAC_CLAUDE_FABLE_5_NO_RESEARCH_ONE_SHOT": { + "estimated_cost_per_question": roughly_opus_4_5_cost * 2, + "bot": create_bot( + llm=GeneralLlm( + model="anthropic/claude-fable-5", + temperature=default_temperature, + ), + bot_type="no_research_one_shot", + ), + "tournaments": TournConfig.aib_and_site, + }, + "METAC_GROK_4_3_NO_RESEARCH_ONE_SHOT": { + "estimated_cost_per_question": 5 * roughly_one_call_to_grok_4_llm, + "bot": create_bot( + llm=GeneralLlm( + model="openrouter/x-ai/grok-4.3", + temperature=default_temperature, + ), + bot_type="no_research_one_shot", + ), + "tournaments": TournConfig.aib_and_site, + }, ############################ Bots started in June 2026 ############################ "METAC_CLAUDE_FABLE_5_HIGH": { "estimated_cost_per_question": roughly_opus_4_5_cost * 2,