-
Notifications
You must be signed in to change notification settings - Fork 31
add in no-research-one-shot and 4 frontier models to use this template #279
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+312
−1
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
185 changes: 185 additions & 0 deletions
185
forecasting_tools/forecast_bots/official_bots/no_research_one_shot_bot.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,185 @@ | ||
| """A minimal single-shot forecasting bot with no research and no tools. | ||
|
|
||
| This bot asks a model to forecast a question directly, with a short | ||
| "helpful assistant" framing and a request for a JSON forecast. It performs no | ||
| research phase and (when configured with a single prediction per question) | ||
| makes exactly one model call per question. | ||
| """ | ||
|
|
||
| import logging | ||
| from datetime import datetime, timezone | ||
|
|
||
| from forecasting_tools.ai_models.general_llm import GeneralLlm | ||
| from forecasting_tools.data_models.forecast_report import ReasonedPrediction | ||
| from forecasting_tools.data_models.multiple_choice_report import PredictedOptionList | ||
| from forecasting_tools.data_models.numeric_report import NumericDistribution | ||
| from forecasting_tools.data_models.questions import ( | ||
| BinaryQuestion, | ||
| DateQuestion, | ||
| MetaculusQuestion, | ||
| MultipleChoiceQuestion, | ||
| NumericQuestion, | ||
| ) | ||
| from forecasting_tools.forecast_bots.official_bots.template_bot_2026_summer import ( | ||
| SummerTemplateBot2026, | ||
| ) | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
# Opening line placed at the very start of every prompt this module builds.
_SYSTEM_FRAMING = "You are a helpful assistant.\n\n"
# Shared instruction asking for brief reasoning followed by a JSON code block.
_REASONING_INSTRUCTION = (
    "Briefly explain your reasoning and provide your forecast as a JSON code block.\n\n"
)
# Percentile keys shown in the example JSON of the numeric and date prompts.
_PERCENTILE_KEYS = ["p05", "p25", "p50", "p75", "p95"]
# Fractions of the question's range used to compute the example values; the
# value at position i is paired with _PERCENTILE_KEYS[i].
_EXAMPLE_FRACTIONS = [0.05, 0.25, 0.5, 0.75, 0.95]
|
|
||
|
|
||
| class NoResearchOneShotBot(SummerTemplateBot2026): | ||
| """Forecasts each question in a single model call with no research phase. | ||
|
|
||
| The prompts are intentionally minimal: there is no professional-forecaster | ||
| persona and no guided chain-of-thought sub-questions. The model is simply | ||
| asked to reason briefly and return a JSON forecast, which is then parsed by | ||
| the configured parser model. | ||
| """ | ||
|
|
||
@classmethod
def _llm_config_defaults(cls) -> dict[str, str | GeneralLlm | None]:
    """Return the parent's LLM defaults adjusted for a bot without research.

    The "researcher" entry is removed when present, and an existing
    "summarizer" entry is overwritten with None.
    """
    defaults = super()._llm_config_defaults()
    # pop() with a fallback is a no-op when the key is absent.
    defaults.pop("researcher", None)
    if "summarizer" in defaults:
        defaults["summarizer"] = None
    return defaults
|
|
||
| async def run_research(self, question: MetaculusQuestion) -> str: | ||
| return "" | ||
|
|
||
| @staticmethod | ||
| def _question_details(question: MetaculusQuestion) -> str: | ||
| parts: list[str] = [] | ||
| if question.background_info: | ||
| parts.append(question.background_info) | ||
| if question.resolution_criteria: | ||
| parts.append(f"Resolution Criteria:\n{question.resolution_criteria}") | ||
| if question.fine_print: | ||
| parts.append(f"Fine Print:\n{question.fine_print}") | ||
| return "\n\n".join(parts) | ||
|
|
||
| @classmethod | ||
| def _header(cls, question: MetaculusQuestion) -> str: | ||
| today = datetime.now(timezone.utc).strftime("%Y-%m-%d") | ||
| return ( | ||
| f"**Question:** {question.question_text}\n\n" | ||
| f"**Today's Date:** {today}\n\n" | ||
| f"**Forecasting Window:** opens {question.open_time}, " | ||
| f"closes {question.scheduled_close_time}\n\n" | ||
| f"**Details:**\n{cls._question_details(question)}\n\n" | ||
| ) | ||
|
|
||
async def _run_forecast_on_binary(
    self, question: BinaryQuestion, research: str
) -> ReasonedPrediction[float]:
    """Build the minimal binary prompt and hand it to the binary forecaster.

    Args:
        question: The binary question to forecast.
        research: Unused; this bot performs no research.

    Returns:
        The result of ``self._binary_prompt_to_forecast`` for the prompt.
    """
    prompt = (
        _SYSTEM_FRAMING
        + self._header(question)
        + "Forecast the probability that this question resolves YES.\n\n"
        + _REASONING_INSTRUCTION
        + "The JSON must be in this exact format:\n"
        '```json\n{"yes": 0.XXX}\n```\n'
        # The placeholder named here must match the one in the example above,
        # otherwise the explanation refers to a token the model never saw.
        "where 0.XXX is a float between 0 and 1 representing P(yes)."
    )
    return await self._binary_prompt_to_forecast(question, prompt)
|
|
||
async def _run_forecast_on_multiple_choice(
    self, question: MultipleChoiceQuestion, research: str
) -> ReasonedPrediction[PredictedOptionList]:
    """Build the minimal multiple-choice prompt and hand it to the forecaster.

    Args:
        question: The multiple-choice question to forecast.
        research: Unused; this bot performs no research.

    Returns:
        The result of ``self._multiple_choice_prompt_to_forecast`` for the
        prompt.
    """
    outcomes_str = ", ".join(f'"{option}"' for option in question.options)
    # Join the rows with ",\n" so the last option carries no trailing comma;
    # a trailing comma would make the example invalid JSON.
    example_rows = ",\n".join(
        f' "{option}": 0.XXX' for option in question.options
    )
    prompt = (
        _SYSTEM_FRAMING
        + self._header(question)
        + f"Forecast the probability for each outcome. Outcomes: [{outcomes_str}]\n\n"
        + _REASONING_INSTRUCTION
        + "The JSON must map each outcome to its probability. "
        "All values must be non-negative and sum to 1.0. Example format:\n"
        "```json\n"
        + "{\n"
        + example_rows
        + "\n}\n```"
    )
    return await self._multiple_choice_prompt_to_forecast(question, prompt)
|
|
||
async def _run_forecast_on_numeric(
    self, question: NumericQuestion, research: str
) -> ReasonedPrediction[NumericDistribution]:
    """Build the minimal numeric prompt and hand it to the numeric forecaster.

    The prompt states the question's range (nominal bounds when set,
    otherwise the plain bounds), marks the scale as logarithmic when the
    question has a zero point, and shows an example JSON whose values are
    spaced across that range.

    Args:
        question: The numeric question to forecast.
        research: Unused; this bot performs no research.

    Returns:
        The result of ``self._numeric_prompt_to_forecast`` for the prompt.
    """
    upper_msg, lower_msg = self._create_upper_and_lower_bound_messages(question)

    # Prefer the nominal bounds; fall back to the plain bounds when unset.
    if question.nominal_lower_bound is None:
        low = question.lower_bound
    else:
        low = question.nominal_lower_bound
    if question.nominal_upper_bound is None:
        high = question.upper_bound
    else:
        high = question.nominal_upper_bound

    scale = f"{low} to {high}"
    if question.zero_point is not None:
        scale = f"{scale} (logarithmic scale)"

    sample_values = [
        format(low + fraction * (high - low), "g")
        for fraction in _EXAMPLE_FRACTIONS
    ]

    prompt_parts = [
        _SYSTEM_FRAMING,
        self._header(question),
        f"Forecast this continuous question. The scale ranges from {scale}.\n",
        f"{lower_msg} {upper_msg}\n\n",
        _REASONING_INSTRUCTION,
        "Provide percentile estimates as numeric values on the question's scale. ",
        'Use keys of the form "p<N>" where N is 1-99. ',
        "Values must be strictly increasing. ",
        "Set wide intervals - good forecasters account for unknown unknowns.\n",
        "Example:\n",
        "```json\n{\n",
        f"{self._json_example_body(sample_values)}\n",
        "}\n```",
    ]
    return await self._numeric_prompt_to_forecast(question, "".join(prompt_parts))
|
|
||
| async def _run_forecast_on_date( | ||
| self, question: DateQuestion, research: str | ||
| ) -> ReasonedPrediction[NumericDistribution]: | ||
| upper_bound_message, lower_bound_message = ( | ||
| self._create_upper_and_lower_bound_messages(question) | ||
| ) | ||
| lo_dt = question.lower_bound | ||
| hi_dt = question.upper_bound | ||
| range_desc = f"{lo_dt.date().isoformat()} to {hi_dt.date().isoformat()}" | ||
| span = hi_dt - lo_dt | ||
| examples = [ | ||
| f'"{(lo_dt + span * f).date().isoformat()}"' for f in _EXAMPLE_FRACTIONS | ||
| ] | ||
| prompt = ( | ||
| _SYSTEM_FRAMING | ||
| + self._header(question) | ||
| + f"Forecast this continuous question. The scale ranges from {range_desc}.\n" | ||
| f"{lower_bound_message} {upper_bound_message}\n\n" | ||
| + _REASONING_INSTRUCTION | ||
| + "Provide percentile estimates as ISO date strings " | ||
| '(e.g. "2025-06-15" or "2025-06-15T14:30:00" - time is optional). ' | ||
| 'Use keys of the form "p<N>" where N is 1-99. ' | ||
| "Dates must be in strictly chronological order. " | ||
| "Set wide intervals - good forecasters account for unknown unknowns.\n" | ||
| "Example:\n" | ||
| "```json\n{\n" | ||
| f"{self._json_example_body(examples)}\n" | ||
| "}\n```" | ||
| ) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll assume the prompts are intentionally minimalistic for all of these in general. |
||
| return await self._date_prompt_to_forecast(question, prompt) | ||
|
|
||
@staticmethod
def _json_example_body(example_values: list[str]) -> str:
    """Render example values as the inner lines of a JSON object.

    Each value is paired with the percentile key at the same position in
    ``_PERCENTILE_KEYS``; pairing stops at the shorter of the two sequences.
    Values are inserted verbatim, so string values must arrive already
    quoted.

    Args:
        example_values: Pre-formatted JSON values, one per percentile key.

    Returns:
        One ``"key": value`` row per pair, separated by a comma and a
        newline, with no comma after the final row.
    """
    # Joining on the separator keeps the comma placement tied to the rows
    # actually emitted, so a short input never ends with a dangling comma.
    return ",\n".join(
        f' "{key}": {value}'
        for key, value in zip(_PERCENTILE_KEYS, example_values)
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,9 @@ | |
| from forecasting_tools.forecast_bots.official_bots.gpt_4_1_optimized_bot import ( | ||
| GPT41OptimizedBot, | ||
| ) | ||
| from forecasting_tools.forecast_bots.official_bots.no_research_one_shot_bot import ( | ||
| NoResearchOneShotBot, | ||
| ) | ||
| from forecasting_tools.forecast_bots.official_bots.research_only_bot_2025_fall import ( | ||
| FallResearchOnlyBot2025, | ||
| ) | ||
|
|
@@ -357,10 +360,29 @@ def create_bot( | |
| llm: GeneralLlm, | ||
| researcher: str | GeneralLlm = "asknews/news-summaries", | ||
| predictions_per_research_report: int | None = None, | ||
| bot_type: Literal["template", "gpt_4_1_optimized", "research_only"] = "template", | ||
| bot_type: Literal[ | ||
| "template", "gpt_4_1_optimized", "research_only", "no_research_one_shot" | ||
| ] = "template", | ||
| ) -> ForecastBot: | ||
| default_summarizer = "openrouter/openai/gpt-4.1-mini" | ||
|
|
||
| if bot_type == "no_research_one_shot": | ||
| return NoResearchOneShotBot( | ||
| research_reports_per_question=1, | ||
| predictions_per_research_report=predictions_per_research_report or 1, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we constrain this to 1? Raise an error if it's more than that? |
||
| use_research_summary_to_forecast=default_for_using_summary, | ||
| publish_reports_to_metaculus=default_for_publish_to_metaculus, | ||
| skip_previously_forecasted_questions=default_for_skipping_questions, | ||
| llms={ | ||
| "default": llm, | ||
| "summarizer": None, | ||
| "researcher": "no_research", | ||
| "parser": structure_output_model, | ||
| }, | ||
| enable_summarize_research=False, | ||
| extra_metadata_in_explanation=True, | ||
| ) | ||
|
|
||
| if bot_type == "research_only": | ||
| return FallResearchOnlyBot2025( | ||
| research_reports_per_question=1, | ||
|
|
@@ -579,6 +601,53 @@ def get_default_bot_dict() -> dict[str, RunBotConfig]: # NOSONAR | |
| } | ||
|
|
||
| mode_base_bot_mapping = { | ||
| ############################ No-research one-shot bots ############################ | ||
| "METAC_GPT_5_5_NO_RESEARCH_ONE_SHOT": { | ||
| "estimated_cost_per_question": roughly_gpt_5_cost, | ||
| "bot": create_bot( | ||
| llm=GeneralLlm( | ||
| model="openai/gpt-5.5", | ||
| temperature=None, | ||
| timeout=gpt_5_timeout, | ||
| ), | ||
| bot_type="no_research_one_shot", | ||
| ), | ||
| "tournaments": TournConfig.aib_and_site, | ||
| }, | ||
| "METAC_GEMINI_3_1_PRO_NO_RESEARCH_ONE_SHOT": { | ||
| "estimated_cost_per_question": roughly_gemini_2_5_pro_preview_cost, | ||
| "bot": create_bot( | ||
| llm=GeneralLlm( | ||
| model="openrouter/google/gemini-3.1-pro-preview", | ||
| temperature=default_temperature, | ||
| timeout=gemini_default_timeout, | ||
| ), | ||
| bot_type="no_research_one_shot", | ||
| ), | ||
| "tournaments": TournConfig.aib_and_site, | ||
| }, | ||
| "METAC_CLAUDE_FABLE_5_NO_RESEARCH_ONE_SHOT": { | ||
| "estimated_cost_per_question": roughly_opus_4_5_cost * 2, | ||
| "bot": create_bot( | ||
| llm=GeneralLlm( | ||
| model="anthropic/claude-fable-5", | ||
| temperature=default_temperature, | ||
| ), | ||
| bot_type="no_research_one_shot", | ||
| ), | ||
| "tournaments": TournConfig.aib_and_site, | ||
| }, | ||
| "METAC_GROK_4_3_NO_RESEARCH_ONE_SHOT": { | ||
| "estimated_cost_per_question": 5 * roughly_one_call_to_grok_4_llm, | ||
| "bot": create_bot( | ||
| llm=GeneralLlm( | ||
| model="openrouter/x-ai/grok-4.3", | ||
| temperature=default_temperature, | ||
| ), | ||
| bot_type="no_research_one_shot", | ||
| ), | ||
| "tournaments": TournConfig.aib_and_site, | ||
| }, | ||
| ############################ Bots started in June 2026 ############################ | ||
| "METAC_CLAUDE_FABLE_5_HIGH": { | ||
| "estimated_cost_per_question": roughly_opus_4_5_cost * 2, | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably sanity check that this looks right in the prompt itself.