diff --git a/eureka_ml_insights/configs/model_configs.py b/eureka_ml_insights/configs/model_configs.py index 1b83a7b..71e4e1c 100644 --- a/eureka_ml_insights/configs/model_configs.py +++ b/eureka_ml_insights/configs/model_configs.py @@ -186,6 +186,28 @@ }, ) +AIF_NT_LLAMA3_2_90B_VISION_INSTRUCT_CONFIG = ModelConfig( + LlamaServerlessAzureRestEndpointModel, + { + "url": "https://Llama-3-2-90B-Vision-Instruct-ev.eastus2.models.ai.azure.com/chat/completions", + "secret_key_params": { + "key_name": "aif-nt-meta-llama-3-2-90b-Instruct-1", + "local_keys_path": "keys/aifeval-vault-azure-net.json", + }, + }, +) + +AIF_NT_LLAMA3_2_90B_VISION_INSTRUCT_CONFIG_2 = ModelConfig( + LlamaServerlessAzureRestEndpointModel, + { + "url": "https://Llama-3-2-90B-Vision-Instruct-2.eastus2.models.ai.azure.com/chat/completions", + "secret_key_params": { + "key_name": "aif-nt-meta-llama-3-2-90b-Instruct-2", + "local_keys_path": "keys/aifeval-vault-azure-net.json", + }, + }, +) + # Mistral Endpoints AIF_NT_MISTRAL_LARGE_2_2407_CONFIG = ModelConfig( MistralServerlessAzureRestEndpointModel, diff --git a/eureka_ml_insights/data_utils/__init__.py b/eureka_ml_insights/data_utils/__init__.py index d8cbe50..d4c4037 100644 --- a/eureka_ml_insights/data_utils/__init__.py +++ b/eureka_ml_insights/data_utils/__init__.py @@ -1,3 +1,4 @@ +from .aime_utils import AIMEExtractAnswer from .data import ( AzureDataReader, AzureJsonReader, @@ -41,6 +42,7 @@ ) __all__ = [ + AIMEExtractAnswer, JsonLinesWriter, JsonReader, HFJsonReader, diff --git a/eureka_ml_insights/data_utils/aime_utils.py b/eureka_ml_insights/data_utils/aime_utils.py index 780b706..7f1f095 100644 --- a/eureka_ml_insights/data_utils/aime_utils.py +++ b/eureka_ml_insights/data_utils/aime_utils.py @@ -3,7 +3,7 @@ import pandas as pd -from eureka_ml_insights.data_utils import DFTransformBase +from .transform import DFTransformBase @dataclass @@ -12,32 +12,32 @@ class AIMEExtractAnswer(DFTransformBase): model_answer_column: str def transform(self, df: pd.DataFrame) -> pd.DataFrame: - df[self.model_answer_column] = df[self.model_output_column].apply(parse_output_answer) + df[self.model_answer_column] = df[self.model_output_column].apply(self.parse_output_answer) return df - -def parse_output_answer(response): - """ - Parse the input string to extract answer of a given AIME question. - Parameters: - response (str): Input string containing answer X in the form of "Final Answer: X". - Returns: - numerical_value (float): A numeric value representing the model's answer. - """ - numerical_value = None - - # Try to find an answer in the "Final Answer: X" format - match = re.search(r"Final Answer:\s*([\$]?-?[\d,]+(?:\.\d+)?%?)", response) - if match: - answer_str = match.group(1) - # Remove $ and commas, handle percentages for numerical comparison - answer_str = answer_str.replace("$", "").replace(",", "") - if answer_str.endswith("%"): - numerical_value = float(answer_str[:-1]) / 100 # Convert percentage to decimal - else: - try: - numerical_value = float(answer_str) - except ValueError as e: - numerical_value = None - - return numerical_value \ No newline at end of file + @staticmethod + def parse_output_answer(response): + """ + Parse the input string to extract answer of a given AIME question. + Parameters: + response (str): Input string containing answer X in the form of "Final Answer: X". + Returns: + numerical_value (float): A numeric value representing the model's answer. 
+ """ + numerical_value = None + + # Try to find an answer in the "Final Answer: X" format + match = re.search(r"Final Answer:\s*([\$]?-?[\d,]+(?:\.\d+)?%?)", response) + if match: + answer_str = match.group(1) + # Remove $ and commas, handle percentages for numerical comparison + answer_str = answer_str.replace("$", "").replace(",", "") + if answer_str.endswith("%"): + numerical_value = float(answer_str[:-1]) / 100 # Convert percentage to decimal + else: + try: + numerical_value = float(answer_str) + except ValueError as e: + numerical_value = None + + return numerical_value \ No newline at end of file diff --git a/eureka_ml_insights/data_utils/dna_utils.py b/eureka_ml_insights/data_utils/dna_utils.py index e4e9203..2ba3026 100644 --- a/eureka_ml_insights/data_utils/dna_utils.py +++ b/eureka_ml_insights/data_utils/dna_utils.py @@ -2,7 +2,7 @@ import pandas as pd -from eureka_ml_insights.data_utils import DFTransformBase +from .transform import DFTransformBase @dataclass diff --git a/eureka_ml_insights/data_utils/flenqa_utils.py b/eureka_ml_insights/data_utils/flenqa_utils.py index 8fdeace..6ede605 100644 --- a/eureka_ml_insights/data_utils/flenqa_utils.py +++ b/eureka_ml_insights/data_utils/flenqa_utils.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd -from eureka_ml_insights.data_utils import DFTransformBase +from .transform import DFTransformBase @dataclass diff --git a/eureka_ml_insights/data_utils/kitab_utils.py b/eureka_ml_insights/data_utils/kitab_utils.py index fed5cfe..0f0a932 100644 --- a/eureka_ml_insights/data_utils/kitab_utils.py +++ b/eureka_ml_insights/data_utils/kitab_utils.py @@ -9,7 +9,7 @@ import pandas as pd -from eureka_ml_insights.data_utils import DFTransformBase +from .transform import DFTransformBase @dataclass diff --git a/eureka_ml_insights/data_utils/mmmu_utils.py b/eureka_ml_insights/data_utils/mmmu_utils.py index 9b2f974..a3732f9 100644 --- a/eureka_ml_insights/data_utils/mmmu_utils.py +++ b/eureka_ml_insights/data_utils/mmmu_utils.py @@ -2,7 +2,7 @@ import pandas as pd -from eureka_ml_insights.data_utils import DFTransformBase +from .transform import DFTransformBase MMMUCategories = { "Art and Design": ["Art", "Art_Theory", "Design", "Music"], diff --git a/eureka_ml_insights/data_utils/spatial_utils.py b/eureka_ml_insights/data_utils/spatial_utils.py index 8a579bf..9af95c4 100644 --- a/eureka_ml_insights/data_utils/spatial_utils.py +++ b/eureka_ml_insights/data_utils/spatial_utils.py @@ -5,10 +5,7 @@ import pandas as pd -from eureka_ml_insights.data_utils.transform import ( - DFTransformBase, - MultiColumnTransform, -) +from .transform import DFTransformBase, MultiColumnTransform @dataclass diff --git a/eureka_ml_insights/data_utils/toxigen_utils.py b/eureka_ml_insights/data_utils/toxigen_utils.py index ee4b8c2..f2dd4ac 100644 --- a/eureka_ml_insights/data_utils/toxigen_utils.py +++ b/eureka_ml_insights/data_utils/toxigen_utils.py @@ -3,7 +3,7 @@ import pandas as pd -from eureka_ml_insights.data_utils import DFTransformBase +from .transform import DFTransformBase delimiters = [ "Toxic score", diff --git a/eureka_ml_insights/models/models.py b/eureka_ml_insights/models/models.py index 5681171..1868280 100644 --- a/eureka_ml_insights/models/models.py +++ b/eureka_ml_insights/models/models.py @@ -13,7 +13,6 @@ from eureka_ml_insights.secret_management import get_secret - @dataclass class Model(ABC): """This class is used to define the structure of a model class. 
@@ -91,7 +90,7 @@ class EndpointModel(Model): num_retries: int = 3 @abstractmethod - def create_request(self, text_prompt, query_images=None, system_message=None): + def create_request(self, text_prompt, **kwargs): raise NotImplementedError @abstractmethod @@ -99,7 +98,7 @@ def get_response(self, request): # must return the model output and the response time raise NotImplementedError - def generate(self, query_text, query_images=None, system_message=None): + def generate(self, query_text, **kwargs): """ Calls the endpoint to generate the model response. args: @@ -111,7 +110,7 @@ def generate(self, query_text, query_images=None, system_message=None): and any other relevant information returned by the model. """ response_dict = {} - request = self.create_request(query_text, query_images=query_images, system_message=system_message) + request = self.create_request(query_text, **kwargs) attempts = 0 while attempts < self.num_retries: try: @@ -120,7 +119,7 @@ def generate(self, query_text, query_images=None, system_message=None): response_dict.update(meta_response) self.is_valid = True break - except Exception as e: + except Exception as e: logging.warning(f"Attempt {attempts+1}/{self.num_retries} failed: {e}") do_return = self.handle_request_error(e) if do_return: @@ -159,15 +158,17 @@ class RestEndpointModel(EndpointModel, KeyBasedAuthMixIn): presence_penalty: float = 0 do_sample: bool = True - def create_request(self, text_prompt, query_images=None, system_message=None): + def create_request(self, text_prompt, query_images=None, system_message=None, previous_messages=None): + """Creates a request for the model.""" + messages = [] + if system_message: + messages.append({"role": "system", "content": system_message}) + if previous_messages: + messages.extend(previous_messages) + messages.append({"role": "user", "content": text_prompt}) data = { "input_data": { - "input_string": [ - { - "role": "user", - "content": text_prompt, - } - ], + "input_string": messages, "parameters": { "temperature": self.temperature, "top_p": self.top_p, @@ -176,12 +177,8 @@ def create_request(self, text_prompt, query_images=None, system_message=None): }, } } - if system_message: - data["input_data"]["input_string"] = [{"role": "system", "content": system_message}] + data["input_data"][ - "input_string" - ] if query_images: - raise NotImplementedError("Images are not supported for GCR endpoints yet.") + raise NotImplementedError("Images are not supported for RestEndpointModel endpoints yet.") body = str.encode(json.dumps(data)) # The azureml-model-deployment header will force the request to go to a specific deployment. @@ -220,6 +217,7 @@ class ServerlessAzureRestEndpointModel(EndpointModel, KeyBasedAuthMixIn): url: str = None model_name: str = None stream: bool = False + auth_scope: str = "https://cognitiveservices.azure.com/.default" def __post_init__(self): try: @@ -235,7 +233,7 @@ def __post_init__(self): } except ValueError: self.bearer_token_provider = get_bearer_token_provider( - DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default" + DefaultAzureCredential(), self.auth_scope ) self.headers = { "Content-Type": "application/json", @@ -248,12 +246,16 @@ def __post_init__(self): } @abstractmethod - def create_request(self, text_prompt, query_images=None, system_message=None): + def create_request(self, text_prompt, query_images=None, system_message=None, previous_messages=None): # Exact model parameters are model-specific. 
# The method cannot be implemented unless the model being deployed is known. raise NotImplementedError def get_response(self, request): + + if isinstance(request, dict) and "error" in request: + raise RuntimeError(request["error"]) + start_time = time.time() response = urllib.request.urlopen(request) end_time = time.time() @@ -264,6 +266,13 @@ def get_response(self, request): return {"usage": res["usage"]} def handle_request_error(self, e): + + if isinstance(e, RuntimeError): # Handle custom error + logging.error(f"Custom error during request handling: {e}") + self.response = str(e) # Set the error message + self.is_valid = False + return True # Signal to exit the retry loop + if isinstance(e, urllib.error.HTTPError): logging.info("The request failed with status code: " + str(e.code)) # Print the headers - they include the request ID and the timestamp, which are useful for debugging. @@ -288,13 +297,19 @@ class LlamaServerlessAzureRestEndpointModel(ServerlessAzureRestEndpointModel): skip_special_tokens: bool = False ignore_eos: bool = False - def create_request(self, text_prompt, query_images=None, *args, **kwargs): - user_content = {"role": "user", "content": text_prompt} + def create_request(self, text_prompt, query_images=None, system_message=None, previous_messages=None): + messages = [] + if system_message: + messages.append({"role": "system", "content": system_message}) + if previous_messages: + messages.extend(previous_messages) + user_content = text_prompt if query_images: - if len(query_images) > 1: - raise ValueError("Llama vision model does not support more than 1 image.") + if len(query_images) > 1: + logging.warning("Llama vision model does not support more than 1 image.") + return {"error": "Too many images provided. Llama vision model supports only 1 image."} encoded_images = self.base64encode(query_images) - user_content["content"] = [ + user_content = [ {"type": "text", "text": text_prompt}, { "type": "image_url", @@ -303,9 +318,11 @@ def create_request(self, text_prompt, query_images=None, *args, **kwargs): }, }, ] + messages.append({"role": "user", "content": user_content}) + data = { - "messages": [user_content], + "messages": messages, "max_tokens": self.max_tokens, "temperature": self.temperature, "top_p": self.top_p, @@ -337,9 +354,17 @@ def __post_init__(self): self.top_p = 1 super().__post_init__() - def create_request(self, text_prompt, *args, **kwargs): + def create_request(self, text_prompt, query_images=None, system_message=None, previous_messages=None): + messages = [] + if system_message: + messages.append({"role": "system", "content": system_message}) + if previous_messages: + messages.extend(previous_messages) + if query_images: + raise NotImplementedError("Images are not supported for MistralServerlessAzureRestEndpointModel endpoints.") + messages.append({"role": "user", "content": text_prompt}) data = { - "messages": [{"role": "user", "content": text_prompt}], + "messages": messages, "max_tokens": self.max_tokens, "temperature": self.temperature, "top_p": self.top_p, @@ -358,14 +383,16 @@ class OpenAICommonRequestResponseMixIn: This mixin class defines the request and response handling for most OpenAI models. 
""" - def create_request(self, prompt, query_images=None, system_message=None): + def create_request(self, prompt, query_images=None, system_message=None, previous_messages=None): messages = [] if system_message: messages.append({"role": "system", "content": system_message}) - user_content = {"role": "user", "content": prompt} + if previous_messages: + messages.extend(previous_messages) + user_content = prompt if query_images: encoded_images = self.base64encode(query_images) - user_content["content"] = [ + user_content = [ {"type": "text", "text": prompt}, { "type": "image_url", @@ -374,7 +401,7 @@ def create_request(self, prompt, query_images=None, system_message=None): }, }, ] - messages.append(user_content) + messages.append({"role": "user", "content": user_content}) return {"messages": messages} def get_response(self, request): @@ -404,7 +431,7 @@ def get_client(self): from openai import AzureOpenAI token_provider = get_bearer_token_provider( - DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default" + DefaultAzureCredential(), self.auth_scope ) return AzureOpenAI( azure_endpoint=self.url, @@ -449,6 +476,7 @@ class AzureOpenAIModel(OpenAICommonRequestResponseMixIn, AzureOpenAIClientMixIn, presence_penalty: float = 0 seed: int = 0 api_version: str = "2023-06-01-preview" + auth_scope: str = "https://cognitiveservices.azure.com/.default" def __post_init__(self): self.client = self.get_client() @@ -473,8 +501,17 @@ def __post_init__(self): class OpenAIO1RequestResponseMixIn: - def create_request(self, prompt, *args, **kwargs): - messages = [{"role": "user", "content": prompt}] + + def create_request(self, prompt, query_images=None, system_message=None, previous_messages=None): + if system_message: + # system messages are not supported for OAI reasoning models + # https://platform.openai.com/docs/guides/reasoning + logging.warning("System messages are not supported for OAI reasoning models.") + messages = [] + if previous_messages: + messages.extend(previous_messages) + + messages.append({"role": "user", "content": prompt}) return {"messages": messages} def get_response(self, request): @@ -528,6 +565,8 @@ class AzureOpenAIO1Model(OpenAIO1RequestResponseMixIn, AzureOpenAIClientMixIn, E frequency_penalty: float = 0 presence_penalty: float = 0 api_version: str = "2023-06-01-preview" + auth_scope: str = "https://cognitiveservices.azure.com/.default" + def __post_init__(self): self.client = self.get_client() @@ -563,7 +602,13 @@ def __post_init__(self): def create_request(self, text_prompt, query_images=None, system_message=None): import google.generativeai as genai - self.model = genai.GenerativeModel(self.model_name, system_instruction=system_message) + if self.model_name == "gemini-1.0-pro": + if system_message: + logging.warning("System messages are not supported for Gemini 1.0 Pro.") + self.model = genai.GenerativeModel(self.model_name) + else: + self.model = genai.GenerativeModel(self.model_name, system_instruction=system_message) + if query_images: return [text_prompt] + query_images else: @@ -942,14 +987,15 @@ def __post_init__(self): timeout=self.timeout, ) - def create_request(self, prompt, query_images=None, system_message=None): + def create_request(self, prompt, query_images=None, system_message=None, previous_messages=None): messages = [] - user_content = {"role": "user", "content": prompt} - + user_content = prompt + if previous_messages: + messages.extend(previous_messages) if query_images: encoded_images = self.base64encode(query_images) - user_content["content"] 
= [ + user_content = [ {"type": "text", "text": prompt}, { "type": "image", @@ -960,7 +1006,7 @@ def create_request(self, prompt, query_images=None, system_message=None): }, }, ] - messages.append(user_content) + messages.append({"role": "user", "content": user_content}) if system_message: return {"messages": messages, "system": system_message}
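For reference, a minimal usage sketch of the new AIMEExtractAnswer transform, assuming model_output_column is the transform's other dataclass field (it is referenced in transform() but declared above the hunk shown); the DataFrame contents are purely illustrative:

import pandas as pd

from eureka_ml_insights.data_utils import AIMEExtractAnswer

# Illustrative raw model outputs; only the "Final Answer: X" pattern is parsed.
df = pd.DataFrame(
    {
        "model_output": [
            "Reasoning... Final Answer: 204",
            "Reasoning... Final Answer: $1,024",
            "No final answer given.",
        ]
    }
)

transform = AIMEExtractAnswer(model_output_column="model_output", model_answer_column="model_answer")
df = transform.transform(df)
# The first two rows parse to 204.0 and 1024.0; the unparsable row comes back as None/NaN.
print(df["model_answer"].tolist())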
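The updated create_request implementations (RestEndpointModel, the Llama and Mistral serverless endpoints, the OpenAI mixins, Claude) all assemble the chat payload in the same order: optional system message, then previous_messages, then the new user turn. A condensed, standalone sketch of that ordering with hypothetical inputs:

def build_messages(text_prompt, system_message=None, previous_messages=None):
    # Same ordering as the refactored create_request methods:
    # system message first, then prior turns, then the new user prompt.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    if previous_messages:
        messages.extend(previous_messages)
    messages.append({"role": "user", "content": text_prompt})
    return messages


history = [
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "4"},
]
print(build_messages("And doubled?", system_message="You are concise.", previous_messages=history))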
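The multi-image guard in LlamaServerlessAzureRestEndpointModel now flows through three of the changed methods: create_request returns an {"error": ...} dict instead of raising, get_response raises RuntimeError when it sees that dict, and handle_request_error records the message and returns True so generate() stops retrying. A condensed stand-in for that control flow (not the actual classes):

import logging


def create_request(text_prompt, query_images=None):
    # Stand-in for the Llama guard: more than one image short-circuits the request.
    if query_images and len(query_images) > 1:
        logging.warning("Llama vision model does not support more than 1 image.")
        return {"error": "Too many images provided. Llama vision model supports only 1 image."}
    return {"messages": [{"role": "user", "content": text_prompt}]}


def get_response(request):
    # Stand-in for the added check in ServerlessAzureRestEndpointModel.get_response.
    if isinstance(request, dict) and "error" in request:
        raise RuntimeError(request["error"])
    raise NotImplementedError("The real method posts the request to the serverless endpoint.")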
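The new auth_scope field gives the serverless endpoint and Azure OpenAI classes a configurable token audience (it defaults to the Cognitive Services scope that was previously hard-coded). A deployment that needs a different scope could presumably pass it alongside the other parameters in a config entry; the import paths, endpoint URL, and model name below are placeholders, not values from the repo:

from eureka_ml_insights.configs.config import ModelConfig  # import path is an assumption
from eureka_ml_insights.models import AzureOpenAIModel  # import path is an assumption

EXAMPLE_AZURE_OPENAI_CONFIG = ModelConfig(
    AzureOpenAIModel,
    {
        "url": "https://example-endpoint.openai.azure.com/",
        "model_name": "gpt-4o",
        # Defaults to the Cognitive Services scope; override only if the deployment requires it.
        "auth_scope": "https://cognitiveservices.azure.com/.default",
    },
)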