from google import genai
import time
import os

from google.api_core.exceptions import ResourceExhausted

from django.conf import settings

class RetryExhaustedError(Exception):
    """Signals that every retry attempt was consumed by 429 ResourceExhausted errors."""


class AIEditLLM:
    def __init__(self):
        """Initialize the Gemini client and the persistent upload cache.

        Creates a genai.Client with the configured API key, sets the chat
        handle to None, and loads the local-to-remote file mapping from
        ai_settings/ai_gen.json under Django's BASE_DIR. When that JSON file
        is missing or empty, an empty mapping is persisted to disk and used.

        Side Effects:
            - Reads (and possibly creates) ai_gen.json on disk.

        NOTE(review): `FileJson` is not imported in this module's visible
        import block — confirm it is supplied elsewhere
        (presumably ai_settings.ai_util).
        """
        self.client = genai.Client(
            api_key=settings.SECRET_PROJECT_NAME
        )
        self.chat = None  # populated later by one of the create_chat* methods
        self.ai_json_name = os.path.join(
            settings.BASE_DIR, "ai_settings", "ai_gen.json"
        )
        self.files_list = FileJson.load_json(self.ai_json_name)
        if not self.files_list:
            # BUG FIX: the old code assigned save_json's return value to
            # files_list and then immediately overwrote it with {}. Persist
            # the empty mapping, then keep a plain dict locally.
            FileJson.save_json({}, self.ai_json_name)
            self.files_list = {}

    def create_chat(self, aiedit_temperature=0.2, aiedit_budget=32768):
        """Start a new chat session on gemini-2.5-pro and store it on self.chat.

        Args:
            aiedit_temperature (float): Sampling temperature for generation
                (e.g., 0.2 for deterministic outputs).
            aiedit_budget (int): Token budget for the model's thinking phase.

        Returns:
            None: The created chat handle is assigned to self.chat.

        Side Effects:
            - Replaces self.chat with a fresh chat session.
            - Prints the model name, temperature, and thinking budget.
        """
        model = "gemini-2.5-pro"
        thinking = genai.types.ThinkingConfig(thinking_budget=aiedit_budget)
        chat_config = genai.types.GenerateContentConfig(
            thinking_config=thinking,
            temperature=aiedit_temperature,
        )

        self.chat = self.client.chats.create(model=model, config=chat_config)

        print("Model name: ", model)
        print("Temperature: ", aiedit_temperature)
        print("Thinking Budget: ", aiedit_budget)

    # def generate_embeddings(text: str) -> list[float] | None:
    #     try:
    #         if not text:
    #             print("[EMBEDDING] No text provided, skipping embedding.")
    #             return None

    #         print(f"[EMBEDDING] Generating embedding for text (length={len(text)})")

    #         result = genai.embed_content(
    #             model="models/embedding-001",
    #             content=text,
    #             task_type="retrieval_document"
    #         )

    #         embedding = result.get("embedding")

    #         if not embedding:
    #             print("[EMBEDDING][ERROR] Empty embedding received from Gemini.")
    #             return None

    #         print(f"[EMBEDDING] Embedding generated successfully. Dimensions={len(embedding)}")

    #         return embedding

    #     except Exception as e:
    #         print(f"[EMBEDDING][EXCEPTION] Failed to generate embedding: {str(e)}")
    #         return None
    
    def generate_embeddings(self, text: str) -> list[float] | None:
        try:
            if not text:
                print("[EMBEDDING] No text provided")
                return None

            print(f"[EMBEDDING] Generating embedding, text length={len(text)}")

            response = self.client.models.embed_content(
                model="text-embedding-004",
                contents=text,
            )

            embedding = response.embeddings[0].values

            if not embedding:
                print("[EMBEDDING][ERROR] Empty embedding received")
                return None

            print(f"[EMBEDDING] Embedding generated, dimensions={len(embedding)}")

            return embedding

        except Exception as e:
            print(f"[EMBEDDING][EXCEPTION] Failed to generate embedding: {e}")
            return None

    def create_chat_without_system_instruction(
        self, aiedit_temperature=0.2, aiedit_budget=32768
    ):
        """Start a chat on gemini-2.5-pro with no system instruction.

        Identical to create_chat: builds a GenerateContentConfig from the
        given temperature and thinking budget, then stores the new chat
        session on self.chat. Use this for a neutral chat context before
        send_message / send_message_stream.

        Args:
            aiedit_temperature (float): Sampling temperature for generation.
            aiedit_budget (int): Token budget for the model's thinking phase.

        Returns:
            None: The created chat handle is assigned to self.chat.

        Side Effects:
            - Replaces self.chat with a fresh chat session.
            - Prints the model name, temperature, and thinking budget.
        """
        model = "gemini-2.5-pro"
        chat_config = genai.types.GenerateContentConfig(
            thinking_config=genai.types.ThinkingConfig(
                thinking_budget=aiedit_budget
            ),
            temperature=aiedit_temperature,
        )

        self.chat = self.client.chats.create(model=model, config=chat_config)

        print("Model name: ", model)
        print("Temperature: ", aiedit_temperature)
        print("Thinking Budget: ", aiedit_budget)

    def create_chat_paper_cut_system_instruction(
        self, aiedit_temperature=1.5, aiedit_budget=32768
    ):
        """Start a chat primed with the paper-cut editorial system instruction.

        The system instruction enforces continuity rules (no jump cuts, full
        dialogue, no repetition, no cut-offs) for paper-cut generation. The
        resulting chat session is stored on self.chat.

        Args:
            aiedit_temperature (float): Sampling temperature; the higher
                default (1.5) encourages diverse suggestions.
            aiedit_budget (int): Token budget for the model's thinking phase.

        Returns:
            None: The created chat handle is assigned to self.chat.

        Side Effects:
            - Replaces self.chat with a fresh chat session.
            - Prints the model name, temperature, and thinking budget.
        """
        model = "gemini-2.5-pro"
        chat_config = genai.types.GenerateContentConfig(
            system_instruction="""
            YOU ARE THE BEST FILMMAKER IN THE WORLD. YOU HAVE A SPECIAL SKILL AS A HOLLYWOOD LEVEL PROFESSIONAL EDITOR. YOU KNOW ALL THE RULES AND ARE KEEN ON CONTINUITY. YOUR TOP FOUR RULES ARE
            1. NO JUMP CUTS.
            2. DIALOGUE MUST BE SAID IN FULL. 
            3. NO REPETITION OF DIALOGUES.
            4. NO CUT-OFF OF DIALOGUES.
            YOU WILL CONFIRM THIS SYSTEM INSTRUCTIONS BEFORE EVERY TASK. GIVE ME A THUMBS UP EMOJI.
            """,
            thinking_config=genai.types.ThinkingConfig(
                thinking_budget=aiedit_budget
            ),
            temperature=aiedit_temperature,
        )

        self.chat = self.client.chats.create(model=model, config=chat_config)

        print("Model name: ", model)
        print("Temperature: ", aiedit_temperature)
        print("Thinking Budget: ", aiedit_budget)

    def create_chat_paper_cut_correction_system_instruction(
        self, aiedit_temperature=0.2, aiedit_budget=32768
    ):
        """Start a chat primed for paper-cut timecode verification/correction.

        The system instruction focuses the model on correcting a single
        paper-cut edit's timecodes against an Audio and Video Description
        (AVD) JSON reference. The resulting chat session is stored on
        self.chat.

        Args:
            aiedit_temperature (float): Sampling temperature; the low default
                (0.2) favors stable, corrective behavior.
            aiedit_budget (int): Token budget for the model's thinking phase.

        Returns:
            None: The created chat handle is assigned to self.chat.

        Side Effects:
            - Replaces self.chat with a fresh chat session.
            - Prints the model name, temperature, and thinking budget.
        """
        model = "gemini-2.5-pro"
        chat_config = genai.types.GenerateContentConfig(
            system_instruction="""
            You are an expert Video Editing Assistant and Data Analyst.
            Your task is to meticulously verify and correct the timecodes of a single "edit" from a paper cut JSON file. You will use a comprehensive Audio and Video Description (AVD) JSON file as reference for timecodes and dialogue.
            """,
            thinking_config=genai.types.ThinkingConfig(
                thinking_budget=aiedit_budget
            ),
            temperature=aiedit_temperature,
        )

        self.chat = self.client.chats.create(model=model, config=chat_config)

        print("Model name: ", model)
        print("Temperature: ", aiedit_temperature)
        print("Thinking Budget: ", aiedit_budget)

    def upload_file(self, file_name, force=False, type=None):
        """
Upload a local file to the Gemini Files service with optional caching and retry polling.

Detailed Description:
Uploads a file to the model’s file storage using [`genai.Client.files.upload`](ai_settings/ai_gen.py), storing the returned server-side file name in a persistent map (ai_gen.json) via [`FileJson.save_json`](ai_settings/ai_util.py). When force=False, the function first checks for an existing cached entry using [`AIEditLLM.get_file_by_file_name`](ai_settings/ai_gen.py); if present, it reuses the previously uploaded file to avoid duplicate uploads. After initiating an upload, it polls until the file’s state becomes ACTIVE by repeatedly calling [`genai.Client.files.get`](ai_settings/ai_gen.py). Prints progress messages and returns the active file object.

Parameters (Args):
- file_name (str): Absolute or relative path to the local file to upload.
- force (bool): If True, bypasses cache and uploads the file unconditionally; otherwise attempts to reuse a cached server file ID.
- type (str or None): Optional metadata flag for future behavior; currently unused.

Returns:
- object or None: The active file object returned by the client with attributes like name and display_name. Returns None if upload fails or exceptions are caught.

Raises:
- FileNotFoundError: Raised when the local file path is invalid or missing (caught internally and converted to a printed error with None return).
- Exception: Any unexpected error during upload or polling is caught, printed, and results in None return.

Side Effects:
- Writes to ai_gen.json to persist the local-to-remote file mapping using [`FileJson.save_json`](ai_settings/ai_util.py).
- Performs network I/O to upload the file and poll status via the Gemini Files API.
- Prints progress and completion messages to stdout.

Usage Example:
    from ai_settings.ai_gen import AIEditLLM

    llm = AIEditLLM()
    llm.create_chat(aiedit_temperature=0.3, aiedit_budget=16384)

    # Upload a file, reusing cached ID if already uploaded
    uploaded = llm.upload_file("/path/to/script_scene_3A.pdf", force=False)
    if uploaded:
        print("Uploaded:", uploaded.display_name, "->", uploaded.name)

    # Force re-upload even if previously cached
    uploaded2 = llm.upload_file("/path/to/scene_audio_transcript.txt", force=True)
    print("Uploaded (forced):", uploaded2.display_name)
"""
        try:
            mime_type=None
            display_name = os.path.basename(file_name)
            config_map = {"display_name": display_name}
            if force:
                my_file = self.client.files.upload(file=file_name, config=config_map)
                self.files_list[file_name] = my_file.name
                FileJson.save_json(self.files_list, self.ai_json_name)
            else:
                my_file = self.get_file_by_file_name(file_name)
                if not my_file:
                    my_file = self.client.files.upload(file=file_name, config=config_map)
                    self.files_list[file_name] = my_file.name
                    FileJson.save_json(self.files_list, self.ai_json_name)
        except FileNotFoundError:
            print(f"Error: file not found at '{my_file}'.", debug=True)
            return
        except Exception as e:
            print(f"An error occurred during File upload: {e}", debug=True)
            return
        while not my_file.state or my_file.state.name != "ACTIVE":
            print("File is processing, checking again in 5 seconds...")
            time.sleep(5)
            my_file = self.client.files.get(name=my_file.name)
 
        print(f"Upload completed. File Name: {my_file.display_name}")
        return my_file

    def get_file_by_file_name(self, l_file_name):
        """
Retrieve a cached Gemini File object by original local file name.

Detailed Description:
Looks up the persistent local-to-remote file mapping stored in ai_gen.json (loaded into self.files_list) to find a previously uploaded file’s server-side name. If found, fetches the current file object via [`AIEditLLM.get_file`](ai_settings/ai_gen.py) and returns it; otherwise returns None. Useful for avoiding duplicate uploads in [`AIEditLLM.upload_file`](ai_settings/ai_gen.py) by reusing already uploaded files. Prints a confirmation with the found file’s name when a cache hit occurs.

Parameters (Args):
- l_file_name (str): Original local path used during upload (key in self.files_list), e.g., "/path/to/script_scene_3A.pdf".

Returns:
- object or None: Active file object returned by client.files.get on cache hit; None if no cached entry exists for l_file_name.

Raises:
- Exception: Propagated from [`AIEditLLM.get_file`](ai_settings/ai_gen.py) if the remote file retrieval fails (e.g., network errors, missing resource).

Side Effects:
- Prints a message to stdout when a cached file is found: "File exists File Name: <name>".

Usage Example:
    from ai_settings.ai_gen import AIEditLLM

    llm = AIEditLLM()
    # Attempt to reuse a previously uploaded file
    cached = llm.get_file_by_file_name("/data/1001/scripts/script_scene_3A.pdf")
    if cached:
        print("Reused cached file:", cached.display_name)
    else:
        print("No cached entry; upload required")
"""
        my_file = None
        if self.files_list:
            ai_file_name = self.files_list.get(l_file_name, None)
            if ai_file_name:
                my_file = self.get_file(ai_file_name)
                print(f"File exists File Name: {my_file.name}")
        return my_file

    def get_file(self, file_name):
        """Fetch a file object from the Gemini Files API by server-side name.

        Thin wrapper over client.files.get with no retries or state checks;
        callers should verify file.state (e.g. ACTIVE) themselves if needed.

        Args:
            file_name (str): Server-assigned name, e.g. "files/abc123".

        Returns:
            The SDK file object (name, display_name, state, ...).
        """
        return self.client.files.get(name=file_name)

    def file_list(self):
        """Return the raw collection of files from client.files.list().

        No pagination handling, filtering, or transformation is applied;
        callers iterate the result and read attributes such as name,
        display_name, and state directly.
        """
        return self.client.files.list()

    def file_delete(self, file_name):
        """Delete a remote file by its server-side name via client.files.delete.

        Note: this does not remove the matching entry from the local
        ai_gen.json cache, so stale mappings remain until updated elsewhere.

        Args:
            file_name (str): Server-side identifier, e.g. "files/abc123".
        """
        self.client.files.delete(name=file_name)

        
    def send_message(self, msgs, new_temperature=None, aiedit_budget=32768, retries=2, backoff=62):
        """Send a message on the active chat, retrying on 429 ResourceExhausted.

        If new_temperature is given, a per-message GenerateContentConfig
        (with a ThinkingConfig built from aiedit_budget) overrides the chat's
        defaults. On a ResourceExhausted (HTTP 429) error the call sleeps
        backoff seconds and retries, up to retries attempts; any other error
        is printed and re-raised immediately.

        Args:
            msgs: Message payload accepted by chat.send_message.
            new_temperature (float | None): Optional per-message temperature
                override; None keeps the chat's default configuration.
            aiedit_budget (int): Thinking budget, used only with an override.
            retries (int): Maximum attempts for 429 errors.
            backoff (int): Seconds to sleep between 429 retries.

        Returns:
            The response object from self.chat.send_message.

        Raises:
            RetryExhaustedError: When every attempt fails with 429.
            Exception: Any non-429 error from the SDK, re-raised.
        """
        # BUG FIX: the docstring used to sit after the first statement, which
        # made it a dead string expression instead of the function docstring.
        message_specific_config = None

        if new_temperature is not None:
            print(f"\nSending message with New temperature: {new_temperature}")
            message_specific_config = genai.types.GenerateContentConfig(
                thinking_config=genai.types.ThinkingConfig(thinking_budget=aiedit_budget),
                temperature=new_temperature
            )

        for _attempt in range(retries):
            try:
                return self.chat.send_message(msgs, config=message_specific_config)
            except ResourceExhausted:
                # BUG FIX: print() has no `debug` keyword; the old call raised
                # TypeError inside the handler, so the retry never happened.
                print(f"[429] Resource exhausted. Retrying in {backoff}s...")
                time.sleep(backoff)
            except Exception as e:
                print(f"Error in send_message: {type(e).__name__} -> {e}")
                raise  # non-429 errors propagate immediately

        # All retries failed with ResourceExhausted.
        raise RetryExhaustedError(f"send_message failed after {retries} retries due to 429 ResourceExhausted")
    
    def send_message_stream(self, msg, new_temperature=None, aiedit_budget=32768, retries=2, backoff=62):
        """Stream a message on the active chat, retrying on 429 ResourceExhausted.

        Mirrors send_message but calls self.chat.send_message_stream and
        returns the streaming iterator. If new_temperature is given, a
        per-message GenerateContentConfig (with a ThinkingConfig built from
        aiedit_budget) overrides the chat's defaults. On a ResourceExhausted
        (HTTP 429) error the call sleeps backoff seconds and retries, up to
        retries attempts; any other error is printed and re-raised.

        Args:
            msg: Message payload accepted by chat.send_message_stream.
            new_temperature (float | None): Optional per-message temperature
                override; None keeps the chat's default configuration.
            aiedit_budget (int): Thinking budget, used only with an override.
            retries (int): Maximum attempts for 429 errors.
            backoff (int): Seconds to sleep between 429 retries.

        Returns:
            The streaming response object from self.chat.send_message_stream.

        Raises:
            RetryExhaustedError: When every attempt fails with 429.
            Exception: Any non-429 error from the SDK, re-raised.
        """
        message_specific_config = None

        if new_temperature is not None:
            print(f"\nSending message with OVERRIDE temperature: {new_temperature}")
            message_specific_config = genai.types.GenerateContentConfig(
                thinking_config=genai.types.ThinkingConfig(thinking_budget=aiedit_budget),
                temperature=new_temperature
            )

        for _attempt in range(retries):
            try:
                return self.chat.send_message_stream(msg, config=message_specific_config)
            except ResourceExhausted:
                # BUG FIX: print() has no `debug` keyword; the old call raised
                # TypeError inside the handler, so the retry never happened.
                print(f"[429] Resource exhausted. Retrying in {backoff}s...")
                time.sleep(backoff)
            except Exception as e:
                print(f"Error in send_message_stream: {type(e).__name__} -> {e}")
                raise  # non-429 errors propagate immediately

        # All retries failed with ResourceExhausted.
        raise RetryExhaustedError(f"send_message_stream failed after {retries} retries due to 429 ResourceExhausted")

