import base64
import html
from html.parser import HTMLParser
import json
import os
import random
import re
import time
import urllib.parse
import urllib.request
import urllib.error

# --- CONFIGURATION ---
# SECURITY: the API key is read from the environment only. A real key must never
# be committed as a fallback value; any key that was previously stored in this
# file should be treated as leaked and rotated. When the variable is unset the
# key is an empty string.
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY", "")
# Base address of the target imageboard (no trailing slash).
BOARD_URL = "https://indiachan.top"
# Catalog page that lists the threads of board /b/.
CATALOG_URL = f"{BOARD_URL}/boards/b/catalog"
# JSON file (relative to the working directory) holding the bot's persisted state.
STATE_FILE = "bot_state.json"
# Pause between two iterations of the main loop, in seconds (5 minutes).
RUN_INTERVAL_SECONDS = 300

# --- TARGETED PERSONA PROMPT ---
# Sent as the "system" message of every chat-completion request.
SYSTEM_INSTRUCTION = (
    "You are an anonymous poster on an imageboard. "
    "Write ONLY the exact post content matching the tone of the thread. "
    "Do NOT include introductions, explanations, preambles, meta-commentary, "
    "or quotation marks around your answer. Be raw, direct, and completely in character."
)


# --- NATIVE HTML PARSER (No External Libraries) ---

class CatalogParser(HTMLParser):
    """Event-driven parser that collects thread cards from the catalog page.

    A card starts at ``<div class="post-container">`` and ends at its matching
    ``</div>``. For every card the parser records a dict with the keys:

    * ``id``        - text after ``/thread/`` in the ``thread-catalog-link``
                      anchor's href, without any ``#fragment``
    * ``replies``   - integer from the card's ``data-replies`` attribute
                      (0 when missing or not a number)
    * ``preview``   - text found inside div(s) of the card that carry no
                      ``class`` attribute, stripped of surrounding whitespace
    * ``image_url`` - absolute URL of the ``thread-img`` image, or ``None``

    Finished cards that have an ``id`` are appended to ``self.threads``.
    """

    def __init__(self):
        # convert_charrefs=True (the library default) means handle_data()
        # already receives text with character references decoded.
        super().__init__(convert_charrefs=True)
        self.threads = []
        self.current_thread = None
        self.in_preview_div = False
        # Number of currently open divs nested inside the card container.
        self.div_depth = 0
        # div_depth at which the current preview div was opened; only the
        # matching close tag ends the preview.
        self._preview_depth = None

    @staticmethod
    def _parse_reply_count(raw):
        """Convert a ``data-replies`` attribute value to int, defaulting to 0."""
        try:
            return int(raw)
        except (TypeError, ValueError):
            return 0

    def handle_starttag(self, tag, attrs):
        attr_dict = dict(attrs)
        # A valueless attribute (e.g. ``<div class>``) is reported as None.
        css_class = attr_dict.get("class") or ""

        # Identify the individual thread card container block
        if tag == "div" and css_class == "post-container":
            self.current_thread = {
                "id": None,
                "replies": self._parse_reply_count(attr_dict.get("data-replies", 0)),
                "preview": "",
                "image_url": None,
            }
            self.div_depth = 0
            self.in_preview_div = False
            self._preview_depth = None
            return

        if self.current_thread is None:
            return

        if tag == "div":
            self.div_depth += 1
            # The preview text block sits inside an unclassed nested div.
            # Remember where it opened so child divs do not end it early.
            if "class" not in attr_dict and not self.in_preview_div:
                self.in_preview_div = True
                self._preview_depth = self.div_depth

        # Extract Thread ID from the target href structure
        elif tag == "a" and css_class == "thread-catalog-link":
            href = attr_dict.get("href") or ""
            if "/thread/" in href:
                self.current_thread["id"] = href.split("/thread/")[-1].split("#")[0]

        # Catch the thread thumbnail image URL
        elif tag == "img" and "thread-img" in css_class:
            src = attr_dict.get("src") or ""
            if src:
                if src.startswith(("http://", "https://")):
                    self.current_thread["image_url"] = src
                else:
                    # Resolves root-relative, protocol-relative and plain
                    # relative paths against the board address.
                    self.current_thread["image_url"] = urllib.parse.urljoin(
                        f"{BOARD_URL}/", src
                    )

    def handle_data(self, data):
        if self.current_thread and self.in_preview_div:
            self.current_thread["preview"] += data

    def handle_endtag(self, tag):
        if self.current_thread is None or tag != "div":
            return

        # Only the close tag matching the preview div's own depth ends the preview.
        if self.in_preview_div and self.div_depth == self._preview_depth:
            self.in_preview_div = False
            self._preview_depth = None

        self.div_depth -= 1
        if self.div_depth < 0:
            # Outer container boundary closed, finalize the current item.
            # The text is already entity-decoded by the parser, so it must not
            # be unescaped a second time.
            self.current_thread["preview"] = self.current_thread["preview"].strip()
            if self.current_thread["id"]:
                self.threads.append(self.current_thread)
            self.current_thread = None
            self.in_preview_div = False
            self._preview_depth = None


# --- BOT IMPLEMENTATION HUB ---

class DynamicImageboardBot:

    def __init__(self):
        """Create the bot and populate ``self.state`` from ``load_state()``."""
        restored_state = self.load_state()
        self.state = restored_state

    def load_state(self):
        """Load the persisted bot state from ``STATE_FILE``.

        Returns:
            A dict that always contains ``"replied_threads"`` (list) and
            ``"learned_keywords"`` (dict). Fresh defaults are returned when the
            file does not exist, cannot be read, is not valid JSON, or does not
            hold a JSON object. Missing or wrongly typed entries of an otherwise
            valid file are replaced by their defaults; other keys are kept.
        """
        defaults = {"replied_threads": [], "learned_keywords": {}}

        try:
            with open(STATE_FILE, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except FileNotFoundError:
            # First run: nothing persisted yet.
            return defaults
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print("[-] State file corrupted. Re-initializing...")
            return defaults

        if not isinstance(loaded, dict):
            print("[-] State file corrupted. Re-initializing...")
            return defaults

        # The rest of the bot indexes these keys directly, so guarantee their
        # presence and container type instead of failing later with KeyError.
        if not isinstance(loaded.get("replied_threads"), list):
            loaded["replied_threads"] = defaults["replied_threads"]
        if not isinstance(loaded.get("learned_keywords"), dict):
            loaded["learned_keywords"] = defaults["learned_keywords"]
        return loaded

    def save_state(self, current_catalog_size: int = 100):
        """Trim the reply history and persist ``self.state`` to ``STATE_FILE``.

        Args:
            current_catalog_size: Maximum number of entries kept in
                ``self.state["replied_threads"]``; when the list is longer only
                its most recent entries (the tail) are retained.

        The state is first written to ``<STATE_FILE>.tmp`` and then moved over
        the real file with ``os.replace``. Any exception raised while writing
        or replacing is caught and printed.
        """
        history = self.state["replied_threads"]
        if len(history) > current_catalog_size:
            self.state["replied_threads"] = history[-current_catalog_size:]

        # Write to a sibling file first so the real state file is only ever
        # swapped in as a whole.
        staging_path = f"{STATE_FILE}.tmp"
        try:
            with open(staging_path, "w") as staging:
                json.dump(self.state, staging, indent=4)
            os.replace(staging_path, STATE_FILE)
        except Exception as e:
            print(f"[-] Failed to write state file safely: {e}")

    def fetch_live_page(self, url: str) -> str:
        """Download *url* and return the response body as text.

        The body is decoded with the charset declared in the response's
        Content-Type header. UTF-8 is used when no charset is declared or the
        declared name is not a codec Python knows. Bytes that cannot be decoded
        are dropped.

        Args:
            url: Address to fetch.

        Returns:
            The decoded body, or an empty string when the request could not be
            built or completed (the error is printed).
        """
        try:
            # Building the request is inside the try block so that a malformed
            # URL yields "" like every other failure instead of raising.
            req = urllib.request.Request(url)
            req.add_header(
                "User-Agent",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            )
            with urllib.request.urlopen(req, timeout=15) as response:
                raw = response.read()
                charset = response.headers.get_content_charset() or "utf-8"
        except Exception as e:
            print(f"[-] Network connection block fault context ({url}): {e}")
            return ""

        try:
            return raw.decode(charset, errors="ignore")
        except LookupError:
            # The server declared a charset name that is not a known codec.
            return raw.decode("utf-8", errors="ignore")

    def download_image_as_base64(self, img_url: str) -> str:
        """Fetch an image and return it as a ``data:`` URI string.

        Args:
            img_url: Address of the image; a falsy value short-circuits.

        Returns:
            ``"data:<content-type>;base64,<payload>"`` where the content type
            is taken from the response's Content-Type header (``image/jpeg``
            when the header is absent). ``None`` when *img_url* is falsy or
            the download fails; failures are printed.
        """
        if not img_url:
            return None

        request = urllib.request.Request(
            img_url,
            headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"},
        )
        try:
            with urllib.request.urlopen(request, timeout=10) as response:
                mime_type = response.headers.get("Content-Type", "image/jpeg")
                raw_bytes = response.read()
            encoded = base64.b64encode(raw_bytes).decode("utf-8")
            print(f"[+] Encoded image asset wrapper payload ({len(raw_bytes)} bytes)")
            return f"data:{mime_type};base64,{encoded}"
        except Exception as e:
            print(f"[-] Vision resource compilation skipped for URI ({img_url}): {e}")
            return None

    def evaluate_threads(self, threads: list) -> dict:
        """Pick the thread to reply to and update the learned keyword weights.

        Threads whose ``id`` is already in ``self.state["replied_threads"]``
        are ignored. Every remaining thread is scored as:

        * 50 plus a random integer in [-15, 15]
        * +20 when it has between 3 and 39 replies, -15 when it has 40 or more
        * for each word of the lower-cased preview that is a learned keyword,
          that keyword's weight capped at 25

        The highest score wins; on ties the earliest thread in *threads* wins.

        Side effects on ``self.state["learned_keywords"]``: every distinct word
        longer than 4 characters in the winner's preview gains 2 (capped at
        50). Then, with 10% probability, every keyword loses 1 and keywords
        that reach 0 are removed.

        Returns:
            The chosen thread dict, or ``None`` when no thread is eligible.
        """
        already_replied = self.state["replied_threads"]
        candidates = [thread for thread in threads if thread["id"] not in already_replied]
        if not candidates:
            return None

        weights = self.state["learned_keywords"]
        best_score = None
        winner = None
        for thread in candidates:
            rating = 50 + random.randint(-15, 15)

            # Target small-to-mid discussions; avoid oversized mega-threads
            reply_count = thread["replies"]
            if reply_count >= 40:
                rating -= 15
            elif reply_count > 2:
                rating += 20

            # Historical interest boost: repeated words count every time
            rating += sum(
                min(weights[token], 25)
                for token in re.findall(r"\w+", thread["preview"].lower())
                if token in weights
            )

            # Strict comparison keeps the earliest thread among equal scores
            if best_score is None or rating > best_score:
                best_score = rating
                winner = thread

        # Self-learning feedback: reinforce the longer words of the winner
        for token in set(re.findall(r"\w+", winner["preview"].lower())):
            if len(token) > 4:
                weights[token] = min(weights.get(token, 0) + 2, 50)

        # Occasional decay so stale keywords fade out over many runs
        if random.random() < 0.10:
            for token in list(weights):
                weights[token] -= 1
                if weights[token] <= 0:
                    del weights[token]

        return winner

    def query_mistral_multimodal(self, prompt: str, image_base64: str = None, timeout: float = 60) -> str:
        """Send a chat-completion request to the Mistral API and return the reply text.

        Args:
            prompt: Text placed in the user message.
            image_base64: Optional image given as a URL string (the caller
                passes a ``data:`` URI); attached to the user message as an
                ``image_url`` part when truthy.
            timeout: Seconds to wait on the connection before giving up.
                Without a limit a stalled connection would block the bot's
                main loop indefinitely.

        Returns:
            The stripped content of the first choice, or an empty string when
            no API key is configured or the request fails (errors are printed).
        """
        if not MISTRAL_API_KEY:
            print("[-] MISTRAL_API_KEY is not configured; skipping AI generation.")
            return ""

        url = "https://api.mistral.ai/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {MISTRAL_API_KEY}",
        }

        # The user message is a list of typed parts: text first, image second.
        user_content = [{"type": "text", "text": prompt}]
        if image_base64:
            user_content.append({
                "type": "image_url",
                "image_url": {
                    "url": image_base64
                }
            })

        data = {
            "model": "pixtral-12b-latest",
            "messages": [
                {
                    "role": "system",
                    "content": SYSTEM_INSTRUCTION
                },
                {
                    "role": "user",
                    "content": user_content
                },
            ],
            "temperature": 0.85,
        }

        # Supplying a request body makes urllib issue a POST.
        req = urllib.request.Request(
            url, data=json.dumps(data).encode("utf-8"), headers=headers
        )
        try:
            with urllib.request.urlopen(req, timeout=timeout) as response:
                res = json.loads(response.read().decode("utf-8"))
                return res["choices"][0]["message"]["content"].strip()
        except urllib.error.HTTPError as http_err:
            # The error body usually carries the API's explanation.
            error_body = http_err.read().decode("utf-8", errors="ignore")
            print(f"[-] API rejected execution structure: {http_err.code} - {error_body}")
            return ""
        except Exception as e:
            # Also covers timeouts and unexpected response shapes.
            print(f"[-] AI generation failed completely during token exchange: {e}")
            return ""

    def submit_reply(self, thread_id: str, content: str, catalog_capacity: int):
        """POST a reply to a thread and record the thread as answered.

        Args:
            thread_id: Identifier used to build the thread and reply URLs.
            content: Text of the post.
            catalog_capacity: Forwarded to ``save_state`` as the maximum
                length of the reply history.

        The form carries the post text and a fixed ``delete_password``. On a
        200 or 201 response the thread id is appended to
        ``self.state["replied_threads"]`` and the state is saved. Any other
        status, and any exception raised while sending, is only printed.
        """
        thread_url = f"{BOARD_URL}/boards/b/thread/{thread_id}"

        form_fields = {
            "content": content,
            "delete_password": "apnimaasepuch",
        }
        body = urllib.parse.urlencode(form_fields).encode("utf-8")

        request = urllib.request.Request(
            f"{thread_url}/reply",
            data=body,
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                "Content-Type": "application/x-www-form-urlencoded",
                "Content-Length": str(len(body)),
                "Referer": thread_url,
            },
        )

        try:
            with urllib.request.urlopen(request, timeout=15) as r:
                status = r.getcode()
                if status not in (200, 201):
                    print(f"[-] Target instance rejected payload with status code: {status}")
                else:
                    print(f"[+] Simulation transaction completed for Thread #{thread_id}!")
                    # Persist immediately so the thread is skipped on later runs.
                    self.state["replied_threads"].append(thread_id)
                    self.save_state(current_catalog_size=catalog_capacity)
        except Exception as e:
            print(f"[-] Reply submission pipeline fault event handled: {e}")

    def run_one_iteration(self):
        """Run one full cycle: fetch the catalog, pick a thread, generate and post a reply.

        Steps:
            1. Download ``CATALOG_URL``; stop when nothing was fetched.
            2. Parse the thread cards with ``CatalogParser``.
            3. Let ``evaluate_threads`` choose a thread; stop when none is chosen.
            4. Download the thread image as a data URI unless it is a GIF.
            5. Ask the model for a reply; when it returns text, post it
               prefixed with ``>>OP``.
        """
        print("\n[*] Synchronizing remote catalog live stream data views...")
        catalog_html = self.fetch_live_page(CATALOG_URL)
        if not catalog_html:
            return

        parser = CatalogParser()
        parser.feed(catalog_html)
        # Flush any input the parser is still buffering.
        parser.close()
        threads = parser.threads

        # Report the real number of parsed threads; the fallback of 100 is only
        # the history-size limit handed to save_state when nothing was parsed.
        print(f"[*] Parsed {len(threads)} active catalog thread entries.")
        catalog_total_capacity = len(threads) or 100

        chosen_thread = self.evaluate_threads(threads)
        if not chosen_thread:
            print("[*] No target entries chosen during this execution block run step.")
            return

        print(f"[*] Selected Target Thread: #{chosen_thread['id']}")

        base64_image = None
        image_url = chosen_thread["image_url"]
        if image_url:
            # Test the URL path only, so "x.gif?v=2" is still recognised as a GIF.
            image_path = urllib.parse.urlparse(image_url).path.lower()
            if not image_path.endswith(".gif"):
                print(f"[*] Processing visual information element: {image_url}")
                base64_image = self.download_image_as_base64(image_url)
            else:
                print(f"[*] Skipping vision evaluation for animated GIF element: {image_url}")

        prompt = (
            f"Context: Imageboard discussion thread.\n"
            f"OP Text Post: '{chosen_thread['preview']}'\n\n"
            f"Analyze the thread message context along with the attached image thumbnail if available. "
            f"Draft a short response compatible with imageboard board culture.\n"
            f"CRITICAL RULES:\n"
            f"- Output ONLY the final response text.\n"
            f"- Do NOT wrap text in quotation marks.\n"
            f"- Avoid explanatory text, preambles, or conversational introductions entirely."
        )

        ai_response = self.query_mistral_multimodal(prompt, base64_image)

        if ai_response:
            final_text = f">>OP\n{ai_response}"
            self.submit_reply(chosen_thread["id"], final_text, catalog_total_capacity)


if __name__ == "__main__":
    # Script entry point: run one iteration, sleep, repeat until interrupted.
    bot = DynamicImageboardBot()
    print("[+] Core engine synchronized natively... Terminate execution loops via [Ctrl+C]")
    try:
        while True:
            try:
                bot.run_one_iteration()
            except Exception as global_err:
                # Top-level boundary: one failed iteration must not stop the loop.
                print(f"[-] Global application level monitoring fault caught: {global_err}")

            print(f"[*] Sleeping for {RUN_INTERVAL_SECONDS // 60} minutes...")
            time.sleep(RUN_INTERVAL_SECONDS)
    except KeyboardInterrupt:
        # Ctrl+C is the advertised way to stop; exit without a traceback.
        print("\n[*] Interrupted by user. Shutting down.")