"""Imageboard reply bot.

Periodically picks the least recently bumped, unpinned thread from a catalog
page, asks the Mistral chat-completions API for a short reply, posts it, and
records each successful post in a local JSON "memory" file.
"""

import html
import json
import os
import re
import time
import urllib.parse
import urllib.request
from datetime import datetime

# --- CONFIGURATION ---
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
MEMORY_FILE = "bot_memory.json"
MAX_MEMORY_ITEMS = 150  # Enforces a maximum of 150 entries at a time
PROMPT_MEMORY_ITEMS = 5  # Only this many newest entries are fed into the prompt
REQUEST_TIMEOUT = 30  # Seconds; without a timeout one stalled socket hangs the bot
POLL_INTERVAL_SECONDS = 300
DELETE_PASSWORD = "botpassword123"

# Patterns applied to each catalog chunk. Compiled once, not once per chunk.
_BUMP_RE = re.compile(r'data-bump="(\d+)"')
_PINNED_RE = re.compile(r'data-pinned="(\d+)"')
_HREF_RE = re.compile(r'class="thread-catalog-link"\s*href="([^"]+)"')
_THREAD_ID_RE = re.compile(r'/thread/(\d+)')
_TAG_RE = re.compile(r'<[^>]+>')
# NOTE(review): this pattern is reproduced token-for-token from the collapsed
# source, where it consisted only of line breaks around `.*?`, `\s*` and
# `(.*?)`. It looks as though literal HTML tag anchors were lost somewhere
# upstream -- confirm against the real catalog markup before relying on the
# extracted context.
_TEXT_RE = re.compile(r'\n.*?\n\s*\n(.*?)\n', re.DOTALL | re.IGNORECASE)


def load_memory():
    """Load the bot's past successful posts from the local JSON file.

    Returns:
        A list of dict entries (newest first, as written by the main loop).
        Returns an empty list when the file is missing, unreadable, not valid
        JSON, or does not contain a JSON list. Non-dict entries are dropped so
        later ``entry.get(...)`` calls cannot fail.
    """
    if not os.path.exists(MEMORY_FILE):
        return []
    try:
        with open(MEMORY_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        print(f"[Memory] Failed to load memory: {e}")
        return []
    if not isinstance(data, list):
        print("[Memory] Failed to load memory: expected a JSON list.")
        return []
    return [entry for entry in data if isinstance(entry, dict)]


def save_memory(history):
    """Save the updated history back to the JSON file.

    The data is written to a temporary sibling file and then moved into place
    with ``os.replace``, so a failure while serializing or writing does not
    truncate the existing memory file. Failures are reported, not raised.

    Args:
        history: JSON-serializable list of memory entries.
    """
    tmp_path = f"{MEMORY_FILE}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(history, f, indent=4)
        os.replace(tmp_path, MEMORY_FILE)
    except (OSError, TypeError, ValueError) as e:
        print(f"[Memory] Failed to save memory: {e}")


def get_oldest_thread(html_content):
    """Find the unpinned thread with the lowest bump value in the catalog HTML.

    The HTML is split on ``class="post-container"``; every chunk after the
    first is inspected. A chunk is considered only when it has a ``data-bump``
    attribute, a ``data-pinned`` attribute, and a ``thread-catalog-link`` href
    that contains ``/thread/<digits>``. Chunks whose ``data-pinned`` is ``1``
    are skipped.

    Args:
        html_content: Catalog page HTML as a string.

    Returns:
        A dict with keys ``bump`` (int), ``href``, ``post_id`` (digit string)
        and ``context`` (tag-stripped, entity-decoded text; may be empty), or
        ``None`` when no chunk qualifies.
    """
    valid_threads = []
    for chunk in html_content.split('class="post-container"')[1:]:
        bump_match = _BUMP_RE.search(chunk)
        pinned_match = _PINNED_RE.search(chunk)
        href_match = _HREF_RE.search(chunk)
        if not (bump_match and href_match and pinned_match):
            continue
        if pinned_match.group(1) == '1':
            continue

        href = href_match.group(1)
        post_id_match = _THREAD_ID_RE.search(href)
        if not post_id_match:
            # Without a numeric ID there is no valid reply URL to post to.
            continue

        text_match = _TEXT_RE.search(chunk)
        raw_text = text_match.group(1) if text_match else ""
        # Strip tags first, then decode entities, so an escaped "&lt;b&gt;"
        # is kept as text instead of being removed as a tag.
        clean_text = html.unescape(_TAG_RE.sub('', raw_text)).strip()

        valid_threads.append({
            'bump': int(bump_match.group(1)),
            'href': href,
            'post_id': post_id_match.group(1),
            'context': clean_text,
        })

    if not valid_threads:
        return None
    # min() returns the first of equal minima, matching a stable sort + [0].
    return min(valid_threads, key=lambda thread: thread['bump'])


def _format_memory(memory_history):
    """Render the newest memory entries as a prompt section ('' when empty)."""
    if not memory_history:
        return ""
    parts = ["\nYour Recent Post History (Learn from this style but DO NOT repeat it):\n"]
    # New entries are inserted at index 0, so the head of the list is the most
    # recent. Only a few are included to keep the prompt small while the JSON
    # file retains up to MAX_MEMORY_ITEMS.
    for item in memory_history[:PROMPT_MEMORY_ITEMS]:
        if not isinstance(item, dict):
            continue
        parts.append(
            f"Thread: {item.get('thread_topic', '')}\n"
            f"Your Reply: {item.get('your_reply', '')}\n\n"
        )
    return "".join(parts)


def generate_reply_with_mistral(thread_context, post_id, memory_history):
    """Ask the Mistral chat-completions API for a reply to the current thread.

    The prompt contains the current thread snippet plus the most recent
    memory entries so the model can stay consistent and avoid repetition.

    Args:
        thread_context: Text snippet of the thread being replied to.
        post_id: Thread ID, used to prefix the reply with a ``>>id`` quote.
        memory_history: List of past interaction dicts, newest first.

    Returns:
        ``(full_reply, raw_text)`` where ``full_reply`` is ``>>{post_id}``, a
        newline, then the model text, and ``raw_text`` is the model text alone.
        Returns ``(None, None)`` when ``MISTRAL_API_KEY`` is unset, the request
        or response parsing fails, or the model returns an empty reply.
    """
    api_key = os.environ.get("MISTRAL_API_KEY")
    if not api_key:
        print("[Mistral] Error: MISTRAL_API_KEY not set.")
        return None, None

    url = "https://api.mistral.ai/v1/chat/completions"
    memory_string = _format_memory(memory_history)

    prompt = (
        "You are an anonymous user on an internet imageboard.\n"
        "Write a short, casual, and witty reply (1-3 sentences) to the current thread.\n"
        "Fit the imageboard culture. Do not use hashtags or signatures. "
        "Do not include the >>id link yourself.\n"
        f"{memory_string}\n"
        "Current Thread Snippet:\n"
        f'"{thread_context}"\n'
    )

    payload = {
        "model": "mistral-large-latest",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.8,
    }
    data = json.dumps(payload).encode('utf-8')
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    request = urllib.request.Request(url, data=data, headers=headers, method="POST")

    try:
        with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
            resp_json = json.loads(response.read().decode('utf-8'))
        ai_text = resp_json['choices'][0]['message']['content'].strip()
    except Exception as e:
        # Deliberately broad: any network, HTTP or response-shape problem
        # means "no reply this cycle" rather than a crash.
        print(f"[Mistral] API Error: {e}")
        return None, None

    if not ai_text:
        # An empty completion would otherwise be posted as a bare ">>id" quote.
        print("[Mistral] Error: empty reply from API.")
        return None, None
    return f">>{post_id}\n{ai_text}", ai_text


def submit_reply(post_id, text_content):
    """Submit the reply as a form-encoded POST with only a User-Agent header.

    Args:
        post_id: Thread ID used to build the reply URL.
        text_content: Full reply text to post.

    Returns:
        True when the response status is 200 or 302, False otherwise or when
        the request raises.
    """
    reply_url = f"https://indiachan.top/boards/b/thread/{post_id}/reply"
    payload = {
        "content": text_content,
        "delete_password": DELETE_PASSWORD,
    }
    data = urllib.parse.urlencode(payload).encode('utf-8')
    headers = {"User-Agent": USER_AGENT}
    request = urllib.request.Request(reply_url, data=data, headers=headers, method="POST")

    try:
        with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
            return response.getcode() in (200, 302)
    except Exception as e:
        print(f"[Submit Error] Failed to post reply: {e}")
        return False


def fetch_latest_html():
    """Return the catalog HTML to scan.

    Currently a placeholder that returns a fixed template string; replace the
    body with a real fetch of the catalog page.
    """
    return """[YOUR_HTML_SNIPPET_HERE]"""


def _run_cycle(bot_memory):
    """Run one check-generate-post cycle and return the resulting memory list."""
    sample_html = fetch_latest_html()
    oldest_thread = get_oldest_thread(sample_html)
    if not oldest_thread:
        print("[Warning] No valid threads found.")
        return bot_memory

    tid = oldest_thread['post_id']
    context = oldest_thread['context']

    # Check if we already replied to this exact thread ID recently.
    if any(entry.get('post_id') == tid for entry in bot_memory):
        print(f"[Skip] Already replied to thread {tid} recently.")
        return bot_memory

    full_reply, raw_ai_text = generate_reply_with_mistral(context, tid, bot_memory)
    if not full_reply:
        return bot_memory

    print(f"[Response Generated]\n{full_reply}")
    if not submit_reply(tid, full_reply):
        print("[Error] Post failed. Memory not updated.")
        return bot_memory

    print("[Memory] Post successful. Saving new interaction to memory...")
    new_entry = {
        "timestamp": time.time(),
        "post_id": tid,
        "thread_topic": context,
        "your_reply": raw_ai_text,
    }
    # Newest entry goes first; slicing then drops the oldest entries beyond
    # MAX_MEMORY_ITEMS.
    updated_memory = [new_entry, *bot_memory][:MAX_MEMORY_ITEMS]
    save_memory(updated_memory)
    return updated_memory


# --- Main Execution Loop ---
def main():
    """Run the bot forever: one cycle, then sleep, repeating until interrupted."""
    print("Starting self-learning imageboard bot...\n")
    bot_memory = load_memory()

    while True:
        print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Checking catalog...")
        try:
            bot_memory = _run_cycle(bot_memory)
        except Exception as e:
            # Top-level boundary: log and keep the loop alive.
            print(f"[Error] Loop anomaly: {e}")

        print("Sleeping for 5 minutes...\n")
        time.sleep(POLL_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()