import os
import re
import json
import urllib.request
import urllib.parse
import time
from datetime import datetime
# --- CONFIGURATION ---
# Browser-style User-Agent header sent with reply submissions.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/120.0.0.0 Safari/537.36"
)
# JSON file in which the bot's post history is kept between runs.
MEMORY_FILE = "bot_memory.json"
# Upper bound on stored history entries; the oldest are dropped first.
MAX_MEMORY_ITEMS = 150
def load_memory(path=None):
    """Load the bot's past successful posts from a local JSON file.

    Args:
        path: Optional location of the JSON memory file. When omitted, the
            module-level ``MEMORY_FILE`` is used.

    Returns:
        A list of memory entries (dicts). An empty list is returned when the
        file does not exist, cannot be read, is not valid JSON, or does not
        hold a JSON array.
    """
    if path is None:
        path = MEMORY_FILE
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # A missing file just means there is no history yet; stay quiet.
        return []
    except (OSError, ValueError) as e:
        # ValueError covers both malformed JSON and undecodable bytes.
        print(f"[Memory] Failed to load memory: {e}")
        return []

    # Callers insert into and slice the result, and call .get() on each
    # entry, so anything other than a list of dicts is unusable.
    if not isinstance(data, list):
        print(
            "[Memory] Failed to load memory: expected a JSON list, "
            f"got {type(data).__name__}"
        )
        return []
    return [entry for entry in data if isinstance(entry, dict)]
def save_memory(history, path=None):
    """Save the updated history back to the JSON file.

    The data is first written to a sibling ``<path>.tmp`` file and then moved
    over the real file, so a failed or interrupted write leaves the previous
    history untouched instead of a truncated file.

    Args:
        history: JSON-serialisable list of memory entries.
        path: Optional destination file. When omitted, the module-level
            ``MEMORY_FILE`` is used.

    Returns:
        None. Failures are reported on stdout and otherwise ignored, so a
        storage problem never stops the caller.
    """
    if path is None:
        path = MEMORY_FILE
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(history, f, indent=4)
        os.replace(tmp_path, path)
    except (OSError, TypeError, ValueError) as e:
        print(f"[Memory] Failed to save memory: {e}")
        # Best-effort cleanup of the partial temp file.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
# Patterns are compiled once at import time instead of once per catalog chunk.
_BUMP_RE = re.compile(r'data-bump="(\d+)"')
_PINNED_RE = re.compile(r'data-pinned="(\d+)"')
_HREF_RE = re.compile(r'class="thread-catalog-link"\s*href="([^"]+)"')
_THREAD_ID_RE = re.compile(r'/thread/(\d+)')
_TAG_RE = re.compile(r'<[^>]+>')
# NOTE(review): the original post-text pattern lost its HTML tags when this
# file was pasted (only the `.*?` / `\s*(.*?)` fragments survived), leaving an
# unterminated string literal. This stand-in captures the body of the first
# <p> element in a chunk -- confirm against the real catalog markup and adjust.
_POST_TEXT_RE = re.compile(r'<p\b[^>]*>\s*(.*?)</p>', re.DOTALL | re.IGNORECASE)


def get_oldest_thread(html_content):
    """Find the least recently bumped, non-pinned thread in the catalog HTML.

    The HTML is split on ``class="post-container"`` and each resulting chunk
    is scanned with regular expressions. A chunk is only considered when it
    contains a ``data-bump`` value, a ``data-pinned`` value and a
    ``thread-catalog-link`` href; chunks whose ``data-pinned`` is ``1`` are
    skipped.

    Args:
        html_content: Catalog page markup as a string.

    Returns:
        A dict with keys ``bump`` (int), ``href`` (str), ``post_id`` (str, or
        ``"unknown"`` when the href has no ``/thread/<digits>`` part) and
        ``context`` (tag-free post text, possibly empty) for the thread with
        the smallest bump value, or ``None`` when no thread qualifies.
    """
    from html import unescape  # function-scope import keeps the block self-contained

    if not html_content:
        return None

    candidates = []
    # Element 0 is whatever precedes the first post container, so skip it.
    for chunk in html_content.split('class="post-container"')[1:]:
        bump_match = _BUMP_RE.search(chunk)
        pinned_match = _PINNED_RE.search(chunk)
        href_match = _HREF_RE.search(chunk)
        if not (bump_match and pinned_match and href_match):
            continue
        if pinned_match.group(1) == '1':
            continue

        href = href_match.group(1)
        id_match = _THREAD_ID_RE.search(href)
        text_match = _POST_TEXT_RE.search(chunk)
        raw_text = text_match.group(1) if text_match else ""
        # Strip tags before decoding entities so that escaped markup such as
        # "&lt;b&gt;" survives as literal text instead of being removed.
        clean_text = unescape(_TAG_RE.sub('', raw_text)).strip()

        candidates.append({
            'bump': int(bump_match.group(1)),
            'href': href,
            'post_id': id_match.group(1) if id_match else "unknown",
            'context': clean_text,
        })

    # min() returns the first minimal element, matching a stable sort + [0].
    return min(candidates, key=lambda thread: thread['bump'], default=None)
def _format_memory_for_prompt(memory_history, limit=5):
    """Render the first ``limit`` memory entries as a prompt section."""
    if not memory_history:
        return ""
    header = "\nYour Recent Post History (Learn from this style but DO NOT repeat it):\n"
    # New entries are stored at the front of the list, so the leading slice
    # holds the most recent posts. Only a handful are sent to keep the prompt
    # small; the rest stay in the JSON file.
    entries = [
        f"Thread: {item.get('thread_topic', '')}\n"
        f"Your Reply: {item.get('your_reply', '')}\n\n"
        for item in memory_history[:limit]
        if isinstance(item, dict)
    ]
    return header + "".join(entries)


def generate_reply_with_mistral(thread_context, post_id, memory_history):
    """Ask the Mistral chat-completions API for a reply to a thread.

    The prompt contains the current thread snippet plus the bot's most recent
    stored replies, so the model can keep a consistent style without
    repeating itself.

    Args:
        thread_context: Text snippet of the thread being replied to.
        post_id: Thread ID; used for the ``>>id`` quote line of the reply.
        memory_history: List of past interactions (dicts with
            ``thread_topic`` and ``your_reply`` keys). May be empty.

    Returns:
        A ``(full_reply, ai_text)`` tuple, where ``full_reply`` is
        ``">>{post_id}\\n{ai_text}"``. ``(None, None)`` is returned when the
        ``MISTRAL_API_KEY`` environment variable is unset, the request fails,
        or the model returns no text.
    """
    api_key = os.environ.get("MISTRAL_API_KEY")
    if not api_key:
        print("[Mistral] Error: MISTRAL_API_KEY not set.")
        return None, None

    url = "https://api.mistral.ai/v1/chat/completions"
    memory_string = _format_memory_for_prompt(memory_history)
    prompt = (
        "\n"
        "You are an anonymous user on an internet imageboard. Write a short, casual, and witty reply (1-3 sentences) to the current thread.\n"
        "Fit the imageboard culture. Do not use hashtags or signatures. Do not include the >>id link yourself.\n"
        f"{memory_string}\n"
        f'Current Thread Snippet: "{thread_context}"\n'
    )
    payload = {
        "model": "mistral-large-latest",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.8,
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    request = urllib.request.Request(
        url,
        data=json.dumps(payload).encode('utf-8'),
        headers=headers,
        method="POST",
    )
    try:
        # Without a timeout a stalled connection would block the bot forever.
        with urllib.request.urlopen(request, timeout=30) as response:
            resp_json = json.loads(response.read().decode('utf-8'))
        content = resp_json['choices'][0]['message']['content']
        ai_text = (content or "").strip()
    except Exception as e:
        # Network boundary: any transport, decoding or response-shape failure
        # is reported and turned into "no reply" rather than a crash.
        print(f"[Mistral] API Error: {e}")
        return None, None

    if not ai_text:
        # An empty completion would otherwise be posted as a bare ">>id" line.
        print("[Mistral] API Error: empty completion")
        return None, None
    return f">>{post_id}\n{ai_text}", ai_text
def submit_reply(post_id, text_content):
    """Submit a reply to a thread via a form-encoded POST request.

    Only the ``User-Agent`` header is set explicitly on the request.

    Args:
        post_id: Numeric thread ID (string or int) to reply to.
        text_content: Body of the reply.

    Returns:
        True when the server answers with status 200 or 302. False when the
        thread ID is not purely numeric (no request is sent in that case) or
        when the request fails for any reason.
    """
    thread_id = str(post_id)
    # The ID is placed straight into the URL path, so reject anything that is
    # not plain ASCII digits (including the "unknown" placeholder ID).
    if not re.fullmatch(r'[0-9]+', thread_id):
        print(f"[Submit Error] Invalid thread id: {post_id!r}")
        return False

    reply_url = f"https://indiachan.top/boards/b/thread/{thread_id}/reply"
    payload = {
        "content": text_content,
        # Overridable through the environment so the password need not live
        # in source control; the default keeps the previous behaviour.
        "delete_password": os.environ.get("BOT_DELETE_PASSWORD", "botpassword123"),
    }
    request = urllib.request.Request(
        reply_url,
        data=urllib.parse.urlencode(payload).encode('utf-8'),
        headers={"User-Agent": USER_AGENT},
        method="POST",
    )
    try:
        # Without a timeout a stalled connection would block the bot forever.
        with urllib.request.urlopen(request, timeout=30) as response:
            return response.getcode() in (200, 302)
    except Exception as e:
        # Network boundary: HTTP errors and transport failures both mean the
        # post did not go through.
        print(f"[Submit Error] Failed to post reply: {e}")
        return False
def fetch_latest_html():
    """Return the catalog HTML that the main loop scans for threads.

    This is currently a stub: it hands back a fixed placeholder string and
    performs no download.
    """
    placeholder_html = """[YOUR_HTML_SNIPPET_HERE]"""
    return placeholder_html
# --- Main Execution Loop ---
def _run_cycle(bot_memory):
    """Run one catalog check; on a successful post, update ``bot_memory`` in place."""
    oldest_thread = get_oldest_thread(fetch_latest_html())
    if not oldest_thread:
        print("[Warning] No valid threads found.")
        return

    tid = oldest_thread['post_id']
    context = oldest_thread['context']

    # get_oldest_thread() uses "unknown" when it cannot parse a thread ID from
    # the link; there is nowhere valid to post a reply in that case.
    if tid == "unknown":
        print("[Skip] Could not determine the thread ID; not replying.")
        return

    # Check if we already replied to this exact thread ID recently.
    if any(isinstance(entry, dict) and entry.get('post_id') == tid
           for entry in bot_memory):
        print(f"[Skip] Already replied to thread {tid} recently.")
        return

    full_reply, raw_ai_text = generate_reply_with_mistral(context, tid, bot_memory)
    if not full_reply:
        return

    print(f"[Response Generated]\n{full_reply}")
    if not submit_reply(tid, full_reply):
        print("[Error] Post failed. Memory not updated.")
        return

    print("[Memory] Post successful. Saving new interaction to memory...")
    # Newest entry goes to the front; trimming the tail then drops the oldest
    # entries once the list exceeds MAX_MEMORY_ITEMS.
    bot_memory.insert(0, {
        "timestamp": time.time(),
        "post_id": tid,
        "thread_topic": context,
        "your_reply": raw_ai_text,
    })
    del bot_memory[MAX_MEMORY_ITEMS:]
    save_memory(bot_memory)


def main():
    """Run the bot: check the catalog every five minutes until interrupted.

    Each cycle picks the oldest thread, skips it if it is already in memory,
    otherwise generates a reply, posts it and records the interaction. Any
    exception raised inside a cycle is reported and the loop continues;
    Ctrl+C ends the loop cleanly.
    """
    print("Starting self-learning imageboard bot...\n")
    bot_memory = load_memory()
    try:
        while True:
            print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Checking catalog...")
            try:
                _run_cycle(bot_memory)
            except Exception as e:
                # Top-level boundary: one bad cycle must not kill the bot.
                print(f"[Error] Loop anomaly: {e}")
            print("Sleeping for 5 minutes...\n")
            time.sleep(300)
    except KeyboardInterrupt:
        # The loop has no other exit, so treat Ctrl+C as a normal shutdown.
        print("\nInterrupted by user. Shutting down.")
# Start the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()