Initial commit

Erik Stambaugh 2025-07-13 09:46:56 -07:00
commit ccbcebf0e8
14 changed files with 703 additions and 0 deletions

3
.gitignore vendored Normal file
View file

@@ -0,0 +1,3 @@
env
data/*
__pycache__

22
Makefile Normal file
View file

@@ -0,0 +1,22 @@
DOCKER_COMPOSE := COMPOSE_MENU=0 docker compose --env-file ./env
default: up
up: env
$(DOCKER_COMPOSE) up
build:
$(DOCKER_COMPOSE) build
env:
@echo "Please copy env.in to env and edit the contents to what you need."
@false
signalbash:
$(DOCKER_COMPOSE) run --rm signal-cli bash
import_factoids:
$(DOCKER_COMPOSE) run --rm init-db

70
README.md Normal file
View file

@@ -0,0 +1,70 @@
# What the devil is this?
It's a Signal bot that emits Infobot-style factoids, with an LLM for fuzzy matching.
When a user asks a question, it first checks the database for a verbatim answer, and emits that if it finds one.
If there's no exact match:
- the LLM parses the question into one or more topics ("tell me about alice and bob" becomes `["alice", "bob"]`)
- the topics are vector encoded and queried against the encodings in the factoid database
- the nearest match for each topic is sent back to the LLM, which is then asked to phrase it back to the user (see the sketch below)
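
Under the hood that lookup is just a pgvector nearest-neighbour query. Here's a minimal sketch using the same `all-MiniLM-L6-v2` model and `factoids` table as the rest of this repo (the `nearest_factoid` helper name is purely illustrative):

```python
# Sketch only: embed a topic and pull the closest factoid out of pgvector.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

async def nearest_factoid(conn, topic: str):
    # conn is an asyncpg connection; factoids.embedding is a VECTOR(384) column
    emb = model.encode(topic)
    emb_str = f"[{', '.join(map(str, emb))}]"
    return await conn.fetchrow(
        "SELECT trigger, copula, response FROM factoids "
        "ORDER BY embedding <-> $1::vector LIMIT 1",
        emb_str,
    )
```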
# Why?
Huh?
# How do I use it?
You need CUDA working in Docker, or you'll have to edit the docker compose files to take the GPU bits out and rely on the CPU.
At present the LLM seems to require about 2GB of GPU RAM, which is really small as LLMs go. My PC works harder playing Balatro.
# What are its limitations?
It doesn't have heaps of feature parity with the old perl infobot. The right way to get that might be to hack on the old bot code and use it as the main chat parser for this. I don't have a ton of desire to sit down and code my own implementation of the entire thing.
Some important stuff we're missing right now:
- Creating new factoids
- Understanding when questions are being asked of the bot so it doesn't just respond to every single thing that's said
- Botsnacks.
And some non-infobot stuff we could use:
- A better prompt for the LLM to integrate multiple factoids into a single response
- Some security precautions against prompt injection etc. -- at the moment it's just "trust only those on the allowlist"
## Initialize the database
This took around an hour to do 300k factoids and a fair amount of compute/GPU power. There's no consistency or duplicate checking at the moment, so you're best off trashing the postgres data dir first.
- dump the factoid database into `is.txt` and `are.txt` and put them in `scripts/`
  (tab-separated lines: `topic\tresponse`; see the example below)
- `make import_factoids`
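
A couple of hypothetical lines from `is.txt` (fields separated by a literal tab; `<reply>` suppresses the "topic is ..." prefix):

```
paris	the capital of France
sky	<reply>Look up!
```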
## Prepare a Signal account
Making signal-cli work can be fairly involved. Check the wiki at the signal-cli repo for details, but run this command to get a shell inside the signal-cli container:
- `make signalbash`
## Create an env file
- Copy `env.in` to `env` and edit its contents to what you need.
## Start the server
- `make`
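
Once the stack is up you can poke the bot's HTTP API directly, bypassing Signal. A quick sketch, assuming the default port mapping from `docker-compose.yaml` and a made-up question:

```python
# Sketch: ask the factoid service a question over HTTP.
import requests

r = requests.post("http://localhost:8000/ask", json={"query": "who is steve?"})
answer = r.json()
print(answer["reply"])    # the LLM-phrased answer
print(answer["matches"])  # the raw factoid matches it was built from
```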

9
app/Dockerfile Normal file
View file

@@ -0,0 +1,9 @@
FROM python:3.11-slim
WORKDIR /app
COPY . /app
RUN pip install --no-cache-dir -r requirements.txt
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

244
app/main.py Normal file
View file

@@ -0,0 +1,244 @@
import os
import asyncpg
import numpy as np
from fastapi import FastAPI, Request
from sentence_transformers import SentenceTransformer
import httpx
import random
DB_CONFIG = {
"host": os.getenv("DB_HOST"),
"port": int(os.getenv("DB_PORT", "5432")),
"user": os.getenv("DB_USER"),
"password": os.getenv("DB_PASSWORD"),
"database": os.getenv("DB_NAME"),
}
LLM_API_URL = os.getenv("LLM_API_URL", "http://llm:80")
app = FastAPI()
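# all-MiniLM-L6-v2 produces 384-dimensional embeddings, matching the VECTOR(384)
# column created by scripts/init_and_load.py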
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
@app.on_event("startup")
async def startup():
app.state.db = await asyncpg.create_pool(**DB_CONFIG)
@app.on_event("shutdown")
async def shutdown():
await app.state.db.close()
async def extract_triggers(user_query: str) -> list[str]:
"""Ask the LLM to extract factoid-like terms from a user query."""
prompt = f"""<|im_start|>system
You are a factoid trigger extractor. Extract a list of keywords or short phrases that might match entries in an infobot-style knowledge base.
Respond only with a comma-separated list of triggers. Do not answer the question or correct spelling or grammar.
<|im_end|>
<|im_start|>user
Who is steve?
<|im_end|>
<|im_start|>assistant
steve
<|im_end|>
<|im_start|>user
Tell me about HTTP status codes.
<|im_end|>
<|im_start|>assistant
HTTP status codes
<|im_end|>
<|im_start|>user
Who are you and your Gary
<|im_end|>
<|im_start|>assistant
you, Gary
<|im_end|>
<|im_start|>user
{user_query}
<|im_end|>
<|im_start|>assistant
"""
payload = {
"inputs": prompt,
"parameters": {
"temperature": 0.3,
"max_new_tokens": 64,
},
}
print("PAYLOAD:", payload)
async with httpx.AsyncClient() as client:
r = await client.post(
f"{LLM_API_URL}/generate",
json=payload,
timeout=httpx.Timeout(120.0),
)
if r.status_code != 200:
return []
output = r.json().get("generated_text", "")
print("OUTPUT:", output)
# Try to parse output as a list
triggers = []
for line in output.splitlines():
line = line.strip(" []\"'")
if line:
triggers.extend([t.strip(" \"'") for t in line.split(",") if t.strip()])
return list(set(triggers))
@app.post("/ask")
async def ask(request: Request):
body = await request.json()
query = body.get("query", "").strip()
if not query:
return {"error": "Missing query"}
responses = []
# try to extract the query verbatim
async with app.state.db.acquire() as conn:
        rows = await conn.fetch(
"""
SELECT trigger, response, copula
FROM factoids
WHERE trigger = $1
""",
query.strip()
)
if rows:
            # fetch() returns a list of asyncpg Records; turn each into a plain dict
            rows = [dict(r) for r in rows]
options = []
for row in rows:
for option in row["response"].split("|"):
options.append(row | {"response": option.strip()})
row = random.choice(options)
print("CHOICE:", row)
triggers = [query.strip()]
choices = [row['response']]
# Use <reply> if present, otherwise prepend the trigger
for c in choices:
if c.startswith("<reply>"):
responses.append(c[len("<reply>"):].strip())
else:
responses.append(f"{row['trigger']} {row['copula']} {c}")
else:
triggers = await extract_triggers(query)
if not triggers:
return {"response": "I don't know that one."}
print("triggers: ", triggers)
for trigger in triggers:
row = await conn.fetchrow(
"""
SELECT trigger, response, copula
FROM factoids
WHERE trigger = $1
LIMIT 1
""",
trigger
)
if not row:
embedding = model.encode(trigger)
embedding_str = f"[{', '.join(map(str, embedding))}]"
row = await conn.fetchrow(
"""
SELECT trigger, response, copula
FROM factoids
ORDER BY embedding <-> $1::vector
LIMIT 1
""",
embedding_str
)
if row:
print("ROW:", row)
response = row['response']
# Pick a random option from pipe-separated responses
options = [r.strip() for r in response.split('|') if r.strip()]
if options:
                        choices = [random.choice(options)]
else:
choices = [response]
# Use <reply> if present, otherwise prepend the trigger
for c in choices:
if c.startswith("<reply>"):
responses.append(c[len("<reply>"):].strip())
else:
responses.append(f"{row['trigger']} {row['copula']} {c}")
if not responses:
return {"response": "I don't know any of those."}
# Ask LLM to summarize
responses_str = "\nValue: ".join(responses)
#
summary_prompt = (
f"""<|im_start|>system
You are a summarizer for a fact-based chatbot. Your task is to condense database entries into short, accurate one-line summaries. Do not speculate, define, or add new facts. Do not correct spelling or phrasing from the facts or triggers. Do not mix context from prior triggers.
<|im_end|>
<|im_start|>user
Summarize the following database entry.
Trigger: Paris
Value: Paris is the capital of France
Value: Paris is located in the north-central part of the country.
<|im_end|>
<|im_start|>assistant
Paris is the capital of France and located in the north-central part of the country.
<|im_end|>
<|im_start|>user
Summarize the following database entry.
Trigger: squinky, spacehobo
Value: spacehobo is a Citizen.
Value: squinky is kinky
<|im_end|>
<|im_start|>assistant
spacehobo is a Citizen and squinky is kinky
<|im_end|>
<|im_start|>user
Summarize the following database entry.
Trigger: sky
Value: sky is blue
Value: the sky is the big thing outside when you look up
Value: Look!
<|im_end|>
<|im_start|>assistant
Look up at that big blue thing outside!
<|im_end|>
<|im_start|>user
Summarize the following database entry.
Trigger: {query}
Value: {responses_str}
<|im_end|>"""
)
print("SUMMARY PAYLOAD:", summary_prompt)
async with httpx.AsyncClient() as client:
r = await client.post(
f"{LLM_API_URL}/generate",
json={"inputs": summary_prompt, "parameters": {"temperature": 0.8, "max_new_tokens": 200}},
timeout=httpx.Timeout(120.0),
)
final_response = r.json().get("generated_text", "\n".join(responses))
print("FINAL RESPONSE:", final_response)
return {"reply": final_response, "matches": responses, "triggers": triggers}

6
app/requirements.txt Normal file
View file

@@ -0,0 +1,6 @@
fastapi
uvicorn[standard]
asyncpg
sentence-transformers
httpx

View file

@@ -0,0 +1,42 @@
services:
init-db:
build:
context: .
dockerfile: initdb.Dockerfile
container_name: infobot-init
depends_on:
db:
condition: service_healthy
volumes:
- ./scripts:/scripts:ro
- ./data/hf_cache:/root/.cache/huggingface
environment:
DB_HOST: db
DB_PORT: 5432
DB_USER: infobot
DB_PASSWORD: infobot
DB_NAME: infobot
entrypoint: ["python", "/scripts/init_and_load.py"]
devices:
- /dev/nvidia-uvm
- /dev/nvidia-uvm-tools
- /dev/nvidia-modeset
- /dev/nvidiactl
- /dev/nvidia0
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: ["gpu"]
networks: ["dumant"]
networks:
dumant:
name: dumant
external: true

105
docker-compose.yaml Normal file
View file

@@ -0,0 +1,105 @@
services:
db:
image: ankane/pgvector
container_name: infobot-db
restart: unless-stopped
ports:
- "5432:5432"
environment:
POSTGRES_DB: infobot
POSTGRES_USER: infobot
POSTGRES_PASSWORD: infobot
volumes:
- ./data/postgres:/var/lib/postgresql/data
healthcheck:
test: ["CMD", "pg_isready", "-U", "infobot"]
interval: 10s
timeout: 5s
retries: 5
networks: ["dumant"]
llm:
image: ghcr.io/huggingface/text-generation-inference:1.4
container_name: infobot-llm
restart: unless-stopped
ports:
- "8080:80"
environment:
HF_HUB_ENABLE_HF_TRANSFER: 0
# PYTORCH_CUDA_ALLOC_CONF: max_split_size_mb:64
MODEL_ID: ${MODEL_ID}
volumes:
- ./data/models:/data
devices:
- /dev/nvidia-uvm
- /dev/nvidia-uvm-tools
- /dev/nvidia-modeset
- /dev/nvidiactl
- /dev/nvidia0
command:
- --max-total-tokens=1024
- --max-batch-prefill-tokens=256
- --max-input-length=256
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: ["gpu"]
networks: ["dumant"]
app:
build: ./app
container_name: infobot-app
restart: unless-stopped
depends_on:
db:
condition: service_healthy
llm:
condition: service_started
ports:
- "8000:8000"
volumes:
- ./app:/app
environment:
DB_HOST: db
DB_PORT: 5432
DB_USER: infobot
DB_PASSWORD: infobot
DB_NAME: infobot
LLM_API_URL: http://llm:80
networks: ["dumant"]
  signal-app:
    build: ./signal-app
    container_name: infobot-signal-app
    restart: unless-stopped
    volumes:
      - ./signal-app:/app
    environment:
      # pass the allowlists through so signal-app/main.py can actually read them
      SIGNAL_USER_ALLOWLIST: ${SIGNAL_USER_ALLOWLIST}
      SIGNAL_GROUP_ALLOWLIST: ${SIGNAL_GROUP_ALLOWLIST}
    networks: ["dumant"]
signal-cli:
#image: dogukanakkaya/signal-cli
image: registry.gitlab.com/packaging/signal-cli/signal-cli-native:latest
container_name: infobot-signal-cli
restart: unless-stopped
volumes:
- ./data/signal-cli:/var/lib/signal-cli
ports:
- "7583:7583"
tmpfs:
- "/tmp:exec"
command: -a ${SIGNAL_ID} daemon --tcp=0.0.0.0:7583 --receive-mode=on-start
networks: ["dumant"]
volumes:
postgres:
models:
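# "dumant" is declared as an external network below; create it once with
# `docker network create dumant` before bringing the stack up.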
networks:
dumant:
name: dumant

14
env.in Normal file
View file

@@ -0,0 +1,14 @@
# Set this to the valid phone number associated with your Signal account
SIGNAL_ID=+12345678901
# Accept DMs from this list of users only (space-separated)
SIGNAL_USER_ALLOWLIST="+12345678901 +12345678901 +12345678901 +12345678901 +12345678901"
# Accept messages from anyone in these groups
SIGNAL_GROUP_ALLOWLIST="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg1234567890= ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg1234567890="
# Use this LLM.
# Visit https://huggingface.co/models?pipeline_tag=text-generation&sort=trending and try to use an "instruct" model
# The one listed here by default is a rather small one but adequate for now
MODEL_ID="Gensyn/Qwen2.5-0.5B-Instruct"

7
initdb.Dockerfile Normal file
View file

@@ -0,0 +1,7 @@
FROM python:3
#FROM python:3.11-slim
RUN pip install --no-cache-dir \
psycopg2 \
sentence_transformers \
&& /bin/true

68
scripts/init_and_load.py Normal file
View file

@@ -0,0 +1,68 @@
import os
import psycopg2
import csv
from sentence_transformers import SentenceTransformer
DB_HOST = os.environ["DB_HOST"]
DB_PORT = os.environ["DB_PORT"]
DB_USER = os.environ["DB_USER"]
DB_PASSWORD = os.environ["DB_PASSWORD"]
DB_NAME = os.environ["DB_NAME"]
TSV_IS = "/scripts/is.txt"
TSV_ARE = "/scripts/are.txt"
# Connect to DB
conn = psycopg2.connect(
host=DB_HOST,
port=DB_PORT,
dbname=DB_NAME,
user=DB_USER,
password=DB_PASSWORD
)
conn.autocommit = True
cur = conn.cursor()
# Ensure pgvector extension and table exist
cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
cur.execute("""
CREATE TABLE IF NOT EXISTS factoids (
id SERIAL PRIMARY KEY,
trigger TEXT NOT NULL,
copula TEXT NOT NULL DEFAULT 'is',
response TEXT NOT NULL,
embedding VECTOR(384)
);
""")
# Load model
print("Loading embedding model...")
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# Process TSVs one line at a time
for copula, TSV_PATH in ( ("is", TSV_IS), ("are", TSV_ARE) ):
print(f"Loading and inserting from {TSV_PATH}...")
with open(TSV_PATH, "r", encoding="utf-8", errors="replace") as f:
reader = csv.reader(f, delimiter="\t")
count = 0
        for row in reader:
            if len(row) != 2:
                continue
            if count % 100 == 0:
                print(count, row[0], copula, row[1])
            count += 1
trigger, response = row[0].strip(), row[1].strip()
if not trigger or not response:
continue
embedding = model.encode(trigger).tolist()
cur.execute(
"INSERT INTO factoids (trigger, copula, response, embedding) VALUES (%s, %s, %s, %s)",
(trigger, copula, response, embedding)
)
print("All factoids loaded.")
cur.close()
conn.close()

9
signal-app/Dockerfile Normal file
View file

@@ -0,0 +1,9 @@
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt /tmp
RUN pip install --no-cache-dir -r /tmp/requirements.txt
CMD [ "python", "-u", "/app/main.py" ]

102
signal-app/main.py Normal file
View file

@@ -0,0 +1,102 @@
import socket
import json
import time
import requests
import os
SIGNAL_USER_ALLOWLIST = (os.getenv("SIGNAL_USER_ALLOWLIST") or "").split()
SIGNAL_GROUP_ALLOWLIST = (os.getenv("SIGNAL_GROUP_ALLOWLIST") or "").split()
LLM_API_URL = os.getenv("LLM_API_URL", "http://llm:80")
#SIGNAL_API = "http://signal-cli:7583"
BOT_API="http://app:8000/ask"
def send_json_rpc(method, params=None, request_id=1, host="signal-cli", port=7583):
"""Send a JSON-RPC 2.0 request over TCP to signal-cli."""
request = {
"jsonrpc": "2.0",
"id": request_id,
"method": method,
"params": params or {}
}
request_str = json.dumps(request) + "\n"
with socket.create_connection((host, port)) as sock:
sock.sendall(request_str.encode("utf-8"))
response = recv_all(sock)
return json.loads(response)
def recv_all(sock):
"""Read until newline (signal-cli terminates each JSON-RPC response with \\n)."""
buffer = b""
while not buffer.endswith(b"\n"):
data = sock.recv(4096)
if not data:
break
buffer += data
return buffer.decode("utf-8")
def process_message(msg):
with requests.Session() as session:
result = session.post(BOT_API, json={"query": str(msg)})
j = result.json()
return j["reply"]
def listen_for_notifications(host="signal-cli", port=7583):
with socket.create_connection((host, port)) as sock:
print("Connected to signal-cli JSON-RPC")
buffer = b""
while True:
chunk = sock.recv(4096)
if not chunk:
break
buffer += chunk
while b"\n" in buffer:
line, buffer = buffer.split(b"\n", 1)
try:
msg = json.loads(line)
print(msg)
if "method" in msg:
if msg["method"] == "receive":
envelope = msg["params"]["envelope"]
source = envelope["source"]
if "dataMessage" in envelope:
msg = envelope["dataMessage"]["message"] # there are non-message messages, like read receipts
if "groupInfo" in envelope["dataMessage"]:
group_id = envelope["dataMessage"]["groupInfo"]["groupId"]
if group_id not in SIGNAL_GROUP_ALLOWLIST:
print(f"GROUP RECV DENIED ({source}): {envelope}")
break
print(f"GROUP ({group_id}/{source}): {msg}")
params = { "recipient": group_id, "groupId": group_id }
else:
if source not in SIGNAL_USER_ALLOWLIST:
print(f"RECV DENIED ({source}): {envelope}")
break
print(f"RECV ({source}): {msg}")
params = { "recipient": source }
params["message"] = process_message(msg)
print("PARAMS:",params)
result = send_json_rpc(
method="send",
params=params
)
except json.JSONDecodeError:
print("Invalid JSON:", line)
# Example usage:
if __name__ == "__main__":
# result = send_json_rpc(
# method="listIdentities",
# params={},
# )
# print("Response:", json.dumps(result, indent=2))
listen_for_notifications()

View file

@@ -0,0 +1,2 @@
requests
jsonrpclib