Initial commit

Erik Stambaugh 2025-07-13 09:46:56 -07:00
commit ccbcebf0e8
14 changed files with 703 additions and 0 deletions

3
.gitignore vendored Normal file
View file

@@ -0,0 +1,3 @@
env
data/*
__pycache__

22
Makefile Normal file
View file

@@ -0,0 +1,22 @@
DOCKER_COMPOSE := COMPOSE_MENU=0 docker compose --env-file ./env
default: up
up: env
$(DOCKER_COMPOSE) up
build:
$(DOCKER_COMPOSE) build
env:
@echo "Please copy env.in to env and edit the contents to what you need."
@false
signalbash:
$(DOCKER_COMPOSE) run --rm signal-cli bash
import_factoids:
$(DOCKER_COMPOSE) run --rm init-db

70
README.md Normal file
View file

@@ -0,0 +1,70 @@
# What the devil is this?
It's a Signal bot that emits Infobot-style factoids, with an LLM for fuzzy matching.
When a user asks a question, it first checks the database for a verbatim answer, and emits that if it finds one.
If there's no exact match:
- the LLM parses the question into one or more topics ("tell me about alice and bob" becomes `["alice", "bob"]`)
- the topics are vector encoded and queried against the encodings in the factoid database
- the nearest match for each topic is sent back to the LLM, which is then asked to phrase it back to the user (see the sketch below)
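
Under the hood that lookup is just a pgvector nearest-neighbour query. Here's a minimal sketch using the same `all-MiniLM-L6-v2` model and `factoids` table as the rest of this repo (the `nearest_factoid` helper name is purely illustrative):

```python
# Sketch only: embed a topic and pull the closest factoid out of pgvector.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

async def nearest_factoid(conn, topic: str):
    # conn is an asyncpg connection; factoids.embedding is a VECTOR(384) column
    emb = model.encode(topic)
    emb_str = f"[{', '.join(map(str, emb))}]"
    return await conn.fetchrow(
        "SELECT trigger, copula, response FROM factoids "
        "ORDER BY embedding <-> $1::vector LIMIT 1",
        emb_str,
    )
```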
# Why?
Huh?
# How do I use it?
You need CUDA working in Docker, or you'll have to edit the docker compose files to take the GPU bits out and rely on the CPU.
At present the LLM seems to require about 2GB of GPU RAM, which is really small as LLMs go. My PC works harder playing Balatro.
# What are its limitations?
It doesn't have heaps of feature parity with the old perl infobot. The right way to get that might be to hack on the old bot code and use it as the main chat parser for this. I don't have a ton of desire to sit down and code my own implementation of the entire thing.
Some important stuff we're missing right now:
- Creating new factoids
- Understanding when questions are being asked of the bot so it doesn't just respond to every single thing that's said
- Botsnacks.
And some non-infobot stuff we could use:
- A better prompt for the LLM to integrate multiple factoids into a single response
- Some security precautions against prompt injection etc. -- at the moment it's just "trust only those on the allowlist"
## Initialize the database
This took around an hour to do 300k factoids and a fair amount of compute/GPU power. There's no consistency or duplicate checking at the moment, so you're best off trashing the postgres data dir first.
- dump the factoid database into `is.txt` and `are.txt` and put them in `scripts/`
  (tab-separated lines: `topic\tresponse`; see the example below)
- `make import_factoids`
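
A couple of hypothetical lines from `is.txt` (fields separated by a literal tab; `<reply>` suppresses the "topic is ..." prefix):

```
paris	the capital of France
sky	<reply>Look up!
```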
## Prepare a Signal account
Making signal-cli work can be fairly involved. Check the wiki at the signal-cli repo for details, but run this command to get a shell inside the signal-cli container:
- `make signalbash`
## Create an env file
- Copy `env.in` to `env` and edit its contents to what you need.
## Start the server
- `make`
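
Once the stack is up you can poke the bot's HTTP API directly, bypassing Signal. A quick sketch, assuming the default port mapping from `docker-compose.yaml` and a made-up question:

```python
# Sketch: ask the factoid service a question over HTTP.
import requests

r = requests.post("http://localhost:8000/ask", json={"query": "who is steve?"})
answer = r.json()
print(answer["reply"])    # the LLM-phrased answer
print(answer["matches"])  # the raw factoid matches it was built from
```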

9
app/Dockerfile Normal file
View file

@@ -0,0 +1,9 @@
FROM python:3.11-slim
WORKDIR /app
COPY . /app
RUN pip install --no-cache-dir -r requirements.txt
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

244
app/main.py Normal file
View file

@@ -0,0 +1,244 @@
import os
import asyncpg
import numpy as np
from fastapi import FastAPI, Request
from sentence_transformers import SentenceTransformer
import httpx
import random
DB_CONFIG = {
"host": os.getenv("DB_HOST"),
"port": int(os.getenv("DB_PORT", "5432")),
"user": os.getenv("DB_USER"),
"password": os.getenv("DB_PASSWORD"),
"database": os.getenv("DB_NAME"),
}
LLM_API_URL = os.getenv("LLM_API_URL", "http://llm:80")
app = FastAPI()
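# all-MiniLM-L6-v2 produces 384-dimensional embeddings, matching the VECTOR(384)
# column created by scripts/init_and_load.py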
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
@app.on_event("startup")
async def startup():
app.state.db = await asyncpg.create_pool(**DB_CONFIG)
@app.on_event("shutdown")
async def shutdown():
await app.state.db.close()
async def extract_triggers(user_query: str) -> list[str]:
"""Ask the LLM to extract factoid-like terms from a user query."""
prompt = f"""<|im_start|>system
You are a factoid trigger extractor. Extract a list of keywords or short phrases that might match entries in an infobot-style knowledge base.
Respond only with a comma-separated list of triggers. Do not answer the question or correct spelling or grammar.
<|im_end|>
<|im_start|>user
Who is steve?
<|im_end|>
<|im_start|>assistant
steve
<|im_end|>
<|im_start|>user
Tell me about HTTP status codes.
<|im_end|>
<|im_start|>assistant
HTTP status codes
<|im_end|>
<|im_start|>user
Who are you and your Gary
<|im_end|>
<|im_start|>assistant
you, Gary
<|im_end|>
<|im_start|>user
{user_query}
<|im_end|>
<|im_start|>assistant
"""
payload = {
"inputs": prompt,
"parameters": {
"temperature": 0.3,
"max_new_tokens": 64,
},
}
print("PAYLOAD:", payload)
async with httpx.AsyncClient() as client:
r = await client.post(
f"{LLM_API_URL}/generate",
json=payload,
timeout=httpx.Timeout(120.0),
)
if r.status_code != 200:
return []
output = r.json().get("generated_text", "")
print("OUTPUT:", output)
# Try to parse output as a list
triggers = []
for line in output.splitlines():
line = line.strip(" []\"'")
if line:
triggers.extend([t.strip(" \"'") for t in line.split(",") if t.strip()])
return list(set(triggers))
@app.post("/ask")
async def ask(request: Request):
body = await request.json()
query = body.get("query", "").strip()
if not query:
return {"error": "Missing query"}
responses = []
# try to extract the query verbatim
async with app.state.db.acquire() as conn:
        rows = await conn.fetch(
"""
SELECT trigger, response, copula
FROM factoids
WHERE trigger = $1
""",
query.strip()
)
if rows:
            # fetch() returns a list of asyncpg Records; turn each into a plain dict
            rows = [dict(r) for r in rows]
options = []
for row in rows:
for option in row["response"].split("|"):
options.append(row | {"response": option.strip()})
row = random.choice(options)
print("CHOICE:", row)
triggers = [query.strip()]
choices = [row['response']]
# Use <reply> if present, otherwise prepend the trigger
for c in choices:
if c.startswith("<reply>"):
responses.append(c[len("<reply>"):].strip())
else:
responses.append(f"{row['trigger']} {row['copula']} {c}")
else:
triggers = await extract_triggers(query)
if not triggers:
return {"response": "I don't know that one."}
print("triggers: ", triggers)
for trigger in triggers:
row = await conn.fetchrow(
"""
SELECT trigger, response, copula
FROM factoids
WHERE trigger = $1
LIMIT 1
""",
trigger
)
if not row:
embedding = model.encode(trigger)
embedding_str = f"[{', '.join(map(str, embedding))}]"
row = await conn.fetchrow(
"""
SELECT trigger, response, copula
FROM factoids
ORDER BY embedding <-> $1::vector
LIMIT 1
""",
embedding_str
)
if row:
print("ROW:", row)
response = row['response']
# Pick a random option from pipe-separated responses
options = [r.strip() for r in response.split('|') if r.strip()]
if options:
                        choices = [random.choice(options)]
else:
choices = [response]
# Use <reply> if present, otherwise prepend the trigger
for c in choices:
if c.startswith("<reply>"):
responses.append(c[len("<reply>"):].strip())
else:
responses.append(f"{row['trigger']} {row['copula']} {c}")
if not responses:
return {"response": "I don't know any of those."}
# Ask LLM to summarize
responses_str = "\nValue: ".join(responses)
#
summary_prompt = (
f"""<|im_start|>system
You are a summarizer for a fact-based chatbot. Your task is to condense database entries into short, accurate one-line summaries. Do not speculate, define, or add new facts. Do not correct spelling or phrasing from the facts or triggers. Do not mix context from prior triggers.
<|im_end|>
<|im_start|>user
Summarize the following database entry.
Trigger: Paris
Value: Paris is the capital of France
Value: Paris is located in the north-central part of the country.
<|im_end|>
<|im_start|>assistant
Paris is the capital of France and located in the north-central part of the country.
<|im_end|>
<|im_start|>user
Summarize the following database entry.
Trigger: squinky, spacehobo
Value: spacehobo is a Citizen.
Value: squinky is kinky
<|im_end|>
<|im_start|>assistant
spacehobo is a Citizen and squinky is kinky
<|im_end|>
<|im_start|>user
Summarize the following database entry.
Trigger: sky
Value: sky is blue
Value: the sky is the big thing outside when you look up
Value: Look!
<|im_end|>
<|im_start|>assistant
Look up at that big blue thing outside!
<|im_end|>
<|im_start|>user
Summarize the following database entry.
Trigger: {query}
Value: {responses_str}
<|im_end|>"""
)
print("SUMMARY PAYLOAD:", summary_prompt)
async with httpx.AsyncClient() as client:
r = await client.post(
f"{LLM_API_URL}/generate",
json={"inputs": summary_prompt, "parameters": {"temperature": 0.8, "max_new_tokens": 200}},
timeout=httpx.Timeout(120.0),
)
final_response = r.json().get("generated_text", "\n".join(responses))
print("FINAL RESPONSE:", final_response)
return {"reply": final_response, "matches": responses, "triggers": triggers}

6
app/requirements.txt Normal file
View file

@@ -0,0 +1,6 @@
fastapi
uvicorn[standard]
asyncpg
sentence-transformers
httpx

View file

@@ -0,0 +1,42 @@
services:
init-db:
build:
context: .
dockerfile: initdb.Dockerfile
container_name: infobot-init
depends_on:
db:
condition: service_healthy
volumes:
- ./scripts:/scripts:ro
- ./data/hf_cache:/root/.cache/huggingface
environment:
DB_HOST: db
DB_PORT: 5432
DB_USER: infobot
DB_PASSWORD: infobot
DB_NAME: infobot
entrypoint: ["python", "/scripts/init_and_load.py"]
devices:
- /dev/nvidia-uvm
- /dev/nvidia-uvm-tools
- /dev/nvidia-modeset
- /dev/nvidiactl
- /dev/nvidia0
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: ["gpu"]
networks: ["dumant"]
networks:
dumant:
name: dumant
external: true

105
docker-compose.yaml Normal file
View file

@@ -0,0 +1,105 @@
services:
db:
image: ankane/pgvector
container_name: infobot-db
restart: unless-stopped
ports:
- "5432:5432"
environment:
POSTGRES_DB: infobot
POSTGRES_USER: infobot
POSTGRES_PASSWORD: infobot
volumes:
- ./data/postgres:/var/lib/postgresql/data
healthcheck:
test: ["CMD", "pg_isready", "-U", "infobot"]
interval: 10s
timeout: 5s
retries: 5
networks: ["dumant"]
llm:
image: ghcr.io/huggingface/text-generation-inference:1.4
container_name: infobot-llm
restart: unless-stopped
ports:
- "8080:80"
environment:
HF_HUB_ENABLE_HF_TRANSFER: 0
# PYTORCH_CUDA_ALLOC_CONF: max_split_size_mb:64
MODEL_ID: ${MODEL_ID}
volumes:
- ./data/models:/data
devices:
- /dev/nvidia-uvm
- /dev/nvidia-uvm-tools
- /dev/nvidia-modeset
- /dev/nvidiactl
- /dev/nvidia0
command:
- --max-total-tokens=1024
- --max-batch-prefill-tokens=256
- --max-input-length=256
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: ["gpu"]
networks: ["dumant"]
app:
build: ./app
container_name: infobot-app
restart: unless-stopped
depends_on:
db:
condition: service_healthy
llm:
condition: service_started
ports:
- "8000:8000"
volumes:
- ./app:/app
environment:
DB_HOST: db
DB_PORT: 5432
DB_USER: infobot
DB_PASSWORD: infobot
DB_NAME: infobot
LLM_API_URL: http://llm:80
networks: ["dumant"]
  signal-app:
    build: ./signal-app
    container_name: infobot-signal-app
    restart: unless-stopped
    volumes:
      - ./signal-app:/app
    environment:
      # pass the allowlists through so signal-app/main.py can actually read them
      SIGNAL_USER_ALLOWLIST: ${SIGNAL_USER_ALLOWLIST}
      SIGNAL_GROUP_ALLOWLIST: ${SIGNAL_GROUP_ALLOWLIST}
    networks: ["dumant"]
signal-cli:
#image: dogukanakkaya/signal-cli
image: registry.gitlab.com/packaging/signal-cli/signal-cli-native:latest
container_name: infobot-signal-cli
restart: unless-stopped
volumes:
- ./data/signal-cli:/var/lib/signal-cli
ports:
- "7583:7583"
tmpfs:
- "/tmp:exec"
command: -a ${SIGNAL_ID} daemon --tcp=0.0.0.0:7583 --receive-mode=on-start
networks: ["dumant"]
volumes:
postgres:
models:
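# "dumant" is declared as an external network below; create it once with
# `docker network create dumant` before bringing the stack up.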
networks:
dumant:
name: dumant

14
env.in Normal file
View file

@@ -0,0 +1,14 @@
# Set this to the valid phone number associated with your Signal account
SIGNAL_ID=+12345678901
# Accept DMs from this list of users only (space-separated)
SIGNAL_USER_ALLOWLIST="+12345678901 +12345678901 +12345678901 +12345678901 +12345678901"
# Accept messages from anyone in these groups
SIGNAL_GROUP_ALLOWLIST="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg1234567890= ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg1234567890="
# Use this LLM.
# Visit https://huggingface.co/models?pipeline_tag=text-generation&sort=trending and try to use an "instruct" model
# The one listed here by default is a rather small one but adequate for now
MODEL_ID="Gensyn/Qwen2.5-0.5B-Instruct"

7
initdb.Dockerfile Normal file
View file

@@ -0,0 +1,7 @@
FROM python:3
#FROM python:3.11-slim
RUN pip install --no-cache-dir \
psycopg2 \
sentence_transformers \
&& /bin/true

68
scripts/init_and_load.py Normal file
View file

@@ -0,0 +1,68 @@
import os
import psycopg2
import csv
from sentence_transformers import SentenceTransformer
DB_HOST = os.environ["DB_HOST"]
DB_PORT = os.environ["DB_PORT"]
DB_USER = os.environ["DB_USER"]
DB_PASSWORD = os.environ["DB_PASSWORD"]
DB_NAME = os.environ["DB_NAME"]
TSV_IS = "/scripts/is.txt"
TSV_ARE = "/scripts/are.txt"
# Connect to DB
conn = psycopg2.connect(
host=DB_HOST,
port=DB_PORT,
dbname=DB_NAME,
user=DB_USER,
password=DB_PASSWORD
)
conn.autocommit = True
cur = conn.cursor()
# Ensure pgvector extension and table exist
cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
cur.execute("""
CREATE TABLE IF NOT EXISTS factoids (
id SERIAL PRIMARY KEY,
trigger TEXT NOT NULL,
copula TEXT NOT NULL DEFAULT 'is',
response TEXT NOT NULL,
embedding VECTOR(384)
);
""")
# Load model
print("Loading embedding model...")
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# Process TSVs one line at a time
for copula, TSV_PATH in ( ("is", TSV_IS), ("are", TSV_ARE) ):
print(f"Loading and inserting from {TSV_PATH}...")
with open(TSV_PATH, "r", encoding="utf-8", errors="replace") as f:
reader = csv.reader(f, delimiter="\t")
count = 0
        for row in reader:
            if len(row) != 2:
                continue
            if count % 100 == 0:
                print(count, row[0], copula, row[1])
            count += 1
trigger, response = row[0].strip(), row[1].strip()
if not trigger or not response:
continue
embedding = model.encode(trigger).tolist()
cur.execute(
"INSERT INTO factoids (trigger, copula, response, embedding) VALUES (%s, %s, %s, %s)",
(trigger, copula, response, embedding)
)
print("All factoids loaded.")
cur.close()
conn.close()

9
signal-app/Dockerfile Normal file
View file

@@ -0,0 +1,9 @@
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt /tmp
RUN pip install --no-cache-dir -r /tmp/requirements.txt
CMD [ "python", "-u", "/app/main.py" ]

102
signal-app/main.py Normal file
View file

@@ -0,0 +1,102 @@
import socket
import json
import time
import requests
import os
SIGNAL_USER_ALLOWLIST = (os.getenv("SIGNAL_USER_ALLOWLIST") or "").split()
SIGNAL_GROUP_ALLOWLIST = (os.getenv("SIGNAL_GROUP_ALLOWLIST") or "").split()
LLM_API_URL = os.getenv("LLM_API_URL", "http://llm:80")
#SIGNAL_API = "http://signal-cli:7583"
BOT_API="http://app:8000/ask"
def send_json_rpc(method, params=None, request_id=1, host="signal-cli", port=7583):
"""Send a JSON-RPC 2.0 request over TCP to signal-cli."""
request = {
"jsonrpc": "2.0",
"id": request_id,
"method": method,
"params": params or {}
}
request_str = json.dumps(request) + "\n"
with socket.create_connection((host, port)) as sock:
sock.sendall(request_str.encode("utf-8"))
response = recv_all(sock)
return json.loads(response)
def recv_all(sock):
"""Read until newline (signal-cli terminates each JSON-RPC response with \\n)."""
buffer = b""
while not buffer.endswith(b"\n"):
data = sock.recv(4096)
if not data:
break
buffer += data
return buffer.decode("utf-8")
def process_message(msg):
with requests.Session() as session:
result = session.post(BOT_API, json={"query": str(msg)})
j = result.json()
return j["reply"]
def listen_for_notifications(host="signal-cli", port=7583):
with socket.create_connection((host, port)) as sock:
print("Connected to signal-cli JSON-RPC")
buffer = b""
while True:
chunk = sock.recv(4096)
if not chunk:
break
buffer += chunk
while b"\n" in buffer:
line, buffer = buffer.split(b"\n", 1)
try:
msg = json.loads(line)
print(msg)
if "method" in msg:
if msg["method"] == "receive":
envelope = msg["params"]["envelope"]
source = envelope["source"]
if "dataMessage" in envelope:
msg = envelope["dataMessage"]["message"] # there are non-message messages, like read receipts
if "groupInfo" in envelope["dataMessage"]:
group_id = envelope["dataMessage"]["groupInfo"]["groupId"]
if group_id not in SIGNAL_GROUP_ALLOWLIST:
print(f"GROUP RECV DENIED ({source}): {envelope}")
break
print(f"GROUP ({group_id}/{source}): {msg}")
params = { "recipient": group_id, "groupId": group_id }
else:
if source not in SIGNAL_USER_ALLOWLIST:
print(f"RECV DENIED ({source}): {envelope}")
break
print(f"RECV ({source}): {msg}")
params = { "recipient": source }
params["message"] = process_message(msg)
print("PARAMS:",params)
result = send_json_rpc(
method="send",
params=params
)
except json.JSONDecodeError:
print("Invalid JSON:", line)
# Example usage:
if __name__ == "__main__":
# result = send_json_rpc(
# method="listIdentities",
# params={},
# )
# print("Response:", json.dumps(result, indent=2))
listen_for_notifications()

View file

@@ -0,0 +1,2 @@
requests
jsonrpclib