mirror of
https://github.com/tmdinosaurcenter/kiosk-guestbook.git
synced 2026-06-04 01:18:12 -06:00
fix: improve profanity filter to catch spacing and embedding bypasses
Add a secondary normalized substring check: lowercase the text, strip every character other than a–z, then check whether any banned word appears as a substring of the result. This catches spacing tricks (e.g. 'f u c k') and embedded forms (e.g. 'fucking'). Note: substring matching can produce false positives (e.g. 'classic' contains 'ass'). This trade-off is accepted for a museum kiosk context.
This commit is contained in:
@@ -5,6 +5,7 @@ from email_validator import validate_email, EmailNotValidError
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
# Set up logging
|
# Set up logging
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
@@ -36,11 +37,17 @@ def load_banned_words():
|
|||||||
BANNED_WORDS = load_banned_words()
|
BANNED_WORDS = load_banned_words()
|
||||||
|
|
||||||
def contains_banned_words(text):
|
def contains_banned_words(text):
|
||||||
# TODO: This filter is easily bypassed (spacing, leet-speak, numbers). Consider a more robust NLP-based approach.
|
lower = text.lower()
|
||||||
words = text.lower().split()
|
# Whole-word check (punctuation-stripped) — catches exact matches
|
||||||
for word in words:
|
for word in lower.split():
|
||||||
word_clean = word.strip(".,!?;:\"'")
|
if word.strip(".,!?;:\"'") in BANNED_WORDS:
|
||||||
if word_clean in BANNED_WORDS:
|
return True
|
||||||
|
# Normalized substring check — catches spacing tricks (f u c k) and
|
||||||
|
# embedded forms (fucking). Note: may produce false positives on words
|
||||||
|
# that contain a banned word as a substring (e.g. "classic" → "ass").
|
||||||
|
normalized = re.sub(r'[^a-z]', '', lower)
|
||||||
|
for banned in BANNED_WORDS:
|
||||||
|
if banned in normalized:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user