-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgoshdarnit.go
More file actions
166 lines (136 loc) · 4.21 KB
/
Copy pathgoshdarnit.go
File metadata and controls
166 lines (136 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
package goshdarnit
// CensorMode controls how profanity is censored.
// It is an int-backed enumeration: Censor converts the mode with int(mode)
// and passes that number to buildAsteriskMask, so these numeric values are
// what the masking helper receives.
type CensorMode int
const (
// CensorAll replaces all characters with asterisks.
// It is the zero value of CensorMode (iota starts at 0).
CensorAll CensorMode = iota
// CensorKeepFirst keeps the first character visible (value 1).
CensorKeepFirst
// CensorKeepFirstLast keeps the first and last characters visible (value 2).
CensorKeepFirstLast
// TODO: add CensorKeepLastLast? Mixed Symbols?
)
// profanityMatcher is the global Aho-Corasick automaton, initialized once at package load.
// It is assigned in init from the collapsed forms of the profanityList entries.
var profanityMatcher *ahoCorasick
// collapsedToOriginal maps each collapsed pattern given to profanityMatcher
// back to the profanityList entry it was derived from. When several entries
// collapse to the same string, init keeps the first one it encounters.
var collapsedToOriginal map[string]string
// init builds the package-level matcher from profanityList.
//
// Each list entry is run through collapseRepeats. Only the first entry that
// yields a given collapsed form is registered, so the automaton never receives
// the same pattern twice, and collapsedToOriginal remembers which list entry
// that collapsed form came from.
func init() {
	n := len(profanityList)
	patterns := make([]string, 0, n)
	collapsedToOriginal = make(map[string]string, n)

	for _, word := range profanityList {
		// Only the collapsed text is needed; the second result is not used here.
		key, _ := collapseRepeats(word)

		// collapsedToOriginal doubles as the "already registered" set: a key is
		// present exactly when an earlier entry collapsed to the same form
		// (e.g., "ass" and "as" might both become "as").
		if _, dup := collapsedToOriginal[key]; dup {
			continue
		}
		collapsedToOriginal[key] = word
		patterns = append(patterns, key)
	}

	profanityMatcher = newAhoCorasick(patterns)
}
// IsProfane reports whether text contains at least one profanity match.
//
// An empty string is never profane. All detection is delegated to findMatches
// running against the package-level profanityMatcher; evasion handling and
// word-boundary checks are expected to live there (not visible in this file
// section).
func IsProfane(text string) bool {
	if text == "" {
		return false
	}
	return len(findMatches(text, profanityMatcher)) != 0
}
// Censor returns a copy of text in which every profane span is replaced by
// the mask produced by buildAsteriskMask.
//
// The mode parameter selects which characters of a span stay visible:
//   - CensorAll: every character is masked
//   - CensorKeepFirst: the first character stays visible (e.g., "f***")
//   - CensorKeepFirstLast: the first and last characters stay visible (e.g., "f**k")
//
// Text with no matches (including the empty string) is returned unchanged.
// The result has the same length as the input provided buildAsteriskMask
// returns a mask as long as the segment it is given.
func Censor(text string, mode CensorMode) string {
	if text == "" {
		return text
	}

	found := findMatches(text, profanityMatcher)
	if len(found) == 0 {
		return text
	}

	// Collapse the raw matches into sorted, non-overlapping spans so each
	// byte of text is emitted exactly once.
	spans := mergeOverlapping(found)
	maskMode := int(mode)

	out := make([]byte, 0, len(text))
	cursor := 0 // index of the first byte of text not yet written to out
	for i := range spans {
		start, end := spans[i].origStart, spans[i].origEnd

		// Copy the clean text sitting between the previous span and this one.
		if cursor < start {
			out = append(out, text[cursor:start]...)
		}

		// Replace the profane segment with its mask.
		out = append(out, buildAsteriskMask(text[start:end], maskMode)...)
		cursor = end
	}

	// Copy whatever follows the final span.
	if cursor < len(text) {
		out = append(out, text[cursor:]...)
	}
	return string(out)
}
// CensorWithDefault is a convenience function that censors with CensorAll mode.
func CensorWithDefault(text string) string {
return Censor(text, CensorAll)
}
// mergeOverlapping orders matches by origStart and fuses every run of matches
// whose ranges overlap or touch (next.origStart <= current.origEnd) into a
// single entry. A fused entry keeps the fields of the earliest match in the
// run and the largest origEnd seen in it.
//
// Slices with fewer than two elements are returned as-is. Otherwise the input
// slice is sorted in place and a newly allocated slice is returned.
func mergeOverlapping(matches []matchInfo) []matchInfo {
	if len(matches) < 2 {
		return matches
	}

	// Stable in-place insertion sort on origStart; match lists are typically
	// short, so the quadratic worst case is not a concern.
	for i := 1; i < len(matches); i++ {
		for k := i; k > 0 && matches[k-1].origStart > matches[k].origStart; k-- {
			matches[k-1], matches[k] = matches[k], matches[k-1]
		}
	}

	merged := make([]matchInfo, 0, len(matches))
	merged = append(merged, matches[0])
	for _, next := range matches[1:] {
		tail := &merged[len(merged)-1]
		if next.origStart > tail.origEnd {
			// Gap between the ranges: next begins a new span.
			merged = append(merged, next)
			continue
		}
		// Overlapping or touching: stretch the current span if next reaches further.
		if next.origEnd > tail.origEnd {
			tail.origEnd = next.origEnd
		}
	}
	return merged
}
// FindProfanity returns the distinct profane words discovered in the text,
// in the order findMatches first reports them.
//
// Each matched pattern is translated back to its profanityList spelling via
// collapsedToOriginal; a pattern with no entry there is reported verbatim.
// Returns nil if the text is empty or no profanity is found.
func FindProfanity(text string) []string {
	if text == "" {
		return nil
	}

	hits := findMatches(text, profanityMatcher)
	if len(hits) == 0 {
		return nil
	}

	var words []string
	reported := make(map[string]struct{})
	for i := range hits {
		// Prefer the original spelling over the collapsed pattern.
		word, mapped := collapsedToOriginal[hits[i].pattern]
		if !mapped {
			word = hits[i].pattern
		}

		// Report every word only once.
		if _, dup := reported[word]; dup {
			continue
		}
		reported[word] = struct{}{}
		words = append(words, word)
	}
	return words
}