forked from timescale/pg_textsearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpg_textsearch--0.5.0.sql
More file actions
204 lines (166 loc) · 6.29 KB
/
Copy pathpg_textsearch--0.5.0.sql
File metadata and controls
204 lines (166 loc) · 6.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
-- pg_textsearch extension version 0.5.0
--
-- Installation script. Defines the bm25 index access method, the bm25vector
-- and bm25query types with their I/O functions and operators, the BM25
-- scoring functions, the text_bm25_ops operator class, and a set of
-- debugging/maintenance helpers.
--
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION pg_textsearch" to load this file. \quit
-- Access method
-- tp_handler is the index access method handler, bound to the C symbol
-- tp_handler; the bm25 index access method is registered on top of it.
CREATE FUNCTION tp_handler(internal) RETURNS index_am_handler
    LANGUAGE C
    AS 'MODULE_PATHNAME', 'tp_handler';

CREATE ACCESS METHOD bm25
    TYPE INDEX
    HANDLER tp_handler;
-- bm25vector type
--
-- Declare the shell type explicitly so that the I/O functions can reference
-- bm25vector before its full definition later in this script. Letting the
-- input function create the shell type implicitly still works but is
-- deprecated by PostgreSQL and raises a "type is not yet defined" NOTICE.
CREATE TYPE bm25vector;

-- Text input function for bm25vector: cstring -> bm25vector.
-- Bound to the C symbol tpvector_in.
CREATE FUNCTION bm25vector_in(cstring)
RETURNS bm25vector
AS 'MODULE_PATHNAME', 'tpvector_in'
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Text output function for bm25vector: bm25vector -> cstring.
-- Bound to the C symbol tpvector_out.
CREATE FUNCTION bm25vector_out(bm25vector) RETURNS cstring
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'tpvector_out';
-- Binary receive function for bm25vector: internal -> bm25vector.
-- Bound to the C symbol tpvector_recv.
CREATE FUNCTION bm25vector_recv(internal) RETURNS bm25vector
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'tpvector_recv';
-- Binary send function for bm25vector: bm25vector -> bytea.
-- Bound to the C symbol tpvector_send.
CREATE FUNCTION bm25vector_send(bm25vector) RETURNS bytea
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'tpvector_send';
-- Full definition of the bm25vector base type: wires up the text and binary
-- I/O functions and sets the storage strategy and alignment.
CREATE TYPE bm25vector (
    INPUT     = bm25vector_in,
    OUTPUT    = bm25vector_out,
    RECEIVE   = bm25vector_recv,
    SEND      = bm25vector_send,
    ALIGNMENT = int4,
    STORAGE   = extended
);
-- bm25query type
--
-- Declare the shell type explicitly so that the I/O functions can reference
-- bm25query before its full definition later in this script. Letting the
-- input function create the shell type implicitly still works but is
-- deprecated by PostgreSQL and raises a "type is not yet defined" NOTICE.
CREATE TYPE bm25query;

-- Text input function for bm25query: cstring -> bm25query.
-- Bound to the C symbol tpquery_in.
CREATE FUNCTION bm25query_in(cstring)
RETURNS bm25query
AS 'MODULE_PATHNAME', 'tpquery_in'
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Text output function for bm25query: bm25query -> cstring.
-- Bound to the C symbol tpquery_out.
CREATE FUNCTION bm25query_out(bm25query) RETURNS cstring
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'tpquery_out';
-- Binary receive function for bm25query: internal -> bm25query.
-- Bound to the C symbol tpquery_recv.
CREATE FUNCTION bm25query_recv(internal) RETURNS bm25query
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'tpquery_recv';
-- Binary send function for bm25query: bm25query -> bytea.
-- Bound to the C symbol tpquery_send.
CREATE FUNCTION bm25query_send(bm25query) RETURNS bytea
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'tpquery_send';
-- Full definition of the bm25query base type: wires up the text and binary
-- I/O functions and sets the storage strategy and alignment.
CREATE TYPE bm25query (
    INPUT     = bm25query_in,
    OUTPUT    = bm25query_out,
    RECEIVE   = bm25query_recv,
    SEND      = bm25query_send,
    ALIGNMENT = int4,
    STORAGE   = extended
);
-- Convert text to bm25query
-- Single-argument form: builds a bm25query from the query text alone.
-- Bound to the C symbol to_tpquery_text.
CREATE FUNCTION to_bm25query(input_text text) RETURNS bm25query
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'to_tpquery_text';
-- Two-argument form of to_bm25query: takes the query text together with an
-- index name. Bound to the C symbol to_tpquery_text_index.
-- NOTE(review): declared IMMUTABLE although it accepts an index name --
-- confirm the result does not depend on catalog or index state.
CREATE FUNCTION to_bm25query(input_text text, index_name text) RETURNS bm25query
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'to_tpquery_text_index';
-- Equality function: bm25vector = bm25vector → boolean
-- Backs the = operator on bm25vector. Bound to the C symbol tpvector_eq.
CREATE FUNCTION bm25vector_eq(bm25vector, bm25vector) RETURNS boolean
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'tpvector_eq';
-- Create the = operator for equality
-- Implemented by bm25vector_eq; the operator is its own commutator.
--
-- HASHES is deliberately not declared. This script defines no hash operator
-- class for bm25vector, and PostgreSQL fails at run time when the planner
-- picks a hash join on an operator that is marked HASHES but belongs to no
-- hash operator family. Without the flag the planner falls back to join
-- strategies that only need the equality function.
CREATE OPERATOR = (
    LEFTARG = bm25vector,
    RIGHTARG = bm25vector,
    FUNCTION = bm25vector_eq,
    COMMUTATOR = =
);
-- BM25 scoring function for text <@> bm25query operations
--
-- Arguments: left_text is the document text, right_query the bm25query to
-- score it against. Returns the score as float8.
--
-- COST 1000: Standalone scoring is expensive. Each call parses document text
-- with to_tsvector (~14μs per doc), opens the index, looks up IDF values, and
-- calculates BM25 scores. High cost helps planner prefer index scans.
--
-- STABLE rather than IMMUTABLE: the function opens the index and looks up
-- IDF values, so the result for fixed arguments changes as the index
-- contents change. IMMUTABLE would allow the planner to pre-evaluate calls
-- with constant arguments and reuse a stale score in cached plans.
CREATE FUNCTION bm25_text_bm25query_score(left_text text, right_query bm25query)
RETURNS float8
AS 'MODULE_PATHNAME', 'bm25_text_bm25query_score'
LANGUAGE C STABLE STRICT PARALLEL SAFE COST 1000;
-- bm25query equality function
-- Backs the = operator on bm25query. Bound to the C symbol tpquery_eq.
CREATE FUNCTION bm25query_eq(bm25query, bm25query) RETURNS boolean
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'tpquery_eq';
-- <@> operator for text <@> bm25query operations
-- Evaluated by bm25_text_bm25query_score. FUNCTION is the current spelling
-- of the PROCEDURE option; both name the operator's implementation function.
CREATE OPERATOR <@> (
    FUNCTION = bm25_text_bm25query_score,
    LEFTARG  = text,
    RIGHTARG = bm25query
);
-- Function for text <@> text operator (planner hook rewrites to text <@> bm25query)
--
-- COST 1000: a deliberately high estimate so the planner favours an index
-- scan over a sequential scan followed by a sort. The function itself errors
-- when invoked without index scan context; the cost only influences which
-- path the planner picks before execution.
-- NOTE(review): declared IMMUTABLE although it depends on index scan
-- context -- confirm whether STABLE would be the more accurate label.
CREATE FUNCTION bm25_text_text_score(text, text) RETURNS float8
    LANGUAGE C
    IMMUTABLE STRICT PARALLEL SAFE
    COST 1000
    AS 'MODULE_PATHNAME', 'bm25_text_text_score';
-- Stub function returning cached score from index scan.
-- The planner hook substitutes calls to this function for resjunk ORDER BY
-- expressions, so BM25 scores are not expensively recomputed.
-- Takes no arguments, so STRICT has no practical effect here.
CREATE FUNCTION bm25_get_current_score() RETURNS float8
    LANGUAGE C
    VOLATILE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'bm25_get_current_score';
-- <@> operator for text <@> text operations (implicit index resolution)
-- The planner hook transforms this to text <@> bm25query when a BM25 index exists
-- Evaluated by bm25_text_text_score. FUNCTION is the current spelling of the
-- PROCEDURE option; both name the operator's implementation function.
CREATE OPERATOR <@> (
    FUNCTION = bm25_text_text_score,
    LEFTARG  = text,
    RIGHTARG = text
);
-- = operator for bm25query equality
-- Implemented by bm25query_eq; the operator is its own commutator.
--
-- HASHES is deliberately not declared. This script defines no hash operator
-- class for bm25query, and PostgreSQL fails at run time when the planner
-- picks a hash join on an operator that is marked HASHES but belongs to no
-- hash operator family. Without the flag the planner falls back to join
-- strategies that only need the equality function.
CREATE OPERATOR = (
    LEFTARG = bm25query,
    RIGHTARG = bm25query,
    FUNCTION = bm25query_eq,
    COMMUTATOR = =
);
-- bm25 operator class for text columns
-- Only the text <@> bm25query operator and its support function are
-- registered: the planner hook rewrites text <@> text into that form, so the
-- text <@> text operator needs no entry here.
--   OPERATOR 1: <@> (text, bm25query), an ordering operator (FOR ORDER BY)
--               sorted according to float_ops.
--   FUNCTION 8: bm25_text_bm25query_score(text, bm25query).
CREATE OPERATOR CLASS text_bm25_ops
    DEFAULT FOR TYPE text
    USING bm25
AS
    OPERATOR 1 <@> (text, bm25query) FOR ORDER BY float_ops,
    FUNCTION 8 (text, bm25query) bm25_text_bm25query_score(text, bm25query);
-- Debug function to dump index contents (memtable and segments)
-- Single argument version returns truncated output as text
-- Bound to the C symbol tp_dump_index.
CREATE FUNCTION bm25_dump_index(text) RETURNS text
    LANGUAGE C
    STABLE STRICT
    AS 'MODULE_PATHNAME', 'tp_dump_index';
-- Two argument version writes full dump (with hex) to file
-- Bound to the same C symbol as the single-argument overload, tp_dump_index.
--
-- VOLATILE rather than STABLE: writing a file is a side effect, and
-- PostgreSQL requires functions with side effects to be labelled VOLATILE so
-- that calls are never optimized away or merged.
-- NOTE(review): presumably the second argument is the output path on the
-- server -- confirm the C implementation restricts who may call this, or
-- revoke EXECUTE from PUBLIC.
CREATE FUNCTION bm25_dump_index(text, text) RETURNS text
AS 'MODULE_PATHNAME', 'tp_dump_index'
LANGUAGE C VOLATILE STRICT;
-- Display version info
-- Emits an INFO-level message at this point in the script; the statements
-- that follow it still run afterwards.
DO LANGUAGE plpgsql $notice$
BEGIN
    RAISE INFO 'pg_textsearch v0.5.0 installed';
END;
$notice$;
-- Function to force segment write (spill memtable to disk)
-- Takes the index name and returns an int4. Bound to the C symbol
-- tp_spill_memtable.
CREATE FUNCTION bm25_spill_index(index_name text) RETURNS int4
    LANGUAGE C
    VOLATILE STRICT
    AS 'MODULE_PATHNAME', 'tp_spill_memtable';
-- Fast summary function showing only statistics (no content dump)
-- Bound to the C symbol tp_summarize_index.
CREATE FUNCTION bm25_summarize_index(text) RETURNS text
    LANGUAGE C
    STABLE STRICT
    AS 'MODULE_PATHNAME', 'tp_summarize_index';
-- Page visualization - dumps ANSI-colored page layout to file
-- Shows page types: M=meta R=recovery H=header d=dict p=post s=skip m=docmap i=idx .=empty
-- Colors indicate segment level (cyan=L0, yellow=L1, green=L2, magenta=L3+)
--
-- Arguments: index_name is the index to visualize, filepath the file the
-- layout is written to. Bound to the C symbol tp_debug_pageviz.
--
-- VOLATILE rather than STABLE: writing a file is a side effect, and
-- PostgreSQL requires functions with side effects to be labelled VOLATILE so
-- that calls are never optimized away or merged.
-- NOTE(review): this writes to a caller-supplied path -- confirm the C
-- implementation restricts who may call this, or revoke EXECUTE from PUBLIC.
CREATE FUNCTION bm25_debug_pageviz(index_name text, filepath text) RETURNS text
AS 'MODULE_PATHNAME', 'tp_debug_pageviz'
LANGUAGE C VOLATILE STRICT;