Skip to content

Commit 19346fa

Browse files
feat: Geospatial Data Type and GIS Function support for milvus (milvus-io#44547)
issue: milvus-io#43427 This pr's main goal is merge milvus-io#37417 to milvus 2.5 without conflicts. # Main Goals 1. Create and describe collections with geospatial type 2. Insert geospatial data into the insert binlog 3. Load segments containing geospatial data into memory 4. Enable query and search can display geospatial data 5. Support using GIS funtions like ST_EQUALS in query 6. Support R-Tree index for geometry type # Solution 1. **Add Type**: Modify the Milvus core by adding a Geospatial type in both the C++ and Go code layers, defining the Geospatial data structure and the corresponding interfaces. 2. **Dependency Libraries**: Introduce necessary geospatial data processing libraries. In the C++ source code, use Conan package management to include the GDAL library. In the Go source code, add the go-geom library to the go.mod file. 3. **Protocol Interface**: Revise the Milvus protocol to provide mechanisms for Geospatial message serialization and deserialization. 4. **Data Pipeline**: Facilitate interaction between the client and proxy using the WKT format for geospatial data. The proxy will convert all data into WKB format for downstream processing, providing column data interfaces, segment encapsulation, segment loading, payload writing, and cache block management. 5. **Query Operators**: Implement simple display and support for filter queries. Initially, focus on filtering based on spatial relationships for a single column of geospatial literal values, providing parsing and execution for query expressions.Now only support brutal search 7. **Client Modification**: Enable the client to handle user input for geospatial data and facilitate end-to-end testing.Check the modification in pymilvus. --------- Signed-off-by: Yinwei Li <yinwei.li@zilliz.com> Signed-off-by: Cai Zhang <cai.zhang@zilliz.com> Co-authored-by: ZhuXi <150327960+Yinwei-Yu@users.noreply.github.com>
1 parent 2bd377a commit 19346fa

136 files changed

Lines changed: 11218 additions & 1320 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

client/column/columns.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,9 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) {
212212
case schemapb.DataType_JSON:
213213
return parseScalarData(fd.GetFieldName(), fd.GetScalars().GetJsonData().GetData(), begin, end, validData, NewColumnJSONBytes, NewNullableColumnJSONBytes)
214214

215+
case schemapb.DataType_Geometry:
216+
return parseScalarData(fd.GetFieldName(), fd.GetScalars().GetGeometryWktData().GetData(), begin, end, validData, NewColumnGeometryWKT, NewNullableColumnGeometryWKT)
217+
215218
case schemapb.DataType_FloatVector:
216219
vectors := fd.GetVectors()
217220
x, ok := vectors.GetData().(*schemapb.VectorField_FloatVector)

client/column/conversion.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ func values2FieldData[T any](values []T, fieldType entity.FieldType, dim int) *s
117117
entity.FieldTypeInt64,
118118
entity.FieldTypeVarChar,
119119
entity.FieldTypeString,
120-
entity.FieldTypeJSON:
120+
entity.FieldTypeJSON,
121+
entity.FieldTypeGeometry:
121122
fd.Field = &schemapb.FieldData_Scalars{
122123
Scalars: values2Scalars(values, fieldType), // scalars,
123124
}
@@ -199,6 +200,12 @@ func values2Scalars[T any](values []T, fieldType entity.FieldType) *schemapb.Sca
199200
Data: data,
200201
},
201202
}
203+
case entity.FieldTypeGeometry:
204+
var strVals []string
205+
strVals, ok = any(values).([]string)
206+
scalars.Data = &schemapb.ScalarField_GeometryWktData{
207+
GeometryWktData: &schemapb.GeometryWktArray{Data: strVals},
208+
}
202209
}
203210
// shall not be accessed
204211
if !ok {

client/column/geometry.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package column
2+
3+
import (
4+
"github.com/cockroachdb/errors"
5+
6+
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
7+
"github.com/milvus-io/milvus/client/v2/entity"
8+
)
9+
10+
type ColumnGeometryWKT struct {
11+
*genericColumnBase[string]
12+
}
13+
14+
// Name returns column name.
15+
func (c *ColumnGeometryWKT) Name() string {
16+
return c.name
17+
}
18+
19+
// Type returns column entity.FieldType.
20+
func (c *ColumnGeometryWKT) Type() entity.FieldType {
21+
return entity.FieldTypeGeometry
22+
}
23+
24+
// Len returns column values length.
25+
func (c *ColumnGeometryWKT) Len() int {
26+
return len(c.values)
27+
}
28+
29+
func (c *ColumnGeometryWKT) Slice(start, end int) Column {
30+
l := c.Len()
31+
if start > l {
32+
start = l
33+
}
34+
if end == -1 || end > l {
35+
end = l
36+
}
37+
return &ColumnGeometryWKT{
38+
genericColumnBase: c.genericColumnBase.slice(start, end),
39+
}
40+
}
41+
42+
// Get returns value at index as interface{}.
43+
func (c *ColumnGeometryWKT) Get(idx int) (interface{}, error) {
44+
if idx < 0 || idx >= c.Len() {
45+
return nil, errors.New("index out of range")
46+
}
47+
return c.values[idx], nil
48+
}
49+
50+
func (c *ColumnGeometryWKT) GetAsString(idx int) (string, error) {
51+
return c.ValueByIdx(idx)
52+
}
53+
54+
// FieldData return column data mapped to schemapb.FieldData.
55+
func (c *ColumnGeometryWKT) FieldData() *schemapb.FieldData {
56+
fd := c.genericColumnBase.FieldData()
57+
return fd
58+
}
59+
60+
// ValueByIdx returns value of the provided index.
61+
func (c *ColumnGeometryWKT) ValueByIdx(idx int) (string, error) {
62+
if idx < 0 || idx >= c.Len() {
63+
return "", errors.New("index out of range")
64+
}
65+
return c.values[idx], nil
66+
}
67+
68+
// AppendValue append value into column.
69+
func (c *ColumnGeometryWKT) AppendValue(i interface{}) error {
70+
s, ok := i.(string)
71+
if !ok {
72+
return errors.New("expect geometry WKT type(string)")
73+
}
74+
c.values = append(c.values, s)
75+
return nil
76+
}
77+
78+
// Data returns column data.
79+
func (c *ColumnGeometryWKT) Data() []string {
80+
return c.values
81+
}
82+
83+
func NewColumnGeometryWKT(name string, values []string) *ColumnGeometryWKT {
84+
return &ColumnGeometryWKT{
85+
genericColumnBase: &genericColumnBase[string]{
86+
name: name,
87+
fieldType: entity.FieldTypeGeometry,
88+
values: values,
89+
},
90+
}
91+
}

client/column/geometry_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package column
2+
3+
import (
4+
"fmt"
5+
"math/rand"
6+
"testing"
7+
"time"
8+
9+
"github.com/stretchr/testify/suite"
10+
11+
"github.com/milvus-io/milvus/client/v2/entity"
12+
)
13+
14+
type ColumnGeometryWKTSuite struct {
15+
suite.Suite
16+
}
17+
18+
func (s *ColumnGeometryWKTSuite) SetupSuite() {
19+
rand.Seed(time.Now().UnixNano())
20+
}
21+
22+
func (s *ColumnGeometryWKTSuite) TestAttrMethods() {
23+
columnName := fmt.Sprintf("column_Geometrywkt_%d", rand.Int())
24+
columnLen := 8 + rand.Intn(10)
25+
26+
v := make([]string, columnLen)
27+
column := NewColumnGeometryWKT(columnName, v)
28+
29+
s.Run("test_meta", func() {
30+
ft := entity.FieldTypeGeometry
31+
s.Equal("Geometry", ft.Name())
32+
s.Equal("Geometry", ft.String())
33+
pbName, pbType := ft.PbFieldType()
34+
s.Equal("Geometry", pbName)
35+
s.Equal("Geometry", pbType)
36+
})
37+
38+
s.Run("test_column_attribute", func() {
39+
s.Equal(columnName, column.Name())
40+
s.Equal(entity.FieldTypeGeometry, column.Type())
41+
s.Equal(columnLen, column.Len())
42+
s.EqualValues(v, column.Data())
43+
})
44+
45+
s.Run("test_column_field_data", func() {
46+
fd := column.FieldData()
47+
s.NotNil(fd)
48+
s.Equal(fd.GetFieldName(), columnName)
49+
})
50+
51+
s.Run("test_column_valuer_by_idx", func() {
52+
_, err := column.ValueByIdx(-1)
53+
s.Error(err)
54+
_, err = column.ValueByIdx(columnLen)
55+
s.Error(err)
56+
for i := 0; i < columnLen; i++ {
57+
v, err := column.ValueByIdx(i)
58+
s.NoError(err)
59+
s.Equal(column.values[i], v)
60+
}
61+
})
62+
63+
s.Run("test_append_value", func() {
64+
item := "POINT (30.123 -10.456)"
65+
err := column.AppendValue(item)
66+
s.NoError(err)
67+
s.Equal(columnLen+1, column.Len())
68+
val, err := column.ValueByIdx(columnLen)
69+
s.NoError(err)
70+
s.Equal(item, val)
71+
})
72+
}
73+
74+
func TestColumnGeometryWKT(t *testing.T) {
75+
suite.Run(t, new(ColumnGeometryWKTSuite))
76+
}

client/column/nullable.go

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,18 @@ package column
1818

1919
var (
2020
// scalars
21-
NewNullableColumnBool NullableColumnCreateFunc[bool, *ColumnBool] = NewNullableColumnCreator(NewColumnBool).New
22-
NewNullableColumnInt8 NullableColumnCreateFunc[int8, *ColumnInt8] = NewNullableColumnCreator(NewColumnInt8).New
23-
NewNullableColumnInt16 NullableColumnCreateFunc[int16, *ColumnInt16] = NewNullableColumnCreator(NewColumnInt16).New
24-
NewNullableColumnInt32 NullableColumnCreateFunc[int32, *ColumnInt32] = NewNullableColumnCreator(NewColumnInt32).New
25-
NewNullableColumnInt64 NullableColumnCreateFunc[int64, *ColumnInt64] = NewNullableColumnCreator(NewColumnInt64).New
26-
NewNullableColumnVarChar NullableColumnCreateFunc[string, *ColumnVarChar] = NewNullableColumnCreator(NewColumnVarChar).New
27-
NewNullableColumnString NullableColumnCreateFunc[string, *ColumnString] = NewNullableColumnCreator(NewColumnString).New
28-
NewNullableColumnFloat NullableColumnCreateFunc[float32, *ColumnFloat] = NewNullableColumnCreator(NewColumnFloat).New
29-
NewNullableColumnDouble NullableColumnCreateFunc[float64, *ColumnDouble] = NewNullableColumnCreator(NewColumnDouble).New
30-
NewNullableColumnTimestamptz NullableColumnCreateFunc[int64, *ColumnTimestamptz] = NewNullableColumnCreator(NewColumnTimestamptz).New
31-
NewNullableColumnJSONBytes NullableColumnCreateFunc[[]byte, *ColumnJSONBytes] = NewNullableColumnCreator(NewColumnJSONBytes).New
21+
NewNullableColumnBool NullableColumnCreateFunc[bool, *ColumnBool] = NewNullableColumnCreator(NewColumnBool).New
22+
NewNullableColumnInt8 NullableColumnCreateFunc[int8, *ColumnInt8] = NewNullableColumnCreator(NewColumnInt8).New
23+
NewNullableColumnInt16 NullableColumnCreateFunc[int16, *ColumnInt16] = NewNullableColumnCreator(NewColumnInt16).New
24+
NewNullableColumnInt32 NullableColumnCreateFunc[int32, *ColumnInt32] = NewNullableColumnCreator(NewColumnInt32).New
25+
NewNullableColumnInt64 NullableColumnCreateFunc[int64, *ColumnInt64] = NewNullableColumnCreator(NewColumnInt64).New
26+
NewNullableColumnVarChar NullableColumnCreateFunc[string, *ColumnVarChar] = NewNullableColumnCreator(NewColumnVarChar).New
27+
NewNullableColumnString NullableColumnCreateFunc[string, *ColumnString] = NewNullableColumnCreator(NewColumnString).New
28+
NewNullableColumnFloat NullableColumnCreateFunc[float32, *ColumnFloat] = NewNullableColumnCreator(NewColumnFloat).New
29+
NewNullableColumnDouble NullableColumnCreateFunc[float64, *ColumnDouble] = NewNullableColumnCreator(NewColumnDouble).New
30+
NewNullableColumnTimestamptz NullableColumnCreateFunc[int64, *ColumnTimestamptz] = NewNullableColumnCreator(NewColumnTimestamptz).New
31+
NewNullableColumnJSONBytes NullableColumnCreateFunc[[]byte, *ColumnJSONBytes] = NewNullableColumnCreator(NewColumnJSONBytes).New
32+
NewNullableColumnGeometryWKT NullableColumnCreateFunc[string, *ColumnGeometryWKT] = NewNullableColumnCreator(NewColumnGeometryWKT).New
3233
// array
3334
NewNullableColumnBoolArray NullableColumnCreateFunc[[]bool, *ColumnBoolArray] = NewNullableColumnCreator(NewColumnBoolArray).New
3435
NewNullableColumnInt8Array NullableColumnCreateFunc[[]int8, *ColumnInt8Array] = NewNullableColumnCreator(NewColumnInt8Array).New

client/entity/field.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ func (t FieldType) Name() string {
5656
return "Array"
5757
case FieldTypeJSON:
5858
return "JSON"
59+
case FieldTypeGeometry:
60+
return "Geometry"
5961
case FieldTypeBinaryVector:
6062
return "BinaryVector"
6163
case FieldTypeFloatVector:
@@ -98,6 +100,8 @@ func (t FieldType) String() string {
98100
return "Array"
99101
case FieldTypeJSON:
100102
return "JSON"
103+
case FieldTypeGeometry:
104+
return "Geometry"
101105
case FieldTypeBinaryVector:
102106
return "[]byte"
103107
case FieldTypeFloatVector:
@@ -138,6 +142,8 @@ func (t FieldType) PbFieldType() (string, string) {
138142
return "VarChar", "string"
139143
case FieldTypeJSON:
140144
return "JSON", "JSON"
145+
case FieldTypeGeometry:
146+
return "Geometry", "Geometry"
141147
case FieldTypeBinaryVector:
142148
return "[]byte", ""
143149
case FieldTypeFloatVector:
@@ -181,6 +187,8 @@ const (
181187
FieldTypeArray FieldType = 22
182188
// FieldTypeJSON field type JSON
183189
FieldTypeJSON FieldType = 23
190+
// FieldTypeGeometry field type Geometry
191+
FieldTypeGeometry FieldType = 24
184192
// FieldTypeBinaryVector field type binary vector
185193
FieldTypeBinaryVector FieldType = 100
186194
// FieldTypeFloatVector field type float vector

client/index/common.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,5 @@ const (
6767
Sorted IndexType = "STL_SORT"
6868
Inverted IndexType = "INVERTED"
6969
BITMAP IndexType = "BITMAP"
70+
RTREE IndexType = "RTREE"
7071
)

client/index/rtree.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Licensed to the LF AI & Data foundation under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
package index
18+
19+
var _ Index = rtreeIndex{}
20+
21+
// rtreeIndex represents an RTree index for geometry fields
22+
type rtreeIndex struct {
23+
baseIndex
24+
}
25+
26+
func (idx rtreeIndex) Params() map[string]string {
27+
params := map[string]string{
28+
IndexTypeKey: string(RTREE),
29+
}
30+
return params
31+
}
32+
33+
// NewRTreeIndex creates a new RTree index with default parameters
34+
func NewRTreeIndex() Index {
35+
return rtreeIndex{
36+
baseIndex: baseIndex{
37+
indexType: RTREE,
38+
},
39+
}
40+
}
41+
42+
// NewRTreeIndexWithParams creates a new RTree index with custom parameters
43+
func NewRTreeIndexWithParams() Index {
44+
return rtreeIndex{
45+
baseIndex: baseIndex{
46+
indexType: RTREE,
47+
},
48+
}
49+
}
50+
51+
// RTreeIndexBuilder provides a fluent API for building RTree indexes
52+
type RTreeIndexBuilder struct {
53+
index rtreeIndex
54+
}
55+
56+
// NewRTreeIndexBuilder creates a new RTree index builder
57+
func NewRTreeIndexBuilder() *RTreeIndexBuilder {
58+
return &RTreeIndexBuilder{
59+
index: rtreeIndex{
60+
baseIndex: baseIndex{
61+
indexType: RTREE,
62+
},
63+
},
64+
}
65+
}
66+
67+
// Build returns the constructed RTree index
68+
func (b *RTreeIndexBuilder) Build() Index {
69+
return b.index
70+
}

0 commit comments

Comments
 (0)