Recently, I wanted to use a vector database to store OpenAI embedding vectors.
I researched a variety of vector databases, such as Pinecone, Redis, Qdrant, Milvus, Weaviate, Chroma, and pgvector. After comparing them, I chose Weaviate as my vector database. Weaviate is developed in Golang, which makes it easier for me to install and test.
How to install Weaviate on Debian 12
First, Download Weaviate
Go to the releases page of github.com/weaviate/weaviate, then find the build that is suitable for your operating system.
I chose to download weaviate-v1.20.3-linux-amd64.tar.gz
# Create a working directory and download the v1.20.3 linux-amd64 release archive into it.
mkdir vectordb
cd vectordb
wget https://github.com/weaviate/weaviate/releases/download/v1.20.3/weaviate-v1.20.3-linux-amd64.tar.gz
Then install Weaviate
First extract the archive, then set up the configuration.
# Extract the downloaded gzip-compressed tar archive into the current directory.
tar -zxvf weaviate-v1.20.3-linux-amd64.tar.gz
You need to configure authentication and data persistence through environment variables, as shown below. We use API-key authentication for testing.
# Enable API-key authentication.
export AUTHENTICATION_APIKEY_ENABLED=true
# Key that clients must present. This is an example value; replace it with your own secret.
export AUTHENTICATION_APIKEY_ALLOWED_KEYS=q8Z5HN4U2w8jUDRGeGxyKkqvwjPg7w4P
# User identity for the key above (presumably matched to the key by position; confirm in the Weaviate docs).
export AUTHENTICATION_APIKEY_USERS=newbing@example.com
# Directory where Weaviate persists its data.
export PERSISTENCE_DATA_PATH=/home/newbing/vectordb/data
Launch Weaviate
After extracting the archive and setting the environment variables, you can launch an instance of Weaviate.
# Start Weaviate on 127.0.0.1:8181 over plain HTTP.
# Run this in the same shell where the environment variables were exported so the process inherits them.
./weaviate --host=127.0.0.1 --port=8181 --scheme=http
Launch with supervisor
If you don't want to start or stop by hand, you can use Supervisor to manage the Weaviate instance.
The supervisor config file:
; Supervisor program definition that keeps one Weaviate instance running.
[program:weaviate]
directory=/home/newbing/vectordb/
command=/home/newbing/vectordb/weaviate --host=127.0.0.1 --port=8181 --scheme=http
autostart=true
autorestart=true
; NOTE(review): the process runs as "www" while the binary and data directory live under
; /home/newbing; confirm that user can execute the binary and write to the data path.
user=www
; Same settings as the shell exports used for the manual launch. The API key here must be
; identical to the one clients send (it previously differed by one character from the exported key).
environment=HOME="/home/www",AUTHENTICATION_APIKEY_ENABLED="true",AUTHENTICATION_APIKEY_ALLOWED_KEYS="q8Z5HN4U2w8jUDRGeGxyKkqvwjPg7w4P",AUTHENTICATION_APIKEY_USERS="newbing@example.com",PERSISTENCE_DATA_PATH="/home/newbing/vectordb/data"
numprocs=1
; stderr is merged into stdout, so the single log file below receives both streams.
redirect_stderr=true
stdout_logfile=/var/log/supervisor/weaviate.log
Use Weaviate in Golang
Create weaviate client
package vector
import (
"github.com/rs/zerolog/log"
"github.com/spf13/viper"
"github.com/weaviate/weaviate-go-client/v4/weaviate"
"github.com/weaviate/weaviate-go-client/v4/weaviate/auth"
"net/url"
)
var (
// _cli holds the client created by GetClient so that later calls can reuse it.
// It is read and written without any locking.
_cli *weaviate.Client
)
// GetClient returns the package-wide Weaviate client, building it on first use.
//
// The endpoint is read from the viper key "weaviate.api" and parsed as a URL to
// obtain the scheme and host; the API key is read from "weaviate.key". A client
// that was built successfully is stored in _cli and returned by every later
// call. When the URL cannot be parsed or the client cannot be constructed, the
// failure is logged and returned together with a nil client.
func GetClient() (*weaviate.Client, error) {
	if _cli != nil {
		return _cli, nil
	}

	endpoint, err := url.Parse(viper.GetString("weaviate.api"))
	if err != nil {
		log.Error().
			Err(err).
			Str("func", "init").
			Str("package", "vector").
			Msg("parse api addr failed")
		return nil, err
	}

	created, err := weaviate.NewClient(weaviate.Config{
		Host:       endpoint.Host,
		Scheme:     endpoint.Scheme,
		AuthConfig: auth.ApiKey{Value: viper.GetString("weaviate.key")},
	})
	if err != nil {
		log.Error().
			Err(err).
			Str("func", "init").
			Str("package", "vector").
			Msg("create client failed")
		_cli = nil
		return nil, err
	}

	_cli = created
	return created, nil
}
Store vector to Weaviate
// Create stores one object together with its vector in Weaviate.
//
// class is the Weaviate class (schema/table) name,
// props are the object's properties,
// vector is the embedding to attach (for example one produced by OpenAI).
//
// It returns the object reported back by Weaviate, or the error from
// obtaining the client or from the create request.
func Create(ctx context.Context, class string, props map[string]string, vector []float32) (*models.Object, error) {
	client, err := GetClient()
	if err != nil {
		return nil, err
	}

	creator := client.Data().Creator().
		WithClassName(class).
		WithProperties(props).
		WithVector(vector)

	wrapper, err := creator.Do(ctx)
	if err != nil {
		return nil, err
	}
	return wrapper.Object, nil
}
Search vector with similarity
// Near searches class for the objects whose vectors are closest to vector.
//
// class is the Weaviate class (schema/table) name to query,
// fields are the property names to return for each hit; the "_additional"
// sub-fields "certainty" and "distance" are always requested as well,
// vector is the query embedding (for example an OpenAI embedding),
// maxDistance is the largest distance a hit may have (the author uses values
// between 0 and 1); smaller means more similar,
// limit is the maximum number of rows to return; 0 selects the default of 10.
//
// It returns the list found under Data["Get"][class]. An empty, non-nil list is
// returned when the response has no "Get" entry. An error is returned when the
// client cannot be obtained, the request fails, the server reports GraphQL
// errors, or the response does not have the expected shape.
func Near(ctx context.Context, class string, fields []string, vector []float32, maxDistance float32, limit int) ([]any, error) {
	if limit == 0 {
		limit = 10
	}
	cli, err := GetClient()
	if err != nil {
		return nil, err
	}

	// Start at length 0 with the capacity reserved: make([]T, n) followed by
	// append would leave n zero-value (unnamed) fields at the front.
	gqlFields := make([]graphql.Field, 0, len(fields)+1)
	for _, field := range fields {
		gqlFields = append(gqlFields, graphql.Field{Name: field})
	}
	gqlFields = append(gqlFields, graphql.Field{
		Name: "_additional",
		Fields: []graphql.Field{
			{Name: "certainty"}, // only supported if distance==cosine
			{Name: "distance"},  // always supported
		},
	})

	nearVector := cli.GraphQL().NearVectorArgBuilder().
		WithVector(vector).
		WithDistance(maxDistance) // upper bound on the distance of returned hits

	res, err := cli.GraphQL().Get().
		WithClassName(class).
		WithFields(gqlFields...).
		WithNearVector(nearVector).
		WithLimit(limit).
		Do(ctx)
	if err != nil {
		return nil, err
	}
	// A rejected query comes back in res.Errors with a nil err; surface it
	// instead of reporting "no results".
	if len(res.Errors) > 0 && res.Errors[0] != nil {
		return nil, errors.New("graphql query failed: " + res.Errors[0].Message)
	}

	getRes, ok := res.Data["Get"]
	if !ok {
		return []any{}, nil
	}
	getMap, ok := getRes.(map[string]any)
	if !ok {
		return nil, errors.New("no get data found")
	}
	list, ok := getMap[class]
	if !ok {
		return nil, errors.New("data not found")
	}
	rows, ok := list.([]any)
	if !ok {
		return nil, errors.New("data not array list")
	}
	return rows, nil
}
Find vector by attribute
// FindByAttribute returns the objects of class whose property key equals value.
//
// class is the Weaviate class (schema/table) name to query,
// fields are the property names to return for each hit; the "_additional"
// sub-field "vector" is always requested as well,
// key is the property to compare,
// value is the string the property must equal.
//
// It returns the list found under Data["Get"][class]. An empty, non-nil list is
// returned when the response has no "Get" entry. An error (with a nil list) is
// returned when the client cannot be obtained, the request fails, the server
// reports GraphQL errors, or the response does not have the expected shape.
func FindByAttribute(ctx context.Context, class string, fields []string, key, value string) ([]any, error) {
	cli, err := GetClient()
	if err != nil {
		return nil, err
	}

	// Start at length 0 with the capacity reserved: make([]T, n) followed by
	// append would leave n zero-value (unnamed) fields at the front.
	gqlFields := make([]graphql.Field, 0, len(fields)+1)
	for _, field := range fields {
		gqlFields = append(gqlFields, graphql.Field{Name: field})
	}
	gqlFields = append(gqlFields, graphql.Field{
		Name: "_additional",
		Fields: []graphql.Field{
			{Name: "vector"}, // always supported
		},
	})

	where := filters.Where().
		WithPath([]string{key}).
		WithOperator(filters.Equal).
		WithValueString(value)

	res, err := cli.GraphQL().Get().
		WithClassName(class).
		WithFields(gqlFields...).
		WithWhere(where).
		Do(ctx)
	if err != nil {
		return nil, err
	}
	// A rejected query comes back in res.Errors with a nil err; surface it
	// instead of reporting "no results".
	if len(res.Errors) > 0 && res.Errors[0] != nil {
		return nil, errors.New("graphql query failed: " + res.Errors[0].Message)
	}

	getRes, ok := res.Data["Get"]
	if !ok {
		return []any{}, nil
	}
	getMap, ok := getRes.(map[string]any)
	if !ok {
		return nil, errors.New("no get data found")
	}
	list, ok := getMap[class]
	if !ok {
		return nil, errors.New("data not found")
	}
	rows, ok := list.([]any)
	if !ok {
		return nil, errors.New("data not array list")
	}
	return rows, nil
}
The whole weaviate store,search,find code
package vector
import (
"context"
"errors"
"github.com/weaviate/weaviate-go-client/v4/weaviate/filters"
"github.com/weaviate/weaviate-go-client/v4/weaviate/graphql"
"github.com/weaviate/weaviate/entities/models"
)
// Create stores one object together with its vector in Weaviate.
//
// class is the Weaviate class (schema/table) name,
// props are the object's properties,
// vector is the embedding to attach (for example one produced by OpenAI).
//
// It returns the object reported back by Weaviate, or the error from
// obtaining the client or from the create request.
func Create(ctx context.Context, class string, props map[string]string, vector []float32) (*models.Object, error) {
	client, err := GetClient()
	if err != nil {
		return nil, err
	}

	creator := client.Data().Creator().
		WithClassName(class).
		WithProperties(props).
		WithVector(vector)

	wrapper, err := creator.Do(ctx)
	if err != nil {
		return nil, err
	}
	return wrapper.Object, nil
}
// Near searches class for the objects whose vectors are closest to vector.
//
// class is the Weaviate class (schema/table) name to query,
// fields are the property names to return for each hit; the "_additional"
// sub-fields "certainty" and "distance" are always requested as well,
// vector is the query embedding (for example an OpenAI embedding),
// maxDistance is the largest distance a hit may have (the author uses values
// between 0 and 1); smaller means more similar,
// limit is the maximum number of rows to return; 0 selects the default of 10.
//
// It returns the list found under Data["Get"][class]. An empty, non-nil list is
// returned when the response has no "Get" entry. An error is returned when the
// client cannot be obtained, the request fails, the server reports GraphQL
// errors, or the response does not have the expected shape.
func Near(ctx context.Context, class string, fields []string, vector []float32, maxDistance float32, limit int) ([]any, error) {
	if limit == 0 {
		limit = 10
	}
	cli, err := GetClient()
	if err != nil {
		return nil, err
	}

	// Start at length 0 with the capacity reserved: make([]T, n) followed by
	// append would leave n zero-value (unnamed) fields at the front.
	gqlFields := make([]graphql.Field, 0, len(fields)+1)
	for _, field := range fields {
		gqlFields = append(gqlFields, graphql.Field{Name: field})
	}
	gqlFields = append(gqlFields, graphql.Field{
		Name: "_additional",
		Fields: []graphql.Field{
			{Name: "certainty"}, // only supported if distance==cosine
			{Name: "distance"},  // always supported
		},
	})

	nearVector := cli.GraphQL().NearVectorArgBuilder().
		WithVector(vector).
		WithDistance(maxDistance) // upper bound on the distance of returned hits

	res, err := cli.GraphQL().Get().
		WithClassName(class).
		WithFields(gqlFields...).
		WithNearVector(nearVector).
		WithLimit(limit).
		Do(ctx)
	if err != nil {
		return nil, err
	}
	// A rejected query comes back in res.Errors with a nil err; surface it
	// instead of reporting "no results".
	if len(res.Errors) > 0 && res.Errors[0] != nil {
		return nil, errors.New("graphql query failed: " + res.Errors[0].Message)
	}

	getRes, ok := res.Data["Get"]
	if !ok {
		return []any{}, nil
	}
	getMap, ok := getRes.(map[string]any)
	if !ok {
		return nil, errors.New("no get data found")
	}
	list, ok := getMap[class]
	if !ok {
		return nil, errors.New("data not found")
	}
	rows, ok := list.([]any)
	if !ok {
		return nil, errors.New("data not array list")
	}
	return rows, nil
}
// FindByAttribute returns the objects of class whose property key equals value.
//
// class is the Weaviate class (schema/table) name to query,
// fields are the property names to return for each hit; the "_additional"
// sub-field "vector" is always requested as well,
// key is the property to compare,
// value is the string the property must equal.
//
// It returns the list found under Data["Get"][class]. An empty, non-nil list is
// returned when the response has no "Get" entry. An error (with a nil list) is
// returned when the client cannot be obtained, the request fails, the server
// reports GraphQL errors, or the response does not have the expected shape.
func FindByAttribute(ctx context.Context, class string, fields []string, key, value string) ([]any, error) {
	cli, err := GetClient()
	if err != nil {
		return nil, err
	}

	// Start at length 0 with the capacity reserved: make([]T, n) followed by
	// append would leave n zero-value (unnamed) fields at the front.
	gqlFields := make([]graphql.Field, 0, len(fields)+1)
	for _, field := range fields {
		gqlFields = append(gqlFields, graphql.Field{Name: field})
	}
	gqlFields = append(gqlFields, graphql.Field{
		Name: "_additional",
		Fields: []graphql.Field{
			{Name: "vector"}, // always supported
		},
	})

	where := filters.Where().
		WithPath([]string{key}).
		WithOperator(filters.Equal).
		WithValueString(value)

	res, err := cli.GraphQL().Get().
		WithClassName(class).
		WithFields(gqlFields...).
		WithWhere(where).
		Do(ctx)
	if err != nil {
		return nil, err
	}
	// A rejected query comes back in res.Errors with a nil err; surface it
	// instead of reporting "no results".
	if len(res.Errors) > 0 && res.Errors[0] != nil {
		return nil, errors.New("graphql query failed: " + res.Errors[0].Message)
	}

	getRes, ok := res.Data["Get"]
	if !ok {
		return []any{}, nil
	}
	getMap, ok := getRes.(map[string]any)
	if !ok {
		return nil, errors.New("no get data found")
	}
	list, ok := getMap[class]
	if !ok {
		return nil, errors.New("data not found")
	}
	rows, ok := list.([]any)
	if !ok {
		return nil, errors.New("data not array list")
	}
	return rows, nil
}
Finally, I'd like to introduce my new project: the GPT2API.
What is GPT2API?
Website: https://aicanvas.app/gpt
GPT2API is a platform that helps you build APIs to make ChatGPT easier to use. You can build an API and share it with the community, or you can call APIs from API.Hub, which hosts other APIs shared by the community.
Features:
- Build API with ChatGPT commands.
- Test API on the website.
- Share it with the community.
- Have the community extend the API.
- Sample code for your project.
- Cheap price for calling ChatGPT, $1 with 600K tokens.
If you have any questions about GPT2API or programming, you can contact me on Twitter. You are very welcome to try GPT2API, and I look forward to your comments.
Top comments (0)