Skip to content
Snippets Groups Projects
Commit a219b6e5 authored by Dmitry Gruzd, committed by David Dieulivol
Browse files

Add the tanuki_bot migration/model

Changelog: added
EE: true
parent f09aff5b
No related branches found
No related tags found
3 merge requests: !122597 doc/gitaly: Remove references to removed metrics, !118700 Remove refactor_vulnerability_filters feature flag, !118195 Add the tanuki_bot model
Showing with 173 additions and 12 deletions
......@@ -219,8 +219,9 @@
.use-pg12:
services:
- name: postgres:12
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-12-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.0-alpine
variables:
POSTGRES_HOST_AUTH_METHOD: trust
......@@ -228,8 +229,9 @@
.use-pg13:
services:
- name: postgres:13
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-13-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.2-alpine
variables:
POSTGRES_HOST_AUTH_METHOD: trust
......@@ -237,8 +239,9 @@
.use-pg14:
services:
- name: postgres:14
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-14-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.2-alpine
variables:
POSTGRES_HOST_AUTH_METHOD: trust
......@@ -246,8 +249,9 @@
.use-pg12-es7-ee:
services:
- name: postgres:12
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-12-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.0-alpine
- name: elasticsearch:7.17.6
command: ["elasticsearch", "-E", "discovery.type=single-node", "-E", "xpack.security.enabled=false"]
......@@ -261,8 +265,9 @@
.use-pg13-es7-ee:
services:
- name: postgres:13
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-13-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.2-alpine
- name: elasticsearch:7.17.6
command: ["elasticsearch", "-E", "discovery.type=single-node", "-E", "xpack.security.enabled=false"]
......@@ -276,8 +281,9 @@
.use-pg14-es7-ee:
services:
- name: postgres:14
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-14-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.2-alpine
- name: elasticsearch:7.17.6
command: ["elasticsearch", "-E", "discovery.type=single-node", "-E", "xpack.security.enabled=false"]
......@@ -291,8 +297,9 @@
.use-pg13-es8-ee:
services:
- name: postgres:13
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-13-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.0-alpine
- name: elasticsearch:8.6.2
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:zoekt-ci-image-1.0
......@@ -307,8 +314,9 @@
.use-pg14-es8-ee:
services:
- name: postgres:14
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-14-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.0-alpine
- name: elasticsearch:8.6.2
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:zoekt-ci-image-1.0
......@@ -323,8 +331,9 @@
.use-pg13-opensearch1-ee:
services:
- name: postgres:13
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-13-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.0-alpine
- name: opensearchproject/opensearch:1.3.5
alias: elasticsearch
......@@ -339,8 +348,9 @@
.use-pg13-opensearch2-ee:
services:
- name: postgres:13
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-13-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.0-alpine
- name: opensearchproject/opensearch:2.2.1
alias: elasticsearch
......@@ -355,8 +365,9 @@
.use-pg14-opensearch1-ee:
services:
- name: postgres:14
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-14-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.0-alpine
- name: opensearchproject/opensearch:1.3.5
alias: elasticsearch
......@@ -371,8 +382,9 @@
.use-pg14-opensearch2-ee:
services:
- name: postgres:14
- name: ${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images:postgres-14-pgvector-0.4.1
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
alias: postgres
- name: redis:6.0-alpine
- name: opensearchproject/opensearch:2.2.1
alias: elasticsearch
......
......@@ -30,6 +30,8 @@ gem 'view_component', '~> 2.74.1'
# Supported DBs
gem 'pg', '~> 1.4.6'
gem 'neighbor', '~> 0.2.3'
gem 'rugged', '~> 1.5'
gem 'grape-path-helpers', '~> 1.7.1'
......
......@@ -373,6 +373,7 @@
{"name":"mustermann","version":"1.1.1","platform":"ruby","checksum":"0a21cfe505869cce9ce17998db5260344e78df81ae857c07a62143fd30299531"},
{"name":"mustermann-grape","version":"1.0.1","platform":"ruby","checksum":"00ce12b3df66be33ec4304aa9108fb9e1a0689f2a136c96b51c104684f5c5436"},
{"name":"nap","version":"1.1.0","platform":"ruby","checksum":"949691660f9d041d75be611bb2a8d2fd559c467537deac241f4097d9b5eea576"},
{"name":"neighbor","version":"0.2.3","platform":"ruby","checksum":"70887ac2110d0c7ab243ee988f64359b8bb94a63a0c78542bbeef4f33b1933e5"},
{"name":"nenv","version":"0.3.0","platform":"ruby","checksum":"d9de6d8fb7072228463bf61843159419c969edb34b3cef51832b516ae7972765"},
{"name":"net-http-persistent","version":"4.0.1","platform":"ruby","checksum":"2752f4cce05fd1c45e0537c6f3a98fa5a4899efd5f88e63c104ed5f05cbddef9"},
{"name":"net-imap","version":"0.3.4","platform":"ruby","checksum":"a82a59e2a429433dc54cae5a8b2979ffe49da8c66085740811bfa337dc3729b5"},
......
......@@ -985,6 +985,8 @@ GEM
mustermann-grape (1.0.1)
mustermann (>= 1.0.0)
nap (1.1.0)
neighbor (0.2.3)
activerecord (>= 5.2)
nenv (0.3.0)
net-http-persistent (4.0.1)
connection_pool (~> 2.2)
......@@ -1827,6 +1829,7 @@ DEPENDENCIES
mini_magick (~> 4.10.1)
minitest (~> 5.11.0)
multi_json (~> 1.14.1)
neighbor (~> 0.2.3)
net-ldap (~> 0.17.1)
net-ntp
net-protocol (~> 0.1.3)
......
......@@ -103,3 +103,10 @@ test: &test
username: postgres
password:
host: localhost
embedding:
adapter: postgresql
encoding: unicode
database: gitlabhq_embedding_test
username: postgres
password:
host: localhost
......@@ -124,3 +124,10 @@ test: &test
username: postgres
password:
host: localhost
embedding:
adapter: postgresql
encoding: unicode
database: gitlabhq_embedding_test
username: postgres
password:
host: localhost
# frozen_string_literal: true

module Embedding
  # ActiveRecord model mapped to the `tanuki_bot_mvc` table.
  #
  # This model should only store public content and embeddings
  class TanukiBotMvc < Embedding::ApplicationRecord
    self.table_name = 'tanuki_bot_mvc'

    # Declares `embedding` as the attribute used for neighbor searches.
    has_neighbors :embedding

    # Forwards the given embedding to `nearest_neighbors` on the `embedding`
    # attribute, always requesting the 'inner_product' distance.
    scope :neighbor_for, lambda { |embedding|
      nearest_neighbors(:embedding, embedding, distance: 'inner_product')
    }
  end
end
---
# Database dictionary entry for the tanuki_bot_mvc table.
table_name: tanuki_bot_mvc
classes:
- Embedding::TanukiBotMvc
feature_categories:
- global_search
description: 'Stores public content together with its vector embeddings (model Embedding::TanukiBotMvc)'
# TODO: set this to the URL of merge request !118195 ("Add the tanuki_bot model").
introduced_by_url:
milestone: '16.0'
gitlab_schema: gitlab_embedding
# frozen_string_literal: true

# Creates (and, on rollback, drops) the `tanuki_bot_mvc` table.
class CreateTanukiBotMvc < Gitlab::Database::Migration[2.1]
  # Single source for the table name shared by `up` and `down`.
  TABLE_NAME = :tanuki_bot_mvc

  enable_lock_retries!

  # Builds the table: timestamps, a 1536-limit vector column, length-limited
  # text columns, a JSONB metadata column and a uniquely indexed `chroma_id`.
  def up
    create_table(TABLE_NAME) do |table|
      table.timestamps_with_timezone(null: false)
      table.vector(:embedding, limit: 1536, null: false)
      table.text(:url, null: false, limit: 2048)
      table.text(:content, null: false, limit: 32768)
      table.jsonb(:metadata, null: false)
      # `chroma_id` is the only nullable column; it carries a unique index.
      table.text(:chroma_id, index: { unique: true }, limit: 512)
    end
  end

  # Reverses `up` by removing the table.
  def down
    drop_table(TABLE_NAME)
  end
end
295782269f4738b6eb308f53144d7d4358affa39e7246a538d774200088a41d8
\ No newline at end of file
......@@ -11,8 +11,38 @@ CREATE TABLE schema_migrations (
version character varying NOT NULL
);
CREATE TABLE tanuki_bot_mvc (
id bigint NOT NULL,
created_at timestamp with time zone NOT NULL,
updated_at timestamp with time zone NOT NULL,
embedding vector(1536) NOT NULL,
url text NOT NULL,
content text NOT NULL,
metadata jsonb NOT NULL,
chroma_id text,
CONSTRAINT check_5df597f0fb CHECK ((char_length(url) <= 2048)),
CONSTRAINT check_67053ce605 CHECK ((char_length(content) <= 32768)),
CONSTRAINT check_e130e042d4 CHECK ((char_length(chroma_id) <= 512))
);
CREATE SEQUENCE tanuki_bot_mvc_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE tanuki_bot_mvc_id_seq OWNED BY tanuki_bot_mvc.id;
ALTER TABLE ONLY tanuki_bot_mvc ALTER COLUMN id SET DEFAULT nextval('tanuki_bot_mvc_id_seq'::regclass);
ALTER TABLE ONLY ar_internal_metadata
ADD CONSTRAINT ar_internal_metadata_pkey PRIMARY KEY (key);
ALTER TABLE ONLY schema_migrations
ADD CONSTRAINT schema_migrations_pkey PRIMARY KEY (version);
ALTER TABLE ONLY tanuki_bot_mvc
ADD CONSTRAINT tanuki_bot_mvc_pkey PRIMARY KEY (id);
CREATE UNIQUE INDEX index_tanuki_bot_mvc_on_chroma_id ON tanuki_bot_mvc USING btree (chroma_id);
# frozen_string_literal: true

FactoryBot.define do
  # Factory for Embedding::TanukiBotMvc records with fixed sample values.
  factory :tanuki_bot_mvc, class: 'Embedding::TanukiBotMvc' do
    url { 'http://example.com/path/to/a/doc' }
    metadata { { info: 'A description', source: 'path/to/a/doc.md', source_type: 'doc' } }
    content { 'Some text' }
    # 1536 identical components, matching the limit of the `embedding` column.
    embedding { Array.new(1536) { 0.3 } }
  end
end
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe Embedding::TanukiBotMvc, type: :model, feature_category: :global_search do
  describe 'scopes' do
    describe '.neighbor_for' do
      # Built but not persisted: only its embedding is used as the query vector.
      let_it_be(:question) { build(:tanuki_bot_mvc) }

      it 'calls nearest_neighbors for question' do
        create_list(:tanuki_bot_mvc, 2)

        expect(described_class).to receive(:nearest_neighbors)
          .with(:embedding, question.embedding, distance: 'inner_product').once

        described_class.neighbor_for(question.embedding)
      end

      context 'with a far away embedding' do
        let_it_be(:far_embedding) { create(:tanuki_bot_mvc, embedding: Array.new(1536, -0.999)) }
        let_it_be(:close_embedding) { create(:tanuki_bot_mvc, embedding: Array.new(1536, 0.333)) }

        it 'does not return the far neighbor' do
          # contain_exactly takes the expected elements directly; match_array
          # is meant for an array argument, not a single record.
          expect(described_class.neighbor_for(question.embedding).limit(1)).to contain_exactly(close_embedding)
        end
      end
    end
  end
end
......@@ -32,6 +32,15 @@ else
sed -i '/geo:/,/^$/d' config/database.yml
fi
# Embedding is an EE feature, so its database entry is kept only for jobs whose
# name contains `rspec-ee`; every other job strips it from config/database.yml.
if [[ "${CI_JOB_NAME}" != *"rspec-ee"* ]]; then
  echoinfo "Embedding DB won't be set up."
  # Delete from the `embedding:` line through the next empty line.
  sed -i '/embedding:/,/^$/d' config/database.yml
else
  echoinfo "Embedding DB will be set up."
fi
# Set user to a non-superuser to ensure we test permissions
sed -i 's/username: root/username: gitlab/g' config/database.yml
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment