Commit 1f44c61d authored by Jessie Young ❣️

Use Claude 3 Sonnet for Duo Chat Zero Shot

- This is behind a feature flag for testing
- Uses the request body format needed for the messages API (sketched
  below), which is now supported by the AI Gateway:
  gitlab-org/modelops/applied-ml/code-suggestions/ai-assist!668
- #444629
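
For context only (not part of the commit): a minimal Ruby sketch of the difference between the legacy completions-style prompt and a messages-style request body. The field names below (model, messages, role, content) follow the Anthropic Messages API; the exact envelope the AI Gateway accepts is defined in ai-assist!668 and may differ, and the conversation contents are made up for illustration.

conversation = [
  { role: :user,      content: "question 1" },
  { role: :assistant, content: "response 1" }
]

# Legacy completions format: one flat string of Human:/Assistant: turns,
# ending with an Assistant turn.
completions_prompt = conversation.map do |message|
  speaker = message[:role] == :user ? "Human" : "Assistant"
  "\n\n#{speaker}: #{message[:content]}"
end.join + "\n\nAssistant:"

# Messages format: structured role/content pairs plus an explicit model,
# roughly what Duo Chat sends when ai_claude_3_sonnet is enabled
# (system prompt omitted here for brevity).
messages_body = {
  model: 'claude-3-sonnet-20240229',
  messages: conversation + [{ role: :user, content: "question 2" }]
}
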
parent e9e72d21
Showing 257 additions and 82 deletions
......@@ -30,7 +30,7 @@ def deserialize_message(message_hash, options)
end
def perform_for(message, options = {})
perform_async(serialize_message(message), options)
perform_inline(serialize_message(message), options)
end
end
......
---
name: ai_claude_3_sonnet
feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/444629
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/148223
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/446302
milestone: '16.11'
group: group::duo chat
type: gitlab_com_derisk
default_enabled: false
......@@ -134,7 +134,11 @@ def token_size(content)
end
def model
DEFAULT_MODEL
if Feature.enabled?(:ai_claude_3_sonnet, user)
'claude-3-sonnet-20240229'
else
DEFAULT_MODEL
end
end
end
end
......
......@@ -102,10 +102,12 @@ def options
agent_scratchpad: +"",
conversation: conversation,
prompt_version: prompt_version,
zero_shot_prompt: zero_shot_prompt,
agent_version_prompt: context.agent_version&.prompt,
current_resource: current_resource,
current_code: current_code,
resources: available_resources_names
resources: available_resources_names,
current_user: context.current_user
}
end
......@@ -143,6 +145,10 @@ def prompt_version
PROMPT_TEMPLATE
end
def zero_shot_prompt
ZERO_SHOT_PROMPT
end
def last_conversation
ChatStorage.new(context.current_user, context.agent_version&.id).last_conversation
end
......@@ -197,9 +203,7 @@ def current_resource
""
end
PROMPT_TEMPLATE = [
Utils::Prompt.as_system(
<<~PROMPT
ZERO_SHOT_PROMPT = <<~PROMPT.freeze
Answer the question as accurate as you can.
You have access only to the following tools:
......@@ -234,8 +238,10 @@ def current_resource
%<current_resource>s
Begin!
PROMPT
),
PROMPT
PROMPT_TEMPLATE = [
Utils::Prompt.as_system(ZERO_SHOT_PROMPT),
Utils::Prompt.as_user("Question: %<user_input>s"),
# We're explicitly adding "\n" before the `Assistant:` in order to avoid the Anthropic API error
# `prompt must end with "\n\nAssistant:" turn`.
......
......@@ -11,19 +11,42 @@ class Anthropic < Base
extend Langsmith::RunHelpers
def self.prompt(options)
human_role = ROLE_NAMES[Llm::AiMessage::ROLE_USER]
if Feature.enabled?(:ai_claude_3_sonnet, options.fetch(:current_user))
history = truncated_conversation_list(options[:conversation])
text = <<~PROMPT
text = history + base_prompt(options)
else
human_role = ROLE_NAMES[Llm::AiMessage::ROLE_USER]
text = <<~PROMPT
\n\n#{human_role}: #{base_prompt(options)}
PROMPT
PROMPT
history = truncated_conversation(
options[:conversation],
Requests::Anthropic::PROMPT_SIZE - text.size
)
history = truncated_conversation(options[:conversation], Requests::Anthropic::PROMPT_SIZE - text.size)
text = [history, text].join if history.present?
text = [history, text].join if history.present?
end
Requests::Anthropic.prompt(text)
end
traceable :prompt, name: 'Build prompt', run_type: 'prompt', class_method: true
def self.truncated_conversation_list(conversation)
# TODO: Requests::Anthropic::PROMPT_SIZE is 30,000, which made
# sense for a 9k context window. We now have a 200k context
# window. Need to decide with the team whether to allow the full
# context window length or something smaller.
# For now, no truncation actually happens here.
return [] if conversation.blank?
conversation.map do |message, _|
{ role: message.role.to_sym, content: message.content }
end
end
# Returns messages from the previous conversation. To ensure that the overall prompt size is not too big,
# we keep adding messages from most recent to oldest until we reach the overall prompt limit.
def self.truncated_conversation(conversation, limit)
......
......@@ -10,12 +10,19 @@ class Base
def self.base_prompt(options)
return agent_version_prompt(options) if options[:agent_version_prompt]
base_prompt = Utils::Prompt.no_role_text(
options.fetch(:prompt_version),
options
)
if options[:current_user] && Feature.enabled?(:ai_claude_3_sonnet, options[:current_user])
Utils::Prompt.role_conversation([
Utils::Prompt.as_system(Utils::Prompt.default_system_prompt),
Utils::Prompt.as_user(options[:zero_shot_prompt], "Question: %<user_input>s"),
Utils::Prompt.as_assistant(options[:agent_scratchpad], "Thought:")
], options)
else
base_prompt = Utils::Prompt.no_role_text(options.fetch(:prompt_version),
options
)
"#{Utils::Prompt.default_system_prompt}\n\n#{base_prompt}"
"#{Utils::Prompt.default_system_prompt}\n\n#{base_prompt}"
end
end
def self.agent_version_prompt(options)
......
......@@ -23,6 +23,7 @@ def parse
# Match the first occurrence of "Action: " and capture everything until "Action Input"
def parse_action
# /(?<=Action:)(?<action>.+?)(?=Action Input:|Final Answer:)/m =~ output
/Action:(?<action>.+?)(?=Action Input:|Final Answer:)/m =~ output
@action = action&.strip
......
......@@ -23,7 +23,7 @@ class Executor < Tool
Thought: You have asked a question related to deployment of an application or CI/CD pipelines.
"CiEditorAssistant" tool can assist with this kind of questions.
Action: CiEditorAssistant
ActionInput: Please create a deployment configuration for a node.js application.
Action Input: Please create a deployment configuration for a node.js application.
PROMPT
PROVIDER_PROMPT_CLASSES = {
......
......@@ -23,7 +23,7 @@ class Executor < EpicIdentifier::Executor
The question is about an epic, so you need to use "EpicReader" tool.
Based on this information you can present final answer.
Action: EpicReader
ActionInput: Please identify the author of &123 epic.
Action Input: Please identify the author of &123 epic.
PROMPT
PROVIDER_PROMPT_CLASSES = {
......
......@@ -22,7 +22,7 @@ class Executor < Tool
Thought: Question is about inner working of GitLab. "GitlabDocumentation" tool is the right one for
the job.
Action: GitlabDocumentation
ActionInput: How do I set up a new project?
Action Input: How do I set up a new project?
PROMPT
def perform(&_block)
......
......@@ -24,7 +24,7 @@ class Executor < IssueIdentifier::Executor
Question is about the content of an issue, so you need to use "IssueReader" tool to retrieve and read issue.
Based on this information you can present final answer about issue.
Action: IssueReader
ActionInput: Please identify the author of #123 issue
Action Input: Please identify the author of #123 issue
PROMPT
PROVIDER_PROMPT_CLASSES = {
......
......@@ -48,7 +48,7 @@ def self.separator(predecessor)
def self.role_conversation(prompt_template, input_variables)
prompt_template.map do |x|
{ role: x.first, content: format(x.last, input_variables) }
end.to_json
end
end
def self.default_system_prompt
......
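
For orientation only (not part of the diff): with `.to_json` removed, role_conversation now returns an array of role/content hashes rather than a JSON string, matching the messages-style request body (see the spec change near the end of this commit). A minimal sketch, using raw [role, text] tuples in place of the as_system/as_user helpers and a made-up template:

template = [
  [:system, "You are GitLab Duo Chat."],
  [:user, "Question: %<user_input>s"]
]

Gitlab::Llm::Chain::Utils::Prompt.role_conversation(template, { user_input: "foo?" })
# => [{ role: :system, content: "You are GitLab Duo Chat." },
#     { role: :user,   content: "Question: foo?" }]
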
......@@ -55,7 +55,7 @@
"response" => "Completion Response",
"metadata" => {
"provider" => "anthropic",
"model" => "claude-2.0",
"model" => model,
"timestamp" => 1000000000 # The number of seconds passed since epoch
}
}
......@@ -68,6 +68,8 @@
let(:response_headers) { { 'Content-Type' => 'application/json' } }
before do
stub_feature_flags(ai_claude_3_sonnet: false)
stub_request(:post, request_url)
.with(
body: expected_request_body,
......@@ -130,6 +132,21 @@
expect(complete.parsed_response).to eq(expected_response)
end
context 'when ai_claude_3_sonnet feature flag is enabled' do
let(:model) { 'claude-3-sonnet-20240229' }
before do
stub_feature_flags(ai_claude_3_sonnet: true)
end
it 'returns expected response' do
expect(Gitlab::HTTP).to receive(:post)
.with(anything, hash_including(timeout: described_class::DEFAULT_TIMEOUT))
.and_call_original
expect(complete.parsed_response).to eq(expected_response)
end
end
context 'when AI_GATEWAY_URL is not set' do
let(:request_url) { "https://cloud.gitlab.com/ai/v1/chat/agent" }
......
......@@ -296,18 +296,41 @@
CONTEXT
end
it 'includes the current resource metadata' do
expect(context).to receive(:resource_serialized).and_return(metadata)
expect(agent.prompt[:prompt]).to include(prompt_resource)
context "with claude 2" do
before do
stub_feature_flags(ai_claude_3_sonnet: false)
end
it 'includes the current resource metadata' do
expect(context).to receive(:resource_serialized).and_return(metadata)
expect(agent.prompt[:prompt]).to include(prompt_resource)
end
context 'when duo_chat_current_resource_by_default is disabled' do
before do
stub_feature_flags(duo_chat_current_resource_by_default: false)
end
it 'does not include resource metadata' do
expect(agent.prompt[:prompt]).not_to include("<resource>")
end
end
end
context 'when duo_chat_current_resource_by_default is disabled' do
before do
stub_feature_flags(duo_chat_current_resource_by_default: false)
context "with claude 3" do
it 'includes the current resource metadata' do
expect(context).to receive(:resource_serialized).and_return(metadata)
expect(claude_3_user_prompt(agent)).to include(prompt_resource)
end
it 'does not include resource metadata' do
expect(agent.prompt[:prompt]).not_to include("<resource>")
context 'when duo_chat_current_resource_by_default is disabled' do
before do
stub_feature_flags(duo_chat_current_resource_by_default: false)
end
it 'does not include resource metadata' do
expect(claude_3_user_prompt(agent)).not_to include("<resource>")
end
end
end
end
......@@ -328,8 +351,20 @@
context 'with self discover part' do
let_it_be(:self_discoverability_prompt) { "You have access to the following GitLab resources: issues, epics" }
it 'includes self-discoverability part in the prompt' do
expect(agent.prompt[:prompt]).to include self_discoverability_prompt
context 'with claude 2.1' do
before do
stub_feature_flags(ai_claude_3_sonnet: false)
end
it 'includes self-discoverability part in the prompt' do
expect(agent.prompt[:prompt]).to include self_discoverability_prompt
end
end
context 'with claude 3' do
it 'includes self-discoverability part in the prompt' do
expect(claude_3_user_prompt(agent)).to include(self_discoverability_prompt)
end
end
end
......@@ -344,15 +379,35 @@
}
end
it 'includes selected code in the prompt' do
expect(agent.prompt[:prompt]).to include("code selection")
context 'with claude 2.1' do
before do
stub_feature_flags(ai_claude_3_sonnet: false)
end
it 'includes selected code in the prompt' do
expect(agent.prompt[:prompt]).to include("code selection")
end
context 'when selected_text is empty' do
let(:selected_text) { '' }
it 'does not include selected code in the prompt' do
expect(agent.prompt[:prompt]).not_to include("code selection")
end
end
end
context 'with claude 3' do
it 'includes selected code in the prompt' do
expect(claude_3_user_prompt(agent)).to include("code selection")
end
end
context 'when selected_text is empty' do
let(:selected_text) { '' }
it 'does not include selected code in the prompt' do
expect(agent.prompt[:prompt]).not_to include("code selection")
expect(claude_3_user_prompt(agent)).not_to include("code selection")
end
end
end
......@@ -362,9 +417,22 @@
let(:blob) { fake_blob(path: 'foobar.rb', data: 'puts "hello world"') }
let(:extra_resource) { { blob: blob } }
it 'includes the blob name and data in the prompt' do
expect(agent.prompt[:prompt]).to include("foobar.rb")
expect(agent.prompt[:prompt]).to include("puts \"hello world\"")
context 'with claude 2.1' do
before do
stub_feature_flags(ai_claude_3_sonnet: false)
end
it 'includes the blob name and data in the prompt' do
expect(agent.prompt[:prompt]).to include("foobar.rb")
expect(agent.prompt[:prompt]).to include("puts \"hello world\"")
end
end
context 'with claude 3' do
it 'includes the blob name and data in the prompt' do
expect(claude_3_user_prompt(agent)).to include("foobar.rb")
expect(claude_3_user_prompt(agent)).to include("puts \"hello world\"")
end
end
end
......@@ -409,4 +477,8 @@
end
end
end
def claude_3_user_prompt(agent)
agent.prompt[:prompt].find { |h| h["role"] == "user" }["content"]
end
end
......@@ -7,12 +7,15 @@
describe '.prompt' do
let(:prompt_version) { ::Gitlab::Llm::Chain::Agents::ZeroShot::Executor::PROMPT_TEMPLATE }
let(:zero_shot_prompt) { ::Gitlab::Llm::Chain::Agents::ZeroShot::Executor::ZERO_SHOT_PROMPT }
let(:agent_version_prompt) { nil }
let(:user) { create(:user) }
let(:user_input) { 'foo?' }
let(:options) do
{
tools_definitions: "tool definitions",
tool_names: "tool names",
user_input: 'foo?',
user_input: user_input,
agent_scratchpad: "some observation",
conversation: [
build(:ai_message, request_id: 'uuid1', role: 'user', content: 'question 1'),
......@@ -24,7 +27,9 @@
current_code: "",
current_resource: "",
resources: "",
agent_version_prompt: agent_version_prompt
agent_version_prompt: agent_version_prompt,
current_user: user,
zero_shot_prompt: zero_shot_prompt
}
end
......@@ -32,58 +37,89 @@
subject { described_class.prompt(options)[:prompt] }
it 'returns prompt' do
expect(subject).to include('Human:')
expect(subject).to include('Assistant:')
expect(subject).to include('foo?')
expect(subject).to include('tool definitions')
expect(subject).to include('tool names')
expect(subject).to include(prompt_text)
expect(subject).to include(Gitlab::Llm::Chain::Utils::Prompt.default_system_prompt)
end
it 'includes conversation history' do
expect(subject)
.to start_with("\n\nHuman: question 1\n\nAssistant: response 1\n\nHuman: question 2\n\nAssistant: response 2")
end
context 'when conversation history does not fit prompt limit' do
let(:prompt_size) { described_class.prompt(options.merge(conversation: []))[:prompt].size }
context 'for Claude completions API' do
before do
stub_const("::Gitlab::Llm::Chain::Requests::Anthropic::PROMPT_SIZE", prompt_size + 50)
stub_feature_flags(ai_claude_3_sonnet: false)
end
it 'includes truncated conversation history' do
expect(subject).to start_with("\n\nHuman: question 2\n\nAssistant: response 2\n\n")
it 'returns prompt' do
expect(subject).to include('Human:')
expect(subject).to include('Assistant:')
expect(subject).to include('foo?')
expect(subject).to include('tool definitions')
expect(subject).to include('tool names')
expect(subject).to include(prompt_text)
expect(subject).to include(Gitlab::Llm::Chain::Utils::Prompt.default_system_prompt)
end
context 'when the truncated history would begin with an Assistant turn' do
it 'includes conversation history' do
expect(subject)
.to start_with("\n\nHuman: question 1\n\nAssistant: response 1\n\nHuman: question 2\n\nAssistant: response 2")
end
context 'when conversation history does not fit prompt limit' do
let(:prompt_size) { described_class.prompt(options.merge(conversation: []))[:prompt].size }
before do
stub_const("::Gitlab::Llm::Chain::Requests::Anthropic::PROMPT_SIZE", prompt_size + 75)
stub_const("::Gitlab::Llm::Chain::Requests::Anthropic::PROMPT_SIZE", prompt_size + 50)
end
it 'only includes history up to the latest fitting Human turn' do
it 'includes truncated conversation history' do
expect(subject).to start_with("\n\nHuman: question 2\n\nAssistant: response 2\n\n")
end
context 'when the truncated history would begin with an Assistant turn' do
before do
stub_const("::Gitlab::Llm::Chain::Requests::Anthropic::PROMPT_SIZE", prompt_size + 75)
end
it 'only includes history up to the latest fitting Human turn' do
expect(subject).to start_with("\n\nHuman: question 2\n\nAssistant: response 2\n\n")
end
end
end
context 'when agent version prompt is provided' do
let(:agent_version_prompt) { 'A custom prompt' }
it 'returns the agent version prompt' do
expected_prompt = [
"\n\nHuman: question 1",
"\n\nAssistant: response 1",
"\n\nHuman: question 2",
"\n\nAssistant: response 2",
"\n\nHuman: A custom prompt",
"\n\nQuestion: foo?",
"\nThought: \n"
].join('')
is_expected.to eq(expected_prompt)
end
end
end
context 'when agent version prompt is provided' do
let(:agent_version_prompt) { 'A custom prompt' }
it 'returns the agent version prompt' do
expected_prompt = [
"\n\nHuman: question 1",
"\n\nAssistant: response 1",
"\n\nHuman: question 2",
"\n\nAssistant: response 2",
"\n\nHuman: A custom prompt",
"\n\nQuestion: foo?",
"\nThought: \n"
].join('')
is_expected.to eq(expected_prompt)
context 'with claude 3' do
before do
stub_feature_flags(ai_claude_3_sonnet: true)
end
it 'returns the prompt format expected by the anthropic messages API' do
prompt = subject
prompts_by_role = prompt.group_by { |message| message[:role] }
user_prompts = prompts_by_role[:user]
assistant_prompts = prompts_by_role[:assistant]
expect(prompts_by_role[:system][0][:content]).to eq(Gitlab::Llm::Chain::Utils::Prompt.default_system_prompt)
expect(user_prompts[0][:content]).to eq("question 1")
expect(user_prompts[1][:content]).to eq("question 2")
user_prompts[2][:content].tap do |content|
expect(content).to start_with("Answer the question as accurate as you can")
expect(content).to include(user_input)
end
expect(assistant_prompts[0][:content]).to eq("response 1")
expect(assistant_prompts[1][:content]).to eq("response 2")
end
end
end
......
......@@ -96,7 +96,7 @@
it 'returns bare text from role based prompt' do
result = { role: :assistant, content: "multi\nline\ninput" }
expect(described_class.role_conversation([prompt], input_vars)).to eq([result].to_json)
expect(described_class.role_conversation([prompt], input_vars)).to eq([result])
end
end
......