Add development mode for AI actions

Sidekiq can cause issues with code reloading in development. To work around this, you can now set `LLM_DEVELOPMENT_SYNC_EXECUTION=1` in development, which executes AI actions synchronously instead of enqueuing them in Sidekiq.

Add development mode for AI actions
58c88893 · Nicolas Dular · Gosia Ksionek · 150d10ed · 58c88893 · 58c88893
Commit 58c88893 authored 1 year ago by Nicolas Dular Committed by Gosia Ksionek 1 year ago
--- a/doc/development/ai_features/duo_chat.md
+++ b/doc/development/ai_features/duo_chat.md
@@ -25,27 +25,6 @@ Use [this snippet](https://gitlab.com/gitlab-org/gitlab/-/snippets/2554994) for
 1. Ensure that your current branch is up-to-date with `master`.
 1. To access the GitLab Duo Chat interface, in the lower-left corner of any page, select **Help** and **Ask GitLab Duo Chat**.

-### Tips for local development
-
-1. When responses are taking too long to appear in the user interface, consider restarting Sidekiq by running `gdk restart rails-background-jobs`. If that doesn't work, try `gdk kill` and then `gdk start`.
-1. Alternatively, bypass Sidekiq entirely and run the chat service synchronously. This can help with debugging errors as GraphQL errors are now available in the network inspector instead of the Sidekiq logs.
-
-```diff
-diff --git a/ee/app/services/llm/chat_service.rb b/ee/app/services/llm/chat_service.rb
-index 5fa7ae8a2bc1..5fe996ba0345 100644
--- a/ee/app/services/llm/chat_service.rb
-+++ b/ee/app/services/llm/chat_service.rb
-@@ -5,7 +5,7 @@ class ChatService < BaseService
-     private
-
-     def perform
-      worker_perform(user, resource, :chat, options)
-+      worker_perform(user, resource, :chat, options.merge(sync: true))
-     end
-
-     def valid?
-```
-
 ## Working with GitLab Duo Chat

 Prompts are the most vital part of GitLab Duo Chat system. Prompts are the instructions sent to the Large Language Model to perform certain tasks.

--- a/doc/development/ai_features/index.md
+++ b/doc/development/ai_features/index.md
@@ -166,6 +166,16 @@ context 'when asking about how to use GitLab', :ai_embedding_fixtures do
 end
 ```

+### Tips for local development
+
+1. When responses are taking too long to appear in the user interface, consider restarting Sidekiq by running `gdk restart rails-background-jobs`. If that doesn't work, try `gdk kill` and then `gdk start`.
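+1. Alternatively, bypass Sidekiq entirely and run AI actions synchronously by setting the `LLM_DEVELOPMENT_SYNC_EXECUTION` environment variable, which is only honored in development and test environments. This can help with debugging, because GraphQL errors then appear in the network inspector instead of the Sidekiq logs.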
+
+```shell
+export LLM_DEVELOPMENT_SYNC_EXECUTION=1
+gdk start
+```
+
 ### Working with GitLab Duo Chat

 View [guidelines](duo_chat.md) for working with GitLab Duo Chat.

--- a/ee/app/services/llm/base_service.rb
+++ b/ee/app/services/llm/base_service.rb
@@ -70,11 +70,11 @@ def worker_perform(user, resource, action_name, options)
        options: options
      )

-      if options[:sync] == true
+      if development_sync_execution?
        response_data = ::Llm::CompletionWorker.new.perform(
          user.id, resource&.id, resource&.class&.name, action_name, options
        )
-        payload.merge!(response_data)
+        payload[:response] = response_data
      else
        ::Llm::CompletionWorker.perform_async(user.id, resource&.id, resource&.class&.name, action_name, options)
      end
@@ -115,6 +115,10 @@ def cache_response?(options)
      options.fetch(:cache_response, false)
    end

+    def development_sync_execution?
+      Gitlab.dev_or_test_env? && Gitlab::Utils.to_boolean(ENV['LLM_DEVELOPMENT_SYNC_EXECUTION'])
+    end
+
    def emit_response?(options)
      return false if options[:internal_request]


--- a/ee/app/workers/llm/completion_worker.rb
+++ b/ee/app/workers/llm/completion_worker.rb
@@ -46,6 +46,8 @@ def perform(user_id, resource_id, resource_class, ai_action_name, options = {})
      response = ai_completion.execute(user, resource, options)
      update_error_rate(ai_action_name, response)
      update_duration_metric(ai_action_name, ::Gitlab::Metrics::System.monotonic_time - start_time)
+
+      response
    rescue StandardError => err
      update_error_rate(ai_action_name)


--- a/ee/spec/support/shared_examples/services/llm/async_service_shared_examples.rb
+++ b/ee/spec/support/shared_examples/services/llm/async_service_shared_examples.rb
 # frozen_string_literal: true

 RSpec.shared_examples 'completion worker sync and async' do
+  let(:resonse_double) { instance_double(Gitlab::Llm::BaseResponseModifier, response_body: "response") }
+  let(:expected_options) { options.merge(request_id: 'uuid') }
+
  before do
    allow(SecureRandom).to receive(:uuid).and_return('uuid')
  end

-  context 'when running synchronously' do
+  context 'when enabling sync execution via environment variables' do
    before do
-      options[:sync] = true
+      stub_env('LLM_DEVELOPMENT_SYNC_EXECUTION', true)
    end

-    it 'worker runs synchronously' do
-      expected_options = options.merge(request_id: 'uuid')
-
+    it 'runs worker synchronously' do
      expect_next_instance_of(Llm::CompletionWorker) do |worker|
        expect(worker).to receive(:perform).with(
          user.id, resource.id, resource.class.name, action_name, hash_including(**expected_options)
-        ).and_return({})
+        ).and_return(resonse_double)
      end

      expect(subject.execute).to be_success
@@ -25,13 +26,10 @@

  context 'when running asynchronously' do
    before do
-      options[:sync] = false
      allow(::Llm::CompletionWorker).to receive(:perform_async)
    end

    it 'worker runs asynchronously with correct params' do
-      expected_options = options.merge(request_id: 'uuid')
-
      expect(::Llm::CompletionWorker)
        .to receive(:perform_async)
        .with(user.id, resource.id, resource.class.name, action_name, hash_including(**expected_options))

--- a/ee/spec/workers/llm/completion_worker_spec.rb
+++ b/ee/spec/workers/llm/completion_worker_spec.rb
@@ -33,6 +33,7 @@
    shared_examples 'performs successfully' do
      it 'calls Gitlab::Llm::CompletionsFactory and tracks event', :aggregate_failures do
        completion = instance_double(Gitlab::Llm::Completions::SummarizeAllOpenNotes)
+        response_double = instance_double(Gitlab::Llm::BaseResponseModifier)
        extra_resource_finder = instance_double(::Llm::ExtraResourceFinder)

        expect(::Llm::ExtraResourceFinder).to receive(:new).with(user, referer_url).and_return(extra_resource_finder)
@@ -46,8 +47,9 @@
        expect(completion)
          .to receive(:execute)
          .with(user, resource, options.symbolize_keys.merge(extra_resource: extra_resource))
+          .and_return(response_double)

-        subject
+        expect(subject).to eq(response_double)

        expect_snowplow_event(
          category: described_class.to_s,