Commit 401d4dae authored by Patrick Bajao

Define different service names per LLM client

`Gitlab::Llm::Concerns::CircuitBreaker` requires `service_name` to
be defined.

Before this change, we were using a single `service_name`, which means
all clients share a single circuit. If one provider fails and the
circuit opens, all providers are affected.

To prevent that, since we have different clients (e.g. OpenAI, Vertex,
Anthropic), we define a specific service name per client.
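
To make the effect of per-client service names concrete, here is a rough,
self-contained sketch (not the actual `CircuitBreaker` concern; the class
names, constants, and threshold below are made up for illustration) showing
how keying the circuit state on `service_name` isolates one provider's
failures from the others:

```ruby
# Illustrative sketch only; the real concern is Gitlab::Llm::Concerns::CircuitBreaker.
module SimpleCircuitBreaker
  InternalServerError = Class.new(StandardError)
  CircuitOpenError = Class.new(StandardError)

  FAILURE_THRESHOLD = 5

  # One state entry per service name, so each provider gets its own circuit.
  STATES = Hash.new { |hash, key| hash[key] = { failures: 0, open: false } }

  def run_with_circuit
    state = STATES[service_name]
    raise CircuitOpenError, "circuit open for #{service_name}" if state[:open]

    yield
  rescue InternalServerError
    state[:failures] += 1
    state[:open] = true if state[:failures] >= FAILURE_THRESHOLD
    raise
  end
end

class AnthropicClient
  include SimpleCircuitBreaker

  def service_name
    'anthropic'
  end
end

class VertexAiClient
  include SimpleCircuitBreaker

  def service_name
    'vertex_ai'
  end
end

# Repeated Anthropic failures only open the 'anthropic' circuit;
# VertexAiClient keeps working because 'vertex_ai' has its own,
# still-closed circuit.
```

With a single shared `service_name`, both clients above would share one
state entry, so an Anthropic outage would also open the circuit used for
Vertex requests.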

This also includes a fix to the `ExponentialBackoff` concern so it raises
the fully-qualified exception, avoiding a `NameError`.
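
The `NameError` comes from Ruby constant lookup: inside the
`ExponentialBackoff` module, a bare `raise InternalServerError` is resolved
against that module's own lexical scope (and then the top level), not against
`Gitlab::Llm::Concerns::CircuitBreaker`, where the constant is defined.
A minimal, stand-alone reproduction (module names are stand-ins for the real
concerns):

```ruby
module CircuitBreaker
  InternalServerError = Class.new(StandardError)
end

module ExponentialBackoff
  def self.raise_unqualified
    # Looked up in ExponentialBackoff's lexical scope and then ::Object,
    # where no InternalServerError exists, so this line raises NameError.
    raise InternalServerError
  end

  def self.raise_qualified
    # Fully-qualified reference resolves to the intended exception class.
    raise CircuitBreaker::InternalServerError
  end
end

begin
  ExponentialBackoff.raise_unqualified
rescue NameError => e
  puts e.message # => uninitialized constant ExponentialBackoff::InternalServerError
end

begin
  ExponentialBackoff.raise_qualified
rescue CircuitBreaker::InternalServerError
  puts 'raised the intended exception'
end
```

That is why the concern now raises
`Gitlab::Llm::Concerns::CircuitBreaker::InternalServerError` explicitly.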
parent 155c7561
2 merge requests: !122597 (doc/gitaly: Remove references to removed metrics), !120407 (Define different service names per LLM client)
@@ -55,6 +55,10 @@ def request_body(prompt:, options: {})
             temperature: DEFAULT_TEMPERATURE
           }.merge(options)
         end
+
+        def service_name
+          'anthropic'
+        end
       end
     end
   end
@@ -51,7 +51,10 @@ def retry_with_exponential_backoff
             http_response = response.response
             return if http_response.nil? || http_response.body.blank?

-            raise InternalServerError if response.server_error? && Feature.enabled?(:circuit_breaker, type: :ops)
+            if response.server_error? && Feature.enabled?(:circuit_breaker, type: :ops)
+              raise Gitlab::Llm::Concerns::CircuitBreaker::InternalServerError
+            end
+
             return response unless response.too_many_requests?

             retries += 1
@@ -62,10 +65,6 @@ def retry_with_exponential_backoff
             next
           end
         end
-
-        def service_name
-          'open_ai'
-        end
       end
     end
   end
@@ -198,6 +198,10 @@ def moderation_output(endpoint, parsed_response)
           end
         end
         # rubocop:enable CodeReuse/ActiveRecord
+
+        def service_name
+          'open_ai'
+        end
       end
     end
   end
@@ -72,6 +72,10 @@ def request(content:, config:, **options)
             body: config.payload(content).merge(options).to_json
           )
         end
+
+        def service_name
+          'vertex_ai'
+        end
       end
     end
   end
@@ -30,6 +30,13 @@
     instance_double(HTTParty::Response, response: nil)
   end

+  let(:server_error) do
+    instance_double(HTTParty::Response,
+      code: 503, success?: false, parsed_response: {},
+      response: response, server_error?: true, too_many_requests?: false
+    )
+  end
+
   let(:response_caller) { -> { success } }

   let(:dummy_class) do
@@ -40,6 +47,10 @@ def dummy_method(response_caller)

       include Gitlab::Llm::Concerns::ExponentialBackoff

       retry_methods_with_exponential_backoff :dummy_method
+
+      def service_name
+        'dummy'
+      end
     end
   end
@@ -126,5 +137,13 @@ def dummy_method(response_caller)
         expect(response_caller).to have_received(:call).once
       end
     end
+
+    context 'when the function response is a server error' do
+      it 'returns a nil response' do
+        allow(response_caller).to receive(:call).and_return(server_error)
+
+        expect(subject).to be_nil
+      end
+    end
   end
 end