Bug: HTTP 500 responses after introducing improved prompt length estimation
After introducing the logic to better estimate the prompt length in #167 (closed), the service frequently returns HTTP status code 500 with the following traceback:
Traceback (most recent call last):
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/anyio/streams/memory.py", line 98, in receive
return self.receive_nowait()
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/anyio/streams/memory.py", line 93, in receive_nowait
raise WouldBlock
anyio.WouldBlock
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/middleware/base.py", line 43, in call_next
message = await recv_stream.receive()
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/anyio/streams/memory.py", line 118, in receive
raise EndOfStream
anyio.EndOfStream
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/app/codesuggestions/api/middleware.py", line 103, in dispatch
response = await call_next(request)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/middleware/base.py", line 46, in call_next
raise app_exc
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/middleware/base.py", line 36, in coro
await self.app(scope, request.receive, send_stream.send)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/middleware/authentication.py", line 48, in __call__
await self.app(scope, receive, send)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/middleware/base.py", line 68, in __call__
response = await self.dispatch_func(request, call_next)
File "/app/codesuggestions/api/middleware.py", line 253, in dispatch
return await call_next(request)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/middleware/base.py", line 46, in call_next
raise app_exc
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/middleware/base.py", line 36, in coro
await self.app(scope, request.receive, send_stream.send)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/exceptions.py", line 93, in __call__
raise exc
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/exceptions.py", line 82, in __call__
await self.app(scope, receive, sender)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/fastapi/middleware/asyncexitstack.py", line 21, in __call__
raise e
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
await self.app(scope, receive, send)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/routing.py", line 670, in __call__
await route.handle(scope, receive, send)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/routing.py", line 266, in handle
await self.app(scope, receive, send)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/routing.py", line 65, in app
response = await func(request)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/fastapi/routing.py", line 231, in app
raw_response = await run_endpoint_function(
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/fastapi/routing.py", line 160, in run_endpoint_function
return await dependant.call(**values)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/dependency_injector/wiring.py", line 994, in _patched
return await _async_inject(
File "src/dependency_injector/_cwiring.pyx", line 66, in _async_inject
File "/app/codesuggestions/api/v2/endpoints/suggestions.py", line 81, in completions
suggestion = await run_in_threadpool(
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/starlette/concurrency.py", line 41, in run_in_threadpool
return await anyio.to_thread.run_sync(func, *args)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/anyio/to_thread.py", line 33, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
return await future
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 807, in run
result = context.run(func, *args)
File "/app/codesuggestions/api/timing.py", line 13, in wrap
result = f(*args, **kwargs)
File "/app/codesuggestions/api/v2/endpoints/suggestions.py", line 104, in get_suggestions
return usecase(
File "/app/codesuggestions/suggestions/base.py", line 112, in __call__
return self.engine.generate_completion(prefix, suffix, file_name)
File "/app/codesuggestions/suggestions/processing/engine.py", line 152, in generate_completion
prompt, suffix = self._build_prompt(prefix, suffix, lang_id)
File "/app/codesuggestions/suggestions/processing/engine.py", line 159, in _build_prompt
imports = self._get_imports(prefix, lang_id)
File "/app/codesuggestions/suggestions/processing/engine.py", line 180, in _get_imports
imports_tokenized = self.tokenizer(
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/transformers/tokenization_utils_base.py", line 2561, in __call__
encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/transformers/tokenization_utils_base.py", line 2647, in _call_one
return self.batch_encode_plus(
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/transformers/tokenization_utils_base.py", line 2838, in batch_encode_plus
return self._batch_encode_plus(
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/transformers/models/gpt2/tokenization_gpt2_fast.py", line 168, in _batch_encode_plus
return super()._batch_encode_plus(*args, **kwargs)
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/transformers/tokenization_utils_fast.py", line 417, in _batch_encode_plus
self.set_truncation_and_padding(
File "/opt/venv/codesuggestions-9TtSrW0h-py3.9/lib/python3.9/site-packages/transformers/tokenization_utils_fast.py", line 353, in set_truncation_and_padding
self._tokenizer.no_truncation()
RuntimeError: Already borrowed
To fix this error, we need to create a separate copy of the tokenizer for each worker thread: the fast (Rust-backed) tokenizer mutates its internal truncation/padding state on every call, which is not thread-safe when a single tokenizer instance is shared across threads (see https://github.com/huggingface/tokenizers/issues/537).
Edited by Alexander Chueshev