Vectors: Added command to regenerate for all
Some checks failed
test-php / build (8.4) (push) Has been cancelled
analyse-php / build (push) Has been cancelled
lint-php / build (push) Has been cancelled
test-migrations / build (8.2) (push) Has been cancelled
test-migrations / build (8.3) (push) Has been cancelled
test-migrations / build (8.4) (push) Has been cancelled
test-php / build (8.2) (push) Has been cancelled
test-php / build (8.3) (push) Has been cancelled

Also made models configurable.
Tested system scales via 86k vector entries.
This commit is contained in:
Dan Brown 2025-03-25 19:38:32 +00:00
parent 0ffcb3d4aa
commit a023bed41d
No known key found for this signature in database
GPG Key ID: 46D9F943C24A2EF9
6 changed files with 68 additions and 10 deletions

View File

@ -30,6 +30,8 @@ return [
'openai' => [
'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
'key' => env('OPENAI_KEY', ''),
'embedding_model' => env('OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'),
'query_model' => env('OPENAI_QUERY_MODEL', 'gpt-4o'),
],
'github' => [

View File

@ -0,0 +1,46 @@
<?php
namespace BookStack\Console\Commands;
use BookStack\Entities\EntityProvider;
use BookStack\Entities\Models\Entity;
use BookStack\Search\Vectors\SearchVector;
use BookStack\Search\Vectors\StoreEntityVectorsJob;
use Illuminate\Console\Command;
class RegenerateVectorsCommand extends Command
{
/**
* The name and signature of the console command.
*
* @var string
*/
protected $signature = 'bookstack:regenerate-vectors';
/**
* The console command description.
*
* @var string
*/
protected $description = 'Re-index vectors for all content in the system';
/**
* Execute the console command.
*/
public function handle(EntityProvider $entityProvider)
{
// TODO - Add confirmation before run regarding deletion/time/effort/api-cost etc...
SearchVector::query()->delete();
$types = $entityProvider->all();
foreach ($types as $type => $typeInstance) {
$this->info("Creating jobs to store vectors for {$type} data...");
/** @var Entity[] $entities */
$typeInstance->newQuery()->chunkById(100, function ($entities) {
foreach ($entities as $entity) {
dispatch(new StoreEntityVectorsJob($entity));
}
});
}
}
}

View File

@ -6,17 +6,26 @@ use BookStack\Http\HttpRequestService;
class OpenAiVectorQueryService implements VectorQueryService
{
protected string $key;
protected string $endpoint;
protected string $embeddingModel;
protected string $queryModel;
public function __construct(
protected string $endpoint,
protected string $key,
protected array $options,
protected HttpRequestService $http,
) {
// TODO - Some kind of validation of options
$this->key = $this->options['key'] ?? '';
$this->endpoint = $this->options['endpoint'] ?? '';
$this->embeddingModel = $this->options['embedding_model'] ?? '';
$this->queryModel = $this->options['query_model'] ?? '';
}
protected function jsonRequest(string $method, string $uri, array $data): array
{
$fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
$client = $this->http->buildClient(10);
$client = $this->http->buildClient(30);
$request = $this->http->jsonRequest($method, $fullUrl, $data)
->withHeader('Authorization', 'Bearer ' . $this->key);
@ -28,7 +37,7 @@ class OpenAiVectorQueryService implements VectorQueryService
{
$response = $this->jsonRequest('POST', 'v1/embeddings', [
'input' => $text,
'model' => 'text-embedding-3-small',
'model' => $this->embeddingModel,
]);
return $response['data'][0]['embedding'];
@ -39,15 +48,15 @@ class OpenAiVectorQueryService implements VectorQueryService
$formattedContext = implode("\n", $context);
$response = $this->jsonRequest('POST', 'v1/chat/completions', [
'model' => 'gpt-4o',
'model' => $this->queryModel,
'messages' => [
[
'role' => 'developer',
'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response.'
'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response. Don\'t try to converse or continue the conversation.'
],
[
'role' => 'user',
'content' => "Provide a response to the below given QUERY using the below given CONTEXT\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
'content' => "Provide a response to the below given QUERY using the below given CONTEXT. The CONTEXT is split into parts via lines. Ignore any nonsensical lines of CONTEXT.\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
]
],
]);

View File

@ -18,9 +18,7 @@ class VectorQueryServiceProvider
$service = $this->getServiceName();
if ($service === 'openai') {
$key = config('services.openai.key');
$endpoint = config('services.openai.endpoint');
return new OpenAiVectorQueryService($endpoint, $key, $this->http);
return new OpenAiVectorQueryService(config('services.openai'), $this->http);
}
throw new \Exception("No '{$service}' LLM service found");

View File

@ -19,6 +19,7 @@ class VectorSearchRunner
$topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id')
->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . ']"), embedding) as distance')
->orderBy('distance', 'asc')
->having('distance', '<', 0.6)
->limit(10)
->get();

View File

@ -21,6 +21,8 @@ return new class extends Migration
});
$table = DB::getTablePrefix() . 'search_vectors';
// TODO - Vector size might need to be dynamic
DB::statement("ALTER TABLE {$table} ADD COLUMN (embedding VECTOR(1536) NOT NULL)");
DB::statement("ALTER TABLE {$table} ADD VECTOR INDEX (embedding) DISTANCE=cosine");
}