mirror of
https://github.com/BookStackApp/BookStack.git
synced 2025-04-17 08:49:00 +08:00
Vectors: Added command to regenerate for all
Some checks failed
test-php / build (8.4) (push) Has been cancelled
analyse-php / build (push) Has been cancelled
lint-php / build (push) Has been cancelled
test-migrations / build (8.2) (push) Has been cancelled
test-migrations / build (8.3) (push) Has been cancelled
test-migrations / build (8.4) (push) Has been cancelled
test-php / build (8.2) (push) Has been cancelled
test-php / build (8.3) (push) Has been cancelled
Some checks failed
test-php / build (8.4) (push) Has been cancelled
analyse-php / build (push) Has been cancelled
lint-php / build (push) Has been cancelled
test-migrations / build (8.2) (push) Has been cancelled
test-migrations / build (8.3) (push) Has been cancelled
test-migrations / build (8.4) (push) Has been cancelled
test-php / build (8.2) (push) Has been cancelled
test-php / build (8.3) (push) Has been cancelled
Also made models configurable. Tested system scales via 86k vector entries.
This commit is contained in:
parent
0ffcb3d4aa
commit
a023bed41d
@ -30,6 +30,8 @@ return [
|
||||
'openai' => [
|
||||
'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
|
||||
'key' => env('OPENAI_KEY', ''),
|
||||
'embedding_model' => env('OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'),
|
||||
'query_model' => env('OPENAI_QUERY_MODEL', 'gpt-4o'),
|
||||
],
|
||||
|
||||
'github' => [
|
||||
|
46
app/Console/Commands/RegenerateVectorsCommand.php
Normal file
46
app/Console/Commands/RegenerateVectorsCommand.php
Normal file
@ -0,0 +1,46 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Console\Commands;
|
||||
|
||||
use BookStack\Entities\EntityProvider;
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Search\Vectors\SearchVector;
|
||||
use BookStack\Search\Vectors\StoreEntityVectorsJob;
|
||||
use Illuminate\Console\Command;
|
||||
|
||||
class RegenerateVectorsCommand extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'bookstack:regenerate-vectors';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Re-index vectors for all content in the system';

    /**
     * Execute the console command.
     *
     * Deletes every existing search vector record, then walks each entity type
     * supplied by the given EntityProvider in chunks of 100, dispatching one
     * StoreEntityVectorsJob per entity found.
     *
     * NOTE(review): whether the dispatched jobs run inline or on a queue worker
     * depends on the application's queue configuration, which is not visible here.
     *
     * @return int Command exit code (always success once all jobs have been dispatched).
     */
    public function handle(EntityProvider $entityProvider): int
    {
        // TODO - Add confirmation before run regarding deletion/time/effort/api-cost etc...
        // Existing vectors are cleared up-front so that no stale entries remain
        // for content which is re-indexed by the jobs dispatched below.
        SearchVector::query()->delete();

        foreach ($entityProvider->all() as $type => $typeInstance) {
            $this->info("Creating jobs to store vectors for {$type} data...");

            // Chunk by ID so that only 100 entities of this type are loaded at a time.
            $typeInstance->newQuery()->chunkById(100, static function (iterable $entities): void {
                /** @var Entity $entity */
                foreach ($entities as $entity) {
                    dispatch(new StoreEntityVectorsJob($entity));
                }
            });
        }

        return self::SUCCESS;
    }
}
|
@ -6,17 +6,26 @@ use BookStack\Http\HttpRequestService;
|
||||
|
||||
class OpenAiVectorQueryService implements VectorQueryService
|
||||
{
|
||||
protected string $key;
|
||||
protected string $endpoint;
|
||||
protected string $embeddingModel;
|
||||
protected string $queryModel;
|
||||
|
||||
public function __construct(
|
||||
protected string $endpoint,
|
||||
protected string $key,
|
||||
protected array $options,
|
||||
protected HttpRequestService $http,
|
||||
) {
|
||||
// TODO - Some kind of validation of options
|
||||
$this->key = $this->options['key'] ?? '';
|
||||
$this->endpoint = $this->options['endpoint'] ?? '';
|
||||
$this->embeddingModel = $this->options['embedding_model'] ?? '';
|
||||
$this->queryModel = $this->options['query_model'] ?? '';
|
||||
}
|
||||
|
||||
protected function jsonRequest(string $method, string $uri, array $data): array
|
||||
{
|
||||
$fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
|
||||
$client = $this->http->buildClient(10);
|
||||
$client = $this->http->buildClient(30);
|
||||
$request = $this->http->jsonRequest($method, $fullUrl, $data)
|
||||
->withHeader('Authorization', 'Bearer ' . $this->key);
|
||||
|
||||
@ -28,7 +37,7 @@ class OpenAiVectorQueryService implements VectorQueryService
|
||||
{
|
||||
$response = $this->jsonRequest('POST', 'v1/embeddings', [
|
||||
'input' => $text,
|
||||
'model' => 'text-embedding-3-small',
|
||||
'model' => $this->embeddingModel,
|
||||
]);
|
||||
|
||||
return $response['data'][0]['embedding'];
|
||||
@ -39,15 +48,15 @@ class OpenAiVectorQueryService implements VectorQueryService
|
||||
$formattedContext = implode("\n", $context);
|
||||
|
||||
$response = $this->jsonRequest('POST', 'v1/chat/completions', [
|
||||
'model' => 'gpt-4o',
|
||||
'model' => $this->queryModel,
|
||||
'messages' => [
|
||||
[
|
||||
'role' => 'developer',
|
||||
'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response.'
|
||||
'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response. Don\'t try to converse or continue the conversation.'
|
||||
],
|
||||
[
|
||||
'role' => 'user',
|
||||
'content' => "Provide a response to the below given QUERY using the below given CONTEXT\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
|
||||
'content' => "Provide a response to the below given QUERY using the below given CONTEXT. The CONTEXT is split into parts via lines. Ignore any nonsensical lines of CONTEXT.\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
|
||||
]
|
||||
],
|
||||
]);
|
||||
|
@ -18,9 +18,7 @@ class VectorQueryServiceProvider
|
||||
$service = $this->getServiceName();
|
||||
|
||||
if ($service === 'openai') {
|
||||
$key = config('services.openai.key');
|
||||
$endpoint = config('services.openai.endpoint');
|
||||
return new OpenAiVectorQueryService($endpoint, $key, $this->http);
|
||||
return new OpenAiVectorQueryService(config('services.openai'), $this->http);
|
||||
}
|
||||
|
||||
throw new \Exception("No '{$service}' LLM service found");
|
||||
|
@ -19,6 +19,7 @@ class VectorSearchRunner
|
||||
$topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id')
|
||||
->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . ']"), embedding) as distance')
|
||||
->orderBy('distance', 'asc')
|
||||
->having('distance', '<', 0.6)
|
||||
->limit(10)
|
||||
->get();
|
||||
|
||||
|
@ -21,6 +21,8 @@ return new class extends Migration
|
||||
});
|
||||
|
||||
$table = DB::getTablePrefix() . 'search_vectors';
|
||||
|
||||
// TODO - Vector size might need to be dynamic
|
||||
DB::statement("ALTER TABLE {$table} ADD COLUMN (embedding VECTOR(1536) NOT NULL)");
|
||||
DB::statement("ALTER TABLE {$table} ADD VECTOR INDEX (embedding) DISTANCE=cosine");
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user