mirror of
https://github.com/BookStackApp/BookStack.git
synced 2025-06-06 18:54:33 +08:00
Vectors: Added command to regenerate for all
Some checks failed
test-php / build (8.4) (push) Has been cancelled
analyse-php / build (push) Has been cancelled
lint-php / build (push) Has been cancelled
test-migrations / build (8.2) (push) Has been cancelled
test-migrations / build (8.3) (push) Has been cancelled
test-migrations / build (8.4) (push) Has been cancelled
test-php / build (8.2) (push) Has been cancelled
test-php / build (8.3) (push) Has been cancelled
Some checks failed
test-php / build (8.4) (push) Has been cancelled
analyse-php / build (push) Has been cancelled
lint-php / build (push) Has been cancelled
test-migrations / build (8.2) (push) Has been cancelled
test-migrations / build (8.3) (push) Has been cancelled
test-migrations / build (8.4) (push) Has been cancelled
test-php / build (8.2) (push) Has been cancelled
test-php / build (8.3) (push) Has been cancelled
Also made models configurable. Tested system scales via 86k vector entries.
This commit is contained in:
@ -30,6 +30,8 @@ return [
|
|||||||
'openai' => [
|
'openai' => [
|
||||||
'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
|
'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
|
||||||
'key' => env('OPENAI_KEY', ''),
|
'key' => env('OPENAI_KEY', ''),
|
||||||
|
'embedding_model' => env('OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'),
|
||||||
|
'query_model' => env('OPENAI_QUERY_MODEL', 'gpt-4o'),
|
||||||
],
|
],
|
||||||
|
|
||||||
'github' => [
|
'github' => [
|
||||||
|
46
app/Console/Commands/RegenerateVectorsCommand.php
Normal file
46
app/Console/Commands/RegenerateVectorsCommand.php
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace BookStack\Console\Commands;
|
||||||
|
|
||||||
|
use BookStack\Entities\EntityProvider;
|
||||||
|
use BookStack\Entities\Models\Entity;
|
||||||
|
use BookStack\Search\Vectors\SearchVector;
|
||||||
|
use BookStack\Search\Vectors\StoreEntityVectorsJob;
|
||||||
|
use Illuminate\Console\Command;
|
||||||
|
|
||||||
|
/**
 * Console command that rebuilds the stored search vectors for all content.
 *
 * Running it deletes every existing SearchVector row, then dispatches one
 * StoreEntityVectorsJob per entity of each type returned by the EntityProvider.
 */
class RegenerateVectorsCommand extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'bookstack:regenerate-vectors';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Re-index vectors for all content in the system';

    /**
     * Execute the console command.
     *
     * @param EntityProvider $entityProvider Supplies one model instance per entity type, keyed by type name.
     *
     * @return int Exit code; self::SUCCESS once jobs for every type have been dispatched.
     */
    public function handle(EntityProvider $entityProvider): int
    {
        // TODO - Add confirmation before run regarding deletion/time/effort/api-cost etc...
        // All existing vectors are removed before any replacement job is dispatched.
        SearchVector::query()->delete();

        foreach ($entityProvider->all() as $type => $typeInstance) {
            $this->info("Creating jobs to store vectors for {$type} data...");

            // Entities are fetched 100 at a time; the callback returns nothing,
            // so chunking is never stopped early.
            $typeInstance->newQuery()->chunkById(100, static function (iterable $entities): void {
                /** @var Entity $entity */
                foreach ($entities as $entity) {
                    dispatch(new StoreEntityVectorsJob($entity));
                }
            });
        }

        return self::SUCCESS;
    }
}
|
@ -6,17 +6,26 @@ use BookStack\Http\HttpRequestService;
|
|||||||
|
|
||||||
class OpenAiVectorQueryService implements VectorQueryService
|
class OpenAiVectorQueryService implements VectorQueryService
|
||||||
{
|
{
|
||||||
|
protected string $key;
|
||||||
|
protected string $endpoint;
|
||||||
|
protected string $embeddingModel;
|
||||||
|
protected string $queryModel;
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
protected string $endpoint,
|
protected array $options,
|
||||||
protected string $key,
|
|
||||||
protected HttpRequestService $http,
|
protected HttpRequestService $http,
|
||||||
) {
|
) {
|
||||||
|
// TODO - Some kind of validation of options
|
||||||
|
$this->key = $this->options['key'] ?? '';
|
||||||
|
$this->endpoint = $this->options['endpoint'] ?? '';
|
||||||
|
$this->embeddingModel = $this->options['embedding_model'] ?? '';
|
||||||
|
$this->queryModel = $this->options['query_model'] ?? '';
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function jsonRequest(string $method, string $uri, array $data): array
|
protected function jsonRequest(string $method, string $uri, array $data): array
|
||||||
{
|
{
|
||||||
$fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
|
$fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
|
||||||
$client = $this->http->buildClient(10);
|
$client = $this->http->buildClient(30);
|
||||||
$request = $this->http->jsonRequest($method, $fullUrl, $data)
|
$request = $this->http->jsonRequest($method, $fullUrl, $data)
|
||||||
->withHeader('Authorization', 'Bearer ' . $this->key);
|
->withHeader('Authorization', 'Bearer ' . $this->key);
|
||||||
|
|
||||||
@ -28,7 +37,7 @@ class OpenAiVectorQueryService implements VectorQueryService
|
|||||||
{
|
{
|
||||||
$response = $this->jsonRequest('POST', 'v1/embeddings', [
|
$response = $this->jsonRequest('POST', 'v1/embeddings', [
|
||||||
'input' => $text,
|
'input' => $text,
|
||||||
'model' => 'text-embedding-3-small',
|
'model' => $this->embeddingModel,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
return $response['data'][0]['embedding'];
|
return $response['data'][0]['embedding'];
|
||||||
@ -39,15 +48,15 @@ class OpenAiVectorQueryService implements VectorQueryService
|
|||||||
$formattedContext = implode("\n", $context);
|
$formattedContext = implode("\n", $context);
|
||||||
|
|
||||||
$response = $this->jsonRequest('POST', 'v1/chat/completions', [
|
$response = $this->jsonRequest('POST', 'v1/chat/completions', [
|
||||||
'model' => 'gpt-4o',
|
'model' => $this->queryModel,
|
||||||
'messages' => [
|
'messages' => [
|
||||||
[
|
[
|
||||||
'role' => 'developer',
|
'role' => 'developer',
|
||||||
'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response.'
|
'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response. Don\'t try to converse or continue the conversation.'
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
'role' => 'user',
|
'role' => 'user',
|
||||||
'content' => "Provide a response to the below given QUERY using the below given CONTEXT\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
|
'content' => "Provide a response to the below given QUERY using the below given CONTEXT. The CONTEXT is split into parts via lines. Ignore any nonsensical lines of CONTEXT.\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
]);
|
]);
|
||||||
|
@ -18,9 +18,7 @@ class VectorQueryServiceProvider
|
|||||||
$service = $this->getServiceName();
|
$service = $this->getServiceName();
|
||||||
|
|
||||||
if ($service === 'openai') {
|
if ($service === 'openai') {
|
||||||
$key = config('services.openai.key');
|
return new OpenAiVectorQueryService(config('services.openai'), $this->http);
|
||||||
$endpoint = config('services.openai.endpoint');
|
|
||||||
return new OpenAiVectorQueryService($endpoint, $key, $this->http);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new \Exception("No '{$service}' LLM service found");
|
throw new \Exception("No '{$service}' LLM service found");
|
||||||
|
@ -19,6 +19,7 @@ class VectorSearchRunner
|
|||||||
$topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id')
|
$topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id')
|
||||||
->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . ']"), embedding) as distance')
|
->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . ']"), embedding) as distance')
|
||||||
->orderBy('distance', 'asc')
|
->orderBy('distance', 'asc')
|
||||||
|
->having('distance', '<', 0.6)
|
||||||
->limit(10)
|
->limit(10)
|
||||||
->get();
|
->get();
|
||||||
|
|
||||||
|
@ -21,6 +21,8 @@ return new class extends Migration
|
|||||||
});
|
});
|
||||||
|
|
||||||
$table = DB::getTablePrefix() . 'search_vectors';
|
$table = DB::getTablePrefix() . 'search_vectors';
|
||||||
|
|
||||||
|
// TODO - Vector size might need to be dynamic
|
||||||
DB::statement("ALTER TABLE {$table} ADD COLUMN (embedding VECTOR(1536) NOT NULL)");
|
DB::statement("ALTER TABLE {$table} ADD COLUMN (embedding VECTOR(1536) NOT NULL)");
|
||||||
DB::statement("ALTER TABLE {$table} ADD VECTOR INDEX (embedding) DISTANCE=cosine");
|
DB::statement("ALTER TABLE {$table} ADD VECTOR INDEX (embedding) DISTANCE=cosine");
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user