mirror of
https://github.com/BookStackApp/BookStack.git
synced 2025-04-17 08:49:00 +08:00
Merge a023bed41d0219d08d7dbce52948e3e5c3528381 into fa566f156ad8998d67a63be2856dafc7ce277d88
This commit is contained in:
commit
8d6577c094
@ -22,6 +22,18 @@ return [
|
||||
// Callback URL for social authentication methods
|
||||
'callback_url' => env('APP_URL', false),
|
||||
|
||||
// LLM Service
|
||||
// Options: openai
|
||||
'llm' => env('LLM_SERVICE', ''),
|
||||
|
||||
// OpenAI API-compatible service details
|
||||
'openai' => [
|
||||
'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
|
||||
'key' => env('OPENAI_KEY', ''),
|
||||
'embedding_model' => env('OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'),
|
||||
'query_model' => env('OPENAI_QUERY_MODEL', 'gpt-4o'),
|
||||
],
|
||||
|
||||
'github' => [
|
||||
'client_id' => env('GITHUB_APP_ID', false),
|
||||
'client_secret' => env('GITHUB_APP_SECRET', false),
|
||||
|
46
app/Console/Commands/RegenerateVectorsCommand.php
Normal file
46
app/Console/Commands/RegenerateVectorsCommand.php
Normal file
@ -0,0 +1,46 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Console\Commands;
|
||||
|
||||
use BookStack\Entities\EntityProvider;
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Search\Vectors\SearchVector;
|
||||
use BookStack\Search\Vectors\StoreEntityVectorsJob;
|
||||
use Illuminate\Console\Command;
|
||||
|
||||
class RegenerateVectorsCommand extends Command
|
||||
{
|
||||
/**
|
||||
* The name and signature of the console command.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $signature = 'bookstack:regenerate-vectors';
|
||||
|
||||
/**
|
||||
* The console command description.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $description = 'Re-index vectors for all content in the system';
|
||||
|
||||
/**
|
||||
* Execute the console command.
|
||||
*/
|
||||
public function handle(EntityProvider $entityProvider)
|
||||
{
|
||||
// TODO - Add confirmation before run regarding deletion/time/effort/api-cost etc...
|
||||
SearchVector::query()->delete();
|
||||
|
||||
$types = $entityProvider->all();
|
||||
foreach ($types as $type => $typeInstance) {
|
||||
$this->info("Creating jobs to store vectors for {$type} data...");
|
||||
/** @var Entity[] $entities */
|
||||
$typeInstance->newQuery()->chunkById(100, function ($entities) {
|
||||
foreach ($entities as $entity) {
|
||||
dispatch(new StoreEntityVectorsJob($entity));
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
@ -6,6 +6,7 @@ use BookStack\Entities\Queries\PageQueries;
|
||||
use BookStack\Entities\Queries\QueryPopular;
|
||||
use BookStack\Entities\Tools\SiblingFetcher;
|
||||
use BookStack\Http\Controller;
|
||||
use BookStack\Search\Vectors\VectorSearchRunner;
|
||||
use Illuminate\Http\Request;
|
||||
|
||||
class SearchController extends Controller
|
||||
@ -139,4 +140,19 @@ class SearchController extends Controller
|
||||
|
||||
return view('entities.list-basic', ['entities' => $entities, 'style' => 'compact']);
|
||||
}
|
||||
|
||||
public function searchQuery(Request $request, VectorSearchRunner $runner)
|
||||
{
|
||||
$query = $request->get('query', '');
|
||||
|
||||
if ($query) {
|
||||
$results = $runner->run($query);
|
||||
} else {
|
||||
$results = null;
|
||||
}
|
||||
|
||||
return view('search.query', [
|
||||
'results' => $results,
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
@ -6,6 +6,8 @@ use BookStack\Activity\Models\Tag;
|
||||
use BookStack\Entities\EntityProvider;
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Entities\Models\Page;
|
||||
use BookStack\Search\Vectors\StoreEntityVectorsJob;
|
||||
use BookStack\Search\Vectors\VectorQueryServiceProvider;
|
||||
use BookStack\Util\HtmlDocument;
|
||||
use DOMNode;
|
||||
use Illuminate\Database\Eloquent\Builder;
|
||||
@ -25,7 +27,7 @@ class SearchIndex
|
||||
public static string $softDelimiters = ".-";
|
||||
|
||||
public function __construct(
|
||||
protected EntityProvider $entityProvider
|
||||
protected EntityProvider $entityProvider,
|
||||
) {
|
||||
}
|
||||
|
||||
@ -37,6 +39,10 @@ class SearchIndex
|
||||
$this->deleteEntityTerms($entity);
|
||||
$terms = $this->entityToTermDataArray($entity);
|
||||
$this->insertTerms($terms);
|
||||
|
||||
if (VectorQueryServiceProvider::isEnabled()) {
|
||||
dispatch(new StoreEntityVectorsJob($entity));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -47,9 +53,15 @@ class SearchIndex
|
||||
public function indexEntities(array $entities): void
|
||||
{
|
||||
$terms = [];
|
||||
$vectorQueryEnabled = VectorQueryServiceProvider::isEnabled();
|
||||
|
||||
foreach ($entities as $entity) {
|
||||
$entityTerms = $this->entityToTermDataArray($entity);
|
||||
array_push($terms, ...$entityTerms);
|
||||
|
||||
if ($vectorQueryEnabled) {
|
||||
dispatch(new StoreEntityVectorsJob($entity));
|
||||
}
|
||||
}
|
||||
|
||||
$this->insertTerms($terms);
|
||||
|
84
app/Search/Vectors/EntityVectorGenerator.php
Normal file
84
app/Search/Vectors/EntityVectorGenerator.php
Normal file
@ -0,0 +1,84 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Search\Vectors\Services\VectorQueryService;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
|
||||
class EntityVectorGenerator
|
||||
{
|
||||
public function __construct(
|
||||
protected VectorQueryServiceProvider $vectorQueryServiceProvider
|
||||
) {
|
||||
}
|
||||
|
||||
public function generateAndStore(Entity $entity): void
|
||||
{
|
||||
$vectorService = $this->vectorQueryServiceProvider->get();
|
||||
|
||||
$text = $this->entityToPlainText($entity);
|
||||
$chunks = $this->chunkText($text);
|
||||
$embeddings = $this->chunksToEmbeddings($chunks, $vectorService);
|
||||
|
||||
$this->deleteExistingEmbeddingsForEntity($entity);
|
||||
$this->storeEmbeddings($embeddings, $chunks, $entity);
|
||||
}
|
||||
|
||||
protected function deleteExistingEmbeddingsForEntity(Entity $entity): void
|
||||
{
|
||||
SearchVector::query()
|
||||
->where('entity_type', '=', $entity->getMorphClass())
|
||||
->where('entity_id', '=', $entity->id)
|
||||
->delete();
|
||||
}
|
||||
|
||||
protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void
|
||||
{
|
||||
$toInsert = [];
|
||||
|
||||
foreach ($embeddings as $index => $embedding) {
|
||||
$text = $textChunks[$index];
|
||||
$toInsert[] = [
|
||||
'entity_id' => $entity->id,
|
||||
'entity_type' => $entity->getMorphClass(),
|
||||
'embedding' => DB::raw('VEC_FROMTEXT("[' . implode(',', $embedding) . ']")'),
|
||||
'text' => $text,
|
||||
];
|
||||
}
|
||||
|
||||
// TODO - Chunk inserts
|
||||
SearchVector::query()->insert($toInsert);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $chunks
|
||||
* @return float[] array
|
||||
*/
|
||||
protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
|
||||
{
|
||||
$embeddings = [];
|
||||
foreach ($chunks as $index => $chunk) {
|
||||
$embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk);
|
||||
}
|
||||
return $embeddings;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
protected function chunkText(string $text): array
|
||||
{
|
||||
// TODO - Join adjacent smaller chunks up
|
||||
return array_filter(array_map(function (string $section): string {
|
||||
return trim($section);
|
||||
}, explode("\n", $text)));
|
||||
}
|
||||
|
||||
protected function entityToPlainText(Entity $entity): string
|
||||
{
|
||||
$text = $entity->name . "\n\n" . $entity->{$entity->textField};
|
||||
// TODO - Add tags
|
||||
return $text;
|
||||
}
|
||||
}
|
16
app/Search/Vectors/SearchVector.php
Normal file
16
app/Search/Vectors/SearchVector.php
Normal file
@ -0,0 +1,16 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
|
||||
/**
|
||||
* @property string $entity_type
|
||||
* @property int $entity_id
|
||||
* @property string $text
|
||||
* @property string $embedding
|
||||
*/
|
||||
class SearchVector extends Model
|
||||
{
|
||||
public $timestamps = false;
|
||||
}
|
66
app/Search/Vectors/Services/OpenAiVectorQueryService.php
Normal file
66
app/Search/Vectors/Services/OpenAiVectorQueryService.php
Normal file
@ -0,0 +1,66 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors\Services;
|
||||
|
||||
use BookStack\Http\HttpRequestService;
|
||||
|
||||
class OpenAiVectorQueryService implements VectorQueryService
|
||||
{
|
||||
protected string $key;
|
||||
protected string $endpoint;
|
||||
protected string $embeddingModel;
|
||||
protected string $queryModel;
|
||||
|
||||
public function __construct(
|
||||
protected array $options,
|
||||
protected HttpRequestService $http,
|
||||
) {
|
||||
// TODO - Some kind of validation of options
|
||||
$this->key = $this->options['key'] ?? '';
|
||||
$this->endpoint = $this->options['endpoint'] ?? '';
|
||||
$this->embeddingModel = $this->options['embedding_model'] ?? '';
|
||||
$this->queryModel = $this->options['query_model'] ?? '';
|
||||
}
|
||||
|
||||
protected function jsonRequest(string $method, string $uri, array $data): array
|
||||
{
|
||||
$fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
|
||||
$client = $this->http->buildClient(30);
|
||||
$request = $this->http->jsonRequest($method, $fullUrl, $data)
|
||||
->withHeader('Authorization', 'Bearer ' . $this->key);
|
||||
|
||||
$response = $client->sendRequest($request);
|
||||
return json_decode($response->getBody()->getContents(), true);
|
||||
}
|
||||
|
||||
public function generateEmbeddings(string $text): array
|
||||
{
|
||||
$response = $this->jsonRequest('POST', 'v1/embeddings', [
|
||||
'input' => $text,
|
||||
'model' => $this->embeddingModel,
|
||||
]);
|
||||
|
||||
return $response['data'][0]['embedding'];
|
||||
}
|
||||
|
||||
public function query(string $input, array $context): string
|
||||
{
|
||||
$formattedContext = implode("\n", $context);
|
||||
|
||||
$response = $this->jsonRequest('POST', 'v1/chat/completions', [
|
||||
'model' => $this->queryModel,
|
||||
'messages' => [
|
||||
[
|
||||
'role' => 'developer',
|
||||
'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response. Don\'t try to converse or continue the conversation.'
|
||||
],
|
||||
[
|
||||
'role' => 'user',
|
||||
'content' => "Provide a response to the below given QUERY using the below given CONTEXT. The CONTEXT is split into parts via lines. Ignore any nonsensical lines of CONTEXT.\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
|
||||
]
|
||||
],
|
||||
]);
|
||||
|
||||
return $response['choices'][0]['message']['content'] ?? '';
|
||||
}
|
||||
}
|
21
app/Search/Vectors/Services/VectorQueryService.php
Normal file
21
app/Search/Vectors/Services/VectorQueryService.php
Normal file
@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors\Services;
|
||||
|
||||
interface VectorQueryService
|
||||
{
|
||||
/**
|
||||
* Generate embedding vectors from the given chunk of text.
|
||||
* @return float[]
|
||||
*/
|
||||
public function generateEmbeddings(string $text): array;
|
||||
|
||||
/**
|
||||
* Query the LLM service using the given user input, and
|
||||
* relevant context text retrieved locally via a vector search.
|
||||
* Returns the response output text from the LLM.
|
||||
*
|
||||
* @param string[] $context
|
||||
*/
|
||||
public function query(string $input, array $context): string;
|
||||
}
|
28
app/Search/Vectors/StoreEntityVectorsJob.php
Normal file
28
app/Search/Vectors/StoreEntityVectorsJob.php
Normal file
@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Queue\Queueable;
|
||||
|
||||
class StoreEntityVectorsJob implements ShouldQueue
|
||||
{
|
||||
use Queueable;
|
||||
|
||||
/**
|
||||
* Create a new job instance.
|
||||
*/
|
||||
public function __construct(
|
||||
protected Entity $entity
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute the job.
|
||||
*/
|
||||
public function handle(EntityVectorGenerator $generator): void
|
||||
{
|
||||
$generator->generateAndStore($this->entity);
|
||||
}
|
||||
}
|
36
app/Search/Vectors/VectorQueryServiceProvider.php
Normal file
36
app/Search/Vectors/VectorQueryServiceProvider.php
Normal file
@ -0,0 +1,36 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
|
||||
use BookStack\Http\HttpRequestService;
|
||||
use BookStack\Search\Vectors\Services\OpenAiVectorQueryService;
|
||||
use BookStack\Search\Vectors\Services\VectorQueryService;
|
||||
|
||||
class VectorQueryServiceProvider
|
||||
{
|
||||
public function __construct(
|
||||
protected HttpRequestService $http,
|
||||
) {
|
||||
}
|
||||
|
||||
public function get(): VectorQueryService
|
||||
{
|
||||
$service = $this->getServiceName();
|
||||
|
||||
if ($service === 'openai') {
|
||||
return new OpenAiVectorQueryService(config('services.openai'), $this->http);
|
||||
}
|
||||
|
||||
throw new \Exception("No '{$service}' LLM service found");
|
||||
}
|
||||
|
||||
protected static function getServiceName(): string
|
||||
{
|
||||
return strtolower(config('services.llm'));
|
||||
}
|
||||
|
||||
public static function isEnabled(): bool
|
||||
{
|
||||
return !empty(static::getServiceName());
|
||||
}
|
||||
}
|
34
app/Search/Vectors/VectorSearchRunner.php
Normal file
34
app/Search/Vectors/VectorSearchRunner.php
Normal file
@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
|
||||
class VectorSearchRunner
|
||||
{
|
||||
public function __construct(
|
||||
protected VectorQueryServiceProvider $vectorQueryServiceProvider
|
||||
) {
|
||||
}
|
||||
|
||||
public function run(string $query): array
|
||||
{
|
||||
$queryService = $this->vectorQueryServiceProvider->get();
|
||||
$queryVector = $queryService->generateEmbeddings($query);
|
||||
|
||||
// TODO - Apply permissions
|
||||
// TODO - Join models
|
||||
$topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id')
|
||||
->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . ']"), embedding) as distance')
|
||||
->orderBy('distance', 'asc')
|
||||
->having('distance', '<', 0.6)
|
||||
->limit(10)
|
||||
->get();
|
||||
|
||||
$matchesText = array_values(array_map(fn (SearchVector $match) => $match->text, $topMatches->all()));
|
||||
$llmResult = $queryService->query($query, $matchesText);
|
||||
|
||||
return [
|
||||
'llm_result' => $llmResult,
|
||||
'entity_matches' => $topMatches->toArray()
|
||||
];
|
||||
}
|
||||
}
|
@ -0,0 +1,37 @@
|
||||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
|
||||
{
|
||||
/**
|
||||
* Run the migrations.
|
||||
*/
|
||||
public function up(): void
|
||||
{
|
||||
// TODO - Handle compatibility with older databases that don't support vectors
|
||||
Schema::create('search_vectors', function (Blueprint $table) {
|
||||
$table->string('entity_type', 100);
|
||||
$table->integer('entity_id');
|
||||
$table->text('text');
|
||||
|
||||
$table->index(['entity_type', 'entity_id']);
|
||||
});
|
||||
|
||||
$table = DB::getTablePrefix() . 'search_vectors';
|
||||
|
||||
// TODO - Vector size might need to be dynamic
|
||||
DB::statement("ALTER TABLE {$table} ADD COLUMN (embedding VECTOR(1536) NOT NULL)");
|
||||
DB::statement("ALTER TABLE {$table} ADD VECTOR INDEX (embedding) DISTANCE=cosine");
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverse the migrations.
|
||||
*/
|
||||
public function down(): void
|
||||
{
|
||||
Schema::dropIfExists('search_vectors');
|
||||
}
|
||||
};
|
29
resources/views/search/query.blade.php
Normal file
29
resources/views/search/query.blade.php
Normal file
@ -0,0 +1,29 @@
|
||||
@extends('layouts.simple')
|
||||
|
||||
@section('body')
|
||||
<div class="container mt-xl" id="search-system">
|
||||
|
||||
<form action="{{ url('/search/query') }}" method="get">
|
||||
<input name="query" type="text">
|
||||
<button class="button">Query</button>
|
||||
</form>
|
||||
|
||||
@if($results)
|
||||
<h2>Results</h2>
|
||||
|
||||
<h3>LLM Output</h3>
|
||||
<p>{{ $results['llm_result'] }}</p>
|
||||
|
||||
<h3>Entity Matches</h3>
|
||||
@foreach($results['entity_matches'] as $match)
|
||||
<div>
|
||||
<div><strong>{{ $match['entity_type'] }}:{{ $match['entity_id'] }}; Distance: {{ $match['distance'] }}</strong></div>
|
||||
<details>
|
||||
<summary>match text</summary>
|
||||
<div>{{ $match['text'] }}</div>
|
||||
</details>
|
||||
</div>
|
||||
@endforeach
|
||||
@endif
|
||||
</div>
|
||||
@stop
|
@ -187,6 +187,7 @@ Route::middleware('auth')->group(function () {
|
||||
|
||||
// Search
|
||||
Route::get('/search', [SearchController::class, 'search']);
|
||||
Route::get('/search/query', [SearchController::class, 'searchQuery']);
|
||||
Route::get('/search/book/{bookId}', [SearchController::class, 'searchBook']);
|
||||
Route::get('/search/chapter/{bookId}', [SearchController::class, 'searchChapter']);
|
||||
Route::get('/search/entity/siblings', [SearchController::class, 'searchSiblings']);
|
||||
|
Loading…
x
Reference in New Issue
Block a user