Search indexing: handle non-breaking spaces as regular spaces.

SearchIndex now converts U+00A0 (non-breaking space) characters in the text
content of each body child to plain spaces before building the term-count map.
The added test stores page HTML whose words are separated only by &nbsp;
entities and asserts that those words are present as search terms.

diff --git a/app/Search/SearchIndex.php b/app/Search/SearchIndex.php
index 36f71f6cc..844e3584b 100644
--- a/app/Search/SearchIndex.php
+++ b/app/Search/SearchIndex.php
@@ -160,7 +160,9 @@ class SearchIndex
         /** @var DOMNode $child */
         foreach ($doc->getBodyChildren() as $child) {
             $nodeName = $child->nodeName;
-            $termCounts = $this->textToTermCountMap(trim($child->textContent));
+            $text = trim($child->textContent);
+            $text = str_replace("\u{00A0}", ' ', $text);
+            $termCounts = $this->textToTermCountMap($text);
             foreach ($termCounts as $term => $count) {
                 $scoreChange = $count * ($elementScoreAdjustmentMap[$nodeName] ?? 1);
                 $scoresByTerm[$term] = ($scoresByTerm[$term] ?? 0) + $scoreChange;
diff --git a/tests/Search/SearchIndexingTest.php b/tests/Search/SearchIndexingTest.php
index 64779dec6..d2bbb2905 100644
--- a/tests/Search/SearchIndexingTest.php
+++ b/tests/Search/SearchIndexingTest.php
@@ -106,4 +106,14 @@ class SearchIndexingTest extends TestCase
             $this->assertNull($scoreByTerm->get($term), "Failed asserting that \"$term\" is not indexed");
         }
     }
+
+    public function test_non_breaking_spaces_handled_as_spaces()
+    {
+        $page = $this->entities->newPage(['html' => '<p>a&nbsp;tigerbadger&nbsp;is&nbsp;a&nbsp;dangerous&nbsp;animal</p>']);
+
+        $scoreByTerm = $page->searchTerms()->pluck('score', 'term');
+        $this->assertNotNull($scoreByTerm->get('tigerbadger'));
+        $this->assertNotNull($scoreByTerm->get('dangerous'));
+        $this->assertNotNull($scoreByTerm->get('animal'));
+    }
 }