<?php
namespace App\Controller\View\ElasticSearch;
use App\Application\Sonata\MediaBundle\Entity\Media;
use App\Entity\Magazine\Magazine;
use App\Entity\Magazine\MagazinePage;
use Elastica\Query;
use Elastica\Query\Nested;
use Elastica\Query\InnerHits;
use Elastica\Query\BoolQuery;
use Elastica\Query\Term;
use Elastica\Suggest;
use Elastica\Util;
use Sensio\Bundle\FrameworkExtraBundle\Configuration\Route;
use Symfony\Bundle\FrameworkBundle\Controller\Controller;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\HttpFoundation\JsonResponse;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\HttpFoundation\Session\SessionInterface;
use Symfony\Component\HttpKernel\Exception\BadRequestHttpException;
use Symfony\Component\Serializer\Encoder\JsonEncoder;
use Symfony\Component\Serializer\Normalizer\ObjectNormalizer;
use Symfony\Component\Serializer\Serializer;
use function Symfony\Component\String\u;
/**
* @Route("/view/search")
*/
class SearchController extends Controller
{
    /**
     * Id of the magazine that gets the dedicated highlight settings and result ordering
     * (identified in the original code as "claudie-pierlot-automne-hiver-2021 | catalogue").
     */
    private const CLAUDIE_CATALOGUE_MAGAZINE_ID = 118;

    /** Tags wrapped around every highlighted term by Elasticsearch. */
    private const HIGHLIGHT_PRE_TAG = '<strong>';
    private const HIGHLIGHT_POST_TAG = '</strong>';

    /**
     * Returns completion suggestions for the "q" prefix, restricted to one magazine.
     *
     * Responds with {"suggestions": [{"keyword": "..."}, ...]}, or with a 404 JSON
     * message when no unpaid magazine matches the slug.
     *
     * @Route("/suggest/{slug}", name="search_suggest", options={"expose"=true})
     * @param Request $request Current request; must carry a non-empty "q" parameter.
     * @param string  $slug    Magazine slug taken from the route.
     * @return JsonResponse
     * @throws BadRequestHttpException When "q" is missing, empty or not a string.
     */
    public function getSuggestAction(Request $request, $slug): JsonResponse
    {
        $text = $this->requireQueryText($request);

        $magazine = $this->resolveMagazine($slug);
        if (!$magazine) {
            return $this->json(['message' => 'magazine not found'], Response::HTTP_NOT_FOUND);
        }

        $completion = new Suggest\Completion('search', 'suggest');
        // The completion context limits the suggestions to the resolved magazine.
        $completion->setParam('context', ['id' => $magazine->getId()]);
        $completion->setPrefix($text);
        $completion->setFuzzy(['fuzziness' => 1]);
        $completion->setSize(5);

        $resultSet = $this->get('fos_elastica.index.suggest.magazines')->search(Query::create($completion));

        $suggestions = [];
        foreach ($resultSet->getSuggests() as $suggests) {
            foreach ($suggests as $suggest) {
                foreach ($suggest['options'] as $option) {
                    $suggestions[] = ['keyword' => $option['text']];
                }
            }
        }

        return new JsonResponse([
            'suggestions' => $suggestions,
        ]);
    }

    /**
     * Full-text search inside the pages of one magazine.
     *
     * A first pass combines a phrase match and a plain match on "pages.content"; when it
     * yields nothing, a second pass runs a wildcard query on the same field. Every matching
     * page is returned with its highlighted fragments and the distinct highlighted keywords.
     *
     * Responds with {"q": lower-cased query, "results": [...]}, or with a 404 JSON message
     * when no unpaid magazine matches the slug.
     *
     * @Route("/{slug}", name="search_magazines", options={"expose"=true})
     * @param Request          $request Current request; must carry a non-empty "q" parameter.
     * @param string           $slug    Magazine slug taken from the route.
     * @param SessionInterface $session Not used by this action; kept so the signature stays unchanged.
     * @return JsonResponse
     * @throws BadRequestHttpException When "q" is missing, empty or not a string.
     */
    public function getMagazinesAction(Request $request, $slug, SessionInterface $session): JsonResponse
    {
        $text = $this->requireQueryText($request);

        $magazine = $this->resolveMagazine($slug);
        if (!$magazine) {
            return $this->json(['message' => 'magazine not found'], Response::HTTP_NOT_FOUND);
        }

        $text = mb_strtolower($text);
        $isClaudieCatalogue = $magazine->getId() === self::CLAUDIE_CATALOGUE_MAGAZINE_ID;

        $magazineFilter = new Term();
        $magazineFilter->setParam('id', $magazine->getId());

        // First pass: phrase match OR plain match on the nested page content.
        $exactMatch = new Query\MatchPhrase();
        $exactMatch->setFieldQuery('pages.content', $text);
        $looseMatch = new Query\MatchQuery();
        $looseMatch->setFieldQuery('pages.content', $text);
        $pageMatch = new BoolQuery();
        $pageMatch->addShould($exactMatch);
        $pageMatch->addShould($looseMatch);

        $nested = new Nested();
        $nested->setPath('pages');
        $nested->setQuery($pageMatch);
        $nested->setInnerHits($this->buildInnerHits($isClaudieCatalogue));

        $results = $this->searchMagazinePages($magazineFilter, $nested);

        if (!$results->getResults()) {
            // Second pass: substring search, only when the analysed match found nothing.
            // NOTE(review): "*" and "?" typed by the user are passed through as wildcard
            // metacharacters, exactly as before - confirm whether they should be escaped.
            $wildcard = new Query\Wildcard();
            $wildcard->setValue('pages.content', '*' . $text . '*');
            $nested->setQuery($wildcard);

            $results = $this->searchMagazinePages($magazineFilter, $nested);
        }

        $formatted = [];
        $documents = $results->getResults();
        if ($documents) {
            $isProductQuery = self::startsWithNumber($text);
            $pageRepository = $this->getDoctrine()->getRepository(MagazinePage::class);
            $pageHits = $documents[0]->getHit()['inner_hits']['pages']['hits']['hits'] ?? [];

            foreach ($pageHits as $hit) {
                $pageId = $hit['_source']['id'] ?? null;
                $page = null !== $pageId ? $pageRepository->findOneBy(['id' => $pageId]) : null;
                if (!$page) {
                    // The index can reference a page that no longer exists in the database.
                    continue;
                }

                $highlights = $hit['highlight']['pages.content'] ?? [];
                $keywords = array_values(array_unique(
                    self::extractDelimited($highlights, self::HIGHLIGHT_PRE_TAG, self::HIGHLIGHT_POST_TAG)
                ));

                // Special catalogue only: when the query itself is not a product reference
                // but a highlighted keyword is one, the page goes to the top of the list and
                // its fragments are cut so that they begin at the highlighted term.
                $promote = $isClaudieCatalogue
                    && !$isProductQuery
                    && self::anyStartsWithNumber($keywords);

                if ($promote) {
                    array_unshift(
                        $formatted,
                        $this->formatPageHit($page, $keywords, self::trimToHighlight($highlights))
                    );
                } else {
                    $formatted[] = $this->formatPageHit($page, $keywords, $highlights);
                }
            }
        }

        $encoder = new JsonEncoder();
        $normalizer = new ObjectNormalizer();
        $normalizer->setIgnoredAttributes(['imageJson', 'subscriptions', 'startDate', 'endDate', 'download', 'publisher', 'toc', 'createdAt', 'updatedAt', 'pipeline', 'version', 'versionType', 'parent', 'opType', 'routing', 'fields', 'retryOnConflict', 'refresh', 'timeout', 'consistency', 'replication']);
        $normalizer->setCallbacks([
            'thumbnail' => $this->callbackImage(),
        ]);
        $normalizer->setCircularReferenceHandler(function ($object) {
            return $object->getId();
        });
        $serializer = new Serializer([$normalizer], [$encoder]);

        return new JsonResponse([
            'q' => $text,
            'results' => $serializer->normalize($formatted, 'json'),
        ]);
    }

    /**
     * Reads the mandatory "q" request parameter.
     *
     * Unlike a plain truthiness test, the string "0" is accepted as a query and a
     * non-string value (for example an array sent as q[]=x) is rejected up front.
     *
     * @param Request $request
     * @return string The raw, non-empty query text.
     * @throws BadRequestHttpException When "q" is missing, empty or not a string.
     */
    private function requireQueryText(Request $request): string
    {
        $text = $request->get('q');
        if (!is_string($text) || '' === $text) {
            throw new BadRequestHttpException('Missing "q" parameter.');
        }

        return $text;
    }

    /**
     * Finds the unpaid magazine to search in for a slug.
     *
     * When the magazine found for the slug reports getPreprod() === false, an unpaid
     * magazine whose slug is "preprod-" followed by the slug is looked up and preferred
     * if it exists; otherwise the magazine found for the slug itself is used.
     *
     * @param string $slug
     * @return Magazine|null Null when no unpaid magazine has this slug.
     */
    private function resolveMagazine(string $slug): ?Magazine
    {
        $repository = $this->getDoctrine()->getRepository(Magazine::class);

        $magazine = $repository->findOneBy(['slug' => $slug, 'paid' => false]);
        if (!$magazine) {
            return null;
        }

        if ($magazine->getPreprod() === false) {
            $preprod = $repository->findOneBy(['slug' => 'preprod-' . $slug, 'paid' => false]);
            if ($preprod) {
                return $preprod;
            }
        }

        return $magazine;
    }

    /**
     * Builds the inner-hits definition that returns the matching pages with highlights.
     *
     * @param bool $isClaudieCatalogue Whether to add the catalogue-specific highlighter settings.
     * @return InnerHits
     */
    private function buildInnerHits(bool $isClaudieCatalogue): InnerHits
    {
        $highlight = [
            'pre_tags' => [self::HIGHLIGHT_PRE_TAG],
            'post_tags' => [self::HIGHLIGHT_POST_TAG],
            'fields' => [
                'pages.content' => new \stdClass(),
            ],
        ];

        if ($isClaudieCatalogue) {
            // Array union keeps the specific settings first and appends the common ones.
            $highlight = [
                'type' => 'fvh',
                'boundary_max_scan' => 200,
                'fragment_size' => 18,
                'boundary_scanner' => 'chars',
                'boundary_chars' => '-',
            ] + $highlight;
        }

        $innerHits = new InnerHits();
        $innerHits->setHighlight($highlight);
        $innerHits->setSource(['pages.id']);
        $innerHits->setSize(100);

        return $innerHits;
    }

    /**
     * Runs "magazine id filter AND nested page query" against the magazines index.
     *
     * @param Term   $magazineFilter Term query selecting the magazine document.
     * @param Nested $pagesQuery     Nested query (with inner hits) on the magazine pages.
     * @return \Elastica\ResultSet
     */
    private function searchMagazinePages(Term $magazineFilter, Nested $pagesQuery)
    {
        $bool = new BoolQuery();
        $bool->addMust($magazineFilter);
        $bool->addMust($pagesQuery);

        return $this->get('fos_elastica.index.app.magazines')->createSearch()->search($bool);
    }

    /**
     * Builds one entry of the "results" list.
     *
     * @param MagazinePage $page       Page the hit belongs to.
     * @param string[]     $keywords   Distinct highlighted terms, lower-cased.
     * @param array        $highlights Highlight fragments for the page.
     * @return array
     */
    private function formatPageHit(MagazinePage $page, array $keywords, array $highlights): array
    {
        $thumbnail = $page->getThumbnail();

        return [
            'keywords' => $keywords,
            'page' => [
                'name' => $page->getName(),
                'pageno' => $page->getPageno(),
                /* str_replace pdfpage if needed | deploy */
                'pdfpage' => $page->getPdfpage(),
                'position' => $page->getPosition(),
                // A page without thumbnail yields null instead of a fatal error.
                'thumbnail' => $thumbnail ? $thumbnail->getUrlMethod() : null,
            ],
            'highlight' => $highlights,
        ];
    }

    /**
     * Collects, lower-cased, every substring enclosed between two delimiters.
     *
     * @param string[] $fragments      Strings to scan.
     * @param string   $startDelimiter Opening delimiter.
     * @param string   $endDelimiter   Closing delimiter.
     * @return string[] Enclosed substrings in order of appearance (duplicates kept).
     */
    private static function extractDelimited(array $fragments, string $startDelimiter, string $endDelimiter): array
    {
        $startLength = strlen($startDelimiter);
        $endLength = strlen($endDelimiter);

        $contents = [];
        foreach ($fragments as $fragment) {
            $offset = 0;
            while (false !== ($start = strpos($fragment, $startDelimiter, $offset))) {
                $start += $startLength;
                $end = strpos($fragment, $endDelimiter, $start);
                if (false === $end) {
                    // Unterminated delimiter: nothing more to extract from this fragment.
                    break;
                }
                $contents[] = mb_strtolower(substr($fragment, $start, $end - $start));
                $offset = $end + $endLength;
            }
        }

        return $contents;
    }

    /**
     * Cuts each fragment so that it begins at its first highlighted term.
     *
     * @param string[] $highlights
     * @return string[] Same keys; a fragment without highlight tag is kept unchanged.
     */
    private static function trimToHighlight(array $highlights): array
    {
        foreach ($highlights as $index => $fragment) {
            $fromTag = strstr($fragment, self::HIGHLIGHT_PRE_TAG);
            if (false !== $fromTag) {
                $highlights[$index] = $fromTag;
            }
        }

        return $highlights;
    }

    /**
     * Tells whether a string begins with three digits.
     *
     * @param string $str
     * @return bool
     */
    private static function startsWithNumber(string $str): bool
    {
        return preg_match('/^\d\d\d/', $str) === 1;
    }

    /**
     * Tells whether at least one of the strings begins with three digits.
     *
     * @param string[] $values
     * @return bool
     */
    private static function anyStartsWithNumber(array $values): bool
    {
        foreach ($values as $value) {
            if (self::startsWithNumber($value)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Normalizer callback for the "thumbnail" attribute.
     *
     * @return \Closure Maps a Media instance to the result of getUrlMethod() and anything else to null.
     */
    private function callbackImage()
    {
        return function ($image) {
            return $image instanceof Media
                ? $image->getUrlMethod()
                : null;
        };
    }

    /**
     * Replaces carriage returns, line feeds and tabs by spaces, collapses runs of spaces
     * and trims the result.
     *
     * Not called by the code currently in this class.
     *
     * @param string $content
     * @return string
     */
    private function cleanContent($content): string
    {
        $cleanContent = str_replace(["\r", "\n", "\t"], ' ', $content);

        return trim(preg_replace('/ {2,}/', ' ', $cleanContent));
    }
}