src/Controller/View/ElasticSearch/SearchController.php line 91

Open in your IDE?
  1. <?php
  2. namespace App\Controller\View\ElasticSearch;
  3. use App\Application\Sonata\MediaBundle\Entity\Media;
  4. use App\Entity\Magazine\Magazine;
  5. use App\Entity\Magazine\MagazinePage;
  6. use Elastica\Query;
  7. use Elastica\Query\Nested;
  8. use Elastica\Query\InnerHits;
  9. use Elastica\Query\BoolQuery;
  10. use Elastica\Query\Term;
  11. use Elastica\Suggest;
  12. use Elastica\Util;
  13. use Sensio\Bundle\FrameworkExtraBundle\Configuration\Route;
  14. use Symfony\Bundle\FrameworkBundle\Controller\Controller;
  15. use Symfony\Component\DomCrawler\Crawler;
  16. use Symfony\Component\HttpFoundation\JsonResponse;
  17. use Symfony\Component\HttpFoundation\Request;
  18. use Symfony\Component\HttpFoundation\Response;
  19. use Symfony\Component\HttpFoundation\Session\SessionInterface;
  20. use Symfony\Component\HttpKernel\Exception\BadRequestHttpException;
  21. use Symfony\Component\Serializer\Encoder\JsonEncoder;
  22. use Symfony\Component\Serializer\Normalizer\ObjectNormalizer;
  23. use Symfony\Component\Serializer\Serializer;
  24. use function Symfony\Component\String\u;
  /**
   * Search endpoints for a single magazine, backed by Elasticsearch indexes.
   *
   * Two JSON actions are exposed: completion suggestions for a prefix, and a
   * search over the content of a magazine's pages. Both look the magazine up
   * by slug among unpaid magazines and prefer its "preprod-" counterpart when
   * the magazine reports getPreprod() === false and such a counterpart exists.
   *
   * @Route("/view/search")
   */
  class SearchController extends Controller
  {
      /**
       * Id of the magazine (claudie-pierlot-automne-hiver-2021 catalogue) that
       * gets dedicated highlight settings and result ordering.
       */
      private const CATALOGUE_MAGAZINE_ID = 118;

      /** Opening tag placed before each highlighted term. */
      private const HIGHLIGHT_OPEN = '<strong>';

      /** Closing tag placed after each highlighted term. */
      private const HIGHLIGHT_CLOSE = '</strong>';

      /**
       * Returns up to five completion suggestions for the "q" request value.
       *
       * Responds with 404 and a JSON message when no unpaid magazine matches
       * the slug.
       *
       * @Route("/suggest/{slug}", name="search_suggest", options={"expose"=true})
       * @param Request $request
       * @param string  $slug
       * @return JsonResponse
       * @throws BadRequestHttpException when "q" is missing, empty or not a string
       */
      public function getSuggestAction(Request $request, $slug): JsonResponse
      {
          $text = $this->requireSearchText($request);

          $magazine = $this->resolveMagazine($slug);
          if (null === $magazine) {
              return $this->json(['message' => 'magazine not found'], Response::HTTP_NOT_FOUND);
          }

          $completion = new Suggest\Completion('search', 'suggest');
          // Restrict suggestions to the resolved magazine.
          $completion->setParam('context', ['id' => $magazine->getId()]);
          $completion->setPrefix($text);
          $completion->setFuzzy(['fuzziness' => 1]);
          $completion->setSize(5);

          $resultSet = $this->get('fos_elastica.index.suggest.magazines')->search(Query::create($completion));

          $suggestions = [];
          foreach ($resultSet->getSuggests() as $suggests) {
              foreach ($suggests as $suggest) {
                  foreach ($suggest['options'] as $option) {
                      $suggestions[] = ['keyword' => $option['text']];
                  }
              }
          }

          return new JsonResponse(['suggestions' => $suggestions]);
      }

      /**
       * Searches the page contents of one magazine and returns the matching pages.
       *
       * The lower-cased query is first run as a phrase/match search on
       * "pages.content"; when that yields nothing, a "*text*" wildcard search is
       * tried instead. Each returned entry holds the highlighted keywords, basic
       * page data and the highlight fragments.
       *
       * Responds with 404 and a JSON message when no unpaid magazine matches
       * the slug.
       *
       * @Route("/{slug}", name="search_magazines", options={"expose"=true})
       * @param Request          $request
       * @param string           $slug
       * @param SessionInterface $session not read by this method
       * @return JsonResponse
       * @throws BadRequestHttpException when "q" is missing, empty or not a string
       */
      public function getMagazinesAction(Request $request, $slug, SessionInterface $session): JsonResponse
      {
          $text = $this->requireSearchText($request);

          $magazine = $this->resolveMagazine($slug);
          if (null === $magazine) {
              return $this->json(['message' => 'magazine not found'], Response::HTTP_NOT_FOUND);
          }

          $text = mb_strtolower($text);
          $magazineId = $magazine->getId();
          $isCatalogue = self::CATALOGUE_MAGAZINE_ID === $magazineId;

          // First attempt: exact phrase or analysed match on the page content.
          $exactPhrase = new Query\MatchPhrase();
          $exactPhrase->setFieldQuery('pages.content', $text);
          $looseMatch = new Query\MatchQuery();
          $looseMatch->setFieldQuery('pages.content', $text);
          $contentQuery = new BoolQuery();
          $contentQuery->addShould($exactPhrase);
          $contentQuery->addShould($looseMatch);

          $results = $this->searchPages($magazineId, $contentQuery, $this->buildInnerHits($isCatalogue));

          if (!$results) {
              // Fallback: substring search.
              // NOTE(review): any "*" or "?" typed by the user reaches the
              // wildcard pattern unescaped; confirm this is intended.
              $wildcard = new Query\Wildcard();
              $wildcard->setValue('pages.content', '*' . $text . '*');
              $results = $this->searchPages($magazineId, $wildcard, $this->buildInnerHits($isCatalogue));
          }

          $formatted = [];
          if ($results) {
              $formatted = $this->formatPageHits($results[0]->getHit(), $text, $isCatalogue);
          }

          return new JsonResponse([
              'q' => $text,
              'results' => $this->normalizeResults($formatted),
          ]);
      }

      /**
       * Reads the "q" request value and rejects anything that is not a
       * non-empty string.
       *
       * Unlike a plain truthiness test this accepts the query "0" and refuses
       * array values such as "q[]=x".
       *
       * @param Request $request
       * @return string
       * @throws BadRequestHttpException
       */
      private function requireSearchText(Request $request): string
      {
          $text = $request->get('q');
          if (!is_string($text) || '' === $text) {
              throw new BadRequestHttpException('Missing "q" parameter.');
          }

          return $text;
      }

      /**
       * Finds the unpaid magazine for a slug, preferring its preprod version.
       *
       * When the magazine's getPreprod() is strictly false, the unpaid magazine
       * with slug "preprod-" followed by the given slug is returned if it
       * exists; otherwise the magazine found first is returned.
       *
       * @param string $slug
       * @return Magazine|null null when no unpaid magazine has this slug
       */
      private function resolveMagazine($slug): ?Magazine
      {
          $repository = $this->getDoctrine()->getRepository(Magazine::class);

          $magazine = $repository->findOneBy(['slug' => $slug, 'paid' => false]);
          if (!$magazine) {
              return null;
          }

          if ($magazine->getPreprod() === false) {
              $preprod = $repository->findOneBy(['slug' => 'preprod-' . $slug, 'paid' => false]);
              if ($preprod) {
                  return $preprod;
              }
          }

          return $magazine;
      }

      /**
       * Builds the inner-hits definition used for the nested "pages" query:
       * highlighting on "pages.content", only "pages.id" as source, 100 hits.
       *
       * @param bool $isCatalogue whether the catalogue-specific highlight settings apply
       * @return InnerHits
       */
      private function buildInnerHits($isCatalogue): InnerHits
      {
          $highlight = [
              'pre_tags' => [self::HIGHLIGHT_OPEN],
              'post_tags' => [self::HIGHLIGHT_CLOSE],
              'fields' => [
                  'pages.content' => new \stdClass(),
              ],
          ];

          if ($isCatalogue) {
              // Specific highlight settings for the catalogue magazine.
              $highlight = [
                  'type' => 'fvh',
                  'boundary_max_scan' => 200,
                  'fragment_size' => 18,
                  'boundary_scanner' => 'chars',
                  'boundary_chars' => '-',
              ] + $highlight;
          }

          $innerHits = new InnerHits();
          $innerHits->setHighlight($highlight);
          $innerHits->setSource(['pages.id']);
          $innerHits->setSize(100);

          return $innerHits;
      }

      /**
       * Runs one search on the magazines index: the document id must equal the
       * magazine id and the nested "pages" query must match.
       *
       * @param mixed               $magazineId value matched against the "id" term
       * @param Query\AbstractQuery $pageQuery  query applied inside the "pages" path
       * @param InnerHits           $innerHits  inner-hits definition for the nested query
       * @return array results as returned by the result set's getResults()
       */
      private function searchPages($magazineId, Query\AbstractQuery $pageQuery, InnerHits $innerHits): array
      {
          $magazineFilter = new Term();
          $magazineFilter->setParam('id', $magazineId);

          $nested = new Nested();
          $nested->setPath('pages');
          $nested->setQuery($pageQuery);
          $nested->setInnerHits($innerHits);

          $bool = new BoolQuery();
          $bool->addMust($magazineFilter);
          $bool->addMust($nested);

          $search = $this->get('fos_elastica.index.app.magazines')->createSearch();

          return $search->search($bool)->getResults();
      }

      /**
       * Turns the inner page hits of one search hit into response entries.
       *
       * For the catalogue magazine, a page whose highlighted keywords include
       * one starting with three digits is moved to the front of the list and
       * its fragments are cut to start at the first highlight tag, unless the
       * query itself starts with three digits.
       *
       * @param array  $hit         raw hit data of the first search result
       * @param string $text        lower-cased search text
       * @param bool   $isCatalogue whether the catalogue-specific ordering applies
       * @return array list of entries with "keywords", "page" and "highlight" keys
       */
      private function formatPageHits(array $hit, $text, $isCatalogue): array
      {
          $pageHits = $hit['inner_hits']['pages']['hits']['hits'] ?? [];
          $queryStartsWithDigits = $this->startsWithThreeDigits($text);
          $pageRepository = $this->getDoctrine()->getRepository(MagazinePage::class);

          $formatted = [];
          foreach ($pageHits as $pageHit) {
              $pageId = $pageHit['_source']['id'] ?? null;
              $page = null === $pageId ? null : $pageRepository->findOneBy(['id' => $pageId]);
              if (!$page) {
                  // The index references a page the database no longer has.
                  continue;
              }

              $fragments = $pageHit['highlight']['pages.content'] ?? [];
              $keywords = array_values(array_unique($this->extractHighlightedTerms($fragments)));

              $promote = false;
              if ($isCatalogue && !$queryStartsWithDigits) {
                  foreach ($keywords as $keyword) {
                      if ($this->startsWithThreeDigits($keyword)) {
                          $promote = true;
                          break;
                      }
                  }
              }

              if ($promote) {
                  foreach ($fragments as $i => $fragment) {
                      $fromFirstTag = strstr($fragment, self::HIGHLIGHT_OPEN);
                      // Keep the fragment untouched when it has no highlight tag.
                      $fragments[$i] = false === $fromFirstTag ? $fragment : $fromFirstTag;
                  }
              }

              $thumbnail = $page->getThumbnail();
              $entry = [
                  'keywords' => $keywords,
                  'page' => [
                      'name' => $page->getName(),
                      'pageno' => $page->getPageno(),
                      'pdfpage' => $page->getPdfpage(),
                      'position' => $page->getPosition(),
                      'thumbnail' => $thumbnail ? $thumbnail->getUrlMethod() : null,
                  ],
                  'highlight' => $fragments,
              ];

              if ($promote) {
                  array_unshift($formatted, $entry);
              } else {
                  $formatted[] = $entry;
              }
          }

          return $formatted;
      }

      /**
       * Collects, lower-cased, every piece of text found between the highlight
       * tags in the given fragments.
       *
       * An opening tag without a matching closing tag ends the scan of that
       * fragment. Offsets are byte offsets throughout (strpos/substr/strlen),
       * which stays consistent because the tags themselves are ASCII.
       *
       * @param array $fragments highlight fragments (strings)
       * @return array list of lower-cased terms, duplicates included
       */
      private function extractHighlightedTerms(array $fragments): array
      {
          $openLength = strlen(self::HIGHLIGHT_OPEN);
          $closeLength = strlen(self::HIGHLIGHT_CLOSE);

          $terms = [];
          foreach ($fragments as $fragment) {
              $offset = 0;
              while (false !== ($start = strpos($fragment, self::HIGHLIGHT_OPEN, $offset))) {
                  $start += $openLength;
                  $end = strpos($fragment, self::HIGHLIGHT_CLOSE, $start);
                  if (false === $end) {
                      break;
                  }
                  $terms[] = mb_strtolower(substr($fragment, $start, $end - $start));
                  $offset = $end + $closeLength;
              }
          }

          return $terms;
      }

      /**
       * Tells whether a string begins with three digits.
       *
       * @param string $str
       * @return bool
       */
      private function startsWithThreeDigits($str): bool
      {
          return preg_match('/^\d\d\d/', $str) === 1;
      }

      /**
       * Passes the formatted entries through a Serializer configured with an
       * ObjectNormalizer (ignored attributes, thumbnail callback, circular
       * reference handler returning the object's id) and a JSON encoder.
       *
       * @param array $formatted
       * @return mixed normalized data
       */
      private function normalizeResults(array $formatted)
      {
          $normalizer = new ObjectNormalizer();
          $normalizer->setIgnoredAttributes([
              'imageJson', 'subscriptions', 'startDate', 'endDate', 'download', 'publisher', 'toc',
              'createdAt', 'updatedAt', 'pipeline', 'version', 'versionType', 'parent', 'opType',
              'routing', 'fields', 'retryOnConflict', 'refresh', 'timeout', 'consistency', 'replication',
          ]);
          $normalizer->setCallbacks([
              'thumbnail' => $this->callbackImage(),
          ]);
          $normalizer->setCircularReferenceHandler(function ($object) {
              return $object->getId();
          });

          $serializer = new Serializer([$normalizer], [new JsonEncoder()]);

          return $serializer->normalize($formatted, 'json');
      }

      /**
       * Returns the normalizer callback for "thumbnail" attributes: the value's
       * getUrlMethod() result when it is a Media instance, null otherwise.
       *
       * @return \Closure
       */
      private function callbackImage()
      {
          return static function ($image) {
              return $image instanceof Media
                  ? $image->getUrlMethod()
                  : null;
          };
      }

      /**
       * Replaces carriage returns, line feeds and tabs with spaces, collapses
       * runs of spaces into one and trims the result.
       *
       * Not called by any other method of this class as shown.
       *
       * @param string $content
       * @return string
       */
      private function cleanContent($content): string
      {
          $cleanContent = str_replace(["\r", "\n", "\t"], ' ', $content);

          return trim(preg_replace('/ {2,}/', ' ', $cleanContent));
      }
  }