原创 Elasticsearch应用之京东搜索|七日打卡

发布时间:2021-06-24 14:24:27 浏览 1047 来源:猿笔记 作者:后端码匠

    #贡献主题:##开发环境所有开发环境`全栈自学社区`公众号回复`电脑环境`关键字即可获取.##项目概况1##实现代码用于对应京东单个商品数据用于解析京东搜索的数据4分析京东搜索商品6367##项目运行-搜索前3-搜索后4文章已上传项目地址


    #主题列表:juejin,github,smartblue,cyanosis,channing-cyan,fancy,hydrogen,condensed-night-purple,greenwillow,v-green,vue-pro,healer-readable,mk-cute,jzman,geek-black,awesome-green,qklhk-chocolate

    #投稿主题:

    theme:qklhk-chocolate

    highlight:

    Sometimes all we need is just a new perspective. 有时候我们只是需要新的角度。

    ##Elasticsearch应用之京东搜索

    ## 开发环境

    - Spring Boot 2.4.2

    - Elasticsearch 7.10.1

    - Lombok

    - 网页解析:jsoup 1.10.2

    - Alibaba fastjson 1.2.73

    - JDK 1.8

    - 集成开发环境(IDE):IntelliJ IDEA

    - elasticsearch-head

    所有开发环境均可通过微信公众号“全栈自学社区”获取:回复关键词“电脑环境”即可。

    ## 项目概述

    ##pom.xml

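    ```xml
    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>

        <parent>
            <groupId>cn.com.codingce</groupId>
            <artifactId>codingce-es</artifactId>
            <version>0.0.1-SNAPSHOT</version>
        </parent>

        <groupId>cn.com.codingce</groupId>
        <artifactId>codingce-es-jd</artifactId>
        <version>0.0.1-SNAPSHOT</version>
        <name>codingce-es-jd</name>
        <description>Demo project for Spring Boot</description>

        <properties>
            <java.version>1.8</java.version>
            <elasticsearch.version>7.10.1</elasticsearch.version>
        </properties>

        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-thymeleaf</artifactId>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-web</artifactId>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-devtools</artifactId>
                <scope>runtime</scope>
                <optional>true</optional>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-configuration-processor</artifactId>
                <optional>true</optional>
            </dependency>
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
                <optional>true</optional>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-test</artifactId>
                <scope>test</scope>
            </dependency>
            <dependency>
                <groupId>org.jsoup</groupId>
                <artifactId>jsoup</artifactId>
                <version>1.10.2</version>
            </dependency>
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
                <version>1.2.73</version>
                <scope>compile</scope>
            </dependency>
        </dependencies>

        <build>
            <plugins>
                <plugin>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-maven-plugin</artifactId>
                    <configuration>
                        <excludes>
                            <exclude>
                                <groupId>org.projectlombok</groupId>
                                <artifactId>lombok</artifactId>
                            </exclude>
                        </excludes>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    </project>
    ```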

    ## 实现代码

    ###ElasticSearchConfig

    这里仅仅是单个Elasticsearch

    ```java
    /**
     * ElasticSearch 配置类
     * 找到对象,放到 Spring 里面就可以用了
     *
     * @author mxz
     */
    @Configuration
    public class ElasticSearchConfig {

        @Bean
        public RestHighLevelClient restHighLevelClient() {
            RestHighLevelClient client = new RestHighLevelClient(
                    RestClient.builder(
                            new HttpHost("localhost", 9200, "http")));
            //                new HttpHost("localhost", 9201, "http")));
            return client;
        }
    }
    ```

    ###实体类Content

    用于对应京东的单个商品数据

    ```java
    /**
     * 实体类
     *
     * @author mxz
     */
    @Data
    @NoArgsConstructor
    @AllArgsConstructor
    public class Content {

        private String title;

        private String img;

        private String price;
    }
    ```

    ###工具类HtmlParseUtil

    用于解析京东搜索的数据

    ```java
    /**
     * @author mxz
     */
    @Component
    public class HtmlParseUtil {

        public List<Content> parseJD(String keywords) throws Exception {
            // 获取请求 https://search.jd.com/Search?keyword=java
            String url = "https://search.jd.com/Search?keyword=" + keywords;
            // 解析网页(返回 Document 就是浏览器 Document 对象)
            Document document = Jsoup.parse(new URL(url), 30000);
            // 所有在 js 中可以使用的方法,这里都可以使用
            Element element = document.getElementById("J_goodsList");
            // System.out.println(element.html());
            ArrayList<Content> goodList = new ArrayList<>();
            // 获取所有的 li 元素
            Elements elements = element.getElementsByTag("li");
            // 获取元素中的内容
            for (Element el : elements) {
                // 关于这种图片特别多的网站,所有的图片都是延迟加载的
                String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");
                String price = el.getElementsByClass("p-price").eq(0).text();
                String title = el.getElementsByClass("p-name").eq(0).text();
                Content content = new Content(title, img, price);
                goodList.add(content);
            }
            return goodList;
        }

        public static void main(String[] args) throws IOException {
            // 获取请求 https://search.jd.com/Search?keyword=java
            String url = "https://search.jd.com/Search?keyword=java";
            // 解析网页(返回 Document 就是浏览器 Document 对象)
            Document document = Jsoup.parse(new URL(url), 30000);
            // 所有在 js 中可以使用的方法,这里都可以使用
            Element element = document.getElementById("J_goodsList");
            // System.out.println(element.html());
            // 获取所有的 li 元素
            Elements elements = element.getElementsByTag("li");
            // 获取元素中的内容
            for (Element el : elements) {
                // 关于这种图片特别多的网站,所有的图片都是延迟加载的 source-data-lazy-img
                String img = el.getElementsByTag("img").eq(0).attr("src");
                String price = el.getElementsByClass("p-price").eq(0).text();
                String title = el.getElementsByClass("p-name").eq(0).text();
                System.out.println("=====================================");
                System.out.println(img);
                System.out.println(price);
                System.out.println(title);
            }
        }
    }
    ```

    分析京东搜索商品

    ###业务逻辑层ContentService

    java/***业务逻辑层**@authormxz*/@ServicepublicclassContentService{publicstaticfinalStringES_INDEX="jd_goods";@Autowired@Qualifier("restHighLevelClient")privateRestHighLevelClientclient;/***1解析数据放入es中**@paramkeywords*@return*@throwsException*/publicBooleanparseContent(Stringkeywords)throwsException{Listcontents=newHtmlParseUtil().parseJD(keywords);//查询出来的数据放入到es中BulkRequestbulkRequest=newBulkRequest();bulkRequest.timeout(TimeValue.timeValueSeconds(2));for(inti=0;i>searchPage(Stringkeywords,intpageNo,intpageSize)throwsIOException{if(pageNo<=1){pageNo=1;}//条件搜索SearchRequestsearchRequest=newSearchRequest(ES_INDEX);SearchSourceBuildersearchSourceBuilder=newSearchSourceBuilder();//分页searchSourceBuilder.from(pageNo);searchSourceBuilder.size(pageSize);//精准匹配TermQueryBuildertermQuery=QueryBuilders.termQuery("title",keywords);searchSourceBuilder.query(termQuery);searchSourceBuilder.timeout(TimeValue.timeValueSeconds(60));//执行搜索searchRequest.source(searchSourceBuilder);//通过客户端查询SearchResponsesearchResponse=client.search(searchRequest,RequestOptions.DEFAULT);//解析结果List>list=newArrayList<>();for(SearchHitdocumentFields:searchResponse.getHits().getHits()){list.add(documentFields.getSourceAsMap());}returnlist;}/***3实现搜索功能高亮**@paramkeywords*@parampageNo*@parampageSize*@return*@throwsIOException*/publicList>searchPageHighlighter(Stringkeywords,intpageNo,intpageSize)throwsIOException{if(pageNo<=1){pageNo=1;}//条件搜索SearchRequestsearchRequest=newSearchRequest(ES_INDEX);SearchSourceBuildersearchSourceBuilder=newSearchSourceBuilder();//分页searchSourceBuilder.from(pageNo);searchSourceBuilder.size(pageSize);//精准匹配TermQueryBuildertermQuery=QueryBuilders.termQuery("title",keywords);searchSourceBuilder.query(termQuery);searchSourceBuilder.timeout(TimeValue.timeValueSeconds(60));//生成高亮查询器HighlightBuilderhighlightBuilder=newHighlightBuilder();//高亮查询字段highlightBuilder.field("title");//如果要多个字段高亮,这项要为falsehighlightBuilder.requireFieldMatch(false);//高亮设置highlightBuilder.preTags("");highlightBuilder.po
stTags("");//下面这两项,如果你要高亮如文字内容等有很多字的字段,必须配置,不然会导致高亮不全,文章内容缺失等//最大高亮分片数highlightBuilder.fragmentSize(800000);//从第一个分片获取高亮片段highlightBuilder.numOfFragments(0);searchSourceBuilder.highlighter(highlightBuilder);//执行搜索searchRequest.source(searchSourceBuilder);//通过客户端查询SearchResponsesearchResponse=client.search(searchRequest,RequestOptions.DEFAULT);//解析结果List>list=newArrayList<>();for(SearchHitdocumentFields:searchResponse.getHits().getHits()){//解析高亮字段MaphighlightFields=documentFields.getHighlightFields();HighlightFieldtitle=highlightFields.get("title");MapsourceAsMap=documentFields.getSourceAsMap();//原来的结果//解析高亮字段将原来的字段换为高亮字段//千万记得要记得判断是不是为空,不然你匹配的第一个结果没有高亮内容,那么就会报空指针异常,这个错误一开始真的搞了很久if(title!=null){Text[]fragments=title.fragments();StringnewTitle="";for(Textfragment:fragments){newTitle+=fragment;}sourceAsMap.put("title",newTitle);System.out.println(newTitle);}list.add(sourceAsMap);}returnlist;}}

    ###控制层RestController

    ```java
    @RestController
    public class ContentController {

        @Autowired
        private ContentService contentService;

        @GetMapping("/parse/{keywords}")
        public Boolean parse(@PathVariable("keywords") String keywords) throws Exception {
            return contentService.parseContent(keywords);
        }

        @GetMapping("/search/{keywords}/{pageNo}/{pageSize}")
        public List<Map<String, Object>> searchPage(@PathVariable("keywords") String keywords,
                                                    @PathVariable("pageNo") int pageNo,
                                                    @PathVariable("pageSize") int pageSize) throws IOException {
            // return contentService.searchPage(keywords, pageNo, pageSize);
            // 高亮
            return contentService.searchPageHighlighter(keywords, pageNo, pageSize);
        }
    }
    ```

    ###前端页面myindex.html

    html全栈自学社区搜索全栈自学社区主页(current)友链控制面板
搜索

{{result.price}}

## 项目运行

-搜索前

-搜索后

文章已上传

项目地址:

作者信息

后端码匠 [等级:3] 后端研发
发布了 54 篇专栏 · 获得点赞 166 · 获得阅读 7822

相关推荐 更多

热门文章

扫描二维码,了解更多信息

猿笔记二维码