From d8143b9121bbe941f116230eaa5524ab2cc12a66 Mon Sep 17 00:00:00 2001
From: huliguo <2023611923@qq.com>
Date: 星期四, 10 七月 2025 21:20:21 +0800
Subject: [PATCH] 新增

---
 src/main/java/com/linghu/controller/CollectController.java |  140 +++++++++++++++++++++++++++++++++-------------
 1 files changed, 100 insertions(+), 40 deletions(-)

diff --git a/src/main/java/com/linghu/controller/CollectController.java b/src/main/java/com/linghu/controller/CollectController.java
index 09acdce..bbd9f8e 100644
--- a/src/main/java/com/linghu/controller/CollectController.java
+++ b/src/main/java/com/linghu/controller/CollectController.java
@@ -47,6 +47,7 @@
 
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+import java.util.stream.Stream;
 
 @RestController
 @RequestMapping("/collect")
@@ -206,10 +207,11 @@
 
        int maxConcurrentUsers = searchTaskRequest.getConfig() != null ?
                searchTaskRequest.getConfig().getMax_concurrent_users() : 3;
-       List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers);
 
        // 获取 keywordId
        Integer keywordId = searchTaskRequest.getKeyword_id();
+       //分割
+       List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers,keywordId);
 
        return Flux.fromIterable(userBatches)
                .flatMap(batch -> {
@@ -226,6 +228,7 @@
                }, 1) // 限制并发数为1,确保顺序执行
                .collectList() // 收集所有批次的响应
                .flatMap(responses ->
+
                        saveKeywordTasks(keywordId, responses) // 保存关联关系
                                .thenReturn(responses) // 返回原始响应
                );
@@ -238,6 +241,7 @@
                     KeywordTask keywordTask = new KeywordTask();
                     keywordTask.setKeyword_id(keywordId);
                     keywordTask.setTask_id(response.getTask_id());
+                    keywordTask.setStatus("pending");
                     return keywordTask;
                 })
                 .collect(Collectors.toList());
@@ -253,7 +257,15 @@
                 .then();
     }
 
-    private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize) {
+    private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize,Integer keywordId) {
+
+        Keyword keyword = keywordService.getById(keywordId);
+        if (null==keyword.getNum()){
+            keyword.setNum(0);
+        }
+        keyword.setNum(keyword.getNum()+1);
+        keywordService.updateById(keyword);
+
         List<List<UserDto>> batches = new ArrayList<>();
         for (int i = 0; i < users.size(); i += batchSize) {
             batches.add(users.subList(i, Math.min(i + batchSize, users.size())));
@@ -272,11 +284,12 @@
                 .bodyToMono(new ParameterizedTypeReference<SearchTaskResponse>() {})
                 .flatMap(taskResponse -> {
                     if (taskResponse != null && taskResponse.getTask_id() != null) {
+
                         // 使用 Reactor 的方式更新数据库
                         return Mono.fromRunnable(() -> {
                                     LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>();
                                     updateWrapper.eq(Keyword::getKeyword_id, batchRequest.getKeyword_id());
-                                    updateWrapper.set(Keyword::getStatus, "Submitted");
+                                    updateWrapper.set(Keyword::getStatus, "submitted");
                                     updateWrapper.set(Keyword::getTask_id, taskResponse.getTask_id());
                                     keywordService.update(updateWrapper);
                                 }).subscribeOn(Schedulers.boundedElastic()) // 在弹性线程池执行
@@ -342,9 +355,9 @@
                 .map(data -> ResponseResult.success(data))
                 .onErrorResume(e -> {
                     if (e.getMessage().contains("任务不存在")) {
-                        return Mono.just(ResponseResult.error(404, "任务不存在"));
+                        return Mono.just(ResponseResult.error(200, "任务不存在"));
                     } else if (e.getMessage().contains("无法取消")) {
-                        return Mono.just(ResponseResult.error(400, "任务已完成,无法取消"));
+                        return Mono.just(ResponseResult.error(200, "任务已完成,无法取消"));
                     }
                     return Mono.just(ResponseResult.error(500, "取消任务失败: " + e.getMessage()));
                 });
@@ -453,20 +466,12 @@
     private Mono<Void> updateQuestionAndReference(TaskResultResponse result) {
         return Mono.fromRunnable(() -> {
             try {
-                // 1. 更新关键词状态
-                LambdaUpdateWrapper<Keyword> keywordUpdate = new LambdaUpdateWrapper<>();
-                keywordUpdate.eq(Keyword::getTask_id, result.getTask_id())
-                        .set(Keyword::getStatus, "completed");
-                keywordService.update(keywordUpdate);
-
+                // 1. 根据KeywordTask更新关键词状态
                 // 查询关键词ID
-
                 LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>();
                 keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id());
                 KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper);
 
-//                LambdaQueryWrapper<Keyword> keywordQuery = new LambdaQueryWrapper<>();
-//                keywordQuery.eq(Keyword::getTask_id, keywordTask.getTask_id());
                 Keyword keyword = keywordService.getById(keywordTask.getKeyword_id());
 
                 if (keyword == null) {
@@ -475,6 +480,8 @@
                     throw new Exception("未找到关联的关键词,task_id: " + result.getTask_id());
 //                    return;
                 }
+                keyword.setStatus("completed");
+                keywordService.updateById(keyword);
 
                 // 2. 批量查询所有问题
                 LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>();
@@ -488,7 +495,7 @@
                 // 3. 收集所有需要更新的问题和引用
                 List<Question> questionsToUpdate = new ArrayList<>();
                 List<Reference> allReferences = new ArrayList<>();
-
+                List<Reference> resultList = new ArrayList<>();
                 // 遍历结果
                 for (UserResult userResult : result.getResults()) {
                     for (QuestionResult questionResult : userResult.getQuestions_results()) {
@@ -514,17 +521,6 @@
 
                                 questionsToUpdate.add(question);
 
-                                //如果查询结果不为空查询num
-                                Integer maxNumByKeywordId = referenceService.getMaxNumByKeywordId(keyword.getKeyword_id());
-                               if (maxNumByKeywordId != null){
-                                   maxNumByKeywordId++;
-                               }else {
-                                   maxNumByKeywordId = 1;
-                               }
-
-
-                                // 收集引用数据,处理空集合情况
-                                Integer finalMaxNumByKeywordId = maxNumByKeywordId;
                                 List<Reference> references =
                                         Optional.ofNullable(questionResult.getReferences())
                                                 .orElse(Collections.emptyList())
@@ -535,28 +531,38 @@
                                                     reference.setTitle(ref.getTitle());
                                                     reference.setUrl(ref.getUrl());
                                                     reference.setDomain(ref.getDomain());
-                                                    reference.setNum(finalMaxNumByKeywordId);
+                                                    reference.setNum(keyword.getNum());
+                                                    reference.setTask_id(result.getTask_id());
+                                                    reference.setKeyword_id(keyword.getKeyword_id());
                                                     //域名和平台id映射
                                                     reference.setCreate_time(LocalDateTime.now());
                                                     Platform platform = platformService.getPlatformByDomain(reference.getDomain());
-//                                                    if (platform == null) {
-//                                                        throw new RuntimeException("未找到对应的平台: " + reference.getDomain());
-//                                                    }
-                                                    if (platform != null){
+                                                    if (platform == null) {
+                                                        //平台为空 创建平台 类型为“默认”
+                                                        Type type = typeService.getOne(new LambdaQueryWrapper<Type>().eq(Type::getType_name,"默认"));
+                                                        if (type == null) {
+                                                            Type newType = new Type();
+                                                            newType.setType_name("默认");
+                                                            typeService.save(newType);
+                                                            type = newType;
+                                                        }
+                                                        Platform platform1 = new Platform();
+                                                        platform1.setDomain(reference.getDomain());
+                                                        platform1.setPlatform_name(reference.getDomain());
+                                                        platform1.setType_id(type.getType_id());
+                                                        platformService.save(platform1);
+
+                                                        reference.setType_id(type.getType_id());
+                                                        reference.setPlatform_id(platform1.getPlatform_id());
+
+                                                    }
+                                                    else {
                                                         reference.setPlatform_id(platform.getPlatform_id());
                                                         Type type = typeService.getById(platform.getType_id());
-//                                                    if (type == null) {
-//                                                        throw new RuntimeException("未找到对应的类型: " + reference.getDomain());
-//                                                    }
                                                         if (type != null){
                                                             reference.setType_id(type.getType_id());
                                                         }
                                                     }
-
-
-                                                    // 根据 domain 查询类型
-
-
                                                     return reference;
                                                 })
                                                 .collect(Collectors.toList());
@@ -565,6 +571,53 @@
                                 if (!references.isEmpty()) {
                                     allReferences.addAll(references);
                                 }
+
+                                //取数据库中当前关键词的当前轮次的当前问题id结果拿出来
+                                List<Reference> dbList = referenceService.list(new LambdaQueryWrapper<Reference>().eq(Reference::getKeyword_id, keyword.getKeyword_id())
+                                        .eq(Reference::getNum, keyword.getNum())
+                                        .eq(Reference::getQuestion_id, question.getQuestion_id())
+                                );
+
+                                // 1. 合并两个列表
+                                List<Reference> combinedList = new ArrayList<>();
+                                combinedList.addAll(allReferences);
+                                combinedList.addAll(dbList);
+
+                                // 2. 创建复合键的Map,用于统计完全匹配的记录
+                                Map<String, List<Reference>> compositeKeyMap = combinedList.stream()
+                                        .collect(Collectors.groupingBy(
+                                                ref -> ref.getTitle() + "|" + ref.getUrl() + "|" + ref.getDomain()
+                                        ));
+
+                                // 3. 处理每组重复记录
+
+                                compositeKeyMap.forEach((key, refGroup) -> {
+                                    // 3.1 找出组内有ID的记录(优先从dbList中获取)
+                                    Optional<Reference> existingRecord = refGroup.stream()
+                                            .filter(ref -> ref.getReference_id() != null)
+                                            .findFirst();
+
+                                    // 3.2 统计该组的重复次数(总数-1)
+                                    int repetitionCount = refGroup.size() - 1;
+
+                                    // 3.3 决定最终保留的记录
+                                    Reference recordToSave;
+                                    if (existingRecord.isPresent()) {
+                                        // 使用已有ID的记录并更新重复次数
+                                        recordToSave = existingRecord.get();
+                                        recordToSave.setRepetition_num(
+                                                (recordToSave.getRepetition_num() == null ? 0 : recordToSave.getRepetition_num())
+                                                        + repetitionCount
+                                        );
+                                    } else {
+                                        // 没有ID记录则取第一条并设置重复次数
+                                        recordToSave = refGroup.get(0);
+                                        recordToSave.setRepetition_num(repetitionCount);
+                                    }
+
+                                    resultList.add(recordToSave);
+                                });
+                                referenceService.saveOrUpdateBatch(resultList);
                             }
                         } catch (Exception e) {
                             log.error(e.getMessage(), e);
@@ -579,7 +632,7 @@
                     questionService.updateBatchById(questionsToUpdate);
                     System.out.println("成功批量更新 " + questionsToUpdate.size() + " 个问题");
                 }
-                referenceService.saveBatch(allReferences);
+
                 // 5. 批量插入引用,使用流式分批处理
 //                if (!allReferences.isEmpty()) {
 //                    int batchSize = 1000;
@@ -621,6 +674,7 @@
     }
 
     @GetMapping("/health")
+    @ApiOperation("健康检查")
     public Mono<HealthResponse> checkThirdPartyHealth() {
         return webClient.get()
                 .uri(baseUrl + "/health") // 假设第三方健康检查接口路径为/health
@@ -634,6 +688,7 @@
      * 查询服务器资源
      */
     @GetMapping("/server/resource")
+    @ApiOperation(value = "查询服务器资源")
     public Mono<ServerResourceResponse> getServerResource() {
         return webClient.get()
                 .uri(baseUrl + "/api/v1/system/resources")
@@ -642,4 +697,9 @@
                 .onErrorResume(e -> Mono.just(
                         new ServerResourceResponse( e.getMessage())));
     }
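+    /*
+     * TODO (not yet implemented): given an orderId, look up all of its keyword ids and the task ids under each keyword, poll the status of every task, and for each task whose status is "completed" call the fetch-result endpoint and process the result.
+     */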
+
+
 }

--
Gitblit v1.7.1