guyue
2 天以前 d6011fd37ef2ff794d8efa93932bdf98d8f76dda
src/main/java/com/linghu/controller/CollectController.java
@@ -6,26 +6,18 @@
import java.util.*;
import java.util.stream.Collectors;
import javax.annotation.Resource;
import javax.servlet.http.HttpServletRequest;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.linghu.mapper.PlatformMapper;
import com.linghu.mapper.TypeMapper;
import com.linghu.model.dto.*;
import com.linghu.model.entity.*;
import com.linghu.service.*;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.ParameterizedTypeReference;
import org.springframework.http.*;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.reactive.function.client.WebClient;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
@@ -37,7 +29,6 @@
import io.swagger.annotations.ApiOperation;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.publisher.SignalType;
import org.springframework.web.bind.annotation.* ;
import org.springframework.http.HttpStatus;
@@ -47,6 +38,7 @@
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
@RestController
@RequestMapping("/collect")
@@ -198,38 +190,77 @@
                .next() // 找到第一个完成的响应后结束流
                .then(); // 转换为Mono<Void>
    }*/
   // 添加一个辅助方法来安全地将字符串转换为double
   private double parseUsage(String usageStr) {
       try {
           if (usageStr != null) {
               // 移除可能存在的百分号
               usageStr = usageStr.replace("%", "").trim();
               return Double.parseDouble(usageStr);
           }
           return 0.0;
       } catch (NumberFormatException e) {
           log.error("解析资源使用率失败: {}", e.getMessage());
           return 0.0;
       }
   }
   /**
    * Starts a collection run for a POST to "/search".
    *
    * <p>As far as the visible lines show, the flow is:
    * <ol>
    *   <li>query server resources through {@code getServerResource()};</li>
    *   <li>parse the reported CPU and memory usage with {@code parseUsage}; if either is
    *       at or above 90.0, log a warning and return {@code ResponseResult.error(503, ...)};</li>
    *   <li>otherwise split the request's users into batches whose size is
    *       {@code config.getMax_concurrent_users()} (3 when no config is supplied);</li>
    *   <li>create one task per batch via {@code createSingleBatchTask}, one batch at a time
    *       (flatMap concurrency 1), each subscription delayed by 2 seconds;</li>
    *   <li>collect all responses, store them through {@code saveKeywordTasks}, and wrap
    *       them in {@code ResponseResult.success};</li>
    *   <li>turn any error from either stage into a {@code ResponseResult.error} carrying
    *       the exception message.</li>
    * </ol>
    *
    * <p>NOTE(review): this span contains two method signatures and two copies of the batch
    * pipeline at different indentation. It looks like the previous and the current version
    * of the method are interleaved (diff residue) — confirm which lines are live before
    * editing the logic.
    *
    * @param searchTaskRequest request body carrying users, questions, config, webhook URL,
    *                          the save-to-database flag and the keyword id
    * @param request           the servlet request; not referenced in the lines shown here
    * @return a Mono emitting the wrapped task responses or a wrapped error
    * @throws JsonProcessingException declared on the signature; no visible line throws it directly
    */
   @PostMapping("/search")
   @ApiOperation(value = "开始采集")
   public Mono<List<SearchTaskResponse>> createSearchTask(
   public Mono<ResponseResult<?>> createSearchTask(
           @RequestBody SearchTaskRequest searchTaskRequest,
           HttpServletRequest request) throws JsonProcessingException {
       int maxConcurrentUsers = searchTaskRequest.getConfig() != null ?
               searchTaskRequest.getConfig().getMax_concurrent_users() : 3;
       List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers);
       // First check the server's resources
       return getServerResource()
               .flatMap(resourceResponse -> {
                   // Convert the string usage values to double
                   double cpuUsage = parseUsage(resourceResponse.getCpu_usage_percent());
                   double memoryUsage = parseUsage(resourceResponse.getMemory_usage_percent());
                   // Check CPU and memory usage
                   if (cpuUsage >= 90.0 || memoryUsage >= 90.0) {
                       // NOTE(review): %.1f is given the same getter values that are passed to
                       // parseUsage(String) above. If those are Strings, String.format throws
                       // IllegalFormatConversionException here; cpuUsage / memoryUsage were
                       // probably intended — confirm.
                       String errorMsg = String.format("服务器资源不足:CPU使用率 %.1f%%,内存使用率 %.1f%%",
                               resourceResponse.getCpu_usage_percent(), resourceResponse.getMemory_usage_percent());
                       log.warn(errorMsg);
                       return Mono.just(ResponseResult.error(503, errorMsg));
                   }
       // Get the keywordId
       Integer keywordId = searchTaskRequest.getKeyword_id();
                   int maxConcurrentUsers = searchTaskRequest.getConfig() != null ?
                           searchTaskRequest.getConfig().getMax_concurrent_users() : 3;
                   List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers);
       return Flux.fromIterable(userBatches)
               .flatMap(batch -> {
                   SearchTaskRequest batchRequest = new SearchTaskRequest();
                   batchRequest.setUsers(batch);
                   batchRequest.setQuestions(searchTaskRequest.getQuestions());
                   batchRequest.setConfig(searchTaskRequest.getConfig());
                   batchRequest.setSave_to_database(searchTaskRequest.getSave_to_database());
                   batchRequest.setWebhook_url(searchTaskRequest.getWebhook_url());
                   batchRequest.setKeyword_id(keywordId);
                   // Get the keywordId
                   Integer keywordId = searchTaskRequest.getKeyword_id();
                   // Split
                   return createSingleBatchTask(batchRequest)
                           .delaySubscription(Duration.ofSeconds(2)); // add a delay between batches
               }, 1) // limit concurrency to 1 so the batches run in order
               .collectList() // collect the responses of all batches
               .flatMap(responses ->
                   return Flux.fromIterable(userBatches)
                           .flatMap(batch -> {
                               SearchTaskRequest batchRequest = new SearchTaskRequest();
                               batchRequest.setUsers(batch);
                               batchRequest.setQuestions(searchTaskRequest.getQuestions());
                               batchRequest.setConfig(searchTaskRequest.getConfig());
                               batchRequest.setSave_to_database(searchTaskRequest.getSave_to_database());
                               batchRequest.setWebhook_url(searchTaskRequest.getWebhook_url());
                               batchRequest.setKeyword_id(keywordId);
                       saveKeywordTasks(keywordId, responses) // save the keyword-task associations
                               .thenReturn(responses) // return the original responses
               );
                               return createSingleBatchTask(batchRequest)
                                       .delaySubscription(Duration.ofSeconds(2)); // add a delay between batches
                           }, 1) // limit concurrency to 1 so the batches run in order
                           .collectList() // collect the responses of all batches
                           .flatMap(responses ->
                                   saveKeywordTasks(keywordId, responses) // save the keyword-task associations
                                           .thenReturn(responses) // return the original responses
                           )
                           .map(responses -> ResponseResult.success(responses)) // wrap the result in a ResponseResult
                           .onErrorResume(e -> {
                               log.error("创建搜索任务失败: {}", e.getMessage(), e);
                               return Mono.just(ResponseResult.error("创建搜索任务失败: " + e.getMessage()));
                           });
               })
               .onErrorResume(e -> {
                   log.error("检查服务器资源失败: {}", e.getMessage(), e);
                   return Mono.just(ResponseResult.error("检查服务器资源失败: " + e.getMessage()));
               });
   }
    private Mono<Void> saveKeywordTasks(Integer keywordId, List<SearchTaskResponse> taskResponses) {
@@ -239,6 +270,7 @@
                    KeywordTask keywordTask = new KeywordTask();
                    keywordTask.setKeyword_id(keywordId);
                    keywordTask.setTask_id(response.getTask_id());
                    keywordTask.setStatus("pending");
                    return keywordTask;
                })
                .collect(Collectors.toList());
@@ -254,7 +286,15 @@
                .then();
    }
    private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize) {
    private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize,Integer keywordId) {
        Keyword keyword = keywordService.getById(keywordId);
        if (null==keyword.getNum()){
            keyword.setNum(0);
        }
        keyword.setNum(keyword.getNum()+1);
        keywordService.updateById(keyword);
        List<List<UserDto>> batches = new ArrayList<>();
        for (int i = 0; i < users.size(); i += batchSize) {
            batches.add(users.subList(i, Math.min(i + batchSize, users.size())));
@@ -484,7 +524,7 @@
                // 3. 收集所有需要更新的问题和引用
                List<Question> questionsToUpdate = new ArrayList<>();
                List<Reference> allReferences = new ArrayList<>();
                List<Reference> resultList = new ArrayList<>();
                // 遍历结果
                for (UserResult userResult : result.getResults()) {
                    for (QuestionResult questionResult : userResult.getQuestions_results()) {
@@ -510,17 +550,6 @@
                                questionsToUpdate.add(question);
                                //如果查询结果不为空查询num
                                Integer maxNumByKeywordId = referenceService.getMaxNumByKeywordId(keyword.getKeyword_id());
                               if (maxNumByKeywordId != null){
                                   maxNumByKeywordId++;
                               }else {
                                   maxNumByKeywordId = 1;
                               }
                                // 收集引用数据,处理空集合情况
                                Integer finalMaxNumByKeywordId = maxNumByKeywordId;
                                List<Reference> references =
                                        Optional.ofNullable(questionResult.getReferences())
                                                .orElse(Collections.emptyList())
@@ -531,30 +560,38 @@
                                                    reference.setTitle(ref.getTitle());
                                                    reference.setUrl(ref.getUrl());
                                                    reference.setDomain(ref.getDomain());
                                                    reference.setNum(finalMaxNumByKeywordId);
                                                    reference.setNum(keyword.getNum());
                                                    reference.setTask_id(result.getTask_id());
                                                    reference.setKeyword_id(keyword.getKeyword_id());
                                                    //域名和平台id映射
                                                    reference.setCreate_time(LocalDateTime.now());
                                                    Platform platform = platformService.getPlatformByDomain(reference.getDomain());
//                                                    if (platform == null) {
//                                                        throw new RuntimeException("未找到对应的平台: " + reference.getDomain());
//                                                    }
                                                    if (platform != null){
                                                    if (platform == null) {
                                                        //平台为空 创建平台 类型为“默认”
                                                        Type type = typeService.getOne(new LambdaQueryWrapper<Type>().eq(Type::getType_name,"默认"));
                                                        if (type == null) {
                                                            Type newType = new Type();
                                                            newType.setType_name("默认");
                                                            typeService.save(newType);
                                                            type = newType;
                                                        }
                                                        Platform platform1 = new Platform();
                                                        platform1.setDomain(reference.getDomain());
                                                        platform1.setPlatform_name(reference.getDomain());
                                                        platform1.setType_id(type.getType_id());
                                                        platformService.save(platform1);
                                                        reference.setType_id(type.getType_id());
                                                        reference.setPlatform_id(platform1.getPlatform_id());
                                                    }
                                                    else {
                                                        reference.setPlatform_id(platform.getPlatform_id());
                                                        Type type = typeService.getById(platform.getType_id());
//                                                    if (type == null) {
//                                                        throw new RuntimeException("未找到对应的类型: " + reference.getDomain());
//                                                    }
                                                        if (type != null){
                                                            reference.setType_id(type.getType_id());
                                                        }
                                                    }
                                                    // 根据 domain 查询类型
                                                    return reference;
                                                })
                                                .collect(Collectors.toList());
@@ -563,6 +600,53 @@
                                if (!references.isEmpty()) {
                                    allReferences.addAll(references);
                                }
                                //取数据库中当前关键词的当前轮次的当前问题id结果拿出来
                                List<Reference> dbList = referenceService.list(new LambdaQueryWrapper<Reference>().eq(Reference::getKeyword_id, keyword.getKeyword_id())
                                        .eq(Reference::getNum, keyword.getNum())
                                        .eq(Reference::getQuestion_id, question.getQuestion_id())
                                );
                                // 1. 合并两个列表
                                List<Reference> combinedList = new ArrayList<>();
                                combinedList.addAll(allReferences);
                                combinedList.addAll(dbList);
                                // 2. 创建复合键的Map,用于统计完全匹配的记录
                                Map<String, List<Reference>> compositeKeyMap = combinedList.stream()
                                        .collect(Collectors.groupingBy(
                                                ref -> ref.getTitle() + "|" + ref.getUrl() + "|" + ref.getDomain()
                                        ));
                                // 3. 处理每组重复记录
                                compositeKeyMap.forEach((key, refGroup) -> {
                                    // 3.1 找出组内有ID的记录(优先从dbList中获取)
                                    Optional<Reference> existingRecord = refGroup.stream()
                                            .filter(ref -> ref.getReference_id() != null)
                                            .findFirst();
                                    // 3.2 统计该组的重复次数(总数-1)
                                    int repetitionCount = refGroup.size() - 1;
                                    // 3.3 决定最终保留的记录
                                    Reference recordToSave;
                                    if (existingRecord.isPresent()) {
                                        // 使用已有ID的记录并更新重复次数
                                        recordToSave = existingRecord.get();
                                        recordToSave.setRepetition_num(
                                                (recordToSave.getRepetition_num() == null ? 0 : recordToSave.getRepetition_num())
                                                        + repetitionCount
                                        );
                                    } else {
                                        // 没有ID记录则取第一条并设置重复次数
                                        recordToSave = refGroup.get(0);
                                        recordToSave.setRepetition_num(repetitionCount);
                                    }
                                    resultList.add(recordToSave);
                                });
                                referenceService.saveOrUpdateBatch(resultList);
                            }
                        } catch (Exception e) {
                            log.error(e.getMessage(), e);
@@ -577,7 +661,7 @@
                    questionService.updateBatchById(questionsToUpdate);
                    System.out.println("成功批量更新 " + questionsToUpdate.size() + " 个问题");
                }
                referenceService.saveBatch(allReferences);
                // 5. 批量插入引用,使用流式分批处理
//                if (!allReferences.isEmpty()) {
//                    int batchSize = 1000;
@@ -642,4 +726,9 @@
                .onErrorResume(e -> Mono.just(
                        new ServerResourceResponse( e.getMessage())));
    }
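    /**
     * Given an order id, look up all of its keyword ids and every task id under each keyword,
     * poll the status of all tasks, and for each task whose status is "completed" call the
     * get-result endpoint and process the result.
     */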
}