From d6011fd37ef2ff794d8efa93932bdf98d8f76dda Mon Sep 17 00:00:00 2001 From: guyue <1721849008@qq.com> Date: 星期四, 10 七月 2025 21:26:55 +0800 Subject: [PATCH] 资源判断 --- src/main/java/com/linghu/controller/CollectController.java | 231 ++++++++++++++++++++++++++++++++++++++++----------------- 1 files changed, 160 insertions(+), 71 deletions(-) diff --git a/src/main/java/com/linghu/controller/CollectController.java b/src/main/java/com/linghu/controller/CollectController.java index 09acdce..e0fdeeb 100644 --- a/src/main/java/com/linghu/controller/CollectController.java +++ b/src/main/java/com/linghu/controller/CollectController.java @@ -6,26 +6,18 @@ import java.util.*; import java.util.stream.Collectors; -import javax.annotation.Resource; import javax.servlet.http.HttpServletRequest; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linghu.mapper.PlatformMapper; -import com.linghu.mapper.TypeMapper; import com.linghu.model.dto.*; import com.linghu.model.entity.*; import com.linghu.service.*; import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.core.ParameterizedTypeReference; import org.springframework.http.*; -import org.springframework.web.client.RestTemplate; import org.springframework.web.reactive.function.client.WebClient; -import org.springframework.http.client.HttpComponentsClientHttpRequestFactory; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; @@ -37,7 +29,6 @@ import io.swagger.annotations.ApiOperation; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; -import reactor.core.publisher.SignalType; import org.springframework.web.bind.annotation.* ; import org.springframework.http.HttpStatus; @@ -47,6 +38,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import java.util.stream.Stream; @RestController @RequestMapping("/collect") @@ -198,37 +190,77 @@ .next() // 找到第一个完成的响应后结束流 .then(); // 转换为Mono<Void> }*/ + // 添加一个辅助方法来安全地将字符串转换为double + private double parseUsage(String usageStr) { + try { + if (usageStr != null) { + // 移除可能存在的百分号 + usageStr = usageStr.replace("%", "").trim(); + return Double.parseDouble(usageStr); + } + return 0.0; + } catch (NumberFormatException e) { + log.error("解析资源使用率失败: {}", e.getMessage()); + return 0.0; + } + } @PostMapping("/search") @ApiOperation(value = "开始采集") - public Mono<List<SearchTaskResponse>> createSearchTask( + public Mono<ResponseResult<?>> createSearchTask( @RequestBody SearchTaskRequest searchTaskRequest, HttpServletRequest request) throws JsonProcessingException { - int maxConcurrentUsers = searchTaskRequest.getConfig() != null ? - searchTaskRequest.getConfig().getMax_concurrent_users() : 3; - List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers); + // 首先检查服务器资源 + return getServerResource() + .flatMap(resourceResponse -> { + // 将字符串类型的使用率转换为double类型 + double cpuUsage = parseUsage(resourceResponse.getCpu_usage_percent()); + double memoryUsage = parseUsage(resourceResponse.getMemory_usage_percent()); + // 检查CPU和内存使用率 + if (cpuUsage >= 90.0 || memoryUsage >= 90.0) { + String errorMsg = String.format("服务器资源不足:CPU使用率 %.1f%%,内存使用率 %.1f%%", + resourceResponse.getCpu_usage_percent(), resourceResponse.getMemory_usage_percent()); + log.warn(errorMsg); + return Mono.just(ResponseResult.error(503, errorMsg)); + } - // 获取 keywordId - Integer keywordId = searchTaskRequest.getKeyword_id(); + int maxConcurrentUsers = searchTaskRequest.getConfig() != null ? + searchTaskRequest.getConfig().getMax_concurrent_users() : 3; + List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers); - return Flux.fromIterable(userBatches) - .flatMap(batch -> { - SearchTaskRequest batchRequest = new SearchTaskRequest(); - batchRequest.setUsers(batch); - batchRequest.setQuestions(searchTaskRequest.getQuestions()); - batchRequest.setConfig(searchTaskRequest.getConfig()); - batchRequest.setSave_to_database(searchTaskRequest.getSave_to_database()); - batchRequest.setWebhook_url(searchTaskRequest.getWebhook_url()); - batchRequest.setKeyword_id(keywordId); + // 获取 keywordId + Integer keywordId = searchTaskRequest.getKeyword_id(); + //分割 - return createSingleBatchTask(batchRequest) - .delaySubscription(Duration.ofSeconds(2)); // 批次之间添加延迟 - }, 1) // 限制并发数为1,确保顺序执行 - .collectList() // 收集所有批次的响应 - .flatMap(responses -> - saveKeywordTasks(keywordId, responses) // 保存关联关系 - .thenReturn(responses) // 返回原始响应 - ); + + return Flux.fromIterable(userBatches) + .flatMap(batch -> { + SearchTaskRequest batchRequest = new SearchTaskRequest(); + batchRequest.setUsers(batch); + batchRequest.setQuestions(searchTaskRequest.getQuestions()); + batchRequest.setConfig(searchTaskRequest.getConfig()); + batchRequest.setSave_to_database(searchTaskRequest.getSave_to_database()); + batchRequest.setWebhook_url(searchTaskRequest.getWebhook_url()); + batchRequest.setKeyword_id(keywordId); + + return createSingleBatchTask(batchRequest) + .delaySubscription(Duration.ofSeconds(2)); // 批次之间添加延迟 + }, 1) // 限制并发数为1,确保顺序执行 + .collectList() // 收集所有批次的响应 + .flatMap(responses -> + saveKeywordTasks(keywordId, responses) // 保存关联关系 + .thenReturn(responses) // 返回原始响应 + ) + .map(responses -> ResponseResult.success(responses)) // 使用ResponseResult包装结果 + .onErrorResume(e -> { + log.error("创建搜索任务失败: {}", e.getMessage(), e); + return Mono.just(ResponseResult.error("创建搜索任务失败: " + e.getMessage())); + }); + }) + .onErrorResume(e -> { + log.error("检查服务器资源失败: {}", e.getMessage(), e); + return Mono.just(ResponseResult.error("检查服务器资源失败: " + e.getMessage())); + }); } private Mono<Void> saveKeywordTasks(Integer keywordId, List<SearchTaskResponse> taskResponses) { @@ -238,6 +270,7 @@ KeywordTask keywordTask = new KeywordTask(); keywordTask.setKeyword_id(keywordId); keywordTask.setTask_id(response.getTask_id()); + keywordTask.setStatus("pending"); return keywordTask; }) .collect(Collectors.toList()); @@ -253,7 +286,15 @@ .then(); } - private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize) { + private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize,Integer keywordId) { + + Keyword keyword = keywordService.getById(keywordId); + if (null==keyword.getNum()){ + keyword.setNum(0); + } + keyword.setNum(keyword.getNum()+1); + keywordService.updateById(keyword); + List<List<UserDto>> batches = new ArrayList<>(); for (int i = 0; i < users.size(); i += batchSize) { batches.add(users.subList(i, Math.min(i + batchSize, users.size()))); @@ -272,11 +313,12 @@ .bodyToMono(new ParameterizedTypeReference<SearchTaskResponse>() {}) .flatMap(taskResponse -> { if (taskResponse != null && taskResponse.getTask_id() != null) { + // 使用 Reactor 的方式更新数据库 return Mono.fromRunnable(() -> { LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>(); updateWrapper.eq(Keyword::getKeyword_id, batchRequest.getKeyword_id()); - updateWrapper.set(Keyword::getStatus, "Submitted"); + updateWrapper.set(Keyword::getStatus, "submitted"); updateWrapper.set(Keyword::getTask_id, taskResponse.getTask_id()); keywordService.update(updateWrapper); }).subscribeOn(Schedulers.boundedElastic()) // 在弹性线程池执行 @@ -342,9 +384,9 @@ .map(data -> ResponseResult.success(data)) .onErrorResume(e -> { if (e.getMessage().contains("任务不存在")) { - return Mono.just(ResponseResult.error(404, "任务不存在")); + return Mono.just(ResponseResult.error(200, "任务不存在")); } else if (e.getMessage().contains("无法取消")) { - return Mono.just(ResponseResult.error(400, "任务已完成,无法取消")); + return Mono.just(ResponseResult.error(200, "任务已完成,无法取消")); } return Mono.just(ResponseResult.error(500, "取消任务失败: " + e.getMessage())); }); @@ -453,20 +495,12 @@ private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { return Mono.fromRunnable(() -> { try { - // 1. 更新关键词状态 - LambdaUpdateWrapper<Keyword> keywordUpdate = new LambdaUpdateWrapper<>(); - keywordUpdate.eq(Keyword::getTask_id, result.getTask_id()) - .set(Keyword::getStatus, "completed"); - keywordService.update(keywordUpdate); - + // 1. 根据KeywordTask更新关键词状态 // 查询关键词ID - LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>(); keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id()); KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper); -// LambdaQueryWrapper<Keyword> keywordQuery = new LambdaQueryWrapper<>(); -// keywordQuery.eq(Keyword::getTask_id, keywordTask.getTask_id()); Keyword keyword = keywordService.getById(keywordTask.getKeyword_id()); if (keyword == null) { @@ -475,6 +509,8 @@ throw new Exception("未找到关联的关键词,task_id: " + result.getTask_id()); // return; } + keyword.setStatus("completed"); + keywordService.updateById(keyword); // 2. 批量查询所有问题 LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>(); @@ -488,7 +524,7 @@ // 3. 收集所有需要更新的问题和引用 List<Question> questionsToUpdate = new ArrayList<>(); List<Reference> allReferences = new ArrayList<>(); - + List<Reference> resultList = new ArrayList<>(); // 遍历结果 for (UserResult userResult : result.getResults()) { for (QuestionResult questionResult : userResult.getQuestions_results()) { @@ -514,17 +550,6 @@ questionsToUpdate.add(question); - //如果查询结果不为空查询num - Integer maxNumByKeywordId = referenceService.getMaxNumByKeywordId(keyword.getKeyword_id()); - if (maxNumByKeywordId != null){ - maxNumByKeywordId++; - }else { - maxNumByKeywordId = 1; - } - - - // 收集引用数据,处理空集合情况 - Integer finalMaxNumByKeywordId = maxNumByKeywordId; List<Reference> references = Optional.ofNullable(questionResult.getReferences()) .orElse(Collections.emptyList()) @@ -535,28 +560,38 @@ reference.setTitle(ref.getTitle()); reference.setUrl(ref.getUrl()); reference.setDomain(ref.getDomain()); - reference.setNum(finalMaxNumByKeywordId); + reference.setNum(keyword.getNum()); + reference.setTask_id(result.getTask_id()); + reference.setKeyword_id(keyword.getKeyword_id()); //域名和平台id映射 reference.setCreate_time(LocalDateTime.now()); Platform platform = platformService.getPlatformByDomain(reference.getDomain()); -// if (platform == null) { -// throw new RuntimeException("未找到对应的平台: " + reference.getDomain()); -// } - if (platform != null){ + if (platform == null) { + //平台为空 创建平台 类型为“默认” + Type type = typeService.getOne(new LambdaQueryWrapper<Type>().eq(Type::getType_name,"默认")); + if (type == null) { + Type newType = new Type(); + newType.setType_name("默认"); + typeService.save(newType); + type = newType; + } + Platform platform1 = new Platform(); + platform1.setDomain(reference.getDomain()); + platform1.setPlatform_name(reference.getDomain()); + platform1.setType_id(type.getType_id()); + platformService.save(platform1); + + reference.setType_id(type.getType_id()); + reference.setPlatform_id(platform1.getPlatform_id()); + + } + else { reference.setPlatform_id(platform.getPlatform_id()); Type type = typeService.getById(platform.getType_id()); -// if (type == null) { -// throw new RuntimeException("未找到对应的类型: " + reference.getDomain()); -// } if (type != null){ reference.setType_id(type.getType_id()); } } - - - // 根据 domain 查询类型 - - return reference; }) .collect(Collectors.toList()); @@ -565,6 +600,53 @@ if (!references.isEmpty()) { allReferences.addAll(references); } + + //取数据库中当前关键词的当前轮次的当前问题id结果拿出来 + List<Reference> dbList = referenceService.list(new LambdaQueryWrapper<Reference>().eq(Reference::getKeyword_id, keyword.getKeyword_id()) + .eq(Reference::getNum, keyword.getNum()) + .eq(Reference::getQuestion_id, question.getQuestion_id()) + ); + + // 1. 合并两个列表 + List<Reference> combinedList = new ArrayList<>(); + combinedList.addAll(allReferences); + combinedList.addAll(dbList); + + // 2. 创建复合键的Map,用于统计完全匹配的记录 + Map<String, List<Reference>> compositeKeyMap = combinedList.stream() + .collect(Collectors.groupingBy( + ref -> ref.getTitle() + "|" + ref.getUrl() + "|" + ref.getDomain() + )); + + // 3. 处理每组重复记录 + + compositeKeyMap.forEach((key, refGroup) -> { + // 3.1 找出组内有ID的记录(优先从dbList中获取) + Optional<Reference> existingRecord = refGroup.stream() + .filter(ref -> ref.getReference_id() != null) + .findFirst(); + + // 3.2 统计该组的重复次数(总数-1) + int repetitionCount = refGroup.size() - 1; + + // 3.3 决定最终保留的记录 + Reference recordToSave; + if (existingRecord.isPresent()) { + // 使用已有ID的记录并更新重复次数 + recordToSave = existingRecord.get(); + recordToSave.setRepetition_num( + (recordToSave.getRepetition_num() == null ? 0 : recordToSave.getRepetition_num()) + + repetitionCount + ); + } else { + // 没有ID记录则取第一条并设置重复次数 + recordToSave = refGroup.get(0); + recordToSave.setRepetition_num(repetitionCount); + } + + resultList.add(recordToSave); + }); + referenceService.saveOrUpdateBatch(resultList); } } catch (Exception e) { log.error(e.getMessage(), e); @@ -579,7 +661,7 @@ questionService.updateBatchById(questionsToUpdate); System.out.println("成功批量更新 " + questionsToUpdate.size() + " 个问题"); } - referenceService.saveBatch(allReferences); + // 5. 批量插入引用,使用流式分批处理 // if (!allReferences.isEmpty()) { // int batchSize = 1000; @@ -621,6 +703,7 @@ } @GetMapping("/health") + @ApiOperation("健康检查") public Mono<HealthResponse> checkThirdPartyHealth() { return webClient.get() .uri(baseUrl + "/health") // 假设第三方健康检查接口路径为/health @@ -634,6 +717,7 @@ * 查询服务器资源 */ @GetMapping("/server/resource") + @ApiOperation(value = "查询服务器资源") public Mono<ServerResourceResponse> getServerResource() { return webClient.get() .uri(baseUrl + "/api/v1/system/resources") @@ -642,4 +726,9 @@ .onErrorResume(e -> Mono.just( new ServerResourceResponse( e.getMessage()))); } + /** + * 传入orderid查所有关键词id以及关键词下面的所有任务id,轮询所有任务状态,如果状态为completed,则循环调用获取结果接口,处理结果 + */ + + } -- Gitblit v1.7.1