package com.linghu.controller;
|
|
import java.time.Duration;
|
import java.time.LocalDateTime;
|
import java.time.format.DateTimeFormatter;
|
import java.util.*;
|
import java.util.concurrent.*;
|
import java.util.concurrent.locks.ReentrantLock;
|
import java.util.stream.Collectors;
|
|
import javax.servlet.http.HttpServletRequest;
|
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
import com.linghu.model.dto.*;
|
import com.linghu.model.entity.*;
|
import com.linghu.service.*;
|
import lombok.extern.slf4j.Slf4j;
|
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.core.ParameterizedTypeReference;
|
import org.springframework.dao.DuplicateKeyException;
|
import org.springframework.http.*;
|
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.web.reactive.function.client.ExchangeStrategies;
|
import org.springframework.web.reactive.function.client.WebClient;
|
|
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
|
import com.linghu.model.common.ResponseResult;
|
import com.linghu.utils.JwtUtils;
|
|
import io.jsonwebtoken.lang.Collections;
|
import io.swagger.annotations.Api;
|
import io.swagger.annotations.ApiOperation;
|
import reactor.core.publisher.Flux;
|
import reactor.core.publisher.Mono;
|
|
import org.springframework.web.bind.annotation.* ;
|
import org.springframework.http.HttpStatus;
|
import com.linghu.model.dto.TaskResultResponse.QuestionResult;
|
import com.linghu.model.dto.TaskResultResponse.UserResult;
|
import reactor.core.scheduler.Schedulers;
|
|
@RestController
|
@RequestMapping("/collect")
|
@Api(value = "采集接口", tags = "采集管理")
|
@Slf4j
|
public class CollectController {
|
|
// ---- Collaborators and configuration injected by Spring ----
@Autowired
|
private ReferenceService referenceService;
|
|
// Value of the "linghu.url" property; used in this class as the prefix of every outbound "/api/v1/..." URL.
@Value("${linghu.url}")
|
private String baseUrl;
|
|
// Shared HTTP client used for the status, search and cancel calls below.
@Autowired
|
private WebClient webClient;
|
|
@Autowired
|
private JwtUtils jwtUtils;
|
@Autowired
|
private KeywordService keywordService;
|
@Autowired
|
private QuestionService questionService;
|
@Autowired
|
private KeywordTaskService keywordTaskService;
|
@Autowired
|
private PlatformService platformService;
|
@Autowired
|
private TypeService typeService;
|
@Autowired
|
private UserService userService;
|
@Autowired
|
private OrderService orderService;
|
@Autowired
|
private QuestionResultService questionResultService;
|
|
// Search requests waiting to be processed; a thread-safe queue implementation is used here.
|
private static final Queue<SearchTaskRequest> taskQueue = new ConcurrentLinkedQueue<>();
|
// Global map: keyword ID -> queue of user batches that still have to be submitted for that keyword.
|
private static final ConcurrentMap<Integer, Queue<List<UserDto>>> batchQueues = new ConcurrentHashMap<>();
|
|
// Flag telling whether a queued task is currently being processed.
|
private static volatile boolean isProcessing = false; // volatile gives cross-thread visibility only; it does not make check-then-set sequences atomic
|
|
@PostMapping("/search")
|
@ApiOperation(value = "开始采集")
|
public Mono<ResponseResult<?>> createSearchTask(
|
@RequestBody SearchTaskRequest searchTaskRequest,
|
HttpServletRequest request) throws JsonProcessingException {
|
|
// 首先检查服务器资源
|
return getServerResource()
|
.flatMap(resourceResponse -> {
|
double cpuUsage = parseUsage(resourceResponse.getCpu_usage_percent());
|
double memoryUsage = parseUsage(resourceResponse.getMemory_usage_percent());
|
|
if (cpuUsage >= 90.0 || memoryUsage >= 90.0) {
|
|
String errorMsg = String.format("服务器资源不足,请稍后再试");
|
|
log.warn(errorMsg);
|
return Mono.just(ResponseResult.error(503, errorMsg));
|
}
|
|
// 将新的任务请求加入队列
|
taskQueue.add(searchTaskRequest);
|
|
// 如果当前没有任务在处理中,则启动任务队列的处理
|
if (!isProcessing) {
|
processNextTaskInQueue();
|
}
|
|
// 返回响应,通知用户任务已开始
|
return Mono.just(ResponseResult.success("任务已加入队列,正在处理..."));
|
})
|
.onErrorResume(e -> {
|
log.error("检查服务器资源失败: {}", e.getMessage(), e);
|
return Mono.just(ResponseResult.error("检查服务器资源失败: " + e.getMessage()));
|
});
|
}
|
|
private void processNextTaskInQueue() {
|
// 设置为正在处理
|
isProcessing = true;
|
|
// 从队列中取出下一个任务
|
SearchTaskRequest nextTaskRequest = taskQueue.poll();
|
|
if (nextTaskRequest != null) {
|
Integer keywordId = nextTaskRequest.getKeyword_id();
|
log.info("开始处理任务队列,keywordId: {}", keywordId);
|
|
executeBatchTask(nextTaskRequest)
|
.doFinally(signal -> {
|
isProcessing = false;
|
if (!taskQueue.isEmpty()) {
|
processNextTaskInQueue();
|
}
|
})
|
.subscribe(
|
result -> log.info("任务处理完成,keywordId: {}", keywordId), // 成功日志
|
error -> { // 关键:添加错误处理
|
log.error("任务队列处理异常,keywordId: {}", keywordId, error);
|
}
|
);
|
} else {
|
isProcessing = false; // 无任务时重置状态
|
}
|
}
|
private Mono<ResponseResult<String>> executeBatchTask(SearchTaskRequest searchTaskRequest) {
|
Integer keywordId = searchTaskRequest.getKeyword_id();
|
//
|
int maxConcurrentUsers = searchTaskRequest.getConfig() != null ?
|
searchTaskRequest.getConfig().getMax_concurrent_users() : 3;
|
List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers, keywordId,searchTaskRequest.getIs_first());
|
|
|
// 创建批次队列并存入全局映射
|
Queue<List<UserDto>> batchQueue = new ConcurrentLinkedQueue<>(userBatches);
|
batchQueues.put(keywordId, batchQueue); // 存储到全局映射
|
|
return Mono.just(ResponseResult.success("第一个批次已开始"))
|
.doOnTerminate(() -> {
|
executeBatchTask(batchQueue, searchTaskRequest, keywordId)
|
.subscribe(
|
result -> log.info("批次任务启动成功,keywordId: {}", keywordId),
|
error -> { // 处理批次执行异常
|
log.error("批次任务执行异常,keywordId: {}", keywordId, error);
|
// 可选:异常时清理资源
|
batchQueues.remove(keywordId);
|
}
|
);
|
});
|
}
|
private Mono<ResponseResult<?>> executeBatchTask(Queue<List<UserDto>> batchQueue, SearchTaskRequest searchTaskRequest, Integer keywordId) {
|
// 如果队列为空,说明所有批次已经完成
|
|
if (batchQueue == null || batchQueue.isEmpty()) {
|
// 清理资源
|
batchQueues.remove(keywordId);
|
return Mono.just(ResponseResult.success("所有批次已完成"));
|
}
|
|
List<UserDto> currentBatch = batchQueue.poll(); // 从队列中获取当前批次
|
SearchTaskRequest batchRequest = new SearchTaskRequest();
|
batchRequest.setUsers(currentBatch);
|
batchRequest.setQuestions(searchTaskRequest.getQuestions());
|
batchRequest.setConfig(searchTaskRequest.getConfig());
|
batchRequest.setSave_to_database(searchTaskRequest.getSave_to_database());
|
batchRequest.setWebhook_url(searchTaskRequest.getWebhook_url());
|
batchRequest.setKeyword_id(keywordId);
|
|
return createSingleBatchTask(batchRequest)
|
.flatMap(taskResponse -> {
|
if (taskResponse != null && taskResponse.getTask_id() != null) {
|
// 直接等待任务完成,不再保存任务关联信息
|
return waitForTaskCompletion(taskResponse.getTask_id(), batchQueue, searchTaskRequest, keywordId);
|
} else {
|
return Mono.just(ResponseResult.error("创建批次任务失败"));
|
}
|
})
|
.onErrorResume(e -> {
|
log.error("调用第三方接口失败: {}", e.getMessage(), e); // 关键日志
|
return Mono.error(new RuntimeException("调用第三方接口失败: " + e.getMessage()));
|
})
|
.doFinally(signal -> {
|
// 任务完成时清理资源
|
if (batchQueue.isEmpty()) {
|
batchQueues.remove(keywordId);
|
}
|
});
|
}
|
|
private Mono<ResponseResult<?>> waitForTaskCompletion(String taskId, Queue<List<UserDto>> batchQueue, SearchTaskRequest searchTaskRequest, Integer keywordId) {
|
// 查询任务状态
|
return getTaskStatus(taskId)
|
.flatMap(statusResponse -> {
|
// 检查任务是否被取消
|
if ("cancelled".equalsIgnoreCase(statusResponse.getStatus())) {
|
batchQueues.remove(keywordId); // 清理资源
|
return Mono.just(ResponseResult.success("任务已被取消"));
|
}
|
// 如果任务状态是"submitted"或"running",继续轮询
|
if (!"completed".equalsIgnoreCase(statusResponse.getStatus()) && !"failed".equalsIgnoreCase(statusResponse.getStatus()) && !"cancelled".equalsIgnoreCase(statusResponse.getStatus()) && !("ERROR".equalsIgnoreCase(statusResponse.getStatus()) && statusResponse.getMessage().contains("Task not found")) ) {
|
return Mono.delay(Duration.ofSeconds(5)) // 延迟 5 秒后再次查询
|
.flatMap(aLong -> waitForTaskCompletion(taskId, batchQueue, searchTaskRequest, keywordId)); // 递归调用继续等待
|
} else {
|
// 如果状态为其他状态,则继续处理下一个批次
|
return executeBatchTask(batchQueue, searchTaskRequest, keywordId);
|
}
|
})
|
.onErrorResume(e -> {
|
// 处理查询任务状态时的错误
|
return Mono.just(ResponseResult.error("查询任务状态失败: " + e.getMessage()));
|
});
|
}
|
@ApiOperation(value = "查询任务状态")
|
@GetMapping("/status")
|
public Mono<TaskStatusResponse> getTaskStatus(String taskId) {
|
return webClient.get()
|
.uri(baseUrl + "/api/v1/tasks/" + taskId)
|
.accept(MediaType.APPLICATION_JSON)
|
.retrieve()
|
.onStatus(HttpStatus::isError, response -> response.bodyToMono(TaskStatusResponse.class)
|
.flatMap(errorBody -> Mono.error(new RuntimeException(errorBody.getDetail()))))
|
.bodyToMono(TaskStatusResponse.class)
|
.onErrorResume(e -> {
|
// 处理错误,创建一个自定义的错误响应对象
|
TaskStatusResponse errorResponse = new TaskStatusResponse();
|
errorResponse.setStatus("ERROR");
|
errorResponse.setMessage(e.getMessage());
|
errorResponse.setDetail(e.getMessage());
|
return Mono.just(errorResponse);
|
});
|
}
|
|
// 添加一个辅助方法来安全地将字符串转换为double
|
private double parseUsage(String usageStr) {
|
try {
|
if (usageStr != null) {
|
// 移除可能存在的百分号
|
usageStr = usageStr.replace("%", "").trim();
|
return Double.parseDouble(usageStr);
|
}
|
return 0.0;
|
} catch (NumberFormatException e) {
|
log.error("解析资源使用率失败: {}", e.getMessage());
|
return 0.0;
|
}
|
}
|
|
private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize,Integer keywordId,Boolean isFirst) {
|
|
Keyword keyword = keywordService.getById(keywordId);
|
if (isFirst){
|
keyword.setNum(1);
|
}else {
|
keyword.setNum(keyword.getNum()+1);
|
}
|
keywordService.updateById(keyword);
|
|
List<List<UserDto>> batches = new ArrayList<>();
|
for (int i = 0; i < users.size(); i += batchSize) {
|
batches.add(users.subList(i, Math.min(i + batchSize, users.size())));
|
|
}
|
for (int i = 0; i < batches.size(); i++){
|
// 创建 KeywordTask 关联,task_id 设置为 null,表示任务尚未开始
|
KeywordTask keywordTask = new KeywordTask();
|
keywordTask.setKeyword_id(keywordId);
|
keywordTask.setTask_id(null); // 任务ID为空
|
|
keywordTask.setNum(keyword.getNum());
|
keywordTaskService.save(keywordTask); // 保存 KeywordTask
|
}
|
|
|
return batches;
|
}
|
|
private Mono<SearchTaskResponse> createSingleBatchTask(SearchTaskRequest batchRequest) {
|
// 记录请求第三方的基本信息(便于排查)
|
String thirdPartyUrl = baseUrl + "/api/v1/search";
|
Integer keywordId = batchRequest.getKeyword_id();
|
log.info("开始向第三方提交任务,keywordId: {}, URL: {}, 请求参数: {}",
|
keywordId, thirdPartyUrl, batchRequest.toString()); // 打印请求参数(建议用工具类转JSON)
|
return webClient.post()
|
.uri(baseUrl + "/api/v1/search")
|
.contentType(MediaType.APPLICATION_JSON)
|
.bodyValue(batchRequest)
|
.retrieve()
|
.onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class)
|
.flatMap(errorBody -> Mono.error(new RuntimeException(errorBody))))
|
// 处理第三方返回的5xx服务器错误(如第三方服务异常)
|
.onStatus(HttpStatus::is5xxServerError, response ->
|
response.bodyToMono(String.class)
|
.flatMap(errorBody -> {
|
String errorMsg = String.format("第三方接口5xx错误,keywordId: %d, URL: %s, 状态码: %d, 错误详情: %s",
|
keywordId, thirdPartyUrl, response.statusCode().value(), errorBody);
|
log.error(errorMsg);
|
return Mono.error(new RuntimeException(errorMsg));
|
})
|
)
|
.bodyToMono(new ParameterizedTypeReference<SearchTaskResponse>() {})
|
.flatMap(taskResponse -> {
|
if (taskResponse != null && taskResponse.getTask_id() != null) {
|
|
// 使用 Reactor 的方式更新数据库
|
return Mono.fromRunnable(() -> {
|
|
//更新关键词状态
|
LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>();
|
updateWrapper.eq(Keyword::getKeyword_id, batchRequest.getKeyword_id());
|
updateWrapper.set(Keyword::getStatus, "submitted");
|
updateWrapper.set(Keyword::getTask_id, taskResponse.getTask_id());
|
keywordService.update(updateWrapper);
|
//设置轮数
|
Keyword keyword = keywordService.getById(batchRequest.getKeyword_id());
|
// 更新关键词任务与任务ID的关联
|
// 获取与关键词相关的任务,task_id 为 null,确保只取一个任务
|
List<KeywordTask> keywordTasks = keywordTaskService.list(new LambdaQueryWrapper<KeywordTask>()
|
.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id())
|
.eq(KeywordTask::getNum, keyword.getNum())
|
.isNull(KeywordTask::getTask_id));
|
if (keywordTasks.size() > 0) {
|
KeywordTask keywordTask = keywordTasks.get(0);
|
keywordTask.setTask_id(taskResponse.getTask_id());
|
keywordTask.setStatus("pending");
|
keywordTaskService.updateById(keywordTask);
|
}
|
//将提问词列表的状态转为pending
|
for (String questionName : batchRequest.getQuestions()) {
|
questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keyword.getKeyword_id()).eq(Question::getQuestion,questionName).set(Question::getStatus, "pending"));
|
|
}
|
//所有关键词都在采集中或者已完成或者错误设置订单进入采集状态
|
List<Keyword> orderKeywords = keywordService.list(new LambdaQueryWrapper<Keyword>()
|
.eq(Keyword::getOrder_id, keyword.getOrder_id()));
|
if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k ->
|
"completed".equals(k.getStatus()) || "submitted".equals(k.getStatus())
|
)) {
|
Orders orders = orderService.getById(keyword.getOrder_id());
|
if (orders != null) {
|
orders.setStatus(2);
|
orderService.updateById(orders);
|
|
}
|
}
|
}).subscribeOn(Schedulers.boundedElastic()) // 在弹性线程池执行
|
.thenReturn(taskResponse);
|
}
|
return Mono.just(taskResponse);
|
});
|
}
|
|
@PostMapping("/cancel/{keywordId}")
|
@ApiOperation(value = "取消任务")
|
public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable Integer keywordId) {
|
// 1. 从主队列移除任务
|
List<SearchTaskRequest> removedMainQueueTasks = removeTasksFromQueueByKeywordId(keywordId);
|
int removedMainQueueCount = removedMainQueueTasks.size(); // 获取移除的任务数量
|
|
// 2. 从批次队列移除任务 (新增逻辑)
|
int removedBatchQueue = removeBatchTasksByKeywordId(keywordId);
|
|
// 3. 查询所有与关键词相关的任务
|
List<KeywordTask> tasks = keywordTaskService.list(
|
new LambdaQueryWrapper<KeywordTask>().eq(KeywordTask::getKeyword_id, keywordId)
|
);
|
|
// 4. 筛选出需要远程取消的任务
|
List<KeywordTask> tasksToCancelRemotely = tasks.stream()
|
.filter(task -> task.getTask_id() != null && "pending".equalsIgnoreCase(task.getStatus()))
|
.collect(Collectors.toList());
|
|
return Flux.fromIterable(tasksToCancelRemotely)
|
.flatMap(task -> {
|
// 创建状态更新和远程取消的组合操作
|
Mono<Void> updateStatus = updateTaskStatus(task.getTask_id(), "cancelled");
|
Mono<ResponseResult<?>> cancelOp = cancelRemoteTask(task.getTask_id())
|
.onErrorResume(e -> {
|
log.error("取消任务 {} 失败: {}", task.getTask_id(), e.getMessage());
|
return Mono.just(ResponseResult.error("取消任务失败: " + e.getMessage()));
|
});
|
|
return Mono.zip(cancelOp, updateStatus)
|
.thenReturn(true);
|
}, 10)
|
.collectList()
|
.flatMap(canceledTasks -> {
|
return updateKeywordAndOrderStatus(keywordId)
|
.thenReturn(ResponseResult.success(
|
new TaskCancelResponse(
|
String.format("任务已取消: 主队列移除%d, 批次队列移除%d, 远程取消%d",
|
removedMainQueueCount ,
|
removedBatchQueue,
|
tasksToCancelRemotely.size())
|
)
|
));
|
});
|
}
|
|
// 新增方法:移除批次队列
|
private int removeBatchTasksByKeywordId(Integer keywordId) {
|
Queue<List<UserDto>> batchQueue = batchQueues.remove(keywordId);
|
if (batchQueue != null) {
|
int count = batchQueue.size();
|
batchQueue.clear();
|
log.info("从批次队列中移除关键词 {} 的 {} 个批次任务", keywordId, count);
|
return count;
|
}
|
return 0;
|
}
|
// 辅助方法:获取待取消任务
|
private List<KeywordTask> getTasksToCancel(Integer keywordId) {
|
return keywordTaskService.list(
|
new LambdaQueryWrapper<KeywordTask>()
|
.eq(KeywordTask::getKeyword_id, keywordId)
|
.isNotNull(KeywordTask::getTask_id)
|
.eq(KeywordTask::getStatus, "pending")
|
);
|
}
|
|
// 提取关键词和订单状态更新的逻辑为单独方法
|
private Mono<Void> updateKeywordAndOrderStatus(Integer keywordId) {
|
return Mono.fromRunnable(() -> {
|
try {
|
// 查询关键词
|
Keyword keyword = keywordService.getById(keywordId);
|
if (keyword == null) {
|
log.warn("未找到关键词,keywordId: {}", keywordId);
|
return;
|
}
|
|
//把任务id为空的删除
|
LambdaUpdateWrapper<KeywordTask> updateWrapper = new LambdaUpdateWrapper<>();
|
updateWrapper.eq(KeywordTask::getKeyword_id, keywordId);
|
updateWrapper.isNull(KeywordTask::getTask_id);
|
keywordTaskService.remove(updateWrapper);
|
// 查询该关键词下的所有任务
|
LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>();
|
keywordTaskWrapper.eq(KeywordTask::getKeyword_id, keywordId);
|
|
List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper);
|
|
// 更新关键词状态
|
keyword.setStatus("completed");
|
keywordService.updateById(keyword);
|
|
//更新提问词状态为取消
|
questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keywordId).isNull(Question::getResponse).set(Question::getStatus, "cancelled").set(Question::getError, "任务已取消"));
|
// 将所有提问词设置eroor为已取消
|
// questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keywordId).set(Question::getStatus, "failed").set(Question::getError, "任务已取消"));
|
|
// 更新订单状态
|
String orderId = keyword.getOrder_id();
|
if (orderId != null && !orderId.isEmpty()) {
|
// 查询订单下所有关键词
|
LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>();
|
orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId);
|
List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper);
|
|
// 所有关键词均已完成,则更新订单状态为3
|
if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k ->
|
"completed".equals(k.getStatus()) || "false".equals(k.getStatus())
|
)) {
|
Orders orders = orderService.getById(orderId);
|
if (orders != null) {
|
orders.setStatus(3);
|
orderService.updateById(orders);
|
log.info("订单 {} 所有关键词已完成,更新状态为3", orderId);
|
}
|
}
|
if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k ->
|
!"completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "cancelled".equals(k.getStatus())
|
)) {
|
Orders orders = orderService.getById(orderId);
|
if (orders != null) {
|
orders.setStatus(1);
|
orderService.updateById(orders);
|
log.info("订单 {} 所有关键词已完成或者取消,更新状态为1", orderId);
|
}
|
}
|
}
|
} catch (Exception e) {
|
log.error("更新关键词和订单状态失败: {}", e.getMessage(), e);
|
}
|
});
|
}
|
private List<SearchTaskRequest> removeTasksFromQueueByKeywordId(Integer keywordId) {
|
List<SearchTaskRequest> removedTasks = new ArrayList<>();
|
|
Iterator<SearchTaskRequest> iterator = taskQueue.iterator();
|
while (iterator.hasNext()) {
|
SearchTaskRequest task = iterator.next();
|
if (task.getKeyword_id() != null && task.getKeyword_id().equals(keywordId)) {
|
removedTasks.add(task);
|
iterator.remove();
|
}
|
}
|
|
|
log.info("从队列中移除了 {} 个与关键词ID {} 相关的任务", removedTasks.size(), keywordId);
|
return removedTasks;
|
}
|
|
// 发送远程取消请求
|
private Mono<ResponseResult<?>> cancelRemoteTask(String taskId) {
|
// 使用Collections.singletonMap或手动创建Map
|
Map<String, Object> requestBody = new HashMap<>();
|
requestBody.put("status", "pending");
|
|
return webClient.post()
|
.uri(baseUrl + "/api/v1/tasks/" + taskId + "/cancel")
|
.contentType(MediaType.APPLICATION_JSON)
|
.bodyValue(requestBody)
|
.retrieve()
|
.onStatus(HttpStatus::isError, response -> response.bodyToMono(String.class)
|
.flatMap(errorBody -> Mono.error(new RuntimeException("取消失败: " + errorBody))))
|
.bodyToMono(Void.class)
|
.thenReturn(ResponseResult.success("任务已取消"));
|
}
|
|
// 更新单个任务状态
|
private Mono<Void> updateTaskStatus(String taskId, String status) {
|
return Mono.fromRunnable(() -> {
|
LambdaUpdateWrapper<KeywordTask> updateWrapper = new LambdaUpdateWrapper<>();
|
updateWrapper.eq(KeywordTask::getTask_id, taskId);
|
updateWrapper.set(KeywordTask::getStatus, status);
|
keywordTaskService.update(updateWrapper);
|
}).subscribeOn(Schedulers.boundedElastic()).then();
|
}
|
@ApiOperation(value = "获取任务结果")
|
@GetMapping("/tasks/{taskId}")
|
public Mono<TaskResultResponse> getTaskResult(@PathVariable String taskId) {
|
WebClient webClient2 = WebClient.builder()
|
.exchangeStrategies(ExchangeStrategies.builder()
|
.codecs(configurer -> configurer.defaultCodecs()
|
.maxInMemorySize(10 * 1024 * 1024)) // 10MB
|
.build())
|
.build();
|
return webClient2.get()
|
.uri(baseUrl + "/api/v1/tasks/" + taskId + "/result")
|
.accept(MediaType.APPLICATION_JSON)
|
.retrieve()
|
.onStatus(HttpStatus::is4xxClientError, response -> {
|
if (response.statusCode() == HttpStatus.NOT_FOUND) {
|
return response.bodyToMono(String.class)
|
.flatMap(errorBody -> Mono.error(new RuntimeException("任务不存在")));
|
} else if (response.statusCode() == HttpStatus.BAD_REQUEST) {
|
return response.bodyToMono(String.class)
|
.flatMap(errorBody -> Mono.error(new RuntimeException("任务未完成,无法获取结果")));
|
}
|
return response.createException().flatMap(Mono::error);
|
})
|
.bodyToMono(TaskResultResponse.class)
|
.flatMap(responseResult -> {
|
TaskResultResponse result = responseResult;
|
|
if (result != null && result.getResults() != null) {
|
return updateQuestionAndReference(result)
|
.thenReturn(responseResult);
|
}
|
return Mono.just(responseResult);
|
})
|
.onErrorResume(e -> {
|
System.out.println("获取任务结果失败");
|
log.error("获取任务结果失败: {}", e.getMessage(), e);
|
TaskResultResponse result = new TaskResultResponse();
|
result.setDetail("获取任务结果失败: " + e.getMessage());
|
return Mono.just(result);
|
});
|
}
|
|
/**
|
* 获取或创建平台(确保同一domain只创建一次)
|
* @param domain 平台域名
|
* @return 已存在或新创建的Platform
|
*/
|
private Platform getOrCreatePlatform(String domain,String platformName) {
|
// 1. 先尝试查询已存在的平台
|
|
Platform platform = platformService.getPlatformByDomain(domain);
|
if (platform != null) {
|
return platform;
|
}
|
|
// 2. 若不存在,尝试创建(处理并发场景)
|
try {
|
// 2.1 获取或创建“默认”类型(Type也需避免重复,建议Type表的type_name也加唯一约束)
|
Type defaultType = typeService.getOne(new LambdaQueryWrapper<Type>()
|
.eq(Type::getType_name, "默认"));
|
if (defaultType == null) {
|
defaultType = new Type();
|
defaultType.setType_name("默认");
|
typeService.save(defaultType); // 若Type可能重复,此处也需处理DuplicateKeyException
|
}
|
|
// 2.2 构建新平台对象
|
Platform newPlatform = new Platform();
|
newPlatform.setDomain(domain);
|
if (platformName != null) {
|
newPlatform.setPlatform_name(platformName);
|
}else {
|
newPlatform.setPlatform_name(domain);
|
}
|
// 平台名称默认使用域名,可根据实际需求调整
|
newPlatform.setType_id(defaultType.getType_id());
|
newPlatform.setCreate_time(LocalDateTime.now()); // 补充创建时间
|
|
// 2.3 尝试保存,若因唯一约束冲突失败,则捕获异常
|
platformService.save(newPlatform);
|
return newPlatform; // 保存成功,返回新创建的平台
|
|
} catch (DuplicateKeyException e) {
|
// 3. 若捕获到重复键异常,说明并发创建了,重新查询即可(此时数据库中已存在该平台)
|
log.warn("平台domain={}已存在,无需重复创建", domain, e);
|
return platformService.getPlatformByDomain(domain); // 重新查询,一定能获取到
|
} catch (Exception e) {
|
// 处理其他异常(如数据库连接失败等)
|
log.error("创建平台失败,domain={}", domain, e);
|
throw new RuntimeException("创建平台失败", e);
|
}
|
}
|
//更新提问词和引用数据
|
// private Mono<Void> updateQuestionAndReference(TaskResultResponse result) {
|
// return Mono.fromRunnable(() -> {
|
// try {
|
// //查看每个账号信息的status是否正常
|
//
|
// // 1. 根据KeywordTask更新关键词状态
|
// // 查询关键词ID
|
// LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>();
|
// keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id());
|
// KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper);
|
// keywordTask.setStatus("completed");
|
// keywordTaskService.updateById(keywordTask);
|
// Keyword keyword = keywordService.getById(keywordTask.getKeyword_id());
|
//
|
// if (keyword == null) {
|
// System.out.println("未找到关联的关键词,task_id: " + result.getTask_id());
|
// //报错
|
// throw new Exception("未找到关联的关键词,task_id: " + result.getTask_id());
|
//
|
// }
|
// LambdaQueryWrapper<KeywordTask> keywordTaskWrapper2 = new LambdaQueryWrapper<>();
|
// keywordTaskWrapper2.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id());
|
// List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper2);
|
//
|
// //如果全部为completed 或者错误、取消、任务不存在 关键词也为completed ,如果关联关系没有任务id,或者状态为running ,关键词为submitted,
|
// if (keywordTasks.stream().allMatch(task -> "completed".equals(task.getStatus()) || "false".equals(task.getStatus()) || "cancelled".equals(task.getStatus()) ||"canceled".equals(task.getStatus()) || "nonentity".equals(task.getStatus())) ) {
|
// keyword.setStatus("completed");
|
// keywordService.updateById(keyword);
|
//
|
// }
|
//
|
// String orderId = keyword.getOrder_id();
|
// if (orderId == null || orderId.isEmpty()) {
|
// System.out.println("关键词[" + keyword.getKeyword_id() + "]未关联订单,跳过订单状态更新");
|
// return;
|
// }
|
//
|
// // 2.更新订单状态为待处理 查询该订单下的所有关键词,更新订单状态(有取消)
|
// LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>();
|
// orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId);
|
// List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper);
|
//
|
// if (orderKeywords.isEmpty()) {
|
// System.out.println("订单[" + orderId + "]下无关键词,跳过状态更新");
|
// return;
|
// }
|
// boolean allValid2 = orderKeywords.stream()
|
// .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "cancelled".equals(k.getStatus()));
|
// if (allValid2) {
|
// Orders orders = orderService.getById(orderId);
|
// if (orders != null) {
|
// orders.setStatus(1); // 假设Orders有Integer类型的status字段
|
// orderService.updateById(orders);
|
// System.out.println("订单[" + orderId + "]所有关键词采集完成或者取消,已更新状态为1");
|
// } else {
|
// System.out.println("未找到订单[" + orderId + "],无法更新状态");
|
// }
|
// }
|
// // 3.更新订单状态为完成 检查所有关键词的状态是否均为 completed 或 false
|
// boolean allValid = orderKeywords.stream()
|
// .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus()));
|
//
|
// // 4. 若所有关键词状态均有效,更新订单状态为3
|
// if (allValid) {
|
// Orders orders = orderService.getById(orderId);
|
// if (orders != null) {
|
// orders.setStatus(3); // 假设Orders有Integer类型的status字段
|
// orderService.updateById(orders);
|
// System.out.println("订单[" + orderId + "]所有关键词状态符合条件,已更新状态为3");
|
// } else {
|
// System.out.println("未找到订单[" + orderId + "],无法更新状态");
|
// }
|
// }
|
//
|
//
|
// Orders orders = orderService.getById(keyword.getOrder_id());
|
//
|
// // 2. 批量查询所有问题
|
// LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>();
|
// queryWrapper.eq(Question::getKeyword_id, keyword.getKeyword_id());
|
// List<Question> questions = questionService.list(queryWrapper);
|
//
|
// // 构建问题映射表,用于快速查找
|
// Map<String, Question> questionMap = questions.stream()
|
// .collect(Collectors.toMap(Question::getQuestion, q -> q));
|
//
|
// // 3. 收集所有需要更新的问题和引用
|
// List<Question> questionsToUpdate = new ArrayList<>();
|
// List<Reference> allReferences = new ArrayList<>();
|
// List<Reference> resultList = new ArrayList<>();
|
//
|
// // 遍历账号
|
// for (UserResult userResult : result.getResults()) {
|
// //更新账号状态
|
// if ( "failed".equals(userResult.getStatus())){
|
// if (userResult.getError().contains("登录失败")){
|
// LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>();
|
// userWrapper.eq(User::getUser_email, userResult.getUser_email());
|
// userWrapper.set(User::getStatus, "无法登录");
|
// userService.update(userWrapper);
|
// //更新所有提问词的状态
|
// questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keyword.getKeyword_id())
|
// .set(Question::getStatus, "failed")
|
// .set(Question::getError, "账户登录失败"));
|
//
|
// }else if (userResult.getError().contains("信息错误")){
|
// LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>();
|
// userWrapper.eq(User::getUser_email, userResult.getUser_email());
|
// userWrapper.set(User::getStatus, "信息错误");
|
// userService.update(userWrapper);
|
// }
|
// }
|
// for (QuestionResult questionResult : userResult.getQuestions_results()) {
|
// try {
|
// Question question = questionMap.get(questionResult.getQuestion());
|
// if (question != null) {
|
//
|
//
|
// //保存问题结果
|
// QuestionResultList questionResultList = new QuestionResultList();
|
// questionResultList.setKeyword_id(keyword.getKeyword_id());
|
// questionResultList.setQuestion(questionResult.getQuestion());
|
// questionResultList.setResponse(questionResult.getResponse());
|
// questionResultList.setStatus(questionResult.getStatus());
|
// questionResultList.setExtracted_count(questionResult.getExtracted_count());
|
// questionResultList.setKeyword_task_id(result.getTask_id());
|
// questionResultList.setError(questionResult.getError());
|
// questionResultList.setNum(keyword.getNum());
|
// if (questionResult.getTimestamp() != null) {
|
// DateTimeFormatter formatter = DateTimeFormatter
|
// .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS");
|
// questionResultList.setTimestamp(
|
// LocalDateTime.parse(questionResult.getTimestamp(), formatter));
|
// }
|
// // 保存问题结果列表(新增保存逻辑)
|
// questionResultService.save(questionResultList);
|
// // 查询当前轮次下该提问词的所有结果
|
// List<QuestionResultList> allResults = questionResultService.list(
|
// new LambdaQueryWrapper<QuestionResultList>()
|
// .eq(QuestionResultList::getKeyword_id, keyword.getKeyword_id())
|
// .eq(QuestionResultList::getQuestion, question.getQuestion())
|
// .eq(QuestionResultList::getNum, keyword.getNum())
|
// );
|
//
|
// // 判断最终状态
|
// String finalStatus = determineFinalStatus(allResults);
|
// if ("success".equals(finalStatus)){
|
// question.setStatus("success");
|
// question.setError("");
|
// }else if ("no_results".equals(finalStatus)){
|
// question.setStatus("success");
|
// question.setError("采集结果无引用数据");
|
// }else if ("busyness".equals(finalStatus)){
|
// question.setStatus("failed");
|
// question.setError("DeepSeek繁忙,请稍后尝试");
|
// }
|
//
|
// // 更新问题对象
|
// question.setResponse(questionResult.getResponse());
|
// question.setExtracted_count(questionResult.getExtracted_count());
|
//// question.setError(questionResult.getError());
|
// question.setKeyword_id(keyword.getKeyword_id());
|
//
|
// if (questionResult.getTimestamp() != null) {
|
// DateTimeFormatter formatter = DateTimeFormatter
|
// .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS");
|
// question.setTimestamp(
|
// LocalDateTime.parse(questionResult.getTimestamp(), formatter));
|
// }
|
//
|
// questionsToUpdate.add(question);
|
// // 初始化引用列表(避免null)
|
// List<Reference> references = new ArrayList<>();
|
// List<TaskResultResponse.Reference> originalReferences = questionResult.getReferences();
|
// if (originalReferences == null) {
|
// originalReferences = Collections.emptyList();
|
// }
|
//
|
// // 遍历原始引用列表,转换为Reference对象
|
// for (TaskResultResponse.Reference ref : originalReferences) {
|
// Reference reference = new Reference();
|
// // 设置基本字段
|
// reference.setQuestion_id(question.getQuestion_id());
|
// reference.setTitle(ref.getTitle());
|
// reference.setUrl(ref.getUrl());
|
// reference.setDomain(ref.getDomain());
|
// reference.setNum(keyword.getNum());
|
// reference.setTask_id(result.getTask_id());
|
// reference.setKeyword_id(keyword.getKeyword_id());
|
// if (null!=ref.getPublish_time()) {
|
// reference.setCreate_time(ref.getPublish_time().atStartOfDay());
|
// }
|
//
|
// // 关键:使用优化后的方法获取平台,避免重复创建
|
// Platform platform = getOrCreatePlatform(ref.getDomain(),ref.getPlatform_name());
|
// reference.setPlatform_id(platform.getPlatform_id());
|
// reference.setType_id(platform.getType_id()); // 直接从平台获取类型ID,更可靠
|
// // 添加到结果列表
|
// references.add(reference);
|
// }
|
// // 添加到总引用列表
|
// if (!references.isEmpty()) {
|
// allReferences.addAll(references);
|
// }
|
//
|
// //取数据库中当前关键词的当前轮次的当前问题id结果拿出来
|
// List<Reference> dbList = referenceService.list(new LambdaQueryWrapper<Reference>().eq(Reference::getKeyword_id, keyword.getKeyword_id())
|
// .eq(Reference::getNum, keyword.getNum())
|
// .eq(Reference::getQuestion_id, question.getQuestion_id())
|
// );
|
//
|
// // 1. 合并两个列表
|
// List<Reference> combinedList = new ArrayList<>();
|
// combinedList.addAll(allReferences);
|
// combinedList.addAll(dbList);
|
//
|
// // 2. 创建复合键的Map,用于统计完全匹配的记录
|
// Map<String, List<Reference>> compositeKeyMap = combinedList.stream()
|
// .collect(Collectors.groupingBy(
|
// ref -> ref.getTitle() + "|" + ref.getUrl() + "|" + ref.getDomain()
|
// ));
|
//
|
// // 3. 处理每组重复记录
|
// compositeKeyMap.forEach((key, refGroup) -> {
|
// // 3.1 找出组内有ID的记录(优先从dbList中获取)
|
// Optional<Reference> existingRecord = refGroup.stream()
|
// .filter(ref -> ref.getReference_id() != null)
|
// .findFirst();
|
//
|
// // 3.2 统计该组的重复次数(总数-1)
|
// int repetitionCount = refGroup.size() - 1;
|
//
|
// // 3.3 决定最终保留的记录
|
// Reference recordToSave = new Reference();
|
// if (existingRecord.isPresent()) {
|
// // 使用已有ID的记录并更新重复次数
|
// recordToSave = existingRecord.get();
|
// recordToSave.setRepetition_num(
|
// (recordToSave.getRepetition_num() == null ? 1 : recordToSave.getRepetition_num())
|
// + repetitionCount
|
// );
|
// } else {
|
// // 没有ID记录则取第一条并设置重复次数
|
// recordToSave = refGroup.get(0);
|
// recordToSave.setRepetition_num(1+repetitionCount);
|
// }
|
//
|
// resultList.add(recordToSave);
|
// });
|
// referenceService.saveOrUpdateBatch(resultList);
|
// }
|
// } catch (Exception e) {
|
// log.error(e.getMessage(), e);
|
// System.out.println("处理问题结果失败: " + e.getMessage());
|
// }
|
// }
|
//
|
//
|
// }
|
//
|
// // 4. 批量更新问题
|
// System.out.println(questionsToUpdate);
|
// if (!questionsToUpdate.isEmpty()) {
|
// questionService.updateBatchById(questionsToUpdate);
|
// System.out.println("成功批量更新 " + questionsToUpdate.size() + " 个问题");
|
// }
|
//
|
// } catch (Exception e) {
|
// log.error("更新问题和引用数据失败: " ,e.getMessage(), e);
|
// throw new RuntimeException("更新问题和引用数据失败", e);
|
// }
|
// });
|
// }
|
// // 根据所有批次的结果判断最终状态
|
// private String determineFinalStatus(List<QuestionResultList> results) {
|
// if (results.isEmpty()) {
|
// return "no_results"; // 无结果
|
// }
|
//
|
// // 统计关键指标
|
// int totalCount = results.size();
|
// int emptyResponseCount = 0;
|
// int systemBusyCount = 0;
|
//
|
// for (QuestionResultList result : results) {
|
// // 判断回答是否为空
|
// if (result.getExtracted_count() == 0 ) {
|
// emptyResponseCount++;
|
// }
|
//
|
// // 判断是否为系统繁忙
|
// if ("success".equals(result.getStatus()) && (result.getResponse().isEmpty()|| result.getResponse().contains("WebDriver连接中断") || result.getResponse().contains("响应超时"))) {
|
// systemBusyCount++;
|
// }
|
//
|
// }
|
//
|
// // 全返回系统繁忙
|
// if (systemBusyCount == totalCount) {
|
// return "busyness";
|
// }
|
// // 全返回信息为空
|
// if (emptyResponseCount == totalCount) {
|
// return "no_results";
|
// }
|
//
|
//
|
// // 系统繁忙比例超过阈值(可配置,这里设为70%)
|
//// double busyRate = (double) systemBusyCount / totalCount;
|
//// if (busyRate >= 0.7) {
|
//// return "系统繁忙,请稍后尝试";
|
//// }
|
//
|
// // 其他情况返回成功
|
// return "success";
|
// }
|
|
private Mono<Void> updateQuestionAndReference(TaskResultResponse result) {
|
return Mono.fromRunnable(() -> doUpdateQuestionAndReference(result))
|
.onErrorResume(e -> {
|
log.error("处理任务结果失败", e);
|
return Mono.error(e); // 传播异常,触发事务回滚
|
}).then();
|
}
|
|
// 核心业务逻辑,添加事务注解保证原子性
|
@Transactional(rollbackFor = Exception.class)
|
public void doUpdateQuestionAndReference(TaskResultResponse result) {
|
try {
|
// 1. 查询关键词任务并更新状态
|
LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>();
|
keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id());
|
KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper);
|
if (keywordTask == null) {
|
throw new Exception("未找到关键词任务,task_id: " + result.getTask_id());
|
}
|
keywordTask.setStatus("completed");
|
keywordTaskService.updateById(keywordTask);
|
|
// 2. 查询关键词信息
|
Keyword keyword = keywordService.getById(keywordTask.getKeyword_id());
|
if (keyword == null) {
|
throw new Exception("未找到关联的关键词,task_id: " + result.getTask_id());
|
}
|
|
// 3. 更新关键词状态(基于关联任务状态)
|
updateKeywordStatus(keyword);
|
|
// 4. 更新订单状态(基于关键词状态)
|
updateOrderStatus(keyword);
|
|
// 5. 预查询问题列表(一次查询,内存映射)
|
LambdaQueryWrapper<Question> questionWrapper = new LambdaQueryWrapper<>();
|
questionWrapper.eq(Question::getKeyword_id, keyword.getKeyword_id());
|
List<Question> questions = questionService.list(questionWrapper);
|
Map<String, Question> questionMap = questions.stream()
|
.collect(Collectors.toMap(Question::getQuestion, q -> q));
|
|
// 6. 收集批量保存的数据(避免循环内保存)
|
List<QuestionResultList> questionResultsToSave = new ArrayList<>(); // 批量保存问题结果
|
List<Reference> allReferences = new ArrayList<>(); // 收集所有引用,后续统一处理
|
|
// 7. 遍历结果处理问题和引用
|
for (UserResult userResult : result.getResults()) {
|
// 7.1 更新用户状态(失败处理)
|
handleUserStatus(userResult, keyword);
|
|
// 7.2 处理问题结果
|
for (QuestionResult questionResult : userResult.getQuestions_results()) {
|
Question question = questionMap.get(questionResult.getQuestion());
|
if (question == null) {
|
log.warn("未找到问题记录: {}", questionResult.getQuestion());
|
continue;
|
}
|
|
// 7.2.1 构建问题结果并加入批量列表
|
QuestionResultList questionResultList = buildQuestionResultList(questionResult, keyword, result);
|
questionResultsToSave.add(questionResultList);
|
|
// 7.2.2 处理引用数据(仅收集,不立即保存)
|
List<Reference> references = buildReferences(questionResult, question, keyword, result);
|
allReferences.addAll(references);
|
}
|
}
|
|
// 8. 批量保存问题结果(一次数据库交互)
|
if (!questionResultsToSave.isEmpty()) {
|
questionResultService.saveBatch(questionResultsToSave);
|
log.info("批量保存问题结果 {} 条", questionResultsToSave.size());
|
}
|
|
// 9. 批量处理引用数据(去重+更新重复次数+批量保存)
|
if (!allReferences.isEmpty()) {
|
handleReferencesInBatch(allReferences, keyword);
|
}
|
|
// 10. 批量更新问题状态(基于最终结果)
|
updateQuestionsStatusInBatch(questions, keyword);
|
|
} catch (Exception e) {
|
log.error("更新数据失败", e);
|
throw new RuntimeException("更新数据失败", e); // 触发事务回滚
|
}
|
}
|
|
// 更新关键词状态
|
private void updateKeywordStatus(Keyword keyword) {
|
LambdaQueryWrapper<KeywordTask> taskWrapper = new LambdaQueryWrapper<>();
|
taskWrapper.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id());
|
List<KeywordTask> keywordTasks = keywordTaskService.list(taskWrapper);
|
|
boolean allCompletedOrFailed = keywordTasks.stream()
|
.allMatch(task -> "completed".equals(task.getStatus())
|
|| "false".equals(task.getStatus())
|
|| "cancelled".equals(task.getStatus())
|
|| "canceled".equals(task.getStatus())
|
|| "nonentity".equals(task.getStatus()));
|
|
if (allCompletedOrFailed) {
|
keyword.setStatus("completed");
|
keywordService.updateById(keyword);
|
}
|
}
|
|
// 更新订单状态
|
private void updateOrderStatus(Keyword keyword) {
|
String orderId = keyword.getOrder_id();
|
if (orderId == null || orderId.isEmpty()) {
|
log.info("关键词[{}]未关联订单,跳过订单更新", keyword.getKeyword_id());
|
return;
|
}
|
|
LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>();
|
orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId);
|
List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper);
|
if (orderKeywords.isEmpty()) {
|
log.info("订单[{}]无关键词,跳过状态更新", orderId);
|
return;
|
}
|
|
boolean allValid2 = orderKeywords.stream()
|
.allMatch(k -> "completed".equals(k.getStatus())
|
|| "false".equals(k.getStatus())
|
|| "cancelled".equals(k.getStatus()));
|
if (allValid2) {
|
updateOrderStatus(orderId, 1, "所有关键词采集完成或取消");
|
}
|
|
boolean allValid = orderKeywords.stream()
|
.allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus()));
|
if (allValid) {
|
updateOrderStatus(orderId, 3, "所有关键词状态符合条件");
|
}
|
}
|
|
// 封装订单状态更新
|
private void updateOrderStatus(String orderId, Integer status, String logMsg) {
|
Orders orders = orderService.getById(orderId);
|
if (orders != null) {
|
orders.setStatus(status);
|
orderService.updateById(orders);
|
log.info("订单[{}]{},已更新状态为{}", orderId, logMsg, status);
|
} else {
|
log.warn("未找到订单[{}],无法更新状态", orderId);
|
}
|
}
|
|
// 构建问题结果对象
|
private QuestionResultList buildQuestionResultList(QuestionResult questionResult, Keyword keyword, TaskResultResponse result) {
|
QuestionResultList questionResultList = new QuestionResultList();
|
questionResultList.setKeyword_id(keyword.getKeyword_id());
|
questionResultList.setQuestion(questionResult.getQuestion());
|
questionResultList.setResponse(questionResult.getResponse());
|
questionResultList.setStatus(questionResult.getStatus());
|
questionResultList.setExtracted_count(questionResult.getExtracted_count());
|
questionResultList.setKeyword_task_id(result.getTask_id());
|
questionResultList.setError(questionResult.getError());
|
questionResultList.setNum(keyword.getNum());
|
|
if (questionResult.getTimestamp() != null) {
|
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS");
|
questionResultList.setTimestamp(LocalDateTime.parse(questionResult.getTimestamp(), formatter));
|
}
|
return questionResultList;
|
}
|
|
// 构建引用数据列表
|
private List<Reference> buildReferences(QuestionResult questionResult, Question question, Keyword keyword, TaskResultResponse result) {
|
List<Reference> references = new ArrayList<>();
|
List<TaskResultResponse.Reference> originalReferences = questionResult.getReferences();
|
if (originalReferences == null) {
|
return references;
|
}
|
|
for (TaskResultResponse.Reference ref : originalReferences) {
|
// 过滤无效引用(标题、URL、域名不能为空)
|
if (ref.getTitle() == null || ref.getUrl() == null || ref.getDomain() == null) {
|
log.warn("引用数据字段缺失,跳过:title={}, url={}, domain={}",
|
ref.getTitle(), ref.getUrl(), ref.getDomain());
|
continue;
|
}
|
|
Reference reference = new Reference();
|
reference.setQuestion_id(question.getQuestion_id());
|
reference.setTitle(ref.getTitle());
|
reference.setUrl(ref.getUrl());
|
reference.setDomain(ref.getDomain());
|
reference.setNum(keyword.getNum());
|
reference.setTask_id(result.getTask_id());
|
reference.setKeyword_id(keyword.getKeyword_id());
|
|
if (ref.getPublish_time() != null) {
|
reference.setCreate_time(ref.getPublish_time().atStartOfDay());
|
}
|
|
Platform platform = getOrCreatePlatform(ref.getDomain(), ref.getPlatform_name());
|
// 校验平台信息非空
|
if (platform == null || platform.getPlatform_id() == null) {
|
log.warn("平台信息无效,跳过引用:domain={}", ref.getDomain());
|
continue;
|
}
|
reference.setPlatform_id(platform.getPlatform_id());
|
reference.setType_id(platform.getType_id());
|
references.add(reference);
|
}
|
return references;
|
}
|
|
// 批量处理引用数据(去重+更新重复次数)
|
private void handleReferencesInBatch(List<Reference> allReferences, Keyword keyword) {
|
// 1. 过滤原始列表中的 null 元素
|
List<Reference> validReferences = allReferences.stream()
|
.filter(Objects::nonNull)
|
.collect(Collectors.toList());
|
|
if (validReferences.isEmpty()) {
|
log.info("无有效引用数据,跳过批量保存");
|
return;
|
}
|
|
// 2. 查询数据库中已存在的引用并过滤 null
|
LambdaQueryWrapper<Reference> dbRefWrapper = new LambdaQueryWrapper<>();
|
dbRefWrapper.eq(Reference::getKeyword_id, keyword.getKeyword_id())
|
.eq(Reference::getNum, keyword.getNum());
|
List<Reference> dbReferences = referenceService.list(dbRefWrapper);
|
List<Reference> validDbReferences = dbReferences.stream()
|
.filter(Objects::nonNull)
|
.collect(Collectors.toList());
|
|
// 3. 合并并去重
|
Map<String, List<Reference>> compositeKeyMap = new HashMap<>();
|
validReferences.forEach(ref -> addToCompositeMap(compositeKeyMap, ref));
|
validDbReferences.forEach(ref -> addToCompositeMap(compositeKeyMap, ref));
|
|
// 4. 处理重复次数
|
List<Reference> referencesToSave = new ArrayList<>();
|
compositeKeyMap.forEach((key, refGroup) -> {
|
List<Reference> validRefGroup = refGroup.stream()
|
.filter(Objects::nonNull)
|
.collect(Collectors.toList());
|
if (validRefGroup.isEmpty()) return;
|
|
Optional<Reference> existingRef = validRefGroup.stream()
|
.filter(ref -> ref.getReference_id() != null)
|
.findFirst();
|
|
Reference finalRef = existingRef.orElse(validRefGroup.get(0));
|
// 重复次数 = 老的 + (这一组里一共抓到多少-1)
|
int repetitionNum = (finalRef.getRepetition_num() == null ? 1 : finalRef.getRepetition_num())
|
+ (validRefGroup.size() - 1);
|
finalRef.setRepetition_num(repetitionNum);
|
referencesToSave.add(finalRef);
|
});
|
|
// 5. 最终校验并保存
|
List<Reference> finalSaveList = referencesToSave.stream()
|
.filter(Objects::nonNull)
|
.collect(Collectors.toList());
|
|
// 关键校验:列表非空且元素有效
|
if (finalSaveList.isEmpty()) {
|
log.info("处理后无有效引用数据可保存");
|
return;
|
}
|
if (finalSaveList.stream().anyMatch(ref -> !(ref instanceof Reference))) {
|
log.error("引用数据类型异常,无法保存");
|
return;
|
}
|
|
// 执行保存
|
try {
|
referenceService.saveOrUpdateBatch(finalSaveList);
|
log.info("批量保存引用数据成功,数量:{}", finalSaveList.size());
|
} catch (Exception e) {
|
log.error("批量保存引用数据失败", e);
|
throw new RuntimeException("保存引用数据失败", e);
|
}
|
}
|
|
// 辅助方法:将引用添加到复合键Map
|
private void addToCompositeMap(Map<String, List<Reference>> map, Reference ref) {
|
// 再次校验引用的核心字段非空
|
if (ref.getTitle() == null || ref.getUrl() == null || ref.getDomain() == null) {
|
log.warn("引用核心字段为空,跳过映射:{}", ref);
|
return;
|
}
|
String key = ref.getQuestion_id() + "|" + ref.getTitle() + "|" + ref.getUrl() + "|" + ref.getDomain() + "|" + ref.getNum();
|
map.computeIfAbsent(key, k -> new ArrayList<>()).add(ref);
|
}
|
|
// 批量更新问题状态
|
private void updateQuestionsStatusInBatch(List<Question> questions, Keyword keyword) {
|
if (questions.isEmpty()) {
|
return;
|
}
|
|
// 一次性查询所有问题结果(基于关键词+轮次)
|
LambdaQueryWrapper<QuestionResultList> resultWrapper = new LambdaQueryWrapper<>();
|
resultWrapper.eq(QuestionResultList::getKeyword_id, keyword.getKeyword_id())
|
.eq(QuestionResultList::getNum, keyword.getNum());
|
List<QuestionResultList> allQuestionResults = questionResultService.list(resultWrapper);
|
|
// 按问题分组,便于查询
|
Map<String, List<QuestionResultList>> questionResultsMap = allQuestionResults.stream()
|
.collect(Collectors.groupingBy(QuestionResultList::getQuestion));
|
|
// 批量更新问题状态
|
List<Question> questionsToUpdate = new ArrayList<>();
|
questions.forEach(question -> {
|
List<QuestionResultList> results = questionResultsMap.getOrDefault(question.getQuestion(), Collections.emptyList());
|
String finalStatus = determineFinalStatus(results);
|
|
if ("success".equals(finalStatus)) {
|
question.setStatus("success");
|
question.setError("");
|
} else if ("no_results".equals(finalStatus)) {
|
question.setStatus("success");
|
question.setError("采集结果无引用数据");
|
} else if ("busyness".equals(finalStatus)) {
|
question.setStatus("failed");
|
question.setError("DeepSeek繁忙,请稍后尝试");
|
}
|
// 更新其他字段(响应、时间戳等)
|
results.stream().findFirst().ifPresent(result -> {
|
question.setResponse(result.getResponse());
|
question.setExtracted_count(result.getExtracted_count());
|
question.setTimestamp(result.getTimestamp());
|
});
|
questionsToUpdate.add(question);
|
});
|
|
if (!questionsToUpdate.isEmpty()) {
|
questionService.updateBatchById(questionsToUpdate);
|
log.info("批量更新问题状态 {} 条", questionsToUpdate.size());
|
}
|
}
|
|
// 处理用户状态异常
|
private void handleUserStatus(UserResult userResult, Keyword keyword) {
|
if ("failed".equals(userResult.getStatus())) {
|
LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>();
|
userWrapper.eq(User::getUser_email, userResult.getUser_email());
|
|
if (userResult.getError().contains("登录失败")) {
|
userWrapper.set(User::getStatus, "无法登录");
|
userService.update(userWrapper);
|
// 批量更新问题状态为失败
|
questionService.update(new LambdaUpdateWrapper<Question>()
|
.eq(Question::getKeyword_id, keyword.getKeyword_id())
|
.set(Question::getStatus, "failed")
|
.set(Question::getError, "账户登录失败"));
|
} else if (userResult.getError().contains("信息错误")) {
|
userWrapper.set(User::getStatus, "信息错误");
|
userService.update(userWrapper);
|
// 批量更新问题状态为失败
|
questionService.update(new LambdaUpdateWrapper<Question>()
|
.eq(Question::getKeyword_id, keyword.getKeyword_id())
|
.set(Question::getStatus, "failed")
|
.set(Question::getError, "账户信息错误"));
|
}
|
}
|
}
|
|
// 原方法:判断最终状态(复用)
|
private String determineFinalStatus(List<QuestionResultList> results) {
|
if (results.isEmpty()) {
|
return "no_results";
|
}
|
|
int totalCount = results.size();
|
int emptyResponseCount = 0;
|
int systemBusyCount = 0;
|
|
for (QuestionResultList result : results) {
|
if (result.getExtracted_count() == 0) {
|
emptyResponseCount++;
|
}
|
if ("success".equals(result.getStatus()) &&
|
(result.getResponse() == null || result.getResponse().isEmpty()
|
|| result.getResponse().contains("WebDriver连接中断")
|
|| result.getResponse().contains("响应超时"))) {
|
systemBusyCount++;
|
}
|
}
|
|
if (systemBusyCount == totalCount) {
|
return "busyness";
|
}
|
if (emptyResponseCount == totalCount) {
|
return "no_results";
|
}
|
return "success";
|
}
|
|
@GetMapping("/tasks/all")
|
@ApiOperation(value = "获取所有任务列表")
|
public Mono<TaskListResponse> getAllTasks() {
|
return webClient.get()
|
.uri(baseUrl + "/api/v1/tasks")
|
.accept(MediaType.APPLICATION_JSON)
|
.retrieve()
|
.bodyToMono(new ParameterizedTypeReference<TaskListResponse>() {
|
})
|
.onErrorResume(e -> {
|
TaskListResponse response = new TaskListResponse();
|
response.setDetail("获取任务列表失败: " + e.getMessage());
|
return Mono.just(response);
|
});
|
}
|
|
@GetMapping("/health")
|
@ApiOperation("健康检查")
|
public Mono<HealthResponse> checkThirdPartyHealth() {
|
return webClient.get()
|
.uri(baseUrl + "/health") // 假设第三方健康检查接口路径为/health
|
.retrieve()
|
.bodyToMono(HealthResponse.class)
|
.onErrorResume(e -> Mono.just(
|
new HealthResponse("unhealthy", null, "", e.getMessage())));
|
}
|
|
/**
|
* 查询服务器资源
|
*/
|
@GetMapping("/server/resource")
|
@ApiOperation(value = "查询服务器资源")
|
public Mono<ServerResourceResponse> getServerResource() {
|
return webClient.get()
|
.uri(baseUrl + "/api/v1/system/resources")
|
.retrieve()
|
.bodyToMono(ServerResourceResponse.class)
|
.onErrorResume(e -> Mono.just(
|
new ServerResourceResponse( e.getMessage())));
|
}
|
|
|
|
}
|