package com.linghu.controller;
|
|
import java.time.Duration;
|
import java.time.LocalDateTime;
|
import java.time.format.DateTimeFormatter;
|
import java.util.*;
|
import java.util.concurrent.*;
|
import java.util.concurrent.locks.ReentrantLock;
|
import java.util.stream.Collectors;
|
|
import javax.servlet.http.HttpServletRequest;
|
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
import com.linghu.model.dto.*;
|
import com.linghu.model.entity.*;
|
import com.linghu.service.*;
|
import lombok.extern.slf4j.Slf4j;
|
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.core.ParameterizedTypeReference;
|
import org.springframework.dao.DuplicateKeyException;
|
import org.springframework.http.*;
|
import org.springframework.web.reactive.function.client.WebClient;
|
|
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
|
import com.linghu.model.common.ResponseResult;
|
import com.linghu.utils.JwtUtils;
|
|
import io.jsonwebtoken.lang.Collections;
|
import io.swagger.annotations.Api;
|
import io.swagger.annotations.ApiOperation;
|
import reactor.core.publisher.Flux;
|
import reactor.core.publisher.Mono;
|
|
import org.springframework.web.bind.annotation.* ;
|
import org.springframework.http.HttpStatus;
|
import com.linghu.model.dto.TaskResultResponse.QuestionResult;
|
import com.linghu.model.dto.TaskResultResponse.UserResult;
|
import reactor.core.scheduler.Schedulers;
|
|
@RestController
|
@RequestMapping("/collect")
|
@Api(value = "采集接口", tags = "采集管理")
|
@Slf4j
|
public class CollectController {
|
|
@Autowired
|
private ReferenceService referenceService;
|
|
@Value("${linghu.url}")
|
private String baseUrl;
|
|
@Autowired
|
private WebClient webClient;
|
|
@Autowired
|
private JwtUtils jwtUtils;
|
@Autowired
|
private KeywordService keywordService;
|
@Autowired
|
private QuestionService questionService;
|
@Autowired
|
private KeywordTaskService keywordTaskService;
|
@Autowired
|
private PlatformService platformService;
|
@Autowired
|
private TypeService typeService;
|
@Autowired
|
private UserService userService;
|
@Autowired
|
private OrderService orderService;
|
@Autowired
|
private QuestionResultService questionResultService;
|
|
// 替换为线程安全队列
|
private static final Queue<SearchTaskRequest> taskQueue = new ConcurrentLinkedQueue<>();
|
// 全局映射:关键词ID -> 批次队列
|
private static final ConcurrentMap<Integer, Queue<List<UserDto>>> batchQueues = new ConcurrentHashMap<>();
|
|
// private static boolean isProcessing = false;
|
private static volatile boolean isProcessing = false; // 添加 volatile
|
|
@PostMapping("/search")
|
@ApiOperation(value = "开始采集")
|
public Mono<ResponseResult<?>> createSearchTask(
|
@RequestBody SearchTaskRequest searchTaskRequest,
|
HttpServletRequest request) throws JsonProcessingException {
|
|
// 首先检查服务器资源
|
return getServerResource()
|
.flatMap(resourceResponse -> {
|
double cpuUsage = parseUsage(resourceResponse.getCpu_usage_percent());
|
double memoryUsage = parseUsage(resourceResponse.getMemory_usage_percent());
|
|
if (cpuUsage >= 90.0 || memoryUsage >= 90.0) {
|
|
String errorMsg = String.format("服务器资源不足,请稍后再试");
|
|
log.warn(errorMsg);
|
return Mono.just(ResponseResult.error(503, errorMsg));
|
}
|
|
// 将新的任务请求加入队列
|
taskQueue.add(searchTaskRequest);
|
|
// 如果当前没有任务在处理中,则启动任务队列的处理
|
if (!isProcessing) {
|
processNextTaskInQueue();
|
}
|
|
// 返回响应,通知用户任务已开始
|
return Mono.just(ResponseResult.success("任务已加入队列,正在处理..."));
|
})
|
.onErrorResume(e -> {
|
log.error("检查服务器资源失败: {}", e.getMessage(), e);
|
return Mono.just(ResponseResult.error("检查服务器资源失败: " + e.getMessage()));
|
});
|
}
|
|
private void processNextTaskInQueue() {
|
// 设置为正在处理
|
isProcessing = true;
|
|
// 从队列中取出下一个任务
|
SearchTaskRequest nextTaskRequest = taskQueue.poll();
|
|
if (nextTaskRequest != null) {
|
Integer keywordId = nextTaskRequest.getKeyword_id();
|
log.info("开始处理任务队列,keywordId: {}", keywordId);
|
|
executeBatchTask(nextTaskRequest)
|
.doFinally(signal -> {
|
isProcessing = false;
|
if (!taskQueue.isEmpty()) {
|
processNextTaskInQueue();
|
}
|
})
|
.subscribe(
|
result -> log.info("任务处理完成,keywordId: {}", keywordId), // 成功日志
|
error -> { // 关键:添加错误处理
|
log.error("任务队列处理异常,keywordId: {}", keywordId, error);
|
}
|
);
|
} else {
|
isProcessing = false; // 无任务时重置状态
|
}
|
}
|
private Mono<ResponseResult<String>> executeBatchTask(SearchTaskRequest searchTaskRequest) {
|
Integer keywordId = searchTaskRequest.getKeyword_id();
|
//
|
int maxConcurrentUsers = searchTaskRequest.getConfig() != null ?
|
searchTaskRequest.getConfig().getMax_concurrent_users() : 3;
|
List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers, keywordId,searchTaskRequest.getIs_first());
|
|
|
// 创建批次队列并存入全局映射
|
Queue<List<UserDto>> batchQueue = new ConcurrentLinkedQueue<>(userBatches);
|
batchQueues.put(keywordId, batchQueue); // 存储到全局映射
|
|
return Mono.just(ResponseResult.success("第一个批次已开始"))
|
.doOnTerminate(() -> {
|
executeBatchTask(batchQueue, searchTaskRequest, keywordId)
|
.subscribe(
|
result -> log.info("批次任务启动成功,keywordId: {}", keywordId),
|
error -> { // 处理批次执行异常
|
log.error("批次任务执行异常,keywordId: {}", keywordId, error);
|
// 可选:异常时清理资源
|
batchQueues.remove(keywordId);
|
}
|
);
|
});
|
}
|
private Mono<ResponseResult<?>> executeBatchTask(Queue<List<UserDto>> batchQueue, SearchTaskRequest searchTaskRequest, Integer keywordId) {
|
// 如果队列为空,说明所有批次已经完成
|
|
if (batchQueue == null || batchQueue.isEmpty()) {
|
// 清理资源
|
batchQueues.remove(keywordId);
|
return Mono.just(ResponseResult.success("所有批次已完成"));
|
}
|
|
List<UserDto> currentBatch = batchQueue.poll(); // 从队列中获取当前批次
|
SearchTaskRequest batchRequest = new SearchTaskRequest();
|
batchRequest.setUsers(currentBatch);
|
batchRequest.setQuestions(searchTaskRequest.getQuestions());
|
batchRequest.setConfig(searchTaskRequest.getConfig());
|
batchRequest.setSave_to_database(searchTaskRequest.getSave_to_database());
|
batchRequest.setWebhook_url(searchTaskRequest.getWebhook_url());
|
batchRequest.setKeyword_id(keywordId);
|
|
return createSingleBatchTask(batchRequest)
|
.flatMap(taskResponse -> {
|
if (taskResponse != null && taskResponse.getTask_id() != null) {
|
// 直接等待任务完成,不再保存任务关联信息
|
return waitForTaskCompletion(taskResponse.getTask_id(), batchQueue, searchTaskRequest, keywordId);
|
} else {
|
return Mono.just(ResponseResult.error("创建批次任务失败"));
|
}
|
})
|
.onErrorResume(e -> {
|
log.error("调用第三方接口失败: {}", e.getMessage(), e); // 关键日志
|
return Mono.error(new RuntimeException("调用第三方接口失败: " + e.getMessage()));
|
})
|
.doFinally(signal -> {
|
// 任务完成时清理资源
|
if (batchQueue.isEmpty()) {
|
batchQueues.remove(keywordId);
|
}
|
});
|
}
|
|
private Mono<ResponseResult<?>> waitForTaskCompletion(String taskId, Queue<List<UserDto>> batchQueue, SearchTaskRequest searchTaskRequest, Integer keywordId) {
|
// 查询任务状态
|
return getTaskStatus(taskId)
|
.flatMap(statusResponse -> {
|
// 检查任务是否被取消
|
if ("cancelled".equalsIgnoreCase(statusResponse.getStatus())) {
|
batchQueues.remove(keywordId); // 清理资源
|
return Mono.just(ResponseResult.success("任务已被取消"));
|
}
|
// 如果任务状态是"submitted"或"running",继续轮询
|
if (!"completed".equalsIgnoreCase(statusResponse.getStatus()) && !"failed".equalsIgnoreCase(statusResponse.getStatus()) && !"cancelled".equalsIgnoreCase(statusResponse.getStatus()) ) {
|
return Mono.delay(Duration.ofSeconds(5)) // 延迟 5 秒后再次查询
|
.flatMap(aLong -> waitForTaskCompletion(taskId, batchQueue, searchTaskRequest, keywordId)); // 递归调用继续等待
|
} else {
|
// 如果状态为其他状态,则继续处理下一个批次
|
return executeBatchTask(batchQueue, searchTaskRequest, keywordId);
|
}
|
})
|
.onErrorResume(e -> {
|
// 处理查询任务状态时的错误
|
return Mono.just(ResponseResult.error("查询任务状态失败: " + e.getMessage()));
|
});
|
}
|
@ApiOperation(value = "查询任务状态")
|
@GetMapping("/status")
|
public Mono<TaskStatusResponse> getTaskStatus(String taskId) {
|
return webClient.get()
|
.uri(baseUrl + "/api/v1/tasks/" + taskId)
|
.accept(MediaType.APPLICATION_JSON)
|
.retrieve()
|
.onStatus(HttpStatus::isError, response -> response.bodyToMono(TaskStatusResponse.class)
|
.flatMap(errorBody -> Mono.error(new RuntimeException(errorBody.getDetail()))))
|
.bodyToMono(TaskStatusResponse.class)
|
.onErrorResume(e -> {
|
// 处理错误,创建一个自定义的错误响应对象
|
TaskStatusResponse errorResponse = new TaskStatusResponse();
|
errorResponse.setStatus("ERROR");
|
errorResponse.setMessage(e.getMessage());
|
errorResponse.setDetail(e.getMessage());
|
return Mono.just(errorResponse);
|
});
|
}
|
|
// 添加一个辅助方法来安全地将字符串转换为double
|
private double parseUsage(String usageStr) {
|
try {
|
if (usageStr != null) {
|
// 移除可能存在的百分号
|
usageStr = usageStr.replace("%", "").trim();
|
return Double.parseDouble(usageStr);
|
}
|
return 0.0;
|
} catch (NumberFormatException e) {
|
log.error("解析资源使用率失败: {}", e.getMessage());
|
return 0.0;
|
}
|
}
|
|
private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize,Integer keywordId,Boolean isFirst) {
|
|
Keyword keyword = keywordService.getById(keywordId);
|
if (isFirst){
|
keyword.setNum(1);
|
}else {
|
keyword.setNum(keyword.getNum()+1);
|
}
|
keywordService.updateById(keyword);
|
|
List<List<UserDto>> batches = new ArrayList<>();
|
for (int i = 0; i < users.size(); i += batchSize) {
|
batches.add(users.subList(i, Math.min(i + batchSize, users.size())));
|
|
}
|
for (int i = 0; i < batches.size(); i++){
|
// 创建 KeywordTask 关联,task_id 设置为 null,表示任务尚未开始
|
KeywordTask keywordTask = new KeywordTask();
|
keywordTask.setKeyword_id(keywordId);
|
keywordTask.setTask_id(null); // 任务ID为空
|
|
keywordTask.setNum(keyword.getNum());
|
keywordTaskService.save(keywordTask); // 保存 KeywordTask
|
}
|
|
|
return batches;
|
}
|
|
private Mono<SearchTaskResponse> createSingleBatchTask(SearchTaskRequest batchRequest) {
|
// 记录请求第三方的基本信息(便于排查)
|
String thirdPartyUrl = baseUrl + "/api/v1/search";
|
Integer keywordId = batchRequest.getKeyword_id();
|
log.info("开始向第三方提交任务,keywordId: {}, URL: {}, 请求参数: {}",
|
keywordId, thirdPartyUrl, batchRequest.toString()); // 打印请求参数(建议用工具类转JSON)
|
return webClient.post()
|
.uri(baseUrl + "/api/v1/search")
|
.contentType(MediaType.APPLICATION_JSON)
|
.bodyValue(batchRequest)
|
.retrieve()
|
.onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class)
|
.flatMap(errorBody -> Mono.error(new RuntimeException(errorBody))))
|
// 处理第三方返回的5xx服务器错误(如第三方服务异常)
|
.onStatus(HttpStatus::is5xxServerError, response ->
|
response.bodyToMono(String.class)
|
.flatMap(errorBody -> {
|
String errorMsg = String.format("第三方接口5xx错误,keywordId: %d, URL: %s, 状态码: %d, 错误详情: %s",
|
keywordId, thirdPartyUrl, response.statusCode().value(), errorBody);
|
log.error(errorMsg);
|
return Mono.error(new RuntimeException(errorMsg));
|
})
|
)
|
.bodyToMono(new ParameterizedTypeReference<SearchTaskResponse>() {})
|
.flatMap(taskResponse -> {
|
if (taskResponse != null && taskResponse.getTask_id() != null) {
|
|
// 使用 Reactor 的方式更新数据库
|
return Mono.fromRunnable(() -> {
|
|
//更新关键词状态
|
LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>();
|
updateWrapper.eq(Keyword::getKeyword_id, batchRequest.getKeyword_id());
|
updateWrapper.set(Keyword::getStatus, "submitted");
|
updateWrapper.set(Keyword::getTask_id, taskResponse.getTask_id());
|
keywordService.update(updateWrapper);
|
//设置轮数
|
Keyword keyword = keywordService.getById(batchRequest.getKeyword_id());
|
// 更新关键词任务与任务ID的关联
|
// 获取与关键词相关的任务,task_id 为 null,确保只取一个任务
|
List<KeywordTask> keywordTasks = keywordTaskService.list(new LambdaQueryWrapper<KeywordTask>()
|
.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id())
|
.eq(KeywordTask::getNum, keyword.getNum())
|
.isNull(KeywordTask::getTask_id));
|
if (keywordTasks.size() > 0) {
|
KeywordTask keywordTask = keywordTasks.get(0);
|
keywordTask.setTask_id(taskResponse.getTask_id());
|
keywordTask.setStatus("pending");
|
keywordTaskService.updateById(keywordTask);
|
}
|
//将提问词列表的状态转为pending
|
for (String questionName : batchRequest.getQuestions()) {
|
questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keyword.getKeyword_id()).eq(Question::getQuestion,questionName).set(Question::getStatus, "pending"));
|
|
}
|
//所有关键词都在采集中或者已完成或者错误设置订单进入采集状态
|
List<Keyword> orderKeywords = keywordService.list(new LambdaQueryWrapper<Keyword>()
|
.eq(Keyword::getOrder_id, keyword.getOrder_id()));
|
if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k ->
|
"completed".equals(k.getStatus()) || "submitted".equals(k.getStatus())
|
)) {
|
Orders orders = orderService.getById(keyword.getOrder_id());
|
if (orders != null) {
|
orders.setStatus(2);
|
orderService.updateById(orders);
|
|
}
|
}
|
}).subscribeOn(Schedulers.boundedElastic()) // 在弹性线程池执行
|
.thenReturn(taskResponse);
|
}
|
return Mono.just(taskResponse);
|
});
|
}
|
|
@PostMapping("/cancel/{keywordId}")
|
@ApiOperation(value = "取消任务")
|
public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable Integer keywordId) {
|
// 1. 从主队列移除任务
|
List<SearchTaskRequest> removedMainQueueTasks = removeTasksFromQueueByKeywordId(keywordId);
|
int removedMainQueueCount = removedMainQueueTasks.size(); // 获取移除的任务数量
|
|
// 2. 从批次队列移除任务 (新增逻辑)
|
int removedBatchQueue = removeBatchTasksByKeywordId(keywordId);
|
|
// 3. 查询所有与关键词相关的任务
|
List<KeywordTask> tasks = keywordTaskService.list(
|
new LambdaQueryWrapper<KeywordTask>().eq(KeywordTask::getKeyword_id, keywordId)
|
);
|
|
// 4. 筛选出需要远程取消的任务
|
List<KeywordTask> tasksToCancelRemotely = tasks.stream()
|
.filter(task -> task.getTask_id() != null && "pending".equalsIgnoreCase(task.getStatus()))
|
.collect(Collectors.toList());
|
|
return Flux.fromIterable(tasksToCancelRemotely)
|
.flatMap(task -> {
|
// 创建状态更新和远程取消的组合操作
|
Mono<Void> updateStatus = updateTaskStatus(task.getTask_id(), "cancelled");
|
Mono<ResponseResult<?>> cancelOp = cancelRemoteTask(task.getTask_id())
|
.onErrorResume(e -> {
|
log.error("取消任务 {} 失败: {}", task.getTask_id(), e.getMessage());
|
return Mono.just(ResponseResult.error("取消任务失败: " + e.getMessage()));
|
});
|
|
return Mono.zip(cancelOp, updateStatus)
|
.thenReturn(true);
|
}, 10)
|
.collectList()
|
.flatMap(canceledTasks -> {
|
return updateKeywordAndOrderStatus(keywordId)
|
.thenReturn(ResponseResult.success(
|
new TaskCancelResponse(
|
String.format("任务已取消: 主队列移除%d, 批次队列移除%d, 远程取消%d",
|
removedMainQueueCount ,
|
removedBatchQueue,
|
tasksToCancelRemotely.size())
|
)
|
));
|
});
|
}
|
|
// 新增方法:移除批次队列
|
private int removeBatchTasksByKeywordId(Integer keywordId) {
|
Queue<List<UserDto>> batchQueue = batchQueues.remove(keywordId);
|
if (batchQueue != null) {
|
int count = batchQueue.size();
|
batchQueue.clear();
|
log.info("从批次队列中移除关键词 {} 的 {} 个批次任务", keywordId, count);
|
return count;
|
}
|
return 0;
|
}
|
// 辅助方法:获取待取消任务
|
private List<KeywordTask> getTasksToCancel(Integer keywordId) {
|
return keywordTaskService.list(
|
new LambdaQueryWrapper<KeywordTask>()
|
.eq(KeywordTask::getKeyword_id, keywordId)
|
.isNotNull(KeywordTask::getTask_id)
|
.eq(KeywordTask::getStatus, "pending")
|
);
|
}
|
|
// 提取关键词和订单状态更新的逻辑为单独方法
|
private Mono<Void> updateKeywordAndOrderStatus(Integer keywordId) {
|
return Mono.fromRunnable(() -> {
|
try {
|
// 查询关键词
|
Keyword keyword = keywordService.getById(keywordId);
|
if (keyword == null) {
|
log.warn("未找到关键词,keywordId: {}", keywordId);
|
return;
|
}
|
|
//把任务id为空的删除
|
LambdaUpdateWrapper<KeywordTask> updateWrapper = new LambdaUpdateWrapper<>();
|
updateWrapper.eq(KeywordTask::getKeyword_id, keywordId);
|
updateWrapper.isNull(KeywordTask::getTask_id);
|
keywordTaskService.remove(updateWrapper);
|
// 查询该关键词下的所有任务
|
LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>();
|
keywordTaskWrapper.eq(KeywordTask::getKeyword_id, keywordId);
|
|
List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper);
|
|
// 更新关键词状态
|
keyword.setStatus("completed");
|
keywordService.updateById(keyword);
|
|
//更新提问词状态为取消
|
questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keywordId).isNull(Question::getResponse).set(Question::getStatus, "cancelled"));
|
|
// 更新订单状态
|
String orderId = keyword.getOrder_id();
|
if (orderId != null && !orderId.isEmpty()) {
|
// 查询订单下所有关键词
|
LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>();
|
orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId);
|
List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper);
|
|
// 所有关键词均已完成,则更新订单状态为3
|
if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k ->
|
"completed".equals(k.getStatus()) || "false".equals(k.getStatus())
|
)) {
|
Orders orders = orderService.getById(orderId);
|
if (orders != null) {
|
orders.setStatus(3);
|
orderService.updateById(orders);
|
log.info("订单 {} 所有关键词已完成,更新状态为3", orderId);
|
}
|
}
|
if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k ->
|
!"completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "cancelled".equals(k.getStatus())
|
)) {
|
Orders orders = orderService.getById(orderId);
|
if (orders != null) {
|
orders.setStatus(1);
|
orderService.updateById(orders);
|
log.info("订单 {} 所有关键词已完成或者取消,更新状态为1", orderId);
|
}
|
}
|
}
|
} catch (Exception e) {
|
log.error("更新关键词和订单状态失败: {}", e.getMessage(), e);
|
}
|
});
|
}
|
private List<SearchTaskRequest> removeTasksFromQueueByKeywordId(Integer keywordId) {
|
List<SearchTaskRequest> removedTasks = new ArrayList<>();
|
|
Iterator<SearchTaskRequest> iterator = taskQueue.iterator();
|
while (iterator.hasNext()) {
|
SearchTaskRequest task = iterator.next();
|
if (task.getKeyword_id() != null && task.getKeyword_id().equals(keywordId)) {
|
removedTasks.add(task);
|
iterator.remove();
|
}
|
}
|
|
|
log.info("从队列中移除了 {} 个与关键词ID {} 相关的任务", removedTasks.size(), keywordId);
|
return removedTasks;
|
}
|
|
// 发送远程取消请求
|
private Mono<ResponseResult<?>> cancelRemoteTask(String taskId) {
|
// 使用Collections.singletonMap或手动创建Map
|
Map<String, Object> requestBody = new HashMap<>();
|
requestBody.put("status", "pending");
|
|
return webClient.post()
|
.uri(baseUrl + "/api/v1/tasks/" + taskId + "/cancel")
|
.contentType(MediaType.APPLICATION_JSON)
|
.bodyValue(requestBody)
|
.retrieve()
|
.onStatus(HttpStatus::isError, response -> response.bodyToMono(String.class)
|
.flatMap(errorBody -> Mono.error(new RuntimeException("取消失败: " + errorBody))))
|
.bodyToMono(Void.class)
|
.thenReturn(ResponseResult.success("任务已取消"));
|
}
|
|
// 更新单个任务状态
|
private Mono<Void> updateTaskStatus(String taskId, String status) {
|
return Mono.fromRunnable(() -> {
|
LambdaUpdateWrapper<KeywordTask> updateWrapper = new LambdaUpdateWrapper<>();
|
updateWrapper.eq(KeywordTask::getTask_id, taskId);
|
updateWrapper.set(KeywordTask::getStatus, status);
|
keywordTaskService.update(updateWrapper);
|
}).subscribeOn(Schedulers.boundedElastic()).then();
|
}
|
@ApiOperation(value = "获取任务结果")
|
@GetMapping("/tasks/{taskId}")
|
public Mono<TaskResultResponse> getTaskResult(@PathVariable String taskId) {
|
return webClient.get()
|
.uri(baseUrl + "/api/v1/tasks/" + taskId + "/result")
|
.accept(MediaType.APPLICATION_JSON)
|
.retrieve()
|
.onStatus(HttpStatus::is4xxClientError, response -> {
|
if (response.statusCode() == HttpStatus.NOT_FOUND) {
|
return response.bodyToMono(String.class)
|
.flatMap(errorBody -> Mono.error(new RuntimeException("任务不存在")));
|
} else if (response.statusCode() == HttpStatus.BAD_REQUEST) {
|
return response.bodyToMono(String.class)
|
.flatMap(errorBody -> Mono.error(new RuntimeException("任务未完成,无法获取结果")));
|
}
|
return response.createException().flatMap(Mono::error);
|
})
|
.bodyToMono(TaskResultResponse.class)
|
.flatMap(responseResult -> {
|
TaskResultResponse result = responseResult;
|
|
if (result != null && result.getResults() != null) {
|
return updateQuestionAndReference(result)
|
.thenReturn(responseResult);
|
}
|
return Mono.just(responseResult);
|
})
|
.onErrorResume(e -> {
|
System.out.println("获取任务结果失败");
|
TaskResultResponse result = new TaskResultResponse();
|
result.setDetail("获取任务结果失败: " + e.getMessage());
|
return Mono.just(result);
|
});
|
}
|
|
/**
|
* 获取或创建平台(确保同一domain只创建一次)
|
* @param domain 平台域名
|
* @return 已存在或新创建的Platform
|
*/
|
private Platform getOrCreatePlatform(String domain,String platformName) {
|
// 1. 先尝试查询已存在的平台
|
Platform platform = platformService.getPlatformByDomain(domain,platformName);
|
if (platform != null) {
|
return platform;
|
}
|
|
// 2. 若不存在,尝试创建(处理并发场景)
|
try {
|
// 2.1 获取或创建“默认”类型(Type也需避免重复,建议Type表的type_name也加唯一约束)
|
Type defaultType = typeService.getOne(new LambdaQueryWrapper<Type>()
|
.eq(Type::getType_name, "默认"));
|
if (defaultType == null) {
|
defaultType = new Type();
|
defaultType.setType_name("默认");
|
typeService.save(defaultType); // 若Type可能重复,此处也需处理DuplicateKeyException
|
}
|
|
// 2.2 构建新平台对象
|
Platform newPlatform = new Platform();
|
newPlatform.setDomain(domain);
|
if (platformName != null) {
|
newPlatform.setPlatform_name(platformName);
|
}else {
|
newPlatform.setPlatform_name(domain);
|
}
|
// 平台名称默认使用域名,可根据实际需求调整
|
newPlatform.setType_id(defaultType.getType_id());
|
newPlatform.setCreate_time(LocalDateTime.now()); // 补充创建时间
|
|
// 2.3 尝试保存,若因唯一约束冲突失败,则捕获异常
|
platformService.save(newPlatform);
|
return newPlatform; // 保存成功,返回新创建的平台
|
|
} catch (DuplicateKeyException e) {
|
// 3. 若捕获到重复键异常,说明并发创建了,重新查询即可(此时数据库中已存在该平台)
|
log.warn("平台domain={}已存在,无需重复创建", domain, e);
|
return platformService.getPlatformByDomain(domain,platformName); // 重新查询,一定能获取到
|
} catch (Exception e) {
|
// 处理其他异常(如数据库连接失败等)
|
log.error("创建平台失败,domain={}", domain, e);
|
throw new RuntimeException("创建平台失败", e);
|
}
|
}
|
//更新提问词和引用数据
|
private Mono<Void> updateQuestionAndReference(TaskResultResponse result) {
|
return Mono.fromRunnable(() -> {
|
try {
|
//查看每个账号信息的status是否正常
|
|
// 1. 根据KeywordTask更新关键词状态
|
// 查询关键词ID
|
LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>();
|
keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id());
|
KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper);
|
keywordTask.setStatus("completed");
|
keywordTaskService.updateById(keywordTask);
|
Keyword keyword = keywordService.getById(keywordTask.getKeyword_id());
|
|
if (keyword == null) {
|
System.out.println("未找到关联的关键词,task_id: " + result.getTask_id());
|
//报错
|
throw new Exception("未找到关联的关键词,task_id: " + result.getTask_id());
|
// return;
|
}
|
LambdaQueryWrapper<KeywordTask> keywordTaskWrapper2 = new LambdaQueryWrapper<>();
|
keywordTaskWrapper2.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id());
|
List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper2);
|
|
//如果全部为completed 关键词也为completed ,如果关联关系没有任务id,或者状态为running ,关键词为submitted,
|
if (keywordTasks.stream().allMatch(task -> "completed".equals(task.getStatus()) || "false".equals(task.getStatus()) || "cancelled".equals(task.getStatus()) ||"canceled".equals(task.getStatus())) ) {
|
keyword.setStatus("completed");
|
keywordService.updateById(keyword);
|
|
}
|
|
// 更新关键词状态
|
String orderId = keyword.getOrder_id();
|
if (orderId == null || orderId.isEmpty()) {
|
System.out.println("关键词[" + keyword.getKeyword_id() + "]未关联订单,跳过订单状态更新");
|
return;
|
}
|
|
// 2. 查询该订单下的所有关键词
|
LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>();
|
orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId);
|
List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper);
|
|
if (orderKeywords.isEmpty()) {
|
System.out.println("订单[" + orderId + "]下无关键词,跳过状态更新");
|
return;
|
}
|
boolean allValid2 = orderKeywords.stream()
|
.allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "cancelled".equals(k.getStatus()));
|
if (allValid2) {
|
Orders orders = orderService.getById(orderId);
|
if (orders != null) {
|
orders.setStatus(1); // 假设Orders有Integer类型的status字段
|
orderService.updateById(orders);
|
System.out.println("订单[" + orderId + "]所有关键词采集完成或者取消,已更新状态为1");
|
} else {
|
System.out.println("未找到订单[" + orderId + "],无法更新状态");
|
}
|
}
|
// 3. 检查所有关键词的状态是否均为 completed 或 false
|
boolean allValid = orderKeywords.stream()
|
.allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus()));
|
|
// 4. 若所有关键词状态均有效,更新订单状态为3
|
if (allValid) {
|
Orders orders = orderService.getById(orderId);
|
if (orders != null) {
|
orders.setStatus(3); // 假设Orders有Integer类型的status字段
|
orderService.updateById(orders);
|
System.out.println("订单[" + orderId + "]所有关键词状态符合条件,已更新状态为3");
|
} else {
|
System.out.println("未找到订单[" + orderId + "],无法更新状态");
|
}
|
}
|
|
|
Orders orders = orderService.getById(keyword.getOrder_id());
|
|
// 2. 批量查询所有问题
|
LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>();
|
queryWrapper.eq(Question::getKeyword_id, keyword.getKeyword_id());
|
List<Question> questions = questionService.list(queryWrapper);
|
|
// 构建问题映射表,用于快速查找
|
Map<String, Question> questionMap = questions.stream()
|
.collect(Collectors.toMap(Question::getQuestion, q -> q));
|
|
// 3. 收集所有需要更新的问题和引用
|
List<Question> questionsToUpdate = new ArrayList<>();
|
List<Reference> allReferences = new ArrayList<>();
|
List<Reference> resultList = new ArrayList<>();
|
|
// 4. 新增:统计所有提问词的结果状态
|
boolean allEmptyReferences = true; // 所有提问词引用数据为空
|
boolean allSystemBusy = true; // 所有提问词系统繁忙
|
// 遍历结果
|
for (UserResult userResult : result.getResults()) {
|
for (QuestionResult questionResult : userResult.getQuestions_results()) {
|
try {
|
Question question = questionMap.get(questionResult.getQuestion());
|
if (question != null) {
|
|
|
//保存问题结果
|
QuestionResultList questionResultList = new QuestionResultList();
|
questionResultList.setKeyword_id(keyword.getKeyword_id());
|
questionResultList.setQuestion(questionResult.getQuestion());
|
questionResultList.setResponse(questionResult.getResponse());
|
questionResultList.setStatus(questionResult.getStatus());
|
questionResultList.setExtracted_count(questionResult.getExtracted_count());
|
questionResultList.setKeyword_task_id(result.getTask_id());
|
questionResultList.setError(questionResult.getError());
|
questionResultList.setNum(keyword.getNum());
|
if (questionResult.getTimestamp() != null) {
|
DateTimeFormatter formatter = DateTimeFormatter
|
.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS");
|
questionResultList.setTimestamp(
|
LocalDateTime.parse(questionResult.getTimestamp(), formatter));
|
}
|
// 保存问题结果列表(新增保存逻辑)
|
questionResultService.save(questionResultList);
|
// 查询当前轮次下该提问词的所有结果
|
List<QuestionResultList> allResults = questionResultService.list(
|
new LambdaQueryWrapper<QuestionResultList>()
|
.eq(QuestionResultList::getKeyword_id, keyword.getKeyword_id())
|
.eq(QuestionResultList::getQuestion, question.getQuestion())
|
.eq(QuestionResultList::getNum, keyword.getNum())
|
);
|
|
// 判断最终状态
|
String finalStatus = determineFinalStatus(allResults);
|
if ("success".equals(finalStatus)){
|
question.setStatus("success");
|
}else if ("no_results".equals(finalStatus)){
|
question.setStatus("failed");
|
question.setError("采集结果无引用数据");
|
}else if ("busyness".equals(finalStatus)){
|
question.setStatus("failed");
|
question.setError("DeepSeek繁忙,请稍后尝试");
|
}else if ("failed".equals(finalStatus)){
|
question.setStatus("failed");
|
question.setError("所有账号登录失败");
|
}
|
|
// 更新问题对象
|
question.setResponse(questionResult.getResponse());
|
question.setExtracted_count(questionResult.getExtracted_count());
|
// question.setError(questionResult.getError());
|
question.setKeyword_id(keyword.getKeyword_id());
|
|
if (questionResult.getTimestamp() != null) {
|
DateTimeFormatter formatter = DateTimeFormatter
|
.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS");
|
question.setTimestamp(
|
LocalDateTime.parse(questionResult.getTimestamp(), formatter));
|
}
|
|
questionsToUpdate.add(question);
|
// 初始化引用列表(避免null)
|
List<Reference> references = new ArrayList<>();
|
List<TaskResultResponse.Reference> originalReferences = questionResult.getReferences();
|
if (originalReferences == null) {
|
originalReferences = Collections.emptyList();
|
}
|
|
// 遍历原始引用列表,转换为Reference对象
|
for (TaskResultResponse.Reference ref : originalReferences) {
|
Reference reference = new Reference();
|
// 设置基本字段
|
reference.setQuestion_id(question.getQuestion_id());
|
reference.setTitle(ref.getTitle());
|
reference.setUrl(ref.getUrl());
|
reference.setDomain(ref.getDomain());
|
reference.setNum(keyword.getNum());
|
reference.setTask_id(result.getTask_id());
|
reference.setKeyword_id(keyword.getKeyword_id());
|
if (null!=ref.getPublish_time()) {
|
reference.setCreate_time(ref.getPublish_time().atStartOfDay());
|
}
|
|
// 关键:使用优化后的方法获取平台,避免重复创建
|
Platform platform = getOrCreatePlatform(ref.getDomain(),ref.getPlatform_name());
|
reference.setPlatform_id(platform.getPlatform_id());
|
reference.setType_id(platform.getType_id()); // 直接从平台获取类型ID,更可靠
|
// 添加到结果列表
|
references.add(reference);
|
}
|
// 添加到总引用列表
|
if (!references.isEmpty()) {
|
allReferences.addAll(references);
|
}
|
|
//取数据库中当前关键词的当前轮次的当前问题id结果拿出来
|
List<Reference> dbList = referenceService.list(new LambdaQueryWrapper<Reference>().eq(Reference::getKeyword_id, keyword.getKeyword_id())
|
.eq(Reference::getNum, keyword.getNum())
|
.eq(Reference::getQuestion_id, question.getQuestion_id())
|
);
|
|
// 1. 合并两个列表
|
List<Reference> combinedList = new ArrayList<>();
|
combinedList.addAll(allReferences);
|
combinedList.addAll(dbList);
|
|
// 2. 创建复合键的Map,用于统计完全匹配的记录
|
Map<String, List<Reference>> compositeKeyMap = combinedList.stream()
|
.collect(Collectors.groupingBy(
|
ref -> ref.getTitle() + "|" + ref.getUrl() + "|" + ref.getDomain()
|
));
|
|
// 3. 处理每组重复记录
|
|
compositeKeyMap.forEach((key, refGroup) -> {
|
// 3.1 找出组内有ID的记录(优先从dbList中获取)
|
Optional<Reference> existingRecord = refGroup.stream()
|
.filter(ref -> ref.getReference_id() != null)
|
.findFirst();
|
|
// 3.2 统计该组的重复次数(总数-1)
|
int repetitionCount = refGroup.size() - 1;
|
|
// 3.3 决定最终保留的记录
|
Reference recordToSave = new Reference();
|
if (existingRecord.isPresent()) {
|
// 使用已有ID的记录并更新重复次数
|
recordToSave = existingRecord.get();
|
recordToSave.setRepetition_num(
|
(recordToSave.getRepetition_num() == null ? 1 : recordToSave.getRepetition_num())
|
+ repetitionCount
|
);
|
} else {
|
// 没有ID记录则取第一条并设置重复次数
|
recordToSave = refGroup.get(0);
|
recordToSave.setRepetition_num(1+repetitionCount);
|
}
|
|
resultList.add(recordToSave);
|
});
|
referenceService.saveOrUpdateBatch(resultList);
|
}
|
} catch (Exception e) {
|
log.error(e.getMessage(), e);
|
System.out.println("处理问题结果失败: " + e.getMessage());
|
}
|
}
|
//更新账号状态
|
if ( "failed".equals(userResult.getStatus())){
|
if (userResult.getError().contains("登录失败")){
|
LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>();
|
userWrapper.eq(User::getUser_email, userResult.getUser_email());
|
userWrapper.set(User::getStatus, "无法登录");
|
userService.update(userWrapper);
|
//更新所有提问词的状态
|
questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keyword.getKeyword_id())
|
.set(Question::getStatus, "failed")
|
.set(Question::getError, "账户登录失败"));
|
|
}else if (userResult.getError().contains("信息错误")){
|
LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>();
|
userWrapper.eq(User::getUser_email, userResult.getUser_email());
|
userWrapper.set(User::getStatus, "信息错误");
|
userService.update(userWrapper);
|
}
|
}
|
|
}
|
|
// 4. 批量更新问题
|
System.out.println(questionsToUpdate);
|
if (!questionsToUpdate.isEmpty()) {
|
questionService.updateBatchById(questionsToUpdate);
|
System.out.println("成功批量更新 " + questionsToUpdate.size() + " 个问题");
|
}
|
|
} catch (Exception e) {
|
log.error("更新问题和引用数据失败: " ,e.getMessage(), e);
|
throw new RuntimeException("更新问题和引用数据失败", e);
|
}
|
});
|
}
|
// 根据所有批次的结果判断最终状态
|
private String determineFinalStatus(List<QuestionResultList> results) {
|
if (results.isEmpty()) {
|
return "no_results"; // 无结果
|
}
|
|
// 统计关键指标
|
int totalCount = results.size();
|
int emptyResponseCount = 0;
|
int systemBusyCount = 0;
|
int failedCount = 0;
|
|
for (QuestionResultList result : results) {
|
// 判断回答是否为空
|
if (result.getExtracted_count() == 0 ) {
|
emptyResponseCount++;
|
}
|
|
// 判断是否为系统繁忙
|
if ("success".equals(result.getStatus()) && (result.getResponse().isEmpty()|| result.getResponse().contains("WebDriver连接中断") || result.getResponse().contains("响应超时"))) {
|
systemBusyCount++;
|
}
|
if ("failed".equals(result.getStatus()) && result.getError().contains("登录失败")){
|
failedCount++;
|
}
|
}
|
|
// 全返回系统繁忙
|
if (systemBusyCount == totalCount) {
|
return "busyness";
|
}
|
// 全返回信息为空
|
if (emptyResponseCount == totalCount) {
|
return "no_results";
|
}
|
if (failedCount == totalCount) {
|
return "failed";
|
}
|
|
// 系统繁忙比例超过阈值(可配置,这里设为70%)
|
// double busyRate = (double) systemBusyCount / totalCount;
|
// if (busyRate >= 0.7) {
|
// return "系统繁忙,请稍后尝试";
|
// }
|
|
// 其他情况返回成功
|
return "success";
|
}
|
@GetMapping("/tasks/all")
|
@ApiOperation(value = "获取所有任务列表")
|
public Mono<TaskListResponse> getAllTasks() {
|
return webClient.get()
|
.uri(baseUrl + "/api/v1/tasks")
|
.accept(MediaType.APPLICATION_JSON)
|
.retrieve()
|
.bodyToMono(new ParameterizedTypeReference<TaskListResponse>() {
|
})
|
.onErrorResume(e -> {
|
TaskListResponse response = new TaskListResponse();
|
response.setDetail("获取任务列表失败: " + e.getMessage());
|
return Mono.just(response);
|
});
|
}
|
|
@GetMapping("/health")
|
@ApiOperation("健康检查")
|
public Mono<HealthResponse> checkThirdPartyHealth() {
|
return webClient.get()
|
.uri(baseUrl + "/health") // 假设第三方健康检查接口路径为/health
|
.retrieve()
|
.bodyToMono(HealthResponse.class)
|
.onErrorResume(e -> Mono.just(
|
new HealthResponse("unhealthy", null, "", e.getMessage())));
|
}
|
|
/**
|
* 查询服务器资源
|
*/
|
@GetMapping("/server/resource")
|
@ApiOperation(value = "查询服务器资源")
|
public Mono<ServerResourceResponse> getServerResource() {
|
return webClient.get()
|
.uri(baseUrl + "/api/v1/system/resources")
|
.retrieve()
|
.bodyToMono(ServerResourceResponse.class)
|
.onErrorResume(e -> Mono.just(
|
new ServerResourceResponse( e.getMessage())));
|
}
|
|
|
|
}
|