guyue
4 天以前 8f5cb469b825cce61734c84fd633f0dfc3000ee6
src/main/java/com/linghu/controller/CollectController.java
@@ -3,13 +3,18 @@
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.servlet.http.HttpServletRequest;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.linghu.model.dto.*;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.ParameterizedTypeReference;
@@ -21,8 +26,6 @@
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
import com.linghu.model.common.ResponseResult;
import com.linghu.model.dto.HealthResponse;
import com.linghu.model.dto.SearchTaskRequest;
import com.linghu.model.entity.Keyword;
import com.linghu.model.entity.Question;
import com.linghu.model.entity.User;
@@ -30,23 +33,19 @@
import com.linghu.service.QuestionService;
import com.linghu.service.ReferenceService;
import com.linghu.utils.JwtUtils;
import com.linghu.model.dto.SearchTaskResponse;
import com.linghu.model.dto.TaskStatusResponse;
import com.linghu.model.dto.TaskCancelResponse;
import com.linghu.model.dto.TaskListResponse;
import io.jsonwebtoken.lang.Collections;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import reactor.core.publisher.Mono;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.bind.annotation.* ;
import org.springframework.http.HttpStatus;
import com.linghu.model.dto.TaskResultResponse;
import com.linghu.model.dto.TaskResultResponse.QuestionResult;
import com.linghu.model.dto.TaskResultResponse.UserResult;
import com.linghu.model.entity.Reference;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
@RestController
@RequestMapping("/collect")
@@ -71,25 +70,38 @@
    /**
     * POST {@code /search} handler: submits a search task to the upstream service reached
     * through {@code webClient} at {@code baseUrl}.
     *
     * <p>Visible steps: read the {@code Authorization} header, parse it into a {@code User}
     * with {@code jwtUtils.parseToken}, copy name/email/password into a {@code UserDto},
     * attach it to the request via {@code setUsers}, print the JSON-serialized request to
     * stdout, POST the request as JSON, and decode the response body. Any error in the
     * reactive chain is turned into a fallback object by {@code onErrorResume}.
     *
     * <p>NOTE(review): this text looks like a rendered diff with the +/- markers removed.
     * Several statements appear twice in slightly different forms (two signatures, two
     * {@code .uri(...)} calls, two {@code users} declarations); presumably one of each pair
     * is the removed line and the other its replacement. The {@code @@} line inside the
     * body marks lines that are not shown here, so the keyword update is only partly visible.
     *
     * @param searchTaskRequest request body; its users list is overwritten with the caller's DTO
     * @param request           servlet request, used only to read the {@code Authorization} header
     * @return a Mono emitting the upstream response, or a fallback carrying the failure message
     */
    @PostMapping("/search")
    @ApiOperation(value = "开始采集")
    public Mono<ResponseResult<SearchTaskResponse>> createSearchTask(
    public Mono<SearchTaskResponse> createSearchTask(
            @RequestBody SearchTaskRequest searchTaskRequest,
            HttpServletRequest request) {
            HttpServletRequest request) throws JsonProcessingException {
        // Identify the caller from the token in the Authorization header.
        String token = request.getHeader("Authorization");
        User user = jwtUtils.parseToken(token);
        List<User> users = new ArrayList<>();
        users.add(user);
        // Copy the user's fields into a UserDto
        UserDto userDto = new UserDto();
        userDto.setName(user.getUser_name());
        userDto.setEmail(user.getUser_email());
        userDto.setPassword(user.getPassword());
        // List<User> users = new ArrayList<>();
        // users.add(user);
        List<UserDto> users = new ArrayList<>();
        users.add(userDto);
        searchTaskRequest.setUsers(users);
        // JSON form of the request
        // NOTE(review): the DTO built above carries the user's password, and the whole
        // request is printed to stdout here and sent upstream below - confirm this is
        // intended and that the password is not exposed in logs.
        ObjectMapper objectMapper = new ObjectMapper();
        System.out.println(objectMapper.writeValueAsString(searchTaskRequest));
        return webClient.post()
                .uri(baseUrl + "/search")
                .uri(baseUrl + "/api/v1/search")
                .contentType(MediaType.APPLICATION_JSON)
                .bodyValue(searchTaskRequest)
                .retrieve()
                .bodyToMono(new ParameterizedTypeReference<ResponseResult<SearchTaskResponse>>() {
                // On a 4xx status, read the error body as text and fail with it as the message.
                .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class)
                        .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody))))
                .bodyToMono(new ParameterizedTypeReference<SearchTaskResponse>() {
                })
                .flatMap(responseResult -> {
                    // Extract the task ID
                    SearchTaskResponse taskResponse = responseResult.getData();
                    SearchTaskResponse taskResponse = responseResult;
                    if (taskResponse != null && taskResponse.getTask_id() != null) {
                        // Save the task ID onto the keyword
                        LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>();
@@ -99,10 +111,13 @@
                        // Optional: update other fields of the response
                        // taskResponse.setMessage("任务已提交并保存,ID: " + taskResponse.getTaskId());
                    }
                    return Mono.just(responseResult);
                    return Mono.just(taskResponse);
                })
                // Convert any failure into a normal emission whose message describes the error.
                .onErrorResume(e -> {
                    return Mono.just(ResponseResult.error("调用失败: " + e.getMessage()));
                    // return Mono.just(ResponseResult.error("调用失败: " + e.getMessage()));
                    SearchTaskResponse task = new SearchTaskResponse();
                    task.setMessage("调用失败: " + e.getMessage());
                    return Mono.just(task);
                });
    }
@@ -110,11 +125,11 @@
    /**
     * GET {@code /status} handler: fetches the status of a task from the upstream service
     * by appending {@code taskId} to a tasks URL under {@code baseUrl}.
     *
     * <p>A 4xx response is turned into a {@code RuntimeException}; every error in the chain
     * is then replaced by a {@code TaskStatusResponse} whose status is {@code "ERROR"} and
     * whose message and detail are both the exception message.
     *
     * <p>NOTE(review): this text looks like a rendered diff with the +/- markers removed.
     * The two {@code .uri(...)} calls and the two {@code onStatus(...)} handlers are
     * presumably old/new variants of the same statement. The {@code @@} line marks elided
     * lines, so the condition guarding {@code questionService.updateBatchById} is not
     * visible here.
     *
     * @param taskId identifier of the task, concatenated into the upstream URL
     * @return a Mono emitting the upstream status, or the synthetic error response
     */
    @GetMapping("/status")
    public Mono<TaskStatusResponse> getTaskStatus(String taskId) {
        return webClient.get()
                .uri(baseUrl + "/tasks/" + taskId)
                .uri(baseUrl + "/api/v1/tasks/" + taskId)
                .accept(MediaType.APPLICATION_JSON)
                .retrieve()
                // Variant 1: read the 4xx body as text and prefix it with a fixed message.
                .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class)
                        .flatMap(errorBody -> Mono.error(new RuntimeException("任务不存在: " + errorBody))))
                // Variant 2: decode the 4xx body as TaskStatusResponse and use its detail field.
                .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(TaskStatusResponse.class)
                        .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody.getDetail()))))
                .bodyToMono(TaskStatusResponse.class)
                .flatMap(result -> {
                    TaskStatusResponse taskStatusResponse = result;
@@ -130,6 +145,15 @@
                        questionService.updateBatchById(updateQuestions);
                    }
                    // The upstream response is passed through unchanged.
                    return Mono.just(result);
                })
                .onErrorResume(e -> {
                    // Build a custom error response object
                    TaskStatusResponse errorResponse = new TaskStatusResponse();
                    errorResponse.setStatus("ERROR");
                    errorResponse.setMessage(e.getMessage());
                    errorResponse.setDetail(e.getMessage());
                    return Mono.just(errorResponse);
                });
    }
@@ -137,20 +161,12 @@
    @ApiOperation(value = "取消任务")
    public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable String taskId) {
        return webClient.post()
                .uri(baseUrl + "/tasks/" + taskId + "/cancel")
                .uri(baseUrl + "/api/v1/tasks/" + taskId + "/cancel")
                .contentType(MediaType.APPLICATION_JSON)
                .bodyValue(Collections.emptyMap()) // 添加空请求体
                .retrieve()
                .onStatus(HttpStatus::isError, response -> {
                    if (response.statusCode() == HttpStatus.NOT_FOUND) {
                        return response.bodyToMono(String.class)
                                .flatMap(errorBody -> Mono.error(new RuntimeException("任务不存在")));
                    } else if (response.statusCode() == HttpStatus.BAD_REQUEST) {
                        return response.bodyToMono(String.class)
                                .flatMap(errorBody -> Mono.error(new RuntimeException("任务已经完成,无法取消")));
                    }
                    return response.createException().flatMap(Mono::error);
                })
                .onStatus(HttpStatus::isError, response -> response.bodyToMono(TaskCancelResponse.class)
                        .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody.getDetail()))))
                .bodyToMono(TaskCancelResponse.class)
                .map(data -> ResponseResult.success(data))
                .onErrorResume(e -> {
@@ -165,9 +181,9 @@
    /**
     * GET {@code /tasks/{taskId}} handler: fetches a task's result from the upstream
     * service and, when the result carries a non-null results list, runs
     * {@code updateQuestionAndReference} on it before emitting the response.
     *
     * <p>Errors in the chain are caught by {@code onErrorResume}, which prints a fixed
     * message to stdout and emits a fallback object describing the failure.
     *
     * <p>NOTE(review): this text looks like a rendered diff with the +/- markers removed.
     * Duplicated statements (two signatures, two {@code .uri(...)} calls, two
     * {@code bodyToMono(...)} calls, two {@code result} declarations) are presumably
     * old/new variants. The {@code @@} lines mark elided code: the body of the 4xx handler
     * and the branch taken when there are no results are not visible here.
     *
     * @param taskId identifier of the task, taken from the request path
     * @return a Mono emitting the task result, or a fallback describing the failure
     */
    @ApiOperation(value = "获取任务结果")
    @GetMapping("/tasks/{taskId}")
    public Mono<ResponseResult<TaskResultResponse>> getTaskResult(@PathVariable String taskId) {
    public Mono<TaskResultResponse> getTaskResult(@PathVariable String taskId) {
        return webClient.get()
                .uri(baseUrl + "/tasks/" + taskId + "/result")
                .uri(baseUrl + "/api/v1/tasks/" + taskId + "/result")
                .accept(MediaType.APPLICATION_JSON)
                .retrieve()
                .onStatus(HttpStatus::is4xxClientError, response -> {
@@ -180,10 +196,9 @@
                    }
                    // Statuses not handled above fall back to the client's default exception.
                    return response.createException().flatMap(Mono::error);
                })
                .bodyToMono(new ParameterizedTypeReference<ResponseResult<TaskResultResponse>>() {
                })
                .bodyToMono(TaskResultResponse.class)
                .flatMap(responseResult -> {
                    TaskResultResponse result = responseResult.getData();
                    TaskResultResponse result = responseResult;
                    if (result != null && result.getResults() != null) {
                        // Run the update first, then emit the original response.
                        return updateQuestionAndReference(result)
                                .thenReturn(responseResult);
@@ -192,77 +207,77 @@
                })
                .onErrorResume(e -> {
                    System.out.println("获取任务结果失败");
                    return Mono.just(ResponseResult.error(e.getMessage()));
                    // Fallback: an otherwise empty result whose detail holds the failure message.
                    TaskResultResponse result = new TaskResultResponse();
                    result.setDetail("获取任务结果失败: " + e.getMessage());
                    return Mono.just(result);
                });
    }
    /*
     * private Mono<Void> updateQuestionAndReference(TaskResultResponse result) {
     * return Mono.fromRunnable(() -> {
     * // 1. 更新关键词状态
     * LambdaUpdateWrapper<Keyword> keywordUpdate = new LambdaUpdateWrapper<>();
     * keywordUpdate.eq(Keyword::getTask_id, result.getTask_id())
     * .set(Keyword::getStatus, "completed");
     * keywordService.update(keywordUpdate);
     *
     * // 查询关键词ID
     * LambdaQueryWrapper<Keyword> keywordQuery = new LambdaQueryWrapper<>();
     * keywordQuery.eq(Keyword::getTask_id, result.getTask_id());
     * Keyword keyword = keywordService.getOne(keywordQuery);
     *
     * if (keyword == null) {
     * System.out.println("未找到关联的关键词,task_id: " + result.getTask_id());
     * return;
     * }
     *
     * // 2. 处理每个用户的问题结果
     * for (UserResult userResult : result.getResults()) {
     * for (QuestionResult questionResult : userResult.getQuestions_results()) {
     * // 2.1 查询问题ID
     * LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>();
     * queryWrapper.eq(Question::getQuestion, questionResult.getQuestion())
     * .eq(Question::getKeyword_id, keyword.getKeyword_id());
     * Question question = questionService.getOne(queryWrapper);
     *
     * if (question != null) {
     * // 更新问题状态
     * LambdaUpdateWrapper<Question> updateWrapper = new LambdaUpdateWrapper<>();
     * updateWrapper.eq(Question::getQuestion_id, question.getQuestion_id())
     * .set(Question::getStatus, questionResult.getStatus())
     * .set(Question::getResponse, questionResult.getResponse())
     * .set(Question::getExtracted_count, questionResult.getExtracted_count())
     * .set(Question::getError, questionResult.getError())
     * .set(Question::getTimestamp, LocalDateTime.parse(
     * questionResult.getTimestamp(),
     * DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS")
     * ));
     * questionService.update(updateWrapper);
     *
     * // 2.2 保存引用数据
     * List<Reference> references = questionResult.getReferences().stream()
     * .map(ref -> {
     * Reference reference = new Reference();
     * reference.setQuestion_id(question.getQuestion_id());
     * reference.setTitle(ref.getTitle());
     * reference.setUrl(ref.getUrl());
     * reference.setDomain(ref.getDomain());
     * reference.setCreate_time(new Date());
     * return reference;
     * })
     * .collect(Collectors.toList());
     *
     * if (!references.isEmpty()) {
     * referenceService.saveBatch(references);
     * }
     * } else {
     * System.out.println("未找到匹配的问题,question " + question.getQuestion());
     *
     * }
     * }
     * }
     * });
     * }
     */
//    private Mono<Void> updateQuestionAndReference(TaskResultResponse result) {
//        return Mono.fromRunnable(() -> {
//            // 1. 更新关键词状态
//            LambdaUpdateWrapper<Keyword> keywordUpdate = new LambdaUpdateWrapper<>();
//            keywordUpdate.eq(Keyword::getTask_id, result.getTask_id())
//                    .set(Keyword::getStatus, "completed");
//            keywordService.update(keywordUpdate);
//
//            // 查询关键词ID
//            LambdaQueryWrapper<Keyword> keywordQuery = new LambdaQueryWrapper<>();
//            keywordQuery.eq(Keyword::getTask_id, result.getTask_id());
//            Keyword keyword = keywordService.getOne(keywordQuery);
//
//            if (keyword == null) {
//                System.out.println("未找到关联的关键词,task_id: " + result.getTask_id());
//                return;
//            }
//
//            // 2. 处理每个用户的问题结果
//            for (UserResult userResult : result.getResults()) {
//                for (QuestionResult questionResult : userResult.getQuestions_results()) {
//                    // 2.1 查询问题ID
//                    LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>();
//                    queryWrapper.eq(Question::getQuestion, questionResult.getQuestion())
//                            .eq(Question::getKeyword_id, keyword.getKeyword_id());
//                    Question question = questionService.getOne(queryWrapper);
//
//                    if (question != null) {
//                        // 更新问题状态
//                        LambdaUpdateWrapper<Question> updateWrapper = new LambdaUpdateWrapper<>();
//                        updateWrapper.eq(Question::getQuestion_id, question.getQuestion_id())
//                                .set(Question::getStatus, questionResult.getStatus())
//                                .set(Question::getResponse, questionResult.getResponse())
//                                .set(Question::getExtracted_count, questionResult.getExtracted_count())
//                                .set(Question::getError, questionResult.getError())
//                                .set(Question::getTimestamp, LocalDateTime.parse(
//                                        questionResult.getTimestamp(),
//                                        DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS")
//                                ));
//                        questionService.update(updateWrapper);
//
//                        // 2.2 保存引用数据
//                        List<Reference> references = questionResult.getReferences().stream()
//                                .map(ref -> {
//                                    Reference reference = new Reference();
//                                    reference.setQuestion_id(question.getQuestion_id());
//                                    reference.setTitle(ref.getTitle());
//                                    reference.setUrl(ref.getUrl());
//                                    reference.setDomain(ref.getDomain());
//                                    reference.setCreate_time(LocalDateTime.now());
//                                    return reference;
//                                })
//                                .collect(Collectors.toList());
//
//                        if (!references.isEmpty()) {
//                            referenceService.saveBatch(references);
//                        }
//                    } else {
//                        System.out.println("未找到匹配的问题,question " + question.getQuestion());
//
//                    }
//                }
//            }
//        });
//    }
    private Mono<Void> updateQuestionAndReference(TaskResultResponse result) {
        return Mono.fromRunnable(() -> {
@@ -279,12 +294,11 @@
                Keyword keyword = keywordService.getOne(keywordQuery);
                if (keyword == null) {
                    // log.error("未找到关联的关键词,task_id: {}", result.getTask_id());
                    System.out.println("未找到关联的关键词,task_id: " + result.getTask_id());
                    return;
                }
                // 2. 批量查询所有问题(假设Question有task_id和keyword_id字段)
                // 2. 批量查询所有问题
                LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>();
                queryWrapper.eq(Question::getKeyword_id, keyword.getKeyword_id());
                List<Question> questions = questionService.list(queryWrapper);
@@ -297,7 +311,7 @@
                List<Question> questionsToUpdate = new ArrayList<>();
                List<Reference> allReferences = new ArrayList<>();
                // 遍历结果,只收集数据不执行数据库操作
                // 遍历结果
                for (UserResult userResult : result.getResults()) {
                    for (QuestionResult questionResult : userResult.getQuestions_results()) {
                        try {
@@ -316,55 +330,60 @@
                                    question.setTimestamp(
                                            LocalDateTime.parse(questionResult.getTimestamp(), formatter));
                                }
                                //更新
//                                questionService.updateById(question);
                                questionsToUpdate.add(question);
                                // 收集引用数据
                                List<Reference> references = questionResult.getReferences().stream()
                                        .map(ref -> {
                                            Reference reference = new Reference();
                                            reference.setQuestion_id(question.getQuestion_id());
                                            reference.setTitle(ref.getTitle());
                                            reference.setUrl(ref.getUrl());
                                            reference.setDomain(ref.getDomain());
                                            reference.setCreate_time(LocalDateTime.now());
                                            return reference;
                                        })
                                        .collect(Collectors.toList());
                                // 收集引用数据,处理空集合情况
                                List<Reference> references =
                                        Optional.ofNullable(questionResult.getReferences())
                                                .orElse(Collections.emptyList())
                                                .stream()
                                                .map(ref -> {
                                                    Reference reference = new Reference();
                                                    reference.setQuestion_id(question.getQuestion_id());
                                                    reference.setTitle(ref.getTitle());
                                                    reference.setUrl(ref.getUrl());
                                                    reference.setDomain(ref.getDomain());
                                                    reference.setCreate_time(LocalDateTime.now());
                                                    return reference;
                                                })
                                                .collect(Collectors.toList());
                                allReferences.addAll(references);
                            } else {
                                // log.warn("未找到匹配的问题,question: {}, keyword_id: {}",
                                // questionResult.getQuestion(), keyword.getKeyword_id());
                                // 添加到总引用列表
                                if (!references.isEmpty()) {
                                    allReferences.addAll(references);
                                }
                            }
                        } catch (Exception e) {
                            // log.error("处理问题结果失败,question: {}, error: {}",
                            // questionResult.getQuestion(), e.getMessage(), e);
                            System.out.println("处理问题结果失败: " + e.getMessage());
                        }
                    }
                }
                // 4. 批量更新问题
                System.out.println(questionsToUpdate);
                if (!questionsToUpdate.isEmpty()) {
                    questionService.updateBatchById(questionsToUpdate);
                    // log.info("成功批量更新 {} 个问题", questionsToUpdate.size());
                    System.out.println("成功批量更新 " + questionsToUpdate.size() + " 个问题");
                }
                // 5. 批量插入引用
                // 5. 批量插入引用,使用流式分批处理
                if (!allReferences.isEmpty()) {
                    // 分批处理,每批1000条记录,避免内存溢出
                    int batchSize = 1000;
                    for (int i = 0; i < allReferences.size(); i += batchSize) {
                        List<Reference> batch = allReferences.subList(
                                i, Math.min(i + batchSize, allReferences.size()));
                        referenceService.saveBatch(batch);
                    }
                    // log.info("成功批量插入 {} 条引用数据", allReferences.size());
                    IntStream.iterate(0, i -> i + batchSize)
                            .limit((allReferences.size() + batchSize - 1) / batchSize)
                            .forEach(i -> {
                                List<Reference> batch = allReferences.subList(
                                        i, Math.min(i + batchSize, allReferences.size()));
                                referenceService.saveBatch(batch);
                            });
                    System.out.println("成功批量插入 " + allReferences.size() + " 条引用数据");
                }
            } catch (Exception e) {
                // log.error("更新问题和引用数据失败,task_id: {}, error: {}",
                // result.getTask_id(), e.getMessage(), e);
                System.out.println("更新问题和引用数据失败: " + e.getMessage());
                throw new RuntimeException("更新问题和引用数据失败", e);
            }
        });
@@ -372,15 +391,19 @@
    /**
     * GET {@code /tasks/all} handler: requests the task list from the upstream service
     * under {@code baseUrl} and emits the decoded body.
     *
     * <p>No {@code onStatus} handler is registered here; any error in the chain is caught
     * by {@code onErrorResume} and replaced with a fallback object describing the failure.
     *
     * <p>NOTE(review): this text looks like a rendered diff with the +/- markers removed.
     * The duplicated signature, {@code .uri(...)} and {@code bodyToMono(...)} lines, and
     * the two fallback forms in the error handler, are presumably old/new variants of the
     * same statements - confirm against the actual file.
     *
     * @return a Mono emitting the task list, or a fallback describing the failure
     */
    @GetMapping("/tasks/all")
    @ApiOperation(value = "获取所有任务列表")
    public Mono<ResponseResult<TaskListResponse>> getAllTasks() {
    public Mono<TaskListResponse> getAllTasks() {
        return webClient.get()
                .uri(baseUrl + "/tasks")
                .uri(baseUrl + "/api/v1/tasks")
                .accept(MediaType.APPLICATION_JSON)
                .retrieve()
                .bodyToMono(new ParameterizedTypeReference<ResponseResult<TaskListResponse>>() {
                .bodyToMono(new ParameterizedTypeReference<TaskListResponse>() {
                })
                .onErrorResume(e -> {
                    return Mono.just(ResponseResult.error("获取任务列表失败: " + e.getMessage()));
                    // Fallback: an otherwise empty list response whose detail holds the failure message.
                    TaskListResponse response = new TaskListResponse();
                    response.setDetail("获取任务列表失败: " + e.getMessage());
                    return Mono.just(response);
                    // return Mono.just(ResponseResult.error("获取任务列表失败: " + e.getMessage()));
                });
    }