mirror of
https://github.com/CodePhiliaX/Chat2DB.git
synced 2025-08-02 13:34:07 +08:00
ansj segment
This commit is contained in:
@ -82,6 +82,13 @@
|
||||
<groupId>ai.chat2db</groupId>
|
||||
<artifactId>chat2db-spi</artifactId>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.ansj</groupId>
|
||||
<artifactId>ansj_seg</artifactId>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
@ -1,5 +1,65 @@
|
||||
package ai.chat2db.server.web.api.controller.ai;
|
||||
|
||||
import ai.chat2db.server.domain.api.enums.AiSqlSourceEnum;
|
||||
import ai.chat2db.server.domain.api.model.Config;
|
||||
import ai.chat2db.server.domain.api.model.DataSource;
|
||||
import ai.chat2db.server.domain.api.param.ShowCreateTableParam;
|
||||
import ai.chat2db.server.domain.api.param.TableQueryParam;
|
||||
import ai.chat2db.server.domain.api.service.ConfigService;
|
||||
import ai.chat2db.server.domain.api.service.DataSourceService;
|
||||
import ai.chat2db.server.domain.api.service.TableService;
|
||||
import ai.chat2db.server.tools.base.enums.WhiteListTypeEnum;
|
||||
import ai.chat2db.server.tools.base.wrapper.result.DataResult;
|
||||
import ai.chat2db.server.tools.common.exception.ParamBusinessException;
|
||||
import ai.chat2db.server.tools.common.util.EasyEnumUtils;
|
||||
import ai.chat2db.server.web.api.aspect.ConnectionInfoAspect;
|
||||
import ai.chat2db.server.web.api.controller.ai.azure.client.AzureOpenAIClient;
|
||||
import ai.chat2db.server.web.api.controller.ai.azure.listener.AzureOpenAIEventSourceListener;
|
||||
import ai.chat2db.server.web.api.controller.ai.azure.model.AzureChatMessage;
|
||||
import ai.chat2db.server.web.api.controller.ai.azure.model.AzureChatRole;
|
||||
import ai.chat2db.server.web.api.controller.ai.chat2db.client.Chat2dbAIClient;
|
||||
import ai.chat2db.server.web.api.controller.ai.claude.client.ClaudeAIClient;
|
||||
import ai.chat2db.server.web.api.controller.ai.claude.listener.ClaudeAIEventSourceListener;
|
||||
import ai.chat2db.server.web.api.controller.ai.claude.model.ClaudeChatCompletionsOptions;
|
||||
import ai.chat2db.server.web.api.controller.ai.claude.model.ClaudeChatMessage;
|
||||
import ai.chat2db.server.web.api.controller.ai.config.LocalCache;
|
||||
import ai.chat2db.server.web.api.controller.ai.converter.ChatConverter;
|
||||
import ai.chat2db.server.web.api.controller.ai.enums.PromptType;
|
||||
import ai.chat2db.server.web.api.controller.ai.fastchat.client.FastChatAIClient;
|
||||
import ai.chat2db.server.web.api.controller.ai.fastchat.embeddings.FastChatEmbeddingResponse;
|
||||
import ai.chat2db.server.web.api.controller.ai.fastchat.listener.FastChatAIEventSourceListener;
|
||||
import ai.chat2db.server.web.api.controller.ai.fastchat.model.FastChatMessage;
|
||||
import ai.chat2db.server.web.api.controller.ai.fastchat.model.FastChatRole;
|
||||
import ai.chat2db.server.web.api.controller.ai.openai.client.OpenAIClient;
|
||||
import ai.chat2db.server.web.api.controller.ai.openai.listener.OpenAIEventSourceListener;
|
||||
import ai.chat2db.server.web.api.controller.ai.request.ChatQueryRequest;
|
||||
import ai.chat2db.server.web.api.controller.ai.request.ChatRequest;
|
||||
import ai.chat2db.server.web.api.controller.ai.rest.client.RestAIClient;
|
||||
import ai.chat2db.server.web.api.controller.ai.rest.listener.RestAIEventSourceListener;
|
||||
import ai.chat2db.server.web.api.http.GatewayClientService;
|
||||
import ai.chat2db.server.web.api.http.model.EsTableSchema;
|
||||
import ai.chat2db.server.web.api.http.model.TableSchema;
|
||||
import ai.chat2db.server.web.api.http.request.EsTableSchemaRequest;
|
||||
import ai.chat2db.server.web.api.http.request.TableSchemaRequest;
|
||||
import ai.chat2db.server.web.api.http.request.WhiteListRequest;
|
||||
import ai.chat2db.server.web.api.http.response.EsTableSchemaResponse;
|
||||
import ai.chat2db.server.web.api.http.response.TableSchemaResponse;
|
||||
import ai.chat2db.server.web.api.util.ApplicationContextUtil;
|
||||
import ai.chat2db.server.web.api.util.SegmentUtils;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import com.alibaba.fastjson2.JSON;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.unfbx.chatgpt.entity.chat.Message;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.math.BigDecimal;
|
||||
import java.time.Duration;
|
||||
@ -10,76 +70,6 @@ import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import ai.chat2db.server.domain.api.enums.AiSqlSourceEnum;
|
||||
import ai.chat2db.server.domain.api.model.Config;
|
||||
import ai.chat2db.server.domain.api.model.DataSource;
|
||||
import ai.chat2db.server.domain.api.param.ShowCreateTableParam;
|
||||
import ai.chat2db.server.domain.api.param.TableQueryParam;
|
||||
import ai.chat2db.server.domain.api.service.ConfigService;
|
||||
import ai.chat2db.server.domain.api.service.DataSourceService;
|
||||
import ai.chat2db.server.domain.api.service.TableService;
|
||||
import ai.chat2db.server.tools.base.wrapper.result.DataResult;
|
||||
import ai.chat2db.server.tools.common.exception.ParamBusinessException;
|
||||
import ai.chat2db.server.tools.common.util.EasyEnumUtils;
|
||||
import ai.chat2db.server.web.api.aspect.ConnectionInfoAspect;
|
||||
import ai.chat2db.server.web.api.controller.ai.azure.client.AzureOpenAIClient;
|
||||
import ai.chat2db.server.web.api.controller.ai.azure.model.AzureChatMessage;
|
||||
import ai.chat2db.server.web.api.controller.ai.azure.model.AzureChatRole;
|
||||
import ai.chat2db.server.web.api.controller.ai.chat2db.client.Chat2dbAIClient;
|
||||
import ai.chat2db.server.web.api.controller.ai.claude.client.ClaudeAIClient;
|
||||
import ai.chat2db.server.web.api.controller.ai.claude.model.ClaudeChatCompletionsOptions;
|
||||
import ai.chat2db.server.web.api.controller.ai.claude.model.ClaudeChatMessage;
|
||||
import ai.chat2db.server.web.api.controller.ai.config.LocalCache;
|
||||
import ai.chat2db.server.web.api.controller.ai.converter.ChatConverter;
|
||||
import ai.chat2db.server.web.api.controller.ai.enums.PromptType;
|
||||
import ai.chat2db.server.web.api.controller.ai.azure.listener.AzureOpenAIEventSourceListener;
|
||||
import ai.chat2db.server.web.api.controller.ai.claude.listener.ClaudeAIEventSourceListener;
|
||||
import ai.chat2db.server.web.api.controller.ai.fastchat.client.FastChatAIClient;
|
||||
import ai.chat2db.server.web.api.controller.ai.fastchat.embeddings.FastChatEmbeddingResponse;
|
||||
import ai.chat2db.server.web.api.controller.ai.fastchat.listener.FastChatAIEventSourceListener;
|
||||
import ai.chat2db.server.web.api.controller.ai.fastchat.model.FastChatMessage;
|
||||
import ai.chat2db.server.web.api.controller.ai.fastchat.model.FastChatRole;
|
||||
import ai.chat2db.server.web.api.controller.ai.openai.listener.OpenAIEventSourceListener;
|
||||
import ai.chat2db.server.web.api.controller.ai.rest.listener.RestAIEventSourceListener;
|
||||
import ai.chat2db.server.web.api.controller.ai.request.ChatQueryRequest;
|
||||
import ai.chat2db.server.web.api.controller.ai.request.ChatRequest;
|
||||
import ai.chat2db.server.web.api.controller.ai.rest.client.RestAIClient;
|
||||
import ai.chat2db.server.web.api.http.GatewayClientService;
|
||||
import ai.chat2db.server.web.api.http.model.EsTableSchema;
|
||||
import ai.chat2db.server.web.api.http.model.TableSchema;
|
||||
import ai.chat2db.server.web.api.http.request.EsTableSchemaRequest;
|
||||
import ai.chat2db.server.web.api.http.request.TableSchemaRequest;
|
||||
import ai.chat2db.server.web.api.http.response.EsTableSchemaResponse;
|
||||
import ai.chat2db.server.web.api.http.response.TableSchemaResponse;
|
||||
import ai.chat2db.server.web.api.util.ApplicationContextUtil;
|
||||
import ai.chat2db.server.web.api.controller.ai.openai.client.OpenAIClient;
|
||||
import ai.chat2db.spi.model.TableColumn;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import com.alibaba.fastjson2.JSON;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.unfbx.chatgpt.OpenAiApi;
|
||||
import com.unfbx.chatgpt.entity.chat.Message;
|
||||
import com.unfbx.chatgpt.entity.embeddings.Embedding;
|
||||
import com.unfbx.chatgpt.entity.embeddings.EmbeddingResponse;
|
||||
import io.reactivex.Single;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.Getter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.web.bind.annotation.CrossOrigin;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RequestHeader;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
|
||||
|
||||
/**
|
||||
* 描述:
|
||||
*
|
||||
@ -501,7 +491,13 @@ public class ChatController {
|
||||
TableQueryParam queryParam = chatConverter.chat2tableQuery(queryRequest);
|
||||
properties = buildTableColumn(queryParam, queryRequest.getTableNames());
|
||||
} else {
|
||||
properties = querySchemaByEs(queryRequest);
|
||||
String apiKey = getApiKey();
|
||||
if (StringUtils.isNotBlank(apiKey)) {
|
||||
boolean res = gatewayClientService.checkInWhite(new WhiteListRequest(apiKey, WhiteListTypeEnum.VECTOR.getCode())).getData();
|
||||
if (res) {
|
||||
properties = queryDatabaseSchema(queryRequest);
|
||||
}
|
||||
}
|
||||
}
|
||||
String prompt = queryRequest.getMessage();
|
||||
String promptType = StringUtils.isBlank(queryRequest.getPromptType()) ? PromptType.NL_2_SQL.getCode()
|
||||
@ -524,6 +520,26 @@ public class ChatController {
|
||||
return schemaProperty;
|
||||
}
|
||||
|
||||
/**
|
||||
* query chat2db apikey
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private String getApiKey() {
|
||||
ConfigService configService = ApplicationContextUtil.getBean(ConfigService.class);
|
||||
Config config = configService.find(RestAIClient.AI_SQL_SOURCE).getData();
|
||||
String aiSqlSource = AiSqlSourceEnum.CHAT2DBAI.getCode();
|
||||
// only sync for chat2db ai
|
||||
if (Objects.isNull(config) || !aiSqlSource.equals(config.getContent())) {
|
||||
return null;
|
||||
}
|
||||
Config keyConfig = configService.find(Chat2dbAIClient.CHAT2DB_OPENAI_KEY).getData();
|
||||
if (Objects.isNull(keyConfig) || StringUtils.isBlank(keyConfig.getContent())) {
|
||||
return null;
|
||||
}
|
||||
return keyConfig.getContent();
|
||||
}
|
||||
|
||||
/**
|
||||
* query database type
|
||||
*
|
||||
@ -550,7 +566,9 @@ public class ChatController {
|
||||
*/
|
||||
public String queryDatabaseSchema(ChatQueryRequest queryRequest) {
|
||||
// request embedding
|
||||
FastChatEmbeddingResponse response = distributeAIEmbedding(queryRequest.getMessage());
|
||||
String input = SegmentUtils.baseAnalysis(queryRequest.getMessage());
|
||||
log.info("search message:{}", input);
|
||||
FastChatEmbeddingResponse response = distributeAIEmbedding(input);
|
||||
List<List<BigDecimal>> contentVector = new ArrayList<>();
|
||||
contentVector.add(response.getData().get(0).getEmbedding());
|
||||
|
||||
@ -574,7 +592,9 @@ public class ChatController {
|
||||
schemas.add(data.getTableSchema());
|
||||
}
|
||||
}
|
||||
return JSON.toJSONString(schemas);
|
||||
String res = JSON.toJSONString(schemas);
|
||||
log.info("search vector result:{}", res);
|
||||
return res;
|
||||
} catch (Exception exception) {
|
||||
log.error("query table error, do nothing");
|
||||
return "";
|
||||
|
@ -77,7 +77,7 @@ public class RdbDdlController extends EmbeddingController {
|
||||
singleThreadExecutor.submit(() -> {
|
||||
try {
|
||||
Chat2DBContext.putContext(connectInfo);
|
||||
syncTableEs(request);
|
||||
syncTableVector(request);
|
||||
} catch (Exception e) {
|
||||
log.error("sync table vector error", e);
|
||||
} finally {
|
||||
|
@ -68,7 +68,7 @@ public class TableController extends EmbeddingController {
|
||||
singleThreadExecutor.submit(() -> {
|
||||
try {
|
||||
Chat2DBContext.putContext(connectInfo);
|
||||
syncTableEs(request);
|
||||
syncTableVector(request);
|
||||
} catch (Exception e) {
|
||||
log.error("sync table vector error", e);
|
||||
} finally {
|
||||
|
@ -0,0 +1,94 @@
|
||||
package ai.chat2db.server.web.api.util;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ansj.domain.Result;
|
||||
import org.ansj.domain.Term;
|
||||
import org.ansj.splitWord.analysis.BaseAnalysis;
|
||||
import org.ansj.splitWord.analysis.NlpAnalysis;
|
||||
import org.ansj.splitWord.analysis.ToAnalysis;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@Slf4j
|
||||
public class SegmentUtils {
|
||||
|
||||
/**
|
||||
* BaseAnalysis
|
||||
*
|
||||
* @param content
|
||||
*/
|
||||
public static String baseAnalysis(String content) {
|
||||
Result result = BaseAnalysis.parse(delHTMLTag(content).replace("\n", "").replace(" ", "").replace("\t", ""));
|
||||
log.info("base analysis result:" + result);
|
||||
return convertResToString(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* ToAnalysis
|
||||
*
|
||||
* @param content
|
||||
*/
|
||||
public static String toAnalysis(String content) {
|
||||
Result result = ToAnalysis.parse(content);
|
||||
log.info("to analysis result:" + result);
|
||||
return convertResToString(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* NlpAnalysis
|
||||
*
|
||||
* @param content
|
||||
*/
|
||||
public static String nlpAnalysis(String content) {
|
||||
Result result = NlpAnalysis.parse(delHTMLTag(content).replace("\n", "").replace(" ", "").replace("\t", ""));
|
||||
log.info("nlp analysis result:" + result);
|
||||
return convertResToString(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* convert result to string
|
||||
*
|
||||
* @param result
|
||||
* @return
|
||||
*/
|
||||
private static String convertResToString(Result result) {
|
||||
List<Term> terms = result.getTerms();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (Term term : terms) {
|
||||
String name = term.getName();
|
||||
String nature = term.getNatureStr();
|
||||
if (nature.equals("nt") || nature.equals("nr") || nature.equals("n")) {
|
||||
sb.append(name).append(" ");
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* delete html tag
|
||||
*
|
||||
* @param htmlStr
|
||||
* @return
|
||||
*/
|
||||
public static String delHTMLTag(String htmlStr) {
|
||||
String regEx_script = "<script[^>]*?>[\\s\\S]*?<\\/script>";
|
||||
String regEx_style = "<style[^>]*?>[\\s\\S]*?<\\/style>";
|
||||
String regEx_html = "<[^>]+>";
|
||||
|
||||
Pattern p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE);
|
||||
Matcher m_script = p_script.matcher(htmlStr);
|
||||
htmlStr = m_script.replaceAll("");
|
||||
|
||||
Pattern p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE);
|
||||
Matcher m_style = p_style.matcher(htmlStr);
|
||||
htmlStr = m_style.replaceAll("");
|
||||
|
||||
Pattern p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE);
|
||||
Matcher m_html = p_html.matcher(htmlStr);
|
||||
htmlStr = m_html.replaceAll("");
|
||||
|
||||
return htmlStr.trim();
|
||||
}
|
||||
}
|
@ -289,6 +289,13 @@
|
||||
<artifactId>pdfbox</artifactId>
|
||||
<version>2.0.24</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.ansj</groupId>
|
||||
<artifactId>ansj_seg</artifactId>
|
||||
<version>5.1.1</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
|
11
library/ambiguity.dic
Normal file
11
library/ambiguity.dic
Normal file
@ -0,0 +1,11 @@
|
||||
习近平 nr
|
||||
李民 nr 工作 vn
|
||||
三个 m 和尚 n
|
||||
的确 d 定 v 不 v
|
||||
大 a 和尚 n
|
||||
张三 nr 和 c
|
||||
动漫 n 游戏 n
|
||||
邓颖超 nr 生前 t
|
||||
|
||||
|
||||
|
386260
library/default.dic
Normal file
386260
library/default.dic
Normal file
File diff suppressed because it is too large
Load Diff
0
library/regex.dic
Normal file
0
library/regex.dic
Normal file
9
library/stop.dic
Normal file
9
library/stop.dic
Normal file
@ -0,0 +1,9 @@
|
||||
?
|
||||
:
|
||||
.
|
||||
,
|
||||
is
|
||||
a
|
||||
#
|
||||
v nature
|
||||
.*了 regex
|
9996
library/synonyms.dic
Normal file
9996
library/synonyms.dic
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user