commit 56eb2bce33d7c3ce3c705c78330d919a5cf15eed
Author: wizardchen
Date: Tue Aug 5 15:08:07 2025 +0800

    init commit

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..e69de29
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..d498e7c
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,172 @@
+# Usage instructions
+# 1. Copy this file to .env
+# 2. Replace all placeholders with actual values
+# 3. Make sure the .env file is never committed to version control
+
+# gin mode
+# Allowed values: debug (development mode, verbose logging), release (production mode)
+GIN_MODE=debug
+
+# ollama
+# Base URL of the Ollama service, used to connect to the locally running Ollama LLM service
+OLLAMA_BASE_URL=http://host.docker.internal:11435
+
+# Storage configuration
+# Primary database type (postgres/mysql)
+DB_DRIVER=postgres
+
+# Vector store type (postgres/elasticsearch_v7/elasticsearch_v8)
+RETRIEVE_DRIVER=postgres
+
+# File storage type (local/minio/cos)
+STORAGE_TYPE=local
+
+# Stream processing backend (memory/redis)
+STREAM_MANAGER_TYPE=redis
+
+# Primary database configuration
+# Database host, either a hostname or an IP address
+DB_HOST=postgres
+
+# Database port, defaults to 5432
+DB_PORT=5432
+
+# Database username
+DB_USER=postgres
+
+# Database password
+DB_PASSWORD=postgres123!@#
+
+# Database name
+DB_NAME=WeKnora
+
+# The following settings are required when Redis is used as the stream processing backend
+# Redis host, either a hostname or an IP address
+REDIS_HOST=redis
+
+# Redis port, defaults to 6379
+REDIS_PORT=6379
+
+# Redis password; leave empty if no password is set
+REDIS_PASSWORD=redis123!@#
+
+# Redis database index, defaults to 0
+REDIS_DB=0
+
+# Redis key prefix, used for namespace isolation
+REDIS_PREFIX=stream:
+
+# Base directory where files are saved when local storage is used
+LOCAL_STORAGE_BASE_DIR=./data/files
+
+TENANT_AES_KEY=weknorarag-api-key-secret-secret
+
+# Whether to enable knowledge graph construction and retrieval (the construction phase calls the LLM and takes a long time)
+ENABLE_GRAPH_RAG=false
+
+# The following settings are required when Elasticsearch is used as the vector store
+# Elasticsearch address, e.g. http://localhost:9200
+# ELASTICSEARCH_ADDR=your_elasticsearch_addr
+
+# Elasticsearch username, if authentication is required
+# ELASTICSEARCH_USERNAME=your_elasticsearch_username
+
+# Elasticsearch password, if authentication is required
+# ELASTICSEARCH_PASSWORD=your_elasticsearch_password
+
+# Elasticsearch index name used to store vector data
+# ELASTICSEARCH_INDEX=WeKnora
+
+# The following settings are required when MinIO is used as file storage
+# MinIO server address, e.g. minio:9000
+# MINIO_ENDPOINT=minio:9000
+
+# MinIO access key
+# MINIO_ACCESS_KEY_ID=your_minio_access_key
+
+# MinIO secret key
+# MINIO_SECRET_ACCESS_KEY=your_minio_secret_key
+
+# MinIO bucket name used to store files
+# MINIO_BUCKET_NAME=your_minio_bucket_name
+
+# The following settings are required when Tencent Cloud COS is used as file storage
+# Tencent Cloud COS secret ID
+# COS_SECRET_ID=your_cos_secret_id
+
+# Tencent Cloud COS secret key
+# COS_SECRET_KEY=your_cos_secret_key
+
+# Tencent Cloud COS region, e.g. ap-guangzhou
+# COS_REGION=your_cos_region
+
+# Tencent Cloud COS bucket name
+# COS_BUCKET_NAME=your_cos_bucket_name
+
+# Tencent Cloud COS app ID
+# COS_APP_ID=your_cos_app_id
+
+# Tencent Cloud COS path prefix used when storing files
+# COS_PATH_PREFIX=your_cos_path_prefix
+
+# COS_ENABLE_OLD_DOMAIN=true enables the legacy domain format; defaults to true
+COS_ENABLE_OLD_DOMAIN=true
+
+# Initialize the default tenant and knowledge base
+# Tenant ID, usually a string
+INIT_TEST_TENANT_ID=1
+
+# Knowledge base ID, usually a string
+INIT_TEST_KNOWLEDGE_BASE_ID=kb-00000001
+
+# LLM Model
+# Name of the LLM model to use
+# Defaults to Ollama's Qwen3 8B model; ollama handles model download and loading automatically
+# Replace with the actual model name to use a different model
+INIT_LLM_MODEL_NAME=qwen3:8b
+
+# Base URL of the LLM model
+# URLs of third-party model services are supported
+# Can be left empty when using the local Ollama service; ollama handles it automatically
+# INIT_LLM_MODEL_BASE_URL=your_llm_model_base_url
+
+# API key of the LLM model; set it if authentication is required
+# API keys of third-party model services are supported
+# Can be left empty when using the local Ollama service; ollama handles it automatically
+# INIT_LLM_MODEL_API_KEY=your_llm_model_api_key
+
+# Embedding Model
+# Name of the embedding model to use
+# Defaults to the nomic-embed-text model, which supports text embedding
+# Replace with the actual model name to use a different model
+INIT_EMBEDDING_MODEL_NAME=nomic-embed-text
+
+# Embedding model vector dimension
+INIT_EMBEDDING_MODEL_DIMENSION=768
+
+# Embedding model ID, usually a string
+INIT_EMBEDDING_MODEL_ID=builtin:nomic-embed-text:768
+
+# Base URL of the embedding model
+# URLs of third-party model services are supported
+# Can be left empty when using the local Ollama service; ollama handles it automatically
+# INIT_EMBEDDING_MODEL_BASE_URL=your_embedding_model_base_url
+
+# API key of the embedding model; set it if authentication is required
+# API keys of third-party model services are supported
+# Can be left empty when using the local Ollama service; ollama handles it automatically
+# INIT_EMBEDDING_MODEL_API_KEY=your_embedding_model_api_key
+
+# Rerank Model (optional)
+# For RAG, using a rerank model plays an important role in improving document retrieval accuracy
+# ollama does not currently support running rerank models
+# Name of the rerank model to use
+# INIT_RERANK_MODEL_NAME=your_rerank_model_name
+
+# Base URL of the rerank model
+# URLs of third-party model services are supported
+# INIT_RERANK_MODEL_BASE_URL=your_rerank_model_base_url
+
+# API key of the rerank model; set it if authentication is required
+# API keys of third-party model services are supported
+# INIT_RERANK_MODEL_API_KEY=your_rerank_model_api_key
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3aafc97
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,34 @@
+# Ignore the .env file and other configuration files that contain sensitive information
+.env
+# But do not ignore the example file
+!.env.example
+*.pem
+*_key
+*_secret
+*.key
+*.crt
+
+# IDE and editor files
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Build and dependency files
+node_modules/
+/dist/
+/build/
+*.log
+
+# Temporary files
+tmp/
+temp/
+
+WeKnora
+/models/
+services/docreader/src/proto/__pycache__
+test/data/mswag.txt
+data/files/
+
+.python-version
+.venv/
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..75b8af5
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,2692 @@
+Tencent is pleased to support the open source community by making this project available.
+
+Copyright (C) 2025 Tencent. All rights reserved.
+
+This project is licensed under the MIT License except for the third-party components listed below, which is licensed under different terms. Tencent does not impose any additional limitations beyond what is outlined in the respective licenses of these third-party components. Users must comply with all terms and conditions of original licenses of these third-party components and must ensure that the usage of the third party components adheres to all relevant laws and regulations.
+
+
+Terms of the MIT License:
+--------------------------------------------------------------------
+Copyright (C) 2025 Tencent. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the " Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+--------------------------------------------------------------------
+Other third-party components:
+
+Open Source Software Licensed under the Apache-2.0:
+--------------------------------------------------------------------
+1. otel-1.37.0
+Copyright (c) 2025 opentelemetry-go original author and authors
+
+2. stdr-1.2.2
+Copyright (c) 2021 stdr original author and authors
+
+3. parquet-go-0.25.0
+Copyright 2023 Twilio, Inc.
+
+4. rpc-0.0.0-20250603155806-513f23925822
+Copyright (c) 2025 go-genproto original author and authors
+
+5. api-0.0.0-20250603155806-513f23925822
+Copyright (c) 2025 go-genproto original author and authors
+
+6. 
grpc-1.73.0 +Copyright (c) 2025 grpc-go original author and authors + +7. base64x-0.1.5 +Copyright (c) 2025 base64x original author and authors + +8. trace-1.37.0 +Copyright (c) 2025 opentelemetry-go original author and authors + +9. loader-0.2.4 +Copyright 2025 ByteDance Inc. + +10. paddlepaddle-gpu-3.0.0 +Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved + +11. otlp-1.7.0 +Copyright (c) 2025 opentelemetry-proto original author and authors + +12. logr-1.4.3 +Copyright (c) 2025 logr original author and authors + +13. otlptrace-1.37.0 +Copyright (c) 2025 opentelemetry-go original author and authors + +14. metric-1.37.0 +Copyright (c) 2025 opentelemetry-go original author and authors + +15. sonic-1.13.2 +Copyright (c) 2025 sonic original author and authors + +16. go-openai-1.40.5 +Copyright (c) 2025 go-openai original author and authors + +17. crc64nvme-1.0.1 +Copyright (c) 2025 crc64nvme original author and authors + +18. concurrent-0.0.0-20180306012644-bacd9c7ef1dd +Copyright (c) 2018 concurrent original author and authors + +19. stdouttrace-1.35.0 +Copyright (c) 2025 opentelemetry-go original author and authors + +20. otlptracegrpc-1.37.0 +Copyright (c) 2025 opentelemetry-go original author and authors + +21. paddleocr-3.0.0 +Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + + +Terms of the Apache-2.0: +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Open Source Software Licensed under the BSD-2-Clause: +-------------------------------------------------------------------- +1. esrecurse-4.3.0 +Copyright (C) 2014 [Yusuke Suzuki](https://github.com/Constellation) (twitter: [@Constellation](https://twitter.com/Constellation)) and other contributors. + +2. glob-to-regexp-0.4.1 +Copyright (c) 2013, Nick Fitzgerald + +3. eslint-scope-5.1.1 +Copyright(c) 2025 eslint-scope original author and authors + + + +Terms of the BSD-2-Clause: +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Open Source Software Licensed under the BSD-3-Clause: +-------------------------------------------------------------------- +1. text-0.26.0 +Copyright 2009 The Go Authors. + +2. compress-1.18.0 +Copyright (c) 2012 The Go Authors. All rights reserved. +Copyright (c) 2019 Klaus Post. All rights reserved. +Copyright 2016-2017 The New York Times Company +Copyright (c) 2015 Klaus Post +Copyright 2016 The filepath Authors + +3. net-0.41.0 +Copyright 2009 The Go Authors. + +4. fsnotify-1.8.0 +Copyright © 2012 The Go Authors. All rights reserved. +Copyright © fsnotify Authors. All rights reserved. + +5. sys-0.33.0 +Copyright 2009 The Go Authors. + +6. protobuf-1.36.6 +Copyright (c) 2018 The Go Authors. All rights reserved. + +7. arch-0.15.0 +Copyright 2015 The Go Authors. + + + +Terms of the BSD-3-Clause: +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +Neither the name of the ORGANIZATION nor the names of its contributors may be +used to endorse or promote products derived from this software without specific +prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Open Source Software Licensed under the CC-BY-4.0: +-------------------------------------------------------------------- +1. caniuse-lite-1.0.30001727 +Copyright(c) 2025 Ben Briggs + + + +Terms of the CC-BY-4.0: +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. 
Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. 
Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. 
For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. 
indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. 
However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public +licenses. Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. 
+
+
+Open Source Software Licensed under the ISC:
+--------------------------------------------------------------------
+1. electron-to-chromium-1.5.183
+Copyright 2018 Kilian Valkhof
+
+
+
+Terms of the ISC:
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+
+Open Source Software Licensed under the MIT:
+--------------------------------------------------------------------
+1. birpc-2.5.0
+Copyright (c) 2021 Anthony Fu
+
+2. @babel/generator-7.28.0
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+3. neo-async-2.6.2
+Copyright (c) 2014-2018 Suguru Motegi
+
+4. parse-node-version-1.0.1
+Copyright (c) 2018 Blaine Bublitz and Eric Schoffstall
+
+5. cors-1.7.5
+Copyright (c) 2016 Gin-Gonic
+
+6. vue-3.5.17
+Copyright (c) 2018-present, Yuxi (Evan) You and Vue contributors
+
+7. copy-anything-2.0.6
+Copyright (c) 2018 Luca Ban - Mesqueeb
+
+8. yaml.v3-3.0.1
+Copyright (c) 2006-2010 Kirill Simonov
+Copyright (c) 2006-2011 Kirill Simonov
+Copyright (c) 2011-2019 Canonical Ltd
+
+9. gojieba-1.4.5
+Copyright (c) The project creators and maintainers
+
+10. codec-1.2.12
+Copyright (c) 2012-2020 Ugorji Nwoke.
+
+11. vue-tsc-2.2.12
+Copyright(c) 2025 vue-tsc original author and authors
+
+12. follow-redirects-1.15.9
+Copyright 2017-present Olivier Lalonde, James Talmage, Ruben Verborgh
+
+13. @vue/server-renderer-3.5.17
+Copyright (c) 2018-present, Yuxi (Evan) You and Vue contributors
+
+14. pgvector-go-0.3.0
+Copyright (c) 2021-2025 Andrew Kane
+
+15. @babel/traverse-7.28.0
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+16. @webassemblyjs/helper-api-error-1.13.2
+Copyright (c) 2018 Sven Sauleau
+
+17. estree-walker-2.0.2
+Copyright (c) 2015-20 [these people](https://github.com/Rich-Harris/estree-walker/graphs/contributors)
+
+18. @types/node-22.16.3
+Copyright(c) 2025 @types/node original author and authors
+
+19. magic-string-0.30.17
+Copyright 2018 Rich Harris
+
+20. pinia-3.0.3
+Copyright (c) 2019-present Eduardo San Martin Morote
+
+21. @types/lodash-4.17.20
+Copyright(c) 2025 @types/lodash original author and authors
+
+22. es-set-tostringtag-2.1.0
+Copyright (c) 2022 ECMAScript Shims
+
+23. @vue/runtime-dom-3.5.17
+Copyright (c) 2018-present, Yuxi (Evan) You and Vue contributors
+
+24. @vue/reactivity-3.5.17
+Copyright (c) 2018-present, Yuxi (Evan) You and Vue contributors
+
+25. de-indent-1.0.2
+Copyright(c) 2025 Evan You
+
+26. @babel/template-7.27.2
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+27. tdesign-icons-vue-next-0.3.6
+Copyright (c) 2021-present TDesign
+
+28. webpack-5.100.1
+Copyright JS Foundation and other contributors
+
+29. asynq-0.25.1
+Copyright (c) 2019 Kentaro Hibino
+
+30. shell-quote-1.8.3
+Copyright (c) 2013 James Halliday (mail@substack.net)
+
+31. acorn-8.15.0
+Copyright(c) 2025 acorn original author and authors
+
+32. esbuild-0.25.6
+Copyright (c) 2020 Evan Wallace
+
+33. tdesign-vue-next-1.14.2
+Copyright (c) 2021-present TDesign
+
+34. @vue/runtime-core-3.5.17
+Copyright (c) 2018-present, Yuxi (Evan) You and Vue contributors
+
+35. validator-13.15.15
+Copyright (c) 2018 Chris O'Hara <cohara87@gmail.com>
+
+36. postcss-8.5.6
+Copyright 2013 Andrey Sitnik
+
+37. mimetype-1.4.8
+Copyright (c) 2018 Gabriel Vasile
+
+38. @babel/helper-globals-7.28.0
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+39. @babel/helper-create-class-features-plugin-7.27.1
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+40. @babel/helper-replace-supers-7.27.1
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+41. is-what-3.14.1
+Copyright (c) 2018 Luca Ban - Mesqueeb
+
+42. @rolldown/pluginutils-1.0.0-beta.27
+Copyright (c) 2024-present VoidZero Inc. & Contributors
+
+43. ollama-0.9.6
+Copyright (c) Ollama
+
+44. locafero-0.7.0
+Copyright (c) 2023 Márk Sági-Kazár
+
+45. stylus->=0.54.8
+Copyright (c) Automattic
+
+46. @vue/language-core-2.2.12
+Copyright (c) 2021-present Johnson Chu
+
+47. @babel/parser-7.28.0
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+48. cos-go-sdk-v5-0.7.65
+Copyright (c) 2017 mozillazg
+
+49. @rolldown/pluginutils-1.0.0-beta.19
+Copyright (c) 2024-present VoidZero Inc. & Contributors
+
+50. @vue/compiler-core-3.5.17
+Copyright (c) 2018-present, Yuxi (Evan) You and Vue contributors
+
+51. @vue/shared-3.5.17
+Copyright (c) 2018-present, Yuxi (Evan) You and Vue contributors
+
+52. @vitejs/plugin-vue-jsx-5.0.1
+Copyright (c) 2019-present, Yuxi
+(Evan) You and Vite contributors
+
+53. fdir-6.4.6
+Copyright 2023 Abdullah Atta
+
+54. go-rendezvous-0.0.0-20200823014737-9f7001d12a5f
+Copyright (c) 2017-2020 Damian Gryski
+
+55. @babel/plugin-syntax-jsx-7.27.1
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+56. @vue/compiler-ssr-3.5.17
+Copyright (c) 2018-present, Yuxi (Evan) You and Vue contributors
+
+57. @babel/helper-member-expression-to-functions-7.27.1
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+58. es-module-lexer-1.7.0
+Copyright (C) 2018-2022 Guy Bedford
+
+59. vue-router-4.5.1
+Copyright (c) 2019-present Eduardo San Martin Morote
+
+60. @babel/helper-validator-option-7.27.1
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+61. @vue/compiler-sfc-3.5.17
+Copyright (c) 2018-present, Yuxi (Evan) You and Vue contributors
+
+62. @vue/devtools-shared-7.7.7
+Copyright (c) 2023 webfansplz
+
+63. gensync-1.0.0-beta.2
+Copyright 2018 Logan Smyth
+
+64. @rspack/core-0.x || 1.x
+Copyright (c) 2022-present ByteDance Inc. and its affiliates.
+
+65. jiti->=1.21.0
+Copyright (c) Pooya Parsa
+
+66. pgservicefile-0.0.0-20240606120523-5a60cdf6a761
+Copyright (c) 2020 Jack Christensen
+
+67. @types/validator-13.15.2
+Copyright(c) 2025 @types/validator original author and authors
+
+68. @vue/devtools-api-7.7.7
+Copyright (c) 2023 webfansplz
+
+69. tapable-2.2.2
+Copyright JS Foundation and
+other contributors
+
+70. @vitejs/plugin-vue-6.0.0
+Copyright (c) 2019-present, Yuxi (Evan) You and Vite contributors
+
+71. @babel/core-7.28.0
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+72. @babel/types-7.28.1
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+73. @babel/compat-data-7.28.0
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+74. @volar/typescript-2.4.15
+Copyright (c) 2021-present Johnson Chu
+
+75. @babel/helper-annotate-as-pure-7.27.3
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+76. vite-7.0.4
+Copyright (c) 2019-present, VoidZero Inc. and Vite contributors
+
+77. @volar/source-map-2.4.15
+Copyright (c) 2021-present Johnson Chu
+
+78. json-parse-even-better-errors-2.3.1
+Copyright 2017 Kat Marchán
+Copyright npm, Inc.
+
+79. @vue/compiler-dom-3.5.17
+Copyright (c) 2018-present, Yuxi (Evan) You and Vue contributors
+
+80. @volar/language-core-2.4.15
+Copyright (c) 2021-present Johnson Chu
+
+81. @vue/devtools-kit-7.7.7
+Copyright (c) 2023 webfansplz
+
+82. @babel/helper-skip-transparent-expression-wrappers-7.27.1
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+83. mitt-3.0.1
+Copyright (c) 2021 Jason Miller
+
+84. acorn-import-phases-1.0.4
+Copyright (c) 2025 Nicolò Ribaudo
+
+85. @babel/helper-string-parser-7.27.1
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+86. watchpack-2.4.4
+Copyright JS Foundation and other contributors
+
+87. @babel/runtime-7.27.6
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+88. @babel/plugin-transform-typescript-7.28.0
+Copyright (c) 2014-present Sebastian McKenzie and other contributors
+
+89. jest-worker-27.5.1
+Copyright (c) jest-worker authors.
+You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/jest-worker.
+
+
+
+Terms of the MIT:
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+Open Source Software Licensed under the MIT AND ISC AND 0BSD:
+--------------------------------------------------------------------
+1. rollup-4.45.0
+Copyright(c) 2025 Rich Harris
+
+
+
+Terms of the MIT AND ISC AND 0BSD:
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +Open Source Software Licensed under the MIT-CMU: +-------------------------------------------------------------------- +1. Pillow-default +Copyright © 1997-2011 by Secret Labs AB +Copyright © 1995-2011 by Fredrik Lundh and contributors +Copyright © 2010 by Jeffrey A. Clark and contributors + + + +Terms of the MIT-CMU: +Permission to use, copy, modify and distribute this software and its +documentation for any purpose and without fee is hereby granted, provided that +the above copyright notice appears in all copies and that both that copyright +notice and this permission notice appear in supporting documentation, and that +the name of CMU and The Regents of the University of California not be used in +advertising or publicity pertaining to distribution of the software without +specific written permission. + +CMU AND THE REGENTS OF THE UNIVERSITY OF CALIFORNIA DISCLAIM ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL CMU OR THE REGENTS OF THE UNIVERSITY OF CALIFORNIA BE +LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM THE LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +Open Source Software Licensed under the MPL-2.0: +-------------------------------------------------------------------- +1. lightningcss-1.21.0 +Copyright(c) 2025 lightningcss original author and authors + + + +Terms of the MPL-2.0: +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. 
"Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. 
You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. 
No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. 
+ +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. + +Open Source Software Licensed under the apache-2.0: +-------------------------------------------------------------------- +1. @ampproject/remapping-2.3.0 +Copyright (c) @ampproject/remapping authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@ampproject/remapping. + +2. sync-0.15.0 +Copyright (c) sync authors. +You may obtain the source code and detailed information about this component at https://mvnrepository.com/artifact/com.aerospike/sync. + +3. @webassemblyjs/leb128-1.13.2 +Copyright (c) @webassemblyjs/leb128 authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/leb128. + +4. sdk-1.37.0 +Copyright (c) sdk authors. +You may obtain the source code and detailed information about this component at https://mvnrepository.com/artifact/io.lakefs/sdk. + +5. crypto-0.39.0 +Copyright (c) crypto authors. +You may obtain the source code and detailed information about this component at https://github.com/smallstep/crypto. + +6. grpcio-default +Copyright (c) grpcio authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/grpcio. + +7. sdk-1.1.0 +Copyright (c) sdk authors. +You may obtain the source code and detailed information about this component at https://mvnrepository.com/artifact/com.jedlix/sdk. + +8. afero-1.12.0 +Copyright (c) afero authors. +You may obtain the source code and detailed information about this component at https://github.com/spf13/afero. + +9. @xtuc/long-4.2.2 +Copyright (c) @xtuc/long authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@xtuc/long. + +10. chrome-trace-event-1.0.4 +Copyright (c) chrome-trace-event authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/chrome-trace-event. + +11. reflect2-1.0.2 +Copyright (c) reflect2 authors. +You may obtain the source code and detailed information about this component at https://github.com/modern-go/reflect2. + +12. grpcio-tools-default +Copyright (c) grpcio-tools authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/grpcio-tools. + +13. playwright-default +Copyright (c) playwright authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/playwright. + +14. requests-default +Copyright (c) requests authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/requests. + +15. 
ini-1.67.0 +Copyright (c) ini authors. +You may obtain the source code and detailed information about this component at https://github.com/go-ini/ini. + +16. asyncio-default +Copyright (c) asyncio authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/asyncio. + +17. time-0.11.0 +Copyright (c) time authors. +You may obtain the source code and detailed information about this component at https://mvnrepository.com/artifact/org.occurrent/time. + +18. less-4.3.0 +Copyright (c) less authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/less. + +19. typescript-5.8.3 +Copyright (c) typescript authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/typescript. + + + +Terms of the apache-2.0: +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Open Source Software Licensed under the bsd-new: +-------------------------------------------------------------------- +1. lxml-default +Copyright (c) lxml authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/lxml. + +2. pypdf-default +Copyright (c) pypdf authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/pypdf. + +3. go-querystring-1.1.0 +Copyright (c) go-querystring authors. +You may obtain the source code and detailed information about this component at https://github.com/google/go-querystring. + +4. speakingurl-14.0.1 +Copyright (c) speakingurl authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/speakingurl. + +5. markdown-default +Copyright (c) markdown authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/markdown. + +6. source-map-0.6.1 +Copyright (c) source-map authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/source-map. + +7. serialize-javascript-6.0.2 +Copyright (c) serialize-javascript authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/serialize-javascript. + +8. golang-asm-0.15.1 +Copyright (c) golang-asm authors. +You may obtain the source code and detailed information about this component at https://github.com/twitchyliquid64/golang-asm. + +9. source-map-js-1.2.1 +Copyright (c) source-map-js authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/source-map-js. + +10. PyPDF2-default +Copyright (c) PyPDF2 authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/pypdf2. + +11. fast-uri-3.0.6 +Copyright (c) fast-uri authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/fast-uri. + +12. mxj-1.8.4 +Copyright (c) mxj authors. +You may obtain the source code and detailed information about this component at https://github.com/clbanning/mxj. + +13. protobuf-default +Copyright (c) protobuf authors. 
+You may obtain the source code and detailed information about this component at https://pypi.org/project/protobuf. + +14. @xtuc/ieee754-1.2.0 +Copyright (c) @xtuc/ieee754 authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@xtuc/ieee754. + + + +Terms of the bsd-new: +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +Neither the name of the ORGANIZATION nor the names of its contributors may be +used to endorse or promote products derived from this software without specific +prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Open Source Software Licensed under the bsd-simplified: +-------------------------------------------------------------------- +1. terser-5.43.1 +Copyright (c) terser authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/terser. + +2. estraverse-4.3.0 +Copyright (c) estraverse authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/estraverse. + +3. entities-4.5.0 +Copyright (c) entities authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/entities. + + + +Terms of the bsd-simplified: +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Open Source Software Licensed under the bsd-zero: +-------------------------------------------------------------------- +1. tslib-2.8.1 +Copyright (c) tslib authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/tslib. + + + +Terms of the bsd-zero: +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +Open Source Software Licensed under the isc: +-------------------------------------------------------------------- +1. graceful-fs-4.2.11 +Copyright (c) graceful-fs authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/graceful-fs. + +2. which-5.0.0 +Copyright (c) which authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/which. + +3. isexe-3.1.1 +Copyright (c) isexe authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/isexe. + +4. yaml-2.4.2 +Copyright (c) yaml authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/yaml. + +5. lru-cache-5.1.1 +Copyright (c) lru-cache authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/lru-cache. + +6. read-package-json-fast-4.0.0 +Copyright (c) read-package-json-fast authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/read-package-json-fast. + +7. minimatch-9.0.5 +Copyright (c) minimatch authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/minimatch. + +8. semver-6.3.1 +Copyright (c) semver authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/semver. + +9. npm-normalize-package-bin-4.0.0 +Copyright (c) npm-normalize-package-bin authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/npm-normalize-package-bin. + +10. picocolors-1.1.1 +Copyright (c) picocolors authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/picocolors. 
+ + + +Terms of the isc: +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. + + +Open Source Software Licensed under the mit: +-------------------------------------------------------------------- +1. @tsconfig/node22-22.0.2 +Copyright (c) @tsconfig/node22 authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@tsconfig/node22. + +2. tinyglobby-0.2.14 +Copyright (c) tinyglobby authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/tinyglobby. + +3. jsesc-3.1.0 +Copyright (c) jsesc authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/jsesc. + +4. es-errors-1.3.0 +Copyright (c) es-errors authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/es-errors. + +5. brotli-1.1.0 +Copyright (c) brotli authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/brotli. + +6. undici-types-6.21.0 +Copyright (c) undici-types authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/undici-types. + +7. dayjs-1.11.10 +Copyright (c) dayjs authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/dayjs. + +8. @jridgewell/gen-mapping-0.3.12 +Copyright (c) @jridgewell/gen-mapping authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@jridgewell/gen-mapping. + +9. events-3.3.0 +Copyright (c) events authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/events. + +10. @types/sortablejs-1.15.8 +Copyright (c) @types/sortablejs authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@types/sortablejs. + +11. js-tokens-4.0.0 +Copyright (c) js-tokens authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/js-tokens. + +12. @vue/babel-helper-vue-transform-on-1.4.0 +Copyright (c) @vue/babel-helper-vue-transform-on authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@vue/babel-helper-vue-transform-on. + +13. @types/marked-5.0.2 +Copyright (c) @types/marked authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@types/marked. + +14. @types/tinycolor2-1.4.6 +Copyright (c) @types/tinycolor2 authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@types/tinycolor2. + +15. 
xid-1.6.0 +Copyright (c) xid authors. +You may obtain the source code and detailed information about this component at https://github.com/rs/xid. + +16. lodash-es-4.17.21 +Copyright (c) lodash-es authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/lodash-es. + +17. go-runewidth-0.0.15 +Copyright (c) go-runewidth authors. +You may obtain the source code and detailed information about this component at https://gitee.com/mirrors/go-runewidth. + +18. marked-5.1.2 +Copyright (c) marked authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/marked. + +19. cast-1.7.1 +Copyright (c) cast authors. +You may obtain the source code and detailed information about this component at https://github.com/spf13/cast. + +20. antiword-default +Copyright (c) antiword authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/antiword. + +21. cos-python-sdk-v5-default +Copyright (c) cos-python-sdk-v5 authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/cos-python-sdk-v5. + +22. loader-runner-4.3.0 +Copyright (c) loader-runner authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/loader-runner. + +23. alien-signals-1.0.13 +Copyright (c) alien-signals authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/alien-signals. + +24. @webassemblyjs/wasm-parser-1.14.1 +Copyright (c) @webassemblyjs/wasm-parser authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/wasm-parser. + +25. textract-default +Copyright (c) textract authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/textract. + +26. hasown-2.0.2 +Copyright (c) hasown authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/hasown. + +27. shebang-regex-3.0.0 +Copyright (c) shebang-regex authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/shebang-regex. + +28. @types/eslint-scope-3.7.7 +Copyright (c) @types/eslint-scope authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@types/eslint-scope. + +29. node-releases-2.0.19 +Copyright (c) node-releases authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/node-releases. + +30. combined-stream-1.0.8 +Copyright (c) combined-stream authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/combined-stream. + +31. ajv-formats-2.1.1 +Copyright (c) ajv-formats authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/ajv-formats. + +32. has-tostringtag-1.0.2 +Copyright (c) has-tostringtag authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/has-tostringtag. + +33. vscode-uri-3.1.0 +Copyright (c) vscode-uri authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/vscode-uri. + +34. 
npm-run-all2-7.0.2 +Copyright (c) npm-run-all2 authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/npm-run-all2. + +35. tablewriter-0.0.5 +Copyright (c) tablewriter authors. +You may obtain the source code and detailed information about this component at https://github.com/olekukonko/tablewriter. + +36. cross-spawn-7.0.6 +Copyright (c) cross-spawn authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/cross-spawn. + +37. gin-1.10.0 +Copyright (c) gin authors. +You may obtain the source code and detailed information about this component at https://github.com/donetkit/gin. + +38. he-1.2.0 +Copyright (c) he authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/he. + +39. @babel/helper-optimise-call-expression-7.27.1 +Copyright (c) @babel/helper-optimise-call-expression authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@babel/helper-optimise-call-expression. + +40. debug-4.4.1 +Copyright (c) debug authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/debug. + +41. commander-2.20.3 +Copyright (c) commander authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/commander. + +42. mapstructure-1.4.3 +Copyright (c) mapstructure authors. +You may obtain the source code and detailed information about this component at https://github.com/go-viper/mapstructure. + +43. @vue/compiler-vue2-2.7.16 +Copyright (c) @vue/compiler-vue2 authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@vue/compiler-vue2. + +44. escalade-3.2.0 +Copyright (c) escalade authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/escalade. + +45. conc-0.3.0 +Copyright (c) conc authors. +You may obtain the source code and detailed information about this component at https://github.com/sourcegraph/conc. + +46. @types/eslint-9.6.1 +Copyright (c) @types/eslint authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@types/eslint. + +47. go-httpheader-0.2.1 +Copyright (c) go-httpheader authors. +You may obtain the source code and detailed information about this component at https://github.com/mozillazg/go-httpheader. + +48. @webassemblyjs/wasm-opt-1.14.1 +Copyright (c) @webassemblyjs/wasm-opt authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/wasm-opt. + +49. @popperjs/core-2.11.8 +Copyright (c) @popperjs/core authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@popperjs/core. + +50. @webassemblyjs/wast-printer-1.14.1 +Copyright (c) @webassemblyjs/wast-printer authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/wast-printer. + +51. webpack-sources-3.3.3 +Copyright (c) webpack-sources authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/webpack-sources. + +52. @webassemblyjs/wasm-gen-1.14.1 +Copyright (c) @webassemblyjs/wasm-gen authors. 
+You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/wasm-gen. + +53. @babel/helper-validator-identifier-7.27.1 +Copyright (c) @babel/helper-validator-identifier authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@babel/helper-validator-identifier. + +54. merge-stream-2.0.0 +Copyright (c) merge-stream authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/merge-stream. + +55. logrus-1.9.3 +Copyright (c) logrus authors. +You may obtain the source code and detailed information about this component at https://github.com/sirupsen/logrus. + +56. path-key-3.1.1 +Copyright (c) path-key authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/path-key. + +57. gorm-1.25.12 +Copyright (c) gorm authors. +You may obtain the source code and detailed information about this component at https://github.com/go-gorm/gorm. + +58. fast-deep-equal-3.1.3 +Copyright (c) fast-deep-equal authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/fast-deep-equal. + +59. @webassemblyjs/utf8-1.13.2 +Copyright (c) @webassemblyjs/utf8 authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/utf8. + +60. @babel/plugin-syntax-typescript-7.27.1 +Copyright (c) @babel/plugin-syntax-typescript authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@babel/plugin-syntax-typescript. + +61. mime-db-1.52.0 +Copyright (c) mime-db authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/mime-db. + +62. @webassemblyjs/floating-point-hex-parser-1.13.2 +Copyright (c) @webassemblyjs/floating-point-hex-parser authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/floating-point-hex-parser. + +63. hookable-5.5.3 +Copyright (c) hookable authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/hookable. + +64. pidtree-0.6.0 +Copyright (c) pidtree authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/pidtree. + +65. @types/lodash-es-4.17.12 +Copyright (c) @types/lodash-es authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@types/lodash-es. + +66. shebang-command-2.0.0 +Copyright (c) shebang-command authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/shebang-command. + +67. @webassemblyjs/wasm-edit-1.14.1 +Copyright (c) @webassemblyjs/wasm-edit authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/wasm-edit. + +68. convert-source-map-2.0.0 +Copyright (c) convert-source-map authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/convert-source-map. + +69. json-schema-traverse-1.0.0 +Copyright (c) json-schema-traverse authors. 
+You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/json-schema-traverse. + +70. @webassemblyjs/helper-buffer-1.14.1 +Copyright (c) @webassemblyjs/helper-buffer authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/helper-buffer. + +71. @babel/helper-module-imports-7.27.1 +Copyright (c) @babel/helper-module-imports authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@babel/helper-module-imports. + +72. update-browserslist-db-1.1.3 +Copyright (c) update-browserslist-db authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/update-browserslist-db. + +73. sugarss-5.0.0 +Copyright (c) sugarss authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/sugarss. + +74. uniseg-0.4.7 +Copyright (c) uniseg authors. +You may obtain the source code and detailed information about this component at https://github.com/rivo/uniseg. + +75. buffer-from-1.1.2 +Copyright (c) buffer-from authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/buffer-from. + +76. @vue/babel-plugin-jsx-1.4.0 +Copyright (c) @vue/babel-plugin-jsx authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@vue/babel-plugin-jsx. + +77. @webassemblyjs/helper-wasm-section-1.14.1 +Copyright (c) @webassemblyjs/helper-wasm-section authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/helper-wasm-section. + +78. dig-1.18.1 +Copyright (c) dig authors. +You may obtain the source code and detailed information about this component at https://github.com/uber-go/dig. + +79. go-isatty-0.0.20 +Copyright (c) go-isatty authors. +You may obtain the source code and detailed information about this component at https://github.com/mattn/go-isatty. + +80. randombytes-2.1.0 +Copyright (c) randombytes authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/randombytes. + +81. mistletoe-default +Copyright (c) mistletoe authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/mistletoe. + +82. @jridgewell/trace-mapping-0.3.29 +Copyright (c) @jridgewell/trace-mapping authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@jridgewell/trace-mapping. + +83. @jridgewell/source-map-0.3.10 +Copyright (c) @jridgewell/source-map authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@jridgewell/source-map. + +84. ajv-keywords-5.1.0 +Copyright (c) ajv-keywords authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/ajv-keywords. + +85. memorystream-0.3.1 +Copyright (c) memorystream authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/memorystream. + +86. @vue/devtools-api-6.6.4 +Copyright (c) @vue/devtools-api authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@vue/devtools-api. + +87. 
go-1.1.12 +Copyright (c) go authors. +You may obtain the source code and detailed information about this component at https://github.com/json-iterator/go. + +88. supports-color-8.1.1 +Copyright (c) supports-color authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/supports-color. + +89. mime-types-2.1.35 +Copyright (c) mime-types authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/mime-types. + +90. enhanced-resolve-5.18.2 +Copyright (c) enhanced-resolve authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/enhanced-resolve. + +91. now-1.1.5 +Copyright (c) now authors. +You may obtain the source code and detailed information about this component at https://github.com/jinzhu/now. + +92. ajv-8.17.1 +Copyright (c) ajv authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/ajv. + +93. proxy-from-env-1.1.0 +Copyright (c) proxy-from-env authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/proxy-from-env. + +94. chrome-trace-event-1.0.4 +Copyright (c) chrome-trace-event authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/chrome-trace-event. + +95. axios-1.10.0 +Copyright (c) axios authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/axios. + +96. gotenv-1.6.0 +Copyright (c) gotenv authors. +You may obtain the source code and detailed information about this component at https://github.com/subosito/gotenv. + +97. brace-expansion-2.0.2 +Copyright (c) brace-expansion authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/brace-expansion. + +98. function-bind-1.1.2 +Copyright (c) function-bind authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/function-bind. + +99. @microsoft/fetch-event-source-2.0.1 +Copyright (c) @microsoft/fetch-event-source authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@microsoft/fetch-event-source. + +100. mxj-1.8.4 +Copyright (c) mxj authors. +You may obtain the source code and detailed information about this component at https://github.com/clbanning/mxj. + +101. schema-utils-4.3.2 +Copyright (c) schema-utils authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/schema-utils. + +102. muggle-string-0.4.1 +Copyright (c) muggle-string authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/muggle-string. + +103. go-urn-1.4.0 +Copyright (c) go-urn authors. +You may obtain the source code and detailed information about this component at https://github.com/leodido/go-urn. + +104. form-data-4.0.3 +Copyright (c) form-data authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/form-data. + +105. locales-0.14.1 +Copyright (c) locales authors. +You may obtain the source code and detailed information about this component at https://gitee.com/mirrors/locales. + +106. python-docx-default +Copyright (c) python-docx authors. 
+You may obtain the source code and detailed information about this component at https://pypi.org/project/python-docx. + +107. less-loader-12.3.0 +Copyright (c) less-loader authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/less-loader. + +108. @jridgewell/sourcemap-codec-1.5.4 +Copyright (c) @jridgewell/sourcemap-codec authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@jridgewell/sourcemap-codec. + +109. universal-translator-0.18.1 +Copyright (c) universal-translator authors. +You may obtain the source code and detailed information about this component at https://github.com/go-playground/universal-translator. + +110. inflection-1.0.0 +Copyright (c) inflection authors. +You may obtain the source code and detailed information about this component at https://github.com/jinzhu/inflection. + +111. sass-embedded-1.70.0 +Copyright (c) sass-embedded authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/sass-embedded. + +112. @jridgewell/resolve-uri-3.1.2 +Copyright (c) @jridgewell/resolve-uri authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@jridgewell/resolve-uri. + +113. tinycolor2-1.6.0 +Copyright (c) tinycolor2 authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/tinycolor2. + +114. pagefind-1.3.0 +Copyright (c) pagefind authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/pagefind. + +115. viper-1.20.1 +Copyright (c) viper authors. +You may obtain the source code and detailed information about this component at https://github.com/spf13/viper. + +116. delayed-stream-1.0.0 +Copyright (c) delayed-stream authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/delayed-stream. + +117. @webassemblyjs/helper-numbers-1.13.2 +Copyright (c) @webassemblyjs/helper-numbers authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/helper-numbers. + +118. nanoid-3.3.11 +Copyright (c) nanoid authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/nanoid. + +119. @babel/helper-compilation-targets-7.27.2 +Copyright (c) @babel/helper-compilation-targets authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@babel/helper-compilation-targets. + +120. @webassemblyjs/ast-1.14.1 +Copyright (c) @webassemblyjs/ast authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/ast. + +121. @types/estree-1.0.8 +Copyright (c) @types/estree authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@types/estree. + +122. uuid-1.6.0 +Copyright (c) uuid authors. +You may obtain the source code and detailed information about this component at https://kojipkgs.fedoraproject.org//vol/fedora_koji_archive00/packages/uuid/1.6.0/2.fc8/src/uuid-1.6.0-2.fc8.src.rpm. + +123. postgres-1.5.11 +Copyright (c) postgres authors. 
+You may obtain the source code and detailed information about this component at https://github.com/go-gorm/postgres. + +124. perfect-debounce-1.0.0 +Copyright (c) perfect-debounce authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/perfect-debounce. + +125. md5-simd-1.1.2 +Copyright (c) md5-simd authors. +You may obtain the source code and detailed information about this component at https://github.com/minio/md5-simd. + +126. ms-2.1.3 +Copyright (c) ms authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/ms. + +127. @babel/helper-module-transforms-7.27.3 +Copyright (c) @babel/helper-module-transforms authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@babel/helper-module-transforms. + +128. @types/json-schema-7.0.15 +Copyright (c) @types/json-schema authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@types/json-schema. + +129. multierr-1.11.0 +Copyright (c) multierr authors. +You may obtain the source code and detailed information about this component at https://github.com/uber-go/multierr. + +130. pflag-1.0.6 +Copyright (c) pflag authors. +You may obtain the source code and detailed information about this component at https://github.com/php-toolkit/pflag. + +131. @webassemblyjs/ieee754-1.13.2 +Copyright (c) @webassemblyjs/ieee754 authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/ieee754. + +132. browserslist-4.25.1 +Copyright (c) browserslist authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/browserslist. + +133. asynckit-0.4.0 +Copyright (c) asynckit authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/asynckit. + +134. sortablejs-1.15.6 +Copyright (c) sortablejs authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/sortablejs. + +135. get-intrinsic-1.3.0 +Copyright (c) get-intrinsic authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/get-intrinsic. + +136. markdownify-default +Copyright (c) markdownify authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/markdownify. + +137. go-json-0.10.5 +Copyright (c) go-json authors. +You may obtain the source code and detailed information about this component at https://github.com/goccy/go-json. + +138. path-browserify-1.0.1 +Copyright (c) path-browserify authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/path-browserify. + +139. terser-webpack-plugin-5.3.14 +Copyright (c) terser-webpack-plugin authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/terser-webpack-plugin. + +140. @babel/helpers-7.27.6 +Copyright (c) @babel/helpers authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@babel/helpers. + +141. @vue/babel-plugin-resolve-type-1.4.0 +Copyright (c) @vue/babel-plugin-resolve-type authors. 
+You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@vue/babel-plugin-resolve-type. + +142. json-parse-even-better-errors-4.0.0 +Copyright (c) json-parse-even-better-errors authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/json-parse-even-better-errors. + +143. csstype-3.1.3 +Copyright (c) csstype authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/csstype. + +144. @babel/helper-plugin-utils-7.27.1 +Copyright (c) @babel/helper-plugin-utils authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@babel/helper-plugin-utils. + +145. balanced-match-1.0.2 +Copyright (c) balanced-match authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/balanced-match. + +146. ansi-styles-6.2.1 +Copyright (c) ansi-styles authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/ansi-styles. + +147. go-humanize-1.0.1 +Copyright (c) go-humanize authors. +You may obtain the source code and detailed information about this component at https://gitee.com/mirrors/go-humanize. + +148. picomatch-4.0.2 +Copyright (c) picomatch authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/picomatch. + +149. @webassemblyjs/helper-wasm-bytecode-1.13.2 +Copyright (c) @webassemblyjs/helper-wasm-bytecode authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@webassemblyjs/helper-wasm-bytecode. + +150. @babel/code-frame-7.27.1 +Copyright (c) @babel/code-frame authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@babel/code-frame. + +151. tsx-4.8.1 +Copyright (c) tsx authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/tsx. + +152. superjson-2.2.2 +Copyright (c) superjson authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/superjson. + +153. require-from-string-2.0.2 +Copyright (c) require-from-string authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/require-from-string. + +154. json5-2.2.3 +Copyright (c) json5 authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/json5. + +155. sass-1.70.0 +Copyright (c) sass authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/sass. + +156. @vue/tsconfig-0.7.0 +Copyright (c) @vue/tsconfig authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/@vue/tsconfig. + +157. openai-default +Copyright (c) openai authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/openai. + +158. urllib3-default +Copyright (c) urllib3 authors. +You may obtain the source code and detailed information about this component at https://pypi.org/project/urllib3. + +159. beautifulsoup4-default +Copyright (c) beautifulsoup4 authors. 
+You may obtain the source code and detailed information about this component at https://pypi.org/project/beautifulsoup4. + +160. pgpassfile-1.0.0 +Copyright (c) pgpassfile authors. +You may obtain the source code and detailed information about this component at https://github.com/jackc/pgpassfile. + +161. source-map-support-0.5.21 +Copyright (c) source-map-support authors. +You may obtain the source code and detailed information about this component at https://www.npmjs.com/package/source-map-support. + + + +Terms of the mit: +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3ddc724 --- /dev/null +++ b/Makefile @@ -0,0 +1,94 @@ +.PHONY: build run test clean docker-build docker-run migrate-up migrate-down docker-restart docker-stop start-all stop-all start-ollama stop-ollama + +# Go related variables +BINARY_NAME=WeKnora +MAIN_PATH=./cmd/server + +# Docker related variables +DOCKER_IMAGE=WeKnora +DOCKER_TAG=latest + +# Build the application +build: + go build -o $(BINARY_NAME) $(MAIN_PATH) + +# Run the application +run: build + ./$(BINARY_NAME) + +# Run tests +test: + go test -v ./... + +# Clean build artifacts +clean: + go clean + rm -f $(BINARY_NAME) + +# Build Docker image +docker-build: + docker build -t $(DOCKER_IMAGE):$(DOCKER_TAG) . + +# Run Docker container (传统方式) +docker-run: + docker-compose up + +# 使用新脚本启动所有服务 +start-all: + ./scripts/start_all.sh + +# 使用新脚本仅启动Ollama服务 +start-ollama: + ./scripts/start_all.sh --ollama + +# 使用新脚本仅启动Docker容器 +start-docker: + ./scripts/start_all.sh --docker + +# 使用新脚本停止所有服务 +stop-all: + ./scripts/start_all.sh --stop + +# Stop Docker container (传统方式) +docker-stop: + docker-compose down + +# Restart Docker container (stop, rebuild, start) +docker-restart: + docker-compose stop -t 60 + docker-compose up --build + +# Database migrations +migrate-up: + ./scripts/migrate.sh up + +migrate-down: + ./scripts/migrate.sh down + +# Generate API documentation +docs: + swag init -g $(MAIN_PATH)/main.go -o ./docs + +# Format code +fmt: + go fmt ./... + +# Lint code +lint: + golangci-lint run + +# Install dependencies +deps: + go mod download + +# Build for production +build-prod: + GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o $(BINARY_NAME) $(MAIN_PATH) + +clean-db: + @echo "Cleaning database..." 
+ @if [ $$(docker volume ls -q -f name=weknora_postgres-data) ]; then \ + docker volume rm weknora_postgres-data; \ + fi + + diff --git a/README.md b/README.md new file mode 100644 index 0000000..7473f91 --- /dev/null +++ b/README.md @@ -0,0 +1,239 @@ +

+[WeKnora Logo]
+
+官方网站 • 微信对话开放平台 • License
+
+| English | 简体中文 |
+
+[项目介绍](#-项目介绍) • [架构设计](#-架构设计) • [核心特性](#-核心特性) • [快速开始](#-快速开始) • [文档](#-文档) • [开发指南](#-开发指南)
+

+ +# 💡 WeKnora - 基于大模型的文档理解检索框架 + +## 📌 项目介绍 + +[**WeKnora(维娜拉)**](https://weknora.weixin.qq.com) 是一款基于大语言模型(LLM)的文档理解与语义检索框架,专为结构复杂、内容异构的文档场景而打造。 + +框架采用模块化架构,融合多模态预处理、语义向量索引、智能召回与大模型生成推理,构建起高效、可控的文档问答流程。核心检索流程基于 **RAG(Retrieval-Augmented Generation)** 机制,将上下文相关片段与语言模型结合,实现更高质量的语义回答。 + +**官网:** https://weknora.weixin.qq.com + +## 🏗️ 架构设计 + +![weknora-pipelone.png](./docs/images/pipeline.jpg) + +WeKnora 采用现代化模块化设计,构建了一条完整的文档理解与检索流水线。系统主要包括文档解析、向量化处理、检索引擎和大模型推理等核心模块,每个组件均可灵活配置与扩展。 + +## 🎯 核心特性 + +- **🔍 精准理解**:支持 PDF、Word、图片等文档的结构化内容提取,统一构建语义视图 +- **🧠 智能推理**:借助大语言模型理解文档上下文与用户意图,支持精准问答与多轮对话 +- **🔧 灵活扩展**:从解析、嵌入、召回到生成全流程解耦,便于灵活集成与定制扩展 +- **⚡ 高效检索**:混合多种检索策略:关键词、向量、知识图谱 +- **🎯 简单易用**:直观的Web界面与标准API,零技术门槛快速上手 +- **🔒 安全可控**:支持本地化与私有云部署,数据完全自主可控 + +## 📊 适用场景 + +| 应用场景 | 具体应用 | 核心价值 | +|---------|----------|----------| +| **企业知识管理** | 内部文档检索、规章制度问答、操作手册查询 | 提升知识查找效率,降低培训成本 | +| **科研文献分析** | 论文检索、研究报告分析、学术资料整理 | 加速文献调研,辅助研究决策 | +| **产品技术支持** | 产品手册问答、技术文档检索、故障排查 | 提升客户服务质量,减少技术支持负担 | +| **法律合规审查** | 合同条款检索、法规政策查询、案例分析 | 提高合规效率,降低法律风险 | +| **医疗知识辅助** | 医学文献检索、诊疗指南查询、病例分析 | 辅助临床决策,提升诊疗质量 | + +## 🧩 功能模块能力 + +| 功能模块 | 支持情况 | 说明 | +|---------|---------|------| +| 文档格式支持 | ✅ PDF / Word / Txt / Markdown / 图片(含 OCR / Caption) | 支持多种结构化与非结构化文档内容解析,支持图文混排与图像文字提取 | +| 嵌入模型支持 | ✅ 本地模型、BGE / GTE API 等 | 支持自定义 embedding 模型,兼容本地部署与云端向量生成接口 | +| 向量数据库接入 | ✅ PostgreSQL(pgvector)、Elasticsearch | 支持主流向量索引后端,可灵活切换与扩展,适配不同检索场景 | +| 检索机制 | ✅ BM25 / Dense Retrieve / GraphRAG | 支持稠密/稀疏召回、知识图谱增强检索等多种策略,可自由组合召回-重排-生成流程 | +| 大模型集成 | ✅ 支持 Qwen、DeepSeek 等,思考/非思考模式切换 | 可接入本地大模型(如 Ollama 启动)或调用外部 API 服务,支持推理模式灵活配置 | +| 问答能力 | ✅ 上下文感知、多轮对话、提示词模板 | 支持复杂语义建模、指令控制与链式问答,可配置提示词与上下文窗口 | +| 端到端测试支持 | ✅ 检索+生成过程可视化与指标评估 | 提供一体化链路测试工具,支持评估召回命中率、回答覆盖度、BLEU / ROUGE 等主流指标 | +| 部署模式 | ✅ 支持本地部署 / Docker 镜像 | 满足私有化、离线部署与灵活运维的需求 | +| 用户界面 | ✅ Web UI + RESTful API | 提供交互式界面与标准 API 接口,适配开发者与业务用户使用习惯 | + +## 🚀 快速开始 + +### 🛠 环境要求 + +确保本地已安装以下工具: + +* [Docker](https://www.docker.com/) +* [Docker Compose](https://docs.docker.com/compose/) +* [Git](https://git-scm.com/) + +### 📦 安装步骤 + +#### ① 克隆代码仓库 + +```bash +# 克隆主仓库 +git clone https://github.com/Tencent/WeKnora.git +cd WeKnora +``` + +#### ② 配置环境变量 + +```bash +# 复制示例配置文件 +cp .env.example .env + +# 编辑 .env,填入对应配置信息 +# 所有变量说明详见 .env.example 注释 +``` + +#### ③ 启动服务 + +```bash +# 启动全部服务(含 Ollama 与后端容器) +./scripts/start_all.sh +# 或 +make start-all +``` + +#### ④ 停止服务 + +```bash +./scripts/start_all.sh --stop +# 或 +make stop-all +``` + +### 🌐 服务访问地址 + +启动成功后,可访问以下地址: + +* Web UI:`http://localhost` +* 后端 API:`http://localhost:8080` +* 链路追踪(Jaeger):`http://localhost:16686` + +### 🔌 使用微信对话开放平台 + +WeKnora 作为[微信对话开放平台](https://chatbot.weixin.qq.com)的核心技术框架,提供更简便的使用方式: + +- **零代码部署**:只需上传知识,即可在微信生态中快速部署智能问答服务,实现"即问即答"的体验 +- **高效问题管理**:支持高频问题的独立分类管理,提供丰富的数据工具,确保回答精准可靠且易于维护 +- **微信生态覆盖**:通过微信对话开放平台,WeKnora 的智能问答能力可无缝集成到公众号、小程序等微信场景中,提升用户交互体验 + +## 📱 功能展示 + +### Web UI 界面 + + + + + + + + + +
知识上传界面 | 知识问答入口 | 图文结果回答
+ +**知识库管理:** 支持拖拽上传各类文档,自动识别文档结构并提取核心知识,建立索引。系统清晰展示处理进度和文档状态,实现高效的知识库管理。 + +### 文档知识图谱 + + + + + + +
知识图谱展示1 | 知识图谱展示2
+ +WeKnora 支持将文档转化为知识图谱,展示文档中不同段落之间的关联关系。开启知识图谱功能后,系统会分析并构建文档内部的语义关联网络,不仅帮助用户理解文档内容,还为索引和检索提供结构化支撑,提升检索结果的相关性和广度。 + +## 📘 文档 + +详细接口说明请参考:[API 文档](./docs/API.md) + +## 🧭 开发指南 + +### 📁 项目目录结构 + +``` +WeKnora/ +├── cmd/ # 应用入口 +├── internal/ # 核心业务逻辑 +├── config/ # 配置文件 +├── migrations/ # 数据库迁移脚本 +├── scripts/ # 启动与工具脚本 +├── services/ # 各子服务实现 +├── frontend/ # 前端项目 +└── docs/ # 项目文档 +``` + +### 🔧 常用命令 + +```bash +# 清空数据库(慎用!) +make clean-db +``` + +## 🤝 贡献指南 + +我们欢迎社区用户参与贡献!如有建议、Bug 或新功能需求,请通过 [Issue](https://github.com/Tencent/WeKnora/issues) 提出,或直接提交 Pull Request。 + +### 🎯 贡献方式 + +- 🐛 **Bug修复**: 发现并修复系统缺陷 +- ✨ **新功能**: 提出并实现新特性 +- 📚 **文档改进**: 完善项目文档 +- 🧪 **测试用例**: 编写单元测试和集成测试 +- 🎨 **UI/UX优化**: 改进用户界面和体验 + +### 📋 贡献流程 + +1. **Fork项目** 到你的GitHub账户 +2. **创建特性分支** `git checkout -b feature/amazing-feature` +3. **提交更改** `git commit -m 'Add amazing feature'` +4. **推送分支** `git push origin feature/amazing-feature` +5. **创建Pull Request** 并详细描述变更内容 + +### 🎨 代码规范 + +- 遵循 [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments) +- 使用 `gofmt` 格式化代码 +- 添加必要的单元测试 +- 更新相关文档 + +### 📝 提交规范 + +使用 [Conventional Commits](https://www.conventionalcommits.org/) 规范: + +``` +feat: 添加文档批量上传功能 +fix: 修复向量检索精度问题 +docs: 更新API文档 +test: 添加检索引擎测试用例 +refactor: 重构文档解析模块 +``` + +## 📄 许可证 + +本项目基于 [MIT](./LICENSE) 协议发布。 +你可以自由使用、修改和分发本项目代码,但需保留原始版权声明。 \ No newline at end of file diff --git a/README_EN.md b/README_EN.md new file mode 100644 index 0000000..d9e28ad --- /dev/null +++ b/README_EN.md @@ -0,0 +1,239 @@ +

+ + WeKnora Logo + +

+ +

+ + 官方网站 + + + 微信对话开放平台 + + + License + +

+ +

+| English | 简体中文 | +

+ +

+

+ + [Overview](#-overview) • [Architecture](#-architecture) • [Key Features](#-key-features) • [Getting Started](#-getting-started) • [API Reference](#-api-reference) • [Developer Guide](#-developer-guide) + +

+

+ +# 💡 WeKnora - LLM-Powered Document Understanding & Retrieval Framework + +## 📌 Overview + +[**WeKnora**](https://weknora.weixin.qq.com) is an LLM-powered framework designed for deep document understanding and semantic retrieval, especially for handling complex, heterogeneous documents. + +It adopts a modular architecture that combines multimodal preprocessing, semantic vector indexing, intelligent retrieval, and large language model inference. At its core, WeKnora follows the **RAG (Retrieval-Augmented Generation)** paradigm, enabling high-quality, context-aware answers by combining relevant document chunks with model reasoning. + +**Website:** https://weknora.weixin.qq.com + +## 🏗️ Architecture + +![weknora-pipeline.png](./docs/images/pipeline.jpg) + +WeKnora employs a modern modular design to build a complete document understanding and retrieval pipeline. The system primarily includes document parsing, vector processing, retrieval engine, and large model inference as core modules, with each component being flexibly configurable and extendable. + +## 🎯 Key Features + +- **🔍 Precise Understanding**: Structured content extraction from PDFs, Word documents, images and more into unified semantic views +- **🧠 Intelligent Reasoning**: Leverages LLMs to understand document context and user intent for accurate Q&A and multi-turn conversations +- **🔧 Flexible Extension**: All components from parsing and embedding to retrieval and generation are decoupled for easy customization +- **⚡ Efficient Retrieval**: Hybrid retrieval strategies combining keywords, vectors, and knowledge graphs +- **🎯 User-Friendly**: Intuitive web interface and standardized APIs for zero technical barriers +- **🔒 Secure & Controlled**: Support for local deployment and private cloud, ensuring complete data sovereignty + +## 📊 Application Scenarios + +| Scenario | Applications | Core Value | +|---------|----------|----------| +| **Enterprise Knowledge Management** | Internal document retrieval, policy Q&A, operation manual search | Improve knowledge discovery efficiency, reduce training costs | +| **Academic Research Analysis** | Paper retrieval, research report analysis, scholarly material organization | Accelerate literature review, assist research decisions | +| **Product Technical Support** | Product manual Q&A, technical documentation search, troubleshooting | Enhance customer service quality, reduce support burden | +| **Legal & Compliance Review** | Contract clause retrieval, regulatory policy search, case analysis | Improve compliance efficiency, reduce legal risks | +| **Medical Knowledge Assistance** | Medical literature retrieval, treatment guideline search, case analysis | Support clinical decisions, improve diagnosis quality | + +## 🧩 Feature Matrix + +| Module | Support | Description | +|---------|---------|------| +| Document Formats | ✅ PDF / Word / Txt / Markdown / Images (with OCR / Caption) | Support for structured and unstructured documents with text extraction from images | +| Embedding Models | ✅ Local models, BGE / GTE APIs, etc. 
| Customizable embedding models, compatible with local deployment and cloud vector generation APIs | +| Vector DB Integration | ✅ PostgreSQL (pgvector), Elasticsearch | Support for mainstream vector index backends, flexible switching for different retrieval scenarios | +| Retrieval Strategies | ✅ BM25 / Dense Retrieval / GraphRAG | Support for sparse/dense recall and knowledge graph-enhanced retrieval with customizable retrieve-rerank-generate pipelines | +| LLM Integration | ✅ Support for Qwen, DeepSeek, etc., with thinking/non-thinking mode switching | Compatible with local models (e.g., via Ollama) or external API services with flexible inference configuration | +| QA Capabilities | ✅ Context-aware, multi-turn dialogue, prompt templates | Support for complex semantic modeling, instruction control and chain-of-thought Q&A with configurable prompts and context windows | +| E2E Testing | ✅ Retrieval+generation process visualization and metric evaluation | End-to-end testing tools for evaluating recall hit rates, answer coverage, BLEU/ROUGE and other metrics | +| Deployment Modes | ✅ Support for local deployment / Docker images | Meets private, offline deployment and flexible operation requirements | +| User Interfaces | ✅ Web UI + RESTful API | Interactive interface and standard API endpoints, suitable for both developers and business users | + +## 🚀 Getting Started + +### 🛠 Prerequisites + +Make sure the following tools are installed on your system: + +* [Docker](https://www.docker.com/) +* [Docker Compose](https://docs.docker.com/compose/) +* [Git](https://git-scm.com/) + +### 📦 Installation + +#### ① Clone the repository + +```bash +# Clone the main repository +git clone https://github.com/Tencent/WeKnora.git +cd WeKnora +``` + +#### ② Configure environment variables + +```bash +# Copy example env file +cp .env.example .env + +# Edit .env and set required values +# All variables are documented in the .env.example comments +``` + +#### ③ Start the services + +```bash +# Start all services (Ollama + backend containers) +./scripts/start_all.sh +# Or +make start-all +``` + +#### ④ Stop the services + +```bash +./scripts/start_all.sh --stop +# Or +make stop-all +``` + +### 🌐 Access Services + +Once started, services will be available at: + +* Web UI: `http://localhost` +* Backend API: `http://localhost:8080` +* Jaeger Tracing: `http://localhost:16686` + +### 🔌 Using WeChat Dialog Open Platform + +WeKnora serves as the core technology framework for the [WeChat Dialog Open Platform](https://chatbot.weixin.qq.com), providing a more convenient usage approach: + +- **Zero-code Deployment**: Simply upload knowledge to quickly deploy intelligent Q&A services within the WeChat ecosystem, achieving an "ask and answer" experience +- **Efficient Question Management**: Support for categorized management of high-frequency questions, with rich data tools to ensure accurate, reliable, and easily maintainable answers +- **WeChat Ecosystem Integration**: Through the WeChat Dialog Open Platform, WeKnora's intelligent Q&A capabilities can be seamlessly integrated into WeChat Official Accounts, Mini Programs, and other WeChat scenarios, enhancing user interaction experiences + +## 📱 Interface Showcase + +### Web UI Interface + + + + + + + + + +
Knowledge Upload Interface | Q&A Entry Interface | Rich Text & Image Responses
+ +**Knowledge Base Management:** Support for dragging and dropping various documents, automatically identifying document structures and extracting core knowledge to establish indexes. The system clearly displays processing progress and document status, achieving efficient knowledge base management. + +### Document Knowledge Graph + + + + + + +
Knowledge Graph View 1 | Knowledge Graph View 2
+ +WeKnora supports transforming documents into knowledge graphs, displaying the relationships between different sections of the documents. Once the knowledge graph feature is enabled, the system analyzes and constructs an internal semantic association network that not only helps users understand document content but also provides structured support for indexing and retrieval, enhancing the relevance and breadth of search results. + +## 📘 API Reference + +Detailed API documentation is available at: [API Docs](./docs/API.md) + +## 🧭 Developer Guide + +### 📁 Directory Structure + +``` +WeKnora/ +├── cmd/ # Main entry point +├── internal/ # Core business logic +├── config/ # Configuration files +├── migrations/ # DB migration scripts +├── scripts/ # Shell scripts +├── services/ # Microservice logic +├── frontend/ # Frontend app +└── docs/ # Project documentation +``` + +### 🔧 Common Commands + +```bash +# Wipe all data from DB (use with caution) +make clean-db +``` + +## 🤝 Contributing + +We welcome community contributions! For suggestions, bugs, or feature requests, please submit an [Issue](https://github.com/Tencent/WeKnora/issues) or directly create a Pull Request. + +### 🎯 How to Contribute + +- 🐛 **Bug Fixes**: Discover and fix system defects +- ✨ **New Features**: Propose and implement new capabilities +- 📚 **Documentation**: Improve project documentation +- 🧪 **Test Cases**: Write unit and integration tests +- 🎨 **UI/UX Enhancements**: Improve user interface and experience + +### 📋 Contribution Process + +1. **Fork the project** to your GitHub account +2. **Create a feature branch** `git checkout -b feature/amazing-feature` +3. **Commit changes** `git commit -m 'Add amazing feature'` +4. **Push branch** `git push origin feature/amazing-feature` +5. **Create a Pull Request** with detailed description of changes + +### 🎨 Code Standards + +- Follow [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments) +- Format code using `gofmt` +- Add necessary unit tests +- Update relevant documentation + +### 📝 Commit Guidelines + +Use [Conventional Commits](https://www.conventionalcommits.org/) standard: + +``` +feat: Add document batch upload functionality +fix: Resolve vector retrieval precision issue +docs: Update API documentation +test: Add retrieval engine test cases +refactor: Restructure document parsing module +``` + +## 📄 License + +This project is licensed under the [MIT License](./LICENSE). +You are free to use, modify, and distribute the code with proper attribution. \ No newline at end of file diff --git a/client/README.md b/client/README.md new file mode 100644 index 0000000..b236055 --- /dev/null +++ b/client/README.md @@ -0,0 +1,183 @@ +# WeKnora HTTP 客户端 + +这个包提供了与WeKnora服务进行交互的客户端库,支持所有基于HTTP的接口调用,使其他模块更方便地集成WeKnora服务,无需直接编写HTTP请求代码。 + +## 主要功能 + +该客户端包含以下主要功能模块: + +1. **会话管理**:创建、获取、更新和删除会话 +2. **知识库管理**:创建、获取、更新和删除知识库 +3. **知识管理**:添加、获取和删除知识内容 +4. **租户管理**:租户的CRUD操作 +5. **知识问答**:支持普通问答和流式问答 +6. **分块管理**:查询、更新和删除知识分块 +7. **消息管理**:获取和删除会话消息 +8. 
**模型管理**:创建、获取、更新和删除模型 + +## 使用方法 + +### 创建客户端实例 + +```go +import ( + "context" + "github.com/Tencent/WeKnora/internal/client" + "time" +) + +// 创建客户端实例 +apiClient := client.NewClient( + "http://api.example.com", + client.WithToken("your-auth-token"), + client.WithTimeout(30*time.Second), +) +``` + +### 示例:创建知识库并上传文件 + +```go +// 创建知识库 +kb := &client.KnowledgeBase{ + Name: "测试知识库", + Description: "这是一个测试知识库", + ChunkingConfig: client.ChunkingConfig{ + ChunkSize: 500, + ChunkOverlap: 50, + Separators: []string{"\n\n", "\n", ". ", "? ", "! "}, + }, + ImageProcessingConfig: client.ImageProcessingConfig{ + ModelID: "image_model_id", + }, + EmbeddingModelID: "embedding_model_id", + SummaryModelID: "summary_model_id", +} + +kb, err := apiClient.CreateKnowledgeBase(context.Background(), kb) +if err != nil { + // 处理错误 +} + +// 上传知识文件并添加元数据 +metadata := map[string]string{ + "source": "local", + "type": "document", +} +knowledge, err := apiClient.CreateKnowledgeFromFile(context.Background(), kb.ID, "path/to/file.pdf", metadata) +if err != nil { + // 处理错误 +} +``` + +### 示例:创建会话并进行问答 + +```go +// 创建会话 +sessionRequest := &client.CreateSessionRequest{ + KnowledgeBaseID: knowledgeBaseID, + SessionStrategy: &client.SessionStrategy{ + MaxRounds: 10, + EnableRewrite: true, + FallbackStrategy: "fixed_answer", + FallbackResponse: "抱歉,我无法回答这个问题", + EmbeddingTopK: 5, + KeywordThreshold: 0.5, + VectorThreshold: 0.7, + RerankModelID: "rerank_model_id", + RerankTopK: 3, + RerankThreshold: 0.8, + SummaryModelID: "summary_model_id", + }, +} + +session, err := apiClient.CreateSession(context.Background(), sessionRequest) +if err != nil { + // 处理错误 +} + +// 普通问答 +answer, err := apiClient.KnowledgeQA(context.Background(), session.ID, &client.KnowledgeQARequest{ + Query: "什么是人工智能?", +}) +if err != nil { + // 处理错误 +} + +// 流式问答 +err = apiClient.KnowledgeQAStream(context.Background(), session.ID, "什么是机器学习?", func(response *client.StreamResponse) error { + // 处理每个响应片段 + fmt.Print(response.Content) + return nil +}) +if err != nil { + // 处理错误 +} +``` + +### 示例:管理模型 + +```go +// 创建模型 +modelRequest := &client.CreateModelRequest{ + Name: "测试模型", + Type: client.ModelTypeChat, + Source: client.ModelSourceInternal, + Description: "这是一个测试模型", + Parameters: client.ModelParameters{ + "temperature": 0.7, + "top_p": 0.9, + }, + IsDefault: true, +} +model, err := apiClient.CreateModel(context.Background(), modelRequest) +if err != nil { + // 处理错误 +} + +// 列出所有模型 +models, err := apiClient.ListModels(context.Background()) +if err != nil { + // 处理错误 +} +``` + +### 示例:管理知识分块 + +```go +// 列出知识分块 +chunks, total, err := apiClient.ListKnowledgeChunks(context.Background(), knowledgeID, 1, 10) +if err != nil { + // 处理错误 +} + +// 更新分块 +updateRequest := &client.UpdateChunkRequest{ + Content: "更新后的分块内容", + IsEnabled: true, +} +updatedChunk, err := apiClient.UpdateChunk(context.Background(), knowledgeID, chunkID, updateRequest) +if err != nil { + // 处理错误 +} +``` + +### 示例:获取会话消息 + +```go +// 获取最近消息 +messages, err := apiClient.GetRecentMessages(context.Background(), sessionID, 10) +if err != nil { + // 处理错误 +} + +// 获取指定时间之前的消息 +beforeTime := time.Now().Add(-24 * time.Hour) +olderMessages, err := apiClient.GetMessagesBefore(context.Background(), sessionID, beforeTime, 10) +if err != nil { + // 处理错误 +} +``` + +## 完整示例 + +请参考 `example.go` 文件中的 `ExampleUsage` 函数,其中展示了客户端的完整使用流程。 \ No newline at end of file diff --git a/client/README_EN.md b/client/README_EN.md new file mode 100644 index 0000000..99ff39d --- /dev/null +++ b/client/README_EN.md @@ -0,0 
+1,184 @@ +# WeKnora HTTP Client + +This package provides a client library for interacting with WeKnora services, supporting all HTTP-based interface calls, making it easier for other modules to integrate with WeKnora services without having to write HTTP request code directly. + +## Main Features + +The client includes the following main functional modules: + +1. **Session Management**: Create, retrieve, update, and delete sessions +2. **Knowledge Base Management**: Create, retrieve, update, and delete knowledge bases +3. **Knowledge Management**: Add, retrieve, and delete knowledge content +4. **Tenant Management**: CRUD operations for tenants +5. **Knowledge Q&A**: Supports regular Q&A and streaming Q&A +6. **Chunk Management**: Query, update, and delete knowledge chunks +7. **Message Management**: Retrieve and delete session messages +8. **Model Management**: Create, retrieve, update, and delete models +9. **Evaluation Function**: Start evaluation tasks and get evaluation results + +## Usage + +### Creating Client Instance + +```go +import ( + "context" + "github.com/Tencent/WeKnora/internal/client" + "time" +) + +// Create client instance +apiClient := client.NewClient( + "http://api.example.com", + client.WithToken("your-auth-token"), + client.WithTimeout(30*time.Second), +) +``` + +### Example: Create Knowledge Base and Upload File + +```go +// Create knowledge base +kb := &client.KnowledgeBase{ + Name: "Test Knowledge Base", + Description: "This is a test knowledge base", + ChunkingConfig: client.ChunkingConfig{ + ChunkSize: 500, + ChunkOverlap: 50, + Separators: []string{"\n\n", "\n", ". ", "? ", "! "}, + }, + ImageProcessingConfig: client.ImageProcessingConfig{ + ModelID: "image_model_id", + }, + EmbeddingModelID: "embedding_model_id", + SummaryModelID: "summary_model_id", +} + +kb, err := apiClient.CreateKnowledgeBase(context.Background(), kb) +if err != nil { + // Handle error +} + +// Upload knowledge file with metadata +metadata := map[string]string{ + "source": "local", + "type": "document", +} +knowledge, err := apiClient.CreateKnowledgeFromFile(context.Background(), kb.ID, "path/to/file.pdf", metadata) +if err != nil { + // Handle error +} +``` + +### Example: Create Session and Chat + +```go +// Create session +sessionRequest := &client.CreateSessionRequest{ + KnowledgeBaseID: knowledgeBaseID, + SessionStrategy: &client.SessionStrategy{ + MaxRounds: 10, + EnableRewrite: true, + FallbackStrategy: "fixed_answer", + FallbackResponse: "Sorry, I cannot answer this question", + EmbeddingTopK: 5, + KeywordThreshold: 0.5, + VectorThreshold: 0.7, + RerankModelID: "rerank_model_id", + RerankTopK: 3, + RerankThreshold: 0.8, + SummaryModelID: "summary_model_id", + }, +} + +session, err := apiClient.CreateSession(context.Background(), sessionRequest) +if err != nil { + // Handle error +} + +// Regular Q&A +answer, err := apiClient.KnowledgeQA(context.Background(), session.ID, &client.KnowledgeQARequest{ + Query: "What is artificial intelligence?", +}) +if err != nil { + // Handle error +} + +// Streaming Q&A +err = apiClient.KnowledgeQAStream(context.Background(), session.ID, "What is machine learning?", func(response *client.StreamResponse) error { + // Handle each response chunk + fmt.Print(response.Content) + return nil +}) +if err != nil { + // Handle error +} +``` + +### Example: Managing Models + +```go +// Create model +modelRequest := &client.CreateModelRequest{ + Name: "Test Model", + Type: client.ModelTypeChat, + Source: client.ModelSourceInternal, + Description: "This is a 
test model", + Parameters: client.ModelParameters{ + "temperature": 0.7, + "top_p": 0.9, + }, + IsDefault: true, +} +model, err := apiClient.CreateModel(context.Background(), modelRequest) +if err != nil { + // Handle error +} + +// List all models +models, err := apiClient.ListModels(context.Background()) +if err != nil { + // Handle error +} +``` + +### Example: Managing Knowledge Chunks + +```go +// List knowledge chunks +chunks, total, err := apiClient.ListKnowledgeChunks(context.Background(), knowledgeID, 1, 10) +if err != nil { + // Handle error +} + +// Update chunk +updateRequest := &client.UpdateChunkRequest{ + Content: "Updated chunk content", + IsEnabled: true, +} +updatedChunk, err := apiClient.UpdateChunk(context.Background(), knowledgeID, chunkID, updateRequest) +if err != nil { + // Handle error +} +``` + +### Example: Getting Session Messages + +```go +// Get recent messages +messages, err := apiClient.GetRecentMessages(context.Background(), sessionID, 10) +if err != nil { + // Handle error +} + +// Get messages before a specific time +beforeTime := time.Now().Add(-24 * time.Hour) +olderMessages, err := apiClient.GetMessagesBefore(context.Background(), sessionID, beforeTime, 10) +if err != nil { + // Handle error +} +``` + +## Complete Example + +Please refer to the `ExampleUsage` function in the `example.go` file, which demonstrates the complete usage flow of the client. diff --git a/client/chunk.go b/client/chunk.go new file mode 100644 index 0000000..6ed5148 --- /dev/null +++ b/client/chunk.go @@ -0,0 +1,171 @@ +// Package client provides the implementation for interacting with the WeKnora API +// This package encapsulates CRUD operations for server resources and provides a friendly interface for callers +// The Chunk related interfaces are used to manage document chunks in the knowledge base +package client + +import ( + "context" + "fmt" + "net/http" + "net/url" + "strconv" +) + +// Chunk represents the information about a document chunk +// Chunks are the basic units of storage and indexing in the knowledge base +type Chunk struct { + ID string `json:"id"` // Unique identifier of the chunk + KnowledgeID string `json:"knowledge_id"` // Identifier of the parent knowledge + TenantID uint `json:"tenant_id"` // Tenant ID + Content string `json:"content"` // Text content of the chunk + Embedding []float32 `json:"embedding"` // Vector embedding representation + ChunkIndex int `json:"chunk_index"` // Index position of chunk in the document + TotalChunks int `json:"total_chunks"` // Total number of chunks in the document + IsEnabled bool `json:"is_enabled"` // Whether this chunk is enabled + StartAt int `json:"start_at"` // Starting position in original text + EndAt int `json:"end_at"` // Ending position in original text + VectorStoreID string `json:"vector_store_id"` // Vector storage ID + KeywordStoreID string `json:"keyword_store_id"` // Keyword storage ID + EmbeddingStatus int `json:"embedding_status"` // Embedding status: 0-unprocessed, 1-processing, 2-completed + ChunkType string `json:"chunk_type"` + ImageInfo string `json:"image_info"` + CreatedAt string `json:"created_at"` // Creation time + UpdatedAt string `json:"updated_at"` // Last update time +} + +// ChunkResponse represents the response for a single chunk +// API response structure containing a single chunk information +type ChunkResponse struct { + Success bool `json:"success"` // Whether operation was successful + Data Chunk `json:"data"` // Chunk data +} + +// ChunkListResponse represents the response for a 
list of chunks +// API response structure for returning a list of chunks +type ChunkListResponse struct { + Success bool `json:"success"` // Whether operation was successful + Data []Chunk `json:"data"` // List of chunks + Total int64 `json:"total"` // Total count + Page int `json:"page"` // Current page + PageSize int `json:"page_size"` // Items per page +} + +// UpdateChunkRequest represents the request structure for updating a chunk +// Used for requesting chunk information updates +type UpdateChunkRequest struct { + Content string `json:"content"` // Chunk content + Embedding []float32 `json:"embedding"` // Vector embedding + ChunkIndex int `json:"chunk_index"` // Chunk index + IsEnabled bool `json:"is_enabled"` // Whether enabled + StartAt int `json:"start_at"` // Start position + EndAt int `json:"end_at"` // End position +} + +// ListKnowledgeChunks lists all chunks under a knowledge document +// Queries all chunks by knowledge ID with pagination support +// Parameters: +// - ctx: Context +// - knowledgeID: Knowledge ID +// - page: Page number, starts from 1 +// - pageSize: Number of items per page +// +// Returns: +// - []Chunk: List of chunks +// - int64: Total count +// - error: Error information +func (c *Client) ListKnowledgeChunks(ctx context.Context, + knowledgeID string, page int, pageSize int, +) ([]Chunk, int64, error) { + path := fmt.Sprintf("/api/v1/chunks/%s", knowledgeID) + + queryParams := url.Values{} + queryParams.Add("page", strconv.Itoa(page)) + queryParams.Add("page_size", strconv.Itoa(pageSize)) + + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, queryParams) + if err != nil { + return nil, 0, err + } + + var response ChunkListResponse + if err := parseResponse(resp, &response); err != nil { + return nil, 0, err + } + + return response.Data, response.Total, nil +} + +// UpdateChunk updates a chunk's information +// Updates information for a specific chunk under a knowledge document +// Parameters: +// - ctx: Context +// - knowledgeID: Knowledge ID +// - chunkID: Chunk ID +// - request: Update request +// +// Returns: +// - *Chunk: Updated chunk +// - error: Error information +func (c *Client) UpdateChunk(ctx context.Context, + knowledgeID string, chunkID string, request *UpdateChunkRequest, +) (*Chunk, error) { + path := fmt.Sprintf("/api/v1/chunks/%s/%s", knowledgeID, chunkID) + resp, err := c.doRequest(ctx, http.MethodPut, path, request, nil) + if err != nil { + return nil, err + } + + var response ChunkResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// DeleteChunk deletes a specific chunk +// Deletes a specific chunk under a knowledge document +// Parameters: +// - ctx: Context +// - knowledgeID: Knowledge ID +// - chunkID: Chunk ID +// +// Returns: +// - error: Error information +func (c *Client) DeleteChunk(ctx context.Context, knowledgeID string, chunkID string) error { + path := fmt.Sprintf("/api/v1/chunks/%s/%s", knowledgeID, chunkID) + resp, err := c.doRequest(ctx, http.MethodDelete, path, nil, nil) + if err != nil { + return err + } + + var response struct { + Success bool `json:"success"` + Message string `json:"message,omitempty"` + } + + return parseResponse(resp, &response) +} + +// DeleteChunksByKnowledgeID deletes all chunks under a knowledge document +// Batch deletes all chunks under the specified knowledge document +// Parameters: +// - ctx: Context +// - knowledgeID: Knowledge ID +// +// Returns: +// - error: Error information +func (c *Client) 
DeleteChunksByKnowledgeID(ctx context.Context, knowledgeID string) error { + path := fmt.Sprintf("/api/v1/chunks/%s", knowledgeID) + resp, err := c.doRequest(ctx, http.MethodDelete, path, nil, nil) + if err != nil { + return err + } + + var response struct { + Success bool `json:"success"` + Message string `json:"message,omitempty"` + } + + return parseResponse(resp, &response) +} diff --git a/client/client.go b/client/client.go new file mode 100644 index 0000000..c92a276 --- /dev/null +++ b/client/client.go @@ -0,0 +1,104 @@ +// Package client provides the implementation for interacting with the WeKnora API +// This package encapsulates CRUD operations for server resources and provides a friendly interface for callers +package client + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "time" +) + +// Client is the client for interacting with the WeKnora service +type Client struct { + baseURL string + httpClient *http.Client + token string +} + +// ClientOption defines client configuration options +type ClientOption func(*Client) + +// WithTimeout sets the HTTP client timeout +func WithTimeout(timeout time.Duration) ClientOption { + return func(c *Client) { + c.httpClient.Timeout = timeout + } +} + +// WithToken sets the authentication token +func WithToken(token string) ClientOption { + return func(c *Client) { + c.token = token + } +} + +// NewClient creates a new client instance +func NewClient(baseURL string, options ...ClientOption) *Client { + client := &Client{ + baseURL: baseURL, + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + } + + for _, option := range options { + option(client) + } + + return client +} + +// doRequest executes an HTTP request +func (c *Client) doRequest(ctx context.Context, + method, path string, body interface{}, query url.Values, +) (*http.Response, error) { + var reqBody io.Reader + if body != nil { + jsonData, err := json.Marshal(body) + if err != nil { + return nil, fmt.Errorf("failed to serialize request body: %w", err) + } + reqBody = bytes.NewBuffer(jsonData) + } + + url := fmt.Sprintf("%s%s", c.baseURL, path) + if len(query) > 0 { + url = fmt.Sprintf("%s?%s", url, query.Encode()) + } + + req, err := http.NewRequestWithContext(ctx, method, url, reqBody) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + if c.token != "" { + req.Header.Set("X-API-Key", c.token) + } + if requestID := ctx.Value("RequestID"); requestID != nil { + req.Header.Set("X-Request-ID", requestID.(string)) + } + + return c.httpClient.Do(req) +} + +// parseResponse parses an HTTP response +func parseResponse(resp *http.Response, target interface{}) error { + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("HTTP error %d: %s", resp.StatusCode, string(body)) + } + + if target == nil { + return nil + } + + return json.NewDecoder(resp.Body).Decode(target) +} diff --git a/client/evaluation.go b/client/evaluation.go new file mode 100644 index 0000000..bb312f1 --- /dev/null +++ b/client/evaluation.go @@ -0,0 +1,113 @@ +// Package client provides the implementation for interacting with the WeKnora API +// The Evaluation related interfaces are used for starting and retrieving model evaluation task results +// Evaluation tasks can be used to measure model performance and +// compare different embedding models, chat models, and reranking models +package client + 
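+// Usage sketch: one possible way a caller could start an evaluation task and poll
+// for its result using only what this package exposes (NewClient, WithToken,
+// StartEvaluation, GetEvaluationResult). The IDs below are placeholders and the
+// polling loop is illustrative rather than a required pattern:
+//
+//	apiClient := NewClient("http://localhost:8080", WithToken("your-auth-token"))
+//	ctx := context.Background()
+//	task, err := apiClient.StartEvaluation(ctx, &EvaluationRequest{
+//		DatasetID:        "dataset_id",
+//		EmbeddingModelID: "embedding_model_id",
+//		ChatModelID:      "chat_model_id",
+//		RerankModelID:    "rerank_model_id",
+//	})
+//	if err != nil {
+//		// handle error
+//	}
+//	for {
+//		result, err := apiClient.GetEvaluationResult(ctx, task.ID)
+//		if err != nil {
+//			break // handle error
+//		}
+//		if result.Status == "completed" || result.Status == "failed" {
+//			fmt.Println(result.Metrics, result.ErrorMsg)
+//			break
+//		}
+//		time.Sleep(5 * time.Second)
+//	}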
+import ( + "context" + "net/http" + "net/url" +) + +// EvaluationTask represents an evaluation task +// Contains basic information about a model evaluation task +type EvaluationTask struct { + ID string `json:"id"` // Task unique identifier + Status string `json:"status"` // Task status: pending, running, completed, failed + Progress int `json:"progress"` // Task progress, integer value 0-100 + DatasetID string `json:"dataset_id"` // Evaluation dataset ID + EmbeddingID string `json:"embedding_id"` // Embedding model ID + ChatID string `json:"chat_id"` // Chat model ID + RerankID string `json:"rerank_id"` // Reranking model ID + CreatedAt string `json:"created_at"` // Task creation time + CompleteAt string `json:"complete_at"` // Task completion time + ErrorMsg string `json:"error_msg"` // Error message, has value when task fails +} + +// EvaluationResult represents the evaluation results +// Contains detailed evaluation result information +type EvaluationResult struct { + TaskID string `json:"task_id"` // Associated task ID + Status string `json:"status"` // Task status + Progress int `json:"progress"` // Task progress + TotalQueries int `json:"total_queries"` // Total number of queries + TotalSamples int `json:"total_samples"` // Total number of samples + Metrics map[string]float64 `json:"metrics"` // Evaluation metrics collection + QueriesStat []map[string]interface{} `json:"queries_stat"` // Statistics for each query + CreatedAt string `json:"created_at"` // Creation time + CompleteAt string `json:"complete_at"` // Completion time + ErrorMsg string `json:"error_msg"` // Error message +} + +// EvaluationRequest represents an evaluation request +// Parameters used to start a new evaluation task +type EvaluationRequest struct { + DatasetID string `json:"dataset_id"` // Dataset ID to evaluate + EmbeddingModelID string `json:"embedding_id"` // Embedding model ID + ChatModelID string `json:"chat_id"` // Chat model ID + RerankModelID string `json:"rerank_id"` // Reranking model ID +} + +// EvaluationTaskResponse represents an evaluation task response +// API response structure for evaluation tasks +type EvaluationTaskResponse struct { + Success bool `json:"success"` // Whether operation was successful + Data EvaluationTask `json:"data"` // Evaluation task data +} + +// EvaluationResultResponse represents an evaluation result response +// API response structure for evaluation results +type EvaluationResultResponse struct { + Success bool `json:"success"` // Whether operation was successful + Data EvaluationResult `json:"data"` // Evaluation result data +} + +// StartEvaluation starts an evaluation task +// Creates and starts a new evaluation task based on provided parameters +// Parameters: +// - ctx: Context, used for passing request context information such as deadline, cancellation signals, etc. 
+// - request: Evaluation request parameters, including dataset ID and model IDs +// +// Returns: +// - *EvaluationTask: Created evaluation task information +// - error: Error information if the request fails +func (c *Client) StartEvaluation(ctx context.Context, request *EvaluationRequest) (*EvaluationTask, error) { + resp, err := c.doRequest(ctx, http.MethodPost, "/api/v1/evaluation", request, nil) + if err != nil { + return nil, err + } + + var response EvaluationTaskResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// GetEvaluationResult retrieves evaluation results +// Retrieves detailed results for an evaluation task by task ID +// Parameters: +// - ctx: Context, used for passing request context information +// - taskID: Evaluation task ID, used to identify the specific evaluation task to query +// +// Returns: +// - *EvaluationResult: Detailed evaluation task results +// - error: Error information if the request fails +func (c *Client) GetEvaluationResult(ctx context.Context, taskID string) (*EvaluationResult, error) { + queryParams := url.Values{} + queryParams.Add("task_id", taskID) + + resp, err := c.doRequest(ctx, http.MethodGet, "/api/v1/evaluation", nil, queryParams) + if err != nil { + return nil, err + } + + var response EvaluationResultResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} diff --git a/client/example.go b/client/example.go new file mode 100644 index 0000000..cda4fa5 --- /dev/null +++ b/client/example.go @@ -0,0 +1,271 @@ +package client + +import ( + "context" + "fmt" + "os" + "strings" + "time" +) + +// ExampleUsage demonstrates the complete usage flow of the WeKnora client, including: +// - Creating a client instance +// - Creating a knowledge base +// - Uploading knowledge files +// - Creating a session +// - Performing question-answering +// - Using streaming question-answering +// - Managing models +// - Managing knowledge chunks +// - Getting session messages +// - Cleaning up resources +func ExampleUsage() { + // Create a client instance + apiClient := NewClient( + "http://localhost:8080", + WithToken("your-auth-token"), + WithTimeout(30*time.Second), + ) + + // 1. Create a knowledge base + fmt.Println("1. Creating knowledge base...") + kb := &KnowledgeBase{ + Name: "Test Knowledge Base", + Description: "This is a test knowledge base", + ChunkingConfig: ChunkingConfig{ + ChunkSize: 500, + ChunkOverlap: 50, + Separators: []string{"\n\n", "\n", ". ", "? ", "! "}, + }, + ImageProcessingConfig: ImageProcessingConfig{ + ModelID: "image_model_id", + }, + EmbeddingModelID: "embedding_model_id", + SummaryModelID: "summary_model_id", + } + + createdKB, err := apiClient.CreateKnowledgeBase(context.Background(), kb) + if err != nil { + fmt.Printf("Failed to create knowledge base: %v\n", err) + return + } + fmt.Printf("Knowledge base created successfully: ID=%s, Name=%s\n", createdKB.ID, createdKB.Name) + + // 2. Upload knowledge file + fmt.Println("\n2. 
Uploading knowledge file...") + filePath := "path/to/sample.pdf" // Sample file path + + // Check if file exists before uploading + if _, err := os.Stat(filePath); os.IsNotExist(err) { + fmt.Printf("File does not exist: %s, skipping upload step\n", filePath) + } else { + // Add metadata + metadata := map[string]string{ + "source": "local", + "type": "document", + } + knowledge, err := apiClient.CreateKnowledgeFromFile(context.Background(), createdKB.ID, filePath, metadata) + if err != nil { + fmt.Printf("Failed to upload knowledge file: %v\n", err) + } else { + fmt.Printf("File uploaded successfully: Knowledge ID=%s, Title=%s\n", knowledge.ID, knowledge.Title) + } + } + + // Create text knowledge (alternative to file upload) + // Note: This is just an example, the client package may not support creating text knowledge directly + // In actual use, refer to the methods provided in client.knowledge.go + fmt.Println("\nCreating text knowledge (example)") + fmt.Println("Title: Test Text Knowledge") + fmt.Println("Description: Test knowledge created from text") + + // 3. Create a model + fmt.Println("\n3. Creating model...") + modelRequest := &CreateModelRequest{ + Name: "Test Model", + Type: ModelTypeChat, + Source: ModelSourceInternal, + Description: "This is a test model", + Parameters: ModelParameters{ + "temperature": 0.7, + "top_p": 0.9, + }, + IsDefault: true, + } + + model, err := apiClient.CreateModel(context.Background(), modelRequest) + if err != nil { + fmt.Printf("Failed to create model: %v\n", err) + } else { + fmt.Printf("Model created successfully: ID=%s, Name=%s\n", model.ID, model.Name) + } + + // List all models + models, err := apiClient.ListModels(context.Background()) + if err != nil { + fmt.Printf("Failed to get model list: %v\n", err) + } else { + fmt.Printf("System has %d models\n", len(models)) + } + + // 4. Create a session + fmt.Println("\n4. Creating session...") + sessionRequest := &CreateSessionRequest{ + KnowledgeBaseID: createdKB.ID, + SessionStrategy: &SessionStrategy{ + MaxRounds: 10, + EnableRewrite: true, + FallbackStrategy: "fixed_answer", + FallbackResponse: "Sorry, I cannot answer this question", + EmbeddingTopK: 5, + KeywordThreshold: 0.5, + VectorThreshold: 0.7, + RerankModelID: "rerank_model_id", + RerankTopK: 3, + RerankThreshold: 0.8, + SummaryModelID: "summary_model_id", + SummaryParameters: &SummaryConfig{ + Temperature: 0.7, + TopP: 0.9, + MaxTokens: 100, + }, + }, + } + + session, err := apiClient.CreateSession(context.Background(), sessionRequest) + if err != nil { + fmt.Printf("Failed to create session: %v\n", err) + return + } + fmt.Printf("Session created successfully: ID=%s\n", session.ID) + + // 5. Perform knowledge Q&A (using streaming API) + fmt.Println("\n5. Performing knowledge Q&A...") + question := "What is artificial intelligence?" 
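+	// For reference, the non-streaming call documented in the client README would look
+	// roughly like the sketch below; it is kept as a comment here because this example
+	// exercises only the streaming API (see README_EN.md for the documented types):
+	//
+	//	answer, err := apiClient.KnowledgeQA(context.Background(), session.ID,
+	//		&KnowledgeQARequest{Query: question})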
+ fmt.Printf("Question: %s\nAnswer: ", question) + + // Use streaming API for Q&A (Note: Client may only provide streaming Q&A API) + var answer strings.Builder + var references []*SearchResult + + err = apiClient.KnowledgeQAStream(context.Background(), + session.ID, + question, + func(response *StreamResponse) error { + if response.ResponseType == ResponseTypeAnswer { + answer.WriteString(response.Content) + } + + if response.Done && len(response.KnowledgeReferences) > 0 { + references = response.KnowledgeReferences + } + return nil + }) + + if err != nil { + fmt.Printf("Q&A failed: %v\n", err) + } else { + fmt.Printf("%s\n", answer.String()) + if len(references) > 0 { + fmt.Println("References:") + for i, ref := range references { + fmt.Printf("%d. %s\n", i+1, ref.Content[:min(50, len(ref.Content))]+"...") + } + } + } + + // 6. Perform another streaming Q&A + fmt.Println("\n6. Performing streaming Q&A...") + streamQuestion := "What is machine learning?" + fmt.Printf("Question: %s\nAnswer: ", streamQuestion) + + err = apiClient.KnowledgeQAStream(context.Background(), + session.ID, + streamQuestion, + func(response *StreamResponse) error { + fmt.Print(response.Content) + return nil + }, + ) + if err != nil { + fmt.Printf("\nStreaming Q&A failed: %v\n", err) + } + fmt.Println() // Line break + + // 7. Get session messages + fmt.Println("\n7. Getting session messages...") + messages, err := apiClient.GetRecentMessages(context.Background(), session.ID, 10) + if err != nil { + fmt.Printf("Failed to get session messages: %v\n", err) + } else { + fmt.Printf("Retrieved %d recent messages:\n", len(messages)) + for i, msg := range messages { + fmt.Printf("%d. Role: %s, Content: %s\n", i+1, msg.Role, msg.Content[:min(30, len(msg.Content))]+"...") + } + } + + // 8. Manage knowledge chunks + // Assume we have uploaded knowledge and have a knowledge ID + knowledgeID := "knowledge_id_example" // In actual use, use a real knowledge ID + + fmt.Println("\n8. Managing knowledge chunks...") + chunks, total, err := apiClient.ListKnowledgeChunks(context.Background(), knowledgeID, 1, 10) + if err != nil { + fmt.Printf("Failed to get knowledge chunks: %v\n", err) + } else { + fmt.Printf("Knowledge has %d chunks, retrieved %d chunks\n", total, len(chunks)) + + if len(chunks) > 0 { + // Update the first chunk + chunkID := chunks[0].ID + updateRequest := &UpdateChunkRequest{ + Content: "Updated chunk content - " + chunks[0].Content, + IsEnabled: true, + } + + updatedChunk, err := apiClient.UpdateChunk(context.Background(), knowledgeID, chunkID, updateRequest) + if err != nil { + fmt.Printf("Failed to update chunk: %v\n", err) + } else { + fmt.Printf("Chunk updated successfully: ID=%s\n", updatedChunk.ID) + } + } + } + + // 10. Clean up resources (optional, in actual use, keep or delete as needed) + fmt.Println("\n10. 
Cleaning up resources...") + if session != nil { + if err := apiClient.DeleteSession(context.Background(), session.ID); err != nil { + fmt.Printf("Failed to delete session: %v\n", err) + } else { + fmt.Println("Session deleted") + } + } + + // Delete knowledge (assuming we have a valid knowledge ID) + if knowledgeID != "" { + if err := apiClient.DeleteKnowledge(context.Background(), knowledgeID); err != nil { + fmt.Printf("Failed to delete knowledge: %v\n", err) + } else { + fmt.Println("Knowledge deleted") + } + } + + if createdKB != nil { + if err := apiClient.DeleteKnowledgeBase(context.Background(), createdKB.ID); err != nil { + fmt.Printf("Failed to delete knowledge base: %v\n", err) + } else { + fmt.Println("Knowledge base deleted") + } + } + + fmt.Println("\nExample completed") +} + +// min returns the smaller of two integers +func min(a, b int) int { + if a < b { + return a + } + return b +} diff --git a/client/go.mod b/client/go.mod new file mode 100644 index 0000000..1a20c4b --- /dev/null +++ b/client/go.mod @@ -0,0 +1,3 @@ +module github.com/Tencent/WeKnora/client + +go 1.24.2 diff --git a/client/go.sum b/client/go.sum new file mode 100644 index 0000000..e69de29 diff --git a/client/knowledge.go b/client/knowledge.go new file mode 100644 index 0000000..04e14fb --- /dev/null +++ b/client/knowledge.go @@ -0,0 +1,339 @@ +// Package client provides the implementation for interacting with the WeKnora API +// The Knowledge related interfaces are used to manage knowledge entries in the knowledge base +// Knowledge entries can be created from local files, web URLs, or directly from text content +// They can also be retrieved, deleted, and downloaded as files +package client + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "mime/multipart" + "net/http" + "net/url" + "os" + "strconv" + "time" +) + +// Knowledge represents knowledge information +type Knowledge struct { + ID string `json:"id"` + TenantID uint `json:"tenant_id"` + KnowledgeBaseID string `json:"knowledge_base_id"` + Type string `json:"type"` + Title string `json:"title"` + Description string `json:"description"` + Source string `json:"source"` + ParseStatus string `json:"parse_status"` + EnableStatus string `json:"enable_status"` + EmbeddingModelID string `json:"embedding_model_id"` + FileName string `json:"file_name"` + FileType string `json:"file_type"` + FileSize int64 `json:"file_size"` + FilePath string `json:"file_path"` + Metadata map[string]string `json:"metadata"` // Extensible metadata for storing machine information, paths, etc. 
+ CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + ProcessedAt *time.Time `json:"processed_at"` + ErrorMessage string `json:"error_message"` +} + +// KnowledgeResponse represents the API response containing a single knowledge entry +type KnowledgeResponse struct { + Success bool `json:"success"` + Data Knowledge `json:"data"` + Code string `json:"code"` + Message string `json:"message"` +} + +// KnowledgeListResponse represents the API response containing a list of knowledge entries with pagination +type KnowledgeListResponse struct { + Success bool `json:"success"` + Data []Knowledge `json:"data"` + Total int64 `json:"total"` + Page int `json:"page"` + PageSize int `json:"page_size"` +} + +// KnowledgeBatchResponse represents the API response for batch knowledge retrieval +type KnowledgeBatchResponse struct { + Success bool `json:"success"` + Data []Knowledge `json:"data"` +} + +// UpdateImageInfoRequest represents the request structure for updating a chunk +// Used for requesting chunk information updates +type UpdateImageInfoRequest struct { + ImageInfo string `json:"image_info"` // Image information in JSON format +} + +// ErrDuplicateFile is returned when attempting to create a knowledge entry with a file that already exists +var ErrDuplicateFile = errors.New("file already exists") + +// CreateKnowledgeFromFile creates a knowledge entry from a local file path +func (c *Client) CreateKnowledgeFromFile(ctx context.Context, + knowledgeBaseID string, filePath string, metadata map[string]string, +) (*Knowledge, error) { + // Open the local file + file, err := os.Open(filePath) + if err != nil { + return nil, fmt.Errorf("failed to open file: %w", err) + } + defer file.Close() + + // Get file information + fileInfo, err := file.Stat() + if err != nil { + return nil, fmt.Errorf("failed to get file information: %w", err) + } + + // Create the HTTP request + path := fmt.Sprintf("/api/v1/knowledge-bases/%s/knowledge/file", knowledgeBaseID) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+path, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + // Create a multipart form writer + body := &bytes.Buffer{} + writer := multipart.NewWriter(body) + part, err := writer.CreateFormFile("file", fileInfo.Name()) + if err != nil { + return nil, fmt.Errorf("failed to create form file: %w", err) + } + + // Copy file contents + _, err = io.Copy(part, file) + if err != nil { + return nil, fmt.Errorf("failed to copy file content: %w", err) + } + + // Add metadata to the request if provided + if metadata != nil { + metadataBytes, err := json.Marshal(metadata) + if err != nil { + return nil, fmt.Errorf("failed to serialize metadata: %w", err) + } + if err := writer.WriteField("metadata", string(metadataBytes)); err != nil { + return nil, fmt.Errorf("failed to write metadata field: %w", err) + } + } + + // Close the multipart writer + err = writer.Close() + if err != nil { + return nil, fmt.Errorf("failed to close writer: %w", err) + } + + // Set request headers + req.Header.Set("Content-Type", writer.FormDataContentType()) + if c.token != "" { + req.Header.Set("X-API-Key", c.token) + } + if requestID := ctx.Value("RequestID"); requestID != nil { + req.Header.Set("X-Request-ID", requestID.(string)) + } + + // Set the request body + req.Body = io.NopCloser(body) + + // Send the request + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer 
resp.Body.Close() + + // Parse the response + var response KnowledgeResponse + if resp.StatusCode == http.StatusConflict { + if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + return &response.Data, ErrDuplicateFile + } else if err := parseResponse(resp, &response); err != nil { + return nil, err + } + return &response.Data, nil +} + +// CreateKnowledgeFromURL creates a knowledge entry from a web URL +func (c *Client) CreateKnowledgeFromURL(ctx context.Context, knowledgeBaseID string, url string) (*Knowledge, error) { + path := fmt.Sprintf("/api/v1/knowledge-bases/%s/knowledge/url", knowledgeBaseID) + + reqBody := struct { + URL string `json:"url"` + }{ + URL: url, + } + + resp, err := c.doRequest(ctx, http.MethodPost, path, reqBody, nil) + if err != nil { + return nil, err + } + + var response KnowledgeResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// GetKnowledge retrieves a knowledge entry by its ID +func (c *Client) GetKnowledge(ctx context.Context, knowledgeID string) (*Knowledge, error) { + path := fmt.Sprintf("/api/v1/knowledge/%s", knowledgeID) + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, nil) + if err != nil { + return nil, err + } + + var response KnowledgeResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// GetKnowledgeBatch retrieves multiple knowledge entries by their IDs +func (c *Client) GetKnowledgeBatch(ctx context.Context, knowledgeIDs []string) ([]Knowledge, error) { + path := "/api/v1/knowledge/batch" + + queryParams := url.Values{} + for _, id := range knowledgeIDs { + queryParams.Add("ids", id) + } + + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, queryParams) + if err != nil { + return nil, err + } + + var response KnowledgeBatchResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return response.Data, nil +} + +// ListKnowledge lists knowledge entries in a knowledge base with pagination +func (c *Client) ListKnowledge(ctx context.Context, + knowledgeBaseID string, + page int, + pageSize int, +) ([]Knowledge, int64, error) { + path := fmt.Sprintf("/api/v1/knowledge-bases/%s/knowledge", knowledgeBaseID) + + queryParams := url.Values{} + queryParams.Add("page", strconv.Itoa(page)) + queryParams.Add("page_size", strconv.Itoa(pageSize)) + + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, queryParams) + if err != nil { + return nil, 0, err + } + + var response KnowledgeListResponse + if err := parseResponse(resp, &response); err != nil { + return nil, 0, err + } + + return response.Data, response.Total, nil +} + +// DeleteKnowledge deletes a knowledge entry by its ID +func (c *Client) DeleteKnowledge(ctx context.Context, knowledgeID string) error { + path := fmt.Sprintf("/api/v1/knowledge/%s", knowledgeID) + resp, err := c.doRequest(ctx, http.MethodDelete, path, nil, nil) + if err != nil { + return err + } + + var response struct { + Success bool `json:"success"` + Message string `json:"message,omitempty"` + } + + return parseResponse(resp, &response) +} + +// DownloadKnowledgeFile downloads a knowledge file to the specified local path +func (c *Client) DownloadKnowledgeFile(ctx context.Context, knowledgeID string, destPath string) error { + path := fmt.Sprintf("/api/v1/knowledge/%s/download", knowledgeID) + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, 
nil)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	// Check for HTTP errors
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		body, _ := io.ReadAll(resp.Body)
+		return fmt.Errorf("HTTP error %d: %s", resp.StatusCode, string(body))
+	}
+
+	// Create destination file
+	out, err := os.Create(destPath)
+	if err != nil {
+		return fmt.Errorf("failed to create file: %w", err)
+	}
+	defer out.Close()
+
+	// Copy response body to file
+	_, err = io.Copy(out, resp.Body)
+	if err != nil {
+		return fmt.Errorf("failed to write file: %w", err)
+	}
+
+	return nil
+}
+
+func (c *Client) UpdateKnowledge(ctx context.Context, knowledge *Knowledge) error {
+	path := fmt.Sprintf("/api/v1/knowledge/%s", knowledge.ID)
+
+	resp, err := c.doRequest(ctx, http.MethodPut, path, knowledge, nil)
+	if err != nil {
+		return err
+	}
+
+	var response struct {
+		Success bool `json:"success"`
+		Message string `json:"message,omitempty"`
+	}
+
+	return parseResponse(resp, &response)
+}
+
+// UpdateImageInfo updates a chunk's image information
+// Updates the image info for a specific chunk under a knowledge document
+// Parameters:
+// - ctx: Context
+// - knowledgeID: Knowledge ID
+// - chunkID: Chunk ID
+// - request: Update request containing the new image information (JSON format)
+//
+// Returns:
+// - error: Error information
+func (c *Client) UpdateImageInfo(ctx context.Context,
+	knowledgeID string, chunkID string, request *UpdateImageInfoRequest,
+) error {
+	path := fmt.Sprintf("/api/v1/knowledge/image/%s/%s", knowledgeID, chunkID)
+	resp, err := c.doRequest(ctx, http.MethodPut, path, request, nil)
+	if err != nil {
+		return err
+	}
+
+	var response struct {
+		Success bool `json:"success"`
+		Message string `json:"message,omitempty"`
+	}
+
+	return parseResponse(resp, &response)
+}
diff --git a/client/knowledgebase.go b/client/knowledgebase.go
new file mode 100644
index 0000000..63aa921
--- /dev/null
+++ b/client/knowledgebase.go
@@ -0,0 +1,211 @@
+// Package client provides the implementation for interacting with the WeKnora API
+// The KnowledgeBase related interfaces are used to manage knowledge bases
+// Knowledge bases are collections of knowledge entries that can be used for question-answering
+// They can also be searched and queried using hybrid search
+package client
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"net/url"
+	"time"
+)
+
+// KnowledgeBase represents a knowledge base
+type KnowledgeBase struct {
+	ID string `json:"id"`
+	Name string `json:"name"` // Name must be unique within the same tenant
+	Description string `json:"description"`
+	TenantID uint `json:"tenant_id"` // Changed to uint type
+	ChunkingConfig ChunkingConfig `json:"chunking_config"`
+	ImageProcessingConfig ImageProcessingConfig `json:"image_processing_config"`
+	EmbeddingModelID string `json:"embedding_model_id"`
+	SummaryModelID string `json:"summary_model_id"` // Summary model ID
+	CreatedAt time.Time `json:"created_at"`
+	UpdatedAt time.Time `json:"updated_at"`
+}
+
+// KnowledgeBaseConfig represents knowledge base configuration
+type KnowledgeBaseConfig struct {
+	ChunkingConfig ChunkingConfig `json:"chunking_config"`
+	ImageProcessingConfig ImageProcessingConfig `json:"image_processing_config"`
+}
+
+// ChunkingConfig represents document chunking configuration
+type ChunkingConfig struct {
+	ChunkSize int `json:"chunk_size"` // Chunk size
+	ChunkOverlap int `json:"chunk_overlap"` // Overlap size
+	Separators []string `json:"separators"` // Separators
+	EnableMultimodal bool `json:"enable_multimodal"` // Whether to enable multimodal 
processing +} + +// ImageProcessingConfig represents image processing configuration +type ImageProcessingConfig struct { + ModelID string `json:"model_id"` // Multimodal model ID +} + +// KnowledgeBaseResponse knowledge base response +type KnowledgeBaseResponse struct { + Success bool `json:"success"` + Data KnowledgeBase `json:"data"` +} + +// KnowledgeBaseListResponse knowledge base list response +type KnowledgeBaseListResponse struct { + Success bool `json:"success"` + Data []KnowledgeBase `json:"data"` +} + +// SearchResult represents search result +type SearchResult struct { + ID string `json:"id"` + Content string `json:"content"` + KnowledgeID string `json:"knowledge_id"` + ChunkIndex int `json:"chunk_index"` + KnowledgeTitle string `json:"knowledge_title"` + StartAt int `json:"start_at"` + EndAt int `json:"end_at"` + Seq int `json:"seq"` + Score float64 `json:"score"` + ChunkType string `json:"chunk_type"` + ImageInfo string `json:"image_info"` + Metadata map[string]string `json:"metadata"` + KnowledgeFilename string `json:"knowledge_filename"` + KnowledgeSource string `json:"knowledge_source"` +} + +// HybridSearchResponse hybrid search response +type HybridSearchResponse struct { + Success bool `json:"success"` + Data []*SearchResult `json:"data"` +} + +type CopyKnowledgeBaseRequest struct { + SourceID string `json:"source_id"` + TargetID string `json:"target_id"` +} + +// CreateKnowledgeBase creates a knowledge base +func (c *Client) CreateKnowledgeBase(ctx context.Context, knowledgeBase *KnowledgeBase) (*KnowledgeBase, error) { + resp, err := c.doRequest(ctx, http.MethodPost, "/api/v1/knowledge-bases", knowledgeBase, nil) + if err != nil { + return nil, err + } + + var response KnowledgeBaseResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// GetKnowledgeBase gets a knowledge base +func (c *Client) GetKnowledgeBase(ctx context.Context, knowledgeBaseID string) (*KnowledgeBase, error) { + path := fmt.Sprintf("/api/v1/knowledge-bases/%s", knowledgeBaseID) + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, nil) + if err != nil { + return nil, err + } + + var response KnowledgeBaseResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// ListKnowledgeBases lists knowledge bases +func (c *Client) ListKnowledgeBases(ctx context.Context) ([]KnowledgeBase, error) { + resp, err := c.doRequest(ctx, http.MethodGet, "/api/v1/knowledge-bases", nil, nil) + if err != nil { + return nil, err + } + + var response KnowledgeBaseListResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return response.Data, nil +} + +// UpdateKnowledgeBaseRequest update knowledge base request +type UpdateKnowledgeBaseRequest struct { + Name string `json:"name"` + Description string `json:"description"` + Config *KnowledgeBaseConfig `json:"config"` +} + +// UpdateKnowledgeBase updates a knowledge base +func (c *Client) UpdateKnowledgeBase(ctx context.Context, + knowledgeBaseID string, + request *UpdateKnowledgeBaseRequest, +) (*KnowledgeBase, error) { + path := fmt.Sprintf("/api/v1/knowledge-bases/%s", knowledgeBaseID) + resp, err := c.doRequest(ctx, http.MethodPut, path, request, nil) + if err != nil { + return nil, err + } + + var response KnowledgeBaseResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// DeleteKnowledgeBase deletes a knowledge base 
+func (c *Client) DeleteKnowledgeBase(ctx context.Context, knowledgeBaseID string) error { + path := fmt.Sprintf("/api/v1/knowledge-bases/%s", knowledgeBaseID) + resp, err := c.doRequest(ctx, http.MethodDelete, path, nil, nil) + if err != nil { + return err + } + + var response struct { + Success bool `json:"success"` + Message string `json:"message,omitempty"` + } + + return parseResponse(resp, &response) +} + +// HybridSearch performs hybrid search +func (c *Client) HybridSearch(ctx context.Context, knowledgeBaseID string, query string) ([]*SearchResult, error) { + path := fmt.Sprintf("/api/v1/knowledge-bases/%s/hybrid-search", knowledgeBaseID) + + queryParams := url.Values{} + queryParams.Add("query", query) + + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, queryParams) + if err != nil { + return nil, err + } + + var response HybridSearchResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return response.Data, nil +} + +func (c *Client) CopyKnowledgeBase(ctx context.Context, request *CopyKnowledgeBaseRequest) error { + path := "/api/v1/knowledge-bases/copy" + + resp, err := c.doRequest(ctx, http.MethodPost, path, request, nil) + if err != nil { + return err + } + + var response struct { + Success bool `json:"success"` + Message string `json:"message,omitempty"` + } + + return parseResponse(resp, &response) +} diff --git a/client/message.go b/client/message.go new file mode 100644 index 0000000..28ae55a --- /dev/null +++ b/client/message.go @@ -0,0 +1,82 @@ +// Package client provides the implementation for interacting with the WeKnora API +// The Message related interfaces are used to manage messages in a session +// Messages can be created, retrieved, deleted, and queried +package client + +import ( + "context" + "fmt" + "net/http" + "net/url" + "strconv" + "time" +) + +// Message message information +type Message struct { + ID string `json:"id"` + SessionID string `json:"session_id"` + RequestID string `json:"request_id"` + Content string `json:"content"` + Role string `json:"role"` + KnowledgeReferences []*SearchResult `json:"knowledge_references" ` + IsCompleted bool `json:"is_completed"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +// MessageListResponse message list response +type MessageListResponse struct { + Success bool `json:"success"` + Data []Message `json:"data"` +} + +// LoadMessages loads session messages, supports pagination and time filtering +func (c *Client) LoadMessages(ctx context.Context, sessionID string, limit int, beforeTime *time.Time) ([]Message, error) { + path := fmt.Sprintf("/api/v1/messages/%s/load", sessionID) + + queryParams := url.Values{} + queryParams.Add("limit", strconv.Itoa(limit)) + + if beforeTime != nil { + queryParams.Add("before_time", beforeTime.Format(time.RFC3339Nano)) + } + + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, queryParams) + if err != nil { + return nil, err + } + + var response MessageListResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return response.Data, nil +} + +// GetRecentMessages gets recent messages from a session +func (c *Client) GetRecentMessages(ctx context.Context, sessionID string, limit int) ([]Message, error) { + return c.LoadMessages(ctx, sessionID, limit, nil) +} + +// GetMessagesBefore gets messages before a specified time +func (c *Client) GetMessagesBefore(ctx context.Context, sessionID string, beforeTime time.Time, limit int) ([]Message, error) { + return 
c.LoadMessages(ctx, sessionID, limit, &beforeTime) +} + +// DeleteMessage deletes a message +func (c *Client) DeleteMessage(ctx context.Context, sessionID string, messageID string) error { + path := fmt.Sprintf("/api/v1/messages/%s/%s", sessionID, messageID) + resp, err := c.doRequest(ctx, http.MethodDelete, path, nil, nil) + if err != nil { + return err + } + + var response struct { + Success bool `json:"success"` + Message string `json:"message,omitempty"` + } + + return parseResponse(resp, &response) +} diff --git a/client/model.go b/client/model.go new file mode 100644 index 0000000..ecde9f6 --- /dev/null +++ b/client/model.go @@ -0,0 +1,155 @@ +// Package client provides the implementation for interacting with the WeKnora API +// The Model related interfaces are used to manage models for different tasks +// Models can be created, retrieved, updated, deleted, and queried +package client + +import ( + "context" + "fmt" + "net/http" +) + +// ModelType model type +type ModelType string + +// ModelSource model source +type ModelSource string + +// ModelParameters model parameters +type ModelParameters map[string]interface{} + +// Model model information +type Model struct { + ID string `json:"id"` + TenantID uint `json:"tenant_id"` + Name string `json:"name"` + Type ModelType `json:"type"` + Source ModelSource `json:"source"` + Description string `json:"description"` + Parameters ModelParameters `json:"parameters"` + IsDefault bool `json:"is_default"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` +} + +// CreateModelRequest model creation request +type CreateModelRequest struct { + Name string `json:"name"` + Type ModelType `json:"type"` + Source ModelSource `json:"source"` + Description string `json:"description"` + Parameters ModelParameters `json:"parameters"` + IsDefault bool `json:"is_default"` +} + +// UpdateModelRequest model update request +type UpdateModelRequest struct { + Name string `json:"name"` + Description string `json:"description"` + Parameters ModelParameters `json:"parameters"` + IsDefault bool `json:"is_default"` +} + +// ModelResponse model response +type ModelResponse struct { + Success bool `json:"success"` + Data Model `json:"data"` +} + +// ModelListResponse model list response +type ModelListResponse struct { + Success bool `json:"success"` + Data []Model `json:"data"` +} + +// Model type constants +const ( + ModelTypeEmbedding ModelType = "embedding" + ModelTypeChat ModelType = "chat" + ModelTypeRerank ModelType = "rerank" + ModelTypeSummary ModelType = "summary" +) + +// Model source constants +const ( + ModelSourceInternal ModelSource = "internal" + ModelSourceExternal ModelSource = "external" +) + +// CreateModel creates a model +func (c *Client) CreateModel(ctx context.Context, request *CreateModelRequest) (*Model, error) { + resp, err := c.doRequest(ctx, http.MethodPost, "/api/v1/models", request, nil) + if err != nil { + return nil, err + } + + var response ModelResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// GetModel gets a model +func (c *Client) GetModel(ctx context.Context, modelID string) (*Model, error) { + path := fmt.Sprintf("/api/v1/models/%s", modelID) + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, nil) + if err != nil { + return nil, err + } + + var response ModelResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// ListModels lists all models +func 
(c *Client) ListModels(ctx context.Context) ([]Model, error) { + resp, err := c.doRequest(ctx, http.MethodGet, "/api/v1/models", nil, nil) + if err != nil { + return nil, err + } + + var response ModelListResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return response.Data, nil +} + +// UpdateModel updates a model +func (c *Client) UpdateModel(ctx context.Context, modelID string, request *UpdateModelRequest) (*Model, error) { + path := fmt.Sprintf("/api/v1/models/%s", modelID) + resp, err := c.doRequest(ctx, http.MethodPut, path, request, nil) + if err != nil { + return nil, err + } + + var response ModelResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// DeleteModel deletes a model +func (c *Client) DeleteModel(ctx context.Context, modelID string) error { + path := fmt.Sprintf("/api/v1/models/%s", modelID) + resp, err := c.doRequest(ctx, http.MethodDelete, path, nil, nil) + if err != nil { + return err + } + + var response struct { + Success bool `json:"success"` + Message string `json:"message,omitempty"` + } + + return parseResponse(resp, &response) +} diff --git a/client/session.go b/client/session.go new file mode 100644 index 0000000..86b380d --- /dev/null +++ b/client/session.go @@ -0,0 +1,403 @@ +// Package client provides the implementation for interacting with the WeKnora API +// The Session related interfaces are used to manage sessions for question-answering +// Sessions can be created, retrieved, updated, deleted, and queried +// They can also be used to generate titles for sessions +package client + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strconv" + "strings" +) + +// SessionStrategy defines session strategy +type SessionStrategy struct { + MaxRounds int `json:"max_rounds"` // Maximum number of rounds to maintain + EnableRewrite bool `json:"enable_rewrite"` // Enable query rewrite + FallbackStrategy string `json:"fallback_strategy"` // Fallback strategy + FallbackResponse string `json:"fallback_response"` // Fixed fallback response content + EmbeddingTopK int `json:"embedding_top_k"` // Top K for vector retrieval + KeywordThreshold float64 `json:"keyword_threshold"` // Keyword retrieval threshold + VectorThreshold float64 `json:"vector_threshold"` // Vector retrieval threshold + RerankModelID string `json:"rerank_model_id"` // Rerank model ID + RerankTopK int `json:"rerank_top_k"` // Top K for reranking + RerankThreshold float64 `json:"reranking_threshold"` // Reranking threshold + SummaryModelID string `json:"summary_model_id"` // Summary model ID + SummaryParameters *SummaryConfig `json:"summary_parameters"` // Summary model parameters + NoMatchPrefix string `json:"no_match_prefix"` // Fallback response prefix +} + +// SummaryConfig defines summary configuration +type SummaryConfig struct { + MaxTokens int `json:"max_tokens"` + TopP float64 `json:"top_p"` + TopK int `json:"top_k"` + FrequencyPenalty float64 `json:"frequency_penalty"` + PresencePenalty float64 `json:"presence_penalty"` + RepeatPenalty float64 `json:"repeat_penalty"` + Prompt string `json:"prompt"` + ContextTemplate string `json:"context_template"` + NoMatchPrefix string `json:"no_match_prefix"` + Temperature float64 `json:"temperature"` + Seed int `json:"seed"` + MaxCompletionTokens int `json:"max_completion_tokens"` +} + +// CreateSessionRequest session creation request +type CreateSessionRequest struct { + KnowledgeBaseID string 
`json:"knowledge_base_id"` // Associated knowledge base ID + SessionStrategy *SessionStrategy `json:"session_strategy"` // Session strategy +} + +// Session session information +type Session struct { + ID string `json:"id"` + TenantID uint `json:"tenant_id"` + KnowledgeBaseID string `json:"knowledge_base_id"` + Title string `json:"title"` + MaxRounds int `json:"max_rounds"` + EnableRewrite bool `json:"enable_rewrite"` + FallbackStrategy string `json:"fallback_strategy"` + FallbackResponse string `json:"fallback_response"` + EmbeddingTopK int `json:"embedding_top_k"` + KeywordThreshold float64 `json:"keyword_threshold"` + VectorThreshold float64 `json:"vector_threshold"` + RerankModelID string `json:"rerank_model_id"` + RerankTopK int `json:"rerank_top_k"` + RerankThreshold float64 `json:"reranking_threshold"` // Reranking threshold + SummaryModelID string `json:"summary_model_id"` + SummaryParameters *SummaryConfig `json:"summary_parameters"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` +} + +// SessionResponse session response +type SessionResponse struct { + Success bool `json:"success"` + Data Session `json:"data"` +} + +// SessionListResponse session list response +type SessionListResponse struct { + Success bool `json:"success"` + Data []Session `json:"data"` + Total int `json:"total"` + Page int `json:"page"` + PageSize int `json:"page_size"` +} + +// CreateSession creates a session +func (c *Client) CreateSession(ctx context.Context, request *CreateSessionRequest) (*Session, error) { + resp, err := c.doRequest(ctx, http.MethodPost, "/api/v1/sessions", request, nil) + if err != nil { + return nil, err + } + + var response SessionResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// GetSession gets a session +func (c *Client) GetSession(ctx context.Context, sessionID string) (*Session, error) { + path := fmt.Sprintf("/api/v1/sessions/%s", sessionID) + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, nil) + if err != nil { + return nil, err + } + + var response SessionResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// GetSessionsByTenant gets all sessions for a tenant +func (c *Client) GetSessionsByTenant(ctx context.Context, page int, pageSize int) ([]Session, int, error) { + queryParams := url.Values{} + queryParams.Add("page", strconv.Itoa(page)) + queryParams.Add("page_size", strconv.Itoa(pageSize)) + resp, err := c.doRequest(ctx, http.MethodGet, "/api/v1/sessions", nil, queryParams) + if err != nil { + return nil, 0, err + } + + var response SessionListResponse + if err := parseResponse(resp, &response); err != nil { + return nil, 0, err + } + + return response.Data, response.Total, nil +} + +// UpdateSession updates a session +func (c *Client) UpdateSession(ctx context.Context, sessionID string, request *CreateSessionRequest) (*Session, error) { + path := fmt.Sprintf("/api/v1/sessions/%s", sessionID) + resp, err := c.doRequest(ctx, http.MethodPut, path, request, nil) + if err != nil { + return nil, err + } + + var response SessionResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// DeleteSession deletes a session +func (c *Client) DeleteSession(ctx context.Context, sessionID string) error { + path := fmt.Sprintf("/api/v1/sessions/%s", sessionID) + resp, err := c.doRequest(ctx, http.MethodDelete, path, nil, nil) + if 
err != nil { + return err + } + + var response struct { + Success bool `json:"success"` + Message string `json:"message,omitempty"` + } + + return parseResponse(resp, &response) +} + +// GenerateTitleRequest title generation request +type GenerateTitleRequest struct { + Messages []Message `json:"messages"` +} + +// GenerateTitleResponse title generation response +type GenerateTitleResponse struct { + Success bool `json:"success"` + Data string `json:"data"` +} + +// GenerateTitle generates a session title +func (c *Client) GenerateTitle(ctx context.Context, sessionID string, request *GenerateTitleRequest) (string, error) { + path := fmt.Sprintf("/api/v1/sessions/%s/generate_title", sessionID) + resp, err := c.doRequest(ctx, http.MethodPost, path, request, nil) + if err != nil { + return "", err + } + + var response GenerateTitleResponse + if err := parseResponse(resp, &response); err != nil { + return "", err + } + + return response.Data, nil +} + +// KnowledgeQARequest knowledge Q&A request +type KnowledgeQARequest struct { + Query string `json:"query"` +} + +type ResponseType string + +const ( + ResponseTypeAnswer ResponseType = "answer" + ResponseTypeReferences ResponseType = "references" +) + +// StreamResponse streaming response +type StreamResponse struct { + ID string `json:"id"` // Unique identifier + ResponseType ResponseType `json:"response_type"` // Response type + Content string `json:"content"` // Current content fragment + Done bool `json:"done"` // Whether completed + KnowledgeReferences []*SearchResult `json:"knowledge_references"` // Knowledge references +} + +// KnowledgeQAStream knowledge Q&A streaming API +func (c *Client) KnowledgeQAStream(ctx context.Context, sessionID string, query string, callback func(*StreamResponse) error) error { + path := fmt.Sprintf("/api/v1/knowledge-chat/%s", sessionID) + fmt.Printf("Starting KnowledgeQAStream request, session ID: %s, query: %s\n", sessionID, query) + + request := &KnowledgeQARequest{ + Query: query, + } + + resp, err := c.doRequest(ctx, http.MethodPost, path, request, nil) + if err != nil { + fmt.Printf("Request failed: %v\n", err) + return err + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + err := fmt.Errorf("HTTP error %d: %s", resp.StatusCode, string(body)) + fmt.Printf("Request returned error status: %v\n", err) + return err + } + + fmt.Println("Successfully established SSE connection, processing data stream") + + // Use bufio to read SSE data line by line + scanner := bufio.NewScanner(resp.Body) + var dataBuffer string + var eventType string + messageCount := 0 + + for scanner.Scan() { + line := scanner.Text() + fmt.Printf("Received SSE line: %s\n", line) + + // Empty line indicates the end of an event + if line == "" { + if dataBuffer != "" { + fmt.Printf("Processing data: %s, event type: %s\n", dataBuffer, eventType) + var streamResponse StreamResponse + if err := json.Unmarshal([]byte(dataBuffer), &streamResponse); err != nil { + fmt.Printf("Failed to parse SSE data: %v\n", err) + return fmt.Errorf("failed to parse SSE data: %w", err) + } + + messageCount++ + fmt.Printf("Parsed message #%d, done status: %v\n", messageCount, streamResponse.Done) + + if err := callback(&streamResponse); err != nil { + fmt.Printf("Callback processing failed: %v\n", err) + return err + } + dataBuffer = "" + eventType = "" + } + continue + } + + // Process lines with event: prefix + if strings.HasPrefix(line, "event:") { + eventType = line[6:] // Remove "event:" 
prefix + fmt.Printf("Set event type: %s\n", eventType) + } + + // Process lines with data: prefix + if strings.HasPrefix(line, "data:") { + dataBuffer = line[5:] // Remove "data:" prefix + } + } + + if err := scanner.Err(); err != nil { + fmt.Printf("Failed to read SSE stream: %v\n", err) + return fmt.Errorf("failed to read SSE stream: %w", err) + } + + fmt.Printf("KnowledgeQAStream completed, processed %d messages\n", messageCount) + return nil +} + +// ContinueStream continues to receive an active stream for a session +func (c *Client) ContinueStream(ctx context.Context, sessionID string, messageID string, callback func(*StreamResponse) error) error { + path := fmt.Sprintf("/api/v1/sessions/continue-stream/%s", sessionID) + + queryParams := url.Values{} + queryParams.Add("message_id", messageID) + + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, queryParams) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("HTTP error %d: %s", resp.StatusCode, string(body)) + } + + // Use bufio to read SSE data line by line + scanner := bufio.NewScanner(resp.Body) + var dataBuffer string + var eventType string + + for scanner.Scan() { + line := scanner.Text() + + // Empty line indicates the end of an event + if line == "" { + if dataBuffer != "" && eventType == "message" { + var streamResponse StreamResponse + if err := json.Unmarshal([]byte(dataBuffer), &streamResponse); err != nil { + return fmt.Errorf("failed to parse SSE data: %w", err) + } + + if err := callback(&streamResponse); err != nil { + return err + } + dataBuffer = "" + eventType = "" + } + continue + } + + // Process lines with event: prefix + if strings.HasPrefix(line, "event:") { + eventType = line[6:] // Remove "event:" prefix + } + + // Process lines with data: prefix + if strings.HasPrefix(line, "data:") { + dataBuffer = line[5:] // Remove "data:" prefix + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("failed to read SSE stream: %w", err) + } + + return nil +} + +// SearchKnowledgeRequest knowledge search request +type SearchKnowledgeRequest struct { + Query string `json:"query"` // Query content + KnowledgeBaseID string `json:"knowledge_base_id"` // Knowledge base ID +} + +// SearchKnowledgeResponse search results response +type SearchKnowledgeResponse struct { + Success bool `json:"success"` + Data []*SearchResult `json:"data"` +} + +// SearchKnowledge performs knowledge base search without LLM summarization +func (c *Client) SearchKnowledge(ctx context.Context, request *SearchKnowledgeRequest) ([]*SearchResult, error) { + fmt.Printf("Starting SearchKnowledge request, knowledge base ID: %s, query: %s\n", + request.KnowledgeBaseID, request.Query) + + resp, err := c.doRequest(ctx, http.MethodPost, "/api/v1/knowledge-search", request, nil) + if err != nil { + fmt.Printf("Request failed: %v\n", err) + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + err := fmt.Errorf("HTTP error %d: %s", resp.StatusCode, string(body)) + fmt.Printf("Request returned error status: %v\n", err) + return nil, err + } + + var response SearchKnowledgeResponse + if err := parseResponse(resp, &response); err != nil { + fmt.Printf("Failed to parse response: %v\n", err) + return nil, err + } + + fmt.Printf("SearchKnowledge completed, found %d results\n", len(response.Data)) + return response.Data, nil +} diff --git 
a/client/tenant.go b/client/tenant.go new file mode 100644 index 0000000..e2f8069 --- /dev/null +++ b/client/tenant.go @@ -0,0 +1,136 @@ +// Package client provides the implementation for interacting with the WeKnora API +// The Tenant related interfaces are used to manage tenants in the system +// Tenants can be created, retrieved, updated, deleted, and queried +// They can also be used to manage retriever engines for different tasks +package client + +import ( + "context" + "fmt" + "net/http" + "time" +) + +// RetrieverEngines defines a collection of retriever engine parameters +type RetrieverEngines struct { + Engines []RetrieverEngineParams `json:"engines"` +} + +// RetrieverEngineParams contains configuration for retriever engines +type RetrieverEngineParams struct { + RetrieverType string `json:"retriever_type"` // Type of retriever (e.g., keywords, vector) + RetrieverEngineType string `json:"retriever_engine_type"` // Type of engine implementing the retriever +} + +// Tenant represents tenant information in the system +type Tenant struct { + ID uint `yaml:"id" json:"id" gorm:"primaryKey"` + // Tenant name + Name string `yaml:"name" json:"name"` + // Tenant description + Description string `yaml:"description" json:"description"` + // API key for authentication + APIKey string `yaml:"api_key" json:"api_key"` + // Tenant status (active, inactive) + Status string `yaml:"status" json:"status" gorm:"default:'active'"` + // Configured retrieval engines + RetrieverEngines RetrieverEngines `yaml:"retriever_engines" json:"retriever_engines" gorm:"type:json"` + // Business/department information + Business string `yaml:"business" json:"business"` + // Creation timestamp + CreatedAt time.Time `yaml:"created_at" json:"created_at"` + // Last update timestamp + UpdatedAt time.Time `yaml:"updated_at" json:"updated_at"` +} + +// TenantResponse represents the API response structure for tenant operations +type TenantResponse struct { + Success bool `json:"success"` // Whether the operation was successful + Data Tenant `json:"data"` // Tenant data +} + +// TenantListResponse represents the API response structure for listing tenants +type TenantListResponse struct { + Success bool `json:"success"` // Whether the operation was successful + Data struct { + Items []Tenant `json:"items"` // List of tenant items + } `json:"data"` +} + +// CreateTenant creates a new tenant +func (c *Client) CreateTenant(ctx context.Context, tenant *Tenant) (*Tenant, error) { + resp, err := c.doRequest(ctx, http.MethodPost, "/api/v1/tenants", tenant, nil) + if err != nil { + return nil, err + } + + var response TenantResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// GetTenant retrieves a tenant by ID +func (c *Client) GetTenant(ctx context.Context, tenantID uint) (*Tenant, error) { + path := fmt.Sprintf("/api/v1/tenants/%d", tenantID) + resp, err := c.doRequest(ctx, http.MethodGet, path, nil, nil) + if err != nil { + return nil, err + } + + var response TenantResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return &response.Data, nil +} + +// UpdateTenant updates an existing tenant +func (c *Client) UpdateTenant(ctx context.Context, tenant *Tenant) (*Tenant, error) { + path := fmt.Sprintf("/api/v1/tenants/%d", tenant.ID) + resp, err := c.doRequest(ctx, http.MethodPut, path, tenant, nil) + if err != nil { + return nil, err + } + + var response TenantResponse + if err := parseResponse(resp, &response); err != 
nil { + return nil, err + } + + return &response.Data, nil +} + +// DeleteTenant removes a tenant by ID +func (c *Client) DeleteTenant(ctx context.Context, tenantID uint) error { + path := fmt.Sprintf("/api/v1/tenants/%d", tenantID) + resp, err := c.doRequest(ctx, http.MethodDelete, path, nil, nil) + if err != nil { + return err + } + + var response struct { + Success bool `json:"success"` + Message string `json:"message,omitempty"` + } + + return parseResponse(resp, &response) +} + +// ListTenants retrieves all tenants +func (c *Client) ListTenants(ctx context.Context) ([]Tenant, error) { + resp, err := c.doRequest(ctx, http.MethodGet, "/api/v1/tenants", nil, nil) + if err != nil { + return nil, err + } + + var response TenantListResponse + if err := parseResponse(resp, &response); err != nil { + return nil, err + } + + return response.Data.Items, nil +} diff --git a/cmd/server/main.go b/cmd/server/main.go new file mode 100644 index 0000000..403f889 --- /dev/null +++ b/cmd/server/main.go @@ -0,0 +1,113 @@ +// Package main is the main package for the WeKnora server +// It contains the main function and the entry point for the server +package main + +import ( + "context" + "fmt" + "log" + "net/http" + "os" + "os/signal" + "syscall" + "time" + + "github.com/gin-gonic/gin" + + "github.com/Tencent/WeKnora/internal/application/service" + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/container" + "github.com/Tencent/WeKnora/internal/runtime" + "github.com/Tencent/WeKnora/internal/tracing" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +func main() { + // Set log format with request ID + log.SetFlags(log.LstdFlags | log.Lmicroseconds | log.Lshortfile) + log.SetOutput(os.Stdout) + + // Set Gin mode + if os.Getenv("GIN_MODE") == "release" { + gin.SetMode(gin.ReleaseMode) + } else { + gin.SetMode(gin.DebugMode) + } + + // Build dependency injection container + c := container.BuildContainer(runtime.GetContainer()) + + // Run application + err := c.Invoke(func( + cfg *config.Config, + router *gin.Engine, + tracer *tracing.Tracer, + testDataService *service.TestDataService, + resourceCleaner interfaces.ResourceCleaner, + ) error { + // Create context for resource cleanup + shutdownTimeout := cfg.Server.ShutdownTimeout + if shutdownTimeout == 0 { + shutdownTimeout = 30 * time.Second + } + cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), shutdownTimeout) + defer cleanupCancel() + + // Register tracer cleanup function to resource cleaner + resourceCleaner.RegisterWithName("Tracer", func() error { + return tracer.Cleanup(cleanupCtx) + }) + + // Initialize test data + if testDataService != nil { + if err := testDataService.InitializeTestData(context.Background()); err != nil { + log.Printf("Failed to initialize test data: %v", err) + } + } + + // Create HTTP server + server := &http.Server{ + Addr: fmt.Sprintf("%s:%d", cfg.Server.Host, cfg.Server.Port), + Handler: router, + } + + ctx, done := context.WithCancel(context.Background()) + signals := make(chan os.Signal, 1) + signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP) + go func() { + sig := <-signals + log.Printf("Received signal: %v, starting server shutdown...", sig) + + // Create a context with timeout for server shutdown + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer shutdownCancel() + + if err := server.Shutdown(shutdownCtx); err != nil { + log.Fatalf("Server forced to shutdown: %v", err) + } + + // 
Clean up all registered resources + log.Println("Cleaning up resources...") + errs := resourceCleaner.Cleanup(cleanupCtx) + if len(errs) > 0 { + log.Printf("Errors occurred during resource cleanup: %v", errs) + } + + log.Println("Server has exited") + done() + }() + + // Start server + log.Printf("Server is running at %s:%d", cfg.Server.Host, cfg.Server.Port) + if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { + return fmt.Errorf("failed to start server: %v", err) + } + + // Wait for shutdown signal + <-ctx.Done() + return nil + }) + if err != nil { + log.Fatalf("Failed to run application: %v", err) + } +} diff --git a/config/config.yaml b/config/config.yaml new file mode 100644 index 0000000..f88b3f0 --- /dev/null +++ b/config/config.yaml @@ -0,0 +1,535 @@ +# 服务器配置 +server: + port: 8080 + host: "0.0.0.0" + +# 对话服务配置 +conversation: + max_rounds: 5 + keyword_threshold: 0.3 + embedding_top_k: 10 + vector_threshold: 0.5 + rerank_threshold: 0.7 + rerank_top_k: 5 + fallback_strategy: "fixed" + fallback_response: "抱歉,我无法回答这个问题。" + fallback_prompt: | + 你是一个专业、友好的AI助手。现在用户提出的问题超出了你的知识库范围,你需要生成一个礼貌且有帮助的回复。 + + ## 回复要求 + - 诚实承认你无法提供准确答案 + - 简洁友好,不要过度道歉 + - 可以提供相关的建议或替代方案 + - 回复控制在50字以内 + - 使用礼貌、专业的语气 + + ## Few-shot示例 + + 用户问题: 今天杭州西湖的游客数量是多少? + 回复: 抱歉,我无法获取实时的杭州西湖游客数据。您可以通过杭州旅游官网或相关APP查询这一信息。 + + 用户问题: 张教授的新论文发表了吗? + 回复: 我没有张教授的最新论文信息。建议您查询学术数据库或直接联系张教授获取最新动态。 + + 用户问题: 我的银行卡号是多少? + 回复: 作为AI助手,我无法获取您的个人银行信息。请登录您的银行APP或联系银行客服获取相关信息。 + + ## 用户当前的问题是: + {{.Query}} + enable_rewrite: true + enable_rerank: true + rewrite_prompt_system: | + 你是一个专注于指代消解和省略补全的智能助手,你的任务是根据历史对话上下文,清晰识别用户问题中的代词并替换为明确的主语,同时补全省略的关键信息。 + + ## 改写目标 + 请根据历史对话,对当前用户问题进行改写,目标是: + - 进行指代消解,将"它"、"这个"、"那个"、"他"、"她"、"它们"、"他们"、"她们"等代词替换为明确的主语 + - 补全省略的关键信息,确保问题语义完整 + - 保持问题的原始含义和表达方式不变 + - 改写后必须也是一个问题 + - 改写后的问题字数控制在30字以内 + - 仅输出改写后的问题,不要输出任何解释,更不要尝试回答该问题,后面有其他助手回去解答此问题 + + ## Few-shot示例 + + 示例1: + 历史对话: + 用户: 微信支付有哪些功能? + 助手: 微信支付的主要功能包括转账、付款码、收款、信用卡还款等多种支付服务。 + + 用户问题: 它的安全性 + 改写后: 微信支付的安全性 + + 示例2: + 历史对话: + 用户: 苹果手机电池不耐用怎么办? + 助手: 您可以通过降低屏幕亮度、关闭后台应用和定期更新系统来延长电池寿命。 + + 用户问题: 这样会影响使用体验吗? + 改写后: 降低屏幕亮度和关闭后台应用是否影响使用体验 + + 示例3: + 历史对话: + 用户: 如何制作红烧肉? + 助手: 红烧肉的制作需要先将肉块焯水,然后加入酱油、糖等调料慢炖。 + + 用户问题: 需要炖多久? + 改写后: 红烧肉需要炖多久 + + 示例4: + 历史对话: + 用户: 北京到上海的高铁票价是多少? + 助手: 北京到上海的高铁票价根据车次和座位类型不同,二等座约为553元,一等座约为933元。 + + 用户问题: 时间呢? + 改写后: 北京到上海的高铁时长 + + 示例5: + 历史对话: + 用户: 如何注册微信账号? + 助手: 注册微信账号需要下载微信APP,输入手机号,接收验证码,然后设置昵称和密码。 + + 用户问题: 国外手机号可以吗? + 改写后: 国外手机号是否可以注册微信账号 + rewrite_prompt_user: | + ## 历史对话背景 + {{range .Conversation}} + ------BEGIN------ + 用户的问题是:{{.Query}} + 助手的回答是:{{.Answer}} + ------END------ + {{end}} + + ## 需要改写的用户问题 + {{.Query}} + + ## 改写后的问题 + keywords_extraction_prompt: | + # 角色 + 你是一个专业的关键词提取助手,你的任务是根据用户的问题,提取出最重要的关键词/短语。 + + # 要求 + - 总结用户的问题,并给出最重要的关键词/短语,关键词/短语的数量不超过5个 + - 使用逗号作为分隔符来分隔关键词/短语 + - 关键词/短语必须来自于用户的问题,不得虚构 + - 不要输出任何解释,直接输出关键词/短语,不要有任何前缀、解释或标点符号,不要尝试回答该问题,后面有其他助手会去搜索此问题 + + # 输出格式 + keyword1, keyword2, keyword3, keyword4, keyword5 + + # Examples + + ## Example 1 + USER: 如何提高英语口语水平? + ############### + Output: 英语口语, 口语水平, 提高英语口语, 英语口语提升, 英语口语练习 + + ## Example 2 + USER: 最近上海有什么好玩的展览活动? + ############### + Output: 上海展览, 展览活动, 上海展览推荐, 展览活动推荐, 上海展览活动 + + ## Example 3 + USER: 苹果手机电池不耐用怎么解决? + ############### + Output: 苹果手机, 电池不耐用, 电池优化, 电池寿命, 电池保养 + + ## Example 4 + USER: Python的Logo长啥样? + ############### + Output: Python Logo + + ## Example 5 + USER: 如何使用iPhone连接WiFi? 
+ ############### + Output: iPhone, 连接WiFi, 使用iPhone连接WiFi + + # Real Data + USER: {{.Query}} + + keywords_extraction_prompt_user: | + Output: + + generate_summary_prompt: | + 你是一个总结助手,你的任务是总结文章或者片段内容。 + + ## 准则要求 + - 总结结果长度不能超过100个字 + - 保持客观,不添加个人观点或评价 + - 使用第三人称陈述语气 + - 不要基于任何先验知识回答用户的问题,只基于文章内容生成摘要 + - 直接输出总结结果,不要有任何前缀或解释 + - 使用中文输出总结结果 + - 不能输出“无法生成”、“无法总结”等字眼 + + ## Few-shot示例 + + 用户给出的文章内容: + 随着5G技术的快速发展,各行各业正经历数字化转型。5G网络凭借高速率、低延迟和大连接的特性,正在推动智慧城市、工业互联网和远程医疗等领域的创新。专家预测,到2025年,5G将为全球经济贡献约2.2万亿美元。然而,5G建设也面临基础设施投入大、覆盖不均等挑战。 + + 文章总结: + 5G技术凭借高速率、低延迟和大连接特性推动各行业数字化转型,促进智慧城市、工业互联网和远程医疗创新。预计2025年将贡献约2.2万亿美元经济价值,但仍面临基础设施投入大和覆盖不均等挑战。 + + ## 用户给出的文章内容是: + + generate_session_title_prompt: | + 你是一个专业的会话标题生成助手,你的任务是为用户提问创建简洁、精准且具描述性的标题。 + + ## 格式要求 + - 标题长度必须在10个字以内 + - 标题应准确反映用户问题的核心主题 + - 使用名词短语结构,避免使用问句 + - 保持简洁明了,删除非必要词语 + - 不要使用"关于"、"如何"等冗余词语开头 + - 直接输出标题文本,不要有任何前缀、解释或标点符号 + + ## Few-shot示例 + + 用户问题: 如何提高英语口语水平? + 标题: 英语口语提升 + + 用户问题: 最近上海有什么好玩的展览活动? + 标题: 上海展览推荐 + + 用户问题: 苹果手机电池不耐用怎么解决? + 标题: 苹果电池优化 + + ## 用户的问题是: + summary: + repeat_penalty: 1.0 + temperature: 0.3 + max_completion_tokens: 2048 + no_match_prefix: |- + + + NO_MATCH + prompt: | + 这是用户和助手之间的对话。当用户提出问题时,助手会基于特定的信息进行解答。助手首先在心中思考推理过程,然后向用户提供答案。 + 推理过程用 标签包围,答案直接输出在think标签后面,即: + + 这里是推理过程 + + 这里是答案 + context_template: | + 你是一个专业的智能信息检索助手,名为小微,犹如专业的高级秘书,依据检索到的信息回答用户问题。 + 当用户提出问题时,助手只能基于给定的信息进行解答,不能利用任何先验知识。 + + ## 回答问题规则 + - 仅根据检索到的信息中的事实进行回复,不得运用任何先验知识,保持回应的客观性和准确性。 + - 复杂问题和答案的按Markdown分结构展示,总述部分不需要拆分 + - 如果是比较简单的答案,不需要把最终答案拆分的过于细碎 + - 结果中使用的图片地址必须来自于检索到的信息,不得虚构 + - 检查结果中的文字和图片是否来自于检索到的信息,如果扩展了不在检索到的信息中的内容,必须进行修改,直到得到最终答案 + - 如果用户问题无法回答,只输出NO_MATCH即可,即: + + + NO_MATCH + + ## 输出限制 + - 以Markdown图文格式输出你的最终结果 + - 输出内容要保证简短且全面,条理清晰,信息明确,不重复。 + + ## 当前时间是: + {{.CurrentTime}} {{.CurrentWeek}} + + ## 检索到的信息如下: + ------BEGIN------ + {{range .Contexts}} + {{.}} + {{end}} + ------END------ + + ## 用户当前的问题是: + {{.Query}} + extract_entities_prompt: | + ## 任务 + 用户提供的文本中,提取所有符合以下实体类型的实体: + EntityTypes: [Person, Organization, Location, Product, Event, Date, Work, Concept, Resource, Category, Operation] + + ## 要求 + 1. 提取结果必须以JSON数组格式输出 + 2. 每个实体必须包含 title 和 type 字段,description 字段可选但强烈建议提供 + 3. 确保 type 字段的值必须严格从 EntityTypes 列表中选择,不得创建新类型 + 4. 如果无法确定实体类型,不要强行归类,宁可不提取该实体 + 5. 不要输出任何解释或额外内容,只输出JSON数组 + 6. 所有字段值不能包含HTML标签或其他代码 + 7. 如果实体有歧义,需在description中说明具体指代 + 8. 若没有找到任何实体,返回空数组 [] + + ## 实体提取规则 + - Person: 真实或虚构的人物,包括历史人物、现代人物、文学角色等 + - Organization: 公司、政府机构、团队、学校等组织实体 + - Location: 地理位置、地标、国家、城市等 + - Product: 商品、服务、品牌等商业产品 + - Event: 事件、会议、节日、历史事件等 + - Date: 日期、时间段、年代等时间相关信息 + - Work: 书籍、电影、音乐、艺术作品等创作内容 + - Concept: 抽象概念、思想、理论等 + - Resource: 自然资源、信息资源、工具等 + - Category: 分类、类别、领域等 + - Operation: 操作、动作、方法、过程等 + + ## 提取步骤 + 1. 仔细阅读文本,识别可能的实体 + 2. 对每个识别到的实体,确定其最适合的实体类型(必须从EntityTypes中选择) + 3. 为每个实体创建包含以下字段的JSON对象: + - title: 实体的标准名称,不包含修饰词,如引号等 + - type: 从EntityTypes中选择的实体类型 + - description: 对该实体的简明中文描述,应基于文本内容 + 4. 验证每个实体的所有字段是否正确且格式化恰当 + 5. 将所有实体对象合并为一个JSON数组 + 6. 
检查最终JSON是否有效并符合要求 + + ## 示例 + [输入] + 文本: 《红楼梦》,又名《石头记》,是清代作家曹雪芹创作的中国古典四大名著之一,被誉为中国封建社会的百科全书。该书前80回由曹雪芹所著,后40回一般认为是高鹗所续。小说以贾、史、王、薛四大家族的兴衰为背景,以贾宝玉、林黛玉和薛宝钗的爱情悲剧为主线,刻画了以贾宝玉和金陵十二钗为中心的正邪两赋、贤愚并出的高度复杂的人物群像。成书于乾隆年间(1743年前后),是中国文学史上现实主义的高峰,对后世影响深远。 + + [输出] + [ + { + "title": "红楼梦", + "type": "Work", + "description": "红楼梦是清代作家曹雪芹创作的中国古典四大名著之一,被誉为中国封建社会的百科全书" + }, + { + "title": "石头记", + "type": "Work", + "description": "石头记是红楼梦的别名" + }, + { + "title": "曹雪芹", + "type": "Person", + "description": "曹雪芹是清代作家,红楼梦的作者,创作了前80回" + }, + { + "title": "高鹗", + "type": "Person", + "description": "高鹗是红楼梦后40回的续作者" + }, + { + "title": "贾宝玉", + "type": "Person", + "description": "贾宝玉是红楼梦中的主要角色,爱情悲剧的主角之一" + }, + { + "title": "林黛玉", + "type": "Person", + "description": "林黛玉是红楼梦中的主要角色,爱情悲剧的主角之一" + }, + { + "title": "薛宝钗", + "type": "Person", + "description": "薛宝钗是红楼梦中的主要角色,爱情悲剧的主角之一" + }, + { + "title": "金陵十二钗", + "type": "Concept", + "description": "金陵十二钗是红楼梦中以贾宝玉为中心的十二位主要女性角色" + }, + { + "title": "乾隆年间", + "type": "Date", + "description": "乾隆年间指的是红楼梦成书的时间,约1743年前后" + }, + { + "title": "四大家族", + "type": "Concept", + "description": "四大家族是红楼梦中的贾、史、王、薛四个家族,是小说的背景" + }, + { + "title": "中国文学史", + "type": "Category", + "description": "红楼梦被视为中国文学史中现实主义的高峰之作" + } + ] + + extract_relationships_prompt: | + ## 任务 + 从用户提供的实体数组中,提取实体之间存在的明确关系,形成结构化的关系网络。 + + ## 要求 + 1. 关系提取必须基于提供的文本内容,不得臆测不存在的关系 + 2. 结果必须以JSON数组格式输出,每个关系为数组中的一个对象 + 3. 每个关系对象必须包含 source, target, description 和 strength 字段 + 4. 不要输出任何解释或额外内容,只输出JSON数组 + 5. 若没有找到任何关系,返回空数组 [] + + ## 关系提取规则 + - 只有在文本中明确体现的关系才应被提取 + - 源实体(source)和目标实体(target)必须是实体数组中已有的实体 + - 关系描述(description)应简明扼要地说明两个实体间的具体关系 + - 关系强度(strength)应根据以下标准确定: + * 10分:直接创造/从属关系(如作者与作品、发明者与发明、母公司与子公司) + * 9分:同一实体的不同表现形式(如别名、曾用名) + * 8分:紧密相关且互相影响的关系(如密切合作伙伴、家庭成员) + * 7分:明确但非直接的关系(如作品中的角色、组织中的成员) + * 6分:间接关联且有明确联系(如同事关系、相似产品) + * 5分:存在关联但较为松散(如同一领域的不同概念) + + ## 提取步骤 + 1. 仔细分析文本内容,确定哪些实体之间存在明确关系 + 2. 只考虑文本中明确提及的关系,不要臆测 + 3. 对每个找到的关系,确定: + - source: 关系的源实体标题(必须是实体列表中已有的实体) + - target: 关系的目标实体标题(必须是实体列表中已有的实体) + - description: 简明准确的关系描述(用中文表述) + - strength: 基于上述标准的关系强度(5-10之间的整数) + 4. 检查每个关系是否双向: + - 如果关系是双向的(如"A是B的朋友"意味着"B也是A的朋友"),考虑是否需要创建反向关系 + - 如果关系是单向的(如"A创作了B"),则只保留单向关系 + 5. 验证所有关系的一致性和合理性: + - 确保没有矛盾的关系(如A同时是B的父亲和兄弟) + - 确保关系描述与关系强度匹配 + 6. 
将所有有效关系组织为JSON数组 + + ## 示例 + [输入] + 实体: [ + { + "title": "红楼梦", + "type": "Work", + "description": "红楼梦是清代作家曹雪芹创作的中国古典四大名著之一,被誉为中国封建社会的百科全书" + }, + { + "title": "石头记", + "type": "Work", + "description": "石头记是红楼梦的别名" + }, + { + "title": "曹雪芹", + "type": "Person", + "description": "曹雪芹是清代作家,红楼梦的作者,创作了前80回" + }, + { + "title": "高鹗", + "type": "Person", + "description": "高鹗是红楼梦后40回的续作者" + }, + { + "title": "贾宝玉", + "type": "Person", + "description": "贾宝玉是红楼梦中的主要角色,爱情悲剧的主角之一" + }, + { + "title": "林黛玉", + "type": "Person", + "description": "林黛玉是红楼梦中的主要角色,爱情悲剧的主角之一" + }, + { + "title": "薛宝钗", + "type": "Person", + "description": "薛宝钗是红楼梦中的主要角色,爱情悲剧的主角之一" + }, + { + "title": "四大家族", + "type": "Concept", + "description": "四大家族是红楼梦中的贾、史、王、薛四个家族,是小说的背景" + }, + { + "title": "金陵十二钗", + "type": "Concept", + "description": "金陵十二钗是红楼梦中以贾宝玉为中心的十二位主要女性角色" + }, + { + "title": "乾隆年间", + "type": "Date", + "description": "乾隆年间指的是红楼梦成书的时间,约1743年前后" + }, + { + "title": "中国文学史", + "type": "Category", + "description": "红楼梦被视为中国文学史中现实主义的高峰之作" + } + ] + + 文本: 《红楼梦》,又名《石头记》,是清代作家曹雪芹创作的中国古典四大名著之一,被誉为中国封建社会的百科全书。该书前80回由曹雪芹所著,后40回一般认为是高鹗所续。小说以贾、史、王、薛四大家族的兴衰为背景,以贾宝玉、林黛玉和薛宝钗的爱情悲剧为主线,刻画了以贾宝玉和金陵十二钗为中心的正邪两赋、贤愚并出的高度复杂的人物群像。成书于乾隆年间(1743年前后),是中国文学史上现实主义的高峰,对后世影响深远。 + + [输出] + [ + { + "source": "曹雪芹", + "target": "红楼梦", + "description": "曹雪芹是红楼梦的主要作者,创作了前80回", + "strength": 10 + }, + { + "source": "高鹗", + "target": "红楼梦", + "description": "高鹗是红楼梦后40回的续作者", + "strength": 10 + }, + { + "source": "红楼梦", + "target": "石头记", + "description": "石头记是红楼梦的别名", + "strength": 9 + }, + { + "source": "红楼梦", + "target": "中国文学史", + "description": "红楼梦被视为中国文学史中现实主义的高峰之作", + "strength": 7 + }, + { + "source": "贾宝玉", + "target": "林黛玉", + "description": "贾宝玉与林黛玉有深厚的爱情关系,是小说主线之一", + "strength": 8 + }, + { + "source": "贾宝玉", + "target": "薛宝钗", + "description": "贾宝玉与薛宝钗的关系是小说爱情悲剧主线的一部分", + "strength": 8 + }, + { + "source": "贾宝玉", + "target": "金陵十二钗", + "description": "贾宝玉是金陵十二钗故事的中心人物", + "strength": 8 + }, + { + "source": "红楼梦", + "target": "贾宝玉", + "description": "贾宝玉是红楼梦中的主要角色", + "strength": 7 + }, + { + "source": "红楼梦", + "target": "林黛玉", + "description": "林黛玉是红楼梦中的主要角色", + "strength": 7 + }, + { + "source": "红楼梦", + "target": "薛宝钗", + "description": "薛宝钗是红楼梦中的主要角色", + "strength": 7 + }, + { + "source": "红楼梦", + "target": "四大家族", + "description": "四大家族是红楼梦的背景设定", + "strength": 7 + }, + { + "source": "红楼梦", + "target": "金陵十二钗", + "description": "金陵十二钗是红楼梦中的重要概念", + "strength": 7 + }, + { + "source": "红楼梦", + "target": "乾隆年间", + "description": "红楼梦成书于乾隆年间,约1743年前后", + "strength": 6 + } + ] + +# 知识库配置 +knowledge_base: + chunk_size: 512 + chunk_overlap: 50 + split_markers: ["\n\n", "\n", "。"] + image_processing: + enable_multimodal: true diff --git a/dataset/README b/dataset/README new file mode 100644 index 0000000..6c78bca --- /dev/null +++ b/dataset/README @@ -0,0 +1,273 @@ +# QA Dataset Sampling Tool + +A comprehensive tool for sampling QA datasets and generating answers using OpenAI's GPT models. This tool helps you create high-quality question-answering datasets from large-scale collections like MS MARCO. 
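+
+The tool operates on three parquet files (queries, corpus, qrels); their exact columns are listed in the "Input Data Format" section below. If you are starting from your own raw data rather than a published sample, the inputs can be produced with a few lines of pandas. The snippet below is only an illustrative sketch (toy rows and a made-up `my_dataset/` directory, not part of the tool itself):
+
+```python
+from pathlib import Path
+
+import pandas as pd
+
+# Toy example data; replace with your own questions, passages, and relevance pairs.
+queries = pd.DataFrame({"id": ["q1", "q2"], "text": ["What is 5G?", "What is RAG?"]})
+corpus = pd.DataFrame({"id": ["p1", "p2"], "text": ["5G is the fifth generation ...", "RAG combines retrieval ..."]})
+qrels = pd.DataFrame({"qid": ["q1", "q2"], "pid": ["p1", "p2"]})
+
+# Written via pyarrow (installed below) so the sampling tool can read them directly.
+Path("my_dataset").mkdir(exist_ok=True)
+queries.to_parquet("my_dataset/queries.parquet", index=False)
+corpus.to_parquet("my_dataset/corpus.parquet", index=False)
+qrels.to_parquet("my_dataset/qrels.parquet", index=False)
+```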
+
+## Features
+
+- **Smart Sampling**: Intelligently sample queries, documents, and relevance judgments from large datasets
+- **Answer Generation**: Automatically generate high-quality answers using OpenAI's GPT models
+- **Resume Support**: Continue interrupted answer generation from where it left off
+- **Progress Tracking**: Real-time progress updates and statistics
+- **Result Visualization**: Easy-to-read display of generated QA pairs with context
+
+## Installation
+
+### Prerequisites
+
+- Python 3.7+
+- OpenAI API key
+
+### Install Dependencies
+
+```bash
+pip install pandas pyarrow openai
+```
+
+### Set Environment Variables
+
+```bash
+export OPENAI_API_KEY="your-openai-api-key"
+# Optional: Use custom OpenAI endpoint
+export OPENAI_BASE_URL="https://api.openai.com/v1"
+```
+
+### Prepare Dataset
+
+We provide pre-processed samples from popular QA datasets:
+
+- MarkrAI/msmarco_sample_autorag
+
+## Quick Start
+
+### 1. Sample Data from Large Dataset
+
+First, sample a subset of queries, documents, and relevance judgments from your full dataset:
+
+```bash
+python dataset/qa_dataset.py sample \
+    --queries ~/dataset/mmarco-queries.parquet \
+    --corpus ~/dataset/mmarco-corpus.parquet \
+    --qrels ~/dataset/mmarco-qrels.parquet \
+    --nq 100 \
+    --output_dir ./dataset/samples
+```
+
+### 2. Generate Answers
+
+Use OpenAI's GPT model to generate answers for the sampled questions:
+
+```bash
+python dataset/qa_dataset.py generate \
+    --input_dir ./dataset/samples \
+    --output_dir ./dataset/samples
+```
+
+### 3. View Results
+
+Display the generated QA pairs with their context:
+
+```bash
+python dataset/qa_dataset.py show \
+    --input_dir ./dataset/samples \
+    -n 5
+```
+
+## Detailed Usage
+
+### Sample Command
+
+Create a representative sample from your full dataset.
+
+```bash
+python dataset/qa_dataset.py sample [OPTIONS]
+```
+
+**Required Parameters:**
+- `--queries`: Path to queries parquet file (columns: `id`, `text`)
+- `--corpus`: Path to corpus parquet file (columns: `id`, `text`)
+- `--qrels`: Path to qrels parquet file (columns: `qid`, `pid`)
+
+**Optional Parameters:**
+- `--nq`: Number of queries to sample (default: 1000)
+- `--output_dir`: Output directory for sampled data (default: ./save)
+
+**Example:**
+```bash
+python dataset/qa_dataset.py sample \
+    --queries data/queries.parquet \
+    --corpus data/corpus.parquet \
+    --qrels data/qrels.parquet \
+    --nq 500 \
+    --output_dir ./my_sample
+```
+
+### Generate Command
+
+Generate answers for sampled questions using the OpenAI API.
+
+```bash
+python dataset/qa_dataset.py generate [OPTIONS]
+```
+
+**Required Parameters:**
+- `--input_dir`: Directory containing sampled data (queries.parquet, corpus.parquet, qrels.parquet)
+
+**Optional Parameters:**
+- `--output_dir`: Output directory for generated answers (default: ./save)
+
+**Features:**
+- **Resume Support**: Automatically continues from where it left off if interrupted
+- **Error Handling**: Retries failed API calls up to 3 times
+- **Progress Saving**: Saves progress after each successful answer generation
+
+**Example:**
+```bash
+python dataset/qa_dataset.py generate \
+    --input_dir ./my_sample \
+    --output_dir ./my_sample
+```
+
+### Show Command
+
+Display generated QA pairs with full context. 
+ +```bash +python dataset/qa_dataset.py show [OPTIONS] +``` + +**Required Parameters:** +- `--input_dir`: Directory containing QA data (queries.parquet, corpus.parquet, qrels.parquet, qas.parquet, answers.parquet) + +**Optional Parameters:** +- `-n`: Number of results to display (default: 5) + +**Example:** +```bash +python dataset/qa_dataset.py show \ + --input_dir ./my_sample \ + -n 3 +``` + +## Input Data Format + +### Queries File (queries.parquet) +| Column | Type | Description | +|--------|------|-------------| +| id | string | Unique query identifier | +| text | string | The actual question text | + +### Corpus File (corpus.parquet) +| Column | Type | Description | +|--------|------|-------------| +| id | string | Unique passage/document identifier | +| text | string | The passage/document content | + +### Qrels File (qrels.parquet) +| Column | Type | Description | +|--------|------|-------------| +| qid | string | Query ID (matches queries.id) | +| pid | string | Passage ID (matches corpus.id) | + +## Output Files + +After running all commands, your output directory will contain: + +### Sampled Data +- `queries.parquet`: Sampled queries subset +- `corpus.parquet`: Sampled documents subset +- `qrels.parquet`: Sampled relevance judgments + +### Generated Answers +- `answers.parquet`: Generated answers with unique IDs +- `qas.parquet`: Question-answer mapping (qid → aid) + +## Advanced Usage + +### Custom OpenAI Configuration + +You can use different OpenAI models or endpoints: + +```bash +# Use GPT-4 Turbo +export OPENAI_API_KEY="your-key" +python dataset/qa_dataset.py generate --input_dir ./samples + +# Use Azure OpenAI +export OPENAI_API_KEY="azure-key" +export OPENAI_BASE_URL="https://your-resource.openai.azure.com/openai/deployments/gpt-4" +python dataset/qa_dataset.py generate --input_dir ./samples +``` + +### Large Dataset Sampling + +For very large datasets, consider sampling in batches: + +```bash +# First batch +python dataset/qa_dataset.py sample --nq 1000 --output_dir ./batch1 +python dataset/qa_dataset.py generate --input_dir ./batch1 + +# Second batch +python dataset/qa_dataset.py sample --nq 1000 --output_dir ./batch2 +python dataset/qa_dataset.py generate --input_dir ./batch2 +``` + +## Troubleshooting + +### Common Issues + +**1. OpenAI API Errors** +- Ensure your API key is set correctly: `echo $OPENAI_API_KEY` +- Check your API quota and billing status +- Verify network connectivity to OpenAI + +**2. Memory Issues with Large Datasets** +- Reduce `--nq` parameter for smaller samples +- Ensure sufficient RAM for pandas operations +- Consider using smaller parquet files + +**3. File Not Found Errors** +- Verify all input file paths are correct +- Ensure parquet files have correct column names +- Check file permissions + +### Debug Mode + +Enable verbose output by adding print statements or using Python debugger: + +```bash +python -m pdb dataset/qa_dataset.py sample --queries ... +``` + +## Example Workflow + +```bash +# 1. Setup environment +export OPENAI_API_KEY="sk-..." + +# 2. Sample 200 queries from MS MARCO +python dataset/qa_dataset.py sample \ + --queries ~/mmarco/queries.parquet \ + --corpus ~/mmarco/corpus.parquet \ + --qrels ~/mmarco/qrels.parquet \ + --nq 200 \ + --output_dir ./marco_sample + +# 3. Generate answers (may take time depending on API rate limits) +python dataset/qa_dataset.py generate \ + --input_dir ./marco_sample \ + --output_dir ./marco_sample + +# 4. 
Review results +python dataset/qa_dataset.py show \ + --input_dir ./marco_sample \ + -n 10 +``` + +## Contributing + +Feel free to submit issues and enhancement requests! + +## License + +MIT License - feel free to use this tool for your research and projects. \ No newline at end of file diff --git a/dataset/README_zh.md b/dataset/README_zh.md new file mode 100644 index 0000000..2c4e5b0 --- /dev/null +++ b/dataset/README_zh.md @@ -0,0 +1,284 @@ +# QA数据集采样工具 + +一个全面的QA数据集采样工具,使用OpenAI的GPT模型生成答案。该工具帮助您从大规模数据集(如MS MARCO)创建高质量的问答数据集。 + +## 功能特性 + +- **智能采样**:智能地从大型数据集中采样查询、文档和相关性判断 +- **答案生成**:使用OpenAI的GPT模型自动生成高质量答案 +- **断点续传**:支持中断后继续生成,从上次位置开始 +- **进度跟踪**:实时进度更新和统计信息 +- **结果可视化**:易于阅读的问答对展示,包含完整上下文 + +## 安装指南 + +### 系统要求 + +- Python 3.7+ +- OpenAI API密钥 + +### 安装依赖 + +```bash +pip install pandas pyarrow openai +``` + +### 设置环境变量 + +```bash +export OPENAI_API_KEY="你的openai-api-key" +# 可选:使用自定义OpenAI端点 +export OPENAI_BASE_URL="https://api.openai.com/v1" +``` + +### 准备数据集 + +您可以使用任何符合格式要求的QA数据集,或下载预处理好的样本: + +**使用HuggingFace/ModelScope样本** +我们提供了来自流行QA数据集的预处理样本: +- MarkrAI/eli5_sample_autorag +- MarkrAI/msmarco_sample_autorag +- MarkrAI/triviaqa_sample_autorag +- gnekt/hotpotqa_small_sample_autorag + +**使用您自己的数据集** +确保您的数据集包含以下文件: +- `queries.parquet`(列:id, text) +- `corpus.parquet`(列:id, text) +- `qrels.parquet`(列:qid, pid) + +## 快速开始 + +### 1. 从大型数据集采样 + +首先,从完整数据集中采样查询、文档和相关性判断的子集: + +```bash +python dataset/qa_dataset.py sample \ + --queries ~/dataset/mmarco-queries.parquet \ + --corpus ~/dataset/mmarco-corpus.parquet \ + --qrels ~/dataset/mmarco-qrels.parquet \ + --nq 100 \ + --output_dir ./dataset/samples +``` + +### 2. 生成答案 + +使用OpenAI的GPT模型为采样的问答生成答案: + +```bash +python dataset/qa_dataset.py generate \ + --input_dir ./dataset/samples \ + --output_dir ./dataset/samples +``` + +### 3. 
查看结果 + +展示生成的问答对及其上下文: + +```bash +python dataset/qa_dataset.py show \ + --input_dir ./dataset/samples \ + -n 5 +``` + +## 详细使用说明 + +### 采样命令 + +从完整数据集中创建代表性样本。 + +```bash +python dataset/qa_dataset.py sample [选项] +``` + +**必需参数:** +- `--queries`:查询parquet文件路径(列:`id`, `text`) +- `--corpus`:语料库parquet文件路径(列:`id`, `text`) +- `--qrels`:相关性判断parquet文件路径(列:`qid`, `pid`) + +**可选参数:** +- `--nq`:要采样的查询数量(默认:1000) +- `--output_dir`:采样数据输出目录(默认:./save) + +**示例:** +```bash +python dataset/qa_dataset.py sample \ + --queries data/queries.parquet \ + --corpus data/corpus.parquet \ + --qrels data/qrels.parquet \ + --nq 500 \ + --output_dir ./my_sample +``` + +### 生成命令 + +使用OpenAI API为采样问题生成答案。 + +```bash +python dataset/qa_dataset.py generate [选项] +``` + +**必需参数:** +- `--input_dir`:包含采样数据的目录(queries.parquet, corpus.parquet, qrels.parquet) + +**可选参数:** +- `--output_dir`:生成答案的输出目录(默认:./save) + +**特性:** +- **断点续传**:中断后自动从上次位置继续 +- **错误处理**:API调用失败自动重试3次 +- **进度保存**:每成功生成一个答案就保存进度 + +**示例:** +```bash +python dataset/qa_dataset.py generate \ + --input_dir ./my_sample \ + --output_dir ./my_sample +``` + +### 展示命令 + +展示生成的问答对及完整上下文。 + +```bash +python dataset/qa_dataset.py show [选项] +``` + +**必需参数:** +- `--input_dir`:包含QA数据的目录(queries.parquet, corpus.parquet, qrels.parquet, qas.parquet, answers.parquet) + +**可选参数:** +- `-n`:要展示的结果数量(默认:5) + +**示例:** +```bash +python dataset/qa_dataset.py show \ + --input_dir ./my_sample \ + -n 3 +``` + +## 输入数据格式 + +### 查询文件 (queries.parquet) +| 列名 | 类型 | 描述 | +|------|------|------| +| id | string | 唯一查询标识符 | +| text | string | 实际的问题文本 | + +### 语料库文件 (corpus.parquet) +| 列名 | 类型 | 描述 | +|------|------|------| +| id | string | 唯一段落/文档标识符 | +| text | string | 段落/文档内容 | + +### 相关性判断文件 (qrels.parquet) +| 列名 | 类型 | 描述 | +|------|------|------| +| qid | string | 查询ID(匹配queries.id) | +| pid | string | 段落ID(匹配corpus.id) | + +## 输出文件 + +运行所有命令后,输出目录将包含: + +### 采样数据 +- `queries.parquet`:采样的查询子集 +- `corpus.parquet`:采样的文档子集 +- `qrels.parquet`:采样的相关性判断 + +### 生成的答案 +- `answers.parquet`:生成的答案(含唯一ID) +- `qas.parquet`:问答映射(qid → aid) + +## 高级用法 + +### 自定义OpenAI配置 + +您可以使用不同的OpenAI模型或端点: + +```bash +# 使用GPT-4 Turbo +export OPENAI_API_KEY="你的密钥" +python dataset/qa_dataset.py generate --input_dir ./samples + +# 使用Azure OpenAI +export OPENAI_API_KEY="azure密钥" +export OPENAI_BASE_URL="https://你的资源.openai.azure.com/openai/deployments/gpt-4" +python dataset/qa_dataset.py generate --input_dir ./samples +``` + +### 大型数据集采样 + +对于非常大的数据集,建议分批采样: + +```bash +# 第一批 +python dataset/qa_dataset.py sample --nq 1000 --output_dir ./batch1 +python dataset/qa_dataset.py generate --input_dir ./batch1 + +# 第二批 +python dataset/qa_dataset.py sample --nq 1000 --output_dir ./batch2 +python dataset/qa_dataset.py generate --input_dir ./batch2 +``` + +## 故障排除 + +### 常见问题 + +**1. OpenAI API错误** +- 确保API密钥设置正确:`echo $OPENAI_API_KEY` +- 检查API配额和账单状态 +- 验证与OpenAI的网络连接 + +**2. 大数据集内存问题** +- 减小`--nq`参数以获得更小的样本 +- 确保pandas操作有足够的RAM +- 考虑使用更小的parquet文件 + +**3. 文件未找到错误** +- 验证所有输入文件路径是否正确 +- 确保parquet文件有正确的列名 +- 检查文件权限 + +### 调试模式 + +通过添加打印语句或使用Python调试器启用详细输出: + +```bash +python -m pdb dataset/qa_dataset.py sample --queries ... +``` + +## 示例工作流 + +```bash +# 1. 设置环境 +export OPENAI_API_KEY="sk-..." + +# 2. 从MS MARCO采样200个查询 +python dataset/qa_dataset.py sample \ + --queries ~/mmarco/queries.parquet \ + --corpus ~/mmarco/corpus.parquet \ + --qrels ~/mmarco/qrels.parquet \ + --nq 200 \ + --output_dir ./marco_sample + +# 3. 
生成答案(根据API速率限制可能需要一些时间) +python dataset/qa_dataset.py generate \ + --input_dir ./marco_sample \ + --output_dir ./marco_sample + +# 4. 查看结果 +python dataset/qa_dataset.py show \ + --input_dir ./marco_sample \ + -n 10 +``` + +## 贡献 + +欢迎提交问题和功能增强请求! + +## 许可证 + +MIT许可证 - 可自由用于研究和项目。 \ No newline at end of file diff --git a/dataset/qa_dataset.py b/dataset/qa_dataset.py new file mode 100644 index 0000000..45f8265 --- /dev/null +++ b/dataset/qa_dataset.py @@ -0,0 +1,381 @@ +""" +QA Dataset Sampling Tool + +``` +pip install pandas pyarrow +pip install openai +``` + +# 采样数据 +python dataset/qa_dataset.py sample \ + --queries ~/dataset/mmarco-queries.parquet \ + --corpus ~/dataset/mmarco-corpus.parquet \ + --qrels ~/dataset/mmarco-qrels.parquet \ + --nq 100 \ + --output_dir ./dataset/samples + +# 生成答案(基于采样结果) +python dataset/qa_dataset.py generate \ + --input_dir ./dataset/samples \ + --output_dir ./dataset/samples + +# 展示结果 +python dataset/qa_dataset.py show \ + --input_dir ./dataset/samples \ + -n 1 +""" + +import os +from pathlib import Path +import argparse + +import pandas as pd +import openai + + +def read_parquet(path): + return pd.read_parquet(path) + + +def save_to_parquet(df: pd.DataFrame, path: str): + """Save DataFrame to parquet file""" + Path(path).parent.mkdir(parents=True, exist_ok=True) + df.to_parquet(path) + print(f"Saved to {path}") + + +def print_stats(df: pd.DataFrame, name: str): + """Print statistics of a DataFrame""" + print(f"\n{name} Statistics:") + print(f"- Total records: {len(df)}") + if "id" in df.columns: + print(f"- Unique ids: {df['id'].nunique()}") + if "qid" in df.columns: + print(f"- Unique qids: {df['qid'].nunique()}") + if "pid" in df.columns: + print(f"- Unique pids: {df['pid'].nunique()}") + + +def sample_data( + queries: pd.DataFrame, corpus: pd.DataFrame, qrels: pd.DataFrame, nq=1000 +): + """ + Sample data from the dataset with validation checks. + + Args: + queries: DataFrame with qid and text columns (one-to-one) + corpus: DataFrame with pid and text columns (one-to-one) + qrels: DataFrame with qid and pid columns (many-to-many) + nq: Number of queries to sample (default: 1000) + + Returns: + Tuple of (sampled_queries, sampled_corpus, sampled_qrels) + """ + # 1. Filter qrels to only include qids that exist in queries + valid_qids = set(queries["id"]) + qrels = qrels[qrels["qid"].isin(valid_qids)] + + # 2. Filter qrels to only include pids that exist in corpus + valid_pids = set(corpus["id"]) + qrels = qrels[qrels["pid"].isin(valid_pids)] + + # 3. Sample queries (ensure we have enough qrels samples for each) + # Get qids with most associated pids to ensure diversity + qid_counts = qrels["qid"].value_counts() + sampled_qids = qid_counts.nlargest(min(nq, len(qid_counts))).index + + # 4. Get all pids associated with sampled qids + sampled_qrels = qrels[qrels["qid"].isin(sampled_qids)] + sampled_pids = set(sampled_qrels["pid"]) + + # 5. Add extra pids from corpus for redundancy (20% of sampled pids) + extra_pids = set(corpus["id"].sample(int(0.2 * len(sampled_pids)))) + all_pids = sampled_pids.union(extra_pids) + + # 6. 
Create final sampled datasets + sampled_queries = queries[queries["id"].isin(sampled_qids)] + sampled_corpus = corpus[corpus["id"].isin(all_pids)] + + return sampled_queries, sampled_corpus, sampled_qrels + + +class QAAnsweringSystem: + def __init__( + self, queries: pd.DataFrame, corpus: pd.DataFrame, qrels: pd.DataFrame + ): + """ + Initialize QA system with data + + Args: + queries: DataFrame with qid and text columns + corpus: DataFrame with pid and text columns + qrels: DataFrame with qid and pid mapping + """ + self.queries = queries + self.corpus = corpus + self.qrels = qrels + self.client = openai.Client( + api_key=os.getenv("OPENAI_API_KEY"), + base_url=os.getenv("OPENAI_BASE_URL"), + ) + + # Create lookup dictionaries + self.qid_to_text = dict(zip(queries["id"], queries["text"])) + self.pid_to_text = dict(zip(corpus["id"], corpus["text"])) + self.qid_to_pids = qrels.groupby("qid")["pid"].apply(list).to_dict() + + def get_context_for_qid(self, qid: str) -> str: + """ + Get all relevant text for a query ID + + Args: + qid: Query ID to search for + + Returns: + Combined context text from all related passages + """ + if qid not in self.qid_to_pids: + raise ValueError("Question ID not found") + + context_parts = [] + print(f"Context for Question ID {qid}: {self.qid_to_pids[qid]}") + for pid in self.qid_to_pids[qid]: + if pid in self.pid_to_text: + context_parts.append(self.pid_to_text[pid]) + + return "\n\n".join(context_parts) + + def answer_question(self, qid: str, model: str = "gpt-4o-2024-05-13") -> str: + """ + Use OpenAI API to answer question based on qid context + + Args: + qid: Query ID to answer + model: OpenAI model to use + + Returns: + Generated answer from LLM + """ + if qid not in self.qid_to_text: + raise ValueError("Question ID not found") + + question = self.qid_to_text[qid] + context = self.get_context_for_qid(qid) + + if not context: + raise ValueError("No context found for this question") + + prompt = f"""Answer the question based on the context below. Keep the answer concise. 
+ +Question: {question} + +Context: {context} + +Answer:""" + response = self.client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + temperature=0.3, + ) + return response.choices[0].message.content + + +def sample_command(args): + """Handle sample command""" + # Load data + print("Loading data...") + queries = read_parquet(args.queries) + corpus = read_parquet(args.corpus) + qrels = read_parquet(args.qrels) + + # Print original stats + print("\nOriginal Dataset Statistics:") + print_stats(queries, "Queries") + print_stats(corpus, "Corpus") + print_stats(qrels, "Qrels") + + # Sample data + print(f"\nSampling {args.nq} queries...") + sampled_queries, sampled_corpus, sampled_qrels = sample_data( + queries, corpus, qrels, args.nq + ) + + # Print sampled stats + print("\nSampled Dataset Statistics:") + print_stats(sampled_queries, "Sampled Queries") + print_stats(sampled_corpus, "Sampled Corpus") + print_stats(sampled_qrels, "Sampled Qrels") + + # Save sampled data + print("\nSaving sampled data...") + save_to_parquet(sampled_queries, f"{args.output_dir}/queries.parquet") + save_to_parquet(sampled_corpus, f"{args.output_dir}/corpus.parquet") + save_to_parquet(sampled_qrels, f"{args.output_dir}/qrels.parquet") + print("\nSampling completed successfully!") + + +def generate_answers(input_dir: str, output_dir: str, max_retries: int = 3): + """ + Generate answers for sampled queries with resume support + + Args: + input_dir: Directory containing sampled queries/corpus/qrels + output_dir: Directory to save answer files + max_retries: Maximum retry attempts for failed queries + """ + print("\nLoading sampled data...") + queries = read_parquet(f"{input_dir}/queries.parquet") + corpus = read_parquet(f"{input_dir}/corpus.parquet") + qrels = read_parquet(f"{input_dir}/qrels.parquet") + + # Try to load existing answers if any + answers_path = f"{output_dir}/answers.parquet" + qa_pairs_path = f"{output_dir}/qas.parquet" + + try: + existing_answers = read_parquet(answers_path) + existing_qas = read_parquet(qa_pairs_path) + processed_qids = set(existing_qas["qid"]) + print(f"\nFound {len(processed_qids)} previously processed queries") + except (FileNotFoundError, KeyError): + print("No existing answers found, use empty state") + existing_answers = pd.DataFrame(columns=["id", "text"]) + existing_qas = pd.DataFrame(columns=["qid", "aid"]) + processed_qids = set() + + qa_system = QAAnsweringSystem(queries, corpus, qrels) + + answers = existing_answers.to_dict("records") + qa_pairs = existing_qas.to_dict("records") + answer_id_counter = len(answers) + 1 + + for qid in queries["id"]: + if qid in processed_qids: + continue + + retry_count = 0 + while retry_count <= max_retries: + try: + answer_text = qa_system.answer_question(qid) + aid = answer_id_counter + answers.append({"id": aid, "text": answer_text}) + qa_pairs.append({"qid": qid, "aid": aid}) + answer_id_counter += 1 + + # Save progress after each successful answer + save_to_parquet(pd.DataFrame(answers), answers_path) + save_to_parquet(pd.DataFrame(qa_pairs), qa_pairs_path) + print(f"Processed qid: {qid}") + break + except (openai.APIError, openai.APIConnectionError) as e: + retry_count += 1 + if retry_count > max_retries: + print( + f"\nFailed to process qid {qid} after {max_retries} attempts: {str(e)}" + ) + # Save failed state + save_to_parquet(pd.DataFrame(answers), answers_path) + save_to_parquet(pd.DataFrame(qa_pairs), qa_pairs_path) + else: + print(f"\nRetry {retry_count} for qid {qid}...") + + 
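+    # Note: progress was written to parquet after every successful answer above,
+    # so an interrupted run can be resumed by simply re-running the generate command.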
print("\nAnswer generation completed!") + print(f"Total queries: {len(queries)}") + print(f"Successfully processed: {len(qa_pairs)}") + print(f"Failed queries: {len(queries) - len(qa_pairs)}") + + +def show_results(input_dir: str, n: int = 5): + """ + Show n random results with question, context and answer + + Args: + input_dir: Directory containing the QA data + n: Number of results to show (default: 5) + """ + print(f"\nShowing {n} random results:") + + # Load data + queries = read_parquet(f"{input_dir}/queries.parquet") + corpus = read_parquet(f"{input_dir}/corpus.parquet") + qrels = read_parquet(f"{input_dir}/qrels.parquet") + qa_pairs = read_parquet(f"{input_dir}/qas.parquet") + answers = read_parquet(f"{input_dir}/answers.parquet") + + # Create QA system for context lookup + qa_system = QAAnsweringSystem(queries, corpus, qrels) + + # Get first n QA pairs + for _, row in qa_pairs.sample(n).iterrows(): + qid = row["qid"] + aid = row["aid"] + + # Get question + question = qa_system.qid_to_text[qid] + + # Get context + context = qa_system.get_context_for_qid(qid) + + # Get answer + answer = answers[answers["id"] == aid]["text"].values[0] + + print("\n" + "=" * 50) + print(f"Question (qid={qid}):\n{question}") + print("\nContext:") + print(context) + print(f"\nAnswer (aid={aid}):\n{answer}") + print("=" * 50 + "\n") + + +def main(): + # Set up command line arguments + parser = argparse.ArgumentParser(description="QA Dataset Tool") + subparsers = parser.add_subparsers(dest="command", required=True) + + # Sample command + sample_parser = subparsers.add_parser("sample", help="Sample dataset") + sample_parser.add_argument( + "--queries", type=str, required=True, help="Path to queries parquet file" + ) + sample_parser.add_argument( + "--corpus", type=str, required=True, help="Path to corpus parquet file" + ) + sample_parser.add_argument( + "--qrels", type=str, required=True, help="Path to qrels parquet file" + ) + sample_parser.add_argument( + "--nq", type=int, default=1000, help="Number of queries to sample" + ) + sample_parser.add_argument( + "--output_dir", type=str, default="./save", help="Output directory" + ) + sample_parser.set_defaults(func=sample_command) + + # Generate command + generate_parser = subparsers.add_parser("generate", help="Generate answers") + generate_parser.add_argument( + "--input_dir", type=str, required=True, help="Directory with sampled data" + ) + generate_parser.add_argument( + "--output_dir", type=str, default="./save", help="Output directory" + ) + generate_parser.set_defaults( + func=lambda args: generate_answers(args.input_dir, args.output_dir) + ) + + # Show command + show_parser = subparsers.add_parser("show", help="Show QA results") + show_parser.add_argument( + "--input_dir", type=str, required=True, help="Directory with QA data" + ) + show_parser.add_argument( + "-n", type=int, default=5, help="Number of results to show (default: 5)" + ) + show_parser.set_defaults(func=lambda args: show_results(args.input_dir, args.n)) + + args = parser.parse_args() + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/dataset/samples/answers.parquet b/dataset/samples/answers.parquet new file mode 100644 index 0000000..0dc3ce5 Binary files /dev/null and b/dataset/samples/answers.parquet differ diff --git a/dataset/samples/corpus.parquet b/dataset/samples/corpus.parquet new file mode 100644 index 0000000..5984746 Binary files /dev/null and b/dataset/samples/corpus.parquet differ diff --git a/dataset/samples/qas.parquet b/dataset/samples/qas.parquet new 
file mode 100644 index 0000000..faa461e Binary files /dev/null and b/dataset/samples/qas.parquet differ diff --git a/dataset/samples/qrels.parquet b/dataset/samples/qrels.parquet new file mode 100644 index 0000000..1fde976 Binary files /dev/null and b/dataset/samples/qrels.parquet differ diff --git a/dataset/samples/queries.parquet b/dataset/samples/queries.parquet new file mode 100644 index 0000000..dc54a20 Binary files /dev/null and b/dataset/samples/queries.parquet differ diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..84e8e71 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,172 @@ +version: "3.8" + +services: + app: + build: + context: . + dockerfile: docker/Dockerfile.app + container_name: WeKnora-app + ports: + - "8080:8080" + volumes: + - data-files:/data/files + - ./config:/app/config + environment: + - GIN_MODE=${GIN_MODE} + - DB_DRIVER=${DB_DRIVER} + - DB_HOST=${DB_HOST} + - DB_PORT=${DB_PORT} + - DB_USER=${DB_USER} + - DB_PASSWORD=${DB_PASSWORD} + - DB_NAME=${DB_NAME} + - TZ=Asia/Shanghai + - OTEL_EXPORTER_OTLP_ENDPOINT=jaeger:4317 + - OTEL_SERVICE_NAME=WeKnora + - OTEL_TRACES_EXPORTER=otlp + - OTEL_METRICS_EXPORTER=none + - OTEL_LOGS_EXPORTER=none + - OTEL_PROPAGATORS=tracecontext,baggage + - RETRIEVE_DRIVER=${RETRIEVE_DRIVER} + - ELASTICSEARCH_ADDR=${ELASTICSEARCH_ADDR} + - ELASTICSEARCH_USERNAME=${ELASTICSEARCH_USERNAME} + - ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD} + - ELASTICSEARCH_INDEX=${ELASTICSEARCH_INDEX} + - DOCREADER_ADDR=docreader:50051 + - STORAGE_TYPE=${STORAGE_TYPE} + - LOCAL_STORAGE_BASE_DIR=${LOCAL_STORAGE_BASE_DIR} + - MINIO_ENDPOINT=${MINIO_ENDPOINT} + - MINIO_ACCESS_KEY_ID=${MINIO_ACCESS_KEY_ID} + - MINIO_SECRET_ACCESS_KEY=${MINIO_SECRET_ACCESS_KEY} + - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL} + - STREAM_MANAGER_TYPE=${STREAM_MANAGER_TYPE} + - REDIS_ADDR=${REDIS_HOST}:${REDIS_PORT} + - REDIS_PASSWORD=${REDIS_PASSWORD} + - REDIS_DB=${REDIS_DB} + - REDIS_PREFIX=${REDIS_PREFIX} + - ENABLE_GRAPH_RAG=${ENABLE_GRAPH_RAG} + - TENANT_AES_KEY=${TENANT_AES_KEY} + - INIT_LLM_MODEL_NAME=${INIT_LLM_MODEL_NAME} + - INIT_LLM_MODEL_BASE_URL=${INIT_LLM_MODEL_BASE_URL} + - INIT_LLM_MODEL_API_KEY=${INIT_LLM_MODEL_API_KEY} + - INIT_EMBEDDING_MODEL_NAME=${INIT_EMBEDDING_MODEL_NAME} + - INIT_EMBEDDING_MODEL_BASE_URL=${INIT_EMBEDDING_MODEL_BASE_URL} + - INIT_EMBEDDING_MODEL_API_KEY=${INIT_EMBEDDING_MODEL_API_KEY} + - INIT_EMBEDDING_MODEL_DIMENSION=${INIT_EMBEDDING_MODEL_DIMENSION} + - INIT_EMBEDDING_MODEL_ID=${INIT_EMBEDDING_MODEL_ID} + - INIT_RERANK_MODEL_NAME=${INIT_RERANK_MODEL_NAME} + - INIT_RERANK_MODEL_BASE_URL=${INIT_RERANK_MODEL_BASE_URL} + - INIT_RERANK_MODEL_API_KEY=${INIT_RERANK_MODEL_API_KEY} + - INIT_TEST_TENANT_ID=${INIT_TEST_TENANT_ID} + - INIT_TEST_KNOWLEDGE_BASE_ID=${INIT_TEST_KNOWLEDGE_BASE_ID} + depends_on: + redis: + condition: service_started + postgres: + condition: service_healthy + networks: + - WeKnora-network + restart: unless-stopped + extra_hosts: + - "host.docker.internal:host-gateway" + + frontend: + build: + context: ./frontend + dockerfile: Dockerfile + container_name: WeKnora-frontend + ports: + - "80:80" + depends_on: + - app + networks: + - WeKnora-network + restart: unless-stopped + + docreader: + build: + context: . 
+ dockerfile: docker/Dockerfile.docreader + container_name: WeKnora-docreader + ports: + - "50051:50051" + environment: + - COS_SECRET_ID=${COS_SECRET_ID} + - COS_SECRET_KEY=${COS_SECRET_KEY} + - COS_REGION=${COS_REGION} + - COS_BUCKET_NAME=${COS_BUCKET_NAME} + - COS_APP_ID=${COS_APP_ID} + - COS_PATH_PREFIX=${COS_PATH_PREFIX} + - COS_ENABLE_OLD_DOMAIN=${COS_ENABLE_OLD_DOMAIN} + - VLM_MODEL_BASE_URL=${VLM_MODEL_BASE_URL} + - VLM_MODEL_NAME=${VLM_MODEL_NAME} + networks: + - WeKnora-network + restart: unless-stopped + + jaeger: + image: jaegertracing/all-in-one:latest + ports: + - "6831:6831/udp" # Jaeger Thrift接收器 + - "6832:6832/udp" # Jaeger Thrift接收器(Compact) + - "5778:5778" # 配置端口 + - "16686:16686" # Web UI + - "4317:4317" # OTLP gRPC接收器 + - "4318:4318" # OTLP HTTP接收器 + - "14250:14250" # 接收模型端口 + - "14268:14268" # Jaeger HTTP接收器 + - "9411:9411" # Zipkin兼容性端口 + environment: + - COLLECTOR_OTLP_ENABLED=true + - COLLECTOR_ZIPKIN_HOST_PORT=:9411 + volumes: + - jaeger_data:/var/lib/jaeger # 持久化 Jaeger 数据 + networks: + - WeKnora-network + restart: unless-stopped + # 修改的PostgreSQL配置 + postgres: + image: paradedb/paradedb:latest + container_name: WeKnora-postgres + ports: + - "${DB_PORT}:5432" + environment: + - POSTGRES_USER=${DB_USER} + - POSTGRES_PASSWORD=${DB_PASSWORD} + - POSTGRES_DB=${DB_NAME} + volumes: + - postgres-data:/var/lib/postgresql/data + - ./migrations/paradedb:/docker-entrypoint-initdb.d + networks: + - WeKnora-network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${DB_USER}"] + interval: 10s # 增加时间间隔 + timeout: 10s # 增加超时时间 + retries: 3 # 减少重试次数,让失败更快反馈 + start_period: 30s # 给予初始启动更多时间 + restart: unless-stopped + # 添加停机时的优雅退出时间 + stop_grace_period: 1m + + redis: + image: redis:7.0-alpine + container_name: WeKnora-redis + ports: + - "${REDIS_PORT}:6379" + volumes: + - redis_data:/data + command: redis-server --appendonly yes --requirepass ${REDIS_PASSWORD} + restart: always + networks: + - WeKnora-network + +networks: + WeKnora-network: + driver: bridge + +volumes: + postgres-data: + data-files: + jaeger_data: + redis_data: diff --git a/docker/Dockerfile.app b/docker/Dockerfile.app new file mode 100644 index 0000000..197e865 --- /dev/null +++ b/docker/Dockerfile.app @@ -0,0 +1,76 @@ +# Build stage +FROM golang:1.24-alpine AS builder + +WORKDIR /app + +# Install dependencies +RUN apk add --no-cache git build-base + +# 通过构建参数接收敏感信息 +ARG GOPRIVATE_ARG +ARG GOPROXY_ARG +ARG GOSUMDB_ARG=off + +# 设置Go环境变量 +ENV GOPRIVATE=${GOPRIVATE_ARG} +ENV GOPROXY=${GOPROXY_ARG} +ENV GOSUMDB=${GOSUMDB_ARG} + +# Copy go mod and sum files +COPY go.mod go.sum ./ +RUN go mod download + +ENV CGO_ENABLED=1 +# Install migrate tool +RUN go install -tags 'postgres' github.com/golang-migrate/migrate/v4/cmd/migrate@latest + +# Copy source code +COPY . . + +# Build the application +RUN make build-prod + +# Final stage +FROM alpine:3.17 + +WORKDIR /app + +# Install runtime dependencies +RUN echo 'http://dl-cdn.alpinelinux.org/alpine/edge/main' > /etc/apk/repositories +RUN apk update --allow-untrusted +RUN apk upgrade --allow-untrusted +RUN apk add --no-cache build-base postgresql17-client mysql-client ca-certificates tzdata sed curl bash supervisor vim wget curl --allow-untrusted + +# Copy the binary from the builder stage +COPY --from=builder /app/WeKnora . 
+COPY --from=builder /app/config ./config +COPY --from=builder /app/scripts ./scripts +COPY --from=builder /app/migrations ./migrations +COPY --from=builder /app/dataset/samples ./dataset/samples + +# Copy migrate tool from builder stage +COPY --from=builder /go/bin/migrate /usr/local/bin/ +COPY --from=builder /go/pkg/mod/github.com/yanyiwu /go/pkg/mod/github.com/yanyiwu/ + +# Make scripts executable +RUN chmod +x ./scripts/*.sh + +# Setup supervisor configuration +RUN mkdir -p /etc/supervisor.d/ +COPY docker/config/supervisord.conf /etc/supervisor.d/supervisord.conf + +# Expose ports +EXPOSE 8080 + +# Set environment variables +ENV CGO_ENABLED=1 + +# Create a non-root user and switch to it +RUN mkdir -p /data/files && \ + adduser -D -g '' appuser && \ + chown -R appuser:appuser /app /data/files + +# Run supervisor instead of direct application start +CMD ["supervisord", "-c", "/etc/supervisor.d/supervisord.conf"] +# CMD ["sh", "/app/scripts/start_app.sh"] +# CMD ["sleep", "infinity"] \ No newline at end of file diff --git a/docker/Dockerfile.docreader b/docker/Dockerfile.docreader new file mode 100644 index 0000000..3cec8a2 --- /dev/null +++ b/docker/Dockerfile.docreader @@ -0,0 +1,132 @@ +ARG PLATFORM=linux/amd64 + +# 构建阶段 +FROM --platform=${PLATFORM} python:3.9-slim AS builder + +WORKDIR /app + +# 安装构建依赖 +RUN apt-get update && apt-get install -y \ + gcc \ + python3-dev \ + libjpeg-dev \ + zlib1g-dev \ + libpq-dev \ + libffi-dev \ + libgl1-mesa-glx \ + libglib2.0-0 \ + wget \ + antiword \ + curl \ + unzip \ + && rm -rf /var/lib/apt/lists/* + +# 安装 protoc +RUN curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.19.4/protoc-3.19.4-linux-x86_64.zip && \ + unzip protoc-3.19.4-linux-x86_64.zip -d /usr/local && \ + chmod +x /usr/local/bin/protoc && \ + rm protoc-3.19.4-linux-x86_64.zip + +# 复制依赖文件 +COPY services/docreader/requirements.txt . 
+ +# 安装依赖 +RUN pip install --no-cache-dir -r requirements.txt + +# 预下载 PP-OCRv5 模型 +RUN mkdir -p /root/.paddlex/official_models + +RUN wget https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_det_infer.tar \ + -O /root/.paddlex/official_models/PP-OCRv5_server_det_infer.tar && \ + wget https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_rec_infer.tar \ + -O /root/.paddlex/official_models/PP-OCRv5_server_rec_infer.tar + +# 解压模型文件 +RUN tar -xf /root/.paddlex/official_models/PP-OCRv5_server_det_infer.tar -C /root/.paddlex/official_models/ && \ + tar -xf /root/.paddlex/official_models/PP-OCRv5_server_rec_infer.tar -C /root/.paddlex/official_models/ + +# 复制源代码和生成脚本 +COPY services/docreader/src/ /app/src/ +COPY services/docreader/scripts/ /app/scripts/ + +# 使用正确的Python模块路径运行ocr.py +# RUN cd /app && PYTHONPATH=/app python -m src.parser.ocr + +# 确保模型目录存在 +RUN ls -la /root/.paddlex/official_models + +# 生成 protobuf 代码 +RUN chmod +x /app/scripts/generate_proto.sh && /app/scripts/generate_proto.sh + +# 运行阶段 +FROM --platform=${PLATFORM} python:3.9-slim AS runner + +WORKDIR /app + +# 安装运行时依赖 +RUN apt-get update && apt-get install -y \ + libjpeg62-turbo \ + libpq5 \ + wget \ + gnupg \ + libgl1-mesa-glx \ + libglib2.0-0 \ + antiword \ + supervisor \ + && rm -rf /var/lib/apt/lists/* + +# 安装所需依赖 +RUN apt-get update && apt-get install -y \ + wget \ + vim \ + tar \ + dpkg \ + libxinerama1 \ + libfontconfig1 \ + libdbus-glib-1-2 \ + libcairo2 \ + libcups2 \ + libglu1-mesa \ + libsm6 \ + && rm -rf /var/lib/apt/lists/* + +# 下载并安装最新版本的 LibreOffice 25.2.4 +RUN mkdir -p /tmp/libreoffice && \ + cd /tmp/libreoffice && \ + wget -q https://mirrors.tuna.tsinghua.edu.cn/libreoffice/libreoffice/stable/25.2.4/deb/x86_64/LibreOffice_25.2.4_Linux_x86-64_deb.tar.gz && \ + tar -xzf LibreOffice_25.2.4_Linux_x86-64_deb.tar.gz && \ + cd LibreOffice_25.2.4*_Linux_x86-64_deb/DEBS && \ + dpkg -i *.deb && \ + cd / && \ + rm -rf /tmp/libreoffice + +# 设置 LibreOffice 环境变量 +ENV LIBREOFFICE_PATH=/opt/libreoffice25.2/program/soffice + +# 从构建阶段复制已安装的依赖和生成的代码 +COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages +COPY --from=builder /usr/local/bin /usr/local/bin + +# 复制 PaddleOCR 模型到运行阶段 +COPY --from=builder /root/.paddlex/official_models /root/.paddlex/official_models + +# 安装 Playwright 浏览器 +RUN python -m playwright install webkit +RUN python -m playwright install-deps webkit + +COPY --from=builder /app/src /app/src + +# 设置 Python 路径 +ENV PYTHONPATH=/app/src + +RUN cd /app/src && python -m download_deps + +# 创建supervisor配置 +RUN mkdir -p /etc/supervisor/conf.d +COPY services/docreader/supervisord.conf /etc/supervisor/conf.d/supervisord.conf + +# 暴露 gRPC 端口 +EXPOSE 50051 + +# 使用supervisor启动服务 +CMD ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] \ No newline at end of file diff --git a/docker/config/supervisord.conf b/docker/config/supervisord.conf new file mode 100644 index 0000000..44039e2 --- /dev/null +++ b/docker/config/supervisord.conf @@ -0,0 +1,31 @@ +[supervisord] +nodaemon=true +logfile=/var/log/supervisord.log +logfile_maxbytes=50MB +logfile_backups=10 +loglevel=info +pidfile=/var/run/supervisord.pid +user=root + +[program:WeKnora] +command=/app/WeKnora +directory=/app +autostart=true +autorestart=true +startretries=5 +redirect_stderr=true +stdout_logfile=/var/log/WeKnora.log +stdout_logfile_maxbytes=50MB +stdout_logfile_backups=10 +environment=CGO_ENABLED=1 
+user=appuser + +[unix_http_server] +file=/var/run/supervisor.sock +chmod=0700 + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[supervisorctl] +serverurl=unix:///var/run/supervisor.sock \ No newline at end of file diff --git a/docker/docker-compose.minio.yml b/docker/docker-compose.minio.yml new file mode 100644 index 0000000..a78b0ab --- /dev/null +++ b/docker/docker-compose.minio.yml @@ -0,0 +1,29 @@ +version: "3.8" + +services: + minio: + image: minio/minio:latest + container_name: WeKnora-minio + ports: + - "9000:9000" + - "9001:9001" + volumes: + - minio-data:/data + environment: + - MINIO_ROOT_USER=${MINIO_ACCESS_KEY_ID} + - MINIO_ROOT_PASSWORD=${MINIO_SECRET_ACCESS_KEY} + command: server --console-address ":9001" /data + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + networks: + - WeKnora-network + +volumes: + minio-data: + +networks: + WeKnora-network: + external: true \ No newline at end of file diff --git a/docs/API.md b/docs/API.md new file mode 100644 index 0000000..63b1dc6 --- /dev/null +++ b/docs/API.md @@ -0,0 +1,1389 @@ +# WeKnora API 文档 + +## 目录 + +- [概述](#概述) +- [基础信息](#基础信息) +- [认证机制](#认证机制) +- [错误处理](#错误处理) +- [API 概览](#api-概览) +- [API 详细说明](#api-详细说明) + - [租户管理 API](#租户管理api) + - [知识库管理 API](#知识库管理api) + - [知识管理 API](#知识管理api) + - [模型管理 API](#模型管理api) + - [分块管理 API](#分块管理api) + - [会话管理 API](#会话管理api) + - [聊天功能 API](#聊天功能api) + - [消息管理 API](#消息管理api) + - [评估功能 API](#评估功能api) + +## 概述 + +WeKnora 提供了一系列 RESTful API,用于创建和管理知识库、检索知识,以及进行基于知识的问答。本文档详细描述了这些 API 的使用方式。 + +## 基础信息 + +- **基础 URL**: `/api/v1` +- **响应格式**: JSON +- **认证方式**: API Key + +## 认证机制 + +所有 API 请求需要在 HTTP 请求头中包含 `X-API-Key` 进行身份认证: + +``` +X-API-Key: your_api_key +``` + +为便于问题追踪和调试,建议每个请求的 HTTP 请求头中添加 `X-Request-ID`: + +``` +X-Request-ID: unique_request_id +``` + +### 获取 API Key + +获取 API Key 有以下方式: + +1. **创建租户时获取**:通过 `POST /api/v1/tenants` 接口创建新租户时,响应中会自动返回生成的 API Key。 + ```json + { + "success": true, + "data": { + "id": 1, + "name": "租户名称", + "description": "租户描述", + "api_key": "生成的API密钥", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } + } + ``` + +2. **查看现有租户信息**:通过 `GET /api/v1/tenants/:id` 接口查看已有租户的详细信息,响应中包含 API Key。 + +请妥善保管您的 API Key,避免泄露。API Key 代表您的账户身份,拥有完整的 API 访问权限。 + +## 错误处理 + +所有 API 使用标准的 HTTP 状态码表示请求状态,并返回统一的错误响应格式: + +```json +{ + "success": false, + "error": { + "code": "错误代码", + "message": "错误信息", + "details": "错误详情" + } +} +``` + +## API 概览 + +WeKnora API 按功能分为以下几类: + +1. **租户管理**:创建和管理租户账户 +2. **知识库管理**:创建、查询和管理知识库 +3. **知识管理**:上传、检索和管理知识内容 +4. **模型管理**:配置和管理各种AI模型 +5. **分块管理**:管理知识的分块内容 +6. **会话管理**:创建和管理对话会话 +7. **聊天功能**:基于知识库进行问答 +8. **消息管理**:获取和管理对话消息 +9. 
**评估功能**:评估模型性能 + +## API 详细说明 + +以下是每个API的详细说明和示例。 + +### 租户管理API + +| 方法 | 路径 | 描述 | +| ------ | -------------- | --------------------- | +| POST | `/tenants` | 创建新租户 | +| GET | `/tenants/:id` | 获取指定租户信息 | +| PUT | `/tenants/:id` | 更新租户信息 | +| DELETE | `/tenants/:id` | 删除租户 | +| GET | `/tenants` | 获取租户列表 | + +#### POST `/tenants` - 创建新租户 + +**请求体**: + +```json +{ + "name": "租户名称", + "description": "租户描述" +} +``` + +**响应**: + +```json +{ + "success": true, + "data": { + "id": 1, + "name": "租户名称", + "description": "租户描述", + "api_key": "生成的API密钥", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### GET `/tenants/:id` - 获取指定租户信息 + +**响应**: + +```json +{ + "success": true, + "data": { + "id": 1, + "name": "租户名称", + "description": "租户描述", + "api_key": "API密钥", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### PUT `/tenants/:id` - 更新租户信息 + +**请求体**: + +```json +{ + "name": "新租户名称", + "description": "新租户描述" +} +``` + +**响应**: + +```json +{ + "success": true, + "data": { + "id": 1, + "name": "新租户名称", + "description": "新租户描述", + "api_key": "API密钥", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### DELETE `/tenants/:id` - 删除租户 + +**响应**: + +```json +{ + "success": true, + "message": "租户删除成功" +} +``` + +#### GET `/tenants` - 获取租户列表 + +**响应**: + +```json +{ + "success": true, + "data": { + "items": [ + { + "id": 1, + "name": "租户1", + "description": "租户1描述", + "api_key": "API密钥1", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + }, + { + "id": 2, + "name": "租户2", + "description": "租户2描述", + "api_key": "API密钥2", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } + ] + } +} +``` + + + +### 知识库管理API + +| 方法 | 路径 | 描述 | +| ------ | ------------------------------------ | ------------------------ | +| POST | `/knowledge-bases` | 创建知识库 | +| GET | `/knowledge-bases` | 获取知识库列表 | +| GET | `/knowledge-bases/:id` | 获取知识库详情 | +| PUT | `/knowledge-bases/:id` | 更新知识库 | +| DELETE | `/knowledge-bases/:id` | 删除知识库 | +| GET | `/knowledge-bases/:id/hybrid-search` | 混合搜索知识库内容 | +| POST | `/knowledge-bases/copy` | 拷贝知识库 | + +#### POST `/knowledge-bases` - 创建知识库 + +**请求体**: + +```json +{ + "name": "知识库名称", + "description": "知识库描述", + "config": { + "chunk_size": 1000, + "chunk_overlap": 200 + }, + "embedding_model_id": "模型ID" +} +``` + +**响应**: + +```json +{ + "success": true, + "data": { + "id": "知识库ID", + "tenant_id": 1, + "name": "知识库名称", + "description": "知识库描述", + "config": { + "chunk_size": 1000, + "chunk_overlap": 200 + }, + "embedding_model_id": "模型ID", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### GET `/knowledge-bases` - 获取知识库列表 + +**响应**: + +```json +{ + "success": true, + "data": [ + { + "id": "知识库ID1", + "tenant_id": 1, + "name": "知识库1", + "description": "知识库1描述", + "config": { + "chunk_size": 1000, + "chunk_overlap": 200 + }, + "embedding_model_id": "模型ID", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + }, + { + "id": "知识库ID2", + "tenant_id": 1, + "name": "知识库2", + "description": "知识库2描述", + "config": { + "chunk_size": 1000, + "chunk_overlap": 200 + }, + "embedding_model_id": "模型ID", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } + ] +} +``` + +#### GET `/knowledge-bases/:id` - 获取知识库详情 + +**响应**: + +```json +{ + "success": true, + "data": { + "id": "知识库ID", + "tenant_id": 1, + "name": 
"知识库名称", + "description": "知识库描述", + "config": { + "chunk_size": 1000, + "chunk_overlap": 200 + }, + "embedding_model_id": "模型ID", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### PUT `/knowledge-bases/:id` - 更新知识库 + +**请求体**: + +```json +{ + "name": "新知识库名称", + "description": "新知识库描述", + "config": { + "chunk_size": 800, + "chunk_overlap": 150 + } +} +``` + +**响应**: + +```json +{ + "success": true, + "data": { + "id": "知识库ID", + "tenant_id": 1, + "name": "新知识库名称", + "description": "新知识库描述", + "config": { + "chunk_size": 800, + "chunk_overlap": 150 + }, + "embedding_model_id": "模型ID", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### DELETE `/knowledge-bases/:id` - 删除知识库 + +**响应**: + +```json +{ + "success": true +} +``` + +#### GET `/knowledge-bases/:id/hybrid-search` - 混合搜索知识库内容 + +**查询参数**: + +- `query`: 搜索关键词(必填) + +**响应**: + +```json +{ + "success": true, + "data": [ + { + "id": "分块ID", + "content": "匹配到的内容片段", + "metadata": { + "source": "来源文件", + "page": 1 + }, + "knowledge_title": "人工智能导论.pdf", + "score": 0.92 + }, + { + "id": "分块ID", + "content": "匹配到的内容片段", + "metadata": { + "source": "来源文件", + "page": 2 + }, + "knowledge_title": "人工智能导论.pdf", + "score": 0.88 + } + ] +} +``` + + + +### 知识管理API + +| 方法 | 路径 | 描述 | +| ------ | ------------------------------------- | ------------------------ | +| POST | `/knowledge-bases/:id/knowledge/file` | 从文件创建知识 | +| POST | `/knowledge-bases/:id/knowledge/url` | 从 URL 创建知识 | +| GET | `/knowledge-bases/:id/knowledge` | 获取知识库下的知识列表 | +| GET | `/knowledge/:id` | 获取知识详情 | +| DELETE | `/knowledge/:id` | 删除知识 | +| GET | `/knowledge/:id/download` | 下载知识文件 | +| PUT | `/knowledge/:id` | 更新知识 | +| PUT | `/knowledge/image/:id/:chunk_id` | 更新图像分块信息 | +| GET | `/knowledge/batch` | 批量获取知识 | + +#### POST `/knowledge-bases/:id/knowledge/file` - 从文件创建知识 + +**请求格式**: + +``` +Content-Type: multipart/form-data + +file: [上传的文件] +``` + +**响应**: + +```json +{ + "success": true, + "data": { + "id": "4f0845e3-8f6e-4d5a-816b-196e34070617", + "tenant_id": 1, + "knowledge_base_id": "kb-00000003", + "type": "file", + "title": "彗星.txt", + "description": "", + "source": "", + "parse_status": "failed", + "enable_status": "disabled", + "embedding_model_id": "model-embedding-00000001", + "file_name": "彗星.txt", + "file_type": "txt", + "file_size": 7710, + "file_path": "", + "created_at": "2025-04-16T04:15:01.633797Z", + "updated_at": "2025-04-16T04:15:03.24784Z", + "processed_at": null, + "error_message": "EmbedBatch API error: Http Status 403 Forbidden" + } +} +``` + +#### POST `/knowledge-bases/:id/knowledge/url` - 从 URL 创建知识 + +请求体: + +```json +{ + "url": "https://example.com/document.pdf" +} +``` + +响应: + +```json +{ + "success": true, + "data": { + "id": "4f0845e3-8f6e-4d5a-816b-196e34070617", + "tenant_id": 1, + "knowledge_base_id": "kb-00000003", + "type": "file", + "title": "彗星.txt", + "description": "", + "source": "", + "parse_status": "failed", + "enable_status": "disabled", + "embedding_model_id": "model-embedding-00000001", + "file_name": "彗星.txt", + "file_type": "txt", + "file_size": 7710, + "file_path": "", + "created_at": "2025-04-16T04:15:01.633797Z", + "updated_at": "2025-04-16T04:15:03.24784Z", + "processed_at": null, + "error_message": "EmbedBatch API error: Http Status 403 Forbidden" + } +} +``` + +#### GET `/knowledge-bases/:id/knowledge?page=&page_size` - 获取知识库下的知识列表 + +响应: + +```json +{ + "success": true, + "data": [ + { + "id": 
"4f0845e3-8f6e-4d5a-816b-196e34070617", + "tenant_id": 1, + "knowledge_base_id": "kb-00000003", + "type": "file", + "title": "彗星.txt", + "description": "", + "source": "", + "parse_status": "failed", + "enable_status": "disabled", + "embedding_model_id": "model-embedding-00000001", + "file_name": "彗星.txt", + "file_type": "txt", + "file_size": 7710, + "file_path": "", + "created_at": "2025-04-16T04:15:01.633797Z", + "updated_at": "2025-04-16T04:15:03.24784Z", + "processed_at": null, + "error_message": "EmbedBatch API error: Http Status 403 Forbidden" + } + ], + "total": 98, + "page": 1, + "page_size": 20 +} +``` + +注:parse_status 包含 `pending/processing/failed/completed` 四种状态 + +#### GET `/knowledge/:id` - 获取知识详情 + +响应: + +```json +{ + "success": true, + "data": { + "id": "4f0845e3-8f6e-4d5a-816b-196e34070617", + "tenant_id": 1, + "knowledge_base_id": "kb-00000003", + "type": "file", + "title": "彗星.txt", + "description": "", + "source": "", + "parse_status": "failed", + "enable_status": "disabled", + "embedding_model_id": "model-embedding-00000001", + "file_name": "彗星.txt", + "file_type": "txt", + "file_size": 7710, + "file_path": "", + "created_at": "2025-04-16T04:15:01.633797Z", + "updated_at": "2025-04-16T04:15:03.24784Z", + "processed_at": null, + "error_message": "EmbedBatch API error: Http Status 403 Forbidden" + } +} +``` + +#### GET `/knowledge/batch` - 批量获取知识 + +查询参数: + +- `ids`: 知识 ID 列表,例如:`?ids=knowledge_id_1&ids=knowledge_id_2` + +响应: + +```json +{ + "success": true, + "data": [ + { + "id": "知识ID1", + "tenant_id": 1, + "knowledge_base_id": "知识库ID", + "type": "file", + "title": "文档1.pdf", + "description": "", + "source": "", + "parse_status": "completed", + "enable_status": "enabled", + "embedding_model_id": "model-embedding-00000001", + "file_name": "文档1.pdf", + "file_type": "pdf", + "file_size": 1024, + "file_path": "", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z", + "processed_at": "2023-01-01T00:00:01Z", + "error_message": "" + }, + { + "id": "知识ID2", + "tenant_id": 1, + "knowledge_base_id": "知识库ID", + "type": "url", + "title": "网页文档", + "description": "", + "source": "https://example.com/doc", + "parse_status": "completed", + "enable_status": "enabled", + "embedding_model_id": "model-embedding-00000001", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z", + "processed_at": "2023-01-01T00:00:01Z", + "error_message": "" + } + ] +} +``` + +#### DELETE `/knowledge/:id` - 删除知识 + +响应: + +```json +{ + "success": true, + "message": "删除成功" +} +``` + +#### GET `/knowledge/:id/download` - 下载知识文件 + +响应 + +``` +attachment +``` + + + +### 模型管理API + +| 方法 | 路径 | 描述 | +| ------ | --------------------- | --------------------- | +| POST | `/models` | 创建模型 | +| GET | `/models` | 获取模型列表 | +| GET | `/models/:id` | 获取模型详情 | +| PUT | `/models/:id` | 更新模型 | +| DELETE | `/models/:id` | 删除模型 | + +#### POST `/models` - 创建模型 + +请求体: + +```json +{ + "name": "模型名称", + "type": "LLM", + "source": "OPENAI", + "description": "模型描述", + "parameters": { + "model": "gpt-4", + "api_key": "sk-xxxxx", + "base_url": "https://api.openai.com" + } +} +``` + +响应: + +```json +{ + "success": true, + "data": { + "id": "模型ID", + "tenant_id": 1, + "name": "模型名称", + "type": "LLM", + "source": "OPENAI", + "description": "模型描述", + "parameters": { + "model": "gpt-4", + "api_key": "sk-xxxxx", + "base_url": "https://api.openai.com" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### GET `/models` - 获取模型列表 + +响应: + 
+```json +{ + "success": true, + "data": [ + { + "id": "模型ID1", + "tenant_id": 1, + "name": "模型1", + "type": "LLM", + "source": "OPENAI", + "description": "模型1描述", + "parameters": { + "model": "gpt-4", + "api_key": "sk-xxxxx", + "base_url": "https://api.openai.com" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + }, + { + "id": "模型ID2", + "tenant_id": 1, + "name": "模型2", + "type": "EMBEDDING", + "source": "OPENAI", + "description": "模型2描述", + "parameters": { + "model": "text-embedding-ada-002", + "api_key": "sk-xxxxx", + "base_url": "https://api.openai.com" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } + ] +} +``` + +#### GET `/models/:id` - 获取模型详情 + +响应: + +```json +{ + "success": true, + "data": { + "id": "模型ID", + "tenant_id": 1, + "name": "模型名称", + "type": "LLM", + "source": "OPENAI", + "description": "模型描述", + "parameters": { + "model": "gpt-4", + "api_key": "sk-xxxxx", + "base_url": "https://api.openai.com" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### PUT `/models/:id` - 更新模型 + +请求体: + +```json +{ + "name": "新模型名称", + "description": "新模型描述", + "parameters": { + "model": "gpt-4-turbo", + "api_key": "sk-xxxxx", + "base_url": "https://api.openai.com" + }, + "is_default": false +} +``` + +响应: + +```json +{ + "success": true, + "data": { + "id": "模型ID", + "tenant_id": 1, + "name": "新模型名称", + "type": "LLM", + "source": "OPENAI", + "description": "新模型描述", + "parameters": { + "model": "gpt-4-turbo", + "api_key": "sk-xxxxx", + "base_url": "https://api.openai.com" + }, + "is_default": false, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### DELETE `/models/:id` - 删除模型 + +响应: + +```json +{ + "success": true, + "message": "模型删除成功" +} +``` + + + +### 分块管理API + +| 方法 | 路径 | 描述 | +| ------ | --------------------------- | ------------------------ | +| GET | `/chunks/:knowledge_id` | 获取知识的分块列表 | +| PUT | `/chunks/:knowledge_id/:id` | 更新分块 | +| DELETE | `/chunks/:knowledge_id/:id` | 删除分块 | +| DELETE | `/chunks/:knowledge_id` | 删除知识下的所有分块 | + +#### GET `/chunks/:knowledge_id?page=&page_size=` - 获取知识的分块列表 + +查询参数: + +- `page`: 页码(默认 1) +- `page_size`: 每页条数(默认 20) + +响应: + +```json +{ + "success": true, + "data": [ + { + "id": "分块ID1", + "knowledge_id": "知识ID", + "content": "分块1内容...", + "is_enabled": true, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + }, + { + "id": "分块ID2", + "knowledge_id": "知识ID", + "content": "分块2内容...", + "is_enable": true, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } + ], + "total": 50, + "page": 1, + "page_size": 20 +} +``` + +#### PUT `/chunks/:knowledge_id/:id` - 更新分块 + +请求体: + +```json +{ + "content": "更新后的分块内容", + "metadata": { + "page": 1, + "position": "top", + "custom_field": "自定义字段" + } +} +``` + +响应: + +```json +{ + "success": true, + "data": { + "id": "分块ID", + "knowledge_id": "知识ID", + "content": "更新后的分块内容", + "metadata": { + "page": 1, + "position": "top", + "custom_field": "自定义字段" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### DELETE `/chunks/:knowledge_id/:id` - 删除分块 + +响应: + +```json +{ + "success": true, + "message": "分块删除成功" +} +``` + +#### DELETE `/chunks/:knowledge_id` - 删除知识下的所有分块 + +响应: + +```json +{ + "success": true, + "message": "所有分块删除成功" +} +``` + + + +### 会话管理API + +| 方法 | 路径 | 描述 | +| ------ | --------------------------------------- | 
--------------------- | +| POST | `/sessions` | 创建会话 | +| GET | `/sessions/:id` | 获取会话详情 | +| GET | `/sessions` | 获取租户的会话列表 | +| PUT | `/sessions/:id` | 更新会话 | +| DELETE | `/sessions/:id` | 删除会话 | +| POST | `/sessions/:session_id/generate_title` | 生成会话标题 | +| GET | `/sessions/continue-stream/:session_id` | 继续未完成的会话 | + +#### POST `/sessions` - 创建会话 + +请求体: + +```json +{ + "knowledge_base_id": "知识库ID", + "session_strategy": { + "max_rounds": 5, + "enable_rewrite": true, + "fallback_strategy": "FIXED_RESPONSE", + "fallback_response": "对不起,我无法回答这个问题", + "keyword_threshold": 0.5, + "vector_threshold": 0.7, + "rerank_model_id": "排序模型ID", + "rerank_top_k": 3, + "summary_model_id": "总结模型ID", + "summary_parameters": { + "max_tokens": 100, + "top_p": 0.9, + "top_k": 40, + "frequency_penalty": 0.0, + "presence_penalty": 0.0, + "repeat_penalty": 1.1, + "prompt": "总结对话内容", + "context_template": "上下文模板" + } + } +} +``` + +响应: + +```json +{ + "success": true, + "data": { + "id": "会话ID", + "tenant_id": 1, + "knowledge_base_id": "知识库ID", + "title": "未命名会话", + "max_rounds": 5, + "enable_rewrite": true, + "fallback_strategy": "FIXED_RESPONSE", + "fallback_response": "对不起,我无法回答这个问题", + "keyword_threshold": 0.5, + "vector_threshold": 0.7, + "rerank_model_id": "排序模型ID", + "rerank_top_k": 3, + "summary_model_id": "总结模型ID", + "summary_parameters": { + "max_tokens": 100, + "top_p": 0.9, + "top_k": 40, + "frequency_penalty": 0.0, + "presence_penalty": 0.0, + "repeat_penalty": 1.1, + "prompt": "总结对话内容", + "context_template": "上下文模板" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### GET `/sessions/:id` - 获取会话详情 + +响应: + +```json +{ + "success": true, + "data": { + "id": "会话ID", + "tenant_id": 1, + "knowledge_base_id": "知识库ID", + "title": "会话标题", + "max_rounds": 5, + "enable_rewrite": true, + "fallback_strategy": "FIXED_RESPONSE", + "fallback_response": "对不起,我无法回答这个问题", + "keyword_threshold": 0.5, + "vector_threshold": 0.7, + "rerank_model_id": "排序模型ID", + "rerank_top_k": 3, + "summary_model_id": "总结模型ID", + "summary_parameters": { + "max_tokens": 100, + "top_p": 0.9, + "top_k": 40, + "frequency_penalty": 0.0, + "presence_penalty": 0.0, + "repeat_penalty": 1.1, + "prompt": "总结对话内容", + "context_template": "上下文模板" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### GET `/sessions?page=&page_size=` - 获取租户的会话列表 + +响应: + +```json +{ + "success": true, + "data": [ + { + "id": "会话ID1", + "tenant_id": 1, + "knowledge_base_id": "知识库ID", + "title": "会话1", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + }, + { + "id": "会话ID2", + "tenant_id": 1, + "knowledge_base_id": "知识库ID", + "title": "会话2", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } + ], + "total": 98, + "page": 1, + "page_size": 20 +} +``` + +#### PUT `/sessions/:id` - 更新会话 + +请求体: + +```json +{ + "title": "新会话标题", + "max_rounds": 10, + "enable_rewrite": false, + "fallback_strategy": "NO_FALLBACK", + "fallback_response": "", + "keyword_threshold": 0.6, + "vector_threshold": 0.8, + "rerank_model_id": "新排序模型ID", + "rerank_top_k": 5, + "summary_model_id": "新总结模型ID", + "summary_parameters": { + "max_tokens": 150, + "top_p": 0.8, + "top_k": 50, + "frequency_penalty": 0.1, + "presence_penalty": 0.1, + "repeat_penalty": 1.2, + "prompt": "总结这段对话", + "context_template": "新上下文模板" + } +} +``` + +响应: + +```json +{ + "success": true, + "data": { + "id": "会话ID", + "tenant_id": 1, + "knowledge_base_id": "知识库ID", 
+ "title": "新会话标题", + "max_rounds": 10, + "enable_rewrite": false, + "fallback_strategy": "NO_FALLBACK", + "fallback_response": "", + "keyword_threshold": 0.6, + "vector_threshold": 0.8, + "rerank_model_id": "新排序模型ID", + "rerank_top_k": 5, + "summary_model_id": "新总结模型ID", + "summary_parameters": { + "max_tokens": 150, + "top_p": 0.8, + "top_k": 50, + "frequency_penalty": 0.1, + "presence_penalty": 0.1, + "repeat_penalty": 1.2, + "prompt": "总结这段对话", + "context_template": "新上下文模板" + }, + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-01T00:00:00Z" + } +} +``` + +#### DELETE `/sessions/:id` - 删除会话 + +响应: + +```json +{ + "success": true, + "message": "会话删除成功" +} +``` + +#### POST `/sessions/:session_id/generate_title` - 生成会话标题 + +请求体: + +```json +{ + "messages": [ + { + "role": "user", + "content": "你好,我想了解关于人工智能的知识" + }, + { + "role": "assistant", + "content": "人工智能是计算机科学的一个分支..." + } + ] +} +``` + +响应: + +```json +{ + "success": true, + "data": "关于人工智能的对话" +} +``` + +#### GET `/sessions/continue-stream/:session_id` - 继续未完成的会话 + +**查询参数**: +- `message_id`: 从 `/messages/:session_id/load` 接口中获取的 `is_completed` 为 `false` 的消息 ID + +**响应格式**: +服务器端事件流(Server-Sent Events),与 `/knowledge-chat/:session_id` 返回结果一致 + + + +### 聊天功能API + +| 方法 | 路径 | 描述 | +| ---- | ----------------------------- | ------------------------ | +| POST | `/knowledge-chat/:session_id` | 基于知识库的问答 | +| POST | `/knowledge-search` | 基于知识库的搜索知识 | + +#### POST `/knowledge-chat/:session_id` - 基于知识库的问答 + +**请求体**: + +```json +{ + "query": "人工智能的定义是什么?" +} +``` + +**响应格式**: +服务器端事件流(Server-Sent Events,Content-Type: text/event-stream) + +**响应示例**: + +``` +event: message +data: {"id":"消息ID","response_type":"references","done":false,"knowledge_references":[{"id":"分块ID1","content":"人工智能的定义...","knowledge_title":"人工智能导论.pdf"},{"id":"分块ID2","content":"关于AI的更多信息...","knowledge_title":"人工智能导论.pdf"}]} + +event: message +data: {"id":"消息ID","content":"人工","created_at":"2023-01-01T00:00:00Z","done":false} + +event: message +data: {"id":"消息ID","content":"人工智能是","created_at":"2023-01-01T00:00:00Z","done":false} + +event: message +data: {"id":"消息ID","content":"人工智能是研究...","created_at":"2023-01-01T00:00:00Z","done":false} + +event: message +data: {"id":"消息ID","content":"人工智能是研究、开发用于模拟、延伸和扩展人的智能的理论、方法、技术及应用系统的一门新的技术科学...","created_at":"2023-01-01T00:00:00Z","done":true} +``` + + + +### 消息管理API + +| 方法 | 路径 | 描述 | +| ------ | ---------------------------- | ------------------------ | +| GET | `/messages/:session_id/load` | 获取最近的会话消息列表 | +| DELETE | `/messages/:session_id/:id` | 删除消息 | + +#### GET `/messages/:session_id/load?before_time=2025-04-18T11:57:31.310671+08:00&limit=20` - 获取最近的会话消息列表 + +查询参数: + +- `before_time`: 上一次拉取的最早一条消息的 created_at 字段,为空拉取最近的消息 +- `limit`: 每页条数(默认 20) + +响应: + +```json +{ + "data": [ + { + "id": "22bafcce-a9c0-4dbd-8ce7-a881a2943e5f", + "session_id": "76218775-0af1-4933-9f76-40d0a6622e76", + "request_id": "6965bdcc-8264-43d5-b9f4-6bd844aa3aa7", + "content": "印度的国土面积", + "role": "user", + "knowledge_references": null, + "created_at": "2025-04-18T11:57:31.310671+08:00", + "updated_at": "2025-04-18T11:57:31.320384+08:00", + "is_completed": true + }, + { + "id": "6c9564c8-4801-47d6-86ee-2ed58c1e331e", + "session_id": "76218775-0af1-4933-9f76-40d0a6622e76", + "request_id": "6965bdcc-8264-43d5-b9f4-6bd844aa3aa7", + "content": "", + "role": "assistant", + "knowledge_references": [ + { + "id": "b3f489f8-278c-4f67-83ca-97b7bc39f35b", + "content": "", + "knowledge_id": "6c595abb-3086-408c-9e2e-40c543af23b6", + 
"knowledge_tite": "openaiassets_cfa8f2941d6b41cc79b91b4553ff8a4f_110591700536724035.txt" + }, + { + "id": "71008e5c-12bd-4da3-bd8b-799b6af0c112", + "content": "", + "knowledge_id": "6c595abb-3086-408c-9e2e-40c543af23b6", + "knowledge_tite": "openaiassets_cfa8f2941d6b41cc79b91b4553ff8a4f_110591700536724035.txt" + } + ], + "created_at": "2025-04-18T11:57:45.499279+08:00", + "updated_at": "2025-04-18T11:57:45.513845+08:00", + "is_completed": false + } + ], + "success": true +} +``` + +#### DELETE `/messages/:session_id/:id` - 删除消息 + +响应: + +```json +{ + "success": true, + "message": "消息删除成功" +} +``` + + + +### 评估功能API + +| 方法 | 路径 | 描述 | +| ---- | ------------- | --------------------- | +| GET | `/evaluation` | 获取评估任务 | +| POST | `/evaluation` | 创建评估任务 | + +#### GET `/evaluation` - 获取评估任务 + +**请求参数**: +- `task_id`: 从 `POST /evaluation` 接口中获取到的任务 ID +- `X-API-Key`: 用户 API Key + +**请求示例**: + +```bash +curl --location 'http://localhost:8080/api/v1/evaluation/?task_id=:task_id' \ +--header 'X-API-Key: sk-00000001abcdefg123456' +``` + +**响应示例**: + +```json +{ + "data": { + "id": ":task_id", + "tenant_id": 1, + "embedding_id": "default", + "rerank_id": "default", + "chat_id": "default", + "start_time": "2025-04-27T17:59:35.662145114+08:00", + "status": 1, + "total": 100, + "finished": 27, + "metric": { + "retrieval_metrics": { + "precision": 0.26419753086419756, + "recall": 1, + "ndcg3": 0.9716533097411622, + "ndcg10": 0.9914200384804508, + "mrr": 1, + "map": 0.9808641975308641 + }, + "generation_metrics": { + "bleu1": 0.07886284400848455, + "bleu2": 0.06475994660439699, + "bleu4": 0.046991784754461315, + "rouge1": 0.1922821051422303, + "rouge2": 0.0941559283518759, + "rougel": 0.1837134727619397 + } + } + }, + "success": true +} +``` + +#### POST `/evaluation` - 创建评估任务 + +**请求参数**: +- `embedding_id`(可选): 评估使用的嵌入模型,默认使用 model-embedding-00000001 模型 +- `chat_id`(可选): 评估使用的对话模型,默认使用 model-knowledgeqa-00000003 模型 +- `rerank_id`(可选): 评估使用的重排序模型,默认使用 model-rerank-00000002 模型 +- `dataset_id`(可选): 评估使用的数据集,暂时只支持官方测试数据集 + +**请求示例**: + +```bash +curl --location 'http://localhost:8080/api/v1/evaluation' \ +--header 'X-API-Key: sk-00000001abcdefg123456' \ +--header 'Content-Type: application/json' \ +--data '{}' +``` + +**响应示例**: + +```json +{ + "data": { + "id": "6f43d272-d65f-4005-96ad-104b7761ea65", + "tenant_id": 1, + "embedding_id": "default", + "rerank_id": "default", + "chat_id": "default", + "start_time": "2025-04-27T17:59:35.662145114+08:00", + "status": 1 + }, + "success": true +} +``` + + \ No newline at end of file diff --git a/docs/images/answer.png b/docs/images/answer.png new file mode 100644 index 0000000..f601a8a Binary files /dev/null and b/docs/images/answer.png differ diff --git a/docs/images/graph1.png b/docs/images/graph1.png new file mode 100644 index 0000000..b878fc2 Binary files /dev/null and b/docs/images/graph1.png differ diff --git a/docs/images/graph2.png b/docs/images/graph2.png new file mode 100644 index 0000000..549812f Binary files /dev/null and b/docs/images/graph2.png differ diff --git a/docs/images/knowledges.png b/docs/images/knowledges.png new file mode 100644 index 0000000..6bab844 Binary files /dev/null and b/docs/images/knowledges.png differ diff --git a/docs/images/logo.png b/docs/images/logo.png new file mode 100644 index 0000000..158aba2 Binary files /dev/null and b/docs/images/logo.png differ diff --git a/docs/images/pipeline.jpg b/docs/images/pipeline.jpg new file mode 100644 index 0000000..abf9c55 Binary files /dev/null and b/docs/images/pipeline.jpg differ diff 
--git a/docs/images/qa.png b/docs/images/qa.png new file mode 100644 index 0000000..ce06a87 Binary files /dev/null and b/docs/images/qa.png differ diff --git a/docs/使用其他向量数据库.md b/docs/使用其他向量数据库.md new file mode 100644 index 0000000..b013586 --- /dev/null +++ b/docs/使用其他向量数据库.md @@ -0,0 +1,190 @@ +### 如何集成新的向量数据库 + +本文提供了向 WeKnora 项目添加新向量数据库支持的完整指南。通过实现标准化接口和遵循结构化流程,开发者可以高效地集成自定义向量数据库。 + +### 集成流程 + +#### 1. 实现基础检索引擎接口 + +首先需要实现 `interfaces` 包中的 `RetrieveEngine` 接口,定义检索引擎的核心能力: + +```go +type RetrieveEngine interface { + // 返回检索引擎的类型标识 + EngineType() types.RetrieverEngineType + + // 执行检索操作,返回匹配结果 + Retrieve(ctx context.Context, params types.RetrieveParams) ([]*types.RetrieveResult, error) + + // 返回该引擎支持的检索类型列表 + Support() []types.RetrieverType +} +``` + +#### 2. 实现存储层接口 + +实现 `RetrieveEngineRepository` 接口,扩展基础检索引擎能力,添加索引管理功能: + +```go +type RetrieveEngineRepository interface { + // 保存单个索引信息 + Save(ctx context.Context, indexInfo *types.IndexInfo, params map[string]any) error + + // 批量保存多个索引信息 + BatchSave(ctx context.Context, indexInfoList []*types.IndexInfo, params map[string]any) error + + // 估算索引存储所需空间 + EstimateStorageSize(ctx context.Context, indexInfoList []*types.IndexInfo, params map[string]any) int64 + + // 通过分块ID列表删除索引 + DeleteByChunkIDList(ctx context.Context, indexIDList []string, dimension int) error + + // 复制索引数据,避免重新计算嵌入向量 + CopyIndices( + ctx context.Context, + sourceKnowledgeBaseID string, + sourceToTargetKBIDMap map[string]string, + sourceToTargetChunkIDMap map[string]string, + targetKnowledgeBaseID string, + dimension int, + ) error + + // 通过知识ID列表删除索引 + DeleteByKnowledgeIDList(ctx context.Context, knowledgeIDList []string, dimension int) error + + // 继承RetrieveEngine接口 + RetrieveEngine +} +``` + +#### 3. 实现服务层接口 + +创建实现 `RetrieveEngineService` 接口的服务,负责处理索引创建和管理的业务逻辑: + +```go +type RetrieveEngineService interface { + // 创建单个索引 + Index(ctx context.Context, + embedder embedding.Embedder, + indexInfo *types.IndexInfo, + retrieverTypes []types.RetrieverType, + ) error + + // 批量创建索引 + BatchIndex(ctx context.Context, + embedder embedding.Embedder, + indexInfoList []*types.IndexInfo, + retrieverTypes []types.RetrieverType, + ) error + + // 估算索引存储空间 + EstimateStorageSize(ctx context.Context, + embedder embedding.Embedder, + indexInfoList []*types.IndexInfo, + retrieverTypes []types.RetrieverType, + ) int64 + + // 复制索引数据 + CopyIndices( + ctx context.Context, + sourceKnowledgeBaseID string, + sourceToTargetKBIDMap map[string]string, + sourceToTargetChunkIDMap map[string]string, + targetKnowledgeBaseID string, + dimension int, + ) error + + // 删除索引 + DeleteByChunkIDList(ctx context.Context, indexIDList []string, dimension int) error + DeleteByKnowledgeIDList(ctx context.Context, knowledgeIDList []string, dimension int) error + + // 继承RetrieveEngine接口 + RetrieveEngine +} +``` + +#### 4. 添加环境变量配置 + +在环境配置中添加新数据库的必要连接参数: + +``` +# 在RETRIEVE_DRIVER中添加新数据库驱动名称(多个驱动用逗号分隔) +RETRIEVE_DRIVER=postgres,elasticsearch_v8,your_database + +# 新数据库的连接参数 +YOUR_DATABASE_ADDR=your_database_host:port +YOUR_DATABASE_USERNAME=username +YOUR_DATABASE_PASSWORD=password +# 其他必要的连接参数... +``` + +#### 5. 
注册检索引擎 + +在 `internal/container/container.go` 文件的 `initRetrieveEngineRegistry` 函数中添加新数据库的初始化与注册逻辑: + +```go +func initRetrieveEngineRegistry(db *gorm.DB, cfg *config.Config) (interfaces.RetrieveEngineRegistry, error) { + registry := retriever.NewRetrieveEngineRegistry() + retrieveDriver := strings.Split(os.Getenv("RETRIEVE_DRIVER"), ",") + log := logger.GetLogger(context.Background()) + + // 已有的PostgreSQL和Elasticsearch初始化代码... + + // 添加新向量数据库的初始化代码 + if slices.Contains(retrieveDriver, "your_database") { + // 初始化数据库客户端 + client, err := your_database.NewClient(your_database.Config{ + Addresses: []string{os.Getenv("YOUR_DATABASE_ADDR")}, + Username: os.Getenv("YOUR_DATABASE_USERNAME"), + Password: os.Getenv("YOUR_DATABASE_PASSWORD"), + // 其他连接参数... + }) + + if err != nil { + log.Errorf("Create your_database client failed: %v", err) + } else { + // 创建检索引擎仓库 + yourDatabaseRepo := your_database.NewYourDatabaseRepository(client, cfg) + + // 注册检索引擎 + if err := registry.Register( + retriever.NewKVHybridRetrieveEngine( + yourDatabaseRepo, types.YourDatabaseRetrieverEngineType, + ), + ); err != nil { + log.Errorf("Register your_database retrieve engine failed: %v", err) + } else { + log.Infof("Register your_database retrieve engine success") + } + } + } + + return registry, nil +} +``` + +#### 6. 定义检索引擎类型常量 + +在 `internal/types/retriever.go` 文件中添加新的检索引擎类型常量: + +```go +// RetrieverEngineType 定义检索引擎类型 +const ( + ElasticsearchRetrieverEngineType RetrieverEngineType = "elasticsearch" + PostgresRetrieverEngineType RetrieverEngineType = "postgres" + YourDatabaseRetrieverEngineType RetrieverEngineType = "your_database" // 添加新数据库类型 +) +``` + +## 参考实现示例 + +建议参考现有的 PostgreSQL 和 Elasticsearch 实现作为开发模板。这些实现位于以下目录: + +- PostgreSQL: `internal/application/repository/retriever/postgres/` +- ElasticsearchV7: `internal/application/repository/retriever/elasticsearch/v7/` +- ElasticsearchV8: `internal/application/repository/retriever/elasticsearch/v8/` + +通过遵循以上步骤和参考现有实现,你可以成功集成新的向量数据库到 WeKnora 系统中,扩展其向量检索能力。 + + + diff --git a/frontend/.gitignore b/frontend/.gitignore new file mode 100644 index 0000000..8ee54e8 --- /dev/null +++ b/frontend/.gitignore @@ -0,0 +1,30 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +.DS_Store +dist +dist-ssr +coverage +*.local + +/cypress/videos/ +/cypress/screenshots/ + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? + +*.tsbuildinfo diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 0000000..7fe7025 --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,35 @@ +# 构建阶段 +FROM node:20-alpine as build-stage + +WORKDIR /app + +# 设置环境变量,忽略类型检查错误 +ENV NODE_OPTIONS="--max-old-space-size=4096" +ENV VITE_IS_DOCKER=true + +# 复制依赖文件 +COPY package*.json ./ + +# 安装依赖 +RUN npm install + +# 复制项目文件 +COPY . . 
+ +# 构建应用 +RUN npm run build + +# 生产阶段 +FROM nginx:stable-alpine as production-stage + +# 复制构建产物到nginx服务目录 +COPY --from=build-stage /app/dist /usr/share/nginx/html + +# 复制nginx配置文件 +COPY nginx.conf /etc/nginx/conf.d/default.conf + +# 暴露端口 +EXPOSE 80 + +# 启动nginx +CMD ["nginx", "-g", "daemon off;"] \ No newline at end of file diff --git a/frontend/env.d.ts b/frontend/env.d.ts new file mode 100644 index 0000000..5b580fc --- /dev/null +++ b/frontend/env.d.ts @@ -0,0 +1,6 @@ +/// +// 配置这个文件是为了解决错误:找不到模块“@/views/login/index.vue”或其相应的类型声明。ts(2307) +// 这段代码告诉 TypeScript,所有以 .vue 结尾的文件都是 Vue 组件,可以通过 import 语句进行导入。这样做通常可以解决无法识别模块的问题。 +declare module '*.vue' { + import { Component } from 'vue'; const component: Component; export default component; +} \ No newline at end of file diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..e2ab8ef --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,22 @@ + + + + WeKnora + + + + + + + + + + + + + +
+ + + + diff --git a/frontend/nginx.conf b/frontend/nginx.conf new file mode 100644 index 0000000..2e09ed8 --- /dev/null +++ b/frontend/nginx.conf @@ -0,0 +1,36 @@ +server { + listen 80; + server_name localhost; + client_max_body_size 50M; + + # 前端静态文件 + location / { + root /usr/share/nginx/html; + index index.html; + try_files $uri $uri/ /index.html; + } + + # API请求代理到后端服务 + location /api/ { + proxy_pass http://app:8080/api/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # SSE 相关配置 + proxy_http_version 1.1; # 使用 HTTP/1.1 + proxy_set_header Connection ""; # 禁用 Connection: close,保持连接打开 + chunked_transfer_encoding off; # 关闭分块传输编码 + proxy_buffering off; # 关闭缓冲 + proxy_cache off; # 关闭缓存 + proxy_read_timeout 3600s; # 增加读取超时时间 + proxy_send_timeout 3600s; # 增加发送超时时间 + } + + # 错误页面 + error_page 500 502 503 504 /50x.html; + location = /50x.html { + root /usr/share/nginx/html; + } +} \ No newline at end of file diff --git a/frontend/package-lock.json b/frontend/package-lock.json new file mode 100644 index 0000000..ec47d43 --- /dev/null +++ b/frontend/package-lock.json @@ -0,0 +1,3781 @@ +{ + "name": "knowledage-base", + "version": "0.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "knowledage-base", + "version": "0.0.0", + "dependencies": { + "@microsoft/fetch-event-source": "^2.0.1", + "axios": "^1.8.4", + "marked": "^5.1.2", + "pagefind": "^1.1.1", + "pinia": "^3.0.1", + "tdesign-vue-next": "^1.11.5", + "vue": "^3.5.13", + "vue-router": "^4.5.0", + "webpack": "^5.94.0" + }, + "devDependencies": { + "@tsconfig/node22": "^22.0.1", + "@types/marked": "^5.0.2", + "@types/node": "^22.14.0", + "@vitejs/plugin-vue": "6.0.0", + "@vitejs/plugin-vue-jsx": "5.0.1", + "@vue/tsconfig": "^0.7.0", + "less": "^4.3.0", + "less-loader": "^12.2.0", + "npm-run-all2": "^7.0.2", + "typescript": "~5.8.0", + "vite": "7.0.4", + "vue-tsc": "^2.2.8" + } + }, + "node_modules/@ampproject/remapping": { + "version": "2.3.0", + "resolved": "https://mirrors.tencent.com/npm/@ampproject/remapping/-/remapping-2.3.0.tgz", + "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/code-frame/-/code-frame-7.27.1.tgz", + "integrity": "sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==", + "dev": true, + "dependencies": { + "@babel/helper-validator-identifier": "^7.27.1", + "js-tokens": "^4.0.0", + "picocolors": "^1.1.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/compat-data": { + "version": "7.28.0", + "resolved": "https://mirrors.tencent.com/npm/@babel/compat-data/-/compat-data-7.28.0.tgz", + "integrity": "sha512-60X7qkglvrap8mn1lh2ebxXdZYtUcpd7gsmy9kLaBJ4i/WdY8PqTSdxyA8qraikqKQK5C1KRBKXqznrVapyNaw==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/core": { + "version": "7.28.0", + "resolved": "https://mirrors.tencent.com/npm/@babel/core/-/core-7.28.0.tgz", + "integrity": "sha512-UlLAnTPrFdNGoFtbSXwcGFQBtQZJCNjaN6hQNP3UPvuNXT1i82N26KL3dZeIpNalWywr9IuQuncaAfUaS1g6sQ==", + "dev": true, + "dependencies": 
{ + "@ampproject/remapping": "^2.2.0", + "@babel/code-frame": "^7.27.1", + "@babel/generator": "^7.28.0", + "@babel/helper-compilation-targets": "^7.27.2", + "@babel/helper-module-transforms": "^7.27.3", + "@babel/helpers": "^7.27.6", + "@babel/parser": "^7.28.0", + "@babel/template": "^7.27.2", + "@babel/traverse": "^7.28.0", + "@babel/types": "^7.28.0", + "convert-source-map": "^2.0.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.2.3", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/babel" + } + }, + "node_modules/@babel/generator": { + "version": "7.28.0", + "resolved": "https://mirrors.tencent.com/npm/@babel/generator/-/generator-7.28.0.tgz", + "integrity": "sha512-lJjzvrbEeWrhB4P3QBsH7tey117PjLZnDbLiQEKjQ/fNJTjuq4HSqgFA+UNSwZT8D7dxxbnuSBMsa1lrWzKlQg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.28.0", + "@babel/types": "^7.28.0", + "@jridgewell/gen-mapping": "^0.3.12", + "@jridgewell/trace-mapping": "^0.3.28", + "jsesc": "^3.0.2" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-annotate-as-pure": { + "version": "7.27.3", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-annotate-as-pure/-/helper-annotate-as-pure-7.27.3.tgz", + "integrity": "sha512-fXSwMQqitTGeHLBC08Eq5yXz2m37E4pJX1qAU1+2cNedz/ifv/bVXft90VeSav5nFO61EcNgwr0aJxbyPaWBPg==", + "dev": true, + "dependencies": { + "@babel/types": "^7.27.3" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-compilation-targets": { + "version": "7.27.2", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-compilation-targets/-/helper-compilation-targets-7.27.2.tgz", + "integrity": "sha512-2+1thGUUWWjLTYTHZWK1n8Yga0ijBz1XAhUXcKy81rd5g6yh7hGqMp45v7cadSbEHc9G3OTv45SyneRN3ps4DQ==", + "dev": true, + "dependencies": { + "@babel/compat-data": "^7.27.2", + "@babel/helper-validator-option": "^7.27.1", + "browserslist": "^4.24.0", + "lru-cache": "^5.1.1", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-create-class-features-plugin": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-create-class-features-plugin/-/helper-create-class-features-plugin-7.27.1.tgz", + "integrity": "sha512-QwGAmuvM17btKU5VqXfb+Giw4JcN0hjuufz3DYnpeVDvZLAObloM77bhMXiqry3Iio+Ai4phVRDwl6WU10+r5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-annotate-as-pure": "^7.27.1", + "@babel/helper-member-expression-to-functions": "^7.27.1", + "@babel/helper-optimise-call-expression": "^7.27.1", + "@babel/helper-replace-supers": "^7.27.1", + "@babel/helper-skip-transparent-expression-wrappers": "^7.27.1", + "@babel/traverse": "^7.27.1", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-globals": { + "version": "7.28.0", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-globals/-/helper-globals-7.28.0.tgz", + "integrity": "sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-member-expression-to-functions": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-member-expression-to-functions/-/helper-member-expression-to-functions-7.27.1.tgz", + "integrity": 
"sha512-E5chM8eWjTp/aNoVpcbfM7mLxu9XGLWYise2eBKGQomAk/Mb4XoxyqXTZbuTohbsl8EKqdlMhnDI2CCLfcs9wA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/traverse": "^7.27.1", + "@babel/types": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-imports": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-module-imports/-/helper-module-imports-7.27.1.tgz", + "integrity": "sha512-0gSFWUPNXNopqtIPQvlD5WgXYI5GY2kP2cCvoT8kczjbfcfuIljTbcWrulD1CIPIX2gt1wghbDy08yE1p+/r3w==", + "dev": true, + "dependencies": { + "@babel/traverse": "^7.27.1", + "@babel/types": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-transforms": { + "version": "7.27.3", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-module-transforms/-/helper-module-transforms-7.27.3.tgz", + "integrity": "sha512-dSOvYwvyLsWBeIRyOeHXp5vPj5l1I011r52FM1+r1jCERv+aFXYk4whgQccYEGYxK2H3ZAIA8nuPkQ0HaUo3qg==", + "dev": true, + "dependencies": { + "@babel/helper-module-imports": "^7.27.1", + "@babel/helper-validator-identifier": "^7.27.1", + "@babel/traverse": "^7.27.3" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-optimise-call-expression": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-optimise-call-expression/-/helper-optimise-call-expression-7.27.1.tgz", + "integrity": "sha512-URMGH08NzYFhubNSGJrpUEphGKQwMQYBySzat5cAByY1/YgIRkULnIy3tAMeszlL/so2HbeilYloUmSpd7GdVw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-plugin-utils": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-plugin-utils/-/helper-plugin-utils-7.27.1.tgz", + "integrity": "sha512-1gn1Up5YXka3YYAHGKpbideQ5Yjf1tDa9qYcgysz+cNCXukyLl6DjPXhD3VRwSb8c0J9tA4b2+rHEZtc6R0tlw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-replace-supers": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-replace-supers/-/helper-replace-supers-7.27.1.tgz", + "integrity": "sha512-7EHz6qDZc8RYS5ElPoShMheWvEgERonFCs7IAonWLLUTXW59DP14bCZt89/GKyreYn8g3S83m21FelHKbeDCKA==", + "dev": true, + "dependencies": { + "@babel/helper-member-expression-to-functions": "^7.27.1", + "@babel/helper-optimise-call-expression": "^7.27.1", + "@babel/traverse": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-skip-transparent-expression-wrappers": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-skip-transparent-expression-wrappers/-/helper-skip-transparent-expression-wrappers-7.27.1.tgz", + "integrity": "sha512-Tub4ZKEXqbPjXgWLl2+3JpQAYBJ8+ikpQ2Ocj/q/r0LwE3UhENh7EUabyHjz2kCEsrRY83ew2DQdHluuiDQFzg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/traverse": "^7.27.1", + "@babel/types": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz", + "integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==", + "license": "MIT", + 
"engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-validator-identifier/-/helper-validator-identifier-7.27.1.tgz", + "integrity": "sha512-D2hP9eA+Sqx1kBZgzxZh0y1trbuU+JoDkiEwqhQ36nodYqJwyEIhPSdMNd7lOm/4io72luTPWH20Yda0xOuUow==", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-option": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/helper-validator-option/-/helper-validator-option-7.27.1.tgz", + "integrity": "sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helpers": { + "version": "7.27.6", + "resolved": "https://mirrors.tencent.com/npm/@babel/helpers/-/helpers-7.27.6.tgz", + "integrity": "sha512-muE8Tt8M22638HU31A3CgfSUciwz1fhATfoVai05aPXGor//CdWDCbnlY1yvBPo07njuVOCNGCSp/GTt12lIug==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/template": "^7.27.2", + "@babel/types": "^7.27.6" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.28.0", + "resolved": "https://mirrors.tencent.com/npm/@babel/parser/-/parser-7.28.0.tgz", + "integrity": "sha512-jVZGvOxOuNSsuQuLRTh13nU0AogFlw32w/MT+LV6D3sP5WdbW61E77RnkbaO2dUvmPAYrBDJXGn5gGS6tH4j8g==", + "dependencies": { + "@babel/types": "^7.28.0" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/plugin-syntax-jsx": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.27.1.tgz", + "integrity": "sha512-y8YTNIeKoyhGd9O0Jiyzyyqk8gdjnumGTQPsz0xOZOQ2RmkVJeZ1vmmfIvFEKqucBG6axJGBZDE/7iI5suUI/w==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-typescript": { + "version": "7.27.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.27.1.tgz", + "integrity": "sha512-xfYCBMxveHrRMnAWl1ZlPXOZjzkN82THFvLhQhFXFt81Z5HnN+EtUkZhv/zcKpmT3fzmWZB0ywiBrbC3vogbwQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-transform-typescript": { + "version": "7.28.0", + "resolved": "https://mirrors.tencent.com/npm/@babel/plugin-transform-typescript/-/plugin-transform-typescript-7.28.0.tgz", + "integrity": "sha512-4AEiDEBPIZvLQaWlc9liCavE0xRM0dNca41WtBeM3jgFptfUOSG9z0uteLhq6+3rq+WB6jIvUwKDTpXEHPJ2Vg==", + "dev": true, + "dependencies": { + "@babel/helper-annotate-as-pure": "^7.27.3", + "@babel/helper-create-class-features-plugin": "^7.27.1", + "@babel/helper-plugin-utils": "^7.27.1", + "@babel/helper-skip-transparent-expression-wrappers": "^7.27.1", + "@babel/plugin-syntax-typescript": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/runtime": { + "version": "7.27.6", + "resolved": "https://mirrors.tencent.com/npm/@babel/runtime/-/runtime-7.27.6.tgz", + "integrity": "sha512-vbavdySgbTTrmFE+EsiqUTzlOr5bzlnJtUv9PynGCAKvfQqjIXbvFdumPM/GxMDfyuGMJaJAU6TO4zc1Jf1i8Q==", + 
"engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/template": { + "version": "7.27.2", + "resolved": "https://mirrors.tencent.com/npm/@babel/template/-/template-7.27.2.tgz", + "integrity": "sha512-LPDZ85aEJyYSd18/DkjNh4/y1ntkE5KwUHWTiqgRxruuZL2F1yuHligVHLvcHY2vMHXttKFpJn6LwfI7cw7ODw==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.27.1", + "@babel/parser": "^7.27.2", + "@babel/types": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse": { + "version": "7.28.0", + "resolved": "https://mirrors.tencent.com/npm/@babel/traverse/-/traverse-7.28.0.tgz", + "integrity": "sha512-mGe7UK5wWyh0bKRfupsUchrQGqvDbZDbKJw+kcRGSmdHVYrv+ltd0pnpDTVpiTqnaBru9iEvA8pz8W46v0Amwg==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.27.1", + "@babel/generator": "^7.28.0", + "@babel/helper-globals": "^7.28.0", + "@babel/parser": "^7.28.0", + "@babel/template": "^7.27.2", + "@babel/types": "^7.28.0", + "debug": "^4.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/types": { + "version": "7.28.1", + "resolved": "https://mirrors.tencent.com/npm/@babel/types/-/types-7.28.1.tgz", + "integrity": "sha512-x0LvFTekgSX+83TI28Y9wYPUfzrnl2aT5+5QLnO6v7mSJYtEEevuDRN0F0uSHRk1G1IWZC43o00Y0xDDrpBGPQ==", + "license": "MIT", + "dependencies": { + "@babel/helper-string-parser": "^7.27.1", + "@babel/helper-validator-identifier": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/aix-ppc64/-/aix-ppc64-0.25.6.tgz", + "integrity": "sha512-ShbM/3XxwuxjFiuVBHA+d3j5dyac0aEVVq1oluIDf71hUw0aRF59dV/efUsIwFnR6m8JNM2FjZOzmaZ8yG61kw==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/android-arm/-/android-arm-0.25.6.tgz", + "integrity": "sha512-S8ToEOVfg++AU/bHwdksHNnyLyVM+eMVAOf6yRKFitnwnbwwPNqKr3srzFRe7nzV69RQKb5DgchIX5pt3L53xg==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/android-arm64/-/android-arm64-0.25.6.tgz", + "integrity": "sha512-hd5zdUarsK6strW+3Wxi5qWws+rJhCCbMiC9QZyzoxfk5uHRIE8T287giQxzVpEvCwuJ9Qjg6bEjcRJcgfLqoA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/android-x64/-/android-x64-0.25.6.tgz", + "integrity": "sha512-0Z7KpHSr3VBIO9A/1wcT3NTy7EB4oNC4upJ5ye3R7taCc2GUdeynSLArnon5G8scPwaU866d3H4BCrE5xLW25A==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/darwin-arm64/-/darwin-arm64-0.25.6.tgz", + "integrity": "sha512-FFCssz3XBavjxcFxKsGy2DYK5VSvJqa6y5HXljKzhRZ87LvEi13brPrf/wdyl/BbpbMKJNOr1Sd0jtW4Ge1pAA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + 
"node": ">=18" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/darwin-x64/-/darwin-x64-0.25.6.tgz", + "integrity": "sha512-GfXs5kry/TkGM2vKqK2oyiLFygJRqKVhawu3+DOCk7OxLy/6jYkWXhlHwOoTb0WqGnWGAS7sooxbZowy+pK9Yg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.6.tgz", + "integrity": "sha512-aoLF2c3OvDn2XDTRvn8hN6DRzVVpDlj2B/F66clWd/FHLiHaG3aVZjxQX2DYphA5y/evbdGvC6Us13tvyt4pWg==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/freebsd-x64/-/freebsd-x64-0.25.6.tgz", + "integrity": "sha512-2SkqTjTSo2dYi/jzFbU9Plt1vk0+nNg8YC8rOXXea+iA3hfNJWebKYPs3xnOUf9+ZWhKAaxnQNUf2X9LOpeiMQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/linux-arm/-/linux-arm-0.25.6.tgz", + "integrity": "sha512-SZHQlzvqv4Du5PrKE2faN0qlbsaW/3QQfUUc6yO2EjFcA83xnwm91UbEEVx4ApZ9Z5oG8Bxz4qPE+HFwtVcfyw==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/linux-arm64/-/linux-arm64-0.25.6.tgz", + "integrity": "sha512-b967hU0gqKd9Drsh/UuAm21Khpoh6mPBSgz8mKRq4P5mVK8bpA+hQzmm/ZwGVULSNBzKdZPQBRT3+WuVavcWsQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/linux-ia32/-/linux-ia32-0.25.6.tgz", + "integrity": "sha512-aHWdQ2AAltRkLPOsKdi3xv0mZ8fUGPdlKEjIEhxCPm5yKEThcUjHpWB1idN74lfXGnZ5SULQSgtr5Qos5B0bPw==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/linux-loong64/-/linux-loong64-0.25.6.tgz", + "integrity": "sha512-VgKCsHdXRSQ7E1+QXGdRPlQ/e08bN6WMQb27/TMfV+vPjjTImuT9PmLXupRlC90S1JeNNW5lzkAEO/McKeJ2yg==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/linux-mips64el/-/linux-mips64el-0.25.6.tgz", + "integrity": "sha512-WViNlpivRKT9/py3kCmkHnn44GkGXVdXfdc4drNmRl15zVQ2+D2uFwdlGh6IuK5AAnGTo2qPB1Djppj+t78rzw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/linux-ppc64/-/linux-ppc64-0.25.6.tgz", + "integrity": 
"sha512-wyYKZ9NTdmAMb5730I38lBqVu6cKl4ZfYXIs31Baf8aoOtB4xSGi3THmDYt4BTFHk7/EcVixkOV2uZfwU3Q2Jw==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/linux-riscv64/-/linux-riscv64-0.25.6.tgz", + "integrity": "sha512-KZh7bAGGcrinEj4qzilJ4hqTY3Dg2U82c8bv+e1xqNqZCrCyc+TL9AUEn5WGKDzm3CfC5RODE/qc96OcbIe33w==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/linux-s390x/-/linux-s390x-0.25.6.tgz", + "integrity": "sha512-9N1LsTwAuE9oj6lHMyyAM+ucxGiVnEqUdp4v7IaMmrwb06ZTEVCIs3oPPplVsnjPfyjmxwHxHMF8b6vzUVAUGw==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/linux-x64/-/linux-x64-0.25.6.tgz", + "integrity": "sha512-A6bJB41b4lKFWRKNrWoP2LHsjVzNiaurf7wyj/XtFNTsnPuxwEBWHLty+ZE0dWBKuSK1fvKgrKaNjBS7qbFKig==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.6.tgz", + "integrity": "sha512-IjA+DcwoVpjEvyxZddDqBY+uJ2Snc6duLpjmkXm/v4xuS3H+3FkLZlDm9ZsAbF9rsfP3zeA0/ArNDORZgrxR/Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/netbsd-x64/-/netbsd-x64-0.25.6.tgz", + "integrity": "sha512-dUXuZr5WenIDlMHdMkvDc1FAu4xdWixTCRgP7RQLBOkkGgwuuzaGSYcOpW4jFxzpzL1ejb8yF620UxAqnBrR9g==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.6.tgz", + "integrity": "sha512-l8ZCvXP0tbTJ3iaqdNf3pjaOSd5ex/e6/omLIQCVBLmHTlfXW3zAxQ4fnDmPLOB1x9xrcSi/xtCWFwCZRIaEwg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/openbsd-x64/-/openbsd-x64-0.25.6.tgz", + "integrity": "sha512-hKrmDa0aOFOr71KQ/19JC7az1P0GWtCN1t2ahYAf4O007DHZt/dW8ym5+CUdJhQ/qkZmI1HAF8KkJbEFtCL7gw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/openharmony-arm64/-/openharmony-arm64-0.25.6.tgz", + "integrity": "sha512-+SqBcAWoB1fYKmpWoQP4pGtx+pUUC//RNYhFdbcSA16617cchuryuhOCRpPsjCblKukAckWsV+aQ3UKT/RMPcA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + 
"node": ">=18" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/sunos-x64/-/sunos-x64-0.25.6.tgz", + "integrity": "sha512-dyCGxv1/Br7MiSC42qinGL8KkG4kX0pEsdb0+TKhmJZgCUDBGmyo1/ArCjNGiOLiIAgdbWgmWgib4HoCi5t7kA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/win32-arm64/-/win32-arm64-0.25.6.tgz", + "integrity": "sha512-42QOgcZeZOvXfsCBJF5Afw73t4veOId//XD3i+/9gSkhSV6Gk3VPlWncctI+JcOyERv85FUo7RxuxGy+z8A43Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/win32-ia32/-/win32-ia32-0.25.6.tgz", + "integrity": "sha512-4AWhgXmDuYN7rJI6ORB+uU9DHLq/erBbuMoAuB4VWJTu5KtCgcKYPynF0YI1VkBNuEfjNlLrFr9KZPJzrtLkrQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/@esbuild/win32-x64/-/win32-x64-0.25.6.tgz", + "integrity": "sha512-NgJPHHbEpLQgDH2MjQu90pzW/5vvXIZ7KOnPyNBm92A6WgZ/7b6fJyUBjoumLqeOQQGqY2QjQxRo97ah4Sj0cA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.12", + "resolved": "https://mirrors.tencent.com/npm/@jridgewell/gen-mapping/-/gen-mapping-0.3.12.tgz", + "integrity": "sha512-OuLGC46TjB5BbN1dH8JULVVZY4WTdkF7tV9Ys6wLL1rubZnCMstOhNHueU5bLCrnRuDhKPDM4g6sw4Bel5Gzqg==", + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://mirrors.tencent.com/npm/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/source-map": { + "version": "0.3.10", + "resolved": "https://mirrors.tencent.com/npm/@jridgewell/source-map/-/source-map-0.3.10.tgz", + "integrity": "sha512-0pPkgz9dY+bijgistcTTJ5mR+ocqRXLuhXHYdzoMmmoJ2C9S46RCm2GMUbatPEUK9Yjy26IrAy8D/M00lLkv+Q==", + "license": "MIT", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.25" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.4", + "resolved": "https://mirrors.tencent.com/npm/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.4.tgz", + "integrity": "sha512-VT2+G1VQs/9oz078bLrYbecdZKs912zQlkelYpuf+SXF+QvZDYJlbx/LSx+meSAwdDFnF8FVXW92AVjjkVmgFw==" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.29", + "resolved": "https://mirrors.tencent.com/npm/@jridgewell/trace-mapping/-/trace-mapping-0.3.29.tgz", + "integrity": "sha512-uw6guiW/gcAGPDhLmd77/6lW8QLeiV5RUTsAX46Db6oLhGaVj4lhnPwb184s1bkc8kdVg/+h988dro8GRDpmYQ==", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@microsoft/fetch-event-source": { + "version": 
"2.0.1", + "resolved": "https://mirrors.tencent.com/npm/@microsoft/fetch-event-source/-/fetch-event-source-2.0.1.tgz", + "integrity": "sha512-W6CLUJ2eBMw3Rec70qrsEW0jOm/3twwJv21mrmj2yORiaVmVYGS4sSS5yUwvQc1ZlDLYGPnClVWmUUMagKNsfA==" + }, + "node_modules/@pagefind/darwin-arm64": { + "version": "1.3.0", + "resolved": "https://mirrors.tencent.com/npm/@pagefind/darwin-arm64/-/darwin-arm64-1.3.0.tgz", + "integrity": "sha512-365BEGl6ChOsauRjyVpBjXybflXAOvoMROw3TucAROHIcdBvXk9/2AmEvGFU0r75+vdQI4LJdJdpH4Y6Yqaj4A==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@pagefind/darwin-x64": { + "version": "1.3.0", + "resolved": "https://mirrors.tencent.com/npm/@pagefind/darwin-x64/-/darwin-x64-1.3.0.tgz", + "integrity": "sha512-zlGHA23uuXmS8z3XxEGmbHpWDxXfPZ47QS06tGUq0HDcZjXjXHeLG+cboOy828QIV5FXsm9MjfkP5e4ZNbOkow==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@pagefind/linux-arm64": { + "version": "1.3.0", + "resolved": "https://mirrors.tencent.com/npm/@pagefind/linux-arm64/-/linux-arm64-1.3.0.tgz", + "integrity": "sha512-8lsxNAiBRUk72JvetSBXs4WRpYrQrVJXjlRRnOL6UCdBN9Nlsz0t7hWstRk36+JqHpGWOKYiuHLzGYqYAqoOnQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@pagefind/linux-x64": { + "version": "1.3.0", + "resolved": "https://mirrors.tencent.com/npm/@pagefind/linux-x64/-/linux-x64-1.3.0.tgz", + "integrity": "sha512-hAvqdPJv7A20Ucb6FQGE6jhjqy+vZ6pf+s2tFMNtMBG+fzcdc91uTw7aP/1Vo5plD0dAOHwdxfkyw0ugal4kcQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@pagefind/windows-x64": { + "version": "1.3.0", + "resolved": "https://mirrors.tencent.com/npm/@pagefind/windows-x64/-/windows-x64-1.3.0.tgz", + "integrity": "sha512-BR1bIRWOMqkf8IoU576YDhij1Wd/Zf2kX/kCI0b2qzCKC8wcc2GQJaaRMCpzvCCrmliO4vtJ6RITp/AnoYUUmQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@popperjs/core": { + "version": "2.11.8", + "resolved": "https://mirrors.tencent.com/npm/@popperjs/core/-/core-2.11.8.tgz", + "integrity": "sha512-P1st0aksCrn9sGZhp8GMYwBnQsbvAWsZAX44oXNNvLHGqAOcoVxmjZiohstwQ7SqKnbR47akdNi+uleWD8+g6A==", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/popperjs" + } + }, + "node_modules/@rolldown/pluginutils": { + "version": "1.0.0-beta.19", + "resolved": "https://mirrors.tencent.com/npm/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.19.tgz", + "integrity": "sha512-3FL3mnMbPu0muGOCaKAhhFEYmqv9eTfPSJRJmANrCwtgK8VuxpsZDGK+m0LYAGoyO8+0j5uRe4PeyPDK1yA/hA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.45.0.tgz", + "integrity": "sha512-2o/FgACbji4tW1dzXOqAV15Eu7DdgbKsF2QKcxfG4xbh5iwU7yr5RRP5/U+0asQliSYv5M4o7BevlGIoSL0LXg==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-android-arm64": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.45.0.tgz", + "integrity": "sha512-PSZ0SvMOjEAxwZeTx32eI/j5xSYtDCRxGu5k9zvzoY77xUNssZM+WV6HYBLROpY5CkXsbQjvz40fBb7WPwDqtQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ] + }, + 
"node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.45.0.tgz", + "integrity": "sha512-BA4yPIPssPB2aRAWzmqzQ3y2/KotkLyZukVB7j3psK/U3nVJdceo6qr9pLM2xN6iRP/wKfxEbOb1yrlZH6sYZg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-darwin-x64": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.45.0.tgz", + "integrity": "sha512-Pr2o0lvTwsiG4HCr43Zy9xXrHspyMvsvEw4FwKYqhli4FuLE5FjcZzuQ4cfPe0iUFCvSQG6lACI0xj74FDZKRA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-freebsd-arm64": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.45.0.tgz", + "integrity": "sha512-lYE8LkE5h4a/+6VnnLiL14zWMPnx6wNbDG23GcYFpRW1V9hYWHAw9lBZ6ZUIrOaoK7NliF1sdwYGiVmziUF4vA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-freebsd-x64": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.45.0.tgz", + "integrity": "sha512-PVQWZK9sbzpvqC9Q0GlehNNSVHR+4m7+wET+7FgSnKG3ci5nAMgGmr9mGBXzAuE5SvguCKJ6mHL6vq1JaJ/gvw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-linux-arm-gnueabihf": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.45.0.tgz", + "integrity": "sha512-hLrmRl53prCcD+YXTfNvXd776HTxNh8wPAMllusQ+amcQmtgo3V5i/nkhPN6FakW+QVLoUUr2AsbtIRPFU3xIA==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.45.0.tgz", + "integrity": "sha512-XBKGSYcrkdiRRjl+8XvrUR3AosXU0NvF7VuqMsm7s5nRy+nt58ZMB19Jdp1RdqewLcaYnpk8zeVs/4MlLZEJxw==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.45.0.tgz", + "integrity": "sha512-fRvZZPUiBz7NztBE/2QnCS5AtqLVhXmUOPj9IHlfGEXkapgImf4W9+FSkL8cWqoAjozyUzqFmSc4zh2ooaeF6g==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.45.0.tgz", + "integrity": "sha512-Btv2WRZOcUGi8XU80XwIvzTg4U6+l6D0V6sZTrZx214nrwxw5nAi8hysaXj/mctyClWgesyuxbeLylCBNauimg==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loongarch64-gnu": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-loongarch64-gnu/-/rollup-linux-loongarch64-gnu-4.45.0.tgz", + "integrity": "sha512-Li0emNnwtUZdLwHjQPBxn4VWztcrw/h7mgLyHiEI5Z0MhpeFGlzaiBHpSNVOMB/xucjXTTcO+dhv469Djr16KA==", + "cpu": [ + 
"loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-powerpc64le-gnu": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.45.0.tgz", + "integrity": "sha512-sB8+pfkYx2kvpDCfd63d5ScYT0Fz1LO6jIb2zLZvmK9ob2D8DeVqrmBDE0iDK8KlBVmsTNzrjr3G1xV4eUZhSw==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.45.0.tgz", + "integrity": "sha512-5GQ6PFhh7E6jQm70p1aW05G2cap5zMOvO0se5JMecHeAdj5ZhWEHbJ4hiKpfi1nnnEdTauDXxPgXae/mqjow9w==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-musl": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.45.0.tgz", + "integrity": "sha512-N/euLsBd1rekWcuduakTo/dJw6U6sBP3eUq+RXM9RNfPuWTvG2w/WObDkIvJ2KChy6oxZmOSC08Ak2OJA0UiAA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-s390x-gnu": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.45.0.tgz", + "integrity": "sha512-2l9sA7d7QdikL0xQwNMO3xURBUNEWyHVHfAsHsUdq+E/pgLTUcCE+gih5PCdmyHmfTDeXUWVhqL0WZzg0nua3g==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.45.0.tgz", + "integrity": "sha512-XZdD3fEEQcwG2KrJDdEQu7NrHonPxxaV0/w2HpvINBdcqebz1aL+0vM2WFJq4DeiAVT6F5SUQas65HY5JDqoPw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.45.0.tgz", + "integrity": "sha512-7ayfgvtmmWgKWBkCGg5+xTQ0r5V1owVm67zTrsEY1008L5ro7mCyGYORomARt/OquB9KY7LpxVBZes+oSniAAQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.45.0.tgz", + "integrity": "sha512-B+IJgcBnE2bm93jEW5kHisqvPITs4ddLOROAcOc/diBgrEiQJJ6Qcjby75rFSmH5eMGrqJryUgJDhrfj942apQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.45.0.tgz", + "integrity": "sha512-+CXwwG66g0/FpWOnP/v1HnrGVSOygK/osUbu3wPRy8ECXjoYKjRAyfxYpDQOfghC5qPJYLPH0oN4MCOjwgdMug==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.45.0", + "resolved": 
"https://mirrors.tencent.com/npm/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.45.0.tgz", + "integrity": "sha512-SRf1cytG7wqcHVLrBc9VtPK4pU5wxiB/lNIkNmW2ApKXIg+RpqwHfsaEK+e7eH4A1BpI6BX/aBWXxZCIrJg3uA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@tsconfig/node22": { + "version": "22.0.2", + "resolved": "https://mirrors.tencent.com/npm/@tsconfig/node22/-/node22-22.0.2.tgz", + "integrity": "sha512-Kmwj4u8sDRDrMYRoN9FDEcXD8UpBSaPQQ24Gz+Gamqfm7xxn+GBR7ge/Z7pK8OXNGyUzbSwJj+TH6B+DS/epyA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/eslint": { + "version": "9.6.1", + "resolved": "https://mirrors.tencent.com/npm/@types/eslint/-/eslint-9.6.1.tgz", + "integrity": "sha512-FXx2pKgId/WyYo2jXw63kk7/+TY7u7AziEJxJAnSFzHlqTAS3Ync6SvgYAN/k4/PQpnnVuzoMuVnByKK2qp0ag==", + "license": "MIT", + "dependencies": { + "@types/estree": "*", + "@types/json-schema": "*" + } + }, + "node_modules/@types/eslint-scope": { + "version": "3.7.7", + "resolved": "https://mirrors.tencent.com/npm/@types/eslint-scope/-/eslint-scope-3.7.7.tgz", + "integrity": "sha512-MzMFlSLBqNF2gcHWO0G1vP/YQyfvrxZ0bF+u7mzUdZ1/xK4A4sru+nraZz5i3iEIk1l1uyicaDVTB4QbbEkAYg==", + "license": "MIT", + "dependencies": { + "@types/eslint": "*", + "@types/estree": "*" + } + }, + "node_modules/@types/estree": { + "version": "1.0.8", + "resolved": "https://mirrors.tencent.com/npm/@types/estree/-/estree-1.0.8.tgz", + "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==" + }, + "node_modules/@types/json-schema": { + "version": "7.0.15", + "resolved": "https://mirrors.tencent.com/npm/@types/json-schema/-/json-schema-7.0.15.tgz", + "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==" + }, + "node_modules/@types/lodash": { + "version": "4.17.20", + "resolved": "https://mirrors.tencent.com/npm/@types/lodash/-/lodash-4.17.20.tgz", + "integrity": "sha512-H3MHACvFUEiujabxhaI/ImO6gUrd8oOurg7LQtS7mbwIXA/cUqWrvBsaeJ23aZEPk1TAYkurjfMbSELfoCXlGA==", + "license": "MIT" + }, + "node_modules/@types/lodash-es": { + "version": "4.17.12", + "resolved": "https://mirrors.tencent.com/npm/@types/lodash-es/-/lodash-es-4.17.12.tgz", + "integrity": "sha512-0NgftHUcV4v34VhXm8QBSftKVXtbkBG3ViCjs6+eJ5a6y6Mi/jiFGPc1sC7QK+9BFhWrURE3EOggmWaSxL9OzQ==", + "license": "MIT", + "dependencies": { + "@types/lodash": "*" + } + }, + "node_modules/@types/marked": { + "version": "5.0.2", + "resolved": "https://mirrors.tencent.com/npm/@types/marked/-/marked-5.0.2.tgz", + "integrity": "sha512-OucS4KMHhFzhz27KxmWg7J+kIYqyqoW5kdIEI319hqARQQUTqhao3M/F+uFnDXD0Rg72iDDZxZNxq5gvctmLlg==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "22.16.3", + "resolved": "https://mirrors.tencent.com/npm/@types/node/-/node-22.16.3.tgz", + "integrity": "sha512-sr4Xz74KOUeYadexo1r8imhRtlVXcs+j3XK3TcoiYk7B1t3YRVJgtaD3cwX73NYb71pmVuMLNRhJ9XKdoDB74g==", + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/@types/sortablejs": { + "version": "1.15.8", + "resolved": "https://mirrors.tencent.com/npm/@types/sortablejs/-/sortablejs-1.15.8.tgz", + "integrity": "sha512-b79830lW+RZfwaztgs1aVPgbasJ8e7AXtZYHTELNXZPsERt4ymJdjV4OccDbHQAvHrCcFpbF78jkm0R6h/pZVg==", + "license": "MIT" + }, + "node_modules/@types/tinycolor2": { + "version": "1.4.6", + "resolved": "https://mirrors.tencent.com/npm/@types/tinycolor2/-/tinycolor2-1.4.6.tgz", + 
"integrity": "sha512-iEN8J0BoMnsWBqjVbWH/c0G0Hh7O21lpR2/+PrvAVgWdzL7eexIFm4JN/Wn10PTcmNdtS6U67r499mlWMXOxNw==" + }, + "node_modules/@types/validator": { + "version": "13.15.2", + "resolved": "https://mirrors.tencent.com/npm/@types/validator/-/validator-13.15.2.tgz", + "integrity": "sha512-y7pa/oEJJ4iGYBxOpfAKn5b9+xuihvzDVnC/OSvlVnGxVg0pOqmjiMafiJ1KVNQEaPZf9HsEp5icEwGg8uIe5Q==", + "license": "MIT" + }, + "node_modules/@vitejs/plugin-vue": { + "version": "6.0.0", + "resolved": "https://mirrors.tencent.com/npm/@vitejs/plugin-vue/-/plugin-vue-6.0.0.tgz", + "integrity": "sha512-iAliE72WsdhjzTOp2DtvKThq1VBC4REhwRcaA+zPAAph6I+OQhUXv+Xu2KS7ElxYtb7Zc/3R30Hwv1DxEo7NXQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@rolldown/pluginutils": "1.0.0-beta.19" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "peerDependencies": { + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0", + "vue": "^3.2.25" + } + }, + "node_modules/@vitejs/plugin-vue-jsx": { + "version": "5.0.1", + "resolved": "https://mirrors.tencent.com/npm/@vitejs/plugin-vue-jsx/-/plugin-vue-jsx-5.0.1.tgz", + "integrity": "sha512-X7qmQMXbdDh+sfHUttXokPD0cjPkMFoae7SgbkF9vi3idGUKmxLcnU2Ug49FHwiKXebfzQRIm5yK3sfCJzNBbg==", + "dev": true, + "dependencies": { + "@babel/core": "^7.27.7", + "@babel/plugin-transform-typescript": "^7.27.1", + "@rolldown/pluginutils": "^1.0.0-beta.21", + "@vue/babel-plugin-jsx": "^1.4.0" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "peerDependencies": { + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0", + "vue": "^3.0.0" + } + }, + "node_modules/@vitejs/plugin-vue-jsx/node_modules/@rolldown/pluginutils": { + "version": "1.0.0-beta.27", + "resolved": "https://mirrors.tencent.com/npm/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz", + "integrity": "sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@volar/language-core": { + "version": "2.4.15", + "resolved": "https://mirrors.tencent.com/npm/@volar/language-core/-/language-core-2.4.15.tgz", + "integrity": "sha512-3VHw+QZU0ZG9IuQmzT68IyN4hZNd9GchGPhbD9+pa8CVv7rnoOZwo7T8weIbrRmihqy3ATpdfXFnqRrfPVK6CA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@volar/source-map": "2.4.15" + } + }, + "node_modules/@volar/source-map": { + "version": "2.4.15", + "resolved": "https://mirrors.tencent.com/npm/@volar/source-map/-/source-map-2.4.15.tgz", + "integrity": "sha512-CPbMWlUN6hVZJYGcU/GSoHu4EnCHiLaXI9n8c9la6RaI9W5JHX+NqG+GSQcB0JdC2FIBLdZJwGsfKyBB71VlTg==", + "dev": true, + "license": "MIT" + }, + "node_modules/@volar/typescript": { + "version": "2.4.15", + "resolved": "https://mirrors.tencent.com/npm/@volar/typescript/-/typescript-2.4.15.tgz", + "integrity": "sha512-2aZ8i0cqPGjXb4BhkMsPYDkkuc2ZQ6yOpqwAuNwUoncELqoy5fRgOQtLR9gB0g902iS0NAkvpIzs27geVyVdPg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@volar/language-core": "2.4.15", + "path-browserify": "^1.0.1", + "vscode-uri": "^3.0.8" + } + }, + "node_modules/@vue/babel-helper-vue-transform-on": { + "version": "1.4.0", + "resolved": "https://mirrors.tencent.com/npm/@vue/babel-helper-vue-transform-on/-/babel-helper-vue-transform-on-1.4.0.tgz", + "integrity": "sha512-mCokbouEQ/ocRce/FpKCRItGo+013tHg7tixg3DUNS+6bmIchPt66012kBMm476vyEIJPafrvOf4E5OYj3shSw==", + "dev": true + }, + "node_modules/@vue/babel-plugin-jsx": { + "version": "1.4.0", + "resolved": "https://mirrors.tencent.com/npm/@vue/babel-plugin-jsx/-/babel-plugin-jsx-1.4.0.tgz", + "integrity": 
"sha512-9zAHmwgMWlaN6qRKdrg1uKsBKHvnUU+Py+MOCTuYZBoZsopa90Di10QRjB+YPnVss0BZbG/H5XFwJY1fTxJWhA==", + "dev": true, + "dependencies": { + "@babel/helper-module-imports": "^7.25.9", + "@babel/helper-plugin-utils": "^7.26.5", + "@babel/plugin-syntax-jsx": "^7.25.9", + "@babel/template": "^7.26.9", + "@babel/traverse": "^7.26.9", + "@babel/types": "^7.26.9", + "@vue/babel-helper-vue-transform-on": "1.4.0", + "@vue/babel-plugin-resolve-type": "1.4.0", + "@vue/shared": "^3.5.13" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + }, + "peerDependenciesMeta": { + "@babel/core": { + "optional": true + } + } + }, + "node_modules/@vue/babel-plugin-resolve-type": { + "version": "1.4.0", + "resolved": "https://mirrors.tencent.com/npm/@vue/babel-plugin-resolve-type/-/babel-plugin-resolve-type-1.4.0.tgz", + "integrity": "sha512-4xqDRRbQQEWHQyjlYSgZsWj44KfiF6D+ktCuXyZ8EnVDYV3pztmXJDf1HveAjUAXxAnR8daCQT51RneWWxtTyQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.26.2", + "@babel/helper-module-imports": "^7.25.9", + "@babel/helper-plugin-utils": "^7.26.5", + "@babel/parser": "^7.26.9", + "@vue/compiler-sfc": "^3.5.13" + }, + "funding": { + "url": "https://github.com/sponsors/sxzz" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@vue/compiler-core": { + "version": "3.5.17", + "resolved": "https://mirrors.tencent.com/npm/@vue/compiler-core/-/compiler-core-3.5.17.tgz", + "integrity": "sha512-Xe+AittLbAyV0pabcN7cP7/BenRBNcteM4aSDCtRvGw0d9OL+HG1u/XHLY/kt1q4fyMeZYXyIYrsHuPSiDPosA==", + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.27.5", + "@vue/shared": "3.5.17", + "entities": "^4.5.0", + "estree-walker": "^2.0.2", + "source-map-js": "^1.2.1" + } + }, + "node_modules/@vue/compiler-dom": { + "version": "3.5.17", + "resolved": "https://mirrors.tencent.com/npm/@vue/compiler-dom/-/compiler-dom-3.5.17.tgz", + "integrity": "sha512-+2UgfLKoaNLhgfhV5Ihnk6wB4ljyW1/7wUIog2puUqajiC29Lp5R/IKDdkebh9jTbTogTbsgB+OY9cEWzG95JQ==", + "dependencies": { + "@vue/compiler-core": "3.5.17", + "@vue/shared": "3.5.17" + } + }, + "node_modules/@vue/compiler-sfc": { + "version": "3.5.17", + "resolved": "https://mirrors.tencent.com/npm/@vue/compiler-sfc/-/compiler-sfc-3.5.17.tgz", + "integrity": "sha512-rQQxbRJMgTqwRugtjw0cnyQv9cP4/4BxWfTdRBkqsTfLOHWykLzbOc3C4GGzAmdMDxhzU/1Ija5bTjMVrddqww==", + "dependencies": { + "@babel/parser": "^7.27.5", + "@vue/compiler-core": "3.5.17", + "@vue/compiler-dom": "3.5.17", + "@vue/compiler-ssr": "3.5.17", + "@vue/shared": "3.5.17", + "estree-walker": "^2.0.2", + "magic-string": "^0.30.17", + "postcss": "^8.5.6", + "source-map-js": "^1.2.1" + } + }, + "node_modules/@vue/compiler-ssr": { + "version": "3.5.17", + "resolved": "https://mirrors.tencent.com/npm/@vue/compiler-ssr/-/compiler-ssr-3.5.17.tgz", + "integrity": "sha512-hkDbA0Q20ZzGgpj5uZjb9rBzQtIHLS78mMilwrlpWk2Ep37DYntUz0PonQ6kr113vfOEdM+zTBuJDaceNIW0tQ==", + "dependencies": { + "@vue/compiler-dom": "3.5.17", + "@vue/shared": "3.5.17" + } + }, + "node_modules/@vue/compiler-vue2": { + "version": "2.7.16", + "resolved": "https://mirrors.tencent.com/npm/@vue/compiler-vue2/-/compiler-vue2-2.7.16.tgz", + "integrity": "sha512-qYC3Psj9S/mfu9uVi5WvNZIzq+xnXMhOwbTFKKDD7b1lhpnn71jXSFdTQ+WsIEk0ONCd7VV2IMm7ONl6tbQ86A==", + "dev": true, + "license": "MIT", + "dependencies": { + "de-indent": "^1.0.2", + "he": "^1.2.0" + } + }, + "node_modules/@vue/devtools-api": { + "version": "7.7.7", + "resolved": 
"https://mirrors.tencent.com/npm/@vue/devtools-api/-/devtools-api-7.7.7.tgz", + "integrity": "sha512-lwOnNBH2e7x1fIIbVT7yF5D+YWhqELm55/4ZKf45R9T8r9dE2AIOy8HKjfqzGsoTHFbWbr337O4E0A0QADnjBg==", + "license": "MIT", + "dependencies": { + "@vue/devtools-kit": "^7.7.7" + } + }, + "node_modules/@vue/devtools-kit": { + "version": "7.7.7", + "resolved": "https://mirrors.tencent.com/npm/@vue/devtools-kit/-/devtools-kit-7.7.7.tgz", + "integrity": "sha512-wgoZtxcTta65cnZ1Q6MbAfePVFxfM+gq0saaeytoph7nEa7yMXoi6sCPy4ufO111B9msnw0VOWjPEFCXuAKRHA==", + "license": "MIT", + "dependencies": { + "@vue/devtools-shared": "^7.7.7", + "birpc": "^2.3.0", + "hookable": "^5.5.3", + "mitt": "^3.0.1", + "perfect-debounce": "^1.0.0", + "speakingurl": "^14.0.1", + "superjson": "^2.2.2" + } + }, + "node_modules/@vue/devtools-shared": { + "version": "7.7.7", + "resolved": "https://mirrors.tencent.com/npm/@vue/devtools-shared/-/devtools-shared-7.7.7.tgz", + "integrity": "sha512-+udSj47aRl5aKb0memBvcUG9koarqnxNM5yjuREvqwK6T3ap4mn3Zqqc17QrBFTqSMjr3HK1cvStEZpMDpfdyw==", + "license": "MIT", + "dependencies": { + "rfdc": "^1.4.1" + } + }, + "node_modules/@vue/language-core": { + "version": "2.2.12", + "resolved": "https://mirrors.tencent.com/npm/@vue/language-core/-/language-core-2.2.12.tgz", + "integrity": "sha512-IsGljWbKGU1MZpBPN+BvPAdr55YPkj2nB/TBNGNC32Vy2qLG25DYu/NBN2vNtZqdRbTRjaoYrahLrToim2NanA==", + "dev": true, + "dependencies": { + "@volar/language-core": "2.4.15", + "@vue/compiler-dom": "^3.5.0", + "@vue/compiler-vue2": "^2.7.16", + "@vue/shared": "^3.5.0", + "alien-signals": "^1.0.3", + "minimatch": "^9.0.3", + "muggle-string": "^0.4.1", + "path-browserify": "^1.0.1" + }, + "peerDependencies": { + "typescript": "*" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@vue/reactivity": { + "version": "3.5.17", + "resolved": "https://mirrors.tencent.com/npm/@vue/reactivity/-/reactivity-3.5.17.tgz", + "integrity": "sha512-l/rmw2STIscWi7SNJp708FK4Kofs97zc/5aEPQh4bOsReD/8ICuBcEmS7KGwDj5ODQLYWVN2lNibKJL1z5b+Lw==", + "license": "MIT", + "dependencies": { + "@vue/shared": "3.5.17" + } + }, + "node_modules/@vue/runtime-core": { + "version": "3.5.17", + "resolved": "https://mirrors.tencent.com/npm/@vue/runtime-core/-/runtime-core-3.5.17.tgz", + "integrity": "sha512-QQLXa20dHg1R0ri4bjKeGFKEkJA7MMBxrKo2G+gJikmumRS7PTD4BOU9FKrDQWMKowz7frJJGqBffYMgQYS96Q==", + "dependencies": { + "@vue/reactivity": "3.5.17", + "@vue/shared": "3.5.17" + } + }, + "node_modules/@vue/runtime-dom": { + "version": "3.5.17", + "resolved": "https://mirrors.tencent.com/npm/@vue/runtime-dom/-/runtime-dom-3.5.17.tgz", + "integrity": "sha512-8El0M60TcwZ1QMz4/os2MdlQECgGoVHPuLnQBU3m9h3gdNRW9xRmI8iLS4t/22OQlOE6aJvNNlBiCzPHur4H9g==", + "license": "MIT", + "dependencies": { + "@vue/reactivity": "3.5.17", + "@vue/runtime-core": "3.5.17", + "@vue/shared": "3.5.17", + "csstype": "^3.1.3" + } + }, + "node_modules/@vue/server-renderer": { + "version": "3.5.17", + "resolved": "https://mirrors.tencent.com/npm/@vue/server-renderer/-/server-renderer-3.5.17.tgz", + "integrity": "sha512-BOHhm8HalujY6lmC3DbqF6uXN/K00uWiEeF22LfEsm9Q93XeJ/plHTepGwf6tqFcF7GA5oGSSAAUock3VvzaCA==", + "dependencies": { + "@vue/compiler-ssr": "3.5.17", + "@vue/shared": "3.5.17" + }, + "peerDependencies": { + "vue": "3.5.17" + } + }, + "node_modules/@vue/shared": { + "version": "3.5.17", + "resolved": "https://mirrors.tencent.com/npm/@vue/shared/-/shared-3.5.17.tgz", + "integrity": 
"sha512-CabR+UN630VnsJO/jHWYBC1YVXyMq94KKp6iF5MQgZJs5I8cmjw6oVMO1oDbtBkENSHSSn/UadWlW/OAgdmKrg==", + "license": "MIT" + }, + "node_modules/@vue/tsconfig": { + "version": "0.7.0", + "resolved": "https://mirrors.tencent.com/npm/@vue/tsconfig/-/tsconfig-0.7.0.tgz", + "integrity": "sha512-ku2uNz5MaZ9IerPPUyOHzyjhXoX2kVJaVf7hL315DC17vS6IiZRmmCPfggNbU16QTvM80+uYYy3eYJB59WCtvg==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "typescript": "5.x", + "vue": "^3.4.0" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + }, + "vue": { + "optional": true + } + } + }, + "node_modules/@webassemblyjs/ast": { + "version": "1.14.1", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/ast/-/ast-1.14.1.tgz", + "integrity": "sha512-nuBEDgQfm1ccRp/8bCQrx1frohyufl4JlbMMZ4P1wpeOfDhF6FQkxZJ1b/e+PLwr6X1Nhw6OLme5usuBWYBvuQ==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/helper-numbers": "1.13.2", + "@webassemblyjs/helper-wasm-bytecode": "1.13.2" + } + }, + "node_modules/@webassemblyjs/floating-point-hex-parser": { + "version": "1.13.2", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.13.2.tgz", + "integrity": "sha512-6oXyTOzbKxGH4steLbLNOu71Oj+C8Lg34n6CqRvqfS2O71BxY6ByfMDRhBytzknj9yGUPVJ1qIKhRlAwO1AovA==" + }, + "node_modules/@webassemblyjs/helper-api-error": { + "version": "1.13.2", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/helper-api-error/-/helper-api-error-1.13.2.tgz", + "integrity": "sha512-U56GMYxy4ZQCbDZd6JuvvNV/WFildOjsaWD3Tzzvmw/mas3cXzRJPMjP83JqEsgSbyrmaGjBfDtV7KDXV9UzFQ==", + "license": "MIT" + }, + "node_modules/@webassemblyjs/helper-buffer": { + "version": "1.14.1", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/helper-buffer/-/helper-buffer-1.14.1.tgz", + "integrity": "sha512-jyH7wtcHiKssDtFPRB+iQdxlDf96m0E39yb0k5uJVhFGleZFoNw1c4aeIcVUPPbXUVJ94wwnMOAqUHyzoEPVMA==", + "license": "MIT" + }, + "node_modules/@webassemblyjs/helper-numbers": { + "version": "1.13.2", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/helper-numbers/-/helper-numbers-1.13.2.tgz", + "integrity": "sha512-FE8aCmS5Q6eQYcV3gI35O4J789wlQA+7JrqTTpJqn5emA4U2hvwJmvFRC0HODS+3Ye6WioDklgd6scJ3+PLnEA==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/floating-point-hex-parser": "1.13.2", + "@webassemblyjs/helper-api-error": "1.13.2", + "@xtuc/long": "4.2.2" + } + }, + "node_modules/@webassemblyjs/helper-wasm-bytecode": { + "version": "1.13.2", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.13.2.tgz", + "integrity": "sha512-3QbLKy93F0EAIXLh0ogEVR6rOubA9AoZ+WRYhNbFyuB70j3dRdwH9g+qXhLAO0kiYGlg3TxDV+I4rQTr/YNXkA==" + }, + "node_modules/@webassemblyjs/helper-wasm-section": { + "version": "1.14.1", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.14.1.tgz", + "integrity": "sha512-ds5mXEqTJ6oxRoqjhWDU83OgzAYjwsCV8Lo/N+oRsNDmx/ZDpqalmrtgOMkHwxsG0iI//3BwWAErYRHtgn0dZw==", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@webassemblyjs/helper-buffer": "1.14.1", + "@webassemblyjs/helper-wasm-bytecode": "1.13.2", + "@webassemblyjs/wasm-gen": "1.14.1" + } + }, + "node_modules/@webassemblyjs/ieee754": { + "version": "1.13.2", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/ieee754/-/ieee754-1.13.2.tgz", + "integrity": "sha512-4LtOzh58S/5lX4ITKxnAK2USuNEvpdVV9AlgGQb8rJDHaLeHciwG4zlGr0j/SNWlr7x3vO1lDEsuePvtcDNCkw==", + "dependencies": { 
+ "@xtuc/ieee754": "^1.2.0" + } + }, + "node_modules/@webassemblyjs/leb128": { + "version": "1.13.2", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/leb128/-/leb128-1.13.2.tgz", + "integrity": "sha512-Lde1oNoIdzVzdkNEAWZ1dZ5orIbff80YPdHx20mrHwHrVNNTjNr8E3xz9BdpcGqRQbAEa+fkrCb+fRFTl/6sQw==", + "dependencies": { + "@xtuc/long": "4.2.2" + } + }, + "node_modules/@webassemblyjs/utf8": { + "version": "1.13.2", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/utf8/-/utf8-1.13.2.tgz", + "integrity": "sha512-3NQWGjKTASY1xV5m7Hr0iPeXD9+RDobLll3T9d2AO+g3my8xy5peVyjSag4I50mR1bBSN/Ct12lo+R9tJk0NZQ==", + "license": "MIT" + }, + "node_modules/@webassemblyjs/wasm-edit": { + "version": "1.14.1", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/wasm-edit/-/wasm-edit-1.14.1.tgz", + "integrity": "sha512-RNJUIQH/J8iA/1NzlE4N7KtyZNHi3w7at7hDjvRNm5rcUXa00z1vRz3glZoULfJ5mpvYhLybmVcwcjGrC1pRrQ==", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@webassemblyjs/helper-buffer": "1.14.1", + "@webassemblyjs/helper-wasm-bytecode": "1.13.2", + "@webassemblyjs/helper-wasm-section": "1.14.1", + "@webassemblyjs/wasm-gen": "1.14.1", + "@webassemblyjs/wasm-opt": "1.14.1", + "@webassemblyjs/wasm-parser": "1.14.1", + "@webassemblyjs/wast-printer": "1.14.1" + } + }, + "node_modules/@webassemblyjs/wasm-gen": { + "version": "1.14.1", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/wasm-gen/-/wasm-gen-1.14.1.tgz", + "integrity": "sha512-AmomSIjP8ZbfGQhumkNvgC33AY7qtMCXnN6bL2u2Js4gVCg8fp735aEiMSBbDR7UQIj90n4wKAFUSEd0QN2Ukg==", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@webassemblyjs/helper-wasm-bytecode": "1.13.2", + "@webassemblyjs/ieee754": "1.13.2", + "@webassemblyjs/leb128": "1.13.2", + "@webassemblyjs/utf8": "1.13.2" + } + }, + "node_modules/@webassemblyjs/wasm-opt": { + "version": "1.14.1", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/wasm-opt/-/wasm-opt-1.14.1.tgz", + "integrity": "sha512-PTcKLUNvBqnY2U6E5bdOQcSM+oVP/PmrDY9NzowJjislEjwP/C4an2303MCVS2Mg9d3AJpIGdUFIQQWbPds0Sw==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@webassemblyjs/helper-buffer": "1.14.1", + "@webassemblyjs/wasm-gen": "1.14.1", + "@webassemblyjs/wasm-parser": "1.14.1" + } + }, + "node_modules/@webassemblyjs/wasm-parser": { + "version": "1.14.1", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/wasm-parser/-/wasm-parser-1.14.1.tgz", + "integrity": "sha512-JLBl+KZ0R5qB7mCnud/yyX08jWFw5MsoalJ1pQ4EdFlgj9VdXKGuENGsiCIjegI1W7p91rUlcB/LB5yRJKNTcQ==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@webassemblyjs/helper-api-error": "1.13.2", + "@webassemblyjs/helper-wasm-bytecode": "1.13.2", + "@webassemblyjs/ieee754": "1.13.2", + "@webassemblyjs/leb128": "1.13.2", + "@webassemblyjs/utf8": "1.13.2" + } + }, + "node_modules/@webassemblyjs/wast-printer": { + "version": "1.14.1", + "resolved": "https://mirrors.tencent.com/npm/@webassemblyjs/wast-printer/-/wast-printer-1.14.1.tgz", + "integrity": "sha512-kPSSXE6De1XOR820C90RIo2ogvZG+c3KiHzqUoO/F34Y2shGzesfqv7o57xrxovZJH/MetF5UjroJ/R/3isoiw==", + "license": "MIT", + "dependencies": { + "@webassemblyjs/ast": "1.14.1", + "@xtuc/long": "4.2.2" + } + }, + "node_modules/@xtuc/ieee754": { + "version": "1.2.0", + "resolved": "https://mirrors.tencent.com/npm/@xtuc/ieee754/-/ieee754-1.2.0.tgz", + "integrity": "sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==" + }, + "node_modules/@xtuc/long": { + "version": 
"4.2.2", + "resolved": "https://mirrors.tencent.com/npm/@xtuc/long/-/long-4.2.2.tgz", + "integrity": "sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==", + "license": "Apache-2.0" + }, + "node_modules/acorn": { + "version": "8.15.0", + "resolved": "https://mirrors.tencent.com/npm/acorn/-/acorn-8.15.0.tgz", + "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-import-phases": { + "version": "1.0.4", + "resolved": "https://mirrors.tencent.com/npm/acorn-import-phases/-/acorn-import-phases-1.0.4.tgz", + "integrity": "sha512-wKmbr/DDiIXzEOiWrTTUcDm24kQ2vGfZQvM2fwg2vXqR5uW6aapr7ObPtj1th32b9u90/Pf4AItvdTh42fBmVQ==", + "license": "MIT", + "engines": { + "node": ">=10.13.0" + }, + "peerDependencies": { + "acorn": "^8.14.0" + } + }, + "node_modules/ajv": { + "version": "8.17.1", + "resolved": "https://mirrors.tencent.com/npm/ajv/-/ajv-8.17.1.tgz", + "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats": { + "version": "2.1.1", + "resolved": "https://mirrors.tencent.com/npm/ajv-formats/-/ajv-formats-2.1.1.tgz", + "integrity": "sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==", + "dependencies": { + "ajv": "^8.0.0" + }, + "peerDependencies": { + "ajv": "^8.0.0" + }, + "peerDependenciesMeta": { + "ajv": { + "optional": true + } + } + }, + "node_modules/ajv-keywords": { + "version": "5.1.0", + "resolved": "https://mirrors.tencent.com/npm/ajv-keywords/-/ajv-keywords-5.1.0.tgz", + "integrity": "sha512-YCS/JNFAUyr5vAuhk1DWm1CBxRHW9LbJ2ozWeemrIqpbsqKjHVxYPyi5GC0rjZIT5JxJ3virVTS8wk4i/Z+krw==", + "dependencies": { + "fast-deep-equal": "^3.1.3" + }, + "peerDependencies": { + "ajv": "^8.8.2" + } + }, + "node_modules/alien-signals": { + "version": "1.0.13", + "resolved": "https://mirrors.tencent.com/npm/alien-signals/-/alien-signals-1.0.13.tgz", + "integrity": "sha512-OGj9yyTnJEttvzhTUWuscOvtqxq5vrhF7vL9oS0xJ2mK0ItPYP1/y+vCFebfxoEyAz0++1AIwJ5CMr+Fk3nDmg==", + "dev": true, + "license": "MIT" + }, + "node_modules/ansi-styles": { + "version": "6.2.1", + "resolved": "https://mirrors.tencent.com/npm/ansi-styles/-/ansi-styles-6.2.1.tgz", + "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://mirrors.tencent.com/npm/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "node_modules/axios": { + "version": "1.10.0", + "resolved": "https://mirrors.tencent.com/npm/axios/-/axios-1.10.0.tgz", + "integrity": "sha512-/1xYAC4MP/HEG+3duIhFr4ZQXR4sQXOIe+o6sdqzeykGLx6Upp/1p8MHqhINOvGeP7xyNHe7tsiJByc4SSVUxw==", + "dependencies": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/balanced-match": { + "version": 
"1.0.2", + "resolved": "https://mirrors.tencent.com/npm/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "node_modules/birpc": { + "version": "2.5.0", + "resolved": "https://mirrors.tencent.com/npm/birpc/-/birpc-2.5.0.tgz", + "integrity": "sha512-VSWO/W6nNQdyP520F1mhf+Lc2f8pjGQOtoHHm7Ze8Go1kX7akpVIrtTa0fn+HB0QJEDVacl6aO08YE0PgXfdnQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/brace-expansion": { + "version": "2.0.2", + "resolved": "https://mirrors.tencent.com/npm/brace-expansion/-/brace-expansion-2.0.2.tgz", + "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/browserslist": { + "version": "4.25.1", + "resolved": "https://mirrors.tencent.com/npm/browserslist/-/browserslist-4.25.1.tgz", + "integrity": "sha512-KGj0KoOMXLpSNkkEI6Z6mShmQy0bc1I+T7K9N81k4WWMrfz+6fQ6es80B/YLAeRoKvjYE1YSHHOW1qe9xIVzHw==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "caniuse-lite": "^1.0.30001726", + "electron-to-chromium": "^1.5.173", + "node-releases": "^2.0.19", + "update-browserslist-db": "^1.1.3" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + } + }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://mirrors.tencent.com/npm/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "license": "MIT" + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://mirrors.tencent.com/npm/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001727", + "resolved": "https://mirrors.tencent.com/npm/caniuse-lite/-/caniuse-lite-1.0.30001727.tgz", + "integrity": "sha512-pB68nIHmbN6L/4C6MH1DokyR3bYqFwjaSs/sWDHGj4CTcFtQUQMuJftVwWkXq7mNWOybD3KhUv3oWHoGxgP14Q==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "CC-BY-4.0" + }, + "node_modules/chrome-trace-event": { + "version": "1.0.4", + "resolved": "https://mirrors.tencent.com/npm/chrome-trace-event/-/chrome-trace-event-1.0.4.tgz", + "integrity": "sha512-rNjApaLzuwaOTjCiT8lSDdGN1APCiqkChLMJxJPWLunPAt5fy8xgU9/jNOchV84wfIxrA0lRQB7oCT8jrn/wrQ==", + "license": "MIT", + "engines": { + "node": ">=6.0" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://mirrors.tencent.com/npm/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": 
"sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/commander": { + "version": "2.20.3", + "resolved": "https://mirrors.tencent.com/npm/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==" + }, + "node_modules/convert-source-map": { + "version": "2.0.0", + "resolved": "https://mirrors.tencent.com/npm/convert-source-map/-/convert-source-map-2.0.0.tgz", + "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", + "dev": true, + "license": "MIT" + }, + "node_modules/copy-anything": { + "version": "2.0.6", + "resolved": "https://mirrors.tencent.com/npm/copy-anything/-/copy-anything-2.0.6.tgz", + "integrity": "sha512-1j20GZTsvKNkc4BY3NpMOM8tt///wY3FpIzozTOFO2ffuZcV61nojHXVKIy3WM+7ADCy5FVhdZYHYDdgTU0yJw==", + "dev": true, + "dependencies": { + "is-what": "^3.14.1" + }, + "funding": { + "url": "https://github.com/sponsors/mesqueeb" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://mirrors.tencent.com/npm/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/cross-spawn/node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://mirrors.tencent.com/npm/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true + }, + "node_modules/cross-spawn/node_modules/which": { + "version": "2.0.2", + "resolved": "https://mirrors.tencent.com/npm/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/csstype": { + "version": "3.1.3", + "resolved": "https://mirrors.tencent.com/npm/csstype/-/csstype-3.1.3.tgz", + "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==" + }, + "node_modules/dayjs": { + "version": "1.11.10", + "resolved": "https://mirrors.tencent.com/npm/dayjs/-/dayjs-1.11.10.tgz", + "integrity": "sha512-vjAczensTgRcqDERK0SR2XMwsF/tSvnvlv6VcF2GIhg6Sx4yOIt/irsr1RDJsKiIyBzJDpCoXiWWq28MqH2cnQ==" + }, + "node_modules/de-indent": { + "version": "1.0.2", + "resolved": "https://mirrors.tencent.com/npm/de-indent/-/de-indent-1.0.2.tgz", + "integrity": "sha512-e/1zu3xH5MQryN2zdVaF0OrdNLUbvWxzMbi+iNA6Bky7l1RoP8a2fIbRocyHclXt/arDrrR6lL3TqFD9pMQTsg==", + "dev": true + }, + "node_modules/debug": { + "version": "4.4.1", + "resolved": "https://mirrors.tencent.com/npm/debug/-/debug-4.4.1.tgz", + "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", + "dev": true, + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": 
"https://mirrors.tencent.com/npm/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://mirrors.tencent.com/npm/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/electron-to-chromium": { + "version": "1.5.183", + "resolved": "https://mirrors.tencent.com/npm/electron-to-chromium/-/electron-to-chromium-1.5.183.tgz", + "integrity": "sha512-vCrDBYjQCAEefWGjlK3EpoSKfKbT10pR4XXPdn65q7snuNOZnthoVpBfZPykmDapOKfoD+MMIPG8ZjKyyc9oHA==" + }, + "node_modules/enhanced-resolve": { + "version": "5.18.2", + "resolved": "https://mirrors.tencent.com/npm/enhanced-resolve/-/enhanced-resolve-5.18.2.tgz", + "integrity": "sha512-6Jw4sE1maoRJo3q8MsSIn2onJFbLTOjY9hlx4DZXmOKvLRd1Ok2kXmAGXaafL2+ijsJZ1ClYbl/pmqr9+k4iUQ==", + "dependencies": { + "graceful-fs": "^4.2.4", + "tapable": "^2.2.0" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://mirrors.tencent.com/npm/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/errno": { + "version": "0.1.8", + "resolved": "https://mirrors.tencent.com/npm/errno/-/errno-0.1.8.tgz", + "integrity": "sha512-dJ6oBr5SQ1VSd9qkk7ByRgb/1SH4JZjCHSW/mr63/QcXO9zLVxvJ6Oy13nio03rxpSnVDDjFor75SjVeZWPW/A==", + "dev": true, + "optional": true, + "dependencies": { + "prr": "~1.0.1" + }, + "bin": { + "errno": "cli.js" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://mirrors.tencent.com/npm/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://mirrors.tencent.com/npm/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-module-lexer": { + "version": "1.7.0", + "resolved": "https://mirrors.tencent.com/npm/es-module-lexer/-/es-module-lexer-1.7.0.tgz", + "integrity": "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==" + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://mirrors.tencent.com/npm/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://mirrors.tencent.com/npm/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": 
"sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/esbuild": { + "version": "0.25.6", + "resolved": "https://mirrors.tencent.com/npm/esbuild/-/esbuild-0.25.6.tgz", + "integrity": "sha512-GVuzuUwtdsghE3ocJ9Bs8PNoF13HNQ5TXbEi2AhvVb8xU1Iwt9Fos9FEamfoee+u/TOsn7GUWc04lz46n2bbTg==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.25.6", + "@esbuild/android-arm": "0.25.6", + "@esbuild/android-arm64": "0.25.6", + "@esbuild/android-x64": "0.25.6", + "@esbuild/darwin-arm64": "0.25.6", + "@esbuild/darwin-x64": "0.25.6", + "@esbuild/freebsd-arm64": "0.25.6", + "@esbuild/freebsd-x64": "0.25.6", + "@esbuild/linux-arm": "0.25.6", + "@esbuild/linux-arm64": "0.25.6", + "@esbuild/linux-ia32": "0.25.6", + "@esbuild/linux-loong64": "0.25.6", + "@esbuild/linux-mips64el": "0.25.6", + "@esbuild/linux-ppc64": "0.25.6", + "@esbuild/linux-riscv64": "0.25.6", + "@esbuild/linux-s390x": "0.25.6", + "@esbuild/linux-x64": "0.25.6", + "@esbuild/netbsd-arm64": "0.25.6", + "@esbuild/netbsd-x64": "0.25.6", + "@esbuild/openbsd-arm64": "0.25.6", + "@esbuild/openbsd-x64": "0.25.6", + "@esbuild/openharmony-arm64": "0.25.6", + "@esbuild/sunos-x64": "0.25.6", + "@esbuild/win32-arm64": "0.25.6", + "@esbuild/win32-ia32": "0.25.6", + "@esbuild/win32-x64": "0.25.6" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://mirrors.tencent.com/npm/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "engines": { + "node": ">=6" + } + }, + "node_modules/eslint-scope": { + "version": "5.1.1", + "resolved": "https://mirrors.tencent.com/npm/eslint-scope/-/eslint-scope-5.1.1.tgz", + "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", + "dependencies": { + "esrecurse": "^4.3.0", + "estraverse": "^4.1.1" + }, + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/esrecurse": { + "version": "4.3.0", + "resolved": "https://mirrors.tencent.com/npm/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "license": "BSD-2-Clause", + "dependencies": { + "estraverse": "^5.2.0" + }, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esrecurse/node_modules/estraverse": { + "version": "5.3.0", + "resolved": "https://mirrors.tencent.com/npm/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estraverse": { + "version": "4.3.0", + "resolved": "https://mirrors.tencent.com/npm/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estree-walker": { + "version": "2.0.2", + "resolved": "https://mirrors.tencent.com/npm/estree-walker/-/estree-walker-2.0.2.tgz", + "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==" + }, + 
"node_modules/events": { + "version": "3.3.0", + "resolved": "https://mirrors.tencent.com/npm/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "engines": { + "node": ">=0.8.x" + } + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://mirrors.tencent.com/npm/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "license": "MIT" + }, + "node_modules/fast-uri": { + "version": "3.0.6", + "resolved": "https://mirrors.tencent.com/npm/fast-uri/-/fast-uri-3.0.6.tgz", + "integrity": "sha512-Atfo14OibSv5wAp4VWNsFYE1AchQRTv9cBGWET4pZWHzYshFSS9NQI6I57rdKn9croWVMbYFbLhJ+yJvmZIIHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/fdir": { + "version": "6.4.6", + "resolved": "https://mirrors.tencent.com/npm/fdir/-/fdir-6.4.6.tgz", + "integrity": "sha512-hiFoqpyZcfNm1yc4u8oWCf9A2c4D3QjCrks3zmoVKVxpQRzmPNar1hUJcBG2RQHvEVGDN+Jm81ZheVLAQMK6+w==", + "dev": true, + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/follow-redirects": { + "version": "1.15.9", + "resolved": "https://mirrors.tencent.com/npm/follow-redirects/-/follow-redirects-1.15.9.tgz", + "integrity": "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.3", + "resolved": "https://mirrors.tencent.com/npm/form-data/-/form-data-4.0.3.tgz", + "integrity": "sha512-qsITQPfmvMOSAdeyZ+12I1c+CKSstAFAwu+97zrnWAbIr5u8wfsExUzCesVLC8NgHuRUqNN4Zy6UPWUTRGslcA==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://mirrors.tencent.com/npm/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://mirrors.tencent.com/npm/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/gensync": { + "version": "1.0.0-beta.2", + "resolved": "https://mirrors.tencent.com/npm/gensync/-/gensync-1.0.0-beta.2.tgz", + "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": 
"https://mirrors.tencent.com/npm/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://mirrors.tencent.com/npm/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/glob-to-regexp": { + "version": "0.4.1", + "resolved": "https://mirrors.tencent.com/npm/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz", + "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==", + "license": "BSD-2-Clause" + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://mirrors.tencent.com/npm/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://mirrors.tencent.com/npm/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "license": "ISC" + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://mirrors.tencent.com/npm/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://mirrors.tencent.com/npm/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://mirrors.tencent.com/npm/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://mirrors.tencent.com/npm/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://mirrors.tencent.com/npm/he/-/he-1.2.0.tgz", + "integrity": 
"sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "dev": true, + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, + "node_modules/hookable": { + "version": "5.5.3", + "resolved": "https://mirrors.tencent.com/npm/hookable/-/hookable-5.5.3.tgz", + "integrity": "sha512-Yc+BQe8SvoXH1643Qez1zqLRmbA5rCL+sSmk6TVos0LWVfNIB7PGncdlId77WzLGSIB5KaWgTaNTs2lNVEI6VQ==", + "license": "MIT" + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://mirrors.tencent.com/npm/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dev": true, + "optional": true, + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/image-size": { + "version": "0.5.5", + "resolved": "https://mirrors.tencent.com/npm/image-size/-/image-size-0.5.5.tgz", + "integrity": "sha512-6TDAlDPZxUFCv+fuOkIoXT/V/f3Qbq8e37p+YOiYrUv3v9cc3/6x78VdfPgFVaB9dZYeLUfKgHRebpkm/oP2VQ==", + "dev": true, + "optional": true, + "bin": { + "image-size": "bin/image-size.js" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-what": { + "version": "3.14.1", + "resolved": "https://mirrors.tencent.com/npm/is-what/-/is-what-3.14.1.tgz", + "integrity": "sha512-sNxgpk9793nzSs7bA6JQJGeIuRBQhAaNGG77kzYQgMkrID+lS6SlK07K5LaptscDlSaIgH+GPFzf+d75FVxozA==", + "dev": true, + "license": "MIT" + }, + "node_modules/isexe": { + "version": "3.1.1", + "resolved": "https://mirrors.tencent.com/npm/isexe/-/isexe-3.1.1.tgz", + "integrity": "sha512-LpB/54B+/2J5hqQ7imZHfdU31OlgQqx7ZicVlkm9kzg9/w8GKLEcFfJl/t7DCEDueOyBAD6zCCwTO6Fzs0NoEQ==", + "dev": true, + "engines": { + "node": ">=16" + } + }, + "node_modules/jest-worker": { + "version": "27.5.1", + "resolved": "https://mirrors.tencent.com/npm/jest-worker/-/jest-worker-27.5.1.tgz", + "integrity": "sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==", + "dependencies": { + "@types/node": "*", + "merge-stream": "^2.0.0", + "supports-color": "^8.0.0" + }, + "engines": { + "node": ">= 10.13.0" + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://mirrors.tencent.com/npm/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/jsesc": { + "version": "3.1.0", + "resolved": "https://mirrors.tencent.com/npm/jsesc/-/jsesc-3.1.0.tgz", + "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", + "dev": true, + "bin": { + "jsesc": "bin/jsesc" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/json-parse-even-better-errors": { + "version": "4.0.0", + "resolved": "https://mirrors.tencent.com/npm/json-parse-even-better-errors/-/json-parse-even-better-errors-4.0.0.tgz", + "integrity": "sha512-lR4MXjGNgkJc7tkQ97kb2nuEMnNCyU//XYVH0MKTGcXEiSudQ5MKGKen3C5QubYy0vmq+JGitUg92uuywGEwIA==", + "dev": true, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://mirrors.tencent.com/npm/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "license": "MIT" + }, + "node_modules/json5": { + "version": "2.2.3", + "resolved": 
"https://mirrors.tencent.com/npm/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", + "dev": true, + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/less": { + "version": "4.3.0", + "resolved": "https://mirrors.tencent.com/npm/less/-/less-4.3.0.tgz", + "integrity": "sha512-X9RyH9fvemArzfdP8Pi3irr7lor2Ok4rOttDXBhlwDg+wKQsXOXgHWduAJE1EsF7JJx0w0bcO6BC6tCKKYnXKA==", + "dev": true, + "dependencies": { + "copy-anything": "^2.0.1", + "parse-node-version": "^1.0.1", + "tslib": "^2.3.0" + }, + "bin": { + "lessc": "bin/lessc" + }, + "engines": { + "node": ">=14" + }, + "optionalDependencies": { + "errno": "^0.1.1", + "graceful-fs": "^4.1.2", + "image-size": "~0.5.0", + "make-dir": "^2.1.0", + "mime": "^1.4.1", + "needle": "^3.1.0", + "source-map": "~0.6.0" + } + }, + "node_modules/less-loader": { + "version": "12.3.0", + "resolved": "https://mirrors.tencent.com/npm/less-loader/-/less-loader-12.3.0.tgz", + "integrity": "sha512-0M6+uYulvYIWs52y0LqN4+QM9TqWAohYSNTo4htE8Z7Cn3G/qQMEmktfHmyJT23k+20kU9zHH2wrfFXkxNLtVw==", + "dev": true, + "engines": { + "node": ">= 18.12.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + }, + "peerDependencies": { + "@rspack/core": "0.x || 1.x", + "less": "^3.5.0 || ^4.0.0", + "webpack": "^5.0.0" + }, + "peerDependenciesMeta": { + "@rspack/core": { + "optional": true + }, + "webpack": { + "optional": true + } + } + }, + "node_modules/loader-runner": { + "version": "4.3.0", + "resolved": "https://mirrors.tencent.com/npm/loader-runner/-/loader-runner-4.3.0.tgz", + "integrity": "sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg==", + "license": "MIT", + "engines": { + "node": ">=6.11.5" + } + }, + "node_modules/lodash-es": { + "version": "4.17.21", + "resolved": "https://mirrors.tencent.com/npm/lodash-es/-/lodash-es-4.17.21.tgz", + "integrity": "sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==" + }, + "node_modules/lru-cache": { + "version": "5.1.1", + "resolved": "https://mirrors.tencent.com/npm/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "dependencies": { + "yallist": "^3.0.2" + } + }, + "node_modules/magic-string": { + "version": "0.30.17", + "resolved": "https://mirrors.tencent.com/npm/magic-string/-/magic-string-0.30.17.tgz", + "integrity": "sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA==", + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0" + } + }, + "node_modules/make-dir": { + "version": "2.1.0", + "resolved": "https://mirrors.tencent.com/npm/make-dir/-/make-dir-2.1.0.tgz", + "integrity": "sha512-LS9X+dc8KLxXCb8dni79fLIIUA5VyZoyjSMCwTluaXA0o27cCK0bhXkpgw+sTXVpPy/lSO57ilRixqk0vDmtRA==", + "dev": true, + "optional": true, + "dependencies": { + "pify": "^4.0.1", + "semver": "^5.6.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/make-dir/node_modules/semver": { + "version": "5.7.2", + "resolved": "https://mirrors.tencent.com/npm/semver/-/semver-5.7.2.tgz", + "integrity": "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==", + "dev": true, + "license": "ISC", + "optional": true, + "bin": { + "semver": "bin/semver" + } + }, + "node_modules/marked": { 
+ "version": "5.1.2", + "resolved": "https://mirrors.tencent.com/npm/marked/-/marked-5.1.2.tgz", + "integrity": "sha512-ahRPGXJpjMjwSOlBoTMZAK7ATXkli5qCPxZ21TG44rx1KEo44bii4ekgTDQPNRQ4Kh7JMb9Ub1PVk1NxRSsorg==", + "license": "MIT", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 16" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://mirrors.tencent.com/npm/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/memorystream": { + "version": "0.3.1", + "resolved": "https://mirrors.tencent.com/npm/memorystream/-/memorystream-0.3.1.tgz", + "integrity": "sha512-S3UwM3yj5mtUSEfP41UZmt/0SCoVYUcU1rkXv+BQ5Ig8ndL4sPoJNBUJERafdPb5jjHJGuMgytgKvKIf58XNBw==", + "dev": true, + "engines": { + "node": ">= 0.10.0" + } + }, + "node_modules/merge-stream": { + "version": "2.0.0", + "resolved": "https://mirrors.tencent.com/npm/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", + "license": "MIT" + }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://mirrors.tencent.com/npm/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "dev": true, + "license": "MIT", + "optional": true, + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://mirrors.tencent.com/npm/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://mirrors.tencent.com/npm/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/minimatch": { + "version": "9.0.5", + "resolved": "https://mirrors.tencent.com/npm/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", + "dev": true, + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/mitt": { + "version": "3.0.1", + "resolved": "https://mirrors.tencent.com/npm/mitt/-/mitt-3.0.1.tgz", + "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==" + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://mirrors.tencent.com/npm/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + }, + "node_modules/muggle-string": { + "version": "0.4.1", + "resolved": "https://mirrors.tencent.com/npm/muggle-string/-/muggle-string-0.4.1.tgz", + "integrity": "sha512-VNTrAak/KhO2i8dqqnqnAHOa3cYBwXEZe9h+D5h/1ZqFSTEFHdM65lR7RoIqq3tBBYavsOXV84NoHXZ0AkPyqQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/nanoid": { + "version": "3.3.11", + "resolved": 
"https://mirrors.tencent.com/npm/nanoid/-/nanoid-3.3.11.tgz", + "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/needle": { + "version": "3.3.1", + "resolved": "https://mirrors.tencent.com/npm/needle/-/needle-3.3.1.tgz", + "integrity": "sha512-6k0YULvhpw+RoLNiQCRKOl09Rv1dPLr8hHnVjHqdolKwDrdNyk+Hmrthi4lIGPPz3r39dLx0hsF5s40sZ3Us4Q==", + "dev": true, + "optional": true, + "dependencies": { + "iconv-lite": "^0.6.3", + "sax": "^1.2.4" + }, + "bin": { + "needle": "bin/needle" + }, + "engines": { + "node": ">= 4.4.x" + } + }, + "node_modules/neo-async": { + "version": "2.6.2", + "resolved": "https://mirrors.tencent.com/npm/neo-async/-/neo-async-2.6.2.tgz", + "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==" + }, + "node_modules/node-releases": { + "version": "2.0.19", + "resolved": "https://mirrors.tencent.com/npm/node-releases/-/node-releases-2.0.19.tgz", + "integrity": "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==", + "license": "MIT" + }, + "node_modules/npm-normalize-package-bin": { + "version": "4.0.0", + "resolved": "https://mirrors.tencent.com/npm/npm-normalize-package-bin/-/npm-normalize-package-bin-4.0.0.tgz", + "integrity": "sha512-TZKxPvItzai9kN9H/TkmCtx/ZN/hvr3vUycjlfmH0ootY9yFBzNOpiXAdIn1Iteqsvk4lQn6B5PTrt+n6h8k/w==", + "dev": true, + "license": "ISC", + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/npm-run-all2": { + "version": "7.0.2", + "resolved": "https://mirrors.tencent.com/npm/npm-run-all2/-/npm-run-all2-7.0.2.tgz", + "integrity": "sha512-7tXR+r9hzRNOPNTvXegM+QzCuMjzUIIq66VDunL6j60O4RrExx32XUhlrS7UK4VcdGw5/Wxzb3kfNcFix9JKDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^6.2.1", + "cross-spawn": "^7.0.6", + "memorystream": "^0.3.1", + "minimatch": "^9.0.0", + "pidtree": "^0.6.0", + "read-package-json-fast": "^4.0.0", + "shell-quote": "^1.7.3", + "which": "^5.0.0" + }, + "bin": { + "npm-run-all": "bin/npm-run-all/index.js", + "npm-run-all2": "bin/npm-run-all/index.js", + "run-p": "bin/run-p/index.js", + "run-s": "bin/run-s/index.js" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0", + "npm": ">= 9" + } + }, + "node_modules/pagefind": { + "version": "1.3.0", + "resolved": "https://mirrors.tencent.com/npm/pagefind/-/pagefind-1.3.0.tgz", + "integrity": "sha512-8KPLGT5g9s+olKMRTU9LFekLizkVIu9tes90O1/aigJ0T5LmyPqTzGJrETnSw3meSYg58YH7JTzhTTW/3z6VAw==", + "license": "MIT", + "bin": { + "pagefind": "lib/runner/bin.cjs" + }, + "optionalDependencies": { + "@pagefind/darwin-arm64": "1.3.0", + "@pagefind/darwin-x64": "1.3.0", + "@pagefind/linux-arm64": "1.3.0", + "@pagefind/linux-x64": "1.3.0", + "@pagefind/windows-x64": "1.3.0" + } + }, + "node_modules/parse-node-version": { + "version": "1.0.1", + "resolved": "https://mirrors.tencent.com/npm/parse-node-version/-/parse-node-version-1.0.1.tgz", + "integrity": "sha512-3YHlOa/JgH6Mnpr05jP9eDG254US9ek25LyIxZlDItp2iJtwyaXQb57lBYLdT3MowkUFYEV2XXNAYIPlESvJlA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/path-browserify": { + "version": "1.0.1", + "resolved": "https://mirrors.tencent.com/npm/path-browserify/-/path-browserify-1.0.1.tgz", + 
"integrity": "sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g==", + "dev": true + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://mirrors.tencent.com/npm/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/perfect-debounce": { + "version": "1.0.0", + "resolved": "https://mirrors.tencent.com/npm/perfect-debounce/-/perfect-debounce-1.0.0.tgz", + "integrity": "sha512-xCy9V055GLEqoFaHoC1SoLIaLmWctgCUaBaWxDZ7/Zx4CTyX7cJQLJOok/orfjZAh9kEYpjJa4d0KcJmCbctZA==" + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://mirrors.tencent.com/npm/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==" + }, + "node_modules/picomatch": { + "version": "4.0.2", + "resolved": "https://mirrors.tencent.com/npm/picomatch/-/picomatch-4.0.2.tgz", + "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/pidtree": { + "version": "0.6.0", + "resolved": "https://mirrors.tencent.com/npm/pidtree/-/pidtree-0.6.0.tgz", + "integrity": "sha512-eG2dWTVw5bzqGRztnHExczNxt5VGsE6OwTeCG3fdUf9KBsZzO3R5OIIIzWR+iZA0NtZ+RDVdaoE2dK1cn6jH4g==", + "dev": true, + "license": "MIT", + "bin": { + "pidtree": "bin/pidtree.js" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/pify": { + "version": "4.0.1", + "resolved": "https://mirrors.tencent.com/npm/pify/-/pify-4.0.1.tgz", + "integrity": "sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g==", + "dev": true, + "optional": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/pinia": { + "version": "3.0.3", + "resolved": "https://mirrors.tencent.com/npm/pinia/-/pinia-3.0.3.tgz", + "integrity": "sha512-ttXO/InUULUXkMHpTdp9Fj4hLpD/2AoJdmAbAeW2yu1iy1k+pkFekQXw5VpC0/5p51IOR/jDaDRfRWRnMMsGOA==", + "license": "MIT", + "dependencies": { + "@vue/devtools-api": "^7.7.2" + }, + "funding": { + "url": "https://github.com/sponsors/posva" + }, + "peerDependencies": { + "typescript": ">=4.4.4", + "vue": "^2.7.0 || ^3.5.11" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/postcss": { + "version": "8.5.6", + "resolved": "https://mirrors.tencent.com/npm/postcss/-/postcss-8.5.6.tgz", + "integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "nanoid": "^3.3.11", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://mirrors.tencent.com/npm/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, + "node_modules/prr": { + "version": "1.0.1", + "resolved": 
"https://mirrors.tencent.com/npm/prr/-/prr-1.0.1.tgz", + "integrity": "sha512-yPw4Sng1gWghHQWj0B3ZggWUm4qVbPwPFcRG8KyxiU7J2OHFSoEHKS+EZ3fv5l1t9CyCiop6l/ZYeWbrgoQejw==", + "dev": true, + "optional": true + }, + "node_modules/randombytes": { + "version": "2.1.0", + "resolved": "https://mirrors.tencent.com/npm/randombytes/-/randombytes-2.1.0.tgz", + "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==", + "license": "MIT", + "dependencies": { + "safe-buffer": "^5.1.0" + } + }, + "node_modules/read-package-json-fast": { + "version": "4.0.0", + "resolved": "https://mirrors.tencent.com/npm/read-package-json-fast/-/read-package-json-fast-4.0.0.tgz", + "integrity": "sha512-qpt8EwugBWDw2cgE2W+/3oxC+KTez2uSVR8JU9Q36TXPAGCaozfQUs59v4j4GFpWTaw0i6hAZSvOmu1J0uOEUg==", + "dev": true, + "dependencies": { + "json-parse-even-better-errors": "^4.0.0", + "npm-normalize-package-bin": "^4.0.0" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://mirrors.tencent.com/npm/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/rfdc": { + "version": "1.4.1", + "resolved": "https://mirrors.tencent.com/npm/rfdc/-/rfdc-1.4.1.tgz", + "integrity": "sha512-q1b3N5QkRUWUl7iyylaaj3kOpIT0N2i9MqIEQXP73GVsN9cw3fdx8X63cEmWhJGi2PPCF23Ijp7ktmd39rawIA==", + "license": "MIT" + }, + "node_modules/rollup": { + "version": "4.45.0", + "resolved": "https://mirrors.tencent.com/npm/rollup/-/rollup-4.45.0.tgz", + "integrity": "sha512-WLjEcJRIo7i3WDDgOIJqVI2d+lAC3EwvOGy+Xfq6hs+GQuAA4Di/H72xmXkOhrIWFg2PFYSKZYfH0f4vfKXN4A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "1.0.8" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.45.0", + "@rollup/rollup-android-arm64": "4.45.0", + "@rollup/rollup-darwin-arm64": "4.45.0", + "@rollup/rollup-darwin-x64": "4.45.0", + "@rollup/rollup-freebsd-arm64": "4.45.0", + "@rollup/rollup-freebsd-x64": "4.45.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.45.0", + "@rollup/rollup-linux-arm-musleabihf": "4.45.0", + "@rollup/rollup-linux-arm64-gnu": "4.45.0", + "@rollup/rollup-linux-arm64-musl": "4.45.0", + "@rollup/rollup-linux-loongarch64-gnu": "4.45.0", + "@rollup/rollup-linux-powerpc64le-gnu": "4.45.0", + "@rollup/rollup-linux-riscv64-gnu": "4.45.0", + "@rollup/rollup-linux-riscv64-musl": "4.45.0", + "@rollup/rollup-linux-s390x-gnu": "4.45.0", + "@rollup/rollup-linux-x64-gnu": "4.45.0", + "@rollup/rollup-linux-x64-musl": "4.45.0", + "@rollup/rollup-win32-arm64-msvc": "4.45.0", + "@rollup/rollup-win32-ia32-msvc": "4.45.0", + "@rollup/rollup-win32-x64-msvc": "4.45.0", + "fsevents": "~2.3.2" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://mirrors.tencent.com/npm/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/safer-buffer": { + "version": "2.1.2", 
+ "resolved": "https://mirrors.tencent.com/npm/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "dev": true, + "license": "MIT", + "optional": true + }, + "node_modules/sax": { + "version": "1.4.1", + "resolved": "https://mirrors.tencent.com/npm/sax/-/sax-1.4.1.tgz", + "integrity": "sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==", + "dev": true, + "license": "ISC", + "optional": true + }, + "node_modules/schema-utils": { + "version": "4.3.2", + "resolved": "https://mirrors.tencent.com/npm/schema-utils/-/schema-utils-4.3.2.tgz", + "integrity": "sha512-Gn/JaSk/Mt9gYubxTtSn/QCV4em9mpAPiR1rqy/Ocu19u/G9J5WWdNoUT4SiV6mFC3y6cxyFcFwdzPM3FgxGAQ==", + "license": "MIT", + "dependencies": { + "@types/json-schema": "^7.0.9", + "ajv": "^8.9.0", + "ajv-formats": "^2.1.1", + "ajv-keywords": "^5.1.0" + }, + "engines": { + "node": ">= 10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + } + }, + "node_modules/semver": { + "version": "6.3.1", + "resolved": "https://mirrors.tencent.com/npm/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/serialize-javascript": { + "version": "6.0.2", + "resolved": "https://mirrors.tencent.com/npm/serialize-javascript/-/serialize-javascript-6.0.2.tgz", + "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==", + "dependencies": { + "randombytes": "^2.1.0" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://mirrors.tencent.com/npm/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://mirrors.tencent.com/npm/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/shell-quote": { + "version": "1.8.3", + "resolved": "https://mirrors.tencent.com/npm/shell-quote/-/shell-quote-1.8.3.tgz", + "integrity": "sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/sortablejs": { + "version": "1.15.6", + "resolved": "https://mirrors.tencent.com/npm/sortablejs/-/sortablejs-1.15.6.tgz", + "integrity": "sha512-aNfiuwMEpfBM/CN6LY0ibyhxPfPbyFeBTYJKCvzkJ2GkUpazIt3H+QIPAMHwqQ7tMKaHz1Qj+rJJCqljnf4p3A==", + "license": "MIT" + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://mirrors.tencent.com/npm/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://mirrors.tencent.com/npm/source-map-js/-/source-map-js-1.2.1.tgz", + 
"integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-support": { + "version": "0.5.21", + "resolved": "https://mirrors.tencent.com/npm/source-map-support/-/source-map-support-0.5.21.tgz", + "integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==", + "dependencies": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "node_modules/speakingurl": { + "version": "14.0.1", + "resolved": "https://mirrors.tencent.com/npm/speakingurl/-/speakingurl-14.0.1.tgz", + "integrity": "sha512-1POYv7uv2gXoyGFpBCmpDVSNV74IfsWlDW216UPjbWufNf+bSU6GdbDsxdcxtfwb4xlI3yxzOTKClUosxARYrQ==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/superjson": { + "version": "2.2.2", + "resolved": "https://mirrors.tencent.com/npm/superjson/-/superjson-2.2.2.tgz", + "integrity": "sha512-5JRxVqC8I8NuOUjzBbvVJAKNM8qoVuH0O77h4WInc/qC2q5IreqKxYwgkga3PfA22OayK2ikceb/B26dztPl+Q==", + "dependencies": { + "copy-anything": "^3.0.2" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/superjson/node_modules/copy-anything": { + "version": "3.0.5", + "resolved": "https://mirrors.tencent.com/npm/copy-anything/-/copy-anything-3.0.5.tgz", + "integrity": "sha512-yCEafptTtb4bk7GLEQoM8KVJpxAfdBJYaXyzQEgQQQgYrZiDp8SJmGKlYza6CYjEDNstAdNdKA3UuoULlEbS6w==", + "dependencies": { + "is-what": "^4.1.8" + }, + "engines": { + "node": ">=12.13" + }, + "funding": { + "url": "https://github.com/sponsors/mesqueeb" + } + }, + "node_modules/superjson/node_modules/is-what": { + "version": "4.1.16", + "resolved": "https://mirrors.tencent.com/npm/is-what/-/is-what-4.1.16.tgz", + "integrity": "sha512-ZhMwEosbFJkA0YhFnNDgTM4ZxDRsS6HqTo7qsZM08fehyRYIYa0yHu5R6mgo1n/8MgaPBXiPimPD77baVFYg+A==", + "license": "MIT", + "engines": { + "node": ">=12.13" + }, + "funding": { + "url": "https://github.com/sponsors/mesqueeb" + } + }, + "node_modules/supports-color": { + "version": "8.1.1", + "resolved": "https://mirrors.tencent.com/npm/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, + "node_modules/tapable": { + "version": "2.2.2", + "resolved": "https://mirrors.tencent.com/npm/tapable/-/tapable-2.2.2.tgz", + "integrity": "sha512-Re10+NauLTMCudc7T5WLFLAwDhQ0JWdrMK+9B2M8zR5hRExKmsRDCBA7/aV/pNJFltmBFO5BAMlQFi/vq3nKOg==", + "engines": { + "node": ">=6" + } + }, + "node_modules/tdesign-icons-vue-next": { + "version": "0.3.6", + "resolved": "https://mirrors.tencent.com/npm/tdesign-icons-vue-next/-/tdesign-icons-vue-next-0.3.6.tgz", + "integrity": "sha512-X9u90dBv8tPhfpguUyx+BzF8CU2ef2L4RXOO7MYOj1ufHCHwBXTF8L3GPfq6KZd/2u4vMLYAA8lGURn4PZZICw==", + "dependencies": { + "@babel/runtime": "^7.16.3" + }, + "peerDependencies": { + "vue": "^3.0.0" + } + }, + "node_modules/tdesign-vue-next": { + "version": "1.14.2", + "resolved": "https://mirrors.tencent.com/npm/tdesign-vue-next/-/tdesign-vue-next-1.14.2.tgz", + "integrity": "sha512-Wcg0SjT4W7JmMhwCI4/rnk8bJzcmjed/YAemVDaOY33U8PqUsU3DxxP/vJ2gbLH7NkFgLGVNpeAdTqYdmHnV4w==", + "dependencies": { + "@babel/runtime": "^7.22.6", + "@popperjs/core": "^2.11.8", + "@types/lodash-es": "^4.17.12", + 
"@types/sortablejs": "^1.15.1", + "@types/tinycolor2": "^1.4.3", + "@types/validator": "^13.7.17", + "dayjs": "1.11.10", + "lodash-es": "^4.17.21", + "mitt": "^3.0.1", + "sortablejs": "^1.15.0", + "tdesign-icons-vue-next": "^0.3.6", + "tinycolor2": "^1.6.0", + "validator": "^13.9.0" + }, + "engines": { + "node": ">= 18" + }, + "peerDependencies": { + "vue": ">=3.1.0" + } + }, + "node_modules/terser": { + "version": "5.43.1", + "resolved": "https://mirrors.tencent.com/npm/terser/-/terser-5.43.1.tgz", + "integrity": "sha512-+6erLbBm0+LROX2sPXlUYx/ux5PyE9K/a92Wrt6oA+WDAoFTdpHE5tCYCI5PNzq2y8df4rA+QgHLJuR4jNymsg==", + "license": "BSD-2-Clause", + "dependencies": { + "@jridgewell/source-map": "^0.3.3", + "acorn": "^8.14.0", + "commander": "^2.20.0", + "source-map-support": "~0.5.20" + }, + "bin": { + "terser": "bin/terser" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/terser-webpack-plugin": { + "version": "5.3.14", + "resolved": "https://mirrors.tencent.com/npm/terser-webpack-plugin/-/terser-webpack-plugin-5.3.14.tgz", + "integrity": "sha512-vkZjpUjb6OMS7dhV+tILUW6BhpDR7P2L/aQSAv+Uwk+m8KATX9EccViHTJR2qDtACKPIYndLGCyl3FMo+r2LMw==", + "dependencies": { + "@jridgewell/trace-mapping": "^0.3.25", + "jest-worker": "^27.4.5", + "schema-utils": "^4.3.0", + "serialize-javascript": "^6.0.2", + "terser": "^5.31.1" + }, + "engines": { + "node": ">= 10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + }, + "peerDependencies": { + "webpack": "^5.1.0" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "esbuild": { + "optional": true + }, + "uglify-js": { + "optional": true + } + } + }, + "node_modules/tinycolor2": { + "version": "1.6.0", + "resolved": "https://mirrors.tencent.com/npm/tinycolor2/-/tinycolor2-1.6.0.tgz", + "integrity": "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw==", + "license": "MIT" + }, + "node_modules/tinyglobby": { + "version": "0.2.14", + "resolved": "https://mirrors.tencent.com/npm/tinyglobby/-/tinyglobby-0.2.14.tgz", + "integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "fdir": "^6.4.4", + "picomatch": "^4.0.2" + }, + "engines": { + "node": ">=12.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/SuperchupuDev" + } + }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": "https://mirrors.tencent.com/npm/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "dev": true + }, + "node_modules/typescript": { + "version": "5.8.3", + "resolved": "https://mirrors.tencent.com/npm/typescript/-/typescript-5.8.3.tgz", + "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", + "devOptional": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://mirrors.tencent.com/npm/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==" + }, + "node_modules/update-browserslist-db": { + "version": "1.1.3", + "resolved": "https://mirrors.tencent.com/npm/update-browserslist-db/-/update-browserslist-db-1.1.3.tgz", + "integrity": 
"sha512-UxhIZQ+QInVdunkDAaiazvvT/+fXL5Osr0JZlJulepYu6Jd7qJtDZjlur0emRlT71EN3ScPoE7gvsuIKKNavKw==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "escalade": "^3.2.0", + "picocolors": "^1.1.1" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/validator": { + "version": "13.15.15", + "resolved": "https://mirrors.tencent.com/npm/validator/-/validator-13.15.15.tgz", + "integrity": "sha512-BgWVbCI72aIQy937xbawcs+hrVaN/CZ2UwutgaJ36hGqRrLNM+f5LUT/YPRbo8IV/ASeFzXszezV+y2+rq3l8A==", + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/vite": { + "version": "7.0.4", + "resolved": "https://mirrors.tencent.com/npm/vite/-/vite-7.0.4.tgz", + "integrity": "sha512-SkaSguuS7nnmV7mfJ8l81JGBFV7Gvzp8IzgE8A8t23+AxuNX61Q5H1Tpz5efduSN7NHC8nQXD3sKQKZAu5mNEA==", + "dev": true, + "dependencies": { + "esbuild": "^0.25.0", + "fdir": "^6.4.6", + "picomatch": "^4.0.2", + "postcss": "^8.5.6", + "rollup": "^4.40.0", + "tinyglobby": "^0.2.14" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^20.19.0 || >=22.12.0", + "jiti": ">=1.21.0", + "less": "^4.0.0", + "lightningcss": "^1.21.0", + "sass": "^1.70.0", + "sass-embedded": "^1.70.0", + "stylus": ">=0.54.8", + "sugarss": "^5.0.0", + "terser": "^5.16.0", + "tsx": "^4.8.1", + "yaml": "^2.4.2" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "jiti": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + }, + "tsx": { + "optional": true + }, + "yaml": { + "optional": true + } + } + }, + "node_modules/vscode-uri": { + "version": "3.1.0", + "resolved": "https://mirrors.tencent.com/npm/vscode-uri/-/vscode-uri-3.1.0.tgz", + "integrity": "sha512-/BpdSx+yCQGnCvecbyXdxHDkuk55/G3xwnC0GqY4gmQ3j+A+g8kzzgB4Nk/SINjqn6+waqw3EgbVF2QKExkRxQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/vue": { + "version": "3.5.17", + "resolved": "https://mirrors.tencent.com/npm/vue/-/vue-3.5.17.tgz", + "integrity": "sha512-LbHV3xPN9BeljML+Xctq4lbz2lVHCR6DtbpTf5XIO6gugpXUN49j2QQPcMj086r9+AkJ0FfUT8xjulKKBkkr9g==", + "license": "MIT", + "dependencies": { + "@vue/compiler-dom": "3.5.17", + "@vue/compiler-sfc": "3.5.17", + "@vue/runtime-dom": "3.5.17", + "@vue/server-renderer": "3.5.17", + "@vue/shared": "3.5.17" + }, + "peerDependencies": { + "typescript": "*" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/vue-router": { + "version": "4.5.1", + "resolved": "https://mirrors.tencent.com/npm/vue-router/-/vue-router-4.5.1.tgz", + "integrity": "sha512-ogAF3P97NPm8fJsE4by9dwSYtDwXIY1nFY9T6DyQnGHd1E2Da94w9JIolpe42LJGIl0DwOHBi8TcRPlPGwbTtw==", + "dependencies": { + "@vue/devtools-api": "^6.6.4" + }, + "funding": { + "url": "https://github.com/sponsors/posva" + }, + "peerDependencies": { + "vue": "^3.2.0" + } + }, + 
"node_modules/vue-router/node_modules/@vue/devtools-api": { + "version": "6.6.4", + "resolved": "https://mirrors.tencent.com/npm/@vue/devtools-api/-/devtools-api-6.6.4.tgz", + "integrity": "sha512-sGhTPMuXqZ1rVOk32RylztWkfXTRhuS7vgAKv0zjqk8gbsHkJ7xfFf+jbySxt7tWObEJwyKaHMikV/WGDiQm8g==", + "license": "MIT" + }, + "node_modules/vue-tsc": { + "version": "2.2.12", + "resolved": "https://mirrors.tencent.com/npm/vue-tsc/-/vue-tsc-2.2.12.tgz", + "integrity": "sha512-P7OP77b2h/Pmk+lZdJ0YWs+5tJ6J2+uOQPo7tlBnY44QqQSPYvS0qVT4wqDJgwrZaLe47etJLLQRFia71GYITw==", + "dev": true, + "dependencies": { + "@volar/typescript": "2.4.15", + "@vue/language-core": "2.2.12" + }, + "bin": { + "vue-tsc": "bin/vue-tsc.js" + }, + "peerDependencies": { + "typescript": ">=5.0.0" + } + }, + "node_modules/watchpack": { + "version": "2.4.4", + "resolved": "https://mirrors.tencent.com/npm/watchpack/-/watchpack-2.4.4.tgz", + "integrity": "sha512-c5EGNOiyxxV5qmTtAB7rbiXxi1ooX1pQKMLX/MIabJjRA0SJBQOjKF+KSVfHkr9U1cADPon0mRiVe/riyaiDUA==", + "dependencies": { + "glob-to-regexp": "^0.4.1", + "graceful-fs": "^4.1.2" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/webpack": { + "version": "5.100.1", + "resolved": "https://mirrors.tencent.com/npm/webpack/-/webpack-5.100.1.tgz", + "integrity": "sha512-YJB/ESPUe2Locd0NKXmw72Dx8fZQk1gTzI6rc9TAT4+Sypbnhl8jd8RywB1bDsDF9Dy1RUR7gn3q/ZJTd0OZZg==", + "dependencies": { + "@types/eslint-scope": "^3.7.7", + "@types/estree": "^1.0.8", + "@types/json-schema": "^7.0.15", + "@webassemblyjs/ast": "^1.14.1", + "@webassemblyjs/wasm-edit": "^1.14.1", + "@webassemblyjs/wasm-parser": "^1.14.1", + "acorn": "^8.15.0", + "acorn-import-phases": "^1.0.3", + "browserslist": "^4.24.0", + "chrome-trace-event": "^1.0.2", + "enhanced-resolve": "^5.17.2", + "es-module-lexer": "^1.2.1", + "eslint-scope": "5.1.1", + "events": "^3.2.0", + "glob-to-regexp": "^0.4.1", + "graceful-fs": "^4.2.11", + "json-parse-even-better-errors": "^2.3.1", + "loader-runner": "^4.2.0", + "mime-types": "^2.1.27", + "neo-async": "^2.6.2", + "schema-utils": "^4.3.2", + "tapable": "^2.1.1", + "terser-webpack-plugin": "^5.3.11", + "watchpack": "^2.4.1", + "webpack-sources": "^3.3.3" + }, + "bin": { + "webpack": "bin/webpack.js" + }, + "engines": { + "node": ">=10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + }, + "peerDependenciesMeta": { + "webpack-cli": { + "optional": true + } + } + }, + "node_modules/webpack-sources": { + "version": "3.3.3", + "resolved": "https://mirrors.tencent.com/npm/webpack-sources/-/webpack-sources-3.3.3.tgz", + "integrity": "sha512-yd1RBzSGanHkitROoPFd6qsrxt+oFhg/129YzheDGqeustzX0vTZJZsSsQjVQC4yzBQ56K55XU8gaNCtIzOnTg==", + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/webpack/node_modules/json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://mirrors.tencent.com/npm/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==" + }, + "node_modules/which": { + "version": "5.0.0", + "resolved": "https://mirrors.tencent.com/npm/which/-/which-5.0.0.tgz", + "integrity": "sha512-JEdGzHwwkrbWoGOlIHqQ5gtprKGOenpDHpxE9zVR1bWbOtYRyPPHMe9FaP6x61CmNaTThSkb0DAJte5jD+DmzQ==", + "dev": true, + "dependencies": { + "isexe": "^3.1.1" + }, + "bin": { + "node-which": "bin/which.js" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/yallist": { + "version": "3.1.1", + 
"resolved": "https://mirrors.tencent.com/npm/yallist/-/yallist-3.1.1.tgz", + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", + "dev": true + } + } +} diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..96ff154 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,47 @@ +{ + "name": "knowledage-base", + "version": "0.0.0", + "private": true, + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "build-with-types": "run-p type-check \"build-only {@}\" --", + "preview": "vite preview", + "build-only": "vite build", + "type-check": "vue-tsc --build" + }, + "dependencies": { + "@microsoft/fetch-event-source": "^2.0.1", + "axios": "^1.8.4", + "marked": "^5.1.2", + "pagefind": "^1.1.1", + "pinia": "^3.0.1", + "tdesign-vue-next": "^1.11.5", + "vue": "^3.5.13", + "vue-router": "^4.5.0", + "webpack": "^5.94.0" + }, + "devDependencies": { + "@tsconfig/node22": "^22.0.1", + "@types/marked": "^5.0.2", + "@types/node": "^22.14.0", + "@vitejs/plugin-vue": "6.0.0", + "@vitejs/plugin-vue-jsx": "5.0.1", + "@vue/tsconfig": "^0.7.0", + "less": "^4.3.0", + "less-loader": "^12.2.0", + "npm-run-all2": "^7.0.2", + "typescript": "~5.8.0", + "vite": "7.0.4", + "vue-tsc": "^2.2.8" + }, + "overrides": { + "lightningcss": "none", + "esbuild": "^0.25.0" + }, + "resolutions": { + "lightningcss": "none", + "esbuild": "^0.25.0" + } +} diff --git a/frontend/public/favicon.ico b/frontend/public/favicon.ico new file mode 100644 index 0000000..c8931ef Binary files /dev/null and b/frontend/public/favicon.ico differ diff --git a/frontend/src/App.vue b/frontend/src/App.vue new file mode 100644 index 0000000..e7d4d2b --- /dev/null +++ b/frontend/src/App.vue @@ -0,0 +1,23 @@ + + + diff --git a/frontend/src/api/chat/index.ts b/frontend/src/api/chat/index.ts new file mode 100644 index 0000000..c948eca --- /dev/null +++ b/frontend/src/api/chat/index.ts @@ -0,0 +1,62 @@ +import { get, post, put, del, postChat } from "../../utils/request"; +import { loadTestData } from "../test-data"; + +// 从localStorage获取设置 +function getSettings() { + const settingsStr = localStorage.getItem("WeKnora_settings"); + if (settingsStr) { + try { + const settings = JSON.parse(settingsStr); + if (settings.apiKey && settings.endpoint) { + return settings; + } + } catch (e) { + console.error("解析设置失败:", e); + } + } + return null; +} + +// 根据是否有设置决定是否需要加载测试数据 +async function ensureConfigured() { + const settings = getSettings(); + // 如果没有设置APIKey和Endpoint,则加载测试数据 + if (!settings) { + await loadTestData(); + } +} + +export async function createSessions(data = {}) { + await ensureConfigured(); + return post("/api/v1/sessions", data); +} + +export async function getSessionsList(page: number, page_size: number) { + await ensureConfigured(); + return get(`/api/v1/sessions?page=${page}&page_size=${page_size}`); +} + +export async function generateSessionsTitle(session_id: string, data: any) { + await ensureConfigured(); + return post(`/api/v1/sessions/${session_id}/generate_title`, data); +} + +export async function knowledgeChat(data: { session_id: string; query: string; }) { + await ensureConfigured(); + return postChat(`/api/v1/knowledge-chat/${data.session_id}`, { query: data.query }); +} + +export async function getMessageList(data: { session_id: string; limit: number, created_at: string }) { + await ensureConfigured(); + + if (data.created_at) { + return 
get(`/api/v1/messages/${data.session_id}/load?before_time=${encodeURIComponent(data.created_at)}&limit=${data.limit}`); + } else { + return get(`/api/v1/messages/${data.session_id}/load?limit=${data.limit}`); + } +} + +export async function delSession(session_id: string) { + await ensureConfigured(); + return del(`/api/v1/sessions/${session_id}`); +} \ No newline at end of file diff --git a/frontend/src/api/chat/streame.ts b/frontend/src/api/chat/streame.ts new file mode 100644 index 0000000..562fb16 --- /dev/null +++ b/frontend/src/api/chat/streame.ts @@ -0,0 +1,147 @@ +import { fetchEventSource } from '@microsoft/fetch-event-source' +import { ref, type Ref, onUnmounted, nextTick } from 'vue' +import { generateRandomString } from '@/utils/index'; +import { getTestData } from '@/utils/request'; +import { loadTestData } from '@/api/test-data'; + +// 从localStorage获取设置 +function getSettings() { + const settingsStr = localStorage.getItem("WeKnora_settings"); + if (settingsStr) { + try { + const settings = JSON.parse(settingsStr); + return settings; + } catch (e) { + console.error("解析设置失败:", e); + } + } + return null; +} + +interface StreamOptions { + // 请求方法 (默认POST) + method?: 'GET' | 'POST' + // 请求头 + headers?: Record + // 请求体自动序列化 + body?: Record + // 流式渲染间隔 (ms) + chunkInterval?: number +} + +export function useStream() { + // 响应式状态 + const output = ref('') // 显示内容 + const isStreaming = ref(false) // 流状态 + const isLoading = ref(false) // 初始加载 + const error = ref(null)// 错误信息 + let controller = new AbortController() + + // 流式渲染缓冲 + let buffer: string[] = [] + let renderTimer: number | null = null + + // 启动流式请求 + const startStream = async (params: { session_id: any; query: any; method: string; url: string }) => { + // 重置状态 + output.value = ''; + error.value = null; + isStreaming.value = true; + isLoading.value = true; + + // 获取设置信息 + const settings = getSettings(); + let apiUrl = ''; + let apiKey = ''; + + // 如果有设置信息,优先使用设置信息 + if (settings && settings.endpoint && settings.apiKey) { + apiUrl = settings.endpoint; + apiKey = settings.apiKey; + } else { + // 否则加载测试数据 + await loadTestData(); + const testData = getTestData(); + if (!testData) { + error.value = "测试数据未初始化,无法进行聊天"; + stopStream(); + return; + } + apiUrl = import.meta.env.VITE_IS_DOCKER ? "" : "http://localhost:8080"; + apiKey = testData.tenant.api_key; + } + + try { + let url = + params.method == "POST" + ? `${apiUrl}${params.url}/${params.session_id}` + : `${apiUrl}${params.url}/${params.session_id}?message_id=${params.query}`; + await fetchEventSource(url, { + method: params.method, + headers: { + "Content-Type": "application/json", + "X-API-Key": apiKey, + "X-Request-ID": `${generateRandomString(12)}`, + }, + body: + params.method == "POST" + ? JSON.stringify({ query: params.query }) + : null, + signal: controller.signal, + openWhenHidden: true, + + onopen: async (res) => { + if (!res.ok) throw new Error(`HTTP ${res.status}`); + isLoading.value = false; + }, + + onmessage: (ev) => { + buffer.push(JSON.parse(ev.data)); // 数据存入缓冲 + // 执行自定义处理 + if (chunkHandler) { + chunkHandler(JSON.parse(ev.data)); + } + }, + + onerror: (err) => { + throw new Error(`流式连接失败: ${err}`); + }, + + onclose: () => { + stopStream(); + }, + }); + } catch (err) { + error.value = err instanceof Error ? 
err.message : String(err) + stopStream() + } + } + + let chunkHandler: ((data: any) => void) | null = null + // 注册块处理器 + const onChunk = (handler: () => void) => { + chunkHandler = handler + } + + + // 停止流 + const stopStream = () => { + controller.abort(); + controller = new AbortController(); // 重置控制器(如需重新发起) + isStreaming.value = false; + isLoading.value = false; + } + + // 组件卸载时自动清理 + onUnmounted(stopStream) + + return { + output, // 显示内容 + isStreaming, // 是否在流式传输中 + isLoading, // 初始连接状态 + error, + onChunk, + startStream, // 启动流 + stopStream // 手动停止 + } +} \ No newline at end of file diff --git a/frontend/src/api/knowledge-base/index.ts b/frontend/src/api/knowledge-base/index.ts new file mode 100644 index 0000000..abee9d7 --- /dev/null +++ b/frontend/src/api/knowledge-base/index.ts @@ -0,0 +1,62 @@ +import { get, post, put, del, postUpload, getDown, getTestData } from "../../utils/request"; +import { loadTestData } from "../test-data"; + +// 获取知识库ID(优先从设置中获取) +async function getKnowledgeBaseID() { + // 从localStorage获取设置中的知识库ID + const settingsStr = localStorage.getItem("WeKnora_settings"); + let knowledgeBaseId = ""; + + if (settingsStr) { + try { + const settings = JSON.parse(settingsStr); + if (settings.knowledgeBaseId) { + return settings.knowledgeBaseId; + } + } catch (e) { + console.error("解析设置失败:", e); + } + } + + // 如果设置中没有知识库ID,则使用测试数据 + await loadTestData(); + + const testData = getTestData(); + if (!testData || testData.knowledge_bases.length === 0) { + console.error("测试数据未初始化或不包含知识库"); + throw new Error("测试数据未初始化或不包含知识库"); + } + return testData.knowledge_bases[0].id; +} + +export async function uploadKnowledgeBase(data = {}) { + const kbId = await getKnowledgeBaseID(); + return postUpload(`/api/v1/knowledge-bases/${kbId}/knowledge/file`, data); +} + +export async function getKnowledgeBase({page, page_size}) { + const kbId = await getKnowledgeBaseID(); + return get( + `/api/v1/knowledge-bases/${kbId}/knowledge?page=${page}&page_size=${page_size}` + ); +} + +export function getKnowledgeDetails(id: any) { + return get(`/api/v1/knowledge/${id}`); +} + +export function delKnowledgeDetails(id: any) { + return del(`/api/v1/knowledge/${id}`); +} + +export function downKnowledgeDetails(id: any) { + return getDown(`/api/v1/knowledge/${id}/download`); +} + +export function batchQueryKnowledge(ids: any) { + return get(`/api/v1/knowledge/batch?${ids}`); +} + +export function getKnowledgeDetailsCon(id: any, page) { + return get(`/api/v1/chunks/${id}?page=${page}&page_size=25`); +} \ No newline at end of file diff --git a/frontend/src/api/test-data/index.ts b/frontend/src/api/test-data/index.ts new file mode 100644 index 0000000..c4d43ae --- /dev/null +++ b/frontend/src/api/test-data/index.ts @@ -0,0 +1,55 @@ +import { get, setTestData } from '../../utils/request'; + +export interface TestDataResponse { + success: boolean; + data: { + tenant: { + id: number; + name: string; + api_key: string; + }; + knowledge_bases: Array<{ + id: string; + name: string; + description: string; + }>; + } +} + +// 是否已加载测试数据 +let isTestDataLoaded = false; + +/** + * 加载测试数据 + * 在API调用前调用此函数以确保测试数据已加载 + * @returns Promise 是否成功加载 + */ +export async function loadTestData(): Promise { + // 如果已经加载过,直接返回 + if (isTestDataLoaded) { + return true; + } + + try { + console.log('开始加载测试数据...'); + const response = await get('/api/v1/test-data'); + console.log('测试数据', response); + + if (response && response.data) { + // 设置测试数据 + setTestData({ + tenant: response.data.tenant, + knowledge_bases: response.data.knowledge_bases + 
}); + isTestDataLoaded = true; + console.log('测试数据加载成功'); + return true; + } else { + console.warn('测试数据响应为空'); + return false; + } + } catch (error) { + console.error('加载测试数据失败:', error); + return false; + } +} diff --git a/frontend/src/assets/fonts.css b/frontend/src/assets/fonts.css new file mode 100644 index 0000000..3a52489 --- /dev/null +++ b/frontend/src/assets/fonts.css @@ -0,0 +1,6 @@ +@font-face { + font-family: 'TencentSans'; + src: url('fonts/TencentSans.ttf') format('truetype'); + font-weight: normal; + font-style: normal; +} \ No newline at end of file diff --git a/frontend/src/assets/fonts/TencentSans.ttf b/frontend/src/assets/fonts/TencentSans.ttf new file mode 100644 index 0000000..386dd20 Binary files /dev/null and b/frontend/src/assets/fonts/TencentSans.ttf differ diff --git a/frontend/src/assets/img/Frame3718.svg b/frontend/src/assets/img/Frame3718.svg new file mode 100644 index 0000000..16de2d9 --- /dev/null +++ b/frontend/src/assets/img/Frame3718.svg @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/frontend/src/assets/img/botanswer_loading.gif b/frontend/src/assets/img/botanswer_loading.gif new file mode 100644 index 0000000..3d1b9cb Binary files /dev/null and b/frontend/src/assets/img/botanswer_loading.gif differ diff --git a/frontend/src/assets/img/circle.png b/frontend/src/assets/img/circle.png new file mode 100644 index 0000000..042603f Binary files /dev/null and b/frontend/src/assets/img/circle.png differ diff --git a/frontend/src/assets/img/delete.svg b/frontend/src/assets/img/delete.svg new file mode 100644 index 0000000..fe00ae5 --- /dev/null +++ b/frontend/src/assets/img/delete.svg @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/assets/img/download.svg b/frontend/src/assets/img/download.svg new file mode 100644 index 0000000..20ce796 --- /dev/null +++ b/frontend/src/assets/img/download.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/frontend/src/assets/img/file-add-green.svg b/frontend/src/assets/img/file-add-green.svg new file mode 100644 index 0000000..6fe63f7 --- /dev/null +++ b/frontend/src/assets/img/file-add-green.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/frontend/src/assets/img/file-add-icon.svg b/frontend/src/assets/img/file-add-icon.svg new file mode 100644 index 0000000..2326060 --- /dev/null +++ b/frontend/src/assets/img/file-add-icon.svg @@ -0,0 +1,4 @@ + + + + diff --git a/frontend/src/assets/img/file-add.svg b/frontend/src/assets/img/file-add.svg new file mode 100644 index 0000000..c8b3456 --- /dev/null +++ b/frontend/src/assets/img/file-add.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/frontend/src/assets/img/more.png b/frontend/src/assets/img/more.png new file mode 100644 index 0000000..d6f3654 Binary files /dev/null and b/frontend/src/assets/img/more.png differ diff --git a/frontend/src/assets/img/prefixIcon-green.svg b/frontend/src/assets/img/prefixIcon-green.svg new file mode 100644 index 0000000..b83c674 --- /dev/null +++ b/frontend/src/assets/img/prefixIcon-green.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/frontend/src/assets/img/prefixIcon-grey.svg b/frontend/src/assets/img/prefixIcon-grey.svg new file mode 100644 index 0000000..af434f5 --- /dev/null +++ b/frontend/src/assets/img/prefixIcon-grey.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/frontend/src/assets/img/prefixIcon.svg b/frontend/src/assets/img/prefixIcon.svg new file mode 100644 index 0000000..e38907f --- 
/dev/null +++ b/frontend/src/assets/img/prefixIcon.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/frontend/src/assets/img/sending-aircraft.svg b/frontend/src/assets/img/sending-aircraft.svg new file mode 100644 index 0000000..077247c --- /dev/null +++ b/frontend/src/assets/img/sending-aircraft.svg @@ -0,0 +1,3 @@ + + + diff --git a/frontend/src/assets/img/setting-green.svg b/frontend/src/assets/img/setting-green.svg new file mode 100644 index 0000000..7148887 --- /dev/null +++ b/frontend/src/assets/img/setting-green.svg @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/assets/img/setting.svg b/frontend/src/assets/img/setting.svg new file mode 100644 index 0000000..e0f5b33 --- /dev/null +++ b/frontend/src/assets/img/setting.svg @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/assets/img/think.gif b/frontend/src/assets/img/think.gif new file mode 100644 index 0000000..02cadce Binary files /dev/null and b/frontend/src/assets/img/think.gif differ diff --git a/frontend/src/assets/img/upload-mask.svg b/frontend/src/assets/img/upload-mask.svg new file mode 100644 index 0000000..15c8bc1 --- /dev/null +++ b/frontend/src/assets/img/upload-mask.svg @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/frontend/src/assets/img/upload.svg b/frontend/src/assets/img/upload.svg new file mode 100644 index 0000000..2bb5f61 --- /dev/null +++ b/frontend/src/assets/img/upload.svg @@ -0,0 +1,100 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/frontend/src/assets/img/weknora.png b/frontend/src/assets/img/weknora.png new file mode 100644 index 0000000..23a2cc4 Binary files /dev/null and b/frontend/src/assets/img/weknora.png differ diff --git a/frontend/src/assets/img/zhishiku-green.svg b/frontend/src/assets/img/zhishiku-green.svg new file mode 100644 index 0000000..c61f850 --- /dev/null +++ b/frontend/src/assets/img/zhishiku-green.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/frontend/src/assets/img/zhishiku.svg b/frontend/src/assets/img/zhishiku.svg new file mode 100644 index 0000000..a590d03 --- /dev/null +++ b/frontend/src/assets/img/zhishiku.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/frontend/src/assets/img/ziliao.svg b/frontend/src/assets/img/ziliao.svg new file mode 100644 index 0000000..b1338f3 --- /dev/null +++ b/frontend/src/assets/img/ziliao.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/frontend/src/assets/theme/theme.css b/frontend/src/assets/theme/theme.css new file mode 100644 index 0000000..c032b32 --- /dev/null +++ b/frontend/src/assets/theme/theme.css @@ -0,0 +1,95 @@ +:root,:root[theme-mode="light"]{ --brand-main: var(--td-brand-color-4); --td-brand-color-light: var(--td-brand-color-1); --td-brand-color-focus: var(--td-brand-color-2); --td-brand-color-disabled: var(--td-brand-color-3); --td-brand-color-hover: var(--td-brand-color-3); --td-brand-color: var(--td-brand-color-4); --td-brand-color-active:var(--td-brand-color-5); --td-brand-color-1: #e9f8ec; --td-brand-color-2: #09f479; --td-brand-color-3: #08dd6e; --td-brand-color-4: #07c05f; --td-brand-color-5: #06b04d; --td-brand-color-6: #049b38; --td-brand-color-7: #038626; --td-brand-color-8: #027218; --td-brand-color-9: #015e0d; 
--td-brand-color-10: #004b05; --td-warning-color-1: #fef3e6;--td-warning-color-2: #f9e0c7;--td-warning-color-3: #f7c797;--td-warning-color-4: #f2995f;--td-warning-color-5: #ed7b2f;--td-warning-color-6: #d35a21;--td-warning-color-7: #ba431b;--td-warning-color-8: #9e3610;--td-warning-color-9: #842b0b;--td-warning-color-10: #5a1907;--td-warning-color: var(--td-warning-color-5);--td-warning-color-hover: var(--td-warning-color-4);--td-warning-color-focus: var(--td-warning-color-2);--td-warning-color-active: var(--td-warning-color-6);--td-warning-color-disabled: var(--td-warning-color-3);--td-warning-color-light: var(--td-warning-color-1); --td-error-color-1: #fdecee;--td-error-color-2: #f9d7d9;--td-error-color-3: #f8b9be;--td-error-color-4: #f78d94;--td-error-color-5: #f36d78;--td-error-color-6: #e34d59;--td-error-color-7: #c9353f;--td-error-color-8: #b11f26;--td-error-color-9: #951114;--td-error-color-10: #680506;--td-error-color: var(--td-error-color-6);--td-error-color-hover: var(--td-error-color-5);--td-error-color-focus: var(--td-error-color-2);--td-error-color-active: var(--td-error-color-7);--td-error-color-disabled: var(--td-error-color-3);--td-error-color-light: var(--td-error-color-1); --td-success-color-1: #e8f8f2;--td-success-color-2: #bcebdc;--td-success-color-3: #85dbbe;--td-success-color-4: #48c79c;--td-success-color-5: #00a870;--td-success-color-6: #078d5c;--td-success-color-7: #067945;--td-success-color-8: #056334;--td-success-color-9: #044f2a;--td-success-color-10: #033017;--td-success-color: var(--td-success-color-5);--td-success-color-hover: var(--td-success-color-4);--td-success-color-focus: var(--td-success-color-2);--td-success-color-active: var(--td-success-color-6);--td-success-color-disabled: var(--td-success-color-3);--td-success-color-light: var(--td-success-color-1); --td-gray-color-1: #f3f3f3;--td-gray-color-2: #eee;--td-gray-color-3: #e7e7e7;--td-gray-color-4: #dcdcdc;--td-gray-color-5: #c5c5c5;--td-gray-color-6: #a6a6a6;--td-gray-color-7: #8b8b8b;--td-gray-color-8: #777;--td-gray-color-9: #5e5e5e;--td-gray-color-10: #4b4b4b;--td-gray-color-11: #383838;--td-gray-color-12: #2c2c2c;--td-gray-color-13: #242424;--td-gray-color-14: #181818;--td-bg-color-container: #fff;--td-bg-color-container-select: #fff;--td-bg-color-page: var(--td-gray-color-2);--td-bg-color-container-hover: var(--td-gray-color-1);--td-bg-color-container-active: var(--td-gray-color-3);--td-bg-color-secondarycontainer: var(--td-gray-color-1);--td-bg-color-secondarycontainer-hover: var(--td-gray-color-2);--td-bg-color-secondarycontainer-active: var(--td-gray-color-4);--td-bg-color-component: var(--td-gray-color-3);--td-bg-color-component-hover: var(--td-gray-color-4);--td-bg-color-component-active: var(--td-gray-color-6);--td-bg-color-component-disabled: var(--td-gray-color-2);--td-component-stroke: var(--td-gray-color-3);--td-component-border: var(--td-gray-color-4); --td-font-white-1: #ffffff;--td-font-white-2: rgba(255, 255, 255, 0.55);--td-font-white-3: rgba(255, 255, 255, 0.35);--td-font-white-4: rgba(255, 255, 255, 0.22);--td-font-gray-1: rgba(0, 0, 0, 0.9);--td-font-gray-2: rgba(0, 0, 0, 0.6);--td-font-gray-3: rgba(0, 0, 0, 0.4);--td-font-gray-4: rgba(0, 0, 0, 0.26);--td-text-color-primary: var(--td-font-gray-1);--td-text-color-secondary: var(--td-font-gray-2);--td-text-color-placeholder: var(--td-font-gray-3);--td-text-color-disabled: var(--td-font-gray-4);--td-text-color-anti: #fff;--td-text-color-brand: var(--td-brand-color);--td-text-color-link: var(--td-brand-color); /* 字体配置 */ 
--td-font-family: PingFang SC, Microsoft YaHei, Arial Regular; --td-font-family-medium: PingFang SC, Microsoft YaHei, Arial Medium; --td-font-size-link-small: 12px; --td-font-size-link-medium: 14px; --td-font-size-link-large: 16px; --td-font-size-mark-small: 12px; --td-font-size-mark-medium: 14px; --td-font-size-body-small: 12px; --td-font-size-body-medium: 14px; --td-font-size-body-large: 16px; --td-font-size-title-small: 14px; --td-font-size-title-medium: 16px; --td-font-size-title-large: 20px; --td-font-size-headline-small: 24px; --td-font-size-headline-medium: 28px; --td-font-size-headline-large: 36px; --td-font-size-display-medium: 48px; --td-font-size-display-large: 64px; --td-line-height-common: 8px; --td-line-height-link-small: calc( var(--td-font-size-link-small) + var(--td-line-height-common) ); --td-line-height-link-medium: calc( var(--td-font-size-link-medium) + var(--td-line-height-common) ); --td-line-height-link-large: calc( var(--td-font-size-link-large) + var(--td-line-height-common) ); --td-line-height-mark-small: calc( var(--td-font-size-mark-small) + var(--td-line-height-common) ); --td-line-height-mark-medium: calc( var(--td-font-size-mark-medium) + var(--td-line-height-common) ); --td-line-height-body-small: calc( var(--td-font-size-body-small) + var(--td-line-height-common) ); --td-line-height-body-medium: calc( var(--td-font-size-body-medium) + var(--td-line-height-common) ); --td-line-height-body-large: calc( var(--td-font-size-body-large) + var(--td-line-height-common) ); --td-line-height-title-small: calc( var(--td-font-size-title-small) + var(--td-line-height-common) ); --td-line-height-title-medium: calc( var(--td-font-size-title-medium) + var(--td-line-height-common) ); --td-line-height-title-large: calc( var(--td-font-size-title-medium) + var(--td-line-height-common) ); --td-line-height-headline-small: calc( var(--td-font-size-headline-small) + var(--td-line-height-common) ); --td-line-height-headline-medium: calc( var(--td-font-size-headline-medium) + var(--td-line-height-common) ); --td-line-height-headline-large: calc( var(--td-font-size-headline-large) + var(--td-line-height-common) ); --td-line-height-display-medium: calc( var(--td-font-size-display-medium) + var(--td-line-height-common) ); --td-line-height-display-large: calc( var(--td-font-size-display-large) + var(--td-line-height-common) ); --td-font-link-small: var(--td-font-size-link-small) / var(--td-line-height-link-small) var(--td-font-family); --td-font-link-medium: var(--td-font-size-link-medium) / var(--td-line-height-link-medium) var(--td-font-family); --td-font-link-large: var(--td-font-size-link-large) / var(--td-line-height-link-large) var(--td-font-family); --td-font-mark-small: 600 var(--td-font-size-mark-small) / var(--td-line-height-mark-small) var(--td-font-family); --td-font-mark-medium: 600 var(--td-font-size-mark-medium) / var(--td-line-height-mark-medium) var(--td-font-family); --td-font-body-small: var(--td-font-size-body-small) / var(--td-line-height-body-small) var(--td-font-family); --td-font-body-medium: var(--td-font-size-body-medium) / var(--td-line-height-body-medium) var(--td-font-family); --td-font-body-large: var(--td-font-size-body-large) / var(--td-line-height-body-large) var(--td-font-family); --td-font-title-small: var(--td-font-size-title-small) / var(--td-line-height-title-small) var(--td-font-family); --td-font-title-medium: var(--td-font-size-title-medium) / var(--td-line-height-title-medium) var(--td-font-family); --td-font-title-large: 
var(--td-font-size-title-large) / var(--td-line-height-title-large) var(--td-font-family); --td-font-headline-small: var(--td-font-size-headline-small) / var(--td-line-height-headline-small) var(--td-font-family); --td-font-headline-medium: var(--td-font-size-headline-medium) / var(--td-line-height-headline-medium) var(--td-font-family); --td-font-headline-large: var(--td-font-size-headline-large) / var(--td-line-height-headline-large) var(--td-font-family); --td-font-display-medium: var(--td-font-size-display-medium) / var(--td-line-height-display-medium) var(--td-font-family); --td-font-display-large: var(--td-font-size-display-large) / var(--td-line-height-display-large) var(--td-font-family); /* 字体颜色 */ --td-text-color-primary: var(--td-font-gray-1); --td-text-color-secondary: var(--td-font-gray-2); --td-text-color-placeholder: var(--td-font-gray-3); --td-text-color-disabled: var(--td-font-gray-4); --td-text-color-anti: #fff; --td-text-color-brand: var(--td-brand-color); --td-text-color-link: var(--td-brand-color); /* end 字体配置 */ /* 圆角配置 */ --td-radius-small: 2px; --td-radius-default: 3px; --td-radius-medium: 6px; --td-radius-large: 9px; --td-radius-extraLarge: 12px; --td-radius-round: 999px; --td-radius-circle: 50%; /* end 圆角配置 */ /* 阴影配置 */ --td-shadow-1: 0px 1px 10px rgba(0, 0, 0, 0.05), 0px 4px 5px rgba(0, 0, 0, 0.08), 0px 2px 4px -1px rgba(0, 0, 0, 0.12); --td-shadow-2: 0px 3px 14px 2px rgba(0, 0, 0, 0.05), 0px 8px 10px 1px rgba(0, 0, 0, 0.06), 0px 5px 5px -3px rgba(0, 0, 0, 0.1); --td-shadow-3: 0px 6px 30px 5px rgba(0, 0, 0, 0.05), 0px 16px 24px 2px rgba(0, 0, 0, 0.04), 0px 8px 10px -5px rgba(0, 0, 0, 0.08); /* end 阴影配置 */ } +:root[theme-mode="dark"]{ + --brand-main: var(--td-brand-color-6); + --td-brand-color-light: var(--td-brand-color-1); + --td-brand-color-focus: var(--td-brand-color-2); + --td-brand-color-disabled: var(--td-brand-color-3); + --td-brand-color-hover: var(--td-brand-color-5); + --td-brand-color: var(--td-brand-color-6); + --td-brand-color-active:var(--td-brand-color-7); + --td-brand-color-1: #06b04d20; + --td-brand-color-2: #015e0d; + --td-brand-color-3: #027218; + --td-brand-color-4: #038626; + --td-brand-color-5: #049b38; + --td-brand-color-6: #06b04d; + --td-brand-color-7: #07c05f; + --td-brand-color-8: #08dd6e; + --td-brand-color-9: #09f479; + --td-brand-color-10: #a6fccf; + --td-warning-color-1: #4f2a1d; +--td-warning-color-2: #582f21; +--td-warning-color-3: #733c23; +--td-warning-color-4: #a75d2b; +--td-warning-color-5: #cf6e2d; +--td-warning-color-6: #dc7633; +--td-warning-color-7: #e8935c; +--td-warning-color-8: #ecbf91; +--td-warning-color-9: #eed7bf; +--td-warning-color-10: #f3e9dc; --td-error-color-1: #472324; +--td-error-color-2: #5e2a2d; +--td-error-color-3: #703439; +--td-error-color-4: #83383e; +--td-error-color-5: #a03f46; +--td-error-color-6: #c64751; +--td-error-color-7: #de6670; +--td-error-color-8: #ec888e; +--td-error-color-9: #edb1b6; +--td-error-color-10: #eeced0; --td-success-color-1: #193a2a; +--td-success-color-2: #1a4230; +--td-success-color-3: #17533d; +--td-success-color-4: #0d7a55; +--td-success-color-5: #059465; +--td-success-color-6: #43af8a; +--td-success-color-7: #46bf96; +--td-success-color-8: #80d2b6; +--td-success-color-9: #b4e1d3; +--td-success-color-10: #deede8; --td-gray-color-1: #f3f3f3; +--td-gray-color-2: #eee; +--td-gray-color-3: #e7e7e7; +--td-gray-color-4: #dcdcdc; +--td-gray-color-5: #c5c5c5; +--td-gray-color-6: #a6a6a6; +--td-gray-color-7: #8b8b8b; +--td-gray-color-8: #777; +--td-gray-color-9: #5e5e5e; 
+--td-gray-color-10: #4b4b4b; +--td-gray-color-11: #383838; +--td-gray-color-12: #2c2c2c; +--td-gray-color-13: #242424; +--td-gray-color-14: #181818; +--td-bg-color-page: var(--td-gray-color-14); +--td-bg-color-container: var(--td-gray-color-13); +--td-bg-color-container-hover: var(--td-gray-color-12); +--td-bg-color-container-active: var(--td-gray-color-10); +--td-bg-color-container-select: var(--td-gray-color-9); +--td-bg-color-secondarycontainer: var(--td-gray-color-12); +--td-bg-color-secondarycontainer-hover: var(--td-gray-color-11); +--td-bg-color-secondarycontainer-active: var(--td-gray-color-9); +--td-bg-color-component: var(--td-gray-color-11); +--td-bg-color-component-hover: var(--td-gray-color-10); +--td-bg-color-component-active: var(--td-gray-color-9); +--td-bg-color-component-disabled: var(--td-gray-color-12); +--td-component-stroke: var(--td-gray-color-11); +--td-component-border: var(--td-gray-color-9); --td-font-white-1: rgba(255, 255, 255, 0.9); +--td-font-white-2: rgba(255, 255, 255, 0.55); +--td-font-white-3: rgba(255, 255, 255, 0.35); +--td-font-white-4: rgba(255, 255, 255, 0.22); +--td-font-gray-1: rgba(0, 0, 0, 0.9); +--td-font-gray-2: rgba(0, 0, 0, 0.6); +--td-font-gray-3: rgba(0, 0, 0, 0.4); +--td-font-gray-4: rgba(0, 0, 0, 0.26); +--td-text-color-primary: var(--td-font-white-1); +--td-text-color-secondary: var(--td-font-white-2); +--td-text-color-placeholder: var(--td-font-white-3); +--td-text-color-disabled: var(--td-font-white-4); +--td-text-color-anti: #fff; +--td-text-color-brand: var(--td-brand-color); +--td-text-color-link: var(--td-brand-color); + + /* 字体配置 */ --td-font-family: PingFang SC, Microsoft YaHei, Arial Regular; --td-font-family-medium: PingFang SC, Microsoft YaHei, Arial Medium; --td-font-size-link-small: 12px; --td-font-size-link-medium: 14px; --td-font-size-link-large: 16px; --td-font-size-mark-small: 12px; --td-font-size-mark-medium: 14px; --td-font-size-body-small: 12px; --td-font-size-body-medium: 14px; --td-font-size-body-large: 16px; --td-font-size-title-small: 14px; --td-font-size-title-medium: 16px; --td-font-size-title-large: 20px; --td-font-size-headline-small: 24px; --td-font-size-headline-medium: 28px; --td-font-size-headline-large: 36px; --td-font-size-display-medium: 48px; --td-font-size-display-large: 64px; --td-line-height-common: 8px; --td-line-height-link-small: calc( var(--td-font-size-link-small) + var(--td-line-height-common) ); --td-line-height-link-medium: calc( var(--td-font-size-link-medium) + var(--td-line-height-common) ); --td-line-height-link-large: calc( var(--td-font-size-link-large) + var(--td-line-height-common) ); --td-line-height-mark-small: calc( var(--td-font-size-mark-small) + var(--td-line-height-common) ); --td-line-height-mark-medium: calc( var(--td-font-size-mark-medium) + var(--td-line-height-common) ); --td-line-height-body-small: calc( var(--td-font-size-body-small) + var(--td-line-height-common) ); --td-line-height-body-medium: calc( var(--td-font-size-body-medium) + var(--td-line-height-common) ); --td-line-height-body-large: calc( var(--td-font-size-body-large) + var(--td-line-height-common) ); --td-line-height-title-small: calc( var(--td-font-size-title-small) + var(--td-line-height-common) ); --td-line-height-title-medium: calc( var(--td-font-size-title-medium) + var(--td-line-height-common) ); --td-line-height-title-large: calc( var(--td-font-size-title-medium) + var(--td-line-height-common) ); --td-line-height-headline-small: calc( var(--td-font-size-headline-small) + 
var(--td-line-height-common) ); --td-line-height-headline-medium: calc( var(--td-font-size-headline-medium) + var(--td-line-height-common) ); --td-line-height-headline-large: calc( var(--td-font-size-headline-large) + var(--td-line-height-common) ); --td-line-height-display-medium: calc( var(--td-font-size-display-medium) + var(--td-line-height-common) ); --td-line-height-display-large: calc( var(--td-font-size-display-large) + var(--td-line-height-common) ); --td-font-link-small: var(--td-font-size-link-small) / var(--td-line-height-link-small) var(--td-font-family); --td-font-link-medium: var(--td-font-size-link-medium) / var(--td-line-height-link-medium) var(--td-font-family); --td-font-link-large: var(--td-font-size-link-large) / var(--td-line-height-link-large) var(--td-font-family); --td-font-mark-small: 600 var(--td-font-size-mark-small) / var(--td-line-height-mark-small) var(--td-font-family); --td-font-mark-medium: 600 var(--td-font-size-mark-medium) / var(--td-line-height-mark-medium) var(--td-font-family); --td-font-body-small: var(--td-font-size-body-small) / var(--td-line-height-body-small) var(--td-font-family); --td-font-body-medium: var(--td-font-size-body-medium) / var(--td-line-height-body-medium) var(--td-font-family); --td-font-body-large: var(--td-font-size-body-large) / var(--td-line-height-body-large) var(--td-font-family); --td-font-title-small: var(--td-font-size-title-small) / var(--td-line-height-title-small) var(--td-font-family); --td-font-title-medium: var(--td-font-size-title-medium) / var(--td-line-height-title-medium) var(--td-font-family); --td-font-title-large: var(--td-font-size-title-large) / var(--td-line-height-title-large) var(--td-font-family); --td-font-headline-small: var(--td-font-size-headline-small) / var(--td-line-height-headline-small) var(--td-font-family); --td-font-headline-medium: var(--td-font-size-headline-medium) / var(--td-line-height-headline-medium) var(--td-font-family); --td-font-headline-large: var(--td-font-size-headline-large) / var(--td-line-height-headline-large) var(--td-font-family); --td-font-display-medium: var(--td-font-size-display-medium) / var(--td-line-height-display-medium) var(--td-font-family); --td-font-display-large: var(--td-font-size-display-large) / var(--td-line-height-display-large) var(--td-font-family); /* 字体颜色 */ --td-text-color-primary: var(--td-font-gray-1); --td-text-color-secondary: var(--td-font-gray-2); --td-text-color-placeholder: var(--td-font-gray-3); --td-text-color-disabled: var(--td-font-gray-4); --td-text-color-anti: #fff; --td-text-color-brand: var(--td-brand-color); --td-text-color-link: var(--td-brand-color); /* end 字体配置 */ + + /* 圆角配置 */ --td-radius-small: 2px; --td-radius-default: 3px; --td-radius-medium: 6px; --td-radius-large: 9px; --td-radius-extraLarge: 12px; --td-radius-round: 999px; --td-radius-circle: 50%; /* end 圆角配置 */ + + /* 阴影配置 */ --td-shadow-1: 0px 1px 10px rgba(0, 0, 0, 0.05), 0px 4px 5px rgba(0, 0, 0, 0.08), 0px 2px 4px -1px rgba(0, 0, 0, 0.12); --td-shadow-2: 0px 3px 14px 2px rgba(0, 0, 0, 0.05), 0px 8px 10px 1px rgba(0, 0, 0, 0.06), 0px 5px 5px -3px rgba(0, 0, 0, 0.1); --td-shadow-3: 0px 6px 30px 5px rgba(0, 0, 0, 0.05), 0px 16px 24px 2px rgba(0, 0, 0, 0.04), 0px 8px 10px -5px rgba(0, 0, 0, 0.08); /* end 阴影配置 */ + } \ No newline at end of file diff --git a/frontend/src/components/Input-field.vue b/frontend/src/components/Input-field.vue new file mode 100644 index 0000000..5d45e14 --- /dev/null +++ b/frontend/src/components/Input-field.vue @@ -0,0 +1,135 @@ + + + diff 
--git a/frontend/src/components/css/markdown.less b/frontend/src/components/css/markdown.less new file mode 100644 index 0000000..744b851 --- /dev/null +++ b/frontend/src/components/css/markdown.less @@ -0,0 +1,426 @@ +:deep(.md-content) { + box-sizing: border-box !important; + + img { + max-width: 444px; + cursor: pointer; + } + + h1, + h2, + h3, + h4, + h5, + h6 { + margin-top: 5px; + font-weight: bold; + color: #00000099; + font-family: "PingFang SC", "Cascadia Code"; + transition: all 0.2s ease-out; + font-size: 20px; + } + + .hljs-title, + .hljs-title.class_, + .hljs-title.class_.inherited__, + .hljs-title.function_ { + white-space: pre-wrap; + word-break: break-all; + } + + .proto { + word-break: break-all; + white-space: pre-wrap; + } + + h1 tt, + h1 code { + font-size: inherit !important; + } + + h2 tt, + h2 code { + font-size: inherit !important; + } + + h3 tt, + h3 code { + font-size: inherit !important; + } + + h4 tt, + h4 code { + font-size: inherit !important; + } + + h5 tt, + h5 code { + font-size: inherit !important; + } + + h6 tt, + h6 code { + font-size: inherit !important; + } + + h2 a, + h3 a { + color: #34495e; + } + + p, + blockquote, + ul, + ol, + dl, + table { + font-size: 14px; + margin: 10px 0; + font-family: "PingFang SC", "Cascadia Code"; + } + + h2 { + font-size: 18px; + } + + h3 { + font-size: 16px; + font-weight: 500; + } + + summary { + font-size: 14px; + cursor: pointer; + } + + li>ol, + li>ul { + margin: 0 0; + } + + hr { + padding: 0; + margin: 32px 0; + border-top: 0.5rem dotted #0590ff57; + overflow: hidden; + box-sizing: content-box; + } + + body>h2:first-child { + margin-top: 0; + padding-top: 0; + } + + body>h1:first-child { + margin-top: 0; + padding-top: 0; + } + + body>h1:first-child+h2 { + margin-top: 0; + padding-top: 0; + } + + body>h3:first-child, + body>h4:first-child, + body>h5:first-child, + body>h6:first-child { + margin-top: 0; + padding-top: 0; + } + + a:first-child h1, + a:first-child h2, + a:first-child h3, + a:first-child h4, + a:first-child h5, + a:first-child h6 { + margin-top: 0; + padding-top: 0; + } + + p { + margin: 0; + } + + code { + white-space: pre-wrap; + word-break: break-all; + } + + h1 p, + h2 p, + h3 p, + h4 p, + h5 p, + h6 p { + margin-top: 0; + } + + li p.first { + display: inline-block; + } + + ul, + ol { + padding-left: 30px; + } + + ul:first-child, + ol:first-child { + margin-top: 0; + } + + ul:last-child, + ol:last-child { + margin-bottom: 0; + } + + blockquote { + padding: 0.8em 1.4rem; + margin: 1em 0; + font-weight: 400; + border-left: 4px solid #2196f3; + background-color: #2196f321; + border-radius: 0px 8px 8px 0px; + box-shadow: rgb(149 149 149 / 13%) 0px 5px 10px; + } + + table { + padding: 0; + word-break: initial; + /* border-radius: 4px; */ + border-collapse: collapse; + border-spacing: 0; + width: 100%; + } + + table tr { + border-top: 1px solid #2196f31f; + margin: 0; + padding: 0; + } + + table tr:nth-child(2n), + thead { + background-color: #fafafa; + } + + table tr th { + font-weight: bold; + border: 1px solid #9b9b9b3b; + border-bottom: 0; + text-align: left; + margin: 0; + padding: 6px 13px; + } + + table tr td { + border: 1px solid #9b9b9b3b; + text-align: left; + margin: 0; + padding: 6px 13px; + } + + table tr th:first-child, + table tr td:first-child { + margin-top: 0; + } + + table tr th:last-child, + table tr td:last-child { + margin-bottom: 0; + } + + + tt { + margin: 0 2px; + } + + figure { + border-radius: 8px; + margin-left: 0; + margin-right: 0; + background: #fff; + } + + + + 
.md-task-list-item>input { + margin-left: -1.3em; + } + + @media print { + html { + font-size: 13px; + } + + table, + pre { + page-break-inside: avoid; + } + + pre { + word-wrap: break-word; + } + } + + .md-fences { + background-color: #f8f8f8; + } + + .md-diagram-panel { + position: static !important; + } + + + .mathjax-block>.code-tooltip { + bottom: 0.375rem; + } + + h3.md-focus:before, + h4.md-focus:before, + h5.md-focus:before, + h6.md-focus:before { + border: 0px; + position: unset; + padding: 0px; + font-size: unset; + line-height: unset; + float: unset; + } + + .md-image>.md-meta { + border-radius: 3px; + font-family: var(--font-monospace); + padding: 2px 0 0 4px; + font-size: 0.9em; + color: inherit; + } + + .md-tag { + color: inherit; + } + + .md-toc { + margin-top: 20px; + padding-bottom: 20px; + } + + .sidebar-tabs { + border-bottom: none; + } + + + /** focus mode */ + + .on-focus-mode blockquote { + border-left-color: rgba(85, 85, 85, 0.12); + } + + header, + .context-menu, + .megamenu-content, + footer { + font-family: var(--font-sans-serif); + } + + .file-node-content:hover .file-node-icon, + .file-node-content:hover .file-node-open-state { + visibility: visible; + } + + .mac-seamless-mode #typora-sidebar { + background-color: var(--side-bar-bg-color); + } + + .md-lang { + color: #b4654d; + } + + .html-for-mac .context-menu { + --item-hover-bg-color: #e6f0fe; + } + + .pin-outline #outline-content .outline-active strong, + .pin-outline .outline-active { + color: #2196f3; + } + + .code-tooltip { + border-radius: 4px; + border: 1px solid #ededed; + background-color: #f8f8f8; + } + + .cm-s-inner .cm-comment, + .cm-s-inner.cm-comment { + color: #57a64a; + font-style: italic; + /* font-family: 'PingFang'; */ + } + + h1.md-end-block.md-heading:after, + h2.md-end-block.md-heading:after, + h3.md-end-block.md-heading:after, + h4.md-end-block.md-heading:after, + h5.md-end-block.md-heading:after, + h6.md-end-block.md-heading:after { + color: #bfbfbf !important; + border: 1px solid; + border-radius: 4px; + position: absolute; + left: -2.5rem; + float: left; + font-size: 14px; + padding-left: 4px; + padding-right: 5px; + vertical-align: bottom; + font-weight: 400; + line-height: normal; + opacity: 0; + } + + h1.md-end-block.md-heading:hover:after, + h2.md-end-block.md-heading:hover:after, + h3.md-end-block.md-heading:hover:after, + h4.md-end-block.md-heading:hover:after, + h5.md-end-block.md-heading:hover:after, + h6.md-end-block.md-heading:hover:after { + opacity: 1; + } + + h1.md-end-block.md-heading:hover:after { + content: "h1"; + top: 1.1rem; + } + + h2.md-end-block.md-heading:hover:after { + content: "h2"; + top: 0.63rem; + } + + h3.md-end-block.md-heading:hover:after { + content: "h3"; + top: 0.55rem; + } + + h4.md-end-block.md-heading:hover:after { + content: "h4"; + top: 0.3rem; + } + + h5.md-end-block.md-heading:hover:after { + content: "h5"; + top: 0.18rem; + } + + h6.md-end-block.md-heading:hover:after { + content: "h6"; + top: 0.16rem; + } + + .outline-label { + font-family: "Cascadia Code", "PingFang SC"; + } +} \ No newline at end of file diff --git a/frontend/src/components/doc-content.vue b/frontend/src/components/doc-content.vue new file mode 100644 index 0000000..5c73e35 --- /dev/null +++ b/frontend/src/components/doc-content.vue @@ -0,0 +1,242 @@ + + + diff --git a/frontend/src/components/empty-knowledge.vue b/frontend/src/components/empty-knowledge.vue new file mode 100644 index 0000000..1310c88 --- /dev/null +++ b/frontend/src/components/empty-knowledge.vue @@ -0,0 
+1,42 @@ + + + diff --git a/frontend/src/components/menu.vue b/frontend/src/components/menu.vue new file mode 100644 index 0000000..b656c04 --- /dev/null +++ b/frontend/src/components/menu.vue @@ -0,0 +1,453 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/components/picture-preview.vue b/frontend/src/components/picture-preview.vue new file mode 100644 index 0000000..0f69c98 --- /dev/null +++ b/frontend/src/components/picture-preview.vue @@ -0,0 +1,18 @@ + + + diff --git a/frontend/src/components/upload-mask.vue b/frontend/src/components/upload-mask.vue new file mode 100644 index 0000000..a718120 --- /dev/null +++ b/frontend/src/components/upload-mask.vue @@ -0,0 +1,40 @@ + + + diff --git a/frontend/src/hooks/useKnowledgeBase.ts b/frontend/src/hooks/useKnowledgeBase.ts new file mode 100644 index 0000000..08ce546 --- /dev/null +++ b/frontend/src/hooks/useKnowledgeBase.ts @@ -0,0 +1,145 @@ +import { ref, reactive, onMounted } from "vue"; +import { storeToRefs } from "pinia"; +import { formatStringDate, kbFileTypeVerification } from "../utils/index"; +import { MessagePlugin } from "tdesign-vue-next"; +import { + uploadKnowledgeBase, + getKnowledgeBase, + getKnowledgeDetails, + delKnowledgeDetails, + getKnowledgeDetailsCon, +} from "@/api/knowledge-base/index"; +import { knowledgeStore } from "@/stores/knowledge"; +const usemenuStore = knowledgeStore(); +export default function () { + const { cardList, total } = storeToRefs(usemenuStore); + let moreIndex = ref(-1); + const details = reactive({ + title: "", + time: "", + md: [], + id: "", + total: 0 + }); + const getKnowled = (query = { page: 1, page_size: 35 }) => { + getKnowledgeBase(query) + .then((result: object) => { + let { data, total: totalResult } = result; + let cardList_ = data.map((item) => { + item["file_name"] = item.file_name.substring( + 0, + item.file_name.lastIndexOf(".") + ); + return { + ...item, + updated_at: formatStringDate(new Date(item.updated_at)), + isMore: false, + file_type: item.file_type.toLocaleUpperCase(), + }; + }); + if (query.page == 1) { + cardList.value = cardList_; + } else { + cardList.value.push(...cardList_); + } + total.value = totalResult; + }) + .catch((err) => {}); + }; + const delKnowledge = (index: number, item) => { + cardList.value[index].isMore = false; + moreIndex.value = -1; + delKnowledgeDetails(item.id) + .then((result) => { + if (result.success) { + MessagePlugin.info("知识删除成功!"); + getKnowled(); + } else { + MessagePlugin.error("知识删除失败!"); + } + }) + .catch((err) => { + MessagePlugin.error("知识删除失败!"); + }); + }; + const openMore = (index: number) => { + moreIndex.value = index; + }; + const onVisibleChange = (visible: boolean) => { + if (!visible) { + moreIndex.value = -1; + } + }; + const requestMethod = (file: any, uploadInput) => { + if (file instanceof File && uploadInput) { + if (kbFileTypeVerification(file)) { + return; + } + uploadKnowledgeBase({ file }) + .then((result) => { + if (result.success) { + MessagePlugin.info("上传成功!"); + getKnowled(); + } else { + MessagePlugin.error("上传失败!"); + } + uploadInput.value.value = ""; + }) + .catch((err) => { + MessagePlugin.error("上传失败!"); + uploadInput.value.value = ""; + }); + } else { + MessagePlugin.error("file文件类型错误!"); + } + }; + const getCardDetails = (item) => { + Object.assign(details, { + title: "", + time: "", + md: [], + id: "", + }); + getKnowledgeDetails(item.id) + .then((result) => { + if (result.success && result.data) { + let { data } = result; + Object.assign(details, { + title: data.file_name, + time: 
formatStringDate(new Date(data.updated_at)), + id: data.id, + }); + } + }) + .catch((err) => {}); + getfDetails(item.id, 1); + }; + const getfDetails = (id, page) => { + getKnowledgeDetailsCon(id, page) + .then((result) => { + if (result.success && result.data) { + let { data, total: totalResult } = result; + if (page == 1) { + details.md = data; + } else { + details.md.push(...data); + } + details.total = totalResult; + } + }) + .catch((err) => {}); + }; + return { + cardList, + moreIndex, + getKnowled, + details, + delKnowledge, + openMore, + onVisibleChange, + requestMethod, + getCardDetails, + total, + getfDetails, + }; +} diff --git a/frontend/src/main.ts b/frontend/src/main.ts new file mode 100644 index 0000000..0628e01 --- /dev/null +++ b/frontend/src/main.ts @@ -0,0 +1,16 @@ +import { createApp } from "vue"; +import { createPinia } from "pinia"; +import App from "./App.vue"; +import router from "./router"; +import "./assets/fonts.css"; +import TDesign from "tdesign-vue-next"; +// 引入组件库的少量全局样式变量 +import "tdesign-vue-next/es/style/index.css"; +import "@/assets/theme/theme.css"; +const app = createApp(App); + +app.use(TDesign); +app.use(createPinia()); +app.use(router); + +app.mount("#app"); diff --git a/frontend/src/router/index.ts b/frontend/src/router/index.ts new file mode 100644 index 0000000..2e37e21 --- /dev/null +++ b/frontend/src/router/index.ts @@ -0,0 +1,46 @@ +import { createRouter, createWebHistory } from 'vue-router' + +const router = createRouter({ + history: createWebHistory(import.meta.env.BASE_URL), + routes: [ + { + path: "/", + redirect: "/platform", + }, + { + path: "/knowledgeBase", + name: "home", + component: () => import("../views/knowledge/KnowledgeBase.vue"), + }, + { + path: "/platform", + name: "Platform", + redirect: "/platform/knowledgeBase", + component: () => import("../views/platform/index.vue"), + children: [ + { + path: "knowledgeBase", + name: "knowledgeBase", + component: () => import("../views/knowledge/KnowledgeBase.vue"), + }, + { + path: "creatChat", + name: "creatChat", + component: () => import("../views/creatChat/creatChat.vue"), + }, + { + path: "chat/:chatid", + name: "chat", + component: () => import("../views/chat/index.vue"), + }, + { + path: "settings", + name: "settings", + component: () => import("../views/settings/Settings.vue"), + }, + ], + }, + ], +}); + +export default router diff --git a/frontend/src/stores/knowledge.ts b/frontend/src/stores/knowledge.ts new file mode 100644 index 0000000..98a7ed2 --- /dev/null +++ b/frontend/src/stores/knowledge.ts @@ -0,0 +1,11 @@ +import { ref, computed, reactive } from "vue"; + +import { defineStore } from "pinia"; + +export const knowledgeStore = defineStore("knowledge", { + state: () => ({ + cardList: ref([]), + total: ref(0), + }), + actions: {}, +}); diff --git a/frontend/src/stores/menu.ts b/frontend/src/stores/menu.ts new file mode 100644 index 0000000..3823ac2 --- /dev/null +++ b/frontend/src/stores/menu.ts @@ -0,0 +1,46 @@ +import { ref, computed, reactive } from 'vue' + +import { defineStore } from 'pinia'; + +export const useMenuStore = defineStore('menuStore', { + state: () => ({ + menuArr: reactive([ + { title: '知识库', icon: 'zhishiku', path: 'knowledgeBase' }, + { + title: '对话', + icon: 'prefixIcon', + path: 'creatChat', + childrenPath: 'chat', + children: reactive([]), + }, + { title: '系统设置', icon: 'setting', path: 'settings' } + ]), + isFirstSession: false, + firstQuery: '' + } + ), + actions: { + updatemenuArr(obj: any) { + this.menuArr[1].children?.push(obj); + }, + 
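// Prepend a newly created chat session to the conversation menu so it appears at the top of the list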
updataMenuChildren(item: object) {
+ this.menuArr[1].children?.unshift(item)
+ },
+ updatasessionTitle(session_id: string, title: string) {
+ this.menuArr[1].children?.forEach(item => {
+ if (item.id == session_id) {
+ item.title = title;
+ item.isNoTitle = false;
+ }
+ });
+ },
+ changeIsFirstSession(payload: boolean) {
+ this.isFirstSession = payload;
+ },
+ changeFirstQuery(payload: string) {
+ this.firstQuery = payload;
+ }
+ }
+});
+
+
diff --git a/frontend/src/stores/settings.ts b/frontend/src/stores/settings.ts new file mode 100644 index 0000000..b22a8b4 --- /dev/null +++ b/frontend/src/stores/settings.ts @@ -0,0 +1,51 @@
+import { defineStore } from "pinia";
+
+// Settings interface definition
+interface Settings {
+ endpoint: string;
+ apiKey: string;
+ knowledgeBaseId: string;
+}
+
+// Default settings
+const defaultSettings: Settings = {
+ endpoint: import.meta.env.VITE_IS_DOCKER ? "" : "http://localhost:8080",
+ apiKey: "",
+ knowledgeBaseId: "",
+};
+
+export const useSettingsStore = defineStore("settings", {
+ state: () => ({
+ // Load settings from local storage; use the defaults if none are saved
+ settings: JSON.parse(localStorage.getItem("WeKnora_settings") || JSON.stringify(defaultSettings)),
+ }),
+
+ actions: {
+ // Save settings
+ saveSettings(settings: Settings) {
+ this.settings = { ...settings };
+ // Persist to localStorage
+ localStorage.setItem("WeKnora_settings", JSON.stringify(this.settings));
+ },
+
+ // Get the current settings
+ getSettings(): Settings {
+ return this.settings;
+ },
+
+ // Get the API endpoint
+ getEndpoint(): string {
+ return this.settings.endpoint || defaultSettings.endpoint;
+ },
+
+ // Get the API key
+ getApiKey(): string {
+ return this.settings.apiKey;
+ },
+
+ // Get the knowledge base ID
+ getKnowledgeBaseId(): string {
+ return this.settings.knowledgeBaseId;
+ },
+ },
+}); \ No newline at end of file diff --git a/frontend/src/utils/index.ts b/frontend/src/utils/index.ts new file mode 100644 index 0000000..65e8989 --- /dev/null +++ b/frontend/src/utils/index.ts @@ -0,0 +1,44 @@
+import { MessagePlugin } from "tdesign-vue-next";
+export function generateRandomString(length: number) {
+ let result = "";
+ const characters =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+ const charactersLength = characters.length;
+ for (let i = 0; i < length; i++) {
+ result += characters.charAt(Math.floor(Math.random() * charactersLength));
+ }
+ return result;
+}
+
+export function formatStringDate(date) {
+ let data = new Date(date);
+ let year = data.getFullYear();
+ let month = data.getMonth() + 1;
+ let day = data.getDate();
+ let hour = data.getHours();
+ let minute = data.getMinutes();
+ let second = data.getSeconds();
+ return (
+ year + "-" + month + "-" + day + " " + hour + ":" + minute + ":" + second
+ );
+}
+export function kbFileTypeVerification(file) {
+ let validTypes = ["pdf", "txt", "md", "docx", "doc", "jpg", "jpeg", "png"];
+ let type = file.name.substring(file.name.lastIndexOf(".") + 1);
+ if (!validTypes.includes(type)) {
+ MessagePlugin.error("文件类型错误!");
+ return true;
+ }
+ if (
+ (type == "pdf" || type == "docx" || type == "doc") &&
+ file.size > 31457280
+ ) {
+ MessagePlugin.error("pdf/doc文件不能超过30M!");
+ return true;
+ }
+ if ((type == "txt" || type == "md") && file.size > 31457280) {
+ MessagePlugin.error("txt/md文件不能超过30M!");
+ return true;
+ }
+ return false
+} diff --git a/frontend/src/utils/request.ts b/frontend/src/utils/request.ts new file mode 100644 index 0000000..16c7e12 --- /dev/null +++ b/frontend/src/utils/request.ts @@ -0,0 +1,146 @@
+// src/utils/request.ts
+import axios from "axios";
+import { generateRandomString } from "./index";
+
+// Read persisted settings from localStorage; fall back to defaults if parsing fails or nothing is saved
+function getSettings() {
+ const settingsStr = localStorage.getItem("WeKnora_settings");
+ if (settingsStr) {
+ try {
+ return JSON.parse(settingsStr);
+ } catch (e) {
+ console.error("解析设置失败:", e);
+ }
+ }
+ return {
+ endpoint: import.meta.env.VITE_IS_DOCKER ? "" : "http://localhost:8080",
+ apiKey: "",
+ knowledgeBaseId: "",
+ };
+}
+
+// Base API URL; prefer the endpoint from settings
+const settings = getSettings();
+const BASE_URL = settings.endpoint;
+
+// Test data
+let testData: {
+ tenant: {
+ id: number;
+ name: string;
+ api_key: string;
+ };
+ knowledge_bases: Array<{
+ id: string;
+ name: string;
+ description: string;
+ }>;
+} | null = null;
+
+// Create the Axios instance
+const instance = axios.create({
+ baseURL: BASE_URL, // use the configured base API URL
+ timeout: 30000, // request timeout in milliseconds
+ headers: {
+ "Content-Type": "application/json",
+ "X-Request-ID": `${generateRandomString(12)}`,
+ },
+});
+
+// Set test data
+export function setTestData(data: typeof testData) {
+ testData = data;
+ if (data) {
+ // Prefer the API key from settings; otherwise use the one from the test data
+ const apiKey = settings.apiKey || (data?.tenant?.api_key || "");
+ if (apiKey) {
+ instance.defaults.headers["X-API-Key"] = apiKey;
+ }
+ }
+}
+
+// Get test data
+export function getTestData() {
+ return testData;
+}
+
+instance.interceptors.request.use(
+ (config) => {
+ // Check for updated settings before every request
+ const currentSettings = getSettings();
+
+ // Update the baseURL if it changed
+ if (currentSettings.endpoint && config.baseURL !== currentSettings.endpoint) {
+ config.baseURL = currentSettings.endpoint;
+ }
+
+ // Update the API key if one is set
+ if (currentSettings.apiKey) {
+ config.headers["X-API-Key"] = currentSettings.apiKey;
+ }
+
+ config.headers["X-Request-ID"] = `${generateRandomString(12)}`;
+ return config;
+ },
+ (error) => Promise.reject(error)
+);
+
+instance.interceptors.response.use(
+ (response) => {
+ // Handle the result based on the response status code
+ const { status, data } = response;
+ if (status === 200 || status === 201) {
+ return data;
+ } else {
+ return Promise.reject(data);
+ }
+ },
+ (error: any) => {
+ if (!error.response) {
+ return Promise.reject({ message: "网络错误,请检查您的网络连接" });
+ }
+ const { data } = error.response;
+ return Promise.reject(data);
+ }
+);
+
+export function get(url: string) {
+ return instance.get(url);
+}
+
+export async function getDown(url: string) {
+ let res = await instance.get(url, {
+ responseType: "blob",
+ });
+ return res
+}
+
+export function postUpload(url: string, data = {}) {
+ return instance.post(url, data, {
+ headers: {
+ "Content-Type": "multipart/form-data",
+ "X-Request-ID": `${generateRandomString(12)}`,
+ },
+ });
+}
+
+export function postChat(url: string, data = {}) {
+ return instance.post(url, data, {
+ headers: {
+ "Content-Type": "text/event-stream;charset=utf-8",
+ "X-Request-ID": `${generateRandomString(12)}`,
+ },
+ });
+}
+
+export function post(url: string, data = {}) {
+ return instance.post(url, data);
+}
+
+export function put(url: string, data = {}) {
+ return instance.put(url, data);
+}
+
+export function del(url: string) {
+ return instance.delete(url);
+} diff --git a/frontend/src/views/chat/components/botmsg.vue b/frontend/src/views/chat/components/botmsg.vue new file mode 100644 index 0000000..54badc8 --- /dev/null +++ b/frontend/src/views/chat/components/botmsg.vue @@ -0,0 +1,246 @@ + + + \ No newline at end of file diff --git a/frontend/src/views/chat/components/deepThink.vue b/frontend/src/views/chat/components/deepThink.vue new file mode 100644 index 0000000..1de12c3 --- /dev/null +++ b/frontend/src/views/chat/components/deepThink.vue @@ -0,0 +1,138 @@ + + + \ No newline at end of file diff
--git a/frontend/src/views/chat/components/docInfo.vue b/frontend/src/views/chat/components/docInfo.vue new file mode 100644 index 0000000..d953859 --- /dev/null +++ b/frontend/src/views/chat/components/docInfo.vue @@ -0,0 +1,129 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/views/chat/components/sendMsg.vue b/frontend/src/views/chat/components/sendMsg.vue new file mode 100644 index 0000000..ab9a8a0 --- /dev/null +++ b/frontend/src/views/chat/components/sendMsg.vue @@ -0,0 +1,20 @@ + + + \ No newline at end of file diff --git a/frontend/src/views/chat/components/usermsg.vue b/frontend/src/views/chat/components/usermsg.vue new file mode 100644 index 0000000..fed5d6b --- /dev/null +++ b/frontend/src/views/chat/components/usermsg.vue @@ -0,0 +1,38 @@ + + + \ No newline at end of file diff --git a/frontend/src/views/chat/index.vue b/frontend/src/views/chat/index.vue new file mode 100644 index 0000000..dc897f7 --- /dev/null +++ b/frontend/src/views/chat/index.vue @@ -0,0 +1,303 @@ + + + \ No newline at end of file diff --git a/frontend/src/views/creatChat/creatChat.vue b/frontend/src/views/creatChat/creatChat.vue new file mode 100644 index 0000000..f072089 --- /dev/null +++ b/frontend/src/views/creatChat/creatChat.vue @@ -0,0 +1,188 @@ + + + + \ No newline at end of file diff --git a/frontend/src/views/knowledge/KnowledgeBase.vue b/frontend/src/views/knowledge/KnowledgeBase.vue new file mode 100644 index 0000000..1278b71 --- /dev/null +++ b/frontend/src/views/knowledge/KnowledgeBase.vue @@ -0,0 +1,574 @@ + + + + + diff --git a/frontend/src/views/platform/index.vue b/frontend/src/views/platform/index.vue new file mode 100644 index 0000000..aa7b06e --- /dev/null +++ b/frontend/src/views/platform/index.vue @@ -0,0 +1,76 @@ + + + \ No newline at end of file diff --git a/frontend/src/views/settings/Settings.vue b/frontend/src/views/settings/Settings.vue new file mode 100644 index 0000000..616e43a --- /dev/null +++ b/frontend/src/views/settings/Settings.vue @@ -0,0 +1,100 @@ + + + + + \ No newline at end of file diff --git a/frontend/tsconfig.app.json b/frontend/tsconfig.app.json new file mode 100644 index 0000000..913b8f2 --- /dev/null +++ b/frontend/tsconfig.app.json @@ -0,0 +1,12 @@ +{ + "extends": "@vue/tsconfig/tsconfig.dom.json", + "include": ["env.d.ts", "src/**/*", "src/**/*.vue"], + "exclude": ["src/**/__tests__/*"], + "compilerOptions": { + "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo", + + "paths": { + "@/*": ["./src/*"] + } + } +} diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json new file mode 100644 index 0000000..66b5e57 --- /dev/null +++ b/frontend/tsconfig.json @@ -0,0 +1,11 @@ +{ + "files": [], + "references": [ + { + "path": "./tsconfig.node.json" + }, + { + "path": "./tsconfig.app.json" + } + ] +} diff --git a/frontend/tsconfig.node.json b/frontend/tsconfig.node.json new file mode 100644 index 0000000..a83dfc9 --- /dev/null +++ b/frontend/tsconfig.node.json @@ -0,0 +1,19 @@ +{ + "extends": "@tsconfig/node22/tsconfig.json", + "include": [ + "vite.config.*", + "vitest.config.*", + "cypress.config.*", + "nightwatch.conf.*", + "playwright.config.*", + "eslint.config.*" + ], + "compilerOptions": { + "noEmit": true, + "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo", + + "module": "ESNext", + "moduleResolution": "Bundler", + "types": ["node"] + } +} diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts new file mode 100644 index 0000000..758d2a0 --- /dev/null +++ b/frontend/vite.config.ts @@ -0,0 +1,16 @@ 
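+// Vite config for the Vue 3 frontend: registers the Vue SFC and JSX plugins and maps the "@" import alias to ./src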
+import { fileURLToPath, URL } from 'node:url' +import { defineConfig } from 'vite' +import vue from '@vitejs/plugin-vue' +import vueJsx from '@vitejs/plugin-vue-jsx' + +export default defineConfig({ + plugins: [ + vue(), + vueJsx(), + ], + resolve: { + alias: { + '@': fileURLToPath(new URL('./src', import.meta.url)) + }, + } +}) diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..ef5699a --- /dev/null +++ b/go.mod @@ -0,0 +1,108 @@ +module github.com/Tencent/WeKnora + +go 1.24.0 + +toolchain go1.24.2 + +require ( + github.com/elastic/go-elasticsearch/v7 v7.17.10 + github.com/elastic/go-elasticsearch/v8 v8.18.0 + github.com/gin-contrib/cors v1.7.5 + github.com/gin-gonic/gin v1.10.0 + github.com/go-viper/mapstructure/v2 v2.2.1 + github.com/google/uuid v1.6.0 + github.com/hibiken/asynq v0.25.1 + github.com/minio/minio-go/v7 v7.0.90 + github.com/ollama/ollama v0.9.6 + github.com/panjf2000/ants/v2 v2.11.2 + github.com/parquet-go/parquet-go v0.25.0 + github.com/pgvector/pgvector-go v0.3.0 + github.com/redis/go-redis/v9 v9.7.3 + github.com/sashabaranov/go-openai v1.40.5 + github.com/sirupsen/logrus v1.9.3 + github.com/spf13/viper v1.20.1 + github.com/tencentyun/cos-go-sdk-v5 v0.7.65 + github.com/yanyiwu/gojieba v1.4.5 + go.opentelemetry.io/otel v1.37.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.37.0 + go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.35.0 + go.opentelemetry.io/otel/sdk v1.37.0 + go.opentelemetry.io/otel/trace v1.37.0 + go.uber.org/dig v1.18.1 + golang.org/x/sync v0.15.0 + google.golang.org/grpc v1.73.0 + google.golang.org/protobuf v1.36.6 + gorm.io/driver/postgres v1.5.11 + gorm.io/gorm v1.25.12 +) + +require ( + github.com/andybalholm/brotli v1.1.0 // indirect + github.com/bytedance/sonic v1.13.2 // indirect + github.com/bytedance/sonic/loader v0.2.4 // indirect + github.com/cenkalti/backoff/v5 v5.0.2 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/clbanning/mxj v1.8.4 // indirect + github.com/cloudwego/base64x v0.1.5 // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/elastic/elastic-transport-go/v8 v8.7.0 // indirect + github.com/fsnotify/fsnotify v1.8.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.8 // indirect + github.com/gin-contrib/sse v1.0.0 // indirect + github.com/go-ini/ini v1.67.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.26.0 // indirect + github.com/goccy/go-json v0.10.5 // indirect + github.com/google/go-querystring v1.1.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1 // indirect + github.com/jackc/pgpassfile v1.0.0 // indirect + github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect + github.com/jackc/pgx/v5 v5.7.2 // indirect + github.com/jackc/puddle/v2 v2.2.2 // indirect + github.com/jinzhu/inflection v1.0.0 // indirect + github.com/jinzhu/now v1.1.5 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.18.0 // indirect + github.com/klauspost/cpuid/v2 v2.2.10 // indirect + github.com/leodido/go-urn v1.4.0 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-runewidth v0.0.15 // 
indirect + github.com/minio/crc64nvme v1.0.1 // indirect + github.com/minio/md5-simd v1.1.2 // indirect + github.com/mitchellh/mapstructure v1.4.3 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/mozillazg/go-httpheader v0.2.1 // indirect + github.com/olekukonko/tablewriter v0.0.5 // indirect + github.com/pelletier/go-toml/v2 v2.2.3 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/rivo/uniseg v0.4.7 // indirect + github.com/robfig/cron/v3 v3.0.1 // indirect + github.com/rs/xid v1.6.0 // indirect + github.com/sagikazarmark/locafero v0.7.0 // indirect + github.com/sourcegraph/conc v0.3.0 // indirect + github.com/spf13/afero v1.12.0 // indirect + github.com/spf13/cast v1.7.1 // indirect + github.com/spf13/pflag v1.0.6 // indirect + github.com/subosito/gotenv v1.6.0 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.12 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel/metric v1.37.0 // indirect + go.opentelemetry.io/proto/otlp v1.7.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + golang.org/x/arch v0.15.0 // indirect + golang.org/x/crypto v0.39.0 // indirect + golang.org/x/net v0.41.0 // indirect + golang.org/x/sys v0.33.0 // indirect + golang.org/x/text v0.26.0 // indirect + golang.org/x/time v0.11.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..b0c93ea --- /dev/null +++ b/go.sum @@ -0,0 +1,292 @@ +entgo.io/ent v0.14.3 h1:wokAV/kIlH9TeklJWGGS7AYJdVckr0DloWjIcO9iIIQ= +entgo.io/ent v0.14.3/go.mod h1:aDPE/OziPEu8+OWbzy4UlvWmD2/kbRuWfK2A40hcxJM= +github.com/QcloudApi/qcloud_sign_golang v0.0.0-20141224014652-e4130a326409/go.mod h1:1pk82RBxDY/JZnPQrtqHlUFfCctgdorsd9M06fMynOM= +github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= +github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= +github.com/bytedance/sonic v1.13.2 h1:8/H1FempDZqC4VqjptGo14QQlJx8VdZJegxs6wwfqpQ= +github.com/bytedance/sonic v1.13.2/go.mod h1:o68xyaF9u2gvVBuGHPlUVCy+ZfmNNO5ETf1+KgkJhz4= +github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= +github.com/bytedance/sonic/loader v0.2.4 h1:ZWCw4stuXUsn1/+zQDqeE7JKP+QO47tz7QCNan80NzY= +github.com/bytedance/sonic/loader v0.2.4/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= +github.com/cenkalti/backoff/v5 v5.0.2 h1:rIfFVxEf1QsI7E1ZHfp/B4DF/6QBAUhmgkxc0H7Zss8= +github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/clbanning/mxj v1.8.4 h1:HuhwZtbyvyOw+3Z1AowPkU87JkJUSv751ELWaiTpj8I= +github.com/clbanning/mxj v1.8.4/go.mod 
h1:BVjHeAH+rl9rs6f+QIpeRl0tfu10SXn1pUSa5PVGJng= +github.com/cloudwego/base64x v0.1.5 h1:XPciSp1xaq2VCSt6lF0phncD4koWyULpl5bUxbfCyP4= +github.com/cloudwego/base64x v0.1.5/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= +github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/elastic/elastic-transport-go/v8 v8.7.0 h1:OgTneVuXP2uip4BA658Xi6Hfw+PeIOod2rY3GVMGoVE= +github.com/elastic/elastic-transport-go/v8 v8.7.0/go.mod h1:YLHer5cj0csTzNFXoNQ8qhtGY1GTvSqPnKWKaqQE3Hk= +github.com/elastic/go-elasticsearch/v7 v7.17.10 h1:TCQ8i4PmIJuBunvBS6bwT2ybzVFxxUhhltAs3Gyu1yo= +github.com/elastic/go-elasticsearch/v7 v7.17.10/go.mod h1:OJ4wdbtDNk5g503kvlHLyErCgQwwzmDtaFC4XyOxXA4= +github.com/elastic/go-elasticsearch/v8 v8.18.0 h1:ANNq1h7DEiPUaALb8+5w3baQzaS08WfHV0DNzp0VG4M= +github.com/elastic/go-elasticsearch/v8 v8.18.0/go.mod h1:WLqwXsJmQoYkoA9JBFeEwPkQhCfAZuUvfpdU/NvSSf0= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= +github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM= +github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8= +github.com/gin-contrib/cors v1.7.5 h1:cXC9SmofOrRg0w9PigwGlHG3ztswH6bqq4vJVXnvYMk= +github.com/gin-contrib/cors v1.7.5/go.mod h1:4q3yi7xBEDDWKapjT2o1V7mScKDDr8k+jZ0fSquGoy0= +github.com/gin-contrib/sse v1.0.0 h1:y3bT1mUWUxDpW4JLQg/HnTqV4rozuW4tC9eFKTxYI9E= +github.com/gin-contrib/sse v1.0.0/go.mod h1:zNuFdwarAygJBht0NTKiSi3jRf6RbqeILZ9Sp6Slhe0= +github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= +github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= +github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= +github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-pg/pg/v10 v10.11.0 h1:CMKJqLgTrfpE/aOVeLdybezR2om071Vh38OLZjsyMI0= +github.com/go-pg/pg/v10 v10.11.0/go.mod h1:4BpHRoxE61y4Onpof3x1a2SQvi9c+q1dJnrNdMjsroA= +github.com/go-pg/zerochecker v0.2.0 h1:pp7f72c3DobMWOb2ErtZsnrPaSvHd2W4o9//8HtF4mU= +github.com/go-pg/zerochecker v0.2.0/go.mod 
h1:NJZ4wKL0NmTtz0GKCoJ8kym6Xn/EQzXRl2OnAe7MmDo= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= +github.com/go-playground/validator/v10 v10.26.0 h1:SP05Nqhjcvz81uJaRfEV0YBSSSGMc/iMaVtFbr3Sw2k= +github.com/go-playground/validator/v10 v10.26.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo= +github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss= +github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= +github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= +github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= +github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1 h1:X5VWvz21y3gzm9Nw/kaUeku/1+uBhcekkmy4IkffJww= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1/go.mod h1:Zanoh4+gvIgluNqcfMVTJueD4wSS5hT7zTt4Mrutd90= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= +github.com/hibiken/asynq v0.25.1 h1:phj028N0nm15n8O2ims+IvJ2gz4k2auvermngh9JhTw= +github.com/hibiken/asynq v0.25.1/go.mod h1:pazWNOLBu0FEynQRBvHA26qdIKRSmfdIfUm4HdsLmXg= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgx/v5 v5.7.2 h1:mLoDLV6sonKlvjIEsV56SkWNCnuNv531l94GaIzO+XI= +github.com/jackc/pgx/v5 v5.7.2/go.mod h1:ncY89UGWxg82EykZUwSpUKEfccBGGYq1xjrOpsbsfGQ= +github.com/jackc/puddle/v2 v2.2.2 
h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= +github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= +github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= +github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/jmoiron/sqlx v1.3.5 h1:vFFPA71p1o5gAeqtEAwLU4dnX2napprKtHr7PYIcN3g= +github.com/jmoiron/sqlx v1.3.5/go.mod h1:nRVWtLre0KfCLJvgxzCsLVMogSvQ1zNJtpYr2Ccp0mQ= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= +github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= +github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= +github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= +github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/minio/crc64nvme v1.0.1 h1:DHQPrYPdqK7jQG/Ls5CTBZWeex/2FMS3G5XGkycuFrY= +github.com/minio/crc64nvme v1.0.1/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg= +github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= +github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= +github.com/minio/minio-go/v7 v7.0.90 h1:TmSj1083wtAD0kEYTx7a5pFsv3iRYMsOJ6A4crjA1lE= +github.com/minio/minio-go/v7 v7.0.90/go.mod h1:uvMUcGrpgeSAAI6+sD3818508nUyMULw94j2Nxku/Go= +github.com/mitchellh/mapstructure v1.4.3 h1:OVowDSCllw/YjdLkam3/sm7wEtOy59d8ndGgCcyj8cs= +github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent 
v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mozillazg/go-httpheader v0.2.1 h1:geV7TrjbL8KXSyvghnFm+NyTux/hxwueTSrwhe88TQQ= +github.com/mozillazg/go-httpheader v0.2.1/go.mod h1:jJ8xECTlalr6ValeXYdOF8fFUISeBAdw6E61aqQma60= +github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= +github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= +github.com/ollama/ollama v0.9.6 h1:HZNJmB52pMt6zLkGkkheBuXBXM5478eiSAj7GR75AMc= +github.com/ollama/ollama v0.9.6/go.mod h1:zLwx3iZ3AI4Rc/egsrx3u1w4RU2MHQ/Ylxse48jvyt4= +github.com/panjf2000/ants/v2 v2.11.2 h1:AVGpMSePxUNpcLaBO34xuIgM1ZdKOiGnpxLXixLi5Jo= +github.com/panjf2000/ants/v2 v2.11.2/go.mod h1:8u92CYMUc6gyvTIw8Ru7Mt7+/ESnJahz5EVtqfrilek= +github.com/parquet-go/parquet-go v0.25.0 h1:GwKy11MuF+al/lV6nUsFw8w8HCiPOSAx1/y8yFxjH5c= +github.com/parquet-go/parquet-go v0.25.0/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= +github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= +github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= +github.com/pgvector/pgvector-go v0.3.0 h1:Ij+Yt78R//uYqs3Zk35evZFvr+G0blW0OUN+Q2D1RWc= +github.com/pgvector/pgvector-go v0.3.0/go.mod h1:duFy+PXWfW7QQd5ibqutBO4GxLsUZ9RVXhFZGIBsWSA= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/redis/go-redis/v9 v9.7.3 h1:YpPyAayJV+XErNsatSElgRZZVCwXX9QzkKYNvO7x0wM= +github.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRltRTZHSvrA= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= +github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= +github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= +github.com/sagikazarmark/locafero v0.7.0 h1:5MqpDsTGNDhY8sGp0Aowyf0qKsPrhewaLSsFaodPcyo= +github.com/sagikazarmark/locafero v0.7.0/go.mod h1:2za3Cg5rMaTMoG/2Ulr9AwtFaIppKXTRYnozin4aB5k= +github.com/sashabaranov/go-openai v1.40.5 h1:SwIlNdWflzR1Rxd1gv3pUg6pwPc6cQ2uMoHs8ai+/NY= +github.com/sashabaranov/go-openai v1.40.5/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= +github.com/sourcegraph/conc v0.3.0/go.mod 
h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= +github.com/spf13/afero v1.12.0 h1:UcOPyRBYczmFn6yvphxkn9ZEOY65cpwGKb5mL36mrqs= +github.com/spf13/afero v1.12.0/go.mod h1:ZTlWwG4/ahT8W7T0WQ5uYmjI9duaLQGy3Q2OAl4sk/4= +github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= +github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.20.1 h1:ZMi+z/lvLyPSCoNtFCpqjy0S4kPbirhpTMwl8BkW9X4= +github.com/spf13/viper v1.20.1/go.mod h1:P9Mdzt1zoHIG8m2eZQinpiBjo6kCmZSKBClNNqjJvu4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= +github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.563/go.mod h1:7sCQWVkxcsR38nffDW057DRGk8mUjK1Ing/EFOK8s8Y= +github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/kms v1.0.563/go.mod h1:uom4Nvi9W+Qkom0exYiJ9VWJjXwyxtPYTkKkaLMlfE0= +github.com/tencentyun/cos-go-sdk-v5 v0.7.65 h1:+WBbfwThfZSbxpf1Dw6fyMwyzVtWBBExqfDJ5giiR2s= +github.com/tencentyun/cos-go-sdk-v5 v0.7.65/go.mod h1:8+hG+mQMuRP/OIS9d83syAvXvrMj9HhkND6Q1fLghw0= +github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc h1:9lRDQMhESg+zvGYmW5DyG0UqvY96Bu5QYsTLvCHdrgo= +github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc/go.mod h1:bciPuU6GHm1iF1pBvUfxfsH0Wmnc2VbpgvbI9ZWuIRs= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= +github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= +github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/uptrace/bun v1.1.12 h1:sOjDVHxNTuM6dNGaba0wUuz7KvDE1BmNu9Gqs2gJSXQ= +github.com/uptrace/bun v1.1.12/go.mod h1:NPG6JGULBeQ9IU6yHp7YGELRa5Agmd7ATZdz4tGZ6z0= +github.com/uptrace/bun/dialect/pgdialect v1.1.12 h1:m/CM1UfOkoBTglGO5CUTKnIKKOApOYxkcP2qn0F9tJk= +github.com/uptrace/bun/dialect/pgdialect v1.1.12/go.mod h1:Ij6WIxQILxLlL2frUBxUBOZJtLElD2QQNDcu/PWDHTc= +github.com/uptrace/bun/driver/pgdriver v1.1.12 h1:3rRWB1GK0psTJrHwxzNfEij2MLibggiLdTqjTtfHc1w= +github.com/uptrace/bun/driver/pgdriver v1.1.12/go.mod 
h1:ssYUP+qwSEgeDDS1xm2XBip9el1y9Mi5mTAvLoiADLM= +github.com/vmihailenco/bufpool v0.1.11 h1:gOq2WmBrq0i2yW5QJ16ykccQ4wH9UyEsgLm6czKAd94= +github.com/vmihailenco/bufpool v0.1.11/go.mod h1:AFf/MOy3l2CFTKbxwt0mp2MwnqjNEs5H/UxrkA5jxTQ= +github.com/vmihailenco/msgpack/v5 v5.3.5 h1:5gO0H1iULLWGhs2H5tbAHIZTV8/cYafcFOr9znI5mJU= +github.com/vmihailenco/msgpack/v5 v5.3.5/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= +github.com/vmihailenco/tagparser v0.1.2 h1:gnjoVuB/kljJ5wICEEOpx98oXMWPLj22G67Vbd1qPqc= +github.com/vmihailenco/tagparser v0.1.2/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= +github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= +github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yanyiwu/gojieba v1.4.5 h1:VyZogGtdFSnJbACHvDRvDreXPPVPCg8axKFUdblU/JI= +github.com/yanyiwu/gojieba v1.4.5/go.mod h1:JUq4DddFVGdHXJHxxepxRmhrKlDpaBxR8O28v6fKYLY= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= +go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0 h1:Ahq7pZmv87yiyn3jeFz/LekZmPLLdKejuO3NcK9MssM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0/go.mod h1:MJTqhM0im3mRLw1i8uGHnCvUEeS7VwRyxlLC78PA18M= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.37.0 h1:EtFWSnwW9hGObjkIdmlnWSydO+Qs8OwzfzXLUPg4xOc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.37.0/go.mod h1:QjUEoiGCPkvFZ/MjK6ZZfNOS6mfVEVKYE99dFhuN2LI= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.35.0 h1:T0Ec2E+3YZf5bgTNQVet8iTDW7oIk03tXHq+wkwIDnE= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.35.0/go.mod h1:30v2gqH+vYGJsesLWFov8u47EpYTcIQcBjKpI6pJThg= +go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= +go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= +go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= +go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= +go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o= +go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= +go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= +go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/proto/otlp v1.7.0 h1:jX1VolD6nHuFzOYso2E73H85i92Mv8JQYk0K9vz09os= +go.opentelemetry.io/proto/otlp v1.7.0/go.mod h1:fSKjH6YJ7HDlwzltzyMj036AJ3ejJLCgCSHGj4efDDo= +go.uber.org/dig v1.18.1 h1:rLww6NuajVjeQn+49u5NcezUJEGwd5uXmyoCKW2g5Es= +go.uber.org/dig v1.18.1/go.mod h1:Us0rSJiThwCv2GteUN0Q7OKvU7n5J4dxZ9JKUXozFdE= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod 
h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +golang.org/x/arch v0.15.0 h1:QtOrQd0bTUnhNVNndMpLHNWrDmYzZ2KDqSrEymqInZw= +golang.org/x/arch v0.15.0/go.mod h1:JmwW7aLIoRUKgaTzhkiEFxvcEiQGyOg9BMonBJUS7EE= +golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= +golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= +golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= +golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= +golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= +golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= +golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= +golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= +golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= +golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= +golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 h1:oWVWY3NzT7KJppx2UKhKmzPq4SRe0LdCijVRwvGeikY= +google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822/go.mod h1:h3c4v36UTKzUiuaOKQ6gr3S+0hovBtUrXzTG/i3+XEc= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 h1:fc6jSaCT0vBduLYZHYrBBNY4dsWuvgyff9noRNDdBeE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/grpc v1.73.0 h1:VIWSmpI2MegBtTuFt5/JWy2oXxtjJ/e89Z70ImfD2ok= +google.golang.org/grpc v1.73.0/go.mod h1:50sbHOUqWoCQGI8V2HQLJM0B+LMlIUjNSZmow7EVBQc= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gorm.io/driver/postgres v1.5.11 h1:ubBVAfbKEUld/twyKZ0IYn9rSQh448EdelLYk9Mv314= +gorm.io/driver/postgres v1.5.11/go.mod h1:DX3GReXH+3FPWGrrgffdvCk3DQ1dwDPdmbenSkweRGI= +gorm.io/gorm v1.25.12 h1:I0u8i2hWQItBq1WfE0o2+WuL9+8L21K9e2HHSTE/0f8= +gorm.io/gorm v1.25.12/go.mod h1:xh7N7RHfYlNc5EmcI/El95gXusucDrQnHXe0+CgWcLQ= +mellium.im/sasl v0.3.1 h1:wE0LW6g7U83vhvxjC1IY8DnXM+EU095yeo8XClvCdfo= +mellium.im/sasl v0.3.1/go.mod h1:xm59PUYpZHhgQ9ZqoJ5QaCqzWMi8IeS49dhp6plPCzw= +nullprogram.com/x/optparse 
v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= diff --git a/internal/application/repository/chunk.go b/internal/application/repository/chunk.go new file mode 100644 index 0000000..053cfc5 --- /dev/null +++ b/internal/application/repository/chunk.go @@ -0,0 +1,127 @@ +package repository + +import ( + "context" + "errors" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "gorm.io/gorm" +) + +// chunkRepository implements the ChunkRepository interface +type chunkRepository struct { + db *gorm.DB +} + +// NewChunkRepository creates a new chunk repository +func NewChunkRepository(db *gorm.DB) interfaces.ChunkRepository { + return &chunkRepository{db: db} +} + +// CreateChunks creates multiple chunks in batches +func (r *chunkRepository) CreateChunks(ctx context.Context, chunks []*types.Chunk) error { + return r.db.WithContext(ctx).CreateInBatches(chunks, 100).Error +} + +// GetChunkByID retrieves a chunk by its ID and tenant ID +func (r *chunkRepository) GetChunkByID(ctx context.Context, tenantID uint, id string) (*types.Chunk, error) { + var chunk types.Chunk + if err := r.db.WithContext(ctx).Where("tenant_id = ? AND id = ?", tenantID, id).First(&chunk).Error; err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return nil, errors.New("chunk not found") + } + return nil, err + } + return &chunk, nil +} + +// ListChunksByID retrieves multiple chunks by their IDs +func (r *chunkRepository) ListChunksByID( + ctx context.Context, tenantID uint, ids []string, +) ([]*types.Chunk, error) { + var chunks []*types.Chunk + if err := r.db.WithContext(ctx). + Where("tenant_id = ? AND id IN ?", tenantID, ids). + Find(&chunks).Error; err != nil { + return nil, err + } + return chunks, nil +} + +// ListChunksByKnowledgeID lists all chunks for a knowledge ID +func (r *chunkRepository) ListChunksByKnowledgeID( + ctx context.Context, tenantID uint, knowledgeID string, +) ([]*types.Chunk, error) { + var chunks []*types.Chunk + if err := r.db.WithContext(ctx). + Select("id, content, knowledge_id, knowledge_base_id, start_at, end_at, chunk_index, is_enabled, chunk_type, parent_chunk_id, image_info"). + Where("tenant_id = ? AND knowledge_id = ? and chunk_type = ?", tenantID, knowledgeID, "text"). + Order("chunk_index ASC"). + Find(&chunks).Error; err != nil { + return nil, err + } + return chunks, nil +} + +// ListPagedChunksByKnowledgeID lists chunks for a knowledge ID with pagination +func (r *chunkRepository) ListPagedChunksByKnowledgeID( + ctx context.Context, tenantID uint, knowledgeID string, page *types.Pagination, chunk_type []types.ChunkType, +) ([]*types.Chunk, int64, error) { + var chunks []*types.Chunk + var total int64 + + // First query the total count + if err := r.db.WithContext(ctx).Model(&types.Chunk{}). + Where("tenant_id = ? AND knowledge_id = ?", tenantID, knowledgeID). + Count(&total).Error; err != nil { + return nil, 0, err + } + + // Then query the paginated data + if err := r.db.WithContext(ctx). + Select("id, content, knowledge_id, knowledge_base_id, start_at, end_at, chunk_index, is_enabled, chunk_type, parent_chunk_id, image_info"). + Where("tenant_id = ? AND knowledge_id = ? and chunk_type in (?)", tenantID, knowledgeID, chunk_type). + Order("chunk_index ASC"). + Offset(page.Offset()). + Limit(page.Limit()). 
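// Note: the count above is not filtered by chunk_type, so total may exceed the number of rows returned for the requested chunk types.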
+ Find(&chunks).Error; err != nil { + return nil, 0, err + } + + return chunks, total, nil +} + +func (r *chunkRepository) ListChunkByParentID(ctx context.Context, tenantID uint, parentID string) ([]*types.Chunk, error) { + var chunks []*types.Chunk + if err := r.db.WithContext(ctx). + Where("tenant_id = ? AND parent_chunk_id = ?", tenantID, parentID). + Find(&chunks).Error; err != nil { + return nil, err + } + return chunks, nil +} + +// UpdateChunk updates a chunk +func (r *chunkRepository) UpdateChunk(ctx context.Context, chunk *types.Chunk) error { + return r.db.WithContext(ctx).Save(chunk).Error +} + +// DeleteChunk deletes a chunk by its ID +func (r *chunkRepository) DeleteChunk(ctx context.Context, tenantID uint, id string) error { + return r.db.WithContext(ctx).Where("tenant_id = ? AND id = ?", tenantID, id).Delete(&types.Chunk{}).Error +} + +// DeleteChunksByKnowledgeID deletes all chunks for a knowledge ID +func (r *chunkRepository) DeleteChunksByKnowledgeID(ctx context.Context, tenantID uint, knowledgeID string) error { + return r.db.WithContext(ctx).Where( + "tenant_id = ? AND knowledge_id = ?", tenantID, knowledgeID, + ).Delete(&types.Chunk{}).Error +} + +// DeleteByKnowledgeList deletes all chunks for a knowledge list +func (r *chunkRepository) DeleteByKnowledgeList(ctx context.Context, tenantID uint, knowledgeIDs []string) error { + return r.db.WithContext(ctx).Where( + "tenant_id = ? AND knowledge_id in ?", tenantID, knowledgeIDs, + ).Delete(&types.Chunk{}).Error +} diff --git a/internal/application/repository/knowledge.go b/internal/application/repository/knowledge.go new file mode 100644 index 0000000..b9b118a --- /dev/null +++ b/internal/application/repository/knowledge.go @@ -0,0 +1,187 @@ +package repository + +import ( + "context" + "errors" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "gorm.io/gorm" +) + +var ErrKnowledgeNotFound = errors.New("knowledge not found") + +// knowledgeRepository implements knowledge base and knowledge repository interface +type knowledgeRepository struct { + db *gorm.DB +} + +// NewKnowledgeRepository creates a new knowledge repository +func NewKnowledgeRepository(db *gorm.DB) interfaces.KnowledgeRepository { + return &knowledgeRepository{db: db} +} + +// CreateKnowledge creates knowledge +func (r *knowledgeRepository) CreateKnowledge(ctx context.Context, knowledge *types.Knowledge) error { + err := r.db.WithContext(ctx).Create(knowledge).Error + return err +} + +// GetKnowledgeByID gets knowledge +func (r *knowledgeRepository) GetKnowledgeByID(ctx context.Context, tenantID uint, id string) (*types.Knowledge, error) { + var knowledge types.Knowledge + if err := r.db.WithContext(ctx).Where("tenant_id = ? AND id = ?", tenantID, id).First(&knowledge).Error; err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return nil, ErrKnowledgeNotFound + } + return nil, err + } + return &knowledge, nil +} + +// ListKnowledgeByKnowledgeBaseID lists all knowledge in a knowledge base +func (r *knowledgeRepository) ListKnowledgeByKnowledgeBaseID( + ctx context.Context, tenantID uint, kbID string, +) ([]*types.Knowledge, error) { + var knowledges []*types.Knowledge + if err := r.db.WithContext(ctx).Where("tenant_id = ? AND knowledge_base_id = ?", tenantID, kbID). 
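// Most recently created knowledge first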
+ Order("created_at DESC").Find(&knowledges).Error; err != nil { + return nil, err + } + return knowledges, nil +} + +// ListPagedKnowledgeByKnowledgeBaseID lists all knowledge in a knowledge base with pagination +func (r *knowledgeRepository) ListPagedKnowledgeByKnowledgeBaseID( + ctx context.Context, + tenantID uint, + kbID string, + page *types.Pagination, +) ([]*types.Knowledge, int64, error) { + var knowledges []*types.Knowledge + var total int64 + + // Query total count first + if err := r.db.WithContext(ctx).Model(&types.Knowledge{}). + Where("tenant_id = ? AND knowledge_base_id = ?", tenantID, kbID). + Count(&total).Error; err != nil { + return nil, 0, err + } + + // Then query paginated data + if err := r.db.WithContext(ctx). + Where("tenant_id = ? AND knowledge_base_id = ?", tenantID, kbID). + Order("created_at DESC"). + Offset(page.Offset()). + Limit(page.Limit()). + Find(&knowledges).Error; err != nil { + return nil, 0, err + } + + return knowledges, total, nil +} + +// UpdateKnowledge updates knowledge +func (r *knowledgeRepository) UpdateKnowledge(ctx context.Context, knowledge *types.Knowledge) error { + err := r.db.WithContext(ctx).Save(knowledge).Error + return err +} + +// DeleteKnowledge deletes knowledge +func (r *knowledgeRepository) DeleteKnowledge(ctx context.Context, tenantID uint, id string) error { + return r.db.WithContext(ctx).Where("tenant_id = ? AND id = ?", tenantID, id).Delete(&types.Knowledge{}).Error +} + +// DeleteKnowledge deletes knowledge +func (r *knowledgeRepository) DeleteKnowledgeList(ctx context.Context, tenantID uint, ids []string) error { + return r.db.WithContext(ctx).Where("tenant_id = ? AND id in ?", tenantID, ids).Delete(&types.Knowledge{}).Error +} + +// GetKnowledgeBatch gets knowledge in batch +func (r *knowledgeRepository) GetKnowledgeBatch( + ctx context.Context, tenantID uint, ids []string, +) ([]*types.Knowledge, error) { + var knowledge []*types.Knowledge + if err := r.db.WithContext(ctx).Debug(). + Where("tenant_id = ? AND id IN ?", tenantID, ids). + Find(&knowledge).Error; err != nil { + return nil, err + } + return knowledge, nil +} + +// CheckKnowledgeExists checks if knowledge already exists +func (r *knowledgeRepository) CheckKnowledgeExists( + ctx context.Context, + tenantID uint, + kbID string, + params *types.KnowledgeCheckParams, +) (bool, *types.Knowledge, error) { + query := r.db.WithContext(ctx).Model(&types.Knowledge{}). + Where("tenant_id = ? AND knowledge_base_id = ? AND enable_status = ?", tenantID, kbID, "enabled") + + if params.Type == "file" { + // If file hash exists, prioritize exact match using hash + if params.FileHash != "" { + var knowledge types.Knowledge + err := query.Where("file_hash = ?", params.FileHash).First(&knowledge).Error + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return false, nil, nil + } + return false, nil, err + } + return true, &knowledge, nil + } + + // If no hash or hash doesn't match, use filename and size + if params.FileName != "" && params.FileSize > 0 { + var knowledge types.Knowledge + err := query.Where( + "file_name = ? 
AND file_size = ?", + params.FileName, params.FileSize, + ).First(&knowledge).Error + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return false, nil, nil + } + return false, nil, err + } + return true, &knowledge, nil + } + } else if params.Type == "url" { + if params.URL != "" { + var knowledge types.Knowledge + err := query.Where("type = 'url' AND source = ?", params.URL).First(&knowledge).Error + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return false, nil, nil + } + return false, nil, err + } + return true, &knowledge, nil + } + } + + // No valid parameters, default to not existing + return false, nil, nil +} + +func (r *knowledgeRepository) AminusB( + ctx context.Context, + Atenant uint, A string, + Btenant uint, B string, +) ([]string, error) { + knowledgeIDs := []string{} + subQuery := r.db.Model(&types.Knowledge{}). + Where("tenant_id = ? AND knowledge_base_id = ?", Btenant, B).Select("file_hash") + err := r.db.Model(&types.Knowledge{}). + Where("tenant_id = ? AND knowledge_base_id = ?", Atenant, A). + Where("file_hash NOT IN (?)", subQuery). + Pluck("id", &knowledgeIDs). + Error + if errors.Is(err, gorm.ErrRecordNotFound) { + return knowledgeIDs, nil + } + return knowledgeIDs, err +} diff --git a/internal/application/repository/knowledgebase.go b/internal/application/repository/knowledgebase.go new file mode 100644 index 0000000..efd7ca5 --- /dev/null +++ b/internal/application/repository/knowledgebase.go @@ -0,0 +1,70 @@ +package repository + +import ( + "context" + "errors" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "gorm.io/gorm" +) + +var ErrKnowledgeBaseNotFound = errors.New("knowledge base not found") + +// knowledgeBaseRepository implements the KnowledgeBaseRepository interface +type knowledgeBaseRepository struct { + db *gorm.DB +} + +// NewKnowledgeBaseRepository creates a new knowledge base repository +func NewKnowledgeBaseRepository(db *gorm.DB) interfaces.KnowledgeBaseRepository { + return &knowledgeBaseRepository{db: db} +} + +// CreateKnowledgeBase creates a new knowledge base +func (r *knowledgeBaseRepository) CreateKnowledgeBase(ctx context.Context, kb *types.KnowledgeBase) error { + return r.db.WithContext(ctx).Create(kb).Error +} + +// GetKnowledgeBaseByID gets a knowledge base by id +func (r *knowledgeBaseRepository) GetKnowledgeBaseByID(ctx context.Context, id string) (*types.KnowledgeBase, error) { + var kb types.KnowledgeBase + if err := r.db.WithContext(ctx).Where("id = ?", id).First(&kb).Error; err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return nil, ErrKnowledgeBaseNotFound + } + return nil, err + } + return &kb, nil +} + +// ListKnowledgeBases lists all knowledge bases +func (r *knowledgeBaseRepository) ListKnowledgeBases(ctx context.Context) ([]*types.KnowledgeBase, error) { + var kbs []*types.KnowledgeBase + if err := r.db.WithContext(ctx).Find(&kbs).Error; err != nil { + return nil, err + } + return kbs, nil +} + +// ListKnowledgeBasesByTenantID lists all knowledge bases by tenant id +func (r *knowledgeBaseRepository) ListKnowledgeBasesByTenantID( + ctx context.Context, tenantID uint, +) ([]*types.KnowledgeBase, error) { + var kbs []*types.KnowledgeBase + if err := r.db.WithContext(ctx).Where("tenant_id = ?", tenantID). 
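// Newest knowledge bases first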
+ Order("created_at DESC").Find(&kbs).Error; err != nil { + return nil, err + } + return kbs, nil +} + +// UpdateKnowledgeBase updates a knowledge base +func (r *knowledgeBaseRepository) UpdateKnowledgeBase(ctx context.Context, kb *types.KnowledgeBase) error { + return r.db.WithContext(ctx).Save(kb).Error +} + +// DeleteKnowledgeBase deletes a knowledge base +func (r *knowledgeBaseRepository) DeleteKnowledgeBase(ctx context.Context, id string) error { + return r.db.WithContext(ctx).Where("id = ?", id).Delete(&types.KnowledgeBase{}).Error +} diff --git a/internal/application/repository/message.go b/internal/application/repository/message.go new file mode 100644 index 0000000..0808e78 --- /dev/null +++ b/internal/application/repository/message.go @@ -0,0 +1,153 @@ +package repository + +import ( + "context" + "slices" + "time" + + "gorm.io/gorm" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// messageRepository implements the message repository interface +type messageRepository struct { + db *gorm.DB +} + +// NewMessageRepository creates a new message repository +func NewMessageRepository(db *gorm.DB) interfaces.MessageRepository { + return &messageRepository{ + db: db, + } +} + +// CreateMessage creates a new message +func (r *messageRepository) CreateMessage( + ctx context.Context, message *types.Message, +) (*types.Message, error) { + if err := r.db.WithContext(ctx).Create(message).Error; err != nil { + return nil, err + } + return message, nil +} + +// GetMessage retrieves a message +func (r *messageRepository) GetMessage( + ctx context.Context, sessionID string, messageID string, +) (*types.Message, error) { + var message types.Message + if err := r.db.WithContext(ctx).Where( + "id = ? AND session_id = ?", messageID, sessionID, + ).First(&message).Error; err != nil { + return nil, err + } + return &message, nil +} + +// GetMessagesBySession retrieves all messages for a session with pagination +func (r *messageRepository) GetMessagesBySession( + ctx context.Context, sessionID string, page int, pageSize int, +) ([]*types.Message, error) { + var messages []*types.Message + if err := r.db.WithContext(ctx).Where("session_id = ?", sessionID).Order("created_at ASC"). + Offset((page - 1) * pageSize).Limit(pageSize).Find(&messages).Error; err != nil { + return nil, err + } + return messages, nil +} + +// GetRecentMessagesBySession retrieves recent messages for a session +func (r *messageRepository) GetRecentMessagesBySession( + ctx context.Context, sessionID string, limit int, +) ([]*types.Message, error) { + var messages []*types.Message + if err := r.db.WithContext(ctx).Where( + "session_id = ?", sessionID, + ).Order("created_at DESC").Limit(limit).Find(&messages).Error; err != nil { + if err == gorm.ErrRecordNotFound { + return nil, nil + } + return nil, err + } + slices.SortFunc(messages, func(a, b *types.Message) int { + cmp := a.CreatedAt.Compare(b.CreatedAt) + if cmp == 0 { + if a.Role == "user" { // User messages come first + return -1 + } + return 1 // Assistant messages come last + } + return cmp + }) + return messages, nil +} + +// GetMessagesBySessionBeforeTime retrieves messages from a session created before a specific time +func (r *messageRepository) GetMessagesBySessionBeforeTime( + ctx context.Context, sessionID string, beforeTime time.Time, limit int, +) ([]*types.Message, error) { + var messages []*types.Message + if err := r.db.WithContext(ctx).Where( + "session_id = ? 
AND created_at < ?", sessionID, beforeTime, + ).Order("created_at DESC").Limit(limit).Find(&messages).Error; err != nil { + return nil, err + } + slices.SortFunc(messages, func(a, b *types.Message) int { + cmp := a.CreatedAt.Compare(b.CreatedAt) + if cmp == 0 { + if a.Role == "user" { // User messages come first + return -1 + } + return 1 // Assistant messages come last + } + return cmp + }) + return messages, nil +} + +// UpdateMessage updates an existing message +func (r *messageRepository) UpdateMessage(ctx context.Context, message *types.Message) error { + return r.db.WithContext(ctx).Model(&types.Message{}).Where( + "id = ? AND session_id = ?", message.ID, message.SessionID, + ).Updates(message).Error +} + +// DeleteMessage deletes a message +func (r *messageRepository) DeleteMessage(ctx context.Context, sessionID string, messageID string) error { + return r.db.WithContext(ctx).Where( + "id = ? AND session_id = ?", messageID, sessionID, + ).Delete(&types.Message{}).Error +} + +// GetFirstMessageOfUser retrieves the first message from a user in a session +func (r *messageRepository) GetFirstMessageOfUser(ctx context.Context, sessionID string) (*types.Message, error) { + var message types.Message + if err := r.db.WithContext(ctx).Where( + "session_id = ? and role = ?", sessionID, "user", + ).Order("created_at ASC").First(&message).Error; err != nil { + return nil, err + } + return &message, nil +} + +// GetMessageByRequestID retrieves a message by request ID +func (r *messageRepository) GetMessageByRequestID( + ctx context.Context, sessionID string, requestID string, +) (*types.Message, error) { + var message types.Message + + result := r.db.WithContext(ctx). + Where("session_id = ? AND request_id = ?", sessionID, requestID). + First(&message) + + if result.Error != nil { + if result.Error == gorm.ErrRecordNotFound { + return nil, nil + } + return nil, result.Error + } + + return &message, nil +} diff --git a/internal/application/repository/model.go b/internal/application/repository/model.go new file mode 100644 index 0000000..568f76c --- /dev/null +++ b/internal/application/repository/model.go @@ -0,0 +1,77 @@ +package repository + +import ( + "context" + "errors" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "gorm.io/gorm" +) + +// modelRepository implements the model repository interface +type modelRepository struct { + db *gorm.DB +} + +// NewModelRepository creates a new model repository +func NewModelRepository(db *gorm.DB) interfaces.ModelRepository { + return &modelRepository{db: db} +} + +// Create creates a new model +func (r *modelRepository) Create(ctx context.Context, m *types.Model) error { + return r.db.WithContext(ctx).Create(m).Error +} + +// GetByID retrieves a model by ID +func (r *modelRepository) GetByID(ctx context.Context, tenantID uint, id string) (*types.Model, error) { + var m types.Model + if err := r.db.WithContext(ctx).Where("id = ?", id).Where( + "tenant_id = ? or tenant_id = 0", tenantID, + ).First(&m).Error; err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return nil, nil + } + return nil, err + } + return &m, nil +} + +// List lists models with optional filtering +func (r *modelRepository) List( + ctx context.Context, tenantID uint, modelType types.ModelType, source types.ModelSource, +) ([]*types.Model, error) { + var models []*types.Model + query := r.db.WithContext(ctx).Where( + "tenant_id = ? 
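GetRecentMessagesBySession and GetMessagesBySessionBeforeTime query newest-first so the LIMIT keeps the most recent rows, then re-sort in memory into chronological order, breaking timestamp ties so a user message precedes the assistant reply created at the same instant. A standalone illustration of that comparator with a simplified Message stand-in:

package main

import (
	"fmt"
	"slices"
	"time"
)

type message struct {
	Role      string
	CreatedAt time.Time
}

func main() {
	t := time.Now()
	msgs := []*message{
		{Role: "assistant", CreatedAt: t},              // answer, same timestamp as the question
		{Role: "user", CreatedAt: t},                   // question, should sort first on a tie
		{Role: "user", CreatedAt: t.Add(-time.Minute)}, // older message, sorts before both
	}
	slices.SortFunc(msgs, func(a, b *message) int {
		cmp := a.CreatedAt.Compare(b.CreatedAt)
		if cmp == 0 {
			if a.Role == "user" { // user messages come first on equal timestamps
				return -1
			}
			return 1
		}
		return cmp
	})
	for _, m := range msgs {
		fmt.Println(m.CreatedAt.Format(time.RFC3339), m.Role)
	}
}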
or tenant_id = 0", tenantID, + ) + + if modelType != "" { + query = query.Where("type = ?", modelType) + } + + if source != "" { + query = query.Where("source = ?", source) + } + + if err := query.Find(&models).Error; err != nil { + return nil, err + } + + return models, nil +} + +// Update updates a model +func (r *modelRepository) Update(ctx context.Context, m *types.Model) error { + return r.db.WithContext(ctx).Debug().Model(&types.Model{}).Where( + "id = ? AND tenant_id = ?", m.ID, m.TenantID, + ).Updates(m).Error +} + +// Delete deletes a model +func (r *modelRepository) Delete(ctx context.Context, tenantID uint, id string) error { + return r.db.WithContext(ctx).Where( + "id = ? AND tenant_id = ?", id, tenantID, + ).Delete(&types.Model{}).Error +} diff --git a/internal/application/repository/retriever/elasticsearch/structs.go b/internal/application/repository/retriever/elasticsearch/structs.go new file mode 100644 index 0000000..b30b8bb --- /dev/null +++ b/internal/application/repository/retriever/elasticsearch/structs.go @@ -0,0 +1,62 @@ +package elasticsearch + +import ( + "maps" + "slices" + + "github.com/Tencent/WeKnora/internal/types" +) + +// VectorEmbedding defines the Elasticsearch document structure for vector embeddings +type VectorEmbedding struct { + Content string `json:"content" gorm:"column:content;not null"` // Text content of the chunk + SourceID string `json:"source_id" gorm:"column:source_id;not null"` // ID of the source document + SourceType int `json:"source_type" gorm:"column:source_type;not null"` // Type of the source document + ChunkID string `json:"chunk_id" gorm:"column:chunk_id"` // Unique ID of the text chunk + KnowledgeID string `json:"knowledge_id" gorm:"column:knowledge_id"` // ID of the knowledge item + KnowledgeBaseID string `json:"knowledge_base_id" gorm:"column:knowledge_base_id"` // ID of the knowledge base + Embedding []float32 `json:"embedding" gorm:"column:embedding;not null"` // Vector embedding of the content +} + +// VectorEmbeddingWithScore extends VectorEmbedding with similarity score +type VectorEmbeddingWithScore struct { + VectorEmbedding + Score float64 // Similarity score from vector search +} + +// ToDBVectorEmbedding converts IndexInfo to Elasticsearch document format +func ToDBVectorEmbedding(embedding *types.IndexInfo, additionalParams map[string]interface{}) *VectorEmbedding { + vector := &VectorEmbedding{ + Content: embedding.Content, + SourceID: embedding.SourceID, + SourceType: int(embedding.SourceType), + ChunkID: embedding.ChunkID, + KnowledgeID: embedding.KnowledgeID, + KnowledgeBaseID: embedding.KnowledgeBaseID, + } + // Add embedding data if available in additionalParams + if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), "embedding") { + if embeddingMap, ok := additionalParams["embedding"].(map[string][]float32); ok { + vector.Embedding = embeddingMap[embedding.SourceID] + } + } + return vector +} + +// FromDBVectorEmbeddingWithScore converts Elasticsearch document to IndexWithScore domain model +func FromDBVectorEmbeddingWithScore(id string, + embedding *VectorEmbeddingWithScore, + matchType types.MatchType, +) *types.IndexWithScore { + return &types.IndexWithScore{ + ID: id, + SourceID: embedding.SourceID, + SourceType: types.SourceType(embedding.SourceType), + ChunkID: embedding.ChunkID, + KnowledgeID: embedding.KnowledgeID, + KnowledgeBaseID: embedding.KnowledgeBaseID, + Content: embedding.Content, + Score: embedding.Score, + MatchType: matchType, + } +} diff --git 
a/internal/application/repository/retriever/elasticsearch/v7/repository.go b/internal/application/repository/retriever/elasticsearch/v7/repository.go new file mode 100644 index 0000000..b92c175 --- /dev/null +++ b/internal/application/repository/retriever/elasticsearch/v7/repository.go @@ -0,0 +1,1025 @@ +// Package v7 implements the Elasticsearch v7 retriever engine repository +// It provides functionality for storing and retrieving embeddings using Elasticsearch v7 +// The repository supports both single and batch operations for saving and deleting embeddings +// It also supports retrieving embeddings using different retrieval methods +// The repository is used to store and retrieve embeddings for different tasks +// It is used to store and retrieve embeddings for different tasks +package v7 + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "os" + "strings" + + elasticsearchRetriever "github.com/Tencent/WeKnora/internal/application/repository/retriever/elasticsearch" + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/logger" + typesLocal "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/elastic/go-elasticsearch/v7" + "github.com/elastic/go-elasticsearch/v7/esapi" + "github.com/google/uuid" +) + +type elasticsearchRepository struct { + client *elasticsearch.Client + index string +} + +func NewElasticsearchEngineRepository(client *elasticsearch.Client, + config *config.Config, +) interfaces.RetrieveEngineRepository { + log := logger.GetLogger(context.Background()) + log.Info("[ElasticsearchV7] Initializing Elasticsearch v7 retriever engine repository") + + indexName := os.Getenv("ELASTICSEARCH_INDEX") + if indexName == "" { + log.Warn("[ElasticsearchV7] ELASTICSEARCH_INDEX environment variable not set, using default index name") + indexName = "xwrag_default" + } + + log.Infof("[ElasticsearchV7] Using index: %s", indexName) + res := &elasticsearchRepository{client: client, index: indexName} + return res +} + +func (e *elasticsearchRepository) EngineType() typesLocal.RetrieverEngineType { + return typesLocal.ElasticsearchRetrieverEngineType +} + +func (e *elasticsearchRepository) Support() []typesLocal.RetrieverType { + return []typesLocal.RetrieverType{typesLocal.KeywordsRetrieverType} +} + +// EstimateStorageSize 估算存储空间大小 +func (e *elasticsearchRepository) EstimateStorageSize(ctx context.Context, + indexInfoList []*typesLocal.IndexInfo, params map[string]any, +) int64 { + log := logger.GetLogger(ctx) + log.Infof("[ElasticsearchV7] Estimating storage size for %d indices", len(indexInfoList)) + + // 计算总存储大小 + var totalStorageSize int64 = 0 + for _, indexInfo := range indexInfoList { + embeddingDB := elasticsearchRetriever.ToDBVectorEmbedding(indexInfo, params) + // 计算单个文档的存储大小并累加 + totalStorageSize += e.calculateStorageSize(embeddingDB) + } + + // 记录存储大小 + log.Infof("[ElasticsearchV7] Estimated storage size: %d bytes (%d MB) for %d indices", + totalStorageSize, totalStorageSize/(1024*1024), len(indexInfoList)) + return totalStorageSize +} + +// 计算单个索引的存储占用大小(Bytes) +func (e *elasticsearchRepository) calculateStorageSize(embedding *elasticsearchRetriever.VectorEmbedding) int64 { + // 1. 文本内容大小 + contentSizeBytes := int64(len(embedding.Content)) + + // 2. 向量存储大小 + var vectorSizeBytes int64 = 0 + if embedding.Embedding != nil { + // 4字节/维度 (全精度浮点数) + vectorSizeBytes = int64(len(embedding.Embedding) * 4) + } + + // 3. 
元数据大小 (ID、时间戳等固定开销) + metadataSizeBytes := int64(250) // 约250字节的元数据 + + // 4. 索引开销 (ES索引的放大系数约为1.5) + indexOverheadBytes := (contentSizeBytes + vectorSizeBytes) * 5 / 10 + + // 总大小 (字节) + totalSizeBytes := contentSizeBytes + vectorSizeBytes + metadataSizeBytes + indexOverheadBytes + + return totalSizeBytes +} + +// Save 保存索引 +func (e *elasticsearchRepository) Save(ctx context.Context, + embedding *typesLocal.IndexInfo, additionalParams map[string]any, +) error { + log := logger.GetLogger(ctx) + log.Debugf("[ElasticsearchV7] Saving index for chunk ID: %s", embedding.ChunkID) + + embeddingDB := elasticsearchRetriever.ToDBVectorEmbedding(embedding, additionalParams) + if len(embeddingDB.Embedding) == 0 { + err := fmt.Errorf("empty embedding vector for chunk ID: %s", embedding.ChunkID) + log.Errorf("[ElasticsearchV7] %v", err) + return err + } + + docBytes, err := json.Marshal(embeddingDB) + if err != nil { + log.Errorf("[ElasticsearchV7] Failed to marshal embedding: %v", err) + return err + } + + docID := uuid.New().String() + log.Debugf("[ElasticsearchV7] Creating document with ID: %s for chunk ID: %s", docID, embedding.ChunkID) + + resp, err := e.client.Create( + e.index, + docID, + bytes.NewReader(docBytes), + e.client.Create.WithContext(ctx), + ) + if err != nil { + log.Errorf("[ElasticsearchV7] Failed to create document: %v", err) + return err + } + defer resp.Body.Close() + + if resp.IsError() { + log.Errorf("[ElasticsearchV7] Failed to index document: %s", resp.String()) + return fmt.Errorf("failed to index document: %s", resp.String()) + } + + log.Infof( + "[ElasticsearchV7] Successfully saved index for chunk ID: %s with document ID: %s", + embedding.ChunkID, docID, + ) + return nil +} + +// BatchSave performs bulk indexing of multiple embeddings in Elasticsearch +// It constructs a bulk request with index operations for each embedding +// Returns error if any operation fails during the bulk indexing process +func (e *elasticsearchRepository) BatchSave(ctx context.Context, + embeddingList []*typesLocal.IndexInfo, additionalParams map[string]any, +) error { + log := logger.GetLogger(ctx) + if len(embeddingList) == 0 { + log.Warn("[ElasticsearchV7] Empty list provided to BatchSave, skipping") + return nil + } + + log.Infof("[ElasticsearchV7] Batch saving %d indices", len(embeddingList)) + + // Prepare bulk request body + body, processedCount, err := e.prepareBulkRequestBody(ctx, embeddingList, additionalParams) + if err != nil { + return err + } + + if processedCount == 0 { + log.Warn("[ElasticsearchV7] No valid documents to index after filtering, skipping bulk request") + return nil + } + + // Execute bulk request + log.Debugf("[ElasticsearchV7] Executing bulk request with %d documents", processedCount) + resp, err := e.client.Bulk( + body, + e.client.Bulk.WithIndex(e.index), + e.client.Bulk.WithContext(ctx), + ) + if err != nil { + log.Errorf("[ElasticsearchV7] Failed to execute bulk index operation: %v", err) + return err + } + defer resp.Body.Close() + + // Process bulk response + err = e.processBulkResponse(ctx, resp, len(embeddingList)) + if err != nil { + return err + } + + log.Infof("[ElasticsearchV7] Successfully batch saved %d indices", processedCount) + return nil +} + +// prepareBulkRequestBody prepares the bulk request body for batch indexing +func (e *elasticsearchRepository) prepareBulkRequestBody(ctx context.Context, + embeddingList []*typesLocal.IndexInfo, additionalParams map[string]any, +) (*bytes.Buffer, int, error) { + log := logger.GetLogger(ctx) + body := 
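Note the contract implied by ToDBVectorEmbedding: Save and BatchSave do not read vectors from IndexInfo itself, they expect them under additionalParams["embedding"] as a map[string][]float32 keyed by the index's SourceID. A hypothetical caller sketch (assumes Save is part of the RetrieveEngineRepository interface these repositories implement):

package example

import (
	"context"

	"github.com/Tencent/WeKnora/internal/types"
	"github.com/Tencent/WeKnora/internal/types/interfaces"
)

// saveChunkIndex passes the vector through additionalParams, keyed by
// SourceID, which is where ToDBVectorEmbedding looks it up.
func saveChunkIndex(ctx context.Context, repo interfaces.RetrieveEngineRepository,
	idx *types.IndexInfo, vector []float32,
) error {
	return repo.Save(ctx, idx, map[string]any{
		"embedding": map[string][]float32{idx.SourceID: vector},
	})
}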
&bytes.Buffer{} + processedCount := 0 + + // Prepare bulk request body + for _, embedding := range embeddingList { + // Convert to Elasticsearch document format + embeddingDB := elasticsearchRetriever.ToDBVectorEmbedding(embedding, additionalParams) + + // Generate document ID and metadata line + docID := uuid.New().String() + meta := []byte(fmt.Sprintf(`{ "index" : { "_id" : "%s" } }%s`, docID, "\n")) + + // Marshal document to JSON + data, err := json.Marshal(embeddingDB) + if err != nil { + log.Errorf("[ElasticsearchV7] Failed to marshal embedding for chunk ID %s: %v", embedding.ChunkID, err) + return nil, 0, err + } + + // Append newline and add to bulk request body + data = append(data, "\n"...) + body.Grow(len(meta) + len(data)) + body.Write(meta) + body.Write(data) + processedCount++ + + log.Debugf("[ElasticsearchV7] Added document for chunk ID: %s to bulk request", embedding.ChunkID) + } + + return body, processedCount, nil +} + +// processBulkResponse processes the response from a bulk indexing operation +func (e *elasticsearchRepository) processBulkResponse(ctx context.Context, + resp *esapi.Response, totalDocuments int, +) error { + log := logger.GetLogger(ctx) + + // Check for bulk operation errors + if resp.IsError() { + log.Errorf("[ElasticsearchV7] Bulk operation failed: %s", resp.String()) + return fmt.Errorf("failed to index documents: %s", resp.String()) + } + + // Parse bulk response to check for individual document errors + var bulkResponse map[string]interface{} + if err := json.NewDecoder(resp.Body).Decode(&bulkResponse); err != nil { + log.Warnf("[ElasticsearchV7] Could not parse bulk response: %v", err) + return nil + } + + // Check for errors in individual operations + if hasErrors, ok := bulkResponse["errors"].(bool); ok && hasErrors { + errorCount := e.countBulkErrors(ctx, bulkResponse, totalDocuments) + if errorCount > 0 { + log.Warnf("[ElasticsearchV7] %d/%d documents failed to index", errorCount, totalDocuments) + } + } + + return nil +} + +// countBulkErrors counts the number of errors in a bulk response +func (e *elasticsearchRepository) countBulkErrors(ctx context.Context, + bulkResponse map[string]interface{}, totalDocuments int, +) int { + log := logger.GetLogger(ctx) + log.Warn("[ElasticsearchV7] Bulk operation completed with some errors") + + errorCount := 0 + if items, ok := bulkResponse["items"].([]interface{}); ok { + for _, item := range items { + if itemMap, ok := item.(map[string]interface{}); ok { + if indexResp, ok := itemMap["index"].(map[string]interface{}); ok { + if indexResp["error"] != nil { + errorCount++ + log.Errorf("[ElasticsearchV7] Item error: %v", indexResp["error"]) + } + } + } + } + } + + return errorCount +} + +// DeleteByChunkIDList Delete indices by chunk ID list +func (e *elasticsearchRepository) DeleteByChunkIDList(ctx context.Context, chunkIDList []string, dimension int) error { + return e.deleteByFieldList(ctx, "chunk_id.keyword", chunkIDList) +} + +// DeleteByKnowledgeIDList Delete indices by knowledge ID list +func (e *elasticsearchRepository) DeleteByKnowledgeIDList(ctx context.Context, + knowledgeIDList []string, dimension int, +) error { + return e.deleteByFieldList(ctx, "knowledge_id.keyword", knowledgeIDList) +} + +// deleteByFieldList Delete documents by field value list +func (e *elasticsearchRepository) deleteByFieldList(ctx context.Context, field string, valueList []string) error { + log := logger.GetLogger(ctx) + if len(valueList) == 0 { + log.Warnf("[ElasticsearchV7] Empty %s list provided for deletion, 
skipping", field) + return nil + } + + log.Infof("[ElasticsearchV7] Deleting indices by %s, count: %d", field, len(valueList)) + ids, err := json.Marshal(valueList) + if err != nil { + log.Errorf("[ElasticsearchV7] Failed to marshal %s list: %v", field, err) + return err + } + + query := fmt.Sprintf(`{"query": {"terms": {"%s": %s}}}`, field, ids) + log.Debugf("[ElasticsearchV7] Executing delete by query: %s", query) + + resp, err := e.client.DeleteByQuery( + []string{e.index}, + bytes.NewReader([]byte(query)), + e.client.DeleteByQuery.WithContext(ctx), + ) + if err != nil { + log.Errorf("[ElasticsearchV7] Failed to execute delete by query: %v", err) + return err + } + defer resp.Body.Close() + + if resp.IsError() { + errMsg := fmt.Sprintf("failed to delete by query: %s", resp.String()) + log.Errorf("[ElasticsearchV7] %s", errMsg) + return fmt.Errorf(errMsg) + } + + // Try to extract deletion count from response + var deleteResponse map[string]interface{} + if err := json.NewDecoder(resp.Body).Decode(&deleteResponse); err != nil { + log.Warnf("[ElasticsearchV7] Could not parse delete response: %v", err) + } else { + if deleted, ok := deleteResponse["deleted"].(float64); ok { + log.Infof("[ElasticsearchV7] Successfully deleted %d documents by %s", int(deleted), field) + } else { + log.Infof("[ElasticsearchV7] Successfully deleted documents by %s", field) + } + } + + return nil +} + +// getBaseConds Construct base Elasticsearch query conditions based on retrieval parameters +// It creates MUST conditions for required fields and MUST_NOT conditions for excluded fields +// Returns a JSON string representing the query conditions +func (e *elasticsearchRepository) getBaseConds(params typesLocal.RetrieveParams) string { + // Build MUST conditions (positive filters) + must := make([]string, 0) + if len(params.KnowledgeBaseIDs) > 0 { + ids, _ := json.Marshal(params.KnowledgeBaseIDs) + must = append(must, fmt.Sprintf(`{"terms": {"knowledge_base_id.keyword": %s}}`, ids)) + } + + // Build MUST_NOT conditions (negative filters) + mustNot := make([]string, 0) + if len(params.ExcludeKnowledgeIDs) > 0 { + ids, _ := json.Marshal(params.ExcludeKnowledgeIDs) + mustNot = append(mustNot, fmt.Sprintf(`{"terms": {"knowledge_id.keyword": %s}}`, ids)) + } + if len(params.ExcludeChunkIDs) > 0 { + ids, _ := json.Marshal(params.ExcludeChunkIDs) + mustNot = append(mustNot, fmt.Sprintf(`{"terms": {"chunk_id.keyword": %s}}`, ids)) + } + + // Combine conditions based on presence + switch { + case len(must) == 0 && len(mustNot) == 0: + return "{}" // Empty query if no conditions + case len(must) == 0: + return fmt.Sprintf(`{"bool": {"must_not": [%s]}}`, strings.Join(mustNot, ",")) + case len(mustNot) == 0: + return fmt.Sprintf(`{"bool": {"must": [%s]}}`, strings.Join(must, ",")) + default: + return fmt.Sprintf(`{"bool": {"must": [%s], "must_not": [%s]}}`, + strings.Join(must, ","), strings.Join(mustNot, ",")) + } +} + +func (e *elasticsearchRepository) Retrieve(ctx context.Context, + params typesLocal.RetrieveParams, +) ([]*typesLocal.RetrieveResult, error) { + log := logger.GetLogger(ctx) + log.Debugf("[ElasticsearchV7] Processing retrieval request of type: %s", params.RetrieverType) + + switch params.RetrieverType { + case typesLocal.KeywordsRetrieverType: + return e.KeywordsRetrieve(ctx, params) + } + + err := fmt.Errorf("invalid retriever type: %v", params.RetrieverType) + log.Errorf("[ElasticsearchV7] %v", err) + return nil, err +} + +// VectorRetrieve Implement vector similarity retrieval +func (e 
*elasticsearchRepository) VectorRetrieve(ctx context.Context, + params typesLocal.RetrieveParams, +) ([]*typesLocal.RetrieveResult, error) { + log := logger.GetLogger(ctx) + log.Infof("[ElasticsearchV7] Vector retrieval: dim=%d, topK=%d, threshold=%.4f", + len(params.Embedding), params.TopK, params.Threshold) + + // Build search query + query, err := e.buildVectorSearchQuery(ctx, params) + if err != nil { + return nil, err + } + + // Execute search + results, err := e.executeVectorSearch(ctx, query) + if err != nil { + return nil, err + } + + return []*typesLocal.RetrieveResult{ + { + Results: results, + RetrieverEngineType: typesLocal.ElasticsearchRetrieverEngineType, + RetrieverType: typesLocal.VectorRetrieverType, + Error: nil, + }, + }, nil +} + +// buildVectorSearchQuery builds the vector search query JSON +func (e *elasticsearchRepository) buildVectorSearchQuery(ctx context.Context, + params typesLocal.RetrieveParams, +) (string, error) { + log := logger.GetLogger(ctx) + + filter := e.getBaseConds(params) + + // Serialize the query vector + queryVectorJSON, err := json.Marshal(params.Embedding) + if err != nil { + log.Errorf("[ElasticsearchV7] Failed to marshal query vector: %v", err) + return "", fmt.Errorf("failed to marshal query embedding: %w", err) + } + + // Construct the script_score query + query := fmt.Sprintf( + `{"query":{"script_score":{"query":{"bool":{"filter":[%s]}}, + "script":{"source":"cosineSimilarity(params.query_vector,'embedding')", + "params":{"query_vector":%s}},"min_score":%f}},"size":%d}`, + filter, + string(queryVectorJSON), + params.Threshold, + params.TopK, + ) + + log.Debugf("[ElasticsearchV7] Executing vector search with query: %s", query) + return query, nil +} + +// executeVectorSearch executes the vector search query +func (e *elasticsearchRepository) executeVectorSearch( + ctx context.Context, + query string, +) ([]*typesLocal.IndexWithScore, error) { + log := logger.GetLogger(ctx) + + response, err := e.client.Search( + e.client.Search.WithIndex(e.index), + e.client.Search.WithBody(strings.NewReader(query)), + e.client.Search.WithContext(ctx), + ) + if err != nil { + log.Errorf("[ElasticsearchV7] Vector search failed: %v", err) + return nil, err + } + defer response.Body.Close() + + results, err := e.processSearchResponse(ctx, response, typesLocal.VectorRetrieverType) + if err != nil { + return nil, err + } + + return results, nil +} + +// KeywordsRetrieve Implement keyword retrieval +func (e *elasticsearchRepository) KeywordsRetrieve(ctx context.Context, + params typesLocal.RetrieveParams, +) ([]*typesLocal.RetrieveResult, error) { + log := logger.GetLogger(ctx) + log.Infof("[ElasticsearchV7] Keywords retrieval: query=%s, topK=%d", params.Query, params.TopK) + + // Build search query + query, err := e.buildKeywordSearchQuery(ctx, params) + if err != nil { + return nil, err + } + + // Execute search + results, err := e.executeKeywordSearch(ctx, query) + if err != nil { + return nil, err + } + + return []*typesLocal.RetrieveResult{ + { + Results: results, + RetrieverEngineType: typesLocal.ElasticsearchRetrieverEngineType, + RetrieverType: typesLocal.KeywordsRetrieverType, + Error: nil, + }, + }, nil +} + +// buildKeywordSearchQuery builds the keyword search query JSON +func (e *elasticsearchRepository) buildKeywordSearchQuery(ctx context.Context, + params typesLocal.RetrieveParams, +) (string, error) { + log := logger.GetLogger(ctx) + + filter := e.getBaseConds(params) + query := fmt.Sprintf( + `{"query": {"bool": {"must": [{"match": {"content": 
"%s"}}], "filter": [%s]}}}`, + params.Query, filter, + ) + + log.Debugf("[ElasticsearchV7] Executing keyword search with query: %s", query) + return query, nil +} + +// executeKeywordSearch executes the keyword search query +func (e *elasticsearchRepository) executeKeywordSearch( + ctx context.Context, query string, +) ([]*typesLocal.IndexWithScore, error) { + log := logger.GetLogger(ctx) + + response, err := e.client.Search( + e.client.Search.WithIndex(e.index), + e.client.Search.WithBody( + strings.NewReader(query), + ), + e.client.Search.WithContext(ctx), + ) + if err != nil { + log.Errorf("[ElasticsearchV7] Keywords search failed: %v", err) + return nil, err + } + defer response.Body.Close() + + results, err := e.processSearchResponse(ctx, response, typesLocal.KeywordsRetrieverType) + if err != nil { + return nil, err + } + + return results, nil +} + +// processSearchResponse Process search response +func (e *elasticsearchRepository) processSearchResponse(ctx context.Context, + response *esapi.Response, retrieverType typesLocal.RetrieverType, +) ([]*typesLocal.IndexWithScore, error) { + log := logger.GetLogger(ctx) + + if response.IsError() { + errMsg := fmt.Sprintf("failed to retrieve: %s", response.String()) + log.Errorf("[ElasticsearchV7] %s", errMsg) + return nil, errors.New(errMsg) + } + + // Decode response body + rJson, err := e.decodeSearchResponse(ctx, response) + if err != nil { + return nil, err + } + + // Extract hits from response + hitsList, err := e.extractHitsFromResponse(ctx, rJson) + if err != nil { + return nil, err + } + + // Process hits into results + results, err := e.processHits(ctx, hitsList, retrieverType) + if err != nil { + return nil, err + } + + // Log results summary + e.logResultsSummary(ctx, results, retrieverType) + + return results, nil +} + +// decodeSearchResponse decodes the search response body +func (e *elasticsearchRepository) decodeSearchResponse(ctx context.Context, + response *esapi.Response, +) (map[string]any, error) { + log := logger.GetLogger(ctx) + var rJson map[string]any + + if err := json.NewDecoder(response.Body).Decode(&rJson); err != nil { + log.Errorf("[ElasticsearchV7] Failed to decode search response: %v", err) + return nil, err + } + + return rJson, nil +} + +// extractHitsFromResponse extracts the hits list from the response JSON +func (e *elasticsearchRepository) extractHitsFromResponse(ctx context.Context, + rJson map[string]any, +) ([]interface{}, error) { + log := logger.GetLogger(ctx) + + // Extract hits from response + hitsObj, ok := rJson["hits"].(map[string]interface{}) + if !ok { + log.Errorf("[ElasticsearchV7] Invalid search response format: 'hits' object missing") + return nil, fmt.Errorf("invalid search response format") + } + + hitsList, ok := hitsObj["hits"].([]interface{}) + if !ok { + log.Warnf("[ElasticsearchV7] No hits found in search response") + return []interface{}{}, nil + } + + return hitsList, nil +} + +// processHits processes the hits into IndexWithScore results +func (e *elasticsearchRepository) processHits(ctx context.Context, + hitsList []interface{}, retrieverType typesLocal.RetrieverType, +) ([]*typesLocal.IndexWithScore, error) { + log := logger.GetLogger(ctx) + results := make([]*typesLocal.IndexWithScore, 0, len(hitsList)) + + for _, hit := range hitsList { + indexWithScore, err := e.processHit(ctx, hit, retrieverType) + if err != nil { + // Log error but continue processing other hits + log.Warnf("[ElasticsearchV7] Error processing hit: %v", err) + continue + } + + if indexWithScore != nil { 
+ results = append(results, indexWithScore) + } + } + + return results, nil +} + +// processHit processes a single hit into an IndexWithScore +func (e *elasticsearchRepository) processHit(ctx context.Context, + hit interface{}, retrieverType typesLocal.RetrieverType, +) (*typesLocal.IndexWithScore, error) { + log := logger.GetLogger(ctx) + + hitMap, ok := hit.(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("invalid hit object format") + } + + // Get document ID + docID, ok := hitMap["_id"].(string) + if !ok { + return nil, fmt.Errorf("hit missing document ID") + } + + // Get document source + sourceObj, ok := hitMap["_source"] + if !ok { + return nil, fmt.Errorf("hit %s missing _source", docID) + } + + // Get score + score, ok := hitMap["_score"].(float64) + if !ok { + return nil, fmt.Errorf("hit %s missing score", docID) + } + + // Convert source to embedding + embedding, err := e.convertSourceToEmbedding(ctx, sourceObj, docID, score) + if err != nil { + return nil, err + } + + result := elasticsearchRetriever.FromDBVectorEmbeddingWithScore( + docID, embedding, typesLocal.MatchTypeKeywords, + ) + + matchType := "keyword" + if retrieverType == typesLocal.VectorRetrieverType { + matchType = "vector" + } + log.Debugf("[ElasticsearchV7] %s search result: id=%s, score=%.4f", matchType, docID, score) + + return result, nil +} + +// convertSourceToEmbedding converts the source object to an embedding +func (e *elasticsearchRepository) convertSourceToEmbedding(ctx context.Context, + sourceObj interface{}, docID string, score float64, +) (*elasticsearchRetriever.VectorEmbeddingWithScore, error) { + log := logger.GetLogger(ctx) + + // Convert source to embedding + var embedding *elasticsearchRetriever.VectorEmbeddingWithScore + sourceBytes, err := json.Marshal(sourceObj) + if err != nil { + log.Warnf("[ElasticsearchV7] Failed to marshal source for hit %s: %v", docID, err) + return nil, fmt.Errorf("failed to marshal source for hit %s: %v", docID, err) + } + + if err := json.Unmarshal(sourceBytes, &embedding); err != nil { + log.Warnf("[ElasticsearchV7] Failed to unmarshal source for hit %s: %v", docID, err) + return nil, fmt.Errorf("failed to unmarshal source for hit %s: %v", docID, err) + } + + embedding.Score = score + return embedding, nil +} + +// logResultsSummary logs a summary of the results +func (e *elasticsearchRepository) logResultsSummary(ctx context.Context, + results []*typesLocal.IndexWithScore, retrieverType typesLocal.RetrieverType, +) { + log := logger.GetLogger(ctx) + + if len(results) == 0 { + if retrieverType == typesLocal.KeywordsRetrieverType { + log.Warnf("[ElasticsearchV7] No keyword matches found") + } else { + log.Warnf("[ElasticsearchV7] No vector matches found that meet threshold") + } + } else { + retrievalType := "Keywords" + if retrieverType == typesLocal.VectorRetrieverType { + retrievalType = "Vector" + } + log.Infof("[ElasticsearchV7] %s retrieval found %d results", retrievalType, len(results)) + log.Debugf("[ElasticsearchV7] Top result score: %.4f", results[0].Score) + } +} + +// CopyIndices Copy index data +func (e *elasticsearchRepository) CopyIndices(ctx context.Context, + sourceKnowledgeBaseID string, + sourceToTargetKBIDMap map[string]string, + sourceToTargetChunkIDMap map[string]string, + targetKnowledgeBaseID string, + dimension int, +) error { + log := logger.GetLogger(ctx) + log.Infof( + "[ElasticsearchV7] Copying indices from source knowledge base %s to target knowledge base %s, count: %d", + sourceKnowledgeBaseID, targetKnowledgeBaseID, 
len(sourceToTargetChunkIDMap), + ) + + if len(sourceToTargetChunkIDMap) == 0 { + log.Warnf("[ElasticsearchV7] Empty mapping, skipping copy") + return nil + } + + // Build query parameters + retrieveParams := typesLocal.RetrieveParams{ + KnowledgeBaseIDs: []string{sourceKnowledgeBaseID}, + } + + // Set batch processing parameters + batchSize := 500 + from := 0 + totalCopied := 0 + + for { + // Query source data batch + hitsList, err := e.querySourceBatch(ctx, retrieveParams, from, batchSize) + if err != nil { + return err + } + + // If no more data, break the loop + if len(hitsList) == 0 { + break + } + + log.Infof("[ElasticsearchV7] Found %d source index data, batch start position: %d", len(hitsList), from) + + // Process the batch and create index information + indexInfoList, err := e.processSourceBatch(ctx, hitsList, sourceToTargetKBIDMap, + sourceToTargetChunkIDMap, targetKnowledgeBaseID) + if err != nil { + return err + } + + // Save processed indices + if len(indexInfoList) > 0 { + err := e.saveCopiedIndices(ctx, indexInfoList) + if err != nil { + return err + } + + totalCopied += len(indexInfoList) + log.Infof("[ElasticsearchV7] Successfully copied batch data, batch size: %d, total copied: %d", + len(indexInfoList), totalCopied) + } + + // Move to next batch + from += len(hitsList) + + // If the number of returned records is less than the request size, it means the last page has been reached + if len(hitsList) < batchSize { + break + } + } + + log.Infof("[ElasticsearchV7] Index copy completed, total copied: %d", totalCopied) + return nil +} + +// querySourceBatch queries a batch of source data +func (e *elasticsearchRepository) querySourceBatch(ctx context.Context, + retrieveParams typesLocal.RetrieveParams, from int, batchSize int, +) ([]interface{}, error) { + log := logger.GetLogger(ctx) + + // Build query request + filter := e.getBaseConds(retrieveParams) + query := fmt.Sprintf(`{ + "query": %s, + "from": %d, + "size": %d + }`, filter, from, batchSize) + + // Execute query + response, err := e.client.Search( + e.client.Search.WithIndex(e.index), + e.client.Search.WithBody(strings.NewReader(query)), + e.client.Search.WithContext(ctx), + ) + if err != nil { + log.Errorf("[ElasticsearchV7] Failed to query source index data: %v", err) + return nil, err + } + defer response.Body.Close() + + if response.IsError() { + log.Errorf("[ElasticsearchV7] Failed to query source index data: %s", response.String()) + return nil, fmt.Errorf("failed to query source index data: %s", response.String()) + } + + // 解析搜索结果 + var searchResult map[string]interface{} + if err := json.NewDecoder(response.Body).Decode(&searchResult); err != nil { + log.Errorf("[ElasticsearchV7] Failed to parse query result: %v", err) + return nil, err + } + + // 提取结果列表 + hitsObj, ok := searchResult["hits"].(map[string]interface{}) + if !ok { + log.Errorf("[ElasticsearchV7] Invalid search result format: 'hits' object missing") + return nil, fmt.Errorf("invalid search result format") + } + + hitsList, ok := hitsObj["hits"].([]interface{}) + if !ok || len(hitsList) == 0 { + if from == 0 { + log.Warnf("[ElasticsearchV7] No source index data found") + } + return []interface{}{}, nil + } + + return hitsList, nil +} + +// processSourceBatch processes a batch of source data and creates index information +func (e *elasticsearchRepository) processSourceBatch(ctx context.Context, + hitsList []interface{}, + sourceToTargetKBIDMap map[string]string, + sourceToTargetChunkIDMap map[string]string, + targetKnowledgeBaseID string, +) 
([]*typesLocal.IndexInfo, error) { + log := logger.GetLogger(ctx) + + // Prepare index information for batch save + indexInfoList := make([]*typesLocal.IndexInfo, 0, len(hitsList)) + embeddingMap := make(map[string][]float32) + + // Process each hit result + for _, hit := range hitsList { + indexInfo, embeddingVector, err := e.processSingleHit(ctx, hit, + sourceToTargetKBIDMap, sourceToTargetChunkIDMap, targetKnowledgeBaseID) + if err != nil { + log.Warnf("[ElasticsearchV7] Error processing hit: %v", err) + continue + } + + if indexInfo != nil { + indexInfoList = append(indexInfoList, indexInfo) + if embeddingVector != nil { + embeddingMap[indexInfo.ChunkID] = embeddingVector + } + } + } + + return indexInfoList, nil +} + +// processSingleHit processes a single hit and creates index information +func (e *elasticsearchRepository) processSingleHit(ctx context.Context, + hit interface{}, + sourceToTargetKBIDMap map[string]string, + sourceToTargetChunkIDMap map[string]string, + targetKnowledgeBaseID string, +) (*typesLocal.IndexInfo, []float32, error) { + log := logger.GetLogger(ctx) + + hitMap, ok := hit.(map[string]interface{}) + if !ok { + log.Warnf("[ElasticsearchV7] Invalid hit object format") + return nil, nil, fmt.Errorf("invalid hit object format") + } + + // Get document source + sourceObj, ok := hitMap["_source"].(map[string]interface{}) + if !ok { + log.Warnf("[ElasticsearchV7] Hit missing _source field") + return nil, nil, fmt.Errorf("hit missing _source field") + } + + // Get source ChunkID and corresponding target ChunkID + sourceChunkID, ok := sourceObj["chunk_id"].(string) + if !ok { + log.Warnf("[ElasticsearchV7] Source index data missing chunk_id field") + return nil, nil, fmt.Errorf("source index data missing chunk_id field") + } + + targetChunkID, ok := sourceToTargetChunkIDMap[sourceChunkID] + if !ok { + log.Warnf("[ElasticsearchV7] Source chunk ID %s not found in mapping", sourceChunkID) + return nil, nil, fmt.Errorf("source chunk ID %s not found in mapping", sourceChunkID) + } + + // Get mapped target knowledge ID + sourceKnowledgeID, ok := sourceObj["knowledge_id"].(string) + if !ok { + log.Warnf("[ElasticsearchV7] Source index data missing knowledge_id field") + return nil, nil, fmt.Errorf("source index data missing knowledge_id field") + } + + targetKnowledgeID, ok := sourceToTargetKBIDMap[sourceKnowledgeID] + if !ok { + log.Warnf("[ElasticsearchV7] Source knowledge ID %s not found in mapping", sourceKnowledgeID) + return nil, nil, fmt.Errorf("source knowledge ID %s not found in mapping", sourceKnowledgeID) + } + + // Extract basic content + content, _ := sourceObj["content"].(string) + sourceType := 0 + if st, ok := sourceObj["source_type"].(float64); ok { + sourceType = int(st) + } + + // Extract embedding vector (if exists) + var embedding []float32 + if embeddingInterface, ok := sourceObj["embedding"].([]interface{}); ok { + embedding = make([]float32, len(embeddingInterface)) + for i, v := range embeddingInterface { + if f, ok := v.(float64); ok { + embedding[i] = float32(f) + } + } + log.Debugf("[ElasticsearchV7] Extracted embedding vector with %d dimensions for chunk %s", + len(embedding), targetChunkID) + } + + // Create IndexInfo object + indexInfo := &typesLocal.IndexInfo{ + ChunkID: targetChunkID, + SourceID: targetChunkID, + KnowledgeID: targetKnowledgeID, + KnowledgeBaseID: targetKnowledgeBaseID, + Content: content, + SourceType: typesLocal.SourceType(sourceType), + } + + return indexInfo, embedding, nil +} + +// saveCopiedIndices saves the copied indices 
+func (e *elasticsearchRepository) saveCopiedIndices(ctx context.Context, indexInfoList []*typesLocal.IndexInfo) error { + log := logger.GetLogger(ctx) + + if len(indexInfoList) == 0 { + log.Info("[ElasticsearchV7] No indices to save, skipping") + return nil + } + + // Prepare additional params with embedding map + additionalParams := make(map[string]any) + embeddingMap := make(map[string][]float32) + + // No need to extract embeddings from metadata as they're not stored there + // We'll use the embeddings directly from the embedding map created in processSourceBatch + + if len(embeddingMap) > 0 { + additionalParams["embedding"] = embeddingMap + log.Infof("[ElasticsearchV7] Found %d embeddings to save", len(embeddingMap)) + } + + // Perform batch save + err := e.BatchSave(ctx, indexInfoList, additionalParams) + if err != nil { + log.Errorf("[ElasticsearchV7] Failed to batch save copied indices: %v", err) + return err + } + + log.Infof("[ElasticsearchV7] Successfully saved %d indices", len(indexInfoList)) + return nil +} diff --git a/internal/application/repository/retriever/elasticsearch/v8/repository.go b/internal/application/repository/retriever/elasticsearch/v8/repository.go new file mode 100644 index 0000000..17c9883 --- /dev/null +++ b/internal/application/repository/retriever/elasticsearch/v8/repository.go @@ -0,0 +1,541 @@ +package v8 + +import ( + "context" + "encoding/json" + "fmt" + "os" + + elasticsearchRetriever "github.com/Tencent/WeKnora/internal/application/repository/retriever/elasticsearch" + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/logger" + typesLocal "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/elastic/go-elasticsearch/v8" + "github.com/elastic/go-elasticsearch/v8/typedapi/core/search" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" +) + +// elasticsearchRepository implements the RetrieveEngineRepository interface for Elasticsearch v8 +type elasticsearchRepository struct { + client *elasticsearch.TypedClient // Elasticsearch client instance + index string // Name of the Elasticsearch index to use +} + +// NewElasticsearchEngineRepository creates and initializes a new Elasticsearch v8 repository +// It sets up the index and returns a repository instance ready for use +func NewElasticsearchEngineRepository(client *elasticsearch.TypedClient, + config *config.Config, +) interfaces.RetrieveEngineRepository { + log := logger.GetLogger(context.Background()) + log.Info("[Elasticsearch] Initializing Elasticsearch v8 retriever engine repository") + + // Get index name from environment variable or use default + indexName := os.Getenv("ELASTICSEARCH_INDEX") + if indexName == "" { + log.Warn("[Elasticsearch] ELASTICSEARCH_INDEX environment variable not set, using default index name") + indexName = "xwrag_default" + } + + // Create repository instance and ensure index exists + res := &elasticsearchRepository{client: client, index: indexName} + if err := res.createIndexIfNotExists(context.Background()); err != nil { + log.Errorf("[Elasticsearch] Failed to create index: %v", err) + } else { + log.Info("[Elasticsearch] Successfully initialized repository") + } + return res +} + +// EngineType returns the type of retriever engine (Elasticsearch) +func (e *elasticsearchRepository) EngineType() typesLocal.RetrieverEngineType { + return typesLocal.ElasticsearchRetrieverEngineType +} + +// Support returns the retrieval types supported by this repository (Keywords and 
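The v8 repository is handed an *elasticsearch.TypedClient rather than the low-level client used by the v7 code. A minimal construction sketch, assuming the ELASTICSEARCH_* variables from .env; the project's actual client setup is not part of this file:

package example

import (
	"os"

	"github.com/elastic/go-elasticsearch/v8"
)

// newTypedClient builds the typed client expected by NewElasticsearchEngineRepository.
func newTypedClient() (*elasticsearch.TypedClient, error) {
	return elasticsearch.NewTypedClient(elasticsearch.Config{
		Addresses: []string{os.Getenv("ELASTICSEARCH_ADDR")},
		Username:  os.Getenv("ELASTICSEARCH_USERNAME"),
		Password:  os.Getenv("ELASTICSEARCH_PASSWORD"),
	})
}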
Vector) +func (e *elasticsearchRepository) Support() []typesLocal.RetrieverType { + return []typesLocal.RetrieverType{typesLocal.KeywordsRetrieverType, typesLocal.VectorRetrieverType} +} + +// calculateStorageSize estimates the storage size in bytes for a single index document +func (e *elasticsearchRepository) calculateStorageSize(embedding *elasticsearchRetriever.VectorEmbedding) int64 { + // 1. Content text size + contentSizeBytes := int64(len(embedding.Content)) + + // 2. Vector embedding size + var vectorSizeBytes int64 = 0 + if embedding.Embedding != nil { + // 4 bytes per dimension (full precision float) + vectorSizeBytes = int64(len(embedding.Embedding) * 4) + } + + // 3. Metadata size (IDs, timestamps, and other fixed overhead) + metadataSizeBytes := int64(250) // Approximately 250 bytes of metadata + + // 4. Index overhead (Elasticsearch index expansion factor ~1.5) + indexOverheadBytes := (contentSizeBytes + vectorSizeBytes) * 5 / 10 + + // Total size in bytes + totalSizeBytes := contentSizeBytes + vectorSizeBytes + metadataSizeBytes + indexOverheadBytes + return totalSizeBytes +} + +// EstimateStorageSize calculates the estimated storage size for a list of indices +// Returns the total size in bytes +func (e *elasticsearchRepository) EstimateStorageSize(ctx context.Context, + indexInfoList []*typesLocal.IndexInfo, params map[string]any, +) int64 { + var totalStorageSize int64 = 0 + for _, embedding := range indexInfoList { + embeddingDB := elasticsearchRetriever.ToDBVectorEmbedding(embedding, params) + totalStorageSize += e.calculateStorageSize(embeddingDB) + } + logger.GetLogger(ctx).Infof( + "[Elasticsearch] Storage size for %d indices: %d bytes", len(indexInfoList), totalStorageSize, + ) + return totalStorageSize +} + +// Save stores a single index document in Elasticsearch +// Returns an error if the operation fails +func (e *elasticsearchRepository) Save(ctx context.Context, + embedding *typesLocal.IndexInfo, + additionalParams map[string]any, +) error { + log := logger.GetLogger(ctx) + log.Debugf("[Elasticsearch] Saving index for chunk ID: %s", embedding.ChunkID) + + // Convert to database format + embeddingDB := elasticsearchRetriever.ToDBVectorEmbedding(embedding, additionalParams) + if len(embeddingDB.Embedding) == 0 { + err := fmt.Errorf("empty embedding vector for chunk ID: %s", embedding.ChunkID) + log.Errorf("[Elasticsearch] %v", err) + return err + } + + // Index the document + resp, err := e.client.Index(e.index).Request(embeddingDB).Do(ctx) + if err != nil { + log.Errorf("[Elasticsearch] Failed to save index: %v", err) + return err + } + + log.Infof("[Elasticsearch] Successfully saved index for chunk ID: %s, document ID: %s", embedding.ChunkID, resp.Id_) + return nil +} + +// BatchSave stores multiple index documents in Elasticsearch using bulk API +// Returns an error if the operation fails +func (e *elasticsearchRepository) BatchSave(ctx context.Context, + embeddingList []*typesLocal.IndexInfo, additionalParams map[string]any, +) error { + log := logger.GetLogger(ctx) + if len(embeddingList) == 0 { + log.Warn("[Elasticsearch] Empty list provided to BatchSave, skipping") + return nil + } + + log.Infof("[Elasticsearch] Batch saving %d indices", len(embeddingList)) + indexRequest := e.client.Bulk().Index(e.index) + + // Add each document to the bulk request + for _, embedding := range embeddingList { + embeddingDB := elasticsearchRetriever.ToDBVectorEmbedding(embedding, additionalParams) + err := indexRequest.CreateOp(types.CreateOperation{Index_: &e.index}, 
embeddingDB) + if err != nil { + log.Errorf("[Elasticsearch] Failed to create bulk operation: %v", err) + return fmt.Errorf("failed to create op: %w", err) + } + log.Debugf("[Elasticsearch] Added chunk ID %s to bulk request", embedding.ChunkID) + } + + // Execute the bulk request + _, err := indexRequest.Do(ctx) + if err != nil { + log.Errorf("[Elasticsearch] Failed to execute bulk operation: %v", err) + return fmt.Errorf("failed to do bulk: %w", err) + } + + log.Infof("[Elasticsearch] Successfully batch saved %d indices", len(embeddingList)) + return nil +} + +// DeleteByChunkIDList removes documents from the index based on chunk IDs +// Returns an error if the delete operation fails +func (e *elasticsearchRepository) DeleteByChunkIDList(ctx context.Context, chunkIDList []string, dimension int) error { + log := logger.GetLogger(ctx) + if len(chunkIDList) == 0 { + log.Warn("[Elasticsearch] Empty chunk ID list provided for deletion, skipping") + return nil + } + + log.Infof("[Elasticsearch] Deleting indices by chunk IDs, count: %d", len(chunkIDList)) + // Use DeleteByQuery to delete all documents matching the chunk IDs + _, err := e.client.DeleteByQuery(e.index).Query(&types.Query{ + Terms: &types.TermsQuery{TermsQuery: map[string]types.TermsQueryField{"chunk_id.keyword": chunkIDList}}, + }).Do(ctx) + if err != nil { + log.Errorf("[Elasticsearch] Failed to delete by chunk IDs: %v", err) + return fmt.Errorf("failed to delete by query: %w", err) + } + + log.Infof("[Elasticsearch] Successfully deleted documents by chunk IDs") + return nil +} + +// DeleteByKnowledgeIDList removes documents from the index based on knowledge IDs +// Returns an error if the delete operation fails +func (e *elasticsearchRepository) DeleteByKnowledgeIDList(ctx context.Context, + knowledgeIDList []string, dimension int, +) error { + log := logger.GetLogger(ctx) + if len(knowledgeIDList) == 0 { + log.Warn("[Elasticsearch] Empty knowledge ID list provided for deletion, skipping") + return nil + } + + log.Infof("[Elasticsearch] Deleting indices by knowledge IDs, count: %d", len(knowledgeIDList)) + // Use DeleteByQuery to delete all documents matching the knowledge IDs + _, err := e.client.DeleteByQuery(e.index).Query(&types.Query{ + Terms: &types.TermsQuery{TermsQuery: map[string]types.TermsQueryField{"knowledge_id.keyword": knowledgeIDList}}, + }).Do(ctx) + if err != nil { + log.Errorf("[Elasticsearch] Failed to delete by knowledge IDs: %v", err) + return fmt.Errorf("failed to delete by query: %w", err) + } + + log.Infof("[Elasticsearch] Successfully deleted documents by knowledge IDs") + return nil +} + +// getBaseConds creates the base query conditions for retrieval operations +// Returns a slice of Query objects with must and must_not conditions +func (e *elasticsearchRepository) getBaseConds(params typesLocal.RetrieveParams) []types.Query { + must := []types.Query{} + if len(params.KnowledgeBaseIDs) > 0 { + must = append(must, types.Query{Terms: &types.TermsQuery{ + TermsQuery: map[string]types.TermsQueryField{ + "knowledge_base_id.keyword": params.KnowledgeBaseIDs, + }, + }}) + } + mustNot := make([]types.Query, 0) + if len(params.ExcludeKnowledgeIDs) > 0 { + mustNot = append(mustNot, types.Query{Terms: &types.TermsQuery{ + TermsQuery: map[string]types.TermsQueryField{"knowledge_id.keyword": params.ExcludeKnowledgeIDs}, + }}) + } + if len(params.ExcludeChunkIDs) > 0 { + mustNot = append(mustNot, types.Query{Terms: &types.TermsQuery{ + TermsQuery: map[string]types.TermsQueryField{"chunk_id.keyword": 
params.ExcludeChunkIDs}, + }}) + } + return []types.Query{{Bool: &types.BoolQuery{Must: must, MustNot: mustNot}}} +} + +// createIndexIfNotExists checks if the specified index exists and creates it if not +// Returns an error if the operation fails +func (e *elasticsearchRepository) createIndexIfNotExists(ctx context.Context) error { + log := logger.GetLogger(ctx) + log.Debugf("[Elasticsearch] Checking if index exists: %s", e.index) + + // Check if index exists + exists, err := e.client.Indices.Exists(e.index).Do(ctx) + if err != nil { + log.Errorf("[Elasticsearch] Failed to check if index exists: %v", err) + return err + } + + if exists { + log.Debugf("[Elasticsearch] Index already exists: %s", e.index) + return nil + } + + // Create index if it doesn't exist + log.Infof("[Elasticsearch] Creating index: %s", e.index) + _, err = e.client.Indices.Create(e.index).Do(ctx) + if err != nil { + log.Errorf("[Elasticsearch] Failed to create index: %v", err) + return err + } + + log.Infof("[Elasticsearch] Index created successfully: %s", e.index) + return nil +} + +// Retrieve dispatches the retrieval operation to the appropriate method based on retriever type +// Returns a slice of RetrieveResult and an error if the operation fails +func (e *elasticsearchRepository) Retrieve(ctx context.Context, + params typesLocal.RetrieveParams, +) ([]*typesLocal.RetrieveResult, error) { + log := logger.GetLogger(ctx) + log.Debugf("[Elasticsearch] Processing retrieval request of type: %s", params.RetrieverType) + + // Route to appropriate retrieval method + switch params.RetrieverType { + case typesLocal.VectorRetrieverType: + return e.VectorRetrieve(ctx, params) + case typesLocal.KeywordsRetrieverType: + return e.KeywordsRetrieve(ctx, params) + } + + err := fmt.Errorf("invalid retriever type: %v", params.RetrieverType) + log.Errorf("[Elasticsearch] %v", err) + return nil, err +} + +// VectorRetrieve performs vector similarity search using cosine similarity +// Returns a slice of RetrieveResult containing matching documents +func (e *elasticsearchRepository) VectorRetrieve(ctx context.Context, + params typesLocal.RetrieveParams, +) ([]*typesLocal.RetrieveResult, error) { + log := logger.GetLogger(ctx) + log.Infof("[Elasticsearch] Vector retrieval: dim=%d, topK=%d, threshold=%.4f", + len(params.Embedding), params.TopK, params.Threshold) + + filter := e.getBaseConds(params) + + // Build script scoring query with cosine similarity + queryVectorJSON, err := json.Marshal(params.Embedding) + if err != nil { + log.Errorf("[Elasticsearch] Failed to marshal query vector: %v", err) + return nil, fmt.Errorf("failed to marshal query embedding: %w", err) + } + + scoreSource := "cosineSimilarity(params.query_vector, 'embedding')" + minScore := float32(params.Threshold) + scriptScore := &types.ScriptScoreQuery{ + Query: types.Query{Bool: &types.BoolQuery{Filter: filter}}, + Script: types.Script{ + Source: &scoreSource, + Params: map[string]json.RawMessage{ + "query_vector": json.RawMessage(queryVectorJSON), + }, + }, + MinScore: &minScore, + } + + log.Debugf("[Elasticsearch] Executing vector search in index: %s", e.index) + // Execute search with minimum score threshold + response, err := e.client.Search().Index(e.index).Request(&search.Request{ + Query: &types.Query{ScriptScore: scriptScore}, + Size: ¶ms.TopK, + }).Do(ctx) + if err != nil { + log.Errorf("[Elasticsearch] Vector search failed: %v", err) + return nil, err + } + + // Process search results + var results []*typesLocal.IndexWithScore + for _, hit := range 
response.Hits.Hits { + var embedding *elasticsearchRetriever.VectorEmbeddingWithScore + if err := json.Unmarshal(hit.Source_, &embedding); err != nil { + log.Errorf("[Elasticsearch] Failed to unmarshal search result: %v", err) + return nil, err + } + embedding.Score = float64(*hit.Score_) + results = append(results, + elasticsearchRetriever.FromDBVectorEmbeddingWithScore(*hit.Id_, embedding, typesLocal.MatchTypeEmbedding)) + } + + if len(results) == 0 { + log.Warnf("[Elasticsearch] No vector matches found that meet threshold %.4f", params.Threshold) + } else { + log.Infof("[Elasticsearch] Vector retrieval found %d results", len(results)) + log.Debugf("[Elasticsearch] Top result score: %.4f", results[0].Score) + } + + return []*typesLocal.RetrieveResult{ + { + Results: results, + RetrieverEngineType: typesLocal.ElasticsearchRetrieverEngineType, + RetrieverType: typesLocal.VectorRetrieverType, + Error: nil, + }, + }, nil +} + +// KeywordsRetrieve performs keyword-based search in document content +// Returns a slice of RetrieveResult containing matching documents +func (e *elasticsearchRepository) KeywordsRetrieve(ctx context.Context, + params typesLocal.RetrieveParams, +) ([]*typesLocal.RetrieveResult, error) { + log := logger.GetLogger(ctx) + log.Infof("[Elasticsearch] Performing keywords retrieval with query: %s, topK: %d", params.Query, params.TopK) + + filter := e.getBaseConds(params) + // Build must conditions for content matching + must := []types.Query{ + {Match: map[string]types.MatchQuery{"content": {Query: params.Query}}}, + } + + log.Debugf("[Elasticsearch] Executing keyword search in index: %s", e.index) + response, err := e.client.Search().Index(e.index).Request(&search.Request{ + Query: &types.Query{Bool: &types.BoolQuery{Filter: filter, Must: must}}, + Size: ¶ms.TopK, + }).Do(ctx) + if err != nil { + log.Errorf("[Elasticsearch] Keywords search failed: %v", err) + return nil, err + } + + // Process search results + var results []*typesLocal.IndexWithScore + for _, hit := range response.Hits.Hits { + var embedding *elasticsearchRetriever.VectorEmbeddingWithScore + if err := json.Unmarshal(hit.Source_, &embedding); err != nil { + log.Errorf("[Elasticsearch] Failed to unmarshal search result: %v", err) + return nil, err + } + embedding.Score = float64(*hit.Score_) + results = append(results, + elasticsearchRetriever.FromDBVectorEmbeddingWithScore(*hit.Id_, embedding, typesLocal.MatchTypeKeywords), + ) + } + + if len(results) == 0 { + log.Warnf("[Elasticsearch] No keyword matches found for query: %s", params.Query) + } else { + log.Infof("[Elasticsearch] Keywords retrieval found %d results", len(results)) + log.Debugf("[Elasticsearch] Top result score: %.4f", results[0].Score) + } + + return []*typesLocal.RetrieveResult{ + { + Results: results, + RetrieverEngineType: typesLocal.ElasticsearchRetrieverEngineType, + RetrieverType: typesLocal.KeywordsRetrieverType, + Error: nil, + }, + }, nil +} + +// CopyIndices 复制索引数据 +func (e *elasticsearchRepository) CopyIndices(ctx context.Context, + sourceKnowledgeBaseID string, + sourceToTargetKBIDMap map[string]string, + sourceToTargetChunkIDMap map[string]string, + targetKnowledgeBaseID string, + dimension int, +) error { + log := logger.GetLogger(ctx) + log.Infof( + "[Elasticsearch] Copying indices from source knowledge base %s to target knowledge base %s, count: %d", + sourceKnowledgeBaseID, targetKnowledgeBaseID, len(sourceToTargetChunkIDMap), + ) + + if len(sourceToTargetChunkIDMap) == 0 { + log.Warn("[Elasticsearch] Empty mapping, 
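A hedged usage sketch of the two retrieval paths above, with field names taken from their use in this file; the full RetrieveParams definition is not shown, and Retrieve is assumed to be exposed on the RetrieveEngineRepository interface:

package example

import (
	"context"

	"github.com/Tencent/WeKnora/internal/types"
	"github.com/Tencent/WeKnora/internal/types/interfaces"
)

// retrieveBoth runs one keyword query and one vector query against the same
// knowledge base and concatenates the per-retriever result sets.
func retrieveBoth(ctx context.Context, repo interfaces.RetrieveEngineRepository,
	kbID, query string, queryVector []float32,
) ([]*types.RetrieveResult, error) {
	keyword, err := repo.Retrieve(ctx, types.RetrieveParams{
		RetrieverType:    types.KeywordsRetrieverType,
		Query:            query,
		TopK:             5,
		KnowledgeBaseIDs: []string{kbID},
	})
	if err != nil {
		return nil, err
	}
	vector, err := repo.Retrieve(ctx, types.RetrieveParams{
		RetrieverType:    types.VectorRetrieverType,
		Embedding:        queryVector,
		TopK:             5,
		Threshold:        0.5,
		KnowledgeBaseIDs: []string{kbID},
	})
	if err != nil {
		return nil, err
	}
	return append(keyword, vector...), nil
}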
skipping copy") + return nil + } + + // Build query parameters + params := typesLocal.RetrieveParams{ + KnowledgeBaseIDs: []string{sourceKnowledgeBaseID}, + } + + // Build base query conditions + filter := e.getBaseConds(params) + + // Set batch processing parameters + batchSize := 500 + from := 0 + totalCopied := 0 + + for { + // Execute pagination query + searchResponse, err := e.client.Search().Index(e.index). + Query(&types.Query{Bool: &types.BoolQuery{Filter: filter}}). + From(from). + Size(batchSize). + Do(ctx) + if err != nil { + log.Errorf("[Elasticsearch] Failed to query source index data: %v", err) + return err + } + + hitsCount := len(searchResponse.Hits.Hits) + if hitsCount == 0 { + break + } + + log.Infof("[Elasticsearch] Found %d source index data, batch start position: %d", hitsCount, from) + + // Prepare index list for BatchSave + var indexInfoList []*typesLocal.IndexInfo + + // Collect all embedding vector data for additionalParams + embeddingMap := make(map[string][]float32) + + for _, hit := range searchResponse.Hits.Hits { + // Parse source document + var sourceDoc elasticsearchRetriever.VectorEmbedding + if err := json.Unmarshal(hit.Source_, &sourceDoc); err != nil { + log.Errorf("[Elasticsearch] Failed to parse source index data: %v", err) + continue + } + + // Get mapped target chunk ID and knowledge ID + targetChunkID, ok := sourceToTargetChunkIDMap[sourceDoc.ChunkID] + if !ok { + log.Warnf("[Elasticsearch] Source chunk %s not found in target mapping, skipping", sourceDoc.ChunkID) + continue + } + + targetKnowledgeID, ok := sourceToTargetKBIDMap[sourceDoc.KnowledgeID] + if !ok { + log.Warnf( + "[Elasticsearch] Source knowledge %s not found in target mapping, skipping", + sourceDoc.KnowledgeID, + ) + continue + } + + // Save embedding vector to embeddingMap + if len(sourceDoc.Embedding) > 0 { + embeddingMap[targetChunkID] = sourceDoc.Embedding + } + + // Create new index information + indexInfo := &typesLocal.IndexInfo{ + Content: sourceDoc.Content, + SourceID: targetChunkID, + SourceType: typesLocal.SourceType(sourceDoc.SourceType), + ChunkID: targetChunkID, + KnowledgeID: targetKnowledgeID, + KnowledgeBaseID: targetKnowledgeBaseID, + } + + indexInfoList = append(indexInfoList, indexInfo) + totalCopied++ + } + + // Use BatchSave function to save index + if len(indexInfoList) > 0 { + // Add embedding vector to additional parameters + additionalParams := map[string]any{ + "embedding": embeddingMap, + } + + if err := e.BatchSave(ctx, indexInfoList, additionalParams); err != nil { + log.Errorf("[Elasticsearch] Failed to batch save index: %v", err) + return err + } + + log.Infof("[Elasticsearch] Successfully copied batch data, batch size: %d, total copied: %d", + len(indexInfoList), totalCopied) + } + + // Move to next batch + from += hitsCount + + // If the number of returned records is less than the request size, it means the last page has been reached + if hitsCount < batchSize { + break + } + } + + log.Infof("[Elasticsearch] Index copy completed, total copied: %d", totalCopied) + return nil +} diff --git a/internal/application/repository/retriever/postgres/repository.go b/internal/application/repository/retriever/postgres/repository.go new file mode 100644 index 0000000..9d29317 --- /dev/null +++ b/internal/application/repository/retriever/postgres/repository.go @@ -0,0 +1,390 @@ +package postgres + +import ( + "context" + "errors" + "fmt" + + "github.com/Tencent/WeKnora/internal/common" + "github.com/Tencent/WeKnora/internal/logger" + 
"github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/pgvector/pgvector-go" + "gorm.io/gorm" + "gorm.io/gorm/clause" +) + +// pgRepository implements PostgreSQL-based retrieval operations +type pgRepository struct { + db *gorm.DB // Database connection +} + +// NewPostgresRetrieveEngineRepository creates a new PostgreSQL retriever repository +func NewPostgresRetrieveEngineRepository(db *gorm.DB) interfaces.RetrieveEngineRepository { + logger.GetLogger(context.Background()).Info("[Postgres] Initializing PostgreSQL retriever engine repository") + return &pgRepository{db: db} +} + +// EngineType returns the retriever engine type (PostgreSQL) +func (r *pgRepository) EngineType() types.RetrieverEngineType { + return types.PostgresRetrieverEngineType +} + +// Support returns supported retriever types (keywords and vector) +func (r *pgRepository) Support() []types.RetrieverType { + return []types.RetrieverType{types.KeywordsRetrieverType, types.VectorRetrieverType} +} + +// calculateIndexStorageSize calculates storage size for a single index entry +func (g *pgRepository) calculateIndexStorageSize(embeddingDB *pgVector) int64 { + // 1. Text content size + contentSizeBytes := int64(len(embeddingDB.Content)) + + // 2. Vector storage size (2 bytes per dimension for half-precision float) + var vectorSizeBytes int64 = 0 + if embeddingDB.Dimension > 0 { + vectorSizeBytes = int64(embeddingDB.Dimension * 2) + } + + // 3. Metadata size (fixed overhead for IDs, timestamps etc.) + metadataSizeBytes := int64(200) + + // 4. Index overhead (HNSW index is ~2x vector size) + indexOverheadBytes := vectorSizeBytes * 2 + + // Total size in bytes + totalSizeBytes := contentSizeBytes + vectorSizeBytes + metadataSizeBytes + indexOverheadBytes + + return totalSizeBytes +} + +// EstimateStorageSize estimates total storage size for multiple indices +func (g *pgRepository) EstimateStorageSize( + ctx context.Context, indexInfoList []*types.IndexInfo, additionalParams map[string]any, +) int64 { + var totalStorageSize int64 = 0 + for _, indexInfo := range indexInfoList { + embeddingDB := toDBVectorEmbedding(indexInfo, additionalParams) + totalStorageSize += g.calculateIndexStorageSize(embeddingDB) + } + logger.GetLogger(ctx).Infof( + "[Postgres] Estimated storage size for %d indices: %d bytes", + len(indexInfoList), totalStorageSize, + ) + return totalStorageSize +} + +// Save stores a single index entry +func (g *pgRepository) Save(ctx context.Context, indexInfo *types.IndexInfo, additionalParams map[string]any) error { + logger.GetLogger(ctx).Debugf("[Postgres] Saving index for source ID: %s", indexInfo.SourceID) + embeddingDB := toDBVectorEmbedding(indexInfo, additionalParams) + err := g.db.WithContext(ctx).Create(embeddingDB).Error + if err != nil { + logger.GetLogger(ctx).Errorf("[Postgres] Failed to save index: %v", err) + return err + } + logger.GetLogger(ctx).Infof("[Postgres] Successfully saved index for source ID: %s", indexInfo.SourceID) + return nil +} + +// BatchSave stores multiple index entries in batch +func (g *pgRepository) BatchSave( + ctx context.Context, indexInfoList []*types.IndexInfo, additionalParams map[string]any, +) error { + logger.GetLogger(ctx).Infof("[Postgres] Batch saving %d indices", len(indexInfoList)) + indexInfoDBList := make([]*pgVector, len(indexInfoList)) + for i := range indexInfoList { + indexInfoDBList[i] = toDBVectorEmbedding(indexInfoList[i], additionalParams) + } + err := 
g.db.WithContext(ctx).Clauses(clause.OnConflict{DoNothing: true}).Create(indexInfoDBList).Error + if err != nil { + logger.GetLogger(ctx).Errorf("[Postgres] Batch save failed: %v", err) + return err + } + logger.GetLogger(ctx).Infof("[Postgres] Successfully batch saved %d indices", len(indexInfoList)) + return nil +} + +// DeleteByChunkIDList deletes indices by chunk IDs +func (g *pgRepository) DeleteByChunkIDList(ctx context.Context, chunkIDList []string, dimension int) error { + logger.GetLogger(ctx).Infof("[Postgres] Deleting indices by chunk IDs, count: %d", len(chunkIDList)) + result := g.db.WithContext(ctx).Where("chunk_id IN ?", chunkIDList).Delete(&pgVector{}) + if result.Error != nil { + logger.GetLogger(ctx).Errorf("[Postgres] Failed to delete indices by chunk IDs: %v", result.Error) + return result.Error + } + logger.GetLogger(ctx).Infof("[Postgres] Successfully deleted %d indices by chunk IDs", result.RowsAffected) + return nil +} + +// DeleteByKnowledgeIDList deletes indices by knowledge IDs +func (g *pgRepository) DeleteByKnowledgeIDList(ctx context.Context, knowledgeIDList []string, dimension int) error { + logger.GetLogger(ctx).Infof("[Postgres] Deleting indices by knowledge IDs, count: %d", len(knowledgeIDList)) + result := g.db.WithContext(ctx).Where("knowledge_id IN ?", knowledgeIDList).Delete(&pgVector{}) + if result.Error != nil { + logger.GetLogger(ctx).Errorf("[Postgres] Failed to delete indices by knowledge IDs: %v", result.Error) + return result.Error + } + logger.GetLogger(ctx).Infof("[Postgres] Successfully deleted %d indices by knowledge IDs", result.RowsAffected) + return nil +} + +// Retrieve handles retrieval requests and routes to appropriate method +func (g *pgRepository) Retrieve(ctx context.Context, params types.RetrieveParams) ([]*types.RetrieveResult, error) { + logger.GetLogger(ctx).Debugf("[Postgres] Processing retrieval request of type: %s", params.RetrieverType) + switch params.RetrieverType { + case types.KeywordsRetrieverType: + return g.KeywordsRetrieve(ctx, params) + case types.VectorRetrieverType: + return g.VectorRetrieve(ctx, params) + } + err := errors.New("invalid retriever type") + logger.GetLogger(ctx).Errorf("[Postgres] %v: %s", err, params.RetrieverType) + return nil, err +} + +// KeywordsRetrieve performs keyword-based search using PostgreSQL full-text search +func (g *pgRepository) KeywordsRetrieve(ctx context.Context, + params types.RetrieveParams, +) ([]*types.RetrieveResult, error) { + logger.GetLogger(ctx).Infof("[Postgres] Keywords retrieval: query=%s, topK=%d", params.Query, params.TopK) + conds := make([]clause.Expression, 0) + if len(params.KnowledgeBaseIDs) > 0 { + logger.GetLogger(ctx).Debugf("[Postgres] Filtering by knowledge base IDs: %v", params.KnowledgeBaseIDs) + conds = append(conds, clause.Expr{ + SQL: fmt.Sprintf("knowledge_base_id @@@ 'in (%s)'", common.StringSliceJoin(params.KnowledgeBaseIDs)), + }) + } + conds = append(conds, clause.Expr{ + SQL: "id @@@ paradedb.match(field => 'content', value => ?, distance => 1)", + Vars: []interface{}{params.Query}, + }) + conds = append(conds, clause.OrderBy{Columns: []clause.OrderByColumn{ + {Column: clause.Column{Name: "score"}, Desc: true}, + }}) + + var embeddingDBList []pgVectorWithScore + err := g.db.WithContext(ctx).Clauses(conds...).Debug(). + Select([]string{ + "paradedb.score(id) as score", + "id", + "content", + "source_id", + "source_type", + "chunk_id", + "knowledge_id", + "knowledge_base_id", + }). + Limit(int(params.TopK)). 
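		// Illustrative sketch only: with the clauses assembled above, GORM should emit
		// roughly the following ParadeDB/BM25 statement (this assumes the pg_search
		// extension and the "embeddings" table from structs.go; the exact SQL GORM
		// generates may differ, and the knowledge base IDs shown are example values):
		//
		//   SELECT paradedb.score(id) AS score, id, content, source_id, source_type,
		//          chunk_id, knowledge_id, knowledge_base_id
		//   FROM embeddings
		//   WHERE knowledge_base_id @@@ 'in (kb-00000001)'
		//     AND id @@@ paradedb.match(field => 'content', value => '<query>', distance => 1)
		//   ORDER BY score DESC
		//   LIMIT <TopK>;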
+ Find(&embeddingDBList).Error + + if err == gorm.ErrRecordNotFound { + logger.GetLogger(ctx).Warnf("[Postgres] No records found for keywords query: %s", params.Query) + return nil, nil + } + if err != nil { + logger.GetLogger(ctx).Errorf("[Postgres] Keywords retrieval failed: %v", err) + return nil, err + } + + logger.GetLogger(ctx).Infof("[Postgres] Keywords retrieval found %d results", len(embeddingDBList)) + results := make([]*types.IndexWithScore, len(embeddingDBList)) + for i := range embeddingDBList { + results[i] = fromDBVectorEmbeddingWithScore(&embeddingDBList[i], types.MatchTypeKeywords) + logger.GetLogger(ctx).Debugf("[Postgres] Keywords result %d: chunk=%s, score=%f", + i, results[i].ChunkID, results[i].Score) + } + return []*types.RetrieveResult{ + { + Results: results, + RetrieverEngineType: types.PostgresRetrieverEngineType, + RetrieverType: types.KeywordsRetrieverType, + Error: nil, + }, + }, nil +} + +// VectorRetrieve performs vector similarity search using pgvector +func (g *pgRepository) VectorRetrieve(ctx context.Context, + params types.RetrieveParams, +) ([]*types.RetrieveResult, error) { + logger.GetLogger(ctx).Infof("[Postgres] Vector retrieval: dim=%d, topK=%d, threshold=%.4f", + len(params.Embedding), params.TopK, params.Threshold) + + conds := make([]clause.Expression, 0) + if len(params.KnowledgeBaseIDs) > 0 { + logger.GetLogger(ctx).Debugf( + "[Postgres] Filtering vector search by knowledge base IDs: %v", + params.KnowledgeBaseIDs, + ) + conds = append(conds, clause.IN{ + Column: "knowledge_base_id", + Values: common.ToInterfaceSlice(params.KnowledgeBaseIDs), + }) + } + // <=> Cosine similarity operator + // <-> L2 distance operator + // <#> Inner product operator + dimension := len(params.Embedding) + conds = append(conds, clause.Expr{SQL: "dimension = ?", Vars: []interface{}{dimension}}) + conds = append(conds, clause.Expr{ + SQL: fmt.Sprintf("embedding::halfvec(%d) <=> ?::halfvec < ?", dimension), + Vars: []interface{}{pgvector.NewHalfVector(params.Embedding), 1 - params.Threshold}, + }) + conds = append(conds, clause.OrderBy{Expression: clause.Expr{ + SQL: fmt.Sprintf("embedding::halfvec(%d) <=> ?::halfvec", dimension), + Vars: []interface{}{pgvector.NewHalfVector(params.Embedding)}, + }}) + + var embeddingDBList []pgVectorWithScore + + err := g.db.WithContext(ctx).Clauses(conds...). + Select(fmt.Sprintf( + "id, content, source_id, source_type, chunk_id, knowledge_id, knowledge_base_id, "+ + "(1 - (embedding::halfvec(%d) <=> ?::halfvec)) as score", + dimension, + ), pgvector.NewHalfVector(params.Embedding)). + Limit(int(params.TopK)). 
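		// Illustrative sketch only: pgvector's "<=>" operator returns cosine distance,
		// so the score is computed as 1 - distance and the threshold is applied as
		// "distance < 1 - Threshold". With a 768-dimensional embedding and Threshold
		// 0.5 (example values), the query built above is roughly:
		//
		//   SELECT id, content, source_id, source_type, chunk_id, knowledge_id, knowledge_base_id,
		//          (1 - (embedding::halfvec(768) <=> $1::halfvec)) AS score
		//   FROM embeddings
		//   WHERE knowledge_base_id IN (...) AND dimension = 768
		//     AND embedding::halfvec(768) <=> $1::halfvec < 1 - 0.5
		//   ORDER BY embedding::halfvec(768) <=> $1::halfvec
		//   LIMIT <TopK>;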
+ Find(&embeddingDBList).Error + + if err == gorm.ErrRecordNotFound { + logger.GetLogger(ctx).Warnf("[Postgres] No vector matches found that meet threshold %.4f", params.Threshold) + return nil, nil + } + if err != nil { + logger.GetLogger(ctx).Errorf("[Postgres] Vector retrieval failed: %v", err) + return nil, err + } + + logger.GetLogger(ctx).Infof("[Postgres] Vector retrieval found %d results", len(embeddingDBList)) + results := make([]*types.IndexWithScore, len(embeddingDBList)) + for i := range embeddingDBList { + results[i] = fromDBVectorEmbeddingWithScore(&embeddingDBList[i], types.MatchTypeEmbedding) + logger.GetLogger(ctx).Debugf("[Postgres] Vector search result %d: chunk_id %s, score %.4f", + i, results[i].ChunkID, results[i].Score) + } + return []*types.RetrieveResult{ + { + Results: results, + RetrieverEngineType: types.PostgresRetrieverEngineType, + RetrieverType: types.VectorRetrieverType, + Error: nil, + }, + }, nil +} + +// CopyIndices copies index data +func (g *pgRepository) CopyIndices(ctx context.Context, + sourceKnowledgeBaseID string, + sourceToTargetKBIDMap map[string]string, + sourceToTargetChunkIDMap map[string]string, + targetKnowledgeBaseID string, + dimension int, +) error { + logger.GetLogger(ctx).Infof( + "[Postgres] Copying indices, source knowledge base: %s, target knowledge base: %s, mapping count: %d", + sourceKnowledgeBaseID, targetKnowledgeBaseID, len(sourceToTargetChunkIDMap), + ) + + if len(sourceToTargetChunkIDMap) == 0 { + logger.GetLogger(ctx).Warnf("[Postgres] Mapping is empty, no need to copy") + return nil + } + + // Batch processing parameters + batchSize := 500 // Number of records to process per batch + offset := 0 // Offset for pagination + totalCopied := 0 // Total number of copied records + + for { + // Paginated query for source data + var sourceVectors []*pgVector + if err := g.db.WithContext(ctx). + Where("knowledge_base_id = ?", sourceKnowledgeBaseID). + Limit(batchSize). + Offset(offset). 
+ Find(&sourceVectors).Error; err != nil { + logger.GetLogger(ctx).Errorf("[Postgres] Failed to query source index data: %v", err) + return err + } + + // If no more data, exit the loop + if len(sourceVectors) == 0 { + if offset == 0 { + logger.GetLogger(ctx).Warnf("[Postgres] No source index data found") + } + break + } + + batchCount := len(sourceVectors) + logger.GetLogger(ctx).Infof( + "[Postgres] Found %d source index data, batch start position: %d", + batchCount, offset, + ) + + // Create target vector index + targetVectors := make([]*pgVector, 0, batchCount) + for _, sourceVector := range sourceVectors { + // Get the mapped target chunk ID + targetChunkID, ok := sourceToTargetChunkIDMap[sourceVector.ChunkID] + if !ok { + logger.GetLogger(ctx).Warnf( + "[Postgres] Source chunk %s not found in target chunk mapping, skipping", + sourceVector.ChunkID, + ) + continue + } + + // Get the mapped target knowledge ID + targetKnowledgeID, ok := sourceToTargetKBIDMap[sourceVector.KnowledgeID] + if !ok { + logger.GetLogger(ctx).Warnf( + "[Postgres] Source knowledge %s not found in target knowledge mapping, skipping", + sourceVector.KnowledgeID, + ) + continue + } + + // Create new vector index, copy the content and vector of the source index + targetVector := &pgVector{ + Content: sourceVector.Content, + SourceID: targetChunkID, // Update to target chunk ID + SourceType: sourceVector.SourceType, + ChunkID: targetChunkID, // Update to target chunk ID + KnowledgeID: targetKnowledgeID, // Update to target knowledge ID + KnowledgeBaseID: targetKnowledgeBaseID, // Update to target knowledge base ID + Dimension: sourceVector.Dimension, + Embedding: sourceVector.Embedding, // Copy the vector embedding directly, avoid recalculation + } + + targetVectors = append(targetVectors, targetVector) + } + + // Batch insert target vector index + if len(targetVectors) > 0 { + if err := g.db.WithContext(ctx). 
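			// Sketch of the effect of the OnConflict{DoNothing: true} clause below
			// (approximate SQL, not the exact statement GORM emits):
			//   INSERT INTO embeddings (content, source_id, ..., embedding)
			//   VALUES (...), (...) ON CONFLICT DO NOTHING
			// Rows that collide with an existing unique key are skipped instead of
			// failing the whole batch, so a retried copy does not abort midway.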
+ Clauses(clause.OnConflict{DoNothing: true}).Create(targetVectors).Error; err != nil { + logger.GetLogger(ctx).Errorf("[Postgres] Failed to batch create target index: %v", err) + return err + } + + totalCopied += len(targetVectors) + logger.GetLogger(ctx).Infof( + "[Postgres] Successfully copied batch data, batch size: %d, total copied: %d", + len(targetVectors), + totalCopied, + ) + } + + // Move to the next batch + offset += batchCount + + // If the number of returned records is less than the requested size, it means the last page has been reached + if batchCount < batchSize { + break + } + } + + logger.GetLogger(ctx).Infof("[Postgres] Index copying completed, total copied: %d", totalCopied) + return nil +} diff --git a/internal/application/repository/retriever/postgres/structs.go b/internal/application/repository/retriever/postgres/structs.go new file mode 100644 index 0000000..1f5eafd --- /dev/null +++ b/internal/application/repository/retriever/postgres/structs.go @@ -0,0 +1,87 @@ +package postgres + +import ( + "maps" + "slices" + "strconv" + "time" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/pgvector/pgvector-go" +) + +// pgVector defines the database model for vector embeddings storage +type pgVector struct { + ID uint `json:"id" gorm:"primarykey"` + CreatedAt time.Time `json:"created_at" gorm:"column:created_at"` + UpdatedAt time.Time `json:"updated_at" gorm:"column:updated_at"` + SourceID string `json:"source_id" gorm:"column:source_id;not null"` + SourceType int `json:"source_type" gorm:"column:source_type;not null"` + ChunkID string `json:"chunk_id" gorm:"column:chunk_id"` + KnowledgeID string `json:"knowledge_id" gorm:"column:knowledge_id"` + KnowledgeBaseID string `json:"knowledge_base_id" gorm:"column:knowledge_base_id"` + Content string `json:"content" gorm:"column:content;not null"` + Dimension int `json:"dimension" gorm:"column:dimension;not null"` + Embedding pgvector.HalfVector `json:"embedding" gorm:"column:embedding;not null"` +} + +// pgVectorWithScore extends pgVector with similarity score field +type pgVectorWithScore struct { + ID uint `json:"id" gorm:"primarykey"` + CreatedAt time.Time `json:"created_at" gorm:"column:created_at"` + UpdatedAt time.Time `json:"updated_at" gorm:"column:updated_at"` + SourceID string `json:"source_id" gorm:"column:source_id;not null"` + SourceType int `json:"source_type" gorm:"column:source_type;not null"` + ChunkID string `json:"chunk_id" gorm:"column:chunk_id"` + KnowledgeID string `json:"knowledge_id" gorm:"column:knowledge_id"` + KnowledgeBaseID string `json:"knowledge_base_id" gorm:"column:knowledge_base_id"` + Content string `json:"content" gorm:"column:content;not null"` + Dimension int `json:"dimension" gorm:"column:dimension;not null"` + Embedding pgvector.HalfVector `json:"embedding" gorm:"column:embedding;not null"` + Score float64 `json:"score" gorm:"column:score"` +} + +// TableName specifies the database table name for pgVectorWithScore +func (pgVectorWithScore) TableName() string { + return "embeddings" +} + +// TableName specifies the database table name for pgVector +func (pgVector) TableName() string { + return "embeddings" +} + +// toDBVectorEmbedding converts IndexInfo to pgVector database model +func toDBVectorEmbedding(indexInfo *types.IndexInfo, additionalParams map[string]any) *pgVector { + pgVector := &pgVector{ + SourceID: indexInfo.SourceID, + SourceType: int(indexInfo.SourceType), + ChunkID: indexInfo.ChunkID, + KnowledgeID: indexInfo.KnowledgeID, + KnowledgeBaseID: 
indexInfo.KnowledgeBaseID, + Content: indexInfo.Content, + } + // Add embedding data if available in additionalParams + if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), "embedding") { + if embeddingMap, ok := additionalParams["embedding"].(map[string][]float32); ok { + pgVector.Embedding = pgvector.NewHalfVector(embeddingMap[indexInfo.SourceID]) + pgVector.Dimension = len(pgVector.Embedding.Slice()) + } + } + return pgVector +} + +// fromDBVectorEmbeddingWithScore converts pgVectorWithScore to IndexWithScore domain model +func fromDBVectorEmbeddingWithScore(embedding *pgVectorWithScore, matchType types.MatchType) *types.IndexWithScore { + return &types.IndexWithScore{ + ID: strconv.FormatInt(int64(embedding.ID), 10), + SourceID: embedding.SourceID, + SourceType: types.SourceType(embedding.SourceType), + ChunkID: embedding.ChunkID, + KnowledgeID: embedding.KnowledgeID, + KnowledgeBaseID: embedding.KnowledgeBaseID, + Content: embedding.Content, + Score: embedding.Score, + MatchType: matchType, + } +} diff --git a/internal/application/repository/session.go b/internal/application/repository/session.go new file mode 100644 index 0000000..b0f12fe --- /dev/null +++ b/internal/application/repository/session.go @@ -0,0 +1,89 @@ +package repository + +import ( + "context" + "time" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "gorm.io/gorm" +) + +// sessionRepository implements the SessionRepository interface +type sessionRepository struct { + db *gorm.DB +} + +// NewSessionRepository creates a new session repository instance +func NewSessionRepository(db *gorm.DB) interfaces.SessionRepository { + return &sessionRepository{db: db} +} + +// Create creates a new session +func (r *sessionRepository) Create(ctx context.Context, session *types.Session) (*types.Session, error) { + session.CreatedAt = time.Now() + session.UpdatedAt = time.Now() + if err := r.db.WithContext(ctx).Create(session).Error; err != nil { + return nil, err + } + // Return the session with generated ID + return session, nil +} + +// Get retrieves a session by ID +func (r *sessionRepository) Get(ctx context.Context, tenantID uint, id string) (*types.Session, error) { + var session types.Session + err := r.db.WithContext(ctx).Where("tenant_id = ?", tenantID).First(&session, "id = ?", id).Error + if err != nil { + return nil, err + } + return &session, nil +} + +// GetByTenantID retrieves all sessions for a tenant +func (r *sessionRepository) GetByTenantID(ctx context.Context, tenantID uint) ([]*types.Session, error) { + var sessions []*types.Session + err := r.db.WithContext(ctx).Where("tenant_id = ?", tenantID).Order("created_at DESC").Find(&sessions).Error + if err != nil { + return nil, err + } + return sessions, nil +} + +// GetPagedByTenantID retrieves sessions for a tenant with pagination +func (r *sessionRepository) GetPagedByTenantID( + ctx context.Context, tenantID uint, page *types.Pagination, +) ([]*types.Session, int64, error) { + var sessions []*types.Session + var total int64 + + // First query the total count + err := r.db.WithContext(ctx).Model(&types.Session{}).Where("tenant_id = ?", tenantID).Count(&total).Error + if err != nil { + return nil, 0, err + } + + // Then query the paginated data + err = r.db.WithContext(ctx). + Where("tenant_id = ?", tenantID). + Order("created_at DESC"). + Offset(page.Offset()). + Limit(page.Limit()). 
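		// Roughly: SELECT * FROM sessions WHERE tenant_id = $1 ORDER BY created_at DESC
		// LIMIT $2 OFFSET $3 (illustrative; the table name follows GORM's default
		// pluralization here and the exact SQL depends on the types.Session model,
		// which is defined elsewhere).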
+		Find(&sessions).Error
+	if err != nil {
+		return nil, 0, err
+	}
+
+	return sessions, total, nil
+}
+
+// Update updates a session
+func (r *sessionRepository) Update(ctx context.Context, session *types.Session) error {
+	session.UpdatedAt = time.Now()
+	return r.db.WithContext(ctx).Where("tenant_id = ?", session.TenantID).Save(session).Error
+}
+
+// Delete deletes a session
+func (r *sessionRepository) Delete(ctx context.Context, tenantID uint, id string) error {
+	return r.db.WithContext(ctx).Where("tenant_id = ?", tenantID).Delete(&types.Session{}, "id = ?", id).Error
+}
diff --git a/internal/application/repository/tenant.go b/internal/application/repository/tenant.go
new file mode 100644
index 0000000..2faf007
--- /dev/null
+++ b/internal/application/repository/tenant.go
@@ -0,0 +1,82 @@
+package repository
+
+import (
+	"context"
+	"errors"
+
+	"github.com/Tencent/WeKnora/internal/logger"
+	"github.com/Tencent/WeKnora/internal/types"
+	"github.com/Tencent/WeKnora/internal/types/interfaces"
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+)
+
+var (
+	ErrTenantNotFound         = errors.New("tenant not found")
+	ErrTenantHasKnowledgeBase = errors.New("tenant has associated knowledge bases")
+)
+
+// tenantRepository implements tenant repository interface
+type tenantRepository struct {
+	db *gorm.DB
+}
+
+// NewTenantRepository creates a new tenant repository
+func NewTenantRepository(db *gorm.DB) interfaces.TenantRepository {
+	return &tenantRepository{db: db}
+}
+
+// CreateTenant creates tenant
+func (r *tenantRepository) CreateTenant(ctx context.Context, tenant *types.Tenant) error {
+	return r.db.WithContext(ctx).Create(tenant).Error
+}
+
+// GetTenantByID gets tenant by ID
+func (r *tenantRepository) GetTenantByID(ctx context.Context, id uint) (*types.Tenant, error) {
+	var tenant types.Tenant
+	if err := r.db.WithContext(ctx).Where("id = ?", id).First(&tenant).Error; err != nil {
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			return nil, ErrTenantNotFound
+		}
+		return nil, err
+	}
+	return &tenant, nil
+}
+
+// ListTenants lists all tenants
+func (r *tenantRepository) ListTenants(ctx context.Context) ([]*types.Tenant, error) {
+	var tenants []*types.Tenant
+	if err := r.db.WithContext(ctx).Order("created_at DESC").Find(&tenants).Error; err != nil {
+		return nil, err
+	}
+	return tenants, nil
+}
+
+// UpdateTenant updates tenant
+func (r *tenantRepository) UpdateTenant(ctx context.Context, tenant *types.Tenant) error {
+	return r.db.WithContext(ctx).Model(&types.Tenant{}).Where("id = ?", tenant.ID).Updates(tenant).Error
+}
+
+// DeleteTenant deletes tenant
+func (r *tenantRepository) DeleteTenant(ctx context.Context, id uint) error {
+	return r.db.WithContext(ctx).Where("id = ?", id).Delete(&types.Tenant{}).Error
+}
+
+func (r *tenantRepository) AdjustStorageUsed(ctx context.Context, tenantID uint, delta int64) error {
+	return r.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
+		var tenant types.Tenant
+		// Use a pessimistic lock to keep concurrent adjustments safe
+		if err := tx.Clauses(clause.Locking{Strength: "UPDATE"}).First(&tenant, tenantID).Error; err != nil {
+			return err
+		}
+
+		tenant.StorageUsed += delta
+		// Apply the update and enforce the business rule that usage never goes negative
+		if tenant.StorageUsed < 0 {
+			logger.Errorf(ctx, "tenant storage used is negative %d: %d", tenant.ID, tenant.StorageUsed)
+			tenant.StorageUsed = 0
+		}
+
+		return tx.Save(&tenant).Error
+	})
+}
diff --git a/internal/application/service/chat_pipline/chat_completion.go b/internal/application/service/chat_pipline/chat_completion.go
new file mode 100644
index 0000000..e589698
--- /dev/null
+++ 
b/internal/application/service/chat_pipline/chat_completion.go @@ -0,0 +1,60 @@ +package chatpipline + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// PluginChatCompletion implements chat completion functionality +// as a plugin that can be registered to EventManager +type PluginChatCompletion struct { + modelService interfaces.ModelService // Interface for model operations +} + +// NewPluginChatCompletion creates a new PluginChatCompletion instance +// and registers it with the EventManager +func NewPluginChatCompletion(eventManager *EventManager, modelService interfaces.ModelService) *PluginChatCompletion { + res := &PluginChatCompletion{ + modelService: modelService, + } + eventManager.Register(res) + return res +} + +// ActivationEvents returns the event types this plugin handles +func (p *PluginChatCompletion) ActivationEvents() []types.EventType { + return []types.EventType{types.CHAT_COMPLETION} +} + +// OnEvent handles chat completion events +// It prepares the chat model, messages, and calls the model to generate responses +func (p *PluginChatCompletion) OnEvent( + ctx context.Context, eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + logger.Info(ctx, "Starting chat completion") + + // Prepare chat model and options + chatModel, opt, err := prepareChatModel(ctx, p.modelService, chatManage) + if err != nil { + return ErrGetChatModel.WithError(err) + } + + // Prepare messages including conversation history + logger.Info(ctx, "Preparing chat messages with history") + chatMessages := prepareMessagesWithHistory(chatManage) + + // Call the chat model to generate response + logger.Info(ctx, "Calling chat model") + chatResponse, err := chatModel.Chat(ctx, chatMessages, opt) + if err != nil { + logger.Errorf(ctx, "Failed to call chat model: %v", err) + return ErrModelCall.WithError(err) + } + + logger.Info(ctx, "Chat completion successful") + chatManage.ChatResponse = chatResponse + return next() +} diff --git a/internal/application/service/chat_pipline/chat_completion_stream.go b/internal/application/service/chat_pipline/chat_completion_stream.go new file mode 100644 index 0000000..7f5644b --- /dev/null +++ b/internal/application/service/chat_pipline/chat_completion_stream.go @@ -0,0 +1,62 @@ +package chatpipline + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// PluginChatCompletionStream implements streaming chat completion functionality +// as a plugin that can be registered to EventManager +type PluginChatCompletionStream struct { + modelService interfaces.ModelService // Interface for model operations +} + +// NewPluginChatCompletionStream creates a new PluginChatCompletionStream instance +// and registers it with the EventManager +func NewPluginChatCompletionStream(eventManager *EventManager, + modelService interfaces.ModelService, +) *PluginChatCompletionStream { + res := &PluginChatCompletionStream{ + modelService: modelService, + } + eventManager.Register(res) + return res +} + +// ActivationEvents returns the event types this plugin handles +func (p *PluginChatCompletionStream) ActivationEvents() []types.EventType { + return []types.EventType{types.CHAT_COMPLETION_STREAM} +} + +// OnEvent handles streaming chat completion events +// It prepares the chat model, 
messages, and initiates streaming response +func (p *PluginChatCompletionStream) OnEvent(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + logger.Info(ctx, "Starting chat completion stream") + + // Prepare chat model and options + chatModel, opt, err := prepareChatModel(ctx, p.modelService, chatManage) + if err != nil { + return ErrGetChatModel.WithError(err) + } + + // Prepare base messages without history + logger.Info(ctx, "Preparing chat messages") + chatMessages := prepareMessagesWithHistory(chatManage) + + // Initiate streaming chat model call + logger.Info(ctx, "Calling chat stream model") + responseChan, err := chatModel.ChatStream(ctx, chatMessages, opt) + if err != nil { + logger.Errorf(ctx, "Failed to call chat stream model: %v", err) + return ErrModelCall.WithError(err) + } + + logger.Info(ctx, "Chat stream initiated successfully") + chatManage.ResponseChan = responseChan + return next() +} diff --git a/internal/application/service/chat_pipline/chat_pipline.go b/internal/application/service/chat_pipline/chat_pipline.go new file mode 100644 index 0000000..c82a7bb --- /dev/null +++ b/internal/application/service/chat_pipline/chat_pipline.go @@ -0,0 +1,161 @@ +package chatpipline + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/types" +) + +// Plugin defines the interface for chat pipeline plugins +// Plugins can handle specific events in the chat pipeline +type Plugin interface { + // OnEvent handles the event with given context and chat management object + OnEvent( + ctx context.Context, + eventType types.EventType, + chatManage *types.ChatManage, + next func() *PluginError, + ) *PluginError + // ActivationEvents returns the event types this plugin can handle + ActivationEvents() []types.EventType +} + +// EventManager manages plugins and their event handling +type EventManager struct { + // Map of event types to registered plugins + listeners map[types.EventType][]Plugin + // Map of event types to handler functions + handlers map[types.EventType]func(context.Context, types.EventType, *types.ChatManage) *PluginError +} + +// NewEventManager creates and initializes a new EventManager +func NewEventManager() *EventManager { + return &EventManager{ + listeners: make(map[types.EventType][]Plugin), + handlers: make(map[types.EventType]func(context.Context, types.EventType, *types.ChatManage) *PluginError), + } +} + +// Register adds a plugin to the EventManager and sets up its event handlers +func (e *EventManager) Register(plugin Plugin) { + if e.listeners == nil { + e.listeners = make(map[types.EventType][]Plugin) + } + if e.handlers == nil { + e.handlers = make(map[types.EventType]func(context.Context, types.EventType, *types.ChatManage) *PluginError) + } + for _, eventType := range plugin.ActivationEvents() { + e.listeners[eventType] = append(e.listeners[eventType], plugin) + e.handlers[eventType] = e.buildHandler(e.listeners[eventType]) + } +} + +// buildHandler constructs a handler chain for the given plugins +func (e *EventManager) buildHandler(plugins []Plugin) func( + ctx context.Context, eventType types.EventType, chatManage *types.ChatManage, +) *PluginError { + next := func(context.Context, types.EventType, *types.ChatManage) *PluginError { return nil } + for i := len(plugins) - 1; i >= 0; i-- { + current := plugins[i] + prevNext := next + next = func(ctx context.Context, eventType types.EventType, chatManage *types.ChatManage) *PluginError { + return current.OnEvent(ctx, 
eventType, chatManage, func() *PluginError { + return prevNext(ctx, eventType, chatManage) + }) + } + } + return next +} + +// Trigger invokes the handler for the specified event type +func (e *EventManager) Trigger(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, +) *PluginError { + if handler, ok := e.handlers[eventType]; ok { + return handler(ctx, eventType, chatManage) + } + return nil +} + +// PluginError represents an error in plugin execution +type PluginError struct { + Err error // Original error + Description string // Human-readable description + ErrorType string // Error type identifier +} + +// Predefined plugin errors +var ( + ErrSearchNothing = &PluginError{ + Description: "No relevant content found", + ErrorType: "search_nothing", + } + ErrSearch = &PluginError{ + Description: "Failed to search knowledge base", + ErrorType: "search_failed", + } + ErrRerank = &PluginError{ + Description: "Reranking failed", + ErrorType: "rerank_failed", + } + ErrGetRerankModel = &PluginError{ + Description: "Failed to get rerank model", + ErrorType: "get_rerank_model_failed", + } + ErrGetChatModel = &PluginError{ + Description: "Failed to get chat model", + ErrorType: "get_chat_model_failed", + } + ErrTemplateParse = &PluginError{ + Description: "Failed to parse context template", + ErrorType: "template_parse_failed", + } + ErrTemplateExecute = &PluginError{ + Description: "Failed to generate search content", + ErrorType: "template_execution_failed", + } + ErrModelCall = &PluginError{ + Description: "Failed to call model", + ErrorType: "model_call_failed", + } + ErrGetHistory = &PluginError{ + Description: "Failed to get conversation history", + ErrorType: "get_history_failed", + } +) + +// clone creates a copy of the PluginError +func (p *PluginError) clone() *PluginError { + return &PluginError{ + Description: p.Description, + ErrorType: p.ErrorType, + } +} + +// WithError attaches an error to the PluginError and returns a new instance +func (p *PluginError) WithError(err error) *PluginError { + pp := p.clone() + pp.Err = err + return pp +} + +// NewFallbackChan creates a channel with a single fallback response +func NewFallbackChan(ctx context.Context, fallbackResponse string) <-chan types.StreamResponse { + fallabackChan := make(chan types.StreamResponse) + go func() { + fallabackChan <- NewFallback(ctx, fallbackResponse) + close(fallabackChan) + }() + return fallabackChan +} + +// NewFallback creates a fallback stream response with the given content +func NewFallback(ctx context.Context, fallbackResponse string) types.StreamResponse { + requestID := ctx.Value(types.RequestIDContextKey).(string) + return types.StreamResponse{ + ID: requestID, + ResponseType: types.ResponseTypeAnswer, + Content: fallbackResponse, + Done: true, + } +} diff --git a/internal/application/service/chat_pipline/chat_pipline_test.go b/internal/application/service/chat_pipline/chat_pipline_test.go new file mode 100644 index 0000000..66b69ab --- /dev/null +++ b/internal/application/service/chat_pipline/chat_pipline_test.go @@ -0,0 +1,125 @@ +package chatpipline + +import ( + "context" + "fmt" + "testing" + + "github.com/Tencent/WeKnora/internal/types" +) + +// Define a test Plugin implementation +type testPlugin struct { + name string + events []types.EventType + shouldError bool + errorToReturn *PluginError +} + +func (p *testPlugin) OnEvent(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + if p.shouldError { 
+ return p.errorToReturn + } + fmt.Printf("Plugin %s triggered\n", p.name) + err := next() + fmt.Printf("Plugin %s finished\n", p.name) + return err +} + +func (p *testPlugin) ActivationEvents() []types.EventType { + return p.events +} + +func TestTrigger(t *testing.T) { + // Prepare test data + ctx := context.Background() + chatManage := &types.ChatManage{} + testEvent := types.EventType("test_event") + + // Test scenario 1: No plugins registered + t.Run("NoPluginsRegistered", func(t *testing.T) { + manager := &EventManager{} + err := manager.Trigger(ctx, testEvent, chatManage) + if err != nil { + t.Errorf("Expected nil error, got %v", err) + } + }) + + // Test scenario 2: Register a normally working plugin + t.Run("SinglePluginSuccess", func(t *testing.T) { + manager := &EventManager{} + plugin := &testPlugin{ + name: "test_plugin", + events: []types.EventType{testEvent}, + } + manager.Register(plugin) + + err := manager.Trigger(ctx, testEvent, chatManage) + if err != nil { + t.Errorf("Expected nil error, got %v", err) + } + }) + + // Test scenario 3: Plugin chain call + t.Run("PluginChain", func(t *testing.T) { + manager := &EventManager{} + plugin1 := &testPlugin{ + name: "plugin1", + events: []types.EventType{testEvent}, + } + plugin2 := &testPlugin{ + name: "plugin2", + events: []types.EventType{testEvent}, + } + manager.Register(plugin1) + manager.Register(plugin2) + + err := manager.Trigger(ctx, testEvent, chatManage) + if err != nil { + t.Errorf("Expected nil error, got %v", err) + } + }) + + // Test scenario 4: Plugin returns error + t.Run("PluginReturnsError", func(t *testing.T) { + manager := &EventManager{} + expectedErr := &PluginError{Description: "test error"} + plugin := &testPlugin{ + name: "error_plugin", + events: []types.EventType{testEvent}, + shouldError: true, + errorToReturn: expectedErr, + } + manager.Register(plugin) + + err := manager.Trigger(ctx, testEvent, chatManage) + if err != expectedErr { + t.Errorf("Expected error %v, got %v", expectedErr, err) + } + }) + + // Test scenario 5: A plugin in the chain returns error + t.Run("ErrorInPluginChain", func(t *testing.T) { + manager := &EventManager{} + expectedErr := &PluginError{Description: "test error"} + plugin1 := &testPlugin{ + name: "plugin1", + events: []types.EventType{testEvent}, + } + plugin2 := &testPlugin{ + name: "plugin2", + events: []types.EventType{testEvent}, + shouldError: true, + errorToReturn: expectedErr, + } + manager.Register(plugin1) + manager.Register(plugin2) + + err := manager.Trigger(ctx, testEvent, chatManage) + if err != expectedErr { + t.Errorf("Expected error %v, got %v", expectedErr, err) + } + }) +} diff --git a/internal/application/service/chat_pipline/common.go b/internal/application/service/chat_pipline/common.go new file mode 100644 index 0000000..9dff415 --- /dev/null +++ b/internal/application/service/chat_pipline/common.go @@ -0,0 +1,67 @@ +package chatpipline + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/chat" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// prepareChatModel shared logic to prepare chat model and options +func prepareChatModel(ctx context.Context, modelService interfaces.ModelService, + chatManage *types.ChatManage, +) (chat.Chat, *chat.ChatOptions, error) { + logger.Infof(ctx, "Getting chat model, model ID: %s", chatManage.ChatModelID) + + chatModel, err := modelService.GetChatModel(ctx, chatManage.ChatModelID) + if err 
!= nil { + logger.Errorf(ctx, "Failed to get chat model: %v", err) + return nil, nil, err + } + + logger.Info(ctx, "Setting up chat options") + opt := &chat.ChatOptions{ + Temperature: chatManage.SummaryConfig.Temperature, + TopP: chatManage.SummaryConfig.TopP, + Seed: chatManage.SummaryConfig.Seed, + MaxTokens: chatManage.SummaryConfig.MaxTokens, + MaxCompletionTokens: chatManage.SummaryConfig.MaxCompletionTokens, + FrequencyPenalty: chatManage.SummaryConfig.FrequencyPenalty, + PresencePenalty: chatManage.SummaryConfig.PresencePenalty, + } + + return chatModel, opt, nil +} + +// prepareBaseMessages prepare basic messages (system prompt and current user content) +func prepareBaseMessages(chatManage *types.ChatManage) []chat.Message { + var chatMessages []chat.Message + chatMessages = append(chatMessages, chat.Message{Role: "system", Content: chatManage.SummaryConfig.Prompt}) + chatMessages = append(chatMessages, chat.Message{Role: "user", Content: chatManage.UserContent}) + return chatMessages +} + +// prepareMessagesWithHistory prepare complete messages including history +func prepareMessagesWithHistory(chatManage *types.ChatManage) []chat.Message { + chatMessages := []chat.Message{ + {Role: "system", Content: chatManage.SummaryConfig.Prompt}, + } + + chatHistory := chatManage.History + if len(chatHistory) > 2 { + chatHistory = chatHistory[len(chatHistory)-2:] + } + + // Add conversation history + for _, history := range chatHistory { + chatMessages = append(chatMessages, chat.Message{Role: "user", Content: history.Query}) + chatMessages = append(chatMessages, chat.Message{Role: "assistant", Content: history.Answer}) + } + + // Add current user message + chatMessages = append(chatMessages, chat.Message{Role: "user", Content: chatManage.UserContent}) + + return chatMessages +} diff --git a/internal/application/service/chat_pipline/filter_top_k.go b/internal/application/service/chat_pipline/filter_top_k.go new file mode 100644 index 0000000..d57b4b8 --- /dev/null +++ b/internal/application/service/chat_pipline/filter_top_k.go @@ -0,0 +1,53 @@ +package chatpipline + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" +) + +// PluginFilterTopK is a plugin that filters search results to keep only the top K items +type PluginFilterTopK struct{} + +// NewPluginFilterTopK creates a new instance of PluginFilterTopK and registers it with the event manager +func NewPluginFilterTopK(eventManager *EventManager) *PluginFilterTopK { + res := &PluginFilterTopK{} + eventManager.Register(res) + return res +} + +// ActivationEvents returns the event types that this plugin responds to +func (p *PluginFilterTopK) ActivationEvents() []types.EventType { + return []types.EventType{types.FILTER_TOP_K} +} + +// OnEvent handles the FILTER_TOP_K event by filtering results to keep only the top K items +// It can filter MergeResult, RerankResult, or SearchResult depending on which is available +func (p *PluginFilterTopK) OnEvent(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + logger.Info(ctx, "Starting filter top-K process") + logger.Infof(ctx, "Filter configuration: top-K = %d", chatManage.RerankTopK) + + filterTopK := func(searchResult []*types.SearchResult, topK int) []*types.SearchResult { + if topK > 0 && len(searchResult) > topK { + logger.Infof(ctx, "Filtering results: before=%d, after=%d", len(searchResult), topK) + searchResult = searchResult[:topK] + } + return 
searchResult + } + + if len(chatManage.MergeResult) > 0 { + chatManage.MergeResult = filterTopK(chatManage.MergeResult, chatManage.RerankTopK) + } else if len(chatManage.RerankResult) > 0 { + chatManage.RerankResult = filterTopK(chatManage.RerankResult, chatManage.RerankTopK) + } else if len(chatManage.SearchResult) > 0 { + chatManage.SearchResult = filterTopK(chatManage.SearchResult, chatManage.RerankTopK) + } else { + logger.Info(ctx, "No results to filter") + } + + logger.Info(ctx, "Filter top-K process completed") + return next() +} diff --git a/internal/application/service/chat_pipline/into_chat_message.go b/internal/application/service/chat_pipline/into_chat_message.go new file mode 100644 index 0000000..87f5d4e --- /dev/null +++ b/internal/application/service/chat_pipline/into_chat_message.go @@ -0,0 +1,183 @@ +package chatpipline + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "html/template" + "regexp" + "strings" + "time" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" +) + +// PluginIntoChatMessage handles the transformation of search results into chat messages +type PluginIntoChatMessage struct{} + +// NewPluginIntoChatMessage creates and registers a new PluginIntoChatMessage instance +func NewPluginIntoChatMessage(eventManager *EventManager) *PluginIntoChatMessage { + res := &PluginIntoChatMessage{} + eventManager.Register(res) + return res +} + +// ActivationEvents returns the event types this plugin handles +func (p *PluginIntoChatMessage) ActivationEvents() []types.EventType { + return []types.EventType{types.INTO_CHAT_MESSAGE} +} + +// OnEvent processes the INTO_CHAT_MESSAGE event to format chat message content +func (p *PluginIntoChatMessage) OnEvent(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + // Extract content from merge results + passages := make([]string, len(chatManage.MergeResult)) + for i, result := range chatManage.MergeResult { + // 合并内容和图片信息 + passages[i] = getEnrichedPassageForChat(ctx, result) + } + + // Parse the context template + tmpl, err := template.New("searchContent").Parse(chatManage.SummaryConfig.ContextTemplate) + if err != nil { + return ErrTemplateParse.WithError(err) + } + + // Prepare weekday names for template + weekdayName := []string{"星期日", "星期一", "星期二", "星期三", "星期四", "星期五", "星期六"} + var userContent bytes.Buffer + + // Execute template with context data + err = tmpl.Execute(&userContent, map[string]interface{}{ + "Query": chatManage.Query, // User's original query + "Contexts": passages, // Extracted passages from search results + "CurrentTime": time.Now().Format("2006-01-02 15:04:05"), // Formatted current time + "CurrentWeek": weekdayName[time.Now().Weekday()], // Current weekday in Chinese + }) + if err != nil { + return ErrTemplateExecute.WithError(err) + } + + // Set formatted content back to chat management + chatManage.UserContent = userContent.String() + return next() +} + +// getEnrichedPassageForChat 合并Content和ImageInfo的文本内容,为聊天消息准备 +func getEnrichedPassageForChat(ctx context.Context, result *types.SearchResult) string { + // 如果没有图片信息,直接返回内容 + if result.Content == "" && result.ImageInfo == "" { + return "" + } + + // 如果只有内容,没有图片信息 + if result.ImageInfo == "" { + return result.Content + } + + // 处理图片信息并与内容合并 + return enrichContentWithImageInfo(ctx, result.Content, result.ImageInfo) +} + +// 正则表达式用于匹配Markdown图片链接 +var markdownImageRegex = regexp.MustCompile(`!\[([^\]]*)\]\(([^)]+)\)`) + +// 
enrichContentWithImageInfo merges image information into the text content
+func enrichContentWithImageInfo(ctx context.Context, content string, imageInfoJSON string) string {
+	// Parse ImageInfo
+	var imageInfos []types.ImageInfo
+	err := json.Unmarshal([]byte(imageInfoJSON), &imageInfos)
+	if err != nil {
+		logger.Warnf(ctx, "Failed to parse ImageInfo: %v, using content only", err)
+		return content
+	}
+
+	if len(imageInfos) == 0 {
+		return content
+	}
+
+	// Build a map from image URL to its image info
+	imageInfoMap := make(map[string]*types.ImageInfo)
+	for i := range imageInfos {
+		if imageInfos[i].URL != "" {
+			imageInfoMap[imageInfos[i].URL] = &imageInfos[i]
+		}
+		// Also index by the original URL
+		if imageInfos[i].OriginalURL != "" {
+			imageInfoMap[imageInfos[i].OriginalURL] = &imageInfos[i]
+		}
+	}
+
+	// Find all Markdown image links in the content
+	matches := markdownImageRegex.FindAllStringSubmatch(content, -1)
+
+	// Track image URLs that have already been processed
+	processedURLs := make(map[string]bool)
+
+	logger.Infof(ctx, "Found %d Markdown image links in content", len(matches))
+
+	// Replace each image link, appending its caption and OCR text
+	for _, match := range matches {
+		if len(match) < 3 {
+			continue
+		}
+
+		// Extract the image URL, ignoring the alt text
+		imgURL := match[2]
+
+		// Mark this URL as processed
+		processedURLs[imgURL] = true
+
+		// Look up the matching image info
+		imgInfo, found := imageInfoMap[imgURL]
+
+		// If matching image info is found, append its caption and OCR text
+		if found && imgInfo != nil {
+			replacement := match[0] + "\n"
+			if imgInfo.Caption != "" {
+				replacement += fmt.Sprintf("图片描述: %s\n", imgInfo.Caption)
+			}
+			if imgInfo.OCRText != "" {
+				replacement += fmt.Sprintf("图片文本: %s\n", imgInfo.OCRText)
+			}
+			content = strings.Replace(content, match[0], replacement, 1)
+		}
+	}
+
+	// Handle images that exist in ImageInfo but are not referenced in the content
+	var additionalImageTexts []string
+	for _, imgInfo := range imageInfos {
+		// Skip image URLs that have already been processed
+		if processedURLs[imgInfo.URL] || processedURLs[imgInfo.OriginalURL] {
+			continue
+		}
+
+		var imgTexts []string
+		if imgInfo.Caption != "" {
+			imgTexts = append(imgTexts, fmt.Sprintf("图片 %s 的描述信息: %s", imgInfo.URL, imgInfo.Caption))
+		}
+		if imgInfo.OCRText != "" {
+			imgTexts = append(imgTexts, fmt.Sprintf("图片 %s 的文本: %s", imgInfo.URL, imgInfo.OCRText))
+		}
+
+		if len(imgTexts) > 0 {
+			additionalImageTexts = append(additionalImageTexts, imgTexts...)
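			// Illustrative example (assumed data): an image that never appears as a
			// Markdown link in the content, say URL "https://example.com/fig1.png" with
			// caption "架构图" and OCR text "模块A -> 模块B", ends up appended to the
			// passage under the "附加图片信息:" section built just below.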
+ } + } + + // 如果有额外的图片信息,添加到内容末尾 + if len(additionalImageTexts) > 0 { + if content != "" { + content += "\n\n" + } + content += "附加图片信息:\n" + strings.Join(additionalImageTexts, "\n") + } + + logger.Debugf(ctx, "Enhanced content with image info: found %d Markdown images, added %d additional images", + len(matches), len(additionalImageTexts)) + + return content +} diff --git a/internal/application/service/chat_pipline/merge.go b/internal/application/service/chat_pipline/merge.go new file mode 100644 index 0000000..dbdde26 --- /dev/null +++ b/internal/application/service/chat_pipline/merge.go @@ -0,0 +1,175 @@ +package chatpipline + +import ( + "context" + "encoding/json" + "sort" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" +) + +// PluginMerge handles merging of search result chunks +type PluginMerge struct{} + +// NewPluginMerge creates and registers a new PluginMerge instance +func NewPluginMerge(eventManager *EventManager) *PluginMerge { + res := &PluginMerge{} + eventManager.Register(res) + return res +} + +// ActivationEvents returns the event types this plugin handles +func (p *PluginMerge) ActivationEvents() []types.EventType { + return []types.EventType{types.CHUNK_MERGE} +} + +// OnEvent processes the CHUNK_MERGE event to merge search result chunks +func (p *PluginMerge) OnEvent(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + logger.Info(ctx, "Starting chunk merge process") + + // Use rerank results if available, fallback to search results + searchResult := chatManage.RerankResult + if len(searchResult) == 0 { + logger.Info(ctx, "No rerank results available, using search results") + searchResult = chatManage.SearchResult + } + + logger.Infof(ctx, "Processing %d chunks for merging", len(searchResult)) + + if len(searchResult) == 0 { + logger.Info(ctx, "No chunks available for merging") + return next() + } + + // Group chunks by their knowledge source ID + knowledgeGroup := make(map[string][]*types.SearchResult) + for _, chunk := range searchResult { + knowledgeGroup[chunk.KnowledgeID] = append(knowledgeGroup[chunk.KnowledgeID], chunk) + } + + logger.Infof(ctx, "Grouped chunks by knowledge ID, %d knowledge sources", len(knowledgeGroup)) + + mergedChunks := []*types.SearchResult{} + // Process each knowledge source separately + for knowledgeID, chunks := range knowledgeGroup { + logger.Infof(ctx, "Processing knowledge ID: %s with %d chunks", knowledgeID, len(chunks)) + + // Sort chunks by their start position in the original document + sort.Slice(chunks, func(i, j int) bool { + if chunks[i].StartAt == chunks[j].StartAt { + return chunks[i].EndAt < chunks[j].EndAt + } + return chunks[i].StartAt < chunks[j].StartAt + }) + + // Merge overlapping or adjacent chunks + knowledgeMergedChunks := []*types.SearchResult{} + if chunks[0].ChunkType == types.ChunkTypeSummary { + knowledgeMergedChunks = append(knowledgeMergedChunks, chunks[0]) + // skip the first chunk if it is summary chunk + // This is to avoid merging the summary chunk with the first content chunk + chunks = chunks[1:] + } + if len(chunks) > 0 { + knowledgeMergedChunks = append(knowledgeMergedChunks, chunks[0]) + } + for i := 1; i < len(chunks); i++ { + lastChunk := knowledgeMergedChunks[len(knowledgeMergedChunks)-1] + // If the current chunk starts after the last chunk ends, add it to the merged chunks + if chunks[i].StartAt > lastChunk.EndAt { + knowledgeMergedChunks = append(knowledgeMergedChunks, 
chunks[i])
+				continue
+			}
+			// Merge overlapping chunks
+			if chunks[i].EndAt > lastChunk.EndAt {
+				lastChunk.Content = lastChunk.Content +
+					string([]rune(chunks[i].Content)[lastChunk.EndAt-chunks[i].StartAt:])
+				lastChunk.EndAt = chunks[i].EndAt
+				lastChunk.SubChunkID = append(lastChunk.SubChunkID, chunks[i].ID)
+
+				// Merge ImageInfo
+				if err := mergeImageInfo(ctx, lastChunk, chunks[i]); err != nil {
+					logger.Warnf(ctx, "Failed to merge ImageInfo: %v", err)
+				}
+			}
+			if chunks[i].Score > lastChunk.Score {
+				lastChunk.Score = chunks[i].Score
+			}
+		}
+
+		logger.Infof(ctx, "Merged %d chunks into %d chunks for knowledge ID: %s",
+			len(chunks), len(knowledgeMergedChunks), knowledgeID)
+
+		mergedChunks = append(mergedChunks, knowledgeMergedChunks...)
+	}
+
+	// Sort merged chunks by their score (highest first)
+	sort.Slice(mergedChunks, func(i, j int) bool {
+		return mergedChunks[i].Score > mergedChunks[j].Score
+	})
+
+	logger.Infof(ctx, "Final merged result: %d chunks, sorted by score", len(mergedChunks))
+
+	chatManage.MergeResult = mergedChunks
+	return next()
+}
+
+// mergeImageInfo merges the ImageInfo of two chunks
+func mergeImageInfo(ctx context.Context, target *types.SearchResult, source *types.SearchResult) error {
+	// If the source has no ImageInfo, there is nothing to merge
+	if source.ImageInfo == "" {
+		return nil
+	}
+
+	var sourceImageInfos []types.ImageInfo
+	if err := json.Unmarshal([]byte(source.ImageInfo), &sourceImageInfos); err != nil {
+		logger.Warnf(ctx, "Failed to unmarshal source ImageInfo: %v", err)
+		return err
+	}
+
+	// If the source ImageInfo list is empty, there is nothing to merge
+	if len(sourceImageInfos) == 0 {
+		return nil
+	}
+
+	// Parse the target's ImageInfo
+	var targetImageInfos []types.ImageInfo
+	if target.ImageInfo != "" {
+		if err := json.Unmarshal([]byte(target.ImageInfo), &targetImageInfos); err != nil {
+			logger.Warnf(ctx, "Failed to unmarshal target ImageInfo: %v", err)
+			// If parsing the target fails, fall back to using the source data directly
+			target.ImageInfo = source.ImageInfo
+			return nil
+		}
+	}
+
+	// Merge the ImageInfo lists
+	targetImageInfos = append(targetImageInfos, sourceImageInfos...)
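	// Example (hypothetical data): if the target held images [a.png, b.png] and the
	// source held [b.png, c.png], the combined slice is [a.png, b.png, b.png, c.png];
	// the URL-keyed de-duplication below reduces it to [a.png, b.png, c.png] before
	// the merged list is re-serialized into target.ImageInfo.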
+ + // 去重 + uniqueMap := make(map[string]bool) + uniqueImageInfos := make([]types.ImageInfo, 0, len(targetImageInfos)) + + for _, imgInfo := range targetImageInfos { + // 使用URL作为唯一标识 + if imgInfo.URL != "" && !uniqueMap[imgInfo.URL] { + uniqueMap[imgInfo.URL] = true + uniqueImageInfos = append(uniqueImageInfos, imgInfo) + } + } + + // 序列化合并后的ImageInfo + mergedImageInfoJSON, err := json.Marshal(uniqueImageInfos) + if err != nil { + logger.Warnf(ctx, "Failed to marshal merged ImageInfo: %v", err) + return err + } + + // 更新目标chunk的ImageInfo + target.ImageInfo = string(mergedImageInfoJSON) + logger.Infof(ctx, "Successfully merged ImageInfo, total count: %d", len(uniqueImageInfos)) + return nil +} diff --git a/internal/application/service/chat_pipline/preprocess.go b/internal/application/service/chat_pipline/preprocess.go new file mode 100644 index 0000000..dad7274 --- /dev/null +++ b/internal/application/service/chat_pipline/preprocess.go @@ -0,0 +1,180 @@ +package chatpipline + +import ( + "context" + "regexp" + "strings" + "unicode" + + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/yanyiwu/gojieba" +) + +// PluginPreprocess Query preprocessing plugin +type PluginPreprocess struct { + config *config.Config + jieba *gojieba.Jieba + stopwords map[string]struct{} +} + +// Regular expressions for text cleaning +var ( + multiSpaceRegex = regexp.MustCompile(`\s+`) // Multiple spaces + urlRegex = regexp.MustCompile(`https?://\S+`) // URLs + emailRegex = regexp.MustCompile(`\b[\w.%+-]+@[\w.-]+\.[a-zA-Z]{2,}\b`) // Email addresses + punctRegex = regexp.MustCompile(`[^\p{L}\p{N}\s]`) // Punctuation marks +) + +// NewPluginPreprocess Creates a new query preprocessing plugin +func NewPluginPreprocess( + eventManager *EventManager, + config *config.Config, + cleaner interfaces.ResourceCleaner, +) *PluginPreprocess { + // Use default dictionary for Jieba tokenizer + jieba := gojieba.NewJieba() + + // Load stopwords from built-in stopword library + stopwords := loadStopwords() + + res := &PluginPreprocess{ + config: config, + jieba: jieba, + stopwords: stopwords, + } + + // Register resource cleanup function + if cleaner != nil { + cleaner.RegisterWithName("JiebaPreprocessor", func() error { + res.Close() + return nil + }) + } + + eventManager.Register(res) + return res +} + +// Load stopwords +func loadStopwords() map[string]struct{} { + // Directly use some common stopwords built into Jieba + commonStopwords := []string{ + "的", "了", "和", "是", "在", "我", "你", "他", "她", "它", + "这", "那", "什么", "怎么", "如何", "为什么", "哪里", "什么时候", + "the", "is", "are", "am", "I", "you", "he", "she", "it", "this", + "that", "what", "how", "a", "an", "and", "or", "but", "if", "of", + "to", "in", "on", "at", "by", "for", "with", "about", "from", + "有", "无", "好", "来", "去", "说", "看", "想", "会", "可以", + "吗", "呢", "啊", "吧", "的话", "就是", "只是", "因为", "所以", + } + + result := make(map[string]struct{}, len(commonStopwords)) + for _, word := range commonStopwords { + result[word] = struct{}{} + } + return result +} + +// ActivationEvents Register activation events +func (p *PluginPreprocess) ActivationEvents() []types.EventType { + return []types.EventType{types.PREPROCESS_QUERY} +} + +// OnEvent Process events +func (p *PluginPreprocess) OnEvent(ctx context.Context, eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError) *PluginError { + if 
chatManage.RewriteQuery == "" { + return next() + } + + logger.GetLogger(ctx).Infof("Starting query preprocessing, original query: %s", chatManage.RewriteQuery) + + // 1. Basic text cleaning + processed := p.cleanText(chatManage.RewriteQuery) + + // 2. Tokenization + segments := p.segmentText(processed) + + // 3. Stopword filtering and reconstruction + filteredSegments := p.filterStopwords(segments) + + // Update preprocessed query + chatManage.ProcessedQuery = strings.Join(filteredSegments, " ") + + logger.GetLogger(ctx).Infof("Query preprocessing complete, processed query: %s", chatManage.ProcessedQuery) + + return next() +} + +// cleanText Basic text cleaning +func (p *PluginPreprocess) cleanText(text string) string { + // Remove URLs + text = urlRegex.ReplaceAllString(text, " ") + + // Remove email addresses + text = emailRegex.ReplaceAllString(text, " ") + + // Remove excessive spaces + text = multiSpaceRegex.ReplaceAllString(text, " ") + + // Remove punctuation marks + text = punctRegex.ReplaceAllString(text, " ") + + // Trim leading and trailing spaces + text = strings.TrimSpace(text) + + return text +} + +// segmentText Text tokenization +func (p *PluginPreprocess) segmentText(text string) []string { + // Use Jieba tokenizer for tokenization, using search engine mode + segments := p.jieba.CutForSearch(text, true) + return segments +} + +// filterStopwords Filter stopwords +func (p *PluginPreprocess) filterStopwords(segments []string) []string { + var filtered []string + + for _, word := range segments { + // If not a stopword and not blank, keep it + if _, isStopword := p.stopwords[word]; !isStopword && !isBlank(word) { + filtered = append(filtered, word) + } + } + + // If filtering results in empty list, return original tokenization results + if len(filtered) == 0 { + return segments + } + + return filtered +} + +// isBlank Check if a string is blank +func isBlank(str string) bool { + for _, r := range str { + if !unicode.IsSpace(r) { + return false + } + } + return true +} + +// Ensure resources are properly released +func (p *PluginPreprocess) Close() { + if p.jieba != nil { + p.jieba.Free() + p.jieba = nil + } +} + +// ShutdownHandler Returns shutdown function +func (p *PluginPreprocess) ShutdownHandler() func() { + return func() { + p.Close() + } +} diff --git a/internal/application/service/chat_pipline/rerank.go b/internal/application/service/chat_pipline/rerank.go new file mode 100644 index 0000000..c8ceea2 --- /dev/null +++ b/internal/application/service/chat_pipline/rerank.go @@ -0,0 +1,171 @@ +package chatpipline + +import ( + "context" + "encoding/json" + "fmt" + "math" + "strings" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/rerank" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// PluginRerank implements reranking functionality for chat pipeline +type PluginRerank struct { + modelService interfaces.ModelService // Service to access rerank models +} + +// NewPluginRerank creates a new rerank plugin instance +func NewPluginRerank(eventManager *EventManager, modelService interfaces.ModelService) *PluginRerank { + res := &PluginRerank{ + modelService: modelService, + } + eventManager.Register(res) + return res +} + +// ActivationEvents returns the event types this plugin handles +func (p *PluginRerank) ActivationEvents() []types.EventType { + return []types.EventType{types.CHUNK_RERANK} +} + +// OnEvent handles reranking events in the chat pipeline +func (p 
*PluginRerank) OnEvent(ctx context.Context,
+	eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError,
+) *PluginError {
+	logger.Info(ctx, "Starting reranking process")
+	logger.Infof(ctx, "Getting rerank model, model ID: %s", chatManage.RerankModelID)
+	if len(chatManage.SearchResult) == 0 {
+		logger.Infof(ctx, "No search result, skip reranking")
+		return next()
+	}
+	if chatManage.RerankModelID == "" {
+		logger.Warn(ctx, "Rerank model ID is empty, skipping reranking")
+		return next()
+	}
+
+	// Get rerank model from service
+	rerankModel, err := p.modelService.GetRerankModel(ctx, chatManage.RerankModelID)
+	if err != nil {
+		logger.Errorf(ctx, "Failed to get rerank model: %v, rerank model ID: %s", err, chatManage.RerankModelID)
+		return ErrGetRerankModel.WithError(err)
+	}
+
+	// Prepare passages for reranking
+	logger.Infof(ctx, "Preparing passages for reranking, search result count: %d", len(chatManage.SearchResult))
+	var passages []string
+	for _, result := range chatManage.SearchResult {
+		// Combine the chunk content with text extracted from its ImageInfo
+		passage := getEnrichedPassage(ctx, result)
+		passages = append(passages, passage)
+	}
+
+	// Try reranking with different query variants in priority order
+	rerankResp := p.rerank(ctx, chatManage, rerankModel, chatManage.RewriteQuery, passages)
+	if len(rerankResp) == 0 {
+		rerankResp = p.rerank(ctx, chatManage, rerankModel, chatManage.ProcessedQuery, passages)
+		if len(rerankResp) == 0 {
+			rerankResp = p.rerank(ctx, chatManage, rerankModel, chatManage.Query, passages)
+		}
+	}
+
+	// Update search results with reranked scores
+	logger.Infof(ctx, "Rerank results after threshold filtering: %d", len(rerankResp))
+	result := []*types.SearchResult{}
+	for _, rr := range rerankResp {
+		chatManage.SearchResult[rr.Index].Score = rr.RelevanceScore
+		result = append(result, chatManage.SearchResult[rr.Index])
+	}
+	chatManage.RerankResult = result
+
+	if len(chatManage.RerankResult) == 0 {
+		logger.Warn(ctx, "Reranking produced no results above threshold")
+		return ErrSearchNothing
+	}
+
+	logger.Infof(ctx, "Reranking process completed successfully, result count: %d", len(chatManage.RerankResult))
+	return next()
+}
+
+// rerank performs the actual reranking operation with given query and passages
+func (p *PluginRerank) rerank(ctx context.Context,
+	chatManage *types.ChatManage, rerankModel rerank.Reranker, query string, passages []string,
+) []rerank.RankResult {
+	logger.Infof(ctx, "Executing reranking with query: %s, passage count: %d", query, len(passages))
+	rerankResp, err := rerankModel.Rerank(ctx, query, passages)
+	if err != nil {
+		logger.Errorf(ctx, "Reranking failed: %v", err)
+		return nil
+	}
+
+	// Log top scores for debugging
+	logger.Infof(ctx, "Reranking completed, filtering results with threshold: %f", chatManage.RerankThreshold)
+	for i := range min(3, len(rerankResp)) {
+		logger.Infof(ctx, "Top %d score of rerankResp: %f, passages: %s, index: %d",
+			i+1, rerankResp[i].RelevanceScore, rerankResp[i].Document.Text, rerankResp[i].Index,
+		)
+	}
+
+	// Filter results based on threshold with special handling for history matches
+	rankFilter := []rerank.RankResult{}
+	for _, result := range rerankResp {
+		th := chatManage.RerankThreshold
+		matchType := chatManage.SearchResult[result.Index].MatchType
+		if matchType == types.MatchTypeHistory {
+			th = math.Max(th-0.1, 0.5) // Lower threshold for history matches
+		}
+		if result.RelevanceScore > th {
+			rankFilter = append(rankFilter, result)
+		}
+	}
+	return rankFilter
+}
+
+// getEnrichedPassage combines a chunk's Content with the text extracted from its ImageInfo
+func getEnrichedPassage(ctx context.Context, result *types.SearchResult) string {
+	if result.ImageInfo == "" {
+		return result.Content
+	}
+
+	// Parse ImageInfo
+	var imageInfos []types.ImageInfo
+	err := json.Unmarshal([]byte(result.ImageInfo), &imageInfos)
+	if err != nil {
+		logger.Warnf(ctx, "Failed to parse ImageInfo: %v, using content only", err)
+		return result.Content
+	}
+
+	if len(imageInfos) == 0 {
+		return result.Content
+	}
+
+	// Collect captions and OCR text from all images
+	var imageTexts []string
+	for _, img := range imageInfos {
+		if img.Caption != "" {
+			imageTexts = append(imageTexts, fmt.Sprintf("图片描述: %s", img.Caption))
+		}
+		if img.OCRText != "" {
+			imageTexts = append(imageTexts, fmt.Sprintf("图片文本: %s", img.OCRText))
+		}
+	}
+
+	if len(imageTexts) == 0 {
+		return result.Content
+	}
+
+	// Combine the content with the image information
+	combinedText := result.Content
+	if combinedText != "" {
+		combinedText += "\n\n"
+	}
+	combinedText += strings.Join(imageTexts, "\n")
+
+	logger.Debugf(ctx, "Enhanced passage with image info: content length %d, image texts length %d",
+		len(result.Content), len(strings.Join(imageTexts, "\n")))
+
+	return combinedText
+}
diff --git a/internal/application/service/chat_pipline/rewrite.go b/internal/application/service/chat_pipline/rewrite.go
new file mode 100644
index 0000000..9c118cb
--- /dev/null
+++ b/internal/application/service/chat_pipline/rewrite.go
@@ -0,0 +1,174 @@
+// Package chatpipline provides chat pipeline processing capabilities
+// Including query rewriting, history processing, model invocation and other features
+package chatpipline
+
+import (
+	"bytes"
+	"context"
+	"html/template"
+	"regexp"
+	"slices"
+	"sort"
+	"time"
+
+	"github.com/Tencent/WeKnora/internal/config"
+	"github.com/Tencent/WeKnora/internal/logger"
+	"github.com/Tencent/WeKnora/internal/models/chat"
+	"github.com/Tencent/WeKnora/internal/types"
+	"github.com/Tencent/WeKnora/internal/types/interfaces"
+)
+
+// PluginRewrite is a plugin for rewriting user queries
+// It uses historical dialog context and large language models to optimize the user's original query
+type PluginRewrite struct {
+	modelService   interfaces.ModelService   // Model service for calling large language models
+	messageService interfaces.MessageService // Message service for retrieving historical messages
+	config         *config.Config            // System configuration
+}
+
+// reg is a regular expression used to match and remove content between <think> tags
+var reg = regexp.MustCompile(`(?s)<think>.*?</think>`)
+
+// NewPluginRewrite creates a new query rewriting plugin instance
+// Also registers the plugin with the event manager
+func NewPluginRewrite(eventManager *EventManager,
+	modelService interfaces.ModelService, messageService interfaces.MessageService,
+	config *config.Config,
+) *PluginRewrite {
+	res := &PluginRewrite{
+		modelService:   modelService,
+		messageService: messageService,
+		config:         config,
+	}
+	eventManager.Register(res)
+	return res
+}
+
+// ActivationEvents returns the list of event types this plugin responds to
+// This plugin only responds to REWRITE_QUERY events
+func (p *PluginRewrite) ActivationEvents() []types.EventType {
+	return []types.EventType{types.REWRITE_QUERY}
+}
+
+// OnEvent processes triggered events
+// When receiving a REWRITE_QUERY event, it rewrites the user query using conversation history and the language model
+func (p *PluginRewrite) OnEvent(ctx context.Context,
+	eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError,
+) *PluginError {
+	// Initialize
rewritten query as original query + chatManage.RewriteQuery = chatManage.Query + + // Get conversation history + history, err := p.messageService.GetRecentMessagesBySession(ctx, chatManage.SessionID, 20) + if err != nil { + logger.Errorf(ctx, "Failed to get conversation history, session_id: %s, error: %v", chatManage.SessionID, err) + } + + // Convert historical messages to conversation history structure + historyMap := make(map[string]*types.History) + + // Process historical messages, grouped by requestID + for _, message := range history { + history, ok := historyMap[message.RequestID] + if !ok { + history = &types.History{} + } + if message.Role == "user" { + // User message as query + history.Query = message.Content + history.CreateAt = message.CreatedAt + } else { + // System message as answer, while removing thinking process + history.Answer = reg.ReplaceAllString(message.Content, "") + history.KnowledgeReferences = message.KnowledgeReferences + } + historyMap[message.RequestID] = history + } + + // Convert to list and filter incomplete conversations + historyList := make([]*types.History, 0) + for _, history := range historyMap { + if history.Answer != "" && history.Query != "" { + historyList = append(historyList, history) + } + } + + // Sort by time, keep the most recent conversations + sort.Slice(historyList, func(i, j int) bool { + return historyList[i].CreateAt.After(historyList[j].CreateAt) + }) + + // Limit the number of historical records + if len(historyList) > p.config.Conversation.MaxRounds { + historyList = historyList[:p.config.Conversation.MaxRounds] + } + + // Reverse to chronological order + slices.Reverse(historyList) + chatManage.History = historyList + + userTmpl, err := template.New("rewriteContent").Parse(p.config.Conversation.RewritePromptUser) + if err != nil { + logger.Errorf(ctx, "Failed to execute template, session_id: %s, error: %v", chatManage.SessionID, err) + return next() + } + systemTmpl, err := template.New("rewriteContent").Parse(p.config.Conversation.RewritePromptSystem) + if err != nil { + logger.GetLogger(ctx).Errorf("Failed to execute template, session_id: %s, error: %v", chatManage.SessionID, err) + return next() + } + currentTime := time.Now().Format("2006-01-02 15:04:05") + var userContent, systemContent bytes.Buffer + err = userTmpl.Execute(&userContent, map[string]interface{}{ + "Query": chatManage.Query, + "CurrentTime": currentTime, + "Yesterday": time.Now().AddDate(0, 0, -1).Format("2006-01-02"), + "Conversation": historyList, + }) + if err != nil { + logger.GetLogger(ctx).Errorf("Failed to execute template, session_id: %s, error: %v", chatManage.SessionID, err) + return next() + } + err = systemTmpl.Execute(&systemContent, map[string]interface{}{ + "Query": chatManage.Query, + "CurrentTime": currentTime, + "Yesterday": time.Now().AddDate(0, 0, -1).Format("2006-01-02"), + "Conversation": historyList, + }) + if err != nil { + logger.Errorf(ctx, "Failed to execute template, session_id: %s, error: %v", chatManage.SessionID, err) + return next() + } + rewriteModel, err := p.modelService.GetChatModel(ctx, chatManage.ChatModelID) + if err != nil { + logger.Errorf(ctx, "Failed to get model, session_id: %s, error: %v", chatManage.SessionID, err) + return next() + } + + // Call model to rewrite query + thinking := false + response, err := rewriteModel.Chat(ctx, []chat.Message{ + { + Role: "system", + Content: systemContent.String(), + }, + { + Role: "user", + Content: userContent.String(), + }, + }, &chat.ChatOptions{ + Temperature: 0.3, + 
MaxCompletionTokens: 50, + Thinking: &thinking, + }) + if err != nil { + logger.Errorf(ctx, "Failed to execute model, session_id: %s, error: %v", chatManage.SessionID, err) + return next() + } + + // Update rewritten query + chatManage.RewriteQuery = response.Content + logger.GetLogger(ctx).Infof("Rewritten query, session_id: %s, rewrite_query: %s", + chatManage.SessionID, chatManage.RewriteQuery) + return next() +} diff --git a/internal/application/service/chat_pipline/search.go b/internal/application/service/chat_pipline/search.go new file mode 100644 index 0000000..92a019d --- /dev/null +++ b/internal/application/service/chat_pipline/search.go @@ -0,0 +1,124 @@ +package chatpipline + +import ( + "context" + "strings" + + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// PluginSearch implements search functionality for chat pipeline +type PluginSearch struct { + knowledgeBaseService interfaces.KnowledgeBaseService + modelService interfaces.ModelService + config *config.Config +} + +func NewPluginSearch(eventManager *EventManager, + knowledgeBaseService interfaces.KnowledgeBaseService, + modelService interfaces.ModelService, + config *config.Config, +) *PluginSearch { + res := &PluginSearch{ + knowledgeBaseService: knowledgeBaseService, + modelService: modelService, + config: config, + } + eventManager.Register(res) + return res +} + +// ActivationEvents returns the event types this plugin handles +func (p *PluginSearch) ActivationEvents() []types.EventType { + return []types.EventType{types.CHUNK_SEARCH} +} + +// OnEvent handles search events in the chat pipeline +func (p *PluginSearch) OnEvent(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + // Prepare search parameters + searchParams := types.SearchParams{ + QueryText: strings.TrimSpace(chatManage.RewriteQuery), + VectorThreshold: chatManage.VectorThreshold, + KeywordThreshold: chatManage.KeywordThreshold, + MatchCount: chatManage.EmbeddingTopK, + } + logger.Infof(ctx, "Search parameters: %v", searchParams) + + // Perform initial hybrid search + searchResults, err := p.knowledgeBaseService.HybridSearch(ctx, chatManage.KnowledgeBaseID, searchParams) + logger.Infof(ctx, "Search results count: %d, error: %v", len(searchResults), err) + if err != nil { + return ErrSearch.WithError(err) + } + chatManage.SearchResult = searchResults + logger.Infof(ctx, "Search result count: %d", len(chatManage.SearchResult)) + + // Add relevant results from chat history + historyResult := p.getSearchResultFromHistory(chatManage) + if historyResult != nil { + logger.Infof(ctx, "Add history result, result count: %d", len(historyResult)) + chatManage.SearchResult = append(chatManage.SearchResult, historyResult...) + } + + // Try search with processed query if different from rewrite query + if chatManage.RewriteQuery != chatManage.ProcessedQuery { + searchParams.QueryText = strings.TrimSpace(chatManage.ProcessedQuery) + searchResults, err = p.knowledgeBaseService.HybridSearch(ctx, chatManage.KnowledgeBaseID, searchParams) + logger.Infof(ctx, "Search by processed query: %s, results count: %d, error: %v", + searchParams.QueryText, len(searchResults), err, + ) + if err != nil { + return ErrSearch.WithError(err) + } + chatManage.SearchResult = append(chatManage.SearchResult, searchResults...) 
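+		// The processed-query search and the history references can return chunks that are
+		// already in SearchResult; removeDuplicateResults below keeps only the first
+		// occurrence of each chunk ID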
+ } + // remove duplicate results + chatManage.SearchResult = removeDuplicateResults(chatManage.SearchResult) + + // Return if we have results + if len(chatManage.SearchResult) != 0 { + logger.Infof( + ctx, + "Get search results, count: %d, session_id: %s", + len(chatManage.SearchResult), chatManage.SessionID, + ) + return next() + } + logger.Infof(ctx, "No search result, session_id: %s", chatManage.SessionID) + return ErrSearchNothing +} + +// getSearchResultFromHistory retrieves relevant knowledge references from chat history +func (p *PluginSearch) getSearchResultFromHistory(chatManage *types.ChatManage) []*types.SearchResult { + if len(chatManage.History) == 0 { + return nil + } + // Search history in reverse chronological order + for i := len(chatManage.History) - 1; i >= 0; i-- { + if len(chatManage.History[i].KnowledgeReferences) > 0 { + // Mark all references as history matches + for _, reference := range chatManage.History[i].KnowledgeReferences { + reference.MatchType = types.MatchTypeHistory + } + return chatManage.History[i].KnowledgeReferences + } + } + return nil +} + +func removeDuplicateResults(results []*types.SearchResult) []*types.SearchResult { + seen := make(map[string]bool) + var uniqueResults []*types.SearchResult + for _, result := range results { + if !seen[result.ID] { + seen[result.ID] = true + uniqueResults = append(uniqueResults, result) + } + } + return uniqueResults +} diff --git a/internal/application/service/chat_pipline/stream_filter.go b/internal/application/service/chat_pipline/stream_filter.go new file mode 100644 index 0000000..06d65a5 --- /dev/null +++ b/internal/application/service/chat_pipline/stream_filter.go @@ -0,0 +1,84 @@ +package chatpipline + +import ( + "context" + "strings" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" +) + +// PluginStreamFilter implements stream filtering functionality for chat pipeline +type PluginStreamFilter struct{} + +// NewPluginStreamFilter creates a new stream filter plugin instance +func NewPluginStreamFilter(eventManager *EventManager) *PluginStreamFilter { + res := &PluginStreamFilter{} + eventManager.Register(res) + return res +} + +// ActivationEvents returns the event types this plugin handles +func (p *PluginStreamFilter) ActivationEvents() []types.EventType { + return []types.EventType{types.STREAM_FILTER} +} + +// OnEvent handles stream filtering events in the chat pipeline +func (p *PluginStreamFilter) OnEvent(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + logger.Info(ctx, "Starting stream filter") + logger.Info(ctx, "Creating new stream channel") + + // Create new stream channel and replace the original one + oldStream := chatManage.ResponseChan + newStream := make(chan types.StreamResponse) + chatManage.ResponseChan = newStream + + // Initialize response builder and check if no-match prefix filtering is needed + responseBuilder := &strings.Builder{} + matchNoMatchBuilderPrefix := chatManage.SummaryConfig.NoMatchPrefix != "" + + if matchNoMatchBuilderPrefix { + logger.Infof(ctx, "Using no match prefix filter: %s", chatManage.SummaryConfig.NoMatchPrefix) + } + + // Start goroutine to filter the stream + go func() { + logger.Info(ctx, "Starting stream filter goroutine") + for resp := range oldStream { + // Accumulate answer content + if resp.ResponseType == types.ResponseTypeAnswer { + responseBuilder.WriteString(resp.Content) + } + + // Skip filtering if no prefix matching is 
required + if !matchNoMatchBuilderPrefix { + newStream <- resp + continue + } + + // Check if content matches the no-match prefix + if !strings.HasPrefix(chatManage.SummaryConfig.NoMatchPrefix, responseBuilder.String()) { + resp.Content = responseBuilder.String() + newStream <- resp + logger.Info( + ctx, "Content does not match no-match prefix, passing through, content: ", + responseBuilder.String(), + ) + matchNoMatchBuilderPrefix = false + } + } + + // Handle NO_MATCH case when stream ends + if matchNoMatchBuilderPrefix { + logger.Info(ctx, "Content matches no-match prefix, using fallback response") + newStream <- NewFallback(ctx, chatManage.FallbackResponse) + } + logger.Info(ctx, "Stream filter completed, closing new stream") + close(newStream) + }() + + logger.Info(ctx, "Stream filter initialized") + return next() +} diff --git a/internal/application/service/chat_pipline/tracing.go b/internal/application/service/chat_pipline/tracing.go new file mode 100644 index 0000000..8a10e48 --- /dev/null +++ b/internal/application/service/chat_pipline/tracing.go @@ -0,0 +1,270 @@ +package chatpipline + +import ( + "context" + "encoding/json" + "strings" + "time" + + "github.com/Tencent/WeKnora/internal/tracing" + "github.com/Tencent/WeKnora/internal/types" + "go.opentelemetry.io/otel/attribute" +) + +// PluginTracing implements tracing functionality for chat pipeline events +type PluginTracing struct{} + +// NewPluginTracing creates a new tracing plugin instance +func NewPluginTracing(eventManager *EventManager) *PluginTracing { + res := &PluginTracing{} + eventManager.Register(res) + return res +} + +// ActivationEvents returns the event types this plugin handles +func (p *PluginTracing) ActivationEvents() []types.EventType { + return []types.EventType{ + types.CHUNK_SEARCH, + types.CHUNK_RERANK, + types.CHUNK_MERGE, + types.INTO_CHAT_MESSAGE, + types.CHAT_COMPLETION, + types.CHAT_COMPLETION_STREAM, + types.FILTER_TOP_K, + types.REWRITE_QUERY, + types.PREPROCESS_QUERY, + } +} + +// OnEvent handles incoming events and routes them to the appropriate tracing handler based on event type. +// It acts as the central dispatcher for all tracing-related events in the chat pipeline. 
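+// Event types without a dedicated handler in the switch below simply fall through to next().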
+// +// Parameters: +// - ctx: context.Context for request-scoped values, cancellation signals, and deadlines +// - eventType: the type of event being processed (e.g., CHUNK_SEARCH, CHAT_COMPLETION) +// - chatManage: contains all the chat-related data and state for the current request +// - next: callback function to continue processing in the pipeline +// +// Returns: +// - *PluginError: error if any occurred during processing, or nil if successful +func (p *PluginTracing) OnEvent(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + switch eventType { + case types.CHUNK_SEARCH: + return p.Search(ctx, eventType, chatManage, next) + case types.CHUNK_RERANK: + return p.Rerank(ctx, eventType, chatManage, next) + case types.CHUNK_MERGE: + return p.Merge(ctx, eventType, chatManage, next) + case types.INTO_CHAT_MESSAGE: + return p.IntoChatMessage(ctx, eventType, chatManage, next) + case types.CHAT_COMPLETION: + return p.ChatCompletion(ctx, eventType, chatManage, next) + case types.CHAT_COMPLETION_STREAM: + return p.ChatCompletionStream(ctx, eventType, chatManage, next) + case types.FILTER_TOP_K: + return p.FilterTopK(ctx, eventType, chatManage, next) + case types.REWRITE_QUERY: + return p.RewriteQuery(ctx, eventType, chatManage, next) + case types.PREPROCESS_QUERY: + return p.PreprocessQuery(ctx, eventType, chatManage, next) + } + return next() +} + +// Search traces search operations in the chat pipeline +func (p *PluginTracing) Search(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + _, span := tracing.ContextWithSpan(ctx, "PluginTracing.Search") + defer span.End() + span.SetAttributes( + attribute.String("query", chatManage.Query), + attribute.Float64("vector_threshold", chatManage.VectorThreshold), + attribute.Float64("keyword_threshold", chatManage.KeywordThreshold), + attribute.Int("match_count", chatManage.EmbeddingTopK), + ) + err := next() + searchResultJson, _ := json.Marshal(chatManage.SearchResult) + span.SetAttributes( + attribute.String("hybrid_search", string(searchResultJson)), + attribute.String("processed_query", chatManage.ProcessedQuery), + ) + return err +} + +// Rerank traces rerank operations in the chat pipeline +func (p *PluginTracing) Rerank(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + _, span := tracing.ContextWithSpan(ctx, "PluginTracing.Rerank") + defer span.End() + span.SetAttributes( + attribute.String("query", chatManage.Query), + attribute.Int("passages_count", len(chatManage.SearchResult)), + attribute.String("rerank_model_id", chatManage.RerankModelID), + attribute.Float64("rerank_filter_threshold", chatManage.RerankThreshold), + attribute.Int("rerank_filter_topk", chatManage.RerankTopK), + ) + err := next() + resultJson, _ := json.Marshal(chatManage.RerankResult) + span.SetAttributes( + attribute.Int("rerank_resp_count", len(chatManage.RerankResult)), + attribute.String("rerank_resp_results", string(resultJson)), + ) + return err +} + +// Merge traces merge operations in the chat pipeline +func (p *PluginTracing) Merge(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + _, span := tracing.ContextWithSpan(ctx, "PluginTracing.Merge") + defer span.End() + span.SetAttributes( + attribute.Int("search_results_count", len(chatManage.SearchResult)), + 
attribute.Int("rerank_results_count", len(chatManage.RerankResult)), + ) + err := next() + mergeResultJson, _ := json.Marshal(chatManage.MergeResult) + span.SetAttributes( + attribute.Int("merge_results_count", len(chatManage.MergeResult)), + attribute.String("merge_results", string(mergeResultJson)), + ) + return err +} + +// IntoChatMessage traces message conversion operations +func (p *PluginTracing) IntoChatMessage(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + _, span := tracing.ContextWithSpan(ctx, "PluginTracing.IntoChatMessage") + defer span.End() + span.SetAttributes( + attribute.Int("search_results_count", len(chatManage.SearchResult)), + attribute.Int("rerank_results_count", len(chatManage.RerankResult)), + attribute.Int("merge_results_count", len(chatManage.MergeResult)), + ) + err := next() + span.SetAttributes(attribute.Int("generated_content_length", len(chatManage.UserContent))) + return err +} + +// ChatCompletion traces chat completion operations +func (p *PluginTracing) ChatCompletion(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + _, span := tracing.ContextWithSpan(ctx, "PluginTracing.ChatCompletion") + defer span.End() + span.SetAttributes( + attribute.String("model_id", chatManage.ChatModelID), + attribute.String("system_prompt", chatManage.SummaryConfig.Prompt), + attribute.String("user_prompt", chatManage.UserContent), + attribute.Int("total_references", len(chatManage.RerankResult)), + ) + err := next() + span.SetAttributes( + attribute.String("chat_response", chatManage.ChatResponse.Content), + attribute.Int("chat_response_tokens", chatManage.ChatResponse.Usage.TotalTokens), + attribute.Int("chat_response_prompt_tokens", chatManage.ChatResponse.Usage.PromptTokens), + attribute.Int("chat_response_completion_tokens", chatManage.ChatResponse.Usage.CompletionTokens), + ) + return err +} + +// ChatCompletionStream traces streaming chat completion operations +func (p *PluginTracing) ChatCompletionStream(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + ctx, span := tracing.ContextWithSpan(ctx, "PluginTracing.ChatCompletionStream") + startTime := time.Now() + span.SetAttributes( + attribute.String("model_id", chatManage.ChatModelID), + attribute.String("system_prompt", chatManage.SummaryConfig.Prompt), + attribute.String("user_prompt", chatManage.UserContent), + attribute.Int("total_references", len(chatManage.RerankResult)), + ) + + responseBuilder := &strings.Builder{} + oldStream := chatManage.ResponseChan + newStream := make(chan types.StreamResponse) + chatManage.ResponseChan = newStream + + go func(ctx context.Context) { + for resp := range oldStream { + if resp.ResponseType == types.ResponseTypeAnswer { + responseBuilder.WriteString(resp.Content) + } + newStream <- resp + } + elapsedMS := time.Since(startTime).Milliseconds() + span.SetAttributes( + attribute.Bool("chat_completion_success", true), + attribute.Int64("response_time_ms", elapsedMS), + attribute.String("chat_response", responseBuilder.String()), + attribute.Int("final_response_length", responseBuilder.Len()), + attribute.Float64("tokens_per_second", float64(responseBuilder.Len())/float64(elapsedMS)*1000), + ) + span.End() + close(newStream) + }(ctx) + + return next() +} + +// FilterTopK traces filtering operations in the chat pipeline +func (p *PluginTracing) FilterTopK(ctx 
context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + _, span := tracing.ContextWithSpan(ctx, "PluginTracing.FilterTopK") + defer span.End() + span.SetAttributes( + attribute.Int("before_filter_search_results_count", len(chatManage.SearchResult)), + attribute.Int("before_filter_rerank_results_count", len(chatManage.RerankResult)), + attribute.Int("before_filter_merge_results_count", len(chatManage.MergeResult)), + ) + err := next() + span.SetAttributes( + attribute.Int("after_filter_search_results_count", len(chatManage.SearchResult)), + attribute.Int("after_filter_rerank_results_count", len(chatManage.RerankResult)), + attribute.Int("after_filter_merge_results_count", len(chatManage.MergeResult)), + ) + return err +} + +// RewriteQuery traces query rewriting operations +func (p *PluginTracing) RewriteQuery(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + _, span := tracing.ContextWithSpan(ctx, "PluginTracing.RewriteQuery") + defer span.End() + span.SetAttributes( + attribute.String("query", chatManage.Query), + ) + err := next() + span.SetAttributes( + attribute.String("rewrite_query", chatManage.RewriteQuery), + ) + return err +} + +// PreprocessQuery traces query preprocessing operations +func (p *PluginTracing) PreprocessQuery(ctx context.Context, + eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, +) *PluginError { + _, span := tracing.ContextWithSpan(ctx, "PluginTracing.PreprocessQuery") + defer span.End() + + span.SetAttributes( + attribute.String("query", chatManage.Query), + ) + + err := next() + + span.SetAttributes( + attribute.String("processed_query", chatManage.ProcessedQuery), + ) + + return err +} diff --git a/internal/application/service/chunk.go b/internal/application/service/chunk.go new file mode 100644 index 0000000..b82a4bd --- /dev/null +++ b/internal/application/service/chunk.go @@ -0,0 +1,278 @@ +// Package service provides business logic implementations for WeKnora application +// This package contains service layer implementations that coordinate between +// repositories and handlers, applying business rules and transaction management +package service + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// chunkService implements the ChunkService interface +// It provides operations for managing document chunks in the knowledge base +// Chunks are segments of documents that have been processed and prepared for indexing +type chunkService struct { + chunkRepository interfaces.ChunkRepository // Repository for chunk data persistence + kbRepository interfaces.KnowledgeBaseRepository + modelService interfaces.ModelService +} + +// NewChunkService creates a new chunk service +// It initializes a service with the provided chunk repository +// Parameters: +// - chunkRepository: Repository for chunk operations +// +// Returns: +// - interfaces.ChunkService: Initialized chunk service implementation +func NewChunkService( + chunkRepository interfaces.ChunkRepository, + kbRepository interfaces.KnowledgeBaseRepository, + modelService interfaces.ModelService, +) interfaces.ChunkService { + return &chunkService{ + chunkRepository: chunkRepository, + kbRepository: kbRepository, + modelService: modelService, + } +} + +// CreateChunks creates multiple chunks +// This method 
persists a batch of document chunks to the repository +// Parameters: +// - ctx: Context with authentication and request information +// - chunks: Slice of document chunks to create +// +// Returns: +// - error: Any error encountered during chunk creation +func (s *chunkService) CreateChunks(ctx context.Context, chunks []*types.Chunk) error { + logger.Info(ctx, "Start creating chunks") + logger.Infof(ctx, "Creating %d chunks", len(chunks)) + + err := s.chunkRepository.CreateChunks(ctx, chunks) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "chunk_count": len(chunks), + }) + return err + } + + logger.Info(ctx, "Chunks created successfully") + return nil +} + +// GetChunkByID retrieves a chunk by its ID +// This method fetches a specific chunk using its ID and validates tenant access +// Parameters: +// - ctx: Context with authentication and request information +// - knowledgeID: ID of the knowledge document containing the chunk +// - id: ID of the chunk to retrieve +// +// Returns: +// - *types.Chunk: Retrieved chunk if found +// - error: Any error encountered during retrieval +func (s *chunkService) GetChunkByID(ctx context.Context, knowledgeID string, id string) (*types.Chunk, error) { + logger.Info(ctx, "Start getting chunk by ID") + logger.Infof(ctx, "Getting chunk, ID: %s, knowledge ID: %s", id, knowledgeID) + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Tenant ID: %d", tenantID) + + chunk, err := s.chunkRepository.GetChunkByID(ctx, tenantID, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "chunk_id": id, + "knowledge_id": knowledgeID, + "tenant_id": tenantID, + }) + return nil, err + } + + logger.Info(ctx, "Chunk retrieved successfully") + return chunk, nil +} + +// ListChunksByKnowledgeID lists all chunks for a knowledge ID +// This method retrieves all chunks belonging to a specific knowledge document +// Parameters: +// - ctx: Context with authentication and request information +// - knowledgeID: ID of the knowledge document +// +// Returns: +// - []*types.Chunk: List of chunks belonging to the knowledge document +// - error: Any error encountered during retrieval +func (s *chunkService) ListChunksByKnowledgeID(ctx context.Context, knowledgeID string) ([]*types.Chunk, error) { + logger.Info(ctx, "Start listing chunks by knowledge ID") + logger.Infof(ctx, "Knowledge ID: %s", knowledgeID) + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Tenant ID: %d", tenantID) + + chunks, err := s.chunkRepository.ListChunksByKnowledgeID(ctx, tenantID, knowledgeID) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_id": knowledgeID, + "tenant_id": tenantID, + }) + return nil, err + } + + logger.Infof(ctx, "Retrieved %d chunks successfully", len(chunks)) + return chunks, nil +} + +// ListPagedChunksByKnowledgeID lists chunks for a knowledge ID with pagination +// This method retrieves chunks with pagination support for better performance with large datasets +// Parameters: +// - ctx: Context with authentication and request information +// - knowledgeID: ID of the knowledge document +// - page: Pagination parameters including page number and page size +// +// Returns: +// - *types.PageResult: Paginated result containing chunks and pagination metadata +// - error: Any error encountered during retrieval +func (s *chunkService) ListPagedChunksByKnowledgeID(ctx context.Context, + knowledgeID string, page *types.Pagination, +) 
(*types.PageResult, error) { + logger.Info(ctx, "Start listing paged chunks by knowledge ID") + logger.Infof(ctx, "Knowledge ID: %s, page: %d, page size: %d", knowledgeID, page.Page, page.PageSize) + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Tenant ID: %d", tenantID) + chunkType := []types.ChunkType{types.ChunkTypeText} + chunks, total, err := s.chunkRepository.ListPagedChunksByKnowledgeID(ctx, tenantID, knowledgeID, page, chunkType) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_id": knowledgeID, + "tenant_id": tenantID, + "page": page.Page, + "page_size": page.PageSize, + }) + return nil, err + } + + logger.Infof(ctx, "Retrieved %d chunks out of %d total chunks", len(chunks), total) + return types.NewPageResult(total, page, chunks), nil +} + +// updateChunk updates a chunk +// This method updates an existing chunk in the repository +// Parameters: +// - ctx: Context with authentication and request information +// - chunk: Chunk with updated fields +// +// Returns: +// - error: Any error encountered during update +// +// This method handles the actual update logic for a chunk, including updating the vector database representation +func (s *chunkService) UpdateChunk(ctx context.Context, chunk *types.Chunk) error { + logger.Infof(ctx, "Updating chunk, ID: %s, knowledge ID: %s", chunk.ID, chunk.KnowledgeID) + + // Update the chunk in the repository + err := s.chunkRepository.UpdateChunk(ctx, chunk) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "chunk_id": chunk.ID, + "knowledge_id": chunk.KnowledgeID, + }) + return err + } + + logger.Info(ctx, "Chunk updated successfully") + return nil +} + +// DeleteChunk deletes a chunk by ID +// This method removes a specific chunk from the repository +// Parameters: +// - ctx: Context with authentication and request information +// - id: ID of the chunk to delete +// +// Returns: +// - error: Any error encountered during deletion +func (s *chunkService) DeleteChunk(ctx context.Context, id string) error { + logger.Info(ctx, "Start deleting chunk") + logger.Infof(ctx, "Deleting chunk, ID: %s", id) + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Tenant ID: %d", tenantID) + + err := s.chunkRepository.DeleteChunk(ctx, tenantID, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "chunk_id": id, + "tenant_id": tenantID, + }) + return err + } + + logger.Info(ctx, "Chunk deleted successfully") + return nil +} + +// DeleteChunksByKnowledgeID deletes all chunks for a knowledge ID +// This method removes all chunks belonging to a specific knowledge document +// Parameters: +// - ctx: Context with authentication and request information +// - knowledgeID: ID of the knowledge document +// +// Returns: +// - error: Any error encountered during bulk deletion +func (s *chunkService) DeleteChunksByKnowledgeID(ctx context.Context, knowledgeID string) error { + logger.Info(ctx, "Start deleting all chunks by knowledge ID") + logger.Infof(ctx, "Knowledge ID: %s", knowledgeID) + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Tenant ID: %d", tenantID) + + err := s.chunkRepository.DeleteChunksByKnowledgeID(ctx, tenantID, knowledgeID) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_id": knowledgeID, + "tenant_id": tenantID, + }) + return err + } + + logger.Info(ctx, "All chunks under knowledge deleted successfully") + return nil +} + 
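+// DeleteByKnowledgeList deletes all chunks belonging to the given knowledge IDs
+// The deletion is scoped to the tenant ID carried in the request context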
+func (s *chunkService) DeleteByKnowledgeList(ctx context.Context, ids []string) error { + logger.Info(ctx, "Start deleting all chunks by knowledge IDs") + logger.Infof(ctx, "Knowledge IDs: %v", ids) + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Tenant ID: %d", tenantID) + + err := s.chunkRepository.DeleteByKnowledgeList(ctx, tenantID, ids) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_id": ids, + "tenant_id": tenantID, + }) + return err + } + + logger.Info(ctx, "All chunks under knowledge deleted successfully") + return nil +} + +func (s *chunkService) ListChunkByParentID(ctx context.Context, tenantID uint, parentID string) ([]*types.Chunk, error) { + logger.Info(ctx, "Start listing chunk by parent ID") + logger.Infof(ctx, "Parent ID: %s", parentID) + + chunks, err := s.chunkRepository.ListChunkByParentID(ctx, tenantID, parentID) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "parent_id": parentID, + "tenant_id": tenantID, + }) + return nil, err + } + + logger.Info(ctx, "Chunk listed successfully") + return chunks, nil +} diff --git a/internal/application/service/dataset.go b/internal/application/service/dataset.go new file mode 100644 index 0000000..259c656 --- /dev/null +++ b/internal/application/service/dataset.go @@ -0,0 +1,243 @@ +package service + +import ( + "context" + "errors" + "fmt" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/parquet-go/parquet-go" +) + +// DatasetService provides operations for working with datasets +type DatasetService struct{} + +// NewDatasetService creates a new DatasetService instance +func NewDatasetService() interfaces.DatasetService { + return &DatasetService{} +} + +// TextInfo represents text data with ID in parquet format +type TextInfo struct { + ID int64 `parquet:"id"` // Unique identifier + Text string `parquet:"text"` // Text content +} + +// RelsInfo represents question-passage relations in parquet format +type RelsInfo struct { + QID int64 `parquet:"qid"` // Question ID + PID int64 `parquet:"pid"` // Passage ID +} + +// QaInfo represents question-answer relations in parquet format +type QaInfo struct { + QID int64 `parquet:"qid"` // Question ID + AID int64 `parquet:"aid"` // Answer ID +} + +// GetDatasetByID retrieves QA pairs from dataset by ID +func (d *DatasetService) GetDatasetByID(ctx context.Context, datasetID string) ([]*types.QAPair, error) { + logger.Info(ctx, "Start getting dataset by ID") + logger.Infof(ctx, "Getting dataset with ID: %s", datasetID) + + dataset := DefaultDataset() + dataset.PrintStats(ctx) + qaPairs := dataset.Iterate() + + logger.Infof(ctx, "Retrieved %d QA pairs from dataset", len(qaPairs)) + return qaPairs, nil +} + +// DefaultDataset loads and initializes the default dataset from parquet files +func DefaultDataset() dataset { + datasetDir := "./dataset/samples" + queries, err := loadParquet[TextInfo](fmt.Sprintf("%s/queries.parquet", datasetDir)) + if err != nil { + panic(err) + } + corpus, err := loadParquet[TextInfo](fmt.Sprintf("%s/corpus.parquet", datasetDir)) + if err != nil { + panic(err) + } + answers, err := loadParquet[TextInfo](fmt.Sprintf("%s/answers.parquet", datasetDir)) + if err != nil { + panic(err) + } + qrels, err := loadParquet[RelsInfo](fmt.Sprintf("%s/qrels.parquet", datasetDir)) + if err != nil { + panic(err) + } + qas, err := 
loadParquet[QaInfo](fmt.Sprintf("%s/qas.parquet", datasetDir)) + if err != nil { + panic(err) + } + + res := dataset{ + queries: make(map[int64]string), // qid -> question text + corpus: make(map[int64]string), // pid -> passage text + answers: make(map[int64]string), // aid -> answer text + qrels: make(map[int64][]int64), // qid -> list of pid + qas: make(map[int64]int64), // qid -> aid + } + for _, qi := range queries { + res.queries[qi.ID] = qi.Text + } + for _, ci := range corpus { + res.corpus[ci.ID] = ci.Text + } + for _, ai := range answers { + res.answers[ai.ID] = ai.Text + } + for _, ri := range qrels { + res.qrels[ri.QID] = append(res.qrels[ri.QID], ri.PID) + } + for _, qi := range qas { + res.qas[qi.QID] = qi.AID + } + return res +} + +// dataset represents the in-memory dataset structure +type dataset struct { + queries map[int64]string // qid -> question text + corpus map[int64]string // pid -> passage text + answers map[int64]string // aid -> answer text + qrels map[int64][]int64 // qid -> list of related pids + qas map[int64]int64 // qid -> aid +} + +// Iterate generates QA pairs from the dataset +func (d *dataset) Iterate() []*types.QAPair { + var pairs []*types.QAPair + + for qid, question := range d.queries { + // Get answer info + aid, hasAnswer := d.qas[qid] + answer := "" + if hasAnswer { + answer = d.answers[aid] + } + + // Get related passages + pids := d.qrels[qid] + var pidStr []int + for _, pid := range pids { + pidStr = append(pidStr, int(pid)) + } + var passages []string + for _, pid := range pids { + passages = append(passages, d.corpus[pid]) + } + + pairs = append(pairs, &types.QAPair{ + QID: int(qid), + Question: question, + PIDs: pidStr, + Passages: passages, + AID: int(aid), + Answer: answer, + }) + } + + return pairs +} + +// GetContextForQID retrieves context passages for a given question ID +func (d *dataset) GetContextForQID(qid int64) ([]string, error) { + pids, ok := d.qrels[qid] + if !ok { + return nil, errors.New("question ID not found") + } + + var contextParts []string + for _, pid := range pids { + if text, exists := d.corpus[pid]; exists { + contextParts = append(contextParts, text) + } + } + + return contextParts, nil +} + +// PrintStats prints dataset statistics to the logger +func (d *dataset) PrintStats(ctx context.Context) { + logger.Infof(ctx, "QA System Statistics:") + logger.Infof(ctx, "- Total queries: %d", len(d.queries)) + logger.Infof(ctx, "- Total corpus passages: %d", len(d.corpus)) + logger.Infof(ctx, "- Total answers: %d", len(d.answers)) + + // Calculate average passages per query + totalRelations := 0 + for _, pids := range d.qrels { + totalRelations += len(pids) + } + avgPassages := float64(totalRelations) / float64(len(d.qrels)) + logger.Infof(ctx, "- Average passages per query: %.2f", avgPassages) + + // Calculate coverage + coveredQueries := len(d.qas) + coverage := float64(coveredQueries) / float64(len(d.queries)) * 100 + logger.Infof(ctx, "- Answer coverage: %.2f%% (%d/%d)", coverage, coveredQueries, len(d.queries)) +} + +// PrintRandomQA prints a random question with its related passages and answer +func (d *dataset) PrintRandomQA() error { + // Get a random qid + var qid int64 + for k := range d.qas { + qid = k + break + } + if qid == 0 { + return errors.New("no questions available") + } + + // Get question text + question, ok := d.queries[qid] + if !ok { + return fmt.Errorf("question %d not found", qid) + } + + // Get answer info + aid, ok := d.qas[qid] + if !ok { + return fmt.Errorf("answer for question %d not found", 
qid) + } + answer, ok := d.answers[aid] + if !ok { + return fmt.Errorf("answer %d not found", aid) + } + + // Print formatted QA + fmt.Println("===== Random QA =====") + fmt.Printf("QID: %d\n", qid) + fmt.Printf("Question: %s\n", question) + + // Print passages if available + if pids, exists := d.qrels[qid]; exists && len(pids) > 0 { + fmt.Println("\nRelated passages:") + for i, pid := range pids { + if text, exists := d.corpus[pid]; exists { + fmt.Printf("\nPassage %d (PID: %d):\n%s\n", i+1, pid, text) + } + } + } else { + fmt.Println("\nNo related passages found") + } + + // Print answer + fmt.Printf("\nAnswer (AID: %d):\n%s\n", aid, answer) + + return nil +} + +// loadParquet loads data from parquet file into specified type +func loadParquet[T any](filePath string) ([]T, error) { + rows, err := parquet.ReadFile[T](filePath) + if err != nil { + return nil, err + } + return rows, nil +} diff --git a/internal/application/service/evaluation.go b/internal/application/service/evaluation.go new file mode 100644 index 0000000..84c4a47 --- /dev/null +++ b/internal/application/service/evaluation.go @@ -0,0 +1,407 @@ +package service + +import ( + "context" + "errors" + "runtime" + "sync" + "time" + + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/google/uuid" + "golang.org/x/sync/errgroup" +) + +/* +corpus: pid -> content +queries: qid -> content +answers: aid -> content +qrels: qid -> pid +arels: qid -> aid +*/ + +// EvaluationService handles evaluation tasks for knowledge base and chat models +type EvaluationService struct { + config *config.Config // Application configuration + dataset interfaces.DatasetService // Service for dataset operations + knowledgeBaseService interfaces.KnowledgeBaseService // Service for knowledge base operations + knowledgeService interfaces.KnowledgeService // Service for knowledge operations + sessionService interfaces.SessionService // Service for chat sessions + testData *TestDataService // Service for test data + + evaluationMemoryStorage *evaluationMemoryStorage // In-memory storage for evaluation tasks +} + +func NewEvaluationService( + config *config.Config, + dataset interfaces.DatasetService, + knowledgeBaseService interfaces.KnowledgeBaseService, + knowledgeService interfaces.KnowledgeService, + sessionService interfaces.SessionService, + testData *TestDataService, +) interfaces.EvaluationService { + evaluationMemoryStorage := newEvaluationMemoryStorage() + return &EvaluationService{ + config: config, + dataset: dataset, + knowledgeBaseService: knowledgeBaseService, + knowledgeService: knowledgeService, + sessionService: sessionService, + testData: testData, + evaluationMemoryStorage: evaluationMemoryStorage, + } +} + +// evaluationMemoryStorage stores evaluation tasks in memory with thread-safe access +type evaluationMemoryStorage struct { + store map[string]*types.EvaluationDetail // Map of taskID to evaluation details + mu *sync.RWMutex // Read-write lock for concurrent access +} + +func newEvaluationMemoryStorage() *evaluationMemoryStorage { + res := &evaluationMemoryStorage{ + store: make(map[string]*types.EvaluationDetail), + mu: &sync.RWMutex{}, + } + return res +} + +func (e *evaluationMemoryStorage) register(params *types.EvaluationDetail) { + e.mu.Lock() + defer e.mu.Unlock() + logger.Infof(context.Background(), "Registering evaluation task: %s", params.Task.ID) + e.store[params.Task.ID] 
= params +} + +func (e *evaluationMemoryStorage) get(taskID string) (*types.EvaluationDetail, error) { + e.mu.RLock() + defer e.mu.RUnlock() + logger.Infof(context.Background(), "Getting evaluation task: %s", taskID) + res, ok := e.store[taskID] + if !ok { + return nil, errors.New("task not found") + } + return res, nil +} + +func (e *evaluationMemoryStorage) update(taskID string, fn func(params *types.EvaluationDetail)) error { + e.mu.Lock() + defer e.mu.Unlock() + params, ok := e.store[taskID] + if !ok { + return errors.New("task not found") + } + fn(params) + return nil +} + +func (e *EvaluationService) EvaluationResult(ctx context.Context, taskID string) (*types.EvaluationDetail, error) { + logger.Info(ctx, "Start getting evaluation result") + logger.Infof(ctx, "Task ID: %s", taskID) + + detail, err := e.evaluationMemoryStorage.get(taskID) + if err != nil { + logger.Errorf(ctx, "Failed to get evaluation task: %v", err) + return nil, err + } + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof( + ctx, + "Checking tenant ID match, task tenant ID: %d, current tenant ID: %d", + detail.Task.TenantID, tenantID, + ) + + if tenantID != detail.Task.TenantID { + logger.Error(ctx, "Tenant ID mismatch") + return nil, errors.New("tenant ID does not match") + } + + logger.Info(ctx, "Evaluation result retrieved successfully") + return detail, nil +} + +// Evaluation starts a new evaluation task with given parameters +// datasetID: ID of the dataset to evaluate against +// knowledgeBaseID: ID of the knowledge base to use (empty to create new) +// chatModelID: ID of the chat model to evaluate +// rerankModelID: ID of the rerank model to evaluate +func (e *EvaluationService) Evaluation(ctx context.Context, + datasetID string, knowledgeBaseID string, chatModelID string, rerankModelID string, +) (*types.EvaluationDetail, error) { + logger.Info(ctx, "Start evaluation") + logger.Infof(ctx, "Dataset ID: %s, Knowledge Base ID: %s, Chat Model ID: %s, Rerank Model ID: %s", + datasetID, knowledgeBaseID, chatModelID, rerankModelID) + + // Get tenant ID from context for multi-tenancy support + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Tenant ID: %d", tenantID) + + // Handle knowledge base creation if not provided + if knowledgeBaseID == "" { + logger.Info(ctx, "No knowledge base ID provided, creating new knowledge base") + // Create new knowledge base with default evaluation settings + kb, err := e.knowledgeBaseService.CreateKnowledgeBase(ctx, &types.KnowledgeBase{ + Name: "evaluation", + Description: "evaluation", + EmbeddingModelID: e.testData.EmbedModel.GetModelID(), + SummaryModelID: e.testData.LLMModel.GetModelID(), + }) + if err != nil { + logger.Errorf(ctx, "Failed to create knowledge base: %v", err) + return nil, err + } + knowledgeBaseID = kb.ID + logger.Infof(ctx, "Created new knowledge base with ID: %s", knowledgeBaseID) + } else { + logger.Infof(ctx, "Using existing knowledge base ID: %s", knowledgeBaseID) + // Create evaluation-specific knowledge base based on existing one + kb, err := e.knowledgeBaseService.GetKnowledgeBaseByID(ctx, knowledgeBaseID) + if err != nil { + logger.Errorf(ctx, "Failed to get knowledge base: %v", err) + return nil, err + } + + kb, err = e.knowledgeBaseService.CreateKnowledgeBase(ctx, &types.KnowledgeBase{ + Name: "evaluation", + Description: "evaluation", + EmbeddingModelID: kb.EmbeddingModelID, + SummaryModelID: kb.SummaryModelID, + }) + if err != nil { + logger.Errorf(ctx, "Failed to create knowledge base: %v", err) + 
return nil, err + } + knowledgeBaseID = kb.ID + logger.Infof(ctx, "Created new knowledge base with ID: %s based on existing one", knowledgeBaseID) + } + + // Set default values for optional parameters + if datasetID == "" { + datasetID = "default" + logger.Info(ctx, "Using default dataset") + } + + if rerankModelID == "" { + rerankModelID = e.testData.RerankModel.GetModelID() + logger.Infof(ctx, "Using default rerank model: %s", rerankModelID) + } + + if chatModelID == "" { + chatModelID = e.testData.LLMModel.GetModelID() + logger.Infof(ctx, "Using default chat model: %s", chatModelID) + } + + // Create evaluation task with unique ID + logger.Info(ctx, "Creating evaluation task") + taskID := uuid.New().String() + logger.Infof(ctx, "Generated task ID: %s", taskID) + + // Prepare evaluation detail with all parameters + detail := &types.EvaluationDetail{ + Task: &types.EvaluationTask{ + ID: taskID, + TenantID: tenantID, + DatasetID: datasetID, + Status: types.EvaluationStatuePending, + StartTime: time.Now(), + }, + Params: &types.ChatManage{ + KnowledgeBaseID: knowledgeBaseID, + VectorThreshold: e.config.Conversation.VectorThreshold, + KeywordThreshold: e.config.Conversation.KeywordThreshold, + EmbeddingTopK: e.config.Conversation.EmbeddingTopK, + RerankModelID: rerankModelID, + RerankTopK: e.config.Conversation.RerankTopK, + RerankThreshold: e.config.Conversation.RerankThreshold, + ChatModelID: chatModelID, + SummaryConfig: types.SummaryConfig{ + MaxTokens: e.config.Conversation.Summary.MaxTokens, + RepeatPenalty: e.config.Conversation.Summary.RepeatPenalty, + TopK: e.config.Conversation.Summary.TopK, + TopP: e.config.Conversation.Summary.TopP, + Prompt: e.config.Conversation.Summary.Prompt, + ContextTemplate: e.config.Conversation.Summary.ContextTemplate, + FrequencyPenalty: e.config.Conversation.Summary.FrequencyPenalty, + PresencePenalty: e.config.Conversation.Summary.PresencePenalty, + NoMatchPrefix: e.config.Conversation.Summary.NoMatchPrefix, + Temperature: e.config.Conversation.Summary.Temperature, + Seed: e.config.Conversation.Summary.Seed, + MaxCompletionTokens: e.config.Conversation.Summary.MaxCompletionTokens, + }, + FallbackResponse: e.config.Conversation.FallbackResponse, + }, + } + + // Store evaluation task in memory storage + logger.Info(ctx, "Registering evaluation task") + e.evaluationMemoryStorage.register(detail) + + // Start evaluation in background goroutine + logger.Info(ctx, "Starting evaluation in background") + go func() { + // Create new context with logger for background task + newCtx := logger.CloneContext(ctx) + logger.Infof(newCtx, "Background evaluation started for task ID: %s", taskID) + + // Update task status to running + detail.Task.Status = types.EvaluationStatueRunning + logger.Info(newCtx, "Evaluation task status set to running") + + // Execute actual evaluation + if err := e.EvalDataset(newCtx, detail); err != nil { + detail.Task.Status = types.EvaluationStatueFailed + detail.Task.ErrMsg = err.Error() + logger.Errorf(newCtx, "Evaluation task failed: %v, task ID: %s", err, taskID) + return + } + + // Mark task as completed successfully + logger.Infof(newCtx, "Evaluation task completed successfully, task ID: %s", taskID) + detail.Task.Status = types.EvaluationStatueSuccess + }() + + logger.Infof(ctx, "Evaluation task created successfully, task ID: %s", taskID) + return detail, nil +} + +// EvalDataset performs the actual evaluation of a dataset +// Processes each QA pair in parallel and records metrics +func (e *EvaluationService) EvalDataset(ctx 
context.Context, detail *types.EvaluationDetail) error { + logger.Info(ctx, "Start evaluating dataset") + logger.Infof(ctx, "Task ID: %s, Dataset ID: %s", detail.Task.ID, detail.Task.DatasetID) + + // Retrieve dataset from storage + dataset, err := e.dataset.GetDatasetByID(ctx, detail.Task.DatasetID) + if err != nil { + logger.Errorf(ctx, "Failed to get dataset: %v", err) + return err + } + logger.Infof(ctx, "Dataset retrieved successfully with %d QA pairs", len(dataset)) + + // Update total QA pairs count in task details + e.evaluationMemoryStorage.update(detail.Task.ID, func(params *types.EvaluationDetail) { + params.Task.Total = len(dataset) + logger.Infof(ctx, "Updated task total to %d QA pairs", params.Task.Total) + }) + + // Extract and organize passages from dataset + passages := getPassageList(dataset) + logger.Infof(ctx, "Creating knowledge from %d passages", len(passages)) + + // Create knowledge base from passages + knowledge, err := e.knowledgeService.CreateKnowledgeFromPassage(ctx, detail.Params.KnowledgeBaseID, passages) + if err != nil { + logger.Errorf(ctx, "Failed to create knowledge from passages: %v", err) + return err + } + logger.Infof(ctx, "Knowledge created successfully, ID: %s", knowledge.ID) + + // Setup cleanup of temporary resources + defer func() { + logger.Infof(ctx, "Cleaning up resources - deleting knowledge: %s", knowledge.ID) + if err := e.knowledgeService.DeleteKnowledge(ctx, knowledge.ID); err != nil { + logger.Errorf(ctx, "Failed to delete knowledge: %v, knowledge ID: %s", err, knowledge.ID) + } + + logger.Infof(ctx, "Cleaning up resources - deleting knowledge base: %s", detail.Params.KnowledgeBaseID) + if err := e.knowledgeBaseService.DeleteKnowledgeBase(ctx, detail.Params.KnowledgeBaseID); err != nil { + logger.Errorf( + ctx, + "Failed to delete knowledge base: %v, knowledge base ID: %s", + err, detail.Params.KnowledgeBaseID, + ) + } + }() + + // Initialize parallel evaluation metrics + var finished int + var mu sync.Mutex + var g errgroup.Group + metricHook := NewHookMetric(len(dataset)) + + // Set worker limit based on available CPUs + g.SetLimit(max(runtime.GOMAXPROCS(0)-1, 1)) + logger.Infof(ctx, "Starting evaluation with %d parallel workers", max(runtime.GOMAXPROCS(0)-1, 1)) + + // Process each QA pair in parallel + for i, qaPair := range dataset { + qaPair := qaPair + i := i + g.Go(func() error { + logger.Infof(ctx, "Processing QA pair %d, question: %s", i, qaPair.Question) + + // Prepare chat management parameters for this QA pair + chatManage := detail.Params.Clone() + chatManage.Query = qaPair.Question + + // Execute knowledge QA pipeline + logger.Infof(ctx, "Running knowledge QA for question: %s", qaPair.Question) + err = e.sessionService.KnowledgeQAByEvent(ctx, chatManage, types.Pipline["rag"]) + if err != nil { + logger.Errorf(ctx, "Failed to process question %d: %v", i, err) + return err + } + + // Record evaluation metrics + logger.Infof(ctx, "Recording metrics for QA pair %d", i) + metricHook.recordInit(i) + metricHook.recordQaPair(i, qaPair) + metricHook.recordSearchResult(i, chatManage.SearchResult) + metricHook.recordRerankResult(i, chatManage.RerankResult) + metricHook.recordChatResponse(i, chatManage.ChatResponse) + metricHook.recordFinish(i) + + // Update progress metrics + mu.Lock() + finished += 1 + metricResult := metricHook.MetricResult() + mu.Unlock() + e.evaluationMemoryStorage.update(detail.Task.ID, func(params *types.EvaluationDetail) { + params.Metric = metricResult + params.Task.Finished = finished + logger.Infof(ctx, 
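EvalDataset fans the QA pairs out over an errgroup whose concurrency is capped at GOMAXPROCS-1 and guards the shared progress counter with a mutex. The same pattern in a self-contained form, with an illustrative work item instead of the real QA pipeline:

	package main

	import (
		"fmt"
		"runtime"
		"sync"

		"golang.org/x/sync/errgroup"
	)

	func main() {
		items := []string{"q1", "q2", "q3", "q4", "q5"}

		var (
			mu       sync.Mutex
			finished int
			g        errgroup.Group
		)
		// Same limit the service uses: leave one CPU free, but never drop below one worker.
		g.SetLimit(max(runtime.GOMAXPROCS(0)-1, 1))

		for _, item := range items {
			item := item
			g.Go(func() error {
				// ... per-item work would go here ...
				_ = item

				mu.Lock()
				finished++
				done := finished
				mu.Unlock()

				fmt.Printf("progress %d/%d\n", done, len(items))
				return nil
			})
		}
		if err := g.Wait(); err != nil {
			fmt.Println("evaluation error:", err)
		}
	}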
"Updated task progress: %d/%d completed", finished, params.Task.Total) + }) + return nil + }) + } + + // Wait for all parallel evaluations to complete + logger.Info(ctx, "Waiting for all evaluation tasks to complete") + if err := g.Wait(); err != nil { + logger.Errorf(ctx, "Evaluation error: %v", err) + return err + } + + // Final update of evaluation metrics + e.evaluationMemoryStorage.update(detail.Task.ID, func(params *types.EvaluationDetail) { + params.Metric = metricHook.MetricResult() + params.Task.Finished = finished + }) + + logger.Infof(ctx, "Dataset evaluation completed successfully, task ID: %s", detail.Task.ID) + return nil +} + +// getPassageList extracts and organizes passages from QA pairs +// Returns a slice of passages indexed by their passage IDs +func getPassageList(dataset []*types.QAPair) []string { + pIDMap := make(map[int]string) + maxPID := 0 + for _, qaPair := range dataset { + for i := 0; i < len(qaPair.PIDs); i++ { + pIDMap[qaPair.PIDs[i]] = qaPair.Passages[i] + maxPID = max(maxPID, qaPair.PIDs[i]) + } + } + passages := make([]string, maxPID) + for i := 0; i < maxPID; i++ { + if _, ok := pIDMap[i]; ok { + passages[i] = pIDMap[i] + } + } + return passages +} diff --git a/internal/application/service/file/cos.go b/internal/application/service/file/cos.go new file mode 100644 index 0000000..cae2518 --- /dev/null +++ b/internal/application/service/file/cos.go @@ -0,0 +1,83 @@ +package file + +import ( + "context" + "fmt" + "io" + "mime/multipart" + "net/http" + "net/url" + "path/filepath" + "strings" + + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/google/uuid" + "github.com/tencentyun/cos-go-sdk-v5" +) + +// cosFileService implements the FileService interface for Tencent Cloud COS +type cosFileService struct { + client *cos.Client + bucketURL string + cosPathPrefix string +} + +// NewCosFileService creates a new COS file service instance +func NewCosFileService(appId, region, secretId, secretKey, cosPathPrefix string) (interfaces.FileService, error) { + bucketURL := fmt.Sprintf("https://%s.cos.%s.myqcloud.com", appId, region) + u, err := url.Parse(bucketURL) + if err != nil { + return nil, fmt.Errorf("failed to parse bucketURL: %w", err) + } + b := &cos.BaseURL{BucketURL: u} + client := cos.NewClient(b, &http.Client{ + Transport: &cos.AuthorizationTransport{ + SecretID: secretId, + SecretKey: secretKey, + }, + }) + return &cosFileService{ + client: client, + bucketURL: bucketURL, + cosPathPrefix: cosPathPrefix, + }, nil +} + +// SaveFile saves a file to COS storage +// It generates a unique name for the file and organizes it by tenant and knowledge ID +func (s *cosFileService) SaveFile(ctx context.Context, + file *multipart.FileHeader, tenantID uint, knowledgeID string, +) (string, error) { + ext := filepath.Ext(file.Filename) + objectName := fmt.Sprintf("%s/%d/%s/%s%s", s.cosPathPrefix, tenantID, knowledgeID, uuid.New().String(), ext) + src, err := file.Open() + if err != nil { + return "", fmt.Errorf("failed to open file: %w", err) + } + defer src.Close() + _, err = s.client.Object.Put(ctx, objectName, src, nil) + if err != nil { + return "", fmt.Errorf("failed to upload file to COS: %w", err) + } + return fmt.Sprintf("https://%s/%s", s.bucketURL, objectName), nil +} + +// GetFile retrieves a file from COS storage by its path URL +func (s *cosFileService) GetFile(ctx context.Context, filePathUrl string) (io.ReadCloser, error) { + objectName := strings.TrimPrefix(filePathUrl, s.bucketURL) + resp, err := s.client.Object.Get(ctx, objectName, 
nil) + if err != nil { + return nil, fmt.Errorf("failed to get file from COS: %w", err) + } + return resp.Body, nil +} + +// DeleteFile removes a file from COS storage +func (s *cosFileService) DeleteFile(ctx context.Context, filePath string) error { + objectName := strings.TrimPrefix(filePath, s.bucketURL) + _, err := s.client.Object.Delete(ctx, objectName) + if err != nil { + return fmt.Errorf("failed to delete file: %w", err) + } + return nil +} diff --git a/internal/application/service/file/dummy.go b/internal/application/service/file/dummy.go new file mode 100644 index 0000000..3c9e5ce --- /dev/null +++ b/internal/application/service/file/dummy.go @@ -0,0 +1,38 @@ +package file + +import ( + "context" + "errors" + "io" + "mime/multipart" + + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/google/uuid" +) + +// DummyFileService is a no-op implementation of the FileService interface +// used for testing or when file storage is not required +type DummyFileService struct{} + +// NewDummyFileService creates a new instance of DummyFileService +func NewDummyFileService() interfaces.FileService { + return &DummyFileService{} +} + +// SaveFile pretends to save a file but just returns a random UUID +// This is useful for testing without actual file operations +func (s *DummyFileService) SaveFile(ctx context.Context, + file *multipart.FileHeader, tenantID uint, knowledgeID string, +) (string, error) { + return uuid.New().String(), nil +} + +// GetFile always returns an error as dummy service doesn't store files +func (s *DummyFileService) GetFile(ctx context.Context, filePath string) (io.ReadCloser, error) { + return nil, errors.New("not implemented") +} + +// DeleteFile is a no-op operation that always succeeds +func (s *DummyFileService) DeleteFile(ctx context.Context, filePath string) error { + return nil +} diff --git a/internal/application/service/file/local.go b/internal/application/service/file/local.go new file mode 100644 index 0000000..de2c682 --- /dev/null +++ b/internal/application/service/file/local.go @@ -0,0 +1,111 @@ +package file + +import ( + "context" + "fmt" + "io" + "mime/multipart" + "os" + "path/filepath" + "time" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// localFileService implements the FileService interface for local file system storage +type localFileService struct { + baseDir string // Base directory for file storage +} + +// NewLocalFileService creates a new local file service instance +func NewLocalFileService(baseDir string) interfaces.FileService { + return &localFileService{ + baseDir: baseDir, + } +} + +// SaveFile stores an uploaded file to the local file system +// The file is stored in a directory structure: baseDir/tenantID/knowledgeID/filename +// Returns the full file path or an error if saving fails +func (s *localFileService) SaveFile(ctx context.Context, + file *multipart.FileHeader, tenantID uint, knowledgeID string, +) (string, error) { + logger.Info(ctx, "Starting to save file locally") + logger.Infof(ctx, "File information: name=%s, size=%d, tenant ID=%d, knowledge ID=%s", + file.Filename, file.Size, tenantID, knowledgeID) + + // Create storage directory with tenant and knowledge ID + dir := filepath.Join(s.baseDir, fmt.Sprintf("%d", tenantID), knowledgeID) + logger.Infof(ctx, "Creating directory: %s", dir) + if err := os.MkdirAll(dir, 0o755); err != nil { + logger.Errorf(ctx, "Failed to create directory: %v", err) + return "", fmt.Errorf("failed to create 
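The local, COS and dummy backends above, together with the MinIO backend that follows, all implement interfaces.FileService. The interface itself is declared elsewhere in the repository; reconstructed from these implementations its shape should be roughly the following (a reading aid, not copied source):

	// Reconstructed sketch of the contract the storage backends satisfy.
	type FileService interface {
		// SaveFile stores an uploaded file and returns a backend-specific path or URL.
		SaveFile(ctx context.Context, file *multipart.FileHeader, tenantID uint, knowledgeID string) (string, error)
		// GetFile opens a previously stored file for reading.
		GetFile(ctx context.Context, filePath string) (io.ReadCloser, error)
		// DeleteFile removes a previously stored file.
		DeleteFile(ctx context.Context, filePath string) error
	}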
directory: %w", err) + } + + // Generate unique filename using timestamp + ext := filepath.Ext(file.Filename) + filename := fmt.Sprintf("%d%s", time.Now().UnixNano(), ext) + filePath := filepath.Join(dir, filename) + logger.Infof(ctx, "Generated file path: %s", filePath) + + // Open source file for reading + logger.Info(ctx, "Opening source file") + src, err := file.Open() + if err != nil { + logger.Errorf(ctx, "Failed to open source file: %v", err) + return "", fmt.Errorf("failed to open file: %w", err) + } + defer src.Close() + + // Create destination file for writing + logger.Info(ctx, "Creating destination file") + dst, err := os.Create(filePath) + if err != nil { + logger.Errorf(ctx, "Failed to create destination file: %v", err) + return "", fmt.Errorf("failed to create file: %w", err) + } + defer dst.Close() + + // Copy content from source to destination + logger.Info(ctx, "Copying file content") + if _, err := io.Copy(dst, src); err != nil { + logger.Errorf(ctx, "Failed to copy file content: %v", err) + return "", fmt.Errorf("failed to save file: %w", err) + } + + logger.Infof(ctx, "File saved successfully: %s", filePath) + return filePath, nil +} + +// GetFile retrieves a file from the local file system by its path +// Returns a ReadCloser for reading the file content +func (s *localFileService) GetFile(ctx context.Context, filePath string) (io.ReadCloser, error) { + logger.Infof(ctx, "Getting file: %s", filePath) + + // Open the file for reading + file, err := os.Open(filePath) + if err != nil { + logger.Errorf(ctx, "Failed to open file: %v", err) + return nil, fmt.Errorf("failed to open file: %w", err) + } + + logger.Info(ctx, "File opened successfully") + return file, nil +} + +// DeleteFile removes a file from the local file system +// Returns an error if deletion fails +func (s *localFileService) DeleteFile(ctx context.Context, filePath string) error { + logger.Infof(ctx, "Deleting file: %s", filePath) + + // Remove the file + err := os.Remove(filePath) + if err != nil { + logger.Errorf(ctx, "Failed to delete file: %v", err) + return fmt.Errorf("failed to delete file: %w", err) + } + + logger.Info(ctx, "File deleted successfully") + return nil +} diff --git a/internal/application/service/file/minio.go b/internal/application/service/file/minio.go new file mode 100644 index 0000000..af2141f --- /dev/null +++ b/internal/application/service/file/minio.go @@ -0,0 +1,126 @@ +package file + +import ( + "context" + "fmt" + "io" + "mime/multipart" + "path/filepath" + + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/google/uuid" + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" +) + +// minioFileService MinIO file service implementation +type minioFileService struct { + client *minio.Client + bucketName string +} + +// NewMinioFileService creates a MinIO file service +func NewMinioFileService(endpoint, + accessKeyID, secretAccessKey, bucketName string, useSSL bool) (interfaces.FileService, error) { + // Initialize MinIO client + client, err := minio.New(endpoint, &minio.Options{ + Creds: credentials.NewStaticV4(accessKeyID, secretAccessKey, ""), + Secure: useSSL, + }) + if err != nil { + return nil, fmt.Errorf("failed to initialize MinIO client: %w", err) + } + + // Check if bucket exists, create if not + exists, err := client.BucketExists(context.Background(), bucketName) + if err != nil { + return nil, fmt.Errorf("failed to check bucket: %w", err) + } + + if !exists { + err = client.MakeBucket(context.Background(), bucketName, 
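Which of these backends is instantiated is a configuration decision; the real wiring lives in the service container rather than in these files, so the switch below is only a sketch with an invented StorageConfig type, reusing the constructor signatures shown in this diff:

	// Hedged sketch: StorageConfig and the selection logic are illustrative, not project code.
	type StorageConfig struct {
		Type           string // "local", "minio", "cos"; anything else falls back to the dummy backend
		LocalBaseDir   string
		MinioEndpoint  string
		MinioAccessKey string
		MinioSecretKey string
		MinioBucket    string
		CosAppID       string
		CosRegion      string
		CosSecretID    string
		CosSecretKey   string
		CosPathPrefix  string
	}

	func newFileService(cfg StorageConfig) (interfaces.FileService, error) {
		switch cfg.Type {
		case "local":
			return NewLocalFileService(cfg.LocalBaseDir), nil
		case "minio":
			// useSSL hard-coded to false here purely for brevity.
			return NewMinioFileService(cfg.MinioEndpoint, cfg.MinioAccessKey, cfg.MinioSecretKey, cfg.MinioBucket, false)
		case "cos":
			return NewCosFileService(cfg.CosAppID, cfg.CosRegion, cfg.CosSecretID, cfg.CosSecretKey, cfg.CosPathPrefix)
		default:
			return NewDummyFileService(), nil
		}
	}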
minio.MakeBucketOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create bucket: %w", err) + } + } + + return &minioFileService{ + client: client, + bucketName: bucketName, + }, nil +} + +// SaveFile saves a file to MinIO +func (s *minioFileService) SaveFile(ctx context.Context, + file *multipart.FileHeader, tenantID uint, knowledgeID string, +) (string, error) { + // Generate object name + ext := filepath.Ext(file.Filename) + objectName := fmt.Sprintf("%d/%s/%s%s", tenantID, knowledgeID, uuid.New().String(), ext) + + // Open file + src, err := file.Open() + if err != nil { + return "", fmt.Errorf("failed to open file: %w", err) + } + defer src.Close() + + // Upload file to MinIO + _, err = s.client.PutObject(ctx, s.bucketName, objectName, src, file.Size, minio.PutObjectOptions{ + ContentType: file.Header.Get("Content-Type"), + }) + if err != nil { + return "", fmt.Errorf("failed to upload file to MinIO: %w", err) + } + + // Return the complete path to the object + return fmt.Sprintf("minio://%s/%s", s.bucketName, objectName), nil +} + +// GetFile gets a file from MinIO +func (s *minioFileService) GetFile(ctx context.Context, filePath string) (io.ReadCloser, error) { + // Parse MinIO path + // Format: minio://bucketName/objectName + if len(filePath) < 9 || filePath[:8] != "minio://" { + return nil, fmt.Errorf("invalid MinIO file path: %s", filePath) + } + + // Extract object name + objectName := filePath[9+len(s.bucketName):] + if objectName[0] == '/' { + objectName = objectName[1:] + } + + // Get object + obj, err := s.client.GetObject(ctx, s.bucketName, objectName, minio.GetObjectOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get file from MinIO: %w", err) + } + + return obj, nil +} + +// DeleteFile deletes a file +func (s *minioFileService) DeleteFile(ctx context.Context, filePath string) error { + // Parse MinIO path + // Format: minio://bucketName/objectName + if len(filePath) < 9 || filePath[:8] != "minio://" { + return fmt.Errorf("invalid MinIO file path: %s", filePath) + } + + // Extract object name + objectName := filePath[9+len(s.bucketName):] + if objectName[0] == '/' { + objectName = objectName[1:] + } + + // Delete object + err := s.client.RemoveObject(ctx, s.bucketName, objectName, minio.RemoveObjectOptions{ + GovernanceBypass: true, + }) + if err != nil { + return fmt.Errorf("failed to delete file: %w", err) + } + + return nil +} diff --git a/internal/application/service/graph.go b/internal/application/service/graph.go new file mode 100644 index 0000000..d1271db --- /dev/null +++ b/internal/application/service/graph.go @@ -0,0 +1,995 @@ +package service + +import ( + "context" + "encoding/json" + "fmt" + "math" + "slices" + "strings" + "sync" + "time" + + "github.com/Tencent/WeKnora/internal/common" + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/chat" + "github.com/Tencent/WeKnora/internal/models/utils" + "github.com/Tencent/WeKnora/internal/types" + "github.com/google/uuid" + "golang.org/x/sync/errgroup" +) + +const ( + // DefaultLLMTemperature Use low temperature for more deterministic results + DefaultLLMTemperature = 0.1 + + // PMIWeight Proportion of PMI in calculating relationship weight + PMIWeight = 0.6 + + // StrengthWeight Proportion of relationship strength in calculating relationship weight + StrengthWeight = 0.4 + + // IndirectRelationWeightDecay Decay coefficient for indirect relationship weights + IndirectRelationWeightDecay = 0.5 + + 
// MaxConcurrentEntityExtractions Maximum concurrency for entity extraction + MaxConcurrentEntityExtractions = 4 + + // MaxConcurrentRelationExtractions Maximum concurrency for relationship extraction + MaxConcurrentRelationExtractions = 4 + + // DefaultRelationBatchSize Default batch size for relationship extraction + DefaultRelationBatchSize = 5 + + // MinEntitiesForRelation Minimum number of entities required for relationship extraction + MinEntitiesForRelation = 2 + + // MinWeightValue Minimum weight value to avoid division by zero + MinWeightValue = 1.0 + + // WeightScaleFactor Weight scaling factor to normalize weights to 1-10 range + WeightScaleFactor = 9.0 +) + +// ChunkRelation represents a relationship between two Chunks +type ChunkRelation struct { + // Weight relationship weight, calculated based on PMI and strength + Weight float64 + + // Degree total degree of related entities + Degree int +} + +// graphBuilder implements knowledge graph construction functionality +type graphBuilder struct { + config *config.Config + entityMap map[string]*types.Entity // Entities indexed by ID + entityMapByTitle map[string]*types.Entity // Entities indexed by title + relationshipMap map[string]*types.Relationship // Relationship mapping + chatModel chat.Chat + chunkGraph map[string]map[string]*ChunkRelation // Document chunk relationship graph + mutex sync.RWMutex // Mutex for concurrent operations +} + +// NewGraphBuilder creates a new graph builder +func NewGraphBuilder(config *config.Config, chatModel chat.Chat) types.GraphBuilder { + logger.Info(context.Background(), "Creating new graph builder") + return &graphBuilder{ + config: config, + chatModel: chatModel, + entityMap: make(map[string]*types.Entity), + entityMapByTitle: make(map[string]*types.Entity), + relationshipMap: make(map[string]*types.Relationship), + chunkGraph: make(map[string]map[string]*ChunkRelation), + } +} + +// extractEntities extracts entities from text chunks +// It uses LLM to analyze text content and identify relevant entities +func (b *graphBuilder) extractEntities(ctx context.Context, chunk *types.Chunk) ([]*types.Entity, error) { + log := logger.GetLogger(ctx) + log.Infof("Extracting entities from chunk: %s", chunk.ID) + + if chunk.Content == "" { + log.Warn("Empty chunk content, skipping entity extraction") + return []*types.Entity{}, nil + } + + // Create prompt for entity extraction + thinking := false + messages := []chat.Message{ + { + Role: "system", + Content: b.config.Conversation.ExtractEntitiesPrompt, + }, + { + Role: "user", + Content: chunk.Content, + }, + } + + // Call LLM to extract entities + log.Debug("Calling LLM to extract entities") + resp, err := b.chatModel.Chat(ctx, messages, &chat.ChatOptions{ + Temperature: DefaultLLMTemperature, + Thinking: &thinking, + }) + if err != nil { + log.WithError(err).Error("Failed to extract entities from chunk") + return nil, fmt.Errorf("LLM entity extraction failed: %w", err) + } + + // Parse JSON response + var extractedEntities []*types.Entity + if err := common.ParseLLMJsonResponse(resp.Content, &extractedEntities); err != nil { + log.WithError(err).Errorf("Failed to parse entity extraction response, rsp content: %s", resp.Content) + return nil, fmt.Errorf("failed to parse entity extraction response: %w", err) + } + log.Infof("Extracted %d entities from chunk", len(extractedEntities)) + + // Print detailed entity information in a clear format + log.Info("=========== EXTRACTED ENTITIES ===========") + for i, entity := range extractedEntities { + 
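extractEntities expects the model reply to be a JSON array it can unmarshal into []*types.Entity, i.e. at least a title and a description per element. Below is a standalone sketch of that parsing step; the exact prompt, the JSON field names, and the behaviour of common.ParseLLMJsonResponse are not shown in this diff, so both the struct and the fence-stripping are assumptions:

	package main

	import (
		"encoding/json"
		"fmt"
		"strings"
	)

	// entity mirrors only the fields the extraction prompt is expected to fill.
	type entity struct {
		Title       string `json:"title"`
		Description string `json:"description"`
	}

	func main() {
		// A typical model reply, possibly wrapped in a Markdown code fence.
		reply := "```json\n[{\"title\": \"Golang\", \"description\": \"A programming language\"}]\n```"

		// Minimal stand-in for common.ParseLLMJsonResponse: strip fences, then unmarshal.
		cleaned := strings.TrimSpace(reply)
		cleaned = strings.TrimPrefix(cleaned, "```json")
		cleaned = strings.TrimPrefix(cleaned, "```")
		cleaned = strings.TrimSuffix(cleaned, "```")

		var entities []*entity
		if err := json.Unmarshal([]byte(strings.TrimSpace(cleaned)), &entities); err != nil {
			fmt.Println("parse failed:", err)
			return
		}
		fmt.Printf("%d entities, first: %s\n", len(entities), entities[0].Title)
	}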
log.Infof("[Entity %d] Title: '%s', Description: '%s'", i+1, entity.Title, entity.Description) + } + log.Info("=========================================") + + var entities []*types.Entity + + // Process entities and update entityMap + b.mutex.Lock() + defer b.mutex.Unlock() + + for _, entity := range extractedEntities { + if entity.Title == "" || entity.Description == "" { + log.WithField("entity", entity).Warn("Invalid entity with empty title or description") + continue + } + if existEntity, exists := b.entityMapByTitle[entity.Title]; !exists { + // This is a new entity + entity.ID = uuid.New().String() + entity.ChunkIDs = []string{chunk.ID} + entity.Frequency = 1 + b.entityMapByTitle[entity.Title] = entity + b.entityMap[entity.ID] = entity + entities = append(entities, entity) + log.Debugf("New entity added: %s (ID: %s)", entity.Title, entity.ID) + } else { + // Entity already exists, update its ChunkIDs + if !slices.Contains(existEntity.ChunkIDs, chunk.ID) { + existEntity.ChunkIDs = append(existEntity.ChunkIDs, chunk.ID) + log.Debugf("Updated existing entity: %s with chunk: %s", entity.Title, chunk.ID) + } + existEntity.Frequency++ + entities = append(entities, existEntity) + } + } + + log.Infof("Completed entity extraction for chunk %s: %d entities", chunk.ID, len(entities)) + return entities, nil +} + +// extractRelationships extracts relationships between entities +// It analyzes semantic connections between multiple entities and establishes relationships +func (b *graphBuilder) extractRelationships(ctx context.Context, + chunks []*types.Chunk, entities []*types.Entity) error { + log := logger.GetLogger(ctx) + log.Infof("Extracting relationships from %d entities across %d chunks", len(entities), len(chunks)) + + if len(entities) < MinEntitiesForRelation { + log.Info("Not enough entities to form relationships (minimum 2)") + return nil + } + + // Serialize entities to build prompt + entitiesJSON, err := json.Marshal(entities) + if err != nil { + log.WithError(err).Error("Failed to serialize entities to JSON") + return fmt.Errorf("failed to serialize entities: %w", err) + } + + // Merge chunk contents + content := b.mergeChunkContents(chunks) + if content == "" { + log.Warn("No content to extract relationships from") + return nil + } + + // Create relationship extraction prompt + thinking := false + messages := []chat.Message{ + { + Role: "system", + Content: b.config.Conversation.ExtractRelationshipsPrompt, + }, + { + Role: "user", + Content: fmt.Sprintf("Entities: %s\n\nText: %s", string(entitiesJSON), content), + }, + } + + // Call LLM to extract relationships + log.Debug("Calling LLM to extract relationships") + resp, err := b.chatModel.Chat(ctx, messages, &chat.ChatOptions{ + Temperature: DefaultLLMTemperature, + Thinking: &thinking, + }) + if err != nil { + log.WithError(err).Error("Failed to extract relationships") + return fmt.Errorf("LLM relationship extraction failed: %w", err) + } + + // Parse JSON response + var extractedRelationships []*types.Relationship + if err := common.ParseLLMJsonResponse(resp.Content, &extractedRelationships); err != nil { + log.WithError(err).Error("Failed to parse relationship extraction response") + return fmt.Errorf("failed to parse relationship extraction response: %w", err) + } + log.Infof("Extracted %d relationships", len(extractedRelationships)) + + // Print detailed relationship information in a clear format + log.Info("========= EXTRACTED RELATIONSHIPS =========") + for i, rel := range extractedRelationships { + log.Infof("[Relation %d] 
Source: '%s', Target: '%s', Description: '%s', Strength: %d", + i+1, rel.Source, rel.Target, rel.Description, rel.Strength) + } + log.Info("===========================================") + + // Process relationships and update relationshipMap + b.mutex.Lock() + defer b.mutex.Unlock() + + relationshipsAdded := 0 + relationshipsUpdated := 0 + for _, relationship := range extractedRelationships { + key := fmt.Sprintf("%s#%s", relationship.Source, relationship.Target) + relationChunkIDs := b.findRelationChunkIDs(relationship.Source, relationship.Target, entities) + if len(relationChunkIDs) == 0 { + log.Debugf("Skipping relationship %s -> %s: no common chunks", relationship.Source, relationship.Target) + continue + } + if existingRel, exists := b.relationshipMap[key]; !exists { + // This is a new relationship + relationship.ID = uuid.New().String() + relationship.ChunkIDs = relationChunkIDs + b.relationshipMap[key] = relationship + relationshipsAdded++ + log.Debugf("New relationship added: %s -> %s (ID: %s)", + relationship.Source, relationship.Target, relationship.ID) + } else { + // This relationship already exists, update its properties + chunkIDsAdded := 0 + for _, chunkID := range relationChunkIDs { + if !slices.Contains(existingRel.ChunkIDs, chunkID) { + existingRel.ChunkIDs = append(existingRel.ChunkIDs, chunkID) + chunkIDsAdded++ + } + } + // Update strength, considering weighted average of existing strength and new relationship strength + if len(existingRel.ChunkIDs) > 0 { + existingRel.Strength = (existingRel.Strength*len(existingRel.ChunkIDs) + relationship.Strength) / + (len(existingRel.ChunkIDs) + 1) + } + + if chunkIDsAdded > 0 { + relationshipsUpdated++ + log.Debugf("Updated relationship: %s -> %s with %d new chunks", + relationship.Source, relationship.Target, chunkIDsAdded) + } + } + } + + log.Infof("Relationship extraction completed: added %d, updated %d relationships", + relationshipsAdded, relationshipsUpdated) + return nil +} + +// findRelationChunkIDs finds common document chunk IDs between two entities +func (b *graphBuilder) findRelationChunkIDs(source, target string, entities []*types.Entity) []string { + relationChunkIDs := make(map[string]struct{}) + + // Collect all document chunk IDs for source and target entities + for _, entity := range entities { + if entity.Title == source || entity.Title == target { + for _, chunkID := range entity.ChunkIDs { + relationChunkIDs[chunkID] = struct{}{} + } + } + } + + if len(relationChunkIDs) == 0 { + return []string{} + } + + // Convert map keys to slice + result := make([]string, 0, len(relationChunkIDs)) + for chunkID := range relationChunkIDs { + result = append(result, chunkID) + } + return result +} + +// mergeChunkContents merges content from multiple document chunks +// It accounts for overlapping portions between chunks to ensure coherent content +func (b *graphBuilder) mergeChunkContents(chunks []*types.Chunk) string { + if len(chunks) == 0 { + return "" + } + + var chunkContents = chunks[0].Content + preChunk := chunks[0] + + for i := 1; i < len(chunks); i++ { + // Only add non-overlapping content parts + if preChunk.EndAt > chunks[i].StartAt { + // Calculate overlap starting position + startPos := preChunk.EndAt - chunks[i].StartAt + if startPos >= 0 && startPos < len([]rune(chunks[i].Content)) { + chunkContents = chunkContents + string([]rune(chunks[i].Content)[startPos:]) + } + } else { + // If there's no overlap between chunks, add all content + chunkContents = chunkContents + chunks[i].Content + } + preChunk = 
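mergeChunkContents relies on each chunk's StartAt/EndAt offsets to drop the prefix that an overlapping chunk shares with its predecessor. A self-contained demonstration of that arithmetic with a simplified chunk type, rune-based like the original:

	package main

	import "fmt"

	type chunk struct {
		Content string
		StartAt int
		EndAt   int
	}

	func mergeChunks(chunks []chunk) string {
		if len(chunks) == 0 {
			return ""
		}
		merged := chunks[0].Content
		pre := chunks[0]
		for i := 1; i < len(chunks); i++ {
			if pre.EndAt > chunks[i].StartAt {
				// Overlap: skip the part of this chunk the previous one already covered.
				skip := pre.EndAt - chunks[i].StartAt
				runes := []rune(chunks[i].Content)
				if skip >= 0 && skip < len(runes) {
					merged += string(runes[skip:])
				}
			} else {
				merged += chunks[i].Content
			}
			pre = chunks[i]
		}
		return merged
	}

	func main() {
		chunks := []chunk{
			{Content: "knowledge graphs link", StartAt: 0, EndAt: 21},
			{Content: "graphs link entities together", StartAt: 10, EndAt: 39},
		}
		fmt.Println(mergeChunks(chunks))
		// knowledge graphs link entities together
	}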
chunks[i] + } + + return chunkContents +} + +// BuildGraph constructs the knowledge graph +// It serves as the main entry point for the graph building process, coordinating all components +func (b *graphBuilder) BuildGraph(ctx context.Context, chunks []*types.Chunk) error { + log := logger.GetLogger(ctx) + log.Infof("Building knowledge graph from %d chunks", len(chunks)) + startTime := time.Now() + + // Concurrently extract entities from each document chunk + var chunkEntities = make([][]*types.Entity, len(chunks)) + g, gctx := errgroup.WithContext(ctx) + g.SetLimit(MaxConcurrentEntityExtractions) // Limit concurrency + + for i, chunk := range chunks { + i, chunk := i, chunk // Create local variables to avoid closure issues + g.Go(func() error { + log.Debugf("Processing chunk %d/%d (ID: %s)", i+1, len(chunks), chunk.ID) + entities, err := b.extractEntities(gctx, chunk) + if err != nil { + log.WithError(err).Errorf("Failed to extract entities from chunk %s", chunk.ID) + return fmt.Errorf("entity extraction failed for chunk %s: %w", chunk.ID, err) + } + chunkEntities[i] = entities + return nil + }) + } + + // Wait for all entity extractions to complete + if err := g.Wait(); err != nil { + log.WithError(err).Error("Entity extraction failed") + return fmt.Errorf("entity extraction process failed: %w", err) + } + + // Count total extracted entities + totalEntityCount := 0 + for _, entities := range chunkEntities { + totalEntityCount += len(entities) + } + log.Infof("Successfully extracted %d total entities across %d chunks", + totalEntityCount, len(chunks)) + + // Process relationships in batches concurrently + relationChunkSize := DefaultRelationBatchSize + log.Infof("Processing relationships concurrently in batches of %d chunks", relationChunkSize) + + // prepare relationship extraction batches + var relationBatches []struct { + batchChunks []*types.Chunk + relationUseEntities []*types.Entity + batchIndex int + } + + for i, batchChunks := range utils.ChunkSlice(chunks, relationChunkSize) { + start := i * relationChunkSize + end := start + relationChunkSize + if end > len(chunkEntities) { + end = len(chunkEntities) + } + + // Merge all entities in this batch + relationUseEntities := make([]*types.Entity, 0) + for j := start; j < end; j++ { + if j < len(chunkEntities) { + relationUseEntities = append(relationUseEntities, chunkEntities[j]...) 
+ } + } + + if len(relationUseEntities) < MinEntitiesForRelation { + log.Debugf("Skipping batch %d: not enough entities (%d)", i+1, len(relationUseEntities)) + continue + } + + relationBatches = append(relationBatches, struct { + batchChunks []*types.Chunk + relationUseEntities []*types.Entity + batchIndex int + }{ + batchChunks: batchChunks, + relationUseEntities: relationUseEntities, + batchIndex: i, + }) + } + + // extract relationships concurrently + relG, relGctx := errgroup.WithContext(ctx) + relG.SetLimit(MaxConcurrentRelationExtractions) // use dedicated relationship extraction concurrency limit + + for _, batch := range relationBatches { + relG.Go(func() error { + log.Debugf("Processing relationship batch %d (chunks %d)", batch.batchIndex+1, len(batch.batchChunks)) + err := b.extractRelationships(relGctx, batch.batchChunks, batch.relationUseEntities) + if err != nil { + log.WithError(err).Errorf("Failed to extract relationships for batch %d", batch.batchIndex+1) + } + return nil // continue to process other batches even if the current batch fails + }) + } + + // wait for all relationship extractions to complete + if err := relG.Wait(); err != nil { + log.WithError(err).Error("Some relationship extraction tasks failed") + // but we continue to process the next steps because some relationship extractions are still useful + } + + // Calculate relationship weights + log.Info("Calculating weights for relationships") + b.calculateWeights(ctx) + + // Calculate entity degrees + log.Info("Calculating degrees for entities") + b.calculateDegrees(ctx) + + // Build Chunk graph + log.Info("Building chunk relationship graph") + b.buildChunkGraph(ctx) + + log.Infof("Graph building completed in %.2f seconds: %d entities, %d relationships", + time.Since(startTime).Seconds(), len(b.entityMap), len(b.relationshipMap)) + + // generate knowledge graph visualization diagram + mermaidDiagram := b.generateKnowledgeGraphDiagram(ctx) + log.Info("Knowledge graph visualization diagram:") + log.Info(mermaidDiagram) + + return nil +} + +// calculateWeights calculates relationship weights +// It uses Point Mutual Information (PMI) and strength values to calculate relationship weights +func (b *graphBuilder) calculateWeights(ctx context.Context) { + log := logger.GetLogger(ctx) + log.Info("Calculating relationship weights using PMI and strength") + + // Calculate total entity occurrences + totalEntityOccurrences := 0 + entityFrequency := make(map[string]int) + + for _, entity := range b.entityMap { + frequency := len(entity.ChunkIDs) + entityFrequency[entity.Title] = frequency + totalEntityOccurrences += frequency + } + + // Calculate total relationship occurrences + totalRelOccurrences := 0 + for _, rel := range b.relationshipMap { + totalRelOccurrences += len(rel.ChunkIDs) + } + + // Skip calculation if insufficient data + if totalEntityOccurrences == 0 || totalRelOccurrences == 0 { + log.Warn("Insufficient data for weight calculation") + return + } + + // Track maximum PMI and Strength values for normalization + maxPMI := 0.0 + maxStrength := MinWeightValue // Avoid division by zero + + // First calculate PMI and find maximum values + pmiValues := make(map[string]float64) + for _, rel := range b.relationshipMap { + sourceFreq := entityFrequency[rel.Source] + targetFreq := entityFrequency[rel.Target] + relFreq := len(rel.ChunkIDs) + + if sourceFreq > 0 && targetFreq > 0 && relFreq > 0 { + sourceProbability := float64(sourceFreq) / float64(totalEntityOccurrences) + targetProbability := float64(targetFreq) / 
float64(totalEntityOccurrences) + relProbability := float64(relFreq) / float64(totalRelOccurrences) + + // PMI calculation: log(P(x,y) / (P(x) * P(y))) + pmi := math.Max(math.Log2(relProbability/(sourceProbability*targetProbability)), 0) + pmiValues[rel.ID] = pmi + + if pmi > maxPMI { + maxPMI = pmi + } + } + + // Record maximum Strength value + if float64(rel.Strength) > maxStrength { + maxStrength = float64(rel.Strength) + } + } + + // Combine PMI and Strength to calculate final weights + for _, rel := range b.relationshipMap { + pmi := pmiValues[rel.ID] + + // Normalize PMI and Strength (0-1 range) + normalizedPMI := 0.0 + if maxPMI > 0 { + normalizedPMI = pmi / maxPMI + } + + normalizedStrength := float64(rel.Strength) / maxStrength + + // Combine PMI and Strength using configured weights + combinedWeight := normalizedPMI*PMIWeight + normalizedStrength*StrengthWeight + + // Scale weight to 1-10 range + scaledWeight := 1.0 + WeightScaleFactor*combinedWeight + + rel.Weight = scaledWeight + } + + log.Infof("Weight calculation completed for %d relationships", len(b.relationshipMap)) +} + +// calculateDegrees calculates entity degrees +// Degree represents the number of connections an entity has with other entities, a key metric in graph structures +func (b *graphBuilder) calculateDegrees(ctx context.Context) { + log := logger.GetLogger(ctx) + log.Info("Calculating entity degrees") + + // Calculate in-degree and out-degree for each entity + inDegree := make(map[string]int) + outDegree := make(map[string]int) + + for _, rel := range b.relationshipMap { + outDegree[rel.Source]++ + inDegree[rel.Target]++ + } + + // Set degree for each entity + for _, entity := range b.entityMap { + entity.Degree = inDegree[entity.Title] + outDegree[entity.Title] + } + + // Set combined degree for relationships + for _, rel := range b.relationshipMap { + sourceEntity := b.getEntityByTitle(rel.Source) + targetEntity := b.getEntityByTitle(rel.Target) + + if sourceEntity != nil && targetEntity != nil { + rel.CombinedDegree = sourceEntity.Degree + targetEntity.Degree + } + } + + log.Info("Entity degree calculation completed") +} + +// buildChunkGraph builds relationship graph between Chunks +// It creates a network of relationships between document chunks based on entity relationships +func (b *graphBuilder) buildChunkGraph(ctx context.Context) { + log := logger.GetLogger(ctx) + log.Info("Building chunk relationship graph") + + // Create document chunk relationship graph based on entity relationships + for _, rel := range b.relationshipMap { + // Ensure source and target entities exist for the relationship + sourceEntity := b.entityMapByTitle[rel.Source] + targetEntity := b.entityMapByTitle[rel.Target] + + if sourceEntity == nil || targetEntity == nil { + log.Warnf("Missing entity for relationship %s -> %s", rel.Source, rel.Target) + continue + } + + // Build Chunk graph - connect all related document chunks + for _, sourceChunkID := range sourceEntity.ChunkIDs { + if _, exists := b.chunkGraph[sourceChunkID]; !exists { + b.chunkGraph[sourceChunkID] = make(map[string]*ChunkRelation) + } + + for _, targetChunkID := range targetEntity.ChunkIDs { + if _, exists := b.chunkGraph[targetChunkID]; !exists { + b.chunkGraph[targetChunkID] = make(map[string]*ChunkRelation) + } + + relation := &ChunkRelation{ + Weight: rel.Weight, + Degree: rel.CombinedDegree, + } + + b.chunkGraph[sourceChunkID][targetChunkID] = relation + b.chunkGraph[targetChunkID][sourceChunkID] = relation + } + } + } + + log.Infof("Chunk graph built with %d 
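calculateWeights normalises PMI and the model-reported strength to [0,1], mixes them 60/40, and rescales the result onto a 1-10 range. A worked example using the constants defined above; the corpus counts are invented:

	package main

	import (
		"fmt"
		"math"
	)

	func main() {
		const (
			pmiWeight         = 0.6 // PMIWeight
			strengthWeight    = 0.4 // StrengthWeight
			weightScaleFactor = 9.0 // WeightScaleFactor
		)

		// Hypothetical counts for one relationship A -> B.
		sourceFreq, targetFreq := 4.0, 3.0 // chunks mentioning A, chunks mentioning B
		relFreq := 2.0                     // chunks where the relationship was extracted
		totalEntityOccurrences := 20.0
		totalRelOccurrences := 10.0

		pSource := sourceFreq / totalEntityOccurrences // 0.20
		pTarget := targetFreq / totalEntityOccurrences // 0.15
		pRel := relFreq / totalRelOccurrences          // 0.20

		pmi := math.Max(math.Log2(pRel/(pSource*pTarget)), 0) // log2(0.2/0.03) ≈ 2.74

		// Assume this happens to be the largest PMI and the largest strength in the graph.
		maxPMI, maxStrength := pmi, 8.0
		strength := 8.0

		combined := (pmi/maxPMI)*pmiWeight + (strength/maxStrength)*strengthWeight // 0.6 + 0.4 = 1.0
		weight := 1.0 + weightScaleFactor*combined                                 // 10.0

		fmt.Printf("pmi=%.2f weight=%.2f\n", pmi, weight)
	}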
nodes", len(b.chunkGraph)) +} + +// GetAllEntities returns all entities +func (b *graphBuilder) GetAllEntities() []*types.Entity { + b.mutex.RLock() + defer b.mutex.RUnlock() + + entities := make([]*types.Entity, 0, len(b.entityMap)) + for _, entity := range b.entityMap { + entities = append(entities, entity) + } + return entities +} + +// GetAllRelationships returns all relationships +func (b *graphBuilder) GetAllRelationships() []*types.Relationship { + b.mutex.RLock() + defer b.mutex.RUnlock() + + relationships := make([]*types.Relationship, 0, len(b.relationshipMap)) + for _, relationship := range b.relationshipMap { + relationships = append(relationships, relationship) + } + return relationships +} + +// GetRelationChunks retrieves document chunks directly related to the given chunkID +// It returns a list of related document chunk IDs sorted by weight and degree +func (b *graphBuilder) GetRelationChunks(chunkID string, topK int) []string { + b.mutex.RLock() + defer b.mutex.RUnlock() + + log := logger.GetLogger(context.Background()) + log.Debugf("Getting related chunks for %s (topK=%d)", chunkID, topK) + + // Create weighted chunk structure for sorting + type weightedChunk struct { + id string + weight float64 + degree int + } + + // Collect related chunks with their weights and degrees + weightedChunks := make([]weightedChunk, 0) + for relationChunkID, relation := range b.chunkGraph[chunkID] { + weightedChunks = append(weightedChunks, weightedChunk{ + id: relationChunkID, + weight: relation.Weight, + degree: relation.Degree, + }) + } + + // Sort by weight and degree in descending order + slices.SortFunc(weightedChunks, func(a, b weightedChunk) int { + // Sort by weight first + if a.weight > b.weight { + return -1 // Descending order + } else if a.weight < b.weight { + return 1 + } + + // If weights are equal, sort by degree + if a.degree > b.degree { + return -1 // Descending order + } else if a.degree < b.degree { + return 1 + } + + return 0 + }) + + // Take top K results + resultCount := len(weightedChunks) + if topK > 0 && topK < resultCount { + resultCount = topK + } + + // Extract chunk IDs + chunks := make([]string, 0, resultCount) + for i := 0; i < resultCount; i++ { + chunks = append(chunks, weightedChunks[i].id) + } + + log.Debugf("Found %d related chunks for %s (limited to %d)", + len(weightedChunks), chunkID, resultCount) + return chunks +} + +// GetIndirectRelationChunks retrieves document chunks indirectly related to the given chunkID +// It returns document chunk IDs found through second-degree connections +func (b *graphBuilder) GetIndirectRelationChunks(chunkID string, topK int) []string { + b.mutex.RLock() + defer b.mutex.RUnlock() + + log := logger.GetLogger(context.Background()) + log.Debugf("Getting indirectly related chunks for %s (topK=%d)", chunkID, topK) + + // Create weighted chunk structure for sorting + type weightedChunk struct { + id string + weight float64 + degree int + } + + // Get directly related chunks (first-degree connections) + directChunks := make(map[string]struct{}) + directChunks[chunkID] = struct{}{} // Add original chunkID + for directChunkID := range b.chunkGraph[chunkID] { + directChunks[directChunkID] = struct{}{} + } + log.Debugf("Found %d directly related chunks to exclude", len(directChunks)) + + // Use map to deduplicate and store second-degree connections + indirectChunkMap := make(map[string]*ChunkRelation) + + // Get first-degree connections + for directChunkID, directRelation := range b.chunkGraph[chunkID] { + // Get 
second-degree connections + for indirectChunkID, indirectRelation := range b.chunkGraph[directChunkID] { + // Skip self and all direct connections + if _, isDirect := directChunks[indirectChunkID]; isDirect { + continue + } + + // Weight decay: second-degree relationship weight is the product of two direct relationship weights + // multiplied by decay coefficient + combinedWeight := directRelation.Weight * indirectRelation.Weight * IndirectRelationWeightDecay + // Degree calculation: take the maximum degree from the two path segments + combinedDegree := max(directRelation.Degree, indirectRelation.Degree) + + // If already exists, take the higher weight + if existingRel, exists := indirectChunkMap[indirectChunkID]; !exists || + combinedWeight > existingRel.Weight { + indirectChunkMap[indirectChunkID] = &ChunkRelation{ + Weight: combinedWeight, + Degree: combinedDegree, + } + } + } + } + + // Convert to sortable slice + weightedChunks := make([]weightedChunk, 0, len(indirectChunkMap)) + for id, relation := range indirectChunkMap { + weightedChunks = append(weightedChunks, weightedChunk{ + id: id, + weight: relation.Weight, + degree: relation.Degree, + }) + } + + // Sort by weight and degree in descending order + slices.SortFunc(weightedChunks, func(a, b weightedChunk) int { + // Sort by weight first + if a.weight > b.weight { + return -1 // Descending order + } else if a.weight < b.weight { + return 1 + } + + // If weights are equal, sort by degree + if a.degree > b.degree { + return -1 // Descending order + } else if a.degree < b.degree { + return 1 + } + + return 0 + }) + + // Take top K results + resultCount := len(weightedChunks) + if topK > 0 && topK < resultCount { + resultCount = topK + } + + // Extract chunk IDs + chunks := make([]string, 0, resultCount) + for i := 0; i < resultCount; i++ { + chunks = append(chunks, weightedChunks[i].id) + } + + log.Debugf("Found %d indirect related chunks for %s (limited to %d)", + len(weightedChunks), chunkID, resultCount) + return chunks +} + +// getEntityByTitle retrieves an entity by its title +func (b *graphBuilder) getEntityByTitle(title string) *types.Entity { + return b.entityMapByTitle[title] +} + +// dfs depth-first search to find connected components +func dfs(entityTitle string, + adjacencyList map[string]map[string]*types.Relationship, + visited map[string]bool, component *[]string) { + visited[entityTitle] = true + *component = append(*component, entityTitle) + + // traverse all relationships of the current entity + for targetEntity := range adjacencyList[entityTitle] { + if !visited[targetEntity] { + dfs(targetEntity, adjacencyList, visited, component) + } + } + + // check reverse relationships (check if other entities point to the current entity) + for source, targets := range adjacencyList { + for target := range targets { + if target == entityTitle && !visited[source] { + dfs(source, adjacencyList, visited, component) + } + } + } +} + +// generateKnowledgeGraphDiagram generate Mermaid diagram for knowledge graph +func (b *graphBuilder) generateKnowledgeGraphDiagram(ctx context.Context) string { + log := logger.GetLogger(ctx) + log.Info("Generating knowledge graph visualization diagram...") + + var sb strings.Builder + + // Mermaid diagram header + sb.WriteString("```mermaid\ngraph TD\n") + sb.WriteString(" %% entity style definition\n") + sb.WriteString(" classDef entity fill:#f9f,stroke:#333,stroke-width:1px;\n") + sb.WriteString(" classDef highFreq fill:#bbf,stroke:#333,stroke-width:2px;\n\n") + + // get all entities and sort by 
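For indirect (second-degree) neighbours the weight is the product of the two hop weights damped by IndirectRelationWeightDecay, the degree is the larger hop degree, and only the strongest path to a given chunk is kept. The arithmetic in isolation, with made-up hop values:

	package main

	import "fmt"

	func main() {
		// Two hops chunkA -> chunkB -> chunkC, with the 0.5 decay used above.
		directWeight, directDegree := 6.0, 4     // chunkA -> chunkB
		indirectWeight, indirectDegree := 8.0, 7 // chunkB -> chunkC

		combinedWeight := directWeight * indirectWeight * 0.5 // 24.0
		combinedDegree := max(directDegree, indirectDegree)   // 7

		fmt.Println(combinedWeight, combinedDegree)
	}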
frequency + entities := b.GetAllEntities() + slices.SortFunc(entities, func(a, b *types.Entity) int { + if a.Frequency > b.Frequency { + return -1 + } else if a.Frequency < b.Frequency { + return 1 + } + return 0 + }) + + // get relationships and sort by weight + relationships := b.GetAllRelationships() + slices.SortFunc(relationships, func(a, b *types.Relationship) int { + if a.Weight > b.Weight { + return -1 + } else if a.Weight < b.Weight { + return 1 + } + return 0 + }) + + // create entity ID mapping + entityMap := make(map[string]string) // store entity title to node ID mapping + for i, entity := range entities { + nodeID := fmt.Sprintf("E%d", i) + entityMap[entity.Title] = nodeID + } + + // create adjacency list to represent graph structure + adjacencyList := make(map[string]map[string]*types.Relationship) + for _, entity := range entities { + adjacencyList[entity.Title] = make(map[string]*types.Relationship) + } + + // fill adjacency list + for _, rel := range relationships { + if _, sourceExists := entityMap[rel.Source]; sourceExists { + if _, targetExists := entityMap[rel.Target]; targetExists { + adjacencyList[rel.Source][rel.Target] = rel + } + } + } + + // use DFS to find connected components (subgraphs) + visited := make(map[string]bool) + subgraphs := make([][]string, 0) // store entity titles in each subgraph + + for _, entity := range entities { + if !visited[entity.Title] { + component := make([]string, 0) + dfs(entity.Title, adjacencyList, visited, &component) + if len(component) > 0 { + subgraphs = append(subgraphs, component) + } + } + } + + // generate Mermaid subgraphs + subgraphCount := 0 + for _, component := range subgraphs { + // check if this component has relationships + hasRelations := false + nodeCount := len(component) + + // if there is only 1 node, check if it has relationships + if nodeCount == 1 { + entityTitle := component[0] + // check if this entity appears as source or target in any relationship + for _, rel := range relationships { + if rel.Source == entityTitle || rel.Target == entityTitle { + hasRelations = true + break + } + } + + // if there is only 1 node and no relationships, skip this subgraph + if !hasRelations { + continue + } + } else if nodeCount > 1 { + // a subgraph with more than 1 node must have relationships + hasRelations = true + } + + // only draw if there are multiple entities or at least one relationship in the subgraph + if hasRelations { + subgraphCount++ + sb.WriteString(fmt.Sprintf("\n subgraph 子图%d\n", subgraphCount)) + + // add all entities in this subgraph + entitiesInComponent := make(map[string]bool) + for _, entityTitle := range component { + nodeID := entityMap[entityTitle] + entitiesInComponent[entityTitle] = true + + // add node definition for each entity + entity := b.entityMapByTitle[entityTitle] + if entity != nil { + sb.WriteString(fmt.Sprintf(" %s[\"%s\"]\n", nodeID, entityTitle)) + } + } + + // add relationships in this subgraph + for _, rel := range relationships { + if entitiesInComponent[rel.Source] && entitiesInComponent[rel.Target] { + sourceID := entityMap[rel.Source] + targetID := entityMap[rel.Target] + + linkStyle := "-->" + // adjust link style based on relationship strength + if rel.Strength > 7 { + linkStyle = "==>" + } + + sb.WriteString(fmt.Sprintf(" %s %s|%s| %s\n", + sourceID, linkStyle, rel.Description, targetID)) + } + } + + // subgraph ends + sb.WriteString(" end\n") + + // apply style class + for _, entityTitle := range component { + nodeID := entityMap[entityTitle] + entity := 
b.entityMapByTitle[entityTitle] + if entity != nil { + if entity.Frequency > 5 { + sb.WriteString(fmt.Sprintf(" class %s highFreq;\n", nodeID)) + } else { + sb.WriteString(fmt.Sprintf(" class %s entity;\n", nodeID)) + } + } + } + } + } + + // close Mermaid diagram + sb.WriteString("```\n") + + log.Infof("Knowledge graph visualization diagram generated with %d subgraphs", subgraphCount) + return sb.String() +} diff --git a/internal/application/service/knowledge.go b/internal/application/service/knowledge.go new file mode 100644 index 0000000..ccee9b3 --- /dev/null +++ b/internal/application/service/knowledge.go @@ -0,0 +1,1561 @@ +package service + +import ( + "context" + "crypto/md5" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "io" + "mime/multipart" + "os" + "regexp" + "slices" + "sort" + "strings" + "sync" + "time" + + "github.com/Tencent/WeKnora/internal/application/service/retriever" + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/chat" + "github.com/Tencent/WeKnora/internal/models/utils" + "github.com/Tencent/WeKnora/internal/tracing" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/Tencent/WeKnora/services/docreader/src/client" + "github.com/Tencent/WeKnora/services/docreader/src/proto" + "github.com/google/uuid" + "go.opentelemetry.io/otel/attribute" +) + +// Error definitions for knowledge service operations +var ( + // ErrInvalidFileType is returned when an unsupported file type is provided + ErrInvalidFileType = errors.New("unsupported file type") + // ErrInvalidURL is returned when an invalid URL is provided + ErrInvalidURL = errors.New("invalid URL") + // ErrChunkNotFound is returned when a requested chunk cannot be found + ErrChunkNotFound = errors.New("chunk not found") + // ErrDuplicateFile is returned when trying to add a file that already exists + ErrDuplicateFile = errors.New("file already exists") + // ErrDuplicateURL is returned when trying to add a URL that already exists + ErrDuplicateURL = errors.New("URL already exists") +) + +// knowledgeService implements the knowledge service interface +// service 实现知识服务接口 +type knowledgeService struct { + config *config.Config + repo interfaces.KnowledgeRepository + kbService interfaces.KnowledgeBaseService + tenantRepo interfaces.TenantRepository + docReaderClient *client.Client + chunkService interfaces.ChunkService + chunkRepo interfaces.ChunkRepository + fileSvc interfaces.FileService + modelService interfaces.ModelService +} + +// NewKnowledgeService creates a new knowledge service instance +func NewKnowledgeService( + config *config.Config, + repo interfaces.KnowledgeRepository, + docReaderClient *client.Client, + kbService interfaces.KnowledgeBaseService, + tenantRepo interfaces.TenantRepository, + chunkService interfaces.ChunkService, + chunkRepo interfaces.ChunkRepository, + fileSvc interfaces.FileService, + modelService interfaces.ModelService, +) (interfaces.KnowledgeService, error) { + return &knowledgeService{ + config: config, + repo: repo, + kbService: kbService, + tenantRepo: tenantRepo, + docReaderClient: docReaderClient, + chunkService: chunkService, + chunkRepo: chunkRepo, + fileSvc: fileSvc, + modelService: modelService, + }, nil +} + +// CreateKnowledgeFromFile creates a knowledge entry from an uploaded file +func (s *knowledgeService) CreateKnowledgeFromFile(ctx context.Context, + kbID string, file *multipart.FileHeader, metadata 
map[string]string, +) (*types.Knowledge, error) { + logger.Info(ctx, "Start creating knowledge from file") + logger.Infof(ctx, "Knowledge base ID: %s, file: %s", kbID, file.Filename) + if metadata != nil { + logger.Infof(ctx, "Received metadata: %v", metadata) + } + + // Get knowledge base configuration + logger.Info(ctx, "Getting knowledge base configuration") + kb, err := s.kbService.GetKnowledgeBaseByID(ctx, kbID) + if err != nil { + logger.Errorf(ctx, "Failed to get knowledge base: %v", err) + return nil, err + } + + // Validate file type + logger.Infof(ctx, "Checking file type: %s", file.Filename) + if !isValidFileType(file.Filename) { + logger.Error(ctx, "Invalid file type") + return nil, ErrInvalidFileType + } + + // Calculate file hash for deduplication + logger.Info(ctx, "Calculating file hash") + hash, err := calculateFileHash(file) + if err != nil { + logger.Errorf(ctx, "Failed to calculate file hash: %v", err) + return nil, err + } + + // Check if file already exists + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Checking if file exists, tenant ID: %d", tenantID) + exists, existingKnowledge, err := s.repo.CheckKnowledgeExists(ctx, tenantID, kbID, &types.KnowledgeCheckParams{ + Type: "file", + FileName: file.Filename, + FileSize: file.Size, + FileHash: hash, + }) + if err != nil { + logger.Errorf(ctx, "Failed to check knowledge existence: %v", err) + return nil, err + } + if exists { + logger.Infof(ctx, "File already exists: %s", file.Filename) + // Update creation time for existing knowledge + existingKnowledge.CreatedAt = time.Now() + existingKnowledge.UpdatedAt = time.Now() + if err := s.repo.UpdateKnowledge(ctx, existingKnowledge); err != nil { + logger.Errorf(ctx, "Failed to update existing knowledge: %v", err) + return nil, err + } + return existingKnowledge, types.NewDuplicateFileError(existingKnowledge) + } + + // Check storage quota + tenantInfo := ctx.Value(types.TenantInfoContextKey).(*types.Tenant) + if tenantInfo.StorageQuota > 0 && tenantInfo.StorageUsed >= tenantInfo.StorageQuota { + logger.Error(ctx, "Storage quota exceeded") + return nil, types.NewStorageQuotaExceededError() + } + + // Convert metadata to JSON format if provided + var metadataJSON types.JSON + if metadata != nil { + metadataBytes, err := json.Marshal(metadata) + if err != nil { + logger.Errorf(ctx, "Failed to marshal metadata: %v", err) + return nil, err + } + metadataJSON = types.JSON(metadataBytes) + } + + // Create knowledge record + logger.Info(ctx, "Creating knowledge record") + knowledge := &types.Knowledge{ + TenantID: tenantID, + KnowledgeBaseID: kbID, + Type: "file", + Title: file.Filename, + FileName: file.Filename, + FileType: getFileType(file.Filename), + FileSize: file.Size, + FileHash: hash, + ParseStatus: "pending", + EnableStatus: "disabled", + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + EmbeddingModelID: kb.EmbeddingModelID, + Metadata: metadataJSON, + } + // Save knowledge record to database + logger.Info(ctx, "Saving knowledge record to database") + if err := s.repo.CreateKnowledge(ctx, knowledge); err != nil { + logger.Errorf(ctx, "Failed to create knowledge record, ID: %s, error: %v", knowledge.ID, err) + return nil, err + } + // Save the file to storage + logger.Infof(ctx, "Saving file, knowledge ID: %s", knowledge.ID) + filePath, err := s.fileSvc.SaveFile(ctx, file, knowledge.TenantID, knowledge.ID) + if err != nil { + logger.Errorf(ctx, "Failed to save file, knowledge ID: %s, error: %v", knowledge.ID, err) + return nil, err + } + 
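calculateFileHash is defined outside this hunk; given the crypto/md5 and encoding/hex imports and the FileHash dedup check, it presumably digests the uploaded file's content. A plausible sketch follows; this is an assumption, not the repository's implementation, though the imports it needs are already present in this file:

	// Hypothetical sketch of a content-hash helper used for deduplication.
	func calculateFileHashSketch(file *multipart.FileHeader) (string, error) {
		src, err := file.Open()
		if err != nil {
			return "", fmt.Errorf("failed to open file: %w", err)
		}
		defer src.Close()

		h := md5.New()
		if _, err := io.Copy(h, src); err != nil {
			return "", fmt.Errorf("failed to hash file: %w", err)
		}
		return hex.EncodeToString(h.Sum(nil)), nil
	}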
knowledge.FilePath = filePath + + // Update knowledge record with file path + logger.Info(ctx, "Updating knowledge record with file path") + if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil { + logger.Errorf(ctx, "Failed to update knowledge with file path, ID: %s, error: %v", knowledge.ID, err) + return nil, err + } + + // Process document asynchronously + logger.Info(ctx, "Starting asynchronous document processing") + newCtx := logger.CloneContext(ctx) + go s.processDocument(newCtx, kb, knowledge, file) + + logger.Infof(ctx, "Knowledge from file created successfully, ID: %s", knowledge.ID) + return knowledge, nil +} + +// CreateKnowledgeFromURL creates a knowledge entry from a URL source +func (s *knowledgeService) CreateKnowledgeFromURL(ctx context.Context, + kbID string, url string, +) (*types.Knowledge, error) { + logger.Info(ctx, "Start creating knowledge from URL") + logger.Infof(ctx, "Knowledge base ID: %s, URL: %s", kbID, url) + + // Get knowledge base configuration + logger.Info(ctx, "Getting knowledge base configuration") + kb, err := s.kbService.GetKnowledgeBaseByID(ctx, kbID) + if err != nil { + logger.Errorf(ctx, "Failed to get knowledge base: %v", err) + return nil, err + } + + // Validate URL format + logger.Info(ctx, "Validating URL") + if !isValidURL(url) { + logger.Error(ctx, "Invalid URL format") + return nil, ErrInvalidURL + } + + // Check if URL already exists in the knowledge base + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Checking if URL exists, tenant ID: %d", tenantID) + exists, existingKnowledge, err := s.repo.CheckKnowledgeExists(ctx, tenantID, kbID, &types.KnowledgeCheckParams{ + Type: "url", + URL: url, + }) + if err != nil { + logger.Errorf(ctx, "Failed to check knowledge existence: %v", err) + return nil, err + } + if exists { + logger.Infof(ctx, "URL already exists: %s", url) + // Update creation time for existing knowledge + existingKnowledge.CreatedAt = time.Now() + existingKnowledge.UpdatedAt = time.Now() + if err := s.repo.UpdateKnowledge(ctx, existingKnowledge); err != nil { + logger.Errorf(ctx, "Failed to update existing knowledge: %v", err) + return nil, err + } + return existingKnowledge, types.NewDuplicateURLError(existingKnowledge) + } + + // Check storage quota + tenantInfo := ctx.Value(types.TenantInfoContextKey).(*types.Tenant) + if tenantInfo.StorageQuota > 0 && tenantInfo.StorageUsed >= tenantInfo.StorageQuota { + logger.Error(ctx, "Storage quota exceeded") + return nil, types.NewStorageQuotaExceededError() + } + + // Create knowledge record + logger.Info(ctx, "Creating knowledge record") + knowledge := &types.Knowledge{ + ID: uuid.New().String(), + TenantID: tenantID, + KnowledgeBaseID: kbID, + Type: "url", + Source: url, + ParseStatus: "pending", + EnableStatus: "disabled", + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + EmbeddingModelID: kb.EmbeddingModelID, + } + + // Save knowledge record + logger.Infof(ctx, "Saving knowledge record to database, ID: %s", knowledge.ID) + if err := s.repo.CreateKnowledge(ctx, knowledge); err != nil { + logger.Errorf(ctx, "Failed to create knowledge record: %v", err) + return nil, err + } + + // Process URL asynchronously + logger.Info(ctx, "Starting asynchronous URL processing") + go s.processDocumentFromURL(ctx, kb, knowledge, url) + + logger.Infof(ctx, "Knowledge from URL created successfully, ID: %s", knowledge.ID) + return knowledge, nil +} + +// CreateKnowledgeFromPassage creates a knowledge entry from text passages +func (s *knowledgeService) 
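isValidURL is likewise defined elsewhere (the regexp import suggests it may be pattern-based). A minimal standard-library variant could look like the sketch below; it is an assumption rather than the project's actual check and would additionally need net/url imported:

	// Hypothetical sketch: accept only absolute http/https URLs with a host.
	func isValidURLSketch(raw string) bool {
		u, err := url.Parse(raw)
		if err != nil {
			return false
		}
		return (u.Scheme == "http" || u.Scheme == "https") && u.Host != ""
	}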
CreateKnowledgeFromPassage(ctx context.Context, + kbID string, passage []string, +) (*types.Knowledge, error) { + logger.Info(ctx, "Start creating knowledge from passage") + logger.Infof(ctx, "Knowledge base ID: %s, passage count: %d", kbID, len(passage)) + + // Get knowledge base configuration + logger.Info(ctx, "Getting knowledge base configuration") + kb, err := s.kbService.GetKnowledgeBaseByID(ctx, kbID) + if err != nil { + logger.Errorf(ctx, "Failed to get knowledge base: %v", err) + return nil, err + } + + // Create knowledge record + logger.Info(ctx, "Creating knowledge record") + knowledge := &types.Knowledge{ + ID: uuid.New().String(), + TenantID: ctx.Value(types.TenantIDContextKey).(uint), + KnowledgeBaseID: kbID, + Type: "passage", + ParseStatus: "pending", + EnableStatus: "disabled", + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + EmbeddingModelID: kb.EmbeddingModelID, + } + + // Save knowledge record + logger.Infof(ctx, "Saving knowledge record to database, ID: %s", knowledge.ID) + if err := s.repo.CreateKnowledge(ctx, knowledge); err != nil { + logger.Errorf(ctx, "Failed to create knowledge record: %v", err) + return nil, err + } + + // Process passages asynchronously + logger.Info(ctx, "Starting asynchronous passage processing") + go s.processDocumentFromPassage(ctx, kb, knowledge, passage) + + logger.Infof(ctx, "Knowledge from passage created successfully, ID: %s", knowledge.ID) + return knowledge, nil +} + +// GetKnowledgeByID retrieves a knowledge entry by its ID +func (s *knowledgeService) GetKnowledgeByID(ctx context.Context, id string) (*types.Knowledge, error) { + logger.Info(ctx, "Start getting knowledge by ID") + logger.Infof(ctx, "Knowledge ID: %s", id) + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Tenant ID: %d", tenantID) + + knowledge, err := s.repo.GetKnowledgeByID(ctx, tenantID, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_id": id, + "tenant_id": tenantID, + }) + return nil, err + } + + logger.Infof(ctx, "Knowledge retrieved successfully, ID: %s, type: %s", knowledge.ID, knowledge.Type) + return knowledge, nil +} + +// ListKnowledgeByKnowledgeBaseID returns all knowledge entries in a knowledge base +func (s *knowledgeService) ListKnowledgeByKnowledgeBaseID(ctx context.Context, + kbID string, +) ([]*types.Knowledge, error) { + return s.repo.ListKnowledgeByKnowledgeBaseID(ctx, ctx.Value(types.TenantIDContextKey).(uint), kbID) +} + +// ListPagedKnowledgeByKnowledgeBaseID returns paginated knowledge entries in a knowledge base +func (s *knowledgeService) ListPagedKnowledgeByKnowledgeBaseID(ctx context.Context, + kbID string, page *types.Pagination, +) (*types.PageResult, error) { + knowledges, total, err := s.repo.ListPagedKnowledgeByKnowledgeBaseID(ctx, + ctx.Value(types.TenantIDContextKey).(uint), kbID, page) + if err != nil { + return nil, err + } + + return types.NewPageResult(total, page, knowledges), nil +} + +// DeleteKnowledge deletes a knowledge entry and all related resources +func (s *knowledgeService) DeleteKnowledge(ctx context.Context, id string) error { + // Get the knowledge entry + knowledge, err := s.repo.GetKnowledgeByID(ctx, ctx.Value(types.TenantIDContextKey).(uint), id) + if err != nil { + return err + } + // Delete knowledge embeddings from vector store + tenantInfo := ctx.Value(types.TenantInfoContextKey).(*types.Tenant) + retrieveEngine, err := retriever.NewCompositeRetrieveEngine(tenantInfo.RetrieverEngines.Engines) + if err != nil { + 
logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge delete knowledge embedding failed") + } + embeddingModel, err := s.modelService.GetEmbeddingModel(ctx, knowledge.EmbeddingModelID) + if err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge delete knowledge embedding failed") + } + + if err := retrieveEngine.DeleteByKnowledgeIDList(ctx, []string{id}, embeddingModel.GetDimensions()); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge delete knowledge embedding failed") + } + // Delete all chunks associated with this knowledge + if err := s.chunkService.DeleteChunksByKnowledgeID(ctx, id); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge delete chunks failed") + } + // Delete the physical file if it exists + if knowledge.FilePath != "" { + if err := s.fileSvc.DeleteFile(ctx, knowledge.FilePath); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge delete file failed") + } + } + tenantInfo.StorageUsed -= knowledge.StorageSize + if err := s.tenantRepo.AdjustStorageUsed(ctx, tenantInfo.ID, -knowledge.StorageSize); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge update tenant storage used failed") + } + // Delete the knowledge entry itself from the database + return s.repo.DeleteKnowledge(ctx, ctx.Value(types.TenantIDContextKey).(uint), id) +} + +// DeleteKnowledge deletes a knowledge entry and all related resources +func (s *knowledgeService) DeleteKnowledgeList(ctx context.Context, ids []string) error { + if len(ids) == 0 { + return nil + } + tenantInfo := ctx.Value(types.TenantInfoContextKey).(*types.Tenant) + // 1. Get the knowledge entry + knowledgeList, err := s.repo.GetKnowledgeBatch(ctx, tenantInfo.ID, ids) + if err != nil { + return err + } + + // 2. Delete knowledge embeddings from vector store + retrieveEngine, err := retriever.NewCompositeRetrieveEngine(tenantInfo.RetrieverEngines.Engines) + if err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge delete knowledge embedding failed") + return err + } + group := map[string][]string{} + for _, knowledge := range knowledgeList { + group[knowledge.EmbeddingModelID] = append(group[knowledge.EmbeddingModelID], knowledge.ID) + } + for embeddingModelID, knowledgeList := range group { + embeddingModel, err := s.modelService.GetEmbeddingModel(ctx, embeddingModelID) + if err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge get embedding model failed") + continue + } + if err := retrieveEngine.DeleteByKnowledgeIDList(ctx, knowledgeList, embeddingModel.GetDimensions()); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge delete knowledge embedding failed") + continue + } + } + + // 3. Delete all chunks associated with this knowledge + if err := s.chunkService.DeleteByKnowledgeList(ctx, ids); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge delete chunks failed") + } + + // 4. 
Delete the physical file if it exists + storageAdjust := int64(0) + for _, knowledge := range knowledgeList { + if knowledge.FilePath != "" { + if err := s.fileSvc.DeleteFile(ctx, knowledge.FilePath); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge delete file failed") + } + } + storageAdjust -= knowledge.StorageSize + } + tenantInfo.StorageUsed += storageAdjust + if err := s.tenantRepo.AdjustStorageUsed(ctx, tenantInfo.ID, storageAdjust); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("DeleteKnowledge update tenant storage used failed") + } + + // 5. Delete the knowledge entry itself from the database + return s.repo.DeleteKnowledgeList(ctx, tenantInfo.ID, ids) +} + +func (s *knowledgeService) cloneKnowledge(ctx context.Context, src *types.Knowledge, targetKB *types.KnowledgeBase) (err error) { + if src.ParseStatus != "completed" { + logger.GetLogger(ctx).WithField("knowledge_id", src.ID).Errorf("MoveKnowledge parse status is not completed") + return nil + } + tenantInfo := ctx.Value(types.TenantInfoContextKey).(*types.Tenant) + dst := &types.Knowledge{ + ID: uuid.New().String(), + TenantID: targetKB.TenantID, + KnowledgeBaseID: targetKB.ID, + Type: src.Type, + Title: src.Title, + Description: src.Description, + Source: src.Source, + ParseStatus: "processing", + EnableStatus: "disabled", + EmbeddingModelID: targetKB.EmbeddingModelID, + FileName: src.FileName, + FileType: src.FileType, + FileSize: src.FileSize, + FileHash: src.FileHash, + FilePath: src.FilePath, + StorageSize: src.StorageSize, + Metadata: src.Metadata, + } + defer func() { + if err != nil { + dst.ParseStatus = "failed" + dst.ErrorMessage = err.Error() + _ = s.repo.UpdateKnowledge(ctx, dst) + logger.GetLogger(ctx).WithField("error", err).Errorf("MoveKnowledge failed to move knowledge") + } else { + dst.ParseStatus = "completed" + dst.EnableStatus = "enabled" + _ = s.repo.UpdateKnowledge(ctx, dst) + logger.GetLogger(ctx).WithField("knowledge_id", dst.ID).Infof("MoveKnowledge move knowledge successfully") + } + }() + + if err = s.repo.CreateKnowledge(ctx, dst); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("MoveKnowledge create knowledge failed") + return + } + tenantInfo.StorageUsed += dst.StorageSize + if err = s.tenantRepo.AdjustStorageUsed(ctx, tenantInfo.ID, dst.StorageSize); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("MoveKnowledge update tenant storage used failed") + return + } + if err = s.CloneChunk(ctx, src, dst); err != nil { + logger.GetLogger(ctx).WithField("knowledge_id", dst.ID). 
+ WithField("error", err).Errorf("MoveKnowledge move chunks failed") + return + } + return +} + +// processDocument handles asynchronous processing of document files +func (s *knowledgeService) processDocument(ctx context.Context, + kb *types.KnowledgeBase, knowledge *types.Knowledge, file *multipart.FileHeader, +) { + ctx, span := tracing.ContextWithSpan(ctx, "knowledgeService.processDocument") + defer span.End() + span.SetAttributes( + attribute.String("request_id", ctx.Value(types.RequestIDContextKey).(string)), + attribute.String("knowledge_base_id", kb.ID), + attribute.Int("tenant_id", int(kb.TenantID)), + attribute.String("knowledge_id", knowledge.ID), + attribute.String("file_name", knowledge.FileName), + attribute.String("file_type", knowledge.FileType), + attribute.String("file_path", knowledge.FilePath), + attribute.Int64("file_size", knowledge.FileSize), + attribute.String("embedding_model", knowledge.EmbeddingModelID), + ) + + // Update status to processing + knowledge.ParseStatus = "processing" + knowledge.UpdatedAt = time.Now() + if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil { + span.RecordError(err) + return + } + + // Read and chunk the document + f, err := file.Open() + if err != nil { + logger.GetLogger(ctx).WithField("knowledge_id", knowledge.ID). + WithField("error", err).Errorf("processDocument open file failed") + knowledge.ParseStatus = "failed" + knowledge.ErrorMessage = err.Error() + knowledge.UpdatedAt = time.Now() + s.repo.UpdateKnowledge(ctx, knowledge) + span.RecordError(err) + return + } + defer f.Close() + + span.AddEvent("start read file") + contentBytes, err := io.ReadAll(f) + if err != nil { + knowledge.ParseStatus = "failed" + knowledge.ErrorMessage = err.Error() + knowledge.UpdatedAt = time.Now() + s.repo.UpdateKnowledge(ctx, knowledge) + span.RecordError(err) + return + } + + // Split file into chunks using document reader service + span.AddEvent("start split file") + resp, err := s.docReaderClient.ReadFromFile(ctx, &proto.ReadFromFileRequest{ + FileContent: contentBytes, + FileName: knowledge.FileName, + FileType: knowledge.FileType, + ReadConfig: &proto.ReadConfig{ + ChunkSize: int32(kb.ChunkingConfig.ChunkSize), + ChunkOverlap: int32(kb.ChunkingConfig.ChunkOverlap), + Separators: kb.ChunkingConfig.Separators, + EnableMultimodal: kb.ChunkingConfig.EnableMultimodal, + }, + RequestId: ctx.Value(types.RequestIDContextKey).(string), + }) + if err != nil { + logger.GetLogger(ctx).WithField("knowledge_id", knowledge.ID). 
+ WithField("error", err).Errorf("processDocument read file failed") + knowledge.ParseStatus = "failed" + knowledge.ErrorMessage = err.Error() + knowledge.UpdatedAt = time.Now() + s.repo.UpdateKnowledge(ctx, knowledge) + span.RecordError(err) + return + } + + // Process and store chunks + span.AddEvent("start process chunks") + s.processChunks(ctx, kb, knowledge, resp.Chunks) +} + +// processDocumentFromURL handles asynchronous processing of URL content +func (s *knowledgeService) processDocumentFromURL(ctx context.Context, + kb *types.KnowledgeBase, knowledge *types.Knowledge, url string, +) { + // Update status to processing + knowledge.ParseStatus = "processing" + knowledge.UpdatedAt = time.Now() + if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil { + return + } + + // Fetch and chunk content from URL + resp, err := s.docReaderClient.ReadFromURL(ctx, &proto.ReadFromURLRequest{ + Url: url, + Title: knowledge.Title, + ReadConfig: &proto.ReadConfig{ + ChunkSize: int32(kb.ChunkingConfig.ChunkSize), + ChunkOverlap: int32(kb.ChunkingConfig.ChunkOverlap), + Separators: kb.ChunkingConfig.Separators, + EnableMultimodal: kb.ChunkingConfig.EnableMultimodal, + }, + RequestId: ctx.Value(types.RequestIDContextKey).(string), + }) + if err != nil { + knowledge.ParseStatus = "failed" + knowledge.ErrorMessage = err.Error() + knowledge.UpdatedAt = time.Now() + s.repo.UpdateKnowledge(ctx, knowledge) + return + } + + // Process and store chunks + s.processChunks(ctx, kb, knowledge, resp.Chunks) +} + +// processDocumentFromPassage handles asynchronous processing of text passages +func (s *knowledgeService) processDocumentFromPassage(ctx context.Context, + kb *types.KnowledgeBase, knowledge *types.Knowledge, passage []string, +) { + // Update status to processing + knowledge.ParseStatus = "processing" + knowledge.UpdatedAt = time.Now() + if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil { + return + } + + // Convert passages to chunks + chunks := make([]*proto.Chunk, 0, len(passage)) + start, end := 0, 0 + for i, p := range passage { + if p == "" { + continue + } + end += len([]rune(p)) + chunk := &proto.Chunk{ + Content: p, + Seq: int32(i), + Start: int32(start), + End: int32(end), + } + start = end + chunks = append(chunks, chunk) + } + // Process and store chunks + s.processChunks(ctx, kb, knowledge, chunks) +} + +// processChunks processes chunks and creates embeddings for knowledge content +func (s *knowledgeService) processChunks(ctx context.Context, + kb *types.KnowledgeBase, knowledge *types.Knowledge, chunks []*proto.Chunk, +) { + ctx, span := tracing.ContextWithSpan(ctx, "knowledgeService.processChunks") + defer span.End() + span.SetAttributes( + attribute.Int("tenant_id", int(knowledge.TenantID)), + attribute.String("knowledge_base_id", knowledge.KnowledgeBaseID), + attribute.String("knowledge_id", knowledge.ID), + attribute.String("embedding_model_id", kb.EmbeddingModelID), + attribute.Int("chunk_count", len(chunks)), + ) + + // Get embedding model for vectorization + embeddingModel, err := s.modelService.GetEmbeddingModel(ctx, kb.EmbeddingModelID) + if err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("processChunks get embedding model failed") + span.RecordError(err) + return + } + + // Generate document summary - 只使用文本类型的 Chunk + chatModel, err := s.modelService.GetChatModel(ctx, kb.SummaryModelID) + if err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("processChunks get summary model failed") + span.RecordError(err) + return + } + + 
enableGraphRAG := os.Getenv("ENABLE_GRAPH_RAG") == "true" + + // Create chunk objects from proto chunks + maxSeq := 0 + + // 统计图片相关的子Chunk数量,用于扩展insertChunks的容量 + imageChunkCount := 0 + for _, chunkData := range chunks { + if len(chunkData.Images) > 0 { + // 为每个图片的OCR和Caption分别创建一个Chunk + imageChunkCount += len(chunkData.Images) * 2 + } + if int(chunkData.Seq) > maxSeq { + maxSeq = int(chunkData.Seq) + } + } + + // 重新分配容量,考虑图片相关的Chunk + insertChunks := make([]*types.Chunk, 0, len(chunks)+imageChunkCount) + + for _, chunkData := range chunks { + if strings.TrimSpace(chunkData.Content) == "" { + continue + } + + // 创建主文本Chunk + textChunk := &types.Chunk{ + ID: uuid.New().String(), + TenantID: knowledge.TenantID, + KnowledgeID: knowledge.ID, + KnowledgeBaseID: knowledge.KnowledgeBaseID, + Content: chunkData.Content, + ChunkIndex: int(chunkData.Seq), + IsEnabled: true, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + StartAt: int(chunkData.Start), + EndAt: int(chunkData.End), + ChunkType: types.ChunkTypeText, + } + var chunkImages []types.ImageInfo + insertChunks = append(insertChunks, textChunk) + + // 处理图片信息 + if len(chunkData.Images) > 0 { + logger.GetLogger(ctx).Infof("Processing %d images in chunk #%d", len(chunkData.Images), chunkData.Seq) + + for i, img := range chunkData.Images { + // 保存图片信息到文本Chunk + imageInfo := types.ImageInfo{ + URL: img.Url, + OriginalURL: img.OriginalUrl, + StartPos: int(img.Start), + EndPos: int(img.End), + OCRText: img.OcrText, + Caption: img.Caption, + } + chunkImages = append(chunkImages, imageInfo) + + // 将ImageInfo序列化为JSON + imageInfoJSON, err := json.Marshal([]types.ImageInfo{imageInfo}) + if err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("Failed to marshal image info to JSON") + continue + } + + // 如果有OCR文本,创建OCR Chunk + if img.OcrText != "" { + ocrChunk := &types.Chunk{ + ID: uuid.New().String(), + TenantID: knowledge.TenantID, + KnowledgeID: knowledge.ID, + KnowledgeBaseID: knowledge.KnowledgeBaseID, + Content: fmt.Sprintf("图片OCR文本: %s", img.OcrText), + ChunkIndex: maxSeq + i*100 + 1, // 使用不冲突的索引方式 + IsEnabled: true, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + StartAt: int(img.Start), + EndAt: int(img.End), + ChunkType: types.ChunkTypeImageOCR, + ParentChunkID: textChunk.ID, + ImageInfo: string(imageInfoJSON), + } + insertChunks = append(insertChunks, ocrChunk) + logger.GetLogger(ctx).Infof("Created OCR chunk for image %d in chunk #%d", i, chunkData.Seq) + } + + // 如果有图片描述,创建Caption Chunk + if img.Caption != "" { + captionChunk := &types.Chunk{ + ID: uuid.New().String(), + TenantID: knowledge.TenantID, + KnowledgeID: knowledge.ID, + KnowledgeBaseID: knowledge.KnowledgeBaseID, + Content: fmt.Sprintf("图片描述: %s", img.Caption), + ChunkIndex: maxSeq + i*100 + 2, // 使用不冲突的索引方式 + IsEnabled: true, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + StartAt: int(img.Start), + EndAt: int(img.End), + ChunkType: types.ChunkTypeImageCaption, + ParentChunkID: textChunk.ID, + ImageInfo: string(imageInfoJSON), + } + insertChunks = append(insertChunks, captionChunk) + logger.GetLogger(ctx).Infof("Created caption chunk for image %d in chunk #%d", i, chunkData.Seq) + } + } + + imageInfoJSON, err := json.Marshal(chunkImages) + if err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("Failed to marshal image info to JSON") + continue + } + textChunk.ImageInfo = string(imageInfoJSON) + } + } + + // Sort chunks by index for proper ordering + sort.Slice(insertChunks, func(i, j int) bool { + return insertChunks[i].ChunkIndex < 
insertChunks[j].ChunkIndex
+ })
+
+ // Only text chunks participate in previous/next linking
+ textChunks := make([]*types.Chunk, 0, len(chunks))
+ for _, chunk := range insertChunks {
+ if chunk.ChunkType == types.ChunkTypeText {
+ textChunks = append(textChunks, chunk)
+ }
+ }
+
+ // Link each text chunk to its neighbors
+ for i, chunk := range textChunks {
+ if i > 0 {
+ textChunks[i-1].NextChunkID = chunk.ID
+ }
+ if i < len(textChunks)-1 {
+ textChunks[i+1].PreChunkID = chunk.ID
+ }
+ }
+ if enableGraphRAG {
+ relationChunkSize := 5
+ indirectRelationChunkSize := 5
+ graphBuilder := NewGraphBuilder(s.config, chatModel)
+ err = graphBuilder.BuildGraph(ctx, textChunks)
+ if err != nil {
+ logger.GetLogger(ctx).WithField("error", err).Errorf("processChunks build graph failed")
+ span.RecordError(err)
+ } else {
+ for _, chunk := range textChunks {
+ chunk.RelationChunks, _ = json.Marshal(graphBuilder.GetRelationChunks(chunk.ID, relationChunkSize))
+ chunk.IndirectRelationChunks, _ =
+ json.Marshal(graphBuilder.GetIndirectRelationChunks(chunk.ID, indirectRelationChunkSize))
+ }
+ for i, entity := range graphBuilder.GetAllEntities() {
+ relationChunks, _ := json.Marshal(entity.ChunkIDs)
+ entityChunk := &types.Chunk{
+ ID: entity.ID,
+ TenantID: knowledge.TenantID,
+ KnowledgeID: knowledge.ID,
+ KnowledgeBaseID: knowledge.KnowledgeBaseID,
+ Content: entity.Description,
+ ChunkIndex: maxSeq + i*100 + 3,
+ IsEnabled: true,
+ CreatedAt: time.Now(),
+ UpdatedAt: time.Now(),
+ ChunkType: types.ChunkTypeEntity,
+ RelationChunks: types.JSON(relationChunks),
+ }
+ insertChunks = append(insertChunks, entityChunk)
+ }
+ for i, relationship := range graphBuilder.GetAllRelationships() {
+ relationChunks, _ := json.Marshal(relationship.ChunkIDs)
+ relationshipChunk := &types.Chunk{
+ ID: relationship.ID,
+ TenantID: knowledge.TenantID,
+ KnowledgeID: knowledge.ID,
+ KnowledgeBaseID: knowledge.KnowledgeBaseID,
+ Content: relationship.Description,
+ ChunkIndex: maxSeq + i*100 + 4,
+ IsEnabled: true,
+ CreatedAt: time.Now(),
+ UpdatedAt: time.Now(),
+ ChunkType: types.ChunkTypeRelationship,
+ RelationChunks: types.JSON(relationChunks),
+ }
+ insertChunks = append(insertChunks, relationshipChunk)
+ }
+ }
+ }
+
+ span.AddEvent("extract summary")
+ summary, err := s.getSummary(ctx, chatModel, knowledge, textChunks)
+ if err != nil {
+ logger.GetLogger(ctx).WithField("knowledge_id", knowledge.ID). 
+ WithField("error", err).Errorf("processChunks get summary failed, use first chunk as description") + if len(textChunks) > 0 { + knowledge.Description = textChunks[0].Content + } + } else { + knowledge.Description = summary + } + span.SetAttributes(attribute.String("summary", knowledge.Description)) + + // 批量索引 + if strings.TrimSpace(knowledge.Description) != "" && len(textChunks) > 0 { + sChunk := &types.Chunk{ + ID: uuid.New().String(), + TenantID: knowledge.TenantID, + KnowledgeID: knowledge.ID, + KnowledgeBaseID: knowledge.KnowledgeBaseID, + Content: fmt.Sprintf("# 文档名称\n%s\n\n# 摘要\n%s", knowledge.FileName, knowledge.Description), + ChunkIndex: maxSeq + 3, // 使用不冲突的索引方式 + IsEnabled: true, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + StartAt: 0, + EndAt: 0, + ChunkType: types.ChunkTypeSummary, + ParentChunkID: textChunks[0].ID, + } + logger.GetLogger(ctx).Infof("Created summary chunk for %s with index %d", + sChunk.ParentChunkID, sChunk.ChunkIndex) + insertChunks = append(insertChunks, sChunk) + } + + // Create index information for each chunk + indexInfoList := utils.MapSlice(insertChunks, func(chunk *types.Chunk) *types.IndexInfo { + return &types.IndexInfo{ + Content: chunk.Content, + SourceID: chunk.ID, + SourceType: types.ChunkSourceType, + ChunkID: chunk.ID, + KnowledgeID: knowledge.ID, + KnowledgeBaseID: knowledge.KnowledgeBaseID, + } + }) + + // Initialize retrieval engine + tenantInfo := ctx.Value(types.TenantInfoContextKey).(*types.Tenant) + retrieveEngine, err := retriever.NewCompositeRetrieveEngine(tenantInfo.RetrieverEngines.Engines) + if err != nil { + knowledge.ParseStatus = "failed" + knowledge.ErrorMessage = err.Error() + knowledge.UpdatedAt = time.Now() + s.repo.UpdateKnowledge(ctx, knowledge) + span.RecordError(err) + return + } + + // Calculate storage size required for embeddings + span.AddEvent("estimate storage size") + totalStorageSize := retrieveEngine.EstimateStorageSize(ctx, embeddingModel, indexInfoList) + if tenantInfo.StorageQuota > 0 { + // Re-fetch tenant storage information + tenantInfo, err = s.tenantRepo.GetTenantByID(ctx, tenantInfo.ID) + if err != nil { + knowledge.ParseStatus = "failed" + knowledge.ErrorMessage = err.Error() + knowledge.UpdatedAt = time.Now() + s.repo.UpdateKnowledge(ctx, knowledge) + span.RecordError(err) + return + } + // Check if there's enough storage quota available + if tenantInfo.StorageUsed+totalStorageSize > tenantInfo.StorageQuota { + knowledge.ParseStatus = "failed" + knowledge.ErrorMessage = "存储空间不足" + knowledge.UpdatedAt = time.Now() + s.repo.UpdateKnowledge(ctx, knowledge) + span.RecordError(errors.New("storage quota exceeded")) + return + } + } + + // Save chunks to database + span.AddEvent("create chunks") + if err := s.chunkService.CreateChunks(ctx, insertChunks); err != nil { + knowledge.ParseStatus = "failed" + knowledge.ErrorMessage = err.Error() + knowledge.UpdatedAt = time.Now() + s.repo.UpdateKnowledge(ctx, knowledge) + span.RecordError(err) + return + } + + span.AddEvent("batch index") + err = retrieveEngine.BatchIndex(ctx, embeddingModel, indexInfoList) + if err != nil { + knowledge.ParseStatus = "failed" + knowledge.ErrorMessage = err.Error() + knowledge.UpdatedAt = time.Now() + s.repo.UpdateKnowledge(ctx, knowledge) + + // delete failed chunks + if err := s.chunkService.DeleteChunksByKnowledgeID(ctx, knowledge.ID); err != nil { + logger.Errorf(ctx, "Delete chunks failed: %v", err) + } + + // delete index + if err := retrieveEngine.DeleteByKnowledgeIDList( + ctx, []string{knowledge.ID}, 
embeddingModel.GetDimensions(), + ); err != nil { + logger.Errorf(ctx, "Delete index failed: %v", err) + } + span.RecordError(err) + return + } + + // Update knowledge status to completed + knowledge.ParseStatus = "completed" + knowledge.EnableStatus = "enabled" + knowledge.StorageSize = totalStorageSize + now := time.Now() + knowledge.ProcessedAt = &now + knowledge.UpdatedAt = now + if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("processChunks update knowledge failed") + } + + // Update tenant's storage usage + tenantInfo.StorageUsed += totalStorageSize + if err := s.tenantRepo.AdjustStorageUsed(ctx, tenantInfo.ID, totalStorageSize); err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("processChunks update tenant storage used failed") + } +} + +// GetSummary generates a summary for knowledge content using an AI model +func (s *knowledgeService) getSummary(ctx context.Context, + summaryModel chat.Chat, knowledge *types.Knowledge, chunks []*types.Chunk, +) (string, error) { + // Get knowledge info from the first chunk + if len(chunks) == 0 { + return "", fmt.Errorf("no chunks provided for summary generation") + } + + // concat chunk contents + chunkContents := "" + allImageInfos := make([]*types.ImageInfo, 0) + + // then, sort chunks by StartAt + sortedChunks := make([]*types.Chunk, len(chunks)) + copy(sortedChunks, chunks) + sort.Slice(sortedChunks, func(i, j int) bool { + return sortedChunks[i].StartAt < sortedChunks[j].StartAt + }) + + // concat chunk contents and collect image infos + for _, chunk := range sortedChunks { + if chunk.EndAt > 4096 { + break + } + chunkContents = string([]rune(chunkContents)[:chunk.StartAt]) + chunk.Content + if chunk.ImageInfo != "" { + var images []*types.ImageInfo + if err := json.Unmarshal([]byte(chunk.ImageInfo), &images); err == nil { + allImageInfos = append(allImageInfos, images...) 
+ } + } + } + // remove markdown image syntax + re := regexp.MustCompile(`!\[[^\]]*\]\([^)]+\)`) + chunkContents = re.ReplaceAllString(chunkContents, "") + // collect all image infos + if len(allImageInfos) > 0 { + // add image infos to chunk contents + var imageAnnotations string + for _, img := range allImageInfos { + if img.Caption != "" { + imageAnnotations += fmt.Sprintf("\n[图片描述: %s]", img.Caption) + } + if img.OCRText != "" { + imageAnnotations += fmt.Sprintf("\n[图片文字: %s]", img.OCRText) + } + } + + // concat chunk contents and image annotations + chunkContents = chunkContents + imageAnnotations + } + + if len(chunkContents) < 30 { + return chunkContents, nil + } + + // Prepare content with metadata for summary generation + contentWithMetadata := chunkContents + + // Add knowledge metadata if available + if knowledge != nil { + metadataIntro := fmt.Sprintf("文档类型: %s\n文件名称: %s\n", knowledge.FileType, knowledge.FileName) + + // Add additional metadata if available + if knowledge.Type != "" { + metadataIntro += fmt.Sprintf("知识类型: %s\n", knowledge.Type) + } + + // Prepend metadata to content + contentWithMetadata = metadataIntro + "\n内容:\n" + contentWithMetadata + } + + // Generate summary using AI model + thinking := false + summary, err := summaryModel.Chat(ctx, []chat.Message{ + { + Role: "system", + Content: s.config.Conversation.GenerateSummaryPrompt, + }, + { + Role: "user", + Content: contentWithMetadata, + }, + }, &chat.ChatOptions{ + Temperature: 0.3, + MaxTokens: 1024, + Thinking: &thinking, + }) + if err != nil { + logger.GetLogger(ctx).WithField("error", err).Errorf("GetSummary failed") + return "", err + } + logger.GetLogger(ctx).WithField("summary", summary.Content).Infof("GetSummary success") + return summary.Content, nil +} + +// GetKnowledgeFile retrieves the physical file associated with a knowledge entry +func (s *knowledgeService) GetKnowledgeFile(ctx context.Context, id string) (io.ReadCloser, string, error) { + // Get knowledge record + knowledge, err := s.repo.GetKnowledgeByID(ctx, ctx.Value(types.TenantIDContextKey).(uint), id) + if err != nil { + return nil, "", err + } + + // Get the file from storage + file, err := s.fileSvc.GetFile(ctx, knowledge.FilePath) + if err != nil { + return nil, "", err + } + + return file, knowledge.FileName, nil +} + +func (s *knowledgeService) UpdateKnowledge(ctx context.Context, knowledge *types.Knowledge) error { + record, err := s.repo.GetKnowledgeByID(ctx, ctx.Value(types.TenantIDContextKey).(uint), knowledge.ID) + if err != nil { + logger.Errorf(ctx, "Failed to get knowledge record: %v", err) + return err + } + // if need other fields update, please add here + if knowledge.Title != "" { + record.Title = knowledge.Title + } + + // Update knowledge record in the repository + if err := s.repo.UpdateKnowledge(ctx, record); err != nil { + logger.Errorf(ctx, "Failed to update knowledge: %v", err) + return err + } + logger.Infof(ctx, "Knowledge updated successfully, ID: %s", knowledge.ID) + return nil +} + +// isValidFileType checks if a file type is supported +func isValidFileType(filename string) bool { + switch strings.ToLower(getFileType(filename)) { + case "pdf", "txt", "docx", "doc", "md", "markdown", "png", "jpg", "jpeg", "gif": + return true + default: + return false + } +} + +// getFileType extracts the file extension from a filename +func getFileType(filename string) string { + ext := strings.Split(filename, ".") + if len(ext) < 2 { + return "unknown" + } + return ext[len(ext)-1] +} + +// isValidURL verifies if a URL is 
valid +// isValidURL 检查URL是否有效 +func isValidURL(url string) bool { + if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") { + return true + } + return false +} + +// GetKnowledgeBatch retrieves multiple knowledge entries by their IDs +func (s *knowledgeService) GetKnowledgeBatch(ctx context.Context, + tenantID uint, ids []string, +) ([]*types.Knowledge, error) { + if len(ids) == 0 { + return nil, nil + } + return s.repo.GetKnowledgeBatch(ctx, tenantID, ids) +} + +// calculateFileHash calculates MD5 hash of a file +func calculateFileHash(file *multipart.FileHeader) (string, error) { + f, err := file.Open() + if err != nil { + return "", err + } + defer f.Close() + + h := md5.New() + if _, err := io.Copy(h, f); err != nil { + return "", err + } + + // Reset file pointer for subsequent operations + if _, err := f.Seek(0, 0); err != nil { + return "", err + } + + return hex.EncodeToString(h.Sum(nil)), nil +} + +func calculateStr(strList ...string) string { + h := md5.New() + input := strings.Join(strList, "") + h.Write([]byte(input)) + return hex.EncodeToString(h.Sum(nil)) +} + +func (s *knowledgeService) CloneKnowledgeBase(ctx context.Context, srcID, dstID string) error { + srcKB, dstKB, err := s.kbService.CopyKnowledgeBase(ctx, srcID, dstID) + if err != nil { + logger.Errorf(ctx, "Failed to copy knowledge base: %v", err) + return err + } + + addKnowledge, err := s.repo.AminusB(ctx, srcKB.TenantID, srcKB.ID, dstKB.TenantID, dstKB.ID) + if err != nil { + logger.Errorf(ctx, "Failed to get knowledge: %v", err) + return err + } + + delKnowledge, err := s.repo.AminusB(ctx, dstKB.TenantID, dstKB.ID, srcKB.TenantID, srcKB.ID) + if err != nil { + logger.Errorf(ctx, "Failed to get knowledge: %v", err) + return err + } + logger.Infof(ctx, "Knowledge after update to add: %d, delete: %d", len(addKnowledge), len(delKnowledge)) + + batch := 10 + wg := sync.WaitGroup{} + errCh := make(chan error, len(delKnowledge)) + for ids := range slices.Chunk(delKnowledge, batch) { + wg.Add(1) + go func(ids []string) { + defer wg.Done() + if err := s.DeleteKnowledgeList(ctx, ids); err != nil { + errCh <- fmt.Errorf("delete knowledge %v: %w", ids, err) + } + }(ids) + } + wg.Wait() + close(errCh) + for err := range errCh { + if err != nil { + return err + } + } + + wg = sync.WaitGroup{} + errCh = make(chan error, len(addKnowledge)+len(delKnowledge)) + for ids := range slices.Chunk(addKnowledge, batch) { + wg.Add(1) + go func(ids []string) { + defer wg.Done() + for _, kID := range ids { + srcKn, err := s.repo.GetKnowledgeByID(ctx, srcKB.TenantID, kID) + if err != nil { + errCh <- fmt.Errorf("get knowledge %s: %w", kID, err) + continue + } + if err := s.cloneKnowledge(ctx, srcKn, dstKB); err != nil { + errCh <- fmt.Errorf("move knowledge %s: %w", kID, err) + } + } + }(ids) + } + wg.Wait() + close(errCh) + for err := range errCh { + if err != nil { + return err + } + } + return nil +} + +func (s *knowledgeService) updateChunkVector(ctx context.Context, kbID string, chunks []*types.Chunk) error { + // Get embedding model from knowledge base + sourceKB, err := s.kbService.GetKnowledgeBaseByID(ctx, kbID) + if err != nil { + return err + } + embeddingModel, err := s.modelService.GetEmbeddingModel(ctx, sourceKB.EmbeddingModelID) + if err != nil { + return err + } + + // Initialize composite retrieve engine from tenant configuration + indexInfo := make([]*types.IndexInfo, 0, len(chunks)) + ids := make([]string, 0, len(chunks)) + for _, chunk := range chunks { + if chunk.KnowledgeBaseID != kbID { + 
logger.Warnf(ctx, "Knowledge base ID mismatch: %s != %s", chunk.KnowledgeBaseID, kbID) + continue + } + indexInfo = append(indexInfo, &types.IndexInfo{ + Content: chunk.Content, + SourceID: chunk.ID, + SourceType: types.ChunkSourceType, + ChunkID: chunk.ID, + KnowledgeID: chunk.KnowledgeID, + KnowledgeBaseID: chunk.KnowledgeBaseID, + }) + ids = append(ids, chunk.ID) + } + + tenantInfo := ctx.Value(types.TenantInfoContextKey).(*types.Tenant) + retrieveEngine, err := retriever.NewCompositeRetrieveEngine(tenantInfo.RetrieverEngines.Engines) + if err != nil { + return err + } + + // Delete old vector representation of the chunk + err = retrieveEngine.DeleteByChunkIDList(ctx, ids, embeddingModel.GetDimensions()) + if err != nil { + return err + } + + // Index updated chunk content with new vector representation + err = retrieveEngine.BatchIndex(ctx, embeddingModel, indexInfo) + if err != nil { + return err + } + return nil +} + +func (s *knowledgeService) UpdateImageInfo(ctx context.Context, knowledgeID string, chunkID string, imageInfo string) error { + var images []*types.ImageInfo + if err := json.Unmarshal([]byte(imageInfo), &images); err != nil { + logger.Errorf(ctx, "Failed to unmarshal image info: %v", err) + return err + } + if len(images) != 1 { + logger.Warnf(ctx, "Expected exactly one image info, got %d", len(images)) + return nil + } + image := images[0] + + // Retrieve all chunks with the given parent chunk ID + chunk, err := s.chunkService.GetChunkByID(ctx, knowledgeID, chunkID) + if err != nil { + logger.Errorf(ctx, "Failed to get chunk: %v", err) + return err + } + chunk.ImageInfo = imageInfo + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + chunkChildren, err := s.chunkService.ListChunkByParentID(ctx, tenantID, chunkID) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "parent_chunk_id": chunkID, + "tenant_id": tenantID, + }) + return err + } + logger.Infof(ctx, "Found %d chunks with parent chunk ID: %s", len(chunkChildren), chunkID) + + // Iterate through each chunk and update its content based on the image information + updateChunk := []*types.Chunk{chunk} + for i, child := range chunkChildren { + // Skip chunks that are not image types + var cImageInfo []*types.ImageInfo + err = json.Unmarshal([]byte(child.ImageInfo), &cImageInfo) + if err != nil { + logger.Warnf(ctx, "Failed to unmarshal image %s info: %v", child.ID, err) + continue + } + if len(cImageInfo) == 0 { + continue + } + if cImageInfo[0].OriginalURL != image.OriginalURL { + logger.Warnf(ctx, "Skipping chunk ID: %s, image URL mismatch: %s != %s", + child.ID, cImageInfo[0].OriginalURL, image.OriginalURL) + continue + } + + // Update the chunk content based on the image type + if child.ChunkType == types.ChunkTypeImageCaption && image.Caption != cImageInfo[0].Caption { + child.Content = fmt.Sprintf("图片描述: %s", image.Caption) + child.ImageInfo = imageInfo + updateChunk = append(updateChunk, chunkChildren[i]) + } else if child.ChunkType == types.ChunkTypeImageOCR && image.OCRText != cImageInfo[0].OCRText { + child.Content = fmt.Sprintf("图片OCR文本: %s", image.OCRText) + child.ImageInfo = imageInfo + updateChunk = append(updateChunk, chunkChildren[i]) + } + } + logger.Infof(ctx, "Updated %d chunks out of %d total chunks", len(updateChunk), len(chunkChildren)+1) + + // Update the chunks + for _, c := range updateChunk { + err := s.chunkService.UpdateChunk(ctx, c) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "chunk_id": c.ID, + "knowledge_id": 
c.KnowledgeID, + }) + return err + } + } + + // Update the chunk vector + err = s.updateChunkVector(ctx, chunk.KnowledgeBaseID, updateChunk) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "chunk_id": chunk.ID, + "knowledge_id": chunk.KnowledgeID, + }) + return err + } + + // Update the knowledge file hash + knowledge, err := s.repo.GetKnowledgeByID(ctx, tenantID, knowledgeID) + if err != nil { + logger.Errorf(ctx, "Failed to get knowledge: %v", err) + return err + } + fileHash := calculateStr(knowledgeID, knowledge.FileHash, imageInfo) + knowledge.FileHash = fileHash + err = s.repo.UpdateKnowledge(ctx, knowledge) + if err != nil { + logger.Warnf(ctx, "Failed to update knowledge file hash: %v", err) + } + + logger.Infof(ctx, "Updated chunk successfully, chunk ID: %s, knowledge ID: %s", chunk.ID, chunk.KnowledgeID) + return nil +} + +// CloneChunk clone chunks from one knowledge to another +// This method transfers a chunk from a source knowledge document to a target knowledge document +// It handles the creation of new chunks in the target knowledge and updates the vector database accordingly +// Parameters: +// - ctx: Context with authentication and request information +// - src: Source knowledge document containing the chunk to move +// - dst: Target knowledge document where the chunk will be moved +// +// Returns: +// - error: Any error encountered during the move operation +// +// This method handles the chunk transfer logic, including creating new chunks in the target knowledge +// and updating the vector database representation of the moved chunks. +// It also ensures that the chunk's relationships (like pre and next chunk IDs) are maintained +// by mapping the source chunk IDs to the new target chunk IDs. +func (s *knowledgeService) CloneChunk(ctx context.Context, src, dst *types.Knowledge) error { + chunkPage := 1 + chunkPageSize := 100 + srcTodst := map[string]string{} + targetChunks := make([]*types.Chunk, 0, 10) + chunkType := []types.ChunkType{ + types.ChunkTypeText, types.ChunkTypeSummary, + types.ChunkTypeImageCaption, types.ChunkTypeImageOCR, + } + for { + sourceChunks, _, err := s.chunkRepo.ListPagedChunksByKnowledgeID(ctx, + src.TenantID, + src.ID, + &types.Pagination{ + Page: chunkPage, + PageSize: chunkPageSize, + }, + chunkType, + ) + chunkPage++ + if err != nil { + return err + } + if len(sourceChunks) == 0 { + break + } + for _, sourceChunk := range sourceChunks { + targetChunk := &types.Chunk{ + ID: uuid.New().String(), + TenantID: dst.TenantID, + KnowledgeID: dst.ID, + KnowledgeBaseID: dst.KnowledgeBaseID, + Content: sourceChunk.Content, + ChunkIndex: sourceChunk.ChunkIndex, + IsEnabled: sourceChunk.IsEnabled, + StartAt: sourceChunk.StartAt, + EndAt: sourceChunk.EndAt, + PreChunkID: sourceChunk.PreChunkID, + NextChunkID: sourceChunk.NextChunkID, + ChunkType: sourceChunk.ChunkType, + ParentChunkID: sourceChunk.ParentChunkID, + ImageInfo: sourceChunk.ImageInfo, + } + targetChunks = append(targetChunks, targetChunk) + srcTodst[sourceChunk.ID] = targetChunk.ID + } + } + for _, targetChunk := range targetChunks { + if val, ok := srcTodst[targetChunk.PreChunkID]; ok { + targetChunk.PreChunkID = val + } else { + targetChunk.PreChunkID = "" + } + if val, ok := srcTodst[targetChunk.NextChunkID]; ok { + targetChunk.NextChunkID = val + } else { + targetChunk.NextChunkID = "" + } + if val, ok := srcTodst[targetChunk.ParentChunkID]; ok { + targetChunk.ParentChunkID = val + } else { + targetChunk.ParentChunkID = "" + } + } + for chunks := range 
slices.Chunk(targetChunks, chunkPageSize) { + err := s.chunkRepo.CreateChunks(ctx, chunks) + if err != nil { + return err + } + } + + tenantInfo := ctx.Value(types.TenantInfoContextKey).(*types.Tenant) + retrieveEngine, err := retriever.NewCompositeRetrieveEngine(tenantInfo.RetrieverEngines.Engines) + if err != nil { + return err + } + embeddingModel, err := s.modelService.GetEmbeddingModel(ctx, dst.EmbeddingModelID) + if err != nil { + return err + } + if err := retrieveEngine.CopyIndices(ctx, src.KnowledgeBaseID, dst.KnowledgeBaseID, + map[string]string{src.ID: dst.ID}, + srcTodst, + embeddingModel.GetDimensions(), + ); err != nil { + return err + } + return nil +} diff --git a/internal/application/service/knowledgebase.go b/internal/application/service/knowledgebase.go new file mode 100644 index 0000000..326290a --- /dev/null +++ b/internal/application/service/knowledgebase.go @@ -0,0 +1,574 @@ +package service + +import ( + "context" + "errors" + "slices" + "time" + + "encoding/json" + + "github.com/Tencent/WeKnora/internal/application/service/retriever" + "github.com/Tencent/WeKnora/internal/common" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/embedding" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/google/uuid" +) + +// ErrInvalidTenantID represents an error for invalid tenant ID +var ErrInvalidTenantID = errors.New("invalid tenant ID") + +// knowledgeBaseService implements the knowledge base service interface +type knowledgeBaseService struct { + repo interfaces.KnowledgeBaseRepository + kgRepo interfaces.KnowledgeRepository + chunkRepo interfaces.ChunkRepository + modelService interfaces.ModelService +} + +// NewKnowledgeBaseService creates a new knowledge base service +func NewKnowledgeBaseService(repo interfaces.KnowledgeBaseRepository, + kgRepo interfaces.KnowledgeRepository, + chunkRepo interfaces.ChunkRepository, + modelService interfaces.ModelService, +) interfaces.KnowledgeBaseService { + return &knowledgeBaseService{ + repo: repo, + kgRepo: kgRepo, + chunkRepo: chunkRepo, + modelService: modelService, + } +} + +// CreateKnowledgeBase creates a new knowledge base +func (s *knowledgeBaseService) CreateKnowledgeBase(ctx context.Context, + kb *types.KnowledgeBase, +) (*types.KnowledgeBase, error) { + // Generate UUID and set creation timestamps + kb.ID = uuid.New().String() + kb.CreatedAt = time.Now() + kb.TenantID = ctx.Value(types.TenantIDContextKey).(uint) + kb.UpdatedAt = time.Now() + + logger.Infof(ctx, "Creating knowledge base, ID: %s, tenant ID: %d, name: %s", kb.ID, kb.TenantID, kb.Name) + + if err := s.repo.CreateKnowledgeBase(ctx, kb); err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_base_id": kb.ID, + "tenant_id": kb.TenantID, + }) + return nil, err + } + + logger.Infof(ctx, "Knowledge base created successfully, ID: %s, name: %s", kb.ID, kb.Name) + return kb, nil +} + +// GetKnowledgeBaseByID retrieves a knowledge base by its ID +func (s *knowledgeBaseService) GetKnowledgeBaseByID(ctx context.Context, id string) (*types.KnowledgeBase, error) { + if id == "" { + logger.Error(ctx, "Knowledge base ID is empty") + return nil, errors.New("knowledge base ID cannot be empty") + } + + logger.Infof(ctx, "Retrieving knowledge base, ID: %s", id) + + kb, err := s.repo.GetKnowledgeBaseByID(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_base_id": id, + }) + return nil, err + } 
+ + logger.Infof(ctx, "Knowledge base retrieved successfully, ID: %s, name: %s", kb.ID, kb.Name) + return kb, nil +} + +// ListKnowledgeBases returns all knowledge bases for a tenant +func (s *knowledgeBaseService) ListKnowledgeBases(ctx context.Context) ([]*types.KnowledgeBase, error) { + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Retrieving knowledge base list for tenant, tenant ID: %d", tenantID) + + kbs, err := s.repo.ListKnowledgeBasesByTenantID(ctx, tenantID) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": tenantID, + }) + return nil, err + } + + logger.Infof( + ctx, + "Knowledge base list retrieved successfully, tenant ID: %d, knowledge base count: %d", + tenantID, + len(kbs), + ) + return kbs, nil +} + +// UpdateKnowledgeBase updates a knowledge base's properties +func (s *knowledgeBaseService) UpdateKnowledgeBase(ctx context.Context, + id string, + name string, + description string, + config *types.KnowledgeBaseConfig, +) (*types.KnowledgeBase, error) { + if id == "" { + logger.Error(ctx, "Knowledge base ID is empty") + return nil, errors.New("knowledge base ID cannot be empty") + } + + logger.Infof(ctx, "Updating knowledge base, ID: %s, name: %s", id, name) + + // Get existing knowledge base + kb, err := s.repo.GetKnowledgeBaseByID(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_base_id": id, + }) + return nil, err + } + + // Update the knowledge base properties + kb.Name = name + kb.Description = description + kb.ChunkingConfig = config.ChunkingConfig + kb.ImageProcessingConfig = config.ImageProcessingConfig + kb.UpdatedAt = time.Now() + + logger.Info(ctx, "Saving knowledge base update") + if err := s.repo.UpdateKnowledgeBase(ctx, kb); err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_base_id": id, + }) + return nil, err + } + + logger.Infof(ctx, "Knowledge base updated successfully, ID: %s, name: %s", kb.ID, kb.Name) + return kb, nil +} + +// DeleteKnowledgeBase deletes a knowledge base by its ID +func (s *knowledgeBaseService) DeleteKnowledgeBase(ctx context.Context, id string) error { + if id == "" { + logger.Error(ctx, "Knowledge base ID is empty") + return errors.New("knowledge base ID cannot be empty") + } + + logger.Infof(ctx, "Deleting knowledge base, ID: %s", id) + + err := s.repo.DeleteKnowledgeBase(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_base_id": id, + }) + return err + } + + logger.Infof(ctx, "Knowledge base deleted successfully, ID: %s", id) + return nil +} + +// SetEmbeddingModel sets the embedding model for a knowledge base +func (s *knowledgeBaseService) SetEmbeddingModel(ctx context.Context, id string, modelID string) error { + if id == "" { + logger.Error(ctx, "Knowledge base ID is empty") + return errors.New("knowledge base ID cannot be empty") + } + + if modelID == "" { + logger.Error(ctx, "Model ID is empty") + return errors.New("model ID cannot be empty") + } + + logger.Infof(ctx, "Setting embedding model for knowledge base, knowledge base ID: %s, model ID: %s", id, modelID) + + // Get the knowledge base + kb, err := s.repo.GetKnowledgeBaseByID(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_base_id": id, + }) + return err + } + + // Update the knowledge base's embedding model + kb.EmbeddingModelID = modelID + kb.UpdatedAt = time.Now() + + logger.Info(ctx, "Saving knowledge base embedding 
model update") + err = s.repo.UpdateKnowledgeBase(ctx, kb) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_base_id": id, + "embedding_model_id": modelID, + }) + return err + } + + logger.Infof( + ctx, + "Knowledge base embedding model set successfully, knowledge base ID: %s, model ID: %s", + id, + modelID, + ) + return nil +} + +// CopyKnowledgeBase copies a knowledge base to a new knowledge base +// 浅拷贝 +func (s *knowledgeBaseService) CopyKnowledgeBase(ctx context.Context, + srcKB string, dstKB string, +) (*types.KnowledgeBase, *types.KnowledgeBase, error) { + sourceKB, err := s.repo.GetKnowledgeBaseByID(ctx, srcKB) + if err != nil { + logger.Errorf(ctx, "Get source knowledge base failed: %v", err) + return nil, nil, err + } + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + var targetKB *types.KnowledgeBase + if dstKB != "" { + targetKB, err = s.repo.GetKnowledgeBaseByID(ctx, dstKB) + if err != nil { + return nil, nil, err + } + } else { + targetKB = &types.KnowledgeBase{ + ID: uuid.New().String(), + Name: sourceKB.Name, + Description: sourceKB.Description, + TenantID: tenantID, + ChunkingConfig: sourceKB.ChunkingConfig, + ImageProcessingConfig: sourceKB.ImageProcessingConfig, + EmbeddingModelID: sourceKB.EmbeddingModelID, + SummaryModelID: sourceKB.SummaryModelID, + } + if err := s.repo.CreateKnowledgeBase(ctx, targetKB); err != nil { + return nil, nil, err + } + } + return sourceKB, targetKB, nil +} + +// HybridSearch performs hybrid search, including vector retrieval and keyword retrieval +func (s *knowledgeBaseService) HybridSearch(ctx context.Context, + id string, + params types.SearchParams, +) ([]*types.SearchResult, error) { + logger.Infof(ctx, "Hybrid search parameters, knowledge base ID: %s, query text: %s", id, params.QueryText) + + tenantInfo := ctx.Value(types.TenantInfoContextKey).(*types.Tenant) + logger.Infof(ctx, "Creating composite retrieval engine, tenant ID: %d", tenantInfo.ID) + + // Create a composite retrieval engine with tenant's configured retrievers + retrieveEngine, err := retriever.NewCompositeRetrieveEngine(tenantInfo.RetrieverEngines.Engines) + if err != nil { + logger.Errorf(ctx, "Failed to create retrieval engine: %v", err) + return nil, err + } + + var retrieveParams []types.RetrieveParams + var embeddingModel embedding.Embedder + var kb *types.KnowledgeBase + + // Add vector retrieval params if supported + if retrieveEngine.SupportRetriever(types.VectorRetrieverType) { + logger.Info(ctx, "Vector retrieval supported, preparing vector retrieval parameters") + + kb, err = s.repo.GetKnowledgeBaseByID(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_base_id": id, + }) + return nil, err + } + + logger.Infof(ctx, "Getting embedding model, model ID: %s", kb.EmbeddingModelID) + embeddingModel, err = s.modelService.GetEmbeddingModel(ctx, kb.EmbeddingModelID) + if err != nil { + logger.Errorf(ctx, "Failed to get embedding model, model ID: %s, error: %v", kb.EmbeddingModelID, err) + return nil, err + } + logger.Infof(ctx, "Embedding model retrieved: %v", embeddingModel) + + // Generate embedding vector for the query text + logger.Info(ctx, "Starting to generate query embedding") + queryEmbedding, err := embeddingModel.Embed(ctx, params.QueryText) + if err != nil { + logger.Errorf(ctx, "Failed to embed query text, query text: %s, error: %v", params.QueryText, err) + return nil, err + } + logger.Infof(ctx, "Query embedding generated successfully, embedding vector 
length: %d", len(queryEmbedding)) + + retrieveParams = append(retrieveParams, types.RetrieveParams{ + Query: params.QueryText, + Embedding: queryEmbedding, + KnowledgeBaseIDs: []string{id}, + TopK: params.MatchCount, + Threshold: params.VectorThreshold, + RetrieverType: types.VectorRetrieverType, + }) + logger.Info(ctx, "Vector retrieval parameters setup completed") + } + + // Add keyword retrieval params if supported + if retrieveEngine.SupportRetriever(types.KeywordsRetrieverType) { + logger.Info(ctx, "Keyword retrieval supported, preparing keyword retrieval parameters") + retrieveParams = append(retrieveParams, types.RetrieveParams{ + Query: params.QueryText, + KnowledgeBaseIDs: []string{id}, + TopK: params.MatchCount, + Threshold: params.KeywordThreshold, + RetrieverType: types.KeywordsRetrieverType, + }) + logger.Info(ctx, "Keyword retrieval parameters setup completed") + } + + if len(retrieveParams) == 0 { + logger.Error(ctx, "No retrieval parameters available") + return nil, errors.New("no retrieve params") + } + + // Execute retrieval using the configured engines + logger.Infof(ctx, "Starting retrieval, parameter count: %d", len(retrieveParams)) + retrieveResults, err := retrieveEngine.Retrieve(ctx, retrieveParams) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "knowledge_base_id": id, + "query_text": params.QueryText, + }) + return nil, err + } + + // Collect all results from different retrievers and deduplicate by chunk ID + logger.Infof(ctx, "Processing retrieval results") + matchResults := []*types.IndexWithScore{} + for _, retrieveResult := range retrieveResults { + logger.Infof(ctx, "Retrieval results, engine: %v, retriever: %v, count: %v", + retrieveResult.RetrieverEngineType, + retrieveResult.RetrieverType, + len(retrieveResult.Results), + ) + matchResults = append(matchResults, retrieveResult.Results...) + } + + // Early return if no results + if len(matchResults) == 0 { + logger.Info(ctx, "No search results found") + return nil, nil + } + + // Deduplicate results by chunk ID + logger.Infof(ctx, "Result count before deduplication: %d", len(matchResults)) + deduplicatedChunks := common.Deduplicate(func(r *types.IndexWithScore) string { return r.ChunkID }, matchResults...) 
+ logger.Infof(ctx, "Result count after deduplication: %d", len(deduplicatedChunks)) + + return s.processSearchResults(ctx, deduplicatedChunks) +} + +// processSearchResults handles the processing of search results, optimizing database queries +func (s *knowledgeBaseService) processSearchResults(ctx context.Context, + chunks []*types.IndexWithScore) ([]*types.SearchResult, error) { + + if len(chunks) == 0 { + return nil, nil + } + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + + // Prepare data structures for efficient processing + var knowledgeIDs []string + var chunkIDs []string + chunkScores := make(map[string]float64) + chunkMatchTypes := make(map[string]types.MatchType) + processedKnowledgeIDs := make(map[string]bool) + + // Collect all knowledge and chunk IDs + for _, chunk := range chunks { + if !processedKnowledgeIDs[chunk.KnowledgeID] { + knowledgeIDs = append(knowledgeIDs, chunk.KnowledgeID) + processedKnowledgeIDs[chunk.KnowledgeID] = true + } + + chunkIDs = append(chunkIDs, chunk.ChunkID) + chunkScores[chunk.ChunkID] = chunk.Score + chunkMatchTypes[chunk.ChunkID] = chunk.MatchType + } + + // Batch fetch knowledge data + logger.Infof(ctx, "Fetching knowledge data for %d IDs", len(knowledgeIDs)) + knowledgeMap, err := s.fetchKnowledgeData(ctx, tenantID, knowledgeIDs) + if err != nil { + return nil, err + } + + // Batch fetch all chunks in one go + logger.Infof(ctx, "Fetching chunk data for %d IDs", len(chunkIDs)) + allChunks, err := s.chunkRepo.ListChunksByID(ctx, tenantID, chunkIDs) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": tenantID, + "chunk_ids": chunkIDs, + }) + return nil, err + } + + // Build chunk map and collect additional IDs to fetch + chunkMap := make(map[string]*types.Chunk, len(allChunks)) + var additionalChunkIDs []string + processedChunkIDs := make(map[string]bool) + + // First pass: Build chunk map and collect parent IDs + for _, chunk := range allChunks { + chunkMap[chunk.ID] = chunk + processedChunkIDs[chunk.ID] = true + + // Collect parent chunks + if chunk.ParentChunkID != "" && !processedChunkIDs[chunk.ParentChunkID] { + additionalChunkIDs = append(additionalChunkIDs, chunk.ParentChunkID) + processedChunkIDs[chunk.ParentChunkID] = true + + // Pass score to parent + chunkScores[chunk.ParentChunkID] = chunkScores[chunk.ID] + chunkMatchTypes[chunk.ParentChunkID] = types.MatchTypeParentChunk + } + + // Collect related chunks + relationChunkIDs := s.collectRelatedChunkIDs(chunk, processedChunkIDs) + for _, chunkID := range relationChunkIDs { + additionalChunkIDs = append(additionalChunkIDs, chunkID) + chunkMatchTypes[chunkID] = types.MatchTypeRelationChunk + } + + // Add nearby chunks (prev and next) + if chunk.ChunkType == types.ChunkTypeText { + if chunk.NextChunkID != "" && !processedChunkIDs[chunk.NextChunkID] { + additionalChunkIDs = append(additionalChunkIDs, chunk.NextChunkID) + processedChunkIDs[chunk.NextChunkID] = true + chunkMatchTypes[chunk.NextChunkID] = types.MatchTypeNearByChunk + } + if chunk.PreChunkID != "" && !processedChunkIDs[chunk.PreChunkID] { + additionalChunkIDs = append(additionalChunkIDs, chunk.PreChunkID) + processedChunkIDs[chunk.PreChunkID] = true + chunkMatchTypes[chunk.PreChunkID] = types.MatchTypeNearByChunk + } + } + } + + // Fetch all additional chunks in one go if needed + if len(additionalChunkIDs) > 0 { + logger.Infof(ctx, "Fetching %d additional chunks", len(additionalChunkIDs)) + additionalChunks, err := s.chunkRepo.ListChunksByID(ctx, tenantID, 
additionalChunkIDs) + if err != nil { + logger.Warnf(ctx, "Failed to fetch some additional chunks: %v", err) + // Continue with what we have + } else { + // Add to chunk map + for _, chunk := range additionalChunks { + chunkMap[chunk.ID] = chunk + } + } + } + + // Build final search results + var searchResults []*types.SearchResult + for chunkID, chunk := range chunkMap { + if !s.isValidTextChunk(chunk) { + continue + } + + score, hasScore := chunkScores[chunkID] + if !hasScore || score <= 0 { + score = 0.0 + } + + if knowledge, ok := knowledgeMap[chunk.KnowledgeID]; ok { + matchType := types.MatchTypeParentChunk + if specificType, exists := chunkMatchTypes[chunkID]; exists { + matchType = specificType + } else { + logger.Warnf(ctx, "Unkonwn match type for chunk: %s", chunkID) + continue + } + searchResults = append(searchResults, s.buildSearchResult(chunk, knowledge, score, matchType)) + } + } + logger.Infof(ctx, "Search results processed, total: %d", len(searchResults)) + return searchResults, nil +} + +// collectRelatedChunkIDs extracts related chunk IDs from a chunk +func (s *knowledgeBaseService) collectRelatedChunkIDs(chunk *types.Chunk, processedIDs map[string]bool) []string { + var relatedIDs []string + // Process direct relations + if len(chunk.RelationChunks) > 0 { + var relations []string + if err := json.Unmarshal(chunk.RelationChunks, &relations); err == nil { + for _, id := range relations { + if !processedIDs[id] { + relatedIDs = append(relatedIDs, id) + processedIDs[id] = true + } + } + } + } + return relatedIDs +} + +// buildSearchResult creates a search result from chunk and knowledge +func (s *knowledgeBaseService) buildSearchResult(chunk *types.Chunk, + knowledge *types.Knowledge, + score float64, + matchType types.MatchType) *types.SearchResult { + + return &types.SearchResult{ + ID: chunk.ID, + Content: chunk.Content, + KnowledgeID: chunk.KnowledgeID, + ChunkIndex: chunk.ChunkIndex, + KnowledgeTitle: knowledge.Title, + StartAt: chunk.StartAt, + EndAt: chunk.EndAt, + Seq: chunk.ChunkIndex, + Score: score, + MatchType: matchType, + Metadata: knowledge.GetMetadata(), + ChunkType: string(chunk.ChunkType), + ParentChunkID: chunk.ParentChunkID, + ImageInfo: chunk.ImageInfo, + KnowledgeFilename: knowledge.FileName, + KnowledgeSource: knowledge.Source, + } +} + +// isValidTextChunk checks if a chunk is a valid text chunk +func (s *knowledgeBaseService) isValidTextChunk(chunk *types.Chunk) bool { + return slices.Contains([]types.ChunkType{ + types.ChunkTypeText, types.ChunkTypeSummary, + }, chunk.ChunkType) +} + +// fetchKnowledgeData gets knowledge data in batch +func (s *knowledgeBaseService) fetchKnowledgeData(ctx context.Context, + tenantID uint, + knowledgeIDs []string, +) (map[string]*types.Knowledge, error) { + knowledges, err := s.kgRepo.GetKnowledgeBatch(ctx, tenantID, knowledgeIDs) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": tenantID, + "knowledge_ids": knowledgeIDs, + }) + return nil, err + } + + knowledgeMap := make(map[string]*types.Knowledge, len(knowledges)) + for _, knowledge := range knowledges { + knowledgeMap[knowledge.ID] = knowledge + } + + return knowledgeMap, nil +} diff --git a/internal/application/service/message.go b/internal/application/service/message.go new file mode 100644 index 0000000..5304de9 --- /dev/null +++ b/internal/application/service/message.go @@ -0,0 +1,287 @@ +package service + +import ( + "context" + "time" + + "github.com/Tencent/WeKnora/internal/logger" + 
"github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// messageService implements the MessageService interface for managing messaging operations +// It handles creating, retrieving, updating, and deleting messages within sessions +type messageService struct { + messageRepo interfaces.MessageRepository // Repository for message storage operations + sessionRepo interfaces.SessionRepository // Repository for session validation +} + +// NewMessageService creates a new message service instance with the required repositories +// Parameters: +// - messageRepo: Repository for persisting and retrieving messages +// - sessionRepo: Repository for validating session existence +// +// Returns an implementation of the MessageService interface +func NewMessageService(messageRepo interfaces.MessageRepository, + sessionRepo interfaces.SessionRepository, +) interfaces.MessageService { + return &messageService{ + messageRepo: messageRepo, + sessionRepo: sessionRepo, + } +} + +// CreateMessage creates a new message within an existing session +// It validates that the session exists before creating the message +// Parameters: +// - ctx: Context containing tenant information +// - message: The message to be created +// +// Returns the created message or an error if creation fails +func (s *messageService) CreateMessage(ctx context.Context, message *types.Message) (*types.Message, error) { + logger.Info(ctx, "Start creating message") + logger.Infof(ctx, "Creating message for session ID: %s", message.SessionID) + + // Check if the session exists to validate the message belongs to a valid session + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Checking if session exists, tenant ID: %d, session ID: %s", tenantID, message.SessionID) + _, err := s.sessionRepo.Get(ctx, tenantID, message.SessionID) + if err != nil { + logger.Errorf(ctx, "Failed to get session: %v", err) + return nil, err + } + + // Create the message in the repository + logger.Info(ctx, "Session exists, creating message") + createdMessage, err := s.messageRepo.CreateMessage(ctx, message) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "session_id": message.SessionID, + }) + return nil, err + } + + logger.Infof(ctx, "Message created successfully, ID: %s", createdMessage.ID) + return createdMessage, nil +} + +// GetMessage retrieves a specific message by its ID within a session +// Parameters: +// - ctx: Context containing tenant information +// - sessionID: The ID of the session containing the message +// - messageID: The ID of the message to retrieve +// +// Returns the requested message or an error if retrieval fails +func (s *messageService) GetMessage(ctx context.Context, sessionID string, messageID string) (*types.Message, error) { + logger.Info(ctx, "Start getting message") + logger.Infof(ctx, "Getting message, session ID: %s, message ID: %s", sessionID, messageID) + + // Verify the session exists before attempting to retrieve the message + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Checking if session exists, tenant ID: %d", tenantID) + _, err := s.sessionRepo.Get(ctx, tenantID, sessionID) + if err != nil { + logger.Errorf(ctx, "Failed to get session: %v", err) + return nil, err + } + + // Retrieve the message from the repository + logger.Info(ctx, "Session exists, getting message") + message, err := s.messageRepo.GetMessage(ctx, sessionID, messageID) + if err != nil { + logger.ErrorWithFields(ctx, 
err, map[string]interface{}{ + "session_id": sessionID, + "message_id": messageID, + }) + return nil, err + } + + logger.Info(ctx, "Message retrieved successfully") + return message, nil +} + +// GetMessagesBySession retrieves paginated messages for a specific session +// Parameters: +// - ctx: Context containing tenant information +// - sessionID: The ID of the session to get messages from +// - page: The page number for pagination (0-based) +// - pageSize: The number of messages per page +// +// Returns a slice of messages or an error if retrieval fails +func (s *messageService) GetMessagesBySession(ctx context.Context, + sessionID string, page int, pageSize int, +) ([]*types.Message, error) { + logger.Info(ctx, "Start getting messages by session") + logger.Infof(ctx, "Getting messages for session ID: %s, page: %d, pageSize: %d", sessionID, page, pageSize) + + // Verify the session exists before retrieving messages + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Checking if session exists, tenant ID: %d", tenantID) + _, err := s.sessionRepo.Get(ctx, tenantID, sessionID) + if err != nil { + logger.Errorf(ctx, "Failed to get session: %v", err) + return nil, err + } + + // Retrieve paginated messages + logger.Info(ctx, "Session exists, getting messages") + messages, err := s.messageRepo.GetMessagesBySession(ctx, sessionID, page, pageSize) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "session_id": sessionID, + "page": page, + "page_size": pageSize, + }) + return nil, err + } + + logger.Infof(ctx, "Retrieved %d messages successfully", len(messages)) + return messages, nil +} + +// GetRecentMessagesBySession retrieves the most recent messages from a session +// This is typically used for loading the initial conversation history +// Parameters: +// - ctx: Context containing tenant information +// - sessionID: The ID of the session to get messages from +// - limit: Maximum number of messages to retrieve +// +// Returns a slice of recent messages or an error if retrieval fails +func (s *messageService) GetRecentMessagesBySession(ctx context.Context, + sessionID string, limit int, +) ([]*types.Message, error) { + logger.Info(ctx, "Start getting recent messages by session") + logger.Infof(ctx, "Getting recent messages for session ID: %s, limit: %d", sessionID, limit) + + // Verify the session exists before retrieving messages + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Checking if session exists, tenant ID: %d", tenantID) + _, err := s.sessionRepo.Get(ctx, tenantID, sessionID) + if err != nil { + logger.Errorf(ctx, "Failed to get session: %v", err) + return nil, err + } + + // Retrieve the most recent messages + logger.Info(ctx, "Session exists, getting recent messages") + messages, err := s.messageRepo.GetRecentMessagesBySession(ctx, sessionID, limit) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "session_id": sessionID, + "limit": limit, + }) + return nil, err + } + + logger.Infof(ctx, "Retrieved %d recent messages successfully", len(messages)) + return messages, nil +} + +// GetMessagesBySessionBeforeTime retrieves messages sent before a specific time +// This is typically used for pagination when scrolling through conversation history +// Parameters: +// - ctx: Context containing tenant information +// - sessionID: The ID of the session to get messages from +// - beforeTime: Timestamp to retrieve messages before +// - limit: Maximum number of messages to retrieve 
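+// - Example (illustrative): pass the CreatedAt of the oldest message already loaded as
+//   beforeTime to fetch the previous page of history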
+// +// Returns a slice of messages or an error if retrieval fails +func (s *messageService) GetMessagesBySessionBeforeTime(ctx context.Context, + sessionID string, beforeTime time.Time, limit int, +) ([]*types.Message, error) { + logger.Info(ctx, "Start getting messages before time") + logger.Infof(ctx, "Getting messages before %v for session ID: %s, limit: %d", beforeTime, sessionID, limit) + + // Verify the session exists before retrieving messages + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Checking if session exists, tenant ID: %d", tenantID) + _, err := s.sessionRepo.Get(ctx, tenantID, sessionID) + if err != nil { + logger.Errorf(ctx, "Failed to get session: %v", err) + return nil, err + } + + // Retrieve messages before the specified time + logger.Info(ctx, "Session exists, getting messages before time") + messages, err := s.messageRepo.GetMessagesBySessionBeforeTime(ctx, sessionID, beforeTime, limit) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "session_id": sessionID, + "before_time": beforeTime, + "limit": limit, + }) + return nil, err + } + + logger.Infof(ctx, "Retrieved %d messages before time successfully", len(messages)) + return messages, nil +} + +// UpdateMessage updates an existing message's content or metadata +// Parameters: +// - ctx: Context containing tenant information +// - message: The message with updated fields +// +// Returns an error if the update fails +func (s *messageService) UpdateMessage(ctx context.Context, message *types.Message) error { + logger.Info(ctx, "Start updating message") + logger.Infof(ctx, "Updating message, ID: %s, session ID: %s", message.ID, message.SessionID) + + // Verify the session exists before updating the message + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Checking if session exists, tenant ID: %d", tenantID) + _, err := s.sessionRepo.Get(ctx, tenantID, message.SessionID) + if err != nil { + logger.Errorf(ctx, "Failed to get session: %v", err) + return err + } + + // Update the message in the repository + logger.Info(ctx, "Session exists, updating message") + err = s.messageRepo.UpdateMessage(ctx, message) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "session_id": message.SessionID, + "message_id": message.ID, + }) + return err + } + + logger.Info(ctx, "Message updated successfully") + return nil +} + +// DeleteMessage removes a message from a session +// Parameters: +// - ctx: Context containing tenant information +// - sessionID: The ID of the session containing the message +// - messageID: The ID of the message to delete +// +// Returns an error if deletion fails +func (s *messageService) DeleteMessage(ctx context.Context, sessionID string, messageID string) error { + logger.Info(ctx, "Start deleting message") + logger.Infof(ctx, "Deleting message, session ID: %s, message ID: %s", sessionID, messageID) + + // Verify the session exists before deleting the message + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Checking if session exists, tenant ID: %d", tenantID) + _, err := s.sessionRepo.Get(ctx, tenantID, sessionID) + if err != nil { + logger.Errorf(ctx, "Failed to get session: %v", err) + return err + } + + // Delete the message from the repository + logger.Info(ctx, "Session exists, deleting message") + err = s.messageRepo.DeleteMessage(ctx, sessionID, messageID) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "session_id": 
sessionID, + "message_id": messageID, + }) + return err + } + + logger.Info(ctx, "Message deleted successfully") + return nil +} diff --git a/internal/application/service/metric/bleu.go b/internal/application/service/metric/bleu.go new file mode 100644 index 0000000..c1f6dec --- /dev/null +++ b/internal/application/service/metric/bleu.go @@ -0,0 +1,165 @@ +package metric + +// references: https://github.com/waygo/bleu + +// Package bleu implements the BLEU method, which is used to evaluate +// the quality of machine translation. [1] +// +// The code in this package was largely ported from the corresponding package +// in Python NLTK. [2] +// +// [1] Papineni, Kishore, et al. "BLEU: a method for automatic evaluation of +// machine translation." Proceedings of the 40th annual meeting on +// association for computational linguistics. Association for Computational +// Linguistics, 2002. +// +// [2] http://www.nltk.org/_modules/nltk/align/bleu.html + +import ( + "encoding/json" + "log" + "math" + "strings" + + "github.com/Tencent/WeKnora/internal/types" +) + +type BLEUMetric struct { + smoothing bool + weights BLEUWeight +} + +func NewBLEUMetric(smoothing bool, weights BLEUWeight) *BLEUMetric { + return &BLEUMetric{smoothing: smoothing, weights: weights} +} + +type Sentence []string + +type BLEUWeight []float64 + +var ( + BLEU1Gram BLEUWeight = []float64{1.0, 0.0, 0.0, 0.0} + BLEU2Gram BLEUWeight = []float64{0.5, 0.5, 0.0, 0.0} + BLEU3Gram BLEUWeight = []float64{0.33, 0.33, 0.33, 0.0} + BLEU4Gram BLEUWeight = []float64{0.25, 0.25, 0.25, 0.25} +) + +func (b *BLEUMetric) Compute(metricInput *types.MetricInput) float64 { + candidate := splitIntoWords(splitSentences(metricInput.GeneratedTexts)) + references := []Sentence{splitIntoWords(splitSentences(metricInput.GeneratedGT))} + + for i := range candidate { + candidate[i] = strings.ToLower(candidate[i]) + } + + for i := range references { + for u := range references[i] { + references[i][u] = strings.ToLower(references[i][u]) + } + } + + ps := make([]float64, len(b.weights)) + for i := range b.weights { + ps[i] = b.modifiedPrecision(candidate, references, i+1) + } + + s := 0.0 + overlap := 0 + for i := range b.weights { + w := b.weights[i] + pn := ps[i] + if pn > 0.0 { + overlap++ + s += w * math.Log(pn) + } + } + + if overlap == 0 { + return 0 + } + + bp := b.brevityPenalty(candidate, references) + return bp * math.Exp(s) +} + +type phrase []string + +func (p phrase) String() string { + b, err := json.Marshal(p) + if err != nil { + log.Fatal("encode error:", err) + } + return string(b) +} + +func (b *BLEUMetric) getNphrase(s Sentence, n int) []phrase { + nphrase := []phrase{} + for i := 0; i < len(s)-n+1; i++ { + nphrase = append(nphrase, phrase(s[i:i+n])) + } + return nphrase +} + +func (b *BLEUMetric) countNphrase(nphrase []phrase) map[string]int { + counts := map[string]int{} + for _, gram := range nphrase { + counts[gram.String()]++ + } + return counts +} + +func (b *BLEUMetric) modifiedPrecision(candidate Sentence, references []Sentence, n int) float64 { + nphrase := b.getNphrase(candidate, n) + if len(nphrase) == 0 { + return 0.0 + } + + counts := b.countNphrase(nphrase) + + if len(counts) == 0 { + return 0.0 + } + + maxCounts := map[string]int{} + for i := range references { + referenceCounts := b.countNphrase(b.getNphrase(references[i], n)) + for ngram := range counts { + if v, ok := maxCounts[ngram]; !ok { + maxCounts[ngram] = referenceCounts[ngram] + } else if v < referenceCounts[ngram] { + maxCounts[ngram] = referenceCounts[ngram] + } + 
} + } + + clippedCounts := map[string]int{} + for ngram, count := range counts { + clippedCounts[ngram] = min(count, maxCounts[ngram]) + } + + smoothingFactor := 0.0 + if b.smoothing { + smoothingFactor = 1.0 + } + return (float64(sum(clippedCounts)) + smoothingFactor) / (float64(sum(counts)) + smoothingFactor) +} + +func (b *BLEUMetric) brevityPenalty(candidate Sentence, references []Sentence) float64 { + c := len(candidate) + refLens := []int{} + for i := range references { + refLens = append(refLens, len(references[i])) + } + minDiffInd, minDiff := 0, -1 + for i := range refLens { + if minDiff == -1 || abs(refLens[i]-c) < minDiff { + minDiffInd = i + minDiff = abs(refLens[i] - c) + } + } + r := refLens[minDiffInd] + if c > r { + return 1 + } + return math.Exp(float64(1 - float64(r)/float64(c))) +} diff --git a/internal/application/service/metric/common.go b/internal/application/service/metric/common.go new file mode 100644 index 0000000..8f48a74 --- /dev/null +++ b/internal/application/service/metric/common.go @@ -0,0 +1,133 @@ +package metric + +import ( + "regexp" + "strings" + + "github.com/Tencent/WeKnora/internal/types" +) + +func sum(m map[string]int) int { + s := 0 + for _, v := range m { + s += v + } + return s +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func abs(a int) int { + if a < 0 { + return -a + } + return a +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} + +func splitSentences(text string) []string { + // 编译正则表达式(匹配中文句号或英文句号) + re := regexp.MustCompile(`([。.])`) + + // 分割文本并保留分隔符用于定位 + split := re.Split(text, -1) + + var sentences []string + current := strings.Builder{} + + for i, s := range split { + // 交替获取文本段和分隔符(奇数为分隔符) + if i%2 == 0 { + current.WriteString(s) + } else { + // 当遇到分隔符时,完成当前句子 + if current.Len() > 0 { + sentence := strings.TrimSpace(current.String()) + if sentence != "" { + sentences = append(sentences, sentence) + } + current.Reset() + } + } + } + + // 处理最后一个无分隔符的文本段 + if remaining := strings.TrimSpace(current.String()); remaining != "" { + sentences = append(sentences, remaining) + } + + return sentences +} + +func splitIntoWords(sentences []string) []string { + // 正则匹配中英文段落(中文块、英文块、其他字符) + re := regexp.MustCompile(`([\p{Han}]+)|([a-zA-Z0-9_.,!?]+)|(\p{P})`) + + var tokens []string + for _, text := range sentences { + matches := re.FindAllStringSubmatch(text, -1) + + for _, groups := range matches { + chineseBlock := groups[1] + englishBlock := groups[2] + punctuation := groups[3] + + switch { + case chineseBlock != "": // 处理中文部分 + words := types.Jieba.Cut(chineseBlock, true) + tokens = append(tokens, words...) + case englishBlock != "": // 处理英文部分 + engTokens := strings.Fields(englishBlock) + tokens = append(tokens, engTokens...) 
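+			// Illustrative example: "检索Recall;" yields the jieba tokens for "检索", the English
+			// token "Recall", and then ";" through the punctuation case below.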
+ case punctuation != "": // 保留标点符号 + tokens = append(tokens, punctuation) + } + } + } + return tokens +} + +func ToSet[T comparable](li []T) map[T]struct{} { + res := make(map[T]struct{}, len(li)) + for _, v := range li { + res[v] = struct{}{} + } + return res +} + +func SliceMap[T any, Y any](li []T, fn func(T) Y) []Y { + res := make([]Y, len(li)) + for i, v := range li { + res[i] = fn(v) + } + return res +} + +func Hit[T comparable](li []T, set map[T]struct{}) int { + count := 0 + for _, v := range li { + if _, exist := set[v]; exist { + count++ + } + } + return count +} + +func Fold[T any, Y any](slice []T, initial Y, f func(Y, T) Y) Y { + accumulator := initial + for _, item := range slice { + accumulator = f(accumulator, item) + } + return accumulator +} diff --git a/internal/application/service/metric/map.go b/internal/application/service/metric/map.go new file mode 100644 index 0000000..10f463b --- /dev/null +++ b/internal/application/service/metric/map.go @@ -0,0 +1,70 @@ +package metric + +import ( + "github.com/Tencent/WeKnora/internal/types" +) + +// MAPMetric calculates Mean Average Precision for retrieval evaluation +type MAPMetric struct{} + +// NewMAPMetric creates a new MAPMetric instance +func NewMAPMetric() *MAPMetric { + return &MAPMetric{} +} + +// Compute calculates the Mean Average Precision score +func (m *MAPMetric) Compute(metricInput *types.MetricInput) float64 { + // Convert ground truth to sets for efficient lookup + gts := metricInput.RetrievalGT + ids := metricInput.RetrievalIDs + + // Create sets of relevant document IDs for each query + gtSets := make([]map[int]struct{}, len(gts)) + for i, gt := range gts { + gtSets[i] = make(map[int]struct{}) + for _, docID := range gt { + gtSets[i][docID] = struct{}{} + } + } + + var apSum float64 // Sum of average precision for all queries + + // Calculate average precision for each query + for _, gtSet := range gtSets { + // Mark which predicted documents are relevant + predHits := make([]bool, len(ids)) + for i, predID := range ids { + if _, ok := gtSet[predID]; ok { + predHits[i] = true + } else { + predHits[i] = false + } + } + + var ( + ap float64 // Average precision for current query + hitCount int // Number of relevant documents found + ) + + // Calculate precision at each rank position + for k := 0; k < len(predHits); k++ { + if predHits[k] { + hitCount++ + // Precision at k: relevant docs found up to k / k + ap += float64(hitCount) / float64(k+1) + } + } + // Normalize by number of relevant documents + if hitCount > 0 { + ap /= float64(hitCount) + } + apSum += ap + } + + // Handle case with no ground truth + if len(gtSets) == 0 { + return 0 + } + // Return mean of average precision across all queries + return apSum / float64(len(gtSets)) +} diff --git a/internal/application/service/metric/map_test.go b/internal/application/service/metric/map_test.go new file mode 100644 index 0000000..7bcaf0e --- /dev/null +++ b/internal/application/service/metric/map_test.go @@ -0,0 +1,85 @@ +package metric + +import ( + "testing" + + "github.com/Tencent/WeKnora/internal/types" +) + +func TestMAPMetric_Compute(t *testing.T) { + tests := []struct { + name string + input *types.MetricInput + expected float64 + }{ + { + name: "total match", + input: &types.MetricInput{ + RetrievalGT: [][]int{{2, 4, 6}}, + RetrievalIDs: []int{2, 4, 6}, + }, + expected: 1.0, + }, + { + name: "no match", + input: &types.MetricInput{ + RetrievalGT: [][]int{{1, 2}}, + RetrievalIDs: []int{3, 4}, + }, + expected: 0.0, + }, + { + name: "partial match", + 
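+			// GT {1, 2, 3} against ranked IDs [2, 5, 1, 3]: relevant documents land at ranks 1, 3 and 4,
+			// so the precisions 1/1, 2/3 and 3/4 are averaged over the 3 relevant documents (see below).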
input: &types.MetricInput{ + RetrievalGT: [][]int{{1, 2, 3}}, + RetrievalIDs: []int{2, 5, 1, 3}, + }, + // AP = (1/1 + 2/3 + 3/4)/3 ≈ 0.80555555 + expected: 0.8055555555555555, + }, + { + name: "empty ground truth", + input: &types.MetricInput{ + RetrievalGT: [][]int{}, + RetrievalIDs: []int{1, 2}, + }, + expected: 0.0, + }, + { + name: "multiple queries", + input: &types.MetricInput{ + RetrievalGT: [][]int{ + {1, 2}, + {3, 4}, + }, + RetrievalIDs: []int{1, 3, 2, 4}, + }, + // Query1 AP: (1/1 + 2/3)/2 ≈ 0.8333 + // Query2 AP: (1/2 + 2/4)/2 = 0.5 + // MAP: (0.8333 + 0.5)/2 ≈ 0.6667 + expected: 0.6666666666666666, + }, + } + + metric := NewMAPMetric() + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := metric.Compute(tt.input) + if !almostEqual(got, tt.expected, 1e-6) { + t.Errorf("Compute() = %v, want %v", got, tt.expected) + } + }) + } +} + +// Helper function to compare floating point numbers with tolerance +func almostEqual(a, b, tolerance float64) bool { + if a == b { + return true + } + diff := a - b + if diff < 0 { + diff = -diff + } + return diff < tolerance +} diff --git a/internal/application/service/metric/mrr.go b/internal/application/service/metric/mrr.go new file mode 100644 index 0000000..4d89712 --- /dev/null +++ b/internal/application/service/metric/mrr.go @@ -0,0 +1,48 @@ +package metric + +import ( + "github.com/Tencent/WeKnora/internal/types" +) + +// MRRMetric calculates Mean Reciprocal Rank for retrieval evaluation +type MRRMetric struct{} + +// NewMRRMetric creates a new MRRMetric instance +func NewMRRMetric() *MRRMetric { + return &MRRMetric{} +} + +// Compute calculates the Mean Reciprocal Rank score +func (m *MRRMetric) Compute(metricInput *types.MetricInput) float64 { + // Get ground truth and predicted IDs + gts := metricInput.RetrievalGT + ids := metricInput.RetrievalIDs + + // Convert ground truth to sets for efficient lookup + gtSets := make([]map[int]struct{}, len(gts)) + for i, gt := range gts { + gtSets[i] = make(map[int]struct{}) + for _, docID := range gt { + gtSets[i][docID] = struct{}{} + } + } + + var sumRR float64 // Sum of reciprocal ranks + // Calculate reciprocal rank for each query + for _, gtSet := range gtSets { + // Find first relevant document in results + for i, predID := range ids { + if _, ok := gtSet[predID]; ok { + // Reciprocal rank is 1/position (1-based) + sumRR += 1.0 / float64(i+1) + break // Only consider first relevant document + } + } + } + // Handle case with no ground truth + if len(gtSets) == 0 { + return 0 + } + // Return mean of reciprocal ranks + return sumRR / float64(len(gtSets)) +} diff --git a/internal/application/service/metric/mrr_test.go b/internal/application/service/metric/mrr_test.go new file mode 100644 index 0000000..4d4531d --- /dev/null +++ b/internal/application/service/metric/mrr_test.go @@ -0,0 +1,72 @@ +package metric + +import ( + "testing" + + "github.com/Tencent/WeKnora/internal/types" +) + +func TestMRRMetric_Compute(t *testing.T) { + tests := []struct { + name string + input *types.MetricInput + expected float64 + }{ + { + name: "perfect match - first position", + input: &types.MetricInput{ + RetrievalGT: [][]int{{1, 2}}, + RetrievalIDs: []int{1, 2, 3}, + }, + // RR = 1/1 = 1.0 + expected: 1.0, + }, + { + name: "match at second position", + input: &types.MetricInput{ + RetrievalGT: [][]int{{1, 2}}, + RetrievalIDs: []int{3, 1, 2}, + }, + // RR = 1/2 = 0.5 + expected: 0.5, + }, + { + name: "no match", + input: &types.MetricInput{ + RetrievalGT: [][]int{{1, 2}}, + RetrievalIDs: 
[]int{3, 4}, + }, + expected: 0.0, + }, + { + name: "multiple queries", + input: &types.MetricInput{ + RetrievalGT: [][]int{ + {1, 2}, // RR = 1/1 = 1.0 + {3, 4}, // RR = 1/2 = 0.5 + }, + RetrievalIDs: []int{1, 3, 2, 4}, + }, + // MRR = (1.0 + 0.5)/2 = 0.75 + expected: 0.75, + }, + { + name: "empty ground truth", + input: &types.MetricInput{ + RetrievalGT: [][]int{}, + RetrievalIDs: []int{1, 2}, + }, + expected: 0.0, + }, + } + + metric := NewMRRMetric() + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := metric.Compute(tt.input) + if !almostEqual(got, tt.expected, 1e-6) { + t.Errorf("Compute() = %v, want %v", got, tt.expected) + } + }) + } +} diff --git a/internal/application/service/metric/ndcg.go b/internal/application/service/metric/ndcg.go new file mode 100644 index 0000000..07fd2e1 --- /dev/null +++ b/internal/application/service/metric/ndcg.go @@ -0,0 +1,78 @@ +package metric + +import ( + "math" + + "github.com/Tencent/WeKnora/internal/types" +) + +// NDCGMetric calculates Normalized Discounted Cumulative Gain +type NDCGMetric struct { + k int // Top k results to consider +} + +// NewNDCGMetric creates a new NDCGMetric instance with given k value +func NewNDCGMetric(k int) *NDCGMetric { + return &NDCGMetric{k: k} +} + +// Compute calculates the NDCG score +func (n *NDCGMetric) Compute(metricInput *types.MetricInput) float64 { + gts := metricInput.RetrievalGT + ids := metricInput.RetrievalIDs + + // Limit results to top k + if len(ids) > n.k { + ids = ids[:n.k] + } + + // Create set of relevant documents and count total relevant + gtSets := make(map[int]struct{}, len(gts)) + countGt := 0 + for _, gt := range gts { + countGt += len(gt) + for _, g := range gt { + gtSets[g] = struct{}{} + } + } + + // Assign relevance scores (1 for relevant, 0 otherwise) + relevanceScores := make(map[int]int) + for _, docID := range ids { + if _, exist := gtSets[docID]; exist { + relevanceScores[docID] = 1 + } else { + relevanceScores[docID] = 0 + } + } + + // Calculate DCG (Discounted Cumulative Gain) + var dcg float64 + for i, docID := range ids { + dcg += (math.Pow(2, float64(relevanceScores[docID])) - 1) / math.Log2(float64(i+2)) + } + + // Create ideal ranking (all relevant docs first) + idealLen := min(countGt, len(ids)) + idealPred := make([]int, len(ids)) + for i := 0; i < len(ids); i++ { + if i < idealLen { + idealPred[i] = 1 + } else { + idealPred[i] = 0 + } + } + + // Calculate IDCG (Ideal DCG) + var idcg float64 + for i, relevance := range idealPred { + idcg += float64(relevance) / math.Log2(float64(i+2)) + } + + // Handle division by zero case + if idcg == 0 { + return 0 + } + // NDCG = DCG / IDCG + return dcg / idcg +} diff --git a/internal/application/service/metric/precision.go b/internal/application/service/metric/precision.go new file mode 100644 index 0000000..9121120 --- /dev/null +++ b/internal/application/service/metric/precision.go @@ -0,0 +1,33 @@ +package metric + +import ( + "github.com/Tencent/WeKnora/internal/types" +) + +// PrecisionMetric calculates precision for retrieval evaluation +type PrecisionMetric struct{} + +// NewPrecisionMetric creates a new PrecisionMetric instance +func NewPrecisionMetric() *PrecisionMetric { + return &PrecisionMetric{} +} + +// Compute calculates the precision score +func (r *PrecisionMetric) Compute(metricInput *types.MetricInput) float64 { + // Get ground truth and predicted IDs + gts := metricInput.RetrievalGT + ids := metricInput.RetrievalIDs + + // Convert ground truth to sets for efficient lookup + gtSets := 
SliceMap(gts, ToSet) + // Count total hits across all queries + ahit := Fold(gtSets, 0, func(a int, b map[int]struct{}) int { return a + Hit(ids, b) }) + + // Handle case with no ground truth + if len(gts) == 0 { + return 0.0 + } + + // Precision = total hits / number of queries + return float64(ahit) / float64(len(gts)) +} diff --git a/internal/application/service/metric/precision_test.go b/internal/application/service/metric/precision_test.go new file mode 100644 index 0000000..4b947f1 --- /dev/null +++ b/internal/application/service/metric/precision_test.go @@ -0,0 +1,67 @@ +package metric + +import ( + "testing" + + "github.com/Tencent/WeKnora/internal/types" +) + +func TestPrecisionMetric_Compute(t *testing.T) { + tests := []struct { + name string + input *types.MetricInput + expected float64 + }{ + { + name: "perfect match", + input: &types.MetricInput{ + RetrievalGT: [][]int{{1, 3, 5}}, + RetrievalIDs: []int{1, 3, 5}, + }, + expected: 1.0, + }, + { + name: "half match", + input: &types.MetricInput{ + RetrievalGT: [][]int{{1, 2, 3}}, + RetrievalIDs: []int{1, 4, 2}, + }, + expected: 0.6666666666666666, + }, + { + name: "no match", + input: &types.MetricInput{ + RetrievalGT: [][]int{{1, 2, 3}}, + RetrievalIDs: []int{4, 5, 6}, + }, + expected: 0.0, + }, + { + name: "empty retrieval", + input: &types.MetricInput{ + RetrievalGT: [][]int{{1, 2, 3}}, + RetrievalIDs: []int{}, + }, + expected: 0.0, + }, + { + name: "multiple ground truths", + input: &types.MetricInput{ + RetrievalGT: [][]int{{1, 2}, {3, 4}}, + RetrievalIDs: []int{1, 3, 5}, + }, + expected: 0.3333333333333333, + }, + } + + pm := NewPrecisionMetric() + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := pm.Compute(tt.input) + if got != tt.expected { + t.Errorf("Compute() = %v, want %v", got, tt.expected) + } + }) + } +} diff --git a/internal/application/service/metric/recall.go b/internal/application/service/metric/recall.go new file mode 100644 index 0000000..807ad00 --- /dev/null +++ b/internal/application/service/metric/recall.go @@ -0,0 +1,33 @@ +package metric + +import ( + "github.com/Tencent/WeKnora/internal/types" +) + +// RecallMetric calculates recall for retrieval evaluation +type RecallMetric struct{} + +// NewRecallMetric creates a new RecallMetric instance +func NewRecallMetric() *RecallMetric { + return &RecallMetric{} +} + +// Compute calculates the recall score +func (r *RecallMetric) Compute(metricInput *types.MetricInput) float64 { + // Get ground truth and predicted IDs + gts := metricInput.RetrievalGT + ids := metricInput.RetrievalIDs + + // Convert ground truth to sets for efficient lookup + gtSets := SliceMap(gts, ToSet) + // Count total hits across all relevant documents + ahit := Fold(gtSets, 0, func(a int, b map[int]struct{}) int { return a + Hit(ids, b) }) + + // Handle case with no ground truth + if len(gtSets) == 0 { + return 0.0 + } + + // Recall = total hits / total relevant documents + return float64(ahit) / float64(len(gtSets)) +} diff --git a/internal/application/service/metric/recall_test.go b/internal/application/service/metric/recall_test.go new file mode 100644 index 0000000..e83a54d --- /dev/null +++ b/internal/application/service/metric/recall_test.go @@ -0,0 +1,70 @@ +package metric + +import ( + "testing" + + "github.com/Tencent/WeKnora/internal/types" +) + +func TestRecallMetric_Compute(t *testing.T) { + tests := []struct { + name string + input *types.MetricInput + expected float64 + }{ + { + name: "perfect recall - all ground truth retrieved", + input: 
&types.MetricInput{
+				RetrievalGT:  [][]int{{1, 2, 3}},
+				RetrievalIDs: []int{1, 2, 3, 4},
+			},
+			expected: 1.0,
+		},
+		{
+			name: "partial recall - some ground truth retrieved",
+			input: &types.MetricInput{
+				RetrievalGT:  [][]int{{1, 2, 3}, {4, 5}},
+				RetrievalIDs: []int{1, 4, 6},
+			},
+			// both ground truth sets are hit, via documents 1 and 4
+			expected: 0.41666666666666663, // (1/3 + 1/2) / 2 = 0.41666 (a ground truth set counts as recalled once any of its elements is retrieved)
+
+		},
+		{
+			name: "no recall - no ground truth retrieved",
+			input: &types.MetricInput{
+				RetrievalGT:  [][]int{{1, 2, 3}},
+				RetrievalIDs: []int{4, 5, 6},
+			},
+			expected: 0.0,
+		},
+		{
+			name: "empty retrieval list",
+			input: &types.MetricInput{
+				RetrievalGT:  [][]int{{1, 2, 3}},
+				RetrievalIDs: []int{},
+			},
+			expected: 0.0,
+		},
+		{
+			name: "multiple ground truth sets",
+			input: &types.MetricInput{
+				RetrievalGT:  [][]int{{1, 2}, {3, 4}, {5, 6}},
+				RetrievalIDs: []int{1, 3, 7},
+			},
+			// only the first two ground truth sets are hit, via documents 1 and 3
+			expected: 0.3333333333333333, // 1/3 ≈ 0.333...
+		},
+	}
+
+	rm := NewRecallMetric()
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := rm.Compute(tt.input)
+			if got != tt.expected {
+				t.Errorf("Compute() = %v, want %v", got, tt.expected)
+			}
+		})
+	}
+}
diff --git a/internal/application/service/metric/rouge.go b/internal/application/service/metric/rouge.go
new file mode 100644
index 0000000..22ebe68
--- /dev/null
+++ b/internal/application/service/metric/rouge.go
@@ -0,0 +1,72 @@
+package metric
+
+import "github.com/Tencent/WeKnora/internal/types"
+
+// reference: https://github.com/dd-Rebecca/rouge
+
+// RougeMetric implements ROUGE (Recall-Oriented Understudy for Gisting Evaluation) metrics
+// for evaluating text summarization quality by comparing generated text to reference text
+type RougeMetric struct {
+	exclusive bool   // Whether to use exclusive matching mode
+	metric    string // ROUGE metric type (e.g. "rouge-1", "rouge-l")
+	stats     string // Statistic to return (e.g.
"f", "p", "r") +} + +// AvailableMetrics defines all supported ROUGE variants and their calculation functions +var AvailableMetrics = map[string]func([]string, []string, bool) map[string]float64{ + "rouge-1": func(hyp, ref []string, exclusive bool) map[string]float64 { + return rougeN(hyp, ref, 1, false, exclusive) // Unigram-based ROUGE + }, + "rouge-2": func(hyp, ref []string, exclusive bool) map[string]float64 { + return rougeN(hyp, ref, 2, false, exclusive) // Bigram-based ROUGE + }, + "rouge-3": func(hyp, ref []string, exclusive bool) map[string]float64 { + return rougeN(hyp, ref, 3, false, exclusive) // Trigram-based ROUGE + }, + "rouge-4": func(hyp, ref []string, exclusive bool) map[string]float64 { + return rougeN(hyp, ref, 4, false, exclusive) // 4-gram based ROUGE + }, + "rouge-5": func(hyp, ref []string, exclusive bool) map[string]float64 { + return rougeN(hyp, ref, 5, false, exclusive) // 5-gram based ROUGE + }, + "rouge-l": func(hyp, ref []string, exclusive bool) map[string]float64 { + return rougeLSummaryLevel(hyp, ref, false, exclusive) // Longest common subsequence based ROUGE + }, +} + +// NewRougeMetric creates a new ROUGE metric calculator +func NewRougeMetric(exclusive bool, metrics, stats string) *RougeMetric { + r := &RougeMetric{ + exclusive: exclusive, + metric: metrics, + stats: stats, + } + return r +} + +// Compute calculates the ROUGE score between generated text and reference text +func (r *RougeMetric) Compute(metricInput *types.MetricInput) float64 { + hyps := []string{metricInput.GeneratedTexts} // Generated/hypothesis text + refs := []string{metricInput.GeneratedGT} // Reference/ground truth text + + scores := 0.0 + count := 0 + + // Calculate scores for each hypothesis-reference pair + for i := 0; i < len(hyps); i++ { + hyp := splitSentences(hyps[i]) // Split into sentences + ref := splitSentences(refs[i]) + + // Get appropriate ROUGE calculation function + fn := AvailableMetrics[r.metric] + sc := fn(hyp, ref, r.exclusive) + scores += sc[r.stats] // Accumulate specified statistic (f1/precision/recall) + + count++ + } + + if count == 0 { + return 0 // Avoid division by zero + } + return scores / float64(count) // Return average score +} diff --git a/internal/application/service/metric/rouge_score.go b/internal/application/service/metric/rouge_score.go new file mode 100644 index 0000000..dd51e14 --- /dev/null +++ b/internal/application/service/metric/rouge_score.go @@ -0,0 +1,252 @@ +package metric + +/* +# -*- coding: utf-8 -*- +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""ROUGE Metric Implementation + +This is a very slightly version of: +https://github.com/pltrdy/seq2seq/blob/master/seq2seq/metrics/rouge.py + +--- + +ROUGe metric implementation. + +This is a modified and slightly extended verison of +https://github.com/miso-belica/sumy/blob/dev/sumy/evaluation/rouge.py. 
+*/ + +import ( + "strings" +) + +type Ngrams struct { + ngrams map[string]int + exclusive bool +} + +func NewNgrams(exclusive bool) *Ngrams { + return &Ngrams{ngrams: make(map[string]int), exclusive: exclusive} +} + +func (n *Ngrams) Add(o string) { + if n.exclusive { + n.ngrams[o] = 1 + } else { + n.ngrams[o]++ + } +} + +func (n *Ngrams) Len() int { + return len(n.ngrams) +} + +func (n *Ngrams) Intersection(o *Ngrams) *Ngrams { + intersection := NewNgrams(n.exclusive) + for k := range n.ngrams { + if _, ok := o.ngrams[k]; ok { + intersection.Add(k) + } + } + return intersection +} + +func (n *Ngrams) BatchAdd(o []string) { + for _, v := range o { + n.Add(v) + } +} + +func (n *Ngrams) Union(others ...*Ngrams) *Ngrams { + union := NewNgrams(n.exclusive) + for k := range n.ngrams { + union.Add(k) + } + for _, other := range others { + for k := range other.ngrams { + union.Add(k) + } + } + return union +} + +func getNgrams(n int, text []string, exclusive bool) *Ngrams { + ngramSet := NewNgrams(exclusive) + for i := 0; i <= len(text)-n; i++ { + ngramSet.Add(strings.Join(text[i:i+n], " ")) + } + return ngramSet +} + +func getWordNgrams(n int, sentences []string, exclusive bool) *Ngrams { + words := splitIntoWords(sentences) + return getNgrams(n, words, exclusive) +} + +func lenLcs(x, y []string) int { + table := lcs(x, y) + return table[len(x)][len(y)] +} + +func lcs(x, y []string) [][]int { + n, m := len(x), len(y) + table := make([][]int, n+1) + for i := range table { + table[i] = make([]int, m+1) + } + for i := 1; i <= n; i++ { + for j := 1; j <= m; j++ { + if x[i-1] == y[j-1] { + table[i][j] = table[i-1][j-1] + 1 + } else { + table[i][j] = max(table[i-1][j], table[i][j-1]) + } + } + } + return table +} + +func reconLcs(x, y []string, exclusive bool) *Ngrams { + i, j := len(x), len(y) + table := lcs(x, y) + + var reconFunc func(int, int) []string + reconFunc = func(i, j int) []string { + if i == 0 || j == 0 { + return []string{} + } else if x[i-1] == y[j-1] { + return append(reconFunc(i-1, j-1), x[i-1]) + } else if table[i-1][j] > table[i][j-1] { + return reconFunc(i-1, j) + } else { + return reconFunc(i, j-1) + } + } + + reconList := reconFunc(i, j) + ngramList := NewNgrams(exclusive) + for _, word := range reconList { + ngramList.Add(word) + } + return ngramList +} + +func rougeN(evaluatedSentences, referenceSentences []string, n int, rawResults, exclusive bool) map[string]float64 { + evaluatedNgrams := getWordNgrams(n, evaluatedSentences, exclusive) + referenceNgrams := getWordNgrams(n, referenceSentences, exclusive) + referenceCount := referenceNgrams.Len() + evaluatedCount := evaluatedNgrams.Len() + + overlappingNgrams := evaluatedNgrams.Intersection(referenceNgrams) + overlappingCount := overlappingNgrams.Len() + + results := make(map[string]float64) + if rawResults { + results["hyp"] = float64(evaluatedCount) + results["ref"] = float64(referenceCount) + results["overlap"] = float64(overlappingCount) + return results + } else { + return calculateRougeN(evaluatedCount, referenceCount, overlappingCount) + } +} + +func calculateRougeN(evaluatedCount, referenceCount, overlappingCount int) map[string]float64 { + results := make(map[string]float64) + if evaluatedCount == 0 { + results["p"] = 0.0 + } else { + results["p"] = float64(overlappingCount) / float64(evaluatedCount) + } + + if referenceCount == 0 { + results["r"] = 0.0 + } else { + results["r"] = float64(overlappingCount) / float64(referenceCount) + } + + results["f"] = 2.0 * ((results["p"] * results["r"]) / (results["p"] + 
results["r"] + 1e-8)) + return results +} + +func unionLcs(evaluatedSentences []string, referenceSentence string, prevUnion *Ngrams, exclusive bool) (int, *Ngrams) { + if prevUnion == nil { + prevUnion = NewNgrams(exclusive) + } + + lcsUnion := prevUnion + prevCount := len(prevUnion.ngrams) + referenceWords := splitIntoWords([]string{referenceSentence}) + + combinedLcsLength := 0 + for _, evalS := range evaluatedSentences { + evaluatedWords := splitIntoWords([]string{evalS}) + lcs := reconLcs(referenceWords, evaluatedWords, exclusive) + combinedLcsLength += lcs.Len() + lcsUnion = lcsUnion.Union(lcs) + } + + newLcsCount := lcsUnion.Len() - prevCount + return newLcsCount, lcsUnion +} + +func rougeLSummaryLevel(evaluatedSentences, referenceSentences []string, rawResults, exclusive bool) map[string]float64 { + referenceNgrams := NewNgrams(exclusive) + referenceNgrams.BatchAdd(splitIntoWords(referenceSentences)) + m := referenceNgrams.Len() + + evaluatedNgrams := NewNgrams(exclusive) + evaluatedNgrams.BatchAdd(splitIntoWords(evaluatedSentences)) + n := evaluatedNgrams.Len() + + unionLcsSumAcrossAllReferences := 0 + union := NewNgrams(exclusive) + for _, refS := range referenceSentences { + lcsCount, newUnion := unionLcs(evaluatedSentences, refS, union, exclusive) + union = newUnion + unionLcsSumAcrossAllReferences += lcsCount + } + + llcs := unionLcsSumAcrossAllReferences + + var rLcs float64 + if m == 0 { + rLcs = 0.0 + } else { + rLcs = float64(llcs) / float64(m) + } + var pLcs float64 + if n == 0 { + pLcs = 0.0 + } else { + pLcs = float64(llcs) / float64(n) + } + + fLcs := 2.0 * ((pLcs * rLcs) / (pLcs + rLcs + 1e-8)) + + results := make(map[string]float64) + if rawResults { + results["hyp"] = float64(n) + results["ref"] = float64(m) + results["overlap"] = float64(llcs) + return results + } else { + results["f"] = fLcs + results["p"] = pLcs + results["r"] = rLcs + return results + } +} diff --git a/internal/application/service/metric_hook.go b/internal/application/service/metric_hook.go new file mode 100644 index 0000000..ec3e80b --- /dev/null +++ b/internal/application/service/metric_hook.go @@ -0,0 +1,167 @@ +package service + +import ( + "context" + "sync" + + "github.com/Tencent/WeKnora/internal/application/service/metric" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// MetricList stores and aggregates metric results +type MetricList struct { + results []*types.MetricResult +} + +// metricCalculators defines all metrics to be calculated +var metricCalculators = []struct { + calc interfaces.Metrics // Metric calculator implementation + getField func(*types.MetricResult) *float64 // Field accessor for result +}{ + // Retrieval Metrics + {metric.NewPrecisionMetric(), func(r *types.MetricResult) *float64 { return &r.RetrievalMetrics.Precision }}, + {metric.NewRecallMetric(), func(r *types.MetricResult) *float64 { return &r.RetrievalMetrics.Recall }}, + {metric.NewNDCGMetric(3), func(r *types.MetricResult) *float64 { return &r.RetrievalMetrics.NDCG3 }}, + {metric.NewNDCGMetric(10), func(r *types.MetricResult) *float64 { return &r.RetrievalMetrics.NDCG10 }}, + {metric.NewMRRMetric(), func(r *types.MetricResult) *float64 { return &r.RetrievalMetrics.MRR }}, + {metric.NewMAPMetric(), func(r *types.MetricResult) *float64 { return &r.RetrievalMetrics.MAP }}, + + // Generation Metrics + {metric.NewBLEUMetric(true, metric.BLEU1Gram), func(r *types.MetricResult) *float64 { + return 
&r.GenerationMetrics.BLEU1 + }}, + {metric.NewBLEUMetric(true, metric.BLEU2Gram), func(r *types.MetricResult) *float64 { + return &r.GenerationMetrics.BLEU2 + }}, + {metric.NewBLEUMetric(true, metric.BLEU4Gram), func(r *types.MetricResult) *float64 { + return &r.GenerationMetrics.BLEU4 + }}, + {metric.NewRougeMetric(true, "rouge-1", "f"), func(r *types.MetricResult) *float64 { + return &r.GenerationMetrics.ROUGE1 + }}, + {metric.NewRougeMetric(true, "rouge-2", "f"), func(r *types.MetricResult) *float64 { + return &r.GenerationMetrics.ROUGE2 + }}, + {metric.NewRougeMetric(true, "rouge-l", "f"), func(r *types.MetricResult) *float64 { + return &r.GenerationMetrics.ROUGEL + }}, +} + +// Append calculates and stores metrics for given input +func (m *MetricList) Append(metricInput *types.MetricInput) { + result := &types.MetricResult{} + // Calculate all configured metrics + for _, c := range metricCalculators { + score := c.calc.Compute(metricInput) + *c.getField(result) = score + } + logger.Infof(context.Background(), "metric: %v", result) + m.results = append(m.results, result) +} + +// Avg calculates average of all stored metric results +func (m *MetricList) Avg() *types.MetricResult { + if len(m.results) == 0 { + return &types.MetricResult{} + } + + avgResult := &types.MetricResult{} + count := float64(len(m.results)) + + // Calculate average for each metric + for _, config := range metricCalculators { + sum := 0.0 + for _, r := range m.results { + sum += *config.getField(r) + } + *config.getField(avgResult) = sum / count + } + return avgResult +} + +// HookMetric tracks evaluation metrics for QA pairs +type HookMetric struct { + task *types.EvaluationTask + qaPairMetricList []*qaPairMetric // Per-QA pair metrics + metricResults *MetricList // Aggregated results + mu *sync.RWMutex // Thread safety +} + +// qaPairMetric stores metrics for a single QA pair +type qaPairMetric struct { + qaPair *types.QAPair + searchResult []*types.SearchResult + rerankResult []*types.SearchResult + chatResponse *types.ChatResponse +} + +// NewHookMetric creates a new HookMetric with given capacity +func NewHookMetric(capacity int) *HookMetric { + return &HookMetric{ + metricResults: &MetricList{}, + qaPairMetricList: make([]*qaPairMetric, capacity), + mu: &sync.RWMutex{}, + } +} + +// recordInit initializes metric tracking for a QA pair +func (h *HookMetric) recordInit(index int) { + h.qaPairMetricList[index] = &qaPairMetric{} +} + +// recordQaPair records the QA pair data +func (h *HookMetric) recordQaPair(index int, qaPair *types.QAPair) { + h.qaPairMetricList[index].qaPair = qaPair +} + +// recordSearchResult records search results +func (h *HookMetric) recordSearchResult(index int, searchResult []*types.SearchResult) { + h.qaPairMetricList[index].searchResult = searchResult +} + +// recordRerankResult records reranked results +func (h *HookMetric) recordRerankResult(index int, rerankResult []*types.SearchResult) { + h.qaPairMetricList[index].rerankResult = rerankResult +} + +// recordChatResponse records the generated chat response +func (h *HookMetric) recordChatResponse(index int, chatResponse *types.ChatResponse) { + h.qaPairMetricList[index].chatResponse = chatResponse +} + +// recordFinish finalizes metrics for a QA pair +func (h *HookMetric) recordFinish(index int) { + // Prepare retrieval IDs from rerank results + retrievalIDs := make([]int, len(h.qaPairMetricList[index].rerankResult)) + for i, r := range h.qaPairMetricList[index].rerankResult { + retrievalIDs[i] = r.ChunkIndex + } + + // Get 
generated text if available + generatedTexts := "" + if h.qaPairMetricList[index].chatResponse != nil { + generatedTexts = h.qaPairMetricList[index].chatResponse.Content + } + + // Prepare metric input data + metricInput := &types.MetricInput{ + RetrievalGT: [][]int{h.qaPairMetricList[index].qaPair.PIDs}, + RetrievalIDs: retrievalIDs, + GeneratedTexts: generatedTexts, + GeneratedGT: h.qaPairMetricList[index].qaPair.Answer, + } + + // Thread-safe append of metrics + h.mu.Lock() + defer h.mu.Unlock() + h.metricResults.Append(metricInput) +} + +// MetricResult returns the averaged metric results +func (h *HookMetric) MetricResult() *types.MetricResult { + h.mu.RLock() + defer h.mu.RUnlock() + return h.metricResults.Avg() +} diff --git a/internal/application/service/model.go b/internal/application/service/model.go new file mode 100644 index 0000000..48cb789 --- /dev/null +++ b/internal/application/service/model.go @@ -0,0 +1,327 @@ +package service + +import ( + "context" + "errors" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/chat" + "github.com/Tencent/WeKnora/internal/models/embedding" + "github.com/Tencent/WeKnora/internal/models/rerank" + "github.com/Tencent/WeKnora/internal/models/utils/ollama" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// ErrModelNotFound is returned when a model cannot be found in the repository +var ErrModelNotFound = errors.New("model not found") + +// modelService implements the model service interface +type modelService struct { + repo interfaces.ModelRepository + ollamaService *ollama.OllamaService +} + +// NewModelService creates a new model service instance +func NewModelService(repo interfaces.ModelRepository, ollamaService *ollama.OllamaService) interfaces.ModelService { + return &modelService{ + repo: repo, + ollamaService: ollamaService, + } +} + +// CreateModel creates a new model in the repository +// For local models, it initiates an asynchronous download process +// Remote models are immediately set to active status +func (s *modelService) CreateModel(ctx context.Context, model *types.Model) error { + logger.Info(ctx, "Start creating model") + logger.Infof(ctx, "Creating model: %s, type: %s, source: %s", model.Name, model.Type, model.Source) + + // Handle remote models (e.g., OpenAI, Azure) + if model.Source == types.ModelSourceRemote { + logger.Info(ctx, "Remote model detected, setting status to active") + model.Status = types.ModelStatusActive + + logger.Info(ctx, "Saving remote model to repository") + err := s.repo.Create(ctx, model) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_name": model.Name, + "model_type": model.Type, + }) + return err + } + + logger.Infof(ctx, "Remote model created successfully: %s", model.ID) + return nil + } + + // Handle local models (e.g., Ollama) + logger.Info(ctx, "Local model detected, setting status to downloading") + model.Status = types.ModelStatusDownloading + + logger.Info(ctx, "Saving local model to repository") + err := s.repo.Create(ctx, model) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_name": model.Name, + "model_type": model.Type, + }) + return err + } + + // Start asynchronous model download + logger.Infof(ctx, "Starting background download for model: %s", model.Name) + newCtx := logger.CloneContext(ctx) + go func() { + logger.Info(newCtx, "Background download started") + err := s.ollamaService.PullModel(newCtx, 
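+			// PullModel returns only after the Ollama download attempt completes; the goroutine
+			// then marks the model active or download_failed and persists it via repo.Update below.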
model.Name) + if err != nil { + logger.ErrorWithFields(newCtx, err, map[string]interface{}{ + "model_name": model.Name, + }) + model.Status = types.ModelStatusDownloadFailed + } else { + logger.Infof(newCtx, "Model download completed successfully: %s", model.Name) + model.Status = types.ModelStatusActive + } + logger.Infof(newCtx, "Updating model status to: %s", model.Status) + s.repo.Update(newCtx, model) + }() + + logger.Infof(ctx, "Model creation initiated successfully: %s", model.ID) + return nil +} + +// GetModelByID retrieves a model by its ID +// Returns an error if the model is not found or is in a non-active state +func (s *modelService) GetModelByID(ctx context.Context, id string) (*types.Model, error) { + logger.Info(ctx, "Start getting model by ID") + logger.Infof(ctx, "Getting model with ID: %s", id) + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Tenant ID: %d", tenantID) + + // Fetch model from repository + model, err := s.repo.GetByID(ctx, tenantID, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_id": id, + "tenant_id": tenantID, + }) + return nil, err + } + + // Check if model exists + if model == nil { + logger.Error(ctx, "Model not found") + return nil, ErrModelNotFound + } + + logger.Infof(ctx, "Model found, name: %s, status: %s", model.Name, model.Status) + + // Check model status + if model.Status == types.ModelStatusActive { + logger.Info(ctx, "Model is active and ready to use") + return model, nil + } + + if model.Status == types.ModelStatusDownloading { + logger.Warn(ctx, "Model is currently downloading") + return nil, errors.New("model is currently downloading") + } + + if model.Status == types.ModelStatusDownloadFailed { + logger.Error(ctx, "Model download failed") + return nil, errors.New("model download failed") + } + + logger.Error(ctx, "Model status is abnormal") + return nil, errors.New("abnormal model status") +} + +// ListModels returns all models belonging to the tenant +func (s *modelService) ListModels(ctx context.Context) ([]*types.Model, error) { + logger.Info(ctx, "Start listing models") + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Listing models for tenant ID: %d", tenantID) + + // List models from repository with no additional filters + models, err := s.repo.List(ctx, tenantID, "", "") + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": tenantID, + }) + return nil, err + } + + logger.Infof(ctx, "Retrieved %d models successfully", len(models)) + return models, nil +} + +// UpdateModel updates an existing model in the repository +func (s *modelService) UpdateModel(ctx context.Context, model *types.Model) error { + logger.Info(ctx, "Start updating model") + logger.Infof(ctx, "Updating model ID: %s, name: %s", model.ID, model.Name) + + // Update model in repository + err := s.repo.Update(ctx, model) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_id": model.ID, + "model_name": model.Name, + }) + return err + } + + logger.Infof(ctx, "Model updated successfully: %s", model.ID) + return nil +} + +// DeleteModel removes a model from the repository +func (s *modelService) DeleteModel(ctx context.Context, id string) error { + logger.Info(ctx, "Start deleting model") + logger.Infof(ctx, "Deleting model ID: %s", id) + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Tenant ID: %d", tenantID) + + // Delete model from repository + err := 
s.repo.Delete(ctx, tenantID, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_id": id, + "tenant_id": tenantID, + }) + return err + } + + logger.Infof(ctx, "Model deleted successfully: %s", id) + return nil +} + +// GetEmbeddingModel retrieves and initializes an embedding model instance +// Takes a model ID and returns an Embedder interface implementation +func (s *modelService) GetEmbeddingModel(ctx context.Context, modelId string) (embedding.Embedder, error) { + logger.Info(ctx, "Start getting embedding model") + logger.Infof(ctx, "Getting embedding model with ID: %s", modelId) + + // Get the model details + model, err := s.GetModelByID(ctx, modelId) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_id": modelId, + }) + return nil, err + } + + logger.Info(ctx, "Creating embedder instance") + logger.Infof(ctx, "Model name: %s, source: %s", model.Name, model.Source) + + // Initialize the embedder with model configuration + embedder, err := embedding.NewEmbedder(embedding.Config{ + Source: model.Source, + BaseURL: model.Parameters.BaseURL, + APIKey: model.Parameters.APIKey, + ModelID: model.ID, + ModelName: model.Name, + Dimensions: model.Parameters.EmbeddingParameters.Dimension, + TruncatePromptTokens: model.Parameters.EmbeddingParameters.TruncatePromptTokens, + }) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_id": model.ID, + "model_name": model.Name, + }) + return nil, err + } + + logger.Info(ctx, "Embedding model initialized successfully") + return embedder, nil +} + +// GetRerankModel retrieves and initializes a reranking model instance +// Takes a model ID and returns a Reranker interface implementation +func (s *modelService) GetRerankModel(ctx context.Context, modelId string) (rerank.Reranker, error) { + logger.Info(ctx, "Start getting rerank model") + logger.Infof(ctx, "Getting rerank model with ID: %s", modelId) + + // Get the model details + model, err := s.GetModelByID(ctx, modelId) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_id": modelId, + }) + return nil, err + } + + logger.Info(ctx, "Creating reranker instance") + logger.Infof(ctx, "Model name: %s, source: %s", model.Name, model.Source) + + // Initialize the reranker with model configuration + reranker, err := rerank.NewReranker(&rerank.RerankerConfig{ + ModelID: model.ID, + APIKey: model.Parameters.APIKey, + BaseURL: model.Parameters.BaseURL, + ModelName: model.Name, + Source: model.Source, + }) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_id": model.ID, + "model_name": model.Name, + }) + return nil, err + } + + logger.Info(ctx, "Rerank model initialized successfully") + return reranker, nil +} + +// GetChatModel retrieves and initializes a chat model instance +// Takes a model ID and returns a Chat interface implementation +func (s *modelService) GetChatModel(ctx context.Context, modelId string) (chat.Chat, error) { + logger.Info(ctx, "Start getting chat model") + logger.Infof(ctx, "Getting chat model with ID: %s", modelId) + + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Tenant ID: %d", tenantID) + + // Get the model directly from repository to avoid status checks + model, err := s.repo.GetByID(ctx, tenantID, modelId) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_id": modelId, + "tenant_id": tenantID, + }) + return nil, err + } + + if model == nil { + 
logger.Error(ctx, "Chat model not found") + return nil, ErrModelNotFound + } + + logger.Info(ctx, "Creating chat model instance") + logger.Infof(ctx, "Model name: %s, source: %s", model.Name, model.Source) + + // Initialize the chat model with model configuration + chatModel, err := chat.NewChat(&chat.ChatConfig{ + ModelID: model.ID, + APIKey: model.Parameters.APIKey, + BaseURL: model.Parameters.BaseURL, + ModelName: model.Name, + Source: model.Source, + }) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_id": model.ID, + "model_name": model.Name, + }) + return nil, err + } + + logger.Info(ctx, "Chat model initialized successfully") + return chatModel, nil +} diff --git a/internal/application/service/retriever/composite.go b/internal/application/service/retriever/composite.go new file mode 100644 index 0000000..701a658 --- /dev/null +++ b/internal/application/service/retriever/composite.go @@ -0,0 +1,289 @@ +package retriever + +import ( + "context" + "fmt" + "maps" + "slices" + "sync" + "sync/atomic" + + "github.com/Tencent/WeKnora/internal/common" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/embedding" + "github.com/Tencent/WeKnora/internal/runtime" + "github.com/Tencent/WeKnora/internal/tracing" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "go.opentelemetry.io/otel/attribute" +) + +// engineInfo holds information about a retrieve engine and its supported retriever types +type engineInfo struct { + retrieveEngine interfaces.RetrieveEngineService + retrieverType []types.RetrieverType +} + +// CompositeRetrieveEngine implements a composite pattern for retrieval engines, +// delegating operations to all registered engines +type CompositeRetrieveEngine struct { + engineInfos []*engineInfo +} + +// Retrieve performs retrieval operations by delegating to the appropriate engine +// based on the retriever type specified in the parameters +func (c *CompositeRetrieveEngine) Retrieve(ctx context.Context, + retrieveParams []types.RetrieveParams, +) ([]*types.RetrieveResult, error) { + return concurrentRetrieve(ctx, retrieveParams, + func(ctx context.Context, param types.RetrieveParams, results *[]*types.RetrieveResult, mu *sync.Mutex) error { + found := false + for _, engineInfo := range c.engineInfos { + if slices.Contains(engineInfo.retrieverType, param.RetrieverType) { + result, err := engineInfo.retrieveEngine.Retrieve(ctx, param) + if err != nil { + return err + } + mu.Lock() + *results = append(*results, result...) 
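					// results is shared across the goroutines spawned by concurrentRetrieve,
					// so appends must stay inside the mutex-protected section.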
+ mu.Unlock() + found = true + break + } + } + if !found { + return fmt.Errorf("retriever type %s not found", param.RetrieverType) + } + return nil + }, + ) +} + +// NewCompositeRetrieveEngine creates a new composite retrieve engine with the given parameters +func NewCompositeRetrieveEngine(engineParams []types.RetrieverEngineParams) (*CompositeRetrieveEngine, error) { + var registry interfaces.RetrieveEngineRegistry + runtime.GetContainer().Invoke(func(r interfaces.RetrieveEngineRegistry) { + registry = r + }) + engineInfos := make(map[types.RetrieverEngineType]*engineInfo) + for _, engineParam := range engineParams { + repo, err := registry.GetRetrieveEngineService(engineParam.RetrieverEngineType) + if err != nil { + return nil, err + } + if !slices.Contains(repo.Support(), engineParam.RetrieverType) { + return nil, fmt.Errorf("retrieval engine %s does not support retriever type: %s", + repo.EngineType(), engineParam.RetrieverType) + } + if _, exists := engineInfos[repo.EngineType()]; exists { + engineInfos[repo.EngineType()].retrieverType = append(engineInfos[repo.EngineType()].retrieverType, + engineParam.RetrieverType) + continue + } + engineInfos[repo.EngineType()] = &engineInfo{ + retrieveEngine: repo, + retrieverType: []types.RetrieverType{engineParam.RetrieverType}, + } + } + return &CompositeRetrieveEngine{engineInfos: slices.Collect(maps.Values(engineInfos))}, nil +} + +// SupportRetriever checks if a retriever type is supported by any of the registered engines +func (c *CompositeRetrieveEngine) SupportRetriever(r types.RetrieverType) bool { + for _, engineInfo := range c.engineInfos { + if slices.Contains(engineInfo.retrieverType, r) { + return true + } + } + return false +} + +// concurrentRetrieve is a helper function for concurrent processing of retrieval parameters +// and collecting results +func concurrentRetrieve( + ctx context.Context, + retrieveParams []types.RetrieveParams, + fn func(ctx context.Context, param types.RetrieveParams, results *[]*types.RetrieveResult, mu *sync.Mutex) error, +) ([]*types.RetrieveResult, error) { + var results []*types.RetrieveResult + var mu sync.Mutex + var wg sync.WaitGroup + errCh := make(chan error, len(retrieveParams)) + + for _, param := range retrieveParams { + wg.Add(1) + p := param // Create local copy for safe use in closure + go func() { + defer wg.Done() + if err := fn(ctx, p, &results, &mu); err != nil { + errCh <- err + } + }() + } + + wg.Wait() + close(errCh) + + // Check for errors + for err := range errCh { + if err != nil { + return nil, err + } + } + + return results, nil +} + +// concurrentExecWithError is a generic function for concurrent execution of operations +// and handling errors +func (c *CompositeRetrieveEngine) concurrentExecWithError( + ctx context.Context, + fn func(ctx context.Context, engineInfo *engineInfo) error, +) error { + var wg sync.WaitGroup + errCh := make(chan error, len(c.engineInfos)) + + for _, engineInfo := range c.engineInfos { + wg.Add(1) + eng := engineInfo // Create local copy for safe use in closure + go func() { + defer wg.Done() + if err := fn(ctx, eng); err != nil { + errCh <- err + } + }() + } + + wg.Wait() + close(errCh) + + // Return the first error (if any) + for err := range errCh { + if err != nil { + return err + } + } + return nil +} + +// Index saves vector embeddings to all registered repositories +func (c *CompositeRetrieveEngine) Index(ctx context.Context, + embedder embedding.Embedder, indexInfo *types.IndexInfo, +) error { + ctx, span := tracing.ContextWithSpan(ctx, 
"CompositeRetrieveEngine.Index") + defer span.End() + err := c.concurrentExecWithError(ctx, func(ctx context.Context, engineInfo *engineInfo) error { + if err := engineInfo.retrieveEngine.Index(ctx, embedder, indexInfo, engineInfo.retrieverType); err != nil { + logger.Errorf(ctx, "Repository %s failed to save: %v", engineInfo.retrieveEngine.EngineType(), err) + return err + } + return nil + }) + span.RecordError(err) + span.SetAttributes( + attribute.String("embedder", embedder.GetModelName()), + attribute.String("source_id", indexInfo.SourceID), + ) + return err +} + +// BatchIndex batch saves vector embeddings to all registered repositories +func (c *CompositeRetrieveEngine) BatchIndex(ctx context.Context, + embedder embedding.Embedder, indexInfoList []*types.IndexInfo, +) error { + ctx, span := tracing.ContextWithSpan(ctx, "CompositeRetrieveEngine.BatchIndex") + defer span.End() + // Deduplicate sourceIDs + indexInfoList = common.Deduplicate(func(info *types.IndexInfo) string { return info.SourceID }, indexInfoList...) + err := c.concurrentExecWithError(ctx, func(ctx context.Context, engineInfo *engineInfo) error { + if err := engineInfo.retrieveEngine.BatchIndex( + ctx, + embedder, + indexInfoList, + engineInfo.retrieverType, + ); err != nil { + logger.Errorf(ctx, "Repository %s failed to batch save: %v", engineInfo.retrieveEngine.EngineType(), err) + return err + } + return nil + }) + span.RecordError(err) + span.SetAttributes( + attribute.String("embedder", embedder.GetModelName()), + attribute.Int("index_info_count", len(indexInfoList)), + ) + return err +} + +// DeleteByChunkIDList deletes vector embeddings by chunk ID list from all registered repositories +func (c *CompositeRetrieveEngine) DeleteByChunkIDList(ctx context.Context, + chunkIDList []string, dimension int, +) error { + return c.concurrentExecWithError(ctx, func(ctx context.Context, engineInfo *engineInfo) error { + if err := engineInfo.retrieveEngine.DeleteByChunkIDList(ctx, chunkIDList, dimension); err != nil { + logger.GetLogger(ctx).Errorf("Repository %s failed to delete chunk ID list: %v", + engineInfo.retrieveEngine.EngineType(), err) + return err + } + return nil + }) +} + +// CopyIndices copies indices from a source knowledge base to a target knowledge base +func (c *CompositeRetrieveEngine) CopyIndices( + ctx context.Context, + sourceKnowledgeBaseID string, + targetKnowledgeBaseID string, + sourceToTargetKBIDMap map[string]string, + sourceToTargetChunkIDMap map[string]string, + dimension int, +) error { + return c.concurrentExecWithError(ctx, func(ctx context.Context, engineInfo *engineInfo) error { + if err := engineInfo.retrieveEngine.CopyIndices( + ctx, + sourceKnowledgeBaseID, + sourceToTargetKBIDMap, + sourceToTargetChunkIDMap, + targetKnowledgeBaseID, + dimension, + ); err != nil { + logger.Errorf(ctx, "Repository %s failed to copy indices: %v", engineInfo.retrieveEngine.EngineType(), err) + return err + } + return nil + }) +} + +// DeleteByKnowledgeIDList deletes vector embeddings by knowledge ID list from all registered repositories +func (c *CompositeRetrieveEngine) DeleteByKnowledgeIDList(ctx context.Context, + knowledgeIDList []string, dimension int, +) error { + return c.concurrentExecWithError(ctx, func(ctx context.Context, engineInfo *engineInfo) error { + if err := engineInfo.retrieveEngine.DeleteByKnowledgeIDList(ctx, knowledgeIDList, dimension); err != nil { + logger.GetLogger(ctx).Errorf("Repository %s failed to delete knowledge ID list: %v", + engineInfo.retrieveEngine.EngineType(), err) + 
return err + } + return nil + }) +} + +// EstimateStorageSize estimates the storage size required for the provided index information +func (c *CompositeRetrieveEngine) EstimateStorageSize(ctx context.Context, + embedder embedding.Embedder, indexInfoList []*types.IndexInfo, +) int64 { + ctx, span := tracing.ContextWithSpan(ctx, "CompositeRetrieveEngine.EstimateStorageSize") + defer span.End() + sum := atomic.Int64{} + err := c.concurrentExecWithError(ctx, func(ctx context.Context, engineInfo *engineInfo) error { + sum.Add(engineInfo.retrieveEngine.EstimateStorageSize(ctx, embedder, indexInfoList, engineInfo.retrieverType)) + return nil + }) + span.RecordError(err) + span.SetAttributes( + attribute.String("embedder", embedder.GetModelName()), + attribute.Int("index_info_count", len(indexInfoList)), + attribute.Int64("storage_size", sum.Load()), + ) + return sum.Load() +} diff --git a/internal/application/service/retriever/keywords_vector_hybrid_indexer.go b/internal/application/service/retriever/keywords_vector_hybrid_indexer.go new file mode 100644 index 0000000..0345994 --- /dev/null +++ b/internal/application/service/retriever/keywords_vector_hybrid_indexer.go @@ -0,0 +1,163 @@ +package retriever + +import ( + "context" + "slices" + "time" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/embedding" + "github.com/Tencent/WeKnora/internal/models/utils" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// KeywordsVectorHybridRetrieveEngineService implements a hybrid retrieval engine +// that supports both keyword-based and vector-based retrieval +type KeywordsVectorHybridRetrieveEngineService struct { + indexRepository interfaces.RetrieveEngineRepository + engineType types.RetrieverEngineType +} + +// NewKVHybridRetrieveEngine creates a new instance of the hybrid retrieval engine +// KV stands for KeywordsVector +func NewKVHybridRetrieveEngine(indexRepository interfaces.RetrieveEngineRepository, + engineType types.RetrieverEngineType, +) interfaces.RetrieveEngineService { + return &KeywordsVectorHybridRetrieveEngineService{indexRepository: indexRepository, engineType: engineType} +} + +// EngineType returns the type of the retrieval engine +func (v *KeywordsVectorHybridRetrieveEngineService) EngineType() types.RetrieverEngineType { + return v.engineType +} + +// Retrieve performs retrieval based on the provided parameters +func (v *KeywordsVectorHybridRetrieveEngineService) Retrieve(ctx context.Context, + params types.RetrieveParams, +) ([]*types.RetrieveResult, error) { + return v.indexRepository.Retrieve(ctx, params) +} + +// Index creates embeddings for the content and saves it to the repository +// if vector retrieval is enabled in the retriever types +func (v *KeywordsVectorHybridRetrieveEngineService) Index(ctx context.Context, + embedder embedding.Embedder, indexInfo *types.IndexInfo, retrieverTypes []types.RetrieverType, +) error { + params := make(map[string]any) + embeddingMap := make(map[string][]float32) + if slices.Contains(retrieverTypes, types.VectorRetrieverType) { + embedding, err := embedder.Embed(ctx, indexInfo.Content) + if err != nil { + return err + } + embeddingMap[indexInfo.ChunkID] = embedding + } + params["embedding"] = embeddingMap + return v.indexRepository.Save(ctx, indexInfo, params) +} + +// BatchIndex creates embeddings for multiple content items and saves them to the repository +// in batches for efficiency +func (v 
*KeywordsVectorHybridRetrieveEngineService) BatchIndex(ctx context.Context, + embedder embedding.Embedder, indexInfoList []*types.IndexInfo, retrieverTypes []types.RetrieverType, +) error { + if len(indexInfoList) == 0 { + return nil + } + params := make(map[string]any) + if slices.Contains(retrieverTypes, types.VectorRetrieverType) { + var contentList []string + for _, indexInfo := range indexInfoList { + contentList = append(contentList, indexInfo.Content) + } + var embeddings [][]float32 + var err error + for range 5 { + embeddings, err = embedder.BatchEmbedWithPool(ctx, embedder, contentList) + if err == nil { + break + } + time.Sleep(100 * time.Millisecond) + } + if err != nil { + return err + } + batchSize := 20 + for i, indexChunk := range utils.ChunkSlice(indexInfoList, batchSize) { + embeddingMap := make(map[string][]float32) + for j, indexInfo := range indexChunk { + embeddingMap[indexInfo.SourceID] = embeddings[i*batchSize+j] + } + params["embedding"] = embeddingMap + err = v.indexRepository.BatchSave(ctx, indexChunk, params) + if err != nil { + return err + } + } + return nil + } + var err error + for _, indexChunk := range utils.ChunkSlice(indexInfoList, 10) { + err = v.indexRepository.BatchSave(ctx, indexChunk, params) + if err != nil { + return err + } + } + return nil +} + +// DeleteByChunkIDList deletes vectors by their chunk IDs +func (v *KeywordsVectorHybridRetrieveEngineService) DeleteByChunkIDList(ctx context.Context, + indexIDList []string, dimension int, +) error { + return v.indexRepository.DeleteByChunkIDList(ctx, indexIDList, dimension) +} + +// DeleteByKnowledgeIDList deletes vectors by their knowledge IDs +func (v *KeywordsVectorHybridRetrieveEngineService) DeleteByKnowledgeIDList(ctx context.Context, + knowledgeIDList []string, dimension int, +) error { + return v.indexRepository.DeleteByKnowledgeIDList(ctx, knowledgeIDList, dimension) +} + +// Support returns the retriever types supported by this engine +func (v *KeywordsVectorHybridRetrieveEngineService) Support() []types.RetrieverType { + return v.indexRepository.Support() +} + +// EstimateStorageSize estimates the storage space needed for the provided index information +func (v *KeywordsVectorHybridRetrieveEngineService) EstimateStorageSize( + ctx context.Context, + embedder embedding.Embedder, + indexInfoList []*types.IndexInfo, + retrieverTypes []types.RetrieverType, +) int64 { + params := make(map[string]any) + if slices.Contains(retrieverTypes, types.VectorRetrieverType) { + embeddingMap := make(map[string][]float32) + // just for estimate storage size + for _, indexInfo := range indexInfoList { + embeddingMap[indexInfo.ChunkID] = make([]float32, embedder.GetDimensions()) + } + params["embedding"] = embeddingMap + } + return v.indexRepository.EstimateStorageSize(ctx, indexInfoList, params) +} + +// CopyIndices copies indices from a source knowledge base to a target knowledge base +func (v *KeywordsVectorHybridRetrieveEngineService) CopyIndices( + ctx context.Context, + sourceKnowledgeBaseID string, + sourceToTargetKBIDMap map[string]string, + sourceToTargetChunkIDMap map[string]string, + targetKnowledgeBaseID string, + dimension int, +) error { + logger.Infof(ctx, "Copy indices from knowledge base %s to %s, mapping relation count: %d", + sourceKnowledgeBaseID, targetKnowledgeBaseID, len(sourceToTargetChunkIDMap), + ) + return v.indexRepository.CopyIndices( + ctx, sourceKnowledgeBaseID, sourceToTargetKBIDMap, sourceToTargetChunkIDMap, targetKnowledgeBaseID, dimension, + ) +} diff --git 
a/internal/application/service/retriever/registry.go b/internal/application/service/retriever/registry.go new file mode 100644 index 0000000..f489c58 --- /dev/null +++ b/internal/application/service/retriever/registry.go @@ -0,0 +1,64 @@ +package retriever + +import ( + "fmt" + "sync" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// RetrieveEngineRegistry implements the retrieval engine registry +type RetrieveEngineRegistry struct { + repositories map[types.RetrieverEngineType]interfaces.RetrieveEngineService + mu sync.RWMutex +} + +// NewRetrieveEngineRegistry creates a new retrieval engine registry +func NewRetrieveEngineRegistry() interfaces.RetrieveEngineRegistry { + return &RetrieveEngineRegistry{ + repositories: make(map[types.RetrieverEngineType]interfaces.RetrieveEngineService), + } +} + +// Register registers a retrieval engine service +func (r *RetrieveEngineRegistry) Register(repo interfaces.RetrieveEngineService) error { + r.mu.Lock() + defer r.mu.Unlock() + + if _, exists := r.repositories[repo.EngineType()]; exists { + return fmt.Errorf("Repository type %s already registered", repo.EngineType()) + } + + r.repositories[repo.EngineType()] = repo + return nil +} + +// GetRetrieveEngineService retrieves a retrieval engine service by type +func (r *RetrieveEngineRegistry) GetRetrieveEngineService(repoType types.RetrieverEngineType) ( + interfaces.RetrieveEngineService, error, +) { + r.mu.RLock() + defer r.mu.RUnlock() + + repo, exists := r.repositories[repoType] + if !exists { + return nil, fmt.Errorf("Repository of type %s not found", repoType) + } + + return repo, nil +} + +// GetAllRetrieveEngineServices retrieves all registered retrieval engine services +func (r *RetrieveEngineRegistry) GetAllRetrieveEngineServices() []interfaces.RetrieveEngineService { + r.mu.RLock() + defer r.mu.RUnlock() + + // Create a copy to avoid modifying the original map + result := make([]interfaces.RetrieveEngineService, 0, len(r.repositories)) + for _, v := range r.repositories { + result = append(result, v) + } + + return result +} diff --git a/internal/application/service/session.go b/internal/application/service/session.go new file mode 100644 index 0000000..a229ba3 --- /dev/null +++ b/internal/application/service/session.go @@ -0,0 +1,517 @@ +package service + +import ( + "context" + "errors" + "strings" + + chatpipline "github.com/Tencent/WeKnora/internal/application/service/chat_pipline" + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/chat" + "github.com/Tencent/WeKnora/internal/tracing" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" +) + +// sessionService implements the SessionService interface for managing conversation sessions +type sessionService struct { + cfg *config.Config // Application configuration + sessionRepo interfaces.SessionRepository // Repository for session data + messageRepo interfaces.MessageRepository // Repository for message data + knowledgeBaseService interfaces.KnowledgeBaseService // Service for knowledge base operations + modelService interfaces.ModelService // Service for model operations + eventManager *chatpipline.EventManager // Event manager for chat pipeline +} + +// NewSessionService creates a new session service instance with all required dependencies +func 
NewSessionService(cfg *config.Config, + sessionRepo interfaces.SessionRepository, + messageRepo interfaces.MessageRepository, + knowledgeBaseService interfaces.KnowledgeBaseService, + modelService interfaces.ModelService, + eventManager *chatpipline.EventManager, +) interfaces.SessionService { + return &sessionService{ + cfg: cfg, + sessionRepo: sessionRepo, + messageRepo: messageRepo, + knowledgeBaseService: knowledgeBaseService, + modelService: modelService, + eventManager: eventManager, + } +} + +// CreateSession creates a new conversation session +func (s *sessionService) CreateSession(ctx context.Context, session *types.Session) (*types.Session, error) { + logger.Info(ctx, "Start creating session") + + // Validate tenant ID + if session.TenantID == 0 { + logger.Error(ctx, "Failed to create session: tenant ID cannot be empty") + return nil, errors.New("tenant ID is required") + } + + logger.Infof(ctx, "Creating session, tenant ID: %d, model ID: %s, knowledge base ID: %s", + session.TenantID, session.SummaryModelID, session.KnowledgeBaseID) + + // Create session in repository + createdSession, err := s.sessionRepo.Create(ctx, session) + if err != nil { + return nil, err + } + + logger.Infof(ctx, "Session created successfully, ID: %s, tenant ID: %d", createdSession.ID, createdSession.TenantID) + return createdSession, nil +} + +// GetSession retrieves a session by its ID +func (s *sessionService) GetSession(ctx context.Context, id string) (*types.Session, error) { + logger.Info(ctx, "Start retrieving session") + + // Validate session ID + if id == "" { + logger.Error(ctx, "Failed to get session: session ID cannot be empty") + return nil, errors.New("session id is required") + } + + // Get tenant ID from context + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Retrieving session, ID: %s, tenant ID: %d", id, tenantID) + + // Get session from repository + session, err := s.sessionRepo.Get(ctx, tenantID, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "session_id": id, + "tenant_id": tenantID, + }) + return nil, err + } + + logger.Infof(ctx, "Session retrieved successfully, ID: %s, tenant ID: %d", session.ID, session.TenantID) + return session, nil +} + +// GetSessionsByTenant retrieves all sessions for the current tenant +func (s *sessionService) GetSessionsByTenant(ctx context.Context) ([]*types.Session, error) { + logger.Info(ctx, "Start retrieving all sessions for tenant") + + // Get tenant ID from context + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Retrieving all sessions for tenant, tenant ID: %d", tenantID) + + // Get sessions from repository + sessions, err := s.sessionRepo.GetByTenantID(ctx, tenantID) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": tenantID, + }) + return nil, err + } + + logger.Infof( + ctx, "Tenant sessions retrieved successfully, tenant ID: %d, session count: %d", tenantID, len(sessions), + ) + return sessions, nil +} + +// GetPagedSessionsByTenant retrieves sessions for the current tenant with pagination +func (s *sessionService) GetPagedSessionsByTenant(ctx context.Context, + pagination *types.Pagination, +) (*types.PageResult, error) { + logger.Info(ctx, "Start retrieving paged sessions for tenant") + + // Get tenant ID from context + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Retrieving paged sessions for tenant, tenant ID: %d, page: %d, page size: %d", + tenantID, pagination.Page, 
pagination.PageSize) + + // Get paged sessions from repository + sessions, total, err := s.sessionRepo.GetPagedByTenantID(ctx, tenantID, pagination) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": tenantID, + "page": pagination.Page, + "page_size": pagination.PageSize, + }) + return nil, err + } + + logger.Infof(ctx, "Tenant paged sessions retrieved successfully, tenant ID: %d, total: %d", tenantID, total) + return types.NewPageResult(total, pagination, sessions), nil +} + +// UpdateSession updates an existing session's properties +func (s *sessionService) UpdateSession(ctx context.Context, session *types.Session) error { + logger.Info(ctx, "Start updating session") + + // Validate session ID + if session.ID == "" { + logger.Error(ctx, "Failed to update session: session ID cannot be empty") + return errors.New("session id is required") + } + + logger.Infof(ctx, "Updating session, ID: %s, tenant ID: %d", session.ID, session.TenantID) + + // Update session in repository + err := s.sessionRepo.Update(ctx, session) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "session_id": session.ID, + "tenant_id": session.TenantID, + }) + return err + } + + logger.Infof(ctx, "Session updated successfully, ID: %s", session.ID) + return nil +} + +// DeleteSession removes a session by its ID +func (s *sessionService) DeleteSession(ctx context.Context, id string) error { + logger.Info(ctx, "Start deleting session") + + // Validate session ID + if id == "" { + logger.Error(ctx, "Failed to delete session: session ID cannot be empty") + return errors.New("session id is required") + } + + // Get tenant ID from context + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Deleting session, ID: %s, tenant ID: %d", id, tenantID) + + // Delete session from repository + err := s.sessionRepo.Delete(ctx, tenantID, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "session_id": id, + "tenant_id": tenantID, + }) + return err + } + + logger.Infof(ctx, "Session deleted successfully, ID: %s", id) + return nil +} + +// GenerateTitle generates a title for the current conversation content +func (s *sessionService) GenerateTitle(ctx context.Context, + sessionID string, messages []types.Message, +) (string, error) { + logger.Info(ctx, "Start generating session title") + + // Validate session ID + if sessionID == "" { + logger.Error(ctx, "Failed to generate title: session ID cannot be empty") + return "", errors.New("session id is required") + } + + // Get tenant ID from context + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Getting session info, session ID: %s, tenant ID: %d", sessionID, tenantID) + + // Get session from repository + session, err := s.sessionRepo.Get(ctx, tenantID, sessionID) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "session_id": sessionID, + "tenant_id": tenantID, + }) + return "", err + } + + // Skip if title already exists + if session.Title != "" { + logger.Infof(ctx, "Session already has a title, session ID: %s, title: %s", sessionID, session.Title) + return session.Title, nil + } + + // Get the first user message, either from provided messages or repository + var message *types.Message + if len(messages) == 0 { + logger.Info(ctx, "Message list is empty, getting the first user message") + message, err = s.messageRepo.GetFirstMessageOfUser(ctx, sessionID) + if err != nil { + logger.ErrorWithFields(ctx, err, 
map[string]interface{}{ + "session_id": sessionID, + }) + return "", err + } + } else { + logger.Info(ctx, "Searching for user message in message list") + for _, m := range messages { + if m.Role == "user" { + message = &m + break + } + } + } + + // Ensure a user message was found + if message == nil { + logger.Error(ctx, "No user message found, cannot generate title") + return "", errors.New("no user message found") + } + + // Get chat model + logger.Infof(ctx, "Getting chat model, model ID: %s", session.SummaryModelID) + chatModel, err := s.modelService.GetChatModel(ctx, session.SummaryModelID) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "model_id": session.SummaryModelID, + }) + return "", err + } + + // Prepare messages for title generation + logger.Info(ctx, "Preparing to generate session title") + var chatMessages []chat.Message + chatMessages = append(chatMessages, + chat.Message{Role: "system", Content: s.cfg.Conversation.GenerateSessionTitlePrompt}, + ) + chatMessages = append(chatMessages, + chat.Message{Role: "user", Content: message.Content + " /no_think"}, + ) + + // Call model to generate title + thinking := false + logger.Info(ctx, "Calling model to generate title") + response, err := chatModel.Chat(ctx, chatMessages, &chat.ChatOptions{ + Temperature: 0.3, + Thinking: &thinking, + }) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + return "", err + } + + // Process and store the generated title + session.Title = strings.TrimPrefix(response.Content, "\n\n") + logger.Infof(ctx, "Title generated successfully: %s", session.Title) + + // Update session with new title + logger.Info(ctx, "Updating session title") + err = s.sessionRepo.Update(ctx, session) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + return "", err + } + + logger.Infof(ctx, "Session title updated successfully, ID: %s, title: %s", sessionID, session.Title) + return session.Title, nil +} + +// KnowledgeQA performs knowledge base question answering with LLM summarization +func (s *sessionService) KnowledgeQA(ctx context.Context, sessionID, query string) ( + []*types.SearchResult, <-chan types.StreamResponse, error, +) { + logger.Info(ctx, "Start knowledge base question answering") + logger.Infof(ctx, "Knowledge base question answering parameters, session ID: %s, query: %s", sessionID, query) + + // Get tenant ID from context + tenantID := ctx.Value(types.TenantIDContextKey).(uint) + logger.Infof(ctx, "Getting session info, session ID: %s, tenant ID: %d", sessionID, tenantID) + + // Get session information + session, err := s.sessionRepo.Get(ctx, tenantID, sessionID) + if err != nil { + logger.Errorf(ctx, "Failed to get session, session ID: %s, error: %v", sessionID, err) + return nil, nil, err + } + + // Validate knowledge base association + if session.KnowledgeBaseID == "" { + logger.Warnf(ctx, "Session has no associated knowledge base, session ID: %s", sessionID) + return nil, nil, errors.New("session has no knowledge base") + } + + // Create chat management object with session settings + logger.Infof(ctx, "Creating chat manage object, knowledge base ID: %s", session.KnowledgeBaseID) + chatManage := &types.ChatManage{ + Query: query, + RewriteQuery: query, + SessionID: sessionID, + KnowledgeBaseID: session.KnowledgeBaseID, + VectorThreshold: session.VectorThreshold, + KeywordThreshold: session.KeywordThreshold, + EmbeddingTopK: session.EmbeddingTopK, + RerankModelID: session.RerankModelID, + RerankTopK: session.RerankTopK, + RerankThreshold: 
session.RerankThreshold, + ChatModelID: session.SummaryModelID, + SummaryConfig: types.SummaryConfig{ + MaxTokens: session.SummaryParameters.MaxTokens, + RepeatPenalty: session.SummaryParameters.RepeatPenalty, + TopK: session.SummaryParameters.TopK, + TopP: session.SummaryParameters.TopP, + FrequencyPenalty: session.SummaryParameters.FrequencyPenalty, + PresencePenalty: session.SummaryParameters.PresencePenalty, + Prompt: session.SummaryParameters.Prompt, + ContextTemplate: session.SummaryParameters.ContextTemplate, + Temperature: session.SummaryParameters.Temperature, + Seed: session.SummaryParameters.Seed, + NoMatchPrefix: session.SummaryParameters.NoMatchPrefix, + MaxCompletionTokens: session.SummaryParameters.MaxCompletionTokens, + }, + FallbackResponse: session.FallbackResponse, + } + + // Start knowledge QA event processing + logger.Info(ctx, "Triggering knowledge base question answering event") + err = s.KnowledgeQAByEvent(ctx, chatManage, types.Pipline["rag_stream"]) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "session_id": sessionID, + "knowledge_base_id": session.KnowledgeBaseID, + }) + return nil, nil, err + } + + logger.Info(ctx, "Knowledge base question answering completed") + return chatManage.MergeResult, chatManage.ResponseChan, nil +} + +// KnowledgeQAByEvent processes knowledge QA through a series of events in the pipeline +func (s *sessionService) KnowledgeQAByEvent(ctx context.Context, + chatManage *types.ChatManage, eventList []types.EventType, +) error { + ctx, span := tracing.ContextWithSpan(ctx, "SessionService.KnowledgeQAByEvent") + defer span.End() + + logger.Info(ctx, "Start processing knowledge base question answering through events") + logger.Infof(ctx, "Knowledge base question answering parameters, session ID: %s, knowledge base ID: %s, query: %s", + chatManage.SessionID, chatManage.KnowledgeBaseID, chatManage.Query) + + // Prepare method list for logging and tracing + methods := []string{} + for _, event := range eventList { + methods = append(methods, string(event)) + } + + // Set up tracing attributes + logger.Infof(ctx, "Trigger event list: %v", methods) + span.SetAttributes( + attribute.String("request_id", ctx.Value(types.RequestIDContextKey).(string)), + attribute.String("query", chatManage.Query), + attribute.String("method", strings.Join(methods, ",")), + ) + + // Process each event in sequence + for _, event := range eventList { + logger.Infof(ctx, "Starting to trigger event: %v", event) + err := s.eventManager.Trigger(ctx, event, chatManage) + + // Handle case where search returns no results + if err == chatpipline.ErrSearchNothing { + logger.Warnf(ctx, "Event %v triggered, search result is empty, using fallback response", event) + chatManage.ResponseChan = chatpipline.NewFallbackChan(ctx, chatManage.FallbackResponse) + chatManage.ChatResponse = &types.ChatResponse{Content: chatManage.FallbackResponse} + return nil + } + + // Handle other errors + if err != nil { + logger.Errorf(ctx, "Event triggering failed, event: %v, error type: %s, description: %s, error: %v", + event, err.ErrorType, err.Description, err.Err) + span.RecordError(err.Err) + span.SetStatus(codes.Error, err.Description) + span.SetAttributes(attribute.String("error_type", err.ErrorType)) + return err.Err + } + logger.Infof(ctx, "Event %v triggered successfully", event) + } + + logger.Info(ctx, "All events triggered successfully") + return nil +} + +// SearchKnowledge performs knowledge base search without LLM summarization +func (s *sessionService) 
SearchKnowledge(ctx context.Context, + knowledgeBaseID, query string, +) ([]*types.SearchResult, error) { + logger.Info(ctx, "Start knowledge base search without LLM summary") + logger.Infof(ctx, "Knowledge base search parameters, knowledge base ID: %s, query: %s", knowledgeBaseID, query) + + // Create default retrieval parameters + chatManage := &types.ChatManage{ + Query: query, + RewriteQuery: query, + KnowledgeBaseID: knowledgeBaseID, + VectorThreshold: s.cfg.Conversation.VectorThreshold, // Use default configuration + KeywordThreshold: s.cfg.Conversation.KeywordThreshold, // Use default configuration + EmbeddingTopK: s.cfg.Conversation.EmbeddingTopK, // Use default configuration + RerankTopK: s.cfg.Conversation.RerankTopK, // Use default configuration + RerankThreshold: s.cfg.Conversation.RerankThreshold, // Use default configuration + } + + // Get default models + models, err := s.modelService.ListModels(ctx) + if err != nil { + logger.Errorf(ctx, "Failed to get models: %v", err) + return nil, err + } + + // Find the first available rerank model + for _, model := range models { + if model.Type == types.ModelTypeRerank { + chatManage.RerankModelID = model.ID + break + } + } + + // Use specific event list, only including retrieval-related events, not LLM summarization + searchEvents := []types.EventType{ + types.PREPROCESS_QUERY, // Preprocess query + types.CHUNK_SEARCH, // Vector search + types.CHUNK_RERANK, // Rerank search results + types.CHUNK_MERGE, // Merge search results + types.FILTER_TOP_K, // Filter top K results + } + + ctx, span := tracing.ContextWithSpan(ctx, "SessionService.SearchKnowledge") + defer span.End() + + // Prepare method list for logging and tracing + methods := []string{} + for _, event := range searchEvents { + methods = append(methods, string(event)) + } + + // Set up tracing attributes + logger.Infof(ctx, "Trigger search event list: %v", methods) + span.SetAttributes( + attribute.String("query", query), + attribute.String("knowledge_base_id", knowledgeBaseID), + attribute.String("method", strings.Join(methods, ",")), + ) + + // Process each search event in sequence + for _, event := range searchEvents { + logger.Infof(ctx, "Starting to trigger search event: %v", event) + err := s.eventManager.Trigger(ctx, event, chatManage) + + // Handle case where search returns no results + if err == chatpipline.ErrSearchNothing { + logger.Warnf(ctx, "Event %v triggered, search result is empty", event) + return []*types.SearchResult{}, nil + } + + // Handle other errors + if err != nil { + logger.Errorf(ctx, "Event triggering failed, event: %v, error type: %s, description: %s, error: %v", + event, err.ErrorType, err.Description, err.Err) + span.RecordError(err.Err) + span.SetStatus(codes.Error, err.Description) + span.SetAttributes(attribute.String("error_type", err.ErrorType)) + return nil, err.Err + } + logger.Infof(ctx, "Event %v triggered successfully", event) + } + + logger.Infof(ctx, "Knowledge base search completed, found %d results", len(chatManage.MergeResult)) + return chatManage.MergeResult, nil +} diff --git a/internal/application/service/tenant.go b/internal/application/service/tenant.go new file mode 100644 index 0000000..2306c9a --- /dev/null +++ b/internal/application/service/tenant.go @@ -0,0 +1,289 @@ +package service + +import ( + "context" + "crypto/aes" + "crypto/cipher" + "crypto/rand" + "encoding/base64" + "encoding/binary" + "errors" + "io" + "os" + "strings" + "time" + + "github.com/Tencent/WeKnora/internal/logger" + 
"github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +var apiKeySecret = []byte(os.Getenv("TENANT_AES_KEY")) + +// ListTenantsParams defines parameters for listing tenants with filtering and pagination +type ListTenantsParams struct { + Page int // Page number for pagination + PageSize int // Number of items per page + Status string // Filter by tenant status + Name string // Filter by tenant name +} + +// tenantService implements the TenantService interface +type tenantService struct { + repo interfaces.TenantRepository // Repository for tenant data operations +} + +// NewTenantService creates a new tenant service instance +func NewTenantService(repo interfaces.TenantRepository) interfaces.TenantService { + return &tenantService{repo: repo} +} + +// CreateTenant creates a new tenant +func (s *tenantService) CreateTenant(ctx context.Context, tenant *types.Tenant) (*types.Tenant, error) { + logger.Info(ctx, "Start creating tenant") + + if tenant.Name == "" { + logger.Error(ctx, "Tenant name cannot be empty") + return nil, errors.New("tenant name cannot be empty") + } + + logger.Infof(ctx, "Creating tenant, name: %s", tenant.Name) + + // Create tenant with initial values + tenant.APIKey = s.generateApiKey(0) + tenant.Status = "active" + tenant.CreatedAt = time.Now() + tenant.UpdatedAt = time.Now() + + logger.Info(ctx, "Saving tenant information to database") + if err := s.repo.CreateTenant(ctx, tenant); err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_name": tenant.Name, + }) + return nil, err + } + + logger.Infof(ctx, "Tenant created successfully, ID: %d, generating official API Key", tenant.ID) + tenant.APIKey = s.generateApiKey(tenant.ID) + if err := s.repo.UpdateTenant(ctx, tenant); err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": tenant.ID, + "tenant_name": tenant.Name, + }) + return nil, err + } + + logger.Infof(ctx, "Tenant creation and update completed, ID: %d, name: %s", tenant.ID, tenant.Name) + return tenant, nil +} + +// GetTenantByID retrieves a tenant by their ID +func (s *tenantService) GetTenantByID(ctx context.Context, id uint) (*types.Tenant, error) { + logger.Info(ctx, "Start retrieving tenant") + + if id == 0 { + logger.Error(ctx, "Tenant ID cannot be 0") + return nil, errors.New("tenant ID cannot be 0") + } + + logger.Infof(ctx, "Retrieving tenant, ID: %d", id) + + tenant, err := s.repo.GetTenantByID(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": id, + }) + return nil, err + } + + logger.Infof(ctx, "Tenant retrieved successfully, ID: %d, name: %s", tenant.ID, tenant.Name) + return tenant, nil +} + +// ListTenants retrieves a list of all tenants +func (s *tenantService) ListTenants(ctx context.Context) ([]*types.Tenant, error) { + logger.Info(ctx, "Start retrieving tenant list") + + tenants, err := s.repo.ListTenants(ctx) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + return nil, err + } + + logger.Infof(ctx, "Tenant list retrieved successfully, total: %d", len(tenants)) + return tenants, nil +} + +// UpdateTenant updates an existing tenant's information +func (s *tenantService) UpdateTenant(ctx context.Context, tenant *types.Tenant) (*types.Tenant, error) { + if tenant.ID == 0 { + logger.Error(ctx, "Tenant ID cannot be 0") + return nil, errors.New("tenant ID cannot be 0") + } + + logger.Infof(ctx, "Updating tenant, ID: %d, name: %s", tenant.ID, tenant.Name) + + // Generate new API 
key if empty + if tenant.APIKey == "" { + logger.Info(ctx, "API Key is empty, generating new API Key") + tenant.APIKey = s.generateApiKey(tenant.ID) + } + + tenant.UpdatedAt = time.Now() + logger.Info(ctx, "Saving tenant information to database") + + if err := s.repo.UpdateTenant(ctx, tenant); err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": tenant.ID, + }) + return nil, err + } + + logger.Infof(ctx, "Tenant updated successfully, ID: %d", tenant.ID) + return tenant, nil +} + +// DeleteTenant removes a tenant by their ID +func (s *tenantService) DeleteTenant(ctx context.Context, id uint) error { + logger.Info(ctx, "Start deleting tenant") + + if id == 0 { + logger.Error(ctx, "Tenant ID cannot be 0") + return errors.New("tenant ID cannot be 0") + } + + logger.Infof(ctx, "Deleting tenant, ID: %d", id) + + // Get tenant information for logging + tenant, err := s.repo.GetTenantByID(ctx, id) + if err != nil { + if err.Error() == "record not found" { + logger.Warnf(ctx, "Tenant to be deleted does not exist, ID: %d", id) + } else { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": id, + }) + return err + } + } else { + logger.Infof(ctx, "Deleting tenant, ID: %d, name: %s", id, tenant.Name) + } + + err = s.repo.DeleteTenant(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": id, + }) + return err + } + + logger.Infof(ctx, "Tenant deleted successfully, ID: %d", id) + return nil +} + +// UpdateAPIKey updates the API key for a specific tenant +func (s *tenantService) UpdateAPIKey(ctx context.Context, id uint) (string, error) { + logger.Info(ctx, "Start updating tenant API Key") + + if id == 0 { + logger.Error(ctx, "Tenant ID cannot be 0") + return "", errors.New("tenant ID cannot be 0") + } + + logger.Infof(ctx, "Retrieving tenant information, ID: %d", id) + + tenant, err := s.repo.GetTenantByID(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": id, + }) + return "", err + } + + logger.Infof(ctx, "Generating new API Key for tenant, ID: %d", id) + tenant.APIKey = s.generateApiKey(tenant.ID) + + if err := s.repo.UpdateTenant(ctx, tenant); err != nil { + logger.ErrorWithFields(ctx, err, map[string]interface{}{ + "tenant_id": id, + }) + return "", err + } + + logger.Infof(ctx, "Tenant API Key updated successfully, ID: %d", id) + return tenant.APIKey, nil +} + +// generateApiKey generates a secure API key for tenant authentication +func (r *tenantService) generateApiKey(tenantID uint) string { + // 1. Convert tenant_id to bytes + idBytes := make([]byte, 8) + binary.LittleEndian.PutUint64(idBytes, uint64(tenantID)) + + // 2. Encrypt tenant_id using AES-GCM + block, err := aes.NewCipher(apiKeySecret) + if err != nil { + panic("Failed to create AES cipher: " + err.Error()) + } + + nonce := make([]byte, 12) + if _, err := io.ReadFull(rand.Reader, nonce); err != nil { + panic(err.Error()) + } + + aesgcm, err := cipher.NewGCM(block) + if err != nil { + panic("Failed to create GCM cipher: " + err.Error()) + } + + ciphertext := aesgcm.Seal(nil, nonce, idBytes, nil) + + // 3. Combine nonce and ciphertext, then encode with base64 + combined := append(nonce, ciphertext...) 
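+	// Resulting key layout: "sk-" + base64url(12-byte nonce || AES-GCM ciphertext+tag);
+	// ExtractTenantIDFromAPIKey below reverses these steps to recover the tenant ID.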
+	encoded := base64.RawURLEncoding.EncodeToString(combined)
+
+	// Create final API Key in format: sk-{encrypted_part}
+	return "sk-" + encoded
+}
+
+// ExtractTenantIDFromAPIKey extracts the tenant ID from an API key
+func (r *tenantService) ExtractTenantIDFromAPIKey(apiKey string) (uint, error) {
+	// 1. Validate format and extract encrypted part
+	parts := strings.SplitN(apiKey, "-", 2)
+	if len(parts) != 2 || parts[0] != "sk" {
+		return 0, errors.New("invalid API key format")
+	}
+
+	// 2. Decode the base64 part
+	encryptedData, err := base64.RawURLEncoding.DecodeString(parts[1])
+	if err != nil {
+		return 0, errors.New("invalid API key encoding")
+	}
+
+	// 3. Separate nonce and ciphertext
+	if len(encryptedData) < 12 {
+		return 0, errors.New("invalid API key length")
+	}
+	nonce, ciphertext := encryptedData[:12], encryptedData[12:]
+
+	// 4. Decrypt
+	block, err := aes.NewCipher(apiKeySecret)
+	if err != nil {
+		return 0, errors.New("decryption error")
+	}
+
+	aesgcm, err := cipher.NewGCM(block)
+	if err != nil {
+		return 0, errors.New("decryption error")
+	}
+
+	plaintext, err := aesgcm.Open(nil, nonce, ciphertext, nil)
+	if err != nil {
+		return 0, errors.New("API key is invalid or has been tampered with")
+	}
+
+	// 5. Convert back to tenant_id
+	tenantID := binary.LittleEndian.Uint64(plaintext)
+
+	return uint(tenantID), nil
+}
diff --git a/internal/application/service/test_data.go b/internal/application/service/test_data.go
new file mode 100644
index 0000000..b0679b3
--- /dev/null
+++ b/internal/application/service/test_data.go
@@ -0,0 +1,443 @@
+// Package service provides the core business logic layer of the application,
+// including knowledge base management, tenant management and model services
+package service
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"strconv"
+
+	"github.com/Tencent/WeKnora/internal/config"
+	"github.com/Tencent/WeKnora/internal/logger"
+	"github.com/Tencent/WeKnora/internal/models/chat"
+	"github.com/Tencent/WeKnora/internal/models/embedding"
+	"github.com/Tencent/WeKnora/internal/models/rerank"
+	"github.com/Tencent/WeKnora/internal/models/utils/ollama"
+	"github.com/Tencent/WeKnora/internal/types"
+	"github.com/Tencent/WeKnora/internal/types/interfaces"
+)
+
+// TestDataService is the test data service.
+// It initializes the data required by the test environment, including the test tenant,
+// the test knowledge base and the model service instances they depend on
+type TestDataService struct {
+	config        *config.Config                     // Application configuration
+	kbRepo        interfaces.KnowledgeBaseRepository // Knowledge base repository interface
+	tenantService interfaces.TenantService           // Tenant service interface
+	ollamaService *ollama.OllamaService              // Ollama model service
+	modelService  interfaces.ModelService            // Model service interface
+	EmbedModel    embedding.Embedder                 // Embedding model instance
+	RerankModel   rerank.Reranker                    // Rerank model instance
+	LLMModel      chat.Chat                          // Large language model instance
+}
+
+// NewTestDataService creates the test data service
+// and injects the required dependencies and components
+func NewTestDataService(
+	config *config.Config,
+	kbRepo interfaces.KnowledgeBaseRepository,
+	tenantService interfaces.TenantService,
+	ollamaService *ollama.OllamaService,
+	modelService interfaces.ModelService,
+) *TestDataService {
+	return &TestDataService{
+		config:        config,
+		kbRepo:        kbRepo,
+		tenantService: tenantService,
+		ollamaService: ollamaService,
+		modelService:  modelService,
+	}
+}
+
+// initTenant initializes the test tenant.
+// It reads the tenant ID from an environment variable, creates the tenant if it does not
+// exist and updates it otherwise, and configures the tenant's retriever engine parameters
+func (s *TestDataService) initTenant(ctx context.Context) error {
+	logger.Info(ctx, "Start initializing test tenant")
+
+	// Read the tenant ID from the environment variable
+	tenantID := os.Getenv("INIT_TEST_TENANT_ID")
+	logger.Infof(ctx, "Test tenant ID from environment: %s", tenantID)
+
+	// Convert the string ID to uint64
+	tenantIDUint, err := strconv.ParseUint(tenantID, 10, 64)
+	if err != nil {
+		logger.Errorf(ctx, "Failed to parse tenant ID: %v", err)
+		return err
+	}
+
+	// Build the tenant configuration
+	tenantConfig := &types.Tenant{
+		Name:        "Test Tenant",
+		Description: "Test Tenant for Testing",
+		RetrieverEngines: types.RetrieverEngines{
+			Engines: []types.RetrieverEngineParams{
+				{
+					RetrieverType:       types.KeywordsRetrieverType,
+					RetrieverEngineType: types.PostgresRetrieverEngineType,
+				},
+				{
+					RetrieverType:       types.VectorRetrieverType,
+					RetrieverEngineType: types.PostgresRetrieverEngineType,
+				},
+			},
+		},
+	}
+
+	// Get or create the test tenant
+	logger.Infof(ctx, "Attempting to get tenant with ID: %d", tenantIDUint)
+	tenant, err := s.tenantService.GetTenantByID(ctx, uint(tenantIDUint))
+	if err != nil {
+		// Tenant does not exist, create a new one
+		logger.Info(ctx, "Tenant not found, creating a new test tenant")
+		tenant, err = s.tenantService.CreateTenant(ctx, tenantConfig)
+		if err != nil {
+			logger.Errorf(ctx, "Failed to create tenant: %v", err)
+			return err
+		}
+		logger.Infof(ctx, "Created new test tenant with ID: %d", tenant.ID)
+	} else {
+		// Tenant exists, update its retriever engine configuration
+		logger.Info(ctx, "Test tenant found, updating retriever engines")
+		tenant.RetrieverEngines = tenantConfig.RetrieverEngines
+		tenant, err = s.tenantService.UpdateTenant(ctx, tenant)
+		if err != nil {
+			logger.Errorf(ctx, "Failed to update tenant: %v", err)
+			return err
+		}
+		logger.Info(ctx, "Test tenant updated successfully")
+	}
+
+	logger.Infof(ctx, "Test tenant configured - ID: %d, Name: %s, API Key: %s",
+		tenant.ID, tenant.Name, tenant.APIKey)
+	return nil
+}
+
+// initKnowledgeBase initializes the test knowledge base.
+// It reads the knowledge base ID from an environment variable, creates or updates the
+// knowledge base, and configures its chunking strategy, embedding model and summary model
+func (s *TestDataService) initKnowledgeBase(ctx context.Context) error {
+	logger.Info(ctx, "Start initializing test knowledge base")
+
+	// Check the tenant ID carried in the context
+	if ctx.Value(types.TenantIDContextKey).(uint) == 0 {
+		logger.Warn(ctx, "Tenant ID is 0, skipping knowledge base initialization")
+		return nil
+	}
+
+	// Read the knowledge base ID from the environment variable
+	knowledgeBaseID := os.Getenv("INIT_TEST_KNOWLEDGE_BASE_ID")
+	logger.Infof(ctx, "Test knowledge base ID from environment: %s", knowledgeBaseID)
+
+	// Build the knowledge base configuration
+	kbConfig := &types.KnowledgeBase{
+		ID:          knowledgeBaseID,
+		Name:        "Test Knowledge Base",
+		Description: "Knowledge Base for Testing",
+		TenantID:    ctx.Value(types.TenantIDContextKey).(uint),
+		ChunkingConfig: types.ChunkingConfig{
+			ChunkSize:        s.config.KnowledgeBase.ChunkSize,
+			ChunkOverlap:     s.config.KnowledgeBase.ChunkOverlap,
+			Separators:       s.config.KnowledgeBase.SplitMarkers,
+			EnableMultimodal: s.config.KnowledgeBase.ImageProcessing.EnableMultimodal,
+		},
+		EmbeddingModelID: s.EmbedModel.GetModelID(),
+		SummaryModelID:   s.LLMModel.GetModelID(),
+	}
+
+	// Initialize the test knowledge base
+	logger.Info(ctx, "Attempting to get existing knowledge base")
+	_, err := s.kbRepo.GetKnowledgeBaseByID(ctx, knowledgeBaseID)
+	if err != nil {
+		// Knowledge base does not exist, create a new one
+		logger.Info(ctx, "Knowledge base not found, creating a new one")
+		logger.Infof(ctx, "Creating knowledge base with ID: %s, tenant ID: %d",
+			kbConfig.ID, kbConfig.TenantID)
+
+		if err := s.kbRepo.CreateKnowledgeBase(ctx, kbConfig); err != nil {
+			logger.Errorf(ctx, "Failed to create knowledge base: %v", err)
+			return err
+		}
+		logger.Info(ctx, "Knowledge base created successfully")
+	} else {
+		// Knowledge base exists, update its configuration
+		logger.Info(ctx, "Knowledge base found, updating configuration")
+		logger.Infof(ctx, "Updating knowledge base with ID: %s", kbConfig.ID)
+
+		err = s.kbRepo.UpdateKnowledgeBase(ctx, kbConfig)
+		if err != nil {
+			logger.Errorf(ctx, "Failed to update knowledge base: %v", err)
+			return err
+		}
+		logger.Info(ctx, "Knowledge base updated successfully")
+	}
+
+	logger.Infof(ctx, "Test knowledge base configured - ID: %s, Name: %s", kbConfig.ID, kbConfig.Name)
+	return nil
+}
+
+// InitializeTestData initializes the test data.
+// This is the main exported entry point that coordinates the whole initialization process,
+// covering the tenant, embedding model, rerank model, LLM model and knowledge base
+func (s *TestDataService) InitializeTestData(ctx context.Context) error {
+	logger.Info(ctx, "Start initializing test data")
+
+	// Read the tenant ID from the environment variable
+	tenantID := os.Getenv("INIT_TEST_TENANT_ID")
+	logger.Infof(ctx, "Test tenant ID from environment: %s", tenantID)
+
+	// Parse the tenant ID
+	tenantIDUint, err := strconv.ParseUint(tenantID, 10, 64)
+	if err != nil {
+		// Fall back to the default value 0 when parsing fails
+		logger.Warn(ctx, "Failed to parse tenant ID, using default value 0")
+		tenantIDUint = 0
+	} else {
+		// Initialize the tenant
+		logger.Info(ctx, "Initializing tenant")
+		err = s.initTenant(ctx)
+		if err != nil {
+			logger.Errorf(ctx, "Failed to initialize tenant: %v", err)
+			return err
+		}
+		logger.Info(ctx, "Tenant initialized successfully")
+	}
+
+	// Create a new context carrying the tenant ID
+	newCtx := context.Background()
+	newCtx = context.WithValue(newCtx, types.TenantIDContextKey, uint(tenantIDUint))
+	logger.Infof(ctx, "Created new context with tenant ID: %d", tenantIDUint)
+
+	// Initialize the models
+	modelInitFuncs := []struct {
+		name string
+		fn   func(context.Context) error
+	}{
+		{"embedding model", s.initEmbeddingModel},
+		{"rerank model", s.initRerankModel},
+		{"LLM model", s.initLLMModel},
+	}
+
+	for _, initFunc := range modelInitFuncs {
+		logger.Infof(ctx, "Initializing %s", initFunc.name)
+		if err := initFunc.fn(newCtx); err != nil {
+			logger.Errorf(ctx, "Failed to initialize %s: %v", initFunc.name, err)
+			return err
+		}
+		logger.Infof(ctx, "%s initialized successfully", initFunc.name)
+	}
+
+	// Initialize the knowledge base
+	logger.Info(ctx, "Initializing knowledge base")
+	if err := s.initKnowledgeBase(newCtx); err != nil {
+		logger.Errorf(ctx, "Failed to initialize knowledge base: %v", err)
+		return err
+	}
+	logger.Info(ctx, "Knowledge base initialized successfully")
+
+	logger.Info(ctx, "Test data initialization completed")
+	return nil
+}
+
+// getEnvOrError returns the value of an environment variable, or an error if it is not set
+func (s *TestDataService) getEnvOrError(name string) (string, error) {
+	value := os.Getenv(name)
+	if value == "" {
+		return "", fmt.Errorf("%s environment variable is not set", name)
+	}
+	return value, nil
+}
+
+// updateOrCreateModel updates an existing model, or creates it if it does not exist
+func (s *TestDataService) updateOrCreateModel(ctx context.Context, modelConfig *types.Model) error {
+	model, err := s.modelService.GetModelByID(ctx, modelConfig.ID)
+	if err != nil {
+		// Model does not exist, create a new one
+		return s.modelService.CreateModel(ctx, modelConfig)
+	}
+
+	// Model exists, update its attributes
+	model.TenantID = modelConfig.TenantID
+	model.Name = modelConfig.Name
+	model.Source = modelConfig.Source
+	model.Type = modelConfig.Type
+	model.Parameters = modelConfig.Parameters
+	model.Status = modelConfig.Status
+
+	return s.modelService.UpdateModel(ctx, model)
+}
+
+// initEmbeddingModel initializes the embedding model
+func (s *TestDataService) initEmbeddingModel(ctx context.Context) error {
+	// Read model parameters from environment variables
+	modelName, err := s.getEnvOrError("INIT_EMBEDDING_MODEL_NAME")
+	if err != nil {
+		return err
+	}
+
+	dimensionStr := os.Getenv("INIT_EMBEDDING_MODEL_DIMENSION")
+	dimension, err := strconv.Atoi(dimensionStr)
+	if err != nil || dimension == 0 {
+		return fmt.Errorf("invalid embedding model dimension: %s", dimensionStr)
+	}
+
+	baseURL := os.Getenv("INIT_EMBEDDING_MODEL_BASE_URL")
+	apiKey := os.Getenv("INIT_EMBEDDING_MODEL_API_KEY")
+
+	// Determine the model source
+	source := types.ModelSourceRemote
+	if baseURL == "" {
+		source = types.ModelSourceLocal
+	}
+
+	// Determine the model ID
+	modelID := os.Getenv("INIT_EMBEDDING_MODEL_ID")
+	if modelID == "" {
+		modelID = fmt.Sprintf("builtin:%s:%d", modelName, dimension)
+	}
+
+	// Create the embedding model instance
+	s.EmbedModel, err = embedding.NewEmbedder(embedding.Config{
+		Source:     source,
+		BaseURL:    baseURL,
+		ModelName:  modelName,
+		APIKey:     apiKey,
+		Dimensions: dimension,
+		ModelID:    modelID,
+	})
+	if err != nil {
+		return fmt.Errorf("failed to create embedder: %w", err)
+	}
+
+	// For local models, pull the model via Ollama
+	if source == types.ModelSourceLocal && s.ollamaService != nil {
+		if err := s.ollamaService.PullModel(context.Background(), modelName); err != nil {
+			return fmt.Errorf("failed to pull embedding model: %w", err)
+		}
+	}
+
+	// Build the model configuration
+	modelConfig := &types.Model{
+		ID:       modelID,
+		TenantID: ctx.Value(types.TenantIDContextKey).(uint),
+		Name:     modelName,
+		Source:   source,
+		Type:     types.ModelTypeEmbedding,
+		Parameters: types.ModelParameters{
+			BaseURL: baseURL,
+			APIKey:  apiKey,
+			EmbeddingParameters: types.EmbeddingParameters{
+				Dimension: dimension,
+			},
+		},
+		Status: "active",
+	}
+
+	// Update or create the model
+	return s.updateOrCreateModel(ctx, modelConfig)
+}
+
+// initRerankModel initializes the rerank model
+func (s *TestDataService) initRerankModel(ctx context.Context) error {
+	// Read model parameters from environment variables
+	modelName, err := s.getEnvOrError("INIT_RERANK_MODEL_NAME")
+	if err != nil {
+		logger.Warnf(ctx, "Skip Rerank Model: %v", err)
+		return nil
+	}
+
+	baseURL, err := s.getEnvOrError("INIT_RERANK_MODEL_BASE_URL")
+	if err != nil {
+		return err
+	}
+
+	apiKey := os.Getenv("INIT_RERANK_MODEL_API_KEY")
+	modelID := fmt.Sprintf("builtin:%s:rerank:%s", types.ModelSourceRemote, modelName)
+
+	// Create the reranker instance
+	s.RerankModel, err = rerank.NewReranker(&rerank.RerankerConfig{
+		Source:    types.ModelSourceRemote,
+		BaseURL:   baseURL,
+		ModelName: modelName,
+		APIKey:    apiKey,
+		ModelID:   modelID,
+	})
+	if err != nil {
+		return fmt.Errorf("failed to create reranker: %w", err)
+	}
+
+	// Build the model configuration
+	modelConfig := &types.Model{
+		ID:       modelID,
+		TenantID: ctx.Value(types.TenantIDContextKey).(uint),
+		Name:     modelName,
+		Source:   types.ModelSourceRemote,
+		Type:     types.ModelTypeRerank,
+		Parameters: types.ModelParameters{
+			BaseURL: baseURL,
+			APIKey:  apiKey,
+		},
+		Status: "active",
+	}
+
+	// Update or create the model
+	return s.updateOrCreateModel(ctx, modelConfig)
+}
+
+// initLLMModel initializes the large language model
+func (s *TestDataService) initLLMModel(ctx context.Context) error {
+	// Read model parameters from environment variables
+	modelName, err := s.getEnvOrError("INIT_LLM_MODEL_NAME")
+	if err != nil {
+		return err
+	}
+
+	baseURL := os.Getenv("INIT_LLM_MODEL_BASE_URL")
+	apiKey := os.Getenv("INIT_LLM_MODEL_API_KEY")
+
+	// Determine the model source
+	source := types.ModelSourceRemote
+	if baseURL == "" {
+		source = types.ModelSourceLocal
+	}
+
+	// Determine the model ID
+	modelID := fmt.Sprintf("builtin:%s:llm:%s", source, modelName)
+
+	// Create the LLM instance
+	s.LLMModel, err = chat.NewChat(&chat.ChatConfig{
+		Source:    source,
+		BaseURL:   baseURL,
+		ModelName: modelName,
+		APIKey:    apiKey,
+		ModelID:   modelID,
+	})
+	if err != nil {
+		return fmt.Errorf("failed to create llm: %w", err)
+	}
+
+	// For local models, pull the model via Ollama
+	if source == types.ModelSourceLocal && s.ollamaService != nil {
+		if err := s.ollamaService.PullModel(context.Background(), modelName); err != nil {
+			return fmt.Errorf("failed to pull llm model: %w", err)
+		}
+	}
+
+	// Build the model configuration
+	modelConfig := &types.Model{
+		ID:       modelID,
+		TenantID: ctx.Value(types.TenantIDContextKey).(uint),
+		Name:     modelName,
+		Source:   source,
+		Type:     types.ModelTypeKnowledgeQA,
+		Parameters: types.ModelParameters{
+			BaseURL: baseURL,
+			APIKey:  apiKey,
+		},
+		Status: "active",
+	}
+
+	// Update or create the model
+	return s.updateOrCreateModel(ctx, modelConfig)
+}
diff --git a/internal/common/asyncq.go b/internal/common/asyncq.go
new file mode 100644
index 0000000..9316f89
--- /dev/null
+++ b/internal/common/asyncq.go
@@ -0,0 +1,72 @@
+package common
+
+import (
+	"log"
+
+	"github.com/Tencent/WeKnora/internal/config"
+	"github.com/hibiken/asynq"
+)
+
+// client is the global asyncq client instance
+var client *asynq.Client
+
+// InitAsyncq initializes the asyncq client with configuration
+// It creates a new client and starts the server in a goroutine
+func InitAsyncq(config *config.Config) error {
+	cfg := config.Asynq
+	client = asynq.NewClient(asynq.RedisClientOpt{
+		Addr:         cfg.Addr,
+		Username:     cfg.Username,
+		Password:     cfg.Password,
+		ReadTimeout:  cfg.ReadTimeout,
+		WriteTimeout: cfg.WriteTimeout,
+	})
+	go run(cfg)
+	return nil
+}
+
+// GetAsyncqClient returns the global asyncq client instance
+func GetAsyncqClient() *asynq.Client {
+	return client
+}
+
+// handleFunc stores registered task handlers
+var handleFunc = map[string]asynq.HandlerFunc{}
+
+// RegisterHandlerFunc registers a handler function for a specific task type
+func RegisterHandlerFunc(taskType string, handlerFunc asynq.HandlerFunc) {
+	handleFunc[taskType] = handlerFunc
+}
+
+// run starts the asyncq server with the given configuration
+// It creates a new server, sets up handlers, and runs the server
+func run(config *config.AsynqConfig) {
+	srv := asynq.NewServer(
+		asynq.RedisClientOpt{
+			Addr:         config.Addr,
+			Username:     config.Username,
+			Password:     config.Password,
+			ReadTimeout:  config.ReadTimeout,
+			WriteTimeout: config.WriteTimeout,
+		},
+		asynq.Config{
+			Concurrency: config.Concurrency,
+			Queues: map[string]int{
+				"critical": 6, // Highest priority queue
+				"default":  3, // Default priority queue
+				"low":      1, // Lowest priority queue
+			},
+		},
+	)
+
+	// Create a new mux and register all handlers
+	mux := asynq.NewServeMux()
+	for typ, handler := range handleFunc {
+		mux.HandleFunc(typ, handler)
+	}
+
+	// Start the server
+	if err := srv.Run(mux); err != nil {
+		log.Fatalf("could not run server: %v", err)
+	}
+}
diff --git a/internal/common/tools.go b/internal/common/tools.go
new file mode 100644
index 0000000..3e79632
--- /dev/null
+++ b/internal/common/tools.go
@@ -0,0 +1,75 @@
+package common
+
+import (
+	"encoding/json"
+	"maps"
+	"regexp"
+	"slices"
+	"strings"
+)
+
+// ToInterfaceSlice converts a slice of strings to a slice of empty interfaces.
+func ToInterfaceSlice[T any](slice []T) []interface{} { + interfaceSlice := make([]interface{}, len(slice)) + for i, v := range slice { + interfaceSlice[i] = v + } + return interfaceSlice +} + +// []string -> string, " join, space separated +func StringSliceJoin(slice []string) string { + result := make([]string, len(slice)) + for i, v := range slice { + result[i] = `"` + v + `"` + } + return strings.Join(result, " ") +} + +func GetAttrs[A, B any](extract func(A) B, attrs ...A) []B { + result := make([]B, len(attrs)) + for i, attr := range attrs { + result[i] = extract(attr) + } + return result +} + +// Deduplicate removes duplicates from a slice based on a key function +// T: the type of elements in the slice +// K: the type of key used for deduplication +func Deduplicate[T any, K comparable](keyFunc func(T) K, items ...T) []T { + seen := make(map[K]T) + for _, item := range items { + key := keyFunc(item) + if _, exists := seen[key]; !exists { + seen[key] = item + } + } + return slices.Collect(maps.Values(seen)) +} + +// ParseLLMJsonResponse parses a JSON response from LLM, handling cases where JSON is wrapped in code blocks. +// This is useful when LLMs return responses like: +// ```json +// {"key": "value"} +// ``` +// or regular JSON responses directly. +func ParseLLMJsonResponse(content string, target interface{}) error { + // First, try to parse directly as JSON + err := json.Unmarshal([]byte(content), target) + if err == nil { + return nil + } + + // If direct parsing fails, try to extract JSON from code blocks + re := regexp.MustCompile("```(?:json)?\\s*([\\s\\S]*?)```") + matches := re.FindStringSubmatch(content) + if len(matches) >= 2 { + // Extract the JSON content within the code block + jsonContent := strings.TrimSpace(matches[1]) + return json.Unmarshal([]byte(jsonContent), target) + } + + // If no code block found, return the original error + return err +} diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..afbae03 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,186 @@ +package config + +import ( + "fmt" + "os" + "regexp" + "strings" + "time" + + "github.com/go-viper/mapstructure/v2" + "github.com/spf13/viper" +) + +// Config 应用程序总配置 +type Config struct { + Conversation *ConversationConfig `yaml:"conversation" json:"conversation"` + Server *ServerConfig `yaml:"server" json:"server"` + KnowledgeBase *KnowledgeBaseConfig `yaml:"knowledge_base" json:"knowledge_base"` + Tenant *TenantConfig `yaml:"tenant" json:"tenant"` + Models []ModelConfig `yaml:"models" json:"models"` + Asynq *AsynqConfig `yaml:"asynq" json:"asynq"` + VectorDatabase *VectorDatabaseConfig `yaml:"vector_database" json:"vector_database"` + DocReader *DocReaderConfig `yaml:"docreader" json:"docreader"` + StreamManager *StreamManagerConfig `yaml:"stream_manager" json:"stream_manager"` +} + +type DocReaderConfig struct { + Addr string `yaml:"addr" json:"addr"` +} + +type VectorDatabaseConfig struct { + Driver string `yaml:"driver" json:"driver"` +} + +// ConversationConfig 对话服务配置 +type ConversationConfig struct { + MaxRounds int `yaml:"max_rounds" json:"max_rounds"` + KeywordThreshold float64 `yaml:"keyword_threshold" json:"keyword_threshold"` + EmbeddingTopK int `yaml:"embedding_top_k" json:"embedding_top_k"` + VectorThreshold float64 `yaml:"vector_threshold" json:"vector_threshold"` + RerankTopK int `yaml:"rerank_top_k" json:"rerank_top_k"` + RerankThreshold float64 `yaml:"rerank_threshold" json:"rerank_threshold"` + FallbackStrategy string 
`yaml:"fallback_strategy" json:"fallback_strategy"` + FallbackResponse string `yaml:"fallback_response" json:"fallback_response"` + FallbackPrompt string `yaml:"fallback_prompt" json:"fallback_prompt"` + EnableRewrite bool `yaml:"enable_rewrite" json:"enable_rewrite"` + EnableRerank bool `yaml:"enable_rerank" json:"enable_rerank"` + Summary *SummaryConfig `yaml:"summary" json:"summary"` + GenerateSessionTitlePrompt string `yaml:"generate_session_title_prompt" json:"generate_session_title_prompt"` + GenerateSummaryPrompt string `yaml:"generate_summary_prompt" json:"generate_summary_prompt"` + RewritePromptSystem string `yaml:"rewrite_prompt_system" json:"rewrite_prompt_system"` + RewritePromptUser string `yaml:"rewrite_prompt_user" json:"rewrite_prompt_user"` + SimplifyQueryPrompt string `yaml:"simplify_query_prompt" json:"simplify_query_prompt"` + SimplifyQueryPromptUser string `yaml:"simplify_query_prompt_user" json:"simplify_query_prompt_user"` + ExtractEntitiesPrompt string `yaml:"extract_entities_prompt" json:"extract_entities_prompt"` + ExtractRelationshipsPrompt string `yaml:"extract_relationships_prompt" json:"extract_relationships_prompt"` +} + +// SummaryConfig 摘要配置 +type SummaryConfig struct { + MaxTokens int `yaml:"max_tokens" json:"max_tokens"` + RepeatPenalty float64 `yaml:"repeat_penalty" json:"repeat_penalty"` + TopK int `yaml:"top_k" json:"top_k"` + TopP float64 `yaml:"top_p" json:"top_p"` + FrequencyPenalty float64 `yaml:"frequency_penalty" json:"frequency_penalty"` + PresencePenalty float64 `yaml:"presence_penalty" json:"presence_penalty"` + Prompt string `yaml:"prompt" json:"prompt"` + ContextTemplate string `yaml:"context_template" json:"context_template"` + Temperature float64 `yaml:"temperature" json:"temperature"` + Seed int `yaml:"seed" json:"seed"` + MaxCompletionTokens int `yaml:"max_completion_tokens" json:"max_completion_tokens"` + NoMatchPrefix string `yaml:"no_match_prefix" json:"no_match_prefix"` +} + +// ServerConfig 服务器配置 +type ServerConfig struct { + Port int `yaml:"port" json:"port"` + Host string `yaml:"host" json:"host"` + LogPath string `yaml:"log_path" json:"log_path"` + ShutdownTimeout time.Duration `yaml:"shutdown_timeout" json:"shutdown_timeout" default:"30s"` +} + +// KnowledgeBaseConfig 知识库配置 +type KnowledgeBaseConfig struct { + ChunkSize int `yaml:"chunk_size" json:"chunk_size"` + ChunkOverlap int `yaml:"chunk_overlap" json:"chunk_overlap"` + SplitMarkers []string `yaml:"split_markers" json:"split_markers"` + KeepSeparator bool `yaml:"keep_separator" json:"keep_separator"` + ImageProcessing *ImageProcessingConfig `yaml:"image_processing" json:"image_processing"` +} + +// ImageProcessingConfig 图像处理配置 +type ImageProcessingConfig struct { + EnableMultimodal bool `yaml:"enable_multimodal" json:"enable_multimodal"` +} + +// TenantConfig 租户配置 +type TenantConfig struct { + DefaultSessionName string `yaml:"default_session_name" json:"default_session_name"` + DefaultSessionTitle string `yaml:"default_session_title" json:"default_session_title"` + DefaultSessionDescription string `yaml:"default_session_description" json:"default_session_description"` +} + +// ModelConfig 模型配置 +type ModelConfig struct { + Type string `yaml:"type" json:"type"` + Source string `yaml:"source" json:"source"` + ModelName string `yaml:"model_name" json:"model_name"` + Parameters map[string]interface{} `yaml:"parameters" json:"parameters"` +} + +type AsynqConfig struct { + Addr string `yaml:"addr" json:"addr"` + Username string `yaml:"username" json:"username"` + Password string 
`yaml:"password" json:"password"` + ReadTimeout time.Duration `yaml:"read_timeout" json:"read_timeout"` + WriteTimeout time.Duration `yaml:"write_timeout" json:"write_timeout"` + Concurrency int `yaml:"concurrency" json:"concurrency"` +} + +// StreamManagerConfig 流管理器配置 +type StreamManagerConfig struct { + Type string `yaml:"type" json:"type"` // 类型: "memory" 或 "redis" + Redis RedisConfig `yaml:"redis" json:"redis"` // Redis配置 + CleanupTimeout time.Duration `yaml:"cleanup_timeout" json:"cleanup_timeout"` // 清理超时,单位秒 +} + +// RedisConfig Redis配置 +type RedisConfig struct { + Address string `yaml:"address" json:"address"` // Redis地址 + Password string `yaml:"password" json:"password"` // Redis密码 + DB int `yaml:"db" json:"db"` // Redis数据库 + Prefix string `yaml:"prefix" json:"prefix"` // 键前缀 + TTL time.Duration `yaml:"ttl" json:"ttl"` // 过期时间(小时) +} + +// LoadConfig 从配置文件加载配置 +func LoadConfig() (*Config, error) { + // 设置配置文件名和路径 + viper.SetConfigName("config") // 配置文件名称(不带扩展名) + viper.SetConfigType("yaml") // 配置文件类型 + viper.AddConfigPath(".") // 当前目录 + viper.AddConfigPath("./config") // config子目录 + viper.AddConfigPath("$HOME/.appname") // 用户目录 + viper.AddConfigPath("/etc/appname/") // etc目录 + + // 启用环境变量替换 + viper.AutomaticEnv() + viper.SetEnvKeyReplacer(strings.NewReplacer(".", "_")) + + // 读取配置文件 + if err := viper.ReadInConfig(); err != nil { + return nil, fmt.Errorf("error reading config file: %w", err) + } + + // 替换配置中的环境变量引用 + configFileContent, err := os.ReadFile(viper.ConfigFileUsed()) + if err != nil { + return nil, fmt.Errorf("error reading config file content: %w", err) + } + + // 替换${ENV_VAR}格式的环境变量引用 + re := regexp.MustCompile(`\${([^}]+)}`) + result := re.ReplaceAllStringFunc(string(configFileContent), func(match string) string { + // 提取环境变量名称(去掉${}部分) + envVar := match[2 : len(match)-1] + // 获取环境变量值,如果不存在则保持原样 + if value := os.Getenv(envVar); value != "" { + return value + } + return match + }) + + // 使用处理后的配置内容 + viper.ReadConfig(strings.NewReader(result)) + + // 解析配置到结构体 + var cfg Config + if err := viper.Unmarshal(&cfg, func(dc *mapstructure.DecoderConfig) { + dc.TagName = "yaml" + }); err != nil { + return nil, fmt.Errorf("unable to decode config into struct: %w", err) + } + fmt.Printf("Using configuration file: %s\n", viper.ConfigFileUsed()) + return &cfg, nil +} diff --git a/internal/container/cleanup.go b/internal/container/cleanup.go new file mode 100644 index 0000000..fd14357 --- /dev/null +++ b/internal/container/cleanup.go @@ -0,0 +1,86 @@ +package container + +import ( + "context" + "log" + "sync" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// ResourceCleaner is a resource cleaner that can be used to clean up resources +type ResourceCleaner struct { + mu sync.Mutex + cleanups []types.CleanupFunc +} + +// NewResourceCleaner creates a new resource cleaner +func NewResourceCleaner() interfaces.ResourceCleaner { + return &ResourceCleaner{ + cleanups: make([]types.CleanupFunc, 0), + } +} + +// Register registers a cleanup function +// Note: the cleanup function will be executed in reverse order (the last registered will be executed first) +func (c *ResourceCleaner) Register(cleanup types.CleanupFunc) { + if cleanup == nil { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + + c.cleanups = append(c.cleanups, cleanup) +} + +// RegisterWithName registers a cleanup function with a name, for logging tracking +func (c *ResourceCleaner) RegisterWithName(name string, cleanup types.CleanupFunc) { + if cleanup == nil 
{ + return + } + + wrappedCleanup := func() error { + log.Printf("Cleaning up resource: %s", name) + err := cleanup() + if err != nil { + log.Printf("Error cleaning up resource %s: %v", name, err) + } else { + log.Printf("Successfully cleaned up resource: %s", name) + } + return err + } + + c.Register(wrappedCleanup) +} + +// Cleanup executes all cleanup functions +// Even if a cleanup function fails, other cleanup functions will still be executed +func (c *ResourceCleaner) Cleanup(ctx context.Context) (errs []error) { + c.mu.Lock() + defer c.mu.Unlock() + + // Execute cleanup functions in reverse order (the last registered will be executed first) + for i := len(c.cleanups) - 1; i >= 0; i-- { + select { + case <-ctx.Done(): + errs = append(errs, ctx.Err()) + return errs + default: + if err := c.cleanups[i](); err != nil { + errs = append(errs, err) + } + } + } + + return errs +} + +// Reset clears all registered cleanup functions +func (c *ResourceCleaner) Reset() { + c.mu.Lock() + defer c.mu.Unlock() + + c.cleanups = make([]types.CleanupFunc, 0) +} diff --git a/internal/container/container.go b/internal/container/container.go new file mode 100644 index 0000000..6808760 --- /dev/null +++ b/internal/container/container.go @@ -0,0 +1,366 @@ +// Package container implements dependency injection container setup +// Provides centralized configuration for services, repositories, and handlers +// This package is responsible for wiring up all dependencies and ensuring proper lifecycle management +package container + +import ( + "context" + "fmt" + "os" + "slices" + "strings" + "time" + + esv7 "github.com/elastic/go-elasticsearch/v7" + "github.com/elastic/go-elasticsearch/v8" + "github.com/panjf2000/ants/v2" + "go.uber.org/dig" + "gorm.io/driver/postgres" + "gorm.io/gorm" + + "github.com/Tencent/WeKnora/internal/application/repository" + elasticsearchRepoV7 "github.com/Tencent/WeKnora/internal/application/repository/retriever/elasticsearch/v7" + elasticsearchRepoV8 "github.com/Tencent/WeKnora/internal/application/repository/retriever/elasticsearch/v8" + postgresRepo "github.com/Tencent/WeKnora/internal/application/repository/retriever/postgres" + "github.com/Tencent/WeKnora/internal/application/service" + chatpipline "github.com/Tencent/WeKnora/internal/application/service/chat_pipline" + "github.com/Tencent/WeKnora/internal/application/service/file" + "github.com/Tencent/WeKnora/internal/application/service/retriever" + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/handler" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/embedding" + "github.com/Tencent/WeKnora/internal/models/utils/ollama" + "github.com/Tencent/WeKnora/internal/router" + "github.com/Tencent/WeKnora/internal/stream" + "github.com/Tencent/WeKnora/internal/tracing" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/Tencent/WeKnora/services/docreader/src/client" +) + +// BuildContainer constructs the dependency injection container +// Registers all components, services, repositories and handlers needed by the application +// Creates a fully configured application container with proper dependency resolution +// Parameters: +// - container: Base dig container to add dependencies to +// +// Returns: +// - Configured container with all application dependencies registered +func BuildContainer(container *dig.Container) *dig.Container { + // Register resource cleaner for proper cleanup of 
resources + must(container.Provide(NewResourceCleaner, dig.As(new(interfaces.ResourceCleaner)))) + + // Core infrastructure configuration + must(container.Provide(config.LoadConfig)) + must(container.Provide(initTracer)) + must(container.Provide(initDatabase)) + must(container.Provide(initFileService)) + must(container.Provide(initAntsPool)) + + // Register goroutine pool cleanup handler + must(container.Invoke(registerPoolCleanup)) + + // Initialize retrieval engine registry for search capabilities + must(container.Provide(initRetrieveEngineRegistry)) + + // External service clients + must(container.Provide(initDocReaderClient)) + must(container.Provide(initOllamaService)) + must(container.Provide(stream.NewStreamManager)) + + // Data repositories layer + must(container.Provide(repository.NewTenantRepository)) + must(container.Provide(repository.NewKnowledgeBaseRepository)) + must(container.Provide(repository.NewKnowledgeRepository)) + must(container.Provide(repository.NewChunkRepository)) + must(container.Provide(repository.NewSessionRepository)) + must(container.Provide(repository.NewMessageRepository)) + must(container.Provide(repository.NewModelRepository)) + + // Business service layer + must(container.Provide(service.NewTenantService)) + must(container.Provide(service.NewKnowledgeBaseService)) + must(container.Provide(service.NewKnowledgeService)) + must(container.Provide(service.NewSessionService)) + must(container.Provide(service.NewMessageService)) + must(container.Provide(service.NewChunkService)) + must(container.Provide(embedding.NewBatchEmbedder)) + must(container.Provide(service.NewTestDataService)) + must(container.Provide(service.NewModelService)) + must(container.Provide(service.NewDatasetService)) + must(container.Provide(service.NewEvaluationService)) + + // Chat pipeline components for processing chat requests + must(container.Provide(chatpipline.NewEventManager)) + must(container.Invoke(chatpipline.NewPluginTracing)) + must(container.Invoke(chatpipline.NewPluginSearch)) + must(container.Invoke(chatpipline.NewPluginRerank)) + must(container.Invoke(chatpipline.NewPluginMerge)) + must(container.Invoke(chatpipline.NewPluginIntoChatMessage)) + must(container.Invoke(chatpipline.NewPluginChatCompletion)) + must(container.Invoke(chatpipline.NewPluginChatCompletionStream)) + must(container.Invoke(chatpipline.NewPluginStreamFilter)) + must(container.Invoke(chatpipline.NewPluginFilterTopK)) + must(container.Invoke(chatpipline.NewPluginPreprocess)) + must(container.Invoke(chatpipline.NewPluginRewrite)) + + // HTTP handlers layer + must(container.Provide(handler.NewTenantHandler)) + must(container.Provide(handler.NewKnowledgeBaseHandler)) + must(container.Provide(handler.NewKnowledgeHandler)) + must(container.Provide(handler.NewChunkHandler)) + must(container.Provide(handler.NewSessionHandler)) + must(container.Provide(handler.NewMessageHandler)) + must(container.Provide(handler.NewTestDataHandler)) + must(container.Provide(handler.NewModelHandler)) + must(container.Provide(handler.NewEvaluationHandler)) + + // Router configuration + must(container.Provide(router.NewRouter)) + + return container +} + +// must is a helper function for error handling +// Panics if the error is not nil, useful for configuration steps that must succeed +// Parameters: +// - err: Error to check +func must(err error) { + if err != nil { + panic(err) + } +} + +// initTracer initializes OpenTelemetry tracer +// Sets up distributed tracing for observability across the application +// Parameters: +// - None 
+// +// Returns: +// - Configured tracer instance +// - Error if initialization fails +func initTracer() (*tracing.Tracer, error) { + return tracing.InitTracer() +} + +// initDatabase initializes database connection +// Creates and configures database connection based on environment configuration +// Supports multiple database backends (PostgreSQL) +// Parameters: +// - cfg: Application configuration +// +// Returns: +// - Configured database connection +// - Error if connection fails +func initDatabase(cfg *config.Config) (*gorm.DB, error) { + var dialector gorm.Dialector + switch os.Getenv("DB_DRIVER") { + case "postgres": + dsn := fmt.Sprintf( + "host=%s port=%s user=%s password=%s dbname=%s sslmode=%s", + os.Getenv("DB_HOST"), + os.Getenv("DB_PORT"), + os.Getenv("DB_USER"), + os.Getenv("DB_PASSWORD"), + os.Getenv("DB_NAME"), + "disable", + ) + dialector = postgres.Open(dsn) + default: + return nil, fmt.Errorf("unsupported database driver: %s", os.Getenv("DB_DRIVER")) + } + db, err := gorm.Open(dialector, &gorm.Config{}) + if err != nil { + return nil, err + } + + // Get underlying SQL DB object + sqlDB, err := db.DB() + if err != nil { + return nil, err + } + + // Configure connection pool parameters + sqlDB.SetMaxIdleConns(10) + sqlDB.SetConnMaxLifetime(time.Duration(10) * time.Minute) + + return db, nil +} + +// initFileService initializes file storage service +// Creates the appropriate file storage service based on configuration +// Supports multiple storage backends (MinIO, COS, local filesystem) +// Parameters: +// - cfg: Application configuration +// +// Returns: +// - Configured file service implementation +// - Error if initialization fails +func initFileService(cfg *config.Config) (interfaces.FileService, error) { + switch os.Getenv("STORAGE_TYPE") { + case "minio": + if os.Getenv("MINIO_ENDPOINT") == "" || + os.Getenv("MINIO_ACCESS_KEY_ID") == "" || + os.Getenv("MINIO_SECRET_ACCESS_KEY") == "" || + os.Getenv("MINIO_BUCKET_NAME") == "" { + return nil, fmt.Errorf("missing MinIO configuration") + } + return file.NewMinioFileService( + os.Getenv("MINIO_ENDPOINT"), + os.Getenv("MINIO_ACCESS_KEY_ID"), + os.Getenv("MINIO_SECRET_ACCESS_KEY"), + os.Getenv("MINIO_BUCKET_NAME"), + false, + ) + case "cos": + if os.Getenv("COS_APP_ID") == "" || + os.Getenv("COS_REGION") == "" || + os.Getenv("COS_SECRET_ID") == "" || + os.Getenv("COS_SECRET_KEY") == "" || + os.Getenv("COS_PATH_PREFIX") == "" { + return nil, fmt.Errorf("missing COS configuration") + } + return file.NewCosFileService( + os.Getenv("COS_APP_ID"), + os.Getenv("COS_REGION"), + os.Getenv("COS_SECRET_ID"), + os.Getenv("COS_SECRET_KEY"), + os.Getenv("COS_PATH_PREFIX"), + ) + case "local": + return file.NewLocalFileService(os.Getenv("LOCAL_STORAGE_BASE_DIR")), nil + case "dummy": + return file.NewDummyFileService(), nil + default: + return nil, fmt.Errorf("unsupported storage type: %s", os.Getenv("STORAGE_TYPE")) + } +} + +// initRetrieveEngineRegistry initializes the retrieval engine registry +// Sets up and configures various search engine backends based on configuration +// Supports multiple retrieval engines (PostgreSQL, ElasticsearchV7, ElasticsearchV8) +// Parameters: +// - db: Database connection +// - cfg: Application configuration +// +// Returns: +// - Configured retrieval engine registry +// - Error if initialization fails +func initRetrieveEngineRegistry(db *gorm.DB, cfg *config.Config) (interfaces.RetrieveEngineRegistry, error) { + registry := retriever.NewRetrieveEngineRegistry() + retrieveDriver := 
strings.Split(os.Getenv("RETRIEVE_DRIVER"), ",") + log := logger.GetLogger(context.Background()) + + if slices.Contains(retrieveDriver, "postgres") { + postgresRepo := postgresRepo.NewPostgresRetrieveEngineRepository(db) + if err := registry.Register( + retriever.NewKVHybridRetrieveEngine(postgresRepo, types.PostgresRetrieverEngineType), + ); err != nil { + log.Errorf("Register postgres retrieve engine failed: %v", err) + } else { + log.Infof("Register postgres retrieve engine success") + } + } + if slices.Contains(retrieveDriver, "elasticsearch_v8") { + client, err := elasticsearch.NewTypedClient(elasticsearch.Config{ + Addresses: []string{os.Getenv("ELASTICSEARCH_ADDR")}, + Username: os.Getenv("ELASTICSEARCH_USERNAME"), + Password: os.Getenv("ELASTICSEARCH_PASSWORD"), + }) + if err != nil { + log.Errorf("Create elasticsearch_v8 client failed: %v", err) + } else { + elasticsearchRepo := elasticsearchRepoV8.NewElasticsearchEngineRepository(client, cfg) + if err := registry.Register( + retriever.NewKVHybridRetrieveEngine( + elasticsearchRepo, types.ElasticsearchRetrieverEngineType, + ), + ); err != nil { + log.Errorf("Register elasticsearch_v8 retrieve engine failed: %v", err) + } else { + log.Infof("Register elasticsearch_v8 retrieve engine success") + } + } + } + + if slices.Contains(retrieveDriver, "elasticsearch_v7") { + client, err := esv7.NewClient(esv7.Config{ + Addresses: []string{os.Getenv("ELASTICSEARCH_ADDR")}, + Username: os.Getenv("ELASTICSEARCH_USERNAME"), + Password: os.Getenv("ELASTICSEARCH_PASSWORD"), + }) + if err != nil { + log.Errorf("Create elasticsearch_v7 client failed: %v", err) + } else { + elasticsearchRepo := elasticsearchRepoV7.NewElasticsearchEngineRepository(client, cfg) + if err := registry.Register( + retriever.NewKVHybridRetrieveEngine( + elasticsearchRepo, types.ElasticsearchRetrieverEngineType, + ), + ); err != nil { + log.Errorf("Register elasticsearch_v7 retrieve engine failed: %v", err) + } else { + log.Infof("Register elasticsearch_v7 retrieve engine success") + } + } + } + return registry, nil +} + +// initAntsPool initializes the goroutine pool +// Creates a managed goroutine pool for concurrent task execution +// Parameters: +// - cfg: Application configuration +// +// Returns: +// - Configured goroutine pool +// - Error if initialization fails +func initAntsPool(cfg *config.Config) (*ants.Pool, error) { + // Default to 50 if not specified in config + poolSize := 300 + // Set up the pool with pre-allocation for better performance + return ants.NewPool(poolSize, ants.WithPreAlloc(true)) +} + +// registerPoolCleanup registers the goroutine pool for cleanup +// Ensures proper cleanup of the goroutine pool when application shuts down +// Parameters: +// - pool: Goroutine pool +// - cleaner: Resource cleaner +func registerPoolCleanup(pool *ants.Pool, cleaner interfaces.ResourceCleaner) { + cleaner.RegisterWithName("AntsPool", func() error { + pool.Release() + return nil + }) +} + +// initDocReaderClient initializes the document reader client +// Creates a client for interacting with the document reader service +// Parameters: +// - cfg: Application configuration +// +// Returns: +// - Configured document reader client +// - Error if initialization fails +func initDocReaderClient(cfg *config.Config) (*client.Client, error) { + // Use the DocReader URL from environment or config + docReaderURL := os.Getenv("DOCREADER_ADDR") + if docReaderURL == "" && cfg.DocReader != nil { + docReaderURL = cfg.DocReader.Addr + } + return client.NewClient(docReaderURL) +} + 
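The providers registered above are resolved lazily by dig: nothing is constructed until the container is asked for it. Below is a minimal sketch of how an entrypoint might build the container, run the HTTP router, and release resources on exit; the *gin.Engine router type, the listen address, and the 30s shutdown timeout are assumptions for illustration and are not taken from this commit.

package main

import (
	"context"
	"log"
	"time"

	"github.com/Tencent/WeKnora/internal/container"
	"github.com/Tencent/WeKnora/internal/types/interfaces"
	"github.com/gin-gonic/gin"
	"go.uber.org/dig"
)

func main() {
	// Wire up all providers; panics early if registration fails.
	c := container.BuildContainer(dig.New())

	// Invoke resolves the requested dependencies from the container.
	err := c.Invoke(func(engine *gin.Engine, cleaner interfaces.ResourceCleaner) {
		// Release registered resources (for example the ants pool) on exit,
		// in reverse registration order.
		defer func() {
			ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
			defer cancel()
			for _, cerr := range cleaner.Cleanup(ctx) {
				log.Printf("cleanup error: %v", cerr)
			}
		}()
		_ = engine.Run(":8080")
	})
	if err != nil {
		panic(err)
	}
}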
+// initOllamaService initializes the Ollama service client +// Creates a client for interacting with Ollama API for model inference +// Parameters: +// - None +// +// Returns: +// - Configured Ollama service client +// - Error if initialization fails +func initOllamaService() (*ollama.OllamaService, error) { + // Get Ollama service from existing factory function + return ollama.GetOllamaService() +} diff --git a/internal/errors/errors.go b/internal/errors/errors.go new file mode 100644 index 0000000..c682e4a --- /dev/null +++ b/internal/errors/errors.go @@ -0,0 +1,152 @@ +package errors + +import ( + "fmt" + "net/http" +) + +// ErrorCode defines the error code type +type ErrorCode int + +// System error codes +const ( + // Common error codes (1000-1999) + ErrBadRequest ErrorCode = 1000 + ErrUnauthorized ErrorCode = 1001 + ErrForbidden ErrorCode = 1002 + ErrNotFound ErrorCode = 1003 + ErrMethodNotAllowed ErrorCode = 1004 + ErrConflict ErrorCode = 1005 + ErrTooManyRequests ErrorCode = 1006 + ErrInternalServer ErrorCode = 1007 + ErrServiceUnavailable ErrorCode = 1008 + ErrTimeout ErrorCode = 1009 + ErrValidation ErrorCode = 1010 + + // Tenant related error codes (2000-2099) + ErrTenantNotFound ErrorCode = 2000 + ErrTenantAlreadyExists ErrorCode = 2001 + ErrTenantInactive ErrorCode = 2002 + ErrTenantNameRequired ErrorCode = 2003 + ErrTenantInvalidStatus ErrorCode = 2004 + + // Add more error codes here +) + +// AppError defines the application error structure +type AppError struct { + Code ErrorCode `json:"code"` + Message string `json:"message"` + Details any `json:"details,omitempty"` + HTTPCode int `json:"-"` +} + +// Error implements the error interface +func (e *AppError) Error() string { + return fmt.Sprintf("error code: %d, error message: %s", e.Code, e.Message) +} + +// WithDetails adds error details +func (e *AppError) WithDetails(details any) *AppError { + e.Details = details + return e +} + +// NewBadRequestError creates a bad request error +func NewBadRequestError(message string) *AppError { + return &AppError{ + Code: ErrBadRequest, + Message: message, + HTTPCode: http.StatusBadRequest, + } +} + +// NewUnauthorizedError creates an unauthorized error +func NewUnauthorizedError(message string) *AppError { + return &AppError{ + Code: ErrUnauthorized, + Message: message, + HTTPCode: http.StatusUnauthorized, + } +} + +// NewForbiddenError creates a forbidden error +func NewForbiddenError(message string) *AppError { + return &AppError{ + Code: ErrForbidden, + Message: message, + HTTPCode: http.StatusForbidden, + } +} + +// NewNotFoundError creates a not found error +func NewNotFoundError(message string) *AppError { + return &AppError{ + Code: ErrNotFound, + Message: message, + HTTPCode: http.StatusNotFound, + } +} + +// NewConflictError creates a conflict error +func NewConflictError(message string) *AppError { + return &AppError{ + Code: ErrConflict, + Message: message, + HTTPCode: http.StatusConflict, + } +} + +// NewInternalServerError creates an internal server error +func NewInternalServerError(message string) *AppError { + if message == "" { + message = "服务器内部错误" + } + return &AppError{ + Code: ErrInternalServer, + Message: message, + HTTPCode: http.StatusInternalServerError, + } +} + +// NewValidationError creates a validation error +func NewValidationError(message string) *AppError { + return &AppError{ + Code: ErrValidation, + Message: message, + HTTPCode: http.StatusBadRequest, + } +} + +// Tenant related errors +func NewTenantNotFoundError() *AppError { + return &AppError{ + 
Code: ErrTenantNotFound, + Message: "租户不存在", + HTTPCode: http.StatusNotFound, + } +} + +// NewTenantAlreadyExistsError creates a tenant already exists error +func NewTenantAlreadyExistsError() *AppError { + return &AppError{ + Code: ErrTenantAlreadyExists, + Message: "租户已存在", + HTTPCode: http.StatusConflict, + } +} + +// NewTenantInactiveError creates a tenant inactive error +func NewTenantInactiveError() *AppError { + return &AppError{ + Code: ErrTenantInactive, + Message: "租户已停用", + HTTPCode: http.StatusForbidden, + } +} + +// IsAppError checks if the error is an AppError type +func IsAppError(err error) (*AppError, bool) { + appErr, ok := err.(*AppError) + return appErr, ok +} diff --git a/internal/errors/session.go b/internal/errors/session.go new file mode 100644 index 0000000..8e9d3b1 --- /dev/null +++ b/internal/errors/session.go @@ -0,0 +1,16 @@ +package errors + +import "errors" + +var ( + // ErrSessionNotFound session not found error + ErrSessionNotFound = errors.New("session not found") + // ErrSessionExpired session expired error + ErrSessionExpired = errors.New("session expired") + // ErrSessionLimitExceeded session limit exceeded error + ErrSessionLimitExceeded = errors.New("session limit exceeded") + // ErrInvalidSessionID invalid session ID error + ErrInvalidSessionID = errors.New("invalid session id") + // ErrInvalidTenantID invalid tenant ID error + ErrInvalidTenantID = errors.New("invalid tenant id") +) diff --git a/internal/handler/chunk.go b/internal/handler/chunk.go new file mode 100644 index 0000000..0dbdb1c --- /dev/null +++ b/internal/handler/chunk.go @@ -0,0 +1,233 @@ +package handler + +import ( + "net/http" + + "github.com/Tencent/WeKnora/internal/application/service" + "github.com/Tencent/WeKnora/internal/errors" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/gin-gonic/gin" +) + +// ChunkHandler defines HTTP handlers for chunk operations +type ChunkHandler struct { + service interfaces.ChunkService +} + +// NewChunkHandler creates a new chunk handler +func NewChunkHandler(service interfaces.ChunkService) *ChunkHandler { + return &ChunkHandler{service: service} +} + +// ListKnowledgeChunks lists all chunks for a given knowledge ID +func (h *ChunkHandler) ListKnowledgeChunks(c *gin.Context) { + ctx := c.Request.Context() + logger.Info(ctx, "Start retrieving knowledge chunks list") + + knowledgeID := c.Param("knowledge_id") + if knowledgeID == "" { + logger.Error(ctx, "Knowledge ID is empty") + c.Error(errors.NewBadRequestError("Knowledge ID cannot be empty")) + return + } + + // Parse pagination parameters + var pagination types.Pagination + if err := c.ShouldBindQuery(&pagination); err != nil { + logger.Error(ctx, "Failed to parse pagination parameters", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + logger.Infof(ctx, "Retrieving knowledge chunks list, knowledge ID: %s, page: %d, page size: %d", + knowledgeID, pagination.Page, pagination.PageSize) + + // Use pagination for query + result, err := h.service.ListPagedChunksByKnowledgeID(ctx, knowledgeID, &pagination) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof( + ctx, "Successfully retrieved knowledge chunks list, knowledge ID: %s, total: %d", + knowledgeID, result.Total, + ) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": result.Data, + "total": result.Total, + 
"page": result.Page, + "page_size": result.PageSize, + }) +} + +// UpdateChunkRequest defines the request structure for updating a chunk +type UpdateChunkRequest struct { + Content string `json:"content"` + Embedding []float32 `json:"embedding"` + ChunkIndex int `json:"chunk_index"` + IsEnabled bool `json:"is_enabled"` + StartAt int `json:"start_at"` + EndAt int `json:"end_at"` + ImageInfo string `json:"image_info"` +} + +// validateAndGetChunk validates request parameters and retrieves the chunk +// Returns chunk information, knowledge ID, and error +func (h *ChunkHandler) validateAndGetChunk(c *gin.Context) (*types.Chunk, string, error) { + ctx := c.Request.Context() + + // Validate knowledge ID + knowledgeID := c.Param("knowledge_id") + if knowledgeID == "" { + logger.Error(ctx, "Knowledge ID is empty") + return nil, "", errors.NewBadRequestError("Knowledge ID cannot be empty") + } + + // Validate chunk ID + id := c.Param("id") + if id == "" { + logger.Error(ctx, "Chunk ID is empty") + return nil, knowledgeID, errors.NewBadRequestError("Chunk ID cannot be empty") + } + + // Get tenant ID from context + tenantID, exists := c.Get(types.TenantIDContextKey.String()) + if !exists { + logger.Error(ctx, "Failed to get tenant ID") + return nil, knowledgeID, errors.NewUnauthorizedError("Unauthorized") + } + + logger.Infof(ctx, "Retrieving knowledge chunk information, knowledge ID: %s, chunk ID: %s", knowledgeID, id) + + // Get existing chunk + chunk, err := h.service.GetChunkByID(ctx, knowledgeID, id) + if err != nil { + if err == service.ErrChunkNotFound { + logger.Warnf(ctx, "Chunk not found, knowledge ID: %s, chunk ID: %s", knowledgeID, id) + return nil, knowledgeID, errors.NewNotFoundError("Chunk not found") + } + logger.ErrorWithFields(ctx, err, nil) + return nil, knowledgeID, errors.NewInternalServerError(err.Error()) + } + + // Validate tenant ID + if chunk.TenantID != tenantID.(uint) || chunk.KnowledgeID != knowledgeID { + logger.Warnf( + ctx, + "Tenant has no permission to access chunk, knowledge ID: %s, chunk ID: %s, req tenant: %d, chunk tenant: %d", + knowledgeID, id, tenantID.(uint), chunk.TenantID, + ) + return nil, knowledgeID, errors.NewForbiddenError("No permission to access this chunk") + } + + return chunk, knowledgeID, nil +} + +// UpdateChunk updates a chunk's properties +func (h *ChunkHandler) UpdateChunk(c *gin.Context) { + ctx := c.Request.Context() + logger.Info(ctx, "Start updating knowledge chunk") + + // Validate parameters and get chunk + chunk, knowledgeID, err := h.validateAndGetChunk(c) + if err != nil { + c.Error(err) + return + } + var req UpdateChunkRequest + if err := c.ShouldBindJSON(&req); err != nil { + logger.Error(ctx, "Failed to parse request parameters", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + // Update chunk properties + if req.Content != "" { + chunk.Content = req.Content + } + + chunk.IsEnabled = req.IsEnabled + + logger.Infof(ctx, "Updating knowledge chunk, knowledge ID: %s, chunk ID: %s", knowledgeID, chunk.ID) + + if err := h.service.UpdateChunk(ctx, chunk); err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Knowledge chunk updated successfully, knowledge ID: %s, chunk ID: %s", knowledgeID, chunk.ID) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": chunk, + }) +} + +// DeleteChunk deletes a specific chunk +func (h *ChunkHandler) DeleteChunk(c *gin.Context) { + ctx := c.Request.Context() + logger.Info(ctx, "Start 
deleting knowledge chunk") + + // Validate parameters and get chunk + chunk, knowledgeID, err := h.validateAndGetChunk(c) + if err != nil { + c.Error(err) + return + } + + logger.Infof(ctx, "Deleting knowledge chunk, knowledge ID: %s, chunk ID: %s", knowledgeID, chunk.ID) + + if err := h.service.DeleteChunk(ctx, chunk.ID); err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Knowledge chunk deleted successfully, knowledge ID: %s, chunk ID: %s", knowledgeID, chunk.ID) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "message": "Chunk deleted", + }) +} + +// DeleteChunksByKnowledgeID deletes all chunks for a given knowledge ID +func (h *ChunkHandler) DeleteChunksByKnowledgeID(c *gin.Context) { + ctx := c.Request.Context() + logger.Info(ctx, "Start deleting all chunks under knowledge") + + knowledgeID := c.Param("knowledge_id") + if knowledgeID == "" { + logger.Error(ctx, "Knowledge ID is empty") + c.Error(errors.NewBadRequestError("Knowledge ID cannot be empty")) + return + } + + // Get tenant ID from context + tenantID, exists := c.Get(types.TenantIDContextKey.String()) + if !exists { + logger.Error(ctx, "Failed to get tenant ID") + c.Error(errors.NewUnauthorizedError("Unauthorized")) + return + } + + logger.Infof(ctx, "Deleting all chunks under knowledge, knowledge ID: %s, tenant ID: %d", knowledgeID, tenantID.(uint)) + + // Delete all chunks under the knowledge + err := h.service.DeleteChunksByKnowledgeID(ctx, knowledgeID) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "All chunks under knowledge deleted successfully, knowledge ID: %s", knowledgeID) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "message": "All chunks under knowledge deleted", + }) +} diff --git a/internal/handler/evaluation.go b/internal/handler/evaluation.go new file mode 100644 index 0000000..8d0c0ab --- /dev/null +++ b/internal/handler/evaluation.go @@ -0,0 +1,105 @@ +package handler + +import ( + "net/http" + + "github.com/Tencent/WeKnora/internal/errors" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/gin-gonic/gin" +) + +// EvaluationHandler handles evaluation related HTTP requests +type EvaluationHandler struct { + evaluationService interfaces.EvaluationService // Service for evaluation operations +} + +// NewEvaluationHandler creates a new EvaluationHandler instance +func NewEvaluationHandler(evaluationService interfaces.EvaluationService) *EvaluationHandler { + return &EvaluationHandler{evaluationService: evaluationService} +} + +// EvaluationRequest contains parameters for evaluation request +type EvaluationRequest struct { + DatasetID string `json:"dataset_id"` // ID of dataset to evaluate + EmbeddingModelID string `json:"embedding_id"` // ID of embedding model to use + ChatModelID string `json:"chat_id"` // ID of chat model to use + RerankModelID string `json:"rerank_id"` // ID of rerank model to use +} + +// Evaluation handles evaluation request +func (e *EvaluationHandler) Evaluation(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start processing evaluation request") + + var request *EvaluationRequest + if err := c.ShouldBind(&request); err != nil { + logger.Error(ctx, "Failed to parse request parameters", err) + c.Error(errors.NewBadRequestError("Invalid request 
parameters").WithDetails(err.Error())) + return + } + + tenantID, exists := c.Get(string(types.TenantIDContextKey)) + if !exists { + logger.Error(ctx, "Failed to get tenant ID") + c.Error(errors.NewUnauthorizedError("Unauthorized")) + return + } + + logger.Infof(ctx, "Executing evaluation, tenant: %v, dataset: %s, embedding: %s, chat: %s, rerank: %s", + tenantID, request.DatasetID, request.EmbeddingModelID, request.ChatModelID, request.RerankModelID) + + task, err := e.evaluationService.Evaluation(ctx, + request.DatasetID, + request.EmbeddingModelID, + request.ChatModelID, + request.RerankModelID, + ) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Evaluation task created successfully") + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": task, + }) +} + +// GetEvaluationRequest contains parameters for getting evaluation result +type GetEvaluationRequest struct { + TaskID string `form:"task_id" binding:"required"` // ID of evaluation task +} + +// GetEvaluationResult retrieves evaluation result by task ID +func (e *EvaluationHandler) GetEvaluationResult(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start retrieving evaluation result") + + var request *GetEvaluationRequest + if err := c.ShouldBind(&request); err != nil { + logger.Error(ctx, "Failed to parse request parameters", err) + c.Error(errors.NewBadRequestError("Invalid request parameters").WithDetails(err.Error())) + return + } + + logger.Infof(ctx, "Retrieving evaluation result, task ID: %s", request.TaskID) + + result, err := e.evaluationService.EvaluationResult(ctx, request.TaskID) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Retrieved evaluation result successfully, task ID: %s", request.TaskID) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": result, + }) +} diff --git a/internal/handler/knowledge.go b/internal/handler/knowledge.go new file mode 100644 index 0000000..533f30b --- /dev/null +++ b/internal/handler/knowledge.go @@ -0,0 +1,458 @@ +package handler + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + + "github.com/Tencent/WeKnora/internal/errors" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/gin-gonic/gin" +) + +// KnowledgeHandler processes HTTP requests related to knowledge resources +type KnowledgeHandler struct { + kgService interfaces.KnowledgeService + kbService interfaces.KnowledgeBaseService +} + +// NewKnowledgeHandler creates a new knowledge handler instance +func NewKnowledgeHandler( + kgService interfaces.KnowledgeService, + kbService interfaces.KnowledgeBaseService, +) *KnowledgeHandler { + return &KnowledgeHandler{kgService: kgService, kbService: kbService} +} + +// validateKnowledgeBaseAccess validates access permissions to a knowledge base +// Returns the knowledge base, the knowledge base ID, and any errors encountered +func (h *KnowledgeHandler) validateKnowledgeBaseAccess(c *gin.Context) (*types.KnowledgeBase, string, error) { + ctx := c.Request.Context() + + // Get knowledge base ID from URL path parameter + kbID := c.Param("id") + if kbID == "" { + logger.Error(ctx, "Knowledge base ID is empty") + return nil, "", errors.NewBadRequestError("Knowledge base ID cannot be empty") + } + + // Get knowledge base details + kb, err := 
h.kbService.GetKnowledgeBaseByID(ctx, kbID) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + return nil, kbID, errors.NewInternalServerError(err.Error()) + } + + // Verify tenant permissions + if kb.TenantID != c.GetUint(types.TenantIDContextKey.String()) { + logger.Warnf( + ctx, + "Permission denied to access this knowledge base, tenant ID mismatch, "+ + "requested tenant ID: %d, knowledge base tenant ID: %d", + c.GetUint(types.TenantIDContextKey.String()), + kb.TenantID, + ) + return nil, kbID, errors.NewForbiddenError("Permission denied to access this knowledge base") + } + + return kb, kbID, nil +} + +// handleDuplicateKnowledgeError handles cases where duplicate knowledge is detected +// Returns true if the error was a duplicate error and was handled, false otherwise +func (h *KnowledgeHandler) handleDuplicateKnowledgeError(c *gin.Context, + err error, knowledge *types.Knowledge, duplicateType string, +) bool { + if dupErr, ok := err.(*types.DuplicateKnowledgeError); ok { + ctx := c.Request.Context() + logger.Warnf(ctx, "Detected duplicate %s: %s", duplicateType, dupErr.Error()) + c.JSON(http.StatusConflict, gin.H{ + "success": false, + "message": dupErr.Error(), + "data": knowledge, // knowledge contains the existing document + "code": fmt.Sprintf("duplicate_%s", duplicateType), + }) + return true + } + return false +} + +// CreateKnowledgeFromFile handles requests to create knowledge from an uploaded file +func (h *KnowledgeHandler) CreateKnowledgeFromFile(c *gin.Context) { + ctx := c.Request.Context() + logger.Info(ctx, "Start creating knowledge from file") + + // Validate access to the knowledge base + _, kbID, err := h.validateKnowledgeBaseAccess(c) + if err != nil { + c.Error(err) + return + } + + // Get the uploaded file + file, err := c.FormFile("file") + if err != nil { + logger.Error(ctx, "File upload failed", err) + c.Error(errors.NewBadRequestError("File upload failed").WithDetails(err.Error())) + return + } + + logger.Infof(ctx, "File upload successful, filename: %s, size: %.2f KB", file.Filename, float64(file.Size)/1024) + logger.Infof(ctx, "Creating knowledge, knowledge base ID: %s, filename: %s", kbID, file.Filename) + + // Parse metadata if provided + var metadata map[string]string + metadataStr := c.PostForm("metadata") + if metadataStr != "" { + if err := json.Unmarshal([]byte(metadataStr), &metadata); err != nil { + logger.Error(ctx, "Failed to parse metadata", err) + c.Error(errors.NewBadRequestError("Invalid metadata format").WithDetails(err.Error())) + return + } + logger.Infof(ctx, "Received file metadata: %v", metadata) + } + + // Create knowledge entry from the file + knowledge, err := h.kgService.CreateKnowledgeFromFile(ctx, kbID, file, metadata) + // Check for duplicate knowledge error + if err != nil { + if h.handleDuplicateKnowledgeError(c, err, knowledge, "file") { + return + } + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Knowledge created successfully, ID: %s, title: %s", knowledge.ID, knowledge.Title) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": knowledge, + }) +} + +// CreateKnowledgeFromURL handles requests to create knowledge from a URL +func (h *KnowledgeHandler) CreateKnowledgeFromURL(c *gin.Context) { + ctx := c.Request.Context() + logger.Info(ctx, "Start creating knowledge from URL") + + // Validate access to the knowledge base + _, kbID, err := h.validateKnowledgeBaseAccess(c) + if err != nil { + c.Error(err) + return + } + + // Parse 
URL from request body + var req struct { + URL string `json:"url" binding:"required"` + } + if err := c.ShouldBindJSON(&req); err != nil { + logger.Error(ctx, "Failed to parse URL request", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + logger.Infof(ctx, "Received URL request: %s", req.URL) + logger.Infof(ctx, "Creating knowledge from URL, knowledge base ID: %s, URL: %s", kbID, req.URL) + + // Create knowledge entry from the URL + knowledge, err := h.kgService.CreateKnowledgeFromURL(ctx, kbID, req.URL) + // Check for duplicate knowledge error + if err != nil { + if h.handleDuplicateKnowledgeError(c, err, knowledge, "url") { + return + } + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Knowledge created successfully from URL, ID: %s, title: %s", knowledge.ID, knowledge.Title) + c.JSON(http.StatusCreated, gin.H{ + "success": true, + "data": knowledge, + }) +} + +// GetKnowledge retrieves a knowledge entry by its ID +func (h *KnowledgeHandler) GetKnowledge(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start retrieving knowledge") + + // Get knowledge ID from URL path parameter + id := c.Param("id") + if id == "" { + logger.Error(ctx, "Knowledge ID is empty") + c.Error(errors.NewBadRequestError("Knowledge ID cannot be empty")) + return + } + + logger.Infof(ctx, "Retrieving knowledge, ID: %s", id) + knowledge, err := h.kgService.GetKnowledgeByID(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Knowledge retrieved successfully, ID: %s, title: %s", knowledge.ID, knowledge.Title) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": knowledge, + }) +} + +// ListKnowledge retrieves a paginated list of knowledge entries from a knowledge base +func (h *KnowledgeHandler) ListKnowledge(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start retrieving knowledge list") + + // Get knowledge base ID from URL path parameter + kbID := c.Param("id") + if kbID == "" { + logger.Error(ctx, "Knowledge base ID is empty") + c.Error(errors.NewBadRequestError("Knowledge base ID cannot be empty")) + return + } + + // Parse pagination parameters from query string + var pagination types.Pagination + if err := c.ShouldBindQuery(&pagination); err != nil { + logger.Error(ctx, "Failed to parse pagination parameters", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + logger.Infof(ctx, "Retrieving knowledge list under knowledge base, knowledge base ID: %s, page: %d, page size: %d", + kbID, pagination.Page, pagination.PageSize) + + // Retrieve paginated knowledge entries + result, err := h.kgService.ListPagedKnowledgeByKnowledgeBaseID(ctx, kbID, &pagination) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Knowledge list retrieved successfully, knowledge base ID: %s, total: %d", kbID, result.Total) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": result.Data, + "total": result.Total, + "page": result.Page, + "page_size": result.PageSize, + }) +} + +// DeleteKnowledge handles requests to delete a knowledge entry by its ID +func (h *KnowledgeHandler) DeleteKnowledge(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start deleting knowledge") + + // Get knowledge ID from URL path parameter + id := c.Param("id") + if id == 
"" { + logger.Error(ctx, "Knowledge ID is empty") + c.Error(errors.NewBadRequestError("Knowledge ID cannot be empty")) + return + } + + logger.Infof(ctx, "Deleting knowledge, ID: %s", id) + err := h.kgService.DeleteKnowledge(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Knowledge deleted successfully, ID: %s", id) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "message": "Deleted successfully", + }) +} + +// DownloadKnowledgeFile handles requests to download a file associated with a knowledge entry +func (h *KnowledgeHandler) DownloadKnowledgeFile(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start downloading knowledge file") + + // Get knowledge ID from URL path parameter + id := c.Param("id") + if id == "" { + logger.Error(ctx, "Knowledge ID is empty") + c.Error(errors.NewBadRequestError("Knowledge ID cannot be empty")) + return + } + + logger.Infof(ctx, "Retrieving knowledge file, ID: %s", id) + + // Get file content and filename + file, filename, err := h.kgService.GetKnowledgeFile(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError("Failed to retrieve file").WithDetails(err.Error())) + return + } + defer file.Close() + + logger.Infof(ctx, "Knowledge file retrieved successfully, ID: %s, filename: %s", id, filename) + + // Set response headers for file download + c.Header("Content-Description", "File Transfer") + c.Header("Content-Transfer-Encoding", "binary") + c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=%s", filename)) + c.Header("Content-Type", "application/octet-stream") + c.Header("Expires", "0") + c.Header("Cache-Control", "must-revalidate") + c.Header("Pragma", "public") + + // Stream file content to response + c.Stream(func(w io.Writer) bool { + if _, err := io.Copy(w, file); err != nil { + logger.Errorf(ctx, "Failed to send file: %v", err) + return false + } + logger.Debug(ctx, "File sending completed") + return false + }) +} + +// GetKnowledgeBatchRequest defines parameters for batch knowledge retrieval +type GetKnowledgeBatchRequest struct { + IDs []string `form:"ids" binding:"required"` // List of knowledge IDs +} + +// GetKnowledgeBatch handles requests to retrieve multiple knowledge entries in a batch +func (h *KnowledgeHandler) GetKnowledgeBatch(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start batch retrieving knowledge") + + // Get tenant ID from context + tenantID, ok := c.Get(types.TenantIDContextKey.String()) + if !ok { + logger.Error(ctx, "Failed to get tenant ID") + c.Error(errors.NewUnauthorizedError("Unauthorized")) + return + } + + // Parse request parameters from query string + var req GetKnowledgeBatchRequest + if err := c.ShouldBindQuery(&req); err != nil { + logger.Error(ctx, "Failed to parse request parameters", err) + c.Error(errors.NewBadRequestError("Invalid request parameters").WithDetails(err.Error())) + return + } + + logger.Infof( + ctx, + "Batch retrieving knowledge, tenant ID: %d, number of knowledge IDs: %d", + tenantID.(uint), len(req.IDs), + ) + + // Retrieve knowledge entries in batch + knowledges, err := h.kgService.GetKnowledgeBatch(ctx, tenantID.(uint), req.IDs) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError("Failed to retrieve knowledge list").WithDetails(err.Error())) + return + } + + logger.Infof( + ctx, + "Batch knowledge retrieval successful, 
requested count: %d, returned count: %d",
+		len(req.IDs), len(knowledges),
+	)
+
+	// Return results
+	c.JSON(http.StatusOK, gin.H{
+		"success": true,
+		"data":    knowledges,
+	})
+}
+
+// UpdateKnowledge handles requests to update an existing knowledge entry
+func (h *KnowledgeHandler) UpdateKnowledge(c *gin.Context) {
+	ctx := c.Request.Context()
+	logger.Info(ctx, "Start updating knowledge")
+
+	// Get knowledge ID from URL path parameter
+	id := c.Param("id")
+	if id == "" {
+		logger.Error(ctx, "Knowledge ID is empty")
+		c.Error(errors.NewBadRequestError("Knowledge ID cannot be empty"))
+		return
+	}
+
+	var knowledge types.Knowledge
+	if err := c.ShouldBindJSON(&knowledge); err != nil {
+		logger.Error(ctx, "Failed to parse request parameters", err)
+		c.Error(errors.NewBadRequestError(err.Error()))
+		return
+	}
+	// Use the ID from the URL path so the request body cannot redirect the update to another record
+	knowledge.ID = id
+
+	if err := h.kgService.UpdateKnowledge(ctx, &knowledge); err != nil {
+		logger.ErrorWithFields(ctx, err, nil)
+		c.Error(errors.NewInternalServerError(err.Error()))
+		return
+	}
+
+	logger.Infof(ctx, "Knowledge updated successfully, knowledge ID: %s", knowledge.ID)
+	c.JSON(http.StatusOK, gin.H{
+		"success": true,
+		"message": "Knowledge updated successfully",
+	})
+}
+
+// UpdateImageInfo updates the image info of a specific knowledge chunk
+func (h *KnowledgeHandler) UpdateImageInfo(c *gin.Context) {
+	ctx := c.Request.Context()
+	logger.Info(ctx, "Start updating image info")
+
+	// Get knowledge ID from URL path parameter
+	id := c.Param("id")
+	if id == "" {
+		logger.Error(ctx, "Knowledge ID is empty")
+		c.Error(errors.NewBadRequestError("Knowledge ID cannot be empty"))
+		return
+	}
+	// Get chunk ID from URL path parameter
+	chunkID := c.Param("chunk_id")
+	if chunkID == "" {
+		logger.Error(ctx, "Chunk ID is empty")
+		c.Error(errors.NewBadRequestError("Chunk ID cannot be empty"))
+		return
+	}
+
+	var request struct {
+		ImageInfo string `json:"image_info"`
+	}
+
+	if err := c.ShouldBindJSON(&request); err != nil {
+		logger.Error(ctx, "Failed to parse request parameters", err)
+		c.Error(errors.NewBadRequestError(err.Error()))
+		return
+	}
+
+	// Update chunk properties
+	logger.Infof(ctx, "Updating knowledge chunk, knowledge ID: %s, chunk ID: %s", id, chunkID)
+	err := h.kgService.UpdateImageInfo(ctx, id, chunkID, request.ImageInfo)
+	if err != nil {
+		logger.ErrorWithFields(ctx, err, nil)
+		c.Error(errors.NewInternalServerError(err.Error()))
+		return
+	}
+
+	logger.Infof(ctx, "Knowledge chunk updated successfully, knowledge ID: %s, chunk ID: %s", id, chunkID)
+	c.JSON(http.StatusOK, gin.H{
+		"success": true,
+		"message": "Knowledge chunk image updated successfully",
+	})
+}
diff --git a/internal/handler/knowledgebase.go b/internal/handler/knowledgebase.go
new file mode 100644
index 0000000..c5b6b4c
--- /dev/null
+++ b/internal/handler/knowledgebase.go
@@ -0,0 +1,298 @@
+package handler
+
+import (
+	"net/http"
+
+	"github.com/Tencent/WeKnora/internal/errors"
+	"github.com/Tencent/WeKnora/internal/logger"
+	"github.com/Tencent/WeKnora/internal/types"
+	"github.com/Tencent/WeKnora/internal/types/interfaces"
+	"github.com/gin-gonic/gin"
+)
+
+// KnowledgeBaseHandler defines the HTTP handler for knowledge base operations
+type KnowledgeBaseHandler struct {
+	service          interfaces.KnowledgeBaseService
+	knowledgeService interfaces.KnowledgeService
+}
+
+// NewKnowledgeBaseHandler creates a new knowledge base handler instance
+func NewKnowledgeBaseHandler(
+	service interfaces.KnowledgeBaseService,
+	knowledgeService interfaces.KnowledgeService,
+) *KnowledgeBaseHandler {
+	return &KnowledgeBaseHandler{service: service, knowledgeService: knowledgeService}
+}
+
+// HybridSearch handles requests to perform hybrid vector and 
keyword search on a knowledge base +func (h *KnowledgeBaseHandler) HybridSearch(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start hybrid search") + + // Validate query parameter existence + query := c.Query("query") + if query == "" { + logger.Error(ctx, "Query parameter is empty") + c.Error(errors.NewBadRequestError("Query parameter cannot be empty")) + return + } + + // Validate knowledge base ID + id := c.Param("id") + if id == "" { + logger.Error(ctx, "Knowledge base ID is empty") + c.Error(errors.NewBadRequestError("Knowledge base ID cannot be empty")) + return + } + + logger.Infof(ctx, "Executing hybrid search, knowledge base ID: %s, query: %s", id, query) + + // Execute hybrid search with default search parameters + results, err := h.service.HybridSearch(ctx, id, types.SearchParams{ + QueryText: query, + KeywordThreshold: 0.5, + VectorThreshold: 0.3, + MatchCount: 10, + }) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Hybrid search completed, knowledge base ID: %s, result count: %d", id, len(results)) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": results, + }) +} + +// CreateKnowledgeBase handles requests to create a new knowledge base +func (h *KnowledgeBaseHandler) CreateKnowledgeBase(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start creating knowledge base") + + // Parse request body + var req types.KnowledgeBase + if err := c.ShouldBindJSON(&req); err != nil { + logger.Error(ctx, "Failed to parse request parameters", err) + c.Error(errors.NewBadRequestError("Invalid request parameters").WithDetails(err.Error())) + return + } + + logger.Infof(ctx, "Creating knowledge base, name: %s", req.Name) + // Create knowledge base using the service + kb, err := h.service.CreateKnowledgeBase(ctx, &req) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Knowledge base created successfully, ID: %s, name: %s", kb.ID, kb.Name) + c.JSON(http.StatusCreated, gin.H{ + "success": true, + "data": kb, + }) +} + +// validateAndGetKnowledgeBase validates request parameters and retrieves the knowledge base +// Returns the knowledge base, knowledge base ID, and any errors encountered +func (h *KnowledgeBaseHandler) validateAndGetKnowledgeBase(c *gin.Context) (*types.KnowledgeBase, string, error) { + ctx := c.Request.Context() + + // Get tenant ID from context + tenantID, exists := c.Get(types.TenantIDContextKey.String()) + if !exists { + logger.Error(ctx, "Failed to get tenant ID") + return nil, "", errors.NewUnauthorizedError("Unauthorized") + } + + // Get knowledge base ID from URL parameter + id := c.Param("id") + if id == "" { + logger.Error(ctx, "Knowledge base ID is empty") + return nil, "", errors.NewBadRequestError("Knowledge base ID cannot be empty") + } + + logger.Infof(ctx, "Retrieving knowledge base, ID: %s", id) + + // Verify tenant has permission to access this knowledge base + kb, err := h.service.GetKnowledgeBaseByID(ctx, id) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + return nil, id, errors.NewInternalServerError(err.Error()) + } + + // Verify tenant ownership + if kb.TenantID != tenantID.(uint) { + logger.Warnf( + ctx, + "Tenant has no permission to access this knowledge base, knowledge base ID: %s, "+ + "request tenant ID: %d, knowledge base tenant ID: %d", + id, tenantID.(uint), kb.TenantID, + ) + return nil, id, 
errors.NewForbiddenError("No permission to operate") + } + + return kb, id, nil +} + +// GetKnowledgeBase handles requests to retrieve a knowledge base by ID +func (h *KnowledgeBaseHandler) GetKnowledgeBase(c *gin.Context) { + ctx := c.Request.Context() + logger.Info(ctx, "Start retrieving knowledge base") + + // Validate and get the knowledge base + kb, id, err := h.validateAndGetKnowledgeBase(c) + if err != nil { + c.Error(err) + return + } + + logger.Infof(ctx, "Retrieved knowledge base successfully, ID: %s, name: %s", id, kb.Name) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": kb, + }) +} + +// ListKnowledgeBases handles requests to list all knowledge bases for a tenant +func (h *KnowledgeBaseHandler) ListKnowledgeBases(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start retrieving knowledge base list") + + // Get tenant ID from context + tenantID, exists := c.Get(types.TenantIDContextKey.String()) + if !exists { + logger.Error(ctx, "Failed to get tenant ID") + c.Error(errors.NewUnauthorizedError("Unauthorized")) + return + } + + logger.Infof(ctx, "Retrieving knowledge base list for tenant, tenant ID: %d", tenantID.(uint)) + + // Get all knowledge bases for this tenant + kbs, err := h.service.ListKnowledgeBases(ctx) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof( + ctx, + "Retrieved knowledge base list successfully, tenant ID: %d, total: %d knowledge bases", + tenantID.(uint), len(kbs), + ) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": kbs, + }) +} + +// UpdateKnowledgeBaseRequest defines the request body structure for updating a knowledge base +type UpdateKnowledgeBaseRequest struct { + Name string `json:"name" binding:"required"` + Description string `json:"description"` + Config *types.KnowledgeBaseConfig `json:"config" binding:"required"` +} + +// UpdateKnowledgeBase handles requests to update an existing knowledge base +func (h *KnowledgeBaseHandler) UpdateKnowledgeBase(c *gin.Context) { + ctx := c.Request.Context() + logger.Info(ctx, "Start updating knowledge base") + + // Validate and get the knowledge base + _, id, err := h.validateAndGetKnowledgeBase(c) + if err != nil { + c.Error(err) + return + } + + // Parse request body + var req UpdateKnowledgeBaseRequest + if err := c.ShouldBindJSON(&req); err != nil { + logger.Error(ctx, "Failed to parse request parameters", err) + c.Error(errors.NewBadRequestError("Invalid request parameters").WithDetails(err.Error())) + return + } + + logger.Infof(ctx, "Updating knowledge base, ID: %s, name: %s", id, req.Name) + + // Update the knowledge base + kb, err := h.service.UpdateKnowledgeBase(ctx, id, req.Name, req.Description, req.Config) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Knowledge base updated successfully, ID: %s", id) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": kb, + }) +} + +// DeleteKnowledgeBase handles requests to delete a knowledge base +func (h *KnowledgeBaseHandler) DeleteKnowledgeBase(c *gin.Context) { + ctx := c.Request.Context() + logger.Info(ctx, "Start deleting knowledge base") + + // Validate and get the knowledge base + kb, id, err := h.validateAndGetKnowledgeBase(c) + if err != nil { + c.Error(err) + return + } + + logger.Infof(ctx, "Deleting knowledge base, ID: %s, name: %s", id, kb.Name) + + // Delete the knowledge base + if err := 
h.service.DeleteKnowledgeBase(ctx, id); err != nil {
+		logger.ErrorWithFields(ctx, err, nil)
+		c.Error(errors.NewInternalServerError(err.Error()))
+		return
+	}
+
+	logger.Infof(ctx, "Knowledge base deleted successfully, ID: %s", id)
+	c.JSON(http.StatusOK, gin.H{
+		"success": true,
+		"message": "Knowledge base deleted successfully",
+	})
+}
+
+// CopyKnowledgeBaseRequest defines the request body for copying a knowledge base
+type CopyKnowledgeBaseRequest struct {
+	SourceID string `json:"source_id" binding:"required"`
+	TargetID string `json:"target_id"`
+}
+
+// CopyKnowledgeBase handles requests to clone the contents of one knowledge base into another
+func (h *KnowledgeBaseHandler) CopyKnowledgeBase(c *gin.Context) {
+	ctx := c.Request.Context()
+	logger.Info(ctx, "Start copying knowledge base")
+
+	var req CopyKnowledgeBaseRequest
+	if err := c.ShouldBindJSON(&req); err != nil {
+		logger.Error(ctx, "Failed to parse request parameters", err)
+		c.Error(errors.NewBadRequestError("Invalid request parameters").WithDetails(err.Error()))
+		return
+	}
+
+	logger.Infof(ctx, "Copying knowledge base, ID: %s to ID: %s", req.SourceID, req.TargetID)
+
+	err := h.knowledgeService.CloneKnowledgeBase(ctx, req.SourceID, req.TargetID)
+	if err != nil {
+		logger.ErrorWithFields(ctx, err, nil)
+		c.Error(errors.NewInternalServerError(err.Error()))
+		return
+	}
+	logger.Infof(ctx, "Knowledge base copied successfully, ID: %s to ID: %s", req.SourceID, req.TargetID)
+	c.JSON(http.StatusOK, gin.H{
+		"success": true,
+		"message": "Knowledge base copied successfully",
+	})
+}
diff --git a/internal/handler/message.go b/internal/handler/message.go
new file mode 100644
index 0000000..f84a18c
--- /dev/null
+++ b/internal/handler/message.go
@@ -0,0 +1,135 @@
+package handler
+
+import (
+	"net/http"
+	"strconv"
+	"time"
+
+	"github.com/gin-gonic/gin"
+
+	"github.com/Tencent/WeKnora/internal/errors"
+	"github.com/Tencent/WeKnora/internal/logger"
+	"github.com/Tencent/WeKnora/internal/types/interfaces"
+)
+
+// MessageHandler handles HTTP requests related to messages within chat sessions
+// It provides endpoints for loading and managing message history
+type MessageHandler struct {
+	MessageService interfaces.MessageService // Service that implements message business logic
+}
+
+// NewMessageHandler creates a new message handler instance with the required service
+// Parameters:
+// - messageService: Service that implements message business logic
+//
+// Returns a pointer to a new MessageHandler
+func NewMessageHandler(messageService interfaces.MessageService) *MessageHandler {
+	return &MessageHandler{
+		MessageService: messageService,
+	}
+}
+
+// LoadMessages handles requests to load message history
+// It supports both loading recent messages and loading messages before a specific timestamp
+// This endpoint is used for scrolling through conversation history
+func (h *MessageHandler) LoadMessages(c *gin.Context) {
+	ctx := c.Request.Context()
+
+	logger.Info(ctx, "Start loading messages")
+
+	// Get path parameters and query parameters
+	sessionID := c.Param("session_id")
+	limit := c.DefaultQuery("limit", "20")
+	beforeTimeStr := c.DefaultQuery("before_time", "")
+
+	logger.Infof(ctx, "Loading messages params, session ID: %s, limit: %s, before time: %s",
+		sessionID, limit, beforeTimeStr)
+
+	// Parse limit parameter with fallback to default
+	limitInt, err := strconv.Atoi(limit)
+	if err != nil {
+		logger.Warnf(ctx, "Invalid limit value, using default value 20, input: %s", limit)
+		limitInt = 20
+	}
+
+	// If no beforeTime is provided, retrieve the most recent messages
+	if beforeTimeStr == "" {
+		logger.Infof(ctx, "Getting recent messages for session, session ID: %s, limit: %d", sessionID, 
limitInt) + messages, err := h.MessageService.GetRecentMessagesBySession(ctx, sessionID, limitInt) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof( + ctx, + "Successfully retrieved recent messages, session ID: %s, message count: %d", + sessionID, len(messages), + ) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": messages, + }) + return + } + + // If beforeTime is provided, parse the timestamp + beforeTime, err := time.Parse(time.RFC3339Nano, beforeTimeStr) + if err != nil { + logger.Errorf( + ctx, + "Invalid time format, please use RFC3339Nano format, err: %v, beforeTimeStr: %s", + err, beforeTimeStr, + ) + c.Error(errors.NewBadRequestError("Invalid time format, please use RFC3339Nano format")) + return + } + + // Retrieve messages before the specified timestamp + logger.Infof(ctx, "Getting messages before specific time, session ID: %s, before time: %s, limit: %d", + sessionID, beforeTime.Format(time.RFC3339Nano), limitInt) + messages, err := h.MessageService.GetMessagesBySessionBeforeTime(ctx, sessionID, beforeTime, limitInt) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof( + ctx, + "Successfully retrieved messages before time, session ID: %s, message count: %d", + sessionID, len(messages), + ) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": messages, + }) +} + +// DeleteMessage handles requests to delete a message from a session +// It requires both session ID and message ID to identify the specific message to delete +func (h *MessageHandler) DeleteMessage(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start deleting message") + + // Get path parameters for session and message identification + sessionID := c.Param("session_id") + messageID := c.Param("id") + + logger.Infof(ctx, "Deleting message, session ID: %s, message ID: %s", sessionID, messageID) + + // Delete the message using the message service + if err := h.MessageService.DeleteMessage(ctx, sessionID, messageID); err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Message deleted successfully, session ID: %s, message ID: %s", sessionID, messageID) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "message": "Message deleted successfully", + }) +} diff --git a/internal/handler/model.go b/internal/handler/model.go new file mode 100644 index 0000000..7be800a --- /dev/null +++ b/internal/handler/model.go @@ -0,0 +1,265 @@ +package handler + +import ( + "net/http" + + "github.com/Tencent/WeKnora/internal/application/service" + "github.com/Tencent/WeKnora/internal/errors" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/gin-gonic/gin" +) + +// ModelHandler handles HTTP requests for model-related operations +// It implements the necessary methods to create, retrieve, update, and delete models +type ModelHandler struct { + service interfaces.ModelService +} + +// NewModelHandler creates a new instance of ModelHandler +// It requires a model service implementation that handles business logic +// Parameters: +// - service: An implementation of the ModelService interface +// +// Returns a pointer to the newly created ModelHandler +func NewModelHandler(service interfaces.ModelService) *ModelHandler { + 
return &ModelHandler{service: service} +} + +// CreateModelRequest defines the structure for model creation requests +// Contains all fields required to create a new model in the system +type CreateModelRequest struct { + Name string `json:"name" binding:"required"` + Type types.ModelType `json:"type" binding:"required"` + Source types.ModelSource `json:"source" binding:"required"` + Description string `json:"description"` + Parameters types.ModelParameters `json:"parameters" binding:"required"` + IsDefault bool `json:"is_default"` +} + +// CreateModel handles the HTTP request to create a new model +// It validates the request, processes it using the model service, +// and returns the created model to the client +// Parameters: +// - c: Gin context for the HTTP request +func (h *ModelHandler) CreateModel(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start creating model") + + var req CreateModelRequest + if err := c.ShouldBindJSON(&req); err != nil { + logger.Error(ctx, "Failed to parse request parameters", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + tenantID := c.GetUint(types.TenantIDContextKey.String()) + if tenantID == 0 { + logger.Error(ctx, "Tenant ID is empty") + c.Error(errors.NewBadRequestError("Tenant ID cannot be empty")) + return + } + + logger.Infof(ctx, "Creating model, Tenant ID: %d, Model name: %s, Model type: %s", + tenantID, req.Name, req.Type) + + model := &types.Model{ + TenantID: tenantID, + Name: req.Name, + Type: req.Type, + Source: req.Source, + Description: req.Description, + Parameters: req.Parameters, + IsDefault: req.IsDefault, + } + + if err := h.service.CreateModel(ctx, model); err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Model created successfully, ID: %s, Name: %s", model.ID, model.Name) + c.JSON(http.StatusCreated, gin.H{ + "success": true, + "data": model, + }) +} + +// GetModel handles the HTTP request to retrieve a model by its ID +// It fetches the model from the service and returns it to the client, +// or returns appropriate error messages if the model cannot be found +// Parameters: +// - c: Gin context for the HTTP request +func (h *ModelHandler) GetModel(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start retrieving model") + + id := c.Param("id") + if id == "" { + logger.Error(ctx, "Model ID is empty") + c.Error(errors.NewBadRequestError("Model ID cannot be empty")) + return + } + + logger.Infof(ctx, "Retrieving model, ID: %s", id) + model, err := h.service.GetModelByID(ctx, id) + if err != nil { + if err == service.ErrModelNotFound { + logger.Warnf(ctx, "Model not found, ID: %s", id) + c.Error(errors.NewNotFoundError("Model not found")) + return + } + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Retrieved model successfully, ID: %s, Name: %s", model.ID, model.Name) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": model, + }) +} + +// ListModels handles the HTTP request to retrieve all models for a tenant +// It validates the tenant ID, fetches models from the service, and returns them to the client +// Parameters: +// - c: Gin context for the HTTP request +func (h *ModelHandler) ListModels(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start retrieving model list") + + tenantID := c.GetUint(types.TenantIDContextKey.String()) + if tenantID == 0 { + 
logger.Error(ctx, "Tenant ID is empty") + c.Error(errors.NewBadRequestError("Tenant ID cannot be empty")) + return + } + + logger.Infof(ctx, "Retrieving model list, Tenant ID: %d", tenantID) + models, err := h.service.ListModels(ctx) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Retrieved model list successfully, Tenant ID: %d, Total: %d models", tenantID, len(models)) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": models, + }) +} + +// UpdateModelRequest defines the structure for model update requests +// Contains fields that can be updated for an existing model +type UpdateModelRequest struct { + Name string `json:"name"` + Description string `json:"description"` + Parameters types.ModelParameters `json:"parameters"` + IsDefault bool `json:"is_default"` +} + +// UpdateModel handles the HTTP request to update an existing model +// It validates the request, retrieves the current model, applies changes, +// and updates the model in the service +// Parameters: +// - c: Gin context for the HTTP request +func (h *ModelHandler) UpdateModel(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start updating model") + + id := c.Param("id") + if id == "" { + logger.Error(ctx, "Model ID is empty") + c.Error(errors.NewBadRequestError("Model ID cannot be empty")) + return + } + + var req UpdateModelRequest + if err := c.ShouldBindJSON(&req); err != nil { + logger.Error(ctx, "Failed to parse request parameters", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + logger.Infof(ctx, "Retrieving model information, ID: %s", id) + model, err := h.service.GetModelByID(ctx, id) + if err != nil { + if err == service.ErrModelNotFound { + logger.Warnf(ctx, "Model not found, ID: %s", id) + c.Error(errors.NewNotFoundError("Model not found")) + return + } + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + // Update model fields if they are provided in the request + if req.Name != "" { + model.Name = req.Name + } + if req.Description != "" { + model.Description = req.Description + } + if req.Parameters != (types.ModelParameters{}) { + model.Parameters = req.Parameters + } + model.IsDefault = req.IsDefault + + logger.Infof(ctx, "Updating model, ID: %s, Name: %s", id, model.Name) + if err := h.service.UpdateModel(ctx, model); err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Model updated successfully, ID: %s", id) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": model, + }) +} + +// DeleteModel handles the HTTP request to delete a model by its ID +// It validates the model ID, attempts to delete the model through the service, +// and returns appropriate status and messages +// Parameters: +// - c: Gin context for the HTTP request +func (h *ModelHandler) DeleteModel(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start deleting model") + + id := c.Param("id") + if id == "" { + logger.Error(ctx, "Model ID is empty") + c.Error(errors.NewBadRequestError("Model ID cannot be empty")) + return + } + + logger.Infof(ctx, "Deleting model, ID: %s", id) + if err := h.service.DeleteModel(ctx, id); err != nil { + if err == service.ErrModelNotFound { + logger.Warnf(ctx, "Model not found, ID: %s", id) + c.Error(errors.NewNotFoundError("Model not found")) + return + } + logger.ErrorWithFields(ctx, err, 
nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Model deleted successfully, ID: %s", id) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "message": "Model deleted", + }) +} diff --git a/internal/handler/session.go b/internal/handler/session.go new file mode 100644 index 0000000..73a88de --- /dev/null +++ b/internal/handler/session.go @@ -0,0 +1,809 @@ +package handler + +import ( + "context" + "fmt" + "io" + "net/http" + "time" + + "github.com/Tencent/WeKnora/internal/application/service" + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/errors" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/gin-gonic/gin" +) + +// SessionHandler handles all HTTP requests related to conversation sessions +type SessionHandler struct { + messageService interfaces.MessageService // Service for managing messages + sessionService interfaces.SessionService // Service for managing sessions + streamManager interfaces.StreamManager // Manager for handling streaming responses + config *config.Config // Application configuration + testDataService *service.TestDataService // Service for test data (models, etc.) +} + +// NewSessionHandler creates a new instance of SessionHandler with all necessary dependencies +func NewSessionHandler( + sessionService interfaces.SessionService, + messageService interfaces.MessageService, + streamManager interfaces.StreamManager, + config *config.Config, + testDataService *service.TestDataService, +) *SessionHandler { + return &SessionHandler{ + sessionService: sessionService, + messageService: messageService, + streamManager: streamManager, + config: config, + testDataService: testDataService, + } +} + +// SessionStrategy defines the configuration for a conversation session strategy +type SessionStrategy struct { + // Maximum number of conversation rounds to maintain + MaxRounds int `json:"max_rounds"` + // Whether to enable query rewrite for multi-round conversations + EnableRewrite bool `json:"enable_rewrite"` + // Strategy to use when no relevant knowledge is found + FallbackStrategy types.FallbackStrategy `json:"fallback_strategy"` + // Fixed response content for fallback + FallbackResponse string `json:"fallback_response"` + // Number of top results to retrieve from vector search + EmbeddingTopK int `json:"embedding_top_k"` + // Threshold for keyword-based retrieval + KeywordThreshold float64 `json:"keyword_threshold"` + // Threshold for vector-based retrieval + VectorThreshold float64 `json:"vector_threshold"` + // ID of the model used for reranking results + RerankModelID string `json:"rerank_model_id"` + // Number of top results after reranking + RerankTopK int `json:"rerank_top_k"` + // Threshold for reranking results + RerankThreshold float64 `json:"rerank_threshold"` + // ID of the model used for summarization + SummaryModelID string `json:"summary_model_id"` + // Parameters for the summary model + SummaryParameters *types.SummaryConfig `json:"summary_parameters" gorm:"type:json"` + // Prefix for responses when no match is found + NoMatchPrefix string `json:"no_match_prefix"` +} + +// CreateSessionRequest represents a request to create a new session +type CreateSessionRequest struct { + // ID of the associated knowledge base + KnowledgeBaseID string `json:"knowledge_base_id" binding:"required"` + // Session strategy configuration + SessionStrategy *SessionStrategy 
`json:"session_strategy"` +} + +// CreateSession handles the creation of a new conversation session +func (h *SessionHandler) CreateSession(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start creating session") + + // Parse and validate the request body + var request CreateSessionRequest + if err := c.ShouldBindJSON(&request); err != nil { + logger.Error(ctx, "Failed to validate session creation parameters", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + // Get tenant ID from context + tenantID, exists := c.Get(types.TenantIDContextKey.String()) + if !exists { + logger.Error(ctx, "Failed to get tenant ID") + c.Error(errors.NewUnauthorizedError("Unauthorized")) + return + } + + // Validate knowledge base ID + if request.KnowledgeBaseID == "" { + logger.Error(ctx, "Knowledge base ID is empty") + c.Error(errors.NewBadRequestError("Knowledge base cannot be empty")) + return + } + + logger.Infof( + ctx, + "Processing session creation request, tenant ID: %d, knowledge base ID: %s", + tenantID.(uint), + request.KnowledgeBaseID, + ) + + // Create session object with base properties + createdSession := &types.Session{ + TenantID: tenantID.(uint), + KnowledgeBaseID: request.KnowledgeBaseID, + } + + // If summary model parameters are empty, set defaults + if request.SessionStrategy != nil { + createdSession.RerankModelID = request.SessionStrategy.RerankModelID + createdSession.SummaryModelID = request.SessionStrategy.SummaryModelID + createdSession.MaxRounds = request.SessionStrategy.MaxRounds + createdSession.EnableRewrite = request.SessionStrategy.EnableRewrite + createdSession.FallbackStrategy = request.SessionStrategy.FallbackStrategy + createdSession.FallbackResponse = request.SessionStrategy.FallbackResponse + createdSession.EmbeddingTopK = request.SessionStrategy.EmbeddingTopK + createdSession.KeywordThreshold = request.SessionStrategy.KeywordThreshold + createdSession.VectorThreshold = request.SessionStrategy.VectorThreshold + createdSession.RerankTopK = request.SessionStrategy.RerankTopK + createdSession.RerankThreshold = request.SessionStrategy.RerankThreshold + if request.SessionStrategy.SummaryParameters != nil { + createdSession.SummaryParameters = request.SessionStrategy.SummaryParameters + } else { + createdSession.SummaryParameters = &types.SummaryConfig{ + MaxTokens: h.config.Conversation.Summary.MaxTokens, + TopP: h.config.Conversation.Summary.TopP, + TopK: h.config.Conversation.Summary.TopK, + FrequencyPenalty: h.config.Conversation.Summary.FrequencyPenalty, + PresencePenalty: h.config.Conversation.Summary.PresencePenalty, + RepeatPenalty: h.config.Conversation.Summary.RepeatPenalty, + NoMatchPrefix: h.config.Conversation.Summary.NoMatchPrefix, + Temperature: h.config.Conversation.Summary.Temperature, + Seed: h.config.Conversation.Summary.Seed, + MaxCompletionTokens: h.config.Conversation.Summary.MaxCompletionTokens, + } + } + if createdSession.SummaryParameters.Prompt == "" { + createdSession.SummaryParameters.Prompt = h.config.Conversation.Summary.Prompt + } + if createdSession.SummaryParameters.ContextTemplate == "" { + createdSession.SummaryParameters.ContextTemplate = h.config.Conversation.Summary.ContextTemplate + } + if createdSession.SummaryParameters.NoMatchPrefix == "" { + createdSession.SummaryParameters.NoMatchPrefix = h.config.Conversation.Summary.NoMatchPrefix + } + + logger.Debug(ctx, "Custom session strategy set") + } else { + // Use default configuration from global config + createdSession.MaxRounds = 
h.config.Conversation.MaxRounds + createdSession.EnableRewrite = h.config.Conversation.EnableRewrite + createdSession.FallbackStrategy = types.FallbackStrategy(h.config.Conversation.FallbackStrategy) + createdSession.FallbackResponse = h.config.Conversation.FallbackResponse + createdSession.EmbeddingTopK = h.config.Conversation.EmbeddingTopK + createdSession.KeywordThreshold = h.config.Conversation.KeywordThreshold + createdSession.VectorThreshold = h.config.Conversation.VectorThreshold + createdSession.RerankThreshold = h.config.Conversation.RerankThreshold + createdSession.RerankTopK = h.config.Conversation.RerankTopK + createdSession.SummaryParameters = &types.SummaryConfig{ + MaxTokens: h.config.Conversation.Summary.MaxTokens, + TopP: h.config.Conversation.Summary.TopP, + TopK: h.config.Conversation.Summary.TopK, + FrequencyPenalty: h.config.Conversation.Summary.FrequencyPenalty, + PresencePenalty: h.config.Conversation.Summary.PresencePenalty, + RepeatPenalty: h.config.Conversation.Summary.RepeatPenalty, + Prompt: h.config.Conversation.Summary.Prompt, + ContextTemplate: h.config.Conversation.Summary.ContextTemplate, + NoMatchPrefix: h.config.Conversation.Summary.NoMatchPrefix, + Temperature: h.config.Conversation.Summary.Temperature, + Seed: h.config.Conversation.Summary.Seed, + MaxCompletionTokens: h.config.Conversation.Summary.MaxCompletionTokens, + } + + logger.Debug(ctx, "Using default session strategy") + } + + // Get model IDs from test data service if not provided + if createdSession.SummaryModelID == "" { + if h.testDataService != nil { + createdSession.SummaryModelID = h.testDataService.LLMModel.GetModelID() + logger.Debug(ctx, "Using summary model ID from test data service") + } else { + logger.Error(ctx, "Summary model ID is empty and cannot get default value") + c.Error(errors.NewBadRequestError("Summary model ID cannot be empty")) + return + } + } + if createdSession.RerankModelID == "" { + if h.testDataService != nil && h.testDataService.RerankModel != nil { + createdSession.RerankModelID = h.testDataService.RerankModel.GetModelID() + logger.Debug(ctx, "Using rerank model ID from test data service") + } + } + + // Call service to create session + logger.Infof(ctx, "Calling session service to create session") + createdSession, err := h.sessionService.CreateSession(ctx, createdSession) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + // Return created session + logger.Infof(ctx, "Session created successfully, ID: %s", createdSession.ID) + c.JSON(http.StatusCreated, gin.H{ + "success": true, + "data": createdSession, + }) +} + +// GetSession retrieves a session by its ID +func (h *SessionHandler) GetSession(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start retrieving session") + + // Get session ID from URL parameter + id := c.Param("id") + if id == "" { + logger.Error(ctx, "Session ID is empty") + c.Error(errors.NewBadRequestError(errors.ErrInvalidSessionID.Error())) + return + } + + // Call service to get session details + logger.Infof(ctx, "Retrieving session, ID: %s", id) + session, err := h.sessionService.GetSession(ctx, id) + if err != nil { + if err == errors.ErrSessionNotFound { + logger.Warnf(ctx, "Session not found, ID: %s", id) + c.Error(errors.NewNotFoundError(err.Error())) + return + } + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + // Return session data + logger.Infof(ctx, "Session retrieved 
successfully, ID: %s", id) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": session, + }) +} + +// GetSessionsByTenant retrieves all sessions for the current tenant with pagination +func (h *SessionHandler) GetSessionsByTenant(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start retrieving all sessions for tenant") + + // Parse pagination parameters from query + var pagination types.Pagination + if err := c.ShouldBindQuery(&pagination); err != nil { + logger.Error(ctx, "Failed to parse pagination parameters", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + logger.Debugf(ctx, "Using pagination parameters: page=%d, page_size=%d", pagination.Page, pagination.PageSize) + + // Use paginated query to get sessions + result, err := h.sessionService.GetPagedSessionsByTenant(ctx, &pagination) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + // Return sessions with pagination data + logger.Infof(ctx, "Successfully retrieved tenant sessions, total: %d", result.Total) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": result.Data, + "total": result.Total, + "page": result.Page, + "page_size": result.PageSize, + }) +} + +// UpdateSession updates an existing session's properties +func (h *SessionHandler) UpdateSession(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start updating session") + + // Get session ID from URL parameter + id := c.Param("id") + if id == "" { + logger.Error(ctx, "Session ID is empty") + c.Error(errors.NewBadRequestError(errors.ErrInvalidSessionID.Error())) + return + } + + // Verify tenant ID from context for authorization + tenantID, exists := c.Get(types.TenantIDContextKey.String()) + if !exists { + logger.Error(ctx, "Failed to get tenant ID") + c.Error(errors.NewUnauthorizedError("Unauthorized")) + return + } + + // Parse request body to session object + var session types.Session + if err := c.ShouldBindJSON(&session); err != nil { + logger.Error(ctx, "Failed to parse session data", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + // Set session ID and tenant ID + logger.Infof(ctx, "Updating session, ID: %s, tenant ID: %d", id, tenantID.(uint)) + session.ID = id + session.TenantID = tenantID.(uint) + + // Call service to update session + if err := h.sessionService.UpdateSession(ctx, &session); err != nil { + if err == errors.ErrSessionNotFound { + logger.Warnf(ctx, "Session not found, ID: %s", id) + c.Error(errors.NewNotFoundError(err.Error())) + return + } + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + // Return updated session + logger.Infof(ctx, "Session updated successfully, ID: %s", id) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": session, + }) +} + +// DeleteSession deletes a session by its ID +func (h *SessionHandler) DeleteSession(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start deleting session") + + // Get session ID from URL parameter + id := c.Param("id") + if id == "" { + logger.Error(ctx, "Session ID is empty") + c.Error(errors.NewBadRequestError(errors.ErrInvalidSessionID.Error())) + return + } + + // Call service to delete session + logger.Infof(ctx, "Deleting session, ID: %s", id) + if err := h.sessionService.DeleteSession(ctx, id); err != nil { + if err == errors.ErrSessionNotFound { + logger.Warnf(ctx, "Session not found, ID: %s", id) + 
c.Error(errors.NewNotFoundError(err.Error())) + return + } + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + // Return success message + logger.Infof(ctx, "Session deleted successfully, ID: %s", id) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "message": "Session deleted successfully", + }) +} + +// GenerateTitleRequest defines the request structure for generating a session title +type GenerateTitleRequest struct { + Messages []types.Message `json:"messages" binding:"required"` // Messages to use as context for title generation +} + +// GenerateTitle generates a title for a session based on message content +func (h *SessionHandler) GenerateTitle(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start generating session title") + + // Get session ID from URL parameter + sessionID := c.Param("session_id") + if sessionID == "" { + logger.Error(ctx, "Session ID is empty") + c.Error(errors.NewBadRequestError(errors.ErrInvalidSessionID.Error())) + return + } + + // Parse request body + var request GenerateTitleRequest + if err := c.ShouldBindJSON(&request); err != nil { + logger.Error(ctx, "Failed to parse request data", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + // Call service to generate title + logger.Infof(ctx, "Generating session title, session ID: %s, message count: %d", sessionID, len(request.Messages)) + title, err := h.sessionService.GenerateTitle(ctx, sessionID, request.Messages) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + // Return generated title + logger.Infof(ctx, "Session title generated successfully, session ID: %s, title: %s", sessionID, title) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": title, + }) +} + +// CreateKnowledgeQARequest defines the request structure for knowledge QA +type CreateKnowledgeQARequest struct { + Query string `json:"query" binding:"required"` // Query text for knowledge base search +} + +// SearchKnowledgeRequest defines the request structure for searching knowledge without LLM summarization +type SearchKnowledgeRequest struct { + Query string `json:"query" binding:"required"` // Query text to search for + KnowledgeBaseID string `json:"knowledge_base_id" binding:"required"` // ID of the knowledge base to search +} + +// SearchKnowledge performs knowledge base search without LLM summarization +func (h *SessionHandler) SearchKnowledge(c *gin.Context) { + ctx := logger.CloneContext(c.Request.Context()) + + logger.Info(ctx, "Start processing knowledge search request") + + // Parse request body + var request SearchKnowledgeRequest + if err := c.ShouldBindJSON(&request); err != nil { + logger.Error(ctx, "Failed to parse request data", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + // Validate request parameters + if request.Query == "" { + logger.Error(ctx, "Query content is empty") + c.Error(errors.NewBadRequestError("Query content cannot be empty")) + return + } + + if request.KnowledgeBaseID == "" { + logger.Error(ctx, "Knowledge base ID is empty") + c.Error(errors.NewBadRequestError("Knowledge base ID cannot be empty")) + return + } + + logger.Infof( + ctx, + "Knowledge search request, knowledge base ID: %s, query: %s", + request.KnowledgeBaseID, + request.Query, + ) + + // Directly call knowledge retrieval service without LLM summarization + searchResults, err := h.sessionService.SearchKnowledge(ctx, 
request.KnowledgeBaseID, request.Query) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + logger.Infof(ctx, "Knowledge search completed, found %d results", len(searchResults)) + + // Return search results + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": searchResults, + }) +} + +// ContinueStream handles continued streaming of an active response stream +func (h *SessionHandler) ContinueStream(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start continuing stream response processing") + + // Get session ID from URL parameter + sessionID := c.Param("session_id") + if sessionID == "" { + logger.Error(ctx, "Session ID is empty") + c.Error(errors.NewBadRequestError(errors.ErrInvalidSessionID.Error())) + return + } + + // Get message ID from query parameter + messageID := c.Query("message_id") + if messageID == "" { + logger.Error(ctx, "Message ID is empty") + c.Error(errors.NewBadRequestError("Missing message ID")) + return + } + + logger.Infof(ctx, "Continuing stream, session ID: %s, message ID: %s", sessionID, messageID) + + // Verify that the session exists and belongs to this tenant + _, err := h.sessionService.GetSession(ctx, sessionID) + if err != nil { + if err == errors.ErrSessionNotFound { + logger.Warnf(ctx, "Session not found, ID: %s", sessionID) + c.Error(errors.NewNotFoundError(err.Error())) + } else { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + } + return + } + + // Get the incomplete message + message, err := h.messageService.GetMessage(ctx, sessionID, messageID) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + if message == nil { + logger.Warnf(ctx, "Incomplete message not found, session ID: %s, message ID: %s", sessionID, messageID) + c.JSON(http.StatusNotFound, gin.H{ + "success": false, + "error": "Incomplete message not found", + }) + return + } + + // Get stream information + streamInfo, err := h.streamManager.GetStream(ctx, sessionID, messageID) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(fmt.Sprintf("Failed to get stream data: %s", err.Error()))) + return + } + + if streamInfo == nil { + logger.Warnf(ctx, "Active stream not found, session ID: %s, message ID: %s", sessionID, messageID) + c.JSON(http.StatusNotFound, gin.H{ + "success": false, + "error": "Active stream not found", + }) + return + } + + // If stream is already completed, return the full message + if streamInfo.IsCompleted { + logger.Infof( + ctx, "Stream already completed, returning directly, session ID: %s, message ID: %s", sessionID, messageID, + ) + c.JSON(http.StatusOK, gin.H{ + "id": message.ID, + "role": message.Role, + "content": message.Content, + "created_at": message.CreatedAt, + "done": true, + }) + return + } + + logger.Infof( + ctx, "Preparing to set SSE headers and send stream data, session ID: %s, message ID: %s", sessionID, messageID, + ) + + // Send knowledge references first if available + if len(streamInfo.KnowledgeReferences) > 0 { + logger.Debug(ctx, "Sending knowledge references") + c.SSEvent("message", &types.StreamResponse{ + ID: message.RequestID, + ResponseType: types.ResponseTypeReferences, + Done: false, + KnowledgeReferences: streamInfo.KnowledgeReferences, + }) + } + + // Send existing content + if streamInfo.Content != "" { + logger.Debug(ctx, "Sending current existing content") + 
c.SSEvent("message", &types.StreamResponse{ + ID: message.RequestID, + ResponseType: types.ResponseTypeAnswer, + Content: streamInfo.Content, + Done: streamInfo.IsCompleted, + }) + } + + // Create channels to monitor content updates + contentCh := make(chan string, 10) + doneCh := make(chan bool, 1) + + logger.Debug(ctx, "Starting content update monitoring") + + // Start a goroutine to monitor for content updates + go func() { + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + currentContent := streamInfo.Content + + for { + select { + case <-ticker.C: + latestStreamInfo, err := h.streamManager.GetStream(ctx, sessionID, messageID) + if err != nil { + logger.Errorf(ctx, "Failed to get stream data: %v", err) + doneCh <- true + return + } + + if latestStreamInfo == nil { + logger.Debug(ctx, "Stream no longer exists") + doneCh <- true + return + } + + if latestStreamInfo.IsCompleted { + logger.Debug(ctx, "Stream completed") + doneCh <- true + return + } + + // Calculate new content delta + if len(latestStreamInfo.Content) > len(currentContent) { + newContent := latestStreamInfo.Content[len(currentContent):] + contentCh <- newContent + currentContent = latestStreamInfo.Content + logger.Debugf(ctx, "Sending new content: %d bytes", len(newContent)) + } + + case <-c.Request.Context().Done(): + logger.Debug(ctx, "Client connection closed") + return + } + } + }() + + logger.Info(ctx, "Starting stream response") + + // Stream updated content to client + c.Stream(func(w io.Writer) bool { + select { + case <-c.Request.Context().Done(): + logger.Debug(ctx, "Client connection closed") + return false + + case <-doneCh: + logger.Debug(ctx, "Stream completed, sending completion notification") + c.SSEvent("message", &types.StreamResponse{ + ID: message.RequestID, + ResponseType: types.ResponseTypeAnswer, + Content: "", + Done: true, + }) + return false + + case content := <-contentCh: + logger.Debugf(ctx, "Sending content fragment: %d bytes", len(content)) + c.SSEvent("message", &types.StreamResponse{ + ID: message.RequestID, + ResponseType: types.ResponseTypeAnswer, + Content: content, + Done: false, + }) + return true + } + }) +} + +// KnowledgeQA handles knowledge base question answering requests with LLM summarization +func (h *SessionHandler) KnowledgeQA(c *gin.Context) { + ctx := logger.CloneContext(c.Request.Context()) + + logger.Info(ctx, "Start processing knowledge QA request") + + // Get session ID from URL parameter + sessionID := c.Param("session_id") + if sessionID == "" { + logger.Error(ctx, "Session ID is empty") + c.Error(errors.NewBadRequestError(errors.ErrInvalidSessionID.Error())) + return + } + + // Parse request body + var request CreateKnowledgeQARequest + if err := c.ShouldBindJSON(&request); err != nil { + logger.Error(ctx, "Failed to parse request data", err) + c.Error(errors.NewBadRequestError(err.Error())) + return + } + + // Create assistant message + assistantMessage := &types.Message{ + SessionID: sessionID, + Role: "assistant", + RequestID: c.GetString(types.RequestIDContextKey.String()), + IsCompleted: false, + } + defer h.completeAssistantMessage(ctx, assistantMessage) + + // Validate query content + if request.Query == "" { + logger.Error(ctx, "Query content is empty") + c.Error(errors.NewBadRequestError("Query content cannot be empty")) + return + } + + logger.Infof(ctx, "Knowledge QA request, session ID: %s, query: %s", sessionID, request.Query) + + // Create user message + if _, err := h.messageService.CreateMessage(ctx, &types.Message{ + 
SessionID: sessionID, + Role: "user", + Content: request.Query, + RequestID: c.GetString(types.RequestIDContextKey.String()), + CreatedAt: time.Now(), + IsCompleted: true, + }); err != nil { + c.Error(errors.NewInternalServerError(err.Error())) + return + } + + // Create assistant message (response) + assistantMessage.CreatedAt = time.Now() + if _, err := h.messageService.CreateMessage(ctx, assistantMessage); err != nil { + c.Error(errors.NewInternalServerError(err.Error())) + return + } + logger.Infof(ctx, "Calling knowledge QA service, session ID: %s", sessionID) + + // Call service to perform knowledge QA + searchResults, respCh, err := h.sessionService.KnowledgeQA(ctx, sessionID, request.Query) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError(err.Error())) + return + } + assistantMessage.KnowledgeReferences = searchResults + + // Register new stream with stream manager + requestID := c.GetString(types.RequestIDContextKey.String()) + if err := h.streamManager.RegisterStream(ctx, sessionID, assistantMessage.ID, request.Query); err != nil { + logger.GetLogger(ctx).Error("Register stream failed", "error", err) + } + + // Send knowledge references if available + if len(searchResults) > 0 { + logger.Debugf(ctx, "Sending reference content, total %d", len(searchResults)) + c.SSEvent("message", &types.StreamResponse{ + ID: requestID, + ResponseType: types.ResponseTypeReferences, + KnowledgeReferences: searchResults, + }) + c.Writer.Flush() + } else { + logger.Debug(ctx, "No reference content to send") + } + + // Process streamed response + func() { + defer func() { + // Mark stream as completed when done + if err := h.streamManager.CompleteStream(ctx, sessionID, assistantMessage.ID); err != nil { + logger.GetLogger(ctx).Error("Complete stream failed", "error", err) + } + }() + for response := range respCh { + response.ID = requestID + c.SSEvent("message", response) + c.Writer.Flush() + if response.ResponseType == types.ResponseTypeAnswer { + assistantMessage.Content += response.Content + // Update stream manager with new content + if err := h.streamManager.UpdateStream( + ctx, sessionID, assistantMessage.ID, response.Content, searchResults, + ); err != nil { + logger.GetLogger(ctx).Error("Update stream content failed", "error", err) + } + } + } + }() +} + +// completeAssistantMessage marks an assistant message as complete and updates it +func (h *SessionHandler) completeAssistantMessage(ctx context.Context, assistantMessage *types.Message) { + assistantMessage.UpdatedAt = time.Now() + assistantMessage.IsCompleted = true + _ = h.messageService.UpdateMessage(ctx, assistantMessage) +} diff --git a/internal/handler/tenant.go b/internal/handler/tenant.go new file mode 100644 index 0000000..c862894 --- /dev/null +++ b/internal/handler/tenant.go @@ -0,0 +1,226 @@ +package handler + +import ( + "net/http" + "strconv" + + "github.com/gin-gonic/gin" + + "github.com/Tencent/WeKnora/internal/errors" + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// TenantHandler implements HTTP request handlers for tenant management +// Provides functionality for creating, retrieving, updating, and deleting tenants +// through the REST API endpoints +type TenantHandler struct { + service interfaces.TenantService +} + +// NewTenantHandler creates a new tenant handler instance with the provided service +// Parameters: +// - service: An implementation of the 
TenantService interface for business logic +// +// Returns a pointer to the newly created TenantHandler +func NewTenantHandler(service interfaces.TenantService) *TenantHandler { + return &TenantHandler{ + service: service, + } +} + +// CreateTenant handles the HTTP request for creating a new tenant +// It deserializes the request body into a tenant object, validates it, +// calls the service to create the tenant, and returns the result +// Parameters: +// - c: Gin context for the HTTP request +func (h *TenantHandler) CreateTenant(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start creating tenant") + + var tenantData types.Tenant + if err := c.ShouldBindJSON(&tenantData); err != nil { + logger.Error(ctx, "Failed to parse request parameters", err) + appErr := errors.NewValidationError("Invalid request parameters").WithDetails(err.Error()) + c.Error(appErr) + return + } + + logger.Infof(ctx, "Creating tenant, name: %s", tenantData.Name) + + createdTenant, err := h.service.CreateTenant(ctx, &tenantData) + if err != nil { + // Check if this is an application-specific error + if appErr, ok := errors.IsAppError(err); ok { + logger.Error(ctx, "Failed to create tenant: application error", appErr) + c.Error(appErr) + } else { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError("Failed to create tenant").WithDetails(err.Error())) + } + return + } + + logger.Infof(ctx, "Tenant created successfully, ID: %d, name: %s", createdTenant.ID, createdTenant.Name) + c.JSON(http.StatusCreated, gin.H{ + "success": true, + "data": createdTenant, + }) +} + +// GetTenant handles the HTTP request for retrieving a tenant by ID +// It extracts and validates the tenant ID from the URL parameter, +// retrieves the tenant from the service, and returns it in the response +// Parameters: +// - c: Gin context for the HTTP request +func (h *TenantHandler) GetTenant(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start retrieving tenant") + + id, err := strconv.ParseUint(c.Param("id"), 10, 64) + if err != nil { + logger.Errorf(ctx, "Invalid tenant ID: %s", c.Param("id")) + c.Error(errors.NewBadRequestError("Invalid tenant ID")) + return + } + + logger.Infof(ctx, "Retrieving tenant, ID: %d", id) + + tenant, err := h.service.GetTenantByID(ctx, uint(id)) + if err != nil { + // Check if this is an application-specific error + if appErr, ok := errors.IsAppError(err); ok { + logger.Error(ctx, "Failed to retrieve tenant: application error", appErr) + c.Error(appErr) + } else { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError("Failed to retrieve tenant").WithDetails(err.Error())) + } + return + } + + logger.Infof(ctx, "Retrieved tenant successfully, ID: %d, Name: %s", tenant.ID, tenant.Name) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": tenant, + }) +} + +// UpdateTenant handles the HTTP request for updating an existing tenant +// It extracts the tenant ID from the URL parameter, deserializes the request body, +// validates the data, updates the tenant through the service, and returns the result +// Parameters: +// - c: Gin context for the HTTP request +func (h *TenantHandler) UpdateTenant(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start updating tenant") + + id, err := strconv.ParseUint(c.Param("id"), 10, 64) + if err != nil { + logger.Errorf(ctx, "Invalid tenant ID: %s", c.Param("id")) + c.Error(errors.NewBadRequestError("Invalid tenant ID")) + return + } + + var tenantData types.Tenant + if 
err := c.ShouldBindJSON(&tenantData); err != nil { + logger.Error(ctx, "Failed to parse request parameters", err) + c.Error(errors.NewValidationError("Invalid request data").WithDetails(err.Error())) + return + } + + logger.Infof(ctx, "Updating tenant, ID: %d, Name: %s", id, tenantData.Name) + + tenantData.ID = uint(id) + updatedTenant, err := h.service.UpdateTenant(ctx, &tenantData) + if err != nil { + // Check if this is an application-specific error + if appErr, ok := errors.IsAppError(err); ok { + logger.Error(ctx, "Failed to update tenant: application error", appErr) + c.Error(appErr) + } else { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError("Failed to update tenant").WithDetails(err.Error())) + } + return + } + + logger.Infof(ctx, "Tenant updated successfully, ID: %d, Name: %s", updatedTenant.ID, updatedTenant.Name) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": updatedTenant, + }) +} + +// DeleteTenant handles the HTTP request for deleting a tenant +// It extracts and validates the tenant ID from the URL parameter, +// calls the service to delete the tenant, and returns the result +// Parameters: +// - c: Gin context for the HTTP request +func (h *TenantHandler) DeleteTenant(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start deleting tenant") + + id, err := strconv.ParseUint(c.Param("id"), 10, 64) + if err != nil { + logger.Errorf(ctx, "Invalid tenant ID: %s", c.Param("id")) + c.Error(errors.NewBadRequestError("Invalid tenant ID")) + return + } + + logger.Infof(ctx, "Deleting tenant, ID: %d", id) + + if err := h.service.DeleteTenant(ctx, uint(id)); err != nil { + // Check if this is an application-specific error + if appErr, ok := errors.IsAppError(err); ok { + logger.Error(ctx, "Failed to delete tenant: application error", appErr) + c.Error(appErr) + } else { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError("Failed to delete tenant").WithDetails(err.Error())) + } + return + } + + logger.Infof(ctx, "Tenant deleted successfully, ID: %d", id) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "message": "Tenant deleted successfully", + }) +} + +// ListTenants handles the HTTP request for retrieving a list of all tenants +// It calls the service to fetch the tenant list and returns it in the response +// Parameters: +// - c: Gin context for the HTTP request +func (h *TenantHandler) ListTenants(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start retrieving tenant list") + + tenants, err := h.service.ListTenants(ctx) + if err != nil { + // Check if this is an application-specific error + if appErr, ok := errors.IsAppError(err); ok { + logger.Error(ctx, "Failed to retrieve tenant list: application error", appErr) + c.Error(appErr) + } else { + logger.ErrorWithFields(ctx, err, nil) + c.Error(errors.NewInternalServerError("Failed to retrieve tenant list").WithDetails(err.Error())) + } + return + } + + logger.Infof(ctx, "Retrieved tenant list successfully, Total: %d tenants", len(tenants)) + c.JSON(http.StatusOK, gin.H{ + "success": true, + "data": gin.H{ + "items": tenants, + }, + }) +} diff --git a/internal/handler/test_data.go b/internal/handler/test_data.go new file mode 100644 index 0000000..bdfe12b --- /dev/null +++ b/internal/handler/test_data.go @@ -0,0 +1,108 @@ +package handler + +import ( + "errors" + "net/http" + "os" + "strconv" + + "github.com/gin-gonic/gin" + + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/logger" + 
"github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// TestDataHandler handles HTTP requests related to test data operations +// Used for development and testing purposes to provide sample data +type TestDataHandler struct { + config *config.Config + kbService interfaces.KnowledgeBaseService + tenantService interfaces.TenantService +} + +// NewTestDataHandler creates a new instance of the test data handler +// Parameters: +// - config: Application configuration instance +// - kbService: Knowledge base service for accessing knowledge base data +// - tenantService: Tenant service for accessing tenant data +// +// Returns a pointer to the new TestDataHandler instance +func NewTestDataHandler( + config *config.Config, + kbService interfaces.KnowledgeBaseService, + tenantService interfaces.TenantService, +) *TestDataHandler { + return &TestDataHandler{ + config: config, + kbService: kbService, + tenantService: tenantService, + } +} + +// GetTestData handles the HTTP request to retrieve test data for development purposes +// It returns predefined test tenant and knowledge base information +// This endpoint is only available in non-production environments +// Parameters: +// - c: Gin context for the HTTP request +func (h *TestDataHandler) GetTestData(c *gin.Context) { + ctx := c.Request.Context() + + logger.Info(ctx, "Start retrieving test data") + + // Check if we're running in release/production mode + if gin.Mode() == gin.ReleaseMode { + logger.Warn(ctx, "Attempting to retrieve test data in production mode") + c.Error(errors.New("This API is only available in development mode")) + return + } + + // Check if test data environment variables are configured + if os.Getenv("INIT_TEST_TENANT_ID") == "" || os.Getenv("INIT_TEST_KNOWLEDGE_BASE_ID") == "" { + logger.Warn(ctx, "Test data environment variables not set") + c.Error(errors.New("Test data not enabled")) + return + } + + tenantID := os.Getenv("INIT_TEST_TENANT_ID") + logger.Debugf(ctx, "Test tenant ID environment variable: %s", tenantID) + + tenantIDUint, err := strconv.ParseUint(tenantID, 10, 64) + if err != nil { + logger.Errorf(ctx, "Failed to parse tenant ID: %s", tenantID) + c.Error(err) + return + } + + // Retrieve the test tenant data + logger.Infof(ctx, "Retrieving test tenant, ID: %d", tenantIDUint) + tenant, err := h.tenantService.GetTenantByID(ctx, uint(tenantIDUint)) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(err) + return + } + + knowledgeBaseID := os.Getenv("INIT_TEST_KNOWLEDGE_BASE_ID") + logger.Debugf(ctx, "Test knowledge base ID environment variable: %s", knowledgeBaseID) + + // Retrieve the test knowledge base data + logger.Infof(ctx, "Retrieving test knowledge base, ID: %s", knowledgeBaseID) + knowledgeBase, err := h.kbService.GetKnowledgeBaseByID(ctx, knowledgeBaseID) + if err != nil { + logger.ErrorWithFields(ctx, err, nil) + c.Error(err) + return + } + + logger.Info(ctx, "Test data retrieved successfully") + // Return the test data in the response + c.JSON(http.StatusOK, gin.H{ + "data": gin.H{ + "tenant": tenant, + "knowledge_bases": []types.KnowledgeBase{*knowledgeBase}, + }, + "success": true, + }) +} diff --git a/internal/logger/logger.go b/internal/logger/logger.go new file mode 100644 index 0000000..fd9ef9d --- /dev/null +++ b/internal/logger/logger.go @@ -0,0 +1,252 @@ +package logger + +import ( + "context" + "fmt" + "path" + "runtime" + "sort" + "strings" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/sirupsen/logrus" 
+) + +// LogLevel 日志级别类型 +type LogLevel string + +// 日志级别常量 +const ( + LevelDebug LogLevel = "debug" + LevelInfo LogLevel = "info" + LevelWarn LogLevel = "warn" + LevelError LogLevel = "error" + LevelFatal LogLevel = "fatal" +) + +// ANSI颜色代码 +const ( + colorRed = "\033[31m" + colorGreen = "\033[32m" + colorYellow = "\033[33m" + colorBlue = "\033[34m" + colorPurple = "\033[35m" + colorCyan = "\033[36m" + colorReset = "\033[0m" +) + +type CustomFormatter struct { + ForceColor bool // 是否强制使用颜色,即使在非终端环境下 +} + +func (f *CustomFormatter) Format(entry *logrus.Entry) ([]byte, error) { + timestamp := entry.Time.Format("2006-01-02 15:04:05.000") + level := strings.ToUpper(entry.Level.String()) + + // 根据日志级别设置颜色 + var levelColor, resetColor string + if f.ForceColor { + switch entry.Level { + case logrus.DebugLevel: + levelColor = colorCyan + case logrus.InfoLevel: + levelColor = colorGreen + case logrus.WarnLevel: + levelColor = colorYellow + case logrus.ErrorLevel: + levelColor = colorRed + case logrus.FatalLevel: + levelColor = colorPurple + default: + levelColor = colorReset + } + resetColor = colorReset + } + + // 取出 caller 字段 + caller := "" + if val, ok := entry.Data["caller"]; ok { + caller = fmt.Sprintf("%v", val) + } + + // 拼接字段部分:request_id 优先,其他排序后输出 + fields := "" + + // request_id 优先输出 + if v, ok := entry.Data["request_id"]; ok { + fields += fmt.Sprintf("request_id=%v ", v) + } + + // 其余字段排序后输出 + keys := make([]string, 0, len(entry.Data)) + for k := range entry.Data { + if k != "caller" && k != "request_id" { + keys = append(keys, k) + } + } + sort.Strings(keys) + for _, k := range keys { + fields += fmt.Sprintf("%s=%v ", k, entry.Data[k]) + } + + fields = strings.TrimSpace(fields) + + // 拼接最终输出内容,添加颜色 + return []byte(fmt.Sprintf("%s%-5s%s[%s] [%s] %-20s | %s\n", + levelColor, level, resetColor, timestamp, fields, caller, entry.Message)), nil +} + +// 初始化全局日志设置 +func init() { + // 设置日志格式而不修改全局时区 + logrus.SetFormatter(&CustomFormatter{ForceColor: true}) + logrus.SetReportCaller(false) +} + +// GetLogger 获取日志实例 +func GetLogger(c context.Context) *logrus.Entry { + if logger := c.Value(types.LoggerContextKey); logger != nil { + return logger.(*logrus.Entry) + } + newLogger := logrus.New() + newLogger.SetFormatter(&CustomFormatter{ForceColor: true}) + // 设置默认日志级别 + newLogger.SetLevel(logrus.DebugLevel) + // 启用调用者信息 + return logrus.NewEntry(newLogger) +} + +// SetLogLevel 设置日志级别 +func SetLogLevel(level LogLevel) { + var logLevel logrus.Level + + switch level { + case LevelDebug: + logLevel = logrus.DebugLevel + case LevelInfo: + logLevel = logrus.InfoLevel + case LevelWarn: + logLevel = logrus.WarnLevel + case LevelError: + logLevel = logrus.ErrorLevel + case LevelFatal: + logLevel = logrus.FatalLevel + default: + logLevel = logrus.InfoLevel + } + + logrus.SetLevel(logLevel) +} + +// 添加调用者字段 +func addCaller(entry *logrus.Entry, skip int) *logrus.Entry { + pc, file, line, ok := runtime.Caller(skip) + if !ok { + return entry + } + shortFile := path.Base(file) + funcName := "unknown" + if fn := runtime.FuncForPC(pc); fn != nil { + // 只保留函数名,不带包路径(如 doSomething) + fullName := path.Base(fn.Name()) + parts := strings.Split(fullName, ".") + funcName = parts[len(parts)-1] + } + return entry.WithField("caller", fmt.Sprintf("%s:%d[%s]", shortFile, line, funcName)) +} + +// WithRequestID 在日志中添加请求ID +func WithRequestID(c context.Context, requestID string) context.Context { + return WithField(c, "request_id", requestID) +} + +// WithField 向日志中添加一个字段 +func WithField(c context.Context, key string, value 
interface{}) context.Context { + logger := GetLogger(c).WithField(key, value) + return context.WithValue(c, types.LoggerContextKey, logger) +} + +// WithFields 向日志中添加多个字段 +func WithFields(c context.Context, fields logrus.Fields) context.Context { + logger := GetLogger(c).WithFields(fields) + return context.WithValue(c, types.LoggerContextKey, logger) +} + +// Debug 输出调试级别的日志 +func Debug(c context.Context, args ...interface{}) { + addCaller(GetLogger(c), 2).Debug(args...) +} + +// Debugf 使用格式化字符串输出调试级别的日志 +func Debugf(c context.Context, format string, args ...interface{}) { + addCaller(GetLogger(c), 2).Debugf(format, args...) +} + +// Info 输出信息级别的日志 +func Info(c context.Context, args ...interface{}) { + addCaller(GetLogger(c), 2).Info(args...) +} + +// Infof 使用格式化字符串输出信息级别的日志 +func Infof(c context.Context, format string, args ...interface{}) { + addCaller(GetLogger(c), 2).Infof(format, args...) +} + +// Warn 输出警告级别的日志 +func Warn(c context.Context, args ...interface{}) { + addCaller(GetLogger(c), 2).Warn(args...) +} + +// Warnf 使用格式化字符串输出警告级别的日志 +func Warnf(c context.Context, format string, args ...interface{}) { + addCaller(GetLogger(c), 2).Warnf(format, args...) +} + +// Error 输出错误级别的日志 +func Error(c context.Context, args ...interface{}) { + addCaller(GetLogger(c), 2).Error(args...) +} + +// Errorf 使用格式化字符串输出错误级别的日志 +func Errorf(c context.Context, format string, args ...interface{}) { + addCaller(GetLogger(c), 2).Errorf(format, args...) +} + +// ErrorWithFields 输出带有额外字段的错误级别日志 +func ErrorWithFields(c context.Context, err error, fields logrus.Fields) { + if fields == nil { + fields = logrus.Fields{} + } + if err != nil { + fields["error"] = err.Error() + } + addCaller(GetLogger(c), 2).WithFields(fields).Error("发生错误") +} + +// Fatal 输出致命级别的日志并退出程序 +func Fatal(c context.Context, args ...interface{}) { + addCaller(GetLogger(c), 2).Fatal(args...) +} + +// Fatalf 使用格式化字符串输出致命级别的日志并退出程序 +func Fatalf(c context.Context, format string, args ...interface{}) { + addCaller(GetLogger(c), 2).Fatalf(format, args...) 
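+	// Note: logrus Fatal/Fatalf terminates the process after logging (os.Exit),
+	// so deferred functions in the caller will not run.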
+} + +// CloneContext 复制上下文中的关键信息到新上下文 +func CloneContext(ctx context.Context) context.Context { + newCtx := context.Background() + + for _, k := range []types.ContextKey{ + types.LoggerContextKey, + types.TenantIDContextKey, + types.RequestIDContextKey, + types.TenantInfoContextKey, + } { + if v := ctx.Value(k); v != nil { + newCtx = context.WithValue(newCtx, k, v) + } + } + + return newCtx +} diff --git a/internal/middleware/auth.go b/internal/middleware/auth.go new file mode 100644 index 0000000..ddd7072 --- /dev/null +++ b/internal/middleware/auth.go @@ -0,0 +1,104 @@ +package middleware + +import ( + "context" + "errors" + "log" + "net/http" + "slices" + + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/gin-gonic/gin" +) + +// 无需认证的API列表 +var noAuthAPI = map[string][]string{ + "/api/v1/test-data": {"GET"}, + "/api/v1/tenants": {"POST"}, +} + +// 检查请求是否在无需认证的API列表中 +func isNoAuthAPI(path string, method string) bool { + for api, methods := range noAuthAPI { + if api == path && slices.Contains(methods, method) { + return true + } + } + return false +} + +// Auth 认证中间件 +func Auth(tenantService interfaces.TenantService, cfg *config.Config) gin.HandlerFunc { + return func(c *gin.Context) { + // ignore OPTIONS request + if c.Request.Method == "OPTIONS" { + c.Next() + return + } + + // 检查请求是否在无需认证的API列表中 + if isNoAuthAPI(c.Request.URL.Path, c.Request.Method) { + c.Next() + return + } + + // Get API Key from request header + apiKey := c.GetHeader("X-API-Key") + if apiKey == "" { + c.JSON(http.StatusUnauthorized, gin.H{"error": "Unauthorized"}) + c.Abort() + return + } + + // Get tenant information + tenantID, err := tenantService.ExtractTenantIDFromAPIKey(apiKey) + if err != nil { + c.JSON(http.StatusUnauthorized, gin.H{ + "error": "Unauthorized: invalid API key format", + }) + c.Abort() + return + } + + // Verify API key validity (matches the one in database) + t, err := tenantService.GetTenantByID(c.Request.Context(), tenantID) + if err != nil { + log.Printf("Error getting tenant by ID: %v, tenantID: %d, apiKey: %s", err, tenantID, apiKey) + c.JSON(http.StatusUnauthorized, gin.H{ + "error": "Unauthorized: invalid API key", + }) + c.Abort() + return + } + + if t == nil || t.APIKey != apiKey { + c.JSON(http.StatusUnauthorized, gin.H{ + "error": "Unauthorized: invalid API key", + }) + c.Abort() + return + } + + // Store tenant ID in context + c.Set(types.TenantIDContextKey.String(), tenantID) + c.Set(types.TenantInfoContextKey.String(), t) + c.Request = c.Request.WithContext( + context.WithValue( + context.WithValue(c.Request.Context(), types.TenantIDContextKey, tenantID), + types.TenantInfoContextKey, t, + ), + ) + c.Next() + } +} + +// GetTenantIDFromContext helper function to get tenant ID from context +func GetTenantIDFromContext(ctx context.Context) (uint, error) { + tenantID, ok := ctx.Value("tenantID").(uint) + if !ok { + return 0, errors.New("tenant ID not found in context") + } + return tenantID, nil +} diff --git a/internal/middleware/error_handler.go b/internal/middleware/error_handler.go new file mode 100644 index 0000000..2379e6a --- /dev/null +++ b/internal/middleware/error_handler.go @@ -0,0 +1,46 @@ +package middleware + +import ( + "net/http" + + "github.com/gin-gonic/gin" + + "github.com/Tencent/WeKnora/internal/errors" +) + +// ErrorHandler 是一个处理应用错误的中间件 +func ErrorHandler() gin.HandlerFunc { + return func(c *gin.Context) { + // 处理请求 + c.Next() + + // 检查是否有错误 + if 
len(c.Errors) > 0 { + // 获取最后一个错误 + err := c.Errors.Last().Err + + // 检查是否为应用错误 + if appErr, ok := errors.IsAppError(err); ok { + // 返回应用错误 + c.JSON(appErr.HTTPCode, gin.H{ + "success": false, + "error": gin.H{ + "code": appErr.Code, + "message": appErr.Message, + "details": appErr.Details, + }, + }) + return + } + + // 处理其他类型的错误 + c.JSON(http.StatusInternalServerError, gin.H{ + "success": false, + "error": gin.H{ + "code": errors.ErrInternalServer, + "message": "Internal server error", + }, + }) + } + } +} diff --git a/internal/middleware/logger.go b/internal/middleware/logger.go new file mode 100644 index 0000000..346b91e --- /dev/null +++ b/internal/middleware/logger.go @@ -0,0 +1,83 @@ +package middleware + +import ( + "context" + "time" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/types" + "github.com/gin-gonic/gin" + "github.com/google/uuid" +) + +// RequestID middleware adds a unique request ID to the context +func RequestID() gin.HandlerFunc { + return func(c *gin.Context) { + // Get request ID from header or generate a new one + requestID := c.GetHeader("X-Request-ID") + if requestID == "" { + requestID = uuid.New().String() + } + // Set request ID in header + c.Header("X-Request-ID", requestID) + + // Set request ID in context + c.Set(types.RequestIDContextKey.String(), requestID) + + // Set logger in context + requestLogger := logger.GetLogger(c) + requestLogger = requestLogger.WithField("request_id", requestID) + c.Set(types.LoggerContextKey.String(), requestLogger) + + // Set request ID in the global context for logging + c.Request = c.Request.WithContext( + context.WithValue( + context.WithValue(c.Request.Context(), types.RequestIDContextKey, requestID), + types.LoggerContextKey, requestLogger, + ), + ) + + c.Next() + } +} + +// Logger middleware logs request details with request ID +func Logger() gin.HandlerFunc { + return func(c *gin.Context) { + start := time.Now() + path := c.Request.URL.Path + raw := c.Request.URL.RawQuery + + // Process request + c.Next() + + // Get request ID from context + requestID, exists := c.Get(types.RequestIDContextKey.String()) + if !exists { + requestID = "unknown" + } + + // Calculate latency + latency := time.Since(start) + + // Get client IP and status code + clientIP := c.ClientIP() + statusCode := c.Writer.Status() + method := c.Request.Method + + if raw != "" { + path = path + "?" 
+ raw + } + + // Log with request ID + logger.GetLogger(c).Infof("[%s] %d | %3d | %13v | %15s | %s %s", + requestID, + statusCode, + c.Writer.Size(), + latency, + clientIP, + method, + path, + ) + } +} diff --git a/internal/middleware/recovery.go b/internal/middleware/recovery.go new file mode 100644 index 0000000..9b60f13 --- /dev/null +++ b/internal/middleware/recovery.go @@ -0,0 +1,34 @@ +package middleware + +import ( + "fmt" + "log" + "runtime/debug" + + "github.com/gin-gonic/gin" +) + +// Recovery is a middleware that recovers from panics +func Recovery() gin.HandlerFunc { + return func(c *gin.Context) { + defer func() { + if err := recover(); err != nil { + // Get request ID + requestID, _ := c.Get("RequestID") + + // Print stacktrace + stacktrace := debug.Stack() + // Log error + log.Printf("[PANIC] %s | %v | %s", requestID, err, stacktrace) + + // 返回500错误 + c.AbortWithStatusJSON(500, gin.H{ + "error": "Internal Server Error", + "message": fmt.Sprintf("%v", err), + }) + } + }() + + c.Next() + } +} diff --git a/internal/middleware/trace.go b/internal/middleware/trace.go new file mode 100644 index 0000000..e7febfc --- /dev/null +++ b/internal/middleware/trace.go @@ -0,0 +1,124 @@ +package middleware + +import ( + "bytes" + "fmt" + "io" + "strings" + + "github.com/gin-gonic/gin" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + + "github.com/Tencent/WeKnora/internal/tracing" + "github.com/Tencent/WeKnora/internal/types" +) + +// Custom ResponseWriter to capture response content +type responseBodyWriter struct { + gin.ResponseWriter + body *bytes.Buffer +} + +// Override Write method to write response content to buffer and original writer +func (r responseBodyWriter) Write(b []byte) (int, error) { + r.body.Write(b) + return r.ResponseWriter.Write(b) +} + +// TracingMiddleware provides a Gin middleware that creates a trace span for each request +func TracingMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + // Extract trace context from request headers + propagator := tracing.GetTracer() + if propagator == nil { + c.Next() + return + } + + // Get request ID as Span ID + requestID := c.GetString(string(types.RequestIDContextKey)) + if requestID == "" { + requestID = c.GetHeader("X-Request-ID") + } + + // Create new span + spanName := fmt.Sprintf("%s %s", c.Request.Method, c.FullPath()) + ctx, span := tracing.ContextWithSpan(c.Request.Context(), spanName) + defer span.End() + + // Set basic span attributes + span.SetAttributes( + attribute.String("http.method", c.Request.Method), + attribute.String("http.url", c.Request.URL.String()), + attribute.String("http.path", c.FullPath()), + ) + + // Record request headers (optional, or selectively record important headers) + for key, values := range c.Request.Header { + // Skip sensitive or unnecessary headers + if strings.ToLower(key) == "authorization" || strings.ToLower(key) == "cookie" { + continue + } + span.SetAttributes(attribute.String("http.request.header."+key, strings.Join(values, ";"))) + } + + // Record request body (for POST/PUT/PATCH requests) + if c.Request.Method == "POST" || c.Request.Method == "PUT" || c.Request.Method == "PATCH" { + if c.Request.Body != nil { + bodyBytes, _ := io.ReadAll(c.Request.Body) + span.SetAttributes(attribute.String("http.request.body", string(bodyBytes))) + // Reset request body because ReadAll consumes the Reader content + c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) + } + } + + // Record query parameters + if len(c.Request.URL.RawQuery) > 0 { + 
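+		// The raw query string (like the request body recorded above) is stored on
+		// the span verbatim; deployments handling sensitive parameters may want to
+		// redact it before recording.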
span.SetAttributes(attribute.String("http.request.query", c.Request.URL.RawQuery)) + } + + // Set request context with span context + c.Request = c.Request.WithContext(ctx) + + // Store tracing context in Gin context + c.Set("trace.span", span) + c.Set("trace.ctx", ctx) + + // Create response body capturer + responseBody := &bytes.Buffer{} + responseWriter := &responseBodyWriter{ + ResponseWriter: c.Writer, + body: responseBody, + } + c.Writer = responseWriter + + // Process request + c.Next() + + // Set response status code + statusCode := c.Writer.Status() + span.SetAttributes(attribute.Int("http.status_code", statusCode)) + + // Record response body + responseContent := responseBody.String() + if len(responseContent) > 0 { + span.SetAttributes(attribute.String("http.response.body", responseContent)) + } + + // Record response headers (optional, or selectively record important headers) + for key, values := range c.Writer.Header() { + span.SetAttributes(attribute.String("http.response.header."+key, strings.Join(values, ";"))) + } + + // Mark as error if status code >= 400 + if statusCode >= 400 { + span.SetStatus(codes.Error, fmt.Sprintf("HTTP %d", statusCode)) + if err := c.Errors.Last(); err != nil { + span.RecordError(err.Err) + } + } else { + span.SetStatus(codes.Ok, "") + } + } +} diff --git a/internal/models/chat/chat.go b/internal/models/chat/chat.go new file mode 100644 index 0000000..04d300d --- /dev/null +++ b/internal/models/chat/chat.go @@ -0,0 +1,72 @@ +package chat + +import ( + "context" + "fmt" + "strings" + + "github.com/Tencent/WeKnora/internal/models/utils/ollama" + "github.com/Tencent/WeKnora/internal/runtime" + "github.com/Tencent/WeKnora/internal/types" +) + +// ChatOptions 聊天选项 +type ChatOptions struct { + Temperature float64 `json:"temperature"` // 温度参数 + TopP float64 `json:"top_p"` // Top P 参数 + Seed int `json:"seed"` // 随机种子 + MaxTokens int `json:"max_tokens"` // 最大 token 数 + MaxCompletionTokens int `json:"max_completion_tokens"` // 最大完成 token 数 + FrequencyPenalty float64 `json:"frequency_penalty"` // 频率惩罚 + PresencePenalty float64 `json:"presence_penalty"` // 存在惩罚 + Thinking *bool `json:"thinking"` // 是否启用思考 +} + +// Message 表示聊天消息 +type Message struct { + Role string `json:"role"` // 角色:system, user, assistant + Content string `json:"content"` // 消息内容 +} + +// Chat 定义了聊天接口 +type Chat interface { + // Chat 进行非流式聊天 + Chat(ctx context.Context, messages []Message, opts *ChatOptions) (*types.ChatResponse, error) + + // ChatStream 进行流式聊天 + ChatStream(ctx context.Context, messages []Message, opts *ChatOptions) (<-chan types.StreamResponse, error) + + // GetModelName 获取模型名称 + GetModelName() string + + // GetModelID 获取模型ID + GetModelID() string +} + +type ChatConfig struct { + Source types.ModelSource + BaseURL string + ModelName string + APIKey string + ModelID string +} + +// NewChat 创建聊天实例 +func NewChat(config *ChatConfig) (Chat, error) { + var chat Chat + var err error + switch strings.ToLower(string(config.Source)) { + case string(types.ModelSourceLocal): + runtime.GetContainer().Invoke(func(ollamaService *ollama.OllamaService) { + chat, err = NewOllamaChat(config, ollamaService) + }) + if err != nil { + return nil, err + } + return chat, nil + case string(types.ModelSourceRemote): + return NewRemoteAPIChat(config) + default: + return nil, fmt.Errorf("unsupported chat model source: %s", config.Source) + } +} diff --git a/internal/models/chat/ollama.go b/internal/models/chat/ollama.go new file mode 100644 index 0000000..5cdfe3b --- /dev/null +++ 
b/internal/models/chat/ollama.go @@ -0,0 +1,189 @@ +package chat + +import ( + "context" + "fmt" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/utils/ollama" + "github.com/Tencent/WeKnora/internal/types" + ollamaapi "github.com/ollama/ollama/api" +) + +// OllamaChat 实现了基于 Ollama 的聊天 +type OllamaChat struct { + modelName string + modelID string + ollamaService *ollama.OllamaService +} + +// NewOllamaChat 创建 Ollama 聊天实例 +func NewOllamaChat(config *ChatConfig, ollamaService *ollama.OllamaService) (*OllamaChat, error) { + return &OllamaChat{ + modelName: config.ModelName, + modelID: config.ModelID, + ollamaService: ollamaService, + }, nil +} + +// convertMessages 转换消息格式为Ollama API格式 +func (c *OllamaChat) convertMessages(messages []Message) []ollamaapi.Message { + ollamaMessages := make([]ollamaapi.Message, len(messages)) + for i, msg := range messages { + ollamaMessages[i] = ollamaapi.Message{ + Role: msg.Role, + Content: msg.Content, + } + } + return ollamaMessages +} + +// buildChatRequest 构建聊天请求参数 +func (c *OllamaChat) buildChatRequest(messages []Message, opts *ChatOptions, isStream bool) *ollamaapi.ChatRequest { + // 设置流式标志 + streamFlag := isStream + + // 构建请求参数 + chatReq := &ollamaapi.ChatRequest{ + Model: c.modelName, + Messages: c.convertMessages(messages), + Stream: &streamFlag, + Options: make(map[string]interface{}), + } + + // 添加可选参数 + if opts != nil { + if opts.Temperature > 0 { + chatReq.Options["temperature"] = opts.Temperature + } + if opts.TopP > 0 { + chatReq.Options["top_p"] = opts.TopP + } + if opts.MaxTokens > 0 { + chatReq.Options["num_predict"] = opts.MaxTokens + } + if opts.Thinking != nil { + chatReq.Options["think"] = *opts.Thinking + } + } + + return chatReq +} + +// Chat 进行非流式聊天 +func (c *OllamaChat) Chat(ctx context.Context, messages []Message, opts *ChatOptions) (*types.ChatResponse, error) { + // 确保模型可用 + if err := c.ensureModelAvailable(ctx); err != nil { + return nil, err + } + + // 构建请求参数 + chatReq := c.buildChatRequest(messages, opts, false) + + // 记录请求日志 + logger.GetLogger(ctx).Infof("发送聊天请求到模型 %s", c.modelName) + + var responseContent string + var promptTokens, completionTokens int + + // 使用 Ollama 客户端发送请求 + err := c.ollamaService.Chat(ctx, chatReq, func(resp ollamaapi.ChatResponse) error { + responseContent = resp.Message.Content + + // 获取token计数 + if resp.EvalCount > 0 { + promptTokens = resp.PromptEvalCount + completionTokens = resp.EvalCount - promptTokens + } + + return nil + }) + if err != nil { + return nil, fmt.Errorf("聊天请求失败: %w", err) + } + + // 构建响应 + return &types.ChatResponse{ + Content: responseContent, + Usage: struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + }{ + PromptTokens: promptTokens, + CompletionTokens: completionTokens, + TotalTokens: promptTokens + completionTokens, + }, + }, nil +} + +// ChatStream 进行流式聊天 +func (c *OllamaChat) ChatStream( + ctx context.Context, + messages []Message, + opts *ChatOptions, +) (<-chan types.StreamResponse, error) { + // 确保模型可用 + if err := c.ensureModelAvailable(ctx); err != nil { + return nil, err + } + + // 构建请求参数 + chatReq := c.buildChatRequest(messages, opts, true) + + // 记录请求日志 + logger.GetLogger(ctx).Infof("发送流式聊天请求到模型 %s", c.modelName) + + // 创建流式响应通道 + streamChan := make(chan types.StreamResponse) + + // 启动goroutine处理流式响应 + go func() { + defer close(streamChan) + + err := c.ollamaService.Chat(ctx, chatReq, func(resp ollamaapi.ChatResponse) error { + if 
resp.Message.Content != "" { + streamChan <- types.StreamResponse{ + ResponseType: types.ResponseTypeAnswer, + Content: resp.Message.Content, + Done: false, + } + } + + if resp.Done { + streamChan <- types.StreamResponse{ + ResponseType: types.ResponseTypeAnswer, + Done: true, + } + } + + return nil + }) + if err != nil { + logger.GetLogger(ctx).Errorf("流式聊天请求失败: %v", err) + // 发送错误响应 + streamChan <- types.StreamResponse{ + ResponseType: types.ResponseTypeAnswer, + Done: true, + } + } + }() + + return streamChan, nil +} + +// 确保模型可用 +func (c *OllamaChat) ensureModelAvailable(ctx context.Context) error { + logger.GetLogger(ctx).Infof("确保模型 %s 可用", c.modelName) + return c.ollamaService.EnsureModelAvailable(ctx, c.modelName) +} + +// GetModelName 获取模型名称 +func (c *OllamaChat) GetModelName() string { + return c.modelName +} + +// GetModelID 获取模型ID +func (c *OllamaChat) GetModelID() string { + return c.modelID +} diff --git a/internal/models/chat/remote_api.go b/internal/models/chat/remote_api.go new file mode 100644 index 0000000..4260f79 --- /dev/null +++ b/internal/models/chat/remote_api.go @@ -0,0 +1,166 @@ +package chat + +import ( + "context" + "fmt" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/sashabaranov/go-openai" +) + +// RemoteAPIChat 实现了基于的聊天 +type RemoteAPIChat struct { + modelName string + client *openai.Client + modelID string +} + +// NewRemoteAPIChat 调用远程API 聊天实例 +func NewRemoteAPIChat(chatConfig *ChatConfig) (*RemoteAPIChat, error) { + apiKey := chatConfig.APIKey + config := openai.DefaultConfig(apiKey) + if baseURL := chatConfig.BaseURL; baseURL != "" { + config.BaseURL = baseURL + } + return &RemoteAPIChat{ + modelName: chatConfig.ModelName, + client: openai.NewClientWithConfig(config), + modelID: chatConfig.ModelID, + }, nil +} + +// convertMessages 转换消息格式为OpenAI格式 +func (c *RemoteAPIChat) convertMessages(messages []Message) []openai.ChatCompletionMessage { + openaiMessages := make([]openai.ChatCompletionMessage, len(messages)) + for i, msg := range messages { + openaiMessages[i] = openai.ChatCompletionMessage{ + Role: msg.Role, + Content: msg.Content, + } + } + return openaiMessages +} + +// buildChatCompletionRequest 构建聊天请求参数 +func (c *RemoteAPIChat) buildChatCompletionRequest(messages []Message, + opts *ChatOptions, isStream bool, +) openai.ChatCompletionRequest { + req := openai.ChatCompletionRequest{ + Model: c.modelName, + Messages: c.convertMessages(messages), + Stream: isStream, + } + + // 添加可选参数 + if opts != nil { + if opts.Temperature > 0 { + req.Temperature = float32(opts.Temperature) + } + if opts.TopP > 0 { + req.TopP = float32(opts.TopP) + } + if opts.MaxTokens > 0 { + req.MaxTokens = opts.MaxTokens + } + if opts.MaxCompletionTokens > 0 { + req.MaxCompletionTokens = opts.MaxCompletionTokens + } + if opts.FrequencyPenalty > 0 { + req.FrequencyPenalty = float32(opts.FrequencyPenalty) + } + if opts.PresencePenalty > 0 { + req.PresencePenalty = float32(opts.PresencePenalty) + } + if opts.Thinking != nil { + req.ChatTemplateKwargs = map[string]any{ + "enable_thinking": *opts.Thinking, + } + } + } + + return req +} + +// Chat 进行非流式聊天 +func (c *RemoteAPIChat) Chat(ctx context.Context, messages []Message, opts *ChatOptions) (*types.ChatResponse, error) { + // 构建请求参数 + req := c.buildChatCompletionRequest(messages, opts, false) + + // 发送请求 + resp, err := c.client.CreateChatCompletion(ctx, req) + if err != nil { + return nil, fmt.Errorf("create chat completion: %w", err) + } + + if len(resp.Choices) == 0 { + return nil, fmt.Errorf("no response from 
OpenAI") + } + + // 转换响应格式 + return &types.ChatResponse{ + Content: resp.Choices[0].Message.Content, + Usage: struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + }{ + PromptTokens: resp.Usage.PromptTokens, + CompletionTokens: resp.Usage.CompletionTokens, + TotalTokens: resp.Usage.TotalTokens, + }, + }, nil +} + +// ChatStream 进行流式聊天 +func (c *RemoteAPIChat) ChatStream(ctx context.Context, + messages []Message, opts *ChatOptions, +) (<-chan types.StreamResponse, error) { + // 构建请求参数 + req := c.buildChatCompletionRequest(messages, opts, true) + + // 创建流式响应通道 + streamChan := make(chan types.StreamResponse) + + // 启动流式请求 + stream, err := c.client.CreateChatCompletionStream(ctx, req) + if err != nil { + close(streamChan) + return nil, fmt.Errorf("create chat completion stream: %w", err) + } + + // 在后台处理流式响应 + go func() { + defer close(streamChan) + defer stream.Close() + + for { + response, err := stream.Recv() + if err != nil { + streamChan <- types.StreamResponse{ + ResponseType: types.ResponseTypeAnswer, + Done: true, + } + return + } + if len(response.Choices) > 0 { + streamChan <- types.StreamResponse{ + ResponseType: types.ResponseTypeAnswer, + Content: response.Choices[0].Delta.Content, + Done: false, + } + } + } + }() + + return streamChan, nil +} + +// GetModelName 获取模型名称 +func (c *RemoteAPIChat) GetModelName() string { + return c.modelName +} + +// GetModelID 获取模型ID +func (c *RemoteAPIChat) GetModelID() string { + return c.modelID +} diff --git a/internal/models/embedding/batch.go b/internal/models/embedding/batch.go new file mode 100644 index 0000000..2849173 --- /dev/null +++ b/internal/models/embedding/batch.go @@ -0,0 +1,83 @@ +package embedding + +import ( + "context" + "sync" + + "github.com/Tencent/WeKnora/internal/models/utils" + "github.com/panjf2000/ants/v2" +) + +type batchEmbedder struct { + pool *ants.Pool +} + +func NewBatchEmbedder(pool *ants.Pool) EmbedderPooler { + return &batchEmbedder{pool: pool} +} + +type textEmbedding struct { + text string + results []float32 +} + +func (e *batchEmbedder) BatchEmbedWithPool(ctx context.Context, model Embedder, texts []string) ([][]float32, error) { + // Create goroutine pool for concurrent processing of document chunks + var wg sync.WaitGroup + var mu sync.Mutex // For synchronizing access to error + var firstErr error // Record the first error that occurs + batchSize := 5 + textEmbeddings := utils.MapSlice(texts, func(text string) *textEmbedding { + return &textEmbedding{text: text} + }) + + // Function to process each document chunk + processChunk := func(texts []*textEmbedding) func() { + return func() { + defer wg.Done() + // If an error has already occurred, don't continue processing + if firstErr != nil { + return + } + // Embed text + embedding, err := model.BatchEmbed(ctx, utils.MapSlice(texts, func(text *textEmbedding) string { + return text.text + })) + if err != nil { + mu.Lock() + if firstErr == nil { + firstErr = err + } + mu.Unlock() + return + } + mu.Lock() + for i, text := range texts { + text.results = embedding[i] + } + mu.Unlock() + } + } + + // Submit all tasks to the goroutine pool + for _, texts := range utils.ChunkSlice(textEmbeddings, batchSize) { + wg.Add(1) + err := e.pool.Submit(processChunk(texts)) + if err != nil { + return nil, err + } + } + + // Wait for all tasks to complete + wg.Wait() + + // Check if any errors occurred + if firstErr != nil { + return nil, firstErr + } + + results := 
utils.MapSlice(textEmbeddings, func(text *textEmbedding) []float32 { + return text.results + }) + return results, nil +} diff --git a/internal/models/embedding/embedder.go b/internal/models/embedding/embedder.go new file mode 100644 index 0000000..ece81c0 --- /dev/null +++ b/internal/models/embedding/embedder.go @@ -0,0 +1,76 @@ +package embedding + +import ( + "context" + "fmt" + "strings" + + "github.com/Tencent/WeKnora/internal/models/utils/ollama" + "github.com/Tencent/WeKnora/internal/runtime" + "github.com/Tencent/WeKnora/internal/types" +) + +// Embedder defines the interface for text vectorization +type Embedder interface { + // Embed converts text to vector + Embed(ctx context.Context, text string) ([]float32, error) + + // BatchEmbed converts multiple texts to vectors in batch + BatchEmbed(ctx context.Context, texts []string) ([][]float32, error) + + // GetModelName returns the model name + GetModelName() string + + // GetDimensions returns the vector dimensions + GetDimensions() int + + // GetModelID returns the model ID + GetModelID() string + + EmbedderPooler +} + +type EmbedderPooler interface { + BatchEmbedWithPool(ctx context.Context, model Embedder, texts []string) ([][]float32, error) +} + +// EmbedderType represents the embedder type +type EmbedderType string + +// Config represents the embedder configuration +type Config struct { + Source types.ModelSource `json:"source"` + BaseURL string `json:"base_url"` + ModelName string `json:"model_name"` + APIKey string `json:"api_key"` + TruncatePromptTokens int `json:"truncate_prompt_tokens"` + Dimensions int `json:"dimensions"` + ModelID string `json:"model_id"` +} + +// NewEmbedder creates an embedder based on the configuration +func NewEmbedder(config Config) (Embedder, error) { + var embedder Embedder + var err error + switch strings.ToLower(string(config.Source)) { + case string(types.ModelSourceLocal): + runtime.GetContainer().Invoke(func(pooler EmbedderPooler, ollamaService *ollama.OllamaService) { + embedder, err = NewOllamaEmbedder(config.BaseURL, + config.ModelName, config.TruncatePromptTokens, config.Dimensions, config.ModelID, pooler, ollamaService) + }) + return embedder, err + case string(types.ModelSourceRemote): + runtime.GetContainer().Invoke(func(pooler EmbedderPooler) { + embedder, err = NewOpenAIEmbedder(config.APIKey, + config.BaseURL, + config.ModelName, + config.TruncatePromptTokens, + config.Dimensions, + config.ModelID, + pooler) + }) + return embedder, err + default: + return nil, fmt.Errorf("unsupported embedder source: %s", config.Source) + } +} diff --git a/internal/models/embedding/ollama.go b/internal/models/embedding/ollama.go new file mode 100644 index 0000000..12e5196 --- /dev/null +++ b/internal/models/embedding/ollama.go @@ -0,0 +1,125 @@ +package embedding + +import ( + "context" + "fmt" + "time" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/Tencent/WeKnora/internal/models/utils/ollama" + ollamaapi "github.com/ollama/ollama/api" +) + +// OllamaEmbedder implements text vectorization functionality using Ollama +type OllamaEmbedder struct { + modelName string + truncatePromptTokens int + ollamaService *ollama.OllamaService + dimensions int + modelID string + EmbedderPooler +} + +// OllamaEmbedRequest represents an Ollama embedding request +type OllamaEmbedRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + TruncatePromptTokens int `json:"truncate_prompt_tokens"` +} + +// OllamaEmbedResponse represents an Ollama embedding response +type 
OllamaEmbedResponse struct { + Embedding []float32 `json:"embedding"` +} + +// NewOllamaEmbedder creates a new Ollama embedder +func NewOllamaEmbedder(baseURL, + modelName string, + truncatePromptTokens int, + dimensions int, + modelID string, + pooler EmbedderPooler, + ollamaService *ollama.OllamaService, +) (*OllamaEmbedder, error) { + if modelName == "" { + modelName = "nomic-embed-text" + } + + if truncatePromptTokens == 0 { + truncatePromptTokens = 511 + } + + return &OllamaEmbedder{ + modelName: modelName, + truncatePromptTokens: truncatePromptTokens, + ollamaService: ollamaService, + EmbedderPooler: pooler, + dimensions: dimensions, + modelID: modelID, + }, nil +} + +// ensureModelAvailable ensures that the model is available +func (e *OllamaEmbedder) ensureModelAvailable(ctx context.Context) error { + logger.GetLogger(ctx).Infof("Ensuring model %s is available", e.modelName) + return e.ollamaService.EnsureModelAvailable(ctx, e.modelName) +} + +// Embed converts text to vector +func (e *OllamaEmbedder) Embed(ctx context.Context, text string) ([]float32, error) { + embedding, err := e.BatchEmbed(ctx, []string{text}) + if err != nil { + return nil, fmt.Errorf("failed to embed text: %w", err) + } + + if len(embedding) == 0 { + return nil, fmt.Errorf("failed to embed text: %w", err) + } + + return embedding[0], nil +} + +// BatchEmbed converts multiple texts to vectors in batch +func (e *OllamaEmbedder) BatchEmbed(ctx context.Context, texts []string) ([][]float32, error) { + // Ensure model is available + if err := e.ensureModelAvailable(ctx); err != nil { + return nil, err + } + + // Create request + req := &ollamaapi.EmbedRequest{ + Model: e.modelName, + Input: texts, + Options: make(map[string]interface{}), + } + + // Set truncation parameters + if e.truncatePromptTokens > 0 { + req.Options["truncate"] = e.truncatePromptTokens + } + + // Send request + startTime := time.Now() + resp, err := e.ollamaService.Embeddings(ctx, req) + if err != nil { + return nil, fmt.Errorf("failed to get embedding vectors: %w", err) + } + + logger.GetLogger(ctx).Debugf("Embedding vector retrieval took: %v", time.Since(startTime)) + return resp.Embeddings, nil +} + +// GetModelName returns the model name +func (e *OllamaEmbedder) GetModelName() string { + return e.modelName +} + +// GetDimensions returns the vector dimensions +func (e *OllamaEmbedder) GetDimensions() int { + return e.dimensions +} + +// GetModelID returns the model ID +func (e *OllamaEmbedder) GetModelID() string { + return e.modelID +} diff --git a/internal/models/embedding/openai.go b/internal/models/embedding/openai.go new file mode 100644 index 0000000..ef987d4 --- /dev/null +++ b/internal/models/embedding/openai.go @@ -0,0 +1,198 @@ +package embedding + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "time" + + "github.com/Tencent/WeKnora/internal/logger" +) + +// OpenAIEmbedder implements text vectorization functionality using OpenAI API +type OpenAIEmbedder struct { + apiKey string + baseURL string + modelName string + truncatePromptTokens int + dimensions int + modelID string + httpClient *http.Client + timeout time.Duration + maxRetries int + EmbedderPooler +} + +// OpenAIEmbedRequest represents an OpenAI embedding request +type OpenAIEmbedRequest struct { + Model string `json:"model"` + Input []string `json:"input"` + TruncatePromptTokens int `json:"truncate_prompt_tokens"` +} + +// OpenAIEmbedResponse represents an OpenAI embedding response +type OpenAIEmbedResponse struct { + Data []struct { + 
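+		// Mirrors the OpenAI-compatible /v1/embeddings response: one entry per
+		// input text, carrying the vector and its index in the request order.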
Embedding []float32 `json:"embedding"`
+		Index     int       `json:"index"`
+	} `json:"data"`
+}
+
+// NewOpenAIEmbedder creates a new OpenAI embedder
+func NewOpenAIEmbedder(apiKey, baseURL, modelName string,
+	truncatePromptTokens int, dimensions int, modelID string, pooler EmbedderPooler,
+) (*OpenAIEmbedder, error) {
+	if baseURL == "" {
+		baseURL = "https://api.openai.com/v1"
+	}
+
+	if modelName == "" {
+		return nil, fmt.Errorf("model name is required")
+	}
+
+	if truncatePromptTokens == 0 {
+		truncatePromptTokens = 511
+	}
+
+	timeout := 60 * time.Second
+
+	// Create HTTP client
+	client := &http.Client{
+		Timeout: timeout,
+	}
+
+	return &OpenAIEmbedder{
+		apiKey:               apiKey,
+		baseURL:              baseURL,
+		modelName:            modelName,
+		httpClient:           client,
+		truncatePromptTokens: truncatePromptTokens,
+		EmbedderPooler:       pooler,
+		dimensions:           dimensions,
+		modelID:              modelID,
+		timeout:              timeout,
+		maxRetries:           3, // Maximum retry count
+	}, nil
+}
+
+// Embed converts text to vector
+func (e *OpenAIEmbedder) Embed(ctx context.Context, text string) ([]float32, error) {
+	for range 3 {
+		embeddings, err := e.BatchEmbed(ctx, []string{text})
+		if err != nil {
+			return nil, err
+		}
+		if len(embeddings) > 0 {
+			return embeddings[0], nil
+		}
+	}
+	return nil, fmt.Errorf("no embedding returned")
+}
+
+func (e *OpenAIEmbedder) doRequestWithRetry(ctx context.Context, jsonData []byte) (*http.Response, error) {
+	var resp *http.Response
+	var err error
+	url := e.baseURL + "/embeddings"
+
+	for i := 0; i <= e.maxRetries; i++ {
+		if i > 0 {
+			backoffTime := time.Duration(1<<uint(i-1)) * time.Second
+			if backoffTime > 10*time.Second {
+				backoffTime = 10 * time.Second
+			}
+			logger.GetLogger(ctx).Infof("OpenAIEmbedder retrying request (%d/%d), waiting %v", i, e.maxRetries, backoffTime)
+
+			select {
+			case <-time.After(backoffTime):
+			case <-ctx.Done():
+				return nil, ctx.Err()
+			}
+		}
+
+		// Rebuild request each time to ensure Body is valid
+		req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonData))
+		if err != nil {
+			logger.GetLogger(ctx).Errorf("OpenAIEmbedder failed to create request: %v", err)
+			continue
+		}
+		req.Header.Set("Content-Type", "application/json")
+		req.Header.Set("Authorization", "Bearer "+e.apiKey)
+
+		resp, err = e.httpClient.Do(req)
+		if err == nil {
+			return resp, nil
+		}
+
+		logger.GetLogger(ctx).Errorf("OpenAIEmbedder request failed (attempt %d/%d): %v", i+1, e.maxRetries+1, err)
+	}
+
+	return nil, err
+}
+
+func (e *OpenAIEmbedder) BatchEmbed(ctx context.Context, texts []string) ([][]float32, error) {
+	// Create request body
+	reqBody := OpenAIEmbedRequest{
+		Model:                e.modelName,
+		Input:                texts,
+		TruncatePromptTokens: e.truncatePromptTokens,
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		logger.GetLogger(ctx).Errorf("OpenAIEmbedder EmbedBatch marshal request error: %v", err)
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	// Send request (passing jsonData instead of constructing http.Request)
+	resp, err := e.doRequestWithRetry(ctx, jsonData)
+	if err != nil {
+		logger.GetLogger(ctx).Errorf("OpenAIEmbedder EmbedBatch send request error: %v", err)
+		return nil, fmt.Errorf("send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// Read response
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		logger.GetLogger(ctx).Errorf("OpenAIEmbedder EmbedBatch read response error: %v", err)
+		return nil, fmt.Errorf("read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		logger.GetLogger(ctx).Errorf("OpenAIEmbedder EmbedBatch API error: Http Status %s", resp.Status)
+		return
nil, fmt.Errorf("EmbedBatch API error: Http Status %s", resp.Status) + } + + // Parse response + var response OpenAIEmbedResponse + if err := json.Unmarshal(body, &response); err != nil { + logger.GetLogger(ctx).Errorf("OpenAIEmbedder EmbedBatch unmarshal response error: %v", err) + return nil, fmt.Errorf("unmarshal response: %w", err) + } + + // Extract embedding vectors + embeddings := make([][]float32, 0, len(response.Data)) + for _, data := range response.Data { + embeddings = append(embeddings, data.Embedding) + } + + return embeddings, nil +} + +// GetModelName returns the model name +func (e *OpenAIEmbedder) GetModelName() string { + return e.modelName +} + +// GetDimensions returns the vector dimensions +func (e *OpenAIEmbedder) GetDimensions() int { + return e.dimensions +} + +// GetModelID returns the model ID +func (e *OpenAIEmbedder) GetModelID() string { + return e.modelID +} diff --git a/internal/models/rerank/remote_api.go b/internal/models/rerank/remote_api.go new file mode 100644 index 0000000..f0ad8ea --- /dev/null +++ b/internal/models/rerank/remote_api.go @@ -0,0 +1,122 @@ +package rerank + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + + "github.com/Tencent/WeKnora/internal/logger" +) + +// OpenAIReranker implements a reranking system based on OpenAI models +type OpenAIReranker struct { + modelName string // Name of the model used for reranking + modelID string // Unique identifier of the model + apiKey string // API key for authentication + baseURL string // Base URL for API requests + client *http.Client // HTTP client for making API requests +} + +// RerankRequest represents a request to rerank documents based on relevance to a query +type RerankRequest struct { + Model string `json:"model"` // Model to use for reranking + Query string `json:"query"` // Query text to compare documents against + Documents []string `json:"documents"` // List of document texts to rerank + AdditionalData map[string]interface{} `json:"additional_data"` // Optional additional data for the model + TruncatePromptTokens int `json:"truncate_prompt_tokens"` // Maximum prompt tokens to use +} + +// RerankResponse represents the response from a reranking request +type RerankResponse struct { + ID string `json:"id"` // Request ID + Model string `json:"model"` // Model used for reranking + Usage UsageInfo `json:"usage"` // Token usage information + Results []RankResult `json:"results"` // Ranked results with relevance scores +} + +// UsageInfo contains information about token usage in the API request +type UsageInfo struct { + TotalTokens int `json:"total_tokens"` // Total tokens consumed +} + +// NewOpenAIReranker creates a new instance of OpenAI reranker with the provided configuration +func NewOpenAIReranker(config *RerankerConfig) (*OpenAIReranker, error) { + apiKey := config.APIKey + baseURL := "https://api.openai.com/v1" + if url := config.BaseURL; url != "" { + baseURL = url + } + + return &OpenAIReranker{ + modelName: config.ModelName, + modelID: config.ModelID, + apiKey: apiKey, + baseURL: baseURL, + client: &http.Client{}, + }, nil +} + +// Rerank performs document reranking based on relevance to the query +func (r *OpenAIReranker) Rerank(ctx context.Context, query string, documents []string) ([]RankResult, error) { + // Build the request body + requestBody := &RerankRequest{ + Model: r.modelName, + Query: query, + Documents: documents, + TruncatePromptTokens: 511, + } + + jsonData, err := json.Marshal(requestBody) + if err != nil { + return nil, 
fmt.Errorf("marshal request body: %w", err) + } + + // Send the request + req, err := http.NewRequestWithContext(ctx, "POST", fmt.Sprintf("%s/rerank", r.baseURL), bytes.NewBuffer(jsonData)) + if err != nil { + return nil, fmt.Errorf("create request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", r.apiKey)) + + // Log the curl equivalent for debugging + logger.GetLogger(ctx).Infof( + "curl -X POST %s/rerank -H \"Content-Type: application/json\" -H \"Authorization: Bearer %s\" -d '%s'", + r.baseURL, r.apiKey, string(jsonData), + ) + + resp, err := r.client.Do(req) + if err != nil { + return nil, fmt.Errorf("do request: %w", err) + } + defer resp.Body.Close() + + // Read the response + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("read response body: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("Rerank API error: Http Status: %s", resp.Status) + } + + var response RerankResponse + if err := json.Unmarshal(body, &response); err != nil { + return nil, fmt.Errorf("unmarshal response: %w", err) + } + return response.Results, nil +} + +// GetModelName returns the name of the reranking model +func (r *OpenAIReranker) GetModelName() string { + return r.modelName +} + +// GetModelID returns the unique identifier of the reranking model +func (r *OpenAIReranker) GetModelID() string { + return r.modelID +} diff --git a/internal/models/rerank/reranker.go b/internal/models/rerank/reranker.go new file mode 100644 index 0000000..f62c66b --- /dev/null +++ b/internal/models/rerank/reranker.go @@ -0,0 +1,49 @@ +package rerank + +import ( + "context" + "fmt" + "strings" + + "github.com/Tencent/WeKnora/internal/types" +) + +// Reranker defines the interface for document reranking +type Reranker interface { + // Rerank reranks documents based on relevance to the query + Rerank(ctx context.Context, query string, documents []string) ([]RankResult, error) + + // GetModelName returns the model name + GetModelName() string + + // GetModelID returns the model ID + GetModelID() string +} + +type RankResult struct { + Index int `json:"index"` + Document DocumentInfo `json:"document"` + RelevanceScore float64 `json:"relevance_score"` +} + +type DocumentInfo struct { + Text string `json:"text"` +} + +type RerankerConfig struct { + APIKey string + BaseURL string + ModelName string + Source types.ModelSource + ModelID string +} + +// NewReranker creates a reranker +func NewReranker(config *RerankerConfig) (Reranker, error) { + switch strings.ToLower(string(config.Source)) { + case string(types.ModelSourceRemote): + return NewOpenAIReranker(config) + default: + return nil, fmt.Errorf("unsupported rerank model source: %s", config.Source) + } +} diff --git a/internal/models/utils/ollama/ollama.go b/internal/models/utils/ollama/ollama.go new file mode 100644 index 0000000..c4fbf9f --- /dev/null +++ b/internal/models/utils/ollama/ollama.go @@ -0,0 +1,311 @@ +package ollama + +import ( + "context" + "fmt" + "net/http" + "net/url" + "os" + "strings" + "sync" + + "github.com/Tencent/WeKnora/internal/logger" + "github.com/ollama/ollama/api" +) + +// OllamaService manages Ollama service +type OllamaService struct { + client *api.Client + baseURL string + mu sync.Mutex + isAvailable bool + isOptional bool // Added: marks if Ollama service is optional +} + +// GetOllamaService gets Ollama service instance (singleton pattern) +func GetOllamaService() (*OllamaService, error) { + // Get Ollama 
base URL from environment variable, if not set use provided baseURL or default value + logger.GetLogger(context.Background()).Infof("Ollama base URL: %s", os.Getenv("OLLAMA_BASE_URL")) + baseURL := "http://localhost:11434" + envURL := os.Getenv("OLLAMA_BASE_URL") + if envURL != "" { + baseURL = envURL + } + + // Create URL object + parsedURL, err := url.Parse(baseURL) + if err != nil { + return nil, fmt.Errorf("invalid Ollama service URL: %w", err) + } + + // Create official client + client := api.NewClient(parsedURL, http.DefaultClient) + + // Check if Ollama is set as optional + isOptional := false + if os.Getenv("OLLAMA_OPTIONAL") == "true" { + isOptional = true + logger.GetLogger(context.Background()).Info("Ollama service set to optional mode") + } + + service := &OllamaService{ + client: client, + baseURL: baseURL, + isOptional: isOptional, + } + + return service, nil +} + +// StartService checks if Ollama service is available +func (s *OllamaService) StartService(ctx context.Context) error { + s.mu.Lock() + defer s.mu.Unlock() + + // Check if service is available + err := s.client.Heartbeat(ctx) + if err != nil { + logger.GetLogger(ctx).Warnf("Ollama service unavailable: %v", err) + s.isAvailable = false + + // If configured as optional, don't return an error + if s.isOptional { + logger.GetLogger(ctx).Info("Ollama service set as optional, will continue running the application") + return nil + } + + return fmt.Errorf("Ollama service unavailable: %w", err) + } + + s.isAvailable = true + logger.GetLogger(ctx).Info("Ollama service ready") + return nil +} + +// IsAvailable returns whether the service is available +func (s *OllamaService) IsAvailable() bool { + s.mu.Lock() + defer s.mu.Unlock() + return s.isAvailable +} + +// IsModelAvailable checks if a model is available +func (s *OllamaService) IsModelAvailable(ctx context.Context, modelName string) (bool, error) { + // First check if the service is available + if err := s.StartService(ctx); err != nil { + return false, err + } + + // If service is not available but set as optional, return false but no error + if !s.isAvailable && s.isOptional { + return false, nil + } + + // Get model list + listResp, err := s.client.List(ctx) + if err != nil { + return false, fmt.Errorf("failed to get model list: %w", err) + } + + // Check if model is in the list + for _, model := range listResp.Models { + if model.Name == modelName { + return true, nil + } + } + + return false, nil +} + +// PullModel pulls a model +func (s *OllamaService) PullModel(ctx context.Context, modelName string) error { + // First check if the service is available + if err := s.StartService(ctx); err != nil { + return err + } + + // If service is not available but set as optional, return nil without further operations + if !s.isAvailable && s.isOptional { + logger.GetLogger(ctx).Warnf("Ollama service unavailable, unable to pull model %s", modelName) + return nil + } + + // Check if model already exists + available, err := s.IsModelAvailable(ctx, modelName) + if err != nil { + return err + } + if available { + logger.GetLogger(ctx).Infof("Model %s already exists", modelName) + return nil + } + + logger.GetLogger(ctx).Infof("Pulling model %s...", modelName) + + // Use official client to pull model + pullReq := &api.PullRequest{ + Name: modelName, + } + + err = s.client.Pull(ctx, pullReq, func(progress api.ProgressResponse) error { + if progress.Status != "" { + if progress.Total > 0 && progress.Completed > 0 { + percentage := float64(progress.Completed) / 
float64(progress.Total) * 100 + logger.GetLogger(ctx).Infof("Pull progress: %s (%.2f%%)", + progress.Status, percentage) + } else { + logger.GetLogger(ctx).Infof("Pull status: %s", progress.Status) + } + } + + if progress.Total > 0 && progress.Completed == progress.Total { + logger.GetLogger(ctx).Infof("Model %s pull completed", modelName) + } + + return nil + }) + if err != nil { + return fmt.Errorf("failed to pull model: %w", err) + } + + return nil +} + +// EnsureModelAvailable ensures the model is available, pulls it if not available +func (s *OllamaService) EnsureModelAvailable(ctx context.Context, modelName string) error { + // If service is not available but set as optional, return nil directly + if !s.IsAvailable() && s.isOptional { + logger.GetLogger(ctx).Warnf("Ollama service unavailable, skipping ensuring model %s availability", modelName) + return nil + } + + available, err := s.IsModelAvailable(ctx, modelName) + if err != nil { + if s.isOptional { + logger.GetLogger(ctx).Warnf("Failed to check model %s availability, but Ollama is set as optional", modelName) + return nil + } + return err + } + + if !available { + return s.PullModel(ctx, modelName) + } + + return nil +} + +// GetVersion gets Ollama version +func (s *OllamaService) GetVersion(ctx context.Context) (string, error) { + // If service is not available but set as optional, return empty version info + if !s.IsAvailable() && s.isOptional { + return "unavailable", nil + } + + version, err := s.client.Version(ctx) + if err != nil { + return "", fmt.Errorf("failed to get Ollama version: %w", err) + } + return version, nil +} + +// CreateModel creates a custom model +func (s *OllamaService) CreateModel(ctx context.Context, name, modelfile string) error { + req := &api.CreateRequest{ + Model: name, + Template: modelfile, // Use Template field instead of Modelfile + } + + err := s.client.Create(ctx, req, func(progress api.ProgressResponse) error { + if progress.Status != "" { + logger.GetLogger(ctx).Infof("Model creation status: %s", progress.Status) + } + return nil + }) + if err != nil { + return fmt.Errorf("failed to create model: %w", err) + } + + return nil +} + +// GetModelInfo gets model information +func (s *OllamaService) GetModelInfo(ctx context.Context, modelName string) (*api.ShowResponse, error) { + req := &api.ShowRequest{ + Name: modelName, + } + + resp, err := s.client.Show(ctx, req) + if err != nil { + return nil, fmt.Errorf("failed to get model information: %w", err) + } + + return resp, nil +} + +// ListModels lists all available models +func (s *OllamaService) ListModels(ctx context.Context) ([]string, error) { + listResp, err := s.client.List(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get model list: %w", err) + } + + modelNames := make([]string, len(listResp.Models)) + for i, model := range listResp.Models { + modelNames[i] = model.Name + } + + return modelNames, nil +} + +// DeleteModel deletes a model +func (s *OllamaService) DeleteModel(ctx context.Context, modelName string) error { + req := &api.DeleteRequest{ + Name: modelName, + } + + err := s.client.Delete(ctx, req) + if err != nil { + return fmt.Errorf("failed to delete model: %w", err) + } + + return nil +} + +// IsValidModelName checks if model name is valid +func IsValidModelName(name string) bool { + // Simple check for model name format + return name != "" && !strings.Contains(name, " ") +} + +// Chat uses Ollama chat +func (s *OllamaService) Chat(ctx context.Context, req *api.ChatRequest, fn api.ChatResponseFunc) error { + 
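+	// Illustrative call pattern (sketch only; "svc" and "msgs" are placeholder names):
+	//
+	//	req := &api.ChatRequest{Model: "qwen3:8b", Messages: msgs}
+	//	err := svc.Chat(ctx, req, func(resp api.ChatResponse) error {
+	//		fmt.Print(resp.Message.Content) // invoked once per streamed chunk
+	//		return nil
+	//	})
+	//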
// First check if service is available + if err := s.StartService(ctx); err != nil { + return err + } + + // Use official client Chat method + return s.client.Chat(ctx, req, fn) +} + +// Embeddings gets text embedding vectors +func (s *OllamaService) Embeddings(ctx context.Context, req *api.EmbedRequest) (*api.EmbedResponse, error) { + // First check if service is available + if err := s.StartService(ctx); err != nil { + return nil, err + } + // Use official client Embed method + return s.client.Embed(ctx, req) +} + +// Generate generates text (used for Rerank) +func (s *OllamaService) Generate(ctx context.Context, req *api.GenerateRequest, fn api.GenerateResponseFunc) error { + // First check if service is available + if err := s.StartService(ctx); err != nil { + return err + } + + // Use official client Generate method + return s.client.Generate(ctx, req, fn) +} diff --git a/internal/models/utils/slices.go b/internal/models/utils/slices.go new file mode 100644 index 0000000..aac713b --- /dev/null +++ b/internal/models/utils/slices.go @@ -0,0 +1,36 @@ +package utils + +// ChunkSlice splits a slice into multiple sub-slices of the specified size +func ChunkSlice[T any](slice []T, chunkSize int) [][]T { + // Handle edge cases + if len(slice) == 0 { + return [][]T{} + } + + if chunkSize <= 0 { + panic("chunkSize must be greater than 0") + } + + // Calculate how many sub-slices are needed + chunks := make([][]T, 0, (len(slice)+chunkSize-1)/chunkSize) + + // Split the slice + for i := 0; i < len(slice); i += chunkSize { + end := i + chunkSize + if end > len(slice) { + end = len(slice) + } + chunks = append(chunks, slice[i:end]) + } + + return chunks +} + +// MapSlice applies a function to each element of a slice and returns a new slice with the results +func MapSlice[A any, B any](in []A, f func(A) B) []B { + out := make([]B, 0, len(in)) + for _, item := range in { + out = append(out, f(item)) + } + return out +} diff --git a/internal/router/router.go b/internal/router/router.go new file mode 100644 index 0000000..1826cd3 --- /dev/null +++ b/internal/router/router.go @@ -0,0 +1,229 @@ +package router + +import ( + "time" + + "github.com/gin-contrib/cors" + "github.com/gin-gonic/gin" + "go.uber.org/dig" + + "github.com/Tencent/WeKnora/internal/config" + "github.com/Tencent/WeKnora/internal/handler" + "github.com/Tencent/WeKnora/internal/middleware" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// RouterParams 路由参数 +type RouterParams struct { + dig.In + + Config *config.Config + KBHandler *handler.KnowledgeBaseHandler + KnowledgeHandler *handler.KnowledgeHandler + TenantHandler *handler.TenantHandler + TenantService interfaces.TenantService + ChunkHandler *handler.ChunkHandler + SessionHandler *handler.SessionHandler + MessageHandler *handler.MessageHandler + TestDataHandler *handler.TestDataHandler + ModelHandler *handler.ModelHandler + EvaluationHandler *handler.EvaluationHandler +} + +// NewRouter 创建新的路由 +func NewRouter(params RouterParams) *gin.Engine { + r := gin.New() + + // CORS 中间件应放在最前面 + r.Use(cors.New(cors.Config{ + AllowOrigins: []string{"*"}, + AllowMethods: []string{"GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"}, + AllowHeaders: []string{"Origin", "Content-Type", "Accept", "Authorization", "X-API-Key", "X-Request-ID"}, + ExposeHeaders: []string{"Content-Length", "Access-Control-Allow-Origin"}, + AllowCredentials: true, + MaxAge: 12 * time.Hour, + })) + + // 其他中间件 + r.Use(middleware.RequestID()) + r.Use(middleware.Logger()) + r.Use(middleware.Recovery()) + 
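+	// Request flow sketch (illustrative): a request passes CORS → RequestID → Logger →
+	// Recovery → ErrorHandler → Auth before reaching its handler. Assuming the Auth
+	// middleware reads the X-API-Key header exposed in the CORS config above, a call
+	// could look like:
+	//
+	//	curl -H "X-API-Key: <tenant-api-key>" http://<host>:<port>/api/v1/knowledge-bases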
r.Use(middleware.ErrorHandler()) + r.Use(middleware.Auth(params.TenantService, params.Config)) + + // 添加OpenTelemetry追踪中间件 + r.Use(middleware.TracingMiddleware()) + + // 健康检查 + r.GET("/health", func(c *gin.Context) { + c.JSON(200, gin.H{"status": "ok"}) + }) + + // 测试数据接口(不需要认证) + r.GET("/api/v1/test-data", params.TestDataHandler.GetTestData) + + // 需要认证的API路由 + v1 := r.Group("/api/v1") + { + RegisterTenantRoutes(v1, params.TenantHandler) + RegisterKnowledgeBaseRoutes(v1, params.KBHandler) + RegisterKnowledgeRoutes(v1, params.KnowledgeHandler) + RegisterChunkRoutes(v1, params.ChunkHandler) + RegisterSessionRoutes(v1, params.SessionHandler) + RegisterChatRoutes(v1, params.SessionHandler) + RegisterMessageRoutes(v1, params.MessageHandler) + RegisterModelRoutes(v1, params.ModelHandler) + RegisterEvaluationRoutes(v1, params.EvaluationHandler) + } + + return r +} + +// RegisterChunkRoutes 注册分块相关的路由 +func RegisterChunkRoutes(r *gin.RouterGroup, handler *handler.ChunkHandler) { + // 分块路由组 + chunks := r.Group("/chunks") + { + // 获取分块列表 + chunks.GET("/:knowledge_id", handler.ListKnowledgeChunks) + // 删除分块 + chunks.DELETE("/:knowledge_id/:id", handler.DeleteChunk) + // 删除知识下的所有分块 + chunks.DELETE("/:knowledge_id", handler.DeleteChunksByKnowledgeID) + // 更新分块信息 + chunks.PUT("/:knowledge_id/:id", handler.UpdateChunk) + } +} + +// RegisterKnowledgeRoutes 注册知识相关的路由 +func RegisterKnowledgeRoutes(r *gin.RouterGroup, handler *handler.KnowledgeHandler) { + // 知识库下的知识路由组 + kb := r.Group("/knowledge-bases/:id/knowledge") + { + // 从文件创建知识 + kb.POST("/file", handler.CreateKnowledgeFromFile) + // 从URL创建知识 + kb.POST("/url", handler.CreateKnowledgeFromURL) + // 获取知识库下的知识列表 + kb.GET("", handler.ListKnowledge) + } + + // 知识路由组 + k := r.Group("/knowledge") + { + // 批量获取知识 + k.GET("/batch", handler.GetKnowledgeBatch) + // 获取知识详情 + k.GET("/:id", handler.GetKnowledge) + // 删除知识 + k.DELETE("/:id", handler.DeleteKnowledge) + // 更新知识 + k.PUT("/:id", handler.UpdateKnowledge) + // 获取知识文件 + k.GET("/:id/download", handler.DownloadKnowledgeFile) + // 更新图像分块信息 + k.PUT("/image/:id/:chunk_id", handler.UpdateImageInfo) + } +} + +// RegisterKnowledgeBaseRoutes 注册知识库相关的路由 +func RegisterKnowledgeBaseRoutes(r *gin.RouterGroup, handler *handler.KnowledgeBaseHandler) { + // 知识库路由组 + kb := r.Group("/knowledge-bases") + { + // 创建知识库 + kb.POST("", handler.CreateKnowledgeBase) + // 获取知识库列表 + kb.GET("", handler.ListKnowledgeBases) + // 获取知识库详情 + kb.GET("/:id", handler.GetKnowledgeBase) + // 更新知识库 + kb.PUT("/:id", handler.UpdateKnowledgeBase) + // 删除知识库 + kb.DELETE("/:id", handler.DeleteKnowledgeBase) + // 混合搜索 + kb.GET("/:id/hybrid-search", handler.HybridSearch) + // 拷贝知识库 + kb.POST("/copy", handler.CopyKnowledgeBase) + } +} + +// RegisterMessageRoutes 注册消息相关的路由 +func RegisterMessageRoutes(r *gin.RouterGroup, handler *handler.MessageHandler) { + // 消息路由组 + messages := r.Group("/messages") + { + // 加载更早的消息,用于向上滚动加载 + messages.GET("/:session_id/load", handler.LoadMessages) + // 删除消息 + messages.DELETE("/:session_id/:id", handler.DeleteMessage) + } +} + +// RegisterSessionRoutes 注册路由 +func RegisterSessionRoutes(r *gin.RouterGroup, handler *handler.SessionHandler) { + sessions := r.Group("/sessions") + { + sessions.POST("", handler.CreateSession) + sessions.GET("/:id", handler.GetSession) + sessions.GET("", handler.GetSessionsByTenant) + sessions.PUT("/:id", handler.UpdateSession) + sessions.DELETE("/:id", handler.DeleteSession) + sessions.POST("/:session_id/generate_title", handler.GenerateTitle) + // 继续接收活跃流 + 
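+		// Illustrative call shape, resolved against the groups registered above
+		// (exact semantics follow handler.ContinueStream):
+		//
+		//	GET /api/v1/sessions/continue-stream/{session_id}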
sessions.GET("/continue-stream/:session_id", handler.ContinueStream) + } +} + +// RegisterChatRoutes 注册路由 +func RegisterChatRoutes(r *gin.RouterGroup, handler *handler.SessionHandler) { + knowledgeChat := r.Group("/knowledge-chat") + { + knowledgeChat.POST("/:session_id", handler.KnowledgeQA) + } + + // 新增知识检索接口,不需要session_id + knowledgeSearch := r.Group("/knowledge-search") + { + knowledgeSearch.POST("", handler.SearchKnowledge) + } +} + +// RegisterTenantRoutes 注册租户相关的路由 +func RegisterTenantRoutes(r *gin.RouterGroup, handler *handler.TenantHandler) { + // 租户路由组 + tenantRoutes := r.Group("/tenants") + { + tenantRoutes.POST("", handler.CreateTenant) + tenantRoutes.GET("/:id", handler.GetTenant) + tenantRoutes.PUT("/:id", handler.UpdateTenant) + tenantRoutes.DELETE("/:id", handler.DeleteTenant) + tenantRoutes.GET("", handler.ListTenants) + } +} + +// RegisterModelRoutes 注册模型相关的路由 +func RegisterModelRoutes(r *gin.RouterGroup, handler *handler.ModelHandler) { + // 模型路由组 + models := r.Group("/models") + { + // 创建模型 + models.POST("", handler.CreateModel) + // 获取模型列表 + models.GET("", handler.ListModels) + // 获取单个模型 + models.GET("/:id", handler.GetModel) + // 更新模型 + models.PUT("/:id", handler.UpdateModel) + // 删除模型 + models.DELETE("/:id", handler.DeleteModel) + } +} + +func RegisterEvaluationRoutes(r *gin.RouterGroup, handler *handler.EvaluationHandler) { + evaluationRoutes := r.Group("/evaluation") + { + evaluationRoutes.POST("/", handler.Evaluation) + evaluationRoutes.GET("/", handler.GetEvaluationResult) + } +} diff --git a/internal/runtime/container.go b/internal/runtime/container.go new file mode 100644 index 0000000..00fceb5 --- /dev/null +++ b/internal/runtime/container.go @@ -0,0 +1,23 @@ +// Package runtime 提供应用程序运行时的依赖注入容器 +// 该包使用 uber 的 dig 库来管理依赖项注入 +package runtime + +import ( + "go.uber.org/dig" +) + +// container 是应用程序的全局依赖注入容器 +// 所有服务和组件都通过它进行注册和解析 +var container *dig.Container + +// init 初始化依赖注入容器 +// 在程序启动时自动调用 +func init() { + container = dig.New() +} + +// GetContainer 返回全局依赖注入容器的引用 +// 供其他包使用以注册或获取服务 +func GetContainer() *dig.Container { + return container +} diff --git a/internal/stream/factory.go b/internal/stream/factory.go new file mode 100644 index 0000000..7205441 --- /dev/null +++ b/internal/stream/factory.go @@ -0,0 +1,36 @@ +package stream + +import ( + "os" + "strconv" + "time" + + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// 流管理器类型 +const ( + TypeMemory = "memory" + TypeRedis = "redis" +) + +// NewStreamManager 创建流管理器 +func NewStreamManager() (interfaces.StreamManager, error) { + switch os.Getenv("STREAM_MANAGER_TYPE") { + case TypeRedis: + db, err := strconv.Atoi(os.Getenv("REDIS_DB")) + if err != nil { + db = 0 + } + ttl := time.Hour // 默认1小时 + return NewRedisStreamManager( + os.Getenv("REDIS_ADDR"), + os.Getenv("REDIS_PASSWORD"), + db, + os.Getenv("REDIS_PREFIX"), + ttl, + ) + default: + return NewMemoryStreamManager(), nil + } +} diff --git a/internal/stream/memory_manager.go b/internal/stream/memory_manager.go new file mode 100644 index 0000000..3210eea --- /dev/null +++ b/internal/stream/memory_manager.go @@ -0,0 +1,123 @@ +package stream + +import ( + "context" + "sync" + "time" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" +) + +// 内存流信息 +type memoryStreamInfo struct { + sessionID string + requestID string + query string + content string + knowledgeReferences types.References + lastUpdated time.Time + isCompleted bool +} + +// MemoryStreamManager 基于内存的流管理器实现 +type 
MemoryStreamManager struct { + // 会话ID -> 请求ID -> 流数据 + activeStreams map[string]map[string]*memoryStreamInfo + mu sync.RWMutex +} + +// NewMemoryStreamManager 创建一个新的内存流管理器 +func NewMemoryStreamManager() *MemoryStreamManager { + return &MemoryStreamManager{ + activeStreams: make(map[string]map[string]*memoryStreamInfo), + } +} + +// RegisterStream 注册一个新的流 +func (m *MemoryStreamManager) RegisterStream(ctx context.Context, sessionID, requestID, query string) error { + m.mu.Lock() + defer m.mu.Unlock() + + info := &memoryStreamInfo{ + sessionID: sessionID, + requestID: requestID, + query: query, + lastUpdated: time.Now(), + } + + if _, exists := m.activeStreams[sessionID]; !exists { + m.activeStreams[sessionID] = make(map[string]*memoryStreamInfo) + } + + m.activeStreams[sessionID][requestID] = info + return nil +} + +// UpdateStream 更新流内容 +func (m *MemoryStreamManager) UpdateStream(ctx context.Context, + sessionID, requestID string, content string, references types.References, +) error { + m.mu.Lock() + defer m.mu.Unlock() + + if sessionMap, exists := m.activeStreams[sessionID]; exists { + if stream, found := sessionMap[requestID]; found { + stream.content += content + if len(references) > 0 { + stream.knowledgeReferences = references + } + stream.lastUpdated = time.Now() + } + } + return nil +} + +// CompleteStream 完成流 +func (m *MemoryStreamManager) CompleteStream(ctx context.Context, sessionID, requestID string) error { + m.mu.Lock() + defer m.mu.Unlock() + + if sessionMap, exists := m.activeStreams[sessionID]; exists { + if stream, found := sessionMap[requestID]; found { + stream.isCompleted = true + // 30s 后删除流 + go func() { + time.Sleep(30 * time.Second) + m.mu.Lock() + defer m.mu.Unlock() + delete(sessionMap, requestID) + if len(sessionMap) == 0 { + delete(m.activeStreams, sessionID) + } + }() + } + } + return nil +} + +// GetStream 获取特定流 +func (m *MemoryStreamManager) GetStream(ctx context.Context, + sessionID, requestID string, +) (*interfaces.StreamInfo, error) { + m.mu.RLock() + defer m.mu.RUnlock() + + if sessionMap, exists := m.activeStreams[sessionID]; exists { + if stream, found := sessionMap[requestID]; found { + return &interfaces.StreamInfo{ + SessionID: stream.sessionID, + RequestID: stream.requestID, + Query: stream.query, + Content: stream.content, + KnowledgeReferences: stream.knowledgeReferences, + LastUpdated: stream.lastUpdated, + IsCompleted: stream.isCompleted, + }, nil + } + } + return nil, nil +} + +// 确保实现了接口 +var _ interfaces.StreamManager = (*MemoryStreamManager)(nil) diff --git a/internal/stream/redis_manager.go b/internal/stream/redis_manager.go new file mode 100644 index 0000000..ec4e138 --- /dev/null +++ b/internal/stream/redis_manager.go @@ -0,0 +1,192 @@ +package stream + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "github.com/Tencent/WeKnora/internal/types" + "github.com/Tencent/WeKnora/internal/types/interfaces" + "github.com/redis/go-redis/v9" +) + +// redisStreamInfo Redis存储的流信息 +type redisStreamInfo struct { + SessionID string `json:"session_id"` + RequestID string `json:"request_id"` + Query string `json:"query"` + Content string `json:"content"` + KnowledgeReferences types.References `json:"knowledge_references"` + LastUpdated time.Time `json:"last_updated"` + IsCompleted bool `json:"is_completed"` +} + +// RedisStreamManager 基于Redis的流管理器实现 +type RedisStreamManager struct { + client *redis.Client + ttl time.Duration // 流数据在Redis中的过期时间 + prefix string // Redis键前缀 +} + +// NewRedisStreamManager 创建一个新的Redis流管理器 +func 
NewRedisStreamManager(redisAddr, redisPassword string, + redisDB int, prefix string, ttl time.Duration, +) (*RedisStreamManager, error) { + client := redis.NewClient(&redis.Options{ + Addr: redisAddr, + Password: redisPassword, + DB: redisDB, + }) + + // 验证连接 + _, err := client.Ping(context.Background()).Result() + if err != nil { + return nil, fmt.Errorf("连接Redis失败: %w", err) + } + + if ttl == 0 { + ttl = 24 * time.Hour // 默认TTL为24小时 + } + + if prefix == "" { + prefix = "stream:" // 默认前缀 + } + + return &RedisStreamManager{ + client: client, + ttl: ttl, + prefix: prefix, + }, nil +} + +// 构建Redis键 +func (r *RedisStreamManager) buildKey(sessionID, requestID string) string { + return fmt.Sprintf("%s:%s:%s", r.prefix, sessionID, requestID) +} + +// RegisterStream 注册一个新的流 +func (r *RedisStreamManager) RegisterStream(ctx context.Context, sessionID, requestID, query string) error { + info := &redisStreamInfo{ + SessionID: sessionID, + RequestID: requestID, + Query: query, + LastUpdated: time.Now(), + } + + data, err := json.Marshal(info) + if err != nil { + return fmt.Errorf("序列化流信息失败: %w", err) + } + + key := r.buildKey(sessionID, requestID) + return r.client.Set(ctx, key, data, r.ttl).Err() +} + +// UpdateStream 更新流内容 +func (r *RedisStreamManager) UpdateStream(ctx context.Context, sessionID, requestID string, content string, references types.References) error { + key := r.buildKey(sessionID, requestID) + + // 获取当前数据 + data, err := r.client.Get(ctx, key).Bytes() + if err != nil { + if err == redis.Nil { + return nil // 键不存在,可能已过期 + } + return fmt.Errorf("获取流数据失败: %w", err) + } + + var info redisStreamInfo + if err := json.Unmarshal(data, &info); err != nil { + return fmt.Errorf("解析流数据失败: %w", err) + } + + // 更新数据 + info.Content += content + if len(references) > 0 { + info.KnowledgeReferences = references + } + info.LastUpdated = time.Now() + + // 保存回Redis + updatedData, err := json.Marshal(info) + if err != nil { + return fmt.Errorf("序列化更新的流信息失败: %w", err) + } + + return r.client.Set(ctx, key, updatedData, r.ttl).Err() +} + +// CompleteStream 完成流 +func (r *RedisStreamManager) CompleteStream(ctx context.Context, sessionID, requestID string) error { + key := r.buildKey(sessionID, requestID) + + // 获取当前数据 + data, err := r.client.Get(ctx, key).Bytes() + if err != nil { + if err == redis.Nil { + return nil // 键不存在,可能已过期 + } + return fmt.Errorf("获取流数据失败: %w", err) + } + + var info redisStreamInfo + if err := json.Unmarshal(data, &info); err != nil { + return fmt.Errorf("解析流数据失败: %w", err) + } + + // 标记为完成 + info.IsCompleted = true + info.LastUpdated = time.Now() + + // 保存回Redis + updatedData, err := json.Marshal(info) + if err != nil { + return fmt.Errorf("序列化更新的流信息失败: %w", err) + } + + // 30s 后删除流 + go func() { + time.Sleep(30 * time.Second) + r.client.Del(ctx, key) + }() + return r.client.Set(ctx, key, updatedData, r.ttl).Err() +} + +// GetStream 获取特定流 +func (r *RedisStreamManager) GetStream(ctx context.Context, sessionID, requestID string) (*interfaces.StreamInfo, error) { + key := r.buildKey(sessionID, requestID) + + // 获取数据 + data, err := r.client.Get(ctx, key).Bytes() + if err != nil { + if err == redis.Nil { + return nil, nil // 键不存在 + } + return nil, fmt.Errorf("获取流数据失败: %w", err) + } + + var info redisStreamInfo + if err := json.Unmarshal(data, &info); err != nil { + return nil, fmt.Errorf("解析流数据失败: %w", err) + } + + // 转换为接口结构 + return &interfaces.StreamInfo{ + SessionID: info.SessionID, + RequestID: info.RequestID, + Query: info.Query, + Content: info.Content, + KnowledgeReferences: 
info.KnowledgeReferences, + LastUpdated: info.LastUpdated, + IsCompleted: info.IsCompleted, + }, nil +} + +// Close 关闭Redis连接 +func (r *RedisStreamManager) Close() error { + return r.client.Close() +} + +// 确保实现了接口 +var _ interfaces.StreamManager = (*RedisStreamManager)(nil) diff --git a/internal/tracing/init.go b/internal/tracing/init.go new file mode 100644 index 0000000..0d32245 --- /dev/null +++ b/internal/tracing/init.go @@ -0,0 +1,105 @@ +package tracing + +import ( + "context" + "log" + "os" + "time" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/exporters/stdout/stdouttrace" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.24.0" + "go.opentelemetry.io/otel/trace" +) + +const ( + AppName = "WeKnoraApp" +) + +type Tracer struct { + Cleanup func(context.Context) error +} + +var tracer trace.Tracer + +// InitTracer initializes OpenTelemetry tracer +func InitTracer() (*Tracer, error) { + // Create resource description + labels := []attribute.KeyValue{ + semconv.TelemetrySDKLanguageGo, + semconv.ServiceNameKey.String(AppName), + } + res := resource.NewWithAttributes(semconv.SchemaURL, labels...) + var err error + + // First try to create OTLP exporter (can connect to Jaeger, Zipkin, etc.) + var traceExporter sdktrace.SpanExporter + if endpoint := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"); endpoint != "" { + // Use gRPC exporter + client := otlptracegrpc.NewClient( + otlptracegrpc.WithEndpoint(endpoint), + otlptracegrpc.WithInsecure(), + ) + traceExporter, err = otlptrace.New(context.Background(), client) + if err != nil { + return nil, err + } + } else { + // If no OTLP endpoint is set, default to standard output + traceExporter, err = stdouttrace.New() + if err != nil { + return nil, err + } + } + + // Create batch SpanProcessor + bsp := sdktrace.NewBatchSpanProcessor(traceExporter) + + sampler := sdktrace.AlwaysSample() + + // Create and register TracerProvider + tp := sdktrace.NewTracerProvider( + sdktrace.WithSampler(sampler), + sdktrace.WithResource(res), + sdktrace.WithSpanProcessor(bsp), + ) + otel.SetTracerProvider(tp) + + // Set global propagator + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + )) + + // Create Tracer for project use + tracer = tp.Tracer(AppName) + + // Return cleanup function + return &Tracer{ + Cleanup: func(ctx context.Context) error { + ctx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + if err := tp.Shutdown(ctx); err != nil { + log.Printf("Error shutting down tracer provider: %v", err) + return err + } + return nil + }, + }, nil +} + +// GetTracer gets global Tracer +func GetTracer() trace.Tracer { + return tracer +} + +// Create context with span +func ContextWithSpan(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) { + return GetTracer().Start(ctx, name, opts...) 
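+	// Wiring sketch (illustrative; the actual wiring lives outside this file): initialize
+	// the tracer once at startup, defer its cleanup, then open spans around units of work.
+	//
+	//	t, err := tracing.InitTracer()
+	//	if err != nil {
+	//		log.Fatalf("init tracer: %v", err)
+	//	}
+	//	defer t.Cleanup(context.Background())
+	//
+	//	ctx, span := tracing.ContextWithSpan(ctx, "knowledge.hybrid_search") // example span name
+	//	defer span.End()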
+} diff --git a/internal/types/chat.go b/internal/types/chat.go new file mode 100644 index 0000000..3229cd3 --- /dev/null +++ b/internal/types/chat.go @@ -0,0 +1,64 @@ +package types + +import ( + "database/sql/driver" + "encoding/json" +) + +// ChatResponse chat response +type ChatResponse struct { + Content string `json:"content"` + // Usage information + Usage struct { + // Prompt tokens + PromptTokens int `json:"prompt_tokens"` + // Completion tokens + CompletionTokens int `json:"completion_tokens"` + // Total tokens + TotalTokens int `json:"total_tokens"` + } `json:"usage"` +} + +// Response type +type ResponseType string + +const ( + // Answer response type + ResponseTypeAnswer ResponseType = "answer" + // References response type + ResponseTypeReferences ResponseType = "references" +) + +// StreamResponse stream response +type StreamResponse struct { + // Unique identifier + ID string `json:"id"` + // Response type + ResponseType ResponseType `json:"response_type"` + // Current fragment content + Content string `json:"content"` + // Whether the response is complete + Done bool `json:"done"` + // Knowledge references + KnowledgeReferences References `json:"knowledge_references"` +} + +// References references +type References []*SearchResult + +// Value implements the driver.Valuer interface, used to convert References to database values +func (c References) Value() (driver.Value, error) { + return json.Marshal(c) +} + +// Scan implements the sql.Scanner interface, used to convert database values to References +func (c *References) Scan(value interface{}) error { + if value == nil { + return nil + } + b, ok := value.([]byte) + if !ok { + return nil + } + return json.Unmarshal(b, c) +} diff --git a/internal/types/chat_manage.go b/internal/types/chat_manage.go new file mode 100644 index 0000000..917cbdb --- /dev/null +++ b/internal/types/chat_manage.go @@ -0,0 +1,114 @@ +package types + +// ChatManage represents the configuration and state for a chat session +// including query processing, search parameters, and model configurations +type ChatManage struct { + SessionID string `json:"session_id"` // Unique identifier for the chat session + Query string `json:"query,omitempty"` // Original user query + ProcessedQuery string `json:"processed_query,omitempty"` // Query after preprocessing + RewriteQuery string `json:"rewrite_query,omitempty"` // Query after rewriting for better retrieval + History []*History `json:"history,omitempty"` // Chat history for context + + KnowledgeBaseID string `json:"knowledge_base_id"` // ID of the knowledge base to search against + VectorThreshold float64 `json:"vector_threshold"` // Minimum score threshold for vector search results + KeywordThreshold float64 `json:"keyword_threshold"` // Minimum score threshold for keyword search results + EmbeddingTopK int `json:"embedding_top_k"` // Number of top results to retrieve from embedding search + VectorDatabase string `json:"vector_database"` // Vector database type/name to use + + RerankModelID string `json:"rerank_model_id"` // Model ID for reranking search results + RerankTopK int `json:"rerank_top_k"` // Number of top results after reranking + RerankThreshold float64 `json:"rerank_threshold"` // Minimum score threshold for reranked results + + ChatModelID string `json:"chat_model_id"` // Model ID for chat completion + SummaryConfig SummaryConfig `json:"summary_config"` // Configuration for summary generation + FallbackStrategy FallbackStrategy `json:"fallback_strategy"` // Strategy when no relevant results 
are found + FallbackResponse string `json:"fallback_response"` // Default response when fallback occurs + + // Internal fields for pipeline data processing + SearchResult []*SearchResult `json:"-"` // Results from search phase + RerankResult []*SearchResult `json:"-"` // Results after reranking + MergeResult []*SearchResult `json:"-"` // Final merged results after all processing + UserContent string `json:"-"` // Processed user content + ChatResponse *ChatResponse `json:"-"` // Final response from chat model + ResponseChan <-chan StreamResponse `json:"-"` // Channel for streaming responses +} + +// Clone creates a deep copy of the ChatManage object +func (c *ChatManage) Clone() *ChatManage { + return &ChatManage{ + Query: c.Query, + ProcessedQuery: c.ProcessedQuery, + RewriteQuery: c.RewriteQuery, + SessionID: c.SessionID, + KnowledgeBaseID: c.KnowledgeBaseID, + VectorThreshold: c.VectorThreshold, + KeywordThreshold: c.KeywordThreshold, + EmbeddingTopK: c.EmbeddingTopK, + VectorDatabase: c.VectorDatabase, + RerankModelID: c.RerankModelID, + RerankTopK: c.RerankTopK, + RerankThreshold: c.RerankThreshold, + ChatModelID: c.ChatModelID, + SummaryConfig: SummaryConfig{ + MaxTokens: c.SummaryConfig.MaxTokens, + RepeatPenalty: c.SummaryConfig.RepeatPenalty, + TopK: c.SummaryConfig.TopK, + TopP: c.SummaryConfig.TopP, + FrequencyPenalty: c.SummaryConfig.FrequencyPenalty, + PresencePenalty: c.SummaryConfig.PresencePenalty, + Prompt: c.SummaryConfig.Prompt, + ContextTemplate: c.SummaryConfig.ContextTemplate, + NoMatchPrefix: c.SummaryConfig.NoMatchPrefix, + Temperature: c.SummaryConfig.Temperature, + Seed: c.SummaryConfig.Seed, + MaxCompletionTokens: c.SummaryConfig.MaxCompletionTokens, + }, + FallbackStrategy: c.FallbackStrategy, + FallbackResponse: c.FallbackResponse, + } +} + +// EventType represents different stages in the RAG (Retrieval Augmented Generation) pipeline +type EventType string + +const ( + PREPROCESS_QUERY EventType = "preprocess_query" // Query preprocessing stage + REWRITE_QUERY EventType = "rewrite_query" // Query rewriting for better retrieval + CHUNK_SEARCH EventType = "chunk_search" // Search for relevant chunks + CHUNK_RERANK EventType = "chunk_rerank" // Rerank search results + CHUNK_MERGE EventType = "chunk_merge" // Merge similar chunks + INTO_CHAT_MESSAGE EventType = "into_chat_message" // Convert chunks into chat messages + CHAT_COMPLETION EventType = "chat_completion" // Generate chat completion + CHAT_COMPLETION_STREAM EventType = "chat_completion_stream" // Stream chat completion + STREAM_FILTER EventType = "stream_filter" // Filter streaming output + FILTER_TOP_K EventType = "filter_top_k" // Keep only top K results +) + +// Pipline defines the sequence of events for different chat modes +var Pipline = map[string][]EventType{ + "chat": { // Simple chat without retrieval + CHAT_COMPLETION, + }, + "chat_stream": { // Streaming chat without retrieval + CHAT_COMPLETION_STREAM, + STREAM_FILTER, + }, + "rag": { // Retrieval Augmented Generation + CHUNK_SEARCH, + CHUNK_RERANK, + CHUNK_MERGE, + INTO_CHAT_MESSAGE, + CHAT_COMPLETION, + }, + "rag_stream": { // Streaming Retrieval Augmented Generation + REWRITE_QUERY, + PREPROCESS_QUERY, + CHUNK_SEARCH, + CHUNK_RERANK, + CHUNK_MERGE, + FILTER_TOP_K, + INTO_CHAT_MESSAGE, + CHAT_COMPLETION_STREAM, + STREAM_FILTER, + }, +} diff --git a/internal/types/chunk.go b/internal/types/chunk.go new file mode 100644 index 0000000..a8448f6 --- /dev/null +++ b/internal/types/chunk.go @@ -0,0 +1,90 @@ +// Package types defines data structures 
and types used throughout the system +// These types are shared across different service modules to ensure data consistency +package types + +import ( + "time" + + "gorm.io/gorm" +) + +// ChunkType 定义了不同类型的 Chunk +type ChunkType string + +const ( + // ChunkTypeText 表示普通的文本 Chunk + ChunkTypeText ChunkType = "text" + // ChunkTypeImageOCR 表示图片 OCR 文本的 Chunk + ChunkTypeImageOCR ChunkType = "image_ocr" + // ChunkTypeImageCaption 表示图片描述的 Chunk + ChunkTypeImageCaption ChunkType = "image_caption" + // ChunkTypeSummary 表示摘要类型的 Chunk + ChunkTypeSummary = "summary" + // ChunkTypeEntity 表示实体类型的 Chunk + ChunkTypeEntity ChunkType = "entity" + // ChunkTypeRelationship 表示关系类型的 Chunk + ChunkTypeRelationship ChunkType = "relationship" +) + +// ImageInfo 表示与 Chunk 关联的图片信息 +type ImageInfo struct { + // 图片URL(COS) + URL string `json:"url" gorm:"type:text"` + // 原始图片URL + OriginalURL string `json:"original_url" gorm:"type:text"` + // 图片在文本中的开始位置 + StartPos int `json:"start_pos"` + // 图片在文本中的结束位置 + EndPos int `json:"end_pos"` + // 图片描述 + Caption string `json:"caption"` + // 图片OCR文本 + OCRText string `json:"ocr_text"` +} + +// Chunk represents a document chunk +// Chunks are meaningful text segments extracted from original documents +// and are the basic units of knowledge base retrieval +// Each chunk contains a portion of the original content +// and maintains its positional relationship with the original text +// Chunks can be independently embedded as vectors and retrieved, supporting precise content localization +type Chunk struct { + // Unique identifier of the chunk, using UUID format + ID string `json:"id" gorm:"type:varchar(36);primaryKey"` + // Tenant ID, used for multi-tenant isolation + TenantID uint `json:"tenant_id"` + // ID of the parent knowledge, associated with the Knowledge model + KnowledgeID string `json:"knowledge_id"` + // ID of the knowledge base, for quick location + KnowledgeBaseID string `json:"knowledge_base_id"` + // Actual text content of the chunk + Content string `json:"content"` + // Index position of the chunk in the original document + ChunkIndex int `json:"chunk_index"` + // Whether the chunk is enabled, can be used to temporarily disable certain chunks + IsEnabled bool `json:"is_enabled" gorm:"default:true"` + // Starting character position in the original text + StartAt int `json:"start_at" ` + // Ending character position in the original text + EndAt int `json:"end_at"` + // Previous chunk ID + PreChunkID string `json:"pre_chunk_id"` + // Next chunk ID + NextChunkID string `json:"next_chunk_id"` + // Chunk 类型,用于区分不同类型的 Chunk + ChunkType ChunkType `json:"chunk_type" gorm:"type:varchar(20);default:'text'"` + // 父 Chunk ID,用于关联图片 Chunk 和原始文本 Chunk + ParentChunkID string `json:"parent_chunk_id" gorm:"type:varchar(36);index"` + // 关系 Chunk ID,用于关联关系 Chunk 和原始文本 Chunk + RelationChunks JSON `json:"relation_chunks" gorm:"type:json"` + // 间接关系 Chunk ID,用于关联间接关系 Chunk 和原始文本 Chunk + IndirectRelationChunks JSON `json:"indirect_relation_chunks" gorm:"type:json"` + // 图片信息,存储为 JSON + ImageInfo string `json:"image_info" gorm:"type:text"` + // Chunk creation time + CreatedAt time.Time `json:"created_at"` + // Chunk last update time + UpdatedAt time.Time `json:"updated_at"` + // Soft delete marker, supports data recovery + DeletedAt gorm.DeletedAt `json:"deleted_at" gorm:"index"` +} diff --git a/internal/types/cleanup.go b/internal/types/cleanup.go new file mode 100644 index 0000000..6fcb066 --- /dev/null +++ b/internal/types/cleanup.go @@ -0,0 +1,4 @@ +package types + +// CleanupFunc 
represents the resource cleanup function +type CleanupFunc func() error diff --git a/internal/types/const.go b/internal/types/const.go new file mode 100644 index 0000000..1588868 --- /dev/null +++ b/internal/types/const.go @@ -0,0 +1,20 @@ +package types + +// ContextKey defines a type for context keys to avoid string collision +type ContextKey string + +const ( + // TenantIDContextKey is the context key for tenant ID + TenantIDContextKey ContextKey = "TenantID" + // TenantInfoContextKey is the context key for tenant information + TenantInfoContextKey ContextKey = "TenantInfo" + // RequestIDContextKey is the context key for request ID + RequestIDContextKey ContextKey = "RequestID" + // LoggerContextKey is the context key for logger + LoggerContextKey ContextKey = "Logger" +) + +// String returns the string representation of the context key +func (c ContextKey) String() string { + return string(c) +} diff --git a/internal/types/dataset.go b/internal/types/dataset.go new file mode 100644 index 0000000..d4fc7df --- /dev/null +++ b/internal/types/dataset.go @@ -0,0 +1,11 @@ +package types + +// QAPair represents a complete QA example with question, related passages and answer +type QAPair struct { + QID int // Question ID + Question string // Question text + PIDs []int // Related passage IDs + Passages []string // Passage texts + AID int // Answer ID + Answer string // Answer text +} diff --git a/internal/types/embedding.go b/internal/types/embedding.go new file mode 100644 index 0000000..b21cd78 --- /dev/null +++ b/internal/types/embedding.go @@ -0,0 +1,33 @@ +package types + +// SourceType represents the type of content source +type SourceType int + +const ( + ChunkSourceType SourceType = iota // Source is a text chunk + PassageSourceType // Source is a passage + SummarySourceType // Source is a summary +) + +// MatchType represents the type of matching algorithm +type MatchType int + +const ( + MatchTypeEmbedding MatchType = iota + MatchTypeKeywords + MatchTypeNearByChunk + MatchTypeHistory + MatchTypeParentChunk // 父Chunk匹配类型 + MatchTypeRelationChunk // 关系Chunk匹配类型 +) + +// IndexInfo contains information about indexed content +type IndexInfo struct { + ID string // Unique identifier + Content string // Content text + SourceID string // ID of the source document + SourceType SourceType // Type of the source + ChunkID string // ID of the text chunk + KnowledgeID string // ID of the knowledge + KnowledgeBaseID string // ID of the knowledge base +} diff --git a/internal/types/errors.go b/internal/types/errors.go new file mode 100644 index 0000000..c9b0dfc --- /dev/null +++ b/internal/types/errors.go @@ -0,0 +1,46 @@ +package types + +import "fmt" + +// StorageQuotaExceededError represents the storage quota exceeded error +type StorageQuotaExceededError struct { + Message string +} + +// Error implements the error interface +func (e *StorageQuotaExceededError) Error() string { + return e.Message +} + +// NewStorageQuotaExceededError creates a storage quota exceeded error +func NewStorageQuotaExceededError() *StorageQuotaExceededError { + return &StorageQuotaExceededError{ + Message: "Storage quota exceeded", + } +} + +// DuplicateKnowledgeError duplicate knowledge error, contains the existing knowledge object +type DuplicateKnowledgeError struct { + Message string + Knowledge *Knowledge +} + +func (e *DuplicateKnowledgeError) Error() string { + return e.Message +} + +// NewDuplicateFileError creates a duplicate file error +func NewDuplicateFileError(knowledge *Knowledge) 
*DuplicateKnowledgeError { + return &DuplicateKnowledgeError{ + Message: fmt.Sprintf("File already exists: %s", knowledge.FileName), + Knowledge: knowledge, + } +} + +// NewDuplicateURLError creates a duplicate URL error +func NewDuplicateURLError(knowledge *Knowledge) *DuplicateKnowledgeError { + return &DuplicateKnowledgeError{ + Message: fmt.Sprintf("URL already exists: %s", knowledge.Source), + Knowledge: knowledge, + } +} diff --git a/internal/types/evaluation.go b/internal/types/evaluation.go new file mode 100644 index 0000000..5e477c6 --- /dev/null +++ b/internal/types/evaluation.go @@ -0,0 +1,99 @@ +package types + +import ( + "encoding/json" + "time" + + "github.com/yanyiwu/gojieba" +) + +// Jieba is a global instance of Chinese text segmentation tool +var Jieba *gojieba.Jieba = gojieba.NewJieba() + +// EvaluationStatue represents the status of an evaluation task +type EvaluationStatue int + +const ( + EvaluationStatuePending EvaluationStatue = iota // Task is waiting to start + EvaluationStatueRunning // Task is in progress + EvaluationStatueSuccess // Task completed successfully + EvaluationStatueFailed // Task failed +) + +// EvaluationTask contains information about an evaluation task +type EvaluationTask struct { + ID string `json:"id"` // Unique task ID + TenantID uint `json:"tenant_id"` // Tenant/Organization ID + DatasetID string `json:"dataset_id"` // Dataset ID for evaluation + + StartTime time.Time `json:"start_time"` // Task start time + Status EvaluationStatue `json:"status"` // Current task status + ErrMsg string `json:"err_msg,omitempty"` // Error message if failed + + Total int `json:"total,omitempty"` // Total items to evaluate + Finished int `json:"finished,omitempty"` // Completed items count +} + +// EvaluationDetail contains detailed evaluation information +type EvaluationDetail struct { + Task *EvaluationTask `json:"task"` // Evaluation task info + Params *ChatManage `json:"params"` // Evaluation parameters + Metric *MetricResult `json:"metric,omitempty"` // Evaluation metrics +} + +// String returns JSON representation of EvaluationTask +func (e *EvaluationTask) String() string { + b, _ := json.Marshal(e) + return string(b) +} + +// MetricInput contains input data for metric calculation +type MetricInput struct { + RetrievalGT [][]int // Ground truth for retrieval + RetrievalIDs []int // Retrieved IDs + + GeneratedTexts string // Generated text for evaluation + GeneratedGT string // Ground truth text for comparison +} + +// MetricResult contains evaluation metrics +type MetricResult struct { + RetrievalMetrics RetrievalMetrics `json:"retrieval_metrics"` // Retrieval performance metrics + GenerationMetrics GenerationMetrics `json:"generation_metrics"` // Text generation quality metrics +} + +// RetrievalMetrics contains metrics for retrieval evaluation +type RetrievalMetrics struct { + Precision float64 `json:"precision"` // Precision score + Recall float64 `json:"recall"` // Recall score + + NDCG3 float64 `json:"ndcg3"` // Normalized Discounted Cumulative Gain at 3 + NDCG10 float64 `json:"ndcg10"` // Normalized Discounted Cumulative Gain at 10 + MRR float64 `json:"mrr"` // Mean Reciprocal Rank + MAP float64 `json:"map"` // Mean Average Precision +} + +// GenerationMetrics contains metrics for text generation evaluation +type GenerationMetrics struct { + BLEU1 float64 `json:"bleu1"` // BLEU-1 score + BLEU2 float64 `json:"bleu2"` // BLEU-2 score + BLEU4 float64 `json:"bleu4"` // BLEU-4 score + + ROUGE1 float64 `json:"rouge1"` // ROUGE-1 score + ROUGE2 float64 
`json:"rouge2"` // ROUGE-2 score + ROUGEL float64 `json:"rougel"` // ROUGE-L score +} + +// EvalState represents different stages of evaluation process +type EvalState int + +const ( + StateBegin EvalState = iota // Evaluation started + StateAfterQaPairs // After loading QA pairs + StateAfterDataset // After processing dataset + StateAfterEmbedding // After generating embeddings + StateAfterVectorSearch // After vector search + StateAfterRerank // After reranking + StateAfterComplete // After completion + StateEnd // Evaluation ended +) diff --git a/internal/types/graph.go b/internal/types/graph.go new file mode 100644 index 0000000..e3f750a --- /dev/null +++ b/internal/types/graph.go @@ -0,0 +1,53 @@ +// Package types defines the core data structures and interfaces used throughout the WeKnora system. +package types + +import "context" + +// Entity represents a node in the knowledge graph extracted from document chunks. +// Each entity corresponds to a meaningful concept, person, place or thing identified in the text. +type Entity struct { + ID string // Unique identifier for the entity + ChunkIDs []string // References to document chunks where this entity appears + Frequency int `json:"-"` // Number of occurrences in the corpus + Degree int `json:"-"` // Number of connections to other entities + Title string `json:"title"` // Display name of the entity + Type string `json:"type"` // Classification of the entity (e.g., person, concept, organization) + Description string `json:"description"` // Brief explanation or context about the entity +} + +// Relationship represents a connection between two entities in the knowledge graph. +// It captures the semantic connection between entities identified in the document chunks. +type Relationship struct { + ID string `json:"-"` // Unique identifier for the relationship + ChunkIDs []string `json:"-"` // References to document chunks where this relationship is established + CombinedDegree int `json:"-"` // Sum of degrees of the connected entities, used for ranking + Weight float64 `json:"-"` // Strength of the relationship based on textual evidence + Source string `json:"source"` // ID of the entity where the relationship starts + Target string `json:"target"` // ID of the entity where the relationship ends + Description string `json:"description"` // Description of how these entities are related + Strength int `json:"strength"` // Normalized measure of relationship importance (1-10) +} + +// GraphBuilder defines the interface for building and querying the knowledge graph. +// It provides methods to construct the graph from document chunks and retrieve related information. +type GraphBuilder interface { + // BuildGraph constructs a knowledge graph from the provided document chunks. + // It extracts entities and relationships, then builds the graph structure. + BuildGraph(ctx context.Context, chunks []*Chunk) error + + // GetRelationChunks retrieves the IDs of chunks directly related to the specified chunk. + // The topK parameter limits the number of results returned, based on relationship strength. + GetRelationChunks(chunkID string, topK int) []string + + // GetIndirectRelationChunks finds chunk IDs that are indirectly connected to the specified chunk. + // These are "second-degree" connections, useful for expanding the context during retrieval. + GetIndirectRelationChunks(chunkID string, topK int) []string + + // GetAllEntities returns all entities currently in the knowledge graph. + // This is primarily used for visualization and diagnostics. 
+ GetAllEntities() []*Entity + + // GetAllRelationships returns all relationships currently in the knowledge graph. + // This is primarily used for visualization and diagnostics. + GetAllRelationships() []*Relationship +} diff --git a/internal/types/interfaces/chunk.go b/internal/types/interfaces/chunk.go new file mode 100644 index 0000000..81ea87b --- /dev/null +++ b/internal/types/interfaces/chunk.go @@ -0,0 +1,62 @@ +package interfaces + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/types" +) + +// ChunkRepository defines the interface for chunk repository operations +type ChunkRepository interface { + // CreateChunks creates chunks + CreateChunks(ctx context.Context, chunks []*types.Chunk) error + // GetChunkByID gets a chunk by id + GetChunkByID(ctx context.Context, tenantID uint, id string) (*types.Chunk, error) + // ListChunksByID lists chunks by ids + ListChunksByID(ctx context.Context, tenantID uint, ids []string) ([]*types.Chunk, error) + // ListChunksByKnowledgeID lists chunks by knowledge id + ListChunksByKnowledgeID(ctx context.Context, tenantID uint, knowledgeID string) ([]*types.Chunk, error) + // ListPagedChunksByKnowledgeID lists paged chunks by knowledge id + ListPagedChunksByKnowledgeID( + ctx context.Context, + tenantID uint, + knowledgeID string, + page *types.Pagination, + chunk_type []types.ChunkType, + ) ([]*types.Chunk, int64, error) + ListChunkByParentID(ctx context.Context, tenantID uint, parentID string) ([]*types.Chunk, error) + // UpdateChunk updates a chunk + UpdateChunk(ctx context.Context, chunk *types.Chunk) error + // DeleteChunk deletes a chunk + DeleteChunk(ctx context.Context, tenantID uint, id string) error + // DeleteChunksByKnowledgeID deletes chunks by knowledge id + DeleteChunksByKnowledgeID(ctx context.Context, tenantID uint, knowledgeID string) error + // DeleteByKnowledgeList deletes all chunks for a knowledge list + DeleteByKnowledgeList(ctx context.Context, tenantID uint, knowledgeIDs []string) error +} + +// ChunkService defines the interface for chunk service operations +type ChunkService interface { + // CreateChunks creates chunks + CreateChunks(ctx context.Context, chunks []*types.Chunk) error + // GetChunkByID gets a chunk by id + GetChunkByID(ctx context.Context, knowledgeID string, id string) (*types.Chunk, error) + // ListChunksByKnowledgeID lists chunks by knowledge id + ListChunksByKnowledgeID(ctx context.Context, knowledgeID string) ([]*types.Chunk, error) + // ListPagedChunksByKnowledgeID lists paged chunks by knowledge id + ListPagedChunksByKnowledgeID( + ctx context.Context, + knowledgeID string, + page *types.Pagination, + ) (*types.PageResult, error) + // UpdateChunk updates a chunk + UpdateChunk(ctx context.Context, chunk *types.Chunk) error + // DeleteChunk deletes a chunk + DeleteChunk(ctx context.Context, id string) error + // DeleteChunksByKnowledgeID deletes chunks by knowledge id + DeleteChunksByKnowledgeID(ctx context.Context, knowledgeID string) error + // DeleteByKnowledgeList deletes all chunks for a knowledge list + DeleteByKnowledgeList(ctx context.Context, ids []string) error + // ListChunkByParentID lists chunks by parent id + ListChunkByParentID(ctx context.Context, tenantID uint, parentID string) ([]*types.Chunk, error) +} diff --git a/internal/types/interfaces/evaluation.go b/internal/types/interfaces/evaluation.go new file mode 100644 index 0000000..3113601 --- /dev/null +++ b/internal/types/interfaces/evaluation.go @@ -0,0 +1,35 @@ +package interfaces + +import ( + "context" + + 
"github.com/Tencent/WeKnora/internal/types" +) + +// EvaluationService defines operations for evaluation tasks +type EvaluationService interface { + // Evaluation starts a new evaluation task + Evaluation(ctx context.Context, datasetID string, embeddingModelID string, + chatModelID string, rerankModelID string, + ) (*types.EvaluationDetail, error) + // EvaluationResult retrieves evaluation result by task ID + EvaluationResult(ctx context.Context, taskID string) (*types.EvaluationDetail, error) +} + +// Metrics defines interface for computing evaluation metrics +type Metrics interface { + // Compute calculates metric score based on input data + Compute(metricInput *types.MetricInput) float64 +} + +// EvalHook defines interface for evaluation process hooks +type EvalHook interface { + // Handle processes evaluation state change + Handle(ctx context.Context, state types.EvalState, index int, data interface{}) error +} + +// DatasetService defines operations for dataset management +type DatasetService interface { + // GetDatasetByID retrieves QA pairs from dataset by ID + GetDatasetByID(ctx context.Context, datasetID string) ([]*types.QAPair, error) +} diff --git a/internal/types/interfaces/file.go b/internal/types/interfaces/file.go new file mode 100644 index 0000000..307aaea --- /dev/null +++ b/internal/types/interfaces/file.go @@ -0,0 +1,18 @@ +package interfaces + +import ( + "context" + "io" + "mime/multipart" +) + +// FileService is the interface for file services. +// FileService provides methods to save, retrieve, and delete files. +type FileService interface { + // SaveFile saves a file. + SaveFile(ctx context.Context, file *multipart.FileHeader, tenantID uint, knowledgeID string) (string, error) + // GetFile retrieves a file. + GetFile(ctx context.Context, filePath string) (io.ReadCloser, error) + // DeleteFile deletes a file. + DeleteFile(ctx context.Context, filePath string) error +} diff --git a/internal/types/interfaces/knowledge.go b/internal/types/interfaces/knowledge.go new file mode 100644 index 0000000..48c49c4 --- /dev/null +++ b/internal/types/interfaces/knowledge.go @@ -0,0 +1,72 @@ +package interfaces + +import ( + "context" + "io" + "mime/multipart" + + "github.com/Tencent/WeKnora/internal/types" +) + +// KnowledgeService defines the interface for knowledge services. +type KnowledgeService interface { + // CreateKnowledgeFromFile creates knowledge from a file. + CreateKnowledgeFromFile( + ctx context.Context, + kbID string, + file *multipart.FileHeader, + metadata map[string]string, + ) (*types.Knowledge, error) + // CreateKnowledgeFromURL creates knowledge from a URL. + CreateKnowledgeFromURL(ctx context.Context, kbID string, url string) (*types.Knowledge, error) + // CreateKnowledgeFromPassage creates knowledge from text passages. + CreateKnowledgeFromPassage(ctx context.Context, kbID string, passage []string) (*types.Knowledge, error) + // GetKnowledgeByID retrieves knowledge by ID. + GetKnowledgeByID(ctx context.Context, id string) (*types.Knowledge, error) + // GetKnowledgeBatch retrieves a batch of knowledge by IDs. + GetKnowledgeBatch(ctx context.Context, tenantID uint, ids []string) ([]*types.Knowledge, error) + // ListKnowledgeByKnowledgeBaseID lists all knowledge under a knowledge base. + ListKnowledgeByKnowledgeBaseID(ctx context.Context, kbID string) ([]*types.Knowledge, error) + // ListPagedKnowledgeByKnowledgeBaseID lists all knowledge under a knowledge base with pagination. 
+ ListPagedKnowledgeByKnowledgeBaseID( + ctx context.Context, + kbID string, + page *types.Pagination, + ) (*types.PageResult, error) + // DeleteKnowledge deletes knowledge by ID. + DeleteKnowledge(ctx context.Context, id string) error + // GetKnowledgeFile retrieves the file associated with the knowledge. + GetKnowledgeFile(ctx context.Context, id string) (io.ReadCloser, string, error) + // UpdateKnowledge updates knowledge information. + UpdateKnowledge(ctx context.Context, knowledge *types.Knowledge) error + // CloneKnowledgeBase clones knowledge to another knowledge base. + CloneKnowledgeBase(ctx context.Context, srcID, dstID string) error + // UpdateImageInfo updates image information for a knowledge chunk. + UpdateImageInfo(ctx context.Context, knowledgeID string, chunkID string, imageInfo string) error +} + +// KnowledgeRepository defines the interface for knowledge repositories. +type KnowledgeRepository interface { + CreateKnowledge(ctx context.Context, knowledge *types.Knowledge) error + GetKnowledgeByID(ctx context.Context, tenantID uint, id string) (*types.Knowledge, error) + ListKnowledgeByKnowledgeBaseID(ctx context.Context, tenantID uint, kbID string) ([]*types.Knowledge, error) + ListPagedKnowledgeByKnowledgeBaseID(ctx context.Context, + tenantID uint, kbID string, page *types.Pagination, + ) ([]*types.Knowledge, int64, error) + UpdateKnowledge(ctx context.Context, knowledge *types.Knowledge) error + DeleteKnowledge(ctx context.Context, tenantID uint, id string) error + DeleteKnowledgeList(ctx context.Context, tenantID uint, ids []string) error + GetKnowledgeBatch(ctx context.Context, tenantID uint, ids []string) ([]*types.Knowledge, error) + // CheckKnowledgeExists checks if knowledge already exists. + // For file types, check by fileHash or (fileName+fileSize). + // For URL types, check by URL. + // Returns whether it exists, the existing knowledge object (if any), and possible error. + CheckKnowledgeExists( + ctx context.Context, + tenantID uint, + kbID string, + params *types.KnowledgeCheckParams, + ) (bool, *types.Knowledge, error) + // AminusB returns the difference set of A and B. + AminusB(ctx context.Context, Atenant uint, A string, Btenant uint, B string) ([]string, error) +} diff --git a/internal/types/interfaces/knowledgebase.go b/internal/types/interfaces/knowledgebase.go new file mode 100644 index 0000000..38bcf34 --- /dev/null +++ b/internal/types/interfaces/knowledgebase.go @@ -0,0 +1,138 @@ +// Package interfaces defines the interface contracts between different system components +// Through interface definitions, business logic can be decoupled from specific implementations, +// improving code testability and maintainability +// Knowledge base related interfaces are used to manage knowledge base resources and their contents +package interfaces + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/types" +) + +// KnowledgeBaseService defines the knowledge base service interface +// Provides high-level operations for knowledge base creation, querying, updating, deletion, and content searching +type KnowledgeBaseService interface { + // CreateKnowledgeBase creates a new knowledge base + // Parameters: + // - ctx: Context information, carrying request tracking, user identity, etc. + // - kb: Knowledge base object containing basic information + // Returns: + // - Created knowledge base object (including automatically generated ID) + // - Possible errors such as insufficient permissions, duplicate names, etc. 
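	//  Example (illustrative; assumes the KnowledgeBase model exposes Name and Description fields):
	//    kb, err := svc.CreateKnowledgeBase(ctx, &types.KnowledgeBase{Name: "product-docs", Description: "..."})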
+ CreateKnowledgeBase(ctx context.Context, kb *types.KnowledgeBase) (*types.KnowledgeBase, error) + + // GetKnowledgeBaseByID retrieves knowledge base information by ID + // Parameters: + // - ctx: Context information + // - id: Unique identifier of the knowledge base + // Returns: + // - Knowledge base object, if found + // - Possible errors such as not existing, insufficient permissions, etc. + GetKnowledgeBaseByID(ctx context.Context, id string) (*types.KnowledgeBase, error) + + // ListKnowledgeBases lists all knowledge bases under the current tenant + // Parameters: + // - ctx: Context information, containing tenant information + // Returns: + // - List of knowledge base objects + // - Possible errors such as insufficient permissions, etc. + ListKnowledgeBases(ctx context.Context) ([]*types.KnowledgeBase, error) + + // UpdateKnowledgeBase updates knowledge base information + // Parameters: + // - ctx: Context information + // - id: Unique identifier of the knowledge base + // - name: New knowledge base name + // - description: New knowledge base description + // - config: Knowledge base configuration, including chunking strategy, vectorization settings, etc. + // Returns: + // - Updated knowledge base object + // - Possible errors such as not existing, insufficient permissions, etc. + UpdateKnowledgeBase(ctx context.Context, + id string, name string, description string, config *types.KnowledgeBaseConfig, + ) (*types.KnowledgeBase, error) + + // DeleteKnowledgeBase deletes a knowledge base + // Parameters: + // - ctx: Context information + // - id: Unique identifier of the knowledge base + // Returns: + // - Possible errors such as not existing, insufficient permissions, etc. + DeleteKnowledgeBase(ctx context.Context, id string) error + + // HybridSearch performs hybrid search (vector + keywords) in the knowledge base + // Parameters: + // - ctx: Context information + // - id: Unique identifier of the knowledge base + // - params: Search parameters, including query text, thresholds, etc. + // Returns: + // - List of search results, sorted by relevance + // - Possible errors such as not existing, insufficient permissions, search engine errors, etc. + HybridSearch(ctx context.Context, id string, params types.SearchParams) ([]*types.SearchResult, error) + + // CopyKnowledgeBase copies a knowledge base + // Parameters: + // - ctx: Context information + // - sourceID: Source knowledge base ID + // - targetID: Target knowledge base ID + // Returns: + // - Copied knowledge base object + // - Possible errors such as not existing, insufficient permissions, etc. + CopyKnowledgeBase(ctx context.Context, src string, dst string) (*types.KnowledgeBase, *types.KnowledgeBase, error) +} + +// KnowledgeBaseRepository defines the knowledge base repository interface +// Responsible for knowledge base data persistence and retrieval, +// serving as a bridge between the service layer and data storage +type KnowledgeBaseRepository interface { + // CreateKnowledgeBase creates a knowledge base record + // Parameters: + // - ctx: Context information + // - kb: Knowledge base object + // Returns: + // - Possible errors such as database connection failure, unique constraint conflicts, etc. 
+ CreateKnowledgeBase(ctx context.Context, kb *types.KnowledgeBase) error + + // GetKnowledgeBaseByID queries a knowledge base by ID + // Parameters: + // - ctx: Context information + // - id: Knowledge base ID + // Returns: + // - Knowledge base object, if found + // - Possible errors such as record not existing, database errors, etc. + GetKnowledgeBaseByID(ctx context.Context, id string) (*types.KnowledgeBase, error) + + // ListKnowledgeBases lists all knowledge bases in the system + // Parameters: + // - ctx: Context information + // Returns: + // - List of knowledge base objects + // - Possible errors such as database errors, etc. + ListKnowledgeBases(ctx context.Context) ([]*types.KnowledgeBase, error) + + // ListKnowledgeBasesByTenantID lists all knowledge bases for a specific tenant + // Parameters: + // - ctx: Context information + // - tenantID: Tenant ID + // Returns: + // - List of knowledge base objects + // - Possible errors such as database errors, etc. + ListKnowledgeBasesByTenantID(ctx context.Context, tenantID uint) ([]*types.KnowledgeBase, error) + + // UpdateKnowledgeBase updates a knowledge base record + // Parameters: + // - ctx: Context information + // - kb: Knowledge base object containing update information + // Returns: + // - Possible errors such as record not existing, database errors, etc. + UpdateKnowledgeBase(ctx context.Context, kb *types.KnowledgeBase) error + + // DeleteKnowledgeBase deletes a knowledge base record + // Parameters: + // - ctx: Context information + // - id: Knowledge base ID + // Returns: + // - Possible errors such as record not existing, database errors, etc. + DeleteKnowledgeBase(ctx context.Context, id string) error +} diff --git a/internal/types/interfaces/message.go b/internal/types/interfaces/message.go new file mode 100644 index 0000000..ffb1777 --- /dev/null +++ b/internal/types/interfaces/message.go @@ -0,0 +1,41 @@ +package interfaces + +import ( + "context" + "time" + + "github.com/Tencent/WeKnora/internal/types" +) + +// MessageService defines the message service interface +type MessageService interface { + // CreateMessage creates a message + CreateMessage(ctx context.Context, message *types.Message) (*types.Message, error) + + // GetMessage gets a message + GetMessage(ctx context.Context, sessionID string, id string) (*types.Message, error) + + // GetMessagesBySession gets all messages of a session + GetMessagesBySession(ctx context.Context, sessionID string, page int, pageSize int) ([]*types.Message, error) + + // GetRecentMessagesBySession gets recent messages of a session + GetRecentMessagesBySession(ctx context.Context, sessionID string, limit int) ([]*types.Message, error) + + // GetMessagesBySessionBeforeTime gets messages before a specific time of a session + GetMessagesBySessionBeforeTime( + ctx context.Context, sessionID string, beforeTime time.Time, limit int, + ) ([]*types.Message, error) + + // UpdateMessage updates a message + UpdateMessage(ctx context.Context, message *types.Message) error + + // DeleteMessage deletes a message + DeleteMessage(ctx context.Context, sessionID string, id string) error +} + +// MessageRepository defines the message repository interface +type MessageRepository interface { + MessageService + // GetFirstMessageOfUser gets the first message of a user + GetFirstMessageOfUser(ctx context.Context, sessionID string) (*types.Message, error) +} diff --git a/internal/types/interfaces/model.go b/internal/types/interfaces/model.go new file mode 100644 index 0000000..7742222 --- /dev/null +++ 
b/internal/types/interfaces/model.go @@ -0,0 +1,49 @@ +package interfaces + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/models/chat" + "github.com/Tencent/WeKnora/internal/models/embedding" + "github.com/Tencent/WeKnora/internal/models/rerank" + "github.com/Tencent/WeKnora/internal/types" +) + +// ModelService defines the model service interface +type ModelService interface { + // CreateModel creates a model + CreateModel(ctx context.Context, model *types.Model) error + // GetModelByID gets a model by ID + GetModelByID(ctx context.Context, id string) (*types.Model, error) + // ListModels lists all models + ListModels(ctx context.Context) ([]*types.Model, error) + // UpdateModel updates a model + UpdateModel(ctx context.Context, model *types.Model) error + // DeleteModel deletes a model + DeleteModel(ctx context.Context, id string) error + // GetEmbeddingModel gets an embedding model + GetEmbeddingModel(ctx context.Context, modelId string) (embedding.Embedder, error) + // GetRerankModel gets a rerank model + GetRerankModel(ctx context.Context, modelId string) (rerank.Reranker, error) + // GetChatModel gets a chat model + GetChatModel(ctx context.Context, modelId string) (chat.Chat, error) +} + +// ModelRepository defines the model repository interface +type ModelRepository interface { + // Create creates a model + Create(ctx context.Context, model *types.Model) error + // GetByID gets a model by ID + GetByID(ctx context.Context, tenantID uint, id string) (*types.Model, error) + // List lists all models + List( + ctx context.Context, + tenantID uint, + modelType types.ModelType, + source types.ModelSource, + ) ([]*types.Model, error) + // Update updates a model + Update(ctx context.Context, model *types.Model) error + // Delete deletes a model + Delete(ctx context.Context, tenantID uint, id string) error +} diff --git a/internal/types/interfaces/resource.go b/internal/types/interfaces/resource.go new file mode 100644 index 0000000..07c6cd8 --- /dev/null +++ b/internal/types/interfaces/resource.go @@ -0,0 +1,19 @@ +package interfaces + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/types" +) + +// ResourceCleaner defines the resource cleaner interface +type ResourceCleaner interface { + // Register registers a resource cleanup function + Register(cleanup types.CleanupFunc) + + // RegisterWithName registers a resource cleanup function with a name + RegisterWithName(name string, cleanup types.CleanupFunc) + + // Cleanup executes all resource cleanup functions + Cleanup(ctx context.Context) []error +} diff --git a/internal/types/interfaces/retriever.go b/internal/types/interfaces/retriever.go new file mode 100644 index 0000000..7969ab7 --- /dev/null +++ b/internal/types/interfaces/retriever.go @@ -0,0 +1,109 @@ +package interfaces + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/models/embedding" + "github.com/Tencent/WeKnora/internal/types" +) + +// RetrieveEngine defines the retrieve engine interface +type RetrieveEngine interface { + // EngineType gets the retrieve engine type + EngineType() types.RetrieverEngineType + + // Retrieve executes the retrieve + Retrieve(ctx context.Context, params types.RetrieveParams) ([]*types.RetrieveResult, error) + + // Support gets the supported retrieve types + Support() []types.RetrieverType +} + +// RetrieveEngineRepository defines the retrieve engine repository interface +type RetrieveEngineRepository interface { + // Save saves the index info + Save(ctx context.Context, indexInfo *types.IndexInfo, 
params map[string]any) error + + // BatchSave saves the index info list + BatchSave(ctx context.Context, indexInfoList []*types.IndexInfo, params map[string]any) error + + // EstimateStorageSize estimates the storage size + EstimateStorageSize(ctx context.Context, indexInfoList []*types.IndexInfo, params map[string]any) int64 + + // DeleteByChunkIDList deletes the index info by chunk id list + DeleteByChunkIDList(ctx context.Context, indexIDList []string, dimension int) error + // 复制索引数据 + // sourceKnowledgeBaseID: 源知识库ID + // sourceToTargetChunkIDMap: 源分块ID到目标分块ID的映射关系 + // targetKnowledgeBaseID: 目标知识库ID + // params: 额外参数,如向量表示等 + CopyIndices( + ctx context.Context, + sourceKnowledgeBaseID string, + sourceToTargetKBIDMap map[string]string, + sourceToTargetChunkIDMap map[string]string, + targetKnowledgeBaseID string, + dimension int, + ) error + + // DeleteByKnowledgeIDList deletes the index info by knowledge id list + DeleteByKnowledgeIDList(ctx context.Context, knowledgeIDList []string, dimension int) error + + // RetrieveEngine retrieves the engine + RetrieveEngine +} + +// RetrieveEngineRegistry defines the retrieve engine registry interface +type RetrieveEngineRegistry interface { + // Register registers the retrieve engine service + Register(indexService RetrieveEngineService) error + // GetRetrieveEngineService gets the retrieve engine service + GetRetrieveEngineService(engineType types.RetrieverEngineType) (RetrieveEngineService, error) + // GetAllRetrieveEngineServices gets all retrieve engine services + GetAllRetrieveEngineServices() []RetrieveEngineService +} + +// RetrieveEngineService defines the retrieve engine service interface +type RetrieveEngineService interface { + // Index indexes the index info + Index(ctx context.Context, + embedder embedding.Embedder, + indexInfo *types.IndexInfo, + retrieverTypes []types.RetrieverType, + ) error + + // BatchIndex indexes the index info list + BatchIndex(ctx context.Context, + embedder embedding.Embedder, + indexInfoList []*types.IndexInfo, + retrieverTypes []types.RetrieverType, + ) error + + // EstimateStorageSize estimates the storage size + EstimateStorageSize(ctx context.Context, + embedder embedding.Embedder, + indexInfoList []*types.IndexInfo, + retrieverTypes []types.RetrieverType, + ) int64 + // CopyIndices 从源知识库复制索引到目标知识库,免去重新计算嵌入向量的开销 + // sourceKnowledgeBaseID: 源知识库ID + // sourceToTargetChunkIDMap: 源分块ID到目标分块ID的映射关系,key为源分块ID,value为目标分块ID + // targetKnowledgeBaseID: 目标知识库ID + CopyIndices( + ctx context.Context, + sourceKnowledgeBaseID string, + sourceToTargetKBIDMap map[string]string, + sourceToTargetChunkIDMap map[string]string, + targetKnowledgeBaseID string, + dimension int, + ) error + + // DeleteByChunkIDList deletes the index info by chunk id list + DeleteByChunkIDList(ctx context.Context, indexIDList []string, dimension int) error + + // DeleteByKnowledgeIDList deletes the index info by knowledge id list + DeleteByKnowledgeIDList(ctx context.Context, knowledgeIDList []string, dimension int) error + + // RetrieveEngine retrieves the engine + RetrieveEngine +} diff --git a/internal/types/interfaces/session.go b/internal/types/interfaces/session.go new file mode 100644 index 0000000..1f5c5ab --- /dev/null +++ b/internal/types/interfaces/session.go @@ -0,0 +1,49 @@ +package interfaces + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/types" +) + +// SessionService defines the session service interface +type SessionService interface { + // CreateSession creates a session + CreateSession(ctx 
context.Context, session *types.Session) (*types.Session, error) + // GetSession gets a session + GetSession(ctx context.Context, id string) (*types.Session, error) + // GetSessionsByTenant gets all sessions of a tenant + GetSessionsByTenant(ctx context.Context) ([]*types.Session, error) + // GetPagedSessionsByTenant gets paged sessions of a tenant + GetPagedSessionsByTenant(ctx context.Context, page *types.Pagination) (*types.PageResult, error) + // UpdateSession updates a session + UpdateSession(ctx context.Context, session *types.Session) error + // DeleteSession deletes a session + DeleteSession(ctx context.Context, id string) error + // GenerateTitle generates a title for the current conversation + GenerateTitle(ctx context.Context, sessionID string, messages []types.Message) (string, error) + // KnowledgeQA performs knowledge-based question answering + KnowledgeQA(ctx context.Context, + sessionID, query string, + ) ([]*types.SearchResult, <-chan types.StreamResponse, error) + // KnowledgeQAByEvent performs knowledge-based question answering by event + KnowledgeQAByEvent(ctx context.Context, chatManage *types.ChatManage, eventList []types.EventType) error + // SearchKnowledge performs knowledge-based search, without summarization + SearchKnowledge(ctx context.Context, knowledgeBaseID, query string) ([]*types.SearchResult, error) +} + +// SessionRepository defines the session repository interface +type SessionRepository interface { + // Create creates a session + Create(ctx context.Context, session *types.Session) (*types.Session, error) + // Get gets a session + Get(ctx context.Context, tenantID uint, id string) (*types.Session, error) + // GetByTenantID gets all sessions of a tenant + GetByTenantID(ctx context.Context, tenantID uint) ([]*types.Session, error) + // GetPagedByTenantID gets paged sessions of a tenant + GetPagedByTenantID(ctx context.Context, tenantID uint, page *types.Pagination) ([]*types.Session, int64, error) + // Update updates a session + Update(ctx context.Context, session *types.Session) error + // Delete deletes a session + Delete(ctx context.Context, tenantID uint, id string) error +} diff --git a/internal/types/interfaces/stream_manager.go b/internal/types/interfaces/stream_manager.go new file mode 100644 index 0000000..d6a26ca --- /dev/null +++ b/internal/types/interfaces/stream_manager.go @@ -0,0 +1,34 @@ +package interfaces + +import ( + "context" + "time" + + "github.com/Tencent/WeKnora/internal/types" +) + +// StreamInfo stream information +type StreamInfo struct { + SessionID string // session ID + RequestID string // request ID + Query string // query content + Content string // current content + KnowledgeReferences types.References // knowledge references + LastUpdated time.Time // last updated time + IsCompleted bool // whether completed +} + +// StreamManager stream manager interface +type StreamManager interface { + // RegisterStream registers a new stream + RegisterStream(ctx context.Context, sessionID, requestID, query string) error + + // UpdateStream updates the stream content + UpdateStream(ctx context.Context, sessionID, requestID string, content string, references types.References) error + + // CompleteStream completes the stream + CompleteStream(ctx context.Context, sessionID, requestID string) error + + // GetStream gets a specific stream + GetStream(ctx context.Context, sessionID, requestID string) (*StreamInfo, error) +} diff --git a/internal/types/interfaces/tenant.go b/internal/types/interfaces/tenant.go new file mode 100644 index 
0000000..df5839e --- /dev/null +++ b/internal/types/interfaces/tenant.go @@ -0,0 +1,41 @@ +package interfaces + +import ( + "context" + + "github.com/Tencent/WeKnora/internal/types" +) + +// TenantService defines the tenant service interface +type TenantService interface { + // CreateTenant creates a tenant + CreateTenant(ctx context.Context, tenant *types.Tenant) (*types.Tenant, error) + // GetTenantByID gets a tenant by ID + GetTenantByID(ctx context.Context, id uint) (*types.Tenant, error) + // ListTenants lists all tenants + ListTenants(ctx context.Context) ([]*types.Tenant, error) + // UpdateTenant updates a tenant + UpdateTenant(ctx context.Context, tenant *types.Tenant) (*types.Tenant, error) + // DeleteTenant deletes a tenant + DeleteTenant(ctx context.Context, id uint) error + // UpdateAPIKey updates the API key + UpdateAPIKey(ctx context.Context, id uint) (string, error) + // ExtractTenantIDFromAPIKey extracts the tenant ID from the API key + ExtractTenantIDFromAPIKey(apiKey string) (uint, error) +} + +// TenantRepository defines the tenant repository interface +type TenantRepository interface { + // CreateTenant creates a tenant + CreateTenant(ctx context.Context, tenant *types.Tenant) error + // GetTenantByID gets a tenant by ID + GetTenantByID(ctx context.Context, id uint) (*types.Tenant, error) + // ListTenants lists all tenants + ListTenants(ctx context.Context) ([]*types.Tenant, error) + // UpdateTenant updates a tenant + UpdateTenant(ctx context.Context, tenant *types.Tenant) error + // DeleteTenant deletes a tenant + DeleteTenant(ctx context.Context, id uint) error + // AdjustStorageUsed adjusts the storage used for a tenant + AdjustStorageUsed(ctx context.Context, tenantID uint, delta int64) error +} diff --git a/internal/types/json.go b/internal/types/json.go new file mode 100644 index 0000000..e2aae2d --- /dev/null +++ b/internal/types/json.go @@ -0,0 +1,68 @@ +package types + +import ( + "database/sql/driver" + "encoding/json" + "errors" +) + +// JSON is a custom type that wraps json.RawMessage. +// Used for storing JSON data in the database. +type JSON json.RawMessage + +// Scan implements the sql.Scanner interface. +func (j *JSON) Scan(value interface{}) error { + bytes, ok := value.([]byte) + if !ok { + return errors.New("type assertion to []byte failed") + } + + result := json.RawMessage{} + err := json.Unmarshal(bytes, &result) + *j = JSON(result) + return err +} + +// Value implements the driver.Valuer interface. +func (j JSON) Value() (driver.Value, error) { + if len(j) == 0 { + return nil, nil + } + return json.RawMessage(j).MarshalJSON() +} + +// MarshalJSON implements the json.Marshaler interface. +func (j JSON) MarshalJSON() ([]byte, error) { + if len(j) == 0 { + return []byte("null"), nil + } + return j, nil +} + +// UnmarshalJSON implements the json.Unmarshaler interface. +func (j *JSON) UnmarshalJSON(data []byte) error { + if j == nil { + return errors.New("JSON: UnmarshalJSON on nil pointer") + } + *j = JSON(data) + return nil +} + +// ToString converts JSON to a string. +func (j JSON) ToString() string { + if len(j) == 0 { + return "{}" + } + return string(j) +} + +// Map converts JSON to a map. 
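The types.JSON wrapper above is the small adapter that lets GORM and database/sql persist free-form JSON columns (knowledge metadata, message references, model parameters). A hedged sketch of the round trip it performs; the sample payload is made up:

```go
// Illustrative round trip through types.JSON; not code shipped in this commit.
package example

import (
	"encoding/json"
	"fmt"

	"github.com/Tencent/WeKnora/internal/types"
)

func jsonRoundTrip() error {
	// Marshal an arbitrary map into the custom JSON column type.
	raw, err := json.Marshal(map[string]any{"author": "alice", "pages": 12})
	if err != nil {
		return err
	}
	var col types.JSON
	if err := col.UnmarshalJSON(raw); err != nil {
		return err
	}

	// Value() is what GORM/database/sql invoke when writing the column.
	dbValue, err := col.Value()
	if err != nil {
		return err
	}

	// Scan() restores the column when the row is read back from the driver.
	var restored types.JSON
	if err := restored.Scan(dbValue.([]byte)); err != nil {
		return err
	}
	fmt.Println(restored.ToString()) // {"author":"alice","pages":12}
	return nil
}
```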
+func (j JSON) Map() (map[string]interface{}, error) { + if len(j) == 0 { + return map[string]interface{}{}, nil + } + + var m map[string]interface{} + err := json.Unmarshal(j, &m) + return m, err +} diff --git a/internal/types/knowledge.go b/internal/types/knowledge.go new file mode 100644 index 0000000..46b68d1 --- /dev/null +++ b/internal/types/knowledge.go @@ -0,0 +1,92 @@ +package types + +import ( + "fmt" + "time" + + "github.com/google/uuid" + "gorm.io/gorm" +) + +// Knowledge represents a knowledge entity in the system. +// It contains metadata about the knowledge source, its processing status, +// and references to the physical file if applicable. +type Knowledge struct { + // Unique identifier of the knowledge + ID string `json:"id" gorm:"type:varchar(36);primaryKey"` + // Tenant ID + TenantID uint `json:"tenant_id"` + // ID of the knowledge base + KnowledgeBaseID string `json:"knowledge_base_id"` + // Type of the knowledge + Type string `json:"type"` + // Title of the knowledge + Title string `json:"title"` + // Description of the knowledge + Description string `json:"description"` + // Source of the knowledge + Source string `json:"source"` + // Parse status of the knowledge + ParseStatus string `json:"parse_status"` + // Enable status of the knowledge + EnableStatus string `json:"enable_status"` + // ID of the embedding model + EmbeddingModelID string `json:"embedding_model_id"` + // File name of the knowledge + FileName string `json:"file_name"` + // File type of the knowledge + FileType string `json:"file_type"` + // File size of the knowledge + FileSize int64 `json:"file_size"` + // File hash of the knowledge + FileHash string `json:"file_hash"` + // File path of the knowledge + FilePath string `json:"file_path"` + // Storage size of the knowledge + StorageSize int64 `json:"storage_size"` + // Metadata of the knowledge + Metadata JSON `json:"metadata" gorm:"type:json"` + // Creation time of the knowledge + CreatedAt time.Time `json:"created_at"` + // Last updated time of the knowledge + UpdatedAt time.Time `json:"updated_at"` + // Processed time of the knowledge + ProcessedAt *time.Time `json:"processed_at"` + // Error message of the knowledge + ErrorMessage string `json:"error_message"` + // Deletion time of the knowledge + DeletedAt gorm.DeletedAt `json:"deleted_at" gorm:"index"` +} + +// GetMetadata returns the metadata as a map[string]string. +func (k *Knowledge) GetMetadata() map[string]string { + metadata := make(map[string]string) + metadataMap, err := k.Metadata.Map() + if err != nil { + return nil + } + for k, v := range metadataMap { + metadata[k] = fmt.Sprintf("%v", v) + } + return metadata +} + +// BeforeCreate hook generates a UUID for new Knowledge entities before they are created. +func (k *Knowledge) BeforeCreate(tx *gorm.DB) (err error) { + k.ID = uuid.New().String() + return nil +} + +// KnowledgeCheckParams defines parameters used to check if knowledge already exists. 
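Two conventions in the Knowledge entity above are easy to miss: the primary key is minted in the BeforeCreate GORM hook, and Metadata rides in the JSON column type and is flattened to map[string]string by GetMetadata. A small illustrative sketch; the field values are assumptions, not data from this commit:

```go
// Illustrative use of the Knowledge entity; not code shipped in this commit.
package example

import (
	"encoding/json"
	"fmt"

	"github.com/Tencent/WeKnora/internal/types"
)

func knowledgeMetadata() {
	// Error ignored for brevity; the payload is illustrative.
	meta, _ := json.Marshal(map[string]any{"source_url": "https://example.com/doc.pdf", "pages": 3})

	k := &types.Knowledge{
		TenantID:        1,
		KnowledgeBaseID: "kb-00000001",
		Type:            "file", // illustrative type value
		Title:           "Demo document",
		Metadata:        types.JSON(meta),
	}
	// ID is left empty here; BeforeCreate assigns a UUID when GORM inserts the row.

	// GetMetadata stringifies every value, so numeric fields come back as strings ("3").
	for key, value := range k.GetMetadata() {
		fmt.Printf("%s=%s\n", key, value)
	}
}
```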
+type KnowledgeCheckParams struct { + // File parameters + FileName string + FileSize int64 + FileHash string + // URL parameters + URL string + // Text passage parameters + Passages []string + // Knowledge type + Type string +} diff --git a/internal/types/knowledgebase.go b/internal/types/knowledgebase.go new file mode 100644 index 0000000..8e14f27 --- /dev/null +++ b/internal/types/knowledgebase.go @@ -0,0 +1,95 @@ +package types + +import ( + "database/sql/driver" + "encoding/json" + "time" + + "gorm.io/gorm" +) + +// KnowledgeBase represents a knowledge base +type KnowledgeBase struct { + // Unique identifier of the knowledge base + ID string `yaml:"id" json:"id" gorm:"type:varchar(36);primaryKey"` + // Name of the knowledge base + Name string `yaml:"name" json:"name"` + // Description of the knowledge base + Description string `yaml:"description" json:"description"` + // Tenant ID + TenantID uint `yaml:"tenant_id" json:"tenant_id"` + // Chunking configuration + ChunkingConfig ChunkingConfig `yaml:"chunking_config" json:"chunking_config" gorm:"type:json"` + // Image processing configuration + ImageProcessingConfig ImageProcessingConfig `yaml:"image_processing_config" json:"image_processing_config" gorm:"type:json"` + // ID of the embedding model + EmbeddingModelID string `yaml:"embedding_model_id" json:"embedding_model_id"` + // Summary model ID + SummaryModelID string `yaml:"summary_model_id" json:"summary_model_id"` + // Creation time of the knowledge base + CreatedAt time.Time `yaml:"created_at" json:"created_at"` + // Last updated time of the knowledge base + UpdatedAt time.Time `yaml:"updated_at" json:"updated_at"` + // Deletion time of the knowledge base + DeletedAt gorm.DeletedAt `yaml:"deleted_at" json:"deleted_at" gorm:"index"` +} + +// KnowledgeBaseConfig represents the knowledge base configuration +type KnowledgeBaseConfig struct { + // Chunking configuration + ChunkingConfig ChunkingConfig `yaml:"chunking_config" json:"chunking_config"` + // Image processing configuration + ImageProcessingConfig ImageProcessingConfig `yaml:"image_processing_config" json:"image_processing_config"` +} + +// ChunkingConfig represents the document splitting configuration +type ChunkingConfig struct { + // Chunk size + ChunkSize int `yaml:"chunk_size" json:"chunk_size"` + // Chunk overlap + ChunkOverlap int `yaml:"chunk_overlap" json:"chunk_overlap"` + // Separators + Separators []string `yaml:"separators" json:"separators"` + // Enable multimodal + EnableMultimodal bool `yaml:"enable_multimodal" json:"enable_multimodal"` +} + +// ImageProcessingConfig represents the image processing configuration +type ImageProcessingConfig struct { + // Model ID + ModelID string `yaml:"model_id" json:"model_id"` +} + +// Value implements the driver.Valuer interface, used to convert ChunkingConfig to database value +func (c ChunkingConfig) Value() (driver.Value, error) { + return json.Marshal(c) +} + +// Scan implements the sql.Scanner interface, used to convert database value to ChunkingConfig +func (c *ChunkingConfig) Scan(value interface{}) error { + if value == nil { + return nil + } + b, ok := value.([]byte) + if !ok { + return nil + } + return json.Unmarshal(b, c) +} + +// Value implements the driver.Valuer interface, used to convert ImageProcessingConfig to database value +func (c ImageProcessingConfig) Value() (driver.Value, error) { + return json.Marshal(c) +} + +// Scan implements the sql.Scanner interface, used to convert database value to ImageProcessingConfig +func (c *ImageProcessingConfig) 
Scan(value interface{}) error { + if value == nil { + return nil + } + b, ok := value.([]byte) + if !ok { + return nil + } + return json.Unmarshal(b, c) +} diff --git a/internal/types/message.go b/internal/types/message.go new file mode 100644 index 0000000..21409e2 --- /dev/null +++ b/internal/types/message.go @@ -0,0 +1,60 @@ +// Package types defines data structures and types used throughout the system +package types + +import ( + "time" + + "github.com/google/uuid" + "gorm.io/gorm" +) + +// History represents a conversation history entry +// Contains query-answer pairs and associated knowledge references +// Used for tracking conversation context and history +type History struct { + Query string // User query text + Answer string // System response text + CreateAt time.Time // When this history entry was created + KnowledgeReferences References // Knowledge references used in the answer +} + +// Message represents a conversation message +// Each message belongs to a conversation session and can be from either user or system +// Messages can contain references to knowledge chunks used to generate responses +type Message struct { + // Unique identifier for the message + ID string `json:"id" gorm:"type:varchar(36);primaryKey"` + // ID of the session this message belongs to + SessionID string `json:"session_id"` + // Request identifier for tracking API requests + RequestID string `json:"request_id"` + // Message text content + Content string `json:"content"` + // Message role: "user", "assistant", "system" + Role string `json:"role"` + // References to knowledge chunks used in the response + KnowledgeReferences References `json:"knowledge_references" gorm:"type:json,column:knowledge_references"` + // Whether message generation is complete + IsCompleted bool `json:"is_completed"` + // Message creation timestamp + CreatedAt time.Time `json:"created_at"` + // Last update timestamp + UpdatedAt time.Time `json:"updated_at"` + // Soft delete timestamp + DeletedAt gorm.DeletedAt `json:"deleted_at" gorm:"index"` +} + +// BeforeCreate is a GORM hook that runs before creating a new message record +// Automatically generates a UUID for new messages and initializes knowledge references +// Parameters: +// - tx: GORM database transaction +// +// Returns: +// - error: Any error encountered during the hook execution +func (m *Message) BeforeCreate(tx *gorm.DB) (err error) { + m.ID = uuid.New().String() + if m.KnowledgeReferences == nil { + m.KnowledgeReferences = make(References, 0) + } + return nil +} diff --git a/internal/types/model.go b/internal/types/model.go new file mode 100644 index 0000000..c1e39b3 --- /dev/null +++ b/internal/types/model.go @@ -0,0 +1,91 @@ +package types + +import ( + "database/sql/driver" + "encoding/json" + "time" + + "gorm.io/gorm" +) + +// ModelType represents the type of AI model +type ModelType string + +const ( + ModelTypeEmbedding ModelType = "Embedding" // Embedding model + ModelTypeRerank ModelType = "Rerank" // Rerank model + ModelTypeKnowledgeQA ModelType = "KnowledgeQA" // KnowledgeQA model + ModelTypeVLLM ModelType = "VLLM" // VLLM model +) + +type ModelStatus string + +const ( + ModelStatusActive ModelStatus = "active" // Model is active + ModelStatusDownloading ModelStatus = "downloading" // Model is downloading + ModelStatusDownloadFailed ModelStatus = "download_failed" // Model download failed +) + +// ModelSource represents the source of the model +type ModelSource string + +const ( + ModelSourceLocal ModelSource = "local" // Local model + ModelSourceRemote 
ModelSource = "remote" // Remote model +) + +type EmbeddingParameters struct { + Dimension int `yaml:"dimension" json:"dimension"` + TruncatePromptTokens int `yaml:"truncate_prompt_tokens" json:"truncate_prompt_tokens"` +} + +type ModelParameters struct { + BaseURL string `yaml:"base_url" json:"base_url"` + APIKey string `yaml:"api_key" json:"api_key"` + EmbeddingParameters EmbeddingParameters `yaml:"embedding_parameters" json:"embedding_parameters"` +} + +// Model represents the AI model +type Model struct { + // Unique identifier of the model + ID string `yaml:"id" json:"id" gorm:"type:varchar(36);primaryKey"` + // Tenant ID + TenantID uint `yaml:"tenant_id" json:"tenant_id"` + // Name of the model + Name string `yaml:"name" json:"name"` + // Type of the model + Type ModelType `yaml:"type" json:"type"` + // Source of the model + Source ModelSource `yaml:"source" json:"source"` + // Description of the model + Description string `yaml:"description" json:"description"` + // Model parameters in JSON format + Parameters ModelParameters `yaml:"parameters" json:"parameters" gorm:"type:json"` + // Whether the model is the default model + IsDefault bool `yaml:"is_default" json:"is_default"` + // Model status, default: active, possible: downloading, download_failed + Status ModelStatus `yaml:"status" json:"status"` + // Creation time of the model + CreatedAt time.Time `yaml:"created_at" json:"created_at"` + // Last updated time of the model + UpdatedAt time.Time `yaml:"updated_at" json:"updated_at"` + // Deletion time of the model + DeletedAt gorm.DeletedAt `yaml:"deleted_at" json:"deleted_at" gorm:"index"` +} + +// Value implements the driver.Valuer interface, used to convert ModelParameters to database value +func (c ModelParameters) Value() (driver.Value, error) { + return json.Marshal(c) +} + +// Scan implements the sql.Scanner interface, used to convert database value to ModelParameters +func (c *ModelParameters) Scan(value interface{}) error { + if value == nil { + return nil + } + b, ok := value.([]byte) + if !ok { + return nil + } + return json.Unmarshal(b, c) +} diff --git a/internal/types/retriever.go b/internal/types/retriever.go new file mode 100644 index 0000000..e4de533 --- /dev/null +++ b/internal/types/retriever.go @@ -0,0 +1,82 @@ +package types + +// RetrieverEngineType represents the type of retriever engine +type RetrieverEngineType string + +// RetrieverEngineType constants +const ( + PostgresRetrieverEngineType RetrieverEngineType = "postgres" + ElasticsearchRetrieverEngineType RetrieverEngineType = "elasticsearch" + InfinityRetrieverEngineType RetrieverEngineType = "infinity" + ElasticFaissRetrieverEngineType RetrieverEngineType = "elasticfaiss" +) + +// RetrieverType represents the type of retriever +type RetrieverType string + +// RetrieverType constants +const ( + KeywordsRetrieverType RetrieverType = "keywords" // Keywords retriever + VectorRetrieverType RetrieverType = "vector" // Vector retriever + WebSearchRetrieverType RetrieverType = "websearch" // Web search retriever +) + +// RetrieveParams represents the parameters for retrieval +type RetrieveParams struct { + // Query text + Query string + // Query embedding (used for vector retrieval) + Embedding []float32 + // Knowledge base IDs + KnowledgeBaseIDs []string + // Excluded knowledge IDs + ExcludeKnowledgeIDs []string + // Excluded chunk IDs + ExcludeChunkIDs []string + // Number of results to return + TopK int + // Similarity threshold + Threshold float64 + // Additional parameters, different retrievers may 
require different parameters + AdditionalParams map[string]interface{} + // Retriever type + RetrieverType RetrieverType // Retriever type +} + +// RetrieverEngineParams represents the parameters for retriever engine +type RetrieverEngineParams struct { + // Retriever engine type + RetrieverEngineType RetrieverEngineType `yaml:"retriever_engine_type" json:"retriever_engine_type"` + // Retriever type + RetrieverType RetrieverType `yaml:"retriever_type" json:"retriever_type"` +} + +// IndexWithScore represents the index with score +type IndexWithScore struct { + // ID + ID string + // Content + Content string + // Source ID + SourceID string + // Source type + SourceType SourceType + // Chunk ID + ChunkID string + // Knowledge ID + KnowledgeID string + // Knowledge base ID + KnowledgeBaseID string + // Score + Score float64 + // Match type + MatchType MatchType +} + +// RetrieveResult represents the result of retrieval +type RetrieveResult struct { + Results []*IndexWithScore // Retrieval results + RetrieverEngineType RetrieverEngineType // Retrieval source type + RetrieverType RetrieverType // Retrieval type + Error error // Retrieval error +} diff --git a/internal/types/search.go b/internal/types/search.go new file mode 100644 index 0000000..289dd82 --- /dev/null +++ b/internal/types/search.go @@ -0,0 +1,129 @@ +package types + +import ( + "database/sql/driver" + "encoding/json" +) + +// SearchResult represents the search result +type SearchResult struct { + // ID + ID string `gorm:"column:id" json:"id"` + // Content + Content string `gorm:"column:content" json:"content"` + // Knowledge ID + KnowledgeID string `gorm:"column:knowledge_id" json:"knowledge_id"` + // Chunk index + ChunkIndex int `gorm:"column:chunk_index" json:"chunk_index"` + // Knowledge title + KnowledgeTitle string `gorm:"column:knowledge_title" json:"knowledge_title"` + // Start at + StartAt int `gorm:"column:start_at" json:"start_at"` + // End at + EndAt int `gorm:"column:end_at" json:"end_at"` + // Seq + Seq int `gorm:"column:seq" json:"seq"` + // Score + Score float64 `json:"score"` + // Match type + MatchType MatchType `json:"match_type"` + // SubChunkIndex + SubChunkID []string `json:"sub_chunk_id"` + // Metadata + Metadata map[string]string `json:"metadata"` + + // Chunk 类型 + ChunkType string `json:"chunk_type"` + // 父 Chunk ID + ParentChunkID string `json:"parent_chunk_id"` + // 图片信息 (JSON 格式) + ImageInfo string `json:"image_info"` + + // Knowledge file name + // Used for file type knowledge, contains the original file name + KnowledgeFilename string `json:"knowledge_filename"` + + // Knowledge source + // Used to indicate the source of the knowledge, such as "url" + KnowledgeSource string `json:"knowledge_source"` +} + +// SearchParams represents the search parameters +type SearchParams struct { + QueryText string + VectorThreshold float64 + KeywordThreshold float64 + MatchCount int +} + +// Value implements the driver.Valuer interface, used to convert SearchResult to database value +func (c SearchResult) Value() (driver.Value, error) { + return json.Marshal(c) +} + +// Scan implements the sql.Scanner interface, used to convert database value to SearchResult +func (c *SearchResult) Scan(value interface{}) error { + if value == nil { + return nil + } + b, ok := value.([]byte) + if !ok { + return nil + } + return json.Unmarshal(b, c) +} + +// Pagination represents the pagination parameters +type Pagination struct { + // Page + Page int `form:"page" json:"page" binding:"omitempty,min=1"` + // Page size + PageSize int 
`form:"page_size" json:"page_size" binding:"omitempty,min=1,max=100"` +} + +// GetPage gets the page number, default is 1 +func (p *Pagination) GetPage() int { + if p.Page < 1 { + return 1 + } + return p.Page +} + +// GetPageSize gets the page size, default is 20 +func (p *Pagination) GetPageSize() int { + if p.PageSize < 1 { + return 20 + } + if p.PageSize > 100 { + return 100 + } + return p.PageSize +} + +// Offset gets the offset for database query +func (p *Pagination) Offset() int { + return (p.GetPage() - 1) * p.GetPageSize() +} + +// Limit gets the limit for database query +func (p *Pagination) Limit() int { + return p.GetPageSize() +} + +// PageResult represents the pagination query result +type PageResult struct { + Total int64 `json:"total"` // Total number of records + Page int `json:"page"` // Current page number + PageSize int `json:"page_size"` // Page size + Data interface{} `json:"data"` // Data +} + +// NewPageResult creates a new pagination result +func NewPageResult(total int64, page *Pagination, data interface{}) *PageResult { + return &PageResult{ + Total: total, + Page: page.GetPage(), + PageSize: page.GetPageSize(), + Data: data, + } +} diff --git a/internal/types/session.go b/internal/types/session.go new file mode 100644 index 0000000..7d209ed --- /dev/null +++ b/internal/types/session.go @@ -0,0 +1,120 @@ +package types + +import ( + "database/sql/driver" + "encoding/json" + "time" + + "github.com/google/uuid" + "gorm.io/gorm" +) + +// FallbackStrategy represents the fallback strategy type +type FallbackStrategy string + +const ( + FallbackStrategyFixed FallbackStrategy = "fixed" // Fixed response + FallbackStrategyModel FallbackStrategy = "model" // Model fallback response +) + +type SummaryConfig struct { + // Max tokens + MaxTokens int `json:"max_tokens"` + // Repeat penalty + RepeatPenalty float64 `json:"repeat_penalty"` + // TopK + TopK int `json:"top_k"` + // TopP + TopP float64 `json:"top_p"` + // Frequency penalty + FrequencyPenalty float64 `json:"frequency_penalty"` + // Presence penalty + PresencePenalty float64 `json:"presence_penalty"` + // Prompt + Prompt string `json:"prompt"` + // Context template + ContextTemplate string `json:"context_template"` + // No match prefix + NoMatchPrefix string `json:"no_match_prefix"` + // Temperature + Temperature float64 `json:"temperature"` + // Seed + Seed int `json:"seed"` + // Max completion tokens + MaxCompletionTokens int `json:"max_completion_tokens"` +} + +// Session represents the session +type Session struct { + // ID + ID string `json:"id" gorm:"type:varchar(36);primaryKey"` + // Title + Title string `json:"title"` + // Description + Description string `json:"description"` + // Tenant ID + TenantID uint `json:"tenant_id" gorm:"index"` + + // Strategy configuration + KnowledgeBaseID string `json:"knowledge_base_id"` // 关联的知识库ID + MaxRounds int `json:"max_rounds"` // 多轮保持轮数 + EnableRewrite bool `json:"enable_rewrite"` // 多轮改写开关 + FallbackStrategy FallbackStrategy `json:"fallback_strategy"` // 兜底策略 + FallbackResponse string `json:"fallback_response"` // 固定回复内容 + EmbeddingTopK int `json:"embedding_top_k"` // 向量召回TopK + KeywordThreshold float64 `json:"keyword_threshold"` // 关键词召回阈值 + VectorThreshold float64 `json:"vector_threshold"` // 向量召回阈值 + RerankModelID string `json:"rerank_model_id"` // 排序模型ID + RerankTopK int `json:"rerank_top_k"` // 排序TopK + RerankThreshold float64 `json:"rerank_threshold"` // 排序阈值 + SummaryModelID string `json:"summary_model_id"` // 总结模型ID + SummaryParameters *SummaryConfig 
`json:"summary_parameters" gorm:"type:json"` // 总结模型参数 + + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + DeletedAt gorm.DeletedAt `json:"deleted_at" gorm:"index"` + + // Association relationship, not stored in the database + Messages []Message `json:"-" gorm:"foreignKey:SessionID"` +} + +func (s *Session) BeforeCreate(tx *gorm.DB) (err error) { + s.ID = uuid.New().String() + return nil +} + +type StringArray []string + +// Value implements the driver.Valuer interface, used to convert StringArray to database value +func (c StringArray) Value() (driver.Value, error) { + return json.Marshal(c) +} + +// Scan implements the sql.Scanner interface, used to convert database value to StringArray +func (c *StringArray) Scan(value interface{}) error { + if value == nil { + return nil + } + b, ok := value.([]byte) + if !ok { + return nil + } + return json.Unmarshal(b, c) +} + +// Value implements the driver.Valuer interface, used to convert SummaryConfig to database value +func (c *SummaryConfig) Value() (driver.Value, error) { + return json.Marshal(c) +} + +// Scan implements the sql.Scanner interface, used to convert database value to SummaryConfig +func (c *SummaryConfig) Scan(value interface{}) error { + if value == nil { + return nil + } + b, ok := value.([]byte) + if !ok { + return nil + } + return json.Unmarshal(b, c) +} diff --git a/internal/types/tenant.go b/internal/types/tenant.go new file mode 100644 index 0000000..64485f2 --- /dev/null +++ b/internal/types/tenant.go @@ -0,0 +1,65 @@ +package types + +import ( + "database/sql/driver" + "encoding/json" + "time" + + "gorm.io/gorm" +) + +// Tenant represents the tenant +type Tenant struct { + // ID + ID uint `yaml:"id" json:"id" gorm:"primaryKey"` + // Name + Name string `yaml:"name" json:"name"` + // Description + Description string `yaml:"description" json:"description"` + // API key + APIKey string `yaml:"api_key" json:"api_key"` + // Status + Status string `yaml:"status" json:"status" gorm:"default:'active'"` + // Retriever engines + RetrieverEngines RetrieverEngines `yaml:"retriever_engines" json:"retriever_engines" gorm:"type:json"` + // Business + Business string `yaml:"business" json:"business"` + // Storage quota (Bytes), default is 10GB, including vector, original file, text, index, etc. 
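Looking back at the Pagination helpers in search.go above: they clamp client input (page at least 1, page size 1 to 100, default 20) so repositories can feed Offset/Limit straight into SQL. A minimal sketch of the intended flow; the repository query is only described in a comment:

```go
// Illustrative use of the pagination helpers; not code shipped in this commit.
package example

import (
	"fmt"

	"github.com/Tencent/WeKnora/internal/types"
)

func paginate() {
	// Zero values arrive when the client omits ?page and ?page_size.
	p := &types.Pagination{Page: 0, PageSize: 0}

	fmt.Println(p.GetPage(), p.GetPageSize()) // 1 20
	fmt.Println(p.Offset(), p.Limit())        // 0 20

	// A repository would run: SELECT ... LIMIT p.Limit() OFFSET p.Offset(),
	// plus a COUNT(*) for the total, then wrap both in a PageResult.
	sessions := []string{"s1", "s2"} // stand-in for []*types.Session
	result := types.NewPageResult(42, p, sessions)
	fmt.Printf("total=%d page=%d size=%d\n", result.Total, result.Page, result.PageSize)
}
```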
+ StorageQuota int64 `yaml:"storage_quota" json:"storage_quota" gorm:"default:10737418240"` + // Storage used (Bytes) + StorageUsed int64 `yaml:"storage_used" json:"storage_used" gorm:"default:0"` + // Creation time + CreatedAt time.Time `yaml:"created_at" json:"created_at"` + // Last updated time + UpdatedAt time.Time `yaml:"updated_at" json:"updated_at"` + // Deletion time + DeletedAt gorm.DeletedAt `yaml:"deleted_at" json:"deleted_at" gorm:"index"` +} + +type RetrieverEngines struct { + Engines []RetrieverEngineParams `yaml:"engines" json:"engines" gorm:"type:json"` +} + +func (t *Tenant) BeforeCreate(tx *gorm.DB) error { + if t.RetrieverEngines.Engines == nil { + t.RetrieverEngines.Engines = []RetrieverEngineParams{} + } + return nil +} + +// Value implements the driver.Valuer interface, used to convert RetrieverEngines to database value +func (c RetrieverEngines) Value() (driver.Value, error) { + return json.Marshal(c) +} + +// Scan implements the sql.Scanner interface, used to convert database value to RetrieverEngines +func (c *RetrieverEngines) Scan(value interface{}) error { + if value == nil { + return nil + } + b, ok := value.([]byte) + if !ok { + return nil + } + return json.Unmarshal(b, c) +} diff --git a/migrations/mysql/00-init-db.sql b/migrations/mysql/00-init-db.sql new file mode 100644 index 0000000..93f97b2 --- /dev/null +++ b/migrations/mysql/00-init-db.sql @@ -0,0 +1,148 @@ +DROP TABLE IF EXISTS tenants; +DROP TABLE IF EXISTS models; +DROP TABLE IF EXISTS knowledge_bases; +DROP TABLE IF EXISTS knowledges; +DROP TABLE IF EXISTS sessions; +DROP TABLE IF EXISTS messages; +DROP TABLE IF EXISTS chunks; + +CREATE TABLE tenants ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) NOT NULL, + description TEXT, + api_key VARCHAR(64) NOT NULL, + retriever_engines JSON NOT NULL, + status VARCHAR(50) DEFAULT 'active', + business VARCHAR(255) NOT NULL, + storage_quota BIGINT NOT NULL DEFAULT 10737418240, + storage_used BIGINT NOT NULL DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + deleted_at TIMESTAMP NULL DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +CREATE TABLE models ( + id VARCHAR(36) PRIMARY KEY, + tenant_id INT NOT NULL, + name VARCHAR(255) NOT NULL, + type VARCHAR(50) NOT NULL, + source VARCHAR(50) NOT NULL, + description TEXT, + parameters JSON NOT NULL, + is_default BOOLEAN NOT NULL DEFAULT FALSE, + status VARCHAR(50) NOT NULL DEFAULT 'active', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + deleted_at TIMESTAMP NULL DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +CREATE INDEX idx_models_tenant_source_type ON models(tenant_id, source, type); + +CREATE TABLE knowledge_bases ( + id VARCHAR(36) PRIMARY KEY, + name VARCHAR(255) NOT NULL, + description TEXT, + tenant_id INT NOT NULL, + chunking_config JSON NOT NULL, + image_processing_config JSON NOT NULL, + embedding_model_id VARCHAR(36) NOT NULL, + summary_model_id VARCHAR(36) NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + deleted_at TIMESTAMP NULL DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +CREATE INDEX idx_knowledge_bases_tenant_name ON knowledge_bases(tenant_id, name); + +CREATE TABLE knowledges ( + id VARCHAR(36) PRIMARY KEY, + tenant_id INT NOT NULL, + knowledge_base_id VARCHAR(36) NOT NULL, + type VARCHAR(50) 
NOT NULL,
+    title VARCHAR(255) NOT NULL,
+    description TEXT,
+    source VARCHAR(128) NOT NULL,
+    parse_status VARCHAR(50) NOT NULL DEFAULT 'unprocessed',
+    enable_status VARCHAR(50) NOT NULL DEFAULT 'enabled',
+    embedding_model_id VARCHAR(36),
+    file_name VARCHAR(255),
+    file_type VARCHAR(50),
+    file_size BIGINT,
+    file_path TEXT,
+    file_hash VARCHAR(64),
+    storage_size BIGINT NOT NULL DEFAULT 0,
+    metadata JSON,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+    deleted_at TIMESTAMP NULL DEFAULT NULL,
+    processed_at TIMESTAMP,
+    error_message TEXT
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+
+CREATE INDEX idx_knowledges_tenant_id ON knowledges(tenant_id, knowledge_base_id);
+
+CREATE TABLE sessions (
+    id VARCHAR(36) PRIMARY KEY,
+    tenant_id INTEGER NOT NULL,
+    title VARCHAR(255),
+    description TEXT,
+    knowledge_base_id VARCHAR(36),
+    max_rounds INT NOT NULL DEFAULT 5,
+    enable_rewrite BOOLEAN NOT NULL DEFAULT TRUE,
+    fallback_strategy VARCHAR(255) NOT NULL DEFAULT 'fixed',
+    fallback_response VARCHAR(255) NOT NULL DEFAULT '很抱歉,我暂时无法回答这个问题。',
+    keyword_threshold FLOAT NOT NULL DEFAULT 0.5,
+    vector_threshold FLOAT NOT NULL DEFAULT 0.5,
+    rerank_model_id VARCHAR(36),
+    embedding_top_k INTEGER NOT NULL DEFAULT 10,
+    rerank_top_k INTEGER NOT NULL DEFAULT 10,
+    rerank_threshold FLOAT NOT NULL DEFAULT 0.65,
+    summary_model_id VARCHAR(36),
+    summary_parameters JSON NOT NULL,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+    deleted_at TIMESTAMP NULL DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+
+CREATE INDEX idx_sessions_tenant_id ON sessions(tenant_id);
+
+CREATE TABLE messages (
+    id VARCHAR(36) PRIMARY KEY,
+    request_id VARCHAR(36) NOT NULL,
+    session_id VARCHAR(36) NOT NULL,
+    role VARCHAR(50) NOT NULL,
+    content TEXT NOT NULL,
+    knowledge_references JSON NOT NULL,
+    is_completed BOOLEAN NOT NULL DEFAULT FALSE,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+    deleted_at TIMESTAMP NULL DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+
+CREATE INDEX idx_messages_session_role ON messages(session_id, role);
+
+CREATE TABLE chunks (
+    id VARCHAR(36) PRIMARY KEY,
+    tenant_id INTEGER NOT NULL,
+    knowledge_base_id VARCHAR(36) NOT NULL,
+    knowledge_id VARCHAR(36) NOT NULL,
+    content TEXT NOT NULL,
+    chunk_index INTEGER NOT NULL,
+    is_enabled BOOLEAN NOT NULL DEFAULT TRUE,
+    start_at INTEGER NOT NULL,
+    end_at INTEGER NOT NULL,
+    pre_chunk_id VARCHAR(36),
+    next_chunk_id VARCHAR(36),
+    chunk_type VARCHAR(20) NOT NULL DEFAULT 'text',
+    parent_chunk_id VARCHAR(36),
+    image_info TEXT,
+    relation_chunks JSON,
+    indirect_relation_chunks JSON,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+    deleted_at TIMESTAMP NULL DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+
+CREATE INDEX idx_chunks_tenant_knowledge ON chunks(tenant_id, knowledge_id);
+CREATE INDEX idx_chunks_parent_id ON chunks(parent_chunk_id);
+CREATE INDEX idx_chunks_chunk_type ON chunks(chunk_type); \ No newline at end of file diff --git a/migrations/paradedb/00-init-db.sql b/migrations/paradedb/00-init-db.sql new file mode 100644 index 0000000..4d51688 --- /dev/null +++ b/migrations/paradedb/00-init-db.sql @@ -0,0 +1,197 @@ +-- Create extensions +CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; +CREATE
EXTENSION IF NOT EXISTS vector; +CREATE EXTENSION IF NOT EXISTS pg_trgm; +CREATE EXTENSION IF NOT EXISTS pg_search; + + +-- Create tenant table +CREATE TABLE IF NOT EXISTS tenants ( + id SERIAL PRIMARY KEY, + name VARCHAR(255) NOT NULL, + description TEXT, + api_key VARCHAR(64) NOT NULL, + retriever_engines JSONB NOT NULL DEFAULT '[]', + status VARCHAR(50) DEFAULT 'active', + business VARCHAR(255) NOT NULL, + storage_quota BIGINT NOT NULL DEFAULT 10737418240, -- 默认10GB配额(Bytes) + storage_used BIGINT NOT NULL DEFAULT 0, -- 已使用的存储空间(Bytes) + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP WITH TIME ZONE +); + +-- Add indexes +CREATE INDEX IF NOT EXISTS idx_tenants_api_key ON tenants(api_key); +CREATE INDEX IF NOT EXISTS idx_tenants_status ON tenants(status); + +-- Create model table +CREATE TABLE IF NOT EXISTS models ( + id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(), + tenant_id INTEGER NOT NULL, + name VARCHAR(255) NOT NULL, + type VARCHAR(50) NOT NULL, + source VARCHAR(50) NOT NULL, + description TEXT, + parameters JSONB NOT NULL, + is_default BOOLEAN NOT NULL DEFAULT false, + status VARCHAR(50) NOT NULL DEFAULT 'active', + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP WITH TIME ZONE +); + +-- Add indexes for models +CREATE INDEX IF NOT EXISTS idx_models_type ON models(type); +CREATE INDEX IF NOT EXISTS idx_models_source ON models(source); + +-- Create knowledge_base table +CREATE TABLE IF NOT EXISTS knowledge_bases ( + id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(), + name VARCHAR(255) NOT NULL, + description TEXT, + tenant_id INTEGER NOT NULL, + chunking_config JSONB NOT NULL DEFAULT '{"chunk_size": 512, "chunk_overlap": 50, "split_markers": ["\n\n", "\n", "。"], "keep_separator": true}', + image_processing_config JSONB NOT NULL DEFAULT '{"enable_multimodal": false, "model_id": ""}', + embedding_model_id VARCHAR(36) NOT NULL, + summary_model_id VARCHAR(36) NOT NULL, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP WITH TIME ZONE +); + +-- Add indexes for knowledge_bases +CREATE INDEX IF NOT EXISTS idx_knowledge_bases_tenant_id ON knowledge_bases(tenant_id); + +-- Create knowledge table +CREATE TABLE IF NOT EXISTS knowledges ( + id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(), + tenant_id INTEGER NOT NULL, + knowledge_base_id VARCHAR(36) NOT NULL, + type VARCHAR(50) NOT NULL, + title VARCHAR(255) NOT NULL, + description TEXT, + source VARCHAR(128) NOT NULL, + parse_status VARCHAR(50) NOT NULL DEFAULT 'unprocessed', + enable_status VARCHAR(50) NOT NULL DEFAULT 'enabled', + embedding_model_id VARCHAR(36), + file_name VARCHAR(255), + file_type VARCHAR(50), + file_size BIGINT, + file_path TEXT, + file_hash VARCHAR(64), + storage_size BIGINT NOT NULL DEFAULT 0, -- 存储大小(Byte) + metadata JSONB, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + processed_at TIMESTAMP WITH TIME ZONE, + error_message TEXT, + deleted_at TIMESTAMP WITH TIME ZONE +); + +-- Add indexes for knowledge +CREATE INDEX IF NOT EXISTS idx_knowledges_tenant_id ON knowledges(tenant_id); +CREATE INDEX IF NOT EXISTS idx_knowledges_base_id ON knowledges(knowledge_base_id); +CREATE INDEX IF NOT EXISTS 
idx_knowledges_parse_status ON knowledges(parse_status); +CREATE INDEX IF NOT EXISTS idx_knowledges_enable_status ON knowledges(enable_status); + +-- Create session table +CREATE TABLE IF NOT EXISTS sessions ( + id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(), + tenant_id INTEGER NOT NULL, + title VARCHAR(255), + description TEXT, + knowledge_base_id VARCHAR(36), + max_rounds INTEGER NOT NULL DEFAULT 5, + enable_rewrite BOOLEAN NOT NULL DEFAULT true, + fallback_strategy VARCHAR(255) NOT NULL DEFAULT 'fixed', + fallback_response TEXT NOT NULL DEFAULT '很抱歉,我暂时无法回答这个问题。', + keyword_threshold FLOAT NOT NULL DEFAULT 0.5, + vector_threshold FLOAT NOT NULL DEFAULT 0.5, + rerank_model_id VARCHAR(36), + embedding_top_k INTEGER NOT NULL DEFAULT 10, + rerank_top_k INTEGER NOT NULL DEFAULT 10, + rerank_threshold FLOAT NOT NULL DEFAULT 0.65, + summary_model_id VARCHAR(36), + summary_parameters JSONB NOT NULL DEFAULT '{}', + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP WITH TIME ZONE +); + +-- Create Index for sessions +CREATE INDEX IF NOT EXISTS idx_sessions_tenant_id ON sessions(tenant_id); + + +-- Create message table +CREATE TABLE IF NOT EXISTS messages ( + id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(), + request_id VARCHAR(36) NOT NULL, + session_id VARCHAR(36) NOT NULL, + role VARCHAR(50) NOT NULL, + content TEXT NOT NULL, + knowledge_references JSONB NOT NULL DEFAULT '[]', + is_completed BOOLEAN NOT NULL DEFAULT false, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP WITH TIME ZONE +); + +-- Create Index for messages +CREATE INDEX IF NOT EXISTS idx_messages_session_id ON messages(session_id); + + +CREATE TABLE IF NOT EXISTS chunks ( + id VARCHAR(36) PRIMARY KEY DEFAULT uuid_generate_v4(), + tenant_id INTEGER NOT NULL, + knowledge_base_id VARCHAR(36) NOT NULL, + knowledge_id VARCHAR(36) NOT NULL, + content TEXT NOT NULL, + chunk_index INTEGER NOT NULL, + is_enabled BOOLEAN NOT NULL DEFAULT true, + start_at INTEGER NOT NULL, + end_at INTEGER NOT NULL, + pre_chunk_id VARCHAR(36), + next_chunk_id VARCHAR(36), + chunk_type VARCHAR(20) NOT NULL DEFAULT 'text', + parent_chunk_id VARCHAR(36), + image_info TEXT, + relation_chunks JSONB, + indirect_relation_chunks JSONB, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP WITH TIME ZONE +); + +CREATE INDEX IF NOT EXISTS idx_chunks_tenant_kg ON chunks(tenant_id, knowledge_id); +CREATE INDEX IF NOT EXISTS idx_chunks_parent_id ON chunks(parent_chunk_id); +CREATE INDEX IF NOT EXISTS idx_chunks_chunk_type ON chunks(chunk_type); + +CREATE TABLE IF NOT EXISTS embeddings ( + id SERIAL PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + + source_id VARCHAR(64) NOT NULL, + source_type INTEGER NOT NULL, + chunk_id VARCHAR(64), + knowledge_id VARCHAR(64), + knowledge_base_id VARCHAR(64), + content TEXT, + dimension INTEGER NOT NULL, + embedding halfvec +); + +CREATE UNIQUE INDEX IF NOT EXISTS embeddings_unique_source ON embeddings(source_id, source_type); +CREATE INDEX IF NOT EXISTS embeddings_search_idx ON embeddings +USING bm25 (id, knowledge_base_id, content, knowledge_id, chunk_id) +WITH ( + key_field = 'id', + text_fields = '{ + "content": { + 
"tokenizer": {"type": "chinese_lindera"} + } + }' +); +CREATE INDEX ON embeddings USING hnsw ((embedding::halfvec(3584)) halfvec_cosine_ops) WITH (m = 16, ef_construction = 64) WHERE (dimension = 3584); +CREATE INDEX ON embeddings USING hnsw ((embedding::halfvec(798)) halfvec_cosine_ops) WITH (m = 16, ef_construction = 64) WHERE (dimension = 798); \ No newline at end of file diff --git a/migrations/paradedb/01-migrate-to-paradedb.sql b/migrations/paradedb/01-migrate-to-paradedb.sql new file mode 100644 index 0000000..2ad51b6 --- /dev/null +++ b/migrations/paradedb/01-migrate-to-paradedb.sql @@ -0,0 +1,69 @@ +-- 迁移脚本:从PostgreSQL迁移到ParadeDB +-- 注意:在执行此脚本前,请确保已经备份了数据 + +-- 1. 导出数据(在PostgreSQL中执行) +-- pg_dump -U postgres -h localhost -p 5432 -d your_database > backup.sql + +-- 2. 导入数据(在ParadeDB中执行) +-- psql -U postgres -h localhost -p 5432 -d your_database < backup.sql + +-- 3. 验证数据 + + +-- Insert some sample data +-- INSERT INTO tenants (id, name, description, status, api_key) +-- VALUES +-- (1, 'Demo Tenant', 'This is a demo tenant for testing', 'active', 'sk-00000001abcdefg123456') +-- ON CONFLICT DO NOTHING; + +-- SELECT setval('tenants_id_seq', (SELECT MAX(id) FROM tenants)); + + +-- -- Create knowledge base +-- INSERT INTO knowledge_bases (id, name, description, tenant_id, chunking_config, image_processing_config, embedding_model_id) +-- VALUES +-- ('kb-00000001', 'Default Knowledge Base', 'Default knowledge base for testing', 1, '{"chunk_size": 512, "chunk_overlap": 50, "separators": ["\n\n", "\n", "。"], "keep_separator": true}', '{"enable_multimodal": false, "model_id": ""}', 'model-embedding-00000001'), +-- ('kb-00000002', 'Test Knowledge Base', 'Test knowledge base for development', 1, '{"chunk_size": 512, "chunk_overlap": 50, "separators": ["\n\n", "\n", "。"], "keep_separator": true}', '{"enable_multimodal": false, "model_id": ""}', 'model-embedding-00000001'), +-- ('kb-00000003', 'Test Knowledge Base 2', 'Test knowledge base for development 2', 1, '{"chunk_size": 512, "chunk_overlap": 50, "separators": ["\n\n", "\n", "。"], "keep_separator": true}', '{"enable_multimodal": false, "model_id": ""}', 'model-embedding-00000001') +-- ON CONFLICT DO NOTHING; + + +SELECT COUNT(*) FROM tenants; +SELECT COUNT(*) FROM models; +SELECT COUNT(*) FROM knowledge_bases; +SELECT COUNT(*) FROM knowledges; + + +-- 测试中文全文搜索 + +-- 创建文档表 +CREATE TABLE chinese_documents ( + id SERIAL PRIMARY KEY, + title TEXT, + content TEXT, + published_date DATE +); + +-- 在表上创建 BM25 索引,使用结巴分词器支持中文 +CREATE INDEX idx_documents_bm25 ON chinese_documents +USING bm25 (id, content) +WITH ( + key_field = 'id', + text_fields = '{ + "content": { + "tokenizer": {"type": "chinese_lindera"} + } + }' +); + +INSERT INTO chinese_documents (title, content, published_date) +VALUES +('人工智能的发展', '人工智能技术正在快速发展,影响了我们生活的方方面面。大语言模型是最近的一个重要突破。', '2023-01-15'), +('机器学习基础', '机器学习是人工智能的一个重要分支,包括监督学习、无监督学习和强化学习等方法。', '2023-02-20'), +('深度学习应用', '深度学习在图像识别、自然语言处理和语音识别等领域有广泛应用。', '2023-03-10'), +('自然语言处理技术', '自然语言处理允许计算机理解、解释和生成人类语言,是人工智能的核心技术之一。', '2023-04-05'), +('计算机视觉入门', '计算机视觉让机器能够"看到"并理解视觉世界,广泛应用于安防、医疗等领域。', '2023-05-12'); + +INSERT INTO chinese_documents (title, content, published_date) +VALUES +('hello world', 'hello world', '2023-05-12'); diff --git a/scripts/migrate.sh b/scripts/migrate.sh new file mode 100644 index 0000000..97e35a2 --- /dev/null +++ b/scripts/migrate.sh @@ -0,0 +1,54 @@ +#!/bin/bash +set -e + +# Database connection details (can be overridden by environment variables) +DB_HOST=${DB_HOST:-localhost} +DB_PORT=${DB_PORT:-5432} 
+DB_USER=${DB_USER:-postgres} +DB_PASSWORD=${DB_PASSWORD:-postgres} +DB_NAME=${DB_NAME:-WeKnora} +MIGRATIONS_DIR="/app/migrations" + +# Check if migrate tool is installed +if ! command -v migrate &> /dev/null; then + echo "Error: migrate tool is not installed" + echo "Install it with: go install -tags 'postgres' github.com/golang-migrate/migrate/v4/cmd/migrate@latest" + exit 1 +fi + +# Construct the database URL +DB_URL="postgres://${DB_USER}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${DB_NAME}?sslmode=disable" + +# Execute migration based on command +case "$1" in + up) + echo "Running migrations up..." + echo "DB_URL: ${DB_URL}" + echo "DB_USER: ${DB_USER}" + echo "DB_PASSWORD: ${DB_PASSWORD}" + echo "DB_HOST: ${DB_HOST}" + echo "DB_PORT: ${DB_PORT}" + echo "DB_NAME: ${DB_NAME}" + echo "MIGRATIONS_DIR: ${MIGRATIONS_DIR}" + migrate -path ${MIGRATIONS_DIR} -database ${DB_URL} up + ;; + down) + echo "Running migrations down..." + migrate -path ${MIGRATIONS_DIR} -database ${DB_URL} down + ;; + create) + if [ -z "$2" ]; then + echo "Error: Migration name is required" + echo "Usage: $0 create " + exit 1 + fi + echo "Creating migration files for $2..." + migrate create -ext sql -dir ${MIGRATIONS_DIR} -seq $2 + ;; + *) + echo "Usage: $0 {up|down|create }" + exit 1 + ;; +esac + +echo "Migration command completed successfully" \ No newline at end of file diff --git a/scripts/start_all.sh b/scripts/start_all.sh new file mode 100755 index 0000000..7bdb0c3 --- /dev/null +++ b/scripts/start_all.sh @@ -0,0 +1,561 @@ +#!/bin/bash +# 该脚本用于按需启动/停止Ollama和docker-compose服务 + +# 设置颜色 +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +BLUE='\033[0;34m' +NC='\033[0m' # 无颜色 + +# 获取项目根目录(脚本所在目录的上一级) +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PROJECT_ROOT="$( cd "$SCRIPT_DIR/.." && pwd )" + +# 版本信息 +VERSION="1.0.0" +SCRIPT_NAME=$(basename "$0") + +# 显示帮助信息 +show_help() { + echo -e "${GREEN}WeKnora 启动脚本 v${VERSION}${NC}" + echo -e "${GREEN}用法:${NC} $0 [选项]" + echo "选项:" + echo " -h, --help 显示帮助信息" + echo " -o, --ollama 启动Ollama服务" + echo " -d, --docker 启动Docker容器服务" + echo " -a, --all 启动所有服务(默认)" + echo " -s, --stop 停止所有服务" + echo " -c, --check 检查环境并诊断问题" + echo " -r, --restart 重新构建并重启指定容器" + echo " -l, --list 列出所有正在运行的容器" + echo " -v, --version 显示版本信息" + exit 0 +} + +# 显示版本信息 +show_version() { + echo -e "${GREEN}WeKnora 启动脚本 v${VERSION}${NC}" + exit 0 +} + +# 日志函数 +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +# 检查并创建.env文件 +check_env_file() { + log_info "检查环境变量配置..." + if [ ! -f "$PROJECT_ROOT/.env" ]; then + log_warning ".env 文件不存在,将从模板创建" + if [ -f "$PROJECT_ROOT/.env.example" ]; then + cp "$PROJECT_ROOT/.env.example" "$PROJECT_ROOT/.env" + log_success "已从 .env.example 创建 .env 文件" + else + log_error "未找到 .env.example 模板文件,无法创建 .env 文件" + return 1 + fi + else + log_info ".env 文件已存在" + fi + + # 检查必要的环境变量是否已设置 + source "$PROJECT_ROOT/.env" + local missing_vars=() + + # 检查基础变量 + if [ -z "$DB_DRIVER" ]; then missing_vars+=("DB_DRIVER"); fi + if [ -z "$STORAGE_TYPE" ]; then missing_vars+=("STORAGE_TYPE"); fi + + if [ ${#missing_vars[@]} -gt 0 ]; then + log_warning "以下环境变量未设置,将使用默认值: ${missing_vars[*]}" + else + log_success "所有必要的环境变量已设置" + fi + + return 0 +} + +# 安装Ollama(根据平台不同采用不同方法) +install_ollama() { + log_info "Ollama未安装,正在安装..." 
+
+    OS=$(uname)
+    if [ "$OS" = "Darwin" ]; then
+        # Mac安装方式
+        log_info "检测到Mac系统,使用brew安装Ollama..."
+        if ! command -v brew &> /dev/null; then
+            # 通过安装包安装
+            log_info "Homebrew未安装,使用直接下载方式..."
+            curl -fsSL https://ollama.com/download/Ollama-darwin.zip -o ollama.zip
+            unzip ollama.zip
+            mv ollama /usr/local/bin
+            rm ollama.zip
+        else
+            brew install ollama
+        fi
+    else
+        # Linux安装方式
+        log_info "检测到Linux系统,使用安装脚本..."
+        curl -fsSL https://ollama.com/install.sh | sh
+    fi
+
+    if [ $? -eq 0 ]; then
+        log_success "Ollama安装完成"
+        return 0
+    else
+        log_error "Ollama安装失败"
+        return 1
+    fi
+}
+
+# 启动Ollama服务
+start_ollama() {
+    log_info "正在检查Ollama服务..."
+
+    # 检查Ollama是否已安装
+    if ! command -v ollama &> /dev/null; then
+        install_ollama
+        if [ $? -ne 0 ]; then
+            return 1
+        fi
+    fi
+
+    # 检查Ollama服务是否已运行
+    if curl -s http://localhost:11435/api/version &> /dev/null; then
+        log_success "Ollama服务已经在运行"
+    else
+        log_info "启动Ollama服务..."
+        export OLLAMA_HOST=0.0.0.0:11435
+        ollama serve > /dev/null 2>&1 &
+
+        # 等待服务启动
+        MAX_RETRIES=30
+        COUNT=0
+        while [ $COUNT -lt $MAX_RETRIES ]; do
+            if curl -s http://localhost:11435/api/version &> /dev/null; then
+                log_success "Ollama服务已成功启动"
+                break
+            fi
+            echo "等待Ollama服务启动... ($COUNT/$MAX_RETRIES)"
+            sleep 1
+            COUNT=$((COUNT + 1))
+        done
+
+        if [ $COUNT -eq $MAX_RETRIES ]; then
+            log_error "Ollama服务启动失败"
+            return 1
+        fi
+    fi
+
+    log_success "Ollama服务地址: http://localhost:11435"
+    return 0
+}
+
+# 停止Ollama服务
+stop_ollama() {
+    log_info "正在停止Ollama服务..."
+
+    # 检查Ollama是否已安装
+    if ! command -v ollama &> /dev/null; then
+        log_info "Ollama未安装,无需停止"
+        return 0
+    fi
+
+    # 查找并终止Ollama进程
+    if pgrep -x "ollama" > /dev/null; then
+        pkill -f "ollama serve"
+        log_success "Ollama服务已停止"
+    else
+        log_info "Ollama服务未运行"
+    fi
+
+    return 0
+}
+
+# 检查Docker是否已安装
+check_docker() {
+    log_info "检查Docker环境..."
+
+    if ! command -v docker &> /dev/null; then
+        log_error "未安装Docker,请先安装Docker"
+        return 1
+    fi
+
+    if ! command -v docker-compose &> /dev/null; then
+        log_error "未安装docker-compose,请先安装docker-compose"
+        return 1
+    fi
+
+    # 检查Docker服务运行状态
+    if ! docker info &> /dev/null; then
+        log_error "Docker服务未运行,请启动Docker服务"
+        return 1
+    fi
+
+    log_success "Docker环境检查通过"
+    return 0
+}
+
+# 启动Docker容器
+start_docker() {
+    log_info "正在启动Docker容器..."
+
+    # 检查Docker环境
+    check_docker
+    if [ $? -ne 0 ]; then
+        return 1
+    fi
+
+    # 检查.env文件
+    check_env_file
+
+    # 读取.env文件
+    source "$PROJECT_ROOT/.env"
+    storage_type=${STORAGE_TYPE:-local}
+
+    # 进入项目根目录再执行docker-compose命令
+    cd "$PROJECT_ROOT"
+
+    # 启动基本服务
+    log_info "启动核心服务容器..."
+    docker-compose up --build -d
+    if [ $? -ne 0 ]; then
+        log_error "Docker容器启动失败"
+        return 1
+    fi
+
+    # 如果存储类型是minio,则启动MinIO服务
+    if [ "$storage_type" == "minio" ]; then
+        log_info "检测到MinIO存储配置,启动MinIO服务..."
+        docker-compose -f ./docker/docker-compose.minio.yml up --build -d
+        if [ $? -ne 0 ]; then
+            log_error "MinIO服务启动失败"
+            return 1
+        fi
+        log_success "MinIO服务已启动"
+    else
+        log_info "使用本地存储,不启动MinIO服务"
+    fi
+
+    log_success "所有Docker容器已成功启动"
+
+    # 显示容器状态
+    log_info "当前容器状态:"
+    docker-compose ps
+
+    return 0
+}
+
+# 停止Docker容器
+stop_docker() {
+    log_info "正在停止Docker容器..."
+
+    # 检查Docker环境
+    check_docker
+    if [ $? -ne 0 ]; then
+        return 1
+    fi
+
+    # 进入项目根目录再执行docker-compose命令
+    cd "$PROJECT_ROOT"
+
+    # 停止所有容器
+    docker-compose down --remove-orphans
+    if [ $? 
-ne 0 ]; then + log_error "Docker容器停止失败" + return 1 + fi + + # 如果存在minio配置,也停止minio + if [ -f "$PROJECT_ROOT/docker/docker-compose.minio.yml" ]; then + docker-compose -f "./docker/docker-compose.minio.yml" down + fi + + log_success "所有Docker容器已停止" + return 0 +} + +# 列出所有正在运行的容器 +list_containers() { + log_info "列出所有正在运行的容器..." + + # 检查Docker环境 + check_docker + if [ $? -ne 0 ]; then + return 1 + fi + + # 进入项目根目录再执行docker-compose命令 + cd "$PROJECT_ROOT" + + # 列出所有容器 + echo -e "${BLUE}当前正在运行的容器:${NC}" + docker-compose ps --services | sort + + return 0 +} + +# 重启指定容器 +restart_container() { + local container_name="$1" + + if [ -z "$container_name" ]; then + log_error "未指定容器名称" + echo "可用的容器有:" + list_containers + return 1 + fi + + log_info "正在重新构建并重启容器: $container_name" + + # 检查Docker环境 + check_docker + if [ $? -ne 0 ]; then + return 1 + fi + + # 进入项目根目录再执行docker-compose命令 + cd "$PROJECT_ROOT" + + # 检查容器是否存在 + if ! docker-compose ps --services | grep -q "^$container_name$"; then + log_error "容器 '$container_name' 不存在或未运行" + echo "可用的容器有:" + list_containers + return 1 + fi + + # 构建并重启容器 + log_info "正在重新构建容器 '$container_name'..." + docker-compose build "$container_name" + if [ $? -ne 0 ]; then + log_error "容器 '$container_name' 构建失败" + return 1 + fi + + log_info "正在重启容器 '$container_name'..." + docker-compose up -d --no-deps "$container_name" + if [ $? -ne 0 ]; then + log_error "容器 '$container_name' 重启失败" + return 1 + fi + + log_success "容器 '$container_name' 已成功重新构建并重启" + return 0 +} + +# 检查系统环境 +check_environment() { + log_info "开始环境检查..." + + # 检查操作系统 + OS=$(uname) + log_info "操作系统: $OS" + + # 检查Docker + check_docker + + # 检查.env文件 + check_env_file + + # 检查Ollama + if command -v ollama &> /dev/null; then + log_success "Ollama已安装" + if curl -s http://localhost:11435/api/version &> /dev/null; then + version=$(curl -s http://localhost:11435/api/version | grep -o '"version":"[^"]*"' | cut -d'"' -f4) + log_success "Ollama服务正在运行,版本: $version" + else + log_warning "Ollama已安装但服务未运行" + fi + else + log_warning "Ollama未安装" + fi + + # 检查磁盘空间 + log_info "检查磁盘空间..." + df -h | grep -E "(Filesystem|/$)" + + # 检查内存 + log_info "检查内存使用情况..." + if [ "$OS" = "Darwin" ]; then + vm_stat | perl -ne '/page size of (\d+)/ and $size=$1; /Pages free: (\d+)/ and print "Free Memory: ", $1 * $size / 1048576, " MB\n"' + else + free -h | grep -E "(total|Mem:)" + fi + + # 检查CPU + log_info "CPU信息:" + if [ "$OS" = "Darwin" ]; then + sysctl -n machdep.cpu.brand_string + echo "CPU核心数: $(sysctl -n hw.ncpu)" + else + grep "model name" /proc/cpuinfo | head -1 + echo "CPU核心数: $(nproc)" + fi + + # 检查容器状态 + log_info "检查容器状态..." 
+ if docker ps &> /dev/null; then + docker ps -a + else + log_warning "无法获取容器状态,Docker可能未运行" + fi + + log_success "环境检查完成" + return 0 +} + +# 解析命令行参数 +START_OLLAMA=false +START_DOCKER=false +STOP_SERVICES=false +CHECK_ENVIRONMENT=false +LIST_CONTAINERS=false +RESTART_CONTAINER=false +CONTAINER_NAME="" + +# 没有参数时默认启动所有服务 +if [ $# -eq 0 ]; then + START_OLLAMA=true + START_DOCKER=true +fi + +while [ "$1" != "" ]; do + case $1 in + -h | --help ) show_help + ;; + -o | --ollama ) START_OLLAMA=true + ;; + -d | --docker ) START_DOCKER=true + ;; + -a | --all ) START_OLLAMA=true + START_DOCKER=true + ;; + -s | --stop ) STOP_SERVICES=true + ;; + -c | --check ) CHECK_ENVIRONMENT=true + ;; + -l | --list ) LIST_CONTAINERS=true + ;; + -r | --restart ) RESTART_CONTAINER=true + CONTAINER_NAME="$2" + shift + ;; + -v | --version ) show_version + ;; + * ) log_error "未知选项: $1" + show_help + ;; + esac + shift +done + +# 执行环境检查 +if [ "$CHECK_ENVIRONMENT" = true ]; then + check_environment + exit $? +fi + +# 列出所有容器 +if [ "$LIST_CONTAINERS" = true ]; then + list_containers + exit $? +fi + +# 重启指定容器 +if [ "$RESTART_CONTAINER" = true ]; then + restart_container "$CONTAINER_NAME" + exit $? +fi + +# 执行服务操作 +if [ "$STOP_SERVICES" = true ]; then + # 停止服务 + stop_ollama + OLLAMA_RESULT=$? + + stop_docker + DOCKER_RESULT=$? + + # 显示总结 + echo "" + log_info "=== 停止结果 ===" + if [ $OLLAMA_RESULT -eq 0 ]; then + log_success "✓ Ollama服务已停止" + else + log_error "✗ Ollama服务停止失败" + fi + + if [ $DOCKER_RESULT -eq 0 ]; then + log_success "✓ Docker容器已停止" + else + log_error "✗ Docker容器停止失败" + fi + + log_success "服务停止完成。" +else + # 启动服务 + if [ "$START_OLLAMA" = true ]; then + start_ollama + OLLAMA_RESULT=$? + fi + + if [ "$START_DOCKER" = true ]; then + start_docker + DOCKER_RESULT=$? + fi + + # 显示总结 + echo "" + log_info "=== 启动结果 ===" + if [ "$START_OLLAMA" = true ]; then + if [ $OLLAMA_RESULT -eq 0 ]; then + log_success "✓ Ollama服务已启动" + else + log_error "✗ Ollama服务启动失败" + fi + fi + + if [ "$START_DOCKER" = true ]; then + if [ $DOCKER_RESULT -eq 0 ]; then + log_success "✓ Docker容器已启动" + else + log_error "✗ Docker容器启动失败" + fi + fi + + if [ "$START_OLLAMA" = true ] && [ "$START_DOCKER" = true ]; then + if [ $OLLAMA_RESULT -eq 0 ] && [ $DOCKER_RESULT -eq 0 ]; then + log_success "所有服务启动完成,可通过以下地址访问:" + echo -e "${GREEN} - 前端界面: http://localhost${NC}" + echo -e "${GREEN} - API接口: http://localhost:8080${NC}" + echo -e "${GREEN} - Jaeger链路追踪: http://localhost:16686${NC}" + else + log_error "部分服务启动失败,请检查日志并修复问题" + fi + elif [ "$START_OLLAMA" = true ] && [ $OLLAMA_RESULT -eq 0 ]; then + log_success "Ollama服务启动完成,可通过以下地址访问:" + echo -e "${GREEN} - Ollama API: http://localhost:11435${NC}" + elif [ "$START_DOCKER" = true ] && [ $DOCKER_RESULT -eq 0 ]; then + log_success "Docker容器启动完成,可通过以下地址访问:" + echo -e "${GREEN} - 前端界面: http://localhost${NC}" + echo -e "${GREEN} - API接口: http://localhost:8080${NC}" + echo -e "${GREEN} - Jaeger链路追踪: http://localhost:16686${NC}" + fi +fi + +exit 0 \ No newline at end of file diff --git a/services/docreader/Makefile b/services/docreader/Makefile new file mode 100644 index 0000000..b1ffb69 --- /dev/null +++ b/services/docreader/Makefile @@ -0,0 +1,23 @@ +.PHONY: proto build run docker-build docker-run clean + +# 生成 protobuf 代码 +proto: + @echo "Generating protobuf code..." + @sh ./scripts/generate_proto.sh + +# 构建 Go 客户端 +build: + @echo "Building Go client..." + @go build -o bin/client ./src/client + +# 运行 Python 服务 +run: + @echo "Running Python server..." 
+ @python src/server/server.py + +# 清理 +clean: + @echo "Cleaning up..." + @rm -rf bin/ + @find . -name "*.pyc" -delete + @find . -name "__pycache__" -delete \ No newline at end of file diff --git a/services/docreader/poetry.lock b/services/docreader/poetry.lock new file mode 100644 index 0000000..28e0f11 --- /dev/null +++ b/services/docreader/poetry.lock @@ -0,0 +1,2228 @@ +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. + +[[package]] +name = "albucore" +version = "0.0.23" +description = "High-performance image processing functions for deep learning and computer vision." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "albucore-0.0.23-py3-none-any.whl", hash = "sha256:99274ac0c15a1a7d9a726df9d54d5ab70d9d0c189e2a935399dba3d4bafad415"}, + {file = "albucore-0.0.23.tar.gz", hash = "sha256:57823982b954913b84a9e2cf71058c4577b02397a62c41885be2d9b295efa8ab"}, +] + +[package.dependencies] +numpy = ">=1.24.4" +opencv-python-headless = ">=4.9.0.80" +simsimd = ">=5.9.2" +stringzilla = ">=3.10.4" + +[[package]] +name = "albumentations" +version = "2.0.5" +description = "Fast, flexible, and advanced augmentation library for deep learning, computer vision, and medical imaging. Albumentations offers a wide range of transformations for both 2D (images, masks, bboxes, keypoints) and 3D (volumes, volumetric masks, keypoints) data, with optimized performance and seamless integration into ML workflows." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "albumentations-2.0.5-py3-none-any.whl", hash = "sha256:1fc253942d34dd7c07652bf6511049c8bb7d522baec7f1fe355df16293c3c7b6"}, + {file = "albumentations-2.0.5.tar.gz", hash = "sha256:e19e1c0f14c903c3c230f3d83f14814b84f1180393189bf96779f653031f3278"}, +] + +[package.dependencies] +albucore = "0.0.23" +numpy = ">=1.24.4" +opencv-python-headless = ">=4.9.0.80" +pydantic = ">=2.9.2" +PyYAML = "*" +scipy = ">=1.10.0" + +[package.extras] +hub = ["huggingface-hub"] +pytorch = ["torch"] +text = ["pillow"] + +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[[package]] +name = "anyio" +version = "4.9.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c"}, + {file = "anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028"}, +] + +[package.dependencies] +idna = ">=2.8" +sniffio = ">=1.1" +typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} + +[package.extras] +doc = ["Sphinx (>=8.2,<9.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] +test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21) ; platform_python_implementation == 
\"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""] +trio = ["trio (>=0.26.1)"] + +[[package]] +name = "astor" +version = "0.8.1" +description = "Read/rewrite/write Python ASTs" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" +groups = ["main"] +files = [ + {file = "astor-0.8.1-py2.py3-none-any.whl", hash = "sha256:070a54e890cefb5b3739d19f30f5a5ec840ffc9c50ffa7d23cc9fc1a38ebbfc5"}, + {file = "astor-0.8.1.tar.gz", hash = "sha256:6a6effda93f4e1ce9f618779b2dd1d9d84f1e32812c23a29b3fff6fd7f63fa5e"}, +] + +[[package]] +name = "beautifulsoup4" +version = "4.13.3" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.7.0" +groups = ["main"] +files = [ + {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"}, + {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"}, +] + +[package.dependencies] +soupsieve = ">1.2" +typing-extensions = ">=4.0.0" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[[package]] +name = "certifi" +version = "2025.1.31" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, + {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.1" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77"}, + {file = 
"charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-win32.whl", hash = 
"sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = "sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765"}, + {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"}, + {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, +] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main"] +markers = "platform_system == \"Windows\"" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "cython" +version = "3.0.12" +description = "The Cython compiler for writing C extensions in the Python language." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" +groups = ["main"] +files = [ + {file = "Cython-3.0.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba67eee9413b66dd9fbacd33f0bc2e028a2a120991d77b5fd4b19d0b1e4039b9"}, + {file = "Cython-3.0.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bee2717e5b5f7d966d0c6e27d2efe3698c357aa4d61bb3201997c7a4f9fe485a"}, + {file = "Cython-3.0.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cffc3464f641c8d0dda942c7c53015291beea11ec4d32421bed2f13b386b819"}, + {file = "Cython-3.0.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d3a8f81980ffbd74e52f9186d8f1654e347d0c44bfea6b5997028977f481a179"}, + {file = "Cython-3.0.12-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8d32856716c369d01f2385ad9177cdd1a11079ac89ea0932dc4882de1aa19174"}, + {file = "Cython-3.0.12-cp310-cp310-win32.whl", hash = "sha256:712c3f31adec140dc60d064a7f84741f50e2c25a8edd7ae746d5eb4d3ef7072a"}, + {file = "Cython-3.0.12-cp310-cp310-win_amd64.whl", hash = "sha256:d6945694c5b9170cfbd5f2c0d00ef7487a2de7aba83713a64ee4ebce7fad9e05"}, + {file = "Cython-3.0.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:feb86122a823937cc06e4c029d80ff69f082ebb0b959ab52a5af6cdd271c5dc3"}, + {file = "Cython-3.0.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfdbea486e702c328338314adb8e80f5f9741f06a0ae83aaec7463bc166d12e8"}, + {file = "Cython-3.0.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:563de1728c8e48869d2380a1b76bbc1b1b1d01aba948480d68c1d05e52d20c92"}, + {file = "Cython-3.0.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:398d4576c1e1f6316282aa0b4a55139254fbed965cba7813e6d9900d3092b128"}, + {file = "Cython-3.0.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1e5eadef80143026944ea8f9904715a008f5108d1d644a89f63094cc37351e73"}, + {file = "Cython-3.0.12-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5a93cbda00a5451175b97dea5a9440a3fcee9e54b4cba7a7dbcba9a764b22aec"}, + {file = "Cython-3.0.12-cp311-cp311-win32.whl", hash = "sha256:3109e1d44425a2639e9a677b66cd7711721a5b606b65867cb2d8ef7a97e2237b"}, + {file = "Cython-3.0.12-cp311-cp311-win_amd64.whl", hash = "sha256:d4b70fc339adba1e2111b074ee6119fe9fd6072c957d8597bce9a0dd1c3c6784"}, + {file = "Cython-3.0.12-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fe030d4a00afb2844f5f70896b7f2a1a0d7da09bf3aa3d884cbe5f73fff5d310"}, + {file = "Cython-3.0.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7fec4f052b8fe173fe70eae75091389955b9a23d5cec3d576d21c5913b49d47"}, + {file = "Cython-3.0.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0faa5e39e5c8cdf6f9c3b1c3f24972826e45911e7f5b99cf99453fca5432f45e"}, + {file = "Cython-3.0.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d53de996ed340e9ab0fc85a88aaa8932f2591a2746e1ab1c06e262bd4ec4be7"}, + {file = "Cython-3.0.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ea3a0e19ab77266c738aa110684a753a04da4e709472cadeff487133354d6ab8"}, + {file = "Cython-3.0.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c151082884be468f2f405645858a857298ac7f7592729e5b54788b5c572717ba"}, + {file = "Cython-3.0.12-cp312-cp312-win32.whl", hash = "sha256:3083465749911ac3b2ce001b6bf17f404ac9dd35d8b08469d19dc7e717f5877a"}, + {file = 
"Cython-3.0.12-cp312-cp312-win_amd64.whl", hash = "sha256:c0b91c7ebace030dd558ea28730de8c580680b50768e5af66db2904a3716c3e3"}, + {file = "Cython-3.0.12-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4ee6f1ea1bead8e6cbc4e64571505b5d8dbdb3b58e679d31f3a84160cebf1a1a"}, + {file = "Cython-3.0.12-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57aefa6d3341109e46ec1a13e3a763aaa2cbeb14e82af2485b318194be1d9170"}, + {file = "Cython-3.0.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:879ae9023958d63c0675015369384642d0afb9c9d1f3473df9186c42f7a9d265"}, + {file = "Cython-3.0.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:36fcd584dae547de6f095500a380f4a0cce72b7a7e409e9ff03cb9beed6ac7a1"}, + {file = "Cython-3.0.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:62b79dcc0de49efe9e84b9d0e2ae0a6fc9b14691a65565da727aa2e2e63c6a28"}, + {file = "Cython-3.0.12-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4aa255781b093a8401109d8f2104bbb2e52de7639d5896aefafddc85c30e0894"}, + {file = "Cython-3.0.12-cp313-cp313-win32.whl", hash = "sha256:77d48f2d4bab9fe1236eb753d18f03e8b2619af5b6f05d51df0532a92dfb38ab"}, + {file = "Cython-3.0.12-cp313-cp313-win_amd64.whl", hash = "sha256:86c304b20bd57c727c7357e90d5ba1a2b6f1c45492de2373814d7745ef2e63b4"}, + {file = "Cython-3.0.12-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ff5c0b6a65b08117d0534941d404833d516dac422eee88c6b4fd55feb409a5ed"}, + {file = "Cython-3.0.12-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:680f1d6ed4436ae94805db264d6155ed076d2835d84f20dcb31a7a3ad7f8668c"}, + {file = "Cython-3.0.12-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc24609613fa06d0d896309f7164ba168f7e8d71c1e490ed2a08d23351c3f41"}, + {file = "Cython-3.0.12-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1879c073e2b34924ce9b7ca64c212705dcc416af4337c45f371242b2e5f6d32"}, + {file = "Cython-3.0.12-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:bfb75123dd4ff767baa37d7036da0de2dfb6781ff256eef69b11b88b9a0691d1"}, + {file = "Cython-3.0.12-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:f39640f8df0400cde6882e23c734f15bb8196de0a008ae5dc6c8d1ec5957d7c8"}, + {file = "Cython-3.0.12-cp36-cp36m-win32.whl", hash = "sha256:8c9efe9a0895abee3cadfdad4130b30f7b5e57f6e6a51ef2a44f9fc66a913880"}, + {file = "Cython-3.0.12-cp36-cp36m-win_amd64.whl", hash = "sha256:63d840f2975e44d74512f8f34f1f7cb8121c9428e26a3f6116ff273deb5e60a2"}, + {file = "Cython-3.0.12-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:75c5acd40b97cff16fadcf6901a91586cbca5dcdba81f738efaf1f4c6bc8dccb"}, + {file = "Cython-3.0.12-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e62564457851db1c40399bd95a5346b9bb99e17a819bf583b362f418d8f3457a"}, + {file = "Cython-3.0.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ccd1228cc203b1f1b8a3d403f5a20ad1c40e5879b3fbf5851ce09d948982f2c"}, + {file = "Cython-3.0.12-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25529ee948f44d9a165ff960c49d4903267c20b5edf2df79b45924802e4cca6e"}, + {file = "Cython-3.0.12-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:90cf599372c5a22120609f7d3a963f17814799335d56dd0dcf8fe615980a8ae1"}, + {file = "Cython-3.0.12-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = 
"sha256:9f8c48748a9c94ea5d59c26ab49ad0fad514d36f894985879cf3c3ca0e600bf4"}, + {file = "Cython-3.0.12-cp37-cp37m-win32.whl", hash = "sha256:3e4fa855d98bc7bd6a2049e0c7dc0dcf595e2e7f571a26e808f3efd84d2db374"}, + {file = "Cython-3.0.12-cp37-cp37m-win_amd64.whl", hash = "sha256:120681093772bf3600caddb296a65b352a0d3556e962b9b147efcfb8e8c9801b"}, + {file = "Cython-3.0.12-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:731d719423e041242c9303c80cae4327467299b90ffe62d4cc407e11e9ea3160"}, + {file = "Cython-3.0.12-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3238a29f37999e27494d120983eca90d14896b2887a0bd858a381204549137a"}, + {file = "Cython-3.0.12-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b588c0a089a9f4dd316d2f9275230bad4a7271e5af04e1dc41d2707c816be44b"}, + {file = "Cython-3.0.12-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ab9f5198af74eb16502cc143cdde9ca1cbbf66ea2912e67440dd18a36e3b5fa"}, + {file = "Cython-3.0.12-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8ee841c0e114efa1e849c281ac9b8df8aa189af10b4a103b1c5fd71cbb799679"}, + {file = "Cython-3.0.12-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:43c48b5789398b228ea97499f5b864843ba9b1ab837562a9227c6f58d16ede8b"}, + {file = "Cython-3.0.12-cp38-cp38-win32.whl", hash = "sha256:5e5f17c48a4f41557fbcc7ee660ccfebe4536a34c557f553b6893c1b3c83df2d"}, + {file = "Cython-3.0.12-cp38-cp38-win_amd64.whl", hash = "sha256:309c081057930bb79dc9ea3061a1af5086c679c968206e9c9c2ec90ab7cb471a"}, + {file = "Cython-3.0.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54115fcc126840926ff3b53cfd2152eae17b3522ae7f74888f8a41413bd32f25"}, + {file = "Cython-3.0.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:629db614b9c364596d7c975fa3fb3978e8c5349524353dbe11429896a783fc1e"}, + {file = "Cython-3.0.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:af081838b0f9e12a83ec4c3809a00a64c817f489f7c512b0e3ecaf5f90a2a816"}, + {file = "Cython-3.0.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:34ce459808f7d8d5d4007bc5486fe50532529096b43957af6cbffcb4d9cc5c8d"}, + {file = "Cython-3.0.12-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d6c6cd6a75c8393e6805d17f7126b96a894f310a1a9ea91c47d141fb9341bfa8"}, + {file = "Cython-3.0.12-cp39-cp39-win32.whl", hash = "sha256:a4032e48d4734d2df68235d21920c715c451ac9de15fa14c71b378e8986b83be"}, + {file = "Cython-3.0.12-cp39-cp39-win_amd64.whl", hash = "sha256:dcdc3e5d4ce0e7a4af6903ed580833015641e968d18d528d8371e2435a34132c"}, + {file = "Cython-3.0.12-py2.py3-none-any.whl", hash = "sha256:0038c9bae46c459669390e53a1ec115f8096b2e4647ae007ff1bf4e6dee92806"}, + {file = "cython-3.0.12.tar.gz", hash = "sha256:b988bb297ce76c671e28c97d017b95411010f7c77fa6623dd0bb47eed1aee1bc"}, +] + +[[package]] +name = "decorator" +version = "5.2.1" +description = "Decorators for Humans" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a"}, + {file = "decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"}, +] + +[[package]] +name = "fire" +version = "0.7.0" +description = "A library for automatically generating command line interfaces." 
+optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "fire-0.7.0.tar.gz", hash = "sha256:961550f07936eaf65ad1dc8360f2b2bf8408fad46abbfa4d2a3794f8d2a95cdf"}, +] + +[package.dependencies] +termcolor = "*" + +[[package]] +name = "fonttools" +version = "4.57.0" +description = "Tools to manipulate font files" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "fonttools-4.57.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:babe8d1eb059a53e560e7bf29f8e8f4accc8b6cfb9b5fd10e485bde77e71ef41"}, + {file = "fonttools-4.57.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:81aa97669cd726349eb7bd43ca540cf418b279ee3caba5e2e295fb4e8f841c02"}, + {file = "fonttools-4.57.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0e9618630edd1910ad4f07f60d77c184b2f572c8ee43305ea3265675cbbfe7e"}, + {file = "fonttools-4.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34687a5d21f1d688d7d8d416cb4c5b9c87fca8a1797ec0d74b9fdebfa55c09ab"}, + {file = "fonttools-4.57.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:69ab81b66ebaa8d430ba56c7a5f9abe0183afefd3a2d6e483060343398b13fb1"}, + {file = "fonttools-4.57.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d639397de852f2ccfb3134b152c741406752640a266d9c1365b0f23d7b88077f"}, + {file = "fonttools-4.57.0-cp310-cp310-win32.whl", hash = "sha256:cc066cb98b912f525ae901a24cd381a656f024f76203bc85f78fcc9e66ae5aec"}, + {file = "fonttools-4.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:7a64edd3ff6a7f711a15bd70b4458611fb240176ec11ad8845ccbab4fe6745db"}, + {file = "fonttools-4.57.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3871349303bdec958360eedb619169a779956503ffb4543bb3e6211e09b647c4"}, + {file = "fonttools-4.57.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c59375e85126b15a90fcba3443eaac58f3073ba091f02410eaa286da9ad80ed8"}, + {file = "fonttools-4.57.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:967b65232e104f4b0f6370a62eb33089e00024f2ce143aecbf9755649421c683"}, + {file = "fonttools-4.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39acf68abdfc74e19de7485f8f7396fa4d2418efea239b7061d6ed6a2510c746"}, + {file = "fonttools-4.57.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9d077f909f2343daf4495ba22bb0e23b62886e8ec7c109ee8234bdbd678cf344"}, + {file = "fonttools-4.57.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:46370ac47a1e91895d40e9ad48effbe8e9d9db1a4b80888095bc00e7beaa042f"}, + {file = "fonttools-4.57.0-cp311-cp311-win32.whl", hash = "sha256:ca2aed95855506b7ae94e8f1f6217b7673c929e4f4f1217bcaa236253055cb36"}, + {file = "fonttools-4.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:17168a4670bbe3775f3f3f72d23ee786bd965395381dfbb70111e25e81505b9d"}, + {file = "fonttools-4.57.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:889e45e976c74abc7256d3064aa7c1295aa283c6bb19810b9f8b604dfe5c7f31"}, + {file = "fonttools-4.57.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0425c2e052a5f1516c94e5855dbda706ae5a768631e9fcc34e57d074d1b65b92"}, + {file = "fonttools-4.57.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44c26a311be2ac130f40a96769264809d3b0cb297518669db437d1cc82974888"}, + {file = "fonttools-4.57.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84c41ba992df5b8d680b89fd84c6a1f2aca2b9f1ae8a67400c8930cd4ea115f6"}, + {file = 
"fonttools-4.57.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ea1e9e43ca56b0c12440a7c689b1350066595bebcaa83baad05b8b2675129d98"}, + {file = "fonttools-4.57.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:84fd56c78d431606332a0627c16e2a63d243d0d8b05521257d77c6529abe14d8"}, + {file = "fonttools-4.57.0-cp312-cp312-win32.whl", hash = "sha256:f4376819c1c778d59e0a31db5dc6ede854e9edf28bbfa5b756604727f7f800ac"}, + {file = "fonttools-4.57.0-cp312-cp312-win_amd64.whl", hash = "sha256:57e30241524879ea10cdf79c737037221f77cc126a8cdc8ff2c94d4a522504b9"}, + {file = "fonttools-4.57.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:408ce299696012d503b714778d89aa476f032414ae57e57b42e4b92363e0b8ef"}, + {file = "fonttools-4.57.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bbceffc80aa02d9e8b99f2a7491ed8c4a783b2fc4020119dc405ca14fb5c758c"}, + {file = "fonttools-4.57.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f022601f3ee9e1f6658ed6d184ce27fa5216cee5b82d279e0f0bde5deebece72"}, + {file = "fonttools-4.57.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dea5893b58d4637ffa925536462ba626f8a1b9ffbe2f5c272cdf2c6ebadb817"}, + {file = "fonttools-4.57.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dff02c5c8423a657c550b48231d0a48d7e2b2e131088e55983cfe74ccc2c7cc9"}, + {file = "fonttools-4.57.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:767604f244dc17c68d3e2dbf98e038d11a18abc078f2d0f84b6c24571d9c0b13"}, + {file = "fonttools-4.57.0-cp313-cp313-win32.whl", hash = "sha256:8e2e12d0d862f43d51e5afb8b9751c77e6bec7d2dc00aad80641364e9df5b199"}, + {file = "fonttools-4.57.0-cp313-cp313-win_amd64.whl", hash = "sha256:f1d6bc9c23356908db712d282acb3eebd4ae5ec6d8b696aa40342b1d84f8e9e3"}, + {file = "fonttools-4.57.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9d57b4e23ebbe985125d3f0cabbf286efa191ab60bbadb9326091050d88e8213"}, + {file = "fonttools-4.57.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:579ba873d7f2a96f78b2e11028f7472146ae181cae0e4d814a37a09e93d5c5cc"}, + {file = "fonttools-4.57.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e3e1ec10c29bae0ea826b61f265ec5c858c5ba2ce2e69a71a62f285cf8e4595"}, + {file = "fonttools-4.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1968f2a2003c97c4ce6308dc2498d5fd4364ad309900930aa5a503c9851aec8"}, + {file = "fonttools-4.57.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:aff40f8ac6763d05c2c8f6d240c6dac4bb92640a86d9b0c3f3fff4404f34095c"}, + {file = "fonttools-4.57.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d07f1b64008e39fceae7aa99e38df8385d7d24a474a8c9872645c4397b674481"}, + {file = "fonttools-4.57.0-cp38-cp38-win32.whl", hash = "sha256:51d8482e96b28fb28aa8e50b5706f3cee06de85cbe2dce80dbd1917ae22ec5a6"}, + {file = "fonttools-4.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:03290e818782e7edb159474144fca11e36a8ed6663d1fcbd5268eb550594fd8e"}, + {file = "fonttools-4.57.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7339e6a3283e4b0ade99cade51e97cde3d54cd6d1c3744459e886b66d630c8b3"}, + {file = "fonttools-4.57.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:05efceb2cb5f6ec92a4180fcb7a64aa8d3385fd49cfbbe459350229d1974f0b1"}, + {file = "fonttools-4.57.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a97bb05eb24637714a04dee85bdf0ad1941df64fe3b802ee4ac1c284a5f97b7c"}, + {file = 
"fonttools-4.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:541cb48191a19ceb1a2a4b90c1fcebd22a1ff7491010d3cf840dd3a68aebd654"}, + {file = "fonttools-4.57.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:cdef9a056c222d0479a1fdb721430f9efd68268014c54e8166133d2643cb05d9"}, + {file = "fonttools-4.57.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3cf97236b192a50a4bf200dc5ba405aa78d4f537a2c6e4c624bb60466d5b03bd"}, + {file = "fonttools-4.57.0-cp39-cp39-win32.whl", hash = "sha256:e952c684274a7714b3160f57ec1d78309f955c6335c04433f07d36c5eb27b1f9"}, + {file = "fonttools-4.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2a722c0e4bfd9966a11ff55c895c817158fcce1b2b6700205a376403b546ad9"}, + {file = "fonttools-4.57.0-py3-none-any.whl", hash = "sha256:3122c604a675513c68bd24c6a8f9091f1c2376d18e8f5fe5a101746c81b3e98f"}, + {file = "fonttools-4.57.0.tar.gz", hash = "sha256:727ece10e065be2f9dd239d15dd5d60a66e17eac11aea47d447f9f03fdbc42de"}, +] + +[package.extras] +all = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\"", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0) ; python_version <= \"3.12\"", "xattr ; sys_platform == \"darwin\"", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\""] +lxml = ["lxml (>=4.0)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr ; sys_platform == \"darwin\""] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.1.0) ; python_version <= \"3.12\""] +woff = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "zopfli (>=0.1.4)"] + +[[package]] +name = "greenlet" +version = "3.1.1" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36b89d13c49216cadb828db8dfa6ce86bbbc476a82d3a6c397f0efae0525bdd0"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94b6150a85e1b33b40b1464a3f9988dcc5251d6ed06842abff82e42632fac120"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93147c513fac16385d1036b7e5b102c7fbbdb163d556b791f0f11eada7ba65dc"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da7a9bff22ce038e19bf62c4dd1ec8391062878710ded0a845bcf47cc0200617"}, + {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b2795058c23988728eec1f36a4e5e4ebad22f8320c85f3587b539b9ac84128d7"}, + {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:ed10eac5830befbdd0c32f83e8aa6288361597550ba669b04c48f0f9a2c843c6"}, + {file = "greenlet-3.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:77c386de38a60d1dfb8e55b8c1101d68c79dfdd25c7095d51fec2dd800892b80"}, + {file = "greenlet-3.1.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:e4d333e558953648ca09d64f13e6d8f0523fa705f51cae3f03b5983489958c70"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fc016b73c94e98e29af67ab7b9a879c307c6731a2c9da0db5a7d9b7edd1159"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5e975ca70269d66d17dd995dafc06f1b06e8cb1ec1e9ed54c1d1e4a7c4cf26e"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2813dc3de8c1ee3f924e4d4227999285fd335d1bcc0d2be6dc3f1f6a318ec1"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e347b3bfcf985a05e8c0b7d462ba6f15b1ee1c909e2dcad795e49e91b152c383"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e8f8c9cb53cdac7ba9793c276acd90168f416b9ce36799b9b885790f8ad6c0a"}, + {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62ee94988d6b4722ce0028644418d93a52429e977d742ca2ccbe1c4f4a792511"}, + {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1776fd7f989fc6b8d8c8cb8da1f6b82c5814957264d1f6cf818d475ec2bf6395"}, + {file = "greenlet-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:48ca08c771c268a768087b408658e216133aecd835c0ded47ce955381105ba39"}, + {file = "greenlet-3.1.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:4afe7ea89de619adc868e087b4d2359282058479d7cfb94970adf4b55284574d"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f406b22b7c9a9b4f8aa9d2ab13d6ae0ac3e85c9a809bd590ad53fed2bf70dc79"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c3a701fe5a9695b238503ce5bbe8218e03c3bcccf7e204e455e7462d770268aa"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2846930c65b47d70b9d178e89c7e1a69c95c1f68ea5aa0a58646b7a96df12441"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99cfaa2110534e2cf3ba31a7abcac9d328d1d9f1b95beede58294a60348fba36"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1443279c19fca463fc33e65ef2a935a5b09bb90f978beab37729e1c3c6c25fe9"}, + {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b7cede291382a78f7bb5f04a529cb18e068dd29e0fb27376074b6d0317bf4dd0"}, + {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23f20bb60ae298d7d8656c6ec6db134bca379ecefadb0b19ce6f19d1f232a942"}, + {file = "greenlet-3.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:7124e16b4c55d417577c2077be379514321916d5790fa287c9ed6f23bd2ffd01"}, + {file = "greenlet-3.1.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:05175c27cb459dcfc05d026c4232f9de8913ed006d42713cb8a5137bd49375f1"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935e943ec47c4afab8965954bf49bfa639c05d4ccf9ef6e924188f762145c0ff"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:667a9706c970cb552ede35aee17339a18e8f2a87a51fba2ed39ceeeb1004798a"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8a678974d1f3aa55f6cc34dc480169d58f2e6d8958895d68845fa4ab566509e"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efc0f674aa41b92da8c49e0346318c6075d734994c3c4e4430b1c3f853e498e4"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0153404a4bb921f0ff1abeb5ce8a5131da56b953eda6e14b88dc6bbc04d2049e"}, + {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:275f72decf9932639c1c6dd1013a1bc266438eb32710016a1c742df5da6e60a1"}, + {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c4aab7f6381f38a4b42f269057aee279ab0fc7bf2e929e3d4abfae97b682a12c"}, + {file = "greenlet-3.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:b42703b1cf69f2aa1df7d1030b9d77d3e584a70755674d60e710f0af570f3761"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1695e76146579f8c06c1509c7ce4dfe0706f49c6831a817ac04eebb2fd02011"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7876452af029456b3f3549b696bb36a06db7c90747740c5302f74a9e9fa14b13"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ead44c85f8ab905852d3de8d86f6f8baf77109f9da589cb4fa142bd3b57b475"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8320f64b777d00dd7ccdade271eaf0cad6636343293a25074cc5566160e4de7b"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6510bf84a6b643dabba74d3049ead221257603a253d0a9873f55f6a59a65f822"}, + {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:04b013dc07c96f83134b1e99888e7a79979f1a247e2a9f59697fa14b5862ed01"}, + {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47da355d8687fd65240c364c90a31569a133b7b60de111c255ef5b606f2ae291"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98884ecf2ffb7d7fe6bd517e8eb99d31ff7855a840fa6d0d63cd07c037f6a981"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1d4aeb8891338e60d1ab6127af1fe45def5259def8094b9c7e34690c8858803"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db32b5348615a04b82240cc67983cb315309e88d444a288934ee6ceaebcad6cc"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dcc62f31eae24de7f8dce72134c8651c58000d3b1868e01392baea7c32c247de"}, + {file = "greenlet-3.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1d3755bcb2e02de341c55b4fca7a745a24a9e7212ac953f6b3a48d117d7257aa"}, + {file = "greenlet-3.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b8da394b34370874b4572676f36acabac172602abf054cbc4ac910219f3340af"}, + {file = "greenlet-3.1.1-cp37-cp37m-win32.whl", hash = "sha256:a0dfc6c143b519113354e780a50381508139b07d2177cb6ad6a08278ec655798"}, + {file = "greenlet-3.1.1-cp37-cp37m-win_amd64.whl", hash = 
"sha256:54558ea205654b50c438029505def3834e80f0869a70fb15b871c29b4575ddef"}, + {file = "greenlet-3.1.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:346bed03fe47414091be4ad44786d1bd8bef0c3fcad6ed3dee074a032ab408a9"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfc59d69fc48664bc693842bd57acfdd490acafda1ab52c7836e3fc75c90a111"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21e10da6ec19b457b82636209cbe2331ff4306b54d06fa04b7c138ba18c8a81"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37b9de5a96111fc15418819ab4c4432e4f3c2ede61e660b1e33971eba26ef9ba"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ef9ea3f137e5711f0dbe5f9263e8c009b7069d8a1acea822bd5e9dae0ae49c8"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85f3ff71e2e60bd4b4932a043fbbe0f499e263c628390b285cb599154a3b03b1"}, + {file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:95ffcf719966dd7c453f908e208e14cde192e09fde6c7186c8f1896ef778d8cd"}, + {file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:03a088b9de532cbfe2ba2034b2b85e82df37874681e8c470d6fb2f8c04d7e4b7"}, + {file = "greenlet-3.1.1-cp38-cp38-win32.whl", hash = "sha256:8b8b36671f10ba80e159378df9c4f15c14098c4fd73a36b9ad715f057272fbef"}, + {file = "greenlet-3.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:7017b2be767b9d43cc31416aba48aab0d2309ee31b4dbf10a1d38fb7972bdf9d"}, + {file = "greenlet-3.1.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:396979749bd95f018296af156201d6211240e7a23090f50a8d5d18c370084dc3"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca9d0ff5ad43e785350894d97e13633a66e2b50000e8a183a50a88d834752d42"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f6ff3b14f2df4c41660a7dec01045a045653998784bf8cfcb5a525bdffffbc8f"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94ebba31df2aa506d7b14866fed00ac141a867e63143fe5bca82a8e503b36437"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73aaad12ac0ff500f62cebed98d8789198ea0e6f233421059fa68a5aa7220145"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63e4844797b975b9af3a3fb8f7866ff08775f5426925e1e0bbcfe7932059a12c"}, + {file = "greenlet-3.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7939aa3ca7d2a1593596e7ac6d59391ff30281ef280d8632fa03d81f7c5f955e"}, + {file = "greenlet-3.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d0028e725ee18175c6e422797c407874da24381ce0690d6b9396c204c7f7276e"}, + {file = "greenlet-3.1.1-cp39-cp39-win32.whl", hash = "sha256:5e06afd14cbaf9e00899fae69b24a32f2196c19de08fcb9f4779dd4f004e5e7c"}, + {file = "greenlet-3.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:3319aa75e0e0639bc15ff54ca327e8dc7a6fe404003496e3c6925cd3142e0e22"}, + {file = "greenlet-3.1.1.tar.gz", hash = "sha256:4ce3ac6cdb6adf7946475d7ef31777c26d94bccc377e070a7986bd2d5c515467"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil"] + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +groups 
= ["main"] +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "httpcore" +version = "1.0.7" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, + {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<1.0)"] + +[[package]] +name = "httpx" +version = "0.28.1" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, + {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +httpcore = "==1.*" +idna = "*" + +[package.extras] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "idna" +version = "3.10" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, +] + +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + +[[package]] +name = "imageio" +version = "2.37.0" +description = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "imageio-2.37.0-py3-none-any.whl", hash = "sha256:11efa15b87bc7871b61590326b2d635439acc321cf7f8ce996f812543ce10eed"}, + {file = "imageio-2.37.0.tar.gz", hash = "sha256:71b57b3669666272c818497aebba2b4c5f20d5b37c81720e5e1a56d59c492996"}, +] + +[package.dependencies] +numpy = "*" +pillow = ">=8.3.2" + +[package.extras] +all-plugins = ["astropy", "av", "imageio-ffmpeg", "numpy (>2)", "pillow-heif", "psutil", "rawpy", "tifffile"] +all-plugins-pypy = ["av", "imageio-ffmpeg", "pillow-heif", "psutil", "tifffile"] +build = ["wheel"] +dev = ["black", "flake8", "fsspec[github]", "pytest", "pytest-cov"] +docs = ["numpydoc", "pydata-sphinx-theme", "sphinx (<6)"] +ffmpeg = ["imageio-ffmpeg", "psutil"] +fits = ["astropy"] +full = ["astropy", "av", "black", "flake8", "fsspec[github]", "gdal", "imageio-ffmpeg", "itk", "numpy (>2)", "numpydoc", "pillow-heif", "psutil", "pydata-sphinx-theme", "pytest", "pytest-cov", "rawpy", "sphinx (<6)", "tifffile", "wheel"] +gdal = ["gdal"] +itk = ["itk"] +linting = ["black", "flake8"] +pillow-heif = ["pillow-heif"] +pyav = ["av"] +rawpy = ["numpy (>2)", "rawpy"] +test = ["fsspec[github]", "pytest", "pytest-cov"] +tifffile = ["tifffile"] + +[[package]] +name = "lazy-loader" +version = "0.4" +description = "Makes it easy to load subpackages and functions on demand." +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"}, + {file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +dev = ["changelist (==0.5)"] +lint = ["pre-commit (==3.7.0)"] +test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"] + +[[package]] +name = "lmdb" +version = "1.6.2" +description = "Universal Python binding for the LMDB 'Lightning' Database" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "lmdb-1.6.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:18422de2b4e70fc91b6d845a83f5b7355f85ea1c02f8180be0ab42c15191bead"}, + {file = "lmdb-1.6.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:167328892282950cbb5a358fa6c05b60ffa00b2f40fb240e2e676359720256ac"}, + {file = "lmdb-1.6.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d3efbbded74b9d213059a5a4048fb5bb47b5b7c4c2b366e43685cb6dd9d3100"}, + {file = "lmdb-1.6.2-cp310-cp310-win_amd64.whl", hash = "sha256:13d4fdab8ac9ea90c317d2a5711feb9977e675b4ce50cf4706860d3617ade996"}, + {file = "lmdb-1.6.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c42b3eae0437edb95dda96e582816766a9964436f844cc9c8ddb5d4219a63ff8"}, + {file = "lmdb-1.6.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e365a09be1926dbc3ed2d4eacd9ab6125cb1f211c37e417917c26494dea24b64"}, + {file = "lmdb-1.6.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eae318f72cac81363f5218f529016c2fde6fcf7acc0d1cde55c18e2952cbf34a"}, + {file = "lmdb-1.6.2-cp311-cp311-win_amd64.whl", hash = "sha256:40f7824888a8821adc4028cf70bd031395463aaf832d4c0460f5dab8ca0e0c7a"}, + {file = "lmdb-1.6.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:5dc6885b764aaccd2ecb6b244dc3ecbcce8b65d6f7e5b36fb39a603359909650"}, + {file = "lmdb-1.6.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:de252e4a7895a0f1f8422b30c7f5b432612145538cf09e073b57f68fc5a88977"}, + {file = "lmdb-1.6.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c45d63a49a17ab2caa5ec0d1da35bc81c5c85fa38dfb6ab7eacf817242334a7"}, + {file = "lmdb-1.6.2-cp312-cp312-win_amd64.whl", hash = "sha256:129379009c65e67187208eff75cdad74f4ae6f840521498f3e2da407ce5aaf66"}, + {file = "lmdb-1.6.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3be24aa18f160958b90605fe5882b66998f9fa3d97221478d49782bd8f88392a"}, + {file = "lmdb-1.6.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32154f561d5cdbe833798401a752868bb1a9c056d8de1e8faf92e62ca8f22999"}, + {file = "lmdb-1.6.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45c772f24089a7ff1ebd8c59c6dc7f2da0c7b8b704b5e83ee9687c44c764554b"}, + {file = "lmdb-1.6.2-cp313-cp313-win_amd64.whl", hash = "sha256:d9676fcba2cd388f24a5d15055e4a6de351e1df9febf5b176c93cd39cc80585e"}, + {file = "lmdb-1.6.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b530f1bb092e85d344989bbf9d6555a5057f49ad97a95436af2e710b3ca824ed"}, + {file = "lmdb-1.6.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de6918cd36e1c10868b6a50e6a799238cb049e3db876c2d18a4dc7d32f516f4e"}, + {file = "lmdb-1.6.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e79c2dfe94f02e29424e9acb51de654ca992064b8379b0c1432d6639af5c40c2"}, + {file = "lmdb-1.6.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f07b91f2b2e5d120fca11b6b1e1aeeea41d25f9785df48308bc6daeedc75974"}, + {file = "lmdb-1.6.2-cp37-cp37m-win_amd64.whl", hash = "sha256:a65f78ebfd8b1d0d444e6ca904a2041edc2154d795ef0285dfbd2398d92d3564"}, + {file = "lmdb-1.6.2-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:fe6ffea8c003b5766115429b8d2da17d6b3e05f644f6c535148a66c68ee3defb"}, + {file = "lmdb-1.6.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0651d4287fbb022556361842073049193767662c0896d9976cb46977471fc247"}, + {file = "lmdb-1.6.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf7cc99172dfa5acb766bdb1802f6c3f8467e85d0f7f9055211a604c884d758c"}, + {file = "lmdb-1.6.2-cp38-cp38-win_amd64.whl", hash = "sha256:27d659daf93f555f518e6c3aa9c39028035ed5f387a197e54f87986130861695"}, + {file = "lmdb-1.6.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9ae8e082cf587d06a8aef2753c959003eca3963be778e0d29762d3520e61ecd2"}, + {file = "lmdb-1.6.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ab9f270988fbca983b8381882cd271951cc5b2e4eb8bfac87caea828339c2ec"}, + {file = "lmdb-1.6.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b13d0028ac8993c967b158b78b66cc170353635cdcc823f42adf93d0114bce48"}, + {file = "lmdb-1.6.2-cp39-cp39-win_amd64.whl", hash = "sha256:3c2cdfa34a2d0d5a465eeeabd6f4db219118fe1e71b50965b172e0803dafd236"}, + {file = "lmdb-1.6.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b2730b7be6667beb77b26dcc5e5668bfb16673715ca173af82c49be1783ae218"}, + {file = "lmdb-1.6.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d1d1d2eef0f898b7f6b37b8c72d57c6ce67b56dc4a854ee64ddbbdd3cc07a7fb"}, + {file = "lmdb-1.6.2.tar.gz", hash = "sha256:d28e3fa59935ff688858760ec52f202ecb8c1089a3f68d1f162ea3078d151e73"}, +] + +[[package]] +name = "lxml" +version = "5.3.2" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the 
ElementTree API." +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "lxml-5.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c4b84d6b580a9625dfa47269bf1fd7fbba7ad69e08b16366a46acb005959c395"}, + {file = "lxml-5.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b4c08ecb26e4270a62f81f81899dfff91623d349e433b126931c9c4577169666"}, + {file = "lxml-5.3.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef926e9f11e307b5a7c97b17c5c609a93fb59ffa8337afac8f89e6fe54eb0b37"}, + {file = "lxml-5.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:017ceeabe739100379fe6ed38b033cd244ce2da4e7f6f07903421f57da3a19a2"}, + {file = "lxml-5.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dae97d9435dc90590f119d056d233c33006b2fd235dd990d5564992261ee7ae8"}, + {file = "lxml-5.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:910f39425c6798ce63c93976ae5af5fff6949e2cb446acbd44d6d892103eaea8"}, + {file = "lxml-5.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9780de781a0d62a7c3680d07963db3048b919fc9e3726d9cfd97296a65ffce1"}, + {file = "lxml-5.3.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:1a06b0c6ba2e3ca45a009a78a4eb4d6b63831830c0a83dcdc495c13b9ca97d3e"}, + {file = "lxml-5.3.2-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:4c62d0a34d1110769a1bbaf77871a4b711a6f59c4846064ccb78bc9735978644"}, + {file = "lxml-5.3.2-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:8f961a4e82f411b14538fe5efc3e6b953e17f5e809c463f0756a0d0e8039b700"}, + {file = "lxml-5.3.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:3dfc78f5f9251b6b8ad37c47d4d0bfe63ceb073a916e5b50a3bf5fd67a703335"}, + {file = "lxml-5.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10e690bc03214d3537270c88e492b8612d5e41b884f232df2b069b25b09e6711"}, + {file = "lxml-5.3.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa837e6ee9534de8d63bc4c1249e83882a7ac22bd24523f83fad68e6ffdf41ae"}, + {file = "lxml-5.3.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:da4c9223319400b97a2acdfb10926b807e51b69eb7eb80aad4942c0516934858"}, + {file = "lxml-5.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:dc0e9bdb3aa4d1de703a437576007d366b54f52c9897cae1a3716bb44fc1fc85"}, + {file = "lxml-5.3.2-cp310-cp310-win32.whl", hash = "sha256:5f94909a1022c8ea12711db7e08752ca7cf83e5b57a87b59e8a583c5f35016ad"}, + {file = "lxml-5.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:d64ea1686474074b38da13ae218d9fde0d1dc6525266976808f41ac98d9d7980"}, + {file = "lxml-5.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9d61a7d0d208ace43986a92b111e035881c4ed45b1f5b7a270070acae8b0bfb4"}, + {file = "lxml-5.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:856dfd7eda0b75c29ac80a31a6411ca12209183e866c33faf46e77ace3ce8a79"}, + {file = "lxml-5.3.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a01679e4aad0727bedd4c9407d4d65978e920f0200107ceeffd4b019bd48529"}, + {file = "lxml-5.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6b37b4c3acb8472d191816d4582379f64d81cecbdce1a668601745c963ca5cc"}, + {file = "lxml-5.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3df5a54e7b7c31755383f126d3a84e12a4e0333db4679462ef1165d702517477"}, + {file = 
"lxml-5.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c09a40f28dcded933dc16217d6a092be0cc49ae25811d3b8e937c8060647c353"}, + {file = "lxml-5.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1ef20f1851ccfbe6c5a04c67ec1ce49da16ba993fdbabdce87a92926e505412"}, + {file = "lxml-5.3.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f79a63289dbaba964eb29ed3c103b7911f2dce28c36fe87c36a114e6bd21d7ad"}, + {file = "lxml-5.3.2-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:75a72697d95f27ae00e75086aed629f117e816387b74a2f2da6ef382b460b710"}, + {file = "lxml-5.3.2-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:b9b00c9ee1cc3a76f1f16e94a23c344e0b6e5c10bec7f94cf2d820ce303b8c01"}, + {file = "lxml-5.3.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:77cbcab50cbe8c857c6ba5f37f9a3976499c60eada1bf6d38f88311373d7b4bc"}, + {file = "lxml-5.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:29424058f072a24622a0a15357bca63d796954758248a72da6d512f9bd9a4493"}, + {file = "lxml-5.3.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7d82737a8afe69a7c80ef31d7626075cc7d6e2267f16bf68af2c764b45ed68ab"}, + {file = "lxml-5.3.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:95473d1d50a5d9fcdb9321fdc0ca6e1edc164dce4c7da13616247d27f3d21e31"}, + {file = "lxml-5.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2162068f6da83613f8b2a32ca105e37a564afd0d7009b0b25834d47693ce3538"}, + {file = "lxml-5.3.2-cp311-cp311-win32.whl", hash = "sha256:f8695752cf5d639b4e981afe6c99e060621362c416058effd5c704bede9cb5d1"}, + {file = "lxml-5.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:d1a94cbb4ee64af3ab386c2d63d6d9e9cf2e256ac0fd30f33ef0a3c88f575174"}, + {file = "lxml-5.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:16b3897691ec0316a1aa3c6585f61c8b7978475587c5b16fc1d2c28d283dc1b0"}, + {file = "lxml-5.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a8d4b34a0eeaf6e73169dcfd653c8d47f25f09d806c010daf074fba2db5e2d3f"}, + {file = "lxml-5.3.2-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9cd7a959396da425022e1e4214895b5cfe7de7035a043bcc2d11303792b67554"}, + {file = "lxml-5.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cac5eaeec3549c5df7f8f97a5a6db6963b91639389cdd735d5a806370847732b"}, + {file = "lxml-5.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29b5f7d77334877c2146e7bb8b94e4df980325fab0a8af4d524e5d43cd6f789d"}, + {file = "lxml-5.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13f3495cfec24e3d63fffd342cc8141355d1d26ee766ad388775f5c8c5ec3932"}, + {file = "lxml-5.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e70ad4c9658beeff99856926fd3ee5fde8b519b92c693f856007177c36eb2e30"}, + {file = "lxml-5.3.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:507085365783abd7879fa0a6fa55eddf4bdd06591b17a2418403bb3aff8a267d"}, + {file = "lxml-5.3.2-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:5bb304f67cbf5dfa07edad904732782cbf693286b9cd85af27059c5779131050"}, + {file = "lxml-5.3.2-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:3d84f5c093645c21c29a4e972b84cb7cf682f707f8706484a5a0c7ff13d7a988"}, + {file = "lxml-5.3.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:bdc13911db524bd63f37b0103af014b7161427ada41f1b0b3c9b5b5a9c1ca927"}, + {file = "lxml-5.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:1ec944539543f66ebc060ae180d47e86aca0188bda9cbfadff47d86b0dc057dc"}, + {file = "lxml-5.3.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:59d437cc8a7f838282df5a199cf26f97ef08f1c0fbec6e84bd6f5cc2b7913f6e"}, + {file = "lxml-5.3.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:0e275961adbd32e15672e14e0cc976a982075208224ce06d149c92cb43db5b93"}, + {file = "lxml-5.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:038aeb6937aa404480c2966b7f26f1440a14005cb0702078c173c028eca72c31"}, + {file = "lxml-5.3.2-cp312-cp312-win32.whl", hash = "sha256:3c2c8d0fa3277147bff180e3590be67597e17d365ce94beb2efa3138a2131f71"}, + {file = "lxml-5.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:77809fcd97dfda3f399102db1794f7280737b69830cd5c961ac87b3c5c05662d"}, + {file = "lxml-5.3.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:77626571fb5270ceb36134765f25b665b896243529eefe840974269b083e090d"}, + {file = "lxml-5.3.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:78a533375dc7aa16d0da44af3cf6e96035e484c8c6b2b2445541a5d4d3d289ee"}, + {file = "lxml-5.3.2-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6f62b2404b3f3f0744bbcabb0381c5fe186fa2a9a67ecca3603480f4846c585"}, + {file = "lxml-5.3.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ea918da00091194526d40c30c4996971f09dacab032607581f8d8872db34fbf"}, + {file = "lxml-5.3.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c35326f94702a7264aa0eea826a79547d3396a41ae87a70511b9f6e9667ad31c"}, + {file = "lxml-5.3.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e3bef90af21d31c4544bc917f51e04f94ae11b43156356aff243cdd84802cbf2"}, + {file = "lxml-5.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52fa7ba11a495b7cbce51573c73f638f1dcff7b3ee23697467dc063f75352a69"}, + {file = "lxml-5.3.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ad131e2c4d2c3803e736bb69063382334e03648de2a6b8f56a878d700d4b557d"}, + {file = "lxml-5.3.2-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:00a4463ca409ceacd20490a893a7e08deec7870840eff33dc3093067b559ce3e"}, + {file = "lxml-5.3.2-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:87e8d78205331cace2b73ac8249294c24ae3cba98220687b5b8ec5971a2267f1"}, + {file = "lxml-5.3.2-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:bf6389133bb255e530a4f2f553f41c4dd795b1fbb6f797aea1eff308f1e11606"}, + {file = "lxml-5.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b3709fc752b42fb6b6ffa2ba0a5b9871646d97d011d8f08f4d5b3ee61c7f3b2b"}, + {file = "lxml-5.3.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:abc795703d0de5d83943a4badd770fbe3d1ca16ee4ff3783d7caffc252f309ae"}, + {file = "lxml-5.3.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:98050830bb6510159f65d9ad1b8aca27f07c01bb3884ba95f17319ccedc4bcf9"}, + {file = "lxml-5.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6ba465a91acc419c5682f8b06bcc84a424a7aa5c91c220241c6fd31de2a72bc6"}, + {file = "lxml-5.3.2-cp313-cp313-win32.whl", hash = "sha256:56a1d56d60ea1ec940f949d7a309e0bff05243f9bd337f585721605670abb1c1"}, + {file = "lxml-5.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:1a580dc232c33d2ad87d02c8a3069d47abbcdce974b9c9cc82a79ff603065dbe"}, + {file = "lxml-5.3.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:1a59f7fe888d0ec1916d0ad69364c5400cfa2f885ae0576d909f342e94d26bc9"}, + {file = 
"lxml-5.3.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d67b50abc2df68502a26ed2ccea60c1a7054c289fb7fc31c12e5e55e4eec66bd"}, + {file = "lxml-5.3.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cb08d2cb047c98d6fbbb2e77d6edd132ad6e3fa5aa826ffa9ea0c9b1bc74a84"}, + {file = "lxml-5.3.2-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:495ddb7e10911fb4d673d8aa8edd98d1eadafb3b56e8c1b5f427fd33cadc455b"}, + {file = "lxml-5.3.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:884d9308ac7d581b705a3371185282e1b8eebefd68ccf288e00a2d47f077cc51"}, + {file = "lxml-5.3.2-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:37f3d7cf7f2dd2520df6cc8a13df4c3e3f913c8e0a1f9a875e44f9e5f98d7fee"}, + {file = "lxml-5.3.2-cp36-cp36m-win32.whl", hash = "sha256:e885a1bf98a76dff0a0648850c3083b99d9358ef91ba8fa307c681e8e0732503"}, + {file = "lxml-5.3.2-cp36-cp36m-win_amd64.whl", hash = "sha256:b45f505d0d85f4cdd440cd7500689b8e95110371eaa09da0c0b1103e9a05030f"}, + {file = "lxml-5.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b53cd668facd60b4f0dfcf092e01bbfefd88271b5b4e7b08eca3184dd006cb30"}, + {file = "lxml-5.3.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5dea998c891f082fe204dec6565dbc2f9304478f2fc97bd4d7a940fec16c873"}, + {file = "lxml-5.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d46bc3e58b01e4f38d75e0d7f745a46875b7a282df145aca9d1479c65ff11561"}, + {file = "lxml-5.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:661feadde89159fd5f7d7639a81ccae36eec46974c4a4d5ccce533e2488949c8"}, + {file = "lxml-5.3.2-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:43af2a69af2cacc2039024da08a90174e85f3af53483e6b2e3485ced1bf37151"}, + {file = "lxml-5.3.2-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:1539f962d82436f3d386eb9f29b2a29bb42b80199c74a695dff51b367a61ec0a"}, + {file = "lxml-5.3.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:6673920bf976421b5fac4f29b937702eef4555ee42329546a5fc68bae6178a48"}, + {file = "lxml-5.3.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:9fa722a9cd8845594593cce399a49aa6bfc13b6c83a7ee05e2ab346d9253d52f"}, + {file = "lxml-5.3.2-cp37-cp37m-win32.whl", hash = "sha256:2eadd4efa487f4710755415aed3d6ae9ac8b4327ea45226ffccb239766c8c610"}, + {file = "lxml-5.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:83d8707b1b08cd02c04d3056230ec3b771b18c566ec35e723e60cdf037064e08"}, + {file = "lxml-5.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc6e8678bfa5ccba370103976ccfcf776c85c83da9220ead41ea6fd15d2277b4"}, + {file = "lxml-5.3.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bed509662f67f719119ad56006cd4a38efa68cfa74383060612044915e5f7ad"}, + {file = "lxml-5.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e3925975fadd6fd72a6d80541a6ec75dfbad54044a03aa37282dafcb80fbdfa"}, + {file = "lxml-5.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83c0462dedc5213ac586164c6d7227da9d4d578cf45dd7fbab2ac49b63a008eb"}, + {file = "lxml-5.3.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:53e3f9ca72858834688afa17278649d62aa768a4b2018344be00c399c4d29e95"}, + {file = "lxml-5.3.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:32ba634ef3f1b20f781019a91d78599224dc45745dd572f951adbf1c0c9b0d75"}, + {file = 
"lxml-5.3.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:1b16504c53f41da5fcf04868a80ac40a39d3eec5329caf761114caec6e844ad1"}, + {file = "lxml-5.3.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:1f9682786138549da44ca4c49b20e7144d063b75f2b2ba611f4cff9b83db1062"}, + {file = "lxml-5.3.2-cp38-cp38-win32.whl", hash = "sha256:d8f74ef8aacdf6ee5c07566a597634bb8535f6b53dc89790db43412498cf6026"}, + {file = "lxml-5.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:49f1cee0fa27e1ee02589c696a9bdf4027e7427f184fa98e6bef0c6613f6f0fa"}, + {file = "lxml-5.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:741c126bcf9aa939e950e64e5e0a89c8e01eda7a5f5ffdfc67073f2ed849caea"}, + {file = "lxml-5.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ab6e9e6aca1fd7d725ffa132286e70dee5b9a4561c5ed291e836440b82888f89"}, + {file = "lxml-5.3.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58e8c9b9ed3c15c2d96943c14efc324b69be6352fe5585733a7db2bf94d97841"}, + {file = "lxml-5.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7811828ddfb8c23f4f1fbf35e7a7b2edec2f2e4c793dee7c52014f28c4b35238"}, + {file = "lxml-5.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:72968623efb1e12e950cbdcd1d0f28eb14c8535bf4be153f1bfffa818b1cf189"}, + {file = "lxml-5.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ebfceaa2ea588b54efb6160e3520983663d45aed8a3895bb2031ada080fb5f04"}, + {file = "lxml-5.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d685d458505b2bfd2e28c812749fe9194a2b0ce285a83537e4309a187ffa270b"}, + {file = "lxml-5.3.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:334e0e414dab1f5366ead8ca34ec3148415f236d5660e175f1d640b11d645847"}, + {file = "lxml-5.3.2-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:02e56f7de72fa82561eae69628a7d6febd7891d72248c7ff7d3e7814d4031017"}, + {file = "lxml-5.3.2-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:638d06b4e1d34d1a074fa87deed5fb55c18485fa0dab97abc5604aad84c12031"}, + {file = "lxml-5.3.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:354dab7206d22d7a796fa27c4c5bffddd2393da2ad61835355a4759d435beb47"}, + {file = "lxml-5.3.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d9d9f82ff2c3bf9bb777cb355149f7f3a98ec58f16b7428369dc27ea89556a4c"}, + {file = "lxml-5.3.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:95ad58340e3b7d2b828efc370d1791856613c5cb62ae267158d96e47b3c978c9"}, + {file = "lxml-5.3.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:30fe05f4b7f6e9eb32862745512e7cbd021070ad0f289a7f48d14a0d3fc1d8a9"}, + {file = "lxml-5.3.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:34c688fef86f73dbca0798e0a61bada114677006afa524a8ce97d9e5fabf42e6"}, + {file = "lxml-5.3.2-cp39-cp39-win32.whl", hash = "sha256:4d6d3d1436d57f41984920667ec5ef04bcb158f80df89ac4d0d3f775a2ac0c87"}, + {file = "lxml-5.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:2996e1116bbb3ae2a1fbb2ba4da8f92742290b4011e7e5bce2bd33bbc9d9485a"}, + {file = "lxml-5.3.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:521ab9c80b98c30b2d987001c3ede2e647e92eeb2ca02e8cb66ef5122d792b24"}, + {file = "lxml-5.3.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f1231b0f9810289d41df1eacc4ebb859c63e4ceee29908a0217403cddce38d0"}, + {file = "lxml-5.3.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:271f1a4d5d2b383c36ad8b9b489da5ea9c04eca795a215bae61ed6a57cf083cd"}, + {file = "lxml-5.3.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:6fca8a5a13906ba2677a5252752832beb0f483a22f6c86c71a2bb320fba04f61"}, + {file = "lxml-5.3.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ea0c3b7922209160faef194a5b6995bfe7fa05ff7dda6c423ba17646b7b9de10"}, + {file = "lxml-5.3.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0a006390834603e5952a2ff74b9a31a6007c7cc74282a087aa6467afb4eea987"}, + {file = "lxml-5.3.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:eae4136a3b8c4cf76f69461fc8f9410d55d34ea48e1185338848a888d71b9675"}, + {file = "lxml-5.3.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d48e06be8d8c58e7feaedd8a37897a6122637efb1637d7ce00ddf5f11f9a92ad"}, + {file = "lxml-5.3.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4b83aed409134093d90e114007034d2c1ebcd92e501b71fd9ec70e612c8b2eb"}, + {file = "lxml-5.3.2-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7a0e77edfe26d3703f954d46bed52c3ec55f58586f18f4b7f581fc56954f1d84"}, + {file = "lxml-5.3.2-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:19f6fcfd15b82036b4d235749d78785eb9c991c7812012dc084e0d8853b4c1c0"}, + {file = "lxml-5.3.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:d49919c95d31ee06eefd43d8c6f69a3cc9bdf0a9b979cc234c4071f0eb5cb173"}, + {file = "lxml-5.3.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2d0a60841410123c533990f392819804a8448853f06daf412c0f383443925e89"}, + {file = "lxml-5.3.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b7f729e03090eb4e3981f10efaee35e6004b548636b1a062b8b9a525e752abc"}, + {file = "lxml-5.3.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:579df6e20d8acce3bcbc9fb8389e6ae00c19562e929753f534ba4c29cfe0be4b"}, + {file = "lxml-5.3.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2abcf3f3b8367d6400b908d00d4cd279fc0b8efa287e9043820525762d383699"}, + {file = "lxml-5.3.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:348c06cb2e3176ce98bee8c397ecc89181681afd13d85870df46167f140a305f"}, + {file = "lxml-5.3.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:617ecaccd565cbf1ac82ffcaa410e7da5bd3a4b892bb3543fb2fe19bd1c4467d"}, + {file = "lxml-5.3.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c3eb4278dcdb9d86265ed2c20b9ecac45f2d6072e3904542e591e382c87a9c00"}, + {file = "lxml-5.3.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258b6b53458c5cbd2a88795557ff7e0db99f73a96601b70bc039114cd4ee9e02"}, + {file = "lxml-5.3.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0a9d8d25ed2f2183e8471c97d512a31153e123ac5807f61396158ef2793cb6e"}, + {file = "lxml-5.3.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:73bcb635a848c18a3e422ea0ab0092f2e4ef3b02d8ebe87ab49748ebc8ec03d8"}, + {file = "lxml-5.3.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1545de0a69a16ced5767bae8cca1801b842e6e49e96f5e4a8a5acbef023d970b"}, + {file = "lxml-5.3.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:165fcdc2f40fc0fe88a3c3c06c9c2a097388a90bda6a16e6f7c9199c903c9b8e"}, + {file = "lxml-5.3.2.tar.gz", hash = "sha256:773947d0ed809ddad824b7b14467e1a481b8976e87278ac4a730c2f7c7fcddc1"}, +] + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html-clean = ["lxml_html_clean"] +html5 = ["html5lib"] +htmlsoup = 
["BeautifulSoup4"] +source = ["Cython (>=3.0.11,<3.1.0)"] + +[[package]] +name = "networkx" +version = "3.4.2" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"}, + {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"}, +] + +[package.extras] +default = ["matplotlib (>=3.7)", "numpy (>=1.24)", "pandas (>=2.0)", "scipy (>=1.10,!=1.11.0,!=1.11.1)"] +developer = ["changelist (==0.5)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"] +doc = ["intersphinx-registry", "myst-nb (>=1.1)", "numpydoc (>=1.8.0)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.15)", "sphinx (>=7.3)", "sphinx-gallery (>=0.16)", "texext (>=0.6.7)"] +example = ["cairocffi (>=1.7)", "contextily (>=1.6)", "igraph (>=0.11)", "momepy (>=0.7.2)", "osmnx (>=1.9)", "scikit-learn (>=1.5)", "seaborn (>=0.13)"] +extra = ["lxml (>=4.6)", "pydot (>=3.0.1)", "pygraphviz (>=1.14)", "sympy (>=1.10)"] +test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] + +[[package]] +name = "numpy" +version = "2.2.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "numpy-2.2.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8146f3550d627252269ac42ae660281d673eb6f8b32f113538e0cc2a9aed42b9"}, + {file = "numpy-2.2.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e642d86b8f956098b564a45e6f6ce68a22c2c97a04f5acd3f221f57b8cb850ae"}, + {file = "numpy-2.2.4-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:a84eda42bd12edc36eb5b53bbcc9b406820d3353f1994b6cfe453a33ff101775"}, + {file = "numpy-2.2.4-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:4ba5054787e89c59c593a4169830ab362ac2bee8a969249dc56e5d7d20ff8df9"}, + {file = "numpy-2.2.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7716e4a9b7af82c06a2543c53ca476fa0b57e4d760481273e09da04b74ee6ee2"}, + {file = "numpy-2.2.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adf8c1d66f432ce577d0197dceaac2ac00c0759f573f28516246351c58a85020"}, + {file = "numpy-2.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:218f061d2faa73621fa23d6359442b0fc658d5b9a70801373625d958259eaca3"}, + {file = "numpy-2.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:df2f57871a96bbc1b69733cd4c51dc33bea66146b8c63cacbfed73eec0883017"}, + {file = "numpy-2.2.4-cp310-cp310-win32.whl", hash = "sha256:a0258ad1f44f138b791327961caedffbf9612bfa504ab9597157806faa95194a"}, + {file = "numpy-2.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:0d54974f9cf14acf49c60f0f7f4084b6579d24d439453d5fc5805d46a165b542"}, + {file = "numpy-2.2.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9e0a277bb2eb5d8a7407e14688b85fd8ad628ee4e0c7930415687b6564207a4"}, + {file = "numpy-2.2.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9eeea959168ea555e556b8188da5fa7831e21d91ce031e95ce23747b7609f8a4"}, + {file = "numpy-2.2.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bd3ad3b0a40e713fc68f99ecfd07124195333f1e689387c180813f0e94309d6f"}, + {file = "numpy-2.2.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:cf28633d64294969c019c6df4ff37f5698e8326db68cc2b66576a51fad634880"}, + {file = "numpy-2.2.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:2fa8fa7697ad1646b5c93de1719965844e004fcad23c91228aca1cf0800044a1"}, + {file = "numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f4162988a360a29af158aeb4a2f4f09ffed6a969c9776f8f3bdee9b06a8ab7e5"}, + {file = "numpy-2.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:892c10d6a73e0f14935c31229e03325a7b3093fafd6ce0af704be7f894d95687"}, + {file = "numpy-2.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db1f1c22173ac1c58db249ae48aa7ead29f534b9a948bc56828337aa84a32ed6"}, + {file = "numpy-2.2.4-cp311-cp311-win32.whl", hash = "sha256:ea2bb7e2ae9e37d96835b3576a4fa4b3a97592fbea8ef7c3587078b0068b8f09"}, + {file = "numpy-2.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:f7de08cbe5551911886d1ab60de58448c6df0f67d9feb7d1fb21e9875ef95e91"}, + {file = "numpy-2.2.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a7b9084668aa0f64e64bd00d27ba5146ef1c3a8835f3bd912e7a9e01326804c4"}, + {file = "numpy-2.2.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dbe512c511956b893d2dacd007d955a3f03d555ae05cfa3ff1c1ff6df8851854"}, + {file = "numpy-2.2.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:bb649f8b207ab07caebba230d851b579a3c8711a851d29efe15008e31bb4de24"}, + {file = "numpy-2.2.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:f34dc300df798742b3d06515aa2a0aee20941c13579d7a2f2e10af01ae4901ee"}, + {file = "numpy-2.2.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3f7ac96b16955634e223b579a3e5798df59007ca43e8d451a0e6a50f6bfdfba"}, + {file = "numpy-2.2.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f92084defa704deadd4e0a5ab1dc52d8ac9e8a8ef617f3fbb853e79b0ea3592"}, + {file = "numpy-2.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7a4e84a6283b36632e2a5b56e121961f6542ab886bc9e12f8f9818b3c266bfbb"}, + {file = "numpy-2.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:11c43995255eb4127115956495f43e9343736edb7fcdb0d973defd9de14cd84f"}, + {file = "numpy-2.2.4-cp312-cp312-win32.whl", hash = "sha256:65ef3468b53269eb5fdb3a5c09508c032b793da03251d5f8722b1194f1790c00"}, + {file = "numpy-2.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:2aad3c17ed2ff455b8eaafe06bcdae0062a1db77cb99f4b9cbb5f4ecb13c5146"}, + {file = "numpy-2.2.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cf4e5c6a278d620dee9ddeb487dc6a860f9b199eadeecc567f777daace1e9e7"}, + {file = "numpy-2.2.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1974afec0b479e50438fc3648974268f972e2d908ddb6d7fb634598cdb8260a0"}, + {file = "numpy-2.2.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:79bd5f0a02aa16808fcbc79a9a376a147cc1045f7dfe44c6e7d53fa8b8a79392"}, + {file = "numpy-2.2.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:3387dd7232804b341165cedcb90694565a6015433ee076c6754775e85d86f1fc"}, + {file = "numpy-2.2.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f527d8fdb0286fd2fd97a2a96c6be17ba4232da346931d967a0630050dfd298"}, + {file = "numpy-2.2.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bce43e386c16898b91e162e5baaad90c4b06f9dcbe36282490032cec98dc8ae7"}, + {file = "numpy-2.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:31504f970f563d99f71a3512d0c01a645b692b12a63630d6aafa0939e52361e6"}, + {file = "numpy-2.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:81413336ef121a6ba746892fad881a83351ee3e1e4011f52e97fba79233611fd"}, + {file = "numpy-2.2.4-cp313-cp313-win32.whl", hash = 
"sha256:f486038e44caa08dbd97275a9a35a283a8f1d2f0ee60ac260a1790e76660833c"}, + {file = "numpy-2.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:207a2b8441cc8b6a2a78c9ddc64d00d20c303d79fba08c577752f080c4007ee3"}, + {file = "numpy-2.2.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8120575cb4882318c791f839a4fd66161a6fa46f3f0a5e613071aae35b5dd8f8"}, + {file = "numpy-2.2.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a761ba0fa886a7bb33c6c8f6f20213735cb19642c580a931c625ee377ee8bd39"}, + {file = "numpy-2.2.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:ac0280f1ba4a4bfff363a99a6aceed4f8e123f8a9b234c89140f5e894e452ecd"}, + {file = "numpy-2.2.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:879cf3a9a2b53a4672a168c21375166171bc3932b7e21f622201811c43cdd3b0"}, + {file = "numpy-2.2.4-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f05d4198c1bacc9124018109c5fba2f3201dbe7ab6e92ff100494f236209c960"}, + {file = "numpy-2.2.4-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2f085ce2e813a50dfd0e01fbfc0c12bbe5d2063d99f8b29da30e544fb6483b8"}, + {file = "numpy-2.2.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:92bda934a791c01d6d9d8e038363c50918ef7c40601552a58ac84c9613a665bc"}, + {file = "numpy-2.2.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ee4d528022f4c5ff67332469e10efe06a267e32f4067dc76bb7e2cddf3cd25ff"}, + {file = "numpy-2.2.4-cp313-cp313t-win32.whl", hash = "sha256:05c076d531e9998e7e694c36e8b349969c56eadd2cdcd07242958489d79a7286"}, + {file = "numpy-2.2.4-cp313-cp313t-win_amd64.whl", hash = "sha256:188dcbca89834cc2e14eb2f106c96d6d46f200fe0200310fc29089657379c58d"}, + {file = "numpy-2.2.4-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7051ee569db5fbac144335e0f3b9c2337e0c8d5c9fee015f259a5bd70772b7e8"}, + {file = "numpy-2.2.4-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:ab2939cd5bec30a7430cbdb2287b63151b77cf9624de0532d629c9a1c59b1d5c"}, + {file = "numpy-2.2.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0f35b19894a9e08639fd60a1ec1978cb7f5f7f1eace62f38dd36be8aecdef4d"}, + {file = "numpy-2.2.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b4adfbbc64014976d2f91084915ca4e626fbf2057fb81af209c1a6d776d23e3d"}, + {file = "numpy-2.2.4.tar.gz", hash = "sha256:9ba03692a45d3eef66559efe1d1096c4b9b75c0986b5dff5530c378fb8331d4f"}, +] + +[[package]] +name = "opencv-contrib-python" +version = "4.11.0.86" +description = "Wrapper package for OpenCV python bindings." 
+optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "opencv-contrib-python-4.11.0.86.tar.gz", hash = "sha256:4ff773dab44911da366b906621c9592d4eb96f6ad3777098933a23f064aab38e"}, + {file = "opencv_contrib_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:d911cedc511d98f79994580b245d59fc97f57f0f9923a99945d8b92c7ac671f6"}, + {file = "opencv_contrib_python-4.11.0.86-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:e10a293af18aa5f842d012fa14e87345b3ee06db4c29bd592ff94b51f7ffca2b"}, + {file = "opencv_contrib_python-4.11.0.86-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f21034bc8b00eb286a0a0a92b99767bf596bfe426cf4bc2e79647d64ad0dd6da"}, + {file = "opencv_contrib_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c47c0ef1098461cdc6fa1cdce4c942b8ec974c87423f4b5951443d26bb9ae407"}, + {file = "opencv_contrib_python-4.11.0.86-cp37-abi3-win32.whl", hash = "sha256:194841c664ceaa0692410b4ed0af557425608e33db3a181ded28b87acb66748d"}, + {file = "opencv_contrib_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:654758a9ae8ca9a75fca7b64b19163636534f0eedffe1e14c3d7218988625c8d"}, +] + +[package.dependencies] +numpy = {version = ">=1.26.0", markers = "python_version >= \"3.12\""} + +[[package]] +name = "opencv-python" +version = "4.11.0.86" +description = "Wrapper package for OpenCV python bindings." +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "opencv-python-4.11.0.86.tar.gz", hash = "sha256:03d60ccae62304860d232272e4a4fda93c39d595780cb40b161b310244b736a4"}, + {file = "opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:432f67c223f1dc2824f5e73cdfcd9db0efc8710647d4e813012195dc9122a52a"}, + {file = "opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:9d05ef13d23fe97f575153558653e2d6e87103995d54e6a35db3f282fe1f9c66"}, + {file = "opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b92ae2c8852208817e6776ba1ea0d6b1e0a1b5431e971a2a0ddd2a8cc398202"}, + {file = "opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b02611523803495003bd87362db3e1d2a0454a6a63025dc6658a9830570aa0d"}, + {file = "opencv_python-4.11.0.86-cp37-abi3-win32.whl", hash = "sha256:810549cb2a4aedaa84ad9a1c92fbfdfc14090e2749cedf2c1589ad8359aa169b"}, + {file = "opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec"}, +] + +[package.dependencies] +numpy = {version = ">=1.26.0", markers = "python_version >= \"3.12\""} + +[[package]] +name = "opencv-python-headless" +version = "4.11.0.86" +description = "Wrapper package for OpenCV python bindings." 
+optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "opencv-python-headless-4.11.0.86.tar.gz", hash = "sha256:996eb282ca4b43ec6a3972414de0e2331f5d9cda2b41091a49739c19fb843798"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:48128188ade4a7e517237c8e1e11a9cdf5c282761473383e77beb875bb1e61ca"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:a66c1b286a9de872c343ee7c3553b084244299714ebb50fbdcd76f07ebbe6c81"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6efabcaa9df731f29e5ea9051776715b1bdd1845d7c9530065c7951d2a2899eb"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e0a27c19dd1f40ddff94976cfe43066fbbe9dfbb2ec1907d66c19caef42a57b"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-win32.whl", hash = "sha256:f447d8acbb0b6f2808da71fddd29c1cdd448d2bc98f72d9bb78a7a898fc9621b"}, + {file = "opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:6c304df9caa7a6a5710b91709dd4786bf20a74d57672b3c31f7033cc638174ca"}, +] + +[package.dependencies] +numpy = {version = ">=1.26.0", markers = "python_version >= \"3.12\""} + +[[package]] +name = "opt-einsum" +version = "3.3.0" +description = "Optimizing numpys einsum function" +optional = false +python-versions = ">=3.5" +groups = ["main"] +files = [ + {file = "opt_einsum-3.3.0-py3-none-any.whl", hash = "sha256:2455e59e3947d3c275477df7f5205b30635e266fe6dc300e3d9f9646bfcea147"}, + {file = "opt_einsum-3.3.0.tar.gz", hash = "sha256:59f6475f77bbc37dcf7cd748519c0ec60722e91e63ca114e68821c0c54a46549"}, +] + +[package.dependencies] +numpy = ">=1.7" + +[package.extras] +docs = ["numpydoc", "sphinx (==1.2.3)", "sphinx-rtd-theme", "sphinxcontrib-napoleon"] +tests = ["pytest", "pytest-cov", "pytest-pep8"] + +[[package]] +name = "packaging" +version = "24.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, +] + +[[package]] +name = "paddleocr" +version = "2.10.0" +description = "Awesome OCR toolkits based on PaddlePaddle(8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embedded and IoT devices)" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "paddleocr-2.10.0-py3-none-any.whl", hash = "sha256:38df818d87a00af854cbfd14e33615edc3c4fa2caeb662149d51e6f2e212013f"}, + {file = "paddleocr-2.10.0.tar.gz", hash = "sha256:c0646aae769c957df698558e9264c94113ee60244a4ac0c46dfcd2324d215df4"}, +] + +[package.dependencies] +albucore = "*" +albumentations = "*" +beautifulsoup4 = "*" +cython = "*" +fire = ">=0.3.0" +fonttools = ">=4.24.0" +lmdb = "*" +numpy = "*" +opencv-contrib-python = "*" +opencv-python = "*" +Pillow = "*" +pyclipper = "*" +python-docx = "*" +pyyaml = "*" +rapidfuzz = "*" +requests = "*" +scikit-image = "*" +shapely = "*" +tqdm = "*" + +[[package]] +name = "paddlepaddle" +version = "3.0.0" +description = "Parallel Distributed Deep Learning" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "paddlepaddle-3.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:69321d2fed61f9c4b502843d213af0919e058f8399c3fb7da4b90ff9f9e8544d"}, + {file = "paddlepaddle-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a8a3de6238d7588224353d7412d1869772fa88d0cc0119cb7c921bee9b1869c"}, + {file = "paddlepaddle-3.0.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:73b1349d91a85dae81e23aee0faf1d04f244e26c688c23e387a504437066829a"}, + {file = "paddlepaddle-3.0.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:ea8841cdbea7f26dbe548d3b129dd9764f36a4338656c9b5257cac43b382a674"}, + {file = "paddlepaddle-3.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9fe8544bce8d034da5cdab1417782c7d53e1bef97533bcef5710dfdb59e87ca5"}, + {file = "paddlepaddle-3.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f69b4ec5ec881e632bf80d8d2a0ca74f87925335dc57b14bb8971e72e2d87951"}, + {file = "paddlepaddle-3.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cb4a98192079cde5b5dac1109d334c12b376b97edc8682490b5f5065c8f708d9"}, + {file = "paddlepaddle-3.0.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:cfd73da79e2eb9325bac4a3a1a7995063c7662dde724bf452659dda88e55b76f"}, + {file = "paddlepaddle-3.0.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:3ef99688f4d5113dcf0e026a0b225efa77975da04e8ca1d0666ebdb729d4a409"}, + {file = "paddlepaddle-3.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:215daf5c154855e9f4139844076853cabc86425fd127c390405895ae2b820d85"}, + {file = "paddlepaddle-3.0.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dcfebfb10deb15aa33d2a2d66d5a12d9de65461d8a40d2687e2fb3221150d0ba"}, + {file = "paddlepaddle-3.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8677e8d42ff66f74fbca32ff32d8b64bbe3d56ed29a92d43dc418f0f168fcb69"}, + {file = "paddlepaddle-3.0.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:81a7c5b10150b07521d1cd1491dca1fb76ff621b14db10e899d959f3a77935ea"}, + {file = "paddlepaddle-3.0.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:ddaa996a8b0a00dbe4269da21d65052877ec3693a22ecc07b9cada3ecd116886"}, + {file = "paddlepaddle-3.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:68efccd9b64233243e48d89a20d34904e2827abc3458c7dddb9e97ef8e2209f6"}, + {file = "paddlepaddle-3.0.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:52c401ffe4aae56316660a33015a6fd755cda23d3d70a28b13b73d70cbdbb8cb"}, + {file = "paddlepaddle-3.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9d1051f2e208da1d439daee7c861bcb50240f09712496eb61e9ce8c618faf2f9"}, + {file = "paddlepaddle-3.0.0-cp313-cp313-manylinux1_x86_64.whl", hash = "sha256:8fc4865efd3656ee99424124792cb88a642151b4d591a7d6d4a5c6afdae95959"}, + {file = "paddlepaddle-3.0.0-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:bbc4d573b35d24a262e02d72696e80f5b4aee41f698671af9574deeb1a83e6f7"}, + {file = "paddlepaddle-3.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:2e1292d50f80a8577407008f7321f85c8309938c0c575927950aec3c45502c2a"}, + {file = "paddlepaddle-3.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5a98145ff8e554c6a85fe233ff5a6480def4b6b6390ee12436f028c819b4c36f"}, + {file = "paddlepaddle-3.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:92d5398b7c6fd92d31b5cacb81dd1196c87a3bc7ee5021325df914c8187d03c2"}, + {file = "paddlepaddle-3.0.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:414ae8b44259bc363f3e120ef917390f98a53bc57cc38de27d04f5f1e043aae1"}, + {file = "paddlepaddle-3.0.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:157a3b11f0599175ea07759357d331be0031df3d462ba81b9c3c31fea9a57faf"}, + {file = "paddlepaddle-3.0.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:93a21b03a960515e1431bfe5f404060e2ce8c82c63d885600e0b45b775f2d081"}, + {file = "paddlepaddle-3.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:394a942b7281a8eb87083f80a9592f31b8aec1a360464e536c1d955d3e6e0bb4"}, + {file = "paddlepaddle-3.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:00bd606cdce5dbae2b9752afdc07479c5fab9df73efdf1bf577d187fd492f289"}, + {file = "paddlepaddle-3.0.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:220f8010c30cfd7712d0b0364e4732a344779ac202888ae1045e3567e24a6213"}, + {file = "paddlepaddle-3.0.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f68630572edc4eb76ab6bbbe57689cb08b961f0d08bc0fc8c969d79b4bc213e9"}, + {file = "paddlepaddle-3.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:4b078cee4f6da995e4d2b1c46d1d94545f31a487b02ec1c61cfbb9e7d6d082f2"}, +] + +[package.dependencies] +astor = "*" +decorator = "*" +httpx = "*" +networkx = "*" +numpy = ">=1.21" +opt-einsum = "3.3.0" +Pillow = "*" +protobuf = ">=3.20.2" +typing-extensions = "*" + +[[package]] +name = "pillow" +version = "11.1.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8"}, + {file = "pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192"}, + {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2"}, + {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26"}, + {file = "pillow-11.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07"}, + {file = "pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482"}, + {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e"}, + {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269"}, + {file = "pillow-11.1.0-cp310-cp310-win32.whl", hash = "sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49"}, + {file = "pillow-11.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a"}, + {file = "pillow-11.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65"}, + {file = "pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457"}, + {file = "pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = 
"sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1"}, + {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2"}, + {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96"}, + {file = "pillow-11.1.0-cp311-cp311-win32.whl", hash = "sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f"}, + {file = "pillow-11.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761"}, + {file = "pillow-11.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71"}, + {file = "pillow-11.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a"}, + {file = "pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f"}, + {file = "pillow-11.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91"}, + {file = "pillow-11.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c"}, + {file = "pillow-11.1.0-cp312-cp312-win32.whl", hash = "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6"}, + {file = "pillow-11.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf"}, + {file = "pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5"}, + {file = "pillow-11.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae98e14432d458fc3de11a77ccb3ae65ddce70f730e7c76140653048c71bfcbc"}, + {file = "pillow-11.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cc1331b6d5a6e144aeb5e626f4375f5b7ae9934ba620c0ac6b3e43d5e683a0f0"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:758e9d4ef15d3560214cddbc97b8ef3ef86ce04d62ddac17ad39ba87e89bd3b1"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b523466b1a31d0dcef7c5be1f20b942919b62fd6e9a9be199d035509cbefc0ec"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:9044b5e4f7083f209c4e35aa5dd54b1dd5b112b108648f5c902ad586d4f945c5"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3764d53e09cdedd91bee65c2527815d315c6b90d7b8b79759cc48d7bf5d4f114"}, + {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:31eba6bbdd27dde97b0174ddf0297d7a9c3a507a8a1480e1e60ef914fe23d352"}, + {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b5d658fbd9f0d6eea113aea286b21d3cd4d3fd978157cbf2447a6035916506d3"}, + {file = "pillow-11.1.0-cp313-cp313-win32.whl", hash = "sha256:f86d3a7a9af5d826744fabf4afd15b9dfef44fe69a98541f666f66fbb8d3fef9"}, + {file = "pillow-11.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:593c5fd6be85da83656b93ffcccc2312d2d149d251e98588b14fbc288fd8909c"}, + {file = "pillow-11.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:11633d58b6ee5733bde153a8dafd25e505ea3d32e261accd388827ee987baf65"}, + {file = "pillow-11.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:70ca5ef3b3b1c4a0812b5c63c57c23b63e53bc38e758b37a951e5bc466449861"}, + {file = "pillow-11.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8000376f139d4d38d6851eb149b321a52bb8893a88dae8ee7d95840431977081"}, + {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee85f0696a17dd28fbcfceb59f9510aa71934b483d1f5601d1030c3c8304f3c"}, + {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:dd0e081319328928531df7a0e63621caf67652c8464303fd102141b785ef9547"}, + {file = "pillow-11.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e63e4e5081de46517099dc30abe418122f54531a6ae2ebc8680bcd7096860eab"}, + {file = "pillow-11.1.0-cp313-cp313t-win32.whl", hash = "sha256:dda60aa465b861324e65a78c9f5cf0f4bc713e4309f83bc387be158b077963d9"}, + {file = "pillow-11.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ad5db5781c774ab9a9b2c4302bbf0c1014960a0a7be63278d13ae6fdf88126fe"}, + {file = "pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756"}, + {file = "pillow-11.1.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:bf902d7413c82a1bfa08b06a070876132a5ae6b2388e2712aab3a7cbc02205c6"}, + {file = "pillow-11.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c1eec9d950b6fe688edee07138993e54ee4ae634c51443cfb7c1e7613322718e"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e275ee4cb11c262bd108ab2081f750db2a1c0b8c12c1897f27b160c8bd57bbc"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4db853948ce4e718f2fc775b75c37ba2efb6aaea41a1a5fc57f0af59eee774b2"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:ab8a209b8485d3db694fa97a896d96dd6533d63c22829043fd9de627060beade"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:54251ef02a2309b5eec99d151ebf5c9904b77976c8abdcbce7891ed22df53884"}, + {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5bb94705aea800051a743aa4874bb1397d4695fb0583ba5e425ee0328757f196"}, + {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:89dbdb3e6e9594d512780a5a1c42801879628b38e3efc7038094430844e271d8"}, + {file = "pillow-11.1.0-cp39-cp39-win32.whl", hash = "sha256:e5449ca63da169a2e6068dd0e2fcc8d91f9558aba89ff6d02121ca8ab11e79e5"}, + {file = "pillow-11.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:3362c6ca227e65c54bf71a5f88b3d4565ff1bcbc63ae72c34b07bbb1cc59a43f"}, + {file = "pillow-11.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:b20be51b37a75cc54c2c55def3fa2c65bb94ba859dde241cd0a4fd302de5ae0a"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90"}, + 
{file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0"}, + {file = "pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] +typing = ["typing-extensions ; python_version < \"3.10\""] +xmp = ["defusedxml"] + +[[package]] +name = "playwright" +version = "1.51.0" +description = "A high-level API to automate web browsers" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "playwright-1.51.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:bcaaa3d5d73bda659bfb9ff2a288b51e85a91bd89eda86eaf8186550973e416a"}, + {file = "playwright-1.51.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e0ae6eb44297b24738e1a6d9c580ca4243b4e21b7e65cf936a71492c08dd0d4"}, + {file = "playwright-1.51.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:ab4c0ff00bded52c946be60734868febc964c8a08a9b448d7c20cb3811c6521c"}, + {file = "playwright-1.51.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:d5c9f67bc6ef49094618991c78a1466c5bac5ed09157660d78b8510b77f92746"}, + {file = "playwright-1.51.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814e4ec2a1a0d6f6221f075622c06b31ceb2bdc6d622258cfefed900c01569ae"}, + {file = "playwright-1.51.0-py3-none-win32.whl", hash = "sha256:4cef804991867ea27f608b70fa288ee52a57651e22d02ab287f98f8620b9408c"}, + {file = "playwright-1.51.0-py3-none-win_amd64.whl", hash = "sha256:9ece9316c5d383aed1a207f079fc2d552fff92184f0ecf37cc596e912d00a8c3"}, +] + +[package.dependencies] +greenlet = ">=3.1.1,<4.0.0" +pyee = ">=12,<13" + +[[package]] +name = "protobuf" +version = "6.30.2" +description = "" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "protobuf-6.30.2-cp310-abi3-win32.whl", hash = "sha256:b12ef7df7b9329886e66404bef5e9ce6a26b54069d7f7436a0853ccdeb91c103"}, + {file = "protobuf-6.30.2-cp310-abi3-win_amd64.whl", hash = "sha256:7653c99774f73fe6b9301b87da52af0e69783a2e371e8b599b3e9cb4da4b12b9"}, + {file = "protobuf-6.30.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:0eb523c550a66a09a0c20f86dd554afbf4d32b02af34ae53d93268c1f73bc65b"}, + {file = "protobuf-6.30.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:50f32cc9fd9cb09c783ebc275611b4f19dfdfb68d1ee55d2f0c7fa040df96815"}, + {file = 
"protobuf-6.30.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:4f6c687ae8efae6cf6093389a596548214467778146b7245e886f35e1485315d"}, + {file = "protobuf-6.30.2-cp39-cp39-win32.whl", hash = "sha256:524afedc03b31b15586ca7f64d877a98b184f007180ce25183d1a5cb230ee72b"}, + {file = "protobuf-6.30.2-cp39-cp39-win_amd64.whl", hash = "sha256:acec579c39c88bd8fbbacab1b8052c793efe83a0a5bd99db4a31423a25c0a0e2"}, + {file = "protobuf-6.30.2-py3-none-any.whl", hash = "sha256:ae86b030e69a98e08c77beab574cbcb9fff6d031d57209f574a5aea1445f4b51"}, + {file = "protobuf-6.30.2.tar.gz", hash = "sha256:35c859ae076d8c56054c25b59e5e59638d86545ed6e2b6efac6be0b6ea3ba048"}, +] + +[[package]] +name = "pyclipper" +version = "1.3.0.post6" +description = "Cython wrapper for the C++ translation of the Angus Johnson's Clipper library (ver. 6.4.2)" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "pyclipper-1.3.0.post6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fa0f5e78cfa8262277bb3d0225537b3c2a90ef68fd90a229d5d24cf49955dcf4"}, + {file = "pyclipper-1.3.0.post6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a01f182d8938c1dc515e8508ed2442f7eebd2c25c7d5cb29281f583c1a8008a4"}, + {file = "pyclipper-1.3.0.post6-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:640f20975727994d4abacd07396f564e9e5665ba5cb66ceb36b300c281f84fa4"}, + {file = "pyclipper-1.3.0.post6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a63002f6bb0f1efa87c0b81634cbb571066f237067e23707dabf746306c92ba5"}, + {file = "pyclipper-1.3.0.post6-cp310-cp310-win32.whl", hash = "sha256:106b8622cd9fb07d80cbf9b1d752334c55839203bae962376a8c59087788af26"}, + {file = "pyclipper-1.3.0.post6-cp310-cp310-win_amd64.whl", hash = "sha256:9699e98862dadefd0bea2360c31fa61ca553c660cbf6fb44993acde1b959f58f"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4247e7c44b34c87acbf38f99d48fb1acaf5da4a2cf4dcd601a9b24d431be4ef"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:851b3e58106c62a5534a1201295fe20c21714dee2eda68081b37ddb0367e6caa"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16cc1705a915896d2aff52131c427df02265631279eac849ebda766432714cc0"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ace1f0753cf71c5c5f6488b8feef5dd0fa8b976ad86b24bb51f708f513df4aac"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-win32.whl", hash = "sha256:dbc828641667142751b1127fd5c4291663490cf05689c85be4c5bcc89aaa236a"}, + {file = "pyclipper-1.3.0.post6-cp311-cp311-win_amd64.whl", hash = "sha256:1c03f1ae43b18ee07730c3c774cc3cf88a10c12a4b097239b33365ec24a0a14a"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:6363b9d79ba1b5d8f32d1623e797c1e9f994600943402e68d5266067bdde173e"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:32cd7fb9c1c893eb87f82a072dbb5e26224ea7cebbad9dc306d67e1ac62dd229"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3aab10e3c10ed8fa60c608fb87c040089b83325c937f98f06450cf9fcfdaf1d"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58eae2ff92a8cae1331568df076c4c5775bf946afab0068b217f0cf8e188eb3c"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-win32.whl", hash = 
"sha256:793b0aa54b914257aa7dc76b793dd4dcfb3c84011d48df7e41ba02b571616eaf"}, + {file = "pyclipper-1.3.0.post6-cp312-cp312-win_amd64.whl", hash = "sha256:d3f9da96f83b8892504923beb21a481cd4516c19be1d39eb57a92ef1c9a29548"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f129284d2c7bcd213d11c0f35e1ae506a1144ce4954e9d1734d63b120b0a1b58"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:188fbfd1d30d02247f92c25ce856f5f3c75d841251f43367dbcf10935bc48f38"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6d129d0c2587f2f5904d201a4021f859afbb45fada4261c9fdedb2205b09d23"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c9c80b5c46eef38ba3f12dd818dc87f5f2a0853ba914b6f91b133232315f526"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-win32.whl", hash = "sha256:b15113ec4fc423b58e9ae80aa95cf5a0802f02d8f02a98a46af3d7d66ff0cc0e"}, + {file = "pyclipper-1.3.0.post6-cp313-cp313-win_amd64.whl", hash = "sha256:e5ff68fa770ac654c7974fc78792978796f068bd274e95930c0691c31e192889"}, + {file = "pyclipper-1.3.0.post6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c92e41301a8f25f9adcd90954512038ed5f774a2b8c04a4a9db261b78ff75e3a"}, + {file = "pyclipper-1.3.0.post6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04214d23cf79f4ddcde36e299dea9f23f07abb88fa47ef399bf0e819438bbefd"}, + {file = "pyclipper-1.3.0.post6-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:aa604f8665ade434f9eafcd23f89435057d5d09427dfb4554c5e6d19f6d8aa1a"}, + {file = "pyclipper-1.3.0.post6-cp36-cp36m-win32.whl", hash = "sha256:1fd56855ca92fa7eb0d8a71cf3a24b80b9724c8adcc89b385bbaa8924e620156"}, + {file = "pyclipper-1.3.0.post6-cp36-cp36m-win_amd64.whl", hash = "sha256:6893f9b701f3132d86018594d99b724200b937a3a3ddfe1be0432c4ff0284e6e"}, + {file = "pyclipper-1.3.0.post6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2737df106b8487103916147fe30f887aff439d9f2bd2f67c9d9b5c13eac88ccf"}, + {file = "pyclipper-1.3.0.post6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33ab72260f144693e1f7735e93276c3031e1ed243a207eff1f8b98c7162ba22c"}, + {file = "pyclipper-1.3.0.post6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:491ec1bfd2ee3013269c2b652dde14a85539480e0fb82f89bb12198fa59fff82"}, + {file = "pyclipper-1.3.0.post6-cp37-cp37m-win32.whl", hash = "sha256:2e257009030815853528ba4b2ef7fb7e172683a3f4255a63f00bde34cfab8b58"}, + {file = "pyclipper-1.3.0.post6-cp37-cp37m-win_amd64.whl", hash = "sha256:ed6e50c6e87ed190141573615d54118869bd63e9cd91ca5660d2ca926bf25110"}, + {file = "pyclipper-1.3.0.post6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:cf0a535cfa02b207435928e991c60389671fe1ea1dfae79170973f82f52335b2"}, + {file = "pyclipper-1.3.0.post6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:48dd55fbd55f63902cad511432ec332368cbbbc1dd2110c0c6c1e9edd735713a"}, + {file = "pyclipper-1.3.0.post6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05ae2ea878fdfa31dd375326f6191b03de98a9602cc9c2b6d4ff960b20a974c"}, + {file = "pyclipper-1.3.0.post6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:903176952a159c4195b8be55e597978e24804c838c7a9b12024c39704d341f72"}, + {file = "pyclipper-1.3.0.post6-cp38-cp38-win32.whl", hash = "sha256:fb1e52cf4ee0a9fa8b2254ed589cc51b0c989efc58fa8804289aca94a21253f7"}, + {file = 
"pyclipper-1.3.0.post6-cp38-cp38-win_amd64.whl", hash = "sha256:9cbdc517e75e647aa9bf6e356b3a3d2e3af344f82af38e36031eb46ba0ab5425"}, + {file = "pyclipper-1.3.0.post6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:383f3433b968f2e4b0843f338c1f63b85392b6e1d936de722e8c5d4f577dbff5"}, + {file = "pyclipper-1.3.0.post6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cf5ca2b9358d30a395ac6e14b3154a9fd1f9b557ad7153ea15cf697e88d07ce1"}, + {file = "pyclipper-1.3.0.post6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3404dfcb3415eee863564b5f49be28a8c7fb99ad5e31c986bcc33c8d47d97df7"}, + {file = "pyclipper-1.3.0.post6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:aa0e7268f8ceba218964bc3a482a5e9d32e352e8c3538b03f69a6b3db979078d"}, + {file = "pyclipper-1.3.0.post6-cp39-cp39-win32.whl", hash = "sha256:47a214f201ff930595a30649c2a063f78baa3a8f52e1f38da19f7930c90ed80c"}, + {file = "pyclipper-1.3.0.post6-cp39-cp39-win_amd64.whl", hash = "sha256:28bb590ae79e6beb15794eaee12b6f1d769589572d33e494faf5aa3b1f31b9fa"}, + {file = "pyclipper-1.3.0.post6-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3e5e65176506da6335f6cbab497ae1a29772064467fa69f66de6bab4b6304d34"}, + {file = "pyclipper-1.3.0.post6-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3d58202de8b8da4d1559afbda4e90a8c260a5373672b6d7bc5448c4614385144"}, + {file = "pyclipper-1.3.0.post6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2cd8600bd16d209d5d45a33b45c278e1cc8bedc169af1a1f2187b581c521395"}, + {file = "pyclipper-1.3.0.post6.tar.gz", hash = "sha256:42bff0102fa7a7f2abdd795a2594654d62b786d0c6cd67b72d469114fdeb608c"}, +] + +[[package]] +name = "pydantic" +version = "2.11.3" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pydantic-2.11.3-py3-none-any.whl", hash = "sha256:a082753436a07f9ba1289c6ffa01cd93db3548776088aa917cc43b63f68fa60f"}, + {file = "pydantic-2.11.3.tar.gz", hash = "sha256:7471657138c16adad9322fe3070c0116dd6c3ad8d649300e3cbdfe91f4db4ec3"}, +] + +[package.dependencies] +annotated-types = ">=0.6.0" +pydantic-core = "2.33.1" +typing-extensions = ">=4.12.2" +typing-inspection = ">=0.4.0" + +[package.extras] +email = ["email-validator (>=2.0.0)"] +timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] + +[[package]] +name = "pydantic-core" +version = "2.33.1" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pydantic_core-2.33.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3077cfdb6125cc8dab61b155fdd714663e401f0e6883f9632118ec12cf42df26"}, + {file = "pydantic_core-2.33.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8ffab8b2908d152e74862d276cf5017c81a2f3719f14e8e3e8d6b83fda863927"}, + {file = "pydantic_core-2.33.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5183e4f6a2d468787243ebcd70cf4098c247e60d73fb7d68d5bc1e1beaa0c4db"}, + {file = "pydantic_core-2.33.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:398a38d323f37714023be1e0285765f0a27243a8b1506b7b7de87b647b517e48"}, + {file = "pydantic_core-2.33.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:87d3776f0001b43acebfa86f8c64019c043b55cc5a6a2e313d728b5c95b46969"}, + {file = 
"pydantic_core-2.33.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c566dd9c5f63d22226409553531f89de0cac55397f2ab8d97d6f06cfce6d947e"}, + {file = "pydantic_core-2.33.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0d5f3acc81452c56895e90643a625302bd6be351e7010664151cc55b7b97f89"}, + {file = "pydantic_core-2.33.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d3a07fadec2a13274a8d861d3d37c61e97a816beae717efccaa4b36dfcaadcde"}, + {file = "pydantic_core-2.33.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f99aeda58dce827f76963ee87a0ebe75e648c72ff9ba1174a253f6744f518f65"}, + {file = "pydantic_core-2.33.1-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:902dbc832141aa0ec374f4310f1e4e7febeebc3256f00dc359a9ac3f264a45dc"}, + {file = "pydantic_core-2.33.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fe44d56aa0b00d66640aa84a3cbe80b7a3ccdc6f0b1ca71090696a6d4777c091"}, + {file = "pydantic_core-2.33.1-cp310-cp310-win32.whl", hash = "sha256:ed3eb16d51257c763539bde21e011092f127a2202692afaeaccb50db55a31383"}, + {file = "pydantic_core-2.33.1-cp310-cp310-win_amd64.whl", hash = "sha256:694ad99a7f6718c1a498dc170ca430687a39894a60327f548e02a9c7ee4b6504"}, + {file = "pydantic_core-2.33.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e966fc3caaf9f1d96b349b0341c70c8d6573bf1bac7261f7b0ba88f96c56c24"}, + {file = "pydantic_core-2.33.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bfd0adeee563d59c598ceabddf2c92eec77abcb3f4a391b19aa7366170bd9e30"}, + {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91815221101ad3c6b507804178a7bb5cb7b2ead9ecd600041669c8d805ebd595"}, + {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9fea9c1869bb4742d174a57b4700c6dadea951df8b06de40c2fedb4f02931c2e"}, + {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d20eb4861329bb2484c021b9d9a977566ab16d84000a57e28061151c62b349a"}, + {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb935c5591573ae3201640579f30128ccc10739b45663f93c06796854405505"}, + {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c964fd24e6166420d18fb53996d8c9fd6eac9bf5ae3ec3d03015be4414ce497f"}, + {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:681d65e9011f7392db5aa002b7423cc442d6a673c635668c227c6c8d0e5a4f77"}, + {file = "pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e100c52f7355a48413e2999bfb4e139d2977a904495441b374f3d4fb4a170961"}, + {file = "pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:048831bd363490be79acdd3232f74a0e9951b11b2b4cc058aeb72b22fdc3abe1"}, + {file = "pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bdc84017d28459c00db6f918a7272a5190bec3090058334e43a76afb279eac7c"}, + {file = "pydantic_core-2.33.1-cp311-cp311-win32.whl", hash = "sha256:32cd11c5914d1179df70406427097c7dcde19fddf1418c787540f4b730289896"}, + {file = "pydantic_core-2.33.1-cp311-cp311-win_amd64.whl", hash = "sha256:2ea62419ba8c397e7da28a9170a16219d310d2cf4970dbc65c32faf20d828c83"}, + {file = "pydantic_core-2.33.1-cp311-cp311-win_arm64.whl", hash = "sha256:fc903512177361e868bc1f5b80ac8c8a6e05fcdd574a5fb5ffeac5a9982b9e89"}, + {file = 
"pydantic_core-2.33.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1293d7febb995e9d3ec3ea09caf1a26214eec45b0f29f6074abb004723fc1de8"}, + {file = "pydantic_core-2.33.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:99b56acd433386c8f20be5c4000786d1e7ca0523c8eefc995d14d79c7a081498"}, + {file = "pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35a5ec3fa8c2fe6c53e1b2ccc2454398f95d5393ab398478f53e1afbbeb4d939"}, + {file = "pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b172f7b9d2f3abc0efd12e3386f7e48b576ef309544ac3a63e5e9cdd2e24585d"}, + {file = "pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9097b9f17f91eea659b9ec58148c0747ec354a42f7389b9d50701610d86f812e"}, + {file = "pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc77ec5b7e2118b152b0d886c7514a4653bcb58c6b1d760134a9fab915f777b3"}, + {file = "pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3d15245b08fa4a84cefc6c9222e6f37c98111c8679fbd94aa145f9a0ae23d"}, + {file = "pydantic_core-2.33.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef99779001d7ac2e2461d8ab55d3373fe7315caefdbecd8ced75304ae5a6fc6b"}, + {file = "pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fc6bf8869e193855e8d91d91f6bf59699a5cdfaa47a404e278e776dd7f168b39"}, + {file = "pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:b1caa0bc2741b043db7823843e1bde8aaa58a55a58fda06083b0569f8b45693a"}, + {file = "pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ec259f62538e8bf364903a7d0d0239447059f9434b284f5536e8402b7dd198db"}, + {file = "pydantic_core-2.33.1-cp312-cp312-win32.whl", hash = "sha256:e14f369c98a7c15772b9da98987f58e2b509a93235582838bd0d1d8c08b68fda"}, + {file = "pydantic_core-2.33.1-cp312-cp312-win_amd64.whl", hash = "sha256:1c607801d85e2e123357b3893f82c97a42856192997b95b4d8325deb1cd0c5f4"}, + {file = "pydantic_core-2.33.1-cp312-cp312-win_arm64.whl", hash = "sha256:8d13f0276806ee722e70a1c93da19748594f19ac4299c7e41237fc791d1861ea"}, + {file = "pydantic_core-2.33.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:70af6a21237b53d1fe7b9325b20e65cbf2f0a848cf77bed492b029139701e66a"}, + {file = "pydantic_core-2.33.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:282b3fe1bbbe5ae35224a0dbd05aed9ccabccd241e8e6b60370484234b456266"}, + {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b315e596282bbb5822d0c7ee9d255595bd7506d1cb20c2911a4da0b970187d3"}, + {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1dfae24cf9921875ca0ca6a8ecb4bb2f13c855794ed0d468d6abbec6e6dcd44a"}, + {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6dd8ecfde08d8bfadaea669e83c63939af76f4cf5538a72597016edfa3fad516"}, + {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f593494876eae852dc98c43c6f260f45abdbfeec9e4324e31a481d948214764"}, + {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:948b73114f47fd7016088e5186d13faf5e1b2fe83f5e320e371f035557fd264d"}, + {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:e11f3864eb516af21b01e25fac915a82e9ddad3bb0fb9e95a246067398b435a4"}, + {file = "pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:549150be302428b56fdad0c23c2741dcdb5572413776826c965619a25d9c6bde"}, + {file = "pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:495bc156026efafd9ef2d82372bd38afce78ddd82bf28ef5276c469e57c0c83e"}, + {file = "pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ec79de2a8680b1a67a07490bddf9636d5c2fab609ba8c57597e855fa5fa4dacd"}, + {file = "pydantic_core-2.33.1-cp313-cp313-win32.whl", hash = "sha256:ee12a7be1742f81b8a65b36c6921022301d466b82d80315d215c4c691724986f"}, + {file = "pydantic_core-2.33.1-cp313-cp313-win_amd64.whl", hash = "sha256:ede9b407e39949d2afc46385ce6bd6e11588660c26f80576c11c958e6647bc40"}, + {file = "pydantic_core-2.33.1-cp313-cp313-win_arm64.whl", hash = "sha256:aa687a23d4b7871a00e03ca96a09cad0f28f443690d300500603bd0adba4b523"}, + {file = "pydantic_core-2.33.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:401d7b76e1000d0dd5538e6381d28febdcacb097c8d340dde7d7fc6e13e9f95d"}, + {file = "pydantic_core-2.33.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aeb055a42d734c0255c9e489ac67e75397d59c6fbe60d155851e9782f276a9c"}, + {file = "pydantic_core-2.33.1-cp313-cp313t-win_amd64.whl", hash = "sha256:338ea9b73e6e109f15ab439e62cb3b78aa752c7fd9536794112e14bee02c8d18"}, + {file = "pydantic_core-2.33.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:5ab77f45d33d264de66e1884fca158bc920cb5e27fd0764a72f72f5756ae8bdb"}, + {file = "pydantic_core-2.33.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e7aaba1b4b03aaea7bb59e1b5856d734be011d3e6d98f5bcaa98cb30f375f2ad"}, + {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fb66263e9ba8fea2aa85e1e5578980d127fb37d7f2e292773e7bc3a38fb0c7b"}, + {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f2648b9262607a7fb41d782cc263b48032ff7a03a835581abbf7a3bec62bcf5"}, + {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:723c5630c4259400818b4ad096735a829074601805d07f8cafc366d95786d331"}, + {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d100e3ae783d2167782391e0c1c7a20a31f55f8015f3293647544df3f9c67824"}, + {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:177d50460bc976a0369920b6c744d927b0ecb8606fb56858ff542560251b19e5"}, + {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3edde68d1a1f9af1273b2fe798997b33f90308fb6d44d8550c89fc6a3647cf6"}, + {file = "pydantic_core-2.33.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a62c3c3ef6a7e2c45f7853b10b5bc4ddefd6ee3cd31024754a1a5842da7d598d"}, + {file = "pydantic_core-2.33.1-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:c91dbb0ab683fa0cd64a6e81907c8ff41d6497c346890e26b23de7ee55353f96"}, + {file = "pydantic_core-2.33.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9f466e8bf0a62dc43e068c12166281c2eca72121dd2adc1040f3aa1e21ef8599"}, + {file = "pydantic_core-2.33.1-cp39-cp39-win32.whl", hash = "sha256:ab0277cedb698749caada82e5d099dc9fed3f906a30d4c382d1a21725777a1e5"}, + {file = "pydantic_core-2.33.1-cp39-cp39-win_amd64.whl", hash = "sha256:5773da0ee2d17136b1f1c6fbde543398d452a6ad2a7b54ea1033e2daa739b8d2"}, + {file = 
"pydantic_core-2.33.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c834f54f8f4640fd7e4b193f80eb25a0602bba9e19b3cd2fc7ffe8199f5ae02"}, + {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:049e0de24cf23766f12cc5cc71d8abc07d4a9deb9061b334b62093dedc7cb068"}, + {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a28239037b3d6f16916a4c831a5a0eadf856bdd6d2e92c10a0da3a59eadcf3e"}, + {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d3da303ab5f378a268fa7d45f37d7d85c3ec19769f28d2cc0c61826a8de21fe"}, + {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:25626fb37b3c543818c14821afe0fd3830bc327a43953bc88db924b68c5723f1"}, + {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3ab2d36e20fbfcce8f02d73c33a8a7362980cff717926bbae030b93ae46b56c7"}, + {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:2f9284e11c751b003fd4215ad92d325d92c9cb19ee6729ebd87e3250072cdcde"}, + {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:048c01eee07d37cbd066fc512b9d8b5ea88ceeb4e629ab94b3e56965ad655add"}, + {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5ccd429694cf26af7997595d627dd2637e7932214486f55b8a357edaac9dae8c"}, + {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3a371dc00282c4b84246509a5ddc808e61b9864aa1eae9ecc92bb1268b82db4a"}, + {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:f59295ecc75a1788af8ba92f2e8c6eeaa5a94c22fc4d151e8d9638814f85c8fc"}, + {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08530b8ac922003033f399128505f513e30ca770527cc8bbacf75a84fcc2c74b"}, + {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bae370459da6a5466978c0eacf90690cb57ec9d533f8e63e564ef3822bfa04fe"}, + {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e3de2777e3b9f4d603112f78006f4ae0acb936e95f06da6cb1a45fbad6bdb4b5"}, + {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a64e81e8cba118e108d7126362ea30e021291b7805d47e4896e52c791be2761"}, + {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:52928d8c1b6bda03cc6d811e8923dffc87a2d3c8b3bfd2ce16471c7147a24850"}, + {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1b30d92c9412beb5ac6b10a3eb7ef92ccb14e3f2a8d7732e2d739f58b3aa7544"}, + {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f995719707e0e29f0f41a8aa3bcea6e761a36c9136104d3189eafb83f5cec5e5"}, + {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7edbc454a29fc6aeae1e1eecba4f07b63b8d76e76a748532233c4c167b4cb9ea"}, + {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ad05b683963f69a1d5d2c2bdab1274a31221ca737dbbceaa32bcb67359453cdd"}, + {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df6a94bf9452c6da9b5d76ed229a5683d0306ccb91cca8e1eea883189780d568"}, + {file = 
"pydantic_core-2.33.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7965c13b3967909a09ecc91f21d09cfc4576bf78140b988904e94f130f188396"}, + {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3f1fdb790440a34f6ecf7679e1863b825cb5ffde858a9197f851168ed08371e5"}, + {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:5277aec8d879f8d05168fdd17ae811dd313b8ff894aeeaf7cd34ad28b4d77e33"}, + {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:8ab581d3530611897d863d1a649fb0644b860286b4718db919bfd51ece41f10b"}, + {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0483847fa9ad5e3412265c1bd72aad35235512d9ce9d27d81a56d935ef489672"}, + {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:de9e06abe3cc5ec6a2d5f75bc99b0bdca4f5c719a5b34026f8c57efbdecd2ee3"}, + {file = "pydantic_core-2.33.1.tar.gz", hash = "sha256:bcc9c6fdb0ced789245b02b7d6603e17d1563064ddcfc36f046b61c0c05dd9df"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pyee" +version = "12.1.1" +description = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pyee-12.1.1-py3-none-any.whl", hash = "sha256:18a19c650556bb6b32b406d7f017c8f513aceed1ef7ca618fb65de7bd2d347ef"}, + {file = "pyee-12.1.1.tar.gz", hash = "sha256:bbc33c09e2ff827f74191e3e5bbc6be7da02f627b7ec30d86f5ce1a6fb2424a3"}, +] + +[package.dependencies] +typing-extensions = "*" + +[package.extras] +dev = ["black", "build", "flake8", "flake8-black", "isort", "jupyter-console", "mkdocs", "mkdocs-include-markdown-plugin", "mkdocstrings[python]", "pytest", "pytest-asyncio ; python_version >= \"3.4\"", "pytest-trio ; python_version >= \"3.7\"", "sphinx", "toml", "tox", "trio", "trio ; python_version > \"3.6\"", "trio-typing ; python_version > \"3.6\"", "twine", "twisted", "validate-pyproject[all]"] + +[[package]] +name = "python-docx" +version = "1.1.2" +description = "Create, read, and update Microsoft Word .docx files." 
+optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "python_docx-1.1.2-py3-none-any.whl", hash = "sha256:08c20d6058916fb19853fcf080f7f42b6270d89eac9fa5f8c15f691c0017fabe"}, + {file = "python_docx-1.1.2.tar.gz", hash = "sha256:0cf1f22e95b9002addca7948e16f2cd7acdfd498047f1941ca5d293db7762efd"}, +] + +[package.dependencies] +lxml = ">=3.1.0" +typing-extensions = ">=4.9.0" + +[[package]] +name = "pyyaml" +version = "6.0.2" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, + {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, + {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, + {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, + {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, + {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, + {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, + {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, + {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, + {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, + {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, + {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, + {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, + {file = 
"PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, + {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, + {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, + {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, +] + +[[package]] +name = "rapidfuzz" +version = "3.13.0" +description = "rapid fuzzy string matching" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "rapidfuzz-3.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:aafc42a1dc5e1beeba52cd83baa41372228d6d8266f6d803c16dbabbcc156255"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:85c9a131a44a95f9cac2eb6e65531db014e09d89c4f18c7b1fa54979cb9ff1f3"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d7cec4242d30dd521ef91c0df872e14449d1dffc2a6990ede33943b0dae56c3"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e297c09972698c95649e89121e3550cee761ca3640cd005e24aaa2619175464e"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ef0f5f03f61b0e5a57b1df7beafd83df993fd5811a09871bad6038d08e526d0d"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d8cf5f7cd6e4d5eb272baf6a54e182b2c237548d048e2882258336533f3f02b7"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9256218ac8f1a957806ec2fb9a6ddfc6c32ea937c0429e88cf16362a20ed8602"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e1bdd2e6d0c5f9706ef7595773a81ca2b40f3b33fd7f9840b726fb00c6c4eb2e"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5280be8fd7e2bee5822e254fe0a5763aa0ad57054b85a32a3d9970e9b09bbcbf"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fd742c03885db1fce798a1cd87a20f47f144ccf26d75d52feb6f2bae3d57af05"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:5435fcac94c9ecf0504bf88a8a60c55482c32e18e108d6079a0089c47f3f8cf6"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:93a755266856599be4ab6346273f192acde3102d7aa0735e2f48b456397a041f"}, + {file = 
"rapidfuzz-3.13.0-cp310-cp310-win32.whl", hash = "sha256:3abe6a4e8eb4cfc4cda04dd650a2dc6d2934cbdeda5def7e6fd1c20f6e7d2a0b"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:e8ddb58961401da7d6f55f185512c0d6bd24f529a637078d41dd8ffa5a49c107"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-win_arm64.whl", hash = "sha256:c523620d14ebd03a8d473c89e05fa1ae152821920c3ff78b839218ff69e19ca3"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d395a5cad0c09c7f096433e5fd4224d83b53298d53499945a9b0e5a971a84f3a"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b7b3eda607a019169f7187328a8d1648fb9a90265087f6903d7ee3a8eee01805"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98e0bfa602e1942d542de077baf15d658bd9d5dcfe9b762aff791724c1c38b70"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bef86df6d59667d9655905b02770a0c776d2853971c0773767d5ef8077acd624"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fedd316c165beed6307bf754dee54d3faca2c47e1f3bcbd67595001dfa11e969"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5158da7f2ec02a930be13bac53bb5903527c073c90ee37804090614cab83c29e"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b6f913ee4618ddb6d6f3e387b76e8ec2fc5efee313a128809fbd44e65c2bbb2"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d25fdbce6459ccbbbf23b4b044f56fbd1158b97ac50994eaae2a1c0baae78301"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:25343ccc589a4579fbde832e6a1e27258bfdd7f2eb0f28cb836d6694ab8591fc"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a9ad1f37894e3ffb76bbab76256e8a8b789657183870be11aa64e306bb5228fd"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5dc71ef23845bb6b62d194c39a97bb30ff171389c9812d83030c1199f319098c"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b7f4c65facdb94f44be759bbd9b6dda1fa54d0d6169cdf1a209a5ab97d311a75"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-win32.whl", hash = "sha256:b5104b62711565e0ff6deab2a8f5dbf1fbe333c5155abe26d2cfd6f1849b6c87"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:9093cdeb926deb32a4887ebe6910f57fbcdbc9fbfa52252c10b56ef2efb0289f"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-win_arm64.whl", hash = "sha256:f70f646751b6aa9d05be1fb40372f006cc89d6aad54e9d79ae97bd1f5fce5203"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a1a6a906ba62f2556372282b1ef37b26bca67e3d2ea957277cfcefc6275cca7"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2fd0975e015b05c79a97f38883a11236f5a24cca83aa992bd2558ceaa5652b26"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d4e13593d298c50c4f94ce453f757b4b398af3fa0fd2fde693c3e51195b7f69"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed6f416bda1c9133000009d84d9409823eb2358df0950231cc936e4bf784eb97"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1dc82b6ed01acb536b94a43996a94471a218f4d89f3fdd9185ab496de4b2a981"}, + {file = 
"rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9d824de871daa6e443b39ff495a884931970d567eb0dfa213d234337343835f"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d18228a2390375cf45726ce1af9d36ff3dc1f11dce9775eae1f1b13ac6ec50f"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f5fe634c9482ec5d4a6692afb8c45d370ae86755e5f57aa6c50bfe4ca2bdd87"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:694eb531889f71022b2be86f625a4209c4049e74be9ca836919b9e395d5e33b3"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:11b47b40650e06147dee5e51a9c9ad73bb7b86968b6f7d30e503b9f8dd1292db"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:98b8107ff14f5af0243f27d236bcc6e1ef8e7e3b3c25df114e91e3a99572da73"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b836f486dba0aceb2551e838ff3f514a38ee72b015364f739e526d720fdb823a"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-win32.whl", hash = "sha256:4671ee300d1818d7bdfd8fa0608580d7778ba701817216f0c17fb29e6b972514"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e2065f68fb1d0bf65adc289c1bdc45ba7e464e406b319d67bb54441a1b9da9e"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:65cc97c2fc2c2fe23586599686f3b1ceeedeca8e598cfcc1b7e56dc8ca7e2aa7"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:09e908064d3684c541d312bd4c7b05acb99a2c764f6231bd507d4b4b65226c23"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:57c390336cb50d5d3bfb0cfe1467478a15733703af61f6dffb14b1cd312a6fae"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0da54aa8547b3c2c188db3d1c7eb4d1bb6dd80baa8cdaeaec3d1da3346ec9caa"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df8e8c21e67afb9d7fbe18f42c6111fe155e801ab103c81109a61312927cc611"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:461fd13250a2adf8e90ca9a0e1e166515cbcaa5e9c3b1f37545cbbeff9e77f6b"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2b3dd5d206a12deca16870acc0d6e5036abeb70e3cad6549c294eff15591527"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1343d745fbf4688e412d8f398c6e6d6f269db99a54456873f232ba2e7aeb4939"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b1b065f370d54551dcc785c6f9eeb5bd517ae14c983d2784c064b3aa525896df"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:11b125d8edd67e767b2295eac6eb9afe0b1cdc82ea3d4b9257da4b8e06077798"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c33f9c841630b2bb7e69a3fb5c84a854075bb812c47620978bddc591f764da3d"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ae4574cb66cf1e85d32bb7e9ec45af5409c5b3970b7ceb8dea90168024127566"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e05752418b24bbd411841b256344c26f57da1148c5509e34ea39c7eb5099ab72"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-win32.whl", hash = "sha256:0e1d08cb884805a543f2de1f6744069495ef527e279e05370dd7c83416af83f8"}, + {file = 
"rapidfuzz-3.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9a7c6232be5f809cd39da30ee5d24e6cadd919831e6020ec6c2391f4c3bc9264"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:3f32f15bacd1838c929b35c84b43618481e1b3d7a61b5ed2db0291b70ae88b53"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cc64da907114d7a18b5e589057e3acaf2fec723d31c49e13fedf043592a3f6a7"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4d9d7f84c8e992a8dbe5a3fdbea73d733da39bf464e62c912ac3ceba9c0cff93"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a79a2f07786a2070669b4b8e45bd96a01c788e7a3c218f531f3947878e0f956"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f338e71c45b69a482de8b11bf4a029993230760120c8c6e7c9b71760b6825a1"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:adb40ca8ddfcd4edd07b0713a860be32bdf632687f656963bcbce84cea04b8d8"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48719f7dcf62dfb181063b60ee2d0a39d327fa8ad81b05e3e510680c44e1c078"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9327a4577f65fc3fb712e79f78233815b8a1c94433d0c2c9f6bc5953018b3565"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:200030dfc0a1d5d6ac18e993c5097c870c97c41574e67f227300a1fb74457b1d"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cc269e74cad6043cb8a46d0ce580031ab642b5930562c2bb79aa7fbf9c858d26"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:e62779c6371bd2b21dbd1fdce89eaec2d93fd98179d36f61130b489f62294a92"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f4797f821dc5d7c2b6fc818b89f8a3f37bcc900dd9e4369e6ebf1e525efce5db"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d21f188f6fe4fbf422e647ae9d5a68671d00218e187f91859c963d0738ccd88c"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-win32.whl", hash = "sha256:45dd4628dd9c21acc5c97627dad0bb791764feea81436fb6e0a06eef4c6dceaa"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:624a108122039af89ddda1a2b7ab2a11abe60c1521956f142f5d11bcd42ef138"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-win_arm64.whl", hash = "sha256:435071fd07a085ecbf4d28702a66fd2e676a03369ee497cc38bcb69a46bc77e2"}, + {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fe5790a36d33a5d0a6a1f802aa42ecae282bf29ac6f7506d8e12510847b82a45"}, + {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:cdb33ee9f8a8e4742c6b268fa6bd739024f34651a06b26913381b1413ebe7590"}, + {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c99b76b93f7b495eee7dcb0d6a38fb3ce91e72e99d9f78faa5664a881cb2b7d"}, + {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6af42f2ede8b596a6aaf6d49fdee3066ca578f4856b85ab5c1e2145de367a12d"}, + {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c0efa73afbc5b265aca0d8a467ae2a3f40d6854cbe1481cb442a62b7bf23c99"}, + {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7ac21489de962a4e2fc1e8f0b0da4aa1adc6ab9512fd845563fecb4b4c52093a"}, + {file = 
"rapidfuzz-3.13.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1ba007f4d35a45ee68656b2eb83b8715e11d0f90e5b9f02d615a8a321ff00c27"}, + {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d7a217310429b43be95b3b8ad7f8fc41aba341109dc91e978cd7c703f928c58f"}, + {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:558bf526bcd777de32b7885790a95a9548ffdcce68f704a81207be4a286c1095"}, + {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:202a87760f5145140d56153b193a797ae9338f7939eb16652dd7ff96f8faf64c"}, + {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfcccc08f671646ccb1e413c773bb92e7bba789e3a1796fd49d23c12539fe2e4"}, + {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:1f219f1e3c3194d7a7de222f54450ce12bc907862ff9a8962d83061c1f923c86"}, + {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ccbd0e7ea1a216315f63ffdc7cd09c55f57851afc8fe59a74184cb7316c0598b"}, + {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a50856f49a4016ef56edd10caabdaf3608993f9faf1e05c3c7f4beeac46bd12a"}, + {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fd05336db4d0b8348d7eaaf6fa3c517b11a56abaa5e89470ce1714e73e4aca7"}, + {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:573ad267eb9b3f6e9b04febce5de55d8538a87c56c64bf8fd2599a48dc9d8b77"}, + {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30fd1451f87ccb6c2f9d18f6caa483116bbb57b5a55d04d3ddbd7b86f5b14998"}, + {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a6dd36d4916cf57ddb05286ed40b09d034ca5d4bca85c17be0cb6a21290597d9"}, + {file = "rapidfuzz-3.13.0.tar.gz", hash = "sha256:d2eaf3839e52cbcc0accbe9817a67b4b0fcf70aaeb229cfddc1c28061f9ce5d8"}, +] + +[package.extras] +all = ["numpy"] + +[[package]] +name = "requests" +version = "2.32.3" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "scikit-image" +version = "0.25.2" +description = "Image processing in Python" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "scikit_image-0.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d3278f586793176599df6a4cf48cb6beadae35c31e58dc01a98023af3dc31c78"}, + {file = "scikit_image-0.25.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:5c311069899ce757d7dbf1d03e32acb38bb06153236ae77fcd820fd62044c063"}, + {file = "scikit_image-0.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be455aa7039a6afa54e84f9e38293733a2622b8c2fb3362b822d459cc5605e99"}, + {file = "scikit_image-0.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4c464b90e978d137330be433df4e76d92ad3c5f46a22f159520ce0fdbea8a09"}, + {file = "scikit_image-0.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:60516257c5a2d2f74387c502aa2f15a0ef3498fbeaa749f730ab18f0a40fd054"}, + {file = "scikit_image-0.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f4bac9196fb80d37567316581c6060763b0f4893d3aca34a9ede3825bc035b17"}, + {file = "scikit_image-0.25.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d989d64ff92e0c6c0f2018c7495a5b20e2451839299a018e0e5108b2680f71e0"}, + {file = "scikit_image-0.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2cfc96b27afe9a05bc92f8c6235321d3a66499995675b27415e0d0c76625173"}, + {file = "scikit_image-0.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24cc986e1f4187a12aa319f777b36008764e856e5013666a4a83f8df083c2641"}, + {file = "scikit_image-0.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:b4f6b61fc2db6340696afe3db6b26e0356911529f5f6aee8c322aa5157490c9b"}, + {file = "scikit_image-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8db8dd03663112783221bf01ccfc9512d1cc50ac9b5b0fe8f4023967564719fb"}, + {file = "scikit_image-0.25.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:483bd8cc10c3d8a7a37fae36dfa5b21e239bd4ee121d91cad1f81bba10cfb0ed"}, + {file = "scikit_image-0.25.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d1e80107bcf2bf1291acfc0bf0425dceb8890abe9f38d8e94e23497cbf7ee0d"}, + {file = "scikit_image-0.25.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a17e17eb8562660cc0d31bb55643a4da996a81944b82c54805c91b3fe66f4824"}, + {file = "scikit_image-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:bdd2b8c1de0849964dbc54037f36b4e9420157e67e45a8709a80d727f52c7da2"}, + {file = "scikit_image-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7efa888130f6c548ec0439b1a7ed7295bc10105458a421e9bf739b457730b6da"}, + {file = "scikit_image-0.25.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dd8011efe69c3641920614d550f5505f83658fe33581e49bed86feab43a180fc"}, + {file = "scikit_image-0.25.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:28182a9d3e2ce3c2e251383bdda68f8d88d9fff1a3ebe1eb61206595c9773341"}, + {file = "scikit_image-0.25.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8abd3c805ce6944b941cfed0406d88faeb19bab3ed3d4b50187af55cf24d147"}, + {file = "scikit_image-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:64785a8acefee460ec49a354706db0b09d1f325674107d7fa3eadb663fb56d6f"}, + {file = "scikit_image-0.25.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:330d061bd107d12f8d68f1d611ae27b3b813b8cdb0300a71d07b1379178dd4cd"}, + {file = "scikit_image-0.25.2.tar.gz", hash = "sha256:e5a37e6cd4d0c018a7a55b9d601357e3382826d3888c10d0213fc63bff977dde"}, +] + +[package.dependencies] +imageio = ">=2.33,<2.35.0 || >2.35.0" +lazy-loader = ">=0.4" +networkx = ">=3.0" +numpy = ">=1.24" +packaging = ">=21" +pillow = ">=10.1" +scipy = ">=1.11.4" +tifffile = ">=2022.8.12" + +[package.extras] +build = ["Cython (>=3.0.8)", "build (>=1.2.1)", "meson-python (>=0.16)", "ninja (>=1.11.1.1)", "numpy (>=2.0)", "pythran (>=0.16)", "spin (==0.13)"] +data = ["pooch (>=1.6.0)"] +developer = ["ipython", "pre-commit", "tomli ; python_version < \"3.11\""] +docs = ["PyWavelets (>=1.6)", "dask[array] (>=2023.2.0)", "intersphinx-registry (>=0.2411.14)", "ipykernel", "ipywidgets", "kaleido (==0.2.1)", "matplotlib (>=3.7)", "myst-parser", "numpydoc (>=1.7)", "pandas (>=2.0)", "plotly (>=5.20)", "pooch (>=1.6)", "pydata-sphinx-theme (>=0.16)", "pytest-doctestplus", "scikit-learn (>=1.2)", "seaborn (>=0.11)", "sphinx (>=8.0)", "sphinx-copybutton", "sphinx-gallery[parallel] (>=0.18)", "sphinx_design (>=0.5)", "tifffile (>=2022.8.12)"] +optional = ["PyWavelets (>=1.6)", "SimpleITK", "astropy (>=5.0)", "cloudpickle (>=1.1.1)", "dask[array] (>=2023.2.0)", "matplotlib (>=3.7)", "pooch (>=1.6.0)", "pyamg (>=5.2)", "scikit-learn (>=1.2)"] +test = ["asv", "numpydoc (>=1.7)", "pooch (>=1.6.0)", "pytest (>=8)", "pytest-cov (>=2.11.0)", "pytest-doctestplus", "pytest-faulthandler", "pytest-localserver"] + +[[package]] +name = "scipy" +version = "1.15.2" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "scipy-1.15.2-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a2ec871edaa863e8213ea5df811cd600734f6400b4af272e1c011e69401218e9"}, + {file = "scipy-1.15.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:6f223753c6ea76983af380787611ae1291e3ceb23917393079dcc746ba60cfb5"}, + {file = "scipy-1.15.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ecf797d2d798cf7c838c6d98321061eb3e72a74710e6c40540f0e8087e3b499e"}, + {file = "scipy-1.15.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:9b18aa747da280664642997e65aab1dd19d0c3d17068a04b3fe34e2559196cb9"}, + {file = "scipy-1.15.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87994da02e73549dfecaed9e09a4f9d58a045a053865679aeb8d6d43747d4df3"}, + {file = "scipy-1.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69ea6e56d00977f355c0f84eba69877b6df084516c602d93a33812aa04d90a3d"}, + {file = "scipy-1.15.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:888307125ea0c4466287191e5606a2c910963405ce9671448ff9c81c53f85f58"}, + {file = "scipy-1.15.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9412f5e408b397ff5641080ed1e798623dbe1ec0d78e72c9eca8992976fa65aa"}, + {file = "scipy-1.15.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:b5e025e903b4f166ea03b109bb241355b9c42c279ea694d8864d033727205e65"}, + {file = "scipy-1.15.2-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:92233b2df6938147be6fa8824b8136f29a18f016ecde986666be5f4d686a91a4"}, + {file = "scipy-1.15.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:62ca1ff3eb513e09ed17a5736929429189adf16d2d740f44e53270cc800ecff1"}, + {file = "scipy-1.15.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4c6676490ad76d1c2894d77f976144b41bd1a4052107902238047fb6a473e971"}, + {file = "scipy-1.15.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:a8bf5cb4a25046ac61d38f8d3c3426ec11ebc350246a4642f2f315fe95bda655"}, + {file = "scipy-1.15.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a8e34cf4c188b6dd004654f88586d78f95639e48a25dfae9c5e34a6dc34547e"}, + {file = "scipy-1.15.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28a0d2c2075946346e4408b211240764759e0fabaeb08d871639b5f3b1aca8a0"}, + {file = "scipy-1.15.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:42dabaaa798e987c425ed76062794e93a243be8f0f20fff6e7a89f4d61cb3d40"}, + {file = "scipy-1.15.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6f5e296ec63c5da6ba6fa0343ea73fd51b8b3e1a300b0a8cae3ed4b1122c7462"}, + {file = "scipy-1.15.2-cp311-cp311-win_amd64.whl", hash = "sha256:597a0c7008b21c035831c39927406c6181bcf8f60a73f36219b69d010aa04737"}, + {file = "scipy-1.15.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c4697a10da8f8765bb7c83e24a470da5797e37041edfd77fd95ba3811a47c4fd"}, + {file = "scipy-1.15.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:869269b767d5ee7ea6991ed7e22b3ca1f22de73ab9a49c44bad338b725603301"}, + {file = "scipy-1.15.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:bad78d580270a4d32470563ea86c6590b465cb98f83d760ff5b0990cb5518a93"}, + {file = "scipy-1.15.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b09ae80010f52efddb15551025f9016c910296cf70adbf03ce2a8704f3a5ad20"}, + {file = "scipy-1.15.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a6fd6eac1ce74a9f77a7fc724080d507c5812d61e72bd5e4c489b042455865e"}, + {file = "scipy-1.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b871df1fe1a3ba85d90e22742b93584f8d2b8e6124f8372ab15c71b73e428b8"}, + {file = "scipy-1.15.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:03205d57a28e18dfd39f0377d5002725bf1f19a46f444108c29bdb246b6c8a11"}, + {file = "scipy-1.15.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:601881dfb761311045b03114c5fe718a12634e5608c3b403737ae463c9885d53"}, + {file = "scipy-1.15.2-cp312-cp312-win_amd64.whl", hash = "sha256:e7c68b6a43259ba0aab737237876e5c2c549a031ddb7abc28c7b47f22e202ded"}, + {file = "scipy-1.15.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01edfac9f0798ad6b46d9c4c9ca0e0ad23dbf0b1eb70e96adb9fa7f525eff0bf"}, + {file = "scipy-1.15.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:08b57a9336b8e79b305a143c3655cc5bdbe6d5ece3378578888d2afbb51c4e37"}, + {file = "scipy-1.15.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:54c462098484e7466362a9f1672d20888f724911a74c22ae35b61f9c5919183d"}, + {file = "scipy-1.15.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:cf72ff559a53a6a6d77bd8eefd12a17995ffa44ad86c77a5df96f533d4e6c6bb"}, + {file = "scipy-1.15.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9de9d1416b3d9e7df9923ab23cd2fe714244af10b763975bea9e4f2e81cebd27"}, + {file = 
"scipy-1.15.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb530e4794fc8ea76a4a21ccb67dea33e5e0e60f07fc38a49e821e1eae3b71a0"}, + {file = "scipy-1.15.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5ea7ed46d437fc52350b028b1d44e002646e28f3e8ddc714011aaf87330f2f32"}, + {file = "scipy-1.15.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:11e7ad32cf184b74380f43d3c0a706f49358b904fa7d5345f16ddf993609184d"}, + {file = "scipy-1.15.2-cp313-cp313-win_amd64.whl", hash = "sha256:a5080a79dfb9b78b768cebf3c9dcbc7b665c5875793569f48bf0e2b1d7f68f6f"}, + {file = "scipy-1.15.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:447ce30cee6a9d5d1379087c9e474628dab3db4a67484be1b7dc3196bfb2fac9"}, + {file = "scipy-1.15.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:c90ebe8aaa4397eaefa8455a8182b164a6cc1d59ad53f79943f266d99f68687f"}, + {file = "scipy-1.15.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:def751dd08243934c884a3221156d63e15234a3155cf25978b0a668409d45eb6"}, + {file = "scipy-1.15.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:302093e7dfb120e55515936cb55618ee0b895f8bcaf18ff81eca086c17bd80af"}, + {file = "scipy-1.15.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7cd5b77413e1855351cdde594eca99c1f4a588c2d63711388b6a1f1c01f62274"}, + {file = "scipy-1.15.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d0194c37037707b2afa7a2f2a924cf7bac3dc292d51b6a925e5fcb89bc5c776"}, + {file = "scipy-1.15.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:bae43364d600fdc3ac327db99659dcb79e6e7ecd279a75fe1266669d9a652828"}, + {file = "scipy-1.15.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f031846580d9acccd0044efd1a90e6f4df3a6e12b4b6bd694a7bc03a89892b28"}, + {file = "scipy-1.15.2-cp313-cp313t-win_amd64.whl", hash = "sha256:fe8a9eb875d430d81755472c5ba75e84acc980e4a8f6204d402849234d3017db"}, + {file = "scipy-1.15.2.tar.gz", hash = "sha256:cd58a314d92838f7e6f755c8a2167ead4f27e1fd5c1251fd54289569ef3495ec"}, +] + +[package.dependencies] +numpy = ">=1.23.5,<2.5" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] +doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.16.5)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.0.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"] +test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja ; sys_platform != \"emscripten\"", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "setuptools" +version = "79.0.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "setuptools-79.0.0-py3-none-any.whl", hash = "sha256:b9ab3a104bedb292323f53797b00864e10e434a3ab3906813a7169e4745b912a"}, + {file = "setuptools-79.0.0.tar.gz", hash = "sha256:9828422e7541213b0aacb6e10bbf9dd8febeaa45a48570e09b6d100e063fc9f9"}, +] + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] +core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.functools (>=4)", 
"jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"] + +[[package]] +name = "shapely" +version = "2.1.0" +description = "Manipulation and analysis of geometric objects" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "shapely-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d3e5c5e3864d4dc431dd85a8e5137ebd39c8ac287b009d3fa80a07017b29c940"}, + {file = "shapely-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6eea89b16f5f3a064659126455d23fa3066bc3d6cd385c35214f06bf5871aa6"}, + {file = "shapely-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:183174ad0b21a81ee661f05e7c47aa92ebfae01814cd3cbe54adea7a4213f5f4"}, + {file = "shapely-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f239c1484af66bc14b81a76f2a8e0fada29d59010423253ff857d0ccefdaa93f"}, + {file = "shapely-2.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6220a466d1475141dad0cd8065d2549a5c2ed3fa4e2e02fb8ea65d494cfd5b07"}, + {file = "shapely-2.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4822d3ed3efb06145c34d29d5b56792f72b7d713300f603bfd5d825892c6f79f"}, + {file = "shapely-2.1.0-cp310-cp310-win32.whl", hash = "sha256:ea51ddf3d3c60866dca746081b56c75f34ff1b01acbd4d44269071a673c735b9"}, + {file = "shapely-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:a6f5e02e2cded9f4ec5709900a296c7f2cce5f8e9e9d80ba7d89ae2f4ed89d7b"}, + {file = "shapely-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c8323031ef7c1bdda7a92d5ddbc7b6b62702e73ba37e9a8ccc8da99ec2c0b87c"}, + {file = "shapely-2.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4da7c6cd748d86ec6aace99ad17129d30954ccf5e73e9911cdb5f0fa9658b4f8"}, + {file = "shapely-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f0cdf85ff80831137067e7a237085a3ee72c225dba1b30beef87f7d396cf02b"}, + {file = "shapely-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41f2be5d79aac39886f23000727cf02001aef3af8810176c29ee12cdc3ef3a50"}, + {file = "shapely-2.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:21a4515009f56d7a159cf5c2554264e82f56405b4721f9a422cb397237c5dca8"}, + {file = "shapely-2.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:15cebc323cec2cb6b2eaa310fdfc621f6dbbfaf6bde336d13838fcea76c885a9"}, + {file = "shapely-2.1.0-cp311-cp311-win32.whl", hash = "sha256:cad51b7a5c8f82f5640472944a74f0f239123dde9a63042b3c5ea311739b7d20"}, + {file = "shapely-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d4005309dde8658e287ad9c435c81877f6a95a9419b932fa7a1f34b120f270ae"}, + {file = "shapely-2.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53e7ee8bd8609cf12ee6dce01ea5affe676976cf7049315751d53d8db6d2b4b2"}, + {file = "shapely-2.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3cab20b665d26dbec0b380e15749bea720885a481fa7b1eedc88195d4a98cfa4"}, + {file = "shapely-2.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4a38b39a09340273c3c92b3b9a374272a12cc7e468aeeea22c1c46217a03e5c"}, + {file = "shapely-2.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:edaec656bdd9b71278b98e6f77c464b1c3b2daa9eace78012ff0f0b4b5b15b04"}, + {file = "shapely-2.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c8a732ddd9b25e7a54aa748e7df8fd704e23e5d5d35b7d376d80bffbfc376d04"}, + {file = "shapely-2.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9c93693ad8adfdc9138a5a2d42da02da94f728dd2e82d2f0f442f10e25027f5f"}, + {file = "shapely-2.1.0-cp312-cp312-win32.whl", hash = "sha256:d8ac6604eefe807e71a908524de23a37920133a1729fe3a4dfe0ed82c044cbf4"}, + {file = "shapely-2.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:f4f47e631aa4f9ec5576eac546eb3f38802e2f82aeb0552f9612cb9a14ece1db"}, + {file = "shapely-2.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b64423295b563f43a043eb786e7a03200ebe68698e36d2b4b1c39f31dfb50dfb"}, + {file = "shapely-2.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1b5578f45adc25b235b22d1ccb9a0348c8dc36f31983e57ea129a88f96f7b870"}, + {file = "shapely-2.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1a7e83d383b27f02b684e50ab7f34e511c92e33b6ca164a6a9065705dd64bcb"}, + {file = "shapely-2.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:942031eb4d8f7b3b22f43ba42c09c7aa3d843aa10d5cc1619fe816e923b66e55"}, + {file = "shapely-2.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d2843c456a2e5627ee6271800f07277c0d2652fb287bf66464571a057dbc00b3"}, + {file = "shapely-2.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8c4b17469b7f39a5e6a7cfea79f38ae08a275427f41fe8b48c372e1449147908"}, + {file = "shapely-2.1.0-cp313-cp313-win32.whl", hash = "sha256:30e967abd08fce49513d4187c01b19f139084019f33bec0673e8dbeb557c45e4"}, + {file = "shapely-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:1dc8d4364483a14aba4c844b7bd16a6fa3728887e2c33dfa1afa34a3cf4d08a5"}, + {file = "shapely-2.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:673e073fea099d1c82f666fb7ab0a00a77eff2999130a69357ce11941260d855"}, + {file = "shapely-2.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6d1513f915a56de67659fe2047c1ad5ff0f8cbff3519d1e74fced69c9cb0e7da"}, + {file = "shapely-2.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d6a7043178890b9e028d80496ff4c79dc7629bff4d78a2f25323b661756bab8"}, + {file = "shapely-2.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb638378dc3d76f7e85b67d7e2bb1366811912430ac9247ac00c127c2b444cdc"}, + {file = "shapely-2.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:737124e87d91d616acf9a911f74ac55e05db02a43a6a7245b3d663817b876055"}, + {file = 
"shapely-2.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e6c229e7bb87aae5df82fa00b6718987a43ec168cc5affe095cca59d233f314"}, + {file = "shapely-2.1.0-cp313-cp313t-win32.whl", hash = "sha256:a9580bda119b1f42f955aa8e52382d5c73f7957e0203bc0c0c60084846f3db94"}, + {file = "shapely-2.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:e8ff4e5cfd799ba5b6f37b5d5527dbd85b4a47c65b6d459a03d0962d2a9d4d10"}, + {file = "shapely-2.1.0.tar.gz", hash = "sha256:2cbe90e86fa8fc3ca8af6ffb00a77b246b918c7cf28677b7c21489b678f6b02e"}, +] + +[package.dependencies] +numpy = ">=1.21" + +[package.extras] +docs = ["matplotlib", "numpydoc (==1.1.*)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] +test = ["pytest", "pytest-cov", "scipy-doctest"] + +[[package]] +name = "simsimd" +version = "6.2.1" +description = "Portable mixed-precision BLAS-like vector math library for x86 and ARM" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "simsimd-6.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9c79486cf75eb06c5e1f623e8315f9fb73620ac63b846d5a6c843f14905de43f"}, + {file = "simsimd-6.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:104d53f2489dcbf569b8260d678e2183af605510115dc2b22ed0340aa47fe892"}, + {file = "simsimd-6.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fef886c8220d3566b9f43d441226ca267a11682dea5496bb6e007f655eee1fd1"}, + {file = "simsimd-6.2.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:522e56451481bff3468653c2818ad1240b4cb13cff0ec76bc88d8860bfc775c9"}, + {file = "simsimd-6.2.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a5dfb02fa141a6e039803044930753aef1df5ed05cae8b14fe348cdc160cef1e"}, + {file = "simsimd-6.2.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39eb6abdd44adfddec181a713e9cfad8742d03abbc6247c4e5ca2caee38e4775"}, + {file = "simsimd-6.2.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:9ca68b9d2cc1c19af6afe6f01a764861fc8bb919d688a64cf0b0ac0abae7e0fa"}, + {file = "simsimd-6.2.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:2b56b1ca7b76c0d4515938a036e688b73a866b19e6f6eb743596144fdf498a0c"}, + {file = "simsimd-6.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:02d7b7c7afecc63ddf501460f09c1da90625bfd59b4da5fda126c1aa5c54bb95"}, + {file = "simsimd-6.2.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:8abc529daf0a61649ca4a237cd9e63723f3355394686898654c643bd63846cf5"}, + {file = "simsimd-6.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9ea60422d0f45d3a1899984c3fc3a14dbd248cfca8f67c24751029441464a806"}, + {file = "simsimd-6.2.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:98e38a0ca4805c1de2882d0641b54e249eabca4ed2980c82465822130d7f8c98"}, + {file = "simsimd-6.2.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:cbbc2434286493b88f3b8211e922d37b46588b34d4cc28f3262f154c8ca1141c"}, + {file = "simsimd-6.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4f2ecd459f4917facdb287c42c5e68030b21cb98edac0fec9919a7215968e38a"}, + {file = "simsimd-6.2.1-cp310-cp310-win32.whl", hash = "sha256:4ec31c076dc839114bff5d83526ddf46551d4720cc8cd0f16516896809a4fca6"}, + {file = "simsimd-6.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:94282e040be985c993d415290371f6b22bec3eeadafe747a6d8dfbd2c317f35e"}, + {file = "simsimd-6.2.1-cp310-cp310-win_arm64.whl", hash = "sha256:0784e98ca48a0075fb0cbd7782df11eaa17ce15c60f09a65e8477864208afb8a"}, + {file = "simsimd-6.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:e9614309af75be4d08a051dc61ed5cf41b5239b8303b37dc2f9c8a7223534392"}, + {file = "simsimd-6.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ea4f0f68be5f85bbcf4322bfdd1b449176cf5fdd99960c546514457635632443"}, + {file = "simsimd-6.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:12a8d60ccc8991dfbbf056c221ce4f02135f5892492894972f421a6f155015d9"}, + {file = "simsimd-6.2.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a74142ea21a6fd3ec5c64e4d4acf1ec6f4d80c0bb1a5989d68af6e84f7ac612e"}, + {file = "simsimd-6.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298f7c793fc2a1eeedcefa1278eb2ef6f52ce0b36aaa8780885f96a39ce1a4e8"}, + {file = "simsimd-6.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4025ebad36fb3fa5cffcd48d33375d5e5decc59c1129a259b74fed097eab1ab5"}, + {file = "simsimd-6.2.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f486682aa7a8918d86df411d3c11c635db4b67d514cb6bb499c0edab7fb8ec58"}, + {file = "simsimd-6.2.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:173e66699597a4fcf6fa50b52cced40216fdcfba15f60b761a2bd9cb1d98a444"}, + {file = "simsimd-6.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5b5c6f79f797cc020a2ff64950162dfb6d130c51a07cdac5ad97ec836e85ce50"}, + {file = "simsimd-6.2.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:25812637f43feaef1a33ae00b81a4d2b0116aadae3a08267486c1e57236fc368"}, + {file = "simsimd-6.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:592a578c788a9cb7877eff41487cc7f50474e00f774de74bea8590fa95c804ae"}, + {file = "simsimd-6.2.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:191c020f312350ac06eee829376b11d8c1282da8fefb4381fe0625edfb678d8d"}, + {file = "simsimd-6.2.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e9ad2c247ed58ba9bb170a01295cb315a45c817775cc7e51ad342f70978a1057"}, + {file = "simsimd-6.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0ff603134600da12175e66b842b7a7331c827fa070d1d8b63386a40bc8d09fcd"}, + {file = "simsimd-6.2.1-cp311-cp311-win32.whl", hash = "sha256:99dff4e04663c82284152ecc2e8bf76b2825f3f17e179abf7892e06196061056"}, + {file = "simsimd-6.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:0efc6343c440a26cf16463c4c667655af9597bcbd55ad66f33a80b2b84de7412"}, + {file = "simsimd-6.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:2d364f2c24dd38578bf0eec436c4b901c900ae1893680f46eb5632e01330d814"}, + {file = "simsimd-6.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9b3315e41bb759dc038ecd6f4fa7bcf278bf72ee7d982f752482cdc732aea271"}, + {file = "simsimd-6.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d476c874bafa0d12d4c8c5c47faf17407f3c96140616384421c2aa980342b6f"}, + {file = "simsimd-6.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9d4f15c06cc221d29e181197c7bbf92c5e829220cbeb3cd1cf080de78b04f2a"}, + {file = "simsimd-6.2.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d286fd4538cb1a1c70e69da00a3acee301519d578931b41161f4f1379d1195c6"}, + {file = "simsimd-6.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:050f68cfa85f1fb2cfa156280928e42926e3977034b755023ce1315bf59e87ff"}, + {file = "simsimd-6.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:67bb4b17e04919545f29c7b708faaccbe027f164f8b5c9f4328604fa8f5560ea"}, + {file = "simsimd-6.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3d6bffd999dbb36e606b065e0180365efac2606049c4f7818e4cba2d34c3678f"}, + {file = 
"simsimd-6.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:25adb244fb75dbf49af0d1bcac4ed4a3fef8e847d78449faa5595af0a3e20d61"}, + {file = "simsimd-6.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b4542cee77e801a9c27370fc36ae271514fc0fb2ce14a35f8b25f47989e3d267"}, + {file = "simsimd-6.2.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:4f665228f8ff4911790b485e74b00fa9586a141dde6011970be71bb303b5a22f"}, + {file = "simsimd-6.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:783b4308f80ae00763b0eaa0dac26196958f9c2df60d35a0347ebd2f82ece46d"}, + {file = "simsimd-6.2.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:95055e72cfe313c1c8694783bf8a631cc15673b3b775abef367e396d931db0b8"}, + {file = "simsimd-6.2.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a98f2b383f51b4f4ee568a637fc7958a347fdae0bd184cff8faa8030b6454a39"}, + {file = "simsimd-6.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2e474fd10ceb38e2c9f826108a7762f8ff7912974846d86f08c4e7b19cd35ed4"}, + {file = "simsimd-6.2.1-cp312-cp312-win32.whl", hash = "sha256:b2530ea44fffeab25e5752bec6a5991f30fbc430b04647980db5b195c0971d48"}, + {file = "simsimd-6.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:dc23283235d5b8f0373b95a547e26da2d7785647a5d0fa15c282fc8c49c0dcb0"}, + {file = "simsimd-6.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:5692ce7e56253178eea9dbd58191734918409b83d54b07cfdcecf868d0150a73"}, + {file = "simsimd-6.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:76b32fdc7142c9714e94651ece8bc00dd5139c554813211552aa358e44af0e07"}, + {file = "simsimd-6.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f44e5e2319427f94db658c6f75caae78850da505902874a1664a83ef5713f333"}, + {file = "simsimd-6.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:05323cbad7200592c2e53fbcc759e615594e8ca444ef5eddf9f3fb196ad4de9c"}, + {file = "simsimd-6.2.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1f3cbe5c39db2bb64f30999104de1215ba3805d6059af7bc5a9d662d50f4707"}, + {file = "simsimd-6.2.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eaa94e0932ae2a48b7e4df8c29204dc9fe59f72b1faeb08e9d5015bf51fb9f21"}, + {file = "simsimd-6.2.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:508465f8d4e3e0fff07c939921aeedf55b0ade9f56f64e938c350c283dea42fb"}, + {file = "simsimd-6.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ca67f6273ef544c74c48b134af756de7c98a711ccf69cd0791225f26dd449281"}, + {file = "simsimd-6.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d470b43ce606f21f54a23fc19ad6928333e17d0956b02eb27b7b112edc156a10"}, + {file = "simsimd-6.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59518b9834c167a1dd8900600718e95cdadc9d74525452f426aa8455a38c55ef"}, + {file = "simsimd-6.2.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:59c2978c4e402097d8a4b38f076ff98cc43e6b059d53f89736404f26e9a9bd5a"}, + {file = "simsimd-6.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:edc68e727d53ed2866dcfb625f15e52be8f1e6809f4be2147bf8d2115a2542b7"}, + {file = "simsimd-6.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:9e5e82551d75c0e2cd0d4b8af8db1cae7b5ac6dcc076c0c760870ff81f78135b"}, + {file = "simsimd-6.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:2fa19f8c9786757d19afcbda9f8fb68de55e4f5562725ae8727f887d01bf0e4d"}, + {file = "simsimd-6.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5b0748aa6bd4df4c5a3f5e979aec14b26588f1b2e0d44075dcc9eaf4d555e15b"}, + {file = 
"simsimd-6.2.1-cp313-cp313-win32.whl", hash = "sha256:7f43721e1a4ebe8d2245b0e85dd7de7153d1bf22839579d5f69a345909c68d9e"}, + {file = "simsimd-6.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:6af1565e0ef7060bc52a38e3273a8e6e92aff47835965dc5311298563475935e"}, + {file = "simsimd-6.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:e690b41377c8dd157d585713b0bc35c845aee7742334bf12d1f087fc8a65b6c3"}, + {file = "simsimd-6.2.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9264abf5dabe046d3951d162dbba21c7a3c3f491587c84038df1b94de0b6742a"}, + {file = "simsimd-6.2.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e93ffe6ea7417bffdee9a1b9ebb682f35f41e3e75b7e51f0f3a2fb5f7dd4c079"}, + {file = "simsimd-6.2.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d09ea4d3c0224bedf9f72881d1e5896a265fc89311abba078e615b0c06d989da"}, + {file = "simsimd-6.2.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dae5f7c37ffd0313ea59aa0a20203e7624bc5a39065fc5505991268689f2b6a2"}, + {file = "simsimd-6.2.1-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:2f573d706e44018cba63a6ff44f4a1a7733fb55ee504a12b345c012bc114f7d5"}, + {file = "simsimd-6.2.1-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:63a48c50c0ff44ac4d463f8c963f718de5aff54e1c4a6ce8363e291ac2f1fc14"}, + {file = "simsimd-6.2.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:77912f9b4c230eea2bca7ba35c33dfd5590b41a867abba9fe7e152a7ae976307"}, + {file = "simsimd-6.2.1-cp37-cp37m-musllinux_1_2_armv7l.whl", hash = "sha256:731635de9e771571fbf61edb81cfa466fed37845fbfb35d719afb7c6ea3d4bce"}, + {file = "simsimd-6.2.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:03c94c9dcf80c93c58c9435f295fd35399d88097464d1a0a5995372868d852e3"}, + {file = "simsimd-6.2.1-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:bbcfc905d90343c7b7e07f7b80385abc017405125246908181f6841c5f3cbde3"}, + {file = "simsimd-6.2.1-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:4cf0180f4b17ea3758523f644eddc38124ac98c4aac1c5572f44fd04c3bcb2f3"}, + {file = "simsimd-6.2.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31163917ce2848f7896e633b8d1ae0db9004dc8eb6605cf959f6319e31cd569c"}, + {file = "simsimd-6.2.1-cp37-cp37m-win32.whl", hash = "sha256:c7af7da114f81af0bcfbf9563ea109479550e62dd5dde39ea2e93bc5f1e306ca"}, + {file = "simsimd-6.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:cad9b5503d35b7be3e704594bcdf3883bbcdb9987086d942a2a52e7b0927288e"}, + {file = "simsimd-6.2.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5b62fcf02e33a88e4c4a93da9d682e475bb08979d7d18f91a76bee2fe2f9d335"}, + {file = "simsimd-6.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0d7eeed41600bb229c34d822e0011c80019c16c689f16c82b875012e7116b2d5"}, + {file = "simsimd-6.2.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0da7f30f11cbe7c6ced372af3f5da24b7df1038bad82cfd0032667024622b794"}, + {file = "simsimd-6.2.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae496f16f2d759dc103ed8b8a5533c0a52e5c96c88e5d6a9e26eff24f174537b"}, + {file = "simsimd-6.2.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9046d108b3fc7cd1808df53083b3a2e26f70a1efb4f378971fefe76c27d64488"}, + {file = "simsimd-6.2.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1919957071b6d19e337ebba9c04f4b48604f927fc9118ce877b1fbcec1975f57"}, + {file = "simsimd-6.2.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:ef6d998496e5569ce9b5ce21a9ecbe3b59f9426ce27e6bf1db0eae67613d8d9e"}, + 
{file = "simsimd-6.2.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:3cb54ec20235d81dd9596c8fe8b2bd35fad027d3f5cd52e23a17a71b3ac44d3f"}, + {file = "simsimd-6.2.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:598330828b922700aac8a7939c562f80e4ee9000671081ff264c8daae4692d76"}, + {file = "simsimd-6.2.1-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:1b45987216a5d5b9b1441ea8acbf5d731e5ee60c0727999f10438827d201b40d"}, + {file = "simsimd-6.2.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:8c9b79c189ab40e1633c4cecba1a58133a8454662c40af8abdf18f0766a1cf94"}, + {file = "simsimd-6.2.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:1324d7433f0cefd29a55716197112d22b259c49d7c62425517dc37d0c6494b69"}, + {file = "simsimd-6.2.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:c5101d1204e42b15c1e3772ec8b357cec9bce5eea0ccb76ec8faff5104233241"}, + {file = "simsimd-6.2.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d8c7b7b286d7be1756fb837b9f3330f7d03eb6a7329cd717c88d635e441a8eb0"}, + {file = "simsimd-6.2.1-cp38-cp38-win32.whl", hash = "sha256:2e07e5b4abbb5561a62acfc4d1f2c4fb9051cc0f6919b0456d0bb37dc6749f0a"}, + {file = "simsimd-6.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:87b963f862ba50a61527af281a66e1d6cee34c535b621718e45de1df8f277cba"}, + {file = "simsimd-6.2.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:45010111c39117af851a323e78bd43e6a344349b4ed7b1f5ca4c4ebb2284c7e5"}, + {file = "simsimd-6.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dd6ecae57a481f9fc0bceb331cba7b18a0b23a71f15af7d06cdf8aa8aac38645"}, + {file = "simsimd-6.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ffbb874d4c3ed53443468f9c20704845cc8736d5717817c35d5cb12ad5548c7a"}, + {file = "simsimd-6.2.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b6147ddc390c08a802af258ad204b1d775bb3d180ec6f6fcea82f4fd71fb447"}, + {file = "simsimd-6.2.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0048df2245d239ed016e5f4b5d75e96987149bf7245e90713e1fe3b53e321a74"}, + {file = "simsimd-6.2.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc087d9dacab1eb4abc2f3d9f33047fc601db501cb43165e658973fe5fd50c9b"}, + {file = "simsimd-6.2.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:d1d2e6c3d655a34b42c6e0d0c28ac7b86498858ffb68c58733893fc538bd26a9"}, + {file = "simsimd-6.2.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:d063beb7a53d8525af56c4247e1e43a7fa161b70bcbacf30daab639b32ad4a10"}, + {file = "simsimd-6.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4a517ae74d18a8b7d4d349cf4afed45f33cd660cb44d0ae34c95d00c1f7fa760"}, + {file = "simsimd-6.2.1-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:a79a2bd32ba0f90f70c22accf4b441846049b55aeae73556f4b5c6e9fe6e024f"}, + {file = "simsimd-6.2.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4c9487acdae92b4089a0142cd3691328bfdcaaebf2587a0c11df4039ff7005e8"}, + {file = "simsimd-6.2.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:1c4760dee8f65a890b82a6175d5294d30271637495a9e4195969fc1ad38ec056"}, + {file = "simsimd-6.2.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:abee753fbb8584373218bf78396ae3d2b2a1202c7284cd9c70695535c62cdc31"}, + {file = "simsimd-6.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:300042eeab379923d77bca328fdc2ac1df8adfdffa9a6939f28ba6b196f02002"}, + {file = "simsimd-6.2.1-cp39-cp39-win32.whl", hash = "sha256:2eed0ad770b18a3b74b19ad744ee3224dae9bf1a86bd9126eae0636ada53eebd"}, + {file = "simsimd-6.2.1-cp39-cp39-win_amd64.whl", hash 
= "sha256:e99cc8aa19af5ca3574aa72e1d0e959c4859345fdf553a887ce22e469c1145a8"}, + {file = "simsimd-6.2.1-cp39-cp39-win_arm64.whl", hash = "sha256:37b0db92ca0fec835ec1256d6dd167d7c9f727d3372b98bf27b1fd59ad299768"}, + {file = "simsimd-6.2.1.tar.gz", hash = "sha256:5e202c5386a4141946b7aee05faac8ebc2e36bca0a360b24080e57b59bc4ef6a"}, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + +[[package]] +name = "soupsieve" +version = "2.6" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, + {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, +] + +[[package]] +name = "stringzilla" +version = "3.12.3" +description = "SIMD-accelerated string search, sort, hashes, fingerprints, & edit distances" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "stringzilla-3.12.3-cp310-cp310-macosx_10_11_universal2.whl", hash = "sha256:8ec8f95af09d62b4ca5dc8c6f557035acadfe794edd2291f515b54f3afb59260"}, + {file = "stringzilla-3.12.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:d305ed6f35132852844f59964910b202675f76ad48060fea8c6c959b67959c3d"}, + {file = "stringzilla-3.12.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8d1eb356ca15400b1410187bc7fde421f6e20460f05ae1dece4c60821bfffba6"}, + {file = "stringzilla-3.12.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d0f7c5bccff1125fb77c01d7166e525c8dbf40f700f4120a00ad2f4ccfb3f3d3"}, + {file = "stringzilla-3.12.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:db02c9baa70eceb6f1362237411ef4b6b59064e17008da339f378717050c2bdb"}, + {file = "stringzilla-3.12.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.manylinux_2_28_s390x.whl", hash = "sha256:da5938ba65d0f7bff83479662b7f397d71cac3e84fc37b4504616712e37d0417"}, + {file = "stringzilla-3.12.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3ce27645cbe2593ede23c581a50428f1d0ba1d666af06d445ba89cef6613df0"}, + {file = "stringzilla-3.12.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:680db2ce49bee340904a8c3149b0bca527cd9dc322a97071e4500f646d87404e"}, + {file = "stringzilla-3.12.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32537a5165f473cd99440baa23e9202ab2185d72896960daa4f8bddac5c80544"}, + {file = "stringzilla-3.12.3-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:c39d612f26ea7385b6a134292b097efe7444cae5b2dafbfccd3b65ce23d73ccc"}, + {file = "stringzilla-3.12.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:dea7c8ca195f8c79e85163fd5b76495b4a11effd2e17956e6825b4fffa3c915b"}, + {file = "stringzilla-3.12.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:0d21de495a84b05196832e5aec0d5ee83b0667f9c165d0e326a753022625adfa"}, + {file = 
"stringzilla-3.12.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:7c836c28a75cd4cccb0737854ed780444c45cce93012ed100dd4a431b60ebd86"}, + {file = "stringzilla-3.12.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:745f144c039c2c2787ef0d63296f06d4d0d776b141288a74670aea38d86b3078"}, + {file = "stringzilla-3.12.3-cp310-cp310-win32.whl", hash = "sha256:2d4803357a07592a7fdbea599e93cfd4366c4e887290cfa5988bc7ec44da93b5"}, + {file = "stringzilla-3.12.3-cp310-cp310-win_amd64.whl", hash = "sha256:0f41b85c38445f7a1fed677984590c943b16cbc00727e2b093b2f0b2bdbfcac5"}, + {file = "stringzilla-3.12.3-cp310-cp310-win_arm64.whl", hash = "sha256:088ca8105ff027172277d2221ea0241d5ed21cc10ee91d5f45c7961ddab3d12a"}, + {file = "stringzilla-3.12.3-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:d0e79931ae66cd4566f25d77ccf646e9d180ead603fab4278a6ecdae7570e85b"}, + {file = "stringzilla-3.12.3-cp311-cp311-macosx_10_11_x86_64.whl", hash = "sha256:b3a2f047dfe21468f90e8cab3f6a4b8e46e876b6563b78dc54ba154a56f1e383"}, + {file = "stringzilla-3.12.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8b8404b55fa180d1e6da1fc10f739890af427d93afd02a408e229be8d7383a5e"}, + {file = "stringzilla-3.12.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdbbb9f0dd3f8d1ec3201a3fa7de3e0c92d056da9ca61ada2af8ca662cab4834"}, + {file = "stringzilla-3.12.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:162e68d44e87f3b7591a9c18f8a7794bc9bbf8ab9b16705bfe5c552c676b2d8c"}, + {file = "stringzilla-3.12.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ee7284d1c62cc4d4cf7772178c02cae91933a38e8b11390da6e8a8b4f20e0663"}, + {file = "stringzilla-3.12.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4d08a9fda6b9489667dfba38dcc8ebe7a94e4ebbe8e741557cccd5b09f864ed7"}, + {file = "stringzilla-3.12.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c11a94e9123e411e8d74cd0ea860dca58bec6e48a95b5ff3707b595eaf71eecd"}, + {file = "stringzilla-3.12.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:639844f35f0be5ade5849686067076add2214ce93a01ffd2ab64b2a6c506f666"}, + {file = "stringzilla-3.12.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:98579b756f10aa515b97b58d69a6aff3daefb7267cdf84e225e5b7fda584a431"}, + {file = "stringzilla-3.12.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:dd95a2f8782183340fd706f75aa8615c21a2eacc0c11684fd6b3ee17b1ba3542"}, + {file = "stringzilla-3.12.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:06d543e4949e43c43a8b4334fda8c162999249daeb787af5ea6b0e8d0682ce79"}, + {file = "stringzilla-3.12.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:783ff26460fc8028cf53ec08ddacf438d0efffed78f75f34131cdfc77d15c2cf"}, + {file = "stringzilla-3.12.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:39cfca4f26c70f262016ed16a035f42d1ed0917e28f78db6c61670690983b96b"}, + {file = "stringzilla-3.12.3-cp311-cp311-win32.whl", hash = "sha256:bb593a0e809451254a819e156a8180cb53a239f1f427a8bdb2a39f7c52e85a43"}, + {file = "stringzilla-3.12.3-cp311-cp311-win_amd64.whl", hash = "sha256:587f1934ef615b5e11ce1b1779cf391d40e0ead6f6be6083d313dc6b4cc7f4dd"}, + {file = "stringzilla-3.12.3-cp311-cp311-win_arm64.whl", hash = "sha256:329d734c4eb943d9746d8bb2fc2008063b8b33b8f9af27833abea876b6027aeb"}, + {file = 
"stringzilla-3.12.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28c644def937fd0baa887c1e4e758778d59773e996ac830013b3468671d96aa"}, + {file = "stringzilla-3.12.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:773da6ad2993c3bbbdbff87b583eb1cc4b655c33809bfd3f913f749be12bfdd0"}, + {file = "stringzilla-3.12.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e43e92b66d12c6d6487bccbb0b35e6dca670932a92ebb5e59b62e7212aaf739f"}, + {file = "stringzilla-3.12.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:27d29eb61ced7a2dcf44195eea058e5aa44d4c8b73c2095a435ca9533418f6d7"}, + {file = "stringzilla-3.12.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:07d51f4c20cbc7d68f5291ef43c2c6b6b772f4be27adb6c9a6f895ad06004fd8"}, + {file = "stringzilla-3.12.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e914d237b4d1829974cbda70e70b058373fef6a5fef90e36953d531f5bdc1063"}, + {file = "stringzilla-3.12.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32999c1c21eb799a97d0659b54cbdd01bfdd1b7ffc53a8c16183b5542e5052c9"}, + {file = "stringzilla-3.12.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6e18d9604b7a083c3da5df8e1029c692e9b69e4f17e08b773cfbf8c6a141106"}, + {file = "stringzilla-3.12.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cd8f488f281d8e198c28c77ef8638e062622d1c0ce18e9bee5380c0d848b248d"}, + {file = "stringzilla-3.12.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:4dd8cb6730f230a76ea623fc111372e06f07fdea3b27471ba1a8cf23e6751eda"}, + {file = "stringzilla-3.12.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5469ae13ffeb03fb30a99241af6f5584ee193c549d2635616ce7558415d13f22"}, + {file = "stringzilla-3.12.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:95f245d8e526fc25691329996a67b661acf0ea7baef9c3a402d555e32aa49863"}, + {file = "stringzilla-3.12.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2b1884630696cf51ac2ea53867b629aa80e01dead3a47c021b7d643eb0595f68"}, + {file = "stringzilla-3.12.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5e492370b5de18ce68c96213544124221dc52f833477e177a4052ec704506e58"}, + {file = "stringzilla-3.12.3-cp312-cp312-win32.whl", hash = "sha256:5f20f8679ce88dad1eaae8effff8262694c65b359618e8ed6476a389eaf172a7"}, + {file = "stringzilla-3.12.3-cp312-cp312-win_amd64.whl", hash = "sha256:291c024cc4365d6c0099d9ee7e61142392ab1315a6d4a8097e3b63af71d0d97c"}, + {file = "stringzilla-3.12.3-cp312-cp312-win_arm64.whl", hash = "sha256:3cf28d68273ba12ee970c682e67410516cdde087d207a2bb0cdd44ab2f533421"}, + {file = "stringzilla-3.12.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c29727cf895aef132ee7d1196bc04cc7900bbacf9ce08980c2557399cbb83222"}, + {file = "stringzilla-3.12.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:32631978e9fa79e9a579923a7c97a4603143c5fda5b63f9103507762604bd098"}, + {file = "stringzilla-3.12.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3df1a74419732a18a7885474c858bcb1270bdf5c35888d97d3b16c3646d539c5"}, + {file = "stringzilla-3.12.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9366d7a903f617472107767716502d4897a272512ead3a5e3c5e32d217f3a2e8"}, + {file = "stringzilla-3.12.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:0fcebbea3ea4fe58b2bb0dc85a6be705e7d2cc4746113a79940d8bc2755df87d"}, + {file = "stringzilla-3.12.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e2a1219e4c79842893d6ace2e2875c879affdb14107877d3037e3373bedc8a56"}, + {file = "stringzilla-3.12.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b14251c50ec73eb75ce83aa07673f68c4f187b53b8b868ebc0311f3012ee71b"}, + {file = "stringzilla-3.12.3-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9cba1e18d1d167edf67fdfc5fa11ece06ec376ae55dd65401125d546e3d2150b"}, + {file = "stringzilla-3.12.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:43467714a52103f819ffbdef2d58aa39c938fcd42023ff6100a77bbb3f6cb398"}, + {file = "stringzilla-3.12.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:3d5c7fe1f2d04d595dd3fedf7776f8ab9819e4f4c681ae1f0fb3e28bb29247b3"}, + {file = "stringzilla-3.12.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bbd85919eaf7eb01ca25304dfe085b21d6db693499cd25da0a915d348ec42c38"}, + {file = "stringzilla-3.12.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:57867d712049b8222311e32fd93ebfd60864b48b35aefa860788f6eafba61bc2"}, + {file = "stringzilla-3.12.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1db2e8092c72ca7750d7b5a0d367a80efb8d3831a7ea60deeb5885f301aac035"}, + {file = "stringzilla-3.12.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:793b8acfa4f1dae7d070742b69f423d8d073643eaa56eb89078e781d031caada"}, + {file = "stringzilla-3.12.3-cp313-cp313-win32.whl", hash = "sha256:7af63431268f018af41b15adeab7a732585f397a0941adaf5d2fc624f1f3a790"}, + {file = "stringzilla-3.12.3-cp313-cp313-win_amd64.whl", hash = "sha256:0a3dcd565d833e7c5814eeba2ebfcbf9d06a4ae32467423d4071702c1084e74a"}, + {file = "stringzilla-3.12.3-cp313-cp313-win_arm64.whl", hash = "sha256:bcce4ed759cee812d5fcec3b87eafa9996641483cbc1b0f81006ca15bf6a16b6"}, + {file = "stringzilla-3.12.3-cp36-cp36m-macosx_10_11_x86_64.whl", hash = "sha256:f94fb88b8064e228345ca5ab9d817b0bc1e5cd17dfbad3f0166334dee704b0c6"}, + {file = "stringzilla-3.12.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6a4063243b73170f4114d61b97da4c8ab1a94a0cd7d54b82593df6294a995ad3"}, + {file = "stringzilla-3.12.3-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:11587f5f298f6ae67f66e8257973e1a3aa6003194e18d3839f330bf95613ee9b"}, + {file = "stringzilla-3.12.3-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.manylinux_2_28_s390x.whl", hash = "sha256:81d900b1d3b2cd8a1c8a9db1643d7ba516213831f62b7142c4fd689b692bb293"}, + {file = "stringzilla-3.12.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:56bb42ebf203d2c87e6a385cd31d097e593f9c2138eb4b0d9c847dcd1cc5f484"}, + {file = "stringzilla-3.12.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:df20a8fc23133eb8578f85fb555c5d1e57d0c14c580a6a487a401f4ed1d6d957"}, + {file = "stringzilla-3.12.3-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:660f7b65a95d4265a42b15aee948e1b2977295c70c6cd298a1059a19c51cf692"}, + {file = "stringzilla-3.12.3-cp36-cp36m-musllinux_1_2_armv7l.whl", hash = "sha256:e38cfa8eb3a4a69c7a017957f376953e8d0150ef416eae46931d9a795358a856"}, + {file = "stringzilla-3.12.3-cp36-cp36m-musllinux_1_2_i686.whl", hash = 
"sha256:aac0ed4aa7ab8c00f8ffe0bfc8ecbf5ca1da8a57a7b72aa009deeab18b6c6b1e"}, + {file = "stringzilla-3.12.3-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:53ff36ecde47a3fd76f1cf4983dcc449c7a22fcaa090678b1b8fea43b97edefd"}, + {file = "stringzilla-3.12.3-cp36-cp36m-musllinux_1_2_s390x.whl", hash = "sha256:cc5548891aa00866bcd61d3d7c0db38fef3f36a097112e030ffff0b7cf711755"}, + {file = "stringzilla-3.12.3-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:f9ec5615c807097d375eb84f13bccd9c0f8ed55a3ea162802336653d60c914a1"}, + {file = "stringzilla-3.12.3-cp36-cp36m-win32.whl", hash = "sha256:e7f2f8da99e3150717fb886fc428f21b997865de58b00ee45b9caa67cc3418a1"}, + {file = "stringzilla-3.12.3-cp36-cp36m-win_amd64.whl", hash = "sha256:58cb7e88770f1ca1b83398fe6fba8ed6e5bb56f225184dd0bda138cf398d3ef9"}, + {file = "stringzilla-3.12.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:7eeb34d049d49b5f05b14d2878e6d660d3b81651749f63fd898d6f799a9f5f55"}, + {file = "stringzilla-3.12.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cb74a67a1af51b4553e93daf681ec4b05b22bce0e582345e6305c31e4a790a30"}, + {file = "stringzilla-3.12.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fec66a18175edf8ce07ed9ca9e94287b0f6e5f4d8adac570f6c5f1df99f4ef21"}, + {file = "stringzilla-3.12.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.manylinux_2_28_s390x.whl", hash = "sha256:878cb7068fb21ccdc8529a34fb40a67a7dfce306245fe1d90c754acdadb3c9af"}, + {file = "stringzilla-3.12.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e65714dce2ee73e77e15dd705a9e6b8b61f539d2fbe2833800883083fd404cad"}, + {file = "stringzilla-3.12.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7c49a55da8a1c7a9a1e15955bdb330ce2023bf04fa0a922ac776236ec655a8ad"}, + {file = "stringzilla-3.12.3-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:0aaa6fb26f55b965598d8115187b36a292d994c63292a0f5bdc0ae6b3fdd8086"}, + {file = "stringzilla-3.12.3-cp37-cp37m-musllinux_1_2_armv7l.whl", hash = "sha256:e25d99cdc20d3ba239250bbb195667bf9254ebb9b15bf2a59b81ba36525ee112"}, + {file = "stringzilla-3.12.3-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:2855d5ad326cf4eb05c3a8d090b82d97ed22023210774c50c3beed9f5e5d1f16"}, + {file = "stringzilla-3.12.3-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:05649732a1f1c0ed76876259d6adbceb8691c36226ae85a63d1c456277c3c463"}, + {file = "stringzilla-3.12.3-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:76772c2c42020fa34e0d761edf93ec07fd36df5c38ce4c406fc5dac1749fbb0d"}, + {file = "stringzilla-3.12.3-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:87abcfe64a5088cbe5d78cf83124248686062c651b3b20d85723918b805c9652"}, + {file = "stringzilla-3.12.3-cp37-cp37m-win32.whl", hash = "sha256:1c4564425e44b11b5c5a1d569e786855c68d25ae7b9abfc48060516cb756167d"}, + {file = "stringzilla-3.12.3-cp37-cp37m-win_amd64.whl", hash = "sha256:f2fd71fa4c554653e476ea97d09f4d1f15e8f497f829db46139cf0aa698abd4f"}, + {file = "stringzilla-3.12.3-cp38-cp38-macosx_10_11_universal2.whl", hash = "sha256:1dc00bb8f18f0dc18da671af985ce05e0c89de357e54361b79c535a7567cbac1"}, + {file = "stringzilla-3.12.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:e06982803400023a105faff4e9ad78326fc55e18200a7dea60e2814c1fbc588a"}, + {file = "stringzilla-3.12.3-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:fff766b0bfe8b93602e3066483ba16bd5be6b3bcdd369130bb242e50d29ef3b6"}, + {file = "stringzilla-3.12.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33a880aee9b227fbcecfe1104fdf814d69727d4de1002b0d1697e4dc13a0b10d"}, + {file = "stringzilla-3.12.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6b951825f83f8464fc49684c1b54e2ba12e07a38b8d8fc04d630bcae7b77284a"}, + {file = "stringzilla-3.12.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1bcc12a7e05919383663fc6c4c6352447c9e0a2fb91865168c8352e5202722d6"}, + {file = "stringzilla-3.12.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:484931036dc85ab0c768e1f14e0d22397c3c7619b675c07b37825f5524d5d31f"}, + {file = "stringzilla-3.12.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5c08d700cf7769beb2e6810629d60ea2036a2ed7cd66eb500af3f1d551f9f023"}, + {file = "stringzilla-3.12.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:cdd478f4ce671ab57a9ed7fb7c3673c91004fe65ad7220777683d4c3978936d0"}, + {file = "stringzilla-3.12.3-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:1915ac69956ce1fc7c793944259fc223e6bae6d0ad5693f53db42317d7728a9b"}, + {file = "stringzilla-3.12.3-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:436279413939e9ebed09bbfba7f5c6c4aadd05e0e639d2992b70a474524bc95b"}, + {file = "stringzilla-3.12.3-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:cb4abcf117d126028e8a917fd27390b8e259823d67ce85f85c73a3aac561881a"}, + {file = "stringzilla-3.12.3-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:d9e74323816e1e7984f13ce2453b65ee8a70a39e08b22ae9b22741cad3f894d1"}, + {file = "stringzilla-3.12.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3fd0836e0298e9a3b219a76c8179643b29b84090b41d6123c03277672035c852"}, + {file = "stringzilla-3.12.3-cp38-cp38-win32.whl", hash = "sha256:caf6165ec9a1cb4baf24146ca2d123c31fedf0b51b1d3c4f913257f956f1551e"}, + {file = "stringzilla-3.12.3-cp38-cp38-win_amd64.whl", hash = "sha256:810ff7f3a187f33f9027df6453045b65aecdb2f94a51ee7461ff918ba174cfb6"}, + {file = "stringzilla-3.12.3-cp39-cp39-macosx_10_11_universal2.whl", hash = "sha256:1207e68c89e0002c75f3c8bdcc19365262a31088c5f0341b54903c98f4428971"}, + {file = "stringzilla-3.12.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:f52273bdcb4086a3f0391f9ea292e3f463814afee4643a2e27f2ff991088b580"}, + {file = "stringzilla-3.12.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ce7776eb035f1e1450694abfb9c4e60e3c6f31358966ccd440f2bf7a58f5b2d4"}, + {file = "stringzilla-3.12.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f0c409967e23673dd500ca91d3889eba3944b75cb3ae70fc22b22e7298ab4c76"}, + {file = "stringzilla-3.12.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3f29c5aaaeaa1b6fd357726bdd1eb66ac6879f7be9136c585527e61d35e8250f"}, + {file = "stringzilla-3.12.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a73108cbe96f08f10de2c25e943e05c606b315ae8f02efb6600a2058485d76c2"}, + {file = "stringzilla-3.12.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b9bbb9efaf50a08ea3cfeb910c768b530f5d4d99b9a5c89de0535f8c0d8c0247"}, + {file = 
"stringzilla-3.12.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f5fa3b63eef32ae403b85a53783d892a65d9303c99eaf0659c3792e430ecd01"}, + {file = "stringzilla-3.12.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8270d07a2e68a46141af1cb959ac8c3ac823d0d6f8107c287211add01e8223fd"}, + {file = "stringzilla-3.12.3-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:f8673b11b8bcea17a574b038b1349672ee1067462f598c031f7e59bb35339fd8"}, + {file = "stringzilla-3.12.3-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f1a20cf3760176652affe5f66f1f7f6c106e7bdbfc6d27e4ffb5f0ebfc03a656"}, + {file = "stringzilla-3.12.3-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:23212e9da476d551a792f6e9a7ee7dd739d7b6a568685c10752275ca5f35fd9e"}, + {file = "stringzilla-3.12.3-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:ea06ee9a2b5a56f817e1742e8d502a60315aaeaad26631723c6a3405959be61e"}, + {file = "stringzilla-3.12.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4afee16ca8f89ac249b2d3581a24b9df63efb73e9d71d136411fbf088e036103"}, + {file = "stringzilla-3.12.3-cp39-cp39-win32.whl", hash = "sha256:28f05183f0a0c9461ba9f66155aa6fd21f26a8b4daeb07b7a8833e9b17ffb264"}, + {file = "stringzilla-3.12.3-cp39-cp39-win_amd64.whl", hash = "sha256:b6535c2768f4e365997b6483a6f38cd24ff1444a3e1da85fa411ba21a9c814dd"}, + {file = "stringzilla-3.12.3-cp39-cp39-win_arm64.whl", hash = "sha256:11e6e6ccede0eb59022d0634540adf29afa1a00c6612b0daa1c207b440ee8012"}, + {file = "stringzilla-3.12.3.tar.gz", hash = "sha256:33ed7cb71724373474d387a0e17751bd9ad21caa08a1b8b74b961dea4b890a66"}, +] + +[[package]] +name = "termcolor" +version = "3.0.1" +description = "ANSI color formatting for output in terminal" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "termcolor-3.0.1-py3-none-any.whl", hash = "sha256:da1ed4ec8a5dc5b2e17476d859febdb3cccb612be1c36e64511a6f2485c10c69"}, + {file = "termcolor-3.0.1.tar.gz", hash = "sha256:a6abd5c6e1284cea2934443ba806e70e5ec8fd2449021be55c280f8a3731b611"}, +] + +[package.extras] +tests = ["pytest", "pytest-cov"] + +[[package]] +name = "tifffile" +version = "2025.3.30" +description = "Read and write TIFF files" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "tifffile-2025.3.30-py3-none-any.whl", hash = "sha256:0ed6eee7b66771db2d1bfc42262a51b01887505d35539daef118f4ff8c0f629c"}, + {file = "tifffile-2025.3.30.tar.gz", hash = "sha256:3cdee47fe06cd75367c16bc3ff34523713156dae6cd498e3a392e5b39a51b789"}, +] + +[package.dependencies] +numpy = "*" + +[package.extras] +all = ["defusedxml", "fsspec", "imagecodecs (>=2024.12.30)", "lxml", "matplotlib", "zarr (<3)"] +codecs = ["imagecodecs (>=2024.12.30)"] +plot = ["matplotlib"] +test = ["cmapfile", "czifile", "dask", "defusedxml", "fsspec", "imagecodecs", "lfdfiles", "lxml", "ndtiff", "oiffile", "psdtags", "pytest", "roifile", "xarray", "zarr (<3)"] +xml = ["defusedxml", "lxml"] +zarr = ["fsspec", "zarr (<3)"] + +[[package]] +name = "tqdm" +version = "4.67.1" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, + {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["nbval", 
"pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"] +discord = ["requests"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "typing-extensions" +version = "4.13.1" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "typing_extensions-4.13.1-py3-none-any.whl", hash = "sha256:4b6cf02909eb5495cfbc3f6e8fd49217e6cc7944e145cdda8caa3734777f9e69"}, + {file = "typing_extensions-4.13.1.tar.gz", hash = "sha256:98795af00fb9640edec5b8e31fc647597b4691f099ad75f469a2616be1a76dff"}, +] + +[[package]] +name = "typing-inspection" +version = "0.4.0" +description = "Runtime typing introspection tools" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f"}, + {file = "typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122"}, +] + +[package.dependencies] +typing-extensions = ">=4.12.0" + +[[package]] +name = "urllib3" +version = "2.3.0" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, + {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[metadata] +lock-version = "2.1" +python-versions = ">=3.12,<4.0" +content-hash = "e9e804116cb06501db7bc04112ecc9abadb632372dc424232bbfbf9d4e04e109" diff --git a/services/docreader/pyproject.toml b/services/docreader/pyproject.toml new file mode 100644 index 0000000..7f017ec --- /dev/null +++ b/services/docreader/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "docreader" +version = "0.1.0" +description = "" +readme = "README.md" +requires-python = ">=3.12,<4.0" +dependencies = [ + "paddlepaddle (>=3.0.0,<4.0.0)", + "paddleocr (>=2.10.0,<3.0.0)", + "playwright (>=1.51.0,<2.0.0)", + "setuptools (>=79.0.0,<80.0.0)", + "textract (>=1.6.5,<2.0.0)", + "antiword (>=0.3.2,<0.4.0)" +] + + +[build-system] +requires = ["poetry-core>=2.0.0,<3.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/services/docreader/requirements.txt b/services/docreader/requirements.txt new file mode 100644 index 0000000..a811c43 --- /dev/null +++ b/services/docreader/requirements.txt @@ -0,0 +1,26 @@ +grpcio +grpcio-tools +protobuf +python-docx +PyPDF2 +requests +Pillow +beautifulsoup4 +lxml +playwright +asyncio +urllib3 +markdownify +mistletoe +goose3[all] +# paddlepaddle==3.0.0 (普通CPU版本已被注释) +paddleocr==3.0.0 +markdown +pypdf +cos-python-sdk-v5 +textract +antiword +openai + +--extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ +paddlepaddle-gpu==3.0.0 \ No newline at end of file diff --git a/services/docreader/scripts/generate_proto.sh b/services/docreader/scripts/generate_proto.sh new file mode 100644 index 0000000..bca24c1 --- /dev/null +++ b/services/docreader/scripts/generate_proto.sh @@ -0,0 +1,31 @@ 
+#!/bin/bash
+set -x
+
+# Set directories
+PROTO_DIR="src/proto"
+PYTHON_OUT="src/proto"
+GO_OUT="src/proto"
+
+# Generate Python code
+python -m grpc_tools.protoc -I${PROTO_DIR} \
+    --python_out=${PYTHON_OUT} \
+    --grpc_python_out=${PYTHON_OUT} \
+    ${PROTO_DIR}/docreader.proto
+
+# Generate Go code
+protoc -I${PROTO_DIR} --go_out=${GO_OUT} \
+    --go_opt=paths=source_relative \
+    --go-grpc_out=${GO_OUT} \
+    --go-grpc_opt=paths=source_relative \
+    ${PROTO_DIR}/docreader.proto
+
+# Fix Python import issue (macOS-compatible version)
+if [ "$(uname)" == "Darwin" ]; then
+    # macOS version
+    sed -i '' 's/import docreader_pb2/from . import docreader_pb2/g' ${PYTHON_OUT}/docreader_pb2_grpc.py
+else
+    # Linux version
+    sed -i 's/import docreader_pb2/from . import docreader_pb2/g' ${PYTHON_OUT}/docreader_pb2_grpc.py
+fi
+
+echo "Proto files generated successfully!"
\ No newline at end of file
diff --git a/services/docreader/src/.DS_Store b/services/docreader/src/.DS_Store
new file mode 100644
index 0000000..9946d37
Binary files /dev/null and b/services/docreader/src/.DS_Store differ
diff --git a/services/docreader/src/__init__.py b/services/docreader/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/services/docreader/src/client/client.go b/services/docreader/src/client/client.go
new file mode 100644
index 0000000..ee74246
--- /dev/null
+++ b/services/docreader/src/client/client.go
@@ -0,0 +1,115 @@
+package client
+
+import (
+    "fmt"
+    "log"
+    "os"
+    "time"
+
+    "github.com/Tencent/WeKnora/services/docreader/src/proto"
+    "google.golang.org/grpc"
+    "google.golang.org/grpc/credentials/insecure"
+    "google.golang.org/grpc/resolver"
+)
+
+const (
+    maxMessageSize = 50 * 1024 * 1024 // 50MB
+)
+
+var (
+    // Logger is the default logger used by the client
+    Logger = log.New(os.Stdout, "[DocReader] ", log.LstdFlags|log.Lmicroseconds)
+)
+
+// ImageInfo represents information about a single image
+type ImageInfo struct {
+    URL         string // Image URL (on COS)
+    Caption     string // Image caption
+    OCRText     string // Text extracted by OCR
+    OriginalURL string // Original image URL
+    Start       int    // Start position of the image in the text
+    End         int    // End position of the image in the text
+}
+
+// Client represents a DocReader service client
+type Client struct {
+    conn *grpc.ClientConn
+    proto.DocReaderClient
+    debug bool
+}
+
+// NewClient creates a new DocReader client with the specified address
+func NewClient(addr string) (*Client, error) {
+    Logger.Printf("INFO: Creating new DocReader client connecting to %s", addr)
+
+    // Set message size limits
+    opts := []grpc.DialOption{
+        grpc.WithTransportCredentials(insecure.NewCredentials()),
+        grpc.WithDefaultServiceConfig(`{"loadBalancingPolicy":"round_robin"}`),
+        grpc.WithDefaultCallOptions(
+            grpc.MaxCallRecvMsgSize(maxMessageSize),
+            grpc.MaxCallSendMsgSize(maxMessageSize),
+        ),
+    }
+    resolver.SetDefaultScheme("dns")
+
+    startTime := time.Now()
+    conn, err := grpc.Dial("dns:///"+addr, opts...)
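+    // Note: grpc.Dial connects lazily here (grpc.WithBlock is not set), so a nil error only means the
+    // target and dial options were valid; the "Successfully connected" log below measures client
+    // construction time, not server reachability.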
+    if err != nil {
+        Logger.Printf("ERROR: Failed to connect to DocReader service: %v", err)
+        return nil, err
+    }
+    Logger.Printf("INFO: Successfully connected to DocReader service in %v", time.Since(startTime))
+
+    return &Client{
+        conn:            conn,
+        DocReaderClient: proto.NewDocReaderClient(conn),
+        debug:           false,
+    }, nil
+}
+
+// Close closes the client connection
+func (c *Client) Close() error {
+    Logger.Printf("INFO: Closing DocReader client connection")
+    return c.conn.Close()
+}
+
+// SetDebug enables or disables debug logging
+func (c *Client) SetDebug(debug bool) {
+    c.debug = debug
+    Logger.Printf("INFO: Debug logging set to %v", debug)
+}
+
+// Log logs a message with the appropriate level
+func (c *Client) Log(level string, format string, args ...interface{}) {
+    if level == "DEBUG" && !c.debug {
+        return
+    }
+    Logger.Printf("%s: %s", level, fmt.Sprintf(format, args...))
+}
+
+// GetImagesFromChunk extracts all image information from a Chunk
+func GetImagesFromChunk(chunk *proto.Chunk) []ImageInfo {
+    if chunk == nil || len(chunk.Images) == 0 {
+        return nil
+    }
+
+    images := make([]ImageInfo, 0, len(chunk.Images))
+    for _, img := range chunk.Images {
+        images = append(images, ImageInfo{
+            URL:         img.Url,
+            Caption:     img.Caption,
+            OCRText:     img.OcrText,
+            OriginalURL: img.OriginalUrl,
+            Start:       int(img.Start),
+            End:         int(img.End),
+        })
+    }
+
+    return images
+}
+
+// HasImagesInChunk reports whether a Chunk contains any images
+func HasImagesInChunk(chunk *proto.Chunk) bool {
+    return chunk != nil && len(chunk.Images) > 0
+}
diff --git a/services/docreader/src/client/client_test.go b/services/docreader/src/client/client_test.go
new file mode 100644
index 0000000..5551048
--- /dev/null
+++ b/services/docreader/src/client/client_test.go
@@ -0,0 +1,154 @@
+package client
+
+import (
+    "context"
+    "log"
+    "os"
+    "testing"
+    "time"
+
+    "github.com/Tencent/WeKnora/services/docreader/src/proto"
+)
+
+func init() {
+    // Configure test logging
+    log.SetOutput(os.Stdout)
+    log.SetFlags(log.LstdFlags | log.Lmicroseconds | log.Lshortfile)
+    log.Println("INFO: Initializing DocReader client tests")
+}
+
+func TestReadFromURL(t *testing.T) {
+    log.Println("INFO: Starting TestReadFromURL")
+
+    // Create test client
+    log.Println("INFO: Creating test client")
+    client, err := NewClient("localhost:50051")
+    if err != nil {
+        log.Printf("ERROR: Failed to create client: %v", err)
+        t.Fatalf("Failed to create client: %v", err)
+    }
+    defer client.Close()
+
+    // Enable debug logging
+    client.SetDebug(true)
+
+    // Test the ReadFromURL method
+    log.Println("INFO: Sending ReadFromURL request to server")
+    startTime := time.Now()
+    resp, err := client.ReadFromURL(
+        context.Background(),
+        &proto.ReadFromURLRequest{
+            Url:   "https://example.com",
+            Title: "test",
+            ReadConfig: &proto.ReadConfig{
+                ChunkSize:        512,
+                ChunkOverlap:     50,
+                Separators:       []string{"\n\n", "\n", "。"},
+                EnableMultimodal: true,
+            },
+        },
+    )
+
+    requestDuration := time.Since(startTime)
+    if err != nil {
+        log.Printf("ERROR: ReadFromURL failed: %v", err)
+        t.Fatalf("ReadFromURL failed: %v", err)
+    }
+    log.Printf("INFO: ReadFromURL completed in %v", requestDuration)
+
+    // Verify results
+    chunkCount := len(resp.Chunks)
+    log.Printf("INFO: Received %d chunks from URL parsing", chunkCount)
+    if chunkCount == 0 {
+        log.Println("WARN: Expected non-empty content but received none")
+        t.Error("Expected non-empty content")
+    }
+
+    // Print results
+    for i, chunk := range resp.Chunks {
+        if i < 2 || i >= chunkCount-2 { // Only print the first two and last two chunks
+            log.Printf("DEBUG: Chunk %d: %s", chunk.Seq, truncateString(chunk.Content, 50))
+        } else if i == 2 && chunkCount > 4 {
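+            // Emit a single ellipsis marker for the middle chunks that are skipped in the log output.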
log.Printf("DEBUG: ... %d more chunks ...", chunkCount-4) + } + } + + log.Println("INFO: TestReadFromURL completed successfully") +} + +func TestReadFromFileWithChunking(t *testing.T) { + log.Println("INFO: Starting TestReadFromFileWithChunking") + + // 创建测试客户端 + log.Println("INFO: Creating test client") + client, err := NewClient("localhost:50051") + if err != nil { + log.Printf("ERROR: Failed to create client: %v", err) + t.Fatalf("Failed to create client: %v", err) + } + defer client.Close() + + // 启用调试日志 + client.SetDebug(true) + + // 读取测试文件 + log.Println("INFO: Reading test file") + fileContent, err := os.ReadFile("../testdata/test.md") + if err != nil { + log.Printf("ERROR: Failed to read test file: %v", err) + t.Fatalf("Failed to read test file: %v", err) + } + log.Printf("INFO: Read test file, size: %d bytes", len(fileContent)) + + // 测试 ReadFromFile 方法,带分块参数 + log.Println("INFO: Sending ReadFromFile request to server") + startTime := time.Now() + resp, err := client.ReadFromFile( + context.Background(), + &proto.ReadFromFileRequest{ + FileContent: fileContent, + FileName: "test.md", + FileType: "md", + ReadConfig: &proto.ReadConfig{ + ChunkSize: 200, + ChunkOverlap: 50, + Separators: []string{"\n\n", "\n", "。"}, + EnableMultimodal: true, + }, + }, + ) + + requestDuration := time.Since(startTime) + if err != nil { + log.Printf("ERROR: ReadFromFile failed: %v", err) + t.Fatalf("ReadFromFile failed: %v", err) + } + log.Printf("INFO: ReadFromFile completed in %v", requestDuration) + + // 验证结果 + chunkCount := len(resp.Chunks) + log.Printf("INFO: Received %d chunks from file parsing", chunkCount) + if chunkCount == 0 { + log.Println("WARN: Expected non-empty content but received none") + t.Error("Expected non-empty content") + } + + // 打印结果 + for i, chunk := range resp.Chunks { + if i < 2 || i >= chunkCount-2 { // 只打印前两个和后两个块 + log.Printf("DEBUG: Chunk %d: %s", chunk.Seq, truncateString(chunk.Content, 50)) + } else if i == 2 && chunkCount > 4 { + log.Printf("DEBUG: ... %d more chunks ...", chunkCount-4) + } + } + + log.Println("INFO: TestReadFromFileWithChunking completed successfully") +} + +// 截断字符串以供日志打印 +func truncateString(s string, maxLen int) string { + if len(s) <= maxLen { + return s + } + return s[:maxLen] + "..." +} diff --git a/services/docreader/src/download_deps.py b/services/docreader/src/download_deps.py new file mode 100644 index 0000000..1eba539 --- /dev/null +++ b/services/docreader/src/download_deps.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +import os +import logging +from paddleocr import PaddleOCR + +# 添加当前目录到Python路径 +current_dir = os.path.dirname(os.path.abspath(__file__)) +if current_dir not in sys.path: + sys.path.append(current_dir) + +# 导入ImageParser +from parser.image_parser import ImageParser + +# 配置日志 +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) +logger = logging.getLogger(__name__) + + +def init_ocr_model(): + PaddleOCR() diff --git a/services/docreader/src/parser/__init__.py b/services/docreader/src/parser/__init__.py new file mode 100644 index 0000000..a03ffcf --- /dev/null +++ b/services/docreader/src/parser/__init__.py @@ -0,0 +1,41 @@ +""" +Parser module for WeKnora document processing system. 
+ +This module provides document parsers for various file formats including: +- Microsoft Word documents (.doc, .docx) +- PDF documents +- Markdown files +- Plain text files +- Images with text content +- Web pages + +The parsers extract content from documents and can split them into +meaningful chunks for further processing and indexing. +""" + +from .base_parser import BaseParser, ParseResult +from .docx_parser import DocxParser +from .doc_parser import DocParser +from .pdf_parser import PDFParser +from .markdown_parser import MarkdownParser +from .text_parser import TextParser +from .image_parser import ImageParser +from .web_parser import WebParser +from .parser import Parser, ChunkingConfig +from .ocr_engine import OCREngine + +# Export public classes and modules +__all__ = [ + "BaseParser", # Base parser class that all format parsers inherit from + "DocxParser", # Parser for .docx files (modern Word documents) + "DocParser", # Parser for .doc files (legacy Word documents) + "PDFParser", # Parser for PDF documents + "MarkdownParser", # Parser for Markdown text files + "TextParser", # Parser for plain text files + "ImageParser", # Parser for images with text content + "WebParser", # Parser for web pages + "Parser", # Main parser factory that selects the appropriate parser + "ChunkingConfig", # Configuration for text chunking behavior + "ParseResult", # Standard result format returned by all parsers + "OCREngine", # OCR engine for extracting text from images +] diff --git a/services/docreader/src/parser/base_parser.py b/services/docreader/src/parser/base_parser.py new file mode 100644 index 0000000..4a83fb6 --- /dev/null +++ b/services/docreader/src/parser/base_parser.py @@ -0,0 +1,1302 @@ +# -*- coding: utf-8 -*- +import re +import os +import uuid +import asyncio +from typing import List, Dict, Any, Optional, Tuple +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +import logging +from qcloud_cos import CosConfig, CosS3Client +import sys +import traceback +import numpy as np +import time +from .ocr_engine import OCREngine + +# Add parent directory to Python path for src imports +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) +if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) + +try: + from services.docreader.src.parser.caption import Caption +except ImportError: + # Fallback: try relative import + try: + from .caption import Caption + except ImportError: + # If both imports fail, set to None + Caption = None + logging.warning("Failed to import Caption, image captioning will be unavailable") + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +@dataclass +class Chunk: + """Chunk result""" + + content: str # Chunk content + seq: int # Chunk sequence number + start: int # Chunk start position + end: int # Chunk end position + images: List[Dict[str, Any]] = field(default_factory=list) # Images in the chunk + + +@dataclass +class ParseResult: + """Parse result""" + + text: str # Extracted text content + chunks: Optional[List[Chunk]] = None # Chunk results + + +class BaseParser(ABC): + """Base parser interface""" + + # Class variable for shared OCR engine instance + _ocr_engine = None + _ocr_engine_failed = False + + @classmethod + def get_ocr_engine(cls, backend_type="paddle", **kwargs): + """Get OCR engine instance + + Args: + backend_type: OCR engine type, e.g. 
"paddle", "nanonets" + **kwargs: Arguments for the OCR engine + + Returns: + OCR engine instance or None + """ + if cls._ocr_engine is None and not cls._ocr_engine_failed: + try: + cls._ocr_engine = OCREngine.get_instance(backend_type=backend_type, **kwargs) + if cls._ocr_engine is None: + cls._ocr_engine_failed = True + logger.error(f"Failed to initialize OCR engine ({backend_type})") + return None + logger.info(f"Successfully initialized OCR engine: {backend_type}") + except Exception as e: + cls._ocr_engine_failed = True + logger.error(f"Failed to initialize OCR engine: {str(e)}") + return None + return cls._ocr_engine + + + def __init__( + self, + file_name: str = "", + file_type: str = None, + enable_multimodal: bool = True, + chunk_size: int = 1000, + chunk_overlap: int = 200, + separators: list = ["\n\n", "\n", "。"], + ocr_backend: str = "paddle", + ocr_config: dict = None, + max_image_size: int = 1920, # Maximum image size + max_concurrent_tasks: int = 5, # Max concurrent tasks + max_chunks: int = 1000, # Max number of returned chunks + ): + """Initialize parser + + Args: + file_name: File name + file_type: File type, inferred from file_name if None + enable_multimodal: Whether to enable multimodal + chunk_size: Chunk size + chunk_overlap: Chunk overlap + separators: List of separators + ocr_backend: OCR engine type + ocr_config: OCR engine config + max_image_size: Maximum image size + max_concurrent_tasks: Max concurrent tasks + max_chunks: Max number of returned chunks + """ + self.file_name = file_name + self.file_type = file_type or os.path.splitext(file_name)[1] + self.enable_multimodal = enable_multimodal + self.chunk_size = chunk_size + self.chunk_overlap = chunk_overlap + self.separators = separators + self.ocr_backend = os.getenv("OCR_BACKEND", ocr_backend) + self.ocr_config = ocr_config or {} + self.max_image_size = max_image_size + self.max_concurrent_tasks = max_concurrent_tasks + self.max_chunks = max_chunks + + logger.info( + f"Initializing {self.__class__.__name__} for file: {file_name}, type: {self.file_type}" + ) + logger.info( + f"Parser config: chunk_size={chunk_size}, " + f"overlap={chunk_overlap}, " + f"multimodal={enable_multimodal}, " + f"ocr_backend={ocr_backend}, " + f"max_chunks={max_chunks}" + ) + # Only initialize Caption service if multimodal is enabled + if self.enable_multimodal: + try: + self.caption_parser = Caption() + except Exception as e: + logger.warning(f"Failed to initialize Caption service: {str(e)}") + self.caption_parser = None + else: + self.caption_parser = None + + def perform_ocr(self, image): + """Execute OCR recognition on the image + + Args: + image: Image object (PIL.Image or numpy array) + + Returns: + Extracted text string + """ + start_time = time.time() + logger.info("Starting OCR recognition") + resized_image = None + + try: + # Resize image to avoid processing large images + resized_image = self._resize_image_if_needed(image) + + # Get OCR engine + ocr_engine = self.get_ocr_engine(backend_type=self.ocr_backend, **self.ocr_config) + if ocr_engine is None: + logger.error(f"OCR engine ({self.ocr_backend}) initialization failed or unavailable, " + "skipping OCR recognition") + return "" + + # Execute OCR prediction + logger.info(f"Executing OCR prediction (using {self.ocr_backend} engine)") + # Add extra exception handling + try: + ocr_result = ocr_engine.predict(resized_image) + except RuntimeError as e: + # Handle common CUDA memory issues or other runtime errors + logger.error(f"OCR prediction runtime error: {str(e)}") + 
return "" + except Exception as e: + # Handle other prediction errors + logger.error(f"Unexpected OCR prediction error: {str(e)}") + return "" + + process_time = time.time() - start_time + logger.info(f"OCR recognition completed, time: {process_time:.2f} seconds") + return ocr_result + except Exception as e: + process_time = time.time() - start_time + logger.error(f"OCR recognition error: {str(e)}, time: {process_time:.2f} seconds") + return "" + finally: + # Release image resources + if resized_image is not image and hasattr(resized_image, 'close'): + # Only close the new image we created, not the original image + resized_image.close() + + def _resize_image_if_needed(self, image): + """Resize image to avoid processing large images + + Args: + image: Image object (PIL.Image or numpy array) + + Returns: + Resized image + """ + try: + # Check if it's a PIL.Image object + if hasattr(image, 'width') and hasattr(image, 'height'): + width, height = image.width, image.height + # Check if resizing is needed + if width > self.max_image_size or height > self.max_image_size: + logger.info(f"Resizing image, original size: {width}x{height}") + # Calculate scaling factor + scale = min(self.max_image_size / width, self.max_image_size / height) + new_width = int(width * scale) + new_height = int(height * scale) + # Resize + resized_image = image.resize((new_width, new_height)) + logger.info(f"Image resized to: {new_width}x{new_height}") + return resized_image + # If it's a numpy array + elif hasattr(image, 'shape') and len(image.shape) == 3: + height, width = image.shape[0], image.shape[1] + if width > self.max_image_size or height > self.max_image_size: + logger.info(f"Resizing numpy image, original size: {width}x{height}") + # Use PIL for resizing + from PIL import Image + pil_image = Image.fromarray(image) + try: + scale = min(self.max_image_size / width, self.max_image_size / height) + new_width = int(width * scale) + new_height = int(height * scale) + resized_image = pil_image.resize((new_width, new_height)) + # Convert back to numpy array + import numpy as np + resized_array = np.array(resized_image) + logger.info(f"numpy image resized to: {new_width}x{new_height}") + return resized_array + finally: + # Ensure PIL image is closed + pil_image.close() + if 'resized_image' in locals() and hasattr(resized_image, 'close'): + resized_image.close() + return image + except Exception as e: + logger.warning(f"Failed to resize image: {str(e)}, using original image") + return image + + def process_image(self, image, image_url=None): + """Process image: first perform OCR, then get caption if text is available + + Args: + image: Image object (PIL.Image or numpy array) + image_url: Image URL (if uploaded) + + Returns: + tuple: (ocr_text, caption, image_url) + - ocr_text: OCR extracted text + - caption: Image description (if OCR has text) or empty string + - image_url: Image URL (if provided) + """ + logger.info("Starting image processing (OCR + optional caption)") + + # Resize image + image = self._resize_image_if_needed(image) + + # Perform OCR recognition + ocr_text = self.perform_ocr(image) + caption = "" + + if self.caption_parser and image_url: + logger.info(f"OCR successfully extracted {len(ocr_text)} characters, continuing to get caption") + caption = self.get_image_caption(image_url) + if caption: + logger.info(f"Successfully obtained image caption: {caption}") + else: + logger.warning("Failed to get caption") + else: + logger.info("image_url not provided or Caption service not initialized, skipping 
caption retrieval") + + # Release image resources + del image + + return ocr_text, caption, image_url + + async def process_image_async(self, image, image_url=None): + """Asynchronously process image: first perform OCR, then get caption if text is available + + Args: + image: Image object (PIL.Image or numpy array) + image_url: Image URL (if uploaded) + + Returns: + tuple: (ocr_text, caption, image_url) + - ocr_text: OCR extracted text + - caption: Image description (if OCR has text) or empty string + - image_url: Image URL (if provided) + """ + logger.info("Starting asynchronous image processing (OCR + optional caption)") + resized_image = None + + try: + # Resize image + resized_image = self._resize_image_if_needed(image) + + # Perform OCR recognition (using run_in_executor to execute synchronous operations in the event loop) + loop = asyncio.get_event_loop() + try: + # Add timeout mechanism to avoid infinite blocking (30 seconds timeout) + ocr_task = loop.run_in_executor(None, self.perform_ocr, resized_image) + ocr_text = await asyncio.wait_for(ocr_task, timeout=30.0) + except asyncio.TimeoutError: + logger.error("OCR processing timed out (30 seconds), skipping this image") + ocr_text = "" + except Exception as e: + logger.error(f"OCR processing error: {str(e)}") + ocr_text = "" + + logger.info(f"OCR successfully extracted {len(ocr_text)} characters, continuing to get caption") + caption = "" + if self.caption_parser and image_url: + try: + # Add timeout to avoid blocking caption retrieval (30 seconds timeout) + caption_task = self.get_image_caption_async(image_url) + image_url, caption = await asyncio.wait_for(caption_task, timeout=30.0) + if caption: + logger.info(f"Successfully obtained image caption: {caption}") + else: + logger.warning("Failed to get caption") + except asyncio.TimeoutError: + logger.warning("Caption retrieval timed out, skipping") + except Exception as e: + logger.error(f"Failed to get caption: {str(e)}") + else: + logger.info("image_url not provided or Caption service not initialized, skipping caption retrieval") + + return ocr_text, caption, image_url + finally: + # Release image resources + if resized_image is not image and hasattr(resized_image, 'close'): + # Only close the new image we created, not the original image + resized_image.close() + + async def process_with_limit(self, idx, image, url, semaphore, current_request_id=None): + """Function to process a single image using a semaphore""" + try: + # Set request ID in the asynchronous task + if current_request_id: + try: + from utils.request import set_request_id + set_request_id(current_request_id) + logger.info(f"Asynchronous task {idx+1} setting request ID: {current_request_id}") + except Exception as e: + logger.warning(f"Failed to set request ID in asynchronous task: {str(e)}") + + logger.info(f"Waiting to process image {idx+1}") + async with semaphore: # Use semaphore to control concurrency + logger.info(f"Starting to process image {idx+1}") + result = await self.process_image_async(image, url) + logger.info(f"Completed processing image {idx+1}") + return result + except Exception as e: + logger.error(f"Error processing image {idx+1}: {str(e)}") + return ("", "", url) # Return empty result to avoid overall failure + finally: + # Manually release image resources + if hasattr(image, 'close'): + image.close() + + async def process_multiple_images(self, images_data): + """Process multiple images concurrently + + Args: + images_data: List of (image, image_url) tuples + + Returns: + List of (ocr_text, 
caption, image_url) tuples + """ + logger.info(f"Starting concurrent processing of {len(images_data)} images") + + if not images_data: + logger.warning("No image data to process") + return [] + + # Set max concurrency, reduce concurrency to avoid resource contention + max_concurrency = min(self.max_concurrent_tasks, 5) # Reduce concurrency to prevent excessive memory usage + + # Use semaphore to limit concurrency + semaphore = asyncio.Semaphore(max_concurrency) + + # Store results to avoid overall failure due to task failure + results = [] + + # Get current request ID to set in each asynchronous task + current_request_id = None + try: + from utils.request import get_request_id + current_request_id = get_request_id() + logger.info(f"Capturing current request ID before async processing: {current_request_id}") + except Exception as e: + logger.warning(f"Failed to get current request ID: {str(e)}") + + # Create all tasks, but use semaphore to limit actual concurrency + tasks = [ + self.process_with_limit(i, img, url, semaphore, current_request_id) + for i, (img, url) in enumerate(images_data) + ] + + try: + # Execute all tasks, but set overall timeout + completed_results = await asyncio.gather(*tasks, return_exceptions=True) + + # Handle possible exception results + for i, result in enumerate(completed_results): + if isinstance(result, Exception): + logger.error(f"Image {i+1} processing returned an exception: {str(result)}") + # For exceptions, add empty results + if i < len(images_data): + results.append(("", "", images_data[i][1])) + else: + results.append(result) + except Exception as e: + logger.error(f"Error during concurrent image processing: {str(e)}") + # Add empty results for all images + results = [("", "", url) for _, url in images_data] + finally: + # Clean up references and trigger garbage collection + images_data.clear() + logger.info("Image processing resource cleanup complete") + + logger.info(f"Completed concurrent processing of {len(results)}/{len(images_data)} images") + return results + + def decode_bytes(self, content: bytes) -> str: + """Intelligently decode byte stream, supports multiple encodings + + Tries to decode in common encodings, if all fail, uses latin-1 as fallback + + Args: + content: Byte stream to decode + + Returns: + Decoded string + """ + logger.info(f"Attempting to decode bytes of length: {len(content)}") + # Common encodings, sorted by priority + encodings = ["utf-8", "gb18030", "gb2312", "gbk", "big5", "ascii", "latin-1"] + text = None + + # Try decoding with each encoding format + for encoding in encodings: + try: + text = content.decode(encoding) + logger.info(f"Successfully decoded content using {encoding} encoding") + break + except UnicodeDecodeError: + logger.info(f"Failed to decode using {encoding} encoding") + continue + + # If all encodings fail, use latin-1 as fallback + if text is None: + text = content.decode("latin-1") + logger.warning( + f"Unable to determine correct encoding, using latin-1 as fallback. " + f"This may cause character issues." + ) + + logger.info(f"Decoded text length: {len(text)} characters") + return text + + def get_image_caption(self, image_url: str) -> str: + """Get image description + + Args: + image_url: Image URL + + Returns: + Image description + """ + start_time = time.time() + logger.info( + f"Getting caption for image: {image_url[:250]}..." 
+ if len(image_url) > 250 + else f"Getting caption for image: {image_url}" + ) + caption = self.caption_parser.get_caption(image_url) + if caption: + logger.info( + f"Received caption of length: {len(caption)}, caption: {caption}, image_url: {image_url}," + f"cost: {time.time() - start_time} seconds" + ) + else: + logger.warning("Failed to get caption for image") + return caption + + async def get_image_caption_async(self, image_url: str) -> Tuple[str, str]: + """Asynchronously get image description + + Args: + image_url: Image URL + + Returns: + Tuple[str, str]: Image URL and corresponding description + """ + caption = self.get_image_caption(image_url) + return image_url, caption + + def _init_cos_client(self): + """Initialize Tencent Cloud COS client""" + try: + # Get COS configuration from environment variables + secret_id = os.getenv("COS_SECRET_ID") + secret_key = os.getenv("COS_SECRET_KEY") + region = os.getenv("COS_REGION") + bucket_name = os.getenv("COS_BUCKET_NAME") + appid = os.getenv("COS_APP_ID") + prefix = os.getenv("COS_PATH_PREFIX") + enable_old_domain = ( + os.getenv("COS_ENABLE_OLD_DOMAIN", "true").lower() == "true" + ) + + if not all([secret_id, secret_key, region, bucket_name, appid]): + logger.error( + "Incomplete COS configuration, missing required environment variables" + ) + return None, None, None, None + + # Initialize COS configuration + logger.info( + f"Initializing COS client with region: {region}, bucket: {bucket_name}" + ) + config = CosConfig( + Appid=appid, + Region=region, + SecretId=secret_id, + SecretKey=secret_key, + EnableOldDomain=enable_old_domain, + ) + + # Create client + client = CosS3Client(config) + return client, bucket_name, region, prefix + except Exception as e: + logger.error(f"Failed to initialize COS client: {str(e)}") + return None, None, None, None + + def _get_file_url(self, bucket_name, region, object_key): + """Generate COS object URL + + Args: + bucket_name: Bucket name + region: Region + object_key: Object key + + Returns: + File URL + """ + return f"https://{bucket_name}.cos.{region}.myqcloud.com/{object_key}" + + def upload_file(self, file_path: str) -> str: + """Upload file to Tencent Cloud COS + + Args: + file_path: File path + + Returns: + File URL + """ + logger.info(f"Uploading file to COS: {file_path}") + try: + client, bucket_name, region, prefix = self._init_cos_client() + if not client: + return "" + + # Generate object key, use UUID to avoid conflicts + file_name = os.path.basename(file_path) + object_key = ( + f"{prefix}/images/{uuid.uuid4().hex}{os.path.splitext(file_name)[1]}" + ) + logger.info(f"Generated object key: {object_key}") + + # Upload file + logger.info("Attempting to upload file to COS") + response = client.upload_file( + Bucket=bucket_name, LocalFilePath=file_path, Key=object_key + ) + + # Get file URL + file_url = self._get_file_url(bucket_name, region, object_key) + + logger.info(f"Successfully uploaded file to COS: {file_url}") + return file_url + + except Exception as e: + logger.error(f"Failed to upload file to COS: {str(e)}") + return "" + + def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str: + """Directly upload file content to Tencent Cloud COS + + Args: + content: File byte content + file_ext: File extension, default is .png + + Returns: + File URL + """ + logger.info( + f"Uploading bytes content to COS, content size: {len(content)} bytes" + ) + try: + client, bucket_name, region, prefix = self._init_cos_client() + if not client: + return "" + + # Generate object key, use UUID 
to avoid conflicts + object_key = f"{prefix}/images/{uuid.uuid4().hex}{file_ext}" + logger.info(f"Generated object key: {object_key}") + + # Directly upload file content + logger.info("Attempting to upload bytes content to COS") + response = client.put_object( + Bucket=bucket_name, Body=content, Key=object_key + ) + + # Get file URL + file_url = self._get_file_url(bucket_name, region, object_key) + + logger.info(f"Successfully uploaded bytes to COS: {file_url}") + return file_url + + except Exception as e: + logger.error(f"Failed to upload bytes to COS: {str(e)}") + traceback.print_exc() + return "" + + @abstractmethod + def parse_into_text(self, content: bytes) -> str: + """Parse document content + + Args: + content: Document content + + Returns: + Parse result + """ + pass + + def parse(self, content: bytes) -> ParseResult: + """Parse document content + + Args: + content: Document content + + Returns: + Parse result + """ + logger.info( + f"Parsing document with {self.__class__.__name__}, content size: {len(content)} bytes" + ) + text = self.parse_into_text(content) + logger.info(f"Extracted {len(text)} characters of text from {self.file_name}") + logger.info(f"Beginning chunking process for text") + chunks = self.chunk_text(text) + logger.info(f"Created {len(chunks)} chunks from document") + + # Limit the number of returned chunks + if len(chunks) > self.max_chunks: + logger.warning(f"Limiting chunks from {len(chunks)} to maximum {self.max_chunks}") + chunks = chunks[:self.max_chunks] + + # If multimodal is enabled and file type is supported, process images in each chunk + if self.enable_multimodal: + # Get file extension and convert to lowercase + file_ext = ( + os.path.splitext(self.file_name)[1].lower() + if self.file_name + else ( + self.file_type.lower() + if self.file_type + else "" + ) + ) + + # Define allowed file types for image processing + allowed_types = [ + '.pdf', # PDF files + '.md', '.markdown', # Markdown files + '.doc', '.docx', # Word documents + # Image files + '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp' + ] + + if file_ext in allowed_types: + logger.info(f"Processing images in each chunk for file type: {file_ext}") + chunks = self.process_chunks_images(chunks) + else: + logger.info(f"Skipping image processing for unsupported file type: {file_ext}") + + return ParseResult(text=text, chunks=chunks) + + def _split_into_units(self, text: str) -> List[str]: + """Split text into basic units, preserving Markdown structure + + Args: + text: Text content + + Returns: + List of basic units + """ + logger.info(f"Splitting text into basic units, text length: {len(text)}") + # Match Markdown image syntax + image_pattern = r"!\[.*?\]\(.*?\)" + # Match Markdown link syntax + link_pattern = r"\[.*?\]\(.*?\)" + + # First, find all structures that need to be kept intact + images = re.finditer(image_pattern, text) + links = re.finditer(link_pattern, text) + + # Record the start and end positions of all structures that need to be kept intact + protected_ranges = [] + for match in images: + protected_ranges.append((match.start(), match.end())) + for match in links: + protected_ranges.append((match.start(), match.end())) + + # Sort by start position + protected_ranges.sort(key=lambda x: x[0]) + logger.info(f"Found {len(protected_ranges)} protected ranges (images/links)") + + # Remove ranges that are completely within other ranges, keep the largest range + for start, end in protected_ranges[:]: # Create a copy to iterate over + is_inner = False + for start2, end2 in 
protected_ranges: + if ( + (start > start2 and end < end2) + or (start > start2 and end <= end2) + or (start >= start2 and end < end2) + ): + is_inner = True + break + if is_inner: + protected_ranges.remove((start, end)) + logger.info(f"Removed inner protected range: ({start}, {end})") + + logger.info(f"After cleanup: {len(protected_ranges)} protected ranges remain") + + # Split text, avoiding protected areas + units = [] + last_end = 0 + + for start, end in protected_ranges: + # Add text before protected range + if start <= last_end: + continue + pre_text = text[last_end:start] + if not pre_text.strip(): + continue + logger.info( + f"Processing text segment before protected range, length: {len(pre_text)}" + ) + separator_pattern = ( + f"({'|'.join(re.escape(s) for s in self.separators)})" + ) + segments = re.split(separator_pattern, pre_text) + for unit in segments: + if len(unit) <= self.chunk_size: + units.append(unit) + else: + # Split further by English . + logger.info( + f"Unit exceeds chunk size ({len(unit)}>{self.chunk_size}), splitting further" + ) + separators = ["."] + sep_pattern = ( + f"({'|'.join(re.escape(s) for s in separators)})" + ) + additional_units = re.split(sep_pattern, unit) + units.extend(additional_units) + logger.info( + f"Split into {len(additional_units)} additional units" + ) + + # Add protected range + protected_text = text[start:end] + logger.info( + f"Adding protected range: {protected_text[:30]}..." + if len(protected_text) > 30 + else f"Adding protected range: {protected_text}" + ) + units.append(protected_text) + + last_end = end + + # Add text after the last protected range + if last_end < len(text): + post_text = text[last_end:] + if post_text.strip(): + logger.info( + f"Processing final text segment after all protected ranges, length: {len(post_text)}" + ) + separator_pattern = ( + f"({'|'.join(re.escape(s) for s in self.separators)})" + ) + segments = re.split(separator_pattern, post_text) + for unit in segments: + if len(unit) <= self.chunk_size: + units.append(unit) + else: + # Split further by English . 
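+                        # Same fallback as the pre-text branch above: units still longer than
+                        # chunk_size are re-split on "." before being appended.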
+ logger.info( + f"Final unit exceeds chunk size ({len(unit)}>{self.chunk_size}), splitting" + ) + separators = ["."] + sep_pattern = f"({'|'.join(re.escape(s) for s in separators)})" + additional_units = re.split(sep_pattern, unit) + units.extend(additional_units) + logger.info( + f"Split into {len(additional_units)} additional units" + ) + + logger.info(f"Text splitting complete, created {len(units)} basic units") + return units + + def _find_complete_units(self, units: List[str], target_size: int) -> List[str]: + """Find a list of complete units that do not exceed the target size + + Args: + units: List of units + target_size: Target size + + Returns: + List of complete units + """ + logger.info(f"Finding complete units with target size: {target_size}") + result = [] + current_size = 0 + + for unit in units: + unit_size = len(unit) + if current_size + unit_size > target_size and result: + logger.info( + f"Reached target size limit at {current_size} characters, stopping" + ) + break + result.append(unit) + current_size += unit_size + logger.info( + f"Added unit of size {unit_size}, current total: {current_size}/{target_size}" + ) + + logger.info( + f"Found {len(result)} complete units totaling {current_size} characters" + ) + return result + + def chunk_text(self, text: str) -> List[Chunk]: + """Chunk text, preserving Markdown structure + + Args: + text: Text content + + Returns: + List of text chunks + """ + if not text: + logger.warning("Empty text provided for chunking, returning empty list") + return [] + + logger.info(f"Starting text chunking process, text length: {len(text)}") + logger.info( + f"Chunking parameters: size={self.chunk_size}, overlap={self.chunk_overlap}" + ) + + # Split text into basic units + units = self._split_into_units(text) + logger.info(f"Split text into {len(units)} basic units") + + chunks = [] + current_chunk = [] + current_size = 0 + current_start = 0 + + for i, unit in enumerate(units): + unit_size = len(unit) + logger.info(f"Processing unit {i+1}/{len(units)}, size: {unit_size}") + + # If current chunk plus new unit exceeds size limit, create new chunk + if current_size + unit_size > self.chunk_size and current_chunk: + chunk_text = "".join(current_chunk) + chunks.append( + Chunk( + seq=len(chunks), + content=chunk_text, + start=current_start, + end=current_start + len(chunk_text), + ) + ) + logger.info(f"Created chunk {len(chunks)}, size: {len(chunk_text)}") + + # Keep overlap, ensuring structure integrity + if self.chunk_overlap > 0: + # Calculate target overlap size + overlap_target = min(self.chunk_overlap, len(chunk_text)) + logger.info( + f"Calculating overlap with target size: {overlap_target}" + ) + + # Find complete units from the end + overlap_units = [] + overlap_size = 0 + + for u in reversed(current_chunk): + if overlap_size + len(u) > overlap_target: + logger.info( + f"Reached overlap target ({overlap_size}/{overlap_target})" + ) + break + overlap_units.insert(0, u) + overlap_size += len(u) + logger.info( + f"Added unit to overlap, current overlap size: {overlap_size}" + ) + + # Remove elements from overlap that are included in separators + start_index = 0 + for i, u in enumerate(overlap_units): + # Check if u is in separators + all_of_separator = True + for uu in u: + if uu not in self.separators: + all_of_separator = False + break + if all_of_separator: + # Remove the first element + start_index = i + 1 + overlap_size = overlap_size - len(u) + logger.info(f"Removed separator from overlap: '{u}'") + else: + break + + overlap_units = 
overlap_units[start_index:] + logger.info( + f"Final overlap: {len(overlap_units)} units, {overlap_size} characters" + ) + + current_chunk = overlap_units + current_size = overlap_size + # Update start position, considering overlap + current_start = current_start + len(chunk_text) - overlap_size + else: + logger.info("No overlap configured, starting fresh chunk") + current_chunk = [] + current_size = 0 + current_start = current_start + len(chunk_text) + + current_chunk.append(unit) + current_size += unit_size + logger.info( + f"Added unit to current chunk, now at {current_size}/{self.chunk_size} characters" + ) + + # Add the last chunk + if current_chunk: + chunk_text = "".join(current_chunk) + chunks.append( + Chunk( + seq=len(chunks), + content=chunk_text, + start=current_start, + end=current_start + len(chunk_text), + ) + ) + logger.info(f"Created final chunk {len(chunks)}, size: {len(chunk_text)}") + + logger.info(f"Chunking complete, created {len(chunks)} chunks from text") + return chunks + + def extract_images_from_chunk(self, chunk: Chunk) -> List[Dict[str, str]]: + """Extract image information from a chunk + + Args: + chunk: Document chunk + + Returns: + List of image information, each element contains image URL and match position + """ + logger.info(f"Extracting image information from Chunk #{chunk.seq}") + text = chunk.content + + # Regex to extract image information from text, supporting Markdown images and HTML images + img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)|]*src="([^"]+)" [^>]*>' + + # Extract image information + img_matches = list(re.finditer(img_pattern, text)) + logger.info(f"Chunk #{chunk.seq} found {len(img_matches)} images") + + images_info = [] + for match_idx, match in enumerate(img_matches): + # Process image URL + img_url = match.group(2) if match.group(2) else match.group(3) + alt_text = match.group(1) if match.group(1) else "" + + # Record image information + image_info = { + "original_url": img_url, + "start": match.start(), + "end": match.end(), + "alt_text": alt_text, + "match_text": text[match.start():match.end()] + } + images_info.append(image_info) + + logger.info( + f"Image in Chunk #{chunk.seq} {match_idx+1}: " + f"URL={img_url[:50]}..." 
+ if len(img_url) > 50 + else f"Image in Chunk #{chunk.seq} {match_idx+1}: URL={img_url}" + ) + + return images_info + + async def download_and_upload_image(self, img_url: str, current_request_id=None): + """Download image and upload to COS, if it's already a COS path or local path, use directly + + Args: + img_url: Image URL or local path + current_request_id: Current request ID + + Returns: + tuple: (original URL, COS URL, image object), if failed returns (original URL, None, None) + """ + # Set request ID context in the asynchronous task + try: + if current_request_id: + from utils.request import set_request_id + set_request_id(current_request_id) + logger.info(f"Asynchronous task setting request ID: {current_request_id}") + except Exception as e: + logger.warning(f"Failed to set request ID in asynchronous task: {str(e)}") + + try: + import requests + from PIL import Image + import io + + # Check if it's already a COS path + if "cos" in img_url and "myqcloud.com" in img_url: + logger.info(f"Image already on COS: {img_url}, no need to re-upload") + try: + # Still need to get image object for OCR processing + # Get proxy settings from environment variables + http_proxy = os.environ.get("EXTERNAL_HTTP_PROXY") + https_proxy = os.environ.get("EXTERNAL_HTTPS_PROXY") + proxies = {} + if http_proxy: + proxies["http"] = http_proxy + if https_proxy: + proxies["https"] = https_proxy + + response = requests.get(img_url, timeout=5, proxies=proxies) + if response.status_code == 200: + image = Image.open(io.BytesIO(response.content)) + try: + return img_url, img_url, image + finally: + # Ensure image resources are also released after the function returns + # Image will be closed by the caller + pass + else: + logger.warning(f"Failed to get COS image: {response.status_code}") + return img_url, img_url, None + except Exception as e: + logger.error(f"Error getting COS image: {str(e)}") + return img_url, img_url, None + + # Check if it's a local file path + elif os.path.exists(img_url) and os.path.isfile(img_url): + logger.info(f"Using local image file: {img_url}") + image = None + try: + # Read local image + image = Image.open(img_url) + # Upload to COS + with open(img_url, 'rb') as f: + content = f.read() + cos_url = self.upload_bytes(content) + logger.info(f"Successfully uploaded local image to COS: {cos_url}") + return img_url, cos_url, image + except Exception as e: + logger.error(f"Error processing local image: {str(e)}") + if image and hasattr(image, 'close'): + image.close() + return img_url, None, None + + # Normal remote URL download handling + else: + # Get proxy settings from environment variables + http_proxy = os.environ.get("EXTERNAL_HTTP_PROXY") + https_proxy = os.environ.get("EXTERNAL_HTTPS_PROXY") + proxies = {} + if http_proxy: + proxies["http"] = http_proxy + if https_proxy: + proxies["https"] = https_proxy + + logger.info(f"Downloading image {img_url}, using proxy: {proxies if proxies else 'None'}") + response = requests.get(img_url, timeout=5, proxies=proxies) + + if response.status_code == 200: + # Download successful, create image object + image = Image.open(io.BytesIO(response.content)) + try: + # Upload to COS using the method in BaseParser + cos_url = self.upload_bytes(response.content) + logger.info(f"Successfully uploaded image to COS: {cos_url}") + return img_url, cos_url, image + finally: + # Image will be closed by the caller + pass + else: + logger.warning(f"Failed to download image: {response.status_code}") + return img_url, None, None + + except Exception as e: + 
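+            # Any download, decode, or COS-upload failure lands here; keep the original URL and
+            # signal failure with (url, None, None) so the chunk retains its original reference.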
logger.error(f"Error downloading or processing image: {str(e)}") + return img_url, None, None + + async def process_chunk_images_async(self, chunk, chunk_idx, total_chunks, current_request_id=None): + """Asynchronously process images in a single Chunk + + Args: + chunk: Chunk object to process + chunk_idx: Chunk index + total_chunks: Total number of chunks + current_request_id: Current request ID + + Returns: + Processed Chunk object + """ + # Set request ID context in the asynchronous task + try: + if current_request_id: + from utils.request import set_request_id + set_request_id(current_request_id) + logger.info(f"Chunk processing task #{chunk_idx+1} setting request ID: {current_request_id}") + except Exception as e: + logger.warning(f"Failed to set request ID in Chunk processing task: {str(e)}") + + logger.info(f"Starting to process images in Chunk #{chunk_idx+1}/{total_chunks}") + + # Extract image information from the Chunk + images_info = self.extract_images_from_chunk(chunk) + if not images_info: + logger.info(f"Chunk #{chunk_idx+1} found no images") + return chunk + + # Prepare images that need to be downloaded and processed + images_to_process = [] + url_to_info_map = {} # Map URL to image information + + # Record all image URLs that need to be processed + for img_info in images_info: + url = img_info["original_url"] + url_to_info_map[url] = img_info + + # Create an asynchronous event loop (current loop) + loop = asyncio.get_event_loop() + + # Concurrent download and upload of images + tasks = [self.download_and_upload_image(url, current_request_id) for url in url_to_info_map.keys()] + results = await asyncio.gather(*tasks) + + # Process download results, prepare for OCR processing + for orig_url, cos_url, image in results: + if cos_url and image: + img_info = url_to_info_map[orig_url] + img_info["cos_url"] = cos_url + images_to_process.append((image, cos_url)) + + # If no images were successfully downloaded and uploaded, return the original Chunk + if not images_to_process: + logger.info(f"Chunk #{chunk_idx+1} found no successfully downloaded and uploaded images") + return chunk + + # Concurrent processing of all images (OCR + caption) + logger.info(f"Processing {len(images_to_process)} images in Chunk #{chunk_idx+1}") + + # Concurrent processing of all images + processed_results = await self.process_multiple_images(images_to_process) + + # Process OCR and Caption results + for ocr_text, caption, img_url in processed_results: + # Find the corresponding original URL + for orig_url, info in url_to_info_map.items(): + if info.get("cos_url") == img_url: + info["ocr_text"] = ocr_text if ocr_text else "" + info["caption"] = caption if caption else "" + + if ocr_text: + logger.info(f"Image OCR extracted {len(ocr_text)} characters: {img_url}") + if caption: + logger.info(f"Obtained image description: '{caption}'") + break + + # Add processed image information to the Chunk + processed_images = [] + for img_info in images_info: + if "cos_url" in img_info: + processed_images.append(img_info) + + # Update image information in the Chunk + chunk.images = processed_images + + logger.info(f"Completed image processing in Chunk #{chunk_idx+1}") + return chunk + + def process_chunks_images(self, chunks: List[Chunk]) -> List[Chunk]: + """Concurrent processing of images in all Chunks + + Args: + chunks: List of document chunks + + Returns: + List of processed document chunks + """ + logger.info(f"Starting concurrent processing of images in all {len(chunks)} chunks") + + if not chunks: + 
logger.warning("No chunks to process") + return chunks + + # Get current request ID to pass to asynchronous tasks + current_request_id = None + try: + from utils.request import get_request_id + current_request_id = get_request_id() + logger.info(f"Capturing current request ID before async processing: {current_request_id}") + except Exception as e: + logger.warning(f"Failed to get current request ID: {str(e)}") + + # Create and run all Chunk concurrent processing tasks + async def process_all_chunks(): + # Set max concurrency, reduce concurrency to avoid resource contention + max_concurrency = min(self.max_concurrent_tasks, 5) # Reduce concurrency + # Use semaphore to limit concurrency + semaphore = asyncio.Semaphore(max_concurrency) + + async def process_with_limit(chunk, idx, total): + """Use semaphore to control concurrent processing of Chunks""" + async with semaphore: + return await self.process_chunk_images_async(chunk, idx, total, current_request_id) + + # Create tasks for all Chunks + tasks = [ + process_with_limit(chunk, idx, len(chunks)) + for idx, chunk in enumerate(chunks) + ] + + # Execute all tasks concurrently + results = await asyncio.gather(*tasks, return_exceptions=True) + + # Handle possible exceptions + processed_chunks = [] + for i, result in enumerate(results): + if isinstance(result, Exception): + logger.error(f"Error processing Chunk {i+1}: {str(result)}") + # Keep original Chunk + if i < len(chunks): + processed_chunks.append(chunks[i]) + else: + processed_chunks.append(result) + + return processed_chunks + + # Create event loop and run all tasks + try: + # Check if event loop already exists + try: + loop = asyncio.get_event_loop() + if loop.is_closed(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + except RuntimeError: + # If no event loop, create a new one + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + # Execute processing for all Chunks + processed_chunks = loop.run_until_complete(process_all_chunks()) + logger.info(f"Successfully completed concurrent processing of {len(processed_chunks)}/{len(chunks)} chunks") + + return processed_chunks + except Exception as e: + logger.error(f"Error during concurrent chunk processing: {str(e)}") + return chunks diff --git a/services/docreader/src/parser/caption.py b/services/docreader/src/parser/caption.py new file mode 100644 index 0000000..851ef8b --- /dev/null +++ b/services/docreader/src/parser/caption.py @@ -0,0 +1,271 @@ +import json +import logging +import os +from dataclasses import dataclass, field +from typing import List, Optional, Union + +import requests + +logger = logging.getLogger(__name__) + + +@dataclass +class ImageUrl: + """Image URL data structure for caption requests.""" + + url: Optional[str] = None + detail: Optional[str] = None + + +@dataclass +class Content: + """Content data structure that can contain text or image URL.""" + + type: Optional[str] = None + text: Optional[str] = None + image_url: Optional[ImageUrl] = None + + +@dataclass +class SystemMessage: + """System message for VLM model requests.""" + + role: Optional[str] = None + content: Optional[str] = None + + +@dataclass +class UserMessage: + """User message for VLM model requests, can contain multiple content items.""" + + role: Optional[str] = None + content: List[Content] = field(default_factory=list) + + +@dataclass +class CompletionRequest: + """Request structure for VLM model completion API.""" + + model: str + temperature: float + top_p: float + messages: List[Union[SystemMessage, 
UserMessage]]
+    user: str
+
+
+@dataclass
+class Model:
+    """Model identifier structure."""
+
+    id: str
+
+
+@dataclass
+class ModelsResp:
+    """Response structure for available models API."""
+
+    data: List[Model] = field(default_factory=list)
+
+
+@dataclass
+class Message:
+    """Message structure in API response."""
+
+    role: Optional[str] = None
+    content: Optional[str] = None
+    tool_calls: Optional[str] = None
+
+
+@dataclass
+class Choice:
+    """Choice structure in API response."""
+
+    message: Optional[Message] = None
+
+
+@dataclass
+class Usage:
+    """Token usage information in API response."""
+
+    prompt_tokens: Optional[int] = 0
+    total_tokens: Optional[int] = 0
+    completion_tokens: Optional[int] = 0
+
+
+@dataclass
+class CaptionChatResp:
+    """Response structure for caption chat API."""
+
+    id: Optional[str] = None
+    created: Optional[int] = None
+    model: Optional[Model] = None
+    object: Optional[str] = None
+    choices: List[Choice] = field(default_factory=list)
+    usage: Optional[Usage] = None
+
+    @staticmethod
+    def from_json(json_data: dict) -> "CaptionChatResp":
+        """
+        Parse API response JSON into a CaptionChatResp object.
+
+        Args:
+            json_data: The JSON response from the API
+
+        Returns:
+            A parsed CaptionChatResp object
+        """
+        logger.info("Parsing CaptionChatResp from JSON")
+        # Manually parse nested fields
+        choices = [
+            Choice(message=Message(**choice["message"]))
+            for choice in json_data.get("choices", [])
+        ]
+        usage = Usage(**json_data["usage"]) if "usage" in json_data else None
+        logger.info(
+            f"Parsed {len(choices)} choices and usage data: {usage is not None}"
+        )
+        return CaptionChatResp(
+            id=json_data.get("id"),
+            created=json_data.get("created"),
+            model=json_data.get("model"),
+            object=json_data.get("object"),
+            choices=choices,
+            usage=usage,
+        )
+
+    def choice_data(self) -> str:
+        """
+        Extract the content from the first choice in the response.
+
+        Returns:
+            The content string from the first choice, or empty string if no choices
+        """
+        if self.choices:
+            logger.info("Retrieving content from first choice")
+            return self.choices[0].message.content
+        logger.warning("No choices available in response")
+        return ""
+
+
+class Caption:
+    """
+    Service for generating captions for images using a Vision Language Model.
+    Uses an external API to process images and return textual descriptions.
+    """
+
+    def __init__(self):
+        """Initialize the Caption service with configuration from environment variables."""
+        logger.info("Initializing Caption service")
+        self.prompt = """简单凝炼的描述图片的主要内容"""
+        # Start in an unconfigured state so get_caption() can bail out gracefully
+        # when the VLM endpoint is not configured.
+        self.completion_url = None
+        self.model = None
+        base_url = os.getenv("VLM_MODEL_BASE_URL")
+        model_name = os.getenv("VLM_MODEL_NAME")
+        if not base_url or not model_name:
+            logger.error("VLM_MODEL_BASE_URL or VLM_MODEL_NAME is not set")
+            return
+        self.completion_url = base_url + "/v1/chat/completions"
+        self.model = model_name
+        logger.info(
+            f"Service configured with model: {self.model}, endpoint: {self.completion_url}"
+        )
+
+    def _call_caption_api(self, image_url: str) -> Optional[CaptionChatResp]:
+        """
+        Call the Caption API to generate a description for the given image.
+ + Args: + image_url: URL of the image to be captioned + + Returns: + CaptionChatResp object if successful, None otherwise + """ + logger.info(f"Calling Caption API for image captioning") + logger.info(f"Processing image from URL: {image_url}") + + user_msg = UserMessage( + role="user", + content=[ + Content(type="text", text=self.prompt), + Content(type="image_url", image_url=ImageUrl(url=image_url)), + ], + ) + + gpt_req = CompletionRequest( + model=self.model, + temperature=0.3, + top_p=0.8, + messages=[user_msg], + user="abc", + ) + + headers = { + "Content-Type": "application/json", + "Accept": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + } + + try: + logger.info(f"Sending request to Caption API with model: {self.model}") + response = requests.post( + self.completion_url, + data=json.dumps(gpt_req, default=lambda o: o.__dict__, indent=4), + headers=headers, + timeout=30, + ) + if response.status_code != 200: + logger.error( + f"Caption API returned non-200 status code: {response.status_code}" + ) + response.raise_for_status() + + logger.info( + f"Successfully received response from Caption API with status: {response.status_code}" + ) + logger.info(f"Converting response to CaptionChatResp object") + caption_resp = CaptionChatResp.from_json(response.json()) + + if caption_resp.usage: + logger.info( + f"API usage: prompt_tokens={caption_resp.usage.prompt_tokens}, " + f"completion_tokens={caption_resp.usage.completion_tokens}" + ) + + return caption_resp + except requests.exceptions.Timeout: + logger.error(f"Timeout while calling Caption API after 30 seconds") + return None + except requests.exceptions.RequestException as e: + logger.error(f"Request error calling Caption API: {e}") + return None + except Exception as e: + logger.error(f"Error calling Caption API: {str(e)}") + logger.info( + f"Request details: model={self.model}, URL={self.completion_url}" + ) + return None + + def get_caption(self, image_url: str) -> str: + """ + Get a caption for the provided image URL. + + Args: + image_url: URL of the image to be captioned + + Returns: + Caption text as string, or empty string if captioning failed + """ + logger.info("Getting caption for image") + if not image_url or self.completion_url is None: + logger.error("Image URL is not set") + return "" + caption_resp = self._call_caption_api(image_url) + if caption_resp: + caption = caption_resp.choice_data() + caption_length = len(caption) + logger.info(f"Successfully generated caption of length {caption_length}") + logger.info( + f"Caption: {caption[:50]}..." 
+ if caption_length > 50 + else f"Caption: {caption}" + ) + return caption + logger.warning("Failed to get caption from Caption API") + return "" diff --git a/services/docreader/src/parser/doc_parser.py b/services/docreader/src/parser/doc_parser.py new file mode 100644 index 0000000..8b36664 --- /dev/null +++ b/services/docreader/src/parser/doc_parser.py @@ -0,0 +1,314 @@ +import asyncio +import logging +import re +import tempfile +import os +import subprocess +import shutil +from io import BytesIO +from typing import Optional, List, Tuple +import textract +from PIL import Image +import zipfile +import xml.etree.ElementTree as ET + +from .base_parser import BaseParser +from .docx_parser import DocxParser, Docx + +logger = logging.getLogger(__name__) + + +class DocParser(BaseParser): + """DOC document parser""" + + def parse_into_text(self, content: bytes) -> str: + """Parse DOC document + + Args: + content: DOC document content + + Returns: + Parse result + """ + logger.info(f"Parsing DOC document, content size: {len(content)} bytes") + + # Save byte content as a temporary file + with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file: + temp_file_path = temp_file.name + temp_file.write(content) + temp_file.flush() + logger.info(f"Saved DOC content to temporary file: {temp_file_path}") + + try: + # First try to convert to docx format to extract images + if self.enable_multimodal: + logger.info("Multimodal enabled, attempting to extract images from DOC") + docx_content = self._convert_doc_to_docx(temp_file_path) + + if docx_content: + logger.info("Successfully converted DOC to DOCX, using DocxParser") + # Use existing DocxParser to parse the converted docx + docx_parser = DocxParser( + file_name=self.file_name, + file_type="docx", + enable_multimodal=self.enable_multimodal, + chunk_size=self.chunk_size, + chunk_overlap=self.chunk_overlap, + separators=self.separators, + ) + text = docx_parser.parse_into_text(docx_content) + logger.info(f"Extracted {len(text)} characters using DocxParser") + + # Clean up temporary file + os.unlink(temp_file_path) + logger.info(f"Deleted temporary file: {temp_file_path}") + + return text + else: + logger.warning( + "Failed to convert DOC to DOCX, falling back to text-only extraction" + ) + + # If image extraction is not needed or conversion failed, try using antiword to extract text + try: + logger.info("Attempting to parse DOC file with antiword") + # Check if antiword is installed + antiword_path = self._find_antiword_path() + + if antiword_path: + # Use antiword to extract text directly + logger.info(f"Using antiword at {antiword_path} to extract text") + process = subprocess.Popen( + [antiword_path, temp_file_path], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + stdout, stderr = process.communicate() + + if process.returncode == 0: + text = stdout.decode("utf-8", errors="ignore") + logger.info( + f"Successfully extracted {len(text)} characters using antiword" + ) + + # Clean up temporary file + os.unlink(temp_file_path) + logger.info(f"Deleted temporary file: {temp_file_path}") + + return text + else: + logger.warning( + f"antiword extraction failed: {stderr.decode('utf-8', errors='ignore')}" + ) + else: + logger.warning("antiword not found, falling back to textract") + except Exception as e: + logger.warning( + f"Error using antiword: {str(e)}, falling back to textract" + ) + + # If antiword fails, try using textract + logger.info("Parsing DOC file with textract") + text = textract.process(temp_file_path, 
method="antiword").decode("utf-8") + logger.info( + f"Successfully extracted {len(text)} characters of text from DOC document using textract" + ) + + # Clean up temporary file + os.unlink(temp_file_path) + logger.info(f"Deleted temporary file: {temp_file_path}") + + return text + except Exception as e: + logger.error(f"Error parsing DOC document: {str(e)}") + # Ensure temporary file is cleaned up + if os.path.exists(temp_file_path): + os.unlink(temp_file_path) + logger.info(f"Deleted temporary file after error: {temp_file_path}") + return "" + + def _convert_doc_to_docx(self, doc_path: str) -> Optional[bytes]: + """Convert DOC file to DOCX format + + Uses LibreOffice/OpenOffice for conversion + + Args: + doc_path: DOC file path + + Returns: + Byte stream of DOCX file content, or None if conversion fails + """ + logger.info(f"Converting DOC to DOCX: {doc_path}") + + # Create a temporary directory to store the converted file + temp_dir = tempfile.mkdtemp() + docx_path = os.path.join(temp_dir, "converted.docx") + + try: + # Check if LibreOffice or OpenOffice is installed + soffice_path = self._find_soffice_path() + if not soffice_path: + logger.error( + "LibreOffice/OpenOffice not found, cannot convert DOC to DOCX" + ) + return None + + # Execute conversion command + logger.info(f"Using {soffice_path} to convert DOC to DOCX") + cmd = [ + soffice_path, + "--headless", + "--convert-to", + "docx", + "--outdir", + temp_dir, + doc_path, + ] + + logger.info(f"Running command: {' '.join(cmd)}") + process = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + stdout, stderr = process.communicate() + + if process.returncode != 0: + logger.error( + f"Error converting DOC to DOCX: {stderr.decode('utf-8', errors='ignore')}" + ) + return None + + # Find the converted file + for file in os.listdir(temp_dir): + if file.endswith(".docx"): + converted_file = os.path.join(temp_dir, file) + logger.info(f"Found converted file: {converted_file}") + + # Read the converted file content + with open(converted_file, "rb") as f: + docx_content = f.read() + + logger.info( + f"Successfully read converted DOCX file, size: {len(docx_content)} bytes" + ) + return docx_content + + logger.error("No DOCX file found after conversion") + return None + + except Exception as e: + logger.error(f"Error during DOC to DOCX conversion: {str(e)}") + return None + finally: + # Clean up temporary directory + try: + shutil.rmtree(temp_dir) + logger.info(f"Cleaned up temporary directory: {temp_dir}") + except Exception as e: + logger.warning(f"Failed to clean up temporary directory: {str(e)}") + + def _find_soffice_path(self) -> Optional[str]: + """Find LibreOffice/OpenOffice executable path + + Returns: + Executable path, or None if not found + """ + # Common LibreOffice/OpenOffice executable paths + possible_paths = [ + # Linux + "/usr/bin/soffice", + "/usr/lib/libreoffice/program/soffice", + "/opt/libreoffice25.2/program/soffice", + # macOS + "/Applications/LibreOffice.app/Contents/MacOS/soffice", + # Windows + "C:\\Program Files\\LibreOffice\\program\\soffice.exe", + "C:\\Program Files (x86)\\LibreOffice\\program\\soffice.exe", + ] + + # Check if path is set in environment variable + if os.environ.get("LIBREOFFICE_PATH"): + possible_paths.insert(0, os.environ.get("LIBREOFFICE_PATH")) + + for path in possible_paths: + if os.path.exists(path): + logger.info(f"Found LibreOffice/OpenOffice at: {path}") + return path + + # Try to find in PATH + try: + result = subprocess.run( + ["which", "soffice"], 
capture_output=True, text=True + ) + if result.returncode == 0 and result.stdout.strip(): + path = result.stdout.strip() + logger.info(f"Found LibreOffice/OpenOffice in PATH: {path}") + return path + except Exception: + pass + + logger.warning("LibreOffice/OpenOffice not found") + return None + + def _find_antiword_path(self) -> Optional[str]: + """Find antiword executable path + + Returns: + Executable path, or None if not found + """ + # Common antiword executable paths + possible_paths = [ + # Linux/macOS + "/usr/bin/antiword", + "/usr/local/bin/antiword", + # Windows + "C:\\Program Files\\Antiword\\antiword.exe", + "C:\\Program Files (x86)\\Antiword\\antiword.exe", + ] + + # Check if path is set in environment variable + if os.environ.get("ANTIWORD_PATH"): + possible_paths.insert(0, os.environ.get("ANTIWORD_PATH")) + + for path in possible_paths: + if os.path.exists(path): + logger.info(f"Found antiword at: {path}") + return path + + # Try to find in PATH + try: + result = subprocess.run( + ["which", "antiword"], capture_output=True, text=True + ) + if result.returncode == 0 and result.stdout.strip(): + path = result.stdout.strip() + logger.info(f"Found antiword in PATH: {path}") + return path + except Exception: + pass + + logger.warning("antiword not found") + return None + + +if __name__ == "__main__": + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + logger.info("Running DocParser in standalone mode") + + file_name = "/path/to/your/test.doc" + logger.info(f"Processing file: {file_name}") + + doc_parser = DocParser( + file_name, enable_multimodal=True, chunk_size=512, chunk_overlap=60 + ) + logger.info("Parser initialized, starting processing") + + with open(file_name, "rb") as f: + content = f.read() + + text = doc_parser.parse_into_text(content) + logger.info(f"Processing complete, extracted text length: {len(text)}") + logger.info(f"Sample text: {text[:200]}...") diff --git a/services/docreader/src/parser/docx_parser.py b/services/docreader/src/parser/docx_parser.py new file mode 100644 index 0000000..c58eda9 --- /dev/null +++ b/services/docreader/src/parser/docx_parser.py @@ -0,0 +1,1585 @@ +import logging +import tempfile +import os +import sys +import time +from io import BytesIO +from typing import Optional +from PIL import Image +from docx import Document +from docx.image.exceptions import ( + UnrecognizedImageError, + UnexpectedEndOfFileError, + InvalidImageStreamError, +) +from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed +import threading +import traceback +from multiprocessing import Manager +import re + +from .base_parser import BaseParser + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +# Add thread local storage to track the processing status of each thread +thread_local = threading.local() + + +class DocxParser(BaseParser): + """DOCX document parser""" + + def __init__( + self, + file_name: str = "", + file_type: str = None, + enable_multimodal: bool = True, + chunk_size: int = 1000, + chunk_overlap: int = 200, + separators: list = ["\n\n", "\n", "。"], + ocr_backend: str = "paddle", + ocr_config: dict = None, + max_image_size: int = 1920, + max_concurrent_tasks: int = 5, + max_pages: int = 100, # Maximum number of pages to process, default to 50 pages + ): + """Initialize DOCX document parser + + Args: + file_name: File name + file_type: File type, if None, infer from file name + enable_multimodal: Whether to 
enable multimodal processing + chunk_size: Chunk size + chunk_overlap: Chunk overlap + separators: List of separators + ocr_backend: OCR engine type + ocr_config: OCR engine configuration + max_image_size: Maximum image size limit + max_concurrent_tasks: Maximum number of concurrent tasks + max_pages: Maximum number of pages to process, if more than this, only process the first max_pages pages + """ + super().__init__( + file_name=file_name, + file_type=file_type, + enable_multimodal=enable_multimodal, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + separators=separators, + ocr_backend=ocr_backend, + ocr_config=ocr_config, + max_image_size=max_image_size, + max_concurrent_tasks=max_concurrent_tasks, + ) + self.max_pages = max_pages + logger.info(f"DocxParser initialized with max_pages={max_pages}") + + def parse_into_text(self, content: bytes) -> str: + """Parse DOCX document, only extract text content and image Markdown links + + Args: + content: DOCX document content + + Returns: + Parsed text + """ + logger.info(f"Parsing DOCX document, content size: {len(content)} bytes") + logger.info(f"Max pages limit set to: {self.max_pages}") + logger.info("Converting DOCX content to sections and tables") + + start_time = time.time() + # Use concurrent processing to handle the document + max_workers = min(4, os.cpu_count() or 2) # Reduce thread count to avoid excessive memory consumption + logger.info(f"Setting max_workers to {max_workers} for document processing") + + try: + logger.info(f"Starting Docx processing with max_pages={self.max_pages}") + docx_processor = Docx(max_image_size=self.max_image_size, + enable_multimodal=self.enable_multimodal, + upload_file=self.upload_file) + all_lines, tables = docx_processor( + binary=content, + max_workers=max_workers, + to_page=self.max_pages, + ) + processing_time = time.time() - start_time + logger.info( + f"Docx processing completed in {processing_time:.2f}s, " + f"extracted {len(all_lines)} sections and {len(tables)} tables" + ) + + # Add debug log to print the structure of all_lines + if all_lines: + logger.info(f"all_lines type: {type(all_lines)}") + if len(all_lines) > 0: + first_line = all_lines[0] + logger.info(f"First line type: {type(first_line)}, length: {len(first_line)}") + logger.info(f"First line content types: {[type(item) for item in first_line]}") + if len(all_lines) > 1: + second_line = all_lines[1] + logger.info(f"Second line type: {type(second_line)}, length: {len(second_line)}") + logger.info(f"Second line content types: {[type(item) for item in second_line]}") + else: + logger.warning("all_lines is empty") + + logger.info("Processing document sections") + section_start_time = time.time() + + text_parts = [] + + for sec_idx, line in enumerate(all_lines): + try: + text = None + if isinstance(line, (tuple, list)) and len(line) >= 1: + text = line[0] + + if text: + text_parts.append(text) + if sec_idx < 3 or sec_idx % 50 == 0: + logger.info( + f"Added section {sec_idx+1} text: {text[:50]}..." 
+ if len(text) > 50 + else f"Added section {sec_idx+1} text: {text}" + ) + + except Exception as e: + logger.error(f"Error processing section {sec_idx+1}: {str(e)}") + logger.error(f"Detailed stack trace: {traceback.format_exc()}") + continue + + # Combine text + section_processing_time = time.time() - section_start_time + logger.info(f"Section processing completed in {section_processing_time:.2f}s") + logger.info("Combining all text parts") + text = "\n\n".join([part for part in text_parts if part]) + + # Check if the generated text is empty + if not text: + logger.warning("Generated text is empty, trying alternative method") + return self._parse_using_simple_method(content) + + total_processing_time = time.time() - start_time + logger.info( + f"Parsing complete in {total_processing_time:.2f}s, generated {len(text)} characters of text " + ) + + return text + except Exception as e: + logger.error(f"Error parsing DOCX document: {str(e)}") + logger.error(f"Detailed stack trace: {traceback.format_exc()}") + return self._parse_using_simple_method(content) + + def _parse_using_simple_method(self, content: bytes) -> str: + """Parse document using a simplified method, as a fallback + + Args: + content: Document content + + Returns: + Parsed text + """ + logger.info("Attempting to parse document using simplified method") + start_time = time.time() + try: + doc = Document(BytesIO(content)) + logger.info( + f"Successfully loaded document in simplified method, " + f"contains {len(doc.paragraphs)} paragraphs and {len(doc.tables)} tables" + ) + text_parts = [] + + # Extract paragraph text + para_count = len(doc.paragraphs) + logger.info(f"Extracting text from {para_count} paragraphs") + para_with_text = 0 + for i, para in enumerate(doc.paragraphs): + if i % 100 == 0: + logger.info(f"Processing paragraph {i+1}/{para_count}") + if para.text.strip(): + text_parts.append(para.text.strip()) + para_with_text += 1 + + logger.info(f"Extracted text from {para_with_text}/{para_count} paragraphs") + + # Extract table text + table_count = len(doc.tables) + logger.info(f"Extracting text from {table_count} tables") + tables_with_content = 0 + rows_processed = 0 + for i, table in enumerate(doc.tables): + if i % 10 == 0: + logger.info(f"Processing table {i+1}/{table_count}") + + table_has_content = False + for row in table.rows: + rows_processed += 1 + row_text = ' | '.join([cell.text.strip() for cell in row.cells if cell.text.strip()]) + if row_text: + text_parts.append(row_text) + table_has_content = True + + if table_has_content: + tables_with_content += 1 + + logger.info( + f"Extracted content from {tables_with_content}/{table_count} tables, " + f"processed {rows_processed} rows" + ) + + # Combine text + result_text = "\n\n".join(text_parts) + processing_time = time.time() - start_time + logger.info( + f"Simplified parsing complete in {processing_time:.2f}s, " + f"generated {len(result_text)} characters of text" + ) + + # If the result is still empty, return an error message + if not result_text: + logger.warning("No text extracted using simplified method") + return "" + + return result_text + except Exception as backup_error: + processing_time = time.time() - start_time + logger.error(f"Simplified parsing failed after {processing_time:.2f}s: {str(backup_error)}") + logger.error(f"Detailed traceback: {traceback.format_exc()}") + return "" + +class Docx: + def __init__(self, max_image_size=1920, enable_multimodal=False, upload_file=None): + logger.info("Initializing DOCX processor") + self.max_image_size = 
max_image_size # Maximum image size limit + self.picture_cache = {} # Image cache to avoid processing the same image repeatedly + self.enable_multimodal = enable_multimodal + self.upload_file = upload_file + + def get_picture(self, document, paragraph) -> Optional[Image.Image]: + logger.info("Extracting image from paragraph") + img = paragraph._element.xpath(".//pic:pic") + if not img: + logger.info("No image found in paragraph") + return None + img = img[0] + try: + embed = img.xpath(".//a:blip/@r:embed")[0] + related_part = document.part.related_parts[embed] + logger.info(f"Found embedded image with ID: {embed}") + + try: + image_blob = related_part.image.blob + except UnrecognizedImageError: + logger.warning("Unrecognized image format. Skipping image.") + return None + except UnexpectedEndOfFileError: + logger.warning( + "EOF was unexpectedly encountered while reading an image stream. Skipping image." + ) + return None + except InvalidImageStreamError: + logger.warning( + "The recognized image stream appears to be corrupted. Skipping image." + ) + return None + + try: + logger.info("Converting image blob to PIL Image") + image = Image.open(BytesIO(image_blob)).convert("RGBA") + logger.info( + f"Successfully extracted image, size: {image.width}x{image.height}" + ) + return image + except Exception as e: + logger.error(f"Failed to open image: {str(e)}") + return None + except Exception as e: + logger.error(f"Error extracting image: {str(e)}") + return None + + def __clean(self, line): + logger.info(f"Cleaning text line of length: {len(line)}") + line = re.sub(r"\u3000", " ", line).strip() + return line + + def _identify_page_paragraph_mapping(self, max_page=100000): + """Identify the paragraph range included on each page + + Args: + max_page: Maximum number of pages to process + + Returns: + dict: Mapping of page numbers to lists of paragraph indices + """ + start_time = time.time() + logger.info(f"Identifying page to paragraph mapping (max_page={max_page})") + page_to_paragraphs = {} + current_page = 0 + + # Initialize page 0 + page_to_paragraphs[current_page] = [] + + # Record the total number of paragraphs processed + total_paragraphs = len(self.doc.paragraphs) + logger.info(f"Total paragraphs to map: {total_paragraphs}") + + # Heuristic method: estimate the number of paragraphs per page + # For large documents, using a heuristic can reduce XML parsing overhead + if total_paragraphs > 1000: + logger.info("Large document detected, using heuristic paragraph mapping") + estimated_paras_per_page = 25 # Estimate approximately 25 paragraphs per page + + # Create an estimated page mapping + for p_idx in range(total_paragraphs): + est_page = p_idx // estimated_paras_per_page + if est_page > max_page: + logger.info(f"Reached max page limit ({max_page}) at paragraph {p_idx}, stopping paragraph mapping") + break + + if est_page not in page_to_paragraphs: + page_to_paragraphs[est_page] = [] + + page_to_paragraphs[est_page].append(p_idx) + + if p_idx > 0 and p_idx % 1000 == 0: + logger.info(f"Heuristic mapping: processed {p_idx}/{total_paragraphs} paragraphs") + + mapping_time = time.time() - start_time + logger.info(f"Created heuristic mapping with {len(page_to_paragraphs)} pages in {mapping_time:.2f}s") + return page_to_paragraphs + + # Standard method: iterate through all paragraphs to find page breaks + logger.info("Using standard paragraph mapping method") + page_breaks_found = 0 + for p_idx, p in enumerate(self.doc.paragraphs): + # Add the current paragraph to the current page + 
page_to_paragraphs[current_page].append(p_idx) + + # Log every 100 paragraphs + if p_idx > 0 and p_idx % 100 == 0: + logger.info(f"Processed {p_idx}/{total_paragraphs} paragraphs in page mapping") + + # Check for page breaks + page_break_found = False + + # Method 1: Check for lastRenderedPageBreak + for run in p.runs: + if "lastRenderedPageBreak" in run._element.xml: + page_break_found = True + break + + if "w:br" in run._element.xml and 'type="page"' in run._element.xml: + page_break_found = True + break + + # Method 2: Check sectPr element (section break, usually indicates a new page) + if not page_break_found and p._element.xpath(".//w:sectPr"): + page_break_found = True + + # If a page break is found, create a new page + if page_break_found: + page_breaks_found += 1 + current_page += 1 + if current_page > max_page: + logger.info(f"Reached max page limit ({max_page}), stopping page mapping") + break + + # Initialize the paragraph list for the new page + if current_page not in page_to_paragraphs: + page_to_paragraphs[current_page] = [] + + if page_breaks_found % 10 == 0: + logger.info(f"Found {page_breaks_found} page breaks so far, current page: {current_page}") + + # Handle potential empty page mappings + empty_pages = [page for page, paras in page_to_paragraphs.items() if not paras] + if empty_pages: + logger.info(f"Removing {len(empty_pages)} empty pages from mapping") + for page in empty_pages: + del page_to_paragraphs[page] + + mapping_time = time.time() - start_time + logger.info(f"Created paragraph mapping with {len(page_to_paragraphs)} pages in {mapping_time:.2f}s") + + # Check the validity of the result + if not page_to_paragraphs: + logger.warning("No valid page mapping created, using fallback method") + # All paragraphs are on page 0 + page_to_paragraphs[0] = list(range(total_paragraphs)) + + # Log page distribution statistics + page_sizes = [len(paragraphs) for paragraphs in page_to_paragraphs.values()] + if page_sizes: + avg_paragraphs = sum(page_sizes) / len(page_sizes) + min_paragraphs = min(page_sizes) + max_paragraphs = max(page_sizes) + logger.info( + f"Page statistics: avg={avg_paragraphs:.1f}, " + f"min={min_paragraphs}, max={max_paragraphs} paragraphs per page" + ) + + return page_to_paragraphs + + + def _process_page(self, page_num, paragraphs, from_page, to_page): + """Process the content of one page + + Args: + page_num: Page number + paragraphs: List of paragraph indices + from_page: Starting page number + to_page: Ending page number + + Returns: + list: List of processed result lines + """ + logger.info(f"Processing page {page_num} with {len(paragraphs)} paragraphs") + + # Initialize thread local storage + if not hasattr(thread_local, 'last_text'): + thread_local.last_text = "" + thread_local.last_image = None + + # Track content in sequence + content_sequence = [] + current_text = "" + current_images = [] + + for para_idx in paragraphs: + if para_idx >= len(self.doc.paragraphs): + logger.warning(f"Paragraph index {para_idx} out of range") + continue + + paragraph = self.doc.paragraphs[para_idx] + + # Get paragraph text + text = paragraph.text.strip() + if text: + cleaned_text = self.__clean(text) + + # If we have pending images, add text after them + if current_images: + # Add all accumulated images to the sequence + for img in current_images: + content_sequence.append(("image", img)) + current_images = [] + + # Accumulate text + current_text += cleaned_text + "\n" + + # Attempt to extract image + image = self.get_picture(self.doc, paragraph) + if image: + # 
If we have text, add it to sequence first + if current_text: + content_sequence.append(("text", current_text)) + current_text = "" + + # Add image directly to sequence to maintain order + content_sequence.append(("image", image)) + + # Add any remaining text + if current_text: + content_sequence.append(("text", current_text)) + + # Add any remaining images + for img in current_images: + content_sequence.append(("image", img)) + + # Extract text and images in their original sequence for compatibility + text_parts = [] + images = [] + + for content_type, content in content_sequence: + if content_type == "text": + text_parts.append(content) + else: # image + images.append(content) + + combined_text = "\n\n".join(text_parts) if text_parts else "" + + # After processing all paragraphs, add to results + line_data = (combined_text, images, "", page_num, content_sequence) + + # Add to page-specific results + page_lines = [line_data] + + # Add to global results + with self.lines_lock: + self.all_lines.append(line_data) + + # Update thread local storage + thread_local.last_text = combined_text + thread_local.last_image = images[-1] if images else None + + # Set last_image for this page + if images: + with self.last_image_lock: + self.last_image_map[page_num] = images[-1] + + return page_lines + + def __call__(self, binary=None, from_page=0, to_page=100000, max_workers=None): + """ + Process DOCX document, supporting concurrent processing of each page + + Args: + binary: DOCX document binary content + from_page: Starting page number + to_page: Ending page number + max_workers: Maximum number of workers, default to None (system decides) + + Returns: + tuple: (List of text lines, List of tables) + """ + logger.info("Processing DOCX document") + + # Check CPU core count to determine parallel strategy + cpu_count = os.cpu_count() or 2 + logger.info(f"System has {cpu_count} CPU cores available") + + # Load document + self.doc = self._load_document(binary) + if not self.doc: + return [], [] + + # Identify page structure + self.para_page_mapping = self._identify_page_paragraph_mapping(to_page) + logger.info(f"Identified page to paragraph mapping for {len(self.para_page_mapping)} pages") + + # Apply page limits + pages_to_process = self._apply_page_limit(self.para_page_mapping, from_page, to_page) + if not pages_to_process: + logger.warning("No pages to process after applying page limits!") + return [], [] + + # Initialize shared resources + self._init_shared_resources() + + # Get current request ID + current_request_id = self._get_request_id() + + # Process document content + doc_size_mb = sys.getsizeof(binary) / (1024 * 1024) + logger.info(f"Document binary size: {doc_size_mb:.2f} MB") + + # Decide on processing strategy + if len(self.doc.paragraphs) < 30: + # For small documents, use single-threaded processing + self._process_small_document(pages_to_process, from_page, to_page) + else: + # For large documents, use multiprocessing + self._process_large_document(binary, + pages_to_process, + from_page, + to_page, + max_workers, + doc_size_mb, + current_request_id, + ) + + # Process tables + tbls = self._process_tables() + + # Clean up document resources + self.doc = None + self.last_image_map = None + + logger.info(f"Document processing complete, " + f"extracted {len(self.all_lines)} text sections and {len(tbls)} tables") + return self.all_lines, tbls + + def _load_document(self, binary): + """Load document + + Args: + binary: Document binary content + + Returns: + Document: Document object, or None (if 
loading fails) + """ + try: + doc = Document(BytesIO(binary)) + logger.info("Successfully loaded document from binary content") + return doc + except Exception as e: + logger.error(f"Failed to load DOCX document: {str(e)}") + return None + + def _init_shared_resources(self): + """Initialize shared resources""" + # Create shared resource locks to protect data structures shared between threads + self.lines_lock = threading.Lock() + self.last_image_lock = threading.Lock() + + # Initialize result containers + self.all_lines = [] + self.last_image_map = {} # Last image for each page + + def _get_request_id(self): + """Get current request ID""" + current_request_id = None + try: + from utils.request import get_request_id + current_request_id = get_request_id() + logger.info(f"Getting current request ID: {current_request_id} to pass to processing threads") + except Exception as e: + logger.warning(f"Failed to get current request ID: {str(e)}") + return current_request_id + + def _apply_page_limit(self, para_page_mapping, from_page, to_page): + """Apply page limits, return the list of pages to process + + Args: + para_page_mapping: Mapping of pages to paragraphs + from_page: Starting page number + to_page: Ending page number + + Returns: + list: List of pages to process + """ + # Add page limits + total_pages = len(para_page_mapping) + if total_pages > to_page: + logger.info(f"Document has {total_pages} pages, limiting processing to first {to_page} pages") + logger.info(f"Setting to_page limit to {to_page}") + else: + logger.info(f"Document has {total_pages} pages, processing all pages (limit: {to_page})") + + # Filter out pages outside the range + all_pages = sorted(para_page_mapping.keys()) + pages_to_process = [p for p in all_pages if from_page <= p < to_page] + + # Output the actual number of pages processed for debugging + if pages_to_process: + logger.info( + f"Will process {len(pages_to_process)} pages " + f"from page {from_page} to page {min(to_page, pages_to_process[-1] if pages_to_process else from_page)}" + ) + + if len(pages_to_process) < len(all_pages): + logger.info(f"Skipping {len(all_pages) - len(pages_to_process)} pages due to page limit") + + # Log detailed page index information + if len(pages_to_process) <= 10: + logger.info(f"Pages to process: {pages_to_process}") + else: + logger.info(f"First 5 pages to process: {pages_to_process[:5]}, last 5: {pages_to_process[-5:]}") + + return pages_to_process + + def _process_small_document(self, pages_to_process, from_page, to_page): + """Process small documents (less than 30 paragraphs) + + Args: + pages_to_process: List of pages to process + from_page: Starting page number + to_page: Ending page number + """ + logger.info(f"Small document detected ({len(self.doc.paragraphs)} paragraphs), " + f"processing without threading") + for page_num in sorted(pages_to_process): + if from_page <= page_num < to_page: + logger.info(f"Processing page {page_num} (single-threaded mode)") + self._process_page(page_num, self.para_page_mapping[page_num], from_page, to_page) + logger.info(f"Completed processing page {page_num} (single-threaded mode)") + else: + logger.info(f"Skipping page {page_num} (out of requested range: {from_page}-{to_page})") + + def _process_large_document(self, + binary, + pages_to_process, + from_page, + to_page, + max_workers, + doc_size_mb, + current_request_id, + ): + """Process large documents, using multiprocessing + + Args: + binary: Document binary content + pages_to_process: List of pages to process + from_page: Starting 
page number + to_page: Ending page number + max_workers: Maximum number of workers + doc_size_mb: Document size (MB) + current_request_id: Current request ID + """ + # If the number of pages is too large, process in batches to reduce memory consumption + cpu_count = os.cpu_count() or 2 + + # Check if the document contains images to optimize processing speed + doc_contains_images = self._check_document_has_images() + + # Optimize process count: dynamically adjust based on number of pages and CPU cores + if max_workers is None: + max_workers = self._calculate_optimal_workers(doc_contains_images, pages_to_process, cpu_count) + + # Use temporary file to share large documents + temp_file_path = self._prepare_document_sharing(binary, doc_size_mb) + + # Prepare multiprocess processing arguments + args_list = self._prepare_multiprocess_args( + pages_to_process, from_page, to_page, doc_contains_images, + binary, temp_file_path + ) + + # Execute multiprocess tasks + self._execute_multiprocess_tasks(args_list, max_workers) + + # Clean up temporary file + self._cleanup_temp_file(temp_file_path) + + def _check_document_has_images(self): + """Check if the document contains images + + Returns: + bool: Whether the document contains images + """ + doc_contains_images = False + if hasattr(self.doc, 'inline_shapes') and len(self.doc.inline_shapes) > 0: + doc_contains_images = True + logger.info(f"Document contains {len(self.doc.inline_shapes)} inline images") + return doc_contains_images + + def _calculate_optimal_workers(self, doc_contains_images, pages_to_process, cpu_count): + """Calculate the optimal number of workers + + Args: + doc_contains_images: Whether the document contains images + pages_to_process: List of pages to process + cpu_count: Number of CPU cores + + Returns: + int: Optimal number of workers + """ + # If no images or few pages, use fewer processes to avoid overhead + if not doc_contains_images or len(pages_to_process) < cpu_count: + max_workers = min(len(pages_to_process), max(1, cpu_count - 1)) + else: + max_workers = min(len(pages_to_process), cpu_count) + logger.info(f"Automatically set worker count to {max_workers}") + return max_workers + + def _prepare_document_sharing(self, binary, doc_size_mb): + """Prepare document sharing method + + Args: + binary: Document binary content + doc_size_mb: Document size (MB) + + Returns: + str: Temporary file path, or None if not using + """ + # For large documents, consider using temporary file to share data + use_temp_file = doc_size_mb > 50 # If document is larger than 50MB, use temporary file for sharing + temp_file_path = None + + if use_temp_file: + logger.info(f"Large document detected ({doc_size_mb:.2f} MB), using temporary file for sharing") + import tempfile + temp_file = tempfile.NamedTemporaryFile(delete=False) + temp_file_path = temp_file.name + temp_file.write(binary) + temp_file.close() + logger.info(f"Wrote document to temporary file: {temp_file_path}") + + return temp_file_path + + def _prepare_multiprocess_args(self, pages_to_process, from_page, to_page, + doc_contains_images, binary, temp_file_path): + """Prepare a list of arguments for multiprocess processing + + Args: + pages_to_process: List of pages to process + from_page: Starting page number + to_page: Ending page number + doc_contains_images: Whether the document contains images + binary: Document binary content + temp_file_path: Temporary file path + + Returns: + list: List of arguments + """ + use_temp_file = temp_file_path is not None + + logger.info(f"Preparing 
multiprocess args with enable_multimodal={self.enable_multimodal}") + + # Pass parameters required by the page processing function + args_list = [] + for page_num in pages_to_process: + args_list.append(( + page_num, + self.para_page_mapping[page_num], + from_page, + to_page, + doc_contains_images, + self.max_image_size, + binary if not use_temp_file else None, + temp_file_path if use_temp_file else None, + self.enable_multimodal + )) + + return args_list + + def _execute_multiprocess_tasks(self, args_list, max_workers): + """Execute multiprocess tasks + + Args: + args_list: List of arguments + max_workers: Maximum number of workers + """ + # Use a shared manager to share data + with Manager() as manager: + # Create shared data structures + self.all_lines = manager.list() + self.last_image_map = manager.dict() + + logger.info(f"Processing {len(args_list)} pages using {max_workers} processes") + + # Use ProcessPoolExecutor to truly implement multi-core parallelization + batch_start_time = time.time() + with ProcessPoolExecutor(max_workers=max_workers) as executor: + logger.info(f"Started ProcessPoolExecutor with {max_workers} workers") + + # Submit all tasks + future_to_idx = { + executor.submit( + process_page_multiprocess, + *args + ): i + for i, args in enumerate(args_list) + } + logger.info(f"Submitted {len(future_to_idx)} processing tasks to process pool") + + # Collect results + self._collect_process_results(future_to_idx, args_list, batch_start_time) + + def _collect_process_results(self, future_to_idx, args_list, batch_start_time): + """Collect multiprocess processing results + + Args: + future_to_idx: Mapping of Future to index + args_list: List of arguments + batch_start_time: Batch start time + """ + # Collect results + completed_count = 0 + results = [] + temp_img_paths = set() # Collect all temporary image paths + + for future in as_completed(future_to_idx): + idx = future_to_idx[future] + page_num = args_list[idx][0] + try: + page_lines = future.result() + + # Collect temporary image paths for later cleanup + for line in page_lines: + if len(line) > 1 and isinstance(line[1], list): + for item in line[1]: + if isinstance(item, str) and item.startswith("/tmp/docx_img_"): + temp_img_paths.add(item) + + results.extend(page_lines) + completed_count += 1 + + if completed_count % max(1, len(args_list) // 10) == 0 or completed_count == len(args_list): + elapsed_ms = int((time.time() - batch_start_time) * 1000) + progress_pct = int((completed_count / len(args_list)) * 100) + logger.info( + f"Progress: {completed_count}/{len(args_list)} pages processed " + f"({progress_pct}%, elapsed: {elapsed_ms}ms)" + ) + + except Exception as e: + logger.error(f"Error processing page {page_num}: {str(e)}") + logger.error(f"Detailed traceback for page {page_num}: {traceback.format_exc()}") + + # Process completion + processing_elapsed_ms = int((time.time() - batch_start_time) * 1000) + logger.info(f"All processing completed in {processing_elapsed_ms}ms") + + # Process results + self._process_multiprocess_results(results) + + # Clean up temporary image files + self._cleanup_temp_image_files(temp_img_paths) + + def _process_multiprocess_results(self, results): + """Process multiprocess results + + Args: + results: List of processed results + """ + lines = list(results) + + # Process images - must be handled in the main process for upload + # If images are being processed, they need to be handled in the main process for upload + image_upload_start = time.time() + + # Count total images to process + 
images_to_process = [] + for i, line_data in enumerate(lines): + if len(line_data) > 1 and line_data[1]: # Check if there are images + # Confirm image data is valid - it might be lost during inter-process communication + if isinstance(line_data[1], list) and len(line_data[1]) > 0: + images_to_process.append(i) + logger.info(f"Found line {i} with {len(line_data[1])} images to process") + else: + logger.warning(f"Line {i} has invalid image data: {type(line_data[1])}") + + # Process images if needed + image_url_map = {} # Map from image path to URL + if images_to_process: + logger.info(f"Found {len(images_to_process)} lines with images to process in main process") + + # First, create a mapping of image paths to uploaded URLs + for line_idx in images_to_process: + line_data = lines[line_idx] + _, image_paths, _, page_num = line_data[:4] + + # Process all image file paths + for img_path in image_paths: + if os.path.exists(img_path) and img_path not in image_url_map: + try: + image_url = self.upload_file(img_path) + if image_url: + # Add image URL as Markdown format + markdown_image = f"![]({image_url})" + image_url_map[img_path] = markdown_image + logger.info(f"Added image URL for {img_path}: {image_url}") + else: + logger.warning(f"Failed to upload image: {img_path}") + except Exception as e: + logger.error(f"Error processing image from page {page_num}: {str(e)}") + + image_upload_elapsed = time.time() - image_upload_start + logger.info(f"Finished uploading {len(image_url_map)} images in {image_upload_elapsed:.2f}s") + + # Process content in original sequence order + processed_lines = [] + for line_data in lines: + if len(line_data) >= 5 and line_data[4]: # Check if we have processed_content + processed_content = line_data[4] + page_num = line_data[3] + + # Reconstruct text with images in original positions + combined_parts = [] + for content_type, content in processed_content: + if content_type == "text": + combined_parts.append(content) + elif content_type == "image" and content in image_url_map: + combined_parts.append(image_url_map[content]) + + # Create the final text with proper ordering + final_text = "\n\n".join(part for part in combined_parts if part) + processed_lines.append((final_text, None, "", page_num)) + + else: + # Fallback to original approach if no processed_content + text, image_paths, _, page_num = line_data[:4] + + if image_paths: + # Process all image URLs + processed_urls = [] + for img_path in image_paths: + if img_path in image_url_map: + processed_urls.append(image_url_map[img_path]) + + if processed_urls: + # Add all images at the end (legacy approach) + image_text = "\n\n".join(processed_urls) + if text: + updated_text = text + "\n\n" + image_text + else: + updated_text = image_text + processed_lines.append((updated_text, None, "", page_num)) + else: + processed_lines.append((text, None, "", page_num)) + else: + processed_lines.append((text, None, "", page_num)) + + # Sort results by page number + sorted_lines = sorted(processed_lines, key=lambda x: x[3] if len(x) > 3 else 0) + self.all_lines = [(line[0], line[1], line[2]) for line in sorted_lines if len(line) > 2] + + logger.info(f"Finished processing {len(self.all_lines)} lines with interleaved images and text") + + def _cleanup_temp_image_files(self, temp_paths): + """Clean up temporary image files created by multiprocessing + + Args: + temp_paths: Set of temporary file paths + """ + if not temp_paths: + return + + logger.info(f"Cleaning up {len(temp_paths)} temporary image files") + deleted_count = 0 + 
error_count = 0 + + for path in temp_paths: + try: + if os.path.exists(path): + os.unlink(path) + deleted_count += 1 + # Delete temporary directory (if empty) + try: + temp_dir = os.path.dirname(path) + if temp_dir.startswith("/tmp/docx_img_") and os.path.exists(temp_dir): + os.rmdir(temp_dir) + except OSError: + # If directory is not empty, ignore error + pass + except Exception as e: + logger.error(f"Failed to delete temp file {path}: {str(e)}") + error_count += 1 + + logger.info(f"Temporary file cleanup: deleted {deleted_count}, errors {error_count}") + + def _process_images_in_main_process(self, lines, images_to_process): + """Process image uploads in the main process + + Args: + lines: List of line data + images_to_process: Indices of lines with images to process + """ + # Process all images + with tempfile.TemporaryDirectory() as temp_dir: + for line_idx in images_to_process: + line_data = lines[line_idx] + text, image_data, _, page_num = line_data + + image_paths = [] + + # If it's a list of image objects (single-process), save them as temporary files + if image_data and isinstance(image_data, list): + if all(isinstance(item, str) for item in image_data): + # Multi-process mode, images are already saved as temporary files + logger.info(f"Found {len(image_data)} image paths for page {page_num}") + image_paths = image_data + elif all(hasattr(item, 'save') for item in image_data): + # Single-process mode, need to save image objects as temporary files + logger.info(f"Converting {len(image_data)} image objects to files for page {page_num}") + for img_idx, image in enumerate(image_data): + try: + # Save temporary image file + temp_file_path = os.path.join(temp_dir, f"page_{page_num}_img_{img_idx}.png") + image.save(temp_file_path, format="PNG") + image_paths.append(temp_file_path) + logger.info(f"Saved image from page {page_num} to {temp_file_path}") + except Exception as e: + logger.error(f"Failed to save image: {str(e)}") + else: + logger.warning(f"Unknown image data format for page {page_num}: {type(image_data[0])}") + + # Process all image file paths + processed_urls = [] + for img_path in image_paths: + if os.path.exists(img_path): + try: + image_url = self.upload_file(img_path) + if image_url: + # Add image URL as Markdown format + markdown_image = f"![]({image_url})" + processed_urls.append(markdown_image) + logger.info(f"Added image URL to page {page_num} result: {image_url}") + else: + logger.warning(f"Failed to upload image: {img_path}") + + # Do not delete temporary files created by multiprocessing, they will be cleaned up later + if not img_path.startswith("/tmp/docx_img_"): + try: + os.unlink(img_path) + logger.info(f"Deleted temporary file: {img_path}") + except Exception as e: + logger.error(f"Failed to delete temporary file: {str(e)}") + + except Exception as e: + logger.error(f"Error processing image from page {page_num}: {str(e)}") + + # Update text, add all image links + if processed_urls: + image_text = "\n\n".join(processed_urls) + if text: + updated_text = text + "\n\n" + image_text + else: + updated_text = image_text + + # Update text in results, clear image list (since processing is complete) + lines[line_idx] = (updated_text, [], "", page_num) + logger.info(f"Updated text for page {page_num} with {len(processed_urls)} image URLs") + + def _cleanup_temp_file(self, temp_file_path): + """Clean up temporary file + + Args: + temp_file_path: Temporary file path + """ + if temp_file_path and os.path.exists(temp_file_path): + try: + os.unlink(temp_file_path) + 
logger.info(f"Removed temporary file: {temp_file_path}") + except Exception as e: + logger.error(f"Failed to remove temporary file: {str(e)}") + + def _process_tables(self): + """Process tables in the document + + Returns: + list: List of tables + """ + tbls = [] + table_count = len(self.doc.tables) + if table_count > 0: + logger.info(f"Processing {table_count} tables") + for tb_idx, tb in enumerate(self.doc.tables): + if tb_idx % 10 == 0: # Log only every 10 tables to reduce log volume + logger.info(f"Processing table {tb_idx+1}/{table_count}") + + # Optimize: Check if table is empty + if len(tb.rows) == 0 or all(len(r.cells) == 0 for r in tb.rows): + logger.info(f"Skipping empty table {tb_idx+1}") + continue + + table_html = self._convert_table_to_html(tb) + tbls.append(((None, table_html), "")) + + return tbls + + def _convert_table_to_html(self, table): + """Convert table to HTML + + Args: + table: Table object + + Returns: + str: HTML formatted table + """ + html = "" + for r in table.rows: + html += "" + i = 0 + while i < len(r.cells): + span = 1 + c = r.cells[i] + for j in range(i + 1, len(r.cells)): + if c.text == r.cells[j].text: + span += 1 + i = j + i += 1 + html += ( + f"" + if span == 1 + else f"" + ) + html += "" + html += "
{c.text}{c.text}
" + return html + + def _safe_concat_images(self, images): + """Safely concatenate image lists + + Args: + images: List of images + + Returns: + Image: Concatenated image, or the first image (if concatenation fails) + """ + if not images: + return None + + if len(images) == 1: + return images[0] + + try: + logger.info(f"Attempting to concatenate {len(images)} images") + from PIL import Image + + # Calculate the size of the concatenated image + total_width = max(img.width for img in images if hasattr(img, 'width')) + total_height = sum(img.height for img in images if hasattr(img, 'height')) + + if total_width <= 0 or total_height <= 0: + logger.warning("Invalid image size, returning the first image") + return images[0] + + # Create a new image + new_image = Image.new("RGBA", (total_width, total_height), (0, 0, 0, 0)) + + # Paste images one by one + y_offset = 0 + for img in images: + if not hasattr(img, 'width') or not hasattr(img, 'height'): + continue + + new_image.paste(img, (0, y_offset)) + y_offset += img.height + + logger.info(f"Successfully concatenated images, final size: {total_width}x{total_height}") + return new_image + except Exception as e: + logger.error(f"Failed to concatenate images: {str(e)}") + logger.error(f"Detailed error: {traceback.format_exc()}") + # If concatenation fails, return the first image + return images[0] + + + +def _save_image_to_temp(logger, image, page_num, img_idx): + """Save image to a temporary file to pass between processes + + Args: + logger: Logger + image: PIL image object + page_num: Page number + img_idx: Image index + + Returns: + str: Temporary file path, or None (if saving fails) + """ + if not image: + return None + + import tempfile + import os + + try: + # Create a temporary file + temp_dir = tempfile.mkdtemp(prefix="docx_img_") + temp_file_path = os.path.join(temp_dir, f"page_{page_num}_img_{img_idx}.png") + + # Save the image + image.save(temp_file_path, format="PNG") + logger.info(f"[PID:{os.getpid()}] Saved image to temporary file: {temp_file_path}") + + return temp_file_path + except Exception as e: + logger.error(f"[PID:{os.getpid()}] Failed to save image to temp file: {str(e)}") + return None + + +def process_page_multiprocess(page_num, + paragraphs, + from_page, + to_page, + doc_contains_images, + max_image_size, + doc_binary, + temp_file_path, + enable_multimodal, + ): + """Page processing function specifically designed for multiprocessing + + Args: + page_num: Page number + paragraphs: List of paragraph indices + from_page: Starting page number + to_page: Ending page number + doc_contains_images: Whether the document contains images + max_image_size: Maximum image size + doc_binary: Document binary content + temp_file_path: Temporary file path, if using + enable_multimodal: Whether to enable multimodal processing + + Returns: + list: List of processed result lines + """ + try: + # Set process-level logging + process_logger = logging.getLogger(__name__) + + # If outside processing range, do not process + if page_num < from_page or page_num >= to_page: + process_logger.info(f"[PID:{os.getpid()}] Skipping page {page_num} (out of requested range)") + return [] + + process_logger.info(f"[PID:{os.getpid()}] Processing page {page_num} with {len(paragraphs)} paragraphs, " + f"enable_multimodal={enable_multimodal}") + start_time = time.time() + + # Load document in the process + doc = _load_document_in_process(process_logger, page_num, doc_binary, temp_file_path) + if not doc: + return [] + + # If paragraph indices are empty, return empty 
result + if not paragraphs: + process_logger.info(f"[PID:{os.getpid()}] No paragraphs to process for page {page_num}") + return [] + + # Extract page content + combined_text, images, content_sequence = _extract_page_content_in_process( + process_logger, + doc, + page_num, + paragraphs, + enable_multimodal, + max_image_size + ) + + # Process content sequence to maintain order between processes + processed_content = [] + temp_image_index = 0 + + if enable_multimodal: + # First pass: save all images to temporary files + image_paths = [] + for i, img in enumerate(images): + img_path = _save_image_to_temp(process_logger, img, page_num, i) + if img_path: + image_paths.append(img_path) + + process_logger.info( + f"[PID:{os.getpid()}] Saved {len(image_paths)} images to temp files for page {page_num}" + ) + + # Second pass: reconstruct the content sequence with image paths + for content_type, content in content_sequence: + if content_type == "text": + processed_content.append(("text", content)) + else: # image + if temp_image_index < len(image_paths): + processed_content.append(("image", image_paths[temp_image_index])) + temp_image_index += 1 + + # Create result line with the ordered content sequence + line_data = (combined_text, image_paths, "", page_num, processed_content) + page_lines = [line_data] + + processing_time = time.time() - start_time + process_logger.info( + f"[PID:{os.getpid()}] Page {page_num} processing completed in {processing_time:.2f}s" + ) + + return page_lines + + except Exception as e: + process_logger = logging.getLogger(__name__) + process_logger.error(f"[PID:{os.getpid()}] Error processing page {page_num}: {str(e)}") + process_logger.error(f"[PID:{os.getpid()}] Traceback: {traceback.format_exc()}") + return [] + + +def _load_document_in_process(logger, page_num, doc_binary, temp_file_path): + """Load document in a process + + Args: + logger: Logger + page_num: Page number + doc_binary: Document binary data + temp_file_path: Temporary file path + + Returns: + Document: Loaded document object, or None (if loading fails) + """ + logger.info(f"[PID:{os.getpid()}] Loading document in process for page {page_num}") + try: + from docx import Document + from io import BytesIO + + # Load document based on the provided method + if doc_binary is not None: + # Load document from binary content + doc = Document(BytesIO(doc_binary)) + logger.info(f"[PID:{os.getpid()}] Loaded document from binary data") + elif temp_file_path is not None: + # Load document from temporary file + doc = Document(temp_file_path) + logger.info(f"[PID:{os.getpid()}] Loaded document from temp file: {temp_file_path}") + else: + logger.error(f"[PID:{os.getpid()}] No document source provided") + return None + + logger.info(f"[PID:{os.getpid()}] " + f"Successfully loaded document with {len(doc.paragraphs)} paragraphs") + return doc + + except Exception as e: + logger.error(f"[PID:{os.getpid()}] Failed to load document: {str(e)}") + logger.error(f"[PID:{os.getpid()}] Error traceback: {traceback.format_exc()}") + return None + + +def _extract_page_content_in_process(logger, doc, page_num, paragraphs, enable_multimodal, max_image_size): + """Extract page content in a process + + Args: + logger: Logger + doc: Document object + page_num: Page number + paragraphs: List of paragraph indices + enable_multimodal: Whether to enable multimodal processing + max_image_size: Maximum image size + + Returns: + tuple: (Extracted text, List of extracted images) + """ + logger.info(f"[PID:{os.getpid()}] Page {page_num}: Processing 
{len(paragraphs)} paragraphs, " + f"enable_multimodal={enable_multimodal}") + + # Instead of separate collections, track content in paragraph sequence + content_sequence = [] + current_text = "" + + processed_paragraphs = 0 + paragraphs_with_text = 0 + paragraphs_with_images = 0 + + for para_idx in paragraphs: + if para_idx >= len(doc.paragraphs): + logger.warning(f"[PID:{os.getpid()}] Paragraph index {para_idx} out of range") + continue + + paragraph = doc.paragraphs[para_idx] + processed_paragraphs += 1 + + + # Extract text content + text = paragraph.text.strip() + if text: + # Clean text + cleaned_text = re.sub(r"\u3000", " ", text).strip() + current_text += cleaned_text + "\n" + paragraphs_with_text += 1 + + # Process image - if multimodal processing is enabled + if enable_multimodal: + image = _extract_image_in_process(logger, doc, paragraph, page_num, para_idx, max_image_size) + if image: + # If we have accumulated text, add it to sequence first + if current_text: + content_sequence.append(("text", current_text)) + current_text = "" + + # Add image to sequence + content_sequence.append(("image", image)) + paragraphs_with_images += 1 + + if processed_paragraphs % 50 == 0: + logger.info( + f"[PID:{os.getpid()}] " + f"Page {page_num}: Processed {processed_paragraphs}/{len(paragraphs)} paragraphs" + ) + + # Add any remaining text + if current_text: + content_sequence.append(("text", current_text)) + + logger.info( + f"[PID:{os.getpid()}] Page {page_num}: Completed content extraction, " + f"found {paragraphs_with_text} paragraphs with text, " + f"{paragraphs_with_images} with images, " + f"total content items: {len(content_sequence)}" + ) + + # Extract text and images in their original sequence + text_parts = [] + images = [] + + for content_type, content in content_sequence: + if content_type == "text": + text_parts.append(content) + else: # image + images.append(content) + + combined_text = "\n\n".join(text_parts) if text_parts else "" + + return combined_text, images, content_sequence + + +def _extract_image_in_process(logger, doc, paragraph, page_num, para_idx, max_image_size): + """Extract image from a paragraph in a process + + Args: + logger: Logger + doc: Document object + paragraph: Paragraph object + page_num: Page number + para_idx: Paragraph index + max_image_size: Maximum image size + + Returns: + Image: Extracted image object, or None + """ + try: + # Attempt to extract image + img = paragraph._element.xpath(".//pic:pic") + if not img: + logger.warning(f"[PID:{os.getpid()}] Page {page_num}: No pic element found in paragraph {para_idx}") + return None + + img = img[0] + logger.info(f"[PID:{os.getpid()}] Page {page_num}: Found pic element in paragraph {para_idx}") + + try: + # Extract image ID and related part + embed = img.xpath(".//a:blip/@r:embed") + if not embed: + logger.warning(f"[PID:{os.getpid()}] Page {page_num}: No embed attribute found in image") + return None + + embed = embed[0] + if embed not in doc.part.related_parts: + logger.warning(f"[PID:{os.getpid()}] Page {page_num}: Embed ID {embed} not found in related parts") + return None + + related_part = doc.part.related_parts[embed] + logger.info(f"[PID:{os.getpid()}] Found embedded image with ID: {embed}") + + # Attempt to get image data + try: + image_blob = related_part.image.blob + logger.info(f"[PID:{os.getpid()}] Successfully extracted image blob, size: {len(image_blob)} bytes") + except Exception as blob_error: + logger.warning( + f"[PID:{os.getpid()}] Error extracting image blob: {str(blob_error)}" + ) + 
return None
+
+            # Convert data to PIL image
+            try:
+                from PIL import Image
+                from io import BytesIO
+                image = Image.open(BytesIO(image_blob)).convert("RGBA")
+
+                # Check image size
+                if hasattr(image, 'width') and hasattr(image, 'height'):
+                    logger.info(
+                        f"[PID:{os.getpid()}] Successfully created image object, "
+                        f"size: {image.width}x{image.height}"
+                    )
+
+                    # Skip small images (usually decorative elements)
+                    if image.width < 50 or image.height < 50:
+                        logger.info(
+                            f"[PID:{os.getpid()}] "
+                            f"Skipping small image ({image.width}x{image.height})"
+                        )
+                        return None
+
+                    # Scale large images
+                    if image.width > max_image_size or image.height > max_image_size:
+                        scale = min(max_image_size / image.width, max_image_size / image.height)
+                        new_width = int(image.width * scale)
+                        new_height = int(image.height * scale)
+                        resized_image = image.resize((new_width, new_height))
+                        logger.info(f"[PID:{os.getpid()}] Resized image to {new_width}x{new_height}")
+                        return resized_image
+
+                logger.info(f"[PID:{os.getpid()}] Found image in paragraph {para_idx}")
+                return image
+            except Exception as e:
+                logger.error(f"[PID:{os.getpid()}] Failed to create image from blob: {str(e)}")
+                logger.error(f"[PID:{os.getpid()}] Error traceback: {traceback.format_exc()}")
+                return None
+        except Exception as e:
+            logger.error(f"[PID:{os.getpid()}] Error extracting image: {str(e)}")
+            logger.error(f"[PID:{os.getpid()}] Error traceback: {traceback.format_exc()}")
+            return None
+    except Exception as e:
+        logger.error(f"[PID:{os.getpid()}] Error processing image: {str(e)}")
+        logger.error(f"[PID:{os.getpid()}] Error traceback: {traceback.format_exc()}")
+        return None
+    finally:
+        logger.info(f"[PID:{os.getpid()}] Page {page_num}: Image extraction completed")
diff --git a/services/docreader/src/parser/image_parser.py b/services/docreader/src/parser/image_parser.py
new file mode 100644
index 0000000..fb94449
--- /dev/null
+++ b/services/docreader/src/parser/image_parser.py
@@ -0,0 +1,58 @@
+import logging
+import os
+import asyncio
+from PIL import Image
+import io
+from .base_parser import BaseParser, ParseResult
+import numpy as np
+
+# Set up logger for this module
+logger = logging.getLogger(__name__)
+
+
+class ImageParser(BaseParser):
+    """
+    Parser for standalone image files.
+    Uploads the image to storage and returns a Markdown reference to it.
+
+    This parser handles image processing by:
+    1. Uploading the image to storage
+    2. Building a Markdown image reference to the uploaded file
+    3. Returning that reference as the parsed text
+    (OCR and caption generation are intentionally not performed here.)
+    """
+
+    def parse_into_text(self, content: bytes) -> str:
+        """
+        Parse image content: upload the image and return a Markdown reference (no OCR or caption processing).
+
+        Args:
+            content: Raw image data (bytes)
+
+        Returns:
+            String containing Markdown image reference
+        """
+        logger.info(f"Parsing image content, size: {len(content)} bytes")
+        try:
+            # Upload image to storage service
+            logger.info("Uploading image to storage")
+            _, ext = os.path.splitext(self.file_name)
+            image_url = self.upload_bytes(content, file_ext=ext)
+            if not image_url:
+                logger.error("Failed to upload image to storage")
+                return ""
+            logger.info(
+                f"Successfully uploaded image, URL: {image_url[:50]}..."
+ if len(image_url) > 50 + else f"Successfully uploaded image, URL: {image_url}" + ) + + # Directly generate Markdown image reference, no OCR or caption processing + markdown_text = f"![{self.file_name}]({image_url})" + logger.info("Generated Markdown image reference without OCR or caption processing") + + return markdown_text + + except Exception as e: + logger.error(f"Error parsing image: {str(e)}") + return "" diff --git a/services/docreader/src/parser/markdown_parser.py b/services/docreader/src/parser/markdown_parser.py new file mode 100644 index 0000000..7e50451 --- /dev/null +++ b/services/docreader/src/parser/markdown_parser.py @@ -0,0 +1,33 @@ +import asyncio +import re +import logging +import numpy as np +import os # Import os module to get environment variables +from typing import Dict, List, Optional, Tuple +from .base_parser import BaseParser + +# Get logger object +logger = logging.getLogger(__name__) + + +class MarkdownParser(BaseParser): + """Markdown document parser""" + + def parse_into_text(self, content: bytes) -> str: + """Parse Markdown document, only extract text content, do not process images + + Args: + content: Markdown document content + + Returns: + Parsed text result + """ + logger.info(f"Parsing Markdown document, content size: {len(content)} bytes") + + # Convert byte content to string using universal decoding method + text = self.decode_bytes(content) + logger.info(f"Decoded Markdown content, text length: {len(text)} characters") + + logger.info(f"Markdown parsing complete, extracted {len(text)} characters of text") + return text + diff --git a/services/docreader/src/parser/ocr_engine.py b/services/docreader/src/parser/ocr_engine.py new file mode 100644 index 0000000..b7ae673 --- /dev/null +++ b/services/docreader/src/parser/ocr_engine.py @@ -0,0 +1,268 @@ +import os +import logging +import base64 +from typing import Optional, Union, Dict, Any +from abc import ABC, abstractmethod +from PIL import Image +import io +import numpy as np + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +class OCRBackend(ABC): + """Base class for OCR backends""" + + @abstractmethod + def predict(self, image: Union[str, bytes, Image.Image]) -> str: + """Extract text from an image + + Args: + image: Image file path, bytes, or PIL Image object + + Returns: + Extracted text + """ + pass + +class PaddleOCRBackend(OCRBackend): + """PaddleOCR backend implementation""" + + def __init__(self, **kwargs): + """Initialize PaddleOCR backend""" + self.ocr = None + try: + from paddleocr import PaddleOCR + # Default OCR configuration + ocr_config = { + "use_doc_orientation_classify": False, # Do not use document image orientation classification + "use_doc_unwarping": False, # Do not use document unwarping + "use_textline_orientation": False, # Do not use textline orientation classification + "text_recognition_model_name": "PP-OCRv5_server_rec", + "text_detection_model_name": "PP-OCRv5_server_det", + "text_recognition_model_dir": "/root/.paddlex/official_models/PP-OCRv5_server_rec_infer", + "text_detection_model_dir": "/root/.paddlex/official_models/PP-OCRv5_server_det_infer", + "text_det_limit_type": "min", # Limit by short side + "text_det_limit_side_len": 736, # Limit side length to 736 + "text_det_thresh": 0.3, # Text detection pixel threshold + "text_det_box_thresh": 0.6, # Text detection box threshold + "text_det_unclip_ratio": 1.5, # Text detection expansion ratio + "text_rec_score_thresh": 0.0, # Text recognition confidence threshold + "ocr_version": "PP-OCRv5", # 
Switch to PP-OCRv4 here to compare + "lang": "ch", + } + + self.ocr = PaddleOCR(**ocr_config) + logger.info("PaddleOCR engine initialized successfully") + except ImportError: + logger.error("Failed to import paddleocr. Please install it with 'pip install paddleocr'") + except Exception as e: + logger.error(f"Failed to initialize PaddleOCR: {str(e)}") + + def predict(self, image): + """Perform OCR recognition on the image + + Args: + image: Image object (PIL.Image or numpy array) + + Returns: + Extracted text string + """ + try: + # Ensure image is in RGB format + if hasattr(image, "convert") and image.mode != "RGBA": + img_for_ocr = image.convert("RGBA") + logger.info(f"Converted image from {image.mode} to RGB format") + else: + img_for_ocr = image + + # Convert to numpy array if not already + if hasattr(img_for_ocr, "convert"): + image_array = np.array(img_for_ocr) + else: + image_array = img_for_ocr + + ocr_result = self.ocr.predict(image_array) + logger.info(f"ocr_result: {ocr_result}") + + # Extract text + if ocr_result and any(ocr_result): + ocr_text = "" + for image_result in ocr_result: + ocr_text = ocr_text + " ".join(image_result["rec_texts"]) + text_length = len(ocr_text) + if text_length > 0: + logger.info(f"OCR extracted {text_length} characters") + logger.info( + f"OCR text sample: {ocr_text[:100]}..." + if text_length > 100 + else f"OCR text: {ocr_text}" + ) + return ocr_text + else: + logger.warning("OCR returned empty result") + else: + logger.warning("OCR did not return any result") + return "" + except Exception as e: + logger.error(f"OCR recognition error: {str(e)}") + return "" +class NanonetsOCRBackend(OCRBackend): + """Nanonets OCR backend implementation using OpenAI API format""" + + def __init__(self, **kwargs): + """Initialize Nanonets OCR backend + + Args: + api_key: API key for OpenAI API + base_url: Base URL for OpenAI API + model: Model name + """ + try: + from openai import OpenAI + self.api_key = kwargs.get("api_key", "123") + self.base_url = kwargs.get("base_url", "http://localhost:8000/v1") + self.model = kwargs.get("model", "nanonets/Nanonets-OCR-s") + self.temperature = kwargs.get("temperature", 0.0) + self.max_tokens = kwargs.get("max_tokens", 15000) + + self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) + self.prompt = """ +## 任务说明 + +请从上传的文档中提取文字内容,严格按自然阅读顺序(从上到下,从左到右)输出,并遵循以下格式规范。 + +### 1. **文本处理** + +* 按正常阅读顺序提取文字,语句流畅自然。 + +### 2. **表格** + +* 所有表格统一转换为 **Markdown 表格格式**。 +* 内容保持清晰、对齐整齐,便于阅读。 + +### 3. **公式** + +* 所有公式转换为 **LaTeX 格式**,使用 `$$公式$$` 包裹。 + +### 4. **图片** + +* 忽略图片信息 + +### 5. **链接** + +* 不要猜测或补全不确定的链接地址。 +""" + logger.info(f"Nanonets OCR engine initialized with model: {self.model}") + except ImportError: + logger.error("Failed to import openai. 
Please install it with 'pip install openai'") + self.client = None + except Exception as e: + logger.error(f"Failed to initialize Nanonets OCR: {str(e)}") + self.client = None + + def _encode_image(self, image: Union[str, bytes, Image.Image]) -> str: + """Encode image to base64 + + Args: + image: Image file path, bytes, or PIL Image object + + Returns: + Base64 encoded image + """ + try: + if isinstance(image, str): + # It's a file path + with open(image, "rb") as image_file: + return base64.b64encode(image_file.read()).decode("utf-8") + elif isinstance(image, bytes): + # It's bytes data + return base64.b64encode(image).decode("utf-8") + elif isinstance(image, Image.Image): + # It's a PIL Image + buffer = io.BytesIO() + image.save(buffer, format="PNG") + return base64.b64encode(buffer.getvalue()).decode("utf-8") + else: + logger.error(f"Unsupported image type: {type(image)}") + return "" + except Exception as e: + logger.error(f"Error encoding image: {str(e)}") + return "" + + def predict(self, image: Union[str, bytes, Image.Image]) -> str: + """Extract text from an image using Nanonets OCR + + Args: + image: Image file path, bytes, or PIL Image object + + Returns: + Extracted text + """ + if self.client is None: + logger.error("Nanonets OCR client not initialized") + return "" + + try: + # Encode image to base64 + img_base64 = self._encode_image(image) + if not img_base64: + return "" + + # Call Nanonets OCR API + logger.info(f"Calling Nanonets OCR API with model: {self.model}") + response = self.client.chat.completions.create( + model=self.model, + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{img_base64}"}, + }, + { + "type": "text", + "text": self.prompt, + }, + ], + } + ], + temperature=self.temperature, + max_tokens=self.max_tokens + ) + + return response.choices[0].message.content + except Exception as e: + logger.error(f"Nanonets OCR prediction error: {str(e)}") + return "" + +class OCREngine: + """OCR Engine factory class""" + + _instance = None + + @classmethod + def get_instance(cls, backend_type="paddle", **kwargs) -> Optional[OCRBackend]: + """Get OCR engine instance + + Args: + backend_type: OCR backend type, one of: "paddle", "nanonets" + **kwargs: Additional arguments for the backend + + Returns: + OCR engine instance or None if initialization fails + """ + if cls._instance is None: + logger.info(f"Initializing OCR engine with backend: {backend_type}") + + if backend_type.lower() == "paddle": + cls._instance = PaddleOCRBackend(**kwargs) + elif backend_type.lower() == "nanonets": + cls._instance = NanonetsOCRBackend(**kwargs) + else: + logger.error(f"Unknown OCR backend type: {backend_type}") + return None + + return cls._instance + diff --git a/services/docreader/src/parser/parser.py b/services/docreader/src/parser/parser.py new file mode 100644 index 0000000..14c30a7 --- /dev/null +++ b/services/docreader/src/parser/parser.py @@ -0,0 +1,222 @@ +import logging +from dataclasses import dataclass, field +from typing import Dict, Any, Optional, Type + +from .base_parser import BaseParser, ParseResult +from .docx_parser import DocxParser +from .doc_parser import DocParser +from .pdf_parser import PDFParser +from .markdown_parser import MarkdownParser +from .text_parser import TextParser +from .image_parser import ImageParser +from .web_parser import WebParser +import traceback + +logger = logging.getLogger(__name__) + + +@dataclass +class ChunkingConfig: + """ + Configuration for text chunking process. 
+ Controls how documents are split into smaller pieces for processing. + """ + + chunk_size: int = 512 # Maximum size of each chunk in tokens/chars + chunk_overlap: int = 50 # Number of tokens/chars to overlap between chunks + separators: list = field( + default_factory=lambda: ["\n\n", "\n", "。"] + ) # Text separators in order of priority + enable_multimodal: bool = ( + False # Whether to enable multimodal processing (text + images) + ) + + +@dataclass +class Chunk: + """ + Represents a single text chunk with associated metadata. + Basic unit for document processing and embedding. + """ + + content: str # Text content of the chunk + metadata: Dict[str, Any] = None # Associated metadata (source, page number, etc.) + + +class Parser: + """ + Document parser facade that integrates all specialized parsers. + Provides a unified interface for parsing various document types. + """ + + def __init__(self): + logger.info("Initializing document parser") + # Initialize all parser types + self.parsers: Dict[str, Type[BaseParser]] = { + "docx": DocxParser, + "doc": DocParser, + "pdf": PDFParser, + "md": MarkdownParser, + "txt": TextParser, + "jpg": ImageParser, + "jpeg": ImageParser, + "png": ImageParser, + "gif": ImageParser, + "bmp": ImageParser, + "tiff": ImageParser, + "webp": ImageParser, + "markdown": MarkdownParser, + } + logger.info( + "Parser initialized with %d parsers: %s", + len(self.parsers), + ", ".join(self.parsers.keys()), + ) + + + def get_parser(self, file_type: str) -> Optional[Type[BaseParser]]: + """ + Get parser class for the specified file type. + + Args: + file_type: The file extension or type identifier + + Returns: + Parser class for the file type, or None if unsupported + """ + file_type = file_type.lower() + parser = self.parsers.get(file_type) + if parser: + logger.info(f"Found parser for file type: {file_type}") + else: + logger.warning(f"No parser found for file type: {file_type}") + return parser + + def parse_file( + self, + file_name: str, + file_type: str, + content: bytes, + config: ChunkingConfig, + enable_multimodal: bool = False, + ) -> Optional[ParseResult]: + """ + Parse file content using appropriate parser based on file type. 
+ + Args: + file_name: Name of the file being parsed + file_type: Type/extension of the file + content: Raw file content as bytes + config: Configuration for chunking process + + Returns: + ParseResult containing chunks and metadata, or None if parsing failed + """ + logger.info(f"Parsing file: {file_name} with type: {file_type}") + logger.info( + f"Chunking config: size={config.chunk_size}, overlap={config.chunk_overlap}, " + f"multimodal={config.enable_multimodal}" + ) + + parser_instance = None + + try: + # Get appropriate parser for file type + cls = self.get_parser(file_type) + if cls is None: + logger.error(f"Unsupported file type: {file_type}") + return None + + # Parse file content + logger.info(f"Creating parser instance for {file_type} file") + parser_instance = cls( + file_name=file_name, + file_type=file_type, + chunk_size=config.chunk_size, + chunk_overlap=config.chunk_overlap, + separators=config.separators, + enable_multimodal=config.enable_multimodal, + max_image_size=1920, # Limit image size to 1920px + max_concurrent_tasks=5, # Limit concurrent tasks to 5 + ) + + logger.info(f"Starting to parse file content, size: {len(content)} bytes") + result = parser_instance.parse(content) + + if result: + logger.info( + f"Successfully parsed file {file_name}, generated {len(result.chunks)} chunks" + ) + if result.chunks and len(result.chunks) > 0: + logger.info( + f"First chunk content length: {len(result.chunks[0].content)}" + ) + else: + logger.warning(f"Parser returned empty chunks for file: {file_name}") + else: + logger.warning(f"Parser returned None result for file: {file_name}") + + # Return parse results + return result + + except Exception as e: + logger.error(f"Error parsing file {file_name}: {str(e)}") + logger.info(f"Detailed traceback: {traceback.format_exc()}") + return None + + def parse_url( + self, url: str, title: str, config: ChunkingConfig + ) -> Optional[ParseResult]: + """ + Parse content from a URL using the WebParser. 
+ + Args: + url: URL to parse + title: Title of the webpage (for metadata) + config: Configuration for chunking process + + Returns: + ParseResult containing chunks and metadata, or None if parsing failed + """ + logger.info(f"Parsing URL: {url}, title: {title}") + logger.info( + f"Chunking config: size={config.chunk_size}, overlap={config.chunk_overlap}, " + f"multimodal={config.enable_multimodal}" + ) + + parser_instance = None + + try: + # Create web parser instance + logger.info("Creating WebParser instance") + parser_instance = WebParser( + title=title, + chunk_size=config.chunk_size, + chunk_overlap=config.chunk_overlap, + separators=config.separators, + enable_multimodal=config.enable_multimodal, + max_image_size=1920, # Limit image size + max_concurrent_tasks=5, # Limit concurrent tasks + ) + + logger.info(f"Starting to parse URL content") + result = parser_instance.parse(url) + + if result: + logger.info( + f"Successfully parsed URL, generated {len(result.chunks)} chunks" + ) + logger.info( + f"First chunk content length: {len(result.chunks[0].content) if result.chunks else 0}" + ) + else: + logger.warning(f"Parser returned empty result for URL: {url}") + + # Return parse results + return result + + except Exception as e: + logger.error(f"Error parsing URL {url}: {str(e)}") + logger.info(f"Detailed traceback: {traceback.format_exc()}") + return None + diff --git a/services/docreader/src/parser/pdf_parser.py b/services/docreader/src/parser/pdf_parser.py new file mode 100644 index 0000000..bc41a1d --- /dev/null +++ b/services/docreader/src/parser/pdf_parser.py @@ -0,0 +1,62 @@ +import logging +import os +import io +from typing import Any, List, Iterator, Optional, Mapping, Tuple, Dict + +from pypdf import PdfReader +from .base_parser import BaseParser + +logger = logging.getLogger(__name__) + +class PDFParser(BaseParser): + """ + PDF Document Parser + + This parser handles PDF documents by extracting text content. + It uses the pypdf library for simple text extraction. + """ + + def parse_into_text(self, content: bytes) -> str: + """ + Parse PDF document content into text + + This method processes a PDF document by extracting text content. 
+ + Args: + content: PDF document content as bytes + + Returns: + Extracted text content + """ + logger.info(f"Parsing PDF document, content size: {len(content)} bytes") + + try: + # Use io.BytesIO to read content from bytes + pdf_file = io.BytesIO(content) + + # Create PdfReader object + pdf_reader = PdfReader(pdf_file) + num_pages = len(pdf_reader.pages) + logger.info(f"PDF has {num_pages} pages") + + # Extract text from all pages + all_text = [] + for page_num, page in enumerate(pdf_reader.pages): + try: + page_text = page.extract_text() + if page_text: + all_text.append(page_text) + logger.info(f"Successfully extracted text from page {page_num+1}/{num_pages}") + else: + logger.warning(f"No text extracted from page {page_num+1}/{num_pages}") + except Exception as e: + logger.error(f"Error extracting text from page {page_num+1}: {str(e)}") + + # Combine all extracted text + result = "\n\n".join(all_text) + logger.info(f"PDF parsing complete, extracted {len(result)} characters of text") + return result + + except Exception as e: + logger.error(f"Failed to parse PDF document: {str(e)}") + return "" diff --git a/services/docreader/src/parser/text_parser.py b/services/docreader/src/parser/text_parser.py new file mode 100644 index 0000000..625f346 --- /dev/null +++ b/services/docreader/src/parser/text_parser.py @@ -0,0 +1,57 @@ +import logging +from .base_parser import BaseParser + +logger = logging.getLogger(__name__) + + +class TextParser(BaseParser): + """ + Text document parser for processing plain text files. + This parser handles text extraction and chunking from plain text documents. + """ + + def parse_into_text(self, content: bytes) -> str: + """ + Parse text document content by decoding bytes to string. + + This is a straightforward parser that simply converts the binary content + to text using appropriate character encoding. 
+ + Args: + content: Raw document content as bytes + + Returns: + Parsed text content as string + """ + logger.info(f"Parsing text document, content size: {len(content)} bytes") + text = self.decode_bytes(content) + logger.info( + f"Successfully parsed text document, extracted {len(text)} characters" + ) + return text + + +if __name__ == "__main__": + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + logger.info("Running TextParser in standalone mode") + + # Sample text for testing + text = """## 标题1 + ![alt text](image.png) + ## 标题2 + ![alt text](image2.png) + ## 标题3 + ![alt text](image3.png)""" + logger.info(f"Test text content: {text}") + + # Define separators for text splitting + seperators = ["\n\n", "\n", "。"] + parser = TextParser(separators=seperators) + logger.info("Splitting text into units") + units = parser._split_into_units(text) + logger.info(f"Split text into {len(units)} units") + logger.info(f"Units: {units}") diff --git a/services/docreader/src/parser/web_parser.py b/services/docreader/src/parser/web_parser.py new file mode 100644 index 0000000..dceca7f --- /dev/null +++ b/services/docreader/src/parser/web_parser.py @@ -0,0 +1,133 @@ +from typing import Any, Optional + +from playwright.async_api import async_playwright +from bs4 import BeautifulSoup +from .base_parser import BaseParser, ParseResult +import logging +import asyncio + +logger = logging.getLogger(__name__) + + +class WebParser(BaseParser): + """Web page parser""" + + def __init__(self, title: str, **kwargs): + self.title = title + super().__init__(file_name=title, **kwargs) + logger.info(f"Initialized WebParser with title: {title}") + + async def scrape(self, url: str) -> Any: + logger.info(f"Starting web page scraping for URL: {url}") + try: + async with async_playwright() as p: + args = [ + # "--single-process", + ] + logger.info("Launching WebKit browser") + browser = await p.webkit.launch(args=args) + page = await browser.new_page() + + logger.info(f"Navigating to URL: {url}") + try: + await page.goto(url, timeout=30000) + logger.info("Initial page load complete") + except Exception as e: + logger.error(f"Error navigating to URL: {str(e)}") + await browser.close() + return BeautifulSoup( + "", "html.parser" + ) # Return empty soup on navigation error + + try: + logger.info("Waiting for network idle") + await page.reload(wait_until="networkidle") + logger.info("Page reload complete") + except Exception as e: + logger.warning( + f"Network idle wait failed: {str(e)}, proceeding with current page content" + ) + + logger.info("Retrieving page HTML content") + content = await page.content() + logger.info(f"Retrieved {len(content)} bytes of HTML content") + + await browser.close() + logger.info("Browser closed") + + # Parse HTML content with BeautifulSoup + logger.info("Parsing HTML with BeautifulSoup") + soup = BeautifulSoup(content, "html.parser") + logger.info("Successfully parsed HTML content") + return soup + + except Exception as e: + logger.error(f"Failed to scrape web page: {str(e)}") + # Return empty BeautifulSoup object on error + return BeautifulSoup("", "html.parser") + + def parse_into_text(self, content: bytes) -> str: + """Parse web page + + Args: + content: Web page content + + Returns: + Parse result + """ + logger.info("Starting web page parsing") + + # Call async method synchronously + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + try: + # Run async method + # Handle content 
possibly being a string + if isinstance(content, bytes): + url = self.decode_bytes(content) + logger.info(f"Decoded URL from bytes: {url}") + else: + url = content + logger.info(f"Using content as URL directly: {url}") + + logger.info(f"Scraping web page: {url}") + soup = loop.run_until_complete(self.scrape(url)) + + # Extract page text + logger.info("Extracting text from web page") + text = soup.get_text("\n") + logger.info(f"Extracted {len(text)} characters of text from URL: {url}") + + # Get title, usually in or <h1> tag + if self.title != "": + title = self.title + logger.info(f"Using provided title: {title}") + else: + title = soup.title.string if soup.title else None + logger.info(f"Found title tag: {title}") + + if not title: # If <title> tag does not exist or is empty, try <h1> tag + h1_tag = soup.find("h1") + if h1_tag: + title = h1_tag.get_text() + logger.info(f"Using h1 tag as title: {title}") + else: + title = "Untitled Web Page" + logger.info("No title found, using default") + + logger.info(f"Web page title: {title}") + result = title + "\n\n" + text + logger.info( + f"Web page parsing complete, total content: {len(result)} characters" + ) + return result + + except Exception as e: + logger.error(f"Error parsing web page: {str(e)}") + return f"Error parsing web page: {str(e)}" + + finally: + # Close event loop + logger.info("Closing event loop") + loop.close() diff --git a/services/docreader/src/proto/docreader.pb.go b/services/docreader/src/proto/docreader.pb.go new file mode 100644 index 0000000..756f87e --- /dev/null +++ b/services/docreader/src/proto/docreader.pb.go @@ -0,0 +1,560 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.6 +// protoc v5.29.3 +// source: docreader.proto + +package proto + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type ReadConfig struct { + state protoimpl.MessageState `protogen:"open.v1"` + ChunkSize int32 `protobuf:"varint,1,opt,name=chunk_size,json=chunkSize,proto3" json:"chunk_size,omitempty"` // 分块大小 + ChunkOverlap int32 `protobuf:"varint,2,opt,name=chunk_overlap,json=chunkOverlap,proto3" json:"chunk_overlap,omitempty"` // 分块重叠 + Separators []string `protobuf:"bytes,3,rep,name=separators,proto3" json:"separators,omitempty"` // 分隔符 + EnableMultimodal bool `protobuf:"varint,4,opt,name=enable_multimodal,json=enableMultimodal,proto3" json:"enable_multimodal,omitempty"` // 多模态处理 + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ReadConfig) Reset() { + *x = ReadConfig{} + mi := &file_docreader_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ReadConfig) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ReadConfig) ProtoMessage() {} + +func (x *ReadConfig) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ReadConfig.ProtoReflect.Descriptor instead. 
+func (*ReadConfig) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{0} +} + +func (x *ReadConfig) GetChunkSize() int32 { + if x != nil { + return x.ChunkSize + } + return 0 +} + +func (x *ReadConfig) GetChunkOverlap() int32 { + if x != nil { + return x.ChunkOverlap + } + return 0 +} + +func (x *ReadConfig) GetSeparators() []string { + if x != nil { + return x.Separators + } + return nil +} + +func (x *ReadConfig) GetEnableMultimodal() bool { + if x != nil { + return x.EnableMultimodal + } + return false +} + +// 从文件读取文档请求 +type ReadFromFileRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + FileContent []byte `protobuf:"bytes,1,opt,name=file_content,json=fileContent,proto3" json:"file_content,omitempty"` // 文件内容 + FileName string `protobuf:"bytes,2,opt,name=file_name,json=fileName,proto3" json:"file_name,omitempty"` // 文件名 + FileType string `protobuf:"bytes,3,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"` // 文件类型 + ReadConfig *ReadConfig `protobuf:"bytes,4,opt,name=read_config,json=readConfig,proto3" json:"read_config,omitempty"` + RequestId string `protobuf:"bytes,5,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ReadFromFileRequest) Reset() { + *x = ReadFromFileRequest{} + mi := &file_docreader_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ReadFromFileRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ReadFromFileRequest) ProtoMessage() {} + +func (x *ReadFromFileRequest) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ReadFromFileRequest.ProtoReflect.Descriptor instead. 
+func (*ReadFromFileRequest) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{1} +} + +func (x *ReadFromFileRequest) GetFileContent() []byte { + if x != nil { + return x.FileContent + } + return nil +} + +func (x *ReadFromFileRequest) GetFileName() string { + if x != nil { + return x.FileName + } + return "" +} + +func (x *ReadFromFileRequest) GetFileType() string { + if x != nil { + return x.FileType + } + return "" +} + +func (x *ReadFromFileRequest) GetReadConfig() *ReadConfig { + if x != nil { + return x.ReadConfig + } + return nil +} + +func (x *ReadFromFileRequest) GetRequestId() string { + if x != nil { + return x.RequestId + } + return "" +} + +// 从URL读取文档请求 +type ReadFromURLRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + Url string `protobuf:"bytes,1,opt,name=url,proto3" json:"url,omitempty"` // 文档URL + Title string `protobuf:"bytes,2,opt,name=title,proto3" json:"title,omitempty"` // 标题 + ReadConfig *ReadConfig `protobuf:"bytes,3,opt,name=read_config,json=readConfig,proto3" json:"read_config,omitempty"` + RequestId string `protobuf:"bytes,4,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ReadFromURLRequest) Reset() { + *x = ReadFromURLRequest{} + mi := &file_docreader_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ReadFromURLRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ReadFromURLRequest) ProtoMessage() {} + +func (x *ReadFromURLRequest) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ReadFromURLRequest.ProtoReflect.Descriptor instead. 
+func (*ReadFromURLRequest) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{2} +} + +func (x *ReadFromURLRequest) GetUrl() string { + if x != nil { + return x.Url + } + return "" +} + +func (x *ReadFromURLRequest) GetTitle() string { + if x != nil { + return x.Title + } + return "" +} + +func (x *ReadFromURLRequest) GetReadConfig() *ReadConfig { + if x != nil { + return x.ReadConfig + } + return nil +} + +func (x *ReadFromURLRequest) GetRequestId() string { + if x != nil { + return x.RequestId + } + return "" +} + +// 图片信息 +type Image struct { + state protoimpl.MessageState `protogen:"open.v1"` + Url string `protobuf:"bytes,1,opt,name=url,proto3" json:"url,omitempty"` // 图片URL + Caption string `protobuf:"bytes,2,opt,name=caption,proto3" json:"caption,omitempty"` // 图片描述 + OcrText string `protobuf:"bytes,3,opt,name=ocr_text,json=ocrText,proto3" json:"ocr_text,omitempty"` // OCR提取的文本内容 + OriginalUrl string `protobuf:"bytes,4,opt,name=original_url,json=originalUrl,proto3" json:"original_url,omitempty"` // 原始图片URL + Start int32 `protobuf:"varint,5,opt,name=start,proto3" json:"start,omitempty"` // 图片在文本中的开始位置 + End int32 `protobuf:"varint,6,opt,name=end,proto3" json:"end,omitempty"` // 图片在文本中的结束位置 + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Image) Reset() { + *x = Image{} + mi := &file_docreader_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Image) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Image) ProtoMessage() {} + +func (x *Image) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Image.ProtoReflect.Descriptor instead. 
+func (*Image) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{3} +} + +func (x *Image) GetUrl() string { + if x != nil { + return x.Url + } + return "" +} + +func (x *Image) GetCaption() string { + if x != nil { + return x.Caption + } + return "" +} + +func (x *Image) GetOcrText() string { + if x != nil { + return x.OcrText + } + return "" +} + +func (x *Image) GetOriginalUrl() string { + if x != nil { + return x.OriginalUrl + } + return "" +} + +func (x *Image) GetStart() int32 { + if x != nil { + return x.Start + } + return 0 +} + +func (x *Image) GetEnd() int32 { + if x != nil { + return x.End + } + return 0 +} + +type Chunk struct { + state protoimpl.MessageState `protogen:"open.v1"` + Content string `protobuf:"bytes,1,opt,name=content,proto3" json:"content,omitempty"` // 块内容 + Seq int32 `protobuf:"varint,2,opt,name=seq,proto3" json:"seq,omitempty"` // 块在文档中的次序 + Start int32 `protobuf:"varint,3,opt,name=start,proto3" json:"start,omitempty"` // 块在文档中的起始位置 + End int32 `protobuf:"varint,4,opt,name=end,proto3" json:"end,omitempty"` // 块在文档中的结束位置 + Images []*Image `protobuf:"bytes,5,rep,name=images,proto3" json:"images,omitempty"` // 块中包含的图片信息 + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Chunk) Reset() { + *x = Chunk{} + mi := &file_docreader_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Chunk) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Chunk) ProtoMessage() {} + +func (x *Chunk) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Chunk.ProtoReflect.Descriptor instead. +func (*Chunk) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{4} +} + +func (x *Chunk) GetContent() string { + if x != nil { + return x.Content + } + return "" +} + +func (x *Chunk) GetSeq() int32 { + if x != nil { + return x.Seq + } + return 0 +} + +func (x *Chunk) GetStart() int32 { + if x != nil { + return x.Start + } + return 0 +} + +func (x *Chunk) GetEnd() int32 { + if x != nil { + return x.End + } + return 0 +} + +func (x *Chunk) GetImages() []*Image { + if x != nil { + return x.Images + } + return nil +} + +// 从URL读取文档响应 +type ReadResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + Chunks []*Chunk `protobuf:"bytes,1,rep,name=chunks,proto3" json:"chunks,omitempty"` // 文档分块 + Error string `protobuf:"bytes,2,opt,name=error,proto3" json:"error,omitempty"` // 错误信息 + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ReadResponse) Reset() { + *x = ReadResponse{} + mi := &file_docreader_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ReadResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ReadResponse) ProtoMessage() {} + +func (x *ReadResponse) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ReadResponse.ProtoReflect.Descriptor instead. 
+func (*ReadResponse) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{5} +} + +func (x *ReadResponse) GetChunks() []*Chunk { + if x != nil { + return x.Chunks + } + return nil +} + +func (x *ReadResponse) GetError() string { + if x != nil { + return x.Error + } + return "" +} + +var File_docreader_proto protoreflect.FileDescriptor + +const file_docreader_proto_rawDesc = "" + + "\n" + + "\x0fdocreader.proto\x12\tdocreader\"\x9d\x01\n" + + "\n" + + "ReadConfig\x12\x1d\n" + + "\n" + + "chunk_size\x18\x01 \x01(\x05R\tchunkSize\x12#\n" + + "\rchunk_overlap\x18\x02 \x01(\x05R\fchunkOverlap\x12\x1e\n" + + "\n" + + "separators\x18\x03 \x03(\tR\n" + + "separators\x12+\n" + + "\x11enable_multimodal\x18\x04 \x01(\bR\x10enableMultimodal\"\xc9\x01\n" + + "\x13ReadFromFileRequest\x12!\n" + + "\ffile_content\x18\x01 \x01(\fR\vfileContent\x12\x1b\n" + + "\tfile_name\x18\x02 \x01(\tR\bfileName\x12\x1b\n" + + "\tfile_type\x18\x03 \x01(\tR\bfileType\x126\n" + + "\vread_config\x18\x04 \x01(\v2\x15.docreader.ReadConfigR\n" + + "readConfig\x12\x1d\n" + + "\n" + + "request_id\x18\x05 \x01(\tR\trequestId\"\x93\x01\n" + + "\x12ReadFromURLRequest\x12\x10\n" + + "\x03url\x18\x01 \x01(\tR\x03url\x12\x14\n" + + "\x05title\x18\x02 \x01(\tR\x05title\x126\n" + + "\vread_config\x18\x03 \x01(\v2\x15.docreader.ReadConfigR\n" + + "readConfig\x12\x1d\n" + + "\n" + + "request_id\x18\x04 \x01(\tR\trequestId\"\x99\x01\n" + + "\x05Image\x12\x10\n" + + "\x03url\x18\x01 \x01(\tR\x03url\x12\x18\n" + + "\acaption\x18\x02 \x01(\tR\acaption\x12\x19\n" + + "\bocr_text\x18\x03 \x01(\tR\aocrText\x12!\n" + + "\foriginal_url\x18\x04 \x01(\tR\voriginalUrl\x12\x14\n" + + "\x05start\x18\x05 \x01(\x05R\x05start\x12\x10\n" + + "\x03end\x18\x06 \x01(\x05R\x03end\"\x85\x01\n" + + "\x05Chunk\x12\x18\n" + + "\acontent\x18\x01 \x01(\tR\acontent\x12\x10\n" + + "\x03seq\x18\x02 \x01(\x05R\x03seq\x12\x14\n" + + "\x05start\x18\x03 \x01(\x05R\x05start\x12\x10\n" + + "\x03end\x18\x04 \x01(\x05R\x03end\x12(\n" + + "\x06images\x18\x05 \x03(\v2\x10.docreader.ImageR\x06images\"N\n" + + "\fReadResponse\x12(\n" + + "\x06chunks\x18\x01 \x03(\v2\x10.docreader.ChunkR\x06chunks\x12\x14\n" + + "\x05error\x18\x02 \x01(\tR\x05error2\x9f\x01\n" + + "\tDocReader\x12I\n" + + "\fReadFromFile\x12\x1e.docreader.ReadFromFileRequest\x1a\x17.docreader.ReadResponse\"\x00\x12G\n" + + "\vReadFromURL\x12\x1d.docreader.ReadFromURLRequest\x1a\x17.docreader.ReadResponse\"\x00B5Z3github.com/Tencent/WeKnora/internal/docreader/protob\x06proto3" + +var ( + file_docreader_proto_rawDescOnce sync.Once + file_docreader_proto_rawDescData []byte +) + +func file_docreader_proto_rawDescGZIP() []byte { + file_docreader_proto_rawDescOnce.Do(func() { + file_docreader_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_docreader_proto_rawDesc), len(file_docreader_proto_rawDesc))) + }) + return file_docreader_proto_rawDescData +} + +var file_docreader_proto_msgTypes = make([]protoimpl.MessageInfo, 6) +var file_docreader_proto_goTypes = []any{ + (*ReadConfig)(nil), // 0: docreader.ReadConfig + (*ReadFromFileRequest)(nil), // 1: docreader.ReadFromFileRequest + (*ReadFromURLRequest)(nil), // 2: docreader.ReadFromURLRequest + (*Image)(nil), // 3: docreader.Image + (*Chunk)(nil), // 4: docreader.Chunk + (*ReadResponse)(nil), // 5: docreader.ReadResponse +} +var file_docreader_proto_depIdxs = []int32{ + 0, // 0: docreader.ReadFromFileRequest.read_config:type_name -> docreader.ReadConfig + 0, // 1: 
docreader.ReadFromURLRequest.read_config:type_name -> docreader.ReadConfig + 3, // 2: docreader.Chunk.images:type_name -> docreader.Image + 4, // 3: docreader.ReadResponse.chunks:type_name -> docreader.Chunk + 1, // 4: docreader.DocReader.ReadFromFile:input_type -> docreader.ReadFromFileRequest + 2, // 5: docreader.DocReader.ReadFromURL:input_type -> docreader.ReadFromURLRequest + 5, // 6: docreader.DocReader.ReadFromFile:output_type -> docreader.ReadResponse + 5, // 7: docreader.DocReader.ReadFromURL:output_type -> docreader.ReadResponse + 6, // [6:8] is the sub-list for method output_type + 4, // [4:6] is the sub-list for method input_type + 4, // [4:4] is the sub-list for extension type_name + 4, // [4:4] is the sub-list for extension extendee + 0, // [0:4] is the sub-list for field type_name +} + +func init() { file_docreader_proto_init() } +func file_docreader_proto_init() { + if File_docreader_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_docreader_proto_rawDesc), len(file_docreader_proto_rawDesc)), + NumEnums: 0, + NumMessages: 6, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_docreader_proto_goTypes, + DependencyIndexes: file_docreader_proto_depIdxs, + MessageInfos: file_docreader_proto_msgTypes, + }.Build() + File_docreader_proto = out.File + file_docreader_proto_goTypes = nil + file_docreader_proto_depIdxs = nil +} diff --git a/services/docreader/src/proto/docreader.proto b/services/docreader/src/proto/docreader.proto new file mode 100644 index 0000000..6ca3d5c --- /dev/null +++ b/services/docreader/src/proto/docreader.proto @@ -0,0 +1,61 @@ +syntax = "proto3"; + +package docreader; + +option go_package = "github.com/Tencent/WeKnora/internal/docreader/proto"; + +// 文档读取服务 +service DocReader { + // 从文件读取文档 + rpc ReadFromFile(ReadFromFileRequest) returns (ReadResponse) {} + // 从URL读取文档 + rpc ReadFromURL(ReadFromURLRequest) returns (ReadResponse) {} +} + +message ReadConfig { + int32 chunk_size = 1; // 分块大小 + int32 chunk_overlap = 2; // 分块重叠 + repeated string separators = 3; // 分隔符 + bool enable_multimodal = 4; // 多模态处理 +} + +// 从文件读取文档请求 +message ReadFromFileRequest { + bytes file_content = 1; // 文件内容 + string file_name = 2; // 文件名 + string file_type = 3; // 文件类型 + ReadConfig read_config = 4; + string request_id = 5; +} + +// 从URL读取文档请求 +message ReadFromURLRequest { + string url = 1; // 文档URL + string title = 2; // 标题 + ReadConfig read_config = 3; + string request_id = 4; +} + +// 图片信息 +message Image { + string url = 1; // 图片URL + string caption = 2; // 图片描述 + string ocr_text = 3; // OCR提取的文本内容 + string original_url = 4; // 原始图片URL + int32 start = 5; // 图片在文本中的开始位置 + int32 end = 6; // 图片在文本中的结束位置 +} + +message Chunk { + string content = 1; // 块内容 + int32 seq = 2; // 块在文档中的次序 + int32 start = 3; // 块在文档中的起始位置 + int32 end = 4; // 块在文档中的结束位置 + repeated Image images = 5; // 块中包含的图片信息 +} + +// 从URL读取文档响应 +message ReadResponse { + repeated Chunk chunks = 1; // 文档分块 + string error = 2; // 错误信息 +} \ No newline at end of file diff --git a/services/docreader/src/proto/docreader_grpc.pb.go b/services/docreader/src/proto/docreader_grpc.pb.go new file mode 100644 index 0000000..79bd7ef --- /dev/null +++ b/services/docreader/src/proto/docreader_grpc.pb.go @@ -0,0 +1,167 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
+// versions: +// - protoc-gen-go-grpc v1.5.1 +// - protoc v5.29.3 +// source: docreader.proto + +package proto + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.64.0 or later. +const _ = grpc.SupportPackageIsVersion9 + +const ( + DocReader_ReadFromFile_FullMethodName = "/docreader.DocReader/ReadFromFile" + DocReader_ReadFromURL_FullMethodName = "/docreader.DocReader/ReadFromURL" +) + +// DocReaderClient is the client API for DocReader service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +// +// 文档读取服务 +type DocReaderClient interface { + // 从文件读取文档 + ReadFromFile(ctx context.Context, in *ReadFromFileRequest, opts ...grpc.CallOption) (*ReadResponse, error) + // 从URL读取文档 + ReadFromURL(ctx context.Context, in *ReadFromURLRequest, opts ...grpc.CallOption) (*ReadResponse, error) +} + +type docReaderClient struct { + cc grpc.ClientConnInterface +} + +func NewDocReaderClient(cc grpc.ClientConnInterface) DocReaderClient { + return &docReaderClient{cc} +} + +func (c *docReaderClient) ReadFromFile(ctx context.Context, in *ReadFromFileRequest, opts ...grpc.CallOption) (*ReadResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(ReadResponse) + err := c.cc.Invoke(ctx, DocReader_ReadFromFile_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *docReaderClient) ReadFromURL(ctx context.Context, in *ReadFromURLRequest, opts ...grpc.CallOption) (*ReadResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(ReadResponse) + err := c.cc.Invoke(ctx, DocReader_ReadFromURL_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +// DocReaderServer is the server API for DocReader service. +// All implementations must embed UnimplementedDocReaderServer +// for forward compatibility. +// +// 文档读取服务 +type DocReaderServer interface { + // 从文件读取文档 + ReadFromFile(context.Context, *ReadFromFileRequest) (*ReadResponse, error) + // 从URL读取文档 + ReadFromURL(context.Context, *ReadFromURLRequest) (*ReadResponse, error) + mustEmbedUnimplementedDocReaderServer() +} + +// UnimplementedDocReaderServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. +type UnimplementedDocReaderServer struct{} + +func (UnimplementedDocReaderServer) ReadFromFile(context.Context, *ReadFromFileRequest) (*ReadResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method ReadFromFile not implemented") +} +func (UnimplementedDocReaderServer) ReadFromURL(context.Context, *ReadFromURLRequest) (*ReadResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method ReadFromURL not implemented") +} +func (UnimplementedDocReaderServer) mustEmbedUnimplementedDocReaderServer() {} +func (UnimplementedDocReaderServer) testEmbeddedByValue() {} + +// UnsafeDocReaderServer may be embedded to opt out of forward compatibility for this service. 
+// Use of this interface is not recommended, as added methods to DocReaderServer will +// result in compilation errors. +type UnsafeDocReaderServer interface { + mustEmbedUnimplementedDocReaderServer() +} + +func RegisterDocReaderServer(s grpc.ServiceRegistrar, srv DocReaderServer) { + // If the following call pancis, it indicates UnimplementedDocReaderServer was + // embedded by pointer and is nil. This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. + if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } + s.RegisterService(&DocReader_ServiceDesc, srv) +} + +func _DocReader_ReadFromFile_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ReadFromFileRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(DocReaderServer).ReadFromFile(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: DocReader_ReadFromFile_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(DocReaderServer).ReadFromFile(ctx, req.(*ReadFromFileRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _DocReader_ReadFromURL_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ReadFromURLRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(DocReaderServer).ReadFromURL(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: DocReader_ReadFromURL_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(DocReaderServer).ReadFromURL(ctx, req.(*ReadFromURLRequest)) + } + return interceptor(ctx, in, info, handler) +} + +// DocReader_ServiceDesc is the grpc.ServiceDesc for DocReader service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var DocReader_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "docreader.DocReader", + HandlerType: (*DocReaderServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "ReadFromFile", + Handler: _DocReader_ReadFromFile_Handler, + }, + { + MethodName: "ReadFromURL", + Handler: _DocReader_ReadFromURL_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "docreader.proto", +} diff --git a/services/docreader/src/proto/docreader_pb2_grpc.py b/services/docreader/src/proto/docreader_pb2_grpc.py new file mode 100644 index 0000000..fd391bd --- /dev/null +++ b/services/docreader/src/proto/docreader_pb2_grpc.py @@ -0,0 +1,104 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + +from . import docreader_pb2 as docreader__pb2 + + +class DocReaderStub(object): + """文档读取服务 + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. 
+ """ + self.ReadFromFile = channel.unary_unary( + '/docreader.DocReader/ReadFromFile', + request_serializer=docreader__pb2.ReadFromFileRequest.SerializeToString, + response_deserializer=docreader__pb2.ReadResponse.FromString, + ) + self.ReadFromURL = channel.unary_unary( + '/docreader.DocReader/ReadFromURL', + request_serializer=docreader__pb2.ReadFromURLRequest.SerializeToString, + response_deserializer=docreader__pb2.ReadResponse.FromString, + ) + + +class DocReaderServicer(object): + """文档读取服务 + """ + + def ReadFromFile(self, request, context): + """从文件读取文档 + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ReadFromURL(self, request, context): + """从URL读取文档 + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_DocReaderServicer_to_server(servicer, server): + rpc_method_handlers = { + 'ReadFromFile': grpc.unary_unary_rpc_method_handler( + servicer.ReadFromFile, + request_deserializer=docreader__pb2.ReadFromFileRequest.FromString, + response_serializer=docreader__pb2.ReadResponse.SerializeToString, + ), + 'ReadFromURL': grpc.unary_unary_rpc_method_handler( + servicer.ReadFromURL, + request_deserializer=docreader__pb2.ReadFromURLRequest.FromString, + response_serializer=docreader__pb2.ReadResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'docreader.DocReader', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. +class DocReader(object): + """文档读取服务 + """ + + @staticmethod + def ReadFromFile(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/docreader.DocReader/ReadFromFile', + docreader__pb2.ReadFromFileRequest.SerializeToString, + docreader__pb2.ReadResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def ReadFromURL(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/docreader.DocReader/ReadFromURL', + docreader__pb2.ReadFromURLRequest.SerializeToString, + docreader__pb2.ReadResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/services/docreader/src/server/server.py b/services/docreader/src/server/server.py new file mode 100644 index 0000000..6e391ff --- /dev/null +++ b/services/docreader/src/server/server.py @@ -0,0 +1,312 @@ +import os +import sys +import logging +from concurrent import futures +import traceback +import grpc +import uuid + +# Add parent directory to Python path +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) +if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) + +from proto.docreader_pb2 import ReadResponse, Chunk, Image +from proto import docreader_pb2_grpc +from parser import Parser, ChunkingConfig, OCREngine +from utils.request import request_id_context, init_logging_request_id + +# Ensure no 
existing handlers +for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + +# Configure logging - use stdout +handler = logging.StreamHandler(sys.stdout) +logging.root.addHandler(handler) + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +logger.info("Initializing server logging") + +# Initialize request ID logging +init_logging_request_id() + +# Set max message size to 50MB +MAX_MESSAGE_LENGTH = 50 * 1024 * 1024 + + +parser = Parser() + +class DocReaderServicer(docreader_pb2_grpc.DocReaderServicer): + def __init__(self): + super().__init__() + self.parser = Parser() + + def ReadFromFile(self, request, context): + # Get or generate request ID + request_id = ( + request.request_id + if hasattr(request, "request_id") and request.request_id + else str(uuid.uuid4()) + ) + + # Use request ID context + with request_id_context(request_id): + try: + # Get file type + file_type = ( + request.file_type or os.path.splitext(request.file_name)[1][1:] + ) + logger.info( + f"Received ReadFromFile request for file: {request.file_name}, type: {file_type}" + ) + logger.info(f"File content size: {len(request.file_content)} bytes") + + # Create chunking config + chunk_size = request.read_config.chunk_size or 512 + chunk_overlap = request.read_config.chunk_overlap or 50 + separators = request.read_config.separators or ["\n\n", "\n", "。"] + enable_multimodal = request.read_config.enable_multimodal or False + + logger.info( + f"Using chunking config: size={chunk_size}, overlap={chunk_overlap}, " + f"multimodal={enable_multimodal}" + ) + + chunking_config = ChunkingConfig( + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + separators=separators, + enable_multimodal=enable_multimodal, + ) + + # Parse file + logger.info(f"Starting file parsing process") + result = self.parser.parse_file( + request.file_name, file_type, request.file_content, chunking_config + ) + + if not result: + error_msg = "Failed to parse file" + logger.error(error_msg) + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(error_msg) + return ReadResponse() + + # Convert to protobuf message + logger.info( + f"Successfully parsed file {request.file_name}, returning {len(result.chunks)} chunks" + ) + + # Build response, including image info + response = ReadResponse( + chunks=[self._convert_chunk_to_proto(chunk) for chunk in result.chunks] + ) + logger.info(f"Response size: {response.ByteSize()} bytes") + return response + + except Exception as e: + error_msg = f"Error reading file: {str(e)}" + logger.error(error_msg) + logger.info(f"Detailed traceback: {traceback.format_exc()}") + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(str(e)) + return ReadResponse(error=str(e)) + + def ReadFromURL(self, request, context): + # Get or generate request ID + request_id = ( + request.request_id + if hasattr(request, "request_id") and request.request_id + else str(uuid.uuid4()) + ) + + # Use request ID context + with request_id_context(request_id): + try: + logger.info(f"Received ReadFromURL request for URL: {request.url}") + + # Create chunking config + chunk_size = request.read_config.chunk_size or 512 + chunk_overlap = request.read_config.chunk_overlap or 50 + separators = request.read_config.separators or ["\n\n", "\n", "。"] + enable_multimodal = request.read_config.enable_multimodal or False + + logger.info( + f"Using chunking config: size={chunk_size}, overlap={chunk_overlap}, " + f"multimodal={enable_multimodal}" + ) + + chunking_config = ChunkingConfig( + 
chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + separators=separators, + enable_multimodal=enable_multimodal, + ) + + # Parse URL + logger.info(f"Starting URL parsing process") + result = self.parser.parse_url(request.url, request.title, chunking_config) + if not result: + error_msg = "Failed to parse URL" + logger.error(error_msg) + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(error_msg) + return ReadResponse(error=error_msg) + + # Convert to protobuf message, including image info + logger.info( + f"Successfully parsed URL {request.url}, returning {len(result.chunks)} chunks" + ) + + response = ReadResponse( + chunks=[self._convert_chunk_to_proto(chunk) for chunk in result.chunks] + ) + logger.info(f"Response size: {response.ByteSize()} bytes") + return response + + except Exception as e: + error_msg = f"Error reading URL: {str(e)}" + logger.error(error_msg) + logger.info(f"Detailed traceback: {traceback.format_exc()}") + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(str(e)) + return ReadResponse(error=str(e)) + + def _convert_chunk_to_proto(self, chunk): + """Convert internal Chunk object to protobuf Chunk message""" + proto_chunk = Chunk( + content=chunk.content, + seq=chunk.seq, + start=chunk.start, + end=chunk.end, + ) + + # If chunk has images attribute and is not empty, add image info + if hasattr(chunk, "images") and chunk.images: + logger.info(f"Adding {len(chunk.images)} images to chunk {chunk.seq}") + for img_info in chunk.images: + proto_image = Image( + url=img_info.get("cos_url", ""), + caption=img_info.get("caption", ""), + ocr_text=img_info.get("ocr_text", ""), + original_url=img_info.get("original_url", ""), + start=img_info.get("start", 0), + end=img_info.get("end", 0) + ) + proto_chunk.images.append(proto_image) + + return proto_chunk + +def init_ocr_engine(ocr_backend, ocr_config): + """Initialize OCR engine""" + try: + logger.info(f"Initializing OCR engine with backend: {ocr_backend}") + ocr_engine = OCREngine.get_instance(backend_type=ocr_backend, **ocr_config) + if ocr_engine: + logger.info("OCR engine initialized successfully") + return True + else: + logger.error("OCR engine initialization failed") + return False + except Exception as e: + logger.error(f"Error initializing OCR engine: {str(e)}") + return False + +def serve(): + init_ocr_engine(os.getenv("OCR_BACKEND", "paddle"), { + "OCR_API_BASE_URL": os.getenv("OCR_API_BASE_URL", ""), + }) + # Set max number of worker threads and processes + max_workers = int(os.environ.get("GRPC_MAX_WORKERS", "4")) + worker_processes = int(os.environ.get("GRPC_WORKER_PROCESSES", str(os.cpu_count() or 1))) + logger.info(f"Starting DocReader service, max worker threads per process: {max_workers}, " + f"processes: {worker_processes}") + + # Get port number + port = os.environ.get("GRPC_PORT", "50051") + + # Multi-process mode + if worker_processes > 1: + import multiprocessing + processes = [] + + def run_server(): + # Create server + server = grpc.server( + futures.ThreadPoolExecutor(max_workers=max_workers), + options=[ + ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH), + ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH), + ], + ) + + # Register service + docreader_pb2_grpc.add_DocReaderServicer_to_server(DocReaderServicer(), server) + + # Set listen address + server.add_insecure_port(f"[::]:{port}") + + # Start service + server.start() + + logger.info(f"Worker process {os.getpid()} started on port {port}") + + try: + # Wait for service termination + 
server.wait_for_termination() + except KeyboardInterrupt: + logger.info(f"Worker process {os.getpid()} received termination signal") + server.stop(0) + + # Start specified number of worker processes + for i in range(worker_processes): + process = multiprocessing.Process(target=run_server) + processes.append(process) + process.start() + logger.info(f"Started worker process {process.pid} ({i+1}/{worker_processes})") + + # Wait for all processes to complete + try: + for process in processes: + process.join() + except KeyboardInterrupt: + logger.info("Master process received termination signal") + for process in processes: + if process.is_alive(): + logger.info(f"Terminating worker process {process.pid}") + process.terminate() + + # Single-process mode + else: + # Create server + server = grpc.server( + futures.ThreadPoolExecutor(max_workers=max_workers), + options=[ + ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH), + ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH), + ], + ) + + # Register service + docreader_pb2_grpc.add_DocReaderServicer_to_server(DocReaderServicer(), server) + + # Set listen address + server.add_insecure_port(f"[::]:{port}") + + # Start service + server.start() + + logger.info(f"Server started on port {port} (single process mode)") + logger.info("Server is ready to accept connections") + + try: + # Wait for service termination + server.wait_for_termination() + except KeyboardInterrupt: + logger.info("Received termination signal, shutting down server") + server.stop(0) + +if __name__ == "__main__": + serve() diff --git a/services/docreader/src/testdata/images/test_text.png b/services/docreader/src/testdata/images/test_text.png new file mode 100644 index 0000000..2d3dd01 Binary files /dev/null and b/services/docreader/src/testdata/images/test_text.png differ diff --git a/services/docreader/src/testdata/test.html b/services/docreader/src/testdata/test.html new file mode 100644 index 0000000..f0cce25 --- /dev/null +++ b/services/docreader/src/testdata/test.html @@ -0,0 +1,56 @@ +<!DOCTYPE html> +<html lang="zh-CN"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>测试 HTML 文档 + + +

+    <h1>测试 HTML 文档</h1>
+
+    <p>这是一个测试 HTML 文档,用于测试 HTML 解析功能。</p>
+
+    <h2>包含图片</h2>
+    <img src="" alt="测试图片">
+
+    <h2>包含链接</h2>
+    <p>这是一个<a href="">测试链接</a></p>
+
+    <h2>包含代码块</h2>
+    <pre><code>
+def hello_world():
+    print("Hello, World!")
+    </code></pre>
+
+    <h2>包含表格</h2>
+    <table>
+        <tr>
+            <th>表头1</th>
+            <th>表头2</th>
+        </tr>
+        <tr>
+            <td>内容1</td>
+            <td>内容2</td>
+        </tr>
+        <tr>
+            <td>内容3</td>
+            <td>内容4</td>
+        </tr>
+    </table>
+
+    <h2>测试分块功能</h2>
+    <p>这部分内容用于测试分块功能,确保 HTML 结构在分块时保持完整。</p>
+    <ul>
+        <li>第一块内容</li>
+        <li>第二块内容</li>
+        <li>第三块内容</li>
+    </ul>
+
+    <h2>测试重叠功能</h2>
+    <p>这部分内容可能会在分块时与前后块重叠,以确保上下文的连续性。</p>

+ + \ No newline at end of file diff --git a/services/docreader/src/testdata/test.md b/services/docreader/src/testdata/test.md new file mode 100644 index 0000000..bc51c4b --- /dev/null +++ b/services/docreader/src/testdata/test.md @@ -0,0 +1,37 @@ +# 测试 Markdown 文档 + +这是一个测试 Markdown 文档,用于测试 Markdown 解析功能。 + +## 包含图片 + +![测试图片](https://geektutu.com/post/quick-go-protobuf/go-protobuf.jpg) + +## 包含链接 + +这是一个[测试链接](https://example.com)。 + +## 包含代码块 + +```python +def hello_world(): + print("Hello, World!") +``` + +## 包含表格 + +| 表头1 | 表头2 | +|-------|-------| +| 内容1 | 内容2 | +| 内容3 | 内容4 | + +## 测试分块功能 + +这部分内容用于测试分块功能,确保 Markdown 结构在分块时保持完整。 + +- 第一块内容 +- 第二块内容 +- 第三块内容 + +## 测试重叠功能 + +这部分内容可能会在分块时与前后块重叠,以确保上下文的连续性。 \ No newline at end of file diff --git a/services/docreader/src/testdata/test.txt b/services/docreader/src/testdata/test.txt new file mode 100644 index 0000000..bf6be04 --- /dev/null +++ b/services/docreader/src/testdata/test.txt @@ -0,0 +1,16 @@ +这是一个测试文档 +包含多行内容 +用于测试文档解析功能 + +这个文档包含以下内容: +1. 基本文本内容 +2. 多行段落 +3. 列表项 + +测试分块功能: +- 第一块内容 +- 第二块内容 +- 第三块内容 + +测试重叠功能: +这部分内容可能会在分块时与前后块重叠,以确保上下文的连续性。 \ No newline at end of file diff --git a/services/docreader/src/testdata/test_download.txt b/services/docreader/src/testdata/test_download.txt new file mode 100644 index 0000000..67bca7a --- /dev/null +++ b/services/docreader/src/testdata/test_download.txt @@ -0,0 +1,19 @@ +这是一个测试文档 +包含多行内容 +用于测试文档解析功能 + +这个文档包含以下内容: +1. 基本文本内容 +2. 多行段落 +3. 列表项 + +测试分块功能: +- 第一块内容 +- 第二块内容 +- 第三块内容 + +测试重叠功能: +这部分内容可能会在分块时与前后块重叠,以确保上下文的连续性。 + + +test \ No newline at end of file diff --git a/services/docreader/src/utils/__init__.py b/services/docreader/src/utils/__init__.py new file mode 100644 index 0000000..f1c7702 --- /dev/null +++ b/services/docreader/src/utils/__init__.py @@ -0,0 +1,83 @@ +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import re +import logging + +# 配置日志 +logger = logging.getLogger(__name__) + + +def singleton(cls, *args, **kw): + instances = {} + + def _singleton(): + key = str(cls) + str(os.getpid()) + if key not in instances: + logger.info(f"Creating new singleton instance with key: {key}") + instances[key] = cls(*args, **kw) + else: + logger.info(f"Returning existing singleton instance with key: {key}") + return instances[key] + + return _singleton + + +def rmSpace(txt): + logger.info(f"Removing spaces from text of length: {len(txt)}") + txt = re.sub(r"([^a-z0-9.,\)>]) +([^ ])", r"\1\2", txt, flags=re.IGNORECASE) + return re.sub(r"([^ ]) +([^a-z0-9.,\(<])", r"\1\2", txt, flags=re.IGNORECASE) + + +def findMaxDt(fnm): + m = "1970-01-01 00:00:00" + logger.info(f"Finding maximum date in file: {fnm}") + try: + with open(fnm, "r") as f: + while True: + l = f.readline() + if not l: + break + l = l.strip("\n") + if l == "nan": + continue + if l > m: + m = l + logger.info(f"Maximum date found: {m}") + except Exception as e: + logger.error(f"Error reading file {fnm} for max date: {str(e)}") + return m + + +def findMaxTm(fnm): + m = 0 + logger.info(f"Finding maximum time in file: {fnm}") + try: + with open(fnm, "r") as f: + while True: + l = f.readline() + if not l: + break + l = l.strip("\n") + if l == "nan": + continue + if int(l) > m: + m = int(l) + logger.info(f"Maximum time found: {m}") + except Exception as e: + logger.error(f"Error reading file {fnm} for max time: {str(e)}") + return m diff --git a/services/docreader/src/utils/request.py b/services/docreader/src/utils/request.py new file mode 100644 index 0000000..867892c --- /dev/null +++ b/services/docreader/src/utils/request.py @@ -0,0 +1,149 @@ +from contextvars import ContextVar +import logging +import uuid +import contextlib +import time +from typing import Optional +from logging import LogRecord + +# 配置日志 +logger = logging.getLogger(__name__) + +# 定义上下文变量 +request_id_var = ContextVar("request_id", default=None) +_request_start_time_ctx = ContextVar("request_start_time", default=None) + + +def set_request_id(request_id: str) -> None: + """设置当前上下文的请求ID""" + request_id_var.set(request_id) + + +def get_request_id() -> Optional[str]: + """获取当前上下文的请求ID""" + return request_id_var.get() + + +class MillisecondFormatter(logging.Formatter): + """自定义日志格式化器,只显示毫秒级时间戳(3位数字)而不是微秒(6位)""" + + def formatTime(self, record, datefmt=None): + """重写formatTime方法,将微秒格式化为毫秒""" + # 先获取标准的格式化时间 + result = super().formatTime(record, datefmt) + + # 如果使用了包含.%f的格式,则将微秒(6位)截断为毫秒(3位) + if datefmt and ".%f" in datefmt: + # 格式化的时间字符串应该在最后有6位微秒数 + parts = result.split('.') + if len(parts) > 1 and len(parts[1]) >= 6: + # 只保留前3位作为毫秒 + millis = parts[1][:3] + result = f"{parts[0]}.{millis}" + + return result + + +def init_logging_request_id(): + """ + Initialize logging to include request ID in log messages. 
+ Add the custom filter to all existing handlers + """ + logger.info("Initializing request ID logging") + root_logger = logging.getLogger() + + # 添加自定义过滤器到所有处理器 + for handler in root_logger.handlers: + # 添加请求ID过滤器 + handler.addFilter(RequestIdFilter()) + + # 更新格式化器以包含请求ID,调整格式使其更紧凑整齐 + formatter = logging.Formatter( + fmt="%(asctime)s.%(msecs)03d [%(request_id)s] %(levelname)-5s %(name)-20s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + handler.setFormatter(formatter) + + logger.info( + f"Updated {len(root_logger.handlers)} handlers with request ID formatting" + ) + + # 如果没有处理器,添加一个标准输出处理器 + if not root_logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter( + fmt="%(asctime)s.%(msecs)03d [%(request_id)s] %(levelname)-5s %(name)-20s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + handler.setFormatter(formatter) + handler.addFilter(RequestIdFilter()) + root_logger.addHandler(handler) + logger.info("Added new StreamHandler with request ID formatting") + + +class RequestIdFilter(logging.Filter): + """Filter that adds request ID to log messages""" + + def filter(self, record: LogRecord) -> bool: + request_id = request_id_var.get() + if request_id is not None: + # 为日志记录添加请求ID属性,使用短格式 + if len(request_id) > 8: + # 截取ID的前8个字符,确保显示整齐 + short_id = request_id[:8] + if "-" in request_id: + # 尝试保留格式,例如 test-req-1-XXX + parts = request_id.split("-") + if len(parts) >= 3: + # 如果格式是 xxx-xxx-n-randompart + short_id = f"{parts[0]}-{parts[1]}-{parts[2]}" + record.request_id = short_id + else: + record.request_id = request_id + + # 添加执行时间属性 + start_time = _request_start_time_ctx.get() + if start_time is not None: + elapsed_ms = int((time.time() - start_time) * 1000) + record.elapsed_ms = elapsed_ms + # 添加执行时间到消息中 + if not hasattr(record, "message_with_elapsed"): + record.message_with_elapsed = True + record.msg = f"{record.msg} (elapsed: {elapsed_ms}ms)" + else: + # 如果没有请求ID,使用占位符 + record.request_id = "no-req-id" + + return True + + +@contextlib.contextmanager +def request_id_context(request_id: str = None): + """Context manager that sets a request ID for the current context + + Args: + request_id: 要使用的请求ID,如果为None则自动生成 + + Example: + with request_id_context("req-123"): + # 在这个代码块中的所有日志都会包含请求ID req-123 + logging.info("Processing request") + """ + # Generate or use provided request ID + req_id = request_id or str(uuid.uuid4()) + + # Set start time and request ID + start_time = time.time() + req_token = request_id_var.set(req_id) + time_token = _request_start_time_ctx.set(start_time) + + logger.info(f"Starting new request with ID: {req_id}") + + try: + yield request_id_var.get() + finally: + # Log completion and reset context vars + elapsed_ms = int((time.time() - start_time) * 1000) + logger.info(f"Request {req_id} completed in {elapsed_ms}ms") + request_id_var.reset(req_token) + _request_start_time_ctx.reset(time_token) diff --git a/services/docreader/supervisord.conf b/services/docreader/supervisord.conf new file mode 100644 index 0000000..e0e2c63 --- /dev/null +++ b/services/docreader/supervisord.conf @@ -0,0 +1,32 @@ +[supervisord] +nodaemon=true +logfile=/var/log/supervisord.log +logfile_maxbytes=50MB +logfile_backups=10 +loglevel=info +pidfile=/var/run/supervisord.pid +user=root + +[program:docreader] +command=/bin/sh -c "python /app/src/server/server.py 2>&1" +directory=/app +autostart=true +autorestart=true +startretries=5 +startsecs=5 +priority=10 +redirect_stderr=true +stdout_logfile=/var/log/docreader.log +stdout_logfile_maxbytes=50MB 
+stdout_logfile_backups=10 +environment=PYTHONPATH="/app/src" + +[unix_http_server] +file=/var/run/supervisor.sock +chmod=0700 + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[supervisorctl] +serverurl=unix:///var/run/supervisor.sock
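
The DocReader service added in this commit can be exercised end-to-end with a small gRPC client. The sketch below is illustrative and not part of the commit: it assumes services/docreader/src is on PYTHONPATH (so the generated stubs import as proto.docreader_pb2 and proto.docreader_pb2_grpc), that the server from server.py is listening on localhost:50051 (the GRPC_PORT default), and that the test file path passed in exists; the request and response field names mirror those used in server.py.

```python
# Illustrative DocReader client (not part of this commit).
# Assumes services/docreader/src is on PYTHONPATH and that server.py is
# listening on localhost:50051 (the GRPC_PORT default).
import grpc

from proto import docreader_pb2, docreader_pb2_grpc

# Mirror the server's 50MB message limit so large documents round-trip.
MAX_MESSAGE_LENGTH = 50 * 1024 * 1024


def read_file(path: str) -> None:
    channel = grpc.insecure_channel(
        "localhost:50051",
        options=[
            ("grpc.max_send_message_length", MAX_MESSAGE_LENGTH),
            ("grpc.max_receive_message_length", MAX_MESSAGE_LENGTH),
        ],
    )
    stub = docreader_pb2_grpc.DocReaderStub(channel)

    with open(path, "rb") as f:
        request = docreader_pb2.ReadFromFileRequest(
            file_name=path,
            file_content=f.read(),
        )
    # read_config fields are the ones server.py consumes; unset fields fall
    # back to the defaults applied there (512 / 50 / ["\n\n", "\n", "。"]).
    request.read_config.chunk_size = 512
    request.read_config.chunk_overlap = 50

    response = stub.ReadFromFile(request)
    if response.error:
        raise RuntimeError(response.error)
    for chunk in response.chunks:
        print(chunk.seq, chunk.start, chunk.end, chunk.content[:50])


if __name__ == "__main__":
    read_file("services/docreader/src/testdata/test.md")
```

ReadFromURL follows the same pattern, with a ReadFromURLRequest carrying url and title instead of file bytes.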