mirror of
https://github.com/Tencent/WeKnora.git
synced 2025-11-25 03:15:00 +08:00
feat: 新增网页解析类,优化依赖及图片编码支持
This commit is contained in:
@@ -71,6 +71,7 @@ class BaseParser(ABC):
|
||||
max_concurrent_tasks: int = 5, # Max concurrent tasks
|
||||
max_chunks: int = 1000, # Max number of returned chunks
|
||||
chunking_config: Optional[ChunkingConfig] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize parser
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
|
||||
class MarkdownImageUtil:
|
||||
def __init__(self):
|
||||
self.b64_pattern = re.compile(
|
||||
r"!\[([^\]]*)\]\(data:image/(\w+);base64,([^\)]+)\)"
|
||||
r"!\[([^\]]*)\]\(data:image/(\w+)\+?\w*;base64,([^\)]+)\)"
|
||||
)
|
||||
self.image_pattern = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")
|
||||
self.replace_pattern = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")
|
||||
|
||||
@@ -1,19 +1,20 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from playwright.async_api import async_playwright
|
||||
from trafilatura import extract
|
||||
|
||||
from docreader.models.document import Document
|
||||
from docreader.parser.base_parser import BaseParser
|
||||
from docreader.parser.chain_parser import PipelineParser
|
||||
from docreader.parser.markdown_parser import MarkdownParser
|
||||
from docreader.utils import endecode
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WebParser(BaseParser):
|
||||
class StdWebParser(BaseParser):
|
||||
"""Web page parser"""
|
||||
|
||||
def __init__(self, title: str, **kwargs):
|
||||
@@ -22,7 +23,7 @@ class WebParser(BaseParser):
|
||||
super().__init__(file_name=title, **kwargs)
|
||||
logger.info(f"Initialized WebParser with title: {title}")
|
||||
|
||||
async def scrape(self, url: str) -> Any:
|
||||
async def scrape(self, url: str) -> str:
|
||||
logger.info(f"Starting web page scraping for URL: {url}")
|
||||
try:
|
||||
async with async_playwright() as p:
|
||||
@@ -40,9 +41,7 @@ class WebParser(BaseParser):
|
||||
except Exception as e:
|
||||
logger.error(f"Error navigating to URL: {str(e)}")
|
||||
await browser.close()
|
||||
return BeautifulSoup(
|
||||
"", "html.parser"
|
||||
) # Return empty soup on navigation error
|
||||
return ""
|
||||
|
||||
logger.info("Retrieving page HTML content")
|
||||
content = await page.content()
|
||||
@@ -53,14 +52,13 @@ class WebParser(BaseParser):
|
||||
|
||||
# Parse HTML content with BeautifulSoup
|
||||
logger.info("Parsing HTML with BeautifulSoup")
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
logger.info("Successfully parsed HTML content")
|
||||
return soup
|
||||
return content
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to scrape web page: {str(e)}")
|
||||
# Return empty BeautifulSoup object on error
|
||||
return BeautifulSoup("", "html.parser")
|
||||
return ""
|
||||
|
||||
def parse_into_text(self, content: bytes) -> Document:
|
||||
"""Parse web page
|
||||
@@ -71,63 +69,36 @@ class WebParser(BaseParser):
|
||||
Returns:
|
||||
Parse result
|
||||
"""
|
||||
logger.info("Starting web page parsing")
|
||||
url = endecode.decode_bytes(content)
|
||||
|
||||
# Call async method synchronously
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
logger.info(f"Scraping web page: {url}")
|
||||
chtml = asyncio.run(self.scrape(url))
|
||||
md_text = extract(
|
||||
chtml,
|
||||
output_format="markdown",
|
||||
with_metadata=True,
|
||||
include_images=True,
|
||||
include_tables=True,
|
||||
include_links=True,
|
||||
deduplicate=True,
|
||||
)
|
||||
if not md_text:
|
||||
logger.error("Failed to parse web page")
|
||||
return Document(content=f"Error parsing web page: {url}")
|
||||
return Document(content=md_text)
|
||||
|
||||
try:
|
||||
# Run async method
|
||||
# Handle content possibly being a string
|
||||
if isinstance(content, bytes):
|
||||
url = endecode.decode_bytes(content)
|
||||
logger.info(f"Decoded URL from bytes: {url}")
|
||||
else:
|
||||
url = str(content)
|
||||
logger.info(f"Using content as URL directly: {url}")
|
||||
|
||||
logger.info(f"Scraping web page: {url}")
|
||||
soup = loop.run_until_complete(self.scrape(url))
|
||||
class WebParser(PipelineParser):
|
||||
_parser_cls = (StdWebParser, MarkdownParser)
|
||||
|
||||
# Extract page text
|
||||
logger.info("Extracting text from web page")
|
||||
text = soup.get_text("\n")
|
||||
logger.info(f"Extracted {len(text)} characters of text from URL: {url}")
|
||||
|
||||
# Get title, usually in <title> or <h1> tag
|
||||
if self.title != "":
|
||||
title = self.title
|
||||
logger.info(f"Using provided title: {title}")
|
||||
else:
|
||||
title = soup.title.string if soup.title else None
|
||||
logger.info(f"Found title tag: {title}")
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
if not title: # If <title> tag does not exist or is empty, try <h1> tag
|
||||
h1_tag = soup.find("h1")
|
||||
if h1_tag:
|
||||
title = h1_tag.get_text()
|
||||
logger.info(f"Using h1 tag as title: {title}")
|
||||
else:
|
||||
title = "Untitled Web Page"
|
||||
logger.info("No title found, using default")
|
||||
url = "https://cloud.tencent.com/document/product/457/6759"
|
||||
|
||||
logger.info(f"Web page title: {title}")
|
||||
text = "\n".join(
|
||||
(line.strip() for line in text.splitlines() if line.strip())
|
||||
)
|
||||
|
||||
result = title + "\n\n" + text
|
||||
logger.info(
|
||||
f"Web page parsing complete, total content: {len(result)} characters"
|
||||
)
|
||||
return Document(content=result)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing web page: {str(e)}")
|
||||
return Document(content=f"Error parsing web page: {str(e)}")
|
||||
|
||||
finally:
|
||||
# Close event loop
|
||||
logger.info("Closing event loop")
|
||||
loop.close()
|
||||
parser = WebParser(title="")
|
||||
cc = parser.parse_into_text(url.encode())
|
||||
with open("./tencent.md", "w") as f:
|
||||
f.write(cc.content)
|
||||
|
||||
@@ -33,5 +33,6 @@ dependencies = [
|
||||
"python-docx>=1.2.0",
|
||||
"requests>=2.32.5",
|
||||
"textract==1.5.0",
|
||||
"trafilatura>=2.0.0",
|
||||
"urllib3>=2.5.0",
|
||||
]
|
||||
|
||||
124
docreader/uv.lock
generated
124
docreader/uv.lock
generated
@@ -214,6 +214,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f6/22/91616fe707a5c5510de2cac9b046a30defe7007ba8a0c04f9c08f27df312/audioop_lts-0.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:b492c3b040153e68b9fdaff5913305aaaba5bb433d8a7f73d5cf6a64ed3cc1dd", size = 25206, upload-time = "2025-08-05T16:43:16.444Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "babel"
|
||||
version = "2.17.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852, upload-time = "2025-02-01T15:17:41.026Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537, upload-time = "2025-02-01T15:17:37.39Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.14.2"
|
||||
@@ -474,6 +483,20 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ab/c8/c9c156aa3bc7caba9b4f8a2b6abec3da6263215988f3fec0ea843f137a10/cos_python_sdk_v5-1.9.38-py3-none-any.whl", hash = "sha256:1d3dd3be2bd992b2e9c2dcd018e2596aa38eab022dbc86b4a5d14c8fc88370e6", size = 92601, upload-time = "2025-08-17T05:12:30.867Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "courlan"
|
||||
version = "1.3.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "babel" },
|
||||
{ name = "tld" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/6f/54/6d6ceeff4bed42e7a10d6064d35ee43a810e7b3e8beb4abeae8cff4713ae/courlan-1.3.2.tar.gz", hash = "sha256:0b66f4db3a9c39a6e22dd247c72cfaa57d68ea660e94bb2c84ec7db8712af190", size = 206382, upload-time = "2024-10-29T16:40:20.994Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/8e/ca/6a667ccbe649856dcd3458bab80b016681b274399d6211187c6ab969fc50/courlan-1.3.2-py3-none-any.whl", hash = "sha256:d0dab52cf5b5b1000ee2839fbc2837e93b2514d3cb5bb61ae158a55b7a04c6be", size = 33848, upload-time = "2024-10-29T16:40:18.325Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crcmod"
|
||||
version = "1.7"
|
||||
@@ -613,6 +636,21 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/18/d5/7a04640bf559bb890455ffb28978daf7d44f667c3f04a4d422c655c1ba92/cython-3.1.6-py3-none-any.whl", hash = "sha256:91dcf7eb9b6a089ce4e9e1140e571d84c3bca834afb77ec269be7aa9d31a8157", size = 1223550, upload-time = "2025-10-23T12:38:16.732Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dateparser"
|
||||
version = "1.2.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "python-dateutil" },
|
||||
{ name = "pytz" },
|
||||
{ name = "regex" },
|
||||
{ name = "tzlocal" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a9/30/064144f0df1749e7bb5faaa7f52b007d7c2d08ec08fed8411aba87207f68/dateparser-1.2.2.tar.gz", hash = "sha256:986316f17cb8cdc23ea8ce563027c5ef12fc725b6fb1d137c14ca08777c5ecf7", size = 329840, upload-time = "2025-06-26T09:29:23.211Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/87/22/f020c047ae1346613db9322638186468238bcfa8849b4668a22b97faad65/dateparser-1.2.2-py3-none-any.whl", hash = "sha256:5a5d7211a09013499867547023a2a0c91d5a27d15dd4dbcea676ea9fe66f2482", size = 315453, upload-time = "2025-06-26T09:29:21.412Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "defusedxml"
|
||||
version = "0.7.1"
|
||||
@@ -664,6 +702,7 @@ dependencies = [
|
||||
{ name = "python-docx" },
|
||||
{ name = "requests" },
|
||||
{ name = "textract" },
|
||||
{ name = "trafilatura" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
|
||||
@@ -697,6 +736,7 @@ requires-dist = [
|
||||
{ name = "python-docx", specifier = ">=1.2.0" },
|
||||
{ name = "requests", specifier = ">=2.32.5" },
|
||||
{ name = "textract", specifier = "==1.5.0" },
|
||||
{ name = "trafilatura", specifier = ">=2.0.0" },
|
||||
{ name = "urllib3", specifier = ">=2.5.0" },
|
||||
]
|
||||
|
||||
@@ -1100,6 +1140,22 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "htmldate"
|
||||
version = "1.9.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "charset-normalizer" },
|
||||
{ name = "dateparser" },
|
||||
{ name = "lxml" },
|
||||
{ name = "python-dateutil" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/9d/10/ead9dabc999f353c3aa5d0dc0835b1e355215a5ecb489a7f4ef2ddad5e33/htmldate-1.9.4.tar.gz", hash = "sha256:1129063e02dd0354b74264de71e950c0c3fcee191178321418ccad2074cc8ed0", size = 44690, upload-time = "2025-11-04T17:46:44.983Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a1/bd/adfcdaaad5805c0c5156aeefd64c1e868c05e9c1cd6fd21751f168cd88c7/htmldate-1.9.4-py3-none-any.whl", hash = "sha256:1b94bcc4e08232a5b692159903acf95548b6a7492dddca5bb123d89d6325921c", size = 31558, upload-time = "2025-11-04T17:46:43.258Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpcore"
|
||||
version = "1.0.9"
|
||||
@@ -1275,6 +1331,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/e8/685f47e0d754320684db4425a0967f7d3fa70126bffd76110b7009a0090f/joblib-1.5.2-py3-none-any.whl", hash = "sha256:4e1f0bdbb987e6d843c70cf43714cb276623def372df3c22fe5266b2670bc241", size = 308396, upload-time = "2025-08-27T12:15:45.188Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "justext"
|
||||
version = "3.0.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "lxml", extra = ["html-clean"] },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/49/f3/45890c1b314f0d04e19c1c83d534e611513150939a7cf039664d9ab1e649/justext-3.0.2.tar.gz", hash = "sha256:13496a450c44c4cd5b5a75a5efcd9996066d2a189794ea99a49949685a0beb05", size = 828521, upload-time = "2025-02-25T20:21:49.934Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/ac/52f4e86d1924a7fc05af3aeb34488570eccc39b4af90530dd6acecdf16b5/justext-3.0.2-py2.py3-none-any.whl", hash = "sha256:62b1c562b15c3c6265e121cc070874243a443bfd53060e869393f09d6b6cc9a7", size = 837940, upload-time = "2025-02-25T20:21:44.179Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "langdetect"
|
||||
version = "1.0.9"
|
||||
@@ -1465,6 +1533,23 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6c/77/d7f491cbc05303ac6801651aabeb262d43f319288c1ea96c66b1d2692ff3/lxml-6.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e", size = 3518768, upload-time = "2025-09-22T04:04:57.097Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
html-clean = [
|
||||
{ name = "lxml-html-clean" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lxml-html-clean"
|
||||
version = "0.4.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "lxml" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d9/cb/c9c5bb2a9c47292e236a808dd233a03531f53b626f36259dcd32b49c76da/lxml_html_clean-0.4.3.tar.gz", hash = "sha256:c9df91925b00f836c807beab127aac82575110eacff54d0a75187914f1bd9d8c", size = 21498, upload-time = "2025-10-02T20:49:24.895Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/10/4a/63a9540e3ca73709f4200564a737d63a4c8c9c4dd032bab8535f507c190a/lxml_html_clean-0.4.3-py3-none-any.whl", hash = "sha256:63fd7b0b9c3a2e4176611c2ca5d61c4c07ffca2de76c14059a81a2825833731e", size = 14177, upload-time = "2025-10-02T20:49:23.749Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "magika"
|
||||
version = "0.6.3"
|
||||
@@ -3531,6 +3616,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e6/5e/56c751afab61336cf0e7aa671b134255a30f15f59cd9e04f59c598a37ff5/tifffile-2025.10.16-py3-none-any.whl", hash = "sha256:41463d979c1c262b0a5cdef2a7f95f0388a072ad82d899458b154a48609d759c", size = 231162, upload-time = "2025-10-16T22:56:07.214Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tld"
|
||||
version = "0.13.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/df/a1/5723b07a70c1841a80afc9ac572fdf53488306848d844cd70519391b0d26/tld-0.13.1.tar.gz", hash = "sha256:75ec00936cbcf564f67361c41713363440b6c4ef0f0c1592b5b0fbe72c17a350", size = 462000, upload-time = "2025-05-21T22:18:29.341Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/70/b2f38360c3fc4bc9b5e8ef429e1fde63749144ac583c2dbdf7e21e27a9ad/tld-0.13.1-py2.py3-none-any.whl", hash = "sha256:a2d35109433ac83486ddf87e3c4539ab2c5c2478230e5d9c060a18af4b03aa7c", size = 274718, upload-time = "2025-05-21T22:18:25.811Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tqdm"
|
||||
version = "4.67.1"
|
||||
@@ -3543,6 +3637,24 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "trafilatura"
|
||||
version = "2.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "certifi" },
|
||||
{ name = "charset-normalizer" },
|
||||
{ name = "courlan" },
|
||||
{ name = "htmldate" },
|
||||
{ name = "justext" },
|
||||
{ name = "lxml" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/06/25/e3ebeefdebfdfae8c4a4396f5a6ea51fc6fa0831d63ce338e5090a8003dc/trafilatura-2.0.0.tar.gz", hash = "sha256:ceb7094a6ecc97e72fea73c7dba36714c5c5b577b6470e4520dca893706d6247", size = 253404, upload-time = "2024-12-03T15:23:24.16Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/8a/b6/097367f180b6383a3581ca1b86fcae284e52075fa941d1232df35293363c/trafilatura-2.0.0-py3-none-any.whl", hash = "sha256:77eb5d1e993747f6f20938e1de2d840020719735690c840b9a1024803a4cd51d", size = 132557, upload-time = "2024-12-03T15:23:21.41Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.15.0"
|
||||
@@ -3573,6 +3685,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tzlocal"
|
||||
version = "5.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "tzdata", marker = "sys_platform == 'win32'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd", size = 30761, upload-time = "2025-03-05T21:17:41.549Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d", size = 18026, upload-time = "2025-03-05T21:17:39.857Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unidic-lite"
|
||||
version = "1.0.8"
|
||||
|
||||
Reference in New Issue
Block a user