From 2e070de620b93d89f2b1d80f0f247d01df55d003 Mon Sep 17 00:00:00 2001 From: Shin'ya Minazuki Date: Thu, 16 Apr 2026 11:31:47 -0300 Subject: [PATCH] Give up on app-i18n/ibus-kkc Signed-off-by: Shin'ya Minazuki --- app-i18n/libkkc-data/Manifest | 1 - app-i18n/libkkc-data/files/genfilter.py | 121 ----------- app-i18n/libkkc-data/files/sortlm.py | 188 ------------------ .../libkkc-data/libkkc-data-0.2.7.ebuild.bak | 40 ---- app-i18n/libkkc-data/metadata.xml | 11 - 5 files changed, 361 deletions(-) delete mode 100644 app-i18n/libkkc-data/Manifest delete mode 100644 app-i18n/libkkc-data/files/genfilter.py delete mode 100644 app-i18n/libkkc-data/files/sortlm.py delete mode 100644 app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild.bak delete mode 100644 app-i18n/libkkc-data/metadata.xml diff --git a/app-i18n/libkkc-data/Manifest b/app-i18n/libkkc-data/Manifest deleted file mode 100644 index c0fc91c..0000000 --- a/app-i18n/libkkc-data/Manifest +++ /dev/null @@ -1 +0,0 @@ -DIST libkkc-data-0.2.7.tar.xz 22262552 BLAKE2B 2c735ee9fabf8f8f201591c9ed584cece22ddcd15da5f36b39bb422b1bce1dbcbcd66f71b5713e2dd4c5e2862b06b014c24a4a3db63c86ecee20519434da9261 SHA512 61c0cd8c0fa41ed8df49cac6709eebb245cc965d7e192b1ba945e95f2fc46aca8aa48c16e1977a12c157c55dab6b9f4c30f4905806725eca6e697b762eb7cbd7 diff --git a/app-i18n/libkkc-data/files/genfilter.py b/app-i18n/libkkc-data/files/genfilter.py deleted file mode 100644 index a6db99d..0000000 --- a/app-i18n/libkkc-data/files/genfilter.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/python3.13 - -# Copyright (C) 2011-2013 Daiki Ueno -# Copyright (C) 2011-2013 Red Hat, Inc. - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -import os -import mmap -import math -import struct - -ERROR_RATE = 0.25 - -def murmur_hash3_32(b0, b1, seed): - h1 = seed - - c1 = 0xcc9e2d51 - c2 = 0x1b873593 - - # body: b0 - b0 *= c1 - b0 &= 0xFFFFFFFF - b0 = (b0 << 15) | (b0 >> (32 - 15)) - b0 &= 0xFFFFFFFF - b0 *= c2 - b0 &= 0xFFFFFFFF - - h1 ^= b0 - h1 &= 0xFFFFFFFF - h1 = (h1 << 13) | (h1 >> (32 - 13)) - h1 &= 0xFFFFFFFF - h1 = h1 * 5 + 0xe6546b64 - h1 &= 0xFFFFFFFF - - # body: b1 - b1 *= c1 - b1 &= 0xFFFFFFFF - b1 = (b1 << 15) | (b1 >> (32 - 15)) - b1 &= 0xFFFFFFFF - b1 *= c2 - b1 &= 0xFFFFFFFF - - h1 ^= b1 - h1 &= 0xFFFFFFFF - h1 = (h1 << 13) | (h1 >> (32 - 13)) - h1 &= 0xFFFFFFFF - h1 = h1 * 5 + 0xe6546b64 - h1 &= 0xFFFFFFFF - - # No tail processing needed. - - # fmix - h1 ^= 8 - h1 &= 0xFFFFFFFF - h1 ^= h1 >> 16 - h1 &= 0xFFFFFFFF - h1 *= 0x85ebca6b - h1 &= 0xFFFFFFFF - h1 ^= h1 >> 13 - h1 &= 0xFFFFFFFF - h1 *= 0xc2b2ae35 - h1 &= 0xFFFFFFFF - h1 ^= h1 >> 16 - h1 &= 0xFFFFFFFF - return h1 - -class FilterGenerator(object): - def __init__(self, infile, outfile, record_size): - self.infile = infile - self.outfile = outfile - self.record_size = record_size - - def generate(self): - size = os.fstat(self.infile.fileno()).st_size - n = size / self.record_size - m = int(math.ceil(-n*math.log10(ERROR_RATE) / - math.pow(math.log10(2), 2))) - m = (m/8 + 1)*8 - inmem = mmap.mmap(self.infile.fileno(), - size, - access=mmap.ACCESS_READ) - outmem = bytearray(m/8) - for i in range(0, n): - offset = i*self.record_size - b0, b1 = struct.unpack("=LL", inmem[offset:offset+8]) - for k in range(0, 4): - h = murmur_hash3_32(b0, b1, k) - h = int(h * (m / float(0xFFFFFFFF))) - outmem[h/8] |= (1 << (h%8)) - inmem.close() - # Convert bytearray to str, for Python 2.6 compatibility. - self.outfile.write(str(outmem)) - -if __name__ == '__main__': - import sys - import argparse - - parser = argparse.ArgumentParser(description='filter') - parser.add_argument('infile', type=argparse.FileType('r'), - help='input file') - parser.add_argument('outfile', type=argparse.FileType('w'), - help='output file') - parser.add_argument('record_size', type=int, - help='record size') - args = parser.parse_args() - generator = FilterGenerator(args.infile, - args.outfile, - args.record_size) - generator.generate() \ No newline at end of file diff --git a/app-i18n/libkkc-data/files/sortlm.py b/app-i18n/libkkc-data/files/sortlm.py deleted file mode 100644 index 3390611..0000000 --- a/app-i18n/libkkc-data/files/sortlm.py +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/python - -# Copyright (C) 2011-2013 Daiki Ueno -# Copyright (C) 2011-2013 Red Hat, Inc. - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -import struct -import marisa -import re - -NGRAM = 3 -NGRAM_LINE_REGEX = '^([-0-9.]+)[ \t]+([^\t]+?)(?:[ \t]+([-0-9.]+))?$' - -class SortedGenerator(object): - def __init__(self, infile, output_prefix): - self.__infile = infile - self.__output_prefix = output_prefix - self.__ngram_line_regex = re.compile(NGRAM_LINE_REGEX) - - self.__ngram_entries = [{} for x in range(0, NGRAM)] - - self.__vocab_keyset = marisa.Keyset() - self.__input_keyset = marisa.Keyset() - - self.__vocab_trie = marisa.Trie() - self.__input_trie = marisa.Trie() - - self.__min_cost = 0.0 - - def read(self): - print("reading N-grams") - self.__read_tries() - self.__read_ngrams() - print("min cost = %lf" % self.__min_cost) - - def __read_tries(self): - while True: - line = self.__infile.readline() - if line == "": - break - if line.startswith("\\1-grams"): - break - - unigram_count = 0 - while True: - line = self.__infile.readline() - if line == "": - break - line = line.strip() - if line == "": - break - match = self.__ngram_line_regex.match(line) - if not match: - continue - strv = match.groups() - self.__vocab_keyset.push_back(strv[1]) - if not strv[1] in ("", "", ""): - if "/" not in strv[1]: - continue - (input, output) = strv[1].split("/") - self.__input_keyset.push_back(input) - - self.__vocab_trie.build(self.__vocab_keyset) - self.__input_trie.build(self.__input_keyset) - - def __read_ngrams(self): - self.__infile.seek(0) - for n in range(1, NGRAM + 1): - while True: - line = self.__infile.readline() - if line == "": - break - if line.startswith("\\%s-grams:" % n): - break - - while True: - line = self.__infile.readline() - if line == "": - break - line = line.strip() - if line == "": - break - match = self.__ngram_line_regex.match(line) - if not match: - continue - strv = match.groups() - ngram = strv[1].split(" ") - ids = [] - for word in ngram: - agent = marisa.Agent() - agent.set_query(word) - if not self.__vocab_trie.lookup(agent): - continue - ids.append(agent.key_id()) - cost = float(strv[0]) - if cost != -99 and cost < self.__min_cost: - self.__min_cost = cost - backoff = 0.0 - if strv[2]: - backoff = float(strv[2]) - self.__ngram_entries[n - 1][tuple(ids)] = (cost, backoff) - - def write(self): - self.__min_cost = -8.0 - self.__write_tries() - self.__write_ngrams() - - def __write_tries(self): - self.__vocab_trie.save(self.__output_prefix + ".1gram.index") - self.__input_trie.save(self.__output_prefix + ".input") - - def __write_ngrams(self): - def quantize(cost, min_cost): - return max(0, min(65535, int(cost * 65535 / min_cost))) - - def cmp_header(a, b): - return cmp(a[0], b[0]) - - print("writing 1-gram file") - unigram_offsets = {} - unigram_file = open("%s.1gram" % self.__output_prefix, "wb") - offset = 0 - for ids, value in sorted(self.__ngram_entries[0].items()): - unigram_offsets[ids[0]] = offset - s = struct.pack("=HHH", - quantize(value[0], self.__min_cost), - quantize(value[1], self.__min_cost), - 0 # reserved - ) - unigram_file.write(s) - offset += 1 - unigram_file.close() - - print("writing 2-gram file") - bigram_offsets = {} - bigram_file = open("%s.2gram" % self.__output_prefix, "wb") - keys = list(self.__ngram_entries[1].keys()) - items = [(struct.pack("=LL", ids[1], unigram_offsets[ids[0]]), ids) for ids in keys] - offset = 0 - for header, ids in sorted(items, cmp=cmp_header): - value = self.__ngram_entries[1][ids] - bigram_offsets[ids] = offset - s = struct.pack("=HH", - quantize(value[0], self.__min_cost), - quantize(value[1], self.__min_cost)) - bigram_file.write(header + s) - offset += 1 - bigram_file.close() - - if len(self.__ngram_entries[2]) > 0: - print("writing 3-gram file") - trigram_file = open("%s.3gram" % self.__output_prefix, "wb") - keys = list(self.__ngram_entries[2].keys()) - items = [(struct.pack("=LL", ids[2], bigram_offsets[(ids[0], ids[1])]), ids) for ids in keys] - for header, ids in sorted(items, cmp=cmp_header): - value = self.__ngram_entries[2][ids] - s = struct.pack("=H", - quantize(value[0], self.__min_cost)) - trigram_file.write(header + s) - trigram_file.close() - -if __name__ == '__main__': - import sys - import argparse - - parser = argparse.ArgumentParser(description='sortlm') - parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), - default=sys.stdin, - help='language model file') - parser.add_argument('output_prefix', metavar='OUTPUT_PREFIX', type=str, - help='output file prefix') - args = parser.parse_args() - - generator = SortedGenerator(args.infile, args.output_prefix) - generator.read(); - generator.write(); \ No newline at end of file diff --git a/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild.bak b/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild.bak deleted file mode 100644 index b964a14..0000000 --- a/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild.bak +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2026 Shin'ya Minazuki -EAPI=8 - -PYTHON_COMPAT=( python3_{11..12} ) - -inherit autotools python-single-r1 - -LIBKKC_PV="0.3.5" - -DESCRIPTION="Language model data for app-i18n/libkkc" -HOMEPAGE="https://github.com/ueno/libkkc" -SRC_URI="https://github.com/ueno/libkkc/releases/download/v${LIBKKC_PV}/${P}.tar.xz" - -LICENSE="GPL-3+" -SLOT="0" -KEYWORDS="~amd64" - -REQUIRED_USE="${PYTHON_REQUIRED_USE}" - -DEPEND=" - dev-libs/marisa[python] -" -RDEPEND="${DEPEND}" - -src_prepare() { - default - cp -f "${FILESDIR}/genfilter.py" "${S}/tools/genfilter.py" - cp -f "${FILESDIR}/sortlm.py" "${S}/tools/sortlm.py" - python_fix_shebang tools/genfilter.py - python_fix_shebang tools/sortlm.py - eautoreconf -} - -src_configure() { - econf -} - -src_install() { - emake DESTDIR="${D}" install -} diff --git a/app-i18n/libkkc-data/metadata.xml b/app-i18n/libkkc-data/metadata.xml deleted file mode 100644 index 0b62c7a..0000000 --- a/app-i18n/libkkc-data/metadata.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - shinyoukai@laidback.moe - Shin'ya Minazuki - - - ueno/libkkc - -