Give up on app-i18n/ibus-kkc

Signed-off-by: Shin'ya Minazuki <shinyoukai@laidback.moe>
2026-04-16 11:31:47 -03:00
parent 10423f8793
commit 2e070de620
5 changed files with 0 additions and 361 deletions
--- a/app-i18n/libkkc-data/Manifest
+++ b/app-i18n/libkkc-data/Manifest
@@ -1 +0,0 @@
-DIST libkkc-data-0.2.7.tar.xz 22262552 BLAKE2B 2c735ee9fabf8f8f201591c9ed584cece22ddcd15da5f36b39bb422b1bce1dbcbcd66f71b5713e2dd4c5e2862b06b014c24a4a3db63c86ecee20519434da9261 SHA512 61c0cd8c0fa41ed8df49cac6709eebb245cc965d7e192b1ba945e95f2fc46aca8aa48c16e1977a12c157c55dab6b9f4c30f4905806725eca6e697b762eb7cbd7
--- a/app-i18n/libkkc-data/files/genfilter.py
+++ b/app-i18n/libkkc-data/files/genfilter.py
@@ -1,121 +0,0 @@
-#!/usr/bin/python3.13
-
-# Copyright (C) 2011-2013 Daiki Ueno <ueno@gnu.org>
-# Copyright (C) 2011-2013 Red Hat, Inc.
-
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-import os
-import mmap
-import math
-import struct
-
-ERROR_RATE = 0.25
-
-def murmur_hash3_32(b0, b1, seed):
-    h1 = seed
-
-    c1 = 0xcc9e2d51
-    c2 = 0x1b873593
-
-    # body: b0
-    b0 *= c1
-    b0 &= 0xFFFFFFFF
-    b0 = (b0 << 15) | (b0 >> (32 - 15))
-    b0 &= 0xFFFFFFFF
-    b0 *= c2
-    b0 &= 0xFFFFFFFF
-
-    h1 ^= b0
-    h1 &= 0xFFFFFFFF
-    h1 = (h1 << 13) | (h1 >> (32 - 13)) 
-    h1 &= 0xFFFFFFFF
-    h1 = h1 * 5 + 0xe6546b64
-    h1 &= 0xFFFFFFFF
-
-    # body: b1
-    b1 *= c1
-    b1 &= 0xFFFFFFFF
-    b1 = (b1 << 15) | (b1 >> (32 - 15))
-    b1 &= 0xFFFFFFFF
-    b1 *= c2
-    b1 &= 0xFFFFFFFF
-
-    h1 ^= b1
-    h1 &= 0xFFFFFFFF
-    h1 = (h1 << 13) | (h1 >> (32 - 13)) 
-    h1 &= 0xFFFFFFFF
-    h1 = h1 * 5 + 0xe6546b64
-    h1 &= 0xFFFFFFFF
-
-    # No tail processing needed.
-
-    # fmix
-    h1 ^= 8
-    h1 &= 0xFFFFFFFF
-    h1 ^= h1 >> 16
-    h1 &= 0xFFFFFFFF
-    h1 *= 0x85ebca6b
-    h1 &= 0xFFFFFFFF
-    h1 ^= h1 >> 13
-    h1 &= 0xFFFFFFFF
-    h1 *= 0xc2b2ae35
-    h1 &= 0xFFFFFFFF
-    h1 ^= h1 >> 16
-    h1 &= 0xFFFFFFFF
-    return h1
-
-class FilterGenerator(object):
-    def __init__(self, infile, outfile, record_size):
-        self.infile = infile
-        self.outfile = outfile
-        self.record_size = record_size
-
-    def generate(self):
-        size = os.fstat(self.infile.fileno()).st_size
-        n = size / self.record_size
-        m = int(math.ceil(-n*math.log10(ERROR_RATE) /
-                          math.pow(math.log10(2), 2)))
-        m = (m/8 + 1)*8
-        inmem = mmap.mmap(self.infile.fileno(),
-                          size,
-                          access=mmap.ACCESS_READ)
-        outmem = bytearray(m/8)
-        for i in range(0, n):
-            offset = i*self.record_size
-            b0, b1 = struct.unpack("=LL", inmem[offset:offset+8])
-            for k in range(0, 4):
-                h = murmur_hash3_32(b0, b1, k)
-                h = int(h * (m / float(0xFFFFFFFF)))
-                outmem[h/8] |= (1 << (h%8))
-        inmem.close()
-        # Convert bytearray to str, for Python 2.6 compatibility.
-        self.outfile.write(str(outmem))
-
-if __name__ == '__main__':
-    import sys
-    import argparse
-
-    parser = argparse.ArgumentParser(description='filter')
-    parser.add_argument('infile', type=argparse.FileType('r'),
-                        help='input file')
-    parser.add_argument('outfile', type=argparse.FileType('w'),
-                        help='output file')
-    parser.add_argument('record_size', type=int,
-                        help='record size')
-    args = parser.parse_args()
-    generator = FilterGenerator(args.infile,
-                                args.outfile,
-                                args.record_size)
-    generator.generate()
--- a/app-i18n/libkkc-data/files/sortlm.py
+++ b/app-i18n/libkkc-data/files/sortlm.py
@@ -1,188 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (C) 2011-2013 Daiki Ueno <ueno@gnu.org>
-# Copyright (C) 2011-2013 Red Hat, Inc.
-
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-import struct
-import marisa
-import re
-
-NGRAM = 3
-NGRAM_LINE_REGEX = '^([-0-9.]+)[ \t]+([^\t]+?)(?:[ \t]+([-0-9.]+))?$'
-
-class SortedGenerator(object):
-    def __init__(self, infile, output_prefix):
-        self.__infile = infile
-        self.__output_prefix = output_prefix
-        self.__ngram_line_regex = re.compile(NGRAM_LINE_REGEX)
-
-        self.__ngram_entries = [{} for x in range(0, NGRAM)]
-
-        self.__vocab_keyset = marisa.Keyset()
-        self.__input_keyset = marisa.Keyset()
-
-        self.__vocab_trie = marisa.Trie()
-        self.__input_trie = marisa.Trie()
-
-        self.__min_cost = 0.0
-
-    def read(self):
-        print("reading N-grams")
-        self.__read_tries()
-        self.__read_ngrams()
-        print("min cost = %lf" % self.__min_cost)
-
-    def __read_tries(self):
-        while True:
-            line = self.__infile.readline()
-            if line == "":
-                break
-            if line.startswith("\\1-grams"):
-                break
-
-        unigram_count = 0
-        while True:
-            line = self.__infile.readline()
-            if line == "":
-                break
-            line = line.strip()
-            if line == "":
-                break
-            match = self.__ngram_line_regex.match(line)
-            if not match:
-                continue
-            strv = match.groups()
-            self.__vocab_keyset.push_back(strv[1])
-            if not strv[1] in ("<s>", "</s>", "<UNK>"):
-                if "/" not in strv[1]:
-                    continue
-                (input, output) = strv[1].split("/")
-                self.__input_keyset.push_back(input)
-
-        self.__vocab_trie.build(self.__vocab_keyset)
-        self.__input_trie.build(self.__input_keyset)
-
-    def __read_ngrams(self):
-        self.__infile.seek(0)
-        for n in range(1, NGRAM + 1):
-            while True:
-                line = self.__infile.readline()
-                if line == "":
-                    break
-                if line.startswith("\\%s-grams:" % n):
-                    break
-
-            while True:
-                line = self.__infile.readline()
-                if line == "":
-                    break
-                line = line.strip()
-                if line == "":
-                    break
-                match = self.__ngram_line_regex.match(line)
-                if not match:
-                    continue
-                strv = match.groups()
-                ngram = strv[1].split(" ")
-                ids = []
-                for word in ngram:
-                    agent = marisa.Agent()
-                    agent.set_query(word)
-                    if not self.__vocab_trie.lookup(agent):
-                        continue
-                    ids.append(agent.key_id())
-                cost = float(strv[0])
-                if cost != -99 and cost < self.__min_cost:
-                    self.__min_cost = cost
-                backoff = 0.0
-                if strv[2]:
-                    backoff = float(strv[2])
-                self.__ngram_entries[n - 1][tuple(ids)] = (cost, backoff)
-
-    def write(self):
-        self.__min_cost = -8.0
-        self.__write_tries()
-        self.__write_ngrams()
-
-    def __write_tries(self):
-        self.__vocab_trie.save(self.__output_prefix + ".1gram.index")
-        self.__input_trie.save(self.__output_prefix + ".input")
-
-    def __write_ngrams(self):
-        def quantize(cost, min_cost):
-            return max(0, min(65535, int(cost * 65535 / min_cost)))
-
-        def cmp_header(a, b):
-            return cmp(a[0], b[0])
-
-        print("writing 1-gram file")
-        unigram_offsets = {}
-        unigram_file = open("%s.1gram" % self.__output_prefix, "wb")
-        offset = 0
-        for ids, value in sorted(self.__ngram_entries[0].items()):
-            unigram_offsets[ids[0]] = offset
-            s = struct.pack("=HHH",
-                            quantize(value[0], self.__min_cost),
-                            quantize(value[1], self.__min_cost),
-                            0   # reserved
-                            )
-            unigram_file.write(s)
-            offset += 1
-        unigram_file.close()
-
-        print("writing 2-gram file")
-        bigram_offsets = {}
-        bigram_file = open("%s.2gram" % self.__output_prefix, "wb")
-        keys = list(self.__ngram_entries[1].keys())
-        items = [(struct.pack("=LL", ids[1], unigram_offsets[ids[0]]), ids) for ids in keys]
-        offset = 0
-        for header, ids in sorted(items, cmp=cmp_header):
-            value = self.__ngram_entries[1][ids]
-            bigram_offsets[ids] = offset
-            s = struct.pack("=HH",
-                            quantize(value[0], self.__min_cost),
-                            quantize(value[1], self.__min_cost))
-            bigram_file.write(header + s)
-            offset += 1
-        bigram_file.close()
-
-        if len(self.__ngram_entries[2]) > 0:
-            print("writing 3-gram file")
-            trigram_file = open("%s.3gram" % self.__output_prefix, "wb")
-            keys = list(self.__ngram_entries[2].keys())
-            items = [(struct.pack("=LL", ids[2], bigram_offsets[(ids[0], ids[1])]), ids) for ids in keys]
-            for header, ids in sorted(items, cmp=cmp_header):
-                value = self.__ngram_entries[2][ids]
-                s = struct.pack("=H",
-                                quantize(value[0], self.__min_cost))
-                trigram_file.write(header + s)
-            trigram_file.close()
-
-if __name__ == '__main__':
-    import sys
-    import argparse
-
-    parser = argparse.ArgumentParser(description='sortlm')
-    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
-                        default=sys.stdin,
-                        help='language model file')
-    parser.add_argument('output_prefix', metavar='OUTPUT_PREFIX', type=str,
-                        help='output file prefix')
-    args = parser.parse_args()
-
-    generator = SortedGenerator(args.infile, args.output_prefix)
-    generator.read();
-    generator.write();
--- a/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild.bak
+++ b/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild.bak
@@ -1,40 +0,0 @@
-# Copyright 2026 Shin'ya Minazuki
-EAPI=8
-
-PYTHON_COMPAT=( python3_{11..12} )
-
-inherit autotools python-single-r1
-
-LIBKKC_PV="0.3.5"
-
-DESCRIPTION="Language model data for app-i18n/libkkc"
-HOMEPAGE="https://github.com/ueno/libkkc"
-SRC_URI="https://github.com/ueno/libkkc/releases/download/v${LIBKKC_PV}/${P}.tar.xz"
-
-LICENSE="GPL-3+"
-SLOT="0"
-KEYWORDS="~amd64"
-
-REQUIRED_USE="${PYTHON_REQUIRED_USE}"
-
-DEPEND="
-	dev-libs/marisa[python]
-"
-RDEPEND="${DEPEND}"
-
-src_prepare() {
-	default
-	cp -f "${FILESDIR}/genfilter.py" "${S}/tools/genfilter.py"
-	cp -f "${FILESDIR}/sortlm.py" "${S}/tools/sortlm.py"
-	python_fix_shebang tools/genfilter.py
-	python_fix_shebang tools/sortlm.py
-	eautoreconf
-}
-
-src_configure() {
-	econf
-}
-
-src_install() {
-	emake DESTDIR="${D}" install
-}
--- a/app-i18n/libkkc-data/metadata.xml
+++ b/app-i18n/libkkc-data/metadata.xml
@@ -1,11 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE pkgmetadata SYSTEM "https://www.gentoo.org/dtd/metadata.dtd">
-<pkgmetadata>
-	<maintainer type="person">
-		<email>shinyoukai@laidback.moe</email>
-		<name>Shin'ya Minazuki</name>
-	</maintainer>
-	<upstream>
-		<remote-id type="github">ueno/libkkc</remote-id>
-	</upstream>
-</pkgmetadata>
				`@@ -1 +0,0 @@`
				`DIST libkkc-data-0.2.7.tar.xz 22262552 BLAKE2B 2c735ee9fabf8f8f201591c9ed584cece22ddcd15da5f36b39bb422b1bce1dbcbcd66f71b5713e2dd4c5e2862b06b014c24a4a3db63c86ecee20519434da9261 SHA512 61c0cd8c0fa41ed8df49cac6709eebb245cc965d7e192b1ba945e95f2fc46aca8aa48c16e1977a12c157c55dab6b9f4c30f4905806725eca6e697b762eb7cbd7`