tokmor-1.2.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokmor/__init__.py +77 -0
- tokmor/api.py +194 -0
- tokmor/assets.py +365 -0
- tokmor/base.py +238 -0
- tokmor/brahmic.py +516 -0
- tokmor/cjk.py +497 -0
- tokmor/domain/__init__.py +11 -0
- tokmor/domain/sentiment.py +198 -0
- tokmor/factory.py +394 -0
- tokmor/indic.py +289 -0
- tokmor/inventory.py +51 -0
- tokmor/legacy_api.py +143 -0
- tokmor/lemma_store.py +102 -0
- tokmor/lookup_keys.py +145 -0
- tokmor/models/domain/sentiment/en.json +54 -0
- tokmor/models/domain/sentiment/ko.json +52 -0
- tokmor/models/seg_lexicon/km_wordfreq.pkl +0 -0
- tokmor/models/seg_lexicon/km_wordlist.pkl +0 -0
- tokmor/models/seg_lexicon/lo_wordfreq.pkl +0 -0
- tokmor/models/seg_lexicon/lo_wordlist.pkl +0 -0
- tokmor/models/seg_lexicon/my_wordfreq.pkl +0 -0
- tokmor/models/seg_lexicon/my_wordlist.pkl +0 -0
- tokmor/models/seg_lexicon/th_wordfreq.pkl +0 -0
- tokmor/models/seg_lexicon/th_wordlist.pkl +0 -0
- tokmor/models/seg_lexicon/zh_extra_dict.json +35 -0
- tokmor/models/seg_lexicon/zh_wordfreq.pkl +0 -0
- tokmor/morphology/__init__.py +395 -0
- tokmor/morphology/advanced_base.py +472 -0
- tokmor/morphology/arabic_advanced.py +247 -0
- tokmor/morphology/chinese.py +736 -0
- tokmor/morphology/chinese_advanced.py +425 -0
- tokmor/morphology/english.py +315 -0
- tokmor/morphology/english_advanced.py +560 -0
- tokmor/morphology/french_advanced.py +237 -0
- tokmor/morphology/german_advanced.py +343 -0
- tokmor/morphology/hindi_advanced.py +258 -0
- tokmor/morphology/japanese.py +417 -0
- tokmor/morphology/japanese_advanced.py +589 -0
- tokmor/morphology/korean.py +534 -0
- tokmor/morphology/korean_advanced.py +603 -0
- tokmor/morphology/russian_advanced.py +217 -0
- tokmor/morphology/spanish_advanced.py +226 -0
- tokmor/morphology/templates/__init__.py +32 -0
- tokmor/morphology/templates/arabic_script_template.py +162 -0
- tokmor/morphology/templates/brahmic_template.py +181 -0
- tokmor/morphology/templates/cyrillic_template.py +168 -0
- tokmor/morphology/templates/latin_template.py +235 -0
- tokmor/morphology/templates/other_scripts_template.py +475 -0
- tokmor/morphology/thai_native.py +274 -0
- tokmor/morphology/tier2.py +477 -0
- tokmor/morphology/tier3.py +449 -0
- tokmor/morphology/tier4.py +410 -0
- tokmor/morphology/unified.py +855 -0
- tokmor/morphology/universal_fallback.py +398 -0
- tokmor/ner_prep.py +747 -0
- tokmor/offline.py +89 -0
- tokmor/preprocess.py +80 -0
- tokmor/resources.py +288 -0
- tokmor/routing.py +147 -0
- tokmor/rtl.py +309 -0
- tokmor/schema.py +17 -0
- tokmor/sns_tags.py +281 -0
- tokmor/space_based.py +272 -0
- tokmor/token_quality.py +1185 -0
- tokmor/unified_tokens.py +228 -0
- tokmor-1.2.9.dist-info/METADATA +103 -0
- tokmor-1.2.9.dist-info/RECORD +70 -0
- tokmor-1.2.9.dist-info/WHEEL +5 -0
- tokmor-1.2.9.dist-info/licenses/LICENSE +22 -0
- tokmor-1.2.9.dist-info/top_level.txt +1 -0
@@ -0,0 +1,560 @@
"""
English Advanced Morphological Analyzer
=======================================

English morphological analyzer supporting five advanced features

Features:
    1. NER Gazetteer Integration - preserve named-entity boundaries
    2. Real-time Dictionary Extension - extend dictionaries at runtime
    3. Domain Adaptation - domain-specific analysis
    4. Code-switching - handle mixed-language text
    5. N-best Analysis - multiple candidates + confidence scores
"""

import re
import json
from pathlib import Path
from typing import List, Tuple, Dict, Set, Optional, Any

from .advanced_base import (
    AdvancedMorphologicalAnalyzer, Morpheme, AnalysisResult, NBestResult, Domain
)

# Extended dictionary path
from .. import resources

# Optional external asset dir (default: none). If you want extended dictionaries,
# provide them under: TOKMOR_DATA_DIR/extended_dict/{lang}_extended.json
DICT_DIR = resources.data_dir() / "extended_dict"


class EnglishAdvancedAnalyzer(AdvancedMorphologicalAnalyzer):
    """
    English advanced morphological analyzer

    Usage:
        analyzer = EnglishAdvancedAnalyzer()

        # Basic analysis
        result = analyzer.analyze("Apple announced new products")

        # Entity preservation
        analyzer.add_entity("Apple", "ORG")
        result = analyzer.analyze("Apple announced", preserve_entities=True)

        # Domain adaptation
        result = analyzer.analyze("apple", domain="food")  # fruit
        result = analyzer.analyze("apple", domain="tech")  # company

        # N-best analysis
        result = analyzer.analyze("bank", n_best=3)
    """

    LANG_CODE = "en"
    LANG_NAME = "English"

    # Token patterns
    WORD_PATTERN = re.compile(r"[a-zA-Z]+(?:'[a-zA-Z]+)?")
    NUMBER_PATTERN = re.compile(r'[0-9]+(?:\.[0-9]+)?')

    def __init__(self):
        super().__init__()

    def _build_base_dictionary(self):
        """Build the base dictionary"""

        # =================================================================
        # Irregular Verbs
        # =================================================================
        self.irregular_verbs = {
            # Past tense
            'went': 'go', 'gone': 'go', 'goes': 'go',
            'saw': 'see', 'seen': 'see', 'sees': 'see',
            'came': 'come', 'comes': 'come',
            'took': 'take', 'taken': 'take', 'takes': 'take',
            'made': 'make', 'makes': 'make',
            'said': 'say', 'says': 'say',
            'got': 'get', 'gotten': 'get', 'gets': 'get',
            'knew': 'know', 'known': 'know', 'knows': 'know',
            'thought': 'think', 'thinks': 'think',
            'found': 'find', 'finds': 'find',
            'gave': 'give', 'given': 'give', 'gives': 'give',
            'told': 'tell', 'tells': 'tell',
            'became': 'become', 'becomes': 'become',
            'left': 'leave', 'leaves': 'leave',
            'felt': 'feel', 'feels': 'feel',
            'brought': 'bring', 'brings': 'bring',
            'began': 'begin', 'begun': 'begin', 'begins': 'begin',
            'kept': 'keep', 'keeps': 'keep',
            'held': 'hold', 'holds': 'hold',
            'wrote': 'write', 'written': 'write', 'writes': 'write',
            'stood': 'stand', 'stands': 'stand',
            'heard': 'hear', 'hears': 'hear',
            'let': 'let', 'lets': 'let',
            'meant': 'mean', 'means': 'mean',
            'set': 'set', 'sets': 'set',
            'met': 'meet', 'meets': 'meet',
            'ran': 'run', 'runs': 'run',
            'paid': 'pay', 'pays': 'pay',
            'sat': 'sit', 'sits': 'sit',
            'spoke': 'speak', 'spoken': 'speak', 'speaks': 'speak',
            'lay': 'lie', 'lain': 'lie', 'lies': 'lie',
            'led': 'lead', 'leads': 'lead',
            'read': 'read', 'reads': 'read',
            'grew': 'grow', 'grown': 'grow', 'grows': 'grow',
            'lost': 'lose', 'loses': 'lose',
            'fell': 'fall', 'fallen': 'fall', 'falls': 'fall',
            'sent': 'send', 'sends': 'send',
            'built': 'build', 'builds': 'build',
            'understood': 'understand', 'understands': 'understand',
            'drew': 'draw', 'drawn': 'draw', 'draws': 'draw',
            'broke': 'break', 'broken': 'break', 'breaks': 'break',
            'spent': 'spend', 'spends': 'spend',
            'cut': 'cut', 'cuts': 'cut',
            'hit': 'hit', 'hits': 'hit',
            'put': 'put', 'puts': 'put',
            'shut': 'shut', 'shuts': 'shut',
            # Forms of 'be'
            'am': 'be', 'is': 'be', 'are': 'be', 'was': 'be', 'were': 'be', 'been': 'be',
            # have
            'has': 'have', 'had': 'have',
            # do
            'does': 'do', 'did': 'do', 'done': 'do',
            # will/would/can/could etc. are handled as auxiliaries
        }

        # =================================================================
        # Irregular Plurals
        # =================================================================
        self.irregular_plurals = {
            'men': 'man', 'women': 'woman',
            'children': 'child', 'feet': 'foot', 'teeth': 'tooth',
            'mice': 'mouse', 'geese': 'goose', 'oxen': 'ox',
            'people': 'person', 'lives': 'life', 'knives': 'knife',
            'wives': 'wife', 'selves': 'self', 'leaves': 'leaf',
            'loaves': 'loaf', 'halves': 'half', 'wolves': 'wolf',
            'calves': 'calf', 'shelves': 'shelf', 'thieves': 'thief',
            'phenomena': 'phenomenon', 'criteria': 'criterion',
            'analyses': 'analysis', 'bases': 'basis',
            'crises': 'crisis', 'theses': 'thesis',
            'data': 'datum', 'media': 'medium',
            'indices': 'index', 'matrices': 'matrix',
        }

        # =================================================================
        # Function Words
        # =================================================================
        self.determiners = {
            'the', 'a', 'an', 'this', 'that', 'these', 'those',
            'my', 'your', 'his', 'her', 'its', 'our', 'their',
            'some', 'any', 'no', 'every', 'each', 'all', 'both',
            'few', 'many', 'much', 'several', 'enough',
        }

        self.pronouns = {
            'i', 'you', 'he', 'she', 'it', 'we', 'they',
            'me', 'him', 'her', 'us', 'them',
            'myself', 'yourself', 'himself', 'herself', 'itself',
            'ourselves', 'yourselves', 'themselves',
            'who', 'whom', 'whose', 'which', 'what', 'that',
            'whoever', 'whomever', 'whatever', 'whichever',
        }

        self.prepositions = {
            'in', 'on', 'at', 'to', 'for', 'with', 'by', 'from',
            'of', 'about', 'into', 'through', 'during', 'before',
            'after', 'above', 'below', 'between', 'under', 'over',
            'against', 'among', 'around', 'behind', 'beside',
            'without', 'within', 'along', 'across', 'beyond',
        }

        self.conjunctions = {
            'and', 'or', 'but', 'nor', 'yet', 'so', 'for',
            'because', 'although', 'though', 'while', 'if', 'unless',
            'until', 'when', 'where', 'whether', 'since', 'as',
        }

        self.auxiliaries = {
            'will', 'would', 'shall', 'should', 'can', 'could',
            'may', 'might', 'must', 'need', 'dare', 'ought',
        }

        self.adverbs = {
            'very', 'really', 'quite', 'rather', 'too', 'also',
            'just', 'only', 'even', 'still', 'already', 'always',
            'never', 'often', 'sometimes', 'usually', 'seldom',
            'here', 'there', 'now', 'then', 'today', 'yesterday',
            'tomorrow', 'soon', 'ago', 'well', 'badly', 'quickly',
            'slowly', 'carefully', 'easily', 'hard', 'fast',
        }

        # =================================================================
        # Adjectives
        # =================================================================
        self.adjectives = {
            # Basic adjectives
            'good', 'bad', 'new', 'old', 'young', 'big', 'small', 'large',
            'long', 'short', 'high', 'low', 'great', 'little', 'other',
            'same', 'different', 'important', 'right', 'wrong', 'real',
            'true', 'false', 'sure', 'certain', 'clear', 'free', 'full',
            'empty', 'open', 'close', 'early', 'late', 'easy', 'hard',
            'hot', 'cold', 'warm', 'cool', 'dark', 'light', 'bright',
            'heavy', 'strong', 'weak', 'rich', 'poor', 'happy', 'sad',
            'angry', 'afraid', 'alone', 'alive', 'dead', 'ready', 'busy',
            'simple', 'complex', 'special', 'general', 'common', 'rare',
            'strange', 'normal', 'natural', 'human', 'social', 'political',
            'economic', 'public', 'private', 'local', 'national', 'international',
            'main', 'major', 'minor', 'final', 'total', 'whole', 'single',
            'double', 'various', 'similar', 'recent', 'current', 'present',
            'past', 'future', 'ancient', 'modern', 'traditional', 'popular',
            'famous', 'beautiful', 'pretty', 'ugly', 'nice', 'fine', 'perfect',
            'terrible', 'wonderful', 'excellent', 'amazing', 'incredible',
            'possible', 'impossible', 'necessary', 'available', 'responsible',
            'successful', 'powerful', 'useful', 'dangerous', 'safe', 'healthy',
        }

        # =================================================================
        # Ambiguous Words - different senses per domain
        # =================================================================
        self.ambiguous_words = {
            'apple': {'food': 'fruit', 'tech': 'company', 'default': 'fruit'},
            'bank': {'finance': 'financial institution', 'nature': 'river bank', 'default': 'financial institution'},
            'java': {'tech': 'programming language', 'food': 'coffee', 'default': 'programming language'},
            'python': {'tech': 'programming language', 'nature': 'snake', 'default': 'programming language'},
            'ruby': {'tech': 'programming language', 'default': 'gemstone'},
            'shell': {'tech': 'command shell', 'nature': 'seashell', 'default': 'shell'},
            'bug': {'tech': 'software bug', 'nature': 'insect', 'default': 'insect'},
            'cloud': {'tech': 'cloud computing', 'nature': 'sky cloud', 'default': 'sky cloud'},
            'mouse': {'tech': 'computer mouse', 'nature': 'animal', 'default': 'animal'},
            'server': {'tech': 'computer server', 'food': 'person serving', 'default': 'computer server'},
        }

        # =================================================================
        # Load extended dictionary (optional external asset)
        # =================================================================
        self._load_extended_dictionary()

    def _load_extended_dictionary(self):
        """Load optional external extended dictionary"""
        dict_path = DICT_DIR / 'en_extended.json'
        if not dict_path.exists():
            return

        # Initialize the extended word sets
        self.extended_nouns = set()
        self.extended_verbs = set()

        with open(dict_path, 'r', encoding='utf-8') as f:
            extended = json.load(f)

        # Add entries to the extended dictionaries
        for word, upos in extended.items():
            word_lower = word.lower()
            if upos in ('NOUN', 'PROPN'):
                self.extended_nouns.add(word_lower)
            elif upos == 'VERB' and word_lower not in self.irregular_verbs:
                self.extended_verbs.add(word_lower)
            elif upos == 'ADJ':
                self.adjectives.add(word_lower)
            elif upos == 'ADV':
                self.adverbs.add(word_lower)

    def _build_domain_dictionaries(self):
        """Build domain-specific dictionaries"""

        # TECH domain
        self._domain_dictionaries[Domain.TECH] = {
            'apple': ('Apple', 'NNP'),
            'java': ('Java', 'NNP'),
            'python': ('Python', 'NNP'),
            'ruby': ('Ruby', 'NNP'),
            'shell': ('shell', 'NN'),
            'bug': ('bug', 'NN'),
            'cloud': ('cloud', 'NN'),
            'mouse': ('mouse', 'NN'),
            'server': ('server', 'NN'),
            'google': ('Google', 'NNP'),
            'microsoft': ('Microsoft', 'NNP'),
            'amazon': ('Amazon', 'NNP'),
        }

        # FOOD domain
        self._domain_dictionaries[Domain.FOOD] = {
            'apple': ('apple', 'NN'),
            'java': ('java', 'NN'),  # coffee
            'server': ('server', 'NN'),
            'dish': ('dish', 'NN'),
        }

        # FINANCE domain
        self._domain_dictionaries[Domain.FINANCE] = {
            'bank': ('bank', 'NN'),
            'stock': ('stock', 'NN'),
            'bond': ('bond', 'NN'),
            'market': ('market', 'NN'),
            'apple': ('Apple', 'NNP'),  # stock ticker
            'amazon': ('Amazon', 'NNP'),
        }

        # SPORTS domain
        self._domain_dictionaries[Domain.SPORTS] = {
            'court': ('court', 'NN'),
            'field': ('field', 'NN'),
            'net': ('net', 'NN'),
        }

    def _generate_candidates(self, text: str, domain: Domain) -> List[AnalysisResult]:
        """Generate analysis candidates"""
        if not text or not text.strip():
            return [AnalysisResult([])]

        candidates = []

        # Primary analysis
        main_morphemes = self._analyze_text(text, domain)
        main_result = AnalysisResult(
            morphemes=main_morphemes,
            score=1.0,
            domain=domain
        )
        main_result.score = self._score_analysis(main_result)
        candidates.append(main_result)

        return candidates

    def _analyze_text(self, text: str, domain: Domain) -> List[Morpheme]:
        """Analyze text"""
        if not text:
            return []

        result = []
        pos = 0

        while pos < len(text):
            # Skip whitespace
            if text[pos].isspace():
                pos += 1
                continue

            # Word match
            word_match = self.WORD_PATTERN.match(text[pos:])
            if word_match:
                word = word_match.group()
                morpheme = self._analyze_word(word, pos, domain)
                result.append(morpheme)
                pos += len(word)
                continue

            # Numbers
            num_match = self.NUMBER_PATTERN.match(text[pos:])
            if num_match:
                num = num_match.group()
                result.append(Morpheme(
                    surface=num, lemma=num, pos='CD',
                    start=pos, end=pos + len(num)
                ))
                pos += len(num)
                continue

            # Everything else (symbols)
            result.append(Morpheme(
                surface=text[pos], lemma=text[pos], pos='SYM',
                start=pos, end=pos + 1
            ))
            pos += 1

        return result

    def _analyze_word(self, word: str, offset: int, domain: Domain) -> Morpheme:
        """Analyze a single word"""
        word_lower = word.lower()

        # 1. Check the runtime (user) dictionary
        if word_lower in self._user_dictionary:
            lemma, pos_tag, _ = self._user_dictionary[word_lower]
            return Morpheme(surface=word, lemma=lemma, pos=pos_tag, start=offset, end=offset + len(word))

        # 2. Check the domain dictionary
        domain_sense = self._get_domain_sense(word_lower, domain)
        if domain_sense:
            return Morpheme(surface=word, lemma=domain_sense[0], pos=domain_sense[1], start=offset, end=offset + len(word))

        # 3. Check function words
        if word_lower in self.determiners:
            return Morpheme(surface=word, lemma=word_lower, pos='DT', start=offset, end=offset + len(word))
        if word_lower in self.pronouns:
            return Morpheme(surface=word, lemma=word_lower, pos='PRP', start=offset, end=offset + len(word))
        if word_lower in self.prepositions:
            return Morpheme(surface=word, lemma=word_lower, pos='IN', start=offset, end=offset + len(word))
        if word_lower in self.conjunctions:
            return Morpheme(surface=word, lemma=word_lower, pos='CC', start=offset, end=offset + len(word))
        if word_lower in self.auxiliaries:
            return Morpheme(surface=word, lemma=word_lower, pos='MD', start=offset, end=offset + len(word))
        if word_lower in self.adverbs:
            return Morpheme(surface=word, lemma=word_lower, pos='RB', start=offset, end=offset + len(word))
        if word_lower in self.adjectives:
            return Morpheme(surface=word, lemma=word_lower, pos='JJ', start=offset, end=offset + len(word))

        # 4. Irregular verbs
        if word_lower in self.irregular_verbs:
            lemma = self.irregular_verbs[word_lower]
            return Morpheme(surface=word, lemma=lemma, pos='VB', start=offset, end=offset + len(word))

        # 5. Irregular plurals
        if word_lower in self.irregular_plurals:
            lemma = self.irregular_plurals[word_lower]
            return Morpheme(surface=word, lemma=lemma, pos='NNS', start=offset, end=offset + len(word))

        # 6. Extended dictionary (optional external)
        if hasattr(self, 'extended_verbs') and word_lower in self.extended_verbs:
            return Morpheme(surface=word, lemma=word_lower, pos='VB', start=offset, end=offset + len(word))
        if hasattr(self, 'extended_nouns') and word_lower in self.extended_nouns:
            return Morpheme(surface=word, lemma=word_lower, pos='NN', start=offset, end=offset + len(word))

        # 7. Regular inflection analysis
        lemma, pos_tag = self._analyze_morphology(word_lower)
        return Morpheme(surface=word, lemma=lemma, pos=pos_tag, start=offset, end=offset + len(word))

    def _analyze_morphology(self, word: str) -> Tuple[str, str]:
        """Morphological analysis (lemmatization + POS guessing)"""

        # -ing forms
        if word.endswith('ing') and len(word) > 4:
            stem = word[:-3]
            if stem.endswith(('e',)):
                return (stem + 'e', 'VBG')
            if len(stem) >= 3 and stem[-1] == stem[-2]:  # running -> run
                return (stem[:-1], 'VBG')
            return (stem, 'VBG')

        # -ed forms
        if word.endswith('ed') and len(word) > 3:
            # -ied → -y: carried → carry
            if word.endswith('ied'):
                return (word[:-3] + 'y', 'VBD')

            stem = word[:-2]

            # Undouble the final consonant: stopped → stop, planned → plan
            if len(stem) >= 2 and stem[-1] == stem[-2] and stem[-1] in 'bdgklmnprst':
                return (stem[:-1], 'VBD')

            # Restore silent e: announced → announce, danced → dance
            if stem.endswith(('c', 'v', 'z')):
                return (stem + 'e', 'VBD')

            # Restore e after g preceded by a vowel or nasal: changed → change
            if stem.endswith('g') and len(stem) >= 2 and stem[-2] in 'aeioumn':
                return (stem + 'e', 'VBD')

            # CVC + e pattern: liked → like, hoped → hope
            if len(stem) >= 2 and stem[-2] in 'aeiou' and stem[-1] in 'kptd':
                return (stem + 'e', 'VBD')

            return (stem, 'VBD')

        # -s/-es forms (3rd-person verb or plural noun)
        if word.endswith('ies') and len(word) > 4:
            return (word[:-3] + 'y', 'VBZ')  # or NNS
        if word.endswith('es') and len(word) > 3:
            return (word[:-2], 'VBZ')
        if word.endswith('s') and len(word) > 2:
            return (word[:-1], 'VBZ')

        # -ly forms (adverbs)
        if word.endswith('ly') and len(word) > 3:
            return (word[:-2], 'RB')

        # -ness forms (nouns)
        if word.endswith('ness') and len(word) > 5:
            return (word[:-4], 'NN')

        # -tion/-sion forms (nouns)
        if word.endswith(('tion', 'sion')) and len(word) > 5:
            return (word, 'NN')

        # -or forms (nouns): doctor, actor, director
        if word.endswith('or') and len(word) > 3:
            return (word, 'NN')

        # -est forms (superlatives)
        if word.endswith('est') and len(word) > 4:
            return (word[:-3], 'JJS')

        # -er forms (comparative/noun)
        if word.endswith('er') and len(word) > 3:
            base = word[:-2]
            # Comparative patterns: bigger → big, nicer → nice, taller → tall
            # Doubled consonant
            if len(base) >= 2 and base[-1] == base[-2]:
                return (base[:-1], 'JJR')
            # Dropped e
            if len(base) >= 2 and base[-1] in 'cgkptvlns':
                return (base + 'e', 'JJR')
            # Otherwise treat as a noun (teacher, player)
            return (word, 'NN')

        # =================================================================
        # Adjective Suffix Patterns
        # =================================================================

        # -ful forms (adjectives): beautiful, wonderful, powerful
        if word.endswith('ful') and len(word) > 5:
            return (word[:-3], 'JJ')

        # -less forms (adjectives): useless, helpless, careless
        if word.endswith('less') and len(word) > 5:
            return (word[:-4], 'JJ')

        # -ous forms (adjectives): famous, dangerous, nervous
        if word.endswith('ous') and len(word) > 4:
            return (word, 'JJ')

        # -ive forms (adjectives): active, creative, impressive
        if word.endswith('ive') and len(word) > 4:
            return (word, 'JJ')

        # -able/-ible forms (adjectives): available, possible, incredible
        if word.endswith(('able', 'ible')) and len(word) > 5:
            return (word, 'JJ')

        # -al/-ial/-ical forms (adjectives): natural, social, political
        if word.endswith(('ical', 'ial')) and len(word) > 5:
            return (word, 'JJ')
        if word.endswith('al') and len(word) > 3:
            return (word, 'JJ')

        # -ent/-ant forms (adjectives): different, important, excellent
        if word.endswith(('ent', 'ant')) and len(word) > 4:
            return (word, 'JJ')

        # Starts with an uppercase letter (proper noun)
        if word[0].isupper():
            return (word, 'NNP')

        # Default: noun
        return (word, 'NN')

    def _generate_alternatives(self, text: str, domain: Domain, count: int) -> List[AnalysisResult]:
        """Generate alternative analysis results"""
        alternatives = []

        # Analyze with the other domains
        other_domains = [d for d in Domain if d != domain][:count]

        for alt_domain in other_domains:
            morphemes = self._analyze_text(text, alt_domain)
            result = AnalysisResult(
                morphemes=morphemes,
                score=0.8,
                domain=alt_domain
            )
            result.score = self._score_analysis(result) * 0.9
            alternatives.append(result)

        return alternatives


# Alias for backward compatibility
EnglishAnalyzer = EnglishAdvancedAnalyzer