# Nanami-UI/agent-tools/generate_consumable_db.py
# Recovered from a web code-viewer export dated 2026-04-09 09:46:47 +08:00
# (427 lines, 12 KiB as displayed).
# NOTE(review): the viewer warned about "ambiguous Unicode characters" and the
# export stripped them, so several string literals below lost fullwidth
# punctuation (see normalize_text, normalize_effect, CATEGORY_ORDER) — restore
# them from the original file before relying on this copy.
from __future__ import annotations
import argparse
import re
import textwrap
import xml.etree.ElementTree as ET
import zipfile
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path, PurePosixPath
# XML namespaces for the SpreadsheetML (.xlsx) package parts we parse.
MAIN_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
DOC_REL_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
PKG_REL_NS = "http://schemas.openxmlformats.org/package/2006/relationships"
# Prefix map handed to ElementTree find()/findall() calls.
NS = {
    "main": MAIN_NS,
}
# Presentation metadata per role label (as it appears in column A of the
# sheet): "key" is a stable slug emitted into the Lua table, "detail" a
# display string, "color" an RGB tuple of 0..1 floats.
ROLE_META = {
    "坦克 (物理坦克)": {
        "key": "tank_physical",
        "detail": "坦克 · 物理坦克",
        "color": (0.40, 0.70, 1.00),
    },
    "坦克 (法系坦克)": {
        "key": "tank_caster",
        "detail": "坦克 · 法系坦克",
        "color": (1.00, 0.80, 0.20),
    },
    "法系输出": {
        "key": "caster_dps",
        "detail": "输出 · 法系输出",
        "color": (0.65, 0.45, 1.00),
    },
    "物理近战": {
        "key": "melee_dps",
        "detail": "输出 · 物理近战",
        "color": (1.00, 0.55, 0.25),
    },
    "物理远程": {
        "key": "ranged_dps",
        "detail": "输出 · 物理远程",
        "color": (0.55, 0.88, 0.42),
    },
    "治疗": {
        "key": "healer",
        "detail": "治疗",
        "color": (0.42, 1.00, 0.72),
    },
}
# Display order for item categories; categories not listed here sort last
# (see build_groups / render_lua).
# NOTE(review): the empty-string entry below almost certainly lost a category
# name when the export stripped "ambiguous Unicode" characters — restore it
# from the original file.
CATEGORY_ORDER = [
    "合剂",
    "药剂",
    "攻强",
    "诅咒之地buff",
    "赞达拉",
    "武器",
    "食物",
    "",
    "药水",
]
# The exported sheet does not include item IDs. We only preserve a very small
# set of IDs that already existed in the addon and can be matched confidently.
ITEM_ID_OVERRIDES = {
    "巨人药剂": 9206,
    "猫鼬药剂": 13452,
    "精炼智慧合剂": 13511,
    "夜鳞鱼汤": 13931,
    "烤鱿鱼": 13928,
    "炎夏火水": 12820,
    "自由行动药水": 5634,
}
# Spreadsheet column letter -> Row field name (columns A-E).
HEADER_MAP = {
    "A": "role",
    "B": "category",
    "C": "name",
    "D": "effect",
    "E": "duration",
}
@dataclass
class Row:
    """A single worksheet data row (columns A-E plus its Excel row number)."""

    role: str  # column A
    category: str  # column B
    name: str  # column C
    effect: str  # column D
    duration: str  # column E
    row_number: int  # the row's 1-based "r" attribute; 0 when missing
def read_xlsx_rows(path: Path) -> list[Row]:
    """Parse the first worksheet of the workbook at *path* into Row records.

    The header row is skipped, and rows whose cells are all empty are
    dropped. Cell text is stripped and whitespace-normalised.
    """
    with zipfile.ZipFile(path) as archive:
        shared_strings = load_shared_strings(archive)
        workbook = ET.fromstring(archive.read("xl/workbook.xml"))
        relationships = ET.fromstring(archive.read("xl/_rels/workbook.xml.rels"))
        rel_map = {}
        for rel in relationships.findall(f"{{{PKG_REL_NS}}}Relationship"):
            rel_map[rel.attrib["Id"]] = rel.attrib["Target"]
        # Resolve the first sheet's relationship id to its zip member path.
        first_sheet = workbook.find("main:sheets", NS)[0]
        target = rel_map[first_sheet.attrib[f"{{{DOC_REL_NS}}}id"]]
        sheet_xml = archive.read(normalize_sheet_path(target))

    root = ET.fromstring(sheet_xml)
    data_rows = root.find("main:sheetData", NS).findall("main:row", NS)

    parsed: list[Row] = []
    for sheet_row in data_rows[1:]:  # [1:] skips the header row
        cells: dict[str, str] = {}
        for cell in sheet_row.findall("main:c", NS):
            match = re.match(r"([A-Z]+)", cell.attrib.get("r", ""))
            if match is None:
                continue
            cells[match.group(1)] = read_cell_value(cell, shared_strings).strip()
        if not any(cells.values()):
            continue  # entirely blank row
        fields = {
            name: normalize_text(cells.get(column, ""))
            for column, name in HEADER_MAP.items()
        }
        parsed.append(
            Row(
                role=fields["role"],
                category=fields["category"],
                name=fields["name"],
                effect=fields["effect"],
                duration=fields["duration"],
                row_number=int(sheet_row.attrib.get("r", "0")),
            )
        )
    return parsed
def load_shared_strings(archive: zipfile.ZipFile) -> list[str]:
    """Return the workbook's shared-string table, or [] if it has none."""
    if "xl/sharedStrings.xml" not in archive.namelist():
        return []
    root = ET.fromstring(archive.read("xl/sharedStrings.xml"))
    # Each <si> may split its text across several <t> runs; join them.
    text_tag = f"{{{MAIN_NS}}}t"
    return [
        "".join(node.text or "" for node in item.iter(text_tag))
        for item in root.findall("main:si", NS)
    ]
def normalize_sheet_path(target: str) -> str:
    """Resolve a workbook relationship target to a zip member path under xl/.

    Relative targets (e.g. "worksheets/sheet1.xml") are joined under "xl";
    absolute targets (e.g. "/xl/worksheets/sheet1.xml") already carry the
    prefix, so the duplicated "xl/xl/" is collapsed afterwards.
    """
    relative = PurePosixPath(target)
    if target.startswith("/"):
        relative = relative.relative_to("/")
    joined = str(PurePosixPath("xl") / relative)
    return joined.replace("xl/xl/", "xl/")
def read_cell_value(cell: ET.Element, shared_strings: list[str]) -> str:
    """Return the text content of one worksheet cell.

    Handles shared strings (t="s"), inline strings (t="inlineStr"), and plain
    <v> values; a cell with no <v> yields "".
    """
    kind = cell.attrib.get("t")
    value_node = cell.find("main:v", NS)
    if kind == "s" and value_node is not None:
        # <v> holds an integer index into the shared-string table.
        return shared_strings[int(value_node.text)]
    if kind == "inlineStr":
        inline = cell.find("main:is", NS)
        if inline is None:
            return ""
        parts = [node.text or "" for node in inline.iter(f"{{{MAIN_NS}}}t")]
        return "".join(parts)
    if value_node is None:
        return ""
    return value_node.text
def normalize_text(value: str) -> str:
    """Clean a raw cell string: trim, collapse whitespace, normalise parens.

    Steps: strip; turn ideographic spaces (U+3000) into ASCII spaces; collapse
    whitespace runs to a single space; convert fullwidth parentheses to ASCII
    ones so the generated Lua uses one paren style throughout.

    Args:
        value: Raw cell text; None-ish values are treated as "".

    Returns:
        The cleaned single-line string.
    """
    value = (value or "").strip()
    value = value.replace("\u3000", " ")
    value = re.sub(r"\s+", " ", value)
    # BUG FIX: the exported copy read value.replace("", "(") — an empty search
    # string, which inserts "(" between every character.  The stripped
    # characters were evidently fullwidth parens (targets are ASCII parens);
    # restore U+FF08/U+FF09 as the search strings.
    value = value.replace("\uff08", "(").replace("\uff09", ")")
    return value
def normalize_rows(rows: list[Row]) -> list[Row]:
    """Validate required fields and canonicalise effect/duration text.

    Raises:
        ValueError: when a row is missing its role, category, or name.
    """
    result: list[Row] = []
    for source in rows:
        if not (source.role and source.category and source.name):
            # NOTE(review): this message likely lost punctuation (e.g. a
            # fullwidth paren) in the Unicode-stripping export; preserved
            # byte-for-byte here.
            raise ValueError(
                f"存在不完整数据行Excel 行号 {source.row_number}: "
                f"{source.role!r}, {source.category!r}, {source.name!r}"
            )
        result.append(
            Row(
                role=source.role,
                category=source.category,
                name=source.name,
                effect=normalize_effect(source.effect),
                duration=normalize_duration(source.duration),
                row_number=source.row_number,
            )
        )
    return result
def normalize_effect(value: str) -> str:
    """Clean an effect-description cell and apply wording fixes.

    List separators in the effect text are rewritten to " / ", and an
    abbreviated phrase is expanded.

    NOTE(review): the Unicode-stripping export left two identical
    empty-string keys in the original replacements dict here; since
    ``str.replace("", x)`` inserts *x* between every character, the exported
    form was badly broken.  The separator keys below (ideographic comma and
    fullwidth semicolon) are a best-effort reconstruction — TODO: confirm
    against the pre-export file or the Excel source.
    """
    value = normalize_text(value)
    separators = {
        "\u3001": " / ",  # 、 ideographic comma
        "\uff1b": " / ",  # ； fullwidth semicolon
    }
    for old, new in separators.items():
        value = value.replace(old, new)
    # Expand the abbreviated wording used by some rows.
    value = value.replace("提升治疗", "提升治疗效果")
    return value
def normalize_duration(value: str) -> str:
    """Clean a duration cell and patch known export typos."""
    cleaned = normalize_text(value)
    if cleaned == "2小时1":
        return "2小时"
    if cleaned == "瞬发 ":
        # NOTE(review): normalize_text() strips trailing whitespace, so this
        # key looks unreachable — it may have lost a stripped Unicode
        # character in the export.  Preserved byte-for-byte.
        return "瞬发"
    return cleaned
def build_groups(rows: list[Row]) -> list[dict]:
    """Group rows by role into the dict structure render_lua() consumes.

    Roles appear in ROLE_META order first, then any unknown roles sorted
    lexically.  Within a role, items are ordered by CATEGORY_ORDER (unknown
    categories last), then original spreadsheet row number, then name.
    """
    by_role: dict[str, list[Row]] = defaultdict(list)
    for row in rows:
        by_role[row.role].append(row)

    ordered_roles = [role for role in ROLE_META if role in by_role]
    ordered_roles += sorted(role for role in by_role if role not in ROLE_META)

    category_rank = {name: pos for pos, name in enumerate(CATEGORY_ORDER)}
    fallback_rank = len(CATEGORY_ORDER)

    groups: list[dict] = []
    for role in ordered_roles:
        meta = ROLE_META.get(role, {})
        sorted_rows = sorted(
            by_role[role],
            key=lambda entry: (
                category_rank.get(entry.category, fallback_rank),
                entry.row_number,
                entry.name,
            ),
        )
        groups.append(
            {
                "key": meta.get("key", slugify(role)),
                "role": role,
                "detail": meta.get("detail", role),
                # Fallback colour for roles missing from ROLE_META.
                "color": meta.get("color", (0.85, 0.75, 0.90)),
                "items": [
                    {
                        "cat": entry.category,
                        "name": entry.name,
                        "effect": entry.effect,
                        "duration": entry.duration,
                        # 0 means "no known item id" (see ITEM_ID_OVERRIDES).
                        "id": ITEM_ID_OVERRIDES.get(entry.name, 0),
                    }
                    for entry in sorted_rows
                ],
            }
        )
    return groups
def slugify(value: str) -> str:
    """Derive a lowercase key from a role label.

    ASCII alphanumerics and CJK ideographs are kept; every other run of
    characters collapses to a single underscore.  Falls back to "role" when
    nothing survives.
    """
    slug = re.sub(r"[^0-9A-Za-z\u4e00-\u9fff]+", "_", value).strip("_")
    return slug.lower() or "role"
def render_lua(groups: list[dict], source_path: Path, item_count: int) -> str:
    """Render the grouped consumable data as Lua source for ConsumableDB.lua.

    Emits a header comment block, then a ``SFrames.ConsumableDB`` table with
    summary counts, role/category order arrays, and one group table per role.

    NOTE(review): this copy was recovered from a whitespace-collapsing export;
    the single leading spaces inside the emitted Lua string literals below
    were probably wider (nested indentation) originally — confirm against a
    previously generated ConsumableDB.lua before regenerating.

    Args:
        groups: Output of build_groups().
        source_path: The source XLSX path (used for the header and timestamp).
        item_count: Total number of item rows, for the Stats line.

    Returns:
        The complete Lua file contents (LF-joined lines).
    """
    role_order = [group["role"] for group in groups]
    # Only categories actually present, ordered by CATEGORY_ORDER (unknown last).
    category_order = sorted(
        {item["cat"] for group in groups for item in group["items"]},
        key=lambda name: CATEGORY_ORDER.index(name)
        if name in CATEGORY_ORDER
        else len(CATEGORY_ORDER),
    )
    # Timestamp is the workbook's mtime, not generation time.
    generated_at = datetime.fromtimestamp(source_path.stat().st_mtime).strftime("%Y-%m-%d %H:%M:%S")
    lines = [
        "-" * 80,
        "-- Nanami-UI: ConsumableDB.lua",
        "-- 食物药剂百科数据库(由导出表生成,请优先更新 Excel 后再重新生成)",
        f"-- Source: {source_path}",
        f"-- Stats : {len(groups)} roles / {len(category_order)} categories / {item_count} entries",
        "-" * 80,
        "SFrames = SFrames or {}",
        "",
        "SFrames.ConsumableDB = {",
        f' generatedAt = "{generated_at}",',
        " summary = {",
        f" roleCount = {len(groups)},",
        f" categoryCount = {len(category_order)},",
        f" itemCount = {item_count},",
        " },",
        " roleOrder = {",
    ]
    for role in role_order:
        lines.append(f' "{lua_escape(role)}",')
    lines.extend(
        [
            " },",
            " categoryOrder = {",
        ]
    )
    for category in category_order:
        lines.append(f' "{lua_escape(category)}",')
    lines.extend(
        [
            " },",
            " groups = {",
        ]
    )
    for index, group in enumerate(groups, start=1):
        # RGB components rendered with two decimals for the Lua color table.
        color = ", ".join(f"{component:.2f}" for component in group["color"])
        lines.extend(
            [
                "",
                f" -- {index}. {group['detail']}",
                " {",
                f' key = "{lua_escape(group["key"])}",',
                f' role = "{lua_escape(group["role"])}",',
                f' detail = "{lua_escape(group["detail"])}",',
                f" color = {{ {color} }},",
                " items = {",
            ]
        )
        for item in group["items"]:
            # One compact Lua table constructor per item.
            lines.append(
                ' {{ cat="{cat}", name="{name}", effect="{effect}", duration="{duration}", id={item_id} }},'.format(
                    cat=lua_escape(item["cat"]),
                    name=lua_escape(item["name"]),
                    effect=lua_escape(item["effect"]),
                    duration=lua_escape(item["duration"]),
                    item_id=item["id"],
                )
            )
        lines.extend(
            [
                " },",
                " },",
            ]
        )
    lines.extend(
        [
            " },",
            "}",
            "",
        ]
    )
    return "\n".join(lines)
def lua_escape(value: str) -> str:
    """Escape backslashes and double quotes for a double-quoted Lua string."""
    return value.replace("\\", "\\\\").replace('"', '\\"')
def parse_args() -> argparse.Namespace:
    """Parse the CLI: a positional XLSX path and an optional -o/--output."""
    cli = argparse.ArgumentParser(
        description="Generate ConsumableDB.lua from an exported WoW consumables XLSX."
    )
    cli.add_argument("xlsx", type=Path, help="Path to the exported XLSX file")
    cli.add_argument(
        "-o",
        "--output",
        type=Path,
        default=Path("ConsumableDB.lua"),
        help="Output Lua file path",
    )
    return cli.parse_args()
def main() -> None:
    """CLI entry point: parse args, read the XLSX, and write the Lua DB."""
    args = parse_args()
    rows = normalize_rows(read_xlsx_rows(args.xlsx))
    groups = build_groups(rows)
    # newline="\n" keeps the generated file LF-only on every platform.
    args.output.write_text(
        render_lua(groups, args.xlsx, len(rows)),
        encoding="utf-8",
        newline="\n",
    )
    category_count = len(
        {item["cat"] for group in groups for item in group["items"]}
    )
    print(
        textwrap.dedent(
            f"""\
            Generated {args.output}
            source : {args.xlsx}
            roles : {len(groups)}
            items : {len(rows)}
            categories: {category_count}
            """
        ).strip()
    )


if __name__ == "__main__":
    main()