235 lines
9.2 KiB
Python
235 lines
9.2 KiB
Python
"""Intellivision STIC Foreground/Background image encoder.
|
|
|
|
160x96 = 20x12 cells of 8x8. FGBG mode gives each cell TWO independent colours
|
|
(like C64 hires): a foreground from the 8 primary colours (0-7) and a background
|
|
from all 16. Each cell's 8x8 bitmap is drawn from a dictionary of 64 user tiles
|
|
(GRAM), so cell bitmaps are clustered to 64 patterns.
|
|
|
|
Card word (FGBG): FG(bits 0-2) | GRAMbit(0x800) | (tile<<3) | BG(bits 9,10,12,13).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import numpy as np
|
|
|
|
from ... import dither, palette as c64pal
|
|
from ...convert.base import (Conversion, cell_distance, cells_lab,
|
|
per_pixel_allowed, perceptual_error)
|
|
from .. import palette as ipal
|
|
|
|
WIDTH, HEIGHT = 160, 96
|
|
PIXEL_ASPECT = 1.2 # 160x96 shown on a ~4:3 screen -> pixels a touch tall
|
|
CELL_W, CELL_H = 8, 8
|
|
N_COLS, N_ROWS = 20, 12
|
|
N_TILES = 64
|
|
GRAM_BIT = 0x0800
|
|
N_FG = 8 # foreground limited to primary colours 0-7
|
|
|
|
|
|
def _bg_bits(c: int) -> int:
|
|
"""Pack a 0-15 background colour into FGBG card bits 9,10,12,13."""
|
|
return (((c >> 0) & 1) << 9) | (((c >> 1) & 1) << 10) | \
|
|
(((c >> 2) & 1) << 12) | (((c >> 3) & 1) << 13)
|
|
|
|
|
|
def _best_pairs(dist):
|
|
"""Per cell choose (fg in 0-7, bg in 0-15) minimising nearest-colour error.
|
|
dist: (n_cells, P, 16) squared distances. Returns (fg, bg) int arrays."""
|
|
n = dist.shape[0]
|
|
best_err = np.full(n, np.inf)
|
|
best_fg = np.zeros(n, np.int64)
|
|
best_bg = np.zeros(n, np.int64)
|
|
for fg in range(N_FG):
|
|
dfg = dist[:, :, fg]
|
|
for bg in range(16):
|
|
m = np.minimum(dfg, dist[:, :, bg])
|
|
err = m.sum(axis=1)
|
|
better = err < best_err
|
|
best_err[better] = err[better]
|
|
best_fg[better] = fg
|
|
best_bg[better] = bg
|
|
return best_fg, best_bg
|
|
|
|
|
|
def _cluster_tiles(bitmaps, k=N_TILES, iters=16, seed=0):
|
|
"""k-means on 64-dim {0,1} cell bitmaps -> k binary representative tiles."""
|
|
pats = bitmaps.astype(np.float64)
|
|
uniq, counts = np.unique(bitmaps, axis=0, return_counts=True)
|
|
if len(uniq) <= k:
|
|
tiles = np.zeros((k, 64), np.uint8)
|
|
tiles[:len(uniq)] = uniq
|
|
lut = {tuple(p): i for i, p in enumerate(uniq)}
|
|
labels = np.array([lut[tuple(b)] for b in bitmaps])
|
|
return tiles, labels
|
|
order = np.argsort(-counts)[:k]
|
|
cent = uniq[order].astype(np.float64)
|
|
rng = np.random.default_rng(seed)
|
|
for _ in range(iters):
|
|
d = ((pats[:, None, :] - cent[None, :, :]) ** 2).sum(-1)
|
|
labels = d.argmin(1)
|
|
for j in range(k):
|
|
msk = labels == j
|
|
if msk.any():
|
|
cent[j] = pats[msk].mean(0)
|
|
else:
|
|
cent[j] = pats[rng.integers(len(pats))]
|
|
tiles = (cent >= 0.5).astype(np.uint8)
|
|
d = ((pats[:, None, :] - tiles[None, :, :].astype(np.float64)) ** 2).sum(-1)
|
|
labels = d.argmin(1)
|
|
return tiles, labels
|
|
|
|
|
|
def _recolor(dist, tiles, labels):
|
|
"""Given each cell's tile (shape), pick the (fg in 0-7, bg in 0-15) that
|
|
minimise reconstruction error. Returns (fg, bg, total_error)."""
|
|
n = dist.shape[0]
|
|
M = tiles[labels].astype(np.float64) # (n, P) 1=fg
|
|
fg_cost = np.einsum('np,npk->nk', M, dist) # (n,16) error on fg pixels
|
|
bg_cost = np.einsum('np,npk->nk', 1.0 - M, dist) # (n,16) error on bg pixels
|
|
fg = fg_cost[:, :N_FG].argmin(1)
|
|
bg = bg_cost.argmin(1)
|
|
rows = np.arange(n)
|
|
total = fg_cost[rows, fg].sum() + bg_cost[rows, bg].sum()
|
|
return fg.astype(np.int64), bg.astype(np.int64), float(total)
|
|
|
|
|
|
def _reassign(dist, tiles):
|
|
"""Assign each cell to the tile that (after optimal recolouring) minimises its
|
|
error. Returns labels (n,)."""
|
|
M = tiles.astype(np.float64) # (T, P)
|
|
fg_sum = np.einsum('tp,npk->ntk', M, dist) # (n,T,16)
|
|
bg_sum = np.einsum('tp,npk->ntk', 1.0 - M, dist)
|
|
cost = fg_sum[:, :, :N_FG].min(2) + bg_sum.min(2) # (n,T)
|
|
return cost.argmin(1)
|
|
|
|
|
|
def _reshape(dist, labels, fg, bg, n_tiles=N_TILES):
|
|
"""Recompute each tile's 8x8 mask from all cells using it: a pixel is fg where
|
|
that lowers total error across those cells (block-truncation coding)."""
|
|
n, P, _ = dist.shape
|
|
rows = np.arange(n)[:, None]
|
|
cols = np.arange(P)[None, :]
|
|
dfg = dist[rows, cols, fg[:, None]] # (n,P) error if pixel = fg
|
|
dbg = dist[rows, cols, bg[:, None]] # (n,P) error if pixel = bg
|
|
tiles = np.zeros((n_tiles, P), np.uint8)
|
|
# cells with the largest fg-vs-bg disagreement seed any empty tiles
|
|
worst = np.argsort(-(np.abs(dfg - dbg).sum(1)))
|
|
wi = 0
|
|
for t in range(n_tiles):
|
|
msk = labels == t
|
|
if msk.any():
|
|
tiles[t] = (dfg[msk].sum(0) < dbg[msk].sum(0)).astype(np.uint8)
|
|
else:
|
|
# reseed from an unused high-contrast cell so all 64 tiles get used
|
|
c = int(worst[wi % n]); wi += 1
|
|
tiles[t] = (dfg[c] < dbg[c]).astype(np.uint8)
|
|
return tiles
|
|
|
|
|
|
def _refine_once(dist, tiles, max_iters=40):
|
|
"""One Lloyd run from a given tile set to convergence. Returns
|
|
(tiles, labels, fg, bg, total_error)."""
|
|
best = None
|
|
best_err = np.inf
|
|
for _ in range(max_iters):
|
|
labels = _reassign(dist, tiles)
|
|
fg, bg, _ = _recolor(dist, tiles, labels)
|
|
tiles = _reshape(dist, labels, fg, bg)
|
|
fg, bg, err = _recolor(dist, tiles, labels)
|
|
if err < best_err - 1e-6:
|
|
best_err = err
|
|
best = (tiles.copy(), labels.copy(), fg.copy(), bg.copy())
|
|
else:
|
|
break # converged
|
|
return (*best, best_err)
|
|
|
|
|
|
def _refine(dist, tiles0, n_restarts):
|
|
"""Joint optimisation of the 64 tile shapes and the per-cell (tile, fg, bg).
|
|
|
|
Lloyd iteration has local minima, so run it from the clustered initialisation
|
|
plus several random tile sets and keep the globally lowest-error result -- the
|
|
one that reproduces the picture most faithfully within the 64-tile budget.
|
|
"""
|
|
P = dist.shape[1]
|
|
best = None
|
|
best_err = np.inf
|
|
rng = np.random.default_rng(0)
|
|
inits = [tiles0] + [rng.integers(0, 2, (N_TILES, P)).astype(np.uint8)
|
|
for _ in range(n_restarts)]
|
|
for t0 in inits:
|
|
tiles, labels, fg, bg, err = _refine_once(dist, t0)
|
|
if err < best_err:
|
|
best_err = err
|
|
best = (tiles, labels, fg, bg)
|
|
return best
|
|
|
|
|
|
def convert(img_rgb, palette_name="stic", dither_mode="floyd",
|
|
intensive=False, base_color=None):
|
|
plab = ipal.palette_lab()
|
|
prgb = ipal.get_palette().astype(np.uint8)
|
|
img_lab = c64pal.srgb_to_lab(img_rgb)
|
|
|
|
cells, rows, cols = cells_lab(img_lab, CELL_W, CELL_H)
|
|
dist = cell_distance(cells, plab)
|
|
fg, bg = _best_pairs(dist) # (240,) each
|
|
|
|
# dither each cell between its two colours (order [bg, fg] for per_pixel_allowed)
|
|
sets = np.stack([bg, fg], axis=1)
|
|
allowed = per_pixel_allowed(sets, rows, cols, CELL_W, CELL_H, HEIGHT, WIDTH)
|
|
idx = dither.quantize(img_lab, allowed, plab, dither_mode).astype(np.uint8)
|
|
|
|
bitmaps = np.zeros((N_ROWS * N_COLS, 64), np.uint8)
|
|
for cr in range(rows):
|
|
for cc in range(cols):
|
|
ci = cr * cols + cc
|
|
block = idx[cr * 8:cr * 8 + 8, cc * 8:cc * 8 + 8]
|
|
bitmaps[ci] = (block == fg[ci]).astype(np.uint8).reshape(-1)
|
|
|
|
tiles, labels = _cluster_tiles(bitmaps)
|
|
|
|
# Joint vector-quantisation refinement: the one-shot select->cluster->recolour
|
|
# leaves the 64 shared tile shapes and the per-cell (tile, fg, bg) far from
|
|
# optimal. Alternately re-assign each cell to its best tile, re-pick the two
|
|
# colours, and re-cut every tile's shape (block-truncation coding) -- each step
|
|
# provably lowers total CIELAB error, so the picture converges much closer to
|
|
# the original within the 64-tile budget.
|
|
n_restarts = 16 if intensive else 4
|
|
refined = _refine(dist, tiles, n_restarts)
|
|
if refined is not None:
|
|
tiles, labels, fg, bg = refined
|
|
|
|
prev_idx = np.empty((HEIGHT, WIDTH), np.uint8)
|
|
cards = np.zeros(N_ROWS * N_COLS, np.uint16)
|
|
for ci in range(N_ROWS * N_COLS):
|
|
cr, cc = divmod(ci, cols)
|
|
tile = tiles[labels[ci]].reshape(8, 8)
|
|
f, b = int(fg[ci]), int(bg[ci])
|
|
prev_idx[cr * 8:cr * 8 + 8, cc * 8:cc * 8 + 8] = np.where(tile == 1, f, b)
|
|
cards[ci] = (f & 0x07) | GRAM_BIT | ((labels[ci] & 0x3F) << 3) | _bg_bits(b)
|
|
|
|
gram = np.zeros(N_TILES * 8, np.uint16)
|
|
for t in range(N_TILES):
|
|
rb = tiles[t].reshape(8, 8)
|
|
for r in range(8):
|
|
byte = 0
|
|
for x in range(8):
|
|
byte = (byte << 1) | int(rb[r, x])
|
|
gram[t * 8 + r] = byte
|
|
|
|
data = {"gram": gram, "cards": cards}
|
|
|
|
# pre-widen preview to display resolution (matches atari/apple/a2600 convention)
|
|
preview = prgb[prev_idx]
|
|
disp_w = int(round(WIDTH * PIXEL_ASPECT))
|
|
xs = (np.arange(disp_w) * WIDTH) // disp_w
|
|
preview = preview[:, xs]
|
|
|
|
return Conversion(
|
|
mode="stic", width=WIDTH, height=HEIGHT, pixel_aspect=PIXEL_ASPECT,
|
|
index_image=prev_idx.astype(np.uint16), data=data, data_addr=0,
|
|
viewer="stic", preview_rgb=preview,
|
|
error=perceptual_error(prev_idx, img_lab, plab),
|
|
meta={"palette": "stic", "dither": dither_mode, "mode": "fgbg"},
|
|
)
|