diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 9cb6b6f..967db2f 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -72,7 +72,60 @@ "Bash(DISPLAY=:0 timeout 4 ./vectorgons)", "Bash(python3 verify_smooth.py)", "Bash(convert smooth.ppm -crop 760x500+330+150 +repage -resize 200% smooth_zoom.png)", - "Bash(python3 verify_gm.py)" + "Bash(python3 verify_gm.py)", + "Bash(sort -t: -k2 -u)", + "Bash(awk -F: '{print $2}')", + "Bash(sed -n '3659,3667p' vectorgons.c)", + "Bash(sed -n '3868,3879p' vectorgons.c)", + "Bash(echo \"build: $?\")", + "Bash(python3 measure.py)", + "Bash(convert hud.ppm -crop 260x150+840+10 +repage -resize 250% -gamma 1.4 hud2.png)", + "Bash(convert hud.ppm -crop 320x170+1660+8 +repage -resize 220% hud3.png)", + "Bash(awk '{print $5, $NF}')", + "Bash(rm -f /tmp/vgtest/vg_meas* /tmp/vgtest/*.ppm /tmp/vgtest/hud*.png /tmp/vgtest/measure.py)", + "Bash(awk 'NR>=2440 && NR<=2505 && /^static |^void |^int .*\\\\\\(|^[a-z].*\\\\{$/' vectorgons.c)", + "Bash(awk 'NR<=2470 && /^[a-zA-Z].*\\\\\\(.*\\\\\\).*\\\\{?$/ {ln=NR; line=$0} END{}' vectorgons.c)", + "Bash(awk -F: '$1<2503')", + "Bash(sed -n '3691,3700p' vectorgons.c)", + "Bash(sed -n '55,75p' vectorgons.c)", + "Bash(sed -n '76,90p' vectorgons.c)", + "Bash(sed -n '3700,3712p' vectorgons.c)", + "Bash(cd *)", + "Bash(python3 bench.py)", + "Bash(python3 correct.py)", + "Bash(convert corr.ppm -crop 1700x600+150+250 +repage -resize 46% corr2.png)", + "Bash(DISPLAY=:0 timeout 5 wine vectorgons.exe)", + "Bash(echo \"exit=$? 
\\(124=ran full duration, healthy\\)\")", + "Bash(sed -n '/static void grab_scene/,/^}/p' vectorgons.c)", + "Bash(sed -n '/ensure_scene_tex/,/^}/p' vectorgons.c)", + "Bash(python3 bloom.py)", + "Bash(convert bloom_on.ppm -resize 46% bon.png)", + "Bash(convert bloom_off.ppm -resize 46% boff.png)", + "Bash(python3 bloom2.py)", + "Bash(convert b_on.ppm -resize 42% bon2.png)", + "Bash(convert b_off.ppm -resize 42% boff2.png)", + "Bash(convert b_on.ppm -resize 42% bon3.png)", + "Bash(convert b_on.ppm -crop 460x460+700+320 +repage -resize 130% bl_bloom.png)", + "Bash(convert b_off.ppm -crop 460x460+700+320 +repage -resize 130% bl_legacy.png)", + "Bash(montage bl_bloom.png bl_legacy.png -tile 2x1 -geometry +4+4 -background gray20 -label '%f' compare.png)", + "Bash(montage bl_bloom.png bl_legacy.png -tile 2x1 -geometry +4+4 compare.png)", + "Bash(python3 bbench.py)", + "Bash(awk '{print $5,$NF}')", + "Bash(echo \"exit=$? \\(124=healthy\\)\")", + "Bash(python3 -)", + "Bash(python3 cull.py)", + "Bash(convert cull_off.ppm -resize 42% cull_off.png)", + "Bash(convert diff.png -resize 42% diff_s.png)", + "Bash(compare -metric AE -fuzz 4% cull_on.ppm cull_off.ppm diff4.png)", + "Bash(convert diff4.png -resize 42% diff4_s.png)", + "Bash(compare -metric AE -fuzz 2% cull_on.ppm cull_off.ppm bdiff.png)", + "Bash(convert bdiff.png -resize 40% bdiff_s.png)", + "Bash(echo \"wine exit=$? \\(124=healthy\\)\")", + "Bash(rm -f /tmp/vgtest/vg_*.c /tmp/vgtest/vg_c /tmp/vgtest/*.ppm /tmp/vgtest/*.png /tmp/vgtest/*.py)", + "Bash(python3 glow.py)", + "Bash(python3 dense.py)", + "Bash(montage g_bloom.png g_legacy.png -tile 2x1 -geometry +2+2 g_cmp125.png)", + "Bash(montage g_bloom.png g_legacy.png -tile 2x1 -geometry +2+2 g_cmp_fold.png)" ] } } diff --git a/README.md b/README.md index dc539f9..e6f4bc7 100644 --- a/README.md +++ b/README.md @@ -194,6 +194,9 @@ out over a few seconds. Press any key to bring it back. 
| Mirror-ball count | `3` / `4` | How many reflecting mirror spheres are on screen (0–1000) | | Shapes | `N` | Toggle random ⇄ cycling shape spawns | | Fullscreen | `F` or `F11` | Toggle fullscreen | +| Perf HUD | `F1` | FPS / frame ms / body & draw-batch counts | +| VSync | `F2` | Toggle VSync (uncap the frame rate) | +| Glow mode | `F3` | Post-process bloom vs. legacy per-vector glow | | Pause | `Space` | | | Quit | `Esc` | | @@ -241,11 +244,36 @@ out over a few seconds. Press any key to bring it back. (`spin = 4^(seed · variance)`), so changing it re-spreads every solid's rotation rate instantly — `0` makes them all tumble alike, `100` gives a very wide range. -- **CRT glow** wraps each sharp vector in a soft phosphor mist: many faint - additive width layers fade outward into a halo, and a few faint enlarged - ghost copies bloom that glow into a larger volume than the hardware line-width - cap (~10px) could reach on its own. The crisp core is drawn last so the vector - stays sharp; the glow setting scales the mist's spread and brightness. +- **CRT glow** wraps each sharp vector in a soft phosphor mist. Two + implementations, switchable live with `F3`: + - **Bloom (default, when framebuffer objects are available):** the field's + sharp cores are rendered once, then a dual-filter pyramid (downsample, then + tent-filter upsample accumulating each scale into the next finer one) builds + a smooth, ever-widening, fading halo around the vectors. A fold factor + attenuates the wider scales so the halo fades out and the blacks stay clean. + This is **one draw per body instead of nine**, dramatically faster at high + body counts. + - **Legacy:** each vector is redrawn as many faint additive width layers plus + a few enlarged ghost copies. Kept as a fallback for GPUs without FBOs. 
+ +- **Performance / retained mode.** Geometry is uploaded to vertex/index buffer + objects once and drawn with `glDrawElements` instead of per-vertex immediate + mode, so the CPU isn't re-submitting millions of vertices per frame. Rigid + shapes use static buffers; morphing 4/5/6-D polytopes stream positions into a + shared buffer with a static index buffer; clocks and animated objects (whose + topology is rebuilt each frame) stay on the immediate path. GL 1.5/3.0 entry + points are resolved at run time via `glfwGetProcAddress`, so the same code + works on the Linux, Windows, and screensaver builds; both retained mode and + bloom fall back gracefully if unavailable. +- **Frustum culling.** The field is a full sphere around the camera, so most + bodies are behind or beside the view. Six view-frustum planes are extracted + from the modelview·projection matrix each frame and each body's bounding + sphere is tested before the (relatively costly) projection and draw; the cull + margin includes the glow spread so edge halos don't pop. Off-screen bodies + still advance and recycle — only their drawing is skipped. At a full field + (~7200 bodies) this typically draws under ~1000 of them. +- The `F1` HUD shows FPS, frame time, and live body / drawn / draw-batch / + vertex counts; `F2` toggles VSync; `F3` switches bloom vs. legacy glow. - **Magnifying glasses** are real lenses. 
After the field is drawn, the back buffer is grabbed into a texture (`glCopyTexSubImage2D`); each lens body is then projected to its screen position and drawn as a disc that samples that diff --git a/vectorgons b/vectorgons index 38a139b..4c89cae 100755 Binary files a/vectorgons and b/vectorgons differ diff --git a/vectorgons.c b/vectorgons.c index 073df35..e8a9193 100644 --- a/vectorgons.c +++ b/vectorgons.c @@ -46,7 +46,12 @@ * - Random per-body sizes (user-settable min / max) * - Guaranteed no overlap between solids (bounding-sphere rejection) * - Adjustable tumble rate AND tumble-speed variance - * - Adjustable CRT-style glow / light bleed + * - Adjustable CRT-style glow / light bleed (post-process bloom; F3 toggles + * the legacy per-vector glow). Geometry is drawn from VBOs (glDrawElements), + * not immediate mode, and off-screen bodies are frustum-culled before draw, + * so the CPU stays fed at high body counts; GL 1.5/3.0 entry points are + * loaded at run time via glfwGetProcAddress. F1 = perf HUD, F2 = VSync + * toggle. Both retained mode and bloom fall back gracefully. * - Adjustable vector flicker * - On-screen display (self-contained vector font) that fades after idle * - Single-hue and multicolor modes; adjustable hue + continuous hue cycling @@ -81,6 +86,49 @@ #include #include #include +#include + +/* --- OpenGL 1.5 buffer objects (Step 1: retained-mode geometry) -------------- + * The hot wireframe geometry is uploaded to VBOs and drawn with glDrawElements + * instead of per-vertex glBegin/glVertex3fv, which removes the millions of + * driver calls/frame that bottlenecked the immediate-mode path. opengl32.dll on + * Windows only exports GL 1.1, so these three entry points are resolved at run + * time via glfwGetProcAddress (cross-platform). Values/signatures are standard. 
*/ +#ifndef GL_ARRAY_BUFFER +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_STATIC_DRAW 0x88E4 +#define GL_STREAM_DRAW 0x88E0 +#endif +#ifndef APIENTRY +#define APIENTRY +#endif +typedef ptrdiff_t VGsizeiptr; +typedef void (APIENTRY *VG_GENBUFFERS)(GLsizei, GLuint *); +typedef void (APIENTRY *VG_BINDBUFFER)(GLenum, GLuint); +typedef void (APIENTRY *VG_BUFFERDATA)(GLenum, VGsizeiptr, const void *, GLenum); +static VG_GENBUFFERS vgGenBuffers; +static VG_BINDBUFFER vgBindBuffer; +static VG_BUFFERDATA vgBufferData; + +/* Framebuffer objects (GL 3.0 / EXT_framebuffer_object), for the Step 2 bloom + * post-process: render the field core once, then build a soft multi-scale glow + * off-screen instead of redrawing every body 9x. Same runtime-load story. */ +#ifndef GL_FRAMEBUFFER +#define GL_FRAMEBUFFER 0x8D40 +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#endif +typedef void (APIENTRY *VG_GENFRAMEBUFFERS)(GLsizei, GLuint *); +typedef void (APIENTRY *VG_BINDFRAMEBUFFER)(GLenum, GLuint); +typedef void (APIENTRY *VG_FRAMEBUFFERTEXTURE2D)(GLenum, GLenum, GLenum, GLuint, GLint); +typedef void (APIENTRY *VG_DELETEFRAMEBUFFERS)(GLsizei, const GLuint *); +typedef GLenum (APIENTRY *VG_CHECKFRAMEBUFFERSTATUS)(GLenum); +static VG_GENFRAMEBUFFERS vgGenFramebuffers; +static VG_BINDFRAMEBUFFER vgBindFramebuffer; +static VG_FRAMEBUFFERTEXTURE2D vgFramebufferTexture2D; +static VG_DELETEFRAMEBUFFERS vgDeleteFramebuffers; +static VG_CHECKFRAMEBUFFERSTATUS vgCheckFramebufferStatus; /* ================================================================== */ /* Geometry: an N-dimensional polytope engine (3..6 dimensions). */ @@ -2814,6 +2862,42 @@ static int clampi(int v, int lo, int hi) { return v < lo ? lo : (v > hi ? hi : v * reshuffling everything. 
*/ static int g_filled = 0; /* high-water mark of ever-initialized indices */ static int g_total = 0; /* current active body count */ + +/* --- perf instrumentation (Step 0: measure before optimizing) ---------------- + * g_batches / g_verts count line batches / vertices per frame: one batch per + * draw_edges call, on both the VBO and immediate-mode paths. Reset each frame in + * the main loop, incremented in draw_edges, shown in the F1 perf HUD. */ +static unsigned g_batches = 0; /* draw_edges line batches this frame */ +static unsigned long g_verts = 0; /* vertices submitted this frame */ +static unsigned g_drawn = 0; /* bodies that passed frustum cull */ +static double g_frustum[6][4]; /* world-space frustum planes (a,b,c,d) */ +static int g_vsync = 1; /* F2 toggles glfwSwapInterval(0/1) */ +static int g_show_hud = 1; /* F1 toggles the perf HUD */ + +/* --- retained-mode geometry buffers (one per shape) -------------------------- + * g_shape_vbomode[si]: 0 = immediate mode (clocks/dynamics whose topology is + * rebuilt each frame), 1 = static VBO+IBO (rigid 3-D shapes), 2 = morphing + * 4/5/6-D shape (static topology IBO + per-frame vertex upload to g_dyn_vbo). */ +static int g_have_vbo = 0; +static GLuint g_dyn_vbo = 0; /* shared streaming vertex buffer */ +static GLuint *g_shape_vbo = NULL; /* per static shape (0 otherwise) */ +static GLuint *g_shape_ibo = NULL; /* per shape edge-index buffer */ +static unsigned char *g_shape_vbomode = NULL; + +/* --- bloom post-process (Step 2) -------------------------------------------- + * A pyramid of half-scaling render targets: the field core is downsampled with + * bilinear filtering through BLOOM_LEVELS levels, then the levels are summed + * back additively over the sharp scene -> a soft, wide CRT glow for one pass per + * body instead of nine. F3 toggles it against the legacy per-body glow. 
*/ +#define BLOOM_LEVELS 6 +#define BLOOM_GAIN 1.5f +#define BLOOM_FOLD 0.78f /* <1 attenuates each wider scale -> halo fades out */ +static int g_have_fbo = 0; +static int g_bloom = 1; /* F3 toggles bloom vs legacy glow */ +static GLuint g_bl_tex[BLOOM_LEVELS]; +static GLuint g_bl_fbo[BLOOM_LEVELS]; +static int g_bl_w[BLOOM_LEVELS], g_bl_h[BLOOM_LEVELS]; +static int g_bl_fbw = 0, g_bl_fbh = 0; /* framebuffer size targets are built for */ static void recompose_field(void) { int mag = (g_mag_idx >= 0) ? clampi(cfg.mag_count, 0, MAX_SPECIAL) : 0; int glass = (g_glass_idx >= 0) ? clampi(cfg.glass_count, 0, MAX_SPECIAL) : 0; @@ -2988,7 +3072,7 @@ static void render_osd(int fbw, int fbh, float alpha) { const float h = 13.0f; const float left = 22.0f; const float lineStep = h + 9.0f; - const int nlines = 22; + const int nlines = 25; const float panelW = 384.0f; float panelTop = fbh - 18.0f; float panelH = nlines * lineStep + 16.0f; @@ -3053,7 +3137,10 @@ static void render_osd(int fbw, int fbh, float alpha) { LINE("SHAPES N %s", cfg.cycle_shapes ? "CYCLE" : "RANDOM"); LINE("MOVE CAM WASD %+.0f %+.0f", cam_x, cam_y); LINE("ROTATE CAM ARROWS %+.0f %+.0f", cam_yaw, cam_pitch); - LINE("FULLSCREEN F %s", cfg.fullscreen ? "ON" : "OFF"); + LINE("FULLSCREEN F/F11 %s", cfg.fullscreen ? "ON" : "OFF"); + LINE("PERF HUD F1 %s", g_show_hud ? "ON" : "OFF"); + LINE("VSYNC F2 %s", g_vsync ? "ON" : "OFF"); + LINE("GLOW MODE F3 %s", (g_have_fbo && g_bloom) ? "BLOOM" : "LEGACY"); LINE("PAUSE SPACE %s", cfg.paused ? "PAUSED" : "RUNNING"); LINE("%s", "QUIT ESC"); #undef LINE @@ -3065,6 +3152,31 @@ static void render_osd(int fbw, int fbh, float alpha) { /* leave additive blending on for the next 3-D frame */ } +/* F1-toggled performance HUD (top-right). The measurement tool for the refactor: + * FPS / frame time, active body count, and the per-frame immediate-mode line + * batch + vertex submission counts -- i.e. 
exactly the CPU/driver work that the + * retained-mode (VBO/glDrawElements) conversion is meant to collapse. */ +static void render_perf_hud(int fbw, int fbh, float fps, float ms) { + glMatrixMode(GL_PROJECTION); glPushMatrix(); glLoadIdentity(); gluOrtho2D(0, fbw, 0, fbh); + glMatrixMode(GL_MODELVIEW); glPushMatrix(); glLoadIdentity(); + glBlendFunc(GL_SRC_ALPHA, GL_ONE); + const float h = 13.0f, step = h + 8.0f, x = (float)fbw - 220.0f; + float y = (float)fbh - 26.0f; + char buf[64]; + glLineWidth(1.8f); + glColor4f(fps < 55.0f ? 1.0f : 0.4f, fps < 55.0f ? 0.55f : 1.0f, 0.35f, 1.0f); + snprintf(buf, sizeof buf, "FPS %.0f %.2f MS", fps, ms); stroke_text(buf, x, y, h); y -= step; + glColor4f(0.5f, 0.8f, 1.0f, 1.0f); glLineWidth(1.5f); + snprintf(buf, sizeof buf, "BODIES %d", g_total); stroke_text(buf, x, y, h); y -= step; + snprintf(buf, sizeof buf, "DRAWN %u", g_drawn); stroke_text(buf, x, y, h); y -= step; + snprintf(buf, sizeof buf, "BATCHES %u", g_batches); stroke_text(buf, x, y, h); y -= step; + snprintf(buf, sizeof buf, "VERTS %luK", g_verts / 1000UL); stroke_text(buf, x, y, h); y -= step; + snprintf(buf, sizeof buf, "VSYNC %s", g_vsync ? "ON" : "OFF"); stroke_text(buf, x, y, h); y -= step; + snprintf(buf, sizeof buf, "GLOW %s", (g_have_fbo && g_bloom) ? "BLOOM" : "LEGACY"); stroke_text(buf, x, y, h); + glMatrixMode(GL_PROJECTION); glPopMatrix(); + glMatrixMode(GL_MODELVIEW); glPopMatrix(); +} + /* ================================================================== */ /* Input. 
*/ /* ================================================================== */ @@ -3256,6 +3368,10 @@ static void key_cb(GLFWwindow *win, int key, int sc, int action, int mods) { case GLFW_KEY_F: case GLFW_KEY_F11: toggle_fullscreen(win); break; + case GLFW_KEY_F1: g_show_hud = !g_show_hud; break; /* perf HUD */ + case GLFW_KEY_F2: g_vsync = !g_vsync; glfwSwapInterval(g_vsync); break; /* VSync */ + case GLFW_KEY_F3: g_bloom = !g_bloom; break; /* bloom vs legacy glow */ + case GLFW_KEY_SPACE: cfg.paused = !cfg.paused; break; default: break; } @@ -3313,7 +3429,80 @@ static void project_body(const Solid *s, float angf, float phasef, float out[][3 } } +/* Build a VBO+IBO for every shape, classifying each into one of the three draw + * modes. Called once after the GL context exists and all shapes are built. */ +static void init_gl_buffers(void) { + /* framebuffer-object entry points (bloom) -- independent of VBO support */ + vgGenFramebuffers = (VG_GENFRAMEBUFFERS) glfwGetProcAddress("glGenFramebuffers"); + vgBindFramebuffer = (VG_BINDFRAMEBUFFER) glfwGetProcAddress("glBindFramebuffer"); + vgFramebufferTexture2D = (VG_FRAMEBUFFERTEXTURE2D) glfwGetProcAddress("glFramebufferTexture2D"); + vgDeleteFramebuffers = (VG_DELETEFRAMEBUFFERS) glfwGetProcAddress("glDeleteFramebuffers"); + vgCheckFramebufferStatus = (VG_CHECKFRAMEBUFFERSTATUS) glfwGetProcAddress("glCheckFramebufferStatus"); + if (vgGenFramebuffers && vgBindFramebuffer && vgFramebufferTexture2D && vgDeleteFramebuffers) + g_have_fbo = 1; + + vgGenBuffers = (VG_GENBUFFERS) glfwGetProcAddress("glGenBuffers"); + vgBindBuffer = (VG_BINDBUFFER) glfwGetProcAddress("glBindBuffer"); + vgBufferData = (VG_BUFFERDATA) glfwGetProcAddress("glBufferData"); + if (!vgGenBuffers || !vgBindBuffer || !vgBufferData) return; /* keep immediate mode */ + + g_shape_vbo = (GLuint *)calloc((size_t)num_shapes, sizeof(GLuint)); + g_shape_ibo = (GLuint *)calloc((size_t)num_shapes, sizeof(GLuint)); + g_shape_vbomode = (unsigned char 
*)calloc((size_t)num_shapes, 1); + if (!g_shape_vbo || !g_shape_ibo || !g_shape_vbomode) return; + + vgGenBuffers(1, &g_dyn_vbo); + + unsigned short idx[MAX_EDGES * 2]; + float vbuf[MAX_VERTS * 3]; + for (int si = 0; si < num_shapes; si++) { + Solid *s = &solids[si]; + int mode; + if (s->dim != 3) { + mode = 2; /* morph: positions only */ + } else { + mode = 1; /* rigid 3-D: fully static */ + for (int k = 0; k < dyn_count; k++) if (dyn_solid[k] == si) mode = 0; + for (int k = 0; k < 4; k++) if (clock_idx[k] == si) mode = 0; + } + g_shape_vbomode[si] = (unsigned char)mode; + if (mode == 0) continue; /* topology changes -> immediate */ + + for (int e = 0; e < s->ne; e++) { /* static topology index buffer */ + idx[e*2] = (unsigned short)s->e[e][0]; + idx[e*2+1] = (unsigned short)s->e[e][1]; + } + vgGenBuffers(1, &g_shape_ibo[si]); + vgBindBuffer(GL_ELEMENT_ARRAY_BUFFER, g_shape_ibo[si]); + vgBufferData(GL_ELEMENT_ARRAY_BUFFER, + (VGsizeiptr)((size_t)s->ne * 2 * sizeof(unsigned short)), idx, GL_STATIC_DRAW); + if (mode == 1) { /* rigid: upload vertices once */ + for (int i = 0; i < s->nv; i++) { + vbuf[i*3] = s->v[i][0]; + vbuf[i*3+1] = s->v[i][1]; + vbuf[i*3+2] = s->v[i][2]; + } + vgGenBuffers(1, &g_shape_vbo[si]); + vgBindBuffer(GL_ARRAY_BUFFER, g_shape_vbo[si]); + vgBufferData(GL_ARRAY_BUFFER, + (VGsizeiptr)((size_t)s->nv * 3 * sizeof(float)), vbuf, GL_STATIC_DRAW); + } + } + vgBindBuffer(GL_ARRAY_BUFFER, 0); + vgBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + g_have_vbo = 1; +} + +/* Draw one solid's edges. For VBO-backed shapes the vertex source + index buffer + * are bound by the caller (per body) and this is a single glDrawElements; clocks + * and dynamics fall back to immediate mode (their topology is rebuilt each frame + * so a static index buffer would be stale). 
*/ static void draw_edges(const Solid *s, float p[][3]) { + g_batches++; g_verts += (unsigned long)s->ne * 2u; + if (g_have_vbo && g_shape_vbomode[s - solids]) { + glDrawElements(GL_LINES, s->ne * 2, GL_UNSIGNED_SHORT, (const void *)0); + return; + } glBegin(GL_LINES); for (int e = 0; e < s->ne; e++) { glVertex3fv(p[s->e[e][0]]); @@ -3322,6 +3511,36 @@ static void draw_edges(const Solid *s, float p[][3]) { glEnd(); } +/* Build the 6 world-space view-frustum planes from the current modelview * + * projection (Gribb-Hartmann), normalized so plane[i]·(x,y,z,1) gives signed + * world distance. Used to cull bodies that can't be on screen before the + * (relatively expensive) projection + draw -- the field is a full sphere around + * the camera, so most bodies are behind or beside the view. */ +static void build_frustum(void) { + double mv[16], pj[16], m[16]; + glGetDoublev(GL_MODELVIEW_MATRIX, mv); + glGetDoublev(GL_PROJECTION_MATRIX, pj); + for (int c = 0; c < 4; c++) + for (int r = 0; r < 4; r++) { + double s = 0; + for (int k = 0; k < 4; k++) s += pj[k*4 + r] * mv[c*4 + k]; + m[c*4 + r] = s; + } + for (int i = 0; i < 6; i++) { /* L,R,B,T,N,F = row4 +/- row{x,y,z} */ + int sign = (i & 1) ? -1 : 1, row = i / 2; + for (int j = 0; j < 4; j++) g_frustum[i][j] = m[j*4 + 3] + sign * m[j*4 + row]; + double a = g_frustum[i][0], b = g_frustum[i][1], c = g_frustum[i][2]; + double len = sqrt(a*a + b*b + c*c); + if (len > 1e-12) for (int j = 0; j < 4; j++) g_frustum[i][j] /= len; + } +} +static int sphere_visible(double x, double y, double z, double rad) { + for (int i = 0; i < 6; i++) + if (g_frustum[i][0]*x + g_frustum[i][1]*y + g_frustum[i][2]*z + g_frustum[i][3] < -rad) + return 0; + return 1; +} + /* ================================================================== */ /* Help / main. 
*/ /* ================================================================== */ @@ -3340,7 +3559,9 @@ static void print_help(void) { " B / P magnifier count 1 / 2 glass-ball count\n" " 3 / 4 mirror-ball count (each 0..1000, extra of the density)\n" " + / - density M color N shapes\n" - " F / F11 fullscreen Space pause Esc quit\n\n" + " F / F11 fullscreen Space pause Esc quit\n" + " F1 perf HUD F2 VSync on/off (uncap FPS)\n" + " F3 bloom / legacy glow\n\n" " (all settings are also shown in the on-screen display)\n\n"); fflush(stdout); } @@ -3390,6 +3611,114 @@ static void grab_scene(int fbw, int fbh) { glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0,0, 0,0, fbw, fbh); } +/* (Re)create the bloom pyramid render targets for the current framebuffer size. + * Each level is half the previous; an exact-size (NPOT) colour texture wrapped + * in an FBO. Returns 0 and disables bloom if any FBO is incomplete. */ +static int ensure_bloom_targets(int fbw, int fbh) { + if (fbw == g_bl_fbw && fbh == g_bl_fbh && g_bl_fbo[0]) return 1; + if (g_bl_fbo[0]) { vgDeleteFramebuffers(BLOOM_LEVELS, g_bl_fbo); glDeleteTextures(BLOOM_LEVELS, g_bl_tex); } + vgGenFramebuffers(BLOOM_LEVELS, g_bl_fbo); + glGenTextures(BLOOM_LEVELS, g_bl_tex); + int w = fbw, h = fbh; + for (int i = 0; i < BLOOM_LEVELS; i++) { + w = w > 1 ? w / 2 : 1; + h = h > 1 ? 
h / 2 : 1; + g_bl_w[i] = w; g_bl_h[i] = h; + glBindTexture(GL_TEXTURE_2D, g_bl_tex[i]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, w, h, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + vgBindFramebuffer(GL_FRAMEBUFFER, g_bl_fbo[i]); + vgFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, g_bl_tex[i], 0); + if (vgCheckFramebufferStatus && + vgCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + vgBindFramebuffer(GL_FRAMEBUFFER, 0); + g_have_fbo = 0; /* fall back to legacy glow for good */ + return 0; + } + } + vgBindFramebuffer(GL_FRAMEBUFFER, 0); + g_bl_fbw = fbw; g_bl_fbh = fbh; + return 1; +} + +/* Draw a unit quad in ortho [0,1] with the given source texcoord rectangle. */ +static void bloom_quad(float u0, float v0, float u1, float v1) { + glBegin(GL_QUADS); + glTexCoord2f(u0, v0); glVertex2f(0, 0); + glTexCoord2f(u1, v0); glVertex2f(1, 0); + glTexCoord2f(u1, v1); glVertex2f(1, 1); + glTexCoord2f(u0, v1); glVertex2f(0, 1); + glEnd(); +} + +/* 3x3 tent-filter upsample of the currently bound source texture, drawn + * additively over a unit quad. (tx,ty) = one source texel; the nine weighted, + * texel-offset taps blend smoothly, so a small level expands into a wide soft + * gradient with no blocky bilinear cells. Weights sum to BLOOM_FOLD (below 1), not 1. 
*/ +static void bloom_tent(float tx, float ty) { + static const float ox[9] = {-1, 0, 1, -1, 0, 1, -1, 0, 1}; + static const float oy[9] = {-1,-1,-1, 0, 0, 0, 1, 1, 1}; + static const float wt[9] = { 1, 2, 1, 2, 4, 2, 1, 2, 1}; + for (int k = 0; k < 9; k++) { + float c = wt[k] / 16.0f * BLOOM_FOLD, dx = ox[k]*tx, dy = oy[k]*ty; + glColor3f(c, c, c); + bloom_quad(dx, dy, 1 + dx, 1 + dy); + } +} + +/* Dual-filter bloom: downsample the field core through the pyramid, then tent- + * upsample each coarse level additively into the next finer one, accumulating + * every scale into level 0. Summing the scales this way (rather than blitting + * each straight to full screen) yields a smooth, ever-widening, fading halo + * around the sharp vectors -- the old-CRT look. `amt` is cfg.glow/100. */ +static void build_bloom(int fbw, int fbh, float amt) { + if (!ensure_bloom_targets(fbw, fbh)) return; + grab_scene(fbw, fbh); /* g_scene_tex = field core */ + + glMatrixMode(GL_PROJECTION); glPushMatrix(); glLoadIdentity(); gluOrtho2D(0, 1, 0, 1); + glMatrixMode(GL_MODELVIEW); glPushMatrix(); glLoadIdentity(); + glEnable(GL_TEXTURE_2D); + + /* downsample (overwrite): level 0 from the POT grab's valid sub-rect */ + glDisable(GL_BLEND); glColor3f(1, 1, 1); + for (int i = 0; i < BLOOM_LEVELS; i++) { + vgBindFramebuffer(GL_FRAMEBUFFER, g_bl_fbo[i]); + glViewport(0, 0, g_bl_w[i], g_bl_h[i]); + if (i == 0) { + glBindTexture(GL_TEXTURE_2D, g_scene_tex); + bloom_quad(0, 0, (float)fbw / g_tex_w, (float)fbh / g_tex_h); + } else { + glBindTexture(GL_TEXTURE_2D, g_bl_tex[i-1]); + bloom_quad(0, 0, 1, 1); + } + } + + /* upsample (additive tent), folding coarse scales down into level 0 */ + glEnable(GL_BLEND); glBlendFunc(GL_ONE, GL_ONE); + for (int i = BLOOM_LEVELS - 1; i > 0; i--) { + vgBindFramebuffer(GL_FRAMEBUFFER, g_bl_fbo[i-1]); + glViewport(0, 0, g_bl_w[i-1], g_bl_h[i-1]); + glBindTexture(GL_TEXTURE_2D, g_bl_tex[i]); + bloom_tent(1.0f / g_bl_w[i], 1.0f / g_bl_h[i]); + } + + /* composite the 
accumulated glow over the sharp scene */ + vgBindFramebuffer(GL_FRAMEBUFFER, 0); + glViewport(0, 0, fbw, fbh); + glBindTexture(GL_TEXTURE_2D, g_bl_tex[0]); + float c = amt * BLOOM_GAIN; + glColor3f(c, c, c); + bloom_quad(0, 0, 1, 1); + + glBlendFunc(GL_SRC_ALPHA, GL_ONE); /* restore additive-alpha */ + glDisable(GL_TEXTURE_2D); + glMatrixMode(GL_PROJECTION); glPopMatrix(); + glMatrixMode(GL_MODELVIEW); glPopMatrix(); +} + /* Draw the collected magnifier bodies as lenses over the grabbed field. */ static void draw_magnifiers(const int *list, int n, int fbw, int fbh) { double mvm[16], pjm[16]; int vp[4]; @@ -3652,7 +3981,7 @@ int main(int argc, char **argv) { } if (!win) { fprintf(stderr, "Failed to create window\n"); glfwTerminate(); return 1; } glfwMakeContextCurrent(win); - glfwSwapInterval(1); + glfwSwapInterval(g_vsync); if (g_screensaver) { /* any input ends the screensaver */ glfwSetInputMode(win, GLFW_CURSOR, GLFW_CURSOR_HIDDEN); glfwSetKeyCallback(win, ss_key_cb); @@ -3666,6 +3995,8 @@ int main(int argc, char **argv) { glGetFloatv(GL_ALIASED_LINE_WIDTH_RANGE, lwr); max_line_width = lwr[1] > 1 ? lwr[1] : 10.0f; + init_gl_buffers(); /* retained-mode geometry; falls back to immediate on failure */ + if (!g_screensaver && cfg.fullscreen) { cfg.fullscreen = 0; toggle_fullscreen(win); } rebuild_field(); @@ -3682,9 +4013,20 @@ int main(int argc, char **argv) { while (!glfwWindowShouldClose(win) && !(g_screensaver && g_quit)) { double now = glfwGetTime(); - float dt = (float)(now - last); + float raw = (float)(now - last); /* true frame time (for FPS) */ + float dt = raw > 0.05f ? 
0.05f : raw; /* clamped for the simulation */ last = now; - if (dt > 0.05f) dt = 0.05f; + + /* rolling FPS / frame time over ~0.25 s, plus reset the per-frame counters */ + static double fps_acc = 0.0; static int fps_n = 0; + static float fps_val = 0.0f, ms_val = 0.0f; + fps_acc += raw; fps_n++; + if (fps_acc >= 0.25) { + fps_val = (float)(fps_n / fps_acc); + ms_val = (float)(1000.0 * fps_acc / fps_n); + fps_acc = 0.0; fps_n = 0; + } + g_batches = 0; g_verts = 0; g_drawn = 0; int fbw, fbh; glfwGetFramebufferSize(win, &fbw, &fbh); @@ -3721,6 +4063,7 @@ int main(int argc, char **argv) { glRotatef(cam_pitch, 1.0f, 0.0f, 0.0f); /* apply camera rotation (arrows) */ glRotatef(cam_yaw, 0.0f, 1.0f, 0.0f); glTranslatef(-cam_x, -cam_y, 0.0f); /* apply camera pan (WASD) */ + build_frustum(); /* for CPU culling of off-screen bodies */ glClearColor(0.006f, 0.010f, 0.035f, 1.0f); glClear(GL_COLOR_BUFFER_BIT); @@ -3739,6 +4082,7 @@ int main(int argc, char **argv) { float var = cfg.tumble_var / 100.0f; float glow = cfg.glow / 100.0f; float fl = cfg.flicker / 100.0f; + int bloom_on = g_have_fbo && g_bloom && glow > 0.001f; /* post-process glow */ recompose_field(); int active = g_total; @@ -3748,6 +4092,7 @@ int main(int argc, char **argv) { static int mag_list[MAX_BODIES], glass_list[MAX_BODIES], mirror_list[MAX_BODIES]; int mag_n = 0, glass_n = 0, mirror_n = 0; + if (g_have_vbo) glEnableClientState(GL_VERTEX_ARRAY); for (int i = 0; i < active; i++) { Body *b = &bodies[i]; @@ -3781,6 +4126,16 @@ int main(int argc, char **argv) { continue; } + /* frustum cull: skip the projection + draw for bodies that can't be + * on screen. The margin covers the glow, not just the body: the + * legacy ghosts scale to ~1.8x, and the screen-space halo (a fixed + * pixel width) maps to a world size that grows with distance -- so a + * distance term keeps far-edge shapes' glow from popping. Motion and + * recycle above already ran, so culled bodies still advance. 
*/ + double cmargin = b->size * 1.9 + sqrt((double)dist2) * 0.022 + 3.0; + if (!sphere_visible(b->x, b->y, -(double)b->z, cmargin)) continue; + g_drawn++; + float depth = sqrtf(dist2) / R; /* 0 at camera .. 1 at far sphere */ if (depth < 0) depth = 0; if (depth > 1) depth = 1; @@ -3799,9 +4154,25 @@ int main(int argc, char **argv) { float lw = 1.0f + (1.0f - depth) * 2.0f; const Solid *s = &solids[b->shape]; - /* project once (4/5/6-D shapes morph; 3-D shapes pass through) */ + /* Select the vertex source for this body. mode 1 (rigid) reads a + * prebuilt static VBO and needs no per-frame projection; mode 2 + * (morph) and mode 0 (immediate clocks/dynamics) project into p3, + * and mode 2 streams it to the shared dynamic VBO. */ static float p3[MAX_VERTS][3]; - project_body(s, b->angle, b->hue_offset * 0.01745329f, p3); + int mode = g_have_vbo ? g_shape_vbomode[b->shape] : 0; + if (mode != 1) + project_body(s, b->angle, b->hue_offset * 0.01745329f, p3); + if (mode) { + vgBindBuffer(GL_ELEMENT_ARRAY_BUFFER, g_shape_ibo[b->shape]); + if (mode == 1) { + vgBindBuffer(GL_ARRAY_BUFFER, g_shape_vbo[b->shape]); + } else { + vgBindBuffer(GL_ARRAY_BUFFER, g_dyn_vbo); + vgBufferData(GL_ARRAY_BUFFER, + (VGsizeiptr)((size_t)s->nv * 3 * sizeof(float)), p3, GL_STREAM_DRAW); + } + glVertexPointer(3, GL_FLOAT, 0, (const void *)0); + } glPushMatrix(); glTranslatef(b->x, b->y, -b->z); @@ -3813,8 +4184,9 @@ int main(int argc, char **argv) { * many faint antialiased width layers fading outward (a smooth * gradient, not one fat blurry line), and a few faint scaled-up * ghost copies bloom the glow into a larger volume than the line - * width alone could ever reach. */ - if (glow > 0.001f) { + * width alone could ever reach. (Skipped when the post-process bloom + * is active -- it produces the same look in one pass per body.) 
*/ + if (!bloom_on && glow > 0.001f) { float maxw = max_line_width; /* (a) soft halo hugging each vector, fading out into mist */ @@ -3856,6 +4228,14 @@ int main(int argc, char **argv) { glPopMatrix(); } + if (g_have_vbo) { /* restore state for fixed-function passes */ + glDisableClientState(GL_VERTEX_ARRAY); + vgBindBuffer(GL_ARRAY_BUFFER, 0); + vgBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + } + + /* bloom: synthesize the CRT glow from the sharp core in one post pass */ + if (bloom_on) build_bloom(fbw, fbh, glow); /* magnifiers, glass + mirror spheres: grab the field, draw lenses over it */ if (mag_n > 0 || glass_n > 0 || mirror_n > 0) { @@ -3873,6 +4253,7 @@ int main(int argc, char **argv) { if (idle > 10.0f) osd_alpha = 1.0f - (idle - 10.0f) / 4.0f; if (osd_alpha < 0) osd_alpha = 0; render_osd(fbw, fbh, osd_alpha); + if (g_show_hud) render_perf_hud(fbw, fbh, fps_val, ms_val); } glfwSwapBuffers(win); diff --git a/vectorgons.exe b/vectorgons.exe index c909450..8c7c675 100755 Binary files a/vectorgons.exe and b/vectorgons.exe differ diff --git a/vectorgons.scr b/vectorgons.scr index 505f95c..349e0d6 100755 Binary files a/vectorgons.scr and b/vectorgons.scr differ