Skip to content

Commit 529abd7

Browse files
Correct clamped depth range from [0, 65535].
This changes a few things:
* All backends clamp the depth range and keep it positive.
* The depth rounding uniform is now properly dirtied.
* Projection is updated to translate and scale appropriately.
* Depth rounding is halved on OpenGL to account for the [-1, 1] range.

Fixes Phantasy Star Portable 2 without the need for a game-specific hack.
1 parent 18cdf9f commit 529abd7

File tree

8 files changed

+90
-46
lines changed

8 files changed

+90
-46
lines changed

GPU/Common/GPUStateUtils.cpp

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,8 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
597597
float xOffset = 0.0f;
598598
float hScale = 1.0f;
599599
float yOffset = 0.0f;
600+
float zScale = 1.0f;
601+
float zOffset = 0.0f;
600602

601603
// If we're within the bounds, we want clipping the viewport way. So leave it be.
602604
if (left < 0.0f || right > renderWidth) {
@@ -625,28 +627,54 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
625627
yOffset = drift / (bottom - top);
626628
}
627629

630+
out.viewportX = left + displayOffsetX;
631+
out.viewportY = top + displayOffsetY;
632+
out.viewportW = right - left;
633+
out.viewportH = bottom - top;
634+
635+
float vpZScale = gstate.getViewportZScale();
636+
float vpZCenter = gstate.getViewportZCenter();
637+
float depthRangeMin = vpZCenter - vpZScale;
638+
float depthRangeMax = vpZCenter + vpZScale;
639+
// Near/far can be inverted. Let's reverse while dealing with clamping, though.
640+
bool inverted = vpZScale < 0.0f;
641+
float near = (inverted ? depthRangeMax : depthRangeMin) * (1.0f / 65535.0f);
642+
float far = (inverted ? depthRangeMin : depthRangeMax) * (1.0f / 65535.0f);
643+
644+
if (near < 0.0f || far > 1.0f) {
645+
float overageNear = std::max(-near, 0.0f);
646+
float overageFar = std::max(far - 1.0f, 0.0f);
647+
float drift = overageFar - overageNear;
648+
649+
near += overageNear;
650+
far += overageFar;
651+
652+
zScale = fabsf(vpZScale * (2.0f / 65535.0f)) / (far - near);
653+
zOffset = drift / (far - near);
654+
}
655+
656+
if (inverted) {
657+
zScale = -zScale;
658+
inverted = false;
659+
}
660+
661+
out.depthRangeMin = inverted ? far : near;
662+
out.depthRangeMax = inverted ? near : far;
663+
628664
bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale;
629665
bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset;
630-
if (scaleChanged || offsetChanged) {
666+
bool depthChanged = gstate_c.vpDepthScale != zScale || gstate_c.vpZOffset != zOffset;
667+
if (scaleChanged || offsetChanged || depthChanged) {
631668
gstate_c.vpWidthScale = wScale;
632669
gstate_c.vpHeightScale = hScale;
670+
gstate_c.vpDepthScale = zScale;
633671
gstate_c.vpXOffset = xOffset;
634672
gstate_c.vpYOffset = yOffset;
673+
gstate_c.vpZOffset = zOffset;
635674
out.dirtyProj = true;
675+
out.dirtyDepth = depthChanged;
636676
}
637677

638-
out.viewportX = left + displayOffsetX;
639-
out.viewportY = top + displayOffsetY;
640-
out.viewportW = right - left;
641-
out.viewportH = bottom - top;
642-
643-
float zScale = gstate.getViewportZScale();
644-
float zCenter = gstate.getViewportZCenter();
645-
float depthRangeMin = zCenter - zScale;
646-
float depthRangeMax = zCenter + zScale;
647-
out.depthRangeMin = depthRangeMin * (1.0f / 65535.0f);
648-
out.depthRangeMax = depthRangeMax * (1.0f / 65535.0f);
649-
650678
#ifndef MOBILE_DEVICE
651679
float minz = gstate.getDepthRangeMin();
652680
float maxz = gstate.getDepthRangeMax();

GPU/Common/GPUStateUtils.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ struct ViewportAndScissor {
6363
float depthRangeMin;
6464
float depthRangeMax;
6565
bool dirtyProj;
66+
bool dirtyDepth;
6667
};
6768
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
6869

GPU/Directx9/ShaderManagerDX9.cpp

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ void ShaderManagerDX9::VSSetMatrix(int creg, const float* pMatrix) {
248248
}
249249

250250
// Depth in ogl is between -1;1 we need between 0;1 and optionally reverse it
251-
static void ConvertProjMatrixToD3D(Matrix4x4 &in, bool invertedX, bool invertedY, bool invertedZ) {
251+
static void ConvertProjMatrixToD3D(Matrix4x4 &in, bool invertedX, bool invertedY) {
252252
// Half pixel offset hack
253253
float xoff = 0.5f / gstate_c.curRTRenderWidth;
254254
xoff = gstate_c.vpXOffset + (invertedX ? xoff : -xoff);
@@ -260,7 +260,9 @@ static void ConvertProjMatrixToD3D(Matrix4x4 &in, bool invertedX, bool invertedY
260260
if (invertedY)
261261
yoff = -yoff;
262262

263-
in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(gstate_c.vpWidthScale, gstate_c.vpHeightScale, invertedZ ? -0.5 : 0.5f));
263+
const Vec3 trans(xoff, yoff, gstate_c.vpZOffset * 0.5f + 0.5f);
264+
const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
265+
in.translateAndScale(trans, scale);
264266
}
265267

266268
static void ConvertProjMatrixToD3DThrough(Matrix4x4 &in) {
@@ -342,8 +344,7 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) {
342344
flippedMatrix[12] = -flippedMatrix[12];
343345
}
344346

345-
const bool invertedZ = gstate_c.vpDepth < 0;
346-
ConvertProjMatrixToD3D(flippedMatrix, invertedX, invertedY, invertedZ);
347+
ConvertProjMatrixToD3D(flippedMatrix, invertedX, invertedY);
347348

348349
VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr());
349350
}
@@ -482,16 +483,21 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) {
482483
float viewZScale = gstate.getViewportZScale();
483484
float viewZCenter = gstate.getViewportZCenter();
484485

485-
// Given the way we do the rounding, the integer part of the offset is probably mostly irrelevant as we cancel
486-
// it afterwards anyway.
487-
// It seems that we should adjust for D3D projection matrix. We got squashed up to only 0-1, so we divide
488-
// the scale factor by 2, and add an offset. But, this doesn't work! I get near-perfect results not doing it.
489-
// viewZScale *= 2.0f;
486+
// We had to scale and translate Z to account for our clamped Z range.
487+
// Therefore, we also need to reverse this to round properly.
488+
//
489+
// Example: scale = 65535.0, center = 0.0
490+
// Resulting range = -65535 to 65535, clamped to [0, 65535]
491+
// gstate_c.vpDepthScale = 2.0f
492+
// gstate_c.vpZOffset = -1.0f
493+
//
494+
// The projection already accounts for those, so we need to reverse them.
495+
//
496+
// Additionally, D3D9 uses a range from [0, 1] which makes things easy.
497+
// We just correct the center.
498+
viewZScale *= (1.0f / gstate_c.vpDepthScale);
499+
viewZCenter -= 65535.0f * gstate_c.vpZOffset - 32767.5f;
490500

491-
// Need to take the possibly inverted proj matrix into account.
492-
if (gstate_c.vpDepth < 0.0)
493-
viewZScale *= -1.0f;
494-
viewZCenter -= 32767.5f;
495501
float viewZInvScale;
496502
if (viewZScale != 0.0) {
497503
viewZInvScale = 1.0f / viewZScale;

GPU/Directx9/StateMappingDX9.cpp

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -293,23 +293,13 @@ void TransformDrawEngineDX9::ApplyDrawState(int prim) {
293293
float depthMin = vpAndScissor.depthRangeMin;
294294
float depthMax = vpAndScissor.depthRangeMax;
295295

296-
if (!gstate.isModeThrough()) {
297-
// Direct3D can't handle negative depth ranges, so we fix it in the projection matrix.
298-
if (gstate_c.vpDepth != depthMax - depthMin) {
299-
gstate_c.vpDepth = depthMax - depthMin;
300-
vpAndScissor.dirtyProj = true;
301-
}
302-
if (depthMin > depthMax) {
303-
std::swap(depthMin, depthMax);
304-
}
305-
if (depthMin < 0.0f) depthMin = 0.0f;
306-
if (depthMax > 1.0f) depthMax = 1.0f;
307-
}
308-
309296
dxstate.viewport.set(vpAndScissor.viewportX, vpAndScissor.viewportY, vpAndScissor.viewportW, vpAndScissor.viewportH, depthMin, depthMax);
310297
if (vpAndScissor.dirtyProj) {
311298
shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
312299
}
300+
if (vpAndScissor.dirtyDepth) {
301+
shaderManager_->DirtyUniform(DIRTY_DEPTHRANGE);
302+
}
313303
}
314304

315305
void TransformDrawEngineDX9::ApplyDrawStateLate() {

GPU/GLES/ShaderManager.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -378,8 +378,8 @@ static inline void ScaleProjMatrix(Matrix4x4 &in) {
378378
// GL upside down is a pain as usual.
379379
yOffset = -yOffset;
380380
}
381-
const Vec3 trans(gstate_c.vpXOffset, yOffset, 0.0f);
382-
const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, 1.0);
381+
const Vec3 trans(gstate_c.vpXOffset, yOffset, gstate_c.vpZOffset * 2.0f);
382+
const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale);
383383
in.translateAndScale(trans, scale);
384384
}
385385

@@ -592,11 +592,27 @@ void LinkedShader::UpdateUniforms(u32 vertType) {
592592
float viewZScale = gstate.getViewportZScale();
593593
float viewZCenter = gstate.getViewportZCenter();
594594
float viewZInvScale;
595+
596+
// We had to scale and translate Z to account for our clamped Z range.
597+
// Therefore, we also need to reverse this to round properly.
598+
//
599+
// Example: scale = 65535.0, center = 0.0
600+
// Resulting range = -65535 to 65535, clamped to [0, 65535]
601+
// gstate_c.vpDepthScale = 2.0f
602+
// gstate_c.vpZOffset = -1.0f
603+
//
604+
// The projection already accounts for those, so we need to reverse them.
605+
//
606+
// Additionally, OpenGL uses a range from [-1, 1]. So we halve the scale.
607+
viewZScale *= (1.0f / gstate_c.vpDepthScale) * 0.5f;
608+
viewZCenter -= 65535.0f * gstate_c.vpZOffset;
609+
595610
if (viewZScale != 0.0) {
596611
viewZInvScale = 1.0f / viewZScale;
597612
} else {
598613
viewZInvScale = 0.0;
599614
}
615+
600616
float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale };
601617
SetFloatUniform4(u_depthRange, data);
602618
}

GPU/GLES/StateMapping.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,9 @@ void TransformDrawEngine::ApplyDrawState(int prim) {
371371
if (vpAndScissor.dirtyProj) {
372372
shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
373373
}
374+
if (vpAndScissor.dirtyDepth) {
375+
shaderManager_->DirtyUniform(DIRTY_DEPTHRANGE);
376+
}
374377
}
375378

376379
void TransformDrawEngine::ApplyDrawStateLate() {

GPU/GPUState.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -254,10 +254,9 @@ void GPUStateCache::DoState(PointerWrap &p) {
254254

255255
p.Do(vpWidth);
256256
p.Do(vpHeight);
257-
if (s >= 4) {
258-
p.Do(vpDepth);
259-
} else {
260-
vpDepth = 1.0f; // any positive value should be fine
257+
if (s == 4) {
258+
float oldDepth = 1.0f;
259+
p.Do(oldDepth);
261260
}
262261

263262
p.Do(curRTWidth);

GPU/GPUState.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,12 +510,13 @@ struct GPUStateCache {
510510

511511
float vpWidth;
512512
float vpHeight;
513-
float vpDepth;
514513

515514
float vpXOffset;
516515
float vpYOffset;
516+
float vpZOffset;
517517
float vpWidthScale;
518518
float vpHeightScale;
519+
float vpDepthScale;
519520

520521
KnownVertexBounds vertBounds;
521522

0 commit comments

Comments
 (0)