Ziele Untersuchung
mit Columbo Integrität von
Datenbanken Interaktion und
Portierbarkeit Ergonomie der
Schnittstellen

Angebot Produkte Projekt Beratung

Mittel Analytik Modellierung Sprachen Algebra Logik Hardware Denken Kreativität

Zusammenhänge Gesellschaft Wirtschaft Branche Firma


products/Sources/formale Sprachen/C/Firefox/gfx/skia/skia/src/sksl/ (Browser von der Mozilla Stiftung Version 136.0.1^©) Datei vom 10.2.2025 mit Größe 56 kB

Quelle sksl_graphite_vert.sksl Sprache: unbekannt

Spracherkennung für: .sksl vermutete Sprache: Unknown {[0] [0] [0]} [Methode: Schwerpunktbildung, einfache Gewichte, sechs Dimensionen]

// Graphite-specific vertex shader code

// Pi. Used in tessellate_stroked_curve to cap the maximum rotation (in radians) that is tested
// during the parametric-edge search.
const float $PI = 3.141592653589793238;

///////////////////////////////////////////////////////////////////////////////////////////////////
// Support functions for tessellating path renderers

// Curve type identifiers. $is_conic_curve() treats every value other than $kCubicCurveType as a
// conic, and $is_triangular_conic_curve() tests for $kTriangularConicCurveType specifically.
const float $kCubicCurveType = 0;            // skgpu::tess::kCubicCurveType
const float $kConicCurveType = 1;            // skgpu::tess::kConicCurveType
const float $kTriangularConicCurveType = 2;  // skgpu::tess::kTriangularConicCurveType

// Infers the curve type from the control-point encoding used by tessellating path renderers:
// an infinite p23.z marks a triangular conic, otherwise an infinite p23.w marks a conic, and
// everything else is treated as a cubic.
//
// This relies on isinf(), so it is only meant for GPUs with infinity support. Calling it on a
// platform that lacks infinity support may result in a shader compilation error.
$pure float curve_type_using_inf_support(float4 p23) {
    float curveType = $kCubicCurveType;
    if (isinf(p23.z)) {
        curveType = $kTriangularConicCurveType;
    } else if (isinf(p23.w)) {
        curveType = $kConicCurveType;
    }
    return curveType;
}

// True for any curve type other than $kCubicCurveType.
$pure bool $is_conic_curve(float curveType) {
    bool isCubic = (curveType == $kCubicCurveType);
    return !isCubic;
}

// True only when the curve type is $kTriangularConicCurveType.
$pure bool $is_triangular_conic_curve(float curveType) {
    bool isTriangle = ($kTriangularConicCurveType == curveType);
    return isTriangle;
}

// Wang's formula gives the minimum number of evenly spaced (in the parametric sense) line segments
// that a bezier curve must be chopped into in order to guarantee all lines stay within a distance
// of "1/precision" pixels from the true curve. Its definition for a bezier curve of degree "n" is
// as follows:
//
//     maxLength = max([length(p[i+2] - 2p[i+1] + p[i]) for (0 <= i <= n-2)])
//     numParametricSegments = sqrt(maxLength * precision * n*(n - 1)/8)
//
// (Goldman, Ron. (2003). 5.6.3 Wang's Formula. "Pyramid Algorithms: A Dynamic Programming Approach
// to Curves and Surfaces for Geometric Modeling". Morgan Kaufmann Publishers.)

// Degree "n" plugged into Wang's formula for the cubic variants below.
const float $kDegree = 3;
const float $kPrecision = 4; // Must match skgpu::tess::kPrecision
// Constant factor of Wang's formula that multiplies maxLength: precision * n*(n - 1)/8.
const float $kLengthTerm     = ($kDegree * ($kDegree - 1) / 8.0) * $kPrecision;
// $kLengthTerm squared, for use with a squared max length (see $wangs_formula_cubic_log2).
const float $kLengthTermPow2 = (($kDegree * $kDegree) * (($kDegree - 1) * ($kDegree - 1)) / 64.0) *
                               ($kPrecision * $kPrecision);

// Computes the two second-order forward differences of the cubic (p[i+2] - 2p[i+1] + p[i]),
// transforms each by 'matrix', and returns the larger of their squared lengths.
$pure float $wangs_formula_max_fdiff_p2(float2 p0, float2 p1, float2 p2, float2 p3,
                                        float2x2 matrix) {
    float2 fdiff0 = matrix * (fma(float2(-2), p1, p2) + p0);
    float2 fdiff1 = matrix * (fma(float2(-2), p2, p3) + p1);
    float lengthSq0 = dot(fdiff0, fdiff0);
    float lengthSq1 = dot(fdiff1, fdiff1);
    return max(lengthSq0, lengthSq1);
}

// Wang's formula for a cubic: the number of parametric segments, rounded up and never less
// than 1. The forward differences are measured after applying 'matrix'.
$pure float $wangs_formula_cubic(float2 p0, float2 p1, float2 p2, float2 p3,
                                 float2x2 matrix) {
    float maxFdiffSq = $wangs_formula_max_fdiff_p2(p0, p1, p2, p3, matrix);
    // sqrt(maxFdiffSq) is the max forward-difference length that Wang's formula scales.
    float numSegments = ceil(sqrt($kLengthTerm * sqrt(maxFdiffSq)));
    return max(numSegments, 1.0);
}

// Returns ceil(log2()) of Wang's formula for a cubic, never less than 0. Working with the squared
// length term and the squared max forward difference turns both square roots of the plain
// formula into the single .25 factor applied to the log.
$pure float $wangs_formula_cubic_log2(float2 p0, float2 p1, float2 p2, float2 p3,
                                      float2x2 matrix) {
    float maxFdiffSq = $wangs_formula_max_fdiff_p2(p0, p1, p2, p3, matrix);
    float clampedTerm = max($kLengthTermPow2 * maxFdiffSq, 1.0);
    return ceil(log2(clampedTerm) * .25);
}

// Evaluates the conic form of Wang's formula for control points p0..p2 with weight w and returns
// the quantity whose square root $wangs_formula_conic() and $wangs_formula_conic_log2() use as
// the segment count.
$pure float $wangs_formula_conic_p2(float2 p0, float2 p1, float2 p2, float w) {
    // Re-center the control points on the middle of their bounding box.
    float2 boxMin = min(min(p0, p1), p2);
    float2 boxMax = max(max(p0, p1), p2);
    float2 center = (boxMin + boxMax) * 0.5;
    float2 q0 = p0 - center;
    float2 q1 = p1 - center;
    float2 q2 = p2 - center;

    // Largest distance from the new origin to any control point.
    float maxLen = sqrt(max(max(dot(q0,q0), dot(q1,q1)), dot(q2,q2)));

    // Forward differences of the (weighted) points and of the weights.
    float2 fdiffPoints = fma(float2(-2.0 * w), q1, q0) + q2;
    float fdiffWeight = abs(fma(-2.0, w, 2.0));

    // Numerator and denominator for the parametric step size of the linearization. The epsilon
    // referenced from the cited paper is 1/precision here.
    float rpMinus1 = max(0.0, fma(maxLen, $kPrecision, -1.0));
    float numerator = length(fdiffPoints) * $kPrecision + rpMinus1 * fdiffWeight;
    float denominator = 4 * min(w, 1.0);

    return numerator/denominator;
}

// Number of parametric segments for a conic: the square root of $wangs_formula_conic_p2(),
// rounded up and never less than 1.
$pure float $wangs_formula_conic(float2 p0, float2 p1, float2 p2, float w) {
    float segmentsSq = $wangs_formula_conic_p2(p0, p1, p2, w);
    float segments = ceil(sqrt(segmentsSq));
    return max(segments, 1.0);
}

// Returns ceil(log2()) of the conic segment count, never less than 0. The .5 factor on the log
// stands in for the square root taken by $wangs_formula_conic().
$pure float $wangs_formula_conic_log2(float2 p0, float2 p1, float2 p2, float w) {
    float segmentsSq = $wangs_formula_conic_p2(p0, p1, p2, w);
    float log2SegmentsSq = log2(max(segmentsSq, 1.0));
    return ceil(log2SegmentsSq * .5);
}

// Returns normalize(a - b), or the zero vector when a == b. The difference is first divided by
// its largest absolute component so that normalize() operates on moderately sized values even
// when 'a' and/or 'b' have large coordinates.
$pure float2 $robust_normalize_diff(float2 a, float2 b) {
    float2 delta = a - b;
    float2 unitDelta;
    if (delta == float2(0.0)) {
        // No direction to report for coincident points.
        unitDelta = float2(0.0);
    } else {
        float invLargest = 1.0 / max(abs(delta.x), abs(delta.y));
        unitDelta = normalize(invLargest * delta);
    }
    return unitDelta;
}

// Returns the cosine of the angle between a and b, which are assumed to be unit vectors already.
// For unit vectors the cosine is just the dot product; it is clamped so the result is guaranteed
// to lie within [-1, 1].
$pure float $cosine_between_unit_vectors(float2 a, float2 b) {
    float rawCosine = dot(a, b);
    return clamp(rawCosine, -1.0, 1.0);
}

// Extends the middle radius to either the miter point, or to the bevel edge if the miter limit
// was surpassed and the join has to revert to a bevel.
//
// x = (1 + cosTheta) / 2. The miter branch (1/sqrt(x)) is taken when x * miterLimit^2 >= 1;
// otherwise the bevel branch (sqrt(x)) is used.
$pure float $miter_extent(float cosTheta, float miterLimit) {
    float x = fma(cosTheta, .5, .5);
    float extent;
    if (x * miterLimit * miterLimit >= 1.0) {
        extent = inversesqrt(x);
    } else {
        extent = sqrt(x);
    }
    return extent;
}

// Returns the number of radial segments required for each radian of rotation, in order for the
// curve to appear "smooth" as defined by the approximate device-space stroke radius. The allowed
// error is 1/$kPrecision, and the cosine is floored at -1 to keep it inside acos()'s domain.
$pure float $num_radial_segments_per_radian(float approxDevStrokeRadius) {
    float tolerance = 1.0 / $kPrecision;
    float cosHalfSegment = max(1.0 - tolerance / approxDevStrokeRadius, -1.0);
    return .5 / acos(cosHalfSegment);
}

// Linear interpolation computed as a + (b - a)*T with a single fma. Unlike mix(), this does not
// return b when T==1, but it otherwise seems to get better precision than "a*(1 - T) + b*T" for
// things like chopping cubics on exact cusp points. Callers override the result anyway when
// T==1, so it shouldn't be a problem.
$pure float $unchecked_mix(float a, float b, float T) {
    float span = b - a;
    return fma(span, T, a);
}
$pure float2 $unchecked_mix(float2 a, float2 b, float T) {
    float2 span = b - a;
    return fma(span, float2(T), a);
}
$pure float4 $unchecked_mix(float4 a, float4 b, float4 T) {
    float4 span = b - a;
    return fma(span, T, a);
}

// Computes a vertex position for the curve described by the packed control points p01 and p23,
// tessellated to the given resolve level, assuming it will be drawn as a filled curve.
//
//   vectorXform       - 2x2 matrix applied when measuring the curve with Wang's formula.
//   resolveLevel      - Resolve level this vertex belongs to.
//   idxInResolveLevel - Index of the vertex within that resolve level.
//   p01, p23          - Packed control points (p0, p1) and (p2, p3). Conics keep their weight
//                       in p3.x.
//   curveType         - One of the $k*CurveType values.
$pure float2 tessellate_filled_curve(float2x2 vectorXform,
                                     float resolveLevel, float idxInResolveLevel,
                                     float4 p01, float4 p23,
                                     float curveType) {
    float2 vertexCoord;
    if (!$is_triangular_conic_curve(curveType)) {
        float2 p0 = p01.xy;
        float2 p1 = p01.zw;
        float2 p2 = p23.xy;
        float2 p3 = p23.zw;

        // A negative weight means "treat the instance as an integral cubic".
        float w = -1;
        float neededResolveLevel;
        if ($is_conic_curve(curveType)) {
            // Conics are 3 points, with the weight stored in p3.
            w = p3.x;
            neededResolveLevel = $wangs_formula_conic_log2(vectorXform*p0,
                                                           vectorXform*p1,
                                                           vectorXform*p2, w);
            p1 *= w;  // Unproject p1.
            p3 = p2;  // Duplicate the endpoint so the shared cubic code below also works.
        } else {
            // Integral cubic.
            neededResolveLevel = $wangs_formula_cubic_log2(p0, p1, p2, p3, vectorXform);
        }

        if (resolveLevel > neededResolveLevel) {
            // The vertex sits at a finer resolve level than the curve needs. Demote it to the
            // coarser level, which produces a degenerate triangle.
            int levelShift = int(neededResolveLevel - resolveLevel);
            idxInResolveLevel = floor(ldexp(idxInResolveLevel, levelShift));
            resolveLevel = neededResolveLevel;
        }

        // Promote the location to a discrete position in the maximum fixed resolve level. This
        // is extra paranoia to ensure the exact same fp32 coordinates come out for colocated
        // points from different resolve levels (e.g., the vertices T=3/4 and T=6/8 should be
        // exactly colocated).
        int promoteShift = int(5 - resolveLevel);
        float fixedVertexID = floor(.5 + ldexp(idxInResolveLevel, promoteShift));
        if (0 < fixedVertexID && fixedVertexID < 32) {
            float T = fixedVertexID * (1 / 32.0);

            // Evaluate at T with De Casteljau's algorithm for its accuracy and stability.
            float2 ab = mix(p0, p1, T);
            float2 bc = mix(p1, p2, T);
            float2 cd = mix(p2, p3, T);
            float2 abc = mix(ab, bc, T);
            float2 bcd = mix(bc, cd, T);
            float2 abcd = mix(abc, bcd, T);

            // Evaluate the conic weight at T.
            float u = mix(1.0, w, T);
            float v = w + 1 - u;  // == mix(w, 1, T)
            float uv = mix(u, v, T);

            if (w < 0) {
                vertexCoord = abcd;     // cubic
            } else {
                vertexCoord = abc/uv;   // conic
            }
        } else if (fixedVertexID == 0) {
            // First vertex: use the exact start point.
            vertexCoord = p0.xy;
        } else {
            // Last vertex: use the exact end point.
            vertexCoord = p3.xy;
        }
    } else {
        // This patch is an exact triangle; pick one of its three corners.
        if (resolveLevel != 0) {
            vertexCoord = p01.zw;
        } else if (idxInResolveLevel != 0) {
            vertexCoord = p23.xy;
        } else {
            vertexCoord = p01.xy;
        }
    }
    return vertexCoord;
}

// Computes one vertex of the edge strip for a stroked curve instance. The strip covers both the
// join that precedes the curve and the stroke section of the curve itself.
//
//   edgeID           - Signed edge index. abs(edgeID) selects the edge within the strip and
//                      sign(edgeID) selects the side the vertex is outset to.
//   maxEdges         - Edge budget of the instance; join and stroke segment counts are clamped
//                      so that they fit within it.
//   affineMatrix     - 2x2 scale/skew part of the transform.
//   translate        - Translation added after affineMatrix to produce device coords.
//   maxScale         - Derived from affineMatrix. Scales the stroke radius when choosing the
//                      number of radial segments per radian.
//   p01, p23         - Packed control points (p0, p1) and (p2, p3). Conics keep their weight
//                      in p3.x.
//   lastControlPoint - Point that forms the join's incoming tangent together with p0
//                      (p0 - lastControlPoint).
//   strokeParams     - x is the stroke radius (0 selects the hairline path); y is the join type
//                      (<0 = round, ==0 = bevel, >0 encodes the miter limit).
//   curveType        - One of the $k*CurveType values.
//
// Returns device coords in xy and local coords in zw, since for now perspective isn't supported.
$pure float4 tessellate_stroked_curve(float edgeID, float maxEdges,
                                      float2x2 affineMatrix,
                                      float2 translate,
                                      float maxScale /* derived from affineMatrix */,
                                      float4 p01, float4 p23,
                                      float2 lastControlPoint,
                                      float2 strokeParams,
                                      float curveType) {
    float2 p0=p01.xy, p1=p01.zw, p2=p23.xy, p3=p23.zw;
    float w = -1;  // w<0 means the curve is an integral cubic.
    if ($is_conic_curve(curveType)) {
        // Conics are 3 points, with the weight in p3.
        w = p3.x;
        p3 = p2;  // Setting p3 equal to p2 works for the remaining rotational logic.
    }

    // Call Wang's formula to determine the parametric segments before transforming the points
    // for hairlines, so that it is consistent with how the CPU tested the control points for
    // chopping.
    float numParametricSegments;
    if (w < 0) {
        if (p0 == p1 && p2 == p3) {
            numParametricSegments = 1; // a line
        } else {
            numParametricSegments = $wangs_formula_cubic(p0, p1, p2, p3, affineMatrix);
        }
    } else {
        numParametricSegments = $wangs_formula_conic(affineMatrix * p0,
                                                     affineMatrix * p1,
                                                     affineMatrix * p2, w);
    }

    // Matches skgpu::tess::StrokeParams
    float strokeRadius = strokeParams.x;
    float joinType = strokeParams.y; // <0 = round join, ==0 = bevel join, >0 encodes miter limit
    bool isHairline = strokeParams.x == 0.0;
    float numRadialSegmentsPerRadian;
    if (isHairline) {
        // Hairlines are tessellated in transformed space (see below) with a fixed radius of 0.5,
        // so the radial segment density is computed for a radius of 1.0 rather than from maxScale.
        numRadialSegmentsPerRadian = $num_radial_segments_per_radian(1.0);
        strokeRadius = 0.5;
    } else {
        numRadialSegmentsPerRadian = $num_radial_segments_per_radian(maxScale * strokeParams.x);
    }

    if (isHairline) {
        // Hairline case. Transform the points before tessellation. We can still hold off on the
        // translate until the end; we just need to perform the scale and skew right now.
        p0 = affineMatrix * p0;
        p1 = affineMatrix * p1;
        p2 = affineMatrix * p2;
        p3 = affineMatrix * p3;
        lastControlPoint = affineMatrix * lastControlPoint;
    }

    // Find the starting and ending tangents. When neighboring control points coincide, fall
    // back to the next distinct control point to define the tangent.
    float2 tan0 = $robust_normalize_diff((p0 == p1) ? ((p1 == p2) ? p3 : p2) : p1, p0);
    float2 tan1 = $robust_normalize_diff(p3, (p3 == p2) ? ((p2 == p1) ? p0 : p1) : p2);
    if (tan0 == float2(0)) {
        // The stroke is a point. This special case tells us to draw a stroke-width circle as a
        // 180 degree point stroke instead.
        tan0 = float2(1,0);
        tan1 = float2(-1,0);
    }

    // Determine how many edges to give to the join. We emit the first and final edges
    // of the join twice: once full width and once restricted to half width. This guarantees
    // perfect seaming by matching the vertices from the join as well as from the strokes on
    // either side.
    float numEdgesInJoin;
    if (joinType >= 0 /*Is the join not a round type?*/) {
        // Bevel(0) and miter(+) joins get 1 and 2 segments respectively.
        // +2 because we emit the beginning and ending edges twice (see above comments).
        numEdgesInJoin = sign(joinType) + (1 + 2);
    } else {
        float2 prevTan = $robust_normalize_diff(p0, lastControlPoint);
        float joinRads = acos($cosine_between_unit_vectors(prevTan, tan0));
        float numRadialSegmentsInJoin = max(ceil(joinRads * numRadialSegmentsPerRadian), 1);
        // +2 because we emit the beginning and ending edges twice (see above comment).
        numEdgesInJoin = numRadialSegmentsInJoin + 2;
        // The stroke section needs at least two edges. Don't assign more to the join than
        // "maxEdges - 2". (This is only relevant when the ideal max edge count calculated
        // on the CPU had to be limited to maxEdges in the draw call).
        numEdgesInJoin = min(numEdgesInJoin, maxEdges - 2);
    }

    // Find which direction the curve turns.
    // NOTE: Since the curve is not allowed to inflect, we can just check F'(.5) x F''(.5).
    // NOTE: F'(.5) x F''(.5) has the same sign as (P2 - P0) x (P3 - P1)
    float turn = cross_length_2d(p2 - p0, p3 - p1);
    // Edge index relative to the start of the stroke section; negative values belong to the join.
    float combinedEdgeID = abs(edgeID) - numEdgesInJoin;
    if (combinedEdgeID < 0) {
        tan1 = tan0;
        // Don't let tan0 become zero. The code as-is isn't built to handle that case. tan0=0
        // means the join is disabled, and to disable it with the existing code we can leave
        // tan0 equal to tan1.
        if (lastControlPoint != p0) {
            tan0 = $robust_normalize_diff(p0, lastControlPoint);
        }
        turn = cross_length_2d(tan0, tan1);
    }

    // Calculate the curve's starting angle and rotation.
    float cosTheta = $cosine_between_unit_vectors(tan0, tan1);
    float rotation = acos(cosTheta);
    if (turn < 0) {
        // Adjust sign of rotation to match the direction the curve turns.
        rotation = -rotation;
    }

    float numRadialSegments;
    float strokeOutset = sign(edgeID);
    if (combinedEdgeID < 0) {
        // We belong to the preceding join. The first and final edges get duplicated, so we only
        // have "numEdgesInJoin - 2" segments.
        numRadialSegments = numEdgesInJoin - 2;
        numParametricSegments = 1;  // Joins don't have parametric segments.
        p3 = p2 = p1 = p0;  // Colocate all points on the junction point.
        // Shift combinedEdgeID to the range [-1, numRadialSegments]. This duplicates the first
        // edge and lands one edge at the very end of the join. (The duplicated final edge will
        // actually come from the section of our strip that belongs to the stroke.)
        combinedEdgeID += numRadialSegments + 1;
        if (combinedEdgeID < 0) {
            combinedEdgeID = 0;
        } else {
            // We normally restrict the join on one side of the junction, but if the tangents are
            // nearly equivalent this could theoretically result in bad seaming and/or cracks on the
            // side we don't put it on. If the tangents are nearly equivalent then we leave the join
            // double-sided.
            // NOTE(review): this comment used to read "sin(180deg / 3000)", which is ~1e-3 and
            // does not match the 1e-2 value below. Confirm which of the two was intended.
            const float sinEpsilon = 1e-2;  // ~= sin(180deg / 300)
            bool tangentsNearlyParallel =
                    (abs(turn) * inversesqrt(dot(tan0, tan0) * dot(tan1, tan1))) < sinEpsilon;
            if (!tangentsNearlyParallel || dot(tan0, tan1) < 0) {
                // There are two edges colocated at the beginning. Leave the first one double sided
                // for seaming with the previous stroke. (The double sided edge at the end will
                // actually come from the section of our strip that belongs to the stroke.)
                strokeOutset = (turn < 0) ? min(strokeOutset, 0) : max(strokeOutset, 0);
            }
        }
    } else {
        // We belong to the stroke. Unless numRadialSegmentsPerRadian is incredibly high,
        // clamping to maxCombinedSegments will be a no-op because the draw call was invoked with
        // sufficient vertices to cover the worst case scenario of 180 degree rotation.
        float maxCombinedSegments = maxEdges - numEdgesInJoin - 1;
        numRadialSegments = max(ceil(abs(rotation) * numRadialSegmentsPerRadian), 1);
        numRadialSegments = min(numRadialSegments, maxCombinedSegments);
        numParametricSegments = min(numParametricSegments,
                                    maxCombinedSegments - numRadialSegments + 1);
    }

    // Additional parameters for final tessellation evaluation.
    float radsPerSegment = rotation / numRadialSegments;
    float numCombinedSegments = numParametricSegments + numRadialSegments - 1;
    bool isFinalEdge = (combinedEdgeID >= numCombinedSegments);
    if (combinedEdgeID > numCombinedSegments) {
        strokeOutset = 0;  // The strip has more edges than we need. Drop this one.
    }
    // Edge #2 extends to the miter point.
    if (abs(edgeID) == 2 && joinType > 0/*Is the join a miter type?*/) {
        strokeOutset *= $miter_extent(cosTheta, joinType/*miterLimit*/);
    }

    float2 tangent, strokeCoord;
    if (combinedEdgeID != 0 && !isFinalEdge) {
        // Compute the location and tangent direction of the stroke edge with the integral id
        // "combinedEdgeID", where combinedEdgeID is the sorted-order index of parametric and radial
        // edges. Start by finding the tangent function's power basis coefficients. These define a
        // tangent direction (scaled by some uniform value) as:
        //                                                 |T^2|
        //     Tangent_Direction(T) = dx,dy = |A  2B  C| * |T  |
        //                                    |.   .  .|   |1  |
        float2 A, B, C = p1 - p0;
        float2 D = p3 - p0;
        if (w >= 0.0) {
            // P0..P2 represent a conic and P3==P2. The derivative of a conic has a cumbersome
            // order-4 denominator. However, this isn't necessary if we are only interested in a
            // vector in the same *direction* as a given tangent line. Since the denominator scales
            // dx and dy uniformly, we can throw it out completely after evaluating the derivative
            // with the standard quotient rule. This leaves us with a simpler quadratic function
            // that we use to find a tangent.
            C *= w;
            B = .5*D - C;
            A = (w - 1.0) * D;
            p1 *= w;
        } else {
            float2 E = p2 - p1;
            B = E - C;
            A = fma(float2(-3), E, D);
        }
        // FIXME(crbug.com/800804,skbug.com/11268): Consider normalizing the exponents in A,B,C at
        // this point in order to prevent fp32 overflow.

        // Now find the coefficients that give a tangent direction from a parametric edge ID:
        //
        //                                                                 |parametricEdgeID^2|
        //     Tangent_Direction(parametricEdgeID) = dx,dy = |A  B_  C_| * |parametricEdgeID  |
        //                                                   |.   .   .|   |1                 |
        //
        float2 B_ = B * (numParametricSegments * 2.0);
        float2 C_ = C * (numParametricSegments * numParametricSegments);

        // Run a binary search to determine the highest parametric edge that is located on or before
        // the combinedEdgeID. A combined ID is determined by the sum of complete parametric and
        // radial segments behind it. i.e., find the highest parametric edge where:
        //
        //    parametricEdgeID + floor(numRadialSegmentsAtParametricT) <= combinedEdgeID
        //
        float lastParametricEdgeID = 0.0;
        float maxParametricEdgeID = min(numParametricSegments - 1.0, combinedEdgeID);
        float negAbsRadsPerSegment = -abs(radsPerSegment);
        float maxRotation0 = (1.0 + combinedEdgeID) * abs(radsPerSegment);
        for (float exp = 32.0; exp >= 1.0; exp *= 0.5) {
            // Test the parametric edge at lastParametricEdgeID + (32, 16, 8, 4, 2, 1).
            float testParametricID = lastParametricEdgeID + exp;
            if (testParametricID <= maxParametricEdgeID) {
                // Evaluate the tangent direction at testParametricID (Horner form of the
                // quadratic above).
                float2 testTan = fma(float2(testParametricID), A, B_);
                testTan = fma(float2(testParametricID), testTan, C_);
                float cosRotation = dot(normalize(testTan), tan0);
                float maxRotation = fma(testParametricID, negAbsRadsPerSegment, maxRotation0);
                maxRotation = min(maxRotation, $PI);
                // Is rotation <= maxRotation? (i.e., is the number of complete radial segments
                // behind testT, + testParametricID <= combinedEdgeID?)
                if (cosRotation >= cos(maxRotation)) {
                    // testParametricID is on or before the combinedEdgeID. Keep it!
                    lastParametricEdgeID = testParametricID;
                }
            }
        }

        // Find the T value of the parametric edge at lastParametricEdgeID.
        float parametricT = lastParametricEdgeID / numParametricSegments;

        // Now that we've identified the highest parametric edge on or before the
        // combinedEdgeID, the highest radial edge is easy:
        float lastRadialEdgeID = combinedEdgeID - lastParametricEdgeID;

        // Find the angle of tan0, i.e. the angle between tan0 and the positive x axis.
        float angle0 = acos(clamp(tan0.x, -1.0, 1.0));
        angle0 = tan0.y >= 0.0 ? angle0 : -angle0;

        // Find the tangent vector on the edge at lastRadialEdgeID. By construction it is already
        // normalized.
        float radialAngle = fma(lastRadialEdgeID, radsPerSegment, angle0);
        tangent = float2(cos(radialAngle), sin(radialAngle));
        float2 norm = float2(-tangent.y, tangent.x);

        // Find the T value where the tangent is orthogonal to norm. This is a quadratic:
        //
        //     dot(norm, Tangent_Direction(T)) == 0
        //
        //                         |T^2|
        //     norm * |A  2B  C| * |T  | == 0
        //            |.   .  .|   |1  |
        //
        float a=dot(norm,A), b_over_2=dot(norm,B), c=dot(norm,C);
        float discr_over_4 = max(b_over_2*b_over_2 - a*c, 0.0);
        float q = sqrt(discr_over_4);
        if (b_over_2 > 0.0) {
            q = -q;
        }
        q -= b_over_2;

        // Roots are q/a and c/q. Since each curve section does not inflect or rotate more than 180
        // degrees, there can only be one tangent orthogonal to "norm" inside 0..1. Pick the root
        // nearest .5.
        float _5qa = -.5*q*a;
        float2 root = (abs(fma(q,q,_5qa)) < abs(fma(a,c,_5qa))) ? float2(q,a) : float2(c,q);

        // The root finder above can become unstable when lastRadialEdgeID == 0 (e.g., if there are
        // roots at exactly 0 and 1 both). radialT should always equal 0 in this case.
        // root holds (numerator, denominator); a zero denominator also yields radialT = 0.
        float radialT = (lastRadialEdgeID != 0.0 && root.t != 0.0)
                            ? saturate(root.s / root.t)
                            : 0.0;

        // Now that we've identified the T values of the last parametric and radial edges, our final
        // T value for combinedEdgeID is whichever is larger.
        float T = max(parametricT, radialT);

        // Evaluate the cubic at T. Use De Casteljau's for its accuracy and stability.
        float2 ab = $unchecked_mix(p0, p1, T);
        float2 bc = $unchecked_mix(p1, p2, T);
        float2 cd = $unchecked_mix(p2, p3, T);
        float2 abc = $unchecked_mix(ab, bc, T);
        float2 bcd = $unchecked_mix(bc, cd, T);
        float2 abcd = $unchecked_mix(abc, bcd, T);

        // Evaluate the conic weight at T.
        float u = $unchecked_mix(1.0, w, T);
        float v = w + 1 - u;  // == mix(w, 1, T)
        float uv = $unchecked_mix(u, v, T);

        // If we went with T=parametricT, then update the tangent. Otherwise leave it at the radial
        // tangent found previously. (In the event that parametricT == radialT, we keep the radial
        // tangent.)
        if (T != radialT) {
            // We must re-normalize here because the tangent is determined by the curve coefficients
            tangent = w >= 0.0 ? $robust_normalize_diff(bc*u, ab*v)
                               : $robust_normalize_diff(bcd, abc);
        }

        strokeCoord = (w >= 0.0) ? abc/uv : abcd;
    } else {
        // Edges at the beginning and end of the strip use exact endpoints and tangents. This
        // ensures crack-free seaming between instances.
        tangent = (combinedEdgeID == 0) ? tan0 : tan1;
        strokeCoord = (combinedEdgeID == 0) ? p0 : p3;
    }

    // At this point 'tangent' is normalized, so the orthogonal vector is also normalized.
    float2 ortho = float2(tangent.y, -tangent.x);
    strokeCoord += ortho * (strokeRadius * strokeOutset);

    if (isHairline) {
        // Hairline case. The scale and skew already happened before tessellation.
        // TODO: There's probably a more efficient way to tessellate the hairline that lets us
        // avoid inverting the affine matrix to get back to local coords, but it's just a 2x2 so
        // this works for now.
        return float4(strokeCoord + translate, inverse(affineMatrix) * strokeCoord);
    } else {
        // Normal case. Do the transform after tessellation.
        return float4(affineMatrix * strokeCoord + translate, strokeCoord);
    }
}

float4 analytic_rrect_vertex_fn(// Vertex Attributes
                                float2 position,
                                float2 normal,
                                float normalScale,
                                float centerWeight,
                                // Instance Attributes
                                float4 xRadiiOrFlags,
                                float4 radiiOrQuadXs,
                                float4 ltrbOrQuadYs,
                                float4 center,
                                float depth,
                                float3x3 localToDevice,
                                // Varyings
                                out float4 jacobian,
                                out float4 edgeDistances,
                                out float4 xRadii,
                                out float4 yRadii,
                                out float2 strokeParams,
                                out float2 perPixelControl,
                                // Render Step
                                out float2 stepLocalCoords) {
    const uint kCornerVertexCount = 9; // KEEP IN SYNC WITH C++'s
                                       // AnalyticRRectRenderStep::kCornerVertexCount
    const float kMiterScale = 1.0;
    const float kBevelScale = 0.0;
    const float kRoundScale = 0.41421356237; // sqrt(2)-1

    const float kEpsilon = 0.00024; // SK_ScalarNearlyZero

    // Default to miter'ed vertex positioning. Corners with sufficiently large corner radii, or
    // bevel'ed strokes will adjust vertex placement on a per corner basis. This will not affect
    // the final coverage calculations in the fragment shader.
    float joinScale = kMiterScale;

    // Unpack instance-level state that determines the vertex placement and style of shape.
    bool bidirectionalCoverage = center.z <= 0.0;
    bool deviceSpaceDistances = false;
    float4 xs, ys; // ordered TL, TR, BR, BL
    float4 edgeAA = float4(1.0); // ordered L,T,R,B. 1 = AA, 0 = no AA
    bool strokedLine = false;
    if (xRadiiOrFlags.x < -1.0) {
        // Stroked [round] rect or line
        // If y > 0, unpack the line end points, otherwise unpack the rect edges
        strokedLine = xRadiiOrFlags.y > 0.0;
        xs = strokedLine ? ltrbOrQuadYs.LLRR : ltrbOrQuadYs.LRRL;
        ys = ltrbOrQuadYs.TTBB;

        if (xRadiiOrFlags.y < 0.0) {
            // A hairline [r]rect so the X radii are encoded as negative values in this field,
            // and Y radii are stored directly in the subsequent float4.
            xRadii = -xRadiiOrFlags - 2.0;
            yRadii = radiiOrQuadXs;

            // All hairlines use miter joins (join style > 0)
            strokeParams = float2(0.0, 1.0);
        } else {
            xRadii = radiiOrQuadXs;
            yRadii = xRadii; // regular strokes are circular
            strokeParams = xRadiiOrFlags.zw;

            // `sign(strokeParams.y)` evaluates to kMiterScale (1.0) when the
            // input is positive, and kBevelScale (0.0) when it is zero.
            // kRoundScale uses the stroke radius to round rectangular corners.
            joinScale = (strokeParams.y < 0.0) ? kRoundScale
                                               : sign(strokeParams.y);
        }
    } else if (any(greaterThan(xRadiiOrFlags, float4(0.0)))) {
        // Filled round rect
        xs = ltrbOrQuadYs.LRRL;
        ys = ltrbOrQuadYs.TTBB;

        xRadii = xRadiiOrFlags;
        yRadii = radiiOrQuadXs;

        strokeParams = float2(0.0, -1.0); // A negative join style is "round"
    } else {
        // Per-edge quadrilateral, so we have to calculate the corner's basis from the
        // quad's edges.
        xs = radiiOrQuadXs;
        ys = ltrbOrQuadYs;
        edgeAA = -xRadiiOrFlags; // AA flags needed to be < 0 on upload, so flip the sign.

        xRadii = float4(0.0);
        yRadii = float4(0.0);

        strokeParams = float2(0.0, 1.0); // Will be ignored, but set to a "miter"
        deviceSpaceDistances = true;
    }

    // Adjust state on a per-corner basis
    uint cornerID = uint(sk_VertexID) / kCornerVertexCount;
    float2 cornerRadii = float2(xRadii[cornerID], yRadii[cornerID]);
    if (cornerID % 2 != 0) {
        // Corner radii are uploaded in the local coordinate frame, but vertex placement happens
        // in a consistent winding before transforming to final local coords, so swap the
        // radii for odd corners.
        cornerRadii = cornerRadii.yx;
    }

    float2 cornerAspectRatio = float2(1.0);
    if (all(greaterThan(cornerRadii, float2(0.0)))) {
        // Position vertices for an elliptical corner; overriding any previous join style since
        // that only applies when radii are 0.
        joinScale = kRoundScale;
        cornerAspectRatio = cornerRadii.yx;
    }

    // Calculate the local edge vectors, ordered L, T, R, B starting from the bottom left point.
    // For quadrilaterals these are not necessarily axis-aligned, but in all cases they orient
    // the +X/+Y normalized vertex template for each corner.
    float4 dx = xs - xs.wxyz;
    float4 dy = ys - ys.wxyz;
    float4 edgeSquaredLen = dx*dx + dy*dy;

    float4 edgeMask = sign(edgeSquaredLen); // 0 for zero-length edge, 1 for non-zero edge.
    float4 edgeBias = float4(0.0); // adjustment to edge distance for butt cap correction
    float2 strokeRadius = float2(strokeParams.x);
    if (any(equal(edgeMask, float4(0.0)))) {
        // Must clean up (dx,dy) depending on the empty edge configuration
        if (all(equal(edgeMask, float4(0.0)))) {
            // A point so use the canonical basis
            dx = float4( 0.0, 1.0, 0.0, -1.0);
            dy = float4(-1.0, 0.0, 1.0,  0.0);
            edgeSquaredLen = float4(1.0);
        } else {
            // Triangles (3 non-zero edges) copy the adjacent edge. Otherwise it's a line so
            // replace empty edges with the left-hand normal vector of the adjacent edge.
            bool triangle = (edgeMask[0] + edgeMask[1] + edgeMask[2] + edgeMask[3]) > 2.5;
            float4 edgeX = triangle ? dx.yzwx :  dy.yzwx;
            float4 edgeY = triangle ? dy.yzwx : -dx.yzwx;

            dx = mix(edgeX, dx, edgeMask);
            dy = mix(edgeY, dy, edgeMask);
            edgeSquaredLen = mix(edgeSquaredLen.yzwx, edgeSquaredLen, edgeMask);
            edgeAA = mix(edgeAA.yzwx, edgeAA, edgeMask);

            if (!triangle && joinScale == kBevelScale) {
                // Don't outset by stroke radius for butt caps on the zero-length edge, but
                // adjust edgeBias and strokeParams to calculate an AA miter'ed shape with the
                // non-uniform stroke outset.
                strokeRadius *= float2(edgeMask[cornerID], edgeMask.yzwx[cornerID]);
                edgeBias = (edgeMask - 1.0) * strokeParams.x;
                strokeParams.y = 1.0;
                joinScale = kMiterScale;
            }
        }
    }

    float4 inverseEdgeLen = inversesqrt(edgeSquaredLen);
    dx *= inverseEdgeLen;
    dy *= inverseEdgeLen;

    // Calculate local coordinate for the vertex (relative to xAxis and yAxis at first).
    float2 xAxis = -float2(dx.yzwx[cornerID], dy.yzwx[cornerID]);
    float2 yAxis =  float2(dx.xyzw[cornerID], dy.xyzw[cornerID]);
    float2 localPos;
    bool snapToCenter = false;
    if (normalScale < 0.0) {
        // Vertex is inset from the base shape, so we scale by (cornerRadii - strokeRadius)
        // and have to check for the possibility of an inner miter. It is always inset by an
        // additional conservative AA amount.
        if (center.w < 0.0 || centerWeight * center.z != 0.0) {
            snapToCenter = true;
        } else {
            float localAARadius = center.w;
            float2 insetRadii =
                    cornerRadii + (bidirectionalCoverage ? -strokeRadius : strokeRadius);
            if (joinScale == kMiterScale ||
                any(lessThanEqual(insetRadii, float2(localAARadius)))) {
                // Miter the inset position
                localPos = (insetRadii - localAARadius);
            } else {
                localPos = insetRadii*position - localAARadius*normal;
            }
        }
    } else {
        // Vertex is outset from the base shape (and possibly with an additional AA outset later
        // in device space).
        localPos = (cornerRadii + strokeRadius) * (position + joinScale*position.yx);
    }

    if (snapToCenter) {
        // Center is already relative to true local coords, not the corner basis.
        localPos = center.xy;
    } else {
        // Transform from corner basis to true local coords.
        localPos -= cornerRadii;
        localPos = float2(xs[cornerID], ys[cornerID]) + xAxis*localPos.x + yAxis*localPos.y;
    }

    // Calculate edge distances and device space coordinate for the vertex
    edgeDistances = dy*(xs - localPos.x) - dx*(ys - localPos.y) + edgeBias;

    // NOTE: This 3x3 inverse is different than just taking the 1st two columns of the 4x4
    // inverse of the original SkM44 local-to-device matrix. We could calculate the 3x3 inverse
    // and upload it, but it does not seem to be a bottleneck and saves on bandwidth to
    // calculate it here instead.
    float3x3 deviceToLocal = inverse(localToDevice);
    float3 devPos = localToDevice * localPos.xy1;
    jacobian = float4(deviceToLocal[0].xy - deviceToLocal[0].z*localPos,
                      deviceToLocal[1].xy - deviceToLocal[1].z*localPos);

    if (deviceSpaceDistances) {
        // Apply the Jacobian in the vertex shader so any quadrilateral normals do not have to
        // be passed to the fragment shader. However, it's important to use the Jacobian at a
        // vertex on the edge, not the current vertex's Jacobian.
        float4 gx = -dy*(deviceToLocal[0].x - deviceToLocal[0].z*xs) +
                     dx*(deviceToLocal[0].y - deviceToLocal[0].z*ys);
        float4 gy = -dy*(deviceToLocal[1].x - deviceToLocal[1].z*xs) +
                     dx*(deviceToLocal[1].y - deviceToLocal[1].z*ys);
        // NOTE: The gradient is missing a W term so edgeDistances must still be multiplied by
        // 1/w in the fragment shader. The same goes for the encoded coverage scale.
        edgeDistances *= inversesqrt(gx*gx + gy*gy);

        // Bias non-AA edge distances by device W so its coverage contribution is >= 1.0
        edgeDistances += (1 - edgeAA)*abs(devPos.z);

        // Mixed edge AA shapes do not use subpixel scale+bias for coverage, since they tile
        // to a large shape of unknown--but likely not subpixel--size. Triangles and quads do
        // not use subpixel coverage since the scale+bias is not constant over the shape, but
        // we can't evaluate per-fragment since we aren't passing down their arbitrary normals.
        bool subpixelCoverage = edgeAA == float4(1.0) &&
                                dot(abs(dx*dx.yzwx + dy*dy.yzwx), float4(1.0)) < kEpsilon;
        if (subpixelCoverage) {
            // Reconstructs the actual device-space width and height for all rectangle vertices.
            float2 dim = edgeDistances.xy + edgeDistances.zw;
            perPixelControl.y = 1.0 + min(min(dim.x, dim.y), abs(devPos.z));
        } else {
            perPixelControl.y = 1.0 + abs(devPos.z); // standard 1px width pre W division.
        }
    }

    // Only outset for a vertex that is in front of the w=0 plane to avoid dealing with outset
    // triangles rasterizing differently from the main triangles as w crosses 0.
    if (normalScale > 0.0 && devPos.z > 0.0) {
        // Note that when there's no perspective, the jacobian is equivalent to the normal
        // matrix (inverse transpose), but produces correct results when there's perspective
        // because it accounts for the position's influence on a line's projected direction.
        float2x2 J = float2x2(jacobian);

        float2 edgeAANormal = float2(edgeAA[cornerID], edgeAA.yzwx[cornerID]) * normal;
        float2 nx = cornerAspectRatio.x * edgeAANormal.x * perp(-yAxis) * J;
        float2 ny = cornerAspectRatio.y * edgeAANormal.y * perp( xAxis) * J;

        bool isMidVertex = all(notEqual(edgeAANormal, float2(0)));
        if (joinScale == kMiterScale && isMidVertex) {
            // Produce a bisecting vector in device space.
            nx = normalize(nx);
            ny = normalize(ny);
            if (dot(nx, ny) < -0.8) {
                // Normals are in nearly opposite directions, so adjust to avoid float error.
                float s = sign(cross_length_2d(nx, ny));
                nx =  s*perp(nx);
                ny = -s*perp(ny);
            }
        }
        // Adding the normal components together directly results in what we'd have
        // calculated if we'd just transformed 'normal' in one go, assuming they weren't
        // normalized in the if-block above. If they were normalized, the sum equals the
        // bisector between the original nx and ny.
        //
        // We multiply by W so that after perspective division the new point is offset by the
        // now-unit normal.
        // NOTE: (nx + ny) can become the zero vector if the device outset is for an edge
        // marked as non-AA. In this case normalize() could produce the zero vector or NaN.
        // Until a counter-example is found, GPUs seem to discard triangles with NaN vertices,
        // which has the same effect as outsetting by the zero vector with this mesh, so we
        // don't bother guarding the normalize() (yet).
        devPos.xy += devPos.z * normalize(nx + ny);

        // By construction these points are 1px away from the outer edge in device space.
        if (deviceSpaceDistances) {
            // Apply directly to edgeDistances to save work per pixel later on.
            edgeDistances -= devPos.z;
        } else {
            // Otherwise store separately so edgeDistances can be used to reconstruct corner pos
            perPixelControl.y = -devPos.z;
        }
    } else if (!deviceSpaceDistances) {
        // Triangles are within the original shape so there's no additional outsetting to
        // take into account for coverage calculations.
        perPixelControl.y = 0.0;
    }

    perPixelControl.x = (centerWeight != 0.0)
            // A positive value signals that a pixel is trivially full coverage.
            ? 1.0
            // A negative value signals bidirectional coverage, and a zero value signals a solid
            // interior with per-pixel coverage.
            : bidirectionalCoverage ? -1.0 : 0.0;

    // The fragment shader operates in a canonical basis (x-axis = (1,0), y-axis = (0,1)). For
    // stroked lines, incorporate their local orientation into the Jacobian to preserve this.
    if (strokedLine) {
        // The updated Jacobian is J' = B^-1 * J, where B is float2x2(xAxis, yAxis) for the
        // top-left corner (so that B^-1 is constant over the whole shape). Since it's a line
        // the basis was constructed to be orthonormal, det(B) = 1 and B^-1 is trivial.
        // NOTE: float2x2 is column-major.
        jacobian = float4(float2x2(dy[0], -dy[1], -dx[0], dx[1]) * float2x2(jacobian));
    }

    // Write out final results
    stepLocalCoords = localPos;
    return float4(devPos.xy, devPos.z*depth, devPos.z);
}

// Vertex function for quadrilaterals drawn with per-edge anti-aliasing.
//
// Inputs:
//   normal        - per-vertex template normal; a zero normal disables the device-space outset.
//   edgeAA        - per-edge AA factors. They scale the outset normals and bias the edge
//                   distances via (1.5 - edgeAA).
//                   NOTE(review): presumably 1 for an AA edge and 0 for a non-AA edge; confirm
//                   against the C++ render step.
//   xs, ys        - local-space corner coordinates, ordered TL, TR, BR, BL.
//   depth         - depth value; multiplied by W in the returned position.
//   localToDevice - 3x3 local-to-device transform (inverted here to build the Jacobian).
// Outputs:
//   edgeDistances   - per-edge distances, pre-scaled by the inverse device-space gradient length
//                     and biased by device W. Still needs a 1/w multiply in the fragment shader.
//   stepLocalCoords - local coordinates of the vertex's corner (never outset).
// Returns the homogeneous device position (x, y, depth*w, w).
float4 per_edge_aa_quad_vertex_fn(// Vertex Attributes
                                  float2 normal,
                                  // Instance Attributes
                                  float4 edgeAA,
                                  float4 xs, // ordered TL, TR, BR, BL
                                  float4 ys,
                                  float depth,
                                  float3x3 localToDevice,
                                  // Varyings
                                  out float4 edgeDistances,
                                  // Render Step
                                  out float2 stepLocalCoords) {
    const uint kCornerVertexCount = 4; // KEEP IN SYNC WITH C++'s
                                       // PerEdgeAAQuadRenderStep::kCornerVertexCount

    // Calculate the local edge vectors, ordered L, T, R, B starting from the bottom left point.
    // For quadrilaterals these are not necessarily axis-aligned, but in all cases they orient
    // the +X/+Y normalized vertex template for each corner.
    float4 dx = xs - xs.wxyz;
    float4 dy = ys - ys.wxyz;
    float4 edgeSquaredLen = dx*dx + dy*dy;

    float4 edgeMask = sign(edgeSquaredLen); // 0 for zero-length edge, 1 for non-zero edge.
    if (any(equal(edgeMask, float4(0.0)))) {
        // Must clean up (dx,dy) depending on the empty edge configuration
        if (all(equal(edgeMask, float4(0.0)))) {
            // A point so use the canonical basis
            dx = float4( 0.0, 1.0, 0.0, -1.0);
            dy = float4(-1.0, 0.0, 1.0,  0.0);
            edgeSquaredLen = float4(1.0);
        } else {
            // Triangles (3 non-zero edges) copy the adjacent edge. Otherwise it's a line so
            // replace empty edges with the left-hand normal vector of the adjacent edge.
            bool triangle = (edgeMask[0] + edgeMask[1] + edgeMask[2] + edgeMask[3]) > 2.5;
            float4 edgeX = triangle ? dx.yzwx :  dy.yzwx;
            float4 edgeY = triangle ? dy.yzwx : -dx.yzwx;

            // edgeMask is exactly 0 or 1 per lane, so each mix() is a per-lane select between
            // the original edge value (mask 1) and the substitute from the adjacent edge (mask 0).
            dx = mix(edgeX, dx, edgeMask);
            dy = mix(edgeY, dy, edgeMask);
            edgeSquaredLen = mix(edgeSquaredLen.yzwx, edgeSquaredLen, edgeMask);
            edgeAA = mix(edgeAA.yzwx, edgeAA, edgeMask);
        }
    }

    // Normalize the edge vectors. The cleanup above replaces the squared length of every
    // degenerate edge handled there, so those lanes do not divide by zero.
    float4 inverseEdgeLen = inversesqrt(edgeSquaredLen);
    dx *= inverseEdgeLen;
    dy *= inverseEdgeLen;

    // Select the corner this vertex belongs to and the two unit edge vectors that form its basis.
    uint cornerID = uint(sk_VertexID) / kCornerVertexCount;
    float2 xAxis = -float2(dx.yzwx[cornerID], dy.yzwx[cornerID]);
    float2 yAxis =  float2(dx.xyzw[cornerID], dy.xyzw[cornerID]);

    // The local position is always the un-outset corner of the quad; any AA outset is applied
    // later, in device space only.
    float2 localPos = float2(xs[cornerID], ys[cornerID]);

    // Calculate edge distances and device space coordinate for the vertex
    edgeDistances = dy*(xs - localPos.x) - dx*(ys - localPos.y);

    // NOTE: This 3x3 inverse is different than just taking the 1st two columns of the 4x4
    // inverse of the original SkM44 local-to-device matrix. We could calculate the 3x3 inverse
    // and upload it, but it does not seem to be a bottleneck and saves on bandwidth to
    // calculate it here instead.
    float3x3 deviceToLocal = inverse(localToDevice);
    float3 devPos = localToDevice * localPos.xy1;

    // Apply the Jacobian in the vertex shader so any quadrilateral normals do not have to
    // be passed to the fragment shader. However, it's important to use the Jacobian at a
    // vertex on the edge, not the current vertex's Jacobian.
    float4 gx = -dy*(deviceToLocal[0].x - deviceToLocal[0].z*xs) +
                 dx*(deviceToLocal[0].y - deviceToLocal[0].z*ys);
    float4 gy = -dy*(deviceToLocal[1].x - deviceToLocal[1].z*xs) +
                 dx*(deviceToLocal[1].y - deviceToLocal[1].z*ys);
    // NOTE: The gradient is missing a W term so edgeDistances must still be multiplied by
    // 1/w in the fragment shader. The same goes for the encoded coverage scale.
    edgeDistances *= inversesqrt(gx*gx + gy*gy);

    // Bias non-AA edge distances by device W so its coverage contribution is >= 1.0
    // Add additional 1/2 bias here so we don't have to do so in the fragment shader.
    edgeDistances += (1.5 - edgeAA)*abs(devPos.z);

    // Only outset for a vertex that is in front of the w=0 plane to avoid dealing with outset
    // triangles rasterizing differently from the main triangles as w crosses 0.
    if (any(notEqual(normal, float2(0.0))) && devPos.z > 0.0) {
        // Note that when there's no perspective, the jacobian is equivalent to the normal
        // matrix (inverse transpose), but produces correct results when there's perspective
        // because it accounts for the position's influence on a line's projected direction.
        float2x2 J = float2x2(deviceToLocal[0].xy - deviceToLocal[0].z*localPos,
                              deviceToLocal[1].xy - deviceToLocal[1].z*localPos);

        // Zero out the normal component belonging to any edge whose AA factor is 0.
        float2 edgeAANormal = float2(edgeAA[cornerID], edgeAA.yzwx[cornerID]) * normal;
        float2 nx = edgeAANormal.x * perp(-yAxis) * J;
        float2 ny = edgeAANormal.y * perp( xAxis) * J;

        bool isMidVertex = all(notEqual(edgeAANormal, float2(0)));
        if (isMidVertex) {
            // Produce a bisecting vector in device space.
            nx = normalize(nx);
            ny = normalize(ny);
            if (dot(nx, ny) < -0.8) {
                // Normals are in nearly opposite directions, so adjust to avoid float error.
                float s = sign(cross_length_2d(nx, ny));
                nx =  s*perp(nx);
                ny = -s*perp(ny);
            }
        }
        // Adding the normal components together directly results in what we'd have
        // calculated if we'd just transformed 'normal' in one go, assuming they weren't
        // normalized in the if-block above. If they were normalized, the sum equals the
        // bisector between the original nx and ny.
        //
        // We multiply by W so that after perspective division the new point is offset by the
        // now-unit normal.
        // NOTE: (nx + ny) can become the zero vector if the device outset is for an edge
        // marked as non-AA. In this case normalize() could produce the zero vector or NaN.
        // Until a counter-example is found, GPUs seem to discard triangles with NaN vertices,
        // which has the same effect as outsetting by the zero vector with this mesh, so we
        // don't bother guarding the normalize() (yet).
        devPos.xy += devPos.z * normalize(nx + ny);

        // By construction these points are 1px away from the outer edge in device space.
        // Apply directly to edgeDistances to save work per pixel later on.
        edgeDistances -= devPos.z;
    }

    // Write out final results
    stepLocalCoords = localPos;
    return float4(devPos.xy, devPos.z*depth, devPos.z);
}

// Vertex function for circular arcs.
//
// `position.xy` is a normalized-space vertex that is first clipped against `geoClipPlane`, then
// scaled by centerScales.z (position.z > 0) or centerScales.w (otherwise) and translated by the
// center in centerScales.xy. `position.z` is also the distance the vertex is pushed along the
// device-space direction away from the center. The clip planes and round-cap data are forwarded
// to the varyings. The returned position assumes no perspective (w == 1).
float4 circular_arc_vertex_fn(float3 position,
                              // Instance Attributes
                              float4 centerScales,
                              float3 radiiAndFlags,
                              float3 geoClipPlane,
                              float3 fragClipPlane0,
                              float3 fragClipPlane1,
                              float4 inRoundCapPos,
                              float depth,
                              float3x3 localToDevice,
                              // Varyings
                              out float4 circleEdge,
                              out float3 clipPlane,
                              out float3 isectPlane,
                              out float3 unionPlane,
                              out float  roundCapRadius,
                              out float4 roundCapPos,
                              // Render Step
                              out float2 stepLocalCoords) {
    // TODO: clip offset against clip planes
    float2 arcCenter = centerScales.xy;

    // Geometric clip in normalized space: a vertex with a negative plane distance is moved back
    // along the plane normal by that distance; vertices on the other side are left untouched.
    float planeDist = min(dot(position.xy, geoClipPlane.xy) + geoClipPlane.z, 0);
    position.xy -= geoClipPlane.xy * planeDist;

    // Length of the clipped position; used below to rescale the offset.
    float clippedLength = length(position.xy);

    // Scale and translate to local space. The sign of position.z selects which scale applies.
    bool positiveZ = position.z > 0;
    float localScale = positiveZ ? centerScales.z : centerScales.w;
    float2 localPos = arcCenter + position.xy * localScale;

    float3 devPos = localToDevice * localPos.xy1;
    float3 devCenter = localToDevice * arcCenter.xy1;

    // Offset for AA, then correct the length of the offset.
    float2 offset = devPos.xy - devCenter.xy;
    if (offset != float2(0)) {
        offset = normalize(offset);
        devPos.xy += position.z*offset;

        // Because of geometry clipping the offset must be scaled by newLength/origLength times
        // the vertex's nominal distance from the center:
        //  - position.z > 0: the nominal distance is that of a unit octagon vertex (1.0823922),
        //    which is also the original length, so the factor is just the clipped length.
        //  - otherwise: the original length is 1 and the nominal distance is the inner radius,
        //    so the factor is innerRadius * clipped length.
        float lengthFix = positiveZ ? clippedLength : clippedLength*radiiAndFlags.y;
        offset *= lengthFix;
    }

    circleEdge = float4(offset, radiiAndFlags.xy);

    // The first fragment clip plane is always forwarded as-is. The sign of the flag decides
    // whether the second plane goes to isectPlane or to unionPlane; the unused slot receives a
    // constant plane.
    bool secondPlaneIsIsect = radiiAndFlags.z > 0;
    clipPlane  = fragClipPlane0;
    isectPlane = secondPlaneIsIsect ? fragClipPlane1    : float3(0, 0, 1);
    unionPlane = secondPlaneIsIsect ? float3(0, 0, 0)   : fragClipPlane1;

    // A flag magnitude above 1 requests round caps. The cap radius is expressed in normalized
    // space where the outer radius is 1 and radiiAndFlags.y is the normalized inner radius.
    bool hasRoundCaps = abs(radiiAndFlags.z) > 1;
    roundCapRadius = hasRoundCaps ? (1.0 - radiiAndFlags.y) / 2.0 : 0.0;

    roundCapPos = inRoundCapPos;
    stepLocalCoords = localPos;

    // We assume no perspective
    return float4(devPos.xy, depth, 1);
}

// Vertex function for text glyph quads.
//
// Scales the base quad coordinate by the glyph `size`, places it with `strikeToSourceScale` and
// `xyPos`, and transforms it by `subRunDeviceMatrix`. Atlas coordinates are produced both
// unnormalized (`unormTexCoords`) and normalized by `atlasSizeInv` (`textureCoords`).
// Returns (x, y, depth*w, w).
float4 text_vertex_fn(float2 baseCoords,
                      // Uniforms
                      float4x4 subRunDeviceMatrix,
                      float4x4 deviceToLocal,
                      float2 atlasSizeInv,
                      // Instance Attributes
                      float2 size,
                      float2 uvPos,
                      float2 xyPos,
                      float strikeToSourceScale,
                      float depth,
                      // Varyings
                      out float2 textureCoords,
                      out float2 unormTexCoords,  // used as varying in SDFText
                      // Render Step
                      out float2 stepLocalCoords) {
    // Base quad coordinate scaled to the glyph's size.
    float2 glyphCoords = baseCoords * size;

    // Sub runs have a decomposed transform and are sometimes already transformed into device
    // space, in which `subRunCoords` represents the bounds projected to device space without
    // the local-to-device translation and `subRunDeviceMatrix` contains the translation.
    float2 subRunCoords = strikeToSourceScale * glyphCoords + xyPos;
    float4 devPosition = subRunDeviceMatrix * subRunCoords.xy01;

    // Local coords used for shading.
    // TODO(b/246963258): This is incorrect if the transform has perspective, which would
    // require a division + a valid z coordinate (which is currently set to 0).
    stepLocalCoords = (deviceToLocal * devPosition).xy;

    // Atlas lookup coordinates: unnormalized first, then scaled by the inverse atlas size.
    float2 atlasCoords = glyphCoords + uvPos;
    unormTexCoords = atlasCoords;
    textureCoords = atlasCoords * atlasSizeInv;

    return float4(devPosition.xy, depth*devPosition.w, devPosition.w);
}

// Vertex function for drawing a coverage mask sampled from an atlas.
//
// Interpolates the quad corner inside `drawBounds`, maps it to device space through
// `deviceOrigin` and `maskToDeviceRemainder`, and derives shading local coords with
// `deviceToLocal`. Mask bounds whose LT exceeds RB mark an inverse fill: they are re-sorted and
// `invert` is set to 1. Returns (x, y, depth*w, w).
float4 coverage_mask_vertex_fn(float2 quadCoords,
                               // Uniforms
                               float3x3 maskToDeviceRemainder,
                               // Instance Attributes
                               float4 drawBounds,
                               float4 maskBoundsIn,
                               float2 deviceOrigin,
                               float depth,
                               float3x3 deviceToLocal,
                               // Varyings
                               out float4 maskBounds,
                               out float2 textureCoords,
                               out half invert,
                               // Render Step
                               out float2 stepLocalCoords) {
    // An atlas shape is an axis-aligned rectangle tessellated as a triangle strip.
    //
    // The bounds coordinates are in an intermediate space, pixel-aligned with the mask texture
    // that's sampled in the fragment shader. The coords must be transformed by both
    // maskToDeviceRemainder and translated by deviceOrigin to get device coords.
    float2 maskSpaceCoords = mix(drawBounds.xy, drawBounds.zw, quadCoords);
    textureCoords = maskSpaceCoords;
    float3 drawCoords = maskToDeviceRemainder*((maskSpaceCoords + deviceOrigin).xy1);

    // Local coordinates used for shading come from the final device coords and the inverse of
    // the original local-to-device matrix.
    // TODO: Support float3 local coordinates if the matrix has perspective so that W is
    // interpolated correctly to the fragment shader.
    float3 homogeneousLocal = deviceToLocal * drawCoords;
    stepLocalCoords = homogeneousLocal.xy / homogeneousLocal.z;

    // For an inverse fill, `textureCoords` will get clamped to `maskBounds` and the edge pixels
    // will always land on a 0-coverage border pixel assuming the atlas was prepared with 1px
    // padding around each mask entry. This includes inverse fills where the mask was fully clipped
    // out, since then maskBounds.RBLT == (0,0,-1,-1) and we sample the top-left-most pixel of the
    // atlas, which is guaranteed to be transparent.
    bool inverseFill = !all(lessThanEqual(maskBoundsIn.LT, maskBoundsIn.RB));
    if (inverseFill) {
        // Re-arrange the mask bounds to sorted order for texture clamping in the fragment shader
        maskBounds = maskBoundsIn.RBLT;
        invert = 1;
    } else {
        // Regular fill: bounds are already sorted.
        maskBounds = maskBoundsIn;
        invert = 0;
    }

    return float4(drawCoords.xy, depth*drawCoords.z, drawCoords.z);
}

// Vertex function for covering a bounding rectangle.
//
// Sorted bounds (LT <= RB) describe a regular fill in local space: the corner is interpolated
// and transformed by `matrix`. Otherwise the bounds are treated as an inverse fill whose corners
// are already in device space, and local coords are recovered with inverse(matrix).
// Returns the homogeneous output position; `stepLocalCoords` receives the local coords.
float4 cover_bounds_vertex_fn(float2 corner,
                              float4 bounds,
                              float depth,
                              float3x3 matrix,
                              out float2 stepLocalCoords) {
    bool inverseFill = !all(lessThanEqual(bounds.LT, bounds.RB));
    if (inverseFill) {
        // Inverse fill: the bounds are stored swapped, so interpolate from RB to LT.
        float2 deviceCorner = mix(bounds.RB, bounds.LT, corner);
        // TODO(b/351923375): Get the 3x3 inverse  of the local-to-device transform from the CPU
        // if it can be computed fast enough on the CPU from the cached 4x4 inverse.
        float3 homogeneousLocal = inverse(matrix) * deviceCorner.xy1;
        // Dividing the inverse-mapped local coords by their homogeneous coordinate reconstructs
        // the original local coords.
        float invW = 1.0 / homogeneousLocal.z;
        stepLocalCoords = homogeneousLocal.xy * invW;

        // 1/W also happens to be equal to (matrix*stepLocalCoords.xy1).z, which is the
        // device-space homogeneous coordinate we want perspective interpolation to respect. We
        // multiply the output position by 1/W and set the output position's homogeneous coord to
        // that same 1/W, which ensures the projected vertices are still the device-space corners,
        // but stepLocalCoords will be correctly perspective interpolated by HW.
        return float4(deviceCorner*invW, depth*invW, invW);
    } else {
        // Regular fill: the interpolated corner is the local coordinate itself.
        float2 localCorner = mix(bounds.LT, bounds.RB, corner);
        float3 projected = matrix * localCorner.xy1;
        stepLocalCoords = localCorner;
        return float4(projected.xy, depth*projected.z, projected.z);
    }
}