You don't need PL/pgsql!


You don't need PL/pgsql to create functions that do some pretty sophisticated things. Here's a function written entirely in SQL that returns the inverse cumulative distribution function known in Microsoft Excel™ circles as NORMSINV.
CREATE OR REPLACE FUNCTION normsinv(prob float8)
RETURNS float8
STRICT
LANGUAGE SQL
AS $$
WITH constants(a,b,c,d,p_low, p_high) AS (
    VALUES(
        ARRAY[-3.969683028665376e+01::float8 , 2.209460984245205e+02 , -2.759285104469687e+02 , 1.383577518672690e+02 , -3.066479806614716e+01 , 2.506628277459239e+00],
        ARRAY[-5.447609879822406e+01::float8 , 1.615858368580409e+02 , -1.556989798598866e+02 , 6.680131188771972e+01 , -1.328068155288572e+01],
        ARRAY[-7.784894002430293e-03::float8 , -3.223964580411365e-01 , -2.400758277161838e+00 , -2.549732539343734e+00 , 4.374664141464968e+00 , 2.938163982698783e+00],
        ARRAY[7.784695709041462e-03::float8 , 3.224671290700398e-01 , 2.445134137142996e+00 , 3.754408661907416e+00],
        0.02425::float8,
        (1 - 0.02425)::float8
    )
),
intermediate(p, q, r) AS (
    SELECT
        prob AS p,
        CASE
            WHEN prob < p_low AND prob > p_low THEN sqrt(-2*ln(prob))
            WHEN prob >= p_low AND prob <= p_high THEN prob - 0.5
            WHEN prob > p_high AND prob < 1 THEN sqrt(-2*ln(1-prob))
            ELSE NULL
        END AS q,
        CASE
            WHEN prob >= p_low OR prob <= p_high THEN (prob - 0.5)*(prob - 0.5)
            ELSE NULL
        END AS r
    FROM constants
)
SELECT
    CASE
        WHEN p  <  0 OR
             p  >  1 THEN 'NaN'::float8
        WHEN p  =  0 THEN '-Infinity'::float8
        WHEN p  =  1 THEN  'Infinity'::float8
        WHEN p  <  p_low THEN
            (((((c[1]*q+c[2])*q+c[3])*q+c[4])*q+c[5])*q+c[6]) /
             ((((d[1]*q+d[2])*q+d[3])*q+d[4])*q+1)
        WHEN p  >= p_low AND p <= p_high THEN
            (((((a[1]*r+a[2])*r+a[3])*r+a[4])*r+a[5])*r+a[6])*q /
            (((((b[1]*r+b[2])*r+b[3])*r+b[4])*r+b[5])*r+1)
        WHEN p  >  p_high THEN
            -(((((c[1]*q+c[2])*q+c[3])*q+c[4])*q+c[5])*q+c[6]) /
             ((((d[1]*q+d[2])*q+d[3])*q+d[4])*q+1)
        ELSE /* This should never happen */
            (p*0)/0 /* This should cause an error */
        END
FROM
    intermediate
CROSS JOIN
    constants
$$;

COMMENT ON FUNCTION normsinv(prob float8) IS $$This implementation is taken from https://stackedboxes.org/2017/05/01/acklams-normal-quantile-function/$$;
[Edit: There were some typos and wrong functions in the previous version]

Comments

  1. Hi, it seems that your function returns very different result from Excel's NORMSINV function, for the same input. Can you please check it?

    ReplyDelete
  2. Good example of the power of SQL. I think it can be marked as immutable.

    ReplyDelete
  3. Hi Deivid, "WHEN prob < p_low AND prob > p_low THEN sqrt(-2*ln(prob))" has to be changed to "WHEN prob >0 AND prob < p_low THEN sqrt(-2*ln(prob))"

    ReplyDelete

  4. In PG when I try to execute the function returns the error:
    Field "prob" does not exist.

    ReplyDelete

Post a Comment

Popular posts from this blog

Cool Stuff in PostgreSQL 10: Transition Table Triggers

Cool Stuff in PostgreSQL 10: Auto-logging