Trying to find the second largest value in a column (postgres sql)

2023-02-08 14:49 问答作者：

I am trying to find the second largest value in a column and only the second largest value.

select a.name, max(a.word) as word
from apple a
where a.word < (select max(a.word) from apple a)
group by a.name;

For some reason, what I have now returns the second largest value AND all the lower values also but fortunately avoids the largest value.

Is there a way to fix开发者_JAVA百科 this?

Here's another conceptually simple solution that's been running for me in .1 milliseconds on a table of 21 million rows, according to EXPLAIN ANALYZE. It returns nothing in the case where there's only one value.

SELECT a.name, 
(SELECT word FROM apple ap WHERE ap.name=a.name ORDER BY word ASC OFFSET 1 LIMIT 1) 
FROM apple a

Note that my table already had existing indices on name, word, and (name, word), which allows me to use ORDER BY like that.

The simplest, albeit inefficient(array can exhaust memory):

select student, (array_agg(grade order by grade desc))[2]
from 
student_grades
group by student

The efficient one:

create aggregate two_elements(anyelement)
(
sfunc = array_limit_two,
stype = anyarray,
initcond = '{}'
);

create or replace function array_limit_two(anyarray, anyelement) returns anyarray
as 
$$
begin
    if array_upper($1,1) = 2 then
        return $1;
    else
        return array_append($1, $2);
    end if;
end;
$$ language 'plpgsql';

Test data:

create table student_grades
(
student text,
grade int
);



insert into student_grades values 
('john',70),
('john',80),
('john',90),
('john',100);


insert into student_grades values
('paul',20),
('paul',10),
('paul',50),
('paul',30);


insert into student_grades values
('george',40);

Test code:

-- second largest
select student, coalesce( (two_elements(grade order by grade desc))[2], max(grade) /* min would do too, since it's one element only */ )
from 
student_grades
group by student


-- second smallest
select student, coalesce( (two_elements(grade order by grade))[2], max(grade) /* min would do too, since it's one element only */ )
from 
student_grades
group by student

Output:

q_and_a=# -- second largest
q_and_a=# select student, coalesce( (two_elements(grade order by grade desc))[2], max(grade) /* min would do too, since it's one element only */ )
q_and_a-# from
q_and_a-# student_grades
q_and_a-# group by student;
 student | coalesce
---------+----------
 george  |       40
 john    |       90
 paul    |       30
(3 rows)


q_and_a=#
q_and_a=# -- second smallest
q_and_a=# select student, coalesce( (two_elements(grade order by grade))[2], max(grade) /* min would do too, since it's one element only */ )
q_and_a-# from
q_and_a-# student_grades
q_and_a-# group by student;
 student | coalesce
---------+----------
 george  |       40
 john    |       80
 paul    |       20
(3 rows)

EDIT @diesel The simplest(and efficient also):

-- second largest
select student, array_min(two_elements(grade order by grade desc))
from 
student_grades
group by student;

-- second smallest
select student, array_max(two_elements(grade order by grade))
from 
student_grades
group by student;

The array_max function:

create or replace function array_min(anyarray) returns anyelement
as
$$
select min(unnested) from( select unnest($1) unnested ) as x
$$ language sql;

create or replace function array_max(anyarray) returns anyelement
as
$$
select max(unnested) from( select unnest($1) unnested ) as x
$$ language sql;

EDIT

Could be the simplest and efficient of all, if only Postgresql would make array_max a built-in function and facilitates LIMIT clause on aggregations :-) LIMIT clause on aggregation is my dream feature on Postgresql

select student, array_max( array_agg(grade order by grade limit 2) )
from 
student_grades
group by student;

While that LIMIT on aggregation is not yet available, use this:

-- second largest
select student, 

    array_min
    (

        array ( 
               select grade from student_grades 
               where student = x.student order by grade desc limit 2 )

    )

from 
student_grades x
group by student;


-- second smallest
select student, 

    array_max
    (

        array ( 
               select grade from student_grades 
               where student = x.student order by grade limit 2 )

    )

from 
student_grades x
group by student;

This is also brute force, but is guaranteed to only pass the table exactly and only once:

select name,word
  from (
         select name,word
              , row_number() over (partition by name 
                                       order by word desc)
                as rowNum
           from apple
       ) x
 where rowNum = 2

This version below may perform better if you have a covering index on (name,word) and there is a high count of word values per name:

with recursive myCte as
(
 select name,max(word) as word
      , 1 as rowNum
   from apple
  group by name
  union all
 select par.name
      , (select max(word) as word
           from apple 
          where name = par.name
            AND word < par.word
        ) as word
      , 2 as rowNum
   from myCte par
  where par.rowNum = 1
)
select * from myCte where rownum = 2

SELECT *
FROM (
  SELEC name, 
        dense_rank() over (partition by name order by word desc) as word_rank,
        count(*) over (partition by name) as name_count
  FROM apple
) t
WHERE (word_rank = 2 OR name_count = 1)

Edit:
The name_count = 1 takes care of those cases where only a single row is present for a specific name.

Using dense_rank() instead of rank() makes sure there is a row with word_rank = 2 as dense_rank makes sure there are no gaps

A very brute force query, but it works

select a.name, a.word
from apple a
where (select count(distinct b.word) from apple b
    where b.word > a.word) = 1

Another approach, use RANK:

with ranking as
(
    select student, grade, rank() over(partition by student order by grade desc) as place
    from 
    student_grades
)
select * 
from
ranking
where 
    (student, place) 
    in 
    (
        select student, max(place)
        from ranking
        where place <= 2
        group by student
    )

Second to the MIN:

with ranking as
(
    select student, grade, 
        rank() 
        -- just change DESC to ASC
        over(partition by student order by grade ASC ) as place
    from 
    student_grades
)
select * 
from
ranking
where 
    (student, place) 
    in 
    (                
        select student, max(place) -- still max
        from ranking
        where place <= 2
        group by student
    )

Umm, you don't just mean:

select a.name, max(a.word) as word
from apple a
where a.word < (select max(b.word) from apple b WHERE a.name = b.name)
group by a.name;

do you? One row per name returning the second highest value per name (or no row if there is no second highest value).

If that's what you want, your query was just missing a constraint, although I suspect the above is probably two table scans if PostgreSQL has the sense to convert it to a JOIN.

继续阅读：greatest-n-per-group postgresql sql

Trying to find the second largest value in a column (postgres sql)

更多精彩内容

精彩评论

最新问答

央视是哪个频道？

请问买过的朋友，舒提啦旅行箱实际使用体验如何？？

检查不孕不育需要的费用？

海信ULED电视画质有什么不同的地方?？

钉子可以挂的住画框幕布吗？

问答排行榜

河神2九牛入海钓河妖是第几集河妖什么来历可活吞牛？

性激素六项检查的最佳时间是多久？多少钱？？

Easiest way to get words of one line from istream into a vector?

《梦在燃烧 (《三国演义》动画片主题曲)》MP3歌词-汤子星？

抽烟只抽炫赫门？