Sydney Oracle Lab: Analytics with ORDER BY and DISTINCT STRAGG

A recent request in a help forum required an SQL that would return rows until 5 distinct values of a specific column had been returned.

Using analytics was proving difficult as the DISTINCT clause wouldn't work with an ORDER BY. While a non-analytical function was found, I offered an analytic SQL that used a user-defined aggregate. This is similar to STRAGG but eliminates duplicates prior to returning the result string. [The elimination process involves 'associative arrays' - the old INDEX BY tables with a VARCHAR2 index. This was new in 9iR2 so won't work in earlier versions where you'd need to develop your own sort routine.]


create or replace type str_dist_agg_type as object
(
  total varchar2(4000),
  static function
       ODCIAggregateInitialize(sctx IN OUT str_dist_agg_type )
       return number,
  member function
       ODCIAggregateIterate(self IN OUT str_dist_agg_type ,
                            value IN varchar2 )
       return number,
  member function
       ODCIAggregateTerminate(self IN str_dist_agg_type,
                              returnValue OUT  varchar2,
                              flags IN number)
       return number,
  member function
       ODCIAggregateMerge(self IN OUT str_dist_agg_type,
                          ctx2 IN str_dist_agg_type)
       return number,
  member function
       ODCIAggregateDelete(self IN OUT str_dist_agg_type,
                           value in varchar2)
       return number
)
/

create or replace type body str_dist_agg_type
is
--
static function ODCIAggregateInitialize(sctx IN OUT str_dist_agg_type)
return number
is
begin
   sctx := str_dist_agg_type( null );
   return ODCIConst.Success;
end;
--
member function ODCIAggregateIterate(self IN OUT str_dist_agg_type,
                                    value IN varchar2 )
return number
is
begin
   --Use chr(7) as an internal separator because, since
   --it is a pretty useless control character, it is unlikely
   --to be in the input data or cause any internal problems.
   self.total := self.total || value || chr(7);
   return ODCIConst.Success;
end;
--
member function ODCIAggregateTerminate(self IN str_dist_agg_type,
                                      returnValue OUT varchar2,
                                      flags IN number)
return number
is
 type tab_dist is table of number index by varchar2(4000);
 t_dist tab_dist;
 v_point number := 0;
 v_str varchar2(4000);
 v_sep varchar2(1) := chr(7);
 v_ret varchar2(4000);
begin
 v_str := rtrim(ltrim(self.total,v_sep),v_sep);
 v_point := instr(v_str,v_sep);
 while nvl(v_point,0) != 0 loop
   t_dist(substr(v_str,1,v_point-1)) := 1;
   v_str := ltrim(substr(v_str,v_point),v_sep);
   v_point := instr(v_str,v_sep);
 end loop;
 t_dist(v_str) := 1;
 --
 v_str := t_dist.first;
 while v_str is not null loop
   v_ret := v_ret || v_str;
   v_str := t_dist.next(v_str);
 end loop;
 returnValue := v_ret;
 return ODCIConst.Success;
end;
--
member function ODCIAggregateDelete(self IN OUT str_dist_agg_type,
                                   value in varchar2)
return number
is
 v_point number;
begin
 v_point := instr(self.total,value||chr(7));
 if v_point > 0 then
   self.total := substr(self.total,1,v_point-1)||
                   substr(self.total, v_point+length(value)+1);
 end if;
 return ODCIConst.Success;
end;
--
member function ODCIAggregateMerge(self IN OUT str_dist_agg_type,
                                  ctx2 IN str_dist_agg_type)
return number
is
begin
 self.total := self.total ||chr(7)||ctx2.total;
 return ODCIConst.Success;
end;
end;
/

CREATE or replace
 FUNCTION strdagg(input varchar2 )
 RETURN varchar2
 PARALLEL_ENABLE AGGREGATE USING str_dist_agg_type;

grant execute on strdagg to public;

Note : STRDAGG uses the CHR(7) character internally as a delimiter, so if this is in the data you are likely to see problems. No 'visible' delimiter is shown, so if you want one you'll need to specify it as part of the expression :

select table_name, column_name, strdagg(data_type||',') over
(partition by table_name order by table_name, column_id) data_types
from user_tab_columns
where table_name in ('CHILD','PARENT')

TABLE_NAME COLUMN_NAME DATA_TYPES
---------- --------------- --------------------
CHILD ID NUMBER,
CHILD PARENT_ID NUMBER,
CHILD DESCRIPTION NUMBER,VARCHAR2,
PARENT ID NUMBER,
PARENT DESCRIPTION NUMBER,VARCHAR2,

I've put all the 'tricky' processing in the TERMINATE function so it can be easily amended (eg if you wanted to return the number of duplicates for each value or have a case-insensitive sort).

(If you are interested in the original problem and the non-analytical solution, it is in the Quest Pipelines' PL/SQL forum entitled 'Tricky SQL Challenge' in the 'Writing and Tuning SQL' conference.)

Sydney Oracle Lab

Pages

Thursday, September 08, 2005

Analytics with ORDER BY and DISTINCT STRAGG

No comments: