Erlang/OTP Forums

Author Message

<  Advanced Erlang/OTP  ~  qlc join query results

Spectra
Posted: Mon Dec 08, 2008 11:45 pm Reply with quote
User Joined: 08 Dec 2008 Posts: 56 Location: Australia
Hi all,
(Please forgive the cross-post with the erlang-questions list, but I'm kind of desperate to get a response here...) I'm hoping someone will be able to explain some interesting results we're getting when using qlc. Essentially, when we try to do what looks like a relatively simple "join" style operation, we're getting different results depending on the ordering of our generator terms, and we don't understand why. Here's some sample code that demonstrates the issue:

Code:

-module(post).                           
-export([start/0]).                       
-import(lists, [map/2]).           
-import(io, [fwrite/1, fwrite/2]).       
-include_lib("stdlib/include/qlc.hrl").   

-record(r1, {a, b}).
-record(r2, {c, d}).

%% @doc Reproduces a qlc join whose result size depends on generator order.
%%
%% Starts Mnesia, creates the `r1' and `r2' tables, stores one `r1' row and
%% one hundred `r2' rows, then runs the same equality join three times:
%% r1-then-r2, r2-then-r1, and r1-then-r2 with the nested-loop join forced.
%% The number of answers of each query is printed. Returns `ok'.
%%
%% Fails with `badmatch' if Mnesia does not start or a table cannot be
%% created.
start() ->
    %% Start up mnesia and set up two tables, one for each record type.
    ok = mnesia:start(),
    {atomic, ok} = mnesia:create_table(r1, [{attributes, record_info(fields, r1)}]),
    {atomic, ok} = mnesia:create_table(r2, [{attributes, record_info(fields, r2)}]),

    %% Populate the tables with one element and 100 elements respectively.
    %% Every r2 row carries d = 1, which equals b of the single r1 row, so
    %% each of the 100 (r1, r2) pairs satisfies the join condition below.
    ok = mnesia:dirty_write(r1, #r1{a = 1, b = 1}),
    lists:foreach(
        fun(N) -> ok = mnesia:dirty_write(r2, #r2{c = N, d = 1}) end,
        lists:seq(1, 100)
    ),

    %% The comprehensions must stay literal arguments of qlc:q/1,2: the
    %% query is built from the source text of the call at compile time.
    run_query(
        "join by field r1, r2",
        qlc:q([
            {X#r1.b, A#r2.d}
         || X <- mnesia:table(r1),
            A <- mnesia:table(r2),
            X#r1.b == A#r2.d
        ])
    ),
    %% Same join with the two generators swapped.
    run_query(
        "join by field r2, r1",
        qlc:q([
            {X#r1.b, A#r2.d}
         || A <- mnesia:table(r2),
            X <- mnesia:table(r1),
            X#r1.b == A#r2.d
        ])
    ),
    %% Same generator order as the first query, but with the join method
    %% pinned to nested_loop instead of letting qlc choose one.
    run_query(
        "join by field r1, r2 nested",
        qlc:q(
            [
                {X#r1.b, A#r2.d}
             || X <- mnesia:table(r1),
                A <- mnesia:table(r2),
                X#r1.b == A#r2.d
            ],
            {join, nested_loop}
        )
    ),
    ok.

%% Evaluates the query handle QH inside a Mnesia transaction and prints,
%% prefixed with Label, either the number of answers or the failed result.
run_query(Label, QH) ->
    case mnesia:transaction(fun() -> qlc:e(QH) end) of
        {atomic, Answers} ->
            io:fwrite("~s result = ~p entries\n", [Label, length(Answers)]);
        Error ->
            io:fwrite("~s error: ~p\n", [Label, Error])
    end.


This produces the following output:

join by field r1, r2 result = 2 entries
join by field r2, r1 result = 100 entries
join by field r1, r2 nested result = 100 entries

The second and third lines are what we'd expect to get - the question is, why do we get only two results from the first query? Checking the qlc:info() results for the first two queries shows that they're both using the 'merge' join method, so what exactly is it about that method that makes the ordering of generators affect the result? Also, why exactly 2 results? I can understand 100, or possibly 1, but 2?

Thanks in advance.
View user's profile Send private message
Spectra
Posted: Tue Dec 09, 2008 8:14 pm Reply with quote
User Joined: 08 Dec 2008 Posts: 56 Location: Australia
Hey - in case anyone should run up against the same problem, it turns out it's a bug in stdlib. Here's the patch Hans Bolinder was kind enough to provide (which will presumably be incorporated into the next release):

Code:
*** /usr/local/otp/releases/otp_beam_linux_sles10_x64_r12b_patched/lib/stdlib-1.15.5/src/qlc.erl   Thu Nov  6 12:10:06 2008
--- /clearcase/otp/erts/lib/stdlib/src/qlc.erl   Tue Dec  9 16:00:15 2008
***************
*** 2978,2985 ****
      end.
 
  %% element(C2, E2_0) == K1, element(C2, E2) == K1_0
! same_keys1(E1_0, _K1_0, [], _C1, E2, _C2, E2_0, _L2, M) ->
!     end_merge_join([?JWRAP(E1_0, E2_0), ?JWRAP(E1_0, E2)], M);
  same_keys1(E1_0, K1_0, [E1 | _]=L1, C1, E2, C2, E2_0, L2, M) ->
      K1 = element(C1, E1),
      if
--- 2978,2986 ----
      end.
 
  %% element(C2, E2_0) == K1, element(C2, E2) == K1_0
! same_keys1(E1_0, K1_0, []=L1, C1, E2, C2, E2_0, L2, M) ->
!     [?JWRAP(E1_0, E2_0), ?JWRAP(E1_0, E2) |
!      fun() -> same_keys(K1_0, E1_0, L1, C1, L2, C2, M) end];
  same_keys1(E1_0, K1_0, [E1 | _]=L1, C1, E2, C2, E2_0, L2, M) ->
      K1 = element(C1, E1),
      if


Cheers,

Bernard
View user's profile Send private message

Display posts from previous:  

All times are GMT
Page 1 of 1
This forum is locked: you cannot post, reply to, or edit topics.

Jump to:  

You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You cannot download files in this forum