Skip to content

Commit ef865f7

Browse files
authored
Update music_by_gender_rank_example.sql
1 parent a42d882 commit ef865f7

File tree

1 file changed

+42
-17
lines changed

1 file changed

+42
-17
lines changed
+42-17
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,59 @@
1-
DROP VIEW IF EXISTS popular_music;
2-
CREATE VIEW popular_music AS
1+
USE facebook;
2+
3+
# We create first a table of popular music across everyone
4+
# For efficiency we only keep music liked by more than 10 people
5+
DROP TEMPORARY TABLE IF EXISTS popular_music;
6+
CREATE TEMPORARY TABLE popular_music AS
37
SELECT M.Music AS music, COUNT(M.ProfileID) AS cnt
48
FROM FavoriteMusic M
59
GROUP BY M.Music
10+
HAVING cnt > 10
611
ORDER BY cnt DESC;
712

813

9-
DROP VIEW IF EXISTS popular_music_by_sex;
10-
CREATE VIEW popular_music_by_sex AS
14+
# We now calculate popularity of each music broken down by sex
15+
DROP TEMPORARY TABLE IF EXISTS popular_music_by_sex;
16+
CREATE TEMPORARY TABLE popular_music_by_sex AS
1117
SELECT M.Music AS music, P.Sex AS gender, COUNT(P.ProfileID) AS cnt
1218
FROM FavoriteMusic M JOIN Profiles P ON P.ProfileID = M.ProfileID
1319
WHERE P.Sex IS NOT NULL
1420
GROUP BY M.Music, P.Sex
1521
ORDER BY cnt DESC;
1622

17-
DROP VIEW IF EXISTS chart;
18-
CREATE VIEW chart AS
19-
SELECT music, gender, cnt,
20-
RANK() OVER (PARTITION BY gender ORDER BY cnt DESC) AS music_rank
23+
# We will now create two tables with music rank, one per gender
24+
# In principle, we could also do a window OVER (PARTITION BY gender ORDER BY cnt DESC)
25+
# However, MySQL has a bug that does not allow temporary tables to
26+
# join with itself, so we end up creating one temp table for males and another for females
27+
DROP TEMPORARY TABLE IF EXISTS chart_male;
28+
CREATE TEMPORARY TABLE chart_male AS
29+
SELECT music, cnt,
30+
RANK() OVER (ORDER BY cnt DESC) AS music_rank
2131
FROM popular_music_by_sex
22-
ORDER BY music_rank, gender;
23-
24-
32+
WHERE gender = 'Male'
33+
ORDER BY music_rank;
2534

35+
DROP TEMPORARY TABLE IF EXISTS chart_female;
36+
CREATE TEMPORARY TABLE chart_female AS
37+
SELECT music, cnt,
38+
RANK() OVER (ORDER BY cnt DESC) AS music_rank
39+
FROM popular_music_by_sex
40+
WHERE gender = 'Female'
41+
ORDER BY music_rank;
42+
43+
# Finally we bring everything together.
44+
# Note that we start with popular_music as a reference table
45+
# and we left join the other two tables, as there is no guarantee
46+
# that we will encounter a music in both males and females tables
47+
# To estimate the difference between males and females we take the
48+
# log of the rank, and then the difference; the reason we do that
49+
# is because a difference of 5 between No1 and No6 is very different
50+
# than a difference of 5 between No605 and No610. With LOG we kind
51+
# of estimate difference in "orders of magnitude"
2652
SELECT S.music, S.cnt,
2753
M.cnt AS male_cnt, M.music_rank AS male_rank,
28-
F.cnt AS female_cnt, F.music_rank AS female_rank
54+
F.cnt AS female_cnt, F.music_rank AS female_rank,
55+
ROUND(-LOG(F.music_rank / M.music_rank),2) AS diff_females
2956
FROM popular_music S
30-
LEFT JOIN chart M ON (S.music = M.music AND M.gender='Male')
31-
LEFT JOIN chart F ON (S.music = F.music AND F.gender='Female');
32-
33-
34-
57+
LEFT JOIN chart_female M ON (S.music = M.music)
58+
LEFT JOIN chart_male F ON (S.music = F.music)
59+
ORDER BY diff_females DESC;

0 commit comments

Comments
 (0)