|
1 |
| -DROP VIEW IF EXISTS popular_music; |
2 |
| -CREATE VIEW popular_music AS |
| 1 | +USE facebook; |
| 2 | + |
| 3 | +# We create first a table of popular music across everyone |
| 4 | +# For efficiency we only keep music liked by more than 10 people |
| 5 | +DROP TEMPORARY TABLE IF EXISTS popular_music; |
| 6 | +CREATE TEMPORARY TABLE popular_music AS |
3 | 7 | SELECT M.Music AS music, COUNT(M.ProfileID) AS cnt
|
4 | 8 | FROM FavoriteMusic M
|
5 | 9 | GROUP BY M.Music
|
| 10 | + HAVING cnt > 10 |
6 | 11 | ORDER BY cnt DESC;
|
7 | 12 |
|
8 | 13 |
|
9 |
| -DROP VIEW IF EXISTS popular_music_by_sex; |
10 |
| -CREATE VIEW popular_music_by_sex AS |
| 14 | +# We now calculate popularity of each music broken down by sex |
| 15 | +DROP TEMPORARY TABLE IF EXISTS popular_music_by_sex; |
| 16 | +CREATE TEMPORARY TABLE popular_music_by_sex AS |
11 | 17 | SELECT M.Music AS music, P.Sex AS gender, COUNT(P.ProfileID) AS cnt
|
12 | 18 | FROM FavoriteMusic M JOIN Profiles P ON P.ProfileID = M.ProfileID
|
13 | 19 | WHERE P.Sex IS NOT NULL
|
14 | 20 | GROUP BY M.Music, P.Sex
|
15 | 21 | ORDER BY cnt DESC;
|
16 | 22 |
|
17 |
| -DROP VIEW IF EXISTS chart; |
18 |
| -CREATE VIEW chart AS |
19 |
| - SELECT music, gender, cnt, |
20 |
| - RANK() OVER (PARTITION BY gender ORDER BY cnt DESC) AS music_rank |
| 23 | +# We will now create two tables with music rank, one per gender |
| 24 | +# In principle, we could also do a window OVER (PARTITION BY gender ORDER BY cnt DESC) |
| 25 | +# However, MySQL has a bug that does not allow temporary tables to |
| 26 | +# join with itself, so we end up creating one temp table for males and another for females |
| 27 | +DROP TEMPORARY TABLE IF EXISTS chart_male; |
| 28 | +CREATE TEMPORARY TABLE chart_male AS |
| 29 | + SELECT music, cnt, |
| 30 | + RANK() OVER (ORDER BY cnt DESC) AS music_rank |
21 | 31 | FROM popular_music_by_sex
|
22 |
| - ORDER BY music_rank, gender; |
23 |
| - |
24 |
| - |
| 32 | + WHERE gender = 'Male' |
| 33 | + ORDER BY music_rank; |
25 | 34 |
|
| 35 | +DROP TEMPORARY TABLE IF EXISTS chart_female; |
| 36 | +CREATE TEMPORARY TABLE chart_female AS |
| 37 | + SELECT music, cnt, |
| 38 | + RANK() OVER (ORDER BY cnt DESC) AS music_rank |
| 39 | + FROM popular_music_by_sex |
| 40 | + WHERE gender = 'Female' |
| 41 | + ORDER BY music_rank; |
| 42 | + |
| 43 | +# Finally we bring everything together. |
| 44 | +# Note that we start with popular_music as a reference table |
| 45 | +# and we left join the other two tables, as there is no guarantee |
| 46 | +# that we will encounter a music in both males and females tables |
| 47 | +# To estimate the difference between males and females we take the |
| 48 | +# log of the rank, and then the difference; the reason we do that |
| 49 | +# is because a difference of 5 between No1 and No6 is very different |
| 50 | +# than a difference of 5 between No605 and No610. With LOG we kind |
| 51 | +# of estimate difference in "orders of magnitude" |
26 | 52 | SELECT S.music, S.cnt,
|
27 | 53 | M.cnt AS male_cnt, M.music_rank AS male_rank,
|
28 |
| - F.cnt AS female_cnt, F.music_rank AS female_rank |
| 54 | + F.cnt AS female_cnt, F.music_rank AS female_rank, |
| 55 | + ROUND(-LOG(F.music_rank / M.music_rank),2) AS diff_females |
29 | 56 | FROM popular_music S
|
30 |
| - LEFT JOIN chart M ON (S.music = M.music AND M.gender='Male') |
31 |
| - LEFT JOIN chart F ON (S.music = F.music AND F.gender='Female'); |
32 |
| - |
33 |
| - |
34 |
| - |
| 57 | + LEFT JOIN chart_female M ON (S.music = M.music) |
| 58 | + LEFT JOIN chart_male F ON (S.music = F.music) |
| 59 | +ORDER BY diff_females DESC; |
0 commit comments