聚合列与非聚合列
当你在 SELECT
子句中使用了 聚合函数(如 COUNT
, SUM
, AVG
, MAX
, MIN
)时:
聚合列:就是使用了聚合函数的列或表达式。
非聚合列:就是没有使用任何聚合函数的普通列。
关键规则: 所有出现在 SELECT
子句中的非聚合列,必须同时出现在 GROUP BY
子句中。告诉数据库你要按哪个字段来分组计算。否则,MySQL 会不知道如何显示这些非聚合列的值,从而报错或返回不确定的值。
总结对比表
GROUP BY是什么
简单说:GROUP BY用于合并列中相同的值。
GROUP BY 会将指定列中值相同的行分配到同一个组里。然后,你可以对每个组使用聚合函数(如 COUNT, SUM, AVG, MAX, MIN)来生成一个摘要值。最终结果集中的每一行都代表一个分组。
练习题
一、基础聚合函数练习
统计每个部门的员工数量;
计算每个部门的平均工资、最高工资和最低工资;
统计每个省份的员工数量、平均年龄和平均工资;
统计每种学历背景的员工数量和平均工资;
统计每个部门男女人数及比例;
二、关系运算符练习(>, <, =, >=, <=)
找出平均工资超过10000元的部门;
统计每个部门年龄小于30岁的员工数量;
按薪资等级分组统计各等级人数:<8000(低), 8000-12000(中), >12000(高);
统计2018年以前入职的员工在每个部门的分布情况;
将员工按年龄分段统计各段人数和平均工资:20-30, 31-40, 41-50;
三、逻辑运算符练习(AND, OR, NOT)
统计每个部门中"年龄大于35岁且工资高于10000元"的员工数量;
找出各部门中"研究生学历或工资超过12000元"的员工数量;
统计每个部门中"非后勤部且年龄小于40岁"的员工平均工资;
统计各部门中"(学历为本科或研究生)且(工资在8000-15000之间)"的员工数量;
找出各部门中"不是技术部也不是市场部"的员工最高工资;
四、包含运算符练习(IN, NOT IN)
统计来自'河北','河南','山东'这三个省份的员工在每个部门的分布;
统计各部门中职位不是'保安','电工','后勤主任'的员工平均工资;
找出'技术部','市场部','财务部'这三个部门的员工数量和工资总和;
统计各部门中"学历在('本科','研究生')且省份不在('山西','河北')"的员工数量;
分析各部门中"职位包含'总监'或'主管'且工资大于10000元"的员工统计信息;
五、匹配运算符练习(LIKE, NOT LIKE)
统计职位名称中包含'总监'的员工在每个部门的分布;
按邮箱域名分组统计员工数量(@163.com, @qq.com, @126.com, @sohu.com);
统计姓氏分布(张、王、李、赵等大姓的员工数量);
找出各部门中"姓名不包含'晓'或'小'且邮箱不是qq邮箱"的员工数量;
统计职位以'员'结尾或包含'技术'关键词的员工在各城市的分布;
六、综合应用题
为HR部门提供分析报告:每个省份、每种学历背景下,工资超过10000元的30岁以上员工统计;
分析技术部:按年龄分段统计各段的平均工资和人数:25-30,31-35,36-40,40+;
找出"平均工资高于10000元且员工平均年龄小于35岁"的部门;
统计各部门2018年前后入职员工的平均工资对比;
综合分析:各省份、各部门、各学历背景下,30岁以上高薪(>12000)员工的分布情况;
答案
一、基础聚合函数练习
1. 统计每个部门的员工数量
SELECT
e_department AS 部门,
COUNT(*) AS 员工数量
FROM employees
GROUP BY e_department
ORDER BY 员工数量 DESC;
2. 计算每个部门的平均工资、最高工资和最低工资
SELECT
e_department AS 部门,
AVG(e_salary) AS 平均工资,
MAX(e_salary) AS 最高工资,
MIN(e_salary) AS 最低工资
FROM employees
GROUP BY e_department
ORDER BY 平均工资 DESC;
3. 统计每个省份的员工数量、平均年龄和平均工资
SELECT
e_province AS 省份,
COUNT(*) AS 员工数量,
AVG(e_age) AS 平均年龄,
AVG(e_salary) AS 平均工资
FROM employees
GROUP BY e_province
ORDER BY 员工数量 DESC;
4. 统计每种学历背景的员工数量和平均工资
SELECT
e_education AS 学历,
COUNT(*) AS 员工数量,
AVG(e_salary) AS 平均工资
FROM employees
GROUP BY e_education
ORDER BY 平均工资 DESC;
5. 统计每个部门男女人数及比例
SELECT
e_department AS 部门,
e_gender AS 性别,
COUNT(*) AS 人数,
ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(PARTITION BY e_department), 2) AS 比例
FROM employees
GROUP BY e_department, e_gender
ORDER BY e_department, e_gender;
二、关系运算符练习
6. 找出平均工资超过10000元的部门
SELECT
e_department AS 部门,
AVG(e_salary) AS 平均工资
FROM employees
GROUP BY e_department
HAVING AVG(e_salary) > 10000
ORDER BY 平均工资 DESC;
7. 统计每个部门年龄小于30岁的员工数量
SELECT
e_department AS 部门,
COUNT(*) AS 年轻员工数量
FROM employees
WHERE e_age < 30
GROUP BY e_department
ORDER BY 年轻员工数量 DESC;
8. 按薪资等级分组统计各等级人数
SELECT
CASE
WHEN e_salary < 8000 THEN '低薪'
WHEN e_salary BETWEEN 8000 AND 12000 THEN '中薪'
ELSE '高薪'
END AS 薪资等级,
COUNT(*) AS 人数
FROM employees
GROUP BY
CASE
WHEN e_salary < 8000 THEN '低薪'
WHEN e_salary BETWEEN 8000 AND 12000 THEN '中薪'
ELSE '高薪'
END
ORDER BY 人数 DESC;
9. 统计2018年以前入职的员工在每个部门的分布情况
SELECT
e_department AS 部门,
COUNT(*) AS 老员工数量
FROM employees
WHERE e_hiredate < '2018-01-01'
GROUP BY e_department
ORDER BY 老员工数量 DESC;
10. 将员工按年龄分段统计各段人数和平均工资
SELECT
CASE
WHEN e_age BETWEEN 20 AND 30 THEN '20-30岁'
WHEN e_age BETWEEN 31 AND 40 THEN '31-40岁'
ELSE '41-50岁'
END AS 年龄段,
COUNT(*) AS 人数,
AVG(e_salary) AS 平均工资
FROM employees
GROUP BY
CASE
WHEN e_age BETWEEN 20 AND 30 THEN '20-30岁'
WHEN e_age BETWEEN 31 AND 40 THEN '31-40岁'
ELSE '41-50岁'
END
ORDER BY 年龄段;
三、逻辑运算符练习
11. 统计每个部门中"年龄大于35岁且工资高于10000元"的员工数量
SELECT
e_department AS 部门,
COUNT(*) AS 高薪资深员工数量
FROM employees
WHERE e_age > 35 AND e_salary > 10000
GROUP BY e_department
ORDER BY 高薪资深员工数量 DESC;
12. 找出各部门中"研究生学历或工资超过12000元"的员工数量
SELECT
e_department AS 部门,
COUNT(*) AS 符合条件的员工数量
FROM employees
WHERE e_education = '研究生' OR e_salary > 12000
GROUP BY e_department
ORDER BY 符合条件的员工数量 DESC;
13. 统计每个部门中"非后勤部且年龄小于40岁"的员工平均工资
SELECT
e_department AS 部门,
AVG(e_salary) AS 平均工资
FROM employees
WHERE e_department != '后勤部' AND e_age < 40
GROUP BY e_department
ORDER BY 平均工资 DESC;
14. 统计各部门中"(学历为本科或研究生)且(工资在8000-15000之间)"的员工数量
SELECT
e_department AS 部门,
COUNT(*) AS 符合条件的员工数量
FROM employees
WHERE (e_education = '本科' OR e_education = '研究生')
AND e_salary BETWEEN 8000 AND 15000
GROUP BY e_department
ORDER BY 符合条件的员工数量 DESC;
15. 找出各部门中"不是技术部也不是市场部"的员工最高工资
SELECT
e_department AS 部门,
MAX(e_salary) AS 最高工资
FROM employees
WHERE e_department NOT IN ('技术部', '市场部')
GROUP BY e_department
ORDER BY 最高工资 DESC;
四、包含运算符练习
16. 统计来自'河北','河南','山东'这三个省份的员工在每个部门的分布
SELECT
e_department AS 部门,
e_province AS 省份,
COUNT(*) AS 员工数量
FROM employees
WHERE e_province IN ('河北', '河南', '山东')
GROUP BY e_department, e_province
ORDER BY e_department, 员工数量 DESC;
17. 统计各部门中职位不是'保安','电工','后勤主任'的员工平均工资
SELECT
e_department AS 部门,
AVG(e_salary) AS 平均工资
FROM employees
WHERE e_job NOT IN ('保安', '电工', '后勤主任')
GROUP BY e_department
ORDER BY 平均工资 DESC;
18. 找出'技术部','市场部','财务部'这三个部门的员工数量和工资总和
SELECT
e_department AS 部门,
COUNT(*) AS 员工数量,
SUM(e_salary) AS 工资总和
FROM employees
WHERE e_department IN ('技术部', '市场部', '财务部')
GROUP BY e_department
ORDER BY 工资总和 DESC;
19. 统计各部门中"学历在('本科','研究生')且省份不在('山西','河北')"的员工数量
SELECT
e_department AS 部门,
COUNT(*) AS 员工数量
FROM employees
WHERE e_education IN ('本科', '研究生')
AND e_province NOT IN ('山西', '河北')
GROUP BY e_department
ORDER BY 员工数量 DESC;
20. 分析各部门中"职位包含'总监'或'主管'且工资大于10000元"的员工统计信息
SELECT
e_department AS 部门,
COUNT(*) AS 人数,
AVG(e_salary) AS 平均工资,
MAX(e_salary) AS 最高工资
FROM employees
WHERE (e_job LIKE '%总监%' OR e_job LIKE '%主管%')
AND e_salary > 10000
GROUP BY e_department
ORDER BY 平均工资 DESC;
五、匹配运算符练习
21. 统计职位名称中包含'总监'的员工在每个部门的分布
SELECT
e_department AS 部门,
COUNT(*) AS 总监人数
FROM employees
WHERE e_job LIKE '%总监%'
GROUP BY e_department
ORDER BY 总监人数 DESC;
22. 按邮箱域名分组统计员工数量
SELECT
SUBSTRING(e_email, LOCATE('@', e_email) + 1) AS 邮箱域名,
COUNT(*) AS 员工数量
FROM employees
GROUP BY SUBSTRING(e_email, LOCATE('@', e_email) + 1)
ORDER BY 员工数量 DESC;
23. 统计姓氏分布
SELECT
LEFT(e_name, 1) AS 姓氏,
COUNT(*) AS 人数
FROM employees
GROUP BY LEFT(e_name, 1)
ORDER BY 人数 DESC;
24. 找出各部门中"姓名不包含'晓'或'小'且邮箱不是qq邮箱"的员工数量
SELECT
e_department AS 部门,
COUNT(*) AS 员工数量
FROM employees
WHERE e_name NOT LIKE '%晓%'
AND e_name NOT LIKE '%小%'
AND e_email NOT LIKE '%@qq.com'
GROUP BY e_department
ORDER BY 员工数量 DESC;
25. 统计职位以'员'结尾或包含'技术'关键词的员工在各城市的分布
SELECT
e_city AS 城市,
COUNT(*) AS 员工数量
FROM employees
WHERE e_job LIKE '%员' OR e_job LIKE '%技术%'
GROUP BY e_city
ORDER BY 员工数量 DESC;
六、综合应用题
26. 每个省份、每种学历背景下,工资超过10000元的30岁以上员工统计
SELECT
e_province AS 省份,
e_education AS 学历,
COUNT(*) AS 员工数量,
AVG(e_salary) AS 平均工资
FROM employees
WHERE e_salary > 10000 AND e_age > 30
GROUP BY e_province, e_education
ORDER BY 省份, 员工数量 DESC;
27. 分析技术部:按年龄分段统计各段的平均工资和人数
SELECT
CASE
WHEN e_age BETWEEN 25 AND 30 THEN '25-30岁'
WHEN e_age BETWEEN 31 AND 35 THEN '31-35岁'
WHEN e_age BETWEEN 36 AND 40 THEN '36-40岁'
ELSE '40岁以上'
END AS 年龄段,
COUNT(*) AS 人数,
AVG(e_salary) AS 平均工资
FROM employees
WHERE e_department = '技术部'
GROUP BY
CASE
WHEN e_age BETWEEN 25 AND 30 THEN '25-30岁'
WHEN e_age BETWEEN 31 AND 35 THEN '31-35岁'
WHEN e_age BETWEEN 36 AND 40 THEN '36-40岁'
ELSE '40岁以上'
END
ORDER BY 年龄段;
28. 找出"平均工资高于10000元且员工平均年龄小于35岁"的部门
SELECT
e_department AS 部门,
AVG(e_salary) AS 平均工资,
AVG(e_age) AS 平均年龄
FROM employees
GROUP BY e_department
HAVING AVG(e_salary) > 10000 AND AVG(e_age) < 35
ORDER BY 平均工资 DESC;
29. 统计各部门2018年前后入职员工的平均工资对比
SELECT
e_department AS 部门,
CASE
WHEN e_hiredate < '2018-01-01' THEN '2018年前入职'
ELSE '2018年后入职'
END AS 入职时间,
COUNT(*) AS 人数,
AVG(e_salary) AS 平均工资
FROM employees
GROUP BY e_department,
CASE
WHEN e_hiredate < '2018-01-01' THEN '2018年前入职'
ELSE '2018年后入职'
END
ORDER BY e_department, 入职时间;
30. 综合分析:各省份、各部门、各学历背景下,30岁以上高薪员工的分布情况
SELECT
e_province AS 省份,
e_department AS 部门,
e_education AS 学历,
COUNT(*) AS 员工数量,
AVG(e_salary) AS 平均工资
FROM employees
WHERE e_age > 30 AND e_salary > 12000
GROUP BY e_province, e_department, e_education
ORDER BY 省份, 部门, 员工数量 DESC;