Py学习  »  DATABASE

2020-11-10-Mysql(练习题第二套)

隐约喜欢萌萌哒 • 3 年前 • 208 次点击  

遗留问题 where cast(event_day as DATE ) between cast('20200320' as DATE ) and date_add ('20200320' ,interval 7 day ); 这种语法为什么是错误的:发现问题:date_add 后有空格

//select  cast('20200102' as DATE );  能够将这种字符串转换成日期格式

create table tb_tmp_1d(
    cuid varchar(10),  //用户唯一标识
    os_nmae varchar(7), /平台
    soft_version varchar(10),  //版本
    event_day varchar(8)    //日期
);

insert into tb_tmp_1d  values('esd23jd','android','10.10.0','20200101');
insert into tb_tmp_1d  values('ewerjjd','android','10.10.0','20200320');
insert into tb_tmp_1d  values('esftyjd','apple','10.10.1','20200321');
insert into tb_tmp_1d  values('wet23jd','android','10.11.1','20200322');
insert into tb_tmp_1d  values('esd23jd','apple','10.12.0','20200323');
insert into tb_tmp_1d  values('esiopd','android','10.13.1','20200324');
insert into tb_tmp_1d  values('ewe23jd','apple','10.13.1','20200326');
insert into tb_tmp_1d  values('esd23jd','android','10.13.0','20200421');
insert into tb_tmp_1d  values('qqd23jd','apple','10.13.0','20200521');
insert into tb_tmp_1d  values('esd67jd','android','10.13.0','20200621');
insert into tb_tmp_1d  values('e34623jd','apple','10.14.0','20200721');
insert into tb_tmp_1d  values('esduijd','apple','10.31.0','20200821');
insert into tb_tmp_1d  values('edeu23jd','android','11.10.0','20200921');
insert into tb_tmp_1d  values('edeu2216d','android','11.11.0','20200922');

1.
UV(unique visitor)
是指自然人登录自己账号访问量
KPI是指UV

PV (Page View)
是指网页的浏览量

vv(Visitor view)
是指每次登录网站的访问次数

//分析是否要对字段分类以及怎么分类  grouping sets() 聚合不同粒度 ?

2.
写出20200320的次日 次7日的留存分析

//怎么表示留存?  考虑使用每日占比来表示  //这里的次日指的是哪一日?

//分析首先统计20200320的登录人数  
//然后统计次日的登录人数(要去重,考虑同一标识码登录的情况)
//问题:怎么表示次7日,考虑使用窗口函数 count() 统计每日的登录人数 
//接着使用 lag()获取上一日的登录情况  ? 这里使用比较复杂 采用order by 进行比较


#使用窗口函数??怎么写
select
count (distinct cuid ) over( order by event_day rows between 1 preceding and current row ) as result
from tb_tmp_1d 
group by  event_day 


select
count(cuid)
from tb_tmp_1d
where event_day = '20200320'

#采用分解步骤,求取结果
select
t2.event_day,
(t2.number1 / t2.number) * 100
from
(
select
t1.event_day,
t1.number,
lag(t1.number,1,0) over () as number1
from
(
select
event_day,
count(distinct cuid) as number
from tb_tmp_1d
group by event_day) t1 ) t2;

create  table  tb_accounts_df(
cuid varchar(10),
uids varchar(10),
even_day varchar(10)
);

insert into  tb_accounts_df values ('esd23jd','10001','20201010');
insert into  tb_accounts_df values ('ewerjjd','10002','20200908');
insert into  tb_accounts_df values ('esftyjd','10003','20201209');
insert into  tb_accounts_df values ('wet23jd','10008','122100');
insert into  tb_accounts_df values ('esd23jd','10003','202012');
insert into  tb_accounts_df values ('esiopd','100123','23434');
insert into  tb_accounts_df values ('ewe23jd','311097','5663');
insert into  tb_accounts_df values ('esd23jd','3456','432');
insert into  tb_accounts_df values ('qqd23jd','98097','4352');
insert into  tb_accounts_df values ('esd67jd','1000676','4567');
insert into  tb_accounts_df values ('e34623jd','1000890','7823');
insert into  tb_accounts_df values ('esduijd','1000078','878');
insert into  tb_accounts_df values ('edeu23jd','1000088','685');
insert into  tb_accounts_df values ('edeu23jd','1000088','685');
insert into  tb_accounts_df values ('edeuwrqrq','1000088','685');
insert into  tb_accounts_df values ('edffra23jd','1000088','645345');
insert into  tb_accounts_df values ('edttw23jd','1000088','6356465');
insert into  tb_accounts_df values ('edyur3jd','1000088','646435');
insert into  tb_accounts_df values ('edeueewrd','1000088','652525');

3.求取活跃的用户量和活跃的登录用户量
#明确什么是用户量? 指示某区域的留存量  登录量指示某一日总留存中有多少用户登录

//第二张表是全部的用户标识,,分别count() 但是有歧义我不知道一共有多少用户 我只知道总共的用户标识和区域的活跃用户

select 
os_nmae,
count(d.cuid),
count(s.cuid)
from
tb_tmp_1d d
right join 
tb_accounts_df s
on d.cuid = s.cuid
where  event_day  = '20200101'
group by  os_nmae 

4.求某一周的周活跃用户量的平均值

//分析:某一周怎么获取 是否去重  问题:分组和去重?什么时候使用 使用 date_add()方法
使用 between and 

select
   substr(count(1)/7,1,1)
from
    tb_tmp_1d
// #的写法为什么出错了????
#where cast(event_day as DATE ) between cast('20200320' as  DATE )and  date_add ('20200320' ,interval 7 day );

where event_day between  20200320 and 20200327;
Python社区是高质量的Python/Django开发社区
本文地址:http://www.python88.com/topic/99150
 
208 次点击