import pandas as pd
import numpy as np
# Load the phone screen-size table from the Excel workbook.
excel_path = "C:\\Users\\qianqian.wang\\Desktop\\测试代码文件夹\\BR-54751\\phone-size.xlsx"
data = pd.read_excel(excel_path)
data
# Original author's note: the table holds 93 distinct screen sizes.
data.shape
# Count the rows whose width exceeds their height; the comparison yields a
# boolean Series (original author's note: the count was 66).
wider_than_tall = data["screen_width"] > data["screen_height"]
sum(wider_than_tall)
# Normalise orientation so that, on every row, the first column is never
# smaller than the second (the original note states the goal as
# "width greater than height"; column 0 is presumably screen_width and
# column 1 screen_height -- confirm against the workbook layout).
needs_swap = (data.iloc[:, 0] < data.iloc[:, 1]).to_numpy()
if needs_swap.any():
    # to_numpy() drops the column labels, so the reversed pair is written back
    # purely by position instead of being re-aligned to the original columns.
    data.iloc[needs_swap, [0, 1]] = data.iloc[needs_swap, [1, 0]].to_numpy()
data
# Sort by a given field
# Order the phone sizes by width, smallest first (ascending is the default).
data = data.sort_values("screen_width")
data
# Fix up the index
# Renumber the rows from 0 after sorting and discard the old index labels.
data = data.reset_index(drop=True)
data
# Compute the share of people covered by the given sizes
import copy

# Snapshot of the table at this point; each pass below works on its own copy
# so that rows removed by one pass do not affect the other.
data_copy = copy.deepcopy(data)

# Target screen sizes, paired by position: (list1[i], list2[i]).
list1 = [1242, 1125, 1242, 750, 640, 640, 640, 640,
         2048, 1668, 1668, 1536, 1536, 2048, 768, 768]
list2 = [2688, 2436, 2208, 1334, 1096, 1136, 920, 960,
         2732, 2388, 2224, 2008, 2048, 1496, 1004, 1024]


def _count_people_for_sizes(frame, first_dims, second_dims, tolerance=0, label="frame"):
    """Sum the ``people`` column over rows matching any of the target sizes.

    Each target pair is orientation-normalised before matching: the larger
    value is compared with ``screen_width`` and the smaller with
    ``screen_height``. The table is normalised to width >= height earlier in
    the script, so a landscape-ordered target such as (2048, 1496) could
    otherwise never match any row.

    Args:
        frame: DataFrame with ``screen_width``, ``screen_height`` and
            ``people`` columns. It is not modified; a copy is used.
        first_dims: First dimension of every target size.
        second_dims: Second dimension of every target size, paired by
            position with ``first_dims``.
        tolerance: A row matches when both of its dimensions lie within
            ``+/- tolerance`` (inclusive) of the target. ``0`` means an
            exact match.
        label: Name shown in the progress line that reports the shape of
            the remaining rows.

    Returns:
        A ``(count, remaining)`` tuple: the summed ``people`` of all matched
        rows, and the rows of ``frame`` that matched no target.
    """
    remaining = frame.copy()
    count_people = 0
    for first, second in zip(first_dims, second_dims):
        target_width = max(first, second)
        target_height = min(first, second)
        # Series.between is inclusive on both ends, i.e. lo <= value <= hi.
        temp_index = (
            remaining["screen_width"].between(target_width - tolerance,
                                              target_width + tolerance)
            & remaining["screen_height"].between(target_height - tolerance,
                                                 target_height + tolerance)
        )
        matched_rows = int(temp_index.sum())
        print("sum(temp_index)", matched_rows)
        count_people = remaining.loc[temp_index, "people"].sum() + count_people
        print("count_people=", count_people)
        if matched_rows != 0:
            # Remove matched rows so that a later, overlapping target window
            # cannot count the same row twice.
            remaining = remaining.loc[~temp_index]
        print(f"{label}.shape=", remaining.shape)
    return count_people, remaining


# Pass 1: exact size matches.
count_people, data2 = _count_people_for_sizes(
    data_copy, list1, list2, tolerance=0, label="data2")
# Recorded as 0.5506492267356433 before target pairs were
# orientation-normalised; re-run to refresh the figure.
print("percent=", count_people / data["people"].sum())

# Pass 2: width and height within +/-5 of a target count as the same size
# (the original note says "5mm"; the unit is whatever the columns hold).
count_people, data3 = _count_people_for_sizes(
    data_copy, list1, list2, tolerance=5, label="data3")
# Recorded as 0.7277552800496253 before target pairs were
# orientation-normalised; re-run to refresh the figure.
print("percent=", count_people / data["people"].sum())