一、场景一,每行数据带表头
现在有如下数据:
student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4
现在的需求是让每一行数据都带上表头字段信息,代码如下:
import json
import pprint
from csv import DictReader
import os
def csv_to_dict(filename):
    """Convert a CSV file into a JSON string with one object per data row.

    The first row of the file supplies the keys; every following row becomes
    an object mapping each column name to the cell text of that row.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A JSON array string indented by two spaces, or ``None`` when the
        file cannot be read (the I/O error is printed instead of raised).
    """
    try:
        # Open the path that was passed in (not a hard-coded name);
        # newline='' lets the csv module handle line endings itself.
        with open(filename, 'r', newline='') as read_obj:
            list_of_dict = list(DictReader(read_obj))
    except IOError as err:
        print("I/O error({0})".format(err))
        return None
    return json.dumps(list_of_dict, indent=2)
if __name__ == "__main__":
    # Look for student.csv in the current working directory and print
    # whatever csv_to_dict returns for it.
    csv_path = os.path.join(os.getcwd(), 'student.csv')
    print(csv_to_dict(csv_path))
结果:
D:\Users\Administrator\Anaconda3\python.exe F:/testproject/appiumdedao/test04.py
[
{
"student_Id": "1",
"score": "60",
"rating": "4",
"class_id": "1"
},
{
"student_Id": "1",
"score": "67",
"rating": "1.5",
"class_id": "2"
},
{
"student_Id": "2",
"score": "45",
"rating": "4",
"class_id": "1"
},
{
"student_Id": "2",
"score": "47",
"rating": "4",
"class_id": "2"
},
{
"student_Id": "2",
"score": "50",
"rating": "4",
"class_id": "3"
},
{
"student_Id": "3",
"score": "100",
"rating": "4",
"class_id": "1"
},
{
"student_Id": "3",
"score": "90",
"rating": "3",
"class_id": "2"
},
{
"student_Id": "3",
"score": "61",
"rating": "4",
"class_id": "3"
},
{
"student_Id": "3",
"score": "85",
"rating": "3",
"class_id": "4"
}
]
二、场景二,输出行号及表头
现在有如下数据:
student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4
显示每行行号(从 0 开始),并带表头信息,代码如下:
import json
import os
import pprint
def csv_to_dict(filename):
    """Read a CSV file into a dict of row-number -> {column name: value}.

    The first row is taken as the header. Each following non-blank row is
    stored under its zero-based position among the kept rows, converted to a
    string ("0", "1", ...). Surrounding whitespace is stripped from header
    names and from values; all values stay as text.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The nested dict described above; an empty dict when the file has no
        content; ``None`` when the file cannot be read (the I/O error is
        printed instead of raised).
    """
    # Local import: this snippet's import list does not include csv.
    import csv

    result = {}
    try:
        # newline='' lets the csv module handle line endings itself.
        with open(filename, 'r', newline='') as file:
            reader = csv.reader(file)
            header = next(reader, None)
            if header is None:
                # Completely empty file: there is no header and no data.
                return result
            header = [name.strip() for name in header]
            for row in reader:
                values = [value.strip() for value in row]
                if values in ([], ['']):
                    # Blank or whitespace-only line, e.g. a trailing newline.
                    continue
                # zip pairs values with column names by position, so a row
                # with more values than the header no longer raises; the
                # surplus values are ignored.
                result[str(len(result))] = dict(zip(header, values))
    except IOError as err:
        print("I/O error({0})".format(err))
        return None
    return result
if __name__ == "__main__":
    # Look for student.csv in the current working directory.
    csv_path = os.path.join(os.getcwd(), 'student.csv')
    rows_by_index = csv_to_dict(csv_path)
    # pprint.pprint(rows_by_index) is an alternative way to display the result.
    print(json.dumps(rows_by_index, indent=2))
结果:
{
"0": {
"student_Id": "1",
"score": "60",
"rating": "4",
"class_id": "1"
},
"1": {
"student_Id": "1",
"score": "67",
"rating": "1.5",
"class_id": "2"
},
"2": {
"student_Id": "2",
"score": "45",
"rating": "4",
"class_id": "1"
},
"3": {
"student_Id": "2",
"score": "47",
"rating": "4",
"class_id": "2"
},
"4": {
"student_Id": "2",
"score": "50",
"rating": "4",
"class_id": "3"
},
"5": {
"student_Id": "3",
"score": "100",
"rating": "4",
"class_id": "1"
},
"6": {
"student_Id": "3",
"score": "90",
"rating": "3",
"class_id": "2"
},
"7": {
"student_Id": "3",
"score": "61",
"rating": "4",
"class_id": "3"
},
"8": {
"student_Id": "3",
"score": "85",
"rating": "3",
"class_id": "4"
}
}
三、场景三,按某列表头归类
现在有如下数据:
userId,movieId,rating
1,16,4
1,24,1.5
2,32,4
2,47,4
2,50,4
3,110,4
3,150,3
3,161,4
3,165,3
现在的需求是获得每个用户观看过的电影及其评分,代码如下:
import csv
class AutoVivification(dict):
    """A dict that creates a nested instance of itself for any missing key.

    Looking up an absent key with ``d[key]`` stores a new, empty instance of
    the same class under that key and returns it, so chained assignments
    such as ``d[a][b] = value`` work without preparing ``d[a]`` first.
    """

    def __missing__(self, item):
        # dict's own item lookup calls this hook only when the key is absent.
        child = type(self)()
        self[item] = child
        return child
def main(filename='student.csv'):
    """Print each user's movie ratings as a nested mapping.

    Reads a CSV file whose first row is a header and whose data rows hold
    the user id, movie id and rating in their first three columns, then
    prints ``{user_id: {movie_id: rating}}``. All values are kept as the
    text read from the file.

    Args:
        filename: Path of the CSV file to read; defaults to 'student.csv'.
    """
    d = AutoVivification()
    # newline='' lets the csv module handle line endings itself.
    with open(filename, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',')
        # Skip the header row; the default keeps an empty file from raising.
        next(reader, None)
        for row in reader:
            if len(row) < 3:
                # Blank or truncated line: there is no complete record.
                continue
            user_id, movie_id, rating = row[:3]
            d[user_id][movie_id] = rating
    print(d)


if __name__ == '__main__':
    main()
结果:
{'1': {'16': '4', '24': '1.5'}, '2': {'32': '4', '47': '4', '50': '4'}, '3': {'110': '4', '150': '3', '161': '4', '165': '3'}}
四、场景四,按列归类
现在有如下数据:
student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4
现在的需求是按列归类,获取每一列(例如分数列 score)下的数据,代码如下:
import json
import pprint
import pandas as pd
# Load the CSV file into a DataFrame.
data = pd.read_csv("student.csv")

# Map every column name to the list of values found in that column.
data_dict = {col: list(data[col]) for col in data.columns}

# Pretty-print the mapping; print(json.dumps(data_dict)) is an alternative.
pprint.pprint(data_dict)
结果:
{'class_id': [1, 2, 1, 2, 3, 1, 2, 3, 4],
'rating': [4.0, 1.5, 4.0, 4.0, 4.0, 4.0, 3.0, 4.0, 3.0],
'score': [60, 67, 45, 47, 50, 100, 90, 61, 85],
'student_Id': [1, 1, 2, 2, 2, 3, 3, 3, 3]}