一、場景一,每行數據帶表頭
現在有如下數據:
student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4
現在的需求是讓每一行數據都帶有表頭字段信息,代碼如下:
import json
import pprint
from csv import DictReader
import os
def csv_to_dict(filename):
    """Read a CSV file and return its rows as a JSON-formatted string.

    The first line of the file is treated as the header. Every following
    row becomes one JSON object mapping each header name to that row's
    value (values stay strings, exactly as read).

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A JSON array string indented by 2 spaces with one object per data
        row, or None when the file cannot be opened or read.
    """
    try:
        # Open the path supplied by the caller instead of a fixed file name.
        # newline='' is the mode the csv module documents for its readers.
        with open(filename, 'r', newline='') as read_obj:
            rows = list(DictReader(read_obj))
    except IOError as err:
        # Best-effort behaviour: report the failure and return None.
        print("I/O error({0})".format(err))
        return None
    return json.dumps(rows, indent=2)
if __name__ == "__main__":
    # Look for student.csv in the directory the script is launched from
    # and print whatever the conversion returns.
    filename = os.path.join(os.getcwd(), 'student.csv')
    print(csv_to_dict(filename))
結果:
D:\Users\Administrator\Anaconda3\python.exe F:/testproject/appiumdedao/test04.py
[
{
"student_Id": "1",
"score": "60",
"rating": "4",
"class_id": "1"
},
{
"student_Id": "1",
"score": "67",
"rating": "1.5",
"class_id": "2"
},
{
"student_Id": "2",
"score": "45",
"rating": "4",
"class_id": "1"
},
{
"student_Id": "2",
"score": "47",
"rating": "4",
"class_id": "2"
},
{
"student_Id": "2",
"score": "50",
"rating": "4",
"class_id": "3"
},
{
"student_Id": "3",
"score": "100",
"rating": "4",
"class_id": "1"
},
{
"student_Id": "3",
"score": "90",
"rating": "3",
"class_id": "2"
},
{
"student_Id": "3",
"score": "61",
"rating": "4",
"class_id": "3"
},
{
"student_Id": "3",
"score": "85",
"rating": "3",
"class_id": "4"
}
]
二、場景二,輸出行號及表頭
現在有如下數據:
student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4
顯示每行行號,並帶表頭信息,代碼如下:
import json
import os
import pprint
def csv_to_dict(filename):
    """Read a CSV file into a dict keyed by zero-based data-row number.

    The first line is the header; its column names are stripped of
    surrounding whitespace. Each following non-blank line becomes a dict
    mapping column name to value, stored under the row's index as a string
    ("0", "1", ...). Blank lines are ignored and do not consume an index.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The dict described above; an empty dict when the file is empty or
        holds only a header; None when the file cannot be opened or read.

    Raises:
        IndexError: If a data row has more fields than the header.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``import csv`` being present.
    import csv

    try:
        with open(filename, 'r') as file:
            # Strip each physical line before parsing, then let csv.reader
            # do the splitting so quoted fields containing commas survive.
            reader = csv.reader(line.strip() for line in file)
            # next(..., []) tolerates a completely empty file.
            header = [name.strip() for name in next(reader, [])]
            result = {}
            # csv.reader yields [] for a blank line; drop those rows.
            data_rows = (row for row in reader if row)
            for counter, row in enumerate(data_rows):
                if len(row) > len(header):
                    raise IndexError(
                        "data row {0} has {1} fields but the header has "
                        "only {2}".format(counter, len(row), len(header))
                    )
                # zip stops at the shorter side, so a short row simply
                # yields fewer keys.
                result[str(counter)] = dict(zip(header, row))
            return result
    except IOError as err:
        # Best-effort behaviour: report the failure and return None.
        print("I/O error({0})".format(err))
if __name__ == "__main__":
    filename = os.path.join(os.getcwd(), 'student.csv')
    # Serialize with a 2-space indent so each numbered row is easy to read.
    print(json.dumps(csv_to_dict(filename), indent=2))
結果:
{
"0": {
"student_Id": "1",
"score": "60",
"rating": "4",
"class_id": "1"
},
"1": {
"student_Id": "1",
"score": "67",
"rating": "1.5",
"class_id": "2"
},
"2": {
"student_Id": "2",
"score": "45",
"rating": "4",
"class_id": "1"
},
"3": {
"student_Id": "2",
"score": "47",
"rating": "4",
"class_id": "2"
},
"4": {
"student_Id": "2",
"score": "50",
"rating": "4",
"class_id": "3"
},
"5": {
"student_Id": "3",
"score": "100",
"rating": "4",
"class_id": "1"
},
"6": {
"student_Id": "3",
"score": "90",
"rating": "3",
"class_id": "2"
},
"7": {
"student_Id": "3",
"score": "61",
"rating": "4",
"class_id": "3"
},
"8": {
"student_Id": "3",
"score": "85",
"rating": "3",
"class_id": "4"
}
}
三、場景三,按某列表頭歸類
現在有如下數據:
userId,movieId,rating
1,16,4
1,24,1.5
2,32,4
2,47,4
2,50,4
3,110,4
3,150,3
3,161,4
3,165,3
現在的需求是獲得每個用戶觀看的電影及其評分,代碼如下:
import csv
class AutoVivification(dict):
    """Dict that creates nested dicts of its own type on first access.

    Looking up a key that is not present stores a new, empty instance of
    the same class under that key and returns it, so chained assignments
    such as ``d[a][b] = value`` work without creating ``d[a]`` first.
    """

    def __missing__(self, item):
        # dict's subscript lookup calls this hook only when the key is
        # absent; store the fresh child so later lookups return the same
        # object.
        child = type(self)()
        self[item] = child
        return child
def main(filename='student.csv'):
    """Group the third CSV column by the first two columns and print it.

    The file's first row is skipped as a header. For every remaining row
    the value in column 2 is stored under ``d[column 0][column 1]``, and
    the resulting nested mapping is printed.

    Args:
        filename: Path of the CSV file to read. Defaults to 'student.csv',
            the name this function originally had hard-coded.

    Raises:
        IndexError: If a non-blank data row has fewer than three fields.
    """
    d = AutoVivification()
    # newline='' is the mode the csv module documents for its readers.
    with open(filename, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',')
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to store.
                continue
            d[row[0]][row[1]] = row[2]
    print(d)


if __name__ == '__main__':
    main()
結果:
{'1': {'16': '4', '24': '1.5'}, '2': {'32': '4', '47': '4', '50': '4'}, '3': {'110': '4', '150': '3', '161': '4', '165': '3'}}
四、場景四,按列歸類
現在有如下數據:
student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4
現在的需求是按列歸類,獲取每一列(例如分數列)下的數據,代碼如下:
import json
import pprint
import pandas as pd
# Load the whole CSV into a DataFrame.
data = pd.read_csv("student.csv")

# Collect every column into a plain list, keyed by the column's name.
data_dict = {col: list(data[col]) for col in data.columns}

pprint.pprint(data_dict)
結果:
{'class_id': [1, 2, 1, 2, 3, 1, 2, 3, 4],
'rating': [4.0, 1.5, 4.0, 4.0, 4.0, 4.0, 3.0, 4.0, 3.0],
'score': [60, 67, 45, 47, 50, 100, 90, 61, 85],
'student_Id': [1, 1, 2, 2, 2, 3, 3, 3, 3]}