Python:將CSV格式轉化為字典


一、場景一,每行數據帶表頭

現在有如下數據:

student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4

現在需求讓每一行的數據都帶有表頭字段信息,代碼如下:

import json
import pprint
from csv import DictReader
import os

def csv_to_dict(filename):
    """Read a CSV file and return its rows as a JSON-formatted string.

    The first line of the file is taken as the header. Every following row
    becomes one JSON object that maps each header field to the row's value;
    values are kept as strings, exactly as ``csv.DictReader`` yields them.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A JSON array (indented by two spaces) with one object per data row,
        or ``None`` when the file cannot be opened or read.
    """
    try:
        # Open the path the caller passed in (it used to be ignored in favour
        # of a hard-coded 'student.csv'). newline='' lets the csv module do
        # its own line-ending handling, as the csv documentation recommends.
        with open(filename, 'r', newline='') as read_obj:
            list_of_dict = list(DictReader(read_obj))
    except IOError as err:
        print("I/O error({0})".format(err))
        return None
    return json.dumps(list_of_dict, indent=2)

if __name__ == "__main__":
    # Look for student.csv in the directory the script is launched from and
    # print whatever csv_to_dict returns for it.
    csv_path = os.path.join(os.getcwd(), 'student.csv')
    print(csv_to_dict(csv_path))

結果:

D:\Users\Administrator\Anaconda3\python.exe F:/testproject/appiumdedao/test04.py
[
  {
    "student_Id": "1",
    "score": "60",
    "rating": "4",
    "class_id": "1"
  },
  {
    "student_Id": "1",
    "score": "67",
    "rating": "1.5",
    "class_id": "2"
  },
  {
    "student_Id": "2",
    "score": "45",
    "rating": "4",
    "class_id": "1"
  },
  {
    "student_Id": "2",
    "score": "47",
    "rating": "4",
    "class_id": "2"
  },
  {
    "student_Id": "2",
    "score": "50",
    "rating": "4",
    "class_id": "3"
  },
  {
    "student_Id": "3",
    "score": "100",
    "rating": "4",
    "class_id": "1"
  },
  {
    "student_Id": "3",
    "score": "90",
    "rating": "3",
    "class_id": "2"
  },
  {
    "student_Id": "3",
    "score": "61",
    "rating": "4",
    "class_id": "3"
  },
  {
    "student_Id": "3",
    "score": "85",
    "rating": "3",
    "class_id": "4"
  }
]

二、場景二,輸出行號及表頭

現在有如下數據:

student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4

顯示每行行號,並帶表頭信息,代碼如下:

import json
import os
import pprint


def csv_to_dict(filename):
    """Read a CSV file into a dict of rows keyed by their zero-based index.

    The first line is the header; its comma-separated names are stripped of
    surrounding whitespace. Each following non-blank line is stripped, split
    on commas and paired with the header names.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A dict mapping ``"0"``, ``"1"``, ... (the index of each data row, as
        a string) to a dict of ``{column name: value}`` with string values.
        An empty or header-only file gives ``{}``. Returns ``None`` when the
        file cannot be opened or read.
    """
    try:
        with open(filename, 'r') as file:
            raw_lines = file.readlines()  # whole file, header line included
    except IOError as err:
        print("I/O error({0})".format(err))
        return None

    # An empty file has no header line to unpack; report "no rows" instead
    # of failing.
    if not raw_lines:
        return {}

    header = [name.strip() for name in raw_lines[0].split(",")]
    stripped = (line.strip() for line in raw_lines[1:])
    # Blank lines (e.g. a trailing newline at the end of the file) are not
    # data rows, so they are skipped and do not consume an index.
    data_lines = (line for line in stripped if line)

    result = {}
    for counter, line in enumerate(data_lines):
        # zip stops at the shorter side: a short row simply lacks the missing
        # columns, and fields beyond the header are ignored rather than
        # raising IndexError.
        result[str(counter)] = dict(zip(header, line.split(",")))
    return result


if __name__ == "__main__":
    # student.csv is expected in the directory the script is launched from.
    csv_path = os.path.join(os.getcwd(), 'student.csv')
    rows_by_index = csv_to_dict(csv_path)
    # Emit the mapping as indented JSON (pprint.pprint would also work here).
    print(json.dumps(rows_by_index, indent=2))

結果:

{
  "0": {
    "student_Id": "1",
    "score": "60",
    "rating": "4",
    "class_id": "1"
  },
  "1": {
    "student_Id": "1",
    "score": "67",
    "rating": "1.5",
    "class_id": "2"
  },
  "2": {
    "student_Id": "2",
    "score": "45",
    "rating": "4",
    "class_id": "1"
  },
  "3": {
    "student_Id": "2",
    "score": "47",
    "rating": "4",
    "class_id": "2"
  },
  "4": {
    "student_Id": "2",
    "score": "50",
    "rating": "4",
    "class_id": "3"
  },
  "5": {
    "student_Id": "3",
    "score": "100",
    "rating": "4",
    "class_id": "1"
  },
  "6": {
    "student_Id": "3",
    "score": "90",
    "rating": "3",
    "class_id": "2"
  },
  "7": {
    "student_Id": "3",
    "score": "61",
    "rating": "4",
    "class_id": "3"
  },
  "8": {
    "student_Id": "3",
    "score": "85",
    "rating": "3",
    "class_id": "4"
  }
}

三、場景三,按某列表頭歸類

現在有如下數據:

userId,movieId,rating
1,16,4
1,24,1.5
2,32,4
2,47,4
2,50,4
3,110,4
3,150,3
3,161,4
3,165,3

現在需求獲得每個用戶下,觀看的電影及其評分,代碼如下:

import csv

class AutoVivification(dict):
    """A dict whose missing keys spring into existence as nested dicts.

    Subscripting with a key that is not present stores a new, empty instance
    of the same class under that key and returns it, so chained assignments
    such as ``d[a][b] = value`` work without creating ``d[a]`` first.
    """

    def __missing__(self, key):
        # dict's subscript lookup calls this hook only when `key` is absent.
        child = type(self)()
        self[key] = child
        return child

def main():
    """Group the third CSV column by the first two columns and print it.

    Reads 'student.csv', skips its first (header) row, and for every other
    row stores column 2 under ``[column 0][column 1]`` of a nested
    AutoVivification mapping, which is then printed.
    """
    nested = AutoVivification()
    with open('student.csv', 'r') as csv_file:
        rows = csv.reader(csv_file, delimiter=',')
        next(rows)  # discard the header row
        for fields in rows:
            outer_key = fields[0]
            inner_key = fields[1]
            nested[outer_key][inner_key] = fields[2]

    print(nested)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

結果:

{'1': {'16': '4', '24': '1.5'}, '2': {'32': '4', '47': '4', '50': '4'}, '3': {'110': '4', '150': '3', '161': '4', '165': '3'}}

四、場景四,按列歸類

現在有如下數據:

student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4

現在的需求是按列獲取數據(例如分數列下的所有分數),每個表頭對應該列的全部取值,代碼如下:

import json
import pprint
import pandas as pd

# Load the whole CSV into a DataFrame.
data = pd.read_csv("student.csv")

# Map every column name to the list of values found in that column.
# (Equivalent to filling an empty dict column by column in a for loop.)
data_dict = {col: list(data[col]) for col in data.columns}

# json.dumps(data_dict) is an alternative way to display the result.
pprint.pprint(data_dict)

結果:

{'class_id': [1, 2, 1, 2, 3, 1, 2, 3, 4],
 'rating': [4.0, 1.5, 4.0, 4.0, 4.0, 4.0, 3.0, 4.0, 3.0],
 'score': [60, 67, 45, 47, 50, 100, 90, 61, 85],
 'student_Id': [1, 1, 2, 2, 2, 3, 3, 3, 3]}


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM