問題
做了個測試板子的程序,裡面有一項寫鐵電的功能,要求寫入之後立即斷電,重啟後校驗數據準確性;鐵電在設計上是通過內存地址直接映射訪問的,於是使用mmap直接映射了/dev/mem文件,寫入之後自然地使用msync進行同步,最後使用munmap解除映射;
然而,當我運行這段程序的時候,發現使用msync的MS_SYNC選項進行同步時會返回錯誤,錯誤碼是EINVAL;這就奇怪了;
查原因
1. 查看MAN手冊,如下:只有當地址不是頁大小的整數倍,或者flags參數傳遞錯誤時,才會返回這個錯誤碼;
EINVAL addr is not a multiple of PAGESIZE; or any bit other than MS_ASYNC | MS_INVALIDATE | MS_SYNC is set in flags; or both MS_SYNC and MS_ASYNC are set in flags.
反復驗證,發現地址沒問題,而且將MS_SYNC換成MS_ASYNC就沒問題了,所以懷疑是內核不支持這個同步選項;為了求證,查看內核代碼:
2. sys_msync這個系統調用,在校驗參數時,如果不合法會返回-EINVAL,這點如上述MAN手冊所描述;
1 asmlinkage long sys_msync(unsigned long start, size_t len, int flags) 2 { 3 unsigned long end; 4 struct mm_struct *mm = current->mm; 5 struct vm_area_struct *vma; 6 int unmapped_error = 0; 7 int error = -EINVAL; 8 9 if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) 10 goto out; 11 if (start & ~PAGE_MASK) 12 goto out; 13 if ((flags & MS_ASYNC) && (flags & MS_SYNC)) 14 goto out; 15 .... 16 }
3. 繼續往下看代碼,有這么一句,如果有MS_SYNC標記的話,會執行do_fsync(),出錯會返回error;
1 asmlinkage long sys_msync(unsigned long start, size_t len, int flags) 2 { 3 ... 4 if ((flags & MS_SYNC) && file && 5 (vma->vm_flags & VM_SHARED)) { 6 get_file(file); 7 up_read(&mm->mmap_sem); 8 error = do_fsync(file, 0); 9 fput(file); 10 if (error || start >= end) 11 goto out; 12 down_read(&mm->mmap_sem); 13 vma = find_vma(mm, start); 14 } else { 15 if (start >= end) { 16 error = 0; 17 goto out_unlock; 18 } 19 vma = vma->vm_next; 20 } 21 } 22 out_unlock: 23 up_read(&mm->mmap_sem); 24 out: 25 return error ? : unmapped_error; 26 }
4. 在do_fsync函數中,會對file_operations和里面的fsync函數做校驗,如果沒有,則返回-EINVAL,基本上可以確定,正是因為該文件沒有實現file_operations里面的fsync函數,所以返回參數錯誤了;
1 long do_fsync(struct file *file, int datasync) 2 { 3 int ret; 4 int err; 5 struct address_space *mapping = file->f_mapping; 6 7 if (!file->f_op || !file->f_op->fsync) { 8 /* Why? We can still call filemap_fdatawrite */ 9 ret = -EINVAL; 10 goto out; 11 } 12 13 ret = filemap_fdatawrite(mapping); 14 15 /* 16 * We need to protect against concurrent writers, which could cause 17 * livelocks in fsync_buffers_list(). 18 */ 19 mutex_lock(&mapping->host->i_mutex); 20 err = file->f_op->fsync(file, file->f_path.dentry, datasync); 21 if (!ret) 22 ret = err; 23 mutex_unlock(&mapping->host->i_mutex); 24 err = filemap_fdatawait(mapping); 25 if (!ret) 26 ret = err; 27 out: 28 return ret; 29 }
5. 我們來看看內存設備是在什麼時候初始化的,如下代碼,chr_dev_init中先用register_chrdev註冊了memory_fops,再通過循環調用device_create為devlist中的一系列內存設備創建設備,其中包括/dev/mem;
1 static int __init chr_dev_init(void) 2 { 3 int i; 4 int err; 5 6 err = bdi_init(&zero_bdi); 7 if (err) 8 return err; 9 10 if (register_chrdev(MEM_MAJOR,"mem",&memory_fops)) 11 printk("unable to get major %d for memory devs\n", MEM_MAJOR); 12 13 mem_class = class_create(THIS_MODULE, "mem"); 14 for (i = 0; i < ARRAY_SIZE(devlist); i++) 15 device_create(mem_class, NULL, 16 MKDEV(MEM_MAJOR, devlist[i].minor), 17 devlist[i].name); 18 19 return 0; 20 }
6. 這個/dev/mem對應著一組文件操作函數(file_operations),即如下代碼中的mem_fops:
1 static const struct { 2 unsigned int minor; 3 char *name; 4 umode_t mode; 5 const struct file_operations *fops; 6 } devlist[] = { /* list of minor devices */ 7 {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops}, 8 {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops}, 9 {3, "null", S_IRUGO | S_IWUGO, &null_fops}, 10 #ifdef CONFIG_DEVPORT 11 {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops}, 12 #endif 13 {5, "zero", S_IRUGO | S_IWUGO, &zero_fops}, 14 {7, "full", S_IRUGO | S_IWUGO, &full_fops}, 15 {8, "random", S_IRUGO | S_IWUSR, &random_fops}, 16 {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops}, 17 {11,"kmsg", S_IRUGO | S_IWUSR, &kmsg_fops}, 18 #ifdef CONFIG_CRASH_DUMP 19 {12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops}, 20 #endif 21 };
7. 看看這個mem_fops的實現,如下,可見其並沒有實現fsync函數;
1 static const struct file_operations mem_fops = { 2 .llseek = memory_lseek, 3 .read = read_mem, 4 .write = write_mem, 5 .mmap = mmap_mem, 6 .open = open_mem, 7 .get_unmapped_area = get_unmapped_area_mem, 8 };
到這,問題總算水落石出了;
8. 再來看看mmap函數的實現,里面調用了這個函數phys_mem_access_prot;
1 static int mmap_mem(struct file * file, struct vm_area_struct * vma) 2 { 3 size_t size = vma->vm_end - vma->vm_start; 4 5 if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) 6 return -EINVAL; 7 8 if (!private_mapping_ok(vma)) 9 return -ENOSYS; 10 11 vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, 12 size, 13 vma->vm_page_prot); 14 15 /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */ 16 if (remap_pfn_range(vma, 17 vma->vm_start, 18 vma->vm_pgoff, 19 size, 20 vma->vm_page_prot)) 21 return -EAGAIN; 22 return 0; 23 }
9. 上面提到的這個函數,如下,其中有個是否支持不緩存的方式判斷,uncached_access;
1 #ifndef __HAVE_PHYS_MEM_ACCESS_PROT 2 static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, 3 unsigned long size, pgprot_t vma_prot) 4 { 5 #ifdef pgprot_noncached 6 unsigned long offset = pfn << PAGE_SHIFT; 7 8 if (uncached_access(file, offset)) 9 return pgprot_noncached(vma_prot); 10 #endif 11 return vma_prot; 12 } 13 #endif
10. 進入uncached_access非緩存訪問函數,可見其內部根據文件的O_SYNC選項來判斷是否支持不緩存的寫;
1 static inline int uncached_access(struct file *file, unsigned long addr) 2 { 3 #if defined(__i386__) && !defined(__arch_um__) 4 /* 5 * On the PPro and successors, the MTRRs are used to set 6 * memory types for physical addresses outside main memory, 7 * so blindly setting PCD or PWT on those pages is wrong. 8 * For Pentiums and earlier, the surround logic should disable 9 * caching for the high addresses through the KEN pin, but 10 * we maintain the tradition of paranoia in this code. 11 */ 12 if (file->f_flags & O_SYNC) 13 return 1; 14 return !( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || 15 test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || 16 test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || 17 test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability) ) 18 && addr >= __pa(high_memory); 19 #elif defined(__x86_64__) && !defined(__arch_um__) 20 /* 21 * This is broken because it can generate memory type aliases, 22 * which can cause cache corruptions 23 * But it is only available for root and we have to be bug-to-bug 24 * compatible with i386. 25 */ 26 if (file->f_flags & O_SYNC) 27 return 1; 28 /* same behaviour as i386. PAT always set to cached and MTRRs control the 29 caching behaviour. 30 Hopefully a full PAT implementation will fix that soon. */ 31 return 0; 32 #elif defined(CONFIG_IA64) 33 /* 34 * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases. 35 */ 36 return !(efi_mem_attributes(addr) & EFI_MEMORY_WB); 37 #elif defined(CONFIG_MIPS) 38 { 39 extern int __uncached_access(struct file *file, 40 unsigned long addr); 41 42 return __uncached_access(file, addr); 43 } 44 #else 45 /* 46 * Accessing memory above the top the kernel knows about or through a file pointer 47 * that was marked O_SYNC will be done non-cached. 48 */ 49 if (file->f_flags & O_SYNC) 50 return 1; 51 return addr >= __pa(high_memory); 52 #endif 53 }
好了,分析完畢;
解決辦法
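在打開/dev/mem時,使用如下方式,即open增加O_SYNC選項,這個選項即上面uncached_access函數使用的判斷標記;對普通文件而言,它表示每次寫操作都要等到數據和文件屬性都同步到物理存儲才返回,而對/dev/mem而言,按上面的代碼(在檢查O_SYNC的架構上),它會讓mmap_mem通過pgprot_noncached建立不經緩存的映射,寫入的數據不會停留在CPU緩存中;這樣寫入之後就不必再調用msync的MS_SYNC進行同步了(mem_fops仍然沒有實現fsync,該調用依舊會返回EINVAL);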
1 int fd = open("/dev/mem", O_RDWR|O_SYNC);
參考文章:
https://blog.csdn.net/wlp600/article/details/6893636
http://www.armadeus.org/wiki/index.php?title=FPGA_registers_access_from_Linux_userspace
https://blog.csdn.net/tiantao2012/article/details/52168383?locationNum=2&fps=1