/dev/mem同步寫不能使用msync的MS_SYNC選項探究


問題

做了個測試板子的程序,裡面有一項寫鐵電的功能,要求寫入之後立即斷電,重啟後校驗數據準確性;鐵電設計是通過內存地址直接映射的,於是,使用mmap直接映射了/dev/mem文件,自然地寫入之後使用msync進行同步,最後使用munmap解映射;

然而,當我運行這段程序的時候,發現msync的MS_SYNC選項進行同步的時候會返回錯誤,錯誤碼是EINVAL;這就奇怪了;

查原因

1. 查看MAN手冊,如下:當地址不是頁的整數倍,或者參數傳遞錯誤時才返回這個結果;

1 EINVAL addr  is not a multiple of PAGESIZE; or any bit other than MS_ASYNC | MS_INVALIDATE | MS_SYNC is set in flags; or both MS_SYNC
2 and MS_ASYNC are set in flags.

反復驗證,發現地址沒問題,而且將MS_SYNC換成MS_ASYNC就沒問題了,所以懷疑是內核不支持這個同步選項;為了求證,查看內核代碼:

2. sys_msync這個系統調用,在校驗參數時,如果不合法會返回-EINVAL,這點如上述MAN手冊所描述;

 1 asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 2 {
 3     unsigned long end;
 4     struct mm_struct *mm = current->mm;
 5     struct vm_area_struct *vma;
 6     int unmapped_error = 0;
 7     int error = -EINVAL;
 8 
 9     if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
10         goto out;
11     if (start & ~PAGE_MASK)
12         goto out;
13     if ((flags & MS_ASYNC) && (flags & MS_SYNC))
14         goto out;
15         ....
16 }

3. 繼續往下看代碼,有這麼一句,如果有MS_SYNC標記,並且該映射有對應的文件(file非空)且是共享映射(VM_SHARED)的話,會執行do_fsync(),出錯會返回error;

 1 asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 2 {
 3     ...
 4         if ((flags & MS_SYNC) && file &&
 5                 (vma->vm_flags & VM_SHARED)) {
 6             get_file(file);
 7             up_read(&mm->mmap_sem);
 8             error = do_fsync(file, 0);
 9             fput(file);
10             if (error || start >= end)
11                 goto out;
12             down_read(&mm->mmap_sem);
13             vma = find_vma(mm, start);
14         } else {
15             if (start >= end) {
16                 error = 0;
17                 goto out_unlock;
18             }
19             vma = vma->vm_next;
20         }
21     }
22 out_unlock:
23     up_read(&mm->mmap_sem);
24 out:
25     return error ? : unmapped_error;
26 }

4. 在do_fsync函數中,會對file_operations和裡面的fsync函數做校驗,如果沒有,則返回-EINVAL,基本上可以確定,正是因為該文件沒有實現file_operations裡面的fsync函數,所以返回參數錯誤了;

 1 long do_fsync(struct file *file, int datasync)
 2 {
 3     int ret;
 4     int err;
 5     struct address_space *mapping = file->f_mapping;
 6 
 7     if (!file->f_op || !file->f_op->fsync) {
 8         /* Why?  We can still call filemap_fdatawrite */
 9         ret = -EINVAL;
10         goto out;
11     }
12 
13     ret = filemap_fdatawrite(mapping);
14 
15     /*
16      * We need to protect against concurrent writers, which could cause
17      * livelocks in fsync_buffers_list().
18      */
19     mutex_lock(&mapping->host->i_mutex);
20     err = file->f_op->fsync(file, file->f_path.dentry, datasync);
21     if (!ret)
22         ret = err;
23     mutex_unlock(&mapping->host->i_mutex);
24     err = filemap_fdatawait(mapping);
25     if (!ret)
26         ret = err;
27 out:
28     return ret;
29 }

5. 我們來看看內存設備是在什麼時候初始化的,如下代碼,chr_dev_init先通過register_chrdev以MEM_MAJOR為主設備號註冊memory_fops,再循環調用device_create為devlist中的一系列內存設備創建設備,其中包括/dev/mem;

 1 static int __init chr_dev_init(void)
 2 {
 3     int i;
 4     int err;
 5 
 6     err = bdi_init(&zero_bdi);
 7     if (err)
 8         return err;
 9 
10     if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
11         printk("unable to get major %d for memory devs\n", MEM_MAJOR);
12 
13     mem_class = class_create(THIS_MODULE, "mem");
14     for (i = 0; i < ARRAY_SIZE(devlist); i++)
15         device_create(mem_class, NULL,
16                   MKDEV(MEM_MAJOR, devlist[i].minor),
17                   devlist[i].name);
18 
19     return 0;
20 }

6. 這個/dev/mem對應著一組文件操作函數(file_operations),如下代碼中的mem_fops:

 1 static const struct {
 2     unsigned int        minor;
 3     char            *name;
 4     umode_t            mode;
 5     const struct file_operations    *fops;
 6 } devlist[] = { /* list of minor devices */
 7     {1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
 8     {2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
 9     {3, "null",    S_IRUGO | S_IWUGO,           &null_fops},
10 #ifdef CONFIG_DEVPORT
11     {4, "port",    S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
12 #endif
13     {5, "zero",    S_IRUGO | S_IWUGO,           &zero_fops},
14     {7, "full",    S_IRUGO | S_IWUGO,           &full_fops},
15     {8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
16     {9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops},
17     {11,"kmsg",    S_IRUGO | S_IWUSR,           &kmsg_fops},
18 #ifdef CONFIG_CRASH_DUMP
19     {12,"oldmem",    S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
20 #endif
21 };

7. 看看這個mem_fops的實現,如下,可見其並沒有實現fsync函數;

1 static const struct file_operations mem_fops = {
2     .llseek        = memory_lseek,
3     .read        = read_mem,
4     .write        = write_mem,
5     .mmap        = mmap_mem,
6     .open        = open_mem,
7     .get_unmapped_area = get_unmapped_area_mem,
8 };

到這,問題總算水落石出了;

8. 再來看看mmap函數的實現,裡面調用了這個函數phys_mem_access_prot;

 1 static int mmap_mem(struct file * file, struct vm_area_struct * vma)
 2 {
 3     size_t size = vma->vm_end - vma->vm_start;
 4 
 5     if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
 6         return -EINVAL;
 7 
 8     if (!private_mapping_ok(vma))
 9         return -ENOSYS;
10 
11     vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
12                          size,
13                          vma->vm_page_prot);
14 
15     /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
16     if (remap_pfn_range(vma,
17                 vma->vm_start,
18                 vma->vm_pgoff,
19                 size,
20                 vma->vm_page_prot))
21         return -EAGAIN;
22     return 0;
23 }

9. 上面提到的這個函數,如下,其中有個是否支持不緩存的方式判斷,uncached_access;

 1 #ifndef __HAVE_PHYS_MEM_ACCESS_PROT
 2 static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 3                      unsigned long size, pgprot_t vma_prot)
 4 {
 5 #ifdef pgprot_noncached
 6     unsigned long offset = pfn << PAGE_SHIFT;
 7 
 8     if (uncached_access(file, offset))
 9         return pgprot_noncached(vma_prot);
10 #endif
11     return vma_prot;
12 }
13 #endif

10. 進入uncached_access非緩存訪問函數,可見其內部根據文件的O_SYNC選項來判斷是否支持不緩存的寫;

 1 static inline int uncached_access(struct file *file, unsigned long addr)
 2 {
 3 #if defined(__i386__) && !defined(__arch_um__)
 4     /*
 5      * On the PPro and successors, the MTRRs are used to set
 6      * memory types for physical addresses outside main memory,
 7      * so blindly setting PCD or PWT on those pages is wrong.
 8      * For Pentiums and earlier, the surround logic should disable
 9      * caching for the high addresses through the KEN pin, but
10      * we maintain the tradition of paranoia in this code.
11      */
12     if (file->f_flags & O_SYNC)
13         return 1;
14      return !( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
15           test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
16           test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
17           test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability) )
18       && addr >= __pa(high_memory);
19 #elif defined(__x86_64__) && !defined(__arch_um__)
20     /* 
21      * This is broken because it can generate memory type aliases,
22      * which can cause cache corruptions
23      * But it is only available for root and we have to be bug-to-bug
24      * compatible with i386.
25      */
26     if (file->f_flags & O_SYNC)
27         return 1;
28     /* same behaviour as i386. PAT always set to cached and MTRRs control the
29        caching behaviour. 
30        Hopefully a full PAT implementation will fix that soon. */       
31     return 0;
32 #elif defined(CONFIG_IA64)
33     /*
34      * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
35      */
36     return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
37 #elif defined(CONFIG_MIPS)
38     {
39         extern int __uncached_access(struct file *file,
40                          unsigned long addr);
41 
42         return __uncached_access(file, addr);
43     }
44 #else
45     /*
46      * Accessing memory above the top the kernel knows about or through a file pointer
47      * that was marked O_SYNC will be done non-cached.
48      */
49     if (file->f_flags & O_SYNC)
50         return 1;
51     return addr >= __pa(high_memory);
52 #endif
53 }

好了,分析完畢;

解決辦法

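在打開/dev/mem時,使用如下方式,即open增加O_SYNC選項,這個選項即上面uncached_access函數使用的判斷標記;對普通文件而言,O_SYNC表示每次寫操作都要等到數據和文件屬性都同步到物理存儲才返回;而對/dev/mem而言,從上面的代碼可以看到,帶O_SYNC打開時uncached_access返回1(ia64除外,該架構忽略O_SYNC),mmap_mem會通過pgprot_noncached把這段映射設置為不緩存,寫入不再經過CPU緩存,因此寫完之後不需要再調用msync;注意即使加了O_SYNC,msync的MS_SYNC仍然會返回EINVAL,因為mem_fops依舊沒有實現fsync,所以應當去掉msync調用(或只使用MS_ASYNC);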

1 int fd = open("/dev/mem", O_RDWR|O_SYNC);

 

參考文章:

https://blog.csdn.net/wlp600/article/details/6893636

http://www.armadeus.org/wiki/index.php?title=FPGA_registers_access_from_Linux_userspace

https://stackoverflow.com/questions/20750176/how-to-get-writes-via-an-mmap-mapped-memory-pointer-to-flush-immediately

https://blog.csdn.net/tiantao2012/article/details/52168383?locationNum=2&fps=1

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM