kgsl_ioctl_gpumem_alloc

Posted bubbleben

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了kgsl_ioctl_gpumem_alloc相关的知识,希望对你有一定的参考价值。

static const struct kgsl_ioctl kgsl_ioctl_funcs[] = 
    ...
    // ioctl command: IOCTL_KGSL_GPUMEM_ALLOC
    // ioctl handler: kgsl_ioctl_gpumem_alloc
	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC,
			kgsl_ioctl_gpumem_alloc),
    ...

1. kgsl_gpumem_alloc

/*
 * struct kgsl_gpumem_alloc - userspace argument for IOCTL_KGSL_GPUMEM_ALLOC
 * (braces restored; the web scrape stripped them from the original listing)
 */
struct kgsl_gpumem_alloc {
	/* GPU virtual address of the new allocation */
	unsigned long gpuaddr; /* output param */
	/* requested allocation size in bytes */
	__kernel_size_t size;
	/* allocation flags (cache mode, secure, alignment, ...) */
	unsigned int flags;
};

/* ioctl command: read/write ioctl carrying a struct kgsl_gpumem_alloc */
#define IOCTL_KGSL_GPUMEM_ALLOC \
	_IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc)

2. kgsl_ioctl_gpumem_alloc

long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv,
		unsigned int cmd, void *data)

	struct kgsl_device *device = dev_priv->device;
    // ioctl命令参数
	struct kgsl_gpumem_alloc *param = data;
    
    // kgsl_mem_entry用于描述用户空间的内存分配[见2.1节]
	struct kgsl_mem_entry *entry;
	uint64_t flags = param->flags;

	/*
	 * On 64 bit kernel, secure memory region is expanded and
	 * moved to 64 bit address, 32 bit apps can not access it from
	 * this IOCTL.
	 */
	if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task()
			&& test_bit(KGSL_MMU_64BIT, &device->mmu.features))
		return -EOPNOTSUPP;

	/* Legacy functions doesn't support these advanced features */
	flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);

	if (is_compat_task())
		flags |= KGSL_MEMFLAGS_FORCE_32BIT;

    // 创建kgsl_mem_entry[见2.2节]
	entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, flags);

	if (IS_ERR(entry))
		return PTR_ERR(entry);

    // 更新参数
	param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr;
	param->size = (size_t) entry->memdesc.size;
	param->flags = (unsigned int) entry->memdesc.flags;

	/* Put the extra ref from kgsl_mem_entry_create() */
    // 减少引用计数, 如果引用计数减为0则通过kgsl_mem_entry_destroy释放kgsl_mem_entry
	kgsl_mem_entry_put(entry);

	return 0;

2.1 kgsl_mem_entry

/*
 * struct kgsl_mem_entry - a userspace memory allocation
 */
struct kgsl_mem_entry 
    // Currently userspace can only hold a single reference count but the kernel may hold more
	struct kref refcount;
    // description of the memory[见2.1.1节]
	struct kgsl_memdesc memdesc;
    // type-specific data, such as the dma-buf attachment pointer
	void *priv_data;
    // rb_node for the gpu address lookup rb tree
	struct rb_node node;
    // idr index for this entry, can be used to find memory that does not have a valid GPU address
	unsigned int id;
    // 持有该内存的进程
	struct kgsl_process_private *priv;
    // if !0, userspace requested that his memory be freed, but there are still references to it
	int pending_free;
    // String containing user specified metadata for the entry
	char metadata[KGSL_GPUOBJ_ALLOC_METADATA_MAX + 1];
    // used to schedule a kgsl_mem_entry_put in atomic contexts
	struct work_struct work;
	/**
	 * @map_count: Count how many vmas this object is mapped in - used for
	 * debugfs accounting
	 */
    // 映射的VMA数量
	atomic_t map_count;
;

2.1.1 kgsl_memdesc

/**
 * struct kgsl_memdesc - GPU memory object descriptor
 */
struct kgsl_memdesc 
    // Pointer to the pagetable that the object is mapped in
	struct kgsl_pagetable *pagetable;
    // Kernel virtual address
	void *hostptr;
    // Number of threads using hostptr
	unsigned int hostptr_count;
    // GPU virtual address
	uint64_t gpuaddr;
    // Physical address of the memory object
	phys_addr_t physaddr;
    // Size of the memory object
	uint64_t size;
    // Internal flags and settings
	unsigned int priv;
	struct sg_table *sgt;
    // Function hooks for the memdesc memory type[见2.1.2节]
	const struct kgsl_memdesc_ops *ops;
    // Flags set from userspace
	uint64_t flags;
	struct device *dev;
    // dma attributes for this memory
	unsigned long attrs;
    // An array of pointers to allocated pages
	struct page **pages;
    // Total number of pages allocated
	unsigned int page_count;
	/*
	 * @lock: Spinlock to protect the gpuaddr from being accessed by
	 * multiple entities trying to map the same SVM region at once
	 */
	spinlock_t lock;
	/** @shmem_filp: Pointer to the shmem file backing this memdesc */
    // 共享内存的文件
	struct file *shmem_filp;
	/** @ranges: rbtree base for the interval list of vbo ranges */
	struct rb_root_cached ranges;
	/** @ranges_lock: Mutex to protect the range database */
	struct mutex ranges_lock;
	/** @gmuaddr: GMU VA if this is mapped in GMU */
	u32 gmuaddr;
;

2.1.2 kgsl_memdesc_ops

struct kgsl_memdesc_ops 
	unsigned int vmflags;
	vm_fault_t (*vmfault)(struct kgsl_memdesc *memdesc,
		struct vm_area_struct *vma, struct vm_fault *vmf);
	void (*free)(struct kgsl_memdesc *memdesc);
	int (*map_kernel)(struct kgsl_memdesc *memdesc);
	void (*unmap_kernel)(struct kgsl_memdesc *memdesc);
	/**
	 * @put_gpuaddr: Put away the GPU address and unmap the memory
	 * descriptor
	 */
	void (*put_gpuaddr)(struct kgsl_memdesc *memdesc);
;

2.2 gpumem_alloc_entry

/*
 * gpumem_alloc_entry() - validate flags and allocate a kgsl_mem_entry.
 * @dev_priv: per-fd private data for the calling process
 * @size: requested size in bytes (must be non-zero and <= 4G)
 * @flags: userspace allocation flags
 *
 * Returns the new entry (with the extra reference taken by
 * kgsl_mem_entry_create still held) or an ERR_PTR on failure.
 * (Braces restored; the scrape stripped them.)
 */
struct kgsl_mem_entry *gpumem_alloc_entry(
		struct kgsl_device_private *dev_priv,
		uint64_t size, uint64_t flags)
{
	int ret;
	struct kgsl_process_private *private = dev_priv->process_priv;
	struct kgsl_mem_entry *entry;
	struct kgsl_device *device = dev_priv->device;
	u32 cachemode;

	/* For 32-bit kernel world nothing to do with this flag */
	if (BITS_PER_LONG == 32)
		flags &= ~((uint64_t) KGSL_MEMFLAGS_FORCE_32BIT);

	if (flags & KGSL_MEMFLAGS_VBO)
		return gpumem_alloc_vbo_entry(dev_priv, size, flags);

	/* Mask off everything except the flags callers may legally pass */
	flags &= KGSL_MEMFLAGS_GPUREADONLY
		| KGSL_CACHEMODE_MASK
		| KGSL_MEMTYPE_MASK
		| KGSL_MEMALIGN_MASK
		| KGSL_MEMFLAGS_USE_CPU_MAP
		| KGSL_MEMFLAGS_SECURE
		| KGSL_MEMFLAGS_FORCE_32BIT
		| KGSL_MEMFLAGS_IOCOHERENT
		| KGSL_MEMFLAGS_GUARD_PAGE;

	/* Return not supported error if secure memory isn't enabled */
	if ((flags & KGSL_MEMFLAGS_SECURE) && !check_and_warn_secured(device))
		return ERR_PTR(-EOPNOTSUPP);

	flags = cap_alignment(device, flags);

	/* For now only allow allocations up to 4G */
	if (size == 0 || size > UINT_MAX)
		return ERR_PTR(-EINVAL);

	/* Normalize the requested cache mode */
	flags = kgsl_filter_cachemode(flags);

	/*
	 * Flag validation/normalization is done; now create the
	 * kgsl_mem_entry [see 2.2.1]
	 */
	entry = kgsl_mem_entry_create();
	if (entry == NULL)
		return ERR_PTR(-ENOMEM);

	/* Cached buffers default to I/O coherent when the config enables it */
	if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) &&
		kgsl_cachemode_is_cached(flags))
		flags |= KGSL_MEMFLAGS_IOCOHERENT;

	/* Allocate the backing memory [see 2.2.2] */
	ret = kgsl_allocate_user(device, &entry->memdesc,
		size, flags, 0);
	if (ret != 0)
		goto err;

	/* Attach the entry to the process and map it [see 2.2.7] */
	ret = kgsl_mem_entry_attach_and_map(device, private, entry);
	if (ret != 0) {
		kgsl_sharedmem_free(&entry->memdesc);
		goto err;
	}

	cachemode = kgsl_memdesc_get_cachemode(&entry->memdesc);
	/*
	 * Secure buffers cannot be reclaimed. Avoid reclaim of cached buffers
	 * as we could get request for cache operations on these buffers when
	 * they are reclaimed.
	 */
	if (!(flags & KGSL_MEMFLAGS_SECURE) &&
			!(cachemode == KGSL_CACHEMODE_WRITEBACK) &&
			!(cachemode == KGSL_CACHEMODE_WRITETHROUGH))
		entry->memdesc.priv |= KGSL_MEMDESC_CAN_RECLAIM;

	/*
	 * Account the allocation size against the per-process stats array,
	 * keyed by the memdesc's usermem type.
	 */
	kgsl_process_add_stats(private,
			kgsl_memdesc_usermem_type(&entry->memdesc),
			entry->memdesc.size);
	trace_kgsl_mem_alloc(entry);

	/* Publish the entry to kgsl_process_private so others can find it */
	kgsl_mem_entry_commit_process(entry);
	return entry;
err:
	kfree(entry);
	return ERR_PTR(ret);
}

2.2.1 kgsl_mem_entry_create

static struct kgsl_mem_entry *kgsl_mem_entry_create(void)

    // 创建kgsl_mem_entry
	struct kgsl_mem_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);

	if (entry != NULL) 
        // 初始化kgsl_mem_entry引用计数为1
		kref_init(&entry->refcount);
		/* put this ref in userspace memory alloc and map ioctls */
        // 引用计数加1
		kref_get(&entry->refcount);
        // 初始化映射的VMA数量为0
		atomic_set(&entry->map_count, 0);
	

	return entry;

2.2.2 kgsl_allocate_user

/* MMU type of the device (braces restored; the scrape stripped them) */
enum kgsl_mmutype {
	/* IOMMU is available */
	KGSL_MMU_TYPE_IOMMU = 0,
	/* no MMU - allocations must be physically contiguous */
	KGSL_MMU_TYPE_NONE
};

/*
 * kgsl_allocate_user() - allocate backing memory for a userspace request.
 * @device: the KGSL device
 * @memdesc: descriptor to fill in
 * @size: requested size in bytes
 * @flags: userspace allocation flags
 * @priv: internal KGSL_MEMDESC_* flags
 *
 * Dispatches to the right allocator: contiguous when there is no IOMMU,
 * secure for KGSL_MEMFLAGS_SECURE, otherwise regular page allocation.
 * (Braces restored; the scrape stripped them.)
 */
int kgsl_allocate_user(struct kgsl_device *device, struct kgsl_memdesc *memdesc,
		u64 size, u64 flags, u32 priv)
{
	/* Without an IOMMU the buffer must be physically contiguous */
	if (device->mmu.type == KGSL_MMU_TYPE_NONE)
		return kgsl_alloc_contiguous(device, memdesc, size, flags,
			priv);
	else if (flags & KGSL_MEMFLAGS_SECURE)
		return kgsl_allocate_secure(device, memdesc, size, flags, priv);

	/* Normal page-based allocation [see 2.2.3] */
	return kgsl_alloc_pages(device, memdesc, size, flags, priv);
}

2.2.3 kgsl_alloc_pages

/*
 * kgsl_alloc_pages() - page-based allocation path for a memdesc.
 * @device: the KGSL device
 * @memdesc: descriptor to initialize and fill
 * @size: requested size (page-aligned here)
 * @flags: userspace allocation flags
 * @priv: internal KGSL_MEMDESC_* flags (0 on the ioctl path)
 *
 * Returns 0 on success or a negative errno.
 * (Braces restored; the scrape stripped them.)
 */
static int kgsl_alloc_pages(struct kgsl_device *device,
		struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv)
{
	struct page **pages;
	int count;

	/* Round the size up to a whole number of pages */
	size = PAGE_ALIGN(size);

	/* Reject empty and > 4G requests */
	if (!size || size > UINT_MAX)
		return -EINVAL;

	/* Initialize the memdesc from the flags [see 2.2.4] */
	kgsl_memdesc_init(device, memdesc, flags);
	/* priv is 0 on the gpumem_alloc ioctl path */
	memdesc->priv |= priv;

	/* #define KGSL_MEMDESC_SYSMEM BIT(9) */
	if (priv & KGSL_MEMDESC_SYSMEM) {
		memdesc->ops = &kgsl_system_ops;
		count = kgsl_system_alloc_pages(size, &pages, device->dev);
	} else {
		/* Install the standard paged kgsl_memdesc_ops [see 2.2.5] */
		memdesc->ops = &kgsl_page_ops;
		/* Allocate the pages and get back the page array [see 2.2.6] */
		count = _kgsl_alloc_pages(memdesc, size, &pages, device->dev);
	}

	if (count < 0)
		return count;

	/* Record the page array */
	memdesc->pages = pages;
	/* Total allocation size */
	memdesc->size = size;
	/* Number of pages allocated */
	memdesc->page_count = count;

	/*
	 * Update the driver-wide statistics: add the allocation size to
	 * kgsl_driver.stats.page_alloc (tracking the max as well).
	 */
	KGSL_STATS_ADD(size, &kgsl_driver.stats.page_alloc,
		&kgsl_driver.stats.page_alloc_max);

	return 0;
}

2.2.4 kgsl_memdesc_init

/*
 * kgsl_memdesc_init() - zero a memdesc and derive its settings from flags.
 * @device: the KGSL device
 * @memdesc: descriptor to initialize
 * @flags: userspace allocation flags
 *
 * Sanitizes the flags against device capabilities (SVM, secure memory,
 * I/O coherency, guard pages) and records them plus the alignment in the
 * memdesc. (Braces restored and the doubled backslash in the WARN_ONCE
 * string fixed; the scrape damaged both.)
 */
void kgsl_memdesc_init(struct kgsl_device *device,
			struct kgsl_memdesc *memdesc, uint64_t flags)
{
	struct kgsl_mmu *mmu = &device->mmu;
	unsigned int align;

	/* Start from a clean descriptor */
	memset(memdesc, 0, sizeof(*memdesc));
	/* Turn off SVM if the system doesn't support it */
	if (!kgsl_mmu_is_perprocess(mmu))
		flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);

	/* Secure memory disables advanced addressing modes */
	if (flags & KGSL_MEMFLAGS_SECURE)
		flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);

	/* Disable IO coherence if it is not supported on the chip */
	if (!kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT)) {
		flags &= ~((uint64_t) KGSL_MEMFLAGS_IOCOHERENT);

		WARN_ONCE(IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT),
			"I/O coherency is not supported on this target\n");
	} else if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT))
		flags |= KGSL_MEMFLAGS_IOCOHERENT;

	/*
	 * We can't enable I/O coherency on uncached surfaces because of
	 * situations where hardware might snoop the cpu caches which can
	 * have stale data. This happens primarily due to the limitations
	 * of dma caching APIs available on arm64
	 */
	if (!kgsl_cachemode_is_cached(flags))
		flags &= ~((u64) KGSL_MEMFLAGS_IOCOHERENT);

	if (kgsl_mmu_has_feature(device, KGSL_MMU_NEED_GUARD_PAGE) ||
		(flags & KGSL_MEMFLAGS_GUARD_PAGE))
		memdesc->priv |= KGSL_MEMDESC_GUARD_PAGE;

	if (flags & KGSL_MEMFLAGS_SECURE)
		memdesc->priv |= KGSL_MEMDESC_SECURE;

	/* Record the sanitized flags */
	memdesc->flags = flags;
	/* Record the owning device */
	memdesc->dev = &device->pdev->dev;

	/* Alignment is at least one page */
	align = max_t(unsigned int,
		kgsl_memdesc_get_align(memdesc), ilog2(PAGE_SIZE));
	/* Store the alignment back into the memdesc flags */
	kgsl_memdesc_set_align(memdesc, align);

	spin_lock_init(&memdesc->lock);
}

2.2.5 kgsl_page_ops

static const struct kgsl_memdesc_ops kgsl_page_ops = 
	.free = kgsl_free_pages,
	.vmflags = VM_DONTDUMP | VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP,
	.vmfault = kgsl_paged_vmfault,
	.map_kernel = kgsl_paged_map_kernel,
	.unmap_kernel = kgsl_paged_unmap_kernel,
	.put_gpuaddr = kgsl_unmap_and_put_gpuaddr,
;

2.2.6 _kgsl_alloc_pages

/*
 * _kgsl_alloc_pages() - allocate the pages backing a memdesc.
 * @memdesc: the descriptor being populated
 * @size: page-aligned size in bytes
 * @pages: out parameter receiving the allocated page array
 * @dev: device used for allocation accounting
 *
 * Allocates pages largest-first (starting from 1MB alignment), falling
 * back to smaller page sizes as memory fragments. Returns the number of
 * pages allocated, or a negative errno. On failure everything allocated
 * so far is freed. (Braces restored and the doubled backslash in the
 * error string fixed; the scrape damaged both.)
 */
static int _kgsl_alloc_pages(struct kgsl_memdesc *memdesc,
		u64 size, struct page ***pages, struct device *dev)
{
	int count = 0;
	/* Convert the byte size to a page count */
	int npages = size >> PAGE_SHIFT;
	/*
	 * kvcalloc attempts a physically contiguous (kmalloc) allocation
	 * first and falls back to vmalloc on failure.
	 */
	struct page **local = kvcalloc(npages, sizeof(*local), GFP_KERNEL);
	u32 page_size, align;
	u64 len = size;

	if (!local)
		return -ENOMEM;

	/*
	 * Returns 0 when the shmem file is set up, or when
	 * CONFIG_QCOM_KGSL_USE_SHMEM is not configured [see 2.2.6.1]
	 */
	count = kgsl_memdesc_file_setup(memdesc, size);
	if (count) {
		kvfree(local);
		return count;
	}

	/* Start with 1MB alignment to get the biggest page we can */
	align = ilog2(SZ_1M);

	/* Pick the page size based on the remaining length */
	page_size = kgsl_get_page_size(len, align);

	while (len) {
		/*
		 * Allocate via kgsl_pool_alloc_page and return the pages
		 * through the local array.
		 */
		int ret = kgsl_alloc_page(&page_size, &local[count],
			npages, &align, count, memdesc->shmem_filp, dev);

		if (ret == -EAGAIN)
			continue;
		else if (ret <= 0) {
			int i;

			/* Free everything allocated so far */
			for (i = 0; i < count; ) {
				int n = 1 << compound_order(local[i]);

				kgsl_free_page(local[i]);
				i += n;
			}
			kvfree(local);

			if (!kgsl_sharedmem_noretry_flag)
				pr_err_ratelimited("kgsl: out of memory: only allocated %lldKb of %lldKb requested\n",
					(size - len) >> 10, size >> 10);

			if (memdesc->shmem_filp)
				fput(memdesc->shmem_filp);

			return -ENOMEM;
		}

		count += ret;
		npages -= ret;
		len -= page_size;

		page_size = kgsl_get_page_size(len, align);
	}

	/* Hand the page array back to the caller */
	*pages = local;

	return count;
}

2.2.6.1 kgsl_memdesc_file_setup

// 配置kgsl使用共享内存
#ifdef CONFIG_QCOM_KGSL_USE_SHMEM
static int kgsl_

以上是关于kgsl_ioctl_gpumem_alloc的主要内容,如果未能解决你的问题,请参考以下文章

adreno源码系列私有内存申请

❤️数据结构入门❤️(4 - 7)- 基数排序

我应该在基数排序中使用哪个基数?以及如何在基数之间转换?

基数排序和更改基数

基数排序与基数排序

基数排序:基数排序中的“组”是啥意思?