Linux内存从0到1学习笔记(七,用户空间虚拟内存之二 - 内存空间的建立)
Posted 高桐@BILL
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Linux内存从0到1学习笔记(七,用户空间虚拟内存之二 - 内存空间的建立)相关的知识,希望对你有一定的参考价值。
在使用load_elf_binary装载一个ELF二进制文件时,将创建进程的地址空间。Linux的exec系统调用通过该函数来加载ELF文件。
linux_mainline-5.17.0/fs/binfmt_elf.c
823 static int load_elf_binary(struct linux_binprm *bprm)
824
825 struct file *interpreter = NULL; /* to shut gcc up */
826 unsigned long load_addr = 0, load_bias = 0;
827 int load_addr_set = 0;
828 unsigned long error;
829 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
830 struct elf_phdr *elf_property_phdata = NULL;
831 unsigned long elf_bss, elf_brk;
832 int bss_prot = 0;
833 int retval, i;
834 unsigned long elf_entry;
835 unsigned long e_entry;
836 unsigned long interp_load_addr = 0;
837 unsigned long start_code, end_code, start_data, end_data;
838 unsigned long reloc_func_desc __maybe_unused = 0;
839 int executable_stack = EXSTACK_DEFAULT;
840 struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
841 struct elfhdr *interp_elf_ex = NULL;
842 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
843 struct mm_struct *mm;
844 struct pt_regs *regs;
845
846 retval = -ENOEXEC;
847 /* First of all, some simple consistency checks */
848 if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
849 goto out;
850
851 if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
852 goto out;
853 if (!elf_check_arch(elf_ex))
854 goto out;
855 if (elf_check_fdpic(elf_ex))
856 goto out;
857 if (!bprm->file->f_op->mmap)
858 goto out;
859
860 elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
861 if (!elf_phdata)
862 goto out;
863
864 elf_ppnt = elf_phdata;
865 for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
866 char *elf_interpreter;
867
868 if (elf_ppnt->p_type == PT_GNU_PROPERTY)
869 elf_property_phdata = elf_ppnt;
870 continue;
871
872
873 if (elf_ppnt->p_type != PT_INTERP)
874 continue;
875
876 /*
877 * This is the program interpreter used for shared libraries -
878 * for now assume that this is an a.out format binary.
879 */
880 retval = -ENOEXEC;
881 if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
882 goto out_free_ph;
883
884 retval = -ENOMEM;
885 elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
886 if (!elf_interpreter)
887 goto out_free_ph;
888
889 retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
890 elf_ppnt->p_offset);
891 if (retval < 0)
892 goto out_free_interp;
893 /* make sure path is NULL terminated */
894 retval = -ENOEXEC;
895 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\\0')
896 goto out_free_interp;
897
898 interpreter = open_exec(elf_interpreter);
899 kfree(elf_interpreter);
900 retval = PTR_ERR(interpreter);
901 if (IS_ERR(interpreter))
902 goto out_free_ph;
903
904 /*
905 * If the binary is not readable then enforce mm->dumpable = 0
906 * regardless of the interpreter's permissions.
907 */
908 would_dump(bprm, interpreter);
909
910 interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
911 if (!interp_elf_ex)
912 retval = -ENOMEM;
913 goto out_free_ph;
914
915
916 /* Get the exec headers */
917 retval = elf_read(interpreter, interp_elf_ex,
918 sizeof(*interp_elf_ex), 0);
919 if (retval < 0)
920 goto out_free_dentry;
921
922 break;
923
924 out_free_interp:
925 kfree(elf_interpreter);
926 goto out_free_ph;
927
928
929 elf_ppnt = elf_phdata;
930 for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
931 switch (elf_ppnt->p_type)
932 case PT_GNU_STACK:
933 if (elf_ppnt->p_flags & PF_X)
934 executable_stack = EXSTACK_ENABLE_X;
935 else
936 executable_stack = EXSTACK_DISABLE_X;
937 break;
938
939 case PT_LOPROC ... PT_HIPROC:
940 retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
941 bprm->file, false,
942 &arch_state);
943 if (retval)
944 goto out_free_dentry;
945 break;
946
947
948 /* Some simple consistency checks for the interpreter */
949 if (interpreter)
950 retval = -ELIBBAD;
951 /* Not an ELF interpreter */
952 if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
953 goto out_free_dentry;
954 /* Verify the interpreter has a valid arch */
955 if (!elf_check_arch(interp_elf_ex) ||
956 elf_check_fdpic(interp_elf_ex))
957 goto out_free_dentry;
958
959 /* Load the interpreter program headers */
960 interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
961 interpreter);
962 if (!interp_elf_phdata)
963 goto out_free_dentry;
964
965 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
966 elf_property_phdata = NULL;
967 elf_ppnt = interp_elf_phdata;
968 for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
969 switch (elf_ppnt->p_type)
970 case PT_GNU_PROPERTY:
971 elf_property_phdata = elf_ppnt;
972 break;
973
974 case PT_LOPROC ... PT_HIPROC:
975 retval = arch_elf_pt_proc(interp_elf_ex,
976 elf_ppnt, interpreter,
977 true, &arch_state);
978 if (retval)
979 goto out_free_dentry;
980 break;
981
982
983
984 retval = parse_elf_properties(interpreter ?: bprm->file,
985 elf_property_phdata, &arch_state);
986 if (retval)
987 goto out_free_dentry;
988
989 /*
990 * Allow arch code to reject the ELF at this point, whilst it's
991 * still possible to return an error to the code that invoked
992 * the exec syscall.
993 */
994 retval = arch_check_elf(elf_ex,
995 !!interpreter, interp_elf_ex,
996 &arch_state);
997 if (retval)
998 goto out_free_dentry;
999
1000 /* Flush all traces of the currently running executable */
1001 retval = begin_new_exec(bprm);
1002 if (retval)
1003 goto out_free_dentry;
1004
1005 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
1006 may depend on the personality. */
1007 SET_PERSONALITY2(*elf_ex, &arch_state);
1008 if (elf_read_implies_exec(*elf_ex, executable_stack))
1009 current->personality |= READ_IMPLIES_EXEC;
1010
1011 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
1012 current->flags |= PF_RANDOMIZE;
1013
1014 setup_new_exec(bprm);
1015
1016 /* Do this so that we can load the interpreter, if need be. We will
1017 change some of these later */
1018 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
1019 executable_stack);
1020 if (retval < 0)
1021 goto out_free_dentry;
1022
1023 elf_bss = 0;
1024 elf_brk = 0;
1025
1026 start_code = ~0UL;
1027 end_code = 0;
1028 start_data = 0;
1029 end_data = 0;
1030
1031 /* Now we do a little grungy work by mmapping the ELF image into
1032 the correct location in memory. */
1033 for(i = 0, elf_ppnt = elf_phdata;
1034 i < elf_ex->e_phnum; i++, elf_ppnt++)
1035 int elf_prot, elf_flags;
1036 unsigned long k, vaddr;
1037 unsigned long total_size = 0;
1038 unsigned long alignment;
1039
1040 if (elf_ppnt->p_type != PT_LOAD)
1041 continue;
1042
1043 if (unlikely (elf_brk > elf_bss))
1044 unsigned long nbyte;
1045
1046 /* There was a PT_LOAD segment with p_memsz > p_filesz
1047 before this one. Map anonymous pages, if needed,
1048 and clear the area. */
1049 retval = set_brk(elf_bss + load_bias,
1050 elf_brk + load_bias,
1051 bss_prot);
1052 if (retval)
1053 goto out_free_dentry;
1054 nbyte = ELF_PAGEOFFSET(elf_bss);
1055 if (nbyte)
1056 nbyte = ELF_MIN_ALIGN - nbyte;
1057 if (nbyte > elf_brk - elf_bss)
1058 nbyte = elf_brk - elf_bss;
1059 if (clear_user((void __user *)elf_bss +
1060 load_bias, nbyte))
1061 /*
1062 * This bss-zeroing can fail if the ELF
1063 * file specifies odd protections. So
1064 * we don't check the return value
1065 */
1066
1067
1068
1069
1070 elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
1071 !!interpreter, false);
1072
1073 elf_flags = MAP_PRIVATE;
1074
1075 vaddr = elf_ppnt->p_vaddr;
1076 /*
1077 * The first time through the loop, load_addr_set is false:
1078 * layout will be calculated. Once set, use MAP_FIXED since
1079 * we know we've already safely mapped the entire region with
1080 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
1081 */
1082 if (load_addr_set)
1083 elf_flags |= MAP_FIXED;
1084 else if (elf_ex->e_type == ET_EXEC)
1085 /*
1086 * This logic is run once for the first LOAD Program
1087 * Header for ET_EXEC binaries. No special handling
1088 * is needed.
1089 */
1090 elf_flags |= MAP_FIXED_NOREPLACE;
1091 else if (elf_ex->e_type == ET_DYN)
1092 /*
1093 * This logic is run once for the first LOAD Program
1094 * Header for ET_DYN binaries to calculate the
1095 * randomization (load_bias) for all the LOAD
1096 * Program Headers.
1097 *
1098 * There are effectively two types of ET_DYN
1099 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
1100 * and loaders (ET_DYN without INTERP, since they
1101 * _are_ the ELF interpreter). The loaders must
1102 * be loaded away from programs since the program
1103 * may otherwise collide with the loader (especially
1104 * for ET_EXEC which does not have a randomized
1105 * position). For example to handle invocations of
1106 * "./ld.so someprog" to test out a new version of
1107 * the loader, the subsequent program that the
1108 * loader loads must avoid the loader itself, so
1109 * they cannot share the same load range. Sufficient
1110 * room for the brk must be allocated with the
1111 * loader as well, since brk must be available with
1112 * the loader.
1113 *
1114 * Therefore, programs are loaded offset from
1115 * ELF_ET_DYN_BASE and loaders are loaded into the
1116 * independently randomized mmap region (0 load_bias
1117 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
1118 */
1119 alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
1120 if (interpreter || alignment > ELF_MIN_ALIGN)
1121 load_bias = ELF_ET_DYN_BASE;
1122 if (current->flags & PF_RANDOMIZE)
1123 load_bias += arch_mmap_rnd();
1124 if (alignment)
1125 load_bias &= ~(alignment - 1);
1126 elf_flags |= MAP_FIXED_NOREPLACE;
1127 else
1128 load_bias = 0;
1129
1130 /*
1131 * Since load_bias is used for all subsequent loading
1132 * calculations, we must lower it by the first vaddr
1133 * so that the remaining calculations based on the
1134 * ELF vaddrs will be correctly offset. The result
1135 * is then page aligned.
1136 */
1137 load_bias = ELF_PAGESTART(load_bias - vaddr);
1138
1139
1140 /*
1141 * Calculate the entire size of the ELF mapping (total_size).
1142 * (Note that load_addr_set is set to true later once the
1143 * initial mapping is performed.)
1144 */
1145 if (!load_addr_set)
1146 total_size = total_mapping_size(elf_phdata,
1147 elf_ex->e_phnum);
1148 if (!total_size)
1149 retval = -EINVAL;
1150 goto out_free_dentry;
1151
1152
1153
1154 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
1155 elf_prot, elf_flags, total_size);
1156 if (BAD_ADDR(error))
1157 retval = IS_ERR((void *)error) ?
1158 PTR_ERR((void*)error) : -EINVAL;
1159 goto out_free_dentry;
1160
1161
1162 if (!load_addr_set)
1163 load_addr_set = 1;
1164 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
1165 if (elf_ex->e_type == ET_DYN)
1166 load_bias += error -
1167 ELF_PAGESTART(load_bias + vaddr);
1168 load_addr += load_bias;
1169 reloc_func_desc = load_bias;
1170
1171
1172 k = elf_ppnt->p_vaddr;
1173 if ((elf_ppnt->p_flags & PF_X) && k < start_code)
1174 start_code = k;
1175 if (start_data < k)
1176 start_data = k;
1177
1178 /*
1179 * Check to see if the section's size will overflow the
1180 * allowed task size. Note that p_filesz must always be
1181 * <= p_memsz so it is only necessary to check p_memsz.
1182 */
1183 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1184 elf_ppnt->p_memsz > TASK_SIZE ||
1185 TASK_SIZE - elf_ppnt->p_memsz < k)
1186 /* set_brk can never work. Avoid overflows. */
1187 retval = -EINVAL;
1188 goto out_free_dentry;
1189
1190
1191 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1192
1193 if (k > elf_bss)
1194 elf_bss = k;
1195 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1196 end_code = k;
1197 if (end_data < k)
1198 end_data = k;
1199 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1200 if (k > elf_brk)
1201 bss_prot = elf_prot;
1202 elf_brk = k;
1203
1204
1205
1206 e_entry = elf_ex->e_entry + load_bias;
1207 elf_bss += load_bias;
1208 elf_brk += load_bias;
1209 start_code += load_bias;
1210 end_code += load_bias;
1211 start_data += load_bias;
1212 end_data += load_bias;
1213
1214 /* Calling set_brk effectively mmaps the pages that we need
1215 * for the bss and break sections. We must do this before
1216 * mapping in the interpreter, to make sure it doesn't wind
1217 * up getting placed where the bss needs to go.
1218 */
1219 retval = set_brk(elf_bss, elf_brk, bss_prot);
1220 if (retval)
1221 goto out_free_dentry;
1222 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss)))
1223 retval = -EFAULT; /* Nobody gets to see this, but.. */
1224 goto out_free_dentry;
1225
1226
1227 if (interpreter)
1228 elf_entry = load_elf_interp(interp_elf_ex,
1229 interpreter,
1230 load_bias, interp_elf_phdata,
1231 &arch_state);
1232 if (!IS_ERR((void *)elf_entry))
1233 /*
1234 * load_elf_interp() returns relocation
1235 * adjustment
1236 */
1237 interp_load_addr = elf_entry;
1238 elf_entry += interp_elf_ex->e_entry;
1239
1240 if (BAD_ADDR(elf_entry))
1241 retval = IS_ERR((void *)elf_entry) ?
1242 (int)elf_entry : -EINVAL;
1243 goto out_free_dentry;
1244
1245 reloc_func_desc = interp_load_addr;
1246
1247 allow_write_access(interpreter);
1248 fput(interpreter);
1249
1250 kfree(interp_elf_ex);
1251 kfree(interp_elf_phdata);
1252 else
1253 elf_entry = e_entry;
1254 if (BAD_ADDR(elf_entry))
1255 retval = -EINVAL;
1256 goto out_free_dentry;
1257
1258
1259
1260 kfree(elf_phdata);
1261
1262 set_binfmt(&elf_format);
1263
1264 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1265 retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
1266 if (retval < 0)
1267 goto out;
1268 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1269
1270 retval = create_elf_tables(bprm, elf_ex,
1271 load_addr, interp_load_addr, e_entry);
1272 if (retval < 0)
1273 goto out;
1274
1275 mm = current->mm;
1276 mm->end_code = end_code;
1277 mm->start_code = start_code;
1278 mm->start_data = start_data;
1279 mm->end_data = end_data;
1280 mm->start_stack = bprm->p;
1281
1282 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
1283 /*
1284 * For architectures with ELF randomization, when executing
1285 * a loader directly (i.e. no interpreter listed in ELF
1286 * headers), move the brk area out of the mmap region
1287 * (since it grows up, and may collide early with the stack
1288 * growing down), and into the unused ELF_ET_DYN_BASE region.
1289 */
1290 if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1291 elf_ex->e_type == ET_DYN && !interpreter)
1292 mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
1293
1294
1295 mm->brk = mm->start_brk = arch_randomize_brk(mm);
1296 #ifdef compat_brk_randomized
1297 current->brk_randomized = 1;
1298 #endif
1299
1300
1301 if (current->personality & MMAP_PAGE_ZERO)
1302 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1303 and some applications "depend" upon this behavior.
1304 Since we do not have the power to recompile these, we
1305 emulate the SVr4 behavior. Sigh. */
1306 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1307 MAP_FIXED | MAP_PRIVATE, 0);
1308
1309
1310 regs = current_pt_regs();
1311 #ifdef ELF_PLAT_INIT
1312 /*
1313 * The ABI may specify that certain registers be set up in special
1314 * ways (on i386 %edx is the address of a DT_FINI function, for
1315 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1316 * that the e_entry field is the address of the function descriptor
1317 * for the startup routine, rather than the address of the startup
1318 * routine itself. This macro performs whatever initialization to
1319 * the regs structure is required as well as any relocations to the
1320 * function descriptor entries when executing dynamically links apps.
1321 */
1322 ELF_PLAT_INIT(regs, reloc_func_desc);
1323 #endif
1324
1325 finalize_exec(bprm);
1326 START_THREAD(elf_ex, regs, elf_entry, bprm->p);
1327 retval = 0;
1328 out:
1329 return retval;
1330
1331 /* error cleanup */
1332 out_free_dentry:
1333 kfree(interp_elf_ex);
1334 kfree(interp_elf_phdata);
1335 allow_write_access(interpreter);
1336 if (interpreter)
1337 fput(interpreter);
1338 out_free_ph:
1339 kfree(elf_phdata);
1340 goto out;
1341
setup_arg_pages函数:把栈顶设置为STACK_TOP减去随机值,然后把环境变量和参数从临时栈移到最终的用户栈;
set_brk函数:为bss段和堆(brk)建立匿名映射并设置堆的起始地址;如果启用了堆随机化,arch_randomize_brk会把堆的起始地址再加上一个随机偏移。
arch_pick_mmap_layout函数:ARM64架构的下该函数负责选择内存映射区域的布局。
409 void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
410
411 unsigned long random_factor = 0UL;
412
413 if (current->flags & PF_RANDOMIZE)
414 random_factor = arch_mmap_rnd();
415
416 if (mmap_is_legacy(rlim_stack)) //如果给进程描述符的成员personality设置标志位ADDRCOMPAT LAYOUT表示使用传统的虚拟地址空间布局,或者用户栈可以无限增长,或者通过文件“/proc/sys/vm/legacy_va_layout”指定,那么使用传统的自底向上增长的布局,内存映射区域的起始地址是 TASK_UNMAPPED_BASE 加上随机值,分配未映射区域的函数是arch_get_unmapped_area。
417 mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
418 mm->get_unmapped_area = arch_get_unmapped_area;
419 else //如果使用自顶向下增长的布局,那么分配未映射区域的函数是arch_ get_unmapped_area_topdown,内存映射区域的起始地址的计算方法如下:先计算内存映射区域的起始地址和栈顶的间隙:初始值取用户栈的最大长度,限定不能小于“128MB + 栈的最大随机偏移值 + 1”,确保用户栈最大可以达到128MB;限定不能超过STACK_TOP的5/6。内存映射区域的起始地址等于“STACK_TOP−间隙−随机值”,然后向下对齐到页长度。
420 mm->mmap_base = mmap_base(random_factor, rlim_stack);
421 mm->get_unmapped_area = arch_get_unmapped_area_topdown;
422
423
以上是关于Linux内存从0到1学习笔记(七,用户空间虚拟内存之二 - 内存空间的建立)的主要内容,如果未能解决你的问题,请参考以下文章
Linux内存从0到1学习笔记(6.10 物理内存初始化之vmalloc分配器)