Page MenuHomeFreeBSD

rtld: rework how environment variables are named
ClosedPublic

Authored by kib on Aug 15 2021, 7:56 PM.
Tags
None
Referenced Files
Unknown Object (File)
Thu, Oct 31, 11:50 AM
Unknown Object (File)
Tue, Oct 22, 6:03 PM
Unknown Object (File)
Tue, Oct 22, 6:00 PM
Unknown Object (File)
Oct 1 2024, 10:50 PM
Unknown Object (File)
Sep 22 2024, 8:54 AM
Unknown Object (File)
Sep 18 2024, 10:52 PM
Unknown Object (File)
Sep 17 2024, 7:31 PM
Unknown Object (File)
Sep 17 2024, 10:18 AM
Subscribers

Details

Summary

Instead of specifying the main name part of the environment variable as the string literal, create array of the var names and access them by symbolic index. Convert main name parts into complete names by prefixing with ABI-specific ld_env_vars.

This way the name is not repeated, and also it can carry additional proporties explicitly. For instance, cleanup of the environment for the setuid image does not require retyping all names.

Diff Detail

Repository
rG FreeBSD src repository
Lint
Lint Skipped
Unit
Tests Skipped

Event Timeline

kib requested review of this revision.Aug 15 2021, 7:56 PM

One thought I had a while back when looking at the environment variable code in rtld was to do a single pass over environ that initializes all the variables (to avoid pulling in getenv.c from libc).
I.e. having an array of something like this

struct ld_env_var_desc {
	char n[64];
        char **dest;
	bool unsecure;
};

or possibly a slightly more complicated struct that can also handle conversion to int/bool:

enum ld_env_type { ENV_STR, ENV_INT, ENV_BOOL, };

struct ld_env_var_desc {
	char n[64]; 
	enum ld_env_type type;
	union {
		const char **strval;
		int *intval;
		bool *boolval;
	} dest; /* could also use void* but that loses type checking */
	bool unsecure;
};

static bool ld_debug;
const char* ld_library_path;

static struct ld_env_var_desc ld_env_vars[] = {
	{ .n = "BIND_NOW", .type = ENV_BOOL, { .boolval = &ld_debug}, .unsecure = false},
	{ .n = "LIBRARY_PATH", .type = ENV_STR, { .strval = &ld_library_path}, .unsecure = true},
};

void rtld_read_env_vars(void) 
{
    /* ... */
}
kib added a reviewer: arichardson.
kib removed a subscriber: arichardson.

Do not use libc getenv(3)/unsetenv(3) in rtld.

Requested by: arichardson

Per-commit view is available at https://kib.kiev.ua/git/gitweb.cgi?p=deviant3.git;a=shortlog;h=refs/heads/rtld

I like this approach, but how about avoiding duplicated work by only iterating over env once. I've attached a minimal test program based on this revision that should avoid unnecessary work by looping over env once and if the prefix matches checking the array of variables.

#include <sys/param.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

const char *ld_env_prefix;
enum {
	LD_BIND_NOW = 0,
	LD_PRELOAD,
	LD_LIBMAP,
	LD_LIBRARY_PATH,
	LD_LIBRARY_PATH_FDS,
	LD_LIBMAP_DISABLE,
	LD_BIND_NOT,
	LD_DEBUG,
	LD_ELF_HINTS_PATH,
	LD_LOADFLTR,
	LD_LIBRARY_PATH_RPATH,
	LD_PRELOAD_FDS,
	LD_DYNAMIC_WEAK,
	LD_TRACE_LOADED_OBJECTS,
	LD_UTRACE,
	LD_DUMP_REL_PRE,
	LD_DUMP_REL_POST,
	LD_TRACE_LOADED_OBJECTS_PROGNAME,
	LD_TRACE_LOADED_OBJECTS_FMT1,
	LD_TRACE_LOADED_OBJECTS_FMT2,
	LD_TRACE_LOADED_OBJECTS_ALL,
};

struct ld_env_var_desc {
	char n[64];
	size_t namelen;
	char *val;
	bool unsecure;
};
#define LD_ENV_DESC(var, unsec) \
	[LD_##var] = { .n = #var, .namelen = strlen(#var), .unsecure = unsec }

static struct ld_env_var_desc ld_env_vars[] = {
	LD_ENV_DESC(BIND_NOW, false),
	LD_ENV_DESC(PRELOAD, true),
	LD_ENV_DESC(LIBMAP, true),
	LD_ENV_DESC(LIBRARY_PATH, true),
	LD_ENV_DESC(LIBRARY_PATH_FDS, true),
	LD_ENV_DESC(LIBMAP_DISABLE, true),
	LD_ENV_DESC(BIND_NOT, true),
	LD_ENV_DESC(DEBUG, true),
	LD_ENV_DESC(ELF_HINTS_PATH, true),
	LD_ENV_DESC(LOADFLTR, true),
	LD_ENV_DESC(LIBRARY_PATH_RPATH, true),
	LD_ENV_DESC(PRELOAD_FDS, true),
	LD_ENV_DESC(DYNAMIC_WEAK, true),
	LD_ENV_DESC(TRACE_LOADED_OBJECTS, false),
	LD_ENV_DESC(UTRACE, false),
	LD_ENV_DESC(DUMP_REL_PRE, false),
	LD_ENV_DESC(DUMP_REL_POST, false),
	LD_ENV_DESC(TRACE_LOADED_OBJECTS_PROGNAME, false),
	LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT1, false),
	LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT2, false),
	LD_ENV_DESC(TRACE_LOADED_OBJECTS_ALL, false),
};
#undef LD_ENV_DESC

static char *
ld_get_env_var(int idx)
{
	return (ld_env_vars[idx].val);
}

static char *
rtld_get_env_val(char **env, const char *name, size_t name_len)
{
	char **m, *n, *v;

	for (m = env; *m != NULL; m++) {
		n = *m;
		v = strchr(n, '=');
		if (v == NULL) {
			/* corrupt environment? */
			continue;
		}
		if (v - n == (ptrdiff_t)name_len &&
		    strncmp(name, n, name_len) == 0)
			return (v + 1);
	}
	return (NULL);
}

static void
rtld_init_env_vars(char **env)
{
	struct ld_env_var_desc *lvd;
	size_t prefix_len;
	char *v;

	prefix_len = strlen(ld_env_prefix);
	for (char **m = env; *m != NULL; m++) {
		char *n = *m;
		if (strncmp(ld_env_prefix, n, prefix_len) != 0) {
			/* Not a rtld environment variable. */
			continue;
		}
		n += prefix_len;
		v = strchr(n, '=');
		if (v == NULL) {
			/* corrupt environment? */
			continue;
		}
		for (int i = 0; i < (int)nitems(ld_env_vars); i++) {
			lvd = &ld_env_vars[i];
			if (v - n == (ptrdiff_t)lvd->namelen &&
			    strncmp(lvd->n, n, lvd->namelen) == 0) {
				lvd->val = v + 1;
			}
		}
	}
}

int
main()
{
	char *env[] = {
		"PATH=/bin:/usr/bin",
		"LD_PRELOAD=/foo/bar/libfoo.so",
		"LD64_BIND_NOW=1",
		NULL,
	};
	ld_env_prefix = "LD_";
	rtld_init_env_vars(env);
	printf("LD_BIND_NOW=%s\n", ld_get_env_var(LD_BIND_NOW));
	printf("LD_PRELOAD=%s\n", ld_get_env_var(LD_PRELOAD));
	printf("LD64_BIND_NOW=%s\n", rtld_get_env_val(env, "LD64_BIND_NOW", strlen("LD64_BIND_NOW")));
	printf("PATH=%s\n", rtld_get_env_val(env, "PATH", strlen("PATH")));
	return 0;
}
libexec/rtld-elf/rtld.c
423

Should this have a debug assertion that rtld_init_env_vars was called previously?

460

Can we move this into the !trust (or the for loop inside?). Should be fine according to style(9) now I believe.

I believe not modifying the n field should also make it easier to support multiple prefixes. For example like this:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define nitems(x) (sizeof((x)) / sizeof((x)[0]))

const char *ld_env_prefixes[] = { "LD_64_", "LD_" };
enum {
	LD_BIND_NOW = 0,
	LD_PRELOAD,
	LD_LIBMAP,
	LD_LIBRARY_PATH,
	LD_LIBRARY_PATH_FDS,
	LD_LIBMAP_DISABLE,
	LD_BIND_NOT,
	LD_DEBUG,
	LD_ELF_HINTS_PATH,
	LD_LOADFLTR,
	LD_LIBRARY_PATH_RPATH,
	LD_PRELOAD_FDS,
	LD_DYNAMIC_WEAK,
	LD_TRACE_LOADED_OBJECTS,
	LD_UTRACE,
	LD_DUMP_REL_PRE,
	LD_DUMP_REL_POST,
	LD_TRACE_LOADED_OBJECTS_PROGNAME,
	LD_TRACE_LOADED_OBJECTS_FMT1,
	LD_TRACE_LOADED_OBJECTS_FMT2,
	LD_TRACE_LOADED_OBJECTS_ALL,
};

struct ld_env_var_desc {
	char n[64];
	size_t namelen;
	char *val;
	bool unsecure;
};
#define LD_ENV_DESC(var, unsec) \
	[LD_##var] = { .n = #var, .namelen = strlen(#var), .unsecure = unsec }

static struct ld_env_var_desc ld_env_vars[] = {
	LD_ENV_DESC(BIND_NOW, false),
	LD_ENV_DESC(PRELOAD, true),
	LD_ENV_DESC(LIBMAP, true),
	LD_ENV_DESC(LIBRARY_PATH, true),
	LD_ENV_DESC(LIBRARY_PATH_FDS, true),
	LD_ENV_DESC(LIBMAP_DISABLE, true),
	LD_ENV_DESC(BIND_NOT, true),
	LD_ENV_DESC(DEBUG, true),
	LD_ENV_DESC(ELF_HINTS_PATH, true),
	LD_ENV_DESC(LOADFLTR, true),
	LD_ENV_DESC(LIBRARY_PATH_RPATH, true),
	LD_ENV_DESC(PRELOAD_FDS, true),
	LD_ENV_DESC(DYNAMIC_WEAK, true),
	LD_ENV_DESC(TRACE_LOADED_OBJECTS, false),
	LD_ENV_DESC(UTRACE, false),
	LD_ENV_DESC(DUMP_REL_PRE, false),
	LD_ENV_DESC(DUMP_REL_POST, false),
	LD_ENV_DESC(TRACE_LOADED_OBJECTS_PROGNAME, false),
	LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT1, false),
	LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT2, false),
	LD_ENV_DESC(TRACE_LOADED_OBJECTS_ALL, false),
};

static char *
ld_get_env_var(int idx)
{
	return (ld_env_vars[idx].val);
}

static char *
rtld_get_env_val(char **env, const char *name, size_t name_len)
{
	char **m, *n, *v;

	for (m = env; *m != NULL; m++) {
		n = *m;
		v = strchr(n, '=');
		if (v == NULL) {
			/* corrupt environment? */
			continue;
		}
		if (v - n == (ptrdiff_t)name_len &&
		    strncmp(name, n, name_len) == 0)
			return (v + 1);
	}
	return (NULL);
}


static void
rtld_init_env_vars_for_prefix(char **env, const char *env_prefix)
{
	struct ld_env_var_desc *lvd;
	size_t prefix_len;
	char *v;

	prefix_len = strlen(env_prefix);
	for (char **m = env; *m != NULL; m++) {
		char *n = *m;
		if (strncmp(env_prefix, n, prefix_len) != 0) {
			/* Not a rtld environment variable. */
			continue;
		}
		n += prefix_len;
		v = strchr(n, '=');
		if (v == NULL) {
			/* corrupt environment? */
			continue;
		}
		for (int i = 0; i < (int)nitems(ld_env_vars); i++) {
			lvd = &ld_env_vars[i];
			if (lvd->val != NULL) {
				/* Saw higher-priority variable name already. */
				continue;
			}
			if (v - n == (ptrdiff_t)(lvd->namelen) &&
			    strncmp(lvd->n, n, lvd->namelen) == 0) {
				lvd->val = v + 1;
			}
		}
	}
}

static void
rtld_init_env_vars(char **env)
{
	for (int i = 0; i < (int)nitems(ld_env_prefixes); i++) {
		rtld_init_env_vars_for_prefix(env, ld_env_prefixes[i]);
	}
}

static char *environ[] = {
	"PATH=/bin:/usr/bin",
	"LD_PRELOAD=/foo/bar/libfoo.so",
	"LD_BIND_NOW=should not be used since LD64 is preferred",
	"LD_64_BIND_NOW=1",
	NULL,
};

char *
getenv(const char *name)
{
	return (rtld_get_env_val(environ, name, strlen(name)));
}

int
main()
{
	rtld_init_env_vars(environ);
	printf("LD_BIND_NOW=%s\n", ld_get_env_var(LD_BIND_NOW));
	printf("LD_PRELOAD=%s\n", ld_get_env_var(LD_PRELOAD));
	printf("LD_64_BIND_NOW=%s\n", getenv("LD_64_BIND_NOW"));
	printf("LD_BIND_NOW=%s\n", getenv("LD_BIND_NOW"));
	printf("PATH=%s\n", getenv("PATH"));
	return 0;
}

Take rtld_init_env_vars_for_prefix() from Alexander.
Actually adding more than one prefix requires more work, do not do it there for now.

Looks good to me, but maybe wait for @emaste or @markj to also have a look.

This revision is now accepted and ready to land.Aug 16 2021, 4:45 PM

Constify variables holding env var values.

This revision now requires review to proceed.Aug 16 2021, 4:59 PM
markj added inline comments.
libexec/rtld-elf/rtld.c
370

Could be const char *const n;

This revision is now accepted and ready to land.Aug 16 2021, 7:47 PM
kib marked an inline comment as done.Aug 16 2021, 8:53 PM