Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F97958349
D32758.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
6 KB
Referenced Files
None
Subscribers
None
D32758.id.diff
View Options
Index: sys/x86/x86/tsc.c
===================================================================
--- sys/x86/x86/tsc.c
+++ sys/x86/x86/tsc.c
@@ -49,6 +49,7 @@
#include <sys/vdso.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
+#include <machine/fpu.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/vmware.h>
@@ -601,39 +602,162 @@
static void
tsc_calib(void *arg __unused)
{
- sbintime_t t_start, t_end;
- uint64_t freq_khz, tsc_start, tsc_end;
register_t flags;
- int cpu;
+ uint64_t tsc0, tsc, tsc_delay, n, passes = 0;
+ sbintime_t t0, t;
+ double mu_tsc = 0;
+ double mu_t = 0;
+ double va_tsc = 0;
+ double va_t = 0;
+ double cva = 0;
+ double q_tsc;
+ double q_t;
+ double d1, d2;
+ double inv_n;
+ uint64_t freq;
- flags = intr_disable();
- cpu = curcpu;
- tsc_start = rdtsc_ordered();
- t_start = sbinuptime();
- intr_restore(flags);
+ /*-
+ * The idea here is to compute a best-fit linear regression between
+ * the TSC and the reference clock; the slope of that line multiplied
+ * by the frequency of the reference clock (2^32 Hz, since we're
+ * using sbinuptime as a reference) gives us the frequency of the TSC.
+ * (Note that we could use a hardware clock directly as a reference
+ * rather than using sbinuptime and taking a detour via timecounter
+ * code; but using sbinuptime allows us to automatically use the best
+ * available reference clock.)
+ *
+ * To do this, we calculate the
+ * (a) mean of the TSC measurements,
+ * (b) variance of the TSC measurements,
+ * (c) mean of the reference clock measurements,
+ * (d) variance of the reference clock measurements, and
+ * (e) covariance of the TSC and reference clock measurements
+ * on an ongoing basis, updating all five values after each new data
+ * point arrives, stopping when we're confident that we've accurately
+ * measured the clock speed.
+ *
+ * Given those five values, the important formulas to remember from
+ * introductory statistics are:
+ * 1. slope of regression line = covariance(x, y) / variance(x)
+ * 2. (relative uncertainty in slope)^2 =
+ * (variance(x) * variance(y) - covariance(x, y)^2)
+ * ------------------------------------------------
+ * covariance(x, y)^2 * (N - 2)
+ *
+ * We adjust the second formula slightly, adding a term to each of
+ * the variance values to reflect the measurement quantization.
+ *
+ * Finally, we need to determine when to stop gathering data. We
+ * can't simply stop as soon as the computed uncertainty estimate
+ * is below our threshold; this would make us overconfident since it
+ * would introduce a multiple-comparisons problem (cf. sequential
+ * analysis in clinical trials). Instead, we stop with N data points
+ * if the estimated uncertainty of the first k data points meets our
+ * target for all N/2 < k <= N; this is not theoretically ideal, but
+ * in practice works well enough.
+ */
- DELAY(1000000);
+ /*
+ * Using floating-point arithmetic allows us to keep rounding errors
+ * under control with far less effort than using fixed-point math.
+ */
+ fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
+ /*
+ * Bind to the current CPU for the duration of our calibration, just
+ * in case moving between CPUs would introduce additional noise.
+ */
thread_lock(curthread);
- sched_bind(curthread, cpu);
+ sched_bind(curthread, curcpu);
+ thread_unlock(curthread);
+ /* Timer quantization granularity. */
+ q_tsc = 2.0;
+ q_t = (double)(SBT_1S) / tc_getfrequency() + 1.0;
+
+ /*
+ * Initial values for TSC and uptime; we'll subtract these off from
+ * values we measure later in order to reduce the size of values
+ * we're handling later (and thereby reduce rounding errors).
+ */
flags = intr_disable();
- tsc_end = rdtsc_ordered();
- t_end = sbinuptime();
+ tsc0 = rdtsc_ordered();
+ t0 = sbinuptime();
intr_restore(flags);
+ for (n = 1; ; n++) {
+ /* Get a new data point. */
+ flags = intr_disable();
+ tsc = rdtsc_ordered() - tsc0;
+ t = sbinuptime() - t0;
+ intr_restore(flags);
+
+ /* If we spent too long, bail. */
+ if (t > (double)(SBT_1S)) {
+ printf("Statistical TSC calibration failed! "
+ "Clocks might be ticking at variable rates.\n");
+ printf("Falling back to slow TSC calibration.\n");
+ freq = (double)(SBT_1S) * tsc / t;
+ break;
+ }
+
+ /* Precompute to save on divisions later. */
+ inv_n = 1.0 / n;
+
+ /* Update mean and variance of recorded TSC values. */
+ d1 = tsc - mu_tsc;
+ mu_tsc += d1 * inv_n;
+ d2 = d1 * (tsc - mu_tsc);
+ va_tsc += (d2 - va_tsc) * inv_n;
+
+ /* Update mean and variance of recorded time values. */
+ d1 = t - mu_t;
+ mu_t += d1 * inv_n;
+ d2 = d1 * (t - mu_t);
+ va_t += (d2 - va_t) * inv_n;
+
+ /* Update covariance. */
+ d2 = d1 * (tsc - mu_tsc);
+ cva += (d2 - cva) * inv_n;
+
+ /* Count consecutive low-uncertainty iterations (needs n > 2). */
+#define TSC_PPM_UNCERTAINTY 1
+#define TSC_UNCERTAINTY (TSC_PPM_UNCERTAINTY * 0.000001)
+#define TSC_UNCERTAINTY_SQR (TSC_UNCERTAINTY * TSC_UNCERTAINTY)
+ if (n > 2 && TSC_UNCERTAINTY_SQR * (n - 2) * cva * cva >
+ (va_t + q_t * q_t) * (va_tsc + q_tsc * q_tsc) - cva * cva)
+ passes++;
+ else
+ passes = 0;
+
+ /* Break if we're consistently certain. */
+ if (passes * 2 > n) {
+ freq = (double)(SBT_1S) * cva / va_t;
+ if (bootverbose)
+ printf("TSC calibration took %ld us\n",
+ (long)(t * 1000000.0 / SBT_1S));
+ break;
+ }
+
+ /*
+ * Add variable delay to avoid theoretical risk of aliasing
+ * resulting from this loop synchronizing with the frequency
+ * of the reference clock. On the nth iteration, we spend
+ * O(1 / n) time here -- long enough to avoid aliasing, but
+ * short enough to be insignificant as n grows.
+ */
+ tsc_delay = rdtsc_ordered() + tsc / (n * n);
+ while (rdtsc_ordered() < tsc_delay)
+ /* Do nothing. */ ;
+ }
+
+ /* Unbind CPU and exit FPU mode. */
+ thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
+ fpu_kern_leave(curthread, NULL);
- /*
- * Direct use of the clock frequency of 10^9Hz would result in overflow
- * if more than ~18.5*10^9 TSC ticks elapse between measurements. While
- * this is unlikely for now, reduce precision slightly to better avoid
- * the problem.
- */
- freq_khz = 1000000ul * (tsc_end - tsc_start) / sbttons(t_end - t_start);
-
- tsc_update_freq(freq_khz * 1000);
+ tsc_update_freq(freq);
tc_init(&tsc_timecounter);
set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Oct 2, 8:53 PM (14 h, 25 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
13284123
Default Alt Text
D32758.id.diff (6 KB)
Attached To
Mode
D32758: Speed up tsc_calib
Attached
Detach File
Event Timeline
Log In to Comment