svn commit: r326383 - head/sys/x86/cpufreq

Thu Nov 30 20:08:57 UTC 2017

On 11/30/2017 14:32, Conrad Meyer wrote:
> Hi,
> 
> I don't think this answers the second question about the conditional.
> It seems like PCPU_GET() for the initial CPU should be pulled out of
> the loop, which binds the thread to a different CPU every iteration.

Ah, good catch.  I'll fix it soon.  Sorry.

> Also, as a side effect of disabling verification, you have fixed PR
> 221621, 219213, and probably 218262.

Probably.  However, I am just trying to fix my FX-8350 and A10-6800 and
I don't have Zen processors to verify the MSRs are actually working on
those CPUs.

Jung-uk Kim

> On Thu, Nov 30, 2017 at 11:21 AM, Jung-uk Kim <jkim at freebsd.org> wrote:
>> On 11/30/2017 13:57, Conrad Meyer wrote:
>>> Hi Jung-uk,
>>>
>>> I have some questions about this change.
>>>
>>> On Wed, Nov 29, 2017 at 5:40 PM, Jung-uk Kim <jkim at freebsd.org> wrote:
>>>> Author: jkim
>>>> Date: Thu Nov 30 01:40:07 2017
>>>> New Revision: 326383
>>>> URL: https://svnweb.freebsd.org/changeset/base/326383
>>>>
>>>> Log:
>>>>   Add a tunable "debug.hwpstate_verify" to check P-state after changing it and
>>>>   turn it off by default.  It is very inefficient to verify current P-state of
>>>>   each core, especially for CPUs with many cores.  When multiple commands are
>>>>   requested to the same power domain before completion of pending transitions,
>>>>   the last command is executed according to the manual.  Because requests are
>>>>   serialized by the caller, all cores will receive the same command for each
>>>>   call.  Do not call sched_bind() and sched_unbind().  It is redundant because
>>>>   the caller does it anyway.
>>>> ...
>>>> @@ -176,47 +178,57 @@ hwpstate_goto_pstate(device_t dev, int pstate)
>>>>         if (limit > id)
>>>>                 id = limit;
>>>>
>>>
>>> Should we bind the thread and record PCPU_GET() here?
>>>
>>>> +       HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id,
>>>> +           PCPU_GET(cpuid));
>>>> +       /* Go To Px-state */
>>>> +       wrmsr(MSR_AMD_10H_11H_CONTROL, id);
>>>> +
>>>>         /*
>>>>          * We are going to the same Px-state on all cpus.
>>>>          * Probably should take _PSD into account.
>>>>          */
>>>> -       error = 0;
>>>>         CPU_FOREACH(i) {
>>>> +               if (i == PCPU_GET(cpuid))
>>>
>>> It seems like this check could evaluate to a different CPU every time?
>>>  When really we are trying to avoid setting on the CPU we set on
>>> initially above?
>>>
>>>> +                       continue;
>>>> +
>>>>                 /* Bind to each cpu. */
>>>>                 thread_lock(curthread);
>>>>                 sched_bind(curthread, i);
>>>>                 thread_unlock(curthread);
>>>> -               HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n",
>>>> -                   id, PCPU_GET(cpuid));
>>>> +               HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, i);
>>>>                 /* Go To Px-state */
>>>>                 wrmsr(MSR_AMD_10H_11H_CONTROL, id);
>>>>         }
>>>> -       CPU_FOREACH(i) {
>>>> -               /* Bind to each cpu. */
>>>> -               thread_lock(curthread);
>>>> -               sched_bind(curthread, i);
>>>> -               thread_unlock(curthread);
>>>> -               /* wait loop (100*100 usec is enough ?) */
>>>> -               for (j = 0; j < 100; j++) {
>>>> -                       /* get the result. not assure msr=id */
>>>> -                       msr = rdmsr(MSR_AMD_10H_11H_STATUS);
>>>> -                       if (msr == id)
>>>> -                               break;
>>>> -                       sbt = SBT_1MS / 10;
>>>> -                       tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
>>>> -                           sbt >> tc_precexp, 0);
>>>> +
>>>> +       /*
>>>> +        * Verify whether each core is in the requested P-state.
>>>> +        */
>>>> +       if (hwpstate_verify) {
>>>> +               CPU_FOREACH(i) {
>>>> +                       thread_lock(curthread);
>>>> +                       sched_bind(curthread, i);
>>>> +                       thread_unlock(curthread);
>>>> +                       /* wait loop (100*100 usec is enough ?) */
>>>> +                       for (j = 0; j < 100; j++) {
>>>> +                               /* get the result. not assure msr=id */
>>>> +                               msr = rdmsr(MSR_AMD_10H_11H_STATUS);
>>>> +                               if (msr == id)
>>>> +                                       break;
>>>> +                               sbt = SBT_1MS / 10;
>>>> +                               tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
>>>> +                                   sbt >> tc_precexp, 0);
>>>> +                       }
>>>> +                       HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n",
>>>> +                           (int)msr, i);
>>>> +                       if (msr != id) {
>>>> +                               HWPSTATE_DEBUG(dev,
>>>> +                                   "error: loop is not enough.\n");
>>>
>>> In this error return, should the current thread be unbinded?  The old
>>> code did this by setting error and falling through to the ordinary
>>> exit path.  We could use 'goto out;' to avoid looping through the rest
>>> of the CPUs.
>>>
>>>> +                               return (ENXIO);
>>>> +                       }
>>>>                 }
>>>> -               HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n",
>>>> -                   (int)msr, PCPU_GET(cpuid));
>>>> -               if (msr != id) {
>>>> -                       HWPSTATE_DEBUG(dev, "error: loop is not enough.\n");
>>>> -                       error = ENXIO;
>>>> -               }
>>>>         }
>>>> -       thread_lock(curthread);
>>>> -       sched_unbind(curthread);
>>>> -       thread_unlock(curthread);
>>>> -       return (error);
>>>> +
>>>> +       return (0);
>>>>  }
>>>>
>>>>  static int
>>>>
>>
>> This driver is only called via cpufreq(4) (i.e., sys/kern/kern_cpu.c),
>> and sched_bind()/sched_unbind() is done from cf_set_method() for cpu0.
>> If you want to see the sequence, try "sysctl debug.cpufreq.verbose=1"
>> and "sysctl debug.hwpstate_verbose=1".

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: OpenPGP digital signature
URL: <http://lists.freebsd.org/pipermail/svn-src-all/attachments/20171130/cd7a8b8d/attachment.sig>