LLVM学习笔记(43-2)

Posted wuhui_gdnt

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了LLVM学习笔记(43-2)相关的知识,希望对你有一定的参考价值。

V7.0的变化

V7.0 SubtargetEmitter::EmitProcessorModels()改写颇多,因为对处理器的描述进行了相当程度的增强。

1344  void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS)

1345    // For each processor model.

1346    for (const CodeGenProcModel &PM : SchedModels.procModels())

1347      // Emit extra processor info if available.

1348      if (PM.hasExtraProcessorInfo())

1349        EmitExtraProcessorInfo(PM, OS);

1350      // Emit processor resource table.

1351      if (PM.hasInstrSchedModel())

1352        EmitProcessorResources(PM, OS);

1353      else if(!PM.ProcResourceDefs.empty())

1354        PrintFatalError(PM.ModelDef->getLoc(), "SchedMachineModel defines "

1355                      "ProcResources without defining WriteRes SchedWriteRes");

1356 

1357      // Begin processor itinerary properties

1358      OS << "\\n";

1359      OS << "static const llvm::MCSchedModel " << PM.ModelName << " = \\n";

1360      EmitProcessorProp(OS, PM.ModelDef, "IssueWidth", ',');

1361      EmitProcessorProp(OS, PM.ModelDef, "MicroOpBufferSize", ',');

1362      EmitProcessorProp(OS, PM.ModelDef, "LoopMicroOpBufferSize", ',');

1363      EmitProcessorProp(OS, PM.ModelDef, "LoadLatency", ',');

1364      EmitProcessorProp(OS, PM.ModelDef, "HighLatency", ',');

1365      EmitProcessorProp(OS, PM.ModelDef, "MispredictPenalty", ',');

1366 

1367      bool PostRAScheduler =

1368        (PM.ModelDef ? PM.ModelDef->getValueAsBit("PostRAScheduler") : false);

1369 

1370      OS << "  " << (PostRAScheduler ? "true" : "false")  << ", // "

1371         << "PostRAScheduler\\n";

1372 

1373      bool CompleteModel =

1374        (PM.ModelDef ? PM.ModelDef->getValueAsBit("CompleteModel") : false);

1375 

1376      OS << "  " << (CompleteModel ? "true" : "false") << ", // "

1377         << "CompleteModel\\n";

1378 

1379      OS << "  " << PM.Index << ", // Processor ID\\n";

1380      if (PM.hasInstrSchedModel())

1381        OS << "  " << PM.ModelName << "ProcResources" << ",\\n"

1382           << "  " << PM.ModelName << "SchedClasses" << ",\\n"

1383           << "  " << PM.ProcResourceDefs.size()+1 << ",\\n"

1384           << "  " << (SchedModels.schedClassEnd()

1385                       - SchedModels.schedClassBegin()) << ",\\n";

1386      else

1387        OS << "  nullptr, nullptr, 0, 0,"

1388           << " // No instruction-level machine model.\\n";

1389      if (PM.hasItineraries())

1390        OS << "  " << PM.ItinsDef->getName() << ",\\n";

1391      else

1392        OS << "  nullptr, // No Itinerary\\n";

1393      if (PM.hasExtraProcessorInfo())

1394        OS << "  &" << PM.ModelName << "ExtraInfo,\\n";

1395      else

1396        OS << "  nullptr // No extra processor descriptor\\n";

1397      OS << ";\\n";

1398   

1399 

我们知道在CodeGenSchedModels的ProcModels容器里,第一个调度模型是等价于空指针的NoSchedModel。首先通过1359行起的代码输出这个模型:

static const llvm::MCSchedModel NoSchedModel =

  MCSchedModel::DefaultIssueWidth,

  MCSchedModel::DefaultMicroOpBufferSize,

  MCSchedModel::DefaultLoopMicroOpBufferSize,

  MCSchedModel::DefaultLoadLatency,

  MCSchedModel::DefaultHighLatency,

  MCSchedModel::DefaultMispredictPenalty,

  false, // PostRAScheduler

  false, // CompleteModel

  0, // Processor ID

  nullptr, nullptr, 0, 0, // No instruction-level machine model.

  nullptr, // No Itinerary

  nullptr // No extra processor descriptor

;

而对于定义了回收控制单元、寄存器文件、性能计数器的处理器来说,还需要上面1349行的EmitExtraProcessorInfo()来输出额外的处理器信息:

753     void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,

754                                                   raw_ostream &OS)

755       // Generate a table of register file descriptors (one entry per each user

756       // defined register file), and a table of register costs.

757       unsigned NumCostEntries = EmitRegisterFileTables(ProcModel, OS);

758    

759       // Generate a table of ProcRes counter names.

760       const bool HasPfmIssueCounters = EmitPfmIssueCountersTable(ProcModel, OS);

761    

762       // Now generate a table for the extra processor info.

763       OS << "\\nstatic const llvm::MCExtraProcessorInfo " << ProcModel.ModelName

764          << "ExtraInfo = \\n  ";

765    

766       // Add information related to the retire control unit.

767       EmitRetireControlUnitInfo(ProcModel, OS);

768    

769       // Add information related to the register files (i.e. where to find register

770       // file descriptors and register costs).

771       EmitRegisterFileInfo(ProcModel, ProcModel.RegisterFiles.size(),

772                            NumCostEntries, OS);

773    

774       EmitPfmCounters(ProcModel, HasPfmIssueCounters, OS);

775    

776       OS << ";\\n";

777    

首先由EmitRegisterFileTables()输出寄存器重命名代价表与寄存器文件描述表。

646     unsigned

647     SubtargetEmitter::EmitRegisterFileTables(const CodeGenProcModel &ProcModel,

648                                              raw_ostream &OS)

649       if (llvm::all_of(ProcModel.RegisterFiles, [](const CodeGenRegisterFile &RF)

650             return RF.hasDefaultCosts();

651           ))

652         return 0;

653    

654       // Print the RegisterCost table first.

655       OS << "\\n// RegisterClassID, Register Cost\\n";

656       OS << "static const llvm::MCRegisterCostEntry " << ProcModel.ModelName

657          << "RegisterCosts"

658          << "[] = \\n";

659    

660       for (const CodeGenRegisterFile &RF : ProcModel.RegisterFiles)

661         // Skip register files with a default cost table.

662         if (RF.hasDefaultCosts())

663           continue;

664         // Add entries to the cost table.

665         for (const CodeGenRegisterCost &RC : RF.Costs)

666           OS << "  ";

667           Record *Rec = RC.RCDef;

668           if (Rec->getValue("Namespace"))

669             OS << Rec->getValueAsString("Namespace") << "::";

670           OS << Rec->getName() << "RegClassID, " << RC.Cost << ",\\n";

671        

672      

673       OS << ";\\n";

674    

675       // Now generate a table with register file info.

676       OS << "\\n // Name, #PhysRegs, #CostEntries, IndexToCostTbl\\n";

677       OS << "static const llvm::MCRegisterFileDesc " << ProcModel.ModelName

678          << "RegisterFiles"

679          << "[] = \\n"

680          << "  \\"InvalidRegisterFile\\", 0, 0, 0 ,\\n";

681       unsigned CostTblIndex = 0;

682    

683       for (const CodeGenRegisterFile &RD : ProcModel.RegisterFiles)

684         OS << "  ";

685         OS << '"' << RD.Name << '"' << ", " << RD.NumPhysRegs << ", ";

686         unsigned NumCostEntries = RD.Costs.size();

687         OS << NumCostEntries << ", " << CostTblIndex << ",\\n";

688         CostTblIndex += NumCostEntries;

689      

690       OS << ";\\n";

691    

692       return CostTblIndex;

693    

在RegisterFile的定义里(参考RegisterFile的定义一节),如果RegCosts部分是缺省的,表示重命名代价是一个物理寄存器,这时CodeGenSchedModels::collectRegisterFiles()会自动给相应的CodeGenRegisterFile实例的Costs里赋值1。

上面这段代码,以Zen处理器为例,输出这两个表:

// RegisterClassID, Register Cost

static const llvm::MCRegisterCostEntry Znver1ModelRegisterCosts[] =

  X86::VR64RegClassID, 1,

  X86::VR128RegClassID, 1,

  X86::VR256RegClassID, 2,

  X86::GR64RegClassID, 1,

  X86::CCRRegClassID, 1,

;

 

 // Name, #PhysRegs, #CostEntries, IndexToCostTbl

static const llvm::MCRegisterFileDesc Znver1ModelRegisterFiles[] =

  "InvalidRegisterFile", 0, 0, 0 ,

  "ZnFpuPRF", 160, 3, 0,

  "ZnIntegerPRF", 168, 2, 3,

;

接下来,对于定义了性能计数器的处理器,比如SandyBridge,输出计数器表(参考处理器的性能计数器定义一节)。

695     static bool EmitPfmIssueCountersTable(const CodeGenProcModel &ProcModel,

696                                           raw_ostream &OS)

697       unsigned NumCounterDefs = 1 + ProcModel.ProcResourceDefs.size();

698       std::vector<const Record *> CounterDefs(NumCounterDefs);

699       bool HasCounters = false;

700       for (const Record *CounterDef : ProcModel.PfmIssueCounterDefs)

701         const Record *&CD = CounterDefs[ProcModel.getProcResourceIdx(

702             CounterDef->getValueAsDef("Resource"))];

703         if (CD)

704           PrintFatalError(CounterDef->getLoc(),

705                           "multiple issue counters for " +

706                               CounterDef->getValueAsDef("Resource")->getName());

707        

708         CD = CounterDef;

709         HasCounters = true;

710      

711       if (!HasCounters)

712         return false;

713      

714       OS << "\\nstatic const char* " << ProcModel.ModelName

715          << "PfmIssueCounters[] = \\n";

716       for (unsigned i = 0; i != NumCounterDefs; ++i)

717         const Record *CounterDef = CounterDefs[i];

718         if (CounterDef)

719           const auto PfmCounters = CounterDef->getValueAsListOfStrings("Counters");

720           if (PfmCounters.empty())

721             PrintFatalError(CounterDef->getLoc(), "empty counter list");

722           OS << "  \\"" << PfmCounters[0];

723           for (unsigned p = 1, e = PfmCounters.size(); p != e; ++p)

724             OS << ",\\" \\"" << PfmCounters[p];

725           OS << "\\",  // #" << i << " = ";

726           OS << CounterDef->getValueAsDef("Resource")->getName() << "\\n";

727         else

728           OS << "  nullptr, // #" << i << "\\n";

729        

730      

731       OS << ";\\n";

732       return true;

733    

以SandyBridge的一个定义为例,这个定义是(X86PfmCounters.td):

14       let SchedModel = SandyBridgeModel in

15       def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;

16       def SBPort0Counter : PfmIssueCounter<SBPort0, ["uops_dispatched_port:port_0"]>;

17       def SBPort1Counter : PfmIssueCounter<SBPort1, ["uops_dispatched_port:port_1"]>;

18       def SBPort23Counter : PfmIssueCounter<SBPort23,

19                                             ["uops_dispatched_port:port_2",

20                                              "uops_dispatched_port:port_3"]>;

21       def SBPort4Counter : PfmIssueCounter<SBPort4, ["uops_dispatched_port:port_4"]>;

22       def SBPort5Counter : PfmIssueCounter<SBPort5, ["uops_dispatched_port:port_5"]>;

23      

上述代码将输出这样的数组,这个数组的大小是资源单元定义的个数加1,因此数组是以处理器资源的序号为索引的:

static const char* SandyBridgeModelPfmIssueCounters[] =

  nullptr, // #0

  nullptr, // #1

  nullptr, // #2

  "uops_dispatched_port:port_0",  // #3 = SBPort0

  "uops_dispatched_port:port_1",  // #4 = SBPort1

  "uops_dispatched_port:port_4",  // #5 = SBPort4

  "uops_dispatched_port:port_5",  // #6 = SBPort5

  nullptr, // #7

  nullptr, // #8

  nullptr, // #9

  "uops_dispatched_port:port_2," "uops_dispatched_port:port_3",  // #10 = SBPort23

  nullptr, // #11

  nullptr, // #12

;

接着通过EmitRetireControlUnitInfo()、EmitRegisterFileInfo()、EmitPfmCounters()输出所谓的ExtraInfo。它不是数组,而是一个类型为结构体MCExtraProcessorInfo的对象:

170     struct MCExtraProcessorInfo

171       // Actual size of the reorder buffer in hardware.

172       unsigned ReorderBufferSize;

173       // Number of instructions retired per cycle.

174       unsigned MaxRetirePerCycle;

175       const MCRegisterFileDesc *RegisterFiles;

176       unsigned NumRegisterFiles;

177       const MCRegisterCostEntry *RegisterCostTable;

178       unsigned NumRegisterCostEntries;

179    

180       struct PfmCountersInfo

181         // An optional name of a performance counter that can be used to measure

182         // cycles.

183         const char *CycleCounter;

184    

185         // For each MCProcResourceDesc defined by the processor, an optional list of

186         // names of performance counters that can be used to measure the resource

187         // utilization.

188         const char **IssueCounters;

189       ;

190       PfmCountersInfo PfmCounters;

191     ;

因此,将输出这样的一个结构体定义:

static const llvm::MCExtraProcessorInfo SandyBridgeModelExtraInfo =

  0, // ReorderBufferSize

  0, // MaxRetirePerCycle

  nullptr,

  0, // Number of register files.

  nullptr,

  0, // Number of register cost entries.

 

    "unhalted_core_cycles",  // Cycle counter.

    SandyBridgeModelPfmIssueCounters

 

;

最后对有调度类型被重新映射的处理器模型,由下面的方法输出特定的数据结构。

779     void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,

780                                                   raw_ostream &OS)

781       EmitProcessorResourceSubUnits(ProcModel, OS);

782    

783       OS << "\\n// Name, NumUnits, SuperIdx, IsBuffered, SubUnitsIdxBegin\\n";

784       OS << "static const llvm::MCProcResourceDesc " << ProcModel.ModelName

785          << "ProcResources"

786          << "[] = \\n"

787          << "  \\"InvalidUnit\\", 0, 0, 0, 0,\\n";

788    

789       unsigned SubUnitsOffset = 1;

790       for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i)

791         Record *PRDef = ProcModel.ProcResourceDefs[i];

792    

793         Record *SuperDef = nullptr;

794         unsigned SuperIdx = 0;

795         unsigned NumUnits = 0;

796         const unsigned SubUnitsBeginOffset = SubUnitsOffset;

797         int BufferSize = PRDef->getValueAsInt("BufferSize");

798         if (PRDef->isSubClassOf("ProcResGroup"))

799           RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources");

800           for (Record *RU : ResUnits)

801             NumUnits += RU->getValueAsInt("NumUnits");

802             SubUnitsOffset += RU->getValueAsInt("NumUnits");

803          

804        

805         else

806           // Find the SuperIdx

807           if (PRDef->getValueInit("Super")->isComplete())

808             SuperDef =

809                 SchedModels.findProcResUnits(PRDef->getValueAsDef("Super"),

810                                              ProcModel, PRDef->getLoc());

811             SuperIdx = ProcModel.getProcResourceIdx(SuperDef);

812          

813           NumUnits = PRDef->getValueAsInt("NumUnits");

814        

815         // Emit the ProcResourceDesc

816         OS << "  \\"" << PRDef->getName() << "\\", ";

817         if (PRDef->getName().size() < 15)

818           OS.indent(15 - PRDef->getName().size());

819         OS << NumUnits << ", " << SuperIdx << ", " << BufferSize << ", ";

820         if (SubUnitsBeginOffset != SubUnitsOffset)

821           OS << ProcModel.ModelName << "ProcResourceSubUnits + "

822              << SubUnitsBeginOffset;

823         else

824           OS << "nullptr";

825        

826         OS << ", // #" << i+1;

827         if (SuperDef)

828           OS << ", Super=" << SuperDef->getName();

829         OS << "\\n";

830      

831       OS << ";\\n";

832    

首先是输出资源子单元的描述。前面看到TD通过ProcResGroup或者ProcResource来组织与描述资源,资源描述的最小单位是资源单元(ProcResourceUnits),因此这里尝试输出一个以资源单元描述的资源组。

593     void SubtargetEmitter::EmitProcessorResourceSubUnits(

594         const CodeGenProcModel &ProcModel, raw_ostream &OS)

595       OS << "\\nstatic const unsigned " << ProcModel.ModelName

596          << "ProcResourceSubUnits[] = \\n"

597          << "  0,  // Invalid\\n";

598    

599       for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i)

600         Record *PRDef = ProcModel.ProcResourceDefs[i];

601         if (!PRDef->isSubClassOf("ProcResGroup"))

602           continue;

603         RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources");

604         for (Record *RUDef : ResUnits)

605           Record *const RU =

606               SchedModels.findProcResUnits(RUDef, ProcModel, PRDef->getLoc());

607           for (unsigned J = 0; J < RU->getValueAsInt("NumUnits"); ++J)

608             OS << "  " << ProcModel.getProcResourceIdx(RU) << ", ";

609          

610        

611         OS << "  // " << PRDef->getName() << "\\n";

612      

613       OS << ";\\n";

614    

以SandyBridge为例,将输出这样的一个数组:

static const unsigned SandyBridgeModelProcResourceSubUnits[] =

  0,  // Invalid

  3,   4,   // SBPort01

  3,   6,   // SBPort05

  4,   6,   // SBPort15

  3,   4,   6,   // SBPort015

  3,   4,   10,   10,   5,   6,   // SBPortAny

;

SubtargetEmitter::EmitProcessorResources()的主体输出资源单元的描述结构体。这个结构体的类型是MCProcResourceDesc:

32       struct MCProcResourceDesc

33         const char *Name;

34         unsigned NumUnits; // Number of resource of this kind

35         unsigned SuperIdx; // Index of the resources kind that contains this kind.

36      

37         // Number of resources that may be buffered.

38         //

39         // Buffered resources (BufferSize != 0) may be consumed at some indeterminate

40         // cycle after dispatch. This should be used for out-of-order cpus when

41         // instructions that use this resource can be buffered in a reservation

42         // station.

43         //

44         // Unbuffered resources (BufferSize == 0) always consume their resource some

45         // fixed number of cycles after dispatch. If a resource is unbuffered, then

46         // the scheduler will avoid scheduling instructions with conflicting resources

47         // in the same cycle. This is for in-order cpus, or the in-order portion of

48         // an out-of-order cpus.

49         int BufferSize;

50      

51         // If the resource has sub-units, a pointer to the first element of an array

52         // of `NumUnits` elements containing the ProcResourceIdx of the sub units.

53         // nullptr if the resource does not have sub-units.

54         const unsigned *SubUnitsIdxBegin;

55      

56         bool operator==(const MCProcResourceDesc &Other) const

57           return NumUnits == Other.NumUnits && SuperIdx == Other.SuperIdx

58             && BufferSize == Other.BufferSize;

59        

60       ;

因此,输出的数据是:

// Name, NumUnits, SuperIdx, IsBuffered, SubUnitsIdxBegin

static const llvm::MCProcResourceDesc SandyBridgeModelProcResources[] =

  "InvalidUnit", 0, 0, 0, 0,

  "SBDivider",       1, 0, -1, nullptr, // #1

  "SBFPDivider",     1, 0, -1, nullptr, // #2

  "SBPort0",         1, 0, -1, nullptr, // #3

  "SBPort1",         1, 0, -1, nullptr, // #4

  "SBPort4",         1, 0, -1, nullptr, // #5

  "SBPort5",         1, 0, -1, nullptr, // #6

  "SBPort01",        2, 0, -1, SandyBridgeModelProcResourceSubUnits + 1, // #7

  "SBPort05",        2, 0, -1, SandyBridgeModelProcResourceSubUnits + 3, // #8

  "SBPort15",        2, 0, -1, SandyBridgeModelProcResourceSubUnits + 5, // #9

  "SBPort23",        2, 0, -1, nullptr, // #10

  "SBPort015",       3, 0, -1, SandyBridgeModelProcResourceSubUnits + 7, // #11

  "SBPortAny",       6, 0, 54, SandyBridgeModelProcResourceSubUnits + 10, // #12

;

SandyBridge没有使用超级资源的描述方法,因此SuperIdx域都是0。而IsBuffered域实际上是缓冲的大小,BufferSize = -1表示发布端口由一体化保留站填充。

回到SubtargetEmitter::run(),下面的代码输出一个重要的方法:InitX86MCSubtargetInfo()。

SubtargetEmitter::run(续)

1437    // MCInstrInfo initialization routine.                                                                                     <- v7.0删除

1438    OS << "static inline void Init" << Target

1439       << "MCSubtargetInfo(MCSubtargetInfo *II, "

1440       << "const Triple &TT, StringRef CPU, StringRef FS) \\n";

1441    OS << "  II->InitMCSubtargetInfo(TT, CPU, FS, ";

 

  // MCInstrInfo initialization routine.                                                                                     <- v7.0增加

  emitGenMCSubtargetInfo(OS);

 

  OS << "\\nstatic inline MCSubtargetInfo *create" << Target

     << "MCSubtargetInfoImpl("

     << "const Triple &TT, StringRef CPU, StringRef FS) \\n";

  OS << "  return new " << Target << "GenMCSubtargetInfo(TT, CPU, FS, ";

1442    if (NumFeatures)

1443      OS << Target << "FeatureKV, ";

1444    else

1445      OS << "None, ";

1446    if (NumProcs)

1447      OS << Target << "SubTypeKV, ";

1448    else

1449      OS << "None, ";

1450    OS << '\\n'; OS.indent(22);

1451

以上是关于LLVM学习笔记(43-2)的主要内容,如果未能解决你的问题,请参考以下文章:

LLVM学习笔记(52)

LLVM学习笔记(54)

LLVM学习笔记(54)

LLVM学习笔记(44-2)

LLVM学习笔记(53)

LLVM学习笔记(53)