SlideShare uma empresa Scribd logo
1 de 41
Baixar para ler offline
è
–
è
–
$ docker build -f docker/Dockerfile.tmpl -t chainer-compiler:cuda-10.0 .
$ nvidia-docker run -i -t --cap-add=SYS_PTRACE --security-opt="seccomp=unconfined" \
chainer-compiler:cuda-10.0 /bin/zsh
è
=================================== FAILURES ===================================
____________________________ TestPReLU.test_output _____________________________
self = <tests.functions_tests.test_activations.TestPReLU testMethod=test_output>
def test_output(self):
> self.expect(self.model, self.x)
third_party/onnx-chainer/tests/functions_tests/test_activations.py:61:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
third_party/onnx-chainer/tests/helper.py:106: in expect
self.check_out_values(test_path, input_names=graph_input_names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test_path = 'out/opset7/test_prelu', input_names = ['Input_0']
def check_model_expect(test_path, input_names=None):
if not ONNXRUNTIME_AVAILABLE:
> raise ImportError('ONNX Runtime is not found on checking module.')
E ImportError: ONNX Runtime is not found on checking module.
third_party/onnx-chainer/onnx_chainer/testing/test_onnxruntime.py:39: ImportError
è
–
–
–
è
è
–
–
$ ./setup.sh
$ ./build/tools/run_onnx --device cuda --test data/resnet50 --trace
$ ./build/tools/run_onnx --device cuda --test data/mnist --trace
è
è
–
–
Check `chainerx::Shape(type.shape().begin(), type.shape().end())' == `a.shape()' failed! in CheckType
at ../runtime/xcvm.cc:57: ((1, 8, 28, 28) vs (1, 8, 24, 24)) Shape check failed in output #0: Conv(Input3,
Parameter5) -> (Convolution28_Output_0)
zsh: abort ./build/tools/run_onnx --device cuda --test data/mnist --trace
name: "Convolution28"
op_type: "Conv"
Attribute {
name: "auto_pad"
s: "SAME_UPPER"
type: STRING
}
auto pads = [&node]() {
std::vector<int64_t> pads = node.pads();
+
+ // Complement from auto_pad
+ if (pads.size() == 0) {
+ if (node.auto_pad() == "SAME_UPPER") {
+ const Value* weight = node.input(1);
+ const int pad_ndim = (weight->type().ndim() - 2)*2;
+ CHECK_GT(pad_ndim, 0) << weight->type().DebugString();
+ pads.resize(pad_ndim);
+ for (int i = 0; i < pad_ndim/2; ++i) {
+ pads[i] = pads[i+pad_ndim/2] = weight->type().dims()[i+2] / 2;
+ }
+ }
+ }
compiler/emitter.cc:167
Verifying the result...
OK: Plus214_Output_0
Elapsed: 23.395 msec
OK!
// Entry point of the run_onnx tool: loads an ONNX model, compiles it to
// XCVM via ModelRunner, then executes it on each bundled test case.
// NOTE(review): onnx_path, args, initial_free_bytes and test_cases come
// from the elided argument-parsing code, not from `argv` directly.
void RunMain(const std::vector<std::string>& argv) {
LOG() << "Loading model..." << std::endl;
onnx::ModelProto xmodel(
LoadLargeProto<onnx::ModelProto>(onnx_path)); // Load the ONNX model
Model model(xmodel); // Build a Model from the onnx::ModelProto
/* ... */
ModelRunner model_runner(args, initial_free_bytes, &model); // Run passes on the Model and convert it to XCVM
/* ... */
for (const std::unique_ptr<TestCase>& test_case : test_cases) {
LOG() << "Running for " << test_case->name << std::endl;
InOuts inputs(model_runner.params()); // Seed the inputs with the model parameters.
InOuts outputs(model_runner.Run(inputs)); // Execute the model on the XCVM
}
/* ... */
}
tools/run_onnx.cc
è
class ModelRunner {
public:
ModelRunner(const cmdline::parser& args, int64_t initial_free_bytes, Model* model)
: model_(model), args_(args), initial_free_bytes_(initial_free_bytes) {
if (args.exist("backprop_two_phase")) {
/* ... */
} else {
LOG() << "Constructing model..." << std::endl;
RunDefaultPasses(model->mutable_graph(), args_.exist("backprop")); // 各種パスを適用しLowering
CompileModel(model, &xcvm_); // XCVMに変換
}
/* ... */
}
tools/run_onnx.cc
è
// Compiles `model` into an XCVM program, optionally dumping the
// intermediate ONNX model and the serialized XCVM program to the files
// named by the --out_onnx / --out_xcvm flags.
// NOTE(review): reads args_ and trace_level(), so this appears to be a
// ModelRunner member despite being shown standalone — confirm.
void CompileModel(Model* model, std::unique_ptr<XCVM>* xcvm, const char* name = nullptr, bool gen_backprop = false) {
std::string out_onnx = args_.get<std::string>("out_onnx");
if (!out_onnx.empty()) {
onnx::ModelProto xmodel;
model->ToONNX(&xmodel);
std::ofstream ofs(out_onnx);
CHECK(xmodel.SerializeToOstream(&ofs)); // Serialize the ONNX model to the file
}
LOG() << "Generate code..." << std::endl;
XCProgramProto xcvm_prog;
xcvm::Emit(*model, &xcvm_prog, trace_level() > 0); // Convert the Model into an XCProgramProto
const std::string out_xcvm = args_.get<std::string>("out_xcvm");
if (!out_xcvm.empty()) {
std::ofstream ofs(out_xcvm);
CHECK(ofs) << "Failed to open output XCVM: " << out_xcvm;
CHECK(xcvm_prog.SerializeToOstream(&ofs)); // Serialize the XCVM program to the file
}
xcvm->reset(new XCVM(xcvm_prog)); // Wrap the XCProgramProto in an executable XCVM
}
tools/run_onnx.cc
è
–
–
è
è
–
// In-memory representation of an onnx::ModelProto: keeps the model-level
// metadata fields and owns the (single) top-level graph.
class Model {
public:
explicit Model(const onnx::ModelProto& xmodel);
private:
// The fields below mirror onnx::ModelProto one-to-one.
int64_t ir_version_;
std::vector<onnx::OperatorSetIdProto> opset_import_;
std::string producer_name_;
std::string producer_version_;
std::string domain_;
int64_t model_version_;
std::string doc_string_;
std::map<std::string, std::string> metadata_props_;
std::unique_ptr<Graph> graph_; // Owning pointer to the converted graph.
};
compiler/model.h
è
–
// In-memory representation of an onnx::GraphProto. Owns every Value and
// Node (via the unique_ptr buffers) and keeps non-owning views of them
// partitioned by role.
class Graph {
public:
explicit Graph(const onnx::GraphProto& xgraph);
private:
std::vector<Value*> output_values_; // graph outputs (non-owning)
std::vector<Value*> input_values_; // graph inputs (non-owning)
std::vector<Value*> temp_values_; // intermediate values (non-owning)
std::vector<std::unique_ptr<Value>> all_values_; // owns every Value
std::vector<Node*> nodes_; // live nodes (non-owning)
std::vector<std::unique_ptr<Node>> nodes_buf_; // owns every Node, including detached ones — confirm
std::string name_;
std::string doc_string_;
std::map<std::string, int> ids_; // presumably counters for unique value naming — confirm
};
compiler/graph.h
è
–
// A single operator in the graph. Generated attribute accessors
// (pads(), group(), auto_pad(), ...) live in the generated base class
// NodeBase (see build/compiler/gen_node_base.h).
class Node : public NodeBase {
public:
// Construct from an ONNX node, or synthesize a fresh node directly.
Node(const onnx::NodeProto& xnode, const std::vector<Value*>& inputs, const
std::vector<Value*>& outputs);
Node(const std::string& name, OpType op_type, const std::vector<Value*>& inputs, const
std::vector<Value*>& outputs);
private:
std::vector<Value*> inputs_; // non-owning; Values are owned by Graph
std::vector<Value*> outputs_; // non-owning
std::string name_;
std::string domain_;
std::string doc_string_;
bool detached_ = false; // presumably set by Graph::DetachNode when removed — confirm
};
compiler/node.h
è
–
class NodeBase {
public:
enum OpType {
kIdentity,
kNeg,
...
};
protected:
std::vector<float> activation_alpha_;
bool was_activation_alpha_set_ = false;
std::vector<float> activation_beta_;
...
};
build/compiler/gen_node_base.h
# Declaration of one operator for the code generators: records its
# op_type, arity and attribute definitions, and registers itself into
# the global NODES list consumed by gen_gen_node_base_h() below.
# (Indentation was lost in this excerpt.)
class NodeDef(object):
def __init__(self, op_type, num_inputs, num_outputs, **kwargs):
self.op_type = op_type
self.num_inputs = num_inputs
self.num_outputs = num_outputs
# Remaining keyword args describe per-op attributes; global attrs
# are merged into every node definition.
self.attributes = kwargs
self.attributes.update(CHAINER_COMPILERX_GLOBAL_ATTRS)
self.attr_defs = {} # To be filled after parsed.
NODES.append(self)  # self-registration side effect
NodeDef('Identity', 1, 1)
NodeDef('Neg', 1, 1)
…
def gen_gen_node_base_h():
public_lines = []
private_lines = []
public_lines.append('enum OpType {‘)
for node in NODES:
public_lines.append('k%s,' % (node.op_type))
…
compiler/gen_node.py
è
–
// A tensor-valued edge of the graph (ONNX ValueInfoProto), annotated
// with def-use links that the optimization passes rely on.
class Value {
public:
enum Kind { kTemp = 0, kInput = 1, kOutput = 2, kNull = 4 }; // bit-flag style values
Value(const onnx::ValueInfoProto& xvalue, Kind kind);
private:
Kind kind_{Kind::kTemp};
std::string name_;
std::unique_ptr<Type> type_;
std::string doc_string_;
std::unique_ptr<Tensor> initializer_; // presumably non-null for weight inputs — confirm
std::vector<Node*> users_; // nodes consuming this value (non-owning)
Node* producer_ = nullptr; // node producing this value, if any (non-owning)
Value* grad_ = nullptr; // gradient value, presumably set by the backprop pass
int counter_ = 0; // purpose unclear from this excerpt — confirm
};
compiler/value.h
è
–
// Constant tensor data (ONNX TensorProto) with malloc-allocated storage
// released via std::free through the unique_ptr deleter.
class Tensor {
public:
typedef std::unique_ptr<void, decltype(&std::free)> UniqueData; // free()-based deleter for C-allocated buffers
explicit Tensor(const onnx::TensorProto& xtensor);
private:
std::vector<int64_t> dims_;
Dtype dtype_;
UniqueData data_; // raw element buffer; layout presumably dims_-major — confirm
std::string name_;
std::string doc_string_;
};
compiler/tensor.h
è
è
–
–
•
// Runs type inference for a single node. Despite the name, only dtype
// inference is performed here — shape inference is not implemented in
// this excerpt, so the "AndShape" suffix is currently aspirational.
void InferDtypeAndShape(Node* node) {
InferDtype(node);
}
// Runs dtype inference over every node in the graph, in topological
// order so each node's input dtypes are resolved before it is visited.
void InferAllDtypeAndShape(Graph* graph) {
for (Node* node : graph->GetTopologicallySortedNodes()) {
InferDtypeAndShape(node);
}
}
compiler/type_inference.cc
è
// Returns the dtype produced by combining two operand dtypes, using
// NumPy-like promotion rules:
//   - identical dtypes pass through; kUnknown is contagious;
//   - float wins over non-float;
//   - otherwise the larger (wider) type wins;
//   - at equal width, bool yields to the other type;
//   - uint8 mixed with a same-width signed type promotes to int16,
//     the smallest signed type holding all uint8 values.
// Any combination not covered above is a fatal CHECK failure.
Dtype CoerceDtype(Dtype dtype0, Dtype dtype1) {
if (dtype0 == dtype1) return dtype0;
if (dtype0 == Dtype::kUnknown || dtype1 == Dtype::kUnknown) return Dtype::kUnknown;
if (dtype0.IsFloat() && !dtype1.IsFloat()) return dtype0;
if (!dtype0.IsFloat() && dtype1.IsFloat()) return dtype1;
if (dtype0.SizeOf() > dtype1.SizeOf()) return dtype0;
if (dtype0.SizeOf() < dtype1.SizeOf()) return dtype1;
// From here on: same size, different dtypes, both float or both non-float.
if (dtype1 == Dtype::kBool) return dtype0;
if (dtype0 == Dtype::kBool) return dtype1;
if (dtype0 == Dtype::kUInt8 || dtype1 == Dtype::kUInt8) return Dtype::kInt16;
CHECK(false) << "Unknown type coerce: " << dtype0.ToString() << " vs " << dtype1.ToString();
}
void InferDtype(Node* node) {
…
case Node::kConv: case Node::kConvTranspose: case Node::kChainerConvGradWeight: {
Dtype dtype = CoerceDtype(in0, in1);
if (node->inputs().size() >= 3) dtype = CoerceDtype(dtype, node->input(2)->type().dtype());
oset(0, dtype);
break;
}
…
}
compiler/dtype_inference.cc
è
// Repeatedly applies registered per-op simplifiers to the graph until a
// fixed point is reached (one rewrite can enable another). A simplifier
// returns true when it replaced the node with an equivalent subgraph,
// after which the original node is detached from the live graph.
// NOTE(review): ccfg and gen_backprop are unused in this excerpt —
// presumably consumed by the elided registration code.
void Simplify(const CompilerConfig& ccfg, Graph* graph, bool gen_backprop) {
    std::map<Node::OpType, SimplifierFn> simplifiers;
    /* Registration of each Node's simplifier */
    CHECK(simplifiers.emplace(Node::kConv, ReplaceConv).second);
    // Fix: `replaced` was used without a declaration; initialize to true
    // so the loop runs at least once (mirrors PropagateConstants).
    bool replaced = true;
    while (replaced) {
        replaced = false;
        for (Node* node : graph->GetLiveNodes()) {
            auto found = simplifiers.find(node->op_type());
            if (found == simplifiers.end()) continue;
            if (found->second(graph, node)) {
                // Simplifier emitted a replacement; retire the original node.
                graph->DetachNode(node);
                replaced = true;
            }
        }
    }
}
compiler/simplifier.cc
è
// Rewrites a grouped convolution (group > 1) into `group` independent
// convolutions: Split the input along the channel axis, convolve each
// chunk with its weight slice, then Concat the results back together.
// Returns false (no rewrite) for the common group == 1 case.
bool ReplaceConv(Graph* graph, Node* node) {
CHECK_LT(0, node->group());
if (node->group() == 1) return false;
GraphBuilder gb(graph, "SimplifyConvGroup", node->output(0));
// Split the input.
std::vector<Value*> inputs;
for (int i = 0; i < node->group(); ++i) {
inputs.push_back(gb.Temp());
}
gb.MOp(Node::kSplit, {node->input(0)}, inputs)->set_axis(1); // axis 1 == channels (NCHW)
/* Split handling for weight, bias and output (elided) */
// NOTE(review): `outputs` is declared in the elided section above.
gb.Op(Node::kConcat, outputs, node->output(0))->producer()->set_axis(1);
return true;
}
compiler/simplifier.cc
è
–
// Replaces the outputs of a node whose inputs were all evaluated at
// compile time with Constant nodes holding the precomputed tensors.
// NOTE(review): next_values is produced by the elided evaluation code.
void DoConstantPropagation(Graph* graph, Node* node) {
/* Collect and evaluate the node's inputs (elided) */
for (size_t i = 0; i < next_values.size(); ++i) {
auto& next_value = next_values[i];
GraphBuilder gb(graph, "Const", node->output(i));
if (next_value->is_tensor()) {
// Emit a Constant node that produces the precomputed tensor.
gb.Op(Node::kConstant, {}, node->output(i))->producer()->set_tensor_value(next_value->ReleaseTensor());
}
}
/* Detach the replaced node (elided) */
}
// Constant-folds every node whose inputs are all constants, iterating
// to a fixed point since folding one node can make its users foldable.
void PropagateConstants(Graph* graph) {
    for (bool changed = true; changed;) {
        changed = false;
        for (Node* node : graph->GetLiveNodes()) {
            if (!HasConstantInputsOnly(*node)) continue;
            if (MaybePropagateConstant(graph, node)) {
                changed = true;
            }
        }
    }
}
compiler/constant_propagation.cc
è
–
// Fuses connected chains of element-wise operations into single fused
// subgraphs tagged "nvrtc" (presumably JIT-compiled via NVRTC at
// runtime — confirm against the runtime side).
void FuseElementwiseOperations(Graph* graph) {
// Ops allowed to participate in an element-wise fusion group.
const std::set<Node::OpType> fusable_ops = {
Node::kIdentity,
Node::kAdd,
/* ... */
};
auto is_fusable = [&fusable_ops](const Node& node) {
/* ... */
};
// 2 = minimum number of connected nodes worth fusing.
FuseAllConnectedNodes("nvrtc", graph, 2, is_fusable);
}
// Runs the enabled fusion passes. Backend-specific fusion (nGraph, TVM)
// runs first; generic element-wise fusion runs last — presumably so it
// only picks up what the backends left behind. Confirm ordering intent.
void FuseOperations(Graph* graph, bool use_tvm, bool use_ngraph) {
/* Fusion of subgraphs (elided) */
if (use_ngraph) { FuseNGraphOperations(graph); }
if (use_tvm) { FuseTVMOperations(graph); }
FuseElementwiseOperations(graph);
}
compiler/fusion.cc
è
–
•
–
–
•
è
–
è
–
è
–
è
–
è
–
è
–
è
–
è
–
è
è
è
–
è
–
// Executes the compiled XCVM program on the given named inputs and
// returns the produced named outputs. Appears to be a ModelRunner
// member (uses xcvm_ / xcvm_opts_ / trace_level()).
InOuts Run(const InOuts& inputs) {
if (trace_level()) std::cerr << "Running XCVM..." << std::endl;
InOuts outputs = xcvm_->Run(inputs, xcvm_opts_);
/* ... */
return outputs;
}
compiler/run_onnx.cc
void XCVM::Run(XCVMState* state) {
/* Stateの初期化 */
while (true) {
int pc = state->pc();
if (pc >= program_.size()) break;
XCVMOp* op = program_[pc].get();
try {
op->Run(state);
} catch (...) {
std::cerr << "Exception in " << op->debug_info() << std::endl;
throw;
}
compiler/run_onnx.cc
è
–
// Base class for one XCVM instruction: wraps the serialized
// XCInstructionProto and is executed against VM state via Run().
// NOTE(review): no virtual destructor is visible here — deleting a
// derived op through XCVMOp* would be UB; confirm one is declared in
// the elided part of the header.
class XCVMOp {
public:
explicit XCVMOp(const XCInstructionProto& inst);
virtual void Run(XCVMState* state) = 0; // execute this instruction, mutating VM state
protected:
XCInstructionProto inst_; // full instruction, kept for debug info
const int64_t id_;
const XCInstructionProto::Op op_; // opcode
const std::string name_;
};
compiler/xcvm/xcvm_op.h
class ConvOp : public XCVMOp {
public:
explicit ConvOp(const XCInstructionProto& inst);
chainerx::Array RunImpl(XCVMState* st, const chainerx::Array& x,
const chainerx::Array& w, const nonstd::optional<chainerx::Array>& b);
virtual void Run(XCVMState* st);
private:
int x;
int w;
int b;
chainerx::StackVector<int64_t, chainerx::kMaxNdim> strides;
chainerx::StackVector<int64_t, chainerx::kMaxNdim> pads;
int y;
};
build/runtime/gen_xcvm_ops.h
è
–
è
–
// Generated dispatcher for the Conv instruction: fetches the operand
// arrays from VM state by register index (x, w, optional bias b), runs
// the kernel, and stores the result into register y.
void ConvOp::Run(XCVMState* st) {
/* ... */
st->SetArray(y, RunImpl(st, st->GetArray(x), st->GetArray(w), st->GetOptionalArray(b)));
/* ... */
}
build/runtime/gen_xcvm_ops.cc
// Executes the Conv instruction by delegating to chainerx::Conv.
// ComplementStride/ComplementPad presumably expand the stride/pad
// attribute vectors to the input's spatial rank with defaults when the
// instruction left them empty — confirm their exact semantics.
chainerx::Array ConvOp::RunImpl(
XCVMState* st, const chainerx::Array& x, const chainerx::Array& w, const nonstd::optional<chainerx::Array>& b)
{
return chainerx::Conv(x, w, b, ComplementStride(strides, x), ComplementPad(pads, x));
}
build/runtime/gen_xcvm_ops.cc

Mais conteúdo relacionado

Mais procurados

Standford 2015 week9
Standford 2015 week9Standford 2015 week9
Standford 2015 week9彼得潘 Pan
 
Евгений Крутько, Многопоточные вычисления, современный подход.
Евгений Крутько, Многопоточные вычисления, современный подход.Евгений Крутько, Многопоточные вычисления, современный подход.
Евгений Крутько, Многопоточные вычисления, современный подход.Platonov Sergey
 
Коварный code type ITGM #9
Коварный code type ITGM #9Коварный code type ITGM #9
Коварный code type ITGM #9Andrey Zakharevich
 
Standford 2015 week3: Objective-C Compatibility, Property List, Views
Standford 2015 week3: Objective-C Compatibility, Property List, ViewsStandford 2015 week3: Objective-C Compatibility, Property List, Views
Standford 2015 week3: Objective-C Compatibility, Property List, Views彼得潘 Pan
 
C++ Lambda and concurrency
C++ Lambda and concurrencyC++ Lambda and concurrency
C++ Lambda and concurrency명신 김
 
Imugi: Compiler made with Python
Imugi: Compiler made with PythonImugi: Compiler made with Python
Imugi: Compiler made with PythonHan Lee
 
Javascript & Ajax Basics
Javascript & Ajax BasicsJavascript & Ajax Basics
Javascript & Ajax BasicsRichard Paul
 
Как работает LLVM бэкенд в C#. Егор Богатов ➠ CoreHard Autumn 2019
Как работает LLVM бэкенд в C#. Егор Богатов ➠ CoreHard Autumn 2019Как работает LLVM бэкенд в C#. Егор Богатов ➠ CoreHard Autumn 2019
Как работает LLVM бэкенд в C#. Егор Богатов ➠ CoreHard Autumn 2019corehard_by
 
Basic C++ 11/14 for Python Programmers
Basic C++ 11/14 for Python ProgrammersBasic C++ 11/14 for Python Programmers
Basic C++ 11/14 for Python ProgrammersAppier
 
Chapter 7 functions (c)
Chapter 7 functions (c)Chapter 7 functions (c)
Chapter 7 functions (c)hhliu
 
"Немного о функциональном программирование в JavaScript" Алексей Коваленко
"Немного о функциональном программирование в JavaScript" Алексей Коваленко"Немного о функциональном программирование в JavaScript" Алексей Коваленко
"Немного о функциональном программирование в JavaScript" Алексей КоваленкоFwdays
 

Mais procurados (20)

C++ TUTORIAL 7
C++ TUTORIAL 7C++ TUTORIAL 7
C++ TUTORIAL 7
 
EcmaScript 6
EcmaScript 6 EcmaScript 6
EcmaScript 6
 
Standford 2015 week9
Standford 2015 week9Standford 2015 week9
Standford 2015 week9
 
Cquestions
Cquestions Cquestions
Cquestions
 
Vcs23
Vcs23Vcs23
Vcs23
 
Opp compile
Opp compileOpp compile
Opp compile
 
Евгений Крутько, Многопоточные вычисления, современный подход.
Евгений Крутько, Многопоточные вычисления, современный подход.Евгений Крутько, Многопоточные вычисления, современный подход.
Евгений Крутько, Многопоточные вычисления, современный подход.
 
Коварный code type ITGM #9
Коварный code type ITGM #9Коварный code type ITGM #9
Коварный code type ITGM #9
 
Standford 2015 week3: Objective-C Compatibility, Property List, Views
Standford 2015 week3: Objective-C Compatibility, Property List, ViewsStandford 2015 week3: Objective-C Compatibility, Property List, Views
Standford 2015 week3: Objective-C Compatibility, Property List, Views
 
"let ECMAScript = 6"
"let ECMAScript = 6" "let ECMAScript = 6"
"let ECMAScript = 6"
 
C++ Lambda and concurrency
C++ Lambda and concurrencyC++ Lambda and concurrency
C++ Lambda and concurrency
 
Imugi: Compiler made with Python
Imugi: Compiler made with PythonImugi: Compiler made with Python
Imugi: Compiler made with Python
 
Javascript & Ajax Basics
Javascript & Ajax BasicsJavascript & Ajax Basics
Javascript & Ajax Basics
 
Как работает LLVM бэкенд в C#. Егор Богатов ➠ CoreHard Autumn 2019
Как работает LLVM бэкенд в C#. Егор Богатов ➠ CoreHard Autumn 2019Как работает LLVM бэкенд в C#. Егор Богатов ➠ CoreHard Autumn 2019
Как работает LLVM бэкенд в C#. Егор Богатов ➠ CoreHard Autumn 2019
 
Basic C++ 11/14 for Python Programmers
Basic C++ 11/14 for Python ProgrammersBasic C++ 11/14 for Python Programmers
Basic C++ 11/14 for Python Programmers
 
Chapter 7 functions (c)
Chapter 7 functions (c)Chapter 7 functions (c)
Chapter 7 functions (c)
 
Javascript
JavascriptJavascript
Javascript
 
"Немного о функциональном программирование в JavaScript" Алексей Коваленко
"Немного о функциональном программирование в JavaScript" Алексей Коваленко"Немного о функциональном программирование в JavaScript" Алексей Коваленко
"Немного о функциональном программирование в JavaScript" Алексей Коваленко
 
Groovy
GroovyGroovy
Groovy
 
Container adapters
Container adaptersContainer adapters
Container adapters
 

Semelhante a Chainer-Compiler 動かしてみた

JVM code reading -- C2
JVM code reading -- C2JVM code reading -- C2
JVM code reading -- C2ytoshima
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryDatabricks
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryDatabricks
 
Analysis of Microsoft Code Contracts
Analysis of Microsoft Code ContractsAnalysis of Microsoft Code Contracts
Analysis of Microsoft Code ContractsPVS-Studio
 
Write Python for Speed
Write Python for SpeedWrite Python for Speed
Write Python for SpeedYung-Yu Chen
 
C++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptxC++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptxssuser3cbb4c
 
Modify this code to use multiple threads with the same data1.Modif.pdf
Modify this code to use multiple threads with the same data1.Modif.pdfModify this code to use multiple threads with the same data1.Modif.pdf
Modify this code to use multiple threads with the same data1.Modif.pdfmallik3000
 
Basic c++ 11/14 for python programmers
Basic c++ 11/14 for python programmersBasic c++ 11/14 for python programmers
Basic c++ 11/14 for python programmersJen Yee Hong
 
Dynamic C++ ACCU 2013
Dynamic C++ ACCU 2013Dynamic C++ ACCU 2013
Dynamic C++ ACCU 2013aleks-f
 
How to add an optimization for C# to RyuJIT
How to add an optimization for C# to RyuJITHow to add an optimization for C# to RyuJIT
How to add an optimization for C# to RyuJITEgor Bogatov
 
Whats new in_csharp4
Whats new in_csharp4Whats new in_csharp4
Whats new in_csharp4Abed Bukhari
 
How to Adopt Modern C++17 into Your C++ Code
How to Adopt Modern C++17 into Your C++ CodeHow to Adopt Modern C++17 into Your C++ Code
How to Adopt Modern C++17 into Your C++ CodeMicrosoft Tech Community
 
How to Adopt Modern C++17 into Your C++ Code
How to Adopt Modern C++17 into Your C++ CodeHow to Adopt Modern C++17 into Your C++ Code
How to Adopt Modern C++17 into Your C++ CodeMicrosoft Tech Community
 
Cs pritical file
Cs pritical fileCs pritical file
Cs pritical fileMitul Patel
 
54602399 c-examples-51-to-108-programe-ee01083101
54602399 c-examples-51-to-108-programe-ee0108310154602399 c-examples-51-to-108-programe-ee01083101
54602399 c-examples-51-to-108-programe-ee01083101premrings
 
3 mathematical challenge_code
3 mathematical challenge_code3 mathematical challenge_code
3 mathematical challenge_codeRussell Childs
 

Semelhante a Chainer-Compiler 動かしてみた (20)

JVM code reading -- C2
JVM code reading -- C2JVM code reading -- C2
JVM code reading -- C2
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love Story
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love Story
 
C++ manual Report Full
C++ manual Report FullC++ manual Report Full
C++ manual Report Full
 
Day 1
Day 1Day 1
Day 1
 
Analysis of Microsoft Code Contracts
Analysis of Microsoft Code ContractsAnalysis of Microsoft Code Contracts
Analysis of Microsoft Code Contracts
 
Dive Into PyTorch
Dive Into PyTorchDive Into PyTorch
Dive Into PyTorch
 
Write Python for Speed
Write Python for SpeedWrite Python for Speed
Write Python for Speed
 
C++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptxC++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptx
 
12
1212
12
 
Modify this code to use multiple threads with the same data1.Modif.pdf
Modify this code to use multiple threads with the same data1.Modif.pdfModify this code to use multiple threads with the same data1.Modif.pdf
Modify this code to use multiple threads with the same data1.Modif.pdf
 
Basic c++ 11/14 for python programmers
Basic c++ 11/14 for python programmersBasic c++ 11/14 for python programmers
Basic c++ 11/14 for python programmers
 
Dynamic C++ ACCU 2013
Dynamic C++ ACCU 2013Dynamic C++ ACCU 2013
Dynamic C++ ACCU 2013
 
How to add an optimization for C# to RyuJIT
How to add an optimization for C# to RyuJITHow to add an optimization for C# to RyuJIT
How to add an optimization for C# to RyuJIT
 
Whats new in_csharp4
Whats new in_csharp4Whats new in_csharp4
Whats new in_csharp4
 
How to Adopt Modern C++17 into Your C++ Code
How to Adopt Modern C++17 into Your C++ CodeHow to Adopt Modern C++17 into Your C++ Code
How to Adopt Modern C++17 into Your C++ Code
 
How to Adopt Modern C++17 into Your C++ Code
How to Adopt Modern C++17 into Your C++ CodeHow to Adopt Modern C++17 into Your C++ Code
How to Adopt Modern C++17 into Your C++ Code
 
Cs pritical file
Cs pritical fileCs pritical file
Cs pritical file
 
54602399 c-examples-51-to-108-programe-ee01083101
54602399 c-examples-51-to-108-programe-ee0108310154602399 c-examples-51-to-108-programe-ee01083101
54602399 c-examples-51-to-108-programe-ee01083101
 
3 mathematical challenge_code
3 mathematical challenge_code3 mathematical challenge_code
3 mathematical challenge_code
 

Último

VIP Call Girls Service Hitech City Hyderabad Call +91-8250192130
VIP Call Girls Service Hitech City Hyderabad Call +91-8250192130VIP Call Girls Service Hitech City Hyderabad Call +91-8250192130
VIP Call Girls Service Hitech City Hyderabad Call +91-8250192130Suhani Kapoor
 
SPICE PARK APR2024 ( 6,793 SPICE Models )
SPICE PARK APR2024 ( 6,793 SPICE Models )SPICE PARK APR2024 ( 6,793 SPICE Models )
SPICE PARK APR2024 ( 6,793 SPICE Models )Tsuyoshi Horigome
 
(PRIYA) Rajgurunagar Call Girls Just Call 7001035870 [ Cash on Delivery ] Pun...
(PRIYA) Rajgurunagar Call Girls Just Call 7001035870 [ Cash on Delivery ] Pun...(PRIYA) Rajgurunagar Call Girls Just Call 7001035870 [ Cash on Delivery ] Pun...
(PRIYA) Rajgurunagar Call Girls Just Call 7001035870 [ Cash on Delivery ] Pun...ranjana rawat
 
The Most Attractive Pune Call Girls Budhwar Peth 8250192130 Will You Miss Thi...
The Most Attractive Pune Call Girls Budhwar Peth 8250192130 Will You Miss Thi...The Most Attractive Pune Call Girls Budhwar Peth 8250192130 Will You Miss Thi...
The Most Attractive Pune Call Girls Budhwar Peth 8250192130 Will You Miss Thi...ranjana rawat
 
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...Soham Mondal
 
MANUFACTURING PROCESS-II UNIT-2 LATHE MACHINE
MANUFACTURING PROCESS-II UNIT-2 LATHE MACHINEMANUFACTURING PROCESS-II UNIT-2 LATHE MACHINE
MANUFACTURING PROCESS-II UNIT-2 LATHE MACHINESIVASHANKAR N
 
Call Girls in Nagpur Suman Call 7001035870 Meet With Nagpur Escorts
Call Girls in Nagpur Suman Call 7001035870 Meet With Nagpur EscortsCall Girls in Nagpur Suman Call 7001035870 Meet With Nagpur Escorts
Call Girls in Nagpur Suman Call 7001035870 Meet With Nagpur EscortsCall Girls in Nagpur High Profile
 
KubeKraft presentation @CloudNativeHooghly
KubeKraft presentation @CloudNativeHooghlyKubeKraft presentation @CloudNativeHooghly
KubeKraft presentation @CloudNativeHooghlysanyuktamishra911
 
VIP Call Girls Service Kondapur Hyderabad Call +91-8250192130
VIP Call Girls Service Kondapur Hyderabad Call +91-8250192130VIP Call Girls Service Kondapur Hyderabad Call +91-8250192130
VIP Call Girls Service Kondapur Hyderabad Call +91-8250192130Suhani Kapoor
 
result management system report for college project
result management system report for college projectresult management system report for college project
result management system report for college projectTonystark477637
 
Call for Papers - Educational Administration: Theory and Practice, E-ISSN: 21...
Call for Papers - Educational Administration: Theory and Practice, E-ISSN: 21...Call for Papers - Educational Administration: Theory and Practice, E-ISSN: 21...
Call for Papers - Educational Administration: Theory and Practice, E-ISSN: 21...Christo Ananth
 
HARDNESS, FRACTURE TOUGHNESS AND STRENGTH OF CERAMICS
HARDNESS, FRACTURE TOUGHNESS AND STRENGTH OF CERAMICSHARDNESS, FRACTURE TOUGHNESS AND STRENGTH OF CERAMICS
HARDNESS, FRACTURE TOUGHNESS AND STRENGTH OF CERAMICSRajkumarAkumalla
 
Processing & Properties of Floor and Wall Tiles.pptx
Processing & Properties of Floor and Wall Tiles.pptxProcessing & Properties of Floor and Wall Tiles.pptx
Processing & Properties of Floor and Wall Tiles.pptxpranjaldaimarysona
 
247267395-1-Symmetric-and-distributed-shared-memory-architectures-ppt (1).ppt
247267395-1-Symmetric-and-distributed-shared-memory-architectures-ppt (1).ppt247267395-1-Symmetric-and-distributed-shared-memory-architectures-ppt (1).ppt
247267395-1-Symmetric-and-distributed-shared-memory-architectures-ppt (1).pptssuser5c9d4b1
 
Call for Papers - African Journal of Biological Sciences, E-ISSN: 2663-2187, ...
Call for Papers - African Journal of Biological Sciences, E-ISSN: 2663-2187, ...Call for Papers - African Journal of Biological Sciences, E-ISSN: 2663-2187, ...
Call for Papers - African Journal of Biological Sciences, E-ISSN: 2663-2187, ...Christo Ananth
 
APPLICATIONS-AC/DC DRIVES-OPERATING CHARACTERISTICS
APPLICATIONS-AC/DC DRIVES-OPERATING CHARACTERISTICSAPPLICATIONS-AC/DC DRIVES-OPERATING CHARACTERISTICS
APPLICATIONS-AC/DC DRIVES-OPERATING CHARACTERISTICSKurinjimalarL3
 
UNIT-III FMM. DIMENSIONAL ANALYSIS
UNIT-III FMM.        DIMENSIONAL ANALYSISUNIT-III FMM.        DIMENSIONAL ANALYSIS
UNIT-III FMM. DIMENSIONAL ANALYSISrknatarajan
 
(SHREYA) Chakan Call Girls Just Call 7001035870 [ Cash on Delivery ] Pune Esc...
(SHREYA) Chakan Call Girls Just Call 7001035870 [ Cash on Delivery ] Pune Esc...(SHREYA) Chakan Call Girls Just Call 7001035870 [ Cash on Delivery ] Pune Esc...
(SHREYA) Chakan Call Girls Just Call 7001035870 [ Cash on Delivery ] Pune Esc...ranjana rawat
 
Extrusion Processes and Their Limitations
Extrusion Processes and Their LimitationsExtrusion Processes and Their Limitations
Extrusion Processes and Their Limitations120cr0395
 

Último (20)

VIP Call Girls Service Hitech City Hyderabad Call +91-8250192130
VIP Call Girls Service Hitech City Hyderabad Call +91-8250192130VIP Call Girls Service Hitech City Hyderabad Call +91-8250192130
VIP Call Girls Service Hitech City Hyderabad Call +91-8250192130
 
SPICE PARK APR2024 ( 6,793 SPICE Models )
SPICE PARK APR2024 ( 6,793 SPICE Models )SPICE PARK APR2024 ( 6,793 SPICE Models )
SPICE PARK APR2024 ( 6,793 SPICE Models )
 
(PRIYA) Rajgurunagar Call Girls Just Call 7001035870 [ Cash on Delivery ] Pun...
(PRIYA) Rajgurunagar Call Girls Just Call 7001035870 [ Cash on Delivery ] Pun...(PRIYA) Rajgurunagar Call Girls Just Call 7001035870 [ Cash on Delivery ] Pun...
(PRIYA) Rajgurunagar Call Girls Just Call 7001035870 [ Cash on Delivery ] Pun...
 
The Most Attractive Pune Call Girls Budhwar Peth 8250192130 Will You Miss Thi...
The Most Attractive Pune Call Girls Budhwar Peth 8250192130 Will You Miss Thi...The Most Attractive Pune Call Girls Budhwar Peth 8250192130 Will You Miss Thi...
The Most Attractive Pune Call Girls Budhwar Peth 8250192130 Will You Miss Thi...
 
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...
OSVC_Meta-Data based Simulation Automation to overcome Verification Challenge...
 
MANUFACTURING PROCESS-II UNIT-2 LATHE MACHINE
MANUFACTURING PROCESS-II UNIT-2 LATHE MACHINEMANUFACTURING PROCESS-II UNIT-2 LATHE MACHINE
MANUFACTURING PROCESS-II UNIT-2 LATHE MACHINE
 
Call Girls in Nagpur Suman Call 7001035870 Meet With Nagpur Escorts
Call Girls in Nagpur Suman Call 7001035870 Meet With Nagpur EscortsCall Girls in Nagpur Suman Call 7001035870 Meet With Nagpur Escorts
Call Girls in Nagpur Suman Call 7001035870 Meet With Nagpur Escorts
 
KubeKraft presentation @CloudNativeHooghly
KubeKraft presentation @CloudNativeHooghlyKubeKraft presentation @CloudNativeHooghly
KubeKraft presentation @CloudNativeHooghly
 
VIP Call Girls Service Kondapur Hyderabad Call +91-8250192130
VIP Call Girls Service Kondapur Hyderabad Call +91-8250192130VIP Call Girls Service Kondapur Hyderabad Call +91-8250192130
VIP Call Girls Service Kondapur Hyderabad Call +91-8250192130
 
result management system report for college project
result management system report for college projectresult management system report for college project
result management system report for college project
 
Call for Papers - Educational Administration: Theory and Practice, E-ISSN: 21...
Call for Papers - Educational Administration: Theory and Practice, E-ISSN: 21...Call for Papers - Educational Administration: Theory and Practice, E-ISSN: 21...
Call for Papers - Educational Administration: Theory and Practice, E-ISSN: 21...
 
HARDNESS, FRACTURE TOUGHNESS AND STRENGTH OF CERAMICS
HARDNESS, FRACTURE TOUGHNESS AND STRENGTH OF CERAMICSHARDNESS, FRACTURE TOUGHNESS AND STRENGTH OF CERAMICS
HARDNESS, FRACTURE TOUGHNESS AND STRENGTH OF CERAMICS
 
★ CALL US 9953330565 ( HOT Young Call Girls In Badarpur delhi NCR
★ CALL US 9953330565 ( HOT Young Call Girls In Badarpur delhi NCR★ CALL US 9953330565 ( HOT Young Call Girls In Badarpur delhi NCR
★ CALL US 9953330565 ( HOT Young Call Girls In Badarpur delhi NCR
 
Processing & Properties of Floor and Wall Tiles.pptx
Processing & Properties of Floor and Wall Tiles.pptxProcessing & Properties of Floor and Wall Tiles.pptx
Processing & Properties of Floor and Wall Tiles.pptx
 
247267395-1-Symmetric-and-distributed-shared-memory-architectures-ppt (1).ppt
247267395-1-Symmetric-and-distributed-shared-memory-architectures-ppt (1).ppt247267395-1-Symmetric-and-distributed-shared-memory-architectures-ppt (1).ppt
247267395-1-Symmetric-and-distributed-shared-memory-architectures-ppt (1).ppt
 
Call for Papers - African Journal of Biological Sciences, E-ISSN: 2663-2187, ...
Call for Papers - African Journal of Biological Sciences, E-ISSN: 2663-2187, ...Call for Papers - African Journal of Biological Sciences, E-ISSN: 2663-2187, ...
Call for Papers - African Journal of Biological Sciences, E-ISSN: 2663-2187, ...
 
APPLICATIONS-AC/DC DRIVES-OPERATING CHARACTERISTICS
APPLICATIONS-AC/DC DRIVES-OPERATING CHARACTERISTICSAPPLICATIONS-AC/DC DRIVES-OPERATING CHARACTERISTICS
APPLICATIONS-AC/DC DRIVES-OPERATING CHARACTERISTICS
 
UNIT-III FMM. DIMENSIONAL ANALYSIS
UNIT-III FMM.        DIMENSIONAL ANALYSISUNIT-III FMM.        DIMENSIONAL ANALYSIS
UNIT-III FMM. DIMENSIONAL ANALYSIS
 
(SHREYA) Chakan Call Girls Just Call 7001035870 [ Cash on Delivery ] Pune Esc...
(SHREYA) Chakan Call Girls Just Call 7001035870 [ Cash on Delivery ] Pune Esc...(SHREYA) Chakan Call Girls Just Call 7001035870 [ Cash on Delivery ] Pune Esc...
(SHREYA) Chakan Call Girls Just Call 7001035870 [ Cash on Delivery ] Pune Esc...
 
Extrusion Processes and Their Limitations
Extrusion Processes and Their LimitationsExtrusion Processes and Their Limitations
Extrusion Processes and Their Limitations
 

Chainer-Compiler 動かしてみた

  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8. è – è – $ docker build -f docker/Dockerfile.tmpl -t chainer-compiler:cuda-10.0 . $ nvidia-docker run -i -t --cap-add=SYS_PTRACE --security-opt="seccomp=unconfined" \ chainer-compiler:cuda-10.0 /bin/zsh
  • 9. è =================================== FAILURES =================================== ____________________________ TestPReLU.test_output _____________________________ self = <tests.functions_tests.test_activations.TestPReLU testMethod=test_output> def test_output(self): > self.expect(self.model, self.x) third_party/onnx-chainer/tests/functions_tests/test_activations.py:61: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ third_party/onnx-chainer/tests/helper.py:106: in expect self.check_out_values(test_path, input_names=graph_input_names) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ test_path = 'out/opset7/test_prelu', input_names = ['Input_0'] def check_model_expect(test_path, input_names=None): if not ONNXRUNTIME_AVAILABLE: > raise ImportError('ONNX Runtime is not found on checking module.') E ImportError: ONNX Runtime is not found on checking module. third_party/onnx-chainer/onnx_chainer/testing/test_onnxruntime.py:39: ImportError
  • 11. è è – – $ ./setup.sh $ ./build/tools/run_onnx --device cuda --test data/resnet50 --trace $ ./build/tools/run_onnx --device cuda --test data/mnist --trace
  • 12. è è – – Check `chainerx::Shape(type.shape().begin(), type.shape().end())' == `a.shape()' failed! in CheckType at ../runtime/xcvm.cc:57: ((1, 8, 28, 28) vs (1, 8, 24, 24)) Shape check failed in output #0: Conv(Input3, Parameter5) -> (Convolution28_Output_0) zsh: abort ./build/tools/run_onnx --device cuda --test data/mnist --trace name: "Convolution28" op_type: "Conv" Attribute { name: "auto_pad" s: "SAME_UPPER" type: STRING }
  • 13. auto pads = [&node]() { std::vector<int64_t> pads = node.pads(); + + // Complement from auto_pad + if (pads.size() == 0) { + if (node.auto_pad() == "SAME_UPPER") { + const Value* weight = node.input(1); + const int pad_ndim = (weight->type().ndim() - 2)*2; + CHECK_GT(pad_ndim, 0) << weight->type().DebugString(); + pads.resize(pad_ndim); + for (int i = 0; i < pad_ndim/2; ++i) { + pads[i] = pads[i+pad_ndim/2] = weight->type().dims()[i+2] / 2; + } + } + } compiler/emitter.cc:167 Verifying the result... OK: Plus214_Output_0 Elapsed: 23.395 msec OK!
  • 14.
  • 15. void RunMain(const std::vector<std::string>& argv) { LOG() << "Loading model..." << std::endl; onnx::ModelProto xmodel( LoadLargeProto<onnx::ModelProto>(onnx_path)); // ONNXモデルのロード Model model(xmodel); // onnx::ModelProtoからModelを構築 /* ... */ ModelRunner model_runner(args, initial_free_bytes, &model); // Modelに対してパスを実行しXCVMに変換 /* ... */ for (const std::unique_ptr<TestCase>& test_case : test_cases) { LOG() << "Running for " << test_case->name << std::endl; InOuts inputs(model_runner.params()); InOuts outputs(model_runner.Run(inputs)); // XCVMを用いてモデルを実行 } /* ... */ } tools/run_onnx.cc
  • 16. è class ModelRunner { public: ModelRunner(const cmdline::parser& args, int64_t initial_free_bytes, Model* model) : model_(model), args_(args), initial_free_bytes_(initial_free_bytes) { if (args.exist("backprop_two_phase")) { /* ... */ } else { LOG() << "Constructing model..." << std::endl; RunDefaultPasses(model->mutable_graph(), args_.exist("backprop")); // 各種パスを適用しLowering CompileModel(model, &xcvm_); // XCVMに変換 } /* ... */ } tools/run_onnx.cc
  • 17. è void CompileModel(Model* model, std::unique_ptr<XCVM>* xcvm, const char* name = nullptr, bool gen_backprop = false) { std::string out_onnx = args_.get<std::string>("out_onnx"); if (!out_onnx.empty()) { onnx::ModelProto xmodel; model->ToONNX(&xmodel); std::ofstream ofs(out_onnx); CHECK(xmodel.SerializeToOstream(&ofs)); // ONNXのシリアライズ出力 } LOG() << "Generate code..." << std::endl; XCProgramProto xcvm_prog; xcvm::Emit(*model, &xcvm_prog, trace_level() > 0); // ModelをXCVMProgramProtoに変換 const std::string out_xcvm = args_.get<std::string>("out_xcvm"); if (!out_xcvm.empty()) { std::ofstream ofs(out_xcvm); CHECK(ofs) << "Failed to open output XCVM: " << out_xcvm; CHECK(xcvm_prog.SerializeToOstream(&ofs)); // XCVMのシリアライズ出力 } xcvm->reset(new XCVM(xcvm_prog)); // XCVMProgramProtoをXCVMでラップ } tools/run_onnx.cc
  • 18.
  • 20. è – class Model { public: explicit Model(const onnx::ModelProto& xmodel); private: int64_t ir_version_; std::vector<onnx::OperatorSetIdProto> opset_import_; std::string producer_name_; std::string producer_version_; std::string domain_; int64_t model_version_; std::string doc_string_; std::map<std::string, std::string> metadata_props_; std::unique_ptr<Graph> graph_; }; compiler/model.h
  • 21. è – class Graph { public: explicit Graph(const onnx::GraphProto& xgraph); private: std::vector<Value*> output_values_; std::vector<Value*> input_values_; std::vector<Value*> temp_values_; std::vector<std::unique_ptr<Value>> all_values_; std::vector<Node*> nodes_; std::vector<std::unique_ptr<Node>> nodes_buf_; std::string name_; std::string doc_string_; std::map<std::string, int> ids_; }; compiler/graph.h
  • 22. è – class Node : public NodeBase { public: Node(const onnx::NodeProto& xnode, const std::vector<Value*>& inputs, const std::vector<Value*>& outputs); Node(const std::string& name, OpType op_type, const std::vector<Value*>& inputs, const std::vector<Value*>& outputs); private: std::vector<Value*> inputs_; std::vector<Value*> outputs_; std::string name_; std::string domain_; std::string doc_string_; bool detached_ = false; }; compiler/node.h
  • 23. è – class NodeBase { public: enum OpType { kIdentity, kNeg, ... }; protected: std::vector<float> activation_alpha_; bool was_activation_alpha_set_ = false; std::vector<float> activation_beta_; ... }; build/compiler/gen_node_base.h class NodeDef(object): def __init__(self, op_type, num_inputs, num_outputs, **kwargs): self.op_type = op_type self.num_inputs = num_inputs self.num_outputs = num_outputs self.attributes = kwargs self.attributes.update(CHAINER_COMPILERX_GLOBAL_ATTRS) self.attr_defs = {} # To be filled after parsed. NODES.append(self) NodeDef('Identity', 1, 1) NodeDef('Neg', 1, 1) … def gen_gen_node_base_h(): public_lines = [] private_lines = [] public_lines.append('enum OpType {') for node in NODES: public_lines.append('k%s,' % (node.op_type)) … compiler/gen_node.py
  • 24. è – class Value { public: enum Kind { kTemp = 0, kInput = 1, kOutput = 2, kNull = 4 }; Value(const onnx::ValueInfoProto& xvalue, Kind kind); private: Kind kind_{Kind::kTemp}; std::string name_; std::unique_ptr<Type> type_; std::string doc_string_; std::unique_ptr<Tensor> initializer_; std::vector<Node*> users_; Node* producer_ = nullptr; Value* grad_ = nullptr; int counter_ = 0; }; compiler/value.h
  • 25. è – class Tensor { public: typedef std::unique_ptr<void, decltype(&std::free)> UniqueData; explicit Tensor(const onnx::TensorProto& xtensor); private: std::vector<int64_t> dims_; Dtype dtype_; UniqueData data_; std::string name_; std::string doc_string_; }; compiler/tensor.h
  • 26.
  • 27. è
  • 28. è – – • void InferDtypeAndShape(Node* node) { InferDtype(node); } void InferAllDtypeAndShape(Graph* graph) { for (Node* node : graph->GetTopologicallySortedNodes()) { InferDtypeAndShape(node); } } compiler/type_inference.cc
  • 29. è Dtype CoerceDtype(Dtype dtype0, Dtype dtype1) { if (dtype0 == dtype1) return dtype0; if (dtype0 == Dtype::kUnknown || dtype1 == Dtype::kUnknown) return Dtype::kUnknown; if (dtype0.IsFloat() && !dtype1.IsFloat()) return dtype0; if (!dtype0.IsFloat() && dtype1.IsFloat()) return dtype1; if (dtype0.SizeOf() > dtype1.SizeOf()) return dtype0; if (dtype0.SizeOf() < dtype1.SizeOf()) return dtype1; if (dtype1 == Dtype::kBool) return dtype0; if (dtype0 == Dtype::kBool) return dtype1; if (dtype0 == Dtype::kUInt8 || dtype1 == Dtype::kUInt8) return Dtype::kInt16; CHECK(false) << "Unknown type coerce: " << dtype0.ToString() << " vs " << dtype1.ToString(); } void InferDtype(Node* node) { … case Node::kConv: case Node::kConvTranspose: case Node::kChainerConvGradWeight: { Dtype dtype = CoerceDtype(in0, in1); if (node->inputs().size() >= 3) dtype = CoerceDtype(dtype, node->input(2)->type().dtype()); oset(0, dtype); break; } … } compiler/dtype_inference.cc
  • 30. è void Simplify(const CompilerConfig& ccfg, Graph* graph, bool gen_backprop) { std::map<Node::OpType, SimplifierFn> simplifiers; /* 各Nodeのsimplifierの登録 */ CHECK(simplifiers.emplace(Node::kConv, ReplaceConv).second); while (replaced) { replaced = false; for (Node* node : graph->GetLiveNodes()) { auto found = simplifiers.find(node->op_type()); if (found == simplifiers.end()) continue; if (found->second(graph, node)) { graph->DetachNode(node); replaced = true; } } } } compiler/simplifier.cc
  • 31. è bool ReplaceConv(Graph* graph, Node* node) { CHECK_LT(0, node->group()); if (node->group() == 1) return false; GraphBuilder gb(graph, "SimplifyConvGroup", node->output(0)); // Split the input. std::vector<Value*> inputs; for (int i = 0; i < node->group(); ++i) { inputs.push_back(gb.Temp()); } gb.MOp(Node::kSplit, {node->input(0)}, inputs)->set_axis(1); /* weight, bias, outputのsplit処理 */ gb.Op(Node::kConcat, outputs, node->output(0))->producer()->set_axis(1); return true; } compiler/simplifier.cc
  • 32. è – void DoConstantPropagation(Graph* graph, Node* node) { /* Nodeの入力を集める処理 */ for (size_t i = 0; i < next_values.size(); ++i) { auto& next_value = next_values[i]; GraphBuilder gb(graph, "Const", node->output(i)); if (next_value->is_tensor()) { gb.Op(Node::kConstant, {}, node->output(i))->producer()->set_tensor_value(next_value->ReleaseTensor()); } } /* 置き換え前NodeのDetach */ } void PropagateConstants(Graph* graph) { bool replaced = true; while (replaced) { replaced = false; for (Node* node : graph->GetLiveNodes()) { if (!HasConstantInputsOnly(*node)) continue; if (MaybePropagateConstant(graph, node)) { replaced = true; } } } } compiler/constant_propagation.cc
  • 33. è – void FuseElementwiseOperations(Graph* graph) { const std::set<Node::OpType> fusable_ops = { Node::kIdentity, Node::kAdd, /* ... */ }; auto is_fusable = [&fusable_ops](const Node& node) { /* ... */ }; FuseAllConnectedNodes("nvrtc", graph, 2, is_fusable); } void FuseOperations(Graph* graph, bool use_tvm, bool use_ngraph) { /* subgraphのfusion */ if (use_ngraph) { FuseNGraphOperations(graph); } if (use_tvm) { FuseTVMOperations(graph); } FuseElementwiseOperations(graph); } compiler/fusion.cc
  • 34.
  • 38. è
  • 39. è – è – InOuts Run(const InOuts& inputs) { if (trace_level()) std::cerr << "Running XCVM..." << std::endl; InOuts outputs = xcvm_->Run(inputs, xcvm_opts_); /* ... */ return outputs; } compiler/run_onnx.cc void XCVM::Run(XCVMState* state) { /* Stateの初期化 */ while (true) { int pc = state->pc(); if (pc >= program_.size()) break; XCVMOp* op = program_[pc].get(); try { op->Run(state); } catch (...) { std::cerr << "Exception in " << op->debug_info() << std::endl; throw; } compiler/run_onnx.cc
  • 40. è – class XCVMOp { public: explicit XCVMOp(const XCInstructionProto& inst); virtual void Run(XCVMState* state) = 0; protected: XCInstructionProto inst_; const int64_t id_; const XCInstructionProto::Op op_; const std::string name_; }; compiler/xcvm/xcvm_op.h class ConvOp : public XCVMOp { public: explicit ConvOp(const XCInstructionProto& inst); chainerx::Array RunImpl(XCVMState* st, const chainerx::Array& x, const chainerx::Array& w, const nonstd::optional<chainerx::Array>& b); virtual void Run(XCVMState* st); private: int x; int w; int b; chainerx::StackVector<int64_t, chainerx::kMaxNdim> strides; chainerx::StackVector<int64_t, chainerx::kMaxNdim> pads; int y; }; build/runtime/gen_xcvm_ops.h
  • 41. è – è – void ConvOp::Run(XCVMState* st) { /* ... */ st->SetArray(y, RunImpl(st, st->GetArray(x), st->GetArray(w), st->GetOptionalArray(b))); /* ... */ } build/runtime/gen_xcvm_ops.cc chainerx::Array ConvOp::RunImpl( XCVMState* st, const chainerx::Array& x, const chainerx::Array& w, const nonstd::optional<chainerx::Array>& b) { return chainerx::Conv(x, w, b, ComplementStride(strides, x), ComplementPad(pads, x)); } build/runtime/gen_xcvm_ops.cc