426 nvars = booster.num_feature()
427 nclasses = booster.num_model_per_iteration()
428 logging.info(
"using %d input features with %d classes", nvars, nclasses)
430 data_input =
get_test_data(booster.feature_name(), test_file, ntests)
433 results_lgbm = booster.predict(data_input)
435 "lgbm (vectorized) timing = %s ms/input", (time.time() - start) * 1000 / len(data_input)
438 input_values_vector = ROOT.std.vector(
"float")()
439 results_MVAUtils = []
441 for input_values
in data_input:
442 input_values_vector.clear()
443 for v
in input_values:
444 input_values_vector.push_back(v)
445 output_MVAUtils = np.asarray(mva_utils.GetMultiResponse(input_values_vector, nclasses))
446 results_MVAUtils.append(output_MVAUtils)
448 "mvautils (not vectorized+overhead) timing = %s ms/input",
449 (time.time() - start) * 1000 / len(data_input),
452 stop_event_loop =
False
453 for ievent, (input_values, output_lgbm, output_MVAUtils)
in enumerate(
454 zip(data_input, results_lgbm, results_MVAUtils), 1
456 if not np.allclose(output_lgbm, output_MVAUtils):
457 stop_event_loop =
True
458 logging.info(
"--> output are different on input %d/%d:\n", ievent, len(data_input))
459 for ivar, input_value
in enumerate(input_values):
460 logging.info(
"var %d: %.15f", ivar, input_value)
461 logging.info(
"=" * 50)
462 logging.info(
" mvautils lgbm")
463 for ioutput, (o1, o2)
in enumerate(zip(output_MVAUtils, output_lgbm)):
464 diff_flag =
"" if np.allclose(o1, o2)
else "<---"
465 logging.info(
"output %3d %.5e %.5e %s", ioutput, o1, o2, diff_flag)
468 stop_tree_loop =
False
469 for itree, output_tree_lgbm
in enumerate(output_trees_lgbm):
470 output_tree_mva_utils = [
471 mva_utils.GetTreeResponse(
list2stdvector(input_values), itree * nclasses + c)
472 for c
in range(nclasses)
474 if not np.allclose(output_tree_mva_utils, output_tree_lgbm[0]):
475 stop_tree_loop =
True
476 logging.info(
"first tree/class with different answer (%d)", itree)
477 for isubtree, (ol, om)
in enumerate(
478 zip(output_tree_lgbm[0], output_tree_mva_utils)
480 if not np.allclose(ol, om):
481 logging.info(
"different in position %d", isubtree)
482 logging.info(
"lgbm: %f", ol)
483 logging.info(
"mvautils: %f", om)
484 logging.info(
"=" * 50)
486 "tree %d (itree) * %d (nclasses)" "+ %d (isubtree) = %d",
490 itree * nclasses + isubtree,
492 mva_utils.PrintTree(itree * nclasses + isubtree)
501 booster.dump_model()[
"tree_info"][itree * nclasses + isubtree][
506 for node_info
in node_infos:
507 value = input_values[node_info[0]]
508 threshold = node_info[1]
509 if not np.isnan(value)
and (value <= threshold) != (
510 np.float32(value) <= np.float32(threshold)
513 "the problem could be due to double"
514 "(lgbm) -> float (mvautil) conversion"
515 "for variable %d: %f and threshold %f",
520 stop_tree_loop =
False
521 stop_event_loop =
False