unsigned int CPU_Ray_Tracer::kd_tree_traverse(const Ray4f &ray, Hit *pHit)
{
	struct Traversal_Data 
	{
		Traversal_Data(unsigned int a_noffs,float a_t_far)
		{
			far_node_offset = a_noffs;
			t_far = a_t_far;
		}

		unsigned int far_node_offset;
		float t_far;
	};

	std::stack<Traversal_Data> nstack;
	float t_near,t_far;
	// intersect ray with box
	bool hit_box = Intersect<Ray4f,AABB4f>::exec(ray,kd_tree->GetBindingBox(),&t_near,&t_far);

	if(t_near <= 0.0f)
		t_near = 0.0f;

	if( !hit_box || t_near >= t_far ) 
		return 0;

	vec4f one(1.0f,1.0f,1.0f,1.0f);
	vec4f rdir_inv = _mm_div_ps(one,ray.dir);

	const int left_or_right[4] = { (ray.dir[0] >= 0)?1:0,  
								   (ray.dir[1] >= 0)?1:0,  
								   (ray.dir[2] >= 0)?1:0, 0 };

	unsigned int nodeOffset=0;
	KdTreeNode node = *(kd_tree->GetRoot());

	float t_split = 0;

	while(true)
	{
		//err_stream << nodeOffset << std::endl;
		while(!node.Leaf())
		{
			// get axis number and t according to split_pos plane
			int axis = node.GetAxis();
			t_split = (node.GetSplitPos() - ray.pos[axis])*rdir_inv[axis];

			// get far and near nodes.
			// assume { left = near, far = right }; if(ray.dir[axis] < 0) swap left and right nodes.
			// this trick with left_or_right[axis] allows us to remove one 'if'
			unsigned int nearNodeOffset = node.GetLeftOffset() + (1-left_or_right[axis]);
			unsigned int farNodeOffset  = node.GetLeftOffset() + left_or_right[axis];
	
			// now kd-tree traversal algorithm specific
			if(t_split <= t_near)
			{
				node = kd_tree->GetNodeByOffset(farNodeOffset);
			}
			else if(t_split >= t_far)
			{
				node = kd_tree->GetNodeByOffset(nearNodeOffset);
			}
			else
			{
				nstack.push(Traversal_Data(farNodeOffset,t_far));
				node = kd_tree->GetNodeByOffset(nearNodeOffset);
				t_far = t_split;
			}

		}

		float t_hit = IntersectAllPrimitivesInLeaf(ray,node,pHit);
		if (t_hit <= t_far)
			return 0xFFFFFFFF; // early ray termination

		if (nstack.empty())
			return 0;	// noting else to traverse any more...
	
		t_near = t_far;
		
		//( t_near, t_far ) = stack.pop();
		nodeOffset = nstack.top().far_node_offset;
		t_far      = nstack.top().t_far;
		nstack.pop();

		node = kd_tree->GetNodeByOffset(nodeOffset);
	}

	return 0;
}